Add original title to story text

This commit is contained in:
Timothy Allen 2023-12-30 12:29:56 +02:00
parent 61d32c5286
commit bedf82d8a1

View File

@@ -105,6 +105,9 @@ def parse_and_extract(input_dir, verbose):
text = list() text = list()
lang = "" lang = ""
try: try:
for t in doc.find('./publisher_headline'):
if t.text is not None:
text.append(t.text)
for p in doc.find('./body'): for p in doc.find('./body'):
if p.text is not None: if p.text is not None:
text.append(p.text) text.append(p.text)
@@ -184,6 +187,7 @@ def main():
articles = parse_and_extract(args.input, args.verbose) articles = parse_and_extract(args.input, args.verbose)
data = scrub_data(articles, args.verbose) data = scrub_data(articles, args.verbose)
#print(data)
write_csv(data, args.output) write_csv(data, args.output)