Add original title to story text
This commit is contained in:
parent
61d32c5286
commit
bedf82d8a1
@ -105,6 +105,9 @@ def parse_and_extract(input_dir, verbose):
|
||||
text = list()
|
||||
lang = ""
|
||||
try:
|
||||
for t in doc.find('./publisher_headline'):
|
||||
if t.text is not None:
|
||||
text.append(t.text)
|
||||
for p in doc.find('./body'):
|
||||
if p.text is not None:
|
||||
text.append(p.text)
|
||||
@ -184,6 +187,7 @@ def main():
|
||||
articles = parse_and_extract(args.input, args.verbose)
|
||||
|
||||
data = scrub_data(articles, args.verbose)
|
||||
#print(data)
|
||||
|
||||
write_csv(data, args.output)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user