diff --git a/africat/aa_create_dataset.py b/africat/aa_create_dataset.py index dcaf528..6bf7ee2 100755 --- a/africat/aa_create_dataset.py +++ b/africat/aa_create_dataset.py @@ -96,6 +96,8 @@ def parse_and_extract(input_dir, verbose): cats = list() for cat in doc.findall('./category'): + # TODO check against a list of current categories, + # and strip any non-current categories cats.append(cat.text) #entry["categories"] = cats # if you want a list entry["categories"] = ";".join(cats) # if you want a string