diff --git a/load_spreadsheet.py b/load_spreadsheet.py
index 3a2a8b2..4040f1d 100755
--- a/load_spreadsheet.py
+++ b/load_spreadsheet.py
@@ -48,9 +48,10 @@ def main():
 
     if args.calculate:
         #position_calculations('Winelands Marathon')
-        position_calculations()
+        position_calculations(args.calculate)
         return
-    elif args.scrape_web:
+    elif args.scrapeurl:
+        requrls = args.scrapeurl
         spreadsheets = []
         uniqurl = []
         wpa = 'http://www.wpa.org.za/Events/DynamicEvents.asmx/BuildEventDisplay'
@@ -79,10 +80,16 @@ def main():
             if dist is not None:
                 raceinfo['distance'] = dist.string
             spreadsheets.append(raceinfo)
-        #pp.pprint(spreadsheets)
-        #sys.exit(1)
         for race in spreadsheets:
-            url = race['url']
+            url = race['url']
+            event = race['event']
+            ''' Only parse one spreadsheet from the WPA website, from the commandline '''
+            isthisevent = False
+            for checkurl in requrls:
+                if checkurl and re.search(checkurl, event, flags=re.IGNORECASE):
+                    isthisevent = True
+            if requrls[0] is not None and url not in requrls and not isthisevent:
+                continue
             with urllib.request.urlopen(url) as response, tempfile.TemporaryDirectory() as tmpdir:
                 log.info("Loading data from URL {}".format(race['url']))
                 data = response.read()
@@ -98,6 +105,7 @@ def main():
                     raise
                 else:
                     load_into_db(rows)
+                    position_calculations(event)
             log.debug("\n")
 
 
@@ -106,6 +114,7 @@ def main():
         rows = read_spreadsheet(args.input_file, src=args.input_file)
         log.info("Loading data from file {}".format(args.input_file))
         load_into_db(rows)
+        position_calculations()
 
     else:
         for message in mailbox.Maildir(MAILDIR):
@@ -133,18 +142,25 @@ def main():
                 pass
             else:
                 load_into_db(rows)
+                position_calculations()
 
-    position_calculations()
     return
 
 
-def position_calculations(event=None):
+def position_calculations(events=None):
     db = MySQLdb.connect(user='aac', passwd='saOAcCWHg4LaoSSA', db='AAC', use_unicode=True, charset="utf8", cursorclass=MySQLdb.cursors.DictCursor)
     c = db.cursor()
-    where = ''
-    if event:
-        where = 'WHERE event LIKE "%{}%"'.format(event)
+    where = ''
+    wheres = []
+    if isinstance(events, list):
+        for event in events:
+            if event:
+                wheres.append('event LIKE "%{}%"'.format(event))
+    elif isinstance(events, str):
+        wheres.append('event LIKE "%{}%"'.format(events))
+    if wheres:
+        where = 'WHERE ' + ' OR '.join(wheres)
     sql = 'SELECT event, date, distance FROM `results` {} GROUP BY event, date, distance'.format(where)
     c.execute(sql)
     #log.debug(c._last_executed)
 
@@ -153,7 +169,7 @@ def position_calculations(events=None):
 
     for race in eventlist:
         log.debug(race)
-        log.debug("Recalculating postion information for {}".format(race['event'])
+        log.debug("Recalculating postion information for {}".format(race['event']))
         ''' Calculate total finishers per race '''
         sql = 'UPDATE `results` AS r, (SELECT event, date, distance, COUNT(distance) AS finishers FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" GROUP BY event, date, distance) AS f SET r.finishers = f.finishers WHERE r.event = f.event AND r.date = f.date AND r.distance = f.distance AND r.finishers IS NULL;'.format(race['event'], race['date'], race['distance'])
         c.execute(sql)
@@ -476,11 +492,11 @@ def clean_data(input_rows):
 def parse_arguments():
     parser = argparse.ArgumentParser(description='Load a spreadsheet containing WPA results into a database')
     parser.add_argument(
-        '--web', '-w', action='store_true', required=False, dest='scrape_web',
-        help='Scrape WPA website')
+        '--url', '-u', action='append', required=False, dest='scrapeurl', nargs="?",
+        help='Scrape WPA website, or, if a link is specified, the spreadsheet at that link')
     parser.add_argument(
-        '--calc', '-c', action='store_true', required=False, dest='calculate',
-        help='Calculate unset positions in the database')
+        '--calc', '-c', action='append', required=False, dest='calculate', nargs="?",
+        help='Calculate unset positions in the database, optionally just for specified races')
     parser.add_argument(
         '--input', '-i', action='store', required=False, type=str, dest='input_file',
         help='Manually select the spreadsheet to be imported')