From ee9f7f55c2285f9c76abf18d001862997e266b41 Mon Sep 17 00:00:00 2001 From: Timothy Allen Date: Tue, 14 Aug 2018 11:08:19 +0200 Subject: [PATCH] Fix position calculations, since SQL calculations are flaky; replace rankings with top results. --- aacstats.py | 12 +- load_spreadsheet.py | 287 +++++++++++++++++++++++++++------------- static/style.css | 2 +- templates/list-top.html | 52 ++++++++ templates/tabs.html | 5 +- 5 files changed, 264 insertions(+), 94 deletions(-) create mode 100644 templates/list-top.html diff --git a/aacstats.py b/aacstats.py index e5bbfc4..e344ef2 100644 --- a/aacstats.py +++ b/aacstats.py @@ -10,6 +10,8 @@ app = Flask(__name__) PAGE_SIZE=20 MIN_MONTHS_FOR_LISTINGS=3 +# TODO: Replace rankings with top recent results: position, sexposition, or catposition < 10, sorted by event and date + @app.template_filter('urlescape') def urlescape(string): if string is None: @@ -155,6 +157,11 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(), select = 'TRIM(CONCAT_WS(" ", name, surname)) AS person, FORMAT(SUM(distance),0) AS total' group = 'GROUP BY TRIM(CONCAT_WS(" ", name, surname))' order = 'SUM(distance) DESC, TRIM(CONCAT_WS(" ", name, surname))' + elif listing == 'top': + select = '*' + group = '' + where += ' AND (position < 20 OR (position/finishers*1000) < 20 OR catposition < 10 OR (catposition/catfinishers*1000) < 10 OR sexposition < 10 OR (sexposition/sexfinishers*1000) < 10)' + order = 'date DESC, event' elif listing == 'rankings': # SELECT query.person, query.positions, query.races, query.podiums, query.score, sex.positions AS sexpositions, sex.races AS sexraces, cat.positions AS catpositions, cat.races catraces FROM (SELECT *, CONCAT_WS(" ", name, surname) person, SUM(position) positions, COUNT(event) races, SUM(position)/COUNT(event) podiums, FORMAT(SUM(position)/COUNT(event),1) score FROM `results` WHERE club LIKE "AAC" GROUP BY CONCAT_WS(" ", name, surname) ) AS query INNER JOIN (SELECT *, CONCAT_WS(" ", name, surname) person, SUM(sexposition) as positions, COUNT(event) races FROM `results` WHERE club LIKE "AAC" AND sexposition > 0 GROUP BY CONCAT_WS(" ", name, surname) ) sex ON query.person=sex.person INNER JOIN (SELECT *, CONCAT_WS(" ", name, surname) person, SUM(catposition) as positions, COUNT(event) races FROM `results` WHERE club LIKE "AAC" AND catposition > 0 GROUP BY CONCAT_WS(" ", name, surname) ) cat ON query.person=cat.person WHERE query.person NOT LIKE "%no return%" AND query.person NOT LIKE "%no card%" AND query.person NOT LIKE "%blank card%" AND query.person NOT LIKE "%disqualified%" GROUP BY query.person ORDER BY podiums, races DESC; select = 'TRIM(CONCAT_WS(" ", name, surname)) AS person, SUM(position) AS positions, COUNT(event) AS races, SUM(position)/COUNT(event) AS podiums, FORMAT(SUM(position)/COUNT(event), 1) AS score' @@ -164,7 +171,6 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(), select = 'licence, date, TRIM(CONCAT_WS(" ", name, surname)) AS person' group = 'GROUP BY licence, name, surname' order = 'TRIM(CONCAT_WS(" ", name, surname)) ASC' - ''' Add elements common to multiple types of queries ''' if year: if not isinstance(year, (int, float)): @@ -194,6 +200,8 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(), elif listing == 'runners': select = 'COUNT(*) FROM ( SELECT COUNT(name)' close = ') AS runners' + elif listing == 'top': + pass elif listing == 'rankings': select = 'COUNT(*) FROM ( SELECT COUNT(name)' close = ') AS rankings' @@ -222,7 +230,7 @@ def list(title=None, year=None): year = now().year title = 'runners' title = urllib.parse.unquote_plus(title) - if title not in ( 'races', 'rankings', 'runners', 'licence', ): + if title not in ( 'races', 'top', 'rankings', 'runners', 'licence', ): abort(404) ''' In early January, we'll be left with blank pages in listings, since there won't diff --git a/load_spreadsheet.py b/load_spreadsheet.py index 4040f1d..146294c 100755 --- a/load_spreadsheet.py +++ b/load_spreadsheet.py @@ -24,6 +24,7 @@ import os import re import sys import tempfile +from collections import defaultdict import pprint # Set up MySQL database, if not done @@ -68,8 +69,8 @@ def main(): raceinfo = dict() link = event.find('a', href=re.compile('.xlsx?$')) name = event.find('td', class_=re.compile('EventHeadline')) - date = event.find('td', class_=re.compile('EventDate')) - dist = event.find('td', class_=re.compile('EventDist'), string=re.compile('^\s*[\d+\.]\s*(KM)?\s*$')) + date = event.find('td', class_=re.compile('EventDate'), string=re.compile('^\s*\d+[/-]\d+[/-]\d+')) + dist = event.find('td', class_=re.compile('Events?Distance'), string=re.compile('^\s*[\d\.,]+\s*(KM)?\s*$', flags=re.IGNORECASE)) if link is not None and name is not None: if not link['href'] in uniqurl: uniqurl.append(link['href']) @@ -86,15 +87,19 @@ def main(): ''' Only parse one spreadsheet from the WPA website, from the commandline ''' isthisevent = False for checkurl in requrls: - if re.search(checkurl, event, flags=re.IGNORECASE): + if checkurl and re.search(checkurl, event, flags=re.IGNORECASE): isthisevent = True - if type(requrls[0]) != None and url not in requrls and not isthisevent: + if requrls[0] and url not in requrls and not isthisevent: + continue + if file_in_db(url): continue with urllib.request.urlopen(url) as response, tempfile.TemporaryDirectory() as tmpdir: - log.info("Loading data from URL {}".format(race['url'])) data = response.read() urlparts = urllib.parse.urlparse(url) filename = os.path.basename(urlparts.path) + if re.search('WALKRESULTS', filename, flags=re.IGNORECASE): + continue + log.info("Loading data from URL {}".format(url)) filepath = os.path.join(tmpdir, filename) with open(filepath, 'wb') as fp: fp.write(data) @@ -104,17 +109,13 @@ def main(): log.warning("ERROR: Unable to load data from URL {}".format(url)) raise else: - load_into_db(rows) - position_calculations(event) + load_into_db(rows, event) log.debug("\n") - - elif args.input_file: rows = read_spreadsheet(args.input_file, src=args.input_file) log.info("Loading data from file {}".format(args.input_file)) load_into_db(rows) - position_calculations() else: for message in mailbox.Maildir(MAILDIR): @@ -142,7 +143,6 @@ def main(): pass else: load_into_db(rows) - position_calculations() return @@ -167,9 +167,7 @@ def position_calculations(events=None): eventlist = [e for e in c.fetchall()] for race in eventlist: - log.debug(race) - - log.debug("Recalculating postion information for {}".format(race['event'])) + log.debug("Recalculating position information for {}".format(race['event'])) ''' Calculate total finishers per race ''' sql = 'UPDATE `results` AS r, (SELECT event, date, distance, COUNT(distance) AS finishers FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" GROUP BY event, date, distance) AS f SET r.finishers = f.finishers WHERE r.event = f.event AND r.date = f.date AND r.distance = f.distance AND r.finishers IS NULL;'.format(race['event'], race['date'], race['distance']) c.execute(sql) @@ -181,12 +179,21 @@ def position_calculations(events=None): c.execute(sql) result = c.fetchall() - ''' Update individual positions per sex per race ''' - c.execute('SET @rank = 0;') - sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := IF(@sex = sex, @rank+1, 1) AS srank, @sex := sex FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND sex IS NOT NULL ORDER BY sex, position) AS s SET r.sexposition = s.srank WHERE r.result_key = s.result_key AND r.sexposition IS NULL;'.format(race['event'], race['date'], race['distance']) - #print(sql) - c.execute(sql) - result = c.fetchall() +# ''' Update individual positions per sex per race ''' +# sql = 'SELECT DISTINCT sex from `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND sex IS NOT NULL;'.format(race['event'], race['date'], race['distance']) +# c.execute(sql) +# result = c.fetchall() +# for row in result: +# sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := @rank+1 AS rank FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND sex LIKE "{}" ORDER BY position) AS c, (SELECT @rank := 0) AS n SET r.sexposition = c.rank WHERE r.result_key = c.result_key;'.format(race['event'], race['date'], race['distance'], row['sex']) +# print(sql) +# c.execute(sql) +# result = c.fetchall() +# # ''' This seems to generate a universal ranking on 1 the first time the statement is run ''' +# #c.execute('SET @rank = 0;') +# #sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := IF(@sex = sex, @rank+1, 1) AS srank, @sex := sex FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND sex IS NOT NULL ORDER BY sex, position) AS s SET r.sexposition = s.srank WHERE r.result_key = s.result_key;'.format(race['event'], race['date'], race['distance']) +# #print(sql) +# #c.execute(sql) +# #result = c.fetchall() ''' Update total finishers per category per race ''' sql = 'UPDATE `results` AS r, (SELECT event, date, distance, category, COUNT(category) as catfinishers FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" GROUP BY category) AS c SET r.catfinishers = c.catfinishers WHERE r.event = c.event AND r.date = c.date AND r.distance = c.distance AND r.catfinishers IS NULL AND r.category = c.category;'.format(race['event'], race['date'], race['distance']) @@ -194,18 +201,39 @@ def position_calculations(events=None): c.execute(sql) result = c.fetchall() - ''' Update individual positions per category per race ''' - c.execute('SET @rank = 0;') - sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := IF(@cat = category, @rank+1, 1) AS crank, @cat := category FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND category IS NOT NULL ORDER BY category, position) AS c SET r.catposition = c.crank WHERE r.result_key = c.result_key AND r.catposition IS NULL;'.format(race['event'], race['date'], race['distance']) - #print(sql) - c.execute(sql) - result = c.fetchall() +# ''' Update individual positions per category per race ''' +# sql = 'SELECT DISTINCT category from `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND category IS NOT NULL;'.format(race['event'], race['date'], race['distance']) +# c.execute(sql) +# result = c.fetchall() +# for row in result: +# sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := @rank+1 AS rank FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND category LIKE "{}" ORDER BY position) AS c, (SELECT @rank := 0) AS n SET r.catposition = c.rank WHERE r.result_key = c.result_key;'.format(race['event'], race['date'], race['distance'], row['category']) +# print(sql) +# c.execute(sql) +# result = c.fetchall() +# # ''' This seems to generate a universal ranking on 1 the first time the statement is run ''' +# #c.execute('SET @rank = 0;') +# #sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := IF(@cat = category, @rank+1, 1) AS crank, @cat := category FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND category IS NOT NULL ORDER BY category, position) AS c SET r.catposition = c.crank WHERE r.result_key = c.result_key;'.format(race['event'], race['date'], race['distance']) +# #print(sql) +# #c.execute(sql) +# #result = c.fetchall() db.commit() return +def file_in_db(url): + db = MySQLdb.connect(user='aac', passwd='saOAcCWHg4LaoSSA', db='AAC', + use_unicode=True, charset="utf8") + c = db.cursor() + + ''' Check for duplicate values by DATE and POSITION and RACE and EVENT ''' + sql = 'SELECT COUNT(*) FROM `results` WHERE source LIKE %s' + c.execute(sql, (url,)) + #log.debug(c._last_executed) + if (c.fetchone()[0] > 0): + return True + return False -def load_into_db(rows): +def load_into_db(rows, event=None): ''' CREATE TABLE `results` ( `result_key` int(11) NOT NULL AUTO_INCREMENT, @@ -218,7 +246,7 @@ def load_into_db(rows): `name` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL, `surname` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL, `licence` varchar(20) COLLATE utf8_unicode_ci DEFAULT NULL, - `club` varchar(40) COLLATE utf8_unicode_ci DEFAULT NULL, + `club` varchar(80) COLLATE utf8_unicode_ci DEFAULT NULL, `age` int(3) DEFAULT NULL, `sex` varchar(10) COLLATE utf8_unicode_ci DEFAULT NULL, `sexposition` int(5) NOT NULL, @@ -240,14 +268,14 @@ def load_into_db(rows): ''' Check for duplicate values by DATE and POSITION and RACE and EVENT ''' sql = 'SELECT COUNT(*) FROM `results` WHERE source LIKE %s' c.execute(sql, (rows[0].get('source'),)) - log.debug(c._last_executed) + #log.debug(c._last_executed) if (c.fetchone()[0] > 0): log.info("Spreadsheet data already loaded") return sql = 'SELECT COUNT(*) FROM `results` WHERE date=%s AND position=%s AND distance LIKE %s AND event LIKE %s' c.execute(sql, (rows[0].get('date'), rows[0].get('position'), rows[0].get('distance'), rows[0].get('event'),)) - log.debug(c._last_executed) + #log.debug(c._last_executed) if (c.fetchone()[0] > 0): log.info("Spreadsheet data already loaded") return @@ -266,6 +294,7 @@ def load_into_db(rows): #pass db.commit() + #position_calculations(event) return @@ -275,6 +304,8 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even if re.search('.xlsx?$', spreadsheet, flags=re.IGNORECASE) is not None: book = xlrd.open_workbook(spreadsheet) for sheetname in book.sheet_names(): + if re.search('walk', sheetname, flags=re.IGNORECASE) is not None: + continue sheet = book.sheet_by_name(sheetname) log.debug("Processing sheet {}".format(sheetname)) @@ -282,7 +313,7 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even fields = [] for row in range(0, 15): try: - if re.search('((pos\w*|no\w*|num\w*|surname|name|time|club)\s*){2,}', ' '.join(str(x) for x in (sheet.row_values(row))), flags=re.IGNORECASE) is not None: + if re.search('((pos\w*|no\w*|num\w*|(last\s*|sur)name|(first\s*)?name|time|club)\s*){2,}', ' '.join(str(x) for x in (sheet.row_values(row))), flags=re.IGNORECASE) is not None: fields = sheet.row_values(row) log.debug("Spreadsheet fields: {}".format(', '.join(str(x) for x in fields))) break @@ -291,33 +322,38 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even continue ''' Translate field names, and delete unwanted fields ''' position_idx = None - time_idx = None + time_idx = {} for i in range(len(fields)): - if re.search('^\s*pos', str(fields[i]), flags=re.IGNORECASE) is not None: + if 'position' not in fields and re.search('^\s*(overall)?\s*(pos|place|index)', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'position' ''' Store the index of this field for later processing ''' position_idx = i - elif re.search('^\s*(time|h:?m:?s?)', str(fields[i]), flags=re.IGNORECASE) is not None: + elif 'time' not in fields and re.search('^\s*(race\s*)?(finish|elapsed_?|f\S?|net|chip)?\s*(time|h:?m:?s?)', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'time' ''' Store the index of this field for later processing ''' - time_idx = i - elif re.search('^\s*cat\S*\s*pos(\.|\w+)?\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: + time_idx[fields[i]] = i + elif re.search('^\s*start\s*time', str(fields[i]), flags=re.IGNORECASE) is not None: + fields[i] = 'starttime' + time_idx[fields[i]] = i + elif re.search('^\s*(age\s*)?cat\S*\s*pos(\.|\w+)?\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'catposition' elif re.search('^\s*(sex|gender)\s*pos(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'sexposition' - elif re.search('^\s*(sur|last\s*)name', str(fields[i]), flags=re.IGNORECASE) is not None: + elif re.search('^\s*pos(\.|\w+)\s*(sex|gender)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: + fields[i] = 'sexposition' + elif re.search('^\s*(sur|last)\s*name', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'surname' - elif re.search('^\s*name', str(fields[i]), flags=re.IGNORECASE) is not None: + elif re.search('^\s*((first|nick)?\s*name|participant)', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'name' - elif re.search('^\s*club(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: + elif re.search('^\s*(club(\.|\w*)|team)\s*(name)?\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'club' elif re.search('^\s*age(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'age' elif re.search('^\s*(sex|gender|m.?f|male|female)(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'sex' - elif re.search('^\s*cat(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: + elif re.search('^\s*((age\s*)?cat|extra group)(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'category' - elif re.search('^\s*(lic|no|num)(\.|\S*)\s*\S*\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: + elif re.search('^\s*(race)?\s*(lic|no|num)(\.|\S*)\s*\S*\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'licence' elif re.search('^\s*(race)?date', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'date' @@ -325,7 +361,6 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even fields[i] = 'distance' elif re.search('^\s*(race)?(event|name)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: fields[i] = 'event' - pass ''' If there isn't a position field or a time field, we don't want this sheet ''' if position_idx is None or time_idx is None: @@ -362,13 +397,20 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even ''' Look for the race distance in the sheet name, or in the filename ''' distance = eventdistance - filedistance = re.search('([\d\.]+)\s*KM', filename, flags=re.IGNORECASE) - if filedistance is not None: + log.info("Race distance: {}".format(distance)) + eventnamedistance = re.search('(Half)?[\s-]*(Marathon)', eventname, flags=re.IGNORECASE) + if eventnamedistance is not None: + if eventnamedistance.group(1) is not None: + distance = 21.1 + else: + distance = 42.2 + filedistance = re.search('(\d{1,2}([\.,]\d)?)\s*KM', filename, flags=re.IGNORECASE) + if not distance and filedistance is not None: distance = filedistance.group(1) - eventnamedistance = re.search('([\d\.]+)\s*KM', eventname, flags=re.IGNORECASE) + eventnamedistance = re.search('([\d\.,]{2,3})\s*KM', eventname, flags=re.IGNORECASE) if eventnamedistance is not None: distance = eventnamedistance.group(1) - sheetdistance = re.search('([\d\.]+\s*KM)', sheetname, flags=re.IGNORECASE) + sheetdistance = re.search('([\d\.,]+\s*KM)', sheetname, flags=re.IGNORECASE) if sheetdistance is not None: distance = sheetdistance.group(1) sheetdistance = re.search('(helper|marshal)', sheetname, flags=re.IGNORECASE) @@ -385,18 +427,29 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even data.append(sheet.cell(row, col).value) item = dict(zip(fields, data)) ''' If the time has been modified by Excel, unmodify it ''' - if 'time' in item and isinstance(item['time'], float): - try: - item['time'] = xlrd.xldate_as_tuple(sheet.cell(row, time_idx).value, book.datemode) - except: - ''' Skip this row if the date can't be parsed, as it's probably wrong anyway (41 hours or something silly) ''' - continue - item['date'] = eventdate - item['event'] = eventname - item['distance'] = distance - item['source'] = src + timecols = ( 'time', 'starttime' ) + for timecol in timecols: + if timecol in item and isinstance(item[timecol], float): + try: + item[timecol] = xlrd.xldate_as_tuple(sheet.cell(row, time_idx[timecol]).value, book.datemode) + except: + try: + if book.datemode == 1: + flipdatemode = 0 + else: + flipdatemode = 1 + item[timecol] = xlrd.xldate_as_tuple(sheet.cell(row, time_idx[timecol]).value, flipdatemode) + except: + continue + ''' Skip this row if the date can't be parsed, as it's probably wrong anyway (41 hours or something silly) ''' + continue + item['source'] = src + item['date'] = eventdate + item['event'] = eventname + if not 'distance' in item: + item['distance'] = distance rows.append(item) - + rows = clean_data(rows) if len(rows) > 0: log.debug("Sample output: {}".format(pp.pformat(rows[0]))) @@ -407,6 +460,7 @@ def clean_data(input_rows): rows = [] for ir in input_rows: r = dict() + ''' Fix date ''' date = ir.get('date') if isinstance(date, str): @@ -417,39 +471,63 @@ def clean_data(input_rows): ''' Check time ''' time = ir['time'] + #print("1: {} {}".format(time, type(time))) ''' Deal with various formats that xlrd might give us. Note that floats should already be converted to tuples ''' if isinstance(time, tuple): time = dt.datetime.combine(dt.date(year=1900, month=1, day=1), dt.time(hour=time[3], minute=time[4], second=time[5])) - elif isinstance(time, str): - try: - time = dt.datetime.strptime(time, '%H:%M:%S') - except: + #print("2: {} {}".format(time, type(time))) + elif isinstance(time, float): + for fmt in ( '%H.%M.%S', '%M.%S', ): try: - time = dt.datetime.strptime(time, '%M:%S') + time = dt.datetime.strptime(time, fmt) + #print("3: {} {} {}".format(time, type(time), fmt)) except: - continue + pass + elif isinstance(time, str): + for fmt in ( '%H:%M:%S', '%M:%S', '%H.%M.%S', '%M.%S', ): + try: + time = dt.datetime.strptime(time, fmt) + #print("4: {} {} {}".format(time, type(time), fmt)) + except: + pass + ''' If time is still a string, ignore it, as it's probably blank/DNF ''' + if isinstance(time, str): + continue + if 'starttime' in ir: + starttime = ir['starttime'] + #print("Start 1: {} {}".format(starttime, type(starttime))) + if isinstance(starttime, tuple): + starttime = dt.datetime.combine(dt.date(year=1900, month=1, day=1), dt.time(hour=starttime[3], minute=starttime[4], second=starttime[5])) + #print("Start 2: {} {}".format(starttime, type(starttime))) + if isinstance(starttime, float): + for fmt in ( '%H.%M.%S', '%M.%S', ): + try: + starttime = dt.datetime.strptime(starttime, fmt) + #print("Start 3: {} {} {}".format(starttime, type(starttime), fmt)) + except: + pass + elif isinstance(starttime, str): + for fmt in ( '%H:%M:%S', '%M:%S', '%H.%M.%S', '%M.%S', ): + try: + starttime = dt.datetime.strptime(starttime, fmt) + #print("Start 4: {} {} {}".format(starttime, type(starttime), fmt)) + except: + pass + ''' If starttime is still a string, ignore it, as it's probably blank ''' + if not isinstance(time, str) and not isinstance(starttime, str): + timedelta = time - starttime + time = dt.datetime.min + timedelta + #print("5: {} {}".format(time, type(time))) r['time'] = time.time() ''' Fix distance ''' - length = re.search('([\d\.]+)\s*km', str(ir.get('distance')), flags=re.IGNORECASE) + length = re.search('([\d\.,]+)(?:\s*km)?', str(ir.get('distance')), flags=re.IGNORECASE) if length is not None: - r['distance'] = length.group(1) + distance = re.sub(",", ".", length.group(1)) + r['distance'] = float(distance) else: r['distance'] = 0 - ''' Fix sex ''' - if 'sex' in ir: - sex = 'sex' in ir and re.search('^\s*(F|M)', str(ir.get('sex')), flags=re.IGNORECASE) - if sex is not None: - if sex.group(1) == 'F': - r['sex'] = 'F' - else: - r['sex'] = 'M' - - ''' Fix club ''' - if re.search('^\s*(AAC\b|Atlantic\s*Athletic)', str(ir.get('club')), flags=re.IGNORECASE) is not None: - r['club'] = 'AAC' - ''' Should be an int ''' for key in ( 'position', 'sexposition', 'catposition', 'age', ): val = ir.get(key) @@ -459,17 +537,6 @@ def clean_data(input_rows): except: pass - ''' Should be a float ''' - for key in ( 'distance', ): - val = ir.get(key) - if val is not None: - try: - r[key] = float(val) - except: - pass - else: - r[key] = 0 - ''' Should be a string ''' for key in ( 'event', 'name', 'surname', 'licence', 'club', 'category', ): val = ir.get(key) @@ -485,7 +552,49 @@ def clean_data(input_rows): for key in ( 'event', 'source', ): r[key] = ir.get(key) + ''' Fix sex ''' + if 'sex' in r: + sex = 'sex' in r and re.search('^\s*(F|M)', str(r.get('sex')), flags=re.IGNORECASE) + if sex is not None: + if sex.group(1) == 'F': + r['sex'] = 'F' + else: + r['sex'] = 'M' + elif 'category' in r: + sex = 'category' in r and re.search('^\s*(F|M)', str(r.get('category')), flags=re.IGNORECASE) + if sex is not None: + if sex.group(1) == 'F': + r['sex'] = 'F' + else: + r['sex'] = 'M' + + ''' Fix club ''' + if re.search('^\s*(AAC$|Atlantic\s*Athletic)', str(r.get('club')), flags=re.IGNORECASE) is not None: + r['club'] = 'AAC' + rows.append(r) + + ''' sort rows by position, then populate sexposition and catposition for each sex and category ''' + sorted(rows, key=lambda r: r['position']) + totals = defaultdict(int) + for r in rows: + totals['positions'] += 1 + if 'sex' in r: + totals[r['sex']] += 1 + #log.debug("{} {}".format(totals[r['sex']], r['sex'])) + if 'sexposition' not in r: + r['sexposition'] = totals[r['sex']] + if 'category' in r: + totals[r['category']] += 1 + #log.debug("{} {}".format(totals[r['category']], r['category'])) + if 'catposition' not in r: + r['catposition'] = totals[r['category']] + for i in rows: + r['finishers'] = totals['positions'] + if 'sex' in r: + r['sexfinishers'] = totals[r['sex']] + if 'category' in r: + r['catfinishers'] = totals[r['category']] return rows diff --git a/static/style.css b/static/style.css index 9edf7a2..783adbb 100644 --- a/static/style.css +++ b/static/style.css @@ -1,6 +1,6 @@ body { margin: 0 auto; - max-width: 800px; + max-width: 1000px; font-family: 'Roboto Condensed', sans-serif; font-size: 11pt; } diff --git a/templates/list-top.html b/templates/list-top.html new file mode 100644 index 0000000..bed0d30 --- /dev/null +++ b/templates/list-top.html @@ -0,0 +1,52 @@ +{% set ns = namespace() -%} + +{% include 'head.html' with context %} +
+

AAC: Top Results{% if year %} {{ year }}{% endif %}

+ +{% if results -%} + {%- set ns.total = 0 -%} + {%- if 'count' in results -%} + {%- set ns.total = results['count'] -%} + {%- endif -%} + + + + + + + + + + + + + + + {%- for row in results['rows'] -%} + {%- set person = '{} {}'.format(row.name or '', row.surname or '') -%} + + + + + + + + + + + {%- endfor -%} + +
PositionNameLicenceTimeAverage PaceRaceDateNotes
Position {{ row.position|e }}{% if row.finishers %} / {{ row.finishers }}{% endif %}Name {{ person|trim|e }}Licence {% if row.licence %}{{ row.licence|trim|e }}{% endif %}Time {{ row.time|e }}Average Pace {% if row.distance is number and row.distance|float != 0 %}{{ (row.time / row.distance|float) | pace }} min/KM{% endif %}Race {{ row.event|trim|e }} ({{ row.distance|trim|e }} KM)Date {{ row.date|cleandate|e }}Notes + {%- if row.sex and row.sexposition and row.sexposition | int <= 100 %}{{ row.sexposition|ordinal|e }} {{ row.sex|lower|gender|e }}{% endif -%} + {%- if row.sexposition and row.sexposition | int <= 100 and row.catposition and row.catposition | int <= 100 %} and {% endif -%} + {%- if row.catposition and row.catposition | int <= 100 %}{{ row.catposition|ordinal|e }} in category{% endif -%} + +
+{%- endif %} +
+ + + diff --git a/templates/tabs.html b/templates/tabs.html index 70e86c7..81e4a7c 100644 --- a/templates/tabs.html +++ b/templates/tabs.html @@ -1,8 +1,9 @@