Fix position calculations, since SQL calculations are flaky; replace rankings with top results.

This commit is contained in:
Timothy Allen 2018-08-14 11:08:19 +02:00
parent d7f0030c21
commit ee9f7f55c2
5 changed files with 264 additions and 94 deletions

View File

@ -10,6 +10,8 @@ app = Flask(__name__)
PAGE_SIZE=20 PAGE_SIZE=20
MIN_MONTHS_FOR_LISTINGS=3 MIN_MONTHS_FOR_LISTINGS=3
# TODO: Replace rankings with top recent results: position, sexposition, or catposition < 10, sorted by event and date
@app.template_filter('urlescape') @app.template_filter('urlescape')
def urlescape(string): def urlescape(string):
if string is None: if string is None:
@ -155,6 +157,11 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(),
select = 'TRIM(CONCAT_WS(" ", name, surname)) AS person, FORMAT(SUM(distance),0) AS total' select = 'TRIM(CONCAT_WS(" ", name, surname)) AS person, FORMAT(SUM(distance),0) AS total'
group = 'GROUP BY TRIM(CONCAT_WS(" ", name, surname))' group = 'GROUP BY TRIM(CONCAT_WS(" ", name, surname))'
order = 'SUM(distance) DESC, TRIM(CONCAT_WS(" ", name, surname))' order = 'SUM(distance) DESC, TRIM(CONCAT_WS(" ", name, surname))'
elif listing == 'top':
select = '*'
group = ''
where += ' AND (position < 20 OR (position/finishers*1000) < 20 OR catposition < 10 OR (catposition/catfinishers*1000) < 10 OR sexposition < 10 OR (sexposition/sexfinishers*1000) < 10)'
order = 'date DESC, event'
elif listing == 'rankings': elif listing == 'rankings':
# SELECT query.person, query.positions, query.races, query.podiums, query.score, sex.positions AS sexpositions, sex.races AS sexraces, cat.positions AS catpositions, cat.races catraces FROM (SELECT *, CONCAT_WS(" ", name, surname) person, SUM(position) positions, COUNT(event) races, SUM(position)/COUNT(event) podiums, FORMAT(SUM(position)/COUNT(event),1) score FROM `results` WHERE club LIKE "AAC" GROUP BY CONCAT_WS(" ", name, surname) ) AS query INNER JOIN (SELECT *, CONCAT_WS(" ", name, surname) person, SUM(sexposition) as positions, COUNT(event) races FROM `results` WHERE club LIKE "AAC" AND sexposition > 0 GROUP BY CONCAT_WS(" ", name, surname) ) sex ON query.person=sex.person INNER JOIN (SELECT *, CONCAT_WS(" ", name, surname) person, SUM(catposition) as positions, COUNT(event) races FROM `results` WHERE club LIKE "AAC" AND catposition > 0 GROUP BY CONCAT_WS(" ", name, surname) ) cat ON query.person=cat.person WHERE query.person NOT LIKE "%no return%" AND query.person NOT LIKE "%no card%" AND query.person NOT LIKE "%blank card%" AND query.person NOT LIKE "%disqualified%" GROUP BY query.person ORDER BY podiums, races DESC; # SELECT query.person, query.positions, query.races, query.podiums, query.score, sex.positions AS sexpositions, sex.races AS sexraces, cat.positions AS catpositions, cat.races catraces FROM (SELECT *, CONCAT_WS(" ", name, surname) person, SUM(position) positions, COUNT(event) races, SUM(position)/COUNT(event) podiums, FORMAT(SUM(position)/COUNT(event),1) score FROM `results` WHERE club LIKE "AAC" GROUP BY CONCAT_WS(" ", name, surname) ) AS query INNER JOIN (SELECT *, CONCAT_WS(" ", name, surname) person, SUM(sexposition) as positions, COUNT(event) races FROM `results` WHERE club LIKE "AAC" AND sexposition > 0 GROUP BY CONCAT_WS(" ", name, surname) ) sex ON query.person=sex.person INNER JOIN (SELECT *, CONCAT_WS(" ", name, surname) person, SUM(catposition) as positions, COUNT(event) races FROM `results` WHERE club LIKE "AAC" AND catposition > 0 
GROUP BY CONCAT_WS(" ", name, surname) ) cat ON query.person=cat.person WHERE query.person NOT LIKE "%no return%" AND query.person NOT LIKE "%no card%" AND query.person NOT LIKE "%blank card%" AND query.person NOT LIKE "%disqualified%" GROUP BY query.person ORDER BY podiums, races DESC;
select = 'TRIM(CONCAT_WS(" ", name, surname)) AS person, SUM(position) AS positions, COUNT(event) AS races, SUM(position)/COUNT(event) AS podiums, FORMAT(SUM(position)/COUNT(event), 1) AS score' select = 'TRIM(CONCAT_WS(" ", name, surname)) AS person, SUM(position) AS positions, COUNT(event) AS races, SUM(position)/COUNT(event) AS podiums, FORMAT(SUM(position)/COUNT(event), 1) AS score'
@ -164,7 +171,6 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(),
select = 'licence, date, TRIM(CONCAT_WS(" ", name, surname)) AS person' select = 'licence, date, TRIM(CONCAT_WS(" ", name, surname)) AS person'
group = 'GROUP BY licence, name, surname' group = 'GROUP BY licence, name, surname'
order = 'TRIM(CONCAT_WS(" ", name, surname)) ASC' order = 'TRIM(CONCAT_WS(" ", name, surname)) ASC'
''' Add elements common to multiple types of queries ''' ''' Add elements common to multiple types of queries '''
if year: if year:
if not isinstance(year, (int, float)): if not isinstance(year, (int, float)):
@ -194,6 +200,8 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(),
elif listing == 'runners': elif listing == 'runners':
select = 'COUNT(*) FROM ( SELECT COUNT(name)' select = 'COUNT(*) FROM ( SELECT COUNT(name)'
close = ') AS runners' close = ') AS runners'
elif listing == 'top':
pass
elif listing == 'rankings': elif listing == 'rankings':
select = 'COUNT(*) FROM ( SELECT COUNT(name)' select = 'COUNT(*) FROM ( SELECT COUNT(name)'
close = ') AS rankings' close = ') AS rankings'
@ -222,7 +230,7 @@ def list(title=None, year=None):
year = now().year year = now().year
title = 'runners' title = 'runners'
title = urllib.parse.unquote_plus(title) title = urllib.parse.unquote_plus(title)
if title not in ( 'races', 'rankings', 'runners', 'licence', ): if title not in ( 'races', 'top', 'rankings', 'runners', 'licence', ):
abort(404) abort(404)
''' In early January, we'll be left with blank pages in listings, since there won't ''' In early January, we'll be left with blank pages in listings, since there won't

View File

@ -24,6 +24,7 @@ import os
import re import re
import sys import sys
import tempfile import tempfile
from collections import defaultdict
import pprint import pprint
# Set up MySQL database, if not done # Set up MySQL database, if not done
@ -68,8 +69,8 @@ def main():
raceinfo = dict() raceinfo = dict()
link = event.find('a', href=re.compile('.xlsx?$')) link = event.find('a', href=re.compile('.xlsx?$'))
name = event.find('td', class_=re.compile('EventHeadline')) name = event.find('td', class_=re.compile('EventHeadline'))
date = event.find('td', class_=re.compile('EventDate')) date = event.find('td', class_=re.compile('EventDate'), string=re.compile('^\s*\d+[/-]\d+[/-]\d+'))
dist = event.find('td', class_=re.compile('EventDist'), string=re.compile('^\s*[\d+\.]\s*(KM)?\s*$')) dist = event.find('td', class_=re.compile('Events?Distance'), string=re.compile('^\s*[\d\.,]+\s*(KM)?\s*$', flags=re.IGNORECASE))
if link is not None and name is not None: if link is not None and name is not None:
if not link['href'] in uniqurl: if not link['href'] in uniqurl:
uniqurl.append(link['href']) uniqurl.append(link['href'])
@ -86,15 +87,19 @@ def main():
''' Only parse one spreadsheet from the WPA website, from the commandline ''' ''' Only parse one spreadsheet from the WPA website, from the commandline '''
isthisevent = False isthisevent = False
for checkurl in requrls: for checkurl in requrls:
if re.search(checkurl, event, flags=re.IGNORECASE): if checkurl and re.search(checkurl, event, flags=re.IGNORECASE):
isthisevent = True isthisevent = True
if type(requrls[0]) != None and url not in requrls and not isthisevent: if requrls[0] and url not in requrls and not isthisevent:
continue
if file_in_db(url):
continue continue
with urllib.request.urlopen(url) as response, tempfile.TemporaryDirectory() as tmpdir: with urllib.request.urlopen(url) as response, tempfile.TemporaryDirectory() as tmpdir:
log.info("Loading data from URL {}".format(race['url']))
data = response.read() data = response.read()
urlparts = urllib.parse.urlparse(url) urlparts = urllib.parse.urlparse(url)
filename = os.path.basename(urlparts.path) filename = os.path.basename(urlparts.path)
if re.search('WALKRESULTS', filename, flags=re.IGNORECASE):
continue
log.info("Loading data from URL {}".format(url))
filepath = os.path.join(tmpdir, filename) filepath = os.path.join(tmpdir, filename)
with open(filepath, 'wb') as fp: with open(filepath, 'wb') as fp:
fp.write(data) fp.write(data)
@ -104,17 +109,13 @@ def main():
log.warning("ERROR: Unable to load data from URL {}".format(url)) log.warning("ERROR: Unable to load data from URL {}".format(url))
raise raise
else: else:
load_into_db(rows) load_into_db(rows, event)
position_calculations(event)
log.debug("\n") log.debug("\n")
elif args.input_file: elif args.input_file:
rows = read_spreadsheet(args.input_file, src=args.input_file) rows = read_spreadsheet(args.input_file, src=args.input_file)
log.info("Loading data from file {}".format(args.input_file)) log.info("Loading data from file {}".format(args.input_file))
load_into_db(rows) load_into_db(rows)
position_calculations()
else: else:
for message in mailbox.Maildir(MAILDIR): for message in mailbox.Maildir(MAILDIR):
@ -142,7 +143,6 @@ def main():
pass pass
else: else:
load_into_db(rows) load_into_db(rows)
position_calculations()
return return
@ -167,9 +167,7 @@ def position_calculations(events=None):
eventlist = [e for e in c.fetchall()] eventlist = [e for e in c.fetchall()]
for race in eventlist: for race in eventlist:
log.debug(race) log.debug("Recalculating position information for {}".format(race['event']))
log.debug("Recalculating postion information for {}".format(race['event']))
''' Calculate total finishers per race ''' ''' Calculate total finishers per race '''
sql = 'UPDATE `results` AS r, (SELECT event, date, distance, COUNT(distance) AS finishers FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" GROUP BY event, date, distance) AS f SET r.finishers = f.finishers WHERE r.event = f.event AND r.date = f.date AND r.distance = f.distance AND r.finishers IS NULL;'.format(race['event'], race['date'], race['distance']) sql = 'UPDATE `results` AS r, (SELECT event, date, distance, COUNT(distance) AS finishers FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" GROUP BY event, date, distance) AS f SET r.finishers = f.finishers WHERE r.event = f.event AND r.date = f.date AND r.distance = f.distance AND r.finishers IS NULL;'.format(race['event'], race['date'], race['distance'])
c.execute(sql) c.execute(sql)
@ -181,12 +179,21 @@ def position_calculations(events=None):
c.execute(sql) c.execute(sql)
result = c.fetchall() result = c.fetchall()
''' Update individual positions per sex per race ''' # ''' Update individual positions per sex per race '''
c.execute('SET @rank = 0;') # sql = 'SELECT DISTINCT sex from `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND sex IS NOT NULL;'.format(race['event'], race['date'], race['distance'])
sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := IF(@sex = sex, @rank+1, 1) AS srank, @sex := sex FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND sex IS NOT NULL ORDER BY sex, position) AS s SET r.sexposition = s.srank WHERE r.result_key = s.result_key AND r.sexposition IS NULL;'.format(race['event'], race['date'], race['distance']) # c.execute(sql)
#print(sql) # result = c.fetchall()
c.execute(sql) # for row in result:
result = c.fetchall() # sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := @rank+1 AS rank FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND sex LIKE "{}" ORDER BY position) AS c, (SELECT @rank := 0) AS n SET r.sexposition = c.rank WHERE r.result_key = c.result_key;'.format(race['event'], race['date'], race['distance'], row['sex'])
# print(sql)
# c.execute(sql)
# result = c.fetchall()
# # ''' This seems to generate a universal ranking of 1 the first time the statement is run '''
# #c.execute('SET @rank = 0;')
# #sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := IF(@sex = sex, @rank+1, 1) AS srank, @sex := sex FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND sex IS NOT NULL ORDER BY sex, position) AS s SET r.sexposition = s.srank WHERE r.result_key = s.result_key;'.format(race['event'], race['date'], race['distance'])
# #print(sql)
# #c.execute(sql)
# #result = c.fetchall()
''' Update total finishers per category per race ''' ''' Update total finishers per category per race '''
sql = 'UPDATE `results` AS r, (SELECT event, date, distance, category, COUNT(category) as catfinishers FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" GROUP BY category) AS c SET r.catfinishers = c.catfinishers WHERE r.event = c.event AND r.date = c.date AND r.distance = c.distance AND r.catfinishers IS NULL AND r.category = c.category;'.format(race['event'], race['date'], race['distance']) sql = 'UPDATE `results` AS r, (SELECT event, date, distance, category, COUNT(category) as catfinishers FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" GROUP BY category) AS c SET r.catfinishers = c.catfinishers WHERE r.event = c.event AND r.date = c.date AND r.distance = c.distance AND r.catfinishers IS NULL AND r.category = c.category;'.format(race['event'], race['date'], race['distance'])
@ -194,18 +201,39 @@ def position_calculations(events=None):
c.execute(sql) c.execute(sql)
result = c.fetchall() result = c.fetchall()
''' Update individual positions per category per race ''' # ''' Update individual positions per category per race '''
c.execute('SET @rank = 0;') # sql = 'SELECT DISTINCT category from `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND category IS NOT NULL;'.format(race['event'], race['date'], race['distance'])
sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := IF(@cat = category, @rank+1, 1) AS crank, @cat := category FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND category IS NOT NULL ORDER BY category, position) AS c SET r.catposition = c.crank WHERE r.result_key = c.result_key AND r.catposition IS NULL;'.format(race['event'], race['date'], race['distance']) # c.execute(sql)
#print(sql) # result = c.fetchall()
c.execute(sql) # for row in result:
result = c.fetchall() # sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := @rank+1 AS rank FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND category LIKE "{}" ORDER BY position) AS c, (SELECT @rank := 0) AS n SET r.catposition = c.rank WHERE r.result_key = c.result_key;'.format(race['event'], race['date'], race['distance'], row['category'])
# print(sql)
# c.execute(sql)
# result = c.fetchall()
# # ''' This seems to generate a universal ranking of 1 the first time the statement is run '''
# #c.execute('SET @rank = 0;')
# #sql = 'UPDATE `results` AS r, (SELECT result_key, position, @rank := IF(@cat = category, @rank+1, 1) AS crank, @cat := category FROM `results` WHERE event LIKE "{}" AND date = "{}" AND distance LIKE "{}" AND category IS NOT NULL ORDER BY category, position) AS c SET r.catposition = c.crank WHERE r.result_key = c.result_key;'.format(race['event'], race['date'], race['distance'])
# #print(sql)
# #c.execute(sql)
# #result = c.fetchall()
db.commit() db.commit()
return return
def file_in_db(url):
db = MySQLdb.connect(user='aac', passwd='saOAcCWHg4LaoSSA', db='AAC',
use_unicode=True, charset="utf8")
c = db.cursor()
def load_into_db(rows): ''' Check whether results from this source URL have already been loaded '''
sql = 'SELECT COUNT(*) FROM `results` WHERE source LIKE %s'
c.execute(sql, (url,))
#log.debug(c._last_executed)
if (c.fetchone()[0] > 0):
return True
return False
def load_into_db(rows, event=None):
''' '''
CREATE TABLE `results` ( CREATE TABLE `results` (
`result_key` int(11) NOT NULL AUTO_INCREMENT, `result_key` int(11) NOT NULL AUTO_INCREMENT,
@ -218,7 +246,7 @@ def load_into_db(rows):
`name` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL, `name` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL,
`surname` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL, `surname` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL,
`licence` varchar(20) COLLATE utf8_unicode_ci DEFAULT NULL, `licence` varchar(20) COLLATE utf8_unicode_ci DEFAULT NULL,
`club` varchar(40) COLLATE utf8_unicode_ci DEFAULT NULL, `club` varchar(80) COLLATE utf8_unicode_ci DEFAULT NULL,
`age` int(3) DEFAULT NULL, `age` int(3) DEFAULT NULL,
`sex` varchar(10) COLLATE utf8_unicode_ci DEFAULT NULL, `sex` varchar(10) COLLATE utf8_unicode_ci DEFAULT NULL,
`sexposition` int(5) NOT NULL, `sexposition` int(5) NOT NULL,
@ -240,14 +268,14 @@ def load_into_db(rows):
''' Check for duplicate values by DATE and POSITION and RACE and EVENT ''' ''' Check for duplicate values by DATE and POSITION and RACE and EVENT '''
sql = 'SELECT COUNT(*) FROM `results` WHERE source LIKE %s' sql = 'SELECT COUNT(*) FROM `results` WHERE source LIKE %s'
c.execute(sql, (rows[0].get('source'),)) c.execute(sql, (rows[0].get('source'),))
log.debug(c._last_executed) #log.debug(c._last_executed)
if (c.fetchone()[0] > 0): if (c.fetchone()[0] > 0):
log.info("Spreadsheet data already loaded") log.info("Spreadsheet data already loaded")
return return
sql = 'SELECT COUNT(*) FROM `results` WHERE date=%s AND position=%s AND distance LIKE %s AND event LIKE %s' sql = 'SELECT COUNT(*) FROM `results` WHERE date=%s AND position=%s AND distance LIKE %s AND event LIKE %s'
c.execute(sql, (rows[0].get('date'), rows[0].get('position'), rows[0].get('distance'), rows[0].get('event'),)) c.execute(sql, (rows[0].get('date'), rows[0].get('position'), rows[0].get('distance'), rows[0].get('event'),))
log.debug(c._last_executed) #log.debug(c._last_executed)
if (c.fetchone()[0] > 0): if (c.fetchone()[0] > 0):
log.info("Spreadsheet data already loaded") log.info("Spreadsheet data already loaded")
return return
@ -266,6 +294,7 @@ def load_into_db(rows):
#pass #pass
db.commit() db.commit()
#position_calculations(event)
return return
@ -275,6 +304,8 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even
if re.search('.xlsx?$', spreadsheet, flags=re.IGNORECASE) is not None: if re.search('.xlsx?$', spreadsheet, flags=re.IGNORECASE) is not None:
book = xlrd.open_workbook(spreadsheet) book = xlrd.open_workbook(spreadsheet)
for sheetname in book.sheet_names(): for sheetname in book.sheet_names():
if re.search('walk', sheetname, flags=re.IGNORECASE) is not None:
continue
sheet = book.sheet_by_name(sheetname) sheet = book.sheet_by_name(sheetname)
log.debug("Processing sheet {}".format(sheetname)) log.debug("Processing sheet {}".format(sheetname))
@ -282,7 +313,7 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even
fields = [] fields = []
for row in range(0, 15): for row in range(0, 15):
try: try:
if re.search('((pos\w*|no\w*|num\w*|surname|name|time|club)\s*){2,}', ' '.join(str(x) for x in (sheet.row_values(row))), flags=re.IGNORECASE) is not None: if re.search('((pos\w*|no\w*|num\w*|(last\s*|sur)name|(first\s*)?name|time|club)\s*){2,}', ' '.join(str(x) for x in (sheet.row_values(row))), flags=re.IGNORECASE) is not None:
fields = sheet.row_values(row) fields = sheet.row_values(row)
log.debug("Spreadsheet fields: {}".format(', '.join(str(x) for x in fields))) log.debug("Spreadsheet fields: {}".format(', '.join(str(x) for x in fields)))
break break
@ -291,33 +322,38 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even
continue continue
''' Translate field names, and delete unwanted fields ''' ''' Translate field names, and delete unwanted fields '''
position_idx = None position_idx = None
time_idx = None time_idx = {}
for i in range(len(fields)): for i in range(len(fields)):
if re.search('^\s*pos', str(fields[i]), flags=re.IGNORECASE) is not None: if 'position' not in fields and re.search('^\s*(overall)?\s*(pos|place|index)', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'position' fields[i] = 'position'
''' Store the index of this field for later processing ''' ''' Store the index of this field for later processing '''
position_idx = i position_idx = i
elif re.search('^\s*(time|h:?m:?s?)', str(fields[i]), flags=re.IGNORECASE) is not None: elif 'time' not in fields and re.search('^\s*(race\s*)?(finish|elapsed_?|f\S?|net|chip)?\s*(time|h:?m:?s?)', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'time' fields[i] = 'time'
''' Store the index of this field for later processing ''' ''' Store the index of this field for later processing '''
time_idx = i time_idx[fields[i]] = i
elif re.search('^\s*cat\S*\s*pos(\.|\w+)?\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*start\s*time', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'starttime'
time_idx[fields[i]] = i
elif re.search('^\s*(age\s*)?cat\S*\s*pos(\.|\w+)?\s*$', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'catposition' fields[i] = 'catposition'
elif re.search('^\s*(sex|gender)\s*pos(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*(sex|gender)\s*pos(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'sexposition' fields[i] = 'sexposition'
elif re.search('^\s*(sur|last\s*)name', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*pos(\.|\w+)\s*(sex|gender)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'sexposition'
elif re.search('^\s*(sur|last)\s*name', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'surname' fields[i] = 'surname'
elif re.search('^\s*name', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*((first|nick)?\s*name|participant)', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'name' fields[i] = 'name'
elif re.search('^\s*club(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*(club(\.|\w*)|team)\s*(name)?\s*$', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'club' fields[i] = 'club'
elif re.search('^\s*age(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*age(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'age' fields[i] = 'age'
elif re.search('^\s*(sex|gender|m.?f|male|female)(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*(sex|gender|m.?f|male|female)(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'sex' fields[i] = 'sex'
elif re.search('^\s*cat(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*((age\s*)?cat|extra group)(\.|\w*)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'category' fields[i] = 'category'
elif re.search('^\s*(lic|no|num)(\.|\S*)\s*\S*\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*(race)?\s*(lic|no|num)(\.|\S*)\s*\S*\s*$', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'licence' fields[i] = 'licence'
elif re.search('^\s*(race)?date', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*(race)?date', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'date' fields[i] = 'date'
@ -325,7 +361,6 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even
fields[i] = 'distance' fields[i] = 'distance'
elif re.search('^\s*(race)?(event|name)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None: elif re.search('^\s*(race)?(event|name)\s*$', str(fields[i]), flags=re.IGNORECASE) is not None:
fields[i] = 'event' fields[i] = 'event'
pass
''' If there isn't a position field or a time field, we don't want this sheet ''' ''' If there isn't a position field or a time field, we don't want this sheet '''
if position_idx is None or time_idx is None: if position_idx is None or time_idx is None:
@ -362,13 +397,20 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even
''' Look for the race distance in the sheet name, or in the filename ''' ''' Look for the race distance in the sheet name, or in the filename '''
distance = eventdistance distance = eventdistance
filedistance = re.search('([\d\.]+)\s*KM', filename, flags=re.IGNORECASE) log.info("Race distance: {}".format(distance))
if filedistance is not None: eventnamedistance = re.search('(Half)?[\s-]*(Marathon)', eventname, flags=re.IGNORECASE)
if eventnamedistance is not None:
if eventnamedistance.group(1) is not None:
distance = 21.1
else:
distance = 42.2
filedistance = re.search('(\d{1,2}([\.,]\d)?)\s*KM', filename, flags=re.IGNORECASE)
if not distance and filedistance is not None:
distance = filedistance.group(1) distance = filedistance.group(1)
eventnamedistance = re.search('([\d\.]+)\s*KM', eventname, flags=re.IGNORECASE) eventnamedistance = re.search('([\d\.,]{2,3})\s*KM', eventname, flags=re.IGNORECASE)
if eventnamedistance is not None: if eventnamedistance is not None:
distance = eventnamedistance.group(1) distance = eventnamedistance.group(1)
sheetdistance = re.search('([\d\.]+\s*KM)', sheetname, flags=re.IGNORECASE) sheetdistance = re.search('([\d\.,]+\s*KM)', sheetname, flags=re.IGNORECASE)
if sheetdistance is not None: if sheetdistance is not None:
distance = sheetdistance.group(1) distance = sheetdistance.group(1)
sheetdistance = re.search('(helper|marshal)', sheetname, flags=re.IGNORECASE) sheetdistance = re.search('(helper|marshal)', sheetname, flags=re.IGNORECASE)
@ -385,16 +427,27 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even
data.append(sheet.cell(row, col).value) data.append(sheet.cell(row, col).value)
item = dict(zip(fields, data)) item = dict(zip(fields, data))
''' If the time has been modified by Excel, unmodify it ''' ''' If the time has been modified by Excel, unmodify it '''
if 'time' in item and isinstance(item['time'], float): timecols = ( 'time', 'starttime' )
for timecol in timecols:
if timecol in item and isinstance(item[timecol], float):
try: try:
item['time'] = xlrd.xldate_as_tuple(sheet.cell(row, time_idx).value, book.datemode) item[timecol] = xlrd.xldate_as_tuple(sheet.cell(row, time_idx[timecol]).value, book.datemode)
except: except:
try:
if book.datemode == 1:
flipdatemode = 0
else:
flipdatemode = 1
item[timecol] = xlrd.xldate_as_tuple(sheet.cell(row, time_idx[timecol]).value, flipdatemode)
except:
continue
''' Skip this row if the date can't be parsed, as it's probably wrong anyway (41 hours or something silly) ''' ''' Skip this row if the date can't be parsed, as it's probably wrong anyway (41 hours or something silly) '''
continue continue
item['source'] = src
item['date'] = eventdate item['date'] = eventdate
item['event'] = eventname item['event'] = eventname
if not 'distance' in item:
item['distance'] = distance item['distance'] = distance
item['source'] = src
rows.append(item) rows.append(item)
rows = clean_data(rows) rows = clean_data(rows)
@ -407,6 +460,7 @@ def clean_data(input_rows):
rows = [] rows = []
for ir in input_rows: for ir in input_rows:
r = dict() r = dict()
''' Fix date ''' ''' Fix date '''
date = ir.get('date') date = ir.get('date')
if isinstance(date, str): if isinstance(date, str):
@ -417,39 +471,63 @@ def clean_data(input_rows):
''' Check time ''' ''' Check time '''
time = ir['time'] time = ir['time']
#print("1: {} {}".format(time, type(time)))
''' Deal with various formats that xlrd might give us. Note that floats should already be converted to tuples ''' ''' Deal with various formats that xlrd might give us. Note that floats should already be converted to tuples '''
if isinstance(time, tuple): if isinstance(time, tuple):
time = dt.datetime.combine(dt.date(year=1900, month=1, day=1), dt.time(hour=time[3], minute=time[4], second=time[5])) time = dt.datetime.combine(dt.date(year=1900, month=1, day=1), dt.time(hour=time[3], minute=time[4], second=time[5]))
#print("2: {} {}".format(time, type(time)))
elif isinstance(time, float):
for fmt in ( '%H.%M.%S', '%M.%S', ):
try:
time = dt.datetime.strptime(time, fmt)
#print("3: {} {} {}".format(time, type(time), fmt))
except:
pass
elif isinstance(time, str): elif isinstance(time, str):
for fmt in ( '%H:%M:%S', '%M:%S', '%H.%M.%S', '%M.%S', ):
try: try:
time = dt.datetime.strptime(time, '%H:%M:%S') time = dt.datetime.strptime(time, fmt)
except: #print("4: {} {} {}".format(time, type(time), fmt))
try:
time = dt.datetime.strptime(time, '%M:%S')
except: except:
pass
''' If time is still a string, ignore it, as it's probably blank/DNF '''
if isinstance(time, str):
continue continue
if 'starttime' in ir:
starttime = ir['starttime']
#print("Start 1: {} {}".format(starttime, type(starttime)))
if isinstance(starttime, tuple):
starttime = dt.datetime.combine(dt.date(year=1900, month=1, day=1), dt.time(hour=starttime[3], minute=starttime[4], second=starttime[5]))
#print("Start 2: {} {}".format(starttime, type(starttime)))
if isinstance(starttime, float):
for fmt in ( '%H.%M.%S', '%M.%S', ):
try:
starttime = dt.datetime.strptime(starttime, fmt)
#print("Start 3: {} {} {}".format(starttime, type(starttime), fmt))
except:
pass
elif isinstance(starttime, str):
for fmt in ( '%H:%M:%S', '%M:%S', '%H.%M.%S', '%M.%S', ):
try:
starttime = dt.datetime.strptime(starttime, fmt)
#print("Start 4: {} {} {}".format(starttime, type(starttime), fmt))
except:
pass
''' If starttime is still a string, ignore it, as it's probably blank '''
if not isinstance(time, str) and not isinstance(starttime, str):
timedelta = time - starttime
time = dt.datetime.min + timedelta
#print("5: {} {}".format(time, type(time)))
r['time'] = time.time() r['time'] = time.time()
''' Fix distance ''' ''' Fix distance '''
length = re.search('([\d\.]+)\s*km', str(ir.get('distance')), flags=re.IGNORECASE) length = re.search('([\d\.,]+)(?:\s*km)?', str(ir.get('distance')), flags=re.IGNORECASE)
if length is not None: if length is not None:
r['distance'] = length.group(1) distance = re.sub(",", ".", length.group(1))
r['distance'] = float(distance)
else: else:
r['distance'] = 0 r['distance'] = 0
''' Fix sex '''
if 'sex' in ir:
sex = 'sex' in ir and re.search('^\s*(F|M)', str(ir.get('sex')), flags=re.IGNORECASE)
if sex is not None:
if sex.group(1) == 'F':
r['sex'] = 'F'
else:
r['sex'] = 'M'
''' Fix club '''
if re.search('^\s*(AAC\b|Atlantic\s*Athletic)', str(ir.get('club')), flags=re.IGNORECASE) is not None:
r['club'] = 'AAC'
''' Should be an int ''' ''' Should be an int '''
for key in ( 'position', 'sexposition', 'catposition', 'age', ): for key in ( 'position', 'sexposition', 'catposition', 'age', ):
val = ir.get(key) val = ir.get(key)
@ -459,17 +537,6 @@ def clean_data(input_rows):
except: except:
pass pass
''' Should be a float '''
for key in ( 'distance', ):
val = ir.get(key)
if val is not None:
try:
r[key] = float(val)
except:
pass
else:
r[key] = 0
''' Should be a string ''' ''' Should be a string '''
for key in ( 'event', 'name', 'surname', 'licence', 'club', 'category', ): for key in ( 'event', 'name', 'surname', 'licence', 'club', 'category', ):
val = ir.get(key) val = ir.get(key)
@ -485,7 +552,49 @@ def clean_data(input_rows):
for key in ( 'event', 'source', ): for key in ( 'event', 'source', ):
r[key] = ir.get(key) r[key] = ir.get(key)
''' Fix sex '''
if 'sex' in r:
sex = 'sex' in r and re.search('^\s*(F|M)', str(r.get('sex')), flags=re.IGNORECASE)
if sex is not None:
if sex.group(1) == 'F':
r['sex'] = 'F'
else:
r['sex'] = 'M'
elif 'category' in r:
sex = 'category' in r and re.search('^\s*(F|M)', str(r.get('category')), flags=re.IGNORECASE)
if sex is not None:
if sex.group(1) == 'F':
r['sex'] = 'F'
else:
r['sex'] = 'M'
''' Fix club '''
if re.search('^\s*(AAC$|Atlantic\s*Athletic)', str(r.get('club')), flags=re.IGNORECASE) is not None:
r['club'] = 'AAC'
rows.append(r) rows.append(r)
''' sort rows by position, then populate sexposition and catposition for each sex and category '''
sorted(rows, key=lambda r: r['position'])
totals = defaultdict(int)
for r in rows:
totals['positions'] += 1
if 'sex' in r:
totals[r['sex']] += 1
#log.debug("{} {}".format(totals[r['sex']], r['sex']))
if 'sexposition' not in r:
r['sexposition'] = totals[r['sex']]
if 'category' in r:
totals[r['category']] += 1
#log.debug("{} {}".format(totals[r['category']], r['category']))
if 'catposition' not in r:
r['catposition'] = totals[r['category']]
for i in rows:
r['finishers'] = totals['positions']
if 'sex' in r:
r['sexfinishers'] = totals[r['sex']]
if 'category' in r:
r['catfinishers'] = totals[r['category']]
return rows return rows

View File

@ -1,6 +1,6 @@
body { body {
margin: 0 auto; margin: 0 auto;
max-width: 800px; max-width: 1000px;
font-family: 'Roboto Condensed', sans-serif; font-family: 'Roboto Condensed', sans-serif;
font-size: 11pt; font-size: 11pt;
} }

52
templates/list-top.html Normal file
View File

@ -0,0 +1,52 @@
{#-
  Top results listing.

  Reads from the render context:
    results - optional mapping; results['rows'] is iterated to build the table
              and results['count'], when present, is stored in ns.total.
    year    - optional; appended to the page heading when set.

  ns.total is only assigned here, never read in this template; presumably it is
  consumed by the included 'prevnext.html' (TODO confirm).
  ns.show is read here but not assigned in this template; presumably it is set
  by the included 'head.html' (TODO confirm).
-#}
{% set ns = namespace() -%}
{% include 'head.html' with context %}
<article>
  <h1>AAC: Top Results{% if year %} {{ year }}{% endif %}</h1>
  {% if results -%}
  {%- set ns.total = 0 -%}
  {%- if 'count' in results -%}
  {%- set ns.total = results['count'] -%}
  {%- endif -%}
  <table class="wide">
    <thead>
      <tr>
        <th>Position</th>
        <th>Name</th>
        <th>Licence</th>
        <th>Time</th>
        <th>Average Pace</th>
        <th>Race</th>
        <th>Date</th>
        <th>Notes</th>
      </tr>
    </thead>
    <tbody>
      {%- for row in results['rows'] -%}
      {%- set person = '{} {}'.format(row.name or '', row.surname or '') -%}
      <tr>
        <td class="nowrap"><span class="label">Position</span> <span>{{ row.position|e }}{% if row.finishers %} / {{ row.finishers }}{% endif %}</span></td>
        <td class="nowrap"><span class="label">Name</span> <span><a href="{{ url_for('person', title=person|trim|urlescape, start=None) }}">{{ person|trim|e }}</a></span></td>
        <td class="nowrap"><span class="label">Licence</span> <span>{% if row.licence %}<a href="{{ url_for('licence', title=row.licence|trim|urlescape, year=row.date|year, start=None, show=ns.show) }}">{{ row.licence|trim|e }}</a>{% endif %}</span></td>
        <td><span class="label">Time</span> <span>{{ row.time|e }}</span></td>
        {#- Pace is only shown for a numeric, non-zero distance, which also avoids dividing by zero. -#}
        <td class="nowrap"><span class="label">Average Pace</span> <span>{% if row.distance is number and row.distance|float != 0 %}{{ (row.time / row.distance|float) | pace }} min/KM{% endif %}</span></td>
        <td class="long"><span class="label">Race</span> <span><a href="{{ url_for('races', title=row.event|trim|urlescape, year=row.date|year, start=None, show=ns.show) }}">{{ row.event|trim|e }} ({{ row.distance|trim|e }} KM)</a></span></td>
        <td class="nowrap"><span class="label">Date</span> <span>{{ row.date|cleandate|e }}</span></td>
        <td class="long"><span class="label">Notes</span> <span>
          {#- Decide once which note parts are rendered, so the joiner is only emitted between two visible parts. -#}
          {%- set show_sex = row.sex and row.sexposition and row.sexposition | int <= 100 -%}
          {%- set show_cat = row.catposition and row.catposition | int <= 100 -%}
          {%- if show_sex %}{{ row.sexposition|ordinal|e }} {{ row.sex|lower|gender|e }}{% endif -%}
          {%- if show_sex and show_cat %} and {% endif -%}
          {%- if show_cat %}{{ row.catposition|ordinal|e }} in category{% endif -%}
          </span>
        </td>
      </tr>
      {%- endfor -%}
    </tbody>
  </table>
  {%- endif %}
</article>
<footer>
  {% include 'prevnext.html' with context %}
</footer>
</body>
</html>

View File

@ -1,8 +1,9 @@
<nav class="tabs"> <nav class="tabs">
<span><a href="{{ url_for('list', title='runners', year=now|year, start=None, show=ns.show) }}">Runners</a></span> <span><a href="{{ url_for('list', title='runners', year=now|year, start=None, show=ns.show) }}">Runners</a></span>
<span><a href="{{ url_for('list', title='rankings', year=now|year, start=None, show=ns.show) }}">Rankings</a></span> <!--span><a href="{{ url_for('list', title='rankings', year=now|year, start=None, show=ns.show) }}">Rankings</a></span-->
<span><a href="{{ url_for('list', title='top', year=None, start=None, show=ns.show) }}">Top Results</a></span>
<span><a href="{{ url_for('index', title=None, year=None, start=None, show=ns.show) }}">All Results</a></span>
<span><a href="{{ url_for('list', title='races', year=None, start=None, show=ns.show) }}">Races</a></span> <span><a href="{{ url_for('list', title='races', year=None, start=None, show=ns.show) }}">Races</a></span>
<span><a href="{{ url_for('list', title='licence', year=now|year, start=None, show=ns.show) }}">Licences</a></span> <span><a href="{{ url_for('list', title='licence', year=now|year, start=None, show=ns.show) }}">Licences</a></span>
<span><a href="{{ url_for('index', title=None, year=None, start=None, show=ns.show) }}">All Results</a></span>
<span><a href="{{ url_for('search', title=None, year=None, start=None, show=ns.show) }}">Search</a></span> <span><a href="{{ url_for('search', title=None, year=None, start=None, show=ns.show) }}">Search</a></span>
</nav> </nav>