diff --git a/aacstats.py b/aacstats.py index f600324..ee14ff7 100644 --- a/aacstats.py +++ b/aacstats.py @@ -10,25 +10,10 @@ app = Flask(__name__) PAGE_SIZE=20 MIN_MONTHS_FOR_LISTINGS=3 -def now(): - return dt.datetime.now() - -def getstart(): - start = request.args.get('start', '0') - if not isinstance(start, (int, float)): - return 0 - return start - -def getshow(): - show = request.args.get('show', PAGE_SIZE) - if show == 'all': - return -1 - if not isinstance(show, (int, float)): - return PAGE_SIZE - return show - @app.template_filter('urlescape') def urlescape(string): + if string is None: + return '' return urllib.parse.quote_plus(string) @app.template_filter('pace') @@ -40,7 +25,7 @@ def year(time): return time.strftime('%Y') @app.template_filter('cleandate') -def clean_date(time): +def cleandate(time): if time.month == 1 and time.day == 1: return time.strftime('%Y') return time.strftime('%Y-%m-%d') @@ -51,7 +36,40 @@ def ordinal(n): return return "%d%s" % (n,"tsnrhtdd"[(math.floor(n/10)%10!=1)*(n%10<4)*n%10::4]) -def read_db(listing=None, event=None, person=None, licence=None, search=dict(), year=None): +@app.template_filter('cleandict') +def cleandict(dict): + ''' Prevent duplication of existing query strings when calling url_for(..., **request.args) ''' + newdict = {} + for key, value in dict.items(): + if key not in ( 'title', 'year', 'start', 'show' ) and value not in ( None, '', ): + newdict[key] = value + return newdict + +def now(): + return dt.datetime.now() + +def getstart(): + start = request.args.get('start', '0') + if not isinstance(start, (int, float)): + try: + return int(start) + except: + return 0 + return start + +def getshow(): + show = request.args.get('show', PAGE_SIZE) + if show == 'all': + return -1 + if not isinstance(show, (int, float)): + try: + return int(show) + except: + return PAGE_SIZE + return show + + +def read_db(listing=None, event=None, person=None, licence=None, search=dict(), year=None, finishers=False): db = MySQLdb.connect(user='aac', passwd='saOAcCWHg4LaoSSA', db='AAC', use_unicode=True, charset="utf8", cursorclass=MySQLdb.cursors.DictCursor) c = db.cursor() @@ -60,11 +78,11 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(), show = getshow() select = '*' - close = '' - where = 'WHERE club LIKE "AAC"' - group = '' - order = 'date DESC, event, position' - limit = 'LIMIT {},{}'.format(start, show) + close = '' + where = 'WHERE club LIKE "AAC"' + group = '' + order = 'date DESC, event, position' + limit = 'LIMIT {},{}'.format(start, show) if show == -1: limit = '' @@ -88,6 +106,10 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(), firstdate = firstdate.replace(year=int(year)) lastdate = lastdate.replace(year=int(year)) where += ' AND date > "{}" AND date < "{}"'.format(firstdate, lastdate) + ''' This statement is expensive but doesn't increase the count, so don't change the count statement ''' + if finishers: + select = 'total.finishers, query.* FROM( SELECT *' + close = ') AS query INNER JOIN (SELECT event, date, distance, COUNT(event) as finishers FROM `results` GROUP BY event, distance, date) AS total ON total.event=query.event AND total.date=query.date AND total.distance=query.distance' for column in search.keys(): if isinstance(column, str): @@ -147,6 +169,7 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(), queryresults = c.fetchall() select = 'COUNT(*)' + close = '' if listing: if listing == 'race': select = 'COUNT(*) FROM ( SELECT COUNT(event)' @@ -227,7 +250,7 @@ def race(year=None, title=None): def person(title=None, year=None): if title is not None: title = urllib.parse.unquote_plus(title) - results = read_db(person=title, year=year) + results = read_db(person=title, year=year, finishers=True) return render_template('index.html', ltype='person', title=title, results=results, year=year, request=request, getstart=getstart(), getshow=getshow(), now=now(), PAGE_SIZE=PAGE_SIZE) @@ -236,7 +259,7 @@ def person(title=None, year=None): def licence(year=now().year, title=None): if title is not None: title = urllib.parse.unquote_plus(title) - results = read_db(licence=title, year=year) + results = read_db(licence=title, year=year, finishers=True) return render_template('index.html', ltype='licence', title=title, results=results, year=year, request=request, getstart=getstart(), getshow=getshow(), now=now(), PAGE_SIZE=PAGE_SIZE) diff --git a/load_spreadsheet.py b/load_spreadsheet.py index b2cf825..c868c2d 100755 --- a/load_spreadsheet.py +++ b/load_spreadsheet.py @@ -19,7 +19,6 @@ import argparse import datetime as dt import dateutil.parser as dtp import logging -import uuid import os import re import sys @@ -61,7 +60,7 @@ def main(): soup = bs4.BeautifulSoup(page, 'html.parser') for event in soup.find_all('tr'): raceinfo = dict() - link = event.find('a', href=re.compile('.xls$')) + link = event.find('a', href=re.compile('.xlsx?$')) name = event.find('td', class_=re.compile('EventHeadline')) date = event.find('td', class_=re.compile('EventDate')) dist = event.find('td', class_=re.compile('EventDist'), string=re.compile('^\s*[\d+\.]\s*(KM)?\s*$')) @@ -75,6 +74,8 @@ def main(): if dist is not None: raceinfo['distance'] = dist.string spreadsheets.append(raceinfo) + #pp.pprint(spreadsheets) + #sys.exit(1) for race in spreadsheets: url = race['url'] with urllib.request.urlopen(url) as response, tempfile.TemporaryDirectory() as tmpdir: @@ -92,6 +93,8 @@ def main(): raise else: load_into_db(rows) + log.debug("\n") + elif args.input_file: @@ -135,8 +138,8 @@ def load_into_db(rows): `date` datetime DEFAULT NULL, `distance` float(10) DEFAULT NULL, `event` varchar(100) COLLATE utf8_unicode_ci NOT NULL, - `eventuuid` varchar(36) COLLATE utf8_unicode_ci NOT NULL, `position` int(5) NOT NULL, + `finishers` int(5) DEFAULT NULL, `time` time NOT NULL, `name` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL, `surname` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL, @@ -194,8 +197,6 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even rows = [] filename = os.path.basename(spreadsheet) if re.search('.xlsx?$', spreadsheet, flags=re.IGNORECASE) is not None: - ''' The eventuuid should be unique for this event, but are not derived from the name, and cannot be used to detect duplicate events ''' - eventuuid = uuid.uuid4() book = xlrd.open_workbook(spreadsheet) for sheetname in book.sheet_names(): sheet = book.sheet_by_name(sheetname) @@ -313,7 +314,6 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even continue item['date'] = eventdate item['event'] = eventname - item['eventuuid'] = eventuuid item['distance'] = distance item['source'] = src rows.append(item) @@ -357,10 +357,10 @@ def clean_data(input_rows): r['distance'] = length.group(1) ''' Fix sex ''' - if 'sex' in ir and re.search('^\sF', str(ir.get('sex')), flags=re.IGNORECASE) is not None: - r['sex'] = 'F' + if 'sex' in ir and re.search('^\s*F', str(ir.get('sex')), flags=re.IGNORECASE) is not None: + r['sex'] = 'female' else: - r['sex'] = 'M' + r['sex'] = 'male' ''' Fix club ''' if re.search('^\s*(AAC\b|Atlantic\s*Athletic)', str(ir.get('club')), flags=re.IGNORECASE) is not None: @@ -389,14 +389,16 @@ def clean_data(input_rows): ''' Should be a string ''' for key in ( 'event', 'name', 'surname', 'licence', 'club', 'category', 'sex', ): val = ir.get(key) + if isinstance(val, float): + val = int(val) if val is not None: try: - r[key] = str(val) + r[key] = re.sub('(^\s*|\s*$)', '', str(val)) except: pass ''' Leave alone ''' - for key in ( 'event', 'eventuuid', 'source', ): + for key in ( 'event', 'source', ): r[key] = ir.get(key) rows.append(r) diff --git a/templates/head.html b/templates/head.html index 6b77056..27040b3 100644 --- a/templates/head.html +++ b/templates/head.html @@ -11,6 +11,7 @@ {%- set ns.show = getshow -%} {# Reset arguments, so as not to display standard arguments in the query part of the URL #} +{%- set ns.query = request.args | cleandict -%} {%- if ns.start == 0 -%} {%- set ns.start = None -%} {%- endif -%} @@ -20,4 +21,4 @@ {%- endif -%}
-{% include 'tabs.html' with context %} +{% include 'tabs.html' with context -%} diff --git a/templates/index.html b/templates/index.html index f3d9dd6..892af84 100644 --- a/templates/index.html +++ b/templates/index.html @@ -16,10 +16,8 @@