From 6c1e9d82a0c4adf198fcf3dfd95f16017784cc0e Mon Sep 17 00:00:00 2001 From: Timothy Allen Date: Tue, 14 Aug 2018 11:01:26 +0200 Subject: [PATCH] URL argument cleanup; calculate finishers and category/sex positions. --- aacstats.py | 75 +++++++++++++++++++++++------------- load_spreadsheet.py | 24 ++++++------ templates/head.html | 3 +- templates/index.html | 12 +++--- templates/list-licence.html | 4 +- templates/list-race.html | 2 +- templates/list-rankings.html | 2 +- templates/list-runners.html | 2 +- templates/prevnext.html | 10 ++--- templates/search.html | 2 +- 10 files changed, 80 insertions(+), 56 deletions(-) diff --git a/aacstats.py b/aacstats.py index f600324..ee14ff7 100644 --- a/aacstats.py +++ b/aacstats.py @@ -10,25 +10,10 @@ app = Flask(__name__) PAGE_SIZE=20 MIN_MONTHS_FOR_LISTINGS=3 -def now(): - return dt.datetime.now() - -def getstart(): - start = request.args.get('start', '0') - if not isinstance(start, (int, float)): - return 0 - return start - -def getshow(): - show = request.args.get('show', PAGE_SIZE) - if show == 'all': - return -1 - if not isinstance(show, (int, float)): - return PAGE_SIZE - return show - @app.template_filter('urlescape') def urlescape(string): + if string is None: + return '' return urllib.parse.quote_plus(string) @app.template_filter('pace') @@ -40,7 +25,7 @@ def year(time): return time.strftime('%Y') @app.template_filter('cleandate') -def clean_date(time): +def cleandate(time): if time.month == 1 and time.day == 1: return time.strftime('%Y') return time.strftime('%Y-%m-%d') @@ -51,7 +36,40 @@ def ordinal(n): return return "%d%s" % (n,"tsnrhtdd"[(math.floor(n/10)%10!=1)*(n%10<4)*n%10::4]) -def read_db(listing=None, event=None, person=None, licence=None, search=dict(), year=None): +@app.template_filter('cleandict') +def cleandict(dict): + ''' Prevent duplication of existing query strings when calling url_for(..., **request.args) ''' + newdict = {} + for key, value in dict.items(): + if key not in ( 'title', 'year', 'start', 'show' ) and value not in ( None, '', ): + newdict[key] = value + return newdict + +def now(): + return dt.datetime.now() + +def getstart(): + start = request.args.get('start', '0') + if not isinstance(start, (int, float)): + try: + return int(start) + except: + return 0 + return start + +def getshow(): + show = request.args.get('show', PAGE_SIZE) + if show == 'all': + return -1 + if not isinstance(show, (int, float)): + try: + return int(show) + except: + return PAGE_SIZE + return show + + +def read_db(listing=None, event=None, person=None, licence=None, search=dict(), year=None, finishers=False): db = MySQLdb.connect(user='aac', passwd='saOAcCWHg4LaoSSA', db='AAC', use_unicode=True, charset="utf8", cursorclass=MySQLdb.cursors.DictCursor) c = db.cursor() @@ -60,11 +78,11 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(), show = getshow() select = '*' - close = '' - where = 'WHERE club LIKE "AAC"' - group = '' - order = 'date DESC, event, position' - limit = 'LIMIT {},{}'.format(start, show) + close = '' + where = 'WHERE club LIKE "AAC"' + group = '' + order = 'date DESC, event, position' + limit = 'LIMIT {},{}'.format(start, show) if show == -1: limit = '' @@ -88,6 +106,10 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(), firstdate = firstdate.replace(year=int(year)) lastdate = lastdate.replace(year=int(year)) where += ' AND date > "{}" AND date < "{}"'.format(firstdate, lastdate) + ''' This statement is expensive but doesn't increase the count, so don't change the count statement ''' + if finishers: + select = 'total.finishers, query.* FROM( SELECT *' + close = ') AS query INNER JOIN (SELECT event, date, distance, COUNT(event) as finishers FROM `results` GROUP BY event, distance, date) AS total ON total.event=query.event AND total.date=query.date AND total.distance=query.distance' for column in search.keys(): if isinstance(column, str): @@ -147,6 +169,7 @@ def read_db(listing=None, event=None, person=None, licence=None, search=dict(), queryresults = c.fetchall() select = 'COUNT(*)' + close = '' if listing: if listing == 'race': select = 'COUNT(*) FROM ( SELECT COUNT(event)' @@ -227,7 +250,7 @@ def race(year=None, title=None): def person(title=None, year=None): if title is not None: title = urllib.parse.unquote_plus(title) - results = read_db(person=title, year=year) + results = read_db(person=title, year=year, finishers=True) return render_template('index.html', ltype='person', title=title, results=results, year=year, request=request, getstart=getstart(), getshow=getshow(), now=now(), PAGE_SIZE=PAGE_SIZE) @@ -236,7 +259,7 @@ def person(title=None, year=None): def licence(year=now().year, title=None): if title is not None: title = urllib.parse.unquote_plus(title) - results = read_db(licence=title, year=year) + results = read_db(licence=title, year=year, finishers=True) return render_template('index.html', ltype='licence', title=title, results=results, year=year, request=request, getstart=getstart(), getshow=getshow(), now=now(), PAGE_SIZE=PAGE_SIZE) diff --git a/load_spreadsheet.py b/load_spreadsheet.py index b2cf825..c868c2d 100755 --- a/load_spreadsheet.py +++ b/load_spreadsheet.py @@ -19,7 +19,6 @@ import argparse import datetime as dt import dateutil.parser as dtp import logging -import uuid import os import re import sys @@ -61,7 +60,7 @@ def main(): soup = bs4.BeautifulSoup(page, 'html.parser') for event in soup.find_all('tr'): raceinfo = dict() - link = event.find('a', href=re.compile('.xls$')) + link = event.find('a', href=re.compile('.xlsx?$')) name = event.find('td', class_=re.compile('EventHeadline')) date = event.find('td', class_=re.compile('EventDate')) dist = event.find('td', class_=re.compile('EventDist'), string=re.compile('^\s*[\d+\.]\s*(KM)?\s*$')) @@ -75,6 +74,8 @@ def main(): if dist is not None: raceinfo['distance'] = dist.string spreadsheets.append(raceinfo) + #pp.pprint(spreadsheets) + #sys.exit(1) for race in spreadsheets: url = race['url'] with urllib.request.urlopen(url) as response, tempfile.TemporaryDirectory() as tmpdir: @@ -92,6 +93,8 @@ def main(): raise else: load_into_db(rows) + log.debug("\n") + elif args.input_file: @@ -135,8 +138,8 @@ def load_into_db(rows): `date` datetime DEFAULT NULL, `distance` float(10) DEFAULT NULL, `event` varchar(100) COLLATE utf8_unicode_ci NOT NULL, - `eventuuid` varchar(36) COLLATE utf8_unicode_ci NOT NULL, `position` int(5) NOT NULL, + `finishers` int(5) DEFAULT NULL, `time` time NOT NULL, `name` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL, `surname` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL, @@ -194,8 +197,6 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even rows = [] filename = os.path.basename(spreadsheet) if re.search('.xlsx?$', spreadsheet, flags=re.IGNORECASE) is not None: - ''' The eventuuid should be unique for this event, but are not derived from the name, and cannot be used to detect duplicate events ''' - eventuuid = uuid.uuid4() book = xlrd.open_workbook(spreadsheet) for sheetname in book.sheet_names(): sheet = book.sheet_by_name(sheetname) @@ -313,7 +314,6 @@ def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, even continue item['date'] = eventdate item['event'] = eventname - item['eventuuid'] = eventuuid item['distance'] = distance item['source'] = src rows.append(item) @@ -357,10 +357,10 @@ def clean_data(input_rows): r['distance'] = length.group(1) ''' Fix sex ''' - if 'sex' in ir and re.search('^\sF', str(ir.get('sex')), flags=re.IGNORECASE) is not None: - r['sex'] = 'F' + if 'sex' in ir and re.search('^\s*F', str(ir.get('sex')), flags=re.IGNORECASE) is not None: + r['sex'] = 'female' else: - r['sex'] = 'M' + r['sex'] = 'male' ''' Fix club ''' if re.search('^\s*(AAC\b|Atlantic\s*Athletic)', str(ir.get('club')), flags=re.IGNORECASE) is not None: @@ -389,14 +389,16 @@ def clean_data(input_rows): ''' Should be a string ''' for key in ( 'event', 'name', 'surname', 'licence', 'club', 'category', 'sex', ): val = ir.get(key) + if isinstance(val, float): + val = int(val) if val is not None: try: - r[key] = str(val) + r[key] = re.sub('(^\s*|\s*$)', '', str(val)) except: pass ''' Leave alone ''' - for key in ( 'event', 'eventuuid', 'source', ): + for key in ( 'event', 'source', ): r[key] = ir.get(key) rows.append(r) diff --git a/templates/head.html b/templates/head.html index 6b77056..27040b3 100644 --- a/templates/head.html +++ b/templates/head.html @@ -11,6 +11,7 @@ {%- set ns.show = getshow -%} {# Reset arguments, so as not to display standard arguments in the query part of the URL #} +{%- set ns.query = request.args | cleandict -%} {%- if ns.start == 0 -%} {%- set ns.start = None -%} {%- endif -%} @@ -20,4 +21,4 @@ {%- endif -%} -{% include 'tabs.html' with context %} +{% include 'tabs.html' with context -%} diff --git a/templates/index.html b/templates/index.html index f3d9dd6..892af84 100644 --- a/templates/index.html +++ b/templates/index.html @@ -16,10 +16,8 @@ Position - {% if ltype != 'person' %} Name Licence - {% endif %} Time Average Pace {% if ltype != 'event' %} @@ -31,15 +29,15 @@ {%- for row in results['rows'] -%} - {%- set person='{} {}'.format(row.name|e, row.surname|e) -%} + {%- set person = '{} {}'.format(row.name or '', row.surname or '') -%} - Position {{ row.position|e }} - Name {{ person }} - Licence {{ row.licence|e }} + Position {{ row.position|e }}{% if row.finishers %} / {{ row.finishers }}{% endif %} + Name {{ person|trim|e }} + Licence {% if row.licence %}{{ row.licence|trim|e }}{% endif %} Time {{ row.time|e }} Average Pace {% if row.distance is number and row.distance|int != 0 %}{{ (row.time / row.distance) | pace }} min/KM{% endif %} {%- if ltype != 'event' -%} - Race {{ row.event|e }} ({{ row.distance|e }} KM) + Race {{ row.event|trim|e }} ({{ row.distance|trim|e }} KM) {%- endif -%} Date {{ row.date|cleandate|e }} Notes diff --git a/templates/list-licence.html b/templates/list-licence.html index e801325..5dcb011 100644 --- a/templates/list-licence.html +++ b/templates/list-licence.html @@ -19,8 +19,8 @@ {%- for row in results['rows'] -%} - Licence {{ row.licence|e }} - Name {{ row.person|e }} + Licence {{ row.licence|trim|e }} + Name {{ row.person|trim|e }} Year {{ row.date | year }} {%- endfor -%} diff --git a/templates/list-race.html b/templates/list-race.html index e4e549d..e0ae830 100644 --- a/templates/list-race.html +++ b/templates/list-race.html @@ -18,7 +18,7 @@ {%- for row in results['rows'] -%} - Race {{ row.event|e }} + Race {{ row.event|trim|e }} Date {{ row.date | cleandate }} {%- endfor -%} diff --git a/templates/list-rankings.html b/templates/list-rankings.html index 32863a2..5e0e9d5 100644 --- a/templates/list-rankings.html +++ b/templates/list-rankings.html @@ -20,7 +20,7 @@ {%- for row in results['rows'] -%} - Name {{ row.person|e }} + Name {{ row.person|trim|e }} Average Position {{ row.score|e }} Number of Races {{ row.races|e }} diff --git a/templates/list-runners.html b/templates/list-runners.html index 15f609b..f87eb97 100644 --- a/templates/list-runners.html +++ b/templates/list-runners.html @@ -18,7 +18,7 @@ {%- for row in results['rows'] -%} - Name {{ row.person|e }} + Name {{ row.person|trim|e }} Distance {{ row.total|e }} {%- endfor -%} diff --git a/templates/prevnext.html b/templates/prevnext.html index 96e1b19..e902567 100644 --- a/templates/prevnext.html +++ b/templates/prevnext.html @@ -14,16 +14,16 @@ {%- set ns.ellipsis = False -%} {% if thispage > 1 %} - « First + « First {% if (getstart - getshow) > 1 %} - ‹ Prev + ‹ Prev {% endif %} {% endif %} {% for page in range(1, totalpages+1) %} {% if page < 4 or page > (totalpages+1-4) or (page > (thispage-3) and page < (thispage+3)) %} {%- if page != thispage -%} - {{ page }} + {{ page }} {%- else -%} {{ page }} {%- endif %} @@ -38,8 +38,8 @@ {% if thispage < totalpages %} {% if (getstart + getshow) != (totalpages - 1) * getshow %} - Next › + Next › {% endif %} - Last » + Last » {% endif %} diff --git a/templates/search.html b/templates/search.html index 9127edf..d6ef39c 100644 --- a/templates/search.html +++ b/templates/search.html @@ -24,6 +24,6 @@ -
+