Initial commit.
This commit is contained in:
commit
5b7a78223e
127
aacstats.py
Normal file
127
aacstats.py
Normal file
@ -0,0 +1,127 @@
|
|||||||
|
from flask import Flask, request, render_template, url_for
|
||||||
|
import math
|
||||||
|
import MySQLdb
|
||||||
|
import MySQLdb.cursors
|
||||||
|
import datetime as dt
|
||||||
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
@app.route('/')
def index():
    """Render the paginated list of all AAC results.

    Query parameters:
        start: zero-based offset of the first row to show (default 0).
        limit: number of rows per page (default 10).

    Missing, non-numeric or out-of-range values fall back to the defaults
    rather than raising: the values end up in a SQL ``LIMIT`` clause and
    the template divides by ``limit`` when it builds the page links.
    """
    # With type=int the lookup returns the default when conversion fails,
    # so "?start=abc" no longer turns into a 500 error.
    start = max(request.args.get('start', 0, type=int), 0)
    limit = request.args.get('limit', 10, type=int)
    if limit < 1:
        limit = 10
    results = read_db(start, limit)
    total = int(total_entries())
    return render_template('index.html', ltype='index', request=request, start=start, limit=limit, results=results, total=total)
|
||||||
|
|
||||||
|
@app.route('/race/<year>/<title>')
def race(start=0, year=None, title=None):
    """Render the paginated AAC results for one event in one year.

    Args:
        start: unused; kept for interface compatibility. The offset is
            always taken from the ``start`` query parameter.
        year: year segment of the URL.
        title: event name segment of the URL.

    Query parameters ``start`` (default 0) and ``limit`` (default 10) fall
    back to their defaults when missing, non-numeric or out of range,
    because they feed a SQL ``LIMIT`` clause and the template divides by
    ``limit``.
    """
    # With type=int the lookup returns the default when conversion fails.
    start = max(request.args.get('start', 0, type=int), 0)
    limit = request.args.get('limit', 10, type=int)
    if limit < 1:
        limit = 10
    total = int(total_entries(event=title, year=year))
    results = read_db(start, limit, event=title, year=year)
    return render_template('index.html', ltype='race', title=title, year=year, request=request, start=start, limit=limit, results=results, total=total)
|
||||||
|
|
||||||
|
@app.route('/person/<title>')
@app.route('/person/<title>/<year>')
def person(start=0, title=None, year=None):
    """Render the paginated results of one person, optionally for one year.

    Args:
        start: unused; kept for interface compatibility. The offset is
            always taken from the ``start`` query parameter.
        title: "name surname" segment of the URL.
        year: optional year segment of the URL.

    Query parameters ``start`` (default 0) and ``limit`` (default 10) fall
    back to their defaults when missing, non-numeric or out of range,
    because they feed a SQL ``LIMIT`` clause and the template divides by
    ``limit``.
    """
    # With type=int the lookup returns the default when conversion fails.
    start = max(request.args.get('start', 0, type=int), 0)
    limit = request.args.get('limit', 10, type=int)
    if limit < 1:
        limit = 10
    total = int(total_entries(person=title, year=year))
    results = read_db(start, limit, person=title, year=year)
    return render_template('index.html', ltype='person', title=title, year=year, request=request, start=start, limit=limit, results=results, total=total)
|
||||||
|
|
||||||
|
@app.route('/licence/<year>/<title>')
def licence(start=0, year=None, title=None):
    """Render the paginated results recorded against one licence number.

    Args:
        start: unused; kept for interface compatibility. The offset is
            always taken from the ``start`` query parameter.
        year: year segment of the URL; defaults to the current year.
        title: licence number segment of the URL.

    Query parameters ``start`` (default 0) and ``limit`` (default 10) fall
    back to their defaults when missing, non-numeric or out of range,
    because they feed a SQL ``LIMIT`` clause and the template divides by
    ``limit``.
    """
    # Resolve "this year" per call: a default written in the signature is
    # evaluated once at import time and goes stale in a long-lived process.
    if year is None:
        year = dt.datetime.now().year
    # With type=int the lookup returns the default when conversion fails.
    start = max(request.args.get('start', 0, type=int), 0)
    limit = request.args.get('limit', 10, type=int)
    if limit < 1:
        limit = 10
    total = int(total_entries(licence=title, year=year))
    results = read_db(start, limit, licence=title, year=year)
    return render_template('index.html', ltype='licence', title=title, year=year, request=request, start=start, limit=limit, results=results, total=total)
|
||||||
|
|
||||||
|
|
||||||
|
def read_db(start=0, limit=10, person=None, licence=None, event=None, year=None):
    """Fetch one page of AAC results, newest first.

    Args:
        start: zero-based offset of the first row.
        limit: maximum number of rows to return.
        person: optional "name surname" to match (SQL LIKE).
        licence: optional licence number to match (SQL LIKE).
        event: optional event name to match (SQL LIKE).
        year: optional calendar year the result date must fall in.

    Returns:
        The rows returned by the cursor (dicts, because a DictCursor is used).
    """
    where, params = _build_where(person=person, licence=licence, event=event, year=year)
    sql = 'SELECT * FROM `results` WHERE {} ORDER BY date DESC, event, position LIMIT %s, %s'.format(where)
    db = _connect()
    try:
        c = db.cursor()
        # Every user-supplied value is passed as a parameter, never
        # formatted into the statement text.
        c.execute(sql, params + [int(start), int(limit)])
        return c.fetchall()
    finally:
        db.close()


@app.template_filter('total_entries')
def total_entries(person=None, licence=None, event=None, year=None):
    """Count the AAC results matching the given filters.

    Takes the same filters as read_db and returns the row count reported
    by the database.
    """
    where, params = _build_where(person=person, licence=licence, event=event, year=year)
    sql = 'SELECT COUNT(*) AS total FROM `results` WHERE {}'.format(where)
    db = _connect()
    try:
        c = db.cursor()
        c.execute(sql, params)
        return c.fetchone()['total']
    finally:
        db.close()


def _connect():
    """Open a connection to the results database with dict-style rows."""
    # NOTE(review): the credentials are hard-coded in source; they should
    # come from configuration or the environment instead.
    return MySQLdb.connect(user='aac', passwd='saOAcCWHg4LaoSSA', db='AAC', cursorclass=MySQLdb.cursors.DictCursor)


def _build_where(person=None, licence=None, event=None, year=None):
    """Build the shared WHERE clause and its parameter list.

    Returns:
        A ``(sql, params)`` pair: ``sql`` contains only ``%s`` placeholders
        and ``params`` holds the matching values in order.

    Raises:
        ValueError: if ``year`` is not a valid calendar year.
    """
    clauses = ["club LIKE 'AAC'"]
    params = []
    if person:
        clauses.append("CONCAT_WS(' ', name, surname) LIKE %s")
        params.append(person)
    if event:
        clauses.append('event LIKE %s')
        params.append(event)
    if licence:
        clauses.append('licence LIKE %s')
        params.append(licence)
    if year:
        year = int(year)
        # Inclusive bounds: results whose day is unknown are stored at
        # 1 January 00:00:00, which a strict ">" comparison would drop.
        clauses.append('date >= %s AND date <= %s')
        params.append(dt.datetime(year, 1, 1))
        params.append(dt.datetime(year, 12, 31, 23, 59, 59))
    return ' AND '.join(clauses), params
|
||||||
|
|
||||||
|
@app.template_filter('pace')
|
||||||
|
def pace(time):
    """Format a duration (timedelta) as an ``MM:SS`` string."""
    # Anchor the duration on an arbitrary midnight so strftime can be used.
    midnight = dt.datetime(1, 1, 1)
    moment = midnight + time
    return moment.strftime('%M:%S')
|
||||||
|
|
||||||
|
@app.template_filter('clean_date')
|
||||||
|
def clean_date(time):
    """Format a date, showing only the year when it falls on 1 January.

    Any other date is rendered as ``YYYY-MM-DD``.
    """
    on_first_of_january = (time.month, time.day) == (1, 1)
    layout = '%Y' if on_first_of_january else '%Y-%m-%d'
    return time.strftime(layout)
|
||||||
|
|
||||||
|
@app.template_filter('year')
|
||||||
|
def year(time):
    """Return the year of a date or datetime as a string."""
    year_text = time.strftime('%Y')
    return year_text
|
||||||
|
|
||||||
|
@app.template_filter('ordinal')
|
||||||
|
def ordinal(n):
    """Return the integer ``n`` followed by its English ordinal suffix.

    For example 1 -> "1st", 2 -> "2nd", 3 -> "3rd", 11 -> "11th".
    """
    suffixes = ("th", "st", "nd", "rd")
    last_digit = n % 10
    # 11, 12 and 13 (tens digit of 1) always take "th".
    in_teens = math.floor(n / 10) % 10 == 1
    takes_special_suffix = (not in_teens) and last_digit < 4
    # Multiplying by the boolean selects index 0 ("th") when it is False.
    suffix = suffixes[last_digit * takes_special_suffix]
    return "%d%s" % (n, suffix)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
app.run(debug=True)
|
||||||
|
|
||||||
|
|
||||||
|
# most race KMs in the year, by distance
|
||||||
|
# fastest race pace over the year (time / KMs)
|
||||||
|
# individual KMs, and race results (race, position, time) by name
|
||||||
|
# - and by race number and year
|
||||||
|
# by race
|
||||||
|
# - and by gender
|
||||||
|
|
||||||
|
# default: list of races (sorted by recent)
|
||||||
|
# tabs: list of [races (sorted by recent), people (sorted by total kms for the year), licences for the year (sorted by number of races), podiums/winners (people sorted by total_position/total_races), ]
|
||||||
|
# click to expand by [race (all AAC members by position), person (races by recent), person (races by pace)]
|
||||||
|
# SEARCH
|
||||||
|
|
||||||
|
# /?sort={distance,pace}&sex={m,f}
|
||||||
|
# /race/2018/HEWAT ETC?sort={position,pace}&sex={m,f}
|
||||||
|
# /person/Timothy Allen?sort={pace,date}
|
||||||
|
# /person/Timothy Allen/2018
|
||||||
|
# /licence/2018/4356
|
||||||
|
# /licence/2018/4356/2018
|
||||||
|
#
|
||||||
|
# TODO LIMIT/pagination
|
3
aacstats.wsgi
Normal file
3
aacstats.wsgi
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
import sys
|
||||||
|
sys.path.insert(0, '/var/www/node/aac')
|
||||||
|
from aacstats import app as application
|
435
load_spreadsheet.py
Executable file
435
load_spreadsheet.py
Executable file
@ -0,0 +1,435 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
''' Utility to load WPA results sheets into a MySQL database.'''
|
||||||
|
__author__ = 'Timothy Allen'
|
||||||
|
__email__ = 'tim@allen.org.za'
|
||||||
|
__license__ = 'MIT'
|
||||||
|
|
||||||
|
import bs4
|
||||||
|
import urllib.request
|
||||||
|
import urllib.parse
|
||||||
|
import json
|
||||||
|
import mailbox
|
||||||
|
import email
|
||||||
|
import mimetypes
|
||||||
|
import csv
|
||||||
|
import xlrd
|
||||||
|
import MySQLdb
|
||||||
|
import argparse
|
||||||
|
import datetime as dt
|
||||||
|
import dateutil.parser as dtp
|
||||||
|
import logging
|
||||||
|
import uuid
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import tempfile
|
||||||
|
import pprint
|
||||||
|
|
||||||
|
# Set up MySQL database, if not done
|
||||||
|
# Read Excel/ODS/CSV database into MySQL
|
||||||
|
# Check MIME attachments in email for spreadsheet, and load that ***
|
||||||
|
# Then display the data in a (separate) web application
|
||||||
|
|
||||||
|
# The user is in /etc/dovecot/users
|
||||||
|
# Password: deliberately not recorded here (credentials do not belong in source control)
|
||||||
|
MAILDIR = '/var/mail/virtual/aac/Maildir'
|
||||||
|
|
||||||
|
log = logging.getLogger(__file__)
|
||||||
|
pp = pprint.PrettyPrinter(indent=4)
|
||||||
|
|
||||||
|
def main():
    ''' Load WPA results from the web, a single file, or the mailbox.

    The source is chosen on the command line (see parse_arguments):
    --web scrapes the WPA events listing, --input loads one spreadsheet,
    and with neither option every message in MAILDIR is searched for
    spreadsheet attachments.
    '''
    if sys.version_info < (3, 2):
        raise Exception(
            'Unsupported Python version, please use at least Python 3.2')

    args = parse_arguments()

    if args.scrape_web:
        _load_from_web()
    elif args.input_file:
        rows = read_spreadsheet(args.input_file, src=args.input_file)
        log.info("Loading data from file %s" % args.input_file)
        load_into_db(rows)
    else:
        _load_from_maildir()

    return


def _find_wpa_spreadsheets():
    ''' Return one dict (url, event, date, distance) per .xls link in the WPA events listing. '''
    spreadsheets = []
    seen_urls = set()
    wpa = 'http://www.wpa.org.za/Events/DynamicEvents.asmx/BuildEventDisplay'
    xls_link = re.compile(r'\.xls$')
    headline_class = re.compile('EventHeadline')
    date_class = re.compile('EventDate')
    dist_class = re.compile('EventDist')
    # One or more digits/dots, optionally followed by "KM". The original
    # character class "[\d+\.]" only ever matched a single character.
    dist_text = re.compile(r'^\s*[\d\.]+\s*(KM)?\s*$')

    for year in range(2016, dt.datetime.now().year + 1):
        log.debug("Finding results for %s" % year)
        # Kept separate from the parsed command-line arguments, which the
        # original code overwrote with this dict.
        query = {"WPAExtra":"True","TimeColumn":"True","entityid":"674417","selectedyear":year,"selectedmonth":0,"commissionid":"0","selectedstate":"0","categoryid":0,"themeid":"46"}
        payload = json.dumps(query).encode('utf8')
        req = urllib.request.Request(wpa, data=payload, headers={'content-type': 'application/json'})
        with urllib.request.urlopen(req) as response:
            data = json.loads(response.read().decode('utf8'))
        page, *_ = data.values() # get the first value
        soup = bs4.BeautifulSoup(page, 'html.parser')
        for event in soup.find_all('tr'):
            link = event.find('a', href=xls_link)
            name = event.find('td', class_=headline_class)
            date = event.find('td', class_=date_class)
            dist = event.find('td', class_=dist_class, string=dist_text)
            if link is None or name is None:
                continue
            url = link['href']
            if url in seen_urls:
                continue
            seen_urls.add(url)

            raceinfo = dict()
            raceinfo['url'] = url
            raceinfo['event'] = name.string
            # A row without a usable date cell must not abort the scrape;
            # read_spreadsheet falls back to the file contents for None.
            raceinfo['date'] = None
            if date is not None and date.string:
                try:
                    raceinfo['date'] = dtp.parse(date.string, dayfirst=True)
                except (ValueError, OverflowError):
                    log.warning("Unable to parse event date %s" % date.string)
            raceinfo['distance'] = None
            if dist is not None:
                raceinfo['distance'] = dist.string
            spreadsheets.append(raceinfo)
    return spreadsheets


def _load_from_web():
    ''' Download every spreadsheet listed on the WPA site and load it into the database. '''
    for race in _find_wpa_spreadsheets():
        url = race['url']
        with urllib.request.urlopen(url) as response, tempfile.TemporaryDirectory() as tmpdir:
            log.info("Loading data from URL %s" % race['url'])
            data = response.read()
            urlparts = urllib.parse.urlparse(url)
            filename = os.path.basename(urlparts.path)
            filepath = os.path.join(tmpdir, filename)
            with open(filepath, 'wb') as fp:
                fp.write(data)
            try:
                rows = read_spreadsheet(filepath, src=url, eventname=race['event'], eventdate=race['date'], eventdistance=race['distance'])
            except Exception:
                log.warning("ERROR: Unable to load data from URL %s" % url)
                raise
            else:
                load_into_db(rows)


def _load_from_maildir():
    ''' Load every spreadsheet attachment found in the MAILDIR mailbox. '''
    for message in mailbox.Maildir(MAILDIR):
        counter = 1
        for part in message.walk():
            if part.get_content_maintype() == 'multipart':
                continue
            filename = part.get_filename()
            ext = mimetypes.guess_extension(part.get_content_type())
            if not filename:
                if not ext:
                    ext = '.xls' # attempt to decode as a spreadsheet
                filename = 'part-%03d%s' % (counter, ext)
            counter += 1
            if re.search(r'\.xl(b|s)x?$', filename, flags=re.IGNORECASE) is None:
                continue
            with tempfile.TemporaryDirectory() as tmpdir:
                filepath = os.path.join(tmpdir, filename)
                with open(filepath, 'wb') as fp:
                    fp.write(part.get_payload(decode=True))
                log.info("Loading data from file %s" % filename)
                try:
                    rows = read_spreadsheet(filepath, src=message['from'])
                    load_into_db(rows)
                except Exception:
                    # Best effort: one bad attachment must not stop the
                    # rest of the mailbox, but keep the traceback visible.
                    log.info("Unable to load data from file %s" % filename, exc_info=True)
|
||||||
|
|
||||||
|
|
||||||
|
def load_into_db(rows):
    ''' Insert cleaned result rows into the `results` table.

    Nothing is inserted when rows is empty, when a row with the same
    source already exists, or when the first row's date, position,
    distance and event already exist. All rows are committed together;
    if any insert fails the exception propagates and nothing is committed.

    The table is expected to have been created with:

    CREATE TABLE `results` (
    `result_key` int(11) NOT NULL AUTO_INCREMENT,
    `date` datetime DEFAULT NULL,
    `distance` float(10) DEFAULT NULL,
    `event` varchar(100) COLLATE utf8_unicode_ci NOT NULL,
    `eventuuid` varchar(36) COLLATE utf8_unicode_ci NOT NULL,
    `position` int(5) NOT NULL,
    `time` time NOT NULL,
    `name` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL,
    `surname` varchar(75) COLLATE utf8_unicode_ci DEFAULT NULL,
    `licence` varchar(20) COLLATE utf8_unicode_ci DEFAULT NULL,
    `club` varchar(40) COLLATE utf8_unicode_ci DEFAULT NULL,
    `age` int(3) DEFAULT NULL,
    `sex` varchar(10) COLLATE utf8_unicode_ci DEFAULT NULL,
    `category` varchar(15) COLLATE utf8_unicode_ci DEFAULT NULL,
    `sexposition` int(5) DEFAULT NULL,
    `catposition` int(5) DEFAULT NULL,
    `source` varchar(200) COLLATE utf8_unicode_ci DEFAULT NULL,
    PRIMARY KEY (`result_key`)
    ) ENGINE=InnoDB CHARSET=utf8 COLLATE=utf8_unicode_ci;
    '''
    if not rows:
        log.warning("No data found in spreadsheet")
        return

    # NOTE(review): the credentials are hard-coded in source; they should
    # come from configuration or the environment instead.
    db = MySQLdb.connect(user='aac', passwd='saOAcCWHg4LaoSSA', db='AAC', use_unicode=True, charset="utf8")
    try:
        c = db.cursor()
        first = rows[0]

        ''' Check for duplicate values by DATE and POSITION and RACE and EVENT '''
        sql = 'SELECT COUNT(*) FROM `results` WHERE source LIKE %s'
        c.execute(sql, (first.get('source'),))
        log.debug(_last_query(c))
        if (c.fetchone()[0] > 0):
            log.info("Spreadsheet data already loaded")
            return

        sql = 'SELECT COUNT(*) FROM `results` WHERE date=%s AND position=%s AND distance LIKE %s AND event LIKE %s'
        c.execute(sql, (first.get('date'), first.get('position'), first.get('distance'), first.get('event'),))
        log.debug(_last_query(c))
        if (c.fetchone()[0] > 0):
            log.info("Spreadsheet data already loaded")
            return

        for r in rows:
            # Rows may carry different keys, so the statement is built per row.
            fields = ', '.join(r.keys())
            values = ', '.join(['%s'] * len(r)) # placeholder values
            sql = 'INSERT into `results` ( %s ) VALUES ( %s )' % (fields, values)
            try:
                c.execute(sql, list(r.values()))
            except Exception as e:
                log.debug("ERROR: %s" % type(e))
                log.debug("Last query was: %s" % _last_query(c))
                raise

        db.commit()
    finally:
        # Closing without a commit discards any partial insert.
        db.close()

    return


def _last_query(cursor):
    ''' Return the last statement the cursor executed, if the driver exposes it. '''
    # These are private driver attributes and may not exist in every
    # version, so look them up defensively rather than crash the load.
    for attr in ('_last_executed', '_executed'):
        query = getattr(cursor, attr, None)
        if query is not None:
            return query
    return None
|
||||||
|
|
||||||
|
def read_spreadsheet(spreadsheet, src=None, eventname=None, eventdate=None, eventdistance=None):
    ''' Read race results from every usable sheet of an .xls/.xlsx workbook.

    Args:
        spreadsheet: path of the workbook; other file types yield no rows.
        src: stored with every row as its source (URL, file or sender).
        eventname: event name; derived from the file name when None.
        eventdate: event date; looked up in the file name and the first
            rows of the sheet when None.
        eventdistance: fallback distance, used when neither the file name,
            the event name nor the sheet name mentions one.

    Returns:
        The rows, as dicts, after they have been passed through clean_data.
    '''
    rows = []
    filename = os.path.basename(spreadsheet)
    if re.search(r'\.xlsx?$', spreadsheet, flags=re.IGNORECASE) is not None:
        ''' The eventuuid should be unique for this event, but are not derived from the name, and cannot be used to detect duplicate events '''
        eventuuid = uuid.uuid4()
        book = xlrd.open_workbook(spreadsheet)
        for sheetname in book.sheet_names():
            sheet = book.sheet_by_name(sheetname)
            log.debug("Processing sheet %s" % sheetname)

            fields = _find_header_fields(sheet)
            position_idx, time_idx = _translate_fields(fields)

            ''' If there isn't a position field or a time field, we don't want this sheet '''
            if position_idx is None or time_idx is None:
                continue

            if eventdate is None:
                eventdate = _guess_event_date(sheet, filename)
            # The date may be a datetime (from the web scrape), a string
            # found in the file, or still None; only a datetime can be
            # formatted with strftime.
            if hasattr(eventdate, 'strftime'):
                log.info("Race date: %s" % eventdate.strftime('%Y-%m-%d'))
            else:
                log.info("Race date: %s" % eventdate)

            distance = _guess_distance(eventdistance, filename, eventname, sheetname)
            log.info("Race distance: %s" % distance)

            if eventname is None:
                eventname = _event_name_from_filename(filename)
            log.info("Event name: %s" % eventname)

            for row in range(sheet.nrows):
                ''' Only rows whose position cell is non-blank and free of letters are results '''
                position = str(sheet.cell(row, position_idx).value)
                if re.search(r'(^\s*$|[A-Za-z])', position, flags=re.IGNORECASE) is not None:
                    continue
                data = [sheet.cell(row, col).value for col in range(sheet.ncols)]
                item = dict(zip(fields, data))
                ''' If the time has been modified by Excel, unmodify it '''
                if 'time' in item and isinstance(item['time'], float):
                    try:
                        item['time'] = xlrd.xldate_as_tuple(sheet.cell(row, time_idx).value, book.datemode)
                    except ValueError:
                        ''' Skip this row if the date can't be parsed, as it's probably wrong anyway (41 hours or something silly) '''
                        continue
                item['date'] = eventdate
                item['event'] = eventname
                item['eventuuid'] = eventuuid
                item['distance'] = distance
                item['source'] = src
                rows.append(item)

    rows = clean_data(rows)
    if len(rows) > 0:
        log.debug("Sample output: %s" % pp.pformat(rows[0]))
    return rows


def _find_header_fields(sheet):
    ''' Return the header row found in the first 15 rows of the sheet, or an empty list. '''
    # Never read past the end of a short (or blank) sheet.
    for row in range(min(15, sheet.nrows)):
        values = sheet.row_values(row)
        if re.search(r'((pos\w*|no\w*|num\w*|surname|name|time|club)\s*){2,}', ' '.join(str(x) for x in values), flags=re.IGNORECASE) is not None:
            log.debug("Spreadsheet fields: %s" % ', '.join(str(x) for x in values))
            return values
    return []


def _translate_fields(fields):
    ''' Rename recognised headers in place and return (position index, time index). '''
    # Order matters: the first pattern that matches a header wins.
    patterns = (
        (r'^\s*pos', 'position'),
        (r'^\s*(time|h:?m:?s?)', 'time'),
        (r'^\s*cat\S*\s*pos(\.|\w+)?\s*$', 'catposition'),
        (r'^\s*(sex|gender)\s*pos(\.|\w*)\s*$', 'sexposition'),
        (r'^\s*(sur|last\s*)name', 'surname'),
        (r'^\s*name', 'name'),
        (r'^\s*club(\.|\w*)\s*$', 'club'),
        (r'^\s*age(\.|\w*)\s*$', 'age'),
        # Raw string, so that \b is a word boundary and not a backspace.
        (r'^\s*(sex|gender|m.?f\b|male|female)(\.|\w*)\s*$', 'sex'),
        (r'^\s*cat(\.|\w*)\s*$', 'category'),
        (r'^\s*(lic|no|num)(\.|\S*)\s*\S*\s*$', 'licence'),
        (r'^\s*(race)?date', 'date'),
        (r'^\s*(race)?dist(ance)?\s*$', 'distance'),
        (r'^\s*(race)?(event|name)\s*$', 'event'),
    )
    position_idx = None
    time_idx = None
    for i, header in enumerate(fields):
        for pattern, translated in patterns:
            if re.search(pattern, str(header), flags=re.IGNORECASE) is None:
                continue
            fields[i] = translated
            ''' Store the index of these fields for later processing '''
            if translated == 'position':
                position_idx = i
            elif translated == 'time':
                time_idx = i
            break
    return position_idx, time_idx


def _guess_event_date(sheet, filename):
    ''' Look for the date in the file name, and then look in the first 15 rows and override it. '''
    eventdate = None
    filedate = re.search(r'(20\d{2})', str(filename), flags=re.IGNORECASE)
    if filedate is not None:
        eventdate = filedate.group(1)
    if sheet.ncols > 0:
        for row in range(min(15, sheet.nrows)):
            value = sheet.cell(row, 0).value
            if re.search(r'(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*\s+\d{4}', str(value), flags=re.IGNORECASE) is not None:
                eventdate = value
                break
    return eventdate


def _guess_distance(eventdistance, filename, eventname, sheetname):
    ''' Look for the race distance in the file name, the event name and the sheet name; the last match wins. '''
    distance = eventdistance
    found = re.search(r'([\d\.]+)\s*KM', filename, flags=re.IGNORECASE)
    if found is not None:
        distance = found.group(1)
    # The event name is unknown (None) when loading from a file or mail.
    if eventname is not None:
        found = re.search(r'([\d\.]+)\s*KM', str(eventname), flags=re.IGNORECASE)
        if found is not None:
            distance = found.group(1)
    found = re.search(r'([\d\.]+\s*KM)', sheetname, flags=re.IGNORECASE)
    if found is not None:
        distance = found.group(1)
    if re.search(r'(helper|marshal)', sheetname, flags=re.IGNORECASE) is not None:
        distance = sheetname
    return distance


def _event_name_from_filename(filename):
    ''' Use the filename as the event name :-( and clean up common patterns. '''
    eventname, *_ = os.path.splitext(filename)
    eventname = str(eventname)
    eventname = re.sub(r'[\-_]', ' ', eventname, flags=re.IGNORECASE)
    eventname = re.sub(r'results?(\s*book)?', ' ', eventname, flags=re.IGNORECASE)
    eventname = re.sub(r'export', ' ', eventname, flags=re.IGNORECASE)
    eventname = re.sub(r'excel', ' ', eventname, flags=re.IGNORECASE)
    eventname = re.sub(r'\(\d\)', ' ', eventname, flags=re.IGNORECASE)
    eventname = re.sub(r'\d{0,4}20\d{2}\d{0,4}', ' ', eventname, flags=re.IGNORECASE)
    eventname = re.sub(r'\s\s+', ' ', eventname, flags=re.IGNORECASE)
    eventname = re.sub(r'(^\s*|\s*$)', '', eventname, flags=re.IGNORECASE)
    return eventname
|
||||||
|
|
||||||
|
|
||||||
|
def clean_data(input_rows):
    ''' Normalise raw spreadsheet rows into rows ready for the database.

    Args:
        input_rows: iterable of dicts keyed by the translated field names.

    Returns:
        A new list of dicts. Rows whose time is missing or cannot be
        understood are dropped. In each remaining row:
          - date is parsed when it is a string,
          - time is a datetime.time,
          - sex is 'F' or 'M',
          - club is 'AAC' for the club's own name variants,
          - positions and age are ints when convertible,
          - distance is a float (0 when the distance is unknown).
    '''
    rows = []
    for ir in input_rows:
        r = dict()

        ''' Fix date '''
        date = ir.get('date')
        if isinstance(date, str):
            # Missing parts of the date default to 1 January of this year.
            year = dt.datetime(dt.datetime.now().year, 1, 1)
            date = dtp.parse(date, default=year)
        r['date'] = date

        ''' Check time '''
        time = _clean_time(ir.get('time'))
        if time is None:
            continue
        r['time'] = time

        ''' Should be a string '''
        # NOTE(review): numeric cells presumably arrive from xlrd as floats,
        # so a licence of 4356 would be stored as '4356.0' - confirm that
        # this is intended.
        for key in ( 'name', 'surname', 'licence', 'club', 'category', ):
            val = ir.get(key)
            if val is not None:
                r[key] = str(val)

        ''' Fix sex '''
        # Applied after the string conversion so the normalised value is
        # kept; leading whitespace is optional.
        if re.search(r'^\s*F', str(ir.get('sex')), flags=re.IGNORECASE) is not None:
            r['sex'] = 'F'
        else:
            r['sex'] = 'M'

        ''' Fix club '''
        # Raw string, so that \b is a word boundary and not a backspace.
        if re.search(r'^\s*(AAC\b|Atlantic\s*Athletic)', str(ir.get('club')), flags=re.IGNORECASE) is not None:
            r['club'] = 'AAC'

        ''' Should be an int '''
        for key in ( 'position', 'sexposition', 'catposition', 'age', ):
            val = ir.get(key)
            if val is not None:
                try:
                    r[key] = int(val)
                except (TypeError, ValueError):
                    pass

        ''' Fix distance: should be a float '''
        distance = ir.get('distance')
        if distance is None:
            r['distance'] = 0
        else:
            length = re.search(r'([\d\.]+)\s*km', str(distance), flags=re.IGNORECASE)
            try:
                r['distance'] = float(length.group(1) if length is not None else distance)
            except (TypeError, ValueError):
                # Not a number (for example a helpers sheet): leave unset.
                pass

        ''' Leave alone '''
        for key in ( 'event', 'eventuuid', 'source', ):
            r[key] = ir.get(key)

        rows.append(r)
    return rows


def _clean_time(time):
    ''' Convert the formats xlrd might give us into a datetime.time, or None if unusable. '''
    if isinstance(time, dt.datetime):
        return time.time()
    if isinstance(time, dt.time):
        return time
    if isinstance(time, tuple):
        ''' Floats should already have been converted to (y, m, d, h, m, s) tuples '''
        try:
            return dt.time(hour=time[3], minute=time[4], second=time[5])
        except (IndexError, TypeError, ValueError):
            return None
    if isinstance(time, str):
        for layout in ('%H:%M:%S', '%M:%S'):
            try:
                return dt.datetime.strptime(time.strip(), layout).time()
            except ValueError:
                continue
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def parse_arguments():
    ''' Parse the command line and configure the log level.

    Returns:
        The argparse namespace with scrape_web, input_file and verbose.

    Exits with a usage error (via ArgumentParser.error) when --input names
    a file that does not exist or cannot be read.
    '''
    parser = argparse.ArgumentParser(description='Load a spreadsheet containing WPA results into a database')
    parser.add_argument(
        '--web', '-w', action='store_true', required=False, dest='scrape_web',
        help='Scrape WPA website')
    parser.add_argument(
        '--input', '-i', action='store', required=False, type=str, dest='input_file',
        help='Manually select the spreadsheet to be imported')
    parser.add_argument(
        '--verbose', '-v', action='count', required=False, dest='verbose',
        help='Print more information')
    args = parser.parse_args()

    if args.input_file:
        if not os.path.exists(args.input_file) or not os.access(args.input_file, os.R_OK):
            # A bare "raise" here has no active exception to re-raise and
            # only produced an unrelated RuntimeError.
            parser.error('cannot read input file: %s' % args.input_file)

    logging.basicConfig()
    # action='count' leaves verbose as None when -v is not given.
    verbosity = args.verbose or 0
    if verbosity >= 2:
        log.setLevel(logging.DEBUG)
    elif verbosity == 1:
        log.setLevel(logging.INFO)
    else:
        log.setLevel(logging.WARNING)

    return args
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
|
|
||||||
|
# vim: set expandtab shiftwidth=2 softtabstop=2 tw=0 :
|
32
static/style.css
Normal file
32
static/style.css
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
body {
|
||||||
|
margin: 0 auto;
|
||||||
|
max-width: 800px;
|
||||||
|
font-family: 'Roboto Condensed', sans-serif;
|
||||||
|
font-size: 11pt;
|
||||||
|
}
|
||||||
|
table {
|
||||||
|
border-collapse: collapse;
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
table th {
|
||||||
|
text-align: left;
|
||||||
|
font-size: 10pt;
|
||||||
|
}
|
||||||
|
table tr td {
|
||||||
|
padding-right: 10px;
|
||||||
|
border-bottom: 1px solid grey;
|
||||||
|
}
|
||||||
|
table tr td:last-child {
|
||||||
|
padding: 0;
|
||||||
|
}
|
||||||
|
table tr td.nowrap {
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
nav.nextprev {
|
||||||
|
text-align: center;
|
||||||
|
margin-top: 10px;
|
||||||
|
}
|
||||||
|
nav.nextprev span.prev {
|
||||||
|
}
|
||||||
|
nav.nextprev span.next {
|
||||||
|
}
|
122
templates/index.html
Normal file
122
templates/index.html
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en-ZA">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<meta name="viewport" content="width=device-width">
|
||||||
|
<title>AAC Statistics</title>
|
||||||
|
<link rel="stylesheet" type="text/css" href="{{ url_for('static', filename='style.css') }}" />
|
||||||
|
<link href="https://fonts.googleapis.com/css?family=Roboto+Condensed" rel="stylesheet">
|
||||||
|
</head>
|
||||||
|
{%- set ns = namespace() -%}
|
||||||
|
|
||||||
|
{%- set ns.limit = limit -%}
|
||||||
|
{%- if ns.limit == 0 or ns.limit == 10 -%}
|
||||||
|
{%- set ns.limit = None -%}
|
||||||
|
{%- endif -%}
|
||||||
|
|
||||||
|
{%- set ns.year = year -%}
|
||||||
|
{%- if ns.year == 0 -%}
|
||||||
|
{%- set ns.year = None -%}
|
||||||
|
{%- endif -%}
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<nav></nav>
|
||||||
|
<article>
|
||||||
|
<h1>AAC Statistics {% if title %}: {{ title }}{% endif %}</h1>
|
||||||
|
{%- if results -%}
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>Position</th>
|
||||||
|
{% if ltype != 'person' %}
|
||||||
|
<th>Name</th>
|
||||||
|
<th>Licence</th>
|
||||||
|
{% endif %}
|
||||||
|
<th>Time</th>
|
||||||
|
<th>Average Pace</th>
|
||||||
|
{% if ltype != 'event' %}
|
||||||
|
<th>Event</th>
|
||||||
|
{% endif %}
|
||||||
|
<th>Date</th>
|
||||||
|
<th>Notes</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{%- for row in results -%}
|
||||||
|
{%- set person='{} {}'.format(row.name, row.surname) -%}
|
||||||
|
{%- if distance %}{# set total_km += row.distance #}{% endif -%}
|
||||||
|
<tr>
|
||||||
|
<td>{{ row.position }}</td>
|
||||||
|
{%- if ltype != 'person' -%}
|
||||||
|
<td><a href="{{ url_for('person', title=person, start=None) }}">{{ person }}</a></td>
|
||||||
|
<td><a href="{{ url_for('licence', title=row.licence, year=row.date|year, start=None, limit=ns.limit) }}">{{ row.licence }}</a></td>
|
||||||
|
{%- endif -%}
|
||||||
|
<td>{{ row.time }}</td>
|
||||||
|
<td class="nowrap">{% if row.distance is number %}{{ (row.time / row.distance) | pace }} min/KM{% endif %}</td>
|
||||||
|
{%- if ltype != 'event' -%}
|
||||||
|
<td><a href="{{ url_for('race', title=row.event, year=row.date|year, start=None, limit=ns.limit) }}">{{ row.event }} ({{ row.distance }} KM)</a></td>
|
||||||
|
{%- endif -%}
|
||||||
|
<td>{{ row.date | clean_date }}</td>
|
||||||
|
<td>
|
||||||
|
{% if row.sex and row.sexposition and row.sexposition <= 100 %}{{ row.sexposition | ordinal }} {{ row.sex.lower() }}{% endif %}
|
||||||
|
{% if row.sexposition and row.catposition %}/{% endif %}
|
||||||
|
{% if row.catposition and row.catposition <= 100 %}{{ row.catposition | ordinal }} in category{% endif %}
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
{%- endfor -%}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
{%- endif -%}
|
||||||
|
<nav class="nextprev">
|
||||||
|
{%- set ns.start = start -%}
|
||||||
|
{%- set ns.prev = start-limit -%}
|
||||||
|
{%- set ns.next = start+limit -%}
|
||||||
|
{%- if ns.prev == 0-%}
|
||||||
|
{%- set ns.prev = None -%}
|
||||||
|
{%- endif -%}
|
||||||
|
{%- if ns.next == 0-%}
|
||||||
|
{%- set ns.next = None -%}
|
||||||
|
{%- endif -%}
|
||||||
|
{%- if ns.start == 0-%}
|
||||||
|
{%- set ns.start = None -%}
|
||||||
|
{%- endif -%}
|
||||||
|
|
||||||
|
{%- set thispage = (start / limit)|round(0,'floor')|int + 1 -%}
|
||||||
|
{%- set totalpages = (total / limit)|round(0,'ceil')|int -%}
|
||||||
|
{%- set ns.ellipsis = False -%}
|
||||||
|
|
||||||
|
{% if thispage > 1 %}
|
||||||
|
<span class="first"><a href="{{ url_for(request.endpoint, title=title, year=ns.year, start=None, limit=ns.limit) }}">«First</a></span>
|
||||||
|
{% if (start-limit) > 1 %}
|
||||||
|
<span class="prev"><a href="{{ url_for(request.endpoint, title=title, year=ns.year, start=ns.prev, limit=ns.limit) }}"><Prev</a></span>
|
||||||
|
{% endif %}
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
<span class="navlink">
|
||||||
|
{% for page in range(1, totalpages+1) %}
|
||||||
|
{% if page < 4 or page > (totalpages+1-4) or (page > (thispage-3) and page < (thispage+3)) %}
|
||||||
|
{%- if page != thispage -%}
|
||||||
|
<a href="{{ url_for(request.endpoint, title=title, year=ns.year, start=(page-1)*limit, limit=ns.limit) }}">{{ page }}</a>
|
||||||
|
{%- else -%}
|
||||||
|
<strong>{{ page }}</strong>
|
||||||
|
{%- endif %}
|
||||||
|
{%- set ns.ellipsis = False -%}
|
||||||
|
{%- else -%}
|
||||||
|
{%- if not ns.ellipsis -%}
|
||||||
|
…
|
||||||
|
{%- endif -%}
|
||||||
|
{%- set ns.ellipsis = True -%}
|
||||||
|
{% endif %}
|
||||||
|
{% endfor %}
|
||||||
|
</span>
|
||||||
|
|
||||||
|
{% if thispage < totalpages %}
|
||||||
|
{% if (start+limit) != (totalpages-1)*limit %}
|
||||||
|
<span class="next"><a href="{{ url_for(request.endpoint, title=title, year=ns.year, start=ns.next, limit=ns.limit) }}">Next></a></span>
|
||||||
|
{% endif %}
|
||||||
|
<span class="last"><a href="{{ url_for(request.endpoint, title=title, year=ns.year, start=(totalpages-1)*limit, limit=ns.limit) }}">Last»</a></span>
|
||||||
|
{% endif %}
|
||||||
|
</nav>
|
||||||
|
</article>
|
||||||
|
</body>
|
||||||
|
</html>
|
Loading…
Reference in New Issue
Block a user