utility-scripts/glucometer_graphs.pl
2017-12-18 20:15:15 +02:00

448 lines
12 KiB
Perl

#!/usr/bin/perl
#
# Perl script to convert CSV output from glucometer into graphs, using gnuplot.
#
# Author: Timothy Allen <tim@treehouse.org.za>
# License: MIT
#
# TODO Convert to python (see https://www.physicsforums.com/threads/gnuplot-how-to-find-the-area-under-a-curve-integrate.382070/)
use strict;
use warnings;
use Getopt::Long;
use IPC::Open3;
use Time::Piece;
use Data::Dumper;
# Usage message shown when the command line cannot be parsed. It lists every
# option that GetOptions accepts, including the optional ones.
my $error = "Usage: $0 --input <CSV file> --output <output PDF> [--max <n.n>] [--low <n.n>] [--high <n.n>] [--units <mmol/L|mg/dL>] [--pagesize <a4|letter|29.7cm,21.0cm>] [--graphs <n>]\n";

# Working storage shared by the rest of the script.
my @lines;              # chomped rows of the input CSV
my @sorted_lines;       # rows with the date stripped, used for the average graph
my @data;               # pieces of the gnuplot script, joined with newlines at the end
my @avg_data;           # NOTE(review): not referenced in the visible script
my %intervals;          # per time-of-day bucket: { min => ..., max => ... }
my %seen;               # days (YYYY-MM-DD) that occur in the input
my $a1c_calc;           # NOTE(review): not referenced in the visible script
my $page_size;          # value passed to gnuplot's "set terminal pdf size"
my $gnuplot_data;       # the complete gnuplot script as one string
my $total_graphs;       # number of distinct days found in the input
my $count_graphs = 0;   # number of daily graphs emitted so far
my $page_number  = 0;   # incremented every time a new multiplot page is started
my $interval     = 15;  # The number of minutes to average points for the area range graph

# Command line options and their defaults.
my $input  = '';
my $output = '';
# set these values either in mmol/L or mg/dL (don't mix them)
my $max_glucose   = 8;
my $min_glucose   = 4;
my $graph_max     = 21;
my $units         = '';
my $page          = 'a4';
my $days_per_page = 2;
# Parse the command line into the option variables declared above; any
# parsing error prints the usage message and stops.
GetOptions(
    "input=s"    => \$input,           # The name of the CSV file from which to read values
    "output=s"   => \$output,          # The name of the PDF file to output
    "high:f"     => \$max_glucose,     # The high end of your target blood glucose level
    "low:f"      => \$min_glucose,     # The low end of your target blood glucose level
    "max:f"      => \$graph_max,       # The highest displayed glucose level on each graph
                                       # (float, as the usage message documents <n.n>)
    "units:s"    => \$units,           # mmol/L or mg/dL
    "pagesize:s" => \$page,            # size of page to print
    "graphs:i"   => \$days_per_page,   # The number of days printed on each page
) or die $error;

# Both file names are mandatory. Without this check a missing option only
# shows up later as a confusing "Could not open file ''" error.
die $error if $input eq '' or $output eq '';

# "--graphs" with no value (or an explicit 0 / negative number) would later be
# used as a modulus and as the multiplot row count, so reject it up front.
die "--graphs must be a whole number greater than zero\n$error"
    if $days_per_page < 1;
# Load the whole CSV file, decoded as UTF-8, into @lines with the line
# endings removed.
open( my $ifh, '<:encoding(UTF-8)', $input )
    or die "Could not open file '$input' $!";
{
    # Read every remaining line in one go and strip the newlines together.
    chomp( my @rows = <$ifh> );
    push @lines, @rows;
}
close( $ifh )
    or warn "close failed: $!";
# Translate the requested page name into a gnuplot "size" value. The tests
# are tried in order; anything unrecognised falls back to A4.
$page_size =
      $page =~ /a4/i              ? "29.7cm,21.0cm"
    : $page =~ /letter/i          ? "11in,8.5in"
    : $page =~ /\d+(cm|in),\d+/   ? $page
    :                               "29.7cm,21.0cm";    # A4 size default

# Set up basic gnuplot options for reading the CSV data. The fragment starts
# and ends with an empty line, and the "set output" line stays commented out
# in the generated script.
push @data, join( "\n",
    '',
    "set terminal pdf size $page_size enhanced font 'Calibri,14' linewidth 1",
    "#set output '$output'",
    '',
);
# Work out which days need a graph: every row that starts with a quoted
# YYYY-MM-DD date contributes that date to %seen (the value counts the rows).
for my $line (@lines) {
    next unless $line =~ m#^"(\d{4}-\d{2}-\d{2})#ms;
    $seen{$1}++;
}

# One graph is drawn per distinct day.
$total_graphs = scalar keys %seen;
# Read each line into a $Data variable for use by gnuplot
# Then sample into a smoothed plot for each day, and store each smoothed line in a new $SmoothData$date variable
foreach my $d ( sort keys %seen ) {
    # Every key of %seen has the form YYYY-MM-DD, so deleting the dashes gives
    # the YYYYMMDD suffix used in the gnuplot variable names. (The previous
    # "my $label = ... if (...)" form is undefined behaviour in Perl.)
    ( my $label = $d ) =~ tr/-//d;

    # Open an inline datablock for this day, starting with the CSV header.
    push @data, qq(
\$Data$label << EOD
"timestamp","blood glucose","meal","method","comment");

    # Copy the rows that belong to this day, unchanged.
    foreach my $row (@lines) {
        if ( $row =~ m#^"\Q$d\E #ms ) {
            push @data, $row;
        }
    }
    push @data, qq(EOD);

    # Record the day's mean as Mean<label>, write a smoothed copy of the
    # readings into $SmoothData<label>, then drop the raw datablock.
    push @data, qq(
set datafile separator ","
set timefmt "%Y-%m-%d %H:%M:%S"
set format x "%s" timedate
set format y "%.2f" numeric
set samples 10000
set xdata
stats \$Data$label using 2
Mean$label = STATS_mean
set xdata time
set table \$SmoothData$label
#plot \$Data$label using "timestamp":"blood glucose"
#plot \$Data$label using "timestamp":"blood glucose" smooth frequency
plot \$Data$label using "timestamp":"blood glucose" smooth mcsplines
#plot \$Data$label using "timestamp":"blood glucose" smooth bezier
unset table
undefine \$Data$label
);
}
# Set up output options for gnuplot.
# We don't bother to do this at the start, since the CSV needs a comma separator
# and the new $SmoothData, which contains a table, needs a whitespace separator.
#
# The fragment below resets gnuplot, switches to whitespace-separated input,
# puts the x axis in time mode (parsed as %H:%M:%S, displayed as %H:%M), fixes
# the y range to 0..$graph_max, defines the line styles/types and margins used
# by the daily graphs, and opens a multiplot with a layout of
# $days_per_page by 1.
# Everything inside qq(...) is gnuplot text, so the '#' lines in there are
# gnuplot comments that end up in the generated script.
push @data, qq(
# change separator from CSV to table
reset
set datafile separator whitespace
set key off
set style data lines
set xdata time
set timefmt "%H:%M:%S"
set format x "%H:%M" timedate
set format y "%.0f" numeric
set yrange [0:$graph_max]
# If extended to 23:59, the x grid overlaps with the border
set xrange ["00:00":"23:58"]
set style line 100 dt 3 lw 1 lc rgb "#202020"
set style line 101 dt 1 lw 1 lc rgb "#202020"
set linetype 110 lc rgb "red"
set lmargin 12
set rmargin 10
set tmargin 5
set bmargin 5
set multiplot title layout $days_per_page,1
);
# For each day, generate a graph with some fancy options
foreach my $d ( sort keys %seen ) {
    # Every key of %seen has the form YYYY-MM-DD, so deleting the dashes gives
    # the YYYYMMDD suffix of this day's gnuplot variables. (The previous
    # "my $label = ... if (...)" form is undefined behaviour in Perl.)
    ( my $label = $d ) =~ tr/-//d;

    my $time = Time::Piece->strptime ( $d, "%Y-%m-%d" );
    #my $title = $time->strftime("%a %d %b %Y");
    my $title = $time->strftime("%A, %d %B %Y");

    # Counted before the fragment is built, because the fragment interpolates
    # the counter (in a commented-out plot line).
    $count_graphs++;

    # First plot: the smoothed curve, coloured with linetype 110 wherever it
    # leaves the $min_glucose..$max_glucose band, plus the shaded target band
    # and a label showing the day's average. Second plot ("set multiplot
    # previous"): an empty plot in the same cell that only adds the x grid.
    push @data, qq(
set title "Daily Glucose Summary for $title" font "Calibri,18"
set xlabel "Time" offset 0,-0.25
set ylabel "Blood glucose"
set xtics left tc rgb "#000000"
set ytics 2 tc rgb "#000000"
set grid ytics ls 100 front
#set arrow from graph 0,first $min_glucose to graph 1,first $min_glucose ls 6 lw 2 nohead
#set arrow from graph 0,first $max_glucose to graph 1,first $max_glucose ls 6 lw 2 nohead
set object 1 rect from graph 0, first $min_glucose to graph 1,first $max_glucose fc ls 6 fs solid 0.2 back
AVG = Mean$label
AVG_LABEL = gprintf("Average glucose: %.2f", AVG)
set object 2 rect at graph 0.9, graph 0.9 fc ls 2 fs transparent solid 0.5 front size char strlen(AVG_LABEL), char 3
set label 2 AVG_LABEL at graph 0.9, graph 0.9 front center
#plot \$SmoothData$label using 1:2:( \$2 > $max_glucose || \$2 < $min_glucose ? 110 : $count_graphs ) with linespoints ls 120 lc variable
plot \$SmoothData$label using (strftime("%H:%M:%S", \$1)):2:( \$2 > $max_glucose || \$2 < $min_glucose ? 110 : 1 ) with lines lw 3 lc variable
undefine \$SmoothData$label
# Add an x grid
set multiplot previous
set title " "
set xlabel " " offset 0,-0.25
set ylabel " "
set xtics tc rgb "#ffffff00"
set ytics tc rgb "#ffffff00"
unset grid
unset object 1
set grid xtics ls 101
plot 1/0
);

    # Start a new page once the current one is full, unless this was the
    # last day.
    if ( $count_graphs % $days_per_page == 0 && $count_graphs < $total_graphs ) {
        push @data, qq(unset multiplot);
        push @data, qq(set multiplot layout $days_per_page,1);
        $page_number++;
    }
}
# Build the $DataAvg datablock: all readings from all days, keyed only by
# time of day.
# The substitution removes the leading date from each matching row; because
# grep aliases $_ to the elements of @lines, the rows in @lines are modified
# in place, and the rows that matched are collected in @sorted_lines.
@sorted_lines = grep { s#^"\d{4}-\d{2}-\d{2} #"#ms } @lines;

# Header, then the rows in string order, then the terminator.
push @data,
    qq(
\$DataAvg << EOD
"timestamp","blood glucose","meal","method","comment"),
    ( sort @sorted_lines ),
    qq(EOD);
# Work out the lowest and highest reading for each time interval.
# Each row is reduced to "HH:MM:SS,value,..." and its time is rounded down to
# the start of its $interval-minute bucket; %intervals then keeps the smallest
# value seen for the bucket in {min} and the largest in {max}.
foreach my $row ( @sorted_lines ) {
    $row =~ tr/"//d;
    my ( $time, $value ) = split /,/, $row;

    # Skip rows that cannot take part in a numeric comparison (no time, or a
    # missing / non-numeric reading) instead of treating them as zero.
    next unless defined $time && defined $value;
    my ( $hour, $minute ) = $time =~ /\A(\d{1,2}):(\d{2})/
        or next;
    next unless $value =~ /\A\s*-?\d+(?:\.\d+)?\s*\z/;

    my $bucket = sprintf( "%02d:%02d:00", $hour, int( $minute / $interval ) * $interval );

    # A new minimum is a value *below* the stored one (the comparison used to
    # be the wrong way round, so {min} ended up holding the maximum).
    if ( !exists $intervals{$bucket}{min} || $value < $intervals{$bucket}{min} ) {
        $intervals{$bucket}{min} = $value;
    }

    # Likewise, a new maximum is a value *above* the stored one.
    if ( !exists $intervals{$bucket}{max} || $value > $intervals{$bucket}{max} ) {
        $intervals{$bucket}{max} = $value;
    }
}
# Debugging aid: uncomment the die below to inspect %intervals with its keys
# in sorted order.
$Data::Dumper::Sortkeys = 1;
#die Dumper(\%intervals);

# Emit the $DataMaxMin datablock: one CSV row per time bucket, in time order,
# holding the bucket's {max} and {min} entries. (A leftover debugging
# "warn $time" that printed every bucket to stderr has been removed.)
push @data, qq(
\$DataMaxMin << EOD
"timestamp","max","min");
foreach my $time ( sort keys %intervals ) {
    push @data, qq("$time","$intervals{$time}{max}","$intervals{$time}{min}");
}
push @data, qq(EOD);
# Standardise units for gnuplot's A1C calculations: anything containing "mg"
# becomes 'mg/dL', anything containing "mmol" becomes 'mmol/L', and every
# other value is cleared so that the generated script has to guess.
$units =
      $units =~ /mg/i   ? 'mg/dL'
    : $units =~ /mmol/i ? 'mmol/L'
    :                     '';
# Generate the "Average Daily Glucose" page. Everything inside qq(...) is
# gnuplot text (so '#' lines in there are gnuplot comments, and \$ is a
# literal dollar sign for gnuplot). In order, the generated commands:
#   1. read $DataAvg as CSV, store the mean of column 2 in MeanTotal, and
#      write two smoothed copies: $DataAvgTable (mcsplines) and
#      $SmoothDataAvg (bezier);
#   2. convert the $DataMaxMin CSV block into the table $DataMaxMinTable;
#   3. reset and set up the same axes, styles and margins as the daily
#      graphs, then open a multiplot with a layout of $days_per_page by 1;
#   4. compute A1C from MeanTotal, using the formula selected by $units when
#      it is 'mg/dL' or 'mmol/L', and otherwise choosing by whether MeanTotal
#      is at least 35;
#   5. plot the area between the two $DataMaxMinTable columns (filledcurves)
#      with the smoothed average on top, coloured with linetype 110 wherever
#      it leaves the $min_glucose..$max_glucose band, together with the
#      average-glucose and A1c labels;
#   6. drop the datablocks and draw an empty plot in the same cell
#      ("set multiplot previous") that only adds the x grid.
push @data, qq(
reset
set datafile separator ","
set timefmt "%H:%M:%S"
set format x "%s" timedate
set format y "%.2f" numeric
set samples 10000
set xdata
stats \$DataAvg using 2
MeanTotal = STATS_mean
set xdata time
set table \$DataAvgTable
#avg(x) = g
#min(x) = x<g
#max(x) = x>g
#f(x) = g
#fit f(x) \$DataAvg using 1:(\$2>MeanTotal?\$2:'') via t, g
#plot f(x) smooth mcsplines
#plot \$DataAvg using 1:(\$2>MeanTotal?\$2:'') every $count_graphs/2 lc 2, \$DataAvg using 1:(\$2<MeanTotal?\$2:'') every $count_graphs/2 lc 1
#samples(x) = \$0 > 4 ? 5 : (\$0+1)
#avg5(x) = (shift5(x), (back1+back2+back3+back4+back5)/samples(\$0))
#shift5(x) = (back5 = back4, back4 = back3, back3 = back2, back2 = back1, back1 = x)
## Initialize a running sum
#init(x) = (back1 = back2 = back3 = back4 = back5 = sum = 0)
#plot sum = init(0), \$DataAvg using 1:2 title 'data' lw 2 lc rgb 'forest-green', '' using 1:(avg5(\$2)) pt 7 ps 0.5 lw 1 lc rgb "blue"
plot \$DataAvg using 1:2 smooth mcsplines
unset table
set table \$SmoothDataAvg
plot \$DataAvg using 1:2 smooth bezier
unset table
undefine \$DataAvg
# Convert DataMaxMin from CSV to table
set table \$DataMaxMinTable
plot \$DataMaxMin using 1:2:3 with table
unset table
reset
set datafile separator whitespace
set key off
set style data lines
set xdata time
set timefmt "%H:%M:%S"
set format x "%H:%M" timedate
set format y "%.0f" numeric
set yrange [0:$graph_max]
# If extended to 23:59, the x grid overlaps with the border
set xrange ["00:00":"23:58"]
set style line 100 dt 3 lw 1 lc rgb "#202020"
set style line 101 dt 1 lw 1 lc rgb "#202020"
set linetype 110 lc rgb "red"
set linetype 111 lc rgb "#B0B0B0"
set style fill transparent solid 0.5 noborder
set lmargin 12
set rmargin 10
set tmargin 5
set bmargin 5
set multiplot title layout $days_per_page,1
set title "Average Daily Glucose" font "Calibri,18"
set xlabel "Time" offset 0,-0.25
set ylabel "Blood glucose"
set xtics left tc rgb "#000000"
set ytics 2 tc rgb "#000000"
set grid ytics ls 100 front
set object 1 rect from graph 0, first $min_glucose to graph 1,first $max_glucose fc ls 6 fs solid 0.05 back
AVG = MeanTotal
AVG_LABEL = gprintf("Average glucose: %.2f", AVG)
set object 2 rect at graph 0.9, graph 0.9 fc ls 2 fs transparent solid 0.5 front size char strlen(AVG_LABEL), char 3
set label 2 AVG_LABEL at graph 0.9, graph 0.9 front center
A1C = 0
if (A1C == 0 && '$units' eq 'mg/dL') {
A1C = (MeanTotal + 46.7) / 28.7
}
if (A1C == 0 && '$units' eq 'mmol/L') {
A1C = (MeanTotal + 2.59) / 1.59
}
# mg/dL numbers tend to be higher than 35
if (A1C == 0 && MeanTotal >= 35) {
A1C = (MeanTotal + 46.7) / 28.7
}
# mmol/L numbers tend to be lower than 35
if (A1C == 0 && MeanTotal < 35) {
A1C = (MeanTotal + 2.59) / 1.59
}
A1C_LABEL = gprintf("Average A1c: %.1f", A1C)
set object 3 rect at graph 0.07, graph 0.9 fc ls 4 fs transparent solid 0.5 front size char strlen(A1C_LABEL), char 3
set label 3 A1C_LABEL at graph 0.07, graph 0.9 front center
#plot \$SmoothDataAvg using ( strftime("%H:%M:%S", \$1) ):2:( \$2 > $max_glucose || \$2 < $min_glucose ? 110 : 1 ) with lines lw 3 lc variable
#plot \$DataAvgTable using (strftime("%H:%M:%S", \$1)):2 with points lc 5 ps 0.5 pt 37, \$SmoothDataAvg using (strftime("%H:%M:%S", \$1)):2:( \$2 > $max_glucose || \$2 < $min_glucose ? 110 : 1 ) with lines lw 3 lc variable
plot \$DataMaxMinTable using (strftime("%H:%M:%S", \$1)):2:3 with filledcurves lc 111, \$SmoothDataAvg using (strftime("%H:%M:%S", \$1)):2:( \$2 > $max_glucose || \$2 < $min_glucose ? 110 : 1 ) with lines lw 3 lc variable
undefine \$DataAvg
undefine \$DataMaxMin
undefine \$DataMaxMinTable
undefine \$SmoothDataAvg
undefine \$DataAvgTable
# Add an x grid
set multiplot previous
set title " "
set xlabel " " offset 0,-0.25
set ylabel " "
set xtics tc rgb "#ffffff00"
set ytics tc rgb "#ffffff00"
unset grid
unset object 1
set grid xtics ls 101
plot 1/0
);
# Close the final multiplot page. The fragment starts and ends with an empty
# line.
# NOTE(review): the trailing "test" is a gnuplot command of its own; confirm
# that it is meant to be part of the generated script.
push @data, join( "\n", '', 'unset multiplot', 'test', '' );

# run the data through gnuplot
# (the complete script is also echoed to standard output)
$gnuplot_data = join( "\n", @data );
print $gnuplot_data;
# Run gnuplot on the generated script and store what it prints on standard
# output (the PDF) in $output.
#
# gnuplot's stdout and stderr are attached directly to files rather than to
# pipes read by this script. With pipes, the script wrote all of the input
# before reading anything and then drained stdout before stderr, so it could
# block forever as soon as gnuplot filled one of the pipe buffers.
open( my $ofh, '>', $output )
    or die "Could not open file '$output' $!";
binmode( $ofh );    # the PDF is binary data

# Anonymous temporary file that collects gnuplot's messages.
open( my $efh, '+>', undef )
    or die "Could not create a temporary file for gnuplot messages: $!";

my ( $pid, $stdin );
# A ">&<fd>" argument makes open3 hand that file descriptor to the child
# instead of creating a pipe.
$pid = open3( $stdin, '>&' . fileno( $ofh ), '>&' . fileno( $efh ), 'gnuplot' );
{
    # If gnuplot stops early, a failed write should be reported here rather
    # than kill this script with SIGPIPE.
    local $SIG{PIPE} = 'IGNORE';

    # The CSV rows were decoded from UTF-8 when read, so encode them again.
    binmode( $stdin, ':encoding(UTF-8)' );
    print {$stdin} $gnuplot_data
        or warn "Could not send the script to gnuplot: $!";
    close( $stdin )
        or warn "close failed: $!";
}

waitpid( $pid, 0 );
my $wait_status       = $?;
my $child_exit_status = $wait_status >> 8;

# Relay everything gnuplot wrote to stderr, as before.
seek( $efh, 0, 0 )
    or warn "Could not rewind gnuplot messages: $!";
while ( my $message = <$efh> ) {
    warn $message;
}
close( $efh );

close( $ofh )
    or warn "close failed: $!";

# Report a failed run instead of silently leaving a broken PDF behind.
if ( $wait_status != 0 ) {
    warn sprintf( "gnuplot failed (exit status %d, signal %d)\n",
        $child_exit_status, $wait_status & 127 );
    exit( $child_exit_status || 1 );
}
#open(GNUPLOT, "|gnuplot");
#print GNUPLOT $gnuplot_data;
#close(GNUPLOT);
# vim : set expandtab shiftwidth=4 softtabstop=4 tw=1000 :