#!/usr/bin/perl
#
# Perl script to convert CSV output from glucometer into graphs, using gnuplot.
#
# Author: Timothy Allen
# License: MIT
#
# TODO Convert to python (see
# https://www.physicsforums.com/threads/gnuplot-how-to-find-the-area-under-a-curve-integrate.382070/
# )
#
# Overview:
#   1. Read the CSV rows; every row starting with "YYYY-MM-DD counts as a
#      reading for that day.
#   2. Build one gnuplot script that, per day, loads the rows into an inline
#      datablock, computes the day's mean and a smoothed curve, and plots it.
#   3. Append an "average day" graph: all readings overlaid by time of day,
#      with a min/max band per $interval-minute bucket and an estimated A1c.
#   4. Run gnuplot on the script and store the PDF it writes to stdout in
#      the --output file. The script itself is also echoed to STDOUT.

use strict;
use warnings;

use Getopt::Long;
use IPC::Open3;
use Symbol 'gensym';
use Time::Piece;
use Data::Dumper;
use File::Temp;
use Scalar::Util qw(looks_like_number);

my $error = "Usage: $0 --input <input.csv> --output <output.pdf> "
    . "[--max <n>] [--low <n>] [--high <n>] [--units <mmol/L|mg/dL>] "
    . "[--pagesize <a4|letter|WIDTH,HEIGHT>] [--graphs <n>]\n";

my @lines;           # raw CSV rows, line endings removed
my @sorted_lines;    # rows with the date stripped, i.e. keyed by time of day
my @data;            # fragments of the gnuplot script, joined with "\n"
my %intervals;       # "HH:MM:00" bucket => { min => ..., max => ... }
my %seen;            # "YYYY-MM-DD" => number of rows for that day
my $page_size;
my $gnuplot_data;
my $total_graphs;
my $count_graphs = 0;
my $interval     = 15;    # The number of minutes to average points for the area range graph

my $input  = '';
my $output = '';

# set these values either in mmol/L or mg/dL (don't mix them)
my $max_glucose   = 8;
my $min_glucose   = 4;
my $graph_max     = 21;
my $units         = '';
my $page          = 'a4';
my $days_per_page = 2;

GetOptions(
    "input=s"    => \$input,           # The name of the CSV file from which to read values
    "output=s"   => \$output,          # The name of the PDF file to output
    "high:f"     => \$max_glucose,     # The high end of your target blood glucose level
    "low:f"      => \$min_glucose,     # The low end of your target blood glucose level
    "max:i"      => \$graph_max,       # The highest displayed glucose level on each graph
    "units:s"    => \$units,           # mmol/L or mg/dL
    "pagesize:s" => \$page,            # size of page to print
    "graphs:i"   => \$days_per_page    # The number of days printed on each page
) or die $error;

# Both file names are mandatory, and the page-break arithmetic below takes a
# modulus by $days_per_page, so it must be at least 1.
die $error if $input eq '' || $output eq '';
die "--graphs must be at least 1\n" if $days_per_page < 1;

open( my $ifh, '<:encoding(UTF-8)', $input ) or die "Could not open file '$input' $!";
while ( my $row = <$ifh> ) {
    $row =~ s/\r?\n\z//;    # strip LF as well as CRLF line endings
    push @lines, $row;
}
close( $ifh ) or warn "close failed: $!";

if ( $page =~ /a4/i ) {
    $page_size = "29.7cm,21.0cm";
}
elsif ( $page =~ /letter/i ) {
    $page_size = "11in,8.5in";
}
elsif ( $page =~ /\d+(cm|in),\d+/ ) {
    $page_size = $page;
}
else {
    # A4 size default
    $page_size = "29.7cm,21.0cm";
}

# Set up basic gnuplot options for reading the CSV data.
# No "set output" is issued: gnuplot writes the PDF to its stdout, which is
# captured into the --output file at the end of this script.
push @data, qq(
set terminal pdf size $page_size enhanced font 'Calibri,14' linewidth 1
);

# Get the list of days for which to produce graphs
foreach my $row ( @lines ) {
    if ( $row =~ m/^"(\d{4}-\d{2}-\d{2})/ ) {
        $seen{$1}++;
    }
}
my @days = sort keys %seen;
$total_graphs = scalar @days;
die "No glucose readings found in '$input'\n" if $total_graphs == 0;

# Read each line into a $Data variable for use by gnuplot
# Then sample into a smoothed plot for each day, and store each smoothed line in a new $SmoothData$date variable
foreach my $d ( @days ) {
    # YYYYMMDD, used as a suffix for the gnuplot variable names of this day
    ( my $label = $d ) =~ tr/-//d;

    # The datablock lines are kept at column 0 and free of blank lines so
    # gnuplot parses the CSV header and rows cleanly.
    push @data, qq(
\$Data$label << EOD
"timestamp","blood glucose","meal","method","comment");

    my $prefix = '"' . $d . ' ';
    push @data, grep { index( $_, $prefix ) == 0 } @lines;
    push @data, qq(EOD);

    push @data, qq(
set datafile separator ","
set timefmt "%Y-%m-%d %H:%M:%S"
set format x "%s" timedate
set format y "%.2f" numeric
set samples 10000

set xdata
stats \$Data$label using 2
Mean$label = STATS_mean

set xdata time
set table \$SmoothData$label
#plot \$Data$label using "timestamp":"blood glucose"
#plot \$Data$label using "timestamp":"blood glucose" smooth frequency
plot \$Data$label using "timestamp":"blood glucose" smooth mcsplines
#plot \$Data$label using "timestamp":"blood glucose" smooth bezier
unset table
undefine \$Data$label
);
}

# Set up output options for gnuplot.
# We don't bother to do this at the start, since the CSV needs a comma separator
# and the new $SmoothData, which contains a table, needs a whitespace separator
push @data, qq(
# change separator from CSV to table
reset
set datafile separator whitespace
set key off
set style data lines
set xdata time
set timefmt "%H:%M:%S"
set format x "%H:%M" timedate
set format y "%.0f" numeric

set yrange [0:$graph_max]
# If extended to 23:59, the x grid overlaps with the border
set xrange ["00:00":"23:58"]

set style line 100 dt 3 lw 1 lc rgb "#202020"
set style line 101 dt 1 lw 1 lc rgb "#202020"
set linetype 110 lc rgb "red"

set lmargin 12
set rmargin 10
set tmargin 5
set bmargin 5

set multiplot title layout $days_per_page,1
);

# For each day, generate a graph with some fancy options
foreach my $d ( @days ) {
    ( my $label = $d ) =~ tr/-//d;

    my $time = Time::Piece->strptime( $d, "%Y-%m-%d" );
    #my $title = $time->strftime("%a %d %b %Y");
    my $title = $time->strftime("%A, %d %B %Y");

    $count_graphs++;

    push @data, qq(
set title "Daily Glucose Summary for $title" font "Calibri,18"
set xlabel "Time" offset 0,-0.25
set ylabel "Blood glucose"
set xtics left tc rgb "#000000"
set ytics 2 tc rgb "#000000"
set grid ytics ls 100 front

#set arrow from graph 0,first $min_glucose to graph 1,first $min_glucose ls 6 lw 2 nohead
#set arrow from graph 0,first $max_glucose to graph 1,first $max_glucose ls 6 lw 2 nohead
set object 1 rect from graph 0, first $min_glucose to graph 1,first $max_glucose fc ls 6 fs solid 0.2 back

AVG = Mean$label
AVG_LABEL = gprintf("Average glucose: %.2f", AVG)
set object 2 rect at graph 0.9, graph 0.9 fc ls 2 fs transparent solid 0.5 front size char strlen(AVG_LABEL), char 3
set label 2 AVG_LABEL at graph 0.9, graph 0.9 front center

#plot \$SmoothData$label using 1:2:( \$2 > $max_glucose || \$2 < $min_glucose ? 110 : $count_graphs ) with linespoints ls 120 lc variable
plot \$SmoothData$label using (strftime("%H:%M:%S", \$1)):2:( \$2 > $max_glucose || \$2 < $min_glucose ? 110 : 1 ) with lines lw 3 lc variable
undefine \$SmoothData$label

# Add an x grid
set multiplot previous
set title " "
set xlabel " " offset 0,-0.25
set ylabel " "
set xtics tc rgb "#ffffff00"
set ytics tc rgb "#ffffff00"
unset grid
unset object 1
set grid xtics ls 101
plot 1/0
);

    # Start a new page once the current one is full, unless this was the last day
    if ( $count_graphs % $days_per_page == 0 && $count_graphs < $total_graphs ) {
        push @data, qq(unset multiplot);
        push @data, qq(set multiplot layout $days_per_page,1);
    }
}

# Output data averages by hour of the day: strip the date from every reading
# so that all days overlay on a single 24-hour axis. Work on a copy so that
# @lines itself is left untouched.
@sorted_lines = ();
foreach my $row ( @lines ) {
    my $time_row = $row;
    if ( $time_row =~ s/^"\d{4}-\d{2}-\d{2} /"/ ) {
        push @sorted_lines, $time_row;
    }
}

push @data, qq(
\$DataAvg << EOD
"timestamp","blood glucose","meal","method","comment");
push @data, sort @sorted_lines;
push @data, qq(EOD);

# Output min/max for each time interval
foreach my $row ( @sorted_lines ) {
    ( my $plain = $row ) =~ tr/"//d;
    my ( $time,  $value )  = split /,/, $plain;
    my ( $hour,  $minute ) = split /:/, ( defined $time ? $time : '' );

    # Ignore rows without a usable time of day or a numeric reading
    next unless defined $hour   && $hour   =~ /\A\d+\z/;
    next unless defined $minute && $minute =~ /\A\d+\z/;
    next unless defined $value  && looks_like_number( $value );

    # Round the time down to the start of its $interval-minute bucket
    my $bucket = sprintf( "%02d:%02d:00", $hour, int( $minute / $interval ) * $interval );

    if ( !exists $intervals{$bucket} ) {
        $intervals{$bucket} = { min => $value, max => $value };
        next;
    }

    # Lower the stored minimum / raise the stored maximum when this reading
    # lies outside the range seen so far for the bucket
    if ( $value < $intervals{$bucket}{min} ) {
        $intervals{$bucket}{min} = $value;
    }
    if ( $value > $intervals{$bucket}{max} ) {
        $intervals{$bucket}{max} = $value;
    }
}

$Data::Dumper::Sortkeys = 1;
#die Dumper(\%intervals);

push @data, qq(
\$DataMaxMin << EOD
"timestamp","max","min");
foreach my $time ( sort keys %intervals ) {
    push @data, qq("$time","$intervals{$time}{max}","$intervals{$time}{min}");
}
push @data, qq(EOD);

# Standardise units for gnuplot's A1C calculations
if ( $units =~ /mg/i ) {
    $units = 'mg/dL';
}
elsif ( $units =~ /mmol/i ) {
    $units = 'mmol/L';
}
else {
    $units = '';
}

push @data, qq(
reset
set datafile separator ","
set timefmt "%H:%M:%S"
set format x "%s" timedate
set format y "%.2f" numeric
set samples 10000

set xdata
stats \$DataAvg using 2
MeanTotal = STATS_mean

set xdata time
set table \$DataAvgTable
plot \$DataAvg using 1:2 smooth mcsplines
unset table

set table \$SmoothDataAvg
plot \$DataAvg using 1:2 smooth bezier
unset table
undefine \$DataAvg

# Convert DataMaxMin from CSV to table
set table \$DataMaxMinTable
plot \$DataMaxMin using 1:2:3 with table
unset table

reset
set datafile separator whitespace
set key off
set style data lines
set xdata time
set timefmt "%H:%M:%S"
set format x "%H:%M" timedate
set format y "%.0f" numeric

set yrange [0:$graph_max]
# If extended to 23:59, the x grid overlaps with the border
set xrange ["00:00":"23:58"]

set style line 100 dt 3 lw 1 lc rgb "#202020"
set style line 101 dt 1 lw 1 lc rgb "#202020"
set linetype 110 lc rgb "red"
set linetype 111 lc rgb "#B0B0B0"
set style fill transparent solid 0.5 noborder

set lmargin 12
set rmargin 10
set tmargin 5
set bmargin 5

set multiplot title layout $days_per_page,1

set title "Average Daily Glucose" font "Calibri,18"
set xlabel "Time" offset 0,-0.25
set ylabel "Blood glucose"
set xtics left tc rgb "#000000"
set ytics 2 tc rgb "#000000"
set grid ytics ls 100 front

set object 1 rect from graph 0, first $min_glucose to graph 1,first $max_glucose fc ls 6 fs solid 0.05 back

AVG = MeanTotal
AVG_LABEL = gprintf("Average glucose: %.2f", AVG)
set object 2 rect at graph 0.9, graph 0.9 fc ls 2 fs transparent solid 0.5 front size char strlen(AVG_LABEL), char 3
set label 2 AVG_LABEL at graph 0.9, graph 0.9 front center

A1C = 0
if (A1C == 0 && '$units' eq 'mg/dL') {
    A1C = (MeanTotal + 46.7) / 28.7
}
if (A1C == 0 && '$units' eq 'mmol/L') {
    A1C = (MeanTotal + 2.59) / 1.59
}
# mg/dL numbers tend to be higher than 35
if (A1C == 0 && MeanTotal >= 35) {
    A1C = (MeanTotal + 46.7) / 28.7
}
# mmol/L numbers tend to be lower than 35
if (A1C == 0 && MeanTotal < 35) {
    A1C = (MeanTotal + 2.59) / 1.59
}
A1C_LABEL = gprintf("Average A1c: %.1f", A1C)
set object 3 rect at graph 0.07, graph 0.9 fc ls 4 fs transparent solid 0.5 front size char strlen(A1C_LABEL), char 3
set label 3 A1C_LABEL at graph 0.07, graph 0.9 front center

#plot \$SmoothDataAvg using ( strftime("%H:%M:%S", \$1) ):2:( \$2 > $max_glucose || \$2 < $min_glucose ? 110 : 1 ) with lines lw 3 lc variable
#plot \$DataAvgTable using (strftime("%H:%M:%S", \$1)):2 with points lc 5 ps 0.5 pt 37, \$SmoothDataAvg using (strftime("%H:%M:%S", \$1)):2:( \$2 > $max_glucose || \$2 < $min_glucose ? 110 : 1 ) with lines lw 3 lc variable
plot \$DataMaxMinTable using (strftime("%H:%M:%S", \$1)):2:3 with filledcurves lc 111, \$SmoothDataAvg using (strftime("%H:%M:%S", \$1)):2:( \$2 > $max_glucose || \$2 < $min_glucose ? 110 : 1 ) with lines lw 3 lc variable
undefine \$DataAvg
undefine \$DataMaxMin
undefine \$DataMaxMinTable
undefine \$SmoothDataAvg
undefine \$DataAvgTable

# Add an x grid
set multiplot previous
set title " "
set xlabel " " offset 0,-0.25
set ylabel " "
set xtics tc rgb "#ffffff00"
set ytics tc rgb "#ffffff00"
unset grid
unset object 1
set grid xtics ls 101
plot 1/0
);

# NOTE(review): "test" makes gnuplot emit its terminal test page as well;
# this looks like a debugging leftover - confirm before removing it.
push @data, qq(
unset multiplot
test
);

# run the data through gnuplot
$gnuplot_data = join "\n", @data;

# The rows were decoded from UTF-8 on input, so encode them again on output
binmode( STDOUT, ':encoding(UTF-8)' );
print $gnuplot_data;

# Hand the script to gnuplot as a file instead of through its stdin. Writing
# a large script into a pipe while gnuplot is already producing PDF data on
# its stdout can fill both pipe buffers and leave the two processes waiting
# on each other forever. The temporary file is removed automatically when
# $script goes out of scope.
my $script = File::Temp->new( SUFFIX => '.gp' );
binmode( $script, ':encoding(UTF-8)' );
print {$script} $gnuplot_data or die "Could not write gnuplot script: $!";
close( $script ) or die "Could not write gnuplot script: $!";

open( my $ofh, '>', $output ) or die "Could not open file '$output' $!";
binmode( $ofh );    # the PDF is binary data

# gnuplot's stderr goes straight to our STDERR, so only a single pipe (its
# stdout) has to be drained here.
my ( $gp_in, $gp_out );
my $pid = open3( $gp_in, $gp_out, '>&STDERR', 'gnuplot', $script->filename );
close( $gp_in );
binmode( $gp_out );

my $chunk;
while ( read( $gp_out, $chunk, 65536 ) ) {
    print {$ofh} $chunk or die "Could not write to '$output': $!";
}
close( $gp_out );

waitpid( $pid, 0 );
my $wait_status = $?;

close( $ofh ) or die "Could not finish writing '$output': $!";

if ( $wait_status != 0 ) {
    warn sprintf(
        "gnuplot failed (exit status %d, signal %d); '%s' may be incomplete\n",
        $wait_status >> 8, $wait_status & 127, $output
    );
    exit 1;
}

# vim : set expandtab shiftwidth=4 softtabstop=4 tw=1000 :