#!/usr/bin/perl -w
#
# Written by Joerg Hoh (joerg@joerghoh.de)
#   released under GPL Version 2
#
# Purpose: 
#  * parses CQ request.log files and creates output which is suitable for gnuplot


use strict;
use Getopt::Long;

####### The tunables ##########
# Title printed above the graph ("set title"); overridden by --title.
my $graph_title="(no title given)";

# Filter expressions given on the command line (--mime / --path-match).
# They are compiled into $mime_match / $path_match after option parsing.
my $mime_opt=".*";          # match all -- no filter
my $path_opt=".*";          # match all -- no filter

# Share of the slowest requests per minute for which a separate average
# is calculated when the data lines are printed.
my $slow_requests=2; # in percent

# Set to 1 by --auto; enables the automatic adjustment of the gnuplot
# values below when many input files are given.
my $auto_tune=0;


######## Values for gnuplot ###############
# Width of the generated image (used in "set term png ... size WIDTH,400").
my $output_width=770;
# Format of the x axis labels (used in "set format x").
my $time_axis_label="%H:%M";


######## Shared state ###############

# Compiled versions of $mime_opt / $path_opt, used by evalID().
my $mime_match;
my $path_match;

# Requests which are currently being processed, keyed by the request id from
# the log line (with a "_N" suffix appended when an id is seen again).
# Each value is a hash with the keys "path", "id", "timestamp", "duration",
# "mimetype" and "next_id", as far as they are known.
my %requests;
# Statistics per timestamp (the "dd.mm.yyyy hh:mm" part of a request line).
# Each value is a hash with "count", "time_sum" and the list "stamps" of the
# single durations.
my %timetable;


##########################################
#
# Adds a completed request record to the statistics in %timetable if it
# matches the given criteria.
#
# Parameter: the key of the record in %requests.
#
# Records without timestamp or duration are ignored. For all others the
# entry for the timestamp is created (so the minute shows up in the output
# even when no request passes the filters); the counters are only updated
# when both the mime type and the path match $mime_match / $path_match.
#
# Returns nothing.
#
##########################################
sub evalID($) {
  my $key = shift;
  return if (!defined $key);

  # fetch the record once; a plain lookup does not create an empty
  # entry in %requests for unknown keys
  my $record = $requests{$key};
  return if (!defined $record);

  my $ts = $record->{"timestamp"};
  return if (!defined $ts);
  return if (!defined $record->{"duration"});

  if (! exists $timetable{$ts}) {
    # start the sum at 0, otherwise every average is calculated from a
    # total which is 1ms too high
    $timetable{$ts} = { "count" => 0, "time_sum" => 0 };
  }

  # match the filters
  return if (!defined $record->{"mimetype"});
  return if ($record->{"mimetype"} !~ /$mime_match/);

  return if (!defined $record->{"path"});
  return if ($record->{"path"} !~ /$path_match/);

  my $slot = $timetable{$ts};
  $slot->{"count"}++;
  $slot->{"time_sum"} += $record->{"duration"};

  # keep the single durations, they are needed for the "slowest requests" average
  push @{$slot->{"stamps"}}, $record->{"duration"};

  return;
}

############################################
#
# parses a single file
#
# Parameter: the name of the logfile; names ending in ".gz" are read
# through "gunzip -c".
#
# Every request line ("->") creates a record in %requests, every response
# line ("<-") completes the record with the same id and hands it over to
# evalID(). %requests is emptied at the end, so nothing is carried over to
# the next file.
#
# Dies if the file does not exist or cannot be opened.
#
############################################
sub parseFile($) {

  my $logfilename = shift;
  if (! -f $logfilename) { die "File $logfilename doesn't exist\n"; }

  my $fh;
  if ($logfilename =~ /.+\.gz$/) {
    # list form: the filename is handed to gunzip as it is and never seen
    # by a shell; "--" protects filenames starting with a dash
    open($fh, "-|", "gunzip", "-c", "--", $logfilename)
      or die "Cannot open logfile $logfilename: $!\n";
  } else {
    # explicit read mode, so special characters in the name are not
    # interpreted as open mode
    open($fh, "<", $logfilename)
      or die "Cannot open logfile $logfilename: $!\n";
  }

  # a lexical line variable leaves the global $_ of the caller untouched
  while (my $line = <$fh>) {

    # 17.10.2008 23:19:02 [8231092] -> GET /content/playground/en.html
    if (my ($timestamp, $orig_id, $handle) = ($line =~ /(\d+\.\d+\.\d+ \d+?:\d+?):\d+? \[(\d+?)\] -> \S+? (\S+).*/)) {
      my $id = $orig_id;
      if (exists($requests{$orig_id})) {
        # the id is already in use: count the duplicates on the first
        # record and store this request under "<id>_<n>"
        $requests{$orig_id}->{"next_id"}++;
        $id = $orig_id . "_" . $requests{$orig_id}->{"next_id"};
        #print ", using $id now\n";
      }
      $requests{$id}->{"path"} = $handle;
      $requests{$id}->{"id"} = $orig_id;
      $requests{$id}->{"timestamp"} = $timestamp;
      #print "D: $id -> $handle\n";
    }

    if (my ($resp_id, $mimetype, $duration) = ($line =~ /.*\[(\d+?)\] <- \d+? (\S+?) (\d+?)ms.*/)) {
      my $id = $resp_id;
      if (exists($requests{$resp_id}) && exists($requests{$resp_id}->{"next_id"})) {
        # the response belongs to the latest duplicate of this id
        $id = $resp_id . "_" . $requests{$resp_id}->{"next_id"};
        #print "reusing duplicate id $id\n";
      }
      $requests{$id}->{"duration"} = $duration;
      $requests{$id}->{"mimetype"} = $mimetype;
      #print "D: $id -> $duration\n";
      evalID($id);
      delete $requests{$id};
    }
  }

  # for the gunzip pipe a failing close means that gunzip reported an
  # error, so the data of this file may be incomplete
  if (!close($fh)) {
    warn "Problem while reading logfile $logfilename: " . ($! ? $! : "exit status $?") . "\n";
  }

  # hand over whatever is left, then start the next file with a clean table
  foreach my $k (keys %requests) {
    evalID($k);
  }
  %requests=();

  return;
}


# Prints the command line help to the currently selected output handle
# (STDOUT unless changed with select()).
#
# Only options which are registered with GetOptions are listed; the formerly
# listed --all switch was never implemented by this script.
sub printUsage() {
  print <<EOF;

  $0 [options] file ...
  --title TITLE         - the title of the output graph
  --mime MATCH          - only analyze requests which have the MATCHing mime type set (regexp allowed)
  --path-match MATCH    - only analyze requests which URL does match the regular expression MATCH
  --width=WIDTH         - the width of the generated image in pixels, default is 770
  --auto                - enable auto tuning to set various settings to reasonable values
  --help                - print this and exit 
 
  Filtering:
  --mime and --path-match are additive.
 

  all files are evaluated and integrated in this output
  
  result will be displayed on STDOUT 

EOF
  return;
}


######### Start ##########


#print STDERR join (" ", @ARGV,"\n");

# Parse the command line. --width has to be an integer because the value is
# copied into the gnuplot "set term" line. --all is still accepted, but it
# was never implemented, so it only causes a warning.
my $result = GetOptions("title=s" => \$graph_title,
                        "mime=s" => \$mime_opt,
                        "path-match=s" => \$path_opt,
                        "width=i" => \$output_width,
                        "auto" => sub {$auto_tune = 1;},
                        "all" => sub { warn "Option --all is not implemented and will be ignored\n"; },
                        "help" => sub { printUsage(); exit 1;}
                        );

# GetOptions has already reported the problem on STDERR; do not create a
# plot from a command line which was not understood. The usage goes to
# STDERR as well, because STDOUT is reserved for the gnuplot output.
if (!$result) {
  my $previous = select(STDERR);
  printUsage();
  select($previous);
  exit 2;
}

# strip quotes
$mime_opt=~s/['"]//g;
$path_opt=~s/['"]//g;

# compile the filters once; report a broken expression in terms of the
# option it was given with
$mime_match = eval { qr/$mime_opt/ } or die "Invalid expression for --mime: $@";
$path_match = eval { qr/$path_opt/ } or die "Invalid expression for --path-match: $@";

my $noinputfiles = scalar(@ARGV);


# enable auto tuning of parameters
if ( $auto_tune) {
  if ($noinputfiles > 3) {
    # many files: use a wider image and label the time axis with dates
    $output_width = 1000;
    $time_axis_label="%d.%m";
  }
}


foreach my $logfilename (@ARGV) {
  parseFile($logfilename);
}


##### construct the title string ############

# Returns the description of the active filters which is appended to the
# graph title, e.g. "(URL-match = ^/content Mime-type = text/html)".
#
# Parameters: the path filter and the mime type filter as given on the
# command line.
#
# A filter is only mentioned if it differs from the default ".*". The values
# are compared as a whole: a filter like "/content/.*" only contains the
# default and has to show up in the title.
# Returns an empty string if no filter is active.
sub filterTitle {
  my ($path_filter, $mime_filter) = @_;

  my $description = "";
  if ($path_filter ne ".*") {
    $description .= "URL-match = $path_filter ";
  }
  if ($mime_filter ne ".*") {
    $description .= "Mime-type = $mime_filter";
  }

  return "" if ($description eq "");
  return "(" . $description . ")";
}

my $add_title = filterTitle($path_opt, $mime_opt);


######## create gnuplot output  ############

# The header of the gnuplot script; the two "-" data sources of the plot
# command are read from the data lines which are printed after this header.
print <<EOF;
#!/usr/bin/gnuplot -persist

set timefmt "%d.%m.%Y %H:%M"
set format x "$time_axis_label"
set xdata time
set title "$graph_title $add_title"
set xlabel "Time"
set ylabel "answered requests  [1/min]"
set y2label "delivery time per request [ms]"
set term png small size $output_width,400 
set y2tics 
set logscale y2
set grid

#set logscale y2 2
#set y2range [:5000]

plot "-" using 1:4 axes x1y2  title "average delivery time per request " , \\
     "-" using 1:3  title "number of delivered requests" 
EOF
 

# Returns the average (rounded down) of the slowest requests.
#
# Parameters: a reference to the list of durations and the share of the
# slowest requests in percent.
#
# The durations are sorted numerically (the default sort compares strings
# and would rank "9" above "100"), then exactly the last $nr values are
# summed and divided by $nr.
# Returns 1 if there are not enough durations to fill the share, so the
# value can still be used on a logarithmic axis.
sub slowAverage {
  my ($durations, $percent) = @_;

  my @sorted = sort { $a <=> $b } @{$durations};
  my $nr = int(scalar(@sorted) * $percent / 100);
  $nr = scalar(@sorted) if ($nr > scalar(@sorted));
  return 1 if ($nr < 1);

  my $sum = 0;
  foreach my $duration (@sorted[-$nr .. -1]) {
    $sum += $duration;
  }
  return int($sum / $nr);
}

# because it's easy to do, just print all data twice (because there are 2 lines);
# above we defined the columns for each line (see 'plot' statement)
#
# Each data line consists of the timestamp, the number of requests, the
# average duration and the average duration of the slowest requests.
# NOTE(review): the timestamp contains a blank, so gnuplot presumably counts
# it as two columns, which would explain the column numbers 3 and 4 in the
# plot statement -- please confirm.

my @keys = sort (keys %timetable);
foreach (1..2) {
  foreach my $k (@keys) {
    next if (!$k);

    my $entry = $timetable{$k};
    my $c = $entry->{"count"};
    #printf "count=%s\n",$c;

    # default 1 instead of 0 for minutes without any matching request
    my $div = 1;
    if ($c != 0) { $div = int($entry->{"time_sum"}/$c); }

    # also calculate the average of the slowest requests ($slow_requests percent)
    my $div2 = 1;
    if ($entry->{"stamps"}) {
      $div2 = slowAverage($entry->{"stamps"}, $slow_requests);
    }

    printf "%s %s %d %d\n",$k,$c,$div,$div2;
  }
  # "e" ends the inline data of one "-" source
  print "e\n";
}

# that's it
