X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/1b4fe9dd6ba8cbc553a1dd650fc8428ba87d63b6..8e669ac162fe3b1040297f1d021de10778dce9d9:/src/src/eximstats.src diff --git a/src/src/eximstats.src b/src/src/eximstats.src index 200f9d524..32003889b 100644 --- a/src/src/eximstats.src +++ b/src/src/eximstats.src @@ -1,5 +1,5 @@ #!PERL_COMMAND -w -# $Cambridge: exim/src/src/eximstats.src,v 1.2 2004/11/24 14:43:57 ph10 Exp $ +# $Cambridge: exim/src/src/eximstats.src,v 1.6 2005/02/17 11:58:26 ph10 Exp $ # Copyright (c) 2001 University of Cambridge. # See the file NOTICE for conditions of use and distribution. @@ -178,7 +178,7 @@ # Added warnings if required GD::Graph modules are not available or # insufficient -chart* options are specified. # -# 2004-02-20 V1.31 Andrea Balzi +# 2004-02-20 V1.31 Andrea Balzi # Only show the Local Sender/Destination links if the tables exist. # # 2004-07-05 V1.32 Steve Campbell @@ -187,11 +187,20 @@ # 2004-07-15 V1.33 Steve Campbell # Documentation update - I've converted the subroutine # documentation from POD to comments. +# +# 2004-12-10 V1.34 Steve Campbell +# Eximstats can now parse syslog lines as well as mainlog lines. +# +# 2004-12-20 V1.35 Wouter Verhelst +# Pie charts by volume were actually generated by count. Fixed. +# +# 2005-02-07 V1.36 Gregor Herrmann / Steve Campbell +# Added average sizes to HTML Top tables. =head1 NAME -eximstats - generates statistics from Exim mainlog files. +eximstats - generates statistics from Exim mainlog or syslog files. =head1 SYNOPSIS @@ -360,7 +369,7 @@ title! =head1 DESCRIPTION -Eximstats parses exim mainlog files and outputs a statistical +Eximstats parses exim mainlog and syslog files to output a statistical analysis of the messages processed. By default, a text analysis is generated, but you can request an html analysis by using the B<-html> flag. See the help (B<-help>) to learn @@ -413,7 +422,7 @@ use vars qw($COLUMN_WIDTHS); @days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334); $gig = 1024 * 1024 * 1024; -$VERSION = '1.33'; +$VERSION = '1.36'; # How much space do we allow for the Hosts/Domains/Emails/Edomains column headers? $COLUMN_WIDTHS = 8; @@ -525,16 +534,16 @@ sub volume_rounded { ####################################################################### # un_round(); -# +# # un_round($rounded_volume,\$bytes,\$gigabytes); -# +# # Given a volume in KB, MB or GB, as generated by volume_rounded(), # do the reverse transformation and convert it back into Bytes and Gigabytes. # These are added to the $bytes and $gigabytes parameters. -# +# # Given a data size in bytes, round it to KB, MB, or GB # as appropriate. -# +# # EG: 500 => (500,0), 14GB => (0,14), etc. ####################################################################### sub un_round { @@ -563,9 +572,9 @@ sub un_round { ####################################################################### # add_volume(); -# +# # add_volume(\$bytes,\$gigs,$size); -# +# # Add $size to $bytes/$gigs where this is a number split into # bytes ($bytes) and gigabytes ($gigs). This is significantly # faster than using Math::BigInt. @@ -584,12 +593,12 @@ sub add_volume { ####################################################################### # format_time(); -# +# # $formatted_time = format_time($seconds); -# +# # Given a time in seconds, break it down into # weeks, days, hours, minutes, and seconds. -# +# # Eg 12005 => 3h20m5s ####################################################################### sub format_time { @@ -614,11 +623,11 @@ $p; ####################################################################### # unformat_time(); -# +# # $seconds = unformat_time($formatted_time); -# +# # Given a time in weeks, days, hours, minutes, or seconds, convert it to seconds. -# +# # Eg 3h20m5s => 12005 ####################################################################### sub unformat_time { @@ -638,9 +647,9 @@ sub unformat_time { ####################################################################### # seconds(); -# +# # $time = seconds($timestamp); -# +# # Given a time-of-day timestamp, convert it into a time() value using # POSIX::mktime. We expect the timestamp to be of the form # "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12, @@ -650,14 +659,14 @@ sub unformat_time { # (with the -utc option), then we adjust the time by the current local # time offset so that it can be compared with the time recorded in message # IDs, which is UTC. -# +# # To improve performance, we only use mktime on the date ($year-$mon-$day), # and only calculate it if the date is different to the previous time we # came here. We then add on seconds for the '$hour:$min:$sec'. -# +# # We also store the results of the last conversion done, and only # recalculate if the date is different. -# +# # We used to have the '-cache' flag which would store the results of the # mktime() call. However, the current way of just using mktime() on the # date obsoletes this. @@ -705,9 +714,9 @@ sub seconds { ####################################################################### # id_seconds(); -# +# # $time = id_seconds($message_id); -# +# # Given a message ID, convert it into a time() value. ####################################################################### sub id_seconds { @@ -722,13 +731,13 @@ $s; ####################################################################### # calculate_localtime_offset(); -# +# # $localtime_offset = calculate_localtime_offset(); -# +# # Calculate the the localtime offset from gmtime in seconds. -# +# # $localtime = time() + $localtime_offset. -# +# # These are the same semantics as ISO 8601 and RFC 2822 timezone offsets. # (West is negative, East is positive.) ####################################################################### @@ -758,9 +767,9 @@ sub calculate_localtime_offset { ####################################################################### # print_queue_times(); -# +# # $time = print_queue_times($message_type,\@queue_times,$queue_more_than); -# +# # Given the type of messages being output, the array of message queue times, # and the number of messages which exceeded the queue times, print out # a table. @@ -861,9 +870,9 @@ print "\n"; ####################################################################### # print_histogram(); -# +# # print_histogram('Deliverieds|Messages received',@interval_count); -# +# # Print a histogram of the messages delivered/received per time slot # (hour by default). ####################################################################### @@ -976,9 +985,9 @@ if ($html) ####################################################################### # print_league_table(); -# +# # print_league_table($league_table_type,\%message_count,\%message_data,\%message_data_gigs); -# +# # Given hashes of message count and message data, which are keyed by # the table type (eg by the sending host), print a league table # showing the top $topcount (defaults to 50). @@ -997,25 +1006,36 @@ if ($html) { print "\n"; print "
\n"; print "\n"; - print "\n"; + print "\n"; # Align non-local addresses to the right (so all the .com's line up). # Local addresses are aligned on the left as they are userids. my $align = ($text !~ /local/i) ? 'right' : 'left'; - $format = "\n"; + $format = "\n"; } else { printf("%s\n%s\n\n", $temp, "-" x length($temp)); $format = "%7d %10s %s\n"; } -my($key,$htmlkey); +my($key,$htmlkey,$rounded_volume,$rounded_average,$count,$data,$gigs); foreach $key (top_n_sort($topcount,$m_count,$m_data_gigs,$m_data)) { if ($html) { $htmlkey = $key; $htmlkey =~ s/>/\>\;/g; $htmlkey =~ s/\n"; print "
MessagesBytes\u$text
MessagesBytesAverage\u$text
%d%s%s
%d%s%s%s
\n"; print "\n"; - print "\n"; + print "\n"; } else { printf("%s\n%s\n\n", $temp, "-" x length($temp)); @@ -1081,27 +1101,46 @@ else { @chartdatanames = (); @chartdatavals = (); $chartotherval = 0; +my $use_gig = 0; foreach $key (top_n_sort($topcount,$m_data_gigs,$m_data,$m_count)) { + # The largest volume will be the first (top of the list). + # If it has at least 1 gig, then just use gigabytes to avoid + # risking an integer overflow when generating the pie charts. + if ($$m_data_gigs{$key}) { + $use_gig = 1; + } if ($html) { $htmlkey = $key; $htmlkey =~ s/>/\>\;/g; $htmlkey =~ s/new(300, 300); $graph->set( x_label => 'Name', - y_label => 'Volume', + y_label => 'Volume' , title => 'By Volume', ); my $gd = $graph->plot(\@data) or warn($graph->error); @@ -1141,22 +1180,22 @@ print "\n"; ####################################################################### # top_n_sort(); -# +# # @sorted_keys = top_n_sort($n,$href1,$href2,$href3); -# +# # Given a hash which has numerical values, return the sorted $n keys which # point to the top values. The second and third hashes are used as # tiebreakers. They all must have the same keys. -# +# # The idea behind this routine is that when you only want to see the # top n members of a set, rather than sorting the entire set and then # plucking off the top n, sort through the stack as you go, discarding # any member which is lower than your current n'th highest member. -# +# # This proves to be an order of magnitude faster for large hashes. # On 200,000 lines of mainlog it benchmarked 9 times faster. # On 700,000 lines of mainlog it benchmarked 13.8 times faster. -# +# # We assume the values are > 0. ####################################################################### sub top_n_sort { @@ -1182,15 +1221,15 @@ sub top_n_sort { my $n_minus_1 = $n - 1; my $n_minus_2 = $n - 2; - # Pick out the top $n keys. + # Pick out the top $n keys. my($key,$value1,$value2,$value3,$i,$comparison,$insert_position); while (($key,$value1) = each %$href1) { #print STDERR "key $key ($value1,",$href2->{$key},",",$href3->{$key},") <=> ($minimum_value1,$minimum_value2,$minimum_value3)\n"; - + # Check to see that the new value is bigger than the lowest of the # top n keys that we're keeping. - $comparison = $value1 <=> $minimum_value1 || + $comparison = $value1 <=> $minimum_value1 || $href2->{$key} <=> $minimum_value2 || $href3->{$key} <=> $minimum_value3 || $top_n_key cmp $key; @@ -1217,7 +1256,7 @@ sub top_n_sort { for ($i = 0; $i < $n_minus_1; $i++) { $top_n_key = $top_n_keys[$i]; if ( ($top_n_key eq '_') || - ( ($value1 <=> $href1->{$top_n_key} || + ( ($value1 <=> $href1->{$top_n_key} || $value2 <=> $href2->{$top_n_key} || $value3 <=> $href3->{$top_n_key} || $top_n_key cmp $key) == 1 @@ -1249,9 +1288,9 @@ sub top_n_sort { ####################################################################### # html_header(); -# +# # $header = html_header($title); -# +# # Print our HTML header and start the block. ####################################################################### sub html_header { @@ -1273,9 +1312,9 @@ EoText ####################################################################### # help(); -# +# # help(); -# +# # Display usage instructions and exit. ####################################################################### sub help { @@ -1288,8 +1327,8 @@ Usage: eximstats [Options] mainlog1 mainlog2 ... > report.txt eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_rep.txt eximstats -merge -html [Options] report.1.html ... > weekly_rep.html -Parses exim mainlog files and generates a statistical analysis of -the messages processed. Valid options are: +Parses exim mainlog or syslog files and generates a statistical analysis +of the messages processed. Valid options are: -h histogram divisions per hour. The default is 1, and 0 suppresses histograms. Other valid values are: @@ -1338,15 +1377,15 @@ EoText ####################################################################### # generate_parser(); -# +# # $parser = generate_parser(); -# +# # This subroutine generates the parsing routine which will be # used to parse the mainlog. We take the base operation, and remove bits not in use. # This improves performance depending on what bits you take out or add. -# +# # I've tested using study(), but this does not improve performance. -# +# # We store our parsing routing in a variable, and process it looking for #IFDEF (Expression) # or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If # the expression evaluates to true, then it is included/excluded accordingly. @@ -1356,10 +1395,13 @@ sub generate_parser { my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new); my($tod,$m_hour,$m_min,$id,$flag); while (<$fh>) { - next if length($_) < 38; - # PH/FANF - # next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d)/; + # Convert syslog lines to mainlog format. + if (! /^\\d{4}/) { + next unless s/^.*? exim\\b.*?: //; + } + + next if length($_) < 38; next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)/o; ($tod,$m_hour,$m_min) = ($1,$2,$3); @@ -1682,9 +1724,9 @@ sub generate_parser { ####################################################################### # parse(); -# +# # parse($parser,\*FILEHANDLE); -# +# # This subroutine accepts a parser and a filehandle from main and parses each # line. We store the results into global variables. ####################################################################### @@ -1705,9 +1747,9 @@ sub parse { ####################################################################### # print_header(); -# +# # print_header(); -# +# # Print our headers and contents. ####################################################################### sub print_header { @@ -1761,9 +1803,9 @@ sub print_header { ####################################################################### # print_grandtotals(); -# +# # print_grandtotals(); -# +# # Print the grand totals. ####################################################################### sub print_grandtotals { @@ -1848,9 +1890,9 @@ EoText ####################################################################### # print_user_patterns() -# +# # print_user_patterns(); -# +# # Print the counts of user specified patterns. ####################################################################### sub print_user_patterns { @@ -1896,9 +1938,9 @@ sub print_user_patterns { ####################################################################### # print_transport(); -# +# # print_transport(); -# +# # Print totals by transport. ####################################################################### sub print_transport { @@ -2001,9 +2043,9 @@ sub print_transport { ####################################################################### # print_relay(); -# +# # print_relay(); -# +# # Print our totals by relay. ####################################################################### sub print_relay { @@ -2047,9 +2089,9 @@ sub print_relay { ####################################################################### # print_errors(); -# +# # print_errors(); -# +# # Print our errors. In HTML, we display them as a list rather than a table - # Netscape doesn't like large tables! ####################################################################### @@ -2075,7 +2117,7 @@ sub print_errors { $text =~ s/\s\s+/ /g; #Convert multiple spaces to a single space. $total_errors += $errors_count{$key}; if ($html) { - + #Translate HTML tag characters. Sergey Sholokh. $text =~ s/\/\>\;/g; @@ -2105,36 +2147,36 @@ sub print_errors { ####################################################################### # parse_old_eximstat_reports(); -# +# # parse_old_eximstat_reports($fh); -# +# # Parse old eximstat output so we can merge daily stats to weekly stats and weekly to monthly etc. -# +# # To test that the merging still works after changes, do something like the following. # All the diffs should produce no output. -# +# # options='-bydomain -byemail -byhost -byedomain' # options="$options -pattern 'Completed Messages' /Completed/" # options="$options -pattern 'Received Messages' /<=/" -# +# # ./eximstats $options mainlog > mainlog.txt # ./eximstats $options -merge mainlog.txt > mainlog.2.txt # diff mainlog.txt mainlog.2.txt -# +# # ./eximstats $options -html mainlog > mainlog.html # ./eximstats $options -merge -html mainlog.txt > mainlog.2.html # diff mainlog.html mainlog.2.html -# +# # ./eximstats $options -merge mainlog.html > mainlog.3.txt # diff mainlog.txt mainlog.3.txt -# +# # ./eximstats $options -merge -html mainlog.html > mainlog.3.html # diff mainlog.html mainlog.3.html -# +# # ./eximstats $options -nvr mainlog > mainlog.nvr.txt # ./eximstats $options -merge mainlog.nvr.txt > mainlog.4.txt # diff mainlog.txt mainlog.4.txt -# +# # # double_mainlog.txt should have twice the values that mainlog.txt has. # ./eximstats $options mainlog mainlog > double_mainlog.txt ####################################################################### @@ -2152,7 +2194,7 @@ sub parse_old_eximstat_reports { # Fill in $report_totals{Received|Delivered}{Volume|Messages|Hosts|Domains|...|Delayed|DelayedPercent|Failed|FailedPercent} my(@fields); while (<$fh>) { - $_ = html2txt($_); #Convert general HTML markup to text. + $_ = html2txt($_); #Convert general HTML markup to text. s/At least one addr//g; #Another part of the HTML output we don't want. # TOTAL Volume Messages Hosts Domains Delayed Failed @@ -2180,7 +2222,7 @@ sub parse_old_eximstat_reports { while (<$fh>) { last if (/Total/); } #Wait until we get the table headers. while (<$fh>) { print STDERR "Parsing $_" if $debug; - $_ = html2txt($_); #Convert general HTML markup to text. + $_ = html2txt($_); #Convert general HTML markup to text. if (/^\s*(.*?)\s+(\d+)\s*$/) { $report_totals{patterns}{$1} = {} unless (defined $report_totals{patterns}{$1}); add_to_totals($report_totals{patterns}{$1},['Total'],$2); @@ -2200,7 +2242,7 @@ sub parse_old_eximstat_reports { while (<$fh>) { last if (/Volume/); } #Wait until we get the table headers. while (<$fh>) { print STDERR "Parsing $_" if $debug; - $_ = html2txt($_); #Convert general HTML markup to text. + $_ = html2txt($_); #Convert general HTML markup to text. if (/(\S+)\s+(\d+\S*\s+\d+)/) { $report_totals{transport}{$1} = {} unless (defined $report_totals{transport}{$1}); add_to_totals($report_totals{transport}{$1},['Volume','Messages'],$2); @@ -2250,7 +2292,7 @@ sub parse_old_eximstat_reports { my $bin_aref = ($1 eq 'all messages') ? \@queue_bin : \@remote_queue_bin; my $reached_table = 0; while (<$fh>) { - $_ = html2txt($_); #Convert general HTML markup to text. + $_ = html2txt($_); #Convert general HTML markup to text. $reached_table = 1 if (/^\s*Under/); next unless $reached_table; my $previous_seconds_on_queue = 0; @@ -2315,6 +2357,9 @@ sub parse_old_eximstat_reports { #------------------------------------- # # 48 1468KB local +# Could also have average values for HTML output. +# 48 1468KB 30KB local + my($category,$by_count_or_volume) = ($1,$2); #As we show 2 views of each table (by count and by volume), @@ -2357,9 +2402,13 @@ sub parse_old_eximstat_reports { my $reached_table = 0; while (<$fh>) { - $_ = html2txt($_); #Convert general HTML markup to text. + $_ = html2txt($_); #Convert general HTML markup to text. $reached_table = 1 if (/^\s*\d/); next unless $reached_table; + + # Remove optional 'average value' column. + s/^\s*(\d+)\s+(\S+)\s+(\d+(KB|MB|GB|\b)\s+)/$1 $2 /; + if (/^\s*(\d+)\s+(\S+)\s*(.*?)\s*$/) { my($count,$rounded_volume,$entry) = ($1,$2,$3); #Note: $entry fields can be both null and can contain spaces. @@ -2433,7 +2482,7 @@ sub parse_old_eximstat_reports { next unless $reached_table; s/^
  • (\d+) -/$1/; #Convert an HTML line to a text line. - $_ = html2txt($_); #Convert general HTML markup to text. + $_ = html2txt($_); #Convert general HTML markup to text. if (/\t\s*(.*)/) { $error .= ' ' . $1; #Join a multiline error. @@ -2464,9 +2513,9 @@ sub parse_old_eximstat_reports { ####################################################################### # update_relayed(); -# +# # update_relayed($count,$sender,$recipient); -# +# # Adds an entry into the %relayed hash. Currently only used when # merging reports. ####################################################################### @@ -2489,12 +2538,12 @@ sub update_relayed { ####################################################################### # add_to_totals(); -# +# # add_to_totals(\%totals,\@keys,$values); -# +# # Given a line of space seperated values, add them into the provided hash using @keys # as the hash keys. -# +# # If the value contains a '%', then the value is set rather than added. Otherwise, we # convert the value to bytes and gigs. The gigs get added to I-gigs. ####################################################################### @@ -2519,11 +2568,11 @@ sub add_to_totals { ####################################################################### # get_report_total(); -# +# # $total = get_report_total(\%hash,$key); -# +# # If %hash contains values split into Units and Gigs, we calculate and return -# +# # $hash{$key} + 1024*1024*1024 * $hash{"${key}-gigs"} ####################################################################### sub get_report_total { @@ -2537,9 +2586,9 @@ sub get_report_total { ####################################################################### # html2txt(); -# +# # $text_line = html2txt($html_line); -# +# # Convert a line from html to text. Currently we just convert HTML tags to spaces # and convert >, <, and   tags back. ####################################################################### @@ -2560,16 +2609,16 @@ sub html2txt { ####################################################################### # get_next_arg(); -# +# # $arg = get_next_arg(); -# +# # Because eximstats arguments are often passed as variables, # we can't rely on shell parsing to deal with quotes. This # subroutine returns $ARGV[1] and does a shift. If $ARGV[1] # starts with a quote (' or "), and doesn't end in one, then # we append the next argument to it and shift again. We repeat # until we've got all of the argument. -# +# # This isn't perfect as all white space gets reduced to one space, # but it's as good as we can get! If it's esential that spacing # be preserved precisely, then you get that by not using shell @@ -2658,7 +2707,7 @@ while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-') } elsif ($ARGV[0] =~ /^-chartdir$/) { $chartdir = $ARGV[1]; shift; $charts_option_specified = 1; } elsif ($ARGV[0] =~ /^-chartrel$/) { $chartrel = $ARGV[1]; shift; $charts_option_specified = 1; } - elsif ($ARGV[0] =~ /^-cache$/) { } #Not currently used. + elsif ($ARGV[0] =~ /^-cache$/) { } #Not currently used. elsif ($ARGV[0] =~ /^-byhost$/) { $do_sender{Host} = 1 } elsif ($ARGV[0] =~ /^-bydomain$/) { $do_sender{Domain} = 1 } elsif ($ARGV[0] =~ /^-byemail$/) { $do_sender{Email} = 1 }
  • MessagesBytes\u$text
    MessagesBytesAverage\u$text