#!PERL_COMMAND -w
-# $Cambridge: exim/src/src/eximstats.src,v 1.2 2004/11/24 14:43:57 ph10 Exp $
+# $Cambridge: exim/src/src/eximstats.src,v 1.6 2005/02/17 11:58:26 ph10 Exp $
# Copyright (c) 2001 University of Cambridge.
# See the file NOTICE for conditions of use and distribution.
# Added warnings if required GD::Graph modules are not available or
# insufficient -chart* options are specified.
#
-# 2004-02-20 V1.31 Andrea Balzi
+# 2004-02-20 V1.31 Andrea Balzi
# Only show the Local Sender/Destination links if the tables exist.
#
# 2004-07-05 V1.32 Steve Campbell
# 2004-07-15 V1.33 Steve Campbell
# Documentation update - I've converted the subroutine
# documentation from POD to comments.
+#
+# 2004-12-10 V1.34 Steve Campbell
+# Eximstats can now parse syslog lines as well as mainlog lines.
+#
+# 2004-12-20 V1.35 Wouter Verhelst
+# Pie charts by volume were actually generated by count. Fixed.
+#
+# 2005-02-07 V1.36 Gregor Herrmann / Steve Campbell
+# Added average sizes to HTML Top tables.
=head1 NAME
-eximstats - generates statistics from Exim mainlog files.
+eximstats - generates statistics from Exim mainlog or syslog files.
=head1 SYNOPSIS
=head1 DESCRIPTION
-Eximstats parses exim mainlog files and outputs a statistical
+Eximstats parses exim mainlog and syslog files to output a statistical
analysis of the messages processed. By default, a text
analysis is generated, but you can request an html analysis
by using the B<-html> flag. See the help (B<-help>) to learn
@days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
$gig = 1024 * 1024 * 1024;
-$VERSION = '1.33';
+$VERSION = '1.36';
# How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
$COLUMN_WIDTHS = 8;
#######################################################################
# un_round();
-#
+#
# un_round($rounded_volume,\$bytes,\$gigabytes);
-#
+#
# Given a volume in KB, MB or GB, as generated by volume_rounded(),
# do the reverse transformation and convert it back into Bytes and Gigabytes.
# These are added to the $bytes and $gigabytes parameters.
-#
+#
# Given a data size in bytes, round it to KB, MB, or GB
# as appropriate.
-#
+#
# EG: 500 => (500,0), 14GB => (0,14), etc.
#######################################################################
sub un_round {
#######################################################################
# add_volume();
-#
+#
# add_volume(\$bytes,\$gigs,$size);
-#
+#
# Add $size to $bytes/$gigs where this is a number split into
# bytes ($bytes) and gigabytes ($gigs). This is significantly
# faster than using Math::BigInt.
#######################################################################
# format_time();
-#
+#
# $formatted_time = format_time($seconds);
-#
+#
# Given a time in seconds, break it down into
# weeks, days, hours, minutes, and seconds.
-#
+#
# Eg 12005 => 3h20m5s
#######################################################################
sub format_time {
#######################################################################
# unformat_time();
-#
+#
# $seconds = unformat_time($formatted_time);
-#
+#
# Given a time in weeks, days, hours, minutes, or seconds, convert it to seconds.
-#
+#
# Eg 3h20m5s => 12005
#######################################################################
sub unformat_time {
#######################################################################
# seconds();
-#
+#
# $time = seconds($timestamp);
-#
+#
# Given a time-of-day timestamp, convert it into a time() value using
# POSIX::mktime. We expect the timestamp to be of the form
# "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12,
# (with the -utc option), then we adjust the time by the current local
# time offset so that it can be compared with the time recorded in message
# IDs, which is UTC.
-#
+#
# To improve performance, we only use mktime on the date ($year-$mon-$day),
# and only calculate it if the date is different to the previous time we
# came here. We then add on seconds for the '$hour:$min:$sec'.
-#
+#
# We also store the results of the last conversion done, and only
# recalculate if the date is different.
-#
+#
# We used to have the '-cache' flag which would store the results of the
# mktime() call. However, the current way of just using mktime() on the
# date obsoletes this.
#######################################################################
# id_seconds();
-#
+#
# $time = id_seconds($message_id);
-#
+#
# Given a message ID, convert it into a time() value.
#######################################################################
sub id_seconds {
#######################################################################
# calculate_localtime_offset();
-#
+#
# $localtime_offset = calculate_localtime_offset();
-#
+#
# Calculate the localtime offset from gmtime in seconds.
-#
+#
# $localtime = time() + $localtime_offset.
-#
+#
# These are the same semantics as ISO 8601 and RFC 2822 timezone offsets.
# (West is negative, East is positive.)
#######################################################################
#######################################################################
# print_queue_times();
-#
+#
# $time = print_queue_times($message_type,\@queue_times,$queue_more_than);
-#
+#
# Given the type of messages being output, the array of message queue times,
# and the number of messages which exceeded the queue times, print out
# a table.
#######################################################################
# print_histogram();
-#
+#
# print_histogram('Deliverieds|Messages received',@interval_count);
-#
+#
# Print a histogram of the messages delivered/received per time slot
# (hour by default).
#######################################################################
#######################################################################
# print_league_table();
-#
+#
# print_league_table($league_table_type,\%message_count,\%message_data,\%message_data_gigs);
-#
+#
# Given hashes of message count and message data, which are keyed by
# the table type (eg by the sending host), print a league table
# showing the top $topcount (defaults to 50).
print "<table border=0 width=\"100%\">\n";
print "<tr><td>\n";
print "<table border=1>\n";
- print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
+ print "<tr><th>Messages</th><th>Bytes</th><th>Average</th><th>\u$text</th>\n";
# Align non-local addresses to the right (so all the .com's line up).
# Local addresses are aligned on the left as they are userids.
my $align = ($text !~ /local/i) ? 'right' : 'left';
- $format = "<tr><td align=\"right\">%d</td><td align=\"right\">%s</td><td align=\"$align\" nowrap>%s</td>\n";
+ $format = "<tr><td align=\"right\">%d</td><td align=\"right\">%s</td><td align=\"right\">%s</td><td align=\"$align\" nowrap>%s</td>\n";
}
else {
printf("%s\n%s\n\n", $temp, "-" x length($temp));
$format = "%7d %10s %s\n";
}
-my($key,$htmlkey);
+my($key,$htmlkey,$rounded_volume,$rounded_average,$count,$data,$gigs);
foreach $key (top_n_sort($topcount,$m_count,$m_data_gigs,$m_data)) {
if ($html) {
$htmlkey = $key;
$htmlkey =~ s/>/\>\;/g;
$htmlkey =~ s/</\<\;/g;
- printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
+
+ # When displaying the average figures, we calculate the average of
+ # the rounded data, as the user would calculate it. This reduces
+ # the accuracy slightly, but we have to do it this way otherwise
+ # when using -merge to convert results from text to HTML and
+ # vice-versa discrepancies would occur.
+ $rounded_volume = volume_rounded($$m_data{$key},$$m_data_gigs{$key});
+ $data = $gigs = 0;
+ un_round($rounded_volume,\$data,\$gigs);
+ $count = $$m_count{$key};
+ $rounded_average = volume_rounded($data/$count,$gigs/$count);
+ printf($format, $count, $rounded_volume, $rounded_average, $htmlkey);
}
else {
printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
print "<table border=0 width=\"100%\">\n";
print "<tr><td>\n";
print "<table border=1>\n";
- print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
+ print "<tr><th>Messages</th><th>Bytes</th><th>Average</th><th>\u$text</th>\n";
}
else {
printf("%s\n%s\n\n", $temp, "-" x length($temp));
@chartdatanames = ();
@chartdatavals = ();
$chartotherval = 0;
+my $use_gig = 0;
foreach $key (top_n_sort($topcount,$m_data_gigs,$m_data,$m_count)) {
+ # The largest volume will be the first (top of the list).
+ # If it has at least 1 gig, then just use gigabytes to avoid
+ # risking an integer overflow when generating the pie charts.
+ if ($$m_data_gigs{$key}) {
+ $use_gig = 1;
+ }
if ($html) {
$htmlkey = $key;
$htmlkey =~ s/>/\>\;/g;
$htmlkey =~ s/</\<\;/g;
- printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
+
+ $rounded_volume = volume_rounded($$m_data{$key},$$m_data_gigs{$key});
+ $data = $gigs = 0;
+ un_round($rounded_volume,\$data,\$gigs);
+ $count = $$m_count{$key};
+ $rounded_average = volume_rounded($data/$count,$gigs/$count);
+ printf($format, $count, $rounded_volume, $rounded_average, $htmlkey);
}
else {
printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
}
- if (scalar @chartdatanames < $ntopchart)
- {
- push(@chartdatanames, $key);
- push(@chartdatavals, $$m_count{$key});
+ if (scalar @chartdatanames < $ntopchart) {
+ if ($use_gig) {
+ if ($$m_data_gigs{$key}) {
+ push(@chartdatanames, $key);
+ push(@chartdatavals, $$m_data_gigs{$key});
+ }
}
- else
- {
- $chartotherval += $$m_count{$key};
+ else {
+ push(@chartdatanames, $key);
+ push(@chartdatavals, $$m_data{$key});
}
}
+ else {
+ $chartotherval += ($use_gig) ? $$m_data_gigs{$key} : $$m_data{$key};
+ }
+}
push(@chartdatanames, "Other");
push(@chartdatavals, $chartotherval);
my $graph = GD::Graph::pie->new(300, 300);
$graph->set(
x_label => 'Name',
- y_label => 'Volume',
+ y_label => 'Volume' ,
title => 'By Volume',
);
my $gd = $graph->plot(\@data) or warn($graph->error);
#######################################################################
# top_n_sort();
-#
+#
# @sorted_keys = top_n_sort($n,$href1,$href2,$href3);
-#
+#
# Given a hash which has numerical values, return the sorted $n keys which
# point to the top values. The second and third hashes are used as
# tiebreakers. They all must have the same keys.
-#
+#
# The idea behind this routine is that when you only want to see the
# top n members of a set, rather than sorting the entire set and then
# plucking off the top n, sort through the stack as you go, discarding
# any member which is lower than your current n'th highest member.
-#
+#
# This proves to be an order of magnitude faster for large hashes.
# On 200,000 lines of mainlog it benchmarked 9 times faster.
# On 700,000 lines of mainlog it benchmarked 13.8 times faster.
-#
+#
# We assume the values are > 0.
#######################################################################
sub top_n_sort {
my $n_minus_1 = $n - 1;
my $n_minus_2 = $n - 2;
- # Pick out the top $n keys.
+ # Pick out the top $n keys.
my($key,$value1,$value2,$value3,$i,$comparison,$insert_position);
while (($key,$value1) = each %$href1) {
#print STDERR "key $key ($value1,",$href2->{$key},",",$href3->{$key},") <=> ($minimum_value1,$minimum_value2,$minimum_value3)\n";
-
+
# Check to see that the new value is bigger than the lowest of the
# top n keys that we're keeping.
- $comparison = $value1 <=> $minimum_value1 ||
+ $comparison = $value1 <=> $minimum_value1 ||
$href2->{$key} <=> $minimum_value2 ||
$href3->{$key} <=> $minimum_value3 ||
$top_n_key cmp $key;
for ($i = 0; $i < $n_minus_1; $i++) {
$top_n_key = $top_n_keys[$i];
if ( ($top_n_key eq '_') ||
- ( ($value1 <=> $href1->{$top_n_key} ||
+ ( ($value1 <=> $href1->{$top_n_key} ||
$value2 <=> $href2->{$top_n_key} ||
$value3 <=> $href3->{$top_n_key} ||
$top_n_key cmp $key) == 1
#######################################################################
# html_header();
-#
+#
# $header = html_header($title);
-#
+#
# Print our HTML header and start the <body> block.
#######################################################################
sub html_header {
#######################################################################
# help();
-#
+#
# help();
-#
+#
# Display usage instructions and exit.
#######################################################################
sub help {
eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_rep.txt
eximstats -merge -html [Options] report.1.html ... > weekly_rep.html
-Parses exim mainlog files and generates a statistical analysis of
-the messages processed. Valid options are:
+Parses exim mainlog or syslog files and generates a statistical analysis
+of the messages processed. Valid options are:
-h<number> histogram divisions per hour. The default is 1, and
0 suppresses histograms. Other valid values are:
#######################################################################
# generate_parser();
-#
+#
# $parser = generate_parser();
-#
+#
# This subroutine generates the parsing routine which will be
# used to parse the mainlog. We take the base operation, and remove bits not in use.
# This improves performance depending on what bits you take out or add.
-#
+#
# I've tested using study(), but this does not improve performance.
-#
+#
# We store our parsing routine in a variable, and process it looking for #IFDEF (Expression)
# or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If
# the expression evaluates to true, then it is included/excluded accordingly.
my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new);
my($tod,$m_hour,$m_min,$id,$flag);
while (<$fh>) {
- next if length($_) < 38;
- # PH/FANF
- # next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d)/;
+ # Convert syslog lines to mainlog format.
+ if (! /^\\d{4}/) {
+ next unless s/^.*? exim\\b.*?: //;
+ }
+
+ next if length($_) < 38;
next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)/o;
($tod,$m_hour,$m_min) = ($1,$2,$3);
#######################################################################
# parse();
-#
+#
# parse($parser,\*FILEHANDLE);
-#
+#
# This subroutine accepts a parser and a filehandle from main and parses each
# line. We store the results into global variables.
#######################################################################
#######################################################################
# print_header();
-#
+#
# print_header();
-#
+#
# Print our headers and contents.
#######################################################################
sub print_header {
#######################################################################
# print_grandtotals();
-#
+#
# print_grandtotals();
-#
+#
# Print the grand totals.
#######################################################################
sub print_grandtotals {
#######################################################################
# print_user_patterns()
-#
+#
# print_user_patterns();
-#
+#
# Print the counts of user specified patterns.
#######################################################################
sub print_user_patterns {
#######################################################################
# print_transport();
-#
+#
# print_transport();
-#
+#
# Print totals by transport.
#######################################################################
sub print_transport {
#######################################################################
# print_relay();
-#
+#
# print_relay();
-#
+#
# Print our totals by relay.
#######################################################################
sub print_relay {
#######################################################################
# print_errors();
-#
+#
# print_errors();
-#
+#
# Print our errors. In HTML, we display them as a list rather than a table -
# Netscape doesn't like large tables!
#######################################################################
$text =~ s/\s\s+/ /g; #Convert multiple spaces to a single space.
$total_errors += $errors_count{$key};
if ($html) {
-
+
#Translate HTML tag characters. Sergey Sholokh.
$text =~ s/\</\<\;/g;
$text =~ s/\>/\>\;/g;
#######################################################################
# parse_old_eximstat_reports();
-#
+#
# parse_old_eximstat_reports($fh);
-#
+#
# Parse old eximstat output so we can merge daily stats to weekly stats and weekly to monthly etc.
-#
+#
# To test that the merging still works after changes, do something like the following.
# All the diffs should produce no output.
-#
+#
# options='-bydomain -byemail -byhost -byedomain'
# options="$options -pattern 'Completed Messages' /Completed/"
# options="$options -pattern 'Received Messages' /<=/"
-#
+#
# ./eximstats $options mainlog > mainlog.txt
# ./eximstats $options -merge mainlog.txt > mainlog.2.txt
# diff mainlog.txt mainlog.2.txt
-#
+#
# ./eximstats $options -html mainlog > mainlog.html
# ./eximstats $options -merge -html mainlog.txt > mainlog.2.html
# diff mainlog.html mainlog.2.html
-#
+#
# ./eximstats $options -merge mainlog.html > mainlog.3.txt
# diff mainlog.txt mainlog.3.txt
-#
+#
# ./eximstats $options -merge -html mainlog.html > mainlog.3.html
# diff mainlog.html mainlog.3.html
-#
+#
# ./eximstats $options -nvr mainlog > mainlog.nvr.txt
# ./eximstats $options -merge mainlog.nvr.txt > mainlog.4.txt
# diff mainlog.txt mainlog.4.txt
-#
+#
# # double_mainlog.txt should have twice the values that mainlog.txt has.
# ./eximstats $options mainlog mainlog > double_mainlog.txt
#######################################################################
# Fill in $report_totals{Received|Delivered}{Volume|Messages|Hosts|Domains|...|Delayed|DelayedPercent|Failed|FailedPercent}
my(@fields);
while (<$fh>) {
- $_ = html2txt($_); #Convert general HTML markup to text.
+ $_ = html2txt($_); #Convert general HTML markup to text.
s/At least one addr//g; #Another part of the HTML output we don't want.
# TOTAL Volume Messages Hosts Domains Delayed Failed
while (<$fh>) { last if (/Total/); } #Wait until we get the table headers.
while (<$fh>) {
print STDERR "Parsing $_" if $debug;
- $_ = html2txt($_); #Convert general HTML markup to text.
+ $_ = html2txt($_); #Convert general HTML markup to text.
if (/^\s*(.*?)\s+(\d+)\s*$/) {
$report_totals{patterns}{$1} = {} unless (defined $report_totals{patterns}{$1});
add_to_totals($report_totals{patterns}{$1},['Total'],$2);
while (<$fh>) { last if (/Volume/); } #Wait until we get the table headers.
while (<$fh>) {
print STDERR "Parsing $_" if $debug;
- $_ = html2txt($_); #Convert general HTML markup to text.
+ $_ = html2txt($_); #Convert general HTML markup to text.
if (/(\S+)\s+(\d+\S*\s+\d+)/) {
$report_totals{transport}{$1} = {} unless (defined $report_totals{transport}{$1});
add_to_totals($report_totals{transport}{$1},['Volume','Messages'],$2);
my $bin_aref = ($1 eq 'all messages') ? \@queue_bin : \@remote_queue_bin;
my $reached_table = 0;
while (<$fh>) {
- $_ = html2txt($_); #Convert general HTML markup to text.
+ $_ = html2txt($_); #Convert general HTML markup to text.
$reached_table = 1 if (/^\s*Under/);
next unless $reached_table;
my $previous_seconds_on_queue = 0;
#-------------------------------------
#
# 48 1468KB local
+# Could also have average values for HTML output.
+# 48 1468KB 30KB local
+
my($category,$by_count_or_volume) = ($1,$2);
#As we show 2 views of each table (by count and by volume),
my $reached_table = 0;
while (<$fh>) {
- $_ = html2txt($_); #Convert general HTML markup to text.
+ $_ = html2txt($_); #Convert general HTML markup to text.
$reached_table = 1 if (/^\s*\d/);
next unless $reached_table;
+
+ # Remove optional 'average value' column.
+ s/^\s*(\d+)\s+(\S+)\s+(\d+(KB|MB|GB|\b)\s+)/$1 $2 /;
+
if (/^\s*(\d+)\s+(\S+)\s*(.*?)\s*$/) {
my($count,$rounded_volume,$entry) = ($1,$2,$3);
#Note: $entry fields can be both null and can contain spaces.
next unless $reached_table;
s/^<li>(\d+) -/$1/; #Convert an HTML line to a text line.
- $_ = html2txt($_); #Convert general HTML markup to text.
+ $_ = html2txt($_); #Convert general HTML markup to text.
if (/\t\s*(.*)/) {
$error .= ' ' . $1; #Join a multiline error.
#######################################################################
# update_relayed();
-#
+#
# update_relayed($count,$sender,$recipient);
-#
+#
# Adds an entry into the %relayed hash. Currently only used when
# merging reports.
#######################################################################
#######################################################################
# add_to_totals();
-#
+#
# add_to_totals(\%totals,\@keys,$values);
-#
+#
# Given a line of space separated values, add them into the provided hash using @keys
# as the hash keys.
-#
+#
# If the value contains a '%', then the value is set rather than added. Otherwise, we
# convert the value to bytes and gigs. The gigs get added to I<Key>-gigs.
#######################################################################
#######################################################################
# get_report_total();
-#
+#
# $total = get_report_total(\%hash,$key);
-#
+#
# If %hash contains values split into Units and Gigs, we calculate and return
-#
+#
# $hash{$key} + 1024*1024*1024 * $hash{"${key}-gigs"}
#######################################################################
sub get_report_total {
#######################################################################
# html2txt();
-#
+#
# $text_line = html2txt($html_line);
-#
+#
# Convert a line from html to text. Currently we just convert HTML tags to spaces
# and convert &gt;, &lt;, and &nbsp; entities back.
#######################################################################
#######################################################################
# get_next_arg();
-#
+#
# $arg = get_next_arg();
-#
+#
# Because eximstats arguments are often passed as variables,
# we can't rely on shell parsing to deal with quotes. This
# subroutine returns $ARGV[1] and does a shift. If $ARGV[1]
# starts with a quote (' or "), and doesn't end in one, then
# we append the next argument to it and shift again. We repeat
# until we've got all of the argument.
-#
+#
# This isn't perfect as all white space gets reduced to one space,
# but it's as good as we can get! If it's essential that spacing
# be preserved precisely, then you get that by not using shell
}
elsif ($ARGV[0] =~ /^-chartdir$/) { $chartdir = $ARGV[1]; shift; $charts_option_specified = 1; }
elsif ($ARGV[0] =~ /^-chartrel$/) { $chartrel = $ARGV[1]; shift; $charts_option_specified = 1; }
- elsif ($ARGV[0] =~ /^-cache$/) { } #Not currently used.
+ elsif ($ARGV[0] =~ /^-cache$/) { } #Not currently used.
elsif ($ARGV[0] =~ /^-byhost$/) { $do_sender{Host} = 1 }
elsif ($ARGV[0] =~ /^-bydomain$/) { $do_sender{Domain} = 1 }
elsif ($ARGV[0] =~ /^-byemail$/) { $do_sender{Email} = 1 }