2 # $Cambridge: exim/src/src/eximstats.src,v 1.1 2004/10/07 10:39:01 ph10 Exp $
4 # Copyright (c) 2001 University of Cambridge.
5 # See the file NOTICE for conditions of use and distribution.
7 # Perl script to generate statistics from one or more Exim log files.
9 # Usage: eximstats [<options>] <log file> <log file> ...
11 # 1996-05-21: Ignore lines not starting with valid date/time, just in case
12 # these get into a log file.
13 # 1996-11-19: Add the -h option to control the size of the histogram,
14 # and optionally turn it off.
15 # Use some Perl 5 things; it should be everywhere by now.
16 # Add the Perl -w option and rewrite so no warnings are given.
17 # Add the -t option to control the length of the "top" listing.
18 # Add the -ne, -nt options to turn off errors and transport
20 # Add information about length of time on queue, and -q<list> to
21 # control the intervals and turn it off.
22 # Add count and percentage of delayed messages to the Received
24 # Show total number of errors.
25 # Add count and percentage of messages with errors to Received
27 # Add information about relaying and -nr to suppress it.
28 # 1997-02-03 Merged in some of the things Nigel Metheringham had done:
30 # Added received histogram as well as delivered
31 # Added local senders' league table
32 # Added local recipients' league table
33 # 1997-03-10 Fixed typo "destinationss"
34 # Allow for intermediate address between final and original
35 # when testing for relaying
36 # Give better message when no input
37 # 1997-04-24 Fixed bug in layout of error listing that was depending on
38 # text length (output line got repeated).
39 # 1997-05-06 Bug in option decoding when only one option.
40 # Overflow bug when handling very large volumes.
41 # 1997-10-28 Updated to handle revised log format that might show
42 # HELO name as well as host name before IP number
43 # 1998-01-26 Bugs in the function for calculating the number of seconds
44 # since 1970 from a log date
45 # 1998-02-02 Delivery to :blackhole: doesn't have a T= entry in the log
46 # line; cope with this, thereby avoiding undefined problems
47 # Very short log line gave substring error
48 # 1998-02-03 A routed delivery to a local transport may not have <> in the
49 # log line; terminate the address at white space, not <
50 # 1998-09-07 If first line of input was a => line, $thissize was undefined;
52 # 1998-12-21 Adding of $thissize from => line should have been adding $size.
53 # Oops. Should have looked more closely when fixing the previous
55 # 1999-11-12 Increased the field widths for printed integers; numbers are
56 # bigger than originally envisaged.
57 # 2001-03-21 Converted seconds() routine to use Time::Local, fixing a bug
58 # whereby seconds($timestamp) - id_seconds($id) gave an
60 # Added POD documentation.
61 # Moved usage instructions into help() subroutine.
62 # Added 'use strict' and declared all global variables.
63 # Added '-html' flag and resultant code.
64 # Added '-cache' flag and resultant code.
65 # Added add_volume() routine and converted all volume variables
66 # to use it, fixing the overflow problems for individual hosts
68 # Converted all volume output to GB/MB/KB as appropriate.
69 # Don't store local user stats if -nfl is specified.
70 # Modifications done by: Steve Campbell (<steve@computurn.com>)
71 # 2001-04-02 Added the -t_remote_users flag. Steve Campbell.
72 # 2001-10-15 Added the -domain flag. Steve Campbell.
73 # 2001-10-16 Accept files on STDIN or on the command line. Steve Campbell.
74 # 2001-10-21 Removed -domain flag and added -bydomain, -byhost, and -byemail.
75 # We now generate our main parsing subroutine as an eval statement
76 # which improves performance dramatically when not all the results
77 # are required. We also cache the last timestamp to time convertion.
79 # NOTE: 'Top 50 destinations by (message count|volume)' lines are
80 # now 'Top N (host|email|domain) destinations by (message count|volume)'
81 # where N is the topcount. Steve Campbell.
83 # 2001-10-30 V1.16 Joachim Wieland.
84 # Fixed minor bugs in add_volume() when taking over this version
85 # for use in Exim 4: -w gave uninitialized value warnings in
86 # two situations: for the first addition to a counter, and if
87 # there were never any gigabytes, thereby leaving the $gigs
89 # Initialized $last_timestamp to stop a -w uninitialized warning.
90 # Minor layout tweak for grand totals (nitpicking).
91 # Put the IP addresses for relaying stats in [] and separated by
92 # a space from the domain name.
93 # Removed the IPv4-specific address test when picking out addresses
94 # for relaying. Anything inside [] is OK.
96 # 2002-07-02 Philip Hazel
97 # Fixed "uninitialized variable" message that occurred for relay
98 # messages that arrived from H=[1.2.3.4] hosts (no name shown).
99 # This bug didn't affect the output.
101 # 2002-04-15 V1.17 Joachim Wieland.
102 # Added -charts, -chartdir. -chartrel options which use
103 # GD::Graph modules to create graphical charts of the statistics.
105 # 2002-04-15 V1.18 Steve Campbell.
106 # Added a check for $domain to to stop a -w uninitialized warning.
107 # Added -byemaildomain option.
108 # Only print HTML header links to included tables!
110 # 2002-08-02 V1.19 Steve Campbell.
111 # Changed the debug mode to dump the parser onto STDERR rather
112 # than STDOUT. Documented the -d flag into the help().
113 # Rejoined the divergent 2002-04-15 and 2002-07-02 releases.
115 # 2002-08-21 V1.20 Steve Campbell.
116 # Added the '-merge' option to allow merging of previous reports.
117 # Fixed a missing semicolon when doing -bydomain.
118 # Make volume charts plot the data gigs and bytes rather than just bytes.
119 # Only process log lines with $flag =~ /<=|=>|->|==|\*\*|Co/
120 # Converted Emaildomain to Edomain - the column header was too wide!
121 # This changes the text output slightly. You can revert to the old
122 # column widths by changing $COLUMN_WIDTHS to 7;
124 # 2002-09-04 V1.21 Andreas J Mueller
125 # Local deliveries domain now defaults to 'localdomain'.
126 # Don't match F=<From> when looking for the user.
128 # 2002-09-05 V1.22 Steve Campbell
129 # Fixed a perl 5.005 incompatibility problem ('our' variables).
131 # 2002-09-11 V1.23 Steve Campbell
132 # Stopped -charts option from throwing errors on null data.
133 # Don't print out 'Errors encountered' unless there are any.
135 # 2002-10-21 V1.23a Philip Hazel - patch from Tony Finch put in until
136 # Steve's eximstats catches up.
137 # Handle log files that include the timezone after the timestamp.
138 # Switch to assuming that log timestamps are in local time, with
139 # an option for UTC timestamps, as in Exim itself.
141 # 2003-02-05 V1.24 Steve Campbell
142 # Added in Sergey Sholokh's code to convert '<' and '>' characters
143 # in HTML output. Also added code to convert them back with -merge.
144 # Fixed timestamp offsets to convert to seconds rather than minutes.
145 # Updated -merge to work with output files using timezones.
146 # Added cacheing to speed up the calculation of timezone offsets.
148 # 2003-02-07 V1.25 Steve Campbell
149 # Optimised the usage of mktime() in the seconds subroutine.
150 # Removed the now redundant '-cache' option.
151 # html2txt() now explicitly matches HTML tags.
152 # Implemented a new sorting algorithm - the top_n_sort() routine.
153 # Added Danny Carroll's '-nvr' flag and code.
155 # 2003-03-13 V1.26 Steve Campbell
156 # Implemented HTML compliance changes recommended by Bernard Massot.
157 # Bug fix to allow top_n_sort() to handle null keys.
158 # Convert all domains and edomains to lowercase.
159 # Remove preceding dots from domains.
161 # 2003-03-13 V1.27 Steve Campbell
162 # Replaced border attributes with 'border=1', as recommended by
165 # 2003-06-03 V1.28 John Newman
166 # Added in the ability to skip over the parsing and evaulation of
167 # specific transports as passed to eximstats via the new "-nt/.../"
168 # command line argument. This new switch allows the viewing of
169 # not more accurate statistics but more applicable statistics when
170 # special transports are in use (ie; SpamAssassin). We need to be
171 # able to ignore transports such as this otherwise the resulting
172 # local deliveries are significantly skewed (doubled)...
174 # 2003-11-06 V1.29 Steve Campbell
175 # Added the '-pattern "Description" "/pattern/"' option.
177 # 2004-02-17 V1.30 Steve Campbell
178 # Added warnings if required GD::Graph modules are not available or
179 # insufficient -chart* options are specified.
181 # 2004-02-20 V1.31 Andrea Balzi
182 # Only show the Local Sender/Destination links if the tables exist.
188 eximstats - generates statistics from Exim mainlog files.
192 eximstats [Options] mainlog1 mainlog2 ... > report.txt
193 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_report.txt
201 histogram divisions per hour. The default is 1, and
202 0 suppresses histograms. Valid values are:
204 0, 1, 2, 3, 5, 10, 15, 20, 30 or 60.
208 Don't display error information.
212 Don't display relaying information.
214 =item B<-nr>I</pattern/>
216 Don't display relaying information that matches.
220 Don't display transport information.
222 =item B<-nt>I</pattern/>
224 Don't display transport information that matches
228 List of times for queuing information single 0 item suppresses.
232 Display top <number> sources/destinations
233 default is 50, 0 suppresses top listing.
237 Omit local sources/destinations in top listing.
239 =item B<-t_remote_users>
241 Include remote users in the top source/destination listings.
245 Show results by sending host. This may be combined with
246 B<-bydomain> and/or B<-byemail> and/or B<-byedomain>. If none of these options
247 are specified, then B<-byhost> is assumed as a default.
251 Show results by sending domain.
252 May be combined with B<-byhost> and/or B<-byemail> and/or B<-byedomain>.
256 Show results by sender's email address.
257 May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byedomain>.
259 =item B<-byemaildomain> or B<-byedomain>
261 Show results by sender's email domain.
262 May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byemail>.
264 =item B<-pattern> I<Description> I</Pattern/>
266 Look for the specified pattern and count the number of lines in which it appears.
267 This option can be specified multiple times. Eg:
269 -pattern 'Refused connections' '/refused connection/'
274 This option allows eximstats to merge old eximstat reports together. Eg:
276 eximstats mainlog.sun > report.sun.txt
277 eximstats mainlog.mon > report.mon.txt
278 eximstats mainlog.tue > report.tue.txt
279 eximstats mainlog.wed > report.web.txt
280 eximstats mainlog.thu > report.thu.txt
281 eximstats mainlog.fri > report.fri.txt
282 eximstats mainlog.sat > report.sat.txt
283 eximstats -merge report.*.txt > weekly_report.txt
284 eximstats -merge -html report.*.txt > weekly_report.html
290 You can merge text or html reports and output the results as text or html.
294 You can use all the normal eximstat output options, but only data
295 included in the original reports can be shown!
299 When merging reports, some loss of accuracy may occur in the top I<n> lists.
300 This will be towards the ends of the lists.
304 The order of items in the top I<n> lists may vary when the data volumes
305 round to the same value.
311 Output the results in HTML.
315 Create graphical charts to be displayed in HTML output.
317 This requires the following modules which can be obtained
318 from http://www.cpan.org/modules/01modules.index.html
330 To install these, download and unpack them, then use the normal perl installation procedure:
337 =item B<-chartdir>I <dir>
339 Create the charts in the directory <dir>
341 =item B<-chartrel>I <dir>
343 Specify the relative directory for the "img src=" tags from where to include
348 Debug flag. This outputs the eval()'d parser onto STDOUT which makes it
349 easier to trap errors in the eval section. Remember to add 1 to the line numbers to allow for the
356 Eximstats parses exim mainlog files and outputs a statistical
357 analysis of the messages processed. By default, a text
358 analysis is generated, but you can request an html analysis
359 by using the B<-html> flag. See the help (B<-help>) to learn
360 about how to create charts from the tables.
364 There is a web site at http://www.exim.org - this contains details of the
365 mailing list exim-users@exim.org.
369 This program does not perfectly handle messages whose received
370 and delivered log lines are in different files, which can happen
371 when you have multiple mail servers and a message cannot be
372 immeadiately delivered. Fixing this could be tricky...
376 The following section will only be of interest to the
384 # use Time::Local; # PH/FANF
387 use vars qw($HAVE_GD_Graph_pie $HAVE_GD_Graph_linespoints);
388 eval { require GD::Graph::pie; };
389 $HAVE_GD_Graph_pie = $@ ? 0 : 1;
390 eval { require GD::Graph::linespoints; };
391 $HAVE_GD_Graph_linespoints = $@ ? 0 : 1;
394 ##################################################
396 ##################################################
397 # 'use vars' instead of 'our' as perl5.005 is still in use out there!
398 use vars qw(@tab62 @days_per_month $gig);
399 use vars qw($VERSION);
400 use vars qw($COLUMN_WIDTHS);
404 (0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, # 0-9
405 0,10,11,12,13,14,15,16,17,18,19,20, # A-K
406 21,22,23,24,25,26,27,28,29,30,31,32, # L-W
407 33,34,35, 0, 0, 0, 0, 0, # X-Z
408 0,36,37,38,39,40,41,42,43,44,45,46, # a-k
409 47,48,49,50,51,52,53,54,55,56,57,58, # l-w
412 @days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
413 $gig = 1024 * 1024 * 1024;
416 # How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
419 # Declare global variables.
420 use vars qw($total_received_data $total_received_data_gigs $total_received_count);
421 use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_count);
422 use vars qw(%arrival_time %size %from_host %from_address);
423 use vars qw(%timestamp2time); #Hash of timestamp => time.
424 use vars qw($last_timestamp $last_time); #The last time convertion done.
425 use vars qw($last_date $date_seconds); #The last date convertion done.
426 use vars qw($last_offset $offset_seconds); #The last time offset convertion done.
427 use vars qw($localtime_offset);
428 use vars qw($i); #General loop counter.
429 use vars qw($debug); #Debug mode?
430 use vars qw($ntopchart); #How many entries should make it into the chart?
431 use vars qw($gddirectory); #Where to put files from GD::Graph
435 # The following are parameters whose values are
436 # set by command line switches:
437 use vars qw($show_errors $show_relay $show_transport $transport_pattern);
438 use vars qw($topcount $local_league_table $include_remote_users);
439 use vars qw($hist_opt $hist_interval $hist_number $volume_rounding);
440 use vars qw($relay_pattern @queue_times $html @user_patterns @user_descriptions);
442 use vars qw(%do_sender); #Do sender by Host, Domain, Email, and/or Edomain tables.
443 use vars qw($charts $chartrel $chartdir $charts_option_specified);
444 use vars qw($merge_reports); #Merge old reports ?
446 # The following are modified in the parse() routine, and
447 # referred to in the print_*() routines.
448 use vars qw($queue_more_than $delayed_count $relayed_unshown $begin $end);
449 use vars qw(%received_count %received_data %received_data_gigs);
450 use vars qw(%delivered_count %delivered_data %delivered_data_gigs);
451 use vars qw(%received_count_user %received_data_user %received_data_gigs_user);
452 use vars qw(%delivered_count_user %delivered_data_user %delivered_data_gigs_user);
453 use vars qw(%transported_count %transported_data %transported_data_gigs);
454 use vars qw(%remote_delivered %relayed %delayed %had_error %errors_count);
455 use vars qw(@queue_bin @remote_queue_bin @received_interval_count @delivered_interval_count);
456 use vars qw(@user_pattern_totals);
458 use vars qw(%report_totals);
463 ##################################################
465 ##################################################
468 =head2 volume_rounded();
470 $rounded_volume = volume_rounded($bytes,$gigabytes);
472 Given a data size in bytes, round it to KB, MB, or GB
475 Eg 12000 => 12KB, 15000000 => 14GB, etc.
477 Note: I've experimented with Math::BigInt and it results in a 33%
478 performance degredation as opposed to storing numbers split into
494 if ($volume_rounding) {
498 $rounded = sprintf("%6d", $x);
500 elsif ($x < 10000000) {
501 $rounded = sprintf("%4dKB", ($x + 512)/1024);
504 $rounded = sprintf("%4dMB", ($x + 512*1024)/(1024*1024));
507 # Values between 1GB and 10GB are printed in MB
509 $rounded = sprintf("%4dMB", ($g * 1024) + ($x + 512*1024)/(1024*1024));
512 # Handle values over 10GB
513 $rounded = sprintf("%4dGB", $g + ($x + $gig/2)/$gig);
517 # We don't want any rounding to be done.
518 $rounded = sprintf("%4d", ($g * $gig) + $x);
527 un_round($rounded_volume,\$bytes,\$gigabytes);
529 Given a volume in KB, MB or GB, as generated by volume_rounded(),
530 do the reverse transformation and convert it back into Bytes and Gigabytes.
531 These are added to the $bytes and $gigabytes parameters.
533 Given a data size in bytes, round it to KB, MB, or GB
536 EG: 500 => (500,0), 14GB => (0,14), etc.
541 my($rounded,$bytes_sref,$gigabytes_sref) = @_;
543 if ($rounded =~ /(\d+)GB/) {
544 $$gigabytes_sref += $1;
546 elsif ($rounded =~ /(\d+)MB/) {
547 $$gigabytes_sref += $1 / 1024;
548 $$bytes_sref += (($1 % 1024 ) * 1024 * 1024);
550 elsif ($rounded =~ /(\d+)KB/) {
551 $$gigabytes_sref += $1 / (1024 * 1024);
552 $$bytes_sref += ($1 % (1024 * 1024) * 1024);
554 elsif ($rounded =~ /(\d+)/) {
555 $$gigabytes_sref += $1 / $gig;
556 $$bytes_sref += $1 % $gig;
559 #Now reduce the bytes down to less than 1GB.
560 add_volume($bytes_sref,$gigabytes_sref,0) if ($$bytes_sref > $gig);
566 add_volume(\$bytes,\$gigs,$size);
568 Add $size to $bytes/$gigs where this is a number split into
569 bytes ($bytes) and gigabytes ($gigs). This is significantly
570 faster than using Math::BigInt.
575 my($bytes_ref,$gigs_ref,$size) = @_;
576 $$bytes_ref = 0 if ! defined $$bytes_ref;
577 $$gigs_ref = 0 if ! defined $$gigs_ref;
578 $$bytes_ref += $size;
579 while ($$bytes_ref > $gig)
587 =head2 format_time();
589 $formatted_time = format_time($seconds);
591 Given a time in seconds, break it down into
592 weeks, days, hours, minutes, and seconds.
609 $p .= "$w"."w" if $w > 0;
610 $p .= "$d"."d" if $d > 0;
611 $p .= "$h"."h" if $h > 0;
612 $p .= "$m"."m" if $m > 0;
613 $p .= "$s"."s" if $s > 0 || $p eq "";
618 =head2 unformat_time();
620 $seconds = unformat_time($formatted_time);
622 Given a time in weeks, days, hours, minutes, or seconds, convert it to seconds.
629 my($formated_time) = pop @_;
632 while ($formated_time =~ s/^(\d+)([wdhms]?)//) {
633 $time += $1 if ($2 eq '' || $2 eq 's');
634 $time += $1 * 60 if ($2 eq 'm');
635 $time += $1 * 60 * 60 if ($2 eq 'h');
636 $time += $1 * 60 * 60 * 24 if ($2 eq 'd');
637 $time += $1 * 60 * 60 * 24 * 7 if ($2 eq 'w');
645 $time = seconds($timestamp);
647 Given a time-of-day timestamp, convert it into a time() value using
648 POSIX::mktime. We expect the timestamp to be of the form
649 "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12,
650 and the year to be absolute (we do the necessary conversions). The
651 timestamp may be followed with an offset from UTC like "+$hh$mm"; if the
652 offset is not present, and we have not been told that the log is in UTC
653 (with the -utc option), then we adjust the time by the current local
654 time offset so that it can be compared with the time recorded in message
657 To improve performance, we only use mktime on the date ($year-$mon-$day),
658 and only calculate it if the date is different to the previous time we
659 came here. We then add on seconds for the '$hour:$min:$sec'.
661 We also store the results of the last conversion done, and only
662 recalculate if the date is different.
664 We used to have the '-cache' flag which would store the results of the
665 mktime() call. However, the current way of just using mktime() on the
673 # Is the timestamp the same as the last one?
674 return $last_time if ($last_timestamp eq $timestamp);
676 return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)( ([+-])(\d\d)(\d\d))?/o);
678 unless ($last_date eq $1) {
680 my(@timestamp) = (0,0,0,$4,$3,$2);
681 $timestamp[5] -= 1900;
683 $date_seconds = mktime(@timestamp);
685 my $time = $date_seconds + ($5 * 3600) + ($6 * 60) + $7;
687 # SC. Use cacheing. Also note we want seconds not minutes.
688 #my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8;
689 if (defined $8 && ($8 ne $last_offset)) {
691 $offset_seconds = ($10 * 60 + $11) * 60;
692 $offset_seconds = -$offset_seconds if ($9 eq '-');
697 #$time -= $this_offset;
698 $time -= $offset_seconds;
699 } elsif (defined $localtime_offset) {
700 $time -= $localtime_offset;
703 # Store the last timestamp received.
704 $last_timestamp = $timestamp;
713 $time = id_seconds($message_id);
715 Given a message ID, convert it into a time() value.
720 my($sub_id) = substr((pop @_), 0, 6);
722 my(@c) = split(//, $sub_id);
723 while($#c >= 0) { $s = $s * 62 + $tab62[ord(shift @c) - ord('0')] }
729 =head2 calculate_localtime_offset();
731 $localtime_offset = calculate_localtime_offset();
733 Calculate the the localtime offset from gmtime in seconds.
735 $localtime = time() + $localtime_offset.
737 These are the same semantics as ISO 8601 and RFC 2822 timezone offsets.
738 (West is negative, East is positive.)
742 # $localtime = gmtime() + $localtime_offset. OLD COMMENT
743 # This subroutine commented out as it's not currently in use.
745 #sub calculate_localtime_offset {
746 # # Pick an arbitrary date, convert it to localtime & gmtime, and return the difference.
747 # my (@sample_date) = (0,0,0,5,5,100);
748 # my $localtime = timelocal(@sample_date);
749 # my $gmtime = timegm(@sample_date);
750 # my $offset = $localtime - $gmtime;
754 sub calculate_localtime_offset {
755 # Assume that the offset at the moment is valid across the whole
756 # period covered by the logs that we're analysing. This may not
757 # be true around the time the clocks change in spring or autumn.
759 # mktime works on local time and gmtime works in UTC
760 my $local = mktime(gmtime($utc));
761 return $local - $utc;
765 =head2 print_queue_times();
767 $time = print_queue_times($message_type,\@queue_times,$queue_more_than);
769 Given the type of messages being output, the array of message queue times,
770 and the number of messages which exceeded the queue times, print out
775 sub print_queue_times {
777 my($string,$array,$queue_more_than) = @_;
782 my $cumulative_percent = 0;
783 #$queue_unknown += keys %arrival_time;
785 my $queue_total = $queue_more_than;
786 for ($i = 0; $i <= $#queue_times; $i++) { $queue_total += $$array[$i] }
788 my $temp = "Time spent on the queue: $string";
792 print "<hr><a name=\"$string time\"></a><h2>$temp</h2>\n";
793 print "<table border=0 width=\"100%\">\n";
795 print "<table border=1>\n";
796 print "<tr><th>Time</th><th>Messages</th><th>Percentage</th><th>Cumulative Percentage</th>\n";
797 $format = "<tr><td align=\"right\">%s %s</td><td align=\"right\">%d</td><td align=\"right\">%5.1f%%</td><td align=\"right\">%5.1f%%</td>\n";
801 printf("%s\n%s\n\n", $temp, "-" x length($temp));
802 $format = "%5s %4s %6d %5.1f%% %5.1f%%\n";
805 for ($i = 0; $i <= $#queue_times; $i++) {
808 my $percent = ($$array[$i] * 100)/$queue_total;
809 $cumulative_percent += $percent;
811 $printed_one? " " : "Under",
812 format_time($queue_times[$i]),
813 $$array[$i], $percent, $cumulative_percent);
814 if (!defined($queue_times[$i])) {
817 push(@chartdatanames,
818 ($printed_one? "" : "Under") . format_time($queue_times[$i]));
819 push(@chartdatavals, $$array[$i]);
824 if ($queue_more_than > 0) {
825 my $percent = ($queue_more_than * 100)/$queue_total;
826 $cumulative_percent += $percent;
829 format_time($queue_times[$#queue_times]),
830 $queue_more_than, $percent, $cumulative_percent);
832 push(@chartdatanames, "Over " . format_time($queue_times[$#queue_times]));
833 push(@chartdatavals, $queue_more_than);
835 #printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0;
840 if ($HAVE_GD_Graph_pie && $charts) {
845 my $graph = GD::Graph::pie->new(200, 200);
848 if ($string =~ /all/) { $pngname = "queue_all.png"; $title = "Queue (all)"; }
849 if ($string =~ /remote/) { $pngname = "queue_rem.png"; $title = "Queue (remote)"; }
853 my $gd = $graph->plot(\@data) or warn($graph->error);
855 open(IMG, ">$chartdir/$pngname") or die $!;
859 print "<img src=\"$chartrel/$pngname\">";
862 print "</td></tr></table>\n";
869 =head2 print_histogram();
871 print_histogram('Deliverieds|Messages received',@interval_count);
873 Print a histogram of the messages delivered/received per time slot
878 sub print_histogram {
880 my(@interval_count) = @_;
884 for ($i = 0; $i < $hist_number; $i++)
885 { $maxd = $interval_count[$i] if $interval_count[$i] > $maxd; }
887 my $scale = int(($maxd + 25)/50);
888 $scale = 1 if $scale == 0;
891 if ($text eq "Deliveries")
893 $type = ($scale == 1)? "delivery" : "deliveries";
897 $type = ($scale == 1)? "message" : "messages";
900 my($title) = sprintf("$text per %s (each dot is $scale $type)",
901 ($hist_interval == 60)? "hour" :
902 ($hist_interval == 1)? "minute" : "$hist_interval minutes");
905 print "<hr><a name=\"$text\"></a><h2>$title</h2>\n";
906 print "<table border=0 width=\"100%\">\n";
907 print "<tr><td><pre>\n";
910 printf("%s\n%s\n\n", $title, "-" x length($title));
915 for ($i = 0; $i < $hist_number; $i++)
917 my $c = $interval_count[$i];
919 # If the interval is an hour (the maximum) print the starting and
920 # ending hours as a label. Otherwise print the starting hour and
921 # minutes, which take up the same space.
926 $temp = sprintf("%02d-%02d", $hour, $hour + 1);
928 push(@chartdatanames, $temp);
934 { $temp = sprintf("%02d:%02d", $hour, $minutes) }
936 { $temp = sprintf(" :%02d", $minutes) }
938 push(@chartdatanames, $temp);
939 $minutes += $hist_interval;
946 push(@chartdatavals, $c);
947 printf(" %6d %s\n", $c, "." x ($c/$scale));
954 if ($HAVE_GD_Graph_linespoints && $charts) {
955 # calculate the graph
960 my $graph = GD::Graph::linespoints->new(300, 300);
965 x_labels_vertical => 1
968 if ($text =~ /Deliveries/) { $pngname = "histogram_del.png"; }
969 if ($text =~ /Messages/) { $pngname = "histogram_mes.png"; }
970 my $gd = $graph->plot(\@data) or warn($graph->error);
972 open(IMG, ">$chartdir/$pngname") or die $!;
976 print "<img src=\"$chartrel/$pngname\">";
979 print "</td></tr></table>\n";
985 =head2 print_league_table();
987 print_league_table($league_table_type,\%message_count,\%message_data,\%message_data_gigs);
989 Given hashes of message count and message data, which are keyed by
990 the table type (eg by the sending host), print a league table
991 showing the top $topcount (defaults to 50).
995 sub print_league_table {
996 my($text,$m_count,$m_data,$m_data_gigs) = @_;
997 my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s";
998 my($temp) = "Top $name by message count";
999 my(@chartdatanames) = ();
1000 my(@chartdatavals) = ();
1001 my $chartotherval = 0;
1005 print "<hr><a name=\"$text count\"></a><h2>$temp</h2>\n";
1006 print "<table border=0 width=\"100%\">\n";
1008 print "<table border=1>\n";
1009 print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
1011 # Align non-local addresses to the right (so all the .com's line up).
1012 # Local addresses are aligned on the left as they are userids.
1013 my $align = ($text !~ /local/i) ? 'right' : 'left';
1014 $format = "<tr><td align=\"right\">%d</td><td align=\"right\">%s</td><td align=\"$align\" nowrap>%s</td>\n";
1017 printf("%s\n%s\n\n", $temp, "-" x length($temp));
1018 $format = "%7d %10s %s\n";
1022 foreach $key (top_n_sort($topcount,$m_count,$m_data_gigs,$m_data)) {
1025 $htmlkey =~ s/>/\>\;/g;
1026 $htmlkey =~ s/</\<\;/g;
1027 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
1030 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
1032 if (scalar @chartdatanames < $ntopchart)
1034 push(@chartdatanames, $key);
1035 push(@chartdatavals, $$m_count{$key});
1039 $chartotherval += $$m_count{$key};
1042 push(@chartdatanames, "Other");
1043 push(@chartdatavals, $chartotherval);
1048 print "</td><td>\n";
1049 if ($HAVE_GD_Graph_pie && $charts)
1051 # calculate the graph
1056 my $graph = GD::Graph::pie->new(300, 300);
1059 y_label => 'Amount',
1060 title => 'By count',
1062 my $gd = $graph->plot(\@data) or warn($graph->error);
1066 open(IMG, ">$chartdir/${temp}_count.png") or die $!;
1070 print "<img src=\"$chartrel/${temp}_count.png\">";
1073 print "</td><td>\n";
1074 print "</td></tr></table>\n";
1078 $temp = "Top $name by volume";
1080 print "<hr><a name=\"$text volume\"></a><h2>$temp</h2>\n";
1081 print "<table border=0 width=\"100%\">\n";
1083 print "<table border=1>\n";
1084 print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
1087 printf("%s\n%s\n\n", $temp, "-" x length($temp));
1090 @chartdatanames = ();
1091 @chartdatavals = ();
1093 foreach $key (top_n_sort($topcount,$m_data_gigs,$m_data,$m_count)) {
1096 $htmlkey =~ s/>/\>\;/g;
1097 $htmlkey =~ s/</\<\;/g;
1098 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
1101 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
1104 if (scalar @chartdatanames < $ntopchart)
1106 push(@chartdatanames, $key);
1107 push(@chartdatavals, $$m_count{$key});
1111 $chartotherval += $$m_count{$key};
1114 push(@chartdatanames, "Other");
1115 push(@chartdatavals, $chartotherval);
1119 print "</td><td>\n";
1120 if ($HAVE_GD_Graph_pie && $charts) {
1121 # calculate the graph
1126 my $graph = GD::Graph::pie->new(300, 300);
1129 y_label => 'Volume',
1130 title => 'By Volume',
1132 my $gd = $graph->plot(\@data) or warn($graph->error);
1136 open(IMG, ">$chartdir/${temp}_volume.png") or die $!;
1140 print "<img src=\"$chartrel/${temp}_volume.png\">";
1143 print "</td><td>\n";
1144 print "</td></tr></table>\n";
1151 =head2 top_n_sort();
1153 @sorted_keys = top_n_sort($n,$href1,$href2,$href3);
1155 Given a hash which has numerical values, return the sorted $n keys which
1156 point to the top values. The second and third hashes are used as
1157 tiebreakers. They all must have the same keys.
1159 The idea behind this routine is that when you only want to see the
1160 top n members of a set, rather than sorting the entire set and then
1161 plucking off the top n, sort through the stack as you go, discarding
1162 any member which is lower than your current n'th highest member.
1164 This proves to be an order of magnitude faster for large hashes.
1165 On 200,000 lines of mainlog it benchmarked 9 times faster.
1166 On 700,000 lines of mainlog it benchmarked 13.8 times faster.
1168 We assume the values are > 0.
1173 my($n,$href1,$href2,$href3) = @_;
1175 # PH's original sort was:
1177 # foreach $key (sort
1179 # $$m_count{$b} <=> $$m_count{$a} ||
1180 # $$m_data_gigs{$b} <=> $$m_data_gigs{$a} ||
1181 # $$m_data{$b} <=> $$m_data{$a} ||
1187 #We use a key of '_' to represent non-existant values, as null keys are valid.
1188 #'_' is not a valid domain, edomain, host, or email.
1189 my(@top_n_keys) = ('_') x $n;
1190 my($minimum_value1,$minimum_value2,$minimum_value3) = (0,0,0);
1192 my $n_minus_1 = $n - 1;
1193 my $n_minus_2 = $n - 2;
1195 # Pick out the top $n keys.
1196 my($key,$value1,$value2,$value3,$i,$comparison,$insert_position);
1197 while (($key,$value1) = each %$href1) {
1199 #print STDERR "key $key ($value1,",$href2->{$key},",",$href3->{$key},") <=> ($minimum_value1,$minimum_value2,$minimum_value3)\n";
1201 # Check to see that the new value is bigger than the lowest of the
1202 # top n keys that we're keeping.
1203 $comparison = $value1 <=> $minimum_value1 ||
1204 $href2->{$key} <=> $minimum_value2 ||
1205 $href3->{$key} <=> $minimum_value3 ||
1206 $top_n_key cmp $key;
1207 next unless ($comparison == 1);
1209 # As we will be using these values a few times, extract them into scalars.
1210 $value2 = $href2->{$key};
1211 $value3 = $href3->{$key};
1213 # This key is bigger than the bottom n key, so the lowest position we
1214 # will insert it into is $n minus 1 (the bottom of the list).
1215 $insert_position = $n_minus_1;
1217 # Now go through the list, stopping when we find a key that we're
1218 # bigger than, or we come to the penultimate position - we've
1219 # already tested bigger than the last.
1221 # Note: we go top down as the list starts off empty.
1222 # Note: stepping through the list in this way benchmarks nearly
1223 # three times faster than doing a sort() on the reduced list.
1224 # I assume this is because the list is already in order, and
1225 # we get a performance boost from not having to do hash lookups
1227 for ($i = 0; $i < $n_minus_1; $i++) {
1228 $top_n_key = $top_n_keys[$i];
1229 if ( ($top_n_key eq '_') ||
1230 ( ($value1 <=> $href1->{$top_n_key} ||
1231 $value2 <=> $href2->{$top_n_key} ||
1232 $value3 <=> $href3->{$top_n_key} ||
1233 $top_n_key cmp $key) == 1
1236 $insert_position = $i;
1241 # Remove the last element, then insert the new one.
1242 $#top_n_keys = $n_minus_2;
1243 splice(@top_n_keys,$insert_position,0,$key);
1245 # Extract our new minimum values.
1246 $top_n_key = $top_n_keys[$n_minus_1];
1247 if ($top_n_key ne '_') {
1248 $minimum_value1 = $href1->{$top_n_key};
1249 $minimum_value2 = $href2->{$top_n_key};
1250 $minimum_value3 = $href3->{$top_n_key};
1254 # Return the top n list, grepping out non-existant values, just in case
1255 # we didn't have that many values.
1256 return(grep(!/^_$/,@top_n_keys));
1260 =head2 html_header();
1262 $header = html_header($title);
1264 Print our HTML header and start the <body> block.
1270 my $text = << "EoText";
1271 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
1274 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15">
1275 <title>$title</title>
1277 <body bgcolor="white">
1289 Display usage instructions and exit.
1296 eximstats Version $VERSION
1298 Usage: eximstats [Options] mainlog1 mainlog2 ... > report.txt
1299 eximstats -html [Options] mainlog1 mainlog2 ... > report.html
1300 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_rep.txt
1301 eximstats -merge -html [Options] report.1.html ... > weekly_rep.html
1303 Parses exim mainlog files and generates a statistical analysis of
1304 the messages processed. Valid options are:
1306 -h<number> histogram divisions per hour. The default is 1, and
1307 0 suppresses histograms. Other valid values are:
1308 2, 3, 5, 10, 15, 20, 30 or 60.
1309 -ne don't display error information
1310 -nr don't display relaying information
1311 -nr/pattern/ don't display relaying information that matches
1312 -nt don't display transport information
1313 -nt/pattern/ don't display transport information that matches
1314 -nvr don't do volume rounding. Display in bytes, not KB/MB/GB.
1315 -q<list> list of times for queuing information
1316 single 0 item suppresses
1317 -t<number> display top <number> sources/destinations
1318 default is 50, 0 suppresses top listing
1319 -tnl omit local sources/destinations in top listing
1320 -t_remote_users show top user sources/destinations from non-local domains
1322 -byhost show results by sending host (default unless bydomain or
1323 byemail is specified)
1324 -bydomain show results by sending domain.
1325 -byemail show results by sender's email address
1326 -byedomain show results by sender's email domain
1328 -pattern "Description" /pattern/
1329 Count lines matching specified patterns and show them in
1330 the results. It can be specified multiple times. Eg:
1331 -pattern 'Refused connections' '/refused connection/'
1333 -merge merge previously generated reports into a new report
1335 -html output the results in HTML
1336 -charts Create charts (this requires the GD::Graph modules)
1337 -chartdir <dir> Create the charts' png files in the directory <dir>
1338 -chartrel <dir> Specify the relative directory for the "img src=" tags
1339 from where to include the charts in the html file
1340 -chartdir and -chartrel default to '.'
1342 -d Debug mode - dump the eval'ed parser onto STDERR.
1351 =head2 generate_parser();
1353 $parser = generate_parser();
1355 This subroutine generates the parsing routine which will be
1356 used to parse the mainlog. We take the base operation, and remove bits not in use.
1357 This improves performance depending on what bits you take out or add.
1359 I've tested using study(), but this does not improve performance.
1361 We store our parsing routing in a variable, and process it looking for #IFDEF (Expression)
1362 or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If
1363 the expression evaluates to true, then it is included/excluded accordingly.
1367 sub generate_parser {
1369 my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new);
1370 my($tod,$m_hour,$m_min,$id,$flag);
1372 next if length($_) < 38;
1375 # next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d)/;
1376 next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)/o;
1378 ($tod,$m_hour,$m_min) = ($1,$2,$3);
1381 my($extra) = defined($4)? 6 : 0;
1382 $id = substr($_, 20 + $extra, 16);
1383 $flag = substr($_, 37 + $extra, 2);
1386 # Watch for user specified patterns.
1387 my $user_pattern_index = 0;
1388 foreach (@user_patterns) {
1389 $user_pattern_totals[$user_pattern_index] = 0;
1390 $parser .= " \$user_pattern_totals[$user_pattern_index]++ if $_;\n";
1391 $user_pattern_index++;
1395 next unless ($flag =~ /<=|=>|->|==|\\*\\*|Co/);
1397 #Strip away the timestamp, ID and flag (which could be "Com" for completed)
1398 #This speeds up the later pattern matches.
1399 # $_ = substr($_, 40);
1401 $_ = substr($_, 40 + $extra); # PH
1403 # JN - Skip over certain transports as specified via the "-nt/.../" command
1404 # line switch (where ... is a perl style regular expression). This is
1405 # required so that transports that skew stats such as SpamAssassin can be
1407 #IFDEF ($transport_pattern)
1408 if (/\\sT=(\\S+)/) {
1409 next if ($1 =~ /$transport_pattern/o) ;
1411 #ENDIF ($transport_pattern)
1414 $host = "local"; #Host is local unless otherwise specified.
1415 $domain = "localdomain"; #Domain is localdomain unless otherwise specified.
1418 # Do some pattern matches to get the host and IP address.
1419 # We expect lines to be of the form "H=[IpAddr]" or "H=Host [IpAddr]" or
1420 # "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]".
1421 # We do 2 separate matches to keep the matches simple and fast.
1422 if (/\\sH=(\\S+)/) {
1425 ($ip) = /\\sH=.*?(\\s\\[[^]]+\\])/;
1426 # If there is only an IP address, it will be in $host and $ip will be
1427 # unset. That is OK, because we only use $ip in conjunction with $host
1428 # below. But make it empty to avoid warning messages.
1429 $ip = "" if !defined $ip;
1431 #IFDEF ($do_sender{Domain})
1432 if ($host !~ /^\\[/ && $host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) {
1433 # Remove the host portion from the DNS name. We ensure that we end up with
1434 # at least xxx.yyy. $host can be "(x.y.z)" or "x.y.z".
1435 $domain = lc("$1.$2");
1436 $domain =~ s/^\\.//; #Remove preceding dot.
1438 #ENDIF ($do_sender{Domain})
1442 #IFDEF ($do_sender{Email})
1443 $email = (/^(\S+)/) ? $1 : "";
1444 #ENDIF ($do_sender{Email})
1446 #IFDEF ($do_sender{Edomain})
1447 $edomain = (/^\S*?\\@(\S+)/) ? lc($1) : "";
1448 #ENDIF ($do_sender{Edomain})
1450 if ($tod lt $begin) {
1453 elsif ($tod gt $end) {
1458 if ($flag eq "<=") {
1459 $thissize = (/\\sS=(\\d+)( |$)/) ? $1 : 0;
1460 $size{$id} = $thissize;
1462 #IFDEF ($show_relay)
1463 if ($host ne "local") {
1464 # Save incoming information in case it becomes interesting
1465 # later, when delivery lines are read.
1466 my($from) = /^(\\S+)/;
1467 $from_host{$id} = "$host$ip";
1468 $from_address{$id} = $from;
1470 #ENDIF ($show_relay)
1472 #IFDEF ($local_league_table || $include_remote_users)
1476 #IFDEF ($local_league_table && $include_remote_users)
1477 { #Store both local and remote users.
1478 #ENDIF ($local_league_table && $include_remote_users)
1480 #IFDEF ($local_league_table && ! $include_remote_users)
1481 if ($host eq "local") { #Store local users only.
1482 #ENDIF ($local_league_table && ! $include_remote_users)
1484 #IFDEF ($include_remote_users && ! $local_league_table)
1485 if ($host ne "local") { #Store remote users only.
1486 #ENDIF ($include_remote_users && ! $local_league_table)
1488 $received_count_user{$user}++;
1489 add_volume(\\$received_data_user{$user},\\$received_data_gigs_user{$user},$thissize);
1492 #ENDIF ($local_league_table || $include_remote_users)
1494 #IFDEF ($do_sender{Host})
1495 $received_count{Host}{$host}++;
1496 add_volume(\\$received_data{Host}{$host},\\$received_data_gigs{Host}{$host},$thissize);
1497 #ENDIF ($do_sender{Host})
1499 #IFDEF ($do_sender{Domain})
1501 $received_count{Domain}{$domain}++;
1502 add_volume(\\$received_data{Domain}{$domain},\\$received_data_gigs{Domain}{$domain},$thissize);
1504 #ENDIF ($do_sender{Domain})
1506 #IFDEF ($do_sender{Email})
1507 $received_count{Email}{$email}++;
1508 add_volume(\\$received_data{Email}{$email},\\$received_data_gigs{Email}{$email},$thissize);
1509 #ENDIF ($do_sender{Email})
1511 #IFDEF ($do_sender{Edomain})
1512 $received_count{Edomain}{$edomain}++;
1513 add_volume(\\$received_data{Edomain}{$edomain},\\$received_data_gigs{Edomain}{$edomain},$thissize);
1514 #ENDIF ($do_sender{Edomain})
1516 $total_received_count++;
1517 add_volume(\\$total_received_data,\\$total_received_data_gigs,$thissize);
1519 #IFDEF ($#queue_times >= 0)
1520 $arrival_time{$id} = $tod;
1521 #ENDIF ($#queue_times >= 0)
1523 #IFDEF ($hist_opt > 0)
1524 $received_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
1525 #ENDIF ($hist_opt > 0)
1528 elsif ($flag eq "=>") {
1529 $size = $size{$id} || 0;
1530 if ($host ne "local") {
1531 $remote_delivered{$id} = 1;
1534 #IFDEF ($show_relay)
1535 # Determine relaying address if either only one address listed,
1536 # or two the same. If they are different, it implies a forwarding
1537 # or aliasing, which is not relaying. Note that for multi-aliased
1538 # addresses, there may be a further address between the first
1541 if (defined $from_host{$id}) {
1542 if (/^(\\S+)(?:\\s+\\([^)]\\))?\\s+<([^>]+)>/) {
1543 ($old,$new) = ($1,$2);
1549 if ("\\L$new" eq "\\L$old") {
1550 ($old) = /^(\\S+)/ if $old eq "";
1551 my $key = "H=\\L$from_host{$id}\\E A=\\L$from_address{$id}\\E => " .
1552 "H=\\L$host\\E$ip A=\\L$old\\E";
1553 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
1554 $relayed{$key} = 0 if !defined $relayed{$key};
1562 #ENDIF ($show_relay)
1566 #IFDEF ($local_league_table || $include_remote_users)
1567 #IFDEF ($local_league_table && $include_remote_users)
1568 { #Store both local and remote users.
1569 #ENDIF ($local_league_table && $include_remote_users)
1571 #IFDEF ($local_league_table && ! $include_remote_users)
1572 if ($host eq "local") { #Store local users only.
1573 #ENDIF ($local_league_table && ! $include_remote_users)
1575 #IFDEF ($include_remote_users && ! $local_league_table)
1576 if ($host ne "local") { #Store remote users only.
1577 #ENDIF ($include_remote_users && ! $local_league_table)
1579 if (my($user) = split((/\\s</)? " <" : " ", $_)) {
1580 if ($user =~ /^[\\/|]/) {
1581 my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
1582 $user = "$user $parent" if defined $parent;
1584 $delivered_count_user{$user}++;
1585 add_volume(\\$delivered_data_user{$user},\\$delivered_data_gigs_user{$user},$size);
1588 #ENDIF ($local_league_table || $include_remote_users)
1590 #IFDEF ($do_sender{Host})
1591 $delivered_count{Host}{$host}++;
1592 add_volume(\\$delivered_data{Host}{$host},\\$delivered_data_gigs{Host}{$host},$size);
1593 #ENDIF ($do_sender{Host})
1594 #IFDEF ($do_sender{Domain})
1596 $delivered_count{Domain}{$domain}++;
1597 add_volume(\\$delivered_data{Domain}{$domain},\\$delivered_data_gigs{Domain}{$domain},$size);
1599 #ENDIF ($do_sender{Domain})
1600 #IFDEF ($do_sender{Email})
1601 $delivered_count{Email}{$email}++;
1602 add_volume(\\$delivered_data{Email}{$email},\\$delivered_data_gigs{Email}{$email},$size);
1603 #ENDIF ($do_sender{Email})
1604 #IFDEF ($do_sender{Edomain})
1605 $delivered_count{Edomain}{$edomain}++;
1606 add_volume(\\$delivered_data{Edomain}{$edomain},\\$delivered_data_gigs{Edomain}{$edomain},$size);
1607 #ENDIF ($do_sender{Edomain})
1609 $total_delivered_count++;
1610 add_volume(\\$total_delivered_data,\\$total_delivered_data_gigs,$size);
1612 #IFDEF ($show_transport)
1613 my $transport = (/\\sT=(\\S+)/) ? $1 : ":blackhole:";
1614 $transported_count{$transport}++;
1615 add_volume(\\$transported_data{$transport},\\$transported_data_gigs{$transport},$size);
1616 #ENDIF ($show_transport)
1618 #IFDEF ($hist_opt > 0)
1619 $delivered_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
1620 #ENDIF ($hist_opt > 0)
1624 elsif ($flag eq "==" && defined($size{$id}) && !defined($delayed{$id})) {
1629 elsif ($flag eq "**") {
1630 $had_error{$id} = 1 if defined ($size{$id});
1632 #IFDEF ($show_errors)
1633 $errors_count{$_}++;
1634 #ENDIF ($show_errors)
1638 elsif ($flag eq "Co") {
1640 #IFDEF ($#queue_times >= 0)
1641 #Note: id_seconds() benchmarks as 42% slower than seconds() and computing
1642 #the time accounts for a significant portion of the run time.
1644 if (defined $arrival_time{$id}) {
1645 $queued = seconds($tod) - seconds($arrival_time{$id});
1646 delete($arrival_time{$id});
1649 $queued = seconds($tod) - id_seconds($id);
1652 for ($i = 0; $i <= $#queue_times; $i++) {
1653 if ($queued < $queue_times[$i]) {
1655 $remote_queue_bin[$i]++ if $remote_delivered{$id};
1659 $queue_more_than++ if $i > $#queue_times;
1660 #ENDIF ($#queue_times >= 0)
1662 #IFDEF ($show_relay)
1663 delete($from_host{$id});
1664 delete($from_address{$id});
1665 #ENDIF ($show_relay)
1670 # We now do a 'C preprocessor style operation on our parser
1671 # to remove bits not in use.
1672 my(%defines_in_operation,$removing_lines,$processed_parser);
1673 foreach (split (/\n/,$parser)) {
1674 if ((/^\s*#\s*IFDEF\s*\((.*?)\)/i && ! eval $1) ||
1675 (/^\s*#\s*IFNDEF\s*\((.*?)\)/i && eval $1) ) {
1676 $defines_in_operation{$1} = 1;
1677 $removing_lines = 1;
1680 $processed_parser .= $_."\n" unless $removing_lines;
1682 if (/^\s*#\s*ENDIF\s*\((.*?)\)/i) {
1683 delete $defines_in_operation{$1};
1684 unless (keys %defines_in_operation) {
1685 $removing_lines = 0;
1689 print STDERR "# START OF PARSER:\n$processed_parser\n# END OF PARSER\n\n" if $debug;
1691 return $processed_parser;
1698 parse($parser,\*FILEHANDLE);
1700 This subroutine accepts a parser and a filehandle from main and parses each
1701 line. We store the results into global variables.
1706 my($parser,$fh) = @_;
1708 if ($merge_reports) {
1709 parse_old_eximstat_reports($fh);
1720 =head2 print_header();
1724 Print our headers and contents.
1730 my $title = "Exim statistics from $begin to $end";
1733 print html_header($title);
1735 print "<li><a href=\"#grandtotal\">Grand total summary</a>\n";
1736 print "<li><a href=\"#patterns\">User Specified Patterns</a>\n" if @user_patterns;
1737 print "<li><a href=\"#transport\">Deliveries by Transport</a>\n" if $show_transport;
1739 print "<li><a href=\"#Messages received\">Messages received per hour</a>\n";
1740 print "<li><a href=\"#Deliveries\">Deliveries per hour</a>\n";
1742 if ($#queue_times >= 0) {
1743 print "<li><a href=\"#all messages time\">Time spent on the queue: all messages</a>\n";
1744 print "<li><a href=\"#messages with at least one remote delivery time\">Time spent on the queue: messages with at least one remote delivery</a>\n";
1746 print "<li><a href=\"#Relayed messages\">Relayed messages</a>\n" if $show_relay;
1748 foreach ('Host','Domain','Email','Edomain') {
1749 next unless $do_sender{$_};
1750 print "<li><a href=\"#sending \l$_ count\">Top $topcount sending \l${_}s by message count</a>\n";
1751 print "<li><a href=\"#sending \l$_ volume\">Top $topcount sending \l${_}s by volume</a>\n";
1753 if ($local_league_table || $include_remote_users) {
1754 print "<li><a href=\"#local sender count\">Top $topcount local senders by message count</a>\n";
1755 print "<li><a href=\"#local sender volume\">Top $topcount local senders by volume</a>\n";
1757 foreach ('Host','Domain','Email','Edomain') {
1758 next unless $do_sender{$_};
1759 print "<li><a href=\"#\l$_ destination count\">Top $topcount \l$_ destinations by message count</a>\n";
1760 print "<li><a href=\"#\l$_ destination volume\">Top $topcount \l$_ destinations by volume</a>\n";
1762 if ($local_league_table || $include_remote_users) {
1763 print "<li><a href=\"#local destination count\">Top $topcount local destinations by message count</a>\n";
1764 print "<li><a href=\"#local destination volume\">Top $topcount local destinations by volume</a>\n";
1767 print "<li><a href=\"#errors\">List of errors</a>\n" if %errors_count;
1768 print "</ul>\n<hr>\n";
1777 =head2 print_grandtotals();
1779 print_grandtotals();
1781 Print the grand totals.
1785 sub print_grandtotals {
1787 # Get the sender by headings and results. This is complicated as we can have
1788 # different numbers of columns.
1789 my($sender_txt_header,$sender_html_header,$sender_txt_format,$sender_html_format);
1790 my(@received_totals,@delivered_totals);
1791 foreach ('Host','Domain','Email','Edomain') {
1792 next unless $do_sender{$_};
1793 if ($merge_reports) {
1794 push(@received_totals, get_report_total($report_totals{Received},"${_}s"));
1795 push(@delivered_totals,get_report_total($report_totals{Delivered},"${_}s"));
1798 push(@received_totals,scalar(keys %{$received_data{$_}}));
1799 push(@delivered_totals,scalar(keys %{$delivered_data{$_}}));
1801 $sender_html_header .= "<th>${_}s</th>";
1802 $sender_txt_header .= " " x ($COLUMN_WIDTHS - length($_)) . $_ . 's';
1803 $sender_html_format .= "<td align=\"right\">%d</td>";
1804 $sender_txt_format .= " " x ($COLUMN_WIDTHS - 5) . "%6d";
1807 my($format1,$format2);
1810 <a name="grandtotal"></a>
1811 <h2>Grand total summary</h2>
1813 <tr><th>TOTAL</th><th>Volume</th><th>Messages</th>$sender_html_header<th colspan=2>At least one addr<br>Delayed</th><th colspan=2>At least one addr<br>Failed</th>
1816 $format1 = "<tr><td>%s</td><td align=\"right\">%s</td>$sender_html_format<td align=\"right\">%d</td>";
1817 $format2 = "<td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td><td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td>";
1820 my $sender_spaces = " " x length($sender_txt_header);
1825 $sender_spaces At least one address
1826 TOTAL Volume Messages $sender_txt_header Delayed Failed
1828 $format1 = " %-16s %9s %6d $sender_txt_format";
1829 $format2 = " %6d %4.1f%% %6d %4.1f%%",
1832 my($volume,$failed_count);
1833 if ($merge_reports) {
1834 $volume = volume_rounded($report_totals{Received}{Volume}, $report_totals{Received}{'Volume-gigs'});
1835 $total_received_count = get_report_total($report_totals{Received},'Messages');
1836 $failed_count = get_report_total($report_totals{Received},'Failed');
1837 $delayed_count = get_report_total($report_totals{Received},'Delayed');
1840 $volume = volume_rounded($total_received_data, $total_received_data_gigs);
1841 $failed_count = keys %had_error;
1846 printf("$format1$format2\n",'Received',$volume,$total_received_count,
1847 @received_totals,$delayed_count,
1848 ($total_received_count) ? ($delayed_count*100/$total_received_count) : 0,
1850 ($total_received_count) ? ($failed_count*100/$total_received_count) : 0);
1853 if ($merge_reports) {
1854 $volume = volume_rounded($report_totals{Delivered}{Volume}, $report_totals{Delivered}{'Volume-gigs'});
1855 $total_delivered_count = get_report_total($report_totals{Delivered},'Messages');
1858 $volume = volume_rounded($total_delivered_data, $total_delivered_data_gigs);
1860 printf("$format1\n\n",'Delivered',$volume,$total_delivered_count,@delivered_totals);
1861 print "</table>\n" if $html;
1865 =head2 print_user_patterns()
1867 print_user_patterns();
1869 Print the counts of user specified patterns.
1873 sub print_user_patterns {
1877 print "<hr><a name=\"patterns\"></a><h2>User Specified Patterns</h2>\n";
1878 print "<table border=0 width=\"100%\">\n";
1880 print "<table border=1>\n";
1881 print "<tr><th> </th><th>Total</th>\n";
1882 $format1 = "<tr><td>%s</td><td align=\"right\">%d</td>";
1885 print "User Specified Patterns\n";
1886 print "-----------------------";
1888 $format1 = " %-18s %6d";
1892 if ($merge_reports) {
1893 # We are getting our data from previous reports.
1894 foreach $key (@user_descriptions) {
1895 my $count = get_report_total($report_totals{patterns}{$key},'Total');
1896 printf("$format1\n",$key,$count);
1900 # We are getting our data from mainlog files.
1901 my $user_pattern_index = 0;
1902 foreach $key (@user_descriptions) {
1903 printf("$format1\n",$key,$user_pattern_totals[$user_pattern_index]);
1904 $user_pattern_index++;
1914 =head2 print_transport();
1918 Print totals by transport.
1922 sub print_transport {
1924 my(@chartdatanames);
1925 my(@chartdatavals_count);
1926 my(@chartdatavals_vol);
1927 no integer; #Lose this for charting the data.
1930 print "<hr><a name=\"transport\"></a><h2>Deliveries by Transport</h2>\n";
1931 print "<table border=0 width=\"100%\">\n";
1933 print "<table border=1>\n";
1934 print "<tr><th> </th><th>Volume</th><th>Messages</th>\n";
1935 $format1 = "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%d</td>";
1938 print "Deliveries by transport\n";
1939 print "-----------------------";
1940 print "\n Volume Messages\n";
1941 $format1 = " %-18s %6s %6d";
1945 if ($merge_reports) {
1946 # We are getting our data from previous reports.
1947 foreach $key (sort keys %{$report_totals{transport}}) {
1948 my $count = get_report_total($report_totals{transport}{$key},'Messages');
1949 printf("$format1\n",$key,
1950 volume_rounded($report_totals{transport}{$key}{Volume},$report_totals{transport}{$key}{'Volume-gigs'}),
1952 push(@chartdatanames, $key);
1953 push(@chartdatavals_count, $count);
1954 push(@chartdatavals_vol, $report_totals{transport}{$key}{'Volume-gigs'}*$gig + $report_totals{transport}{$key}{Volume} );
1958 # We are getting our data from mainlog files.
1959 foreach $key (sort keys %transported_data) {
1960 printf("$format1\n",$key,
1961 volume_rounded($transported_data{$key},$transported_data_gigs{$key}),
1962 $transported_count{$key});
1963 push(@chartdatanames, $key);
1964 push(@chartdatavals_count, $transported_count{$key});
1965 push(@chartdatavals_vol, $transported_data_gigs{$key}*$gig + $transported_data{$key});
1970 print "</td><td>\n";
1971 if ($HAVE_GD_Graph_pie && $charts)
1973 # calculate the graph
1976 \@chartdatavals_count
1978 my $graph = GD::Graph::pie->new(200, 200);
1980 x_label => 'Transport',
1981 y_label => 'Messages',
1982 title => 'By count',
1984 my $gd = $graph->plot(\@data) or warn($graph->error);
1986 open(IMG, ">$chartdir/transports_count.png") or die $!;
1990 print "<img src=\"$chartrel/transports_count.png\">";
1993 print "</td><td>\n";
1995 if ($HAVE_GD_Graph_pie && $charts) {
2000 my $graph = GD::Graph::pie->new(200, 200);
2002 title => 'By volume',
2004 my $gd = $graph->plot(\@data) or warn($graph->error);
2006 open(IMG, ">$chartdir/transports_vol.png") or die $!;
2010 print "<img src=\"$chartrel/transports_vol.png\">";
2013 print "</td></tr></table>\n";
2020 =head2 print_relay();
2024 Print our totals by relay.
2029 my $temp = "Relayed messages";
2030 print "<hr><a name=\"$temp\"></a><h2>$temp</h2>\n" if $html;
2031 if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) {
2037 print "<table border=1>\n";
2038 print "<tr><th>Count</th><th>From</th><th>To</th>\n";
2039 $format = "<tr><td align=\"right\">%d</td><td>%s</td><td>%s</td>\n";
2042 printf("%s\n%s\n\n", $temp, "-" x length($temp));
2043 $format = "%7d %s\n => %s\n";
2047 foreach $key (sort keys %relayed) {
2048 my $count = $relayed{$key};
2051 my($one,$two) = split(/=> /, $key);
2052 printf($format, $count, $one, $two);
2055 print "</table>\n<p>\n" if $html;
2056 print "${spacing}Total: $shown (plus $relayed_unshown unshown)\n";
2059 print "No relayed messages\n";
2060 print "-------------------\n" unless $html;
2067 =head2 print_errors();
2071 Print our errors. In HTML, we display them as a list rather than a table -
2072 Netscape doesn't like large tables!
2077 my $total_errors = 0;
2079 if (scalar(keys %errors_count) != 0) {
2080 my $temp = "List of errors";
2083 print "<hr><a name=\"errors\"></a><h2>$temp</h2>\n";
2084 print "<ul><li><b>Count - Error</b>\n";
2085 $format = "<li>%d - %s\n";
2088 printf("%s\n%s\n\n", $temp, "-" x length($temp));
2092 foreach $key (sort keys %errors_count) {
2095 $text =~ s/\s\s+/ /g; #Convert multiple spaces to a single space.
2096 $total_errors += $errors_count{$key};
2099 #Translate HTML tag characters. Sergey Sholokh.
2100 $text =~ s/\</\<\;/g;
2101 $text =~ s/\>/\>\;/g;
2103 printf($format,$errors_count{$key},$text);
2106 printf("%5d ", $errors_count{$key});
2107 while (length($text) > 65) {
2108 my($first,$rest) = $text =~ /(.{50}\S*)\s+(.+)/;
2110 printf("%s\n ", $first);
2113 printf("%s\n\n", $text);
2116 print "</ul>\n<p>\n" if $html;
2118 $temp = "Errors encountered: $total_errors";
2120 print "-" x length($temp),"\n" unless $html;
2126 =head2 parse_old_eximstat_reports();
2128 parse_old_eximstat_reports($fh);
2130 Parse old eximstat output so we can merge daily stats to weekly stats and weekly to monthly etc.
2132 To test that the merging still works after changes, do something like the following.
2133 All the diffs should produce no output.
2135 options='-bydomain -byemail -byhost -byedomain'
2136 options="$options -pattern 'Completed Messages' /Completed/"
2137 options="$options -pattern 'Received Messages' /<=/"
2139 ./eximstats $options mainlog > mainlog.txt
2140 ./eximstats $options -merge mainlog.txt > mainlog.2.txt
2141 diff mainlog.txt mainlog.2.txt
2143 ./eximstats $options -html mainlog > mainlog.html
2144 ./eximstats $options -merge -html mainlog.txt > mainlog.2.html
2145 diff mainlog.html mainlog.2.html
2147 ./eximstats $options -merge mainlog.html > mainlog.3.txt
2148 diff mainlog.txt mainlog.3.txt
2150 ./eximstats $options -merge -html mainlog.html > mainlog.3.html
2151 diff mainlog.html mainlog.3.html
2153 ./eximstats $options -nvr mainlog > mainlog.nvr.txt
2154 ./eximstats $options -merge mainlog.nvr.txt > mainlog.4.txt
2155 diff mainlog.txt mainlog.4.txt
2157 # double_mainlog.txt should have twice the values that mainlog.txt has.
2158 ./eximstats $options mainlog mainlog > double_mainlog.txt
2162 sub parse_old_eximstat_reports {
2165 my(%league_table_value_entered, %league_table_value_was_zero, %table_order);
2168 if (/Exim statistics from ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?) to ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?)/) {
2169 $begin = $1 if ($1 lt $begin);
2170 $end = $3 if ($3 gt $end);
2172 elsif (/Grand total summary/) {
2173 # Fill in $report_totals{Received|Delivered}{Volume|Messages|Hosts|Domains|...|Delayed|DelayedPercent|Failed|FailedPercent}
2176 $_ = html2txt($_); #Convert general HTML markup to text.
2177 s/At least one addr//g; #Another part of the HTML output we don't want.
2179 # TOTAL Volume Messages Hosts Domains Delayed Failed
2180 # Received 26MB 237 177 23 8 3.4% 28 11.8%
2181 # Delivered 13MB 233 99 88
2182 if (/TOTAL\s+(.*?)\s*$/) {
2183 @fields = split(/\s+/,$1);
2184 #Delayed and Failed have two columns each, so add the extra field names in.
2185 splice(@fields,-1,1,'DelayedPercent','Failed','FailedPercent');
2187 elsif (/(Received|Delivered)\s+(.*?)\s*$/) {
2188 print STDERR "Parsing $_" if $debug;
2189 add_to_totals($report_totals{$1},\@fields,$2);
2191 last if (/Delivered/); #Last line of this section.
2195 elsif (/User Specified Patterns/i) {
2196 #User Specified Patterns
2197 #-----------------------
2201 while (<$fh>) { last if (/Total/); } #Wait until we get the table headers.
2203 print STDERR "Parsing $_" if $debug;
2204 $_ = html2txt($_); #Convert general HTML markup to text.
2205 if (/^\s*(.*?)\s+(\d+)\s*$/) {
2206 $report_totals{patterns}{$1} = {} unless (defined $report_totals{patterns}{$1});
2207 add_to_totals($report_totals{patterns}{$1},['Total'],$2);
2209 last if (/^\s*$/); #Finished if we have a blank line.
2213 elsif (/Deliveries by transport/i) {
2214 #Deliveries by transport
2215 #-----------------------
2217 # :blackhole: 70KB 51
2218 # address_pipe 655KB 1
2221 while (<$fh>) { last if (/Volume/); } #Wait until we get the table headers.
2223 print STDERR "Parsing $_" if $debug;
2224 $_ = html2txt($_); #Convert general HTML markup to text.
2225 if (/(\S+)\s+(\d+\S*\s+\d+)/) {
2226 $report_totals{transport}{$1} = {} unless (defined $report_totals{transport}{$1});
2227 add_to_totals($report_totals{transport}{$1},['Volume','Messages'],$2);
2229 last if (/^\s*$/); #Finished if we have a blank line.
2232 elsif (/(Messages received|Deliveries) per/) {
2233 # Messages received per hour (each dot is 2 messages)
2234 #---------------------------------------------------
2236 #00-01 106 .....................................................
2237 #01-02 103 ...................................................
2239 # Set a pointer to the interval array so we can use the same code
2240 # block for both messages received and delivered.
2241 my $interval_aref = ($1 eq 'Deliveries') ? \@delivered_interval_count : \@received_interval_count;
2242 my $reached_table = 0;
2244 $reached_table = 1 if (/^00/);
2245 next unless $reached_table;
2246 print STDERR "Parsing $_" if $debug;
2247 if (/^(\d+):(\d+)\s+(\d+)/) { #hh:mm start time format ?
2248 $$interval_aref[($1*60 + $2)/$hist_interval] += $3;
2250 elsif (/^(\d+)-(\d+)\s+(\d+)/) { #hh-hh start-end time format ?
2251 $$interval_aref[($1*60)/$hist_interval] += $3;
2253 else { #Finished the table ?
2259 elsif (/Time spent on the queue: (all messages|messages with at least one remote delivery)/) {
2260 #Time spent on the queue: all messages
2261 #-------------------------------------
2263 #Under 1m 217 91.9% 91.9%
2269 # Set a pointer to the queue bin so we can use the same code
2270 # block for both all messages and remote deliveries.
2271 my $bin_aref = ($1 eq 'all messages') ? \@queue_bin : \@remote_queue_bin;
2272 my $reached_table = 0;
2274 $_ = html2txt($_); #Convert general HTML markup to text.
2275 $reached_table = 1 if (/^\s*Under/);
2276 next unless $reached_table;
2277 my $previous_seconds_on_queue = 0;
2278 if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) {
2279 print STDERR "Parsing $_" if $debug;
2280 my($modifier,$formated_time,$count) = ($1,$2,$3);
2281 my $seconds = unformat_time($formated_time);
2282 my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2;
2283 $previous_seconds_on_queue = $seconds;
2284 $time_on_queue = $seconds * 2 if ($modifier eq 'Over');
2286 for ($i = 0; $i <= $#queue_times; $i++) {
2287 if ($time_on_queue < $queue_times[$i]) {
2288 $$bin_aref[$i] += $count;
2292 # There's only one counter for messages going over the queue
2293 # times so make sure we only count it once.
2294 $queue_more_than += $count if (($bin_aref == \@queue_bin) && ($i > $#queue_times));
2297 last; #Finished the table ?
2302 elsif (/Relayed messages/) {
2306 # 1 addr.domain.com [1.2.3.4] a.user@domain.com
2307 # => addr2.domain2.com [5.6.7.8] a2.user2@domain2.com
2309 #<tr><td align="right">1</td><td>addr.domain.com [1.2.3.4] a.user@domain.com </td><td>addr2.domain2.com [5.6.7.8] a2.user2@domain2.com</td>
2311 my $reached_table = 0;
2314 unless ($reached_table) {
2315 last if (/No relayed messages/);
2316 $reached_table = 1 if (/^\s*\d/ || />\d+</);
2317 next unless $reached_table;
2319 if (/>(\d+)<.td><td>(.*?) ?<.td><td>(.*?)</) {
2320 update_relayed($1,$2,$3);
2322 elsif (/^\s*(\d+)\s+(.*?)\s*$/) {
2323 ($count,$sender) = ($1,$2);
2325 elsif (/=>\s+(.*?)\s*$/) {
2326 update_relayed($count,$sender,$1);
2329 last; #Finished the table ?
2334 elsif (/Top (.*?) by (message count|volume)/) {
2335 #Top 50 sending hosts by message count
2336 #-------------------------------------
2339 my($category,$by_count_or_volume) = ($1,$2);
2341 #As we show 2 views of each table (by count and by volume),
2342 #most (but not all) entries will appear in both tables.
2343 #Set up a hash to record which entries we have already seen
2344 #and one to record which ones we are seeing for the first time.
2345 if ($by_count_or_volume =~ /count/) {
2346 undef %league_table_value_entered;
2347 undef %league_table_value_was_zero;
2351 #As this section processes multiple different table categories,
2352 #set up pointers to the hashes to be updated.
2353 my($count_href,$data_href,$data_gigs_href);
2354 if ($category =~ /local sender/) {
2355 $count_href = \%received_count_user;
2356 $data_href = \%received_data_user;
2357 $data_gigs_href = \%received_data_gigs_user;
2359 elsif ($category =~ /sending (\S+?)s?\b/) {
2360 #Top 50 sending (host|domain|email|edomain)s
2361 #Top sending (host|domain|email|edomain)
2362 $count_href = \%{$received_count{"\u$1"}};
2363 $data_href = \%{$received_data{"\u$1"}};
2364 $data_gigs_href = \%{$received_data_gigs{"\u$1"}};
2366 elsif ($category =~ /local destination/) {
2367 $count_href = \%delivered_count_user;
2368 $data_href = \%delivered_data_user;
2369 $data_gigs_href = \%delivered_data_gigs_user;
2371 elsif ($category =~ /(\S+) destination/) {
2372 #Top 50 (host|domain|email|edomain) destinations
2373 #Top (host|domain|email|edomain) destination
2374 $count_href = \%{$delivered_count{"\u$1"}};
2375 $data_href = \%{$delivered_data{"\u$1"}};
2376 $data_gigs_href = \%{$delivered_data_gigs{"\u$1"}};
2379 my $reached_table = 0;
2381 $_ = html2txt($_); #Convert general HTML markup to text.
2382 $reached_table = 1 if (/^\s*\d/);
2383 next unless $reached_table;
2384 if (/^\s*(\d+)\s+(\S+)\s*(.*?)\s*$/) {
2385 my($count,$rounded_volume,$entry) = ($1,$2,$3);
2386 #Note: $entry fields can be both null and can contain spaces.
2388 #Add the entry into the %table_order hash if it has a rounded volume (KB/MB/GB).
2389 push(@{$table_order{$rounded_volume}{$by_count_or_volume}},$entry) if ($rounded_volume =~ /\D/);
2391 unless ($league_table_value_entered{$entry}) {
2392 $league_table_value_entered{$entry} = 1;
2393 unless ($$count_href{$entry}) {
2394 $$count_href{$entry} = 0;
2395 $$data_href{$entry} = 0;
2396 $$data_gigs_href{$entry} = 0;
2397 $league_table_value_was_zero{$entry} = 1;
2400 $$count_href{$entry} += $count;
2401 #Add the rounded value to the data and data_gigs hashes.
2402 un_round($rounded_volume,\$$data_href{$entry},\$$data_gigs_href{$entry});
2403 print STDERR "$category by $by_count_or_volume: added $count,$rounded_volume to $entry\n" if $debug;
2406 else { #Finished the table ?
2407 if ($by_count_or_volume =~ /volume/) {
2408 #Add a few bytes to appropriate entries to preserve the order.
2410 my($rounded_volume);
2411 foreach $rounded_volume (keys %table_order) {
2412 #For each rounded volume, we want to create a list which has things
2413 #ordered from the volume table at the front, and additional things
2414 #from the count table ordered at the back.
2415 @{$table_order{$rounded_volume}{volume}} = () unless defined $table_order{$rounded_volume}{volume};
2416 @{$table_order{$rounded_volume}{'message count'}} = () unless defined $table_order{$rounded_volume}{'message count'};
2418 map {$mark{$_} = 1} @{$table_order{$rounded_volume}{volume}};
2419 @order = @{$table_order{$rounded_volume}{volume}};
2420 map {push(@order,$_)} grep(!$mark{$_},@{$table_order{$rounded_volume}{'message count'}});
2422 my $bonus_bytes = $#order;
2423 $bonus_bytes = 511 if ($bonus_bytes > 511); #Don't go over the half-K boundary!
2424 while (@order and ($bonus_bytes > 0)) {
2425 my $entry = shift(@order);
2426 if ($league_table_value_was_zero{$entry}) {
2427 $$data_href{$entry} += $bonus_bytes;
2428 print STDERR "$category by $by_count_or_volume: added $bonus_bytes bonus bytes to $entry\n" if $debug;
2439 elsif (/List of errors/) {
2443 # 1 07904931641@one2one.net R=external T=smtp: SMTP error
2444 # from remote mailer after RCPT TO:<07904931641@one2one.net>:
2445 # host mail.one2one.net [193.133.192.24]: 550 User unknown
2447 #<li>1 - ally.dufc@dunbar.org.uk R=external T=smtp: SMTP error from remote mailer after RCPT TO:<ally.dufc@dunbar.org.uk>: host mail.dunbar.org.uk [216.167.89.88]: 550 Unknown local part ally.dufc in <ally.dufc@dunbar.org.uk>
2450 my $reached_table = 0;
2451 my($count,$error,$blanks);
2453 $reached_table = 1 if (/^( *|<li>)(\d+)/);
2454 next unless $reached_table;
2456 s/^<li>(\d+) -/$1/; #Convert an HTML line to a text line.
2457 $_ = html2txt($_); #Convert general HTML markup to text.
2460 $error .= ' ' . $1; #Join a multiline error.
2462 elsif (/^\s*(\d+)\s+(.*)/) {
2464 #Finished with a previous multiline error so save it.
2465 $errors_count{$error} = 0 unless $errors_count{$error};
2466 $errors_count{$error} += $count;
2468 ($count,$error) = ($1,$2);
2470 elsif (/Errors encountered/) {
2472 #Finished the section, so save our stored last error.
2473 $errors_count{$error} = 0 unless $errors_count{$error};
2474 $errors_count{$error} += $count;
2486 =head2 update_relayed();
2488 update_relayed($count,$sender,$recipient);
2490 Adds an entry into the %relayed hash. Currently only used when
2495 sub update_relayed {
2496 my($count,$sender,$recipient) = @_;
2498 #When generating the key, put in the 'H=' and 'A=' which can be used
2500 my $key = "H=$sender => H=$recipient";
2501 $key =~ s/ ([^=\s]+\@\S+|<>)/ A=$1/g;
2502 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
2503 $relayed{$key} = 0 if !defined $relayed{$key};
2504 $relayed{$key} += $count;
2507 $relayed_unshown += $count;
2512 =head2 add_to_totals();
2514 add_to_totals(\%totals,\@keys,$values);
2516 Given a line of space seperated values, add them into the provided hash using @keys
2519 If the value contains a '%', then the value is set rather than added. Otherwise, we
2520 convert the value to bytes and gigs. The gigs get added to I<Key>-gigs.
2525 my($totals_href,$keys_aref,$values) = @_;
2526 my(@values) = split(/\s+/,$values);
2527 my(@keys) = @$keys_aref; #Make a copy as we destroy the one we use.
2529 foreach $value (@values) {
2530 my $key = shift(@keys) or next;
2531 if ($value =~ /%/) {
2532 $$totals_href{$key} = $value;
2535 $$totals_href{$key} = 0 unless ($$totals_href{$key});
2536 $$totals_href{"$key-gigs"} = 0 unless ($$totals_href{"$key-gigs"});
2537 un_round($value, \$$totals_href{$key}, \$$totals_href{"$key-gigs"});
2538 print STDERR "Added $value to $key - $$totals_href{$key} , " . $$totals_href{"$key-gigs"} . "GB.\n" if $debug;
2543 =head2 get_report_total();
2545 $total = get_report_total(\%hash,$key);
2547 If %hash contains values split into Units and Gigs, we calculate and return
2549 $hash{$key} + 1024*1024*1024 * $hash{"${key}-gigs"}
2553 sub get_report_total {
2555 my($hash_ref,$key) = @_;
2556 if ($$hash_ref{"${key}-gigs"}) {
2557 return $$hash_ref{$key} + $gig * $$hash_ref{"${key}-gigs"};
2559 return $$hash_ref{$key} || 0;
2564 $text_line = html2txt($html_line);
2566 Convert a line from html to text. Currently we just convert HTML tags to spaces
2567 and convert >, <, and tags back.
2574 # Convert HTML tags to spacing. Note that the reports may contain <Userid> and
2575 # <Userid@Domain> words, so explicitly specify the HTML tags we will remove
2576 # (the ones used by this program). If someone is careless enough to have their
2577 # Userid the same as an HTML tag, there's not much we can do about it.
2578 s/<\/?(html|head|title|body|h\d|ul|li|a\s+|table|tr|td|th|pre|hr|p|br)\b.*?>/ /og;
2580 s/\<\;/\</og; #Convert '<' to '<'.
2581 s/\>\;/\>/og; #Convert '>' to '>'.
2582 s/\ \;/ /og; #Convert ' ' to ' '.
2586 =head2 get_next_arg();
2588 $arg = get_next_arg();
2590 Because eximstats arguments are often passed as variables,
2591 we can't rely on shell parsing to deal with quotes. This
2592 subroutine returns $ARGV[1] and does a shift. If $ARGV[1]
2593 starts with a quote (' or "), and doesn't end in one, then
2594 we append the next argument to it and shift again. We repeat
2595 until we've got all of the argument.
2597 This isn't perfect as all white space gets reduced to one space,
2598 but it's as good as we can get! If it's esential that spacing
2599 be preserved precisely, then you get that by not using shell
2606 my $matched_pattern = 0;
2608 $arg .= ' ' if $arg;
2609 $arg .= $ARGV[1]; shift(@ARGV);
2610 if ($arg !~ /^['"]/) {
2611 $matched_pattern = 1;
2614 if ($arg =~ s/^(['"])(.*)\1$/$2/) {
2615 $matched_pattern = 1;
2619 die "Mismatched argument quotes - <$arg>.\n" unless $matched_pattern;
2625 ##################################################
2627 ##################################################
2630 $last_timestamp = '';
2634 $show_transport = 1;
2636 $local_league_table = 1;
2637 $include_remote_users = 0;
2639 $volume_rounding = 1;
2640 $localtime_offset = calculate_localtime_offset(); # PH/FANF
2643 $charts_option_specified = 0;
2647 @queue_times = (60, 5*60, 15*60, 30*60, 60*60, 3*60*60, 6*60*60,
2648 12*60*60, 24*60*60);
2651 $offset_seconds = 0;
2655 while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-')
2657 if ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 }
2658 elsif ($ARGV[0] =~ /^\-ne$/) { $show_errors = 0 }
2659 elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/)
2661 if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 }
2663 elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/)
2665 @queue_times = split(/,/, $1);
2667 foreach $q (@queue_times) { $q = eval($q) + 0 }
2668 @queue_times = sort { $a <=> $b } @queue_times;
2669 @queue_times = () if ($#queue_times == 0 && $queue_times[0] == 0);
2671 elsif ($ARGV[0] =~ /^-nt$/) { $show_transport = 0 }
2672 elsif ($ARGV[0] =~ /^\-nt(.?)(.*)\1$/)
2674 if ($1 eq "") { $show_transport = 0 } else { $transport_pattern = $2 }
2676 elsif ($ARGV[0] =~ /^-t(\d+)$/) { $topcount = $1 }
2677 elsif ($ARGV[0] =~ /^-tnl$/) { $local_league_table = 0 }
2678 elsif ($ARGV[0] =~ /^-html$/) { $html = 1 }
2679 elsif ($ARGV[0] =~ /^-merge$/) { $merge_reports = 1 }
2680 elsif ($ARGV[0] =~ /^-charts$/) {
2682 warn "WARNING: CPAN Module GD::Graph::pie not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_pie;
2683 warn "WARNING: CPAN Module GD::Graph::linespoints not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_linespoints;
2685 elsif ($ARGV[0] =~ /^-chartdir$/) { $chartdir = $ARGV[1]; shift; $charts_option_specified = 1; }
2686 elsif ($ARGV[0] =~ /^-chartrel$/) { $chartrel = $ARGV[1]; shift; $charts_option_specified = 1; }
2687 elsif ($ARGV[0] =~ /^-cache$/) { } #Not currently used.
2688 elsif ($ARGV[0] =~ /^-byhost$/) { $do_sender{Host} = 1 }
2689 elsif ($ARGV[0] =~ /^-bydomain$/) { $do_sender{Domain} = 1 }
2690 elsif ($ARGV[0] =~ /^-byemail$/) { $do_sender{Email} = 1 }
2691 elsif ($ARGV[0] =~ /^-byemaildomain$/) { $do_sender{Edomain} = 1 }
2692 elsif ($ARGV[0] =~ /^-byedomain$/) { $do_sender{Edomain} = 1 }
2693 elsif ($ARGV[0] =~ /^-nvr$/) { $volume_rounding = 0 }
2694 elsif ($ARGV[0] =~ /^-d$/) { $debug = 1 }
2695 elsif ($ARGV[0] =~ /^--?h(elp)?$/){ help() }
2696 elsif ($ARGV[0] =~ /^-t_remote_users$/) { $include_remote_users = 1 }
2697 elsif ($ARGV[0] =~ /^-pattern$/)
2699 push(@user_descriptions,get_next_arg());
2700 push(@user_patterns,get_next_arg());
2702 elsif ($ARGV[0] =~ /^-utc$/)
2704 # We don't need this value if the log is in UTC.
2705 $localtime_offset = undef;
2709 print STDERR "Eximstats: Unknown or malformed option $ARGV[0]\n";
2715 # Check that all the charts options are specified.
2716 warn "-charts option not specified. Use -help for help.\n" if ($charts_option_specified && ! $charts);
2718 # Default to display tables by sending Host.
2719 $do_sender{Host} = 1 unless ($do_sender{Domain} || $do_sender{Email} || $do_sender{Edomain});
2722 for (my $i = 0; $i <= $#queue_times; $i++) {
2724 $remote_queue_bin[$i] = 0;
2727 # Compute the number of slots for the histogram
2731 if ($hist_opt > 60 || 60 % $hist_opt != 0)
2733 print "Eximstats: -h must specify a factor of 60\n";
2736 $hist_interval = 60/$hist_opt; #Interval in minutes.
2737 $hist_number = (24*60)/$hist_interval; #Number of intervals per day.
2738 @received_interval_count = (0) x $hist_number;
2739 @delivered_interval_count = (0) x $hist_number;
2742 #$queue_unknown = 0;
2744 $total_received_data = 0;
2745 $total_received_data_gigs = 0;
2746 $total_received_count = 0;
2748 $total_delivered_data = 0;
2749 $total_delivered_data_gigs = 0;
2750 $total_delivered_count = 0;
2752 $queue_more_than = 0;
2754 $relayed_unshown = 0;
2755 $begin = "9999-99-99 99:99:99";
2756 $end = "0000-00-00 00:00:00";
2758 foreach $section ('Received','Delivered') {
2759 foreach $type ('Volume','Messages','Delayed','Failed','Hosts','Domains','Emails','Edomains') {
2760 $report_totals{$section}{$type} = 0;
2764 # Generate our parser.
2765 my $parser = generate_parser();
2770 # Scan the input files and collect the data
2771 foreach my $file (@ARGV) {
2772 if ($file =~ /\.gz/) {
2773 unless (open(FILE,"gunzip -c $file |")) {
2774 print STDERR "Failed to gunzip -c $file: $!";
2778 elsif ($file =~ /\.Z/) {
2779 unless (open(FILE,"uncompress -c $file |")) {
2780 print STDERR "Failed to uncompress -c $file: $!";
2785 unless (open(FILE,$file)) {
2786 print STDERR "Failed to read $file: $!";
2790 #Now parse the filehandle, updating the global variables.
2791 parse($parser,\*FILE);
2796 #No files provided. Parse STDIN, updating the global variables.
2797 parse($parser,\*STDIN);
2801 if ($begin eq "9999-99-99 99:99:99") {
2802 print "**** No valid log lines read\n";
2806 # Output our results.
2808 print_grandtotals();
2810 # Print counts of user specified patterns if required.
2811 print_user_patterns() if @user_patterns;
2813 # Print totals by transport if required.
2814 print_transport() if $show_transport;
2816 # Print the deliveries per interval as a histogram, unless configured not to.
2817 # First find the maximum in one interval and scale accordingly.
2818 if ($hist_opt > 0) {
2819 print_histogram("Messages received", @received_interval_count);
2820 print_histogram("Deliveries", @delivered_interval_count);
2823 # Print times on queue if required.
2824 if ($#queue_times >= 0) {
2825 print_queue_times("all messages", \@queue_bin,$queue_more_than);
2826 print_queue_times("messages with at least one remote delivery",\@remote_queue_bin,$queue_more_than);
2829 # Print relay information if required.
2830 print_relay() if $show_relay;
2832 # Print the league tables, if topcount isn't zero.
2833 if ($topcount > 0) {
2834 foreach ('Host','Domain','Email','Edomain') {
2835 next unless $do_sender{$_};
2836 print_league_table("sending \l$_", $received_count{$_}, $received_data{$_},$received_data_gigs{$_});
2839 print_league_table("local sender", \%received_count_user,
2840 \%received_data_user,\%received_data_gigs_user) if ($local_league_table || $include_remote_users);
2841 foreach ('Host','Domain','Email','Edomain') {
2842 next unless $do_sender{$_};
2843 print_league_table("\l$_ destination", $delivered_count{$_}, $delivered_data{$_},$delivered_data_gigs{$_});
2845 print_league_table("local destination", \%delivered_count_user,
2846 \%delivered_data_user,\%delivered_data_gigs_user) if ($local_league_table || $include_remote_users);
2849 # Print the error statistics if required.
2850 print_errors() if $show_errors;
2853 print "</body>\n</html>\n"