2 # $Cambridge: exim/src/src/eximstats.src,v 1.2 2004/11/24 14:43:57 ph10 Exp $
4 # Copyright (c) 2001 University of Cambridge.
5 # See the file NOTICE for conditions of use and distribution.
7 # Perl script to generate statistics from one or more Exim log files.
9 # Usage: eximstats [<options>] <log file> <log file> ...
11 # 1996-05-21: Ignore lines not starting with valid date/time, just in case
12 # these get into a log file.
13 # 1996-11-19: Add the -h option to control the size of the histogram,
14 # and optionally turn it off.
15 # Use some Perl 5 things; it should be everywhere by now.
16 # Add the Perl -w option and rewrite so no warnings are given.
17 # Add the -t option to control the length of the "top" listing.
18 # Add the -ne, -nt options to turn off errors and transport
20 # Add information about length of time on queue, and -q<list> to
21 # control the intervals and turn it off.
22 # Add count and percentage of delayed messages to the Received
24 # Show total number of errors.
25 # Add count and percentage of messages with errors to Received
27 # Add information about relaying and -nr to suppress it.
28 # 1997-02-03 Merged in some of the things Nigel Metheringham had done:
30 # Added received histogram as well as delivered
31 # Added local senders' league table
32 # Added local recipients' league table
33 # 1997-03-10 Fixed typo "destinationss"
34 # Allow for intermediate address between final and original
35 # when testing for relaying
36 # Give better message when no input
37 # 1997-04-24 Fixed bug in layout of error listing that was depending on
38 # text length (output line got repeated).
39 # 1997-05-06 Bug in option decoding when only one option.
40 # Overflow bug when handling very large volumes.
41 # 1997-10-28 Updated to handle revised log format that might show
42 # HELO name as well as host name before IP number
43 # 1998-01-26 Bugs in the function for calculating the number of seconds
44 # since 1970 from a log date
45 # 1998-02-02 Delivery to :blackhole: doesn't have a T= entry in the log
46 # line; cope with this, thereby avoiding undefined problems
47 # Very short log line gave substring error
48 # 1998-02-03 A routed delivery to a local transport may not have <> in the
49 # log line; terminate the address at white space, not <
50 # 1998-09-07 If first line of input was a => line, $thissize was undefined;
52 # 1998-12-21 Adding of $thissize from => line should have been adding $size.
53 # Oops. Should have looked more closely when fixing the previous
55 # 1999-11-12 Increased the field widths for printed integers; numbers are
56 # bigger than originally envisaged.
57 # 2001-03-21 Converted seconds() routine to use Time::Local, fixing a bug
58 # whereby seconds($timestamp) - id_seconds($id) gave an
60 # Added POD documentation.
61 # Moved usage instructions into help() subroutine.
62 # Added 'use strict' and declared all global variables.
63 # Added '-html' flag and resultant code.
64 # Added '-cache' flag and resultant code.
65 # Added add_volume() routine and converted all volume variables
66 # to use it, fixing the overflow problems for individual hosts
68 # Converted all volume output to GB/MB/KB as appropriate.
69 # Don't store local user stats if -nfl is specified.
70 # Modifications done by: Steve Campbell (<steve@computurn.com>)
71 # 2001-04-02 Added the -t_remote_users flag. Steve Campbell.
72 # 2001-10-15 Added the -domain flag. Steve Campbell.
73 # 2001-10-16 Accept files on STDIN or on the command line. Steve Campbell.
74 # 2001-10-21 Removed -domain flag and added -bydomain, -byhost, and -byemail.
75 # We now generate our main parsing subroutine as an eval statement
76 # which improves performance dramatically when not all the results
77 # are required. We also cache the last timestamp to time convertion.
79 # NOTE: 'Top 50 destinations by (message count|volume)' lines are
80 # now 'Top N (host|email|domain) destinations by (message count|volume)'
81 # where N is the topcount. Steve Campbell.
83 # 2001-10-30 V1.16 Joachim Wieland.
84 # Fixed minor bugs in add_volume() when taking over this version
85 # for use in Exim 4: -w gave uninitialized value warnings in
86 # two situations: for the first addition to a counter, and if
87 # there were never any gigabytes, thereby leaving the $gigs
89 # Initialized $last_timestamp to stop a -w uninitialized warning.
90 # Minor layout tweak for grand totals (nitpicking).
91 # Put the IP addresses for relaying stats in [] and separated by
92 # a space from the domain name.
93 # Removed the IPv4-specific address test when picking out addresses
94 # for relaying. Anything inside [] is OK.
96 # 2002-07-02 Philip Hazel
97 # Fixed "uninitialized variable" message that occurred for relay
98 # messages that arrived from H=[1.2.3.4] hosts (no name shown).
99 # This bug didn't affect the output.
101 # 2002-04-15 V1.17 Joachim Wieland.
102 # Added -charts, -chartdir. -chartrel options which use
103 # GD::Graph modules to create graphical charts of the statistics.
105 # 2002-04-15 V1.18 Steve Campbell.
106 # Added a check for $domain to to stop a -w uninitialized warning.
107 # Added -byemaildomain option.
108 # Only print HTML header links to included tables!
110 # 2002-08-02 V1.19 Steve Campbell.
111 # Changed the debug mode to dump the parser onto STDERR rather
112 # than STDOUT. Documented the -d flag into the help().
113 # Rejoined the divergent 2002-04-15 and 2002-07-02 releases.
115 # 2002-08-21 V1.20 Steve Campbell.
116 # Added the '-merge' option to allow merging of previous reports.
117 # Fixed a missing semicolon when doing -bydomain.
118 # Make volume charts plot the data gigs and bytes rather than just bytes.
119 # Only process log lines with $flag =~ /<=|=>|->|==|\*\*|Co/
120 # Converted Emaildomain to Edomain - the column header was too wide!
121 # This changes the text output slightly. You can revert to the old
122 # column widths by changing $COLUMN_WIDTHS to 7;
124 # 2002-09-04 V1.21 Andreas J Mueller
125 # Local deliveries domain now defaults to 'localdomain'.
126 # Don't match F=<From> when looking for the user.
128 # 2002-09-05 V1.22 Steve Campbell
129 # Fixed a perl 5.005 incompatibility problem ('our' variables).
131 # 2002-09-11 V1.23 Steve Campbell
132 # Stopped -charts option from throwing errors on null data.
133 # Don't print out 'Errors encountered' unless there are any.
135 # 2002-10-21 V1.23a Philip Hazel - patch from Tony Finch put in until
136 # Steve's eximstats catches up.
137 # Handle log files that include the timezone after the timestamp.
138 # Switch to assuming that log timestamps are in local time, with
139 # an option for UTC timestamps, as in Exim itself.
141 # 2003-02-05 V1.24 Steve Campbell
142 # Added in Sergey Sholokh's code to convert '<' and '>' characters
143 # in HTML output. Also added code to convert them back with -merge.
144 # Fixed timestamp offsets to convert to seconds rather than minutes.
145 # Updated -merge to work with output files using timezones.
146 # Added cacheing to speed up the calculation of timezone offsets.
148 # 2003-02-07 V1.25 Steve Campbell
149 # Optimised the usage of mktime() in the seconds subroutine.
150 # Removed the now redundant '-cache' option.
151 # html2txt() now explicitly matches HTML tags.
152 # Implemented a new sorting algorithm - the top_n_sort() routine.
153 # Added Danny Carroll's '-nvr' flag and code.
155 # 2003-03-13 V1.26 Steve Campbell
156 # Implemented HTML compliance changes recommended by Bernard Massot.
157 # Bug fix to allow top_n_sort() to handle null keys.
158 # Convert all domains and edomains to lowercase.
159 # Remove preceding dots from domains.
161 # 2003-03-13 V1.27 Steve Campbell
162 # Replaced border attributes with 'border=1', as recommended by
165 # 2003-06-03 V1.28 John Newman
166 # Added in the ability to skip over the parsing and evaulation of
167 # specific transports as passed to eximstats via the new "-nt/.../"
168 # command line argument. This new switch allows the viewing of
169 # not more accurate statistics but more applicable statistics when
170 # special transports are in use (ie; SpamAssassin). We need to be
171 # able to ignore transports such as this otherwise the resulting
172 # local deliveries are significantly skewed (doubled)...
174 # 2003-11-06 V1.29 Steve Campbell
175 # Added the '-pattern "Description" "/pattern/"' option.
177 # 2004-02-17 V1.30 Steve Campbell
178 # Added warnings if required GD::Graph modules are not available or
179 # insufficient -chart* options are specified.
181 # 2004-02-20 V1.31 Andrea Balzi
182 # Only show the Local Sender/Destination links if the tables exist.
184 # 2004-07-05 V1.32 Steve Campbell
185 # Fix '-merge -h0' divide by zero error.
187 # 2004-07-15 V1.33 Steve Campbell
188 # Documentation update - I've converted the subroutine
189 # documentation from POD to comments.
194 eximstats - generates statistics from Exim mainlog files.
198 eximstats [Options] mainlog1 mainlog2 ... > report.txt
199 eximstats -html [Options] mainlog1 mainlog2 ... > report.html
200 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_report.txt
208 histogram divisions per hour. The default is 1, and
209 0 suppresses histograms. Valid values are:
211 0, 1, 2, 3, 5, 10, 15, 20, 30 or 60.
215 Don't display error information.
219 Don't display relaying information.
221 =item B<-nr>I</pattern/>
223 Don't display relaying information that matches.
227 Don't display transport information.
229 =item B<-nt>I</pattern/>
231 Don't display transport information that matches
235 List of times for queuing information single 0 item suppresses.
239 Display top <number> sources/destinations
240 default is 50, 0 suppresses top listing.
244 Omit local sources/destinations in top listing.
246 =item B<-t_remote_users>
248 Include remote users in the top source/destination listings.
252 Show results by sending host. This may be combined with
253 B<-bydomain> and/or B<-byemail> and/or B<-byedomain>. If none of these options
254 are specified, then B<-byhost> is assumed as a default.
258 Show results by sending domain.
259 May be combined with B<-byhost> and/or B<-byemail> and/or B<-byedomain>.
263 Show results by sender's email address.
264 May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byedomain>.
266 =item B<-byemaildomain> or B<-byedomain>
268 Show results by sender's email domain.
269 May be combined with B<-byhost> and/or B<-bydomain> and/or B<-byemail>.
271 =item B<-pattern> I<Description> I</Pattern/>
273 Look for the specified pattern and count the number of lines in which it appears.
274 This option can be specified multiple times. Eg:
276 -pattern 'Refused connections' '/refused connection/'
281 This option allows eximstats to merge old eximstat reports together. Eg:
283 eximstats mainlog.sun > report.sun.txt
284 eximstats mainlog.mon > report.mon.txt
285 eximstats mainlog.tue > report.tue.txt
286 eximstats mainlog.wed > report.web.txt
287 eximstats mainlog.thu > report.thu.txt
288 eximstats mainlog.fri > report.fri.txt
289 eximstats mainlog.sat > report.sat.txt
290 eximstats -merge report.*.txt > weekly_report.txt
291 eximstats -merge -html report.*.txt > weekly_report.html
297 You can merge text or html reports and output the results as text or html.
301 You can use all the normal eximstat output options, but only data
302 included in the original reports can be shown!
306 When merging reports, some loss of accuracy may occur in the top I<n> lists.
307 This will be towards the ends of the lists.
311 The order of items in the top I<n> lists may vary when the data volumes
312 round to the same value.
318 Output the results in HTML.
322 Create graphical charts to be displayed in HTML output.
324 This requires the following modules which can be obtained
325 from http://www.cpan.org/modules/01modules.index.html
337 To install these, download and unpack them, then use the normal perl installation procedure:
344 =item B<-chartdir>I <dir>
346 Create the charts in the directory <dir>
348 =item B<-chartrel>I <dir>
350 Specify the relative directory for the "img src=" tags from where to include
355 Debug flag. This outputs the eval()'d parser onto STDOUT which makes it
356 easier to trap errors in the eval section. Remember to add 1 to the line numbers to allow for the
363 Eximstats parses exim mainlog files and outputs a statistical
364 analysis of the messages processed. By default, a text
365 analysis is generated, but you can request an html analysis
366 by using the B<-html> flag. See the help (B<-help>) to learn
367 about how to create charts from the tables.
371 There is a web site at http://www.exim.org - this contains details of the
372 mailing list exim-users@exim.org.
376 This program does not perfectly handle messages whose received
377 and delivered log lines are in different files, which can happen
378 when you have multiple mail servers and a message cannot be
379 immeadiately delivered. Fixing this could be tricky...
386 # use Time::Local; # PH/FANF
389 use vars qw($HAVE_GD_Graph_pie $HAVE_GD_Graph_linespoints);
390 eval { require GD::Graph::pie; };
391 $HAVE_GD_Graph_pie = $@ ? 0 : 1;
392 eval { require GD::Graph::linespoints; };
393 $HAVE_GD_Graph_linespoints = $@ ? 0 : 1;
396 ##################################################
398 ##################################################
399 # 'use vars' instead of 'our' as perl5.005 is still in use out there!
400 use vars qw(@tab62 @days_per_month $gig);
401 use vars qw($VERSION);
402 use vars qw($COLUMN_WIDTHS);
406 (0,1,2,3,4,5,6,7,8,9,0,0,0,0,0,0, # 0-9
407 0,10,11,12,13,14,15,16,17,18,19,20, # A-K
408 21,22,23,24,25,26,27,28,29,30,31,32, # L-W
409 33,34,35, 0, 0, 0, 0, 0, # X-Z
410 0,36,37,38,39,40,41,42,43,44,45,46, # a-k
411 47,48,49,50,51,52,53,54,55,56,57,58, # l-w
414 @days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
415 $gig = 1024 * 1024 * 1024;
418 # How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
421 # Declare global variables.
422 use vars qw($total_received_data $total_received_data_gigs $total_received_count);
423 use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_count);
424 use vars qw(%arrival_time %size %from_host %from_address);
425 use vars qw(%timestamp2time); #Hash of timestamp => time.
426 use vars qw($last_timestamp $last_time); #The last time convertion done.
427 use vars qw($last_date $date_seconds); #The last date convertion done.
428 use vars qw($last_offset $offset_seconds); #The last time offset convertion done.
429 use vars qw($localtime_offset);
430 use vars qw($i); #General loop counter.
431 use vars qw($debug); #Debug mode?
432 use vars qw($ntopchart); #How many entries should make it into the chart?
433 use vars qw($gddirectory); #Where to put files from GD::Graph
437 # The following are parameters whose values are
438 # set by command line switches:
439 use vars qw($show_errors $show_relay $show_transport $transport_pattern);
440 use vars qw($topcount $local_league_table $include_remote_users);
441 use vars qw($hist_opt $hist_interval $hist_number $volume_rounding);
442 use vars qw($relay_pattern @queue_times $html @user_patterns @user_descriptions);
444 use vars qw(%do_sender); #Do sender by Host, Domain, Email, and/or Edomain tables.
445 use vars qw($charts $chartrel $chartdir $charts_option_specified);
446 use vars qw($merge_reports); #Merge old reports ?
448 # The following are modified in the parse() routine, and
449 # referred to in the print_*() routines.
450 use vars qw($queue_more_than $delayed_count $relayed_unshown $begin $end);
451 use vars qw(%received_count %received_data %received_data_gigs);
452 use vars qw(%delivered_count %delivered_data %delivered_data_gigs);
453 use vars qw(%received_count_user %received_data_user %received_data_gigs_user);
454 use vars qw(%delivered_count_user %delivered_data_user %delivered_data_gigs_user);
455 use vars qw(%transported_count %transported_data %transported_data_gigs);
456 use vars qw(%remote_delivered %relayed %delayed %had_error %errors_count);
457 use vars qw(@queue_bin @remote_queue_bin @received_interval_count @delivered_interval_count);
458 use vars qw(@user_pattern_totals);
460 use vars qw(%report_totals);
465 ##################################################
467 ##################################################
470 #######################################################################
473 # $rounded_volume = volume_rounded($bytes,$gigabytes);
475 # Given a data size in bytes, round it to KB, MB, or GB
478 # Eg 12000 => 12KB, 15000000 => 14GB, etc.
480 # Note: I've experimented with Math::BigInt and it results in a 33%
481 # performance degredation as opposed to storing numbers split into
482 # bytes and gigabytes.
483 #######################################################################
495 if ($volume_rounding) {
499 $rounded = sprintf("%6d", $x);
501 elsif ($x < 10000000) {
502 $rounded = sprintf("%4dKB", ($x + 512)/1024);
505 $rounded = sprintf("%4dMB", ($x + 512*1024)/(1024*1024));
508 # Values between 1GB and 10GB are printed in MB
510 $rounded = sprintf("%4dMB", ($g * 1024) + ($x + 512*1024)/(1024*1024));
513 # Handle values over 10GB
514 $rounded = sprintf("%4dGB", $g + ($x + $gig/2)/$gig);
518 # We don't want any rounding to be done.
519 $rounded = sprintf("%4d", ($g * $gig) + $x);
526 #######################################################################
529 # un_round($rounded_volume,\$bytes,\$gigabytes);
531 # Given a volume in KB, MB or GB, as generated by volume_rounded(),
532 # do the reverse transformation and convert it back into Bytes and Gigabytes.
533 # These are added to the $bytes and $gigabytes parameters.
535 # Given a data size in bytes, round it to KB, MB, or GB
538 # EG: 500 => (500,0), 14GB => (0,14), etc.
539 #######################################################################
541 my($rounded,$bytes_sref,$gigabytes_sref) = @_;
543 if ($rounded =~ /(\d+)GB/) {
544 $$gigabytes_sref += $1;
546 elsif ($rounded =~ /(\d+)MB/) {
547 $$gigabytes_sref += $1 / 1024;
548 $$bytes_sref += (($1 % 1024 ) * 1024 * 1024);
550 elsif ($rounded =~ /(\d+)KB/) {
551 $$gigabytes_sref += $1 / (1024 * 1024);
552 $$bytes_sref += ($1 % (1024 * 1024) * 1024);
554 elsif ($rounded =~ /(\d+)/) {
555 $$gigabytes_sref += $1 / $gig;
556 $$bytes_sref += $1 % $gig;
559 #Now reduce the bytes down to less than 1GB.
560 add_volume($bytes_sref,$gigabytes_sref,0) if ($$bytes_sref > $gig);
564 #######################################################################
567 # add_volume(\$bytes,\$gigs,$size);
569 # Add $size to $bytes/$gigs where this is a number split into
570 # bytes ($bytes) and gigabytes ($gigs). This is significantly
571 # faster than using Math::BigInt.
572 #######################################################################
574 my($bytes_ref,$gigs_ref,$size) = @_;
575 $$bytes_ref = 0 if ! defined $$bytes_ref;
576 $$gigs_ref = 0 if ! defined $$gigs_ref;
577 $$bytes_ref += $size;
578 while ($$bytes_ref > $gig) {
585 #######################################################################
588 # $formatted_time = format_time($seconds);
590 # Given a time in seconds, break it down into
591 # weeks, days, hours, minutes, and seconds.
593 # Eg 12005 => 3h20m5s
594 #######################################################################
606 $p .= "$w"."w" if $w > 0;
607 $p .= "$d"."d" if $d > 0;
608 $p .= "$h"."h" if $h > 0;
609 $p .= "$m"."m" if $m > 0;
610 $p .= "$s"."s" if $s > 0 || $p eq "";
615 #######################################################################
618 # $seconds = unformat_time($formatted_time);
620 # Given a time in weeks, days, hours, minutes, or seconds, convert it to seconds.
622 # Eg 3h20m5s => 12005
623 #######################################################################
625 my($formated_time) = pop @_;
628 while ($formated_time =~ s/^(\d+)([wdhms]?)//) {
629 $time += $1 if ($2 eq '' || $2 eq 's');
630 $time += $1 * 60 if ($2 eq 'm');
631 $time += $1 * 60 * 60 if ($2 eq 'h');
632 $time += $1 * 60 * 60 * 24 if ($2 eq 'd');
633 $time += $1 * 60 * 60 * 24 * 7 if ($2 eq 'w');
639 #######################################################################
642 # $time = seconds($timestamp);
644 # Given a time-of-day timestamp, convert it into a time() value using
645 # POSIX::mktime. We expect the timestamp to be of the form
646 # "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12,
647 # and the year to be absolute (we do the necessary conversions). The
648 # timestamp may be followed with an offset from UTC like "+$hh$mm"; if the
649 # offset is not present, and we have not been told that the log is in UTC
650 # (with the -utc option), then we adjust the time by the current local
651 # time offset so that it can be compared with the time recorded in message
654 # To improve performance, we only use mktime on the date ($year-$mon-$day),
655 # and only calculate it if the date is different to the previous time we
656 # came here. We then add on seconds for the '$hour:$min:$sec'.
658 # We also store the results of the last conversion done, and only
659 # recalculate if the date is different.
661 # We used to have the '-cache' flag which would store the results of the
662 # mktime() call. However, the current way of just using mktime() on the
663 # date obsoletes this.
664 #######################################################################
668 # Is the timestamp the same as the last one?
669 return $last_time if ($last_timestamp eq $timestamp);
671 return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)( ([+-])(\d\d)(\d\d))?/o);
673 unless ($last_date eq $1) {
675 my(@timestamp) = (0,0,0,$4,$3,$2);
676 $timestamp[5] -= 1900;
678 $date_seconds = mktime(@timestamp);
680 my $time = $date_seconds + ($5 * 3600) + ($6 * 60) + $7;
682 # SC. Use cacheing. Also note we want seconds not minutes.
683 #my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8;
684 if (defined $8 && ($8 ne $last_offset)) {
686 $offset_seconds = ($10 * 60 + $11) * 60;
687 $offset_seconds = -$offset_seconds if ($9 eq '-');
692 #$time -= $this_offset;
693 $time -= $offset_seconds;
694 } elsif (defined $localtime_offset) {
695 $time -= $localtime_offset;
698 # Store the last timestamp received.
699 $last_timestamp = $timestamp;
706 #######################################################################
709 # $time = id_seconds($message_id);
711 # Given a message ID, convert it into a time() value.
712 #######################################################################
714 my($sub_id) = substr((pop @_), 0, 6);
716 my(@c) = split(//, $sub_id);
717 while($#c >= 0) { $s = $s * 62 + $tab62[ord(shift @c) - ord('0')] }
723 #######################################################################
724 # calculate_localtime_offset();
726 # $localtime_offset = calculate_localtime_offset();
728 # Calculate the the localtime offset from gmtime in seconds.
730 # $localtime = time() + $localtime_offset.
732 # These are the same semantics as ISO 8601 and RFC 2822 timezone offsets.
733 # (West is negative, East is positive.)
734 #######################################################################
736 # $localtime = gmtime() + $localtime_offset. OLD COMMENT
737 # This subroutine commented out as it's not currently in use.
739 #sub calculate_localtime_offset {
740 # # Pick an arbitrary date, convert it to localtime & gmtime, and return the difference.
741 # my (@sample_date) = (0,0,0,5,5,100);
742 # my $localtime = timelocal(@sample_date);
743 # my $gmtime = timegm(@sample_date);
744 # my $offset = $localtime - $gmtime;
748 sub calculate_localtime_offset {
749 # Assume that the offset at the moment is valid across the whole
750 # period covered by the logs that we're analysing. This may not
751 # be true around the time the clocks change in spring or autumn.
753 # mktime works on local time and gmtime works in UTC
754 my $local = mktime(gmtime($utc));
755 return $local - $utc;
759 #######################################################################
760 # print_queue_times();
762 # $time = print_queue_times($message_type,\@queue_times,$queue_more_than);
764 # Given the type of messages being output, the array of message queue times,
765 # and the number of messages which exceeded the queue times, print out
767 #######################################################################
768 sub print_queue_times {
770 my($string,$array,$queue_more_than) = @_;
775 my $cumulative_percent = 0;
776 #$queue_unknown += keys %arrival_time;
778 my $queue_total = $queue_more_than;
779 for ($i = 0; $i <= $#queue_times; $i++) { $queue_total += $$array[$i] }
781 my $temp = "Time spent on the queue: $string";
785 print "<hr><a name=\"$string time\"></a><h2>$temp</h2>\n";
786 print "<table border=0 width=\"100%\">\n";
788 print "<table border=1>\n";
789 print "<tr><th>Time</th><th>Messages</th><th>Percentage</th><th>Cumulative Percentage</th>\n";
790 $format = "<tr><td align=\"right\">%s %s</td><td align=\"right\">%d</td><td align=\"right\">%5.1f%%</td><td align=\"right\">%5.1f%%</td>\n";
794 printf("%s\n%s\n\n", $temp, "-" x length($temp));
795 $format = "%5s %4s %6d %5.1f%% %5.1f%%\n";
798 for ($i = 0; $i <= $#queue_times; $i++) {
801 my $percent = ($$array[$i] * 100)/$queue_total;
802 $cumulative_percent += $percent;
804 $printed_one? " " : "Under",
805 format_time($queue_times[$i]),
806 $$array[$i], $percent, $cumulative_percent);
807 if (!defined($queue_times[$i])) {
810 push(@chartdatanames,
811 ($printed_one? "" : "Under") . format_time($queue_times[$i]));
812 push(@chartdatavals, $$array[$i]);
817 if ($queue_more_than > 0) {
818 my $percent = ($queue_more_than * 100)/$queue_total;
819 $cumulative_percent += $percent;
822 format_time($queue_times[$#queue_times]),
823 $queue_more_than, $percent, $cumulative_percent);
825 push(@chartdatanames, "Over " . format_time($queue_times[$#queue_times]));
826 push(@chartdatavals, $queue_more_than);
828 #printf("Unknown %6d\n", $queue_unknown) if $queue_unknown > 0;
833 if ($HAVE_GD_Graph_pie && $charts) {
838 my $graph = GD::Graph::pie->new(200, 200);
841 if ($string =~ /all/) { $pngname = "queue_all.png"; $title = "Queue (all)"; }
842 if ($string =~ /remote/) { $pngname = "queue_rem.png"; $title = "Queue (remote)"; }
846 my $gd = $graph->plot(\@data) or warn($graph->error);
848 open(IMG, ">$chartdir/$pngname") or die $!;
852 print "<img src=\"$chartrel/$pngname\">";
855 print "</td></tr></table>\n";
862 #######################################################################
865 # print_histogram('Deliverieds|Messages received',@interval_count);
867 # Print a histogram of the messages delivered/received per time slot
869 #######################################################################
870 sub print_histogram {
872 my(@interval_count) = @_;
876 for ($i = 0; $i < $hist_number; $i++)
877 { $maxd = $interval_count[$i] if $interval_count[$i] > $maxd; }
879 my $scale = int(($maxd + 25)/50);
880 $scale = 1 if $scale == 0;
883 if ($text eq "Deliveries")
885 $type = ($scale == 1)? "delivery" : "deliveries";
889 $type = ($scale == 1)? "message" : "messages";
892 my($title) = sprintf("$text per %s (each dot is $scale $type)",
893 ($hist_interval == 60)? "hour" :
894 ($hist_interval == 1)? "minute" : "$hist_interval minutes");
897 print "<hr><a name=\"$text\"></a><h2>$title</h2>\n";
898 print "<table border=0 width=\"100%\">\n";
899 print "<tr><td><pre>\n";
902 printf("%s\n%s\n\n", $title, "-" x length($title));
907 for ($i = 0; $i < $hist_number; $i++)
909 my $c = $interval_count[$i];
911 # If the interval is an hour (the maximum) print the starting and
912 # ending hours as a label. Otherwise print the starting hour and
913 # minutes, which take up the same space.
918 $temp = sprintf("%02d-%02d", $hour, $hour + 1);
920 push(@chartdatanames, $temp);
926 { $temp = sprintf("%02d:%02d", $hour, $minutes) }
928 { $temp = sprintf(" :%02d", $minutes) }
930 push(@chartdatanames, $temp);
931 $minutes += $hist_interval;
938 push(@chartdatavals, $c);
939 printf(" %6d %s\n", $c, "." x ($c/$scale));
946 if ($HAVE_GD_Graph_linespoints && $charts) {
947 # calculate the graph
952 my $graph = GD::Graph::linespoints->new(300, 300);
957 x_labels_vertical => 1
960 if ($text =~ /Deliveries/) { $pngname = "histogram_del.png"; }
961 if ($text =~ /Messages/) { $pngname = "histogram_mes.png"; }
962 my $gd = $graph->plot(\@data) or warn($graph->error);
964 open(IMG, ">$chartdir/$pngname") or die $!;
968 print "<img src=\"$chartrel/$pngname\">";
971 print "</td></tr></table>\n";
977 #######################################################################
978 # print_league_table();
980 # print_league_table($league_table_type,\%message_count,\%message_data,\%message_data_gigs);
982 # Given hashes of message count and message data, which are keyed by
983 # the table type (eg by the sending host), print a league table
984 # showing the top $topcount (defaults to 50).
985 #######################################################################
986 sub print_league_table {
987 my($text,$m_count,$m_data,$m_data_gigs) = @_;
988 my($name) = ($topcount == 1)? "$text" : "$topcount ${text}s";
989 my($temp) = "Top $name by message count";
990 my(@chartdatanames) = ();
991 my(@chartdatavals) = ();
992 my $chartotherval = 0;
996 print "<hr><a name=\"$text count\"></a><h2>$temp</h2>\n";
997 print "<table border=0 width=\"100%\">\n";
999 print "<table border=1>\n";
1000 print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
1002 # Align non-local addresses to the right (so all the .com's line up).
1003 # Local addresses are aligned on the left as they are userids.
1004 my $align = ($text !~ /local/i) ? 'right' : 'left';
1005 $format = "<tr><td align=\"right\">%d</td><td align=\"right\">%s</td><td align=\"$align\" nowrap>%s</td>\n";
1008 printf("%s\n%s\n\n", $temp, "-" x length($temp));
1009 $format = "%7d %10s %s\n";
1013 foreach $key (top_n_sort($topcount,$m_count,$m_data_gigs,$m_data)) {
1016 $htmlkey =~ s/>/\>\;/g;
1017 $htmlkey =~ s/</\<\;/g;
1018 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
1021 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
1023 if (scalar @chartdatanames < $ntopchart)
1025 push(@chartdatanames, $key);
1026 push(@chartdatavals, $$m_count{$key});
1030 $chartotherval += $$m_count{$key};
1033 push(@chartdatanames, "Other");
1034 push(@chartdatavals, $chartotherval);
1039 print "</td><td>\n";
1040 if ($HAVE_GD_Graph_pie && $charts)
1042 # calculate the graph
1047 my $graph = GD::Graph::pie->new(300, 300);
1050 y_label => 'Amount',
1051 title => 'By count',
1053 my $gd = $graph->plot(\@data) or warn($graph->error);
1057 open(IMG, ">$chartdir/${temp}_count.png") or die $!;
1061 print "<img src=\"$chartrel/${temp}_count.png\">";
1064 print "</td><td>\n";
1065 print "</td></tr></table>\n";
1069 $temp = "Top $name by volume";
1071 print "<hr><a name=\"$text volume\"></a><h2>$temp</h2>\n";
1072 print "<table border=0 width=\"100%\">\n";
1074 print "<table border=1>\n";
1075 print "<tr><th>Messages</th><th>Bytes</th><th>\u$text</th>\n";
1078 printf("%s\n%s\n\n", $temp, "-" x length($temp));
1081 @chartdatanames = ();
1082 @chartdatavals = ();
1084 foreach $key (top_n_sort($topcount,$m_data_gigs,$m_data,$m_count)) {
1087 $htmlkey =~ s/>/\>\;/g;
1088 $htmlkey =~ s/</\<\;/g;
1089 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $htmlkey);
1092 printf($format, $$m_count{$key}, volume_rounded($$m_data{$key},$$m_data_gigs{$key}), $key);
1095 if (scalar @chartdatanames < $ntopchart)
1097 push(@chartdatanames, $key);
1098 push(@chartdatavals, $$m_count{$key});
1102 $chartotherval += $$m_count{$key};
1105 push(@chartdatanames, "Other");
1106 push(@chartdatavals, $chartotherval);
1110 print "</td><td>\n";
1111 if ($HAVE_GD_Graph_pie && $charts) {
1112 # calculate the graph
1117 my $graph = GD::Graph::pie->new(300, 300);
1120 y_label => 'Volume',
1121 title => 'By Volume',
1123 my $gd = $graph->plot(\@data) or warn($graph->error);
1127 open(IMG, ">$chartdir/${temp}_volume.png") or die $!;
1131 print "<img src=\"$chartrel/${temp}_volume.png\">";
1134 print "</td><td>\n";
1135 print "</td></tr></table>\n";
1142 #######################################################################
1145 # @sorted_keys = top_n_sort($n,$href1,$href2,$href3);
1147 # Given a hash which has numerical values, return the sorted $n keys which
1148 # point to the top values. The second and third hashes are used as
1149 # tiebreakers. They all must have the same keys.
1151 # The idea behind this routine is that when you only want to see the
1152 # top n members of a set, rather than sorting the entire set and then
1153 # plucking off the top n, sort through the stack as you go, discarding
1154 # any member which is lower than your current n'th highest member.
1156 # This proves to be an order of magnitude faster for large hashes.
1157 # On 200,000 lines of mainlog it benchmarked 9 times faster.
1158 # On 700,000 lines of mainlog it benchmarked 13.8 times faster.
1160 # We assume the values are > 0.
1161 #######################################################################
1163 my($n,$href1,$href2,$href3) = @_;
1165 # PH's original sort was:
1167 # foreach $key (sort
1169 # $$m_count{$b} <=> $$m_count{$a} ||
1170 # $$m_data_gigs{$b} <=> $$m_data_gigs{$a} ||
1171 # $$m_data{$b} <=> $$m_data{$a} ||
1177 #We use a key of '_' to represent non-existant values, as null keys are valid.
1178 #'_' is not a valid domain, edomain, host, or email.
1179 my(@top_n_keys) = ('_') x $n;
1180 my($minimum_value1,$minimum_value2,$minimum_value3) = (0,0,0);
1182 my $n_minus_1 = $n - 1;
1183 my $n_minus_2 = $n - 2;
1185 # Pick out the top $n keys.
1186 my($key,$value1,$value2,$value3,$i,$comparison,$insert_position);
1187 while (($key,$value1) = each %$href1) {
1189 #print STDERR "key $key ($value1,",$href2->{$key},",",$href3->{$key},") <=> ($minimum_value1,$minimum_value2,$minimum_value3)\n";
1191 # Check to see that the new value is bigger than the lowest of the
1192 # top n keys that we're keeping.
1193 $comparison = $value1 <=> $minimum_value1 ||
1194 $href2->{$key} <=> $minimum_value2 ||
1195 $href3->{$key} <=> $minimum_value3 ||
1196 $top_n_key cmp $key;
1197 next unless ($comparison == 1);
1199 # As we will be using these values a few times, extract them into scalars.
1200 $value2 = $href2->{$key};
1201 $value3 = $href3->{$key};
1203 # This key is bigger than the bottom n key, so the lowest position we
1204 # will insert it into is $n minus 1 (the bottom of the list).
1205 $insert_position = $n_minus_1;
1207 # Now go through the list, stopping when we find a key that we're
1208 # bigger than, or we come to the penultimate position - we've
1209 # already tested bigger than the last.
1211 # Note: we go top down as the list starts off empty.
1212 # Note: stepping through the list in this way benchmarks nearly
1213 # three times faster than doing a sort() on the reduced list.
1214 # I assume this is because the list is already in order, and
1215 # we get a performance boost from not having to do hash lookups
1217 for ($i = 0; $i < $n_minus_1; $i++) {
1218 $top_n_key = $top_n_keys[$i];
1219 if ( ($top_n_key eq '_') ||
1220 ( ($value1 <=> $href1->{$top_n_key} ||
1221 $value2 <=> $href2->{$top_n_key} ||
1222 $value3 <=> $href3->{$top_n_key} ||
1223 $top_n_key cmp $key) == 1
1226 $insert_position = $i;
1231 # Remove the last element, then insert the new one.
1232 $#top_n_keys = $n_minus_2;
1233 splice(@top_n_keys,$insert_position,0,$key);
1235 # Extract our new minimum values.
1236 $top_n_key = $top_n_keys[$n_minus_1];
1237 if ($top_n_key ne '_') {
1238 $minimum_value1 = $href1->{$top_n_key};
1239 $minimum_value2 = $href2->{$top_n_key};
1240 $minimum_value3 = $href3->{$top_n_key};
1244 # Return the top n list, grepping out non-existant values, just in case
1245 # we didn't have that many values.
1246 return(grep(!/^_$/,@top_n_keys));
1250 #######################################################################
1253 # $header = html_header($title);
1255 # Print our HTML header and start the <body> block.
1256 #######################################################################
1259 my $text = << "EoText";
1260 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
1263 <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-15">
1264 <title>$title</title>
1266 <body bgcolor="white">
1274 #######################################################################
1279 # Display usage instructions and exit.
1280 #######################################################################
1284 eximstats Version $VERSION
1286 Usage: eximstats [Options] mainlog1 mainlog2 ... > report.txt
1287 eximstats -html [Options] mainlog1 mainlog2 ... > report.html
1288 eximstats -merge [Options] report.1.txt report.2.txt ... > weekly_rep.txt
1289 eximstats -merge -html [Options] report.1.html ... > weekly_rep.html
1291 Parses exim mainlog files and generates a statistical analysis of
1292 the messages processed. Valid options are:
1294 -h<number> histogram divisions per hour. The default is 1, and
1295 0 suppresses histograms. Other valid values are:
1296 2, 3, 5, 10, 15, 20, 30 or 60.
1297 -ne don't display error information
1298 -nr don't display relaying information
1299 -nr/pattern/ don't display relaying information that matches
1300 -nt don't display transport information
1301 -nt/pattern/ don't display transport information that matches
1302 -nvr don't do volume rounding. Display in bytes, not KB/MB/GB.
1303 -q<list> list of times for queuing information
1304 single 0 item suppresses
1305 -t<number> display top <number> sources/destinations
1306 default is 50, 0 suppresses top listing
1307 -tnl omit local sources/destinations in top listing
1308 -t_remote_users show top user sources/destinations from non-local domains
1310 -byhost show results by sending host (default unless bydomain or
1311 byemail is specified)
1312 -bydomain show results by sending domain.
1313 -byemail show results by sender's email address
1314 -byedomain show results by sender's email domain
1316 -pattern "Description" /pattern/
1317 Count lines matching specified patterns and show them in
1318 the results. It can be specified multiple times. Eg:
1319 -pattern 'Refused connections' '/refused connection/'
1321 -merge merge previously generated reports into a new report
1323 -html output the results in HTML
1324 -charts Create charts (this requires the GD::Graph modules)
1325 -chartdir <dir> Create the charts' png files in the directory <dir>
1326 -chartrel <dir> Specify the relative directory for the "img src=" tags
1327 from where to include the charts in the html file
1328 -chartdir and -chartrel default to '.'
1330 -d Debug mode - dump the eval'ed parser onto STDERR.
1339 #######################################################################
1340 # generate_parser();
1342 # $parser = generate_parser();
1344 # This subroutine generates the parsing routine which will be
1345 # used to parse the mainlog. We take the base operation, and remove bits not in use.
1346 # This improves performance depending on what bits you take out or add.
1348 # I've tested using study(), but this does not improve performance.
1350 # We store our parsing routing in a variable, and process it looking for #IFDEF (Expression)
1351 # or #IFNDEF (Expression) statements and corresponding #ENDIF (Expression) statements. If
1352 # the expression evaluates to true, then it is included/excluded accordingly.
1353 #######################################################################
1354 sub generate_parser {
1356 my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new);
1357 my($tod,$m_hour,$m_min,$id,$flag);
1359 next if length($_) < 38;
1362 # next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d)/;
1363 next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)/o;
1365 ($tod,$m_hour,$m_min) = ($1,$2,$3);
1368 my($extra) = defined($4)? 6 : 0;
1369 $id = substr($_, 20 + $extra, 16);
1370 $flag = substr($_, 37 + $extra, 2);
1373 # Watch for user specified patterns.
1374 my $user_pattern_index = 0;
1375 foreach (@user_patterns) {
1376 $user_pattern_totals[$user_pattern_index] = 0;
1377 $parser .= " \$user_pattern_totals[$user_pattern_index]++ if $_;\n";
1378 $user_pattern_index++;
1382 next unless ($flag =~ /<=|=>|->|==|\\*\\*|Co/);
1384 #Strip away the timestamp, ID and flag (which could be "Com" for completed)
1385 #This speeds up the later pattern matches.
1386 # $_ = substr($_, 40);
1388 $_ = substr($_, 40 + $extra); # PH
1390 # JN - Skip over certain transports as specified via the "-nt/.../" command
1391 # line switch (where ... is a perl style regular expression). This is
1392 # required so that transports that skew stats such as SpamAssassin can be
1394 #IFDEF ($transport_pattern)
1395 if (/\\sT=(\\S+)/) {
1396 next if ($1 =~ /$transport_pattern/o) ;
1398 #ENDIF ($transport_pattern)
1401 $host = "local"; #Host is local unless otherwise specified.
1402 $domain = "localdomain"; #Domain is localdomain unless otherwise specified.
1405 # Do some pattern matches to get the host and IP address.
1406 # We expect lines to be of the form "H=[IpAddr]" or "H=Host [IpAddr]" or
1407 # "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]".
1408 # We do 2 separate matches to keep the matches simple and fast.
1409 if (/\\sH=(\\S+)/) {
1412 ($ip) = /\\sH=.*?(\\s\\[[^]]+\\])/;
1413 # If there is only an IP address, it will be in $host and $ip will be
1414 # unset. That is OK, because we only use $ip in conjunction with $host
1415 # below. But make it empty to avoid warning messages.
1416 $ip = "" if !defined $ip;
1418 #IFDEF ($do_sender{Domain})
1419 if ($host !~ /^\\[/ && $host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) {
1420 # Remove the host portion from the DNS name. We ensure that we end up with
1421 # at least xxx.yyy. $host can be "(x.y.z)" or "x.y.z".
1422 $domain = lc("$1.$2");
1423 $domain =~ s/^\\.//; #Remove preceding dot.
1425 #ENDIF ($do_sender{Domain})
1429 #IFDEF ($do_sender{Email})
1430 $email = (/^(\S+)/) ? $1 : "";
1431 #ENDIF ($do_sender{Email})
1433 #IFDEF ($do_sender{Edomain})
1434 $edomain = (/^\S*?\\@(\S+)/) ? lc($1) : "";
1435 #ENDIF ($do_sender{Edomain})
1437 if ($tod lt $begin) {
1440 elsif ($tod gt $end) {
1445 if ($flag eq "<=") {
1446 $thissize = (/\\sS=(\\d+)( |$)/) ? $1 : 0;
1447 $size{$id} = $thissize;
1449 #IFDEF ($show_relay)
1450 if ($host ne "local") {
1451 # Save incoming information in case it becomes interesting
1452 # later, when delivery lines are read.
1453 my($from) = /^(\\S+)/;
1454 $from_host{$id} = "$host$ip";
1455 $from_address{$id} = $from;
1457 #ENDIF ($show_relay)
1459 #IFDEF ($local_league_table || $include_remote_users)
1463 #IFDEF ($local_league_table && $include_remote_users)
1464 { #Store both local and remote users.
1465 #ENDIF ($local_league_table && $include_remote_users)
1467 #IFDEF ($local_league_table && ! $include_remote_users)
1468 if ($host eq "local") { #Store local users only.
1469 #ENDIF ($local_league_table && ! $include_remote_users)
1471 #IFDEF ($include_remote_users && ! $local_league_table)
1472 if ($host ne "local") { #Store remote users only.
1473 #ENDIF ($include_remote_users && ! $local_league_table)
1475 $received_count_user{$user}++;
1476 add_volume(\\$received_data_user{$user},\\$received_data_gigs_user{$user},$thissize);
1479 #ENDIF ($local_league_table || $include_remote_users)
1481 #IFDEF ($do_sender{Host})
1482 $received_count{Host}{$host}++;
1483 add_volume(\\$received_data{Host}{$host},\\$received_data_gigs{Host}{$host},$thissize);
1484 #ENDIF ($do_sender{Host})
1486 #IFDEF ($do_sender{Domain})
1488 $received_count{Domain}{$domain}++;
1489 add_volume(\\$received_data{Domain}{$domain},\\$received_data_gigs{Domain}{$domain},$thissize);
1491 #ENDIF ($do_sender{Domain})
1493 #IFDEF ($do_sender{Email})
1494 $received_count{Email}{$email}++;
1495 add_volume(\\$received_data{Email}{$email},\\$received_data_gigs{Email}{$email},$thissize);
1496 #ENDIF ($do_sender{Email})
1498 #IFDEF ($do_sender{Edomain})
1499 $received_count{Edomain}{$edomain}++;
1500 add_volume(\\$received_data{Edomain}{$edomain},\\$received_data_gigs{Edomain}{$edomain},$thissize);
1501 #ENDIF ($do_sender{Edomain})
1503 $total_received_count++;
1504 add_volume(\\$total_received_data,\\$total_received_data_gigs,$thissize);
1506 #IFDEF ($#queue_times >= 0)
1507 $arrival_time{$id} = $tod;
1508 #ENDIF ($#queue_times >= 0)
1510 #IFDEF ($hist_opt > 0)
1511 $received_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
1512 #ENDIF ($hist_opt > 0)
1515 elsif ($flag eq "=>") {
1516 $size = $size{$id} || 0;
1517 if ($host ne "local") {
1518 $remote_delivered{$id} = 1;
1521 #IFDEF ($show_relay)
1522 # Determine relaying address if either only one address listed,
1523 # or two the same. If they are different, it implies a forwarding
1524 # or aliasing, which is not relaying. Note that for multi-aliased
1525 # addresses, there may be a further address between the first
1528 if (defined $from_host{$id}) {
1529 if (/^(\\S+)(?:\\s+\\([^)]\\))?\\s+<([^>]+)>/) {
1530 ($old,$new) = ($1,$2);
1536 if ("\\L$new" eq "\\L$old") {
1537 ($old) = /^(\\S+)/ if $old eq "";
1538 my $key = "H=\\L$from_host{$id}\\E A=\\L$from_address{$id}\\E => " .
1539 "H=\\L$host\\E$ip A=\\L$old\\E";
1540 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
1541 $relayed{$key} = 0 if !defined $relayed{$key};
1549 #ENDIF ($show_relay)
1553 #IFDEF ($local_league_table || $include_remote_users)
1554 #IFDEF ($local_league_table && $include_remote_users)
1555 { #Store both local and remote users.
1556 #ENDIF ($local_league_table && $include_remote_users)
1558 #IFDEF ($local_league_table && ! $include_remote_users)
1559 if ($host eq "local") { #Store local users only.
1560 #ENDIF ($local_league_table && ! $include_remote_users)
1562 #IFDEF ($include_remote_users && ! $local_league_table)
1563 if ($host ne "local") { #Store remote users only.
1564 #ENDIF ($include_remote_users && ! $local_league_table)
1566 if (my($user) = split((/\\s</)? " <" : " ", $_)) {
1567 if ($user =~ /^[\\/|]/) {
1568 my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
1569 $user = "$user $parent" if defined $parent;
1571 $delivered_count_user{$user}++;
1572 add_volume(\\$delivered_data_user{$user},\\$delivered_data_gigs_user{$user},$size);
1575 #ENDIF ($local_league_table || $include_remote_users)
1577 #IFDEF ($do_sender{Host})
1578 $delivered_count{Host}{$host}++;
1579 add_volume(\\$delivered_data{Host}{$host},\\$delivered_data_gigs{Host}{$host},$size);
1580 #ENDIF ($do_sender{Host})
1581 #IFDEF ($do_sender{Domain})
1583 $delivered_count{Domain}{$domain}++;
1584 add_volume(\\$delivered_data{Domain}{$domain},\\$delivered_data_gigs{Domain}{$domain},$size);
1586 #ENDIF ($do_sender{Domain})
1587 #IFDEF ($do_sender{Email})
1588 $delivered_count{Email}{$email}++;
1589 add_volume(\\$delivered_data{Email}{$email},\\$delivered_data_gigs{Email}{$email},$size);
1590 #ENDIF ($do_sender{Email})
1591 #IFDEF ($do_sender{Edomain})
1592 $delivered_count{Edomain}{$edomain}++;
1593 add_volume(\\$delivered_data{Edomain}{$edomain},\\$delivered_data_gigs{Edomain}{$edomain},$size);
1594 #ENDIF ($do_sender{Edomain})
1596 $total_delivered_count++;
1597 add_volume(\\$total_delivered_data,\\$total_delivered_data_gigs,$size);
1599 #IFDEF ($show_transport)
1600 my $transport = (/\\sT=(\\S+)/) ? $1 : ":blackhole:";
1601 $transported_count{$transport}++;
1602 add_volume(\\$transported_data{$transport},\\$transported_data_gigs{$transport},$size);
1603 #ENDIF ($show_transport)
1605 #IFDEF ($hist_opt > 0)
1606 $delivered_interval_count[($m_hour*60 + $m_min)/$hist_interval]++;
1607 #ENDIF ($hist_opt > 0)
1611 elsif ($flag eq "==" && defined($size{$id}) && !defined($delayed{$id})) {
1616 elsif ($flag eq "**") {
1617 $had_error{$id} = 1 if defined ($size{$id});
1619 #IFDEF ($show_errors)
1620 $errors_count{$_}++;
1621 #ENDIF ($show_errors)
1625 elsif ($flag eq "Co") {
1627 #IFDEF ($#queue_times >= 0)
1628 #Note: id_seconds() benchmarks as 42% slower than seconds() and computing
1629 #the time accounts for a significant portion of the run time.
1631 if (defined $arrival_time{$id}) {
1632 $queued = seconds($tod) - seconds($arrival_time{$id});
1633 delete($arrival_time{$id});
1636 $queued = seconds($tod) - id_seconds($id);
1639 for ($i = 0; $i <= $#queue_times; $i++) {
1640 if ($queued < $queue_times[$i]) {
1642 $remote_queue_bin[$i]++ if $remote_delivered{$id};
1646 $queue_more_than++ if $i > $#queue_times;
1647 #ENDIF ($#queue_times >= 0)
1649 #IFDEF ($show_relay)
1650 delete($from_host{$id});
1651 delete($from_address{$id});
1652 #ENDIF ($show_relay)
1657 # We now do a 'C preprocessor style operation on our parser
1658 # to remove bits not in use.
1659 my(%defines_in_operation,$removing_lines,$processed_parser);
1660 foreach (split (/\n/,$parser)) {
1661 if ((/^\s*#\s*IFDEF\s*\((.*?)\)/i && ! eval $1) ||
1662 (/^\s*#\s*IFNDEF\s*\((.*?)\)/i && eval $1) ) {
1663 $defines_in_operation{$1} = 1;
1664 $removing_lines = 1;
1667 $processed_parser .= $_."\n" unless $removing_lines;
1669 if (/^\s*#\s*ENDIF\s*\((.*?)\)/i) {
1670 delete $defines_in_operation{$1};
1671 unless (keys %defines_in_operation) {
1672 $removing_lines = 0;
1676 print STDERR "# START OF PARSER:\n$processed_parser\n# END OF PARSER\n\n" if $debug;
1678 return $processed_parser;
1683 #######################################################################
1686 # parse($parser,\*FILEHANDLE);
1688 # This subroutine accepts a parser and a filehandle from main and parses each
1689 # line. We store the results into global variables.
1690 #######################################################################
1692 my($parser,$fh) = @_;
1694 if ($merge_reports) {
1695 parse_old_eximstat_reports($fh);
1706 #######################################################################
1711 # Print our headers and contents.
1712 #######################################################################
1715 my $title = "Exim statistics from $begin to $end";
1718 print html_header($title);
1720 print "<li><a href=\"#grandtotal\">Grand total summary</a>\n";
1721 print "<li><a href=\"#patterns\">User Specified Patterns</a>\n" if @user_patterns;
1722 print "<li><a href=\"#transport\">Deliveries by Transport</a>\n" if $show_transport;
1724 print "<li><a href=\"#Messages received\">Messages received per hour</a>\n";
1725 print "<li><a href=\"#Deliveries\">Deliveries per hour</a>\n";
1727 if ($#queue_times >= 0) {
1728 print "<li><a href=\"#all messages time\">Time spent on the queue: all messages</a>\n";
1729 print "<li><a href=\"#messages with at least one remote delivery time\">Time spent on the queue: messages with at least one remote delivery</a>\n";
1731 print "<li><a href=\"#Relayed messages\">Relayed messages</a>\n" if $show_relay;
1733 foreach ('Host','Domain','Email','Edomain') {
1734 next unless $do_sender{$_};
1735 print "<li><a href=\"#sending \l$_ count\">Top $topcount sending \l${_}s by message count</a>\n";
1736 print "<li><a href=\"#sending \l$_ volume\">Top $topcount sending \l${_}s by volume</a>\n";
1738 if ($local_league_table || $include_remote_users) {
1739 print "<li><a href=\"#local sender count\">Top $topcount local senders by message count</a>\n";
1740 print "<li><a href=\"#local sender volume\">Top $topcount local senders by volume</a>\n";
1742 foreach ('Host','Domain','Email','Edomain') {
1743 next unless $do_sender{$_};
1744 print "<li><a href=\"#\l$_ destination count\">Top $topcount \l$_ destinations by message count</a>\n";
1745 print "<li><a href=\"#\l$_ destination volume\">Top $topcount \l$_ destinations by volume</a>\n";
1747 if ($local_league_table || $include_remote_users) {
1748 print "<li><a href=\"#local destination count\">Top $topcount local destinations by message count</a>\n";
1749 print "<li><a href=\"#local destination volume\">Top $topcount local destinations by volume</a>\n";
1752 print "<li><a href=\"#errors\">List of errors</a>\n" if %errors_count;
1753 print "</ul>\n<hr>\n";
1762 #######################################################################
1763 # print_grandtotals();
1765 # print_grandtotals();
1767 # Print the grand totals.
1768 #######################################################################
1769 sub print_grandtotals {
1771 # Get the sender by headings and results. This is complicated as we can have
1772 # different numbers of columns.
1773 my($sender_txt_header,$sender_html_header,$sender_txt_format,$sender_html_format);
1774 my(@received_totals,@delivered_totals);
1775 foreach ('Host','Domain','Email','Edomain') {
1776 next unless $do_sender{$_};
1777 if ($merge_reports) {
1778 push(@received_totals, get_report_total($report_totals{Received},"${_}s"));
1779 push(@delivered_totals,get_report_total($report_totals{Delivered},"${_}s"));
1782 push(@received_totals,scalar(keys %{$received_data{$_}}));
1783 push(@delivered_totals,scalar(keys %{$delivered_data{$_}}));
1785 $sender_html_header .= "<th>${_}s</th>";
1786 $sender_txt_header .= " " x ($COLUMN_WIDTHS - length($_)) . $_ . 's';
1787 $sender_html_format .= "<td align=\"right\">%d</td>";
1788 $sender_txt_format .= " " x ($COLUMN_WIDTHS - 5) . "%6d";
1791 my($format1,$format2);
1794 <a name="grandtotal"></a>
1795 <h2>Grand total summary</h2>
1797 <tr><th>TOTAL</th><th>Volume</th><th>Messages</th>$sender_html_header<th colspan=2>At least one addr<br>Delayed</th><th colspan=2>At least one addr<br>Failed</th>
1800 $format1 = "<tr><td>%s</td><td align=\"right\">%s</td>$sender_html_format<td align=\"right\">%d</td>";
1801 $format2 = "<td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td><td align=\"right\">%d</td><td align=\"right\">%4.1f%%</td>";
1804 my $sender_spaces = " " x length($sender_txt_header);
1809 $sender_spaces At least one address
1810 TOTAL Volume Messages $sender_txt_header Delayed Failed
1812 $format1 = " %-16s %9s %6d $sender_txt_format";
1813 $format2 = " %6d %4.1f%% %6d %4.1f%%",
1816 my($volume,$failed_count);
1817 if ($merge_reports) {
1818 $volume = volume_rounded($report_totals{Received}{Volume}, $report_totals{Received}{'Volume-gigs'});
1819 $total_received_count = get_report_total($report_totals{Received},'Messages');
1820 $failed_count = get_report_total($report_totals{Received},'Failed');
1821 $delayed_count = get_report_total($report_totals{Received},'Delayed');
1824 $volume = volume_rounded($total_received_data, $total_received_data_gigs);
1825 $failed_count = keys %had_error;
1830 printf("$format1$format2\n",'Received',$volume,$total_received_count,
1831 @received_totals,$delayed_count,
1832 ($total_received_count) ? ($delayed_count*100/$total_received_count) : 0,
1834 ($total_received_count) ? ($failed_count*100/$total_received_count) : 0);
1837 if ($merge_reports) {
1838 $volume = volume_rounded($report_totals{Delivered}{Volume}, $report_totals{Delivered}{'Volume-gigs'});
1839 $total_delivered_count = get_report_total($report_totals{Delivered},'Messages');
1842 $volume = volume_rounded($total_delivered_data, $total_delivered_data_gigs);
1844 printf("$format1\n\n",'Delivered',$volume,$total_delivered_count,@delivered_totals);
1845 print "</table>\n" if $html;
1849 #######################################################################
1850 # print_user_patterns()
1852 # print_user_patterns();
1854 # Print the counts of user specified patterns.
1855 #######################################################################
1856 sub print_user_patterns {
1860 print "<hr><a name=\"patterns\"></a><h2>User Specified Patterns</h2>\n";
1861 print "<table border=0 width=\"100%\">\n";
1863 print "<table border=1>\n";
1864 print "<tr><th> </th><th>Total</th>\n";
1865 $format1 = "<tr><td>%s</td><td align=\"right\">%d</td>";
1868 print "User Specified Patterns\n";
1869 print "-----------------------";
1871 $format1 = " %-18s %6d";
1875 if ($merge_reports) {
1876 # We are getting our data from previous reports.
1877 foreach $key (@user_descriptions) {
1878 my $count = get_report_total($report_totals{patterns}{$key},'Total');
1879 printf("$format1\n",$key,$count);
1883 # We are getting our data from mainlog files.
1884 my $user_pattern_index = 0;
1885 foreach $key (@user_descriptions) {
1886 printf("$format1\n",$key,$user_pattern_totals[$user_pattern_index]);
1887 $user_pattern_index++;
1897 #######################################################################
1898 # print_transport();
1900 # print_transport();
1902 # Print totals by transport.
1903 #######################################################################
1904 sub print_transport {
1906 my(@chartdatanames);
1907 my(@chartdatavals_count);
1908 my(@chartdatavals_vol);
1909 no integer; #Lose this for charting the data.
1912 print "<hr><a name=\"transport\"></a><h2>Deliveries by Transport</h2>\n";
1913 print "<table border=0 width=\"100%\">\n";
1915 print "<table border=1>\n";
1916 print "<tr><th> </th><th>Volume</th><th>Messages</th>\n";
1917 $format1 = "<tr><td>%s</td><td align=\"right\">%s</td><td align=\"right\">%d</td>";
1920 print "Deliveries by transport\n";
1921 print "-----------------------";
1922 print "\n Volume Messages\n";
1923 $format1 = " %-18s %6s %6d";
1927 if ($merge_reports) {
1928 # We are getting our data from previous reports.
1929 foreach $key (sort keys %{$report_totals{transport}}) {
1930 my $count = get_report_total($report_totals{transport}{$key},'Messages');
1931 printf("$format1\n",$key,
1932 volume_rounded($report_totals{transport}{$key}{Volume},$report_totals{transport}{$key}{'Volume-gigs'}),
1934 push(@chartdatanames, $key);
1935 push(@chartdatavals_count, $count);
1936 push(@chartdatavals_vol, $report_totals{transport}{$key}{'Volume-gigs'}*$gig + $report_totals{transport}{$key}{Volume} );
1940 # We are getting our data from mainlog files.
1941 foreach $key (sort keys %transported_data) {
1942 printf("$format1\n",$key,
1943 volume_rounded($transported_data{$key},$transported_data_gigs{$key}),
1944 $transported_count{$key});
1945 push(@chartdatanames, $key);
1946 push(@chartdatavals_count, $transported_count{$key});
1947 push(@chartdatavals_vol, $transported_data_gigs{$key}*$gig + $transported_data{$key});
1952 print "</td><td>\n";
1953 if ($HAVE_GD_Graph_pie && $charts)
1955 # calculate the graph
1958 \@chartdatavals_count
1960 my $graph = GD::Graph::pie->new(200, 200);
1962 x_label => 'Transport',
1963 y_label => 'Messages',
1964 title => 'By count',
1966 my $gd = $graph->plot(\@data) or warn($graph->error);
1968 open(IMG, ">$chartdir/transports_count.png") or die $!;
1972 print "<img src=\"$chartrel/transports_count.png\">";
1975 print "</td><td>\n";
1977 if ($HAVE_GD_Graph_pie && $charts) {
1982 my $graph = GD::Graph::pie->new(200, 200);
1984 title => 'By volume',
1986 my $gd = $graph->plot(\@data) or warn($graph->error);
1988 open(IMG, ">$chartdir/transports_vol.png") or die $!;
1992 print "<img src=\"$chartrel/transports_vol.png\">";
1995 print "</td></tr></table>\n";
2002 #######################################################################
2007 # Print our totals by relay.
2008 #######################################################################
2010 my $temp = "Relayed messages";
2011 print "<hr><a name=\"$temp\"></a><h2>$temp</h2>\n" if $html;
2012 if (scalar(keys %relayed) > 0 || $relayed_unshown > 0) {
2018 print "<table border=1>\n";
2019 print "<tr><th>Count</th><th>From</th><th>To</th>\n";
2020 $format = "<tr><td align=\"right\">%d</td><td>%s</td><td>%s</td>\n";
2023 printf("%s\n%s\n\n", $temp, "-" x length($temp));
2024 $format = "%7d %s\n => %s\n";
2028 foreach $key (sort keys %relayed) {
2029 my $count = $relayed{$key};
2032 my($one,$two) = split(/=> /, $key);
2033 printf($format, $count, $one, $two);
2036 print "</table>\n<p>\n" if $html;
2037 print "${spacing}Total: $shown (plus $relayed_unshown unshown)\n";
2040 print "No relayed messages\n";
2041 print "-------------------\n" unless $html;
2048 #######################################################################
2053 # Print our errors. In HTML, we display them as a list rather than a table -
2054 # Netscape doesn't like large tables!
2055 #######################################################################
2057 my $total_errors = 0;
2059 if (scalar(keys %errors_count) != 0) {
2060 my $temp = "List of errors";
2063 print "<hr><a name=\"errors\"></a><h2>$temp</h2>\n";
2064 print "<ul><li><b>Count - Error</b>\n";
2065 $format = "<li>%d - %s\n";
2068 printf("%s\n%s\n\n", $temp, "-" x length($temp));
2072 foreach $key (sort keys %errors_count) {
2075 $text =~ s/\s\s+/ /g; #Convert multiple spaces to a single space.
2076 $total_errors += $errors_count{$key};
2079 #Translate HTML tag characters. Sergey Sholokh.
2080 $text =~ s/\</\<\;/g;
2081 $text =~ s/\>/\>\;/g;
2083 printf($format,$errors_count{$key},$text);
2086 printf("%5d ", $errors_count{$key});
2087 while (length($text) > 65) {
2088 my($first,$rest) = $text =~ /(.{50}\S*)\s+(.+)/;
2090 printf("%s\n ", $first);
2093 printf("%s\n\n", $text);
2096 print "</ul>\n<p>\n" if $html;
2098 $temp = "Errors encountered: $total_errors";
2100 print "-" x length($temp),"\n" unless $html;
2106 #######################################################################
2107 # parse_old_eximstat_reports();
2109 # parse_old_eximstat_reports($fh);
2111 # Parse old eximstat output so we can merge daily stats to weekly stats and weekly to monthly etc.
2113 # To test that the merging still works after changes, do something like the following.
2114 # All the diffs should produce no output.
2116 # options='-bydomain -byemail -byhost -byedomain'
2117 # options="$options -pattern 'Completed Messages' /Completed/"
2118 # options="$options -pattern 'Received Messages' /<=/"
2120 # ./eximstats $options mainlog > mainlog.txt
2121 # ./eximstats $options -merge mainlog.txt > mainlog.2.txt
2122 # diff mainlog.txt mainlog.2.txt
2124 # ./eximstats $options -html mainlog > mainlog.html
2125 # ./eximstats $options -merge -html mainlog.txt > mainlog.2.html
2126 # diff mainlog.html mainlog.2.html
2128 # ./eximstats $options -merge mainlog.html > mainlog.3.txt
2129 # diff mainlog.txt mainlog.3.txt
2131 # ./eximstats $options -merge -html mainlog.html > mainlog.3.html
2132 # diff mainlog.html mainlog.3.html
2134 # ./eximstats $options -nvr mainlog > mainlog.nvr.txt
2135 # ./eximstats $options -merge mainlog.nvr.txt > mainlog.4.txt
2136 # diff mainlog.txt mainlog.4.txt
2138 # # double_mainlog.txt should have twice the values that mainlog.txt has.
2139 # ./eximstats $options mainlog mainlog > double_mainlog.txt
2140 #######################################################################
2141 sub parse_old_eximstat_reports {
2144 my(%league_table_value_entered, %league_table_value_was_zero, %table_order);
2147 if (/Exim statistics from ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?) to ([\d\-]+ [\d:]+(\s+[\+\-]\d+)?)/) {
2148 $begin = $1 if ($1 lt $begin);
2149 $end = $3 if ($3 gt $end);
2151 elsif (/Grand total summary/) {
2152 # Fill in $report_totals{Received|Delivered}{Volume|Messages|Hosts|Domains|...|Delayed|DelayedPercent|Failed|FailedPercent}
2155 $_ = html2txt($_); #Convert general HTML markup to text.
2156 s/At least one addr//g; #Another part of the HTML output we don't want.
2158 # TOTAL Volume Messages Hosts Domains Delayed Failed
2159 # Received 26MB 237 177 23 8 3.4% 28 11.8%
2160 # Delivered 13MB 233 99 88
2161 if (/TOTAL\s+(.*?)\s*$/) {
2162 @fields = split(/\s+/,$1);
2163 #Delayed and Failed have two columns each, so add the extra field names in.
2164 splice(@fields,-1,1,'DelayedPercent','Failed','FailedPercent');
2166 elsif (/(Received|Delivered)\s+(.*?)\s*$/) {
2167 print STDERR "Parsing $_" if $debug;
2168 add_to_totals($report_totals{$1},\@fields,$2);
2170 last if (/Delivered/); #Last line of this section.
2174 elsif (/User Specified Patterns/i) {
2175 #User Specified Patterns
2176 #-----------------------
2180 while (<$fh>) { last if (/Total/); } #Wait until we get the table headers.
2182 print STDERR "Parsing $_" if $debug;
2183 $_ = html2txt($_); #Convert general HTML markup to text.
2184 if (/^\s*(.*?)\s+(\d+)\s*$/) {
2185 $report_totals{patterns}{$1} = {} unless (defined $report_totals{patterns}{$1});
2186 add_to_totals($report_totals{patterns}{$1},['Total'],$2);
2188 last if (/^\s*$/); #Finished if we have a blank line.
2192 elsif (/Deliveries by transport/i) {
2193 #Deliveries by transport
2194 #-----------------------
2196 # :blackhole: 70KB 51
2197 # address_pipe 655KB 1
2200 while (<$fh>) { last if (/Volume/); } #Wait until we get the table headers.
2202 print STDERR "Parsing $_" if $debug;
2203 $_ = html2txt($_); #Convert general HTML markup to text.
2204 if (/(\S+)\s+(\d+\S*\s+\d+)/) {
2205 $report_totals{transport}{$1} = {} unless (defined $report_totals{transport}{$1});
2206 add_to_totals($report_totals{transport}{$1},['Volume','Messages'],$2);
2208 last if (/^\s*$/); #Finished if we have a blank line.
2211 elsif (/(Messages received|Deliveries) per/) {
2212 # Messages received per hour (each dot is 2 messages)
2213 #---------------------------------------------------
2215 #00-01 106 .....................................................
2216 #01-02 103 ...................................................
2218 # Set a pointer to the interval array so we can use the same code
2219 # block for both messages received and delivered.
2220 my $interval_aref = ($1 eq 'Deliveries') ? \@delivered_interval_count : \@received_interval_count;
2221 my $reached_table = 0;
2223 $reached_table = 1 if (/^00/);
2224 next unless $reached_table;
2225 print STDERR "Parsing $_" if $debug;
2226 if (/^(\d+):(\d+)\s+(\d+)/) { #hh:mm start time format ?
2227 $$interval_aref[($1*60 + $2)/$hist_interval] += $3 if $hist_opt;
2229 elsif (/^(\d+)-(\d+)\s+(\d+)/) { #hh-hh start-end time format ?
2230 $$interval_aref[($1*60)/$hist_interval] += $3 if $hist_opt;
2232 else { #Finished the table ?
2238 elsif (/Time spent on the queue: (all messages|messages with at least one remote delivery)/) {
2239 #Time spent on the queue: all messages
2240 #-------------------------------------
2242 #Under 1m 217 91.9% 91.9%
2248 # Set a pointer to the queue bin so we can use the same code
2249 # block for both all messages and remote deliveries.
2250 my $bin_aref = ($1 eq 'all messages') ? \@queue_bin : \@remote_queue_bin;
2251 my $reached_table = 0;
2253 $_ = html2txt($_); #Convert general HTML markup to text.
2254 $reached_table = 1 if (/^\s*Under/);
2255 next unless $reached_table;
2256 my $previous_seconds_on_queue = 0;
2257 if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) {
2258 print STDERR "Parsing $_" if $debug;
2259 my($modifier,$formated_time,$count) = ($1,$2,$3);
2260 my $seconds = unformat_time($formated_time);
2261 my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2;
2262 $previous_seconds_on_queue = $seconds;
2263 $time_on_queue = $seconds * 2 if ($modifier eq 'Over');
2265 for ($i = 0; $i <= $#queue_times; $i++) {
2266 if ($time_on_queue < $queue_times[$i]) {
2267 $$bin_aref[$i] += $count;
2271 # There's only one counter for messages going over the queue
2272 # times so make sure we only count it once.
2273 $queue_more_than += $count if (($bin_aref == \@queue_bin) && ($i > $#queue_times));
2276 last; #Finished the table ?
2281 elsif (/Relayed messages/) {
2285 # 1 addr.domain.com [1.2.3.4] a.user@domain.com
2286 # => addr2.domain2.com [5.6.7.8] a2.user2@domain2.com
2288 #<tr><td align="right">1</td><td>addr.domain.com [1.2.3.4] a.user@domain.com </td><td>addr2.domain2.com [5.6.7.8] a2.user2@domain2.com</td>
2290 my $reached_table = 0;
2293 unless ($reached_table) {
2294 last if (/No relayed messages/);
2295 $reached_table = 1 if (/^\s*\d/ || />\d+</);
2296 next unless $reached_table;
2298 if (/>(\d+)<.td><td>(.*?) ?<.td><td>(.*?)</) {
2299 update_relayed($1,$2,$3);
2301 elsif (/^\s*(\d+)\s+(.*?)\s*$/) {
2302 ($count,$sender) = ($1,$2);
2304 elsif (/=>\s+(.*?)\s*$/) {
2305 update_relayed($count,$sender,$1);
2308 last; #Finished the table ?
2313 elsif (/Top (.*?) by (message count|volume)/) {
2314 #Top 50 sending hosts by message count
2315 #-------------------------------------
2318 my($category,$by_count_or_volume) = ($1,$2);
2320 #As we show 2 views of each table (by count and by volume),
2321 #most (but not all) entries will appear in both tables.
2322 #Set up a hash to record which entries we have already seen
2323 #and one to record which ones we are seeing for the first time.
2324 if ($by_count_or_volume =~ /count/) {
2325 undef %league_table_value_entered;
2326 undef %league_table_value_was_zero;
2330 #As this section processes multiple different table categories,
2331 #set up pointers to the hashes to be updated.
2332 my($count_href,$data_href,$data_gigs_href);
2333 if ($category =~ /local sender/) {
2334 $count_href = \%received_count_user;
2335 $data_href = \%received_data_user;
2336 $data_gigs_href = \%received_data_gigs_user;
2338 elsif ($category =~ /sending (\S+?)s?\b/) {
2339 #Top 50 sending (host|domain|email|edomain)s
2340 #Top sending (host|domain|email|edomain)
2341 $count_href = \%{$received_count{"\u$1"}};
2342 $data_href = \%{$received_data{"\u$1"}};
2343 $data_gigs_href = \%{$received_data_gigs{"\u$1"}};
2345 elsif ($category =~ /local destination/) {
2346 $count_href = \%delivered_count_user;
2347 $data_href = \%delivered_data_user;
2348 $data_gigs_href = \%delivered_data_gigs_user;
2350 elsif ($category =~ /(\S+) destination/) {
2351 #Top 50 (host|domain|email|edomain) destinations
2352 #Top (host|domain|email|edomain) destination
2353 $count_href = \%{$delivered_count{"\u$1"}};
2354 $data_href = \%{$delivered_data{"\u$1"}};
2355 $data_gigs_href = \%{$delivered_data_gigs{"\u$1"}};
2358 my $reached_table = 0;
2360 $_ = html2txt($_); #Convert general HTML markup to text.
2361 $reached_table = 1 if (/^\s*\d/);
2362 next unless $reached_table;
2363 if (/^\s*(\d+)\s+(\S+)\s*(.*?)\s*$/) {
2364 my($count,$rounded_volume,$entry) = ($1,$2,$3);
2365 #Note: $entry fields can be both null and can contain spaces.
2367 #Add the entry into the %table_order hash if it has a rounded volume (KB/MB/GB).
2368 push(@{$table_order{$rounded_volume}{$by_count_or_volume}},$entry) if ($rounded_volume =~ /\D/);
2370 unless ($league_table_value_entered{$entry}) {
2371 $league_table_value_entered{$entry} = 1;
2372 unless ($$count_href{$entry}) {
2373 $$count_href{$entry} = 0;
2374 $$data_href{$entry} = 0;
2375 $$data_gigs_href{$entry} = 0;
2376 $league_table_value_was_zero{$entry} = 1;
2379 $$count_href{$entry} += $count;
2380 #Add the rounded value to the data and data_gigs hashes.
2381 un_round($rounded_volume,\$$data_href{$entry},\$$data_gigs_href{$entry});
2382 print STDERR "$category by $by_count_or_volume: added $count,$rounded_volume to $entry\n" if $debug;
2385 else { #Finished the table ?
2386 if ($by_count_or_volume =~ /volume/) {
2387 #Add a few bytes to appropriate entries to preserve the order.
2389 my($rounded_volume);
2390 foreach $rounded_volume (keys %table_order) {
2391 #For each rounded volume, we want to create a list which has things
2392 #ordered from the volume table at the front, and additional things
2393 #from the count table ordered at the back.
2394 @{$table_order{$rounded_volume}{volume}} = () unless defined $table_order{$rounded_volume}{volume};
2395 @{$table_order{$rounded_volume}{'message count'}} = () unless defined $table_order{$rounded_volume}{'message count'};
2397 map {$mark{$_} = 1} @{$table_order{$rounded_volume}{volume}};
2398 @order = @{$table_order{$rounded_volume}{volume}};
2399 map {push(@order,$_)} grep(!$mark{$_},@{$table_order{$rounded_volume}{'message count'}});
2401 my $bonus_bytes = $#order;
2402 $bonus_bytes = 511 if ($bonus_bytes > 511); #Don't go over the half-K boundary!
2403 while (@order and ($bonus_bytes > 0)) {
2404 my $entry = shift(@order);
2405 if ($league_table_value_was_zero{$entry}) {
2406 $$data_href{$entry} += $bonus_bytes;
2407 print STDERR "$category by $by_count_or_volume: added $bonus_bytes bonus bytes to $entry\n" if $debug;
2418 elsif (/List of errors/) {
2422 # 1 07904931641@one2one.net R=external T=smtp: SMTP error
2423 # from remote mailer after RCPT TO:<07904931641@one2one.net>:
2424 # host mail.one2one.net [193.133.192.24]: 550 User unknown
2426 #<li>1 - ally.dufc@dunbar.org.uk R=external T=smtp: SMTP error from remote mailer after RCPT TO:<ally.dufc@dunbar.org.uk>: host mail.dunbar.org.uk [216.167.89.88]: 550 Unknown local part ally.dufc in <ally.dufc@dunbar.org.uk>
2429 my $reached_table = 0;
2430 my($count,$error,$blanks);
2432 $reached_table = 1 if (/^( *|<li>)(\d+)/);
2433 next unless $reached_table;
2435 s/^<li>(\d+) -/$1/; #Convert an HTML line to a text line.
2436 $_ = html2txt($_); #Convert general HTML markup to text.
2439 $error .= ' ' . $1; #Join a multiline error.
2441 elsif (/^\s*(\d+)\s+(.*)/) {
2443 #Finished with a previous multiline error so save it.
2444 $errors_count{$error} = 0 unless $errors_count{$error};
2445 $errors_count{$error} += $count;
2447 ($count,$error) = ($1,$2);
2449 elsif (/Errors encountered/) {
2451 #Finished the section, so save our stored last error.
2452 $errors_count{$error} = 0 unless $errors_count{$error};
2453 $errors_count{$error} += $count;
2465 #######################################################################
2468 # update_relayed($count,$sender,$recipient);
2470 # Adds an entry into the %relayed hash. Currently only used when
2472 #######################################################################
2473 sub update_relayed {
2474 my($count,$sender,$recipient) = @_;
2476 #When generating the key, put in the 'H=' and 'A=' which can be used
2478 my $key = "H=$sender => H=$recipient";
2479 $key =~ s/ ([^=\s]+\@\S+|<>)/ A=$1/g;
2480 if (!defined $relay_pattern || $key !~ /$relay_pattern/o) {
2481 $relayed{$key} = 0 if !defined $relayed{$key};
2482 $relayed{$key} += $count;
2485 $relayed_unshown += $count;
2490 #######################################################################
2493 # add_to_totals(\%totals,\@keys,$values);
2495 # Given a line of space seperated values, add them into the provided hash using @keys
2498 # If the value contains a '%', then the value is set rather than added. Otherwise, we
2499 # convert the value to bytes and gigs. The gigs get added to I<Key>-gigs.
2500 #######################################################################
2502 my($totals_href,$keys_aref,$values) = @_;
2503 my(@values) = split(/\s+/,$values);
2504 my(@keys) = @$keys_aref; #Make a copy as we destroy the one we use.
2506 foreach $value (@values) {
2507 my $key = shift(@keys) or next;
2508 if ($value =~ /%/) {
2509 $$totals_href{$key} = $value;
2512 $$totals_href{$key} = 0 unless ($$totals_href{$key});
2513 $$totals_href{"$key-gigs"} = 0 unless ($$totals_href{"$key-gigs"});
2514 un_round($value, \$$totals_href{$key}, \$$totals_href{"$key-gigs"});
2515 print STDERR "Added $value to $key - $$totals_href{$key} , " . $$totals_href{"$key-gigs"} . "GB.\n" if $debug;
2520 #######################################################################
2521 # get_report_total();
2523 # $total = get_report_total(\%hash,$key);
2525 # If %hash contains values split into Units and Gigs, we calculate and return
2527 # $hash{$key} + 1024*1024*1024 * $hash{"${key}-gigs"}
2528 #######################################################################
2529 sub get_report_total {
2531 my($hash_ref,$key) = @_;
2532 if ($$hash_ref{"${key}-gigs"}) {
2533 return $$hash_ref{$key} + $gig * $$hash_ref{"${key}-gigs"};
2535 return $$hash_ref{$key} || 0;
2538 #######################################################################
2541 # $text_line = html2txt($html_line);
2543 # Convert a line from html to text. Currently we just convert HTML tags to spaces
2544 # and convert >, <, and tags back.
2545 #######################################################################
2549 # Convert HTML tags to spacing. Note that the reports may contain <Userid> and
2550 # <Userid@Domain> words, so explicitly specify the HTML tags we will remove
2551 # (the ones used by this program). If someone is careless enough to have their
2552 # Userid the same as an HTML tag, there's not much we can do about it.
2553 s/<\/?(html|head|title|body|h\d|ul|li|a\s+|table|tr|td|th|pre|hr|p|br)\b.*?>/ /og;
2555 s/\<\;/\</og; #Convert '<' to '<'.
2556 s/\>\;/\>/og; #Convert '>' to '>'.
2557 s/\ \;/ /og; #Convert ' ' to ' '.
2561 #######################################################################
2564 # $arg = get_next_arg();
2566 # Because eximstats arguments are often passed as variables,
2567 # we can't rely on shell parsing to deal with quotes. This
2568 # subroutine returns $ARGV[1] and does a shift. If $ARGV[1]
2569 # starts with a quote (' or "), and doesn't end in one, then
2570 # we append the next argument to it and shift again. We repeat
2571 # until we've got all of the argument.
2573 # This isn't perfect as all white space gets reduced to one space,
2574 # but it's as good as we can get! If it's esential that spacing
2575 # be preserved precisely, then you get that by not using shell
2577 #######################################################################
2580 my $matched_pattern = 0;
2582 $arg .= ' ' if $arg;
2583 $arg .= $ARGV[1]; shift(@ARGV);
2584 if ($arg !~ /^['"]/) {
2585 $matched_pattern = 1;
2588 if ($arg =~ s/^(['"])(.*)\1$/$2/) {
2589 $matched_pattern = 1;
2593 die "Mismatched argument quotes - <$arg>.\n" unless $matched_pattern;
2599 ##################################################
2601 ##################################################
2604 $last_timestamp = '';
2608 $show_transport = 1;
2610 $local_league_table = 1;
2611 $include_remote_users = 0;
2613 $volume_rounding = 1;
2614 $localtime_offset = calculate_localtime_offset(); # PH/FANF
2617 $charts_option_specified = 0;
2621 @queue_times = (60, 5*60, 15*60, 30*60, 60*60, 3*60*60, 6*60*60,
2622 12*60*60, 24*60*60);
2625 $offset_seconds = 0;
2629 while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-')
2631 if ($ARGV[0] =~ /^\-h(\d+)$/) { $hist_opt = $1 }
2632 elsif ($ARGV[0] =~ /^\-ne$/) { $show_errors = 0 }
2633 elsif ($ARGV[0] =~ /^\-nr(.?)(.*)\1$/)
2635 if ($1 eq "") { $show_relay = 0 } else { $relay_pattern = $2 }
2637 elsif ($ARGV[0] =~ /^\-q([,\d\+\-\*\/]+)$/)
2639 @queue_times = split(/,/, $1);
2641 foreach $q (@queue_times) { $q = eval($q) + 0 }
2642 @queue_times = sort { $a <=> $b } @queue_times;
2643 @queue_times = () if ($#queue_times == 0 && $queue_times[0] == 0);
2645 elsif ($ARGV[0] =~ /^-nt$/) { $show_transport = 0 }
2646 elsif ($ARGV[0] =~ /^\-nt(.?)(.*)\1$/)
2648 if ($1 eq "") { $show_transport = 0 } else { $transport_pattern = $2 }
2650 elsif ($ARGV[0] =~ /^-t(\d+)$/) { $topcount = $1 }
2651 elsif ($ARGV[0] =~ /^-tnl$/) { $local_league_table = 0 }
2652 elsif ($ARGV[0] =~ /^-html$/) { $html = 1 }
2653 elsif ($ARGV[0] =~ /^-merge$/) { $merge_reports = 1 }
2654 elsif ($ARGV[0] =~ /^-charts$/) {
2656 warn "WARNING: CPAN Module GD::Graph::pie not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_pie;
2657 warn "WARNING: CPAN Module GD::Graph::linespoints not installed. Obtain from www.cpan.org\n" unless $HAVE_GD_Graph_linespoints;
2659 elsif ($ARGV[0] =~ /^-chartdir$/) { $chartdir = $ARGV[1]; shift; $charts_option_specified = 1; }
2660 elsif ($ARGV[0] =~ /^-chartrel$/) { $chartrel = $ARGV[1]; shift; $charts_option_specified = 1; }
2661 elsif ($ARGV[0] =~ /^-cache$/) { } #Not currently used.
2662 elsif ($ARGV[0] =~ /^-byhost$/) { $do_sender{Host} = 1 }
2663 elsif ($ARGV[0] =~ /^-bydomain$/) { $do_sender{Domain} = 1 }
2664 elsif ($ARGV[0] =~ /^-byemail$/) { $do_sender{Email} = 1 }
2665 elsif ($ARGV[0] =~ /^-byemaildomain$/) { $do_sender{Edomain} = 1 }
2666 elsif ($ARGV[0] =~ /^-byedomain$/) { $do_sender{Edomain} = 1 }
2667 elsif ($ARGV[0] =~ /^-nvr$/) { $volume_rounding = 0 }
2668 elsif ($ARGV[0] =~ /^-d$/) { $debug = 1 }
2669 elsif ($ARGV[0] =~ /^--?h(elp)?$/){ help() }
2670 elsif ($ARGV[0] =~ /^-t_remote_users$/) { $include_remote_users = 1 }
2671 elsif ($ARGV[0] =~ /^-pattern$/)
2673 push(@user_descriptions,get_next_arg());
2674 push(@user_patterns,get_next_arg());
2676 elsif ($ARGV[0] =~ /^-utc$/)
2678 # We don't need this value if the log is in UTC.
2679 $localtime_offset = undef;
2683 print STDERR "Eximstats: Unknown or malformed option $ARGV[0]\n";
2689 # Check that all the charts options are specified.
2690 warn "-charts option not specified. Use -help for help.\n" if ($charts_option_specified && ! $charts);
2692 # Default to display tables by sending Host.
2693 $do_sender{Host} = 1 unless ($do_sender{Domain} || $do_sender{Email} || $do_sender{Edomain});
2696 for (my $i = 0; $i <= $#queue_times; $i++) {
2698 $remote_queue_bin[$i] = 0;
2701 # Compute the number of slots for the histogram
2705 if ($hist_opt > 60 || 60 % $hist_opt != 0)
2707 print "Eximstats: -h must specify a factor of 60\n";
2710 $hist_interval = 60/$hist_opt; #Interval in minutes.
2711 $hist_number = (24*60)/$hist_interval; #Number of intervals per day.
2712 @received_interval_count = (0) x $hist_number;
2713 @delivered_interval_count = (0) x $hist_number;
2716 #$queue_unknown = 0;
2718 $total_received_data = 0;
2719 $total_received_data_gigs = 0;
2720 $total_received_count = 0;
2722 $total_delivered_data = 0;
2723 $total_delivered_data_gigs = 0;
2724 $total_delivered_count = 0;
2726 $queue_more_than = 0;
2728 $relayed_unshown = 0;
2729 $begin = "9999-99-99 99:99:99";
2730 $end = "0000-00-00 00:00:00";
2732 foreach $section ('Received','Delivered') {
2733 foreach $type ('Volume','Messages','Delayed','Failed','Hosts','Domains','Emails','Edomains') {
2734 $report_totals{$section}{$type} = 0;
2738 # Generate our parser.
2739 my $parser = generate_parser();
2744 # Scan the input files and collect the data
2745 foreach my $file (@ARGV) {
2746 if ($file =~ /\.gz/) {
2747 unless (open(FILE,"gunzip -c $file |")) {
2748 print STDERR "Failed to gunzip -c $file: $!";
2752 elsif ($file =~ /\.Z/) {
2753 unless (open(FILE,"uncompress -c $file |")) {
2754 print STDERR "Failed to uncompress -c $file: $!";
2759 unless (open(FILE,$file)) {
2760 print STDERR "Failed to read $file: $!";
2764 #Now parse the filehandle, updating the global variables.
2765 parse($parser,\*FILE);
2770 #No files provided. Parse STDIN, updating the global variables.
2771 parse($parser,\*STDIN);
2775 if ($begin eq "9999-99-99 99:99:99") {
2776 print "**** No valid log lines read\n";
2780 # Output our results.
2782 print_grandtotals();
2784 # Print counts of user specified patterns if required.
2785 print_user_patterns() if @user_patterns;
2787 # Print totals by transport if required.
2788 print_transport() if $show_transport;
2790 # Print the deliveries per interval as a histogram, unless configured not to.
2791 # First find the maximum in one interval and scale accordingly.
2792 if ($hist_opt > 0) {
2793 print_histogram("Messages received", @received_interval_count);
2794 print_histogram("Deliveries", @delivered_interval_count);
2797 # Print times on queue if required.
2798 if ($#queue_times >= 0) {
2799 print_queue_times("all messages", \@queue_bin,$queue_more_than);
2800 print_queue_times("messages with at least one remote delivery",\@remote_queue_bin,$queue_more_than);
2803 # Print relay information if required.
2804 print_relay() if $show_relay;
2806 # Print the league tables, if topcount isn't zero.
2807 if ($topcount > 0) {
2808 foreach ('Host','Domain','Email','Edomain') {
2809 next unless $do_sender{$_};
2810 print_league_table("sending \l$_", $received_count{$_}, $received_data{$_},$received_data_gigs{$_});
2813 print_league_table("local sender", \%received_count_user,
2814 \%received_data_user,\%received_data_gigs_user) if ($local_league_table || $include_remote_users);
2815 foreach ('Host','Domain','Email','Edomain') {
2816 next unless $do_sender{$_};
2817 print_league_table("\l$_ destination", $delivered_count{$_}, $delivered_data{$_},$delivered_data_gigs{$_});
2819 print_league_table("local destination", \%delivered_count_user,
2820 \%delivered_data_user,\%delivered_data_gigs_user) if ($local_league_table || $include_remote_users);
2823 # Print the error statistics if required.
2824 print_errors() if $show_errors;
2827 print "</body>\n</html>\n"