-#!PERL_COMMAND -w
-# $Cambridge: exim/src/src/eximstats.src,v 1.19 2009/10/19 12:26:34 nm4 Exp $
+#!PERL_COMMAND
-# Copyright (c) 2001 University of Cambridge.
+# Copyright (c) 2001-2017 University of Cambridge.
# See the file NOTICE for conditions of use and distribution.
# Perl script to generate statistics from one or more Exim log files.
# 2001-10-21 Removed -domain flag and added -bydomain, -byhost, and -byemail.
# We now generate our main parsing subroutine as an eval statement
# which improves performance dramatically when not all the results
-# are required. We also cache the last timestamp to time convertion.
+# are required. We also cache the last timestamp to time conversion.
#
# NOTE: 'Top 50 destinations by (message count|volume)' lines are
# now 'Top N (host|email|domain) destinations by (message count|volume)'
# in HTML output. Also added code to convert them back with -merge.
# Fixed timestamp offsets to convert to seconds rather than minutes.
# Updated -merge to work with output files using timezones.
-# Added cacheing to speed up the calculation of timezone offsets.
+# Added caching to speed up the calculation of timezone offsets.
#
# 2003-02-07 V1.25 Steve Campbell
# Optimised the usage of mktime() in the seconds subroutine.
# Bernard Massot.
#
# 2003-06-03 V1.28 John Newman
-# Added in the ability to skip over the parsing and evaulation of
+# Added in the ability to skip over the parsing and evaluation of
# specific transports as passed to eximstats via the new "-nt/.../"
# command line argument. This new switch allows the viewing of
# not more accurate statistics but more applicable statistics when
# Added -xls and the ability to specify output files.
#
# 2005-04-29 V1.38 Steve Campbell
-# Use FileHandles for outputing results.
+# Use FileHandles for outputting results.
# Allow any combination of xls, txt, and html output.
# Fixed display of large numbers with -nvr option
# Fixed merging of reports with empty tables.
# 2007-09-20 V1.60 Heiko Schlittermann
# Fix for misinterpreted log lines
#
+# 2013-01-14 V1.61 Steve Campbell
+# Watch out for senders sending "HELO [IpAddr]"
#
#
# For documentation on the logfile format, see
This program does not perfectly handle messages whose received
and delivered log lines are in different files, which can happen
when you have multiple mail servers and a message cannot be
-immeadiately delivered. Fixing this could be tricky...
+immediately delivered. Fixing this could be tricky...
Merging of xls files is not (yet) possible. Be free to implement :)
=cut
+use warnings;
use integer;
+BEGIN { pop @INC if $INC[-1] eq '.' };
use strict;
use IO::File;
+use File::Basename;
# use Time::Local; # PH/FANF
use POSIX;
+if (@ARGV and $ARGV[0] eq '--version') {
+ print basename($0) . ": $0\n",
+ "build: EXIM_RELEASE_VERSIONEXIM_VARIANT_VERSION\n",
+ "perl(runtime): $]\n";
+ exit 0;
+}
+
use vars qw($HAVE_GD_Graph_pie $HAVE_GD_Graph_linespoints $HAVE_Spreadsheet_WriteExcel);
eval { require GD::Graph::pie; };
$HAVE_GD_Graph_pie = $@ ? 0 : 1;
@days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
$gig = 1024 * 1024 * 1024;
-$VERSION = '1.60';
+$VERSION = '1.61';
# How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
$COLUMN_WIDTHS = 8;
use vars qw($total_received_data $total_received_data_gigs $total_received_count);
use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_messages $total_delivered_addresses);
use vars qw(%timestamp2time); #Hash of timestamp => time.
-use vars qw($last_timestamp $last_time); #The last time convertion done.
-use vars qw($last_date $date_seconds); #The last date convertion done.
-use vars qw($last_offset $offset_seconds); #The last time offset convertion done.
+use vars qw($last_timestamp $last_time); #The last time conversion done.
+use vars qw($last_date $date_seconds); #The last date conversion done.
+use vars qw($last_offset $offset_seconds); #The last time offset conversion done.
use vars qw($localtime_offset);
use vars qw($i); #General loop counter.
use vars qw($debug); #Debug mode?
use vars qw(%rejected_count_by_ip %rejected_count_by_reason);
use vars qw(%temporarily_rejected_count_by_ip %temporarily_rejected_count_by_reason);
-#For use in Speadsheed::WriteExcel
+#For use in Spreadsheet::WriteExcel
use vars qw($workbook $ws_global $ws_relayed $ws_errors);
use vars qw($row $col $row_hist $col_hist);
use vars qw($run_hist);
}
else {
# We don't want any rounding to be done.
- # and we don't need broken formated output which on one hand avoids numbers from
- # being interpreted as string by Spreadsheed Calculators, on the other hand
+ # and we don't need broken formatted output which on one hand avoids numbers from
+ # being interpreted as string by Spreadsheet Calculators, on the other hand
# breaks if more than 4 digits! -> flexible length instead of fixed length
# Format the return value at the output routine! -fh
#$rounded = sprintf("%d", ($g * $gig) + $x);
# Eg 3h20m5s => 12005
#######################################################################
sub unformat_time {
- my($formated_time) = pop @_;
+ my($formatted_time) = pop @_;
my $time = 0;
- while ($formated_time =~ s/^(\d+)([wdhms]?)//) {
+ while ($formatted_time =~ s/^(\d+)([wdhms]?)//) {
$time += $1 if ($2 eq '' || $2 eq 's');
$time += $1 * 60 if ($2 eq 'm');
$time += $1 * 60 * 60 if ($2 eq 'h');
# POSIX::mktime. We expect the timestamp to be of the form
# "$year-$mon-$day $hour:$min:$sec", with month going from 1 to 12,
# and the year to be absolute (we do the necessary conversions). The
+# seconds value can be followed by decimals, which we ignore. The
# timestamp may be followed with an offset from UTC like "+$hh$mm"; if the
# offset is not present, and we have not been told that the log is in UTC
# (with the -utc option), then we adjust the time by the current local
# Is the timestamp the same as the last one?
return $last_time if ($last_timestamp eq $timestamp);
- return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)( ([+-])(\d\d)(\d\d))?/o);
+ return 0 unless ($timestamp =~ /^((\d{4})\-(\d\d)-(\d\d))\s(\d\d):(\d\d):(\d\d)(?:\.\d+)?( ([+-])(\d\d)(\d\d))?/o);
unless ($last_date eq $1) {
$last_date = $1;
}
my $time = $date_seconds + ($5 * 3600) + ($6 * 60) + $7;
- # SC. Use cacheing. Also note we want seconds not minutes.
- #my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8;
+ # SC. Use caching. Also note we want seconds not minutes.
+ #my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8;
if (defined $8 && ($8 ne $last_offset)) {
$last_offset = $8;
$offset_seconds = ($10 * 60 + $11) * 60;
}
- if (defined $7) {
+ if (defined $8) {
#$time -= $this_offset;
$time -= $offset_seconds;
} elsif (defined $localtime_offset) {
# Create a dummy hash entry for the key if required.
# Note that setting the dummy_hash value sets it for both href2 &
- # href3. Also note that currently we are guarenteed to have a real
+ # href3. Also note that currently we are guaranteed to have a real
# value for href3 if a real value for href2 exists so don't need to
# test for it as well.
$dummy_hash{$key} = 0 unless exists $href2->{$key};
$length = length($_);
next if ($length < 38);
- next unless /^(\\d{4}\\-\\d\\d-\\d\\d\\s(\\d\\d):(\\d\\d):\\d\\d( [-+]\\d\\d\\d\\d)?)( \\[\\d+\\])?/o;
-
- ($tod,$m_hour,$m_min) = ($1,$2,$3);
+ next unless /^
+ (\\d{4}\\-\\d\\d-\\d\\d\\s # 1: YYYYMMDD HHMMSS
+ (\\d\\d) # 2: HH
+ :
+ (\\d\\d) # 3: MM
+ :\\d\\d
+ )
+ (\\.\\d+)? # 4: subseconds
+ (\s[-+]\\d\\d\\d\\d)? # 5: tz-offset
+ (\s\\[\\d+\\])? # 6: pid
+ /ox;
+
+ $tod = defined($5) ? $1 . $5 : $1;
+ ($m_hour,$m_min) = ($2,$3);
# PH - watch for GMT offsets in the timestamp.
- if (defined($4)) {
+ if (defined($5)) {
$extra = 6;
next if ($length < 44);
}
$extra = 0;
}
+ # watch for subsecond precision
+ if (defined($4)) {
+ $extra += length($4);
+ next if ($length < 38 + $extra);
+ }
+
# PH - watch for PID added after the timestamp.
- if (defined($5)) {
- $extra += length($5);
+ if (defined($6)) {
+ $extra += length($6);
next if ($length < 38 + $extra);
}
# "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]".
# We do 2 separate matches to keep the matches simple and fast.
# Host is local unless otherwise specified.
- $ip = (/\\bH=.*?(\\[[^]]+\\])/) ? $1
+ # Watch out for "H=([IpAddr])" in case they send "[IpAddr]" as their HELO!
+ $ip = (/\\bH=(?:|.*? )(\\[[^]]+\\])/) ? $1
# 2008-03-31 06:25:22 Connection from [213.246.33.217]:39456 refused: too many connections from that IP address // .hs
: (/Connection from (\[\S+\])/) ? $1
# 2008-03-31 06:52:40 SMTP call from mail.cacoshrf.com (ccsd02.ccsd.local) [69.24.118.229]:4511 dropped: too many nonmail commands (last was "RSET") // .hs
# 2005-09-23 15:07:49 1EInHJ-0007Ex-Au H=(a.b.c) [10.0.0.1] F=<> rejected after DATA: This message contains a virus: (Eicar-Test-Signature) please scan your system.
# 2005-10-06 10:50:07 1ENRS3-0000Nr-Kt => blackhole (DATA ACL discarded recipients): This message contains a virus: (Worm.SomeFool.P) please scan your system.
/ rejected after DATA: (.*)/ ||
+ / (rejected DATA: .*)/ ||
/.DATA ACL discarded recipients.: (.*)/ ||
/rejected after DATA: (unqualified address not permitted)/ ||
/(VRFY rejected)/ ||
if ($messages > 0) {
@content = ($total_aref->[0], '', $messages, '');
- #Count the number of distict IPs for the Hosts column.
+ #Count the number of distinct IPs for the Hosts column.
push(@content,scalar(keys %{$total_aref->[1]})) if $do_sender{Host};
#These rows do not have entries for the following columns (if specified)
my $previous_seconds_on_queue = 0;
if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) {
print STDERR "Parsing $_" if $debug;
- my($modifier,$formated_time,$count) = ($1,$2,$3);
- my $seconds = unformat_time($formated_time);
+ my($modifier,$formatted_time,$count) = ($1,$2,$3);
+ my $seconds = unformat_time($formatted_time);
my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2;
$previous_seconds_on_queue = $seconds;
$time_on_queue = $seconds * 2 if ($modifier eq 'Over');
#
# add_to_totals(\%totals,\@keys,$values);
#
-# Given a line of space seperated values, add them into the provided hash using @keys
+# Given a line of space separated values, add them into the provided hash using @keys
# as the hash keys.
#
# If the value contains a '%', then the value is set rather than added. Otherwise, we
#
# line_to_hash(\%hash,\@keys,$line);
#
-# Given a line of space seperated values, set them into the provided hash
+# Given a line of space separated values, set them into the provided hash
# using @keys as the hash keys.
#######################################################################
sub line_to_hash {
# until we've got all of the argument.
#
# This isn't perfect as all white space gets reduced to one space,
-# but it's as good as we can get! If it's esential that spacing
+# but it's as good as we can get! If it's essential that spacing
# be preserved precisely, then you get that by not using shell
# variables.
#######################################################################
#######################################################################
# @rcpt_times = parse_time_list($string);
#
-# Parse a comma seperated list of time values in seconds given by
+# Parse a comma separated list of time values in seconds given by
# the user and fill an array.
#
# Return a default list if $string is undefined.