X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/30e9ac3f88f757de4a7b68038fc36d893879a501..4d3d955f2791199b35704c3e9784dc99fd229696:/src/src/eximstats.src
diff --git a/src/src/eximstats.src b/src/src/eximstats.src
index bb7a4c2b7..a2113f106 100644
--- a/src/src/eximstats.src
+++ b/src/src/eximstats.src
@@ -1,7 +1,6 @@
-#!PERL_COMMAND -w
-# $Cambridge: exim/src/src/eximstats.src,v 1.15 2007/04/03 15:50:58 steve Exp $
+#!PERL_COMMAND
-# Copyright (c) 2001 University of Cambridge.
+# Copyright (c) 2001-2016 University of Cambridge.
# See the file NOTICE for conditions of use and distribution.
# Perl script to generate statistics from one or more Exim log files.
@@ -74,7 +73,7 @@
# 2001-10-21 Removed -domain flag and added -bydomain, -byhost, and -byemail.
# We now generate our main parsing subroutine as an eval statement
# which improves performance dramatically when not all the results
-# are required. We also cache the last timestamp to time convertion.
+# are required. We also cache the last timestamp to time conversion.
#
# NOTE: 'Top 50 destinations by (message count|volume)' lines are
# now 'Top N (host|email|domain) destinations by (message count|volume)'
@@ -143,7 +142,7 @@
# in HTML output. Also added code to convert them back with -merge.
# Fixed timestamp offsets to convert to seconds rather than minutes.
# Updated -merge to work with output files using timezones.
-# Added cacheing to speed up the calculation of timezone offsets.
+# Added caching to speed up the calculation of timezone offsets.
#
# 2003-02-07 V1.25 Steve Campbell
# Optimised the usage of mktime() in the seconds subroutine.
@@ -163,7 +162,7 @@
# Bernard Massot.
#
# 2003-06-03 V1.28 John Newman
-# Added in the ability to skip over the parsing and evaulation of
+# Added in the ability to skip over the parsing and evaluation of
# specific transports as passed to eximstats via the new "-nt/.../"
# command line argument. This new switch allows the viewing of
# not more accurate statistics but more applicable statistics when
@@ -201,7 +200,7 @@
# Added -xls and the ability to specify output files.
#
# 2005-04-29 V1.38 Steve Campbell
-# Use FileHandles for outputing results.
+# Use FileHandles for outputting results.
# Allow any combination of xls, txt, and html output.
# Fixed display of large numbers with -nvr option
# Fixed merging of reports with empty tables.
@@ -276,6 +275,17 @@
# Fixed Grand Total Summary Domains, Edomains, and Email columns
# for Rejects, Temp Rejects, Ham, and Spam rows.
#
+# 2007-04-11 V1.58 Steve Campbell
+# Fix to get <> and blackhole to show in edomain tables.
+#
+# 2007-09-20 V1.59 Steve Campbell
+# Added the -bylocaldomain option
+#
+# 2007-09-20 V1.60 Heiko Schlittermann
+# Fix for misinterpreted log lines
+#
+# 2013-01-14 V1.61 Steve Campbell
+# Watch out for senders sending "HELO [IpAddr]"
#
#
# For documentation on the logfile format, see
@@ -380,7 +390,7 @@ Useful for finding out which of your mailing lists are receiving mail.
Show the delivery times (B<DT>)for all the messages.
-Exim must have been configured to use the +delivery_time logging option
+Exim must have been configured to use the +deliver_time logging option
for this option to work.
I<list> is an optional list of times. Eg -show_dt1,2,4,8 will show
@@ -531,13 +541,15 @@ mailing list exim-users@exim.org.
This program does not perfectly handle messages whose received
and delivered log lines are in different files, which can happen
when you have multiple mail servers and a message cannot be
-immeadiately delivered. Fixing this could be tricky...
+immediately delivered. Fixing this could be tricky...
Merging of xls files is not (yet) possible. Be free to implement :)
=cut
+use warnings;
use integer;
+BEGIN { pop @INC if $INC[-1] eq '.' };
use strict;
use IO::File;
@@ -574,7 +586,7 @@ use vars qw($WEEK $DAY $HOUR $MINUTE);
@days_per_month = (0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334);
$gig = 1024 * 1024 * 1024;
-$VERSION = '1.57';
+$VERSION = '1.61';
# How much space do we allow for the Hosts/Domains/Emails/Edomains column headers?
$COLUMN_WIDTHS = 8;
@@ -588,9 +600,9 @@ $WEEK = 7 * $DAY;
use vars qw($total_received_data $total_received_data_gigs $total_received_count);
use vars qw($total_delivered_data $total_delivered_data_gigs $total_delivered_messages $total_delivered_addresses);
use vars qw(%timestamp2time); #Hash of timestamp => time.
-use vars qw($last_timestamp $last_time); #The last time convertion done.
-use vars qw($last_date $date_seconds); #The last date convertion done.
-use vars qw($last_offset $offset_seconds); #The last time offset convertion done.
+use vars qw($last_timestamp $last_time); #The last time conversion done.
+use vars qw($last_date $date_seconds); #The last date conversion done.
+use vars qw($last_offset $offset_seconds); #The last time offset conversion done.
use vars qw($localtime_offset);
use vars qw($i); #General loop counter.
use vars qw($debug); #Debug mode?
@@ -604,7 +616,7 @@ use vars qw(%ham_count_by_ip %spam_count_by_ip);
use vars qw(%rejected_count_by_ip %rejected_count_by_reason);
use vars qw(%temporarily_rejected_count_by_ip %temporarily_rejected_count_by_reason);
-#For use in Speadsheed::WriteExcel
+#For use in Spreadsheet::WriteExcel
use vars qw($workbook $ws_global $ws_relayed $ws_errors);
use vars qw($row $col $row_hist $col_hist);
use vars qw($run_hist);
@@ -618,7 +630,7 @@ $ntopchart = 5;
# The following are parameters whose values are
# set by command line switches:
use vars qw($show_errors $show_relay $show_transport $transport_pattern);
-use vars qw($topcount $local_league_table $include_remote_users);
+use vars qw($topcount $local_league_table $include_remote_users $do_local_domain);
use vars qw($hist_opt $hist_interval $hist_number $volume_rounding $emptyOK);
use vars qw($relay_pattern @queue_times @user_patterns @user_descriptions);
use vars qw(@rcpt_times @delivery_times);
@@ -637,6 +649,7 @@ use vars qw(%received_count %received_data %received_data_gigs);
use vars qw(%delivered_messages %delivered_data %delivered_data_gigs %delivered_addresses);
use vars qw(%received_count_user %received_data_user %received_data_gigs_user);
use vars qw(%delivered_messages_user %delivered_addresses_user %delivered_data_user %delivered_data_gigs_user);
+use vars qw(%delivered_messages_local_domain %delivered_addresses_local_domain %delivered_data_local_domain %delivered_data_gigs_local_domain);
use vars qw(%transported_count %transported_data %transported_data_gigs);
use vars qw(%relayed %errors_count $message_errors);
use vars qw(@qt_all_bin @qt_remote_bin);
@@ -746,8 +759,8 @@ sub volume_rounded {
}
else {
# We don't want any rounding to be done.
- # and we don't need broken formated output which on one hand avoids numbers from
- # being interpreted as string by Spreadsheed Calculators, on the other hand
+ # and we don't need broken formatted output which on one hand avoids numbers from
+ # being interpreted as string by Spreadsheet Calculators, on the other hand
# breaks if more than 4 digits! -> flexible length instead of fixed length
# Format the return value at the output routine! -fh
#$rounded = sprintf("%d", ($g * $gig) + $x);
@@ -860,10 +873,10 @@ $p;
# Eg 3h20m5s => 12005
#######################################################################
sub unformat_time {
- my($formated_time) = pop @_;
+ my($formatted_time) = pop @_;
my $time = 0;
- while ($formated_time =~ s/^(\d+)([wdhms]?)//) {
+ while ($formatted_time =~ s/^(\d+)([wdhms]?)//) {
$time += $1 if ($2 eq '' || $2 eq 's');
$time += $1 * 60 if ($2 eq 'm');
$time += $1 * 60 * 60 if ($2 eq 'h');
@@ -917,7 +930,7 @@ sub seconds {
}
my $time = $date_seconds + ($5 * 3600) + ($6 * 60) + $7;
- # SC. Use cacheing. Also note we want seconds not minutes.
+ # SC. Use caching. Also note we want seconds not minutes.
#my($this_offset) = ($10 * 60 + $11) * ($9 . "1") if defined $8;
if (defined $8 && ($8 ne $last_offset)) {
$last_offset = $8;
@@ -1639,7 +1652,7 @@ sub top_n_sort {
# Create a dummy hash entry for the key if required.
# Note that setting the dummy_hash value sets it for both href2 &
- # href3. Also note that currently we are guarenteed to have a real
+ # href3. Also note that currently we are guaranteed to have a real
# value for href3 if a real value for href2 exists so don't need to
# test for it as well.
$dummy_hash{$key} = 0 unless exists $href2->{$key};
@@ -1783,6 +1796,7 @@ Valid options are:
-bydomain show results by sending domain.
-byemail show results by sender's email address
-byedomain show results by sender's email domain
+-bylocaldomain show results by local domain
-pattern "Description" /pattern/
Count lines matching specified patterns and show them in
@@ -1828,7 +1842,7 @@ sub generate_parser {
my $parser = '
my($ip,$host,$email,$edomain,$domain,$thissize,$size,$old,$new);
my($tod,$m_hour,$m_min,$id,$flag,$extra,$length);
- my($seconds,$queued,$rcpt_time);
+ my($seconds,$queued,$rcpt_time,$local_domain);
my $rej_id = 0;
while (<$fh>) {
@@ -1914,13 +1928,23 @@ sub generate_parser {
# "H=Host (UnverifiedHost) [IpAddr]" or "H=(UnverifiedHost) [IpAddr]".
# We do 2 separate matches to keep the matches simple and fast.
# Host is local unless otherwise specified.
- $ip = (/\\bH=.*?(\\[[^]]+\\])/) ? $1 : "local";
+ # Watch out for "H=([IpAddr])" in case they send "[IpAddr]" as their HELO!
+ $ip = (/\\bH=(?:|.*? )(\\[[^]]+\\])/) ? $1
+ # 2008-03-31 06:25:22 Connection from [213.246.33.217]:39456 refused: too many connections from that IP address // .hs
+ : (/Connection from (\[\S+\])/) ? $1
+ # 2008-03-31 06:52:40 SMTP call from mail.cacoshrf.com (ccsd02.ccsd.local) [69.24.118.229]:4511 dropped: too many nonmail commands (last was "RSET") // .hs
+ : (/SMTP call from .*?(\[\S+\])/) ? $1
+ : "local";
$host = (/\\bH=(\\S+)/) ? $1 : "local";
$domain = "localdomain"; #Domain is localdomain unless otherwise specified.
#IFDEF ($do_sender{Domain})
- if ($host !~ /^\\[/ && $host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) {
+ if ($host =~ /^\\[/ || $host =~ /^[\\d\\.]+$/) {
+ # Host is just an IP address.
+ $domain = $host;
+ }
+ elsif ($host =~ /^(\\(?)[^\\.]+\\.([^\\.]+\\..*)/) {
# Remove the host portion from the DNS name. We ensure that we end up
# with at least xxx.yyy. $host can be "(x.y.z)" or "x.y.z".
$domain = lc("$1.$2");
@@ -1942,16 +1966,25 @@ sub generate_parser {
#ENDIF ($do_sender{Email})
#IFDEF ($do_sender{Edomain})
+ if (/^(<>|blackhole)/) {
+ $edomain = $1;
+ }
#IFDEF ($include_original_destination)
- #$edomain = (/^(\S+) (<\S*?\\@(\S+)>)?/) ? $3 || $1 : "";
- $edomain = (/^(\S+ (<\S*?\\@(\S+?)>)?)/) ? $1 : "";
- chomp($edomain);
- lc($edomain);
+ elsif (/^(\S+ (<\S*?\\@(\S+?)>)?)/) {
+ $edomain = $1;
+ chomp($edomain);
+ $edomain =~ s/@(\S+?)>/"@" . lc($1) . ">"/e;
+ }
#ENDIF ($include_original_destination)
-
#IFNDEF ($include_original_destination)
- $edomain = (/^\S*?\\@(\S+)/) ? lc($1) : "";
+ elsif (/^\S*?\\@(\S+)/) {
+ $edomain = lc($1);
+ }
#ENDIF ($include_original_destination)
+ else {
+ $edomain = "";
+ }
+
#ENDIF ($do_sender{Edomain})
if ($tod lt $begin) {
@@ -2093,7 +2126,17 @@ sub generate_parser {
#ENDIF ($include_original_destination)
#my($parent) = $_ =~ /(<[^@]+@?[^>]*>)/;
my($parent) = $_ =~ / (<.+?>) /; #DT 1.54
- $user = "$user $parent" if defined $parent;
+ if (defined $parent) {
+ $user = "$user $parent";
+ #IFDEF ($do_local_domain)
+ if ($parent =~ /\\@(.+)>/) {
+ $local_domain = lc($1);
+ ++$delivered_messages_local_domain{$local_domain};
+ ++$delivered_addresses_local_domain{$local_domain};
+ add_volume(\\$delivered_data_local_domain{$local_domain},\\$delivered_data_gigs_local_domain{$local_domain},$size);
+ }
+ #ENDIF ($do_local_domain)
+ }
}
++$delivered_messages_user{$user};
++$delivered_addresses_user{$user};
@@ -2328,6 +2371,7 @@ sub generate_parser {
# 2005-09-23 15:07:49 1EInHJ-0007Ex-Au H=(a.b.c) [10.0.0.1] F=<> rejected after DATA: This message contains a virus: (Eicar-Test-Signature) please scan your system.
# 2005-10-06 10:50:07 1ENRS3-0000Nr-Kt => blackhole (DATA ACL discarded recipients): This message contains a virus: (Worm.SomeFool.P) please scan your system.
/ rejected after DATA: (.*)/ ||
+ / (rejected DATA: .*)/ ||
/.DATA ACL discarded recipients.: (.*)/ ||
/rejected after DATA: (unqualified address not permitted)/ ||
/(VRFY rejected)/ ||
@@ -2386,6 +2430,14 @@ sub generate_parser {
++$rejected_count_by_reason{"\u$1$2"};
++$rejected_count_by_ip{$ip};
}
+ elsif (
+ # 2008-03-31 06:25:22 H=mail.densitron.com [216.70.140.224]:45386 temporarily rejected connection in "connect" ACL: too fast reconnects // .hs
+ # 2008-03-31 06:25:22 H=mail.densitron.com [216.70.140.224]:45386 temporarily rejected connection in "connect" ACL // .hs
+ /(temporarily rejected connection in .*?ACL:?.*)/
+ ) {
+ ++$temporarily_rejected_count_by_ip{$ip};
+ ++$temporarily_rejected_count_by_reason{"\u$1"};
+ }
else {
++$rejected_count_by_reason{Unknown};
++$rejected_count_by_ip{$ip};
@@ -2508,6 +2560,10 @@ sub print_header {
print $htm_fh "Top $topcount local destinations by message count\n";
print $htm_fh "Top $topcount local destinations by volume\n";
}
+ if (($local_league_table || $include_remote_users) && %delivered_messages_local_domain) {
+ print $htm_fh "Top $topcount local domain destinations by message count\n";
+ print $htm_fh "Top $topcount local domain destinations by volume\n";
+ }
print $htm_fh "Top $topcount rejected ips by message count\n" if %rejected_count_by_ip;
print $htm_fh "Top $topcount temporarily rejected ips by message count\n" if %temporarily_rejected_count_by_ip;
@@ -2675,7 +2731,7 @@ sub print_grandtotals {
if ($messages > 0) {
@content = ($total_aref->[0], '', $messages, '');
- #Count the number of distict IPs for the Hosts column.
+ #Count the number of distinct IPs for the Hosts column.
push(@content,scalar(keys %{$total_aref->[1]})) if $do_sender{Host};
#These rows do not have entries for the following columns (if specified)
@@ -3306,8 +3362,8 @@ sub parse_old_eximstat_reports {
my $previous_seconds_on_queue = 0;
if (/^\s*(Under|Over|)\s+(\d+[smhdw])\s+(\d+)/) {
print STDERR "Parsing $_" if $debug;
- my($modifier,$formated_time,$count) = ($1,$2,$3);
- my $seconds = unformat_time($formated_time);
+ my($modifier,$formatted_time,$count) = ($1,$2,$3);
+ my $seconds = unformat_time($formatted_time);
my $time_on_queue = ($seconds + $previous_seconds_on_queue) / 2;
$previous_seconds_on_queue = $seconds;
$time_on_queue = $seconds * 2 if ($modifier eq 'Over');
@@ -3398,6 +3454,12 @@ sub parse_old_eximstat_reports {
$data_href = \%delivered_data_user;
$data_gigs_href = \%delivered_data_gigs_user;
}
+ elsif ($category =~ /local domain destination/) {
+ $messages_href = \%delivered_messages_local_domain;
+ $addresses_href = \%delivered_addresses_local_domain;
+ $data_href = \%delivered_data_local_domain;
+ $data_gigs_href = \%delivered_data_gigs_local_domain;
+ }
elsif ($category =~ /(\S+) destination/) {
#Top 50 (host|domain|email|edomain) destinations
#Top (host|domain|email|edomain) destination
@@ -3616,7 +3678,7 @@ sub update_relayed {
#
# add_to_totals(\%totals,\@keys,$values);
#
-# Given a line of space seperated values, add them into the provided hash using @keys
+# Given a line of space separated values, add them into the provided hash using @keys
# as the hash keys.
#
# If the value contains a '%', then the value is set rather than added. Otherwise, we
@@ -3646,7 +3708,7 @@ sub add_to_totals {
#
# line_to_hash(\%hash,\@keys,$line);
#
-# Given a line of space seperated values, set them into the provided hash
+# Given a line of space separated values, set them into the provided hash
# using @keys as the hash keys.
#######################################################################
sub line_to_hash {
@@ -3712,7 +3774,7 @@ sub html2txt {
# until we've got all of the argument.
#
# This isn't perfect as all white space gets reduced to one space,
-# but it's as good as we can get! If it's esential that spacing
+# but it's as good as we can get! If it's essential that spacing
# be preserved precisely, then you get that by not using shell
# variables.
#######################################################################
@@ -3754,7 +3816,7 @@ sub set_worksheet_line {
#######################################################################
# @rcpt_times = parse_time_list($string);
#
-# Parse a comma seperated list of time values in seconds given by
+# Parse a comma separated list of time values in seconds given by
# the user and fill an array.
#
# Return a default list if $string is undefined.
@@ -3863,6 +3925,7 @@ while (@ARGV > 0 && substr($ARGV[0], 0, 1) eq '-') {
elsif ($ARGV[0] =~ /^-byemail$/) { $do_sender{Email} = 1 }
elsif ($ARGV[0] =~ /^-byemaildomain$/) { $do_sender{Edomain} = 1 }
elsif ($ARGV[0] =~ /^-byedomain$/) { $do_sender{Edomain} = 1 }
+ elsif ($ARGV[0] =~ /^-bylocaldomain$/) { $do_local_domain = 1 }
elsif ($ARGV[0] =~ /^-emptyok$/) { $emptyOK = 1 }
elsif ($ARGV[0] =~ /^-nvr$/) { $volume_rounding = 0 }
elsif ($ARGV[0] =~ /^-show_rt([,\d\+\-\*\/]+)?$/) { @rcpt_times = parse_time_list($1) }
@@ -4128,6 +4191,7 @@ if ($topcount > 0) {
print_league_table("\l$_ destination", $delivered_messages{$_}, $delivered_addresses{$_}, $delivered_data{$_},$delivered_data_gigs{$_}, $ws_top50, \$ws_top50_row);
}
print_league_table("local destination", \%delivered_messages_user, \%delivered_addresses_user, \%delivered_data_user,\%delivered_data_gigs_user, $ws_top50, \$ws_top50_row) if (($local_league_table || $include_remote_users) && %delivered_messages_user);
+ print_league_table("local domain destination", \%delivered_messages_local_domain, \%delivered_addresses_local_domain, \%delivered_data_local_domain,\%delivered_data_gigs_local_domain, $ws_top50, \$ws_top50_row) if (($local_league_table || $include_remote_users) && %delivered_messages_local_domain);
print_league_table("rejected ip", \%rejected_count_by_ip, undef, undef, undef, $ws_rej, \$ws_rej_row) if %rejected_count_by_ip;
print_league_table("temporarily rejected ip", \%temporarily_rejected_count_by_ip, undef, undef, undef, $ws_rej, \$ws_rej_row) if %temporarily_rejected_count_by_ip;