From: Philip Hazel Date: Wed, 7 Feb 2007 12:23:35 +0000 (+0000) Subject: Jori Hamalainen's patch to speed up exigrep, and fix two typos. X-Git-Tag: exim-4_67~42 X-Git-Url: https://git.exim.org/users/heiko/exim.git/commitdiff_plain/75b1493f48e2212184289338fa4c42941ac8d426?ds=sidebyside Jori Hamalainen's patch to speed up exigrep, and fix two typos. --- diff --git a/doc/doc-txt/ChangeLog b/doc/doc-txt/ChangeLog index 2e7d57a20..34d42c92c 100644 --- a/doc/doc-txt/ChangeLog +++ b/doc/doc-txt/ChangeLog @@ -1,4 +1,4 @@ -$Cambridge: exim/doc/doc-txt/ChangeLog,v 1.474 2007/02/07 11:24:56 ph10 Exp $ +$Cambridge: exim/doc/doc-txt/ChangeLog,v 1.475 2007/02/07 12:23:35 ph10 Exp $ Change log file for Exim from version 4.21 ------------------------------------------- @@ -91,6 +91,9 @@ PH/21 Long custom messages for fakedefer and fakereject are now split up into multiline reponses in the same way that messages for "deny" and other ACL rejections are. +PH/22 Applied Jori Hamalainen's speed-up changes and typo fixes to exigrep, + with slight modification. + Exim version 4.66 ----------------- diff --git a/doc/doc-txt/NewStuff b/doc/doc-txt/NewStuff index 3fbb1b662..04177227d 100644 --- a/doc/doc-txt/NewStuff +++ b/doc/doc-txt/NewStuff @@ -1,4 +1,4 @@ -$Cambridge: exim/doc/doc-txt/NewStuff,v 1.139 2007/02/06 14:49:13 ph10 Exp $ +$Cambridge: exim/doc/doc-txt/NewStuff,v 1.140 2007/02/07 12:23:35 ph10 Exp $ New Features in Exim -------------------- @@ -286,6 +286,12 @@ Version 4.67 be used to suppress the use of PIPELINING to certain hosts, while still supporting the other SMTP extensions (cf hosts_avoid_tls). +15. By default, exigrep does case-insensitive matches. There is now a -I option + that makes it case-sensitive. This may give a performance improvement when + searching large log files. Without -I, the Perl pattern matches use the /i + option; with -I they don't. In both cases it is possible to change the case + sensitivity within the pattern using (?i) or (?-i). 
+ Version 4.66 ------------ diff --git a/src/ACKNOWLEDGMENTS b/src/ACKNOWLEDGMENTS index d9238db38..15c1f3fed 100644 --- a/src/ACKNOWLEDGMENTS +++ b/src/ACKNOWLEDGMENTS @@ -1,4 +1,4 @@ -$Cambridge: exim/src/ACKNOWLEDGMENTS,v 1.72 2007/02/06 10:00:24 ph10 Exp $ +$Cambridge: exim/src/ACKNOWLEDGMENTS,v 1.73 2007/02/07 12:23:35 ph10 Exp $ EXIM ACKNOWLEDGEMENTS @@ -20,7 +20,7 @@ relatively small patches. Philip Hazel Lists created: 20 November 2002 -Last updated: 06 February 2007 +Last updated: 07 February 2007 THE OLD LIST @@ -157,6 +157,7 @@ Michael Haardt Tidies to make the code stricter Thomas Hager Patch for saslauthd crash bug Richard Hall Fix for file descriptor leak in redirection Jori Hamalainen Patch to add features to exiqsumm + Patch to speed up exigrep Steve Haslam Lots of stuff, including HMAC computations Better error messages for BDB diff --git a/src/src/exigrep.src b/src/src/exigrep.src index f888984b0..7b6360f01 100644 --- a/src/src/exigrep.src +++ b/src/src/exigrep.src @@ -1,5 +1,5 @@ #! PERL_COMMAND -w -# $Cambridge: exim/src/src/exigrep.src,v 1.4 2007/01/31 16:52:12 ph10 Exp $ +# $Cambridge: exim/src/src/exigrep.src,v 1.5 2007/02/07 12:23:35 ph10 Exp $ use strict; @@ -28,6 +28,10 @@ use strict; # appears to be compressed, it is passed through zcat. We can't just do this # for all files, because zcat chokes on non-compressed files. +# Performance optimized in 02/02/2007 by Jori Hamalainen +# Typical run time acceleration: 4 times + + use Getopt::Std qw(getopts); use POSIX qw(mktime); @@ -38,7 +42,7 @@ use POSIX qw(mktime); sub seconds { my($year,$month,$day,$hour,$min,$sec,$tzs,$tzh,$tzm) = - $_[0] =~ /^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?>\s([+-])(\d\d)(\d\d))?/; + $_[0] =~ /^(\d{4})-(\d\d)-(\d\d)\s(\d\d):(\d\d):(\d\d)(?>\s([+-])(\d\d)(\d\d))?/o; my $seconds = mktime $sec, $min, $hour, $day, $month - 1, $year - 1900; @@ -55,22 +59,22 @@ return $seconds; # This subroutine processes a single line (in $_) from a log file. 
Program # defensively against short lines finding their way into the log. -my (%saved, %id_list, $pattern, $queue_time); +my (%saved, %id_list, $pattern, $queue_time, $insensitive); sub do_line { # Convert syslog lines to mainlog format, as in eximstats. -if (! /^\\d{4}/) { $_ =~ s/^.*? exim\b.*?: //; } +if (!/^\d{4}-/o) { $_ =~ s/^.*? exim\b.*?: //o; } return unless - my($date,$entry) = /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d (?:[+-]\d{4} )?)(.*)/; + my($date,$id) = /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d (?:[+-]\d{4} )?)(\w{6}\-\w{6}\-\w{2})?/o; # Handle the case when the log line belongs to a specific message. We save # lines for specific messages until the message is complete. Then either print # or discard. -if (my($id) = $entry =~ /^(?:\[\d+\]\s)?(\w{6}\-\w{6}\-\w{2})/) +if (defined $id) { $saved{$id} = '' unless defined($saved{$id}); @@ -78,17 +82,20 @@ if (my($id) = $entry =~ /^(?:\[\d+\]\s)?(\w{6}\-\w{6}\-\w{2})/) $saved{$id} .= $_; - # Are we interested in this id ? + # Are we interested in this id ? Short circuit if we already were interested. - $id_list{$id} = 1 if /$pattern/io; + $id_list{$id} = 1 if defined $id_list{$id} || + ($insensitive && /$pattern/io) || /$pattern/o; # See if this is a completion for some message. If it is interesting, # print it, but in any event, throw away what was saved. - if ($entry =~ - /(?:Completed|rejected (?:by local_scan|by non-SMTP ACL|after DATA))/) + if (index($_, 'Completed') != -1 || + (index($_, 'rejected') != -1 && + /rejected (?:by local_scan|by non-SMTP ACL|after DATA)/o)) { - if ($saved{$id} =~ /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d ([+-]\d{4} )?)(\w{6}\-\w{6}\-\w{2})/) + if ($queue_time != -1 && + $saved{$id} =~ /^(\d{4}-\d\d-\d\d \d\d:\d\d:\d\d ([+-]\d{4} )?)/o) { my $old_sec = &seconds($1); my $sec = &seconds($date); @@ -108,7 +115,8 @@ if (my($id) = $entry =~ /^(?:\[\d+\]\s)?(\w{6}\-\w{6}\-\w{2})/) # Handle the case where the log line does not belong to a specific message. # Print it if it is interesting. 
-elsif ($entry =~ /$pattern/io) { print "$_\n"; } +elsif (($insensitive && $_ =~ /$pattern/io) || $_ =~ /$pattern/o) + { print "$_\n"; } } @@ -116,10 +124,11 @@ elsif ($entry =~ /$pattern/io) { print "$_\n"; } # are quoted if the -l flag is given. The -t flag gives a time-on-queue value # which is an additional condition. -getopts('lt:',\my %args); +getopts('Ilt:',\my %args); $queue_time = $args{'t'}? $args{'t'} : -1; +$insensitive = $args{'I'}? 0 : 1; -die "usage: exigrep [-l] [-t <seconds>] <pattern> [<log file>]...\n" +die "usage: exigrep [-I] [-l] [-t <seconds>] <pattern> [<log file>]...\n" if ($#ARGV < 0); $pattern = shift @ARGV; @@ -134,7 +143,7 @@ if (@ARGV) foreach (@ARGV) { my $filename = $_; - if ($filename =~ /\.(?:COMPRESS_SUFFIX)$/) + if ($filename =~ /\.(?:COMPRESS_SUFFIX)$/o) { open(LOG, "ZCAT_COMMAND $filename |") || die "Unable to zcat $filename: $!\n"; @@ -154,6 +163,6 @@ else { do_line() while (<STDIN>); } # At the end of processing all the input, print any uncompleted data -for (keys %id_list) { print "+++ $_ not completed +++\n$saved{$_}\n;" } +for (keys %id_list) { print "+++ $_ not completed +++\n$saved{$_}\n"; } # End of exigrep