From: Jeremy Harris Date: Tue, 8 Sep 2015 22:05:20 +0000 (+0100) Subject: Capture substrings in ACL regex= . Bug 425. X-Git-Tag: exim-4_87_RC1~102 X-Git-Url: https://git.exim.org/exim.git/commitdiff_plain/f38917cc94ab337c15ff70c254dd564ee2dcafe7 Capture substrings in ACL regex= . Bug 425. --- diff --git a/doc/doc-docbook/spec.xfpt b/doc/doc-docbook/spec.xfpt index 01cdefcf8..8b8a531b9 100644 --- a/doc/doc-docbook/spec.xfpt +++ b/doc/doc-docbook/spec.xfpt @@ -11110,7 +11110,8 @@ support for TLS or the content scanning extension. When a &%match%& expansion condition succeeds, these variables contain the captured substrings identified by the regular expression during subsequent processing of the success string of the containing &%if%& expansion item. -However, they do not retain their values afterwards; in fact, their previous +In the expansion condition case +they do not retain their values afterwards; in fact, their previous values are restored at the end of processing an &%if%& item. The numerical variables may also be set externally by some other matching process which precedes the expansion of the string. For example, the commands available in @@ -12157,6 +12158,12 @@ increases for each accepted recipient. It can be referenced in an ACL. This variable is set to contain the matching regular expression after a &%regex%& ACL condition has matched (see section &<>&). +.vitem "&$regex1$&, &$regex2$&, etc" +.cindex "regex submatch variables (&$regex1$& &$regex2$& etc)" +When a &%regex%& or &%mime_regex%& ACL condition succeeds, +these variables contain the +captured substrings identified by the regular expression. + .vitem &$reply_address$& .vindex "&$reply_address$&" @@ -31463,6 +31470,8 @@ deny message = contains blacklisted regex ($regex_match_string) The conditions returns true if any one of the regular expressions matches. The &$regex_match_string$& expansion variable is then set up and contains the matching regular expression. 
+The expansion variables &$regex1$& &$regex2$& etc +are set to any substrings captured by the regular expression. &*Warning*&: With large messages, these conditions can be fairly CPU-intensive. diff --git a/doc/doc-txt/ChangeLog b/doc/doc-txt/ChangeLog index 09a040caf..f24883d92 100644 --- a/doc/doc-txt/ChangeLog +++ b/doc/doc-txt/ChangeLog @@ -31,6 +31,9 @@ HS/01 Bug 1671: Fix post transport crash. using 'split_spool_directory=yes' the construction of the spool file name failed already, exposing the same netto behaviour. +JH/03 Bug 425: Capture substrings in $regex1, $regex2 etc from regex & + mime_regex ACL conditions. + Exim version 4.86 ----------------- diff --git a/doc/doc-txt/NewStuff b/doc/doc-txt/NewStuff index f1598151e..eee0d347d 100644 --- a/doc/doc-txt/NewStuff +++ b/doc/doc-txt/NewStuff @@ -9,6 +9,9 @@ the documentation is updated, this file is reduced to a short list. Version 4.87 ------------ + 1. The ACL conditions regex and mime_regex now capture substrings + into numeric variables $regex1 to 9, like the "match" expansion condition. + Version 4.86 ------------ diff --git a/src/src/config.h.defaults b/src/src/config.h.defaults index c33e09805..596e651f0 100644 --- a/src/src/config.h.defaults +++ b/src/src/config.h.defaults @@ -116,6 +116,8 @@ it's a default value. 
*/ #define RADIUS_CONFIG_FILE #define RADIUS_LIB_TYPE +#define REGEX_VARS 9 + #define ROUTER_ACCEPT #define ROUTER_DNSLOOKUP #define ROUTER_IPLITERAL diff --git a/src/src/exim.c b/src/src/exim.c index d7cb5d88b..999b94cc1 100644 --- a/src/src/exim.c +++ b/src/src/exim.c @@ -1753,6 +1753,8 @@ regex_whitelisted_macro = regex_must_compile(US"^[A-Za-z0-9_/.-]*$", FALSE, TRUE); #endif +for (i = 0; i < REGEX_VARS; i++) regex_vars[i] = NULL; + /* If the program is called as "mailq" treat it as equivalent to "exim -bp"; this seems to be a generally accepted convention, since one finds symbolic diff --git a/src/src/expand.c b/src/src/expand.c index 89e0ac779..1bff52105 100644 --- a/src/src/expand.c +++ b/src/src/expand.c @@ -1726,7 +1726,14 @@ if (Ustrncmp(name, "auth", 4) == 0) uschar *endptr; int n = Ustrtoul(name + 4, &endptr, 10); if (*endptr == 0 && n != 0 && n <= AUTH_VARS) - return (auth_vars[n-1] == NULL)? US"" : auth_vars[n-1]; + return !auth_vars[n-1] ? US"" : auth_vars[n-1]; + } +else if (Ustrncmp(name, "regex", 5) == 0) + { + uschar *endptr; + int n = Ustrtoul(name + 5, &endptr, 10); + if (*endptr == 0 && n != 0 && n <= REGEX_VARS) + return !regex_vars[n-1] ? 
US"" : regex_vars[n-1]; } /* For all other variables, search the table */ diff --git a/src/src/globals.c b/src/src/globals.c index 4188b4d84..8445f001c 100644 --- a/src/src/globals.c +++ b/src/src/globals.c @@ -1090,8 +1090,9 @@ const pcre *regex_From = NULL; const pcre *regex_IGNOREQUOTA = NULL; const pcre *regex_PIPELINING = NULL; const pcre *regex_SIZE = NULL; -const pcre *regex_smtp_code = NULL; const pcre *regex_ismsgid = NULL; +const pcre *regex_smtp_code = NULL; +uschar *regex_vars[REGEX_VARS]; #ifdef WHITELIST_D_MACROS const pcre *regex_whitelisted_macro = NULL; #endif diff --git a/src/src/globals.h b/src/src/globals.h index 978a4cc78..3c69e43b2 100644 --- a/src/src/globals.h +++ b/src/src/globals.h @@ -717,8 +717,9 @@ extern const pcre *regex_From; /* For recognizing "From_" lines */ extern const pcre *regex_IGNOREQUOTA; /* For recognizing IGNOREQUOTA (LMTP) */ extern const pcre *regex_PIPELINING; /* For recognizing PIPELINING */ extern const pcre *regex_SIZE; /* For recognizing SIZE settings */ -extern const pcre *regex_smtp_code; /* For recognizing SMTP codes */ extern const pcre *regex_ismsgid; /* Compiled r.e. 
for message it */ +extern const pcre *regex_smtp_code; /* For recognizing SMTP codes */ +extern uschar *regex_vars[]; /* $regexN variables */ #ifdef WHITELIST_D_MACROS extern const pcre *regex_whitelisted_macro; /* For -D macro values */ #endif diff --git a/src/src/regex.c b/src/src/regex.c index ed73b6e0f..93422fadc 100644 --- a/src/src/regex.c +++ b/src/src/regex.c @@ -25,109 +25,120 @@ uschar regex_match_string_buffer[1024]; extern FILE *mime_stream; extern uschar *mime_current_boundary; -int -regex(const uschar **listptr) +static pcre_list * +compile(const uschar * list) { int sep = 0; - const uschar *list = *listptr; uschar *regex_string; uschar regex_string_buffer[1024]; - unsigned long mbox_size; - FILE *mbox_file; - pcre *re; - pcre_list *re_list_head = NULL; - pcre_list *re_list_item; const char *pcre_error; int pcre_erroffset; + pcre_list *re_list_head = NULL; + pcre_list *ri; + + /* precompile our regexes */ + while ((regex_string = string_nextinlist(&list, &sep, + regex_string_buffer, + sizeof(regex_string_buffer))) != NULL) { + pcre *re; + + /* parse option */ + if ( (strcmpic(regex_string,US"false") == 0) || + (Ustrcmp(regex_string,"0") == 0) ) + continue; /* explicitly no matching */ + + /* compile our regular expression */ + if (!(re = pcre_compile( CS regex_string, + 0, &pcre_error, &pcre_erroffset, NULL ))) { + log_write(0, LOG_MAIN, + "regex acl condition warning - error in regex '%s': %s at offset %d, skipped.", + regex_string, pcre_error, pcre_erroffset); + continue; + } + + ri = store_get(sizeof(pcre_list)); + ri->re = re; + ri->pcre_text = string_copy(regex_string); + ri->next = re_list_head; + re_list_head = ri; + } + return re_list_head; +} + +static int +matcher(pcre_list * re_list_head, uschar * linebuffer, int len) +{ + pcre_list * ri; + + for(ri = re_list_head; ri; ri = ri->next) + { + int ovec[3*(REGEX_VARS+1)]; + int n, nn; + + /* try matcher on the line */ + n = pcre_exec(ri->re, NULL, + CS linebuffer, len, 0, 0, + ovec, 
nelem(ovec)); + if (n > 0) + { + Ustrncpy(regex_match_string_buffer, ri->pcre_text, 1023); + regex_match_string = regex_match_string_buffer; + + for (nn = 1; nn < n; nn++) + regex_vars[nn-1] = + string_copyn(linebuffer + ovec[nn*2], ovec[nn*2+1] - ovec[nn*2]); + + return OK; + } + } + return FAIL; +} + +int +regex(const uschar **listptr) +{ + unsigned long mbox_size; + FILE *mbox_file; + pcre_list *re_list_head; uschar *linebuffer; long f_pos = 0; + int ret = FAIL; /* reset expansion variable */ regex_match_string = NULL; - if (mime_stream == NULL) { - /* We are in the DATA ACL */ + if (mime_stream == NULL) { /* We are in the DATA ACL */ mbox_file = spool_mbox(&mbox_size, NULL); - if (mbox_file == NULL) { - /* error while spooling */ + if (mbox_file == NULL) { /* error while spooling */ log_write(0, LOG_MAIN|LOG_PANIC, "regex acl condition: error while creating mbox spool file"); return DEFER; - }; + } } else { f_pos = ftell(mime_stream); mbox_file = mime_stream; - }; + } /* precompile our regexes */ - while ((regex_string = string_nextinlist(&list, &sep, - regex_string_buffer, - sizeof(regex_string_buffer))) != NULL) { - - /* parse option */ - if ( (strcmpic(regex_string,US"false") == 0) || - (Ustrcmp(regex_string,"0") == 0) ) { - /* explicitly no matching */ - continue; - }; - - /* compile our regular expression */ - re = pcre_compile( CS regex_string, - 0, - &pcre_error, - &pcre_erroffset, - NULL ); - - if (re == NULL) { - log_write(0, LOG_MAIN, - "regex acl condition warning - error in regex '%s': %s at offset %d, skipped.", regex_string, pcre_error, pcre_erroffset); - continue; - } - else { - re_list_item = store_get(sizeof(pcre_list)); - re_list_item->re = re; - re_list_item->pcre_text = string_copy(regex_string); - re_list_item->next = re_list_head; - re_list_head = re_list_item; - }; - }; - - /* no regexes -> nothing to do */ - if (re_list_head == NULL) { - return FAIL; - }; + if (!(re_list_head = compile(*listptr))) + return FAIL; /* no regexes -> nothing 
to do */ /* match each line against all regexes */ linebuffer = store_get(32767); while (fgets(CS linebuffer, 32767, mbox_file) != NULL) { - if ( (mime_stream != NULL) && (mime_current_boundary != NULL) ) { - /* check boundary */ - if (Ustrncmp(linebuffer,"--",2) == 0) { - if (Ustrncmp((linebuffer+2),mime_current_boundary,Ustrlen(mime_current_boundary)) == 0) - /* found boundary */ - break; - }; - }; - re_list_item = re_list_head; - do { - /* try matcher on the line */ - if (pcre_exec(re_list_item->re, NULL, CS linebuffer, - (int)Ustrlen(linebuffer), 0, 0, NULL, 0) >= 0) { - Ustrncpy(regex_match_string_buffer, re_list_item->pcre_text, 1023); - regex_match_string = regex_match_string_buffer; - if (mime_stream == NULL) - (void)fclose(mbox_file); - else { - clearerr(mime_stream); - fseek(mime_stream,f_pos,SEEK_SET); - }; - return OK; - }; - re_list_item = re_list_item->next; - } while (re_list_item != NULL); - }; + if ( mime_stream && mime_current_boundary /* check boundary */ + && Ustrncmp(linebuffer,"--",2) == 0 + && Ustrncmp((linebuffer+2),mime_current_boundary,Ustrlen(mime_current_boundary)) == 0) + break; /* found boundary */ + + if ((ret = matcher(re_list_head, linebuffer, (int)Ustrlen(linebuffer))) == OK) + goto done; + } + /* no matches ... */ + +done: if (mime_stream == NULL) (void)fclose(mbox_file); else { @@ -135,67 +146,25 @@ regex(const uschar **listptr) fseek(mime_stream,f_pos,SEEK_SET); }; - /* no matches ... 
*/ - return FAIL; + return ret; } int mime_regex(const uschar **listptr) { - int sep = 0; - const uschar *list = *listptr; - uschar *regex_string; - uschar regex_string_buffer[1024]; - pcre *re; pcre_list *re_list_head = NULL; - pcre_list *re_list_item; - const char *pcre_error; - int pcre_erroffset; FILE *f; uschar *mime_subject = NULL; int mime_subject_len = 0; + int ret; /* reset expansion variable */ regex_match_string = NULL; /* precompile our regexes */ - while ((regex_string = string_nextinlist(&list, &sep, - regex_string_buffer, - sizeof(regex_string_buffer))) != NULL) { - - /* parse option */ - if ( (strcmpic(regex_string,US"false") == 0) || - (Ustrcmp(regex_string,"0") == 0) ) { - /* explicitly no matching */ - continue; - }; - - /* compile our regular expression */ - re = pcre_compile( CS regex_string, - 0, - &pcre_error, - &pcre_erroffset, - NULL ); - - if (re == NULL) { - log_write(0, LOG_MAIN, - "regex acl condition warning - error in regex '%s': %s at offset %d, skipped.", regex_string, pcre_error, pcre_erroffset); - continue; - } - else { - re_list_item = store_get(sizeof(pcre_list)); - re_list_item->re = re; - re_list_item->pcre_text = string_copy(regex_string); - re_list_item->next = re_list_head; - re_list_head = re_list_item; - }; - }; - - /* no regexes -> nothing to do */ - if (re_list_head == NULL) { - return FAIL; - }; + if (!(re_list_head = compile(*listptr))) + return FAIL; /* no regexes -> nothing to do */ /* check if the file is already decoded */ if (mime_decoded_filename == NULL) { @@ -207,43 +176,25 @@ mime_regex(const uschar **listptr) log_write(0, LOG_MAIN, "mime_regex acl condition warning - could not decode MIME part to file."); return DEFER; - }; - }; - + } + } /* open file */ - f = fopen(CS mime_decoded_filename, "rb"); - if (f == NULL) { - /* open failed */ + if (!(f = fopen(CS mime_decoded_filename, "rb"))) { log_write(0, LOG_MAIN, - "mime_regex acl condition warning - can't open '%s' for reading.", mime_decoded_filename); + 
"mime_regex acl condition warning - can't open '%s' for reading.", + mime_decoded_filename); return DEFER; - }; + } /* get 32k memory */ mime_subject = (uschar *)store_get(32767); - /* read max 32k chars from file */ mime_subject_len = fread(mime_subject, 1, 32766, f); - re_list_item = re_list_head; - do { - /* try matcher on the mmapped file */ - debug_printf("Matching '%s'\n", re_list_item->pcre_text); - if (pcre_exec(re_list_item->re, NULL, CS mime_subject, - mime_subject_len, 0, 0, NULL, 0) >= 0) { - Ustrncpy(regex_match_string_buffer, re_list_item->pcre_text, 1023); - regex_match_string = regex_match_string_buffer; - (void)fclose(f); - return OK; - }; - re_list_item = re_list_item->next; - } while (re_list_item != NULL); - + ret = matcher(re_list_head, mime_subject, mime_subject_len); (void)fclose(f); - - /* no matches ... */ - return FAIL; + return ret; } #endif /* WITH_CONTENT_SCAN */ diff --git a/test/confs/0568 b/test/confs/0568 new file mode 100644 index 000000000..d5a8049c1 --- /dev/null +++ b/test/confs/0568 @@ -0,0 +1,58 @@ +# Exim test configuration 0568: ACL regex= + +exim_path = EXIM_PATH +host_lookup_order = bydns +primary_hostname = myhost.test.ex +rfc1413_query_timeout = 0s +spool_directory = DIR/spool +log_file_path = DIR/spool/log/%slog +gecos_pattern = "" +gecos_name = CALLER_NAME + + +# ----- Main settings ----- + +acl_smtp_rcpt = check_rcpt +acl_smtp_data = check_data +acl_not_smtp = check_data +acl_smtp_mime = check_mime + + +# ----- ACL ----- + +begin acl + +check_rcpt: + accept + +check_data: + warn regex = \N(THIS\s((\w+)\s)?REGEX)\N + message = X-Regex: Regex matched <$regex1> <$regex3> + + warn condition = ${if !eq{$h_fakereject:}{}} + control = fakereject + + warn condition = ${if !eq{$h_fakedefer:}{}} + control = fakedefer + + accept + +# ----- Routers ----- + +begin routers + +r1: + driver = accept + transport = t1 + +# ----- Transports ----- + +begin transports + +t1: + driver = appendfile + file = DIR/test-mail/$local_part + user 
= CALLER + + +# End diff --git a/test/log/0568 b/test/log/0568 new file mode 100644 index 000000000..1ab5847e6 --- /dev/null +++ b/test/log/0568 @@ -0,0 +1,6 @@ +1999-03-02 09:44:33 10HmaX-0005vi-00 <= CALLER@myhost.test.ex U=CALLER P=local-esmtp S=sss id=41C2F849.3060203@projectile.test.ex +1999-03-02 09:44:33 10HmaX-0005vi-00 => userx R=r1 T=t1 +1999-03-02 09:44:33 10HmaX-0005vi-00 Completed +1999-03-02 09:44:33 10HmaY-0005vi-00 <= CALLER@myhost.test.ex U=CALLER P=local-esmtp S=sss id=41C2F849.3060203@projectile.test.ex +1999-03-02 09:44:33 10HmaY-0005vi-00 => userx R=r1 T=t1 +1999-03-02 09:44:33 10HmaY-0005vi-00 Completed diff --git a/test/mail/0568.userx b/test/mail/0568.userx new file mode 100644 index 000000000..59fa44af8 --- /dev/null +++ b/test/mail/0568.userx @@ -0,0 +1,33 @@ +From CALLER@myhost.test.ex Tue Mar 02 09:44:33 1999 +Received: from CALLER (helo=test.ex) + by myhost.test.ex with local-esmtp (Exim x.yz) + (envelope-from ) + id 10HmaX-0005vi-00 + for userx@test.ex; Tue, 2 Mar 1999 09:44:33 +0000 +From: Test person +To: Me +Subject: A real test message +Date: Tue, 2 Mar 1999 09:44:33 +0000 +Message-ID: <41C2F849.3060203@projectile.test.ex> +Sender: CALLER_NAME + +OK, this should look like a genuine message. + +From CALLER@myhost.test.ex Tue Mar 02 09:44:33 1999 +Received: from CALLER (helo=test.ex) + by myhost.test.ex with local-esmtp (Exim x.yz) + (envelope-from ) + id 10HmaY-0005vi-00 + for userx@test.ex; Tue, 2 Mar 1999 09:44:33 +0000 +From: Test person +To: Me +Subject: A real test message +Date: Tue, 2 Mar 1999 09:44:33 +0000 +Message-ID: <41C2F849.3060203@projectile.test.ex> +FakeReject: test fakereject +Sender: CALLER_NAME +X-Regex: Regex matched + +OK, this should look like a genuine message, but +it will trip on THIS gazornenplaz REGEX. 
+ diff --git a/test/scripts/0000-Basic/0568 b/test/scripts/0000-Basic/0568 new file mode 100644 index 000000000..a660f6805 --- /dev/null +++ b/test/scripts/0000-Basic/0568 @@ -0,0 +1,38 @@ +# ACL regex= test +# +# These tests are copies from testcase 4000; +# they should be removed from there but I don't +# run spamassassin so can't verify the change. +# +exim -odi -bs +ehlo test.ex +mail from:<> +rcpt to: +data +From: Test person +To: Me +Subject: A real test message +Date: Fri, 17 Dec 2004 16:13:04 +0100 +Message-ID: <41C2F849.3060203@projectile.test.ex> + +OK, this should look like a genuine message. +. +quit +**** +exim -odi -bs +ehlo test.ex +mail from:<> +rcpt to: +data +From: Test person +To: Me +Subject: A real test message +Date: Fri, 17 Dec 2004 16:13:04 +0100 +Message-ID: <41C2F849.3060203@projectile.test.ex> +FakeReject: test fakereject + +OK, this should look like a genuine message, but +it will trip on THIS gazornenplaz REGEX. +. +quit +**** diff --git a/test/stdout/0568 b/test/stdout/0568 new file mode 100644 index 000000000..2db3157dd --- /dev/null +++ b/test/stdout/0568 @@ -0,0 +1,24 @@ +220 myhost.test.ex ESMTP Exim x.yz Tue, 2 Mar 1999 09:44:33 +0000 +250-myhost.test.ex Hello CALLER at test.ex +250-SIZE 52428800 +250-8BITMIME +250-PIPELINING +250 HELP +250 OK +250 Accepted +354 Enter message, ending with "." on a line by itself +250 OK id=10HmaX-0005vi-00 +221 myhost.test.ex closing connection +220 myhost.test.ex ESMTP Exim x.yz Tue, 2 Mar 1999 09:44:33 +0000 +250-myhost.test.ex Hello CALLER at test.ex +250-SIZE 52428800 +250-8BITMIME +250-PIPELINING +250 HELP +250 OK +250 Accepted +354 Enter message, ending with "." on a line by itself +550-Your message has been rejected but is being kept for evaluation. +550-If it was a legitimate message, it may still be delivered to the target +550 recipient(s). +221 myhost.test.ex closing connection