From 35aacb69f5c839a4b77158464e401d86eb422ed6 Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Fri, 26 Jan 2024 21:58:59 +0000 Subject: [PATCH] ACL: in "regex" condition, release store every thousand lines. Bug 3047 --- doc/doc-txt/ChangeLog | 2 + src/src/acl.c | 4 +- src/src/macros.h | 5 +++ src/src/regex.c | 77 ++++++++++++++++++++------------- src/src/transports/appendfile.c | 9 +--- 5 files changed, 57 insertions(+), 40 deletions(-) diff --git a/doc/doc-txt/ChangeLog b/doc/doc-txt/ChangeLog index e258966d8..48cc62910 100644 --- a/doc/doc-txt/ChangeLog +++ b/doc/doc-txt/ChangeLog @@ -99,6 +99,8 @@ JH/20 Bug 3047: A recent (somewhere between 10.34 and 10.42) version of the when a user had over 104207 messages stored and the appendfile maildir_quota_directory_regex option is in use. Release the allocated memory every thosand files to avoid this. + The same issue arises with the ACL regex condition, which is applied + to every line of a received message. Exim version 4.97 diff --git a/src/src/acl.c b/src/src/acl.c index 9223fcec8..ecba07b25 100644 --- a/src/src/acl.c +++ b/src/src/acl.c @@ -3955,11 +3955,11 @@ for (; cb; cb = cb->next) CUSS &recipient_data); break; - #ifdef WITH_CONTENT_SCAN +#ifdef WITH_CONTENT_SCAN case ACLC_REGEX: rc = regex(&arg, textonly); break; - #endif +#endif case ACLC_REMOVE_HEADER: setup_remove_header(arg); diff --git a/src/src/macros.h b/src/src/macros.h index 5279f70d0..8aed335b5 100644 --- a/src/src/macros.h +++ b/src/src/macros.h @@ -1185,4 +1185,9 @@ typedef enum { sw_mrc_tx_fail, /* transmit failed */ } sw_mrc_t; +/* Recent versions of PCRE2 are allocating 20kB per match, rather than the previous 112 B. +When doing an extended loop of matching, release store periodically. 
*/ + +#define REGEX_LOOPCOUNT_STORE_RESET 1000 + /* End of macros.h */ diff --git a/src/src/regex.c b/src/src/regex.c index af425db2d..cda807716 100644 --- a/src/src/regex.c +++ b/src/src/regex.c @@ -31,12 +31,11 @@ extern uschar *mime_current_boundary; static pcre_list * -compile(const uschar * list, BOOL cacheable) +compile(const uschar * list, BOOL cacheable, int * cntp) { -int sep = 0; +int sep = 0, cnt = 0; uschar * regex_string; -pcre_list * re_list_head = NULL; -pcre_list * ri; +pcre_list * re_list_head = NULL, * ri; /* precompile our regexes */ while ((regex_string = string_nextinlist(&list, &sep, NULL, 0))) @@ -58,7 +57,9 @@ while ((regex_string = string_nextinlist(&list, &sep, NULL, 0))) ri->pcre_text = regex_string; ri->next = re_list_head; re_list_head = ri; + cnt++; } +if (cntp) *cntp = cnt; return re_list_head; } @@ -112,7 +113,8 @@ FILE * mbox_file; pcre_list * re_list_head; uschar * linebuffer; long f_pos = 0; -int ret = FAIL; +int ret = FAIL, cnt, lcount = REGEX_LOOPCOUNT_STORE_RESET; +rmark reset_point; regex_vars_clear(); @@ -136,26 +138,34 @@ else mbox_file = mime_stream; } -/* precompile our regexes */ -if (!(re_list_head = compile(*listptr, cacheable))) - return FAIL; /* no regexes -> nothing to do */ - -/* match each line against all regexes */ -linebuffer = store_get(32767, GET_TAINTED); -while (fgets(CS linebuffer, 32767, mbox_file)) +reset_point = store_mark(); { - if ( mime_stream && mime_current_boundary /* check boundary */ - && Ustrncmp(linebuffer, "--", 2) == 0 - && Ustrncmp((linebuffer+2), mime_current_boundary, - Ustrlen(mime_current_boundary)) == 0) - break; /* found boundary */ - - if ((ret = matcher(re_list_head, linebuffer, (int)Ustrlen(linebuffer))) == OK) - goto done; + /* precompile our regexes */ + if ((re_list_head = compile(*listptr, cacheable, &cnt))) + { + /* match each line against all regexes */ + linebuffer = store_get(32767, GET_TAINTED); + while (fgets(CS linebuffer, 32767, mbox_file)) + { + if ( mime_stream && 
mime_current_boundary /* check boundary */ + && Ustrncmp(linebuffer, "--", 2) == 0 + && Ustrncmp((linebuffer+2), mime_current_boundary, + Ustrlen(mime_current_boundary)) == 0) + break; /* found boundary */ + + if ((ret = matcher(re_list_head, linebuffer, (int)Ustrlen(linebuffer))) == OK) + break; + + if ((lcount -= cnt) <= 0) + { + store_reset(reset_point); reset_point = store_mark(); + lcount = REGEX_LOOPCOUNT_STORE_RESET; + } + } + } } -/* no matches ... */ +store_reset(reset_point); -done: if (!mime_stream) (void)fclose(mbox_file); else @@ -180,14 +190,11 @@ pcre_list * re_list_head = NULL; FILE * f; uschar * mime_subject = NULL; int mime_subject_len = 0; -int ret; +int ret = FAIL; +rmark reset_point; regex_vars_clear(); -/* precompile our regexes */ -if (!(re_list_head = compile(*listptr, cacheable))) - return FAIL; /* no regexes -> nothing to do */ - /* check if the file is already decoded */ if (!mime_decoded_filename) { /* no, decode it first */ @@ -210,12 +217,20 @@ if (!(f = fopen(CS mime_decoded_filename, "rb"))) return DEFER; } -/* get 32k memory, tainted */ -mime_subject = store_get(32767, GET_TAINTED); +reset_point = store_mark(); + { + /* precompile our regexes */ + if ((re_list_head = compile(*listptr, cacheable, NULL))) + { + /* get 32k memory, tainted */ + mime_subject = store_get(32767, GET_TAINTED); -mime_subject_len = fread(mime_subject, 1, 32766, f); + mime_subject_len = fread(mime_subject, 1, 32766, f); -ret = matcher(re_list_head, mime_subject, mime_subject_len); + ret = matcher(re_list_head, mime_subject, mime_subject_len); + } + } +store_reset(reset_point); (void)fclose(f); return ret; } diff --git a/src/src/transports/appendfile.c b/src/src/transports/appendfile.c index 91b353079..ce52cc6ff 100644 --- a/src/src/transports/appendfile.c +++ b/src/src/transports/appendfile.c @@ -153,11 +153,6 @@ static const char *mailbox_formats[] = { (!ob->quota_warn_threshold_is_percent || ob->quota_value > 0)) -/* Free memory allocated by PCRE2 every so 
often, because a recent version -is now using 20kB for every match call */ - -#define RESET_STORE_FILECNT 1000 - /************************************************* * Setup entry point * *************************************************/ @@ -667,7 +662,7 @@ check_dir_size(const uschar * dirname, int * countptr, const pcre2_code * re) { DIR * dir; off_t sum = 0; -int count = *countptr, lcount = RESET_STORE_FILECNT; +int count = *countptr, lcount = REGEX_LOOPCOUNT_STORE_RESET; rmark reset_point = store_mark(); if (!(dir = exim_opendir(dirname))) return 0; @@ -683,7 +678,7 @@ for (struct dirent * ent; ent = readdir(dir); ) if (--lcount == 0) { store_reset(reset_point); reset_point = store_mark(); - lcount = RESET_STORE_FILECNT; + lcount = REGEX_LOOPCOUNT_STORE_RESET; } /* If there's a regex, try to find the size using it */ -- 2.30.2