X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/d5b80e59458182b2d557a929a18cb8c70cd56b68..3c90bbcdc7cf73298156f7bcd5f5e750e7814e72:/src/src/parse.c diff --git a/src/src/parse.c b/src/src/parse.c index 68a83b0e8..acece9b78 100644 --- a/src/src/parse.c +++ b/src/src/parse.c @@ -2,7 +2,8 @@ * Exim - an Internet mail transport agent * *************************************************/ -/* Copyright (c) University of Cambridge 1995 - 2017 */ +/* Copyright (c) University of Cambridge 1995 - 2018 */ +/* Copyright (c) The Exim Maintainers 2020 */ /* See the file NOTICE for conditions of use and distribution. */ /* Functions for parsing addresses */ @@ -23,7 +24,7 @@ redundant apparatus. */ address_item *deliver_make_addr(uschar *address, BOOL copy) { -address_item *addr = store_get(sizeof(address_item)); +address_item *addr = store_get(sizeof(address_item), FALSE); addr->next = NULL; addr->parent = NULL; addr->address = address; @@ -197,10 +198,10 @@ last_comment_position = s; while (*s) { int c, level; - while (isspace(*s)) s++; - if (*s != '(') break; + + if (Uskip_whitespace(&s) != '(') break; level = 1; - while((c = *(++s)) != 0) + while((c = *(++s))) { if (c == '(') level++; else if (c == ')') { if (--level <= 0) { s++; break; } } @@ -421,10 +422,10 @@ for (;;) if (*s == '\"') { *t++ = '\"'; - while ((c = *(++s)) != 0 && c != '\"') + while ((c = *++s) && c != '\"') { *t++ = c; - if (c == '\\' && s[1] != 0) *t++ = *(++s); + if (c == '\\' && s[1]) *t++ = *++s; } if (c == '\"') { @@ -443,7 +444,7 @@ for (;;) else while (!mac_iscntrl_or_special(*s) || *s == '\\') { c = *t++ = *s++; - if (c == '\\' && *s != 0) *t++ = *s++; + if (c == '\\' && *s) *t++ = *s++; } /* Terminate the word and skip subsequent comment */ @@ -618,7 +619,7 @@ uschar * parse_extract_address(uschar *mailbox, uschar **errorptr, int *start, int *end, int *domain, BOOL allow_null) { -uschar *yield = store_get(Ustrlen(mailbox) + 1); +uschar *yield = store_get(Ustrlen(mailbox) + 1, is_tainted(mailbox)); uschar *startptr, *endptr; uschar *s = US mailbox; uschar *t = US yield; @@ -638,7 +639,7 @@ RESTART: /* Come back here after passing a group name */ s = skip_comment(s); startptr = s; /* In case addr-spec */ s = read_local_part(s, t, errorptr, TRUE); /* Dot separated words */ -if (*errorptr != NULL) goto PARSE_FAILED; +if (*errorptr) goto PARSE_FAILED; /* If the terminator is neither < nor @ then the format of the address must either be a bare local-part (we are now at the end), or a phrase @@ -658,7 +659,7 @@ if (*s != '@' && *s != '<') end of string will produce a null local_part and therefore fail. We don't need to keep updating t, as the phrase isn't to be kept. */ - while (*s != '<' && (!parse_allow_group || *s != ':')) + while (*s != '<' && (!f.parse_allow_group || *s != ':')) { s = read_local_part(s, t, errorptr, FALSE); if (*errorptr) @@ -670,8 +671,8 @@ if (*s != '@' && *s != '<') if (*s == ':') { - parse_found_group = TRUE; - parse_allow_group = FALSE; + f.parse_found_group = TRUE; + f.parse_allow_group = FALSE; s++; goto RESTART; } @@ -745,7 +746,7 @@ if (*s == '<') *errorptr = s[-1] == 0 ? US"'>' missing at end of address" : string_sprintf("malformed address: %.32s may not follow %.*s", - s-1, s - US mailbox - 1, mailbox); + s-1, (int)(s - US mailbox - 1), mailbox); goto PARSE_FAILED; } @@ -790,15 +791,15 @@ move it back past white space if necessary. */ PARSE_SUCCEEDED: if (*s != 0) { - if (parse_found_group && *s == ';') + if (f.parse_found_group && *s == ';') { - parse_found_group = FALSE; - parse_allow_group = TRUE; + f.parse_found_group = FALSE; + f.parse_allow_group = TRUE; } else { *errorptr = string_sprintf("malformed address: %.32s may not follow %.*s", - s, s - US mailbox, mailbox); + s, (int)(s - US mailbox), mailbox); goto PARSE_FAILED; } } @@ -824,10 +825,10 @@ We might have an empty address in a group - the caller can choose to ignore this. We must, however, keep the flags correct. */ PARSE_FAILED: -if (parse_found_group && *s == ';') +if (f.parse_found_group && *s == ';') { - parse_found_group = FALSE; - parse_allow_group = TRUE; + f.parse_found_group = FALSE; + f.parse_allow_group = TRUE; } return NULL; } @@ -842,8 +843,7 @@ return NULL; /* This function is used for quoting text in headers according to RFC 2047. If the only characters that strictly need quoting are spaces, we return the -original string, unmodified. If a quoted string is too long for the buffer, it -is truncated. (This shouldn't happen: this is normally handling short strings.) +original string, unmodified. Hmmph. As always, things get perverted for other uses. This function was originally for the "phrase" part of addresses. Now it is being used for much @@ -855,77 +855,57 @@ Arguments: chars len the length of the string charset the name of the character set; NULL => iso-8859-1 - buffer the buffer to put the answer in - buffer_size the size of the buffer fold if TRUE, a newline is inserted before the separating space when more than one encoded-word is generated Returns: pointer to the original string, if no quoting needed, or - pointer to buffer containing the quoted string, or - a pointer to "String too long" if the buffer can't even hold - the introduction + pointer to allocated memory containing the quoted string */ const uschar * -parse_quote_2047(const uschar *string, int len, uschar *charset, uschar *buffer, - int buffer_size, BOOL fold) +parse_quote_2047(const uschar *string, int len, uschar *charset, BOOL fold) { -const uschar *s = string; -uschar *p, *t; -int hlen; +const uschar * s = string; +int hlen, l; BOOL coded = FALSE; BOOL first_byte = FALSE; +gstring * g = + string_fmt_append(NULL, "=?%s?Q?", charset ? charset : US"iso-8859-1"); -if (charset == NULL) charset = US"iso-8859-1"; - -/* We don't expect this to fail! */ - -if (!string_format(buffer, buffer_size, "=?%s?Q?", charset)) - return US"String too long"; - -hlen = Ustrlen(buffer); -t = buffer + hlen; -p = buffer; +hlen = l = g->ptr; -for (; len > 0; len--) +for (s = string; len > 0; s++, len--) { - int ch = *s++; - if (t > buffer + buffer_size - hlen - 8) break; + int ch = *s; - if ((t - p > 67) && !first_byte) + if (g->ptr - l > 67 && !first_byte) { - *t++ = '?'; - *t++ = '='; - if (fold) *t++ = '\n'; - *t++ = ' '; - p = t; - Ustrncpy(p, buffer, hlen); - t += hlen; + g = fold ? string_catn(g, US"?=\n ", 4) : string_catn(g, US"?= ", 3); + l = g->ptr; + g = string_catn(g, g->s, hlen); } - if (ch < 33 || ch > 126 || - Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL) + if ( ch < 33 || ch > 126 + || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL) { if (ch == ' ') { - *t++ = '_'; + g = string_catn(g, US"_", 1); first_byte = FALSE; } else { - t += sprintf(CS t, "=%02X", ch); + g = string_fmt_append(g, "=%02X", ch); coded = TRUE; first_byte = !first_byte; } } - else { *t++ = ch; first_byte = FALSE; } + else + { g = string_catn(g, s, 1); first_byte = FALSE; } } -*t++ = '?'; -*t++ = '='; -*t = 0; - -return coded? buffer : string; +g = string_catn(g, US"?=", 2); +return coded ? string_from_gstring(g) : string; } @@ -968,32 +948,25 @@ August 2000: Additional code added: We *could* use this for all cases, getting rid of the messy original code, but leave it for now. It would complicate simple cases like "John Q. Smith". -The result is passed back in the buffer; it is usually going to be added to -some other string. In order to be sure there is going to be no overflow, -restrict the length of the input to 1/4 of the buffer size - this allows for -every single character to be quoted or encoded without overflowing, and that -wouldn't happen because of amalgamation. If the phrase is too long, return a -fixed string. +The result is passed back in allocated memory. Arguments: phrase an RFC822 phrase len the length of the phrase - buffer a buffer to put the result in - buffer_size the size of the buffer Returns: the fixed RFC822 phrase */ const uschar * -parse_fix_phrase(const uschar *phrase, int len, uschar *buffer, int buffer_size) +parse_fix_phrase(const uschar *phrase, int len) { int ch, i; BOOL quoted = FALSE; const uschar *s, *end; +uschar * buffer; uschar *t, *yield; while (len > 0 && isspace(*phrase)) { phrase++; len--; } -if (len > buffer_size/4) return US"Name too long"; /* See if there are any non-printing characters, and if so, use the RFC 2047 encoding for the whole thing. */ @@ -1001,11 +974,13 @@ encoding for the whole thing. */ for (i = 0, s = phrase; i < len; i++, s++) if ((*s < 32 && *s != '\t') || *s > 126) break; -if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer, - buffer_size, FALSE); +if (i < len) + return parse_quote_2047(phrase, len, headers_charset, FALSE); /* No non-printers; use the RFC 822 quoting rules */ +buffer = store_get(len*4, is_tainted(phrase)); + s = phrase; end = s + len; yield = t = buffer + 1; @@ -1172,6 +1147,7 @@ while (s < end) } *t = 0; +store_release_above(t+1); return yield; } @@ -1277,10 +1253,10 @@ for (;;) However, if the list is empty only because syntax errors were skipped, we return FF_DELIVERED. */ - if (*s == 0) + if (!*s) { - return (count > 0 || (syntax_errors != NULL && *syntax_errors != NULL))? - FF_DELIVERED : FF_NOTDELIVERED; + return (count > 0 || (syntax_errors && *syntax_errors)) + ? FF_DELIVERED : FF_NOTDELIVERED; /* This previous code returns FF_ERROR if nothing is generated but a syntax error has been skipped. I now think it is the wrong approach, but @@ -1396,7 +1372,7 @@ for (;;) if (flen <= 0) { - *error = string_sprintf("file name missing after :include:"); + *error = US"file name missing after :include:"; return FF_ERROR; } @@ -1411,7 +1387,7 @@ for (;;) /* Insist on absolute path */ - if (filename[0]!= '/') + if (filename[0] != '/') { *error = string_sprintf("included file \"%s\" is not an absolute path", filename); @@ -1420,12 +1396,19 @@ for (;;) /* Check if include is permitted */ - if ((options & RDO_INCLUDE) != 0) + if (options & RDO_INCLUDE) { *error = US"included files not permitted"; return FF_ERROR; } + if (is_tainted(filename)) + { + *error = string_sprintf("Tainted name '%s' for included file not permitted\n", + filename); + return FF_ERROR; + } + /* Check file name if required */ if (directory) @@ -1447,7 +1430,7 @@ for (;;) with a flag that fails symlinks. */ { - int fd = open(CS directory, O_RDONLY); + int fd = exim_open2(CS directory, O_RDONLY); if (fd < 0) { *error = string_sprintf("failed to open directory %s", directory); @@ -1463,7 +1446,7 @@ for (;;) temp = *p; *p = '\0'; - fd2 = openat(fd, CS q, O_RDONLY|O_NOFOLLOW); + fd2 = exim_openat(fd, CS q, O_RDONLY|O_NOFOLLOW); close(fd); *p = temp; if (fd2 < 0) @@ -1517,7 +1500,7 @@ for (;;) if (!f) { - *error = string_open_failed(errno, "included file %s", filename); + *error = string_open_failed("included file %s", filename); return FF_INCLUDEFAIL; } @@ -1547,7 +1530,7 @@ for (;;) return FF_ERROR; } - filebuf = store_get(statbuf.st_size + 1); + filebuf = store_get(statbuf.st_size + 1, is_tainted(filename)); if (fread(filebuf, 1, statbuf.st_size, f) != statbuf.st_size) { *error = string_sprintf("error while reading included file %s: %s", @@ -1607,14 +1590,14 @@ for (;;) { recipient = parse_extract_address(s+1, error, &start, &end, &domain, FALSE); - if (recipient != NULL) - recipient = (domain != 0)? NULL : + if (recipient) + recipient = domain != 0 ? NULL : string_sprintf("%s@%s", recipient, incoming_domain); } /* Try parsing the item as an address. */ - if (recipient == NULL) recipient = + if (!recipient) recipient = parse_extract_address(s, error, &start, &end, &domain, FALSE); /* If item starts with / or | and is not a valid address, or there @@ -1623,7 +1606,7 @@ for (;;) if ((*s == '|' || *s == '/') && (recipient == NULL || domain == 0)) { - uschar *t = store_get(Ustrlen(s) + 1); + uschar *t = store_get(Ustrlen(s) + 1, is_tainted(s)); uschar *p = t; uschar *q = s; while (*q != 0) @@ -1662,7 +1645,7 @@ for (;;) if (syntax_errors != NULL) { - error_block *e = store_get(sizeof(error_block)); + error_block *e = store_get(sizeof(error_block), FALSE); error_block *last = *syntax_errors; if (last == NULL) *syntax_errors = e; else { @@ -1730,6 +1713,7 @@ parse_message_id(uschar *str, uschar **yield, uschar **error) { uschar *domain = NULL; uschar *id; +rmark reset_point; str = skip_comment(str); if (*str != '<') @@ -1742,27 +1726,28 @@ if (*str != '<') for the answer, but it may also be very long if we are processing a header line. Therefore, take care to release unwanted store afterwards. */ -id = *yield = store_get(Ustrlen(str) + 1); +reset_point = store_mark(); +id = *yield = store_get(Ustrlen(str) + 1, is_tainted(str)); *id++ = *str++; str = read_addr_spec(str, id, '>', error, &domain); -if (*error == NULL) +if (!*error) { if (*str != '>') *error = US"Missing '>' after message-id"; else if (domain == NULL) *error = US"domain missing in message-id"; } -if (*error != NULL) +if (*error) { - store_reset(*yield); + store_reset(reset_point); return NULL; } -while (*id != 0) id++; +while (*id) id++; *id++ = *str++; *id++ = 0; -store_reset(id); +store_release_above(id); str = skip_comment(str); return str; @@ -2092,7 +2077,6 @@ int main(void) { int start, end, domain; uschar buffer[1024]; -uschar outbuff[1024]; big_buffer = store_malloc(big_buffer_size); @@ -2105,8 +2089,7 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) { buffer[Ustrlen(buffer)-1] = 0; if (buffer[0] == 0) break; - printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer), outbuff, - sizeof(outbuff))); + printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer))); } printf("Testing parse_extract_address without group syntax and without UTF-8\n"); @@ -2118,7 +2101,9 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) buffer[Ustrlen(buffer) - 1] = 0; if (buffer[0] == 0) break; out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE); - if (out == NULL) printf("*** bad address: %s\n", errmess); else + if (!out) + printf("*** bad address: %s\n", errmess); + else { uschar extract[1024]; Ustrncpy(extract, buffer+start, end-start); @@ -2137,7 +2122,9 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) buffer[Ustrlen(buffer) - 1] = 0; if (buffer[0] == 0) break; out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE); - if (out == NULL) printf("*** bad address: %s\n", errmess); else + if (!out) + printf("*** bad address: %s\n", errmess); + else { uschar extract[1024]; Ustrncpy(extract, buffer+start, end-start); @@ -2149,7 +2136,7 @@ allow_utf8_domains = FALSE; printf("Testing parse_extract_address with group syntax\n"); -parse_allow_group = TRUE; +f.parse_allow_group = TRUE; while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) { uschar *out; @@ -2158,7 +2145,7 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) buffer[Ustrlen(buffer) - 1] = 0; if (buffer[0] == 0) break; s = buffer; - while (*s != 0) + while (*s) { uschar *ss = parse_find_address_end(s, FALSE); int terminator = *ss; @@ -2166,7 +2153,9 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE); *ss = terminator; - if (out == NULL) printf("*** bad address: %s\n", errmess); else + if (!out) + printf("*** bad address: %s\n", errmess); + else { uschar extract[1024]; Ustrncpy(extract, buffer+start, end-start); @@ -2175,7 +2164,7 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) } s = ss + (terminator? 1:0); - while (isspace(*s)) s++; + Uskip_whitespace(&s); } }