X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/c988f1f4faa9f679f79beddf3c14676c5dcb8e28..96c065cb7bcdfc0965fb111e1eab6e9180e9e186:/src/src/parse.c diff --git a/src/src/parse.c b/src/src/parse.c index d2aae351b..0c1b9fe8f 100644 --- a/src/src/parse.c +++ b/src/src/parse.c @@ -1,10 +1,10 @@ -/* $Cambridge: exim/src/src/parse.c,v 1.3 2005/01/04 10:00:42 ph10 Exp $ */ +/* $Cambridge: exim/src/src/parse.c,v 1.10 2006/10/10 15:36:50 ph10 Exp $ */ /************************************************* * Exim - an Internet mail transport agent * *************************************************/ -/* Copyright (c) University of Cambridge 1995 - 2005 */ +/* Copyright (c) University of Cambridge 1995 - 2006 */ /* See the file NOTICE for conditions of use and distribution. */ /* Functions for parsing addresses */ @@ -243,18 +243,17 @@ s = skip_comment(s); any character except [ ] \, including linear white space, and may contain quoted characters. However, RFC 821 restricts literals to being dot-separated 3-digit numbers, and we make the obvious extension for IPv6. Go for a sequence -of digits and dots (hex digits and colons for IPv6) here; later this will be -checked for being a syntactically valid IP address if it ever gets to a router. +of digits, dots, hex digits, and colons here; later this will be checked for +being a syntactically valid IP address if it ever gets to a router. -If IPv6 is supported, allow both the formal form, with IPV6: at the start, and -the informal form without it, and accept IPV4: as well, 'cause someone will use -it sooner or later. */ +Allow both the formal IPv6 form, with IPV6: at the start, and the informal form +without it, and accept IPV4: as well, 'cause someone will use it sooner or +later. */ if (*s == '[') { *t++ = *s++; - #if HAVE_IPV6 if (strncmpic(s, US"IPv6:", 5) == 0 || strncmpic(s, US"IPv4:", 5) == 0) { memcpy(t, s, 5); @@ -263,10 +262,6 @@ if (*s == '[') } while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++; - #else - while (*s == '.' || isdigit(*s)) *t++ = *s++; - #endif - if (*s == ']') *t++ = *s++; else { *errorptr = US"malformed domain literal"; @@ -602,10 +597,15 @@ which may appear in certain headers. If the flag parse_allow_group is set TRUE and parse_found_group is FALSE when this function is called, an address which is the start of a group (i.e. preceded by a phrase and a colon) is recognized; the phrase is ignored and the flag parse_found_group is set. If -this flag is TRUE at the end of an address, then if an extraneous semicolon is -found, it is ignored and the flag is cleared. This logic is used only when -scanning through addresses in headers, either to fulfil the -t option or for -rewriting or checking header syntax. +this flag is TRUE at the end of an address, and if an extraneous semicolon is +found, it is ignored and the flag is cleared. + +This logic is used only when scanning through addresses in headers, either to +fulfil the -t option, or for rewriting, or for checking header syntax. Because +the group "state" has to be remembered between multiple calls of this function, +the variables parse_{allow,found}_group are global. It is important to ensure +that they are reset to FALSE at the end of scanning a header's list of +addresses. Arguments: mailbox points to the RFC822 mailbox @@ -847,9 +847,9 @@ If the only characters that strictly need quoting are spaces, we return the original string, unmodified. If a quoted string is too long for the buffer, it is truncated. (This shouldn't happen: this is normally handling short strings.) -Hmmph. As always, things get perverted for other uses. This function was -originally for the "phrase" part of addresses. Now it is being used for much -longer texts in ACLs and via the ${rfc2047: expansion item. This means we have +Hmmph. As always, things get perverted for other uses. This function was +originally for the "phrase" part of addresses. Now it is being used for much +longer texts in ACLs and via the ${rfc2047: expansion item. This means we have to check for overlong "encoded-word"s and split them. November 2004. Arguments: @@ -859,6 +859,8 @@ Arguments: charset the name of the character set; NULL => iso-8859-1 buffer the buffer to put the answer in buffer_size the size of the buffer + fold if TRUE, a newline is inserted before the separating space when + more than one encoded-word is generated Returns: pointer to the original string, if no quoting needed, or pointer to buffer containing the quoted string, or @@ -868,7 +870,7 @@ Returns: pointer to the original string, if no quoting needed, or uschar * parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer, - int buffer_size) + int buffer_size, BOOL fold) { uschar *s = string; uschar *p, *t; @@ -890,17 +892,18 @@ for (; len > 0; len--) { int ch = *s++; if (t > buffer + buffer_size - hlen - 8) break; - + if (t - p > 70) { *t++ = '?'; *t++ = '='; + if (fold) *t++ = '\n'; *t++ = ' '; p = t; Ustrncpy(p, buffer, hlen); t += hlen; - } - + } + if (ch < 33 || ch > 126 || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL) { @@ -913,11 +916,11 @@ for (; len > 0; len--) } else *t++ = ch; } - + *t++ = '?'; -*t++ = '='; +*t++ = '='; *t = 0; - + return coded? buffer : string; } @@ -994,7 +997,7 @@ for (i = 0, s = phrase; i < len; i++, s++) if ((*s < 32 && *s != '\t') || *s > 126) break; if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer, - buffer_size); + buffer_size, FALSE); /* No non-printers; use the RFC 822 quoting rules */ @@ -1475,7 +1478,7 @@ for (;;) { *error = string_sprintf("failed to stat included file %s: %s", filename, strerror(errno)); - fclose(f); + (void)fclose(f); return FF_INCLUDEFAIL; } @@ -1502,11 +1505,11 @@ for (;;) { *error = string_sprintf("error while reading included file %s: %s", filename, strerror(errno)); - fclose(f); + (void)fclose(f); return FF_ERROR; } filebuf[statbuf.st_size] = 0; - fclose(f); + (void)fclose(f); addr = NULL; frc = parse_forward_list(filebuf, options, &addr, @@ -1661,6 +1664,68 @@ for (;;) } + +/************************************************* +* Extract a Message-ID * +*************************************************/ + +/* This function is used to extract message ids from In-Reply-To: and +References: header lines. + +Arguments: + str pointer to the start of the message-id + yield put pointer to the message id (in dynamic memory) here + error put error message here on failure + +Returns: points after the processed message-id or NULL on error +*/ + +uschar * +parse_message_id(uschar *str, uschar **yield, uschar **error) +{ +uschar *domain = NULL; +uschar *id; + +str = skip_comment(str); +if (*str != '<') + { + *error = US"Missing '<' before message-id"; + return NULL; + } + +/* Getting a block the size of the input string will definitely be sufficient +for the answer, but it may also be very long if we are processing a header +line. Therefore, take care to release unwanted store afterwards. */ + +id = *yield = store_get(Ustrlen(str) + 1); +*id++ = *str++; + +str = read_addr_spec(str, id, '>', error, &domain); + +if (*error == NULL) + { + if (*str != '>') *error = US"Missing '>' after message-id"; + else if (domain == NULL) *error = US"domain missing in message-id"; + } + +if (*error != NULL) + { + store_reset(*yield); + return NULL; + } + +while (*id != 0) id++; +*id++ = *str++; +*id++ = 0; +store_reset(id); + +str = skip_comment(str); +return str; +} + + + + /************************************************* ************************************************** * Stand-alone test program * @@ -1793,6 +1858,26 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) else printf("Failed: %d %s\n", extracted, errmess); } +printf("Testing parse_message_id\n"); + +while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) + { + uschar *s, *t, *errmess; + buffer[Ustrlen(buffer) - 1] = 0; + if (buffer[0] == 0) break; + s = buffer; + while (*s != 0) + { + s = parse_message_id(s, &t, &errmess); + if (errmess != NULL) + { + printf("Failed: %s\n", errmess); + break; + } + printf("%s\n", t); + } + } + return 0; }