X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/8e669ac162fe3b1040297f1d021de10778dce9d9..87359fb001c9fb4d450a35a0b5f6642c16c78c34:/src/src/parse.c diff --git a/src/src/parse.c b/src/src/parse.c index b70265862..e3b471f1a 100644 --- a/src/src/parse.c +++ b/src/src/parse.c @@ -1,10 +1,9 @@ -/* $Cambridge: exim/src/src/parse.c,v 1.4 2005/02/17 11:58:26 ph10 Exp $ */ - /************************************************* * Exim - an Internet mail transport agent * *************************************************/ -/* Copyright (c) University of Cambridge 1995 - 2005 */ +/* Copyright (c) University of Cambridge 1995 - 2018 */ +/* Copyright (c) The Exim Maintainers 2020 */ /* See the file NOTICE for conditions of use and distribution. */ /* Functions for parsing addresses */ @@ -25,7 +24,7 @@ redundant apparatus. */ address_item *deliver_make_addr(uschar *address, BOOL copy) { -address_item *addr = store_get(sizeof(address_item)); +address_item *addr = store_get(sizeof(address_item), FALSE); addr->next = NULL; addr->parent = NULL; addr->address = address; @@ -189,7 +188,7 @@ The start of the last potential comment position is remembered to make it possible to ignore comments at the end of compound items. Argument: current character pointer -Regurns: new character pointer +Returns: new character pointer */ static uschar * @@ -199,10 +198,10 @@ last_comment_position = s; while (*s) { int c, level; - while (isspace(*s)) s++; - if (*s != '(') break; + + if (Uskip_whitespace(&s) != '(') break; level = 1; - while((c = *(++s)) != 0) + while((c = *(++s))) { if (c == '(') level++; else if (c == ')') { if (--level <= 0) { s++; break; } } @@ -243,18 +242,17 @@ s = skip_comment(s); any character except [ ] \, including linear white space, and may contain quoted characters. However, RFC 821 restricts literals to being dot-separated 3-digit numbers, and we make the obvious extension for IPv6. Go for a sequence -of digits and dots (hex digits and colons for IPv6) here; later this will be -checked for being a syntactically valid IP address if it ever gets to a router. +of digits, dots, hex digits, and colons here; later this will be checked for +being a syntactically valid IP address if it ever gets to a router. -If IPv6 is supported, allow both the formal form, with IPV6: at the start, and -the informal form without it, and accept IPV4: as well, 'cause someone will use -it sooner or later. */ +Allow both the formal IPv6 form, with IPV6: at the start, and the informal form +without it, and accept IPV4: as well, 'cause someone will use it sooner or +later. */ if (*s == '[') { *t++ = *s++; - #if HAVE_IPV6 if (strncmpic(s, US"IPv6:", 5) == 0 || strncmpic(s, US"IPv4:", 5) == 0) { memcpy(t, s, 5); @@ -263,10 +261,6 @@ if (*s == '[') } while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++; - #else - while (*s == '.' || isdigit(*s)) *t++ = *s++; - #endif - if (*s == ']') *t++ = *s++; else { *errorptr = US"malformed domain literal"; @@ -428,10 +422,10 @@ for (;;) if (*s == '\"') { *t++ = '\"'; - while ((c = *(++s)) != 0 && c != '\"') + while ((c = *++s) && c != '\"') { *t++ = c; - if (c == '\\' && s[1] != 0) *t++ = *(++s); + if (c == '\\' && s[1]) *t++ = *++s; } if (c == '\"') { @@ -450,7 +444,7 @@ for (;;) else while (!mac_iscntrl_or_special(*s) || *s == '\\') { c = *t++ = *s++; - if (c == '\\' && *s != 0) *t++ = *s++; + if (c == '\\' && *s) *t++ = *s++; } /* Terminate the word and skip subsequent comment */ @@ -557,9 +551,7 @@ read_addr_spec(uschar *s, uschar *t, int term, uschar **errorptr, { s = read_local_part(s, t, errorptr, FALSE); if (*errorptr == NULL) - { if (*s != term) - { if (*s != '@') *errorptr = string_sprintf("\"@\" or \".\" expected after \"%s\"", t); else @@ -569,8 +561,6 @@ if (*errorptr == NULL) *domainptr = t; s = read_domain(s, t, errorptr); } - } - } return s; } @@ -602,10 +592,15 @@ which may appear in certain headers. If the flag parse_allow_group is set TRUE and parse_found_group is FALSE when this function is called, an address which is the start of a group (i.e. preceded by a phrase and a colon) is recognized; the phrase is ignored and the flag parse_found_group is set. If -this flag is TRUE at the end of an address, then if an extraneous semicolon is -found, it is ignored and the flag is cleared. This logic is used only when -scanning through addresses in headers, either to fulfil the -t option or for -rewriting or checking header syntax. +this flag is TRUE at the end of an address, and if an extraneous semicolon is +found, it is ignored and the flag is cleared. + +This logic is used only when scanning through addresses in headers, either to +fulfil the -t option, or for rewriting, or for checking header syntax. Because +the group "state" has to be remembered between multiple calls of this function, +the variables parse_{allow,found}_group are global. It is important to ensure +that they are reset to FALSE at the end of scanning a header's list of +addresses. Arguments: mailbox points to the RFC822 mailbox @@ -624,10 +619,10 @@ uschar * parse_extract_address(uschar *mailbox, uschar **errorptr, int *start, int *end, int *domain, BOOL allow_null) { -uschar *yield = store_get(Ustrlen(mailbox) + 1); +uschar *yield = store_get(Ustrlen(mailbox) + 1, is_tainted(mailbox)); uschar *startptr, *endptr; -uschar *s = (uschar *)mailbox; -uschar *t = (uschar *)yield; +uschar *s = US mailbox; +uschar *t = US yield; *domain = 0; @@ -644,7 +639,7 @@ RESTART: /* Come back here after passing a group name */ s = skip_comment(s); startptr = s; /* In case addr-spec */ s = read_local_part(s, t, errorptr, TRUE); /* Dot separated words */ -if (*errorptr != NULL) goto PARSE_FAILED; +if (*errorptr) goto PARSE_FAILED; /* If the terminator is neither < nor @ then the format of the address must either be a bare local-part (we are now at the end), or a phrase @@ -664,10 +659,10 @@ if (*s != '@' && *s != '<') end of string will produce a null local_part and therefore fail. We don't need to keep updating t, as the phrase isn't to be kept. */ - while (*s != '<' && (!parse_allow_group || *s != ':')) + while (*s != '<' && (!f.parse_allow_group || *s != ':')) { s = read_local_part(s, t, errorptr, FALSE); - if (*errorptr != NULL) + if (*errorptr) { *errorptr = string_sprintf("%s (expected word or \"<\")", *errorptr); goto PARSE_FAILED; @@ -676,8 +671,8 @@ if (*s != '@' && *s != '<') if (*s == ':') { - parse_found_group = TRUE; - parse_allow_group = FALSE; + f.parse_found_group = TRUE; + f.parse_allow_group = FALSE; s++; goto RESTART; } @@ -692,8 +687,8 @@ processing it. Note that this is "if" rather than "else if" because it's also used after reading a preceding phrase. There are a lot of broken sendmails out there that put additional pairs of <> -round s. If strip_excess_angle_brackets is set, allow any number of -them, as long as they match. */ +round s. If strip_excess_angle_brackets is set, allow a limited +number of them, as long as they match. */ if (*s == '<') { @@ -702,8 +697,11 @@ if (*s == '<') int bracket_count = 1; s++; - if (strip_excess_angle_brackets) - while (*s == '<') { bracket_count++; s++; } + if (strip_excess_angle_brackets) while (*s == '<') + { + if(bracket_count++ > 5) FAILED(US"angle-brackets nested too deep"); + s++; + } t = yield; startptr = s; @@ -717,7 +715,7 @@ if (*s == '<') if (*s == '@') { s = read_route(s, t, errorptr); - if (*errorptr != NULL) goto PARSE_FAILED; + if (*errorptr) goto PARSE_FAILED; *t = 0; /* Ensure route is ignored - probably overkill */ source_routed = TRUE; } @@ -735,7 +733,7 @@ if (*s == '<') else { s = read_addr_spec(s, t, '>', errorptr, &domainptr); - if (*errorptr != NULL) goto PARSE_FAILED; + if (*errorptr) goto PARSE_FAILED; *domain = domainptr - yield; if (source_routed && *domain == 0) FAILED(US"domain missing in source-routed address"); @@ -745,9 +743,10 @@ if (*s == '<') if (*errorptr != NULL) goto PARSE_FAILED; while (bracket_count-- > 0) if (*s++ != '>') { - *errorptr = (s[-1] == 0)? US"'>' missing at end of address" : - string_sprintf("malformed address: %.32s may not follow %.*s", - s-1, s - (uschar *)mailbox - 1, mailbox); + *errorptr = s[-1] == 0 + ? US"'>' missing at end of address" + : string_sprintf("malformed address: %.32s may not follow %.*s", + s-1, (int)(s - US mailbox - 1), mailbox); goto PARSE_FAILED; } @@ -792,21 +791,21 @@ move it back past white space if necessary. */ PARSE_SUCCEEDED: if (*s != 0) { - if (parse_found_group && *s == ';') + if (f.parse_found_group && *s == ';') { - parse_found_group = FALSE; - parse_allow_group = TRUE; + f.parse_found_group = FALSE; + f.parse_allow_group = TRUE; } else { *errorptr = string_sprintf("malformed address: %.32s may not follow %.*s", - s, s - (uschar *)mailbox, mailbox); + s, (int)(s - US mailbox), mailbox); goto PARSE_FAILED; } } -*start = startptr - (uschar *)mailbox; /* Return offsets */ +*start = startptr - US mailbox; /* Return offsets */ while (isspace(endptr[-1])) endptr--; -*end = endptr - (uschar *)mailbox; +*end = endptr - US mailbox; /* Although this code has no limitation on the length of address extracted, other parts of Exim may have limits, and in any case, RFC 2821 limits local @@ -819,17 +818,17 @@ if (*end - *start > ADDRESS_MAXLENGTH) return NULL; } -return (uschar *)yield; +return yield; /* Use goto (via the macro FAILED) to get to here from a variety of places. We might have an empty address in a group - the caller can choose to ignore this. We must, however, keep the flags correct. */ PARSE_FAILED: -if (parse_found_group && *s == ';') +if (f.parse_found_group && *s == ';') { - parse_found_group = FALSE; - parse_allow_group = TRUE; + f.parse_found_group = FALSE; + f.parse_allow_group = TRUE; } return NULL; } @@ -859,6 +858,8 @@ Arguments: charset the name of the character set; NULL => iso-8859-1 buffer the buffer to put the answer in buffer_size the size of the buffer + fold if TRUE, a newline is inserted before the separating space when + more than one encoded-word is generated Returns: pointer to the original string, if no quoting needed, or pointer to buffer containing the quoted string, or @@ -866,16 +867,17 @@ Returns: pointer to the original string, if no quoting needed, or the introduction */ -uschar * -parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer, - int buffer_size) +const uschar * +parse_quote_2047(const uschar *string, int len, uschar *charset, uschar *buffer, + int buffer_size, BOOL fold) { -uschar *s = string; +const uschar *s = string; uschar *p, *t; int hlen; BOOL coded = FALSE; +BOOL first_byte = FALSE; -if (charset == NULL) charset = US"iso-8859-1"; +if (!charset) charset = US"iso-8859-1"; /* We don't expect this to fail! */ @@ -891,10 +893,11 @@ for (; len > 0; len--) int ch = *s++; if (t > buffer + buffer_size - hlen - 8) break; - if (t - p > 70) + if ((t - p > 67) && !first_byte) { *t++ = '?'; *t++ = '='; + if (fold) *t++ = '\n'; *t++ = ' '; p = t; Ustrncpy(p, buffer, hlen); @@ -904,21 +907,26 @@ for (; len > 0; len--) if (ch < 33 || ch > 126 || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL) { - if (ch == ' ') *t++ = '_'; else + if (ch == ' ') + { + *t++ = '_'; + first_byte = FALSE; + } + else { - sprintf(CS t, "=%02X", ch); - while (*t != 0) t++; + t += sprintf(CS t, "=%02X", ch); coded = TRUE; + first_byte = !first_byte; } } - else *t++ = ch; + else { *t++ = ch; first_byte = FALSE; } } *t++ = '?'; *t++ = '='; *t = 0; -return coded? buffer : string; +return coded ? buffer : string; } @@ -977,12 +985,13 @@ Arguments: Returns: the fixed RFC822 phrase */ -uschar * -parse_fix_phrase(uschar *phrase, int len, uschar *buffer, int buffer_size) +const uschar * +parse_fix_phrase(const uschar *phrase, int len, uschar *buffer, int buffer_size) { int ch, i; BOOL quoted = FALSE; -uschar *s, *t, *end, *yield; +const uschar *s, *end; +uschar *t, *yield; while (len > 0 && isspace(*phrase)) { phrase++; len--; } if (len > buffer_size/4) return US"Name too long"; @@ -994,7 +1003,7 @@ for (i = 0, s = phrase; i < len; i++, s++) if ((*s < 32 && *s != '\t') || *s > 126) break; if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer, - buffer_size); + buffer_size, FALSE); /* No non-printers; use the RFC 822 quoting rules */ @@ -1111,7 +1120,7 @@ while (s < end) else if (ch == '(') { - uschar *ss = s; /* uschar after '(' */ + const uschar *ss = s; /* uschar after '(' */ int level = 1; while(ss < end) { @@ -1237,7 +1246,7 @@ Returns: FF_DELIVERED addresses extracted int parse_forward_list(uschar *s, int options, address_item **anchor, - uschar **error, uschar *incoming_domain, uschar *directory, + uschar **error, const uschar *incoming_domain, uschar *directory, error_block **syntax_errors) { int count = 0; @@ -1269,10 +1278,10 @@ for (;;) However, if the list is empty only because syntax errors were skipped, we return FF_DELIVERED. */ - if (*s == 0) + if (!*s) { - return (count > 0 || (syntax_errors != NULL && *syntax_errors != NULL))? - FF_DELIVERED : FF_NOTDELIVERED; + return (count > 0 || (syntax_errors && *syntax_errors)) + ? FF_DELIVERED : FF_NOTDELIVERED; /* This previous code returns FF_ERROR if nothing is generated but a syntax error has been skipped. I now think it is the wrong approach, but @@ -1388,7 +1397,7 @@ for (;;) if (flen <= 0) { - *error = string_sprintf("file name missing after :include:"); + *error = US"file name missing after :include:"; return FF_ERROR; } @@ -1403,7 +1412,7 @@ for (;;) /* Insist on absolute path */ - if (filename[0]!= '/') + if (filename[0] != '/') { *error = string_sprintf("included file \"%s\" is not an absolute path", filename); @@ -1412,15 +1421,22 @@ for (;;) /* Check if include is permitted */ - if ((options & RDO_INCLUDE) != 0) + if (options & RDO_INCLUDE) { *error = US"included files not permitted"; return FF_ERROR; } + if (is_tainted(filename)) + { + *error = string_sprintf("Tainted name '%s' for included file not permitted\n", + filename); + return FF_ERROR; + } + /* Check file name if required */ - if (directory != NULL) + if (directory) { int len = Ustrlen(directory); uschar *p = filename + len; @@ -1432,16 +1448,53 @@ for (;;) return FF_ERROR; } +#ifdef EXIM_HAVE_OPENAT + /* It is necessary to check that every component inside the directory + is NOT a symbolic link, in order to keep the file inside the directory. + This is mighty tedious. We open the directory and openat every component, + with a flag that fails symlinks. */ + + { + int fd = exim_open2(CS directory, O_RDONLY); + if (fd < 0) + { + *error = string_sprintf("failed to open directory %s", directory); + return FF_ERROR; + } + while (*p) + { + uschar temp; + int fd2; + uschar * q = p; + + while (*++p && *p != '/') ; + temp = *p; + *p = '\0'; + + fd2 = exim_openat(fd, CS q, O_RDONLY|O_NOFOLLOW); + close(fd); + *p = temp; + if (fd2 < 0) + { + *error = string_sprintf("failed to open %s (component of included " + "file); could be symbolic link", filename); + return FF_ERROR; + } + fd = fd2; + } + f = fdopen(fd, "rb"); + } +#else /* It is necessary to check that every component inside the directory is NOT a symbolic link, in order to keep the file inside the directory. This is mighty tedious. It is also not totally foolproof in that it leaves the possibility of a race attack, but I don't know how to do any better. */ - while (*p != 0) + while (*p) { int temp; - while (*(++p) != 0 && *p != '/'); + while (*++p && *p != '/'); temp = *p; *p = 0; if (Ulstat(filename, &statbuf) != 0) @@ -1461,11 +1514,16 @@ for (;;) return FF_ERROR; } } +#endif } - /* Open and stat the file */ +#ifdef EXIM_HAVE_OPENAT + else +#endif + /* Open and stat the file */ + f = Ufopen(filename, "rb"); - if ((f = Ufopen(filename, "rb")) == NULL) + if (!f) { *error = string_open_failed(errno, "included file %s", filename); return FF_INCLUDEFAIL; @@ -1475,13 +1533,13 @@ for (;;) { *error = string_sprintf("failed to stat included file %s: %s", filename, strerror(errno)); - fclose(f); + (void)fclose(f); return FF_INCLUDEFAIL; } /* If directory was checked, double check that we opened a regular file */ - if (directory != NULL && (statbuf.st_mode & S_IFMT) != S_IFREG) + if (directory && (statbuf.st_mode & S_IFMT) != S_IFREG) { *error = string_sprintf("included file %s is not a regular file in " "the %s directory", filename, directory); @@ -1497,26 +1555,25 @@ for (;;) return FF_ERROR; } - filebuf = store_get(statbuf.st_size + 1); + filebuf = store_get(statbuf.st_size + 1, is_tainted(filename)); if (fread(filebuf, 1, statbuf.st_size, f) != statbuf.st_size) { *error = string_sprintf("error while reading included file %s: %s", filename, strerror(errno)); - fclose(f); + (void)fclose(f); return FF_ERROR; } filebuf[statbuf.st_size] = 0; - fclose(f); + (void)fclose(f); addr = NULL; frc = parse_forward_list(filebuf, options, &addr, error, incoming_domain, directory, syntax_errors); if (frc != FF_DELIVERED && frc != FF_NOTDELIVERED) return frc; - if (addr != NULL) + if (addr) { - last = addr; - while (last->next != NULL) { count++; last = last->next; } + for (last = addr; last->next; last = last->next) count++; last->next = *anchor; *anchor = addr; count++; @@ -1558,14 +1615,14 @@ for (;;) { recipient = parse_extract_address(s+1, error, &start, &end, &domain, FALSE); - if (recipient != NULL) - recipient = (domain != 0)? NULL : + if (recipient) + recipient = domain != 0 ? NULL : string_sprintf("%s@%s", recipient, incoming_domain); } /* Try parsing the item as an address. */ - if (recipient == NULL) recipient = + if (!recipient) recipient = parse_extract_address(s, error, &start, &end, &domain, FALSE); /* If item starts with / or | and is not a valid address, or there @@ -1574,7 +1631,7 @@ for (;;) if ((*s == '|' || *s == '/') && (recipient == NULL || domain == 0)) { - uschar *t = store_get(Ustrlen(s) + 1); + uschar *t = store_get(Ustrlen(s) + 1, is_tainted(s)); uschar *p = t; uschar *q = s; while (*q != 0) @@ -1613,7 +1670,7 @@ for (;;) if (syntax_errors != NULL) { - error_block *e = store_get(sizeof(error_block)); + error_block *e = store_get(sizeof(error_block), FALSE); error_block *last = *syntax_errors; if (last == NULL) *syntax_errors = e; else { @@ -1661,6 +1718,379 @@ for (;;) } +/************************************************* +* Extract a Message-ID * +*************************************************/ + +/* This function is used to extract message ids from In-Reply-To: and +References: header lines. + +Arguments: + str pointer to the start of the message-id + yield put pointer to the message id (in dynamic memory) here + error put error message here on failure + +Returns: points after the processed message-id or NULL on error +*/ + +uschar * +parse_message_id(uschar *str, uschar **yield, uschar **error) +{ +uschar *domain = NULL; +uschar *id; +rmark reset_point; + +str = skip_comment(str); +if (*str != '<') + { + *error = US"Missing '<' before message-id"; + return NULL; + } + +/* Getting a block the size of the input string will definitely be sufficient +for the answer, but it may also be very long if we are processing a header +line. Therefore, take care to release unwanted store afterwards. */ + +reset_point = store_mark(); +id = *yield = store_get(Ustrlen(str) + 1, is_tainted(str)); +*id++ = *str++; + +str = read_addr_spec(str, id, '>', error, &domain); + +if (!*error) + { + if (*str != '>') *error = US"Missing '>' after message-id"; + else if (domain == NULL) *error = US"domain missing in message-id"; + } + +if (*error) + { + store_reset(reset_point); + return NULL; + } + +while (*id) id++; +*id++ = *str++; +*id++ = 0; +store_release_above(id); + +str = skip_comment(str); +return str; +} + + +/************************************************* +* Parse a fixed digit number * +*************************************************/ + +/* Parse a string containing an ASCII encoded fixed digits number + +Arguments: + str pointer to the start of the ASCII encoded number + n pointer to the resulting value + digits number of required digits + +Returns: points after the processed date or NULL on error +*/ + +static uschar * +parse_number(uschar *str, int *n, int digits) +{ + *n=0; + while (digits--) + { + if (*str<'0' || *str>'9') return NULL; + *n=10*(*n)+(*str++-'0'); + } + return str; +} + + +/************************************************* +* Parse a RFC 2822 day of week * +*************************************************/ + +/* Parse the day of the week from a RFC 2822 date, but do not + decode it, because it is only for humans. + +Arguments: + str pointer to the start of the day of the week + +Returns: points after the parsed day or NULL on error +*/ + +static uschar * +parse_day_of_week(uschar *str) +{ +/* +day-of-week = ([FWS] day-name) / obs-day-of-week + +day-name = "Mon" / "Tue" / "Wed" / "Thu" / + "Fri" / "Sat" / "Sun" + +obs-day-of-week = [CFWS] day-name [CFWS] +*/ + +static const uschar *day_name[7]={ US"mon", US"tue", US"wed", US"thu", US"fri", US"sat", US"sun" }; +int i; +uschar day[4]; + +str=skip_comment(str); +for (i=0; i<3; ++i) + { + if ((day[i]=tolower(*str))=='\0') return NULL; + ++str; + } +day[3]='\0'; +for (i=0; i<7; ++i) if (Ustrcmp(day,day_name[i])==0) break; +if (i==7) return NULL; +str=skip_comment(str); +return str; +} + + +/************************************************* +* Parse a RFC 2822 date * +*************************************************/ + +/* Parse the date part of a RFC 2822 date-time, extracting the + day, month and year. + +Arguments: + str pointer to the start of the date + d pointer to the resulting day + m pointer to the resulting month + y pointer to the resulting year + +Returns: points after the processed date or NULL on error +*/ + +static uschar * +parse_date(uschar *str, int *d, int *m, int *y) +{ +/* +date = day month year + +year = 4*DIGIT / obs-year + +obs-year = [CFWS] 2*DIGIT [CFWS] + +month = (FWS month-name FWS) / obs-month + +month-name = "Jan" / "Feb" / "Mar" / "Apr" / + "May" / "Jun" / "Jul" / "Aug" / + "Sep" / "Oct" / "Nov" / "Dec" + +obs-month = CFWS month-name CFWS + +day = ([FWS] 1*2DIGIT) / obs-day + +obs-day = [CFWS] 1*2DIGIT [CFWS] +*/ + +uschar *c,*n; +static const uschar *month_name[]={ US"jan", US"feb", US"mar", US"apr", US"may", US"jun", US"jul", US"aug", US"sep", US"oct", US"nov", US"dec" }; +int i; +uschar month[4]; + +str=skip_comment(str); +if ((str=parse_number(str,d,1))==NULL) return NULL; +if (*str>='0' && *str<='9') *d=10*(*d)+(*str++-'0'); +c=skip_comment(str); +if (c==str) return NULL; +else str=c; +for (i=0; i<3; ++i) if ((month[i]=tolower(*(str+i)))=='\0') return NULL; +month[3]='\0'; +for (i=0; i<12; ++i) if (Ustrcmp(month,month_name[i])==0) break; +if (i==12) return NULL; +str+=3; +*m=i; +c=skip_comment(str); +if (c==str) return NULL; +else str=c; +if ((n=parse_number(str,y,4))) + { + str=n; + if (*y<1900) return NULL; + *y=*y-1900; + } +else if ((n=parse_number(str,y,2))) + { + str=skip_comment(n); + while (*(str-1)==' ' || *(str-1)=='\t') --str; /* match last FWS later */ + if (*y<50) *y+=100; + } +else return NULL; +return str; +} + + +/************************************************* +* Parse a RFC 2822 Time * +*************************************************/ + +/* Parse the time part of a RFC 2822 date-time, extracting the + hour, minute, second and timezone. + +Arguments: + str pointer to the start of the time + h pointer to the resulting hour + m pointer to the resulting minute + s pointer to the resulting second + z pointer to the resulting timezone (offset in seconds) + +Returns: points after the processed time or NULL on error +*/ + +static uschar * +parse_time(uschar *str, int *h, int *m, int *s, int *z) +{ +/* +time = time-of-day FWS zone + +time-of-day = hour ":" minute [ ":" second ] + +hour = 2DIGIT / obs-hour + +obs-hour = [CFWS] 2DIGIT [CFWS] + +minute = 2DIGIT / obs-minute + +obs-minute = [CFWS] 2DIGIT [CFWS] + +second = 2DIGIT / obs-second + +obs-second = [CFWS] 2DIGIT [CFWS] + +zone = (( "+" / "-" ) 4DIGIT) / obs-zone + +obs-zone = "UT" / "GMT" / ; Universal Time + ; North American UT + ; offsets + "EST" / "EDT" / ; Eastern: - 5/ - 4 + "CST" / "CDT" / ; Central: - 6/ - 5 + "MST" / "MDT" / ; Mountain: - 7/ - 6 + "PST" / "PDT" / ; Pacific: - 8/ - 7 + + %d65-73 / ; Military zones - "A" + %d75-90 / ; through "I" and "K" + %d97-105 / ; through "Z", both + %d107-122 ; upper and lower case +*/ + +uschar *c; + +str=skip_comment(str); +if ((str=parse_number(str,h,2))==NULL) return NULL; +str=skip_comment(str); +if (*str!=':') return NULL; +++str; +str=skip_comment(str); +if ((str=parse_number(str,m,2))==NULL) return NULL; +c=skip_comment(str); +if (*str==':') + { + ++str; + str=skip_comment(str); + if ((str=parse_number(str,s,2))==NULL) return NULL; + c=skip_comment(str); + } +if (c==str) return NULL; +else str=c; +if (*str=='+' || *str=='-') + { + int neg; + + neg=(*str=='-'); + ++str; + if ((str=parse_number(str,z,4))==NULL) return NULL; + *z=(*z/100)*3600+(*z%100)*60; + if (neg) *z=-*z; + } +else + { + char zone[5]; + struct { const char *name; int off; } zone_name[10]= + { {"gmt",0}, {"ut",0}, {"est",-5}, {"edt",-4}, {"cst",-6}, {"cdt",-5}, {"mst",-7}, {"mdt",-6}, {"pst",-8}, {"pdt",-7}}; + int i,j; + + for (i=0; i<4; ++i) + { + zone[i]=tolower(*(str+i)); + if (zone[i]<'a' || zone[i]>'z') break; + } + zone[i]='\0'; + for (j=0; j<10 && strcmp(zone,zone_name[j].name); ++j); + /* Besides zones named in the grammar, RFC 2822 says other alphabetic */ + /* time zones should be treated as unknown offsets. */ + if (j<10) + { + *z=zone_name[j].off*3600; + str+=i; + } + else if (zone[0]<'a' || zone[1]>'z') return 0; + else + { + while ((*str>='a' && *str<='z') || (*str>='A' && *str<='Z')) ++str; + *z=0; + } + } +return str; +} + + +/************************************************* +* Parse a RFC 2822 date-time * +*************************************************/ + +/* Parse a RFC 2822 date-time and return it in seconds since the epoch. + +Arguments: + str pointer to the start of the date-time + t pointer to the parsed time + +Returns: points after the processed date-time or NULL on error +*/ + +uschar * +parse_date_time(uschar *str, time_t *t) +{ +/* +date-time = [ day-of-week "," ] date FWS time [CFWS] +*/ + +struct tm tm; +int zone; +extern char **environ; +char **old_environ; +static char gmt0[]="TZ=GMT0"; +static char *gmt_env[]={ gmt0, (char*)0 }; +uschar *try; + +if ((try=parse_day_of_week(str))) + { + str=try; + if (*str!=',') return 0; + ++str; + } +if ((str=parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year))==NULL) return NULL; +if (*str!=' ' && *str!='\t') return NULL; +while (*str==' ' || *str=='\t') ++str; +if ((str=parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone))==NULL) return NULL; +tm.tm_isdst=0; +old_environ=environ; +environ=gmt_env; +*t=mktime(&tm); +environ=old_environ; +if (*t==-1) return NULL; +*t-=zone; +str=skip_comment(str); +return str; +} + + + + /************************************************* ************************************************** * Stand-alone test program * @@ -1698,7 +2128,9 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) buffer[Ustrlen(buffer) - 1] = 0; if (buffer[0] == 0) break; out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE); - if (out == NULL) printf("*** bad address: %s\n", errmess); else + if (!out) + printf("*** bad address: %s\n", errmess); + else { uschar extract[1024]; Ustrncpy(extract, buffer+start, end-start); @@ -1717,7 +2149,9 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) buffer[Ustrlen(buffer) - 1] = 0; if (buffer[0] == 0) break; out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE); - if (out == NULL) printf("*** bad address: %s\n", errmess); else + if (!out) + printf("*** bad address: %s\n", errmess); + else { uschar extract[1024]; Ustrncpy(extract, buffer+start, end-start); @@ -1729,7 +2163,7 @@ allow_utf8_domains = FALSE; printf("Testing parse_extract_address with group syntax\n"); -parse_allow_group = TRUE; +f.parse_allow_group = TRUE; while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) { uschar *out; @@ -1738,7 +2172,7 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) buffer[Ustrlen(buffer) - 1] = 0; if (buffer[0] == 0) break; s = buffer; - while (*s != 0) + while (*s) { uschar *ss = parse_find_address_end(s, FALSE); int terminator = *ss; @@ -1746,7 +2180,9 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE); *ss = terminator; - if (out == NULL) printf("*** bad address: %s\n", errmess); else + if (!out) + printf("*** bad address: %s\n", errmess); + else { uschar extract[1024]; Ustrncpy(extract, buffer+start, end-start); @@ -1755,7 +2191,7 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) } s = ss + (terminator? 1:0); - while (isspace(*s)) s++; + Uskip_whitespace(&s); } } @@ -1793,6 +2229,26 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) else printf("Failed: %d %s\n", extracted, errmess); } +printf("Testing parse_message_id\n"); + +while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) + { + uschar *s, *t, *errmess; + buffer[Ustrlen(buffer) - 1] = 0; + if (buffer[0] == 0) break; + s = buffer; + while (*s != 0) + { + s = parse_message_id(s, &t, &errmess); + if (errmess != NULL) + { + printf("Failed: %s\n", errmess); + break; + } + printf("%s\n", t); + } + } + return 0; }