X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/059ec3d9952740285fb1ebf47961b8aca2eb1b4a..261dc43e32f6039781ca92535e56f5caaa68b809:/src/src/parse.c diff --git a/src/src/parse.c b/src/src/parse.c index e42e0c8a8..09fcf98fe 100644 --- a/src/src/parse.c +++ b/src/src/parse.c @@ -1,10 +1,10 @@ -/* $Cambridge: exim/src/src/parse.c,v 1.1 2004/10/07 10:39:01 ph10 Exp $ */ +/* $Cambridge: exim/src/src/parse.c,v 1.15 2009/11/16 19:50:37 nm4 Exp $ */ /************************************************* * Exim - an Internet mail transport agent * *************************************************/ -/* Copyright (c) University of Cambridge 1995 - 2004 */ +/* Copyright (c) University of Cambridge 1995 - 2009 */ /* See the file NOTICE for conditions of use and distribution. */ /* Functions for parsing addresses */ @@ -243,18 +243,17 @@ s = skip_comment(s); any character except [ ] \, including linear white space, and may contain quoted characters. However, RFC 821 restricts literals to being dot-separated 3-digit numbers, and we make the obvious extension for IPv6. Go for a sequence -of digits and dots (hex digits and colons for IPv6) here; later this will be -checked for being a syntactically valid IP address if it ever gets to a router. +of digits, dots, hex digits, and colons here; later this will be checked for +being a syntactically valid IP address if it ever gets to a router. -If IPv6 is supported, allow both the formal form, with IPV6: at the start, and -the informal form without it, and accept IPV4: as well, 'cause someone will use -it sooner or later. */ +Allow both the formal IPv6 form, with IPV6: at the start, and the informal form +without it, and accept IPV4: as well, 'cause someone will use it sooner or +later. */ if (*s == '[') { *t++ = *s++; - #if HAVE_IPV6 if (strncmpic(s, US"IPv6:", 5) == 0 || strncmpic(s, US"IPv4:", 5) == 0) { memcpy(t, s, 5); @@ -263,10 +262,6 @@ if (*s == '[') } while (*s == '.' 
|| *s == ':' || isxdigit(*s)) *t++ = *s++; - #else - while (*s == '.' || isdigit(*s)) *t++ = *s++; - #endif - if (*s == ']') *t++ = *s++; else { *errorptr = US"malformed domain literal"; @@ -602,10 +597,15 @@ which may appear in certain headers. If the flag parse_allow_group is set TRUE and parse_found_group is FALSE when this function is called, an address which is the start of a group (i.e. preceded by a phrase and a colon) is recognized; the phrase is ignored and the flag parse_found_group is set. If -this flag is TRUE at the end of an address, then if an extraneous semicolon is -found, it is ignored and the flag is cleared. This logic is used only when -scanning through addresses in headers, either to fulfil the -t option or for -rewriting or checking header syntax. +this flag is TRUE at the end of an address, and if an extraneous semicolon is +found, it is ignored and the flag is cleared. + +This logic is used only when scanning through addresses in headers, either to +fulfil the -t option, or for rewriting, or for checking header syntax. Because +the group "state" has to be remembered between multiple calls of this function, +the variables parse_{allow,found}_group are global. It is important to ensure +that they are reset to FALSE at the end of scanning a header's list of +addresses. Arguments: mailbox points to the RFC822 mailbox @@ -847,6 +847,11 @@ If the only characters that strictly need quoting are spaces, we return the original string, unmodified. If a quoted string is too long for the buffer, it is truncated. (This shouldn't happen: this is normally handling short strings.) +Hmmph. As always, things get perverted for other uses. This function was +originally for the "phrase" part of addresses. Now it is being used for much +longer texts in ACLs and via the ${rfc2047: expansion item. This means we have +to check for overlong "encoded-word"s and split them. November 2004. 
+ Arguments: string the string to quote - already checked to contain non-printing chars @@ -854,6 +859,8 @@ Arguments: charset the name of the character set; NULL => iso-8859-1 buffer the buffer to put the answer in buffer_size the size of the buffer + fold if TRUE, a newline is inserted before the separating space when + more than one encoded-word is generated Returns: pointer to the original string, if no quoting needed, or pointer to buffer containing the quoted string, or @@ -863,10 +870,11 @@ Returns: pointer to the original string, if no quoting needed, or uschar * parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer, - int buffer_size) + int buffer_size, BOOL fold) { uschar *s = string; -uschar *t; +uschar *p, *t; +int hlen; BOOL coded = FALSE; if (charset == NULL) charset = US"iso-8859-1"; @@ -876,11 +884,26 @@ if (charset == NULL) charset = US"iso-8859-1"; if (!string_format(buffer, buffer_size, "=?%s?Q?", charset)) return US"String too long"; -t = buffer + Ustrlen(buffer); +hlen = Ustrlen(buffer); +t = buffer + hlen; +p = buffer; + for (; len > 0; len--) { int ch = *s++; - if (t > buffer + buffer_size - 8) break; + if (t > buffer + buffer_size - hlen - 8) break; + + if (t - p > 70) + { + *t++ = '?'; + *t++ = '='; + if (fold) *t++ = '\n'; + *t++ = ' '; + p = t; + Ustrncpy(p, buffer, hlen); + t += hlen; + } + if (ch < 33 || ch > 126 || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL) { @@ -893,7 +916,11 @@ for (; len > 0; len--) } else *t++ = ch; } -sprintf(CS t, "?="); + +*t++ = '?'; +*t++ = '='; +*t = 0; + return coded? 
buffer : string; } @@ -970,7 +997,7 @@ for (i = 0, s = phrase; i < len; i++, s++) if ((*s < 32 && *s != '\t') || *s > 126) break; if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer, - buffer_size); + buffer_size, FALSE); /* No non-printers; use the RFC 822 quoting rules */ @@ -1451,7 +1478,7 @@ for (;;) { *error = string_sprintf("failed to stat included file %s: %s", filename, strerror(errno)); - fclose(f); + (void)fclose(f); return FF_INCLUDEFAIL; } @@ -1478,11 +1505,11 @@ for (;;) { *error = string_sprintf("error while reading included file %s: %s", filename, strerror(errno)); - fclose(f); + (void)fclose(f); return FF_ERROR; } filebuf[statbuf.st_size] = 0; - fclose(f); + (void)fclose(f); addr = NULL; frc = parse_forward_list(filebuf, options, &addr, @@ -1637,6 +1664,377 @@ for (;;) } +/************************************************* +* Extract a Message-ID * +*************************************************/ + +/* This function is used to extract message ids from In-Reply-To: and +References: header lines. + +Arguments: + str pointer to the start of the message-id + yield put pointer to the message id (in dynamic memory) here + error put error message here on failure + +Returns: points after the processed message-id or NULL on error +*/ + +uschar * +parse_message_id(uschar *str, uschar **yield, uschar **error) +{ +uschar *domain = NULL; +uschar *id; + +str = skip_comment(str); +if (*str != '<') + { + *error = US"Missing '<' before message-id"; + return NULL; + } + +/* Getting a block the size of the input string will definitely be sufficient +for the answer, but it may also be very long if we are processing a header +line. Therefore, take care to release unwanted store afterwards. 
*/ +id = *yield = store_get(Ustrlen(str) + 1); +*id++ = *str++; + +str = read_addr_spec(str, id, '>', error, &domain); + +if (*error == NULL) + { + if (*str != '>') *error = US"Missing '>' after message-id"; + else if (domain == NULL) *error = US"domain missing in message-id"; + } + +if (*error != NULL) + { + store_reset(*yield); + return NULL; + } + +while (*id != 0) id++; +*id++ = *str++; +*id++ = 0; +store_reset(id); + +str = skip_comment(str); +return str; +} + + +/************************************************* +* Parse a fixed digit number * +*************************************************/ + +/* Parse a fixed-width ASCII decimal number. Exactly "digits" characters are +consumed and each one must be in the range '0'-'9'; the value is accumulated +without any overflow check. + +Arguments: + str pointer to the start of the ASCII encoded number + n pointer to the resulting value; it is zeroed first, so it is + overwritten even when the parse fails + digits number of required digits + +Returns: points after the processed number or NULL on error +*/ + +static uschar * +parse_number(uschar *str, int *n, int digits) +{ + *n=0; + while (digits--) + { + if (*str<'0' || *str>'9') return NULL; + *n=10*(*n)+(*str++-'0'); + } + return str; +} + + +/************************************************* +* Parse a RFC 2822 day of week * +*************************************************/ + +/* Parse the day of the week from an RFC 2822 date, but do not + decode it, because it is only for humans. 
+ +Arguments: + str pointer to the start of the day of the week + +Returns: points after the parsed day or NULL on error + +The three characters following whatever skip_comment() skips are lower-cased +and must equal one of the entries in day_name; the matched day itself is not +returned to the caller. +*/ + +static uschar * +parse_day_of_week(uschar *str) +{ +/* +day-of-week = ([FWS] day-name) / obs-day-of-week + +day-name = "Mon" / "Tue" / "Wed" / "Thu" / + "Fri" / "Sat" / "Sun" + +obs-day-of-week = [CFWS] day-name [CFWS] +*/ + +static const uschar *day_name[7]={ US"mon", US"tue", US"wed", US"thu", US"fri", US"sat", US"sun" }; +int i; +uschar day[4]; + +str=skip_comment(str); +for (i=0; i<3; ++i) + { + if ((day[i]=tolower(*str))=='\0') return NULL; + ++str; + } +day[3]='\0'; +for (i=0; i<7; ++i) if (Ustrcmp(day,day_name[i])==0) break; +if (i==7) return NULL; +str=skip_comment(str); +return str; +} + + +/************************************************* +* Parse a RFC 2822 date * +*************************************************/ + +/* Parse the date part of an RFC 2822 date-time, extracting the + day, month and year. + +Arguments: + str pointer to the start of the date + d pointer to the resulting day (1 or 2 digits; the range is not checked) + m pointer to the resulting month, 0-based (0 = Jan ... 11 = Dec) + y pointer to the resulting year as an offset from 1900: 4-digit years + have 1900 subtracted (years before 1900 are rejected); 2-digit + years below 50 have 100 added, others are used unchanged + +Returns: points after the processed date or NULL on error +*/ + +static uschar * +parse_date(uschar *str, int *d, int *m, int *y) +{ +/* +date = day month year + +year = 4*DIGIT / obs-year + +obs-year = [CFWS] 2*DIGIT [CFWS] + +month = (FWS month-name FWS) / obs-month + +month-name = "Jan" / "Feb" / "Mar" / "Apr" / + "May" / "Jun" / "Jul" / "Aug" / + "Sep" / "Oct" / "Nov" / "Dec" + +obs-month = CFWS month-name CFWS + +day = ([FWS] 1*2DIGIT) / obs-day + +obs-day = [CFWS] 1*2DIGIT [CFWS] +*/ + +uschar *c,*n; +static const uschar *month_name[]={ US"jan", US"feb", US"mar", US"apr", US"may", US"jun", US"jul", US"aug", US"sep", US"oct", US"nov", US"dec" }; +int i; +uschar month[4]; + +str=skip_comment(str); +if ((str=parse_number(str,d,1))==NULL) return NULL; +if (*str>='0' && *str<='9') *d=10*(*d)+(*str++-'0'); +c=skip_comment(str); +if (c==str) return NULL; +else str=c; +for 
(i=0; i<3; ++i) if ((month[i]=tolower(*(str+i)))=='\0') return NULL; +month[3]='\0'; +for (i=0; i<12; ++i) if (Ustrcmp(month,month_name[i])==0) break; +if (i==12) return NULL; +str+=3; +*m=i; +c=skip_comment(str); +if (c==str) return NULL; +else str=c; +if ((n=parse_number(str,y,4))) + { + str=n; + if (*y<1900) return NULL; + *y=*y-1900; + } +else if ((n=parse_number(str,y,2))) + { + str=skip_comment(n); + while (*(str-1)==' ' || *(str-1)=='\t') --str; /* match last FWS later */ + if (*y<50) *y+=100; + } +else return NULL; +return str; +} + + +/************************************************* +* Parse a RFC 2822 Time * +*************************************************/ + +/* Parse the time part of an RFC 2822 date-time, extracting the + hour, minute, second and timezone. + +Arguments: + str pointer to the start of the time + h pointer to the resulting hour + m pointer to the resulting minute + s pointer to the resulting second + z pointer to the resulting timezone (offset in seconds) + +Returns: points after the processed time or NULL on error +*/ + +static uschar * +parse_time(uschar *str, int *h, int *m, int *s, int *z) +{ +/* +time = time-of-day FWS zone + +time-of-day = hour ":" minute [ ":" second ] + +hour = 2DIGIT / obs-hour + +obs-hour = [CFWS] 2DIGIT [CFWS] + +minute = 2DIGIT / obs-minute + +obs-minute = [CFWS] 2DIGIT [CFWS] + +second = 2DIGIT / obs-second + +obs-second = [CFWS] 2DIGIT [CFWS] + +zone = (( "+" / "-" ) 4DIGIT) / obs-zone + +obs-zone = "UT" / "GMT" / ; Universal Time + ; North American UT + ; offsets + "EST" / "EDT" / ; Eastern: - 5/ - 4 + "CST" / "CDT" / ; Central: - 6/ - 5 + "MST" / "MDT" / ; Mountain: - 7/ - 6 + "PST" / "PDT" / ; Pacific: - 8/ - 7 + + %d65-73 / ; Military zones - "A" + %d75-90 / ; through "I" and "K" + %d97-105 / ; through "Z", both + %d107-122 ; upper and lower case +*/ + +uschar *c; + +str=skip_comment(str); +if ((str=parse_number(str,h,2))==NULL) return NULL; +str=skip_comment(str); +if (*str!=':') return NULL; +++str; 
+str=skip_comment(str); +if ((str=parse_number(str,m,2))==NULL) return NULL; +c=skip_comment(str); +if (*str==':') + { + ++str; + str=skip_comment(str); + if ((str=parse_number(str,s,2))==NULL) return NULL; + c=skip_comment(str); + } +else *s=0; /* ":second" is optional in the grammar; never leave *s unset */ +if (c==str) return NULL; /* nothing was skipped before the zone: reject */ +else str=c; +if (*str=='+' || *str=='-') + { + int neg; + + neg=(*str=='-'); + ++str; + if ((str=parse_number(str,z,4))==NULL) return NULL; + *z=(*z/100)*3600+(*z%100)*60; /* hhmm -> seconds */ + if (neg) *z=-*z; + } +else + { + char zone[5]; + struct { const char *name; int off; } zone_name[10]= + { {"gmt",0}, {"ut",0}, {"est",-5}, {"edt",-4}, {"cst",-6}, {"cdt",-5}, {"mst",-7}, {"mdt",-6}, {"pst",-8}, {"pdt",-7}}; + int i,j; + + /* Collect up to four leading letters, lower-cased, as the zone name. */ + for (i=0; i<4; ++i) + { + zone[i]=tolower(*(str+i)); + if (zone[i]<'a' || zone[i]>'z') break; + } + zone[i]='\0'; + for (j=0; j<10 && strcmp(zone,zone_name[j].name); ++j); + /* Besides zones named in the grammar, RFC 2822 says other alphabetic */ + /* time zones should be treated as unknown offsets. */ + if (j<10) + { + *z=zone_name[j].off*3600; + str+=i; + } + else if (zone[0]<'a' || zone[0]>'z') return NULL; /* zone does not start with a letter */ + else + { + while ((*str>='a' && *str<='z') || (*str>='A' && *str<='Z')) ++str; + *z=0; + } + } +return str; +} + + +/************************************************* +* Parse a RFC 2822 date-time * +*************************************************/ + +/* Parse a RFC 2822 date-time and return it in seconds since the epoch. 
+ +Arguments: + str pointer to the start of the date-time + t pointer to the parsed time: the value returned by mktime() for the + parsed fields, with the parsed zone offset (seconds) subtracted + +Returns: points after the processed date-time or NULL on error +*/ + +uschar * +parse_date_time(uschar *str, time_t *t) +{ +/* +date-time = [ day-of-week "," ] date FWS time [CFWS] + +If a day of the week is recognised it must be followed by a comma. At least +one space or tab is required between the date and the time. + +environ is temporarily pointed at a one-entry environment holding TZ=GMT0 +around the mktime() call, presumably so that the broken-down time is converted +as GMT rather than in the local zone (NOTE: confirm this holds for the target +libc); environ is restored immediately afterwards. +*/ + +struct tm tm; +int zone; +extern char **environ; +char **old_environ; +static char gmt0[]="TZ=GMT0"; +static char *gmt_env[]={ gmt0, (char*)0 }; +uschar *try; + +if ((try=parse_day_of_week(str))) + { + str=try; + if (*str!=',') return 0; + ++str; + } +if ((str=parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year))==NULL) return NULL; +if (*str!=' ' && *str!='\t') return NULL; +while (*str==' ' || *str=='\t') ++str; +if ((str=parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone))==NULL) return NULL; +tm.tm_isdst=0; +old_environ=environ; +environ=gmt_env; +*t=mktime(&tm); +environ=old_environ; +if (*t==-1) return NULL; +*t-=zone; +str=skip_comment(str); +return str; +} + + + + /************************************************* ************************************************** * Stand-alone test program * @@ -1769,6 +2167,26 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) else printf("Failed: %d %s\n", extracted, errmess); } +printf("Testing parse_message_id\n"); + +while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) + { + uschar *s, *t, *errmess; + buffer[Ustrlen(buffer) - 1] = 0; + if (buffer[0] == 0) break; + s = buffer; + while (*s != 0) + { + s = parse_message_id(s, &t, &errmess); + if (errmess != NULL) + { + printf("Failed: %s\n", errmess); + break; + } + printf("%s\n", t); + } + } + return 0; }