X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/309bd837529724b7574e2b0b7bdaf1a271137199..9d9c374678ae4b04869c90bc5980acfcfb68c336:/src/src/parse.c diff --git a/src/src/parse.c b/src/src/parse.c index c4936ca94..94e42c182 100644 --- a/src/src/parse.c +++ b/src/src/parse.c @@ -1,10 +1,8 @@ -/* $Cambridge: exim/src/src/parse.c,v 1.6 2006/02/03 15:26:54 ph10 Exp $ */ - /************************************************* * Exim - an Internet mail transport agent * *************************************************/ -/* Copyright (c) University of Cambridge 1995 - 2005 */ +/* Copyright (c) University of Cambridge 1995 - 2009 */ /* See the file NOTICE for conditions of use and distribution. */ /* Functions for parsing addresses */ @@ -597,10 +595,15 @@ which may appear in certain headers. If the flag parse_allow_group is set TRUE and parse_found_group is FALSE when this function is called, an address which is the start of a group (i.e. preceded by a phrase and a colon) is recognized; the phrase is ignored and the flag parse_found_group is set. If -this flag is TRUE at the end of an address, then if an extraneous semicolon is -found, it is ignored and the flag is cleared. This logic is used only when -scanning through addresses in headers, either to fulfil the -t option or for -rewriting or checking header syntax. +this flag is TRUE at the end of an address, and if an extraneous semicolon is +found, it is ignored and the flag is cleared. + +This logic is used only when scanning through addresses in headers, either to +fulfil the -t option, or for rewriting, or for checking header syntax. Because +the group "state" has to be remembered between multiple calls of this function, +the variables parse_{allow,found}_group are global. It is important to ensure +that they are reset to FALSE at the end of scanning a header's list of +addresses. Arguments: mailbox points to the RFC822 mailbox @@ -854,6 +857,8 @@ Arguments: charset the name of the character set; NULL => iso-8859-1 buffer the buffer to put the answer in buffer_size the size of the buffer + fold if TRUE, a newline is inserted before the separating space when + more than one encoded-word is generated Returns: pointer to the original string, if no quoting needed, or pointer to buffer containing the quoted string, or @@ -863,12 +868,13 @@ Returns: pointer to the original string, if no quoting needed, or uschar * parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer, - int buffer_size) + int buffer_size, BOOL fold) { uschar *s = string; uschar *p, *t; int hlen; BOOL coded = FALSE; +BOOL first_byte = FALSE; if (charset == NULL) charset = US"iso-8859-1"; @@ -886,10 +892,11 @@ for (; len > 0; len--) int ch = *s++; if (t > buffer + buffer_size - hlen - 8) break; - if (t - p > 70) + if ((t - p > 67) && !first_byte) { *t++ = '?'; *t++ = '='; + if (fold) *t++ = '\n'; *t++ = ' '; p = t; Ustrncpy(p, buffer, hlen); @@ -899,14 +906,20 @@ for (; len > 0; len--) if (ch < 33 || ch > 126 || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL) { - if (ch == ' ') *t++ = '_'; else + if (ch == ' ') + { + *t++ = '_'; + first_byte = FALSE; + } + else { sprintf(CS t, "=%02X", ch); while (*t != 0) t++; coded = TRUE; + first_byte = !first_byte; } } - else *t++ = ch; + else { *t++ = ch; first_byte = FALSE; } } *t++ = '?'; @@ -989,7 +1002,7 @@ for (i = 0, s = phrase; i < len; i++, s++) if ((*s < 32 && *s != '\t') || *s > 126) break; if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer, - buffer_size); + buffer_size, FALSE); /* No non-printers; use the RFC 822 quoting rules */ @@ -1656,6 +1669,377 @@ for (;;) } +/************************************************* +* Extract a Message-ID * +*************************************************/ + +/* This function is used to extract message ids from In-Reply-To: and +References: header lines. + +Arguments: + str pointer to the start of the message-id + yield put pointer to the message id (in dynamic memory) here + error put error message here on failure + +Returns: points after the processed message-id or NULL on error +*/ + +uschar * +parse_message_id(uschar *str, uschar **yield, uschar **error) +{ +uschar *domain = NULL; +uschar *id; + +str = skip_comment(str); +if (*str != '<') + { + *error = US"Missing '<' before message-id"; + return NULL; + } + +/* Getting a block the size of the input string will definitely be sufficient +for the answer, but it may also be very long if we are processing a header +line. Therefore, take care to release unwanted store afterwards. */ + +id = *yield = store_get(Ustrlen(str) + 1); +*id++ = *str++; + +str = read_addr_spec(str, id, '>', error, &domain); + +if (*error == NULL) + { + if (*str != '>') *error = US"Missing '>' after message-id"; + else if (domain == NULL) *error = US"domain missing in message-id"; + } + +if (*error != NULL) + { + store_reset(*yield); + return NULL; + } + +while (*id != 0) id++; +*id++ = *str++; +*id++ = 0; +store_reset(id); + +str = skip_comment(str); +return str; +} + + +/************************************************* +* Parse a fixed digit number * +*************************************************/ + +/* Parse a string containing an ASCII encoded fixed digits number + +Arguments: + str pointer to the start of the ASCII encoded number + n pointer to the resulting value + digits number of required digits + +Returns: points after the processed date or NULL on error +*/ + +static uschar * +parse_number(uschar *str, int *n, int digits) +{ + *n=0; + while (digits--) + { + if (*str<'0' || *str>'9') return NULL; + *n=10*(*n)+(*str++-'0'); + } + return str; +} + + +/************************************************* +* Parse a RFC 2822 day of week * +*************************************************/ + +/* Parse the day of the week from a RFC 2822 date, but do not + decode it, because it is only for humans. + +Arguments: + str pointer to the start of the day of the week + +Returns: points after the parsed day or NULL on error +*/ + +static uschar * +parse_day_of_week(uschar *str) +{ +/* +day-of-week = ([FWS] day-name) / obs-day-of-week + +day-name = "Mon" / "Tue" / "Wed" / "Thu" / + "Fri" / "Sat" / "Sun" + +obs-day-of-week = [CFWS] day-name [CFWS] +*/ + +static const uschar *day_name[7]={ US"mon", US"tue", US"wed", US"thu", US"fri", US"sat", US"sun" }; +int i; +uschar day[4]; + +str=skip_comment(str); +for (i=0; i<3; ++i) + { + if ((day[i]=tolower(*str))=='\0') return NULL; + ++str; + } +day[3]='\0'; +for (i=0; i<7; ++i) if (Ustrcmp(day,day_name[i])==0) break; +if (i==7) return NULL; +str=skip_comment(str); +return str; +} + + +/************************************************* +* Parse a RFC 2822 date * +*************************************************/ + +/* Parse the date part of a RFC 2822 date-time, extracting the + day, month and year. + +Arguments: + str pointer to the start of the date + d pointer to the resulting day + m pointer to the resulting month + y pointer to the resulting year + +Returns: points after the processed date or NULL on error +*/ + +static uschar * +parse_date(uschar *str, int *d, int *m, int *y) +{ +/* +date = day month year + +year = 4*DIGIT / obs-year + +obs-year = [CFWS] 2*DIGIT [CFWS] + +month = (FWS month-name FWS) / obs-month + +month-name = "Jan" / "Feb" / "Mar" / "Apr" / + "May" / "Jun" / "Jul" / "Aug" / + "Sep" / "Oct" / "Nov" / "Dec" + +obs-month = CFWS month-name CFWS + +day = ([FWS] 1*2DIGIT) / obs-day + +obs-day = [CFWS] 1*2DIGIT [CFWS] +*/ + +uschar *c,*n; +static const uschar *month_name[]={ US"jan", US"feb", US"mar", US"apr", US"may", US"jun", US"jul", US"aug", US"sep", US"oct", US"nov", US"dec" }; +int i; +uschar month[4]; + +str=skip_comment(str); +if ((str=parse_number(str,d,1))==NULL) return NULL; +if (*str>='0' && *str<='9') *d=10*(*d)+(*str++-'0'); +c=skip_comment(str); +if (c==str) return NULL; +else str=c; +for (i=0; i<3; ++i) if ((month[i]=tolower(*(str+i)))=='\0') return NULL; +month[3]='\0'; +for (i=0; i<12; ++i) if (Ustrcmp(month,month_name[i])==0) break; +if (i==12) return NULL; +str+=3; +*m=i; +c=skip_comment(str); +if (c==str) return NULL; +else str=c; +if ((n=parse_number(str,y,4))) + { + str=n; + if (*y<1900) return NULL; + *y=*y-1900; + } +else if ((n=parse_number(str,y,2))) + { + str=skip_comment(n); + while (*(str-1)==' ' || *(str-1)=='\t') --str; /* match last FWS later */ + if (*y<50) *y+=100; + } +else return NULL; +return str; +} + + +/************************************************* +* Parse a RFC 2822 Time * +*************************************************/ + +/* Parse the time part of a RFC 2822 date-time, extracting the + hour, minute, second and timezone. + +Arguments: + str pointer to the start of the time + h pointer to the resulting hour + m pointer to the resulting minute + s pointer to the resulting second + z pointer to the resulting timezone (offset in seconds) + +Returns: points after the processed time or NULL on error +*/ + +static uschar * +parse_time(uschar *str, int *h, int *m, int *s, int *z) +{ +/* +time = time-of-day FWS zone + +time-of-day = hour ":" minute [ ":" second ] + +hour = 2DIGIT / obs-hour + +obs-hour = [CFWS] 2DIGIT [CFWS] + +minute = 2DIGIT / obs-minute + +obs-minute = [CFWS] 2DIGIT [CFWS] + +second = 2DIGIT / obs-second + +obs-second = [CFWS] 2DIGIT [CFWS] + +zone = (( "+" / "-" ) 4DIGIT) / obs-zone + +obs-zone = "UT" / "GMT" / ; Universal Time + ; North American UT + ; offsets + "EST" / "EDT" / ; Eastern: - 5/ - 4 + "CST" / "CDT" / ; Central: - 6/ - 5 + "MST" / "MDT" / ; Mountain: - 7/ - 6 + "PST" / "PDT" / ; Pacific: - 8/ - 7 + + %d65-73 / ; Military zones - "A" + %d75-90 / ; through "I" and "K" + %d97-105 / ; through "Z", both + %d107-122 ; upper and lower case +*/ + +uschar *c; + +str=skip_comment(str); +if ((str=parse_number(str,h,2))==NULL) return NULL; +str=skip_comment(str); +if (*str!=':') return NULL; +++str; +str=skip_comment(str); +if ((str=parse_number(str,m,2))==NULL) return NULL; +c=skip_comment(str); +if (*str==':') + { + ++str; + str=skip_comment(str); + if ((str=parse_number(str,s,2))==NULL) return NULL; + c=skip_comment(str); + } +if (c==str) return NULL; +else str=c; +if (*str=='+' || *str=='-') + { + int neg; + + neg=(*str=='-'); + ++str; + if ((str=parse_number(str,z,4))==NULL) return NULL; + *z=(*z/100)*3600+(*z%100)*60; + if (neg) *z=-*z; + } +else + { + char zone[5]; + struct { const char *name; int off; } zone_name[10]= + { {"gmt",0}, {"ut",0}, {"est",-5}, {"edt",-4}, {"cst",-6}, {"cdt",-5}, {"mst",-7}, {"mdt",-6}, {"pst",-8}, {"pdt",-7}}; + int i,j; + + for (i=0; i<4; ++i) + { + zone[i]=tolower(*(str+i)); + if (zone[i]<'a' || zone[i]>'z') break; + } + zone[i]='\0'; + for (j=0; j<10 && strcmp(zone,zone_name[j].name); ++j); + /* Besides zones named in the grammar, RFC 2822 says other alphabetic */ + /* time zones should be treated as unknown offsets. */ + if (j<10) + { + *z=zone_name[j].off*3600; + str+=i; + } + else if (zone[0]<'a' || zone[1]>'z') return 0; + else + { + while ((*str>='a' && *str<='z') || (*str>='A' && *str<='Z')) ++str; + *z=0; + } + } +return str; +} + + +/************************************************* +* Parse a RFC 2822 date-time * +*************************************************/ + +/* Parse a RFC 2822 date-time and return it in seconds since the epoch. + +Arguments: + str pointer to the start of the date-time + t pointer to the parsed time + +Returns: points after the processed date-time or NULL on error +*/ + +uschar * +parse_date_time(uschar *str, time_t *t) +{ +/* +date-time = [ day-of-week "," ] date FWS time [CFWS] +*/ + +struct tm tm; +int zone; +extern char **environ; +char **old_environ; +static char gmt0[]="TZ=GMT0"; +static char *gmt_env[]={ gmt0, (char*)0 }; +uschar *try; + +if ((try=parse_day_of_week(str))) + { + str=try; + if (*str!=',') return 0; + ++str; + } +if ((str=parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year))==NULL) return NULL; +if (*str!=' ' && *str!='\t') return NULL; +while (*str==' ' || *str=='\t') ++str; +if ((str=parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone))==NULL) return NULL; +tm.tm_isdst=0; +old_environ=environ; +environ=gmt_env; +*t=mktime(&tm); +environ=old_environ; +if (*t==-1) return NULL; +*t-=zone; +str=skip_comment(str); +return str; +} + + + + /************************************************* ************************************************** * Stand-alone test program * @@ -1788,6 +2172,26 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) else printf("Failed: %d %s\n", extracted, errmess); } +printf("Testing parse_message_id\n"); + +while (Ufgets(buffer, sizeof(buffer), stdin) != NULL) + { + uschar *s, *t, *errmess; + buffer[Ustrlen(buffer) - 1] = 0; + if (buffer[0] == 0) break; + s = buffer; + while (*s != 0) + { + s = parse_message_id(s, &t, &errmess); + if (errmess != NULL) + { + printf("Failed: %s\n", errmess); + break; + } + printf("%s\n", t); + } + } + return 0; }