-/* $Cambridge: exim/src/src/parse.c,v 1.3 2005/01/04 10:00:42 ph10 Exp $ */
-
/*************************************************
* Exim - an Internet mail transport agent *
*************************************************/
-/* Copyright (c) University of Cambridge 1995 - 2005 */
+/* Copyright (c) University of Cambridge 1995 - 2009 */
/* See the file NOTICE for conditions of use and distribution. */
/* Functions for parsing addresses */
any character except [ ] \, including linear white space, and may contain
quoted characters. However, RFC 821 restricts literals to being dot-separated
3-digit numbers, and we make the obvious extension for IPv6. Go for a sequence
-of digits and dots (hex digits and colons for IPv6) here; later this will be
-checked for being a syntactically valid IP address if it ever gets to a router.
+of digits, dots, hex digits, and colons here; later this will be checked for
+being a syntactically valid IP address if it ever gets to a router.
-If IPv6 is supported, allow both the formal form, with IPV6: at the start, and
-the informal form without it, and accept IPV4: as well, 'cause someone will use
-it sooner or later. */
+Allow both the formal IPv6 form, with IPV6: at the start, and the informal form
+without it, and accept IPV4: as well, 'cause someone will use it sooner or
+later. */
if (*s == '[')
{
*t++ = *s++;
- #if HAVE_IPV6
if (strncmpic(s, US"IPv6:", 5) == 0 || strncmpic(s, US"IPv4:", 5) == 0)
{
memcpy(t, s, 5);
}
while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++;
- #else
- while (*s == '.' || isdigit(*s)) *t++ = *s++;
- #endif
-
if (*s == ']') *t++ = *s++; else
{
*errorptr = US"malformed domain literal";
TRUE and parse_found_group is FALSE when this function is called, an address
which is the start of a group (i.e. preceded by a phrase and a colon) is
recognized; the phrase is ignored and the flag parse_found_group is set. If
-this flag is TRUE at the end of an address, then if an extraneous semicolon is
-found, it is ignored and the flag is cleared. This logic is used only when
-scanning through addresses in headers, either to fulfil the -t option or for
-rewriting or checking header syntax.
+this flag is TRUE at the end of an address, and if an extraneous semicolon is
+found, it is ignored and the flag is cleared.
+
+This logic is used only when scanning through addresses in headers, either to
+fulfil the -t option, or for rewriting, or for checking header syntax. Because
+the group "state" has to be remembered between multiple calls of this function,
+the variables parse_{allow,found}_group are global. It is important to ensure
+that they are reset to FALSE at the end of scanning a header's list of
+addresses.
Arguments:
mailbox points to the RFC822 mailbox
original string, unmodified. If a quoted string is too long for the buffer, it
is truncated. (This shouldn't happen: this is normally handling short strings.)
-Hmmph. As always, things get perverted for other uses. This function was
-originally for the "phrase" part of addresses. Now it is being used for much
-longer texts in ACLs and via the ${rfc2047: expansion item. This means we have
+Hmmph. As always, things get perverted for other uses. This function was
+originally for the "phrase" part of addresses. Now it is being used for much
+longer texts in ACLs and via the ${rfc2047: expansion item. This means we have
to check for overlong "encoded-word"s and split them. November 2004.
Arguments:
charset the name of the character set; NULL => iso-8859-1
buffer the buffer to put the answer in
buffer_size the size of the buffer
+ fold if TRUE, a newline is inserted before the separating space when
+ more than one encoded-word is generated
Returns: pointer to the original string, if no quoting needed, or
pointer to buffer containing the quoted string, or
uschar *
parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer,
- int buffer_size)
+ int buffer_size, BOOL fold)
{
uschar *s = string;
uschar *p, *t;
int hlen;
BOOL coded = FALSE;
+BOOL first_byte = FALSE;
if (charset == NULL) charset = US"iso-8859-1";
{
int ch = *s++;
if (t > buffer + buffer_size - hlen - 8) break;
-
- if (t - p > 70)
+
+ if ((t - p > 67) && !first_byte)
{
*t++ = '?';
*t++ = '=';
+ if (fold) *t++ = '\n';
*t++ = ' ';
p = t;
Ustrncpy(p, buffer, hlen);
t += hlen;
- }
-
+ }
+
if (ch < 33 || ch > 126 ||
Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
{
- if (ch == ' ') *t++ = '_'; else
+ if (ch == ' ')
+ {
+ *t++ = '_';
+ first_byte = FALSE;
+ }
+ else
{
sprintf(CS t, "=%02X", ch);
while (*t != 0) t++;
coded = TRUE;
+ first_byte = !first_byte;
}
}
- else *t++ = ch;
+ else { *t++ = ch; first_byte = FALSE; }
}
-
+
*t++ = '?';
-*t++ = '=';
+*t++ = '=';
*t = 0;
-
+
return coded? buffer : string;
}
if ((*s < 32 && *s != '\t') || *s > 126) break;
if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer,
- buffer_size);
+ buffer_size, FALSE);
/* No non-printers; use the RFC 822 quoting rules */
{
*error = string_sprintf("failed to stat included file %s: %s",
filename, strerror(errno));
- fclose(f);
+ (void)fclose(f);
return FF_INCLUDEFAIL;
}
{
*error = string_sprintf("error while reading included file %s: %s",
filename, strerror(errno));
- fclose(f);
+ (void)fclose(f);
return FF_ERROR;
}
filebuf[statbuf.st_size] = 0;
- fclose(f);
+ (void)fclose(f);
addr = NULL;
frc = parse_forward_list(filebuf, options, &addr,
}
+/*************************************************
+* Extract a Message-ID *
+*************************************************/
+
+/* This function is used to extract message ids from In-Reply-To: and
+References: header lines.
+
+Arguments:
+  str          pointer to the start of the message-id
+  yield        put pointer to the message id (in dynamic memory) here; it is
+                 set to NULL whenever the function fails
+  error        put error message here on failure; it is cleared to NULL on
+                 entry, so the caller need not initialize it
+
+Returns:       points after the processed message-id or NULL on error
+*/
+
+uschar *
+parse_message_id(uschar *str, uschar **yield, uschar **error)
+{
+uschar *domain = NULL;
+uschar *id;
+
+/* Start from a known state. The success test below inspects *error, so it
+must not depend on whatever the caller happened to leave there, and *yield
+must never be left pointing at store that has been given back. */
+
+*error = NULL;
+*yield = NULL;
+
+str = skip_comment(str);
+if (*str != '<')
+  {
+  *error = US"Missing '<' before message-id";
+  return NULL;
+  }
+
+/* Getting a block the size of the input string will definitely be sufficient
+for the answer, but it may also be very long if we are processing a header
+line. Therefore, take care to release unwanted store afterwards. */
+
+id = *yield = store_get(Ustrlen(str) + 1);
+*id++ = *str++;                       /* copy the opening '<' */
+
+str = read_addr_spec(str, id, '>', error, &domain);
+
+if (*error == NULL)
+  {
+  if (*str != '>') *error = US"Missing '>' after message-id";
+  else if (domain == NULL) *error = US"domain missing in message-id";
+  }
+
+if (*error != NULL)
+  {
+  store_reset(*yield);                /* give back the whole block */
+  *yield = NULL;                      /* do not hand out a dangling pointer */
+  return NULL;
+  }
+
+/* Append the closing '>' and a terminating zero after the address that was
+written at id, then release the unused remainder of the block. */
+
+while (*id != 0) id++;
+*id++ = *str++;
+*id++ = 0;
+store_reset(id);
+
+return skip_comment(str);
+}
+
+
+/*************************************************
+* Parse a fixed digit number *
+*************************************************/
+
+/* Parse a string containing an ASCII encoded number with a fixed number of
+decimal digits.
+
+Arguments:
+  str          pointer to the start of the ASCII encoded number
+  n            pointer to the resulting value; it is zeroed first and then
+                 accumulated digit by digit, so on failure it holds the value
+                 of the digits that were accepted before the bad character
+  digits       number of required digits
+
+Returns:       points after the processed number or NULL on error
+*/
+
+static uschar *
+parse_number(uschar *str, int *n, int digits)
+{
+int remaining;
+
+*n = 0;
+for (remaining = digits; remaining != 0; --remaining)
+  {
+  int c = *str;
+  if (c < '0' || c > '9') return NULL;
+  *n = (*n) * 10 + (c - '0');
+  ++str;
+  }
+return str;
+}
+
+
+/*************************************************
+* Parse a RFC 2822 day of week *
+*************************************************/
+
+/* Parse the day of the week from a RFC 2822 date, but do not
+   decode it, because it is only for humans. The three-letter name is
+   matched without regard to case; comments/white space are skipped on
+   both sides of it.
+
+Arguments:
+  str          pointer to the start of the day of the week
+
+Returns:       points after the parsed day or NULL on error
+*/
+
+static uschar *
+parse_day_of_week(uschar *str)
+{
+/*
+day-of-week     =       ([FWS] day-name) / obs-day-of-week
+
+day-name        =       "Mon" / "Tue" / "Wed" / "Thu" /
+                        "Fri" / "Sat" / "Sun"
+
+obs-day-of-week =       [CFWS] day-name [CFWS]
+*/
+
+static const uschar *names[7] =
+  { US"mon", US"tue", US"wed", US"thu", US"fri", US"sat", US"sun" };
+uschar abbrev[4];
+int k;
+
+str = skip_comment(str);
+
+/* Take a lower-cased copy of the next three characters; running into the
+end of the string before three have been read is an error. */
+
+for (k = 0; k < 3; k++, str++)
+  {
+  abbrev[k] = tolower(*str);
+  if (abbrev[k] == '\0') return NULL;
+  }
+abbrev[3] = '\0';
+
+/* Look the abbreviation up; k reaches 7 when nothing matched. */
+
+k = 0;
+while (k < 7 && Ustrcmp(abbrev, names[k]) != 0) k++;
+if (k == 7) return NULL;
+
+return skip_comment(str);
+}
+
+
+/*************************************************
+* Parse a RFC 2822 date *
+*************************************************/
+
+/* Parse the date part of a RFC 2822 date-time, extracting the
+   day, month and year. The values are stored in the form used by
+   struct tm: the month is zero-based and the year is relative to 1900.
+   No range check is applied to the day.
+
+Arguments:
+  str          pointer to the start of the date
+  d            pointer to the resulting day
+  m            pointer to the resulting month (0 = January)
+  y            pointer to the resulting year (years since 1900)
+
+Returns:       points after the processed date or NULL on error
+*/
+
+static uschar *
+parse_date(uschar *str, int *d, int *m, int *y)
+{
+/*
+date            =       day month year
+
+year            =       4*DIGIT / obs-year
+
+obs-year        =       [CFWS] 2*DIGIT [CFWS]
+
+month           =       (FWS month-name FWS) / obs-month
+
+month-name      =       "Jan" / "Feb" / "Mar" / "Apr" /
+                        "May" / "Jun" / "Jul" / "Aug" /
+                        "Sep" / "Oct" / "Nov" / "Dec"
+
+obs-month       =       CFWS month-name CFWS
+
+day             =       ([FWS] 1*2DIGIT) / obs-day
+
+obs-day         =       [CFWS] 1*2DIGIT [CFWS]
+*/
+
+static const uschar *month_name[] =
+  { US"jan", US"feb", US"mar", US"apr", US"may", US"jun",
+    US"jul", US"aug", US"sep", US"oct", US"nov", US"dec" };
+uschar *after, *num_end;
+uschar mon[4];
+int k;
+
+/* Day: one mandatory digit, optionally followed by a second one. */
+
+str = skip_comment(str);
+str = parse_number(str, d, 1);
+if (str == NULL) return NULL;
+if (*str >= '0' && *str <= '9')
+  {
+  *d = 10 * (*d) + (*str - '0');
+  ++str;
+  }
+
+/* Something skippable (white space or a comment) must separate the day from
+the month. */
+
+after = skip_comment(str);
+if (after == str) return NULL;
+str = after;
+
+/* Month: three letters, compared in lower case against the table. */
+
+for (k = 0; k < 3; ++k)
+  {
+  mon[k] = tolower(str[k]);
+  if (mon[k] == '\0') return NULL;
+  }
+mon[3] = '\0';
+
+for (k = 0; k < 12; ++k)
+  if (Ustrcmp(mon, month_name[k]) == 0) break;
+if (k == 12) return NULL;
+*m = k;
+str += 3;
+
+/* The month must likewise be separated from the year. */
+
+after = skip_comment(str);
+if (after == str) return NULL;
+str = after;
+
+/* Year: try the four-digit form first. Years before 1900 are rejected. */
+
+num_end = parse_number(str, y, 4);
+if (num_end != NULL)
+  {
+  if (*y < 1900) return NULL;
+  *y -= 1900;
+  return num_end;
+  }
+
+/* Obsolete two-digit year: 00-49 are taken to mean 2000-2049, 50-99 to mean
+1950-1999. */
+
+num_end = parse_number(str, y, 2);
+if (num_end == NULL) return NULL;
+
+str = skip_comment(num_end);
+while (str[-1] == ' ' || str[-1] == '\t') --str;  /* match last FWS later */
+if (*y < 50) *y += 100;
+return str;
+}
+
+
+/*************************************************
+* Parse a RFC 2822 Time *
+*************************************************/
+
+/* Parse the time part of a RFC 2822 date-time, extracting the
+   hour, minute, second and timezone. No range checks are applied to the
+   numeric fields.
+
+Arguments:
+  str          pointer to the start of the time
+  h            pointer to the resulting hour
+  m            pointer to the resulting minute
+  s            pointer to the resulting second; set to 0 when the optional
+                 seconds field is absent
+  z            pointer to the resulting timezone (offset in seconds); 0 for
+                 alphabetic zones that are not in the table below
+
+Returns:       points after the processed time or NULL on error
+*/
+
+static uschar *
+parse_time(uschar *str, int *h, int *m, int *s, int *z)
+{
+/*
+time            =       time-of-day FWS zone
+
+time-of-day     =       hour ":" minute [ ":" second ]
+
+hour            =       2DIGIT / obs-hour
+
+obs-hour        =       [CFWS] 2DIGIT [CFWS]
+
+minute          =       2DIGIT / obs-minute
+
+obs-minute      =       [CFWS] 2DIGIT [CFWS]
+
+second          =       2DIGIT / obs-second
+
+obs-second      =       [CFWS] 2DIGIT [CFWS]
+
+zone            =       (( "+" / "-" ) 4DIGIT) / obs-zone
+
+obs-zone        =       "UT" / "GMT" /          ; Universal Time
+                                                ; North American UT
+                                                ; offsets
+                        "EST" / "EDT" /         ; Eastern:  - 5/ - 4
+                        "CST" / "CDT" /         ; Central:  - 6/ - 5
+                        "MST" / "MDT" /         ; Mountain: - 7/ - 6
+                        "PST" / "PDT" /         ; Pacific:  - 8/ - 7
+
+                        %d65-73 /               ; Military zones - "A"
+                        %d75-90 /               ; through "I" and "K"
+                        %d97-105 /              ; through "Z", both
+                        %d107-122               ; upper and lower case
+*/
+
+uschar *c;
+
+/* The seconds field is optional. Give it a defined value now so that the
+caller never reads an unset integer when the field is missing. */
+
+*s = 0;
+
+str=skip_comment(str);
+if ((str=parse_number(str,h,2))==NULL) return NULL;
+str=skip_comment(str);
+if (*str!=':') return NULL;
+++str;
+str=skip_comment(str);
+if ((str=parse_number(str,m,2))==NULL) return NULL;
+
+/* c remembers where the zone would start if there are no seconds. The colon
+introducing the seconds is only recognized directly after the minute digits. */
+
+c=skip_comment(str);
+if (*str==':')
+  {
+  ++str;
+  str=skip_comment(str);
+  if ((str=parse_number(str,s,2))==NULL) return NULL;
+  c=skip_comment(str);
+  }
+
+/* Something skippable must separate the time of day from the zone. */
+
+if (c==str) return NULL;
+str=c;
+
+if (*str=='+' || *str=='-')
+  {
+  /* Numeric zone: sign followed by HHMM, converted to seconds. */
+  int neg=(*str=='-');
+
+  ++str;
+  if ((str=parse_number(str,z,4))==NULL) return NULL;
+  *z=(*z/100)*3600+(*z%100)*60;
+  if (neg) *z=-*z;
+  }
+else
+  {
+  static const struct { const char *name; int off; } zone_name[10]=
+    { {"gmt",0}, {"ut",0}, {"est",-5}, {"edt",-4}, {"cst",-6}, {"cdt",-5},
+      {"mst",-7}, {"mdt",-6}, {"pst",-8}, {"pdt",-7} };
+  char zone[5];
+  int i,j;
+
+  /* Collect up to four leading letters, lower-cased. The loop stops at the
+  first non-letter, which includes the end of the string. */
+
+  for (i=0; i<4; ++i)
+    {
+    zone[i]=tolower(*(str+i));
+    if (zone[i]<'a' || zone[i]>'z') break;
+    }
+  zone[i]='\0';
+
+  for (j=0; j<10 && strcmp(zone,zone_name[j].name)!=0; ++j);
+
+  /* Besides zones named in the grammar, RFC 2822 says other alphabetic */
+  /* time zones should be treated as unknown offsets. */
+  if (j<10)
+    {
+    *z=zone_name[j].off*3600;
+    str+=i;
+    }
+  else if (i==0) return NULL;          /* no letters at all: not a zone */
+  else
+    {
+    while ((*str>='a' && *str<='z') || (*str>='A' && *str<='Z')) ++str;
+    *z=0;
+    }
+  }
+return str;
+}
+
+
+/*************************************************
+* Parse a RFC 2822 date-time *
+*************************************************/
+
+/* Parse a RFC 2822 date-time and return it in seconds since the epoch.
+   An optional leading day of the week is accepted but not checked against
+   the date.
+
+Arguments:
+  str          pointer to the start of the date-time
+  t            pointer to the parsed time
+
+Returns:       points after the processed date-time or NULL on error
+*/
+
+uschar *
+parse_date_time(uschar *str, time_t *t)
+{
+/*
+date-time       =       [ day-of-week "," ] date FWS time [CFWS]
+*/
+
+/* Zero the whole structure: the parsers below fill in only the fields they
+recognize, and mktime() must not be handed indeterminate values. */
+
+struct tm tm = { 0 };
+int zone;
+extern char **environ;
+char **old_environ;
+static char gmt0[]="TZ=GMT0";
+static char *gmt_env[]={ gmt0, (char*)0 };
+uschar *try;
+
+/* The day of the week is optional, but if one is present it must be
+followed by a comma. */
+
+if ((try=parse_day_of_week(str))!=NULL)
+  {
+  str=try;
+  if (*str!=',') return NULL;
+  ++str;
+  }
+
+if ((str=parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year))==NULL) return NULL;
+if (*str!=' ' && *str!='\t') return NULL;
+while (*str==' ' || *str=='\t') ++str;
+if ((str=parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone))==NULL) return NULL;
+tm.tm_isdst=0;
+
+/* Temporarily substitute an environment containing only TZ=GMT0 around the
+mktime() call, then put the real environment back. The zone offset from the
+message is subtracted afterwards. */
+
+old_environ=environ;
+environ=gmt_env;
+*t=mktime(&tm);
+environ=old_environ;
+if (*t==-1) return NULL;
+*t-=zone;
+
+return skip_comment(str);
+}
+
+
+
+
/*************************************************
**************************************************
* Stand-alone test program *
else printf("Failed: %d %s\n", extracted, errmess);
}
+printf("Testing parse_message_id\n");
+
+while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
+ {
+ uschar *s, *t, *errmess;
+ buffer[Ustrlen(buffer) - 1] = 0;
+ if (buffer[0] == 0) break;
+ s = buffer;
+ while (*s != 0)
+ {
+ s = parse_message_id(s, &t, &errmess);
+ if (errmess != NULL)
+ {
+ printf("Failed: %s\n", errmess);
+ break;
+ }
+ printf("%s\n", t);
+ }
+ }
+
return 0;
}