X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/30dba1e609d941013dc8421de5104dad387ac5b1..4dc2379ac1ab6c21f265abed06dd9aaa214976af:/src/src/parse.c diff --git a/src/src/parse.c b/src/src/parse.c index 182df8026..3d942fd95 100644 --- a/src/src/parse.c +++ b/src/src/parse.c @@ -1,10 +1,8 @@ -/* $Cambridge: exim/src/src/parse.c,v 1.8 2006/02/28 11:25:40 ph10 Exp $ */ - /************************************************* * Exim - an Internet mail transport agent * *************************************************/ -/* Copyright (c) University of Cambridge 1995 - 2006 */ +/* Copyright (c) University of Cambridge 1995 - 2015 */ /* See the file NOTICE for conditions of use and distribution. */ /* Functions for parsing addresses */ @@ -552,9 +550,7 @@ read_addr_spec(uschar *s, uschar *t, int term, uschar **errorptr, { s = read_local_part(s, t, errorptr, FALSE); if (*errorptr == NULL) - { if (*s != term) - { if (*s != '@') *errorptr = string_sprintf("\"@\" or \".\" expected after \"%s\"", t); else @@ -564,8 +560,6 @@ if (*errorptr == NULL) *domainptr = t; s = read_domain(s, t, errorptr); } - } - } return s; } @@ -597,10 +591,15 @@ which may appear in certain headers. If the flag parse_allow_group is set TRUE and parse_found_group is FALSE when this function is called, an address which is the start of a group (i.e. preceded by a phrase and a colon) is recognized; the phrase is ignored and the flag parse_found_group is set. If -this flag is TRUE at the end of an address, then if an extraneous semicolon is -found, it is ignored and the flag is cleared. This logic is used only when -scanning through addresses in headers, either to fulfil the -t option or for -rewriting or checking header syntax. +this flag is TRUE at the end of an address, and if an extraneous semicolon is +found, it is ignored and the flag is cleared. + +This logic is used only when scanning through addresses in headers, either to +fulfil the -t option, or for rewriting, or for checking header syntax. Because +the group "state" has to be remembered between multiple calls of this function, +the variables parse_{allow,found}_group are global. It is important to ensure +that they are reset to FALSE at the end of scanning a header's list of +addresses. Arguments: mailbox points to the RFC822 mailbox @@ -814,7 +813,7 @@ if (*end - *start > ADDRESS_MAXLENGTH) return NULL; } -return (uschar *)yield; +return yield; /* Use goto (via the macro FAILED) to get to here from a variety of places. We might have an empty address in a group - the caller can choose to ignore @@ -854,6 +853,8 @@ Arguments: charset the name of the character set; NULL => iso-8859-1 buffer the buffer to put the answer in buffer_size the size of the buffer + fold if TRUE, a newline is inserted before the separating space when + more than one encoded-word is generated Returns: pointer to the original string, if no quoting needed, or pointer to buffer containing the quoted string, or @@ -861,14 +862,15 @@ Returns: pointer to the original string, if no quoting needed, or the introduction */ -uschar * -parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer, - int buffer_size) +const uschar * +parse_quote_2047(const uschar *string, int len, uschar *charset, uschar *buffer, + int buffer_size, BOOL fold) { -uschar *s = string; +const uschar *s = string; uschar *p, *t; int hlen; BOOL coded = FALSE; +BOOL first_byte = FALSE; if (charset == NULL) charset = US"iso-8859-1"; @@ -886,10 +888,11 @@ for (; len > 0; len--) int ch = *s++; if (t > buffer + buffer_size - hlen - 8) break; - if (t - p > 70) + if ((t - p > 67) && !first_byte) { *t++ = '?'; *t++ = '='; + if (fold) *t++ = '\n'; *t++ = ' '; p = t; Ustrncpy(p, buffer, hlen); @@ -899,14 +902,20 @@ for (; len > 0; len--) if (ch < 33 || ch > 126 || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL) { - if (ch == ' ') *t++ = '_'; else + if (ch == ' ') + { + *t++ = '_'; + first_byte = FALSE; + } + else { sprintf(CS t, "=%02X", ch); while (*t != 0) t++; coded = TRUE; + first_byte = !first_byte; } } - else *t++ = ch; + else { *t++ = ch; first_byte = FALSE; } } *t++ = '?'; @@ -972,12 +981,13 @@ Arguments: Returns: the fixed RFC822 phrase */ -uschar * -parse_fix_phrase(uschar *phrase, int len, uschar *buffer, int buffer_size) +const uschar * +parse_fix_phrase(const uschar *phrase, int len, uschar *buffer, int buffer_size) { int ch, i; BOOL quoted = FALSE; -uschar *s, *t, *end, *yield; +const uschar *s, *end; +uschar *t, *yield; while (len > 0 && isspace(*phrase)) { phrase++; len--; } if (len > buffer_size/4) return US"Name too long"; @@ -989,7 +999,7 @@ for (i = 0, s = phrase; i < len; i++, s++) if ((*s < 32 && *s != '\t') || *s > 126) break; if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer, - buffer_size); + buffer_size, FALSE); /* No non-printers; use the RFC 822 quoting rules */ @@ -1106,7 +1116,7 @@ while (s < end) else if (ch == '(') { - uschar *ss = s; /* uschar after '(' */ + const uschar *ss = s; /* uschar after '(' */ int level = 1; while(ss < end) { @@ -1232,7 +1242,7 @@ Returns: FF_DELIVERED addresses extracted int parse_forward_list(uschar *s, int options, address_item **anchor, - uschar **error, uschar *incoming_domain, uschar *directory, + uschar **error, const uschar *incoming_domain, uschar *directory, error_block **syntax_errors) { int count = 0; @@ -1415,7 +1425,7 @@ for (;;) /* Check file name if required */ - if (directory != NULL) + if (directory) { int len = Ustrlen(directory); uschar *p = filename + len; @@ -1427,16 +1437,53 @@ for (;;) return FF_ERROR; } +#ifdef EXIM_HAVE_OPENAT + /* It is necessary to check that every component inside the directory + is NOT a symbolic link, in order to keep the file inside the directory. + This is mighty tedious. We open the directory and openat every component, + with a flag that fails symlinks. */ + + { + int fd = open(CS directory, O_RDONLY); + if (fd < 0) + { + *error = string_sprintf("failed to open directory %s", directory); + return FF_ERROR; + } + while (*p) + { + uschar temp; + int fd2; + uschar * q = p; + + while (*++p && *p != '/') ; + temp = *p; + *p = '\0'; + + fd2 = openat(fd, CS q, O_RDONLY|O_NOFOLLOW); + close(fd); + *p = temp; + if (fd2 < 0) + { + *error = string_sprintf("failed to open %s (component of included " + "file); could be symbolic link", filename); + return FF_ERROR; + } + fd = fd2; + } + f = fdopen(fd, "rb"); + } +#else /* It is necessary to check that every component inside the directory is NOT a symbolic link, in order to keep the file inside the directory. This is mighty tedious. It is also not totally foolproof in that it leaves the possibility of a race attack, but I don't know how to do any better. */ - while (*p != 0) + while (*p) { int temp; - while (*(++p) != 0 && *p != '/'); + while (*++p && *p != '/'); temp = *p; *p = 0; if (Ulstat(filename, &statbuf) != 0) @@ -1456,11 +1503,16 @@ for (;;) return FF_ERROR; } } +#endif } - /* Open and stat the file */ +#ifdef EXIM_HAVE_OPENAT + else +#endif + /* Open and stat the file */ + f = Ufopen(filename, "rb"); - if ((f = Ufopen(filename, "rb")) == NULL) + if (!f) { *error = string_open_failed(errno, "included file %s", filename); return FF_INCLUDEFAIL; @@ -1476,7 +1528,7 @@ for (;;) /* If directory was checked, double check that we opened a regular file */ - if (directory != NULL && (statbuf.st_mode & S_IFMT) != S_IFREG) + if (directory && (statbuf.st_mode & S_IFMT) != S_IFREG) { *error = string_sprintf("included file %s is not a regular file in " "the %s directory", filename, directory); @@ -1508,10 +1560,9 @@ for (;;) error, incoming_domain, directory, syntax_errors); if (frc != FF_DELIVERED && frc != FF_NOTDELIVERED) return frc; - if (addr != NULL) + if (addr) { - last = addr; - while (last->next != NULL) { count++; last = last->next; } + for (last = addr; last->next; last = last->next) count++; last->next = *anchor; *anchor = addr; count++; @@ -1656,7 +1707,6 @@ for (;;) } - /************************************************* * Extract a Message-ID * *************************************************/ @@ -1716,6 +1766,316 @@ return str; } +/************************************************* +* Parse a fixed digit number * +*************************************************/ + +/* Parse a string containing an ASCII encoded fixed digits number + +Arguments: + str pointer to the start of the ASCII encoded number + n pointer to the resulting value + digits number of required digits + +Returns: points after the processed date or NULL on error +*/ + +static uschar * +parse_number(uschar *str, int *n, int digits) +{ + *n=0; + while (digits--) + { + if (*str<'0' || *str>'9') return NULL; + *n=10*(*n)+(*str++-'0'); + } + return str; +} + + +/************************************************* +* Parse a RFC 2822 day of week * +*************************************************/ + +/* Parse the day of the week from a RFC 2822 date, but do not + decode it, because it is only for humans. + +Arguments: + str pointer to the start of the day of the week + +Returns: points after the parsed day or NULL on error +*/ + +static uschar * +parse_day_of_week(uschar *str) +{ +/* +day-of-week = ([FWS] day-name) / obs-day-of-week + +day-name = "Mon" / "Tue" / "Wed" / "Thu" / + "Fri" / "Sat" / "Sun" + +obs-day-of-week = [CFWS] day-name [CFWS] +*/ + +static const uschar *day_name[7]={ US"mon", US"tue", US"wed", US"thu", US"fri", US"sat", US"sun" }; +int i; +uschar day[4]; + +str=skip_comment(str); +for (i=0; i<3; ++i) + { + if ((day[i]=tolower(*str))=='\0') return NULL; + ++str; + } +day[3]='\0'; +for (i=0; i<7; ++i) if (Ustrcmp(day,day_name[i])==0) break; +if (i==7) return NULL; +str=skip_comment(str); +return str; +} + + +/************************************************* +* Parse a RFC 2822 date * +*************************************************/ + +/* Parse the date part of a RFC 2822 date-time, extracting the + day, month and year. + +Arguments: + str pointer to the start of the date + d pointer to the resulting day + m pointer to the resulting month + y pointer to the resulting year + +Returns: points after the processed date or NULL on error +*/ + +static uschar * +parse_date(uschar *str, int *d, int *m, int *y) +{ +/* +date = day month year + +year = 4*DIGIT / obs-year + +obs-year = [CFWS] 2*DIGIT [CFWS] + +month = (FWS month-name FWS) / obs-month + +month-name = "Jan" / "Feb" / "Mar" / "Apr" / + "May" / "Jun" / "Jul" / "Aug" / + "Sep" / "Oct" / "Nov" / "Dec" + +obs-month = CFWS month-name CFWS + +day = ([FWS] 1*2DIGIT) / obs-day + +obs-day = [CFWS] 1*2DIGIT [CFWS] +*/ + +uschar *c,*n; +static const uschar *month_name[]={ US"jan", US"feb", US"mar", US"apr", US"may", US"jun", US"jul", US"aug", US"sep", US"oct", US"nov", US"dec" }; +int i; +uschar month[4]; + +str=skip_comment(str); +if ((str=parse_number(str,d,1))==NULL) return NULL; +if (*str>='0' && *str<='9') *d=10*(*d)+(*str++-'0'); +c=skip_comment(str); +if (c==str) return NULL; +else str=c; +for (i=0; i<3; ++i) if ((month[i]=tolower(*(str+i)))=='\0') return NULL; +month[3]='\0'; +for (i=0; i<12; ++i) if (Ustrcmp(month,month_name[i])==0) break; +if (i==12) return NULL; +str+=3; +*m=i; +c=skip_comment(str); +if (c==str) return NULL; +else str=c; +if ((n=parse_number(str,y,4))) + { + str=n; + if (*y<1900) return NULL; + *y=*y-1900; + } +else if ((n=parse_number(str,y,2))) + { + str=skip_comment(n); + while (*(str-1)==' ' || *(str-1)=='\t') --str; /* match last FWS later */ + if (*y<50) *y+=100; + } +else return NULL; +return str; +} + + +/************************************************* +* Parse a RFC 2822 Time * +*************************************************/ + +/* Parse the time part of a RFC 2822 date-time, extracting the + hour, minute, second and timezone. + +Arguments: + str pointer to the start of the time + h pointer to the resulting hour + m pointer to the resulting minute + s pointer to the resulting second + z pointer to the resulting timezone (offset in seconds) + +Returns: points after the processed time or NULL on error +*/ + +static uschar * +parse_time(uschar *str, int *h, int *m, int *s, int *z) +{ +/* +time = time-of-day FWS zone + +time-of-day = hour ":" minute [ ":" second ] + +hour = 2DIGIT / obs-hour + +obs-hour = [CFWS] 2DIGIT [CFWS] + +minute = 2DIGIT / obs-minute + +obs-minute = [CFWS] 2DIGIT [CFWS] + +second = 2DIGIT / obs-second + +obs-second = [CFWS] 2DIGIT [CFWS] + +zone = (( "+" / "-" ) 4DIGIT) / obs-zone + +obs-zone = "UT" / "GMT" / ; Universal Time + ; North American UT + ; offsets + "EST" / "EDT" / ; Eastern: - 5/ - 4 + "CST" / "CDT" / ; Central: - 6/ - 5 + "MST" / "MDT" / ; Mountain: - 7/ - 6 + "PST" / "PDT" / ; Pacific: - 8/ - 7 + + %d65-73 / ; Military zones - "A" + %d75-90 / ; through "I" and "K" + %d97-105 / ; through "Z", both + %d107-122 ; upper and lower case +*/ + +uschar *c; + +str=skip_comment(str); +if ((str=parse_number(str,h,2))==NULL) return NULL; +str=skip_comment(str); +if (*str!=':') return NULL; +++str; +str=skip_comment(str); +if ((str=parse_number(str,m,2))==NULL) return NULL; +c=skip_comment(str); +if (*str==':') + { + ++str; + str=skip_comment(str); + if ((str=parse_number(str,s,2))==NULL) return NULL; + c=skip_comment(str); + } +if (c==str) return NULL; +else str=c; +if (*str=='+' || *str=='-') + { + int neg; + + neg=(*str=='-'); + ++str; + if ((str=parse_number(str,z,4))==NULL) return NULL; + *z=(*z/100)*3600+(*z%100)*60; + if (neg) *z=-*z; + } +else + { + char zone[5]; + struct { const char *name; int off; } zone_name[10]= + { {"gmt",0}, {"ut",0}, {"est",-5}, {"edt",-4}, {"cst",-6}, {"cdt",-5}, {"mst",-7}, {"mdt",-6}, {"pst",-8}, {"pdt",-7}}; + int i,j; + + for (i=0; i<4; ++i) + { + zone[i]=tolower(*(str+i)); + if (zone[i]<'a' || zone[i]>'z') break; + } + zone[i]='\0'; + for (j=0; j<10 && strcmp(zone,zone_name[j].name); ++j); + /* Besides zones named in the grammar, RFC 2822 says other alphabetic */ + /* time zones should be treated as unknown offsets. */ + if (j<10) + { + *z=zone_name[j].off*3600; + str+=i; + } + else if (zone[0]<'a' || zone[1]>'z') return 0; + else + { + while ((*str>='a' && *str<='z') || (*str>='A' && *str<='Z')) ++str; + *z=0; + } + } +return str; +} + + +/************************************************* +* Parse a RFC 2822 date-time * +*************************************************/ + +/* Parse a RFC 2822 date-time and return it in seconds since the epoch. + +Arguments: + str pointer to the start of the date-time + t pointer to the parsed time + +Returns: points after the processed date-time or NULL on error +*/ + +uschar * +parse_date_time(uschar *str, time_t *t) +{ +/* +date-time = [ day-of-week "," ] date FWS time [CFWS] +*/ + +struct tm tm; +int zone; +extern char **environ; +char **old_environ; +static char gmt0[]="TZ=GMT0"; +static char *gmt_env[]={ gmt0, (char*)0 }; +uschar *try; + +if ((try=parse_day_of_week(str))) + { + str=try; + if (*str!=',') return 0; + ++str; + } +if ((str=parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year))==NULL) return NULL; +if (*str!=' ' && *str!='\t') return NULL; +while (*str==' ' || *str=='\t') ++str; +if ((str=parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone))==NULL) return NULL; +tm.tm_isdst=0; +old_environ=environ; +environ=gmt_env; +*t=mktime(&tm); +environ=old_environ; +if (*t==-1) return NULL; +*t-=zone; +str=skip_comment(str); +return str; +} + + /*************************************************