X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/30dba1e609d941013dc8421de5104dad387ac5b1..4dc2379ac1ab6c21f265abed06dd9aaa214976af:/src/src/parse.c

diff --git a/src/src/parse.c b/src/src/parse.c
index 182df8026..3d942fd95 100644
--- a/src/src/parse.c
+++ b/src/src/parse.c
@@ -1,10 +1,8 @@
-/* $Cambridge: exim/src/src/parse.c,v 1.8 2006/02/28 11:25:40 ph10 Exp $ */
-
 /*************************************************
 *     Exim - an Internet mail transport agent    *
 *************************************************/
 
-/* Copyright (c) University of Cambridge 1995 - 2006 */
+/* Copyright (c) University of Cambridge 1995 - 2015 */
 /* See the file NOTICE for conditions of use and distribution. */
 
 /* Functions for parsing addresses */
@@ -552,9 +550,7 @@ read_addr_spec(uschar *s, uschar *t, int term, uschar **errorptr,
 {
 s = read_local_part(s, t, errorptr, FALSE);
 if (*errorptr == NULL)
-  {
   if (*s != term)
-    {
     if (*s != '@')
       *errorptr = string_sprintf("\"@\" or \".\" expected after \"%s\"", t);
     else
@@ -564,8 +560,6 @@ if (*errorptr == NULL)
       *domainptr = t;
       s = read_domain(s, t, errorptr);
       }
-    }
-  }
 return s;
 }
 
@@ -597,10 +591,15 @@ which may appear in certain headers. If the flag parse_allow_group is set
 TRUE and parse_found_group is FALSE when this function is called, an address
 which is the start of a group (i.e. preceded by a phrase and a colon) is
 recognized; the phrase is ignored and the flag parse_found_group is set. If
-this flag is TRUE at the end of an address, then if an extraneous semicolon is
-found, it is ignored and the flag is cleared. This logic is used only when
-scanning through addresses in headers, either to fulfil the -t option or for
-rewriting or checking header syntax.
+this flag is TRUE at the end of an address, and if an extraneous semicolon is
+found, it is ignored and the flag is cleared.
+
+This logic is used only when scanning through addresses in headers, either to
+fulfil the -t option, or for rewriting, or for checking header syntax. Because
+the group "state" has to be remembered between multiple calls of this function,
+the variables parse_{allow,found}_group are global. It is important to ensure
+that they are reset to FALSE at the end of scanning a header's list of
+addresses.
 
 Arguments:
   mailbox     points to the RFC822 mailbox
@@ -814,7 +813,7 @@ if (*end - *start > ADDRESS_MAXLENGTH)
   return NULL;
   }
 
-return (uschar *)yield;
+return yield;
 
 /* Use goto (via the macro FAILED) to get to here from a variety of places.
 We might have an empty address in a group - the caller can choose to ignore
@@ -854,6 +853,8 @@ Arguments:
   charset      the name of the character set; NULL => iso-8859-1
   buffer       the buffer to put the answer in
   buffer_size  the size of the buffer
+  fold         if TRUE, a newline is inserted before the separating space when
+                 more than one encoded-word is generated
 
 Returns:       pointer to the original string, if no quoting needed, or
                pointer to buffer containing the quoted string, or
@@ -861,14 +862,15 @@ Returns:       pointer to the original string, if no quoting needed, or
                the introduction
 */
 
-uschar *
-parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer,
-  int buffer_size)
+const uschar *
+parse_quote_2047(const uschar *string, int len, uschar *charset, uschar *buffer,
+  int buffer_size, BOOL fold)
 {
-uschar *s = string;
+const uschar *s = string;
 uschar *p, *t;
 int hlen;
 BOOL coded = FALSE;
+BOOL first_byte = FALSE;
 
 if (charset == NULL) charset = US"iso-8859-1";
 
@@ -886,10 +888,11 @@ for (; len > 0; len--)
   int ch = *s++;
   if (t > buffer + buffer_size - hlen - 8) break;
 
-  if (t - p > 70)
+  if ((t - p > 67) && !first_byte)
     {
     *t++ = '?';
     *t++ = '=';
+    if (fold) *t++ = '\n';
     *t++ = ' ';
     p = t;
     Ustrncpy(p, buffer, hlen);
@@ -899,14 +902,20 @@ for (; len > 0; len--)
   if (ch < 33 || ch > 126 ||
       Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
     {
-    if (ch == ' ') *t++ = '_'; else
+    if (ch == ' ')
+      {
+      *t++ = '_';
+      first_byte = FALSE;
+      }
+    else
       {
       sprintf(CS t, "=%02X", ch);
       while (*t != 0) t++;
       coded = TRUE;
+      first_byte = !first_byte;
       }
     }
-  else *t++ = ch;
+  else { *t++ = ch; first_byte = FALSE; }
   }
 
 *t++ = '?';
@@ -972,12 +981,13 @@ Arguments:
 Returns:       the fixed RFC822 phrase
 */
 
-uschar *
-parse_fix_phrase(uschar *phrase, int len, uschar *buffer, int buffer_size)
+const uschar *
+parse_fix_phrase(const uschar *phrase, int len, uschar *buffer, int buffer_size)
 {
 int ch, i;
 BOOL quoted = FALSE;
-uschar *s, *t, *end, *yield;
+const uschar *s, *end;
+uschar *t, *yield;
 
 while (len > 0 && isspace(*phrase)) { phrase++; len--; }
 if (len > buffer_size/4) return US"Name too long";
@@ -989,7 +999,7 @@ for (i = 0, s = phrase; i < len; i++, s++)
   if ((*s < 32 && *s != '\t') || *s > 126) break;
 
 if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer,
-  buffer_size);
+  buffer_size, FALSE);
 
 /* No non-printers; use the RFC 822 quoting rules */
 
@@ -1106,7 +1116,7 @@ while (s < end)
 
         else if (ch == '(')
           {
-          uschar *ss = s;     /* uschar after '(' */
+          const uschar *ss = s;     /* uschar after '(' */
           int level = 1;
           while(ss < end)
             {
@@ -1232,7 +1242,7 @@ Returns:      FF_DELIVERED      addresses extracted
 
 int
 parse_forward_list(uschar *s, int options, address_item **anchor,
-  uschar **error, uschar *incoming_domain, uschar *directory,
+  uschar **error, const uschar *incoming_domain, uschar *directory,
   error_block **syntax_errors)
 {
 int count = 0;
@@ -1415,7 +1425,7 @@ for (;;)
 
     /* Check file name if required */
 
-    if (directory != NULL)
+    if (directory)
       {
       int len = Ustrlen(directory);
       uschar *p = filename + len;
@@ -1427,16 +1437,53 @@ for (;;)
         return FF_ERROR;
         }
 
+#ifdef EXIM_HAVE_OPENAT
+      /* It is necessary to check that every component inside the directory
+      is NOT a symbolic link, in order to keep the file inside the directory.
+      This is mighty tedious. We open the directory and openat every component,
+      with a flag that fails symlinks. */
+
+      {
+      int fd = open(CS directory, O_RDONLY);
+      if (fd < 0)
+	{
+	*error = string_sprintf("failed to open directory %s", directory);
+	return FF_ERROR;
+	}
+      while (*p)
+	{
+	uschar temp;
+	int fd2;
+	uschar * q = p;
+
+	while (*++p && *p != '/') ;
+	temp = *p;
+	*p = '\0';
+
+	fd2 = openat(fd, CS q, O_RDONLY|O_NOFOLLOW);
+	close(fd);
+	*p = temp;
+	if (fd2 < 0)
+	  {
+          *error = string_sprintf("failed to open %s (component of included "
+            "file); could be symbolic link", filename);
+	  return FF_ERROR;
+	  }
+	fd = fd2;
+	}
+      f = fdopen(fd, "rb");
+      }
+#else
       /* It is necessary to check that every component inside the directory
       is NOT a symbolic link, in order to keep the file inside the directory.
       This is mighty tedious. It is also not totally foolproof in that it
       leaves the possibility of a race attack, but I don't know how to do
       any better. */
 
-      while (*p != 0)
+      while (*p)
         {
         int temp;
-        while (*(++p) != 0 && *p != '/');
+        while (*++p && *p != '/');
         temp = *p;
         *p = 0;
         if (Ulstat(filename, &statbuf) != 0)
@@ -1456,11 +1503,16 @@ for (;;)
           return FF_ERROR;
           }
         }
+#endif
       }
 
-    /* Open and stat the file */
+#ifdef EXIM_HAVE_OPENAT
+    else
+#endif
+      /* Open and stat the file */
+      f = Ufopen(filename, "rb");
 
-    if ((f = Ufopen(filename, "rb")) == NULL)
+    if (!f)
       {
       *error = string_open_failed(errno, "included file %s", filename);
       return FF_INCLUDEFAIL;
@@ -1476,7 +1528,7 @@ for (;;)
 
     /* If directory was checked, double check that we opened a regular file */
 
-    if (directory != NULL && (statbuf.st_mode & S_IFMT) != S_IFREG)
+    if (directory && (statbuf.st_mode & S_IFMT) != S_IFREG)
       {
       *error = string_sprintf("included file %s is not a regular file in "
         "the %s directory", filename, directory);
@@ -1508,10 +1560,9 @@ for (;;)
       error, incoming_domain, directory, syntax_errors);
     if (frc != FF_DELIVERED && frc != FF_NOTDELIVERED) return frc;
 
-    if (addr != NULL)
+    if (addr)
       {
-      last = addr;
-      while (last->next != NULL) { count++; last = last->next; }
+      for (last = addr; last->next; last = last->next) count++;
       last->next = *anchor;
       *anchor = addr;
       count++;
@@ -1656,7 +1707,6 @@ for (;;)
 }
 
 
-
 /*************************************************
 *            Extract a Message-ID                *
 *************************************************/
@@ -1716,6 +1766,316 @@ return str;
 }
 
 
+/*************************************************
+*        Parse a fixed digit number              *
+*************************************************/
+
+/* Parse a string containing an ASCII-encoded number with a fixed number of digits
+
+Arguments:
+  str          pointer to the start of the ASCII encoded number
+  n            pointer to the resulting value
+  digits       number of required digits
+
+Returns:       points after the processed number or NULL on error
+*/
+
+static uschar *
+parse_number(uschar *str, int *n, int digits)
+{
+  /* Consume exactly "digits" decimal digits, building the value in *n. */
+  for (*n = 0; digits != 0; --digits, ++str)
+  {
+    if (*str < '0' || *str > '9') return NULL;  /* *n keeps the partial value */
+    *n = (*n) * 10 + (*str - '0');
+  }
+  return str;
+}
+
+
+/*************************************************
+*        Parse a RFC 2822 day of week            *
+*************************************************/
+
+/* Parse the day of the week from a RFC 2822 date, but do not
+   decode it, because it is only for humans.
+
+Arguments:
+  str          pointer to the start of the day of the week
+
+Returns:       points after the parsed day or NULL on error
+*/
+
+static uschar *
+parse_day_of_week(uschar *str)
+{
+/*
+day-of-week     =       ([FWS] day-name) / obs-day-of-week
+
+day-name        =       "Mon" / "Tue" / "Wed" / "Thu" /
+                        "Fri" / "Sat" / "Sun"
+
+obs-day-of-week =       [CFWS] day-name [CFWS]
+*/
+
+static const uschar *known_days[7]={ US"mon", US"tue", US"wed", US"thu", US"fri", US"sat", US"sun" };
+uschar abbrev[4];
+int k;
+
+str=skip_comment(str);
+for (k=0; k<3; ++k, ++str)   /* lower-case the next three characters */
+  {
+  abbrev[k]=tolower(*str);
+  if (abbrev[k]=='\0') return NULL;   /* input ended too early */
+  }
+abbrev[3]='\0';
+for (k=0; k<7; ++k)
+  if (Ustrcmp(abbrev,known_days[k])==0)
+    return skip_comment(str);   /* recognised; skip whatever follows the name */
+return NULL;
+}
+
+
+/*************************************************
+*            Parse a RFC 2822 date               *
+*************************************************/
+
+/* Parse the date part of a RFC 2822 date-time, extracting the
+   day, month and year.
+
+Arguments:
+  str          pointer to the start of the date
+  d            pointer to the resulting day
+  m            pointer to the resulting month
+  y            pointer to the resulting year
+
+Returns:       points after the processed date or NULL on error
+*/
+
+static uschar *
+parse_date(uschar *str, int *d, int *m, int *y)
+{
+/*
+date            =       day month year
+
+year            =       4*DIGIT / obs-year
+
+obs-year        =       [CFWS] 2*DIGIT [CFWS]
+
+month           =       (FWS month-name FWS) / obs-month
+
+month-name      =       "Jan" / "Feb" / "Mar" / "Apr" /
+                        "May" / "Jun" / "Jul" / "Aug" /
+                        "Sep" / "Oct" / "Nov" / "Dec"
+
+obs-month       =       CFWS month-name CFWS
+
+day             =       ([FWS] 1*2DIGIT) / obs-day
+
+obs-day         =       [CFWS] 1*2DIGIT [CFWS]
+*/
+
+static const uschar *month_name[]={ US"jan", US"feb", US"mar", US"apr", US"may", US"jun", US"jul", US"aug", US"sep", US"oct", US"nov", US"dec" };
+uschar month[4];
+uschar *next;
+int k;
+
+/* day: one mandatory digit, optionally followed by a second */
+str=skip_comment(str);
+if (!(str=parse_number(str,d,1))) return NULL;
+if (*str>='0' && *str<='9') { *d=(*d)*10+(*str-'0'); ++str; }
+if ((next=skip_comment(str))==str) return NULL;   /* separator is mandatory */
+str=next;
+
+/* month: three letters, matched case-insensitively; stored as 0..11 */
+for (k=0; k<3; ++k)
+  if ((month[k]=tolower(str[k]))=='\0') return NULL;
+month[3]='\0';
+for (k=0; k<12 && Ustrcmp(month,month_name[k])!=0; ++k) ;
+if (k==12) return NULL;
+*m=k;
+str+=3;
+if ((next=skip_comment(str))==str) return NULL;   /* separator is mandatory */
+str=next;
+
+/* year: four digits, else the obsolete two-digit form; stored as year-1900 */
+if ((next=parse_number(str,y,4)))
+  {
+  if (*y<1900) return NULL;
+  *y-=1900;
+  return next;
+  }
+if (!(next=parse_number(str,y,2))) return NULL;
+for (next=skip_comment(next); next[-1]==' ' || next[-1]=='\t'; --next) ;   /* match last FWS later */
+if (*y<50) *y+=100;
+return next;
+}
+
+
+/*************************************************
+*            Parse a RFC 2822 Time               *
+*************************************************/
+
+/* Parse the time part of a RFC 2822 date-time, extracting the
+   hour, minute, second and timezone.
+
+Arguments:
+  str          pointer to the start of the time
+  h            pointer to the resulting hour
+  m            pointer to the resulting minute
+  s            pointer to the resulting second (0 if the time has no seconds)
+  z            pointer to the resulting timezone (offset in seconds)
+
+Returns:       points after the processed time or NULL on error
+*/
+
+static uschar *
+parse_time(uschar *str, int *h, int *m, int *s, int *z)
+{
+/*
+time            =       time-of-day FWS zone
+
+time-of-day     =       hour ":" minute [ ":" second ]
+
+hour            =       2DIGIT / obs-hour
+
+obs-hour        =       [CFWS] 2DIGIT [CFWS]
+
+minute          =       2DIGIT / obs-minute
+
+obs-minute      =       [CFWS] 2DIGIT [CFWS]
+
+second          =       2DIGIT / obs-second
+
+obs-second      =       [CFWS] 2DIGIT [CFWS]
+
+zone            =       (( "+" / "-" ) 4DIGIT) / obs-zone
+
+obs-zone        =       "UT" / "GMT" /          ; Universal Time
+                                                ; North American UT
+                                                ; offsets
+                        "EST" / "EDT" /         ; Eastern:  - 5/ - 4
+                        "CST" / "CDT" /         ; Central:  - 6/ - 5
+                        "MST" / "MDT" /         ; Mountain: - 7/ - 6
+                        "PST" / "PDT" /         ; Pacific:  - 8/ - 7
+
+                        %d65-73 /               ; Military zones - "A"
+                        %d75-90 /               ; through "I" and "K"
+                        %d97-105 /              ; through "Z", both
+                        %d107-122               ; upper and lower case
+*/
+
+uschar *c;
+
+str=skip_comment(str);
+if ((str=parse_number(str,h,2))==NULL) return NULL;
+str=skip_comment(str);
+if (*str!=':') return NULL;
+++str;
+str=skip_comment(str);
+if ((str=parse_number(str,m,2))==NULL) return NULL;
+*s=0; /* seconds are optional: default to zero rather than leave *s unset */
+c=skip_comment(str);
+if (*str==':')
+  {
+  ++str;
+  str=skip_comment(str);
+  if ((str=parse_number(str,s,2))==NULL) return NULL;
+  c=skip_comment(str);
+  }
+if (c==str) return NULL; /* something must separate time-of-day from zone */
+else str=c;
+if (*str=='+' || *str=='-')
+  {
+  int neg=(*str=='-');
+
+  ++str;
+  if ((str=parse_number(str,z,4))==NULL) return NULL;
+  *z=(*z/100)*3600+(*z%100)*60; /* hhmm -> seconds */
+  if (neg) *z=-*z;
+  }
+else
+  {
+  char zone[5];
+  static const struct { const char *name; int off; } zone_name[10]=
+  { {"gmt",0}, {"ut",0}, {"est",-5}, {"edt",-4}, {"cst",-6}, {"cdt",-5}, {"mst",-7}, {"mdt",-6}, {"pst",-8}, {"pdt",-7}};
+  int i,j;
+
+  for (i=0; i<4; ++i) /* collect up to four lower-cased letters */
+    {
+    zone[i]=tolower(*(str+i));
+    if (zone[i]<'a' || zone[i]>'z') break;
+    }
+  zone[i]='\0';
+  for (j=0; j<10 && strcmp(zone,zone_name[j].name); ++j);
+  /* Besides zones named in the grammar, RFC 2822 says other alphabetic */
+  /* time zones should be treated as unknown offsets. */
+  if (j<10)
+    {
+    *z=zone_name[j].off*3600; /* table holds whole hours */
+    str+=i;
+    }
+  else if (i==0) return NULL; /* no zone letters at all */
+  else
+    {
+    while ((*str>='a' && *str<='z') || (*str>='A' && *str<='Z')) ++str;
+    *z=0;
+    }
+  }
+return str;
+}
+
+
+/*************************************************
+*          Parse a RFC 2822 date-time            *
+*************************************************/
+
+/* Parse a RFC 2822 date-time and return it in seconds since the epoch.
+
+Arguments:
+  str          pointer to the start of the date-time
+  t            pointer to the parsed time
+
+Returns:       points after the processed date-time or NULL on error
+*/
+
+uschar *
+parse_date_time(uschar *str, time_t *t)
+{
+/*
+date-time       =       [ day-of-week "," ] date FWS time [CFWS]
+*/
+
+struct tm tm={0}; /* zeroed so no field reaches mktime() uninitialised */
+int zone;
+extern char **environ;
+char **old_environ;
+static char gmt0[]="TZ=GMT0";
+static char *gmt_env[]={ gmt0, (char*)0 };
+uschar *try;
+
+if ((try=parse_day_of_week(str))) /* the day name is optional and not decoded */
+  {
+  str=try;
+  if (*str!=',') return NULL;
+  ++str;
+  }
+if ((str=parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year))==NULL) return NULL;
+if (*str!=' ' && *str!='\t') return NULL;
+while (*str==' ' || *str=='\t') ++str;
+if ((str=parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone))==NULL) return NULL;
+tm.tm_isdst=0;
+old_environ=environ; /* temporarily expose only TZ=GMT0; the intent is that */
+environ=gmt_env; /* mktime() treats the broken-down time as UTC */
+*t=mktime(&tm);
+environ=old_environ;
+if (*t==(time_t)-1) return NULL;
+*t-=zone; /* remove the parsed zone offset (seconds) to get UTC */
+str=skip_comment(str);
+return str;
+}
+
+
 
 
 /*************************************************