CVE-2020-28016: Heap out-of-bounds write in parse_fix_phrase()

[exim.git] / src / src / parse.c
diff --git a/src/src/parse.c b/src/src/parse.c

index 5d50d6862181876289bec20ebcf38b5c642abaca..086b010c3f2675f0ae7c5ffdefe0c577e45df857 100644
--- a/src/src/parse.c
+++ b/src/src/parse.c
@@ -3,6 +3,7 @@
  *************************************************/
  
  /* Copyright (c) University of Cambridge 1995 - 2018 */
+/* Copyright (c) The Exim Maintainers 2020 */
  /* See the file NOTICE for conditions of use and distribution. */
  
  /* Functions for parsing addresses */
@@ -11,7 +12,7 @@
  #include "exim.h"
  
  
-static uschar *last_comment_position;
+static const uschar *last_comment_position;
  
  
  
@@ -142,21 +143,21 @@ Argument:  pointer to an address, possibly unqualified
  Returns:   pointer to the last @ in an address, or NULL if none
  */
  
-uschar *
-parse_find_at(uschar *s)
+const uschar *
+parse_find_at(const uschar *s)
  {
-uschar *t = s + Ustrlen(s);
+const uschar * t = s + Ustrlen(s);
  while (--t >= s)
-  {
    if (*t == '@')
      {
      int backslash_count = 0;
-    uschar *tt = t - 1;
+    const uschar *tt = t - 1;
      while (tt > s && *tt-- == '\\') backslash_count++;
      if ((backslash_count & 1) == 0) return t;
      }
-  else if (*t == '\"') return NULL;
-  }
+  else if (*t == '\"')
+    return NULL;
+
  return NULL;
  }
  
@@ -190,17 +191,17 @@ Argument: current character pointer
  Returns:  new character pointer
  */
  
-static uschar *
-skip_comment(uschar *s)
+static const uschar *
+skip_comment(const uschar *s)
  {
  last_comment_position = s;
  while (*s)
    {
    int c, level;
-  while (isspace(*s)) s++;
-  if (*s != '(') break;
+
+  if (Uskip_whitespace(&s) != '(') break;
    level = 1;
-  while((c = *(++s)) != 0)
+  while((c = *(++s)))
      {
      if (c == '(') level++;
      else if (c == ')') { if (--level <= 0) { s++; break; } }
@@ -231,8 +232,8 @@ Arguments:
  Returns:     new character pointer
  */
  
-static uschar *
-read_domain(uschar *s, uschar *t, uschar **errorptr)
+static const uschar *
+read_domain(const uschar *s, uschar *t, uschar **errorptr)
  {
  uschar *tt = t;
  s = skip_comment(s);
@@ -405,8 +406,8 @@ Arguments:
  Returns:   new character pointer
  */
  
-static uschar *
-read_local_part(uschar *s, uschar *t, uschar **error, BOOL allow_null)
+static const uschar *
+read_local_part(const uschar *s, uschar *t, uschar **error, BOOL allow_null)
  {
  uschar *tt = t;
  *error = NULL;
@@ -490,8 +491,8 @@ Arguments:
  Returns:     new character pointer
  */
  
-static uschar *
-read_route(uschar *s, uschar *t, uschar **errorptr)
+static const uschar *
+read_route(const uschar *s, uschar *t, uschar **errorptr)
  {
  BOOL commas = FALSE;
  *errorptr = NULL;
@@ -544,8 +545,8 @@ Arguments:
  Returns:     new character pointer
  */
  
-static uschar *
-read_addr_spec(uschar *s, uschar *t, int term, uschar **errorptr,
+static const uschar *
+read_addr_spec(const uschar *s, uschar *t, int term, uschar **errorptr,
    uschar **domainptr)
  {
  s = read_local_part(s, t, errorptr, FALSE);
@@ -615,12 +616,12 @@ Returns:      points to the extracted address, or NULL on error
  #define FAILED(s) { *errorptr = s; goto PARSE_FAILED; }
  
  uschar *
-parse_extract_address(uschar *mailbox, uschar **errorptr, int *start, int *end,
+parse_extract_address(const uschar *mailbox, uschar **errorptr, int *start, int *end,
    int *domain, BOOL allow_null)
  {
  uschar *yield = store_get(Ustrlen(mailbox) + 1, is_tainted(mailbox));
-uschar *startptr, *endptr;
-uschar *s = US mailbox;
+const uschar *startptr, *endptr;
+const uschar *s = US mailbox;
  uschar *t = US yield;
  
  *domain = 0;
@@ -807,11 +808,11 @@ while (isspace(endptr[-1])) endptr--;
  *end = endptr - US mailbox;
  
  /* Although this code has no limitation on the length of address extracted,
-other parts of Exim may have limits, and in any case, RFC 2821 limits local
-parts to 64 and domains to 255, so we do a check here, giving an error if the
-address is ridiculously long. */
+other parts of Exim may have limits, and in any case, RFC 5321 limits email
+addresses to 256, so we do a check here, giving an error if the address is
+ridiculously long. */
  
-if (*end - *start > ADDRESS_MAXLENGTH)
+if (*end - *start > EXIM_EMAILADDR_MAX)
    {
    *errorptr = string_sprintf("address is ridiculously long: %.64s...", yield);
    return NULL;
@@ -842,8 +843,7 @@ return NULL;
  
  /* This function is used for quoting text in headers according to RFC 2047.
  If the only characters that strictly need quoting are spaces, we return the
-original string, unmodified. If a quoted string is too long for the buffer, it
-is truncated. (This shouldn't happen: this is normally handling short strings.)
+original string, unmodified.
  
  Hmmph. As always, things get perverted for other uses. This function was
  originally for the "phrase" part of addresses. Now it is being used for much
@@ -855,77 +855,57 @@ Arguments:
                   chars
    len          the length of the string
    charset      the name of the character set; NULL => iso-8859-1
-  buffer       the buffer to put the answer in
-  buffer_size  the size of the buffer
    fold         if TRUE, a newline is inserted before the separating space when
                   more than one encoded-word is generated
  
  Returns:       pointer to the original string, if no quoting needed, or
-               pointer to buffer containing the quoted string, or
-               a pointer to "String too long" if the buffer can't even hold
-               the introduction
+               pointer to allocated memory containing the quoted string
  */
  
  const uschar *
-parse_quote_2047(const uschar *string, int len, uschar *charset, uschar *buffer,
-  int buffer_size, BOOL fold)
+parse_quote_2047(const uschar *string, int len, uschar *charset, BOOL fold)
  {
-const uschar *s = string;
-uschar *p, *t;
-int hlen;
+const uschar * s = string;
+int hlen, l;
  BOOL coded = FALSE;
  BOOL first_byte = FALSE;
+gstring * g =
+  string_fmt_append(NULL, "=?%s?Q?", charset ? charset : US"iso-8859-1");
  
-if (!charset) charset = US"iso-8859-1";
-
-/* We don't expect this to fail! */
-
-if (!string_format(buffer, buffer_size, "=?%s?Q?", charset))
-  return US"String too long";
-
-hlen = Ustrlen(buffer);
-t = buffer + hlen;
-p = buffer;
+hlen = l = g->ptr;
  
-for (; len > 0; len--)
+for (s = string; len > 0; s++, len--)
    {
-  int ch = *s++;
-  if (t > buffer + buffer_size - hlen - 8) break;
+  int ch = *s;
  
-  if ((t - p > 67) && !first_byte)
+  if (g->ptr - l > 67 && !first_byte)
      {
-    *t++ = '?';
-    *t++ = '=';
-    if (fold) *t++ = '\n';
-    *t++ = ' ';
-    p = t;
-    Ustrncpy(p, buffer, hlen);
-    t += hlen;
+    g = fold ? string_catn(g, US"?=\n ", 4) : string_catn(g, US"?= ", 3);
+    l = g->ptr;
+    g = string_catn(g, g->s, hlen);
      }
  
-  if (ch < 33 || ch > 126 ||
-      Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
+  if (  ch < 33 || ch > 126
+     || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
      {
      if (ch == ' ')
        {
-      *t++ = '_';
+      g = string_catn(g, US"_", 1);
        first_byte = FALSE;
        }
      else
        {
-      t += sprintf(CS t, "=%02X", ch);
+      g = string_fmt_append(g, "=%02X", ch);
        coded = TRUE;
        first_byte = !first_byte;
        }
      }
-  else { *t++ = ch; first_byte = FALSE; }
+  else
+    { g = string_catn(g, s, 1); first_byte = FALSE; }
    }
  
-*t++ = '?';
-*t++ = '=';
-*t = 0;
-
-return coded ? buffer : string;
+g = string_catn(g, US"?=", 2);
+return coded ? string_from_gstring(g) : string;
  }
  
  
@@ -968,32 +948,25 @@ August 2000: Additional code added:
    We *could* use this for all cases, getting rid of the messy original code,
    but leave it for now. It would complicate simple cases like "John Q. Smith".
  
-The result is passed back in the buffer; it is usually going to be added to
-some other string. In order to be sure there is going to be no overflow,
-restrict the length of the input to 1/4 of the buffer size - this allows for
-every single character to be quoted or encoded without overflowing, and that
-wouldn't happen because of amalgamation. If the phrase is too long, return a
-fixed string.
+The result is passed back in allocated memory.
  
  Arguments:
    phrase       an RFC822 phrase
    len          the length of the phrase
-  buffer       a buffer to put the result in
-  buffer_size  the size of the buffer
  
  Returns:       the fixed RFC822 phrase
  */
  
  const uschar *
-parse_fix_phrase(const uschar *phrase, int len, uschar *buffer, int buffer_size)
+parse_fix_phrase(const uschar *phrase, int len)
  {
  int ch, i;
  BOOL quoted = FALSE;
  const uschar *s, *end;
+uschar * buffer;
  uschar *t, *yield;
  
  while (len > 0 && isspace(*phrase)) { phrase++; len--; }
-if (len > buffer_size/4) return US"Name too long";
  
  /* See if there are any non-printing characters, and if so, use the RFC 2047
  encoding for the whole thing. */
@@ -1001,11 +974,18 @@ encoding for the whole thing. */
  for (i = 0, s = phrase; i < len; i++, s++)
    if ((*s < 32 && *s != '\t') || *s > 126) break;
  
-if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer,
-  buffer_size, FALSE);
+if (i < len)
+  return parse_quote_2047(phrase, len, headers_charset, FALSE);
  
  /* No non-printers; use the RFC 822 quoting rules */
  
+if (len <= 0 || len >= INT_MAX/4)
+  {
+  return string_copy_taint(CUS"", is_tainted(phrase));
+  }
+
+buffer = store_get((len+1)*4, is_tainted(phrase));
+
  s = phrase;
  end = s + len;
  yield = t = buffer + 1;
@@ -1149,9 +1129,12 @@ while (s < end)
              {
              if (ss >= end) ss--;
              *t++ = '(';
-            Ustrncpy(t, s, ss-s);
-            t += ss-s;
-            s = ss;
+            if (ss > s)
+              {
+              Ustrncpy(t, s, ss-s);
+              t += ss-s;
+              s = ss;
+              }
              }
            }
  
@@ -1172,6 +1155,7 @@ while (s < end)
    }
  
  *t = 0;
+store_release_above(t+1);
  return yield;
  }
  
@@ -1601,7 +1585,7 @@ for (;;)
    else
      {
      int start, end, domain;
-    uschar *recipient = NULL;
+    const uschar *recipient = NULL;
      int save = s[len];
      s[len] = 0;
  
@@ -1697,8 +1681,8 @@ for (;;)
        recipient = ((options & RDO_REWRITE) != 0)?
          rewrite_address(recipient, TRUE, FALSE, global_rewrite_rules,
            rewrite_existflags) :
-        rewrite_address_qualify(recipient, TRUE);
-      addr = deliver_make_addr(recipient, TRUE);  /* TRUE => copy recipient */
+        rewrite_address_qualify(recipient, TRUE);      /*XXX loses track of const */
+      addr = deliver_make_addr(US recipient, TRUE);  /* TRUE => copy recipient, so deconst ok */
        }
  
      /* Restore the final character in the original data, and add to the
@@ -1732,8 +1716,8 @@ Arguments:
  Returns:       points after the processed message-id or NULL on error
  */
  
-uschar *
-parse_message_id(uschar *str, uschar **yield, uschar **error)
+const uschar *
+parse_message_id(const uschar *str, uschar **yield, uschar **error)
  {
  uschar *domain = NULL;
  uschar *id;
@@ -1773,8 +1757,7 @@ while (*id) id++;
  *id++ = 0;
  store_release_above(id);
  
-str = skip_comment(str);
-return str;
+return skip_comment(str);
  }
  
  
@@ -1792,16 +1775,16 @@ Arguments:
  Returns:       points after the processed date or NULL on error
  */
  
-static uschar *
-parse_number(uschar *str, int *n, int digits)
+static const uschar *
+parse_number(const uschar *str, int *n, int digits)
  {
-  *n=0;
-  while (digits--)
+*n=0;
+while (digits--)
    {
-    if (*str<'0' || *str>'9') return NULL;
-    *n=10*(*n)+(*str++-'0');
+  if (*str<'0' || *str>'9') return NULL;
+  *n=10*(*n)+(*str++-'0');
    }
-  return str;
+return str;
  }
  
  
@@ -1818,8 +1801,8 @@ Arguments:
  Returns:       points after the parsed day or NULL on error
  */
  
-static uschar *
-parse_day_of_week(uschar *str)
+static const uschar *
+parse_day_of_week(const uschar * str)
  {
  /*
  day-of-week     =       ([FWS] day-name) / obs-day-of-week
@@ -1834,17 +1817,16 @@ static const uschar *day_name[7]={ US"mon", US"tue", US"wed", US"thu", US"fri",
  int i;
  uschar day[4];
  
-str=skip_comment(str);
-for (i=0; i<3; ++i)
+str = skip_comment(str);
+for (i = 0; i < 3; ++i)
    {
-  if ((day[i]=tolower(*str))=='\0') return NULL;
+  if ((day[i] = tolower(*str)) == '\0') return NULL;
    ++str;
    }
-day[3]='\0';
-for (i=0; i<7; ++i) if (Ustrcmp(day,day_name[i])==0) break;
-if (i==7) return NULL;
-str=skip_comment(str);
-return str;
+day[3] = '\0';
+for (i = 0; i<7; ++i) if (Ustrcmp(day,day_name[i]) == 0) break;
+if (i == 7) return NULL;
+return skip_comment(str);
  }
  
  
@@ -1864,8 +1846,8 @@ Arguments:
  Returns:       points after the processed date or NULL on error
  */
  
-static uschar *
-parse_date(uschar *str, int *d, int *m, int *y)
+static const uschar *
+parse_date(const uschar *str, int *d, int *m, int *y)
  {
  /*
  date            =       day month year
@@ -1887,36 +1869,39 @@ day             =       ([FWS] 1*2DIGIT) / obs-day
  obs-day         =       [CFWS] 1*2DIGIT [CFWS]
  */
  
-uschar *c,*n;
+const uschar * s, * n;
  static const uschar *month_name[]={ US"jan", US"feb", US"mar", US"apr", US"may", US"jun", US"jul", US"aug", US"sep", US"oct", US"nov", US"dec" };
  int i;
  uschar month[4];
  
-str=skip_comment(str);
-if ((str=parse_number(str,d,1))==NULL) return NULL;
-if (*str>='0' && *str<='9') *d=10*(*d)+(*str++-'0');
-c=skip_comment(str);
-if (c==str) return NULL;
-else str=c;
-for (i=0; i<3; ++i) if ((month[i]=tolower(*(str+i)))=='\0') return NULL;
-month[3]='\0';
-for (i=0; i<12; ++i) if (Ustrcmp(month,month_name[i])==0) break;
-if (i==12) return NULL;
+str = skip_comment(str);
+if ((str = parse_number(str,d,1)) == NULL) return NULL;
+
+if (*str>='0' && *str<='9') *d = 10*(*d)+(*str++-'0');
+s = skip_comment(str);
+if (s == str) return NULL;
+str = s;
+
+for (i = 0; i<3; ++i) if ((month[i]=tolower(*(str+i))) == '\0') return NULL;
+month[3] = '\0';
+for (i = 0; i<12; ++i) if (Ustrcmp(month,month_name[i]) == 0) break;
+if (i == 12) return NULL;
  str+=3;
-*m=i;
-c=skip_comment(str);
-if (c==str) return NULL;
-else str=c;
-if ((n=parse_number(str,y,4)))
+*m = i;
+s = skip_comment(str);
+if (s == str) return NULL;
+str=s;
+
+if ((n = parse_number(str,y,4)))
    {
-  str=n;
+  str = n;
    if (*y<1900) return NULL;
-  *y=*y-1900;
+  *y = *y-1900;
    }
-else if ((n=parse_number(str,y,2)))
+else if ((n = parse_number(str,y,2)))
    {
-  str=skip_comment(n);
-  while (*(str-1)==' ' || *(str-1)=='\t') --str; /* match last FWS later */
+  str = skip_comment(n);
+  while (*(str-1) == ' ' || *(str-1) == '\t') --str; /* match last FWS later */
    if (*y<50) *y+=100;
    }
  else return NULL;
@@ -1941,8 +1926,8 @@ Arguments:
  Returns:       points after the processed time or NULL on error
  */
  
-static uschar *
-parse_time(uschar *str, int *h, int *m, int *s, int *z)
+static const uschar *
+parse_time(const uschar *str, int *h, int *m, int *s, int *z)
  {
  /*
  time            =       time-of-day FWS zone
@@ -1977,61 +1962,61 @@ obs-zone        =       "UT" / "GMT" /          ; Universal Time
                          %d107-122               ; upper and lower case
  */
  
-uschar *c;
+const uschar * c;
  
-str=skip_comment(str);
-if ((str=parse_number(str,h,2))==NULL) return NULL;
-str=skip_comment(str);
+str = skip_comment(str);
+if ((str = parse_number(str,h,2)) == NULL) return NULL;
+str = skip_comment(str);
  if (*str!=':') return NULL;
  ++str;
-str=skip_comment(str);
-if ((str=parse_number(str,m,2))==NULL) return NULL;
-c=skip_comment(str);
-if (*str==':')
+str = skip_comment(str);
+if ((str = parse_number(str,m,2)) == NULL) return NULL;
+c = skip_comment(str);
+if (*str == ':')
    {
    ++str;
-  str=skip_comment(str);
-  if ((str=parse_number(str,s,2))==NULL) return NULL;
-  c=skip_comment(str);
+  str = skip_comment(str);
+  if ((str = parse_number(str,s,2)) == NULL) return NULL;
+  c = skip_comment(str);
    }
-if (c==str) return NULL;
+if (c == str) return NULL;
  else str=c;
-if (*str=='+' || *str=='-')
+if (*str == '+' || *str == '-')
    {
    int neg;
  
-  neg=(*str=='-');
+  neg = (*str == '-');
    ++str;
-  if ((str=parse_number(str,z,4))==NULL) return NULL;
-  *z=(*z/100)*3600+(*z%100)*60;
-  if (neg) *z=-*z;
+  if ((str = parse_number(str,z,4)) == NULL) return NULL;
+  *z = (*z/100)*3600+(*z%100)*60;
+  if (neg) *z = -*z;
    }
  else
    {
    char zone[5];
-  struct { const char *name; int off; } zone_name[10]=
+  struct { const char *name; int off; } zone_name[10] =
    { {"gmt",0}, {"ut",0}, {"est",-5}, {"edt",-4}, {"cst",-6}, {"cdt",-5}, {"mst",-7}, {"mdt",-6}, {"pst",-8}, {"pdt",-7}};
    int i,j;
  
-  for (i=0; i<4; ++i)
+  for (i = 0; i<4; ++i)
      {
-    zone[i]=tolower(*(str+i));
+    zone[i] = tolower(*(str+i));
      if (zone[i]<'a' || zone[i]>'z') break;
      }
-  zone[i]='\0';
-  for (j=0; j<10 && strcmp(zone,zone_name[j].name); ++j);
+  zone[i] = '\0';
+  for (j = 0; j<10 && strcmp(zone,zone_name[j].name); ++j);
    /* Besides zones named in the grammar, RFC 2822 says other alphabetic */
    /* time zones should be treated as unknown offsets. */
    if (j<10)
      {
-    *z=zone_name[j].off*3600;
+    *z = zone_name[j].off*3600;
      str+=i;
      }
    else if (zone[0]<'a' || zone[1]>'z') return 0;
    else
      {
      while ((*str>='a' && *str<='z') || (*str>='A' && *str<='Z')) ++str;
-    *z=0;
+    *z = 0;
      }
    }
  return str;
@@ -2051,8 +2036,8 @@ Arguments:
  Returns:       points after the processed date-time or NULL on error
  */
  
-uschar *
-parse_date_time(uschar *str, time_t *t)
+const uschar *
+parse_date_time(const uschar *str, time_t *t)
  {
  /*
  date-time       =       [ day-of-week "," ] date FWS time [CFWS]
@@ -2064,27 +2049,26 @@ extern char **environ;
  char **old_environ;
  static char gmt0[]="TZ=GMT0";
  static char *gmt_env[]={ gmt0, (char*)0 };
-uschar *try;
+const uschar * try;
  
-if ((try=parse_day_of_week(str)))
+if ((try = parse_day_of_week(str)))
    {
-  str=try;
+  str = try;
    if (*str!=',') return 0;
    ++str;
    }
-if ((str=parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year))==NULL) return NULL;
+if ((str = parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year)) == NULL) return NULL;
  if (*str!=' ' && *str!='\t') return NULL;
-while (*str==' ' || *str=='\t') ++str;
-if ((str=parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone))==NULL) return NULL;
-tm.tm_isdst=0;
-old_environ=environ;
-environ=gmt_env;
-*t=mktime(&tm);
-environ=old_environ;
-if (*t==-1) return NULL;
+while (*str == ' ' || *str == '\t') ++str;
+if ((str = parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone)) == NULL) return NULL;
+tm.tm_isdst = 0;
+old_environ = environ;
+environ = gmt_env;
+*t = mktime(&tm);
+environ = old_environ;
+if (*t == -1) return NULL;
  *t-=zone;
-str=skip_comment(str);
-return str;
+return skip_comment(str);
  }
  
  
@@ -2101,7 +2085,6 @@ int main(void)
  {
  int start, end, domain;
  uschar buffer[1024];
-uschar outbuff[1024];
  
  big_buffer = store_malloc(big_buffer_size);
  
@@ -2114,8 +2097,7 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
    {
    buffer[Ustrlen(buffer)-1] = 0;
    if (buffer[0] == 0) break;
-  printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer), outbuff,
-    sizeof(outbuff)));
+  printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer)));
    }
  
  printf("Testing parse_extract_address without group syntax and without UTF-8\n");
@@ -2190,7 +2172,7 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
        }
  
      s = ss + (terminator? 1:0);
-    while (isspace(*s)) s++;
+    Uskip_whitespace(&s);
      }
    }