testsuite output changes resulting

[exim.git] / src / src / parse.c
diff --git a/src/src/parse.c b/src/src/parse.c

index 68a83b0e802bc2c6032e302d7e6182766a618668..2b2ffd34129ad0e4aaabaf74ed98b493040f3285 100644 (file)
--- a/src/src/parse.c
+++ b/src/src/parse.c
@@ -2,7 +2,8 @@
  *     Exim - an Internet mail transport agent    *
  *************************************************/
  
-/* Copyright (c) University of Cambridge 1995 - 2017 */
+/* Copyright (c) University of Cambridge 1995 - 2018 */
+/* Copyright (c) The Exim Maintainers 2020 */
  /* See the file NOTICE for conditions of use and distribution. */
  
  /* Functions for parsing addresses */
@@ -23,7 +24,7 @@ redundant apparatus. */
  
  address_item *deliver_make_addr(uschar *address, BOOL copy)
  {
-address_item *addr = store_get(sizeof(address_item));
+address_item *addr = store_get(sizeof(address_item), FALSE);
  addr->next = NULL;
  addr->parent = NULL;
  addr->address = address;
@@ -197,10 +198,10 @@ last_comment_position = s;
  while (*s)
    {
    int c, level;
-  while (isspace(*s)) s++;
-  if (*s != '(') break;
+
+  if (Uskip_whitespace(&s) != '(') break;
    level = 1;
-  while((c = *(++s)) != 0)
+  while((c = *(++s)))
      {
      if (c == '(') level++;
      else if (c == ')') { if (--level <= 0) { s++; break; } }
@@ -421,10 +422,10 @@ for (;;)
    if (*s == '\"')
      {
      *t++ = '\"';
-    while ((c = *(++s)) != 0 && c != '\"')
+    while ((c = *++s) && c != '\"')
        {
        *t++ = c;
-      if (c == '\\' && s[1] != 0) *t++ = *(++s);
+      if (c == '\\' && s[1]) *t++ = *++s;
        }
      if (c == '\"')
        {
@@ -443,7 +444,7 @@ for (;;)
    else while (!mac_iscntrl_or_special(*s) || *s == '\\')
      {
      c = *t++ = *s++;
-    if (c == '\\' && *s != 0) *t++ = *s++;
+    if (c == '\\' && *s) *t++ = *s++;
      }
  
    /* Terminate the word and skip subsequent comment */
@@ -618,7 +619,7 @@ uschar *
  parse_extract_address(uschar *mailbox, uschar **errorptr, int *start, int *end,
    int *domain, BOOL allow_null)
  {
-uschar *yield = store_get(Ustrlen(mailbox) + 1);
+uschar *yield = store_get(Ustrlen(mailbox) + 1, is_tainted(mailbox));
  uschar *startptr, *endptr;
  uschar *s = US mailbox;
  uschar *t = US yield;
@@ -638,7 +639,7 @@ RESTART:   /* Come back here after passing a group name */
  s = skip_comment(s);
  startptr = s;                                 /* In case addr-spec */
  s = read_local_part(s, t, errorptr, TRUE);    /* Dot separated words */
-if (*errorptr != NULL) goto PARSE_FAILED;
+if (*errorptr) goto PARSE_FAILED;
  
  /* If the terminator is neither < nor @ then the format of the address
  must either be a bare local-part (we are now at the end), or a phrase
@@ -658,7 +659,7 @@ if (*s != '@' && *s != '<')
    end of string will produce a null local_part and therefore fail. We don't
    need to keep updating t, as the phrase isn't to be kept. */
  
-  while (*s != '<' && (!parse_allow_group || *s != ':'))
+  while (*s != '<' && (!f.parse_allow_group || *s != ':'))
      {
      s = read_local_part(s, t, errorptr, FALSE);
      if (*errorptr)
@@ -670,8 +671,8 @@ if (*s != '@' && *s != '<')
  
    if (*s == ':')
      {
-    parse_found_group = TRUE;
-    parse_allow_group = FALSE;
+    f.parse_found_group = TRUE;
+    f.parse_allow_group = FALSE;
      s++;
      goto RESTART;
      }
@@ -745,7 +746,7 @@ if (*s == '<')
      *errorptr = s[-1] == 0
        ? US"'>' missing at end of address"
        : string_sprintf("malformed address: %.32s may not follow %.*s",
-         s-1, s - US mailbox - 1, mailbox);
+         s-1, (int)(s - US mailbox - 1), mailbox);
      goto PARSE_FAILED;
      }
  
@@ -790,15 +791,15 @@ move it back past white space if necessary. */
  PARSE_SUCCEEDED:
  if (*s != 0)
    {
-  if (parse_found_group && *s == ';')
+  if (f.parse_found_group && *s == ';')
      {
-    parse_found_group = FALSE;
-    parse_allow_group = TRUE;
+    f.parse_found_group = FALSE;
+    f.parse_allow_group = TRUE;
      }
    else
      {
      *errorptr = string_sprintf("malformed address: %.32s may not follow %.*s",
-      s, s - US mailbox, mailbox);
+      s, (int)(s - US mailbox), mailbox);
      goto PARSE_FAILED;
      }
    }
@@ -824,10 +825,10 @@ We might have an empty address in a group - the caller can choose to ignore
  this. We must, however, keep the flags correct. */
  
  PARSE_FAILED:
-if (parse_found_group && *s == ';')
+if (f.parse_found_group && *s == ';')
    {
-  parse_found_group = FALSE;
-  parse_allow_group = TRUE;
+  f.parse_found_group = FALSE;
+  f.parse_allow_group = TRUE;
    }
  return NULL;
  }
@@ -842,8 +843,7 @@ return NULL;
  
  /* This function is used for quoting text in headers according to RFC 2047.
  If the only characters that strictly need quoting are spaces, we return the
-original string, unmodified. If a quoted string is too long for the buffer, it
-is truncated. (This shouldn't happen: this is normally handling short strings.)
+original string, unmodified.
  
  Hmmph. As always, things get perverted for other uses. This function was
  originally for the "phrase" part of addresses. Now it is being used for much
@@ -855,77 +855,62 @@ Arguments:
                   chars
    len          the length of the string
    charset      the name of the character set; NULL => iso-8859-1
-  buffer       the buffer to put the answer in
-  buffer_size  the size of the buffer
    fold         if TRUE, a newline is inserted before the separating space when
                   more than one encoded-word is generated
  
  Returns:       pointer to the original string, if no quoting needed, or
-               pointer to buffer containing the quoted string, or
-               a pointer to "String too long" if the buffer can't even hold
-               the introduction
+               pointer to allocated memory containing the quoted string
  */
  
  const uschar *
-parse_quote_2047(const uschar *string, int len, uschar *charset, uschar *buffer,
-  int buffer_size, BOOL fold)
+parse_quote_2047(const uschar *string, int len, uschar *charset, BOOL fold)
  {
-const uschar *s = string;
-uschar *p, *t;
-int hlen;
+const uschar * s = string;
+int hlen, l;
  BOOL coded = FALSE;
  BOOL first_byte = FALSE;
+gstring * g =
+  string_fmt_append(NULL, "=?%s?Q?", charset ? charset : US"iso-8859-1");
  
-if (charset == NULL) charset = US"iso-8859-1";
-
-/* We don't expect this to fail! */
-
-if (!string_format(buffer, buffer_size, "=?%s?Q?", charset))
-  return US"String too long";
-
-hlen = Ustrlen(buffer);
-t = buffer + hlen;
-p = buffer;
+hlen = l = g->ptr;
  
-for (; len > 0; len--)
+for (s = string; len > 0; s++, len--)
    {
-  int ch = *s++;
-  if (t > buffer + buffer_size - hlen - 8) break;
+  int ch = *s;
  
-  if ((t - p > 67) && !first_byte)
+  if (g->ptr - l > 67 && !first_byte)
      {
-    *t++ = '?';
-    *t++ = '=';
-    if (fold) *t++ = '\n';
-    *t++ = ' ';
-    p = t;
-    Ustrncpy(p, buffer, hlen);
-    t += hlen;
+    g = fold ? string_catn(g, US"?=\n ", 4) : string_catn(g, US"?= ", 3);
+    l = g->ptr;
+    g = string_catn(g, g->s, hlen);
      }
  
-  if (ch < 33 || ch > 126 ||
-      Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
+  if (  ch < 33 || ch > 126
+     || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
      {
      if (ch == ' ')
        {
-      *t++ = '_';
+      g = string_catn(g, US"_", 1);
        first_byte = FALSE;
        }
      else
        {
-      t += sprintf(CS t, "=%02X", ch);
+      g = string_fmt_append(g, "=%02X", ch);
        coded = TRUE;
        first_byte = !first_byte;
        }
      }
-  else { *t++ = ch; first_byte = FALSE; }
+  else
+    { g = string_catn(g, s, 1); first_byte = FALSE; }
    }
  
-*t++ = '?';
-*t++ = '=';
-*t = 0;
+if (coded)
+  string = string_from_gstring(g = string_catn(g, US"?=", 2));
+else
+  g->ptr = -1;
  
-return coded? buffer : string;
+gstring_release_unused(g);
+return string;
  }
  
  
@@ -968,32 +953,25 @@ August 2000: Additional code added:
    We *could* use this for all cases, getting rid of the messy original code,
    but leave it for now. It would complicate simple cases like "John Q. Smith".
  
-The result is passed back in the buffer; it is usually going to be added to
-some other string. In order to be sure there is going to be no overflow,
-restrict the length of the input to 1/4 of the buffer size - this allows for
-every single character to be quoted or encoded without overflowing, and that
-wouldn't happen because of amalgamation. If the phrase is too long, return a
-fixed string.
+The result is passed back in allocated memory.
  
  Arguments:
    phrase       an RFC822 phrase
    len          the length of the phrase
-  buffer       a buffer to put the result in
-  buffer_size  the size of the buffer
  
  Returns:       the fixed RFC822 phrase
  */
  
  const uschar *
-parse_fix_phrase(const uschar *phrase, int len, uschar *buffer, int buffer_size)
+parse_fix_phrase(const uschar *phrase, int len)
  {
  int ch, i;
  BOOL quoted = FALSE;
  const uschar *s, *end;
+uschar * buffer;
  uschar *t, *yield;
  
  while (len > 0 && isspace(*phrase)) { phrase++; len--; }
-if (len > buffer_size/4) return US"Name too long";
  
  /* See if there are any non-printing characters, and if so, use the RFC 2047
  encoding for the whole thing. */
@@ -1001,11 +979,13 @@ encoding for the whole thing. */
  for (i = 0, s = phrase; i < len; i++, s++)
    if ((*s < 32 && *s != '\t') || *s > 126) break;
  
-if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer,
-  buffer_size, FALSE);
+if (i < len)
+  return parse_quote_2047(phrase, len, headers_charset, FALSE);
  
  /* No non-printers; use the RFC 822 quoting rules */
  
+buffer = store_get(len*4, is_tainted(phrase));
+
  s = phrase;
  end = s + len;
  yield = t = buffer + 1;
@@ -1172,6 +1152,7 @@ while (s < end)
    }
  
  *t = 0;
+store_release_above(t+1);
  return yield;
  }
  
@@ -1277,10 +1258,10 @@ for (;;)
    However, if the list is empty only because syntax errors were skipped, we
    return FF_DELIVERED. */
  
-  if (*s == 0)
+  if (!*s)
      {
-    return (count > 0 || (syntax_errors != NULL && *syntax_errors != NULL))?
-      FF_DELIVERED : FF_NOTDELIVERED;
+    return (count > 0 || (syntax_errors && *syntax_errors))
+      ?  FF_DELIVERED : FF_NOTDELIVERED;
  
      /* This previous code returns FF_ERROR if nothing is generated but a
      syntax error has been skipped. I now think it is the wrong approach, but
@@ -1396,7 +1377,7 @@ for (;;)
  
      if (flen <= 0)
        {
-      *error = string_sprintf("file name missing after :include:");
+      *error = US"file name missing after :include:";
        return FF_ERROR;
        }
  
@@ -1411,7 +1392,7 @@ for (;;)
  
      /* Insist on absolute path */
  
-    if (filename[0]!= '/')
+    if (filename[0] != '/')
        {
        *error = string_sprintf("included file \"%s\" is not an absolute path",
          filename);
@@ -1420,12 +1401,19 @@ for (;;)
  
      /* Check if include is permitted */
  
-    if ((options & RDO_INCLUDE) != 0)
+    if (options & RDO_INCLUDE)
        {
        *error = US"included files not permitted";
        return FF_ERROR;
        }
  
+    if (is_tainted(filename))
+      {
+      *error = string_sprintf("Tainted name '%s' for included file  not permitted\n",
+       filename);
+      return FF_ERROR;
+      }
+
      /* Check file name if required */
  
      if (directory)
@@ -1447,7 +1435,7 @@ for (;;)
        with a flag that fails symlinks. */
  
        {
-      int fd = open(CS directory, O_RDONLY);
+      int fd = exim_open2(CS directory, O_RDONLY);
        if (fd < 0)
         {
         *error = string_sprintf("failed to open directory %s", directory);
@@ -1463,7 +1451,7 @@ for (;;)
         temp = *p;
         *p = '\0';
  
-       fd2 = openat(fd, CS q, O_RDONLY|O_NOFOLLOW);
+       fd2 = exim_openat(fd, CS q, O_RDONLY|O_NOFOLLOW);
         close(fd);
         *p = temp;
         if (fd2 < 0)
@@ -1517,7 +1505,7 @@ for (;;)
  
      if (!f)
        {
-      *error = string_open_failed(errno, "included file %s", filename);
+      *error = string_open_failed("included file %s", filename);
        return FF_INCLUDEFAIL;
        }
  
@@ -1547,7 +1535,7 @@ for (;;)
        return FF_ERROR;
        }
  
-    filebuf = store_get(statbuf.st_size + 1);
+    filebuf = store_get(statbuf.st_size + 1, is_tainted(filename));
      if (fread(filebuf, 1, statbuf.st_size, f) != statbuf.st_size)
        {
        *error = string_sprintf("error while reading included file %s: %s",
@@ -1607,14 +1595,14 @@ for (;;)
        {
        recipient =
          parse_extract_address(s+1, error, &start, &end, &domain, FALSE);
-      if (recipient != NULL)
-        recipient = (domain != 0)? NULL :
+      if (recipient)
+        recipient = domain != 0 ? NULL :
            string_sprintf("%s@%s", recipient, incoming_domain);
        }
  
      /* Try parsing the item as an address. */
  
-    if (recipient == NULL) recipient =
+    if (!recipient) recipient =
        parse_extract_address(s, error, &start, &end, &domain, FALSE);
  
      /* If item starts with / or | and is not a valid address, or there
@@ -1623,7 +1611,7 @@ for (;;)
  
      if ((*s == '|' || *s == '/') && (recipient == NULL || domain == 0))
        {
-      uschar *t = store_get(Ustrlen(s) + 1);
+      uschar *t = store_get(Ustrlen(s) + 1, is_tainted(s));
        uschar *p = t;
        uschar *q = s;
        while (*q != 0)
@@ -1662,7 +1650,7 @@ for (;;)
  
          if (syntax_errors != NULL)
            {
-          error_block *e = store_get(sizeof(error_block));
+          error_block *e = store_get(sizeof(error_block), FALSE);
            error_block *last = *syntax_errors;
            if (last == NULL) *syntax_errors = e; else
              {
@@ -1730,6 +1718,7 @@ parse_message_id(uschar *str, uschar **yield, uschar **error)
  {
  uschar *domain = NULL;
  uschar *id;
+rmark reset_point;
  
  str = skip_comment(str);
  if (*str != '<')
@@ -1742,27 +1731,28 @@ if (*str != '<')
  for the answer, but it may also be very long if we are processing a header
  line. Therefore, take care to release unwanted store afterwards. */
  
-id = *yield = store_get(Ustrlen(str) + 1);
+reset_point = store_mark();
+id = *yield = store_get(Ustrlen(str) + 1, is_tainted(str));
  *id++ = *str++;
  
  str = read_addr_spec(str, id, '>', error, &domain);
  
-if (*error == NULL)
+if (!*error)
    {
    if (*str != '>') *error = US"Missing '>' after message-id";
      else if (domain == NULL) *error = US"domain missing in message-id";
    }
  
-if (*error != NULL)
+if (*error)
    {
-  store_reset(*yield);
+  store_reset(reset_point);
    return NULL;
    }
  
-while (*id != 0) id++;
+while (*id) id++;
  *id++ = *str++;
  *id++ = 0;
-store_reset(id);
+store_release_above(id);
  
  str = skip_comment(str);
  return str;
@@ -2092,7 +2082,6 @@ int main(void)
  {
  int start, end, domain;
  uschar buffer[1024];
-uschar outbuff[1024];
  
  big_buffer = store_malloc(big_buffer_size);
  
@@ -2105,8 +2094,7 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
    {
    buffer[Ustrlen(buffer)-1] = 0;
    if (buffer[0] == 0) break;
-  printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer), outbuff,
-    sizeof(outbuff)));
+  printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer)));
    }
  
  printf("Testing parse_extract_address without group syntax and without UTF-8\n");
@@ -2118,7 +2106,9 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
    buffer[Ustrlen(buffer) - 1] = 0;
    if (buffer[0] == 0) break;
    out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
-  if (out == NULL) printf("*** bad address: %s\n", errmess); else
+  if (!out)
+    printf("*** bad address: %s\n", errmess);
+  else
      {
      uschar extract[1024];
      Ustrncpy(extract, buffer+start, end-start);
@@ -2137,7 +2127,9 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
    buffer[Ustrlen(buffer) - 1] = 0;
    if (buffer[0] == 0) break;
    out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
-  if (out == NULL) printf("*** bad address: %s\n", errmess); else
+  if (!out)
+    printf("*** bad address: %s\n", errmess);
+  else
      {
      uschar extract[1024];
      Ustrncpy(extract, buffer+start, end-start);
@@ -2149,7 +2141,7 @@ allow_utf8_domains = FALSE;
  
  printf("Testing parse_extract_address with group syntax\n");
  
-parse_allow_group = TRUE;
+f.parse_allow_group = TRUE;
  while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
    {
    uschar *out;
@@ -2158,7 +2150,7 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
    buffer[Ustrlen(buffer) - 1] = 0;
    if (buffer[0] == 0) break;
    s = buffer;
-  while (*s != 0)
+  while (*s)
      {
      uschar *ss = parse_find_address_end(s, FALSE);
      int terminator = *ss;
@@ -2166,7 +2158,9 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
      out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
      *ss = terminator;
  
-    if (out == NULL) printf("*** bad address: %s\n", errmess); else
+    if (!out)
+      printf("*** bad address: %s\n", errmess);
+    else
        {
        uschar extract[1024];
        Ustrncpy(extract, buffer+start, end-start);
@@ -2175,7 +2169,7 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
        }
  
      s = ss + (terminator? 1:0);
-    while (isspace(*s)) s++;
+    Uskip_whitespace(&s);
      }
    }