Docs: tweak list-syntax description

[exim.git] / src / src / parse.c
diff --git a/src/src/parse.c b/src/src/parse.c

index fa339520624f51bb091be5a8cb216b556c669f40..bdba3ecd0cb0be2185485f2099de9184359d05dd 100644 (file)
--- a/src/src/parse.c
+++ b/src/src/parse.c
@@ -2,8 +2,8 @@
  *     Exim - an Internet mail transport agent    *
  *************************************************/
  
+/* Copyright (c) The Exim Maintainers 2020 - 2022 */
  /* Copyright (c) University of Cambridge 1995 - 2018 */
-/* Copyright (c) The Exim Maintainers 2020 */
  /* See the file NOTICE for conditions of use and distribution. */
  
  /* Functions for parsing addresses */
@@ -22,22 +22,25 @@ redundant apparatus. */
  
  #ifdef STAND_ALONE
  
-address_item *deliver_make_addr(uschar *address, BOOL copy)
+address_item *		/* STAND_ALONE stub: builds a minimal address_item */
+deliver_make_addr(uschar *address, BOOL copy)	/* "copy" is not used by this stub */
  {
-address_item *addr = store_get(sizeof(address_item), FALSE);
+address_item *addr = store_get(sizeof(address_item), GET_UNTAINTED);	/* only next, parent and address are set below */
  addr->next = NULL;
  addr->parent = NULL;
  addr->address = address;
  return addr;
  }
  
-uschar *rewrite_address(uschar *recipient, BOOL dummy1, BOOL dummy2, rewrite_rule
+uschar *		/* STAND_ALONE stub: no rewriting, hands back recipient as given */
+rewrite_address(uschar *recipient, BOOL dummy1, BOOL dummy2, rewrite_rule
    *dummy3, int dummy4)
  {
  return recipient;
  }
  
-uschar *rewrite_address_qualify(uschar *recipient, BOOL dummy1)
+uschar *		/* STAND_ALONE stub: no qualification, hands back recipient as given */
+rewrite_address_qualify(uschar *recipient, BOOL dummy1)
  {
  return recipient;
  }
@@ -65,7 +68,7 @@ Returns:   pointer past the end of the address
  */
  
  uschar *
-parse_find_address_end(uschar *s, BOOL nl_ends)
+parse_find_address_end(const uschar *s, BOOL nl_ends)
  {
  BOOL source_routing = *s == '@';
  int no_term = source_routing? 1 : 0;
@@ -121,7 +124,7 @@ while (*s != 0 && (*s != ',' || no_term > 0) && (*s != '\n' || !nl_ends))
      }
    }
  
-return s;
+return US s;
  }
  
  
@@ -224,16 +227,20 @@ If allow_domain_literals is TRUE, a "domain" may also be an IP address enclosed
  in []. Make sure the output is set to the null string if there is a syntax
  error as well as if there is no domain at all.
  
+Optionally, msg_id domain literals ( printable-ascii enclosed in [] )
+are permitted.
+
  Arguments:
    s          current character pointer
    t          where to put the domain
+  msg_id_literals     flag for relaxed domain-literal processing
    errorptr   put error message here on failure (*t will be 0 on exit)
  
  Returns:     new character pointer
  */
  
  static const uschar *
-read_domain(const uschar *s, uschar *t, uschar **errorptr)
+read_domain(const uschar *s, uschar *t, BOOL msg_id_literals, uschar **errorptr)
  {
  uschar *tt = t;
  s = skip_comment(s);
@@ -259,7 +266,11 @@ if (*s == '[')
      t += 5;
      s += 5;
      }
-  while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++;
+
+  if (msg_id_literals)	/* any of ASCII 33-126 except 91-93 ('[', backslash, ']') */
+    while ((*s >= 33 && *s <= 90) || (*s >= 94 && *s <= 126)) *t++ = *s++;
+  else			/* IP-literal characters only: hex digits, '.' and ':' */
+    while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++;
  
    if (*s == ']') *t++ = *s++; else
      {
@@ -267,7 +278,7 @@ if (*s == '[')
      *tt = 0;
      }
  
-  if (!allow_domain_literals)
+  if (!allow_domain_literals && !msg_id_literals)
      {
      *errorptr = US"domain literals not allowed";
      *tt = 0;
@@ -500,7 +511,7 @@ BOOL commas = FALSE;
  while (*s == '@')
    {
    *t++ = '@';
-  s = read_domain(s+1, t, errorptr);
+  s = read_domain(s+1, t, FALSE, errorptr);
    if (*t == 0) return s;
    t += Ustrlen((const uschar *)t);
    if (*s != ',') break;
@@ -559,7 +570,7 @@ if (*errorptr == NULL)
        t += Ustrlen((const uschar *)t);
        *t++ = *s++;
        *domainptr = t;
-      s = read_domain(s, t, errorptr);
+      s = read_domain(s, t, FALSE, errorptr);
        }
  return s;
  }
@@ -619,7 +630,7 @@ uschar *
  parse_extract_address(const uschar *mailbox, uschar **errorptr, int *start, int *end,
    int *domain, BOOL allow_null)
  {
-uschar *yield = store_get(Ustrlen(mailbox) + 1, is_tainted(mailbox));
+uschar * yield = store_get(Ustrlen(mailbox) + 1, mailbox);
  const uschar *startptr, *endptr;
  const uschar *s = US mailbox;
  uschar *t = US yield;
@@ -647,9 +658,9 @@ followed by a route-addr (more words must follow). */
  
  if (*s != '@' && *s != '<')
    {
-  if (*s == 0 || *s == ';')
+  if (!*s || *s == ';')
      {
-    if (*t == 0) FAILED(US"empty address");
+    if (!*t) FAILED(US"empty address");
      endptr = last_comment_position;
      goto PARSE_SUCCEEDED;              /* Bare local part */
      }
@@ -740,7 +751,7 @@ if (*s == '<')
      }
  
    endptr = s;
-  if (*errorptr != NULL) goto PARSE_FAILED;
+  if (*errorptr) goto PARSE_FAILED;
    while (bracket_count-- > 0) if (*s++ != '>')
      {
      *errorptr = s[-1] == 0
@@ -759,14 +770,14 @@ should be the domain. However, for flexibility we allow for a route-address
  not enclosed in <> as well, which is indicated by an empty first local
  part preceding '@'. The source routing is, however, ignored. */
  
-else if (*t == 0)
+else if (!*t)
    {
    uschar *domainptr = yield;
    s = read_route(s, t, errorptr);
-  if (*errorptr != NULL) goto PARSE_FAILED;
+  if (*errorptr) goto PARSE_FAILED;
    *t = 0;         /* Ensure route is ignored - probably overkill */
    s = read_addr_spec(s, t, 0, errorptr, &domainptr);
-  if (*errorptr != NULL) goto PARSE_FAILED;
+  if (*errorptr) goto PARSE_FAILED;
    *domain = domainptr - yield;
    endptr = last_comment_position;
    if (*domain == 0) FAILED(US"domain missing in source-routed address");
@@ -779,8 +790,8 @@ else
    t += Ustrlen((const uschar *)t);
    *t++ = *s++;
    *domain = t - yield;
-  s = read_domain(s, t, errorptr);
-  if (*t == 0) goto PARSE_FAILED;
+  s = read_domain(s, t, TRUE, errorptr);
+  if (!*t) goto PARSE_FAILED;
    endptr = last_comment_position;
    }
  
@@ -789,7 +800,7 @@ through for other cases. Endptr may have been moved over whitespace, so
  move it back past white space if necessary. */
  
  PARSE_SUCCEEDED:
-if (*s != 0)
+if (*s)
    {
    if (f.parse_found_group && *s == ';')
      {
@@ -863,7 +874,8 @@ Returns:       pointer to the original string, if no quoting needed, or
  */
  
  const uschar *
-parse_quote_2047(const uschar *string, int len, uschar *charset, BOOL fold)
+parse_quote_2047(const uschar *string, int len, const uschar *charset,
+  BOOL fold)
  {
  const uschar * s = string;
  int hlen, l;
@@ -985,11 +997,9 @@ if (i < len)
  /* No non-printers; use the RFC 822 quoting rules */
  
  if (len <= 0 || len >= INT_MAX/4)
-  {
-  return string_copy_taint(CUS"", is_tainted(phrase));
-  }
+  return string_copy_taint(CUS"", phrase);
  
-buffer = store_get((len+1)*4, is_tainted(phrase));
+buffer = store_get((len+1)*4, phrase);
  
  s = phrase;
  end = s + len;
@@ -1233,8 +1243,8 @@ Returns:      FF_DELIVERED      addresses extracted
  */
  
  int
-parse_forward_list(uschar *s, int options, address_item **anchor,
-  uschar **error, const uschar *incoming_domain, uschar *directory,
+parse_forward_list(const uschar *s, int options, address_item **anchor,
+  uschar **error, const uschar *incoming_domain, const uschar *directory,
    error_block **syntax_errors)
  {
  int count = 0;
@@ -1243,18 +1253,15 @@ DEBUG(D_route) debug_printf("parse_forward_list: %s\n", s);
  
  for (;;)
    {
-  int len;
-  int special = 0;
-  int specopt = 0;
-  int specbit = 0;
-  uschar *ss, *nexts;
-  address_item *addr;
+  int len, special = 0, specopt = 0, specbit = 0;
+  const uschar * ss, * nexts;
+  address_item * addr;
    BOOL inquote = FALSE;
  
    for (;;)
      {
      while (isspace(*s) || *s == ',') s++;
-    if (*s == '#') { while (*s != 0 && *s != '\n') s++; } else break;
+    if (*s == '#') { while (*s && *s != '\n') s++; } else break;
      }
  
    /* When we reach the end of the list, we return FF_DELIVERED if any child
@@ -1275,18 +1282,17 @@ for (;;)
      syntax error has been skipped. I now think it is the wrong approach, but
      have left this here just in case, and for the record. */
  
-    #ifdef NEVER
+#ifdef NEVER
      if (count > 0) return FF_DELIVERED;   /* Something was generated */
  
-    if (syntax_errors == NULL ||          /* Not skipping syntax errors, or */
-       *syntax_errors == NULL)            /*   we didn't actually skip any */
+    if (!syntax_errors ||          /* Not skipping syntax errors, or */
+       !*syntax_errors)            /*   we didn't actually skip any */
        return FF_NOTDELIVERED;
  
      *error = string_sprintf("no addresses generated: syntax error in %s: %s",
         (*syntax_errors)->text2, (*syntax_errors)->text1);
      return FF_ERROR;
-    #endif
-
+#endif
      }
  
    /* Find the end of the next address. Quoted strings in addresses may contain
@@ -1303,7 +1309,7 @@ for (;;)
  
    /* Remove any trailing spaces; we know there's at least one non-space. */
  
-  while (isspace((ss[-1]))) ss--;
+  while (isspace(ss[-1])) ss--;
  
    /* We now have s->start and ss->end of the next address. Remove quotes
    if they completely enclose, remembering the address started with a quote
@@ -1316,20 +1322,14 @@ for (;;)
      ss--;
      inquote = TRUE;
      while (s < ss && isspace(*s)) s++;
-    while (ss > s && isspace((ss[-1]))) ss--;
+    while (ss > s && isspace(ss[-1])) ss--;
      }
  
    /* Set up the length of the address. */
  
    len = ss - s;
  
-  DEBUG(D_route)
-    {
-    int save = s[len];
-    s[len] = 0;
-    debug_printf("extract item: %s\n", s);
-    s[len] = save;
-    }
+  DEBUG(D_route) debug_printf("extract item: %.*s\n", len, s);
  
    /* Handle special addresses if permitted. If the address is :unknown:
    ignore it - this is for backward compatibility with old alias files. You
@@ -1350,18 +1350,18 @@ for (;;)
    else if (Ustrncmp(s, ":fail:", 6) == 0)
      { special = FF_FAIL; specopt = RDO_FAIL; }  /* specbit is 0 */
  
-  if (special != 0)
+  if (special)
      {
-    uschar *ss = Ustrchr(s+1, ':') + 1;
+    uschar * ss = Ustrchr(s+1, ':') + 1; /* line after the special... */
      if ((options & specopt) == specbit)
        {
        *error = string_sprintf("\"%.*s\" is not permitted", len, s);
        return FF_ERROR;
        }
-    while (*ss != 0 && isspace(*ss)) ss++;
-    while (s[len] != 0 && s[len] != '\n') len++;
-    s[len] = 0;
-    *error = string_copy(ss);
+    while (isspace(*ss)) ss++;         /* skip leading whitespace (stops at NUL) */
+    len = Ustrlen(ss);                 /* length of the remaining text... */
+    while (len > 0 && ss[len-1] == '\n') len--;  /* ...less any trailing newlines */
+    *error = string_copyn(ss, len);    /* becomes the error */
      return special;
      }
  
@@ -1372,14 +1372,14 @@ for (;;)
  
    if (Ustrncmp(s, ":include:", 9) == 0)
      {
-    uschar *filebuf;
+    uschar * filebuf;
      uschar filename[256];
-    uschar *t = s+9;
+    const uschar * t = s+9;
      int flen = len - 9;
      int frc;
      struct stat statbuf;
-    address_item *last;
-    FILE *f;
+    address_item * last;
+    FILE * f;
  
      while (flen > 0 && isspace(*t)) { t++; flen--; }
  
@@ -1389,7 +1389,7 @@ for (;;)
        return FF_ERROR;
        }
  
-    if (flen > 255)
+    if (flen > sizeof(filename)-1)
        {
        *error = string_sprintf("included file name \"%s\" is too long", t);
        return FF_ERROR;
@@ -1427,8 +1427,10 @@ for (;;)
      if (directory)
        {
        int len = Ustrlen(directory);
-      uschar *p = filename + len;
+      uschar * p;
  
+      while (len > 0 && directory[len-1] == '/') len--;                /* ignore trailing '/' */
+      p = filename + len;
        if (Ustrncmp(filename, directory, len) != 0 || *p != '/')
          {
          *error = string_sprintf("included file %s is not in directory %s",
@@ -1443,7 +1445,7 @@ for (;;)
        with a flag that fails symlinks. */
  
        {
-      int fd = exim_open2(CS directory, O_RDONLY);
+      int fd = exim_open2(CCS directory, O_RDONLY);
        if (fd < 0)
         {
         *error = string_sprintf("failed to open directory %s", directory);
@@ -1453,9 +1455,10 @@ for (;;)
         {
         uschar temp;
         int fd2;
-       uschar * q = p;
+       uschar * q = p + 1;             /* skip dividing '/' */
  
-       while (*++p && *p != '/') ;
+       while (*q == '/') q++;          /* skip extra '/' */
+       while (*++p && *p != '/') ;     /* end of component */
         temp = *p;
         *p = '\0';
  
@@ -1543,7 +1546,7 @@ for (;;)
        return FF_ERROR;
        }
  
-    filebuf = store_get(statbuf.st_size + 1, is_tainted(filename));
+    filebuf = store_get(statbuf.st_size + 1, filename);
      if (fread(filebuf, 1, statbuf.st_size, f) != statbuf.st_size)
        {
        *error = string_sprintf("error while reading included file %s: %s",
@@ -1591,18 +1594,17 @@ for (;;)
      {
      int start, end, domain;
      const uschar *recipient = NULL;
-    int save = s[len];
-    s[len] = 0;
+    uschar * s_ltd = string_copyn(s, len);
  
      /* If it starts with \ and the rest of it parses as a valid mail address
      without a domain, carry on with that address, but qualify it with the
      incoming domain. Otherwise arrange for the address to fall through,
      causing an error message on the re-parse. */
  
-    if (*s == '\\')
+    if (*s_ltd == '\\')
        {
        recipient =
-        parse_extract_address(s+1, error, &start, &end, &domain, FALSE);
+        parse_extract_address(s_ltd+1, error, &start, &end, &domain, FALSE);
        if (recipient)
          recipient = domain != 0 ? NULL :
            string_sprintf("%s@%s", recipient, incoming_domain);
@@ -1611,22 +1613,22 @@ for (;;)
      /* Try parsing the item as an address. */
  
      if (!recipient) recipient =
-      parse_extract_address(s, error, &start, &end, &domain, FALSE);
+      parse_extract_address(s_ltd, error, &start, &end, &domain, FALSE);
  
      /* If item starts with / or | and is not a valid address, or there
      is no domain, treat it as a file or pipe. If it was a quoted item,
      remove the quoting occurrences of \ within it. */
  
-    if ((*s == '|' || *s == '/') && (recipient == NULL || domain == 0))
+    if ((*s_ltd == '|' || *s_ltd == '/') && (!recipient || domain == 0))
        {
-      uschar *t = store_get(Ustrlen(s) + 1, is_tainted(s));
-      uschar *p = t;
-      uschar *q = s;
-      while (*q != 0)
+      uschar * t = store_get(Ustrlen(s_ltd) + 1, s_ltd);
+      uschar * p = t, * q = s_ltd;
+
+      while (*q)
          {
          if (inquote)
            {
-          *p++ = (*q == '\\')? *(++q) : *q;
+          *p++ = *q == '\\' && q[1] ? *++q : *q;  /* unescape; a lone trailing backslash is kept so q never passes the NUL */
            q++;
            }
          else *p++ = *q++;
@@ -1634,7 +1636,7 @@ for (;;)
        *p = 0;
        addr = deliver_make_addr(t, TRUE);
        setflag(addr, af_pfr);                   /* indicates pipe/file/reply */
-      if (*s != '|') setflag(addr, af_file);   /* indicates file */
+      if (*s_ltd != '|') setflag(addr, af_file);   /* indicates file */
        }
  
      /* Item must be an address. Complain if not, else qualify, rewrite and set
@@ -1646,36 +1648,35 @@ for (;;)
  
      else
        {
-      if (recipient == NULL)
+      if (!recipient)
          {
          if (Ustrcmp(*error, "empty address") == 0)
            {
            *error = NULL;
-          s[len] = save;
            s = nexts;
            continue;
            }
  
-        if (syntax_errors != NULL)
+        if (syntax_errors)
            {
-          error_block *e = store_get(sizeof(error_block), FALSE);
-          error_block *last = *syntax_errors;
-          if (last == NULL) *syntax_errors = e; else
+          error_block * e = store_get(sizeof(error_block), GET_UNTAINTED);
+          error_block * last = *syntax_errors;
+          if (last)
              {
-            while (last->next != NULL) last = last->next;
+            while (last->next) last = last->next;
              last->next = e;
              }
+          else
+           *syntax_errors = e;
            e->next = NULL;
            e->text1 = *error;
-          e->text2 = string_copy(s);
-          s[len] = save;
+          e->text2 = s_ltd;
            s = nexts;
            continue;
            }
          else
            {
-          *error = string_sprintf("%s in \"%s\"", *error, s);
-          s[len] = save;   /* _after_ using it for *error */
+          *error = string_sprintf("%s in \"%s\"", *error, s_ltd);
            return FF_ERROR;
            }
          }
@@ -1683,17 +1684,15 @@ for (;;)
        /* Address was successfully parsed. Rewrite, and then make an address
        block. */
  
-      recipient = ((options & RDO_REWRITE) != 0)?
-        rewrite_address(recipient, TRUE, FALSE, global_rewrite_rules,
-          rewrite_existflags) :
-        rewrite_address_qualify(recipient, TRUE);      /*XXX loses track of const */
+      recipient = options & RDO_REWRITE
+       ? rewrite_address(recipient, TRUE, FALSE, global_rewrite_rules,
+                         rewrite_existflags)
+       : rewrite_address_qualify(recipient, TRUE);     /*XXX loses track of const */
        addr = deliver_make_addr(US recipient, TRUE);  /* TRUE => copy recipient, so deconst ok */
        }
  
-    /* Restore the final character in the original data, and add to the
-    output chain. */
+    /* Add the new address block to the output chain. */
  
-    s[len] = save;
      addr->next = *anchor;
      *anchor = addr;
      count++;
@@ -1740,7 +1739,7 @@ for the answer, but it may also be very long if we are processing a header
  line. Therefore, take care to release unwanted store afterwards. */
  
  reset_point = store_mark();
-id = *yield = store_get(Ustrlen(str) + 1, is_tainted(str));
+id = *yield = store_get(Ustrlen(str) + 1, str);
  *id++ = *str++;
  
  str = read_addr_spec(str, id, '>', error, &domain);