MH's patch for allowing control characters as list separators.

author Philip Hazel <ph10@hermes.cam.ac.uk>

Mon, 26 Feb 2007 14:07:04 +0000 (14:07 +0000)

committer Philip Hazel <ph10@hermes.cam.ac.uk>

Mon, 26 Feb 2007 14:07:04 +0000 (14:07 +0000)
author Philip Hazel <ph10@hermes.cam.ac.uk>
Mon, 26 Feb 2007 14:07:04 +0000 (14:07 +0000)
committer Philip Hazel <ph10@hermes.cam.ac.uk>
Mon, 26 Feb 2007 14:07:04 +0000 (14:07 +0000)
diff --git a/doc/doc-txt/ChangeLog b/doc/doc-txt/ChangeLog

index 6e61fa9b6786adeb4b6e5ea1ae83e8f9a3ebdda7..3202cc9c1dab8b4e0d8cbc0f00dbe579591929e7 100644 (file)
--- a/doc/doc-txt/ChangeLog
+++ b/doc/doc-txt/ChangeLog
@@ -1,4 +1,4 @@
-$Cambridge: exim/doc/doc-txt/ChangeLog,v 1.484 2007/02/20 15:58:02 ph10 Exp $
+$Cambridge: exim/doc/doc-txt/ChangeLog,v 1.485 2007/02/26 14:07:04 ph10 Exp $
  
  Change log file for Exim from version 4.21
  -------------------------------------------
@@ -131,6 +131,8 @@ PH/29 SMTP synchronization checks are implemented when a command is read -
        HELO/EHLO and after the predata ACL, and likewise for MAIL and RCPT when
        pipelining has not been advertised.
  
+PH/30 MH's patch to allow iscntrl() characters to be list separators.
+
  
  Exim version 4.66
  -----------------
diff --git a/doc/doc-txt/NewStuff b/doc/doc-txt/NewStuff

index 677ab21aa9fc39c0be37e7ad8817e101ce0a03f1..d39c6d3e2bcca5a43eafbc8e664df694a8687467 100644 (file)
--- a/doc/doc-txt/NewStuff
+++ b/doc/doc-txt/NewStuff
@@ -1,4 +1,4 @@
-$Cambridge: exim/doc/doc-txt/NewStuff,v 1.142 2007/02/14 15:33:40 ph10 Exp $
+$Cambridge: exim/doc/doc-txt/NewStuff,v 1.143 2007/02/26 14:07:04 ph10 Exp $
  
  New Features in Exim
  --------------------
@@ -375,6 +375,22 @@ Version 4.67
        continue  = <some expansion>
        condition = ${if eq{0}{<some expansion>}{true}{true}}
  
+18. It is now possible to use newline and other control characters (those with
+    values less than 32, plus DEL) as separators in lists. Such separators must
+    be provided literally at the time the list is processed, but the string
+    expansion that happens first means that you can write them using normal
+    escape sequences. For example, if a newline-separated list of domains is
+    generated by a lookup, you can now process it directly by a line such as
+    this:
+
+      domains = <\n ${lookup mysql{.....}}
+
+    This avoids having to change the list separator in such data. Unlike
+    printing-character separators, which can be included in list items by
+    doubling, a control character cannot be included as data when it is set
+    as the separator. Two such characters in succession are interpreted as
+    enclosing an empty list item.
+
  
  Version 4.66
  ------------
diff --git a/src/src/string.c b/src/src/string.c

index b52d4ab9501f94d51432b4fadf72840007e21327..20bd1d1f30199660eff520951116de5386ca219c 100644 (file)
--- a/src/src/string.c
+++ b/src/src/string.c
@@ -1,4 +1,4 @@
-/* $Cambridge: exim/src/src/string.c,v 1.12 2007/02/07 11:24:56 ph10 Exp $ */
+/* $Cambridge: exim/src/src/string.c,v 1.13 2007/02/26 14:07:04 ph10 Exp $ */
  
  /*************************************************
  *     Exim - an Internet mail transport agent    *
@@ -764,19 +764,26 @@ return NULL;
  /* Leading and trailing space is removed from each item. The separator in the
  list is controlled by the int pointed to by the separator argument as follows:
  
-  If its value is > 0 it is used as the delimiter.
-    (If its value is actually > UCHAR_MAX there is only one item in the list.
+  If the value is > 0 it is used as the separator. This is typically used for
+  sublists such as slash-separated options. The value is always a printing
+  character.
+
+    (If the value is actually > UCHAR_MAX there is only one item in the list.
      This is used for some cases when called via functions that sometimes
      plough through lists, and sometimes are given single items.)
-  If its value is <= 0, the string is inspected for a leading <x, where
-    x is an ispunct() value. If found, it is used as the delimiter. If not
-    found: (a) if separator == 0, ':' is used
-           (b) if separator <0, then -separator is used
-    In all cases the value of the separator that is used is written back to
-      the int so that it is used on subsequent calls as we progress through
-      the list.
  
-The separator can always be represented in the string by doubling.
+  If the value is <= 0, the string is inspected for a leading <x, where x is an
+  ispunct() or an iscntrl() character. If found, x is used as the separator. If
+  not found:
+
+      (a) if separator == 0, ':' is used
+      (b) if separator <0, -separator is used
+
+  In all cases the value of the separator that is used is written back to the
+  int so that it is used on subsequent calls as we progress through the list.
+
+A literal ispunct() separator can be represented in an item by doubling, but
+there is no way to include an iscntrl() separator as part of the data.
  
  Arguments:
    listptr    points to a pointer to the current start of the list; the
@@ -793,20 +800,28 @@ Returns:     pointer to buffer, containing the next substring,
  uschar *
  string_nextinlist(uschar **listptr, int *separator, uschar *buffer, int buflen)
  {
-register int p = 0;
  register int sep = *separator;
  register uschar *s = *listptr;
+BOOL sep_is_special;
  
  if (s == NULL) return NULL;
-while (isspace(*s)) s++;
+
+/* This allows for a fixed specified separator to be an iscntrl() character,
+but at the time of implementation, this is never the case. However, it's best
+to be conservative. */
+
+while (isspace(*s) && *s != sep) s++;
+
+/* A change of separator is permitted, so look for a leading '<' followed by an
+allowed character. */
  
  if (sep <= 0)
    {
-  if (*s == '<' && ispunct(s[1]))
+  if (*s == '<' && (ispunct(s[1]) || iscntrl(s[1])))
      {
      sep = s[1];
      s += 2;
-    while (isspace(*s)) s++;
+    while (isspace(*s) && *s != sep) s++;
      }
    else
      {
@@ -815,15 +830,22 @@ if (sep <= 0)
    *separator = sep;
    }
  
+/* An empty string has no list elements */
+
  if (*s == 0) return NULL;
  
+/* Note whether or not the separator is an iscntrl() character. */
+
+sep_is_special = iscntrl(sep);
+
  /* Handle the case when a buffer is provided. */
  
  if (buffer != NULL)
    {
+  register int p = 0;
    for (; *s != 0; s++)
      {
-    if (*s == sep && *(++s) != sep) break;
+    if (*s == sep && (*(++s) != sep || sep_is_special)) break;
      if (p < buflen - 1) buffer[p++] = *s;
      }
    while (p > 0 && isspace(buffer[p-1])) p--;
@@ -834,31 +856,37 @@ if (buffer != NULL)
  
  else
    {
+  int size = 0;
+  int ptr = 0;
+  uschar *ss;
+
    /* We know that *s != 0 at this point. However, it might be pointing to a
-  separator, which could indicate an empty string, or could be doubled to
-  indicate a separator character as data at the start of a string. */
+  separator, which could indicate an empty string, or (if an ispunct()
+  character) could be doubled to indicate a separator character as data at the
+  start of a string. Avoid getting working memory for an empty item. */
  
    if (*s == sep)
      {
      s++;
-    if (*s != sep) buffer = string_copy(US"");
+    if (*s != sep || sep_is_special)
+      {
+      *listptr = s;
+      return string_copy(US"");
+      }
      }
  
-  if (buffer == NULL)
+  /* Not an empty string; the first character is guaranteed to be a data
+  character. */
+
+  for (;;)
      {
-    int size = 0;
-    int ptr = 0;
-    uschar *ss;
-    for (;;)
-      {
-      for (ss = s + 1; *ss != 0 && *ss != sep; ss++);
-      buffer = string_cat(buffer, &size, &ptr, s, ss-s);
-      s = ss;
-      if (*s == 0 || *(++s) != sep) break;
-      }
-    while (ptr > 0 && isspace(buffer[ptr-1])) ptr--;
-    buffer[ptr] = 0;
+    for (ss = s + 1; *ss != 0 && *ss != sep; ss++);
+    buffer = string_cat(buffer, &size, &ptr, s, ss-s);
+    s = ss;
+    if (*s == 0 || *(++s) != sep || sep_is_special) break;
      }
+  while (ptr > 0 && isspace(buffer[ptr-1])) ptr--;
+  buffer[ptr] = 0;
    }
  
  /* Update the current pointer and return the new string */
diff --git a/test/confs/0002 b/test/confs/0002

index 8662e54d07512fe739a4a5bfb396bc27bc85c7d4..af680500c5b9f66c3ea257ed77e43ab215921180 100644 (file)
--- a/test/confs/0002
+++ b/test/confs/0002
@@ -33,7 +33,7 @@ connect0:
    accept
  
  connect1:
-  deny  hosts = partial-lsearch;DIR/aux-fixed/0002.lsearch
+  deny  hosts = <\n partial-lsearch;DIR/aux-fixed/0002.lsearch \n 1.2.3.4
  
  connect2:
    deny  hosts = net-lsearch;DIR/aux-fixed/0002.lsearch
diff --git a/test/scripts/0000-Basic/0002 b/test/scripts/0000-Basic/0002

index 870359eb2cd20990efbb97b63b04f3c4cadeea6c..fd332afa46e2ece74109afadc2df8f685682e20d 100644 (file)
--- a/test/scripts/0000-Basic/0002
+++ b/test/scripts/0000-Basic/0002
@@ -59,6 +59,15 @@ reduce: ${reduce{a:b:c}{+}{$value$item}}
  reduce: ${reduce {<, 1,2,3}{0}{${eval:$value+$item}}}
  reduce: ${reduce {3:0:9:4:6}{0}{${if >{$item}{$value}{$item}{$value}}}}
  
+# Tests with iscntrl() and illegal separators
+
+map: ${map{<\n a\n\nb\nc}{'$item'}}
+
+reduce: ${reduce {<n 1\n2\n3}{0}{${eval:$value+$item}}}
+reduce: ${reduce {<\n 1\n2\n3}{0}{${eval:$value+$item}}}
+reduce: ${reduce { <\n 1\n 2 \n 3 }{0}{${eval:$value+$item}}}
+reduce: ${reduce {<\x7f 1\x7f2\177 3}{0}{${eval:$value+$item}}}
+
  # Operators
  
  addrss: ${address:local-part@dom.ain}
diff --git a/test/stderr/0002 b/test/stderr/0002

index 329b4a2beafb7fd7f0277f1853b4bfaa9b427fda..b052103a007d2c5ef62274275873cfc36e5c23ba 100644 (file)
--- a/test/stderr/0002
+++ b/test/stderr/0002
@@ -154,7 +154,7 @@ host in helo_try_verify_hosts? no (option unset)
  host in helo_accept_junk_hosts? no (option unset)
  using ACL "connect1"
  processing "deny"
-check hosts = partial-lsearch;TESTSUITE/aux-fixed/0002.lsearch
+check hosts = <\n partial-lsearch;TESTSUITE/aux-fixed/0002.lsearch \n 1.2.3.4
  sender host name required, to match against partial-lsearch;TESTSUITE/aux-fixed/0002.lsearch
  looking up host name for V4NET.0.0.1
  DNS lookup of 1.0.0.V4NET.in-addr.arpa (PTR) using fakens
@@ -181,7 +181,9 @@ internal_search_find: file="TESTSUITE/aux-fixed/0002.lsearch"
  file lookup required for ten-1.test.ex
    in TESTSUITE/aux-fixed/0002.lsearch
  lookup yielded: 
-host in "partial-lsearch;TESTSUITE/aux-fixed/0002.lsearch"? yes (matched "partial-lsearch;TESTSUITE/aux-fixed/0002.lsearch")
+host in "<
+ partial-lsearch;TESTSUITE/aux-fixed/0002.lsearch 
+ 1.2.3.4"? yes (matched "partial-lsearch;TESTSUITE/aux-fixed/0002.lsearch")
  deny: condition test succeeded
  SMTP>> 550 Administrative prohibition
  LOG: MAIN REJECT
diff --git a/test/stdout/0002 b/test/stdout/0002

index d985a9c1e565aa3a6d42ce4234f26eb0bdd31c2b..f76f07f518ff7251098cc07cd4e10651231d9907 100644 (file)
--- a/test/stdout/0002
+++ b/test/stdout/0002
@@ -49,6 +49,18 @@
  > reduce: 6
  > reduce: 9
  > 
+> # Tests with iscntrl() and illegal separators
+> 
+> map: 'a'
+''
+'b'
+'c'
+> 
+> Failed: error in expression evaluation: expecting number or opening parenthesis (after processing "0+") inside "reduce" item
+> reduce: 6
+> reduce: 6
+> reduce: 6
+> 
  > # Operators
  > 
  > addrss: local-part@dom.ain
author	Philip Hazel <ph10@hermes.cam.ac.uk>
	Mon, 26 Feb 2007 14:07:04 +0000 (14:07 +0000)
committer	Philip Hazel <ph10@hermes.cam.ac.uk>
	Mon, 26 Feb 2007 14:07:04 +0000 (14:07 +0000)
doc/doc-txt/ChangeLog		patch \| blob \| history
doc/doc-txt/NewStuff		patch \| blob \| history
src/src/string.c		patch \| blob \| history
test/confs/0002		patch \| blob \| history
test/scripts/0000-Basic/0002		patch \| blob \| history
test/stderr/0002		patch \| blob \| history
test/stdout/0002		patch \| blob \| history