From: Jeremy Harris <jgh146exb@wizmail.org>
Date: Mon, 4 May 2015 16:02:27 +0000 (+0100)
Subject: I18N: new ${imapfolder_<sep>:<string>} expansion item.  Bug 420
X-Git-Tag: exim-4_86_RC1~50
X-Git-Url: https://git.exim.org/exim.git/commitdiff_plain/ed0512a1a151a4108d7fe309055219c2da3b2bbc

I18N: new ${imapfolder_<sep>:<string>} expansion item.  Bug 420
---

diff --git a/doc/doc-txt/NewStuff b/doc/doc-txt/NewStuff
index ef6a6dea9..cac6f7c10 100644
--- a/doc/doc-txt/NewStuff
+++ b/doc/doc-txt/NewStuff
@@ -28,6 +28,9 @@ Version 4.86
  8. If built with EXPERIMENTAL_INTERNATIONAL, support is included for
     the transmission of UTF-8 envelope addresses.
 
+ 9. If built with EXPERIMENTAL_INTERNATIONAL, an expansion item for a commonly
+    used encoding of Maildir folder names.
+
 
 Version 4.85
 ------------
diff --git a/doc/doc-txt/experimental-spec.txt b/doc/doc-txt/experimental-spec.txt
index d446f6b67..64916e4a9 100644
--- a/doc/doc-txt/experimental-spec.txt
+++ b/doc/doc-txt/experimental-spec.txt
@@ -1271,8 +1271,9 @@ $tls_out_tlsa_usage (detailed above).
 
 
 
-SMTPUTF8
+INTERNATIONAL
 ------------------------------------------------------------
+SMTPUTF8
 Internationalised mail name handling.
 RFCs 6530, 6533, 5890
 
@@ -1337,6 +1338,36 @@ Known issues:
  - DSN unitext handling is not present
  - no provision for converting logging from or to UTF-8
 
+----
+IMAP folder names
+
+New expansion operator:
+
+${imapfolder {<string>} {<sep>} {<specials>}}
+
+The string is converted from the charset specified by the headers charset 
+command (in a filter file) or headers_charset global option, to the
+modified UTF-7 encoding specified by RFC 2060, with the following
+exception: All occurrences of <sep> (which has to be a single character)
+are replaced with periods ("."), and all characters listed in <specials>
+(other than <sep>) are BASE64 encoded; any other period or slash is an error.
+
+The third argument can be omitted, defaulting to an empty string.
+The second argument can be omitted, defaulting to "/".
+
+This is the encoding used by Courier for Maildir names on disk, and followed
+by many other IMAP servers.
+
+   Example 1: ${imapfolder {Foo/Bar}}       yields "Foo.Bar".
+   Example 2: ${imapfolder {Foo/Bar}{.}{/}} yields "Foo&AC8-Bar".
+   Example 3: ${imapfolder {Räksmörgås}}    yields "R&AOQ-ksm&APY-rg&AOU-s".
+
+Note that the source charset setting is vital, and also that characters
+must be representable in UTF-16.
+
+
+
+
 --------------------------------------------------------------
 End of file
 --------------------------------------------------------------
diff --git a/src/OS/Makefile-Base b/src/OS/Makefile-Base
index b7413e201..1d5a5f6f4 100644
--- a/src/OS/Makefile-Base
+++ b/src/OS/Makefile-Base
@@ -295,7 +295,14 @@ convert4r4: Makefile ../src/convert4r4.src
 
 OBJ_WITH_CONTENT_SCAN = malware.o mime.o regex.o spam.o spool_mbox.o
 OBJ_WITH_OLD_DEMIME = demime.o
-OBJ_EXPERIMENTAL = bmi_spam.o spf.o srs.o dcc.o dmarc.o dane.o utf8.o
+OBJ_EXPERIMENTAL = bmi_spam.o \
+				dane.o \
+				dcc.o \
+				dmarc.o \
+				imap_utf7.o \
+				spf.o \
+				srs.o \
+				utf8.o
 
 # Targets for final binaries; the main one has a build number which is
 # updated each time. We don't bother with that for the auxiliaries.
@@ -618,13 +625,14 @@ demime.o:        $(HDRS) demime.c
 
 # Dependencies for EXPERIMENTAL_* modules
 
-bmi_spam.o:      $(HDRS) bmi_spam.c
-dane.o:		 $(HDRS) dane.c dane-gnu.c dane-openssl.c
-dcc.o:           $(HDRS) dcc.h dcc.c
-dmarc.o:         $(HDRS) dmarc.h dmarc.c
-spf.o:           $(HDRS) spf.h spf.c
-srs.o:           $(HDRS) srs.h srs.c
-utf8.o:		 $(HDRS) utf8.c
+bmi_spam.o:		$(HDRS) bmi_spam.c
+dane.o:			$(HDRS) dane.c dane-gnu.c dane-openssl.c
+dcc.o:			$(HDRS) dcc.h dcc.c
+dmarc.o:		$(HDRS) dmarc.h dmarc.c
+imap_utf7.o:	$(HDRS) imap_utf7.c
+spf.o:			$(HDRS) spf.h spf.c
+srs.o:			$(HDRS) srs.h srs.c
+utf8.o:			$(HDRS) utf8.c
 
 # The module containing tables of available lookups, routers, auths, and
 # transports must be rebuilt if any of them are. However, because the makefiles
diff --git a/src/scripts/MakeLinks b/src/scripts/MakeLinks
index f9cc27c2e..2ec572db5 100755
--- a/src/scripts/MakeLinks
+++ b/src/scripts/MakeLinks
@@ -266,16 +266,17 @@ ln -s ../src/demime.h          demime.h
 # EXPERIMENTAL_*
 ln -s ../src/bmi_spam.c        bmi_spam.c
 ln -s ../src/bmi_spam.h        bmi_spam.h
-ln -s ../src/spf.c             spf.c
-ln -s ../src/spf.h             spf.h
-ln -s ../src/srs.c             srs.c
-ln -s ../src/srs.h             srs.h
 ln -s ../src/dcc.c             dcc.c
 ln -s ../src/dcc.h             dcc.h
 ln -s ../src/dane.c            dane.c
 ln -s ../src/dane-gnu.c        dane-gnu.c
 ln -s ../src/dane-openssl.c    dane-openssl.c
 ln -s ../src/danessl.h         danessl.h
+ln -s ../src/imap_utf7.c       imap_utf7.c
+ln -s ../src/spf.c             spf.c
+ln -s ../src/spf.h             spf.h
+ln -s ../src/srs.c             srs.c
+ln -s ../src/srs.h             srs.h
 ln -s ../src/utf8.c            utf8.c
 
 
diff --git a/src/src/expand.c b/src/src/expand.c
index ad97f6fef..209270163 100644
--- a/src/src/expand.c
+++ b/src/src/expand.c
@@ -114,6 +114,9 @@ static uschar *item_table[] = {
   US"hash",
   US"hmac",
   US"if",
+#ifdef EXPERIMENTAL_INTERNATIONAL
+  US"imapfolder",
+#endif
   US"length",
   US"listextract",
   US"lookup",
@@ -140,6 +143,9 @@ enum {
   EITEM_HASH,
   EITEM_HMAC,
   EITEM_IF,
+#ifdef EXPERIMENTAL_INTERNATIONAL
+  EITEM_IMAPFOLDER,
+#endif
   EITEM_LENGTH,
   EITEM_LISTEXTRACT,
   EITEM_LOOKUP,
@@ -4070,6 +4076,52 @@ while (*s != 0)
       continue;
       }
 
+#ifdef EXPERIMENTAL_INTERNATIONAL
+    /* ${imapfolder {<string>}{<sep>}{<specials>}}: encode <string> as a
+    folder name by calling imap_utf7_encode(), with headers_charset as the
+    source character set. <sep> defaults to "/" and has to be exactly one
+    character; <specials> is optional. */
+
+    case EITEM_IMAPFOLDER:
+      {
+      uschar *sub_arg[3];
+      uschar *encoded;
+
+      switch(read_subs(sub_arg, 3, 1, &s, skipping, TRUE, name, &resetok))
+        {
+        case 1: goto EXPAND_FAILED_CURLY;
+        case 2:
+        case 3: goto EXPAND_FAILED;
+        }
+
+      /* When skipping, only the syntax matters and the result is not used,
+      so neither check the separator nor do the conversion. */
+
+      if (skipping) continue;
+
+      if (sub_arg[1] == NULL)		/* One argument */
+        {
+        sub_arg[1] = US"/";		/* default separator */
+        sub_arg[2] = NULL;		/* and no specials */
+        }
+
+      if (Ustrlen(sub_arg[1]) != 1)
+        {
+        expand_string_message =
+          string_sprintf(
+            "IMAP folder separator must be one character, found \"%s\"",
+            sub_arg[1]);
+        goto EXPAND_FAILED;
+        }
+
+      if (!(encoded = imap_utf7_encode(sub_arg[0], headers_charset,
+                          sub_arg[1][0], sub_arg[2], &expand_string_message)))
+        goto EXPAND_FAILED;
+      yield = string_cat(yield, &size, &ptr, encoded, Ustrlen(encoded));
+      continue;
+      }
+#endif
+
     /* Handle database lookups unless locked out. If "skipping" is TRUE, we are
     expanding an internal string that isn't actually going to be used. All we
     need to do is check the syntax, so don't do a lookup at all. Preserve the
diff --git a/src/src/functions.h b/src/src/functions.h
index 1708b7a07..74198a52c 100644
--- a/src/src/functions.h
+++ b/src/src/functions.h
@@ -210,6 +210,9 @@ extern int     host_nmtoa(int, int *, int, uschar *, int);
 extern uschar *host_ntoa(int, const void *, uschar *, int *);
 extern int     host_scan_for_local_hosts(host_item *, host_item **, BOOL *);
 
+extern uschar *imap_utf7_encode(uschar *, const uschar *, 
+				 uschar, uschar *, uschar **);
+
 extern void    invert_address(uschar *, uschar *);
 extern int     ip_addr(void *, int, const uschar *, int);
 extern int     ip_bind(int, int, uschar *, int);
diff --git a/src/src/imap_utf7.c b/src/src/imap_utf7.c
new file mode 100644
index 000000000..10cc1f7fa
--- /dev/null
+++ b/src/src/imap_utf7.c
@@ -0,0 +1,261 @@
+#include "exim.h"
+
+#ifdef EXPERIMENTAL_INTERNATIONAL
+
+/*************************************************
+*     Encode a folder name as modified UTF-7     *
+*************************************************/
+
+/* Convert a folder name to the modified UTF-7 form used for Maildir folder
+names. Printable ASCII characters are copied, with '&' written as "&-". Any
+other character, and any character listed in "specials" (unless it is the
+separator), is written as '&', the modified-BASE64 form of its big-endian
+UTF-16 encoding, and '-'. One leading separator is skipped, each later run of
+separators becomes a single '.', and a '.' at the very end of the result is
+dropped. A '.' or '/' that is neither the separator nor in "specials" is
+rejected as an illegal character.
+
+With HAVE_ICONV the input is converted from "charset" to UTF-16BE by iconv;
+without it each input byte is treated as one character.
+
+Arguments:
+  string     the name to encode
+  charset    the character set of "string" (used only with HAVE_ICONV)
+  sep        the folder separator character in "string"
+  specials   additional characters to BASE64-encode; NULL means none
+  error      where to put an error message on failure
+
+Returns:     "string" itself if nothing in it needs encoding, otherwise a
+               newly built string; NULL on failure, with *error set
+*/
+
+uschar *
+imap_utf7_encode(uschar *string, const uschar *charset, uschar sep,
+  uschar *specials, uschar **error)
+{
+static uschar encode_base64[64] =
+  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
+int ptr = 0;
+int size = 0;
+size_t slen;
+uschar *sptr, *yield = NULL;
+int i = 0, j;
+uschar c = 0;
+BOOL base64mode = FALSE;
+BOOL lastsep = FALSE;
+uschar utf16buf[256];
+uschar *utf16ptr;
+uschar *s;
+uschar outbuf[256];
+uschar *outptr = outbuf;
+#if HAVE_ICONV
+iconv_t icd;
+#endif
+
+if (!specials) specials = US"";
+
+/* Pass over the string. If it consists entirely of "normal" characters,
+return it as is. A separator at the very start is stepped over so that it
+is not part of what gets encoded below. */
+
+for (s = string; *s; s++)
+  {
+  if (s == string && *s == sep)
+    string++;
+  if (  *s >= 0x7f
+     || *s < 0x20
+     || strchr("./&", *s)
+     || *s == sep
+     || strchr(specials, *s)
+     )
+    break;
+  }
+
+if (!*s)
+  return string;
+
+sptr = string;
+slen = Ustrlen(string);
+
+#if HAVE_ICONV
+if ((icd = iconv_open(US"UTF-16BE", charset)) == (iconv_t)-1)
+  {
+  *error = string_sprintf(
+    "imapfolder: iconv_open(\"UTF-16BE\", \"%s\") failed: %s%s",
+    charset, strerror(errno),
+    errno == EINVAL ? " (maybe unsupported conversion)" : "");
+  return NULL;
+  }
+#endif
+
+while (slen > 0)
+  {
+#if HAVE_ICONV
+  size_t left = sizeof(utf16buf);
+  utf16ptr = utf16buf;
+
+  /* E2BIG is not treated as an error: the loop simply goes round again
+  with whatever input is left. */
+
+  if (  iconv(icd, (ICONV_ARG2_TYPE)&sptr, &slen, CSS &utf16ptr, &left)
+          == (size_t)-1
+     && errno != E2BIG
+     )
+    {
+    *error = string_sprintf("imapfolder: iconv() failed to convert from %s: %s",
+                              charset, strerror(errno));
+    iconv_close(icd);
+    return NULL;
+    }
+#else
+  /* No iconv: take each input byte as one character and store it high
+  octet first, which is the order the encoder below expects. */
+
+  for (utf16ptr = utf16buf;
+       slen > 0 && (utf16ptr - utf16buf) < sizeof(utf16buf);
+       utf16ptr += 2, slen--, sptr++)
+    {
+    utf16ptr[0] = '\0';
+    utf16ptr[1] = *sptr;
+    }
+#endif
+
+  /* Now encode utf16buf as modified UTF-7, one two-octet unit at a time */
+
+  s = utf16buf;
+  while (s < utf16ptr)
+    {
+    if (  s[0] != 0
+       || s[1] >= 0x7f
+       || s[1] < 0x20
+       || (strchr(specials, s[1]) && s[1] != sep)
+       )
+      {
+      /* Encode as modified BASE64. i counts the octets taken so far in the
+      current group of three, and c holds the bits not yet written out. */
+
+      lastsep = FALSE;
+      if (!base64mode)
+        {
+        *outptr++ = '&';
+        base64mode = TRUE;
+        i = 0;
+        }
+
+      for (j = 0; j < 2; j++, s++) switch (i++)
+        {
+        case 0:
+          /* Top 6 bits of the first octet */
+          *outptr++ = encode_base64[(*s >> 2) & 0x3F];
+          c = (*s & 0x03); break;
+        case 1:
+          /* Bottom 2 bits of the first octet, and top 4 bits of the second */
+          *outptr++ = encode_base64[(c << 4) | ((*s >> 4) & 0x0F)];
+          c = (*s & 0x0F); break;
+        case 2:
+          /* Bottom 4 bits of the second octet and top 2 bits of the third */
+          *outptr++ = encode_base64[(c << 2) | ((*s >> 6) & 0x03)];
+          /* Bottom 6 bits of the third octet */
+          *outptr++ = encode_base64[*s & 0x3F];
+          i = 0;
+        }
+      }
+
+    else if (  (s[1] != '.' && s[1] != '/')
+            || s[1] == sep
+            )
+      {
+      /* Encode as self (almost), first closing any open BASE64 run */
+
+      if (base64mode)
+        {
+        switch (i)
+          {
+          case 1:
+            /* Remaining bottom 2 bits of the last octet */
+            *outptr++ = encode_base64[c << 4];
+            break;
+          case 2:
+            /* Remaining bottom 4 bits of the last octet */
+            *outptr++ = encode_base64[c << 2];
+          }
+        *outptr++ = '-';
+        base64mode = FALSE;
+        }
+
+      if (*++s == sep)
+        {
+        if (!lastsep)
+          {
+          *outptr++ = '.';
+          lastsep = TRUE;
+          }
+        }
+      else
+        {
+        *outptr++ = *s;
+        if (*s == '&')
+          *outptr++ = '-';
+        lastsep = FALSE;
+        }
+
+      s++;
+      }
+    else
+      {
+      /* Close the converter and release the partial result first, so that
+      the error message is allocated after the reset, not before it. */
+
+#if HAVE_ICONV
+      iconv_close(icd);
+#endif
+      if (yield) store_reset(yield);
+      *error = string_sprintf("imapfolder: illegal character '%c'", s[1]);
+      return NULL;
+      }
+
+    /* One pass round this loop adds at most 4 bytes to outbuf, and up to 2
+    more are added after the last pass, so flush while that much room is
+    still left. */
+
+    if (outptr > outbuf + sizeof(outbuf) - 8)
+      {
+      yield = string_cat(yield, &size, &ptr, outbuf, outptr - outbuf);
+      outptr = outbuf;
+      }
+    }
+  } /* End of input string */
+
+if (base64mode)
+  {
+  switch (i)
+    {
+    case 1:
+      /* Remaining bottom 2 bits of the last octet */
+      *outptr++ = encode_base64[c << 4];
+      break;
+    case 2:
+      /* Remaining bottom 4 bits of the last octet */
+      *outptr++ = encode_base64[c << 2];
+    }
+  *outptr++ = '-';
+  }
+
+#if HAVE_ICONV
+iconv_close(icd);
+#endif
+
+yield = string_cat(yield, &size, &ptr, outbuf, outptr - outbuf);
+
+/* Drop a trailing '.', but only when there is some output to look at */
+
+if (ptr > 0 && yield[ptr-1] == '.')
+  ptr--;
+yield[ptr] = '\0';
+
+return yield;
+}
+
+#endif	/* whole file */
+/* vi: aw ai sw=2
+*/
diff --git a/test/confs/4200 b/test/confs/4200
index bce8e551f..c0bf41aa8 100644
--- a/test/confs/4200
+++ b/test/confs/4200
@@ -7,6 +7,8 @@ gecos_name = CALLER_NAME
 
 # ----- Main settings -----
 
+headers_charset = UTF8
+
 # ----- ACL -----
 
 # End
diff --git a/test/scripts/4200-International/4200 b/test/scripts/4200-International/4200
index 48918b702..481a5053f 100644
--- a/test/scripts/4200-International/4200
+++ b/test/scripts/4200-International/4200
@@ -1,4 +1,4 @@
-# Internationalised mail: expansions
+# Internationalisation: expansions
 #
 # Sample strings taken from RFC3942
 
@@ -93,4 +93,16 @@ ${utf8_domain_from_alabel:russian.xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l.com}
 ${utf8_domain_from_alabel:spanish.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com}
 ${utf8_domain_from_alabel:vietnamese.xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g.com}
 
+===========
+
+${imapfolder {Foo/Bar}}
+Foo.Bar
+
+${imapfolder {Foo/Bar} {.} {/}}
+Foo&AC8-Bar
+
+${imapfolder{Räksmörgås}}
+R&AOQ-ksm&APY-rg&AOU-s
+
+
 ****
diff --git a/test/stdout/4200 b/test/stdout/4200
index 1cfb7a9db..9e2c4bbc5 100644
--- a/test/stdout/4200
+++ b/test/stdout/4200
@@ -66,4 +66,16 @@
 > spanish.PorquÃ©nopuedensimplementehablarenEspaÃ±ol.com
 > vietnamese.Táº¡isaohá»khÃ´ngthá»chá»nÃ³itiáº¿ngViá»t.com
 > 
+> ===========
+> 
+> Foo.Bar
+> Foo.Bar
+> 
+> Foo&AC8-Bar
+> Foo&AC8-Bar
+> 
+> R&AOQ-ksm&APY-rg&AOU-s
+> R&AOQ-ksm&APY-rg&AOU-s
+> 
+> 
 >