A-label expansion operators
author Jeremy Harris <jgh146exb@wizmail.org>
Fri, 3 Apr 2015 18:13:27 +0000 (19:13 +0100)
committer Jeremy Harris <jgh146exb@wizmail.org>
Sun, 12 Apr 2015 18:15:31 +0000 (19:15 +0100)
doc/doc-txt/experimental-spec.txt
src/src/expand.c
src/src/utf8.c
test/confs/4200 [new file with mode: 0644]
test/scripts/4200-International/4200 [new file with mode: 0644]
test/stdout/4200 [new file with mode: 0644]

index 0eeb939bfdafc8972301d195fe0d7903a635cdd3..738f02cce6968fe47921b80f41acd5842bce2115 100644 (file)
@@ -1276,7 +1276,13 @@ SMTPUTF8
 Internationalised mail name handling.
 RFCs 6530, 6533, 5890
 
-Compile with libidn.
+Compile with EXPERIMENTAL_INTERNATIONAL and libidn.
+
+Expansion operators:
+       ${utf8_domain_to_alabel:str}
+       ${utf8_domain_from_alabel:str}
+       ${utf8_localpart_to_alabel:str}
+       ${utf8_localpart_from_alabel:str}
 
 
 --------------------------------------------------------------
index b613ef2b47416581aef473fe02dd210801b8a527..ad97f6fef15375c96880ee6ca86c2155a711d4cb 100644 (file)
@@ -168,7 +168,14 @@ static uschar *op_table_underscore[] = {
   US"quote_local_part",
   US"reverse_ip",
   US"time_eval",
-  US"time_interval"};
+  US"time_interval"
+#ifdef EXPERIMENTAL_INTERNATIONAL
+ ,US"utf8_domain_from_alabel",
+  US"utf8_domain_to_alabel",
+  US"utf8_localpart_from_alabel",
+  US"utf8_localpart_to_alabel"
+#endif
+  };
 
 enum {
   EOP_FROM_UTF8,
@@ -176,7 +183,14 @@ enum {
   EOP_QUOTE_LOCAL_PART,
   EOP_REVERSE_IP,
   EOP_TIME_EVAL,
-  EOP_TIME_INTERVAL };
+  EOP_TIME_INTERVAL
+#ifdef EXPERIMENTAL_INTERNATIONAL
+ ,EOP_UTF8_DOMAIN_FROM_ALABEL,
+  EOP_UTF8_DOMAIN_TO_ALABEL,
+  EOP_UTF8_LOCALPART_FROM_ALABEL,
+  EOP_UTF8_LOCALPART_TO_ALABEL
+#endif
+  };
 
 static uschar *op_table_main[] = {
   US"address",
@@ -6555,16 +6569,13 @@ while (*s != 0)
          if (bytes_left)
            {
            if ((c & 0xc0) != 0x80)
-             {
                    /* wrong continuation byte; invalidate all bytes */
              complete = 1; /* error */
-             }
            else
              {
              codepoint = (codepoint << 6) | (c & 0x3f);
              seq_buff[index++] = c;
              if (--bytes_left == 0)            /* codepoint complete */
-               {
                if(codepoint > 0x10FFFF)        /* is it too large? */
                  complete = -1;        /* error (RFC3629 limit) */
                else
@@ -6572,7 +6583,6 @@ while (*s != 0)
                  yield = string_cat(yield, &size, &ptr, seq_buff, seq_len);
                  index = 0;
                  }
-               }
              }
            }
          else  /* no bytes left: new sequence */
@@ -6615,13 +6625,75 @@ while (*s != 0)
            yield = string_cat(yield, &size, &ptr, UTF8_REPLACEMENT_CHAR, 1);
            }
          if ((complete == 1) && ((c & 0x80) == 0))
-           { /* ASCII character follows incomplete sequence */
+                       /* ASCII character follows incomplete sequence */
              yield = string_cat(yield, &size, &ptr, &c, 1);
-           }
          }
         continue;
         }
 
+#ifdef EXPERIMENTAL_INTERNATIONAL
+      case EOP_UTF8_DOMAIN_TO_ALABEL:
+       {
+        uschar * error = NULL;
+       uschar * s = string_domain_utf8_to_alabel(sub, &error);
+       if (error)
+         {
+         expand_string_message = string_sprintf(
+           "error converting utf8 (%s) to alabel: %s",
+           string_printing(sub), error);
+         goto EXPAND_FAILED;
+         }
+       yield = string_cat(yield, &size, &ptr, s, Ustrlen(s));
+        continue;
+       }
+
+      case EOP_UTF8_DOMAIN_FROM_ALABEL:
+       {
+        uschar * error = NULL;
+       uschar * s = string_domain_alabel_to_utf8(sub, &error);
+       if (error)
+         {
+         expand_string_message = string_sprintf(
+           "error converting alabel (%s) to utf8: %s",
+           string_printing(sub), error);
+         goto EXPAND_FAILED;
+         }
+       yield = string_cat(yield, &size, &ptr, s, Ustrlen(s));
+        continue;
+       }
+
+      case EOP_UTF8_LOCALPART_TO_ALABEL:
+       {
+        uschar * error = NULL;
+       uschar * s = string_localpart_utf8_to_alabel(sub, &error);
+       if (error)
+         {
+         expand_string_message = string_sprintf(
+           "error converting utf8 (%s) to alabel: %s",
+           string_printing(sub), error);
+         goto EXPAND_FAILED;
+         }
+       yield = string_cat(yield, &size, &ptr, s, Ustrlen(s));
+       DEBUG(D_expand) debug_printf("yield: '%s'\n", yield);
+        continue;
+       }
+
+      case EOP_UTF8_LOCALPART_FROM_ALABEL:
+       {
+        uschar * error = NULL;
+       uschar * s = string_localpart_alabel_to_utf8(sub, &error);
+       if (error)
+         {
+         expand_string_message = string_sprintf(
+           "error converting alabel (%s) to utf8: %s",
+           string_printing(sub), error);
+         goto EXPAND_FAILED;
+         }
+       yield = string_cat(yield, &size, &ptr, s, Ustrlen(s));
+        continue;
+       }
+#endif /* EXPERIMENTAL_INTERNATIONAL */
+
       /* escape turns all non-printing characters into escape sequences. */
 
       case EOP_ESCAPE:
index 2f8173dc1ad219d1d07e51e6fb6f69450c5ee9b3..9a2b8656e09e42d9c54b04a095780d96daf8eda7 100644 (file)
@@ -78,14 +78,22 @@ size_t p_len = ucs4_len*4;  /* this multiplier is pure guesswork */
 uschar * res = store_get(p_len+5);
 int rc;
 
+DEBUG(D_expand) debug_printf("l_u2a: ulen %d  plen %d\n", ucs4_len, p_len);
+DEBUG(D_expand) for (rc = 0; rc < ucs4_len; rc++) debug_printf("%08x ", p[rc]);
+
 res[0] = 'x'; res[1] = 'n'; res[2] = res[3] = '-';
 
 if ((rc = punycode_encode(ucs4_len, p, NULL, &p_len, res+4)) != PUNYCODE_SUCCESS)
   {
+  DEBUG(D_expand) debug_printf("l_u2a: bad '%s'\n", punycode_strerror(rc));
   free(p);
   if (err) *err = US punycode_strerror(rc);
   return NULL;
   }
+DEBUG(D_expand) debug_printf("l_u2a: plen %d\n", p_len);
+p_len += 4;
+DEBUG(D_expand) for (rc = 0; rc < p_len; rc++) debug_printf("%02x ", res[rc]);
+DEBUG(D_expand) debug_printf("\n");
 free(p);
 res[p_len] = '\0';
 return res;
@@ -97,6 +105,8 @@ string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err)
 {
 size_t p_len = strlen(alabel);
 punycode_uint * p;
+uschar * s;
+uschar * res;
 int rc;
 
 if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-')
@@ -105,6 +115,7 @@ if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-'
   return NULL;
   }
 p_len -= 4;
+DEBUG(D_expand) debug_printf("l_a2u: plen %d\n", p_len);
 
 p = (punycode_uint *) store_get((p_len+1) * sizeof(*p));
 
@@ -113,8 +124,12 @@ if ((rc = punycode_decode(p_len, CCS alabel+4, &p_len, p, NULL)) != PUNYCODE_SUC
   if (err) *err = US punycode_strerror(rc);
   return NULL;
   }
-p[p_len] = 0;
-return US p;
+DEBUG(D_expand) debug_printf("l_a2u: dlen %d\n", p_len);
+
+s = stringprep_ucs4_to_utf8(p, p_len, NULL, &p_len);
+res = string_copyn(s, p_len);
+free(s);
+return res;
 }
 
 
diff --git a/test/confs/4200 b/test/confs/4200
new file mode 100644 (file)
index 0000000..bce8e55
--- /dev/null
@@ -0,0 +1,12 @@
+# Exim test configuration 4200
+
+exim_path = EXIM_PATH
+spool_directory = DIR/spool
+log_file_path = DIR/spool/log/%slog
+gecos_name = CALLER_NAME
+
+# ----- Main settings -----
+
+# ----- ACL -----
+
+# End
diff --git a/test/scripts/4200-International/4200 b/test/scripts/4200-International/4200
new file mode 100644 (file)
index 0000000..48918b7
--- /dev/null
@@ -0,0 +1,96 @@
+# Internationalised mail: expansions
+#
+# Sample strings taken from RFC3492
+
+exim -be
+
+utf-8 localpart to a-label:
+
+${utf8_localpart_to_alabel:\xD9\x84}
+xn--ghb
+
+${utf8_localpart_to_alabel:\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\
+\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A\xD8\x9F}
+xn--egbpdaj6bu4bxfgehfvwxn
+
+a-label localpart to utf-8:
+
+${utf8_localpart_from_alabel:xn--ghb}
+${utf8_localpart_from_alabel:xn--egbpdaj6bu4bxfgehfvwxn}
+
+utf-8 domain to a-label:
+
+${utf8_domain_to_alabel:bogus.\xD9\x84.com}
+bogus.xn--ghb.com
+
+${utf8_domain_to_alabel:arabic.\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\
+\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A\xD8\x9F.com}
+arabic.xn--egbpdaj6bu4bxfgehfvwxn.com
+
+${utf8_domain_to_alabel:simpl.chinese.\xE4\xBB\x96\xE4\xBB\xAC\xE4\xB8\xBA\xE4\xBB\x80\
+\xE4\xB9\x88\xE4\xB8\x8D\xE8\xAF\xB4\xE4\xB8\xAD\xE6\x96\x87.com}
+simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com
+
+${utf8_domain_to_alabel:trad.chinese.\xE4\xBB\x96\xE5\x80\x91\xE7\x88\xB2\xE4\xBB\x80\
+\xE9\xBA\xBD\xE4\xB8\x8D\xE8\xAA\xAA\xE4\xB8\xAD\xE6\x96\x87.com}
+trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com
+
+${utf8_domain_to_alabel:czech.\x50\x72\x6F\xC4\x8D\x70\x72\x6F\x73\x74\xC4\x9B\x6E\x65\
+\x6D\x6C\x75\x76\xC3\xAD\xC4\x8D\x65\x73\x6B\x79.com}
+czech.xn--Proprostnemluvesky-uyb24dma41a.com
+
+${utf8_domain_to_alabel:hebrew.\xD7\x9C\xD7\x9E\xD7\x94\xD7\x94\xD7\x9D\xD7\xA4\xD7\xA9\
+\xD7\x95\xD7\x98\xD7\x9C\xD7\x90\xD7\x9E\xD7\x93\xD7\x91\xD7\xA8\xD7\x99\xD7\x9D\xD7\xA2\
+\xD7\x91\xD7\xA8\xD7\x99\xD7\xAA.com}
+hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com
+
+${utf8_domain_to_alabel:hindi.\xE0\xA4\xAF\xE0\xA4\xB9\xE0\xA4\xB2\xE0\xA5\x8B\xE0\xA4\x97\
+\xE0\xA4\xB9\xE0\xA4\xBF\xE0\xA4\xA8\xE0\xA5\x8D\xE0\xA4\xA6\xE0\xA5\x80\xE0\xA4\x95\xE0\xA5\x8D\
+\xE0\xA4\xAF\xE0\xA5\x8B\xE0\xA4\x82\xE0\xA4\xA8\xE0\xA4\xB9\xE0\xA5\x80\xE0\xA4\x82\xE0\xA4\xAC\
+\xE0\xA5\x8B\xE0\xA4\xB2\xE0\xA4\xB8\xE0\xA4\x95\xE0\xA4\xA4\xE0\xA5\x87\xE0\xA4\xB9\xE0\xA5\x88\
+\xE0\xA4\x82.com}
+hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com
+
+${utf8_domain_to_alabel:japanese.\xE3\x81\xAA\xE3\x81\x9C\xE3\x81\xBF\xE3\x82\x93\xE3\x81\xAA\
+\xE6\x97\xA5\xE6\x9C\xAC\xE8\xAA\x9E\xE3\x82\x92\xE8\xA9\xB1\xE3\x81\x97\xE3\x81\xA6\xE3\x81\x8F\
+\xE3\x82\x8C\xE3\x81\xAA\xE3\x81\x84\xE3\x81\xAE\xE3\x81\x8B.com}
+japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com
+
+# the a-label for the phrase in korean is too long for a domain label (63 byte limit)
+korean: ${utf8_localpart_to_alabel:\xEC\x84\xB8\xEA\xB3\x84\xEC\x9D\x98\xEB\xAA\xA8\xEB\x93\xA0\
+\xEC\x82\xAC\xEB\x9E\x8C\xEB\x93\xA4\xEC\x9D\xB4\xED\x95\x9C\xEA\xB5\xAD\xEC\x96\xB4\xEB\xA5\xBC\
+\xEC\x9D\xB4\xED\x95\xB4\xED\x95\x9C\xEB\x8B\xA4\xEB\xA9\xB4\xEC\x96\xBC\xEB\xA7\x88\xEB\x82\x98\
+\xEC\xA2\x8B\xEC\x9D\x84\xEA\xB9\x8C}
+korean: xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c
+
+${utf8_domain_to_alabel:russian.\xD0\xBF\xD0\xBE\xD1\x87\xD0\xB5\xD0\xBC\xD1\x83\xD0\xB6\
+\xD0\xB5\xD0\xBE\xD0\xBD\xD0\xB8\xD0\xBD\xD0\xB5\xD0\xB3\xD0\xBE\xD0\xB2\xD0\xBE\xD1\x80\
+\xD1\x8F\xD1\x82\xD0\xBF\xD0\xBE\xD1\x80\xD1\x83\xD1\x81\xD1\x81\xD0\xBA\xD0\xB8.com}
+russian.xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l.com
+
+${utf8_domain_to_alabel:spanish.\x50\x6F\x72\x71\x75\xC3\xA9\x6E\x6F\x70\x75\x65\x64\x65\
+\x6E\x73\x69\x6D\x70\x6C\x65\x6D\x65\x6E\x74\x65\x68\x61\x62\x6C\x61\x72\x65\x6E\x45\x73\
+\x70\x61\xC3\xB1\x6F\x6C.com}
+spanish.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com
+
+${utf8_domain_to_alabel:vietnamese.\x54\xE1\xBA\xA1\x69\x73\x61\x6F\x68\xE1\xBB\x8D\x6B\x68\
+\xC3\xB4\x6E\x67\x74\x68\xE1\xBB\x83\x63\x68\xE1\xBB\x89\x6E\xC3\xB3\x69\x74\x69\xE1\xBA\xBF\
+\x6E\x67\x56\x69\xE1\xBB\x87\x74.com}
+vietnamese.xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g.com
+
+a-label domain to utf-8:
+
+${utf8_domain_from_alabel:arab.xn--ghb.com}
+${utf8_domain_from_alabel:arab.xn--egbpdaj6bu4bxfgehfvwxn.com}
+${utf8_domain_from_alabel:simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com}
+${utf8_domain_from_alabel:trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com}
+${utf8_domain_from_alabel:czech.xn--Proprostnemluvesky-uyb24dma41a.com}
+${utf8_domain_from_alabel:hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com}
+${utf8_domain_from_alabel:hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com}
+${utf8_domain_from_alabel:japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com}
+korean: ${utf8_localpart_from_alabel:xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c}
+${utf8_domain_from_alabel:russian.xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l.com}
+${utf8_domain_from_alabel:spanish.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com}
+${utf8_domain_from_alabel:vietnamese.xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g.com}
+
+****
diff --git a/test/stdout/4200 b/test/stdout/4200
new file mode 100644 (file)
index 0000000..1cfb7a9
--- /dev/null
@@ -0,0 +1,69 @@
+> 
+> utf-8 localpart to a-label:
+> 
+> xn--ghb
+> xn--ghb
+> 
+> xn--egbpdaj6bu4bxfgehfvwxn
+> xn--egbpdaj6bu4bxfgehfvwxn
+> 
+> a-label localpart to utf-8:
+> 
+> ل
+> ليهمابتكلموشعربي؟
+> 
+> utf-8 domain to a-label:
+> 
+> bogus.xn--ghb.com
+> bogus.xn--ghb.com
+> 
+> arabic.xn--egbpdaj6bu4bxfgehfvwxn.com
+> arabic.xn--egbpdaj6bu4bxfgehfvwxn.com
+> 
+> simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com
+> simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com
+> 
+> trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com
+> trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com
+> 
+> czech.xn--proprostnemluvesky-uyb24dma41a.com
+> czech.xn--Proprostnemluvesky-uyb24dma41a.com
+> 
+> hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com
+> hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com
+> 
+> hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com
+> hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com
+> 
+> japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com
+> japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com
+> 
+> # the a-label for the phrase in korean is too long for a domain label (63 byte limit)
+> korean: xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c
+> korean: xn--989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c
+> 
+> russian.xn--b1abfaaepdrnnbgefbadotcwatmq2g4l.com
+> russian.xn--b1abfaaepdrnnbgefbaDotcwatmq2g4l.com
+> 
+> spanish.xn--porqunopuedensimplementehablarenespaol-fmd56a.com
+> spanish.xn--PorqunopuedensimplementehablarenEspaol-fmd56a.com
+> 
+> vietnamese.xn--tisaohkhngthchnitingvit-kjcr8268qyxafd2f1b9g.com
+> vietnamese.xn--TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g.com
+> 
+> a-label domain to utf-8:
+> 
+> arab.ل.com
+> arab.ليهمابتكلموشعربي؟.com
+> simpl.chinese.他们为什么不说中文.com
+> trad.chinese.他們爲什麽不說中文.com
+> czech.Pročprostěnemluvíčesky.com
+> hebrew.למההםפשוטלאמדבריםעברית.com
+> hindi.यहलोगहिन्दीक्योंनहींबोलसकतेहैं.com
+> japanese.なぜみんな日本語を話してくれないのか.com
+> korean: 세계의모든사람들이한국어를이해한다면얼마나좋을까
+> russian.почемужеонинеговорятпорусски.com
+> spanish.PorquénopuedensimplementehablarenEspañol.com
+> vietnamese.TạisaohọkhôngthểchỉnóitiếngViệt.com
+> 
+>