From 9427e87923288dfe6fdf80011f77bf4a135898ac Mon Sep 17 00:00:00 2001 From: Jeremy Harris Date: Sat, 26 Nov 2016 18:35:48 +0000 Subject: [PATCH] I18N: support IDNA2008. Bug 1911 --- doc/doc-txt/ChangeLog | 4 +- src/src/EDITME | 6 +- src/src/config.h.defaults | 1 + src/src/transports/smtp.c | 9 +- src/src/utf8.c | 124 +++++++++++++++++++++------ src/src/verify.c | 25 +++--- test/scripts/4200-International/4200 | 14 ++- test/stdout/4200 | 11 ++- 8 files changed, 133 insertions(+), 61 deletions(-) diff --git a/doc/doc-txt/ChangeLog b/doc/doc-txt/ChangeLog index 73afe1bb3..46ec11e34 100644 --- a/doc/doc-txt/ChangeLog +++ b/doc/doc-txt/ChangeLog @@ -5,7 +5,9 @@ affect Exim's operation, with an unchanged configuration file. For new options, and new features, see the NewStuff file next to this ChangeLog. Exim version 4.89 ------------------ +------------------- +JH/01 Bug 1922: Support IDNA2008. This has slightly different conversion rules + than -2003 did; needs libidn2 in addition to libidn. Exim version 4.88 diff --git a/src/src/EDITME b/src/src/EDITME index 69293467e..1bff9dab2 100644 --- a/src/src/EDITME +++ b/src/src/EDITME @@ -937,11 +937,15 @@ ZCAT_COMMAND=/usr/bin/zcat # # Uncomment the following to include Internationalisation features. This is the # SMTPUTF8 ESMTP extension, and associated facilities for handling UTF8 domain -# and localparts, per RFCs 5890, 6530 and 6533. +# and localparts, per RFC 3490 (IDNA2003). # You need to have the IDN library installed. +# If you want IDNA2008 mappings per RFCs 5890, 6530 and 6533, you additionally +# need libidn2 and SUPPORT_I18N_2008. # SUPPORT_I18N=yes # LDFLAGS += -lidn +# SUPPORT_I18N_2008=yes +# LDFLAGS += -lidn -lidn2 #------------------------------------------------------------------------------ diff --git a/src/src/config.h.defaults b/src/src/config.h.defaults index bafdc1ba4..58e181309 100644 --- a/src/src/config.h.defaults +++ b/src/src/config.h.defaults @@ -135,6 +135,7 @@ it's a default value. 
*/ #define SUPPORT_CRYPTEQ #define SUPPORT_I18N +#define SUPPORT_I18N_2008 #define SUPPORT_MAILDIR #define SUPPORT_MAILSTORE #define SUPPORT_MBX diff --git a/src/src/transports/smtp.c b/src/src/transports/smtp.c index a19e85ffb..527142967 100644 --- a/src/src/transports/smtp.c +++ b/src/src/transports/smtp.c @@ -2429,17 +2429,14 @@ for (addr = first_addr; rcpt_addr = transport_rcpt_address(addr, tblock->rcpt_include_affixes); #ifdef SUPPORT_I18N - { - uschar * dummy_errstr; if ( testflag(addrlist, af_utf8_downcvt) - && (rcpt_addr = string_address_utf8_to_alabel(rcpt_addr, &dummy_errstr), - dummy_errstr - ) ) + && !(rcpt_addr = string_address_utf8_to_alabel(rcpt_addr, NULL)) + ) { + /*XXX could we use a per-address errstr here? Not fail the whole send? */ errno = ERRNO_EXPANDFAIL; goto SEND_FAILED; } - } #endif count = smtp_write_command(&outblock, no_flush, "RCPT TO:<%s>%s%s\r\n", diff --git a/src/src/utf8.c b/src/src/utf8.c index e394db0a8..be5bcb078 100644 --- a/src/src/utf8.c +++ b/src/src/utf8.c @@ -2,7 +2,7 @@ * Exim - an Internet mail transport agent * *************************************************/ -/* Copyright (c) Jeremy Harris 2015 */ +/* Copyright (c) Jeremy Harris 2015, 2016 */ /* See the file NOTICE for conditions of use and distribution. */ @@ -10,10 +10,20 @@ #ifdef SUPPORT_I18N -#include <idna.h> +#ifdef SUPPORT_I18N_2008 +# include <idn2.h> +#else +# include <idna.h> +#endif + #include <punycode.h> #include <stringprep.h> +static uschar * +string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err); + +/**************************************************/ + BOOL string_is_utf8(const uschar * s) { @@ -22,17 +32,44 @@ if (s) while ((c = *s++)) if (c & 0x80) return TRUE; return FALSE; } +static BOOL +string_is_alabel(const uschar * s) +{ +return s[0] == 'x' && s[1] == 'n' && s[2] == '-' && s[3] == '-'; +} + /**************************************************/ -/* Domain conversions */ -/* the *err string pointer should be null before the call */ +/* Domain conversions. 
+The *err string pointer should be null before the call + +Return NULL for error, with optional errstr pointer filled in +*/ uschar * string_domain_utf8_to_alabel(const uschar * utf8, uschar ** err) { -uschar * s1; -uschar * s; +uschar * s1, * s; int rc; +#ifdef SUPPORT_I18N_2008 +/* Only lowercase is accepted by the library call. A pity since we lose +any mixed-case annotation. This does not really matter for a domain. */ + { + uschar c; + for (s1 = s = US utf8; (c = *s1); s1++) if (!(c & 0x80) && isupper(c)) + { + s = string_copy(utf8); + for (s1 = s + (s1 - utf8); (c = *s1); s1++) if (!(c & 0x80) && isupper(c)) + *s1 = tolower(c); + break; + } + } +if ((rc = idn2_lookup_u8(CCS s, &s1, IDN2_NFC_INPUT)) != IDN2_OK) + { + if (err) *err = US idn2_strerror(rc); + return NULL; + } +#else s = US stringprep_utf8_nfkc_normalize(CCS utf8, -1); if ( (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED)) != IDNA_SUCCESS) @@ -42,6 +79,7 @@ if ( (rc = idna_to_ascii_8z(CCS s, CSS &s1, IDNA_ALLOW_UNASSIGNED)) return NULL; } free(s); +#endif s = string_copy(s1); free(s1); return s; @@ -52,8 +90,23 @@ return s; uschar * string_domain_alabel_to_utf8(const uschar * alabel, uschar ** err) { -uschar * s1; -uschar * s; +#ifdef SUPPORT_I18N_2008 +const uschar * label; +int sep = '.'; +uschar * s = NULL; + +while (label = string_nextinlist(&alabel, &sep, NULL, 0)) + if ( string_is_alabel(label) + && !(label = string_localpart_alabel_to_utf8_(label, err)) + ) + return NULL; + else + s = string_append_listele(s, '.', label); +return s; + +#else + +uschar * s1, * s; int rc; if ( (rc = idna_to_unicode_8z8z(CCS alabel, CSS &s1, IDNA_USE_STD3_ASCII_RULES)) @@ -65,6 +118,7 @@ if ( (rc = idna_to_unicode_8z8z(CCS alabel, CSS &s1, IDNA_USE_STD3_ASCII_RULES) s = string_copy(s1); free(s1); return s; +#endif } /**************************************************/ @@ -103,25 +157,20 @@ return res; } -uschar * -string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err) +static uschar 
* +string_localpart_alabel_to_utf8_(const uschar * alabel, uschar ** err) { -size_t p_len = Ustrlen(alabel); +size_t p_len; punycode_uint * p; -uschar * s; -uschar * res; int rc; +uschar * s, * res; -if (alabel[0] != 'x' || alabel[1] != 'n' || alabel[2] != '-' || alabel[3] != '-') - { - if (err) *err = US"bad alabel prefix"; - return NULL; - } - -p_len -= 4; +DEBUG(D_expand) debug_printf("l_a2u: '%s'\n", alabel); +alabel += 4; +p_len = Ustrlen(alabel); p = (punycode_uint *) store_get((p_len+1) * sizeof(*p)); -if ((rc = punycode_decode(p_len, CCS alabel+4, &p_len, p, NULL)) != PUNYCODE_SUCCESS) +if ((rc = punycode_decode(p_len, CCS alabel, &p_len, p, NULL)) != PUNYCODE_SUCCESS) { if (err) *err = US punycode_strerror(rc); return NULL; @@ -134,9 +183,23 @@ return res; } +uschar * +string_localpart_alabel_to_utf8(const uschar * alabel, uschar ** err) +{ +if (string_is_alabel(alabel)) + return string_localpart_alabel_to_utf8_(alabel, err); + +if (err) *err = US"bad alabel prefix"; +return NULL; +} + + /**************************************************/ -/* whole address conversion */ -/* the *err string pointer should be null before the call */ +/* Whole address conversion. +The *err string pointer should be null before the call. 
+ +Return NULL on error, with (optional) errstring pointer filled in +*/ uschar * string_address_utf8_to_alabel(const uschar * utf8, uschar ** err) @@ -153,8 +216,8 @@ for (s = utf8; *s; s++) if (*s == '@') { l = string_copyn(utf8, s - utf8); - if ( (l = string_localpart_utf8_to_alabel(l, err), err && *err) - || (d = string_domain_utf8_to_alabel(++s, err), err && *err) + if ( !(l = string_localpart_utf8_to_alabel(l, err)) + || !(d = string_domain_utf8_to_alabel(++s, err)) ) return NULL; l = string_sprintf("%s@%s", l, d); @@ -182,10 +245,21 @@ Returns: nothing void utf8_version_report(FILE *f) { +#ifdef SUPPORT_I18N_2008 +fprintf(f, "Library version: IDN2: Compile: %s\n" + " Runtime: %s\n", + IDN2_VERSION, + idn2_check_version(NULL)); +fprintf(f, "Library version: Stringprep: Compile: %s\n" + " Runtime: %s\n", + STRINGPREP_VERSION, + stringprep_check_version(NULL)); +#else fprintf(f, "Library version: IDN: Compile: %s\n" " Runtime: %s\n", STRINGPREP_VERSION, stringprep_check_version(NULL)); +#endif } #endif /* whole file */ diff --git a/src/src/verify.c b/src/src/verify.c index 9652a395f..0959b0051 100644 --- a/src/src/verify.c +++ b/src/src/verify.c @@ -940,11 +940,10 @@ can do it there for the non-rcpt-verify case. For this we keep an addresscount. } else if ( addr->prop.utf8_msg && (addr->prop.utf8_downcvt || !(peer_offered & PEER_OFFERED_UTF8)) - && (setflag(addr, af_utf8_downcvt), - from_address = string_address_utf8_to_alabel(from_address, - &addr->message), - addr->message - ) ) + && !(setflag(addr, af_utf8_downcvt), + from_address = string_address_utf8_to_alabel(from_address, + &addr->message) + ) ) { errno = ERRNO_EXPANDFAIL; setflag(addr, af_verify_nsfail); @@ -1121,16 +1120,14 @@ can do it there for the non-rcpt-verify case. For this we keep an addresscount. #ifdef SUPPORT_I18N /*XXX should the conversion be moved into transport_rcpt_address() ? 
*/ - uschar * dummy_errstr = NULL; if ( testflag(addr, af_utf8_downcvt) - && (rcpt = string_address_utf8_to_alabel(rcpt, &dummy_errstr), - dummy_errstr - ) ) - { - errno = ERRNO_EXPANDFAIL; - *failure_ptr = US"recipient"; - done = FALSE; - } + && !(rcpt = string_address_utf8_to_alabel(rcpt, NULL)) + ) + { + errno = ERRNO_EXPANDFAIL; + *failure_ptr = US"recipient"; + done = FALSE; + } else #endif diff --git a/test/scripts/4200-International/4200 b/test/scripts/4200-International/4200 index d15b67d19..dd5348ea3 100644 --- a/test/scripts/4200-International/4200 +++ b/test/scripts/4200-International/4200 @@ -33,10 +33,10 @@ original: bogus.\xD9\x84.com conversion: ${utf8_domain_to_alabel:bogus.\xD9\x84.com} golden: bogus.xn--ghb.com -original: arabic.\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A\xD8\x9F.com +original: arabic.\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A.com conversion: ${utf8_domain_to_alabel:arabic.\xD9\x84\xD9\x8A\xD9\x87\xD9\x85\xD8\xA7\xD8\xA8\xD8\xAA\xD9\x83\xD9\x84\ -\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A\xD8\x9F.com} -golden: arabic.xn--egbpdaj6bu4bxfgehfvwxn.com +\xD9\x85\xD9\x88\xD8\xB4\xD8\xB9\xD8\xB1\xD8\xA8\xD9\x8A.com} +golden: arabic.xn--mgbcah9ar9a4efegftvvn.com original simpl.chinese.\xE4\xBB\x96\xE4\xBB\xAC\xE4\xB8\xBA\xE4\xBB\x80\xE4\xB9\x88\xE4\xB8\x8D\xE8\xAF\xB4\xE4\xB8\xAD\xE6\x96\x87.com conversion: ${utf8_domain_to_alabel:simpl.chinese.\xE4\xBB\x96\xE4\xBB\xAC\xE4\xB8\xBA\xE4\xBB\x80\ @@ -49,9 +49,7 @@ conversion: ${utf8_domain_to_alabel:trad.chinese.\xE4\xBB\x96\xE5\x80\x91\xE7\x8 golden: trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com original czech.\x50\x72\x6F\xC4\x8D\x70\x72\x6F\x73\x74\xC4\x9B\x6E\x65\x6D\x6C\x75\x76\xC3\xAD\xC4\x8D\x65\x73\x6B\x79.com -conversion: 
${utf8_domain_to_alabel:czech.\x50\x72\x6F\xC4\x8D\x70\x72\x6F\x73\x74\xC4\x9B\x6E\x65\ -\x6D\x6C\x75\x76\xC3\xAD\xC4\x8D\x65\x73\x6B\x79.com} -golden: czech.xn--Proprostnemluvesky-uyb24dma41a.com +conversion: ${utf8_domain_to_alabel:czech.Pro\xC4\x8Dprost\xC4\x9Bnemluv\xC3\xAD\xC4\x8Desky.com} original hebrew.\xD7\x9C\xD7\x9E\xD7\x94\xD7\x94\xD7\x9D\xD7\xA4\xD7\xA9\xD7\x95\xD7\x98\xD7\x9C\xD7\x90\xD7\x9E\xD7\x93\xD7\x91\xD7\xA8\xD7\x99\xD7\x9D\xD7\xA2\xD7\x91\xD7\xA8\xD7\x99\xD7\xAA.com conversion: ${utf8_domain_to_alabel:hebrew.\xD7\x9C\xD7\x9E\xD7\x94\xD7\x94\xD7\x9D\xD7\xA4\xD7\xA9\ @@ -107,10 +105,10 @@ golden: xn--strae-oqa.de a-label domain to utf-8: conversion: ${utf8_domain_from_alabel:arab.xn--ghb.com} -conversion: ${utf8_domain_from_alabel:arab.xn--egbpdaj6bu4bxfgehfvwxn.com} +conversion: ${utf8_domain_from_alabel:arab.xn--mgbcah9ar9a4efegftvvn.com} conversion: ${utf8_domain_from_alabel:simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com} conversion: ${utf8_domain_from_alabel:trad.chinese.xn--ihqwctvzc91f659drss3x8bo0yb.com} -conversion: ${utf8_domain_from_alabel:czech.xn--Proprostnemluvesky-uyb24dma41a.com} +conversion: ${utf8_domain_from_alabel:czech.xn--proprostnemluvesky-uyb24dma41a.com} conversion: ${utf8_domain_from_alabel:hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com} conversion: ${utf8_domain_from_alabel:hindi.xn--i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd.com} conversion: ${utf8_domain_from_alabel:japanese.xn--n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa.com} diff --git a/test/stdout/4200 b/test/stdout/4200 index 02bc22dce..91e8ea144 100644 --- a/test/stdout/4200 +++ b/test/stdout/4200 @@ -27,9 +27,9 @@ > conversion: bogus.xn--ghb.com > golden: bogus.xn--ghb.com > -> original: arabic.ليهمابتكلموشعربي؟.com -> conversion: arabic.xn--egbpdaj6bu4bxfgehfvwxn.com -> golden: arabic.xn--egbpdaj6bu4bxfgehfvwxn.com +> original: arabic.ليهمابتكلموشعربي.com +> conversion: arabic.xn--mgbcah9ar9a4efegftvvn.com +> golden: arabic.xn--mgbcah9ar9a4efegftvvn.com > > original 
simpl.chinese.他们为什么不说中文.com > conversion: simpl.chinese.xn--ihqwcrb4cv8a8dqg056pqjye.com @@ -41,7 +41,6 @@ > > original czech.Pročprostěnemluvíčesky.com > conversion: czech.xn--proprostnemluvesky-uyb24dma41a.com -> golden: czech.xn--Proprostnemluvesky-uyb24dma41a.com > > original hebrew.למההםפשוטלאמדבריםעברית.com > conversion: hebrew.xn--4dbcagdahymbxekheh6e0a7fei0b.com @@ -80,10 +79,10 @@ > a-label domain to utf-8: > > conversion: arab.ل.com -> conversion: arab.ليهمابتكلموشعربي؟.com +> conversion: arab.ليهمابتكلموشعربي.com > conversion: simpl.chinese.他们为什么不说中文.com > conversion: trad.chinese.他們爲什麽不說中文.com -> conversion: czech.Pročprostěnemluvíčesky.com +> conversion: czech.pročprostěnemluvíčesky.com > conversion: hebrew.למההםפשוטלאמדבריםעברית.com > conversion: hindi.यहलोगहिन्दीक्योंनहींबोलसकतेहैं.com > conversion: japanese.なぜみんな日本語を話してくれないのか.com -- 2.30.2