X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/2484c35c5deec375307bd4f51aa89ae5080a4e56..HEAD:/src/src/string.c diff --git a/src/src/string.c b/src/src/string.c index 51b12c5e5..1169f0e2c 100644 --- a/src/src/string.c +++ b/src/src/string.c @@ -2,7 +2,7 @@ * Exim - an Internet mail transport agent * *************************************************/ -/* Copyright (c) The Exim Maintainers 2020 - 2022 */ +/* Copyright (c) The Exim Maintainers 2020 - 2024 */ /* Copyright (c) University of Cambridge 1995 - 2018 */ /* See the file NOTICE for conditions of use and distribution. */ /* SPDX-License-Identifier: GPL-2.0-or-later */ @@ -40,119 +40,129 @@ Returns: 0 if the string is not a textual representation of an IP address The legacy string_is_ip_address() function follows below. */ -int -string_is_ip_addressX(const uschar *ip_addr, int *maskptr, const uschar **errp) { - struct addrinfo hints; - struct addrinfo *res; - - uschar *slash, *percent; - uschar *endp = 0; - long int mask = 0; - const uschar *addr = 0; - - /* If there is a slash, but we didn't request a (optional) netmask, - we return failure, as we do if the mask isn't a pure numerical value, - or if it is negative. The actual length is checked later, once we know - the address family. */ - if (slash = Ustrchr(ip_addr, '/')) +int +string_is_ip_addressX(const uschar * ip_addr, int * maskptr, const uschar ** errp) +{ +uschar * slash, * percent, * endp = NULL; +long int mask = 0; +const uschar * addr = NULL; +int af; +union { /* we do not need this, but inet_pton() needs a place for storage */ + struct in_addr sa4; + struct in6_addr sa6; +} sa; + +/* If there is a slash, but we didn't request a (optional) netmask, +we return failure, as we do if the mask isn't a pure numerical value, +or if it is negative. The actual length is checked later, once we know +the address family. */ + +if (slash = Ustrchr(ip_addr, '/')) { - if (!maskptr) + uschar * rest; + + if (!maskptr) { - if (errp) *errp = "netmask found, but not requested"; - return 0; + if (errp) *errp = US"netmask found, but not requested"; + return 0; } - uschar *rest; - mask = Ustrtol(slash+1, &rest, 10); - if (*rest || mask < 0) + mask = Ustrtol(slash+1, &rest, 10); + if (*rest || mask < 0) { - if (errp) *errp = "netmask not numeric or <0"; - return 0; + if (errp) *errp = US"netmask not numeric or <0"; + return 0; } - *maskptr = slash - ip_addr; /* offset of the slash */ - endp = slash; - } else if (maskptr) *maskptr = 0; /* no slash found */ + *maskptr = slash - ip_addr; /* offset of the slash */ + endp = slash; + } +else if (maskptr) + *maskptr = 0; /* no slash found */ - /* The interface-ID suffix (%) is optional (for IPv6). If it - exists, we check it syntactically. Later, if we know the address - family is IPv4, we might reject it. - The interface-ID is mutually exclusive with the netmask, to the - best of my knowledge. */ - if (percent = Ustrchr(ip_addr, '%')) +/* The interface-ID suffix (%) is optional (for IPv6). If it +exists, we check it syntactically. Later, if we know the address +family is IPv4, we might reject it. +The interface-ID is mutually exclusive with the netmask, to the +best of my knowledge. */ + +if (percent = Ustrchr(ip_addr, '%')) { - if (slash) + if (slash) { - if (errp) *errp = "interface-ID and netmask are mutually exclusive"; - return 0; + if (errp) *errp = US"interface-ID and netmask are mutually exclusive"; + return 0; } - for (uschar *p = percent+1; *p; p++) - if (!isalnum(*p) && !ispunct(*p)) - { - if (errp) *errp = "interface-ID must match [[:alnum:][:punct:]]"; - return 0; - } - endp = percent; + for (uschar *p = percent+1; *p; p++) + if (!isalnum(*p) && !ispunct(*p)) + { + if (errp) *errp = US"interface-ID must match [[:alnum:][:punct:]]"; + return 0; + } + endp = percent; } - /* inet_pton() can't parse netmasks and interface IDs, so work on a shortened copy - allocated on the current stack */ - if (endp) { - ptrdiff_t l = endp - ip_addr; - if (l > 255) +/* inet_pton() can't parse netmasks and interface IDs, so work on a shortened copy +allocated on the current stack */ + +if (endp) + { + ptrdiff_t l = endp - ip_addr; + if (l > 255) { - if (errp) *errp = "rudiculous long ip address string"; - return 0; + if (errp) *errp = US"rudiculous long ip address string"; + return 0; } - addr = string_copyn(ip_addr, l); - } else addr = ip_addr; - - int af; - union { /* we do not need this, but inet_pton() needs a place for storage */ - struct in_addr sa4; - struct in6_addr sa6; - } sa; + addr = string_copyn(ip_addr, l); + } +else + addr = ip_addr; - af = Ustrchr(addr, ':') ? AF_INET6 : AF_INET; - if (!inet_pton(af, addr, &sa)) +af = Ustrchr(addr, ':') ? AF_INET6 : AF_INET; +if (!inet_pton(af, CCS addr, &sa)) { - if (errp) *errp = af == AF_INET6 ? "IP address string not parsable as IPv6" - : "IP address string not parsable IPv4"; - return 0; + if (errp) *errp = af == AF_INET6 ? US"IP address string not parsable as IPv6" + : US"IP address string not parsable IPv4"; + return 0; } - /* we do not check the values of the mask here, as - this is done on the callers side (but I don't understand why), so - actually I'd like to do it here, but it breaks at least 0002 */ - switch (af) + +/* we do not check the values of the mask here, as +this is done on the callers side (but I don't understand why), so +actually I'd like to do it here, but it breaks at least testcase 0002 */ + +switch (af) { - case AF_INET6: - if (errp && mask > 128) - { - *errp = "IPv6 netmask value must not be >128"; - return 0; - } - return 6; - case AF_INET: - if (percent) - { - if (errp) *errp = "IPv4 address string must not have an interface-ID"; - return 0; - } - if (errp && mask > 32) { - *errp = "IPv4 netmask value must not be >32"; - return 0; - } - return 4; - default: - if (errp) *errp = "unknown address family (should not happen)"; - return 0; - } + case AF_INET6: + if (errp && mask > 128) + { + *errp = US"IPv6 netmask value must not be >128"; + return 0; + } + return 6; + case AF_INET: + if (percent) + { + if (errp) *errp = US"IPv4 address string must not have an interface-ID"; + return 0; + } + if (errp && mask > 32) + { + *errp = US"IPv4 netmask value must not be >32"; + return 0; + } + return 4; + default: + if (errp) *errp = US"unknown address family (should not happen)"; + return 0; + } } + int -string_is_ip_address(const uschar *ip_addr, int *maskptr) { - return string_is_ip_addressX(ip_addr, maskptr, 0); +string_is_ip_address(const uschar * ip_addr, int * maskptr) +{ +return string_is_ip_addressX(ip_addr, maskptr, NULL); } #endif /* COMPILE_UTILITY */ @@ -639,7 +649,7 @@ uschar * t, * yield; /* First find the end of the string */ if (*s != '\"') - while (*s && !isspace(*s)) s++; + Uskip_nonwhite(&s); else { s++; @@ -1082,6 +1092,49 @@ return list; +/* Listmaker that takes a format string and args for the element. +A flag arg is required to handle embedded sep chars in the (expanded) element; +if false then no check is done */ + +gstring * +string_append_listele_fmt(gstring * list, uschar sep, BOOL check, + const char * fmt, ...) +{ +va_list ap; +unsigned start; +gstring * g; + +if (list && list->ptr) + { + list = string_catn(list, &sep, 1); + start = list->ptr; + } +else + start = 0; + +va_start(ap, fmt); +list = string_vformat_trc(list, US __FUNCTION__, __LINE__, + STRING_SPRINTF_BUFFER_SIZE, SVFMT_REBUFFER|SVFMT_EXTEND, fmt, ap); +va_end(ap); + +(void) string_from_gstring(list); + +/* if the appended element turns out to have an embedded sep char, rewind +and do the lazy-coded separate string method */ + +if (!check || !Ustrchr(&list->s[start], sep)) + return list; + +va_start(ap, fmt); +g = string_vformat_trc(NULL, US __FUNCTION__, __LINE__, + STRING_SPRINTF_BUFFER_SIZE, SVFMT_REBUFFER|SVFMT_EXTEND, fmt, ap); +va_end(ap); + +list->ptr = start; +return string_append_listele_n(list, sep, g->s, g->ptr); +} + + /* A slightly-bogus listmaker utility; the separator is a string so can be multiple chars - there is no checking for the element content containing any of the separator. */ @@ -1224,13 +1277,6 @@ return g; } -gstring * -string_cat(gstring * g, const uschar * s) -{ -return string_catn(g, s, Ustrlen(s)); -} - - /************************************************* * Append strings to another string * @@ -1323,7 +1369,7 @@ Arguments: ap variable-args pointer Flags: - SVFMT_EXTEND buffer can be created or exteded as needed + SVFMT_EXTEND buffer can be created or extended as needed SVFMT_REBUFFER buffer can be recopied to tainted mem as needed SVFMT_TAINT_NOCHK do not check inputs for taint @@ -1338,7 +1384,8 @@ The return value can be NULL to signify overflow. Field width: decimal digits, or * Precision: dot, followed by decimal digits or * Length modifiers: h L l ll z -Conversion specifiers: n d o u x X p f e E g G % c s S T Y D M +Conversion specifiers: n d o u x X p f e E g G % c s S T W V Y D M H Z b +Alternate-form: #: s/Y/b are silent about a null string Returns the possibly-new (if copy for growth or taint-handling was needed) string, not nul-terminated. @@ -1587,11 +1634,143 @@ while (*fp) case 'Y': /* gstring pointer */ { gstring * zg = va_arg(ap, gstring *); - if (zg) { s = CS zg->s; slen = zg->ptr; } + if (zg) { s = CS zg->s; slen = gstring_length(zg); } else { s = null; slen = Ustrlen(s); } goto INSERT_GSTRING; } +#ifndef COMPILE_UTILITY + case 'b': /* blob pointer, carrying a string */ + { + blob * b = va_arg(ap, blob *); + if (b) { s = CS b->data; slen = b->len; } + else { s = null; slen = Ustrlen(s); } + goto INSERT_GSTRING; + } + + case 'V': /* string; maybe convert ascii-art to UTF-8 chars */ + { + gstring * zg = NULL; + s = va_arg(ap, char *); + if (IS_DEBUG(D_noutf8)) + for ( ; *s; s++) + zg = string_catn(zg, CUS (*s == 'K' ? "|" : s), 1); + else + for ( ; *s; s++) switch (*s) + { + case '\\': zg = string_catn(zg, US UTF8_UP_RIGHT, 3); break; + case '/': zg = string_catn(zg, US UTF8_DOWN_RIGHT, 3); break; + case '-': + case '_': zg = string_catn(zg, US UTF8_HORIZ, 3); break; + case '|': zg = string_catn(zg, US UTF8_VERT, 3); break; + case 'K': zg = string_catn(zg, US UTF8_VERT_RIGHT, 3); break; + case '<': zg = string_catn(zg, US UTF8_LEFT_TRIANGLE, 3); break; + case '>': zg = string_catn(zg, US UTF8_RIGHT_TRIANGLE, 3); break; + default: zg = string_catn(zg, CUS s, 1); break; + } + + if (!zg) + break; + s = CS zg->s; + slen = gstring_length(zg); + goto INSERT_GSTRING; + } + + case 'W': /* Maybe mark up ctrls, spaces & newlines */ + s = va_arg(ap, char *); + if (s && !IS_DEBUG(D_noutf8)) + { + gstring * zg = NULL; + int p = precision; + + /* If a precision was given, we can handle embedded NULs. Take it as + applying to the input and expand it for the transformed result */ + + for ( ; precision >= 0 || *s; s++) + if (p >= 0 && --p < 0) + break; + else switch (*s) + { + case ' ': + zg = string_catn(zg, CUS UTF8_LIGHT_SHADE, 3); + if (precision >= 0) precision += 2; + break; + case '\n': + zg = string_catn(zg, CUS UTF8_L_ARROW_HOOK "\n", 4); + if (precision >= 0) precision += 3; + break; + default: + if (*s <= ' ') + { /* base of UTF8 symbols for ASCII control chars */ + uschar ctrl_symbol[3] = {[0]=0xe2, [1]=0x90, [2]=0x80}; + ctrl_symbol[2] |= *s; + zg = string_catn(zg, ctrl_symbol, 3); + if (precision >= 0) precision += 2; + } + else + zg = string_catn(zg, CUS s, 1); + break; + } + if (zg) { s = CS zg->s; slen = gstring_length(zg); } + else { s = ""; slen = 0; } + } + else + { + if (!s) s = null; + slen = Ustrlen(s); + } + goto INSERT_GSTRING; + case 'Z': /* pdkim-style "quoteprint" */ + { + gstring * zg = NULL; + int p = precision; /* If given, we can handle embedded NULs */ + + s = va_arg(ap, char *); + for ( ; precision >= 0 || *s; s++) + if (p >= 0 && --p < 0) + break; + else switch (*s) + { + case ' ' : zg = string_catn(zg, US"{SP}", 4); break; + case '\t': zg = string_catn(zg, US"{TB}", 4); break; + case '\r': zg = string_catn(zg, US"{CR}", 4); break; + case '\n': zg = string_catn(zg, US"{LF}", 4); break; + case '{' : zg = string_catn(zg, US"{BO}", 4); break; + case '}' : zg = string_catn(zg, US"{BC}", 4); break; + default: + { + uschar u = *s; + if ( (u < 32) || (u > 127) ) + zg = string_fmt_append(zg, "{%02x}", u); + else + zg = string_catn(zg, US s, 1); + break; + } + } + if (zg) { s = CS zg->s; precision = slen = gstring_length(zg); } + else { s = ""; slen = 0; } + } + goto INSERT_GSTRING; + + case 'H': /* pdkim-style "hexprint" */ + { + s = va_arg(ap, char *); + if (precision < 0) break; /* precision must be given */ + if (s) + { + gstring * zg = NULL; + for (int p = precision; p > 0; p--) + zg = string_fmt_append(zg, "%02x", * US s++); + + if (zg) { s = CS zg->s; precision = slen = gstring_length(zg); } + else { s = ""; slen = 0; } + } + else + { s = ""; precision = slen = 6; } + } + goto INSERT_GSTRING; + +#endif case 's': case 'S': /* Forces *lower* case */ case 'T': /* Forces *upper* case */ @@ -1600,7 +1779,7 @@ while (*fp) if (!s) s = null; slen = Ustrlen(s); - INSERT_GSTRING: /* Coome to from %Y above */ + INSERT_GSTRING: /* Come to from %Y above */ if (!(flags & SVFMT_TAINT_NOCHK) && is_incompatible(g->s, s)) if (flags & SVFMT_REBUFFER) @@ -1629,7 +1808,7 @@ while (*fp) } /* If a width is not specified and the precision is specified, set - the width to the precision, or the string length if shorted. */ + the width to the precision, or the string length if shorter. */ else if (precision >= 0) width = precision < slen ? precision : slen; @@ -1899,3 +2078,5 @@ return 0; #endif /* End of string.c */ +/* vi: aw ai sw=2 +*/