X-Git-Url: https://git.exim.org/exim.git/blobdiff_plain/3e7e6162870de6545f3ee53d0c52d14a6b9434ef..3f77bd134b64c532748b83c3931df07058268b5e:/src/src/string.c diff --git a/src/src/string.c b/src/src/string.c index 52b1d2fb5..379be2035 100644 --- a/src/src/string.c +++ b/src/src/string.c @@ -2,7 +2,7 @@ * Exim - an Internet mail transport agent * *************************************************/ -/* Copyright (c) The Exim Maintainers 2020 - 2022 */ +/* Copyright (c) The Exim Maintainers 2020 - 2023 */ /* Copyright (c) University of Cambridge 1995 - 2018 */ /* See the file NOTICE for conditions of use and distribution. */ /* SPDX-License-Identifier: GPL-2.0-or-later */ @@ -30,123 +30,141 @@ Arguments: maskptr NULL if no mask is permitted to follow otherwise, points to an int where the offset of '/' is placed if there is no / followed by trailing digits, *maskptr is set 0 + errp NULL if no diagnostic information is required, and if the netmask + length should not be checked. Otherwise it is set pointing to a short + descriptive text. Returns: 0 if the string is not a textual representation of an IP address 4 if it is an IPv4 address 6 if it is an IPv6 address + +The legacy string_is_ip_address() function follows below. */ int -string_is_ip_address(const uschar *s, int *maskptr) +string_is_ip_addressX(const uschar * ip_addr, int * maskptr, const uschar ** errp) { -int yield = 4; +uschar * slash, * percent, * endp = NULL; +long int mask = 0; +const uschar * addr = NULL; +int af; +union { /* we do not need this, but inet_pton() needs a place for storage */ + struct in_addr sa4; + struct in6_addr sa6; +} sa; + +/* If there is a slash, but we didn't request a (optional) netmask, +we return failure, as we do if the mask isn't a pure numerical value, +or if it is negative. The actual length is checked later, once we know +the address family. */ + +if (slash = Ustrchr(ip_addr, '/')) + { + uschar * rest; -/* If an optional mask is permitted, check for it. If found, pass back the -offset. */ + if (!maskptr) + { + if (errp) *errp = US"netmask found, but not requested"; + return 0; + } -if (maskptr) - { - const uschar *ss = s + Ustrlen(s); - *maskptr = 0; - if (s != ss && isdigit(*(--ss))) + mask = Ustrtol(slash+1, &rest, 10); + if (*rest || mask < 0) { - while (ss > s && isdigit(ss[-1])) ss--; - if (ss > s && *(--ss) == '/') *maskptr = ss - s; + if (errp) *errp = US"netmask not numeric or <0"; + return 0; } + + *maskptr = slash - ip_addr; /* offset of the slash */ + endp = slash; } +else if (maskptr) + *maskptr = 0; /* no slash found */ -/* A colon anywhere in the string => IPv6 address */ +/* The interface-ID suffix (%) is optional (for IPv6). If it +exists, we check it syntactically. Later, if we know the address +family is IPv4, we might reject it. +The interface-ID is mutually exclusive with the netmask, to the +best of my knowledge. */ -if (Ustrchr(s, ':') != NULL) +if (percent = Ustrchr(ip_addr, '%')) { - BOOL had_double_colon = FALSE; - BOOL v4end = FALSE; - - yield = 6; - - /* An IPv6 address must start with hex digit or double colon. A single - colon is invalid. */ - - if (*s == ':' && *(++s) != ':') return 0; - - /* Now read up to 8 components consisting of up to 4 hex digits each. There - may be one and only one appearance of double colon, which implies any number - of binary zero bits. The number of preceding components is held in count. */ - - for (int count = 0; count < 8; count++) + if (slash) { - /* If the end of the string is reached before reading 8 components, the - address is valid provided a double colon has been read. This also applies - if we hit the / that introduces a mask or the % that introduces the - interface specifier (scope id) of a link-local address. */ - - if (*s == 0 || *s == '%' || *s == '/') return had_double_colon ? yield : 0; - - /* If a component starts with an additional colon, we have hit a double - colon. This is permitted to appear once only, and counts as at least - one component. The final component may be of this form. */ - - if (*s == ':') - { - if (had_double_colon) return 0; - had_double_colon = TRUE; - s++; - continue; - } - - /* If the remainder of the string contains a dot but no colons, we - can expect a trailing IPv4 address. This is valid if either there has - been no double-colon and this is the 7th component (with the IPv4 address - being the 7th & 8th components), OR if there has been a double-colon - and fewer than 6 components. */ - - if (Ustrchr(s, ':') == NULL && Ustrchr(s, '.') != NULL) + if (errp) *errp = US"interface-ID and netmask are mutually exclusive"; + return 0; + } + for (uschar *p = percent+1; *p; p++) + if (!isalnum(*p) && !ispunct(*p)) { - if ((!had_double_colon && count != 6) || - (had_double_colon && count > 6)) return 0; - v4end = TRUE; - yield = 6; - break; + if (errp) *errp = US"interface-ID must match [[:alnum:][:punct:]]"; + return 0; } + endp = percent; + } - /* Check for at least one and not more than 4 hex digits for this - component. */ - - if (!isxdigit(*s++)) return 0; - if (isxdigit(*s) && isxdigit(*(++s)) && isxdigit(*(++s))) s++; - - /* If the component is terminated by colon and there is more to - follow, skip over the colon. If there is no more to follow the address is - invalid. */ +/* inet_pton() can't parse netmasks and interface IDs, so work on a shortened copy +allocated on the current stack */ - if (*s == ':' && *(++s) == 0) return 0; +if (endp) + { + ptrdiff_t l = endp - ip_addr; + if (l > 255) + { + if (errp) *errp = US"rudiculous long ip address string"; + return 0; } + addr = string_copyn(ip_addr, l); + } +else + addr = ip_addr; - /* If about to handle a trailing IPv4 address, drop through. Otherwise - all is well if we are at the end of the string or at the mask or at a percent - sign, which introduces the interface specifier (scope id) of a link local - address. */ - - if (!v4end) - return (*s == 0 || *s == '%' || - (*s == '/' && maskptr != NULL && *maskptr != 0))? yield : 0; +af = Ustrchr(addr, ':') ? AF_INET6 : AF_INET; +if (!inet_pton(af, CCS addr, &sa)) + { + if (errp) *errp = af == AF_INET6 ? US"IP address string not parsable as IPv6" + : US"IP address string not parsable IPv4"; + return 0; } -/* Test for IPv4 address, which may be the tail-end of an IPv6 address. */ +/* we do not check the values of the mask here, as +this is done on the callers side (but I don't understand why), so +actually I'd like to do it here, but it breaks at least testcase 0002 */ -for (int i = 0; i < 4; i++) +switch (af) { - long n; - uschar * end; - - if (i != 0 && *s++ != '.') return 0; - n = strtol(CCS s, CSS &end, 10); - if (n > 255 || n < 0 || end <= s || end > s+3) return 0; - s = end; + case AF_INET6: + if (errp && mask > 128) + { + *errp = US"IPv6 netmask value must not be >128"; + return 0; + } + return 6; + case AF_INET: + if (percent) + { + if (errp) *errp = US"IPv4 address string must not have an interface-ID"; + return 0; + } + if (errp && mask > 32) + { + *errp = US"IPv4 netmask value must not be >32"; + return 0; + } + return 4; + default: + if (errp) *errp = US"unknown address family (should not happen)"; + return 0; } +} -return !*s || (*s == '/' && maskptr && *maskptr != 0) ? yield : 0; + +int +string_is_ip_address(const uschar * ip_addr, int * maskptr) +{ +return string_is_ip_addressX(ip_addr, maskptr, NULL); } + #endif /* COMPILE_UTILITY */ @@ -631,7 +649,7 @@ uschar * t, * yield; /* First find the end of the string */ if (*s != '\"') - while (*s && !isspace(*s)) s++; + Uskip_nonwhite(&s); else { s++; @@ -1330,7 +1348,7 @@ The return value can be NULL to signify overflow. Field width: decimal digits, or * Precision: dot, followed by decimal digits or * Length modifiers: h L l ll z -Conversion specifiers: n d o u x X p f e E g G % c s S T Y D M +Conversion specifiers: n d o u x X p f e E g G % c s S T W V Y D M Returns the possibly-new (if copy for growth or taint-handling was needed) string, not nul-terminated. @@ -1579,11 +1597,77 @@ while (*fp) case 'Y': /* gstring pointer */ { gstring * zg = va_arg(ap, gstring *); - if (zg) { s = CS zg->s; slen = zg->ptr; } + if (zg) { s = CS zg->s; slen = gstring_length(zg); } else { s = null; slen = Ustrlen(s); } goto INSERT_GSTRING; } +#ifndef COMPILE_UTILITY + case 'V': /* Maybe convert ascii-art to UTF-8 chars */ + { + gstring * zg = NULL; + s = va_arg(ap, char *); + if (IS_DEBUG(D_noutf8)) + for ( ; *s; s++) + zg = string_catn(zg, CUS (*s == 'K' ? "|" : s), 1); + else + for ( ; *s; s++) switch (*s) + { + case '\\': zg = string_catn(zg, US UTF8_UP_RIGHT, 3); break; + case '/': zg = string_catn(zg, US UTF8_DOWN_RIGHT, 3); break; + case '-': + case '_': zg = string_catn(zg, US UTF8_HORIZ, 3); break; + case '|': zg = string_catn(zg, US UTF8_VERT, 3); break; + case 'K': zg = string_catn(zg, US UTF8_VERT_RIGHT, 3); break; + case '<': zg = string_catn(zg, US UTF8_LEFT_TRIANGLE, 3); break; + case '>': zg = string_catn(zg, US UTF8_RIGHT_TRIANGLE, 3); break; + default: zg = string_catn(zg, CUS s, 1); break; + } + if (!zg) + break; + s = CS zg->s; + slen = gstring_length(zg); + goto INSERT_GSTRING; + } + + case 'W': /* Maybe mark up spaces & newlines */ + s = va_arg(ap, char *); + if (Ustrpbrk(s, " \n") && !IS_DEBUG(D_noutf8)) + { + gstring * zg = NULL; + int p = precision; + for ( ; *s; s++) + { + /* Take a given precision as applying to the input; expand + it for the transformed result */ + + if (p >= 0 && --p < 0) break; + switch (*s) + { + case ' ': + zg = string_catn(zg, CUS UTF8_LIGHT_SHADE, 3); + if (precision >= 0) precision += 2; + break; + case '\n': + zg = string_catn(zg, CUS UTF8_L_ARROW_HOOK "\n", 4); + if (precision >= 0) precision += 3; + break; + default: + zg = string_catn(zg, CUS s, 1); + break; + } + } + if (zg) { s = CS zg->s; slen = gstring_length(zg); } + else { s = null; slen = Ustrlen(s); } + } + else + { + if (!s) s = null; + slen = Ustrlen(s); + } + goto INSERT_GSTRING; + +#endif case 's': case 'S': /* Forces *lower* case */ case 'T': /* Forces *upper* case */ @@ -1592,7 +1676,7 @@ while (*fp) if (!s) s = null; slen = Ustrlen(s); - INSERT_GSTRING: /* Coome to from %Y above */ + INSERT_GSTRING: /* Come to from %Y above */ if (!(flags & SVFMT_TAINT_NOCHK) && is_incompatible(g->s, s)) if (flags & SVFMT_REBUFFER) @@ -1891,3 +1975,5 @@ return 0; #endif /* End of string.c */ +/* vi: aw ai sw=2 +*/