-/* $Cambridge: exim/src/src/parse.c,v 1.4 2005/02/17 11:58:26 ph10 Exp $ */
-
/*************************************************
* Exim - an Internet mail transport agent *
*************************************************/
-/* Copyright (c) University of Cambridge 1995 - 2005 */
+/* Copyright (c) The Exim Maintainers 2020 - 2022 */
+/* Copyright (c) University of Cambridge 1995 - 2018 */
/* See the file NOTICE for conditions of use and distribution. */
+/* SPDX-License-Identifier: GPL-2.0-or-later */
/* Functions for parsing addresses */
#include "exim.h"
-static uschar *last_comment_position;
+static const uschar *last_comment_position;
#ifdef STAND_ALONE
-address_item *deliver_make_addr(uschar *address, BOOL copy)
+address_item *
+deliver_make_addr(uschar *address, BOOL copy)
{
-address_item *addr = store_get(sizeof(address_item));
+address_item *addr = store_get(sizeof(address_item), GET_UNTAINTED);
addr->next = NULL;
addr->parent = NULL;
addr->address = address;
return addr;
}
-uschar *rewrite_address(uschar *recipient, BOOL dummy1, BOOL dummy2, rewrite_rule
+uschar *
+rewrite_address(uschar *recipient, BOOL dummy1, BOOL dummy2, rewrite_rule
*dummy3, int dummy4)
{
return recipient;
}
-uschar *rewrite_address_qualify(uschar *recipient, BOOL dummy1)
+uschar *
+rewrite_address_qualify(uschar *recipient, BOOL dummy1)
{
return recipient;
}
*/
uschar *
-parse_find_address_end(uschar *s, BOOL nl_ends)
+parse_find_address_end(const uschar *s, BOOL nl_ends)
{
BOOL source_routing = *s == '@';
int no_term = source_routing? 1 : 0;
}
}
-return s;
+return US s;
}
Returns: pointer to the last @ in an address, or NULL if none
*/
-uschar *
-parse_find_at(uschar *s)
+const uschar *
+parse_find_at(const uschar *s)
{
-uschar *t = s + Ustrlen(s);
+const uschar * t = s + Ustrlen(s);
while (--t >= s)
- {
if (*t == '@')
{
int backslash_count = 0;
- uschar *tt = t - 1;
+ const uschar *tt = t - 1;
while (tt > s && *tt-- == '\\') backslash_count++;
if ((backslash_count & 1) == 0) return t;
}
- else if (*t == '\"') return NULL;
- }
+ else if (*t == '\"')
+ return NULL;
+
return NULL;
}
make it possible to ignore comments at the end of compound items.
Argument: current character pointer
-Regurns: new character pointer
+Returns: new character pointer
*/
-static uschar *
-skip_comment(uschar *s)
+static const uschar *
+skip_comment(const uschar *s)
{
last_comment_position = s;
while (*s)
{
int c, level;
- while (isspace(*s)) s++;
- if (*s != '(') break;
+
+ if (Uskip_whitespace(&s) != '(') break;
level = 1;
- while((c = *(++s)) != 0)
+ while((c = *(++s)))
{
if (c == '(') level++;
else if (c == ')') { if (--level <= 0) { s++; break; } }
in []. Make sure the output is set to the null string if there is a syntax
error as well as if there is no domain at all.
+Optionally, msg_id domain literals ( printable-ascii enclosed in [] )
+are permitted.
+
Arguments:
s current character pointer
t where to put the domain
+ msg_id_literals flag for relaxed domain-literal processing
errorptr put error message here on failure (*t will be 0 on exit)
Returns: new character pointer
*/
-static uschar *
-read_domain(uschar *s, uschar *t, uschar **errorptr)
+static const uschar *
+read_domain(const uschar *s, uschar *t, BOOL msg_id_literals, uschar **errorptr)
{
uschar *tt = t;
s = skip_comment(s);
any character except [ ] \, including linear white space, and may contain
quoted characters. However, RFC 821 restricts literals to being dot-separated
3-digit numbers, and we make the obvious extension for IPv6. Go for a sequence
-of digits and dots (hex digits and colons for IPv6) here; later this will be
-checked for being a syntactically valid IP address if it ever gets to a router.
+of digits, dots, hex digits, and colons here; later this will be checked for
+being a syntactically valid IP address if it ever gets to a router.
-If IPv6 is supported, allow both the formal form, with IPV6: at the start, and
-the informal form without it, and accept IPV4: as well, 'cause someone will use
-it sooner or later. */
+Allow both the formal IPv6 form, with IPV6: at the start, and the informal form
+without it, and accept IPV4: as well, 'cause someone will use it sooner or
+later. */
if (*s == '[')
{
*t++ = *s++;
- #if HAVE_IPV6
if (strncmpic(s, US"IPv6:", 5) == 0 || strncmpic(s, US"IPv4:", 5) == 0)
{
memcpy(t, s, 5);
t += 5;
s += 5;
}
- while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++;
- #else
- while (*s == '.' || isdigit(*s)) *t++ = *s++;
- #endif
+ if (msg_id_literals)
+ while (*s >= 33 && *s <= 90 || *s >= 94 && *s <= 126) *t++ = *s++;
+ else
+ while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++;
if (*s == ']') *t++ = *s++; else
{
*tt = 0;
}
- if (!allow_domain_literals)
+ if (!allow_domain_literals && !msg_id_literals)
{
*errorptr = US"domain literals not allowed";
*tt = 0;
Returns: new character pointer
*/
-static uschar *
-read_local_part(uschar *s, uschar *t, uschar **error, BOOL allow_null)
+static const uschar *
+read_local_part(const uschar *s, uschar *t, uschar **error, BOOL allow_null)
{
uschar *tt = t;
*error = NULL;
if (*s == '\"')
{
*t++ = '\"';
- while ((c = *(++s)) != 0 && c != '\"')
+ while ((c = *++s) && c != '\"')
{
*t++ = c;
- if (c == '\\' && s[1] != 0) *t++ = *(++s);
+ if (c == '\\' && s[1]) *t++ = *++s;
}
if (c == '\"')
{
else while (!mac_iscntrl_or_special(*s) || *s == '\\')
{
c = *t++ = *s++;
- if (c == '\\' && *s != 0) *t++ = *s++;
+ if (c == '\\' && *s) *t++ = *s++;
}
/* Terminate the word and skip subsequent comment */
Returns: new character pointer
*/
-static uschar *
-read_route(uschar *s, uschar *t, uschar **errorptr)
+static const uschar *
+read_route(const uschar *s, uschar *t, uschar **errorptr)
{
BOOL commas = FALSE;
*errorptr = NULL;
while (*s == '@')
{
*t++ = '@';
- s = read_domain(s+1, t, errorptr);
+ s = read_domain(s+1, t, FALSE, errorptr);
if (*t == 0) return s;
t += Ustrlen((const uschar *)t);
if (*s != ',') break;
Returns: new character pointer
*/
-static uschar *
-read_addr_spec(uschar *s, uschar *t, int term, uschar **errorptr,
+static const uschar *
+read_addr_spec(const uschar *s, uschar *t, int term, uschar **errorptr,
uschar **domainptr)
{
s = read_local_part(s, t, errorptr, FALSE);
if (*errorptr == NULL)
- {
if (*s != term)
- {
if (*s != '@')
*errorptr = string_sprintf("\"@\" or \".\" expected after \"%s\"", t);
else
t += Ustrlen((const uschar *)t);
*t++ = *s++;
*domainptr = t;
- s = read_domain(s, t, errorptr);
+ s = read_domain(s, t, FALSE, errorptr);
}
- }
- }
return s;
}
TRUE and parse_found_group is FALSE when this function is called, an address
which is the start of a group (i.e. preceded by a phrase and a colon) is
recognized; the phrase is ignored and the flag parse_found_group is set. If
-this flag is TRUE at the end of an address, then if an extraneous semicolon is
-found, it is ignored and the flag is cleared. This logic is used only when
-scanning through addresses in headers, either to fulfil the -t option or for
-rewriting or checking header syntax.
+this flag is TRUE at the end of an address, and if an extraneous semicolon is
+found, it is ignored and the flag is cleared.
+
+This logic is used only when scanning through addresses in headers, either to
+fulfil the -t option, or for rewriting, or for checking header syntax. Because
+the group "state" has to be remembered between multiple calls of this function,
+the variables parse_{allow,found}_group are global. It is important to ensure
+that they are reset to FALSE at the end of scanning a header's list of
+addresses.
Arguments:
mailbox points to the RFC822 mailbox
#define FAILED(s) { *errorptr = s; goto PARSE_FAILED; }
uschar *
-parse_extract_address(uschar *mailbox, uschar **errorptr, int *start, int *end,
+parse_extract_address(const uschar *mailbox, uschar **errorptr, int *start, int *end,
int *domain, BOOL allow_null)
{
-uschar *yield = store_get(Ustrlen(mailbox) + 1);
-uschar *startptr, *endptr;
-uschar *s = (uschar *)mailbox;
-uschar *t = (uschar *)yield;
+uschar * yield = store_get(Ustrlen(mailbox) + 1, mailbox);
+const uschar *startptr, *endptr;
+const uschar *s = US mailbox;
+uschar *t = US yield;
*domain = 0;
s = skip_comment(s);
startptr = s; /* In case addr-spec */
s = read_local_part(s, t, errorptr, TRUE); /* Dot separated words */
-if (*errorptr != NULL) goto PARSE_FAILED;
+if (*errorptr) goto PARSE_FAILED;
/* If the terminator is neither < nor @ then the format of the address
must either be a bare local-part (we are now at the end), or a phrase
if (*s != '@' && *s != '<')
{
- if (*s == 0 || *s == ';')
+ if (!*s || *s == ';')
{
- if (*t == 0) FAILED(US"empty address");
+ if (!*t) FAILED(US"empty address");
endptr = last_comment_position;
goto PARSE_SUCCEEDED; /* Bare local part */
}
end of string will produce a null local_part and therefore fail. We don't
need to keep updating t, as the phrase isn't to be kept. */
- while (*s != '<' && (!parse_allow_group || *s != ':'))
+ while (*s != '<' && (!f.parse_allow_group || *s != ':'))
{
s = read_local_part(s, t, errorptr, FALSE);
- if (*errorptr != NULL)
+ if (*errorptr)
{
*errorptr = string_sprintf("%s (expected word or \"<\")", *errorptr);
goto PARSE_FAILED;
if (*s == ':')
{
- parse_found_group = TRUE;
- parse_allow_group = FALSE;
+ f.parse_found_group = TRUE;
+ f.parse_allow_group = FALSE;
s++;
goto RESTART;
}
used after reading a preceding phrase.
There are a lot of broken sendmails out there that put additional pairs of <>
-round <route-addr>s. If strip_excess_angle_brackets is set, allow any number of
-them, as long as they match. */
+round <route-addr>s. If strip_excess_angle_brackets is set, allow a limited
+number of them, as long as they match. */
if (*s == '<')
{
int bracket_count = 1;
s++;
- if (strip_excess_angle_brackets)
- while (*s == '<') { bracket_count++; s++; }
+ if (strip_excess_angle_brackets) while (*s == '<')
+ {
+ if(bracket_count++ > 5) FAILED(US"angle-brackets nested too deep");
+ s++;
+ }
t = yield;
startptr = s;
if (*s == '@')
{
s = read_route(s, t, errorptr);
- if (*errorptr != NULL) goto PARSE_FAILED;
+ if (*errorptr) goto PARSE_FAILED;
*t = 0; /* Ensure route is ignored - probably overkill */
source_routed = TRUE;
}
else
{
s = read_addr_spec(s, t, '>', errorptr, &domainptr);
- if (*errorptr != NULL) goto PARSE_FAILED;
+ if (*errorptr) goto PARSE_FAILED;
*domain = domainptr - yield;
if (source_routed && *domain == 0)
FAILED(US"domain missing in source-routed address");
}
endptr = s;
- if (*errorptr != NULL) goto PARSE_FAILED;
+ if (*errorptr) goto PARSE_FAILED;
while (bracket_count-- > 0) if (*s++ != '>')
{
- *errorptr = (s[-1] == 0)? US"'>' missing at end of address" :
- string_sprintf("malformed address: %.32s may not follow %.*s",
- s-1, s - (uschar *)mailbox - 1, mailbox);
+ *errorptr = s[-1] == 0
+ ? US"'>' missing at end of address"
+ : string_sprintf("malformed address: %.32s may not follow %.*s",
+ s-1, (int)(s - US mailbox - 1), mailbox);
goto PARSE_FAILED;
}
not enclosed in <> as well, which is indicated by an empty first local
part preceding '@'. The source routing is, however, ignored. */
-else if (*t == 0)
+else if (!*t)
{
uschar *domainptr = yield;
s = read_route(s, t, errorptr);
- if (*errorptr != NULL) goto PARSE_FAILED;
+ if (*errorptr) goto PARSE_FAILED;
*t = 0; /* Ensure route is ignored - probably overkill */
s = read_addr_spec(s, t, 0, errorptr, &domainptr);
- if (*errorptr != NULL) goto PARSE_FAILED;
+ if (*errorptr) goto PARSE_FAILED;
*domain = domainptr - yield;
endptr = last_comment_position;
if (*domain == 0) FAILED(US"domain missing in source-routed address");
t += Ustrlen((const uschar *)t);
*t++ = *s++;
*domain = t - yield;
- s = read_domain(s, t, errorptr);
- if (*t == 0) goto PARSE_FAILED;
+ s = read_domain(s, t, TRUE, errorptr);
+ if (!*t) goto PARSE_FAILED;
endptr = last_comment_position;
}
move it back past white space if necessary. */
PARSE_SUCCEEDED:
-if (*s != 0)
+if (*s)
{
- if (parse_found_group && *s == ';')
+ if (f.parse_found_group && *s == ';')
{
- parse_found_group = FALSE;
- parse_allow_group = TRUE;
+ f.parse_found_group = FALSE;
+ f.parse_allow_group = TRUE;
}
else
{
*errorptr = string_sprintf("malformed address: %.32s may not follow %.*s",
- s, s - (uschar *)mailbox, mailbox);
+ s, (int)(s - US mailbox), mailbox);
goto PARSE_FAILED;
}
}
-*start = startptr - (uschar *)mailbox; /* Return offsets */
+*start = startptr - US mailbox; /* Return offsets */
while (isspace(endptr[-1])) endptr--;
-*end = endptr - (uschar *)mailbox;
+*end = endptr - US mailbox;
/* Although this code has no limitation on the length of address extracted,
-other parts of Exim may have limits, and in any case, RFC 2821 limits local
-parts to 64 and domains to 255, so we do a check here, giving an error if the
-address is ridiculously long. */
+other parts of Exim may have limits, and in any case, RFC 5321 limits email
+addresses to 256, so we do a check here, giving an error if the address is
+ridiculously long. */
-if (*end - *start > ADDRESS_MAXLENGTH)
+if (*end - *start > EXIM_EMAILADDR_MAX)
{
*errorptr = string_sprintf("address is ridiculously long: %.64s...", yield);
return NULL;
}
-return (uschar *)yield;
+return yield;
/* Use goto (via the macro FAILED) to get to here from a variety of places.
We might have an empty address in a group - the caller can choose to ignore
this. We must, however, keep the flags correct. */
PARSE_FAILED:
-if (parse_found_group && *s == ';')
+if (f.parse_found_group && *s == ';')
{
- parse_found_group = FALSE;
- parse_allow_group = TRUE;
+ f.parse_found_group = FALSE;
+ f.parse_allow_group = TRUE;
}
return NULL;
}
/* This function is used for quoting text in headers according to RFC 2047.
If the only characters that strictly need quoting are spaces, we return the
-original string, unmodified. If a quoted string is too long for the buffer, it
-is truncated. (This shouldn't happen: this is normally handling short strings.)
+original string, unmodified.
Hmmph. As always, things get perverted for other uses. This function was
originally for the "phrase" part of addresses. Now it is being used for much
chars
len the length of the string
charset the name of the character set; NULL => iso-8859-1
- buffer the buffer to put the answer in
- buffer_size the size of the buffer
+ fold if TRUE, a newline is inserted before the separating space when
+ more than one encoded-word is generated
Returns: pointer to the original string, if no quoting needed, or
- pointer to buffer containing the quoted string, or
- a pointer to "String too long" if the buffer can't even hold
- the introduction
+ pointer to allocated memory containing the quoted string
*/
-uschar *
-parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer,
- int buffer_size)
+const uschar *
+parse_quote_2047(const uschar *string, int len, const uschar *charset,
+ BOOL fold)
{
-uschar *s = string;
-uschar *p, *t;
-int hlen;
+const uschar * s = string;
+int hlen, l;
BOOL coded = FALSE;
+BOOL first_byte = FALSE;
+gstring * g =
+ string_fmt_append(NULL, "=?%s?Q?", charset ? charset : US"iso-8859-1");
-if (charset == NULL) charset = US"iso-8859-1";
-
-/* We don't expect this to fail! */
-
-if (!string_format(buffer, buffer_size, "=?%s?Q?", charset))
- return US"String too long";
-
-hlen = Ustrlen(buffer);
-t = buffer + hlen;
-p = buffer;
+hlen = l = g->ptr;
-for (; len > 0; len--)
+for (s = string; len > 0; s++, len--)
{
- int ch = *s++;
- if (t > buffer + buffer_size - hlen - 8) break;
+ int ch = *s;
- if (t - p > 70)
+ if (g->ptr - l > 67 && !first_byte)
{
- *t++ = '?';
- *t++ = '=';
- *t++ = ' ';
- p = t;
- Ustrncpy(p, buffer, hlen);
- t += hlen;
+ g = fold ? string_catn(g, US"?=\n ", 4) : string_catn(g, US"?= ", 3);
+ l = g->ptr;
+ g = string_catn(g, g->s, hlen);
}
- if (ch < 33 || ch > 126 ||
- Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
+ if ( ch < 33 || ch > 126
+ || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
{
- if (ch == ' ') *t++ = '_'; else
+ if (ch == ' ')
{
- sprintf(CS t, "=%02X", ch);
- while (*t != 0) t++;
+ g = string_catn(g, US"_", 1);
+ first_byte = FALSE;
+ }
+ else
+ {
+ g = string_fmt_append(g, "=%02X", ch);
coded = TRUE;
+ first_byte = !first_byte;
}
}
- else *t++ = ch;
+ else
+ { g = string_catn(g, s, 1); first_byte = FALSE; }
}
-*t++ = '?';
-*t++ = '=';
-*t = 0;
+if (coded)
+ string = string_from_gstring(g = string_catn(g, US"?=", 2));
+else
+ g->ptr = -1;
-return coded? buffer : string;
+gstring_release_unused(g);
+return string;
}
We *could* use this for all cases, getting rid of the messy original code,
but leave it for now. It would complicate simple cases like "John Q. Smith".
-The result is passed back in the buffer; it is usually going to be added to
-some other string. In order to be sure there is going to be no overflow,
-restrict the length of the input to 1/4 of the buffer size - this allows for
-every single character to be quoted or encoded without overflowing, and that
-wouldn't happen because of amalgamation. If the phrase is too long, return a
-fixed string.
+The result is passed back in allocated memory.
Arguments:
phrase an RFC822 phrase
len the length of the phrase
- buffer a buffer to put the result in
- buffer_size the size of the buffer
Returns: the fixed RFC822 phrase
*/
-uschar *
-parse_fix_phrase(uschar *phrase, int len, uschar *buffer, int buffer_size)
+const uschar *
+parse_fix_phrase(const uschar *phrase, int len)
{
int ch, i;
BOOL quoted = FALSE;
-uschar *s, *t, *end, *yield;
+const uschar *s, *end;
+uschar * buffer;
+uschar *t, *yield;
while (len > 0 && isspace(*phrase)) { phrase++; len--; }
-if (len > buffer_size/4) return US"Name too long";
/* See if there are any non-printing characters, and if so, use the RFC 2047
encoding for the whole thing. */
for (i = 0, s = phrase; i < len; i++, s++)
if ((*s < 32 && *s != '\t') || *s > 126) break;
-if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer,
- buffer_size);
+if (i < len)
+ return parse_quote_2047(phrase, len, headers_charset, FALSE);
/* No non-printers; use the RFC 822 quoting rules */
+if (len <= 0 || len >= INT_MAX/4)
+ return string_copy_taint(CUS"", phrase);
+
+buffer = store_get((len+1)*4, phrase);
+
s = phrase;
end = s + len;
yield = t = buffer + 1;
else if (ch == '(')
{
- uschar *ss = s; /* uschar after '(' */
+ const uschar *ss = s; /* uschar after '(' */
int level = 1;
while(ss < end)
{
{
if (ss >= end) ss--;
*t++ = '(';
- Ustrncpy(t, s, ss-s);
- t += ss-s;
- s = ss;
+ if (ss > s)
+ {
+ Ustrncpy(t, s, ss-s);
+ t += ss-s;
+ s = ss;
+ }
}
}
}
*t = 0;
+store_release_above(t+1);
return yield;
}
*/
int
-parse_forward_list(uschar *s, int options, address_item **anchor,
- uschar **error, uschar *incoming_domain, uschar *directory,
+parse_forward_list(const uschar *s, int options, address_item **anchor,
+ uschar **error, const uschar *incoming_domain, const uschar *directory,
error_block **syntax_errors)
{
int count = 0;
for (;;)
{
- int len;
- int special = 0;
- int specopt = 0;
- int specbit = 0;
- uschar *ss, *nexts;
- address_item *addr;
+ int len, special = 0, specopt = 0, specbit = 0;
+ const uschar * ss, * nexts;
+ address_item * addr;
BOOL inquote = FALSE;
for (;;)
{
while (isspace(*s) || *s == ',') s++;
- if (*s == '#') { while (*s != 0 && *s != '\n') s++; } else break;
+ if (*s == '#') { while (*s && *s != '\n') s++; } else break;
}
/* When we reach the end of the list, we return FF_DELIVERED if any child
However, if the list is empty only because syntax errors were skipped, we
return FF_DELIVERED. */
- if (*s == 0)
+ if (!*s)
{
- return (count > 0 || (syntax_errors != NULL && *syntax_errors != NULL))?
- FF_DELIVERED : FF_NOTDELIVERED;
+ return (count > 0 || (syntax_errors && *syntax_errors))
+ ? FF_DELIVERED : FF_NOTDELIVERED;
/* This previous code returns FF_ERROR if nothing is generated but a
syntax error has been skipped. I now think it is the wrong approach, but
have left this here just in case, and for the record. */
- #ifdef NEVER
+#ifdef NEVER
if (count > 0) return FF_DELIVERED; /* Something was generated */
- if (syntax_errors == NULL || /* Not skipping syntax errors, or */
- *syntax_errors == NULL) /* we didn't actually skip any */
+ if (!syntax_errors || /* Not skipping syntax errors, or */
+ !*syntax_errors) /* we didn't actually skip any */
return FF_NOTDELIVERED;
*error = string_sprintf("no addresses generated: syntax error in %s: %s",
(*syntax_errors)->text2, (*syntax_errors)->text1);
return FF_ERROR;
- #endif
-
+#endif
}
/* Find the end of the next address. Quoted strings in addresses may contain
/* Remove any trailing spaces; we know there's at least one non-space. */
- while (isspace((ss[-1]))) ss--;
+ while (isspace(ss[-1])) ss--;
/* We now have s->start and ss->end of the next address. Remove quotes
if they completely enclose, remembering the address started with a quote
ss--;
inquote = TRUE;
while (s < ss && isspace(*s)) s++;
- while (ss > s && isspace((ss[-1]))) ss--;
+ while (ss > s && isspace(ss[-1])) ss--;
}
/* Set up the length of the address. */
len = ss - s;
- DEBUG(D_route)
- {
- int save = s[len];
- s[len] = 0;
- debug_printf("extract item: %s\n", s);
- s[len] = save;
- }
+ DEBUG(D_route) debug_printf("extract item: %.*s\n", len, s);
/* Handle special addresses if permitted. If the address is :unknown:
ignore it - this is for backward compatibility with old alias files. You
else if (Ustrncmp(s, ":fail:", 6) == 0)
{ special = FF_FAIL; specopt = RDO_FAIL; } /* specbit is 0 */
- if (special != 0)
+ if (special)
{
- uschar *ss = Ustrchr(s+1, ':') + 1;
+ uschar * ss = Ustrchr(s+1, ':') + 1; /* line after the special... */
if ((options & specopt) == specbit)
{
*error = string_sprintf("\"%.*s\" is not permitted", len, s);
return FF_ERROR;
}
- while (*ss != 0 && isspace(*ss)) ss++;
- while (s[len] != 0 && s[len] != '\n') len++;
- s[len] = 0;
- *error = string_copy(ss);
+ while (*ss && isspace(*ss)) ss++; /* skip leading whitespace */
+ if ((len = Ustrlen(ss)) > 0) /* ignore trailing newlines */
+ for (const uschar * t = ss + len - 1; t >= ss && *t == '\n'; t--) len--;
+ *error = string_copyn(ss, len); /* becomes the error */
return special;
}
if (Ustrncmp(s, ":include:", 9) == 0)
{
- uschar *filebuf;
+ uschar * filebuf;
uschar filename[256];
- uschar *t = s+9;
+ const uschar * t = s+9;
int flen = len - 9;
int frc;
struct stat statbuf;
- address_item *last;
- FILE *f;
+ address_item * last;
+ FILE * f;
while (flen > 0 && isspace(*t)) { t++; flen--; }
if (flen <= 0)
{
- *error = string_sprintf("file name missing after :include:");
+ *error = US"file name missing after :include:";
return FF_ERROR;
}
- if (flen > 255)
+ if (flen > sizeof(filename)-1)
{
*error = string_sprintf("included file name \"%s\" is too long", t);
return FF_ERROR;
/* Insist on absolute path */
- if (filename[0]!= '/')
+ if (filename[0] != '/')
{
*error = string_sprintf("included file \"%s\" is not an absolute path",
filename);
/* Check if include is permitted */
- if ((options & RDO_INCLUDE) != 0)
+ if (options & RDO_INCLUDE)
{
*error = US"included files not permitted";
return FF_ERROR;
}
+ if (is_tainted(filename))
+ {
+ *error = string_sprintf("Tainted name '%s' for included file not permitted\n",
+ filename);
+ return FF_ERROR;
+ }
+
/* Check file name if required */
- if (directory != NULL)
+ if (directory)
{
int len = Ustrlen(directory);
- uschar *p = filename + len;
+ uschar * p;
+ while (len > 0 && directory[len-1] == '/') len--; /* ignore trailing '/' */
+ p = filename + len;
if (Ustrncmp(filename, directory, len) != 0 || *p != '/')
{
*error = string_sprintf("included file %s is not in directory %s",
return FF_ERROR;
}
+#ifdef EXIM_HAVE_OPENAT
+ /* It is necessary to check that every component inside the directory
+ is NOT a symbolic link, in order to keep the file inside the directory.
+ This is mighty tedious. We open the directory and openat every component,
+ with a flag that fails symlinks. */
+
+ {
+ int fd = exim_open2(CCS directory, O_RDONLY);
+ if (fd < 0)
+ {
+ *error = string_sprintf("failed to open directory %s", directory);
+ return FF_ERROR;
+ }
+ while (*p)
+ {
+ uschar temp;
+ int fd2;
+ uschar * q = p + 1; /* skip dividing '/' */
+
+ while (*q == '/') q++; /* skip extra '/' */
+ while (*++p && *p != '/') ; /* end of component */
+ temp = *p;
+ *p = '\0';
+
+ fd2 = exim_openat(fd, CS q, O_RDONLY|O_NOFOLLOW);
+ close(fd);
+ *p = temp;
+ if (fd2 < 0)
+ {
+ *error = string_sprintf("failed to open %s (component of included "
+ "file); could be symbolic link", filename);
+ return FF_ERROR;
+ }
+ fd = fd2;
+ }
+ f = fdopen(fd, "rb");
+ }
+#else
/* It is necessary to check that every component inside the directory
is NOT a symbolic link, in order to keep the file inside the directory.
This is mighty tedious. It is also not totally foolproof in that it
leaves the possibility of a race attack, but I don't know how to do
any better. */
- while (*p != 0)
+ while (*p)
{
int temp;
- while (*(++p) != 0 && *p != '/');
+ while (*++p && *p != '/');
temp = *p;
*p = 0;
if (Ulstat(filename, &statbuf) != 0)
return FF_ERROR;
}
}
+#endif
}
- /* Open and stat the file */
+#ifdef EXIM_HAVE_OPENAT
+ else
+#endif
+ /* Open and stat the file */
+ f = Ufopen(filename, "rb");
- if ((f = Ufopen(filename, "rb")) == NULL)
+ if (!f)
{
- *error = string_open_failed(errno, "included file %s", filename);
+ *error = string_open_failed("included file %s", filename);
return FF_INCLUDEFAIL;
}
{
*error = string_sprintf("failed to stat included file %s: %s",
filename, strerror(errno));
- fclose(f);
+ (void)fclose(f);
return FF_INCLUDEFAIL;
}
/* If directory was checked, double check that we opened a regular file */
- if (directory != NULL && (statbuf.st_mode & S_IFMT) != S_IFREG)
+ if (directory && (statbuf.st_mode & S_IFMT) != S_IFREG)
{
*error = string_sprintf("included file %s is not a regular file in "
"the %s directory", filename, directory);
return FF_ERROR;
}
- filebuf = store_get(statbuf.st_size + 1);
+ filebuf = store_get(statbuf.st_size + 1, filename);
if (fread(filebuf, 1, statbuf.st_size, f) != statbuf.st_size)
{
*error = string_sprintf("error while reading included file %s: %s",
filename, strerror(errno));
- fclose(f);
+ (void)fclose(f);
return FF_ERROR;
}
filebuf[statbuf.st_size] = 0;
- fclose(f);
+ (void)fclose(f);
addr = NULL;
frc = parse_forward_list(filebuf, options, &addr,
error, incoming_domain, directory, syntax_errors);
if (frc != FF_DELIVERED && frc != FF_NOTDELIVERED) return frc;
- if (addr != NULL)
+ if (addr)
{
- last = addr;
- while (last->next != NULL) { count++; last = last->next; }
+ for (last = addr; last->next; last = last->next) count++;
last->next = *anchor;
*anchor = addr;
count++;
else
{
int start, end, domain;
- uschar *recipient = NULL;
- int save = s[len];
- s[len] = 0;
+ const uschar *recipient = NULL;
+ uschar * s_ltd = string_copyn(s, len);
/* If it starts with \ and the rest of it parses as a valid mail address
without a domain, carry on with that address, but qualify it with the
incoming domain. Otherwise arrange for the address to fall through,
causing an error message on the re-parse. */
- if (*s == '\\')
+ if (*s_ltd == '\\')
{
recipient =
- parse_extract_address(s+1, error, &start, &end, &domain, FALSE);
- if (recipient != NULL)
- recipient = (domain != 0)? NULL :
+ parse_extract_address(s_ltd+1, error, &start, &end, &domain, FALSE);
+ if (recipient)
+ recipient = domain != 0 ? NULL :
string_sprintf("%s@%s", recipient, incoming_domain);
}
/* Try parsing the item as an address. */
- if (recipient == NULL) recipient =
- parse_extract_address(s, error, &start, &end, &domain, FALSE);
+ if (!recipient) recipient =
+ parse_extract_address(s_ltd, error, &start, &end, &domain, FALSE);
/* If item starts with / or | and is not a valid address, or there
is no domain, treat it as a file or pipe. If it was a quoted item,
remove the quoting occurrences of \ within it. */
- if ((*s == '|' || *s == '/') && (recipient == NULL || domain == 0))
+ if ((*s_ltd == '|' || *s_ltd == '/') && (!recipient || domain == 0))
{
- uschar *t = store_get(Ustrlen(s) + 1);
- uschar *p = t;
- uschar *q = s;
- while (*q != 0)
+ uschar * t = store_get(Ustrlen(s_ltd) + 1, s_ltd);
+ uschar * p = t, * q = s_ltd;
+
+ while (*q)
{
if (inquote)
{
- *p++ = (*q == '\\')? *(++q) : *q;
+ *p++ = *q == '\\' ? *++q : *q;
q++;
}
else *p++ = *q++;
*p = 0;
addr = deliver_make_addr(t, TRUE);
setflag(addr, af_pfr); /* indicates pipe/file/reply */
- if (*s != '|') setflag(addr, af_file); /* indicates file */
+ if (*s_ltd != '|') setflag(addr, af_file); /* indicates file */
}
/* Item must be an address. Complain if not, else qualify, rewrite and set
else
{
- if (recipient == NULL)
+ if (!recipient)
{
if (Ustrcmp(*error, "empty address") == 0)
{
*error = NULL;
- s[len] = save;
s = nexts;
continue;
}
- if (syntax_errors != NULL)
+ if (syntax_errors)
{
- error_block *e = store_get(sizeof(error_block));
- error_block *last = *syntax_errors;
- if (last == NULL) *syntax_errors = e; else
+ error_block * e = store_get(sizeof(error_block), GET_UNTAINTED);
+ error_block * last = *syntax_errors;
+ if (last)
{
- while (last->next != NULL) last = last->next;
+ while (last->next) last = last->next;
last->next = e;
}
+ else
+ *syntax_errors = e;
e->next = NULL;
e->text1 = *error;
- e->text2 = string_copy(s);
- s[len] = save;
+ e->text2 = s_ltd;
s = nexts;
continue;
}
else
{
- *error = string_sprintf("%s in \"%s\"", *error, s);
- s[len] = save; /* _after_ using it for *error */
+ *error = string_sprintf("%s in \"%s\"", *error, s_ltd);
return FF_ERROR;
}
}
/* Address was successfully parsed. Rewrite, and then make an address
block. */
- recipient = ((options & RDO_REWRITE) != 0)?
- rewrite_address(recipient, TRUE, FALSE, global_rewrite_rules,
- rewrite_existflags) :
- rewrite_address_qualify(recipient, TRUE);
- addr = deliver_make_addr(recipient, TRUE); /* TRUE => copy recipient */
+ recipient = options & RDO_REWRITE
+ ? rewrite_address(recipient, TRUE, FALSE, global_rewrite_rules,
+ rewrite_existflags)
+ : rewrite_address_qualify(recipient, TRUE); /*XXX loses track of const */
+ addr = deliver_make_addr(US recipient, TRUE); /* TRUE => copy recipient, so deconst ok */
}
- /* Restore the final character in the original data, and add to the
- output chain. */
+ /* Add the original data to the output chain. */
- s[len] = save;
addr->next = *anchor;
*anchor = addr;
count++;
}
+/*************************************************
+* Extract a Message-ID *
+*************************************************/
+
+/* This function is used to extract message ids from In-Reply-To: and
+References: header lines.
+
+Arguments:
+ str pointer to the start of the message-id
+ yield put pointer to the message id (in dynamic memory) here
+ error put error message here on failure
+
+Returns: points after the processed message-id or NULL on error
+*/
+
+const uschar *
+parse_message_id(const uschar *str, uschar **yield, uschar **error)
+{
+uschar *domain = NULL;
+uschar *id;
+rmark reset_point;
+
+str = skip_comment(str);
+if (*str != '<')
+ {
+ *error = US"Missing '<' before message-id";
+ return NULL;
+ }
+
+/* Getting a block the size of the input string will definitely be sufficient
+for the answer, but it may also be very long if we are processing a header
+line. Therefore, take care to release unwanted store afterwards. */
+
+reset_point = store_mark();
+id = *yield = store_get(Ustrlen(str) + 1, str);
+*id++ = *str++;
+
+str = read_addr_spec(str, id, '>', error, &domain);
+
+if (!*error)
+ {
+ if (*str != '>') *error = US"Missing '>' after message-id";
+ else if (domain == NULL) *error = US"domain missing in message-id";
+ }
+
+if (*error)
+ {
+ store_reset(reset_point);
+ return NULL;
+ }
+
+while (*id) id++;
+*id++ = *str++;
+*id++ = 0;
+store_release_above(id);
+
+return skip_comment(str);
+}
+
+
+/*************************************************
+* Parse a fixed digit number *
+*************************************************/
+
+/* Parse a string containing an ASCII encoded fixed digits number
+
+Arguments:
+ str pointer to the start of the ASCII encoded number
+ n pointer to the resulting value
+ digits number of required digits
+
+Returns: points after the processed date or NULL on error
+*/
+
+static const uschar *
+parse_number(const uschar *str, int *n, int digits)
+{
+*n=0;
+while (digits--)
+ {
+ if (*str<'0' || *str>'9') return NULL;
+ *n=10*(*n)+(*str++-'0');
+ }
+return str;
+}
+
+
+/*************************************************
+* Parse a RFC 2822 day of week *
+*************************************************/
+
+/* Parse the day of the week from a RFC 2822 date, but do not
+ decode it, because it is only for humans.
+
+Arguments:
+ str pointer to the start of the day of the week
+
+Returns: points after the parsed day or NULL on error
+*/
+
+static const uschar *
+parse_day_of_week(const uschar * str)
+{
+/*
+day-of-week = ([FWS] day-name) / obs-day-of-week
+
+day-name = "Mon" / "Tue" / "Wed" / "Thu" /
+ "Fri" / "Sat" / "Sun"
+
+obs-day-of-week = [CFWS] day-name [CFWS]
+*/
+
+static const uschar *day_name[7]={ US"mon", US"tue", US"wed", US"thu", US"fri", US"sat", US"sun" };
+int i;
+uschar day[4];
+
+str = skip_comment(str);
+for (i = 0; i < 3; ++i)
+ {
+ if ((day[i] = tolower(*str)) == '\0') return NULL;
+ ++str;
+ }
+day[3] = '\0';
+for (i = 0; i<7; ++i) if (Ustrcmp(day,day_name[i]) == 0) break;
+if (i == 7) return NULL;
+return skip_comment(str);
+}
+
+
+/*************************************************
+* Parse a RFC 2822 date *
+*************************************************/
+
+/* Parse the date part of a RFC 2822 date-time, extracting the
+ day, month and year.
+
+Arguments:
+ str pointer to the start of the date
+ d pointer to the resulting day
+ m pointer to the resulting month
+ y pointer to the resulting year
+
+Returns: points after the processed date or NULL on error
+*/
+
+static const uschar *
+parse_date(const uschar *str, int *d, int *m, int *y)
+{
+/*
+date = day month year
+
+year = 4*DIGIT / obs-year
+
+obs-year = [CFWS] 2*DIGIT [CFWS]
+
+month = (FWS month-name FWS) / obs-month
+
+month-name = "Jan" / "Feb" / "Mar" / "Apr" /
+ "May" / "Jun" / "Jul" / "Aug" /
+ "Sep" / "Oct" / "Nov" / "Dec"
+
+obs-month = CFWS month-name CFWS
+
+day = ([FWS] 1*2DIGIT) / obs-day
+
+obs-day = [CFWS] 1*2DIGIT [CFWS]
+*/
+
+const uschar * s, * n;
+static const uschar *month_name[]={ US"jan", US"feb", US"mar", US"apr", US"may", US"jun", US"jul", US"aug", US"sep", US"oct", US"nov", US"dec" };
+int i;
+uschar month[4];
+
+str = skip_comment(str);
+if ((str = parse_number(str,d,1)) == NULL) return NULL;
+
+if (*str>='0' && *str<='9') *d = 10*(*d)+(*str++-'0');
+s = skip_comment(str);
+if (s == str) return NULL;
+str = s;
+
+for (i = 0; i<3; ++i) if ((month[i]=tolower(*(str+i))) == '\0') return NULL;
+month[3] = '\0';
+for (i = 0; i<12; ++i) if (Ustrcmp(month,month_name[i]) == 0) break;
+if (i == 12) return NULL;
+str+=3;
+*m = i;
+s = skip_comment(str);
+if (s == str) return NULL;
+str=s;
+
+if ((n = parse_number(str,y,4)))
+ {
+ str = n;
+ if (*y<1900) return NULL;
+ *y = *y-1900;
+ }
+else if ((n = parse_number(str,y,2)))
+ {
+ str = skip_comment(n);
+ while (*(str-1) == ' ' || *(str-1) == '\t') --str; /* match last FWS later */
+ if (*y<50) *y+=100;
+ }
+else return NULL;
+return str;
+}
+
+
+/*************************************************
+* Parse a RFC 2822 Time *
+*************************************************/
+
+/* Parse the time part of a RFC 2822 date-time, extracting the
+ hour, minute, second and timezone.
+
+Arguments:
+ str pointer to the start of the time
+ h pointer to the resulting hour
+ m pointer to the resulting minute
+ s pointer to the resulting second
+ z pointer to the resulting timezone (offset in seconds)
+
+Returns: points after the processed time or NULL on error
+*/
+
+static const uschar *
+parse_time(const uschar *str, int *h, int *m, int *s, int *z)
+{
+/*
+time = time-of-day FWS zone
+
+time-of-day = hour ":" minute [ ":" second ]
+
+hour = 2DIGIT / obs-hour
+
+obs-hour = [CFWS] 2DIGIT [CFWS]
+
+minute = 2DIGIT / obs-minute
+
+obs-minute = [CFWS] 2DIGIT [CFWS]
+
+second = 2DIGIT / obs-second
+
+obs-second = [CFWS] 2DIGIT [CFWS]
+
+zone = (( "+" / "-" ) 4DIGIT) / obs-zone
+
+obs-zone = "UT" / "GMT" / ; Universal Time
+ ; North American UT
+ ; offsets
+ "EST" / "EDT" / ; Eastern: - 5/ - 4
+ "CST" / "CDT" / ; Central: - 6/ - 5
+ "MST" / "MDT" / ; Mountain: - 7/ - 6
+ "PST" / "PDT" / ; Pacific: - 8/ - 7
+
+ %d65-73 / ; Military zones - "A"
+ %d75-90 / ; through "I" and "K"
+ %d97-105 / ; through "Z", both
+ %d107-122 ; upper and lower case
+*/
+
+const uschar * c;
+
+str = skip_comment(str);
+if ((str = parse_number(str,h,2)) == NULL) return NULL;
+str = skip_comment(str);
+if (*str!=':') return NULL;
+++str;
+str = skip_comment(str);
+if ((str = parse_number(str,m,2)) == NULL) return NULL;
+c = skip_comment(str);
+if (*str == ':')
+ {
+ ++str;
+ str = skip_comment(str);
+ if ((str = parse_number(str,s,2)) == NULL) return NULL;
+ c = skip_comment(str);
+ }
+if (c == str) return NULL;
+else str=c;
+if (*str == '+' || *str == '-')
+ {
+ int neg;
+
+ neg = (*str == '-');
+ ++str;
+ if ((str = parse_number(str,z,4)) == NULL) return NULL;
+ *z = (*z/100)*3600+(*z%100)*60;
+ if (neg) *z = -*z;
+ }
+else
+ {
+ char zone[5];
+ struct { const char *name; int off; } zone_name[10] =
+ { {"gmt",0}, {"ut",0}, {"est",-5}, {"edt",-4}, {"cst",-6}, {"cdt",-5}, {"mst",-7}, {"mdt",-6}, {"pst",-8}, {"pdt",-7}};
+ int i,j;
+
+ for (i = 0; i<4; ++i)
+ {
+ zone[i] = tolower(*(str+i));
+ if (zone[i]<'a' || zone[i]>'z') break;
+ }
+ zone[i] = '\0';
+ for (j = 0; j<10 && strcmp(zone,zone_name[j].name); ++j);
+ /* Besides zones named in the grammar, RFC 2822 says other alphabetic */
+ /* time zones should be treated as unknown offsets. */
+ if (j<10)
+ {
+ *z = zone_name[j].off*3600;
+ str+=i;
+ }
+ else if (zone[0]<'a' || zone[1]>'z') return 0;
+ else
+ {
+ while ((*str>='a' && *str<='z') || (*str>='A' && *str<='Z')) ++str;
+ *z = 0;
+ }
+ }
+return str;
+}
+
+
+/*************************************************
+* Parse a RFC 2822 date-time *
+*************************************************/
+
+/* Parse a RFC 2822 date-time and return it in seconds since the epoch.
+
+Arguments:
+ str pointer to the start of the date-time
+ t pointer to the parsed time
+
+Returns: points after the processed date-time or NULL on error
+*/
+
+const uschar *
+parse_date_time(const uschar *str, time_t *t)
+{
+/*
+date-time = [ day-of-week "," ] date FWS time [CFWS]
+*/
+
+struct tm tm;
+int zone;
+extern char **environ;
+char **old_environ;
+static char gmt0[]="TZ=GMT0";
+static char *gmt_env[]={ gmt0, (char*)0 };
+const uschar * try;
+
+if ((try = parse_day_of_week(str)))
+ {
+ str = try;
+ if (*str!=',') return 0;
+ ++str;
+ }
+if ((str = parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year)) == NULL) return NULL;
+if (*str!=' ' && *str!='\t') return NULL;
+while (*str == ' ' || *str == '\t') ++str;
+if ((str = parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone)) == NULL) return NULL;
+tm.tm_isdst = 0;
+old_environ = environ;
+environ = gmt_env;
+*t = mktime(&tm);
+environ = old_environ;
+if (*t == -1) return NULL;
+*t-=zone;
+return skip_comment(str);
+}
+
+
+
+
/*************************************************
**************************************************
* Stand-alone test program *
{
int start, end, domain;
uschar buffer[1024];
-uschar outbuff[1024];
+store_init();
big_buffer = store_malloc(big_buffer_size);
/* strip_trailing_dot = TRUE; */
{
buffer[Ustrlen(buffer)-1] = 0;
if (buffer[0] == 0) break;
- printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer), outbuff,
- sizeof(outbuff)));
+ printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer)));
}
printf("Testing parse_extract_address without group syntax and without UTF-8\n");
buffer[Ustrlen(buffer) - 1] = 0;
if (buffer[0] == 0) break;
out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
- if (out == NULL) printf("*** bad address: %s\n", errmess); else
+ if (!out)
+ printf("*** bad address: %s\n", errmess);
+ else
{
uschar extract[1024];
Ustrncpy(extract, buffer+start, end-start);
buffer[Ustrlen(buffer) - 1] = 0;
if (buffer[0] == 0) break;
out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
- if (out == NULL) printf("*** bad address: %s\n", errmess); else
+ if (!out)
+ printf("*** bad address: %s\n", errmess);
+ else
{
uschar extract[1024];
Ustrncpy(extract, buffer+start, end-start);
printf("Testing parse_extract_address with group syntax\n");
-parse_allow_group = TRUE;
+f.parse_allow_group = TRUE;
while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
{
uschar *out;
buffer[Ustrlen(buffer) - 1] = 0;
if (buffer[0] == 0) break;
s = buffer;
- while (*s != 0)
+ while (*s)
{
uschar *ss = parse_find_address_end(s, FALSE);
int terminator = *ss;
out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
*ss = terminator;
- if (out == NULL) printf("*** bad address: %s\n", errmess); else
+ if (!out)
+ printf("*** bad address: %s\n", errmess);
+ else
{
uschar extract[1024];
Ustrncpy(extract, buffer+start, end-start);
}
s = ss + (terminator? 1:0);
- while (isspace(*s)) s++;
+ Uskip_whitespace(&s);
}
}
else printf("Failed: %d %s\n", extracted, errmess);
}
+printf("Testing parse_message_id\n");
+
+while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
+ {
+ uschar *s, *t, *errmess;
+ buffer[Ustrlen(buffer) - 1] = 0;
+ if (buffer[0] == 0) break;
+ s = buffer;
+ while (*s != 0)
+ {
+ s = parse_message_id(s, &t, &errmess);
+ if (errmess != NULL)
+ {
+ printf("Failed: %s\n", errmess);
+ break;
+ }
+ printf("%s\n", t);
+ }
+ }
+
return 0;
}