-/* $Cambridge: exim/src/src/parse.c,v 1.1 2004/10/07 10:39:01 ph10 Exp $ */
-
/*************************************************
* Exim - an Internet mail transport agent *
*************************************************/
-/* Copyright (c) University of Cambridge 1995 - 2004 */
+/* Copyright (c) The Exim Maintainers 2020 - 2022 */
+/* Copyright (c) University of Cambridge 1995 - 2018 */
/* See the file NOTICE for conditions of use and distribution. */
/* Functions for parsing addresses */
#include "exim.h"
-static uschar *last_comment_position;
+static const uschar *last_comment_position;
#ifdef STAND_ALONE
-address_item *deliver_make_addr(uschar *address, BOOL copy)
+address_item *
+deliver_make_addr(uschar *address, BOOL copy)
{
-address_item *addr = store_get(sizeof(address_item));
+address_item *addr = store_get(sizeof(address_item), GET_UNTAINTED);
addr->next = NULL;
addr->parent = NULL;
addr->address = address;
return addr;
}
-uschar *rewrite_address(uschar *recipient, BOOL dummy1, BOOL dummy2, rewrite_rule
+uschar *
+rewrite_address(uschar *recipient, BOOL dummy1, BOOL dummy2, rewrite_rule
*dummy3, int dummy4)
{
return recipient;
}
-uschar *rewrite_address_qualify(uschar *recipient, BOOL dummy1)
+uschar *
+rewrite_address_qualify(uschar *recipient, BOOL dummy1)
{
return recipient;
}
*/
uschar *
-parse_find_address_end(uschar *s, BOOL nl_ends)
+parse_find_address_end(const uschar *s, BOOL nl_ends)
{
BOOL source_routing = *s == '@';
int no_term = source_routing? 1 : 0;
}
}
-return s;
+return US s;
}
Returns: pointer to the last @ in an address, or NULL if none
*/
-uschar *
-parse_find_at(uschar *s)
+const uschar *
+parse_find_at(const uschar *s)
{
-uschar *t = s + Ustrlen(s);
+const uschar * t = s + Ustrlen(s);
while (--t >= s)
- {
if (*t == '@')
{
int backslash_count = 0;
- uschar *tt = t - 1;
+ const uschar *tt = t - 1;
while (tt > s && *tt-- == '\\') backslash_count++;
if ((backslash_count & 1) == 0) return t;
}
- else if (*t == '\"') return NULL;
- }
+ else if (*t == '\"')
+ return NULL;
+
return NULL;
}
make it possible to ignore comments at the end of compound items.
Argument: current character pointer
-Regurns: new character pointer
+Returns: new character pointer
*/
-static uschar *
-skip_comment(uschar *s)
+static const uschar *
+skip_comment(const uschar *s)
{
last_comment_position = s;
while (*s)
{
int c, level;
- while (isspace(*s)) s++;
- if (*s != '(') break;
+
+ if (Uskip_whitespace(&s) != '(') break;
level = 1;
- while((c = *(++s)) != 0)
+ while((c = *(++s)))
{
if (c == '(') level++;
else if (c == ')') { if (--level <= 0) { s++; break; } }
in []. Make sure the output is set to the null string if there is a syntax
error as well as if there is no domain at all.
+Optionally, msg_id domain literals (non-space printable ASCII other than
+[ \ ], enclosed in []) are permitted.
+
Arguments:
s current character pointer
t where to put the domain
+ msg_id_literals flag for relaxed domain-literal processing
errorptr put error message here on failure (*t will be 0 on exit)
Returns: new character pointer
*/
-static uschar *
-read_domain(uschar *s, uschar *t, uschar **errorptr)
+static const uschar *
+read_domain(const uschar *s, uschar *t, BOOL msg_id_literals, uschar **errorptr)
{
uschar *tt = t;
s = skip_comment(s);
any character except [ ] \, including linear white space, and may contain
quoted characters. However, RFC 821 restricts literals to being dot-separated
3-digit numbers, and we make the obvious extension for IPv6. Go for a sequence
-of digits and dots (hex digits and colons for IPv6) here; later this will be
-checked for being a syntactically valid IP address if it ever gets to a router.
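+of hex digits, dots, and colons here; later this will be checked for being a
+syntactically valid IP address if it ever gets to a router. (When the caller
+sets msg_id_literals, any non-space printable ASCII character other than
+[ \ ] is accepted instead.)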
-If IPv6 is supported, allow both the formal form, with IPV6: at the start, and
-the informal form without it, and accept IPV4: as well, 'cause someone will use
-it sooner or later. */
+Allow both the formal IPv6 form, with IPV6: at the start, and the informal form
+without it, and accept IPV4: as well, 'cause someone will use it sooner or
+later. */
if (*s == '[')
{
*t++ = *s++;
- #if HAVE_IPV6
if (strncmpic(s, US"IPv6:", 5) == 0 || strncmpic(s, US"IPv4:", 5) == 0)
{
memcpy(t, s, 5);
t += 5;
s += 5;
}
- while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++;
- #else
- while (*s == '.' || isdigit(*s)) *t++ = *s++;
- #endif
+ if (msg_id_literals)
+ while (*s >= 33 && *s <= 90 || *s >= 94 && *s <= 126) *t++ = *s++;
+ else
+ while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++;
if (*s == ']') *t++ = *s++; else
{
*tt = 0;
}
- if (!allow_domain_literals)
+ if (!allow_domain_literals && !msg_id_literals)
{
*errorptr = US"domain literals not allowed";
*tt = 0;
Returns: new character pointer
*/
-static uschar *
-read_local_part(uschar *s, uschar *t, uschar **error, BOOL allow_null)
+static const uschar *
+read_local_part(const uschar *s, uschar *t, uschar **error, BOOL allow_null)
{
uschar *tt = t;
*error = NULL;
if (*s == '\"')
{
*t++ = '\"';
- while ((c = *(++s)) != 0 && c != '\"')
+ while ((c = *++s) && c != '\"')
{
*t++ = c;
- if (c == '\\' && s[1] != 0) *t++ = *(++s);
+ if (c == '\\' && s[1]) *t++ = *++s;
}
if (c == '\"')
{
else while (!mac_iscntrl_or_special(*s) || *s == '\\')
{
c = *t++ = *s++;
- if (c == '\\' && *s != 0) *t++ = *s++;
+ if (c == '\\' && *s) *t++ = *s++;
}
/* Terminate the word and skip subsequent comment */
Returns: new character pointer
*/
-static uschar *
-read_route(uschar *s, uschar *t, uschar **errorptr)
+static const uschar *
+read_route(const uschar *s, uschar *t, uschar **errorptr)
{
BOOL commas = FALSE;
*errorptr = NULL;
while (*s == '@')
{
*t++ = '@';
- s = read_domain(s+1, t, errorptr);
+ s = read_domain(s+1, t, FALSE, errorptr);
if (*t == 0) return s;
t += Ustrlen((const uschar *)t);
if (*s != ',') break;
Returns: new character pointer
*/
-static uschar *
-read_addr_spec(uschar *s, uschar *t, int term, uschar **errorptr,
+static const uschar *
+read_addr_spec(const uschar *s, uschar *t, int term, uschar **errorptr,
uschar **domainptr)
{
s = read_local_part(s, t, errorptr, FALSE);
if (*errorptr == NULL)
- {
if (*s != term)
- {
if (*s != '@')
*errorptr = string_sprintf("\"@\" or \".\" expected after \"%s\"", t);
else
t += Ustrlen((const uschar *)t);
*t++ = *s++;
*domainptr = t;
- s = read_domain(s, t, errorptr);
+ s = read_domain(s, t, FALSE, errorptr);
}
- }
- }
return s;
}
TRUE and parse_found_group is FALSE when this function is called, an address
which is the start of a group (i.e. preceded by a phrase and a colon) is
recognized; the phrase is ignored and the flag parse_found_group is set. If
-this flag is TRUE at the end of an address, then if an extraneous semicolon is
-found, it is ignored and the flag is cleared. This logic is used only when
-scanning through addresses in headers, either to fulfil the -t option or for
-rewriting or checking header syntax.
+this flag is TRUE at the end of an address and an extraneous semicolon is
+found, the semicolon is ignored, the flag is cleared, and parse_allow_group is
+set TRUE again.
+
+This logic is used only when scanning through addresses in headers, either to
+fulfil the -t option, or for rewriting, or for checking header syntax. Because
+the group "state" has to be remembered between multiple calls of this function,
+the variables parse_{allow,found}_group are global. It is important to ensure
+that they are reset to FALSE at the end of scanning a header's list of
+addresses.
Arguments:
mailbox points to the RFC822 mailbox
#define FAILED(s) { *errorptr = s; goto PARSE_FAILED; }
uschar *
-parse_extract_address(uschar *mailbox, uschar **errorptr, int *start, int *end,
+parse_extract_address(const uschar *mailbox, uschar **errorptr, int *start, int *end,
int *domain, BOOL allow_null)
{
-uschar *yield = store_get(Ustrlen(mailbox) + 1);
-uschar *startptr, *endptr;
-uschar *s = (uschar *)mailbox;
-uschar *t = (uschar *)yield;
+uschar * yield = store_get(Ustrlen(mailbox) + 1, mailbox);
+const uschar *startptr, *endptr;
+const uschar *s = US mailbox;
+uschar *t = US yield;
*domain = 0;
s = skip_comment(s);
startptr = s; /* In case addr-spec */
s = read_local_part(s, t, errorptr, TRUE); /* Dot separated words */
-if (*errorptr != NULL) goto PARSE_FAILED;
+if (*errorptr) goto PARSE_FAILED;
/* If the terminator is neither < nor @ then the format of the address
must either be a bare local-part (we are now at the end), or a phrase
if (*s != '@' && *s != '<')
{
- if (*s == 0 || *s == ';')
+ if (!*s || *s == ';')
{
- if (*t == 0) FAILED(US"empty address");
+ if (!*t) FAILED(US"empty address");
endptr = last_comment_position;
goto PARSE_SUCCEEDED; /* Bare local part */
}
end of string will produce a null local_part and therefore fail. We don't
need to keep updating t, as the phrase isn't to be kept. */
- while (*s != '<' && (!parse_allow_group || *s != ':'))
+ while (*s != '<' && (!f.parse_allow_group || *s != ':'))
{
s = read_local_part(s, t, errorptr, FALSE);
- if (*errorptr != NULL)
+ if (*errorptr)
{
*errorptr = string_sprintf("%s (expected word or \"<\")", *errorptr);
goto PARSE_FAILED;
if (*s == ':')
{
- parse_found_group = TRUE;
- parse_allow_group = FALSE;
+ f.parse_found_group = TRUE;
+ f.parse_allow_group = FALSE;
s++;
goto RESTART;
}
used after reading a preceding phrase.
There are a lot of broken sendmails out there that put additional pairs of <>
-round <route-addr>s. If strip_excess_angle_brackets is set, allow any number of
-them, as long as they match. */
+round <route-addr>s. If strip_excess_angle_brackets is set, allow up to five
+extra pairs of them, as long as they match. */
if (*s == '<')
{
int bracket_count = 1;
s++;
- if (strip_excess_angle_brackets)
- while (*s == '<') { bracket_count++; s++; }
+ if (strip_excess_angle_brackets) while (*s == '<')
+ {
+ if(bracket_count++ > 5) FAILED(US"angle-brackets nested too deep");
+ s++;
+ }
t = yield;
startptr = s;
if (*s == '@')
{
s = read_route(s, t, errorptr);
- if (*errorptr != NULL) goto PARSE_FAILED;
+ if (*errorptr) goto PARSE_FAILED;
*t = 0; /* Ensure route is ignored - probably overkill */
source_routed = TRUE;
}
else
{
s = read_addr_spec(s, t, '>', errorptr, &domainptr);
- if (*errorptr != NULL) goto PARSE_FAILED;
+ if (*errorptr) goto PARSE_FAILED;
*domain = domainptr - yield;
if (source_routed && *domain == 0)
FAILED(US"domain missing in source-routed address");
}
endptr = s;
- if (*errorptr != NULL) goto PARSE_FAILED;
+ if (*errorptr) goto PARSE_FAILED;
while (bracket_count-- > 0) if (*s++ != '>')
{
- *errorptr = (s[-1] == 0)? US"'>' missing at end of address" :
- string_sprintf("malformed address: %.32s may not follow %.*s",
- s-1, s - (uschar *)mailbox - 1, mailbox);
+ *errorptr = s[-1] == 0
+ ? US"'>' missing at end of address"
+ : string_sprintf("malformed address: %.32s may not follow %.*s",
+ s-1, (int)(s - US mailbox - 1), mailbox);
goto PARSE_FAILED;
}
not enclosed in <> as well, which is indicated by an empty first local
part preceding '@'. The source routing is, however, ignored. */
-else if (*t == 0)
+else if (!*t)
{
uschar *domainptr = yield;
s = read_route(s, t, errorptr);
- if (*errorptr != NULL) goto PARSE_FAILED;
+ if (*errorptr) goto PARSE_FAILED;
*t = 0; /* Ensure route is ignored - probably overkill */
s = read_addr_spec(s, t, 0, errorptr, &domainptr);
- if (*errorptr != NULL) goto PARSE_FAILED;
+ if (*errorptr) goto PARSE_FAILED;
*domain = domainptr - yield;
endptr = last_comment_position;
if (*domain == 0) FAILED(US"domain missing in source-routed address");
t += Ustrlen((const uschar *)t);
*t++ = *s++;
*domain = t - yield;
- s = read_domain(s, t, errorptr);
- if (*t == 0) goto PARSE_FAILED;
+ s = read_domain(s, t, TRUE, errorptr);
+ if (!*t) goto PARSE_FAILED;
endptr = last_comment_position;
}
move it back past white space if necessary. */
PARSE_SUCCEEDED:
-if (*s != 0)
+if (*s)
{
- if (parse_found_group && *s == ';')
+ if (f.parse_found_group && *s == ';')
{
- parse_found_group = FALSE;
- parse_allow_group = TRUE;
+ f.parse_found_group = FALSE;
+ f.parse_allow_group = TRUE;
}
else
{
*errorptr = string_sprintf("malformed address: %.32s may not follow %.*s",
- s, s - (uschar *)mailbox, mailbox);
+ s, (int)(s - US mailbox), mailbox);
goto PARSE_FAILED;
}
}
-*start = startptr - (uschar *)mailbox; /* Return offsets */
+*start = startptr - US mailbox; /* Return offsets */
while (isspace(endptr[-1])) endptr--;
-*end = endptr - (uschar *)mailbox;
+*end = endptr - US mailbox;
/* Although this code has no limitation on the length of address extracted,
-other parts of Exim may have limits, and in any case, RFC 2821 limits local
-parts to 64 and domains to 255, so we do a check here, giving an error if the
-address is ridiculously long. */
+other parts of Exim may have limits, and in any case, RFC 5321 limits email
+addresses to 256 octets, so we do a check here, giving an error if the address
+is ridiculously long. */
-if (*end - *start > ADDRESS_MAXLENGTH)
+if (*end - *start > EXIM_EMAILADDR_MAX)
{
*errorptr = string_sprintf("address is ridiculously long: %.64s...", yield);
return NULL;
}
-return (uschar *)yield;
+return yield;
/* Use goto (via the macro FAILED) to get to here from a variety of places.
We might have an empty address in a group - the caller can choose to ignore
this. We must, however, keep the flags correct. */
PARSE_FAILED:
-if (parse_found_group && *s == ';')
+if (f.parse_found_group && *s == ';')
{
- parse_found_group = FALSE;
- parse_allow_group = TRUE;
+ f.parse_found_group = FALSE;
+ f.parse_allow_group = TRUE;
}
return NULL;
}
/* This function is used for quoting text in headers according to RFC 2047.
If the only characters that strictly need quoting are spaces, we return the
-original string, unmodified. If a quoted string is too long for the buffer, it
-is truncated. (This shouldn't happen: this is normally handling short strings.)
+original string, unmodified.
+
+Hmmph. As always, things get perverted for other uses. This function was
+originally for the "phrase" part of addresses. Now it is being used for much
+longer texts in ACLs and via the ${rfc2047: expansion item. This means we have
+to check for overlong "encoded-word"s and split them. November 2004.
Arguments:
string the string to quote - already checked to contain non-printing
chars
len the length of the string
charset the name of the character set; NULL => iso-8859-1
- buffer the buffer to put the answer in
- buffer_size the size of the buffer
+ fold if TRUE, a newline is inserted before the separating space when
+ more than one encoded-word is generated
Returns: pointer to the original string, if no quoting needed, or
- pointer to buffer containing the quoted string, or
- a pointer to "String too long" if the buffer can't even hold
- the introduction
+ pointer to allocated memory containing the quoted string
*/
-uschar *
-parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer,
- int buffer_size)
+const uschar *
+parse_quote_2047(const uschar *string, int len, const uschar *charset,
+ BOOL fold)
{
-uschar *s = string;
-uschar *t;
+const uschar * s = string;
+int hlen, l;
BOOL coded = FALSE;
+BOOL first_byte = FALSE;
+gstring * g =
+ string_fmt_append(NULL, "=?%s?Q?", charset ? charset : US"iso-8859-1");
-if (charset == NULL) charset = US"iso-8859-1";
+hlen = l = g->ptr;
-/* We don't expect this to fail! */
+for (s = string; len > 0; s++, len--)
+ {
+ int ch = *s;
-if (!string_format(buffer, buffer_size, "=?%s?Q?", charset))
- return US"String too long";
+ if (g->ptr - l > 67 && !first_byte)
+ {
+ g = fold ? string_catn(g, US"?=\n ", 4) : string_catn(g, US"?= ", 3);
+ l = g->ptr;
+ g = string_catn(g, g->s, hlen);
+ }
-t = buffer + Ustrlen(buffer);
-for (; len > 0; len--)
- {
- int ch = *s++;
- if (t > buffer + buffer_size - 8) break;
- if (ch < 33 || ch > 126 ||
- Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
+ if ( ch < 33 || ch > 126
+ || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
{
- if (ch == ' ') *t++ = '_'; else
+ if (ch == ' ')
+ {
+ g = string_catn(g, US"_", 1);
+ first_byte = FALSE;
+ }
+ else
{
- sprintf(CS t, "=%02X", ch);
- while (*t != 0) t++;
+ g = string_fmt_append(g, "=%02X", ch);
coded = TRUE;
+ first_byte = !first_byte;
}
}
- else *t++ = ch;
+ else
+ { g = string_catn(g, s, 1); first_byte = FALSE; }
}
-sprintf(CS t, "?=");
-return coded? buffer : string;
+
+if (coded)
+ string = string_from_gstring(g = string_catn(g, US"?=", 2));
+else
+ g->ptr = -1;
+
+gstring_release_unused(g);
+return string;
}
We *could* use this for all cases, getting rid of the messy original code,
but leave it for now. It would complicate simple cases like "John Q. Smith".
-The result is passed back in the buffer; it is usually going to be added to
-some other string. In order to be sure there is going to be no overflow,
-restrict the length of the input to 1/4 of the buffer size - this allows for
-every single character to be quoted or encoded without overflowing, and that
-wouldn't happen because of amalgamation. If the phrase is too long, return a
-fixed string.
+The result is passed back in allocated memory.
Arguments:
phrase an RFC822 phrase
len the length of the phrase
- buffer a buffer to put the result in
- buffer_size the size of the buffer
Returns: the fixed RFC822 phrase
*/
-uschar *
-parse_fix_phrase(uschar *phrase, int len, uschar *buffer, int buffer_size)
+const uschar *
+parse_fix_phrase(const uschar *phrase, int len)
{
int ch, i;
BOOL quoted = FALSE;
-uschar *s, *t, *end, *yield;
+const uschar *s, *end;
+uschar * buffer;
+uschar *t, *yield;
while (len > 0 && isspace(*phrase)) { phrase++; len--; }
-if (len > buffer_size/4) return US"Name too long";
/* See if there are any non-printing characters, and if so, use the RFC 2047
encoding for the whole thing. */
for (i = 0, s = phrase; i < len; i++, s++)
if ((*s < 32 && *s != '\t') || *s > 126) break;
-if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer,
- buffer_size);
+if (i < len)
+ return parse_quote_2047(phrase, len, headers_charset, FALSE);
/* No non-printers; use the RFC 822 quoting rules */
+if (len <= 0 || len >= INT_MAX/4)
+ return string_copy_taint(CUS"", phrase);
+
+buffer = store_get((len+1)*4, phrase);
+
s = phrase;
end = s + len;
yield = t = buffer + 1;
else if (ch == '(')
{
- uschar *ss = s; /* uschar after '(' */
+ const uschar *ss = s; /* uschar after '(' */
int level = 1;
while(ss < end)
{
{
if (ss >= end) ss--;
*t++ = '(';
- Ustrncpy(t, s, ss-s);
- t += ss-s;
- s = ss;
+ if (ss > s)
+ {
+ Ustrncpy(t, s, ss-s);
+ t += ss-s;
+ s = ss;
+ }
}
}
}
*t = 0;
+store_release_above(t+1);
return yield;
}
*/
int
-parse_forward_list(uschar *s, int options, address_item **anchor,
- uschar **error, uschar *incoming_domain, uschar *directory,
+parse_forward_list(const uschar *s, int options, address_item **anchor,
+ uschar **error, const uschar *incoming_domain, const uschar *directory,
error_block **syntax_errors)
{
int count = 0;
for (;;)
{
- int len;
- int special = 0;
- int specopt = 0;
- int specbit = 0;
- uschar *ss, *nexts;
- address_item *addr;
+ int len, special = 0, specopt = 0, specbit = 0;
+ const uschar * ss, * nexts;
+ address_item * addr;
BOOL inquote = FALSE;
for (;;)
{
while (isspace(*s) || *s == ',') s++;
- if (*s == '#') { while (*s != 0 && *s != '\n') s++; } else break;
+ if (*s == '#') { while (*s && *s != '\n') s++; } else break;
}
/* When we reach the end of the list, we return FF_DELIVERED if any child
However, if the list is empty only because syntax errors were skipped, we
return FF_DELIVERED. */
- if (*s == 0)
+ if (!*s)
{
- return (count > 0 || (syntax_errors != NULL && *syntax_errors != NULL))?
- FF_DELIVERED : FF_NOTDELIVERED;
+ return (count > 0 || (syntax_errors && *syntax_errors))
+ ? FF_DELIVERED : FF_NOTDELIVERED;
/* This previous code returns FF_ERROR if nothing is generated but a
syntax error has been skipped. I now think it is the wrong approach, but
have left this here just in case, and for the record. */
- #ifdef NEVER
+#ifdef NEVER
if (count > 0) return FF_DELIVERED; /* Something was generated */
- if (syntax_errors == NULL || /* Not skipping syntax errors, or */
- *syntax_errors == NULL) /* we didn't actually skip any */
+ if (!syntax_errors || /* Not skipping syntax errors, or */
+ !*syntax_errors) /* we didn't actually skip any */
return FF_NOTDELIVERED;
*error = string_sprintf("no addresses generated: syntax error in %s: %s",
(*syntax_errors)->text2, (*syntax_errors)->text1);
return FF_ERROR;
- #endif
-
+#endif
}
/* Find the end of the next address. Quoted strings in addresses may contain
/* Remove any trailing spaces; we know there's at least one non-space. */
- while (isspace((ss[-1]))) ss--;
+ while (isspace(ss[-1])) ss--;
/* We now have s->start and ss->end of the next address. Remove quotes
if they completely enclose, remembering the address started with a quote
ss--;
inquote = TRUE;
while (s < ss && isspace(*s)) s++;
- while (ss > s && isspace((ss[-1]))) ss--;
+ while (ss > s && isspace(ss[-1])) ss--;
}
/* Set up the length of the address. */
len = ss - s;
- DEBUG(D_route)
- {
- int save = s[len];
- s[len] = 0;
- debug_printf("extract item: %s\n", s);
- s[len] = save;
- }
+ DEBUG(D_route) debug_printf("extract item: %.*s\n", len, s);
/* Handle special addresses if permitted. If the address is :unknown:
ignore it - this is for backward compatibility with old alias files. You
else if (Ustrncmp(s, ":fail:", 6) == 0)
{ special = FF_FAIL; specopt = RDO_FAIL; } /* specbit is 0 */
- if (special != 0)
+ if (special)
{
- uschar *ss = Ustrchr(s+1, ':') + 1;
+ uschar * ss = Ustrchr(s+1, ':') + 1; /* line after the special... */
if ((options & specopt) == specbit)
{
*error = string_sprintf("\"%.*s\" is not permitted", len, s);
return FF_ERROR;
}
- while (*ss != 0 && isspace(*ss)) ss++;
- while (s[len] != 0 && s[len] != '\n') len++;
- s[len] = 0;
- *error = string_copy(ss);
+ while (*ss && isspace(*ss)) ss++; /* skip leading whitespace */
+ if ((len = Ustrlen(ss)) > 0) /* ignore trailing newlines */
+ for (const uschar * t = ss + len - 1; t >= ss && *t == '\n'; t--) len--;
+ *error = string_copyn(ss, len); /* becomes the error */
return special;
}
if (Ustrncmp(s, ":include:", 9) == 0)
{
- uschar *filebuf;
+ uschar * filebuf;
uschar filename[256];
- uschar *t = s+9;
+ const uschar * t = s+9;
int flen = len - 9;
int frc;
struct stat statbuf;
- address_item *last;
- FILE *f;
+ address_item * last;
+ FILE * f;
while (flen > 0 && isspace(*t)) { t++; flen--; }
if (flen <= 0)
{
- *error = string_sprintf("file name missing after :include:");
+ *error = US"file name missing after :include:";
return FF_ERROR;
}
- if (flen > 255)
+ if (flen > sizeof(filename)-1)
{
*error = string_sprintf("included file name \"%s\" is too long", t);
return FF_ERROR;
/* Insist on absolute path */
- if (filename[0]!= '/')
+ if (filename[0] != '/')
{
*error = string_sprintf("included file \"%s\" is not an absolute path",
filename);
/* Check if include is permitted */
- if ((options & RDO_INCLUDE) != 0)
+ if (options & RDO_INCLUDE)
{
*error = US"included files not permitted";
return FF_ERROR;
}
+ if (is_tainted(filename))
+ {
+ *error = string_sprintf("Tainted name '%s' for included file not permitted\n",
+ filename);
+ return FF_ERROR;
+ }
+
/* Check file name if required */
- if (directory != NULL)
+ if (directory)
{
int len = Ustrlen(directory);
- uschar *p = filename + len;
+ uschar * p;
+ while (len > 0 && directory[len-1] == '/') len--; /* ignore trailing '/' */
+ p = filename + len;
if (Ustrncmp(filename, directory, len) != 0 || *p != '/')
{
*error = string_sprintf("included file %s is not in directory %s",
return FF_ERROR;
}
+#ifdef EXIM_HAVE_OPENAT
+ /* It is necessary to check that every component inside the directory
+ is NOT a symbolic link, in order to keep the file inside the directory.
+ This is mighty tedious. We open the directory and openat every component,
+ with a flag that fails symlinks. */
+
+ {
+ int fd = exim_open2(CCS directory, O_RDONLY);
+ if (fd < 0)
+ {
+ *error = string_sprintf("failed to open directory %s", directory);
+ return FF_ERROR;
+ }
+ while (*p)
+ {
+ uschar temp;
+ int fd2;
+ uschar * q = p + 1; /* skip dividing '/' */
+
+ while (*q == '/') q++; /* skip extra '/' */
+ while (*++p && *p != '/') ; /* end of component */
+ temp = *p;
+ *p = '\0';
+
+ fd2 = exim_openat(fd, CS q, O_RDONLY|O_NOFOLLOW);
+ close(fd);
+ *p = temp;
+ if (fd2 < 0)
+ {
+ *error = string_sprintf("failed to open %s (component of included "
+ "file); could be symbolic link", filename);
+ return FF_ERROR;
+ }
+ fd = fd2;
+ }
+ f = fdopen(fd, "rb");
+ }
+#else
/* It is necessary to check that every component inside the directory
is NOT a symbolic link, in order to keep the file inside the directory.
This is mighty tedious. It is also not totally foolproof in that it
leaves the possibility of a race attack, but I don't know how to do
any better. */
- while (*p != 0)
+ while (*p)
{
int temp;
- while (*(++p) != 0 && *p != '/');
+ while (*++p && *p != '/');
temp = *p;
*p = 0;
if (Ulstat(filename, &statbuf) != 0)
return FF_ERROR;
}
}
+#endif
}
- /* Open and stat the file */
+#ifdef EXIM_HAVE_OPENAT
+ else
+#endif
+ /* Open and stat the file */
+ f = Ufopen(filename, "rb");
- if ((f = Ufopen(filename, "rb")) == NULL)
+ if (!f)
{
- *error = string_open_failed(errno, "included file %s", filename);
+ *error = string_open_failed("included file %s", filename);
return FF_INCLUDEFAIL;
}
{
*error = string_sprintf("failed to stat included file %s: %s",
filename, strerror(errno));
- fclose(f);
+ (void)fclose(f);
return FF_INCLUDEFAIL;
}
/* If directory was checked, double check that we opened a regular file */
- if (directory != NULL && (statbuf.st_mode & S_IFMT) != S_IFREG)
+ if (directory && (statbuf.st_mode & S_IFMT) != S_IFREG)
{
*error = string_sprintf("included file %s is not a regular file in "
"the %s directory", filename, directory);
return FF_ERROR;
}
- filebuf = store_get(statbuf.st_size + 1);
+ filebuf = store_get(statbuf.st_size + 1, filename);
if (fread(filebuf, 1, statbuf.st_size, f) != statbuf.st_size)
{
*error = string_sprintf("error while reading included file %s: %s",
filename, strerror(errno));
- fclose(f);
+ (void)fclose(f);
return FF_ERROR;
}
filebuf[statbuf.st_size] = 0;
- fclose(f);
+ (void)fclose(f);
addr = NULL;
frc = parse_forward_list(filebuf, options, &addr,
error, incoming_domain, directory, syntax_errors);
if (frc != FF_DELIVERED && frc != FF_NOTDELIVERED) return frc;
- if (addr != NULL)
+ if (addr)
{
- last = addr;
- while (last->next != NULL) { count++; last = last->next; }
+ for (last = addr; last->next; last = last->next) count++;
last->next = *anchor;
*anchor = addr;
count++;
else
{
int start, end, domain;
- uschar *recipient = NULL;
- int save = s[len];
- s[len] = 0;
+ const uschar *recipient = NULL;
+ uschar * s_ltd = string_copyn(s, len);
/* If it starts with \ and the rest of it parses as a valid mail address
without a domain, carry on with that address, but qualify it with the
incoming domain. Otherwise arrange for the address to fall through,
causing an error message on the re-parse. */
- if (*s == '\\')
+ if (*s_ltd == '\\')
{
recipient =
- parse_extract_address(s+1, error, &start, &end, &domain, FALSE);
- if (recipient != NULL)
- recipient = (domain != 0)? NULL :
+ parse_extract_address(s_ltd+1, error, &start, &end, &domain, FALSE);
+ if (recipient)
+ recipient = domain != 0 ? NULL :
string_sprintf("%s@%s", recipient, incoming_domain);
}
/* Try parsing the item as an address. */
- if (recipient == NULL) recipient =
- parse_extract_address(s, error, &start, &end, &domain, FALSE);
+ if (!recipient) recipient =
+ parse_extract_address(s_ltd, error, &start, &end, &domain, FALSE);
/* If item starts with / or | and is not a valid address, or there
is no domain, treat it as a file or pipe. If it was a quoted item,
remove the quoting occurrences of \ within it. */
- if ((*s == '|' || *s == '/') && (recipient == NULL || domain == 0))
+ if ((*s_ltd == '|' || *s_ltd == '/') && (!recipient || domain == 0))
{
- uschar *t = store_get(Ustrlen(s) + 1);
- uschar *p = t;
- uschar *q = s;
- while (*q != 0)
+ uschar * t = store_get(Ustrlen(s_ltd) + 1, s_ltd);
+ uschar * p = t, * q = s_ltd;
+
+ while (*q)
{
if (inquote)
{
- *p++ = (*q == '\\')? *(++q) : *q;
+ *p++ = *q == '\\' ? *++q : *q;
q++;
}
else *p++ = *q++;
*p = 0;
addr = deliver_make_addr(t, TRUE);
setflag(addr, af_pfr); /* indicates pipe/file/reply */
- if (*s != '|') setflag(addr, af_file); /* indicates file */
+ if (*s_ltd != '|') setflag(addr, af_file); /* indicates file */
}
/* Item must be an address. Complain if not, else qualify, rewrite and set
else
{
- if (recipient == NULL)
+ if (!recipient)
{
if (Ustrcmp(*error, "empty address") == 0)
{
*error = NULL;
- s[len] = save;
s = nexts;
continue;
}
- if (syntax_errors != NULL)
+ if (syntax_errors)
{
- error_block *e = store_get(sizeof(error_block));
- error_block *last = *syntax_errors;
- if (last == NULL) *syntax_errors = e; else
+ error_block * e = store_get(sizeof(error_block), GET_UNTAINTED);
+ error_block * last = *syntax_errors;
+ if (last)
{
- while (last->next != NULL) last = last->next;
+ while (last->next) last = last->next;
last->next = e;
}
+ else
+ *syntax_errors = e;
e->next = NULL;
e->text1 = *error;
- e->text2 = string_copy(s);
- s[len] = save;
+ e->text2 = s_ltd;
s = nexts;
continue;
}
else
{
- *error = string_sprintf("%s in \"%s\"", *error, s);
- s[len] = save; /* _after_ using it for *error */
+ *error = string_sprintf("%s in \"%s\"", *error, s_ltd);
return FF_ERROR;
}
}
/* Address was successfully parsed. Rewrite, and then make an address
block. */
- recipient = ((options & RDO_REWRITE) != 0)?
- rewrite_address(recipient, TRUE, FALSE, global_rewrite_rules,
- rewrite_existflags) :
- rewrite_address_qualify(recipient, TRUE);
- addr = deliver_make_addr(recipient, TRUE); /* TRUE => copy recipient */
+ recipient = options & RDO_REWRITE
+ ? rewrite_address(recipient, TRUE, FALSE, global_rewrite_rules,
+ rewrite_existflags)
+ : rewrite_address_qualify(recipient, TRUE); /*XXX loses track of const */
+ addr = deliver_make_addr(US recipient, TRUE); /* TRUE => copy recipient, so deconst ok */
}
- /* Restore the final character in the original data, and add to the
- output chain. */
+ /* Add the original data to the output chain. */
- s[len] = save;
addr->next = *anchor;
*anchor = addr;
count++;
}
+/*************************************************
+* Extract a Message-ID *
+*************************************************/
+
+/* Extract one message-id, as found in In-Reply-To: and References: header
+lines. Comments are skipped (via skip_comment()) both before the opening '<'
+and after the closing '>'.
+
+Arguments:
+  str          pointer to the start of the message-id
+  yield        receives a pointer to the extracted id, including its angle
+               brackets, in store obtained from store_get(); on failure the
+               store is reset, so the value left here must not be used
+  error        receives an error message on failure
+
+Returns:       pointer to the text following the message-id, or NULL on error
+*/
+
+const uschar *
+parse_message_id(const uschar *str, uschar **yield, uschar **error)
+{
+uschar * dom = NULL;
+uschar * out;
+rmark mark;
+
+str = skip_comment(str);
+if (*str != '<')
+  {
+  *error = US"Missing '<' before message-id";
+  return NULL;
+  }
+
+/* A block as long as the remaining input is always big enough for the
+result, but when a whole header line is being processed it can be far larger
+than needed. Mark the store first so that it can be handed back on failure,
+and trim the block to size on success. */
+
+mark = store_mark();
+*yield = out = store_get(Ustrlen(str) + 1, str);
+*out++ = *str++;                          /* the opening '<' */
+
+str = read_addr_spec(str, out, '>', error, &dom);
+
+/* Only look for further problems if read_addr_spec() reported none. */
+
+if (!*error)
+  *error = *str != '>' ? US"Missing '>' after message-id"
+         : !dom        ? US"domain missing in message-id"
+         : NULL;
+
+if (*error)
+  {
+  store_reset(mark);
+  return NULL;
+  }
+
+/* Step to the end of what was written, then append the closing '>' and a
+terminating zero before releasing the unused tail of the block. */
+
+out += Ustrlen(out);
+*out++ = *str++;
+*out++ = 0;
+store_release_above(out);
+
+return skip_comment(str);
+}
+
+
+/*************************************************
+* Parse a fixed digit number *
+*************************************************/
+
+/* Parse an unsigned decimal number consisting of exactly the given number
+of ASCII digits.
+
+Arguments:
+  str          pointer to the first digit
+  n            receives the value; on failure it holds the value of the
+               digits that were read before the offending character
+  digits       number of digits required
+
+Returns:       pointer to the character after the last digit, or NULL if
+               fewer than "digits" digits are present
+*/
+
+static const uschar *
+parse_number(const uschar *str, int *n, int digits)
+{
+int acc = 0;
+
+for (; digits-- != 0; str++)
+  {
+  if (*str < '0' || *str > '9')
+    {
+    *n = acc;
+    return NULL;
+    }
+  acc = acc * 10 + (*str - '0');
+  }
+
+*n = acc;
+return str;
+}
+
+
+/*************************************************
+* Parse a RFC 2822 day of week *
+*************************************************/
+
+/* Recognise the day-of-week part of an RFC 2822 date. The name is checked
+(case-independently) against the seven permitted abbreviations, but it is not
+decoded, because it is only there for human readers.
+
+Arguments:
+  str          pointer to the start of the day of the week
+
+Returns:       pointer past the day name and any following comment,
+               or NULL if no valid day name is present
+*/
+
+static const uschar *
+parse_day_of_week(const uschar * str)
+{
+/*
+day-of-week     =       ([FWS] day-name) / obs-day-of-week
+
+day-name        =       "Mon" / "Tue" / "Wed" / "Thu" /
+                        "Fri" / "Sat" / "Sun"
+
+obs-day-of-week =       [CFWS] day-name [CFWS]
+*/
+
+static const uschar * const names[7] =
+  { US"mon", US"tue", US"wed", US"thu", US"fri", US"sat", US"sun" };
+uschar word[4];
+const uschar * p = skip_comment(str);
+int k;
+
+/* Take a lower-cased copy of the next three characters; running into the
+end of the string first means there is no day name. */
+
+for (k = 0; k < 3; k++)
+  if ((word[k] = tolower(p[k])) == '\0')
+    return NULL;
+word[3] = '\0';
+
+for (k = 0; k < 7; k++)
+  if (Ustrcmp(word, names[k]) == 0)
+    return skip_comment(p + 3);
+
+return NULL;
+}
+
+
+/*************************************************
+* Parse a RFC 2822 date *
+*************************************************/
+
+/* Parse the date part of an RFC 2822 date-time: day, month name and year.
+
+Arguments:
+  str          pointer to the start of the date
+  d            receives the day of the month, as written (one or two digits)
+  m            receives the month, 0 (Jan) to 11 (Dec)
+  y            receives the year as an offset from 1900; a two-digit year
+               below 50 is taken to be in the 2000s
+
+Returns:       pointer after the processed date, or NULL on error
+*/
+
+static const uschar *
+parse_date(const uschar *str, int *d, int *m, int *y)
+{
+/*
+date            =       day month year
+
+year            =       4*DIGIT / obs-year
+
+obs-year        =       [CFWS] 2*DIGIT [CFWS]
+
+month           =       (FWS month-name FWS) / obs-month
+
+month-name      =       "Jan" / "Feb" / "Mar" / "Apr" /
+                        "May" / "Jun" / "Jul" / "Aug" /
+                        "Sep" / "Oct" / "Nov" / "Dec"
+
+obs-month       =       CFWS month-name CFWS
+
+day             =       ([FWS] 1*2DIGIT) / obs-day
+
+obs-day         =       [CFWS] 1*2DIGIT [CFWS]
+*/
+
+static const uschar * const months[] =
+  { US"jan", US"feb", US"mar", US"apr", US"may", US"jun",
+    US"jul", US"aug", US"sep", US"oct", US"nov", US"dec" };
+const uschar * p, * q;
+uschar word[4];
+int k;
+
+/* Day: one digit is mandatory, a second is optional. */
+
+p = skip_comment(str);
+if (!(p = parse_number(p, d, 1))) return NULL;
+if (*p >= '0' && *p <= '9') *d = 10 * *d + (*p++ - '0');
+
+/* Something skippable must separate the day from the month. */
+
+q = skip_comment(p);
+if (q == p) return NULL;
+p = q;
+
+/* Month: compare a lower-cased copy of the next three characters with the
+table of names. */
+
+for (k = 0; k < 3; k++)
+  if ((word[k] = tolower(p[k])) == '\0')
+    return NULL;
+word[3] = '\0';
+
+for (k = 0; k < 12 && Ustrcmp(word, months[k]) != 0; k++) ;
+if (k == 12) return NULL;
+p += 3;
+*m = k;
+
+/* Likewise, something skippable must separate the month from the year. */
+
+q = skip_comment(p);
+if (q == p) return NULL;
+p = q;
+
+/* Year: prefer the four-digit form, which must not be before 1900. */
+
+if ((q = parse_number(p, y, 4)))
+  {
+  if (*y < 1900) return NULL;
+  *y -= 1900;
+  return q;
+  }
+
+/* Otherwise fall back to the obsolete two-digit form. */
+
+if (!(q = parse_number(p, y, 2))) return NULL;
+
+p = skip_comment(q);
+while (p[-1] == ' ' || p[-1] == '\t') p--;    /* match last FWS later */
+if (*y < 50) *y += 100;
+return p;
+}
+
+
+/*************************************************
+* Parse a RFC 2822 Time *
+*************************************************/
+
+/* Parse the time part of an RFC 2822 date-time, extracting the
+   hour, minute, second and timezone.
+
+Arguments:
+  str          pointer to the start of the time
+  h            receives the hour
+  m            receives the minute
+  s            receives the second; set to 0 when the optional seconds
+               field is absent
+  z            receives the timezone as an offset in seconds; 0 for an
+               alphabetic zone that is not one of the named ones
+
+Returns:       pointer after the processed time, or NULL on error
+*/
+
+static const uschar *
+parse_time(const uschar *str, int *h, int *m, int *s, int *z)
+{
+/*
+time            =       time-of-day FWS zone
+
+time-of-day     =       hour ":" minute [ ":" second ]
+
+hour            =       2DIGIT / obs-hour
+
+obs-hour        =       [CFWS] 2DIGIT [CFWS]
+
+minute          =       2DIGIT / obs-minute
+
+obs-minute      =       [CFWS] 2DIGIT [CFWS]
+
+second          =       2DIGIT / obs-second
+
+obs-second      =       [CFWS] 2DIGIT [CFWS]
+
+zone            =       (( "+" / "-" ) 4DIGIT) / obs-zone
+
+obs-zone        =       "UT" / "GMT" /          ; Universal Time
+                                                ; North American UT
+                                                ; offsets
+                        "EST" / "EDT" /         ; Eastern:  - 5/ - 4
+                        "CST" / "CDT" /         ; Central:  - 6/ - 5
+                        "MST" / "MDT" /         ; Mountain: - 7/ - 6
+                        "PST" / "PDT" /         ; Pacific:  - 8/ - 7
+
+                        %d65-73 /               ; Military zones - "A"
+                        %d75-90 /               ; through "I" and "K"
+                        %d97-105 /              ; through "Z", both
+                        %d107-122               ; upper and lower case
+*/
+
+/* Named zones and their offsets from UT in hours. The table never changes,
+so keep it in static storage rather than rebuilding it on every call. */
+
+static const struct { const char *name; int off; } zone_name[] =
+  { {"gmt",0}, {"ut",0}, {"est",-5}, {"edt",-4}, {"cst",-6}, {"cdt",-5},
+    {"mst",-7}, {"mdt",-6}, {"pst",-8}, {"pdt",-7} };
+const int nzones = (int)(sizeof(zone_name)/sizeof(zone_name[0]));
+const uschar * c;
+
+/* The seconds field is optional. Give it a defined value up front so that
+the caller never reads an unset variable when it is omitted. */
+
+*s = 0;
+
+str = skip_comment(str);
+if ((str = parse_number(str,h,2)) == NULL) return NULL;
+str = skip_comment(str);
+if (*str != ':') return NULL;
+++str;
+str = skip_comment(str);
+if ((str = parse_number(str,m,2)) == NULL) return NULL;
+
+/* NOTE(review): the test for the seconds' ':' looks at the character directly
+after the minute digits, so CFWS between the minute and that ':' is not
+accepted here. */
+
+c = skip_comment(str);
+if (*str == ':')
+  {
+  ++str;
+  str = skip_comment(str);
+  if ((str = parse_number(str,s,2)) == NULL) return NULL;
+  c = skip_comment(str);
+  }
+
+/* Something skippable must separate the time-of-day from the zone. */
+
+if (c == str) return NULL;
+str = c;
+
+if (*str == '+' || *str == '-')
+  {
+  /* Numeric zone: sign followed by hhmm. */
+  int neg = (*str == '-');
+
+  ++str;
+  if ((str = parse_number(str,z,4)) == NULL) return NULL;
+  *z = (*z/100)*3600 + (*z%100)*60;
+  if (neg) *z = -*z;
+  }
+else
+  {
+  char zone[5];
+  int i, j;
+
+  /* Take a lower-cased copy of up to four leading letters. */
+
+  for (i = 0; i < 4; ++i)
+    {
+    zone[i] = tolower(str[i]);
+    if (zone[i] < 'a' || zone[i] > 'z') break;
+    }
+  zone[i] = '\0';
+
+  /* No leading letter at all means there is no zone. */
+
+  if (i == 0) return NULL;
+
+  for (j = 0; j < nzones && strcmp(zone, zone_name[j].name) != 0; ++j) ;
+
+  /* Besides zones named in the grammar, RFC 2822 says other alphabetic */
+  /* time zones should be treated as unknown offsets. */
+
+  if (j < nzones)
+    {
+    *z = zone_name[j].off*3600;
+    str += i;
+    }
+  else
+    {
+    while ((*str >= 'a' && *str <= 'z') || (*str >= 'A' && *str <= 'Z')) ++str;
+    *z = 0;
+    }
+  }
+return str;
+}
+
+
+/*************************************************
+* Parse a RFC 2822 date-time *
+*************************************************/
+
+/* Parse an RFC 2822 date-time and return it in seconds since the epoch.
+An optional day-of-week, if present, must be followed by a comma; it is
+checked but otherwise ignored.
+
+Arguments:
+  str          pointer to the start of the date-time
+  t            receives the parsed time, adjusted by the zone offset
+
+Returns:       pointer after the processed date-time (and any trailing
+               comment), or NULL on error
+*/
+
+const uschar *
+parse_date_time(const uschar *str, time_t *t)
+{
+/*
+date-time       =       [ day-of-week "," ] date FWS time [CFWS]
+*/
+
+struct tm tm;
+int zone;
+extern char **environ;
+char **old_environ;
+static char gmt0[]="TZ=GMT0";
+static char *gmt_env[]={ gmt0, (char*)0 };
+const uschar * try;
+
+if ((try = parse_day_of_week(str)))
+  {
+  str = try;
+  if (*str != ',') return NULL;
+  ++str;
+  }
+if ((str = parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year)) == NULL) return NULL;
+
+/* At least one space or tab must separate the date from the time. */
+
+if (*str != ' ' && *str != '\t') return NULL;
+while (*str == ' ' || *str == '\t') ++str;
+
+/* Seconds are optional in the time-of-day. Give the field a defined value
+before parsing so that mktime() is never handed an indeterminate tm_sec. */
+
+tm.tm_sec = 0;
+if ((str = parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone)) == NULL) return NULL;
+tm.tm_isdst = 0;
+
+/* Run mktime() with environ temporarily pointing at an environment that
+contains only TZ=GMT0, then restore the real environment. The zone offset
+from the date-time is applied afterwards. */
+
+old_environ = environ;
+environ = gmt_env;
+*t = mktime(&tm);
+environ = old_environ;
+if (*t == -1) return NULL;
+*t -= zone;
+return skip_comment(str);
+}
+
+
+
+
/*************************************************
**************************************************
* Stand-alone test program *
{
int start, end, domain;
uschar buffer[1024];
-uschar outbuff[1024];
+store_init();
big_buffer = store_malloc(big_buffer_size);
/* strip_trailing_dot = TRUE; */
{
buffer[Ustrlen(buffer)-1] = 0;
if (buffer[0] == 0) break;
- printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer), outbuff,
- sizeof(outbuff)));
+ printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer)));
}
printf("Testing parse_extract_address without group syntax and without UTF-8\n");
buffer[Ustrlen(buffer) - 1] = 0;
if (buffer[0] == 0) break;
out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
- if (out == NULL) printf("*** bad address: %s\n", errmess); else
+ if (!out)
+ printf("*** bad address: %s\n", errmess);
+ else
{
uschar extract[1024];
Ustrncpy(extract, buffer+start, end-start);
buffer[Ustrlen(buffer) - 1] = 0;
if (buffer[0] == 0) break;
out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
- if (out == NULL) printf("*** bad address: %s\n", errmess); else
+ if (!out)
+ printf("*** bad address: %s\n", errmess);
+ else
{
uschar extract[1024];
Ustrncpy(extract, buffer+start, end-start);
printf("Testing parse_extract_address with group syntax\n");
-parse_allow_group = TRUE;
+f.parse_allow_group = TRUE;
while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
{
uschar *out;
buffer[Ustrlen(buffer) - 1] = 0;
if (buffer[0] == 0) break;
s = buffer;
- while (*s != 0)
+ while (*s)
{
uschar *ss = parse_find_address_end(s, FALSE);
int terminator = *ss;
out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
*ss = terminator;
- if (out == NULL) printf("*** bad address: %s\n", errmess); else
+ if (!out)
+ printf("*** bad address: %s\n", errmess);
+ else
{
uschar extract[1024];
Ustrncpy(extract, buffer+start, end-start);
}
s = ss + (terminator? 1:0);
- while (isspace(*s)) s++;
+ Uskip_whitespace(&s);
}
}
else printf("Failed: %d %s\n", extracted, errmess);
}
+printf("Testing parse_message_id\n");
+
+while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
+ {
+ uschar *s, *t, *errmess;
+ buffer[Ustrlen(buffer) - 1] = 0;
+ if (buffer[0] == 0) break;
+ s = buffer;
+ while (*s != 0)
+ {
+ s = parse_message_id(s, &t, &errmess);
+ if (errmess != NULL)
+ {
+ printf("Failed: %s\n", errmess);
+ break;
+ }
+ printf("%s\n", t);
+ }
+ }
+
return 0;
}