-/* $Cambridge: exim/src/src/parse.c,v 1.1 2004/10/07 10:39:01 ph10 Exp $ */
+/* $Cambridge: exim/src/src/parse.c,v 1.11 2007/01/08 10:50:18 ph10 Exp $ */
/*************************************************
* Exim - an Internet mail transport agent *
*************************************************/
-/* Copyright (c) University of Cambridge 1995 - 2004 */
+/* Copyright (c) University of Cambridge 1995 - 2007 */
/* See the file NOTICE for conditions of use and distribution. */
/* Functions for parsing addresses */
any character except [ ] \, including linear white space, and may contain
quoted characters. However, RFC 821 restricts literals to being dot-separated
3-digit numbers, and we make the obvious extension for IPv6. Go for a sequence
-of digits and dots (hex digits and colons for IPv6) here; later this will be
-checked for being a syntactically valid IP address if it ever gets to a router.
+of digits, dots, hex digits, and colons here; later this will be checked for
+being a syntactically valid IP address if it ever gets to a router.
-If IPv6 is supported, allow both the formal form, with IPV6: at the start, and
-the informal form without it, and accept IPV4: as well, 'cause someone will use
-it sooner or later. */
+Allow both the formal IPv6 form, with IPV6: at the start, and the informal form
+without it, and accept IPV4: as well, 'cause someone will use it sooner or
+later. */
if (*s == '[')
{
*t++ = *s++;
- #if HAVE_IPV6
if (strncmpic(s, US"IPv6:", 5) == 0 || strncmpic(s, US"IPv4:", 5) == 0)
{
memcpy(t, s, 5);
}
while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++;
- #else
- while (*s == '.' || isdigit(*s)) *t++ = *s++;
- #endif
-
if (*s == ']') *t++ = *s++; else
{
*errorptr = US"malformed domain literal";
TRUE and parse_found_group is FALSE when this function is called, an address
which is the start of a group (i.e. preceded by a phrase and a colon) is
recognized; the phrase is ignored and the flag parse_found_group is set. If
-this flag is TRUE at the end of an address, then if an extraneous semicolon is
-found, it is ignored and the flag is cleared. This logic is used only when
-scanning through addresses in headers, either to fulfil the -t option or for
-rewriting or checking header syntax.
+this flag is TRUE at the end of an address and an extraneous semicolon is
+found, the semicolon is ignored and the flag is cleared.
+
+This logic is used only when scanning through addresses in headers, either to
+fulfil the -t option, or for rewriting, or for checking header syntax. Because
+the group "state" has to be remembered between multiple calls of this function,
+the variables parse_{allow,found}_group are global. It is important to ensure
+that they are reset to FALSE at the end of scanning a header's list of
+addresses.
Arguments:
mailbox points to the RFC822 mailbox
original string, unmodified. If a quoted string is too long for the buffer, it
is truncated. (This shouldn't happen: this is normally handling short strings.)
+Hmmph. As always, things get perverted for other uses. This function was
+originally for the "phrase" part of addresses. Now it is being used for much
+longer texts in ACLs and via the ${rfc2047: expansion item. This means we have
+to check for overlong "encoded-word"s and split them. November 2004.
+
Arguments:
string the string to quote - already checked to contain non-printing
chars
charset the name of the character set; NULL => iso-8859-1
buffer the buffer to put the answer in
buffer_size the size of the buffer
+ fold if TRUE, a newline is inserted before the separating space when
+ more than one encoded-word is generated
Returns: pointer to the original string, if no quoting needed, or
pointer to buffer containing the quoted string, or
uschar *
parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer,
- int buffer_size)
+ int buffer_size, BOOL fold)
{
uschar *s = string;
-uschar *t;
+uschar *p, *t;
+int hlen;
BOOL coded = FALSE;
if (charset == NULL) charset = US"iso-8859-1";
if (!string_format(buffer, buffer_size, "=?%s?Q?", charset))
return US"String too long";
-t = buffer + Ustrlen(buffer);
+hlen = Ustrlen(buffer);
+t = buffer + hlen;
+p = buffer;
+
for (; len > 0; len--)
{
int ch = *s++;
- if (t > buffer + buffer_size - 8) break;
+ if (t > buffer + buffer_size - hlen - 8) break;
+
+ if (t - p > 70)
+ {
+ *t++ = '?';
+ *t++ = '=';
+ if (fold) *t++ = '\n';
+ *t++ = ' ';
+ p = t;
+ Ustrncpy(p, buffer, hlen);
+ t += hlen;
+ }
+
if (ch < 33 || ch > 126 ||
Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
{
}
else *t++ = ch;
}
-sprintf(CS t, "?=");
+
+*t++ = '?';
+*t++ = '=';
+*t = 0;
+
return coded? buffer : string;
}
if ((*s < 32 && *s != '\t') || *s > 126) break;
if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer,
- buffer_size);
+ buffer_size, FALSE);
/* No non-printers; use the RFC 822 quoting rules */
{
*error = string_sprintf("failed to stat included file %s: %s",
filename, strerror(errno));
- fclose(f);
+ (void)fclose(f);
return FF_INCLUDEFAIL;
}
{
*error = string_sprintf("error while reading included file %s: %s",
filename, strerror(errno));
- fclose(f);
+ (void)fclose(f);
return FF_ERROR;
}
filebuf[statbuf.st_size] = 0;
- fclose(f);
+ (void)fclose(f);
addr = NULL;
frc = parse_forward_list(filebuf, options, &addr,
}
+
+/*************************************************
+*             Extract a Message-ID               *
+*************************************************/
+
+/* This function is used to extract message ids from In-Reply-To: and
+References: header lines. Leading and trailing comments are skipped by calling
+skip_comment(); the text between '<' and '>' is parsed by read_addr_spec() and
+must contain a domain.
+
+Both output arguments are cleared on entry, so the caller need not initialise
+them, and *yield is only ever set to a non-NULL value when the function
+succeeds. In particular, *yield never points at store that has been released.
+
+Arguments:
+  str          pointer to the start of the message-id
+  yield        put pointer to the message id (in dynamic memory) here;
+                 set to NULL on failure
+  error        put error message here on failure; set to NULL on success
+
+Returns:       points after the processed message-id or NULL on error
+*/
+
+uschar *
+parse_message_id(uschar *str, uschar **yield, uschar **error)
+{
+uschar *domain = NULL;
+uschar *block;
+uschar *id;
+
+/* Give both outputs a defined value on every path, including the early
+return below. */
+
+*yield = NULL;
+*error = NULL;
+
+str = skip_comment(str);
+if (*str != '<')
+  {
+  *error = US"Missing '<' before message-id";
+  return NULL;
+  }
+
+/* Getting a block the size of the input string will definitely be sufficient
+for the answer, but it may also be very long if we are processing a header
+line. Therefore, take care to release unwanted store afterwards. */
+
+block = id = store_get(Ustrlen(str) + 1);
+*id++ = *str++;                        /* copy the opening '<' */
+
+str = read_addr_spec(str, id, '>', error, &domain);
+
+if (*error == NULL)
+  {
+  if (*str != '>') *error = US"Missing '>' after message-id";
+  else if (domain == NULL) *error = US"domain missing in message-id";
+  }
+
+/* On failure, give back the whole block. *yield is still NULL, so the caller
+is not left holding a pointer into released store. */
+
+if (*error != NULL)
+  {
+  store_reset(block);
+  return NULL;
+  }
+
+/* Find the end of the text written at id (this relies on read_addr_spec()
+leaving it zero-terminated), append the closing '>' and a terminating zero,
+then release the unused tail of the block. */
+
+while (*id != 0) id++;
+*id++ = *str++;
+*id++ = 0;
+store_reset(id);
+
+*yield = block;
+str = skip_comment(str);
+return str;
+}
+
+
+
+
/*************************************************
**************************************************
* Stand-alone test program *
else printf("Failed: %d %s\n", extracted, errmess);
}
+printf("Testing parse_message_id\n");
+
+while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
+ {
+ uschar *s, *t, *errmess;
+ buffer[Ustrlen(buffer) - 1] = 0;
+ if (buffer[0] == 0) break;
+ s = buffer;
+ while (*s != 0)
+ {
+ s = parse_message_id(s, &t, &errmess);
+ if (errmess != NULL)
+ {
+ printf("Failed: %s\n", errmess);
+ break;
+ }
+ printf("%s\n", t);
+ }
+ }
+
return 0;
}