src/src/parse.c

   1 /*************************************************
   2 *     Exim - an Internet mail transport agent    *
   3 *************************************************/
   4
   5 /* Copyright (c) The Exim Maintainers 2020 - 2022 */
   6 /* Copyright (c) University of Cambridge 1995 - 2018 */
   7 /* See the file NOTICE for conditions of use and distribution. */
   8
   9 /* Functions for parsing addresses */
  10
  11
  12 #include "exim.h"
  13
  14
/* The position that was passed to the most recent call of skip_comment(),
that is, the point just before any white space and comments that call went on
to skip. parse_extract_address() uses it as the end of an address, which keeps
trailing comments out of the reported extent. */

static const uschar *last_comment_position;
  16
  17
  18
  19 /* In stand-alone mode, provide a replacement for deliver_make_addr()
  20 and rewrite_address[_qualify]() so as to avoid having to drag in too much
  21 redundant apparatus. */
  22
  23 #ifdef STAND_ALONE
  24
  25 address_item *
  26 deliver_make_addr(uschar *address, BOOL copy)
  27 {
  28 address_item *addr = store_get(sizeof(address_item), GET_UNTAINTED);
  29 addr->next = NULL;
  30 addr->parent = NULL;
  31 addr->address = address;
  32 return addr;
  33 }
  34
  35 uschar *
  36 rewrite_address(uschar *recipient, BOOL dummy1, BOOL dummy2, rewrite_rule
  37   *dummy3, int dummy4)
  38 {
  39 return recipient;
  40 }
  41
  42 uschar *
  43 rewrite_address_qualify(uschar *recipient, BOOL dummy1)
  44 {
  45 return recipient;
  46 }
  47
  48 #endif
  49
  50
  51
  52
  53 /*************************************************
  54 *             Find the end of an address         *
  55 *************************************************/
  56
  57 /* Scan over a string looking for the termination of an address at a comma,
  58 or end of the string. It's the source-routed addresses which cause much pain
  59 here. Although Exim ignores source routes, it must recognize such addresses, so
  60 we cannot get rid of this logic.
  61
  62 Argument:
  63   s        pointer to the start of an address
  64   nl_ends  if TRUE, '\n' terminates an address
  65
  66 Returns:   pointer past the end of the address
  67            (i.e. points to null or comma)
  68 */
  69
  70 uschar *
  71 parse_find_address_end(const uschar *s, BOOL nl_ends)
  72 {
  73 BOOL source_routing = *s == '@';
  74 int no_term = source_routing? 1 : 0;
  75
  76 while (*s != 0 && (*s != ',' || no_term > 0) && (*s != '\n' || !nl_ends))
  77   {
  78   /* Skip single quoted characters. Strictly these should not occur outside
  79   quoted strings in RFC 822 addresses, but they can in RFC 821 addresses. Pity
  80   about the lack of consistency, isn't it? */
  81
  82   if (*s == '\\' && s[1] != 0) s += 2;
  83
  84   /* Skip quoted items that are not inside brackets. Note that
  85   quoted pairs are allowed inside quoted strings. */
  86
  87   else if (*s == '\"')
  88     {
  89     while (*(++s) != 0 && (*s != '\n' || !nl_ends))
  90       {
  91       if (*s == '\\' && s[1] != 0) s++;
  92         else if (*s == '\"') { s++; break; }
  93       }
  94     }
  95
  96   /* Skip comments, which may include nested brackets, but quotes
  97   are not recognized inside comments, though quoted pairs are. */
  98
  99   else if (*s == '(')
 100     {
 101     int level = 1;
 102     while (*(++s) != 0 && (*s != '\n' || !nl_ends))
 103       {
 104       if (*s == '\\' && s[1] != 0) s++;
 105         else if (*s == '(') level++;
 106           else if (*s == ')' && --level <= 0) { s++; break; }
 107       }
 108     }
 109
 110   /* Non-special character; just advance. Passing the colon in a source
 111   routed address means that any subsequent comma or colon may terminate unless
 112   inside angle brackets. */
 113
 114   else
 115     {
 116     if (*s == '<')
 117       {
 118       source_routing = s[1] == '@';
 119       no_term = source_routing? 2 : 1;
 120       }
 121     else if (*s == '>') no_term--;
 122     else if (source_routing && *s == ':') no_term--;
 123     s++;
 124     }
 125   }
 126
 127 return US s;
 128 }
 129
 130
 131
 132 /*************************************************
 133 *            Find last @ in an address           *
 134 *************************************************/
 135
 136 /* This function is used when we have something that may not be qualified. If we
 137 know it's qualified, searching for the rightmost '@' is sufficient. Here we
 138 have to be a bit more clever than just a plain search, in order to handle
 139 unqualified local parts like "thing@thong" correctly. Since quotes may not
 140 legally be part of a domain name, we can give up on hitting the first quote
 141 when searching from the right. Now that the parsing also permits the RFC 821
 142 form of address, where quoted-pairs are allowed in unquoted local parts, we
 143 must take care to handle that too.
 144
 145 Argument:  pointer to an address, possibly unqualified
 146 Returns:   pointer to the last @ in an address, or NULL if none
 147 */
 148
 149 const uschar *
 150 parse_find_at(const uschar *s)
 151 {
 152 const uschar * t = s + Ustrlen(s);
 153 while (--t >= s)
 154   if (*t == '@')
 155     {
 156     int backslash_count = 0;
 157     const uschar *tt = t - 1;
 158     while (tt > s && *tt-- == '\\') backslash_count++;
 159     if ((backslash_count & 1) == 0) return t;
 160     }
 161   else if (*t == '\"')
 162     return NULL;
 163
 164 return NULL;
 165 }
 166
 167
 168
 169
 170 /***************************************************************************
 171 * In all the functions below that read a particular object type from       *
 172 * the input, return the new value of the pointer s (the first argument),   *
 173 * and put the object into the store pointed to by t (the second argument), *
 174 * adding a terminating zero. If no object is found, t will point to zero   *
 175 * on return.                                                               *
 176 ***************************************************************************/
 177
 178
 179 /*************************************************
 180 *          Skip white space and comment          *
 181 *************************************************/
 182
 183 /* Algorithm:
 184   (1) Skip spaces.
 185   (2) If the next character is not '(', return.
 186   (3) Skip till matching ')', not counting any characters
 187       escaped with '\'.
 188   (4) Move past ')' and goto (1).
 189
 190 The start of the last potential comment position is remembered to
 191 make it possible to ignore comments at the end of compound items.
 192
 193 Argument: current character pointer
 194 Returns:  new character pointer
 195 */
 196
 197 static const uschar *
 198 skip_comment(const uschar *s)
 199 {
 200 last_comment_position = s;
 201 while (*s)
 202   {
 203   int c, level;
 204
 205   if (Uskip_whitespace(&s) != '(') break;
 206   level = 1;
 207   while((c = *(++s)))
 208     {
 209     if (c == '(') level++;
 210     else if (c == ')') { if (--level <= 0) { s++; break; } }
 211     else if (c == '\\' && s[1] != 0) s++;
 212     }
 213   }
 214 return s;
 215 }
 216
 217
 218
 219 /*************************************************
 220 *             Read a domain                      *
 221 *************************************************/
 222
 223 /* A domain is a sequence of subdomains, separated by dots. See comments below
 224 for detailed syntax of the subdomains.
 225
 226 If allow_domain_literals is TRUE, a "domain" may also be an IP address enclosed
 227 in []. Make sure the output is set to the null string if there is a syntax
 228 error as well as if there is no domain at all.
 229
 230 Optionally, msg_id domain literals ( printable-ascii enclosed in [] )
 231 are permitted.
 232
 233 Arguments:
 234   s          current character pointer
 235   t          where to put the domain
 236   msg_id_literals     flag for relaxed domain-literal processing
 237   errorptr   put error message here on failure (*t will be 0 on exit)
 238
 239 Returns:     new character pointer
 240 */
 241
static const uschar *
read_domain(const uschar *s, uschar *t, BOOL msg_id_literals, uschar **errorptr)
{
/* Copy a domain from s to t, leaving t zero-terminated. tt remembers the start
of the output so that it can be turned into an empty string whenever an error
is detected; t advances as characters are copied. Leading comments and white
space are skipped first. */

uschar *tt = t;
s = skip_comment(s);

/* Handle domain literals if permitted. An RFC 822 domain literal may contain
any character except [ ] \, including linear white space, and may contain
quoted characters. However, RFC 821 restricts literals to being dot-separated
3-digit numbers, and we make the obvious extension for IPv6. Go for a sequence
of digits, dots, hex digits, and colons here; later this will be checked for
being a syntactically valid IP address if it ever gets to a router.

Allow both the formal IPv6 form, with IPV6: at the start, and the informal form
without it, and accept IPV4: as well, 'cause someone will use it sooner or
later. */

if (*s == '[')
  {
  *t++ = *s++;

  /* Copy an optional address-family tag, matched case-independently */

  if (strncmpic(s, US"IPv6:", 5) == 0 || strncmpic(s, US"IPv4:", 5) == 0)
    {
    memcpy(t, s, 5);
    t += 5;
    s += 5;
    }

  /* In the relaxed (message-id) form accept the printable ASCII characters
  33-126 other than 91-93, which are [ \ and ]. Otherwise accept only the
  characters that can appear in an IP address. */

  if (msg_id_literals)
    while (*s >= 33 && *s <= 90 || *s >= 94 && *s <= 126) *t++ = *s++;
  else
    while (*s == '.' || *s == ':' || isxdigit(*s)) *t++ = *s++;

  if (*s == ']') *t++ = *s++; else
    {
    *errorptr = US"malformed domain literal";
    *tt = 0;
    }

  /* A literal is parsed even when literals are not allowed, so that the
  pointer is moved past it; this error replaces any "malformed" error set
  just above. */

  if (!allow_domain_literals && !msg_id_literals)
    {
    *errorptr = US"domain literals not allowed";
    *tt = 0;
    }
  *t = 0;
  return skip_comment(s);
  }

/* Handle a proper domain, which is a sequence of dot-separated atoms. Remove
trailing dots if strip_trailing_dot is set. A subdomain is an atom.

An atom is a sequence of any characters except specials, space, and controls.
The specials are ( ) < > @ , ; : \ " . [ and ]. This is the rule for RFC 822
and its successor (RFC 2822). However, RFC 821 and its successor (RFC 2821) is
tighter, allowing only letters, digits, and hyphens, not starting with a
hyphen.

There used to be a global flag that got set when checking addresses that came
in over SMTP and which therefore should be checked according to the
stricter rule. However, it seems silly to make the distinction, because I don't
suppose anybody ever uses local domains that are 822-compliant and not
821-compliant. Furthermore, Exim now has additional data on the spool file line
after an address (after "one_time" processing), and it makes use of a #
character to delimit it. When I wrote that code, I forgot about this 822-domain
stuff, and assumed # could never appear in a domain.

So the old code is now cut out for Release 4.11 onwards, on 09-Aug-02. In a few
years, when we are sure this isn't actually causing trouble, throw it away.

March 2003: the story continues: There is a camp that is arguing for the use of
UTF-8 in domain names as the way to internationalization, and other MTAs
support this. Therefore, we now have a flag that permits the use of characters
with values greater than 127, encoded in UTF-8, in subdomains, so that Exim can
be used experimentally in this way. */

for (;;)
  {
  /* tsave marks where this component starts in the output, so that an empty
  component can be recognized afterwards. */

  uschar *tsave = t;

/*********************
  if (rfc821_domains)
    {
    if (*s != '-') while (isalnum(*s) || *s == '-') *t++ = *s++;
    }
  else
    while (!mac_iscntrl_or_special(*s)) *t++ = *s++;
*********************/

  /* A component that starts with a hyphen is not copied at all, and is
  therefore diagnosed below as an empty component. */

  if (*s != '-')
    {
    /* Only letters, digits, and hyphens */

    if (!allow_utf8_domains)
      {
      while (isalnum(*s) || *s == '-') *t++ = *s++;
      }

    /* Permit legal UTF-8 characters to be included */

    else for(;;)
      {
      int i, d;
      if (isalnum(*s) || *s == '-')    /* legal ascii characters */
        {
        *t++ = *s++;
        continue;
        }
      if ((*s & 0xc0) != 0xc0) break;  /* not start of UTF-8 character */

      /* The two top bits of the leading byte are set. Each further set bit
      below them calls for one more continuation byte; running out of bits
      (i reaches 6) means the leading byte is not valid. */

      d = *s << 2;
      for (i = 1; i < 6; i++)          /* i is the number of additional bytes */
        {
        if ((d & 0x80) == 0) break;
        d <<= 1;
        }
      if (i == 6) goto BAD_UTF8;       /* invalid UTF-8 */
      *t++ = *s++;                     /* leading UTF-8 byte */
      while (i-- > 0)                  /* copy and check remainder */
        {
        if ((*s & 0xc0) != 0x80)
          {
          /* Also reached by goto from the leading-byte check above. The
          pointer returned is left at the offending byte. */

          BAD_UTF8:
          *errorptr = US"invalid UTF-8 byte sequence";
          *tt = 0;
          return s;
          }
        *t++ = *s++;
        }
      }    /* End of loop for UTF-8 character */
    }      /* End of subdomain */

  s = skip_comment(s);
  *t = 0;

  /* Nothing was copied for this component. If at least one earlier component
  (and hence its following dot) has been output, strip_trailing_dot is set,
  and another dot does not follow, just remove the trailing dot from the
  output. Anything else is an error. */

  if (t == tsave)   /* empty component */
    {
    if (strip_trailing_dot && t > tt && *s != '.') t[-1] = 0; else
      {
      *errorptr = US"domain missing or malformed";
      *tt = 0;
      }
    return s;
    }

  /* A dot means another component follows; copy it and go round again */

  if (*s != '.') break;
  *t++ = *s++;
  s = skip_comment(s);
  }

return s;
}
 392
 393
 394
 395 /*************************************************
 396 *            Read a local-part                   *
 397 *************************************************/
 398
 399 /* A local-part is a sequence of words, separated by periods. A null word
 400 between dots is not strictly allowed but apparently many mailers permit it,
 401 so, sigh, better be compatible. Even accept a trailing dot...
 402
 403 A <word> is either a quoted string, or an <atom>, which is a sequence
 404 of any characters except specials, space, and controls. The specials are
 405 ( ) < > @ , ; : \ " . [ and ]. In RFC 822, a single quoted character, (a
 406 quoted-pair) is not allowed in a word. However, in RFC 821, it is permitted in
 407 the local part of an address. Rather than have separate parsing functions for
 408 the different cases, take the liberal attitude always. At least one MUA is
 409 happy to recognize this case; I don't know how many other programs do.
 410
 411 Arguments:
 412   s           current character pointer
 413   t           where to put the local part
 414   error       where to point error text
 415   allow_null  TRUE if an empty local part is not an error
 416
 417 Returns:   new character pointer
 418 */
 419
 420 static const uschar *
 421 read_local_part(const uschar *s, uschar *t, uschar **error, BOOL allow_null)
 422 {
 423 uschar *tt = t;
 424 *error = NULL;
 425 for (;;)
 426   {
 427   int c;
 428   uschar *tsave = t;
 429   s = skip_comment(s);
 430
 431   /* Handle a quoted string */
 432
 433   if (*s == '\"')
 434     {
 435     *t++ = '\"';
 436     while ((c = *++s) && c != '\"')
 437       {
 438       *t++ = c;
 439       if (c == '\\' && s[1]) *t++ = *++s;
 440       }
 441     if (c == '\"')
 442       {
 443       s++;
 444       *t++ = '\"';
 445       }
 446     else
 447       {
 448       *error = US"unmatched doublequote in local part";
 449       return s;
 450       }
 451     }
 452
 453   /* Handle an atom, but allow quoted pairs within it. */
 454
 455   else while (!mac_iscntrl_or_special(*s) || *s == '\\')
 456     {
 457     c = *t++ = *s++;
 458     if (c == '\\' && *s) *t++ = *s++;
 459     }
 460
 461   /* Terminate the word and skip subsequent comment */
 462
 463   *t = 0;
 464   s = skip_comment(s);
 465
 466   /* If we have read a null component at this point, give an error unless it is
 467   terminated by a dot - an extension to RFC 822 - or if it is the first
 468   component of the local part and an empty local part is permitted, in which
 469   case just return normally. */
 470
 471   if (t == tsave && *s != '.')
 472     {
 473     if (t == tt && !allow_null)
 474       *error = US"missing or malformed local part";
 475     return s;
 476     }
 477
 478   /* Anything other than a dot terminates the local part. Treat multiple dots
 479   as a single dot, as this seems to be a common extension. */
 480
 481   if (*s != '.') break;
 482   do { *t++ = *s++; } while (*s == '.');
 483   }
 484
 485 return s;
 486 }
 487
 488
 489 /*************************************************
 490 *            Read route part of route-addr       *
 491 *************************************************/
 492
 493 /* The pointer is at the initial "@" on entry. Return it following the
 494 terminating colon. Exim no longer supports the use of source routes, but it is
 495 required to accept the syntax.
 496
 497 Arguments:
 498   s          current character pointer
 499   t          where to put the route
 500   errorptr   where to put an error message
 501
 502 Returns:     new character pointer
 503 */
 504
 505 static const uschar *
 506 read_route(const uschar *s, uschar *t, uschar **errorptr)
 507 {
 508 BOOL commas = FALSE;
 509 *errorptr = NULL;
 510
 511 while (*s == '@')
 512   {
 513   *t++ = '@';
 514   s = read_domain(s+1, t, FALSE, errorptr);
 515   if (*t == 0) return s;
 516   t += Ustrlen((const uschar *)t);
 517   if (*s != ',') break;
 518   *t++ = *s++;
 519   commas = TRUE;
 520   s = skip_comment(s);
 521   }
 522
 523 if (*s == ':') *t++ = *s++;
 524
 525 /* If there is no colon, and there were no commas, the most likely error
 526 is in fact a missing local part in the address rather than a missing colon
 527 after the route. */
 528
 529 else *errorptr = commas?
 530   US"colon expected after route list" :
 531   US"no local part";
 532
 533 /* Terminate the route and return */
 534
 535 *t = 0;
 536 return skip_comment(s);
 537 }
 538
 539
 540
 541 /*************************************************
 542 *                Read addr-spec                  *
 543 *************************************************/
 544
 545 /* Addr-spec is local-part@domain. We make the domain optional -
 546 the expected terminator for the whole thing is passed to check this.
 547 This function is called only when we know we have a route-addr.
 548
 549 Arguments:
 550   s          current character pointer
 551   t          where to put the addr-spec
 552   term       expected terminator (0 or >)
 553   errorptr   where to put an error message
 554   domainptr  set to point to the start of the domain
 555
 556 Returns:     new character pointer
 557 */
 558
 559 static const uschar *
 560 read_addr_spec(const uschar *s, uschar *t, int term, uschar **errorptr,
 561   uschar **domainptr)
 562 {
 563 s = read_local_part(s, t, errorptr, FALSE);
 564 if (*errorptr == NULL)
 565   if (*s != term)
 566     if (*s != '@')
 567       *errorptr = string_sprintf("\"@\" or \".\" expected after \"%s\"", t);
 568     else
 569       {
 570       t += Ustrlen((const uschar *)t);
 571       *t++ = *s++;
 572       *domainptr = t;
 573       s = read_domain(s, t, FALSE, errorptr);
 574       }
 575 return s;
 576 }
 577
 578
 579
 580 /*************************************************
 581 *         Extract operative address              *
 582 *************************************************/
 583
 584 /* This function extracts an operative address from a full RFC822 mailbox and
 585 returns it in a piece of dynamic store. We take the easy way and get a piece
 586 of store the same size as the input, and then copy into it whatever is
 587 necessary. If we cannot find a valid address (syntax error), return NULL, and
 588 point the error pointer to the reason. The arguments "start" and "end" are used
 589 to return the offsets of the first and one past the last characters in the
 590 original mailbox of the address that has been extracted, to aid in re-writing.
 591 The argument "domain" is set to point to the first character after "@" in the
 592 final part of the returned address, or zero if there is no @.
 593
 594 Exim no longer supports the use of source routed addresses (those of the form
 595 @domain,...:route_addr). It recognizes the syntax, but collapses such addresses
 596 down to their final components. Formerly, collapse_source_routes had to be set
 597 to achieve this effect. RFC 1123 allows collapsing with MAY, while the revision
 598 of RFC 821 had increased this to SHOULD, so I've gone for it, because it makes
 599 a lot of code elsewhere in Exim much simpler.
 600
 601 There are some special fudges here for handling RFC 822 group address notation
 602 which may appear in certain headers. If the flag parse_allow_group is set
 603 TRUE and parse_found_group is FALSE when this function is called, an address
 604 which is the start of a group (i.e. preceded by a phrase and a colon) is
 605 recognized; the phrase is ignored and the flag parse_found_group is set. If
 606 this flag is TRUE at the end of an address, and if an extraneous semicolon is
 607 found, it is ignored and the flag is cleared.
 608
 609 This logic is used only when scanning through addresses in headers, either to
 610 fulfil the -t option, or for rewriting, or for checking header syntax. Because
 611 the group "state" has to be remembered between multiple calls of this function,
 612 the variables parse_{allow,found}_group are global. It is important to ensure
 613 that they are reset to FALSE at the end of scanning a header's list of
 614 addresses.
 615
 616 Arguments:
 617   mailbox     points to the RFC822 mailbox
 618   errorptr    where to point an error message
 619   start       set to start offset in mailbox
 620   end         set to end offset in mailbox
 621   domain      set to domain offset in result, or 0 if no domain present
 622   allow_null  allow <> if TRUE
 623
 624 Returns:      points to the extracted address, or NULL on error
 625 */
 626
 627 #define FAILED(s) { *errorptr = s; goto PARSE_FAILED; }
 628
 629 uschar *
 630 parse_extract_address(const uschar *mailbox, uschar **errorptr, int *start, int *end,
 631   int *domain, BOOL allow_null)
 632 {
 633 uschar * yield = store_get(Ustrlen(mailbox) + 1, mailbox);
 634 const uschar *startptr, *endptr;
 635 const uschar *s = US mailbox;
 636 uschar *t = US yield;
 637
 638 *domain = 0;
 639
 640 /* At the start of the string we expect either an addr-spec or a phrase
 641 preceding a <route-addr>. If groups are allowed, we might also find a phrase
 642 preceding a colon and an address. If we find an initial word followed by
 643 a dot, strict interpretation of the RFC would cause it to be taken
 644 as the start of an addr-spec. However, many mailers break the rules
 645 and use addresses of the form "a.n.other <ano@somewhere>" and so we
 646 allow this case. */
 647
 648 RESTART:   /* Come back here after passing a group name */
 649
 650 s = skip_comment(s);
 651 startptr = s;                                 /* In case addr-spec */
 652 s = read_local_part(s, t, errorptr, TRUE);    /* Dot separated words */
 653 if (*errorptr) goto PARSE_FAILED;
 654
 655 /* If the terminator is neither < nor @ then the format of the address
 656 must either be a bare local-part (we are now at the end), or a phrase
 657 followed by a route-addr (more words must follow). */
 658
 659 if (*s != '@' && *s != '<')
 660   {
 661   if (*s == 0 || *s == ';')
 662     {
 663     if (!*t) FAILED(US"empty address");
 664     endptr = last_comment_position;
 665     goto PARSE_SUCCEEDED;              /* Bare local part */
 666     }
 667
 668   /* Expect phrase route-addr, or phrase : if groups permitted, but allow
 669   dots in the phrase; complete the loop only when '<' or ':' is encountered -
 670   end of string will produce a null local_part and therefore fail. We don't
 671   need to keep updating t, as the phrase isn't to be kept. */
 672
 673   while (*s != '<' && (!f.parse_allow_group || *s != ':'))
 674     {
 675     s = read_local_part(s, t, errorptr, FALSE);
 676     if (*errorptr)
 677       {
 678       *errorptr = string_sprintf("%s (expected word or \"<\")", *errorptr);
 679       goto PARSE_FAILED;
 680       }
 681     }
 682
 683   if (*s == ':')
 684     {
 685     f.parse_found_group = TRUE;
 686     f.parse_allow_group = FALSE;
 687     s++;
 688     goto RESTART;
 689     }
 690
 691   /* Assert *s == '<' */
 692   }
 693
 694 /* At this point the next character is either '@' or '<'. If it is '@', only a
 695 single local-part has previously been read. An angle bracket signifies the
 696 start of an <addr-spec>. Throw away anything we have saved so far before
 697 processing it. Note that this is "if" rather than "else if" because it's also
 698 used after reading a preceding phrase.
 699
 700 There are a lot of broken sendmails out there that put additional pairs of <>
 701 round <route-addr>s.  If strip_excess_angle_brackets is set, allow a limited
 702 number of them, as long as they match. */
 703
 704 if (*s == '<')
 705   {
 706   uschar *domainptr = yield;
 707   BOOL source_routed = FALSE;
 708   int bracket_count = 1;
 709
 710   s++;
 711   if (strip_excess_angle_brackets) while (*s == '<')
 712    {
 713    if(bracket_count++ > 5) FAILED(US"angle-brackets nested too deep");
 714    s++;
 715    }
 716
 717   t = yield;
 718   startptr = s;
 719   s = skip_comment(s);
 720
 721   /* Read an optional series of routes, each of which is a domain. They
 722   are separated by commas and terminated by a colon. However, we totally ignore
 723   such routes (RFC 1123 says we MAY, and the revision of RFC 821 says we
 724   SHOULD). */
 725
 726   if (*s == '@')
 727     {
 728     s = read_route(s, t, errorptr);
 729     if (*errorptr) goto PARSE_FAILED;
 730     *t = 0;                  /* Ensure route is ignored - probably overkill */
 731     source_routed = TRUE;
 732     }
 733
 734   /* Now an addr-spec, terminated by '>'. If there is no preceding route,
 735   we must allow an empty addr-spec if allow_null is TRUE, to permit the
 736   address "<>" in some circumstances. A source-routed address MUST have
 737   a domain in the final part. */
 738
 739   if (allow_null && !source_routed && *s == '>')
 740     {
 741     *t = 0;
 742     *errorptr = NULL;
 743     }
 744   else
 745     {
 746     s = read_addr_spec(s, t, '>', errorptr, &domainptr);
 747     if (*errorptr) goto PARSE_FAILED;
 748     *domain = domainptr - yield;
 749     if (source_routed && *domain == 0)
 750       FAILED(US"domain missing in source-routed address");
 751     }
 752
 753   endptr = s;
 754   if (*errorptr) goto PARSE_FAILED;
 755   while (bracket_count-- > 0) if (*s++ != '>')
 756     {
 757     *errorptr = s[-1] == 0
 758       ? US"'>' missing at end of address"
 759       : string_sprintf("malformed address: %.32s may not follow %.*s",
 760           s-1, (int)(s - US mailbox - 1), mailbox);
 761     goto PARSE_FAILED;
 762     }
 763
 764   s = skip_comment(s);
 765   }
 766
 767 /* Hitting '@' after the first local-part means we have definitely got an
 768 addr-spec, on a strict reading of the RFC, and the rest of the string
 769 should be the domain. However, for flexibility we allow for a route-address
 770 not enclosed in <> as well, which is indicated by an empty first local
 771 part preceding '@'. The source routing is, however, ignored. */
 772
 773 else if (!*t)
 774   {
 775   uschar *domainptr = yield;
 776   s = read_route(s, t, errorptr);
 777   if (*errorptr) goto PARSE_FAILED;
 778   *t = 0;         /* Ensure route is ignored - probably overkill */
 779   s = read_addr_spec(s, t, 0, errorptr, &domainptr);
 780   if (*errorptr) goto PARSE_FAILED;
 781   *domain = domainptr - yield;
 782   endptr = last_comment_position;
 783   if (*domain == 0) FAILED(US"domain missing in source-routed address");
 784   }
 785
 786 /* This is the strict case of local-part@domain. */
 787
 788 else
 789   {
 790   t += Ustrlen((const uschar *)t);
 791   *t++ = *s++;
 792   *domain = t - yield;
 793   s = read_domain(s, t, TRUE, errorptr);
 794   if (!*t) goto PARSE_FAILED;
 795   endptr = last_comment_position;
 796   }
 797
 798 /* Use goto to get here from the bare local part case. Arrive by falling
 799 through for other cases. Endptr may have been moved over whitespace, so
 800 move it back past white space if necessary. */
 801
 802 PARSE_SUCCEEDED:
 803 if (*s)
 804   {
 805   if (f.parse_found_group && *s == ';')
 806     {
 807     f.parse_found_group = FALSE;
 808     f.parse_allow_group = TRUE;
 809     }
 810   else
 811     {
 812     *errorptr = string_sprintf("malformed address: %.32s may not follow %.*s",
 813       s, (int)(s - US mailbox), mailbox);
 814     goto PARSE_FAILED;
 815     }
 816   }
 817 *start = startptr - US mailbox;      /* Return offsets */
 818 while (isspace(endptr[-1])) endptr--;
 819 *end = endptr - US mailbox;
 820
 821 /* Although this code has no limitation on the length of address extracted,
 822 other parts of Exim may have limits, and in any case, RFC 5321 limits email
 823 addresses to 256, so we do a check here, giving an error if the address is
 824 ridiculously long. */
 825
 826 if (*end - *start > EXIM_EMAILADDR_MAX)
 827   {
 828   *errorptr = string_sprintf("address is ridiculously long: %.64s...", yield);
 829   return NULL;
 830   }
 831
 832 return yield;
 833
 834 /* Use goto (via the macro FAILED) to get to here from a variety of places.
 835 We might have an empty address in a group - the caller can choose to ignore
 836 this. We must, however, keep the flags correct. */
 837
 838 PARSE_FAILED:
 839 if (f.parse_found_group && *s == ';')
 840   {
 841   f.parse_found_group = FALSE;
 842   f.parse_allow_group = TRUE;
 843   }
 844 return NULL;
 845 }
 846
 847 #undef FAILED
 848
 849
 850
 851 /*************************************************
 852 *        Quote according to RFC 2047             *
 853 *************************************************/
 854
 855 /* This function is used for quoting text in headers according to RFC 2047.
 856 If the only characters that strictly need quoting are spaces, we return the
 857 original string, unmodified.
 858
 859 Hmmph. As always, things get perverted for other uses. This function was
 860 originally for the "phrase" part of addresses. Now it is being used for much
 861 longer texts in ACLs and via the ${rfc2047: expansion item. This means we have
 862 to check for overlong "encoded-word"s and split them. November 2004.
 863
 864 Arguments:
 865   string       the string to quote - already checked to contain non-printing
 866                  chars
 867   len          the length of the string
 868   charset      the name of the character set; NULL => iso-8859-1
 869   fold         if TRUE, a newline is inserted before the separating space when
 870                  more than one encoded-word is generated
 871
 872 Returns:       pointer to the original string, if no quoting needed, or
 873                pointer to allocated memory containing the quoted string
 874 */
 875
 876 const uschar *
 877 parse_quote_2047(const uschar *string, int len, const uschar *charset,
 878   BOOL fold)
 879 {
 880 const uschar * s = string;
 881 int hlen, l;
 882 BOOL coded = FALSE;
 883 BOOL first_byte = FALSE;
 884 gstring * g =
 885   string_fmt_append(NULL, "=?%s?Q?", charset ? charset : US"iso-8859-1");
 886
 887 hlen = l = g->ptr;
 888
 889 for (s = string; len > 0; s++, len--)
 890   {
 891   int ch = *s;
 892
 893   if (g->ptr - l > 67 && !first_byte)
 894     {
 895     g = fold ? string_catn(g, US"?=\n ", 4) : string_catn(g, US"?= ", 3);
 896     l = g->ptr;
 897     g = string_catn(g, g->s, hlen);
 898     }
 899
 900   if (  ch < 33 || ch > 126
 901      || Ustrchr("?=()<>@,;:\\\".[]_", ch) != NULL)
 902     {
 903     if (ch == ' ')
 904       {
 905       g = string_catn(g, US"_", 1);
 906       first_byte = FALSE;
 907       }
 908     else
 909       {
 910       g = string_fmt_append(g, "=%02X", ch);
 911       coded = TRUE;
 912       first_byte = !first_byte;
 913       }
 914     }
 915   else
 916     { g = string_catn(g, s, 1); first_byte = FALSE; }
 917   }
 918
 919 if (coded)
 920   string = string_from_gstring(g = string_catn(g, US"?=", 2));
 921 else
 922   g->ptr = -1;
 923
 924 gstring_release_unused(g);
 925 return string;
 926 }
 927
 928
 929
 930
 931 /*************************************************
 932 *            Fix up an RFC 822 "phrase"          *
 933 *************************************************/
 934
 935 /* This function is called to repair any syntactic defects in the "phrase" part
 936 of an RFC822 address. In particular, it is applied to the user's name as read
 937 from the passwd file when accepting a local message, and to the data from the
 938 -F option.
 939
 940 If the string contains existing quoted strings or comments containing
 941 freestanding quotes, then we just quote those bits that need quoting -
 942 otherwise it would get awfully messy and probably not look good. If not, we
 943 quote the whole thing if necessary. Thus
 944
 945    John Q. Smith            =>  "John Q. Smith"
 946    John "Jack" Smith        =>  John "Jack" Smith
 947    John "Jack" Q. Smith     =>  John "Jack" "Q." Smith
 948    John (Jack) Q. Smith     =>  "John (Jack) Q. Smith"
 949    John ("Jack") Q. Smith   =>  John ("Jack") "Q." Smith
 950 but
 951    John (\"Jack\") Q. Smith =>  "John (\"Jack\") Q. Smith"
 952
 953 Sheesh! This is tedious code. It is a great pity that the syntax of RFC822 is
 954 the way it is...
 955
 956 August 2000: Additional code added:
 957
 958   Previously, non-printing characters were turned into question marks, which do
 959   not need to be quoted.
 960
 961   Now, a different tactic is used if there are any non-printing ASCII
 962   characters. The encoding method from RFC 2047 is used, assuming iso-8859-1 as
 963   the character set.
 964
 965   We *could* use this for all cases, getting rid of the messy original code,
 966   but leave it for now. It would complicate simple cases like "John Q. Smith".
 967
 968 The result is passed back in allocated memory.
 969
 970 Arguments:
 971   phrase       an RFC822 phrase
 972   len          the length of the phrase
 973
 974 Returns:       the fixed RFC822 phrase
 975 */
 976
 977 const uschar *
 978 parse_fix_phrase(const uschar *phrase, int len)
 979 {
 980 int ch, i;
 981 BOOL quoted = FALSE;
 982 const uschar *s, *end;
 983 uschar * buffer;
 984 uschar *t, *yield;
 985
 986 while (len > 0 && isspace(*phrase)) { phrase++; len--; }
 987
 988 /* See if there are any non-printing characters, and if so, use the RFC 2047
 989 encoding for the whole thing. */
 990
 991 for (i = 0, s = phrase; i < len; i++, s++)
 992   if ((*s < 32 && *s != '\t') || *s > 126) break;
 993
 994 if (i < len)
 995   return parse_quote_2047(phrase, len, headers_charset, FALSE);
 996
 997 /* No non-printers; use the RFC 822 quoting rules */
 998
 999 if (len <= 0 || len >= INT_MAX/4)
1000   return string_copy_taint(CUS"", phrase);
1001
1002 buffer = store_get((len+1)*4, phrase);
1003
1004 s = phrase;
1005 end = s + len;
1006 yield = t = buffer + 1;
1007
1008 while (s < end)
1009   {
1010   ch = *s++;
1011
1012   /* Copy over quoted strings, remembering we encountered one */
1013
1014   if (ch == '\"')
1015     {
1016     *t++ = '\"';
1017     while (s < end && (ch = *s++) != '\"')
1018       {
1019       *t++ = ch;
1020       if (ch == '\\' && s < end) *t++ = *s++;
1021       }
1022     *t++ = '\"';
1023     if (s >= end) break;
1024     quoted = TRUE;
1025     }
1026
1027   /* Copy over comments, noting if they contain freestanding quote
1028   characters */
1029
1030   else if (ch == '(')
1031     {
1032     int level = 1;
1033     *t++ = '(';
1034     while (s < end)
1035       {
1036       ch = *s++;
1037       *t++ = ch;
1038       if (ch == '(') level++;
1039       else if (ch == ')') { if (--level <= 0) break; }
1040       else if (ch == '\\' && s < end) *t++ = *s++ & 127;
1041       else if (ch == '\"') quoted = TRUE;
1042       }
1043     if (ch == 0)
1044       {
1045       while (level--) *t++ = ')';
1046       break;
1047       }
1048     }
1049
1050   /* Handle special characters that need to be quoted */
1051
1052   else if (Ustrchr(")<>@,;:\\.[]", ch) != NULL)
1053     {
1054     /* If hit previous quotes just make one quoted "word" */
1055
1056     if (quoted)
1057       {
1058       uschar *tt = t++;
1059       while (*(--tt) != ' ' && *tt != '\"' && *tt != ')') tt[1] = *tt;
1060       tt[1] = '\"';
1061       *t++ = ch;
1062       while (s < end)
1063         {
1064         ch = *s++;
1065         if (ch == ' ' || ch == '\"') { s--; break; } else *t++ = ch;
1066         }
1067       *t++ = '\"';
1068       }
1069
1070     /* Else quote the whole string so far, and the rest up to any following
1071     quotes. We must treat anything following a backslash as a literal. */
1072
1073     else
1074       {
1075       BOOL escaped = (ch == '\\');
1076       *(--yield) = '\"';
1077       *t++ = ch;
1078
1079       /* Now look for the end or a quote */
1080
1081       while (s < end)
1082         {
1083         ch = *s++;
1084
1085         /* Handle escaped pairs */
1086
1087         if (escaped)
1088           {
1089           *t++ = ch;
1090           escaped = FALSE;
1091           }
1092
1093         else if (ch == '\\')
1094           {
1095           *t++ = ch;
1096           escaped = TRUE;
1097           }
1098
1099         /* If hit subsequent quotes, insert our quote before any trailing
1100         spaces and back up to re-handle the quote in the outer loop. */
1101
1102         else if (ch == '\"')
1103           {
1104           int count = 0;
1105           while (t[-1] == ' ') { t--; count++; }
1106           *t++ = '\"';
1107           while (count-- > 0) *t++ = ' ';
1108           s--;
1109           break;
1110           }
1111
1112         /* If hit a subsequent comment, check it for unescaped quotes,
1113         and if so, end our quote before it. */
1114
1115         else if (ch == '(')
1116           {
1117           const uschar *ss = s;     /* uschar after '(' */
1118           int level = 1;
1119           while(ss < end)
1120             {
1121             ch = *ss++;
1122             if (ch == '(') level++;
1123             else if (ch == ')') { if (--level <= 0) break; }
1124             else if (ch == '\\' && ss+1 < end) ss++;
1125             else if (ch == '\"') { quoted = TRUE; break; }
1126             }
1127
1128           /* Comment contains unescaped quotes; end our quote before
1129           the start of the comment. */
1130
1131           if (quoted)
1132             {
1133             int count = 0;
1134             while (t[-1] == ' ') { t--; count++; }
1135             *t++ = '\"';
1136             while (count-- > 0) *t++ = ' ';
1137             break;
1138             }
1139
1140           /* Comment does not contain unescaped quotes; include it in
1141           our quote. */
1142
1143           else
1144             {
1145             if (ss >= end) ss--;
1146             *t++ = '(';
1147             if (ss > s)
1148               {
1149               Ustrncpy(t, s, ss-s);
1150               t += ss-s;
1151               s = ss;
1152               }
1153             }
1154           }
1155
1156         /* Not a comment or quote; include this character in our quotes. */
1157
1158         else *t++ = ch;
1159         }
1160       }
1161
1162     /* Add a final quote if we hit the end of the string. */
1163
1164     if (s >= end) *t++ = '\"';
1165     }
1166
1167   /* Non-special character; just copy it over */
1168
1169   else *t++ = ch;
1170   }
1171
1172 *t = 0;
1173 store_release_above(t+1);
1174 return yield;
1175 }
1176
1177
1178 /*************************************************
1179 *          Extract addresses from a list         *
1180 *************************************************/
1181
1182 /* This function is called by the redirect router to scan a string containing a
1183 list of addresses separated by commas (with optional white space) or by
1184 newlines, and to generate a chain of address items from them. In other words,
1185 to unpick data from an alias or .forward file.
1186
1187 The SunOS5 documentation for alias files is not very clear on the syntax; it
1188 does not say that either a comma or a newline can be used for separation.
1189 However, that is the way Smail does it, so we follow suit.
1190
1191 If a # character is encountered in a white space position, then characters from
1192 there to the next newline are skipped.
1193
1194 If an unqualified address begins with '\', just skip that character. This gives
1195 compatibility with Sendmail's use of \ to prevent looping. Exim has its own
1196 loop prevention scheme which handles other cases too - see the code in
1197 route_address().
1198
1199 An "address" can be a specification of a file or a pipe; the latter may often
1200 need to be quoted because it may contain spaces, but we don't want to retain
1201 the quotes. Quotes may appear in normal addresses too, and should be retained.
1202 We can distinguish between these cases, because in addresses, quotes are used
1203 only for parts of the address, not the whole thing. Therefore, we remove quotes
1204 from items when they entirely enclose them, but not otherwise.
1205
1206 An "address" can also be of the form :include:pathname to include a list of
1207 addresses contained in the specified file.
1208
Any unqualified addresses are qualified, and rewritten if necessary, via the
rewrite_address() or rewrite_address_qualify() function.
1211
1212 Arguments:
1213   s                the list of addresses (typically a complete
1214                      .forward file or a list of entries in an alias file)
1215   options          option bits for permitting or denying various special cases;
1216                      not all bits are relevant here - some are for filter
1217                      files; those we use here are:
1218                        RDO_DEFER
1219                        RDO_FREEZE
1220                        RDO_FAIL
1221                        RDO_BLACKHOLE
1222                        RDO_REWRITE
1223                        RDO_INCLUDE
1224   anchor           where to hang the chain of newly-created addresses. This
1225                      should be initialized to NULL.
1226   error            where to return an error text
  incoming_domain  domain of the incoming address; used to qualify unqualified
                     local parts preceded by \
1229   directory        if NULL, no checks are done on :include: files
1230                    otherwise, included file names must start with the given
1231                      directory
1232   syntax_errors    if not NULL, it carries on after syntax errors in addresses,
1233                      building up a list of errors as error blocks chained on
1234                      here.
1235
1236 Returns:      FF_DELIVERED      addresses extracted
1237               FF_NOTDELIVERED   no addresses extracted, but no errors
1238               FF_BLACKHOLE      :blackhole:
1239               FF_DEFER          :defer:
1240               FF_FAIL           :fail:
1241               FF_INCLUDEFAIL    some problem with :include:; *error set
1242               FF_ERROR          other problems; *error is set
1243 */
1244
1245 int
1246 parse_forward_list(const uschar *s, int options, address_item **anchor,
1247   uschar **error, const uschar *incoming_domain, const uschar *directory,
1248   error_block **syntax_errors)
1249 {
1250 int count = 0;
1251
1252 DEBUG(D_route) debug_printf("parse_forward_list: %s\n", s);
1253
1254 for (;;)
1255   {
1256   int len, special = 0, specopt = 0, specbit = 0;
1257   const uschar * ss, * nexts;
1258   address_item * addr;
1259   BOOL inquote = FALSE;
1260
1261   for (;;)
1262     {
1263     while (isspace(*s) || *s == ',') s++;
1264     if (*s == '#') { while (*s && *s != '\n') s++; } else break;
1265     }
1266
1267   /* When we reach the end of the list, we return FF_DELIVERED if any child
1268   addresses have been generated. If nothing has been generated, there are two
1269   possibilities: either the list is really empty, or there were syntax errors
1270   that are being skipped. (If syntax errors are not being skipped, an FF_ERROR
1271   return is generated on hitting a syntax error and we don't get here.) For a
1272   truly empty list we return FF_NOTDELIVERED so that the router can decline.
1273   However, if the list is empty only because syntax errors were skipped, we
1274   return FF_DELIVERED. */
1275
1276   if (!*s)
1277     {
1278     return (count > 0 || (syntax_errors && *syntax_errors))
1279       ?  FF_DELIVERED : FF_NOTDELIVERED;
1280
1281     /* This previous code returns FF_ERROR if nothing is generated but a
1282     syntax error has been skipped. I now think it is the wrong approach, but
1283     have left this here just in case, and for the record. */
1284
1285 #ifdef NEVER
1286     if (count > 0) return FF_DELIVERED;   /* Something was generated */
1287
1288     if (!syntax_errors ||          /* Not skipping syntax errors, or */
1289        !*syntax_errors)            /*   we didn't actually skip any */
1290       return FF_NOTDELIVERED;
1291
1292     *error = string_sprintf("no addresses generated: syntax error in %s: %s",
1293        (*syntax_errors)->text2, (*syntax_errors)->text1);
1294     return FF_ERROR;
1295 #endif
1296     }
1297
1298   /* Find the end of the next address. Quoted strings in addresses may contain
1299   escaped characters; I haven't found a proper specification of .forward or
1300   alias files that mentions the quoting properties, but it seems right to do
1301   the escaping thing in all cases, so use the function that finds the end of an
1302   address. However, don't let a quoted string extend over the end of a line. */
1303
1304   ss = parse_find_address_end(s, TRUE);
1305
1306   /* Remember where we finished, for starting the next one. */
1307
1308   nexts = ss;
1309
1310   /* Remove any trailing spaces; we know there's at least one non-space. */
1311
1312   while (isspace(ss[-1])) ss--;
1313
1314   /* We now have s->start and ss->end of the next address. Remove quotes
1315   if they completely enclose, remembering the address started with a quote
1316   for handling pipes and files. Another round of removal of leading and
1317   trailing spaces is then required. */
1318
1319   if (*s == '\"' && ss[-1] == '\"')
1320     {
1321     s++;
1322     ss--;
1323     inquote = TRUE;
1324     while (s < ss && isspace(*s)) s++;
1325     while (ss > s && isspace(ss[-1])) ss--;
1326     }
1327
1328   /* Set up the length of the address. */
1329
1330   len = ss - s;
1331
1332   DEBUG(D_route) debug_printf("extract item: %.*s\n", len, s);
1333
1334   /* Handle special addresses if permitted. If the address is :unknown:
1335   ignore it - this is for backward compatibility with old alias files. You
1336   don't need to use it nowadays - just generate an empty string. For :defer:,
1337   :blackhole:, or :fail: we have to set up the error message and give up right
1338   away. */
1339
1340   if (Ustrncmp(s, ":unknown:", len) == 0)
1341     {
1342     s = nexts;
1343     continue;
1344     }
1345
1346   if      (Ustrncmp(s, ":defer:", 7) == 0)
1347     { special = FF_DEFER; specopt = RDO_DEFER; }  /* specbit is 0 */
1348   else if (Ustrncmp(s, ":blackhole:", 11) == 0)
1349     { special = FF_BLACKHOLE; specopt = specbit = RDO_BLACKHOLE; }
1350   else if (Ustrncmp(s, ":fail:", 6) == 0)
1351     { special = FF_FAIL; specopt = RDO_FAIL; }  /* specbit is 0 */
1352
1353   if (special)
1354     {
1355     uschar * ss = Ustrchr(s+1, ':') + 1; /* line after the special... */
1356     if ((options & specopt) == specbit)
1357       {
1358       *error = string_sprintf("\"%.*s\" is not permitted", len, s);
1359       return FF_ERROR;
1360       }
1361     while (*ss && isspace(*ss)) ss++;   /* skip leading whitespace */
1362     if ((len = Ustrlen(ss)) > 0)        /* ignore trailing newlines */
1363       for (const uschar * t = ss + len - 1; t >= ss && *t == '\n'; t--) len--;
1364     *error = string_copyn(ss, len);     /* becomes the error */
1365     return special;
1366     }
1367
1368   /* If the address is of the form :include:pathname, read the file, and call
1369   this function recursively to extract the addresses from it. If directory is
1370   NULL, do no checks. Otherwise, insist that the file name starts with the
1371   given directory and is a regular file. */
1372
1373   if (Ustrncmp(s, ":include:", 9) == 0)
1374     {
1375     uschar * filebuf;
1376     uschar filename[256];
1377     const uschar * t = s+9;
1378     int flen = len - 9;
1379     int frc;
1380     struct stat statbuf;
1381     address_item * last;
1382     FILE * f;
1383
1384     while (flen > 0 && isspace(*t)) { t++; flen--; }
1385
1386     if (flen <= 0)
1387       {
1388       *error = US"file name missing after :include:";
1389       return FF_ERROR;
1390       }
1391
1392     if (flen > sizeof(filename)-1)
1393       {
1394       *error = string_sprintf("included file name \"%s\" is too long", t);
1395       return FF_ERROR;
1396       }
1397
1398     Ustrncpy(filename, t, flen);
1399     filename[flen] = 0;
1400
1401     /* Insist on absolute path */
1402
1403     if (filename[0] != '/')
1404       {
1405       *error = string_sprintf("included file \"%s\" is not an absolute path",
1406         filename);
1407       return FF_ERROR;
1408       }
1409
1410     /* Check if include is permitted */
1411
1412     if (options & RDO_INCLUDE)
1413       {
1414       *error = US"included files not permitted";
1415       return FF_ERROR;
1416       }
1417
1418     if (is_tainted(filename))
1419       {
1420       *error = string_sprintf("Tainted name '%s' for included file  not permitted\n",
1421        filename);
1422       return FF_ERROR;
1423       }
1424
1425     /* Check file name if required */
1426
1427     if (directory)
1428       {
1429       int len = Ustrlen(directory);
1430       uschar * p;
1431
1432       while (len > 0 && directory[len-1] == '/') len--;         /* ignore trailing '/' */
1433       p = filename + len;
1434       if (Ustrncmp(filename, directory, len) != 0 || *p != '/')
1435         {
1436         *error = string_sprintf("included file %s is not in directory %s",
1437           filename, directory);
1438         return FF_ERROR;
1439         }
1440
1441 #ifdef EXIM_HAVE_OPENAT
1442       /* It is necessary to check that every component inside the directory
1443       is NOT a symbolic link, in order to keep the file inside the directory.
1444       This is mighty tedious. We open the directory and openat every component,
1445       with a flag that fails symlinks. */
1446
1447       {
1448       int fd = exim_open2(CCS directory, O_RDONLY);
1449       if (fd < 0)
1450         {
1451         *error = string_sprintf("failed to open directory %s", directory);
1452         return FF_ERROR;
1453         }
1454       while (*p)
1455         {
1456         uschar temp;
1457         int fd2;
1458         uschar * q = p + 1;             /* skip dividing '/' */
1459
1460         while (*q == '/') q++;          /* skip extra '/' */
1461         while (*++p && *p != '/') ;     /* end of component */
1462         temp = *p;
1463         *p = '\0';
1464
1465         fd2 = exim_openat(fd, CS q, O_RDONLY|O_NOFOLLOW);
1466         close(fd);
1467         *p = temp;
1468         if (fd2 < 0)
1469           {
1470           *error = string_sprintf("failed to open %s (component of included "
1471             "file); could be symbolic link", filename);
1472           return FF_ERROR;
1473           }
1474         fd = fd2;
1475         }
1476       f = fdopen(fd, "rb");
1477       }
1478 #else
1479       /* It is necessary to check that every component inside the directory
1480       is NOT a symbolic link, in order to keep the file inside the directory.
1481       This is mighty tedious. It is also not totally foolproof in that it
1482       leaves the possibility of a race attack, but I don't know how to do
1483       any better. */
1484
1485       while (*p)
1486         {
1487         int temp;
1488         while (*++p && *p != '/');
1489         temp = *p;
1490         *p = 0;
1491         if (Ulstat(filename, &statbuf) != 0)
1492           {
1493           *error = string_sprintf("failed to stat %s (component of included "
1494             "file)", filename);
1495           *p = temp;
1496           return FF_ERROR;
1497           }
1498
1499         *p = temp;
1500
1501         if ((statbuf.st_mode & S_IFMT) == S_IFLNK)
1502           {
1503           *error = string_sprintf("included file %s in the %s directory "
1504             "involves a symbolic link", filename, directory);
1505           return FF_ERROR;
1506           }
1507         }
1508 #endif
1509       }
1510
1511 #ifdef EXIM_HAVE_OPENAT
1512     else
1513 #endif
1514       /* Open and stat the file */
1515       f = Ufopen(filename, "rb");
1516
1517     if (!f)
1518       {
1519       *error = string_open_failed("included file %s", filename);
1520       return FF_INCLUDEFAIL;
1521       }
1522
1523     if (fstat(fileno(f), &statbuf) != 0)
1524       {
1525       *error = string_sprintf("failed to stat included file %s: %s",
1526         filename, strerror(errno));
1527       (void)fclose(f);
1528       return FF_INCLUDEFAIL;
1529       }
1530
1531     /* If directory was checked, double check that we opened a regular file */
1532
1533     if (directory && (statbuf.st_mode & S_IFMT) != S_IFREG)
1534       {
1535       *error = string_sprintf("included file %s is not a regular file in "
1536         "the %s directory", filename, directory);
1537       return FF_ERROR;
1538       }
1539
1540     /* Get a buffer and read the contents */
1541
1542     if (statbuf.st_size > MAX_INCLUDE_SIZE)
1543       {
1544       *error = string_sprintf("included file %s is too big (max %d)",
1545         filename, MAX_INCLUDE_SIZE);
1546       return FF_ERROR;
1547       }
1548
1549     filebuf = store_get(statbuf.st_size + 1, filename);
1550     if (fread(filebuf, 1, statbuf.st_size, f) != statbuf.st_size)
1551       {
1552       *error = string_sprintf("error while reading included file %s: %s",
1553         filename, strerror(errno));
1554       (void)fclose(f);
1555       return FF_ERROR;
1556       }
1557     filebuf[statbuf.st_size] = 0;
1558     (void)fclose(f);
1559
1560     addr = NULL;
1561     frc = parse_forward_list(filebuf, options, &addr,
1562       error, incoming_domain, directory, syntax_errors);
1563     if (frc != FF_DELIVERED && frc != FF_NOTDELIVERED) return frc;
1564
1565     if (addr)
1566       {
1567       for (last = addr; last->next; last = last->next) count++;
1568       last->next = *anchor;
1569       *anchor = addr;
1570       count++;
1571       }
1572     }
1573
1574   /* Else (not :include:) ensure address is syntactically correct and fully
1575   qualified if not a pipe or a file, removing a leading \ if present on an
1576   unqualified address. For pipes and files we must handle quoting. It's
1577   not quite clear exactly what to do for partially quoted things, but the
1578   common case of having the whole thing in quotes is straightforward. If this
1579   was the case, inquote will have been set TRUE above and the quotes removed.
1580
1581   There is a possible ambiguity over addresses whose local parts start with
1582   a vertical bar or a slash, and the latter do in fact occur, thanks to X.400.
1583   Consider a .forward file that contains the line
1584
1585      /X=xxx/Y=xxx/OU=xxx/@some.gate.way
1586
1587   Is this a file or an X.400 address? Does it make any difference if it is in
1588   quotes? On the grounds that file names of this type are rare, Exim treats
1589   something that parses as an RFC 822 address and has a domain as an address
1590   rather than a file or a pipe. This is also how an address such as the above
1591   would be treated if it came in from outside. */
1592
1593   else
1594     {
1595     int start, end, domain;
1596     const uschar *recipient = NULL;
1597     uschar * s_ltd = string_copyn(s, len);
1598
1599     /* If it starts with \ and the rest of it parses as a valid mail address
1600     without a domain, carry on with that address, but qualify it with the
1601     incoming domain. Otherwise arrange for the address to fall through,
1602     causing an error message on the re-parse. */
1603
1604     if (*s_ltd == '\\')
1605       {
1606       recipient =
1607         parse_extract_address(s_ltd+1, error, &start, &end, &domain, FALSE);
1608       if (recipient)
1609         recipient = domain != 0 ? NULL :
1610           string_sprintf("%s@%s", recipient, incoming_domain);
1611       }
1612
1613     /* Try parsing the item as an address. */
1614
1615     if (!recipient) recipient =
1616       parse_extract_address(s_ltd, error, &start, &end, &domain, FALSE);
1617
1618     /* If item starts with / or | and is not a valid address, or there
1619     is no domain, treat it as a file or pipe. If it was a quoted item,
1620     remove the quoting occurrences of \ within it. */
1621
1622     if ((*s_ltd == '|' || *s_ltd == '/') && (!recipient || domain == 0))
1623       {
1624       uschar * t = store_get(Ustrlen(s_ltd) + 1, s_ltd);
1625       uschar * p = t, * q = s_ltd;
1626
1627       while (*q)
1628         {
1629         if (inquote)
1630           {
1631           *p++ = *q == '\\' ? *++q : *q;
1632           q++;
1633           }
1634         else *p++ = *q++;
1635         }
1636       *p = 0;
1637       addr = deliver_make_addr(t, TRUE);
1638       setflag(addr, af_pfr);                   /* indicates pipe/file/reply */
1639       if (*s_ltd != '|') setflag(addr, af_file);   /* indicates file */
1640       }
1641
1642     /* Item must be an address. Complain if not, else qualify, rewrite and set
1643     up the control block. It appears that people are in the habit of using
1644     empty addresses but with comments as a way of putting comments into
1645     alias and forward files. Therefore, ignore the error "empty address".
1646     Mailing lists might want to tolerate syntax errors; there is therefore
1647     an option to do so. */
1648
1649     else
1650       {
1651       if (!recipient)
1652         {
1653         if (Ustrcmp(*error, "empty address") == 0)
1654           {
1655           *error = NULL;
1656           s = nexts;
1657           continue;
1658           }
1659
1660         if (syntax_errors)
1661           {
1662           error_block * e = store_get(sizeof(error_block), GET_UNTAINTED);
1663           error_block * last = *syntax_errors;
1664           if (last)
1665             {
1666             while (last->next) last = last->next;
1667             last->next = e;
1668             }
1669           else
1670             *syntax_errors = e;
1671           e->next = NULL;
1672           e->text1 = *error;
1673           e->text2 = s_ltd;
1674           s = nexts;
1675           continue;
1676           }
1677         else
1678           {
1679           *error = string_sprintf("%s in \"%s\"", *error, s_ltd);
1680           return FF_ERROR;
1681           }
1682         }
1683
1684       /* Address was successfully parsed. Rewrite, and then make an address
1685       block. */
1686
1687       recipient = options & RDO_REWRITE
1688         ? rewrite_address(recipient, TRUE, FALSE, global_rewrite_rules,
1689                           rewrite_existflags)
1690         : rewrite_address_qualify(recipient, TRUE);     /*XXX loses track of const */
1691       addr = deliver_make_addr(US recipient, TRUE);  /* TRUE => copy recipient, so deconst ok */
1692       }
1693
1694     /* Add the original data to the output chain. */
1695
1696     addr->next = *anchor;
1697     *anchor = addr;
1698     count++;
1699     }
1700
1701   /* Advance pointer for the next address */
1702
1703   s = nexts;
1704   }
1705 }
1706
1707
1708 /*************************************************
1709 *            Extract a Message-ID                *
1710 *************************************************/
1711
1712 /* This function is used to extract message ids from In-Reply-To: and
1713 References: header lines.
1714
1715 Arguments:
1716   str          pointer to the start of the message-id
1717   yield        put pointer to the message id (in dynamic memory) here
1718   error        put error message here on failure
1719
1720 Returns:       points after the processed message-id or NULL on error
1721 */
1722
1723 const uschar *
1724 parse_message_id(const uschar *str, uschar **yield, uschar **error)
1725 {
1726 uschar *domain = NULL;
1727 uschar *id;
1728 rmark reset_point;
1729
1730 str = skip_comment(str);
1731 if (*str != '<')
1732   {
1733   *error = US"Missing '<' before message-id";
1734   return NULL;
1735   }
1736
1737 /* Getting a block the size of the input string will definitely be sufficient
1738 for the answer, but it may also be very long if we are processing a header
1739 line. Therefore, take care to release unwanted store afterwards. */
1740
1741 reset_point = store_mark();
1742 id = *yield = store_get(Ustrlen(str) + 1, str);
1743 *id++ = *str++;
1744
1745 str = read_addr_spec(str, id, '>', error, &domain);
1746
1747 if (!*error)
1748   {
1749   if (*str != '>') *error = US"Missing '>' after message-id";
1750     else if (domain == NULL) *error = US"domain missing in message-id";
1751   }
1752
1753 if (*error)
1754   {
1755   store_reset(reset_point);
1756   return NULL;
1757   }
1758
1759 while (*id) id++;
1760 *id++ = *str++;
1761 *id++ = 0;
1762 store_release_above(id);
1763
1764 return skip_comment(str);
1765 }
1766
1767
1768 /*************************************************
1769 *        Parse a fixed digit number              *
1770 *************************************************/
1771
1772 /* Parse a string containing an ASCII encoded fixed digits number
1773
1774 Arguments:
1775   str          pointer to the start of the ASCII encoded number
1776   n            pointer to the resulting value
1777   digits       number of required digits
1778
1779 Returns:       points after the processed date or NULL on error
1780 */
1781
1782 static const uschar *
1783 parse_number(const uschar *str, int *n, int digits)
1784 {
1785 *n=0;
1786 while (digits--)
1787   {
1788   if (*str<'0' || *str>'9') return NULL;
1789   *n=10*(*n)+(*str++-'0');
1790   }
1791 return str;
1792 }
1793
1794
1795 /*************************************************
1796 *        Parse a RFC 2822 day of week            *
1797 *************************************************/
1798
1799 /* Parse the day of the week from a RFC 2822 date, but do not
1800    decode it, because it is only for humans.
1801
1802 Arguments:
1803   str          pointer to the start of the day of the week
1804
1805 Returns:       points after the parsed day or NULL on error
1806 */
1807
1808 static const uschar *
1809 parse_day_of_week(const uschar * str)
1810 {
1811 /*
1812 day-of-week     =       ([FWS] day-name) / obs-day-of-week
1813
1814 day-name        =       "Mon" / "Tue" / "Wed" / "Thu" /
1815                         "Fri" / "Sat" / "Sun"
1816
1817 obs-day-of-week =       [CFWS] day-name [CFWS]
1818 */
1819
1820 static const uschar *day_name[7]={ US"mon", US"tue", US"wed", US"thu", US"fri", US"sat", US"sun" };
1821 int i;
1822 uschar day[4];
1823
1824 str = skip_comment(str);
1825 for (i = 0; i < 3; ++i)
1826   {
1827   if ((day[i] = tolower(*str)) == '\0') return NULL;
1828   ++str;
1829   }
1830 day[3] = '\0';
1831 for (i = 0; i<7; ++i) if (Ustrcmp(day,day_name[i]) == 0) break;
1832 if (i == 7) return NULL;
1833 return skip_comment(str);
1834 }
1835
1836
1837 /*************************************************
1838 *            Parse a RFC 2822 date               *
1839 *************************************************/
1840
1841 /* Parse the date part of a RFC 2822 date-time, extracting the
1842    day, month and year.
1843
1844 Arguments:
1845   str          pointer to the start of the date
1846   d            pointer to the resulting day
1847   m            pointer to the resulting month
1848   y            pointer to the resulting year
1849
1850 Returns:       points after the processed date or NULL on error
1851 */
1852
1853 static const uschar *
1854 parse_date(const uschar *str, int *d, int *m, int *y)
1855 {
1856 /*
1857 date            =       day month year
1858
1859 year            =       4*DIGIT / obs-year
1860
1861 obs-year        =       [CFWS] 2*DIGIT [CFWS]
1862
1863 month           =       (FWS month-name FWS) / obs-month
1864
1865 month-name      =       "Jan" / "Feb" / "Mar" / "Apr" /
1866                         "May" / "Jun" / "Jul" / "Aug" /
1867                         "Sep" / "Oct" / "Nov" / "Dec"
1868
1869 obs-month       =       CFWS month-name CFWS
1870
1871 day             =       ([FWS] 1*2DIGIT) / obs-day
1872
1873 obs-day         =       [CFWS] 1*2DIGIT [CFWS]
1874 */
1875
1876 const uschar * s, * n;
1877 static const uschar *month_name[]={ US"jan", US"feb", US"mar", US"apr", US"may", US"jun", US"jul", US"aug", US"sep", US"oct", US"nov", US"dec" };
1878 int i;
1879 uschar month[4];
1880
1881 str = skip_comment(str);
1882 if ((str = parse_number(str,d,1)) == NULL) return NULL;
1883
1884 if (*str>='0' && *str<='9') *d = 10*(*d)+(*str++-'0');
1885 s = skip_comment(str);
1886 if (s == str) return NULL;
1887 str = s;
1888
1889 for (i = 0; i<3; ++i) if ((month[i]=tolower(*(str+i))) == '\0') return NULL;
1890 month[3] = '\0';
1891 for (i = 0; i<12; ++i) if (Ustrcmp(month,month_name[i]) == 0) break;
1892 if (i == 12) return NULL;
1893 str+=3;
1894 *m = i;
1895 s = skip_comment(str);
1896 if (s == str) return NULL;
1897 str=s;
1898
1899 if ((n = parse_number(str,y,4)))
1900   {
1901   str = n;
1902   if (*y<1900) return NULL;
1903   *y = *y-1900;
1904   }
1905 else if ((n = parse_number(str,y,2)))
1906   {
1907   str = skip_comment(n);
1908   while (*(str-1) == ' ' || *(str-1) == '\t') --str; /* match last FWS later */
1909   if (*y<50) *y+=100;
1910   }
1911 else return NULL;
1912 return str;
1913 }
1914
1915
1916 /*************************************************
1917 *            Parse a RFC 2822 Time               *
1918 *************************************************/
1919
1920 /* Parse the time part of a RFC 2822 date-time, extracting the
1921    hour, minute, second and timezone.
1922
1923 Arguments:
1924   str          pointer to the start of the time
1925   h            pointer to the resulting hour
1926   m            pointer to the resulting minute
1927   s            pointer to the resulting second
1928   z            pointer to the resulting timezone (offset in seconds)
1929
1930 Returns:       points after the processed time or NULL on error
1931 */
1932
1933 static const uschar *
1934 parse_time(const uschar *str, int *h, int *m, int *s, int *z)
1935 {
1936 /*
1937 time            =       time-of-day FWS zone
1938
1939 time-of-day     =       hour ":" minute [ ":" second ]
1940
1941 hour            =       2DIGIT / obs-hour
1942
1943 obs-hour        =       [CFWS] 2DIGIT [CFWS]
1944
1945 minute          =       2DIGIT / obs-minute
1946
1947 obs-minute      =       [CFWS] 2DIGIT [CFWS]
1948
1949 second          =       2DIGIT / obs-second
1950
1951 obs-second      =       [CFWS] 2DIGIT [CFWS]
1952
1953 zone            =       (( "+" / "-" ) 4DIGIT) / obs-zone
1954
1955 obs-zone        =       "UT" / "GMT" /          ; Universal Time
1956                                                 ; North American UT
1957                                                 ; offsets
1958                         "EST" / "EDT" /         ; Eastern:  - 5/ - 4
1959                         "CST" / "CDT" /         ; Central:  - 6/ - 5
1960                         "MST" / "MDT" /         ; Mountain: - 7/ - 6
1961                         "PST" / "PDT" /         ; Pacific:  - 8/ - 7
1962
1963                         %d65-73 /               ; Military zones - "A"
1964                         %d75-90 /               ; through "I" and "K"
1965                         %d97-105 /              ; through "Z", both
1966                         %d107-122               ; upper and lower case
1967 */
1968
1969 const uschar * c;
1970
1971 str = skip_comment(str);
1972 if ((str = parse_number(str,h,2)) == NULL) return NULL;
1973 str = skip_comment(str);
1974 if (*str!=':') return NULL;
1975 ++str;
1976 str = skip_comment(str);
1977 if ((str = parse_number(str,m,2)) == NULL) return NULL;
1978 c = skip_comment(str);
1979 if (*str == ':')
1980   {
1981   ++str;
1982   str = skip_comment(str);
1983   if ((str = parse_number(str,s,2)) == NULL) return NULL;
1984   c = skip_comment(str);
1985   }
1986 if (c == str) return NULL;
1987 else str=c;
1988 if (*str == '+' || *str == '-')
1989   {
1990   int neg;
1991
1992   neg = (*str == '-');
1993   ++str;
1994   if ((str = parse_number(str,z,4)) == NULL) return NULL;
1995   *z = (*z/100)*3600+(*z%100)*60;
1996   if (neg) *z = -*z;
1997   }
1998 else
1999   {
2000   char zone[5];
2001   struct { const char *name; int off; } zone_name[10] =
2002   { {"gmt",0}, {"ut",0}, {"est",-5}, {"edt",-4}, {"cst",-6}, {"cdt",-5}, {"mst",-7}, {"mdt",-6}, {"pst",-8}, {"pdt",-7}};
2003   int i,j;
2004
2005   for (i = 0; i<4; ++i)
2006     {
2007     zone[i] = tolower(*(str+i));
2008     if (zone[i]<'a' || zone[i]>'z') break;
2009     }
2010   zone[i] = '\0';
2011   for (j = 0; j<10 && strcmp(zone,zone_name[j].name); ++j);
2012   /* Besides zones named in the grammar, RFC 2822 says other alphabetic */
2013   /* time zones should be treated as unknown offsets. */
2014   if (j<10)
2015     {
2016     *z = zone_name[j].off*3600;
2017     str+=i;
2018     }
2019   else if (zone[0]<'a' || zone[1]>'z') return 0;
2020   else
2021     {
2022     while ((*str>='a' && *str<='z') || (*str>='A' && *str<='Z')) ++str;
2023     *z = 0;
2024     }
2025   }
2026 return str;
2027 }
2028
2029
2030 /*************************************************
2031 *          Parse a RFC 2822 date-time            *
2032 *************************************************/
2033
2034 /* Parse a RFC 2822 date-time and return it in seconds since the epoch.
2035
2036 Arguments:
2037   str          pointer to the start of the date-time
2038   t            pointer to the parsed time
2039
2040 Returns:       points after the processed date-time or NULL on error
2041 */
2042
2043 const uschar *
2044 parse_date_time(const uschar *str, time_t *t)
2045 {
2046 /*
2047 date-time       =       [ day-of-week "," ] date FWS time [CFWS]
2048 */
2049
2050 struct tm tm;
2051 int zone;
2052 extern char **environ;
2053 char **old_environ;
2054 static char gmt0[]="TZ=GMT0";
2055 static char *gmt_env[]={ gmt0, (char*)0 };
2056 const uschar * try;
2057
2058 if ((try = parse_day_of_week(str)))
2059   {
2060   str = try;
2061   if (*str!=',') return 0;
2062   ++str;
2063   }
2064 if ((str = parse_date(str,&tm.tm_mday,&tm.tm_mon,&tm.tm_year)) == NULL) return NULL;
2065 if (*str!=' ' && *str!='\t') return NULL;
2066 while (*str == ' ' || *str == '\t') ++str;
2067 if ((str = parse_time(str,&tm.tm_hour,&tm.tm_min,&tm.tm_sec,&zone)) == NULL) return NULL;
2068 tm.tm_isdst = 0;
2069 old_environ = environ;
2070 environ = gmt_env;
2071 *t = mktime(&tm);
2072 environ = old_environ;
2073 if (*t == -1) return NULL;
2074 *t-=zone;
2075 return skip_comment(str);
2076 }
2077
2078
2079
2080
2081 /*************************************************
2082 **************************************************
2083 *             Stand-alone test program           *
2084 **************************************************
2085 *************************************************/
2086
#if defined STAND_ALONE

/* Remove the newline that fgets() leaves at the end of a line, if there is
one. A final line without a newline, or an empty string, is left untouched. */

static void
strip_newline(uschar * line)
{
int len = Ustrlen(line);
if (len > 0 && line[len-1] == '\n') line[len-1] = 0;
}


/* The test program reads stdin in sections, one section per function under
test, in the order announced by the printf() calls below. Each section is a
series of lines and is ended by an empty line or by end of file. */

int main(void)
{
int start, end, domain;
uschar buffer[1024];

store_init();
big_buffer = store_malloc(big_buffer_size);

/* strip_trailing_dot = TRUE; */
allow_domain_literals = TRUE;

printf("Testing parse_fix_phrase\n");

while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
  {
  strip_newline(buffer);
  if (buffer[0] == 0) break;
  printf("%s\n", CS parse_fix_phrase(buffer, Ustrlen(buffer)));
  }

printf("Testing parse_extract_address without group syntax and without UTF-8\n");

while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
  {
  uschar *out;
  uschar *errmess;
  strip_newline(buffer);
  if (buffer[0] == 0) break;
  out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
  if (!out)
    printf("*** bad address: %s\n", errmess);
  else
    {
    uschar extract[1024];
    Ustrncpy(extract, buffer+start, end-start);
    extract[end-start] = 0;
    printf("%s %d %d %d \"%s\"\n", out, start, end, domain, extract);
    }
  }

printf("Testing parse_extract_address without group syntax but with UTF-8\n");

allow_utf8_domains = TRUE;
while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
  {
  uschar *out;
  uschar *errmess;
  strip_newline(buffer);
  if (buffer[0] == 0) break;
  out = parse_extract_address(buffer, &errmess, &start, &end, &domain, FALSE);
  if (!out)
    printf("*** bad address: %s\n", errmess);
  else
    {
    uschar extract[1024];
    Ustrncpy(extract, buffer+start, end-start);
    extract[end-start] = 0;
    printf("%s %d %d %d \"%s\"\n", out, start, end, domain, extract);
    }
  }
allow_utf8_domains = FALSE;

printf("Testing parse_extract_address with group syntax\n");

f.parse_allow_group = TRUE;
while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
  {
  uschar *out;
  uschar *errmess;
  uschar *s;
  strip_newline(buffer);
  if (buffer[0] == 0) break;
  s = buffer;
  while (*s)
    {
    /* Temporarily terminate the line at the end of the current address, and
    parse just that address. It starts at s, not at the start of the line, so
    start and end are offsets from s. */

    uschar *ss = parse_find_address_end(s, FALSE);
    int terminator = *ss;
    *ss = 0;
    out = parse_extract_address(s, &errmess, &start, &end, &domain, FALSE);
    *ss = terminator;

    if (!out)
      printf("*** bad address: %s\n", errmess);
    else
      {
      uschar extract[1024];
      Ustrncpy(extract, s+start, end-start);
      extract[end-start] = 0;
      printf("%s %d %d %d \"%s\"\n", out, start, end, domain, extract);
      }

    /* Move past the terminating character, unless it was the end of the
    line, and on to the start of the next address. */

    s = ss + (terminator? 1:0);
    Uskip_whitespace(&s);
    }
  }

printf("Testing parse_find_at\n");

while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
  {
  uschar *s;
  strip_newline(buffer);
  if (buffer[0] == 0) break;
  s = parse_find_at(buffer);
  if (s == NULL) printf("no @ found\n");
    else printf("offset = %d\n", (int)(s - buffer));
  }

printf("Testing parse_extract_addresses\n");

while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
  {
  uschar *errmess;
  int extracted;
  address_item *anchor = NULL;
  strip_newline(buffer);
  if (buffer[0] == 0) break;
  if ((extracted = parse_forward_list(buffer, -1, &anchor,
      &errmess, US"incoming.domain", NULL, NULL)) == FF_DELIVERED)
    {
    while (anchor != NULL)
      {
      address_item *addr = anchor;
      anchor = anchor->next;
      printf("%d %s\n", testflag(addr, af_pfr), addr->address);
      }
    }
  else printf("Failed: %d %s\n", extracted, errmess);
  }

printf("Testing parse_message_id\n");

while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
  {
  uschar *s, *t, *errmess;
  strip_newline(buffer);
  if (buffer[0] == 0) break;
  s = buffer;
  while (*s != 0)
    {
    s = parse_message_id(s, &t, &errmess);
    if (errmess != NULL)
      {
      printf("Failed: %s\n", errmess);
      break;
      }
    printf("%s\n", t);
    }
  }

return 0;
}

#endif

/* End of parse.c */
2242
2243 /* End of parse.c */