src/src/string.c

   1 /* $Cambridge: exim/src/src/string.c,v 1.4 2005/06/07 15:20:56 ph10 Exp $ */
   2
   3 /*************************************************
   4 *     Exim - an Internet mail transport agent    *
   5 *************************************************/
   6
   7 /* Copyright (c) University of Cambridge 1995 - 2005 */
   8 /* See the file NOTICE for conditions of use and distribution. */
   9
  10 /* Miscellaneous string-handling functions. Some are not required for
  11 utilities and tests, and are cut out by the COMPILE_UTILITY macro. */
  12
  13
  14 #include "exim.h"
  15
  16
  17 #ifndef COMPILE_UTILITY
  18 /*************************************************
  19 *            Test for IP address                 *
  20 *************************************************/
  21
  22 /* This used just to be a regular expression, but with IPv6 things are a bit
  23 more complicated. If the address contains a colon, it is assumed to be a v6
  24 address (assuming HAVE_IPV6 is set). If a mask is permitted and one is present,
  25 and maskptr is not NULL, its offset is placed there.
  26
  27 Arguments:
  28   s         a string
  29   maskptr   NULL if no mask is permitted to follow
  30             otherwise, points to an int where the offset of '/' is placed
  31
  32 Returns:    0 if the string is not a textual representation of an IP address
  33             4 if it is an IPv4 address
  34             6 if it is an IPv6 address
  35 */
  36
  37 int
  38 string_is_ip_address(uschar *s, int *maskptr)
  39 {
  40 int i;
  41 int yield = 4;
  42
  43 /* If an optional mask is permitted, check for it. If found, pass back the
  44 offset. */
  45
  46 if (maskptr != NULL)
  47   {
  48   uschar *ss = s + Ustrlen(s);
  49   *maskptr = 0;
  50   if (s != ss && isdigit(*(--ss)))
  51     {
  52     while (ss > s && isdigit(ss[-1])) ss--;
  53     if (ss > s && *(--ss) == '/') *maskptr = ss - s;
  54     }
  55   }
  56
  57 /* A colon anywhere in the string => IPv6 address */
  58
  59 if (Ustrchr(s, ':') != NULL)
  60   {
  61   BOOL had_double_colon = FALSE;
  62   BOOL v4end = FALSE;
  63   int count = 0;
  64
  65   yield = 6;
  66
  67   /* An IPv6 address must start with hex digit or double colon. A single
  68   colon is invalid. */
  69
  70   if (*s == ':' && *(++s) != ':') return 0;
  71
  72   /* Now read up to 8 components consisting of up to 4 hex digits each. There
  73   may be one and only one appearance of double colon, which implies any number
  74   of binary zero bits. The number of preceding components is held in count. */
  75
  76   for (count = 0; count < 8; count++)
  77     {
  78     /* If the end of the string is reached before reading 8 components, the
  79     address is valid provided a double colon has been read. This also applies
  80     if we hit the / that introduces a mask or the % that introduces the
  81     interface specifier (scope id) of a link-local address. */
  82
  83     if (*s == 0 || *s == '%' || *s == '/') return had_double_colon? yield : 0;
  84
  85     /* If a component starts with an additional colon, we have hit a double
  86     colon. This is permitted to appear once only, and counts as at least
  87     one component. The final component may be of this form. */
  88
  89     if (*s == ':')
  90       {
  91       if (had_double_colon) return 0;
  92       had_double_colon = TRUE;
  93       s++;
  94       continue;
  95       }
  96
  97     /* If the remainder of the string contains a dot but no colons, we
  98     can expect a trailing IPv4 address. This is valid if either there has
  99     been no double-colon and this is the 7th component (with the IPv4 address
 100     being the 7th & 8th components), OR if there has been a double-colon
 101     and fewer than 6 components. */
 102
 103     if (Ustrchr(s, ':') == NULL && Ustrchr(s, '.') != NULL)
 104       {
 105       if ((!had_double_colon && count != 6) ||
 106           (had_double_colon && count > 6)) return 0;
 107       v4end = TRUE;
 108       yield = 6;
 109       break;
 110       }
 111
 112     /* Check for at least one and not more than 4 hex digits for this
 113     component. */
 114
 115     if (!isxdigit(*s++)) return 0;
 116     if (isxdigit(*s) && isxdigit(*(++s)) && isxdigit(*(++s))) s++;
 117
 118     /* If the component is terminated by colon and there is more to
 119     follow, skip over the colon. If there is no more to follow the address is
 120     invalid. */
 121
 122     if (*s == ':' && *(++s) == 0) return 0;
 123     }
 124
 125   /* If about to handle a trailing IPv4 address, drop through. Otherwise
 126   all is well if we are at the end of the string or at the mask or at a percent
 127   sign, which introduces the interface specifier (scope id) of a link local
 128   address. */
 129
 130   if (!v4end) return (*s == 0 || *s == '%' || *s == '/')? yield : 0;
 131   }
 132
 133 /* Test for IPv4 address, which may be the tail-end of an IPv6 address. */
 134
 135 for (i = 0; i < 4; i++)
 136   {
 137   if (i != 0 && *s++ != '.') return 0;
 138   if (!isdigit(*s++)) return 0;
 139   if (isdigit(*s) && isdigit(*(++s))) s++;
 140   }
 141
 142 return (*s == 0 || *s == '/')? yield : 0;
 143 }
 144 #endif  /* COMPILE_UTILITY */
 145
 146
 147 /*************************************************
 148 *              Format message size               *
 149 *************************************************/
 150
 151 /* Convert a message size in bytes to printing form, rounding
 152 according to the magnitude of the number. A value of zero causes
 153 a string of spaces to be returned.
 154
 155 Arguments:
 156   size        the message size in bytes
 157   buffer      where to put the answer
 158
 159 Returns:      pointer to the buffer
 160               a string of exactly 5 characters is normally returned
 161 */
 162
 163 uschar *
 164 string_format_size(int size, uschar *buffer)
 165 {
 166 if (size == 0) Ustrcpy(CS buffer, "     ");
 167 else if (size < 1024) sprintf(CS buffer, "%5d", size);
 168 else if (size < 10*1024)
 169   sprintf(CS buffer, "%4.1fK", (double)size / 1024.0);
 170 else if (size < 1024*1024)
 171   sprintf(CS buffer, "%4dK", (size + 512)/1024);
 172 else if (size < 10*1024*1024)
 173   sprintf(CS buffer, "%4.1fM", (double)size / (1024.0 * 1024.0));
 174 else
 175   sprintf(CS buffer, "%4dM", (size + 512 * 1024)/(1024*1024));
 176 return buffer;
 177 }
 178
 179
 180
 181 #ifndef COMPILE_UTILITY
 182 /*************************************************
 183 *       Convert a number to base 62 format       *
 184 *************************************************/
 185
 186 /* Convert a long integer into an ASCII base 62 string. For Cygwin the value of
 187 BASE_62 is actually 36. Always return exactly 6 characters plus zero, in a
 188 static area.
 189
 190 Argument: a long integer
 191 Returns:  pointer to base 62 string
 192 */
 193
 194 uschar *
 195 string_base62(unsigned long int value)
 196 {
 197 static uschar yield[7];
 198 uschar *p = yield + sizeof(yield) - 1;
 199 *p = 0;
 200 while (p > yield)
 201   {
 202   *(--p) = base62_chars[value % BASE_62];
 203   value /= BASE_62;
 204   }
 205 return yield;
 206 }
 207 #endif  /* COMPILE_UTILITY */
 208
 209
 210
 211 #ifndef COMPILE_UTILITY
 212 /*************************************************
 213 *          Interpret escape sequence             *
 214 *************************************************/
 215
 216 /* This function is called from several places where escape sequences are to be
 217 interpreted in strings.
 218
 219 Arguments:
 220   pp       points a pointer to the initiating "\" in the string;
 221            the pointer gets updated to point to the final character
 222 Returns:   the value of the character escape
 223 */
 224
 225 int
 226 string_interpret_escape(uschar **pp)
 227 {
 228 int ch;
 229 uschar *p = *pp;
 230 ch = *(++p);
 231 if (isdigit(ch) && ch != '8' && ch != '9')
 232   {
 233   ch -= '0';
 234   if (isdigit(p[1]) && p[1] != '8' && p[1] != '9')
 235     {
 236     ch = ch * 8 + *(++p) - '0';
 237     if (isdigit(p[1]) && p[1] != '8' && p[1] != '9')
 238       ch = ch * 8 + *(++p) - '0';
 239     }
 240   }
 241 else switch(ch)
 242   {
 243   case 'n':  ch = '\n'; break;
 244   case 'r':  ch = '\r'; break;
 245   case 't':  ch = '\t'; break;
 246   case 'x':
 247   ch = 0;
 248   if (isxdigit(p[1]))
 249     {
 250     ch = ch * 16 +
 251       Ustrchr(hex_digits, tolower(*(++p))) - hex_digits;
 252     if (isxdigit(p[1])) ch = ch * 16 +
 253       Ustrchr(hex_digits, tolower(*(++p))) - hex_digits;
 254     }
 255   break;
 256   }
 257 *pp = p;
 258 return ch;
 259 }
 260 #endif  /* COMPILE_UTILITY */
 261
 262
 263
 264 #ifndef COMPILE_UTILITY
 265 /*************************************************
 266 *          Ensure string is printable            *
 267 *************************************************/
 268
 269 /* This function is called for critical strings. It checks for any
 270 non-printing characters, and if any are found, it makes a new copy
 271 of the string with suitable escape sequences. It is most often called by the
 272 macro string_printing(), which sets allow_tab TRUE.
 273
 274 Arguments:
 275   s             the input string
 276   allow_tab     TRUE to allow tab as a printing character
 277
 278 Returns:        string with non-printers encoded as printing sequences
 279 */
 280
 281 uschar *
 282 string_printing2(uschar *s, BOOL allow_tab)
 283 {
 284 int nonprintcount = 0;
 285 int length = 0;
 286 uschar *t = s;
 287 uschar *ss, *tt;
 288
 289 while (*t != 0)
 290   {
 291   int c = *t++;
 292   if (!mac_isprint(c) || (!allow_tab && c == '\t')) nonprintcount++;
 293   length++;
 294   }
 295
 296 if (nonprintcount == 0) return s;
 297
 298 /* Get a new block of store guaranteed big enough to hold the
 299 expanded string. */
 300
 301 ss = store_get(length + nonprintcount * 4 + 1);
 302
 303 /* Copy everying, escaping non printers. */
 304
 305 t = s;
 306 tt = ss;
 307
 308 while (*t != 0)
 309   {
 310   int c = *t;
 311   if (mac_isprint(c) && (allow_tab || c != '\t')) *tt++ = *t++; else
 312     {
 313     *tt++ = '\\';
 314     switch (*t)
 315       {
 316       case '\n': *tt++ = 'n'; break;
 317       case '\r': *tt++ = 'r'; break;
 318       case '\b': *tt++ = 'b'; break;
 319       case '\v': *tt++ = 'v'; break;
 320       case '\f': *tt++ = 'f'; break;
 321       case '\t': *tt++ = 't'; break;
 322       default: sprintf(CS tt, "%03o", *t); tt += 3; break;
 323       }
 324     t++;
 325     }
 326   }
 327 *tt = 0;
 328 return ss;
 329 }
 330 #endif  /* COMPILE_UTILITY */
 331
 332
 333
 334
 335 /*************************************************
 336 *            Copy and save string                *
 337 *************************************************/
 338
 339 /* This function assumes that memcpy() is faster than strcpy().
 340
 341 Argument: string to copy
 342 Returns:  copy of string in new store
 343 */
 344
 345 uschar *
 346 string_copy(uschar *s)
 347 {
 348 int len = Ustrlen(s) + 1;
 349 uschar *ss = store_get(len);
 350 memcpy(ss, s, len);
 351 return ss;
 352 }
 353
 354
 355
 356 /*************************************************
 357 *     Copy and save string in malloc'd store     *
 358 *************************************************/
 359
 360 /* This function assumes that memcpy() is faster than strcpy().
 361
 362 Argument: string to copy
 363 Returns:  copy of string in new store
 364 */
 365
 366 uschar *
 367 string_copy_malloc(uschar *s)
 368 {
 369 int len = Ustrlen(s) + 1;
 370 uschar *ss = store_malloc(len);
 371 memcpy(ss, s, len);
 372 return ss;
 373 }
 374
 375
 376
 377 /*************************************************
 378 *       Copy, lowercase and save string          *
 379 *************************************************/
 380
 381 /*
 382 Argument: string to copy
 383 Returns:  copy of string in new store, with letters lowercased
 384 */
 385
 386 uschar *
 387 string_copylc(uschar *s)
 388 {
 389 uschar *ss = store_get(Ustrlen(s) + 1);
 390 uschar *p = ss;
 391 while (*s != 0) *p++ = tolower(*s++);
 392 *p = 0;
 393 return ss;
 394 }
 395
 396
 397
 398 /*************************************************
 399 *       Copy and save string, given length       *
 400 *************************************************/
 401
 402 /* It is assumed the data contains no zeros. A zero is added
 403 onto the end.
 404
 405 Arguments:
 406   s         string to copy
 407   n         number of characters
 408
 409 Returns:    copy of string in new store
 410 */
 411
 412 uschar *
 413 string_copyn(uschar *s, int n)
 414 {
 415 uschar *ss = store_get(n + 1);
 416 Ustrncpy(ss, s, n);
 417 ss[n] = 0;
 418 return ss;
 419 }
 420
 421
 422 /*************************************************
 423 * Copy, lowercase, and save string, given length *
 424 *************************************************/
 425
 426 /* It is assumed the data contains no zeros. A zero is added
 427 onto the end.
 428
 429 Arguments:
 430   s         string to copy
 431   n         number of characters
 432
 433 Returns:    copy of string in new store, with letters lowercased
 434 */
 435
 436 uschar *
 437 string_copynlc(uschar *s, int n)
 438 {
 439 uschar *ss = store_get(n + 1);
 440 uschar *p = ss;
 441 while (n-- > 0) *p++ = tolower(*s++);
 442 *p = 0;
 443 return ss;
 444 }
 445
 446
 447
 448 /*************************************************
 449 *   Copy returned DNS domain name, de-escaping   *
 450 *************************************************/
 451
 452 /* If a domain name contains top-bit characters, some resolvers return
 453 the fully qualified name with those characters turned into escapes. The
 454 convention is a backslash followed by _decimal_ digits. We convert these
 455 back into the original binary values. This will be relevant when
 456 allow_utf8_domains is set true and UTF-8 characters are used in domain
 457 names. Backslash can also be used to escape other characters, though we
 458 shouldn't come across them in domain names.
 459
 460 Argument:   the domain name string
 461 Returns:    copy of string in new store, de-escaped
 462 */
 463
 464 uschar *
 465 string_copy_dnsdomain(uschar *s)
 466 {
 467 uschar *yield;
 468 uschar *ss = yield = store_get(Ustrlen(s) + 1);
 469
 470 while (*s != 0)
 471   {
 472   if (*s != '\\')
 473     {
 474     *ss++ = *s++;
 475     }
 476   else if (isdigit(s[1]))
 477     {
 478     *ss++ = (s[1] - '0')*100 + (s[2] - '0')*10 + s[3] - '0';
 479     s += 4;
 480     }
 481   else if (*(++s) != 0)
 482     {
 483     *ss++ = *s++;
 484     }
 485   }
 486
 487 *ss = 0;
 488 return yield;
 489 }
 490
 491
 492 #ifndef COMPILE_UTILITY
 493 /*************************************************
 494 *     Copy space-terminated or quoted string     *
 495 *************************************************/
 496
 497 /* This function copies from a string until its end, or until whitespace is
 498 encountered, unless the string begins with a double quote, in which case the
 499 terminating quote is sought, and escaping within the string is done. The length
 500 of a de-quoted string can be no longer than the original, since escaping always
 501 turns n characters into 1 character.
 502
 503 Argument:  pointer to the pointer to the first character, which gets updated
 504 Returns:   the new string
 505 */
 506
 507 uschar *
 508 string_dequote(uschar **sptr)
 509 {
 510 uschar *s = *sptr;
 511 uschar *t, *yield;
 512
 513 /* First find the end of the string */
 514
 515 if (*s != '\"')
 516   {
 517   while (*s != 0 && !isspace(*s)) s++;
 518   }
 519 else
 520   {
 521   s++;
 522   while (*s != 0 && *s != '\"')
 523     {
 524     if (*s == '\\') (void)string_interpret_escape(&s);
 525     s++;
 526     }
 527   if (*s != 0) s++;
 528   }
 529
 530 /* Get enough store to copy into */
 531
 532 t = yield = store_get(s - *sptr + 1);
 533 s = *sptr;
 534
 535 /* Do the copy */
 536
 537 if (*s != '\"')
 538   {
 539   while (*s != 0 && !isspace(*s)) *t++ = *s++;
 540   }
 541 else
 542   {
 543   s++;
 544   while (*s != 0 && *s != '\"')
 545     {
 546     if (*s == '\\') *t++ = string_interpret_escape(&s);
 547       else *t++ = *s;
 548     s++;
 549     }
 550   if (*s != 0) s++;
 551   }
 552
 553 /* Update the pointer and return the terminated copy */
 554
 555 *sptr = s;
 556 *t = 0;
 557 return yield;
 558 }
 559 #endif  /* COMPILE_UTILITY */
 560
 561
 562
 563 /*************************************************
 564 *          Format a string and save it           *
 565 *************************************************/
 566
 567 /* The formatting is done by string_format, which checks the length of
 568 everything.
 569
 570 Arguments:
 571   format    a printf() format - deliberately char * rather than uschar *
 572               because it will most usually be a literal string
 573   ...       arguments for format
 574
 575 Returns:    pointer to fresh piece of store containing sprintf'ed string
 576 */
 577
 578 uschar *
 579 string_sprintf(char *format, ...)
 580 {
 581 va_list ap;
 582 uschar buffer[STRING_SPRINTF_BUFFER_SIZE];
 583 va_start(ap, format);
 584 if (!string_vformat(buffer, sizeof(buffer), format, ap))
 585   log_write(0, LOG_MAIN|LOG_PANIC_DIE,
 586     "string_sprintf expansion was longer than %d", sizeof(buffer));
 587 va_end(ap);
 588 return string_copy(buffer);
 589 }
 590
 591
 592
 593 /*************************************************
 594 *         Case-independent strncmp() function    *
 595 *************************************************/
 596
 597 /*
 598 Arguments:
 599   s         first string
 600   t         second string
 601   n         number of characters to compare
 602
 603 Returns:    < 0, = 0, or > 0, according to the comparison
 604 */
 605
 606 int
 607 strncmpic(uschar *s, uschar *t, int n)
 608 {
 609 while (n--)
 610   {
 611   int c = tolower(*s++) - tolower(*t++);
 612   if (c) return c;
 613   }
 614 return 0;
 615 }
 616
 617
 618 /*************************************************
 619 *         Case-independent strcmp() function     *
 620 *************************************************/
 621
 622 /*
 623 Arguments:
 624   s         first string
 625   t         second string
 626
 627 Returns:    < 0, = 0, or > 0, according to the comparison
 628 */
 629
 630 int
 631 strcmpic(uschar *s, uschar *t)
 632 {
 633 while (*s != 0)
 634   {
 635   int c = tolower(*s++) - tolower(*t++);
 636   if (c != 0) return c;
 637   }
 638 return *t;
 639 }
 640
 641
 642 /*************************************************
 643 *         Case-independent strstr() function     *
 644 *************************************************/
 645
 646 /* The third argument specifies whether whitespace is required
 647 to follow the matched string.
 648
 649 Arguments:
 650   s              string to search
 651   t              substring to search for
 652   space_follows  if TRUE, match only if whitespace follows
 653
 654 Returns:         pointer to substring in string, or NULL if not found
 655 */
 656
 657 uschar *
 658 strstric(uschar *s, uschar *t, BOOL space_follows)
 659 {
 660 uschar *p = t;
 661 uschar *yield = NULL;
 662 int cl = tolower(*p);
 663 int cu = toupper(*p);
 664
 665 while (*s)
 666   {
 667   if (*s == cl || *s == cu)
 668     {
 669     if (yield == NULL) yield = s;
 670     if (*(++p) == 0)
 671       {
 672       if (!space_follows || s[1] == ' ' || s[1] == '\n' ) return yield;
 673       yield = NULL;
 674       p = t;
 675       }
 676     cl = tolower(*p);
 677     cu = toupper(*p);
 678     s++;
 679     }
 680   else if (yield != NULL)
 681     {
 682     yield = NULL;
 683     p = t;
 684     cl = tolower(*p);
 685     cu = toupper(*p);
 686     }
 687   else s++;
 688   }
 689 return NULL;
 690 }
 691
 692
 693
 694 #ifndef COMPILE_UTILITY
 695 /*************************************************
 696 *       Get next string from separated list      *
 697 *************************************************/
 698
 699 /* Leading and trailing space is removed from each item. The separator in the
 700 list is controlled by the int pointed to by the separator argument as follows:
 701
 702   If its value is > 0 it is used as the delimiter.
 703     (If its value is actually > UCHAR_MAX there is only one item in the list.
 704     This is used for some cases when called via functions that sometimes
 705     plough through lists, and sometimes are given single items.)
 706   If its value is <= 0, the string is inspected for a leading <x, where
 707     x is an ispunct() value. If found, it is used as the delimiter. If not
 708     found: (a) if separator == 0, ':' is used
 709            (b) if separator <0, then -separator is used
 710     In all cases the value of the separator that is used is written back to
 711       the int so that it is used on subsequent calls as we progress through
 712       the list.
 713
 714 The separator can always be represented in the string by doubling.
 715
 716 Arguments:
 717   listptr    points to a pointer to the current start of the list; the
 718              pointer gets updated to point after the end of the next item
 719   separator  a pointer to the separator character in an int (see above)
 720   buffer     where to put a copy of the next string in the list; or
 721                NULL if the next string is returned in new memory
 722   buflen     when buffer is not NULL, the size of buffer; otherwise ignored
 723
 724 Returns:     pointer to buffer, containing the next substring,
 725              or NULL if no more substrings
 726 */
 727
 728 uschar *
 729 string_nextinlist(uschar **listptr, int *separator, uschar *buffer, int buflen)
 730 {
 731 register int p = 0;
 732 register int sep = *separator;
 733 register uschar *s = *listptr;
 734
 735 if (s == NULL) return NULL;
 736 while (isspace(*s)) s++;
 737
 738 if (sep <= 0)
 739   {
 740   if (*s == '<' && ispunct(s[1]))
 741     {
 742     sep = s[1];
 743     s += 2;
 744     while (isspace(*s)) s++;
 745     }
 746   else
 747     {
 748     sep = (sep == 0)? ':' : -sep;
 749     }
 750   *separator = sep;
 751   }
 752
 753 if (*s == 0) return NULL;
 754
 755 /* Handle the case when a buffer is provided. */
 756
 757 if (buffer != NULL)
 758   {
 759   for (; *s != 0; s++)
 760     {
 761     if (*s == sep && *(++s) != sep) break;
 762     if (p < buflen - 1) buffer[p++] = *s;
 763     }
 764   while (p > 0 && isspace(buffer[p-1])) p--;
 765   buffer[p] = 0;
 766   }
 767
 768 /* Handle the case when a buffer is not provided. */
 769
 770 else
 771   {
 772   /* We know that *s != 0 at this point. However, it might be pointing to a
 773   separator, which could indicate an empty string, or could be doubled to
 774   indicate a separator character as data at the start of a string. */
 775
 776   if (*s == sep)
 777     {
 778     s++;
 779     if (*s != sep) buffer = string_copy(US"");
 780     }
 781
 782   if (buffer == NULL)
 783     {
 784     int size = 0;
 785     int ptr = 0;
 786     uschar *ss;
 787     for (;;)
 788       {
 789       for (ss = s + 1; *ss != 0 && *ss != sep; ss++);
 790       buffer = string_cat(buffer, &size, &ptr, s, ss-s);
 791       s = ss;
 792       if (*s == 0 || *(++s) != sep) break;
 793       }
 794     while (ptr > 0 && isspace(buffer[ptr-1])) ptr--;
 795     buffer[ptr] = 0;
 796     }
 797   }
 798
 799 /* Update the current pointer and return the new string */
 800
 801 *listptr = s;
 802 return buffer;
 803 }
 804 #endif  /* COMPILE_UTILITY */
 805
 806
 807
 808 #ifndef COMPILE_UTILITY
 809 /*************************************************
 810 *             Add chars to string                *
 811 *************************************************/
 812
 813 /* This function is used when building up strings of unknown length. Room is
 814 always left for a terminating zero to be added to the string that is being
 815 built. This function does not require the string that is being added to be NUL
 816 terminated, because the number of characters to add is given explicitly. It is
 817 sometimes called to extract parts of other strings.
 818
 819 Arguments:
 820   string   points to the start of the string that is being built, or NULL
 821              if this is a new string that has no contents yet
 822   size     points to a variable that holds the current capacity of the memory
 823              block (updated if changed)
 824   ptr      points to a variable that holds the offset at which to add
 825              characters, updated to the new offset
 826   s        points to characters to add
 827   count    count of characters to add; must not exceed the length of s, if s
 828              is a C string
 829
 830 If string is given as NULL, *size and *ptr should both be zero.
 831
 832 Returns:   pointer to the start of the string, changed if copied for expansion.
 833            Note that a NUL is not added, though space is left for one. This is
 834            because string_cat() is often called multiple times to build up a
 835            string - there's no point adding the NUL till the end.
 836 */
 837
 838 uschar *
 839 string_cat(uschar *string, int *size, int *ptr, const uschar *s, int count)
 840 {
 841 int p = *ptr;
 842
 843 if (p + count >= *size)
 844   {
 845   int oldsize = *size;
 846
 847   /* Mostly, string_cat() is used to build small strings of a few hundred
 848   characters at most. There are times, however, when the strings are very much
 849   longer (for example, a lookup that returns a vast number of alias addresses).
 850   To try to keep things reasonable, we use increments whose size depends on the
 851   existing length of the string. */
 852
 853   int inc = (oldsize < 4096)? 100 : 1024;
 854   while (*size <= p + count) *size += inc;
 855
 856   /* New string */
 857
 858   if (string == NULL) string = store_get(*size);
 859
 860   /* Try to extend an existing allocation. If the result of calling
 861   store_extend() is false, either there isn't room in the current memory block,
 862   or this string is not the top item on the dynamic store stack. We then have
 863   to get a new chunk of store and copy the old string. When building large
 864   strings, it is helpful to call store_release() on the old string, to release
 865   memory blocks that have become empty. (The block will be freed if the string
 866   is at its start.) However, we can do this only if we know that the old string
 867   was the last item on the dynamic memory stack. This is the case if it matches
 868   store_last_get. */
 869
 870   else if (!store_extend(string, oldsize, *size))
 871     {
 872     BOOL release_ok = store_last_get[store_pool] == string;
 873     uschar *newstring = store_get(*size);
 874     memcpy(newstring, string, p);
 875     if (release_ok) store_release(string);
 876     string = newstring;
 877     }
 878   }
 879
 880 /* Because we always specify the exact number of characters to copy, we can
 881 use memcpy(), which is likely to be more efficient than strncopy() because the
 882 latter has to check for zero bytes. */
 883
 884 memcpy(string + p, s, count);
 885 *ptr = p + count;
 886 return string;
 887 }
 888 #endif  /* COMPILE_UTILITY */
 889
 890
 891
 892 #ifndef COMPILE_UTILITY
 893 /*************************************************
 894 *        Append strings to another string        *
 895 *************************************************/
 896
 897 /* This function can be used to build a string from many other strings.
 898 It calls string_cat() to do the dirty work.
 899
 900 Arguments:
 901   string   points to the start of the string that is being built, or NULL
 902              if this is a new string that has no contents yet
 903   size     points to a variable that holds the current capacity of the memory
 904              block (updated if changed)
 905   ptr      points to a variable that holds the offset at which to add
 906              characters, updated to the new offset
 907   count    the number of strings to append
 908   ...      "count" uschar* arguments, which must be valid zero-terminated
 909              C strings
 910
 911 Returns:   pointer to the start of the string, changed if copied for expansion.
 912            The string is not zero-terminated - see string_cat() above.
 913 */
 914
 915 uschar *
 916 string_append(uschar *string, int *size, int *ptr, int count, ...)
 917 {
 918 va_list ap;
 919 int i;
 920
 921 va_start(ap, count);
 922 for (i = 0; i < count; i++)
 923   {
 924   uschar *t = va_arg(ap, uschar *);
 925   string = string_cat(string, size, ptr, t, Ustrlen(t));
 926   }
 927 va_end(ap);
 928
 929 return string;
 930 }
 931 #endif
 932
 933
 934
 935 /*************************************************
 936 *        Format a string with length checks      *
 937 *************************************************/
 938
 939 /* This function is used to format a string with checking of the length of the
 940 output for all conversions. It protects Exim from absent-mindedness when
 941 calling functions like debug_printf and string_sprintf, and elsewhere. There
 942 are two different entry points to what is actually the same function, depending
 943 on whether the variable length list of data arguments are given explicitly or
 944 as a va_list item.
 945
 946 The formats are the usual printf() ones, with some omissions (never used) and
 947 two additions for strings: %S forces lower case, and %#s or %#S prints nothing
 948 for a NULL string. Without the # "NULL" is printed (useful in debugging). There
 949 is also the addition of %D, which inserts the date in the form used for
 950 datestamped log files.
 951
 952 Arguments:
 953   buffer       a buffer in which to put the formatted string
 954   buflen       the length of the buffer
 955   format       the format string - deliberately char * and not uschar *
 956   ... or ap    variable list of supplementary arguments
 957
 958 Returns:       TRUE if the result fitted in the buffer
 959 */
 960
 961 BOOL
 962 string_format(uschar *buffer, int buflen, char *format, ...)
 963 {
 964 BOOL yield;
 965 va_list ap;
 966 va_start(ap, format);
 967 yield = string_vformat(buffer, buflen, format, ap);
 968 va_end(ap);
 969 return yield;
 970 }
 971
 972
 973 BOOL
 974 string_vformat(uschar *buffer, int buflen, char *format, va_list ap)
 975 {
 976 BOOL yield = TRUE;
 977 int width, precision;
 978 char *fp = format;             /* Deliberately not unsigned */
 979 uschar *p = buffer;
 980 uschar *last = buffer + buflen - 1;
 981
 982 string_datestamp_offset = -1;  /* Datestamp not inserted */
 983
 984 /* Scan the format and handle the insertions */
 985
 986 while (*fp != 0)
 987   {
 988   int *nptr;
 989   int slen;
 990   char *null = "NULL";         /* ) These variables */
 991   char *item_start, *s;        /* ) are deliberately */
 992   char newformat[16];          /* ) not unsigned */
 993
 994   /* Non-% characters just get copied verbatim */
 995
 996   if (*fp != '%')
 997     {
 998     if (p >= last) { yield = FALSE; break; }
 999     *p++ = (uschar)*fp++;
1000     continue;
1001     }
1002
1003   /* Deal with % characters. Pick off the width and precision, for checking
1004   strings, skipping over the flag and modifier characters. */
1005
1006   item_start = fp;
1007   width = precision = -1;
1008
1009   if (strchr("-+ #0", *(++fp)) != NULL)
1010     {
1011     if (*fp == '#') null = "";
1012     fp++;
1013     }
1014
1015   if (isdigit((uschar)*fp))
1016     {
1017     width = *fp++ - '0';
1018     while (isdigit((uschar)*fp)) width = width * 10 + *fp++ - '0';
1019     }
1020   else if (*fp == '*')
1021     {
1022     width = va_arg(ap, int);
1023     fp++;
1024     }
1025
1026   if (*fp == '.')
1027     {
1028     if (*(++fp) == '*')
1029       {
1030       precision = va_arg(ap, int);
1031       fp++;
1032       }
1033     else
1034       {
1035       precision = 0;
1036       while (isdigit((uschar)*fp))
1037         precision = precision*10 + *fp++ - '0';
1038       }
1039     }
1040
1041   if (strchr("hlL", *fp) != NULL) fp++;
1042
1043   /* Handle each specific format type. */
1044
1045   switch (*fp++)
1046     {
1047     case 'n':
1048     nptr = va_arg(ap, int *);
1049     *nptr = p - buffer;
1050     break;
1051
1052     case 'd':
1053     case 'o':
1054     case 'u':
1055     case 'x':
1056     case 'X':
1057     if (p >= last - 12) { yield = FALSE; goto END_FORMAT; }
1058     strncpy(newformat, item_start, fp - item_start);
1059     newformat[fp - item_start] = 0;
1060     sprintf(CS p, newformat, va_arg(ap, int));
1061     while (*p) p++;
1062     break;
1063
1064     case 'p':
1065     if (p >= last - 24) { yield = FALSE; goto END_FORMAT; }
1066     strncpy(newformat, item_start, fp - item_start);
1067     newformat[fp - item_start] = 0;
1068     sprintf(CS p, newformat, va_arg(ap, void *));
1069     while (*p) p++;
1070     break;
1071
1072     /* %f format is inherently insecure if the numbers that it may be
1073     handed are unknown (e.g. 1e300). However, in Exim, %f is used for
1074     printing load averages, and these are actually stored as integers
1075     (load average * 1000) so the size of the numbers is constrained.
1076     It is also used for formatting sending rates, where the simplicity
1077     of the format prevents overflow. */
1078
1079     case 'f':
1080     case 'e':
1081     case 'E':
1082     case 'g':
1083     case 'G':
1084     if (precision < 0) precision = 6;
1085     if (p >= last - precision - 8) { yield = FALSE; goto END_FORMAT; }
1086     strncpy(newformat, item_start, fp - item_start);
1087     newformat[fp-item_start] = 0;
1088     sprintf(CS p, newformat, va_arg(ap, double));
1089     while (*p) p++;
1090     break;
1091
1092     /* String types */
1093
1094     case '%':
1095     if (p >= last) { yield = FALSE; goto END_FORMAT; }
1096     *p++ = '%';
1097     break;
1098
1099     case 'c':
1100     if (p >= last) { yield = FALSE; goto END_FORMAT; }
1101     *p++ = va_arg(ap, int);
1102     break;
1103
1104     case 'D':                   /* Insert datestamp for log file names */
1105     s = CS tod_stamp(tod_log_datestamp);
1106     string_datestamp_offset = p - buffer;   /* Passed back via global */
1107     goto INSERT_STRING;
1108
1109     case 's':
1110     case 'S':                   /* Forces *lower* case */
1111     s = va_arg(ap, char *);
1112
1113     INSERT_STRING:              /* Come to from %D above */
1114     if (s == NULL) s = null;
1115     slen = Ustrlen(s);
1116
1117     /* If the width is specified, check that there is a precision
1118     set; if not, set it to the width to prevent overruns of long
1119     strings. */
1120
1121     if (width >= 0)
1122       {
1123       if (precision < 0) precision = width;
1124       }
1125
1126     /* If a width is not specified and the precision is specified, set
1127     the width to the precision, or the string length if shorted. */
1128
1129     else if (precision >= 0)
1130       {
1131       width = (precision < slen)? precision : slen;
1132       }
1133
1134     /* If neither are specified, set them both to the string length. */
1135
1136     else width = precision = slen;
1137
1138     /* Check string space, and add the string to the buffer if ok. If
1139     not OK, add part of the string (debugging uses this to show as
1140     much as possible). */
1141
1142     if (p >= last - width)
1143       {
1144       yield = FALSE;
1145       width = precision = last - p - 1;
1146       }
1147     sprintf(CS p, "%*.*s", width, precision, s);
1148     if (fp[-1] == 'S')
1149       while (*p) { *p = tolower(*p); p++; }
1150     else
1151       while (*p) p++;
1152     if (!yield) goto END_FORMAT;
1153     break;
1154
1155     /* Some things are never used in Exim; also catches junk. */
1156
1157     default:
1158     strncpy(newformat, item_start, fp - item_start);
1159     newformat[fp-item_start] = 0;
1160     log_write(0, LOG_MAIN|LOG_PANIC_DIE, "string_format: unsupported type "
1161       "in \"%s\" in \"%s\"", newformat, format);
1162     break;
1163     }
1164   }
1165
1166 /* Ensure string is complete; return TRUE if got to the end of the format */
1167
1168 END_FORMAT:
1169
1170 *p = 0;
1171 return yield;
1172 }
1173
1174
1175
1176 #ifndef COMPILE_UTILITY
1177 /*************************************************
1178 *       Generate an "open failed" message        *
1179 *************************************************/
1180
1181 /* This function creates a message after failure to open a file. It includes a
1182 string supplied as data, adds the strerror() text, and if the failure was
1183 "Permission denied", reads and includes the euid and egid.
1184
1185 Arguments:
1186   eno           the value of errno after the failure
1187   format        a text format string - deliberately not uschar *
1188   ...           arguments for the format string
1189
1190 Returns:        a message, in dynamic store
1191 */
1192
1193 uschar *
1194 string_open_failed(int eno, char *format, ...)
1195 {
1196 va_list ap;
1197 uschar buffer[1024];
1198
1199 Ustrcpy(buffer, "failed to open ");
1200 va_start(ap, format);
1201
1202 /* Use the checked formatting routine to ensure that the buffer
1203 does not overflow. It should not, since this is called only for internally
1204 specified messages. If it does, the message just gets truncated, and there
1205 doesn't seem much we can do about that. */
1206
1207 (void)string_vformat(buffer+15, sizeof(buffer) - 15, format, ap);
1208
1209 return (eno == EACCES)?
1210   string_sprintf("%s: %s (euid=%ld egid=%ld)", buffer, strerror(eno),
1211     (long int)geteuid(), (long int)getegid()) :
1212   string_sprintf("%s: %s", buffer, strerror(eno));
1213 }
1214 #endif  /* COMPILE_UTILITY */
1215
1216
1217
1218 #ifndef COMPILE_UTILITY
1219 /*************************************************
*        Generate local part for logging         *
1221 *************************************************/
1222
1223 /* This function is a subroutine for use in string_log_address() below.
1224
1225 Arguments:
1226   addr        the address being logged
1227   yield       the current dynamic buffer pointer
1228   sizeptr     points to current size
1229   ptrptr      points to current insert pointer
1230
1231 Returns:      the new value of the buffer pointer
1232 */
1233
1234 static uschar *
1235 string_get_localpart(address_item *addr, uschar *yield, int *sizeptr,
1236   int *ptrptr)
1237 {
1238 if (testflag(addr, af_include_affixes) && addr->prefix != NULL)
1239   yield = string_cat(yield, sizeptr, ptrptr, addr->prefix,
1240     Ustrlen(addr->prefix));
1241 yield = string_cat(yield, sizeptr, ptrptr, addr->local_part,
1242   Ustrlen(addr->local_part));
1243 if (testflag(addr, af_include_affixes) && addr->suffix != NULL)
1244   yield = string_cat(yield, sizeptr, ptrptr, addr->suffix,
1245     Ustrlen(addr->suffix));
1246 return yield;
1247 }
1248
1249
1250 /*************************************************
1251 *          Generate log address list             *
1252 *************************************************/
1253
1254 /* This function generates a list consisting of an address and its parents, for
1255 use in logging lines. For saved onetime aliased addresses, the onetime parent
1256 field is used. If the address was delivered by a transport with rcpt_include_
1257 affixes set, the af_include_affixes bit will be set in the address. In that
1258 case, we include the affixes here too.
1259
1260 Arguments:
1261   addr          bottom (ultimate) address
1262   all_parents   if TRUE, include all parents
1263   success       TRUE for successful delivery
1264
1265 Returns:        a string in dynamic store
1266 */
1267
1268 uschar *
1269 string_log_address(address_item *addr, BOOL all_parents, BOOL success)
1270 {
1271 int size = 64;
1272 int ptr = 0;
1273 BOOL add_topaddr = TRUE;
1274 uschar *yield = store_get(size);
1275 address_item *topaddr;
1276
1277 /* Find the ultimate parent */
1278
1279 for (topaddr = addr; topaddr->parent != NULL; topaddr = topaddr->parent);
1280
1281 /* We start with just the local part for pipe, file, and reply deliveries, and
1282 for successful local deliveries from routers that have the log_as_local flag
1283 set. File deliveries from filters can be specified as non-absolute paths in
1284 cases where the transport is goin to complete the path. If there is an error
1285 before this happens (expansion failure) the local part will not be updated, and
1286 so won't necessarily look like a path. Add extra text for this case. */
1287
1288 if (testflag(addr, af_pfr) ||
1289       (success &&
1290        addr->router != NULL && addr->router->log_as_local &&
1291        addr->transport != NULL && addr->transport->info->local))
1292   {
1293   if (testflag(addr, af_file) && addr->local_part[0] != '/')
1294     yield = string_cat(yield, &size, &ptr, CUS"save ", 5);
1295   yield = string_get_localpart(addr, yield, &size, &ptr);
1296   }
1297
1298 /* Other deliveries start with the full address. It we have split it into local
1299 part and domain, use those fields. Some early failures can happen before the
1300 splitting is done; in those cases use the original field. */
1301
1302 else
1303   {
1304   if (addr->local_part != NULL)
1305     {
1306     yield = string_get_localpart(addr, yield, &size, &ptr);
1307     yield = string_cat(yield, &size, &ptr, US"@", 1);
1308     yield = string_cat(yield, &size, &ptr, addr->domain,
1309       Ustrlen(addr->domain) );
1310     }
1311   else
1312     {
1313     yield = string_cat(yield, &size, &ptr, addr->address, Ustrlen(addr->address));
1314     }
1315   yield[ptr] = 0;
1316
1317   /* If the address we are going to print is the same as the top address,
1318   and all parents are not being included, don't add on the top address. First
1319   of all, do a caseless comparison; if this succeeds, do a caseful comparison
1320   on the local parts. */
1321
1322   if (strcmpic(yield, topaddr->address) == 0 &&
1323       Ustrncmp(yield, topaddr->address, Ustrchr(yield, '@') - yield) == 0 &&
1324       addr->onetime_parent == NULL &&
1325       (!all_parents || addr->parent == NULL || addr->parent == topaddr))
1326     add_topaddr = FALSE;
1327   }
1328
1329 /* If all parents are requested, or this is a local pipe/file/reply, and
1330 there is at least one intermediate parent, show it in brackets, and continue
1331 with all of them if all are wanted. */
1332
1333 if ((all_parents || testflag(addr, af_pfr)) &&
1334     addr->parent != NULL &&
1335     addr->parent != topaddr)
1336   {
1337   uschar *s = US" (";
1338   address_item *addr2;
1339   for (addr2 = addr->parent; addr2 != topaddr; addr2 = addr2->parent)
1340     {
1341     yield = string_cat(yield, &size, &ptr, s, 2);
1342     yield = string_cat(yield, &size, &ptr, addr2->address, Ustrlen(addr2->address));
1343     if (!all_parents) break;
1344     s = US", ";
1345     }
1346   yield = string_cat(yield, &size, &ptr, US")", 1);
1347   }
1348
1349 /* Add the top address if it is required */
1350
1351 if (add_topaddr)
1352   {
1353   yield = string_cat(yield, &size, &ptr, US" <", 2);
1354
1355   if (addr->onetime_parent == NULL)
1356     yield = string_cat(yield, &size, &ptr, topaddr->address,
1357       Ustrlen(topaddr->address));
1358   else
1359     yield = string_cat(yield, &size, &ptr, addr->onetime_parent,
1360       Ustrlen(addr->onetime_parent));
1361
1362   yield = string_cat(yield, &size, &ptr, US">", 1);
1363   }
1364
1365 yield[ptr] = 0;  /* string_cat() leaves space */
1366 return yield;
1367 }
1368 #endif  /* COMPILE_UTILITY */
1369
1370
1371
1372
1373
1374 /*************************************************
1375 **************************************************
1376 *             Stand-alone test program           *
1377 **************************************************
1378 *************************************************/
1379
1380 #ifdef STAND_ALONE
1381 int main(void)
1382 {
1383 uschar buffer[256];
1384
1385 printf("Testing is_ip_address\n");
1386
1387 while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1388   {
1389   int offset;
1390   buffer[Ustrlen(buffer) - 1] = 0;
1391   printf("%d\n", string_is_ip_address(buffer, NULL));
1392   printf("%d %d %s\n", string_is_ip_address(buffer, &offset), offset, buffer);
1393   }
1394
1395 printf("Testing string_nextinlist\n");
1396
1397 while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1398   {
1399   uschar *list = buffer;
1400   uschar *lp1, *lp2;
1401   uschar item[256];
1402   int sep1 = 0;
1403   int sep2 = 0;
1404
1405   if (*list == '<')
1406     {
1407     sep1 = sep2 = list[1];
1408     list += 2;
1409     }
1410
1411   lp1 = lp2 = list;
1412   for (;;)
1413     {
1414     uschar *item1 = string_nextinlist(&lp1, &sep1, item, sizeof(item));
1415     uschar *item2 = string_nextinlist(&lp2, &sep2, NULL, 0);
1416
1417     if (item1 == NULL && item2 == NULL) break;
1418     if (item == NULL || item2 == NULL || Ustrcmp(item1, item2) != 0)
1419       {
1420       printf("***ERROR\nitem1=\"%s\"\nitem2=\"%s\"\n",
1421         (item1 == NULL)? "NULL" : CS item1,
1422         (item2 == NULL)? "NULL" : CS item2);
1423       break;
1424       }
1425     else printf("  \"%s\"\n", CS item1);
1426     }
1427   }
1428
1429 /* This is a horrible lash-up, but it serves its purpose. */
1430
1431 printf("Testing string_format\n");
1432
1433 while (fgets(CS buffer, sizeof(buffer), stdin) != NULL)
1434   {
1435   void *args[3];
1436   double dargs[3];
1437   int dflag = 0;
1438   int n = 0;
1439   int count;
1440   int countset = 0;
1441   uschar format[256];
1442   uschar outbuf[256];
1443   uschar *s;
1444   buffer[Ustrlen(buffer) - 1] = 0;
1445
1446   s = Ustrchr(buffer, ',');
1447   if (s == NULL) s = buffer + Ustrlen(buffer);
1448
1449   Ustrncpy(format, buffer, s - buffer);
1450   format[s-buffer] = 0;
1451
1452   if (*s == ',') s++;
1453
1454   while (*s != 0)
1455     {
1456     uschar *ss = s;
1457     s = Ustrchr(ss, ',');
1458     if (s == NULL) s = ss + Ustrlen(ss);
1459
1460     if (isdigit(*ss))
1461       {
1462       Ustrncpy(outbuf, ss, s-ss);
1463       if (Ustrchr(outbuf, '.') != NULL)
1464         {
1465         dflag = 1;
1466         dargs[n++] = Ustrtod(outbuf, NULL);
1467         }
1468       else
1469         {
1470         args[n++] = (void *)Uatoi(outbuf);
1471         }
1472       }
1473
1474     else if (Ustrcmp(ss, "*") == 0)
1475       {
1476       args[n++] = (void *)(&count);
1477       countset = 1;
1478       }
1479
1480     else
1481       {
1482       uschar *sss = malloc(s - ss + 1);
1483       Ustrncpy(sss, ss, s-ss);
1484       args[n++] = sss;
1485       }
1486
1487     if (*s == ',') s++;
1488     }
1489
1490   if (!dflag) printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
1491     args[0], args[1], args[2])? "True" : "False");
1492
1493   else printf("%s\n", string_format(outbuf, sizeof(outbuf), CS format,
1494     dargs[0], dargs[1], dargs[2])? "True" : "False");
1495
1496   printf("%s\n", CS outbuf);
1497   if (countset) printf("count=%d\n", count);
1498   }
1499
1500 return 0;
1501 }
1502 #endif
1503
1504 /* End of string.c */