src/src/retry.c

   1 /*************************************************
   2 *     Exim - an Internet mail transport agent    *
   3 *************************************************/
   4
   5 /* Copyright (c) The Exim Maintainers 2020 - 2024 */
   6 /* Copyright (c) University of Cambridge 1995 - 2018 */
   7 /* See the file NOTICE for conditions of use and distribution. */
   8 /* SPDX-License-Identifier: GPL-2.0-or-later */
   9
  10 /* Functions concerned with retrying unsuccessful deliveries. */
  11
  12
  13 #include "exim.h"
  14
  15
  16
  17 /*************************************************
  18 *         Check the ultimate address timeout     *
  19 *************************************************/
  20
  21 /* This function tests whether a message has been on the queue longer than
  22 the maximum retry time for a particular host or address.
  23
  24 Arguments:
  25   retry_key     the key to look up a retry rule
  26   domain        the domain to look up a domain retry rule
  27   retry_record  contains error information for finding rule
  28   now           the time
  29
  30 Returns:        TRUE if the ultimate timeout has been reached
  31 */
  32
  33 BOOL
  34 retry_ultimate_address_timeout(const uschar * retry_key, const uschar *domain,
  35   dbdata_retry *retry_record, time_t now)
  36 {
  37 BOOL address_timeout;
  38 retry_config * retry;
  39
  40 DEBUG(D_retry)
  41   {
  42   debug_printf("retry time not reached: checking ultimate address timeout\n");
  43   debug_printf("  now=" TIME_T_FMT " first_failed=" TIME_T_FMT
  44                 " next_try=" TIME_T_FMT " expired=%c\n",
  45                 now, retry_record->first_failed,
  46                 retry_record->next_try, retry_record->expired ? 'T' : 'F');
  47   }
  48
  49 retry = retry_find_config(retry_key+2, domain,
  50     retry_record->basic_errno, retry_record->more_errno);
  51
  52 if (retry && retry->rules)
  53   {
  54   retry_rule *last_rule;
  55   for (last_rule = retry->rules; last_rule->next; last_rule = last_rule->next) ;
  56   DEBUG(D_retry)
  57     debug_printf("  received_time=" TIME_T_FMT " diff=%d timeout=%d\n",
  58       received_time.tv_sec, (int)(now - received_time.tv_sec), last_rule->timeout);
  59   address_timeout = (now - received_time.tv_sec > last_rule->timeout);
  60   }
  61 else
  62   {
  63   DEBUG(D_retry)
  64     debug_printf("no retry rule found: assume timed out\n");
  65   address_timeout = TRUE;
  66   }
  67
  68 DEBUG(D_retry)
  69   if (address_timeout)
  70     debug_printf("on queue longer than maximum retry for address - "
  71       "allowing delivery\n");
  72
  73 return address_timeout;
  74 }
  75
  76
  77
  78 const uschar *
  79 retry_host_key_build(const host_item * host, BOOL incl_ip,
  80   const uschar * portstring)
  81 {
  82 const uschar * s = host->name;
  83 gstring * g = string_is_ip_address(s, NULL)
  84   ? string_fmt_append(NULL, "T:[%s]", s)    /* wrap a name which is a bare ip */
  85   : string_fmt_append(NULL, "T:%s",   s);
  86
  87 s = host->address;
  88 if (incl_ip)
  89   g = Ustrchr(s, ':')
  90     ? string_fmt_append(g, ":[%s]", s)      /* wrap an ipv6  */
  91     : string_fmt_append(g, ":%s",   s);
  92
  93 if (portstring)
  94   g = string_cat(g, portstring);
  95
  96 gstring_release_unused(g);
  97 return string_from_gstring(g);
  98 }
  99
 100
 101 /*************************************************
 102 *     Set status of a host+address item          *
 103 *************************************************/
 104
 105 /* This function is passed a host_item which contains a host name and an
 106 IP address string. Its job is to set the status of the address if it is not
 107 already set (indicated by hstatus_unknown). The possible values are:
 108
 109    hstatus_usable    the address is not listed in the unusable tree, and does
 110                      not have a retry record, OR the time is past the next
 111                      try time, OR the message has been on the queue for more
 112                      than the maximum retry time for a failing host
 113
 114    hstatus_unusable  the address is listed in the unusable tree, or does have
 115                      a retry record, and the time is not yet at the next retry
 116                      time.
 117
 118    hstatus_unusable_expired  as above, but also the retry time has expired
 119                      for this address.
 120
 121 The reason a delivery is permitted when a message has been around for a very
 122 long time is to allow the ultimate address timeout to operate after a delivery
 123 failure. Otherwise some messages may stick around without being tried for too
 124 long.
 125
 126 If a host retry record is retrieved from the hints database, the time of last
 127 trying is filled into the last_try field of the host block. If a host is
 128 generally usable, a check is made to see if there is a retry delay on this
 129 specific message at this host.
 130
 131 If a non-standard port is being used, it is added to the retry key.
 132
 133 Arguments:
 134   domain              the address domain
 135   host                pointer to a host item
 136   portstring          "" for standard port, ":xxxx" for a non-standard port
 137   include_ip_address  TRUE to include the address in the key - this is
 138                         usual, but sometimes is not wanted
 139   retry_host_key      where to put a pointer to the key for the host-specific
 140                         retry record, if one is read and the host is usable
 141   retry_message_key   where to put a pointer to the key for the message+host
 142                         retry record, if one is read and the host is usable
 143
 144 Returns:    TRUE if the host has expired but is usable because
 145              its retry time has come
 146 */
 147
 148 BOOL
 149 retry_check_address(const uschar *domain, host_item *host, uschar *portstring,
 150   BOOL include_ip_address,
 151   const uschar **retry_host_key, const uschar **retry_message_key)
 152 {
 153 BOOL yield = FALSE;
 154 time_t now = time(NULL);
 155 const uschar * host_key, * message_key;
 156 open_db dbblock, * dbm_file = NULL;
 157 tree_node * node;
 158 dbdata_retry * host_retry_record, * message_retry_record;
 159
 160 *retry_host_key = *retry_message_key = NULL;
 161
 162 /* Do nothing if status already set; otherwise initialize status as usable. */
 163
 164 if (host->status != hstatus_unknown) return FALSE;
 165 host->status = hstatus_usable;
 166
 167 DEBUG(D_transport|D_retry)
 168   {
 169   debug_printf_indent("checking retry status of %s\n", host->name);
 170   acl_level++;
 171   }
 172
 173 /* Generate the host key for the unusable tree and the retry database. Ensure
 174 host names are lower cased (that's what %S does).
 175 Generate the message-specific key too.
 176 Be sure to maintain lack-of-spaces in retry keys; exinext depends on it. */
 177
 178 host_key = retry_host_key_build(host, include_ip_address, portstring);
 179 message_key = string_sprintf("%s:%s", host_key, message_id);
 180
 181 /* Search the tree of unusable IP addresses. This is filled in when deliveries
 182 fail, because the retry database itself is not updated until the end of all
 183 deliveries (so as to do it all in one go). The tree records addresses that have
 184 become unusable during this delivery process (i.e. those that will get put into
 185 the retry database when it is updated). */
 186
 187 if ((node = tree_search(tree_unusable, host_key)))
 188   {
 189   DEBUG(D_transport|D_retry)
 190     debug_printf_indent("found in tree of unusables\n");
 191   host->status = node->data.val > 255
 192     ? hstatus_unusable_expired : hstatus_unusable;
 193   host->why = node->data.val & 255;
 194   goto out;
 195   }
 196
 197 /* Open the retry database, giving up if there isn't one. Otherwise, search for
 198 the retry records, and then close the database again. */
 199
 200 if (!continue_retry_db)
 201   dbm_file = dbfn_open(US"retry", O_RDONLY, &dbblock, FALSE, TRUE);
 202 else if (continue_retry_db != (open_db *)-1)
 203   {
 204   DEBUG(D_hints_lookup)
 205     debug_printf_indent(" using cached retry hintsdb handle\n");
 206   dbm_file = continue_retry_db;
 207   }
 208 else DEBUG(D_hints_lookup)
 209     debug_printf_indent(" using cached retry hintsdb nonpresence\n");
 210
 211 if (!dbm_file)
 212   {
 213   DEBUG(D_deliver|D_retry|D_hints_lookup)
 214     debug_printf_indent("no retry data available\n");
 215   goto out;
 216   }
 217 host_retry_record = dbfn_read(dbm_file, host_key);
 218 message_retry_record = dbfn_read(dbm_file, message_key);
 219 if (!continue_retry_db)
 220   dbfn_close(dbm_file);
 221 else
 222   DEBUG(D_hints_lookup) debug_printf_indent("retaining retry hintsdb handle\n");
 223
 224 /* Ignore the data if it is too old - too long since it was written */
 225
 226 if (!host_retry_record)
 227   {
 228   DEBUG(D_transport|D_retry) debug_printf_indent("no host retry record\n");
 229   }
 230 else if (now - host_retry_record->time_stamp > retry_data_expire)
 231   {
 232   host_retry_record = NULL;
 233   DEBUG(D_transport|D_retry) debug_printf_indent("host retry record too old\n");
 234   }
 235
 236 if (!message_retry_record)
 237   {
 238   DEBUG(D_transport|D_retry) debug_printf_indent("no message retry record\n");
 239   }
 240 else if (now - message_retry_record->time_stamp > retry_data_expire)
 241   {
 242   message_retry_record = NULL;
 243   DEBUG(D_transport|D_retry)
 244     debug_printf_indent("message retry record too old\n");
 245   }
 246
 247 /* If there's a host-specific retry record, check for reaching the retry
 248 time (or forcing). If not, and the host is not expired, check for the message
 249 having been around for longer than the maximum retry time for this host or
 250 address. Allow the delivery if it has. Otherwise set the appropriate unusable
 251 flag and return FALSE. Otherwise arrange to return TRUE if this is an expired
 252 host. */
 253
 254 if (host_retry_record)
 255   {
 256   *retry_host_key = host_key;
 257
 258   /* We have not reached the next try time. Check for the ultimate address
 259   timeout if the host has not expired. */
 260
 261   if (now < host_retry_record->next_try && !f.deliver_force)
 262     {
 263     if (!host_retry_record->expired &&
 264         retry_ultimate_address_timeout(host_key, domain,
 265           host_retry_record, now))
 266       goto out;
 267
 268     /* We have not hit the ultimate address timeout; host is unusable. */
 269
 270     host->status = (host_retry_record->expired)?
 271       hstatus_unusable_expired : hstatus_unusable;
 272     host->why = hwhy_retry;
 273     host->last_try = host_retry_record->last_try;
 274     goto out;
 275     }
 276
 277   /* Host is usable; set return TRUE if expired. */
 278
 279   yield = host_retry_record->expired;
 280   }
 281
 282 /* It's OK to try the host. If there's a message-specific retry record, check
 283 for reaching its retry time (or forcing). If not, mark the host unusable,
 284 unless the ultimate address timeout has been reached. */
 285
 286 if (message_retry_record)
 287   {
 288   *retry_message_key = message_key;
 289   if (now < message_retry_record->next_try && !f.deliver_force)
 290     {
 291     if (!retry_ultimate_address_timeout(host_key, domain,
 292         message_retry_record, now))
 293       {
 294       host->status = hstatus_unusable;
 295       host->why = hwhy_retry;
 296       }
 297     yield = FALSE; goto out;
 298     }
 299   }
 300
 301 out:
 302 DEBUG(D_transport|D_retry) acl_level--;
 303 return yield;
 304 }
 305
 306
 307
 308
 309 /*************************************************
 310 *           Add a retry item to an address       *
 311 *************************************************/
 312
 313 /* Retry items are chained onto an address when it is deferred either by router
 314 or by a transport, or if it succeeds or fails and there was a previous retry
 315 item that now needs to be deleted. Sometimes there can be both kinds of item:
 316 for example, if routing was deferred but then succeeded, and delivery then
 317 deferred. In that case there is a delete item for the routing retry, and an
 318 updating item for the delivery.
 319
 320 (But note that that is only visible at the outer level, because in remote
 321 delivery subprocesses, the address starts "clean", with no retry items carried
 322 in.)
 323
 324 These items are used at the end of a delivery attempt to update the retry
 325 database. The keys start R: for routing delays and T: for transport delays.
 326
 327 Arguments:
 328   addr    the address block onto which to hang the item
 329   key     the retry key
 330   flags   delete, host, and message flags, copied into the block
 331
 332 Returns:  nothing
 333 */
 334
 335 void
 336 retry_add_item(address_item * addr, const uschar * key, int flags)
 337 {
 338 retry_item * rti = store_get(sizeof(retry_item), GET_UNTAINTED);
 339 host_item * host = addr->host_used;
 340
 341 rti->next = addr->retries;
 342 addr->retries = rti;
 343 rti->key = key;
 344 rti->basic_errno = addr->basic_errno;
 345 rti->more_errno = addr->more_errno;
 346 rti->message = host
 347   ? string_sprintf("H=%s [%s]: %s", host->name, host->address, addr->message)
 348   : addr->message;
 349 rti->flags = flags;
 350
 351 DEBUG(D_transport|D_retry)
 352   {
 353   int letter = rti->more_errno & 255;
 354   debug_printf("added retry %sitem for %s: errno=%d more_errno=",
 355     flags & rf_delete ? "delete-" : "",
 356     rti->key,
 357     rti->basic_errno);
 358   if (letter == 'A' || letter == 'M')
 359     debug_printf("%d,%c", (rti->more_errno >> 8) & 255, letter);
 360   else
 361     debug_printf("%d", rti->more_errno);
 362   debug_printf(" flags=%d\n", flags);
 363   }
 364 }
 365
 366
 367
 368 /*************************************************
 369 *        Find retry configuration data           *
 370 *************************************************/
 371
 372 /* Search the in-store retry information for the first retry item that applies
 373 to a given destination. If the key contains an @ we are probably handling a
 374 local delivery and have a complete address to search for; this happens when
 375 retry_use_local_part is set on a router. Otherwise, the key is likely to be a
 376 host name for a remote delivery, or a domain name for a local delivery. We
 377 prepend *@ on the front of it so that it will match a retry item whose address
 378 item pattern is independent of the local part. The alternate key, if set, is
 379 always just a domain, so we treat it likewise.
 380
 381 Arguments:
 382   key          key for which retry info is wanted
 383   alternate    alternative key, always just a domain
 384   basic_errno  specific error predicate on the retry rule, or zero
 385   more_errno   additional data for errno predicate
 386
 387 Returns:       pointer to retry rule, or NULL
 388 */
 389
 390 retry_config *
 391 retry_find_config(const uschar * key, const uschar * alternate, int basic_errno,
 392   int more_errno)
 393 {
 394 const uschar * colon = Ustrchr(key, ':');
 395 retry_config * yield;
 396
 397 /* If there's a colon in the key, there are two possibilities:
 398
 399 (1) This is a key for a host, ip address, and possibly port, in the format
 400
 401       hostname:ip+port
 402
 403     In this case, we copy the host name (which could be an [ip], including
 404     being an [ipv6], and we drop the []).
 405
 406 (2) This is a key for a pipe, file, or autoreply delivery, in the format
 407
 408       pipe-or-file-or-auto:x@y
 409
 410     where x@y is the original address that provoked the delivery. The pipe or
 411     file or auto will start with | or / or >, whereas a host name will start
 412     with a letter or a digit. In this case we want to use the original address
 413     to search for a retry rule. */
 414
 415 if (colon)
 416   key = isalnum(*key)
 417     ? string_copyn(key, colon-key)      /* the hostname */
 418     : *key == '['
 419     ? string_copyn(key+1, Ustrchr(key, ']')-1-key)      /* the ip */
 420     : Ustrrchr(key, ':') + 1;           /* Take from the last colon */
 421
 422 /* Sort out the keys */
 423
 424 if (!Ustrchr(key, '@')) key = string_sprintf("*@%s", key);
 425 if (alternate)    alternate = string_sprintf("*@%s", alternate);
 426
 427 /* Scan the configured retry items. */
 428
 429 for (yield = retries; yield; yield = yield->next)
 430   {
 431   const uschar *plist = yield->pattern;
 432   const uschar *slist = yield->senders;
 433
 434   /* If a specific error is set for this item, check that we are handling that
 435   specific error, and if so, check any additional error information if
 436   required. */
 437
 438   if (yield->basic_errno != 0)
 439     {
 440     /* Special code is required for quota errors, as these can either be system
 441     quota errors, or Exim's own quota imposition, which has a different error
 442     number. Full partitions are also treated in the same way as quota errors.
 443     */
 444
 445     if (yield->basic_errno == ERRNO_EXIMQUOTA)
 446       {
 447       if ((basic_errno != ERRNO_EXIMQUOTA && basic_errno != errno_quota &&
 448            basic_errno != ENOSPC) ||
 449           (yield->more_errno != 0 && yield->more_errno > more_errno))
 450         continue;
 451       }
 452
 453     /* The TLSREQUIRED error also covers TLSFAILURE. These are subtly different
 454     errors, but not worth separating at this level. */
 455
 456     else if (yield->basic_errno == ERRNO_TLSREQUIRED)
 457       {
 458       if (basic_errno != ERRNO_TLSREQUIRED && basic_errno != ERRNO_TLSFAILURE)
 459         continue;
 460       }
 461
 462     /* Handle 4xx responses to MAIL, RCPT, or DATA. The code that was received
 463     is in the 2nd least significant byte of more_errno (with 400 subtracted).
 464     The required value is coded in the 2nd least significant byte of the
 465     yield->more_errno field as follows:
 466
 467       255     => any 4xx code
 468       >= 100  => the decade must match the value less 100
 469       < 100   => the exact value must match
 470     */
 471
 472     else if (yield->basic_errno == ERRNO_MAIL4XX ||
 473              yield->basic_errno == ERRNO_RCPT4XX ||
 474              yield->basic_errno == ERRNO_DATA4XX)
 475       {
 476       int wanted;
 477       if (basic_errno != yield->basic_errno) continue;
 478       wanted = (yield->more_errno >> 8) & 255;
 479       if (wanted != 255)
 480         {
 481         int evalue = (more_errno >> 8) & 255;
 482         if (wanted >= 100)
 483           {
 484           if ((evalue/10)*10 != wanted - 100) continue;
 485           }
 486         else if (evalue != wanted) continue;
 487         }
 488       }
 489
 490     /* There are some special cases for timeouts */
 491
 492     else if (yield->basic_errno == ETIMEDOUT)
 493       {
 494       if (basic_errno != ETIMEDOUT) continue;
 495
 496       /* Just RTEF_CTOUT in the rule => don't care about 'A'/'M' addresses */
 497       if (yield->more_errno == RTEF_CTOUT)
 498         {
 499         if ((more_errno & RTEF_CTOUT) == 0) continue;
 500         }
 501
 502       else if (yield->more_errno != 0)
 503         {
 504         int cf_errno = more_errno;
 505         if ((yield->more_errno & RTEF_CTOUT) == 0) cf_errno &= ~RTEF_CTOUT;
 506         if (yield->more_errno != cf_errno) continue;
 507         }
 508       }
 509
 510     /* Default checks for exact match */
 511
 512     else
 513       {
 514       if (yield->basic_errno != basic_errno ||
 515          (yield->more_errno != 0 && yield->more_errno != more_errno))
 516        continue;
 517       }
 518     }
 519
 520   /* If the "senders" condition is set, check it. Note that sender_address may
 521   be null during -brt checking, in which case we do not use this rule. */
 522
 523   if (  slist
 524      && (  !sender_address
 525         || match_address_list_basic(sender_address, &slist, 0) != OK
 526      )  )
 527     continue;
 528
 529   /* Check for a match between the address list item at the start of this retry
 530   rule and either the main or alternate keys. */
 531
 532   if (  match_address_list_basic(key, &plist, UCHAR_MAX+1) == OK
 533      || (  alternate
 534         && match_address_list_basic(alternate, &plist, UCHAR_MAX+1) == OK
 535      )  )
 536     break;
 537   }
 538
 539 return yield;
 540 }
 541
 542
 543
 544
 545 /*************************************************
 546 *              Update retry database             *
 547 *************************************************/
 548
 549 /* Update the retry data for any directing/routing/transporting that was
 550 deferred, or delete it for those that succeeded after a previous defer. This is
 551 done all in one go to minimize opening/closing/locking of the database file.
 552 Called (only) from deliver_message().
 553
 554 Note that, because SMTP delivery involves a list of destinations to try, there
 555 may be defer-type retry information for some of them even when the message was
 556 successfully delivered. Likewise if it eventually failed.
 557
 558 This function may move addresses from the defer to the failed queue if the
 559 ultimate retry time has expired.
 560
 561 Arguments:
 562   addr_defer    queue of deferred addresses
 563   addr_failed   queue of failed addresses
 564   addr_succeed  queue of successful addresses
 565
 566 Returns:        nothing
 567 */
 568
 569 void
 570 retry_update(address_item ** addr_defer, address_item ** addr_failed,
 571   address_item ** addr_succeed)
 572 {
 573 open_db dbblock, * dbm_file = NULL;
 574 time_t now = time(NULL);
 575
 576 DEBUG(D_retry) { debug_printf_indent("Processing retry items\n"); acl_level++; }
 577
 578 /* Three-times loop to handle succeeded, failed, and deferred addresses.
 579 Deferred addresses must be handled after failed ones, because some may be moved
 580 to the failed chain if they have timed out. */
 581
 582 for (int i = 0; i < 3; i++)
 583   {
 584   address_item * endaddr, *addr;
 585   address_item * last_first = NULL;
 586   address_item ** paddr = i==0 ? addr_succeed : i==1 ? addr_failed : addr_defer;
 587   address_item ** saved_paddr = NULL;
 588
 589   DEBUG(D_retry)
 590     {
 591     debug_printf_indent("%s addresses:\n",
 592       i == 0 ? "Succeeded" : i == 1 ? "Failed" : "Deferred");
 593     acl_level++;
 594     }
 595
 596   /* Loop for each address on the chain. For deferred addresses, the whole
 597   address times out unless one of its retry addresses has a retry rule that
 598   hasn't yet timed out. Deferred addresses should not be requesting deletion
 599   of retry items, but just in case they do by accident, treat that case
 600   as "not timed out".
 601
 602   As well as handling the addresses themselves, we must also process any
 603   retry items for any parent addresses - these are typically "delete" items,
 604   because the parent must have succeeded in order to generate the child. */
 605
 606   while ((endaddr = *paddr))
 607     {
 608     BOOL timed_out = FALSE;
 609
 610     for (addr = endaddr; addr; addr = addr->parent)
 611       {
 612       int update_count = 0, timedout_count = 0;
 613
 614       DEBUG(D_retry)
 615         {
 616         debug_printf_indent("%s%s\n", addr->address,
 617                             addr->retries ? "" : ": no retry items");
 618         acl_level++;
 619         }
 620
 621       /* Loop for each retry item. */
 622
 623       for (retry_item * rti = addr->retries; rti; rti = rti->next)
 624         {
 625         uschar *message;
 626         int message_length, message_space, failing_interval, next_try;
 627         retry_rule *rule, *final_rule;
 628         retry_config *retry;
 629         dbdata_retry *retry_record;
 630
 631         /* Open the retry database if it is not already open; failure to open
 632         the file is logged, but otherwise ignored - deferred addresses will
 633         get retried at the next opportunity. Not opening earlier than this saves
 634         opening if no addresses have retry items - common when none have yet
 635         reached their retry next try time. */
 636
 637         if (!dbm_file)
 638           if (continue_retry_db && continue_retry_db != (open_db *)-1)
 639             {
 640             DEBUG(D_hints_lookup)
 641               debug_printf_indent("using cached retry hintsdb handle\n");
 642             dbm_file = continue_retry_db;
 643             }
 644           else if (!(dbm_file = exim_lockfile_needed()
 645                     ? dbfn_open(US"retry", O_RDWR|O_CREAT, &dbblock, TRUE, TRUE)
 646                     : dbfn_open_multi(US"retry", O_RDWR|O_CREAT, &dbblock)))
 647             {
 648             DEBUG(D_deliver|D_retry|D_hints_lookup)
 649               debug_printf_indent("retry db not available for updating\n");
 650             return;
 651             }
 652
 653         /* If there are no deferred addresses, that is, if this message is
 654         completing, and the retry item is for a message-specific SMTP error,
 655         force it to be deleted, because there's no point in keeping data for
 656         no-longer-existing messages. This situation can occur when a domain has
 657         two hosts and a message-specific error occurs for the first of them,
 658         but the address gets delivered to the second one. This optimization
 659         doesn't succeed in cleaning out all the dead entries, but it helps. */
 660
 661         if (!*addr_defer  &&  rti->flags & rf_message)
 662           rti->flags |= rf_delete;
 663
 664         /* Handle the case of a request to delete the retry info for this
 665         destination. */
 666
 667         if (rti->flags & rf_delete)
 668           {
 669           (void)dbfn_delete(dbm_file, rti->key);
 670           DEBUG(D_retry)
 671             debug_printf_indent("deleted retry information for %s\n", rti->key);
 672           continue;
 673           }
 674
 675         /* Count the number of non-delete retry items. This is so that we
 676         can compare it to the count of timed_out ones, to check whether
 677         all are timed out. */
 678
 679         update_count++;
 680
 681         /* Get the retry information for this destination and error code, if
 682         any. If this item is for a remote host with ip address, then pass
 683         the domain name as an alternative to search for. If no retry
 684         information is found, we can't generate a retry time, so there is
 685         no point updating the database. This retry item is timed out. */
 686
 687         if (!(retry = retry_find_config(rti->key + 2,
 688              rti->flags & rf_host ? addr->domain : NULL,
 689              rti->basic_errno, rti->more_errno)))
 690           {
 691           DEBUG(D_retry) debug_printf_indent("No configured retry item for %s%s%s\n",
 692             rti->key,
 693             rti->flags & rf_host ? US" or " : US"",
 694             rti->flags & rf_host ? addr->domain : US"");
 695           if (addr == endaddr) timedout_count++;
 696           continue;
 697           }
 698
 699         DEBUG(D_retry)
 700           if (rti->flags & rf_host)
 701             debug_printf_indent("retry for %s (%s) = %s %d %d\n", rti->key,
 702               addr->domain, retry->pattern, retry->basic_errno,
 703               retry->more_errno);
 704           else
 705             debug_printf_indent("retry for %s = %s %d %d\n", rti->key, retry->pattern,
 706               retry->basic_errno, retry->more_errno);
 707
 708         /* Set up the message for the database retry record. Because DBM
 709         records have a maximum data length, we enforce a limit. There isn't
 710         much point in keeping a huge message here, anyway. */
 711
 712         message = rti->basic_errno > 0
 713           ? US strerror(rti->basic_errno)
 714           : rti->message
 715           ? US string_printing(rti->message)
 716           : US"unknown error";
 717         message_length = Ustrlen(message);
 718         if (message_length > EXIM_DB_RLIMIT)
 719           {
 720           DEBUG(D_retry)
 721             debug_printf_indent("truncating message from %u to %u bytes\n",
 722                                 message_length, EXIM_DB_RLIMIT);
 723           message_length = EXIM_DB_RLIMIT;
 724           }
 725
 726         /* For a transaction-capable DB, open one for the read,write
 727         sequence used for this retry record */
 728
 729         if (!exim_lockfile_needed())
 730           dbfn_transaction_start(dbm_file);
 731
 732         /* Read a retry record from the database or construct a new one.
 733         Ignore an old one if it is too old since it was last updated. */
 734
 735         retry_record = dbfn_read_with_length(dbm_file, rti->key,
 736                                               &message_space);
 737         if (  retry_record
 738            && now - retry_record->time_stamp > retry_data_expire)
 739           retry_record = NULL;
 740
 741         if (!retry_record)
 742           {
 743           retry_record = store_get(sizeof(dbdata_retry) + message_length,
 744                                    message);
 745           message_space = message_length;
 746           retry_record->first_failed = now;
 747           retry_record->last_try = now;
 748           retry_record->next_try = now;
 749           retry_record->expired = FALSE;
 750           retry_record->text[0] = 0;      /* just in case */
 751           }
 752         else message_space -= sizeof(dbdata_retry);
 753
 754         /* Compute how long this destination has been failing */
 755
 756         failing_interval = now - retry_record->first_failed;
 757         DEBUG(D_retry) debug_printf_indent("failing_interval=%d message_age=%d\n",
 758           failing_interval, message_age);
 759
 760         /* For a non-host error, if the message has been on the queue longer
 761         than the recorded time of failure, use the message's age instead. This
 762         can happen when some messages can be delivered and others cannot; a
 763         successful delivery will reset the first_failed time, and this can lead
 764         to a failing message being retried too often. */
 765
 766         if (!(rti->flags & rf_host) && message_age > failing_interval)
 767           failing_interval = message_age;
 768
 769         /* Search for the current retry rule. The cutoff time of the
 770         last rule is handled differently to the others. The rule continues
 771         to operate for ever (the global maximum interval will eventually
 772         limit the gaps) but its cutoff time determines when an individual
 773         destination times out. If there are no retry rules, the destination
 774         always times out, but we can't compute a retry time. */
 775
 776         final_rule = NULL;
 777         for (rule = retry->rules; rule; rule = rule->next)
 778           {
 779           if (failing_interval <= rule->timeout) break;
 780           final_rule = rule;
 781           }
 782
 783         /* If there's an un-timed out rule, the destination has not
 784         yet timed out, so the address as a whole has not timed out (but we are
 785         interested in this only for the end address). Make sure the expired
 786         flag is false (can be forced via fixdb from outside, but ensure it is
 787         consistent with the rules whenever we go through here). */
 788
 789         if (rule)
 790           retry_record->expired = FALSE;
 791
 792         /* Otherwise, set the retry timeout expired, and set the final rule
 793         as the one from which to compute the next retry time. Subsequent
 794         messages will fail immediately until the retry time is reached (unless
 795         there are other, still active, retries). */
 796
 797         else
 798           {
 799           rule = final_rule;
 800           retry_record->expired = TRUE;
 801           if (addr == endaddr) timedout_count++;
 802           }
 803
 804         /* There is a special case to consider when some messages get through
 805         to a destination and others don't. This can happen locally when a
 806         large message pushes a user over quota, and it can happen remotely
 807         when a machine is on a dodgy Internet connection. The messages that
 808         get through wipe the retry information, causing those that don't to
 809         stay on the queue longer than the final retry time. In order to
 810         avoid this, we check, using the time of arrival of the message, to
 811         see if it has been on the queue for more than the final cutoff time,
 812         and if so, cause this retry item to time out, and the retry time to
 813         be set to "now" so that any subsequent messages in the same condition
 814         also get tried. We search for the last rule onwards from the one that
 815         is in use. If there are no retry rules for the item, rule will be null
 816         and timedout_count will already have been updated.
 817
 818         This implements "timeout this rule if EITHER the host (or routing or
 819         directing) has been failing for more than the maximum time, OR if the
 820         message has been on the queue for more than the maximum time."
 821
 822         February 2006: It is possible that this code is no longer needed
 823         following the change to the retry calculation to use the message age if
 824         it is larger than the time since first failure. It may be that the
 825         expired flag is always set when the other conditions are met. However,
 826         this is a small bit of code, and it does no harm to leave it in place,
 827         just in case. */
 828
 829         if (  received_time.tv_sec <= retry_record->first_failed
 830            && addr == endaddr
 831            && !retry_record->expired
 832            && rule)
 833           {
 834           retry_rule *last_rule;
 835           for (last_rule = rule; last_rule->next; last_rule = last_rule->next)
 836             ;
 837           if (now - received_time.tv_sec > last_rule->timeout)
 838             {
 839             DEBUG(D_retry) debug_printf_indent("on queue longer than maximum retry\n");
 840             timedout_count++;
 841             rule = NULL;
 842             }
 843           }
 844
 845         /* Compute the next try time from the rule, subject to the global
 846         maximum, and update the retry database. If rule == NULL it means
 847         there were no rules at all (and the timeout will be set expired),
 848         or we have a message that is older than the final timeout. In this
 849         case set the next retry time to now, so that one delivery attempt
 850         happens for subsequent messages. */
 851
 852         if (!rule)
 853           next_try = now;
 854         else
 855           {
 856           if (rule->rule == 'F')
 857             next_try = now + rule->p1;
 858           else  /* rule = 'G' or 'H' */
 859             {
 860             int last_predicted_gap =
 861               retry_record->next_try - retry_record->last_try;
 862             int last_actual_gap = now - retry_record->last_try;
 863             int lastgap = (last_predicted_gap < last_actual_gap)?
 864               last_predicted_gap : last_actual_gap;
 865             int next_gap = (lastgap * rule->p2)/1000;
 866             if (rule->rule == 'G')
 867               next_try = now + ((lastgap < rule->p1)? rule->p1 : next_gap);
 868             else  /* The 'H' rule */
 869               {
 870               next_try = now + rule->p1;
 871               if (next_gap > rule->p1)
 872                 next_try += random_number(next_gap - rule->p1)/2 +
 873                   (next_gap - rule->p1)/2;
 874               }
 875             }
 876           }
 877
 878         /* Impose a global retry max */
 879
 880         if (next_try - now > retry_interval_max)
 881           next_try = now + retry_interval_max;
 882
 883         /* If the new message length is greater than the previous one, we have
 884         to copy the record first.  If we're using an old one, the read used
 885         tainted memory so we're ok to write into it. */
 886
 887         if (message_length > message_space)
 888           {
 889           dbdata_retry * newr =
 890             store_get(sizeof(dbdata_retry) + message_length, message);
 891           memcpy(newr, retry_record, sizeof(dbdata_retry));
 892           retry_record = newr;
 893           }
 894
 895         /* Set up the retry record; message_length may be less than the string
 896         length for very long error strings. */
 897
 898         retry_record->last_try = now;
 899         retry_record->next_try = next_try;
 900         retry_record->basic_errno = rti->basic_errno;
 901         retry_record->more_errno = rti->more_errno;
 902         Ustrncpy(retry_record->text, message, message_length);
 903         retry_record->text[message_length] = 0; /* nul-term string in db */
 904
 905         DEBUG(D_retry)
 906           {
 907           int letter = retry_record->more_errno & 255;
 908           debug_printf_indent("Writing retry data for %s\n", rti->key);
 909           debug_printf_indent("  first failed=%d last try=%d next try=%d expired=%d\n",
 910             (int)retry_record->first_failed, (int)retry_record->last_try,
 911             (int)retry_record->next_try, retry_record->expired);
 912           debug_printf_indent("  errno=%d more_errno=", retry_record->basic_errno);
 913           if (letter == 'A' || letter == 'M')
 914             debug_printf("%d,%c", (retry_record->more_errno >> 8) & 255,
 915               letter);
 916           else
 917             debug_printf("%d", retry_record->more_errno);
 918           debug_printf(" %s\n", retry_record->text);
 919           }
 920
 921         if (dbfn_write(dbm_file, rti->key, retry_record,
 922                       sizeof(dbdata_retry) + message_length) != 0)
 923           DEBUG(D_retry) debug_printf_indent("retry record write failed\n");
 924
 925         if (!exim_lockfile_needed())
 926           dbfn_transaction_commit(dbm_file);
 927         }                            /* Loop for each retry item */
 928       DEBUG(D_retry) acl_level--;
 929
 930       /* If all the non-delete retry items are timed out, the address is
 931       timed out, provided that we didn't skip any hosts because their retry
 932       time was not reached (or because of hosts_max_try). */
 933
 934       if (update_count > 0 && update_count == timedout_count)
 935         if (!testflag(endaddr, af_retry_skipped))
 936           {
 937           DEBUG(D_retry) debug_printf_indent("timed out: all retries expired\n");
 938           timed_out = TRUE;
 939           }
 940         else
 941           DEBUG(D_retry)
 942             debug_printf_indent("timed out but some hosts were skipped\n");
 943       }     /* Loop for an address and its parents */
 944
 945     /* If this is a deferred address, and retry processing was requested by
 946     means of one or more retry items, and they all timed out, move the address
 947     to the failed queue, and restart this loop without updating paddr.
 948
 949     If there were several addresses batched in the same remote delivery, only
 950     the original top one will have host retry items attached to it, but we want
 951     to handle all the same. Each will have a pointer back to its "top" address,
 952     and they will now precede the item with the retries because addresses are
 953     inverted when added to these final queues. We have saved information about
 954     them in passing (below) so they can all be cut out at once. */
 955
 956     if (i == 2)   /* Handling defers */
 957       {
 958       if (endaddr->retries && timed_out)
 959         {
 960         if (last_first == endaddr) paddr = saved_paddr;
 961         addr = *paddr;
 962         *paddr = endaddr->next;
 963
 964         endaddr->next = *addr_failed;
 965         *addr_failed = addr;
 966
 967         for (;; addr = addr->next)
 968           {
 969           setflag(addr, af_retry_timedout);
 970           addr->message = addr->message
 971             ? string_sprintf("%s: retry timeout exceeded", addr->message)
 972             : US"retry timeout exceeded";
 973           addr->user_message = addr->user_message
 974             ? string_sprintf("%s: retry timeout exceeded", addr->user_message)
 975             : US"retry timeout exceeded";
 976           log_write(0, LOG_MAIN, "** %s%s%s%s: retry timeout exceeded",
 977             addr->address,
 978             addr->parent ? US" <" : US"",
 979             addr->parent ? addr->parent->address : US"",
 980             addr->parent ? US">" : US"");
 981
 982           if (addr == endaddr) break;
 983           }
 984
 985         continue;                       /* Restart from changed *paddr */
 986         }
 987
 988       /* This address is to remain on the defer chain. If it has a "first"
 989       pointer, save the pointer to it in case we want to fail the set of
 990       addresses when we get to the first one. */
 991
 992       if (endaddr->first != last_first)
 993         {
 994         last_first = endaddr->first;
 995         saved_paddr = paddr;
 996         }
 997       }
 998
 999     /* All cases (succeed, fail, defer left on queue) */
1000
1001     paddr = &(endaddr->next);         /* Advance to next address */
1002     }                                 /* Loop for all addresses  */
1003   DEBUG(D_retry) acl_level--;
1004   }                                   /* Loop for succeed, fail, defer */
1005
1006 /* Close and unlock the database */
1007
1008 if (dbm_file)
1009   if (dbm_file != continue_retry_db)
1010       if (exim_lockfile_needed())
1011         dbfn_close(dbm_file);
1012       else
1013         dbfn_close_multi(dbm_file);
1014   else DEBUG(D_hints_lookup)
1015     debug_printf_indent("retaining retry hintsdb handle\n");
1016
1017 DEBUG(D_retry)
1018   { acl_level--; debug_printf_indent("end of retry processing\n"); }
1019 }
1020
1021 /* End of retry.c */
1022 /* vi: aw ai sw=2
1023 */