src/src/retry.c

   1 /* $Cambridge: exim/src/src/retry.c,v 1.12 2007/01/08 10:50:18 ph10 Exp $ */
   2
   3 /*************************************************
   4 *     Exim - an Internet mail transport agent    *
   5 *************************************************/
   6
   7 /* Copyright (c) University of Cambridge 1995 - 2007 */
   8 /* See the file NOTICE for conditions of use and distribution. */
   9
  10 /* Functions concerned with retrying unsuccessful deliveries. */
  11
  12
  13 #include "exim.h"
  14
  15
  16
  17 /*************************************************
  18 *         Check the ultimate address timeout     *
  19 *************************************************/
  20
  21 /* This function tests whether a message has been on the queue longer than
  22 the maximum retry time for a particular host.
  23
  24 Arguments:
  25   host_key      the key to look up a host retry rule
  26   domain        the domain to look up a domain retry rule
  27   basic_errno   a specific error number, or zero if none
  28   more_errno    additional data for the error
  29   now           the time
  30
  31 Returns:        TRUE if the ultimate timeout has been reached
  32 */
  33
  34 static BOOL
  35 ultimate_address_timeout(uschar *host_key, uschar *domain, int basic_errno,
  36   int more_errno, time_t now)
  37 {
  38 BOOL address_timeout = TRUE;   /* no rule => timed out */
  39
  40 retry_config *retry =
  41   retry_find_config(host_key+2, domain, basic_errno, more_errno);
  42
  43 if (retry != NULL && retry->rules != NULL)
  44   {
  45   retry_rule *last_rule;
  46   for (last_rule = retry->rules;
  47        last_rule->next != NULL;
  48        last_rule = last_rule->next);
  49   DEBUG(D_transport|D_retry)
  50     debug_printf("  received_time=%d diff=%d timeout=%d\n",
  51       received_time, (int)(now - received_time), last_rule->timeout);
  52   address_timeout = (now - received_time > last_rule->timeout);
  53   }
  54 else
  55   {
  56   DEBUG(D_transport|D_retry)
  57     debug_printf("no retry rule found: assume timed out\n");
  58   }
  59
  60 return address_timeout;
  61 }
  62
  63
  64
  65 /*************************************************
  66 *     Set status of a host+address item          *
  67 *************************************************/
  68
  69 /* This function is passed a host_item which contains a host name and an
  70 IP address string. Its job is to set the status of the address if it is not
  71 already set (indicated by hstatus_unknown). The possible values are:
  72
  73    hstatus_usable    the address is not listed in the unusable tree, and does
  74                      not have a retry record, OR the time is past the next
  75                      try time, OR the message has been on the queue for more
  76                      than the maximum retry time for a failing host
  77
  78    hstatus_unusable  the address is listed in the unusable tree, or does have
  79                      a retry record, and the time is not yet at the next retry
  80                      time.
  81
  82    hstatus_unusable_expired  as above, but also the retry time has expired
  83                      for this address.
  84
  85 The reason a delivery is permitted when a message has been around for a very
  86 long time is to allow the ultimate address timeout to operate after a delivery
  87 failure. Otherwise some messages may stick around without being tried for too
  88 long.
  89
  90 If a host retry record is retrieved from the hints database, the time of last
  91 trying is filled into the last_try field of the host block. If a host is
  92 generally usable, a check is made to see if there is a retry delay on this
  93 specific message at this host.
  94
  95 If a non-standard port is being used, it is added to the retry key.
  96
  97 Arguments:
  98   domain              the address domain
  99   host                pointer to a host item
 100   portstring          "" for standard port, ":xxxx" for a non-standard port
 101   include_ip_address  TRUE to include the address in the key - this is
 102                         usual, but sometimes is not wanted
 103   retry_host_key      where to put a pointer to the key for the host-specific
 104                         retry record, if one is read and the host is usable
 105   retry_message_key   where to put a pointer to the key for the message+host
 106                         retry record, if one is read and the host is usable
 107
 108 Returns:    TRUE if the host has expired but is usable because
 109              its retry time has come
 110 */
 111
 112 BOOL
 113 retry_check_address(uschar *domain, host_item *host, uschar *portstring,
 114   BOOL include_ip_address, uschar **retry_host_key, uschar **retry_message_key)
 115 {
 116 BOOL yield = FALSE;
 117 time_t now = time(NULL);
 118 uschar *host_key, *message_key;
 119 open_db dbblock;
 120 open_db *dbm_file;
 121 tree_node *node;
 122 dbdata_retry *host_retry_record, *message_retry_record;
 123
 124 *retry_host_key = *retry_message_key = NULL;
 125
 126 DEBUG(D_transport|D_retry) debug_printf("checking status of %s\n", host->name);
 127
 128 /* Do nothing if status already set; otherwise initialize status as usable. */
 129
 130 if (host->status != hstatus_unknown) return FALSE;
 131 host->status = hstatus_usable;
 132
 133 /* Generate the host key for the unusable tree and the retry database. Ensure
 134 host names are lower cased (that's what %S does). */
 135
 136 host_key = include_ip_address?
 137   string_sprintf("T:%S:%s%s", host->name, host->address, portstring) :
 138   string_sprintf("T:%S%s", host->name, portstring);
 139
 140 /* Generate the message-specific key */
 141
 142 message_key = string_sprintf("%s:%s", host_key, message_id);
 143
 144 /* Search the tree of unusable IP addresses. This is filled in when deliveries
 145 fail, because the retry database itself is not updated until the end of all
 146 deliveries (so as to do it all in one go). The tree records addresses that have
 147 become unusable during this delivery process (i.e. those that will get put into
 148 the retry database when it is updated). */
 149
 150 node = tree_search(tree_unusable, host_key);
 151 if (node != NULL)
 152   {
 153   DEBUG(D_transport|D_retry) debug_printf("found in tree of unusables\n");
 154   host->status = (node->data.val > 255)?
 155     hstatus_unusable_expired : hstatus_unusable;
 156   host->why = node->data.val & 255;
 157   return FALSE;
 158   }
 159
 160 /* Open the retry database, giving up if there isn't one. Otherwise, search for
 161 the retry records, and then close the database again. */
 162
 163 if ((dbm_file = dbfn_open(US"retry", O_RDONLY, &dbblock, FALSE)) == NULL)
 164   {
 165   DEBUG(D_deliver|D_retry|D_hints_lookup)
 166     debug_printf("no retry data available\n");
 167   return FALSE;
 168   }
 169 host_retry_record = dbfn_read(dbm_file, host_key);
 170 message_retry_record = dbfn_read(dbm_file, message_key);
 171 dbfn_close(dbm_file);
 172
 173 /* Ignore the data if it is too old - too long since it was written */
 174
 175 if (host_retry_record == NULL)
 176   {
 177   DEBUG(D_transport|D_retry) debug_printf("no host retry record\n");
 178   }
 179 else if (now - host_retry_record->time_stamp > retry_data_expire)
 180   {
 181   host_retry_record = NULL;
 182   DEBUG(D_transport|D_retry) debug_printf("host retry record too old\n");
 183   }
 184
 185 if (message_retry_record == NULL)
 186   {
 187   DEBUG(D_transport|D_retry) debug_printf("no message retry record\n");
 188   }
 189 else if (now - message_retry_record->time_stamp > retry_data_expire)
 190   {
 191   message_retry_record = NULL;
 192   DEBUG(D_transport|D_retry) debug_printf("message retry record too old\n");
 193   }
 194
 195 /* If there's a host-specific retry record, check for reaching the retry
 196 time (or forcing). If not, and the host is not expired, check for the message
 197 having been around for longer than the maximum retry time for this host or
 198 address. Allow the delivery if it has. Otherwise set the appropriate unusable
 199 flag and return FALSE. Otherwise arrange to return TRUE if this is an expired
 200 host. */
 201
 202 if (host_retry_record != NULL)
 203   {
 204   *retry_host_key = host_key;
 205
 206   /* We have not reached the next try time. Check for the ultimate address
 207   timeout if the host has not expired. */
 208
 209   if (now < host_retry_record->next_try && !deliver_force)
 210     {
 211     DEBUG(D_transport|D_retry)
 212       {
 213       debug_printf("host retry time not reached: checking ultimate address "
 214         "timeout\n");
 215       debug_printf("  now=%d first_failed=%d next_try=%d expired=%d\n",
 216         (int)now, (int)host_retry_record->first_failed,
 217         (int)host_retry_record->next_try,
 218         host_retry_record->expired);
 219       }
 220
 221     if (!host_retry_record->expired &&
 222         ultimate_address_timeout(host_key, domain,
 223           host_retry_record->basic_errno, host_retry_record->more_errno, now))
 224       {
 225       DEBUG(D_transport|D_retry)
 226         debug_printf("on queue longer than maximum retry for "
 227           "address - allowing delivery\n");
 228       return FALSE;
 229       }
 230
 231     /* We have not hit the ultimate address timeout; host is unusable. */
 232
 233     host->status = (host_retry_record->expired)?
 234       hstatus_unusable_expired : hstatus_unusable;
 235     host->why = hwhy_retry;
 236     host->last_try = host_retry_record->last_try;
 237     return FALSE;
 238     }
 239
 240   /* Host is usable; set return TRUE if expired. */
 241
 242   yield = host_retry_record->expired;
 243   }
 244
 245 /* It's OK to try the host. If there's a message-specific retry record, check
 246 for reaching its retry time (or forcing). If not, mark the host unusable,
 247 unless the ultimate address timeout has been reached. */
 248
 249 if (message_retry_record != NULL)
 250   {
 251   *retry_message_key = message_key;
 252   if (now < message_retry_record->next_try && !deliver_force)
 253     {
 254     DEBUG(D_transport|D_retry)
 255       {
 256       debug_printf("host+message retry time not reached: checking ultimate "
 257         "address timeout\n");
 258       debug_printf("  now=%d first_failed=%d next_try=%d expired=%d\n",
 259         (int)now, (int)message_retry_record->first_failed,
 260         (int)message_retry_record->next_try, message_retry_record->expired);
 261       }
 262     if (!ultimate_address_timeout(host_key, domain, 0, 0, now))
 263       {
 264       host->status = hstatus_unusable;
 265       host->why = hwhy_retry;
 266       }
 267     else
 268       {
 269       DEBUG(D_transport|D_retry)
 270         debug_printf("on queue longer than maximum retry for "
 271           "address - allowing delivery\n");
 272       }
 273     return FALSE;
 274     }
 275   }
 276
 277 return yield;
 278 }
 279
 280
 281
 282
 283 /*************************************************
 284 *           Add a retry item to an address       *
 285 *************************************************/
 286
 287 /* Retry items are chained onto an address when it is deferred either by router
 288 or by a transport, or if it succeeds or fails and there was a previous retry
 289 item that now needs to be deleted. Sometimes there can be both kinds of item:
 290 for example, if routing was deferred but then succeeded, and delivery then
 291 deferred. In that case there is a delete item for the routing retry, and an
 292 updating item for the delivery.
 293
 294 (But note that that is only visible at the outer level, because in remote
 295 delivery subprocesses, the address starts "clean", with no retry items carried
 296 in.)
 297
 298 These items are used at the end of a delivery attempt to update the retry
 299 database. The keys start R: for routing delays and T: for transport delays.
 300
 301 Arguments:
 302   addr    the address block onto which to hang the item
 303   key     the retry key
 304   flags   delete, host, and message flags, copied into the block
 305
 306 Returns:  nothing
 307 */
 308
 309 void
 310 retry_add_item(address_item *addr, uschar *key, int flags)
 311 {
 312 retry_item *rti = store_get(sizeof(retry_item));
 313 rti->next = addr->retries;
 314 addr->retries = rti;
 315 rti->key = key;
 316 rti->basic_errno = addr->basic_errno;
 317 rti->more_errno = addr->more_errno;
 318 rti->message = addr->message;
 319 rti->flags = flags;
 320
 321 DEBUG(D_transport|D_retry)
 322   {
 323   int letter = rti->more_errno & 255;
 324   debug_printf("added retry item for %s: errno=%d more_errno=", rti->key,
 325     rti->basic_errno);
 326   if (letter == 'A' || letter == 'M')
 327     debug_printf("%d,%c", (rti->more_errno >> 8) & 255, letter);
 328   else
 329     debug_printf("%d", rti->more_errno);
 330   debug_printf(" flags=%d\n", flags);
 331   }
 332 }
 333
 334
 335
 336 /*************************************************
 337 *        Find retry configuration data           *
 338 *************************************************/
 339
 340 /* Search the in-store retry information for the first retry item that applies
 341 to a given destination. If the key contains an @ we are probably handling a
 342 local delivery and have a complete address to search for; this happens when
 343 retry_use_local_part is set on a router. Otherwise, the key is likely to be a
 344 host name for a remote delivery, or a domain name for a local delivery. We
 345 prepend *@ on the front of it so that it will match a retry item whose address
 346 item pattern is independent of the local part. The alternate key, if set, is
 347 always just a domain, so we treat it likewise.
 348
 349 Arguments:
 350   key          key for which retry info is wanted
 351   alternate    alternative key, always just a domain
 352   basic_errno  specific error predicate on the retry rule, or zero
 353   more_errno   additional data for errno predicate
 354
 355 Returns:       pointer to retry rule, or NULL
 356 */
 357
 358 retry_config *
 359 retry_find_config(uschar *key, uschar *alternate, int basic_errno,
 360   int more_errno)
 361 {
 362 int replace = 0;
 363 uschar *use_key, *use_alternate;
 364 uschar *colon = Ustrchr(key, ':');
 365 retry_config *yield;
 366
 367 /* If there's a colon in the key, there are two possibilities:
 368
 369 (1) This is a key for a host, ip address, and possibly port, in the format
 370
 371       hostname:ip+port
 372
 373     In this case, we temporarily replace the colon with a zero, to terminate
 374     the string after the host name.
 375
 376 (2) This is a key for a pipe, file, or autoreply delivery, in the format
 377
 378       pipe-or-file-or-auto:x@y
 379
 380     where x@y is the original address that provoked the delivery. The pipe or
 381     file or auto will start with | or / or >, whereas a host name will start
 382     with a letter or a digit. In this case we want to use the original address
 383     to search for a retry rule. */
 384
 385 if (colon != NULL)
 386   {
 387   if (isalnum(*key))
 388     replace = ':';
 389   else
 390     key = Ustrrchr(key, ':') + 1;   /* Take from the last colon */
 391   }
 392
 393 if (replace == 0) colon = key + Ustrlen(key);
 394 *colon = 0;
 395
 396 /* Sort out the keys */
 397
 398 use_key = (Ustrchr(key, '@') != NULL)? key : string_sprintf("*@%s", key);
 399 use_alternate = (alternate == NULL)? NULL : string_sprintf("*@%s", alternate);
 400
 401 /* Scan the configured retry items. */
 402
 403 for (yield = retries; yield != NULL; yield = yield->next)
 404   {
 405   uschar *plist = yield->pattern;
 406   uschar *slist = yield->senders;
 407
 408   /* If a specific error is set for this item, check that we are handling that
 409   specific error, and if so, check any additional error information if
 410   required. */
 411
 412   if (yield->basic_errno != 0)
 413     {
 414     /* Special code is required for quota errors, as these can either be system
 415     quota errors, or Exim's own quota imposition, which has a different error
 416     number. Full partitions are also treated in the same way as quota errors.
 417     */
 418
 419     if (yield->basic_errno == ERRNO_EXIMQUOTA)
 420       {
 421       if ((basic_errno != ERRNO_EXIMQUOTA && basic_errno != errno_quota &&
 422            basic_errno != ENOSPC) ||
 423           (yield->more_errno != 0 && yield->more_errno > more_errno))
 424         continue;
 425       }
 426
 427     /* The TLSREQUIRED error also covers TLSFAILURE. These are subtly different
 428     errors, but not worth separating at this level. */
 429
 430     else if (yield->basic_errno == ERRNO_TLSREQUIRED)
 431       {
 432       if (basic_errno != ERRNO_TLSREQUIRED && basic_errno != ERRNO_TLSFAILURE)
 433         continue;
 434       }
 435
 436     /* Handle 4xx responses to MAIL, RCPT, or DATA. The code that was received
 437     is in the 2nd least significant byte of more_errno (with 400 subtracted).
 438     The required value is coded in the 2nd least significant byte of the
 439     yield->more_errno field as follows:
 440
 441       255     => any 4xx code
 442       >= 100  => the decade must match the value less 100
 443       < 100   => the exact value must match
 444     */
 445
 446     else if (yield->basic_errno == ERRNO_MAIL4XX ||
 447              yield->basic_errno == ERRNO_RCPT4XX ||
 448              yield->basic_errno == ERRNO_DATA4XX)
 449       {
 450       int wanted;
 451       if (basic_errno != yield->basic_errno) continue;
 452       wanted = (yield->more_errno >> 8) & 255;
 453       if (wanted != 255)
 454         {
 455         int evalue = (more_errno >> 8) & 255;
 456         if (wanted >= 100)
 457           {
 458           if ((evalue/10)*10 != wanted - 100) continue;
 459           }
 460         else if (evalue != wanted) continue;
 461         }
 462       }
 463
 464     /* There are some special cases for timeouts */
 465
 466     else if (yield->basic_errno == ETIMEDOUT)
 467       {
 468       if (basic_errno != ETIMEDOUT) continue;
 469
 470       /* Just RTEF_CTOUT in the rule => don't care about 'A'/'M' addresses */
 471       if (yield->more_errno == RTEF_CTOUT)
 472         {
 473         if ((more_errno & RTEF_CTOUT) == 0) continue;
 474         }
 475
 476       else if (yield->more_errno != 0)
 477         {
 478         int cf_errno = more_errno;
 479         if ((yield->more_errno & RTEF_CTOUT) == 0) cf_errno &= ~RTEF_CTOUT;
 480         if (yield->more_errno != cf_errno) continue;
 481         }
 482       }
 483
 484     /* Default checks for exact match */
 485
 486     else
 487       {
 488       if (yield->basic_errno != basic_errno ||
 489          (yield->more_errno != 0 && yield->more_errno != more_errno))
 490        continue;
 491       }
 492     }
 493
 494   /* If the "senders" condition is set, check it. Note that sender_address may
 495   be null during -brt checking, in which case we do not use this rule. */
 496
 497   if (slist != NULL && (sender_address == NULL ||
 498       match_address_list(sender_address, TRUE, TRUE, &slist, NULL, -1, 0,
 499         NULL) != OK))
 500     continue;
 501
 502   /* Check for a match between the address list item at the start of this retry
 503   rule and either the main or alternate keys. */
 504
 505   if (match_address_list(use_key, TRUE, TRUE, &plist, NULL, -1, UCHAR_MAX+1,
 506         NULL) == OK ||
 507      (use_alternate != NULL &&
 508       match_address_list(use_alternate, TRUE, TRUE, &plist, NULL, -1,
 509         UCHAR_MAX+1, NULL) == OK))
 510     break;
 511   }
 512
 513 *colon = replace;
 514 return yield;
 515 }
 516
 517
 518
 519
 520 /*************************************************
 521 *              Update retry database             *
 522 *************************************************/
 523
 524 /* Update the retry data for any directing/routing/transporting that was
 525 deferred, or delete it for those that succeeded after a previous defer. This is
 526 done all in one go to minimize opening/closing/locking of the database file.
 527
 528 Note that, because SMTP delivery involves a list of destinations to try, there
 529 may be defer-type retry information for some of them even when the message was
 530 successfully delivered. Likewise if it eventually failed.
 531
 532 This function may move addresses from the defer to the failed queue if the
 533 ultimate retry time has expired.
 534
 535 Arguments:
 536   addr_defer    queue of deferred addresses
 537   addr_failed   queue of failed addresses
 538   addr_succeed  queue of successful addresses
 539
 540 Returns:        nothing
 541 */
 542
 543 void
 544 retry_update(address_item **addr_defer, address_item **addr_failed,
 545   address_item **addr_succeed)
 546 {
 547 open_db dbblock;
 548 open_db *dbm_file = NULL;
 549 time_t now = time(NULL);
 550 int i;
 551
 552 DEBUG(D_retry) debug_printf("Processing retry items\n");
 553
 554 /* Three-times loop to handle succeeded, failed, and deferred addresses.
 555 Deferred addresses must be handled after failed ones, because some may be moved
 556 to the failed chain if they have timed out. */
 557
 558 for (i = 0; i < 3; i++)
 559   {
 560   address_item *endaddr, *addr;
 561   address_item *last_first = NULL;
 562   address_item **paddr = (i==0)? addr_succeed :
 563     (i==1)? addr_failed : addr_defer;
 564   address_item **saved_paddr = NULL;
 565
 566   DEBUG(D_retry) debug_printf("%s addresses:\n", (i == 0)? "Succeeded" :
 567     (i == 1)? "Failed" : "Deferred");
 568
 569   /* Loop for each address on the chain. For deferred addresses, the whole
 570   address times out unless one of its retry addresses has a retry rule that
 571   hasn't yet timed out. Deferred addresses should not be requesting deletion
 572   of retry items, but just in case they do by accident, treat that case
 573   as "not timed out".
 574
 575   As well as handling the addresses themselves, we must also process any
 576   retry items for any parent addresses - these are typically "delete" items,
 577   because the parent must have succeeded in order to generate the child. */
 578
 579   while ((endaddr = *paddr) != NULL)
 580     {
 581     BOOL timed_out = FALSE;
 582     retry_item *rti;
 583
 584     for (addr = endaddr; addr != NULL; addr = addr->parent)
 585       {
 586       int update_count = 0;
 587       int timedout_count = 0;
 588
 589       DEBUG(D_retry) debug_printf("%s%s\n", addr->address, (addr->retries == NULL)?
 590         ": no retry items" : "");
 591
 592       /* Loop for each retry item. */
 593
 594       for (rti = addr->retries; rti != NULL; rti = rti->next)
 595         {
 596         uschar *message;
 597         int message_length, message_space, failing_interval, next_try;
 598         retry_rule *rule, *final_rule;
 599         retry_config *retry;
 600         dbdata_retry *retry_record;
 601
 602         /* Open the retry database if it is not already open; failure to open
 603         the file is logged, but otherwise ignored - deferred addresses will
 604         get retried at the next opportunity. Not opening earlier than this saves
 605         opening if no addresses have retry items - common when none have yet
 606         reached their retry next try time. */
 607
 608         if (dbm_file == NULL)
 609           dbm_file = dbfn_open(US"retry", O_RDWR, &dbblock, TRUE);
 610
 611         if (dbm_file == NULL)
 612           {
 613           DEBUG(D_deliver|D_retry|D_hints_lookup)
 614             debug_printf("retry database not available for updating\n");
 615           return;
 616           }
 617
 618         /* If there are no deferred addresses, that is, if this message is
 619         completing, and the retry item is for a message-specific SMTP error,
 620         force it to be deleted, because there's no point in keeping data for
 621         no-longer-existing messages. This situation can occur when a domain has
 622         two hosts and a message-specific error occurs for the first of them,
 623         but the address gets delivered to the second one. This optimization
 624         doesn't succeed in cleaning out all the dead entries, but it helps. */
 625
 626         if (*addr_defer == NULL && (rti->flags & rf_message) != 0)
 627           rti->flags |= rf_delete;
 628
 629         /* Handle the case of a request to delete the retry info for this
 630         destination. */
 631
 632         if ((rti->flags & rf_delete) != 0)
 633           {
 634           (void)dbfn_delete(dbm_file, rti->key);
 635           DEBUG(D_retry)
 636             debug_printf("deleted retry information for %s\n", rti->key);
 637           continue;
 638           }
 639
 640         /* Count the number of non-delete retry items. This is so that we
 641         can compare it to the count of timed_out ones, to check whether
 642         all are timed out. */
 643
 644         update_count++;
 645
 646         /* Get the retry information for this destination and error code, if
 647         any. If this item is for a remote host with ip address, then pass
 648         the domain name as an alternative to search for. If no retry
 649         information is found, we can't generate a retry time, so there is
 650         no point updating the database. This retry item is timed out. */
 651
 652         if ((retry = retry_find_config(rti->key + 2,
 653              ((rti->flags & rf_host) != 0)? addr->domain : NULL,
 654              rti->basic_errno, rti->more_errno)) == NULL)
 655           {
 656           DEBUG(D_retry) debug_printf("No configured retry item for %s%s%s\n",
 657             rti->key,
 658             ((rti->flags & rf_host) != 0)? US" or " : US"",
 659             ((rti->flags & rf_host) != 0)? addr->domain : US"");
 660           if (addr == endaddr) timedout_count++;
 661           continue;
 662           }
 663
 664         DEBUG(D_retry)
 665           {
 666           if ((rti->flags & rf_host) != 0)
 667             debug_printf("retry for %s (%s) = %s %d %d\n", rti->key,
 668               addr->domain, retry->pattern, retry->basic_errno,
 669               retry->more_errno);
 670           else
 671             debug_printf("retry for %s = %s %d %d\n", rti->key, retry->pattern,
 672               retry->basic_errno, retry->more_errno);
 673           }
 674
 675         /* Set up the message for the database retry record. Because DBM
 676         records have a maximum data length, we enforce a limit. There isn't
 677         much point in keeping a huge message here, anyway. */
 678
 679         message = (rti->basic_errno > 0)? US strerror(rti->basic_errno) :
 680           (rti->message == NULL)?
 681           US"unknown error" : string_printing(rti->message);
 682         message_length = Ustrlen(message);
 683         if (message_length > 150) message_length = 150;
 684
 685         /* Read a retry record from the database or construct a new one.
 686         Ignore an old one if it is too old since it was last updated. */
 687
 688         retry_record = dbfn_read(dbm_file, rti->key);
 689         if (retry_record != NULL &&
 690             now - retry_record->time_stamp > retry_data_expire)
 691           retry_record = NULL;
 692
 693         if (retry_record == NULL)
 694           {
 695           retry_record = store_get(sizeof(dbdata_retry) + message_length);
 696           message_space = message_length;
 697           retry_record->first_failed = now;
 698           retry_record->last_try = now;
 699           retry_record->next_try = now;
 700           retry_record->expired = FALSE;
 701           retry_record->text[0] = 0;      /* just in case */
 702           }
 703         else message_space = Ustrlen(retry_record->text);
 704
 705         /* Compute how long this destination has been failing */
 706
 707         failing_interval = now - retry_record->first_failed;
 708         DEBUG(D_retry) debug_printf("failing_interval=%d message_age=%d\n",
 709           failing_interval, message_age);
 710
 711         /* For a non-host error, if the message has been on the queue longer
 712         than the recorded time of failure, use the message's age instead. This
 713         can happen when some messages can be delivered and others cannot; a
 714         successful delivery will reset the first_failed time, and this can lead
 715         to a failing message being retried too often. */
 716
 717         if ((rti->flags & rf_host) == 0 && message_age > failing_interval)
 718           failing_interval = message_age;
 719
 720         /* Search for the current retry rule. The cutoff time of the
 721         last rule is handled differently to the others. The rule continues
 722         to operate for ever (the global maximum interval will eventually
 723         limit the gaps) but its cutoff time determines when an individual
 724         destination times out. If there are no retry rules, the destination
 725         always times out, but we can't compute a retry time. */
 726
 727         final_rule = NULL;
 728         for (rule = retry->rules; rule != NULL; rule = rule->next)
 729           {
 730           if (failing_interval <= rule->timeout) break;
 731           final_rule = rule;
 732           }
 733
 734         /* If there's an un-timed out rule, the destination has not
 735         yet timed out, so the address as a whole has not timed out (but we are
 736         interested in this only for the end address). Make sure the expired
 737         flag is false (can be forced via fixdb from outside, but ensure it is
 738         consistent with the rules whenever we go through here). */
 739
 740         if (rule != NULL)
 741           {
 742           retry_record->expired = FALSE;
 743           }
 744
 745         /* Otherwise, set the retry timeout expired, and set the final rule
 746         as the one from which to compute the next retry time. Subsequent
 747         messages will fail immediately until the retry time is reached (unless
 748         there are other, still active, retries). */
 749
 750         else
 751           {
 752           rule = final_rule;
 753           retry_record->expired = TRUE;
 754           if (addr == endaddr) timedout_count++;
 755           }
 756
 757         /* There is a special case to consider when some messages get through
 758         to a destination and others don't. This can happen locally when a
 759         large message pushes a user over quota, and it can happen remotely
 760         when a machine is on a dodgy Internet connection. The messages that
 761         get through wipe the retry information, causing those that don't to
 762         stay on the queue longer than the final retry time. In order to
 763         avoid this, we check, using the time of arrival of the message, to
 764         see if it has been on the queue for more than the final cutoff time,
 765         and if so, cause this retry item to time out, and the retry time to
 766         be set to "now" so that any subsequent messages in the same condition
 767         also get tried. We search for the last rule onwards from the one that
 768         is in use. If there are no retry rules for the item, rule will be null
 769         and timedout_count will already have been updated.
 770
 771         This implements "timeout this rule if EITHER the host (or routing or
 772         directing) has been failing for more than the maximum time, OR if the
 773         message has been on the queue for more than the maximum time."
 774
 775         February 2006: It is possible that this code is no longer needed
 776         following the change to the retry calculation to use the message age if
 777         it is larger than the time since first failure. It may be that the
 778         expired flag is always set when the other conditions are met. However,
 779         this is a small bit of code, and it does no harm to leave it in place,
 780         just in case. */
 781
 782         if (received_time <= retry_record->first_failed &&
 783             addr == endaddr && !retry_record->expired && rule != NULL)
 784           {
 785           retry_rule *last_rule;
 786           for (last_rule = rule;
 787                last_rule->next != NULL;
 788                last_rule = last_rule->next);
 789           if (now - received_time > last_rule->timeout)
 790             {
 791             DEBUG(D_retry) debug_printf("on queue longer than maximum retry\n");
 792             timedout_count++;
 793             rule = NULL;
 794             }
 795           }
 796
 797         /* Compute the next try time from the rule, subject to the global
 798         maximum, and update the retry database. If rule == NULL it means
 799         there were no rules at all (and the timeout will be set expired),
 800         or we have a message that is older than the final timeout. In this
 801         case set the next retry time to now, so that one delivery attempt
 802         happens for subsequent messages. */
 803
 804         if (rule == NULL) next_try = now; else
 805           {
 806           if (rule->rule == 'F') next_try = now + rule->p1;
 807           else  /* rule = 'G' or 'H' */
 808             {
 809             int last_predicted_gap =
 810               retry_record->next_try - retry_record->last_try;
 811             int last_actual_gap = now - retry_record->last_try;
 812             int lastgap = (last_predicted_gap < last_actual_gap)?
 813               last_predicted_gap : last_actual_gap;
 814             int next_gap = (lastgap * rule->p2)/1000;
 815             if (rule->rule == 'G')
 816               {
 817               next_try = now + ((lastgap < rule->p1)? rule->p1 : next_gap);
 818               }
 819             else  /* The 'H' rule */
 820               {
 821               next_try = now + rule->p1;
 822               if (next_gap > rule->p1)
 823                 next_try += random_number(next_gap - rule->p1)/2 +
 824                   (next_gap - rule->p1)/2;
 825               }
 826             }
 827           }
 828
 829         /* Impose a global retry max */
 830
 831         if (next_try - now > retry_interval_max)
 832           next_try = now + retry_interval_max;
 833
 834         /* If the new message length is greater than the previous one, we
 835         have to copy the record first. */
 836
 837         if (message_length > message_space)
 838           {
 839           dbdata_retry *newr = store_get(sizeof(dbdata_retry) + message_length);
 840           memcpy(newr, retry_record, sizeof(dbdata_retry));
 841           retry_record = newr;
 842           }
 843
 844         /* Set up the retry record; message_length may be less than the string
 845         length for very long error strings. */
 846
 847         retry_record->last_try = now;
 848         retry_record->next_try = next_try;
 849         retry_record->basic_errno = rti->basic_errno;
 850         retry_record->more_errno = rti->more_errno;
 851         Ustrncpy(retry_record->text, message, message_length);
 852         retry_record->text[message_length] = 0;
 853
 854         DEBUG(D_retry)
 855           {
 856           int letter = retry_record->more_errno & 255;
 857           debug_printf("Writing retry data for %s\n", rti->key);
 858           debug_printf("  first failed=%d last try=%d next try=%d expired=%d\n",
 859             (int)retry_record->first_failed, (int)retry_record->last_try,
 860             (int)retry_record->next_try, retry_record->expired);
 861           debug_printf("  errno=%d more_errno=", retry_record->basic_errno);
 862           if (letter == 'A' || letter == 'M')
 863             debug_printf("%d,%c", (retry_record->more_errno >> 8) & 255,
 864               letter);
 865           else
 866             debug_printf("%d", retry_record->more_errno);
 867           debug_printf(" %s\n", retry_record->text);
 868           }
 869
 870         (void)dbfn_write(dbm_file, rti->key, retry_record,
 871           sizeof(dbdata_retry) + message_length);
 872         }                            /* Loop for each retry item */
 873
 874       /* If all the non-delete retry items are timed out, the address is
 875       timed out, provided that we didn't skip any hosts because their retry
 876       time was not reached (or because of hosts_max_try). */
 877
 878       if (update_count > 0 && update_count == timedout_count)
 879         {
 880         if (!testflag(endaddr, af_retry_skipped))
 881           {
 882           DEBUG(D_retry) debug_printf("timed out: all retries expired\n");
 883           timed_out = TRUE;
 884           }
 885         else
 886           {
 887           DEBUG(D_retry)
 888             debug_printf("timed out but some hosts were skipped\n");
 889           }
 890         }
 891       }     /* Loop for an address and its parents */
 892
 893     /* If this is a deferred address, and retry processing was requested by
 894     means of one or more retry items, and they all timed out, move the address
 895     to the failed queue, and restart this loop without updating paddr.
 896
 897     If there were several addresses batched in the same remote delivery, only
 898     the original top one will have host retry items attached to it, but we want
 899     to handle all the same. Each will have a pointer back to its "top" address,
 900     and they will now precede the item with the retries because addresses are
 901     inverted when added to these final queues. We have saved information about
 902     them in passing (below) so they can all be cut out at once. */
 903
 904     if (i == 2)   /* Handling defers */
 905       {
 906       if (endaddr->retries != NULL && timed_out)
 907         {
 908         if (last_first == endaddr) paddr = saved_paddr;
 909         addr = *paddr;
 910         *paddr = endaddr->next;
 911
 912         endaddr->next = *addr_failed;
 913         *addr_failed = addr;
 914
 915         for (;; addr = addr->next)
 916           {
 917           setflag(addr, af_retry_timedout);
 918           addr->message = (addr->message == NULL)? US"retry timeout exceeded" :
 919             string_sprintf("%s: retry timeout exceeded", addr->message);
 920           addr->user_message = (addr->user_message == NULL)?
 921             US"retry timeout exceeded" :
 922             string_sprintf("%s: retry timeout exceeded", addr->user_message);
 923           log_write(0, LOG_MAIN, "** %s%s%s%s: retry timeout exceeded",
 924             addr->address,
 925            (addr->parent == NULL)? US"" : US" <",
 926            (addr->parent == NULL)? US"" : addr->parent->address,
 927            (addr->parent == NULL)? US"" : US">");
 928
 929           if (addr == endaddr) break;
 930           }
 931
 932         continue;                       /* Restart from changed *paddr */
 933         }
 934
 935       /* This address is to remain on the defer chain. If it has a "first"
 936       pointer, save the pointer to it in case we want to fail the set of
 937       addresses when we get to the first one. */
 938
 939       if (endaddr->first != last_first)
 940         {
 941         last_first = endaddr->first;
 942         saved_paddr = paddr;
 943         }
 944       }
 945
 946     /* All cases (succeed, fail, defer left on queue) */
 947
 948     paddr = &(endaddr->next);         /* Advance to next address */
 949     }                                 /* Loop for all addresses  */
 950   }                                   /* Loop for succeed, fail, defer */
 951
 952 /* Close and unlock the database */
 953
 954 if (dbm_file != NULL) dbfn_close(dbm_file);
 955
 956 DEBUG(D_retry) debug_printf("end of retry processing\n");
 957 }
 958
 959 /* End of retry.c */