Chunking: do not treat the first lonely dot special. CVE-2017-16944, Bug 2201

[exim.git] / src / src / receive.c
diff --git a/src/src/receive.c b/src/src/receive.c

index 67fcc8e15b73503c7e9035c8746478ab3debf97e..2812ea2c87743f5db2171cbf86e00c6c24d08a9e 100644 (file)
--- a/src/src/receive.c
+++ b/src/src/receive.c
@@ -25,6 +25,7 @@ static FILE   *data_file = NULL;
  static int     data_fd = -1;
  static uschar *spool_name = US"";
  
+enum CH_STATE {LF_SEEN, MID_LINE, CR_SEEN};
  
  
  /*************************************************
@@ -905,7 +906,8 @@ a cut-down version of the state-machine above; we don't need to do leading-dot
  detection and unstuffing.
  
  Arguments:
-  fout      a FILE to which to write the message; NULL if skipping
+  fout      a FILE to which to write the message; NULL if skipping;
+            must be open for both writing and reading.
  
  Returns:    One of the END_xxx values indicating why it stopped reading
  */
@@ -913,27 +915,58 @@ Returns:    One of the END_xxx values indicating why it stopped reading
  static int
  read_message_bdat_smtp(FILE *fout)
  {
-int ch_state = 0, linelength = 0, ch;
+int linelength = 0, ch;
+enum CH_STATE ch_state = LF_SEEN;
+BOOL fix_nl = FALSE;
  
  for(;;)
    {
    switch ((ch = (bdat_getc)(GETC_BUFFER_UNLIMITED)))
      {
      case EOF:  return END_EOF;
-    case EOD:  return END_DOT;         /* normal exit */
      case ERR:  return END_PROTOCOL;
+    case EOD:
+      /* Nothing to get from the sender anymore. We check the last
+      character written to the spool.
+
+      RFC 3030 states that BDAT chunks are normal text, terminated by CRLF.
+      If we were strict, we would refuse such broken messages.
+      But we are liberal, so we fix it.  It would be easy just to append
+      the "\n" to the spool.
+
+      But there are some more things (line counting, message size calculation and such),
+      that would need to be duplicated here.  So we simply do some ungetc
+      trickery.
+      */
+      if (fout)
+       {
+       if (fseek(fout, -1, SEEK_CUR) < 0)      return END_PROTOCOL;
+       if (fgetc(fout) == '\n')                return END_DOT;
+       }
+
+      if (linelength == -1)    /* \r already seen (see below) */
+        {
+        DEBUG(D_receive) debug_printf("Add missing LF\n");
+        bdat_ungetc('\n');
+        continue;
+        }
+      DEBUG(D_receive) debug_printf("Add missing CRLF\n");
+      bdat_ungetc('\r');      /* not even \r was seen */
+      fix_nl = TRUE;
+
+      continue;
      case '\0':  body_zerocount++; break;
      }
    switch (ch_state)
      {
-    case 0:                             /* After LF or CRLF */
-      ch_state = 1;
+    case LF_SEEN:                             /* After LF or CRLF */
+      ch_state = MID_LINE;
        /* fall through to handle as normal uschar. */
  
-    case 1:                             /* Mid-line state */
+    case MID_LINE:                            /* Mid-line state */
        if (ch == '\n')
         {
-       ch_state = 0;
+       ch_state = LF_SEEN;
         body_linecount++;
         if (linelength > max_received_linelength)
           max_received_linelength = linelength;
@@ -941,25 +974,26 @@ for(;;)
         }
        else if (ch == '\r')
         {
-       ch_state = 2;
+       ch_state = CR_SEEN;
+       if (fix_nl) bdat_ungetc('\n');
         continue;                       /* don't write CR */
         }
        break;
  
-    case 2:                             /* After (unwritten) CR */
+    case CR_SEEN:                       /* After (unwritten) CR */
        body_linecount++;
        if (linelength > max_received_linelength)
         max_received_linelength = linelength;
        linelength = -1;
        if (ch == '\n')
-       ch_state = 0;
+       ch_state = LF_SEEN;
        else
         {
         message_size++;
-       if (fout != NULL && fputc('\n', fout) == EOF) return END_WERROR;
+       if (fout && fputc('\n', fout) == EOF) return END_WERROR;
         (void) cutthrough_put_nl();
         if (ch == '\r') continue;       /* don't write CR */
-       ch_state = 1;
+       ch_state = MID_LINE;
         }
        break;
      }
@@ -1116,7 +1150,7 @@ switch(where)
  
  if (acl_removed_headers != NULL)
    {
-  DEBUG(D_receive|D_acl) debug_printf(">>Headers removed by %s ACL:\n", acl_name);
+  DEBUG(D_receive|D_acl) debug_printf_indent(">>Headers removed by %s ACL:\n", acl_name);
  
    for (h = header_list; h != NULL; h = h->next) if (h->type != htype_old)
      {
@@ -1129,15 +1163,15 @@ if (acl_removed_headers != NULL)
        if (header_testname(h, s, Ustrlen(s), FALSE))
         {
         h->type = htype_old;
-        DEBUG(D_receive|D_acl) debug_printf("  %s", h->text);
+        DEBUG(D_receive|D_acl) debug_printf_indent("  %s", h->text);
         }
      }
    acl_removed_headers = NULL;
-  DEBUG(D_receive|D_acl) debug_printf(">>\n");
+  DEBUG(D_receive|D_acl) debug_printf_indent(">>\n");
    }
  
  if (acl_added_headers == NULL) return;
-DEBUG(D_receive|D_acl) debug_printf(">>Headers added by %s ACL:\n", acl_name);
+DEBUG(D_receive|D_acl) debug_printf_indent(">>Headers added by %s ACL:\n", acl_name);
  
  for (h = acl_added_headers; h != NULL; h = next)
    {
@@ -1148,7 +1182,7 @@ for (h = acl_added_headers; h != NULL; h = next)
      case htype_add_top:
      h->next = header_list;
      header_list = h;
-    DEBUG(D_receive|D_acl) debug_printf("  (at top)");
+    DEBUG(D_receive|D_acl) debug_printf_indent("  (at top)");
      break;
  
      case htype_add_rec:
@@ -1163,7 +1197,7 @@ for (h = acl_added_headers; h != NULL; h = next)
        }
      h->next = last_received->next;
      last_received->next = h;
-    DEBUG(D_receive|D_acl) debug_printf("  (after Received:)");
+    DEBUG(D_receive|D_acl) debug_printf_indent("  (after Received:)");
      break;
  
      case htype_add_rfc:
@@ -1178,7 +1212,7 @@ for (h = acl_added_headers; h != NULL; h = next)
         of all headers. Our current header must follow it. */
      h->next = last_received->next;
      last_received->next = h;
-    DEBUG(D_receive|D_acl) debug_printf("  (before any non-Received: or Resent-*: header)");
+    DEBUG(D_receive|D_acl) debug_printf_indent("  (before any non-Received: or Resent-*: header)");
      break;
  
      default:
@@ -1198,11 +1232,11 @@ for (h = acl_added_headers; h != NULL; h = next)
    h->type = header_checkname(h, FALSE);
    if (h->type >= 'a') h->type = htype_other;
  
-  DEBUG(D_receive|D_acl) debug_printf("  %s", header_last->text);
+  DEBUG(D_receive|D_acl) debug_printf_indent("  %s", header_last->text);
    }
  
  acl_added_headers = NULL;
-DEBUG(D_receive|D_acl) debug_printf(">>\n");
+DEBUG(D_receive|D_acl) debug_printf_indent(">>\n");
  }
  
  
@@ -1354,7 +1388,7 @@ if (rc == OK)
        {
        (void) string_format(rfc822_file_path, sizeof(rfc822_file_path),
         "%s/scan/%s/%s", spool_directory, message_id, entry->d_name);
-      debug_printf("RFC822 attachment detected: running MIME ACL for '%s'\n",
+      DEBUG(D_receive) debug_printf("RFC822 attachment detected: running MIME ACL for '%s'\n",
         rfc822_file_path);
        break;
        }
@@ -1738,8 +1772,8 @@ for (;;)
    (and sometimes lunatic messages can have ones that are 100s of K long) we
    call store_release() for strings that have been copied - if the string is at
    the start of a block (and therefore the only thing in it, because we aren't
-  doing any other gets), the block gets freed. We can only do this because we
-  know there are no other calls to store_get() going on. */
+  doing any other gets), the block gets freed. We can only do this release if
+  there were no allocations since the one that we want to free. */
  
    if (ptr >= header_size - 4)
      {
@@ -1748,9 +1782,10 @@ for (;;)
      header_size *= 2;
      if (!store_extend(next->text, oldsize, header_size))
        {
+      BOOL release_ok = store_last_get[store_pool] == next->text;
        uschar *newtext = store_get(header_size);
        memcpy(newtext, next->text, ptr);
-      store_release(next->text);
+      if (release_ok) store_release(next->text);
        next->text = newtext;
        }
      }
@@ -1792,7 +1827,7 @@ for (;;)
    prevent further reading), and break out of the loop, having freed the
    empty header, and set next = NULL to indicate no data line. */
  
-  if (ptr == 0 && ch == '.' && (smtp_input || dot_ends))
+  if (ptr == 0 && ch == '.' && dot_ends)
      {
      ch = (receive_getc)(GETC_BUFFER_UNLIMITED);
      if (ch == '\r')
@@ -2548,8 +2583,9 @@ letter and it is not used internally.
  NOTE: If ever the format of message ids is changed, the regular expression for
  checking that a string is in this format must be updated in a corresponding
  way. It appears in the initializing code in exim.c. The macro MESSAGE_ID_LENGTH
-must also be changed to reflect the correct string length. Then, of course,
-other programs that rely on the message id format will need updating too. */
+must also be changed to reflect the correct string length. The queue-sort code
+needs to know the layout. Then, of course, other programs that rely on the
+message id format will need updating too. */
  
  Ustrncpy(message_id, string_base62((long int)(message_id_tv.tv_sec)), 6);
  message_id[6] = '-';