Update version number and copyright year.

[users/jgh/exim.git] / src / src / parse.c
diff --git a/src/src/parse.c b/src/src/parse.c

index 67d9e148c3655e8e895153c710b76daa0965cf80..66764b642216304361b97b93766430910eef1018 100644 (file)
--- a/src/src/parse.c
+++ b/src/src/parse.c
@@ -1,10 +1,10 @@
-/* $Cambridge: exim/src/src/parse.c,v 1.7 2006/02/07 11:19:00 ph10 Exp $ */
+/* $Cambridge: exim/src/src/parse.c,v 1.11 2007/01/08 10:50:18 ph10 Exp $ */
  
  /*************************************************
  *     Exim - an Internet mail transport agent    *
  *************************************************/
  
-/* Copyright (c) University of Cambridge 1995 - 2006 */
+/* Copyright (c) University of Cambridge 1995 - 2007 */
  /* See the file NOTICE for conditions of use and distribution. */
  
  /* Functions for parsing addresses */
@@ -597,10 +597,15 @@ which may appear in certain headers. If the flag parse_allow_group is set
  TRUE and parse_found_group is FALSE when this function is called, an address
  which is the start of a group (i.e. preceded by a phrase and a colon) is
  recognized; the phrase is ignored and the flag parse_found_group is set. If
-this flag is TRUE at the end of an address, then if an extraneous semicolon is
-found, it is ignored and the flag is cleared. This logic is used only when
-scanning through addresses in headers, either to fulfil the -t option or for
-rewriting or checking header syntax.
+this flag is TRUE at the end of an address, and if an extraneous semicolon is
+found, it is ignored and the flag is cleared.
+
+This logic is used only when scanning through addresses in headers, either to
+fulfil the -t option, or for rewriting, or for checking header syntax. Because
+the group "state" has to be remembered between multiple calls of this function,
+the variables parse_{allow,found}_group are global. It is important to ensure
+that they are reset to FALSE at the end of scanning a header's list of
+addresses.
  
  Arguments:
    mailbox     points to the RFC822 mailbox
@@ -854,6 +859,8 @@ Arguments:
    charset      the name of the character set; NULL => iso-8859-1
    buffer       the buffer to put the answer in
    buffer_size  the size of the buffer
+  fold         if TRUE, a newline is inserted before the separating space when
+                 more than one encoded-word is generated
  
  Returns:       pointer to the original string, if no quoting needed, or
                 pointer to buffer containing the quoted string, or
@@ -863,7 +870,7 @@ Returns:       pointer to the original string, if no quoting needed, or
  
  uschar *
  parse_quote_2047(uschar *string, int len, uschar *charset, uschar *buffer,
-  int buffer_size)
+  int buffer_size, BOOL fold)
  {
  uschar *s = string;
  uschar *p, *t;
@@ -890,6 +897,7 @@ for (; len > 0; len--)
      {
      *t++ = '?';
      *t++ = '=';
+    if (fold) *t++ = '\n';
      *t++ = ' ';
      p = t;
      Ustrncpy(p, buffer, hlen);
@@ -989,7 +997,7 @@ for (i = 0, s = phrase; i < len; i++, s++)
    if ((*s < 32 && *s != '\t') || *s > 126) break;
  
  if (i < len) return parse_quote_2047(phrase, len, headers_charset, buffer,
-  buffer_size);
+  buffer_size, FALSE);
  
  /* No non-printers; use the RFC 822 quoting rules */
  
@@ -1656,6 +1664,68 @@ for (;;)
  }
  
  
+
+/*************************************************
+*            Extract a Message-ID                *
+*************************************************/
+
+/* This function is used to extract message ids from In-Reply-To: and
+References: header lines. The copy put in *yield keeps its '<' and '>'.
+
+Arguments:
+  str          pointer to the start of the message-id
+  yield        put pointer to the message id (in dynamic memory) here
+  error        put error message here on failure
+
+Returns:       points after the processed message-id or NULL on error
+*/
+
+uschar *
+parse_message_id(uschar *str, uschar **yield, uschar **error)
+{
+uschar *domain = NULL;  /* stays NULL unless read_addr_spec() sets it */
+uschar *id;
+
+str = skip_comment(str);
+if (*str != '<')  /* an id must open with '<' */
+  {
+  *error = US"Missing '<' before message-id";
+  return NULL;
+  }
+
+/* Getting a block the size of the input string will definitely be sufficient
+for the answer, but it may also be very long if we are processing a header
+line. Therefore, take care to release unwanted store afterwards. */
+
+id = *yield = store_get(Ustrlen(str) + 1);
+*id++ = *str++;  /* copy the '<' that was checked above */
+
+str = read_addr_spec(str, id, '>', error, &domain);
+
+if (*error == NULL)  /* NOTE(review): *error is never NULLed here; confirm callee does */
+  {
+  if (*str != '>') *error = US"Missing '>' after message-id";
+    else if (domain == NULL) *error = US"domain missing in message-id";
+  }
+
+if (*error != NULL)
+  {
+  store_reset(*yield);  /* give the whole block back; *yield is left stale */
+  return NULL;
+  }
+
+while (*id != 0) id++;  /* end of text from read_addr_spec() (assumed NUL-ended) */
+*id++ = *str++;  /* append the '>' verified above */
+*id++ = 0;  /* terminate the id string */
+store_reset(id);  /* release the unused tail of the block */
+
+str = skip_comment(str);
+return str;
+}
+
+
+
+
  /*************************************************
  **************************************************
  *             Stand-alone test program           *
@@ -1788,6 +1858,26 @@ while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
    else printf("Failed: %d %s\n", extracted, errmess);
    }
  
+printf("Testing parse_message_id\n");
+
+while (Ufgets(buffer, sizeof(buffer), stdin) != NULL)
+  {
+  uschar *s, *t, *errmess;
+  buffer[Ustrlen(buffer) - 1] = 0;
+  if (buffer[0] == 0) break;
+  s = buffer;
+  while (*s != 0)
+    {
+    s = parse_message_id(s, &t, &errmess);
+    if (errmess != NULL)
+      {
+      printf("Failed: %s\n", errmess);
+      break;
+      }
+    printf("%s\n", t);
+    }
+  }
+
  return 0;
  }