+/* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.4 2006/11/07 16:50:36 ph10 Exp $ */
+
/*************************************************
* PCRE testing program *
*************************************************/
/* This program was hacked up as a tester for PCRE. I really should have
written it more tidily in the first place. Will I ever learn? It has grown and
-been extended and consequently is now rather untidy in places.
+been extended and consequently is now rather, er, *very* untidy in places.
-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
#include <locale.h>
#include <errno.h>
-/* We need the internal info for displaying the results of pcre_study(). Also
-for getting the opcodes for showing compiled code. */
+#ifndef _WIN32
+#include <sys/resource.h>
+#endif
#define PCRE_SPY /* For Win32 build, import data, not export */
-#include "internal.h"
+
+/* We include pcre_internal.h because we need the internal info for displaying
+the results of pcre_study() and we also need to know about the internal
+macros, structures, and other internal data values; pcretest has "inside
+information" compared to a program that strictly follows the PCRE API. */
+
+#include "pcre_internal.h"
+
+/* We need access to the data tables that PCRE uses. So as not to have to keep
+two copies, we include the source file here, changing the names of the external
+symbols to prevent clashes. */
+
+#define _pcre_utf8_table1 utf8_table1
+#define _pcre_utf8_table1_size utf8_table1_size
+#define _pcre_utf8_table2 utf8_table2
+#define _pcre_utf8_table3 utf8_table3
+#define _pcre_utf8_table4 utf8_table4
+#define _pcre_utt utt
+#define _pcre_utt_size utt_size
+#define _pcre_OP_lengths OP_lengths
+
+#include "pcre_tables.c"
+
+/* We also need the pcre_printint() function for printing out compiled
+patterns. This function is in a separate file so that it can be included in
+pcre_compile.c when that module is compiled with debugging enabled. */
+
+#include "pcre_printint.src"
+
/* It is possible to compile this test program without including support for
testing the POSIX interface, though this is not available via the standard
#include "pcreposix.h"
#endif
+/* It is also possible, for the benefit of the version imported into Exim, to
+build pcretest without support for UTF8 (define NOUTF8), without the interface
+to the DFA matcher (NODFA), and without the doublecheck of the old "info"
+function (define NOINFOCHECK). */
+
+
+/* Other parameters */
+
#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#define LOOPREPEAT 500000
-#define BUFFER_SIZE 30000
-#define PBUFFER_SIZE BUFFER_SIZE
-#define DBUFFER_SIZE BUFFER_SIZE
-
+/* Static variables */
static FILE *outfile;
static int log_store = 0;
static int use_utf8;
static size_t gotten_store;
+/* The buffers grow automatically if very long input lines are encountered. */
+
+static int buffer_size = 50000;
+static uschar *buffer = NULL;
+static uschar *dbuffer = NULL;
static uschar *pbuffer = NULL;
-static const int utf8_table1[] = {
- 0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
-static const int utf8_table2[] = {
- 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
+/*************************************************
+* Read or extend an input line *
+*************************************************/
-static const int utf8_table3[] = {
- 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
+/* Input lines are read into buffer, but both patterns and data lines can be
+continued over multiple input lines. In addition, if the buffer fills up, we
+want to automatically expand it so as to be able to handle extremely large
+lines that are needed for certain stress tests. When the input buffer is
+expanded, the other two buffers must also be expanded likewise, and the
+contents of pbuffer, which are a copy of the input for callouts, must be
+preserved (for when expansion happens for a data line). This is not the most
+efficient way of handling this, but hey, this is just a test program!
+Arguments:
+ f the file to read
+ start where in buffer to start (this *must* be within buffer)
+Returns: pointer to the start of new data
+ could be a copy of start, or could be moved
+ NULL if no data read and EOF reached
+*/
-/*************************************************
-* Print compiled regex *
-*************************************************/
+/* Read one input line from f into the global buffer, beginning at start. When
+1000 bytes or fewer remain, buffer, dbuffer and pbuffer are all replaced by
+blocks of twice the size before reading continues. */
+static uschar *
+extend_inputline(FILE *f, uschar *start)
+{
+uschar *here = start;      /* Where the next fgets() will store its data */
+
+for (;;)
+ {
+ int rlen = buffer_size - (here - buffer);   /* Bytes left from here to the end */
+ if (rlen > 1000)
+ {
+ int dlen;
+ /* fgets() yields NULL when nothing more could be read. That is reported as
+ NULL only if this call has stored nothing at all; otherwise the unterminated
+ text gathered so far is returned. */
+ if (fgets((char *)here, rlen, f) == NULL)
+ return (here == start)? NULL : start;
+ dlen = (int)strlen((char *)here);
+ /* A trailing newline means the whole line has now been read. */
+ if (dlen > 0 && here[dlen - 1] == '\n') return start;
+ /* No newline yet: keep reading after what we have. */
+ here += dlen;
+ }
+
+ else
+ {
+ /* Nearly full: get three new blocks, each double the current size. */
+ int new_buffer_size = 2*buffer_size;
+ uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
+ uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
+ uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
+
+ if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
+ {
+ fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
+ exit(1);
+ }
+
+ /* Carry over the input read so far and the contents of pbuffer. The
+ contents of dbuffer are not copied. */
+ memcpy(new_buffer, buffer, buffer_size);
+ memcpy(new_pbuffer, pbuffer, buffer_size);
+
+ buffer_size = new_buffer_size;
+
+ /* Rebase both pointers onto the new block. This has to happen while
+ "buffer" still holds the old address, i.e. before it is reassigned below. */
+ start = new_buffer + (start - buffer);
+ here = new_buffer + (here - buffer);
+
+ /* The old blocks go away here, so any pointer into them that is held
+ elsewhere is no longer valid after this point. */
+ free(buffer);
+ free(dbuffer);
+ free(pbuffer);
+
+ buffer = new_buffer;
+ dbuffer = new_dbuffer;
+ pbuffer = new_pbuffer;
+ }
+ }
+
+return NULL; /* Control never gets here */
+}
-/* The code for doing this is held in a separate file that is also included in
-pcre.c when it is compiled with the debug switch. It defines a function called
-print_internals(), which uses a table of opcode lengths defined by the macro
-OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates
-Unicode property names to numbers; this is kept in a separate file. */
-static uschar OP_lengths[] = { OP_LENGTHS };
-#ifdef SUPPORT_UCP
-#include "ucp.h"
-#include "ucptypetable.c"
-#endif
-#include "printint.c"
-/*************************************************
-* Convert character value to UTF-8 *
-*************************************************/
-
-/* This function takes an integer value in the range 0 - 0x7fffffff
-and encodes it as a UTF-8 character in 0 to 6 bytes.
-
-Arguments:
- cvalue the character value
- buffer pointer to buffer for result - at least 6 bytes long
-
-Returns: number of characters placed in the buffer
- -1 if input character is negative
- 0 if input character is positive but too big (only when
- int is longer than 32 bits)
-*/
-
-static int
-ord2utf8(int cvalue, unsigned char *buffer)
-{
-register int i, j;
-for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
- if (cvalue <= utf8_table1[i]) break;
-if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
-if (cvalue < 0) return -1;
-
-buffer += i;
-for (j = i; j > 0; j--)
- {
- *buffer-- = 0x80 | (cvalue & 0x3f);
- cvalue >>= 6;
- }
-*buffer = utf8_table2[i] | cvalue;
-return i + 1;
-}
-
/*************************************************
* Convert UTF-8 string to value *
and returns the value of the character.
Argument:
- buffer a pointer to the byte vector
- vptr a pointer to an int to receive the value
+ utf8bytes a pointer to the byte vector
+ vptr a pointer to an int to receive the value
-Returns: > 0 => the number of bytes consumed
- -6 to 0 => malformed UTF-8 character at offset = (-return)
+Returns: > 0 => the number of bytes consumed
+ -6 to 0 => malformed UTF-8 character at offset = (-return)
*/
+#if !defined NOUTF8
+
static int
-utf82ord(unsigned char *buffer, int *vptr)
+utf82ord(unsigned char *utf8bytes, int *vptr)
{
-int c = *buffer++;
+int c = *utf8bytes++;
int d = c;
int i, j, s;
for (j = 0; j < i; j++)
{
- c = *buffer++;
+ c = *utf8bytes++;
if ((c & 0xc0) != 0x80) return -(j+1);
s -= 6;
d |= (c & 0x3f) << s;
/* Check that encoding was the correct unique one */
-for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
+for (j = 0; j < utf8_table1_size; j++)
if (d <= utf8_table1[j]) break;
if (j != i) return -(i+1);
return i+1;
}
+#endif
+
+
+
+/*************************************************
+* Convert character value to UTF-8 *
+*************************************************/
+
+/* This function takes an integer value in the range 0 - 0x7fffffff
+and encodes it as a UTF-8 character in 1 to 6 bytes.
+
+Arguments:
+ cvalue the character value
+ utf8bytes pointer to buffer for result - at least 6 bytes long
+
+Returns: number of bytes placed in the buffer
+*/
+
+#if !defined NOUTF8
+
+static int
+ord2utf8(int cvalue, uschar *utf8bytes)
+{
+int extra = 0;     /* Number of continuation bytes that follow the lead byte */
+int pos;
+
+/* utf8_table1[n] is the largest value that fits in n+1 bytes; step along the
+table until cvalue fits. */
+
+while (extra < utf8_table1_size && cvalue > utf8_table1[extra]) extra++;
+
+/* Store the continuation bytes last-to-first, taking six bits of the value
+for each one. */
+
+for (pos = extra; pos > 0; pos--)
+  {
+  utf8bytes[pos] = 0x80 | (cvalue & 0x3f);
+  cvalue >>= 6;
+  }
+
+/* Whatever is left of the value is merged with the lead-byte marker. */
+
+utf8bytes[0] = utf8_table2[extra] | cvalue;
+return extra + 1;
+}
+
+#endif
+
/*************************************************
static int pchars(unsigned char *p, int length, FILE *f)
{
-int c;
+int c = 0;
int yield = 0;
while (length-- > 0)
{
+#if !defined NOUTF8
if (use_utf8)
{
int rc = utf82ord(p, &c);
continue;
}
}
+#endif
/* Not UTF-8, or malformed UTF-8 */
* Byte flipping function *
*************************************************/
-static long int
-byteflip(long int value, int n)
+static unsigned long int
+byteflip(unsigned long int value, int n)
{
if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
return ((value & 0x000000ff) << 24) |
+/*************************************************
+* Check match or recursion limit *
+*************************************************/
+
+/* Search for the smallest limit value at which pcre_exec() stops returning
+errnumber, print it on outfile, and return the result of the last pcre_exec()
+call. Trial values start at 64 and are doubled until one succeeds; after that
+the range between the largest failing and smallest succeeding value is
+bisected.
+
+Arguments:
+  re, extra, bptr, len, start_offset, options, use_offsets, use_size_offsets
+                are passed straight through to pcre_exec()
+  flag          bit set in extra->flags for the duration of the search and
+                cleared again before returning
+  limit         where each trial value is stored before calling pcre_exec()
+  errnumber     the pcre_exec() return code that means "limit too small"
+  msg           text naming the limit, used in the output line
+
+Returns:        the value returned by the final pcre_exec() call
+*/
+
+static int
+check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
+ int start_offset, int options, int *use_offsets, int use_size_offsets,
+ int flag, unsigned long int *limit, int errnumber, const char *msg)
+{
+int count;
+int min = 0;       /* Largest trial value known to be too small */
+int mid = 64;      /* Current trial value */
+int max = -1;      /* Smallest trial value known to work; -1 = none yet */
+
+extra->flags |= flag;
+
+for (;;)
+ {
+ *limit = mid;
+
+ count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
+ use_offsets, use_size_offsets);
+
+ /* Limit was hit: raise the lower bound. With no upper bound yet, double the
+ trial value; otherwise bisect, stepping straight to max when it is adjacent. */
+ if (count == errnumber)
+ {
+ /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
+ min = mid;
+ mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
+ }
+
+ /* The match ran to completion (match, no match, or partial), so mid is big
+ enough. If it is just one above a failing value it is the minimum. */
+ else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
+ count == PCRE_ERROR_PARTIAL)
+ {
+ if (mid == min + 1)
+ {
+ fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
+ break;
+ }
+ /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
+ max = mid;
+ mid = (min + mid)/2;
+ }
+ else break; /* Some other error */
+ }
+
+extra->flags &= ~flag;
+return count;
+}
+
+
+
+/*************************************************
+* Check newline indicator *
+*************************************************/
+
+/* This is used both at compile and run-time to check for <xxx> escapes, where
+xxx is cr, lf, or crlf (lower case only). Print a message and return 0 if
+there is no match.
+
+Arguments:
+ p points after the leading '<'
+ f file for error message
+
+Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
+*/
+
+static int
+check_newline(uschar *p, FILE *f)
+{
+/* The recognized spellings, each including the closing '>', with the number
+of characters to compare and the option bits to return. They are tried in
+this order. */
+
+static const struct {
+  const char *text;
+  int length;
+  int flags;
+} kinds[] = {
+  { "cr>",   3, PCRE_NEWLINE_CR   },
+  { "lf>",   3, PCRE_NEWLINE_LF   },
+  { "crlf>", 5, PCRE_NEWLINE_CRLF }
+};
+
+int k;
+
+for (k = 0; k < (int)(sizeof(kinds)/sizeof(kinds[0])); k++)
+  {
+  if (strncmp((char *)p, kinds[k].text, kinds[k].length) == 0)
+    return kinds[k].flags;
+  }
+
+/* Nothing matched: say so, and let the caller see 0. */
+
+fprintf(f, "Unknown newline type at: <%s\n", p);
+return 0;
+}
+
+
+
/*************************************************
* Main Program *
*************************************************/
int timeit = 0;
int showinfo = 0;
int showstore = 0;
+int quiet = 0;
int size_offsets = 45;
int size_offsets_max;
-int *offsets;
+int *offsets = NULL;
#if !defined NOPOSIX
int posix = 0;
#endif
int debug = 0;
int done = 0;
+int all_use_dfa = 0;
+int yield = 0;
+int stack_size;
+
+/* These vectors store, end-to-end, a list of captured substring names. Assume
+that 1024 is plenty long enough for the few names we'll be testing. */
+
+uschar copynames[1024];
+uschar getnames[1024];
-unsigned char *buffer;
-unsigned char *dbuffer;
+uschar *copynamesptr;
+uschar *getnamesptr;
/* Get buffers from malloc() so that Electric Fence will check their misuse
-when I am debugging. */
+when I am debugging. They grow automatically when very long lines are read. */
-buffer = (unsigned char *)malloc(BUFFER_SIZE);
-dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
-pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
+buffer = (unsigned char *)malloc(buffer_size);
+dbuffer = (unsigned char *)malloc(buffer_size);
+pbuffer = (unsigned char *)malloc(buffer_size);
/* The outfile variable is static so that new_malloc can use it. The _setmode()
stuff is some magic that I don't understand, but which apparently does good
if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
showstore = 1;
else if (strcmp(argv[op], "-t") == 0) timeit = 1;
+ else if (strcmp(argv[op], "-q") == 0) quiet = 1;
else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
+#if !defined NODFA
+ else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
+#endif
else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
*endptr == 0))
op++;
argc--;
}
+ else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
+ ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
+ *endptr == 0))
+ {
+#ifdef _WIN32
+ printf("PCRE: -S not supported on this OS\n");
+ exit(1);
+#else
+ int rc;
+ struct rlimit rlim;
+ getrlimit(RLIMIT_STACK, &rlim);
+ rlim.rlim_cur = stack_size * 1024 * 1024;
+ rc = setrlimit(RLIMIT_STACK, &rlim);
+ if (rc != 0)
+ {
+ printf("PCRE: setrlimit() failed with error %d\n", rc);
+ exit(1);
+ }
+ op++;
+ argc--;
+#endif
+ }
#if !defined NOPOSIX
else if (strcmp(argv[op], "-p") == 0) posix = 1;
#endif
(void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
printf(" %sUnicode properties support\n", rc? "" : "No ");
(void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
- printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
+ printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
+ (rc == '\n')? "LF" : "CRLF");
(void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
printf(" Internal link size = %d\n", rc);
(void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
printf(" POSIX malloc threshold = %d\n", rc);
(void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
printf(" Default match limit = %d\n", rc);
+ (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
+ printf(" Default recursion depth limit = %d\n", rc);
(void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
printf(" Match recursion uses %s\n", rc? "stack" : "heap");
exit(0);
else
{
printf("** Unknown or malformed option %s\n", argv[op]);
- printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
+ printf("Usage: pcretest [options] [<input> [<output>]]\n");
printf(" -C show PCRE compile-time options and exit\n");
- printf(" -d debug: show compiled code; implies -i\n"
- " -i show information about compiled pattern\n"
+ printf(" -d debug: show compiled code; implies -i\n");
+#if !defined NODFA
+ printf(" -dfa force DFA matching for all subjects\n");
+#endif
+ printf(" -i show information about compiled pattern\n"
" -m output memory used information\n"
" -o <n> set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf(" -p use POSIX interface\n");
#endif
+ printf(" -S <n> set stack size to <n> megabytes\n");
printf(" -s output store (memory) used information\n"
" -t time compilation and execution\n");
- return 1;
+ yield = 1;
+ goto EXIT;
}
op++;
argc--;
{
printf("** Failed to get %d bytes of memory for offsets vector\n",
size_offsets_max * sizeof(int));
- return 1;
+ yield = 1;
+ goto EXIT;
}
/* Sort out the input and output files */
if (infile == NULL)
{
printf("** Failed to open %s\n", argv[op]);
- return 1;
+ yield = 1;
+ goto EXIT;
}
}
if (outfile == NULL)
{
printf("** Failed to open %s\n", argv[op+1]);
- return 1;
+ yield = 1;
+ goto EXIT;
}
}
pcre_stack_malloc = stack_malloc;
pcre_stack_free = stack_free;
-/* Heading line, then prompt for first regex if stdin */
+/* Heading line unless quiet, then prompt for first regex if stdin */
-fprintf(outfile, "PCRE version %s\n\n", pcre_version());
+if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
/* Main loop */
use_utf8 = 0;
if (infile == stdin) printf(" re> ");
- if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
+ if (extend_inputline(infile, buffer) == NULL) break;
if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
fflush(outfile);
if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
{
- unsigned long int magic;
+ unsigned long int magic, get_options;
uschar sbuf[8];
FILE *f;
/* Need to know if UTF-8 for printing data strings */
- new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
- use_utf8 = (options & PCRE_UTF8) != 0;
+ new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
+ use_utf8 = (get_options & PCRE_UTF8) != 0;
/* Now see if there is any following study data */
pp++;
}
if (*pp != 0) break;
-
- len = BUFFER_SIZE - (pp - buffer);
- if (len < 256)
- {
- fprintf(outfile, "** Expression too long - missing delimiter?\n");
- goto SKIP_DATA;
- }
-
if (infile == stdin) printf(" > ");
- if (fgets((char *)pp, len, infile) == NULL)
+ if ((pp = extend_inputline(infile, pp)) == NULL)
{
fprintf(outfile, "** Unexpected EOF\n");
done = 1;
{
switch (*pp++)
{
+ case 'f': options |= PCRE_FIRSTLINE; break;
case 'g': do_g = 1; break;
case 'i': options |= PCRE_CASELESS; break;
case 'm': options |= PCRE_MULTILINE; break;
case 'F': do_flip = 1; break;
case 'G': do_G = 1; break;
case 'I': do_showinfo = 1; break;
+ case 'J': options |= PCRE_DUPNAMES; break;
case 'M': log_store = 1; break;
case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
case 'L':
ppp = pp;
- while (*ppp != '\n' && *ppp != ' ') ppp++;
+ /* The '\r' test here is so that it works on Windows */
+ while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
*ppp = 0;
if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
{
*pp = 0;
break;
- case '\n': case ' ': break;
+ case '<':
+ {
+ int x = check_newline(pp, outfile);
+ if (x == 0) goto SKIP_DATA;
+ options |= x;
+ while (*pp++ != '>');
+ }
+ break;
+
+ case '\r': /* So that it works in Windows */
+ case '\n':
+ case ' ':
+ break;
default:
fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
+ if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
+ if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
+ if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
+
rc = regcomp(&preg, (char *)p, cflags);
/* Compilation failed; go back for another re, skipping to blank line
if (rc != 0)
{
- (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
+ (void)regerror(rc, &preg, (char *)buffer, buffer_size);
fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
goto SKIP_DATA;
}
{
for (;;)
{
- if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
+ if (extend_inputline(infile, buffer) == NULL)
{
done = 1;
goto CONTINUE;
if (do_showinfo)
{
unsigned long int get_options, all_options;
+#if !defined NOINFOCHECK
int old_first_char, old_options, old_count;
+#endif
int count, backrefmax, first_char, need_char;
int nameentrysize, namecount;
const uschar *nametable;
if (do_debug)
{
fprintf(outfile, "------------------------------------------------------------------\n");
- print_internals(re, outfile);
+ pcre_printint(re, outfile);
}
new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
+#if !defined NOINFOCHECK
old_count = pcre_info(re, &old_options, &old_first_char);
if (count < 0) fprintf(outfile,
"Error %d from pcre_info()\n", count);
"Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
get_options, old_options);
}
+#endif
if (size != regex_gotten_store) fprintf(outfile,
"Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
if (do_flip)
{
all_options = byteflip(all_options, sizeof(all_options));
- }
+ }
if ((all_options & PCRE_NOPARTIAL) != 0)
fprintf(outfile, "Partial matching not supported\n");
if (get_options == 0) fprintf(outfile, "No options\n");
- else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
+ else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
+ ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
((get_options & PCRE_EXTRA) != 0)? " extra" : "",
((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
+ ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
((get_options & PCRE_UTF8) != 0)? " utf8" : "",
- ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
+ ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
+ ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
+
+ switch (get_options & PCRE_NEWLINE_CRLF)
+ {
+ case PCRE_NEWLINE_CR:
+ fprintf(outfile, "Forced newline sequence: CR\n");
+ break;
+
+ case PCRE_NEWLINE_LF:
+ fprintf(outfile, "Forced newline sequence: LF\n");
+ break;
- if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
- fprintf(outfile, "Case state changes\n");
+ case PCRE_NEWLINE_CRLF:
+ fprintf(outfile, "Forced newline sequence: CRLF\n");
+ break;
+
+ default:
+ break;
+ }
if (first_char == -1)
{
- fprintf(outfile, "First char at start or follows \\n\n");
+ fprintf(outfile, "First char at start or follows newline\n");
}
else if (first_char < 0)
{
}
fclose(f);
}
+
+ new_free(re);
+ if (extra != NULL) new_free(extra);
+ if (tables != NULL) new_free((void *)tables);
continue; /* With next regex */
}
} /* End of non-POSIX compile */
for (;;)
{
- unsigned char *q;
- unsigned char *bptr = dbuffer;
+ uschar *q;
+ uschar *bptr = dbuffer;
int *use_offsets = offsets;
int use_size_offsets = size_offsets;
int callout_data = 0;
int gmatched = 0;
int start_offset = 0;
int g_notempty = 0;
+ int use_dfa = 0;
options = 0;
+ *copynames = 0;
+ *getnames = 0;
+
+ copynamesptr = copynames;
+ getnamesptr = getnames;
+
pcre_callout = callout;
first_callout = 1;
callout_extra = 0;
callout_fail_id = -1;
show_malloc = 0;
- if (infile == stdin) printf("data> ");
- if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
+ if (extra != NULL) extra->flags &=
+ ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
+
+ len = 0;
+ for (;;)
{
- done = 1;
- goto CONTINUE;
+ if (infile == stdin) printf("data> ");
+ if (extend_inputline(infile, buffer + len) == NULL)
+ {
+ if (len > 0) break;
+ done = 1;
+ goto CONTINUE;
+ }
+ if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
+ len = (int)strlen((char *)buffer);
+ if (buffer[len-1] == '\n') break;
}
- if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
- len = (int)strlen((char *)buffer);
while (len > 0 && isspace(buffer[len-1])) len--;
buffer[len] = 0;
if (len == 0) break;
c -= '0';
while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
c = c * 8 + *p++ - '0';
+
+#if !defined NOUTF8
+ if (use_utf8 && c > 255)
+ {
+ unsigned char buff8[8];
+ int ii, utn;
+ utn = ord2utf8(c, buff8);
+ for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
+ c = buff8[ii]; /* Last byte */
+ }
+#endif
break;
case 'x':
/* Handle \x{..} specially - new Perl thing for utf8 */
+#if !defined NOUTF8
if (*p == '{')
{
unsigned char *pt = p;
}
/* Not correct form; fall through */
}
+#endif
/* Ordinary \x */
}
else if (isalnum(*p))
{
- uschar name[256];
- uschar *npp = name;
+ uschar *npp = copynamesptr;
while (isalnum(*p)) *npp++ = *p++;
+ *npp++ = 0;
*npp = 0;
- n = pcre_get_stringnumber(re, (char *)name);
+ n = pcre_get_stringnumber(re, (char *)copynamesptr);
if (n < 0)
- fprintf(outfile, "no parentheses with name \"%s\"\n", name);
- else copystrings |= 1 << n;
+ fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
+ copynamesptr = npp;
}
else if (*p == '+')
{
}
continue;
+#if !defined NODFA
+ case 'D':
+#if !defined NOPOSIX
+ if (posix || do_posix)
+ printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
+ else
+#endif
+ use_dfa = 1;
+ continue;
+
+ case 'F':
+ options |= PCRE_DFA_SHORTEST;
+ continue;
+#endif
+
case 'G':
if (isdigit(*p))
{
}
else if (isalnum(*p))
{
- uschar name[256];
- uschar *npp = name;
+ uschar *npp = getnamesptr;
while (isalnum(*p)) *npp++ = *p++;
+ *npp++ = 0;
*npp = 0;
- n = pcre_get_stringnumber(re, (char *)name);
+ n = pcre_get_stringnumber(re, (char *)getnamesptr);
if (n < 0)
- fprintf(outfile, "no parentheses with name \"%s\"\n", name);
- else getstrings |= 1 << n;
+ fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
+ getnamesptr = npp;
}
continue;
{
printf("** Failed to get %d bytes of memory for offsets vector\n",
size_offsets_max * sizeof(int));
- return 1;
+ yield = 1;
+ goto EXIT;
}
}
use_size_offsets = n;
options |= PCRE_PARTIAL;
continue;
+ case 'Q':
+ while(isdigit(*p)) n = n * 10 + *p++ - '0';
+ if (extra == NULL)
+ {
+ extra = (pcre_extra *)malloc(sizeof(pcre_extra));
+ extra->flags = 0;
+ }
+ extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
+ extra->match_limit_recursion = n;
+ continue;
+
+ case 'q':
+ while(isdigit(*p)) n = n * 10 + *p++ - '0';
+ if (extra == NULL)
+ {
+ extra = (pcre_extra *)malloc(sizeof(pcre_extra));
+ extra->flags = 0;
+ }
+ extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
+ extra->match_limit = n;
+ continue;
+
+#if !defined NODFA
+ case 'R':
+ options |= PCRE_DFA_RESTART;
+ continue;
+#endif
+
case 'S':
show_malloc = 1;
continue;
case '?':
options |= PCRE_NO_UTF8_CHECK;
continue;
+
+ case '<':
+ {
+ int x = check_newline(p, outfile);
+ if (x == 0) goto NEXT_DATA;
+ options |= x;
+ while (*p++ != '>');
+ }
+ continue;
}
*q++ = c;
}
*q = 0;
len = q - dbuffer;
+ if ((all_use_dfa || use_dfa) && find_match_limit)
+ {
+ printf("**Match limit not relevant for DFA matching: ignored\n");
+ find_match_limit = 0;
+ }
+
/* Handle matching via the POSIX interface, which does not
support timing or playing with the match limit or callout data. */
if (rc != 0)
{
- (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
+ (void)regerror(rc, &preg, (char *)buffer, buffer_size);
fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
}
+ else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
+ != 0)
+ {
+ fprintf(outfile, "Matched with REG_NOSUB\n");
+ }
else
{
size_t i;
register int i;
clock_t time_taken;
clock_t start_time = clock();
+
+#if !defined NODFA
+ if (all_use_dfa || use_dfa)
+ {
+ int workspace[1000];
+ for (i = 0; i < LOOPREPEAT; i++)
+ count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
+ options | g_notempty, use_offsets, use_size_offsets, workspace,
+ sizeof(workspace)/sizeof(int));
+ }
+ else
+#endif
+
for (i = 0; i < LOOPREPEAT; i++)
count = pcre_exec(re, extra, (char *)bptr, len,
start_offset, options | g_notempty, use_offsets, use_size_offsets);
+
time_taken = clock() - start_time;
fprintf(outfile, "Execute time %.3f milliseconds\n",
(((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
}
/* If find_match_limit is set, we want to do repeated matches with
- varying limits in order to find the minimum value. */
+ varying limits in order to find the minimum value for the match limit and
+ for the recursion limit. */
if (find_match_limit)
{
- int min = 0;
- int mid = 64;
- int max = -1;
-
if (extra == NULL)
{
extra = (pcre_extra *)malloc(sizeof(pcre_extra));
extra->flags = 0;
}
- extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
- for (;;)
- {
- extra->match_limit = mid;
- count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
- options | g_notempty, use_offsets, use_size_offsets);
- if (count == PCRE_ERROR_MATCHLIMIT)
- {
- /* fprintf(outfile, "Testing match limit = %d\n", mid); */
- min = mid;
- mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
- }
- else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
- count == PCRE_ERROR_PARTIAL)
- {
- if (mid == min + 1)
- {
- fprintf(outfile, "Minimum match limit = %d\n", mid);
- break;
- }
- /* fprintf(outfile, "Testing match limit = %d\n", mid); */
- max = mid;
- mid = (min + mid)/2;
- }
- else break; /* Some other error */
- }
+ (void)check_match_limit(re, extra, bptr, len, start_offset,
+ options|g_notempty, use_offsets, use_size_offsets,
+ PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
+ PCRE_ERROR_MATCHLIMIT, "match()");
- extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
+ count = check_match_limit(re, extra, bptr, len, start_offset,
+ options|g_notempty, use_offsets, use_size_offsets,
+ PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
+ PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
}
/* If callout_data is set, use the interface with additional data */
/* The normal case is just to do the match once, with the default
value of match_limit. */
- else
+#if !defined NODFA
+ else if (all_use_dfa || use_dfa)
{
- count = pcre_exec(re, extra, (char *)bptr, len,
- start_offset, options | g_notempty, use_offsets, use_size_offsets);
+ int workspace[1000];
+ count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
+ options | g_notempty, use_offsets, use_size_offsets, workspace,
+ sizeof(workspace)/sizeof(int));
+ if (count == 0)
+ {
+ fprintf(outfile, "Matched, but too many subsidiary matches\n");
+ count = use_size_offsets/2;
+ }
}
+#endif
- if (count == 0)
+ else
{
- fprintf(outfile, "Matched, but too many substrings\n");
- count = use_size_offsets/3;
+ count = pcre_exec(re, extra, (char *)bptr, len,
+ start_offset, options | g_notempty, use_offsets, use_size_offsets);
+ if (count == 0)
+ {
+ fprintf(outfile, "Matched, but too many substrings\n");
+ count = use_size_offsets/3;
+ }
}
/* Matched */
{
if ((copystrings & (1 << i)) != 0)
{
- char copybuffer[16];
+ char copybuffer[256];
int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
i, copybuffer, sizeof(copybuffer));
if (rc < 0)
}
}
+ for (copynamesptr = copynames;
+ *copynamesptr != 0;
+ copynamesptr += (int)strlen((char*)copynamesptr) + 1)
+ {
+ char copybuffer[256];
+ int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
+ count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
+ if (rc < 0)
+ fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
+ else
+ fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
+ }
+
for (i = 0; i < 32; i++)
{
if ((getstrings & (1 << i)) != 0)
else
{
fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
- /* free((void *)substring); */
pcre_free_substring(substring);
}
}
}
+ for (getnamesptr = getnames;
+ *getnamesptr != 0;
+ getnamesptr += (int)strlen((char*)getnamesptr) + 1)
+ {
+ const char *substring;
+ int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
+ count, (char *)getnamesptr, &substring);
+ if (rc < 0)
+ fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
+ else
+ {
+ fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
+ pcre_free_substring(substring);
+ }
+ }
+
if (getlist)
{
const char **stringlist;
else if (count == PCRE_ERROR_PARTIAL)
{
- fprintf(outfile, "Partial match\n");
+ fprintf(outfile, "Partial match");
+#if !defined NODFA
+ if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
+ fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
+ bptr + use_offsets[0]);
+#endif
+ fprintf(outfile, "\n");
break; /* Out of the /g loop */
}
len -= use_offsets[1];
}
} /* End of loop for /g and /G */
+
+ NEXT_DATA: continue;
} /* End of loop for data lines */
CONTINUE:
if (posix || do_posix) regfree(&preg);
#endif
- if (re != NULL) free(re);
- if (extra != NULL) free(extra);
+ if (re != NULL) new_free(re);
+ if (extra != NULL) new_free(extra);
if (tables != NULL)
{
- free((void *)tables);
+ new_free((void *)tables);
setlocale(LC_CTYPE, "C");
}
}
if (infile == stdin) fprintf(outfile, "\n");
-return 0;
+
+EXIT:
+
+if (infile != NULL && infile != stdin) fclose(infile);
+if (outfile != NULL && outfile != stdout) fclose(outfile);
+
+free(buffer);
+free(dbuffer);
+free(pbuffer);
+free(offsets);
+
+return yield;
}
-/* End */
+/* End of pcretest.c */