1 /* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.6 2007/01/23 15:08:45 ph10 Exp $ */
3 /*************************************************
4 * PCRE testing program *
5 *************************************************/
7 /* This program was hacked up as a tester for PCRE. I really should have
8 written it more tidily in the first place. Will I ever learn? It has grown and
9 been extended and consequently is now rather, er, *very* untidy in places.
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
50 /* A number of things vary for Windows builds. Originally, pcretest opened its
51 input and output without "b"; then I was told that "b" was needed in some
52 environments, so it was added for release 5.0 to both the input and output. (It
53 makes no difference on Unix-like systems.) Later I was told that it is wrong
54 for the input on Windows. I've now abstracted the modes into two macros that
55 are set here, to make it easier to fiddle with them, and removed "b" from the
56 input mode under Windows. */
58 #if defined(_WIN32) || defined(WIN32)
59 #include <io.h> /* For _setmode() */
60 #include <fcntl.h> /* For _O_BINARY */
61 #define INPUT_MODE "r"
62 #define OUTPUT_MODE "wb"
65 #include <sys/time.h> /* These two includes are needed */
66 #include <sys/resource.h> /* for setrlimit(). */
67 #define INPUT_MODE "rb"
68 #define OUTPUT_MODE "wb"
72 #define PCRE_SPY /* For Win32 build, import data, not export */
74 /* We include pcre_internal.h because we need the internal info for displaying
75 the results of pcre_study() and we also need to know about the internal
76 macros, structures, and other internal data values; pcretest has "inside
77 information" compared to a program that strictly follows the PCRE API. */
79 #include "pcre_internal.h"
81 /* We need access to the data tables that PCRE uses. So as not to have to keep
82 two copies, we include the source file here, changing the names of the external
83 symbols to prevent clashes. */
85 #define _pcre_utf8_table1 utf8_table1
86 #define _pcre_utf8_table1_size utf8_table1_size
87 #define _pcre_utf8_table2 utf8_table2
88 #define _pcre_utf8_table3 utf8_table3
89 #define _pcre_utf8_table4 utf8_table4
91 #define _pcre_utt_size utt_size
92 #define _pcre_OP_lengths OP_lengths
94 #include "pcre_tables.c"
96 /* We also need the pcre_printint() function for printing out compiled
97 patterns. This function is in a separate file so that it can be included in
98 pcre_compile.c when that module is compiled with debugging enabled.
100 The definition of the macro PRINTABLE, which determines whether to print an
101 output character as-is or as a hex value when showing compiled patterns, is
102 contained in this file. We use it here also, in cases when the locale has not
103 been explicitly changed, so as to get consistent output from systems that
104 differ in their output from isprint() even in the "C" locale. */
106 #include "pcre_printint.src"
108 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
111 /* It is possible to compile this test program without including support for
112 testing the POSIX interface, though this is not available via the standard
116 #include "pcreposix.h"
119 /* It is also possible, for the benefit of the version imported into Exim, to
120 build pcretest without support for UTF8 (define NOUTF8), without the interface
121 to the DFA matcher (NODFA), and without the doublecheck of the old "info"
122 function (define NOINFOCHECK). */
125 /* Other parameters */
127 #ifndef CLOCKS_PER_SEC
129 #define CLOCKS_PER_SEC CLK_TCK
131 #define CLOCKS_PER_SEC 100
135 /* This is the default loop count for timing. */
137 #define LOOPREPEAT 500000
139 /* Static variables */
/* Output stream. It is file-scope so that the malloc wrappers and callout()
can write to it. */
141 static FILE *outfile;
/* Per-pattern setting: takes the command-line default (showstore) and is set
to 1 by the M pattern option. */
142 static int log_store = 0;
/* Running tally used by callout() together with callout_fail_count and
callout_fail_id to decide when to return 1. */
143 static int callout_count;
/* Non-zero makes callout() print its full report on every call. */
144 static int callout_extra;
145 static int callout_fail_count;
146 static int callout_fail_id;
/* Non-zero makes callout() print its full report for this call. */
147 static int first_callout;
/* Non-zero makes PRINTHEX use isprint(); otherwise PRINTABLE() is used. */
148 static int locale_set = 0;
149 static int show_malloc;
/* Copied into regex_gotten_store once a pattern has been obtained. The place
where it is assigned is not visible in this listing. */
151 static size_t gotten_store;
153 /* The buffers grow automatically if very long input lines are encountered. */
155 static int buffer_size = 50000;
156 static uschar *buffer = NULL;
157 static uschar *dbuffer = NULL;
158 static uschar *pbuffer = NULL;
162 /*************************************************
163 * Read or extend an input line *
164 *************************************************/
166 /* Input lines are read into buffer, but both patterns and data lines can be
167 continued over multiple input lines. In addition, if the buffer fills up, we
168 want to automatically expand it so as to be able to handle extremely large
169 lines that are needed for certain stress tests. When the input buffer is
170 expanded, the other two buffers must also be expanded likewise, and the
171 contents of pbuffer, which are a copy of the input for callouts, must be
172 preserved (for when expansion happens for a data line). This is not the most
173 optimal way of handling this, but hey, this is just a test program!
177 start where in buffer to start (this *must* be within buffer)
179 Returns: pointer to the start of new data
180 could be a copy of start, or could be moved
181 NULL if no data read and EOF reached
/* Reads from f into the shared input buffer at start, growing buffer, dbuffer
and pbuffer together when more room is needed.
NOTE(review): the loop header, braces and the hand-over from the old buffers to
the new ones are on lines missing from this listing; the flow between the
statements below should be confirmed against the full source. */
185 extend_inputline(FILE *f, uschar *start)
187 uschar *here = start;
/* Room left between the current read position and the end of buffer. */
191 int rlen = buffer_size - (here - buffer);
/* EOF or read error: NULL if nothing has been read at all, otherwise return
what has been collected so far. */
196 if (fgets((char *)here, rlen, f) == NULL)
197 return (here == start)? NULL : start;
198 dlen = (int)strlen((char *)here);
/* A trailing newline means a complete line is now in the buffer. */
199 if (dlen > 0 && here[dlen - 1] == '\n') return start;
/* Replacement buffers, each twice the current size. */
205 int new_buffer_size = 2*buffer_size;
206 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
207 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
208 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
210 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
212 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
/* Only the contents of buffer and pbuffer are carried over; dbuffer is not
copied. */
216 memcpy(new_buffer, buffer, buffer_size);
217 memcpy(new_pbuffer, pbuffer, buffer_size);
219 buffer_size = new_buffer_size;
/* Rebase both pointers so they keep the same offsets in the new buffer. These
two lines rely on buffer still referring to the old block. */
221 start = new_buffer + (start - buffer);
222 here = new_buffer + (here - buffer);
229 dbuffer = new_dbuffer;
230 pbuffer = new_pbuffer;
234 return NULL; /* Control never gets here */
243 /*************************************************
244 * Read number from string *
245 *************************************************/
247 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
248 around with conditional compilation, just do the job by hand. It is only used
249 for unpicking arguments, so just keep it simple.
252 str string to be converted
253 endptr where to put the end pointer
255 Returns: the unsigned long
/* Hand-rolled decimal parser for command-line arguments.
NOTE(review): the declaration of result, the store through endptr and the
return statement are on lines missing from this listing. */
259 get_value(unsigned char *str, unsigned char **endptr)
/* Skip leading white space, stopping at the terminating zero. */
262 while(*str != 0 && isspace(*str)) str++;
/* Accumulate decimal digits; no overflow check is made. */
263 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
271 /*************************************************
272 * Convert UTF-8 string to value *
273 *************************************************/
275 /* This function takes one or more bytes that represent a UTF-8 character,
276 and returns the value of the character.
279 utf8bytes a pointer to the byte vector
280 vptr a pointer to an int to receive the value
282 Returns: > 0 => the number of bytes consumed
283 -6 to 0 => malformed UTF-8 character at offset = (-return)
/* Decodes one UTF-8 character starting at utf8bytes and stores its value via
vptr.
NOTE(review): the statements that initialise and update d and s, and that fetch
each continuation byte into c, are on lines missing from this listing. */
289 utf82ord(unsigned char *utf8bytes, int *vptr)
/* Fetch the lead byte and step past it. */
291 int c = *utf8bytes++;
295 for (i = -1; i < 6; i++) /* i is number of additional bytes */
297 if ((d & 0x80) == 0) break;
301 if (i == -1) { *vptr = c; return 1; } /* ascii character */
302 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
304 /* i now has a value in the range 1-5 */
/* Start with the payload bits of the lead byte, selected by utf8_table3. */
307 d = (c & utf8_table3[i]) << s;
309 for (j = 0; j < i; j++)
/* A continuation byte must have the bit pattern 10xxxxxx; otherwise the
negated 1-based index of the offending byte is returned. */
312 if ((c & 0xc0) != 0x80) return -(j+1);
/* Merge the six payload bits of this continuation byte. */
314 d |= (c & 0x3f) << s;
317 /* Check that encoding was the correct unique one */
/* j becomes the index of the first utf8_table1 entry that d does not exceed;
it must equal the number of additional bytes actually used. */
319 for (j = 0; j < utf8_table1_size; j++)
320 if (d <= utf8_table1[j]) break;
321 if (j != i) return -(i+1);
333 /*************************************************
334 * Convert character value to UTF-8 *
335 *************************************************/
337 /* This function takes an integer value in the range 0 - 0x7fffffff
338 and encodes it as a UTF-8 character in 1 to 6 bytes.
341 cvalue the character value
342 utf8bytes pointer to buffer for result - at least 6 bytes long
344 Returns: number of characters placed in the buffer
/* Encodes cvalue as UTF-8 into the buffer at utf8bytes.
NOTE(review): the pointer adjustment before the backwards loop, the shift of
cvalue inside it and the return statement are on lines missing from this
listing. */
350 ord2utf8(int cvalue, uschar *utf8bytes)
/* i becomes the index of the first utf8_table1 entry that cvalue does not
exceed; the loop below then writes i trailing bytes. */
353 for (i = 0; i < utf8_table1_size; i++)
354 if (cvalue <= utf8_table1[i]) break;
/* Trailing bytes are written from last to first, each holding six bits under
the 0x80 marker. */
356 for (j = i; j > 0; j--)
358 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
/* Lead byte: the marker bits from utf8_table2 combined with cvalue. */
361 *utf8bytes = utf8_table2[i] | cvalue;
369 /*************************************************
370 * Print character string *
371 *************************************************/
373 /* Character string printing function. Must handle UTF-8 strings in utf8
374 mode. Yields number of characters printed. If handed a NULL file, just counts
375 chars without printing. */
/* Prints length bytes starting at p on f and accumulates a printed width in
yield; when f is NULL nothing is printed and only the width is counted.
NOTE(review): the loop, the UTF-8 mode test, the printable-character tests and
the return are on lines missing from this listing. */
377 static int pchars(unsigned char *p, int length, FILE *f)
/* Decode one character into c; rc is the byte count when it is positive. */
387 int rc = utf82ord(p, &c);
389 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
395 if (f != NULL) fprintf(f, "%c", c);
/* Shown as a braced hex escape. */
401 if (f != NULL) fprintf(f, "\\x{%02x}", c);
/* NOTE(review): the amount added to yield is selected from n, whose definition
is not visible here, while the value actually printed is c. Confirm that n
tracks c; otherwise the returned width can disagree with what was printed. */
402 yield += (n <= 0x000000ff)? 2 :
403 (n <= 0x00000fff)? 3 :
404 (n <= 0x0000ffff)? 4 :
405 (n <= 0x000fffff)? 5 : 6;
412 /* Not UTF-8, or malformed UTF-8 */
417 if (f != NULL) fprintf(f, "%c", c);
/* Shown as a two-digit hex escape. */
422 if (f != NULL) fprintf(f, "\\x%02x", c);
432 /*************************************************
434 *************************************************/
436 /* Called from PCRE as a result of the (?C) item. We print out where we are in
437 the match. Yield zero unless more callouts than the fail count, or the callout
/* Callout handler handed to PCRE: reports the current match position on
outfile and decides whether the match should be made to fail.
NOTE(review): the braces and the conditions guarding several statements are on
lines missing from this listing. */
440 static int callout(pcre_callout_block *cb)
/* The full report is written only when first_callout or callout_extra is
non-zero; otherwise f is NULL and pchars() merely counts. */
442 FILE *f = (first_callout | callout_extra)? outfile : NULL;
443 int i, pre_start, post_start, subject_length;
447 fprintf(f, "Callout %d: last capture = %d\n",
448 cb->callout_number, cb->capture_last);
/* offset_vector is walked in pairs; a negative first element of a pair is
reported as unset. */
450 for (i = 0; i < cb->capture_top * 2; i += 2)
452 if (cb->offset_vector[i] < 0)
453 fprintf(f, "%2d: <unset>\n", i/2);
456 fprintf(f, "%2d: ", i/2);
457 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
458 cb->offset_vector[i+1] - cb->offset_vector[i], f);
464 /* Re-print the subject in canonical form, the first time or if giving full
465 details. On subsequent calls in the same match, we use pchars just to find the
466 printed lengths of the substrings. */
468 if (f != NULL) fprintf(f, "--->");
/* Printed width of the subject before the match start, and of the part
between the match start and the current position. */
470 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
471 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
472 cb->current_position - cb->start_match, f);
/* Printed width of the whole subject; nothing is output by this call. */
474 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
476 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
477 cb->subject_length - cb->current_position, f);
479 if (f != NULL) fprintf(f, "\n");
481 /* Always print appropriate indicators, with callout number if not already
482 shown. For automatic callouts, show the pattern offset. */
484 if (cb->callout_number == 255)
486 fprintf(outfile, "%+3d ", cb->pattern_position);
487 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
491 if (callout_extra) fprintf(outfile, " ");
492 else fprintf(outfile, "%3d ", cb->callout_number);
/* Pad by the printed width of the text before the match start, then mark. */
495 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
496 fprintf(outfile, "^");
/* Second mark, placed using the printed width of the matched-so-far text. */
500 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
501 fprintf(outfile, "^");
504 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
505 fprintf(outfile, " ");
/* Show the next pattern item from the saved pattern copy in pbuffer; a zero
next_item_length prints a single character. */
507 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
508 pbuffer + cb->pattern_position);
510 fprintf(outfile, "\n");
/* callout_data is read as a pointer to int and reported only if non-zero. */
513 if (cb->callout_data != NULL)
515 int callout_data = *((int *)(cb->callout_data));
516 if (callout_data != 0)
518 fprintf(outfile, "Callout data = %d\n", callout_data);
/* Yield 1 only for the callout numbered callout_fail_id, once callout_count
has reached callout_fail_count; otherwise 0. */
523 return (cb->callout_number != callout_fail_id)? 0 :
524 (++callout_count >= callout_fail_count)? 1 : 0;
528 /*************************************************
529 * Local malloc functions *
530 *************************************************/
532 /* Alternative malloc function, to test functionality and show the size of the
/* malloc() wrapper that reports the requested size and resulting address on
outfile.
NOTE(review): any condition guarding the report, and the return statement, are
on lines missing from this listing. */
535 static void *new_malloc(size_t size)
537 void *block = malloc(size);
/* The size is narrowed to int for the %3d conversion. */
540 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
/* Counterpart of new_malloc(): reports the block address on outfile.
NOTE(review): the call that releases the block is on a line missing from this
listing. */
544 static void new_free(void *block)
547 fprintf(outfile, "free %p\n", block);
552 /* For recursion malloc/free, to test stacking calls */
/* malloc() wrapper installed as pcre_stack_malloc; reports the size and
address of each block on outfile.
NOTE(review): any condition guarding the report, and the return statement, are
on lines missing from this listing. */
554 static void *stack_malloc(size_t size)
556 void *block = malloc(size);
/* The size is narrowed to int for the %3d conversion. */
558 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
/* Counterpart of stack_malloc(), installed as pcre_stack_free; reports the
block address on outfile.
NOTE(review): the call that releases the block is on a line missing from this
listing. */
562 static void stack_free(void *block)
565 fprintf(outfile, "stack_free %p\n", block);
570 /*************************************************
571 * Call pcre_fullinfo() *
572 *************************************************/
574 /* Get one piece of information from the pcre_fullinfo() function */
/* Calls pcre_fullinfo() for one option, storing the result through ptr. The
function returns void, so a failure is only reported on outfile and the caller
is not told about it. */
576 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
/* A negative return code is an error; print it with the option requested. */
579 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
580 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
585 /*************************************************
586 * Byte flipping function *
587 *************************************************/
/* Reverses the byte order of value. With n == 2 the two low-order bytes are
swapped; with any other n the four low-order bytes are reversed. Bits above the
bytes handled are discarded by the masks. Callers in this file pass
sizeof(field) as n, so a field wider than four bytes still takes the four-byte
path. */
589 static unsigned long int
590 byteflip(unsigned long int value, int n)
/* Two-byte case. */
592 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
/* Four-byte case: byte 0 <-> byte 3 and byte 1 <-> byte 2. */
593 return ((value & 0x000000ff) << 24) |
594 ((value & 0x0000ff00) << 8) |
595 ((value & 0x00ff0000) >> 8) |
596 ((value & 0xff000000) >> 24);
602 /*************************************************
603 * Check match or recursion limit *
604 *************************************************/
/* Runs pcre_exec() repeatedly with the limit selected by flag enabled in
extra, adjusting a trial value (mid) according to whether errnumber comes back,
and prints the result as "Minimum <msg> limit".
NOTE(review): the initialisation of min, mid and max, the loop header, the
store through limit and the return are on lines missing from this listing. */
607 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
608 int start_offset, int options, int *use_offsets, int use_size_offsets,
609 int flag, unsigned long int *limit, int errnumber, const char *msg)
/* Enable the limit field named by flag. */
616 extra->flags |= flag;
622 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
623 use_offsets, use_size_offsets);
/* The limit was hit. */
625 if (count == errnumber)
627 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
/* Raise the trial value: jump to max when mid is just below it, take the
midpoint when max is positive, otherwise double mid. */
629 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
/* The match ran to completion: matched, no match, or partial match. */
632 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
633 count == PCRE_ERROR_PARTIAL)
637 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
640 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
644 else break; /* Some other error */
/* Clear the flag bit again. */
647 extra->flags &= ~flag;
653 /*************************************************
654 * Check newline indicator *
655 *************************************************/
657 /* This is used both at compile and run-time to check for <xxx> escapes, where
658 xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
661 p points after the leading '<'
662 f file for error message
664 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
/* Maps the text following '<' to a PCRE_NEWLINE_xxx value. Each comparison
includes the closing '>' and is an exact, case-sensitive prefix match. */
668 check_newline(uschar *p, FILE *f)
670 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
671 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
672 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
673 if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
/* Nothing matched: report the unrecognised text on f. */
674 fprintf(f, "Unknown newline type at: <%s\n", p);
680 /*************************************************
682 *************************************************/
/* Help text, written to stdout with printf().
NOTE(review): the function header and any conditional-compilation guards
around individual options (for example -dfa and -p) are on lines missing from
this listing. */
687 printf("Usage: pcretest [options] [<input> [<output>]]\n");
688 printf(" -b show compiled code (bytecode)\n");
689 printf(" -C show PCRE compile-time options and exit\n");
690 printf(" -d debug: show compiled code and information (-b and -i)\n");
692 printf(" -dfa force DFA matching for all subjects\n");
694 printf(" -help show usage information\n");
695 printf(" -i show information about compiled patterns\n"
696 " -m output memory used information\n"
697 " -o <n> set size of offsets vector to <n>\n");
699 printf(" -p use POSIX interface\n");
701 printf(" -q quiet: do not output PCRE version number at start\n");
702 printf(" -S <n> set stack size to <n> megabytes\n");
703 printf(" -s output store (memory) used information\n"
704 " -t time compilation and execution\n");
705 printf(" -t <n> time compilation and execution, repeating <n> times\n");
706 printf(" -tm time execution (matching) only\n");
707 printf(" -tm <n> time execution (matching) only, repeating <n> times\n");
712 /*************************************************
714 *************************************************/
716 /* Read lines from named file or stdin and write to named file or stdout; lines
717 consist of a regular expression, in delimiters and optionally followed by
718 options, followed by a set of test data, terminated by an empty line. */
720 int main(int argc, char **argv)
722 FILE *infile = stdin;
724 int study_options = 0;
731 int size_offsets = 45;
732 int size_offsets_max;
743 /* These vectors store, end-to-end, a list of captured substring names. Assume
744 that 1024 is plenty long enough for the few names we'll be testing. */
746 uschar copynames[1024];
747 uschar getnames[1024];
749 uschar *copynamesptr;
752 /* Get buffers from malloc() so that Electric Fence will check their misuse
753 when I am debugging. They grow automatically when very long lines are read. */
755 buffer = (unsigned char *)malloc(buffer_size);
756 dbuffer = (unsigned char *)malloc(buffer_size);
757 pbuffer = (unsigned char *)malloc(buffer_size);
759 /* The outfile variable is static so that new_malloc can use it. */
763 /* The following _setmode() stuff is some Windows magic that tells its runtime
764 library to translate CRLF into a single LF character. At least, that's what
765 I've been told: never having used Windows I take this all on trust. Originally
766 it set 0x8000, but then I was advised that _O_BINARY was better. */
768 #if defined(_WIN32) || defined(WIN32)
769 _setmode( _fileno( stdout ), _O_BINARY );
774 while (argc > 1 && argv[op][0] == '-')
776 unsigned char *endptr;
778 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
780 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
781 else if (strcmp(argv[op], "-b") == 0) debug = 1;
782 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
783 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
785 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
787 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
788 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
794 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
796 int both = argv[op][2] == 0;
798 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
805 else timeitm = LOOPREPEAT;
806 if (both) timeit = timeitm;
808 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
809 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
812 #if defined(_WIN32) || defined(WIN32)
813 printf("PCRE: -S not supported on this OS\n");
818 getrlimit(RLIMIT_STACK, &rlim);
819 rlim.rlim_cur = stack_size * 1024 * 1024;
820 rc = setrlimit(RLIMIT_STACK, &rlim);
823 printf("PCRE: setrlimit() failed with error %d\n", rc);
831 else if (strcmp(argv[op], "-p") == 0) posix = 1;
833 else if (strcmp(argv[op], "-C") == 0)
836 printf("PCRE version %s\n", pcre_version());
837 printf("Compiled with\n");
838 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
839 printf(" %sUTF-8 support\n", rc? "" : "No ");
840 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
841 printf(" %sUnicode properties support\n", rc? "" : "No ");
842 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
843 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
844 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
845 (rc == -1)? "ANY" : "???");
846 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
847 printf(" Internal link size = %d\n", rc);
848 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
849 printf(" POSIX malloc threshold = %d\n", rc);
850 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
851 printf(" Default match limit = %d\n", rc);
852 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
853 printf(" Default recursion depth limit = %d\n", rc);
854 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
855 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
858 else if (strcmp(argv[op], "-help") == 0 ||
859 strcmp(argv[op], "--help") == 0)
866 printf("** Unknown or malformed option %s\n", argv[op]);
875 /* Get the store for the offsets vector, and remember what it was */
877 size_offsets_max = size_offsets;
878 offsets = (int *)malloc(size_offsets_max * sizeof(int));
881 printf("** Failed to get %d bytes of memory for offsets vector\n",
882 size_offsets_max * sizeof(int));
887 /* Sort out the input and output files */
891 infile = fopen(argv[op], INPUT_MODE);
894 printf("** Failed to open %s\n", argv[op]);
902 outfile = fopen(argv[op+1], OUTPUT_MODE);
905 printf("** Failed to open %s\n", argv[op+1]);
911 /* Set alternative malloc function */
913 pcre_malloc = new_malloc;
914 pcre_free = new_free;
915 pcre_stack_malloc = stack_malloc;
916 pcre_stack_free = stack_free;
918 /* Heading line unless quiet, then prompt for first regex if stdin */
920 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
927 pcre_extra *extra = NULL;
929 #if !defined NOPOSIX /* There are still compilers that require no indent */
935 unsigned char *p, *pp, *ppp;
936 unsigned char *to_file = NULL;
937 const unsigned char *tables = NULL;
938 unsigned long int true_size, true_study_size = 0;
939 size_t size, regex_gotten_store;
941 int do_debug = debug;
944 int do_showinfo = showinfo;
947 int erroroffset, len, delimiter, poffset;
951 if (infile == stdin) printf(" re> ");
952 if (extend_inputline(infile, buffer) == NULL) break;
953 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
957 while (isspace(*p)) p++;
958 if (*p == 0) continue;
960 /* See if the pattern is to be loaded pre-compiled from a file. */
962 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
964 unsigned long int magic, get_options;
969 pp = p + (int)strlen((char *)p);
970 while (isspace(pp[-1])) pp--;
973 f = fopen((char *)p, "rb");
976 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
980 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
983 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
985 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
987 re = (real_pcre *)new_malloc(true_size);
988 regex_gotten_store = gotten_store;
990 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
992 magic = ((real_pcre *)re)->magic_number;
993 if (magic != MAGIC_NUMBER)
995 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1001 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1007 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1008 do_flip? " (byte-inverted)" : "", p);
1010 /* Need to know if UTF-8 for printing data strings */
1012 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1013 use_utf8 = (get_options & PCRE_UTF8) != 0;
1015 /* Now see if there is any following study data */
1017 if (true_study_size != 0)
1019 pcre_study_data *psd;
1021 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1022 extra->flags = PCRE_EXTRA_STUDY_DATA;
1024 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1025 extra->study_data = psd;
1027 if (fread(psd, 1, true_study_size, f) != true_study_size)
1030 fprintf(outfile, "Failed to read data from %s\n", p);
1031 if (extra != NULL) new_free(extra);
1032 if (re != NULL) new_free(re);
1036 fprintf(outfile, "Study data loaded from %s\n", p);
1037 do_study = 1; /* To get the data output if requested */
1039 else fprintf(outfile, "No study data\n");
1045 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1046 the pattern; if it isn't complete, read more. */
1050 if (isalnum(delimiter) || delimiter == '\\')
1052 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1057 poffset = p - buffer;
1063 if (*pp == '\\' && pp[1] != 0) pp++;
1064 else if (*pp == delimiter) break;
1067 if (*pp != 0) break;
1068 if (infile == stdin) printf(" > ");
1069 if ((pp = extend_inputline(infile, pp)) == NULL)
1071 fprintf(outfile, "** Unexpected EOF\n");
1075 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1078 /* The buffer may have moved while being extended; reset the start of data
1079 pointer to the correct relative point in the buffer. */
1081 p = buffer + poffset;
1083 /* If the first character after the delimiter is backslash, make
1084 the pattern end with backslash. This is purely to provide a way
1085 of testing for the error message when a pattern ends with backslash. */
1087 if (pp[1] == '\\') *pp++ = '\\';
1089 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1093 strcpy((char *)pbuffer, (char *)p);
1095 /* Look for options after final delimiter */
1099 log_store = showstore; /* default from command line */
1105 case 'f': options |= PCRE_FIRSTLINE; break;
1106 case 'g': do_g = 1; break;
1107 case 'i': options |= PCRE_CASELESS; break;
1108 case 'm': options |= PCRE_MULTILINE; break;
1109 case 's': options |= PCRE_DOTALL; break;
1110 case 'x': options |= PCRE_EXTENDED; break;
1112 case '+': do_showrest = 1; break;
1113 case 'A': options |= PCRE_ANCHORED; break;
1114 case 'B': do_debug = 1; break;
1115 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1116 case 'D': do_debug = do_showinfo = 1; break;
1117 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1118 case 'F': do_flip = 1; break;
1119 case 'G': do_G = 1; break;
1120 case 'I': do_showinfo = 1; break;
1121 case 'J': options |= PCRE_DUPNAMES; break;
1122 case 'M': log_store = 1; break;
1123 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1125 #if !defined NOPOSIX
1126 case 'P': do_posix = 1; break;
1129 case 'S': do_study = 1; break;
1130 case 'U': options |= PCRE_UNGREEDY; break;
1131 case 'X': options |= PCRE_EXTRA; break;
1132 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1133 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1137 /* The '\r' test here is so that it works on Windows. */
1138 /* The '0' test is just in case this is an unterminated line. */
1139 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1141 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1143 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1147 tables = pcre_maketables();
1153 while (*pp != 0) pp++;
1154 while (isspace(pp[-1])) pp--;
1160 int x = check_newline(pp, outfile);
1161 if (x == 0) goto SKIP_DATA;
1163 while (*pp++ != '>');
1167 case '\r': /* So that it works in Windows */
1173 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1178 /* Handle compiling via the POSIX interface, which doesn't support the
1179 timing, showing, or debugging options, nor the ability to pass over
1180 local character tables. */
1182 #if !defined NOPOSIX
1183 if (posix || do_posix)
1188 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1189 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1190 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1191 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1192 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1194 rc = regcomp(&preg, (char *)p, cflags);
1196 /* Compilation failed; go back for another re, skipping to blank line
1197 if non-interactive. */
1201 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1202 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1207 /* Handle compiling via the native interface */
1210 #endif /* !defined NOPOSIX */
1217 clock_t start_time = clock();
1218 for (i = 0; i < timeit; i++)
1220 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1221 if (re != NULL) free(re);
1223 time_taken = clock() - start_time;
1224 fprintf(outfile, "Compile time %.4f milliseconds\n",
1225 (((double)time_taken * 1000.0) / (double)timeit) /
1226 (double)CLOCKS_PER_SEC);
1229 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1231 /* Compilation failed; go back for another re, skipping to blank line
1232 if non-interactive. */
1236 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1238 if (infile != stdin)
1242 if (extend_inputline(infile, buffer) == NULL)
1247 len = (int)strlen((char *)buffer);
1248 while (len > 0 && isspace(buffer[len-1])) len--;
1249 if (len == 0) break;
1251 fprintf(outfile, "\n");
1256 /* Compilation succeeded; print data if required. There are now two
1257 info-returning functions. The old one has a limited interface and
1258 returns only limited data. Check that it agrees with the newer one. */
1261 fprintf(outfile, "Memory allocation (code space): %d\n",
1262 (int)(gotten_store -
1264 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1266 /* Extract the size for possible writing before possibly flipping it,
1267 and remember the store that was got. */
1269 true_size = ((real_pcre *)re)->size;
1270 regex_gotten_store = gotten_store;
1272 /* If /S was present, study the regexp to generate additional info to
1273 help with the matching. */
1281 clock_t start_time = clock();
1282 for (i = 0; i < timeit; i++)
1283 extra = pcre_study(re, study_options, &error);
1284 time_taken = clock() - start_time;
1285 if (extra != NULL) free(extra);
1286 fprintf(outfile, " Study time %.4f milliseconds\n",
1287 (((double)time_taken * 1000.0) / (double)timeit) /
1288 (double)CLOCKS_PER_SEC);
1290 extra = pcre_study(re, study_options, &error);
1292 fprintf(outfile, "Failed to study: %s\n", error);
1293 else if (extra != NULL)
1294 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1297 /* If the 'F' option was present, we flip the bytes of all the integer
1298 fields in the regex data block and the study block. This is to make it
1299 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1300 compiled on a different architecture. */
1304 real_pcre *rre = (real_pcre *)re;
1305 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1306 rre->size = byteflip(rre->size, sizeof(rre->size));
1307 rre->options = byteflip(rre->options, sizeof(rre->options));
1308 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1309 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1310 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1311 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1312 rre->name_table_offset = byteflip(rre->name_table_offset,
1313 sizeof(rre->name_table_offset));
1314 rre->name_entry_size = byteflip(rre->name_entry_size,
1315 sizeof(rre->name_entry_size));
1316 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1320 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1321 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1322 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1326 /* Extract information from the compiled data if required */
1332 fprintf(outfile, "------------------------------------------------------------------\n");
1333 pcre_printint(re, outfile);
1338 unsigned long int get_options, all_options;
1339 #if !defined NOINFOCHECK
1340 int old_first_char, old_options, old_count;
1342 int count, backrefmax, first_char, need_char;
1343 int nameentrysize, namecount;
1344 const uschar *nametable;
1346 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1347 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1348 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1349 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1350 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1351 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1352 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1353 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1354 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1356 #if !defined NOINFOCHECK
1357 old_count = pcre_info(re, &old_options, &old_first_char);
1358 if (count < 0) fprintf(outfile,
1359 "Error %d from pcre_info()\n", count);
1362 if (old_count != count) fprintf(outfile,
1363 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1366 if (old_first_char != first_char) fprintf(outfile,
1367 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1368 first_char, old_first_char);
1370 if (old_options != (int)get_options) fprintf(outfile,
1371 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1372 get_options, old_options);
1376 if (size != regex_gotten_store) fprintf(outfile,
1377 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1378 (int)size, (int)regex_gotten_store);
1380 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1382 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1386 fprintf(outfile, "Named capturing subpatterns:\n");
1387 while (namecount-- > 0)
1389 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1390 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1391 GET2(nametable, 0));
1392 nametable += nameentrysize;
1396 /* The NOPARTIAL bit is a private bit in the options, so we have
1397 to fish it out via our back door */
1399 all_options = ((real_pcre *)re)->options;
1402 all_options = byteflip(all_options, sizeof(all_options));
1405 if ((all_options & PCRE_NOPARTIAL) != 0)
1406 fprintf(outfile, "Partial matching not supported\n");
1408 if (get_options == 0) fprintf(outfile, "No options\n");
1409 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1410 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1411 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1412 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1413 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1414 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1415 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1416 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1417 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1418 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1419 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1420 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1421 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1422 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1424 switch (get_options & PCRE_NEWLINE_BITS)
1426 case PCRE_NEWLINE_CR:
1427 fprintf(outfile, "Forced newline sequence: CR\n");
1430 case PCRE_NEWLINE_LF:
1431 fprintf(outfile, "Forced newline sequence: LF\n");
1434 case PCRE_NEWLINE_CRLF:
1435 fprintf(outfile, "Forced newline sequence: CRLF\n");
1438 case PCRE_NEWLINE_ANY:
1439 fprintf(outfile, "Forced newline sequence: ANY\n");
1446 if (first_char == -1)
1448 fprintf(outfile, "First char at start or follows newline\n");
1450 else if (first_char < 0)
1452 fprintf(outfile, "No first char\n");
1456 int ch = first_char & 255;
1457 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1460 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1462 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1467 fprintf(outfile, "No need char\n");
1471 int ch = need_char & 255;
1472 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1475 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1477 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1480 /* Don't output study size; at present it is in any case a fixed
1481 value, but it varies, depending on the computer architecture, and
1482 so messes up the test suite. (And with the /F option, it might be
1488 fprintf(outfile, "Study returned NULL\n");
1491 uschar *start_bits = NULL;
1492 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1494 if (start_bits == NULL)
1495 fprintf(outfile, "No starting byte set\n");
1500 fprintf(outfile, "Starting byte set: ");
1501 for (i = 0; i < 256; i++)
1503 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1507 fprintf(outfile, "\n ");
1510 if (PRINTHEX(i) && i != ' ')
1512 fprintf(outfile, "%c ", i);
1517 fprintf(outfile, "\\x%02x ", i);
1522 fprintf(outfile, "\n");
1528 /* If the '>' option was present, we write out the regex to a file, and
1529 that is all. The first 8 bytes of the file are the regex length and then
1530 the study length, in big-endian order. */
1532 if (to_file != NULL)
1534 FILE *f = fopen((char *)to_file, "wb");
1537 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1542 sbuf[0] = (true_size >> 24) & 255;
1543 sbuf[1] = (true_size >> 16) & 255;
1544 sbuf[2] = (true_size >> 8) & 255;
1545 sbuf[3] = (true_size) & 255;
1547 sbuf[4] = (true_study_size >> 24) & 255;
1548 sbuf[5] = (true_study_size >> 16) & 255;
1549 sbuf[6] = (true_study_size >> 8) & 255;
1550 sbuf[7] = (true_study_size) & 255;
1552 if (fwrite(sbuf, 1, 8, f) < 8 ||
1553 fwrite(re, 1, true_size, f) < true_size)
1555 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1559 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1562 if (fwrite(extra->study_data, 1, true_study_size, f) <
1565 fprintf(outfile, "Write error on %s: %s\n", to_file,
1568 else fprintf(outfile, "Study data written to %s\n", to_file);
1576 if (extra != NULL) new_free(extra);
1577 if (tables != NULL) new_free((void *)tables);
1578 continue; /* With next regex */
1580 } /* End of non-POSIX compile */
1582 /* Read data lines and test them */
1587 uschar *bptr = dbuffer;
1588 int *use_offsets = offsets;
1589 int use_size_offsets = size_offsets;
1590 int callout_data = 0;
1591 int callout_data_set = 0;
1593 int copystrings = 0;
1594 int find_match_limit = 0;
1598 int start_offset = 0;
1607 copynamesptr = copynames;
1608 getnamesptr = getnames;
1610 pcre_callout = callout;
1614 callout_fail_count = 999999;
1615 callout_fail_id = -1;
1618 if (extra != NULL) extra->flags &=
1619 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1624 if (infile == stdin) printf("data> ");
1625 if (extend_inputline(infile, buffer + len) == NULL)
1631 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1632 len = (int)strlen((char *)buffer);
1633 if (buffer[len-1] == '\n') break;
1636 while (len > 0 && isspace(buffer[len-1])) len--;
1638 if (len == 0) break;
1641 while (isspace(*p)) p++;
1644 while ((c = *p++) != 0)
1649 if (c == '\\') switch ((c = *p++))
1651 case 'a': c = 7; break;
1652 case 'b': c = '\b'; break;
1653 case 'e': c = 27; break;
1654 case 'f': c = '\f'; break;
1655 case 'n': c = '\n'; break;
1656 case 'r': c = '\r'; break;
1657 case 't': c = '\t'; break;
1658 case 'v': c = '\v'; break;
1660 case '0': case '1': case '2': case '3':
1661 case '4': case '5': case '6': case '7':
1663 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1664 c = c * 8 + *p++ - '0';
1667 if (use_utf8 && c > 255)
1669 unsigned char buff8[8];
1671 utn = ord2utf8(c, buff8);
1672 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1673 c = buff8[ii]; /* Last byte */
1680 /* Handle \x{..} specially - new Perl thing for utf8 */
1685 unsigned char *pt = p;
1687 while (isxdigit(*(++pt)))
1688 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1691 unsigned char buff8[8];
1693 utn = ord2utf8(c, buff8);
1694 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1695 c = buff8[ii]; /* Last byte */
1699 /* Not correct form; fall through */
1706 while (i++ < 2 && isxdigit(*p))
1708 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1713 case 0: /* \ followed by EOF allows for an empty line */
1718 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1721 case 'A': /* Option setting */
1722 options |= PCRE_ANCHORED;
1726 options |= PCRE_NOTBOL;
1730 if (isdigit(*p)) /* Set copy string */
1732 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1733 copystrings |= 1 << n;
1735 else if (isalnum(*p))
1737 uschar *npp = copynamesptr;
1738 while (isalnum(*p)) *npp++ = *p++;
1741 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1743 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1753 pcre_callout = NULL;
1758 callout_fail_id = 0;
1761 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1762 callout_fail_count = 0;
1767 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1774 if (*(++p) == '-') { sign = -1; p++; }
1776 callout_data = callout_data * 10 + *p++ - '0';
1777 callout_data *= sign;
1778 callout_data_set = 1;
1784 #if !defined NOPOSIX
1785 if (posix || do_posix)
1786 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1793 options |= PCRE_DFA_SHORTEST;
1800 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1801 getstrings |= 1 << n;
1803 else if (isalnum(*p))
1805 uschar *npp = getnamesptr;
1806 while (isalnum(*p)) *npp++ = *p++;
1809 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1811 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1821 find_match_limit = 1;
1825 options |= PCRE_NOTEMPTY;
1829 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1830 if (n > size_offsets_max)
1832 size_offsets_max = n;
1834 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1835 if (offsets == NULL)
1837 printf("** Failed to get %d bytes of memory for offsets vector\n",
1838 size_offsets_max * sizeof(int));
1843 use_size_offsets = n;
1844 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1848 options |= PCRE_PARTIAL;
1852 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1855 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1858 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1859 extra->match_limit_recursion = n;
1863 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1866 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1869 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1870 extra->match_limit = n;
1875 options |= PCRE_DFA_RESTART;
1884 options |= PCRE_NOTEOL;
1888 options |= PCRE_NO_UTF8_CHECK;
1893 int x = check_newline(p, outfile);
1894 if (x == 0) goto NEXT_DATA;
1896 while (*p++ != '>');
1905 if ((all_use_dfa || use_dfa) && find_match_limit)
1907 printf("**Match limit not relevant for DFA matching: ignored\n");
1908 find_match_limit = 0;
1911 /* Handle matching via the POSIX interface, which does not
1912 support timing or playing with the match limit or callout data. */
1914 #if !defined NOPOSIX
1915 if (posix || do_posix)
1919 regmatch_t *pmatch = NULL;
1920 if (use_size_offsets > 0)
1921 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1922 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1923 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1925 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1929 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1930 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1932 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1935 fprintf(outfile, "Matched with REG_NOSUB\n");
1940 for (i = 0; i < (size_t)use_size_offsets; i++)
1942 if (pmatch[i].rm_so >= 0)
1944 fprintf(outfile, "%2d: ", (int)i);
1945 (void)pchars(dbuffer + pmatch[i].rm_so,
1946 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1947 fprintf(outfile, "\n");
1948 if (i == 0 && do_showrest)
1950 fprintf(outfile, " 0+ ");
1951 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1953 fprintf(outfile, "\n");
1961 /* Handle matching via the native interface - repeats for /g and /G */
1964 #endif /* !defined NOPOSIX */
1966 for (;; gmatched++) /* Loop for /g or /G */
1972 clock_t start_time = clock();
1975 if (all_use_dfa || use_dfa)
1977 int workspace[1000];
1978 for (i = 0; i < timeitm; i++)
1979 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1980 options | g_notempty, use_offsets, use_size_offsets, workspace,
1981 sizeof(workspace)/sizeof(int));
1986 for (i = 0; i < timeitm; i++)
1987 count = pcre_exec(re, extra, (char *)bptr, len,
1988 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1990 time_taken = clock() - start_time;
1991 fprintf(outfile, "Execute time %.4f milliseconds\n",
1992 (((double)time_taken * 1000.0) / (double)timeitm) /
1993 (double)CLOCKS_PER_SEC);
1996 /* If find_match_limit is set, we want to do repeated matches with
1997 varying limits in order to find the minimum value for the match limit and
1998 for the recursion limit. */
2000 if (find_match_limit)
2004 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2008 (void)check_match_limit(re, extra, bptr, len, start_offset,
2009 options|g_notempty, use_offsets, use_size_offsets,
2010 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2011 PCRE_ERROR_MATCHLIMIT, "match()");
2013 count = check_match_limit(re, extra, bptr, len, start_offset,
2014 options|g_notempty, use_offsets, use_size_offsets,
2015 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2016 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2019 /* If callout_data is set, use the interface with additional data */
2021 else if (callout_data_set)
2025 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2028 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2029 extra->callout_data = &callout_data;
2030 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2031 options | g_notempty, use_offsets, use_size_offsets);
2032 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2035 /* The normal case is just to do the match once, with the default
2036 value of match_limit. */
2039 else if (all_use_dfa || use_dfa)
2041 int workspace[1000];
2042 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2043 options | g_notempty, use_offsets, use_size_offsets, workspace,
2044 sizeof(workspace)/sizeof(int));
2047 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2048 count = use_size_offsets/2;
2055 count = pcre_exec(re, extra, (char *)bptr, len,
2056 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2059 fprintf(outfile, "Matched, but too many substrings\n");
2060 count = use_size_offsets/3;
2071 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2073 maxcount = use_size_offsets/3;
2075 /* This is a check against a lunatic return value. */
2077 if (count > maxcount)
2080 "** PCRE error: returned count %d is too big for offset size %d\n",
2081 count, use_size_offsets);
2082 count = use_size_offsets/3;
2085 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2086 do_g = do_G = FALSE; /* Break g/G loop */
2090 for (i = 0; i < count * 2; i += 2)
2092 if (use_offsets[i] < 0)
2093 fprintf(outfile, "%2d: <unset>\n", i/2);
2096 fprintf(outfile, "%2d: ", i/2);
2097 (void)pchars(bptr + use_offsets[i],
2098 use_offsets[i+1] - use_offsets[i], outfile);
2099 fprintf(outfile, "\n");
2104 fprintf(outfile, " 0+ ");
2105 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2107 fprintf(outfile, "\n");
2113 for (i = 0; i < 32; i++)
2115 if ((copystrings & (1 << i)) != 0)
2117 char copybuffer[256];
2118 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2119 i, copybuffer, sizeof(copybuffer));
2121 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2123 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2127 for (copynamesptr = copynames;
2129 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2131 char copybuffer[256];
2132 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2133 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2135 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2137 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2140 for (i = 0; i < 32; i++)
2142 if ((getstrings & (1 << i)) != 0)
2144 const char *substring;
2145 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2148 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2151 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2152 pcre_free_substring(substring);
2157 for (getnamesptr = getnames;
2159 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2161 const char *substring;
2162 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2163 count, (char *)getnamesptr, &substring);
2165 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2168 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2169 pcre_free_substring(substring);
2175 const char **stringlist;
2176 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2179 fprintf(outfile, "get substring list failed %d\n", rc);
2182 for (i = 0; i < count; i++)
2183 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2184 if (stringlist[i] != NULL)
2185 fprintf(outfile, "string list not terminated by NULL\n");
2186 /* free((void *)stringlist); */
2187 pcre_free_substring_list(stringlist);
2192 /* There was a partial match */
2194 else if (count == PCRE_ERROR_PARTIAL)
2196 fprintf(outfile, "Partial match");
2198 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2199 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2200 bptr + use_offsets[0]);
2202 fprintf(outfile, "\n");
2203 break; /* Out of the /g loop */
2206 /* Failed to match. If this is a /g or /G loop and we previously set
2207 g_notempty after a null match, this is not necessarily the end.
2208 We want to advance the start offset, and continue. In the case of UTF-8
2209 matching, the advance must be one character, not one byte. Fudge the
2210 offset values to achieve this. We won't be at the end of the string -
2211 that was checked before setting g_notempty. */
2215 if (g_notempty != 0)
2218 use_offsets[0] = start_offset;
2221 while (start_offset + onechar < len)
2223 int tb = bptr[start_offset+onechar];
2224 if (tb <= 127) break;
2226 if (tb != 0 && tb != 0xc0) onechar++;
2229 use_offsets[1] = start_offset + onechar;
2233 if (count == PCRE_ERROR_NOMATCH)
2235 if (gmatched == 0) fprintf(outfile, "No match\n");
2237 else fprintf(outfile, "Error %d\n", count);
2238 break; /* Out of the /g loop */
2242 /* If not /g or /G we are done */
2244 if (!do_g && !do_G) break;
2246 /* If we have matched an empty string, first check to see if we are at
2247 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2248 what Perl's /g option does. This turns out to be rather cunning. First
2249 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2250 same point. If this fails (picked up above) we advance to the next
2254 if (use_offsets[0] == use_offsets[1])
2256 if (use_offsets[0] == len) break;
2257 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2260 /* For /g, update the start offset, leaving the rest alone */
2262 if (do_g) start_offset = use_offsets[1];
2264 /* For /G, update the pointer and length */
2268 bptr += use_offsets[1];
2269 len -= use_offsets[1];
2271 } /* End of loop for /g and /G */
2273 NEXT_DATA: continue;
2274 } /* End of loop for data lines */
2278 #if !defined NOPOSIX
2279 if (posix || do_posix) regfree(&preg);
2282 if (re != NULL) new_free(re);
2283 if (extra != NULL) new_free(extra);
2286 new_free((void *)tables);
2287 setlocale(LC_CTYPE, "C");
2292 if (infile == stdin) fprintf(outfile, "\n");
2296 if (infile != NULL && infile != stdin) fclose(infile);
2297 if (outfile != NULL && outfile != stdout) fclose(outfile);
2307 /* End of pcretest.c */