1 /* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.5 2006/12/19 14:38:38 ph10 Exp $ */
3 /*************************************************
4 * PCRE testing program *
5 *************************************************/
7 /* This program was hacked up as a tester for PCRE. I really should have
8 written it more tidily in the first place. Will I ever learn? It has grown and
9 been extended and consequently is now rather, er, *very* untidy in places.
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
51 #include <sys/resource.h>
54 #define PCRE_SPY /* For Win32 build, import data, not export */
56 /* We include pcre_internal.h because we need the internal info for displaying
57 the results of pcre_study() and we also need to know about the internal
58 macros, structures, and other internal data values; pcretest has "inside
59 information" compared to a program that strictly follows the PCRE API. */
61 #include "pcre_internal.h"
63 /* We need access to the data tables that PCRE uses. So as not to have to keep
64 two copies, we include the source file here, changing the names of the external
65 symbols to prevent clashes. */
67 #define _pcre_utf8_table1 utf8_table1
68 #define _pcre_utf8_table1_size utf8_table1_size
69 #define _pcre_utf8_table2 utf8_table2
70 #define _pcre_utf8_table3 utf8_table3
71 #define _pcre_utf8_table4 utf8_table4
73 #define _pcre_utt_size utt_size
74 #define _pcre_OP_lengths OP_lengths
76 #include "pcre_tables.c"
78 /* We also need the pcre_printint() function for printing out compiled
79 patterns. This function is in a separate file so that it can be included in
80 pcre_compile.c when that module is compiled with debugging enabled. */
82 #include "pcre_printint.src"
85 /* It is possible to compile this test program without including support for
86 testing the POSIX interface, though this is not available via the standard
90 #include "pcreposix.h"
93 /* It is also possible, for the benefit of the version imported into Exim, to
94 build pcretest without support for UTF8 (define NOUTF8), without the interface
95 to the DFA matcher (NODFA), and without the doublecheck of the old "info"
96 function (define NOINFOCHECK). */
99 /* Other parameters */
/* Fallback definitions of CLOCKS_PER_SEC, which the timing reports divide by,
for systems whose headers do not supply it. NOTE(review): the conditional
lines that choose between the two fallbacks and close the #ifndef are not
visible in this listing - confirm against upstream. */
101 #ifndef CLOCKS_PER_SEC
103 #define CLOCKS_PER_SEC CLK_TCK
105 #define CLOCKS_PER_SEC 100
/* Iteration count for the compile and study timing loops in main(); the
reported time is the elapsed total divided by this count. */
109 #define LOOPREPEAT 500000
111 /* Static variables */
/* Stream that receives all test output; main() sets it from fopen() when an
output file argument is given. File-scope so the malloc hooks can use it. */
113 static FILE *outfile;
/* Per-pattern memory-logging flag: main() resets it from the command-line
default for each pattern and the M pattern option sets it to 1. */
114 static int log_store = 0;
/* callout() returns 1 once a callout numbered callout_fail_id has been seen
callout_fail_count times; callout_count is the running tally. */
115 static int callout_count;
/* Non-zero requests the extra per-callout output in callout(). */
116 static int callout_extra;
117 static int callout_fail_count;
118 static int callout_fail_id;
/* Together with callout_extra, decides whether callout() re-prints the
subject or only measures it. */
119 static int first_callout;
/* NOTE(review): no use of show_malloc is visible in this listing; presumably
it gates the malloc/free trace output - confirm. */
120 static int show_malloc;
/* Size of the most recent allocation as read by main() after compiling;
NOTE(review): the assignment is not visible here - presumably in new_malloc. */
122 static size_t gotten_store;
124 /* The buffers grow automatically if very long input lines are encountered. */
/* Current size in bytes of each of the three buffers below. */
126 static int buffer_size = 50000;
/* buffer holds the input line being read; pbuffer receives a copy of the
pattern (used by callout() to show the next pattern item). NOTE(review): no
use of dbuffer is visible in this listing. */
127 static uschar *buffer = NULL;
128 static uschar *dbuffer = NULL;
129 static uschar *pbuffer = NULL;
133 /*************************************************
134 * Read or extend an input line *
135 *************************************************/
137 /* Input lines are read into buffer, but both patterns and data lines can be
138 continued over multiple input lines. In addition, if the buffer fills up, we
139 want to automatically expand it so as to be able to handle extremely large
140 lines that are needed for certain stress tests. When the input buffer is
141 expanded, the other two buffers must also be expanded likewise, and the
142 contents of pbuffer, which are a copy of the input for callouts, must be
143 preserved (for when expansion happens for a data line). This is not the most
144 optimal way of handling this, but hey, this is just a test program!
148 start where in buffer to start (this *must* be within buffer)
150 Returns: pointer to the start of new data
151 could be a copy of start, or could be moved
152 NULL if no data read and EOF reached
/* Read input from f into the shared buffer at the position given by start,
replacing all three buffers with larger ones when room runs out. Returns start
once text ending in a newline has been read; on EOF returns NULL if here still
equals start, otherwise start. */
/* NOTE(review): the return type, braces, loop header and several short
statements of this function are not visible in this listing - confirm against
upstream before relying on the comments below. */
156 extend_inputline(FILE *f, uschar *start)
158 uschar *here = start;
/* Bytes remaining in buffer from the current read position. */
162 int rlen = buffer_size - (here - buffer);
/* fgets() yields NULL at EOF or on a read error. */
166 if (fgets((char *)here, rlen, f) == NULL)
167 return (here == start)? NULL : start;
168 dlen = (int)strlen((char *)here);
/* A trailing newline means the whole line is now in the buffer. */
169 if (dlen > 0 && here[dlen - 1] == '\n') return start;
/* Growth path: get three replacement buffers, each twice the current size. */
175 int new_buffer_size = 2*buffer_size;
176 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
177 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
178 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
180 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
182 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
/* Only buffer and pbuffer have their old contents carried over; dbuffer's
contents are not copied. */
186 memcpy(new_buffer, buffer, buffer_size);
187 memcpy(new_pbuffer, pbuffer, buffer_size);
189 buffer_size = new_buffer_size;
/* Rebase both cursors to the same offsets inside the new block; this must
happen while buffer still refers to the old block. */
191 start = new_buffer + (start - buffer);
192 here = new_buffer + (here - buffer);
199 dbuffer = new_dbuffer;
200 pbuffer = new_pbuffer;
204 return NULL; /* Control never gets here */
213 /*************************************************
214 * Read number from string *
215 *************************************************/
217 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
218 around with conditional compilation, just do the job by hand. It is only used
219 for unpicking the -o argument, so just keep it simple.
222 str string to be converted
223 endptr where to put the end pointer
225 Returns: the unsigned long
/* Parse a decimal number from str: skip leading white space, then accumulate
consecutive digits into result. NOTE(review): the return type, the declaration
of result, the store through endptr and the return statement are not visible
in this listing. */
229 get_value(unsigned char *str, unsigned char **endptr)
232 while(*str != 0 && isspace(*str)) str++;
/* No overflow check is made while accumulating. */
233 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
241 /*************************************************
242 * Convert UTF-8 string to value *
243 *************************************************/
245 /* This function takes one or more bytes that represent a UTF-8 character,
246 and returns the value of the character.
249 utf8bytes a pointer to the byte vector
250 vptr a pointer to an int to receive the value
252 Returns: > 0 => the number of bytes consumed
253 -6 to 0 => malformed UTF-8 character at offset = (-return)
/* Decode one UTF-8 character starting at utf8bytes. A single-byte character
is stored through vptr and 1 is returned; malformed input gives a zero or
negative return. NOTE(review): several short lines (declarations, the updates
of d and s, the final store and return) are not visible in this listing. */
259 utf82ord(unsigned char *utf8bytes, int *vptr)
261 int c = *utf8bytes++;
/* Scan for the first clear top bit; the exit value of i gives the number of
additional bytes (how d is prepared and shifted is not visible here). */
265 for (i = -1; i < 6; i++) /* i is number of additional bytes */
267 if ((d & 0x80) == 0) break;
271 if (i == -1) { *vptr = c; return 1; } /* ascii character */
272 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
274 /* i now has a value in the range 1-5 */
/* Start the value with the data bits of the lead byte, selected by the
utf8_table3 mask for this length. */
277 d = (c & utf8_table3[i]) << s;
279 for (j = 0; j < i; j++)
/* Every additional byte must look like 10xxxxxx; the negative return
identifies which one did not. */
282 if ((c & 0xc0) != 0x80) return -(j+1);
/* Merge the six data bits of this byte into the value. */
284 d |= (c & 0x3f) << s;
287 /* Check that encoding was the correct unique one */
/* j becomes the index of the first utf8_table1 limit that d fits under; if it
differs from i, the value was not encoded in the length that table implies. */
289 for (j = 0; j < utf8_table1_size; j++)
290 if (d <= utf8_table1[j]) break;
291 if (j != i) return -(i+1);
303 /*************************************************
304 * Convert character value to UTF-8 *
305 *************************************************/
307 /* This function takes an integer value in the range 0 - 0x7fffffff
308 and encodes it as a UTF-8 character in 0 to 6 bytes.
311 cvalue the character value
312 utf8bytes pointer to buffer for result - at least 6 bytes long
314 Returns: number of characters placed in the buffer
/* Encode cvalue as UTF-8 into utf8bytes. NOTE(review): the return type, the
local declarations, the pointer adjustment before the backwards fill, the
shift of cvalue between bytes and the return statement are not visible in this
listing. */
320 ord2utf8(int cvalue, uschar *utf8bytes)
/* i ends as the index of the first utf8_table1 limit that cvalue fits under;
the loop below writes that many trailing bytes. */
323 for (i = 0; i < utf8_table1_size; i++)
324 if (cvalue <= utf8_table1[i]) break;
/* Trailing bytes are written last to first, each holding six bits under a
10xxxxxx marker. */
326 for (j = i; j > 0; j--)
328 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
/* The lead byte combines the utf8_table2 prefix for this length with the
remaining bits of cvalue. */
331 *utf8bytes = utf8_table2[i] | cvalue;
339 /*************************************************
340 * Print character string *
341 *************************************************/
343 /* Character string printing function. Must handle UTF-8 strings in utf8
344 mode. Yields number of characters printed. If handed a NULL file, just counts
345 chars without printing. */
/* Print length bytes starting at p to f, showing non-printing characters as
hex escapes. With f == NULL nothing is written. NOTE(review): the loop header,
the UTF-8 mode test, the yield accounting and the return are not visible in
this listing. */
347 static int pchars(unsigned char *p, int length, FILE *f)
/* Try to decode a whole UTF-8 character at p. */
357 int rc = utf82ord(p, &c);
359 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
/* Printable values below 256 are written as themselves. */
363 if (c < 256 && isprint(c))
365 if (f != NULL) fprintf(f, "%c", c);
/* Other values are shown as \x{hh...}; the trailing %n stores the number of
characters written by this call into n. NOTE(review): when f is NULL the call
is skipped, so n is not set by this line - confirm n is initialised before any
later use. */
371 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
379 /* Not UTF-8, or malformed UTF-8 */
/* Byte-at-a-time path: printable bytes as themselves, others as \xhh. */
381 if (isprint(c = *(p++)))
383 if (f != NULL) fprintf(f, "%c", c);
388 if (f != NULL) fprintf(f, "\\x%02x", c);
398 /*************************************************
400 *************************************************/
402 /* Called from PCRE as a result of the (?C) item. We print out where we are in
403 the match. Yield zero unless more callouts than the fail count, or the callout
/* Callout handler: writes a picture of the current match position and the
next pattern item to outfile. Returns 0 unless the callout number equals
callout_fail_id and the incremented callout_count has reached
callout_fail_count, in which case it returns 1. NOTE(review): braces, else
lines and some guards of this function are not visible in this listing. */
406 static int callout(pcre_callout_block *cb)
/* f is outfile when the subject should be printed (first callout, or extra
output requested); otherwise NULL, so that pchars only measures. */
408 FILE *f = (first_callout | callout_extra)? outfile : NULL;
409 int i, pre_start, post_start, subject_length;
/* Extra output: callout number, last capture, and every capture pair.
NOTE(review): the condition guarding this section is not visible here; f must
be non-NULL whenever it runs. */
413 fprintf(f, "Callout %d: last capture = %d\n",
414 cb->callout_number, cb->capture_last);
/* offset_vector holds start/end pairs, hence the step of 2. */
416 for (i = 0; i < cb->capture_top * 2; i += 2)
418 if (cb->offset_vector[i] < 0)
419 fprintf(f, "%2d: <unset>\n", i/2);
422 fprintf(f, "%2d: ", i/2);
423 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
424 cb->offset_vector[i+1] - cb->offset_vector[i], f);
430 /* Re-print the subject in canonical form, the first time or if giving full
431 details. On subsequent calls in the same match, we use pchars just to find the
432 printed lengths of the substrings. */
434 if (f != NULL) fprintf(f, "--->");
/* pre_start: subject text before the match start; post_start: text from the
match start to the current position. */
436 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
437 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
438 cb->current_position - cb->start_match, f);
/* Always measured with a NULL file: the subject is not printed again here. */
440 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
442 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
443 cb->subject_length - cb->current_position, f);
445 if (f != NULL) fprintf(f, "\n");
447 /* Always print appropriate indicators, with callout number if not already
448 shown. For automatic callouts, show the pattern offset. */
/* Callout number 255 selects the pattern-offset form of the prefix. */
450 if (cb->callout_number == 255)
452 fprintf(outfile, "%+3d ", cb->pattern_position);
453 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
457 if (callout_extra) fprintf(outfile, " ");
458 else fprintf(outfile, "%3d ", cb->callout_number);
/* Pad by the measured widths so each caret lines up under the subject. */
461 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
462 fprintf(outfile, "^");
466 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
467 fprintf(outfile, "^");
470 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
471 fprintf(outfile, " ");
/* Show the next pattern item from the pattern copy in pbuffer; a zero item
length still prints one character. */
473 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
474 pbuffer + cb->pattern_position);
476 fprintf(outfile, "\n");
/* callout_data, when supplied, is read as a pointer to int and reported if
the value is non-zero. */
479 if (cb->callout_data != NULL)
481 int callout_data = *((int *)(cb->callout_data));
482 if (callout_data != 0)
484 fprintf(outfile, "Callout data = %d\n", callout_data);
489 return (cb->callout_number != callout_fail_id)? 0 :
490 (++callout_count >= callout_fail_count)? 1 : 0;
494 /*************************************************
495 * Local malloc functions *
496 *************************************************/
498 /* Alternative malloc function, to test functionality and show the size of the
/* malloc() wrapper that main() installs as pcre_malloc; it can report each
block's size and address on outfile. NOTE(review): any condition guarding the
trace output, and the return statement, are not visible in this listing. */
501 static void *new_malloc(size_t size)
503 void *block = malloc(size);
506 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
/* Release hook that main() installs as pcre_free; it can report the address
being released on outfile. NOTE(review): any condition guarding the trace
output, and the call that releases block, are not visible in this listing. */
510 static void new_free(void *block)
513 fprintf(outfile, "free %p\n", block);
518 /* For recursion malloc/free, to test stacking calls */
/* malloc() wrapper that main() installs as pcre_stack_malloc; it can report
each block's size and address on outfile. NOTE(review): any condition guarding
the trace output, and the return statement, are not visible in this listing. */
520 static void *stack_malloc(size_t size)
522 void *block = malloc(size);
524 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
/* Release hook that main() installs as pcre_stack_free; it can report the
address being released on outfile. NOTE(review): any condition guarding the
trace output, and the call that releases block, are not visible in this
listing. */
528 static void stack_free(void *block)
531 fprintf(outfile, "stack_free %p\n", block);
536 /*************************************************
537 * Call pcre_fullinfo() *
538 *************************************************/
540 /* Get one piece of information from the pcre_fullinfo() function */
/* Fetch one item via pcre_fullinfo() into the object ptr points at. A
negative return code is reported on outfile together with the option that was
requested; the failure is not passed back to the caller. */
542 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
545 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
546 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
551 /*************************************************
552 * Byte flipping function *
553 *************************************************/
/* Reverse the byte order of value. With n == 2 the two low bytes are swapped;
for every other n the value is handled as four bytes and only its low 32 bits
contribute to the result. Callers in main() pass sizeof(field) as n.
NOTE(review): main() also passes sizeof(magic) for an unsigned long int; where
that is 8 this still performs the four-byte flip - confirm that is intended. */
555 static unsigned long int
556 byteflip(unsigned long int value, int n)
558 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
559 return ((value & 0x000000ff) << 24) |
560 ((value & 0x0000ff00) << 8) |
561 ((value & 0x00ff0000) >> 8) |
562 ((value & 0xff000000) >> 24);
568 /*************************************************
569 * Check match or recursion limit *
570 *************************************************/
/* Run pcre_exec() repeatedly with the limit selected by flag enabled in
extra, adjusting the trial value mid until the match no longer fails with
errnumber, then report the value found on outfile as the minimum limit.
NOTE(review): the return type, local declarations, loop header, the updates of
min and max and the return statement are not visible in this listing. */
573 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
574 int start_offset, int options, int *use_offsets, int use_size_offsets,
575 int flag, unsigned long int *limit, int errnumber, const char *msg)
/* Turn on the limit under test for the duration of the search. */
582 extra->flags |= flag;
588 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
589 use_offsets, use_size_offsets);
/* Limit was hit: choose a larger trial value. */
591 if (count == errnumber)
593 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
/* If mid is one below max, try max itself; otherwise bisect between min and
max when max is positive, or double mid when it is not. */
595 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
/* Match, no-match and partial-match results all mean the limit was not hit. */
598 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
599 count == PCRE_ERROR_PARTIAL)
603 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
606 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
610 else break; /* Some other error */
/* Restore the caller's flags before leaving. */
613 extra->flags &= ~flag;
619 /*************************************************
620 * Check newline indicator *
621 *************************************************/
623 /* This is used both at compile and run-time to check for <xxx> escapes, where
624 xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.
627 p points after the leading '<'
628 f file for error message
630 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
/* Map the text following a '<' to a PCRE_NEWLINE_xxx value. The comparison is
case-sensitive against the lower-case spellings and includes the closing '>'.
If nothing matches, a message quoting the remaining text is written to f.
NOTE(review): the return type and the final return statement are not visible
in this listing. */
634 check_newline(uschar *p, FILE *f)
636 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
637 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
638 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
639 fprintf(f, "Unknown newline type at: <%s\n", p);
645 /*************************************************
647 *************************************************/
649 /* Read lines from named file or stdin and write to named file or stdout; lines
650 consist of a regular expression, in delimiters and optionally followed by
651 options, followed by a set of test data, terminated by an empty line. */
653 int main(int argc, char **argv)
655 FILE *infile = stdin;
657 int study_options = 0;
663 int size_offsets = 45;
664 int size_offsets_max;
675 /* These vectors store, end-to-end, a list of captured substring names. Assume
676 that 1024 is plenty long enough for the few names we'll be testing. */
678 uschar copynames[1024];
679 uschar getnames[1024];
681 uschar *copynamesptr;
684 /* Get buffers from malloc() so that Electric Fence will check their misuse
685 when I am debugging. They grow automatically when very long lines are read. */
687 buffer = (unsigned char *)malloc(buffer_size);
688 dbuffer = (unsigned char *)malloc(buffer_size);
689 pbuffer = (unsigned char *)malloc(buffer_size);
691 /* The outfile variable is static so that new_malloc can use it. The _setmode()
692 stuff is some magic that I don't understand, but which apparently does good
693 things in Windows. It's related to line terminations. */
695 #if defined(_WIN32) || defined(WIN32)
696 _setmode( _fileno( stdout ), 0x8000 );
697 #endif /* defined(_WIN32) || defined(WIN32) */
703 while (argc > 1 && argv[op][0] == '-')
705 unsigned char *endptr;
707 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
709 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
710 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
711 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
712 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
714 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
716 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
717 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
723 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
724 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
728 printf("PCRE: -S not supported on this OS\n");
733 getrlimit(RLIMIT_STACK, &rlim);
734 rlim.rlim_cur = stack_size * 1024 * 1024;
735 rc = setrlimit(RLIMIT_STACK, &rlim);
738 printf("PCRE: setrlimit() failed with error %d\n", rc);
746 else if (strcmp(argv[op], "-p") == 0) posix = 1;
748 else if (strcmp(argv[op], "-C") == 0)
751 printf("PCRE version %s\n", pcre_version());
752 printf("Compiled with\n");
753 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
754 printf(" %sUTF-8 support\n", rc? "" : "No ");
755 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
756 printf(" %sUnicode properties support\n", rc? "" : "No ");
757 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
758 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
759 (rc == '\n')? "LF" : "CRLF");
760 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
761 printf(" Internal link size = %d\n", rc);
762 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
763 printf(" POSIX malloc threshold = %d\n", rc);
764 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
765 printf(" Default match limit = %d\n", rc);
766 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
767 printf(" Default recursion depth limit = %d\n", rc);
768 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
769 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
774 printf("** Unknown or malformed option %s\n", argv[op]);
775 printf("Usage: pcretest [options] [<input> [<output>]]\n");
776 printf(" -C show PCRE compile-time options and exit\n");
777 printf(" -d debug: show compiled code; implies -i\n");
779 printf(" -dfa force DFA matching for all subjects\n");
781 printf(" -i show information about compiled pattern\n"
782 " -m output memory used information\n"
783 " -o <n> set size of offsets vector to <n>\n");
785 printf(" -p use POSIX interface\n");
787 printf(" -S <n> set stack size to <n> megabytes\n");
788 printf(" -s output store (memory) used information\n"
789 " -t time compilation and execution\n");
797 /* Get the store for the offsets vector, and remember what it was */
799 size_offsets_max = size_offsets;
800 offsets = (int *)malloc(size_offsets_max * sizeof(int));
803 printf("** Failed to get %d bytes of memory for offsets vector\n",
804 size_offsets_max * sizeof(int));
809 /* Sort out the input and output files */
813 infile = fopen(argv[op], "rb");
816 printf("** Failed to open %s\n", argv[op]);
824 outfile = fopen(argv[op+1], "wb");
827 printf("** Failed to open %s\n", argv[op+1]);
833 /* Set alternative malloc function */
835 pcre_malloc = new_malloc;
836 pcre_free = new_free;
837 pcre_stack_malloc = stack_malloc;
838 pcre_stack_free = stack_free;
840 /* Heading line unless quiet, then prompt for first regex if stdin */
842 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
849 pcre_extra *extra = NULL;
851 #if !defined NOPOSIX /* There are still compilers that require no indent */
857 unsigned char *p, *pp, *ppp;
858 unsigned char *to_file = NULL;
859 const unsigned char *tables = NULL;
860 unsigned long int true_size, true_study_size = 0;
861 size_t size, regex_gotten_store;
863 int do_debug = debug;
866 int do_showinfo = showinfo;
869 int erroroffset, len, delimiter;
873 if (infile == stdin) printf(" re> ");
874 if (extend_inputline(infile, buffer) == NULL) break;
875 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
879 while (isspace(*p)) p++;
880 if (*p == 0) continue;
882 /* See if the pattern is to be loaded pre-compiled from a file. */
884 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
886 unsigned long int magic, get_options;
891 pp = p + (int)strlen((char *)p);
892 while (isspace(pp[-1])) pp--;
895 f = fopen((char *)p, "rb");
898 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
902 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
905 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
907 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
909 re = (real_pcre *)new_malloc(true_size);
910 regex_gotten_store = gotten_store;
912 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
914 magic = ((real_pcre *)re)->magic_number;
915 if (magic != MAGIC_NUMBER)
917 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
923 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
929 fprintf(outfile, "Compiled regex%s loaded from %s\n",
930 do_flip? " (byte-inverted)" : "", p);
932 /* Need to know if UTF-8 for printing data strings */
934 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
935 use_utf8 = (get_options & PCRE_UTF8) != 0;
937 /* Now see if there is any following study data */
939 if (true_study_size != 0)
941 pcre_study_data *psd;
943 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
944 extra->flags = PCRE_EXTRA_STUDY_DATA;
946 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
947 extra->study_data = psd;
949 if (fread(psd, 1, true_study_size, f) != true_study_size)
952 fprintf(outfile, "Failed to read data from %s\n", p);
953 if (extra != NULL) new_free(extra);
954 if (re != NULL) new_free(re);
958 fprintf(outfile, "Study data loaded from %s\n", p);
959 do_study = 1; /* To get the data output if requested */
961 else fprintf(outfile, "No study data\n");
967 /* In-line pattern (the usual case). Get the delimiter and seek the end of
968 the pattern; if it isn't complete, read more. */
972 if (isalnum(delimiter) || delimiter == '\\')
974 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
984 if (*pp == '\\' && pp[1] != 0) pp++;
985 else if (*pp == delimiter) break;
989 if (infile == stdin) printf(" > ");
990 if ((pp = extend_inputline(infile, pp)) == NULL)
992 fprintf(outfile, "** Unexpected EOF\n");
996 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
999 /* If the first character after the delimiter is backslash, make
1000 the pattern end with backslash. This is purely to provide a way
1001 of testing for the error message when a pattern ends with backslash. */
1003 if (pp[1] == '\\') *pp++ = '\\';
1005 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1009 strcpy((char *)pbuffer, (char *)p);
1011 /* Look for options after final delimiter */
1015 log_store = showstore; /* default from command line */
1021 case 'f': options |= PCRE_FIRSTLINE; break;
1022 case 'g': do_g = 1; break;
1023 case 'i': options |= PCRE_CASELESS; break;
1024 case 'm': options |= PCRE_MULTILINE; break;
1025 case 's': options |= PCRE_DOTALL; break;
1026 case 'x': options |= PCRE_EXTENDED; break;
1028 case '+': do_showrest = 1; break;
1029 case 'A': options |= PCRE_ANCHORED; break;
1030 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1031 case 'D': do_debug = do_showinfo = 1; break;
1032 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1033 case 'F': do_flip = 1; break;
1034 case 'G': do_G = 1; break;
1035 case 'I': do_showinfo = 1; break;
1036 case 'J': options |= PCRE_DUPNAMES; break;
1037 case 'M': log_store = 1; break;
1038 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1040 #if !defined NOPOSIX
1041 case 'P': do_posix = 1; break;
1044 case 'S': do_study = 1; break;
1045 case 'U': options |= PCRE_UNGREEDY; break;
1046 case 'X': options |= PCRE_EXTRA; break;
1047 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1048 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1052 /* The '\r' test here is so that it works on Windows */
1053 while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1055 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1057 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1060 tables = pcre_maketables();
1066 while (*pp != 0) pp++;
1067 while (isspace(pp[-1])) pp--;
1073 int x = check_newline(pp, outfile);
1074 if (x == 0) goto SKIP_DATA;
1076 while (*pp++ != '>');
1080 case '\r': /* So that it works in Windows */
1086 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1091 /* Handle compiling via the POSIX interface, which doesn't support the
1092 timing, showing, or debugging options, nor the ability to pass over
1093 local character tables. */
1095 #if !defined NOPOSIX
1096 if (posix || do_posix)
1101 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1102 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1103 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1104 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1105 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1107 rc = regcomp(&preg, (char *)p, cflags);
1109 /* Compilation failed; go back for another re, skipping to blank line
1110 if non-interactive. */
1114 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1115 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1120 /* Handle compiling via the native interface */
1123 #endif /* !defined NOPOSIX */
1130 clock_t start_time = clock();
1131 for (i = 0; i < LOOPREPEAT; i++)
1133 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1134 if (re != NULL) free(re);
1136 time_taken = clock() - start_time;
1137 fprintf(outfile, "Compile time %.3f milliseconds\n",
1138 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1139 (double)CLOCKS_PER_SEC);
1142 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1144 /* Compilation failed; go back for another re, skipping to blank line
1145 if non-interactive. */
1149 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1151 if (infile != stdin)
1155 if (extend_inputline(infile, buffer) == NULL)
1160 len = (int)strlen((char *)buffer);
1161 while (len > 0 && isspace(buffer[len-1])) len--;
1162 if (len == 0) break;
1164 fprintf(outfile, "\n");
1169 /* Compilation succeeded; print data if required. There are now two
1170 info-returning functions. The old one has a limited interface and
1171 returns only limited data. Check that it agrees with the newer one. */
1174 fprintf(outfile, "Memory allocation (code space): %d\n",
1175 (int)(gotten_store -
1177 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1179 /* Extract the size for possible writing before possibly flipping it,
1180 and remember the store that was got. */
1182 true_size = ((real_pcre *)re)->size;
1183 regex_gotten_store = gotten_store;
1185 /* If /S was present, study the regexp to generate additional info to
1186 help with the matching. */
1194 clock_t start_time = clock();
1195 for (i = 0; i < LOOPREPEAT; i++)
1196 extra = pcre_study(re, study_options, &error);
1197 time_taken = clock() - start_time;
1198 if (extra != NULL) free(extra);
1199 fprintf(outfile, " Study time %.3f milliseconds\n",
1200 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1201 (double)CLOCKS_PER_SEC);
1203 extra = pcre_study(re, study_options, &error);
1205 fprintf(outfile, "Failed to study: %s\n", error);
1206 else if (extra != NULL)
1207 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1210 /* If the 'F' option was present, we flip the bytes of all the integer
1211 fields in the regex data block and the study block. This is to make it
1212 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1213 compiled on a different architecture. */
1217 real_pcre *rre = (real_pcre *)re;
1218 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1219 rre->size = byteflip(rre->size, sizeof(rre->size));
1220 rre->options = byteflip(rre->options, sizeof(rre->options));
1221 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1222 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1223 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1224 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1225 rre->name_table_offset = byteflip(rre->name_table_offset,
1226 sizeof(rre->name_table_offset));
1227 rre->name_entry_size = byteflip(rre->name_entry_size,
1228 sizeof(rre->name_entry_size));
1229 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1233 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1234 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1235 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1239 /* Extract information from the compiled data if required */
1245 unsigned long int get_options, all_options;
1246 #if !defined NOINFOCHECK
1247 int old_first_char, old_options, old_count;
1249 int count, backrefmax, first_char, need_char;
1250 int nameentrysize, namecount;
1251 const uschar *nametable;
1255 fprintf(outfile, "------------------------------------------------------------------\n");
1256 pcre_printint(re, outfile);
1259 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1260 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1261 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1262 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1263 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1264 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1265 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1266 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1267 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1269 #if !defined NOINFOCHECK
1270 old_count = pcre_info(re, &old_options, &old_first_char);
1271 if (count < 0) fprintf(outfile,
1272 "Error %d from pcre_info()\n", count);
1275 if (old_count != count) fprintf(outfile,
1276 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1279 if (old_first_char != first_char) fprintf(outfile,
1280 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1281 first_char, old_first_char);
1283 if (old_options != (int)get_options) fprintf(outfile,
1284 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1285 get_options, old_options);
1289 if (size != regex_gotten_store) fprintf(outfile,
1290 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1291 (int)size, (int)regex_gotten_store);
1293 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1295 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1299 fprintf(outfile, "Named capturing subpatterns:\n");
1300 while (namecount-- > 0)
1302 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1303 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1304 GET2(nametable, 0));
1305 nametable += nameentrysize;
1309 /* The NOPARTIAL bit is a private bit in the options, so we have
1310 to fish it out via our back door */
1312 all_options = ((real_pcre *)re)->options;
1315 all_options = byteflip(all_options, sizeof(all_options));
1318 if ((all_options & PCRE_NOPARTIAL) != 0)
1319 fprintf(outfile, "Partial matching not supported\n");
1321 if (get_options == 0) fprintf(outfile, "No options\n");
1322 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1323 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1324 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1325 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1326 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1327 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1328 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1329 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1330 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1331 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1332 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1333 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1334 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1335 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1337 switch (get_options & PCRE_NEWLINE_CRLF)
1339 case PCRE_NEWLINE_CR:
1340 fprintf(outfile, "Forced newline sequence: CR\n");
1343 case PCRE_NEWLINE_LF:
1344 fprintf(outfile, "Forced newline sequence: LF\n");
1347 case PCRE_NEWLINE_CRLF:
1348 fprintf(outfile, "Forced newline sequence: CRLF\n");
1355 if (first_char == -1)
1357 fprintf(outfile, "First char at start or follows newline\n");
1359 else if (first_char < 0)
1361 fprintf(outfile, "No first char\n");
1365 int ch = first_char & 255;
1366 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1369 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1371 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1376 fprintf(outfile, "No need char\n");
1380 int ch = need_char & 255;
1381 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1384 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1386 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1389 /* Don't output study size; at present it is in any case a fixed
1390 value, but it varies, depending on the computer architecture, and
1391 so messes up the test suite. (And with the /F option, it might be
1397 fprintf(outfile, "Study returned NULL\n");
1400 uschar *start_bits = NULL;
1401 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1403 if (start_bits == NULL)
1404 fprintf(outfile, "No starting byte set\n");
1409 fprintf(outfile, "Starting byte set: ");
1410 for (i = 0; i < 256; i++)
1412 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1416 fprintf(outfile, "\n ");
1419 if (isprint(i) && i != ' ')
1421 fprintf(outfile, "%c ", i);
1426 fprintf(outfile, "\\x%02x ", i);
1431 fprintf(outfile, "\n");
1437 /* If the '>' option was present, we write out the regex to a file, and
1438 that is all. The first 8 bytes of the file are the regex length and then
1439 the study length, in big-endian order. */
1441 if (to_file != NULL)
1443 FILE *f = fopen((char *)to_file, "wb");
1446 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1451 sbuf[0] = (true_size >> 24) & 255;
1452 sbuf[1] = (true_size >> 16) & 255;
1453 sbuf[2] = (true_size >> 8) & 255;
1454 sbuf[3] = (true_size) & 255;
1456 sbuf[4] = (true_study_size >> 24) & 255;
1457 sbuf[5] = (true_study_size >> 16) & 255;
1458 sbuf[6] = (true_study_size >> 8) & 255;
1459 sbuf[7] = (true_study_size) & 255;
1461 if (fwrite(sbuf, 1, 8, f) < 8 ||
1462 fwrite(re, 1, true_size, f) < true_size)
1464 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1468 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1471 if (fwrite(extra->study_data, 1, true_study_size, f) <
1474 fprintf(outfile, "Write error on %s: %s\n", to_file,
1477 else fprintf(outfile, "Study data written to %s\n", to_file);
1484 if (extra != NULL) new_free(extra);
1485 if (tables != NULL) new_free((void *)tables);
1486 continue; /* With next regex */
1488 } /* End of non-POSIX compile */
1490 /* Read data lines and test them */
1495 uschar *bptr = dbuffer;
1496 int *use_offsets = offsets;
1497 int use_size_offsets = size_offsets;
1498 int callout_data = 0;
1499 int callout_data_set = 0;
1501 int copystrings = 0;
1502 int find_match_limit = 0;
1506 int start_offset = 0;
1515 copynamesptr = copynames;
1516 getnamesptr = getnames;
1518 pcre_callout = callout;
1522 callout_fail_count = 999999;
1523 callout_fail_id = -1;
1526 if (extra != NULL) extra->flags &=
1527 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1532 if (infile == stdin) printf("data> ");
1533 if (extend_inputline(infile, buffer + len) == NULL)
1539 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1540 len = (int)strlen((char *)buffer);
1541 if (buffer[len-1] == '\n') break;
1544 while (len > 0 && isspace(buffer[len-1])) len--;
1546 if (len == 0) break;
1549 while (isspace(*p)) p++;
1552 while ((c = *p++) != 0)
1557 if (c == '\\') switch ((c = *p++))
1559 case 'a': c = 7; break;
1560 case 'b': c = '\b'; break;
1561 case 'e': c = 27; break;
1562 case 'f': c = '\f'; break;
1563 case 'n': c = '\n'; break;
1564 case 'r': c = '\r'; break;
1565 case 't': c = '\t'; break;
1566 case 'v': c = '\v'; break;
1568 case '0': case '1': case '2': case '3':
1569 case '4': case '5': case '6': case '7':
1571 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1572 c = c * 8 + *p++ - '0';
1575 if (use_utf8 && c > 255)
1577 unsigned char buff8[8];
1579 utn = ord2utf8(c, buff8);
1580 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1581 c = buff8[ii]; /* Last byte */
1588 /* Handle \x{..} specially - new Perl thing for utf8 */
1593 unsigned char *pt = p;
1595 while (isxdigit(*(++pt)))
1596 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1599 unsigned char buff8[8];
1601 utn = ord2utf8(c, buff8);
1602 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1603 c = buff8[ii]; /* Last byte */
1607 /* Not correct form; fall through */
1614 while (i++ < 2 && isxdigit(*p))
1616 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1621 case 0: /* \ followed by EOF allows for an empty line */
1626 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1629 case 'A': /* Option setting */
1630 options |= PCRE_ANCHORED;
1634 options |= PCRE_NOTBOL;
1638 if (isdigit(*p)) /* Set copy string */
1640 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1641 copystrings |= 1 << n;
1643 else if (isalnum(*p))
1645 uschar *npp = copynamesptr;
1646 while (isalnum(*p)) *npp++ = *p++;
1649 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1651 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1661 pcre_callout = NULL;
1666 callout_fail_id = 0;
1669 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1670 callout_fail_count = 0;
1675 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1682 if (*(++p) == '-') { sign = -1; p++; }
1684 callout_data = callout_data * 10 + *p++ - '0';
1685 callout_data *= sign;
1686 callout_data_set = 1;
1692 #if !defined NOPOSIX
1693 if (posix || do_posix)
1694 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1701 options |= PCRE_DFA_SHORTEST;
1708 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1709 getstrings |= 1 << n;
1711 else if (isalnum(*p))
1713 uschar *npp = getnamesptr;
1714 while (isalnum(*p)) *npp++ = *p++;
1717 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1719 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1729 find_match_limit = 1;
1733 options |= PCRE_NOTEMPTY;
1737 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1738 if (n > size_offsets_max)
1740 size_offsets_max = n;
1742 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1743 if (offsets == NULL)
1745 printf("** Failed to get %d bytes of memory for offsets vector\n",
1746 size_offsets_max * sizeof(int));
1751 use_size_offsets = n;
1752 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1756 options |= PCRE_PARTIAL;
1760 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1763 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1766 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1767 extra->match_limit_recursion = n;
1771 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1774 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1777 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1778 extra->match_limit = n;
1783 options |= PCRE_DFA_RESTART;
1792 options |= PCRE_NOTEOL;
1796 options |= PCRE_NO_UTF8_CHECK;
1801 int x = check_newline(p, outfile);
1802 if (x == 0) goto NEXT_DATA;
1804 while (*p++ != '>');
1813 if ((all_use_dfa || use_dfa) && find_match_limit)
1815 printf("**Match limit not relevant for DFA matching: ignored\n");
1816 find_match_limit = 0;
1819 /* Handle matching via the POSIX interface, which does not
1820 support timing or playing with the match limit or callout data. */
1822 #if !defined NOPOSIX
1823 if (posix || do_posix)
1827 regmatch_t *pmatch = NULL;
1828 if (use_size_offsets > 0)
1829 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1830 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1831 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1833 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1837 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1838 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1840 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1843 fprintf(outfile, "Matched with REG_NOSUB\n");
1848 for (i = 0; i < (size_t)use_size_offsets; i++)
1850 if (pmatch[i].rm_so >= 0)
1852 fprintf(outfile, "%2d: ", (int)i);
1853 (void)pchars(dbuffer + pmatch[i].rm_so,
1854 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1855 fprintf(outfile, "\n");
1856 if (i == 0 && do_showrest)
1858 fprintf(outfile, " 0+ ");
1859 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1861 fprintf(outfile, "\n");
1869 /* Handle matching via the native interface - repeats for /g and /G */
1872 #endif /* !defined NOPOSIX */
1874 for (;; gmatched++) /* Loop for /g or /G */
1880 clock_t start_time = clock();
1883 if (all_use_dfa || use_dfa)
1885 int workspace[1000];
1886 for (i = 0; i < LOOPREPEAT; i++)
1887 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1888 options | g_notempty, use_offsets, use_size_offsets, workspace,
1889 sizeof(workspace)/sizeof(int));
1894 for (i = 0; i < LOOPREPEAT; i++)
1895 count = pcre_exec(re, extra, (char *)bptr, len,
1896 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1898 time_taken = clock() - start_time;
1899 fprintf(outfile, "Execute time %.3f milliseconds\n",
1900 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1901 (double)CLOCKS_PER_SEC);
1904 /* If find_match_limit is set, we want to do repeated matches with
1905 varying limits in order to find the minimum value for the match limit and
1906 for the recursion limit. */
1908 if (find_match_limit)
1912 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1916 (void)check_match_limit(re, extra, bptr, len, start_offset,
1917 options|g_notempty, use_offsets, use_size_offsets,
1918 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1919 PCRE_ERROR_MATCHLIMIT, "match()");
1921 count = check_match_limit(re, extra, bptr, len, start_offset,
1922 options|g_notempty, use_offsets, use_size_offsets,
1923 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1924 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
1927 /* If callout_data is set, use the interface with additional data */
1929 else if (callout_data_set)
1933 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1936 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1937 extra->callout_data = &callout_data;
1938 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1939 options | g_notempty, use_offsets, use_size_offsets);
1940 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1943 /* The normal case is just to do the match once, with the default
1944 value of match_limit. */
1947 else if (all_use_dfa || use_dfa)
1949 int workspace[1000];
1950 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1951 options | g_notempty, use_offsets, use_size_offsets, workspace,
1952 sizeof(workspace)/sizeof(int));
1955 fprintf(outfile, "Matched, but too many subsidiary matches\n");
1956 count = use_size_offsets/2;
1963 count = pcre_exec(re, extra, (char *)bptr, len,
1964 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1967 fprintf(outfile, "Matched, but too many substrings\n");
1968 count = use_size_offsets/3;
1977 for (i = 0; i < count * 2; i += 2)
1979 if (use_offsets[i] < 0)
1980 fprintf(outfile, "%2d: <unset>\n", i/2);
1983 fprintf(outfile, "%2d: ", i/2);
1984 (void)pchars(bptr + use_offsets[i],
1985 use_offsets[i+1] - use_offsets[i], outfile);
1986 fprintf(outfile, "\n");
1991 fprintf(outfile, " 0+ ");
1992 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1994 fprintf(outfile, "\n");
2000 for (i = 0; i < 32; i++)
2002 if ((copystrings & (1 << i)) != 0)
2004 char copybuffer[256];
2005 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2006 i, copybuffer, sizeof(copybuffer));
2008 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2010 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2014 for (copynamesptr = copynames;
2016 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2018 char copybuffer[256];
2019 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2020 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2022 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2024 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2027 for (i = 0; i < 32; i++)
2029 if ((getstrings & (1 << i)) != 0)
2031 const char *substring;
2032 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2035 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2038 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2039 pcre_free_substring(substring);
2044 for (getnamesptr = getnames;
2046 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2048 const char *substring;
2049 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2050 count, (char *)getnamesptr, &substring);
2052 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2055 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2056 pcre_free_substring(substring);
2062 const char **stringlist;
2063 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2066 fprintf(outfile, "get substring list failed %d\n", rc);
2069 for (i = 0; i < count; i++)
2070 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2071 if (stringlist[i] != NULL)
2072 fprintf(outfile, "string list not terminated by NULL\n");
2073 /* free((void *)stringlist); */
2074 pcre_free_substring_list(stringlist);
2079 /* There was a partial match */
2081 else if (count == PCRE_ERROR_PARTIAL)
2083 fprintf(outfile, "Partial match");
2085 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2086 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2087 bptr + use_offsets[0]);
2089 fprintf(outfile, "\n");
2090 break; /* Out of the /g loop */
2093 /* Failed to match. If this is a /g or /G loop and we previously set
2094 g_notempty after a null match, this is not necessarily the end.
2095 We want to advance the start offset, and continue. In the case of UTF-8
2096 matching, the advance must be one character, not one byte. Fudge the
2097 offset values to achieve this. We won't be at the end of the string -
2098 that was checked before setting g_notempty. */
2102 if (g_notempty != 0)
2105 use_offsets[0] = start_offset;
2108 while (start_offset + onechar < len)
2110 int tb = bptr[start_offset+onechar];
2111 if (tb <= 127) break;
2113 if (tb != 0 && tb != 0xc0) onechar++;
2116 use_offsets[1] = start_offset + onechar;
2120 if (count == PCRE_ERROR_NOMATCH)
2122 if (gmatched == 0) fprintf(outfile, "No match\n");
2124 else fprintf(outfile, "Error %d\n", count);
2125 break; /* Out of the /g loop */
2129 /* If not /g or /G we are done */
2131 if (!do_g && !do_G) break;
2133 /* If we have matched an empty string, first check to see if we are at
2134 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2135 what Perl's /g option does. This turns out to be rather cunning. First
2136 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2137 same point. If this fails (picked up above) we advance to the next
2141 if (use_offsets[0] == use_offsets[1])
2143 if (use_offsets[0] == len) break;
2144 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2147 /* For /g, update the start offset, leaving the rest alone */
2149 if (do_g) start_offset = use_offsets[1];
2151 /* For /G, update the pointer and length */
2155 bptr += use_offsets[1];
2156 len -= use_offsets[1];
2158 } /* End of loop for /g and /G */
2160 NEXT_DATA: continue;
2161 } /* End of loop for data lines */
2165 #if !defined NOPOSIX
2166 if (posix || do_posix) regfree(&preg);
2169 if (re != NULL) new_free(re);
2170 if (extra != NULL) new_free(extra);
2173 new_free((void *)tables);
2174 setlocale(LC_CTYPE, "C");
2178 if (infile == stdin) fprintf(outfile, "\n");
2182 if (infile != NULL && infile != stdin) fclose(infile);
2183 if (outfile != NULL && outfile != stdout) fclose(outfile);
2193 /* End of pcretest.c */