1 /* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.4 2006/11/07 16:50:36 ph10 Exp $ */
3 /*************************************************
4 * PCRE testing program *
5 *************************************************/
7 /* This program was hacked up as a tester for PCRE. I really should have
8 written it more tidily in the first place. Will I ever learn? It has grown and
9 been extended and consequently is now rather, er, *very* untidy in places.
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
50 #include <sys/resource.h>
53 #define PCRE_SPY /* For Win32 build, import data, not export */
55 /* We include pcre_internal.h because we need the internal info for displaying
56 the results of pcre_study() and we also need to know about the internal
57 macros, structures, and other internal data values; pcretest has "inside
58 information" compared to a program that strictly follows the PCRE API. */
60 #include "pcre_internal.h"
62 /* We need access to the data tables that PCRE uses. So as not to have to keep
63 two copies, we include the source file here, changing the names of the external
64 symbols to prevent clashes. */
66 #define _pcre_utf8_table1 utf8_table1
67 #define _pcre_utf8_table1_size utf8_table1_size
68 #define _pcre_utf8_table2 utf8_table2
69 #define _pcre_utf8_table3 utf8_table3
70 #define _pcre_utf8_table4 utf8_table4
72 #define _pcre_utt_size utt_size
73 #define _pcre_OP_lengths OP_lengths
75 #include "pcre_tables.c"
77 /* We also need the pcre_printint() function for printing out compiled
78 patterns. This function is in a separate file so that it can be included in
79 pcre_compile.c when that module is compiled with debugging enabled. */
81 #include "pcre_printint.src"
84 /* It is possible to compile this test program without including support for
85 testing the POSIX interface, though this is not available via the standard
89 #include "pcreposix.h"
92 /* It is also possible, for the benefit of the version imported into Exim, to
93 build pcretest without support for UTF8 (define NOUTF8), without the interface
94 to the DFA matcher (NODFA), and without the doublecheck of the old "info"
95 function (define NOINFOCHECK). */
98 /* Other parameters */
100 #ifndef CLOCKS_PER_SEC
102 #define CLOCKS_PER_SEC CLK_TCK
104 #define CLOCKS_PER_SEC 100
108 #define LOOPREPEAT 500000
110 /* Static variables */
112 static FILE *outfile;
113 static int log_store = 0;
114 static int callout_count;
115 static int callout_extra;
116 static int callout_fail_count;
117 static int callout_fail_id;
118 static int first_callout;
119 static int show_malloc;
121 static size_t gotten_store;
123 /* The buffers grow automatically if very long input lines are encountered. */
125 static int buffer_size = 50000;
126 static uschar *buffer = NULL;
127 static uschar *dbuffer = NULL;
128 static uschar *pbuffer = NULL;
132 /*************************************************
133 * Read or extend an input line *
134 *************************************************/
136 /* Input lines are read into buffer, but both patterns and data lines can be
137 continued over multiple input lines. In addition, if the buffer fills up, we
138 want to automatically expand it so as to be able to handle extremely large
139 lines that are needed for certain stress tests. When the input buffer is
140 expanded, the other two buffers must also be expanded likewise, and the
141 contents of pbuffer, which are a copy of the input for callouts, must be
142 preserved (for when expansion happens for a data line). This is not the most
143 optimal way of handling this, but hey, this is just a test program!
147 start where in buffer to start (this *must* be within buffer)
149 Returns: pointer to the start of new data
150 could be a copy of start, or could be moved
151 NULL if no data read and EOF reached
155 extend_inputline(FILE *f, uschar *start)
157 uschar *here = start;
161 int rlen = buffer_size - (here - buffer);
165 if (fgets((char *)here, rlen, f) == NULL)
166 return (here == start)? NULL : start;
167 dlen = (int)strlen((char *)here);
168 if (dlen > 0 && here[dlen - 1] == '\n') return start;
174 int new_buffer_size = 2*buffer_size;
175 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
176 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
177 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
179 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
181 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
185 memcpy(new_buffer, buffer, buffer_size);
186 memcpy(new_pbuffer, pbuffer, buffer_size);
188 buffer_size = new_buffer_size;
190 start = new_buffer + (start - buffer);
191 here = new_buffer + (here - buffer);
198 dbuffer = new_dbuffer;
199 pbuffer = new_pbuffer;
203 return NULL; /* Control never gets here */
212 /*************************************************
213 * Read number from string *
214 *************************************************/
216 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
217 around with conditional compilation, just do the job by hand. It is only used
218 for unpicking the -o argument, so just keep it simple.
221 str string to be converted
222 endptr where to put the end pointer
224 Returns: the unsigned long
228 get_value(unsigned char *str, unsigned char **endptr)
231 while(*str != 0 && isspace(*str)) str++;
232 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
240 /*************************************************
241 * Convert UTF-8 string to value *
242 *************************************************/
244 /* This function takes one or more bytes that represent a UTF-8 character,
245 and returns the value of the character.
248 utf8bytes a pointer to the byte vector
249 vptr a pointer to an int to receive the value
251 Returns: > 0 => the number of bytes consumed
252 -6 to 0 => malformed UTF-8 character at offset = (-return)
258 utf82ord(unsigned char *utf8bytes, int *vptr)
260 int c = *utf8bytes++;
264 for (i = -1; i < 6; i++) /* i is number of additional bytes */
266 if ((d & 0x80) == 0) break;
270 if (i == -1) { *vptr = c; return 1; } /* ascii character */
271 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
273 /* i now has a value in the range 1-5 */
276 d = (c & utf8_table3[i]) << s;
278 for (j = 0; j < i; j++)
281 if ((c & 0xc0) != 0x80) return -(j+1);
283 d |= (c & 0x3f) << s;
286 /* Check that encoding was the correct unique one */
288 for (j = 0; j < utf8_table1_size; j++)
289 if (d <= utf8_table1[j]) break;
290 if (j != i) return -(i+1);
302 /*************************************************
303 * Convert character value to UTF-8 *
304 *************************************************/
306 /* This function takes an integer value in the range 0 - 0x7fffffff
307 and encodes it as a UTF-8 character in 0 to 6 bytes.
310 cvalue the character value
311 utf8bytes pointer to buffer for result - at least 6 bytes long
313 Returns: number of characters placed in the buffer
319 ord2utf8(int cvalue, uschar *utf8bytes)
322 for (i = 0; i < utf8_table1_size; i++)
323 if (cvalue <= utf8_table1[i]) break;
325 for (j = i; j > 0; j--)
327 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
330 *utf8bytes = utf8_table2[i] | cvalue;
338 /*************************************************
339 * Print character string *
340 *************************************************/
342 /* Character string printing function. Must handle UTF-8 strings in utf8
343 mode. Yields number of characters printed. If handed a NULL file, just counts
344 chars without printing. */
346 static int pchars(unsigned char *p, int length, FILE *f)
356 int rc = utf82ord(p, &c);
358 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
362 if (c < 256 && isprint(c))
364 if (f != NULL) fprintf(f, "%c", c);
370 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
378 /* Not UTF-8, or malformed UTF-8 */
380 if (isprint(c = *(p++)))
382 if (f != NULL) fprintf(f, "%c", c);
387 if (f != NULL) fprintf(f, "\\x%02x", c);
397 /*************************************************
399 *************************************************/
401 /* Called from PCRE as a result of the (?C) item. We print out where we are in
402 the match. Yield zero unless more callouts than the fail count, or the callout
405 static int callout(pcre_callout_block *cb)
407 FILE *f = (first_callout | callout_extra)? outfile : NULL;
408 int i, pre_start, post_start, subject_length;
412 fprintf(f, "Callout %d: last capture = %d\n",
413 cb->callout_number, cb->capture_last);
415 for (i = 0; i < cb->capture_top * 2; i += 2)
417 if (cb->offset_vector[i] < 0)
418 fprintf(f, "%2d: <unset>\n", i/2);
421 fprintf(f, "%2d: ", i/2);
422 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
423 cb->offset_vector[i+1] - cb->offset_vector[i], f);
429 /* Re-print the subject in canonical form, the first time or if giving full
430 details. On subsequent calls in the same match, we use pchars just to find the
431 printed lengths of the substrings. */
433 if (f != NULL) fprintf(f, "--->");
435 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
436 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
437 cb->current_position - cb->start_match, f);
439 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
441 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
442 cb->subject_length - cb->current_position, f);
444 if (f != NULL) fprintf(f, "\n");
446 /* Always print appropriate indicators, with callout number if not already
447 shown. For automatic callouts, show the pattern offset. */
449 if (cb->callout_number == 255)
451 fprintf(outfile, "%+3d ", cb->pattern_position);
452 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
456 if (callout_extra) fprintf(outfile, " ");
457 else fprintf(outfile, "%3d ", cb->callout_number);
460 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
461 fprintf(outfile, "^");
465 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
466 fprintf(outfile, "^");
469 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
470 fprintf(outfile, " ");
472 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
473 pbuffer + cb->pattern_position);
475 fprintf(outfile, "\n");
478 if (cb->callout_data != NULL)
480 int callout_data = *((int *)(cb->callout_data));
481 if (callout_data != 0)
483 fprintf(outfile, "Callout data = %d\n", callout_data);
488 return (cb->callout_number != callout_fail_id)? 0 :
489 (++callout_count >= callout_fail_count)? 1 : 0;
493 /*************************************************
494 * Local malloc functions *
495 *************************************************/
497 /* Alternative malloc function, to test functionality and show the size of the
500 static void *new_malloc(size_t size)
502 void *block = malloc(size);
505 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
509 static void new_free(void *block)
512 fprintf(outfile, "free %p\n", block);
517 /* For recursion malloc/free, to test stacking calls */
519 static void *stack_malloc(size_t size)
521 void *block = malloc(size);
523 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
527 static void stack_free(void *block)
530 fprintf(outfile, "stack_free %p\n", block);
535 /*************************************************
536 * Call pcre_fullinfo() *
537 *************************************************/
539 /* Get one piece of information from the pcre_fullinfo() function */
541 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
544 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
545 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
550 /*************************************************
551 * Byte flipping function *
552 *************************************************/
554 static unsigned long int
555 byteflip(unsigned long int value, int n)
557 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
558 return ((value & 0x000000ff) << 24) |
559 ((value & 0x0000ff00) << 8) |
560 ((value & 0x00ff0000) >> 8) |
561 ((value & 0xff000000) >> 24);
567 /*************************************************
568 * Check match or recursion limit *
569 *************************************************/
572 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
573 int start_offset, int options, int *use_offsets, int use_size_offsets,
574 int flag, unsigned long int *limit, int errnumber, const char *msg)
581 extra->flags |= flag;
587 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
588 use_offsets, use_size_offsets);
590 if (count == errnumber)
592 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
594 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
597 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
598 count == PCRE_ERROR_PARTIAL)
602 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
605 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
609 else break; /* Some other error */
612 extra->flags &= ~flag;
618 /*************************************************
619 * Check newline indicator *
620 *************************************************/
622 /* This is used both at compile and run-time to check for <xxx> escapes, where
623 xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.
626 p points after the leading '<'
627 f file for error message
629 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
633 check_newline(uschar *p, FILE *f)
635 if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
636 if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
637 if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
638 fprintf(f, "Unknown newline type at: <%s\n", p);
644 /*************************************************
646 *************************************************/
648 /* Read lines from named file or stdin and write to named file or stdout; lines
649 consist of a regular expression, in delimiters and optionally followed by
650 options, followed by a set of test data, terminated by an empty line. */
652 int main(int argc, char **argv)
654 FILE *infile = stdin;
656 int study_options = 0;
662 int size_offsets = 45;
663 int size_offsets_max;
674 /* These vectors store, end-to-end, a list of captured substring names. Assume
675 that 1024 is plenty long enough for the few names we'll be testing. */
677 uschar copynames[1024];
678 uschar getnames[1024];
680 uschar *copynamesptr;
683 /* Get buffers from malloc() so that Electric Fence will check their misuse
684 when I am debugging. They grow automatically when very long lines are read. */
686 buffer = (unsigned char *)malloc(buffer_size);
687 dbuffer = (unsigned char *)malloc(buffer_size);
688 pbuffer = (unsigned char *)malloc(buffer_size);
690 /* The outfile variable is static so that new_malloc can use it. The _setmode()
691 stuff is some magic that I don't understand, but which apparently does good
692 things in Windows. It's related to line terminations. */
694 #if defined(_WIN32) || defined(WIN32)
695 _setmode( _fileno( stdout ), 0x8000 );
696 #endif /* defined(_WIN32) || defined(WIN32) */
702 while (argc > 1 && argv[op][0] == '-')
704 unsigned char *endptr;
706 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
708 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
709 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
710 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
711 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
713 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
715 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
716 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
722 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
723 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
727 printf("PCRE: -S not supported on this OS\n");
732 getrlimit(RLIMIT_STACK, &rlim);
733 rlim.rlim_cur = stack_size * 1024 * 1024;
734 rc = setrlimit(RLIMIT_STACK, &rlim);
737 printf("PCRE: setrlimit() failed with error %d\n", rc);
745 else if (strcmp(argv[op], "-p") == 0) posix = 1;
747 else if (strcmp(argv[op], "-C") == 0)
750 printf("PCRE version %s\n", pcre_version());
751 printf("Compiled with\n");
752 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
753 printf(" %sUTF-8 support\n", rc? "" : "No ");
754 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
755 printf(" %sUnicode properties support\n", rc? "" : "No ");
756 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
757 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
758 (rc == '\n')? "LF" : "CRLF");
759 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
760 printf(" Internal link size = %d\n", rc);
761 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
762 printf(" POSIX malloc threshold = %d\n", rc);
763 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
764 printf(" Default match limit = %d\n", rc);
765 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
766 printf(" Default recursion depth limit = %d\n", rc);
767 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
768 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
773 printf("** Unknown or malformed option %s\n", argv[op]);
774 printf("Usage: pcretest [options] [<input> [<output>]]\n");
775 printf(" -C show PCRE compile-time options and exit\n");
776 printf(" -d debug: show compiled code; implies -i\n");
778 printf(" -dfa force DFA matching for all subjects\n");
780 printf(" -i show information about compiled pattern\n"
781 " -m output memory used information\n"
782 " -o <n> set size of offsets vector to <n>\n");
784 printf(" -p use POSIX interface\n");
786 printf(" -S <n> set stack size to <n> megabytes\n");
787 printf(" -s output store (memory) used information\n"
788 " -t time compilation and execution\n");
796 /* Get the store for the offsets vector, and remember what it was */
798 size_offsets_max = size_offsets;
799 offsets = (int *)malloc(size_offsets_max * sizeof(int));
802 printf("** Failed to get %d bytes of memory for offsets vector\n",
803 size_offsets_max * sizeof(int));
808 /* Sort out the input and output files */
812 infile = fopen(argv[op], "rb");
815 printf("** Failed to open %s\n", argv[op]);
823 outfile = fopen(argv[op+1], "wb");
826 printf("** Failed to open %s\n", argv[op+1]);
832 /* Set alternative malloc function */
834 pcre_malloc = new_malloc;
835 pcre_free = new_free;
836 pcre_stack_malloc = stack_malloc;
837 pcre_stack_free = stack_free;
839 /* Heading line unless quiet, then prompt for first regex if stdin */
841 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
848 pcre_extra *extra = NULL;
850 #if !defined NOPOSIX /* There are still compilers that require no indent */
856 unsigned char *p, *pp, *ppp;
857 unsigned char *to_file = NULL;
858 const unsigned char *tables = NULL;
859 unsigned long int true_size, true_study_size = 0;
860 size_t size, regex_gotten_store;
862 int do_debug = debug;
865 int do_showinfo = showinfo;
868 int erroroffset, len, delimiter;
872 if (infile == stdin) printf(" re> ");
873 if (extend_inputline(infile, buffer) == NULL) break;
874 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
878 while (isspace(*p)) p++;
879 if (*p == 0) continue;
881 /* See if the pattern is to be loaded pre-compiled from a file. */
883 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
885 unsigned long int magic, get_options;
890 pp = p + (int)strlen((char *)p);
891 while (isspace(pp[-1])) pp--;
894 f = fopen((char *)p, "rb");
897 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
901 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
904 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
906 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
908 re = (real_pcre *)new_malloc(true_size);
909 regex_gotten_store = gotten_store;
911 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
913 magic = ((real_pcre *)re)->magic_number;
914 if (magic != MAGIC_NUMBER)
916 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
922 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
928 fprintf(outfile, "Compiled regex%s loaded from %s\n",
929 do_flip? " (byte-inverted)" : "", p);
931 /* Need to know if UTF-8 for printing data strings */
933 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
934 use_utf8 = (get_options & PCRE_UTF8) != 0;
936 /* Now see if there is any following study data */
938 if (true_study_size != 0)
940 pcre_study_data *psd;
942 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
943 extra->flags = PCRE_EXTRA_STUDY_DATA;
945 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
946 extra->study_data = psd;
948 if (fread(psd, 1, true_study_size, f) != true_study_size)
951 fprintf(outfile, "Failed to read data from %s\n", p);
952 if (extra != NULL) new_free(extra);
953 if (re != NULL) new_free(re);
957 fprintf(outfile, "Study data loaded from %s\n", p);
958 do_study = 1; /* To get the data output if requested */
960 else fprintf(outfile, "No study data\n");
966 /* In-line pattern (the usual case). Get the delimiter and seek the end of
967 the pattern; if it isn't complete, read more. */
971 if (isalnum(delimiter) || delimiter == '\\')
973 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
983 if (*pp == '\\' && pp[1] != 0) pp++;
984 else if (*pp == delimiter) break;
988 if (infile == stdin) printf(" > ");
989 if ((pp = extend_inputline(infile, pp)) == NULL)
991 fprintf(outfile, "** Unexpected EOF\n");
995 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
998 /* If the first character after the delimiter is backslash, make
999 the pattern end with backslash. This is purely to provide a way
1000 of testing for the error message when a pattern ends with backslash. */
1002 if (pp[1] == '\\') *pp++ = '\\';
1004 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1008 strcpy((char *)pbuffer, (char *)p);
1010 /* Look for options after final delimiter */
1014 log_store = showstore; /* default from command line */
1020 case 'f': options |= PCRE_FIRSTLINE; break;
1021 case 'g': do_g = 1; break;
1022 case 'i': options |= PCRE_CASELESS; break;
1023 case 'm': options |= PCRE_MULTILINE; break;
1024 case 's': options |= PCRE_DOTALL; break;
1025 case 'x': options |= PCRE_EXTENDED; break;
1027 case '+': do_showrest = 1; break;
1028 case 'A': options |= PCRE_ANCHORED; break;
1029 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1030 case 'D': do_debug = do_showinfo = 1; break;
1031 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1032 case 'F': do_flip = 1; break;
1033 case 'G': do_G = 1; break;
1034 case 'I': do_showinfo = 1; break;
1035 case 'J': options |= PCRE_DUPNAMES; break;
1036 case 'M': log_store = 1; break;
1037 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1039 #if !defined NOPOSIX
1040 case 'P': do_posix = 1; break;
1043 case 'S': do_study = 1; break;
1044 case 'U': options |= PCRE_UNGREEDY; break;
1045 case 'X': options |= PCRE_EXTRA; break;
1046 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1047 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1051 /* The '\r' test here is so that it works on Windows */
1052 while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1054 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1056 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1059 tables = pcre_maketables();
1065 while (*pp != 0) pp++;
1066 while (isspace(pp[-1])) pp--;
1072 int x = check_newline(pp, outfile);
1073 if (x == 0) goto SKIP_DATA;
1075 while (*pp++ != '>');
1079 case '\r': /* So that it works in Windows */
1085 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1090 /* Handle compiling via the POSIX interface, which doesn't support the
1091 timing, showing, or debugging options, nor the ability to pass over
1092 local character tables. */
1094 #if !defined NOPOSIX
1095 if (posix || do_posix)
1100 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1101 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1102 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1103 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1104 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1106 rc = regcomp(&preg, (char *)p, cflags);
1108 /* Compilation failed; go back for another re, skipping to blank line
1109 if non-interactive. */
1113 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1114 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1119 /* Handle compiling via the native interface */
1122 #endif /* !defined NOPOSIX */
1129 clock_t start_time = clock();
1130 for (i = 0; i < LOOPREPEAT; i++)
1132 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1133 if (re != NULL) free(re);
1135 time_taken = clock() - start_time;
1136 fprintf(outfile, "Compile time %.3f milliseconds\n",
1137 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1138 (double)CLOCKS_PER_SEC);
1141 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1143 /* Compilation failed; go back for another re, skipping to blank line
1144 if non-interactive. */
1148 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1150 if (infile != stdin)
1154 if (extend_inputline(infile, buffer) == NULL)
1159 len = (int)strlen((char *)buffer);
1160 while (len > 0 && isspace(buffer[len-1])) len--;
1161 if (len == 0) break;
1163 fprintf(outfile, "\n");
1168 /* Compilation succeeded; print data if required. There are now two
1169 info-returning functions. The old one has a limited interface and
1170 returns only limited data. Check that it agrees with the newer one. */
1173 fprintf(outfile, "Memory allocation (code space): %d\n",
1174 (int)(gotten_store -
1176 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1178 /* Extract the size for possible writing before possibly flipping it,
1179 and remember the store that was got. */
1181 true_size = ((real_pcre *)re)->size;
1182 regex_gotten_store = gotten_store;
1184 /* If /S was present, study the regexp to generate additional info to
1185 help with the matching. */
1193 clock_t start_time = clock();
1194 for (i = 0; i < LOOPREPEAT; i++)
1195 extra = pcre_study(re, study_options, &error);
1196 time_taken = clock() - start_time;
1197 if (extra != NULL) free(extra);
1198 fprintf(outfile, " Study time %.3f milliseconds\n",
1199 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1200 (double)CLOCKS_PER_SEC);
1202 extra = pcre_study(re, study_options, &error);
1204 fprintf(outfile, "Failed to study: %s\n", error);
1205 else if (extra != NULL)
1206 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1209 /* If the 'F' option was present, we flip the bytes of all the integer
1210 fields in the regex data block and the study block. This is to make it
1211 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1212 compiled on a different architecture. */
1216 real_pcre *rre = (real_pcre *)re;
1217 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1218 rre->size = byteflip(rre->size, sizeof(rre->size));
1219 rre->options = byteflip(rre->options, sizeof(rre->options));
1220 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1221 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1222 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1223 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1224 rre->name_table_offset = byteflip(rre->name_table_offset,
1225 sizeof(rre->name_table_offset));
1226 rre->name_entry_size = byteflip(rre->name_entry_size,
1227 sizeof(rre->name_entry_size));
1228 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1232 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1233 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1234 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1238 /* Extract information from the compiled data if required */
1244 unsigned long int get_options, all_options;
1245 #if !defined NOINFOCHECK
1246 int old_first_char, old_options, old_count;
1248 int count, backrefmax, first_char, need_char;
1249 int nameentrysize, namecount;
1250 const uschar *nametable;
1254 fprintf(outfile, "------------------------------------------------------------------\n");
1255 pcre_printint(re, outfile);
1258 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1259 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1260 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1261 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1262 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1263 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1264 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1265 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1266 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1268 #if !defined NOINFOCHECK
1269 old_count = pcre_info(re, &old_options, &old_first_char);
1270 if (count < 0) fprintf(outfile,
1271 "Error %d from pcre_info()\n", count);
1274 if (old_count != count) fprintf(outfile,
1275 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1278 if (old_first_char != first_char) fprintf(outfile,
1279 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1280 first_char, old_first_char);
1282 if (old_options != (int)get_options) fprintf(outfile,
1283 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1284 get_options, old_options);
1288 if (size != regex_gotten_store) fprintf(outfile,
1289 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1290 (int)size, (int)regex_gotten_store);
1292 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1294 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1298 fprintf(outfile, "Named capturing subpatterns:\n");
1299 while (namecount-- > 0)
1301 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1302 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1303 GET2(nametable, 0));
1304 nametable += nameentrysize;
1308 /* The NOPARTIAL bit is a private bit in the options, so we have
1309 to fish it out via our back door */
1311 all_options = ((real_pcre *)re)->options;
1314 all_options = byteflip(all_options, sizeof(all_options));
1317 if ((all_options & PCRE_NOPARTIAL) != 0)
1318 fprintf(outfile, "Partial matching not supported\n");
1320 if (get_options == 0) fprintf(outfile, "No options\n");
1321 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1322 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1323 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1324 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1325 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1326 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1327 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1328 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1329 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1330 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1331 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1332 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1333 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1334 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1336 switch (get_options & PCRE_NEWLINE_CRLF)
1338 case PCRE_NEWLINE_CR:
1339 fprintf(outfile, "Forced newline sequence: CR\n");
1342 case PCRE_NEWLINE_LF:
1343 fprintf(outfile, "Forced newline sequence: LF\n");
1346 case PCRE_NEWLINE_CRLF:
1347 fprintf(outfile, "Forced newline sequence: CRLF\n");
1354 if (first_char == -1)
1356 fprintf(outfile, "First char at start or follows newline\n");
1358 else if (first_char < 0)
1360 fprintf(outfile, "No first char\n");
1364 int ch = first_char & 255;
1365 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1368 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1370 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1375 fprintf(outfile, "No need char\n");
1379 int ch = need_char & 255;
1380 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1383 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1385 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1388 /* Don't output study size; at present it is in any case a fixed
1389 value, but it varies, depending on the computer architecture, and
1390 so messes up the test suite. (And with the /F option, it might be
1396 fprintf(outfile, "Study returned NULL\n");
1399 uschar *start_bits = NULL;
1400 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1402 if (start_bits == NULL)
1403 fprintf(outfile, "No starting byte set\n");
1408 fprintf(outfile, "Starting byte set: ");
1409 for (i = 0; i < 256; i++)
1411 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1415 fprintf(outfile, "\n ");
1418 if (isprint(i) && i != ' ')
1420 fprintf(outfile, "%c ", i);
1425 fprintf(outfile, "\\x%02x ", i);
1430 fprintf(outfile, "\n");
1436 /* If the '>' option was present, we write out the regex to a file, and
1437 that is all. The first 8 bytes of the file are the regex length and then
1438 the study length, in big-endian order. */
1440 if (to_file != NULL)
1442 FILE *f = fopen((char *)to_file, "wb");
1445 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1450 sbuf[0] = (true_size >> 24) & 255;
1451 sbuf[1] = (true_size >> 16) & 255;
1452 sbuf[2] = (true_size >> 8) & 255;
1453 sbuf[3] = (true_size) & 255;
1455 sbuf[4] = (true_study_size >> 24) & 255;
1456 sbuf[5] = (true_study_size >> 16) & 255;
1457 sbuf[6] = (true_study_size >> 8) & 255;
1458 sbuf[7] = (true_study_size) & 255;
1460 if (fwrite(sbuf, 1, 8, f) < 8 ||
1461 fwrite(re, 1, true_size, f) < true_size)
1463 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1467 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1470 if (fwrite(extra->study_data, 1, true_study_size, f) <
1473 fprintf(outfile, "Write error on %s: %s\n", to_file,
1476 else fprintf(outfile, "Study data written to %s\n", to_file);
1483 if (extra != NULL) new_free(extra);
1484 if (tables != NULL) new_free((void *)tables);
1485 continue; /* With next regex */
1487 } /* End of non-POSIX compile */
1489 /* Read data lines and test them */
1494 uschar *bptr = dbuffer;
1495 int *use_offsets = offsets;
1496 int use_size_offsets = size_offsets;
1497 int callout_data = 0;
1498 int callout_data_set = 0;
1500 int copystrings = 0;
1501 int find_match_limit = 0;
1505 int start_offset = 0;
1514 copynamesptr = copynames;
1515 getnamesptr = getnames;
1517 pcre_callout = callout;
1521 callout_fail_count = 999999;
1522 callout_fail_id = -1;
1525 if (extra != NULL) extra->flags &=
1526 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1531 if (infile == stdin) printf("data> ");
1532 if (extend_inputline(infile, buffer + len) == NULL)
1538 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1539 len = (int)strlen((char *)buffer);
1540 if (buffer[len-1] == '\n') break;
1543 while (len > 0 && isspace(buffer[len-1])) len--;
1545 if (len == 0) break;
1548 while (isspace(*p)) p++;
1551 while ((c = *p++) != 0)
1556 if (c == '\\') switch ((c = *p++))
1558 case 'a': c = 7; break;
1559 case 'b': c = '\b'; break;
1560 case 'e': c = 27; break;
1561 case 'f': c = '\f'; break;
1562 case 'n': c = '\n'; break;
1563 case 'r': c = '\r'; break;
1564 case 't': c = '\t'; break;
1565 case 'v': c = '\v'; break;
1567 case '0': case '1': case '2': case '3':
1568 case '4': case '5': case '6': case '7':
1570 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1571 c = c * 8 + *p++ - '0';
1574 if (use_utf8 && c > 255)
1576 unsigned char buff8[8];
1578 utn = ord2utf8(c, buff8);
1579 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1580 c = buff8[ii]; /* Last byte */
1587 /* Handle \x{..} specially - new Perl thing for utf8 */
1592 unsigned char *pt = p;
1594 while (isxdigit(*(++pt)))
1595 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1598 unsigned char buff8[8];
1600 utn = ord2utf8(c, buff8);
1601 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1602 c = buff8[ii]; /* Last byte */
1606 /* Not correct form; fall through */
1613 while (i++ < 2 && isxdigit(*p))
1615 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1620 case 0: /* \ followed by EOF allows for an empty line */
1625 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1628 case 'A': /* Option setting */
1629 options |= PCRE_ANCHORED;
1633 options |= PCRE_NOTBOL;
1637 if (isdigit(*p)) /* Set copy string */
1639 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1640 copystrings |= 1 << n;
1642 else if (isalnum(*p))
1644 uschar *npp = copynamesptr;
1645 while (isalnum(*p)) *npp++ = *p++;
1648 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1650 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1660 pcre_callout = NULL;
1665 callout_fail_id = 0;
1668 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1669 callout_fail_count = 0;
1674 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1681 if (*(++p) == '-') { sign = -1; p++; }
1683 callout_data = callout_data * 10 + *p++ - '0';
1684 callout_data *= sign;
1685 callout_data_set = 1;
1691 #if !defined NOPOSIX
1692 if (posix || do_posix)
1693 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1700 options |= PCRE_DFA_SHORTEST;
1707 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1708 getstrings |= 1 << n;
1710 else if (isalnum(*p))
1712 uschar *npp = getnamesptr;
1713 while (isalnum(*p)) *npp++ = *p++;
1716 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1718 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1728 find_match_limit = 1;
1732 options |= PCRE_NOTEMPTY;
1736 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1737 if (n > size_offsets_max)
1739 size_offsets_max = n;
1741 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1742 if (offsets == NULL)
1744 printf("** Failed to get %d bytes of memory for offsets vector\n",
1745 size_offsets_max * sizeof(int));
1750 use_size_offsets = n;
1751 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1755 options |= PCRE_PARTIAL;
1759 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1762 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1765 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1766 extra->match_limit_recursion = n;
1770 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1773 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1776 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1777 extra->match_limit = n;
1782 options |= PCRE_DFA_RESTART;
1791 options |= PCRE_NOTEOL;
1795 options |= PCRE_NO_UTF8_CHECK;
1800 int x = check_newline(p, outfile);
1801 if (x == 0) goto NEXT_DATA;
1803 while (*p++ != '>');
1812 if ((all_use_dfa || use_dfa) && find_match_limit)
1814 printf("**Match limit not relevant for DFA matching: ignored\n");
1815 find_match_limit = 0;
1818 /* Handle matching via the POSIX interface, which does not
1819 support timing or playing with the match limit or callout data. */
1821 #if !defined NOPOSIX
1822 if (posix || do_posix)
1826 regmatch_t *pmatch = NULL;
1827 if (use_size_offsets > 0)
1828 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1829 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1830 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1832 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1836 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1837 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1839 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1842 fprintf(outfile, "Matched with REG_NOSUB\n");
1847 for (i = 0; i < (size_t)use_size_offsets; i++)
1849 if (pmatch[i].rm_so >= 0)
1851 fprintf(outfile, "%2d: ", (int)i);
1852 (void)pchars(dbuffer + pmatch[i].rm_so,
1853 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1854 fprintf(outfile, "\n");
1855 if (i == 0 && do_showrest)
1857 fprintf(outfile, " 0+ ");
1858 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1860 fprintf(outfile, "\n");
1868 /* Handle matching via the native interface - repeats for /g and /G */
1871 #endif /* !defined NOPOSIX */
1873 for (;; gmatched++) /* Loop for /g or /G */
1879 clock_t start_time = clock();
1882 if (all_use_dfa || use_dfa)
1884 int workspace[1000];
1885 for (i = 0; i < LOOPREPEAT; i++)
1886 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1887 options | g_notempty, use_offsets, use_size_offsets, workspace,
1888 sizeof(workspace)/sizeof(int));
1893 for (i = 0; i < LOOPREPEAT; i++)
1894 count = pcre_exec(re, extra, (char *)bptr, len,
1895 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1897 time_taken = clock() - start_time;
1898 fprintf(outfile, "Execute time %.3f milliseconds\n",
1899 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1900 (double)CLOCKS_PER_SEC);
1903 /* If find_match_limit is set, we want to do repeated matches with
1904 varying limits in order to find the minimum value for the match limit and
1905 for the recursion limit. */
1907 if (find_match_limit)
1911 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1915 (void)check_match_limit(re, extra, bptr, len, start_offset,
1916 options|g_notempty, use_offsets, use_size_offsets,
1917 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1918 PCRE_ERROR_MATCHLIMIT, "match()");
1920 count = check_match_limit(re, extra, bptr, len, start_offset,
1921 options|g_notempty, use_offsets, use_size_offsets,
1922 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1923 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
1926 /* If callout_data is set, use the interface with additional data */
1928 else if (callout_data_set)
1932 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1935 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1936 extra->callout_data = &callout_data;
1937 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1938 options | g_notempty, use_offsets, use_size_offsets);
1939 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1942 /* The normal case is just to do the match once, with the default
1943 value of match_limit. */
1946 else if (all_use_dfa || use_dfa)
1948 int workspace[1000];
1949 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1950 options | g_notempty, use_offsets, use_size_offsets, workspace,
1951 sizeof(workspace)/sizeof(int));
1954 fprintf(outfile, "Matched, but too many subsidiary matches\n");
1955 count = use_size_offsets/2;
1962 count = pcre_exec(re, extra, (char *)bptr, len,
1963 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1966 fprintf(outfile, "Matched, but too many substrings\n");
1967 count = use_size_offsets/3;
1976 for (i = 0; i < count * 2; i += 2)
1978 if (use_offsets[i] < 0)
1979 fprintf(outfile, "%2d: <unset>\n", i/2);
1982 fprintf(outfile, "%2d: ", i/2);
1983 (void)pchars(bptr + use_offsets[i],
1984 use_offsets[i+1] - use_offsets[i], outfile);
1985 fprintf(outfile, "\n");
1990 fprintf(outfile, " 0+ ");
1991 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1993 fprintf(outfile, "\n");
1999 for (i = 0; i < 32; i++)
2001 if ((copystrings & (1 << i)) != 0)
2003 char copybuffer[256];
2004 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2005 i, copybuffer, sizeof(copybuffer));
2007 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2009 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2013 for (copynamesptr = copynames;
2015 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2017 char copybuffer[256];
2018 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2019 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2021 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2023 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2026 for (i = 0; i < 32; i++)
2028 if ((getstrings & (1 << i)) != 0)
2030 const char *substring;
2031 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2034 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2037 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2038 pcre_free_substring(substring);
2043 for (getnamesptr = getnames;
2045 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2047 const char *substring;
2048 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2049 count, (char *)getnamesptr, &substring);
2051 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2054 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2055 pcre_free_substring(substring);
2061 const char **stringlist;
2062 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2065 fprintf(outfile, "get substring list failed %d\n", rc);
2068 for (i = 0; i < count; i++)
2069 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2070 if (stringlist[i] != NULL)
2071 fprintf(outfile, "string list not terminated by NULL\n");
2072 /* free((void *)stringlist); */
2073 pcre_free_substring_list(stringlist);
2078 /* There was a partial match */
2080 else if (count == PCRE_ERROR_PARTIAL)
2082 fprintf(outfile, "Partial match");
2084 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2085 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2086 bptr + use_offsets[0]);
2088 fprintf(outfile, "\n");
2089 break; /* Out of the /g loop */
2092 /* Failed to match. If this is a /g or /G loop and we previously set
2093 g_notempty after a null match, this is not necessarily the end.
2094 We want to advance the start offset, and continue. In the case of UTF-8
2095 matching, the advance must be one character, not one byte. Fudge the
2096 offset values to achieve this. We won't be at the end of the string -
2097 that was checked before setting g_notempty. */
2101 if (g_notempty != 0)
2104 use_offsets[0] = start_offset;
2107 while (start_offset + onechar < len)
2109 int tb = bptr[start_offset+onechar];
2110 if (tb <= 127) break;
2112 if (tb != 0 && tb != 0xc0) onechar++;
2115 use_offsets[1] = start_offset + onechar;
2119 if (count == PCRE_ERROR_NOMATCH)
2121 if (gmatched == 0) fprintf(outfile, "No match\n");
2123 else fprintf(outfile, "Error %d\n", count);
2124 break; /* Out of the /g loop */
2128 /* If not /g or /G we are done */
2130 if (!do_g && !do_G) break;
2132 /* If we have matched an empty string, first check to see if we are at
2133 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2134 what Perl's /g option does. This turns out to be rather cunning. First
2135 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2136 same point. If this fails (picked up above) we advance to the next
2140 if (use_offsets[0] == use_offsets[1])
2142 if (use_offsets[0] == len) break;
2143 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2146 /* For /g, update the start offset, leaving the rest alone */
2148 if (do_g) start_offset = use_offsets[1];
2150 /* For /G, update the pointer and length */
2154 bptr += use_offsets[1];
2155 len -= use_offsets[1];
2157 } /* End of loop for /g and /G */
2159 NEXT_DATA: continue;
2160 } /* End of loop for data lines */
2164 #if !defined NOPOSIX
2165 if (posix || do_posix) regfree(&preg);
2168 if (re != NULL) new_free(re);
2169 if (extra != NULL) new_free(extra);
2172 new_free((void *)tables);
2173 setlocale(LC_CTYPE, "C");
2177 if (infile == stdin) fprintf(outfile, "\n");
2181 if (infile != NULL && infile != stdin) fclose(infile);
2182 if (outfile != NULL && outfile != stdout) fclose(outfile);
2192 /* End of pcretest.c */