1 /* $Cambridge: exim/src/src/pcre/pcretest.c,v 1.2 2005/06/15 08:57:10 ph10 Exp $ */
3 /*************************************************
4 * PCRE testing program *
5 *************************************************/
7 /* This program was hacked up as a tester for PCRE. I really should have
8 written it more tidily in the first place. Will I ever learn? It has grown and
9 been extended and consequently is now rather, er, *very* untidy in places.
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
49 #define PCRE_SPY /* For Win32 build, import data, not export */
51 /* We need the internal info for displaying the results of pcre_study() and
52 other internal data; pcretest also uses some of the fixed tables, and generally
53 has "inside information" compared to a program that strictly follows the PCRE
56 #include "pcre_internal.h"
59 /* It is possible to compile this test program without including support for
60 testing the POSIX interface, though this is not available via the standard
64 #include "pcreposix.h"
67 /* It is also possible, for the benefit of the version imported into Exim, to
68 build pcretest without support for UTF8 (define NOUTF8), without the interface
69 to the DFA matcher (NODFA), and without the doublecheck of the old "info"
70 function (define NOINFOCHECK). */
/* Fallback definitions of CLOCKS_PER_SEC, used only when the system headers
have not defined it: either CLK_TCK or the fixed value 100.
NOTE(review): the in-line numbering jumps (73 -> 75 -> 77 -> 81), so the
directives that choose between the two fallbacks and that close the #ifndef
are not shown in this listing - confirm against the full file. */
73 #ifndef CLOCKS_PER_SEC
75 #define CLOCKS_PER_SEC CLK_TCK
77 #define CLOCKS_PER_SEC 100
/* Number of iterations of the timing loops in main() that repeat
pcre_compile() and pcre_study(); the elapsed time is divided by this value. */
81 #define LOOPREPEAT 500000
/* Sizes, in bytes, of the three buffers that main() obtains from malloc():
BUFFER_SIZE for buffer (input lines), PBUFFER_SIZE for pbuffer (the saved copy
of the pattern) and DBUFFER_SIZE for dbuffer. */
83 #define BUFFER_SIZE 30000
84 #define PBUFFER_SIZE BUFFER_SIZE
85 #define DBUFFER_SIZE BUFFER_SIZE
/* File-scope state shared between main() and the callout/allocator helpers. */

/* Memory-logging switch: main() resets it from the command-line default for
each pattern and the M pattern option sets it to 1. */
89 static int log_store = 0;
/* Count of callouts seen for the callout number held in callout_fail_id;
incremented in callout(). */
90 static int callout_count;
/* When non-zero, callout() sends its detailed output to outfile on every call
and leaves the callout number out of the indicator line. */
91 static int callout_extra;
/* Threshold for callout_count: callout() returns 1 once the count reaches it.
main() sets it to 999999 and can replace it with a number parsed from the
input. */
92 static int callout_fail_count;
/* Callout number for which callout() counts and may fail the match; main()
sets it to -1 and can replace it with a number parsed from the input. */
93 static int callout_fail_id;
/* Read by callout() to decide whether the subject is printed; where it is
set is not shown in this listing. */
94 static int first_callout;
/* NOTE(review): no use of this flag is visible in this listing; presumably it
gates the trace output of the allocator wrappers - TODO confirm. */
95 static int show_malloc;
/* Size that main() copies into regex_gotten_store after a pattern has been
loaded or compiled and later compares with PCRE_INFO_SIZE; where it is
written is not shown in this listing. */
97 static size_t gotten_store;
/* Buffer of PBUFFER_SIZE bytes allocated in main(); it receives a copy of the
current pattern, which callout() indexes by pattern position. */
99 static uschar *pbuffer = NULL;
103 /*************************************************
104 * Read number from string *
105 *************************************************/
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking the -o argument, so just keep it simple.

Leading white space is skipped and then a run of decimal digits is
accumulated. No sign is accepted and there is no overflow check. If there are
no digits the result is 0 and the end pointer is left at the first character
that is not white space.

Arguments:
  str         string to be converted
  endptr      where to put the end pointer, that is, the address of the first
                character that was not consumed

Returns:      the value of the digits, as an int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;

/* The explicit test for zero stops the scan at the end of the string. */

while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');

/* Tell the caller where conversion stopped so that it can check for junk. */

*endptr = str;
return result;
}
131 /*************************************************
132 * Convert UTF-8 string to value *
133 *************************************************/
135 /* This function takes one or more bytes that represents a UTF-8 character,
136 and returns the value of the character.
139 buffer a pointer to the byte vector
140 vptr a pointer to an int to receive the value
142 Returns: > 0 => the number of bytes consumed
143 -6 to 0 => malformed UTF-8 character at offset = (-return)
/* Decode the UTF-8 character that starts at buffer and store its value in
*vptr. The returns visible below are 1 for a single-byte (ASCII) character,
0 for an invalid first byte, and a negative number when an additional byte or
the length of the encoding is wrong.
NOTE(review): the in-line numbering jumps (149 -> 155 -> 157 ...), so the
return type, the braces, the declarations and initial values of c, d, i, j and
s, the updates of c, d and s inside the loops, and the store/return for a valid
multi-byte character are not shown in this listing - confirm against the full
file before relying on this description. */
149 utf82ord(unsigned char *buffer, int *vptr)
/* Stop at the first pass on which bit 0x80 of d is clear. Presumably d is
shifted left once per pass (that line is not shown). */
155 for (i = -1; i < 6; i++) /* i is number of additional bytes */
157 if ((d & 0x80) == 0) break;
161 if (i == -1) { *vptr = c; return 1; } /* ascii character */
162 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
164 /* i now has a value in the range 1-5 */
/* Start the value with the bits of the first byte that are selected by the
mask _pcre_utf8_table3[i], moved up by s bits. */
167 d = (c & _pcre_utf8_table3[i]) << s;
/* Every additional byte must have the form 10xxxxxx; if one does not, fail
with the negated 1-based position of that byte. Otherwise its low six bits are
merged into the value at shift s. */
169 for (j = 0; j < i; j++)
172 if ((c & 0xc0) != 0x80) return -(j+1);
174 d |= (c & 0x3f) << s;
177 /* Check that encoding was the correct unique one */
/* j becomes the index of the first _pcre_utf8_table1 entry that is not less
than d; if that differs from i the character is rejected with -(i+1).
Presumably entry j is the largest value that fits in j additional bytes -
TODO confirm against the table definition. */
179 for (j = 0; j < _pcre_utf8_table1_size; j++)
180 if (d <= _pcre_utf8_table1[j]) break;
181 if (j != i) return -(i+1);
193 /*************************************************
194 * Print character string *
195 *************************************************/
197 /* Character string printing function. Must handle UTF-8 strings in utf8
198 mode. Yields number of characters printed. If handed a NULL file, just counts
199 chars without printing. */
/* Print a byte string in a form that is safe to display. Printable characters
are written unchanged and anything else is written as a hex escape. Every
write is guarded by f != NULL, so a NULL f produces no output; callout() relies
on that to obtain printed widths without printing.
NOTE(review): the in-line numbering jumps (201 -> 211 -> 213 ...), so the
braces, the declarations of c and n, the loop over the string, the test that
selects the UTF-8 path, the else branches and the return statement are not
shown in this listing - confirm against the full file. */
201 static int pchars(unsigned char *p, int length, FILE *f)
/* UTF-8 path: decode one character; a positive rc is the number of bytes it
occupied. */
211 int rc = utf82ord(p, &c);
213 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
/* Printable values below 256 are echoed as they are. */
217 if (c < 256 && isprint(c))
219 if (f != NULL) fprintf(f, "%c", c);
/* Other values are written in braces as hex; %n stores in n the number of
characters this call has written. */
225 if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
233 /* Not UTF-8, or malformed UTF-8 */
/* Byte path: take one byte and print it directly or as a two-digit hex
escape. */
235 if (isprint(c = *(p++)))
237 if (f != NULL) fprintf(f, "%c", c);
242 if (f != NULL) fprintf(f, "\\x%02x", c);
252 /*************************************************
254 *************************************************/
256 /* Called from PCRE as a result of the (?C) item. We print out where we are in
257 the match. Yield zero unless more callouts than the fail count, or the callout
/* Callout handler; main() installs it in pcre_callout. It shows where the
match currently is in the subject and in the pattern, and decides whether the
match should be made to fail at this point.

f is outfile when first_callout or callout_extra is non-zero and NULL
otherwise. With a NULL f the pchars() calls write nothing and are used only
for the widths that line up the markers.

NOTE(review): the in-line numbering jumps (260 -> 262 -> 263 -> 267 ...), so
the braces, the conditions that guard some of the output and a few statements
are not shown in this listing - confirm against the full file. */
260 static int callout(pcre_callout_block *cb)
262 FILE *f = (first_callout | callout_extra)? outfile : NULL;
263 int i, pre_start, post_start, subject_length;
/* Report the callout number, the last capture and every captured substring;
an offset below zero marks a group that is unset. NOTE(review): these writes
use f without a visible NULL test, so presumably an enclosing condition is
among the missing lines. */
267 fprintf(f, "Callout %d: last capture = %d\n",
268 cb->callout_number, cb->capture_last);
270 for (i = 0; i < cb->capture_top * 2; i += 2)
272 if (cb->offset_vector[i] < 0)
273 fprintf(f, "%2d: <unset>\n", i/2);
276 fprintf(f, "%2d: ", i/2);
277 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
278 cb->offset_vector[i+1] - cb->offset_vector[i], f);
284 /* Re-print the subject in canonical form, the first time or if giving full
285 details. On subsequent calls in the same match, we use pchars just to find the
286 printed lengths of the substrings. */
288 if (f != NULL) fprintf(f, "--->");
/* pre_start is the printed width of the subject before the match start and
post_start the width from the match start to the current position. */
290 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
291 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
292 cb->current_position - cb->start_match, f);
/* The width of the whole subject is measured without printing (NULL file). */
294 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
296 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
297 cb->subject_length - cb->current_position, f);
299 if (f != NULL) fprintf(f, "\n");
301 /* Always print appropriate indicators, with callout number if not already
302 shown. For automatic callouts, show the pattern offset. */
/* Callout number 255 is the one treated as an automatic callout here. */
304 if (cb->callout_number == 255)
306 fprintf(outfile, "%+3d ", cb->pattern_position);
307 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
311 if (callout_extra) fprintf(outfile, " ");
312 else fprintf(outfile, "%3d ", cb->callout_number);
/* Pad to the column of the match start and mark it. */
315 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
316 fprintf(outfile, "^");
/* Pad to the column of the current position and mark it. */
320 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
321 fprintf(outfile, "^");
/* Pad past the rest of the subject, then show the next pattern item:
next_item_length characters of the saved pattern starting at pattern_position,
or a single character when that length is zero. */
324 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
325 fprintf(outfile, " ");
327 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
328 pbuffer + cb->pattern_position);
330 fprintf(outfile, "\n");
/* If callout data was supplied it is read as an int, and a non-zero value is
reported. NOTE(review): what follows the report is not shown in this
listing. */
333 if (cb->callout_data != NULL)
335 int callout_data = *((int *)(cb->callout_data));
336 if (callout_data != 0)
338 fprintf(outfile, "Callout data = %d\n", callout_data);
/* Yield 0 unless this is the callout number selected by callout_fail_id; for
that number, count the call and yield 1 once callout_fail_count has been
reached. */
343 return (cb->callout_number != callout_fail_id)? 0 :
344 (++callout_count >= callout_fail_count)? 1 : 0;
348 /*************************************************
349 * Local malloc functions *
350 *************************************************/
352 /* Alternative malloc function, to test functionality and show the size of the
/* Allocation hook that main() installs as pcre_malloc and also calls directly.
It obtains the block from malloc() and writes the requested size and the
address of the block to outfile.
NOTE(review): the in-line numbering jumps (355 -> 357 -> 360), so the braces,
any condition on the trace output, any other bookkeeping and the return
statement are not shown in this listing - confirm against the full file. */
355 static void *new_malloc(size_t size)
357 void *block = malloc(size);
/* The size is cast to int to match the %3d conversion. */
360 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
/* Release hook that main() installs as pcre_free and also calls directly. The
visible statement writes the address of the block to outfile.
NOTE(review): the in-line numbering jumps (364 -> 367), so the braces, any
condition on the trace output and the call that presumably frees the block are
not shown in this listing - confirm against the full file. */
364 static void new_free(void *block)
367 fprintf(outfile, "free %p\n", block);
372 /* For recursion malloc/free, to test stacking calls */
/* Allocation hook that main() installs as pcre_stack_malloc. It obtains the
block from malloc() and writes the requested size and the address of the block
to outfile.
NOTE(review): the in-line numbering jumps (374 -> 376 -> 378), so the braces,
any condition on the trace output and the return statement are not shown in
this listing - confirm against the full file. */
374 static void *stack_malloc(size_t size)
376 void *block = malloc(size);
/* The size is cast to int to match the %3d conversion. */
378 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
/* Release hook that main() installs as pcre_stack_free. The visible statement
writes the address of the block to outfile.
NOTE(review): the in-line numbering jumps (382 -> 385), so the braces, any
condition on the trace output and the call that presumably frees the block are
not shown in this listing - confirm against the full file. */
382 static void stack_free(void *block)
385 fprintf(outfile, "stack_free %p\n", block);
390 /*************************************************
391 * Call pcre_fullinfo() *
392 *************************************************/
394 /* Get one piece of information from the pcre_fullinfo() function */
/* Wrapper for pcre_fullinfo(): requests the item selected by option for the
compiled pattern re (with optional study data) and has the result stored
through ptr. A negative return code is reported on outfile together with the
option number; nothing is returned to the caller.
NOTE(review): the in-line numbering jumps (396 -> 399), so the braces and the
declaration of rc are not shown in this listing. */
396 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
399 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
400 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
405 /*************************************************
406 * Byte flipping function *
407 *************************************************/
/* Reverse the order of the low-order bytes of a value. This is used to test
the handling of compiled patterns whose integer fields have the opposite byte
order.

The work is done on an unsigned copy of the value: shifting a signed long so
that a set bit reaches the sign bit is undefined where long is 32 bits wide.

Arguments:
  value     the value whose bytes are to be reversed
  n         the width in bytes: 2 swaps the two low-order bytes; any other
              width is handled as 4, reversing the four low-order bytes

Returns:    the reversed value; all bits above the flipped width are zero
*/

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
422 /*************************************************
424 *************************************************/
426 /* Read lines from named file or stdin and write to named file or stdout; lines
427 consist of a regular expression, in delimiters and optionally followed by
428 options, followed by a set of test data, terminated by an empty line. */
430 int main(int argc, char **argv)
432 FILE *infile = stdin;
434 int study_options = 0;
439 int size_offsets = 45;
440 int size_offsets_max;
450 unsigned char *buffer;
451 unsigned char *dbuffer;
453 /* Get buffers from malloc() so that Electric Fence will check their misuse
454 when I am debugging. */
456 buffer = (unsigned char *)malloc(BUFFER_SIZE);
457 dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
458 pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
460 /* The outfile variable is static so that new_malloc can use it. The _setmode()
461 stuff is some magic that I don't understand, but which apparently does good
462 things in Windows. It's related to line terminations. */
464 #if defined(_WIN32) || defined(WIN32)
465 _setmode( _fileno( stdout ), 0x8000 );
466 #endif /* defined(_WIN32) || defined(WIN32) */
472 while (argc > 1 && argv[op][0] == '-')
474 unsigned char *endptr;
476 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
478 else if (strcmp(argv[op], "-t") == 0) timeit = 1;
479 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
480 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
482 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
484 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
485 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
492 else if (strcmp(argv[op], "-p") == 0) posix = 1;
494 else if (strcmp(argv[op], "-C") == 0)
497 printf("PCRE version %s\n", pcre_version());
498 printf("Compiled with\n");
499 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
500 printf(" %sUTF-8 support\n", rc? "" : "No ");
501 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
502 printf(" %sUnicode properties support\n", rc? "" : "No ");
503 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
504 printf(" Newline character is %s\n", (rc == '\r')? "CR" : "LF");
505 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
506 printf(" Internal link size = %d\n", rc);
507 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
508 printf(" POSIX malloc threshold = %d\n", rc);
509 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
510 printf(" Default match limit = %d\n", rc);
511 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
512 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
517 printf("** Unknown or malformed option %s\n", argv[op]);
518 printf("Usage: pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
519 printf(" -C show PCRE compile-time options and exit\n");
520 printf(" -d debug: show compiled code; implies -i\n");
522 printf(" -dfa force DFA matching for all subjects\n");
524 printf(" -i show information about compiled pattern\n"
525 " -m output memory used information\n"
526 " -o <n> set size of offsets vector to <n>\n");
528 printf(" -p use POSIX interface\n");
530 printf(" -s output store (memory) used information\n"
531 " -t time compilation and execution\n");
539 /* Get the store for the offsets vector, and remember what it was */
541 size_offsets_max = size_offsets;
542 offsets = (int *)malloc(size_offsets_max * sizeof(int));
545 printf("** Failed to get %d bytes of memory for offsets vector\n",
546 size_offsets_max * sizeof(int));
551 /* Sort out the input and output files */
555 infile = fopen(argv[op], "rb");
558 printf("** Failed to open %s\n", argv[op]);
566 outfile = fopen(argv[op+1], "wb");
569 printf("** Failed to open %s\n", argv[op+1]);
575 /* Set alternative malloc function */
577 pcre_malloc = new_malloc;
578 pcre_free = new_free;
579 pcre_stack_malloc = stack_malloc;
580 pcre_stack_free = stack_free;
582 /* Heading line, then prompt for first regex if stdin */
584 fprintf(outfile, "PCRE version %s\n\n", pcre_version());
591 pcre_extra *extra = NULL;
593 #if !defined NOPOSIX /* There are still compilers that require no indent */
599 unsigned char *p, *pp, *ppp;
600 unsigned char *to_file = NULL;
601 const unsigned char *tables = NULL;
602 unsigned long int true_size, true_study_size = 0;
603 size_t size, regex_gotten_store;
605 int do_debug = debug;
608 int do_showinfo = showinfo;
611 int erroroffset, len, delimiter;
615 if (infile == stdin) printf(" re> ");
616 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
617 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
621 while (isspace(*p)) p++;
622 if (*p == 0) continue;
624 /* See if the pattern is to be loaded pre-compiled from a file. */
626 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
628 unsigned long int magic;
633 pp = p + (int)strlen((char *)p);
634 while (isspace(pp[-1])) pp--;
637 f = fopen((char *)p, "rb");
640 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
644 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
647 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
649 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
651 re = (real_pcre *)new_malloc(true_size);
652 regex_gotten_store = gotten_store;
654 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
656 magic = ((real_pcre *)re)->magic_number;
657 if (magic != MAGIC_NUMBER)
659 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
665 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
671 fprintf(outfile, "Compiled regex%s loaded from %s\n",
672 do_flip? " (byte-inverted)" : "", p);
674 /* Need to know if UTF-8 for printing data strings */
676 new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
677 use_utf8 = (options & PCRE_UTF8) != 0;
679 /* Now see if there is any following study data */
681 if (true_study_size != 0)
683 pcre_study_data *psd;
685 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
686 extra->flags = PCRE_EXTRA_STUDY_DATA;
688 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
689 extra->study_data = psd;
691 if (fread(psd, 1, true_study_size, f) != true_study_size)
694 fprintf(outfile, "Failed to read data from %s\n", p);
695 if (extra != NULL) new_free(extra);
696 if (re != NULL) new_free(re);
700 fprintf(outfile, "Study data loaded from %s\n", p);
701 do_study = 1; /* To get the data output if requested */
703 else fprintf(outfile, "No study data\n");
709 /* In-line pattern (the usual case). Get the delimiter and seek the end of
710 the pattern; if it isn't complete, read more. */
714 if (isalnum(delimiter) || delimiter == '\\')
716 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
726 if (*pp == '\\' && pp[1] != 0) pp++;
727 else if (*pp == delimiter) break;
732 len = BUFFER_SIZE - (pp - buffer);
735 fprintf(outfile, "** Expression too long - missing delimiter?\n");
739 if (infile == stdin) printf(" > ");
740 if (fgets((char *)pp, len, infile) == NULL)
742 fprintf(outfile, "** Unexpected EOF\n");
746 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
749 /* If the first character after the delimiter is backslash, make
750 the pattern end with backslash. This is purely to provide a way
751 of testing for the error message when a pattern ends with backslash. */
753 if (pp[1] == '\\') *pp++ = '\\';
755 /* Terminate the pattern at the delimiter, and save a copy of the pattern
759 strcpy((char *)pbuffer, (char *)p);
761 /* Look for options after final delimiter */
765 log_store = showstore; /* default from command line */
771 case 'f': options |= PCRE_FIRSTLINE; break;
772 case 'g': do_g = 1; break;
773 case 'i': options |= PCRE_CASELESS; break;
774 case 'm': options |= PCRE_MULTILINE; break;
775 case 's': options |= PCRE_DOTALL; break;
776 case 'x': options |= PCRE_EXTENDED; break;
778 case '+': do_showrest = 1; break;
779 case 'A': options |= PCRE_ANCHORED; break;
780 case 'C': options |= PCRE_AUTO_CALLOUT; break;
781 case 'D': do_debug = do_showinfo = 1; break;
782 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
783 case 'F': do_flip = 1; break;
784 case 'G': do_G = 1; break;
785 case 'I': do_showinfo = 1; break;
786 case 'M': log_store = 1; break;
787 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
790 case 'P': do_posix = 1; break;
793 case 'S': do_study = 1; break;
794 case 'U': options |= PCRE_UNGREEDY; break;
795 case 'X': options |= PCRE_EXTRA; break;
796 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
797 case '?': options |= PCRE_NO_UTF8_CHECK; break;
801 /* The '\r' test here is so that it works on Windows */
802 while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
804 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
806 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
809 tables = pcre_maketables();
815 while (*pp != 0) pp++;
816 while (isspace(pp[-1])) pp--;
820 case '\r': /* So that it works in Windows */
826 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
831 /* Handle compiling via the POSIX interface, which doesn't support the
832 timing, showing, or debugging options, nor the ability to pass over
833 local character tables. */
836 if (posix || do_posix)
841 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
842 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
843 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
844 rc = regcomp(&preg, (char *)p, cflags);
846 /* Compilation failed; go back for another re, skipping to blank line
847 if non-interactive. */
851 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
852 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
857 /* Handle compiling via the native interface */
860 #endif /* !defined NOPOSIX */
867 clock_t start_time = clock();
868 for (i = 0; i < LOOPREPEAT; i++)
870 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
871 if (re != NULL) free(re);
873 time_taken = clock() - start_time;
874 fprintf(outfile, "Compile time %.3f milliseconds\n",
875 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
876 (double)CLOCKS_PER_SEC);
879 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
881 /* Compilation failed; go back for another re, skipping to blank line
882 if non-interactive. */
886 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
892 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
897 len = (int)strlen((char *)buffer);
898 while (len > 0 && isspace(buffer[len-1])) len--;
901 fprintf(outfile, "\n");
906 /* Compilation succeeded; print data if required. There are now two
907 info-returning functions. The old one has a limited interface and
908 returns only limited data. Check that it agrees with the newer one. */
911 fprintf(outfile, "Memory allocation (code space): %d\n",
914 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
916 /* Extract the size for possible writing before possibly flipping it,
917 and remember the store that was got. */
919 true_size = ((real_pcre *)re)->size;
920 regex_gotten_store = gotten_store;
922 /* If /S was present, study the regexp to generate additional info to
923 help with the matching. */
931 clock_t start_time = clock();
932 for (i = 0; i < LOOPREPEAT; i++)
933 extra = pcre_study(re, study_options, &error);
934 time_taken = clock() - start_time;
935 if (extra != NULL) free(extra);
936 fprintf(outfile, " Study time %.3f milliseconds\n",
937 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
938 (double)CLOCKS_PER_SEC);
940 extra = pcre_study(re, study_options, &error);
942 fprintf(outfile, "Failed to study: %s\n", error);
943 else if (extra != NULL)
944 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
947 /* If the 'F' option was present, we flip the bytes of all the integer
948 fields in the regex data block and the study block. This is to make it
949 possible to test PCRE's handling of byte-flipped patterns, e.g. those
950 compiled on a different architecture. */
954 real_pcre *rre = (real_pcre *)re;
955 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
956 rre->size = byteflip(rre->size, sizeof(rre->size));
957 rre->options = byteflip(rre->options, sizeof(rre->options));
958 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
959 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
960 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
961 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
962 rre->name_table_offset = byteflip(rre->name_table_offset,
963 sizeof(rre->name_table_offset));
964 rre->name_entry_size = byteflip(rre->name_entry_size,
965 sizeof(rre->name_entry_size));
966 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
970 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
971 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
972 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
976 /* Extract information from the compiled data if required */
982 unsigned long int get_options, all_options;
983 #if !defined NOINFOCHECK
984 int old_first_char, old_options, old_count;
986 int count, backrefmax, first_char, need_char;
987 int nameentrysize, namecount;
988 const uschar *nametable;
992 fprintf(outfile, "------------------------------------------------------------------\n");
993 _pcre_printint(re, outfile);
996 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
997 new_info(re, NULL, PCRE_INFO_SIZE, &size);
998 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
999 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1000 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1001 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1002 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1003 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1004 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1006 #if !defined NOINFOCHECK
1007 old_count = pcre_info(re, &old_options, &old_first_char);
1008 if (count < 0) fprintf(outfile,
1009 "Error %d from pcre_info()\n", count);
1012 if (old_count != count) fprintf(outfile,
1013 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1016 if (old_first_char != first_char) fprintf(outfile,
1017 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1018 first_char, old_first_char);
1020 if (old_options != (int)get_options) fprintf(outfile,
1021 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1022 get_options, old_options);
1026 if (size != regex_gotten_store) fprintf(outfile,
1027 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1028 (int)size, (int)regex_gotten_store);
1030 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1032 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1036 fprintf(outfile, "Named capturing subpatterns:\n");
1037 while (namecount-- > 0)
1039 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1040 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1041 GET2(nametable, 0));
1042 nametable += nameentrysize;
1046 /* The NOPARTIAL bit is a private bit in the options, so we have
1047 to fish it out via a back door */
1049 all_options = ((real_pcre *)re)->options;
1052 all_options = byteflip(all_options, sizeof(all_options));
1055 if ((all_options & PCRE_NOPARTIAL) != 0)
1056 fprintf(outfile, "Partial matching not supported\n");
1058 if (get_options == 0) fprintf(outfile, "No options\n");
1059 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1060 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1061 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1062 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1063 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1064 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1065 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1066 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1067 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1068 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1069 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1070 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1072 if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1073 fprintf(outfile, "Case state changes\n");
1075 if (first_char == -1)
1077 fprintf(outfile, "First char at start or follows \\n\n");
1079 else if (first_char < 0)
1081 fprintf(outfile, "No first char\n");
1085 int ch = first_char & 255;
1086 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1089 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1091 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1096 fprintf(outfile, "No need char\n");
1100 int ch = need_char & 255;
1101 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1104 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1106 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1109 /* Don't output study size; at present it is in any case a fixed
1110 value, but it varies, depending on the computer architecture, and
1111 so messes up the test suite. (And with the /F option, it might be
1117 fprintf(outfile, "Study returned NULL\n");
1120 uschar *start_bits = NULL;
1121 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1123 if (start_bits == NULL)
1124 fprintf(outfile, "No starting byte set\n");
1129 fprintf(outfile, "Starting byte set: ");
1130 for (i = 0; i < 256; i++)
1132 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1136 fprintf(outfile, "\n ");
1139 if (isprint(i) && i != ' ')
1141 fprintf(outfile, "%c ", i);
1146 fprintf(outfile, "\\x%02x ", i);
1151 fprintf(outfile, "\n");
1157 /* If the '>' option was present, we write out the regex to a file, and
1158 that is all. The first 8 bytes of the file are the regex length and then
1159 the study length, in big-endian order. */
1161 if (to_file != NULL)
1163 FILE *f = fopen((char *)to_file, "wb");
1166 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1171 sbuf[0] = (true_size >> 24) & 255;
1172 sbuf[1] = (true_size >> 16) & 255;
1173 sbuf[2] = (true_size >> 8) & 255;
1174 sbuf[3] = (true_size) & 255;
1176 sbuf[4] = (true_study_size >> 24) & 255;
1177 sbuf[5] = (true_study_size >> 16) & 255;
1178 sbuf[6] = (true_study_size >> 8) & 255;
1179 sbuf[7] = (true_study_size) & 255;
1181 if (fwrite(sbuf, 1, 8, f) < 8 ||
1182 fwrite(re, 1, true_size, f) < true_size)
1184 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1188 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1191 if (fwrite(extra->study_data, 1, true_study_size, f) <
1194 fprintf(outfile, "Write error on %s: %s\n", to_file,
1197 else fprintf(outfile, "Study data written to %s\n", to_file);
1204 if (extra != NULL) new_free(extra);
1205 if (tables != NULL) new_free((void *)tables);
1206 continue; /* With next regex */
1208 } /* End of non-POSIX compile */
1210 /* Read data lines and test them */
1215 unsigned char *bptr = dbuffer;
1216 int *use_offsets = offsets;
1217 int use_size_offsets = size_offsets;
1218 int callout_data = 0;
1219 int callout_data_set = 0;
1221 int copystrings = 0;
1222 int find_match_limit = 0;
1226 int start_offset = 0;
1232 pcre_callout = callout;
1236 callout_fail_count = 999999;
1237 callout_fail_id = -1;
1240 if (infile == stdin) printf("data> ");
1241 if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1246 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1248 len = (int)strlen((char *)buffer);
1249 while (len > 0 && isspace(buffer[len-1])) len--;
1251 if (len == 0) break;
1254 while (isspace(*p)) p++;
1257 while ((c = *p++) != 0)
1262 if (c == '\\') switch ((c = *p++))
1264 case 'a': c = 7; break;
1265 case 'b': c = '\b'; break;
1266 case 'e': c = 27; break;
1267 case 'f': c = '\f'; break;
1268 case 'n': c = '\n'; break;
1269 case 'r': c = '\r'; break;
1270 case 't': c = '\t'; break;
1271 case 'v': c = '\v'; break;
1273 case '0': case '1': case '2': case '3':
1274 case '4': case '5': case '6': case '7':
1276 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1277 c = c * 8 + *p++ - '0';
1282 /* Handle \x{..} specially - new Perl thing for utf8 */
1287 unsigned char *pt = p;
1289 while (isxdigit(*(++pt)))
1290 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1293 unsigned char buff8[8];
1295 utn = _pcre_ord2utf8(c, buff8);
1296 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1297 c = buff8[ii]; /* Last byte */
1301 /* Not correct form; fall through */
1308 while (i++ < 2 && isxdigit(*p))
1310 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1315 case 0: /* \ followed by EOF allows for an empty line */
1320 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1323 case 'A': /* Option setting */
1324 options |= PCRE_ANCHORED;
1328 options |= PCRE_NOTBOL;
1332 if (isdigit(*p)) /* Set copy string */
1334 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1335 copystrings |= 1 << n;
1337 else if (isalnum(*p))
1341 while (isalnum(*p)) *npp++ = *p++;
1343 n = pcre_get_stringnumber(re, (char *)name);
1345 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1346 else copystrings |= 1 << n;
1355 pcre_callout = NULL;
1360 callout_fail_id = 0;
1363 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1364 callout_fail_count = 0;
1369 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1376 if (*(++p) == '-') { sign = -1; p++; }
1378 callout_data = callout_data * 10 + *p++ - '0';
1379 callout_data *= sign;
1380 callout_data_set = 1;
1386 #if !defined NOPOSIX
1387 if (posix || do_posix)
1388 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1395 options |= PCRE_DFA_SHORTEST;
1402 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1403 getstrings |= 1 << n;
1405 else if (isalnum(*p))
1409 while (isalnum(*p)) *npp++ = *p++;
1411 n = pcre_get_stringnumber(re, (char *)name);
1413 fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1414 else getstrings |= 1 << n;
1423 find_match_limit = 1;
1427 options |= PCRE_NOTEMPTY;
1431 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1432 if (n > size_offsets_max)
1434 size_offsets_max = n;
1436 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1437 if (offsets == NULL)
1439 printf("** Failed to get %d bytes of memory for offsets vector\n",
1440 size_offsets_max * sizeof(int));
1445 use_size_offsets = n;
1446 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1450 options |= PCRE_PARTIAL;
1455 options |= PCRE_DFA_RESTART;
1464 options |= PCRE_NOTEOL;
1468 options |= PCRE_NO_UTF8_CHECK;
1476 if ((all_use_dfa || use_dfa) && find_match_limit)
1478 printf("**Match limit not relevant for DFA matching: ignored\n");
1479 find_match_limit = 0;
1482 /* Handle matching via the POSIX interface, which does not
1483 support timing or playing with the match limit or callout data. */
1485 #if !defined NOPOSIX
1486 if (posix || do_posix)
1490 regmatch_t *pmatch = NULL;
1491 if (use_size_offsets > 0)
1492 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1493 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1494 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1496 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1500 (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1501 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1506 for (i = 0; i < (size_t)use_size_offsets; i++)
1508 if (pmatch[i].rm_so >= 0)
1510 fprintf(outfile, "%2d: ", (int)i);
1511 (void)pchars(dbuffer + pmatch[i].rm_so,
1512 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1513 fprintf(outfile, "\n");
1514 if (i == 0 && do_showrest)
1516 fprintf(outfile, " 0+ ");
1517 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1519 fprintf(outfile, "\n");
1527 /* Handle matching via the native interface - repeats for /g and /G */
1530 #endif /* !defined NOPOSIX */
1532 for (;; gmatched++) /* Loop for /g or /G */
1538 clock_t start_time = clock();
1541 if (all_use_dfa || use_dfa)
1543 int workspace[1000];
1544 for (i = 0; i < LOOPREPEAT; i++)
1545 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1546 options | g_notempty, use_offsets, use_size_offsets, workspace,
1547 sizeof(workspace)/sizeof(int));
1552 for (i = 0; i < LOOPREPEAT; i++)
1553 count = pcre_exec(re, extra, (char *)bptr, len,
1554 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1556 time_taken = clock() - start_time;
1557 fprintf(outfile, "Execute time %.3f milliseconds\n",
1558 (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1559 (double)CLOCKS_PER_SEC);
1562 /* If find_match_limit is set, we want to do repeated matches with
1563 varying limits in order to find the minimum value. */
1565 if (find_match_limit)
1573 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1576 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1580 extra->match_limit = mid;
1581 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1582 options | g_notempty, use_offsets, use_size_offsets);
1583 if (count == PCRE_ERROR_MATCHLIMIT)
1585 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1587 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1589 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1590 count == PCRE_ERROR_PARTIAL)
1594 fprintf(outfile, "Minimum match limit = %d\n", mid);
1597 /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1599 mid = (min + mid)/2;
1601 else break; /* Some other error */
1604 extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1607 /* If callout_data is set, use the interface with additional data */
1609 else if (callout_data_set)
1613 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1616 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1617 extra->callout_data = &callout_data;
1618 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1619 options | g_notempty, use_offsets, use_size_offsets);
1620 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1623 /* The normal case is just to do the match once, with the default
1624 value of match_limit. */
1627 else if (all_use_dfa || use_dfa)
1629 int workspace[1000];
1630 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1631 options | g_notempty, use_offsets, use_size_offsets, workspace,
1632 sizeof(workspace)/sizeof(int));
1635 fprintf(outfile, "Matched, but too many subsidiary matches\n");
1636 count = use_size_offsets/2;
1643 count = pcre_exec(re, extra, (char *)bptr, len,
1644 start_offset, options | g_notempty, use_offsets, use_size_offsets);
1647 fprintf(outfile, "Matched, but too many substrings\n");
1648 count = use_size_offsets/3;
1657 for (i = 0; i < count * 2; i += 2)
1659 if (use_offsets[i] < 0)
1660 fprintf(outfile, "%2d: <unset>\n", i/2);
1663 fprintf(outfile, "%2d: ", i/2);
1664 (void)pchars(bptr + use_offsets[i],
1665 use_offsets[i+1] - use_offsets[i], outfile);
1666 fprintf(outfile, "\n");
1671 fprintf(outfile, " 0+ ");
1672 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1674 fprintf(outfile, "\n");
1680 for (i = 0; i < 32; i++)
1682 if ((copystrings & (1 << i)) != 0)
1684 char copybuffer[16];
1685 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1686 i, copybuffer, sizeof(copybuffer));
1688 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1690 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1694 for (i = 0; i < 32; i++)
1696 if ((getstrings & (1 << i)) != 0)
1698 const char *substring;
1699 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1702 fprintf(outfile, "get substring %d failed %d\n", i, rc);
1705 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1706 /* free((void *)substring); */
1707 pcre_free_substring(substring);
1714 const char **stringlist;
1715 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1718 fprintf(outfile, "get substring list failed %d\n", rc);
1721 for (i = 0; i < count; i++)
1722 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1723 if (stringlist[i] != NULL)
1724 fprintf(outfile, "string list not terminated by NULL\n");
1725 /* free((void *)stringlist); */
1726 pcre_free_substring_list(stringlist);
1731 /* There was a partial match */
1733 else if (count == PCRE_ERROR_PARTIAL)
1735 fprintf(outfile, "Partial match");
1737 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1738 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1739 bptr + use_offsets[0]);
1741 fprintf(outfile, "\n");
1742 break; /* Out of the /g loop */
1745 /* Failed to match. If this is a /g or /G loop and we previously set
1746 g_notempty after a null match, this is not necessarily the end.
1747 We want to advance the start offset, and continue. In the case of UTF-8
1748 matching, the advance must be one character, not one byte. Fudge the
1749 offset values to achieve this. We won't be at the end of the string -
1750 that was checked before setting g_notempty. */
1754 if (g_notempty != 0)
1757 use_offsets[0] = start_offset;
1760 while (start_offset + onechar < len)
1762 int tb = bptr[start_offset+onechar];
1763 if (tb <= 127) break;
1765 if (tb != 0 && tb != 0xc0) onechar++;
1768 use_offsets[1] = start_offset + onechar;
1772 if (count == PCRE_ERROR_NOMATCH)
1774 if (gmatched == 0) fprintf(outfile, "No match\n");
1776 else fprintf(outfile, "Error %d\n", count);
1777 break; /* Out of the /g loop */
1781 /* If not /g or /G we are done */
1783 if (!do_g && !do_G) break;
1785 /* If we have matched an empty string, first check to see if we are at
1786 the end of the subject. If so, the /g loop is over. Otherwise, mimic
1787 what Perl's /g option does. This turns out to be rather cunning. First
1788 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1789 same point. If this fails (picked up above) we advance to the next
1793 if (use_offsets[0] == use_offsets[1])
1795 if (use_offsets[0] == len) break;
1796 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1799 /* For /g, update the start offset, leaving the rest alone */
1801 if (do_g) start_offset = use_offsets[1];
1803 /* For /G, update the pointer and length */
1807 bptr += use_offsets[1];
1808 len -= use_offsets[1];
1810 } /* End of loop for /g and /G */
1811 } /* End of loop for data lines */
1815 #if !defined NOPOSIX
1816 if (posix || do_posix) regfree(&preg);
1819 if (re != NULL) new_free(re);
1820 if (extra != NULL) new_free(extra);
1823 new_free((void *)tables);
1824 setlocale(LC_CTYPE, "C");
1828 if (infile == stdin) fprintf(outfile, "\n");
1832 if (infile != NULL && infile != stdin) fclose(infile);
1833 if (outfile != NULL && outfile != stdout) fclose(outfile);
1843 /* End of pcretest.c */