1 /* $Cambridge: exim/src/src/pcre/pcre_printint.src,v 1.1 2006/11/07 16:50:36 ph10 Exp $ */
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
10 Written by Philip Hazel
11 Copyright (c) 1997-2005 University of Cambridge
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
*/
43 /* This module contains a PCRE private debugging function for printing out the
44 internal form of a compiled regular expression, along with some supporting
45 local functions. This source file is used in two places:
47 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
48 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
50 (2) It is always #included by pcretest.c, which can be asked to print out a
51 compiled regex for debugging purposes. */
/* Table of opcode names. It is indexed by opcode value when items are printed;
the entries are supplied by the OP_NAME_LIST macro, whose definition is not
part of this fragment. */
54 static const char *OP_names[] = { OP_NAME_LIST };
57 /*************************************************
58 * Print single- or multi-byte character *
59 *************************************************/
/* Print one character to the stream f. When utf8 is false, or the byte being
examined does not have both of its top two bits set, the byte is written as
itself if printable and as a two-digit hex escape otherwise. For a multi-byte
UTF-8 sequence the value is assembled from the lead byte and its continuation
bytes and written in hex.

NOTE(review): the declaration of c, the setting of s, and every return
statement are missing from this fragment. Callers add 1 plus the result to
their code pointer, so the result is presumably the number of additional bytes
used - confirm against the complete source. */
62 print_char(FILE *f, uschar *ptr, BOOL utf8)
/* Single-byte case */
66 if (!utf8 || (c & 0xc0) != 0xc0)
68 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
74 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
/* Mask the lead byte with the table entry for this length and shift the
result up by s bits */
76 c = (c & _pcre_utf8_table3[a]) << s;
77 for (i = 1; i <= a; i++)
79 /* This is a check for malformed UTF-8; it should only occur if the sanity
80 check has been turned off. Rather than swallow random bytes, just stop if
81 we hit a bad one. Print it with \X instead of \x as an indication. */
83 if ((ptr[i] & 0xc0) != 0x80)
85 fprintf(f, "\\X{%x}", c);
/* Merge in the low six bits of a valid continuation byte */
92 c |= (ptr[i] & 0x3f) << s;
/* Values below 128 get a two-digit escape; larger ones use the braced form */
94 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
101 /*************************************************
102 * Find Unicode property name *
103 *************************************************/
/* Find the printable name of a Unicode property.

Arguments:
  ptype     the property type code taken from the compiled pattern
  pvalue    the property value code taken from the compiled pattern

Returns:    the name field of the _pcre_utt entry whose type and value both
            match, or "??" when no entry matches or when the property table
            is not compiled in

NOTE(review): the preprocessor lines were absent from the fragment this was
restored from; the two alternative return paths imply conditional compilation,
and SUPPORT_UCP is assumed to be the controlling macro - confirm. */

static const char *
get_ucpname(int ptype, int pvalue)
{
#ifdef SUPPORT_UCP
int i;

/* The scan runs from the last entry down to the first. This assumes that
_pcre_utt_size is the number of entries in _pcre_utt (confirm against the
table's definition), so the last valid index is _pcre_utt_size - 1; starting
at _pcre_utt_size itself would read one element beyond the end of the table. */

for (i = _pcre_utt_size - 1; i >= 0; i--)
  {
  if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
  }
return (i >= 0)? _pcre_utt[i].name : "??";
#else
/* There is no table to search in this configuration. The casts do nothing
except mark the arguments as deliberately unused, which keeps compilers from
warning about them. */
(void)ptype;
(void)pvalue;
return "??";
#endif
}
124 /*************************************************
125 * Print compiled regex *
126 *************************************************/
128 /* Make this function work for a regex whose integers are in either byte order.
129 However, we assume that what we are passed is a compiled regex. */
/* Print the internal form of a compiled regex to the stream f, one item at a
time. external_re is cast to real_pcre in order to reach the header fields.
NOTE(review): the return type, the braces, the main loop header and most case
labels are missing from this fragment. */
132 pcre_printint(pcre *external_re, FILE *f)
134 real_pcre *re = (real_pcre *)external_re;
135 uschar *codestart, *code;
/* Local copies of the header fields, so that they can be adjusted below */
138 unsigned int options = re->options;
139 int offset = re->name_table_offset;
140 int count = re->name_count;
141 int size = re->name_entry_size;
/* A magic number that does not match is taken to mean that the header integers
are in the other byte order: the three name-table fields are swapped as 16-bit
values and options as a 32-bit value. */
143 if (re->magic_number != MAGIC_NUMBER)
145 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
146 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
147 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
148 options = ((options << 24) & 0xff000000) |
149 ((options << 8) & 0x00ff0000) |
150 ((options >> 8) & 0x0000ff00) |
151 ((options >> 24) & 0x000000ff);
/* The code begins after the name table, which is count entries of size bytes
each and lies offset bytes from the start of the structure. */
154 code = codestart = (uschar *)re + offset + count * size;
155 utf8 = (options & PCRE_UTF8) != 0;
/* Each item is preceded by its offset from the start of the code */
163 fprintf(f, "%3d ", (int)(code - codestart));
/* Bracket item: the number shown is the opcode's distance above OP_BRA, unless
that distance exceeds EXTRACT_BASIC_MAX, in which case "extra" is shown. */
167 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
168 fprintf(f, "%3d Bra extra\n", GET(code, 1));
170 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
171 code += _pcre_OP_lengths[OP_BRA];
/* NOTE(review): the case labels for the next three statements are not visible
here; the first two print an opcode name followed by a ruled line, the third
prints the byte at offset 1 in hex ahead of the opcode name. */
178 fprintf(f, " %s\n", OP_names[*code]);
179 fprintf(f, "------------------------------------------------------------------\n");
183 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
/* Consecutive OP_CHAR items are handled in one loop; each step advances past
the character plus whatever print_char() returns. */
191 code += 1 + print_char(f, code, utf8);
193 while (*code == OP_CHAR);
/* The same again for consecutive OP_CHARNC items */
202 code += 1 + print_char(f, code, utf8);
204 while (*code == OP_CHARNC);
215 case OP_ASSERTBACK_NOT:
/* Items shown as the GET() value at offset 1 followed by the opcode name */
219 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
223 printf("%3d %s", GET2(code, 1), OP_names[*code]);
/* A 2-byte value at offset 1 equal to CREF_RECURSE is shown as "Cond recurse";
the other statement prints the value ahead of the opcode name. NOTE(review):
the else that presumably separates the two is not visible in this fragment. */
227 if (GET2(code, 1) == CREF_RECURSE)
228 fprintf(f, " Cond recurse");
230 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
244 case OP_TYPEMINQUERY:
/* Single-item repeats. For opcodes at or above OP_TYPESTAR the repeated item
is itself an opcode, printed by name, with its property name added when it is
OP_PROP or OP_NOTPROP. Otherwise it is a character, and the result of
print_char() is kept in extra. The repeat opcode's own name is printed last. */
246 if (*code >= OP_TYPESTAR)
248 fprintf(f, "%s", OP_names[code[1]]);
249 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
251 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
255 else extra = print_char(f, code+1, utf8);
256 fprintf(f, "%s", OP_names[*code]);
/* Counted repeat of a character: the 2-byte count is at offset 1 and the
character at offset 3. NOTE(review): the non-exact forms print "," here,
whereas the corresponding type and negated-character forms below print "0," -
confirm whether that difference is intended. */
263 extra = print_char(f, code+3, utf8);
265 if (*code != OP_EXACT) fprintf(f, ",");
266 fprintf(f, "%d}", GET2(code,1));
267 if (*code == OP_MINUPTO) fprintf(f, "?");
/* Counted repeat where the repeated item is the opcode at offset 3 */
273 fprintf(f, " %s", OP_names[code[3]]);
274 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
276 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
280 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
281 fprintf(f, "%d}", GET2(code,1));
282 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
/* The byte at offset 1 shown as a negated one-character class */
286 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
287 else fprintf(f, " [^\\x%02x]", c);
/* The same, followed by the opcode name */
296 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
297 else fprintf(f, " [^\\x%02x]", c);
298 fprintf(f, "%s", OP_names[*code]);
/* Counted form: the character is at offset 3 and the 2-byte count at offset 1 */
304 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
305 else fprintf(f, " [^\\x%02x]{", c);
306 if (*code != OP_NOTEXACT) fprintf(f, "0,");
307 fprintf(f, "%d}", GET2(code,1));
308 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
/* An item shown as the GET() value at offset 1 followed by the opcode name */
312 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
/* Print a backslash and the 2-byte number at offset 1, point ccode at what
follows this item, and jump to the repeat-printing code at CLASS_REF_REPEAT. */
316 fprintf(f, " \\%d", GET2(code,1));
317 ccode = code + _pcre_OP_lengths[*code];
318 goto CLASS_REF_REPEAT;
/* An item shown as its name, the byte at offset 1, and two GET() values */
321 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
322 GET(code, 2 + LINK_SIZE));
/* An item shown as its name plus the property name looked up from the bytes at
offsets 1 and 2 */
327 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
330 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
331 having this code always here, and it makes it less messy without all those
#ifdefs. */
/* For OP_XCLASS, extra is loaded from the GET() value at offset 1 and ccode is
pointed at the byte after that value. In that byte XCL_MAP records whether a
bit map is to be printed, and XCL_NOT causes a leading "^" to be printed. */
343 if (*code == OP_XCLASS)
345 extra = GET(code, 1);
346 ccode = code + LINK_SIZE + 1;
347 printmap = (*ccode & XCL_MAP) != 0;
348 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
356 /* Print a bit map */
/* All 256 bits are examined. For a set bit i, j is moved forward to the first
clear bit after it. '-' and ']' are preceded by a backslash, and non-printing
values are shown in hex. NOTE(review): the statements that adjust j and i
around the range output are not visible in this fragment. */
360 for (i = 0; i < 256; i++)
362 if ((ccode[i/8] & (1 << (i&7))) != 0)
365 for (j = i+1; j < 256; j++)
366 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
367 if (i == '-' || i == ']') fprintf(f, "\\");
368 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
371 if (j != i + 1) fprintf(f, "-");
372 if (j == '-' || j == ']') fprintf(f, "\\");
373 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
381 /* For an XCLASS there is always some additional data */
383 if (*code == OP_XCLASS)
/* Items are read until the XCL_END marker */
386 while ((ch = *ccode++) != XCL_END)
/* A property item is followed by a type byte and a value byte */
390 int ptype = *ccode++;
391 int pvalue = *ccode++;
392 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
394 else if (ch == XCL_NOTPROP)
396 int ptype = *ccode++;
397 int pvalue = *ccode++;
398 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
/* NOTE(review): the conditions that select the next two print_char() calls are
not visible in this fragment. Both pass TRUE for the utf8 argument. */
402 ccode += 1 + print_char(f, ccode, TRUE);
406 ccode += 1 + print_char(f, ccode, TRUE);
412 /* Indicate a non-UTF8 class which was created by negation */
414 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
416 /* Handle repeats after a class or a back reference */
/* Print the name of the repeat opcode at ccode and add its length to extra */
427 fprintf(f, "%s", OP_names[*ccode]);
428 extra += _pcre_OP_lengths[*ccode];
/* Range repeat: a max of zero is printed with nothing after the comma */
435 if (max == 0) fprintf(f, "{%d,}", min);
436 else fprintf(f, "{%d,%d}", min, max);
437 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
438 extra += _pcre_OP_lengths[*ccode];
441 /* Do nothing if it's not a repeat; this code stops picky compilers
442 warning about the lack of a default code path. */
450 /* Anything else is just an item with no data */
453 fprintf(f, " %s", OP_names[*code]);
/* Advance by the opcode's table length plus any extra bytes noted above */
457 code += _pcre_OP_lengths[*code] + extra;
462 /* End of pcre_printint.src */