1 /* $Cambridge: exim/src/src/pcre/pcre_printint.src,v 1.2 2007/01/23 15:08:45 ph10 Exp $ */
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
10 Written by Philip Hazel
11 Copyright (c) 1997-2005 University of Cambridge
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
43 /* This module contains a PCRE private debugging function for printing out the
44 internal form of a compiled regular expression, along with some supporting
45 local functions. This source file is used in two places:
47 (1) It is #included by pcre_compile.c when it is compiled in debugging mode
48 (DEBUG defined in pcre_internal.h). It is not included in production compiles.
50 (2) It is always #included by pcretest.c, which can be asked to print out a
51 compiled regex for debugging purposes. */
54 /* Macro that decides whether a character should be output as a literal or in
55 hexadecimal. We don't use isprint() because that can vary from system to system
56 (even without the use of locales) and we want the output always to be the same,
57 for testing purposes. This macro is used in pcretest as well as in this file.
The test is a fixed range check (32 <= c < 127). The argument is evaluated
twice, so it must not be an expression with side effects. */
59 #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
61 /* The table of operator names. It is initialised from OP_NAME_LIST and is
indexed by opcode value (for example OP_names[*code]) when printing. */
63 static const char *OP_names[] = { OP_NAME_LIST };
67 /*************************************************
68 * Print single- or multi-byte character *
69 *************************************************/
/* Print one character to f, either as a single byte or as a decoded UTF-8
multi-byte sequence.

Arguments:
  f      the stream to write to
  ptr    points at the first byte of the character
  utf8   when false, the character is always treated as a single byte

The result is added to the code pointer by callers ("code += 1 + print_char(...)"),
so it is presumably the number of bytes consumed beyond the first; the return
statements themselves are not visible in this listing - confirm. Likewise c is
presumably initialised from *ptr, and s is the bit shift applied to each
successive byte. */
72 print_char(FILE *f, uschar *ptr, BOOL utf8)
/* Not in UTF-8 mode, or the top two bits of c are not both set: handle c as a
single byte, printed literally when PRINTABLE and as \xhh otherwise. */
76 if (!utf8 || (c & 0xc0) != 0xc0)
78 if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
84 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
/* Keep only the data bits of the first byte (mask taken from
_pcre_utf8_table3) and move them to the top of the value being built. */
86 c = (c & _pcre_utf8_table3[a]) << s;
87 for (i = 1; i <= a; i++)
89 /* This is a check for malformed UTF-8; it should only occur if the sanity
90 check has been turned off. Rather than swallow random bytes, just stop if
91 we hit a bad one. Print it with \X instead of \x as an indication. */
93 if ((ptr[i] & 0xc0) != 0x80)
95 fprintf(f, "\\X{%x}", c);
/* A valid following byte has the bit pattern 10xxxxxx; merge its low six bits
into the value. */
102 c |= (ptr[i] & 0x3f) << s;
/* Values below 128 use the two-digit \xhh form; larger ones use \x{...}. */
104 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
111 /*************************************************
112 * Find Unicode property name *
113 *************************************************/
/* Find the name of the Unicode property identified by (ptype, pvalue).

The _pcre_utt table is scanned from its last entry downwards for an entry whose
type and value both match. The result is that entry's name, or "??" when no
entry matches.

The scan starts at _pcre_utt_size - 1. Assuming _pcre_utt_size is the number of
entries in _pcre_utt (its definition is not in this file - confirm), starting
at _pcre_utt_size itself would compare against one element past the end of the
table. */
116 get_ucpname(int ptype, int pvalue)
120 for (i = _pcre_utt_size - 1; i >= 0; i--)
122 if (ptype == _pcre_utt[i].type && pvalue == _pcre_utt[i].value) break;
124 return (i >= 0)? _pcre_utt[i].name : "??";
126 /* It gets harder and harder to shut off unwanted compiler warnings. The
statements below always yield "??"; they exist only so that both arguments are
used. */
127 ptype = ptype * pvalue;
128 return (ptype == pvalue)? "??" : "??";
134 /*************************************************
135 * Print compiled regex *
136 *************************************************/
138 /* Make this function work for a regex with integers in either byte order.
139 However, we assume that what we are passed is a compiled regex. */
/* Write a readable listing of the compiled pattern external_re to the stream
f. Header fields are read through a real_pcre view of the argument, the start
of the compiled code is located, and then each item is printed in turn,
prefixed by its offset from the start of the code. */
142 pcre_printint(pcre *external_re, FILE *f)
144 real_pcre *re = (real_pcre *)external_re;
145 uschar *codestart, *code;
148 unsigned int options = re->options;
149 int offset = re->name_table_offset;
150 int count = re->name_count;
151 int size = re->name_entry_size;
/* When the magic number does not match, the header integers are taken to be
in the other byte order: the three name-table fields have their two low bytes
exchanged and the options word has its four bytes reversed. */
153 if (re->magic_number != MAGIC_NUMBER)
155 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
156 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
157 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
158 options = ((options << 24) & 0xff000000) |
159 ((options << 8) & 0x00ff0000) |
160 ((options >> 8) & 0x0000ff00) |
161 ((options >> 24) & 0x000000ff);
/* The code begins after the name table, which is count entries of size bytes
each, starting offset bytes from the beginning of the real_pcre block. */
164 code = codestart = (uschar *)re + offset + count * size;
165 utf8 = (options & PCRE_UTF8) != 0;
/* Every item starts with its byte offset from the start of the code. */
173 fprintf(f, "%3d ", (int)(code - codestart));
178 fprintf(f, " %s\n", OP_names[*code]);
179 fprintf(f, "------------------------------------------------------------------\n");
183 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
/* A run of consecutive OP_CHAR items is printed together; the value returned
by print_char is added to the one-byte advance for each character. */
191 code += 1 + print_char(f, code, utf8);
193 while (*code == OP_CHAR);
/* The same treatment is given to a run of OP_CHARNC items. */
202 code += 1 + print_char(f, code, utf8);
204 while (*code == OP_CHARNC);
210 fprintf(f, "%3d %s %d", GET(code, 1), OP_names[*code],
211 GET2(code, 1+LINK_SIZE));
223 case OP_ASSERTBACK_NOT:
228 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
232 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
238 fprintf(f, " Cond recurse any");
240 fprintf(f, " Cond recurse %d", c);
244 fprintf(f, " Cond def");
263 case OP_TYPEMINQUERY:
264 case OP_TYPEPOSQUERY:
/* For opcodes from OP_TYPESTAR upwards the repeated item in code[1] is itself
an opcode and is printed by name (followed by the property name for OP_PROP and
OP_NOTPROP); otherwise the repeated item is a character. */
266 if (*code >= OP_TYPESTAR)
268 fprintf(f, "%s", OP_names[code[1]]);
269 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
271 fprintf(f, " %s ", get_ucpname(code[2], code[3]));
275 else extra = print_char(f, code+1, utf8);
276 fprintf(f, "%s", OP_names[*code]);
/* Counted character repeats: the character follows the two-byte count, and the
count is printed as {n} for an exact repeat or {0,n} otherwise, with a '?' or
'+' suffix for the minimizing and possessive forms. */
284 extra = print_char(f, code+3, utf8);
286 if (*code != OP_EXACT) fprintf(f, "0,");
287 fprintf(f, "%d}", GET2(code,1));
288 if (*code == OP_MINUPTO) fprintf(f, "?");
289 else if (*code == OP_POSUPTO) fprintf(f, "+");
/* Counted type repeats: as above, but the repeated item at code[3] is an
opcode printed by name. */
296 fprintf(f, " %s", OP_names[code[3]]);
297 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
299 fprintf(f, " %s ", get_ucpname(code[4], code[5]));
303 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
304 fprintf(f, "%d}", GET2(code,1));
305 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
306 else if (*code == OP_TYPEPOSUPTO) fprintf(f, "+");
/* Negated single characters are shown in [^c] form, using \xhh when the
character is not PRINTABLE. */
311 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
312 else fprintf(f, " [^\\x%02x]", c);
325 if (PRINTABLE(c)) fprintf(f, " [^%c]", c);
326 else fprintf(f, " [^\\x%02x]", c);
327 fprintf(f, "%s", OP_names[*code]);
335 if (PRINTABLE(c)) fprintf(f, " [^%c]{", c);
336 else fprintf(f, " [^\\x%02x]{", c);
337 if (*code != OP_NOTEXACT) fprintf(f, "0,");
338 fprintf(f, "%d}", GET2(code,1));
339 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
340 else if (*code == OP_NOTPOSUPTO) fprintf(f, "+");
344 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
/* After printing the \n form with the two-byte reference number, point ccode
at the item that follows and jump to the repeat-printing code that is shared
with character classes. */
348 fprintf(f, " \\%d", GET2(code,1));
349 ccode = code + _pcre_OP_lengths[*code];
350 goto CLASS_REF_REPEAT;
353 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
354 GET(code, 2 + LINK_SIZE));
359 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1], code[2]));
362 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
363 having this code always here, and it makes it less messy without all those
375 if (*code == OP_XCLASS)
377 extra = GET(code, 1);
378 ccode = code + LINK_SIZE + 1;
379 printmap = (*ccode & XCL_MAP) != 0;
380 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
388 /* Print a bit map */
/* The map holds one bit per character value 0..255, eight per byte. For each
set bit at i, j is advanced to the first clear bit after it (or to 256), so
that a run of set bits can be printed as a range. '-' and ']' are preceded by a
backslash, and values that are not PRINTABLE are shown as \xhh. */
392 for (i = 0; i < 256; i++)
394 if ((ccode[i/8] & (1 << (i&7))) != 0)
397 for (j = i+1; j < 256; j++)
398 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
399 if (i == '-' || i == ']') fprintf(f, "\\");
400 if (PRINTABLE(i)) fprintf(f, "%c", i);
401 else fprintf(f, "\\x%02x", i);
404 if (j != i + 1) fprintf(f, "-");
405 if (j == '-' || j == ']') fprintf(f, "\\");
406 if (PRINTABLE(j)) fprintf(f, "%c", j);
407 else fprintf(f, "\\x%02x", j);
415 /* For an XCLASS there is always some additional data */
417 if (*code == OP_XCLASS)
/* The additional data is a list of items ending with XCL_END. Property items
carry a type byte and a value byte; the remaining items carry characters, which
are always printed in UTF-8 mode (the utf8 argument is TRUE). */
420 while ((ch = *ccode++) != XCL_END)
424 int ptype = *ccode++;
425 int pvalue = *ccode++;
426 fprintf(f, "\\p{%s}", get_ucpname(ptype, pvalue));
428 else if (ch == XCL_NOTPROP)
430 int ptype = *ccode++;
431 int pvalue = *ccode++;
432 fprintf(f, "\\P{%s}", get_ucpname(ptype, pvalue));
436 ccode += 1 + print_char(f, ccode, TRUE);
440 ccode += 1 + print_char(f, ccode, TRUE);
446 /* Indicate a non-UTF8 class which was created by negation */
448 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
450 /* Handle repeats after a class or a back reference */
/* The repeat item at ccode is printed and its length is added to extra, so
that it is skipped together with the class or back reference it belongs to. */
461 fprintf(f, "%s", OP_names[*ccode]);
462 extra += _pcre_OP_lengths[*ccode];
/* A maximum of zero is printed as an open-ended range {min,}. */
469 if (max == 0) fprintf(f, "{%d,}", min);
470 else fprintf(f, "{%d,%d}", min, max);
471 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
472 extra += _pcre_OP_lengths[*ccode];
475 /* Do nothing if it's not a repeat; this code stops picky compilers
476 warning about the lack of a default code path. */
484 /* Anything else is just an item with no data */
487 fprintf(f, " %s", OP_names[*code]);
/* Move to the next item: the table length for this opcode plus any extra bytes
counted while printing it. */
491 code += _pcre_OP_lengths[*code] + extra;
496 /* End of pcre_printint.src */