1 /* $Cambridge: exim/src/src/pcre/pcre_printint.c,v 1.2 2005/08/08 10:22:14 ph10 Exp $ */
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
7 /* PCRE is a library of functions to support regular expressions whose syntax
8 and semantics are as close as possible to those of the Perl 5 language.
10 Written by Philip Hazel
11 Copyright (c) 1997-2005 University of Cambridge
13 -----------------------------------------------------------------------------
14 Redistribution and use in source and binary forms, with or without
15 modification, are permitted provided that the following conditions are met:
17 * Redistributions of source code must retain the above copyright notice,
18 this list of conditions and the following disclaimer.
20 * Redistributions in binary form must reproduce the above copyright
21 notice, this list of conditions and the following disclaimer in the
22 documentation and/or other materials provided with the distribution.
24 * Neither the name of the University of Cambridge nor the names of its
25 contributors may be used to endorse or promote products derived from
26 this software without specific prior written permission.
28 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
29 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
32 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
35 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
36 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
37 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
38 POSSIBILITY OF SUCH DAMAGE.
39 -----------------------------------------------------------------------------
43 /* This module contains a PCRE private debugging function for printing out the
44 internal form of a compiled regular expression, along with some supporting
48 #include "pcre_internal.h"
/* Table of opcode names used when printing; it is indexed by opcode value
(for example OP_names[*code] below). The initializer comes from the
OP_NAME_LIST macro, which is not defined in this file (presumably it comes
from pcre_internal.h - confirm). */
51 static const char *OP_names[] = { OP_NAME_LIST };
54 /*************************************************
55 * Print single- or multi-byte character *
56 *************************************************/
/* Print one character of the compiled pattern to a stream.

Arguments:
  f      stream that receives the output
  ptr    points to the first byte of the character
  utf8   TRUE if the bytes are to be interpreted as UTF-8

A single byte is written as itself when isprint() accepts it, otherwise as
\xhh. A multi-byte UTF-8 character is decoded and written as \x{h..}; if a
continuation byte is malformed, the value decoded so far is written as
\X{h..} instead.

NOTE(review): the return statements are not visible in this listing. Callers
advance by 1 + the returned value, so the result is presumably the number of
additional bytes consumed beyond the first - confirm. */
59 print_char(FILE *f, uschar *ptr, BOOL utf8)
/* Single-byte case: not in UTF-8 mode, or the top two bits of c are not both
set. (c is declared outside the visible lines; presumably it holds *ptr.) */
63 if (!utf8 || (c & 0xc0) != 0xc0)
65 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
71 int a = _pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
/* Mask the first byte with the table entry for this length and shift the
result up by s bits (s is set outside the visible lines). */
73 c = (c & _pcre_utf8_table3[a]) << s;
74 for (i = 1; i <= a; i++)
76 /* This is a check for malformed UTF-8; it should only occur if the sanity
77 check has been turned off. Rather than swallow random bytes, just stop if
78 we hit a bad one. Print it with \X instead of \x as an indication. */
80 if ((ptr[i] & 0xc0) != 0x80)
82 fprintf(f, "\\X{%x}", c);
/* Merge the low six bits of this continuation byte into the value. */
89 c |= (ptr[i] & 0x3f) << s;
/* Decoded values below 128 use the two-digit form, all others the braced form. */
91 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
98 /*************************************************
99 * Find Unicode property name *
100 *************************************************/
/* Look up the printable name of a Unicode property.

Argument:
  property   the property value to search for in _pcre_utt[]

Returns the name field of the matching table entry, or the string "??" when
no entry has that value.

The search runs from the last entry down to index 0. It starts at
_pcre_utt_size - 1: starting at _pcre_utt_size itself would make the first
comparison read one element past the end of the table (this assumes
_pcre_utt_size is the number of entries in _pcre_utt[]). */
103 get_ucpname(int property)
107 for (i = _pcre_utt_size - 1; i >= 0; i--)
109 if (property == _pcre_utt[i].value) break;
/* i is -1 here when the loop ran off the front without finding a match. */
111 return (i >= 0)? _pcre_utt[i].name : "??";
119 /*************************************************
120 * Print compiled regex *
121 *************************************************/
123 /* Make this function work for a regex with integers in either byte order.
124 However, we assume that what we are passed is a compiled regex. */
127 _pcre_printint(pcre *external_re, FILE *f)
129 real_pcre *re = (real_pcre *)external_re;
130 uschar *codestart, *code;
133 unsigned int options = re->options;
134 int offset = re->name_table_offset;
135 int count = re->name_count;
136 int size = re->name_entry_size;
138 if (re->magic_number != MAGIC_NUMBER)
140 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
141 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
142 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
143 options = ((options << 24) & 0xff000000) |
144 ((options << 8) & 0x00ff0000) |
145 ((options >> 8) & 0x0000ff00) |
146 ((options >> 24) & 0x000000ff);
149 code = codestart = (uschar *)re + offset + count * size;
150 utf8 = (options & PCRE_UTF8) != 0;
158 fprintf(f, "%3d ", (int)(code - codestart));
162 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
163 fprintf(f, "%3d Bra extra\n", GET(code, 1));
165 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
166 code += _pcre_OP_lengths[OP_BRA];
173 fprintf(f, " %s\n", OP_names[*code]);
174 fprintf(f, "------------------------------------------------------------------\n");
178 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
187 code += 1 + print_char(f, code, utf8);
189 while (*code == OP_CHAR);
201 code += 1 + print_char(f, code, utf8);
203 while (*code == OP_CHARNC);
216 case OP_ASSERTBACK_NOT:
220 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
224 printf("%3d %s", GET2(code, 1), OP_names[*code]);
228 if (GET2(code, 1) == CREF_RECURSE)
229 fprintf(f, " Cond recurse");
231 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
245 case OP_TYPEMINQUERY:
247 if (*code >= OP_TYPESTAR)
249 fprintf(f, "%s", OP_names[code[1]]);
250 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
252 fprintf(f, " %s ", get_ucpname(code[2]));
256 else extra = print_char(f, code+1, utf8);
257 fprintf(f, "%s", OP_names[*code]);
264 extra = print_char(f, code+3, utf8);
266 if (*code != OP_EXACT) fprintf(f, ",");
267 fprintf(f, "%d}", GET2(code,1));
268 if (*code == OP_MINUPTO) fprintf(f, "?");
274 fprintf(f, " %s", OP_names[code[3]]);
275 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
277 fprintf(f, " %s ", get_ucpname(code[4]));
281 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
282 fprintf(f, "%d}", GET2(code,1));
283 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
287 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
288 else fprintf(f, " [^\\x%02x]", c);
297 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
298 else fprintf(f, " [^\\x%02x]", c);
299 fprintf(f, "%s", OP_names[*code]);
305 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
306 else fprintf(f, " [^\\x%02x]{", c);
307 if (*code != OP_NOTEXACT) fprintf(f, "0,");
308 fprintf(f, "%d}", GET2(code,1));
309 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
313 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
317 fprintf(f, " \\%d", GET2(code,1));
318 ccode = code + _pcre_OP_lengths[*code];
319 goto CLASS_REF_REPEAT;
322 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
323 GET(code, 2 + LINK_SIZE));
328 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
331 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
332 having this code always here, and it makes it less messy without all those
344 if (*code == OP_XCLASS)
346 extra = GET(code, 1);
347 ccode = code + LINK_SIZE + 1;
348 printmap = (*ccode & XCL_MAP) != 0;
349 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
357 /* Print a bit map */
361 for (i = 0; i < 256; i++)
363 if ((ccode[i/8] & (1 << (i&7))) != 0)
366 for (j = i+1; j < 256; j++)
367 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
368 if (i == '-' || i == ']') fprintf(f, "\\");
369 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
372 if (j != i + 1) fprintf(f, "-");
373 if (j == '-' || j == ']') fprintf(f, "\\");
374 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
382 /* For an XCLASS there is always some additional data */
384 if (*code == OP_XCLASS)
387 while ((ch = *ccode++) != XCL_END)
391 fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
393 else if (ch == XCL_NOTPROP)
395 fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
399 ccode += 1 + print_char(f, ccode, TRUE);
403 ccode += 1 + print_char(f, ccode, TRUE);
409 /* Indicate a non-UTF8 class which was created by negation */
411 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
413 /* Handle repeats after a class or a back reference */
424 fprintf(f, "%s", OP_names[*ccode]);
425 extra += _pcre_OP_lengths[*ccode];
432 if (max == 0) fprintf(f, "{%d,}", min);
433 else fprintf(f, "{%d,%d}", min, max);
434 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
435 extra += _pcre_OP_lengths[*ccode];
441 /* Anything else is just an item with no data*/
444 fprintf(f, " %s", OP_names[*code]);
448 code += _pcre_OP_lengths[*code] + extra;
453 /* End of pcre_printint.c */