1 /* $Cambridge: exim/src/src/pcre/printint.c,v 1.2 2005/06/15 08:57:10 ph10 Exp $ */
3 /*************************************************
4 * Perl-Compatible Regular Expressions *
5 *************************************************/
8 This is a library of functions to support regular expressions whose syntax
9 and semantics are as close as possible to those of the Perl 5 language. See
10 the file Tech.Notes for some information on the internals.
12 Written by: Philip Hazel <ph10@cam.ac.uk>
14 Copyright (c) 1997-2004 University of Cambridge
16 -----------------------------------------------------------------------------
17 Redistribution and use in source and binary forms, with or without
18 modification, are permitted provided that the following conditions are met:
20 * Redistributions of source code must retain the above copyright notice,
21 this list of conditions and the following disclaimer.
23 * Redistributions in binary form must reproduce the above copyright
24 notice, this list of conditions and the following disclaimer in the
25 documentation and/or other materials provided with the distribution.
27 * Neither the name of the University of Cambridge nor the names of its
28 contributors may be used to endorse or promote products derived from
29 this software without specific prior written permission.
31 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
32 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
33 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
34 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
35 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
36 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
37 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
38 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
39 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
40 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
41 POSSIBILITY OF SUCH DAMAGE.
42 -----------------------------------------------------------------------------
46 /* This module contains a debugging function for printing out the internal form
47 of a compiled regular expression. It is kept in a separate file so that it can
48 be #included both in the pcretest program, and in the library itself when
49 compiled with the debugging switch. */
/* Printable names of the opcodes, supplied by the OP_NAME_LIST macro (defined
elsewhere). The table is indexed by opcode value: the code below looks names
up as OP_names[*code]. */
52 static const char *OP_names[] = { OP_NAME_LIST };
55 /*************************************************
56 * Print single- or multi-byte character *
57 *************************************************/
59 /* These tables are actually copies of ones in pcre.c. If we compile the
60 library with debugging, they are included twice, but that isn't really a
61 problem - compiling with debugging is pretty rare and these are very small. */
/* Masks applied to the first byte of a multi-byte UTF-8 character, indexed by
the number of additional bytes (print_char uses c & utf8_t3[a]). */
63 static const int utf8_t3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
/* Number of additional bytes in a multi-byte UTF-8 character, indexed by the
low six bits of its first byte (print_char uses utf8_t4[c & 0x3f]). */
65 static const uschar utf8_t4[] = {
66 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
67 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
68 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
69 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
/* Print one character of a compiled pattern to f, starting at ptr.

If utf8 is false, or the byte in c does not have both of its top two bits set
(so it cannot start a multi-byte sequence), the byte is written literally when
printable and as \xHH otherwise. In the multi-byte case the bytes that follow
are folded into one value, which is written in hexadecimal.

NOTE(review): c is presumably the byte at *ptr - confirm against its
declaration. Callers in this file advance with "code += 1 + print_char(...)",
so the result is presumably the number of bytes consumed beyond the first -
confirm against the return statements. */
72 print_char(FILE *f, uschar *ptr, BOOL utf8)
/* Single-byte case: not in UTF-8 mode, or the top two bits are not both set. */
76 if (!utf8 || (c & 0xc0) != 0xc0)
78 if (isprint(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);
/* Multi-byte case: the low six bits of the first byte select the table entry. */
84 int a = utf8_t4[c & 0x3f]; /* Number of additional bytes */
/* Keep only the bits of the first byte selected by the mask and shift them
into place. */
86 c = (c & utf8_t3[a]) << s;
/* Visit each of the a bytes that follow the first one. */
87 for (i = 1; i <= a; i++)
89 /* This is a check for malformed UTF-8; it should only occur if the sanity
90 check has been turned off. Rather than swallow random bytes, just stop if
91 we hit a bad one. Print it with \X instead of \x as an indication. */
/* A following byte must have the bit pattern 10xxxxxx. */
93 if ((ptr[i] & 0xc0) != 0x80)
95 fprintf(f, "\\X{%x}", c);
/* Merge the low six bits of this byte into the value being assembled. */
102 c |= (ptr[i] & 0x3f) << s;
/* Values below 128 use the two-digit \x form; larger ones use braces. */
104 if (c < 128) fprintf(f, "\\x%02x", c); else fprintf(f, "\\x{%x}", c);
112 /*************************************************
113 * Find Unicode property name *
114 *************************************************/
/* Return the name of the Unicode property whose code is property.

The utt table is searched for an entry whose value field equals property. The
name field of the matching entry is returned, or the string "??" when no entry
matches. */
117 get_ucpname(int property)
/* Scan downwards from the LAST valid entry. The element count itself is not a
valid index: starting there would compare against, and could return the name
of, storage one element beyond the end of utt. */
121 for (i = (int)(sizeof(utt)/sizeof(ucp_type_table)) - 1; i >= 0; i--)
123 if (property == utt[i].value) break;
/* i is -1 here when the whole table was scanned without a match. */
125 return (i >= 0)? utt[i].name : "??";
133 /*************************************************
134 * Print compiled regex *
135 *************************************************/
137 /* Make this function work for a regex with integers in either byte order.
138 However, we assume that what we are passed is a compiled regex. */
/* Write a human-readable dump of the compiled pattern external_re to f.

The name table offset, entry count and entry size are read from the header to
find where the compiled code starts. If the magic number does not match, those
fields and the options word are byte-swapped first. Every item is prefixed
with its offset from the start of the code, followed by the opcode name and
whatever data the item carries. All output goes to f. */
141 print_internals(pcre *external_re, FILE *f)
143 real_pcre *re = (real_pcre *)external_re;
144 uschar *codestart, *code;
147 unsigned int options = re->options;
148 int offset = re->name_table_offset;
149 int count = re->name_count;
150 int size = re->name_entry_size;
/* A mismatched magic number is treated as the opposite byte order: exchange
the two low bytes of each table field and reverse the four bytes of options. */
152 if (re->magic_number != MAGIC_NUMBER)
154 offset = ((offset << 8) & 0xff00) | ((offset >> 8) & 0xff);
155 count = ((count << 8) & 0xff00) | ((count >> 8) & 0xff);
156 size = ((size << 8) & 0xff00) | ((size >> 8) & 0xff);
157 options = ((options << 24) & 0xff000000) |
158 ((options << 8) & 0x00ff0000) |
159 ((options >> 8) & 0x0000ff00) |
160 ((options >> 24) & 0x000000ff);
/* The code starts after the name table (count entries of size bytes each). */
163 code = codestart = (uschar *)re + offset + count * size;
164 utf8 = (options & PCRE_UTF8) != 0;
/* Each item starts with its offset from the beginning of the code. */
172 fprintf(f, "%3d ", (int)(code - codestart));
/* Bracket opcodes encode the bracket number as the distance from OP_BRA;
numbers beyond EXTRACT_BASIC_MAX are reported as "extra". */
176 if (*code - OP_BRA > EXTRACT_BASIC_MAX)
177 fprintf(f, "%3d Bra extra\n", GET(code, 1));
179 fprintf(f, "%3d Bra %d\n", GET(code, 1), *code - OP_BRA);
180 code += OP_lengths[OP_BRA];
187 fprintf(f, " %s\n", OP_names[*code]);
188 fprintf(f, "------------------------------------------------------------------\n");
192 fprintf(f, " %.2x %s", code[1], OP_names[*code]);
/* Consecutive OP_CHAR items are printed as a single run. */
201 code += 1 + print_char(f, code, utf8);
203 while (*code == OP_CHAR);
/* Likewise for consecutive OP_CHARNC items. */
215 code += 1 + print_char(f, code, utf8);
217 while (*code == OP_CHARNC);
230 case OP_ASSERTBACK_NOT:
234 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
/* Write to f like every other item. A plain printf() here would send this one
item to stdout whatever stream the caller asked for. */
238 fprintf(f, "%3d %s", GET2(code, 1), OP_names[*code]);
242 if (GET2(code, 1) == CREF_RECURSE)
243 fprintf(f, " Cond recurse");
245 fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);
259 case OP_TYPEMINQUERY:
/* Opcodes from OP_TYPESTAR upwards repeat a character type, held as an opcode
in code[1]; the others repeat a literal character. */
261 if (*code >= OP_TYPESTAR)
263 fprintf(f, "%s", OP_names[code[1]]);
264 if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
266 fprintf(f, " %s ", get_ucpname(code[2]));
270 else extra = print_char(f, code+1, utf8);
271 fprintf(f, "%s", OP_names[*code]);
/* Counted repeat of a character: the count is at code+1, the character at
code+3. */
278 extra = print_char(f, code+3, utf8);
280 if (*code != OP_EXACT) fprintf(f, ",");
281 fprintf(f, "%d}", GET2(code,1));
282 if (*code == OP_MINUPTO) fprintf(f, "?");
/* Counted repeat of a character type: the type opcode is at code[3]. */
288 fprintf(f, " %s", OP_names[code[3]]);
289 if (code[3] == OP_PROP || code[3] == OP_NOTPROP)
291 fprintf(f, " %s ", get_ucpname(code[4]));
295 if (*code != OP_TYPEEXACT) fprintf(f, "0,");
296 fprintf(f, "%d}", GET2(code,1));
297 if (*code == OP_TYPEMINUPTO) fprintf(f, "?");
/* Negated single character. */
301 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
302 else fprintf(f, " [^\\x%02x]", c);
/* Negated single character followed by the name of its repeat opcode. */
311 if (isprint(c = code[1])) fprintf(f, " [^%c]", c);
312 else fprintf(f, " [^\\x%02x]", c);
313 fprintf(f, "%s", OP_names[*code]);
/* Negated single character with a counted repeat. */
319 if (isprint(c = code[3])) fprintf(f, " [^%c]{", c);
320 else fprintf(f, " [^\\x%02x]{", c);
321 if (*code != OP_NOTEXACT) fprintf(f, ",");
322 fprintf(f, "%d}", GET2(code,1));
323 if (*code == OP_NOTMINUPTO) fprintf(f, "?");
327 fprintf(f, "%3d %s", GET(code, 1), OP_names[*code]);
/* Back reference: print its number, then reuse the class code that prints any
repeat following the item. */
331 fprintf(f, " \\%d", GET2(code,1));
332 ccode = code + OP_lengths[*code];
333 goto CLASS_REF_REPEAT;
336 fprintf(f, " %s %d %d %d", OP_names[*code], code[1], GET(code,2),
337 GET(code, 2 + LINK_SIZE));
342 fprintf(f, " %s %s", OP_names[*code], get_ucpname(code[1]));
345 /* OP_XCLASS can only occur in UTF-8 mode. However, there's no harm in
346 having this code always here, and it makes it less messy without all those
358 if (*code == OP_XCLASS)
360 extra = GET(code, 1);
361 ccode = code + LINK_SIZE + 1;
362 printmap = (*ccode & XCL_MAP) != 0;
363 if ((*ccode++ & XCL_NOT) != 0) fprintf(f, "^");
371 /* Print a bit map */
375 for (i = 0; i < 256; i++)
377 if ((ccode[i/8] & (1 << (i&7))) != 0)
/* Find the first value after i whose bit is clear, so that a run of set bits
can be printed as a range. */
380 for (j = i+1; j < 256; j++)
381 if ((ccode[j/8] & (1 << (j&7))) == 0) break;
/* '-' and ']' are escaped because they are special inside a class. */
382 if (i == '-' || i == ']') fprintf(f, "\\");
383 if (isprint(i)) fprintf(f, "%c", i); else fprintf(f, "\\x%02x", i);
386 if (j != i + 1) fprintf(f, "-");
387 if (j == '-' || j == ']') fprintf(f, "\\");
388 if (isprint(j)) fprintf(f, "%c", j); else fprintf(f, "\\x%02x", j);
396 /* For an XCLASS there is always some additional data */
398 if (*code == OP_XCLASS)
401 while ((ch = *ccode++) != XCL_END)
405 fprintf(f, "\\p{%s}", get_ucpname(*ccode++));
407 else if (ch == XCL_NOTPROP)
409 fprintf(f, "\\P{%s}", get_ucpname(*ccode++));
/* Characters in the extra data are always decoded as UTF-8. */
413 ccode += 1 + print_char(f, ccode, TRUE);
417 ccode += 1 + print_char(f, ccode, TRUE);
423 /* Indicate a non-UTF8 class which was created by negation */
425 fprintf(f, "]%s", (*code == OP_NCLASS)? " (neg)" : "");
427 /* Handle repeats after a class or a back reference */
438 fprintf(f, "%s", OP_names[*ccode]);
439 extra += OP_lengths[*ccode];
/* A maximum of zero is printed as an open-ended range. */
446 if (max == 0) fprintf(f, "{%d,}", min);
447 else fprintf(f, "{%d,%d}", min, max);
448 if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
449 extra += OP_lengths[*ccode];
455 /* Anything else is just an item with no data*/
458 fprintf(f, " %s", OP_names[*code]);
/* Step over the item's table length plus any extra length recorded above. */
462 code += OP_lengths[*code] + extra;
467 /* End of printint.c */