/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/*
This is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. See
the file Tech.Notes for some information on the internals.

Written by: Philip Hazel <ph10@cam.ac.uk>

           Copyright (c) 1997-2003 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* This module contains some convenience functions for extracting substrings
from the subject string after a regex match has succeeded. The original idea
for these functions came from Scott Wimer. */


/* Include the internals header, which itself includes Standard C headers plus
the external pcre header. */

54 /*************************************************
55 * Find number for named string *
56 *************************************************/
58 /* This function is used by the two extraction functions below, as well
59 as being generally available.
62 code the compiled regex
63 stringname the name whose number is required
65 Returns: the number of the named parentheses, or a negative number
66 (PCRE_ERROR_NOSUBSTRING) if not found
70 pcre_get_stringnumber(const pcre *code, const char *stringname)
77 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
79 if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
81 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
83 if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
89 int mid = (top + bot) / 2;
90 uschar *entry = nametable + entrysize*mid;
91 int c = strcmp(stringname, (char *)(entry + 2));
92 if (c == 0) return (entry[0] << 8) + entry[1];
93 if (c > 0) bot = mid + 1; else top = mid;
96 return PCRE_ERROR_NOSUBSTRING;
101 /*************************************************
102 * Copy captured string to given buffer *
103 *************************************************/
105 /* This function copies a single captured substring into a given buffer.
106 Note that we use memcpy() rather than strncpy() in case there are binary zeros
110 subject the subject string that was matched
111 ovector pointer to the offsets table
112 stringcount the number of substrings that were captured
113 (i.e. the yield of the pcre_exec call, unless
114 that was zero, in which case it should be 1/3
115 of the offset table size)
116 stringnumber the number of the required substring
117 buffer where to put the substring
118 size the size of the buffer
120 Returns: if successful:
121 the length of the copied string, not including the zero
122 that is put on the end; can be zero
124 PCRE_ERROR_NOMEMORY (-6) buffer too small
125 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
129 pcre_copy_substring(const char *subject, int *ovector, int stringcount,
130 int stringnumber, char *buffer, int size)
133 if (stringnumber < 0 || stringnumber >= stringcount)
134 return PCRE_ERROR_NOSUBSTRING;
136 yield = ovector[stringnumber+1] - ovector[stringnumber];
137 if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
138 memcpy(buffer, subject + ovector[stringnumber], yield);
145 /*************************************************
146 * Copy named captured string to given buffer *
147 *************************************************/
149 /* This function copies a single captured substring into a given buffer,
150 identifying it by name.
153 code the compiled regex
154 subject the subject string that was matched
155 ovector pointer to the offsets table
156 stringcount the number of substrings that were captured
157 (i.e. the yield of the pcre_exec call, unless
158 that was zero, in which case it should be 1/3
159 of the offset table size)
160 stringname the name of the required substring
161 buffer where to put the substring
162 size the size of the buffer
164 Returns: if successful:
165 the length of the copied string, not including the zero
166 that is put on the end; can be zero
168 PCRE_ERROR_NOMEMORY (-6) buffer too small
169 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
173 pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
174 int stringcount, const char *stringname, char *buffer, int size)
176 int n = pcre_get_stringnumber(code, stringname);
177 if (n <= 0) return n;
178 return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
183 /*************************************************
184 * Copy all captured strings to new store *
185 *************************************************/
187 /* This function gets one chunk of store and builds a list of pointers and all
188 of the captured substrings in it. A NULL pointer is put on the end of the list.
191 subject the subject string that was matched
192 ovector pointer to the offsets table
193 stringcount the number of substrings that were captured
194 (i.e. the yield of the pcre_exec call, unless
195 that was zero, in which case it should be 1/3
196 of the offset table size)
197 listptr set to point to the list of pointers
199 Returns: if successful: 0
201 PCRE_ERROR_NOMEMORY (-6) failed to get store
205 pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
206 const char ***listptr)
209 int size = sizeof(char *);
210 int double_count = stringcount * 2;
214 for (i = 0; i < double_count; i += 2)
215 size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
217 stringlist = (char **)(pcre_malloc)(size);
218 if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
220 *listptr = (const char **)stringlist;
221 p = (char *)(stringlist + stringcount + 1);
223 for (i = 0; i < double_count; i += 2)
225 int len = ovector[i+1] - ovector[i];
226 memcpy(p, subject + ovector[i], len);
238 /*************************************************
239 * Free store obtained by get_substring_list *
240 *************************************************/
242 /* This function exists for the benefit of people calling PCRE from non-C
243 programs that can call its functions, but not free() or (pcre_free)() directly.
245 Argument: the result of a previous pcre_get_substring_list()
250 pcre_free_substring_list(const char **pointer)
252 (pcre_free)((void *)pointer);
257 /*************************************************
258 * Copy captured string to new store *
259 *************************************************/
261 /* This function copies a single captured substring into a piece of new
265 subject the subject string that was matched
266 ovector pointer to the offsets table
267 stringcount the number of substrings that were captured
268 (i.e. the yield of the pcre_exec call, unless
269 that was zero, in which case it should be 1/3
270 of the offset table size)
271 stringnumber the number of the required substring
272 stringptr where to put a pointer to the substring
274 Returns: if successful:
275 the length of the string, not including the zero that
276 is put on the end; can be zero
278 PCRE_ERROR_NOMEMORY (-6) failed to get store
279 PCRE_ERROR_NOSUBSTRING (-7) substring not present
283 pcre_get_substring(const char *subject, int *ovector, int stringcount,
284 int stringnumber, const char **stringptr)
288 if (stringnumber < 0 || stringnumber >= stringcount)
289 return PCRE_ERROR_NOSUBSTRING;
291 yield = ovector[stringnumber+1] - ovector[stringnumber];
292 substring = (char *)(pcre_malloc)(yield + 1);
293 if (substring == NULL) return PCRE_ERROR_NOMEMORY;
294 memcpy(substring, subject + ovector[stringnumber], yield);
295 substring[yield] = 0;
296 *stringptr = substring;
302 /*************************************************
303 * Copy named captured string to new store *
304 *************************************************/
306 /* This function copies a single captured substring, identified by name, into
310 code the compiled regex
311 subject the subject string that was matched
312 ovector pointer to the offsets table
313 stringcount the number of substrings that were captured
314 (i.e. the yield of the pcre_exec call, unless
315 that was zero, in which case it should be 1/3
316 of the offset table size)
317 stringname the name of the required substring
318 stringptr where to put the pointer
320 Returns: if successful:
321 the length of the copied string, not including the zero
322 that is put on the end; can be zero
324 PCRE_ERROR_NOMEMORY (-6) couldn't get memory
325 PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
329 pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
330 int stringcount, const char *stringname, const char **stringptr)
332 int n = pcre_get_stringnumber(code, stringname);
333 if (n <= 0) return n;
334 return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
340 /*************************************************
341 * Free store obtained by get_substring *
342 *************************************************/
344 /* This function exists for the benefit of people calling PCRE from non-C
345 programs that can call its functions, but not free() or (pcre_free)() directly.
347 Argument: the result of a previous pcre_get_substring()
352 pcre_free_substring(const char *pointer)
354 (pcre_free)((void *)pointer);