1 : /*************************************************
2 : * Perl-Compatible Regular Expressions *
3 : *************************************************/
4 :
5 : /* PCRE is a library of functions to support regular expressions whose syntax
6 : and semantics are as close as possible to those of the Perl 5 language.
7 :
8 : Written by Philip Hazel
9 : Copyright (c) 1997-2006 University of Cambridge
10 :
11 : -----------------------------------------------------------------------------
12 : Redistribution and use in source and binary forms, with or without
13 : modification, are permitted provided that the following conditions are met:
14 :
15 : * Redistributions of source code must retain the above copyright notice,
16 : this list of conditions and the following disclaimer.
17 :
18 : * Redistributions in binary form must reproduce the above copyright
19 : notice, this list of conditions and the following disclaimer in the
20 : documentation and/or other materials provided with the distribution.
21 :
22 : * Neither the name of the University of Cambridge nor the names of its
23 : contributors may be used to endorse or promote products derived from
24 : this software without specific prior written permission.
25 :
26 : THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 : AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 : IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 : ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 : LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 : CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 : SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 : INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 : CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 : ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 : POSSIBILITY OF SUCH DAMAGE.
37 : -----------------------------------------------------------------------------
38 : */
39 :
40 :
41 : /* This module contains some convenience functions for extracting substrings
42 : from the subject string after a regex match has succeeded. The original idea
43 : for these functions came from Scott Wimer. */
44 :
45 :
46 : #include "pcre_internal.h"
47 :
48 :
49 : /*************************************************
50 : * Find number for named string *
51 : *************************************************/
52 :
53 : /* This function is used by the get_first_set() function below, as well
54 : as being generally available. It assumes that names are unique.
55 :
56 : Arguments:
57 : code the compiled regex
58 : stringname the name whose number is required
59 :
60 : Returns: the number of the named parentheses, or a negative number
61 : (PCRE_ERROR_NOSUBSTRING) if not found
62 : */
63 :
64 : int
65 : pcre_get_stringnumber(const pcre *code, const char *stringname)
66 0 : {
67 : int rc;
68 : int entrysize;
69 : int top, bot;
70 : uschar *nametable;
71 :
72 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
73 0 : return rc;
74 0 : if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
75 :
76 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
77 0 : return rc;
78 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
79 0 : return rc;
80 :
81 0 : bot = 0;
82 0 : while (top > bot)
83 : {
84 0 : int mid = (top + bot) / 2;
85 0 : uschar *entry = nametable + entrysize*mid;
86 0 : int c = strcmp(stringname, (char *)(entry + 2));
87 0 : if (c == 0) return (entry[0] << 8) + entry[1];
88 0 : if (c > 0) bot = mid + 1; else top = mid;
89 : }
90 :
91 0 : return PCRE_ERROR_NOSUBSTRING;
92 : }
93 :
94 :
95 :
96 : /*************************************************
97 : * Find (multiple) entries for named string *
98 : *************************************************/
99 :
100 : /* This is used by the get_first_set() function below, as well as being
101 : generally available. It is used when duplicated names are permitted.
102 :
103 : Arguments:
104 : code the compiled regex
105 : stringname the name whose entries required
106 : firstptr where to put the pointer to the first entry
107 : lastptr where to put the pointer to the last entry
108 :
109 : Returns: the length of each entry, or a negative number
110 : (PCRE_ERROR_NOSUBSTRING) if not found
111 : */
112 :
113 : int
114 : pcre_get_stringtable_entries(const pcre *code, const char *stringname,
115 : char **firstptr, char **lastptr)
116 0 : {
117 : int rc;
118 : int entrysize;
119 : int top, bot;
120 : uschar *nametable, *lastentry;
121 :
122 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMECOUNT, &top)) != 0)
123 0 : return rc;
124 0 : if (top <= 0) return PCRE_ERROR_NOSUBSTRING;
125 :
126 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMEENTRYSIZE, &entrysize)) != 0)
127 0 : return rc;
128 0 : if ((rc = pcre_fullinfo(code, NULL, PCRE_INFO_NAMETABLE, &nametable)) != 0)
129 0 : return rc;
130 :
131 0 : lastentry = nametable + entrysize * (top - 1);
132 0 : bot = 0;
133 0 : while (top > bot)
134 : {
135 0 : int mid = (top + bot) / 2;
136 0 : uschar *entry = nametable + entrysize*mid;
137 0 : int c = strcmp(stringname, (char *)(entry + 2));
138 0 : if (c == 0)
139 : {
140 0 : uschar *first = entry;
141 0 : uschar *last = entry;
142 0 : while (first > nametable)
143 : {
144 0 : if (strcmp(stringname, (char *)(first - entrysize + 2)) != 0) break;
145 0 : first -= entrysize;
146 : }
147 0 : while (last < lastentry)
148 : {
149 0 : if (strcmp(stringname, (char *)(last + entrysize + 2)) != 0) break;
150 0 : last += entrysize;
151 : }
152 0 : *firstptr = (char *)first;
153 0 : *lastptr = (char *)last;
154 0 : return entrysize;
155 : }
156 0 : if (c > 0) bot = mid + 1; else top = mid;
157 : }
158 :
159 0 : return PCRE_ERROR_NOSUBSTRING;
160 : }
161 :
162 :
163 :
164 : /*************************************************
165 : * Find first set of multiple named strings *
166 : *************************************************/
167 :
168 : /* This function allows for duplicate names in the table of named substrings.
169 : It returns the number of the first one that was set in a pattern match.
170 :
171 : Arguments:
172 : code the compiled regex
173 : stringname the name of the capturing substring
174 : ovector the vector of matched substrings
175 :
176 : Returns: the number of the first that is set,
177 : or the number of the last one if none are set,
178 : or a negative number on error
179 : */
180 :
181 : static int
182 : get_first_set(const pcre *code, const char *stringname, int *ovector)
183 0 : {
184 0 : const real_pcre *re = (const real_pcre *)code;
185 : int entrysize;
186 : char *first, *last;
187 : uschar *entry;
188 0 : if ((re->options & (PCRE_DUPNAMES | PCRE_JCHANGED)) == 0)
189 0 : return pcre_get_stringnumber(code, stringname);
190 0 : entrysize = pcre_get_stringtable_entries(code, stringname, &first, &last);
191 0 : if (entrysize <= 0) return entrysize;
192 0 : for (entry = (uschar *)first; entry <= (uschar *)last; entry += entrysize)
193 : {
194 0 : int n = (entry[0] << 8) + entry[1];
195 0 : if (ovector[n*2] >= 0) return n;
196 : }
197 0 : return (first[0] << 8) + first[1];
198 : }
199 :
200 :
201 :
202 :
203 : /*************************************************
204 : * Copy captured string to given buffer *
205 : *************************************************/
206 :
207 : /* This function copies a single captured substring into a given buffer.
208 : Note that we use memcpy() rather than strncpy() in case there are binary zeros
209 : in the string.
210 :
211 : Arguments:
212 : subject the subject string that was matched
213 : ovector pointer to the offsets table
214 : stringcount the number of substrings that were captured
215 : (i.e. the yield of the pcre_exec call, unless
216 : that was zero, in which case it should be 1/3
217 : of the offset table size)
218 : stringnumber the number of the required substring
219 : buffer where to put the substring
220 : size the size of the buffer
221 :
222 : Returns: if successful:
223 : the length of the copied string, not including the zero
224 : that is put on the end; can be zero
225 : if not successful:
226 : PCRE_ERROR_NOMEMORY (-6) buffer too small
227 : PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
228 : */
229 :
230 : int
231 : pcre_copy_substring(const char *subject, int *ovector, int stringcount,
232 : int stringnumber, char *buffer, int size)
233 0 : {
234 : int yield;
235 0 : if (stringnumber < 0 || stringnumber >= stringcount)
236 0 : return PCRE_ERROR_NOSUBSTRING;
237 0 : stringnumber *= 2;
238 0 : yield = ovector[stringnumber+1] - ovector[stringnumber];
239 0 : if (size < yield + 1) return PCRE_ERROR_NOMEMORY;
240 0 : memcpy(buffer, subject + ovector[stringnumber], yield);
241 0 : buffer[yield] = 0;
242 0 : return yield;
243 : }
244 :
245 :
246 :
247 : /*************************************************
248 : * Copy named captured string to given buffer *
249 : *************************************************/
250 :
251 : /* This function copies a single captured substring into a given buffer,
252 : identifying it by name. If the regex permits duplicate names, the first
253 : substring that is set is chosen.
254 :
255 : Arguments:
256 : code the compiled regex
257 : subject the subject string that was matched
258 : ovector pointer to the offsets table
259 : stringcount the number of substrings that were captured
260 : (i.e. the yield of the pcre_exec call, unless
261 : that was zero, in which case it should be 1/3
262 : of the offset table size)
263 : stringname the name of the required substring
264 : buffer where to put the substring
265 : size the size of the buffer
266 :
267 : Returns: if successful:
268 : the length of the copied string, not including the zero
269 : that is put on the end; can be zero
270 : if not successful:
271 : PCRE_ERROR_NOMEMORY (-6) buffer too small
272 : PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
273 : */
274 :
275 : int
276 : pcre_copy_named_substring(const pcre *code, const char *subject, int *ovector,
277 : int stringcount, const char *stringname, char *buffer, int size)
278 0 : {
279 0 : int n = get_first_set(code, stringname, ovector);
280 0 : if (n <= 0) return n;
281 0 : return pcre_copy_substring(subject, ovector, stringcount, n, buffer, size);
282 : }
283 :
284 :
285 :
286 : /*************************************************
287 : * Copy all captured strings to new store *
288 : *************************************************/
289 :
290 : /* This function gets one chunk of store and builds a list of pointers and all
291 : of the captured substrings in it. A NULL pointer is put on the end of the list.
292 :
293 : Arguments:
294 : subject the subject string that was matched
295 : ovector pointer to the offsets table
296 : stringcount the number of substrings that were captured
297 : (i.e. the yield of the pcre_exec call, unless
298 : that was zero, in which case it should be 1/3
299 : of the offset table size)
300 : listptr set to point to the list of pointers
301 :
302 : Returns: if successful: 0
303 : if not successful:
304 : PCRE_ERROR_NOMEMORY (-6) failed to get store
305 : */
306 :
307 : int
308 : pcre_get_substring_list(const char *subject, int *ovector, int stringcount,
309 : const char ***listptr)
310 366 : {
311 : int i;
312 366 : int size = sizeof(char *);
313 366 : int double_count = stringcount * 2;
314 : char **stringlist;
315 : char *p;
316 :
317 1098 : for (i = 0; i < double_count; i += 2)
318 732 : size += sizeof(char *) + ovector[i+1] - ovector[i] + 1;
319 :
320 366 : stringlist = (char **)(pcre_malloc)(size);
321 366 : if (stringlist == NULL) return PCRE_ERROR_NOMEMORY;
322 :
323 366 : *listptr = (const char **)stringlist;
324 366 : p = (char *)(stringlist + stringcount + 1);
325 :
326 1098 : for (i = 0; i < double_count; i += 2)
327 : {
328 732 : int len = ovector[i+1] - ovector[i];
329 732 : memcpy(p, subject + ovector[i], len);
330 732 : *stringlist++ = p;
331 732 : p += len;
332 732 : *p++ = 0;
333 : }
334 :
335 366 : *stringlist = NULL;
336 366 : return 0;
337 : }
338 :
339 :
340 :
341 : /*************************************************
342 : * Free store obtained by get_substring_list *
343 : *************************************************/
344 :
345 : /* This function exists for the benefit of people calling PCRE from non-C
346 : programs that can call its functions, but not free() or (pcre_free)() directly.
347 :
348 : Argument: the result of a previous pcre_get_substring_list()
349 : Returns: nothing
350 : */
351 :
352 : void
353 : pcre_free_substring_list(const char **pointer)
354 0 : {
355 0 : (pcre_free)((void *)pointer);
356 0 : }
357 :
358 :
359 :
360 : /*************************************************
361 : * Copy captured string to new store *
362 : *************************************************/
363 :
364 : /* This function copies a single captured substring into a piece of new
365 : store
366 :
367 : Arguments:
368 : subject the subject string that was matched
369 : ovector pointer to the offsets table
370 : stringcount the number of substrings that were captured
371 : (i.e. the yield of the pcre_exec call, unless
372 : that was zero, in which case it should be 1/3
373 : of the offset table size)
374 : stringnumber the number of the required substring
375 : stringptr where to put a pointer to the substring
376 :
377 : Returns: if successful:
378 : the length of the string, not including the zero that
379 : is put on the end; can be zero
380 : if not successful:
381 : PCRE_ERROR_NOMEMORY (-6) failed to get store
382 : PCRE_ERROR_NOSUBSTRING (-7) substring not present
383 : */
384 :
385 : int
386 : pcre_get_substring(const char *subject, int *ovector, int stringcount,
387 : int stringnumber, const char **stringptr)
388 0 : {
389 : int yield;
390 : char *substring;
391 0 : if (stringnumber < 0 || stringnumber >= stringcount)
392 0 : return PCRE_ERROR_NOSUBSTRING;
393 0 : stringnumber *= 2;
394 0 : yield = ovector[stringnumber+1] - ovector[stringnumber];
395 0 : substring = (char *)(pcre_malloc)(yield + 1);
396 0 : if (substring == NULL) return PCRE_ERROR_NOMEMORY;
397 0 : memcpy(substring, subject + ovector[stringnumber], yield);
398 0 : substring[yield] = 0;
399 0 : *stringptr = substring;
400 0 : return yield;
401 : }
402 :
403 :
404 :
405 : /*************************************************
406 : * Copy named captured string to new store *
407 : *************************************************/
408 :
409 : /* This function copies a single captured substring, identified by name, into
410 : new store. If the regex permits duplicate names, the first substring that is
411 : set is chosen.
412 :
413 : Arguments:
414 : code the compiled regex
415 : subject the subject string that was matched
416 : ovector pointer to the offsets table
417 : stringcount the number of substrings that were captured
418 : (i.e. the yield of the pcre_exec call, unless
419 : that was zero, in which case it should be 1/3
420 : of the offset table size)
421 : stringname the name of the required substring
422 : stringptr where to put the pointer
423 :
424 : Returns: if successful:
425 : the length of the copied string, not including the zero
426 : that is put on the end; can be zero
427 : if not successful:
428 : PCRE_ERROR_NOMEMORY (-6) couldn't get memory
429 : PCRE_ERROR_NOSUBSTRING (-7) no such captured substring
430 : */
431 :
432 : int
433 : pcre_get_named_substring(const pcre *code, const char *subject, int *ovector,
434 : int stringcount, const char *stringname, const char **stringptr)
435 0 : {
436 0 : int n = get_first_set(code, stringname, ovector);
437 0 : if (n <= 0) return n;
438 0 : return pcre_get_substring(subject, ovector, stringcount, n, stringptr);
439 : }
440 :
441 :
442 :
443 :
444 : /*************************************************
445 : * Free store obtained by get_substring *
446 : *************************************************/
447 :
448 : /* This function exists for the benefit of people calling PCRE from non-C
449 : programs that can call its functions, but not free() or (pcre_free)() directly.
450 :
451 : Argument: the result of a previous pcre_get_substring()
452 : Returns: nothing
453 : */
454 :
455 : void
456 : pcre_free_substring(const char *pointer)
457 0 : {
458 0 : (pcre_free)((void *)pointer);
459 0 : }
460 :
461 : /* End of pcre_get.c */
|