1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2007 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Andrei Zmievski <andrei@php.net> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : /* $Id: php_pcre.c,v 1.168.2.9.2.16 2007/03/14 23:47:44 tony2001 Exp $ */
20 :
21 : #ifdef HAVE_CONFIG_H
22 : #include "config.h"
23 : #endif
24 :
25 : #include "php.h"
26 : #include "php_ini.h"
27 : #include "php_globals.h"
28 : #include "php_pcre.h"
29 : #include "ext/standard/info.h"
30 : #include "ext/standard/php_smart_str.h"
31 :
32 : #if HAVE_PCRE || HAVE_BUNDLED_PCRE
33 :
34 : #include "ext/standard/php_string.h"
35 :
/* Result ordering for global matches. These live in the low byte of the
   flags argument (the match code masks with 0xff), so PREG_OFFSET_CAPTURE
   sits above that byte and can be OR'ed with either of them. */
#define PREG_PATTERN_ORDER			1
#define PREG_SET_ORDER				2
#define PREG_OFFSET_CAPTURE			(1 << 8)

/* Split flags (presumably consumed by preg_split(); not used in this part
   of the file). */
#define PREG_SPLIT_NO_EMPTY			(1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE	(1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE	(1 << 2)

/* Stored in the cache entry's preg_options when the 'e' modifier is given. */
#define PREG_REPLACE_EVAL			(1 << 0)

/* Grep flag (presumably consumed by preg_grep(); not used in this part of
   the file). */
#define PREG_GREP_INVERT			(1 << 0)

/* Number of compiled patterns held in the cache before old entries are
   evicted. */
#define PCRE_CACHE_SIZE				4096

/* Error codes stored in PCRE_G(error_code) and exported to userland as the
   PREG_*_ERROR constants. */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4
};
57 :
58 :
59 : ZEND_DECLARE_MODULE_GLOBALS(pcre);
60 :
61 :
62 : static void pcre_handle_exec_error(int pcre_code TSRMLS_DC)
63 0 : {
64 0 : int preg_code = 0;
65 :
66 0 : switch (pcre_code) {
67 : case PCRE_ERROR_MATCHLIMIT:
68 0 : preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
69 0 : break;
70 :
71 : case PCRE_ERROR_RECURSIONLIMIT:
72 0 : preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
73 0 : break;
74 :
75 : case PCRE_ERROR_BADUTF8:
76 0 : preg_code = PHP_PCRE_BAD_UTF8_ERROR;
77 0 : break;
78 :
79 : default:
80 0 : preg_code = PHP_PCRE_INTERNAL_ERROR;
81 : break;
82 : }
83 :
84 0 : PCRE_G(error_code) = preg_code;
85 0 : }
86 :
87 :
88 : static void php_free_pcre_cache(void *data)
89 184 : {
90 184 : pcre_cache_entry *pce = (pcre_cache_entry *) data;
91 184 : if (!pce) return;
92 184 : pefree(pce->re, 1);
93 184 : if (pce->extra) pefree(pce->extra, 1);
94 : #if HAVE_SETLOCALE
95 184 : if ((void*)pce->tables) pefree((void*)pce->tables, 1);
96 184 : pefree(pce->locale, 1);
97 : #endif
98 : }
99 :
100 :
101 : static PHP_GINIT_FUNCTION(pcre)
102 220 : {
103 220 : zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
104 220 : pcre_globals->backtrack_limit = 0;
105 220 : pcre_globals->recursion_limit = 0;
106 220 : pcre_globals->error_code = PHP_PCRE_NO_ERROR;
107 220 : }
108 :
109 : static PHP_GSHUTDOWN_FUNCTION(pcre)
110 219 : {
111 219 : zend_hash_destroy(&pcre_globals->pcre_cache);
112 219 : }
113 :
114 : PHP_INI_BEGIN()
115 : STD_PHP_INI_ENTRY("pcre.backtrack_limit", "100000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
116 : STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
117 : PHP_INI_END()
118 :
119 :
/* {{{ PHP_MINFO_FUNCTION(pcre)
   Emits the extension's info table: a fixed "enabled" row followed by the
   version string reported by the PCRE library. */
static PHP_MINFO_FUNCTION(pcre)
{
	const char *library_version = pcre_version();

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");
	php_info_print_table_row(2, "PCRE Library Version", library_version);
	php_info_print_table_end();
}
/* }}} */
129 :
130 : /* {{{ PHP_MINIT_FUNCTION(pcre) */
131 : static PHP_MINIT_FUNCTION(pcre)
132 220 : {
133 220 : REGISTER_INI_ENTRIES();
134 :
135 220 : REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
136 220 : REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
137 220 : REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
138 220 : REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
139 220 : REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
140 220 : REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
141 220 : REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
142 :
143 220 : REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
144 220 : REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
145 220 : REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
146 220 : REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
147 220 : REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
148 :
149 220 : return SUCCESS;
150 : }
151 : /* }}} */
152 :
153 : /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
154 : static PHP_MSHUTDOWN_FUNCTION(pcre)
155 219 : {
156 219 : UNREGISTER_INI_ENTRIES();
157 :
158 219 : return SUCCESS;
159 : }
160 : /* }}} */
161 :
162 : /* {{{ static pcre_clean_cache */
163 : static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
164 0 : {
165 0 : int *num_clean = (int *)arg;
166 :
167 0 : if (*num_clean > 0) {
168 0 : (*num_clean)--;
169 0 : return 1;
170 : } else {
171 0 : return 0;
172 : }
173 : }
174 : /* }}} */
175 :
176 : /* {{{ pcre_get_compiled_regex_cache
177 : */
178 : PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
179 21253 : {
180 21253 : pcre *re = NULL;
181 : pcre_extra *extra;
182 21253 : int coptions = 0;
183 21253 : int soptions = 0;
184 : const char *error;
185 : int erroffset;
186 : char delimiter;
187 : char start_delimiter;
188 : char end_delimiter;
189 : char *p, *pp;
190 : char *pattern;
191 21253 : int do_study = 0;
192 21253 : int poptions = 0;
193 21253 : unsigned const char *tables = NULL;
194 : #if HAVE_SETLOCALE
195 21253 : char *locale = setlocale(LC_CTYPE, NULL);
196 : #endif
197 : pcre_cache_entry *pce;
198 : pcre_cache_entry new_entry;
199 :
200 : /* Try to lookup the cached regex entry, and if successful, just pass
201 : back the compiled pattern, otherwise go on and compile it. */
202 21253 : regex_len = strlen(regex);
203 21253 : if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
204 : /*
205 : * We use a quick pcre_info() check to see whether cache is corrupted, and if it
206 : * is, we flush it and compile the pattern from scratch.
207 : */
208 21069 : if (pcre_info(pce->re, NULL, NULL) == PCRE_ERROR_BADMAGIC) {
209 0 : zend_hash_clean(&PCRE_G(pcre_cache));
210 : } else {
211 : #if HAVE_SETLOCALE
212 21069 : if (!strcmp(pce->locale, locale)) {
213 : #endif
214 21069 : return pce;
215 : #if HAVE_SETLOCALE
216 : }
217 : #endif
218 : }
219 : }
220 :
221 184 : p = regex;
222 :
223 : /* Parse through the leading whitespace, and display a warning if we
224 : get to the end without encountering a delimiter. */
225 184 : while (isspace((int)*(unsigned char *)p)) p++;
226 184 : if (*p == 0) {
227 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
228 0 : return NULL;
229 : }
230 :
231 : /* Get the delimiter and display a warning if it is alphanumeric
232 : or a backslash. */
233 184 : delimiter = *p++;
234 184 : if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
235 0 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
236 0 : return NULL;
237 : }
238 :
239 184 : start_delimiter = delimiter;
240 184 : if ((pp = strchr("([{< )]}> )]}>", delimiter)))
241 0 : delimiter = pp[5];
242 184 : end_delimiter = delimiter;
243 :
244 184 : if (start_delimiter == end_delimiter) {
245 : /* We need to iterate through the pattern, searching for the ending delimiter,
246 : but skipping the backslashed delimiters. If the ending delimiter is not
247 : found, display a warning. */
248 184 : pp = p;
249 38631 : while (*pp != 0) {
250 38447 : if (*pp == '\\' && pp[1] != 0) pp++;
251 34625 : else if (*pp == delimiter)
252 184 : break;
253 38263 : pp++;
254 : }
255 184 : if (*pp == 0) {
256 0 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
257 0 : return NULL;
258 : }
259 : } else {
260 : /* We iterate through the pattern, searching for the matching ending
261 : * delimiter. For each matching starting delimiter, we increment nesting
262 : * level, and decrement it for each matching ending delimiter. If we
263 : * reach the end of the pattern without matching, display a warning.
264 : */
265 0 : int brackets = 1; /* brackets nesting level */
266 0 : pp = p;
267 0 : while (*pp != 0) {
268 0 : if (*pp == '\\' && pp[1] != 0) pp++;
269 0 : else if (*pp == end_delimiter && --brackets <= 0)
270 : break;
271 0 : else if (*pp == start_delimiter)
272 0 : brackets++;
273 0 : pp++;
274 : }
275 0 : if (*pp == 0) {
276 0 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
277 0 : return NULL;
278 : }
279 : }
280 :
281 : /* Make a copy of the actual pattern. */
282 184 : pattern = estrndup(p, pp-p);
283 :
284 : /* Move on to the options */
285 184 : pp++;
286 :
287 : /* Parse through the options, setting appropriate flags. Display
288 : a warning if we encounter an unknown modifier. */
289 548 : while (*pp != 0) {
290 180 : switch (*pp++) {
291 : /* Perl compatible options */
292 1 : case 'i': coptions |= PCRE_CASELESS; break;
293 0 : case 'm': coptions |= PCRE_MULTILINE; break;
294 179 : case 's': coptions |= PCRE_DOTALL; break;
295 0 : case 'x': coptions |= PCRE_EXTENDED; break;
296 :
297 : /* PCRE specific options */
298 0 : case 'A': coptions |= PCRE_ANCHORED; break;
299 0 : case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
300 0 : case 'S': do_study = 1; break;
301 0 : case 'U': coptions |= PCRE_UNGREEDY; break;
302 0 : case 'X': coptions |= PCRE_EXTRA; break;
303 0 : case 'u': coptions |= PCRE_UTF8; break;
304 :
305 : /* Custom preg options */
306 0 : case 'e': poptions |= PREG_REPLACE_EVAL; break;
307 :
308 : case ' ':
309 : case '\n':
310 0 : break;
311 :
312 : default:
313 0 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
314 0 : efree(pattern);
315 0 : return NULL;
316 : }
317 : }
318 :
319 : #if HAVE_SETLOCALE
320 184 : if (strcmp(locale, "C"))
321 184 : tables = pcre_maketables();
322 : #endif
323 :
324 : /* Compile pattern and display a warning if compilation failed. */
325 184 : re = pcre_compile(pattern,
326 : coptions,
327 : &error,
328 : &erroffset,
329 : tables);
330 :
331 184 : if (re == NULL) {
332 0 : php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
333 0 : efree(pattern);
334 0 : if (tables) {
335 0 : pefree((void*)tables, 1);
336 : }
337 0 : return NULL;
338 : }
339 :
340 : /* If study option was specified, study the pattern and
341 : store the result in extra for passing to pcre_exec. */
342 184 : if (do_study) {
343 0 : extra = pcre_study(re, soptions, &error);
344 0 : if (extra) {
345 0 : extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
346 : }
347 0 : if (error != NULL) {
348 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
349 : }
350 : } else {
351 184 : extra = NULL;
352 : }
353 :
354 184 : efree(pattern);
355 :
356 : /*
357 : * If we reached cache limit, clean out the items from the head of the list;
358 : * these are supposedly the oldest ones (but not necessarily the least used
359 : * ones).
360 : */
361 184 : if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
362 0 : int num_clean = PCRE_CACHE_SIZE / 8;
363 0 : zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
364 : }
365 :
366 : /* Store the compiled pattern and extra info in the cache. */
367 184 : new_entry.re = re;
368 184 : new_entry.extra = extra;
369 184 : new_entry.preg_options = poptions;
370 184 : new_entry.compile_options = coptions;
371 : #if HAVE_SETLOCALE
372 184 : new_entry.locale = pestrdup(locale, 1);
373 184 : new_entry.tables = tables;
374 : #endif
375 184 : zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
376 : sizeof(pcre_cache_entry), (void**)&pce);
377 :
378 184 : return pce;
379 : }
380 : /* }}} */
381 :
382 : /* {{{ pcre_get_compiled_regex
383 : */
384 : PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
385 0 : {
386 0 : pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
387 :
388 0 : if (extra) {
389 0 : *extra = pce ? pce->extra : NULL;
390 : }
391 0 : if (preg_options) {
392 0 : *preg_options = pce ? pce->preg_options : 0;
393 : }
394 :
395 0 : return pce ? pce->re : NULL;
396 : }
397 : /* }}} */
398 :
399 : /* {{{ pcre_get_compiled_regex_ex
400 : */
401 : PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
402 0 : {
403 0 : pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
404 :
405 0 : if (extra) {
406 0 : *extra = pce ? pce->extra : NULL;
407 : }
408 0 : if (preg_options) {
409 0 : *preg_options = pce ? pce->preg_options : 0;
410 : }
411 0 : if (compile_options) {
412 0 : *compile_options = pce ? pce->compile_options : 0;
413 : }
414 :
415 0 : return pce ? pce->re : NULL;
416 : }
417 : /* }}} */
418 :
419 : /* {{{ add_offset_pair */
420 : static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
421 0 : {
422 : zval *match_pair;
423 :
424 0 : ALLOC_ZVAL(match_pair);
425 0 : array_init(match_pair);
426 0 : INIT_PZVAL(match_pair);
427 :
428 : /* Add (match, offset) to the return value */
429 0 : add_next_index_stringl(match_pair, str, len, 1);
430 0 : add_next_index_long(match_pair, offset);
431 :
432 0 : if (name) {
433 0 : zval_add_ref(&match_pair);
434 0 : zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
435 : }
436 0 : zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
437 0 : }
438 : /* }}} */
439 :
440 : static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
441 21144 : {
442 : /* parameters */
443 : char *regex; /* Regular expression */
444 : char *subject; /* String to match against */
445 : int regex_len;
446 : int subject_len;
447 : pcre_cache_entry *pce; /* Compiled regular expression */
448 21144 : zval *subpats = NULL; /* Array for subpatterns */
449 21144 : long flags = 0; /* Match control flags */
450 21144 : long start_offset = 0; /* Where the new search starts */
451 :
452 21144 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "ssz|ll" : "ss|zll"), ®ex, ®ex_len,
453 : &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
454 0 : RETURN_FALSE;
455 : }
456 :
457 : /* Compile regex or get it from cache. */
458 21144 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
459 0 : RETURN_FALSE;
460 : }
461 :
462 21144 : php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
463 : global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
464 : }
465 :
466 : PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
467 : zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
468 21144 : {
469 : zval *result_set, /* Holds a set of subpatterns after
470 : a global match */
471 21144 : **match_sets = NULL; /* An array of sets of matches for each
472 : subpattern after a global match */
473 21144 : pcre_extra *extra = pce->extra;/* Holds results of studying */
474 : pcre_extra extra_data; /* Used locally for exec options */
475 21144 : int exoptions = 0; /* Execution options */
476 21144 : int count = 0; /* Count of matched subpatterns */
477 : int *offsets; /* Array of subpattern offsets */
478 : int num_subpats; /* Number of captured subpatterns */
479 : int size_offsets; /* Size of the offsets array */
480 : int matched; /* Has anything matched */
481 21144 : int g_notempty = 0; /* If the match should not be empty */
482 : const char **stringlist; /* Holds list of subpatterns */
483 : char *match; /* The current match */
484 21144 : char **subpat_names = NULL;/* Array for named subpatterns */
485 : int i, rc;
486 : int subpats_order; /* Order of subpattern matches */
487 : int offset_capture; /* Capture match offsets: yes/no */
488 :
489 : /* Overwrite the passed-in value for subpatterns with an empty array. */
490 21144 : if (subpats != NULL) {
491 4501 : zval_dtor(subpats);
492 4501 : array_init(subpats);
493 : }
494 :
495 21144 : subpats_order = global ? PREG_PATTERN_ORDER : 0;
496 :
497 21144 : if (use_flags) {
498 0 : offset_capture = flags & PREG_OFFSET_CAPTURE;
499 :
500 : /*
501 : * subpats_order is pre-set to pattern mode so we change it only if
502 : * necessary.
503 : */
504 0 : if (flags & 0xff) {
505 0 : subpats_order = flags & 0xff;
506 : }
507 0 : if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
508 : (!global && subpats_order != 0)) {
509 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
510 0 : return;
511 : }
512 : } else {
513 21144 : offset_capture = 0;
514 : }
515 :
516 : /* Negative offset counts from the end of the string. */
517 21144 : if (start_offset < 0) {
518 0 : start_offset = subject_len + start_offset;
519 0 : if (start_offset < 0) {
520 0 : start_offset = 0;
521 : }
522 : }
523 :
524 21144 : if (extra == NULL) {
525 21144 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
526 21144 : extra = &extra_data;
527 : }
528 21144 : extra->match_limit = PCRE_G(backtrack_limit);
529 21144 : extra->match_limit_recursion = PCRE_G(recursion_limit);
530 :
531 : /* Calculate the size of the offsets array, and allocate memory for it. */
532 21144 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
533 21144 : if (rc < 0) {
534 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
535 0 : RETURN_FALSE;
536 : }
537 21144 : num_subpats++;
538 21144 : size_offsets = num_subpats * 3;
539 21144 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
540 :
541 : /*
542 : * Build a mapping from subpattern numbers to their names. We will always
543 : * allocate the table, even though there may be no named subpatterns. This
544 : * avoids somewhat more complicated logic in the inner loops.
545 : */
546 21144 : subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0);
547 21144 : memset(subpat_names, 0, sizeof(char *) * num_subpats);
548 : {
549 21144 : int name_cnt = 0, name_size, ni = 0;
550 : char *name_table;
551 : unsigned short name_idx;
552 :
553 21144 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
554 21144 : if (rc < 0) {
555 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
556 0 : efree(offsets);
557 0 : efree(subpat_names);
558 0 : RETURN_FALSE;
559 : }
560 21144 : if (name_cnt > 0) {
561 : int rc1, rc2;
562 :
563 0 : rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
564 0 : rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
565 0 : rc = rc2 ? rc2 : rc1;
566 0 : if (rc < 0) {
567 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
568 0 : efree(offsets);
569 0 : efree(subpat_names);
570 0 : RETURN_FALSE;
571 : }
572 :
573 0 : while (ni++ < name_cnt) {
574 0 : name_idx = 0xff * name_table[0] + name_table[1];
575 0 : subpat_names[name_idx] = name_table + 2;
576 0 : if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
577 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
578 0 : efree(offsets);
579 0 : efree(subpat_names);
580 0 : RETURN_FALSE;
581 : }
582 0 : name_table += name_size;
583 : }
584 : }
585 : }
586 :
587 : /* Allocate match sets array and initialize the values. */
588 21144 : if (global && subpats_order == PREG_PATTERN_ORDER) {
589 0 : match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
590 0 : for (i=0; i<num_subpats; i++) {
591 0 : ALLOC_ZVAL(match_sets[i]);
592 0 : array_init(match_sets[i]);
593 0 : INIT_PZVAL(match_sets[i]);
594 : }
595 : }
596 :
597 21144 : match = NULL;
598 21144 : matched = 0;
599 21144 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
600 :
601 : do {
602 : /* Execute the regular expression. */
603 21144 : count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
604 : exoptions|g_notempty, offsets, size_offsets);
605 :
606 : /* Check for too many substrings condition. */
607 21144 : if (count == 0) {
608 0 : php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
609 0 : count = size_offsets/3;
610 : }
611 :
612 : /* If something has matched */
613 21144 : if (count > 0) {
614 4784 : matched++;
615 4784 : match = subject + offsets[0];
616 :
617 : /* If subpatterns array has been passed, fill it in with values. */
618 4784 : if (subpats != NULL) {
619 : /* Try to get the list of substrings and display a warning if failed. */
620 366 : if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
621 0 : efree(subpat_names);
622 0 : efree(offsets);
623 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
624 0 : RETURN_FALSE;
625 : }
626 :
627 366 : if (global) { /* global pattern matching */
628 0 : if (subpats_order == PREG_PATTERN_ORDER) {
629 : /* For each subpattern, insert it into the appropriate array. */
630 0 : for (i = 0; i < count; i++) {
631 0 : if (offset_capture) {
632 0 : add_offset_pair(match_sets[i], (char *)stringlist[i],
633 : offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
634 : } else {
635 0 : add_next_index_stringl(match_sets[i], (char *)stringlist[i],
636 : offsets[(i<<1)+1] - offsets[i<<1], 1);
637 : }
638 : }
639 : /*
640 : * If the number of captured subpatterns on this run is
641 : * less than the total possible number, pad the result
642 : * arrays with empty strings.
643 : */
644 0 : if (count < num_subpats) {
645 0 : for (; i < num_subpats; i++) {
646 0 : add_next_index_string(match_sets[i], "", 1);
647 : }
648 : }
649 : } else {
650 : /* Allocate the result set array */
651 0 : ALLOC_ZVAL(result_set);
652 0 : array_init(result_set);
653 0 : INIT_PZVAL(result_set);
654 :
655 : /* Add all the subpatterns to it */
656 0 : for (i = 0; i < count; i++) {
657 0 : if (offset_capture) {
658 0 : add_offset_pair(result_set, (char *)stringlist[i],
659 : offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
660 : } else {
661 0 : if (subpat_names[i]) {
662 0 : add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
663 : offsets[(i<<1)+1] - offsets[i<<1], 1);
664 : }
665 0 : add_next_index_stringl(result_set, (char *)stringlist[i],
666 : offsets[(i<<1)+1] - offsets[i<<1], 1);
667 : }
668 : }
669 : /* And add it to the output array */
670 0 : zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
671 : }
672 : } else { /* single pattern matching */
673 : /* For each subpattern, insert it into the subpatterns array. */
674 1098 : for (i = 0; i < count; i++) {
675 732 : if (offset_capture) {
676 0 : add_offset_pair(subpats, (char *)stringlist[i],
677 : offsets[(i<<1)+1] - offsets[i<<1],
678 : offsets[i<<1], subpat_names[i]);
679 : } else {
680 732 : if (subpat_names[i]) {
681 0 : add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
682 : offsets[(i<<1)+1] - offsets[i<<1], 1);
683 : }
684 732 : add_next_index_stringl(subpats, (char *)stringlist[i],
685 : offsets[(i<<1)+1] - offsets[i<<1], 1);
686 : }
687 : }
688 : }
689 :
690 366 : pcre_free((void *) stringlist);
691 : }
692 16360 : } else if (count == PCRE_ERROR_NOMATCH) {
693 : /* If we previously set PCRE_NOTEMPTY after a null match,
694 : this is not necessarily the end. We need to advance
695 : the start offset, and continue. Fudge the offset values
696 : to achieve this, unless we're already at the end of the string. */
697 16360 : if (g_notempty != 0 && start_offset < subject_len) {
698 0 : offsets[0] = start_offset;
699 0 : offsets[1] = start_offset + 1;
700 : } else
701 : break;
702 : } else {
703 0 : pcre_handle_exec_error(count TSRMLS_CC);
704 0 : break;
705 : }
706 :
707 : /* If we have matched an empty string, mimic what Perl's /g options does.
708 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
709 : the match again at the same point. If this fails (picked up above) we
710 : advance to the next character. */
711 4784 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
712 :
713 : /* Advance to the position right after the last full match */
714 4784 : start_offset = offsets[1];
715 4784 : } while (global);
716 :
717 : /* Add the match sets to the output array and clean up */
718 21144 : if (global && subpats_order == PREG_PATTERN_ORDER) {
719 0 : for (i = 0; i < num_subpats; i++) {
720 0 : if (subpat_names[i]) {
721 0 : zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
722 : strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
723 0 : ZVAL_ADDREF(match_sets[i]);
724 : }
725 0 : zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
726 : }
727 0 : efree(match_sets);
728 : }
729 :
730 21144 : efree(offsets);
731 21144 : efree(subpat_names);
732 :
733 21144 : RETVAL_LONG(matched);
734 : }
735 : /* }}} */
736 :
737 : /* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, int flags [, int offset]]])
738 : Perform a Perl-style regular expression match */
739 : PHP_FUNCTION(preg_match)
740 21144 : {
741 21144 : php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
742 21144 : }
743 : /* }}} */
744 :
745 : /* {{{ proto int preg_match_all(string pattern, string subject, array subpatterns [, int flags [, int offset]])
746 : Perform a Perl-style global regular expression match */
747 : PHP_FUNCTION(preg_match_all)
748 0 : {
749 0 : php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
750 0 : }
751 : /* }}} */
752 :
/* {{{ preg_get_backref
   Parses a back-reference at *str, which must point at the introducing
   character. Accepted forms are <intro>N, <intro>NN and, when the introducer
   is '$', ${N} / ${NN} (one or two decimal digits).

   On success *backref holds the number, *str is moved past the reference
   and 1 is returned. On failure 0 is returned and *str is left alone;
   *backref may already have been overwritten with the digits read so far.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cur = *str;
	int braced;

	/* Nothing can follow the introducer. */
	if (cur[1] == 0) {
		return 0;
	}

	/* Only "${" opens the braced form; step over the introducer (and the
	   brace, if any). */
	braced = (cur[0] == '$' && cur[1] == '{');
	cur += braced ? 2 : 1;

	/* First digit is mandatory. */
	if (*cur < '0' || *cur > '9') {
		return 0;
	}
	*backref = *cur - '0';
	cur++;

	/* Second digit is optional. */
	if (*cur >= '0' && *cur <= '9') {
		*backref = *backref * 10 + (*cur - '0');
		cur++;
	}

	/* The braced form must be closed right after the digits. */
	if (braced) {
		if (*cur != '}') {
			return 0;
		}
		cur++;
	}

	*str = cur;
	return 1;
}
/* }}} */
791 :
792 : /* {{{ preg_do_repl_func
793 : */
794 : static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **result TSRMLS_DC)
795 0 : {
796 : zval *retval_ptr; /* Function return value */
797 : zval **args[1]; /* Argument to pass to function */
798 : zval *subpats; /* Captured subpatterns */
799 : int result_len; /* Return value length */
800 : int i;
801 :
802 0 : MAKE_STD_ZVAL(subpats);
803 0 : array_init(subpats);
804 0 : for (i = 0; i < count; i++)
805 0 : add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
806 0 : args[0] = &subpats;
807 :
808 0 : if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
809 0 : convert_to_string_ex(&retval_ptr);
810 0 : *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
811 0 : result_len = Z_STRLEN_P(retval_ptr);
812 0 : zval_ptr_dtor(&retval_ptr);
813 : } else {
814 0 : if (!EG(exception)) {
815 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
816 : }
817 0 : result_len = offsets[1] - offsets[0];
818 0 : *result = estrndup(&subject[offsets[0]], result_len);
819 : }
820 0 : zval_dtor(subpats);
821 0 : FREE_ZVAL(subpats);
822 :
823 0 : return result_len;
824 : }
825 : /* }}} */
826 :
827 : /* {{{ preg_do_eval
828 : */
829 : static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
830 : int *offsets, int count, char **result TSRMLS_DC)
831 0 : {
832 : zval retval; /* Return value from evaluation */
833 : char *eval_str_end, /* End of eval string */
834 : *match, /* Current match for a backref */
835 : *esc_match, /* Quote-escaped match */
836 : *walk, /* Used to walk the code string */
837 : *segment, /* Start of segment to append while walking */
838 : walk_last; /* Last walked character */
839 : int match_len; /* Length of the match */
840 : int esc_match_len; /* Length of the quote-escaped match */
841 : int result_len; /* Length of the result of the evaluation */
842 : int backref; /* Current backref */
843 : char *compiled_string_description;
844 0 : smart_str code = {0};
845 :
846 0 : eval_str_end = eval_str + eval_str_len;
847 0 : walk = segment = eval_str;
848 0 : walk_last = 0;
849 :
850 0 : while (walk < eval_str_end) {
851 : /* If found a backreference.. */
852 0 : if ('\\' == *walk || '$' == *walk) {
853 0 : smart_str_appendl(&code, segment, walk - segment);
854 0 : if (walk_last == '\\') {
855 0 : code.c[code.len-1] = *walk++;
856 0 : segment = walk;
857 0 : walk_last = 0;
858 0 : continue;
859 : }
860 0 : segment = walk;
861 0 : if (preg_get_backref(&walk, &backref)) {
862 0 : if (backref < count) {
863 : /* Find the corresponding string match and substitute it
864 : in instead of the backref */
865 0 : match = subject + offsets[backref<<1];
866 0 : match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
867 0 : if (match_len) {
868 0 : esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0, 1 TSRMLS_CC);
869 : } else {
870 0 : esc_match = match;
871 0 : esc_match_len = 0;
872 : }
873 : } else {
874 0 : esc_match = "";
875 0 : esc_match_len = 0;
876 0 : match_len = 0;
877 : }
878 0 : smart_str_appendl(&code, esc_match, esc_match_len);
879 :
880 0 : segment = walk;
881 :
882 : /* Clean up and reassign */
883 0 : if (esc_match_len)
884 0 : efree(esc_match);
885 0 : continue;
886 : }
887 : }
888 0 : walk++;
889 0 : walk_last = walk[-1];
890 : }
891 0 : smart_str_appendl(&code, segment, walk - segment);
892 0 : smart_str_0(&code);
893 :
894 0 : compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
895 : /* Run the code */
896 0 : if (zend_eval_string(code.c, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
897 0 : efree(compiled_string_description);
898 0 : php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
899 : /* zend_error() does not return in this case */
900 : }
901 0 : efree(compiled_string_description);
902 0 : convert_to_string(&retval);
903 :
904 : /* Save the return value and its length */
905 0 : *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
906 0 : result_len = Z_STRLEN(retval);
907 :
908 : /* Clean up */
909 0 : zval_dtor(&retval);
910 0 : smart_str_free(&code);
911 :
912 0 : return result_len;
913 : }
914 : /* }}} */
915 :
916 : /* {{{ php_pcre_replace
917 : */
918 : PHPAPI char *php_pcre_replace(char *regex, int regex_len,
919 : char *subject, int subject_len,
920 : zval *replace_val, int is_callable_replace,
921 : int *result_len, int limit, int *replace_count TSRMLS_DC)
922 108 : {
923 : pcre_cache_entry *pce; /* Compiled regular expression */
924 :
925 : /* Compile regex or get it from cache. */
926 108 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
927 0 : return NULL;
928 : }
929 :
930 108 : return php_pcre_replace_impl(pce, subject, subject_len, replace_val,
931 : is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
932 : }
933 :
/*
 * php_pcre_replace_impl
 *
 * Repeatedly executes the compiled pattern in `pce` against `subject` and
 * builds a new string in which each match is replaced. The replacement text
 * for a match comes from one of three sources:
 *   - preg_do_eval() when the pattern carries PREG_REPLACE_EVAL (/e),
 *   - preg_do_repl_func() when is_callable_replace is non-zero,
 *   - otherwise the string in replace_val, with "\" / "$" backreferences
 *     recognised by preg_get_backref() expanded from the offsets vector.
 *
 * limit         -1 means unlimited; any other value is decremented per
 *               replacement and replacing stops once it reaches 0.
 * result_len    receives the length of the returned string.
 * replace_count if non-NULL, incremented once per replacement performed.
 *
 * Returns an emalloc'ed, NUL-terminated buffer on success. Returns NULL when
 * /e is combined with a callback, when pcre_fullinfo() fails, or when
 * pcre_exec() reports an error other than PCRE_ERROR_NOMATCH (in which case
 * the partially built buffer is freed).
 */
934 : PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
935 : int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
936 108 : {
937 108 : pcre_extra *extra = pce->extra;/* Holds results of studying */
938 : pcre_extra extra_data; /* Used locally for exec options */
939 108 : int exoptions = 0; /* Execution options */
940 108 : int count = 0; /* Count of matched subpatterns */
941 : int *offsets; /* Array of subpattern offsets */
942 : int size_offsets; /* Size of the offsets array */
943 : int new_len; /* Length of needed storage */
944 : int alloc_len; /* Actual allocated length */
945 108 : int eval_result_len=0; /* Length of the eval'ed or
946 : function-returned string */
947 : int match_len; /* Length of the current match */
948 : int backref; /* Backreference number */
949 : int eval; /* If the replacement string should be eval'ed */
950 : int start_offset; /* Where the new search starts */
951 108 : int g_notempty=0; /* If the match should not be empty */
952 108 : int replace_len=0; /* Length of replacement string */
953 : char *result, /* Result of replacement */
954 108 : *replace=NULL, /* Replacement string */
955 : *new_buf, /* Temporary buffer for re-allocation */
956 : *walkbuf, /* Location of current replacement in the result */
957 : *walk, /* Used to walk the replacement string */
958 : *match, /* The current match */
959 : *piece, /* The current piece of subject */
960 108 : *replace_end=NULL, /* End of replacement string */
961 : *eval_result, /* Result of eval or custom function */
962 : walk_last; /* Last walked character */
963 : int rc;
964 :
/* Without study data, fall back to a local pcre_extra that only carries
   the match/recursion limits configured in the module globals. */
965 108 : if (extra == NULL) {
966 108 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
967 108 : extra = &extra_data;
968 : }
969 108 : extra->match_limit = PCRE_G(backtrack_limit);
970 108 : extra->match_limit_recursion = PCRE_G(recursion_limit);
971 :
972 108 : eval = pce->preg_options & PREG_REPLACE_EVAL;
973 108 : if (is_callable_replace) {
974 0 : if (eval) {
975 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
976 0 : return NULL;
977 : }
978 : } else {
/* Plain replacement: replace_val is read as a string zval here. */
979 108 : replace = Z_STRVAL_P(replace_val);
980 108 : replace_len = Z_STRLEN_P(replace_val);
981 108 : replace_end = replace + replace_len;
982 : }
983 :
984 : /* Calculate the size of the offsets array, and allocate memory for it. */
985 108 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
986 108 : if (rc < 0) {
987 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
988 0 : return NULL;
989 : }
990 108 : size_offsets = (size_offsets + 1) * 3;
991 108 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
992 :
/* Initial guess for the output buffer; it is grown below when a
   replacement needs more room. */
993 108 : alloc_len = 2 * subject_len + 1;
994 108 : result = safe_emalloc(alloc_len, sizeof(char), 0);
995 :
996 : /* Initialize */
997 108 : match = NULL;
998 108 : *result_len = 0;
999 108 : start_offset = 0;
1000 108 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1001 :
1002 : while (1) {
1003 : /* Execute the regular expression. */
1004 347 : count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1005 : exoptions|g_notempty, offsets, size_offsets);
1006 :
1007 : /* Check for too many substrings condition. */
1008 347 : if (count == 0) {
1009 0 : php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1010 0 : count = size_offsets/3;
1011 : }
1012 :
1013 347 : piece = subject + start_offset;
1014 :
1015 586 : if (count > 0 && (limit == -1 || limit > 0)) {
1016 239 : if (replace_count) {
1017 0 : ++*replace_count;
1018 : }
1019 : /* Set the match location in subject */
1020 239 : match = subject + offsets[0];
1021 :
1022 239 : new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1023 :
/* First pass: only measure how long the replacement will be, so the
   buffer can be grown before anything is copied. */
1024 : /* If evaluating, do it and add the return string's length */
1025 239 : if (eval) {
1026 0 : eval_result_len = preg_do_eval(replace, replace_len, subject,
1027 : offsets, count, &eval_result TSRMLS_CC);
1028 0 : new_len += eval_result_len;
1029 239 : } else if (is_callable_replace) {
1030 : /* Use custom function to get replacement string and its length. */
1031 0 : eval_result_len = preg_do_repl_func(replace_val, subject, offsets,
1032 : count, &eval_result TSRMLS_CC);
1033 0 : new_len += eval_result_len;
1034 : } else { /* do regular substitution */
1035 239 : walk = replace;
1036 239 : walk_last = 0;
1037 717 : while (walk < replace_end) {
1038 239 : if ('\\' == *walk || '$' == *walk) {
/* A "\" or "$" directly after a backslash is an escaped literal:
   the backslash was already counted, so this character adds nothing. */
1039 0 : if (walk_last == '\\') {
1040 0 : walk++;
1041 0 : walk_last = 0;
1042 0 : continue;
1043 : }
1044 0 : if (preg_get_backref(&walk, &backref)) {
1045 0 : if (backref < count)
1046 0 : new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1047 0 : continue;
1048 : }
1049 : }
1050 239 : new_len++;
1051 239 : walk++;
1052 239 : walk_last = walk[-1];
1053 : }
1054 : }
1055 :
/* Grow the output buffer (with headroom) if the measured length plus
   the terminating NUL does not fit. */
1056 239 : if (new_len + 1 > alloc_len) {
1057 0 : alloc_len = 1 + alloc_len + 2 * new_len;
1058 0 : new_buf = emalloc(alloc_len);
1059 0 : memcpy(new_buf, result, *result_len);
1060 0 : efree(result);
1061 0 : result = new_buf;
1062 : }
1063 : /* copy the part of the string before the match */
1064 239 : memcpy(&result[*result_len], piece, match-piece);
1065 239 : *result_len += match-piece;
1066 :
1067 : /* copy replacement and backrefs */
1068 239 : walkbuf = result + *result_len;
1069 :
1070 : /* If evaluating or using custom function, copy result to the buffer
1071 : * and clean up. */
1072 239 : if (eval || is_callable_replace) {
1073 0 : memcpy(walkbuf, eval_result, eval_result_len);
1074 0 : *result_len += eval_result_len;
1075 0 : STR_FREE(eval_result);
1076 : } else { /* do regular backreference copying */
/* Second pass: same walk as the measuring loop above, now writing. */
1077 239 : walk = replace;
1078 239 : walk_last = 0;
1079 717 : while (walk < replace_end) {
1080 239 : if ('\\' == *walk || '$' == *walk) {
1081 0 : if (walk_last == '\\') {
/* Overwrite the backslash copied on the previous iteration with the
   escaped character itself. */
1082 0 : *(walkbuf-1) = *walk++;
1083 0 : walk_last = 0;
1084 0 : continue;
1085 : }
1086 0 : if (preg_get_backref(&walk, &backref)) {
1087 0 : if (backref < count) {
1088 0 : match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1089 0 : memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1090 0 : walkbuf += match_len;
1091 : }
1092 0 : continue;
1093 : }
1094 : }
1095 239 : *walkbuf++ = *walk++;
1096 239 : walk_last = walk[-1];
1097 : }
1098 239 : *walkbuf = '\0';
1099 : /* increment the result length by how much we've added to the string */
1100 239 : *result_len += walkbuf - (result + *result_len);
1101 : }
1102 :
1103 239 : if (limit != -1)
1104 0 : limit--;
1105 :
1106 108 : } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1107 : /* If we previously set PCRE_NOTEMPTY after a null match,
1108 : this is not necessarily the end. We need to advance
1109 : the start offset, and continue. Fudge the offset values
1110 : to achieve this, unless we're already at the end of the string. */
/* NOTE(review): this advances by exactly one byte regardless of
   pce->compile_options; php_pcre_split_impl() in this file advances by a
   whole character for PCRE_UTF8 patterns - confirm whether the same is
   needed here. */
1111 108 : if (g_notempty != 0 && start_offset < subject_len) {
1112 0 : offsets[0] = start_offset;
1113 0 : offsets[1] = start_offset + 1;
1114 0 : memcpy(&result[*result_len], piece, 1);
1115 0 : (*result_len)++;
1116 : } else {
/* No further replacements: append the untouched remainder of the
   subject and finish. */
1117 108 : new_len = *result_len + subject_len - start_offset;
1118 108 : if (new_len + 1 > alloc_len) {
1119 0 : alloc_len = new_len + 1; /* now we know exactly how long it is */
1120 0 : new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1121 0 : memcpy(new_buf, result, *result_len);
1122 0 : efree(result);
1123 0 : result = new_buf;
1124 : }
1125 : /* stick that last bit of string on our output */
1126 108 : memcpy(&result[*result_len], piece, subject_len - start_offset);
1127 108 : *result_len += subject_len - start_offset;
1128 108 : result[*result_len] = '\0';
1129 108 : break;
1130 : }
1131 : } else {
/* Any other negative pcre_exec() result: record the error, discard the
   partial output and report failure through a NULL return. */
1132 0 : pcre_handle_exec_error(count TSRMLS_CC);
1133 0 : efree(result);
1134 0 : result = NULL;
1135 0 : break;
1136 : }
1137 :
1138 : /* If we have matched an empty string, mimic what Perl's /g options does.
1139 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1140 : the match again at the same point. If this fails (picked up above) we
1141 : advance to the next character. */
1142 239 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1143 :
1144 : /* Advance to the next piece. */
1145 239 : start_offset = offsets[1];
1146 239 : }
1147 :
1148 108 : efree(offsets);
1149 :
1150 108 : return result;
1151 : }
1152 : /* }}} */
1153 :
1154 : /* {{{ php_replace_in_subject
1155 : */
1156 : static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, zend_bool is_callable_replace, int *replace_count TSRMLS_DC)
1157 108 : {
1158 : zval **regex_entry,
1159 108 : **replace_entry = NULL,
1160 : *replace_value,
1161 : empty_replace;
1162 : char *subject_value,
1163 : *result;
1164 : int subject_len;
1165 :
1166 : /* Make sure we're dealing with strings. */
1167 108 : convert_to_string_ex(subject);
1168 : /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1169 108 : ZVAL_STRINGL(&empty_replace, "", 0, 0);
1170 :
1171 : /* If regex is an array */
1172 108 : if (Z_TYPE_P(regex) == IS_ARRAY) {
1173 : /* Duplicate subject string for repeated replacement */
1174 0 : subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
1175 0 : subject_len = Z_STRLEN_PP(subject);
1176 0 : *result_len = subject_len;
1177 :
1178 0 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
1179 :
1180 0 : replace_value = replace;
1181 0 : if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
1182 0 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
1183 :
1184 : /* For each entry in the regex array, get the entry */
1185 0 : while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) {
1186 : /* Make sure we're dealing with strings. */
1187 0 : convert_to_string_ex(regex_entry);
1188 :
1189 : /* If replace is an array and not a callable construct */
1190 0 : if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1191 : /* Get current entry */
1192 0 : if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
1193 0 : if (!is_callable_replace) {
1194 0 : convert_to_string_ex(replace_entry);
1195 : }
1196 0 : replace_value = *replace_entry;
1197 0 : zend_hash_move_forward(Z_ARRVAL_P(replace));
1198 : } else {
1199 : /* We've run out of replacement strings, so use an empty one */
1200 0 : replace_value = &empty_replace;
1201 : }
1202 : }
1203 :
1204 : /* Do the actual replacement and put the result back into subject_value
1205 : for further replacements. */
1206 0 : if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
1207 : Z_STRLEN_PP(regex_entry),
1208 : subject_value,
1209 : subject_len,
1210 : replace_value,
1211 : is_callable_replace,
1212 : result_len,
1213 : limit,
1214 : replace_count TSRMLS_CC)) != NULL) {
1215 0 : efree(subject_value);
1216 0 : subject_value = result;
1217 0 : subject_len = *result_len;
1218 : }
1219 :
1220 0 : zend_hash_move_forward(Z_ARRVAL_P(regex));
1221 : }
1222 :
1223 0 : return subject_value;
1224 : } else {
1225 108 : result = php_pcre_replace(Z_STRVAL_P(regex),
1226 : Z_STRLEN_P(regex),
1227 : Z_STRVAL_PP(subject),
1228 : Z_STRLEN_PP(subject),
1229 : replace,
1230 : is_callable_replace,
1231 : result_len,
1232 : limit,
1233 : replace_count TSRMLS_CC);
1234 108 : return result;
1235 : }
1236 : }
1237 : /* }}} */
1238 :
1239 : /* {{{ preg_replace_impl
1240 : */
1241 : static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_callable_replace)
1242 108 : {
1243 : zval **regex,
1244 : **replace,
1245 : **subject,
1246 : **limit,
1247 : **subject_entry,
1248 : **zcount;
1249 : char *result;
1250 : int result_len;
1251 108 : int limit_val = -1;
1252 : char *string_key;
1253 : ulong num_key;
1254 : char *callback_name;
1255 108 : int replace_count=0;
1256 108 : int *replace_count_ptr=NULL;
1257 :
1258 : /* Get function parameters and do error-checking. */
1259 108 : if (ZEND_NUM_ARGS() < 3 || ZEND_NUM_ARGS() > 5 ||
1260 : zend_get_parameters_ex(ZEND_NUM_ARGS(), ®ex, &replace, &subject, &limit, &zcount) == FAILURE) {
1261 0 : WRONG_PARAM_COUNT;
1262 : }
1263 108 : if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
1264 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1265 0 : RETURN_FALSE;
1266 : }
1267 :
1268 108 : SEPARATE_ZVAL(replace);
1269 108 : if (Z_TYPE_PP(replace) != IS_ARRAY)
1270 108 : convert_to_string_ex(replace);
1271 108 : if (is_callable_replace) {
1272 0 : if (!zend_is_callable(*replace, 0, &callback_name)) {
1273 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
1274 0 : efree(callback_name);
1275 0 : *return_value = **subject;
1276 0 : zval_copy_ctor(return_value);
1277 0 : INIT_PZVAL(return_value);
1278 0 : return;
1279 : }
1280 0 : efree(callback_name);
1281 : }
1282 :
1283 108 : SEPARATE_ZVAL(regex);
1284 108 : SEPARATE_ZVAL(subject);
1285 :
1286 108 : if (ZEND_NUM_ARGS() > 3) {
1287 0 : convert_to_long_ex(limit);
1288 0 : limit_val = Z_LVAL_PP(limit);
1289 : }
1290 108 : if (ZEND_NUM_ARGS() > 4) {
1291 0 : replace_count_ptr =& replace_count;
1292 : }
1293 :
1294 108 : if (Z_TYPE_PP(regex) != IS_ARRAY)
1295 108 : convert_to_string_ex(regex);
1296 :
1297 : /* if subject is an array */
1298 108 : if (Z_TYPE_PP(subject) == IS_ARRAY) {
1299 0 : array_init(return_value);
1300 0 : zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
1301 :
1302 : /* For each subject entry, convert it to string, then perform replacement
1303 : and add the result to the return_value array. */
1304 0 : while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
1305 0 : SEPARATE_ZVAL(subject_entry);
1306 0 : if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, replace_count_ptr TSRMLS_CC)) != NULL) {
1307 : /* Add to return array */
1308 0 : switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
1309 : {
1310 : case HASH_KEY_IS_STRING:
1311 0 : add_assoc_stringl(return_value, string_key, result, result_len, 0);
1312 0 : break;
1313 :
1314 : case HASH_KEY_IS_LONG:
1315 0 : add_index_stringl(return_value, num_key, result, result_len, 0);
1316 : break;
1317 : }
1318 : }
1319 :
1320 0 : zend_hash_move_forward(Z_ARRVAL_PP(subject));
1321 : }
1322 : } else { /* if subject is not an array */
1323 108 : if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, replace_count_ptr TSRMLS_CC)) != NULL) {
1324 108 : RETVAL_STRINGL(result, result_len, 0);
1325 : }
1326 : }
1327 108 : if (replace_count_ptr) {
1328 0 : zval_dtor(*zcount);
1329 0 : ZVAL_LONG(*zcount, replace_count);
1330 : }
1331 :
1332 : }
1333 : /* }}} */
1334 :
1335 : /* {{{ proto string preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, count]])
1336 : Perform Perl-style regular expression replacement. */
1337 : PHP_FUNCTION(preg_replace)
1338 108 : {
1339 108 : preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1340 108 : }
1341 : /* }}} */
1342 :
1343 : /* {{{ proto string preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, count]])
1344 : Perform Perl-style regular expression replacement using replacement callback. */
1345 : PHP_FUNCTION(preg_replace_callback)
1346 0 : {
1347 0 : preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1348 0 : }
1349 : /* }}} */
1350 :
1351 : /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1352 : Split string into an array using a perl-style regular expression as a delimiter */
1353 : PHP_FUNCTION(preg_split)
1354 1 : {
1355 : char *regex; /* Regular expression */
1356 : char *subject; /* String to match against */
1357 : int regex_len;
1358 : int subject_len;
1359 1 : long limit_val = -1;/* Integer value of limit */
1360 1 : long flags = 0; /* Match control flags */
1361 : pcre_cache_entry *pce; /* Compiled regular expression */
1362 :
1363 : /* Get function parameters and do error checking */
1364 1 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len,
1365 : &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1366 0 : RETURN_FALSE;
1367 : }
1368 :
1369 : /* Compile regex or get it from cache. */
1370 1 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1371 0 : RETURN_FALSE;
1372 : }
1373 :
1374 1 : php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
1375 : }
1376 :
1377 : /* {{{ php_pcre_split
1378 : */
1379 : PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1380 : long limit_val, long flags TSRMLS_DC)
1381 1 : {
1382 1 : pcre_extra *extra = NULL; /* Holds results of studying */
1383 1 : pcre *re_bump = NULL; /* Regex instance for empty matches */
1384 1 : pcre_extra *extra_bump = NULL; /* Almost dummy */
1385 : pcre_extra extra_data; /* Used locally for exec options */
1386 : int *offsets; /* Array of subpattern offsets */
1387 : int size_offsets; /* Size of the offsets array */
1388 1 : int exoptions = 0; /* Execution options */
1389 1 : int count = 0; /* Count of matched subpatterns */
1390 : int start_offset; /* Where the new search starts */
1391 : int next_offset; /* End of the last delimiter match + 1 */
1392 1 : int g_notempty = 0; /* If the match should not be empty */
1393 : char *match, /* The current match */
1394 : *last_match; /* Location of last match */
1395 : int rc;
1396 : int no_empty; /* If NO_EMPTY flag is set */
1397 : int delim_capture; /* If delimiters should be captured */
1398 : int offset_capture; /* If offsets should be captured */
1399 :
1400 1 : no_empty = flags & PREG_SPLIT_NO_EMPTY;
1401 1 : delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1402 1 : offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1403 :
1404 1 : if (limit_val == 0) {
1405 0 : limit_val = -1;
1406 : }
1407 :
1408 1 : if (extra == NULL) {
1409 1 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1410 1 : extra = &extra_data;
1411 : }
1412 1 : extra->match_limit = PCRE_G(backtrack_limit);
1413 1 : extra->match_limit_recursion = PCRE_G(recursion_limit);
1414 :
1415 : /* Initialize return value */
1416 1 : array_init(return_value);
1417 :
1418 : /* Calculate the size of the offsets array, and allocate memory for it. */
1419 1 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1420 1 : if (rc < 0) {
1421 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1422 0 : RETURN_FALSE;
1423 : }
1424 1 : size_offsets = (size_offsets + 1) * 3;
1425 1 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1426 :
1427 : /* Start at the beginning of the string */
1428 1 : start_offset = 0;
1429 1 : next_offset = 0;
1430 1 : last_match = subject;
1431 1 : match = NULL;
1432 1 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1433 :
1434 : /* Get next piece if no limit or limit not yet reached and something matched*/
1435 4 : while ((limit_val == -1 || limit_val > 1)) {
1436 3 : count = pcre_exec(pce->re, extra, subject,
1437 : subject_len, start_offset,
1438 : exoptions|g_notempty, offsets, size_offsets);
1439 :
1440 : /* Check for too many substrings condition. */
1441 3 : if (count == 0) {
1442 0 : php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1443 0 : count = size_offsets/3;
1444 : }
1445 :
1446 : /* If something matched */
1447 3 : if (count > 0) {
1448 2 : match = subject + offsets[0];
1449 :
1450 2 : if (!no_empty || &subject[offsets[0]] != last_match) {
1451 :
1452 2 : if (offset_capture) {
1453 : /* Add (match, offset) pair to the return value */
1454 0 : add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1455 : } else {
1456 : /* Add the piece to the return value */
1457 2 : add_next_index_stringl(return_value, last_match,
1458 : &subject[offsets[0]]-last_match, 1);
1459 : }
1460 :
1461 : /* One less left to do */
1462 2 : if (limit_val != -1)
1463 0 : limit_val--;
1464 : }
1465 :
1466 2 : last_match = &subject[offsets[1]];
1467 2 : next_offset = offsets[1];
1468 :
1469 2 : if (delim_capture) {
1470 : int i, match_len;
1471 0 : for (i = 1; i < count; i++) {
1472 0 : match_len = offsets[(i<<1)+1] - offsets[i<<1];
1473 : /* If we have matched a delimiter */
1474 0 : if (!no_empty || match_len > 0) {
1475 0 : if (offset_capture) {
1476 0 : add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1477 : } else {
1478 0 : add_next_index_stringl(return_value,
1479 : &subject[offsets[i<<1]],
1480 : match_len, 1);
1481 : }
1482 : }
1483 : }
1484 : }
1485 1 : } else if (count == PCRE_ERROR_NOMATCH) {
1486 : /* If we previously set PCRE_NOTEMPTY after a null match,
1487 : this is not necessarily the end. We need to advance
1488 : the start offset, and continue. Fudge the offset values
1489 : to achieve this, unless we're already at the end of the string. */
1490 1 : if (g_notempty != 0 && start_offset < subject_len) {
1491 0 : if (pce->compile_options & PCRE_UTF8) {
1492 0 : if (re_bump == NULL) {
1493 : int dummy;
1494 :
1495 0 : if ((re_bump = pcre_get_compiled_regex("/./u", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
1496 0 : RETURN_FALSE;
1497 : }
1498 : }
1499 0 : count = pcre_exec(re_bump, extra_bump, subject,
1500 : subject_len, start_offset,
1501 : exoptions, offsets, size_offsets);
1502 0 : if (count < 1) {
1503 0 : php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Unknown error");
1504 0 : offsets[0] = start_offset;
1505 0 : offsets[1] = start_offset + 1;
1506 : }
1507 : } else {
1508 0 : offsets[0] = start_offset;
1509 0 : offsets[1] = start_offset + 1;
1510 : }
1511 : } else
1512 : break;
1513 : } else {
1514 0 : pcre_handle_exec_error(count TSRMLS_CC);
1515 0 : break;
1516 : }
1517 :
1518 : /* If we have matched an empty string, mimic what Perl's /g options does.
1519 : This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
1520 : the match again at the same point. If this fails (picked up above) we
1521 : advance to the next character. */
1522 2 : g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
1523 :
1524 : /* Advance to the position right after the last full match */
1525 2 : start_offset = offsets[1];
1526 : }
1527 :
1528 :
1529 1 : if (!no_empty || start_offset != subject_len)
1530 : {
1531 1 : if (offset_capture) {
1532 : /* Add the last (match, offset) pair to the return value */
1533 0 : add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1534 : } else {
1535 : /* Add the last piece to the return value */
1536 1 : add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1537 : }
1538 : }
1539 :
1540 :
1541 : /* Clean up */
1542 1 : efree(offsets);
1543 : }
1544 : /* }}} */
1545 :
1546 : /* {{{ proto string preg_quote(string str [, string delim_char])
1547 : Quote regular expression characters plus an optional character */
1548 : PHP_FUNCTION(preg_quote)
1549 107 : {
1550 : int in_str_len;
1551 : char *in_str; /* Input string argument */
1552 : char *in_str_end; /* End of the input string */
1553 : int delim_len;
1554 107 : char *delim = NULL; /* Additional delimiter argument */
1555 : char *out_str, /* Output string with quoted characters */
1556 : *p, /* Iterator for input string */
1557 : *q, /* Iterator for output string */
1558 107 : delim_char=0, /* Delimiter character to be quoted */
1559 : c; /* Current character */
1560 107 : zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1561 :
1562 : /* Get the arguments and check for errors */
1563 107 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1564 : &delim, &delim_len) == FAILURE) {
1565 0 : return;
1566 : }
1567 :
1568 107 : in_str_end = in_str + in_str_len;
1569 :
1570 : /* Nothing to do if we got an empty string */
1571 107 : if (in_str == in_str_end) {
1572 0 : RETURN_EMPTY_STRING();
1573 : }
1574 :
1575 107 : if (delim && *delim) {
1576 107 : delim_char = delim[0];
1577 107 : quote_delim = 1;
1578 : }
1579 :
1580 : /* Allocate enough memory so that even if each character
1581 : is quoted, we won't run out of room */
1582 107 : out_str = safe_emalloc(4, in_str_len, 1);
1583 :
1584 : /* Go through the string and quote necessary characters */
1585 35936 : for(p = in_str, q = out_str; p != in_str_end; p++) {
1586 35829 : c = *p;
1587 35829 : switch(c) {
1588 : case '.':
1589 : case '\\':
1590 : case '+':
1591 : case '*':
1592 : case '?':
1593 : case '[':
1594 : case '^':
1595 : case ']':
1596 : case '$':
1597 : case '(':
1598 : case ')':
1599 : case '{':
1600 : case '}':
1601 : case '=':
1602 : case '!':
1603 : case '>':
1604 : case '<':
1605 : case '|':
1606 : case ':':
1607 3272 : *q++ = '\\';
1608 3272 : *q++ = c;
1609 3272 : break;
1610 :
1611 : case '\0':
1612 0 : *q++ = '\\';
1613 0 : *q++ = '0';
1614 0 : *q++ = '0';
1615 0 : *q++ = '0';
1616 0 : break;
1617 :
1618 : default:
1619 32557 : if (quote_delim && c == delim_char)
1620 303 : *q++ = '\\';
1621 32557 : *q++ = c;
1622 : break;
1623 : }
1624 : }
1625 107 : *q = '\0';
1626 :
1627 : /* Reallocate string and return it */
1628 107 : RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
1629 : }
1630 : /* }}} */
1631 :
1632 : /* {{{ proto array preg_grep(string regex, array input [, int flags])
1633 : Searches array and returns entries which match regex */
1634 : PHP_FUNCTION(preg_grep)
1635 0 : {
1636 : char *regex; /* Regular expression */
1637 : int regex_len;
1638 : zval *input; /* Input array */
1639 0 : long flags = 0; /* Match control flags */
1640 : pcre_cache_entry *pce; /* Compiled regular expression */
1641 :
1642 : /* Get arguments and do error checking */
1643 0 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len,
1644 : &input, &flags) == FAILURE) {
1645 0 : return;
1646 : }
1647 :
1648 : /* Compile regex or get it from cache. */
1649 0 : if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1650 0 : RETURN_FALSE;
1651 : }
1652 :
1653 0 : php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1654 : }
1655 :
1656 : PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value,
1657 : long flags TSRMLS_DC)
1658 0 : {
1659 : zval **entry; /* An entry in the input array */
1660 0 : pcre_extra *extra = pce->extra;/* Holds results of studying */
1661 : pcre_extra extra_data; /* Used locally for exec options */
1662 : int *offsets; /* Array of subpattern offsets */
1663 : int size_offsets; /* Size of the offsets array */
1664 0 : int count = 0; /* Count of matched subpatterns */
1665 : char *string_key;
1666 : ulong num_key;
1667 : zend_bool invert; /* Whether to return non-matching
1668 : entries */
1669 : int rc;
1670 :
1671 0 : invert = flags & PREG_GREP_INVERT ? 1 : 0;
1672 :
1673 0 : if (extra == NULL) {
1674 0 : extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1675 0 : extra = &extra_data;
1676 : }
1677 0 : extra->match_limit = PCRE_G(backtrack_limit);
1678 0 : extra->match_limit_recursion = PCRE_G(recursion_limit);
1679 :
1680 : /* Calculate the size of the offsets array, and allocate memory for it. */
1681 0 : rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1682 0 : if (rc < 0) {
1683 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1684 0 : RETURN_FALSE;
1685 : }
1686 0 : size_offsets = (size_offsets + 1) * 3;
1687 0 : offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1688 :
1689 : /* Initialize return array */
1690 0 : array_init(return_value);
1691 :
1692 0 : PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1693 :
1694 : /* Go through the input array */
1695 0 : zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1696 0 : while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1697 :
1698 0 : convert_to_string_ex(entry);
1699 :
1700 : /* Perform the match */
1701 0 : count = pcre_exec(pce->re, extra, Z_STRVAL_PP(entry),
1702 : Z_STRLEN_PP(entry), 0,
1703 : 0, offsets, size_offsets);
1704 :
1705 : /* Check for too many substrings condition. */
1706 0 : if (count == 0) {
1707 0 : php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1708 0 : count = size_offsets/3;
1709 0 : } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1710 0 : pcre_handle_exec_error(count TSRMLS_CC);
1711 0 : break;
1712 : }
1713 :
1714 : /* If the entry fits our requirements */
1715 0 : if ((count > 0 && !invert) ||
1716 : (count == PCRE_ERROR_NOMATCH && invert)) {
1717 0 : (*entry)->refcount++;
1718 :
1719 : /* Add to return array */
1720 0 : switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
1721 : {
1722 : case HASH_KEY_IS_STRING:
1723 0 : zend_hash_update(Z_ARRVAL_P(return_value), string_key,
1724 : strlen(string_key)+1, entry, sizeof(zval *), NULL);
1725 0 : break;
1726 :
1727 : case HASH_KEY_IS_LONG:
1728 0 : zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
1729 : sizeof(zval *), NULL);
1730 : break;
1731 : }
1732 : }
1733 :
1734 0 : zend_hash_move_forward(Z_ARRVAL_P(input));
1735 : }
1736 :
1737 : /* Clean up */
1738 0 : efree(offsets);
1739 : }
1740 : /* }}} */
1741 :
1742 : /* {{{ proto int preg_last_error()
1743 : Returns the error code of the last regexp execution. */
1744 : PHP_FUNCTION(preg_last_error)
1745 0 : {
1746 0 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1747 0 : return;
1748 : }
1749 :
1750 0 : RETURN_LONG(PCRE_G(error_code));
1751 : }
1752 : /* }}} */
1753 :
1754 : /* {{{ module definition structures */
1755 :
1756 : zend_function_entry pcre_functions[] = {
1757 : PHP_FE(preg_match, third_arg_force_ref)
1758 : PHP_FE(preg_match_all, third_arg_force_ref)
1759 : PHP_FE(preg_replace, fifth_arg_force_ref)
1760 : PHP_FE(preg_replace_callback, fifth_arg_force_ref)
1761 : PHP_FE(preg_split, NULL)
1762 : PHP_FE(preg_quote, NULL)
1763 : PHP_FE(preg_grep, NULL)
1764 : PHP_FE(preg_last_error, NULL)
1765 : {NULL, NULL, NULL}
1766 : };
1767 :
1768 : zend_module_entry pcre_module_entry = {
1769 : STANDARD_MODULE_HEADER,
1770 : "pcre",
1771 : pcre_functions,
1772 : PHP_MINIT(pcre),
1773 : PHP_MSHUTDOWN(pcre),
1774 : NULL,
1775 : NULL,
1776 : PHP_MINFO(pcre),
1777 : NO_VERSION_YET,
1778 : PHP_MODULE_GLOBALS(pcre),
1779 : PHP_GINIT(pcre),
1780 : PHP_GSHUTDOWN(pcre),
1781 : NULL,
1782 : STANDARD_MODULE_PROPERTIES_EX
1783 : };
1784 :
1785 : #ifdef COMPILE_DL_PCRE
1786 : ZEND_GET_MODULE(pcre)
1787 : # ifdef PHP_WIN32
1788 : # include "zend_arg_defs.c"
1789 : # endif
1790 : #endif
1791 :
1792 : /* }}} */
1793 :
1794 : #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
1795 :
1796 : /*
1797 : * Local variables:
1798 : * tab-width: 4
1799 : * c-basic-offset: 4
1800 : * End:
1801 : * vim600: sw=4 ts=4 fdm=marker
1802 : * vim<600: sw=4 ts=4
1803 : */
|