LTP GCOV extension - code coverage report
Current view: directory - ext/pcre - php_pcre.c
Test: PHP Code Coverage
Date: 2007-04-10 Instrumented lines: 774
Code covered: 42.6 % Executed lines: 330
Legend: not executed executed

       1                 : /*
       2                 :    +----------------------------------------------------------------------+
       3                 :    | PHP Version 5                                                        |
       4                 :    +----------------------------------------------------------------------+
       5                 :    | Copyright (c) 1997-2007 The PHP Group                                |
       6                 :    +----------------------------------------------------------------------+
       7                 :    | This source file is subject to version 3.01 of the PHP license,      |
       8                 :    | that is bundled with this package in the file LICENSE, and is        |
       9                 :    | available through the world-wide-web at the following url:           |
      10                 :    | http://www.php.net/license/3_01.txt                                  |
      11                 :    | If you did not receive a copy of the PHP license and are unable to   |
      12                 :    | obtain it through the world-wide-web, please send a note to          |
      13                 :    | license@php.net so we can mail you a copy immediately.               |
      14                 :    +----------------------------------------------------------------------+
      15                 :    | Author: Andrei Zmievski <andrei@php.net>                             |
      16                 :    +----------------------------------------------------------------------+
      17                 :  */
      18                 : 
      19                 : /* $Id: php_pcre.c,v 1.168.2.9.2.16 2007/03/14 23:47:44 tony2001 Exp $ */
      20                 : 
      21                 : #ifdef HAVE_CONFIG_H
      22                 : #include "config.h"
      23                 : #endif
      24                 : 
      25                 : #include "php.h"
      26                 : #include "php_ini.h"
      27                 : #include "php_globals.h"
      28                 : #include "php_pcre.h"
      29                 : #include "ext/standard/info.h"
      30                 : #include "ext/standard/php_smart_str.h"
      31                 : 
      32                 : #if HAVE_PCRE || HAVE_BUNDLED_PCRE
      33                 : 
      34                 : #include "ext/standard/php_string.h"
      35                 : 
      36                 : #define PREG_PATTERN_ORDER                      1
                           /* PREG_PATTERN_ORDER (above) and PREG_SET_ORDER (below) are subpattern
                              ordering values; php_pcre_match_impl() starts from PREG_PATTERN_ORDER
                              for global matches. PREG_OFFSET_CAPTURE is a flag bit that is tested
                              against the caller-supplied match flags. */
      37                 : #define PREG_SET_ORDER                          2
      38                 : #define PREG_OFFSET_CAPTURE                     (1<<8)
      39                 : 
                           /* Split flag bits. They are exported to scripts by
                              PHP_MINIT_FUNCTION(pcre); their consumers are outside this excerpt. */
      40                 : #define PREG_SPLIT_NO_EMPTY                     (1<<0)
      41                 : #define PREG_SPLIT_DELIM_CAPTURE        (1<<1)
      42                 : #define PREG_SPLIT_OFFSET_CAPTURE       (1<<2)
      43                 : 
                           /* Stored in a cache entry's preg_options when the pattern carries the
                              'e' modifier. Unlike its neighbours it is not registered as a script
                              constant in PHP_MINIT_FUNCTION(pcre). */
      44                 : #define PREG_REPLACE_EVAL                       (1<<0)
      45                 : 
                           /* Exported to scripts by PHP_MINIT_FUNCTION(pcre); its consumer is
                              outside this excerpt. */
      46                 : #define PREG_GREP_INVERT                        (1<<0)
      47                 : 
                           /* Number of cached compiled patterns at which
                              pcre_get_compiled_regex_cache() evicts PCRE_CACHE_SIZE / 8 entries
                              before inserting a new one. */
      48                 : #define PCRE_CACHE_SIZE 4096
      49                 : 
      50                 : enum {
                                   /* Error states kept in PCRE_G(error_code). The module-globals
                                      initialiser resets the field to PHP_PCRE_NO_ERROR,
                                      pcre_handle_exec_error() stores one of the other values, and
                                      PHP_MINIT_FUNCTION(pcre) exports all five to scripts as
                                      PREG_NO_ERROR ... PREG_BAD_UTF8_ERROR. */
      51                 :         PHP_PCRE_NO_ERROR = 0,
      52                 :         PHP_PCRE_INTERNAL_ERROR,
      53                 :         PHP_PCRE_BACKTRACK_LIMIT_ERROR,
      54                 :         PHP_PCRE_RECURSION_LIMIT_ERROR,
      55                 :         PHP_PCRE_BAD_UTF8_ERROR,
      56                 : };
      57                 : 
      58                 : 
      59                 : ZEND_DECLARE_MODULE_GLOBALS(pcre);
                           /* Module globals for this extension. The fields touched in this file are
                              pcre_cache, backtrack_limit, recursion_limit and error_code; they are
                              reached through PCRE_G() or through the pcre_globals pointer handed to
                              the GINIT/GSHUTDOWN handlers. */
      60                 : 
      61                 : 
      62                 : static void pcre_handle_exec_error(int pcre_code TSRMLS_DC)
      63               0 : {
                                   /* Translate a PCRE library error code into the corresponding
                                      PHP_PCRE_*_ERROR value and record it in PCRE_G(error_code).
                                      pcre_code is presumably the result of a failed pcre_exec()
                                      call -- the callers are outside this excerpt. Any code that is
                                      not one of the three cases below is reported as
                                      PHP_PCRE_INTERNAL_ERROR. */
      64               0 :         int preg_code = 0;
      65                 : 
      66               0 :         switch (pcre_code) {
      67                 :                 case PCRE_ERROR_MATCHLIMIT:
      68               0 :                         preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
      69               0 :                         break;
      70                 : 
      71                 :                 case PCRE_ERROR_RECURSIONLIMIT:
      72               0 :                         preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
      73               0 :                         break;
      74                 : 
      75                 :                 case PCRE_ERROR_BADUTF8:
      76               0 :                         preg_code = PHP_PCRE_BAD_UTF8_ERROR;
      77               0 :                         break;
      78                 : 
      79                 :                 default:
      80               0 :                         preg_code = PHP_PCRE_INTERNAL_ERROR;
      81                 :                         break;
      82                 :         }
      83                 : 
      84               0 :         PCRE_G(error_code) = preg_code;
      85               0 : }
      86                 : 
      87                 : 
      88                 : static void php_free_pcre_cache(void *data)
      89             184 : {
                                   /* Release everything owned by one pcre_cache_entry. This
                                      function is handed to zend_hash_init() for the pattern cache
                                      in PHP_GINIT_FUNCTION(pcre), presumably as the per-element
                                      destructor. A NULL entry is ignored. The entry struct itself
                                      is not freed here.
                                      NOTE(review): re, extra and tables are produced by the PCRE
                                      library (pcre_compile / pcre_study / pcre_maketables in
                                      pcre_get_compiled_regex_cache) but are released with
                                      pefree(..., 1); this assumes PCRE's allocator is compatible
                                      with the persistent allocator -- confirm. */
      90             184 :         pcre_cache_entry *pce = (pcre_cache_entry *) data;
      91             184 :         if (!pce) return;
      92             184 :         pefree(pce->re, 1);
      93             184 :         if (pce->extra) pefree(pce->extra, 1);
      94                 : #if HAVE_SETLOCALE
                                   /* tables is only set when the pattern was compiled under a
                                      locale other than "C"; locale is always a pestrdup() copy. */
      95             184 :         if ((void*)pce->tables) pefree((void*)pce->tables, 1);
      96             184 :         pefree(pce->locale, 1);
      97                 : #endif
      98                 : }
      99                 : 
     100                 : 
     101                 : static PHP_GINIT_FUNCTION(pcre)
     102             220 : {
                                   /* Initialise the module globals: create the compiled-pattern
                                      cache with php_free_pcre_cache as its cleanup callback (the
                                      trailing 1 is presumably the "persistent" flag, matching the
                                      pefree(..., 1) calls in that callback), zero both limits and
                                      clear the error state. The limits are zeroed here; the
                                      "100000" defaults come from the INI table below. */
     103             220 :         zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
     104             220 :         pcre_globals->backtrack_limit = 0;
     105             220 :         pcre_globals->recursion_limit = 0;
     106             220 :         pcre_globals->error_code      = PHP_PCRE_NO_ERROR;
     107             220 : }
     108                 : 
     109                 : static PHP_GSHUTDOWN_FUNCTION(pcre)
     110             219 : {
                                   /* Tear down the module globals by destroying the
                                      compiled-pattern cache created in PHP_GINIT_FUNCTION(pcre). */
     111             219 :         zend_hash_destroy(&pcre_globals->pcre_cache);
     112             219 : }
     113                 : 
     114                 : PHP_INI_BEGIN()
                                   /* Two INI settings, both declared PHP_INI_ALL, updated through
                                      OnUpdateLong and defaulting to "100000". They are stored in
                                      the backtrack_limit and recursion_limit fields of
                                      zend_pcre_globals. */
     115                 :         STD_PHP_INI_ENTRY("pcre.backtrack_limit", "100000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
     116                 :         STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
     117                 : PHP_INI_END()
     118                 : 
     119                 : 
     120                 : /* {{{ PHP_MINFO_FUNCTION(pcre) */
                           /* Prints this extension's info table: a fixed "enabled" row followed by
                              the library version string returned by pcre_version(). */
     121                 : static PHP_MINFO_FUNCTION(pcre)
     122               0 : {
     123               0 :         php_info_print_table_start();
     124               0 :         php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled" );
     125               0 :         php_info_print_table_row(2, "PCRE Library Version", pcre_version() );
     126               0 :         php_info_print_table_end();
     127               0 : }
     128                 : /* }}} */
     129                 : 
     130                 : /* {{{ PHP_MINIT_FUNCTION(pcre) */
                           /* Module start-up: registers the INI entries declared above, then
                              exports the PREG_* flag values and the PREG_*_ERROR codes as long
                              constants, each with CONST_CS | CONST_PERSISTENT. PREG_REPLACE_EVAL is
                              not exported. Always returns SUCCESS. */
     131                 : static PHP_MINIT_FUNCTION(pcre)
     132             220 : {
     133             220 :         REGISTER_INI_ENTRIES();
     134                 :         
                                   /* Match, split and grep flags. */
     135             220 :         REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
     136             220 :         REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
     137             220 :         REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     138             220 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
     139             220 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
     140             220 :         REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
     141             220 :         REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
     142                 : 
                                   /* Error codes, i.e. the values stored in PCRE_G(error_code). */
     143             220 :         REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
     144             220 :         REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
     145             220 :         REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     146             220 :         REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
     147             220 :         REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
     148                 : 
     149             220 :         return SUCCESS;
     150                 : }
     151                 : /* }}} */
     152                 : 
     153                 : /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
                           /* Module shutdown: unregisters the INI entries registered in
                              PHP_MINIT_FUNCTION(pcre). Always returns SUCCESS. */
     154                 : static PHP_MSHUTDOWN_FUNCTION(pcre)
     155             219 : {
     156             219 :         UNREGISTER_INI_ENTRIES();
     157                 : 
     158             219 :         return SUCCESS;
     159                 : }
     160                 : /* }}} */
     161                 : 
     162                 : /* {{{ static pcre_clean_cache */
                           /* Callback passed to zend_hash_apply_with_argument() by
                              pcre_get_compiled_regex_cache() when the cache is full. arg points to
                              an int holding the number of entries still to be dropped. While that
                              counter is positive it is decremented and 1 is returned; afterwards 0
                              is returned. data (the visited entry) is not inspected.
                              NOTE(review): 1 / 0 presumably correspond to the Zend hash "remove" /
                              "keep" apply results -- confirm against zend_hash.h. */
     163                 : static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
     164               0 : {
     165               0 :         int *num_clean = (int *)arg;
     166                 : 
     167               0 :         if (*num_clean > 0) {
     168               0 :                 (*num_clean)--;
     169               0 :                 return 1;
     170                 :         } else {
     171               0 :                 return 0;
     172                 :         }
     173                 : }
     174                 : /* }}} */
     175                 : 
     176                 : /* {{{ pcre_get_compiled_regex_cache
                            *
                            * Returns the cache entry for a delimited pattern string such as
                            * "/foo/i", compiling the pattern and inserting it into
                            * PCRE_G(pcre_cache) when no usable entry exists.
                            *
                            * regex      NUL-terminated pattern including delimiters and modifiers.
                            * regex_len  Supplied by the caller but overwritten with strlen(regex)
                            *            before use (see below).
                            *
                            * Returns a pointer to the cached pcre_cache_entry, or NULL after
                            * raising an E_WARNING when the pattern is empty, has an invalid or
                            * unterminated delimiter, carries an unknown modifier, or fails to
                            * compile.
     177                 :  */
     178                 : PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
     179           21253 : {
     180           21253 :         pcre                            *re = NULL;
     181                 :         pcre_extra                      *extra;
     182           21253 :         int                                      coptions = 0;
                                   /* soptions is never modified, so pcre_study() is always called
                                      with option value 0. */
     183           21253 :         int                                      soptions = 0;
     184                 :         const char                      *error;
     185                 :         int                                      erroffset;
     186                 :         char                             delimiter;
     187                 :         char                             start_delimiter;
     188                 :         char                             end_delimiter;
     189                 :         char                            *p, *pp;
     190                 :         char                            *pattern;
     191           21253 :         int                                      do_study = 0;
     192           21253 :         int                                      poptions = 0;
     193           21253 :         unsigned const char *tables = NULL;
     194                 : #if HAVE_SETLOCALE
                                   /* Query (not change) the current LC_CTYPE locale name.
                                      NOTE(review): the result is passed to strcmp()/pestrdup()
                                      below without a NULL check. */
     195           21253 :         char                            *locale = setlocale(LC_CTYPE, NULL);
     196                 : #endif
     197                 :         pcre_cache_entry        *pce;
     198                 :         pcre_cache_entry         new_entry;
     199                 : 
     200                 :         /* Try to lookup the cached regex entry, and if successful, just pass
     201                 :            back the compiled pattern, otherwise go on and compile it. */
                                   /* The caller's regex_len is discarded here: the cache key is
                                      always the string up to its first NUL, plus the terminator. */
     202           21253 :         regex_len = strlen(regex);
     203           21253 :         if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
     204                 :                 /*
     205                 :                  * We use a quick pcre_info() check to see whether cache is corrupted, and if it
     206                 :                  * is, we flush it and compile the pattern from scratch.
     207                 :                  */
     208           21069 :                 if (pcre_info(pce->re, NULL, NULL) == PCRE_ERROR_BADMAGIC) {
     209               0 :                         zend_hash_clean(&PCRE_G(pcre_cache));
     210                 :                 } else {
     211                 : #if HAVE_SETLOCALE
                                                   /* A cached entry is reused only if it was compiled
                                                      under the current locale. On a mismatch control
                                                      falls through, the pattern is recompiled, and the
                                                      zend_hash_update() at the end stores the new entry
                                                      under the same key. */
     212           21069 :                         if (!strcmp(pce->locale, locale)) {
     213                 : #endif
     214           21069 :                                 return pce;
     215                 : #if HAVE_SETLOCALE
     216                 :                         }
     217                 : #endif
     218                 :                 }
     219                 :         }
     220                 :         
     221             184 :         p = regex;
     222                 :         
     223                 :         /* Parse through the leading whitespace, and display a warning if we
     224                 :            get to the end without encountering a delimiter. */
     225             184 :         while (isspace((int)*(unsigned char *)p)) p++;
     226             184 :         if (*p == 0) {
     227               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
     228               0 :                 return NULL;
     229                 :         }
     230                 :         
     231                 :         /* Get the delimiter and display a warning if it is alphanumeric
     232                 :            or a backslash. */
     233             184 :         delimiter = *p++;
     234             184 :         if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
     235               0 :                 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
     236               0 :                 return NULL;
     237                 :         }
     238                 : 
     239             184 :         start_delimiter = delimiter;
                                   /* Lookup table trick: in "([{< )]}> )]}>" the character five
                                      positions after an opening bracket is its closing partner, and
                                      five positions after a closing bracket is that same closing
                                      bracket. So an opening bracket yields a distinct end delimiter,
                                      while every other delimiter ends the pattern with itself. */
     240             184 :         if ((pp = strchr("([{< )]}> )]}>", delimiter)))
     241               0 :                 delimiter = pp[5];
     242             184 :         end_delimiter = delimiter;
     243                 : 
     244             184 :         if (start_delimiter == end_delimiter) {
     245                 :                 /* We need to iterate through the pattern, searching for the ending delimiter,
     246                 :                    but skipping the backslashed delimiters.  If the ending delimiter is not
     247                 :                    found, display a warning. */
     248             184 :                 pp = p;
     249           38631 :                 while (*pp != 0) {
                                                   /* A backslash followed by any character consumes two
                                                      bytes: one here and one at the bottom of the loop. */
     250           38447 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     251           34625 :                         else if (*pp == delimiter)
     252             184 :                                 break;
     253           38263 :                         pp++;
     254                 :                 }
     255             184 :                 if (*pp == 0) {
     256               0 :                         php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
     257               0 :                         return NULL;
     258                 :                 }
     259                 :         } else {
     260                 :                 /* We iterate through the pattern, searching for the matching ending
     261                 :                  * delimiter. For each matching starting delimiter, we increment nesting
     262                 :                  * level, and decrement it for each matching ending delimiter. If we
     263                 :                  * reach the end of the pattern without matching, display a warning.
     264                 :                  */
     265               0 :                 int brackets = 1;       /* brackets nesting level */
     266               0 :                 pp = p;
     267               0 :                 while (*pp != 0) {
     268               0 :                         if (*pp == '\\' && pp[1] != 0) pp++;
     269               0 :                         else if (*pp == end_delimiter && --brackets <= 0)
     270                 :                                 break;
     271               0 :                         else if (*pp == start_delimiter)
     272               0 :                                 brackets++;
     273               0 :                         pp++;
     274                 :                 }
     275               0 :                 if (*pp == 0) {
     276               0 :                         php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", end_delimiter);
     277               0 :                         return NULL;
     278                 :                 }
     279                 :         }
     280                 :         
     281                 :         /* Make a copy of the actual pattern. */
                                   /* p points just past the opening delimiter and pp at the closing
                                      one, so the copy excludes both. It is released with efree() on
                                      every path below. */
     282             184 :         pattern = estrndup(p, pp-p);
     283                 : 
     284                 :         /* Move on to the options */
     285             184 :         pp++;
     286                 : 
     287                 :         /* Parse through the options, setting appropriate flags.  Display
     288                 :            a warning if we encounter an unknown modifier. */    
     289             548 :         while (*pp != 0) {
     290             180 :                 switch (*pp++) {
     291                 :                         /* Perl compatible options */
     292               1 :                         case 'i':       coptions |= PCRE_CASELESS;              break;
     293               0 :                         case 'm':       coptions |= PCRE_MULTILINE;             break;
     294             179 :                         case 's':       coptions |= PCRE_DOTALL;                break;
     295               0 :                         case 'x':       coptions |= PCRE_EXTENDED;              break;
     296                 :                         
     297                 :                         /* PCRE specific options */
     298               0 :                         case 'A':       coptions |= PCRE_ANCHORED;              break;
     299               0 :                         case 'D':       coptions |= PCRE_DOLLAR_ENDONLY;break;
     300               0 :                         case 'S':       do_study  = 1;                                  break;
     301               0 :                         case 'U':       coptions |= PCRE_UNGREEDY;              break;
     302               0 :                         case 'X':       coptions |= PCRE_EXTRA;                 break;
     303               0 :                         case 'u':       coptions |= PCRE_UTF8;                  break;
     304                 : 
     305                 :                         /* Custom preg options */
     306               0 :                         case 'e':       poptions |= PREG_REPLACE_EVAL;  break;
     307                 :                         
                                                   /* Spaces and newlines among the modifiers are ignored. */
     308                 :                         case ' ':
     309                 :                         case '\n':
     310               0 :                                 break;
     311                 : 
     312                 :                         default:
                                                           /* pp was already advanced by the switch
                                                              expression, so pp[-1] is the offending
                                                              character. */
     313               0 :                                 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
     314               0 :                                 efree(pattern);
     315               0 :                                 return NULL;
     316                 :                 }
     317                 :         }
     318                 : 
     319                 : #if HAVE_SETLOCALE
                                   /* Character tables are generated only when the current locale is
                                      not "C"; otherwise tables stays NULL and is passed as such to
                                      pcre_compile(). */
     320             184 :         if (strcmp(locale, "C"))
     321             184 :                 tables = pcre_maketables();
     322                 : #endif
     323                 : 
     324                 :         /* Compile pattern and display a warning if compilation failed. */
     325             184 :         re = pcre_compile(pattern,
     326                 :                                           coptions,
     327                 :                                           &error,
     328                 :                                           &erroffset,
     329                 :                                           tables);
     330                 : 
     331             184 :         if (re == NULL) {
     332               0 :                 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
     333               0 :                 efree(pattern);
     334               0 :                 if (tables) {
     335               0 :                         pefree((void*)tables, 1);
     336                 :                 }
     337               0 :                 return NULL;
     338                 :         }
     339                 : 
     340                 :         /* If study option was specified, study the pattern and
     341                 :            store the result in extra for passing to pcre_exec. */
     342             184 :         if (do_study) {
     343               0 :                 extra = pcre_study(re, soptions, &error);
     344               0 :                 if (extra) {
                                                   /* Mark both limit fields of the study data as in
                                                      use; the limit values themselves are not assigned
                                                      in this function. */
     345               0 :                         extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     346                 :                 }
                                           /* A study error only produces a warning; the compiled
                                              pattern is still cached and returned. */
     347               0 :                 if (error != NULL) {
     348               0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
     349                 :                 }
     350                 :         } else {
     351             184 :                 extra = NULL;
     352                 :         }
     353                 : 
     354             184 :         efree(pattern);
     355                 : 
     356                 :         /*
     357                 :          * If we reached cache limit, clean out the items from the head of the list;
     358                 :          * these are supposedly the oldest ones (but not necessarily the least used
     359                 :          * ones).
     360                 :          */
     361             184 :         if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
     362               0 :                 int num_clean = PCRE_CACHE_SIZE / 8;
     363               0 :                 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
     364                 :         }
     365                 : 
     366                 :         /* Store the compiled pattern and extra info in the cache. */
     367             184 :         new_entry.re = re;
     368             184 :         new_entry.extra = extra;
     369             184 :         new_entry.preg_options = poptions;
     370             184 :         new_entry.compile_options = coptions;
     371                 : #if HAVE_SETLOCALE
     372             184 :         new_entry.locale = pestrdup(locale, 1);
     373             184 :         new_entry.tables = tables;
     374                 : #endif
                                   /* new_entry is a stack local; sizeof(pcre_cache_entry) bytes of it
                                      are handed to the hash, and pce receives the address of the
                                      stored entry, which is what gets returned.
                                      NOTE(review): the result of zend_hash_update() is not checked. */
     375             184 :         zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
     376                 :                                                 sizeof(pcre_cache_entry), (void**)&pce);
     377                 : 
     378             184 :         return pce;
     379                 : }
     380                 : /* }}} */
     381                 : 
     382                 : /* {{{ pcre_get_compiled_regex
                            *
                            * Convenience wrapper around pcre_get_compiled_regex_cache() that
                            * returns the compiled pattern itself rather than the cache entry.
                            *
                            * regex         NUL-terminated pattern with delimiters and modifiers.
                            * extra         Optional out-parameter: receives the entry's study data,
                            *               or NULL when the lookup fails.
                            * preg_options  Optional out-parameter: receives the entry's
                            *               preg_options, or 0 when the lookup fails.
                            *
                            * Returns the compiled pcre pointer, or NULL when the pattern could
                            * not be obtained.
     383                 :  */
     384                 : PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
     385               0 : {
     386               0 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
     387                 : 
     388               0 :         if (extra) {
     389               0 :                 *extra = pce ? pce->extra : NULL;
     390                 :         }
     391               0 :         if (preg_options) {
     392               0 :                 *preg_options = pce ? pce->preg_options : 0;
     393                 :         }
     394                 :         
     395               0 :         return pce ? pce->re : NULL;
     396                 : }
     397                 : /* }}} */
     398                 : 
     399                 : /* {{{ pcre_get_compiled_regex_ex
                            *
                            * Same as pcre_get_compiled_regex(), with one additional optional
                            * out-parameter.
                            *
                            * regex            NUL-terminated pattern with delimiters and modifiers.
                            * extra            Optional: receives the entry's study data, or NULL
                            *                  when the lookup fails.
                            * preg_options     Optional: receives the entry's preg_options, or 0
                            *                  when the lookup fails.
                            * compile_options  Optional: receives the entry's compile_options (the
                            *                  PCRE_* flags collected from the modifiers), or 0
                            *                  when the lookup fails.
                            *
                            * Returns the compiled pcre pointer, or NULL when the pattern could
                            * not be obtained.
     400                 :  */
     401                 : PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
     402               0 : {
     403               0 :         pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
     404                 :         
     405               0 :         if (extra) {
     406               0 :                 *extra = pce ? pce->extra : NULL;
     407                 :         }
     408               0 :         if (preg_options) {
     409               0 :                 *preg_options = pce ? pce->preg_options : 0;
     410                 :         }
     411               0 :         if (compile_options) {
     412               0 :                 *compile_options = pce ? pce->compile_options : 0;
     413                 :         }
     414                 :         
     415               0 :         return pce ? pce->re : NULL;
     416                 : }
     417                 : /* }}} */
     418                 : 
     419                 : /* {{{ add_offset_pair */
                           /* Builds a new two-element array holding (match string, offset) and
                              appends it to the array in result. When name is non-NULL the same zval
                              is also stored under that string key.
                              result  array zval that receives the pair
                              str/len matched text and its length
                              offset  value stored as the second element
                              name    optional string key, or NULL */
     420                 : static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
     421               0 : {
     422                 :         zval *match_pair;
     423                 : 
     424               0 :         ALLOC_ZVAL(match_pair);
     425               0 :         array_init(match_pair);
     426               0 :         INIT_PZVAL(match_pair);
     427                 : 
     428                 :         /* Add (match, offset) to the return value */
                                   /* The trailing 1 presumably asks for the string to be copied --
                                      confirm against the add_next_index_stringl() signature. */
     429               0 :         add_next_index_stringl(match_pair, str, len, 1);
     430               0 :         add_next_index_long(match_pair, offset);
     431                 :         
     432               0 :         if (name) {
                                           /* The pair is inserted twice (named key and numeric index),
                                              so an extra reference is taken before the first insert. */
     433               0 :                 zval_add_ref(&match_pair);
     434               0 :                 zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
     435                 :         }
     436               0 :         zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
     437               0 : }
     438                 : /* }}} */
     439                 : 
     440                 : static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
     441           21144 : {
                                   /* Shared front end for the script-level match functions: parses
                                      the arguments, obtains the compiled pattern from the cache and
                                      hands the real work to php_pcre_match_impl(). global selects
                                      the argument format, making the subpatterns argument mandatory
                                      ("ssz|ll") or optional ("ss|zll"), and is passed through to
                                      the implementation. The function returns FALSE to the script
                                      when argument parsing or pattern compilation fails. */
     442                 :         /* parameters */
     443                 :         char                     *regex;                        /* Regular expression */
     444                 :         char                     *subject;                      /* String to match against */
     445                 :         int                               regex_len;
     446                 :         int                               subject_len;
     447                 :         pcre_cache_entry *pce;                          /* Compiled regular expression */
     448           21144 :         zval                     *subpats = NULL;       /* Array for subpatterns */
     449           21144 :         long                      flags = 0;            /* Match control flags */
     450           21144 :         long                      start_offset = 0;     /* Where the new search starts */
     451                 : 
     452           21144 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, ((global) ? "ssz|ll" : "ss|zll"), &regex, &regex_len,
     453                 :                                                           &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
     454               0 :                 RETURN_FALSE;
     455                 :         }
     456                 :         
     457                 :         /* Compile regex or get it from cache. */
     458           21144 :         if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
     459               0 :                 RETURN_FALSE;
     460                 :         }
     461                 : 
                                   /* use_flags is true only when the caller actually supplied the
                                      fourth (flags) argument. */
     462           21144 :         php_pcre_match_impl(pce, subject, subject_len, return_value, subpats, 
     463                 :                 global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
     464                 : }
     465                 : 
     466                 : PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
     467                 :         zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
     468           21144 : {
     469                 :         zval                    *result_set,            /* Holds a set of subpatterns after
     470                 :                                                                                    a global match */
     471           21144 :                                    **match_sets = NULL; /* An array of sets of matches for each
     472                 :                                                                                    subpattern after a global match */
     473           21144 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
     474                 :         pcre_extra               extra_data;            /* Used locally for exec options */
     475           21144 :         int                              exoptions = 0;         /* Execution options */
     476           21144 :         int                              count = 0;                     /* Count of matched subpatterns */
     477                 :         int                             *offsets;                       /* Array of subpattern offsets */
     478                 :         int                              num_subpats;           /* Number of captured subpatterns */
     479                 :         int                              size_offsets;          /* Size of the offsets array */
     480                 :         int                              matched;                       /* Has anything matched */
     481           21144 :         int                              g_notempty = 0;        /* If the match should not be empty */
     482                 :         const char         **stringlist;                /* Holds list of subpatterns */
     483                 :         char                    *match;                         /* The current match */
     484           21144 :         char               **subpat_names = NULL;/* Array for named subpatterns */
     485                 :         int                              i, rc;
     486                 :         int                              subpats_order;         /* Order of subpattern matches */
     487                 :         int                              offset_capture;    /* Capture match offsets: yes/no */
     488                 : 
     489                 :         /* Overwrite the passed-in value for subpatterns with an empty array. */
     490           21144 :         if (subpats != NULL) {
     491            4501 :                 zval_dtor(subpats);
     492            4501 :                 array_init(subpats);
     493                 :         }
     494                 : 
     495           21144 :         subpats_order = global ? PREG_PATTERN_ORDER : 0;
     496                 : 
     497           21144 :         if (use_flags) {
     498               0 :                 offset_capture = flags & PREG_OFFSET_CAPTURE;
     499                 : 
     500                 :                 /*
     501                 :                  * subpats_order is pre-set to pattern mode so we change it only if
     502                 :                  * necessary.
     503                 :                  */
     504               0 :                 if (flags & 0xff) {
     505               0 :                         subpats_order = flags & 0xff;
     506                 :                 }
     507               0 :                 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
     508                 :                         (!global && subpats_order != 0)) {
     509               0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
     510               0 :                         return;
     511                 :                 }
     512                 :         } else {
     513           21144 :                 offset_capture = 0;
     514                 :         }
     515                 : 
     516                 :         /* Negative offset counts from the end of the string. */
     517           21144 :         if (start_offset < 0) {
     518               0 :                 start_offset = subject_len + start_offset;
     519               0 :                 if (start_offset < 0) {
     520               0 :                         start_offset = 0;
     521                 :                 }
     522                 :         }
     523                 : 
     524           21144 :         if (extra == NULL) {
     525           21144 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     526           21144 :                 extra = &extra_data;
     527                 :         }
     528           21144 :         extra->match_limit = PCRE_G(backtrack_limit);
     529           21144 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
     530                 : 
     531                 :         /* Calculate the size of the offsets array, and allocate memory for it. */
     532           21144 :         rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
     533           21144 :         if (rc < 0) {
     534               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     535               0 :                 RETURN_FALSE;
     536                 :         }
     537           21144 :         num_subpats++;
     538           21144 :         size_offsets = num_subpats * 3;
     539           21144 :         offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
     540                 : 
     541                 :         /*
     542                 :          * Build a mapping from subpattern numbers to their names. We will always
     543                 :          * allocate the table, even though there may be no named subpatterns. This
     544                 :          * avoids somewhat more complicated logic in the inner loops.
     545                 :          */
     546           21144 :         subpat_names = (char **)safe_emalloc(num_subpats, sizeof(char *), 0);
     547           21144 :         memset(subpat_names, 0, sizeof(char *) * num_subpats);
     548                 :         {
     549           21144 :                 int name_cnt = 0, name_size, ni = 0;
     550                 :                 char *name_table;
     551                 :                 unsigned short name_idx;
     552                 : 
     553           21144 :                 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
     554           21144 :                 if (rc < 0) {
     555               0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     556               0 :                         efree(offsets);
     557               0 :                         efree(subpat_names);
     558               0 :                         RETURN_FALSE;
     559                 :                 }
     560           21144 :                 if (name_cnt > 0) {
     561                 :                         int rc1, rc2;
     562                 : 
     563               0 :                         rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
     564               0 :                         rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
     565               0 :                         rc = rc2 ? rc2 : rc1;
     566               0 :                         if (rc < 0) {
     567               0 :                                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     568               0 :                                 efree(offsets);
     569               0 :                                 efree(subpat_names);
     570               0 :                                 RETURN_FALSE;
     571                 :                         }
     572                 : 
     573               0 :                         while (ni++ < name_cnt) {
     574               0 :                                 name_idx = 0xff * name_table[0] + name_table[1];
     575               0 :                                 subpat_names[name_idx] = name_table + 2;
     576               0 :                                 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
     577               0 :                                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
     578               0 :                                         efree(offsets);
     579               0 :                                         efree(subpat_names);
     580               0 :                                         RETURN_FALSE;
     581                 :                                 }
     582               0 :                                 name_table += name_size;
     583                 :                         }
     584                 :                 }
     585                 :         }
     586                 : 
     587                 :         /* Allocate match sets array and initialize the values. */
     588           21144 :         if (global && subpats_order == PREG_PATTERN_ORDER) {
     589               0 :                 match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
     590               0 :                 for (i=0; i<num_subpats; i++) {
     591               0 :                         ALLOC_ZVAL(match_sets[i]);
     592               0 :                         array_init(match_sets[i]);
     593               0 :                         INIT_PZVAL(match_sets[i]);
     594                 :                 }
     595                 :         }
     596                 : 
     597           21144 :         match = NULL;
     598           21144 :         matched = 0;
     599           21144 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
     600                 :         
     601                 :         do {
     602                 :                 /* Execute the regular expression. */
     603           21144 :                 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
     604                 :                                                   exoptions|g_notempty, offsets, size_offsets);
     605                 : 
     606                 :                 /* Check for too many substrings condition. */  
     607           21144 :                 if (count == 0) {
     608               0 :                         php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
     609               0 :                         count = size_offsets/3;
     610                 :                 }
     611                 : 
     612                 :                 /* If something has matched */
     613           21144 :                 if (count > 0) {
     614            4784 :                         matched++;
     615            4784 :                         match = subject + offsets[0];
     616                 : 
     617                 :                         /* If subpatterns array has been passed, fill it in with values. */
     618            4784 :                         if (subpats != NULL) {
     619                 :                                 /* Try to get the list of substrings and display a warning if failed. */
     620             366 :                                 if (pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
     621               0 :                                         efree(subpat_names);
     622               0 :                                         efree(offsets);
     623               0 :                                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
     624               0 :                                         RETURN_FALSE;
     625                 :                                 }
     626                 : 
     627             366 :                                 if (global) {   /* global pattern matching */
     628               0 :                                         if (subpats_order == PREG_PATTERN_ORDER) {
     629                 :                                                 /* For each subpattern, insert it into the appropriate array. */
     630               0 :                                                 for (i = 0; i < count; i++) {
     631               0 :                                                         if (offset_capture) {
     632               0 :                                                                 add_offset_pair(match_sets[i], (char *)stringlist[i],
     633                 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
     634                 :                                                         } else {
     635               0 :                                                                 add_next_index_stringl(match_sets[i], (char *)stringlist[i],
     636                 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1], 1);
     637                 :                                                         }
     638                 :                                                 }
     639                 :                                                 /*
     640                 :                                                  * If the number of captured subpatterns on this run is
     641                 :                                                  * less than the total possible number, pad the result
     642                 :                                                  * arrays with empty strings.
     643                 :                                                  */
     644               0 :                                                 if (count < num_subpats) {
     645               0 :                                                         for (; i < num_subpats; i++) {
     646               0 :                                                                 add_next_index_string(match_sets[i], "", 1);
     647                 :                                                         }
     648                 :                                                 }
     649                 :                                         } else {
     650                 :                                                 /* Allocate the result set array */
     651               0 :                                                 ALLOC_ZVAL(result_set);
     652               0 :                                                 array_init(result_set);
     653               0 :                                                 INIT_PZVAL(result_set);
     654                 :                                                 
     655                 :                                                 /* Add all the subpatterns to it */
     656               0 :                                                 for (i = 0; i < count; i++) {
     657               0 :                                                         if (offset_capture) {
     658               0 :                                                                 add_offset_pair(result_set, (char *)stringlist[i],
     659                 :                                                                                                 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
     660                 :                                                         } else {
     661               0 :                                                                 if (subpat_names[i]) {
     662               0 :                                                                         add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
     663                 :                                                                                                                    offsets[(i<<1)+1] - offsets[i<<1], 1);
     664                 :                                                                 }
     665               0 :                                                                 add_next_index_stringl(result_set, (char *)stringlist[i],
     666                 :                                                                                                            offsets[(i<<1)+1] - offsets[i<<1], 1);
     667                 :                                                         }
     668                 :                                                 }
     669                 :                                                 /* And add it to the output array */
     670               0 :                                                 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
     671                 :                                         }
     672                 :                                 } else {                        /* single pattern matching */
     673                 :                                         /* For each subpattern, insert it into the subpatterns array. */
     674            1098 :                                         for (i = 0; i < count; i++) {
     675             732 :                                                 if (offset_capture) {
     676               0 :                                                         add_offset_pair(subpats, (char *)stringlist[i],
     677                 :                                                                                         offsets[(i<<1)+1] - offsets[i<<1],
     678                 :                                                                                         offsets[i<<1], subpat_names[i]);
     679                 :                                                 } else {
     680             732 :                                                         if (subpat_names[i]) {
     681               0 :                                                                 add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
     682                 :                                                                                                   offsets[(i<<1)+1] - offsets[i<<1], 1);
     683                 :                                                         }
     684             732 :                                                         add_next_index_stringl(subpats, (char *)stringlist[i],
     685                 :                                                                                                    offsets[(i<<1)+1] - offsets[i<<1], 1);
     686                 :                                                 }
     687                 :                                         }
     688                 :                                 }
     689                 : 
     690             366 :                                 pcre_free((void *) stringlist);
     691                 :                         }
     692           16360 :                 } else if (count == PCRE_ERROR_NOMATCH) {
     693                 :                         /* If we previously set PCRE_NOTEMPTY after a null match,
     694                 :                            this is not necessarily the end. We need to advance
     695                 :                            the start offset, and continue. Fudge the offset values
     696                 :                            to achieve this, unless we're already at the end of the string. */
     697           16360 :                         if (g_notempty != 0 && start_offset < subject_len) {
     698               0 :                                 offsets[0] = start_offset;
     699               0 :                                 offsets[1] = start_offset + 1;
     700                 :                         } else
     701                 :                                 break;
     702                 :                 } else {
     703               0 :                         pcre_handle_exec_error(count TSRMLS_CC);
     704               0 :                         break;
     705                 :                 }
     706                 :                 
     707                 :                 /* If we have matched an empty string, mimic what Perl's /g options does.
     708                 :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
     709                 :                    the match again at the same point. If this fails (picked up above) we
     710                 :                    advance to the next character. */
     711            4784 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
     712                 :                 
     713                 :                 /* Advance to the position right after the last full match */
     714            4784 :                 start_offset = offsets[1];
     715            4784 :         } while (global);
     716                 : 
     717                 :         /* Add the match sets to the output array and clean up */
     718           21144 :         if (global && subpats_order == PREG_PATTERN_ORDER) {
     719               0 :                 for (i = 0; i < num_subpats; i++) {
     720               0 :                         if (subpat_names[i]) {
     721               0 :                                 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
     722                 :                                                                  strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
     723               0 :                                 ZVAL_ADDREF(match_sets[i]);
     724                 :                         }
     725               0 :                         zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
     726                 :                 }
     727               0 :                 efree(match_sets);
     728                 :         }
     729                 :         
     730           21144 :         efree(offsets);
     731           21144 :         efree(subpat_names);
     732                 : 
     733           21144 :         RETVAL_LONG(matched);
     734                 : }
     735                 : /* }}} */
     736                 : 
     737                 : /* {{{ proto int preg_match(string pattern, string subject [, array subpatterns [, int flags [, int offset]]])
     738                 :    Perform a Perl-style regular expression match. Thin wrapper: forwards the call unchanged to php_do_pcre_match() with 0 as the extra argument (NOTE(review): presumably the "global" switch, i.e. single-match mode - confirm against php_do_pcre_match) */
     739                 : PHP_FUNCTION(preg_match)
     740           21144 : {
     741           21144 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
     742           21144 : }
     743                 : /* }}} */
     744                 : 
     745                 : /* {{{ proto int preg_match_all(string pattern, string subject, array subpatterns [, int flags [, int offset]])
     746                 :    Perform a Perl-style global regular expression match. Thin wrapper: forwards the call unchanged to php_do_pcre_match() with 1 as the extra argument (NOTE(review): presumably the "global" switch, i.e. match-all mode - confirm against php_do_pcre_match) */
     747                 : PHP_FUNCTION(preg_match_all)
     748               0 : {
     749               0 :         php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
     750               0 : }
     751                 : /* }}} */
     752                 : 
     /* {{{ preg_get_backref
 *
 * Parses a back-reference that starts at *str. The first character is the
 * introducer (the visible caller only gets here on '\\' or '$'); it is skipped
 * without being inspected, except that "${" selects the braced form, which
 * must be closed by '}'. The reference number is one or two decimal digits.
 *
 * Returns 1 on success, storing the number in *backref and advancing *str to
 * the first character after the reference. Returns 0 otherwise, in which case
 * neither *str nor *backref is modified.
 */
static int preg_get_backref(char **str, int *backref)
{
	char *walk = *str;
	int in_brace = 0;
	int number;

	/* An introducer with nothing after it cannot be a reference. */
	if (walk[1] == 0)
		return 0;

	if (*walk == '$' && walk[1] == '{') {
		in_brace = 1;
		walk++;
	}
	walk++;

	/* At least one digit is mandatory. */
	if (*walk < '0' || *walk > '9')
		return 0;
	number = *walk++ - '0';

	/* An optional second digit; the NUL terminator fails the range test. */
	if (*walk >= '0' && *walk <= '9')
		number = number * 10 + (*walk++ - '0');

	if (in_brace) {
		if (*walk != '}')
			return 0;
		walk++;
	}

	/* Publish the results only once the whole reference has been accepted. */
	*str = walk;
	*backref = number;
	return 1;
}
/* }}} */
     791                 : 
     792                 : /* {{{ preg_do_repl_func
     793                 :  */
     794                 : static int preg_do_repl_func(zval *function, char *subject, int *offsets, int count, char **result TSRMLS_DC)
     795               0 : {
     796                 :         zval            *retval_ptr;            /* Function return value */
     797                 :         zval       **args[1];                   /* Argument to pass to function */
     798                 :         zval            *subpats;                       /* Captured subpatterns */ 
     799                 :         int                      result_len;            /* Return value length */
     800                 :         int                      i;
     801                 : 
     802               0 :         MAKE_STD_ZVAL(subpats);
     803               0 :         array_init(subpats);
     804               0 :         for (i = 0; i < count; i++)
     805               0 :                 add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
     806               0 :         args[0] = &subpats;
     807                 : 
     808               0 :         if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
     809               0 :                 convert_to_string_ex(&retval_ptr);
     810               0 :                 *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
     811               0 :                 result_len = Z_STRLEN_P(retval_ptr);
     812               0 :                 zval_ptr_dtor(&retval_ptr);
     813                 :         } else {
     814               0 :                 if (!EG(exception)) {
     815               0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
     816                 :                 }
     817               0 :                 result_len = offsets[1] - offsets[0];
     818               0 :                 *result = estrndup(&subject[offsets[0]], result_len);
     819                 :         }
     820               0 :         zval_dtor(subpats);
     821               0 :         FREE_ZVAL(subpats);
     822                 : 
     823               0 :         return result_len;
     824                 : }
     825                 : /* }}} */
     826                 : 
     827                 : /* {{{ preg_do_eval
     828                 :  */
     829                 : static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
     830                 :                                                 int *offsets, int count, char **result TSRMLS_DC)
     831               0 : {
     832                 :         zval             retval;                        /* Return value from evaluation */
     833                 :         char            *eval_str_end,          /* End of eval string */
     834                 :                                 *match,                         /* Current match for a backref */
     835                 :                                 *esc_match,                     /* Quote-escaped match */
     836                 :                                 *walk,                          /* Used to walk the code string */
     837                 :                                 *segment,                       /* Start of segment to append while walking */
     838                 :                                  walk_last;                     /* Last walked character */
     839                 :         int                      match_len;                     /* Length of the match */
     840                 :         int                      esc_match_len;         /* Length of the quote-escaped match */
     841                 :         int                      result_len;            /* Length of the result of the evaluation */
     842                 :         int                      backref;                       /* Current backref */
     843                 :         char        *compiled_string_description;
     844               0 :         smart_str    code = {0};
     845                 :         
     846               0 :         eval_str_end = eval_str + eval_str_len;
     847               0 :         walk = segment = eval_str;
     848               0 :         walk_last = 0;
     849                 :         
     850               0 :         while (walk < eval_str_end) {
     851                 :                 /* If found a backreference.. */
     852               0 :                 if ('\\' == *walk || '$' == *walk) {
     853               0 :                         smart_str_appendl(&code, segment, walk - segment);
     854               0 :                         if (walk_last == '\\') {
     855               0 :                                 code.c[code.len-1] = *walk++;
     856               0 :                                 segment = walk;
     857               0 :                                 walk_last = 0;
     858               0 :                                 continue;
     859                 :                         }
     860               0 :                         segment = walk;
     861               0 :                         if (preg_get_backref(&walk, &backref)) {
     862               0 :                                 if (backref < count) {
     863                 :                                         /* Find the corresponding string match and substitute it
     864                 :                                            in instead of the backref */
     865               0 :                                         match = subject + offsets[backref<<1];
     866               0 :                                         match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
     867               0 :                                         if (match_len) {
     868               0 :                                                 esc_match = php_addslashes_ex(match, match_len, &esc_match_len, 0, 1 TSRMLS_CC);
     869                 :                                         } else {
     870               0 :                                                 esc_match = match;
     871               0 :                                                 esc_match_len = 0;
     872                 :                                         }
     873                 :                                 } else {
     874               0 :                                         esc_match = "";
     875               0 :                                         esc_match_len = 0;
     876               0 :                                         match_len = 0;
     877                 :                                 }
     878               0 :                                 smart_str_appendl(&code, esc_match, esc_match_len);
     879                 : 
     880               0 :                                 segment = walk;
     881                 : 
     882                 :                                 /* Clean up and reassign */
     883               0 :                                 if (esc_match_len)
     884               0 :                                         efree(esc_match);
     885               0 :                                 continue;
     886                 :                         }
     887                 :                 }
     888               0 :                 walk++;
     889               0 :                 walk_last = walk[-1];
     890                 :         }
     891               0 :         smart_str_appendl(&code, segment, walk - segment);
     892               0 :         smart_str_0(&code);
     893                 : 
     894               0 :         compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
     895                 :         /* Run the code */
     896               0 :         if (zend_eval_string(code.c, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
     897               0 :                 efree(compiled_string_description);
     898               0 :                 php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
     899                 :                 /* zend_error() does not return in this case */
     900                 :         }
     901               0 :         efree(compiled_string_description);
     902               0 :         convert_to_string(&retval);
     903                 :         
     904                 :         /* Save the return value and its length */
     905               0 :         *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
     906               0 :         result_len = Z_STRLEN(retval);
     907                 :         
     908                 :         /* Clean up */
     909               0 :         zval_dtor(&retval);
     910               0 :         smart_str_free(&code);
     911                 :         
     912               0 :         return result_len;
     913                 : }
     914                 : /* }}} */
     915                 : 
     916                 : /* {{{ php_pcre_replace
                         :  *
                         :  * Fetches the compiled form of `regex` through pcre_get_compiled_regex_cache()
                         :  * and hands it, together with every other argument unchanged, to
                         :  * php_pcre_replace_impl().
                         :  *
                         :  * Returns NULL when the cache lookup/compilation returns NULL; otherwise
                         :  * returns whatever php_pcre_replace_impl() returns.
     917                 :  */
     918                 : PHPAPI char *php_pcre_replace(char *regex,   int regex_len,
     919                 :                                                           char *subject, int subject_len,
     920                 :                                                           zval *replace_val, int is_callable_replace,
     921                 :                                                           int *result_len, int limit, int *replace_count TSRMLS_DC)
     922             108 : {
     923                 :         pcre_cache_entry        *pce;                       /* Compiled regular expression */
     924                 : 
     925                 :         /* Compile regex or get it from cache. */
     926             108 :         if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
     927               0 :                 return NULL;
     928                 :         }
     929                 : 
     930             108 :         return php_pcre_replace_impl(pce, subject, subject_len, replace_val, 
     931                 :                 is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
     932                 : }
     933                 : /* php_pcre_replace_impl
                         :  *
                         :  * Runs the compiled pattern in `pce` over `subject` in a loop and builds a new
                         :  * string in which each match is replaced.
                         :  *
                         :  * The replacement text for a match comes from one of three places:
                         :  *   - preg_do_eval(), when PREG_REPLACE_EVAL is set in pce->preg_options;
                         :  *   - preg_do_repl_func(), when is_callable_replace is non-zero;
                         :  *   - otherwise the string in replace_val, with each reference recognised by
                         :  *     preg_get_backref() swapped for the matching captured substring.
                         :  *
                         :  * limit:         -1 means no limit; any other value is decremented once per
                         :  *                replacement, and no further replacements are made at 0.
                         :  * result_len:    set to the length of the returned string (excluding the NUL).
                         :  * replace_count: if non-NULL, incremented once per replacement made.
                         :  *
                         :  * Returns a buffer obtained from safe_emalloc()/emalloc() that this function
                         :  * does not free, or NULL when /e is combined with a callback, when
                         :  * pcre_fullinfo() fails, or when the loop ends in its error branch (the
                         :  * partial result is freed there).
                         :  */
     934                 : PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val, 
     935                 :         int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
     936             108 : {
     937             108 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
     938                 :         pcre_extra               extra_data;            /* Used locally for exec options */
     939             108 :         int                              exoptions = 0;         /* Execution options */
     940             108 :         int                              count = 0;                     /* Count of matched subpatterns */
     941                 :         int                             *offsets;                       /* Array of subpattern offsets */
     942                 :         int                              size_offsets;          /* Size of the offsets array */
     943                 :         int                              new_len;                       /* Length of needed storage */
     944                 :         int                              alloc_len;                     /* Actual allocated length */
     945             108 :         int                              eval_result_len=0;     /* Length of the eval'ed or
     946                 :                                                                                    function-returned string */
     947                 :         int                              match_len;                     /* Length of the current match */
     948                 :         int                              backref;                       /* Backreference number */
     949                 :         int                              eval;                          /* If the replacement string should be eval'ed */
     950                 :         int                              start_offset;          /* Where the new search starts */
     951             108 :         int                              g_notempty=0;          /* If the match should not be empty */
     952             108 :         int                              replace_len=0;         /* Length of replacement string */
     953                 :         char                    *result,                        /* Result of replacement */
     954             108 :                                         *replace=NULL,          /* Replacement string */
     955                 :                                         *new_buf,                       /* Temporary buffer for re-allocation */
     956                 :                                         *walkbuf,                       /* Location of current replacement in the result */
     957                 :                                         *walk,                          /* Used to walk the replacement string */
     958                 :                                         *match,                         /* The current match */
     959                 :                                         *piece,                         /* The current piece of subject */
     960             108 :                                         *replace_end=NULL,      /* End of replacement string */
     961                 :                                         *eval_result,           /* Result of eval or custom function */
     962                 :                                          walk_last;                     /* Last walked character */
     963                 :         int                              rc;
     964                 :         /* No pcre_extra on the cache entry: use a local one so the match limits below can be set. */
     965             108 :         if (extra == NULL) {
     966             108 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
     967             108 :                 extra = &extra_data;
     968                 :         }
     969             108 :         extra->match_limit = PCRE_G(backtrack_limit);
     970             108 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
     971                 : 
     972             108 :         eval = pce->preg_options & PREG_REPLACE_EVAL; /* non-zero when the pattern carries the /e modifier */
     973             108 :         if (is_callable_replace) {
     974               0 :                 if (eval) {
     975               0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
     976               0 :                         return NULL;
     977                 :                 }
     978                 :         } else {
     979             108 :                 replace = Z_STRVAL_P(replace_val);
     980             108 :                 replace_len = Z_STRLEN_P(replace_val);
     981             108 :                 replace_end = replace + replace_len;
     982                 :         }
     983                 : 
     984                 :         /* Calculate the size of the offsets array, and allocate memory for it. */
     985             108 :         rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
     986             108 :         if (rc < 0) {
     987               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
     988               0 :                 return NULL;
     989                 :         }
     990             108 :         size_offsets = (size_offsets + 1) * 3; /* (capture count + whole match) * 3 ints; see pcre_exec ovector sizing */
     991             108 :         offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
     992                 :         /* Initial result buffer: twice the subject plus a NUL. NOTE(review): computed in int; confirm subject_len cannot be large enough to overflow. */
     993             108 :         alloc_len = 2 * subject_len + 1;
     994             108 :         result = safe_emalloc(alloc_len, sizeof(char), 0);
     995                 : 
     996                 :         /* Initialize */
     997             108 :         match = NULL;
     998             108 :         *result_len = 0;
     999             108 :         start_offset = 0;
    1000             108 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1001                 :         
    1002                 :         while (1) {
    1003                 :                 /* Execute the regular expression. */
    1004             347 :                 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
    1005                 :                                                   exoptions|g_notempty, offsets, size_offsets);
    1006                 :                 
    1007                 :                 /* Check for too many substrings condition. */
    1008             347 :                 if (count == 0) {
    1009               0 :                         php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
    1010               0 :                         count = size_offsets/3; /* i.e. capture count + 1, undoing the sizing above */
    1011                 :                 }
    1012                 : 
    1013             347 :                 piece = subject + start_offset; /* start of the part of subject not yet copied to result */
    1014                 : 
    1015             586 :                 if (count > 0 && (limit == -1 || limit > 0)) { /* matched, and replacements are still allowed */
    1016             239 :                         if (replace_count) {
    1017               0 :                                 ++*replace_count;
    1018                 :                         }
    1019                 :                         /* Set the match location in subject */
    1020             239 :                         match = subject + offsets[0];
    1021                 : 
    1022             239 :                         new_len = *result_len + offsets[0] - start_offset; /* part before the match */
    1023                 :                         
    1024                 :                         /* If evaluating, do it and add the return string's length */
    1025             239 :                         if (eval) {
    1026               0 :                                 eval_result_len = preg_do_eval(replace, replace_len, subject,
    1027                 :                                                                                            offsets, count, &eval_result TSRMLS_CC);
    1028               0 :                                 new_len += eval_result_len;
    1029             239 :                         } else if (is_callable_replace) {
    1030                 :                                 /* Use custom function to get replacement string and its length. */
    1031               0 :                                 eval_result_len = preg_do_repl_func(replace_val, subject, offsets,
    1032                 :                                                                                                         count, &eval_result TSRMLS_CC);
    1033               0 :                                 new_len += eval_result_len;
    1034                 :                         } else { /* do regular substitution: this first pass only measures the expanded replacement */
    1035             239 :                                 walk = replace;
    1036             239 :                                 walk_last = 0;
    1037             717 :                                 while (walk < replace_end) {
    1038             239 :                                         if ('\\' == *walk || '$' == *walk) {
    1039               0 :                                                 if (walk_last == '\\') { /* escaped character: adds no length, the copy pass overwrites the preceding backslash */
    1040               0 :                                                         walk++;
    1041               0 :                                                         walk_last = 0;
    1042               0 :                                                         continue;
    1043                 :                                                 }
    1044               0 :                                                 if (preg_get_backref(&walk, &backref)) {
    1045               0 :                                                         if (backref < count) /* only groups below the count returned by pcre_exec add length */
    1046               0 :                                                                 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
    1047               0 :                                                         continue;
    1048                 :                                                 }
    1049                 :                                         }
    1050             239 :                                         new_len++;
    1051             239 :                                         walk++;
    1052             239 :                                         walk_last = walk[-1];
    1053                 :                                 }
    1054                 :                         }
    1055                 : /* Grow the buffer when the text before the match plus the replacement (and a NUL) would not fit. */
    1056             239 :                         if (new_len + 1 > alloc_len) {
    1057               0 :                                 alloc_len = 1 + alloc_len + 2 * new_len;
    1058               0 :                                 new_buf = emalloc(alloc_len);
    1059               0 :                                 memcpy(new_buf, result, *result_len);
    1060               0 :                                 efree(result);
    1061               0 :                                 result = new_buf;
    1062                 :                         }
    1063                 :                         /* copy the part of the string before the match */
    1064             239 :                         memcpy(&result[*result_len], piece, match-piece);
    1065             239 :                         *result_len += match-piece;
    1066                 : 
    1067                 :                         /* copy replacement and backrefs */
    1068             239 :                         walkbuf = result + *result_len;
    1069                 :                         
    1070                 :                         /* If evaluating or using custom function, copy result to the buffer
    1071                 :                          * and clean up. */
    1072             239 :                         if (eval || is_callable_replace) {
    1073               0 :                                 memcpy(walkbuf, eval_result, eval_result_len);
    1074               0 :                                 *result_len += eval_result_len;
    1075               0 :                                 STR_FREE(eval_result);
    1076                 :                         } else { /* do regular backreference copying */
    1077             239 :                                 walk = replace;
    1078             239 :                                 walk_last = 0;
    1079             717 :                                 while (walk < replace_end) {
    1080             239 :                                         if ('\\' == *walk || '$' == *walk) {
    1081               0 :                                                 if (walk_last == '\\') {
    1082               0 :                                                         *(walkbuf-1) = *walk++; /* overwrite the backslash already copied with the escaped character */
    1083               0 :                                                         walk_last = 0;
    1084               0 :                                                         continue;
    1085                 :                                                 }
    1086               0 :                                                 if (preg_get_backref(&walk, &backref)) {
    1087               0 :                                                         if (backref < count) {
    1088               0 :                                                                 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
    1089               0 :                                                                 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
    1090               0 :                                                                 walkbuf += match_len;
    1091                 :                                                         }
    1092               0 :                                                         continue;
    1093                 :                                                 }
    1094                 :                                         }
    1095             239 :                                         *walkbuf++ = *walk++;
    1096             239 :                                         walk_last = walk[-1];
    1097                 :                                 }
    1098             239 :                                 *walkbuf = '\0';
    1099                 :                                 /* increment the result length by how much we've added to the string */
    1100             239 :                                 *result_len += walkbuf - (result + *result_len);
    1101                 :                         }
    1102                 : 
    1103             239 :                         if (limit != -1)
    1104               0 :                                 limit--;
    1105                 : 
    1106             108 :                 } else if (count == PCRE_ERROR_NOMATCH || limit == 0) { /* no match here, or the limit is used up */
    1107                 :                         /* If we previously set PCRE_NOTEMPTY after a null match,
    1108                 :                            this is not necessarily the end. We need to advance
    1109                 :                            the start offset, and continue. Fudge the offset values
    1110                 :                            to achieve this, unless we're already at the end of the string. */
    1111             108 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1112               0 :                                 offsets[0] = start_offset;
    1113               0 :                                 offsets[1] = start_offset + 1;
    1114               0 :                                 memcpy(&result[*result_len], piece, 1);
    1115               0 :                                 (*result_len)++;
    1116                 :                         } else { /* finished: append the rest of subject, terminate the string and leave the loop */
    1117             108 :                                 new_len = *result_len + subject_len - start_offset;
    1118             108 :                                 if (new_len + 1 > alloc_len) {
    1119               0 :                                         alloc_len = new_len + 1; /* now we know exactly how long it is */
    1120               0 :                                         new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
    1121               0 :                                         memcpy(new_buf, result, *result_len);
    1122               0 :                                         efree(result);
    1123               0 :                                         result = new_buf;
    1124                 :                                 }
    1125                 :                                 /* stick that last bit of string on our output */
    1126             108 :                                 memcpy(&result[*result_len], piece, subject_len - start_offset);
    1127             108 :                                 *result_len += subject_len - start_offset;
    1128             108 :                                 result[*result_len] = '\0';
    1129             108 :                                 break;
    1130                 :                         }
    1131                 :                 } else { /* everything else, e.g. a pcre_exec error code: drop the partial result. NOTE(review): a match with limit below -1 also lands here - confirm intended. */
    1132               0 :                         pcre_handle_exec_error(count TSRMLS_CC);
    1133               0 :                         efree(result);
    1134               0 :                         result = NULL;
    1135               0 :                         break;
    1136                 :                 }
    1137                 :                         
    1138                 :                 /* If we have matched an empty string, mimic what Perl's /g option does.
    1139                 :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
    1140                 :                    the match again at the same point. If this fails (picked up above) we
    1141                 :                    advance to the next character. */
    1142             239 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
    1143                 :                 
    1144                 :                 /* Advance to the next piece. */
    1145             239 :                 start_offset = offsets[1];
    1146             239 :         }
    1147                 :         
    1148             108 :         efree(offsets);
    1149                 : 
    1150             108 :         return result;
    1151                 : }
    1152                 : /* }}} */
    1153                 : 
    1154                 : /* {{{ php_replace_in_subject
                         :  *
                         :  * Applies the pattern(s) in `regex` to the string in *subject and returns the
                         :  * resulting string, storing its length in *result_len. *subject is converted
                         :  * to a string first.
                         :  *
                         :  * When regex is an array, each entry is converted to a string and applied in
                         :  * turn to a private copy of the subject, the output of one replacement being
                         :  * the input of the next. If replace is also an array (and not a callback),
                         :  * its entries are consumed in step with the patterns, and an empty string is
                         :  * used once they run out. A NULL from php_pcre_replace() leaves the current
                         :  * working string in place and the loop moves on to the next pattern.
                         :  *
                         :  * When regex is not an array, the result of a single php_pcre_replace() call
                         :  * is returned as is, which may be NULL.
    1155                 :  */
    1156                 : static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, zend_bool is_callable_replace, int *replace_count TSRMLS_DC)
    1157             108 : {
    1158                 :         zval            **regex_entry,
    1159             108 :                                 **replace_entry = NULL,
    1160                 :                                  *replace_value,
    1161                 :                                   empty_replace;
    1162                 :         char            *subject_value,
    1163                 :                                 *result;
    1164                 :         int                      subject_len;
    1165                 : 
    1166                 :         /* Make sure we're dealing with strings. */     
    1167             108 :         convert_to_string_ex(subject);
    1168                 :         /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
    1169             108 :         ZVAL_STRINGL(&empty_replace, "", 0, 0);
    1170                 :         
    1171                 :         /* If regex is an array */
    1172             108 :         if (Z_TYPE_P(regex) == IS_ARRAY) {
    1173                 :                 /* Duplicate subject string for repeated replacement */
    1174               0 :                 subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
    1175               0 :                 subject_len = Z_STRLEN_PP(subject);
    1176               0 :                 *result_len = subject_len; /* length reported if the regex array turns out to be empty */
    1177                 :                 
    1178               0 :                 zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
    1179                 : 
    1180               0 :                 replace_value = replace;
    1181               0 :                 if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
    1182               0 :                         zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
    1183                 : 
    1184                 :                 /* For each entry in the regex array, get the entry */
    1185               0 :                 while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)&regex_entry) == SUCCESS) {
    1186                 :                         /* Make sure we're dealing with strings. */     
    1187               0 :                         convert_to_string_ex(regex_entry);
    1188                 :                 
    1189                 :                         /* If replace is an array and not a callable construct */
    1190               0 :                         if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
    1191                 :                                 /* Get current entry */
    1192               0 :                                 if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
    1193               0 :                                         if (!is_callable_replace) { /* always true here: the enclosing branch already requires it */
    1194               0 :                                                 convert_to_string_ex(replace_entry);
    1195                 :                                         }
    1196               0 :                                         replace_value = *replace_entry;
    1197               0 :                                         zend_hash_move_forward(Z_ARRVAL_P(replace));
    1198                 :                                 } else {
    1199                 :                                         /* We've run out of replacement strings, so use an empty one */
    1200               0 :                                         replace_value = &empty_replace;
    1201                 :                                 }
    1202                 :                         }
    1203                 :                         
    1204                 :                         /* Do the actual replacement and put the result back into subject_value
    1205                 :                            for further replacements. */
    1206               0 :                         if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
    1207                 :                                                                                    Z_STRLEN_PP(regex_entry),
    1208                 :                                                                                    subject_value,
    1209                 :                                                                                    subject_len,
    1210                 :                                                                                    replace_value,
    1211                 :                                                                                    is_callable_replace,
    1212                 :                                                                                    result_len,
    1213                 :                                                                                    limit,
    1214                 :                                                                                    replace_count TSRMLS_CC)) != NULL) {
    1215               0 :                                 efree(subject_value);
    1216               0 :                                 subject_value = result;
    1217               0 :                                 subject_len = *result_len;
    1218                 :                         }
    1219                 :                         /* NOTE(review): on a NULL result *result_len is not restored to subject_len, although php_pcre_replace_impl() writes to it before matching; if that was the last pattern the reported length may not describe subject_value - confirm. */
    1220               0 :                         zend_hash_move_forward(Z_ARRVAL_P(regex));
    1221                 :                 }
    1222                 : 
    1223               0 :                 return subject_value;
    1224                 :         } else {
    1225             108 :                 result = php_pcre_replace(Z_STRVAL_P(regex),
    1226                 :                                                                   Z_STRLEN_P(regex),
    1227                 :                                                                   Z_STRVAL_PP(subject),
    1228                 :                                                                   Z_STRLEN_PP(subject),
    1229                 :                                                                   replace,
    1230                 :                                                                   is_callable_replace,
    1231                 :                                                                   result_len,
    1232                 :                                                                   limit,
    1233                 :                                                                   replace_count TSRMLS_CC);
    1234             108 :                 return result;
    1235                 :         }
    1236                 : }
    1237                 : /* }}} */
    1238                 : 
    1239                 : /* {{{ preg_replace_impl
    1240                 :  */
    1241                 : static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, zend_bool is_callable_replace)
    1242             108 : {
    1243                 :         zval               **regex,
    1244                 :                                    **replace,
    1245                 :                                    **subject,
    1246                 :                                    **limit,
    1247                 :                                    **subject_entry,
    1248                 :                                    **zcount;
    1249                 :         char                    *result;
    1250                 :         int                              result_len;
    1251             108 :         int                              limit_val = -1;
    1252                 :         char                    *string_key;
    1253                 :         ulong                    num_key;
    1254                 :         char                    *callback_name;
    1255             108 :         int                              replace_count=0;
    1256             108 :         int                             *replace_count_ptr=NULL; 
    1257                 :         
    1258                 :         /* Get function parameters and do error-checking. */
    1259             108 :         if (ZEND_NUM_ARGS() < 3 || ZEND_NUM_ARGS() > 5 ||
    1260                 :                 zend_get_parameters_ex(ZEND_NUM_ARGS(), &regex, &replace, &subject, &limit, &zcount) == FAILURE) {
    1261               0 :                 WRONG_PARAM_COUNT;
    1262                 :         }
    1263             108 :         if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
    1264               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
    1265               0 :                 RETURN_FALSE;
    1266                 :         }
    1267                 : 
    1268             108 :         SEPARATE_ZVAL(replace);
    1269             108 :         if (Z_TYPE_PP(replace) != IS_ARRAY)
    1270             108 :                 convert_to_string_ex(replace);
    1271             108 :         if (is_callable_replace) {
    1272               0 :                 if (!zend_is_callable(*replace, 0, &callback_name)) {
    1273               0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
    1274               0 :                         efree(callback_name);
    1275               0 :                         *return_value = **subject;
    1276               0 :                         zval_copy_ctor(return_value);
    1277               0 :                         INIT_PZVAL(return_value);
    1278               0 :                         return;
    1279                 :                 }
    1280               0 :                 efree(callback_name);
    1281                 :         }
    1282                 : 
    1283             108 :         SEPARATE_ZVAL(regex);
    1284             108 :         SEPARATE_ZVAL(subject);
    1285                 : 
    1286             108 :         if (ZEND_NUM_ARGS() > 3) {
    1287               0 :                 convert_to_long_ex(limit);
    1288               0 :                 limit_val = Z_LVAL_PP(limit);
    1289                 :         }
    1290             108 :         if (ZEND_NUM_ARGS() > 4) {
    1291               0 :                 replace_count_ptr =& replace_count;
    1292                 :         }
    1293                 :                 
    1294             108 :         if (Z_TYPE_PP(regex) != IS_ARRAY)
    1295             108 :                 convert_to_string_ex(regex);
    1296                 :         
    1297                 :         /* if subject is an array */
    1298             108 :         if (Z_TYPE_PP(subject) == IS_ARRAY) {
    1299               0 :                 array_init(return_value);
    1300               0 :                 zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
    1301                 : 
    1302                 :                 /* For each subject entry, convert it to string, then perform replacement
    1303                 :                    and add the result to the return_value array. */
    1304               0 :                 while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
    1305               0 :                         SEPARATE_ZVAL(subject_entry);
    1306               0 :                         if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, replace_count_ptr TSRMLS_CC)) != NULL) {
    1307                 :                                 /* Add to return array */
    1308               0 :                                 switch(zend_hash_get_current_key(Z_ARRVAL_PP(subject), &string_key, &num_key, 0))
    1309                 :                                 {
    1310                 :                                         case HASH_KEY_IS_STRING:
    1311               0 :                                                 add_assoc_stringl(return_value, string_key, result, result_len, 0);
    1312               0 :                                                 break;
    1313                 : 
    1314                 :                                         case HASH_KEY_IS_LONG:
    1315               0 :                                                 add_index_stringl(return_value, num_key, result, result_len, 0);
    1316                 :                                                 break;
    1317                 :                                 }
    1318                 :                         }
    1319                 :                 
    1320               0 :                         zend_hash_move_forward(Z_ARRVAL_PP(subject));
    1321                 :                 }
    1322                 :         } else {        /* if subject is not an array */
    1323             108 :                 if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, replace_count_ptr TSRMLS_CC)) != NULL) {
    1324             108 :                         RETVAL_STRINGL(result, result_len, 0);
    1325                 :                 }
    1326                 :         }
    1327             108 :         if (replace_count_ptr) {
    1328               0 :                 zval_dtor(*zcount);
    1329               0 :                 ZVAL_LONG(*zcount, replace_count);
    1330                 :         }
    1331                 :         
    1332                 : }
    1333                 : /* }}} */
    1334                 : 
    1335                 : /* {{{ proto string preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, count]])
    1336                 :    Perform Perl-style regular expression replacement. */
    1337                 : PHP_FUNCTION(preg_replace)
    1338             108 : {
    1339             108 :         preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
    1340             108 : }
    1341                 : /* }}} */
    1342                 : 
    1343                 : /* {{{ proto string preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, count]])
    1344                 :    Perform Perl-style regular expression replacement using replacement callback. */
    1345                 : PHP_FUNCTION(preg_replace_callback)
    1346               0 : {
    1347               0 :         preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
    1348               0 : }
    1349                 : /* }}} */
    1350                 : 
    1351                 : /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]]) 
    1352                 :    Split string into an array using a perl-style regular expression as a delimiter */
    1353                 : PHP_FUNCTION(preg_split)
    1354               1 : {
    1355                 :         char                            *regex;                 /* Regular expression */
    1356                 :         char                            *subject;               /* String to match against */
    1357                 :         int                                      regex_len;
    1358                 :         int                                      subject_len;
    1359               1 :         long                             limit_val = -1;/* Integer value of limit */
    1360               1 :         long                             flags = 0;             /* Match control flags */
    1361                 :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    1362                 : 
    1363                 :         /* Get function parameters and do error checking */     
    1364               1 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", &regex, &regex_len,
    1365                 :                                                           &subject, &subject_len, &limit_val, &flags) == FAILURE) {
    1366               0 :                 RETURN_FALSE;
    1367                 :         }
    1368                 :         
    1369                 :         /* Compile regex or get it from cache. */
    1370               1 :         if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
    1371               0 :                 RETURN_FALSE;
    1372                 :         }
    1373                 : 
    1374               1 :         php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
    1375                 : }
    1376                 : 
    1377                 : /* {{{ php_pcre_split
    1378                 :  */
    1379                 : PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
    1380                 :         long limit_val, long flags TSRMLS_DC)
    1381               1 : {
    1382               1 :         pcre_extra              *extra = NULL;          /* Holds results of studying */
    1383               1 :         pcre                    *re_bump = NULL;        /* Regex instance for empty matches */
    1384               1 :         pcre_extra              *extra_bump = NULL;     /* Almost dummy */
    1385                 :         pcre_extra               extra_data;            /* Used locally for exec options */
    1386                 :         int                             *offsets;                       /* Array of subpattern offsets */
    1387                 :         int                              size_offsets;          /* Size of the offsets array */
    1388               1 :         int                              exoptions = 0;         /* Execution options */
    1389               1 :         int                              count = 0;                     /* Count of matched subpatterns */
    1390                 :         int                              start_offset;          /* Where the new search starts */
    1391                 :         int                              next_offset;           /* End of the last delimiter match + 1 */
    1392               1 :         int                              g_notempty = 0;        /* If the match should not be empty */
    1393                 :         char                    *match,                         /* The current match */
    1394                 :                                         *last_match;            /* Location of last match */
    1395                 :         int                              rc;
    1396                 :         int                              no_empty;                      /* If NO_EMPTY flag is set */
    1397                 :         int                              delim_capture;         /* If delimiters should be captured */
    1398                 :         int                              offset_capture;        /* If offsets should be captured */
    1399                 : 
    1400               1 :         no_empty = flags & PREG_SPLIT_NO_EMPTY;
    1401               1 :         delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
    1402               1 :         offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
    1403                 :         
    1404               1 :         if (limit_val == 0) {
    1405               0 :                 limit_val = -1;
    1406                 :         }
    1407                 : 
    1408               1 :         if (extra == NULL) {
    1409               1 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1410               1 :                 extra = &extra_data;
    1411                 :         }
    1412               1 :         extra->match_limit = PCRE_G(backtrack_limit);
    1413               1 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
    1414                 :         
    1415                 :         /* Initialize return value */
    1416               1 :         array_init(return_value);
    1417                 : 
    1418                 :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1419               1 :         rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
    1420               1 :         if (rc < 0) {
    1421               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
    1422               0 :                 RETURN_FALSE;
    1423                 :         }
    1424               1 :         size_offsets = (size_offsets + 1) * 3;
    1425               1 :         offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1426                 :         
    1427                 :         /* Start at the beginning of the string */
    1428               1 :         start_offset = 0;
    1429               1 :         next_offset = 0;
    1430               1 :         last_match = subject;
    1431               1 :         match = NULL;
    1432               1 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1433                 :         
    1434                 :         /* Get next piece if no limit or limit not yet reached and something matched*/
    1435               4 :         while ((limit_val == -1 || limit_val > 1)) {
    1436               3 :                 count = pcre_exec(pce->re, extra, subject,
    1437                 :                                                   subject_len, start_offset,
    1438                 :                                                   exoptions|g_notempty, offsets, size_offsets);
    1439                 : 
    1440                 :                 /* Check for too many substrings condition. */
    1441               3 :                 if (count == 0) {
    1442               0 :                         php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
    1443               0 :                         count = size_offsets/3;
    1444                 :                 }
    1445                 :                                 
    1446                 :                 /* If something matched */
    1447               3 :                 if (count > 0) {
    1448               2 :                         match = subject + offsets[0];
    1449                 : 
    1450               2 :                         if (!no_empty || &subject[offsets[0]] != last_match) {
    1451                 : 
    1452               2 :                                 if (offset_capture) {
    1453                 :                                         /* Add (match, offset) pair to the return value */
    1454               0 :                                         add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
    1455                 :                                 } else {
    1456                 :                                         /* Add the piece to the return value */
    1457               2 :                                         add_next_index_stringl(return_value, last_match,
    1458                 :                                                                            &subject[offsets[0]]-last_match, 1);
    1459                 :                                 }
    1460                 : 
    1461                 :                                 /* One less left to do */
    1462               2 :                                 if (limit_val != -1)
    1463               0 :                                         limit_val--;
    1464                 :                         }
    1465                 :                         
    1466               2 :                         last_match = &subject[offsets[1]];
    1467               2 :                         next_offset = offsets[1];
    1468                 : 
    1469               2 :                         if (delim_capture) {
    1470                 :                                 int i, match_len;
    1471               0 :                                 for (i = 1; i < count; i++) {
    1472               0 :                                         match_len = offsets[(i<<1)+1] - offsets[i<<1];
    1473                 :                                         /* If we have matched a delimiter */
    1474               0 :                                         if (!no_empty || match_len > 0) {
    1475               0 :                                                 if (offset_capture) {
    1476               0 :                                                         add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
    1477                 :                                                 } else {
    1478               0 :                                                         add_next_index_stringl(return_value,
    1479                 :                                                                                                    &subject[offsets[i<<1]],
    1480                 :                                                                                                    match_len, 1);
    1481                 :                                                 }
    1482                 :                                         }
    1483                 :                                 }
    1484                 :                         }
    1485               1 :                 } else if (count == PCRE_ERROR_NOMATCH) {
    1486                 :                         /* If we previously set PCRE_NOTEMPTY after a null match,
    1487                 :                            this is not necessarily the end. We need to advance
    1488                 :                            the start offset, and continue. Fudge the offset values
    1489                 :                            to achieve this, unless we're already at the end of the string. */
    1490               1 :                         if (g_notempty != 0 && start_offset < subject_len) {
    1491               0 :                                 if (pce->compile_options & PCRE_UTF8) {
    1492               0 :                                         if (re_bump == NULL) {
    1493                 :                                                 int dummy;
    1494                 : 
    1495               0 :                                                 if ((re_bump = pcre_get_compiled_regex("/./u", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
    1496               0 :                                                         RETURN_FALSE;
    1497                 :                                                 }
    1498                 :                                         }
    1499               0 :                                         count = pcre_exec(re_bump, extra_bump, subject,
    1500                 :                                                           subject_len, start_offset,
    1501                 :                                                           exoptions, offsets, size_offsets);
    1502               0 :                                         if (count < 1) {
    1503               0 :                                                 php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Unknown error");
    1504               0 :                                                 offsets[0] = start_offset;
    1505               0 :                                                 offsets[1] = start_offset + 1;
    1506                 :                                         }
    1507                 :                                 } else {
    1508               0 :                                         offsets[0] = start_offset;
    1509               0 :                                         offsets[1] = start_offset + 1;
    1510                 :                                 }
    1511                 :                         } else
    1512                 :                                 break;
    1513                 :                 } else {
    1514               0 :                         pcre_handle_exec_error(count TSRMLS_CC);
    1515               0 :                         break;
    1516                 :                 }
    1517                 : 
    1518                 :                 /* If we have matched an empty string, mimic what Perl's /g options does.
    1519                 :                    This turns out to be rather cunning. First we set PCRE_NOTEMPTY and try
    1520                 :                    the match again at the same point. If this fails (picked up above) we
    1521                 :                    advance to the next character. */
    1522               2 :                 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY | PCRE_ANCHORED : 0;
    1523                 :                 
    1524                 :                 /* Advance to the position right after the last full match */
    1525               2 :                 start_offset = offsets[1];
    1526                 :         }
    1527                 : 
    1528                 : 
    1529               1 :         if (!no_empty || start_offset != subject_len)
    1530                 :         {
    1531               1 :                 if (offset_capture) {
    1532                 :                         /* Add the last (match, offset) pair to the return value */
    1533               0 :                         add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
    1534                 :                 } else {
    1535                 :                         /* Add the last piece to the return value */
    1536               1 :                         add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
    1537                 :                 }
    1538                 :         }
    1539                 : 
    1540                 :         
    1541                 :         /* Clean up */
    1542               1 :         efree(offsets);
    1543                 : }
    1544                 : /* }}} */
    1545                 : 
    1546                 : /* {{{ proto string preg_quote(string str [, string delim_char])
    1547                 :    Quote regular expression characters plus an optional character */
    1548                 : PHP_FUNCTION(preg_quote)
    1549             107 : {
    1550                 :         int              in_str_len;
    1551                 :         char    *in_str;                /* Input string argument */
    1552                 :         char    *in_str_end;    /* End of the input string */
    1553                 :         int              delim_len;
    1554             107 :         char    *delim = NULL;  /* Additional delimiter argument */
    1555                 :         char    *out_str,               /* Output string with quoted characters */
    1556                 :                         *p,                             /* Iterator for input string */
    1557                 :                         *q,                             /* Iterator for output string */
    1558             107 :                          delim_char=0,  /* Delimiter character to be quoted */
    1559                 :                          c;                             /* Current character */
    1560             107 :         zend_bool quote_delim = 0; /* Whether to quote additional delim char */
    1561                 :         
    1562                 :         /* Get the arguments and check for errors */
    1563             107 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
    1564                 :                                                           &delim, &delim_len) == FAILURE) {
    1565               0 :                 return;
    1566                 :         }
    1567                 :         
    1568             107 :         in_str_end = in_str + in_str_len;
    1569                 : 
    1570                 :         /* Nothing to do if we got an empty string */
    1571             107 :         if (in_str == in_str_end) {
    1572               0 :                 RETURN_EMPTY_STRING();
    1573                 :         }
    1574                 : 
    1575             107 :         if (delim && *delim) {
    1576             107 :                 delim_char = delim[0];
    1577             107 :                 quote_delim = 1;
    1578                 :         }
    1579                 :         
    1580                 :         /* Allocate enough memory so that even if each character
    1581                 :            is quoted, we won't run out of room */
    1582             107 :         out_str = safe_emalloc(4, in_str_len, 1);
    1583                 :         
    1584                 :         /* Go through the string and quote necessary characters */
    1585           35936 :         for(p = in_str, q = out_str; p != in_str_end; p++) {
    1586           35829 :                 c = *p;
    1587           35829 :                 switch(c) {
    1588                 :                         case '.':
    1589                 :                         case '\\':
    1590                 :                         case '+':
    1591                 :                         case '*':
    1592                 :                         case '?':
    1593                 :                         case '[':
    1594                 :                         case '^':
    1595                 :                         case ']':
    1596                 :                         case '$':
    1597                 :                         case '(':
    1598                 :                         case ')':
    1599                 :                         case '{':
    1600                 :                         case '}':
    1601                 :                         case '=':
    1602                 :                         case '!':
    1603                 :                         case '>':
    1604                 :                         case '<':
    1605                 :                         case '|':
    1606                 :                         case ':':
    1607            3272 :                                 *q++ = '\\';
    1608            3272 :                                 *q++ = c;
    1609            3272 :                                 break;
    1610                 : 
    1611                 :                         case '\0':
    1612               0 :                                 *q++ = '\\';
    1613               0 :                                 *q++ = '0';
    1614               0 :                                 *q++ = '0';
    1615               0 :                                 *q++ = '0';
    1616               0 :                                 break;
    1617                 : 
    1618                 :                         default:
    1619           32557 :                                 if (quote_delim && c == delim_char)
    1620             303 :                                         *q++ = '\\';
    1621           32557 :                                 *q++ = c;
    1622                 :                                 break;
    1623                 :                 }
    1624                 :         }
    1625             107 :         *q = '\0';
    1626                 :         
    1627                 :         /* Reallocate string and return it */
    1628             107 :         RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
    1629                 : }
    1630                 : /* }}} */
    1631                 : 
    1632                 : /* {{{ proto array preg_grep(string regex, array input [, int flags])
    1633                 :    Searches array and returns entries which match regex */
    1634                 : PHP_FUNCTION(preg_grep)
    1635               0 : {
    1636                 :         char                            *regex;                 /* Regular expression */
    1637                 :         int                                      regex_len;
    1638                 :         zval                            *input;                 /* Input array */
    1639               0 :         long                             flags = 0;             /* Match control flags */
    1640                 :         pcre_cache_entry        *pce;                   /* Compiled regular expression */
    1641                 : 
    1642                 :         /* Get arguments and do error checking */
    1643               0 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", &regex, &regex_len,
    1644                 :                                                           &input, &flags) == FAILURE) {
    1645               0 :                 return;
    1646                 :         }
    1647                 :         
    1648                 :         /* Compile regex or get it from cache. */
    1649               0 :         if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
    1650               0 :                 RETURN_FALSE;
    1651                 :         }
    1652                 :         
    1653               0 :         php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
    1654                 : }
    1655                 : 
    1656                 : PHPAPI void  php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value,
    1657                 :         long flags TSRMLS_DC)
    1658               0 : {
    1659                 :         zval               **entry;                             /* An entry in the input array */
    1660               0 :         pcre_extra              *extra = pce->extra;/* Holds results of studying */
    1661                 :         pcre_extra               extra_data;            /* Used locally for exec options */
    1662                 :         int                             *offsets;                       /* Array of subpattern offsets */
    1663                 :         int                              size_offsets;          /* Size of the offsets array */
    1664               0 :         int                              count = 0;                     /* Count of matched subpatterns */
    1665                 :         char                    *string_key;
    1666                 :         ulong                    num_key;
    1667                 :         zend_bool                invert;                        /* Whether to return non-matching
    1668                 :                                                                                    entries */
    1669                 :         int                              rc;
    1670                 :         
    1671               0 :         invert = flags & PREG_GREP_INVERT ? 1 : 0;
    1672                 :         
    1673               0 :         if (extra == NULL) {
    1674               0 :                 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
    1675               0 :                 extra = &extra_data;
    1676                 :         }
    1677               0 :         extra->match_limit = PCRE_G(backtrack_limit);
    1678               0 :         extra->match_limit_recursion = PCRE_G(recursion_limit);
    1679                 : 
    1680                 :         /* Calculate the size of the offsets array, and allocate memory for it. */
    1681               0 :         rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
    1682               0 :         if (rc < 0) {
    1683               0 :                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
    1684               0 :                 RETURN_FALSE;
    1685                 :         }
    1686               0 :         size_offsets = (size_offsets + 1) * 3;
    1687               0 :         offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
    1688                 :         
    1689                 :         /* Initialize return array */
    1690               0 :         array_init(return_value);
    1691                 : 
    1692               0 :         PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
    1693                 : 
    1694                 :         /* Go through the input array */
    1695               0 :         zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
    1696               0 :         while(zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
    1697                 : 
    1698               0 :                 convert_to_string_ex(entry);
    1699                 : 
    1700                 :                 /* Perform the match */
    1701               0 :                 count = pcre_exec(pce->re, extra, Z_STRVAL_PP(entry),
    1702                 :                                                   Z_STRLEN_PP(entry), 0,
    1703                 :                                                   0, offsets, size_offsets);
    1704                 : 
    1705                 :                 /* Check for too many substrings condition. */
    1706               0 :                 if (count == 0) {
    1707               0 :                         php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
    1708               0 :                         count = size_offsets/3;
    1709               0 :                 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
    1710               0 :                         pcre_handle_exec_error(count TSRMLS_CC);
    1711               0 :                         break;
    1712                 :                 }
    1713                 : 
    1714                 :                 /* If the entry fits our requirements */
    1715               0 :                 if ((count > 0 && !invert) ||
    1716                 :                         (count == PCRE_ERROR_NOMATCH && invert)) {
    1717               0 :                         (*entry)->refcount++;
    1718                 : 
    1719                 :                         /* Add to return array */
    1720               0 :                         switch (zend_hash_get_current_key(Z_ARRVAL_P(input), &string_key, &num_key, 0))
    1721                 :                         {
    1722                 :                                 case HASH_KEY_IS_STRING:
    1723               0 :                                         zend_hash_update(Z_ARRVAL_P(return_value), string_key,
    1724                 :                                                                          strlen(string_key)+1, entry, sizeof(zval *), NULL);
    1725               0 :                                         break;
    1726                 : 
    1727                 :                                 case HASH_KEY_IS_LONG:
    1728               0 :                                         zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
    1729                 :                                                                                    sizeof(zval *), NULL);
    1730                 :                                         break;
    1731                 :                         }
    1732                 :                 }
    1733                 :                 
    1734               0 :                 zend_hash_move_forward(Z_ARRVAL_P(input));
    1735                 :         }
    1736                 :         
    1737                 :         /* Clean up */
    1738               0 :         efree(offsets);
    1739                 : }
    1740                 : /* }}} */
    1741                 : 
    1742                 : /* {{{ proto int preg_last_error()
    1743                 :    Returns the error code of the last regexp execution, i.e. the current value of PCRE_G(error_code). The call is parsed with an empty parameter spec and returns early if parsing reports FAILURE. */
    1744                 : PHP_FUNCTION(preg_last_error)
    1745               0 : {
    1746               0 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
    1747               0 :                 return;
    1748                 :         }
    1749                 :         /* Hand the stored error code back to the caller as an integer result. */
    1750               0 :         RETURN_LONG(PCRE_G(error_code));
    1751                 : }
    1752                 : /* }}} */
    1753                 : 
    1754                 : /* {{{ module definition structures */
    1755                 : /* Function table for this extension: one PHP_FE entry per preg_* function, paired with its argument-info value (NULL where none is given); the all-NULL entry closes the table. NOTE(review): third_arg_force_ref / fifth_arg_force_ref presumably force that positional argument to be passed by reference - confirm against their definitions. */
    1756                 : zend_function_entry pcre_functions[] = {
    1757                 :         PHP_FE(preg_match,                              third_arg_force_ref)
    1758                 :         PHP_FE(preg_match_all,                  third_arg_force_ref)
    1759                 :         PHP_FE(preg_replace,                    fifth_arg_force_ref)
    1760                 :         PHP_FE(preg_replace_callback,   fifth_arg_force_ref)
    1761                 :         PHP_FE(preg_split,                              NULL)
    1762                 :         PHP_FE(preg_quote,                              NULL)
    1763                 :         PHP_FE(preg_grep,                               NULL)
    1764                 :         PHP_FE(preg_last_error,                 NULL)
    1765                 :         {NULL,          NULL,                           NULL}
    1766                 : };
    1767                 : /* Module descriptor for the "pcre" extension. NOTE(review): the per-slot notes below assume the usual zend_module_entry field order - confirm against zend_modules.h. */
    1768                 : zend_module_entry pcre_module_entry = {
    1769                 :         STANDARD_MODULE_HEADER,
    1770                 :    "pcre",                              /* extension name */
    1771                 :         pcre_functions,                 /* the pcre_functions table of this file */
    1772                 :         PHP_MINIT(pcre),                /* module startup hook */
    1773                 :         PHP_MSHUTDOWN(pcre),            /* module shutdown hook */
    1774                 :         NULL,                           /* presumably the request startup slot: unused */
    1775                 :         NULL,                           /* presumably the request shutdown slot: unused */
    1776                 :         PHP_MINFO(pcre),                /* module info hook */
    1777                 :         NO_VERSION_YET,
    1778                 :         PHP_MODULE_GLOBALS(pcre),
    1779                 :         PHP_GINIT(pcre),                /* globals init hook */
    1780                 :         PHP_GSHUTDOWN(pcre),            /* globals shutdown hook */
    1781                 :         NULL,                           /* presumably the post-deactivate slot: unused */
    1782                 :         STANDARD_MODULE_PROPERTIES_EX
    1783                 : };
    1784                 : /* Only when COMPILE_DL_PCRE is defined: expand ZEND_GET_MODULE(pcre) and, under PHP_WIN32, also include zend_arg_defs.c. */
    1785                 : #ifdef COMPILE_DL_PCRE
    1786                 : ZEND_GET_MODULE(pcre)
    1787                 : # ifdef PHP_WIN32
    1788                 : # include "zend_arg_defs.c"
    1789                 : # endif /* PHP_WIN32 */
    1790                 : #endif /* COMPILE_DL_PCRE */
    1791                 : 
    1792                 : /* }}} */
    1793                 : 
    1794                 : #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
    1795                 : 
    1796                 : /*
    1797                 :  * Local variables:
    1798                 :  * tab-width: 4
    1799                 :  * c-basic-offset: 4
    1800                 :  * End:
    1801                 :  * vim600: sw=4 ts=4 fdm=marker
    1802                 :  * vim<600: sw=4 ts=4
    1803                 :  */

Generated by: LTP GCOV extension version 1.5