LTP GCOV extension - code coverage report
Current view: directory - ext/standard - html.c
Test: PHP Code Coverage
Date: 2007-04-10 Instrumented lines: 408
Code covered: 1.7 % Executed lines: 7
Legend: not executed executed

       1                 : /*
       2                 :    +----------------------------------------------------------------------+
       3                 :    | PHP Version 5                                                        |
       4                 :    +----------------------------------------------------------------------+
       5                 :    | Copyright (c) 1997-2007 The PHP Group                                |
       6                 :    +----------------------------------------------------------------------+
       7                 :    | This source file is subject to version 3.01 of the PHP license,      |
       8                 :    | that is bundled with this package in the file LICENSE, and is        |
       9                 :    | available through the world-wide-web at the following url:           |
      10                 :    | http://www.php.net/license/3_01.txt                                  |
      11                 :    | If you did not receive a copy of the PHP license and are unable to   |
      12                 :    | obtain it through the world-wide-web, please send a note to          |
      13                 :    | license@php.net so we can mail you a copy immediately.               |
      14                 :    +----------------------------------------------------------------------+
      15                 :    | Authors: Rasmus Lerdorf <rasmus@php.net>                             |
      16                 :    |          Jaakko Hyvätti <jaakko.hyvatti@iki.fi>                      |
      17                 :    |          Wez Furlong <wez@thebrainroom.com>                          |
      18                 :    +----------------------------------------------------------------------+
      19                 : */
      20                 : 
      21                 : /* $Id: html.c,v 1.111.2.2.2.9 2007/02/27 03:28:16 iliaa Exp $ */
      22                 : 
      23                 : /*
      24                 :  * HTML entity resources:
      25                 :  *
      26                 :  * http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset2.asp
      27                 :  * http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset3.asp
      28                 :  * http://www.unicode.org/Public/MAPPINGS/OBSOLETE/UNI2SGML.TXT
      29                 :  *
      30                 :  * http://www.w3.org/TR/2002/REC-xhtml1-20020801/dtds.html#h-A2
      31                 :  * 
      32                 :  */
      33                 : 
      34                 : #include "php.h"
      35                 : #if PHP_WIN32
      36                 : #include "config.w32.h"
      37                 : #else
      38                 : #include <php_config.h>
      39                 : #endif
      40                 : #include "reg.h"
      41                 : #include "html.h"
      42                 : #include "php_string.h"
      43                 : #include "SAPI.h"
      44                 : #if HAVE_LOCALE_H
      45                 : #include <locale.h>
      46                 : #endif
      47                 : #if HAVE_LANGINFO_H
      48                 : #include <langinfo.h>
      49                 : #endif
      50                 : 
      51                 : #if HAVE_MBSTRING
      52                 : # include "ext/mbstring/mbstring.h"
      53                 : ZEND_EXTERN_MODULE_GLOBALS(mbstring)
      54                 : #endif
      55                 : 
      56                 : enum entity_charset { cs_terminator, cs_8859_1, cs_cp1252, /* ids for the character sets that have rows in entity_map[] */
      57                 :                                           cs_8859_15, cs_utf_8, cs_big5, cs_gb2312, 
      58                 :                                           cs_big5hkscs, cs_sjis, cs_eucjp, cs_koi8r,
      59                 :                                           cs_cp1251, cs_8859_5, cs_cp866, cs_macroman
      60                 :                                         }; /* cs_terminator is the first enumerator (value 0); the last row of entity_map[] holds it */
      61                 : typedef const char *const entity_table_t; /* one table slot: const pointer to a const entity string such as "euro" or "#1105", or NULL */
      62                 : 
      63                 : /* codepage 1252 is a Windows extension to iso-8859-1. */
      64                 : static entity_table_t ent_cp_1252[] = { /* 32 slots; entity_map[] lists this table for cs_cp1252 with basechar 0x80 and endchar 0x9f */
      65                 :         "euro", NULL, "sbquo", "fnof", "bdquo", "hellip", "dagger",
      66                 :         "Dagger", "circ", "permil", "Scaron", "lsaquo", "OElig",
      67                 :         NULL, NULL, NULL, NULL, "lsquo", "rsquo", "ldquo", "rdquo", /* NULL slots carry no entity name */
      68                 :         "bull", "ndash", "mdash", "tilde", "trade", "scaron", "rsaquo",
      69                 :         "oelig", NULL, NULL, "Yuml" 
      70                 : };
      71                 : 
      72                 : static entity_table_t ent_iso_8859_1[] = { /* 96 slots; entity_map[] lists this table with basechar 0xa0 / endchar 0xff for cs_8859_1 and, as the upper range, for cs_cp1252, cs_utf_8, cs_big5, cs_gb2312, cs_big5hkscs, cs_sjis and cs_eucjp */
      73                 :         "nbsp", "iexcl", "cent", "pound", "curren", "yen", "brvbar",
      74                 :         "sect", "uml", "copy", "ordf", "laquo", "not", "shy", "reg",
      75                 :         "macr", "deg", "plusmn", "sup2", "sup3", "acute", "micro",
      76                 :         "para", "middot", "cedil", "sup1", "ordm", "raquo", "frac14",
      77                 :         "frac12", "frac34", "iquest", "Agrave", "Aacute", "Acirc",
      78                 :         "Atilde", "Auml", "Aring", "AElig", "Ccedil", "Egrave",
      79                 :         "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc",
      80                 :         "Iuml", "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde",
      81                 :         "Ouml", "times", "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml",
      82                 :         "Yacute", "THORN", "szlig", "agrave", "aacute", "acirc",
      83                 :         "atilde", "auml", "aring", "aelig", "ccedil", "egrave",
      84                 :         "eacute", "ecirc", "euml", "igrave", "iacute", "icirc",
      85                 :         "iuml", "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde",
      86                 :         "ouml", "divide", "oslash", "ugrave", "uacute", "ucirc",
      87                 :         "uuml", "yacute", "thorn", "yuml"
      88                 : };
      89                 : 
      90                 : static entity_table_t ent_iso_8859_15[] = { /* 96 slots; entity_map[] lists this table for cs_8859_15 with basechar 0xa0 / endchar 0xff */
      91                 :         "nbsp", "iexcl", "cent", "pound", "euro", "yen", "Scaron", /* same layout as ent_iso_8859_1 except eight slots: euro, Scaron, scaron, OElig, oelig, Yuml and the two NULLs */
      92                 :         "sect", "scaron", "copy", "ordf", "laquo", "not", "shy", "reg",
      93                 :         "macr", "deg", "plusmn", "sup2", "sup3", NULL, /* Zcaron */
      94                 :         "micro", "para", "middot", NULL, /* zcaron */ "sup1", "ordm",
      95                 :         "raquo", "OElig", "oelig", "Yuml", "iquest", "Agrave", "Aacute",
      96                 :         "Acirc", "Atilde", "Auml", "Aring", "AElig", "Ccedil", "Egrave",
      97                 :         "Eacute", "Ecirc", "Euml", "Igrave", "Iacute", "Icirc",
      98                 :         "Iuml", "ETH", "Ntilde", "Ograve", "Oacute", "Ocirc", "Otilde",
      99                 :         "Ouml", "times", "Oslash", "Ugrave", "Uacute", "Ucirc", "Uuml",
     100                 :         "Yacute", "THORN", "szlig", "agrave", "aacute", "acirc",
     101                 :         "atilde", "auml", "aring", "aelig", "ccedil", "egrave",
     102                 :         "eacute", "ecirc", "euml", "igrave", "iacute", "icirc",
     103                 :         "iuml", "eth", "ntilde", "ograve", "oacute", "ocirc", "otilde",
     104                 :         "ouml", "divide", "oslash", "ugrave", "uacute", "ucirc",
     105                 :         "uuml", "yacute", "thorn", "yuml"
     106                 : };
     107                 : 
     108                 : static entity_table_t ent_uni_338_402[] = { /* 65 slots; entity_map[] lists this table for cs_utf_8, code points 338-402; the numeric comments give the code point of the slot that follows */
     109                 :         /* 338 (0x0152) */
     110                 :         "OElig", "oelig", NULL, NULL, NULL, NULL,
     111                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     112                 :         /* 352 (0x0160) */
     113                 :         "Scaron", "scaron", NULL, NULL, NULL, NULL, NULL, NULL,
     114                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     115                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     116                 :         /* 376 (0x0178) */
     117                 :         "Yuml", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     118                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     119                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     120                 :         /* 400 (0x0190) */
     121                 :         NULL, NULL, "fnof"
     122                 : };
     123                 : 
     124                 : static entity_table_t ent_uni_spacing[] = { /* 23 slots; entity_map[] lists this table for cs_utf_8, code points 710-732; only the first and last slots are named */
     125                 :         /* 710 */
     126                 :         "circ",
     127                 :         /* 711 - 730 */
     128                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     129                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     130                 :         /* 731 - 732 */
     131                 :         NULL, "tilde"
     132                 : };
     133                 : 
     134                 : static entity_table_t ent_uni_greek[] = { /* 70 slots; entity_map[] lists this table for cs_utf_8, code points 913-982 */
     135                 :         /* 913 */
     136                 :         "Alpha", "Beta", "Gamma", "Delta", "Epsilon", "Zeta", "Eta", "Theta",
     137                 :         "Iota", "Kappa", "Lambda", "Mu", "Nu", "Xi", "Omicron", "Pi", "Rho",
     138                 :         NULL, "Sigma", "Tau", "Upsilon", "Phi", "Chi", "Psi", "Omega", /* leading NULL is slot 930, which has no name in the capital run */
     139                 :         /* 938 - 944 are not mapped */
     140                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     141                 :         "alpha", "beta", "gamma", "delta", "epsilon", "zeta", "eta", "theta", /* lowercase run starts at 945 */
     142                 :         "iota", "kappa", "lambda", "mu", "nu", "xi", "omicron", "pi", "rho",
     143                 :         "sigmaf", "sigma", "tau", "upsilon", "phi", "chi", "psi", "omega",
     144                 :         /* 970 - 976 are not mapped */
     145                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     146                 :         "thetasym", "upsih", /* 977, 978 */
     147                 :         NULL, NULL, NULL,
     148                 :         "piv" 
     149                 : };
     150                 : 
     151                 : static entity_table_t ent_uni_punct[] = { /* 67 slots; entity_map[] lists this table for cs_utf_8, code points 8194-8260 */
     152                 :         /* 8194 */
     153                 :         "ensp", "emsp", NULL, NULL, NULL, NULL, NULL,
     154                 :         "thinsp", NULL, NULL, "zwnj", "zwj", "lrm", "rlm",
     155                 :         NULL, NULL, NULL, "ndash", "mdash", NULL, NULL, NULL,
     156                 :         /* 8216 */
     157                 :         "lsquo", "rsquo", "sbquo", NULL, "ldquo", "rdquo", "bdquo", NULL,
     158                 :         "dagger", "Dagger", "bull", NULL, NULL, NULL, "hellip",
     159                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "permil", NULL,
     160                 :         /* 8242 */
     161                 :         "prime", "Prime", NULL, NULL, NULL, NULL, NULL, "lsaquo", "rsaquo", NULL,
     162                 :         NULL, NULL, "oline", NULL, NULL, NULL, NULL, NULL,
     163                 :         "frasl" /* final slot, 8260 */
     164                 : };
     165                 : 
     166                 : static entity_table_t ent_uni_euro[] = { /* single slot; entity_map[] lists this table for cs_utf_8 with basechar == endchar == 8364 */
     167                 :         "euro"
     168                 : };
     169                 : 
     170                 : static entity_table_t ent_uni_8465_8501[] = { /* 37 slots; entity_map[] lists this table for cs_utf_8, code points 8465-8501 */
     171                 :         /* 8465 */
     172                 :         "image", NULL, NULL, NULL, NULL, NULL, NULL,
     173                 :         /* 8472 */
     174                 :         "weierp", NULL, NULL, NULL,
     175                 :         /* 8476 */
     176                 :         "real", NULL, NULL, NULL, NULL, NULL,
     177                 :         /* 8482 */
     178                 :         "trade", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     179                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     180                 :         /* 8501 */
     181                 :         "alefsym",
     182                 : };
     183                 : 
     184                 : static entity_table_t ent_uni_8592_9002[] = { /* 411 slots; entity_map[] lists this table for cs_utf_8, code points 8592-9002; laid out 16 slots per commented group unless noted */
     185                 :         /* 8592 (0x2190) */
     186                 :         "larr", "uarr", "rarr", "darr", "harr", NULL, NULL, NULL,
     187                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     188                 :         /* 8608 (0x21a0) */
     189                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     190                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     191                 :         /* 8624 (0x21b0) */
     192                 :         NULL, NULL, NULL, NULL, NULL, "crarr", NULL, NULL,
     193                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     194                 :         /* 8640 (0x21c0) */
     195                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     196                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     197                 :         /* 8656 (0x21d0) */
     198                 :         "lArr", "uArr", "rArr", "dArr", "hArr", "vArr", NULL, NULL,
     199                 :         NULL, NULL, "lAarr", "rAarr", NULL, "rarrw", NULL, NULL,
     200                 :         /* 8672 (0x21e0) */
     201                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, /* this group is 32 slots long (8672-8703), all NULL */
     202                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     203                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     204                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     205                 :         /* 8704 (0x2200) */
     206                 :         "forall", "comp", "part", "exist", "nexist", "empty", NULL, "nabla",
     207                 :         "isin", "notin", "epsis", "ni", "notni", "bepsi", NULL, "prod",
     208                 :         /* 8720 (0x2210) */
     209                 :         "coprod", "sum", "minus", "mnplus", "plusdo", NULL, "setmn", "lowast",
     210                 :         "compfn", NULL, "radic", NULL, NULL, "prop", "infin", "ang90",
     211                 :         /* 8736 (0x2220) */
     212                 :         "ang", "angmsd", "angsph", "mid", "nmid", "par", "npar", "and",
     213                 :         "or", "cap", "cup", "int", NULL, NULL, "conint", NULL,
     214                 :         /* 8752 (0x2230) */
     215                 :         NULL, NULL, NULL, NULL, "there4", "becaus", NULL, NULL,
     216                 :         NULL, NULL, NULL, NULL, "sim", "bsim", NULL, NULL,
     217                 :         /* 8768 (0x2240) */
     218                 :         "wreath", "nsim", NULL, "sime", "nsime", "cong", NULL, "ncong",
     219                 :         "asymp", "nap", "ape", NULL, "bcong", "asymp", "bump", "bumpe", /* NOTE(review): "asymp" fills both slot 8776 and slot 8781 - confirm the second one is intended */
     220                 :         /* 8784 (0x2250) */
     221                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     222                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     223                 :         /* 8800 (0x2260) */
     224                 :         "ne", "equiv", NULL, NULL, "le", "ge", "lE", "gE",
     225                 :         "lnE", "gnE", "Lt", "Gt", "twixt", NULL, "nlt", "ngt",
     226                 :         /* 8816 (0x2270) */
     227                 :         "nles", "nges", "lsim", "gsim", NULL, NULL, "lg", "gl",
     228                 :         NULL, NULL, "pr", "sc", "cupre", "sscue", "prsim", "scsim",
     229                 :         /* 8832 (0x2280) */
     230                 :         "npr", "nsc", "sub", "sup", "nsub", "nsup", "sube", "supe",
     231                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     232                 :         /* 8848 (0x2290) */
     233                 :         NULL, NULL, NULL, NULL, NULL, "oplus", NULL, "otimes",
     234                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     235                 :         /* 8864 (0x22a0) */
     236                 :         NULL, NULL, NULL, NULL, NULL, "perp", NULL, NULL,
     237                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     238                 :         /* 8880 (0x22b0) */
     239                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     240                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     241                 :         /* 8896 (0x22c0) */
     242                 :         NULL, NULL, NULL, NULL, NULL, "sdot", NULL, NULL,
     243                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     244                 :         /* 8912 (0x22d0) */
     245                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     246                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     247                 :         /* 8928 (0x22e0) */
     248                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     249                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     250                 :         /* 8944 (0x22f0) */
     251                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     252                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     253                 :         /* 8960 (0x2300) */
     254                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     255                 :         "lceil", "rceil", "lfloor", "rfloor", NULL, NULL, NULL, NULL,
     256                 :         /* 8976 (0x2310) */
     257                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     258                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     259                 :         /* 8992 (0x2320) */
     260                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     261                 :         NULL, "lang", "rang" /* final group is 11 slots: 8992-9002 */
     262                 : };
     263                 : 
     264                 : static entity_table_t ent_uni_9674[] = { /* single slot; entity_map[] lists this table for cs_utf_8 with basechar == endchar == 9674 */
     265                 :         /* 9674 */
     266                 :         "loz"
     267                 : };
     268                 : 
     269                 : static entity_table_t ent_uni_9824_9830[] = { /* 7 slots; entity_map[] lists this table for cs_utf_8, code points 9824-9830 */
     270                 :         /* 9824 */
     271                 :         "spades", NULL, NULL, "clubs", NULL, "hearts", "diams"
     272                 : };
     273                 : 
     274                 : static entity_table_t ent_koi8r[] = { /* 93 slots; entity_map[] lists this table for cs_koi8r with basechar 0xa3 / endchar 0xff; slots hold "#<decimal>" strings rather than entity names */
     275                 :         "#1105", /* "jo "*/
     276                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
     277                 :         NULL, NULL, NULL, NULL, NULL, "#1025", /* "JO" */
     278                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 
     279                 :         "#1102", "#1072", "#1073", "#1094", "#1076", "#1077", "#1092", /* 64 contiguous slots from here cover 0xc0-0xff */
     280                 :         "#1075", "#1093", "#1080", "#1081", "#1082", "#1083", "#1084", 
     281                 :         "#1085", "#1086", "#1087", "#1103", "#1088", "#1089", "#1090", 
     282                 :         "#1091", "#1078", "#1074", "#1100", "#1099", "#1079", "#1096", 
     283                 :         "#1101", "#1097", "#1095", "#1098", "#1070", "#1040", "#1041", 
     284                 :         "#1062", "#1044", "#1045", "#1060", "#1043", "#1061", "#1048", 
     285                 :         "#1049", "#1050", "#1051", "#1052", "#1053", "#1054", "#1055", 
     286                 :         "#1071", "#1056", "#1057", "#1058", "#1059", "#1046", "#1042",
     287                 :         "#1068", "#1067", "#1047", "#1064", "#1069", "#1065", "#1063", 
     288                 :         "#1066"
     289                 : };
     290                 : 
     291                 : static entity_table_t ent_cp_1251[] = { /* 128 slots; entity_map[] lists this table for cs_cp1251 with basechar 0x80 / endchar 0xff; mixes entity names with "#<decimal>" strings */
     292                 :         "#1026", "#1027", "#8218", "#1107", "#8222", "hellip", "dagger",
     293                 :         "Dagger", "euro", "permil", "#1033", "#8249", "#1034", "#1036",
     294                 :         "#1035", "#1039", "#1106", "#8216", "#8217", "#8219", "#8220", /* NOTE(review): last two slots are 0x93/0x94; Windows-1251 normally maps these to U+201C/U+201D (#8220/#8221) - confirm "#8219", "#8220" */
     295                 :         "bull", "ndash", "mdash", NULL, "trade", "#1113", "#8250",
     296                 :         "#1114", "#1116", "#1115", "#1119", "nbsp", "#1038", "#1118",
     297                 :         "#1032", "curren", "#1168", "brvbar", "sect", "#1025", "copy",
     298                 :         "#1028", "laquo", "not", "shy", "reg", "#1031", "deg", "plusmn", /* this row has 8 slots, the others 7 */
     299                 :         "#1030", "#1110", "#1169", "micro", "para", "middot", "#1105",
     300                 :         "#8470", "#1108", "raquo", "#1112", "#1029", "#1109", "#1111",
     301                 :         "#1040", "#1041", "#1042", "#1043", "#1044", "#1045", "#1046", /* 0xc0 onward: consecutive values #1040..#1103 */
     302                 :         "#1047", "#1048", "#1049", "#1050", "#1051", "#1052", "#1053",
     303                 :         "#1054", "#1055", "#1056", "#1057", "#1058", "#1059", "#1060",
     304                 :         "#1061", "#1062", "#1063", "#1064", "#1065", "#1066", "#1067",
     305                 :         "#1068", "#1069", "#1070", "#1071", "#1072", "#1073", "#1074",
     306                 :         "#1075", "#1076", "#1077", "#1078", "#1079", "#1080", "#1081",
     307                 :         "#1082", "#1083", "#1084", "#1085", "#1086", "#1087", "#1088",
     308                 :         "#1089", "#1090", "#1091", "#1092", "#1093", "#1094", "#1095",
     309                 :         "#1096", "#1097", "#1098", "#1099", "#1100", "#1101", "#1102",
     310                 :         "#1103"
     311                 : };
     312                 : 
     313                 : static entity_table_t ent_iso_8859_5[] = { /* 64 slots; entity_map[] lists this table for cs_8859_5 with basechar 0xc0 / endchar 0xff; the values are one unbroken run #1056..#1119 */
     314                 :         "#1056", "#1057", "#1058", "#1059", "#1060", "#1061", "#1062",
     315                 :         "#1063", "#1064", "#1065", "#1066", "#1067", "#1068", "#1069",
     316                 :         "#1070", "#1071", "#1072", "#1073", "#1074", "#1075", "#1076",
     317                 :         "#1077", "#1078", "#1079", "#1080", "#1081", "#1082", "#1083",
     318                 :         "#1084", "#1085", "#1086", "#1087", "#1088", "#1089", "#1090",
     319                 :         "#1091", "#1092", "#1093", "#1094", "#1095", "#1096", "#1097",
     320                 :         "#1098", "#1099", "#1100", "#1101", "#1102", "#1103", "#1104", /* NOTE(review): "#1104" is slot 0xF0; ISO-8859-5 defines 0xF0 as U+2116 (#8470) - confirm */
     321                 :         "#1105", "#1106", "#1107", "#1108", "#1109", "#1110", "#1111",
     322                 :         "#1112", "#1113", "#1114", "#1115", "#1116", "#1117", "#1118", /* NOTE(review): "#1117" is slot 0xFD; ISO-8859-5 defines 0xFD as U+00A7 (#167) - confirm */
     323                 :         "#1119"
     324                 : };
     325                 : 
     326                 : static entity_table_t ent_cp_866[] = { /* 64 slots, all "#<decimal>" strings; entity_map[] lists this table for cs_cp866 with basechar 0xc0 / endchar 0xff */
     327                 : 
     328                 :         "#9492", "#9524", "#9516", "#9500", "#9472", "#9532", "#9566", 
     329                 :         "#9567", "#9562", "#9556", "#9577", "#9574", "#9568", "#9552", 
     330                 :         "#9580", "#9575", "#9576", "#9572", "#9573", "#9561", "#9560", 
     331                 :         "#9554", "#9555", "#9579", "#9578", "#9496", "#9484", "#9608", 
     332                 :         "#9604", "#9612", "#9616", "#9600", "#1088", "#1089", "#1090", 
     333                 :         "#1091", "#1092", "#1093", "#1094", "#1095", "#1096", "#1097", 
     334                 :         "#1098", "#1099", "#1100", "#1101", "#1102", "#1103", "#1025", 
     335                 :         "#1105", "#1028", "#1108", "#1031", "#1111", "#1038", "#1118", 
     336                 :         "#176", "#8729", "#183", "#8730", "#8470", "#164",  "#9632", 
     337                 :         "#160"
     338                 : };
     339                 : 
     340                 : /* MacRoman has a couple of low-ascii chars that need mapping too */
     341                 : /* Vertical tab (ASCII 11) is often used to store line breaks inside */
     342                 : /* DB exports, this mapping changes it to a space */
     343                 : static entity_table_t ent_macroman[] = { /* 245 slots; entity_map[] lists this table for cs_macroman with basechar 0x0b / endchar 0xff */
     344                 :         "sp", NULL, NULL, NULL, /* first slot is 0x0b, the vertical tab described above */
     345                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     346                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     347                 :         NULL, NULL, NULL, NULL, NULL, "quot", NULL, /* "quot" sits at 0x22 */
     348                 :         NULL, NULL, "amp", NULL, NULL, NULL, NULL, /* "amp" sits at 0x26 */
     349                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     350                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     351                 :         NULL, NULL, NULL, "lt", NULL, "gt", NULL, /* "lt" at 0x3c, "gt" at 0x3e */
     352                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     353                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     354                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     355                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     356                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     357                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     358                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     359                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     360                 :         NULL, NULL, NULL, NULL, NULL, NULL, NULL,
     361                 :         NULL, "Auml", "Aring", "Ccedil", "Eacute", "Ntilde", "Ouml", /* leading NULL is 0x7f; "Auml" starts the 0x80-0xff range */
     362                 :         "Uuml", "aacute", "agrave", "acirc", "auml", "atilde", "aring",
     363                 :         "ccedil", "eacute", "egrave", "ecirc", "euml", "iacute", "igrave",
     364                 :         "icirc", "iuml", "ntilde", "oacute", "ograve", "ocirc", "ouml",
     365                 :         "otilde", "uacute", "ugrave", "ucirc", "uuml", "dagger", "deg",
     366                 :         "cent", "pound", "sect", "bull", "para", "szlig", "reg",
     367                 :         "copy", "trade", "acute", "uml", "ne", "AElig", "Oslash",
     368                 :         "infin", "plusmn", "le", "ge", "yen", "micro", "part",
     369                 :         "sum", "prod", "pi", "int", "ordf", "ordm", "Omega",
     370                 :         "aelig", "oslash", "iquest", "iexcl", "not", "radic", "fnof",
     371                 :         "asymp", "#8710", "laquo", "raquo", "hellip", "nbsp", "Agrave",
     372                 :         "Atilde", "Otilde", "OElig", "oelig", "ndash", "mdash", "ldquo",
     373                 :         "rdquo", "lsquo", "rsquo", "divide", "loz", "yuml", "Yuml",
     374                 :         "frasl", "euro", "lsaquo", "rsaquo", "#xFB01", "#xFB02", "Dagger", /* "#x..." slots are hexadecimal, unlike the decimal "#..." slots */
     375                 :         "middot", "sbquo", "bdquo", "permil", "Acirc", "Ecirc", "Aacute",
     376                 :         "Euml", "Egrave", "Iacute", "Icirc", "Iuml", "Igrave", "Oacute",
     377                 :         "Ocirc", "#xF8FF", "Ograve", "Uacute", "Ucirc", "Ugrave", "#305",
     378                 :         "circ", "tilde", "macr", "#728", "#729", "#730", "cedil",
     379                 :         "#733", "#731", "#711"
     380                 : };
     381                 : 
     382                 : struct html_entity_map { /* one row of entity_map[]: a charset, one contiguous code range, and the table covering that range */
     383                 :         enum entity_charset charset;    /* charset identifier */
     384                 :         unsigned short basechar;                        /* char code at start of table */
     385                 :         unsigned short endchar;                 /* last char code in the table */
     386                 :         entity_table_t *table;                  /* the table of mappings */
     387                 : };
     388                 : 
     389                 : static const struct html_entity_map entity_map[] = { /* rows of { charset, basechar, endchar, table }; a charset may own several rows */
     390                 :         { cs_cp1252,            0x80, 0x9f, ent_cp_1252 },
     391                 :         { cs_cp1252,            0xa0, 0xff, ent_iso_8859_1 }, /* cp1252 shares the ISO-8859-1 table for its upper range */
     392                 :         { cs_8859_1,            0xa0, 0xff, ent_iso_8859_1 },
     393                 :         { cs_8859_15,           0xa0, 0xff, ent_iso_8859_15 },
     394                 :         { cs_utf_8,             0xa0, 0xff, ent_iso_8859_1 }, /* cs_utf_8 rows are in ascending, non-overlapping code ranges */
     395                 :         { cs_utf_8,             338,  402,  ent_uni_338_402 },
     396                 :         { cs_utf_8,             710,  732,  ent_uni_spacing },
     397                 :         { cs_utf_8,             913,  982,  ent_uni_greek },
     398                 :         { cs_utf_8,             8194, 8260, ent_uni_punct },
     399                 :         { cs_utf_8,             8364, 8364, ent_uni_euro }, 
     400                 :         { cs_utf_8,             8465, 8501, ent_uni_8465_8501 },
     401                 :         { cs_utf_8,             8592, 9002, ent_uni_8592_9002 },
     402                 :         { cs_utf_8,             9674, 9674, ent_uni_9674 },
     403                 :         { cs_utf_8,             9824, 9830, ent_uni_9824_9830 },
     404                 :         { cs_big5,                      0xa0, 0xff, ent_iso_8859_1 }, /* the five multibyte charsets below all reuse ent_iso_8859_1 for 0xa0-0xff */
     405                 :         { cs_gb2312,            0xa0, 0xff, ent_iso_8859_1 },
     406                 :         { cs_big5hkscs,         0xa0, 0xff, ent_iso_8859_1 },
     407                 :         { cs_sjis,                      0xa0, 0xff, ent_iso_8859_1 },
     408                 :         { cs_eucjp,                     0xa0, 0xff, ent_iso_8859_1 },
     409                 :         { cs_koi8r,                 0xa3, 0xff, ent_koi8r },
     410                 :         { cs_cp1251,            0x80, 0xff, ent_cp_1251 },
     411                 :         { cs_8859_5,            0xc0, 0xff, ent_iso_8859_5 },
     412                 :         { cs_cp866,                 0xc0, 0xff, ent_cp_866 },
     413                 :         { cs_macroman,          0x0b, 0xff, ent_macroman },
     414                 :         { cs_terminator } /* last row: charset is cs_terminator, the other fields are zero-initialized */
     415                 : };
     416                 : 
     417                 : static const struct { /* pairs a charset name string with its enum entity_charset id */
     418                 :         const char *codeset; /* charset name, alias, or bare code-page number as text */
     419                 :         enum entity_charset charset; /* id used in the charset column of entity_map[] */
     420                 : } charset_map[] = {
     421                 :         { "ISO-8859-1",       cs_8859_1 },
     422                 :         { "ISO8859-1",                cs_8859_1 },
     423                 :         { "ISO-8859-15",      cs_8859_15 },
     424                 :         { "ISO8859-15",       cs_8859_15 },
     425                 :         { "utf-8",                    cs_utf_8 },
     426                 :         { "cp1252",           cs_cp1252 },
     427                 :         { "Windows-1252",     cs_cp1252 },
     428                 :         { "1252",           cs_cp1252 }, 
     429                 :         { "BIG5",                     cs_big5 },
     430                 :         { "950",            cs_big5 },
     431                 :         { "GB2312",                   cs_gb2312 },
     432                 :         { "936",            cs_gb2312 },
     433                 :         { "BIG5-HKSCS",               cs_big5hkscs },
     434                 :         { "Shift_JIS",                cs_sjis },
     435                 :         { "SJIS",             cs_sjis },
     436                 :         { "932",            cs_sjis },
     437                 :         { "EUCJP",            cs_eucjp },
     438                 :         { "EUC-JP",                   cs_eucjp },
     439                 :         { "KOI8-R",         cs_koi8r },
     440                 :         { "koi8-ru",        cs_koi8r },
     441                 :         { "koi8r",          cs_koi8r },
     442                 :         { "cp1251",         cs_cp1251 },
     443                 :         { "Windows-1251",   cs_cp1251 },
     444                 :         { "win-1251",       cs_cp1251 },
     445                 :         { "iso8859-5",      cs_8859_5 },
     446                 :         { "iso-8859-5",     cs_8859_5 },
     447                 :         { "cp866",          cs_cp866 },
     448                 :         { "866",            cs_cp866 },    
     449                 :         { "ibm866",         cs_cp866 },
     450                 :         { "MacRoman",       cs_macroman },
     451                 :         { NULL } /* last row: codeset is NULL and charset is zero-initialized, i.e. cs_terminator */
     452                 : };
     453                 : 
     454                 : static const struct {
     455                 :         unsigned short charcode;
     456                 :         char *entity;
     457                 :         int entitylen;
     458                 :         int flags;
     459                 : } basic_entities[] = {
     460                 :         { '"',     "&quot;", 6,      ENT_HTML_QUOTE_DOUBLE },
     461                 :         { '\'', "&#039;", 6,      ENT_HTML_QUOTE_SINGLE },
     462                 :         { '\'', "&#39;",  5,      ENT_HTML_QUOTE_SINGLE },
     463                 :         { '<',       "&lt;",           4,      0 },
     464                 :         { '>',       "&gt;",           4,      0 },
     465                 :         { 0, NULL, 0, 0 }
     466                 : };
     467                 :         
     468                 : struct basic_entities_dec {
     469                 :         unsigned short charcode;
     470                 :         char entity[8];
     471                 :         int entitylen;  
     472                 : };
     473                 :         
     474                 : #define MB_RETURN { \
     475                 :                         *newpos = pos;       \
     476                 :                         mbseq[mbpos] = '\0'; \
     477                 :                         *mbseqlen = mbpos;   \
     478                 :                         return this_char; }
     479                 :                                         
     480                 : #define MB_WRITE(mbchar) { \
     481                 :                         mbspace--;  \
     482                 :                         if (mbspace == 0) {      \
     483                 :                                 MB_RETURN;           \
     484                 :                         }                        \
     485                 :                         mbseq[mbpos++] = (mbchar); }
     486                 : 
     487                 : /* {{{ get_next_char
     488                 :  */
     489                 : inline static unsigned short get_next_char(enum entity_charset charset,
     490                 :                 unsigned char * str,
     491                 :                 int * newpos,
     492                 :                 unsigned char * mbseq,
     493                 :                 int * mbseqlen)
     494               0 : {
     495               0 :         int pos = *newpos;
     496               0 :         int mbpos = 0;
     497               0 :         int mbspace = *mbseqlen;
     498               0 :         unsigned short this_char = str[pos++];
     499                 :         
     500               0 :         if (mbspace <= 0) {
     501               0 :                 *mbseqlen = 0;
     502               0 :                 return this_char;
     503                 :         }
     504                 :         
     505               0 :         MB_WRITE((unsigned char)this_char);
     506                 :         
     507               0 :         switch (charset) {
     508                 :                 case cs_utf_8:
     509                 :                         {
     510               0 :                                 unsigned long utf = 0;
     511               0 :                                 int stat = 0;
     512               0 :                                 int more = 1;
     513                 : 
     514                 :                                 /* unpack utf-8 encoding into a wide char.
     515                 :                                  * Code stolen from the mbstring extension */
     516                 : 
     517                 :                                 do {
     518               0 :                                         if (this_char < 0x80) {
     519               0 :                                                 more = 0;
     520               0 :                                                 break;
     521               0 :                                         } else if (this_char < 0xc0) {
     522               0 :                                                 switch (stat) {
     523                 :                                                         case 0x10:      /* 2, 2nd */
     524                 :                                                         case 0x21:      /* 3, 3rd */
     525                 :                                                         case 0x32:      /* 4, 4th */
     526                 :                                                         case 0x43:      /* 5, 5th */
     527                 :                                                         case 0x54:      /* 6, 6th */
     528                 :                                                                 /* last byte in sequence */
     529               0 :                                                                 more = 0;
     530               0 :                                                                 utf |= (this_char & 0x3f);
     531               0 :                                                                 this_char = (unsigned short)utf;
     532               0 :                                                                 break;
     533                 :                                                         case 0x20:      /* 3, 2nd */
     534                 :                                                         case 0x31:      /* 4, 3rd */
     535                 :                                                         case 0x42:      /* 5, 4th */
     536                 :                                                         case 0x53:      /* 6, 5th */
     537                 :                                                                 /* penultimate char */
     538               0 :                                                                 utf |= ((this_char & 0x3f) << 6);
     539               0 :                                                                 stat++;
     540               0 :                                                                 break;
     541                 :                                                         case 0x30:      /* 4, 2nd */
     542                 :                                                         case 0x41:      /* 5, 3rd */
     543                 :                                                         case 0x52:      /* 6, 4th */
     544               0 :                                                                 utf |= ((this_char & 0x3f) << 12);
     545               0 :                                                                 stat++;
     546               0 :                                                                 break;
     547                 :                                                         case 0x40:      /* 5, 2nd */
     548                 :                                                         case 0x51:
     549               0 :                                                                 utf |= ((this_char & 0x3f) << 18);
     550               0 :                                                                 stat++;
     551               0 :                                                                 break;
     552                 :                                                         case 0x50:      /* 6, 2nd */
     553               0 :                                                                 utf |= ((this_char & 0x3f) << 24);
     554               0 :                                                                 stat++;
     555               0 :                                                                 break;
     556                 :                                                         default:
     557                 :                                                                 /* invalid */
     558               0 :                                                                 more = 0;
     559                 :                                                 }
     560                 :                                         }
     561                 :                                         /* lead byte */
     562               0 :                                         else if (this_char < 0xe0) {
     563               0 :                                                 stat = 0x10;    /* 2 byte */
     564               0 :                                                 utf = (this_char & 0x1f) << 6;
     565               0 :                                         } else if (this_char < 0xf0) {
     566               0 :                                                 stat = 0x20;    /* 3 byte */
     567               0 :                                                 utf = (this_char & 0xf) << 12;
     568               0 :                                         } else if (this_char < 0xf8) {
     569               0 :                                                 stat = 0x30;    /* 4 byte */
     570               0 :                                                 utf = (this_char & 0x7) << 18;
     571               0 :                                         } else if (this_char < 0xfc) {
     572               0 :                                                 stat = 0x40;    /* 5 byte */
     573               0 :                                                 utf = (this_char & 0x3) << 24;
     574               0 :                                         } else if (this_char < 0xfe) {
     575               0 :                                                 stat = 0x50;    /* 6 byte */
     576               0 :                                                 utf = (this_char & 0x1) << 30;
     577                 :                                         } else {
     578                 :                                                 /* invalid; bail */
     579               0 :                                                 more = 0;
     580               0 :                                                 break;
     581                 :                                         }
     582                 : 
     583               0 :                                         if (more) {
     584               0 :                                                 this_char = str[pos++];
     585               0 :                                                 MB_WRITE((unsigned char)this_char);
     586                 :                                         }
     587               0 :                                 } while (more);
     588                 :                         }
     589               0 :                         break;
     590                 :                 case cs_big5:
     591                 :                 case cs_gb2312:
     592                 :                 case cs_big5hkscs:
     593                 :                         {
     594                 :                                 /* check if this is the first of a 2-byte sequence */
     595               0 :                                 if (this_char >= 0xa1 && this_char <= 0xfe) {
     596                 :                                         /* peek at the next char */
     597               0 :                                         unsigned char next_char = str[pos];
     598               0 :                                         if ((next_char >= 0x40 && next_char <= 0x7e) ||
     599                 :                                                         (next_char >= 0xa1 && next_char <= 0xfe)) {
     600                 :                                                 /* yes, this a wide char */
     601               0 :                                                 this_char <<= 8;
     602               0 :                                                 MB_WRITE(next_char);
     603               0 :                                                 this_char |= next_char;
     604               0 :                                                 pos++;
     605                 :                                         }
     606                 :                                         
     607                 :                                 }
     608               0 :                                 break;
     609                 :                         }
     610                 :                 case cs_sjis:
     611                 :                         {
     612                 :                                 /* check if this is the first of a 2-byte sequence */
     613               0 :                                 if ( (this_char >= 0x81 && this_char <= 0x9f) ||
     614                 :                                          (this_char >= 0xe0 && this_char <= 0xef)
     615                 :                                         ) {
     616                 :                                         /* peek at the next char */
     617               0 :                                         unsigned char next_char = str[pos];
     618               0 :                                         if ((next_char >= 0x40 && next_char <= 0x7e) ||
     619                 :                                                 (next_char >= 0x80 && next_char <= 0xfc))
     620                 :                                         {
     621                 :                                                 /* yes, this a wide char */
     622               0 :                                                 this_char <<= 8;
     623               0 :                                                 MB_WRITE(next_char);
     624               0 :                                                 this_char |= next_char;
     625               0 :                                                 pos++;
     626                 :                                         }
     627                 :                                         
     628                 :                                 }
     629               0 :                                 break;
     630                 :                         }
     631                 :                 case cs_eucjp:
     632                 :                         {
     633                 :                                 /* check if this is the first of a multi-byte sequence */
     634               0 :                                 if (this_char >= 0xa1 && this_char <= 0xfe) {
     635                 :                                         /* peek at the next char */
     636               0 :                                         unsigned char next_char = str[pos];
     637               0 :                                         if (next_char >= 0xa1 && next_char <= 0xfe) {
     638                 :                                                 /* yes, this a jis kanji char */
     639               0 :                                                 this_char <<= 8;
     640               0 :                                                 MB_WRITE(next_char);
     641               0 :                                                 this_char |= next_char;
     642               0 :                                                 pos++;
     643                 :                                         }
     644                 :                                         
     645               0 :                                 } else if (this_char == 0x8e) {
     646                 :                                         /* peek at the next char */
     647               0 :                                         unsigned char next_char = str[pos];
     648               0 :                                         if (next_char >= 0xa1 && next_char <= 0xdf) {
     649                 :                                                 /* JIS X 0201 kana */
     650               0 :                                                 this_char <<= 8;
     651               0 :                                                 MB_WRITE(next_char);
     652               0 :                                                 this_char |= next_char;
     653               0 :                                                 pos++;
     654                 :                                         }
     655                 :                                         
     656               0 :                                 } else if (this_char == 0x8f) {
     657                 :                                         /* peek at the next two char */
     658               0 :                                         unsigned char next_char = str[pos];
     659               0 :                                         unsigned char next2_char = str[pos+1];
     660               0 :                                         if ((next_char >= 0xa1 && next_char <= 0xfe) &&
     661                 :                                                 (next2_char >= 0xa1 && next2_char <= 0xfe)) {
     662                 :                                                 /* JIS X 0212 hojo-kanji */
     663               0 :                                                 this_char <<= 8;
     664               0 :                                                 MB_WRITE(next_char);
     665               0 :                                                 this_char |= next_char;
     666               0 :                                                 pos++;
     667               0 :                                                 this_char <<= 8;
     668               0 :                                                 MB_WRITE(next2_char);
     669               0 :                                                 this_char |= next2_char;
     670               0 :                                                 pos++;
     671                 :                                         }
     672                 :                                         
     673                 :                                 }
     674                 :                                 break;
     675                 :                         }
     676                 :                 default:
     677                 :                         break;
     678                 :         }
     679               0 :         MB_RETURN;
     680                 : }
     681                 : /* }}} */
     682                 : 
     683                 : /* {{{ entity_charset determine_charset
     684                 :  * returns the charset identifier based on current locale or a hint.
     685                 :  * defaults to iso-8859-1 */
     686                 : static enum entity_charset determine_charset(char *charset_hint TSRMLS_DC)
     687               0 : {
     688                 :         int i;
     689               0 :         enum entity_charset charset = cs_8859_1;
     690               0 :         int len = 0;
     691               0 :         zval *uf_result = NULL;
     692                 : 
     693                 :         /* Guarantee default behaviour for backwards compatibility */
     694               0 :         if (charset_hint == NULL)
     695               0 :                 return cs_8859_1;
     696                 : 
     697               0 :         if ((len = strlen(charset_hint)) != 0) {
     698               0 :                 goto det_charset;
     699                 :         }
     700                 : #if HAVE_MBSTRING
     701                 : #if !defined(COMPILE_DL_MBSTRING)
     702                 :         /* XXX: Ugly things. Why don't we look for a more sophisticated way? */
     703                 :         switch (MBSTRG(current_internal_encoding)) {
     704                 :                 case mbfl_no_encoding_8859_1:
     705                 :                         return cs_8859_1;
     706                 : 
     707                 :                 case mbfl_no_encoding_utf8:
     708                 :                         return cs_utf_8;
     709                 : 
     710                 :                 case mbfl_no_encoding_euc_jp:
     711                 :                 case mbfl_no_encoding_eucjp_win:
     712                 :                         return cs_eucjp;
     713                 : 
     714                 :                 case mbfl_no_encoding_sjis:
     715                 :                 case mbfl_no_encoding_sjis_win:
     716                 :                 case mbfl_no_encoding_sjis_mac:
     717                 :                         return cs_sjis;
     718                 : 
     719                 :                 case mbfl_no_encoding_cp1252:
     720                 :                         return cs_cp1252;
     721                 : 
     722                 :                 case mbfl_no_encoding_8859_15:
     723                 :                         return cs_8859_15;
     724                 : 
     725                 :                 case mbfl_no_encoding_big5:
     726                 :                         return cs_big5;
     727                 : 
     728                 :                 case mbfl_no_encoding_euc_cn:
     729                 :                 case mbfl_no_encoding_hz:
     730                 :                 case mbfl_no_encoding_cp936:
     731                 :                         return cs_gb2312;
     732                 : 
     733                 :                 case mbfl_no_encoding_koi8r:
     734                 :                         return cs_koi8r;
     735                 : 
     736                 :                 case mbfl_no_encoding_cp866:
     737                 :                         return cs_cp866;
     738                 : 
     739                 :                 case mbfl_no_encoding_cp1251:
     740                 :                         return cs_cp1251;
     741                 : 
     742                 :                 case mbfl_no_encoding_8859_5:
     743                 :                         return cs_8859_5;
     744                 : 
     745                 :                 default:
     746                 :                         ;
     747                 :         }
     748                 : #else
     749                 :         {
     750                 :                 zval nm_mb_internal_encoding;
     751                 : 
     752                 :                 ZVAL_STRING(&nm_mb_internal_encoding, "mb_internal_encoding", 0);
     753                 : 
     754                 :                 if (call_user_function_ex(CG(function_table), NULL, &nm_mb_internal_encoding, &uf_result, 0, NULL, 1, NULL TSRMLS_CC) != FAILURE) {
     755                 : 
     756                 :                         charset_hint = Z_STRVAL_P(uf_result);
     757                 :                         len = Z_STRLEN_P(uf_result);
     758                 :                         
     759                 :                         if (len == 4) { /* sizeof(none|auto|pass)-1 */
     760                 :                                 if (!memcmp("pass", charset_hint, sizeof("pass") - 1) || 
     761                 :                                     !memcmp("auto", charset_hint, sizeof("auto") - 1) || 
     762                 :                                     !memcmp("none", charset_hint, sizeof("none") - 1)) {
     763                 :                                         
     764                 :                                         charset_hint = NULL;
     765                 :                                         len = 0;
     766                 :                                 }
     767                 :                         }
     768                 :                         goto det_charset;
     769                 :                 }
     770                 :         }
     771                 : #endif
     772                 : #endif
     773                 : 
     774               0 :         charset_hint = SG(default_charset);
     775               0 :         if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
     776               0 :                 goto det_charset;
     777                 :         }
     778                 : 
     779                 :         /* try to detect the charset for the locale */
     780                 : #if HAVE_NL_LANGINFO && HAVE_LOCALE_H && defined(CODESET)
     781               0 :         charset_hint = nl_langinfo(CODESET);
     782               0 :         if (charset_hint != NULL && (len=strlen(charset_hint)) != 0) {
     783               0 :                 goto det_charset;
     784                 :         }
     785                 : #endif
     786                 : 
     787                 : #if HAVE_LOCALE_H
     788                 :         /* try to figure out the charset from the locale */
     789                 :         {
     790                 :                 char *localename;
     791                 :                 char *dot, *at;
     792                 : 
     793                 :                 /* lang[_territory][.codeset][@modifier] */
     794               0 :                 localename = setlocale(LC_CTYPE, NULL);
     795                 : 
     796               0 :                 dot = strchr(localename, '.');
     797               0 :                 if (dot) {
     798               0 :                         dot++;
     799                 :                         /* locale specifies a codeset */
     800               0 :                         at = strchr(dot, '@');
     801               0 :                         if (at)
     802               0 :                                 len = at - dot;
     803                 :                         else
     804               0 :                                 len = strlen(dot);
     805               0 :                         charset_hint = dot;
     806                 :                 } else {
     807                 :                         /* no explicit name; see if the name itself
     808                 :                          * is the charset */
     809               0 :                         charset_hint = localename;
     810               0 :                         len = strlen(charset_hint);
     811                 :                 }
     812                 :         }
     813                 : #endif
     814                 : 
     815               0 : det_charset:
     816                 : 
     817               0 :         if (charset_hint) {
     818               0 :                 int found = 0;
     819                 :                 
     820                 :                 /* now walk the charset map and look for the codeset */
     821               0 :                 for (i = 0; charset_map[i].codeset; i++) {
     822               0 :                         if (strncasecmp(charset_hint, charset_map[i].codeset, len) == 0) {
     823               0 :                                 charset = charset_map[i].charset;
     824               0 :                                 found = 1;
     825               0 :                                 break;
     826                 :                         }
     827                 :                 }
     828               0 :                 if (!found) {
     829               0 :                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "charset `%s' not supported, assuming iso-8859-1",
     830                 :                                         charset_hint);
     831                 :                 }
     832                 :         }
     833               0 :         if (uf_result != NULL) {
     834               0 :                 zval_ptr_dtor(&uf_result);
     835                 :         }
     836               0 :         return charset;
     837                 : }
     838                 : /* }}} */
     839                 : 
     /* {{{ php_utf32_utf8
      * Encode the code point k as UTF-8 into buf and NUL-terminate the result.
      *
      * The legacy (pre RFC 3629) layout is used: 1 byte below 0x80, 2 below
      * 0x800, 3 below 0x10000, 4 below 0x200000, 5 below 0x4000000 and 6 bytes
      * for everything above, so buf must provide room for up to 7 bytes.
      * Surrogates and values above 0x10FFFF are encoded as-is, not rejected.
      *
      * A negative k is not a code point: nothing is encoded, buf[0] is set to
      * '\0' and 0 is returned.  (Such values used to fall into the "k < 0x80"
      * branch and emit the truncated low byte of k as if it were ASCII.)
      *
      * Returns the number of bytes written, not counting the terminating NUL.
      */
     size_t php_utf32_utf8(unsigned char *buf, int k)
     {
             unsigned int c;
             unsigned int lead;
             size_t len, i;

             if (k < 0) {
                     buf[0] = '\0';
                     return 0;
             }
             c = (unsigned int) k;

             if (c < 0x80) {
                     buf[0] = (unsigned char) c;
                     buf[1] = '\0';
                     return 1;
             }

             /* pick the sequence length and the marker bits of the lead byte */
             if (c < 0x800) {
                     len = 2;
                     lead = 0xc0;
             } else if (c < 0x10000) {
                     len = 3;
                     lead = 0xe0;
             } else if (c < 0x200000) {
                     len = 4;
                     lead = 0xf0;
             } else if (c < 0x4000000) {
                     len = 5;
                     lead = 0xf8;
             } else {
                     len = 6;
                     lead = 0xfc;
             }

             /* continuation bytes carry 6 payload bits each, filled back to front */
             for (i = len - 1; i > 0; i--) {
                     buf[i] = (unsigned char) (0x80 | (c & 0x3f));
                     c >>= 6;
             }
             /* whatever is left of c fits into the payload bits of the lead byte */
             buf[0] = (unsigned char) (lead | c);
             buf[len] = '\0';

             return len;
     }
     /* }}} */
     884                 : 
     885                 : /* {{{ php_unescape_html_entities
     886                 :  */
     887                 : PHPAPI char *php_unescape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
     888               0 : {
     889                 :         int retlen;
     890                 :         int j, k;
     891                 :         char *replaced, *ret, *p, *q, *lim, *next;
     892               0 :         enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
     893                 :         unsigned char replacement[15];
     894                 :         int replacement_len;
     895                 : 
     896               0 :         ret = estrndup(old, oldlen);
     897               0 :         retlen = oldlen;
     898               0 :         if (!retlen) {
     899               0 :                 goto empty_source;
     900                 :         }
     901                 :         
     902               0 :         if (all) {
     903                 :                 /* look for a match in the maps for this charset */
     904               0 :                 for (j = 0; entity_map[j].charset != cs_terminator; j++) {
     905               0 :                         if (entity_map[j].charset != charset)
     906               0 :                                 continue;
     907                 : 
     908               0 :                         for (k = entity_map[j].basechar; k <= entity_map[j].endchar; k++) {
     909                 :                                 unsigned char entity[32];
     910               0 :                                 int entity_length = 0;
     911                 : 
     912               0 :                                 if (entity_map[j].table[k - entity_map[j].basechar] == NULL)
     913               0 :                                         continue;
     914                 : 
     915               0 :                                 entity_length = slprintf(entity, sizeof(entity), "&%s;", entity_map[j].table[k - entity_map[j].basechar]);
     916               0 :                                 if (entity_length >= sizeof(entity)) {
     917               0 :                                         continue;
     918                 :                                 }
     919                 : 
     920                 :                                 /* When we have MBCS entities in the tables above, this will need to handle it */
     921               0 :                                 replacement_len = 0;
     922               0 :                                 switch (charset) {
     923                 :                                         case cs_8859_1:
     924                 :                                         case cs_cp1252:
     925                 :                                         case cs_8859_15:
     926                 :                                         case cs_cp1251:
     927                 :                                         case cs_8859_5:
     928                 :                                         case cs_cp866:
     929               0 :                                                 replacement[0] = k;
     930               0 :                                                 replacement[1] = '\0';
     931               0 :                                                 replacement_len = 1;
     932               0 :                                                 break;
     933                 : 
     934                 :                                         case cs_big5:
     935                 :                                         case cs_gb2312:
     936                 :                                         case cs_big5hkscs:
     937                 :                                         case cs_sjis:
     938                 :                                         case cs_eucjp:
     939                 :                                                 /* we cannot properly handle those multibyte encodings
     940                 :                                                  * with php_str_to_str. skip it. */ 
     941               0 :                                                 continue;
     942                 : 
     943                 :                                         case cs_utf_8:
     944               0 :                                                 replacement_len = php_utf32_utf8(replacement, k);
     945               0 :                                                 break;
     946                 : 
     947                 :                                         default:
     948               0 :                                                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "cannot yet handle MBCS!");
     949               0 :                                                 efree(ret);
     950               0 :                                                 return NULL;
     951                 :                                 }
     952                 : 
     953               0 :                                 if (php_memnstr(ret, entity, entity_length, ret+retlen)) {
     954               0 :                                         replaced = php_str_to_str(ret, retlen, entity, entity_length, replacement, replacement_len, &retlen);
     955               0 :                                         efree(ret);
     956               0 :                                         ret = replaced;
     957                 :                                 }
     958                 :                         }
     959                 :                 }
     960                 :         }
     961                 : 
     962               0 :         for (j = 0; basic_entities[j].charcode != 0; j++) {
     963                 : 
     964               0 :                 if (basic_entities[j].flags && (quote_style & basic_entities[j].flags) == 0)
     965               0 :                         continue;
     966                 :                 
     967               0 :                 replacement[0] = (unsigned char)basic_entities[j].charcode;
     968               0 :                 replacement[1] = '\0';
     969                 : 
     970               0 :                 if (php_memnstr(ret, basic_entities[j].entity, basic_entities[j].entitylen, ret+retlen)) {              
     971               0 :                         replaced = php_str_to_str(ret, retlen, basic_entities[j].entity, basic_entities[j].entitylen, replacement, 1, &retlen);
     972               0 :                         efree(ret);
     973               0 :                         ret = replaced;
     974                 :                 }
     975                 :         }
     976                 : 
     977                 :         /* replace numeric entities & "&amp;" */
     978               0 :         lim = ret + retlen;
     979               0 :         for (p = ret, q = ret; p < lim;) {
     980                 :                 int code;
     981                 : 
     982               0 :                 if (p[0] == '&') {
     983               0 :                         if (p + 2 < lim) {
     984               0 :                                 if (p[1] == '#') {
     985               0 :                                         int invalid_code = 0;
     986                 : 
     987               0 :                                         if (p[2] == 'x' || p[2] == 'X') {
     988               0 :                                                 code = strtol(p + 3, &next, 16);
     989                 :                                         } else {
     990               0 :                                                 code = strtol(p + 2, &next, 10);
     991                 :                                         }
     992                 : 
     993               0 :                                         if (next != NULL && *next == ';') {
     994               0 :                                                 switch (charset) {
     995                 :                                                         case cs_utf_8:
     996               0 :                                                                 q += php_utf32_utf8(q, code);
     997               0 :                                                                 break;
     998                 : 
     999                 :                                                         case cs_8859_1:
    1000                 :                                                         case cs_8859_5:
    1001                 :                                                         case cs_8859_15:
    1002               0 :                                                                 if ((code >= 0x80 && code < 0xa0) || code > 0xff) {
    1003               0 :                                                                         invalid_code = 1;
    1004                 :                                                                 } else {
    1005               0 :                                                                         if (code == 39 || !quote_style) {
    1006               0 :                                                                                 invalid_code = 1;
    1007                 :                                                                         } else {
    1008               0 :                                                                                 *(q++) = code;
    1009                 :                                                                         }
    1010                 :                                                                 }
    1011               0 :                                                                 break;
    1012                 : 
    1013                 :                                                         case cs_cp1252:
    1014                 :                                                         case cs_cp1251:
    1015                 :                                                         case cs_cp866:
    1016               0 :                                                                 if (code > 0xff) {
    1017               0 :                                                                         invalid_code = 1;
    1018                 :                                                                 } else {
    1019               0 :                                                                         *(q++) = code;
    1020                 :                                                                 }
    1021               0 :                                                                 break;
    1022                 : 
    1023                 :                                                         case cs_big5:
    1024                 :                                                         case cs_big5hkscs:
    1025                 :                                                         case cs_sjis:
    1026                 :                                                         case cs_eucjp:
    1027               0 :                                                                 if (code >= 0x80) {
    1028               0 :                                                                         invalid_code = 1;
    1029                 :                                                                 } else {
    1030               0 :                                                                         *(q++) = code;
    1031                 :                                                                 }
    1032               0 :                                                                 break;
    1033                 : 
    1034                 :                                                         case cs_gb2312:
    1035               0 :                                                                 if (code >= 0x81) {
    1036               0 :                                                                         invalid_code = 1;
    1037                 :                                                                 } else {
    1038               0 :                                                                         *(q++) = code;
    1039                 :                                                                 }
    1040               0 :                                                                 break;
    1041                 : 
    1042                 :                                                         default:
    1043                 :                                                                 /* for backwards compatilibity */
    1044               0 :                                                                 invalid_code = 1;
    1045                 :                                                                 break;
    1046                 :                                                 }
    1047               0 :                                                 if (invalid_code) {
    1048               0 :                                                         for (; p <= next; p++) {
    1049               0 :                                                                 *(q++) = *p;
    1050                 :                                                         }
    1051                 :                                                 }
    1052               0 :                                                 p = next + 1;
    1053                 :                                         } else {
    1054               0 :                                                 *(q++) = *(p++);        
    1055               0 :                                                 *(q++) = *(p++);        
    1056                 :                                         }
    1057               0 :                                 } else if (p + 4 < lim &&
    1058                 :                                                         p[1] == 'a' && p[2] == 'm' &&p[3] == 'p' &&
    1059                 :                                                         p[4] == ';') {
    1060               0 :                                         *(q++) = '&';
    1061               0 :                                         p += 5;
    1062                 :                                 } else {
    1063               0 :                                         *(q++) = *(p++);
    1064               0 :                                         *(q++) = *(p++);
    1065                 :                                 }
    1066                 :                         } else {
    1067               0 :                                 *(q++) = *(p++);        
    1068                 :                         }
    1069                 :                 } else {
    1070               0 :                         *(q++) = *(p++);        
    1071                 :                 }
    1072                 :         }
    1073               0 :         *q = '\0';
    1074               0 :         retlen = (size_t)(q - ret);
    1075               0 : empty_source:   
    1076               0 :         *newlen = retlen;
    1077               0 :         return ret;
    1078                 : }
    1079                 : /* }}} */
    1080                 : 
    1081                 : 
    1082                 : 
    1083                 : 
    1084                 : /* {{{ php_escape_html_entities
    1085                 :  */
    1086                 : PHPAPI char *php_escape_html_entities(unsigned char *old, int oldlen, int *newlen, int all, int quote_style, char *hint_charset TSRMLS_DC)
    1087               0 : {
    1088                 :         int i, j, maxlen, len;
    1089                 :         char *replaced;
    1090               0 :         enum entity_charset charset = determine_charset(hint_charset TSRMLS_CC);
    1091                 :         int matches_map;
    1092                 : 
    1093               0 :         maxlen = 2 * oldlen;
    1094               0 :         if (maxlen < 128)
    1095               0 :                 maxlen = 128;
    1096               0 :         replaced = emalloc (maxlen);
    1097               0 :         len = 0;
    1098                 : 
    1099               0 :         i = 0;
    1100               0 :         while (i < oldlen) {
    1101                 :                 unsigned char mbsequence[16];   /* allow up to 15 characters in a multibyte sequence */
    1102               0 :                 int mbseqlen = sizeof(mbsequence);
    1103               0 :                 unsigned short this_char = get_next_char(charset, old, &i, mbsequence, &mbseqlen);
    1104                 : 
    1105               0 :                 matches_map = 0;
    1106                 : 
    1107               0 :                 if (len + 16 > maxlen)
    1108               0 :                         replaced = erealloc (replaced, maxlen += 128);
    1109                 : 
    1110               0 :                 if (all) {
    1111                 :                         /* look for a match in the maps for this charset */
    1112               0 :                         unsigned char *rep = NULL;
    1113                 : 
    1114                 : 
    1115               0 :                         for (j = 0; entity_map[j].charset != cs_terminator; j++) {
    1116               0 :                                 if (entity_map[j].charset == charset
    1117                 :                                                 && this_char >= entity_map[j].basechar
    1118                 :                                                 && this_char <= entity_map[j].endchar) {
    1119               0 :                                         rep = (unsigned char*)entity_map[j].table[this_char - entity_map[j].basechar];
    1120               0 :                                         if (rep == NULL) {
    1121                 :                                                 /* there is no entity for this position; fall through and
    1122                 :                                                  * just output the character itself */
    1123               0 :                                                 break;
    1124                 :                                         }
    1125                 : 
    1126               0 :                                         matches_map = 1;
    1127               0 :                                         break;
    1128                 :                                 }
    1129                 :                         }
    1130                 : 
    1131               0 :                         if (matches_map) {
    1132               0 :                                 int l = strlen(rep);
    1133                 :                                 /* increase the buffer size */
    1134               0 :                                 if (len + 2 + l >= maxlen) {
    1135               0 :                                         replaced = erealloc(replaced, maxlen += 128);
    1136                 :                                 }
    1137                 : 
    1138               0 :                                 replaced[len++] = '&';
    1139               0 :                                 strlcpy(replaced + len, rep, maxlen);
    1140               0 :                                 len += l;
    1141               0 :                                 replaced[len++] = ';';
    1142                 :                         }
    1143                 :                 }
    1144               0 :                 if (!matches_map) {     
    1145               0 :                         int is_basic = 0;
    1146                 : 
    1147               0 :                         if (this_char == '&') {
    1148               0 :                                 memcpy(replaced + len, "&amp;", sizeof("&amp;") - 1);
    1149               0 :                                 len += sizeof("&amp;") - 1;
    1150               0 :                                 is_basic = 1;
    1151                 :                         } else {
    1152               0 :                                 for (j = 0; basic_entities[j].charcode != 0; j++) {
    1153               0 :                                         if ((basic_entities[j].charcode != this_char) ||
    1154                 :                                                         (basic_entities[j].flags &&
    1155                 :                                                         (quote_style & basic_entities[j].flags) == 0)) {
    1156                 :                                                 continue;
    1157                 :                                         }
    1158                 : 
    1159               0 :                                         memcpy(replaced + len, basic_entities[j].entity, basic_entities[j].entitylen);
    1160               0 :                                         len += basic_entities[j].entitylen;
    1161                 :                 
    1162               0 :                                         is_basic = 1;
    1163               0 :                                         break;
    1164                 :                                 }
    1165                 :                         }
    1166                 : 
    1167               0 :                         if (!is_basic) {
    1168                 :                                 /* a wide char without a named entity; pass through the original sequence */
    1169               0 :                                 if (mbseqlen > 1) {
    1170               0 :                                         memcpy(replaced + len, mbsequence, mbseqlen);
    1171               0 :                                         len += mbseqlen;
    1172                 :                                 } else {
    1173               0 :                                         replaced[len++] = (unsigned char)this_char;
    1174                 :                                 }
    1175                 :                         }
    1176                 :                 }
    1177                 :         }
    1178               0 :         replaced[len] = '\0';
    1179               0 :         *newlen = len;
    1180                 : 
    1181               0 :         return replaced;
    1182                 : 
    1183                 : 
    1184                 : }
    1185                 : /* }}} */
    1186                 : 
    1187                 : /* {{{ php_html_entities
                            * Shared body of the userland functions htmlspecialchars() (all = 0)
                            * and htmlentities() (all = 1).
                            *
                            * Parses (string [, long quote_style [, string charset]]); quote_style
                            * defaults to ENT_COMPAT and the charset hint to NULL.  The string is
                            * run through php_escape_html_entities() and the escaped buffer becomes
                            * the function's return value.  On a parameter parsing failure the
                            * function returns without setting a value.
    1188                 :  */
    1189                 : static void php_html_entities(INTERNAL_FUNCTION_PARAMETERS, int all)
    1190               0 : {
    1191               0 :         char *str, *hint_charset = NULL;
    1192               0 :         int str_len, hint_charset_len = 0;
    1193                 :         int len;
    1194               0 :         long quote_style = ENT_COMPAT;
    1195                 :         char *replaced;
    1196                 : 
    1197               0 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls", &str, &str_len, &quote_style, &hint_charset, &hint_charset_len) == FAILURE) {
    1198               0 :                 return;
    1199                 :         }
    1200                 : 
                                   /* hint_charset_len is filled in by the parser but not used afterwards;
                                    * only the charset string itself is passed on. */
    1201               0 :         replaced = php_escape_html_entities(str, str_len, &len, all, quote_style, hint_charset TSRMLS_CC);
                                   /* NOTE(review): the trailing 0 presumably means "do not duplicate", i.e.
                                    * the engine takes over the buffer - confirm against the Zend API. */
    1202               0 :         RETVAL_STRINGL(replaced, len, 0);
    1203                 : }
    1204                 : /* }}} */
    1205                 : 
    1206                 : #define HTML_SPECIALCHARS       0 /* default `which` selector of get_html_translation_table() */
    1207                 : #define HTML_ENTITIES           1 /* `which` selector that makes get_html_translation_table() walk entity_map */
    1208                 : 
    1209                 : /* {{{ register_html_constants
                            * Registers the two HTML_* table selectors defined above and the three
                            * ENT_* quote-style values as long constants, each with the flags
                            * CONST_PERSISTENT|CONST_CS.
                            * NOTE(review): presumably called once from the extension's startup
                            * code - the caller is outside this view.
    1210                 :  */
    1211                 : void register_html_constants(INIT_FUNC_ARGS)
    1212             220 : {
    1213             220 :         REGISTER_LONG_CONSTANT("HTML_SPECIALCHARS", HTML_SPECIALCHARS, CONST_PERSISTENT|CONST_CS);
    1214             220 :         REGISTER_LONG_CONSTANT("HTML_ENTITIES", HTML_ENTITIES, CONST_PERSISTENT|CONST_CS);
    1215             220 :         REGISTER_LONG_CONSTANT("ENT_COMPAT", ENT_COMPAT, CONST_PERSISTENT|CONST_CS);
    1216             220 :         REGISTER_LONG_CONSTANT("ENT_QUOTES", ENT_QUOTES, CONST_PERSISTENT|CONST_CS);
    1217             220 :         REGISTER_LONG_CONSTANT("ENT_NOQUOTES", ENT_NOQUOTES, CONST_PERSISTENT|CONST_CS);
    1218             220 : }
    1219                 : /* }}} */
    1220                 : 
    1221                 : /* {{{ proto string htmlspecialchars(string string [, int quote_style][, string charset])
    1222                 :    Convert special characters to HTML entities */
    1223                 : PHP_FUNCTION(htmlspecialchars)
    1224               0 : {
                                   /* all = 0: php_escape_html_entities() skips the entity_map lookup and
                                    * only rewrites '&' and the characters listed in basic_entities. */
    1225               0 :         php_html_entities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
    1226               0 : }
    1227                 : /* }}} */
    1228                 : 
    1229                 : /* {{{ proto string htmlspecialchars_decode(string string [, int quote_style])
    1230                 :    Convert special HTML entities back to characters */
                           /* Works in place on a private copy of the input.  Every basic_entities
                            * row whose flags are zero or enabled by quote_style, plus "&amp;", is
                            * decodable: at each '&' the first matching entity is overwritten by its
                            * single character and the rest of the data is shifted left over the
                            * leftover entity bytes.  Scanning resumes after the decoded character,
                            * so decoded output is never decoded a second time. */
    1231                 : PHP_FUNCTION(htmlspecialchars_decode)
    1232               0 : {
    1233                 :         char *str, *new_str, *e, *p;
    1234                 :         int len, j, i, new_len;
    1235               0 :         long quote_style = ENT_COMPAT;
                                   /* NOTE(review): 8 slots have to hold every enabled basic_entities row
                                    * plus the "&amp;" entry appended below; basic_entities is defined
                                    * outside this view - confirm the capacity. */
    1236                 :         struct basic_entities_dec basic_entities_dec[8];
    1237                 : 
    1238               0 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", &str, &len, &quote_style) == FAILURE) {
    1239               0 :                 return;
    1240                 :         }
    1241                 : 
                                   /* e always points one past the last valid byte of the working copy */
    1242               0 :         new_str = estrndup(str, len);
    1243               0 :         new_len = len;
    1244               0 :         e = new_str + new_len;
    1245                 : 
                                   /* without any '&' there is nothing to decode: return the copy as is */
    1246               0 :         if (!(p = memchr(new_str, '&', new_len))) {
    1247               0 :                 RETURN_STRINGL(new_str, new_len, 0);
    1248                 :         }
    1249                 : 
                                   /* collect the rows enabled for this quote_style into the local table;
                                    * j counts the slots filled so far */
    1250               0 :         for (j = 0, i = 0; basic_entities[i].charcode != 0; i++) {
    1251               0 :                 if (basic_entities[i].flags && !(quote_style & basic_entities[i].flags)) {
    1252               0 :                         continue;
    1253                 :                 }
    1254               0 :                 basic_entities_dec[j].charcode = basic_entities[i].charcode;
                                           /* entitylen + 1 bytes are copied, i.e. one byte beyond the entity text */
    1255               0 :                 memcpy(basic_entities_dec[j].entity, basic_entities[i].entity, basic_entities[i].entitylen + 1);
    1256               0 :                 basic_entities_dec[j].entitylen = basic_entities[i].entitylen;
    1257               0 :                 j++;
    1258                 :         }
                                   /* "&amp;" is always decodable; afterwards i is the number of table entries */
    1259               0 :         basic_entities_dec[j].charcode = '&';
    1260               0 :         basic_entities_dec[j].entitylen = sizeof("&amp;") - 1;
    1261               0 :         memcpy(basic_entities_dec[j].entity, "&amp;", sizeof("&amp;"));
    1262               0 :         i = j + 1;
    1263                 :         
                                   /* p sits on an '&' at the top of every iteration */
    1264                 :         do {
                                           /* l = bytes left from this '&' to the end of the data */
    1265               0 :                 int l = e - p;
    1266                 :         
    1267               0 :                 for (j = 0; j < i; j++) {
                                                   /* entity longer than the remaining data: cannot match */
    1268               0 :                         if (basic_entities_dec[j].entitylen > l) {
    1269               0 :                                 continue;
    1270                 :                         }
    1271               0 :                         if (!memcmp(p, basic_entities_dec[j].entity, basic_entities_dec[j].entitylen)) {
                                                           /* e_len = entity bytes left over once its first byte has
                                                            * been reused for the decoded character */
    1272               0 :                                 int e_len = basic_entities_dec[j].entitylen - 1;
    1273                 :                 
    1274               0 :                                 *p++ = basic_entities_dec[j].charcode;
                                                           /* close the gap: pull the tail down over the leftovers */
    1275               0 :                                 memmove(p, p + e_len, (e - p - e_len));
    1276               0 :                                 e -= e_len;
    1277               0 :                                 goto done;
    1278                 :                         }
    1279                 :                 }
                                           /* no entity starts here: step over this '&' */
    1280               0 :                 p++;
    1281                 : 
    1282               0 : done:
    1283               0 :                 if (p >= e) {
    1284               0 :                         break;
    1285                 :                 }
                                   /* continue at the next '&' in what is left of the data */
    1286               0 :         } while ((p = memchr(p, '&', (e - p))));
    1287                 : 
    1288               0 :         new_len = e - new_str;
    1289                 : 
                                   /* the data only ever shrinks, so the terminator fits into the copy */
    1290               0 :         new_str[new_len] = '\0';
    1291               0 :         RETURN_STRINGL(new_str, new_len, 0);
    1292                 : }
    1293                 : /* }}} */
    1294                 : 
    1295                 : /* {{{ proto string html_entity_decode(string string [, int quote_style][, string charset])
    1296                 :    Convert all HTML entities to their applicable characters */
                           /* Parses (string [, long quote_style [, string charset]]) with quote_style
                            * defaulting to ENT_COMPAT, then hands the string to
                            * php_unescape_html_entities() with all = 1.  Returns the decoded string,
                            * or FALSE when that function returned NULL. */
    1297                 : PHP_FUNCTION(html_entity_decode)
    1298               0 : {
    1299               0 :         char *str, *hint_charset = NULL;
                                   /* hint_charset_len is only written by the parameter parser and not
                                    * read afterwards */
    1300                 :         int str_len, hint_charset_len, len;
    1301               0 :         long quote_style = ENT_COMPAT;
    1302                 :         char *replaced;
    1303                 : 
    1304               0 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ls", &str, &str_len,
    1305                 :                                                           &quote_style, &hint_charset, &hint_charset_len) == FAILURE) {
    1306               0 :                 return;
    1307                 :         }
    1308                 : 
    1309               0 :         replaced = php_unescape_html_entities(str, str_len, &len, 1, quote_style, hint_charset TSRMLS_CC);
    1310               0 :         if (replaced) {
    1311               0 :                 RETURN_STRINGL(replaced, len, 0);
    1312                 :         }
                                   /* NULL: php_unescape_html_entities() could not handle the charset and
                                    * has already raised its warning */
    1313               0 :         RETURN_FALSE;
    1314                 : }
    1315                 : /* }}} */
    1316                 : 
    1317                 : 
    1318                 : /* {{{ proto string htmlentities(string string [, int quote_style][, string charset])
    1319                 :    Convert all applicable characters to HTML entities. Thin wrapper: all argument handling and the return value are left to php_html_entities(). */
    1320                 : PHP_FUNCTION(htmlentities)
    1321               0 : {
    1322               0 :         php_html_entities(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1); /* NOTE(review): the trailing 1 presumably means "all entities" as opposed to special chars only - confirm against php_html_entities() */
    1323               0 : }
    1324                 : /* }}} */
    1325                 : 
    1326                 : /* {{{ proto array get_html_translation_table([int table [, int quote_style]])
    1327                 :    Returns the internal translation table used by htmlspecialchars and htmlentities. The result maps a one-character key to its "&name;" entity string; a table value other than HTML_ENTITIES or HTML_SPECIALCHARS matches no case and yields an empty array. */
    1328                 : PHP_FUNCTION(get_html_translation_table)
    1329               0 : {
    1330               0 :         long which = HTML_SPECIALCHARS, quote_style = ENT_COMPAT; /* defaults used when the caller omits the arguments */
    1331                 :         int i, j;
    1332                 :         char ind[2]; /* array key buffer: one character plus the terminating NUL */
    1333               0 :         enum entity_charset charset = determine_charset(NULL TSRMLS_CC); /* no charset hint is passed; evaluated before the arguments are parsed */
    1334                 : 
    1335               0 :         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ll", &which, &quote_style) == FAILURE) {
    1336               0 :                 return;
    1337                 :         }
    1338                 : 
    1339               0 :         array_init(return_value);
    1340                 : 
    1341               0 :         ind[1] = 0; /* terminate the key once; only ind[0] changes below */
    1342                 : 
    1343               0 :         switch (which) {
    1344                 :                 case HTML_ENTITIES:
    1345               0 :                         for (j=0; entity_map[j].charset != cs_terminator; j++) { /* walk every map entry up to the cs_terminator sentinel */
    1346               0 :                                 if (entity_map[j].charset != charset) /* only entries for the detected charset contribute */
    1347               0 :                                         continue;
    1348               0 :                                 for (i = 0; i <= entity_map[j].endchar - entity_map[j].basechar; i++) { /* inclusive range basechar..endchar */
    1349                 :                                         char buffer[16]; /* room for "&name;" up to 15 characters; snprintf below bounds the write to this size */
    1350                 : 
    1351               0 :                                         if (entity_map[j].table[i] == NULL) /* no entity defined for this code point */
    1352               0 :                                                 continue;
    1353                 :                                         /* what about wide chars here ?? ind[0] is a single char, so i + basechar is narrowed to one byte */
    1354               0 :                                         ind[0] = i + entity_map[j].basechar;
    1355               0 :                                         snprintf(buffer, sizeof(buffer), "&%s;", entity_map[j].table[i]);
    1356               0 :                                         add_assoc_string(return_value, ind, buffer, 1); /* last argument 1: presumably the Zend "duplicate" flag, so the array keeps its own copy of the stack buffer - confirm */
    1357                 : 
    1358                 :                                 }
    1359                 :                         }
    1360                 :                         /* fall through: the HTML_ENTITIES table also receives the special-character entries added below */
    1361                 : 
    1362                 :                 case HTML_SPECIALCHARS:
    1363               0 :                         for (j = 0; basic_entities[j].charcode != 0; j++) { /* basic_entities ends at the entry whose charcode is 0 */
    1364                 : /* skip an entry that carries flags when none of those flag bits is set in quote_style; entries with flags == 0 are always added */
    1365               0 :                                 if (basic_entities[j].flags && (quote_style & basic_entities[j].flags) == 0)
    1366               0 :                                         continue;
    1367                 :                                 
    1368               0 :                                 ind[0] = (unsigned char)basic_entities[j].charcode;
    1369               0 :                                 add_assoc_stringl(return_value, ind, basic_entities[j].entity, basic_entities[j].entitylen, 1);
    1370                 :                         }
    1371               0 :                         add_assoc_stringl(return_value, "&", "&amp;", sizeof("&amp;") - 1, 1); /* the ampersand mapping is added unconditionally, outside the basic_entities loop */
    1372                 : 
    1373                 :                         break;
    1374                 :         }
    1375                 : }
    1376                 : /* }}} */
    1377                 : 
    1378                 : /*
    1379                 :  * Local variables:
    1380                 :  * tab-width: 4
    1381                 :  * c-basic-offset: 4
    1382                 :  * End:
    1383                 :  * vim600: sw=4 ts=4 fdm=marker
    1384                 :  * vim<600: sw=4 ts=4
    1385                 :  */

Generated by: LTP GCOV extension version 1.5