1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2007 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Kirill Maximov <kir@rus.net> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : /* $Id: cyr_convert.c,v 1.27.2.3.2.1 2007/01/01 09:36:08 sebastian Exp $ */
20 :
21 : #include <stdlib.h>
22 :
23 : #ifdef HAVE_UNISTD_H
24 : #include <unistd.h>
25 : #endif
26 : #include <string.h>
27 : #include <errno.h>
28 :
29 : #include "php.h"
30 : #include "cyr_convert.h"
31 :
32 : #include <stdio.h>
33 :
34 : /*****************************************************************************
35 : * These are the code tables for different Cyrillic charsets (relative to koi8-r).
36 : * Each half of a table covers byte values 0-255; entries 0-127 map to themselves.
37 : * First 256 entries are for conversion from the corresponding charset to koi8-r,
38 : * second 256 entries are for the reverse conversion, from koi8-r to the charset.
39 : *
40 : * Here we have the following tables:
41 : * _cyr_win1251 - for windows-1251 charset
42 : * _cyr_iso88595 - for iso8859-5 charset
43 : * _cyr_cp866 - for x-cp866 charset
44 : * _cyr_mac - for x-mac-cyrillic charset
45 : *
46 : *****************************************************************************/
47 :
/* A conversion table of 512 bytes, used as two 256-entry lookup halves:
 * entries 0-255 are indexed by a byte of the foreign charset and yield the
 * koi8-r byte; entries 256-511 are indexed by (koi8-r byte + 256) and yield
 * the byte of the foreign charset. */
48 : typedef unsigned char _cyr_charset_table[512];
49 :
50 : /* {{{ static const _cyr_charset_table _cyr_win1251
 * A single declaration that defines four tables in a row:
 * _cyr_win1251, _cyr_cp866, _cyr_iso88595 and _cyr_mac.
 * In every table the first 256 entries translate a byte of that charset
 * to koi8-r and the last 256 entries translate a koi8-r byte back to that
 * charset. Entries 0-127 of each half map every byte to itself.
51 : */
52 : static const _cyr_charset_table _cyr_win1251 = {
/* entries 0-255: windows-1251 byte -> koi8-r byte */
53 : 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
54 : 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
55 : 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
56 : 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
57 : 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
58 : 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
59 : 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
60 : 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
61 : 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
62 : 46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,46,
63 : 154,174,190,46,159,189,46,46,179,191,180,157,46,46,156,183,
64 : 46,46,182,166,173,46,46,158,163,152,164,155,46,46,46,167,
65 : 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
66 : 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
67 : 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
68 : 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
/* entries 256-511: koi8-r byte -> windows-1251 byte */
69 : 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
70 : 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
71 : 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
72 : 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
73 : 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
74 : 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
75 : 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
76 : 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
77 : 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
78 : 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
79 : 32,32,32,184,186,32,179,191,32,32,32,32,32,180,162,32,
80 : 32,32,32,168,170,32,178,175,32,32,32,32,32,165,161,169,
81 : 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
82 : 239,255,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
83 : 222,192,193,214,196,197,212,195,213,200,201,202,203,204,205,206,
84 : 207,223,208,209,210,211,198,194,220,219,199,216,221,217,215,218,
85 : },
86 : _cyr_cp866 = {
/* entries 0-255: cp866 byte -> koi8-r byte */
87 : 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
88 : 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
89 : 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
90 : 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
91 : 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
92 : 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
93 : 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
94 : 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
95 : 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
96 : 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
97 : 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
98 : 35,35,35,124,124,124,124,43,43,124,124,43,43,43,43,43,
99 : 43,45,45,124,45,43,124,124,43,43,45,45,124,45,43,45,
100 : 45,45,45,43,43,43,43,43,43,43,43,35,35,124,124,35,
101 : 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
102 : 179,163,180,164,183,167,190,174,32,149,158,32,152,159,148,154,
/* entries 256-511: koi8-r byte -> cp866 byte */
103 : 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
104 : 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
105 : 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
106 : 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
107 : 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
108 : 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
109 : 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
110 : 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
111 : 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
112 : 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
113 : 205,186,213,241,243,201,32,245,187,212,211,200,190,32,247,198,
114 : 199,204,181,240,242,185,32,244,203,207,208,202,216,32,246,32,
115 : 238,160,161,230,164,165,228,163,229,168,169,170,171,172,173,174,
116 : 175,239,224,225,226,227,166,162,236,235,167,232,237,233,231,234,
117 : 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
118 : 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
119 : },
120 : _cyr_iso88595 = {
/* entries 0-255: iso8859-5 byte -> koi8-r byte */
121 : 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
122 : 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
123 : 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
124 : 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
125 : 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
126 : 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
127 : 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
128 : 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
129 : 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
130 : 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
131 : 32,179,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
132 : 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
133 : 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
134 : 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
135 : 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,209,
136 : 32,163,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
/* entries 256-511: koi8-r byte -> iso8859-5 byte */
137 : 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
138 : 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
139 : 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
140 : 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
141 : 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
142 : 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
143 : 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
144 : 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
145 : 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
146 : 32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,
147 : 32,32,32,241,32,32,32,32,32,32,32,32,32,32,32,32,
148 : 32,32,32,161,32,32,32,32,32,32,32,32,32,32,32,32,
149 : 238,208,209,230,212,213,228,211,229,216,217,218,219,220,221,222,
150 : 223,239,224,225,226,227,214,210,236,235,215,232,237,233,231,234,
151 : 206,176,177,198,180,181,196,179,197,184,185,186,187,188,189,190,
152 : 191,207,192,193,194,195,182,178,204,203,183,200,205,201,199,202,
153 : },
154 : _cyr_mac = {
/* entries 0-255: x-mac-cyrillic byte -> koi8-r byte */
155 : 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
156 : 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
157 : 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
158 : 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
159 : 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
160 : 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
161 : 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
162 : 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
163 : 225,226,247,231,228,229,246,250,233,234,235,236,237,238,239,240,
164 : 242,243,244,245,230,232,227,254,251,253,255,249,248,252,224,241,
165 : 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
166 : 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
167 : 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
168 : 144,145,146,147,148,149,150,151,152,153,154,155,156,179,163,209,
169 : 193,194,215,199,196,197,214,218,201,202,203,204,205,206,207,208,
170 : 210,211,212,213,198,200,195,222,219,221,223,217,216,220,192,255,
/* entries 256-511: koi8-r byte -> x-mac-cyrillic byte */
171 : 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
172 : 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
173 : 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
174 : 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
175 : 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
176 : 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
177 : 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
178 : 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
179 : 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
180 : 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
181 : 160,161,162,222,164,165,166,167,168,169,170,171,172,173,174,175,
182 : 176,177,178,221,180,181,182,183,184,185,186,187,188,189,190,191,
183 : 254,224,225,246,228,229,244,227,245,232,233,234,235,236,237,238,
184 : 239,223,240,241,242,243,230,226,252,251,231,248,253,249,247,250,
185 : 158,128,129,150,132,133,148,131,149,136,137,138,139,140,141,142,
186 : 143,159,144,145,146,147,134,130,156,155,135,152,157,153,151,154,
187 : };
188 : /* }}} */
189 :
190 : /* {{{ php_convert_cyr_string
191 : * This is the function that performs real in-place conversion of the string
192 : * between charsets.
193 : * Parameters:
194 : * str - string to be converted
195 : * from,to - one-symbol label of source and destination charset
196 : * The following symbols are used as labels:
197 : * k - koi8-r
198 : * w - windows-1251
199 : * i - iso8859-5
200 : * a - x-cp866
201 : * d - x-cp866
202 : * m - x-mac-cyrillic
203 : *****************************************************************************/
204 : static char * php_convert_cyr_string(unsigned char *str, int length, char from, char to TSRMLS_DC)
205 0 : {
206 : const unsigned char *from_table, *to_table;
207 : unsigned char tmp;
208 : int i;
209 :
210 0 : from_table = NULL;
211 0 : to_table = NULL;
212 :
213 0 : switch (toupper((int)(unsigned char)from))
214 : {
215 : case 'W':
216 0 : from_table = _cyr_win1251;
217 0 : break;
218 : case 'A':
219 : case 'D':
220 0 : from_table = _cyr_cp866;
221 0 : break;
222 : case 'I':
223 0 : from_table = _cyr_iso88595;
224 0 : break;
225 : case 'M':
226 0 : from_table = _cyr_mac;
227 0 : break;
228 : case 'K':
229 0 : break;
230 : default:
231 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown source charset: %c", from);
232 : break;
233 : }
234 :
235 0 : switch (toupper((int)(unsigned char)to))
236 : {
237 : case 'W':
238 0 : to_table = _cyr_win1251;
239 0 : break;
240 : case 'A':
241 : case 'D':
242 0 : to_table = _cyr_cp866;
243 0 : break;
244 : case 'I':
245 0 : to_table = _cyr_iso88595;
246 0 : break;
247 : case 'M':
248 0 : to_table = _cyr_mac;
249 0 : break;
250 : case 'K':
251 0 : break;
252 : default:
253 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown destination charset: %c", to);
254 : break;
255 : }
256 :
257 :
258 0 : if (!str)
259 0 : return (char *)str;
260 :
261 0 : for( i = 0; i<length; i++)
262 : {
263 0 : tmp = (from_table == NULL)? str[i] : from_table[ str[i] ];
264 0 : str[i] = (to_table == NULL) ? tmp : to_table[tmp + 256];
265 : }
266 0 : return (char *)str;
267 : }
268 : /* }}} */
269 :
270 : /* {{{ proto string convert_cyr_string(string str, string from, string to)
271 : Convert from one Cyrillic character set to another */
272 : PHP_FUNCTION(convert_cyr_string)
273 0 : {
274 : zval **str_arg, **fr_cs, **to_cs;
275 : unsigned char *str;
276 :
277 0 : if (ZEND_NUM_ARGS() != 3 || zend_get_parameters_ex(3,&str_arg,&fr_cs, &to_cs)==FAILURE)
278 : {
279 0 : WRONG_PARAM_COUNT;
280 : }
281 0 : convert_to_string_ex(str_arg);
282 0 : convert_to_string_ex(fr_cs);
283 0 : convert_to_string_ex(to_cs);
284 :
285 0 : str = (unsigned char*) estrndup(Z_STRVAL_PP(str_arg), Z_STRLEN_PP(str_arg));
286 :
287 0 : php_convert_cyr_string(str, Z_STRLEN_PP(str_arg), Z_STRVAL_PP(fr_cs)[0], Z_STRVAL_PP(to_cs)[0] TSRMLS_CC);
288 0 : RETVAL_STRING((char *)str, 0)
289 : }
290 : /* }}} */
291 :
292 : /*
293 : * Local variables:
294 : * tab-width: 4
295 : * c-basic-offset: 4
296 : * End:
297 : * vim600: sw=4 ts=4 fdm=marker
298 : * vim<600: sw=4 ts=4
299 : */
|