1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2007 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Bjørn Borud - Guardian Networks AS <borud@guardian.no> |
16 : +----------------------------------------------------------------------+
17 : */
18 : /* $Id: soundex.c,v 1.25.2.1.2.1 2007/01/01 09:36:08 sebastian Exp $ */
19 :
20 : #include "php.h"
21 : #include <stdlib.h>
22 : #include <errno.h>
23 : #include <ctype.h>
24 : #include "php_string.h"
25 :
26 : /* Simple soundex algorithm as described by Knuth in TAOCP, vol 3 */
27 : /* {{{ proto string soundex(string str)
28 : Calculate the soundex key of a string */
29 : PHP_FUNCTION(soundex)
30 0 : {
31 : char *str;
32 : int i, _small, str_len, code, last;
33 : char soundex[4 + 1];
34 :
35 : static char soundex_table[26] =
36 : {0, /* A */
37 : '1', /* B */
38 : '2', /* C */
39 : '3', /* D */
40 : 0, /* E */
41 : '1', /* F */
42 : '2', /* G */
43 : 0, /* H */
44 : 0, /* I */
45 : '2', /* J */
46 : '2', /* K */
47 : '4', /* L */
48 : '5', /* M */
49 : '5', /* N */
50 : 0, /* O */
51 : '1', /* P */
52 : '2', /* Q */
53 : '6', /* R */
54 : '2', /* S */
55 : '3', /* T */
56 : 0, /* U */
57 : '1', /* V */
58 : 0, /* W */
59 : '2', /* X */
60 : 0, /* Y */
61 : '2'}; /* Z */
62 :
63 0 : if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &str, &str_len) == FAILURE) {
64 0 : return;
65 : }
66 0 : if (str_len == 0) {
67 0 : RETURN_FALSE;
68 : }
69 :
70 : /* build soundex string */
71 0 : last = -1;
72 0 : for (i = 0, _small = 0; i < str_len && _small < 4; i++) {
73 : /* convert chars to upper case and strip non-letter chars */
74 : /* BUG: should also map here accented letters used in non */
75 : /* English words or names (also found in English text!): */
76 : /* esstsett, thorn, n-tilde, c-cedilla, s-caron, ... */
77 0 : code = toupper((int)(unsigned char)str[i]);
78 0 : if (code >= 'A' && code <= 'Z') {
79 0 : if (_small == 0) {
80 : /* remember first valid char */
81 0 : soundex[_small++] = code;
82 0 : last = soundex_table[code - 'A'];
83 : }
84 : else {
85 : /* ignore sequences of consonants with same soundex */
86 : /* code in trail, and vowels unless they separate */
87 : /* consonant letters */
88 0 : code = soundex_table[code - 'A'];
89 0 : if (code != last) {
90 0 : if (code != 0) {
91 0 : soundex[_small++] = code;
92 : }
93 0 : last = code;
94 : }
95 : }
96 : }
97 : }
98 : /* pad with '0' and terminate with 0 ;-) */
99 0 : while (_small < 4) {
100 0 : soundex[_small++] = '0';
101 : }
102 0 : soundex[_small] = '\0';
103 :
104 0 : RETURN_STRINGL(soundex, _small, 1);
105 : }
106 : /* }}} */
107 :
108 : /*
109 : * Local variables:
110 : * tab-width: 4
111 : * c-basic-offset: 4
112 : * End:
113 : * vim600: sw=4 ts=4 fdm=marker
114 : * vim<600: sw=4 ts=4
115 : */
|