1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2007 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Hartmut Holzgraefe <hholzgra@php.net> |
16 : +----------------------------------------------------------------------+
17 : */
18 : /* $Id: levenshtein.c,v 1.34.2.1.2.3 2007/01/01 09:36:08 sebastian Exp $ */
19 :
20 : #include "php.h"
21 : #include <stdlib.h>
22 : #include <errno.h>
23 : #include <ctype.h>
24 : #include "php_string.h"
25 :
26 : #define LEVENSHTEIN_MAX_LENTH 255
27 :
28 : /* {{{ reference_levdist
29 : * reference implementation, only optimized for memory usage, not speed */
30 : static int reference_levdist(const char *s1, int l1,
31 : const char *s2, int l2,
32 : int cost_ins, int cost_rep, int cost_del )
33 0 : {
34 : int *p1, *p2, *tmp;
35 : int i1, i2, c0, c1, c2;
36 :
37 0 : if(l1==0) return l2*cost_ins;
38 0 : if(l2==0) return l1*cost_del;
39 :
40 0 : if((l1>LEVENSHTEIN_MAX_LENTH)||(l2>LEVENSHTEIN_MAX_LENTH))
41 0 : return -1;
42 :
43 0 : p1 = safe_emalloc((l2+1), sizeof(int), 0);
44 0 : p2 = safe_emalloc((l2+1), sizeof(int), 0);
45 :
46 0 : for(i2=0;i2<=l2;i2++)
47 0 : p1[i2] = i2*cost_ins;
48 :
49 0 : for(i1=0;i1<l1;i1++)
50 : {
51 0 : p2[0]=p1[0]+cost_del;
52 0 : for(i2=0;i2<l2;i2++)
53 : {
54 0 : c0=p1[i2]+((s1[i1]==s2[i2])?0:cost_rep);
55 0 : c1=p1[i2+1]+cost_del; if(c1<c0) c0=c1;
56 0 : c2=p2[i2]+cost_ins; if(c2<c0) c0=c2;
57 0 : p2[i2+1]=c0;
58 : }
59 0 : tmp=p1; p1=p2; p2=tmp;
60 : }
61 :
62 0 : c0=p1[l2];
63 :
64 0 : efree(p1);
65 0 : efree(p2);
66 :
67 0 : return c0;
68 : }
69 : /* }}} */
70 :
71 : /* {{{ custom_levdist
72 : */
73 : static int custom_levdist(char *str1, char *str2, char *callback_name TSRMLS_DC)
74 0 : {
75 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "The general Levenshtein support is not there yet");
76 : /* not there yet */
77 :
78 0 : return -1;
79 : }
80 : /* }}} */
81 :
82 : /* {{{ proto int levenshtein(string str1, string str2[, int cost_ins, int cost_rep, int cost_del])
83 : Calculate Levenshtein distance between two strings */
84 : PHP_FUNCTION(levenshtein)
85 0 : {
86 : zval **str1, **str2, **cost_ins, **cost_rep, **cost_del, **callback_name;
87 0 : int distance=-1;
88 :
89 0 : switch(ZEND_NUM_ARGS()) {
90 : case 2: /* just two string: use maximum performance version */
91 0 : if (zend_get_parameters_ex(2, &str1, &str2) == FAILURE) {
92 0 : WRONG_PARAM_COUNT;
93 : }
94 0 : convert_to_string_ex(str1);
95 0 : convert_to_string_ex(str2);
96 :
97 0 : distance = reference_levdist(Z_STRVAL_PP(str1), Z_STRLEN_PP(str1),
98 : Z_STRVAL_PP(str2), Z_STRLEN_PP(str2), 1, 1, 1);
99 :
100 0 : break;
101 :
102 : case 5: /* more general version: calc cost by ins/rep/del weights */
103 0 : if (zend_get_parameters_ex(5, &str1, &str2, &cost_ins, &cost_rep, &cost_del) == FAILURE) {
104 0 : WRONG_PARAM_COUNT;
105 : }
106 0 : convert_to_string_ex(str1);
107 0 : convert_to_string_ex(str2);
108 0 : convert_to_long_ex(cost_ins);
109 0 : convert_to_long_ex(cost_rep);
110 0 : convert_to_long_ex(cost_del);
111 :
112 0 : distance = reference_levdist(Z_STRVAL_PP(str1), Z_STRLEN_PP(str1),
113 : Z_STRVAL_PP(str2), Z_STRLEN_PP(str2),
114 : Z_LVAL_PP(cost_ins), Z_LVAL_PP(cost_rep),
115 : Z_LVAL_PP(cost_del));
116 :
117 0 : break;
118 :
119 : case 3: /* most general version: calc cost by user-supplied function */
120 0 : if (zend_get_parameters_ex(3, &str1, &str2, &callback_name) == FAILURE) {
121 0 : WRONG_PARAM_COUNT;
122 : }
123 0 : convert_to_string_ex(str1);
124 0 : convert_to_string_ex(str2);
125 0 : convert_to_string_ex(callback_name);
126 :
127 0 : distance = custom_levdist(Z_STRVAL_PP(str1), Z_STRVAL_PP(str2),
128 : Z_STRVAL_PP(callback_name) TSRMLS_CC);
129 0 : break;
130 :
131 : default:
132 0 : WRONG_PARAM_COUNT;
133 : }
134 :
135 0 : if(distance < 0 && /* TODO */ ZEND_NUM_ARGS() != 3) {
136 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Argument string(s) too long");
137 : }
138 :
139 0 : RETURN_LONG(distance);
140 : }
141 : /* }}} */
142 :
143 : /*
144 : * Local variables:
145 : * tab-width: 4
146 : * c-basic-offset: 4
147 : * End:
148 : * vim600: sw=4 ts=4 fdm=marker
149 : * vim<600: sw=4 ts=4
150 : */
|