1 : /*
2 : +----------------------------------------------------------------------+
3 : | PHP Version 5 |
4 : +----------------------------------------------------------------------+
5 : | Copyright (c) 1997-2007 The PHP Group |
6 : +----------------------------------------------------------------------+
7 : | This source file is subject to version 3.01 of the PHP license, |
8 : | that is bundled with this package in the file LICENSE, and is |
9 : | available through the world-wide-web at the following url: |
10 : | http://www.php.net/license/3_01.txt |
11 : | If you did not receive a copy of the PHP license and are unable to |
12 : | obtain it through the world-wide-web, please send a note to |
13 : | license@php.net so we can mail you a copy immediately. |
14 : +----------------------------------------------------------------------+
15 : | Author: Clayton Collie <clcollie@mindspring.com> |
16 : +----------------------------------------------------------------------+
17 : */
18 :
19 : /* $Id: scanf.c,v 1.31.2.2.2.3 2007/02/24 02:17:27 helly Exp $ */
20 :
21 : /*
22 : scanf.c --
23 :
24 : This file contains the base code which implements sscanf and by extension
25 : fscanf. Original code is from TCL8.3.0 and bears the following copyright
26 :
27 :
28 :
29 : This software is copyrighted by the Regents of the University of
30 : California, Sun Microsystems, Inc., Scriptics Corporation,
31 : and other parties. The following terms apply to all files associated
32 : with the software unless explicitly disclaimed in individual files.
33 :
34 : The authors hereby grant permission to use, copy, modify, distribute,
35 : and license this software and its documentation for any purpose, provided
36 : that existing copyright notices are retained in all copies and that this
37 : notice is included verbatim in any distributions. No written agreement,
38 : license, or royalty fee is required for any of the authorized uses.
39 : Modifications to this software may be copyrighted by their authors
40 : and need not follow the licensing terms described here, provided that
41 : the new terms are clearly indicated on the first page of each file where
42 : they apply.
43 :
44 : IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
45 : FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
46 : ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
47 : DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
48 : POSSIBILITY OF SUCH DAMAGE.
49 :
50 : THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
51 : INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
52 : FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT. THIS SOFTWARE
53 : IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
54 : NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
55 : MODIFICATIONS.
56 :
57 : GOVERNMENT USE: If you are acquiring this software on behalf of the
58 : U.S. government, the Government shall have only "Restricted Rights"
59 : in the software and related documentation as defined in the Federal
60 : Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2). If you
61 : are acquiring the software on behalf of the Department of Defense, the
62 : software shall be classified as "Commercial Computer Software" and the
63 : Government shall have only "Restricted Rights" as defined in Clause
64 : 252.227-7013 (c) (1) of DFARs. Notwithstanding the foregoing, the
65 : authors grant the U.S. Government and others acting in its behalf
66 : permission to use and distribute the software in accordance with the
67 : terms specified in this license.
68 :
69 : */
70 :
71 : #include <stdio.h>
72 : #include <limits.h>
73 : #include <ctype.h>
74 : #include "php.h"
75 : #include "php_variables.h"
76 : #ifdef HAVE_LOCALE_H
77 : #include <locale.h>
78 : #endif
79 : #include "zend_execute.h"
80 : #include "zend_operators.h"
81 : #include "zend_strtod.h"
82 : #include "php_globals.h"
83 : #include "basic_functions.h"
84 : #include "scanf.h"
85 :
/*
 * Flag values used internally by [f|s]canf.
 */

/* Flags describing a single '%' conversion specification. */
#define SCAN_NOSKIP     0x1   /* Don't skip blanks. */
#define SCAN_SUPPRESS   0x2   /* Suppress assignment. */
#define SCAN_UNSIGNED   0x4   /* Read an unsigned value. */
#define SCAN_WIDTH      0x8   /* A width value was supplied. */

/* State flags used while accumulating the characters of a number. */
#define SCAN_SIGNOK     0x10  /* A +/- character is allowed. */
#define SCAN_NODIGITS   0x20  /* No digits have been scanned. */
#define SCAN_NOZERO     0x40  /* No zero digits have been scanned. */
#define SCAN_XOK        0x80  /* An 'x' is allowed. */
#define SCAN_PTOK       0x100 /* Decimal point is allowed. */
#define SCAN_EXPOK      0x200 /* An exponent is allowed. */

/*
 * Convert a (possibly signed) char to zend_uchar, e.g. before handing it to
 * one of the <ctype.h> classification functions.  The whole expansion is
 * parenthesized so the macro behaves as a single operand in any expression.
 */
#define UCHAR(x) ((zend_uchar)(x))
103 :
104 :
105 :
/*
 * One inclusive range of characters inside a character set.
 */
struct Range {
	char start;             /* First character of the range. */
	char end;               /* Last character of the range. */
};

/*
 * The following structure contains the information associated with
 * a character set: a list of individual characters plus a list of
 * character ranges.
 */
typedef struct CharSet {
	int exclude;            /* 1 if this is an exclusion set. */
	int nchars;             /* Number of entries used in chars. */
	char *chars;            /* Individual characters of the set. */
	int nranges;            /* Number of entries used in ranges. */
	struct Range *ranges;   /* Character ranges of the set (may be NULL). */
} CharSet;
121 :
122 : /*
123 : * Declarations for functions used only in this file.
124 : */
125 :
126 : static char *BuildCharSet(CharSet *cset, char *format);
127 : static int CharInSet(CharSet *cset, int ch);
128 : static void ReleaseCharSet(CharSet *cset);
129 : static inline void scan_set_error_return(int numVars, zval **return_value);
130 :
131 :
132 : /* {{{ BuildCharSet
133 : *----------------------------------------------------------------------
134 : *
135 : * BuildCharSet --
136 : *
137 : * This function examines a character set format specification
138 : * and builds a CharSet containing the individual characters and
139 : * character ranges specified.
140 : *
141 : * Results:
142 : * Returns the next format position.
143 : *
144 : * Side effects:
145 : * Initializes the charset.
146 : *
147 : *----------------------------------------------------------------------
148 : */
149 : static char * BuildCharSet(CharSet *cset, char *format)
150 0 : {
151 : char *ch, start;
152 : int nranges;
153 : char *end;
154 :
155 0 : memset(cset, 0, sizeof(CharSet));
156 :
157 0 : ch = format;
158 0 : if (*ch == '^') {
159 0 : cset->exclude = 1;
160 0 : ch = ++format;
161 : }
162 0 : end = format + 1; /* verify this - cc */
163 :
164 : /*
165 : * Find the close bracket so we can overallocate the set.
166 : */
167 :
168 0 : if (*ch == ']') {
169 0 : ch = end++;
170 : }
171 0 : nranges = 0;
172 0 : while (*ch != ']') {
173 0 : if (*ch == '-') {
174 0 : nranges++;
175 : }
176 0 : ch = end++;
177 : }
178 :
179 0 : cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
180 0 : if (nranges > 0) {
181 0 : cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
182 : } else {
183 0 : cset->ranges = NULL;
184 : }
185 :
186 : /*
187 : * Now build the character set.
188 : */
189 :
190 0 : cset->nchars = cset->nranges = 0;
191 0 : ch = format++;
192 0 : start = *ch;
193 0 : if (*ch == ']' || *ch == '-') {
194 0 : cset->chars[cset->nchars++] = *ch;
195 0 : ch = format++;
196 : }
197 0 : while (*ch != ']') {
198 0 : if (*format == '-') {
199 : /*
200 : * This may be the first character of a range, so don't add
201 : * it yet.
202 : */
203 :
204 0 : start = *ch;
205 0 : } else if (*ch == '-') {
206 : /*
207 : * Check to see if this is the last character in the set, in which
208 : * case it is not a range and we should add the previous character
209 : * as well as the dash.
210 : */
211 :
212 0 : if (*format == ']') {
213 0 : cset->chars[cset->nchars++] = start;
214 0 : cset->chars[cset->nchars++] = *ch;
215 : } else {
216 0 : ch = format++;
217 :
218 : /*
219 : * Check to see if the range is in reverse order.
220 : */
221 :
222 0 : if (start < *ch) {
223 0 : cset->ranges[cset->nranges].start = start;
224 0 : cset->ranges[cset->nranges].end = *ch;
225 : } else {
226 0 : cset->ranges[cset->nranges].start = *ch;
227 0 : cset->ranges[cset->nranges].end = start;
228 : }
229 0 : cset->nranges++;
230 : }
231 : } else {
232 0 : cset->chars[cset->nchars++] = *ch;
233 : }
234 0 : ch = format++;
235 : }
236 0 : return format;
237 : }
238 : /* }}} */
239 :
240 : /* {{{ CharInSet
241 : *----------------------------------------------------------------------
242 : *
243 : * CharInSet --
244 : *
245 : * Check to see if a character matches the given set.
246 : *
247 : * Results:
248 : * Returns non-zero if the character matches the given set.
249 : *
250 : * Side effects:
251 : * None.
252 : *
253 : *----------------------------------------------------------------------
254 : */
255 : static int CharInSet(CharSet *cset, int c)
256 0 : {
257 0 : char ch = (char) c;
258 0 : int i, match = 0;
259 :
260 0 : for (i = 0; i < cset->nchars; i++) {
261 0 : if (cset->chars[i] == ch) {
262 0 : match = 1;
263 0 : break;
264 : }
265 : }
266 0 : if (!match) {
267 0 : for (i = 0; i < cset->nranges; i++) {
268 0 : if ((cset->ranges[i].start <= ch)
269 : && (ch <= cset->ranges[i].end)) {
270 0 : match = 1;
271 0 : break;
272 : }
273 : }
274 : }
275 0 : return (cset->exclude ? !match : match);
276 : }
277 : /* }}} */
278 :
279 : /* {{{ ReleaseCharSet
280 : *----------------------------------------------------------------------
281 : *
282 : * ReleaseCharSet --
283 : *
284 : * Free the storage associated with a character set.
285 : *
286 : * Results:
287 : * None.
288 : *
289 : * Side effects:
290 : * None.
291 : *
292 : *----------------------------------------------------------------------
293 : */
294 : static void ReleaseCharSet(CharSet *cset)
295 0 : {
296 0 : efree((char *)cset->chars);
297 0 : if (cset->ranges) {
298 0 : efree((char *)cset->ranges);
299 : }
300 0 : }
301 : /* }}} */
302 :
303 : /* {{{ ValidateFormat
304 : *----------------------------------------------------------------------
305 : *
306 : * ValidateFormat --
307 : *
308 : * Parse the format string and verify that it is properly formed
309 : * and that there are exactly enough variables on the command line.
310 : *
311 : * Results:
312 : * FAILURE or SUCCESS.
313 : *
314 : * Side effects:
315 : * May set php_error based on abnormal conditions.
316 : *
317 : * Parameters :
318 : * format The format string.
319 : * numVars The number of variables passed to the scan command.
320 : * totalSubs The number of variables that will be required.
321 : *
322 : *----------------------------------------------------------------------
323 : */
324 : PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
325 0 : {
326 : #define STATIC_LIST_SIZE 16
327 : int gotXpg, gotSequential, value, i, flags;
328 0 : char *end, *ch = NULL;
329 : int staticAssign[STATIC_LIST_SIZE];
330 0 : int *nassign = staticAssign;
331 0 : int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
332 : TSRMLS_FETCH();
333 :
334 : /*
335 : * Initialize an array that records the number of times a variable
336 : * is assigned to by the format string. We use this to detect if
337 : * a variable is multiply assigned or left unassigned.
338 : */
339 :
340 0 : if (numVars > nspace) {
341 0 : nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
342 0 : nspace = numVars;
343 : }
344 0 : for (i = 0; i < nspace; i++) {
345 0 : nassign[i] = 0;
346 : }
347 :
348 0 : xpgSize = objIndex = gotXpg = gotSequential = 0;
349 :
350 0 : while (*format != '\0') {
351 0 : ch = format++;
352 0 : flags = 0;
353 :
354 0 : if (*ch != '%') {
355 0 : continue;
356 : }
357 0 : ch = format++;
358 0 : if (*ch == '%') {
359 0 : continue;
360 : }
361 0 : if (*ch == '*') {
362 0 : flags |= SCAN_SUPPRESS;
363 0 : ch = format++;
364 0 : goto xpgCheckDone;
365 : }
366 :
367 0 : if ( isdigit( (int)*ch ) ) {
368 : /*
369 : * Check for an XPG3-style %n$ specification. Note: there
370 : * must not be a mixture of XPG3 specs and non-XPG3 specs
371 : * in the same format string.
372 : */
373 :
374 0 : value = strtoul(format-1, &end, 10);
375 0 : if (*end != '$') {
376 0 : goto notXpg;
377 : }
378 0 : format = end+1;
379 0 : ch = format++;
380 0 : gotXpg = 1;
381 0 : if (gotSequential) {
382 0 : goto mixedXPG;
383 : }
384 0 : objIndex = value - 1;
385 0 : if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
386 : goto badIndex;
387 0 : } else if (numVars == 0) {
388 : /*
389 : * In the case where no vars are specified, the user can
390 : * specify %9999$ legally, so we have to consider special
391 : * rules for growing the assign array. 'value' is
392 : * guaranteed to be > 0.
393 : */
394 :
395 : /* set a lower artificial limit on this
396 : * in the interest of security and resource friendliness
397 : * 255 arguments should be more than enough. - cc
398 : */
399 0 : if (value > SCAN_MAX_ARGS) {
400 0 : goto badIndex;
401 : }
402 :
403 0 : xpgSize = (xpgSize > value) ? xpgSize : value;
404 : }
405 0 : goto xpgCheckDone;
406 : }
407 :
408 0 : notXpg:
409 0 : gotSequential = 1;
410 0 : if (gotXpg) {
411 0 : mixedXPG:
412 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
413 0 : goto error;
414 : }
415 :
416 0 : xpgCheckDone:
417 : /*
418 : * Parse any width specifier.
419 : */
420 :
421 0 : if (isdigit(UCHAR(*ch))) {
422 0 : value = strtoul(format-1, &format, 10);
423 0 : flags |= SCAN_WIDTH;
424 0 : ch = format++;
425 : }
426 :
427 : /*
428 : * Ignore size specifier.
429 : */
430 :
431 0 : if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
432 0 : ch = format++;
433 : }
434 :
435 0 : if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
436 0 : goto badIndex;
437 : }
438 :
439 : /*
440 : * Handle the various field types.
441 : */
442 :
443 0 : switch (*ch) {
444 : case 'n':
445 : case 'd':
446 : case 'D':
447 : case 'i':
448 : case 'o':
449 : case 'x':
450 : case 'X':
451 : case 'u':
452 : case 'f':
453 : case 'e':
454 : case 'E':
455 : case 'g':
456 : case 's':
457 0 : break;
458 : case 'c':
459 : /* we differ here with the TCL implementation in allowing for */
460 : /* a character width specification, to be more consistent with */
461 : /* ANSI. since Zend auto allocates space for vars, this is no */
462 : /* problem - cc */
463 : /*
464 : if (flags & SCAN_WIDTH) {
465 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
466 : goto error;
467 : }
468 : */
469 0 : break;
470 : case '[':
471 0 : if (*format == '\0') {
472 0 : goto badSet;
473 : }
474 0 : ch = format++;
475 0 : if (*ch == '^') {
476 0 : if (*format == '\0') {
477 0 : goto badSet;
478 : }
479 0 : ch = format++;
480 : }
481 0 : if (*ch == ']') {
482 0 : if (*format == '\0') {
483 0 : goto badSet;
484 : }
485 0 : ch = format++;
486 : }
487 0 : while (*ch != ']') {
488 0 : if (*format == '\0') {
489 0 : goto badSet;
490 : }
491 0 : ch = format++;
492 : }
493 0 : break;
494 0 : badSet:
495 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
496 0 : goto error;
497 : default:
498 : {
499 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
500 0 : goto error;
501 : }
502 : }
503 0 : if (!(flags & SCAN_SUPPRESS)) {
504 0 : if (objIndex >= nspace) {
505 : /*
506 : * Expand the nassign buffer. If we are using XPG specifiers,
507 : * make sure that we grow to a large enough size. xpgSize is
508 : * guaranteed to be at least one larger than objIndex.
509 : */
510 0 : value = nspace;
511 0 : if (xpgSize) {
512 0 : nspace = xpgSize;
513 : } else {
514 0 : nspace += STATIC_LIST_SIZE;
515 : }
516 0 : if (nassign == staticAssign) {
517 0 : nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
518 0 : for (i = 0; i < STATIC_LIST_SIZE; ++i) {
519 0 : nassign[i] = staticAssign[i];
520 : }
521 : } else {
522 0 : nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
523 : }
524 0 : for (i = value; i < nspace; i++) {
525 0 : nassign[i] = 0;
526 : }
527 : }
528 0 : nassign[objIndex]++;
529 0 : objIndex++;
530 : }
531 : } /* while (*format != '\0') */
532 :
533 : /*
534 : * Verify that all of the variable were assigned exactly once.
535 : */
536 :
537 0 : if (numVars == 0) {
538 0 : if (xpgSize) {
539 0 : numVars = xpgSize;
540 : } else {
541 0 : numVars = objIndex;
542 : }
543 : }
544 0 : if (totalSubs) {
545 0 : *totalSubs = numVars;
546 : }
547 0 : for (i = 0; i < numVars; i++) {
548 0 : if (nassign[i] > 1) {
549 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
550 0 : goto error;
551 0 : } else if (!xpgSize && (nassign[i] == 0)) {
552 : /*
553 : * If the space is empty, and xpgSize is 0 (means XPG wasn't
554 : * used, and/or numVars != 0), then too many vars were given
555 : */
556 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
557 0 : goto error;
558 : }
559 : }
560 :
561 0 : if (nassign != staticAssign) {
562 0 : efree((char *)nassign);
563 : }
564 0 : return SCAN_SUCCESS;
565 :
566 0 : badIndex:
567 0 : if (gotXpg) {
568 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
569 : } else {
570 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
571 : }
572 :
573 0 : error:
574 0 : if (nassign != staticAssign) {
575 0 : efree((char *)nassign);
576 : }
577 0 : return SCAN_ERROR_INVALID_FORMAT;
578 : #undef STATIC_LIST_SIZE
579 : }
580 : /* }}} */
581 :
582 : /* {{{ php_sscanf_internal
583 : * This is the internal function which does processing on behalf of
584 : * both sscanf() and fscanf()
585 : *
586 : * parameters :
587 : * string literal string to be processed
588 : * format format string
589 : * argCount total number of elements in the args array
590 : * args arguments passed in from user function (f|s)scanf
591 : * varStart offset (in args) of 1st variable passed in to (f|s)scanf
592 : * return_value set with the results of the scan
593 : */
594 :
595 : PHPAPI int php_sscanf_internal( char *string, char *format,
596 : int argCount, zval ***args,
597 : int varStart, zval **return_value TSRMLS_DC)
598 0 : {
599 0 : int numVars, nconversions, totalVars = -1;
600 : int i, value, result;
601 : int objIndex;
602 : char *end, *baseString;
603 : zval **current;
604 0 : char op = 0;
605 0 : int base = 0;
606 0 : int underflow = 0;
607 : size_t width;
608 0 : long (*fn)() = NULL;
609 : char *ch, sch;
610 : int flags;
611 : char buf[64]; /* Temporary buffer to hold scanned
612 : * number strings before they are
613 : * passed to strtoul. */
614 :
615 :
616 : /* do some sanity checking */
617 0 : if ((varStart > argCount) || (varStart < 0)){
618 0 : varStart = SCAN_MAX_ARGS + 1;
619 : }
620 0 : numVars = argCount - varStart;
621 0 : if (numVars < 0) {
622 0 : numVars = 0;
623 : }
624 :
625 : #if 0
626 : zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
627 : string, format, numVars, varStart);
628 : #endif
629 : /*
630 : * Check for errors in the format string.
631 : */
632 0 : if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
633 0 : scan_set_error_return( numVars, return_value );
634 0 : return SCAN_ERROR_INVALID_FORMAT;
635 : }
636 :
637 0 : objIndex = numVars ? varStart : 0;
638 :
639 : /*
640 : * If any variables are passed, make sure they are all passed by reference
641 : */
642 0 : if (numVars) {
643 0 : for (i = varStart;i < argCount;i++){
644 0 : if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
645 0 : php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
646 0 : scan_set_error_return(numVars, return_value);
647 0 : return SCAN_ERROR_VAR_PASSED_BYVAL;
648 : }
649 : }
650 : }
651 :
652 :
653 : /*
654 : * Allocate space for the result objects. Only happens when no variables
655 : * are specified
656 : */
657 :
658 0 : if (!numVars) {
659 : /* allocate an array for return */
660 0 : array_init(*return_value);
661 :
662 0 : for (i = 0; i < totalVars; i++) {
663 0 : if (add_next_index_null(*return_value) == FAILURE) {
664 0 : scan_set_error_return(0, return_value);
665 0 : return FAILURE;
666 : }
667 : }
668 : }
669 :
670 0 : baseString = string;
671 :
672 : /*
673 : * Iterate over the format string filling in the result objects until
674 : * we reach the end of input, the end of the format string, or there
675 : * is a mismatch.
676 : */
677 :
678 0 : nconversions = 0;
679 : /* note ! - we need to limit the loop for objIndex to keep it in bounds */
680 :
681 0 : while (*format != '\0') {
682 :
683 0 : ch = format++;
684 :
685 0 : flags = 0;
686 :
687 : /*
688 : * If we see whitespace in the format, skip whitespace in the string.
689 : */
690 :
691 0 : if ( isspace( (int)*ch ) ) {
692 0 : sch = *string;
693 0 : while ( isspace( (int)sch ) ) {
694 0 : if (*string == '\0') {
695 0 : goto done;
696 : }
697 0 : string++;
698 0 : sch = *string;
699 : }
700 0 : continue;
701 : }
702 :
703 0 : if (*ch != '%') {
704 0 : literal:
705 0 : if (*string == '\0') {
706 0 : underflow = 1;
707 0 : goto done;
708 : }
709 0 : sch = *string;
710 0 : string++;
711 0 : if (*ch != sch) {
712 0 : goto done;
713 : }
714 0 : continue;
715 : }
716 :
717 0 : ch = format++;
718 0 : if (*ch == '%') {
719 0 : goto literal;
720 : }
721 :
722 : /*
723 : * Check for assignment suppression ('*') or an XPG3-style
724 : * assignment ('%n$').
725 : */
726 :
727 0 : if (*ch == '*') {
728 0 : flags |= SCAN_SUPPRESS;
729 0 : ch = format++;
730 0 : } else if ( isdigit(UCHAR(*ch))) {
731 0 : value = strtoul(format-1, &end, 10);
732 0 : if (*end == '$') {
733 0 : format = end+1;
734 0 : ch = format++;
735 0 : objIndex = varStart + value - 1;
736 : }
737 : }
738 :
739 : /*
740 : * Parse any width specifier.
741 : */
742 :
743 0 : if ( isdigit(UCHAR(*ch))) {
744 0 : width = strtoul(format-1, &format, 10);
745 0 : ch = format++;
746 : } else {
747 0 : width = 0;
748 : }
749 :
750 : /*
751 : * Ignore size specifier.
752 : */
753 :
754 0 : if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
755 0 : ch = format++;
756 : }
757 :
758 : /*
759 : * Handle the various field types.
760 : */
761 :
762 0 : switch (*ch) {
763 : case 'n':
764 0 : if (!(flags & SCAN_SUPPRESS)) {
765 0 : if (numVars && objIndex >= argCount) {
766 : break;
767 0 : } else if (numVars) {
768 : zend_uint refcount;
769 :
770 0 : current = args[objIndex++];
771 0 : refcount = (*current)->refcount;
772 0 : zval_dtor( *current );
773 0 : ZVAL_LONG( *current, (long)(string - baseString) );
774 0 : (*current)->refcount = refcount;
775 0 : (*current)->is_ref = 1;
776 : } else {
777 0 : add_index_long(*return_value, objIndex++, string - baseString);
778 : }
779 : }
780 0 : nconversions++;
781 0 : continue;
782 :
783 : case 'd':
784 : case 'D':
785 0 : op = 'i';
786 0 : base = 10;
787 0 : fn = (long (*)())strtol;
788 0 : break;
789 : case 'i':
790 0 : op = 'i';
791 0 : base = 0;
792 0 : fn = (long (*)())strtol;
793 0 : break;
794 : case 'o':
795 0 : op = 'i';
796 0 : base = 8;
797 0 : fn = (long (*)())strtol;
798 0 : break;
799 : case 'x':
800 : case 'X':
801 0 : op = 'i';
802 0 : base = 16;
803 0 : fn = (long (*)())strtol;
804 0 : break;
805 : case 'u':
806 0 : op = 'i';
807 0 : base = 10;
808 0 : flags |= SCAN_UNSIGNED;
809 0 : fn = (long (*)())strtoul;
810 0 : break;
811 :
812 : case 'f':
813 : case 'e':
814 : case 'E':
815 : case 'g':
816 0 : op = 'f';
817 0 : break;
818 :
819 : case 's':
820 0 : op = 's';
821 0 : break;
822 :
823 : case 'c':
824 0 : op = 's';
825 0 : flags |= SCAN_NOSKIP;
826 : /*-cc-*/
827 0 : if (0 == width) {
828 0 : width = 1;
829 : }
830 : /*-cc-*/
831 0 : break;
832 : case '[':
833 0 : op = '[';
834 0 : flags |= SCAN_NOSKIP;
835 : break;
836 : } /* switch */
837 :
838 : /*
839 : * At this point, we will need additional characters from the
840 : * string to proceed.
841 : */
842 :
843 0 : if (*string == '\0') {
844 0 : underflow = 1;
845 0 : goto done;
846 : }
847 :
848 : /*
849 : * Skip any leading whitespace at the beginning of a field unless
850 : * the format suppresses this behavior.
851 : */
852 :
853 0 : if (!(flags & SCAN_NOSKIP)) {
854 0 : while (*string != '\0') {
855 0 : sch = *string;
856 0 : if (! isspace((int)sch) ) {
857 0 : break;
858 : }
859 0 : string++;
860 : }
861 0 : if (*string == '\0') {
862 0 : underflow = 1;
863 0 : goto done;
864 : }
865 : }
866 :
867 : /*
868 : * Perform the requested scanning operation.
869 : */
870 :
871 0 : switch (op) {
872 : case 'c':
873 : case 's':
874 : /*
875 : * Scan a string up to width characters or whitespace.
876 : */
877 :
878 0 : if (width == 0) {
879 0 : width = (size_t) ~0;
880 : }
881 0 : end = string;
882 0 : while (*end != '\0') {
883 0 : sch = *end;
884 0 : if ( isspace( (int)sch ) ) {
885 0 : break;
886 : }
887 0 : end++;
888 0 : if (--width == 0) {
889 0 : break;
890 : }
891 : }
892 0 : if (!(flags & SCAN_SUPPRESS)) {
893 0 : if (numVars && objIndex >= argCount) {
894 : break;
895 0 : } else if (numVars) {
896 : zend_uint refcount;
897 :
898 0 : current = args[objIndex++];
899 0 : refcount = (*current)->refcount;
900 0 : zval_dtor( *current );
901 0 : ZVAL_STRINGL( *current, string, end-string, 1);
902 0 : (*current)->refcount = refcount;
903 0 : (*current)->is_ref = 1;
904 : } else {
905 0 : add_index_stringl( *return_value, objIndex++, string, end-string, 1);
906 : }
907 : }
908 0 : string = end;
909 0 : break;
910 :
911 : case '[': {
912 : CharSet cset;
913 :
914 0 : if (width == 0) {
915 0 : width = (size_t) ~0;
916 : }
917 0 : end = string;
918 :
919 0 : format = BuildCharSet(&cset, format);
920 0 : while (*end != '\0') {
921 0 : sch = *end;
922 0 : if (!CharInSet(&cset, (int)sch)) {
923 0 : break;
924 : }
925 0 : end++;
926 0 : if (--width == 0) {
927 0 : break;
928 : }
929 : }
930 0 : ReleaseCharSet(&cset);
931 :
932 0 : if (string == end) {
933 : /*
934 : * Nothing matched the range, stop processing
935 : */
936 0 : goto done;
937 : }
938 0 : if (!(flags & SCAN_SUPPRESS)) {
939 0 : if (numVars && objIndex >= argCount) {
940 : break;
941 0 : } else if (numVars) {
942 0 : current = args[objIndex++];
943 0 : zval_dtor( *current );
944 0 : ZVAL_STRINGL( *current, string, end-string, 1);
945 : } else {
946 0 : add_index_stringl(*return_value, objIndex++, string, end-string, 1);
947 : }
948 : }
949 0 : string = end;
950 :
951 0 : break;
952 : }
953 : /*
954 : case 'c':
955 : / Scan a single character./
956 :
957 : sch = *string;
958 : string++;
959 : if (!(flags & SCAN_SUPPRESS)) {
960 : if (numVars) {
961 : char __buf[2];
962 : __buf[0] = sch;
963 : __buf[1] = '\0';;
964 : current = args[objIndex++];
965 : convert_to_string_ex( current );
966 : ZVAL_STRINGL( *current, __buf, 1, 1);
967 : } else {
968 : add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
969 : }
970 : }
971 : break;
972 : */
973 : case 'i':
974 : /*
975 : * Scan an unsigned or signed integer.
976 : */
977 :
978 : /*-cc-*/
979 0 : buf[0] = '\0';
980 : /*-cc-*/
981 0 : if ((width == 0) || (width > sizeof(buf) - 1)) {
982 0 : width = sizeof(buf) - 1;
983 : }
984 :
985 0 : flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
986 0 : for (end = buf; width > 0; width--) {
987 0 : switch (*string) {
988 : /*
989 : * The 0 digit has special meaning at the beginning of
990 : * a number. If we are unsure of the base, it
991 : * indicates that we are in base 8 or base 16 (if it is
992 : * followed by an 'x').
993 : */
994 : case '0':
995 : /*-cc-*/
996 0 : if (base == 16) {
997 0 : flags |= SCAN_XOK;
998 : }
999 : /*-cc-*/
1000 0 : if (base == 0) {
1001 0 : base = 8;
1002 0 : flags |= SCAN_XOK;
1003 : }
1004 0 : if (flags & SCAN_NOZERO) {
1005 0 : flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
1006 : } else {
1007 0 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1008 : }
1009 0 : goto addToInt;
1010 :
1011 : case '1': case '2': case '3': case '4':
1012 : case '5': case '6': case '7':
1013 0 : if (base == 0) {
1014 0 : base = 10;
1015 : }
1016 0 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1017 0 : goto addToInt;
1018 :
1019 : case '8': case '9':
1020 0 : if (base == 0) {
1021 0 : base = 10;
1022 : }
1023 0 : if (base <= 8) {
1024 0 : break;
1025 : }
1026 0 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1027 0 : goto addToInt;
1028 :
1029 : case 'A': case 'B': case 'C':
1030 : case 'D': case 'E': case 'F':
1031 : case 'a': case 'b': case 'c':
1032 : case 'd': case 'e': case 'f':
1033 0 : if (base <= 10) {
1034 0 : break;
1035 : }
1036 0 : flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1037 0 : goto addToInt;
1038 :
1039 : case '+': case '-':
1040 0 : if (flags & SCAN_SIGNOK) {
1041 0 : flags &= ~SCAN_SIGNOK;
1042 0 : goto addToInt;
1043 : }
1044 0 : break;
1045 :
1046 : case 'x': case 'X':
1047 0 : if ((flags & SCAN_XOK) && (end == buf+1)) {
1048 0 : base = 16;
1049 0 : flags &= ~SCAN_XOK;
1050 0 : goto addToInt;
1051 : }
1052 : break;
1053 : }
1054 :
1055 : /*
1056 : * We got an illegal character so we are done accumulating.
1057 : */
1058 :
1059 0 : break;
1060 :
1061 0 : addToInt:
1062 : /*
1063 : * Add the character to the temporary buffer.
1064 : */
1065 0 : *end++ = *string++;
1066 0 : if (*string == '\0') {
1067 0 : break;
1068 : }
1069 : }
1070 :
1071 : /*
1072 : * Check to see if we need to back up because we only got a
1073 : * sign or a trailing x after a 0.
1074 : */
1075 :
1076 0 : if (flags & SCAN_NODIGITS) {
1077 0 : if (*string == '\0') {
1078 0 : underflow = 1;
1079 : }
1080 0 : goto done;
1081 0 : } else if (end[-1] == 'x' || end[-1] == 'X') {
1082 0 : end--;
1083 0 : string--;
1084 : }
1085 :
1086 :
1087 : /*
1088 : * Scan the value from the temporary buffer. If we are
1089 : * returning a large unsigned value, we have to convert it back
1090 : * to a string since PHP only supports signed values.
1091 : */
1092 :
1093 0 : if (!(flags & SCAN_SUPPRESS)) {
1094 0 : *end = '\0';
1095 0 : value = (int) (*fn)(buf, NULL, base);
1096 0 : if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1097 0 : snprintf(buf, sizeof(buf), "%u", value); /* INTL: ISO digit */
1098 0 : if (numVars && objIndex >= argCount) {
1099 : break;
1100 0 : } else if (numVars) {
1101 : /* change passed value type to string */
1102 0 : current = args[objIndex++];
1103 0 : convert_to_string( *current );
1104 0 : ZVAL_STRING( *current, buf, 1 );
1105 : } else {
1106 0 : add_index_string(*return_value, objIndex++, buf, 1);
1107 : }
1108 : } else {
1109 0 : if (numVars && objIndex >= argCount) {
1110 : break;
1111 0 : } else if (numVars) {
1112 0 : current = args[objIndex++];
1113 0 : convert_to_long( *current );
1114 0 : Z_LVAL(**current) = value;
1115 : } else {
1116 0 : add_index_long(*return_value, objIndex++, value);
1117 : }
1118 : }
1119 : }
1120 :
1121 0 : break;
1122 :
1123 : case 'f':
1124 : /*
1125 : * Scan a floating point number
1126 : */
1127 0 : buf[0] = '\0'; /* call me pedantic */
1128 0 : if ((width == 0) || (width > sizeof(buf) - 1)) {
1129 0 : width = sizeof(buf) - 1;
1130 : }
1131 0 : flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1132 0 : for (end = buf; width > 0; width--) {
1133 0 : switch (*string) {
1134 : case '0': case '1': case '2': case '3':
1135 : case '4': case '5': case '6': case '7':
1136 : case '8': case '9':
1137 0 : flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1138 0 : goto addToFloat;
1139 : case '+': case '-':
1140 0 : if (flags & SCAN_SIGNOK) {
1141 0 : flags &= ~SCAN_SIGNOK;
1142 0 : goto addToFloat;
1143 : }
1144 0 : break;
1145 : case '.':
1146 0 : if (flags & SCAN_PTOK) {
1147 0 : flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1148 0 : goto addToFloat;
1149 : }
1150 0 : break;
1151 : case 'e': case 'E':
1152 : /*
1153 : * An exponent is not allowed until there has
1154 : * been at least one digit.
1155 : */
1156 :
1157 0 : if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1158 0 : flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1159 : | SCAN_SIGNOK | SCAN_NODIGITS;
1160 0 : goto addToFloat;
1161 : }
1162 : break;
1163 : }
1164 :
1165 : /*
1166 : * We got an illegal character so we are done accumulating.
1167 : */
1168 :
1169 0 : break;
1170 :
1171 0 : addToFloat:
1172 : /*
1173 : * Add the character to the temporary buffer.
1174 : */
1175 :
1176 0 : *end++ = *string++;
1177 0 : if (*string == '\0') {
1178 0 : break;
1179 : }
1180 : }
1181 :
1182 : /*
1183 : * Check to see if we need to back up because we saw a
1184 : * trailing 'e' or sign.
1185 : */
1186 :
1187 0 : if (flags & SCAN_NODIGITS) {
1188 0 : if (flags & SCAN_EXPOK) {
1189 : /*
1190 : * There were no digits at all so scanning has
1191 : * failed and we are done.
1192 : */
1193 0 : if (*string == '\0') {
1194 0 : underflow = 1;
1195 : }
1196 0 : goto done;
1197 : }
1198 :
1199 : /*
1200 : * We got a bad exponent ('e' and maybe a sign).
1201 : */
1202 :
1203 0 : end--;
1204 0 : string--;
1205 0 : if (*end != 'e' && *end != 'E') {
1206 0 : end--;
1207 0 : string--;
1208 : }
1209 : }
1210 :
1211 : /*
1212 : * Scan the value from the temporary buffer.
1213 : */
1214 :
1215 0 : if (!(flags & SCAN_SUPPRESS)) {
1216 : double dvalue;
1217 0 : *end = '\0';
1218 0 : dvalue = zend_strtod(buf, NULL);
1219 0 : if (numVars && objIndex >= argCount) {
1220 : break;
1221 0 : } else if (numVars) {
1222 0 : current = args[objIndex++];
1223 0 : convert_to_double( *current );
1224 0 : Z_DVAL_PP( current ) = dvalue;
1225 : } else {
1226 0 : add_index_double( *return_value, objIndex++, dvalue );
1227 : }
1228 : }
1229 : break;
1230 : } /* switch (op) */
1231 0 : nconversions++;
1232 : } /* while (*format != '\0') */
1233 :
1234 0 : done:
1235 0 : result = SCAN_SUCCESS;
1236 :
1237 0 : if (underflow && (0==nconversions)) {
1238 0 : scan_set_error_return( numVars, return_value );
1239 0 : result = SCAN_ERROR_EOF;
1240 0 : } else if (numVars) {
1241 0 : convert_to_long( *return_value );
1242 0 : Z_LVAL_PP(return_value) = nconversions;
1243 0 : } else if (nconversions < totalVars) {
1244 : /* to do : not all elements converted. we need to prune the list - cc
1245 : */
1246 : }
1247 :
1248 0 : return result;
1249 : }
1250 : /* }}} */
1251 :
1252 : /* the compiler choked when i tried to make this a macro */
1253 : static inline void scan_set_error_return(int numVars, zval **return_value)
1254 0 : {
1255 0 : if (numVars) {
1256 0 : Z_TYPE_PP(return_value) = IS_LONG;
1257 0 : Z_LVAL_PP(return_value) = SCAN_ERROR_EOF; /* EOF marker */
1258 : } else {
1259 : /* zval_dtor( *return_value ); */
1260 : /* convert_to_null calls destructor */
1261 0 : convert_to_null( *return_value );
1262 : }
1263 0 : }
1264 :
1265 :
1266 : /*
1267 : * Local variables:
1268 : * tab-width: 4
1269 : * c-basic-offset: 4
1270 : * End:
1271 : * vim600: sw=4 ts=4 fdm=marker
1272 : * vim<600: sw=4 ts=4
1273 : */
|