Example #1
0
/**
 * This is the internal function which does processing on behalf of
 * both sscanf() and fscanf()
 *
 * parameters :
 *    string    literal string to be processed
 *    format    format string
 *    return_value set with the results of the scan
 */
int string_sscanf(const char *string, const char *format, int numVars,
                  Variant &return_value) {
  int  nconversions;
  int  totalVars = -1;
  int64_t value;
  char *end;
  const char *baseString;
  char op   = 0;
  int  base = 0;
  int  underflow = 0;
  size_t width;
  long (*fn)(const char *, char **, int) = nullptr;
  const char *ch;
  char sch;
  int  flags;
  char buf[64];  /* Temporary buffer to hold scanned number
                  * strings before they are passed to strtoul() */

  Array returnArray;

  /*
   * Check for errors in the format string.
   */
  if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
    scan_set_error_return(numVars, return_value);
    return SCAN_ERROR_INVALID_FORMAT;
  }

  baseString = string;

  /*
   * Iterate over the format string filling in the result objects until
   * we reach the end of input, the end of the format string, or there
   * is a mismatch.
   */
  nconversions = 0;

  while (*format != '\0') {
    ch    = format++;
    flags = 0;

    /*
     * If we see whitespace in the format, skip whitespace in the string.
     */
    if ( isspace( (int)*ch ) ) {
      sch = *string;
      while ( isspace( (int)sch ) ) {
        if (*string == '\0') {
          goto done;
        }
        string++;
        sch = *string;
      }
      continue;
    }

    if (*ch != '%') {
    literal:
      if (*string == '\0') {
        underflow = 1;
        goto done;
      }
      sch = *string;
      string++;
      if (*ch != sch) {
        goto done;
      }
      continue;
    }

    ch = format++;
    if (*ch == '%') {
      goto literal;
    }

    /*
     * Check for assignment suppression ('*') or an XPG3-style
     * assignment ('%n$').
     */
    if (*ch == '*') {
      flags |= SCAN_SUPPRESS;
      ch = format++;
    } else if ( isdigit(UCHAR(*ch))) {
      value = strtoul(format-1, &end, 10);
      if (*end == '$') {
        format = end+1;
        ch = format++;
      }
    }

    /*
     * Parse any width specifier.
     */
    if ( isdigit(UCHAR(*ch))) {
      char *endptr;
      width = strtoul(format-1, &endptr, 10);
      format = endptr;
      ch = format++;
    } else {
      width = 0;
    }

    /*
     * Ignore size specifier.
     */
    if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
      ch = format++;
    }

    /*
     * Handle the various field types.
     */
    switch (*ch) {
    case 'n':
      if (!(flags & SCAN_SUPPRESS)) {
        returnArray.append((int)(string - baseString));
      }
      nconversions++;
      continue;

    case 'd':
    case 'D':
      op = 'i';
      base = 10;
      fn = (long (*)(const char *, char **, int))strtol;
      break;
    case 'i':
      op = 'i';
      base = 0;
      fn = (long (*)(const char *, char **, int))strtol;
      break;
    case 'o':
      op = 'i';
      base = 8;
      fn = (long (*)(const char *, char **, int))strtol;
      break;
    case 'x':
    case 'X':
      op = 'i';
      base = 16;
      fn = (long (*)(const char *, char **, int))strtol;
      break;
    case 'u':
      op = 'i';
      base = 10;
      flags |= SCAN_UNSIGNED;
      fn = (long (*)(const char *, char **, int))strtoul;
      break;

    case 'f':
    case 'e':
    case 'E':
    case 'g':
      op = 'f';
      break;

    case 's':
      op = 's';
      break;

    case 'c':
      op = 's';
      flags |= SCAN_NOSKIP;
      /*-cc-*/
      if (0 == width) {
        width = 1;
      }
      /*-cc-*/
      break;
    case '[':
      op = '[';
      flags |= SCAN_NOSKIP;
      break;
    } /* switch */

    /*
     * At this point, we will need additional characters from the
     * string to proceed.
     */
    if (*string == '\0') {
      underflow = 1;
      goto done;
    }

    /*
     * Skip any leading whitespace at the beginning of a field unless
     * the format suppresses this behavior.
     */
    if (!(flags & SCAN_NOSKIP)) {
      while (*string != '\0') {
        sch = *string;
        if (! isspace((int)sch) ) {
          break;
        }
        string++;
      }
      if (*string == '\0') {
        underflow = 1;
        goto done;
      }
    }

    /*
     * Perform the requested scanning operation.
     */
    switch (op) {
    case 'c':
    case 's':
      /*
       * Scan a string up to width characters or whitespace.
       */
      if (width == 0) {
        width = (size_t) ~0;
      }
      end = (char*)string;
      while (*end != '\0') {
        sch = *end;
        if ( isspace( (int)sch ) ) {
          break;
        }
        end++;
        if (--width == 0) {
          break;
        }
      }
      if (!(flags & SCAN_SUPPRESS)) {
        returnArray.append(String(string, end-string, CopyString));
      }
      string = end;
      break;

    case '[': {
      CharSet cset;

      if (width == 0) {
        width = (size_t) ~0;
      }
      end = (char*)string;

      format = BuildCharSet(&cset, format);
      while (*end != '\0') {
        sch = *end;
        if (!CharInSet(&cset, (int)sch)) {
          break;
        }
        end++;
        if (--width == 0) {
          break;
        }
      }
      ReleaseCharSet(&cset);

      if (string == end) {
        /*
         * Nothing matched the range, stop processing
         */
        goto done;
      }
      if (!(flags & SCAN_SUPPRESS)) {
        returnArray.append(String(string, end-string, CopyString));
      }
      string = end;
      break;
    }
    case 'i':
      /*
       * Scan an unsigned or signed integer.
       */
      /*-cc-*/
      buf[0] = '\0';
      /*-cc-*/
      if ((width == 0) || (width > sizeof(buf) - 1)) {
        width = sizeof(buf) - 1;
      }

      flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
      for (end = buf; width > 0; width--) {
        switch (*string) {
          /*
           * The 0 digit has special meaning at the beginning of
           * a number.  If we are unsure of the base, it
           * indicates that we are in base 8 or base 16 (if it is
           * followed by an 'x').
           */
        case '0':
          /*-cc-*/
          if (base == 16) {
            flags |= SCAN_XOK;
          }
          /*-cc-*/
          if (base == 0) {
            base = 8;
            flags |= SCAN_XOK;
          }
          if (flags & SCAN_NOZERO) {
            flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
          } else {
            flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
          }
          goto addToInt;

        case '1': case '2': case '3': case '4':
        case '5': case '6': case '7':
          if (base == 0) {
            base = 10;
          }
          flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
          goto addToInt;

        case '8': case '9':
          if (base == 0) {
            base = 10;
          }
          if (base <= 8) {
            break;
          }
          flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
          goto addToInt;

        case 'A': case 'B': case 'C':
        case 'D': case 'E': case 'F':
        case 'a': case 'b': case 'c':
        case 'd': case 'e': case 'f':
          if (base <= 10) {
            break;
          }
          flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
          goto addToInt;

        case '+': case '-':
          if (flags & SCAN_SIGNOK) {
            flags &= ~SCAN_SIGNOK;
            goto addToInt;
          }
          break;

        case 'x': case 'X':
          if ((flags & SCAN_XOK) && (end == buf+1)) {
            base = 16;
            flags &= ~SCAN_XOK;
            goto addToInt;
          }
          break;
        }

        /*
         * We got an illegal character so we are done accumulating.
         */
        break;

      addToInt:
        /*
         * Add the character to the temporary buffer.
         */
        *end++ = *string++;
        if (*string == '\0') {
          break;
        }
      }

      /*
       * Check to see if we need to back up because we only got a
       * sign or a trailing x after a 0.
       */
      if (flags & SCAN_NODIGITS) {
        if (*string == '\0') {
          underflow = 1;
        }
        goto done;
      } else if (end[-1] == 'x' || end[-1] == 'X') {
        end--;
        string--;
      }

      /*
       * Scan the value from the temporary buffer.  If we are
       * returning a large unsigned value, we have to convert it back
       * to a string since PHP only supports signed values.
       */
      if (!(flags & SCAN_SUPPRESS)) {
        *end = '\0';
        value = (int64_t) (*fn)(buf, nullptr, base);
        if ((flags & SCAN_UNSIGNED) && (value < 0)) {
          snprintf(buf, sizeof(buf), "%lu", (long)value); /* INTL: ISO digit */
          returnArray.append(String(buf, CopyString));
        } else {
          returnArray.append(value);
        }
      }
      break;

    case 'f':
      /*
       * Scan a floating point number
       */
      buf[0] = '\0';     /* call me pedantic */
      if ((width == 0) || (width > sizeof(buf) - 1)) {
        width = sizeof(buf) - 1;
      }
      flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
      for (end = buf; width > 0; width--) {
        switch (*string) {
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
        case '8': case '9':
          flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
          goto addToFloat;
        case '+':
        case '-':
          if (flags & SCAN_SIGNOK) {
            flags &= ~SCAN_SIGNOK;
            goto addToFloat;
          }
          break;
        case '.':
          if (flags & SCAN_PTOK) {
            flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
            goto addToFloat;
          }
          break;
        case 'e':
        case 'E':
          /*
           * An exponent is not allowed until there has
           * been at least one digit.
           */
          if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
            flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
              | SCAN_SIGNOK | SCAN_NODIGITS;
            goto addToFloat;
          }
          break;
        }

        /*
         * We got an illegal character so we are done accumulating.
         */
        break;

      addToFloat:
        /*
         * Add the character to the temporary buffer.
         */
        *end++ = *string++;
        if (*string == '\0') {
          break;
        }
      }

      /*
       * Check to see if we need to back up because we saw a
       * trailing 'e' or sign.
       */
      if (flags & SCAN_NODIGITS) {
        if (flags & SCAN_EXPOK) {
          /*
           * There were no digits at all so scanning has
           * failed and we are done.
           */
          if (*string == '\0') {
            underflow = 1;
          }
          goto done;
        }

        /*
         * We got a bad exponent ('e' and maybe a sign).
         */
        end--;
        string--;
        if (*end != 'e' && *end != 'E') {
          end--;
          string--;
        }
      }

      /*
       * Scan the value from the temporary buffer.
       */
      if (!(flags & SCAN_SUPPRESS)) {
        double dvalue;
        *end = '\0';
        dvalue = strtod(buf, nullptr);
        returnArray.append(dvalue);
      }
      break;
    } /* switch (op) */
    nconversions++;
  } /*  while (*format != '\0') */

done:
  if (underflow && (0==nconversions)) {
    scan_set_error_return(numVars, return_value);
    return SCAN_ERROR_EOF;
  } else if (nconversions < totalVars) {
    /* TODO: not all elements converted. we need to prune the list - cc */
  }
  return_value = returnArray;
  return SCAN_SUCCESS;
}
Example #2
0
PHPAPI int php_sscanf_internal( char *string, char *format,
				int argCount, zval *args,
				int varStart, zval *return_value)
{
	int  numVars, nconversions, totalVars = -1;
	int  i, result;
	zend_long value;
	int  objIndex;
	char *end, *baseString;
	zval *current;
	char op   = 0;
	int  base = 0;
	int  underflow = 0;
	size_t width;
	zend_long (*fn)() = NULL;
	char *ch, sch;
	int  flags;
	char buf[64];	/* Temporary buffer to hold scanned number
					 * strings before they are passed to strtoul() */

	/* do some sanity checking */
	if ((varStart > argCount) || (varStart < 0)){
		varStart = SCAN_MAX_ARGS + 1;
	}
	numVars = argCount - varStart;
	if (numVars < 0) {
		numVars = 0;
	}

#if 0
	zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
					string, format, numVars, varStart);
#endif
	/*
	 * Check for errors in the format string.
	 */
	if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
		scan_set_error_return( numVars, return_value );
		return SCAN_ERROR_INVALID_FORMAT;
	}

	objIndex = numVars ? varStart : 0;

	/*
	 * If any variables are passed, make sure they are all passed by reference
	 */
	if (numVars) {
		for (i = varStart;i < argCount;i++){
			if ( ! Z_ISREF(args[ i ] ) ) {
				php_error_docref(NULL, E_WARNING, "Parameter %d must be passed by reference", i);
				scan_set_error_return(numVars, return_value);
				return SCAN_ERROR_VAR_PASSED_BYVAL;
			}
		}
	}

	/*
	 * Allocate space for the result objects. Only happens when no variables
	 * are specified
	 */
	if (!numVars) {
		zval tmp;

		/* allocate an array for return */
		array_init(return_value);

		for (i = 0; i < totalVars; i++) {
			ZVAL_NULL(&tmp);
			if (add_next_index_zval(return_value, &tmp) == FAILURE) {
				scan_set_error_return(0, return_value);
				return FAILURE;
			}
		}
		varStart = 0; /* Array index starts from 0 */
	}

	baseString = string;

	/*
	 * Iterate over the format string filling in the result objects until
	 * we reach the end of input, the end of the format string, or there
	 * is a mismatch.
	 */
	nconversions = 0;
	/* note ! - we need to limit the loop for objIndex to keep it in bounds */

	while (*format != '\0') {
		ch    = format++;
		flags = 0;

		/*
		 * If we see whitespace in the format, skip whitespace in the string.
		 */
		if ( isspace( (int)*ch ) ) {
			sch = *string;
			while ( isspace( (int)sch ) ) {
				if (*string == '\0') {
					goto done;
				}
				string++;
				sch = *string;
			}
			continue;
		}

		if (*ch != '%') {
literal:
			if (*string == '\0') {
				underflow = 1;
				goto done;
			}
			sch = *string;
			string++;
			if (*ch != sch) {
				goto done;
			}
			continue;
		}

		ch = format++;
		if (*ch == '%') {
			goto literal;
		}

		/*
		 * Check for assignment suppression ('*') or an XPG3-style
		 * assignment ('%n$').
		 */
		if (*ch == '*') {
			flags |= SCAN_SUPPRESS;
			ch = format++;
		} else if ( isdigit(UCHAR(*ch))) {
			value = ZEND_STRTOUL(format-1, &end, 10);
			if (*end == '$') {
				format = end+1;
				ch = format++;
				objIndex = varStart + value - 1;
			}
		}

		/*
		 * Parse any width specifier.
		 */
		if ( isdigit(UCHAR(*ch))) {
			width = ZEND_STRTOUL(format-1, &format, 10);
			ch = format++;
		} else {
			width = 0;
		}

		/*
		 * Ignore size specifier.
		 */
		if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
			ch = format++;
		}

		/*
		 * Handle the various field types.
		 */
		switch (*ch) {
			case 'n':
				if (!(flags & SCAN_SUPPRESS)) {
					if (numVars && objIndex >= argCount) {
						break;
					} else if (numVars) {
						current = Z_REFVAL(args[objIndex++]);
						zval_ptr_dtor(current);
						ZVAL_LONG(current, (zend_long)(string - baseString) );
					} else {
						add_index_long(return_value, objIndex++, string - baseString);
					}
				}
				nconversions++;
				continue;

			case 'd':
			case 'D':
				op = 'i';
				base = 10;
				fn = (zend_long (*)())ZEND_STRTOL_PTR;
				break;
			case 'i':
				op = 'i';
				base = 0;
				fn = (zend_long (*)())ZEND_STRTOL_PTR;
				break;
			case 'o':
				op = 'i';
				base = 8;
				fn = (zend_long (*)())ZEND_STRTOL_PTR;
				break;
			case 'x':
			case 'X':
				op = 'i';
				base = 16;
				fn = (zend_long (*)())ZEND_STRTOL_PTR;
				break;
			case 'u':
				op = 'i';
				base = 10;
				flags |= SCAN_UNSIGNED;
				fn = (zend_long (*)())ZEND_STRTOUL_PTR;
				break;

			case 'f':
			case 'e':
			case 'E':
			case 'g':
				op = 'f';
				break;

			case 's':
				op = 's';
				break;

			case 'c':
				op = 's';
				flags |= SCAN_NOSKIP;
				/*-cc-*/
				if (0 == width) {
					width = 1;
				}
				/*-cc-*/
				break;
			case '[':
				op = '[';
				flags |= SCAN_NOSKIP;
				break;
		}   /* switch */

		/*
		 * At this point, we will need additional characters from the
		 * string to proceed.
		 */
		if (*string == '\0') {
			underflow = 1;
			goto done;
		}

		/*
		 * Skip any leading whitespace at the beginning of a field unless
		 * the format suppresses this behavior.
		 */
		if (!(flags & SCAN_NOSKIP)) {
			while (*string != '\0') {
				sch = *string;
				if (! isspace((int)sch) ) {
					break;
				}
				string++;
			}
			if (*string == '\0') {
				underflow = 1;
				goto done;
			}
		}

		/*
		 * Perform the requested scanning operation.
		 */
		switch (op) {
			case 'c':
			case 's':
				/*
				 * Scan a string up to width characters or whitespace.
				 */
				if (width == 0) {
					width = (size_t) ~0;
				}
				end = string;
				while (*end != '\0') {
					sch = *end;
					if ( isspace( (int)sch ) ) {
						break;
					}
					end++;
					if (--width == 0) {
					   break;
					}
				}
				if (!(flags & SCAN_SUPPRESS)) {
					if (numVars && objIndex >= argCount) {
						break;
					} else if (numVars) {
						current = Z_REFVAL(args[objIndex++]);
						zval_ptr_dtor(current);
						ZVAL_STRINGL(current, string, end-string);
					} else {
						add_index_stringl(return_value, objIndex++, string, end-string);
					}
				}
				string = end;
				break;

			case '[': {
				CharSet cset;

				if (width == 0) {
					width = (size_t) ~0;
				}
				end = string;

				format = BuildCharSet(&cset, format);
				while (*end != '\0') {
					sch = *end;
					if (!CharInSet(&cset, (int)sch)) {
						break;
					}
					end++;
					if (--width == 0) {
						break;
					}
				}
				ReleaseCharSet(&cset);

				if (string == end) {
					/*
					 * Nothing matched the range, stop processing
					 */
					goto done;
				}
				if (!(flags & SCAN_SUPPRESS)) {
					if (numVars && objIndex >= argCount) {
						break;
					} else if (numVars) {
						current = Z_REFVAL(args[objIndex++]);
						zval_ptr_dtor(current);
						ZVAL_STRINGL(current, string, end-string);
					} else {
						add_index_stringl(return_value, objIndex++, string, end-string);
					}
				}
				string = end;
				break;
			}
/*
			case 'c':
			   / Scan a single character./

				sch = *string;
				string++;
				if (!(flags & SCAN_SUPPRESS)) {
					if (numVars) {
						char __buf[2];
						__buf[0] = sch;
						__buf[1] = '\0';;
						current = args[objIndex++];
						zval_dtor(*current);
						ZVAL_STRINGL( *current, __buf, 1);
					} else {
						add_index_stringl(return_value, objIndex++, &sch, 1);
					}
				}
				break;
*/
			case 'i':
				/*
				 * Scan an unsigned or signed integer.
				 */
				/*-cc-*/
				buf[0] = '\0';
				/*-cc-*/
				if ((width == 0) || (width > sizeof(buf) - 1)) {
					width = sizeof(buf) - 1;
				}

				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
				for (end = buf; width > 0; width--) {
					switch (*string) {
						/*
						 * The 0 digit has special meaning at the beginning of
						 * a number.  If we are unsure of the base, it
						 * indicates that we are in base 8 or base 16 (if it is
						 * followed by an 'x').
						 */
						case '0':
							/*-cc-*/
							if (base == 16) {
								flags |= SCAN_XOK;
							}
							/*-cc-*/
							if (base == 0) {
								base = 8;
								flags |= SCAN_XOK;
							}
							if (flags & SCAN_NOZERO) {
								flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
							} else {
								flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
							}
							goto addToInt;

						case '1': case '2': case '3': case '4':
						case '5': case '6': case '7':
							if (base == 0) {
								base = 10;
							}
							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
							goto addToInt;

						case '8': case '9':
							if (base == 0) {
								base = 10;
							}
							if (base <= 8) {
							   break;
							}
							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
							goto addToInt;

						case 'A': case 'B': case 'C':
						case 'D': case 'E': case 'F':
						case 'a': case 'b': case 'c':
						case 'd': case 'e': case 'f':
							if (base <= 10) {
								break;
							}
							flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
							goto addToInt;

						case '+': case '-':
							if (flags & SCAN_SIGNOK) {
								flags &= ~SCAN_SIGNOK;
								goto addToInt;
							}
							break;

						case 'x': case 'X':
							if ((flags & SCAN_XOK) && (end == buf+1)) {
								base = 16;
								flags &= ~SCAN_XOK;
								goto addToInt;
							}
							break;
					}

					/*
					 * We got an illegal character so we are done accumulating.
					 */
					break;

addToInt:
					/*
					 * Add the character to the temporary buffer.
					 */
					*end++ = *string++;
					if (*string == '\0') {
						break;
					}
				}

				/*
				 * Check to see if we need to back up because we only got a
				 * sign or a trailing x after a 0.
				 */
				if (flags & SCAN_NODIGITS) {
					if (*string == '\0') {
						underflow = 1;
					}
					goto done;
				} else if (end[-1] == 'x' || end[-1] == 'X') {
					end--;
					string--;
				}

				/*
				 * Scan the value from the temporary buffer.  If we are
				 * returning a large unsigned value, we have to convert it back
				 * to a string since PHP only supports signed values.
				 */
				if (!(flags & SCAN_SUPPRESS)) {
					*end = '\0';
					value = (zend_long) (*fn)(buf, NULL, base);
					if ((flags & SCAN_UNSIGNED) && (value < 0)) {
						snprintf(buf, sizeof(buf), ZEND_ULONG_FMT, value); /* INTL: ISO digit */
						if (numVars && objIndex >= argCount) {
							break;
						} else if (numVars) {
						  /* change passed value type to string */
							current = Z_REFVAL(args[objIndex++]);
							zval_ptr_dtor(current);
							ZVAL_STRING(current, buf);
						} else {
							add_index_string(return_value, objIndex++, buf);
						}
					} else {
						if (numVars && objIndex >= argCount) {
							break;
						} else if (numVars) {
							current = Z_REFVAL(args[objIndex++]);
							zval_ptr_dtor(current);
							ZVAL_LONG(current, value);
						} else {
							add_index_long(return_value, objIndex++, value);
						}
					}
				}
				break;

			case 'f':
				/*
				 * Scan a floating point number
				 */
				buf[0] = '\0';     /* call me pedantic */
				if ((width == 0) || (width > sizeof(buf) - 1)) {
					width = sizeof(buf) - 1;
				}
				flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
				for (end = buf; width > 0; width--) {
					switch (*string) {
						case '0': case '1': case '2': case '3':
						case '4': case '5': case '6': case '7':
						case '8': case '9':
							flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
							goto addToFloat;
						case '+':
						case '-':
							if (flags & SCAN_SIGNOK) {
								flags &= ~SCAN_SIGNOK;
								goto addToFloat;
							}
							break;
						case '.':
							if (flags & SCAN_PTOK) {
								flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
								goto addToFloat;
							}
							break;
						case 'e':
						case 'E':
							/*
							 * An exponent is not allowed until there has
							 * been at least one digit.
							 */
							if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
								flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
									| SCAN_SIGNOK | SCAN_NODIGITS;
								goto addToFloat;
							}
							break;
					}

					/*
					 * We got an illegal character so we are done accumulating.
					 */
					break;

addToFloat:
					/*
					 * Add the character to the temporary buffer.
					 */
					*end++ = *string++;
					if (*string == '\0') {
						break;
					}
				}

				/*
				 * Check to see if we need to back up because we saw a
				 * trailing 'e' or sign.
				 */
				if (flags & SCAN_NODIGITS) {
					if (flags & SCAN_EXPOK) {
						/*
						 * There were no digits at all so scanning has
						 * failed and we are done.
						 */
						if (*string == '\0') {
							underflow = 1;
						}
						goto done;
					}

					/*
					 * We got a bad exponent ('e' and maybe a sign).
					 */
					end--;
					string--;
					if (*end != 'e' && *end != 'E') {
						end--;
						string--;
					}
				}

				/*
				 * Scan the value from the temporary buffer.
				 */
				if (!(flags & SCAN_SUPPRESS)) {
					double dvalue;
					*end = '\0';
					dvalue = zend_strtod(buf, NULL);
					if (numVars && objIndex >= argCount) {
						break;
					} else if (numVars) {
						current = Z_REFVAL(args[objIndex++]);
						zval_ptr_dtor(current);
						ZVAL_DOUBLE(current, dvalue);
					} else {
						add_index_double(return_value, objIndex++, dvalue );
					}
				}
				break;
		} /* switch (op) */
		nconversions++;
	} /*  while (*format != '\0') */

done:
	result = SCAN_SUCCESS;

	if (underflow && (0==nconversions)) {
		scan_set_error_return( numVars, return_value );
		result = SCAN_ERROR_EOF;
	} else if (numVars) {
		convert_to_long(return_value );
		Z_LVAL_P(return_value) = nconversions;
	} else if (nconversions < totalVars) {
		/* TODO: not all elements converted. we need to prune the list - cc */
	}
	return result;
}
Example #3
0
	/* ARGSUSED */
int
Tcl_ScanObjCmd(
    ClientData dummy,		/* Not used. */
    Tcl_Interp *interp,		/* Current interpreter. */
    int objc,			/* Number of arguments. */
    Tcl_Obj *const objv[])	/* Argument objects. */
{
    const char *format;
    int numVars, nconversions, totalVars = -1;
    int objIndex, offset, i, result, code;
    long value;
    const char *string, *end, *baseString;
    char op = 0;
    int width, underflow = 0;
    Tcl_WideInt wideValue;
    Tcl_UniChar ch, sch;
    Tcl_Obj **objs = NULL, *objPtr = NULL;
    int flags;
    char buf[513];		/* Temporary buffer to hold scanned number
				 * strings before they are passed to
				 * strtoul. */

    if (objc < 3) {
	Tcl_WrongNumArgs(interp, 1, objv,
		"string format ?varName ...?");
	return TCL_ERROR;
    }

    format = Tcl_GetStringFromObj(objv[2], NULL);
    numVars = objc-3;

    /*
     * Check for errors in the format string.
     */

    if (ValidateFormat(interp, format, numVars, &totalVars) == TCL_ERROR) {
	return TCL_ERROR;
    }

    /*
     * Allocate space for the result objects.
     */

    if (totalVars > 0) {
	objs = ckalloc(sizeof(Tcl_Obj *) * totalVars);
	for (i = 0; i < totalVars; i++) {
	    objs[i] = NULL;
	}
    }

    string = Tcl_GetStringFromObj(objv[1], NULL);
    baseString = string;

    /*
     * Iterate over the format string filling in the result objects until we
     * reach the end of input, the end of the format string, or there is a
     * mismatch.
     */

    objIndex = 0;
    nconversions = 0;
    while (*format != '\0') {
	int parseFlag = TCL_PARSE_NO_WHITESPACE;
	format += Tcl_UtfToUniChar(format, &ch);

	flags = 0;

	/*
	 * If we see whitespace in the format, skip whitespace in the string.
	 */

	if (Tcl_UniCharIsSpace(ch)) {
	    offset = Tcl_UtfToUniChar(string, &sch);
	    while (Tcl_UniCharIsSpace(sch)) {
		if (*string == '\0') {
		    goto done;
		}
		string += offset;
		offset = Tcl_UtfToUniChar(string, &sch);
	    }
	    continue;
	}

	if (ch != '%') {
	literal:
	    if (*string == '\0') {
		underflow = 1;
		goto done;
	    }
	    string += Tcl_UtfToUniChar(string, &sch);
	    if (ch != sch) {
		goto done;
	    }
	    continue;
	}

	format += Tcl_UtfToUniChar(format, &ch);
	if (ch == '%') {
	    goto literal;
	}

	/*
	 * Check for assignment suppression ('*') or an XPG3-style assignment
	 * ('%n$').
	 */

	if (ch == '*') {
	    flags |= SCAN_SUPPRESS;
	    format += Tcl_UtfToUniChar(format, &ch);
	} else if ((ch < 0x80) && isdigit(UCHAR(ch))) {	/* INTL: "C" locale. */
	    char *formatEnd;
	    value = strtoul(format-1, &formatEnd, 10);/* INTL: "C" locale. */
	    if (*formatEnd == '$') {
		format = formatEnd+1;
		format += Tcl_UtfToUniChar(format, &ch);
		objIndex = (int) value - 1;
	    }
	}

	/*
	 * Parse any width specifier.
	 */

	if ((ch < 0x80) && isdigit(UCHAR(ch))) {	/* INTL: "C" locale. */
	    width = (int) strtoul(format-1, (char **) &format, 10);/* INTL: "C" locale. */
	    format += Tcl_UtfToUniChar(format, &ch);
	} else {
	    width = 0;
	}

	/*
	 * Handle any size specifier.
	 */

	switch (ch) {
	case 'l':
	    if (*format == 'l') {
		flags |= SCAN_BIG;
		format += 1;
		format += Tcl_UtfToUniChar(format, &ch);
		break;
	    }
	case 'L':
	    flags |= SCAN_LONGER;
	    /*
	     * Fall through so we skip to the next character.
	     */
	case 'h':
	    format += Tcl_UtfToUniChar(format, &ch);
	}

	/*
	 * Handle the various field types.
	 */

	switch (ch) {
	case 'n':
	    if (!(flags & SCAN_SUPPRESS)) {
		objPtr = Tcl_NewIntObj(string - baseString);
		Tcl_IncrRefCount(objPtr);
		CLANG_ASSERT(objs);
		objs[objIndex++] = objPtr;
	    }
	    nconversions++;
	    continue;

	case 'd':
	    op = 'i';
	    parseFlag |= TCL_PARSE_DECIMAL_ONLY;
	    break;
	case 'i':
	    op = 'i';
	    parseFlag |= TCL_PARSE_SCAN_PREFIXES;
	    break;
	case 'o':
	    op = 'i';
	    parseFlag |= TCL_PARSE_OCTAL_ONLY | TCL_PARSE_SCAN_PREFIXES;
	    break;
	case 'x':
	case 'X':
	    op = 'i';
	    parseFlag |= TCL_PARSE_HEXADECIMAL_ONLY;
	    break;
	case 'b':
	    op = 'i';
	    parseFlag |= TCL_PARSE_BINARY_ONLY;
	    break;
	case 'u':
	    op = 'i';
	    parseFlag |= TCL_PARSE_DECIMAL_ONLY;
	    flags |= SCAN_UNSIGNED;
	    break;

	case 'f':
	case 'e':
	case 'E':
	case 'g':
	case 'G':
	    op = 'f';
	    break;

	case 's':
	    op = 's';
	    break;

	case 'c':
	    op = 'c';
	    flags |= SCAN_NOSKIP;
	    break;
	case '[':
	    op = '[';
	    flags |= SCAN_NOSKIP;
	    break;
	}

	/*
	 * At this point, we will need additional characters from the string
	 * to proceed.
	 */

	if (*string == '\0') {
	    underflow = 1;
	    goto done;
	}

	/*
	 * Skip any leading whitespace at the beginning of a field unless the
	 * format suppresses this behavior.
	 */

	if (!(flags & SCAN_NOSKIP)) {
	    while (*string != '\0') {
		offset = Tcl_UtfToUniChar(string, &sch);
		if (!Tcl_UniCharIsSpace(sch)) {
		    break;
		}
		string += offset;
	    }
	    if (*string == '\0') {
		underflow = 1;
		goto done;
	    }
	}

	/*
	 * Perform the requested scanning operation.
	 */

	switch (op) {
	case 's':
	    /*
	     * Scan a string up to width characters or whitespace.
	     */

	    if (width == 0) {
		width = ~0;
	    }
	    end = string;
	    while (*end != '\0') {
		offset = Tcl_UtfToUniChar(end, &sch);
		if (Tcl_UniCharIsSpace(sch)) {
		    break;
		}
		end += offset;
		if (--width == 0) {
		    break;
		}
	    }
	    if (!(flags & SCAN_SUPPRESS)) {
		objPtr = Tcl_NewStringObj(string, end-string);
		Tcl_IncrRefCount(objPtr);
		CLANG_ASSERT(objs);
		objs[objIndex++] = objPtr;
	    }
	    string = end;
	    break;

	case '[': {
	    CharSet cset;

	    if (width == 0) {
		width = ~0;
	    }
	    end = string;

	    format = BuildCharSet(&cset, format);
	    while (*end != '\0') {
		offset = Tcl_UtfToUniChar(end, &sch);
		if (!CharInSet(&cset, (int)sch)) {
		    break;
		}
		end += offset;
		if (--width == 0) {
		    break;
		}
	    }
	    ReleaseCharSet(&cset);

	    if (string == end) {
		/*
		 * Nothing matched the range, stop processing.
		 */
		goto done;
	    }
	    if (!(flags & SCAN_SUPPRESS)) {
		objPtr = Tcl_NewStringObj(string, end-string);
		Tcl_IncrRefCount(objPtr);
		objs[objIndex++] = objPtr;
	    }
	    string = end;

	    break;
	}
	case 'c':
	    /*
	     * Scan a single Unicode character.
	     */

	    string += Tcl_UtfToUniChar(string, &sch);
	    if (!(flags & SCAN_SUPPRESS)) {
		objPtr = Tcl_NewIntObj((int)sch);
		Tcl_IncrRefCount(objPtr);
		CLANG_ASSERT(objs);
		objs[objIndex++] = objPtr;
	    }
	    break;

	case 'i':
	    /*
	     * Scan an unsigned or signed integer.
	     */
	    objPtr = Tcl_NewLongObj(0);
	    Tcl_IncrRefCount(objPtr);
	    if (width == 0) {
		width = ~0;
	    }
	    if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, string, width,
		    &end, TCL_PARSE_INTEGER_ONLY | parseFlag)) {
		Tcl_DecrRefCount(objPtr);
		if (width < 0) {
		    if (*end == '\0') {
			underflow = 1;
		    }
		} else {
		    if (end == string + width) {
			underflow = 1;
		    }
		}
		goto done;
	    }
	    string = end;
	    if (flags & SCAN_SUPPRESS) {
		Tcl_DecrRefCount(objPtr);
		break;
	    }
	    if (flags & SCAN_LONGER) {
		if (Tcl_GetWideIntFromObj(NULL, objPtr, &wideValue) != TCL_OK) {
		    wideValue = ~(Tcl_WideUInt)0 >> 1;	/* WIDE_MAX */
		    if (TclGetString(objPtr)[0] == '-') {
			wideValue++;	/* WIDE_MAX + 1 = WIDE_MIN */
		    }
		}
		if ((flags & SCAN_UNSIGNED) && (wideValue < 0)) {
		    sprintf(buf, "%" TCL_LL_MODIFIER "u",
			    (Tcl_WideUInt)wideValue);
		    Tcl_SetStringObj(objPtr, buf, -1);
		} else {
		    Tcl_SetWideIntObj(objPtr, wideValue);
		}
	    } else if (!(flags & SCAN_BIG)) {
		if (TclGetLongFromObj(NULL, objPtr, &value) != TCL_OK) {
		    if (TclGetString(objPtr)[0] == '-') {
			value = LONG_MIN;
		    } else {
			value = LONG_MAX;
		    }
		}
		if ((flags & SCAN_UNSIGNED) && (value < 0)) {
		    sprintf(buf, "%lu", value);	/* INTL: ISO digit */
		    Tcl_SetStringObj(objPtr, buf, -1);
		} else {
		    Tcl_SetLongObj(objPtr, value);
		}
	    }
	    objs[objIndex++] = objPtr;
	    break;

	case 'f':
	    /*
	     * Scan a floating point number
	     */

	    objPtr = Tcl_NewDoubleObj(0.0);
	    Tcl_IncrRefCount(objPtr);
	    if (width == 0) {
		width = ~0;
	    }
	    if (TCL_OK != TclParseNumber(NULL, objPtr, NULL, string, width,
		    &end, TCL_PARSE_DECIMAL_ONLY | TCL_PARSE_NO_WHITESPACE)) {
		Tcl_DecrRefCount(objPtr);
		if (width < 0) {
		    if (*end == '\0') {
			underflow = 1;
		    }
		} else {
		    if (end == string + width) {
			underflow = 1;
		    }
		}
		goto done;
	    } else if (flags & SCAN_SUPPRESS) {
		Tcl_DecrRefCount(objPtr);
		string = end;
	    } else {
		double dvalue;
		if (Tcl_GetDoubleFromObj(NULL, objPtr, &dvalue) != TCL_OK) {
#ifdef ACCEPT_NAN
		    if (objPtr->typePtr == &tclDoubleType) {
			dvalue = objPtr->internalRep.doubleValue;
		    } else
#endif
		    {
			Tcl_DecrRefCount(objPtr);
			goto done;
		    }
		}
		Tcl_SetDoubleObj(objPtr, dvalue);
		CLANG_ASSERT(objs);
		objs[objIndex++] = objPtr;
		string = end;
	    }
	}