Пример #1
0
static int atoh(unsigned int *result,
		const char * const s, const size_t length)
{
  size_t i = 0;
  unsigned int val = 0;
  while ((i < length) && csxdigit(s[i])) {
    if (csdigit(s[i]))
      val = val*0x10 + (s[i]-'0');
    else if (csupper(s[i]))
      val = val*0x10 + (s[i]-'A') + 10;
    else
      val = val*0x10 + (s[i]-'a') + 10;
    i++;
  }
  if (i != length)
    return 0;
  *result = val;
  return 1;
}
Пример #2
0
const char *check_unicode_name(const char *u)
{
  if (*u != 'u')
    return 0;
  const char *p = ++u;
  for (;;) {
    int val = 0;
    const char *start = p;
    for (;;) {
      // only uppercase hex digits allowed
      if (!csxdigit(*p))
	return 0;
      if (csdigit(*p))
	val = val*0x10 + (*p-'0');
      else if (csupper(*p))
	val = val*0x10 + (*p-'A'+10);
      else
	return 0;
      // biggest Unicode value is U+10FFFF
      if (val > 0x10FFFF)
	return 0;
      p++;
      if (*p == '\0' || *p == '_')
	break;
    }
    // surrogates not allowed
    if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF))
      return 0;
    if (val > 0xFFFF) {
      if (*start == '0')	// no leading zeros allowed if > 0xFFFF
	return 0;
    }
    else if (p - start != 4)	// otherwise, check for exactly 4 hex digits
      return 0;
    if (*p == '\0')
      break;
    p++;
  }
  return u;
}
Пример #3
0
int get_token(int lookup_flag)
{
  context_buffer.clear();
  for (;;) {
    int n = 0;
    int bol = input_stack::bol();
    int c = input_stack::get_char();
    if (bol && c == command_char) {
      token_buffer.clear();
      token_buffer += c;
      // the newline is not part of the token
      for (;;) {
	c = input_stack::peek_char();
	if (c == EOF || c == '\n')
	  break;
	input_stack::get_char();
	token_buffer += char(c);
      }
      context_buffer = token_buffer;
      return COMMAND_LINE;
    }
    switch (c) {
    case EOF:
      return EOF;
    case ' ':
    case '\t':
      break;
    case '\\':
      {
	int d = input_stack::peek_char();
	if (d != '\n') {
	  context_buffer = '\\';
	  return '\\';
	}
	input_stack::get_char();
	break;
      }
    case '#':
      do {
	c = input_stack::get_char();
      } while (c != '\n' && c != EOF);
      if (c == '\n')
	context_buffer = '\n';
      return c;
    case '"':
      context_buffer = '"';
      token_buffer.clear();
      for (;;) {
	c = input_stack::get_char();
	if (c == '\\') {
	  context_buffer += '\\';
	  c = input_stack::peek_char();
	  if (c == '"') {
	    input_stack::get_char();
	    token_buffer += '"';
	    context_buffer += '"';
	  }
	  else
	    token_buffer += '\\';
	}
	else if (c == '\n') {
	  error("newline in string");
	  break;
	}
	else if (c == EOF) {
	  error("missing `\"'");
	  break;
	}
	else if (c == '"') {
	  context_buffer += '"';
	  break;
	}
	else {
	  context_buffer += char(c);
	  token_buffer += char(c);
	}
      }
      return TEXT;
    case '0':
    case '1':
    case '2':
    case '3':
    case '4':
    case '5':
    case '6':
    case '7':
    case '8':
    case '9':
      {   
	int overflow = 0;
	n = 0;
	for (;;) {
	  if (n > (INT_MAX - 9)/10) {
	    overflow = 1;
	    break;
	  }
	  n *= 10;
	  n += c - '0';
	  context_buffer += char(c);
	  c = input_stack::peek_char();
	  if (c == EOF || !csdigit(c))
	    break;
	  c = input_stack::get_char();
	}
	token_double = n;
	if (overflow) {
	  for (;;) {
	    token_double *= 10.0;
	    token_double += c - '0';
	    context_buffer += char(c);
	    c = input_stack::peek_char();
	    if (c == EOF || !csdigit(c))
	      break;
	    c = input_stack::get_char();
	  }
	  // if somebody asks for 1000000000000th, we will silently
	  // give them INT_MAXth
	  double temp = token_double; // work around gas 1.34/sparc bug
	  if (token_double > INT_MAX)
	    n = INT_MAX;
	  else
	    n = int(temp);
	}
      }
      switch (c) {
      case 'i':
      case 'I':
	context_buffer += char(c);
	input_stack::get_char();
	return NUMBER;
      case '.':
	{
	  context_buffer += '.';
	  input_stack::get_char();
	got_dot:
	  double factor = 1.0;
	  for (;;) {
	    c = input_stack::peek_char();
	    if (c == EOF || !csdigit(c))
	      break;
	    input_stack::get_char();
	    context_buffer += char(c);
	    factor /= 10.0;
	    if (c != '0')
	      token_double += factor*(c - '0');
	  }
	  if (c != 'e' && c != 'E') {
	    if (c == 'i' || c == 'I') {
	      context_buffer += char(c);
	      input_stack::get_char();
	    }
	    return NUMBER;
	  }
	}
	// fall through
      case 'e':
      case 'E':
	{
	  int echar = c;
	  input_stack::get_char();
	  c = input_stack::peek_char();
	  int sign = '+';
	  if (c == '+' || c == '-') {
	    sign = c;
	    input_stack::get_char();
	    c = input_stack::peek_char();
	    if (c == EOF || !csdigit(c)) {
	      input_stack::push_back(sign);
	      input_stack::push_back(echar);
	      return NUMBER;
	    }
	    context_buffer += char(echar);
	    context_buffer += char(sign);
	  }
	  else {
	    if (c == EOF || !csdigit(c)) {
	      input_stack::push_back(echar);
	      return NUMBER;
	    }
	    context_buffer += char(echar);
	  }
	  input_stack::get_char();
	  context_buffer += char(c);
	  n = c - '0';
	  for (;;) {
	    c = input_stack::peek_char();
	    if (c == EOF || !csdigit(c))
	      break;
	    input_stack::get_char();
	    context_buffer += char(c);
	    n = n*10 + (c - '0');
	  }
	  if (sign == '-')
	    n = -n;
	  if (c == 'i' || c == 'I') {
	    context_buffer += char(c);
	    input_stack::get_char();
	  }
	  token_double *= pow(10.0, n);
	  return NUMBER;
	}
      case 'n':
	input_stack::get_char();
	c = input_stack::peek_char();
	if (c == 'd') {
	  input_stack::get_char();
	  token_int = n;
	  context_buffer += "nd";
	  return ORDINAL;
	}
	input_stack::push_back('n');
	return NUMBER;
      case 'r':
	input_stack::get_char();
	c = input_stack::peek_char();
	if (c == 'd') {
	  input_stack::get_char();
	  token_int = n;
	  context_buffer += "rd";
	  return ORDINAL;
	}
	input_stack::push_back('r');
	return NUMBER;
      case 't':
	input_stack::get_char();
	c = input_stack::peek_char();
	if (c == 'h') {
	  input_stack::get_char();
	  token_int = n;
	  context_buffer += "th";
	  return ORDINAL;
	}
	input_stack::push_back('t');
	return NUMBER;
      case 's':
	input_stack::get_char();
	c = input_stack::peek_char();
	if (c == 't') {
	  input_stack::get_char();
	  token_int = n;
	  context_buffer += "st";
	  return ORDINAL;
	}
	input_stack::push_back('s');
	return NUMBER;
      default:
	return NUMBER;
      }
      break;
    case '\'':
      {
	c = input_stack::peek_char();
	if (c == 't') {
	  input_stack::get_char();
	  c = input_stack::peek_char();
	  if (c == 'h') {
	    input_stack::get_char();
	    context_buffer = "'th";
	    return TH;
	  }
	  else
	    input_stack::push_back('t');
	}
	context_buffer = "'";
	return '\'';
      }
    case '.':
      {
	c = input_stack::peek_char();
	if (c != EOF && csdigit(c)) {
	  n = 0;
	  token_double = 0.0;
	  context_buffer = '.';
	  goto got_dot;
	}
	return get_token_after_dot(c);
      }
    case '<':
      c = input_stack::peek_char();
      if (c == '-') {
	input_stack::get_char();
	c = input_stack::peek_char();
	if (c == '>') {
	  input_stack::get_char();
	  context_buffer = "<->";
	  return DOUBLE_ARROW_HEAD;
	}
	context_buffer = "<-";
	return LEFT_ARROW_HEAD;
      }
      else if (c == '=') {
	input_stack::get_char();
	context_buffer = "<=";
	return LESSEQUAL;
      }
      context_buffer = "<";
      return '<';
    case '-':
      c = input_stack::peek_char();
      if (c == '>') {
	input_stack::get_char();
	context_buffer = "->";
	return RIGHT_ARROW_HEAD;
      }
      context_buffer = "-";
      return '-';
    case '!':
      c = input_stack::peek_char();
      if (c == '=') {
	input_stack::get_char();
	context_buffer = "!=";
	return NOTEQUAL;
      }
      context_buffer = "!";
      return '!';
    case '>':
      c = input_stack::peek_char();
      if (c == '=') {
	input_stack::get_char();
	context_buffer = ">=";
	return GREATEREQUAL;
      }
      context_buffer = ">";
      return '>';
    case '=':
      c = input_stack::peek_char();
      if (c == '=') {
	input_stack::get_char();
	context_buffer = "==";
	return EQUALEQUAL;
      }
      context_buffer = "=";
      return '=';
    case '&':
      c = input_stack::peek_char();
      if (c == '&') {
	input_stack::get_char();
	context_buffer = "&&";
	return ANDAND;
      }
      context_buffer = "&";
      return '&';
    case '|':
      c = input_stack::peek_char();
      if (c == '|') {
	input_stack::get_char();
	context_buffer = "||";
	return OROR;
      }
      context_buffer = "|";
      return '|';
    default:
      if (c != EOF && csalpha(c)) {
	token_buffer.clear();
	token_buffer = c;
	for (;;) {
	  c = input_stack::peek_char();
	  if (c == EOF || (!csalnum(c) && c != '_'))
	    break;
	  input_stack::get_char();
	  token_buffer += char(c);
	}
	int tok = lookup_keyword(token_buffer.contents(),
				 token_buffer.length());
	if (tok != 0) {
	  context_buffer = token_buffer;
	  return tok;
	}
	char *def = 0;
	if (lookup_flag) {
	  token_buffer += '\0';
	  def = macro_table.lookup(token_buffer.contents());
	  token_buffer.set_length(token_buffer.length() - 1);
	  if (def) {
	    if (c == '(') {
	      input_stack::get_char();
	      interpolate_macro_with_args(def);
	    }
	    else
	      input_stack::push(new macro_input(def));
	  }
	}
	if (!def) {
	  context_buffer = token_buffer;
	  if (csupper(token_buffer[0]))
	    return LABEL;
	  else
	    return VARIABLE;
	}
      }
      else {
	context_buffer = char(c);
	return (unsigned char)c;
      }
      break;
    }
  }
}