static int atoh(unsigned int *result, const char * const s, const size_t length) { size_t i = 0; unsigned int val = 0; while ((i < length) && csxdigit(s[i])) { if (csdigit(s[i])) val = val*0x10 + (s[i]-'0'); else if (csupper(s[i])) val = val*0x10 + (s[i]-'A') + 10; else val = val*0x10 + (s[i]-'a') + 10; i++; } if (i != length) return 0; *result = val; return 1; }
const char *check_unicode_name(const char *u) { if (*u != 'u') return 0; const char *p = ++u; for (;;) { int val = 0; const char *start = p; for (;;) { // only uppercase hex digits allowed if (!csxdigit(*p)) return 0; if (csdigit(*p)) val = val*0x10 + (*p-'0'); else if (csupper(*p)) val = val*0x10 + (*p-'A'+10); else return 0; // biggest Unicode value is U+10FFFF if (val > 0x10FFFF) return 0; p++; if (*p == '\0' || *p == '_') break; } // surrogates not allowed if ((val >= 0xD800 && val <= 0xDBFF) || (val >= 0xDC00 && val <= 0xDFFF)) return 0; if (val > 0xFFFF) { if (*start == '0') // no leading zeros allowed if > 0xFFFF return 0; } else if (p - start != 4) // otherwise, check for exactly 4 hex digits return 0; if (*p == '\0') break; p++; } return u; }
int get_token(int lookup_flag) { context_buffer.clear(); for (;;) { int n = 0; int bol = input_stack::bol(); int c = input_stack::get_char(); if (bol && c == command_char) { token_buffer.clear(); token_buffer += c; // the newline is not part of the token for (;;) { c = input_stack::peek_char(); if (c == EOF || c == '\n') break; input_stack::get_char(); token_buffer += char(c); } context_buffer = token_buffer; return COMMAND_LINE; } switch (c) { case EOF: return EOF; case ' ': case '\t': break; case '\\': { int d = input_stack::peek_char(); if (d != '\n') { context_buffer = '\\'; return '\\'; } input_stack::get_char(); break; } case '#': do { c = input_stack::get_char(); } while (c != '\n' && c != EOF); if (c == '\n') context_buffer = '\n'; return c; case '"': context_buffer = '"'; token_buffer.clear(); for (;;) { c = input_stack::get_char(); if (c == '\\') { context_buffer += '\\'; c = input_stack::peek_char(); if (c == '"') { input_stack::get_char(); token_buffer += '"'; context_buffer += '"'; } else token_buffer += '\\'; } else if (c == '\n') { error("newline in string"); break; } else if (c == EOF) { error("missing `\"'"); break; } else if (c == '"') { context_buffer += '"'; break; } else { context_buffer += char(c); token_buffer += char(c); } } return TEXT; case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { int overflow = 0; n = 0; for (;;) { if (n > (INT_MAX - 9)/10) { overflow = 1; break; } n *= 10; n += c - '0'; context_buffer += char(c); c = input_stack::peek_char(); if (c == EOF || !csdigit(c)) break; c = input_stack::get_char(); } token_double = n; if (overflow) { for (;;) { token_double *= 10.0; token_double += c - '0'; context_buffer += char(c); c = input_stack::peek_char(); if (c == EOF || !csdigit(c)) break; c = input_stack::get_char(); } // if somebody asks for 1000000000000th, we will silently // give them INT_MAXth double temp = token_double; // work around gas 1.34/sparc bug if (token_double > INT_MAX) n = INT_MAX; else n = int(temp); } } switch (c) { case 'i': case 'I': context_buffer += char(c); input_stack::get_char(); return NUMBER; case '.': { context_buffer += '.'; input_stack::get_char(); got_dot: double factor = 1.0; for (;;) { c = input_stack::peek_char(); if (c == EOF || !csdigit(c)) break; input_stack::get_char(); context_buffer += char(c); factor /= 10.0; if (c != '0') token_double += factor*(c - '0'); } if (c != 'e' && c != 'E') { if (c == 'i' || c == 'I') { context_buffer += char(c); input_stack::get_char(); } return NUMBER; } } // fall through case 'e': case 'E': { int echar = c; input_stack::get_char(); c = input_stack::peek_char(); int sign = '+'; if (c == '+' || c == '-') { sign = c; input_stack::get_char(); c = input_stack::peek_char(); if (c == EOF || !csdigit(c)) { input_stack::push_back(sign); input_stack::push_back(echar); return NUMBER; } context_buffer += char(echar); context_buffer += char(sign); } else { if (c == EOF || !csdigit(c)) { input_stack::push_back(echar); return NUMBER; } context_buffer += char(echar); } input_stack::get_char(); context_buffer += char(c); n = c - '0'; for (;;) { c = input_stack::peek_char(); if (c == EOF || !csdigit(c)) break; input_stack::get_char(); context_buffer += char(c); n = n*10 + (c - '0'); } if (sign == '-') n = -n; if (c == 'i' || c == 'I') { context_buffer += char(c); input_stack::get_char(); } token_double *= pow(10.0, n); return NUMBER; } case 'n': input_stack::get_char(); c = input_stack::peek_char(); if (c == 'd') { input_stack::get_char(); token_int = n; context_buffer += "nd"; return ORDINAL; } input_stack::push_back('n'); return NUMBER; case 'r': input_stack::get_char(); c = input_stack::peek_char(); if (c == 'd') { input_stack::get_char(); token_int = n; context_buffer += "rd"; return ORDINAL; } input_stack::push_back('r'); return NUMBER; case 't': input_stack::get_char(); c = input_stack::peek_char(); if (c == 'h') { input_stack::get_char(); token_int = n; context_buffer += "th"; return ORDINAL; } input_stack::push_back('t'); return NUMBER; case 's': input_stack::get_char(); c = input_stack::peek_char(); if (c == 't') { input_stack::get_char(); token_int = n; context_buffer += "st"; return ORDINAL; } input_stack::push_back('s'); return NUMBER; default: return NUMBER; } break; case '\'': { c = input_stack::peek_char(); if (c == 't') { input_stack::get_char(); c = input_stack::peek_char(); if (c == 'h') { input_stack::get_char(); context_buffer = "'th"; return TH; } else input_stack::push_back('t'); } context_buffer = "'"; return '\''; } case '.': { c = input_stack::peek_char(); if (c != EOF && csdigit(c)) { n = 0; token_double = 0.0; context_buffer = '.'; goto got_dot; } return get_token_after_dot(c); } case '<': c = input_stack::peek_char(); if (c == '-') { input_stack::get_char(); c = input_stack::peek_char(); if (c == '>') { input_stack::get_char(); context_buffer = "<->"; return DOUBLE_ARROW_HEAD; } context_buffer = "<-"; return LEFT_ARROW_HEAD; } else if (c == '=') { input_stack::get_char(); context_buffer = "<="; return LESSEQUAL; } context_buffer = "<"; return '<'; case '-': c = input_stack::peek_char(); if (c == '>') { input_stack::get_char(); context_buffer = "->"; return RIGHT_ARROW_HEAD; } context_buffer = "-"; return '-'; case '!': c = input_stack::peek_char(); if (c == '=') { input_stack::get_char(); context_buffer = "!="; return NOTEQUAL; } context_buffer = "!"; return '!'; case '>': c = input_stack::peek_char(); if (c == '=') { input_stack::get_char(); context_buffer = ">="; return GREATEREQUAL; } context_buffer = ">"; return '>'; case '=': c = input_stack::peek_char(); if (c == '=') { input_stack::get_char(); context_buffer = "=="; return EQUALEQUAL; } context_buffer = "="; return '='; case '&': c = input_stack::peek_char(); if (c == '&') { input_stack::get_char(); context_buffer = "&&"; return ANDAND; } context_buffer = "&"; return '&'; case '|': c = input_stack::peek_char(); if (c == '|') { input_stack::get_char(); context_buffer = "||"; return OROR; } context_buffer = "|"; return '|'; default: if (c != EOF && csalpha(c)) { token_buffer.clear(); token_buffer = c; for (;;) { c = input_stack::peek_char(); if (c == EOF || (!csalnum(c) && c != '_')) break; input_stack::get_char(); token_buffer += char(c); } int tok = lookup_keyword(token_buffer.contents(), token_buffer.length()); if (tok != 0) { context_buffer = token_buffer; return tok; } char *def = 0; if (lookup_flag) { token_buffer += '\0'; def = macro_table.lookup(token_buffer.contents()); token_buffer.set_length(token_buffer.length() - 1); if (def) { if (c == '(') { input_stack::get_char(); interpolate_macro_with_args(def); } else input_stack::push(new macro_input(def)); } } if (!def) { context_buffer = token_buffer; if (csupper(token_buffer[0])) return LABEL; else return VARIABLE; } } else { context_buffer = char(c); return (unsigned char)c; } break; } } }