bool step() { if(get_tval()=="datatype") { try_lex(); do_read_datatype(); lex_save(); try_lex(); } else if(get_tval()=="match") { try_lex(); do_read_match(); lex_save(); } else if(types.count(get_tval())) { do_read_type(); } else if(constructors.count(get_tval())) { do_read_ctor(); } else if(d<=d_limit && (get_tval()=="namespace" || get_tval()=="class" || get_tval()=="struct" || get_tval()=="=")) { maybe_acceptablescope=true; d_limit++; fprintf(OUTF,"%s",get_tval().c_str()); return lex(); } else if(maybe_function && is_identifier(get_tid()) && d<=d_limit && last_token==")") { /* assume matching function */ fprintf(OUTF,";\n"); lex_rewind(); // go back to last } or ; lex(); while(!is_identifier(get_tid()) && !is_keyword(get_tid())) lex(); maybe_function=false; //prevent nasty recursion do_read_matchfun(); last_token=""; maybe_function=true; } else { if(get_tval()=="{") { maybe_acceptablescope=false; ++d; } else if(get_tval()=="}") { lex_save(); --d; if(d<d_limit) --d_limit; // leave a "good" scope (namespace, class, struct) } else if(get_tval()==";") { lex_save(); if(maybe_acceptablescope) { maybe_acceptablescope=false; --d_limit; //class or struct was just a declaration } } fprintf(OUTF,"%s",get_tval().c_str()); if(!is_space(get_tid())) last_token=get_tval(); return lex(); } return true; }
static int lex_get_save(lex_t *lex, json_error_t *error) { int c = stream_get(&lex->stream, error); if(c != STREAM_STATE_EOF && c != STREAM_STATE_ERROR) lex_save(lex, c); return c; }
/* Parse a long string or long comment (tv set to NULL). */ static void lex_longstring(LexState *ls, TValue *tv, int sep) { lex_savenext(ls); /* Skip second '['. */ if (lex_iseol(ls)) /* Skip initial newline. */ lex_newline(ls); for (;;) { switch (ls->c) { case LEX_EOF: lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM); break; case ']': if (lex_skipeq(ls) == sep) { lex_savenext(ls); /* Skip second ']'. */ goto endloop; } break; case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); if (!tv) lj_buf_reset(&ls->sb); /* Don't waste space for comments. */ break; default: lex_savenext(ls); break; } } endloop: if (tv) { GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep), sbuflen(&ls->sb) - 2*(2 + (MSize)sep)); setstrV(ls->L, tv, str); } }
static void lex_save_cached(lex_t *lex) { while (lex->stream.buffer[lex->stream.buffer_pos] != '\0') { lex_save(lex, lex->stream.buffer[lex->stream.buffer_pos]); lex->stream.buffer_pos++; lex->stream.position++; } }
/* Parse a number literal. */ static void lex_number(LexState *ls, TValue *tv) { StrScanFmt fmt; LexChar c, xp = 'e'; lua_assert(lj_char_isdigit(ls->c)); if ((c = ls->c) == '0' && (lex_savenext(ls) | 0x20) == 'x') xp = 'p'; while (lj_char_isident(ls->c) || ls->c == '.' || ((ls->c == '-' || ls->c == '+') && (c | 0x20) == xp)) { c = ls->c; lex_savenext(ls); } lex_save(ls, '\0'); fmt = lj_strscan_scan((const uint8_t *)sbufB(&ls->sb), tv, (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) | (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0)); if (LJ_DUALNUM && fmt == STRSCAN_INT) { setitype(tv, LJ_TISNUM); } else if (fmt == STRSCAN_NUM) { /* Already in correct format. */ #if LJ_HASFFI } else if (fmt != STRSCAN_ERROR) { lua_State *L = ls->L; GCcdata *cd; lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG); if (!ctype_ctsG(G(L))) { ptrdiff_t oldtop = savestack(L, L->top); luaopen_ffi(L); /* Load FFI library on-demand. */ L->top = restorestack(L, oldtop); } if (fmt == STRSCAN_IMAG) { cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double)); ((double *)cdataptr(cd))[0] = 0; ((double *)cdataptr(cd))[1] = numV(tv); } else { cd = lj_cdata_new_(L, fmt==STRSCAN_I64 ? CTID_INT64 : CTID_UINT64, 8); *(uint64_t *)cdataptr(cd) = tv->u64; } lj_parse_keepcdata(ls, tv, cd); #endif } else { lua_assert(fmt == STRSCAN_ERROR); lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER); } }
static int lex_scan(lex_t *lex, json_error_t *error) { int c; strbuffer_clear(&lex->saved_text); if(lex->token == TOKEN_STRING) { jsonp_free(lex->value.string); lex->value.string = NULL; } c = lex_get(lex, error); while(c == ' ' || c == '\t' || c == '\n' || c == '\r') c = lex_get(lex, error); if(c == STREAM_STATE_EOF) { lex->token = TOKEN_EOF; goto out; } if(c == STREAM_STATE_ERROR) { lex->token = TOKEN_INVALID; goto out; } lex_save(lex, c); if(c == '{' || c == '}' || c == '[' || c == ']' || c == ':' || c == ',') lex->token = c; else if(c == '"') lex_scan_string(lex, error); else if(l_isdigit(c) || c == '-') { if(lex_scan_number(lex, c, error)) goto out; } else if(l_isalpha(c)) { /* eat up the whole identifier for clearer error messages */ const char *saved_text; c = lex_get_save(lex, error); while(l_isalpha(c)) c = lex_get_save(lex, error); lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); if(strcmp(saved_text, "true") == 0) lex->token = TOKEN_TRUE; else if(strcmp(saved_text, "false") == 0) lex->token = TOKEN_FALSE; else if(strcmp(saved_text, "null") == 0) lex->token = TOKEN_NULL; else lex->token = TOKEN_INVALID; } else { /* save the rest of the input UTF-8 sequence to get an error message of valid UTF-8 */ lex_save_cached(lex); lex->token = TOKEN_INVALID; } out: return lex->token; }
static int lex_scan_number(lex_t *lex, int c, json_error_t *error) { const char *saved_text; char *end; double value; lex->token = TOKEN_INVALID; if(c == '-') c = lex_get_save(lex, error); if(c == '0') { c = lex_get_save(lex, error); if(l_isdigit(c)) { lex_unget_unsave(lex, c); goto out; } } else if(l_isdigit(c)) { c = lex_get_save(lex, error); while(l_isdigit(c)) c = lex_get_save(lex, error); } else { lex_unget_unsave(lex, c); goto out; } if(c != '.' && c != 'E' && c != 'e') { json_int_t value; lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); errno = 0; value = json_strtoint(saved_text, &end, 10); if(errno == ERANGE) { if(value < 0) error_set(error, lex, "too big negative integer"); else error_set(error, lex, "too big integer"); goto out; } assert(end == saved_text + lex->saved_text.length); lex->token = TOKEN_INTEGER; lex->value.integer = value; return 0; } if(c == '.') { c = lex_get(lex, error); if(!l_isdigit(c)) { lex_unget(lex, c); goto out; } lex_save(lex, c); c = lex_get_save(lex, error); while(l_isdigit(c)) c = lex_get_save(lex, error); } if(c == 'E' || c == 'e') { c = lex_get_save(lex, error); if(c == '+' || c == '-') c = lex_get_save(lex, error); if(!l_isdigit(c)) { lex_unget_unsave(lex, c); goto out; } c = lex_get_save(lex, error); while(l_isdigit(c)) c = lex_get_save(lex, error); } lex_unget_unsave(lex, c); if(jsonp_strtod(&lex->saved_text, &value)) { error_set(error, lex, "real number overflow"); goto out; } lex->token = TOKEN_REAL; lex->value.real = value; return 0; out: return -1; }
static z_token lex_scan(z_lexstate *ls) { if(ls->cur.eof) { lex_save(ls); return lex_newToken(ls, T_EOF, 0); } if(isWhite(nc)) { while(isWhite(nc)) { if(isNewLine(nc)) { lex_scanNewLine(ls); return lex_newToken(ls, T_NL, tk_generic); } else lex_nextchar(ls); } return lex_scan(ls); } /* line comment */ if(nc == '/') { if(nnc == '/') { while(!isNewLine(nc)) lex_nextchar(ls); return lex_scan(ls); } } /* multi line comment */ if(nc == '/') { if(nnc == '*') { lex_nextchar(ls); lex_nextchar(ls); for(;;) { if(ls->cur.eof) { syntaxError(ls, "unterminated comment reached end of file"); break; } else if(nc == '*') { lex_nextchar(ls); if(nc == '/') { lex_nextchar(ls); return lex_scan(ls); } } else if(isNewLine(nc)) { lex_scanNewLine(ls); } else lex_nextchar(ls); } } } lex_save(ls); /* numerical constants */ if(isDigit(nc)) { parse_number: while(isDigit(nc)) lex_nextchar(ls); if(nc == '.') { lex_nextchar(ls); while(isDigit(nc)) lex_nextchar(ls); if(nc == '.') syntaxError(ls, "invalid numerical constant"); } return lex_newToken(ls, T_NUMBER, tk_numeric); } /* identifiers */ else if(isAlpha(nc)) { parse_ident: while(isAlNum(nc) || nc == '_') lex_nextchar(ls); /* check if it matches a keyword token */ z_token tk = lex_newToken(ls, T_IDENT, tk_identifier); lex_matchKeyword(ls, &tk); return tk; } /* string literals */ else if(nc == '"' || nc == '\''){ //parse_string: char q = nc; lex_nextchar(ls); while(nc != q) { if(ls->cur.eof) { syntaxError(ls, "unterminated string literal reached end of file"); break; } /* skip escaped chars */ if(nc == '\\') { lex_nextchar(ls); continue; } if(isNewLine(nc)) { lex_scanNewLine(ls); } lex_nextchar(ls); } lex_nextchar(ls); // skip the closing cc return lex_newToken(ls, T_STRING, tk_string); } /* other multi char tokens */ switch(nc) { case '.': // may be numeric? lex_nextchar(ls); if(isDigit(nc)) goto parse_number; return lex_newToken(ls, '.', 0); case '_': // may be ident? lex_nextchar(ls); if(isAlNum(nc)) goto parse_ident; return lex_newToken(ls, '_', 0); case '+': lex_nextchar(ls); if(nc == '+') { lex_nextchar(ls); return lex_newToken(ls, T_INC, tk_op); } else if(nc == '=') { lex_nextchar(ls); return lex_newToken(ls, T_AA, tk_op); } return lex_newToken(ls, '+', tk_op); case '-': lex_nextchar(ls); if(nc == '-') { lex_nextchar(ls); return lex_newToken(ls, T_DEC, tk_op); } else if(nc == '=') { lex_nextchar(ls); return lex_newToken(ls, T_SA, tk_op); } return lex_newToken(ls, '-', tk_op); case '*': lex_nextchar(ls); if(nc == '=') { lex_nextchar(ls); return lex_newToken(ls, T_MA, tk_op); } return lex_newToken(ls, '*', tk_op); case '/': lex_nextchar(ls); if(nc == '=') { lex_nextchar(ls); return lex_newToken(ls, T_DA, tk_op); } return lex_newToken(ls, '/', tk_op); case '>': lex_nextchar(ls); if(nc == '=') { lex_nextchar(ls); return lex_newToken(ls, T_GTE, tk_op); } return lex_newToken(ls, '>', tk_op); case '<': lex_nextchar(ls); if(nc == '=') { lex_nextchar(ls); return lex_newToken(ls, T_LTE, tk_op); } else if(nc == '>') { lex_nextchar(ls); return lex_newToken(ls, T_NE, tk_op); } return lex_newToken(ls, '<', tk_op); case '=': lex_nextchar(ls); if(nc == '=') { lex_nextchar(ls); return lex_newToken(ls, T_EQ, tk_op); } return lex_newToken(ls, '=', tk_op); case '&': lex_nextchar(ls); if(nc == '&') { lex_nextchar(ls); return lex_newToken(ls, T_AND, tk_op); } return lex_newToken(ls, '&', tk_op); case '|': lex_nextchar(ls); if(nc == '|') { lex_nextchar(ls); return lex_newToken(ls, T_OR, tk_op); } return lex_newToken(ls, '|', tk_op); case '^': lex_nextchar(ls); if(nc == '^') { lex_nextchar(ls); return lex_newToken(ls, T_XOR, tk_op); } return lex_newToken(ls, '^', tk_op); case '!': lex_nextchar(ls); return lex_newToken(ls, T_NOT, tk_op); case ':': lex_nextchar(ls); if(nc == '=') { lex_nextchar(ls); return lex_newToken(ls, T_DE, tk_op); } return lex_newToken(ls, ':', 0); } char c = nc; lex_nextchar(ls); return lex_newToken(ls, c, 0); }
/* Save previous character and get next character. */ static LJ_AINLINE LexChar lex_savenext(LexState *ls) { lex_save(ls, ls->c); return lex_next(ls); }
/* Parse a string. */ static void lex_string(LexState *ls, TValue *tv) { LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */ lex_savenext(ls); while (ls->c != delim) { switch (ls->c) { case LEX_EOF: lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); continue; case '\n': case '\r': lj_lex_error(ls, TK_string, LJ_ERR_XSTR); continue; case '\\': { LexChar c = lex_next(ls); /* Skip the '\\'. */ switch (c) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case 'x': /* Hexadecimal escape '\xXX'. */ c = (lex_next(ls) & 15u) << 4; if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) lj_lex_error(ls, TK_string, LJ_ERR_XHEX); c += 9 << 4; } c += (lex_next(ls) & 15u); if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) lj_lex_error(ls, TK_string, LJ_ERR_XHEX); c += 9; } break; case 'u': /* Unicode escape '\u{XX...}'. */ if (lex_next(ls) != '{') goto err_xesc; lex_next(ls); c = 0; do { c = (c << 4) | (ls->c & 15u); if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) goto err_xesc; c += 9; } if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */ } while (lex_next(ls) != '}'); if (c < 0x800) { if (c < 0x80) break; lex_save(ls, 0xc0 | (c >> 6)); } else { if (c >= 0x10000) { lex_save(ls, 0xf0 | (c >> 18)); lex_save(ls, 0x80 | ((c >> 12) & 0x3f)); } else { if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */ lex_save(ls, 0xe0 | (c >> 12)); } lex_save(ls, 0x80 | ((c >> 6) & 0x3f)); } c = 0x80 | (c & 0x3f); break; case 'z': /* Skip whitespace. */ lex_next(ls); while (lj_char_isspace(ls->c)) if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls); continue; case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue; case '\\': case '\"': case '\'': break; case LEX_EOF: continue; default: if (!lj_char_isdigit(c)) goto err_xesc; c -= '0'; /* Decimal escape '\ddd'. */ if (lj_char_isdigit(lex_next(ls))) { c = c*10 + (ls->c - '0'); if (lj_char_isdigit(lex_next(ls))) { c = c*10 + (ls->c - '0'); if (c > 255) { err_xesc: lj_lex_error(ls, TK_string, LJ_ERR_XESC); } lex_next(ls); } } lex_save(ls, c); continue; } lex_save(ls, c); lex_next(ls); continue; }
static int lex_scan_number(lex_t *lex, char c, json_error_t *error) { const char *saved_text; char *end; double value; lex->token = TOKEN_INVALID; if(c == '-') c = lex_get_save(lex, error); if(c == '0') { c = lex_get_save(lex, error); if(isdigit(c)) { lex_unget_unsave(lex, c); goto out; } } else if(isdigit(c)) { c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } else { lex_unget_unsave(lex, c); goto out; } if(c != '.' && c != 'E' && c != 'e') { long value; lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); value = strtol(saved_text, &end, 10); assert(end == saved_text + lex->saved_text.length); if((value == LONG_MAX && errno == ERANGE) || value > INT_MAX) { error_set(error, lex, "too big integer"); goto out; } else if((value == LONG_MIN && errno == ERANGE) || value < INT_MIN) { error_set(error, lex, "too big negative integer"); goto out; } lex->token = TOKEN_INTEGER; lex->value.integer = (int)value; return 0; } if(c == '.') { c = lex_get(lex, error); if(!isdigit(c)) goto out; lex_save(lex, c); c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } if(c == 'E' || c == 'e') { c = lex_get_save(lex, error); if(c == '+' || c == '-') c = lex_get_save(lex, error); if(!isdigit(c)) { lex_unget_unsave(lex, c); goto out; } c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); value = strtod(saved_text, &end); assert(end == saved_text + lex->saved_text.length); if(value == 0 && errno == ERANGE) { error_set(error, lex, "real number underflow"); goto out; } /* Cannot test for +/-HUGE_VAL because the HUGE_VAL constant is only defined in C99 mode. So let's trust in sole errno. */ else if(errno == ERANGE) { error_set(error, lex, "real number overflow"); goto out; } lex->token = TOKEN_REAL; lex->value.real = value; return 0; out: return -1; }
static int lex_get_save(lex_t *lex, json_error_t *error) { char c = stream_get(&lex->stream, error); lex_save(lex, c); return c; }
static int lex_scan_number(lex_t *lex, char c, json_error_t *error) { const char *saved_text; char *end; double value; lex->token = TOKEN_INVALID; if(c == '-') c = lex_get_save(lex, error); if(c == '0') { c = lex_get_save(lex, error); if(isdigit(c)) { lex_unget_unsave(lex, c); goto out; } } else if(isdigit(c)) { c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } else { lex_unget_unsave(lex, c); goto out; } if(c != '.' && c != 'E' && c != 'e') { long value; lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); value = strtol(saved_text, &end, 10); assert(end == saved_text + lex->saved_text.length); if((value == LONG_MAX && errno == ERANGE) || value > INT_MAX) { unsigned long uvalue = strtoul(saved_text, &end, 10); if( !(uvalue>value) ) { error_set(error, lex, "too big integer"); goto out; } else { value = uvalue; } } else if((value == LONG_MIN && errno == ERANGE) || value < INT_MIN) { error_set(error, lex, "too big negative integer"); goto out; } lex->token = TOKEN_INTEGER; lex->value.integer = (int)value; return 0; } if(c == '.') { c = lex_get(lex, error); if(!isdigit(c)) goto out; lex_save(lex, c); c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } if(c == 'E' || c == 'e') { c = lex_get_save(lex, error); if(c == '+' || c == '-') c = lex_get_save(lex, error); if(!isdigit(c)) { lex_unget_unsave(lex, c); goto out; } c = lex_get_save(lex, error); while(isdigit(c)) c = lex_get_save(lex, error); } lex_unget_unsave(lex, c); saved_text = strbuffer_value(&lex->saved_text); value = strtod(saved_text, &end); assert(end == saved_text + lex->saved_text.length); if(errno == ERANGE && value != 0) { error_set(error, lex, "real number overflow"); goto out; } lex->token = TOKEN_REAL; lex->value.real = value; return 0; out: return -1; }