/*
** Parse a 64 bit integer literal from the text in the lexer buffer.
**
** Accepts a decimal number or a hexadecimal number with a 0x/0X prefix,
** followed by a suffix made of 'U' and 'L' characters (either case).
** The suffix must contain exactly two 'L's and must be followed by a
** NUL byte. Any 'U' in the suffix selects CTID_UINT64 instead of
** CTID_INT64.
**
** Returns 1 after creating an 8 byte cdata with the value and passing
** it to lj_parse_keepcdata() along with tv. Returns 0 if the text does
** not have the required form.
*/
static int lex_number64(LexState *ls, TValue *tv)
{
  const uint8_t *s = (const uint8_t *)ls->sb.buf;
  uint64_t val = 0;
  CTypeID ctid = CTID_INT64;
  int nl = 0;  /* Number of 'L' suffix characters seen. */
  GCcdata *cd;

  if (s[0] == '0' && (s[1] | 0x20) == 'x') {
    /* Hexadecimal: at least one hex digit must follow the prefix. */
    s += 2;
    if (!lj_char_isxdigit(*s))
      return 0;
    while (lj_char_isxdigit(*s)) {
      /* The low nibble is the digit value for 0-9; letters need +9. */
      unsigned int digit = *s & 15;
      if (!lj_char_isdigit(*s))
        digit += 9;
      val = val*16 + digit;
      s++;
    }
  } else {
    /* Decimal: at least one digit is required. */
    if (!lj_char_isdigit(*s))
      return 0;
    while (lj_char_isdigit(*s)) {
      val = val*10 + (uint64_t)(*s - '0');
      s++;
    }
  }

  /* Consume the suffix characters in any order. */
  for (;; s++) {
    int up = *s & ~0x20;  /* Fold ASCII letters to upper case. */
    if (up == 'U')
      ctid = CTID_UINT64;
    else if (up == 'L')
      nl++;
    else
      break;
  }
  if (nl != 2 || *s != '\0')
    return 0;

  /* Wrap the value in a cdata object holding a 64 bit integer. */
  cd = lj_cdata_new_(ls->L, ctid, 8);
  *(uint64_t *)cdataptr(cd) = val;
  lj_parse_keepcdata(ls, tv, cd);
  return 1;  /* Ok. */
}
/*
** Parse a number literal.
**
** Collects the literal's characters into the lexer buffer, terminates
** them with a NUL byte and hands the text to lj_strscan_scan(). A sign
** character is only taken as part of the literal directly after the
** exponent marker: 'e'/'E', or 'p'/'P' when the literal starts with
** 0x/0X. With LJ_HASFFI, 64 bit integer and imaginary results are boxed
** into cdata objects. A scan error is reported via lj_lex_error().
*/
static void lex_number(LexState *ls, TValue *tv)
{
  StrScanFmt fmt;
  int prev;          /* Most recently saved character. */
  int expch = 'e';   /* Lower-case exponent marker for this literal. */

  lua_assert(lj_char_isdigit(ls->current));
  prev = ls->current;
  if (prev == '0') {
    save_and_next(ls);
    if ((ls->current | 0x20) == 'x')
      expch = 'p';
  }
  for (;;) {
    int cur = ls->current;
    int is_sign = (cur == '-' || cur == '+');
    if (!(lj_char_isident(cur) || cur == '.' ||
          (is_sign && (prev | 0x20) == expch)))
      break;
    prev = cur;
    save_and_next(ls);
  }
  save(ls, '\0');

  fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv,
          (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
          (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));

  if (LJ_DUALNUM && fmt == STRSCAN_INT) {
    setitype(tv, LJ_TISNUM);
    return;
  }
  if (fmt == STRSCAN_NUM)
    return;  /* Already in correct format. */
#if LJ_HASFFI
  if (fmt != STRSCAN_ERROR) {
    lua_State *L = ls->L;
    GCcdata *cd;
    lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG);
    if (!ctype_ctsG(G(L))) {
      /* Load FFI library on-demand, keeping the stack top unchanged. */
      ptrdiff_t oldtop = savestack(L, L->top);
      luaopen_ffi(L);
      L->top = restorestack(L, oldtop);
    }
    if (fmt == STRSCAN_IMAG) {
      /* Complex number with a zero real part. */
      double *parts;
      cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
      parts = (double *)cdataptr(cd);
      parts[0] = 0;
      parts[1] = numV(tv);
    } else {
      cd = lj_cdata_new_(L, fmt==STRSCAN_I64 ? CTID_INT64 : CTID_UINT64, 8);
      *(uint64_t *)cdataptr(cd) = tv->u64;
    }
    lj_parse_keepcdata(ls, tv, cd);
    return;
  }
#endif
  lua_assert(fmt == STRSCAN_ERROR);
  lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
}
/*
** Parse a number literal.
**
** Collects the literal's characters into the lexer buffer and converts
** the text with lj_str_numconv(). A sign character is only taken as
** part of the literal directly after 'e'/'E' or 'p'/'P'. With LJ_HASFFI,
** a trailing 'L'/'U' first tries lex_number64(), and a trailing 'I' is
** dropped from the buffer and the converted value becomes the imaginary
** part of a complex cdata. With LJ_DUALNUM, a number that converts to
** int32_t and back unchanged is stored as an integer. A conversion
** failure is reported via lj_lex_error().
*/
static void lex_number(LexState *ls, TValue *tv)
{
  int c;  /* Last saved character (upper-cased below when LJ_HASFFI). */

  lua_assert(lj_char_isdigit(ls->current));
  for (;;) {
    int up;
    c = ls->current;
    save_and_next(ls);
    if (lj_char_isident(ls->current) || ls->current == '.')
      continue;
    if (ls->current != '-' && ls->current != '+')
      break;
    /* A sign only continues the literal right after an exponent marker. */
    up = c & ~0x20;
    if (up != 'E' && up != 'P')
      break;
  }

#if LJ_HASFFI
  c &= ~0x20;
  if ((c == 'I' || c == 'L' || c == 'U') && !ctype_ctsG(G(ls->L)))
    lex_loadffi(ls->L);
  if (c == 'I')  /* Drop the suffix: parse imaginary part of complex number. */
    ls->sb.n--;
#endif
  save(ls, '\0');

#if LJ_HASFFI
  if ((c == 'L' || c == 'U') && lex_number64(ls, tv))
    return;  /* Parsed as 64 bit int. */
#endif

  if (!lj_str_numconv(ls->sb.buf, tv)) {
    lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
    return;
  }

#if LJ_HASFFI
  if (c == 'I') {
    /* Return cdata holding a complex number with a zero real part. */
    GCcdata *cd = lj_cdata_new_(ls->L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
    double *parts = (double *)cdataptr(cd);
    parts[0] = 0;
    parts[1] = numberVnum(tv);
    lj_parse_keepcdata(ls, tv, cd);
  }
#endif
  if (LJ_DUALNUM && tvisnum(tv)) {
    int32_t ival = lj_num2int(numV(tv));
    if ((lua_Number)ival == numV(tv))  /* -0 cannot end up here. */
      setintV(tv, ival);
  }
}
/*
** Skip the current character. If the character after it equals `second',
** skip that one too and return `tok2'; otherwise return `tok1'.
*/
static int llex_op2(LexState *ls, int second, int tok1, int tok2)
{
  next(ls);
  if (ls->current != second)
    return tok1;
  next(ls);
  return tok2;
}

/*
** Scan and return the next token.
**
** Resets the lexer buffer, then skips whitespace, line breaks and
** comments until a token is found. For names, strings and numbers the
** token value is stored through tv. Returns a TK_* code, or the
** character itself for single-character tokens.
*/
static int llex(LexState *ls, TValue *tv)
{
  lj_str_resetbuf(&ls->sb);
  for (;;) {
    int sep;

    if (lj_char_isident(ls->current)) {
      GCstr *name;
      if (lj_char_isdigit(ls->current)) {  /* Numeric literal. */
        lex_number(ls, tv);
        return TK_number;
      }
      /* Identifier or reserved word. */
      do {
        save_and_next(ls);
      } while (lj_char_isident(ls->current));
      name = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
      setstrV(ls->L, tv, name);
      /* A non-zero `reserved' field marks a reserved word. */
      return name->reserved > 0 ? TK_OFS + name->reserved : TK_name;
    }

    switch (ls->current) {
    case '\n':
    case '\r':
      inclinenumber(ls);
      continue;
    case ' ':
    case '\t':
    case '\v':
    case '\f':
      next(ls);
      continue;
    case '-':
      next(ls);
      if (ls->current != '-')
        return '-';
      /* Two dashes start a comment. */
      next(ls);
      if (ls->current == '[') {
        sep = skip_sep(ls);
        lj_str_resetbuf(&ls->sb);  /* `skip_sep' may dirty the buffer */
        if (sep >= 0) {
          read_long_string(ls, NULL, sep);  /* long comment */
          lj_str_resetbuf(&ls->sb);
          continue;
        }
      }
      /* Short comment: skip up to the line break or end of stream. */
      while (!currIsNewline(ls) && ls->current != END_OF_STREAM)
        next(ls);
      continue;
    case '[':
      sep = skip_sep(ls);
      if (sep >= 0) {
        read_long_string(ls, tv, sep);
        return TK_string;
      }
      if (sep == -1)
        return '[';
      lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
      continue;
    case '=':
      return llex_op2(ls, '=', '=', TK_eq);
    case '<':
      return llex_op2(ls, '=', '<', TK_le);
    case '>':
      return llex_op2(ls, '=', '>', TK_ge);
    case '~':
      return llex_op2(ls, '=', '~', TK_ne);
    case ':':
      return llex_op2(ls, ':', ':', TK_label);
    case '"':
    case '\'':
      read_string(ls, ls->current, tv);
      return TK_string;
    case '.':
      save_and_next(ls);
      if (ls->current == '.') {
        next(ls);
        if (ls->current != '.')
          return TK_concat;  /* .. */
        next(ls);
        return TK_dots;  /* ... */
      }
      if (lj_char_isdigit(ls->current)) {
        /* A dot followed by a digit starts a number. */
        lex_number(ls, tv);
        return TK_number;
      }
      return '.';
    case END_OF_STREAM:
      return TK_eof;
    default: {
      int ch = ls->current;
      next(ls);
      return ch;  /* Single-char tokens (+ - / ...). */
    }
    }
  }
}
/*
** Read a quoted string literal.
**
** ls    Lexer state; ls->current is the opening delimiter on entry.
** delim The delimiter character that ends the string.
** tv    Receives the resulting string value.
**
** The opening and closing delimiters are saved into the lexer buffer
** along with the decoded contents, and are excluded again when the
** string is created (buffer + 1, length - 2).
**
** Errors are reported through lj_lex_error(): LJ_ERR_XSTR for an
** unfinished string (end of stream or raw line break) and LJ_ERR_XESC
** for an invalid escape sequence.
** NOTE(review): the statements following each lj_lex_error() call only
** matter if that function returns -- presumably it does not; confirm.
*/
static void read_string(LexState *ls, int delim, TValue *tv)
{
  save_and_next(ls);  /* Keep the opening delimiter in the buffer. */
  while (ls->current != delim) {
    switch (ls->current) {
    case END_OF_STREAM:
      lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
      continue;
    case '\n':
    case '\r':
      /* A raw line break is not allowed inside a quoted string. */
      lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
      continue;
    case '\\': {
      int c = next(ls); /* Skip the '\\'. */
      switch (c) {
      case 'a': c = '\a'; break;
      case 'b': c = '\b'; break;
      case 'f': c = '\f'; break;
      case 'n': c = '\n'; break;
      case 'r': c = '\r'; break;
      case 't': c = '\t'; break;
      case 'v': c = '\v'; break;
      case 'x': /* Hexadecimal escape '\xXX'. */
        /*
        ** Each next() advances to a digit and yields it; its low nibble
        ** is the value for '0'-'9', while hex letters need +9. The
        ** second digit is left as the current char and is consumed by
        ** the common next() after this switch.
        */
        c = (next(ls) & 15u) << 4;
        if (!lj_char_isdigit(ls->current)) {
          if (!lj_char_isxdigit(ls->current)) goto err_xesc;
          c += 9 << 4;
        }
        c += (next(ls) & 15u);
        if (!lj_char_isdigit(ls->current)) {
          if (!lj_char_isxdigit(ls->current)) goto err_xesc;
          c += 9;
        }
        break;
      case 'z': /* Skip whitespace. */
        /* Nothing is saved; line breaks go through inclinenumber(). */
        next(ls);
        while (lj_char_isspace(ls->current))
          if (currIsNewline(ls)) inclinenumber(ls); else next(ls);
        continue;
      case '\n':
      case '\r':
        /* Backslash before a line break: store a single '\n'. */
        save(ls, '\n');
        inclinenumber(ls);
        continue;
      case '\\':
      case '\"':
      case '\'':
        /* The escaped character stands for itself. */
        break;
      case END_OF_STREAM:
        /* Let the outer loop report the unfinished string. */
        continue;
      default:
        if (!lj_char_isdigit(c)) goto err_xesc;
        c -= '0'; /* Decimal escape '\ddd'. */
        /*
        ** Up to three digits. With fewer than three, the current char
        ** is the first non-digit and must not be consumed here; with
        ** three, the last digit is consumed by the next() below.
        */
        if (lj_char_isdigit(next(ls))) {
          c = c*10 + (ls->current - '0');
          if (lj_char_isdigit(next(ls))) {
            c = c*10 + (ls->current - '0');
            if (c > 255) {
              /* Shared error exit for all invalid escapes. */
            err_xesc:
              lj_lex_error(ls, TK_string, LJ_ERR_XESC);
            }
            next(ls);
          }
        }
        save(ls, c);
        continue;
      }
      /* Common exit: store the decoded char, consume the escape's last char. */
      save(ls, c);
      next(ls);
      continue;
    }
    default:
      save_and_next(ls);
      break;
    }
  }
  save_and_next(ls); /* skip delimiter */
  /* Create the string from the buffer contents between the delimiters. */
  setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));
}
static void read_string(LexState *ls) { save_and_next(ls); while (ls->current != '"') { switch (ls->current) { case END_OF_STREAM: lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); continue; case '\\': { int c = next(ls); /* Skip the '\\'. */ switch (c) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case 'x': /* Hexadecimal escape '\xXX'. */ c = (next(ls) & 15u) << 4; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 4; } c += (next(ls) & 15u); if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9; } break; case 'u': /* Unicode escape '\uXXXX'. */ c = (next(ls) & 15u) << 12; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 12; } c += (next(ls) & 15u) << 8; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 8; } c += (next(ls) & 15u) << 4; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 4; } c += (next(ls) & 15u); if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9; } if (c >= 0x0800) { save(ls, 0xE0 | (c >> 12)); save(ls, 0x80 | ((c >> 6) & 0x3f)); c = 0x80 | (c & 0x3f); } else if (c >= 0x0080) { save(ls, 0xC0 | (c >> 6)); c = 0x80 | (c & 0x3f); } break; case 'U': /* Unicode escape '\UXXXXXXXX'. 
*/ c = (next(ls) & 15u) << 28; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 28; } c += (next(ls) & 15u) << 24; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 24; } c += (next(ls) & 15u) << 20; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 20; } c += (next(ls) & 15u) << 16; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 16; } c += (next(ls) & 15u) << 12; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 12; } c += (next(ls) & 15u) << 8; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 8; } c += (next(ls) & 15u) << 4; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 4; } c += (next(ls) & 15u); if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9; } if (c >= 0x4000000) { save(ls, 0xFC | (c >> 30)); save(ls, 0x80 | ((c >> 24) & 0x3f)); save(ls, 0x80 | ((c >> 18) & 0x3f)); save(ls, 0x80 | ((c >> 12) & 0x3f)); save(ls, 0x80 | ((c >> 6) & 0x3f)); c = 0x80 | (c & 0x3f); } else if (c >= 0x200000) {
/* Parse a string. */ static void lex_string(LexState *ls, TValue *tv) { LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */ lex_savenext(ls); while (ls->c != delim) { switch (ls->c) { case LEX_EOF: lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); continue; case '\n': case '\r': lj_lex_error(ls, TK_string, LJ_ERR_XSTR); continue; case '\\': { LexChar c = lex_next(ls); /* Skip the '\\'. */ switch (c) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case 'x': /* Hexadecimal escape '\xXX'. */ c = (lex_next(ls) & 15u) << 4; if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) lj_lex_error(ls, TK_string, LJ_ERR_XHEX); c += 9 << 4; } c += (lex_next(ls) & 15u); if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) lj_lex_error(ls, TK_string, LJ_ERR_XHEX); c += 9; } break; case 'u': /* Unicode escape '\u{XX...}'. */ if (lex_next(ls) != '{') goto err_xesc; lex_next(ls); c = 0; do { c = (c << 4) | (ls->c & 15u); if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) goto err_xesc; c += 9; } if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */ } while (lex_next(ls) != '}'); if (c < 0x800) { if (c < 0x80) break; lex_save(ls, 0xc0 | (c >> 6)); } else { if (c >= 0x10000) { lex_save(ls, 0xf0 | (c >> 18)); lex_save(ls, 0x80 | ((c >> 12) & 0x3f)); } else { if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */ lex_save(ls, 0xe0 | (c >> 12)); } lex_save(ls, 0x80 | ((c >> 6) & 0x3f)); } c = 0x80 | (c & 0x3f); break; case 'z': /* Skip whitespace. */ lex_next(ls); while (lj_char_isspace(ls->c)) if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls); continue; case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue; case '\\': case '\"': case '\'': break; case LEX_EOF: continue; default: if (!lj_char_isdigit(c)) goto err_xesc; c -= '0'; /* Decimal escape '\ddd'. 
*/ if (lj_char_isdigit(lex_next(ls))) { c = c*10 + (ls->c - '0'); if (lj_char_isdigit(lex_next(ls))) { c = c*10 + (ls->c - '0'); if (c > 255) { err_xesc: lj_lex_error(ls, TK_string, LJ_ERR_XESC); } lex_next(ls); } } lex_save(ls, c); continue; } lex_save(ls, c); lex_next(ls); continue; }