/*
** Scan a quoted string literal terminated by `delim` and store the result
** in *tv. Both delimiters are kept in the scratch buffer while scanning and
** are cut off again (buf + 1, n - 2) when the string is handed to
** lj_parse_keepstr(). An unterminated string (end of stream or a raw line
** break) is reported through lj_lex_error().
*/
static void read_string(LexState *ls, int delim, TValue *tv)
{
  save_and_next(ls);  /* Opening delimiter goes into the buffer, too. */
  while (ls->current != delim) {
    if (ls->current == END_OF_STREAM) {
      lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
      continue;
    }
    if (ls->current == '\n' || ls->current == '\r') {
      lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
      continue;
    }
    if (ls->current != '\\') {  /* Ordinary character: copy it verbatim. */
      save_and_next(ls);
      continue;
    }
    /* Escape sequence. The backslash itself is never saved. */
    {
      int esc;
      next(ls);
      switch (ls->current) {
      case 'a': esc = '\a'; break;
      case 'b': esc = '\b'; break;
      case 'f': esc = '\f'; break;
      case 'n': esc = '\n'; break;
      case 'r': esc = '\r'; break;
      case 't': esc = '\t'; break;
      case 'v': esc = '\v'; break;
      case '\n':
      case '\r':
        /* Escaped line break: stored as a single '\n'. */
        save(ls, '\n');
        inclinenumber(ls);
        continue;
      case END_OF_STREAM:
        continue;  /* The next loop iteration raises the error. */
      default:
        if (lj_ctype_isdigit(ls->current)) {
          /* Decimal escape \ddd: one to three digits. */
          int ndigits = 1;
          esc = ls->current - '0';
          next(ls);
          while (ndigits < 3 && lj_ctype_isdigit(ls->current)) {
            esc = 10*esc + (ls->current - '0');
            next(ls);
            ndigits++;
          }
          if (esc > UCHAR_MAX) {
            lj_lex_error(ls, TK_string, LJ_ERR_XESC);
          }
          save(ls, esc);
        } else {
          /* Any other character is taken literally (\\, \", \', ...). */
          save_and_next(ls);
        }
        continue;
      }
      /* Single-letter escapes end up here with the translated value. */
      save(ls, esc);
      next(ls);
    }
  }
  save_and_next(ls);  /* Closing delimiter. */
  setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));
}
/*
** Scan a long bracket string, or a long comment when tv is NULL.
** `sep` is the bracket level as returned by skip_sep() for the opening
** bracket. For strings the buffer still holds both brackets when scanning
** ends; 2 + sep characters are cut from each side before the string is
** stored in *tv. For comments nothing is stored.
*/
static void read_long_string(LexState *ls, TValue *tv, int sep)
{
  int closed = 0;
  save_and_next(ls);  /* Second '[' of the opening bracket. */
  if (currIsNewline(ls)) {  /* A line break right after the bracket is dropped. */
    inclinenumber(ls);
  }
  while (!closed) {
    int ch = ls->current;
    if (ch == END_OF_STREAM) {
      lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
    } else if (ch == ']') {
      if (skip_sep(ls) == sep) {
        save_and_next(ls);  /* Second ']' of the closing bracket. */
        closed = 1;
      }
    } else if (ch == '\n' || ch == '\r') {
      save(ls, '\n');  /* Every kind of line break is stored as '\n'. */
      inclinenumber(ls);
      if (!tv) {
        lj_str_resetbuf(&ls->sb);  /* Comments: don't let the buffer grow. */
      }
    } else if (tv) {
      save_and_next(ls);
    } else {
      next(ls);
    }
  }
  if (tv) {
    GCstr *body = lj_parse_keepstr(ls, ls->sb.buf + (2 + (MSize)sep),
                                   ls->sb.n - 2*(2 + (MSize)sep));
    setstrV(ls->L, tv, body);
  }
}
/*
** Parse a long string or, with tv == NULL, a long comment.
** `sep` is the bracket level that the closing bracket must match
** (compared against lex_skipeq()). For a string, 2 + sep characters are
** trimmed from both ends of the buffer before it is stored in *tv.
*/
static void lex_longstring(LexState *ls, TValue *tv, int sep)
{
  int closed = 0;
  lex_savenext(ls);  /* Second '[' of the opening bracket. */
  if (lex_iseol(ls)) {  /* A leading line break is not part of the string. */
    lex_newline(ls);
  }
  while (!closed) {
    if (ls->c == LEX_EOF) {
      lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
    } else if (ls->c == ']') {
      if (lex_skipeq(ls) == sep) {
        lex_savenext(ls);  /* Second ']' of the closing bracket. */
        closed = 1;
      }
    } else if (ls->c == '\n' || ls->c == '\r') {
      lex_save(ls, '\n');  /* Line breaks are stored as a plain '\n'. */
      lex_newline(ls);
      if (!tv) {
        lj_buf_reset(&ls->sb);  /* Comment text is discarded line by line. */
      }
    } else {
      lex_savenext(ls);
    }
  }
  if (tv) {
    GCstr *body = lj_parse_keepstr(ls, sbufB(&ls->sb) + (2 + (MSize)sep),
                                   sbuflen(&ls->sb) - 2*(2 + (MSize)sep));
    setstrV(ls->L, tv, body);
  }
}
/*
** Out-of-line path for appending a character: double the size of the
** lexer's scratch buffer, then store `c` at the end. Raises LJ_ERR_XELEM
** via lj_lex_error() if the buffer size is already at or above
** LJ_MAX_STR/2.
*/
static LJ_NOINLINE void save_grow(LexState *ls, int c)
{
  MSize doubled;
  if (ls->sb.sz >= LJ_MAX_STR/2) {
    lj_lex_error(ls, 0, LJ_ERR_XELEM);
  }
  /* Keep the new size in a local so the resize helper sees a plain value. */
  doubled = ls->sb.sz * 2;
  lj_str_resizebuf(ls->L, &ls->sb, doubled);
  ls->sb.buf[ls->sb.n] = (char)c;
  ls->sb.n++;
}
/*
** Consume one line break and bump the line counter.
** Accepts "\n", "\r", "\r\n" and "\n\r": a second end-of-line character is
** only swallowed when it differs from the first, so "\n\n" counts as two
** lines. Raises LJ_ERR_XLINES once the counter reaches LJ_MAX_LINE.
*/
static void lex_newline(LexState *ls)
{
  LexChar first = ls->c;
  lua_assert(lex_iseol(ls));
  lex_next(ls);  /* The "\n" or "\r" itself. */
  if (lex_iseol(ls)) {
    if (ls->c != first) {
      lex_next(ls);  /* Second half of "\n\r" or "\r\n". */
    }
  }
  ls->linenumber++;
  if (ls->linenumber >= LJ_MAX_LINE) {
    lj_lex_error(ls, ls->tok, LJ_ERR_XLINES);
  }
}
/*
** Consume one line break and increment ls->linenumber.
** A two-character break ("\r\n" or "\n\r") is treated as one line; two
** identical characters in a row are two separate breaks. Raises
** LJ_ERR_XLINES once the counter reaches LJ_MAX_LINE.
*/
static void inclinenumber(LexState *ls)
{
  int first = ls->current;
  lua_assert(currIsNewline(ls));
  next(ls);  /* The '\n' or '\r' itself. */
  if (currIsNewline(ls)) {
    if (ls->current != first) {
      next(ls);  /* Second half of "\n\r" or "\r\n". */
    }
  }
  ls->linenumber++;
  if (ls->linenumber >= LJ_MAX_LINE) {
    lj_lex_error(ls, ls->token, LJ_ERR_XLINES);
  }
}
/*
** Append character `c` to the lexer's scratch buffer. When the buffer is
** full its size is doubled first; LJ_ERR_XELEM is raised via
** lj_lex_error() if the size is already at or above LJ_MAX_STR/2.
*/
static void save(LexState *ls, int c)
{
  if (ls->sb.sz < ls->sb.n + 1) {  /* No room left for one more char. */
    MSize doubled;
    if (ls->sb.sz >= LJ_MAX_STR/2) {
      lj_lex_error(ls, 0, LJ_ERR_XELEM);
    }
    doubled = ls->sb.sz * 2;
    lj_str_resizebuf(ls->L, &ls->sb, doubled);
  }
  ls->sb.buf[ls->sb.n] = cast(char, c);
  ls->sb.n++;
}
/*
** Parse a number literal into ls->tokenval and set ls->token to TK_number.
**
** The literal's characters are first collected in the scratch buffer: an
** optional leading sign, then identifier characters and '.', plus a '+' or
** '-' that directly follows the exponent letter ('e', or 'p' after a "0x"
** prefix). The NUL-terminated text is then converted by lj_strscan_scan().
** With LJ_HASFFI, the 64 bit integer and imaginary results are boxed as
** cdata. A scan error raises LJ_ERR_XNUMBER.
*/
static void lex_number(LexState *ls)
{
  TValue *tv = &ls->tokenval;
  StrScanFmt fmt;
  int xp = 'e';  /* Exponent letter (lower case) for the current base. */
  int c;         /* Previously consumed character. */

  if (ls->current == '-' || ls->current == '+') {
    save_and_next(ls);
  }
  c = ls->current;
  if (c == '0') {
    save_and_next(ls);
    if ((ls->current | 0x20) == 'x') {
      xp = 'p';
    }
  }
  for (;;) {
    int issign = (ls->current == '-' || ls->current == '+');
    /* A sign only belongs to the literal right after the exponent letter. */
    if (!(lj_char_isident(ls->current) || ls->current == '.' ||
          (issign && (c | 0x20) == xp))) {
      break;
    }
    c = ls->current;
    save_and_next(ls);
  }
  save(ls, '\0');

  fmt = lj_strscan_scan((const uint8_t *)ls->sb.buf, tv,
          (LJ_DUALNUM ? STRSCAN_OPT_TOINT : STRSCAN_OPT_TONUM) |
          (LJ_HASFFI ? (STRSCAN_OPT_LL|STRSCAN_OPT_IMAG) : 0));
  ls->token = TK_number;

  if (LJ_DUALNUM && fmt == STRSCAN_INT) {
    setitype(tv, LJ_TISNUM);
    return;
  }
  if (fmt == STRSCAN_NUM) {
    return;  /* Already in correct format. */
  }
#if LJ_HASFFI
  if (fmt != STRSCAN_ERROR) {
    lua_State *L = ls->L;
    GCcdata *cd;
    lua_assert(fmt == STRSCAN_I64 || fmt == STRSCAN_U64 || fmt == STRSCAN_IMAG);
    if (!ctype_ctsG(G(L))) {
      /* Load FFI library on-demand, keeping the stack top unchanged. */
      ptrdiff_t oldtop = savestack(L, L->top);
      luaopen_ffi(L);
      L->top = restorestack(L, oldtop);
    }
    if (fmt == STRSCAN_IMAG) {
      double *parts;
      cd = lj_cdata_new_(L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
      parts = (double *)cdataptr(cd);
      parts[0] = 0;         /* Real part. */
      parts[1] = numV(tv);  /* Imaginary part. */
    } else {
      cd = lj_cdata_new_(L, fmt==STRSCAN_I64 ? CTID_INT64 : CTID_UINT64, 8);
      *(uint64_t *)cdataptr(cd) = tv->u64;
    }
    lj_parse_keepcdata(ls, tv, cd);
    return;
  }
#endif
  lua_assert(fmt == STRSCAN_ERROR);
  lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
}
/*
** Scan a numeric literal starting at a digit and convert it into *tv.
** The literal's text is gathered in the scratch buffer, NUL-terminated and
** passed to lj_str_numconv(); a failed conversion raises LJ_ERR_XNUMBER.
*/
static void read_numeral(LexState *ls, TValue *tv)
{
  lua_assert(lj_ctype_isdigit(ls->current));
  /* Leading digit, then any run of digits and dots. */
  save_and_next(ls);
  while (lj_ctype_isdigit(ls->current) || ls->current == '.') {
    save_and_next(ls);
  }
  /* Exponent marker, optionally followed by a sign. */
  /* NOTE(review): check_next() presumably consumes a matching char -- confirm. */
  if (check_next(ls, "Ee")) {
    check_next(ls, "+-");
  }
  /* Any trailing identifier characters are part of the literal, too. */
  while (lj_ctype_isident(ls->current)) {
    save_and_next(ls);
  }
  save(ls, '\0');
  if (!lj_str_numconv(ls->sb.buf, tv)) {
    lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
  }
}
/*
** Parse a number literal and store the result in *tv.
**
** The literal's characters are collected in the scratch buffer, which is
** then NUL-terminated and converted with lj_str_numconv(). With LJ_HASFFI
** the last character of the literal selects extra handling: 'L'/'U' (any
** case) tries lex_number64() first, 'I' (any case) produces a complex
** cdata with a zero real part. Raises LJ_ERR_XNUMBER if no conversion
** succeeds.
*/
static void lex_number(LexState *ls, TValue *tv)
{
  int c;  /* Last character consumed so far. */
  lua_assert(lj_char_isdigit(ls->current));
  /* Consume identifier chars and '.', plus a sign right after 'E'/'P'. */
  do {
    c = ls->current;
    save_and_next(ls);
  } while (lj_char_isident(ls->current) || ls->current == '.' ||
           ((ls->current == '-' || ls->current == '+') &&
            ((c & ~0x20) == 'E' || (c & ~0x20) == 'P')));
#if LJ_HASFFI
  c &= ~0x20;  /* Clearing bit 0x20 maps ASCII lower case to upper case. */
  /* A suffix letter needs the FFI ctype state; load it if it is missing. */
  if ((c == 'I' || c == 'L' || c == 'U') && !ctype_ctsG(G(ls->L)))
    lex_loadffi(ls->L);
  if (c == 'I')  /* Parse imaginary part of complex number. */
    ls->sb.n--;  /* Drop the suffix letter before converting. */
#endif
  save(ls, '\0');
  /*
  ** Note the shape of the chain below: with LJ_HASFFI the numconv test is
  ** the else-branch of the 64 bit attempt, without it the test stands alone.
  */
#if LJ_HASFFI
  if ((c == 'L' || c == 'U') && lex_number64(ls, tv)) {  /* Parse 64 bit int. */
    return;
  } else
#endif
  if (lj_str_numconv(ls->sb.buf, tv)) {
#if LJ_HASFFI
    if (c == 'I') {  /* Return cdata holding a complex number. */
      GCcdata *cd = lj_cdata_new_(ls->L, CTID_COMPLEX_DOUBLE, 2*sizeof(double));
      ((double *)cdataptr(cd))[0] = 0;  /* Real part. */
      ((double *)cdataptr(cd))[1] = numberVnum(tv);  /* Imaginary part. */
      lj_parse_keepcdata(ls, tv, cd);
    }
#endif
    /* Dual-number mode: store the value as an integer if that is exact. */
    if (LJ_DUALNUM && tvisnum(tv)) {
      int32_t k = lj_num2int(numV(tv));
      if ((lua_Number)k == numV(tv))  /* -0 cannot end up here. */
        setintV(tv, k);
    }
    return;
  }
  /* Neither conversion accepted the text. */
  lj_lex_error(ls, TK_number, LJ_ERR_XNUMBER);
}
/*
** Scan the next token from the input and return its token code.
** Names, strings and numbers have their value stored in *tv. Whitespace,
** line breaks and comments are skipped. Single-character tokens are
** returned as the character itself; TK_eof is returned at end of stream.
*/
static int llex(LexState *ls, TValue *tv)
{
  lj_str_resetbuf(&ls->sb);
  for (;;) {
    if (lj_char_isident(ls->current)) {
      GCstr *name;
      if (lj_char_isdigit(ls->current)) {  /* Numeric literal. */
        lex_number(ls, tv);
        return TK_number;
      }
      /* Identifier or reserved word. */
      save_and_next(ls);
      while (lj_char_isident(ls->current)) {
        save_and_next(ls);
      }
      name = lj_parse_keepstr(ls, ls->sb.buf, ls->sb.n);
      setstrV(ls->L, tv, name);
      /* A positive `reserved` field marks a reserved word. */
      return name->reserved > 0 ? TK_OFS + name->reserved : TK_name;
    }
    switch (ls->current) {
    case '\n':
    case '\r':
      inclinenumber(ls);
      continue;
    case ' ':
    case '\t':
    case '\v':
    case '\f':
      next(ls);
      continue;
    case '-':
      next(ls);
      if (ls->current != '-') {
        return '-';
      }
      /* "--" starts a comment. */
      next(ls);
      if (ls->current == '[') {
        int sep = skip_sep(ls);
        lj_str_resetbuf(&ls->sb);  /* skip_sep() may have dirtied the buffer. */
        if (sep >= 0) {  /* Long comment. */
          read_long_string(ls, NULL, sep);
          lj_str_resetbuf(&ls->sb);
          continue;
        }
      }
      /* Short comment: runs up to the end of the line or stream. */
      for (;;) {
        if (currIsNewline(ls) || ls->current == END_OF_STREAM) {
          break;
        }
        next(ls);
      }
      continue;
    case '[': {
      int sep = skip_sep(ls);
      if (sep >= 0) {  /* Long string. */
        read_long_string(ls, tv, sep);
        return TK_string;
      }
      if (sep == -1) {  /* Just a plain '['. */
        return '[';
      }
      lj_lex_error(ls, TK_string, LJ_ERR_XLDELIM);
      continue;
    }
    case '=':
      next(ls);
      if (ls->current == '=') {
        next(ls);
        return TK_eq;
      }
      return '=';
    case '<':
      next(ls);
      if (ls->current == '=') {
        next(ls);
        return TK_le;
      }
      return '<';
    case '>':
      next(ls);
      if (ls->current == '=') {
        next(ls);
        return TK_ge;
      }
      return '>';
    case '~':
      next(ls);
      if (ls->current == '=') {
        next(ls);
        return TK_ne;
      }
      return '~';
    case ':':
      next(ls);
      if (ls->current == ':') {
        next(ls);
        return TK_label;
      }
      return ':';
    case '"':
    case '\'':
      read_string(ls, ls->current, tv);
      return TK_string;
    case '.':
      save_and_next(ls);  /* Saved in case this starts a number like ".5". */
      if (ls->current == '.') {
        next(ls);
        if (ls->current == '.') {
          next(ls);
          return TK_dots;  /* ... */
        }
        return TK_concat;  /* .. */
      }
      if (lj_char_isdigit(ls->current)) {
        lex_number(ls, tv);
        return TK_number;
      }
      return '.';
    case END_OF_STREAM:
      return TK_eof;
    default: {
      /* Single-char tokens (+ - / ...). */
      int tok = ls->current;
      next(ls);
      return tok;
    }
    }
  }
}
/*
** Scan a quoted string literal terminated by `delim` and store the result
** in *tv. Both delimiters are kept in the scratch buffer while scanning and
** cut off again (buf + 1, n - 2) when the string is handed to
** lj_parse_keepstr().
**
** Escapes handled: \a \b \f \n \r \t \v, \xXX (two hex digits), \z (skip
** following whitespace), backslash + line break, \\ \" \', and \ddd (one
** to three decimal digits, at most 255). Any other escape raises
** LJ_ERR_XESC; an unterminated string raises LJ_ERR_XSTR.
*/
static void read_string(LexState *ls, int delim, TValue *tv)
{
  save_and_next(ls);  /* Keep the opening delimiter in the buffer. */
  while (ls->current != delim) {
    switch (ls->current) {
    case END_OF_STREAM:
      lj_lex_error(ls, TK_eof, LJ_ERR_XSTR);
      continue;
    case '\n':
    case '\r':
      /* A raw line break inside a quoted string is an error. */
      lj_lex_error(ls, TK_string, LJ_ERR_XSTR);
      continue;
    case '\\': {
      int c = next(ls);  /* Skip the '\\'. */
      switch (c) {
      /* Single-letter escapes: translate, then fall out to the common save. */
      case 'a': c = '\a'; break;
      case 'b': c = '\b'; break;
      case 'f': c = '\f'; break;
      case 'n': c = '\n'; break;
      case 'r': c = '\r'; break;
      case 't': c = '\t'; break;
      case 'v': c = '\v'; break;
      case 'x':  /* Hexadecimal escape '\xXX'. */
        /*
        ** Each digit contributes its low 4 bits. For a letter digit (ASCII)
        ** the low nibble is 1..6, so adding 9 yields the values 10..15.
        */
        c = (next(ls) & 15u) << 4;
        if (!lj_char_isdigit(ls->current)) {
          if (!lj_char_isxdigit(ls->current)) goto err_xesc;
          c += 9 << 4;
        }
        c += (next(ls) & 15u);
        if (!lj_char_isdigit(ls->current)) {
          if (!lj_char_isxdigit(ls->current)) goto err_xesc;
          c += 9;
        }
        break;
      case 'z':  /* Skip whitespace. */
        next(ls);
        /* Line breaks in the skipped run still advance the line counter. */
        while (lj_char_isspace(ls->current))
          if (currIsNewline(ls)) inclinenumber(ls); else next(ls);
        continue;
      case '\n':
      case '\r':
        /* Escaped line break: stored as a single '\n'. */
        save(ls, '\n');
        inclinenumber(ls);
        continue;
      case '\\':
      case '\"':
      case '\'':
        break;  /* Saved as-is by the common code below. */
      case END_OF_STREAM:
        continue;  /* The outer loop reports the unfinished string. */
      default:
        if (!lj_char_isdigit(c))
          goto err_xesc;  /* Unknown escape letter. */
        c -= '0';  /* Decimal escape '\ddd'. */
        if (lj_char_isdigit(next(ls))) {
          c = c*10 + (ls->current - '0');
          if (lj_char_isdigit(next(ls))) {
            c = c*10 + (ls->current - '0');
            /* Only a three-digit escape can exceed 255. */
            if (c > 255) {
            err_xesc:  /* Also the jump target of the hex and unknown cases. */
              lj_lex_error(ls, TK_string, LJ_ERR_XESC);
            }
            next(ls);  /* Skip the third digit. */
          }
        }
        /* The input already points past the escape: save without next(). */
        save(ls, c);
        continue;
      }
      /* Common tail: save the translated char, skip the last escape char. */
      save(ls, c);
      next(ls);
      continue;
      }
    default:
      save_and_next(ls);
      break;
    }
  }
  save_and_next(ls);  /* Skip (and save) the closing delimiter. */
  setstrV(ls->L, tv, lj_parse_keepstr(ls, ls->sb.buf + 1, ls->sb.n - 2));
}
static void read_string(LexState *ls) { save_and_next(ls); while (ls->current != '"') { switch (ls->current) { case END_OF_STREAM: lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); continue; case '\\': { int c = next(ls); /* Skip the '\\'. */ switch (c) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case 'x': /* Hexadecimal escape '\xXX'. */ c = (next(ls) & 15u) << 4; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 4; } c += (next(ls) & 15u); if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9; } break; case 'u': /* Unicode escape '\uXXXX'. */ c = (next(ls) & 15u) << 12; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 12; } c += (next(ls) & 15u) << 8; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 8; } c += (next(ls) & 15u) << 4; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 4; } c += (next(ls) & 15u); if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9; } if (c >= 0x0800) { save(ls, 0xE0 | (c >> 12)); save(ls, 0x80 | ((c >> 6) & 0x3f)); c = 0x80 | (c & 0x3f); } else if (c >= 0x0080) { save(ls, 0xC0 | (c >> 6)); c = 0x80 | (c & 0x3f); } break; case 'U': /* Unicode escape '\UXXXXXXXX'. 
*/ c = (next(ls) & 15u) << 28; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 28; } c += (next(ls) & 15u) << 24; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 24; } c += (next(ls) & 15u) << 20; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 20; } c += (next(ls) & 15u) << 16; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 16; } c += (next(ls) & 15u) << 12; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 12; } c += (next(ls) & 15u) << 8; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 8; } c += (next(ls) & 15u) << 4; if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9 << 4; } c += (next(ls) & 15u); if (!lj_char_isdigit(ls->current)) { if (!lj_char_isxdigit(ls->current)) goto err_xesc; c += 9; } if (c >= 0x4000000) { save(ls, 0xFC | (c >> 30)); save(ls, 0x80 | ((c >> 24) & 0x3f)); save(ls, 0x80 | ((c >> 18) & 0x3f)); save(ls, 0x80 | ((c >> 12) & 0x3f)); save(ls, 0x80 | ((c >> 6) & 0x3f)); c = 0x80 | (c & 0x3f); } else if (c >= 0x200000) {
/* Parse a string. */ static void lex_string(LexState *ls, TValue *tv) { LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */ lex_savenext(ls); while (ls->c != delim) { switch (ls->c) { case LEX_EOF: lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); continue; case '\n': case '\r': lj_lex_error(ls, TK_string, LJ_ERR_XSTR); continue; case '\\': { LexChar c = lex_next(ls); /* Skip the '\\'. */ switch (c) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case 'x': /* Hexadecimal escape '\xXX'. */ c = (lex_next(ls) & 15u) << 4; if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) lj_lex_error(ls, TK_string, LJ_ERR_XHEX); c += 9 << 4; } c += (lex_next(ls) & 15u); if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) lj_lex_error(ls, TK_string, LJ_ERR_XHEX); c += 9; } break; case 'u': /* Unicode escape '\u{XX...}'. */ if (lex_next(ls) != '{') goto err_xesc; lex_next(ls); c = 0; do { c = (c << 4) | (ls->c & 15u); if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) goto err_xesc; c += 9; } if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */ } while (lex_next(ls) != '}'); if (c < 0x800) { if (c < 0x80) break; lex_save(ls, 0xc0 | (c >> 6)); } else { if (c >= 0x10000) { lex_save(ls, 0xf0 | (c >> 18)); lex_save(ls, 0x80 | ((c >> 12) & 0x3f)); } else { if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */ lex_save(ls, 0xe0 | (c >> 12)); } lex_save(ls, 0x80 | ((c >> 6) & 0x3f)); } c = 0x80 | (c & 0x3f); break; case 'z': /* Skip whitespace. */ lex_next(ls); while (lj_char_isspace(ls->c)) if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls); continue; case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue; case '\\': case '\"': case '\'': break; case LEX_EOF: continue; default: if (!lj_char_isdigit(c)) goto err_xesc; c -= '0'; /* Decimal escape '\ddd'. 
*/ if (lj_char_isdigit(lex_next(ls))) { c = c*10 + (ls->c - '0'); if (lj_char_isdigit(lex_next(ls))) { c = c*10 + (ls->c - '0'); if (c > 255) { err_xesc: lj_lex_error(ls, TK_string, LJ_ERR_XESC); } lex_next(ls); } } lex_save(ls, c); continue; } lex_save(ls, c); lex_next(ls); continue; }