/*
** Lex the body of a long bracket: a long string, or a long comment when
** tv is NULL. The closing bracket is recognized when lex_skipeq() returns
** a value equal to sep. For a string, the result is stored into *tv.
*/
static void lex_longstring(LexState *ls, TValue *tv, int sep)
{
  int closed = 0;

  lex_savenext(ls);  /* Consume the second '[' of the opener. */
  if (lex_iseol(ls)) {
    lex_newline(ls);  /* A newline directly after the opener is skipped. */
  }

  while (!closed) {
    if (ls->c == LEX_EOF) {
      /* Hit EOF inside the bracket: raise the string or comment error. */
      lj_lex_error(ls, TK_eof, tv ? LJ_ERR_XLSTR : LJ_ERR_XLCOM);
    } else if (ls->c == ']') {
      if (lex_skipeq(ls) == sep) {
        lex_savenext(ls);  /* Consume the second ']' of the closer. */
        closed = 1;
      }
    } else if (ls->c == '\n' || ls->c == '\r') {
      lex_save(ls, '\n');  /* Either line-end character is saved as '\n'. */
      lex_newline(ls);
      if (!tv) {
        lj_buf_reset(&ls->sb);  /* Comment text is not kept; reuse the buffer. */
      }
    } else {
      lex_savenext(ls);
    }
  }

  if (tv) {
    /*
    ** Drop 2 + sep characters from each end of the buffer -- presumably
    ** the opening and closing bracket delimiters -- and keep the rest.
    */
    const MSize skip = 2 + (MSize)sep;
    GCstr *str = lj_parse_keepstr(ls, sbufB(&ls->sb) + skip,
                                  sbuflen(&ls->sb) - 2*skip);
    setstrV(ls->L, tv, str);
  }
}
/**
 * A CRLF pair split across two buffers: a buffer holding only "\r" (passed
 * with the third argument false) must report UNFINISHED, and a following
 * buffer holding "\n" must complete the NEWLINE token.
 */
TEST_F(Lexer,LexNewLineCRLFSplit)
{
  /* First buffer: lone CR, lexer is left waiting in L_NEWLINE0. */
  {
    webvtt_uint cr_pos = 0;
    EXPECT_EQ( UNFINISHED, lex_newline( "\r", cr_pos, false ) );
    EXPECT_EQ( 1, cr_pos );
    EXPECT_EQ( L_NEWLINE0, lexerState() );
  }

  /* Second buffer: the LF finishes the token and the lexer returns to L_START. */
  {
    webvtt_uint lf_pos = 0;
    EXPECT_EQ( NEWLINE, lex_newline( "\n", lf_pos, true ) );
    EXPECT_EQ( 1, lf_pos );
    EXPECT_EQ( L_START, lexerState() );
  }
}
/**
 * A CR that is not followed by an LF is still a NEWLINE token; the position
 * must end up just past the CR (1), not past the following character.
 */
TEST_F(Lexer,LexNewLineCRXX)
{
  webvtt_uint offset = 0;

  EXPECT_EQ( NEWLINE, lex_newline( "\rx", offset ) );

  /* Only the CR is consumed and the lexer is back in its start state. */
  EXPECT_EQ( 1, offset );
  EXPECT_EQ( L_START, lexerState() );
}
/**
 * Input that starts with neither CR nor LF must yield BADTOKEN, with the
 * position left at 0 and the lexer in L_START.
 */
TEST_F(Lexer,LexNewLineXX)
{
  webvtt_uint offset = 0;

  EXPECT_EQ( BADTOKEN, lex_newline( "xxx", offset ) );

  /* Nothing is consumed on a bad token. */
  EXPECT_EQ( 0, offset );
  EXPECT_EQ( L_START, lexerState() );
}
/**
 * An empty buffer (end-of-buffer) is reported as a NEWLINE token, with the
 * position unchanged at 0 and the lexer in L_START.
 */
TEST_F(Lexer,LexNewLineEOL)
{
  webvtt_uint offset = 0;

  EXPECT_EQ( NEWLINE, lex_newline( "", offset ) );

  /* There was nothing to consume. */
  EXPECT_EQ( 0, offset );
  EXPECT_EQ( L_START, lexerState() );
}
/* Parse a string. */ static void lex_string(LexState *ls, TValue *tv) { LexChar delim = ls->c; /* Delimiter is '\'' or '"'. */ lex_savenext(ls); while (ls->c != delim) { switch (ls->c) { case LEX_EOF: lj_lex_error(ls, TK_eof, LJ_ERR_XSTR); continue; case '\n': case '\r': lj_lex_error(ls, TK_string, LJ_ERR_XSTR); continue; case '\\': { LexChar c = lex_next(ls); /* Skip the '\\'. */ switch (c) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case 'x': /* Hexadecimal escape '\xXX'. */ c = (lex_next(ls) & 15u) << 4; if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) lj_lex_error(ls, TK_string, LJ_ERR_XHEX); c += 9 << 4; } c += (lex_next(ls) & 15u); if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) lj_lex_error(ls, TK_string, LJ_ERR_XHEX); c += 9; } break; case 'u': /* Unicode escape '\u{XX...}'. */ if (lex_next(ls) != '{') goto err_xesc; lex_next(ls); c = 0; do { c = (c << 4) | (ls->c & 15u); if (!lj_char_isdigit(ls->c)) { if (!lj_char_isxdigit(ls->c)) goto err_xesc; c += 9; } if (c >= 0x110000) goto err_xesc; /* Out of Unicode range. */ } while (lex_next(ls) != '}'); if (c < 0x800) { if (c < 0x80) break; lex_save(ls, 0xc0 | (c >> 6)); } else { if (c >= 0x10000) { lex_save(ls, 0xf0 | (c >> 18)); lex_save(ls, 0x80 | ((c >> 12) & 0x3f)); } else { if (c >= 0xd800 && c < 0xe000) goto err_xesc; /* No surrogates. */ lex_save(ls, 0xe0 | (c >> 12)); } lex_save(ls, 0x80 | ((c >> 6) & 0x3f)); } c = 0x80 | (c & 0x3f); break; case 'z': /* Skip whitespace. */ lex_next(ls); while (lj_char_isspace(ls->c)) if (lex_iseol(ls)) lex_newline(ls); else lex_next(ls); continue; case '\n': case '\r': lex_save(ls, '\n'); lex_newline(ls); continue; case '\\': case '\"': case '\'': break; case LEX_EOF: continue; default: if (!lj_char_isdigit(c)) goto err_xesc; c -= '0'; /* Decimal escape '\ddd'. 
*/ if (lj_char_isdigit(lex_next(ls))) { c = c*10 + (ls->c - '0'); if (lj_char_isdigit(lex_next(ls))) { c = c*10 + (ls->c - '0'); if (c > 255) { err_xesc: lj_lex_error(ls, TK_string, LJ_ERR_XESC); } lex_next(ls); } } lex_save(ls, c); continue; } lex_save(ls, c); lex_next(ls); continue; }