/**
 * Do lexical analysis of the NUL-terminated buffer 'src'.
 *
 * New tokens are appended after 'prev'.
 *
 * Returns a pointer to the last token in the chain (this is 'prev'
 * itself when no token was added), or NULL if an error occurred.
 * If an error occurred, 'ei' is filled in with the error message, the
 * file name (taken from 'f') and the line number.
 */
token_t *
glw_view_lexer(glw_root_t *gr, const char *src, errorinfo_t *ei,
               rstr_t *f, token_t *prev)
{
  const char *start;
  int line = 1;
  token_t *t;

  while(*src != 0) {

    if(*src == '\n') {
      /* newline */
      /* TODO: DOS CR support ? */
      src++;
      line++;
      continue;
    }

    if(*src <= 32) {
      /* whitespace */
      src++;
      continue;
    }

    /*
     * Keywords.
     * NOTE(review): only the leading characters are compared, so an
     * identifier that merely starts with "void", "true" or "false" is
     * split into the keyword token plus a remainder.
     */
    if(src[0] == 'v' && src[1] == 'o' && src[2] == 'i' && src[3] == 'd') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_VOID);
      src+=4;
      continue;
    }

    if(src[0] == 't' && src[1] == 'r' && src[2] == 'u' && src[3] == 'e') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_INT);
      src+=4;
      prev->t_int = 1;
      continue;
    }

    if(src[0] == 'f' && src[1] == 'a' && src[2] == 'l' && src[3] == 's' &&
       src[4] == 'e') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_INT);
      src+=5;
      prev->t_int = 0;
      continue;
    }

    if(*src == '/' && src[1] == '/') {
      // C++ style comment
      src += 2;
      /* Stop at the newline or at the end of the buffer, whichever comes
       * first.  The newline itself is left for the main loop, which
       * consumes it and bumps the line counter. */
      while(*src != 0 && *src != '\n')
        src++;
      continue;
    }

    if(*src == '/' && src[1] == '*') {
      /* A normal C-comment */
      src += 2;

      /* Scan for the closing star-slash, but never past the terminator */
      while(*src != 0 && (*src != '/' || src[-1] != '*')) {
        if(*src == '\n')
          line++;
        src++;
      }

      if(*src == 0) {
        snprintf(ei->error, sizeof(ei->error), "Unterminated comment");
        snprintf(ei->file,  sizeof(ei->file),  "%s", rstr_get(f));
        ei->line = line;
        return NULL;
      }

      src++;
      continue;
    }

    /* Multi-character operators (must be tested before single chars) */
    if(src[0] == '&' && src[1] == '&') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_BOOLEAN_AND);
      src+=2;
      continue;
    }

    if(src[0] == '?' && src[1] == '=') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_COND_ASSIGNMENT);
      src+=2;
      continue;
    }

    if(src[0] == '<' && src[1] == '-') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_LINK_ASSIGNMENT);
      src+=2;
      continue;
    }

    if(src[0] == ':' && src[1] == '=') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_REF_ASSIGNMENT);
      src+=2;
      continue;
    }

    if(src[0] == '_' && src[1] == '=' && src[2] == '_') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_DEBUG_ASSIGNMENT);
      src+=3;
      continue;
    }

    if(src[0] == '|' && src[1] == '|') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_BOOLEAN_OR);
      src+=2;
      continue;
    }

    if(src[0] == '^' && src[1] == '^') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_BOOLEAN_XOR);
      src+=2;
      continue;
    }

    if(src[0] == '=' && src[1] == '=') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_EQ);
      src+=2;
      continue;
    }

    if(src[0] == '!' && src[1] == '=') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_NEQ);
      src+=2;
      continue;
    }

    if(src[0] == '?' && src[1] == '?') {
      prev = lexer_add_token_simple(gr, prev, f, line, TOKEN_NULL_COALESCE);
      src+=2;
      continue;
    }

    /*
     * A '-' directly followed by a digit is not handed to
     * lexer_single_char() here.
     * NOTE(review): presumably so it can become the sign of a number, but
     * the numeric branch below only triggers via lex_isdigit(*src) --
     * confirm that lex_isdigit() accepts '-', otherwise such input ends
     * in the "Invalid char" error at the bottom of the loop.
     */
    if(!(src[0] == '-' && lex_isdigit(src[1]))) {
      if((t = lexer_single_char(gr, prev, f, line, *src)) != NULL) {
        src++;
        prev = t;
        continue;
      }
    }

    start = src;

    if(*src == '"' || *src == '\'') {
      /* A quoted string " ... " */
      char stop = *src;
      src++;
      start++;

      /* A quote preceded by a single backslash does not end the string */
      while((*src != stop || (src[-1] == '\\' && src[-2] != '\\')) &&
            *src != 0) {
        if(*src == '\n')
          line++;
        src++;
      }

      if(*src != stop) {
        snprintf(ei->error, sizeof(ei->error), "Unterminated quote");
        snprintf(ei->file,  sizeof(ei->file),  "%s", rstr_get(f));
        ei->line = line;
        return NULL;
      }

      prev = lexer_add_token_string(gr, prev, f, line, start, src,
                                    TOKEN_RSTRING);
      /* Single-quoted strings are tagged as rich text */
      if(stop == '\'')
        prev->t_rstrtype = PROP_STR_RICH;
      src++;
      continue;
    }

    if(lex_isalpha(*src)) {
      /* Alphanumeric string */
      while(lex_isalnum(*src))
        src++;

      prev = lexer_add_token_string(gr, prev, f, line, start, src,
                                    TOKEN_IDENTIFIER);
      continue;
    }

    if(lex_isdigit(*src)) {
      /* Integer */
      while(lex_isdigit(*src))
        src++;

      if(*src == '.') {
        src++;
        /* , or a float */
        while(lex_isdigit(*src))
          src++;
      }

      if(*src == 'f')
        /* we support having the 'f' postfix around too */
        src++;

      prev = lexer_add_token_float(gr, prev, f, line, start, src);
      continue;
    }

    /* Nothing matched: report the offending character */
    snprintf(ei->error, sizeof(ei->error), "Invalid char '%c'",
             *src > 31 ? *src : ' ');
    snprintf(ei->file,  sizeof(ei->file),  "%s", rstr_get(f));
    ei->line = line;
    return NULL;
  }
  return prev;
}
/*
** Reads a wide-character string literal terminated by `del' and stores
** the resulting string object in seminfo->ts.
**
** LS->current must be the opening delimiter on entry.  Characters are
** collected with the wsave()/wsave_and_next() macros; backslash escapes
** (\a \b \f \n \r \t \v, backslash-newline, \x followed by up to four
** hex digits, and up to three decimal digits) are decoded on the fly.
** An end of stream or a raw newline inside the literal is reported
** through luaX_lexerror() as "unfinished string".
*/
static void read_wstring (LexState *LS, int del, SemInfo *seminfo) {
  size_t l = 0;
  checkbuffer(LS, l * 2);
  wsave_and_next(LS, l);  /* keep the opening delimiter */
  while (LS->current != del) {
    checkbuffer(LS, l * 2);
    switch (LS->current) {
      case EOZ:
        wsave(LS, '\0', l);
        luaX_lexerror(LS, "unfinished string", TK_EOS);
        break;  /* to avoid warnings */
      case '\n':
        wsave(LS, '\0', l);
        luaX_lexerror(LS, "unfinished string", TK_STRING);
        break;  /* to avoid warnings */
      case '\\':
        next(LS);  /* do not save the `\' */
        switch (LS->current) {
          case 'a': wsave(LS, '\a', l); next(LS); break;
          case 'b': wsave(LS, '\b', l); next(LS); break;
          case 'f': wsave(LS, '\f', l); next(LS); break;
          case 'n': wsave(LS, '\n', l); next(LS); break;
          case 'r': wsave(LS, '\r', l); next(LS); break;
          case 't': wsave(LS, '\t', l); next(LS); break;
          case 'v': wsave(LS, '\v', l); next(LS); break;
          case '\n': wsave(LS, '\n', l); inclinenumber(LS); break;
          case EOZ: break;  /* will raise an error next loop */
          case 'x': {
            int ch;
            next(LS);
            ch = tolower(LS->current);
            if (!lex_isdigit(ch) && !(ch >= 'a' && ch <= 'f') ) {
              /* `\x' not followed by a hex digit: keep a literal 'x'.
                 Stored with wsave() like every other character of the
                 wide string so the buffer layout stays uniform. */
              wsave(LS, 'x', l);
            }
            else {  /* \xHHHH: up to numDigits hex digits */
              int c = 0;
              int i = 0;
              int numDigits = 4;
              do {
                ch = tolower(LS->current);
                if (lex_isdigit(ch))
                  c = 16*c + (ch-'0');
                else if (ch >= 'a' && ch <= 'f')
                  c = 16*c + (ch-'a') + 10;
                next(LS);
                ch = tolower(LS->current);
              } while (++i<numDigits && (lex_isdigit(ch) || (ch >= 'a' && ch <= 'f')));
              wsave(LS, c, l);
            }
            break;
          }
          default: {
            if (!lex_isdigit(LS->current))
              wsave_and_next(LS, l);  /* handles \\, \", \', and \? */
            else {  /* \ddd: up to three decimal digits */
              int c = 0;
              int i = 0;
              do {
                c = 10*c + (LS->current-'0');
                next(LS);
              } while (++i<3 && lex_isdigit(LS->current));
              if (c > UCHAR_MAX) {
                wsave(LS, '\0', l);
                luaX_lexerror(LS, "escape sequence too large", TK_STRING);
              }
              wsave(LS, c, l);
            }
          }
        }
        break;
      default:
        wsave_and_next(LS, l);
    }
  }
  wsave_and_next(LS, l);  /* skip delimiter */
  wsave(LS, '\0', l);
  /* Hand over the buffer contents starting 2 bytes in, with a length of
     (l - 6) / 2 wide characters.
     NOTE(review): this presumably drops the two delimiters and the
     terminator, assuming wsave() advances `l' by 2 per character --
     confirm against the macro definition. */
  seminfo->ts = luaS_newlwstr(LS->L, (const lua_WChar*)(luaZ_buffer(LS->buff) + 1 * 2), (l - 3 * 2) / 2);
}
/*
** Main scanner entry point: skips newlines, whitespace and comments, then
** returns the code of the next token found at LS->current.
**
** Single-character tokens are returned as their character value; the
** multi-character ones use the TK_* codes.  For names and the string
** and number tokens the associated value is placed in `seminfo' (by
** this function for names, by the read_* helpers otherwise).
*/
int luaX_lex (LexState *LS, SemInfo *seminfo) {
  for (;;) {
    switch (LS->current) {

      case EOZ:
        return TK_EOS;

      case '\n':
        inclinenumber(LS);
        continue;

      case '-':
        next(LS);
        if (LS->current != '-') {
          return '-';
        }
        /* two dashes: a comment follows */
        next(LS);
        if (LS->current == '[') {
          next(LS);
          if (LS->current == '[') {
            read_long_string(LS, NULL);  /* long comment */
            continue;
          }
        }
        /* short comment: runs up to (not including) the end of line */
        while (LS->current != '\n' && LS->current != EOZ) {
          next(LS);
        }
        continue;

      case '[':
        next(LS);
        if (LS->current != '[') {
          return '[';
        }
        read_long_string(LS, seminfo);
        return TK_STRING;

      case '=':
        next(LS);
        if (LS->current != '=') {
          return '=';
        }
        next(LS);
        return TK_EQ;

      case '<':
        next(LS);
        if (LS->current == '<') {
          next(LS);
          return TK_SHL;
        }
        if (LS->current == '=') {
          next(LS);
          return TK_LE;
        }
        return '<';

      case '>':
        next(LS);
        if (LS->current == '>') {
          next(LS);
          return TK_SHR;
        }
        if (LS->current == '=') {
          next(LS);
          return TK_GE;
        }
        return '>';

      case '~':
        next(LS);
        if (LS->current != '=') {
          return '~';
        }
        next(LS);
        return TK_NE;

      case '"':
      case '\'':
        read_string(LS, LS->current, seminfo);
        return TK_STRING;

      case '.':
        next(LS);
        if (LS->current == '.') {
          next(LS);
          if (LS->current != '.') {
            return TK_CONCAT;   /* .. */
          }
          next(LS);
          return TK_DOTS;       /* ... */
        }
        if (lex_isdigit(LS->current)) {
          /* a number written with a leading decimal point */
          return read_numeral(LS, 1, seminfo);
        }
        return '.';

      default: {
        if (isspace(LS->current)) {
          next(LS);
          continue;
        }

        if (lex_isdigit(LS->current)) {
          return (read_numeral(LS, 0, seminfo));
        }

        if (lex_isalpha(LS->current) || LS->current == '_') {
          char pendingCh = 0;
          size_t nameLen;
          TString *name;

          if (LS->current == 'L') {
            /* an 'L' immediately followed by a double quote opens a
               wide string; otherwise the 'L' starts an ordinary name */
            next(LS);
            if (LS->current == '"') {
              read_wstring(LS, LS->current, seminfo);
              return TK_WSTRING;
            }
            pendingCh = 'L';
          }

          /* identifier or reserved word */
          nameLen = readname(LS, pendingCh);
          name = luaS_newlstr(LS->L, luaZ_buffer(LS->buff), nameLen);
          if (name->tsv.reserved > 0) {  /* reserved word? */
            return name->tsv.reserved - 1 + FIRST_RESERVED;
          }
          seminfo->ts = name;
          return TK_NAME;
        }

        {
          /* anything else is a single-char token (+ - / ...) */
          int tok = LS->current;
          if (iscntrl(tok)) {
            luaX_error(LS, "invalid control char",
                           luaO_pushfstring(LS->L, "char(%d)", tok));
          }
          next(LS);
          return tok;
        }
      }
    }
  }
}
/* LUA_NUMBER */
/*
** Reads a numeric literal starting at LS->current and stores its value
** in seminfo->r.  Always returns TK_NUMBER.
**
** `period' is non-zero when the literal begins with a decimal point;
** in that case a '.' is placed in the buffer before any digit is read.
**
** A literal that starts with "0x" is read as hexadecimal (at most 8
** digits) and must contain at least one hex digit.  Everything else is
** collected into the lexer buffer (digits, optional fraction, optional
** exponent) and converted with luaO_str2d().  Errors are reported
** through luaX_lexerror().
*/
static int read_numeral (LexState *LS, int period, SemInfo *seminfo) {
  int isReal = 0;
  int startsWithZero = LS->current == '0';
  size_t l = 0;
  checkbuffer(LS, l);
  if (period) {
    save(LS, '.', l);
    isReal = 1;
  }
  if (startsWithZero) {
    next(LS);
    if (LS->current == 'x') {
      /* Process a hex number */
      /* Accumulate unsigned: eight digits with the top bit set would
         overflow a signed int, which is undefined behaviour. */
      unsigned int value = 0;
      int numDigits = 8;
      int i = 0;
      int ch;
      next(LS);  /* skip the 'x' */
      ch = tolower(LS->current);
      if (!(lex_isdigit(ch) || (ch >= 'a' && ch <= 'f'))) {
        /* "0x" with no digit after it: report it instead of silently
           swallowing the following character as a digit */
        checkbuffer(LS, l);
        save(LS, '0', l);
        save(LS, 'x', l);
        save(LS, '\0', l);
        luaX_lexerror(LS, "malformed number", TK_NUMBER);
      }
      while (i < numDigits && (lex_isdigit(ch) || (ch >= 'a' && ch <= 'f'))) {
        if (lex_isdigit(ch))
          value = 16u*value + (unsigned int)(ch-'0');
        else
          value = 16u*value + (unsigned int)(ch-'a') + 10u;
        next(LS);
        ch = tolower(LS->current);
        i++;
      }
      /* Convert back through int so literals keep the value they had
         when the accumulator was a (wrapping) signed int. */
      seminfo->r = (int)value;
      return TK_NUMBER;
    }
    else {
      /* plain leading zero: it was consumed above, so record it now */
      checkbuffer(LS, 1);
      save(LS, '0', l);
    }
  }
  /* integer part */
  while (lex_isdigit(LS->current)) {
    checkbuffer(LS, l);
    save_and_next(LS, l);
  }
  if (LS->current == '.') {
    isReal = 1;
    save_and_next(LS, l);
    if (LS->current == '.') {
      save_and_next(LS, l);
      save(LS, '\0', l);
      luaX_lexerror(LS,
                 "ambiguous syntax (decimal point x string concatenation)",
                 TK_NUMBER);
    }
  }
  /* fractional part */
  while (lex_isdigit(LS->current)) {
    checkbuffer(LS, l);
    save_and_next(LS, l);
  }
  if (LS->current == 'e' || LS->current == 'E') {
    isReal = 1;
    save_and_next(LS, l);  /* read `E' */
    if (LS->current == '+' || LS->current == '-')
      save_and_next(LS, l);  /* optional exponent sign */
    while (lex_isdigit(LS->current)) {
      checkbuffer(LS, l);
      save_and_next(LS, l);
    }
  }
  save(LS, '\0', l);
  /* Both kinds of literal go through the same conversion; isReal only
     selects the wording of the error message. */
  if (!luaO_str2d(luaZ_buffer(LS->buff), &seminfo->r))
    luaX_lexerror(LS, isReal ? "malformed number" : "malformed integer",
                  TK_NUMBER);
  return TK_NUMBER;
}