/*
 * Scan the body of a quoted string literal, starting at scanner->pos.
 *
 * q       - the quote character that terminates the literal
 * tostate - tokenizer state to switch to when the closing quote is not
 *           contained in the current input
 *
 * The scanned bytes (up to, not including, the closing quote) are appended to
 * scanner->buf through cli_textbuffer_append_normalize().
 *
 * Returns TOK_StringLiteral and stores the finished buffer in lvalp when the
 * closing quote was found; otherwise consumes the rest of the input, sets
 * scanner->state to tostate and returns 0.
 */
static inline int parseString(YYSTYPE *lvalp, yyscan_t scanner, const char q,
		enum tokenizer_state tostate)
{
	const char *const begin = &scanner->in[scanner->pos];
	const char *const limit = &scanner->in[scanner->insize];
	const char *quote = begin;
	size_t len;

	/* find the first quote that is not directly preceded by a backslash */
	/* NOTE(review): a quote preceded by an escaped backslash (\\) is also
	 * treated as escaped here - confirm this is intended */
	for(;;) {
		quote = memchr(quote, q, (size_t)(limit - quote));
		if(!quote || quote <= begin || quote[-1] != '\\')
			break;
		/* escaped quote: resume the search right after it */
		++quote;
	}

	if(!quote || quote < begin)
		len = scanner->insize - scanner->pos;
	else
		len = quote - begin;
	cli_textbuffer_append_normalize(&scanner->buf, begin, len);

	if(!quote) {
		/* unfinished string: everything was consumed, continue later */
		scanner->pos += len;
		scanner->state = tostate;
		return 0;
	}

	/* the extra 1 skips over the closing quote */
	scanner->pos += len + 1;
	textbuffer_putc(&scanner->buf, '\0');
	TOKEN_SET(lvalp, string, textbuffer_done(scanner));
	scanner->state = Initial;
	assert(lvalp->val.string);
	return TOK_StringLiteral;
}
/*
 * Match the longest operator at scanner->pos.
 *
 * Candidate lengths are tried from MIN(5, remaining input) down to 1; the
 * first length for which in_op_set() finds an entry wins. On a match the
 * operator's name is stored in lvalp, the input position is advanced past it
 * and the operator's token value is returned.
 *
 * If nothing matches (not expected to happen), one byte is skipped and
 * TOK_ERROR is returned.
 */
static int parseOperator(YYSTYPE *lvalp, yyscan_t scanner)
{
	size_t len;

	for(len = MIN(5, scanner->insize - scanner->pos); len; len--) {
		const struct operator *op = in_op_set(&scanner->in[scanner->pos], len);

		if(!op)
			continue;
		TOKEN_SET(lvalp, cstring, op->name);
		scanner->pos += len;
		return op->val;
	}

	/* never reached */
	assert(0);
	scanner->pos++;
	TOKEN_SET(lvalp, cstring, NULL);
	return TOK_ERROR;
}
/*
 * Hand the string stored in token `start` over to `res` for decoding.
 *
 * res->txtbuf.data is always set to the token's string value. Only when that
 * value is non-NULL and the following token is TOK_PAR_CLOSE does the token
 * give up the string (its value is reset to NULL), and res receives the
 * string length plus the range start-2 .. start+2.
 */
static void handle_eval(struct tokens *tokens, size_t start, struct decode_result *res)
{
	res->txtbuf.data = TOKEN_GET(&tokens->data[start], string);

	if(!res->txtbuf.data)
		return;
	if(tokens->data[start+1].type != TOK_PAR_CLOSE)
		return;

	/* the string now belongs to res, detach it from the token */
	TOKEN_SET(&tokens->data[start], string, NULL);
	res->txtbuf.pos = strlen(res->txtbuf.data);
	res->pos_begin  = start-2;
	res->pos_end    = start+2;
}
/*
 * Accumulate an identifier into scanner->buf.
 *
 * Bytes classified as IdStart by id_ctype[] are appended. A backslash (the
 * only byte classified as Operator) followed by 'u' is appended too, and the
 * 'u' itself is skipped. Any other byte ends the identifier: it is pushed
 * back, the buffer is NUL-terminated and looked up with in_word_set().
 *
 * Returns the keyword's token value for keywords, TOK_IDENTIFIER_NAME for
 * anything else, or 0 with scanner->state set to Identifier when the input
 * ran out before the identifier ended.
 */
static inline int parseId(YYSTYPE *lvalp, yyscan_t scanner)
{
	const unsigned char *in = (const unsigned char*)scanner->in;
	const struct keyword *kw;

	scanner->state = Initial;
	while(scanner->pos < scanner->insize) {
		const unsigned char c = in[scanner->pos++];
		const enum char_class cls = id_ctype[c];
		int part_of_id = 0;

		if(cls == IdStart) {
			part_of_id = 1;
		} else if(cls == Operator) {
			/* the table contains OP only for \ */
			assert(c == '\\');
			if(scanner->pos < scanner->insize && in[scanner->pos++] == 'u')
				part_of_id = 1;
			else if(scanner->pos == scanner->insize)
				/* compensates for the decrement done below */
				scanner->pos++;
		}

		if(part_of_id) {
			textbuffer_putc(&scanner->buf, c);
			continue;
		}

		/* character is no longer part of identifier */
		scanner->state = Initial;
		textbuffer_putc(&scanner->buf, '\0');
		scanner->pos--;
		kw = in_word_set(scanner->buf.data, scanner->buf.pos-1);
		if(!kw) {
			/* it is not a keyword, just an identifier */
			TOKEN_SET(lvalp, cstring, NULL);
			return TOK_IDENTIFIER_NAME;
		}
		/* we got a keyword */
		TOKEN_SET(lvalp, cstring, kw->name);
		return kw->val;
	}

	/* input exhausted in the middle of an identifier */
	scanner->state = Identifier;
	return 0;
}
/*
 * Accumulate a numeric literal into scanner->buf and convert it.
 *
 * Digits are always accepted; a single '.' switches to floating point, and
 * once in floating point an 'e'/'E' optionally followed by a sign or digit is
 * accepted as well. The first byte that does not fit ends the literal and is
 * pushed back to the input.
 *
 * Returns TOK_NumericFloat (value in lvalp as dval) or TOK_NumericInt (value
 * in lvalp as ival) when the literal ended, 0 if the text buffer has no data,
 * or 0 with scanner->state set to Number when the input ran out first.
 *
 * Conversion uses strtol()/strtod() instead of atoi()/atof(): atoi() has
 * undefined behaviour when the value does not fit into an int, which
 * untrusted input can trigger with an overlong digit sequence. strtol()
 * saturates instead; the result is then narrowed to int.
 */
static inline int parseNumber(YYSTYPE *lvalp, yyscan_t scanner)
{
	const unsigned char *in = (const unsigned char*)scanner->in;
	int is_float = 0;

	while(scanner->pos < scanner->insize) {
		unsigned char c = in[scanner->pos++];

		if(isdigit(c)) {
			textbuffer_putc(&scanner->buf, c);
			continue;
		}
		if(c == '.' && !is_float) {
			is_float = 1;
			textbuffer_putc(&scanner->buf, '.');
			continue;
		}
		if((c == 'e' || c == 'E') && is_float) {
			textbuffer_putc(&scanner->buf, c);
			if(scanner->pos < scanner->insize) {
				c = in[scanner->pos++];
				if(c == '+' || c == '-' || isdigit(c)) {
					textbuffer_putc(&scanner->buf, c);
					continue;
				}
			}
		}

		/* the last byte read is not part of the number: push it back */
		scanner->pos--;
		textbuffer_putc(&scanner->buf, '\0');
		scanner->state = Initial;
		if(!scanner->buf.data)
			return 0;
		if(is_float) {
			TOKEN_SET(lvalp, dval, strtod(scanner->buf.data, NULL));
			return TOK_NumericFloat;
		}
		TOKEN_SET(lvalp, ival, (int)strtol(scanner->buf.data, NULL, 10));
		return TOK_NumericInt;
	}

	/* input exhausted in the middle of a number */
	scanner->state = Number;
	return 0;
}
END_TEST

/*
 * A token set as cstring must read back as the same pointer through the
 * cstring accessor, as NULL through the string and scope accessors, and as
 * -1 through the ival accessor.
 *
 * The failure messages name the cstring source type so that a failure here
 * cannot be confused with one from the string test.
 */
START_TEST (test_token_cstring)
{
	const char *str = "test";
	yystype tok;
	memset(&tok, 0, sizeof(tok));

	TOKEN_SET(&tok, cstring, str);
	fail_unless(TOKEN_GET(&tok, string) == NULL, "token cstring->string");
	fail_unless(TOKEN_GET(&tok, cstring) == str, "token cstring->cstring");
	fail_unless(TOKEN_GET(&tok, scope) == NULL, "token cstring->scope");
	fail_unless(TOKEN_GET(&tok, ival) == -1, "token cstring->ival");
}
END_TEST

/*
 * A token set as scope must read back as the same pointer through the scope
 * accessor, as NULL through both string accessors, and as -1 through the
 * ival accessor.
 */
START_TEST (test_token_scope)
{
	/* dummy address: the pointer is only compared, never dereferenced here */
	struct scope *expected = (struct scope*)0xdeadbeef;
	yystype token;

	memset(&token, 0, sizeof token);
	TOKEN_SET(&token, scope, expected);

	fail_unless(TOKEN_GET(&token, string) == NULL, "token scope->string");
	fail_unless(TOKEN_GET(&token, cstring) == NULL, "token scope->cstring");
	fail_unless(TOKEN_GET(&token, scope) == expected, "token scope->scope");
	fail_unless(TOKEN_GET(&token, ival) == -1, "token scope->ival");
}
END_TEST #endif /* CHECK_HAVE_LOOPS */ START_TEST (test_token_string) { char str[] = "test"; yystype tok; memset(&tok, 0, sizeof(tok)); TOKEN_SET(&tok, string, str); fail_unless(TOKEN_GET(&tok, string) == str, "token string get/set"); fail_unless(TOKEN_GET(&tok, cstring) == str, "token string->cstring"); fail_unless(TOKEN_GET(&tok, scope) == NULL, "token string->scope"); fail_unless(TOKEN_GET(&tok, ival) == -1, "token string->ival"); }
END_TEST

/*
 * A token set as dval must read back as (approximately) the same double
 * through the dval accessor, as NULL through the string, cstring and scope
 * accessors, and as -1 through the ival accessor.
 *
 * The reference value has to be a double: declared as int, 0.12345 would be
 * truncated to 0 and the test would never exercise a fractional value.
 */
START_TEST (test_token_dval)
{
	double val = 0.12345;
	double diff;
	yystype tok;
	memset(&tok, 0, sizeof(tok));

	TOKEN_SET(&tok, dval, val);
	fail_unless(TOKEN_GET(&tok, string) == NULL, "token dval->string");
	fail_unless(TOKEN_GET(&tok, cstring) == NULL, "token dval->cstring");
	fail_unless(TOKEN_GET(&tok, scope) == NULL, "token dval->scope");
	/* two-sided tolerance, so a value that is too small fails as well */
	diff = TOKEN_GET(&tok, dval) - val;
	fail_unless(diff < 1e-9 && diff > -1e-9, "token dval->dval");
	fail_unless(TOKEN_GET(&tok, ival) == -1, "token dval->ival");
}
static int handle_unescape(struct tokens *tokens, size_t start) { if(tokens->data[start].type == TOK_StringLiteral) { char *R; struct tokens new_tokens; yystype tok; R = cli_unescape(TOKEN_GET(&tokens->data[start], cstring)); tok.type = TOK_StringLiteral; TOKEN_SET(&tok, string, R); new_tokens.capacity = new_tokens.cnt = 1; new_tokens.data = &tok; if(replace_token_range(tokens, start-2, start+2, &new_tokens) < 0) return CL_EMEM; } return CL_SUCCESS; }
void cli_js_parse_done(struct parser_state* state) { struct tokens * tokens = &state->tokens; size_t par_balance = 0, i; char end = '\0'; YYSTYPE val; cli_dbgmsg(MODULE "in cli_js_parse_done()\n"); /* close unfinished token */ switch (state->scanner->state) { case DoubleQString: end = '"'; break; case SingleQString: end = '\''; break; default: /* make gcc happy */ break; } if (end != '\0') cli_js_process_buffer(state, &end, 1); /* close remaining parenthesis */ for (i=0;i<tokens->cnt;i++) { if (tokens->data[i].type == TOK_PAR_OPEN) par_balance++; else if (tokens->data[i].type == TOK_PAR_CLOSE && par_balance > 0) par_balance--; } if (par_balance > 0) { memset(&val, 0, sizeof(val)); val.type = TOK_PAR_CLOSE; TOKEN_SET(&val, cstring, ")"); while (par_balance-- > 0) { add_token(state, &val); } } /* we had to close unfinished strings, parenthesis, * so that the folders/decoders can run properly */ run_folders(&state->tokens); run_decoders(state); yylex_destroy(state->scanner); state->scanner = NULL; }
/*
 * Tokenize one chunk of input and append the resulting tokens to `state`.
 *
 * The buffer is an html-norm-like "chunk". If the original file is bigger
 * than the buffer, we rewind to a space, so we know that tokens won't be
 * broken in half at the end of a buffer - all tokens except string literals,
 * of course. So we can assume that after the buffer there is either a space,
 * EOF, or a chunk of text not containing whitespace at all (which we care
 * about only if it is a string literal).
 *
 * state - parser state; must have state->global set, otherwise a warning is
 *         logged and nothing is done
 * buf   - input bytes
 * n     - number of bytes in buf
 *
 * For every token returned by yylex() the current scope's state machine
 * (fsm_state, blocks, brackets) and state->syntax_errors are updated, and the
 * token is appended with add_token(). A string literal that directly follows
 * "<string literal> +" is concatenated onto the earlier literal instead of
 * being appended.
 */
void cli_js_process_buffer(struct parser_state *state, const char *buf, size_t n)
{
	struct scope* current = state->current;
	YYSTYPE val;
	int yv;
	YY_BUFFER_STATE yyb;

	if(!state->global) {
		/* this state has either not been initialized,
		 * or cli_js_parse_done() was already called on it */
		cli_warnmsg(MODULE "invalid state\n");
		return;
	}
	/* NOTE(review): yyb is assigned but not used again in this function */
	yyb = yy_scan_bytes(buf, n, state->scanner);
	memset(&val, 0, sizeof(val));
	val.vtype = vtype_undefined;
	/* on EOF yylex will return 0 */
	while( (yv=yylex(&val, state->scanner)) != 0) {
		const char *text;
		size_t leng;

		val.type = yv;
		switch(yv) {
			case TOK_VAR:
				current->fsm_state = InsideVar;
				break;
			case TOK_IDENTIFIER_NAME:
				text = yyget_text(state->scanner);
				leng = yyget_leng(state->scanner);
				if(current->last_token == TOK_DOT) {
					/* this is a member name, don't normalize */
					TOKEN_SET(&val, string, cli_strdup(text));
					val.type = TOK_UNNORM_IDENTIFIER;
				} else {
					/* whether the name is a use or a declaration
					 * depends on the state of the current scope */
					switch(current->fsm_state) {
						case WaitParameterList:
							state->syntax_errors++;
							/* fall through */
						case Base:
						case InsideInitializer:
							TOKEN_SET(&val, cstring, scope_use(current, text, leng));
							break;
						case InsideVar:
						case InsideFunctionDecl:
							TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state));
							current->fsm_state = InsideInitializer;
							current->brackets = 0;
							break;
						case WaitFunctionName:
							TOKEN_SET(&val, cstring, scope_declare(current, text, leng, state));
							current->fsm_state = WaitParameterList;
							break;
					}
				}
				break;
			case TOK_PAR_OPEN:
				switch(current->fsm_state) {
					case WaitFunctionName:
						/* fallthrough */
					case WaitParameterList:
						current->fsm_state = InsideFunctionDecl;
						break;
					default:
						/* noop */
						break;
				}
				break;
			case TOK_PAR_CLOSE:
				switch(current->fsm_state) {
					case WaitFunctionName:
						state->syntax_errors++;
						break;
					case WaitParameterList:
						current->fsm_state = Base;
						break;
					default:
						/* noop */
						break;
				}
				break;
			case TOK_CURLY_BRACE_OPEN:
				switch(current->fsm_state) {
					case WaitFunctionName:
						/* fallthrough */
					case WaitParameterList:
					case InsideFunctionDecl:
						/* in a syntactically correct
						 * file, we would already be in
						 * the Base state when we see a { */
						current->fsm_state = Base;
						/* fall-through */
					case InsideVar:
					case InsideInitializer:
						state->syntax_errors++;
						/* fall-through */
					case Base:
					default:
						current->blocks++;
						break;
				}
				break;
			case TOK_CURLY_BRACE_CLOSE:
				if(current->blocks > 0)
					current->blocks--;
				else
					state->syntax_errors++;
				if(!current->blocks) {
					if(current->parent) {
						/* add dummy FUNCTION token to
						 * mark function end: the "}" is added
						 * right here, the FUNCTION token (with
						 * a NULL scope) by the add_token() call
						 * at the bottom of the loop */
						TOKEN_SET(&val, cstring, "}");
						add_token(state, &val);
						TOKEN_SET(&val, scope, NULL);
						val.type = TOK_FUNCTION;
						state->current = current = current->parent;
					} else{
						/* extra } */
						state->syntax_errors++;
					}
				}
				break;
			case TOK_BRACKET_OPEN:
				current->brackets++;
				break;
			case TOK_BRACKET_CLOSE:
				if(current->brackets > 0)
					current->brackets--;
				else
					state->syntax_errors++;
				break;
			case TOK_COMMA:
				if (current->fsm_state == InsideInitializer && current->brackets == 0 && current->blocks == 0) {
					/* initializer ended only if we
					 * encountered a comma, and [] are
					 * balanced.
					 * This avoids switching state on:
					 * var x = [4,y,u];*/
					current->fsm_state = InsideVar;
				}
				break;
			case TOK_SEMICOLON:
				if (current->brackets == 0 && current->blocks == 0) {
					/* avoid switching state on unbalanced []:
					 * var x = [test;testi]; */
					current->fsm_state = Base;
				}
				break;
			case TOK_FUNCTION:
				/* NOTE(review): the result of scope_new() is
				 * dereferenced without a NULL check - confirm it
				 * cannot fail */
				current = scope_new(state);
				current->fsm_state = WaitFunctionName;
				TOKEN_SET(&val, scope, state->current);
				break;
			case TOK_StringLiteral:
				if(state->tokens.cnt > 1 && state->tokens.data[state->tokens.cnt-1].type == TOK_PLUS) {
					/* see if can fold */
					yystype *prev_string = &state->tokens.data[state->tokens.cnt-2];
					if(prev_string->type == TOK_StringLiteral) {
						char *str = TOKEN_GET(prev_string, string);
						size_t str_len = strlen(str);

						text = yyget_text(state->scanner);
						leng = yyget_leng(state->scanner);
						/* delete TOK_PLUS */
						free_token(&state->tokens.data[--state->tokens.cnt]);
						str = cli_realloc(str, str_len + leng + 1);
						/* NOTE(review): on failure the new literal
						 * is appended as a separate token, but the
						 * TOK_PLUS has already been removed */
						if (!str)
							break;
						/* append the new text and terminate it */
						strncpy(str+str_len, text, leng);
						str[str_len + leng] = '\0';
						TOKEN_SET(prev_string, string, str);
						/* the new literal was merged: release it
						 * and go on with the next token without
						 * adding anything */
						free(val.val.string);
						memset(&val, 0, sizeof(val));
						val.vtype = vtype_undefined;
						continue;
					}
				}
				break;
		}
		if(val.vtype == vtype_undefined) {
			/* NOTE(review): abort() ends the process when a token
			 * arrives here without a value type - presumably a
			 * debugging assertion, confirm */
			text = yyget_text(state->scanner);
			TOKEN_SET(&val, string, cli_strdup(text));
			abort();
		}
		add_token(state, &val);
		current->last_token = yv;
		/* reset val for the next token */
		memset(&val, 0, sizeof(val));
		val.vtype = vtype_undefined;
	}
}