static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error) { json_t *result; lex_scan(lex, error); if(!(flags & JSON_DECODE_ANY)) { if(lex->token != '[' && lex->token != '{') { error_set(error, lex, "'[' or '{' expected"); return NULL; } } result = parse_value(lex, flags, error); if(!result) return NULL; if(!(flags & JSON_DISABLE_EOF_CHECK)) { lex_scan(lex, error); if(lex->token != TOKEN_EOF) { error_set(error, lex, "end of file expected"); json_decref(result); return NULL; } } if(error) { /* Save the position even though there was no error */ error->position = lex->stream.position; } return result; }
static json_t *parse_json(lex_t *lex, size_t flags, json_error_t *error) { json_t *result; lex_scan(lex, error); if(lex->token != '[' && lex->token != '{') { error_set(error, lex, "'[' or '{' expected"); return NULL; } result = parse_value(lex, flags, error); if(!result) return NULL; if(!(flags & JSON_DISABLE_EOF_CHECK)) { lex_scan(lex, error); if(lex->token != TOKEN_EOF) { error_set(error, lex, "end of file expected"); json_decref(result); result = NULL; } } return result; }
ZEND_API void zend_strip(void) { zval token; int token_type; int prev_space = 0; ZVAL_UNDEF(&token); while ((token_type=lex_scan(&token))) { switch (token_type) { case T_WHITESPACE: if (!prev_space) { zend_write(" ", sizeof(" ") - 1); prev_space = 1; } /* lack of break; is intentional */ case T_COMMENT: case T_DOC_COMMENT: ZVAL_UNDEF(&token); continue; case T_END_HEREDOC: zend_write((char*)LANG_SCNG(yy_text), LANG_SCNG(yy_leng)); /* read the following character, either newline or ; */ if (lex_scan(&token) != T_WHITESPACE) { zend_write((char*)LANG_SCNG(yy_text), LANG_SCNG(yy_leng)); } zend_write("\n", sizeof("\n") - 1); prev_space = 1; ZVAL_UNDEF(&token); continue; default: zend_write((char*)LANG_SCNG(yy_text), LANG_SCNG(yy_leng)); break; } if (Z_TYPE(token) == IS_STRING) { switch (token_type) { case T_OPEN_TAG: case T_OPEN_TAG_WITH_ECHO: case T_CLOSE_TAG: case T_WHITESPACE: case T_COMMENT: case T_DOC_COMMENT: break; default: zend_string_release(Z_STR(token)); break; } } prev_space = 0; ZVAL_UNDEF(&token); } /* Discard parse errors thrown during tokenization */ zend_clear_exception(); }
json_t *json_loads(const char *string, json_error_t *error) { lex_t lex; json_t *result; string_data_t stream_data = { .data = string, .pos = 0 }; if(lex_init(&lex, string_get, string_eof, (void *)&stream_data)) return NULL; result = parse_json(&lex, error); if(!result) goto out; lex_scan(&lex, error); if(lex.token != TOKEN_EOF) { error_set(error, &lex, "end of file expected"); json_decref(result); result = NULL; } out: lex_close(&lex); return result; } json_t *json_loadf(FILE *input, json_error_t *error) { lex_t lex; json_t *result; if(lex_init(&lex, (get_func)fgetc, (eof_func)feof, input)) return NULL; result = parse_json(&lex, error); if(!result) goto out; lex_scan(&lex, error); if(lex.token != TOKEN_EOF) { error_set(error, &lex, "end of file expected"); json_decref(result); result = NULL; } out: lex_close(&lex); return result; }
json_t *json_loads(const char *string, size_t flags, json_error_t *error) { lex_t lex; json_t *result; string_data_t stream_data = {string, 0}; (void)flags; /* unused */ if(lex_init(&lex, string_get, (void *)&stream_data)) return NULL; jsonp_error_init(error, "<string>"); result = parse_json(&lex, error); if(!result) goto out; lex_scan(&lex, error); if(lex.token != TOKEN_EOF) { error_set(error, &lex, "end of file expected"); json_decref(result); result = NULL; } out: lex_close(&lex); return result; }
json_t *json_loadf(FILE *input, size_t flags, json_error_t *error) { lex_t lex; const char *source; json_t *result; (void)flags; /* unused */ if(lex_init(&lex, (get_func)fgetc, input)) return NULL; if(input == stdin) source = "<stdin>"; else source = "<stream>"; jsonp_error_init(error, source); result = parse_json(&lex, error); if(!result) goto out; lex_scan(&lex, error); if(lex.token != TOKEN_EOF) { error_set(error, &lex, "end of file expected"); json_decref(result); result = NULL; } out: lex_close(&lex); return result; }
json_t *json_loads(const char *string, json_error_t *error) { lex_t lex; json_t *result; string_data_t stream_data = { /*.data = */string, /*.pos = */0 }; if(lex_init(&lex, string_get, string_eof, (void *)&stream_data)) return NULL; result = parse_json(&lex, error); if(!result) goto out; lex_scan(&lex, error); if(lex.token != TOKEN_EOF) { error_set(error, &lex, "end of file expected"); json_decref(result); result = NULL; } out: lex_close(&lex); return result; }
static json_t *parse_json(lex_t *lex, json_error_t *error) { lex_scan(lex, error); if(lex->token != '[' && lex->token != '{') { error_set(error, lex, "'[' or '{' expected"); return NULL; } return parse_value(lex, error); }
static json_t *parse_array(lex_t *lex, size_t flags, json_error_t *error) { json_t *array = json_array(); if(!array) return NULL; lex_scan(lex, error); if(lex->token == ']') return array; while(lex->token) { json_t *elem = parse_value(lex, flags, error); if(!elem) goto error; if(json_array_append(array, elem)) { json_decref(elem); goto error; } json_decref(elem); lex_scan(lex, error); if(lex->token != ',') break; lex_scan(lex, error); } if(lex->token != ']') { error_set(error, lex, "']' expected"); goto error; } return array; error: json_decref(array); return NULL; }
static json_ref parse_array(lex_t *lex, size_t flags, json_error_t *error) { auto array = json_array(); if(!array) return nullptr; lex_scan(lex, error); if(lex->token == ']') return array; while(lex->token) { auto elem = parse_value(lex, flags, error); if(!elem) goto error; if(json_array_append(array, elem)) { goto error; } lex_scan(lex, error); if(lex->token != ',') break; lex_scan(lex, error); } if(lex->token != ']') { error_set(error, lex, "']' expected"); goto error; } return array; error: return nullptr; }
/*
 * Deliver the next token, either from the token cache or from the scanner.
 *
 * tcm     - token cache manager; only tcm->active is read here, to build
 *           the token offset.
 * token   - out: receives a pointer to the delivered token. The pointer
 *           refers to storage inside the token cache (GLOBAL(tc)->tokens)
 *           and stays valid only until that storage is reallocated or the
 *           slot is overwritten by a later call.
 * phplval - scratch value the scanner fills in; copied into the token.
 *
 * The scanner is consulted when every cached token has been consumed
 * (count == pos) or when source display is active. In display mode the
 * token is highlighted and returned without being added to the cache.
 *
 * Returns the token type, DONE_EVAL when the scanner reports it, or
 * FAILURE if the cache could not be grown.
 */
int read_next_token(TokenCacheManager *tcm, Token **token, pval *phplval)
{
/*	YY_TLS_VARS;
	TLS_VARS; */

	if (GLOBAL(tc)->count == GLOBAL(tc)->pos || GLOBAL(php3_display_source)) {
		/* we need to read from the lexical scanner */
		Token next_token;
		Token *slot;

		phplval->type = IS_LONG;	/* the lex scanner doesn't always set phplval->type, make sure the type is not 'dirty' */
		phplval->cs_data.switched = 0;
		next_token.token_type = lex_scan(phplval);
		/*printf("Read token: %c (%d)\n",next_token.token_type,next_token.token_type);*/
		if (next_token.token_type == DONE_EVAL) {
			return DONE_EVAL;
		}
		if (GLOBAL(tc)->count >= GLOBAL(tc)->max_tokens) {	/* we need more memory */
			GLOBAL(tc)->tokens = (Token *) erealloc(GLOBAL(tc)->tokens,
													(sizeof(Token) * (GLOBAL(tc)->max_tokens += GLOBAL(tc)->block_size)));
			if (!GLOBAL(tc)->tokens) {
				return FAILURE;
			}
		}
		next_token.phplval = *phplval;
		next_token.lineno = GLOBAL(phplineno);
		next_token.phplval.offset = (tcm->active<<TOKEN_BITS) + GLOBAL(tc)->count;

		/* ugly hack to support $keyword */
		if (last_token_suggests_variable_reference() && is_reserved_word(next_token.token_type)) {
			next_token.phplval.value.str.val = estrndup(phptext,phpleng);
			next_token.phplval.value.str.len = phpleng;
			next_token.phplval.type = IS_STRING;
			next_token.token_type = STRING;
		}
		/* end of ugly hack */

		last_token_type=next_token.token_type;

		/*
		 * Stage the token in the first free cache slot. The growth check
		 * above guarantees the slot exists. Handing out the address of
		 * next_token itself would leave *token dangling as soon as this
		 * function returns, because next_token is an automatic variable.
		 */
		slot = &GLOBAL(tc)->tokens[GLOBAL(tc)->count];
		*slot = next_token;

		if (GLOBAL(php3_display_source)) {
			/* Display mode: the slot is only borrowed, count is not advanced. */
			syntax_highlight(slot);
			*token = slot;
			return (*token)->token_type;
		}
		GLOBAL(tc)->count++;
	}
	*token = &GLOBAL(tc)->tokens[GLOBAL(tc)->pos++];
	return (*token)->token_type;
}
json_t *json_loadf(FILE *input, json_error_t *error) { lex_t lex; json_t *result; if(lex_init(&lex, (get_func)fgetc, (eof_func)feof, input)) return NULL; result = parse_json(&lex, error); if(!result) goto out; lex_scan(&lex, error); if(lex.token != TOKEN_EOF) { error_set(error, &lex, "end of file expected"); json_decref(result); result = NULL; } out: lex_close(&lex); return result; }
/*
 * Scan and return the next token from the lexer state `ls`.
 *
 * nc and nnc are defined outside this block; from their use here they
 * appear to be the current and the following input character
 * (NOTE(review): confirm against their definitions).
 *
 * Order of recognition:
 *   - end of input            -> T_EOF
 *   - whitespace              -> skipped, except that a line break yields T_NL
 *   - "//" and slash-star comments -> skipped
 *   - digits (or '.' + digit) -> T_NUMBER
 *   - letters (or '_' + alnum)-> T_IDENT, then passed to lex_matchKeyword()
 *   - '"' or '\'' quoted text -> T_STRING
 *   - one- and two-character operators
 *   - any other character     -> a token whose type is that character
 *
 * Errors are reported through syntaxError(); scanning continues afterwards.
 */
static z_token lex_scan(z_lexstate *ls)
{
    if(ls->cur.eof) {
        lex_save(ls);
        return lex_newToken(ls, T_EOF, 0);
    }

    if(isWhite(nc)) {
        while(isWhite(nc)) {
            if(isNewLine(nc)) {
                lex_scanNewLine(ls);
                return lex_newToken(ls, T_NL, tk_generic);
            }
            else
                lex_nextchar(ls);
        }
        return lex_scan(ls);
    }

    /* line comment */
    if(nc == '/') {
        if(nnc == '/') {
            /* Stop at end of input too, so a final line without a line
             * break cannot keep this loop running. */
            while(!ls->cur.eof && !isNewLine(nc))
                lex_nextchar(ls);
            return lex_scan(ls);
        }
    }

    /* multi line comment */
    if(nc == '/') {
        if(nnc == '*') {
            lex_nextchar(ls);
            lex_nextchar(ls);
            for(;;) {
                if(ls->cur.eof) {
                    syntaxError(ls, "unterminated comment reached end of file");
                    break;
                }
                else if(nc == '*') {
                    lex_nextchar(ls);
                    if(nc == '/') {
                        lex_nextchar(ls);
                        return lex_scan(ls);
                    }
                }
                else if(isNewLine(nc)) {
                    lex_scanNewLine(ls);
                }
                else
                    lex_nextchar(ls);
            }
        }
    }

    lex_save(ls);

    /* numerical constants */
    if(isDigit(nc)) {
parse_number:
        while(isDigit(nc))
            lex_nextchar(ls);
        if(nc == '.') {
            lex_nextchar(ls);
            while(isDigit(nc))
                lex_nextchar(ls);
            if(nc == '.')
                syntaxError(ls, "invalid numerical constant");
        }
        return lex_newToken(ls, T_NUMBER, tk_numeric);
    }
    /* identifiers */
    else if(isAlpha(nc)) {
parse_ident:
        while(isAlNum(nc) || nc == '_')
            lex_nextchar(ls);

        /* check if it matches a keyword token */
        z_token tk = lex_newToken(ls, T_IDENT, tk_identifier);
        lex_matchKeyword(ls, &tk);
        return tk;
    }
    /* string literals */
    else if(nc == '"' || nc == '\'') {
        char q = nc; /* the quote character that must close the literal */
        lex_nextchar(ls);
        while(nc != q) {
            if(ls->cur.eof) {
                syntaxError(ls, "unterminated string literal reached end of file");
                break;
            }

            /* skip escaped chars: drop the backslash here, the escaped
             * character itself is consumed below without being compared
             * against the closing quote */
            if(nc == '\\') {
                lex_nextchar(ls);
                if(ls->cur.eof)
                    continue; /* reported by the check at the loop top */
            }

            /* lex_scanNewLine() consumes the line break itself (the
             * whitespace and block-comment paths above rely on that), so
             * no extra lex_nextchar() must follow it */
            if(isNewLine(nc))
                lex_scanNewLine(ls);
            else
                lex_nextchar(ls);
        }
        lex_nextchar(ls); /* skip the closing quote */
        return lex_newToken(ls, T_STRING, tk_string);
    }

    /* other multi char tokens */
    switch(nc) {
        case '.': /* may be numeric? */
            lex_nextchar(ls);
            if(isDigit(nc))
                goto parse_number;
            return lex_newToken(ls, '.', 0);
        case '_': /* may be ident? */
            lex_nextchar(ls);
            if(isAlNum(nc))
                goto parse_ident;
            return lex_newToken(ls, '_', 0);
        case '+':
            lex_nextchar(ls);
            if(nc == '+') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_INC, tk_op);
            }
            else if(nc == '=') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_AA, tk_op);
            }
            return lex_newToken(ls, '+', tk_op);
        case '-':
            lex_nextchar(ls);
            if(nc == '-') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_DEC, tk_op);
            }
            else if(nc == '=') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_SA, tk_op);
            }
            return lex_newToken(ls, '-', tk_op);
        case '*':
            lex_nextchar(ls);
            if(nc == '=') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_MA, tk_op);
            }
            return lex_newToken(ls, '*', tk_op);
        case '/':
            lex_nextchar(ls);
            if(nc == '=') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_DA, tk_op);
            }
            return lex_newToken(ls, '/', tk_op);
        case '>':
            lex_nextchar(ls);
            if(nc == '=') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_GTE, tk_op);
            }
            return lex_newToken(ls, '>', tk_op);
        case '<':
            lex_nextchar(ls);
            if(nc == '=') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_LTE, tk_op);
            }
            else if(nc == '>') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_NE, tk_op);
            }
            return lex_newToken(ls, '<', tk_op);
        case '=':
            lex_nextchar(ls);
            if(nc == '=') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_EQ, tk_op);
            }
            return lex_newToken(ls, '=', tk_op);
        case '&':
            lex_nextchar(ls);
            if(nc == '&') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_AND, tk_op);
            }
            return lex_newToken(ls, '&', tk_op);
        case '|':
            lex_nextchar(ls);
            if(nc == '|') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_OR, tk_op);
            }
            return lex_newToken(ls, '|', tk_op);
        case '^':
            lex_nextchar(ls);
            if(nc == '^') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_XOR, tk_op);
            }
            return lex_newToken(ls, '^', tk_op);
        case '!':
            lex_nextchar(ls);
            return lex_newToken(ls, T_NOT, tk_op);
        case ':':
            lex_nextchar(ls);
            if(nc == '=') {
                lex_nextchar(ls);
                return lex_newToken(ls, T_DE, tk_op);
            }
            return lex_newToken(ls, ':', 0);
    }

    /* anything else becomes a single-character token of its own */
    char c = nc;
    lex_nextchar(ls);
    return lex_newToken(ls, c, 0);
}
static json_t *parse_object(lex_t *lex, size_t flags, json_error_t *error) { json_t *object = json_object(); if(!object) return NULL; lex_scan(lex, error); if(lex->token == '}') return object; while(1) { char *key; json_t *value; if(lex->token != TOKEN_STRING) { error_set(error, lex, "string or '}' expected"); goto error; } key = lex_steal_string(lex); if(!key) return NULL; if(flags & JSON_REJECT_DUPLICATES) { if(json_object_get(object, key)) { jsonp_free(key); error_set(error, lex, "duplicate object key"); goto error; } } lex_scan(lex, error); if(lex->token != ':') { jsonp_free(key); error_set(error, lex, "':' expected"); goto error; } lex_scan(lex, error); value = parse_value(lex, flags, error); if(!value) { jsonp_free(key); goto error; } if(json_object_set_nocheck(object, key, value)) { jsonp_free(key); json_decref(value); goto error; } json_decref(value); jsonp_free(key); lex_scan(lex, error); if(lex->token != ',') break; lex_scan(lex, error); } if(lex->token != '}') { error_set(error, lex, "'}' expected"); goto error; } return object; error: json_decref(object); return NULL; }
/*
 * Tokenize `source` with the language scanner and store the tokens in
 * `return_value` as an array.
 *
 * Tokens with a type >= 256 are stored as a three-element array
 * (type, text, line); all other tokens are stored as their plain text.
 * After T_HALT_COMPILER, three more tokens (not counting whitespace, open
 * tags and comments) are collected and whatever input remains is stored as
 * one T_INLINE_HTML entry.
 *
 * The current lexical state is saved before scanning and restored
 * afterwards. Returns 1 on success, 0 if the string could not be prepared
 * for scanning.
 */
static zend_bool tokenize(zval *return_value, zend_string *source)
{
	zval source_zval;
	zend_lex_state original_lex_state;
	zval token;
	zval keyword;
	int token_type;
	int token_line = 1;
	int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */

	ZVAL_STR_COPY(&source_zval, source);
	zend_save_lexical_state(&original_lex_state);

	/* NOTE(review): source_zval is not released on this failure path --
	 * confirm whether zend_prepare_string_for_scanning() cleans it up. */
	if (zend_prepare_string_for_scanning(&source_zval, "") == FAILURE) {
		zend_restore_lexical_state(&original_lex_state);
		return 0;
	}

	LANG_SCNG(yy_state) = yycINITIAL;
	array_init(return_value);

	ZVAL_UNDEF(&token);
	while ((token_type = lex_scan(&token))) {

		/* A close tag whose text does not end in '>' bumps the line counter. */
		if (token_type == T_CLOSE_TAG && zendtext[zendleng - 1] != '>') {
			CG(zend_lineno)++;
		}

		if (token_type >= 256) {
			/* Build the (type, text, line) triple for this token. */
			array_init(&keyword);
			add_next_index_long(&keyword, token_type);
			if (token_type == T_END_HEREDOC) {
				/* Apply a pending line increment before the line is recorded. */
				if (CG(increment_lineno)) {
					token_line = ++CG(zend_lineno);
					CG(increment_lineno) = 0;
				}
			}
			add_next_index_stringl(&keyword, (char *)zendtext, zendleng);
			add_next_index_long(&keyword, token_line);
			add_next_index_zval(return_value, &keyword);
		} else {
			/* Low-numbered token: store only its text. */
			add_next_index_stringl(return_value, (char *)zendtext, zendleng);
		}

		/* Release any value the scanner attached to the token. */
		if (Z_TYPE(token) != IS_UNDEF) {
			zval_dtor(&token);
			ZVAL_UNDEF(&token);
		}

		/* after T_HALT_COMPILER collect the next three non-dropped tokens */
		if (need_tokens != -1) {
			/* The counter is only decremented for tokens that are not dropped. */
			if (token_type != T_WHITESPACE && token_type != T_OPEN_TAG
				&& token_type != T_COMMENT && token_type != T_DOC_COMMENT
				&& --need_tokens == 0
			) {
				/* fetch the rest into a T_INLINE_HTML */
				if (zendcursor != zendlimit) {
					array_init(&keyword);
					add_next_index_long(&keyword, T_INLINE_HTML);
					add_next_index_stringl(&keyword, (char *)zendcursor, zendlimit - zendcursor);
					add_next_index_long(&keyword, token_line);
					add_next_index_zval(return_value, &keyword);
				}
				break;
			}
		} else if (token_type == T_HALT_COMPILER) {
			need_tokens = 3;
		}

		/* The next token starts on whatever line the scanner is on now. */
		token_line = CG(zend_lineno);
	}

	zval_dtor(&source_zval);
	zend_restore_lexical_state(&original_lex_state);

	return 1;
}
static json_t *parse_object(lex_t *lex, json_error_t *error) { json_t *object = json_object(); if(!object) return NULL; lex_scan(lex, error); if(lex->token == '}') return object; while(1) { char *key; json_t *value; if(lex->token != TOKEN_STRING) { error_set(error, lex, "string or '}' expected"); goto error; } key = lex_steal_string(lex); if(!key) return NULL; lex_scan(lex, error); if(lex->token != ':') { free(key); error_set(error, lex, "':' expected"); goto error; } lex_scan(lex, error); value = parse_value(lex, error); if(!value) { free(key); goto error; } if(json_object_set_nocheck(object, key, value)) { free(key); json_decref(value); goto error; } json_decref(value); free(key); lex_scan(lex, error); if(lex->token != ',') break; lex_scan(lex, error); } if(lex->token != '}') { error_set(error, lex, "'}' expected"); goto error; } return object; error: json_decref(object); return NULL; }
/*
 * Print the tokens delivered by lex_scan() as HTML, wrapping runs of tokens
 * in <span style="color: ..."> elements.
 *
 * The colour for each token class is taken from `syntax_highlighter_ini`.
 * A new span is opened only when the colour changes; tokens in the
 * highlight_html colour are written without a span of their own.
 * Whitespace is written as-is and never changes the current colour.
 */
ZEND_API void zend_highlight(zend_syntax_highlighter_ini *syntax_highlighter_ini)
{
	zval token;
	int token_type;
	char *last_color = syntax_highlighter_ini->highlight_html;
	char *next_color;

	/* Outer wrapper: <code> plus a span in the html colour. */
	zend_printf("<code>");
	zend_printf("<span style=\"color: %s\">\n", last_color);
	/* highlight stuff coming back from zendlex() */
	ZVAL_UNDEF(&token);
	while ((token_type=lex_scan(&token))) {
		/* Pick the colour for this token. */
		switch (token_type) {
			case T_INLINE_HTML:
				next_color = syntax_highlighter_ini->highlight_html;
				break;
			case T_COMMENT:
			case T_DOC_COMMENT:
				next_color = syntax_highlighter_ini->highlight_comment;
				break;
			case T_OPEN_TAG:
			case T_OPEN_TAG_WITH_ECHO:
			case T_CLOSE_TAG:
			case T_LINE:
			case T_FILE:
			case T_DIR:
			case T_TRAIT_C:
			case T_METHOD_C:
			case T_FUNC_C:
			case T_NS_C:
			case T_CLASS_C:
				next_color = syntax_highlighter_ini->highlight_default;
				break;
			case '"':
			case T_ENCAPSED_AND_WHITESPACE:
			case T_CONSTANT_ENCAPSED_STRING:
				next_color = syntax_highlighter_ini->highlight_string;
				break;
			case T_WHITESPACE:
				zend_html_puts((char*)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));  /* no color needed */
				ZVAL_UNDEF(&token);
				continue;
				break;
			default:
				/* A token without an attached value is coloured as a keyword. */
				if (Z_TYPE(token) == IS_UNDEF) {
					next_color = syntax_highlighter_ini->highlight_keyword;
				} else {
					next_color = syntax_highlighter_ini->highlight_default;
				}
				break;
		}

		/* On a colour change, close the old span and open the new one;
		 * the html colour has no span of its own. Colours are compared
		 * by pointer, not by string content. */
		if (last_color != next_color) {
			if (last_color != syntax_highlighter_ini->highlight_html) {
				zend_printf("</span>");
			}
			last_color = next_color;
			if (last_color != syntax_highlighter_ini->highlight_html) {
				zend_printf("<span style=\"color: %s\">", last_color);
			}
		}

		zend_html_puts((char*)LANG_SCNG(yy_text), LANG_SCNG(yy_leng));

		/* Release the string attached to the token, except for the token
		 * types listed here. */
		if (Z_TYPE(token) == IS_STRING) {
			switch (token_type) {
				case T_OPEN_TAG:
				case T_OPEN_TAG_WITH_ECHO:
				case T_CLOSE_TAG:
				case T_WHITESPACE:
				case T_COMMENT:
				case T_DOC_COMMENT:
					break;
				default:
					zend_string_release(Z_STR(token));
					break;
			}
		}
		ZVAL_UNDEF(&token);
	}

	/* Close the span that is still open, then the outer span and <code>. */
	if (last_color != syntax_highlighter_ini->highlight_html) {
		zend_printf("</span>\n");
	}
	zend_printf("</span>\n");
	zend_printf("</code>");

	/* Discard parse errors thrown during tokenization */
	zend_clear_exception();
}
/*
 * Run the language scanner over the current input and store the tokens in
 * `return_value` as an array.
 *
 * Tokens with a type >= 256 are stored as a three-element array
 * (type, text, line); all other tokens are stored as their plain text.
 * After T_HALT_COMPILER, three more tokens (not counting whitespace, open
 * tags and comments) are collected and whatever input remains is stored as
 * one T_INLINE_HTML entry.
 */
static void tokenize(zval *return_value)
{
	zval token;
	zval keyword;
	int token_type;
	zend_bool destroy;
	int token_line = 1;
	int need_tokens = -1; /* for __halt_compiler lexing. -1 = disabled */

	array_init(return_value);
	ZVAL_NULL(&token);

	for (;;) {
		token_type = lex_scan(&token);
		if (!token_type) {
			break;
		}

		/* A close tag whose text does not end in '>' bumps the line counter. */
		if (token_type == T_CLOSE_TAG && zendtext[zendleng - 1] != '>') {
			CG(zend_lineno)++;
		}

		/* The value attached to these token types is not destroyed here. */
		if (token_type == T_CLOSE_TAG
			|| token_type == T_OPEN_TAG
			|| token_type == T_OPEN_TAG_WITH_ECHO
			|| token_type == T_WHITESPACE
			|| token_type == T_COMMENT
			|| token_type == T_DOC_COMMENT) {
			destroy = 0;
		} else {
			destroy = 1;
		}

		if (token_type < 256) {
			/* Low-numbered token: store only its text. */
			add_next_index_stringl(return_value, (char *)zendtext, zendleng);
		} else {
			/* Build the (type, text, line) triple for this token. */
			array_init(&keyword);
			add_next_index_long(&keyword, token_type);
			if (token_type == T_END_HEREDOC && CG(increment_lineno)) {
				/* Apply a pending line increment before the line is recorded. */
				token_line = ++CG(zend_lineno);
				CG(increment_lineno) = 0;
			}
			add_next_index_stringl(&keyword, (char *)zendtext, zendleng);
			add_next_index_long(&keyword, token_line);
			add_next_index_zval(return_value, &keyword);
		}

		if (destroy && Z_TYPE(token) != IS_NULL) {
			zval_dtor(&token);
		}
		ZVAL_NULL(&token);

		/* after T_HALT_COMPILER collect the next three non-dropped tokens */
		if (need_tokens != -1) {
			int dropped = (token_type == T_WHITESPACE
				|| token_type == T_OPEN_TAG
				|| token_type == T_COMMENT
				|| token_type == T_DOC_COMMENT);

			if (!dropped) {
				--need_tokens;
				if (need_tokens == 0) {
					/* fetch the rest into a T_INLINE_HTML */
					if (zendcursor != zendlimit) {
						array_init(&keyword);
						add_next_index_long(&keyword, T_INLINE_HTML);
						add_next_index_stringl(&keyword, (char *)zendcursor, zendlimit - zendcursor);
						add_next_index_long(&keyword, token_line);
						add_next_index_zval(return_value, &keyword);
					}
					break;
				}
			}
		} else if (token_type == T_HALT_COMPILER) {
			need_tokens = 3;
		}

		/* The next token starts on whatever line the scanner is on now. */
		token_line = CG(zend_lineno);
	}
}
/*
 * Re-emit the tokens delivered by lex_scan() with whitespace rewritten
 * according to brace nesting.
 *
 * Whitespace tokens are not written directly; instead every character in
 * them is counted in emit_whitespace[], indexed by character value. The
 * counts are consumed when the next non-whitespace token is written:
 * pending newlines are re-emitted followed by one " " per nesting level,
 * otherwise handle_whitespace() decides what to write.
 */
ZEND_API void zend_indent()
{
	zval token;
	int token_type;
	int in_string=0;
	int nest_level=0;
	int emit_whitespace[256];
	int i;
	TSRMLS_FETCH();

	memset(emit_whitespace, 0, sizeof(int)*256);

	/* highlight stuff coming back from zendlex() */
	token.type = 0;
	while ((token_type=lex_scan(&token TSRMLS_CC))) {
		switch (token_type) {
			case T_INLINE_HTML:
				zend_write(LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
				break;
			case T_WHITESPACE: {
					token.type = 0;
					/* eat whitespace, emit newlines */
					for (i=0; i<LANG_SCNG(yy_leng); i++) {
						emit_whitespace[(unsigned char) LANG_SCNG(yy_text)[i]]++;
					}
					continue;
				}
				break;
			case '"':
				in_string = !in_string;
				/* break missing intentionally */
			default:
				if (token.type==0) {
					/* keyword */
					switch (token_type) {
						case ',':
							/* writes ", " and then continues into the default
							 * printout below, which also writes the token text */
							ZEND_PUTS(", ");
							goto dflt_printout;
							break;
						case '{':
							nest_level++;
							if (emit_whitespace['\n']>0) {
								ZEND_PUTS(" {\n");
								memset(emit_whitespace, 0, sizeof(int)*256);
							} else {
								ZEND_PUTS("{");
							}
							break;
						case '}':
							nest_level--;
							/* make sure the closing brace starts on its own line */
							if (emit_whitespace['\n']==0) {
								ZEND_PUTS("\n");
							}
							for (i=0; i<nest_level; i++) {
								ZEND_PUTS(" ");
							}
							goto dflt_printout;
							break;
dflt_printout:
						default:
							if (emit_whitespace['\n']>0) {
								/* replay the pending newlines, then indent
								 * by the current nesting level */
								for (i=0; i<emit_whitespace['\n']; i++) {
									ZEND_PUTS("\n");
								}
								memset(emit_whitespace, 0, sizeof(int)*256);
								for (i=0; i<nest_level; i++) {
									ZEND_PUTS(" ");
								}
							} else {
								handle_whitespace(emit_whitespace);
							}
							zend_write(LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
							break;
					}
				} else {
					/* token carries a value: both branches below write the
					 * token text unchanged */
					handle_whitespace(emit_whitespace);
					if (in_string) {
						zend_write(LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
						/* a part of a string */
					} else {
						zend_write(LANG_SCNG(yy_text), LANG_SCNG(yy_leng));
					}
				}
				break;
		}
		/* Free the string attached to the token, except for the token
		 * types listed here. */
		if (token.type == IS_STRING) {
			switch (token_type) {
				case T_OPEN_TAG:
				case T_CLOSE_TAG:
				case T_WHITESPACE:
					break;
				default:
					efree(token.value.str.val);
					break;
			}
		}
		token.type = 0;
	}
}
/*
 * Advance the lexer by one token: scan the next token with lex_scan() and
 * store it in ls->token.
 */
void lex_nexttoken(z_lexstate *ls)
{
    z_token scanned = lex_scan(ls);

    ls->token = scanned;
}