// Should only be called when we actually have the start of an array // Otherwise it is an internal error bool JSON::parse_json_array() { NOT_PRODUCT(const char* prev_pos); int c; mark_pos(); // Check that we are not called in error if (expect_any("[", "array start character", INTERNAL_ERROR) <= 0) { return false; } if (!callback(JSON_ARRAY_BEGIN, NULL, level++)) { return false; } for (;;) { mark_pos(); c = skip_to_token(); if (c == 0) { error(SYNTAX_ERROR, "EOS when expecting a json value or array end"); return false; } else if (c < 0) { return false; } else if (c == ']') { // We got here from either empty array "[]" or ending comma "[1,]" next(); break; } mark_pos(); NOT_PRODUCT(prev_pos = pos); if (parse_json_value() == false) { return false; } assert(pos > prev_pos, "parsing stalled"); c = skip_to_token(); mark_pos(); if (expect_any(",]", "value separator or array end") <= 0) { return false; } if (c == ']') { break; } } assert(c == ']', "array parsing ended without array end token (']')"); return callback(JSON_ARRAY_END, NULL, --level); }
//------------------------------------------------------------------- // Process one entry "@default VAR VALUE" //------------------------------------------------------------------- static const char* get_default(sc_param *p, const char *ptr, int isScript) { ptr = skip_to_token(ptr); if (p) { int type = MENUITEM_INT|MENUITEM_SCRIPT_PARAM; int range = 0; if (strncmp(ptr, "true", 4) == 0) { p->val = 1; type = MENUITEM_BOOL|MENUITEM_SCRIPT_PARAM; range = MENU_MINMAX(1,0); // Force boolean data type in Lua (ToDo: this is clunky, needs fixing) } else if (strncmp(ptr, "false", 5) == 0) { p->val = 0; type = MENUITEM_BOOL|MENUITEM_SCRIPT_PARAM; range = MENU_MINMAX(1,0); // Force boolean data type in Lua (ToDo: this is clunky, needs fixing) } else { p->val = strtol(ptr, NULL, 0); } p->old_val = p->val; if (isScript) // Loading from script file (rather than saved param set file) { p->def_val = p->val; p->range = range; p->range_type = type; } } return skip_token(ptr); }
/**
 * Consume tokens belonging to the content of curr_tag, which the caller wants
 * to discard, until the point where that content ends or EOF is reached.
 *
 * Stop conditions (checked per token, in this order):
 *  - a comment start: the comment is skipped and scanning continues;
 *  - a begin-tag start while curr_tag->pair is Opt_PAIR: the offset is moved
 *    back by one so the caller sees that tag again, and scanning stops;
 *  - an end-tag start: for Must_PAIR tags scanning stops only when the closing
 *    tag resolves to curr_tag itself; for every other pairing it stops at the
 *    first end tag. In both cases the rest of the end tag is skipped.
 *
 * @param tok       lexer state, advanced in place
 * @param curr_tag  the tag whose content is being ignored
 * @return always 0
 */
int htmlParse::skip_ignore_part(stToken *tok, stTagProc *curr_tag)
{
    for (;;) {
        if (htmlLex::get_token_mem(tok) == TOK_EOF)
            break;

        const int kind = tok->tok_val;

        if (kind == CMNT_BEGIN) {
            htmlLex::skip_to_cmnt_end(tok);
            continue;
        }

        if (kind == BTAG_BEGIN && curr_tag->pair == Opt_PAIR) {
            // Un-read one character so the new begin tag is tokenized again.
            tok->offset--;
            break;
        }

        if (kind == ETAG_BEGIN) {
            if (curr_tag->pair != Must_PAIR) {
                // Any end tag terminates the ignored region.
                skip_to_token(tok, TAG_END);
                break;
            }

            // Must_PAIR: only the matching closing tag terminates the region.
            skip_to_two_token(tok, STRING, TAG_END);
            if (tok->tok_val == STRING &&
                htmlTagEntity::tag_idx(tok->tok_str) == curr_tag) {
                skip_to_token(tok, TAG_END);
                break;
            }
            continue;
        }

        if (kind == STRING) {
            // The lookup result is not used; the call itself is retained.
            (void)htmlTagEntity::tag_idx(tok->tok_str);
        }
    }

    return 0;
}
bool JSON::parse_json_value() { int c; c = skip_to_token(); if (c == -1) { return false; } // Must start with object or array if (level == 0) { switch (c) { case '{': if (parse_json_object() == false) { return false; } c = skip_to_token(); if (c > 0) { mark_pos(); error(SYNTAX_ERROR, "Only one top level object/array is allowed."); return false; } else if (c < 0) { return false; } return true; case '[': if (parse_json_array() == false) { return false; } c = skip_to_token(); if (c > 0) { mark_pos(); error(SYNTAX_ERROR, "Only one top level object/array is allowed."); return false; } else if (c < 0) { return false; } return true; case 0: error(SYNTAX_ERROR, "EOS was encountered before any json declarations"); return false; default: error(SYNTAX_ERROR, "Json must start with an object or an array."); return false; } } else { // level > 0 switch (c) { case '{': return parse_json_object(); case '[': return parse_json_array(); case '"': return parse_json_string(); case '-': case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return parse_json_number(); case 't': return parse_json_symbol("true", JSON_TRUE); case 'f': return parse_json_symbol("false", JSON_FALSE); case 'n': return parse_json_symbol("null", JSON_NULL); case 0: error(SYNTAX_ERROR, "EOS was encountered when expecting a json value."); return false; default: error(SYNTAX_ERROR, "Could not parse as a json value (did you forget to quote your strings?)."); return false; } } }
// Should only be called when we actually have the start of an object // Otherwise it is an internal error bool JSON::parse_json_object() { NOT_PRODUCT(const char* prev_pos); int c; mark_pos(); // Check that we are not called in error if (expect_any("{", "object start", INTERNAL_ERROR) <= 0) { return false; } if (!callback(JSON_OBJECT_BEGIN, NULL, level++)) { return false; } for (;;) { mark_pos(); c = skip_to_token(); if (c == 0) { error(SYNTAX_ERROR, "EOS when expecting an object key or object end"); return false; } else if (c < 0) { return false; } else if (c == '}') { // We got here from either empty object "{}" or ending comma "{a:1,}" next(); break; } NOT_PRODUCT(prev_pos = pos); if (parse_json_key() == false) { return false; } assert(pos > prev_pos, "parsing stalled"); skip_to_token(); mark_pos(); if (expect_any(":", "object key-value separator") <= 0) { return false; } skip_to_token(); mark_pos(); NOT_PRODUCT(prev_pos = pos); if (parse_json_value() == false) { return false; } assert(pos > prev_pos, "parsing stalled"); c = skip_to_token(); mark_pos(); if (expect_any(",}", "value separator or object end") <= 0) { return false; } if (c == '}') { break; } } assert(c == '}', "array parsing ended without object end token ('}')"); return callback(JSON_OBJECT_END, NULL, --level); }
/** HTML 문서를 parsing 하면서 text로 변환. */ int htmlParse::parse(stToken *tok, char *text, int maxlen) { int tok_val=0, prev_tok_val=0, prev_ch=0; int is_in_tag=0; uint4 tmp=0; int not_moved=0; char *start=text; stTagProc *curr_tag=0; //stTagProc **prev_tag=0; stEntityProc *ent_proc; int tag_depth=0; int is_in_BODY_tag=0; int is_in_A_tag=0; int is_in_PRE_tag=0; // 2003.11.8 int hlink_chars = 0; //int newline_delayed=0; // Optional Pair && Newline tag가 시작한 경우 TRUE //int dont_newline=0; // TRUE means "dont write newline" int pair_mark=0; int words_in_line=0; int b_tag_first_in_line=0; // 라인 첫머리에 B tag가 나온 경우 int glossary_marked=0; int is_glossary=0; tok->offset = 0; tok->line_num=1; *text = 0; maxlen -= 20; while(tok->offset < tok->src_len && (int)((uint4)text-(uint4)start) < maxlen) { tmp = tok->offset; htmlLex::get_token_mem(tok); #ifdef BUG if (tok->tok_len>=20) { printf("long tok: line=%d, tok_val=%d, CH=%c\n", tok->line_num, tok->tok_val, tok->src_mem[tok->offset-1]); fflush(stdout); printf("tok=%s\n", tok->tok_str); fflush(stdout); } #endif #ifdef DEB /* if (tok->tok_val==0 || tok->line_num==1) { printf("line=%d, tok_val=%d, CH=%c\n", tok->line_num, tok->tok_val, tok->src_mem[tok->offset-1]); printf("offset=%d, text-start=%d\n", tok->offset, text-start); } */ #endif if (text > start) { prev_ch = *(text-1); if (prev_ch=='\n') { words_in_line = 0; b_tag_first_in_line = 0; glossary_marked = 0; } } if (tok->tok_val == TOK_EOF) break; if (tok->tok_val == URL_CMNT) { //printf("%s", tok->tok_str); continue; } if (tok->offset==tmp) { #ifdef BUG //printf("parse(): offset not moved: line=%d, CH=%d(%c)\n", // tok->line_num, tok->src_mem[tok->offset-1], tok->src_mem[tok->offset-1]); #endif tok->offset++; if (++not_moved >= 2) break; else continue; } tok_val = tok->tok_val; if (tok_val == BTAG_BEGIN) { skip_to_two_token(tok, STRING, TAG_END); if (tok->tok_val != STRING) { // must be a HTML tag skip_to_token(tok, TAG_END); continue; } //prn_tabs(); //fprintf(log,"%s-->:%d :%d\n", 
tok->tok_str, tok->line_num, tok->offset); curr_tag = htmlTagEntity::tag_idx(tok->tok_str); if (curr_tag == NULL) { skip_to_token(tok, TAG_END); continue; } tag_depth++; #ifdef MAIN_TEXT_ONLY if (curr_tag->pair != Not_PAIR && is_in_BODY_tag && curr_tag!=htmlTagEntity::A_tag) push_tag(curr_tag, tag_depth, (int)((uint4)text-(uint4)start)); if (curr_tag==htmlTagEntity::BODY_tag) { is_in_BODY_tag = 1; } #endif if (curr_tag==htmlTagEntity::A_tag) { is_in_A_tag = 1; } else if (curr_tag==htmlTagEntity::PRE_tag) { is_in_PRE_tag = 1; } else if(curr_tag==htmlTagEntity::B_tag||curr_tag==htmlTagEntity::DT_tag) { if (words_in_line==0) b_tag_first_in_line = 1; } else if (curr_tag==htmlTagEntity::TITLE_tag) { get_hyperlink_title(tok->src_mem + tok->offset , htmlTitle, HTMLTITLE_LEN); #ifdef DEB PRN("tok->scr_mem=%X, offset=%d, title=%X\n", tok->src_mem, tok->offset, htmlTitle); PRN("title: %s (%d)\n", htmlTitle, strlen(htmlTitle) ); #endif is_glossary = is_glossay_mode(htmlTitle); } else if (curr_tag==htmlTagEntity::BR_tag) { *text++ = '\n'; } if (curr_tag->proc == 0) { // 0 if (curr_tag->pair != Not_PAIR) skip_ignore_part(tok, curr_tag); else skip_to_token(tok, TAG_END); if (curr_tag->newline) { if (prev_ch != '\n') *text++ = '\n'; } continue; } if (curr_tag==htmlTagEntity::TITLE_tag) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '['; *text++ = '['; } else if (curr_tag->proc == 1) { } else if (!pair_mark && curr_tag->proc == 2) { pair_mark = 2; *text++ = '<'; } else if (!pair_mark && curr_tag->proc == 3) { pair_mark = 3; *text++ = '\''; } else if (!pair_mark && curr_tag->proc == 4) { pair_mark = 4; *text++ = '['; } else if (curr_tag->proc == 5) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '*'; *text++ = ' '; words_in_line = 0; } else if (curr_tag->proc == 6) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '\n'; } skip_to_token(tok, TAG_END); if (!pair_mark && prev_ch != ' ') *text++ = ' '; //if (!ISSPACE(*text)) *text++ = ' '; }// BTAG_BEGIN '<' else if (tok_val == 
ETAG_BEGIN) { skip_to_two_token(tok, STRING, TAG_END); if (tok->tok_val != STRING) { // must be a HTML tag skip_to_token(tok, TAG_END); continue; } curr_tag = htmlTagEntity::tag_idx(tok->tok_str); if (curr_tag == NULL) { skip_to_token(tok, TAG_END); continue; } #ifdef MAIN_TEXT_ONLY tag_depth--; if (curr_tag==htmlTagEntity::BODY_tag) { is_in_BODY_tag = 0; } if (curr_tag->pair != Not_PAIR && is_in_BODY_tag && curr_tag!=htmlTagEntity::A_tag) end_tag(curr_tag, (int)((uint4)text-(uint4)start)); #endif if (curr_tag==htmlTagEntity::A_tag) { is_in_A_tag = 0; #ifdef MARK_A_LINK *text++ = '}'; #endif } else if (curr_tag==htmlTagEntity::PRE_tag) { is_in_PRE_tag = 0; } else if(curr_tag==htmlTagEntity::DT_tag || (curr_tag==htmlTagEntity::B_tag && (b_tag_first_in_line && is_glossary))) { if (glossary_marked==0) { *text++ = ' '; *text++ = ':'; //*text++ = ' '; b_tag_first_in_line = 0; glossary_marked = 1; } } else if (curr_tag==htmlTagEntity::TITLE_tag) { *text++ = ']'; *text++ = ']'; *text++ = '\n'; } else if (curr_tag->proc == 1) { } else if (pair_mark==2 && curr_tag->proc == 2) { pair_mark = 0; *text++ = '>'; } else if (pair_mark==3 && curr_tag->proc == 3) { pair_mark = 0; *text++ = '\''; } else if (pair_mark==4 && curr_tag->proc == 4) { pair_mark = 0; *text++ = ']'; } else if (curr_tag->proc == 5) { } else if (curr_tag->proc == 6) { if (prev_ch != '\n') *text++ = '\n'; *text++ = '\n'; } if (curr_tag->newline) { *text++ = '\n'; } skip_to_token(tok, TAG_END); }// ETAG_BEGIN '</' else if (tok_val == STAG_END) { skip_to_token(tok, TAG_END); is_in_tag = 0; tag_depth--; } else if (tok_val == TAG_EXC) { tmp = skip_to_token(tok, TAG_END); //fprintf(log,"<- > skipped %d\n", tmp); } else if (tok_val == CMNT_BEGIN) { //fprintf(log,"Cmnt -->: %d :%d\n", tok->line_num, tok->offset); htmlLex::skip_to_cmnt_end(tok); //fprintf(log,"Cmnt <--: %d :%d\n", tok->line_num, tok->offset); } else if (tok_val == ENTITY_STR) { if (prev_ch !=' ') *text++ = ' '; //2002.12.2 ent_proc = 
htmlTagEntity::entity_idx(tok->tok_str); if (ent_proc && ent_proc->conv[0]) { #ifdef BUG //prn_ent_proc(ent_proc); #endif strcpy(text, ent_proc->conv); text += strlen(ent_proc->conv); } else { //fprintf(log,"ignored entity = %s\n", tok->tok_str); } } else if (tok_val == ENTITY_NUM) { if (prev_ch !=' ') *text++ = ' '; //2002.12.2 ent_proc = htmlTagEntity::entity_id_idx(tok->tok_realval); if (ent_proc && ent_proc->conv[0]) { #ifdef BUG //prn_ent_proc(ent_proc); #endif strcpy(text, ent_proc->conv); text += strlen(ent_proc->conv); } else { if (tok->tok_realval < 0x80) { // 2005.7.19 *text = (char)tok->tok_realval ; text++; } } } else { char *t = text; if (is_in_PRE_tag || tok->tok_len>1 || tok_val == STRING || tok_val == NUMBER) { //if (prev_tok_val == TAG_END && *text != ' ') *text++ = ' '; // 2003. 3.20 //if (prev_tok_val == TAG_END && isalnum(prev_ch) ) // *text++ = ' '; words_in_line++; strcpy(text, tok->tok_str); text += tok->tok_len; } // 2002.10.16 한글은 space로 전환 else if (tok->tok_val == FR_STR) { if (do_prn_hangul) { if (prev_ch != ' ') *text++ = ' '; strcpy(text, tok->tok_str); text += tok->tok_len; words_in_line++; } else { if (prev_ch != ' ') *text++ = ' '; } } else if (tok->tok_len==1) { if (curr_tag==htmlTagEntity::PRE_tag) { #ifdef ODD_CHAR if ( (tok_val & 0xF0) != 0x90) *text++ = tok_val; else if (tok_val==0x92) { if (prev_tok_val != 0x92) *text++ = '\''; } #else *text++ = tok_val; #endif } //else if (tok_val == '\n' || tok_val == '\r') { else if (ISSPACE(tok_val)) { if (prev_ch != ' ') *text++ = ' '; } else { if (prev_ch==':' && tok_val==':') { } else *text++ = tok_val; } } if (is_in_A_tag) hlink_chars += (int)(text - t); } prev_tok_val = tok->tok_val; }// while(1) *text = 0; if ((int)(text-start) >= maxlen-5) { PRN("parse(): too far !! 
maxlen=%d, %d\n", maxlen, text-start); } if ((int)(text-start) < maxlen-5) memset(text, 0, 4); #ifdef DEB PRN("text=%X, start=%X, text=%d, start=%d\n", text, start, (int)text % 10000, (int)start % 10000); PRN("start[0]=%d text[0]=%d\n", start[0], text[0]); #endif return ((int)text-(int)start); }