Example #1
0
// Should only be called when we actually have the start of an array
// Otherwise it is an internal error
bool JSON::parse_json_array() {
  NOT_PRODUCT(const char* prev_pos);
  int c;

  mark_pos();
  // Check that we are not called in error
  if (expect_any("[", "array start character", INTERNAL_ERROR) <= 0) {
    return false;
  }

  if (!callback(JSON_ARRAY_BEGIN, NULL, level++)) {
    return false;
  }

  for (;;) {
    mark_pos();
    c = skip_to_token();
    if (c == 0) {
      error(SYNTAX_ERROR, "EOS when expecting a json value or array end");
      return false;
    } else if (c < 0) {
      return false;
    } else if (c == ']') {
      // We got here from either empty array "[]" or ending comma "[1,]"
      next();
      break;
    }

    mark_pos();
    NOT_PRODUCT(prev_pos = pos);
    if (parse_json_value() == false) {
      return false;
    }
    assert(pos > prev_pos, "parsing stalled");

    c = skip_to_token();
    mark_pos();
    if (expect_any(",]", "value separator or array end") <= 0) {
      return false;
    }
    if (c == ']') {
      break;
    }
  }

  assert(c == ']', "array parsing ended without array end token (']')");
  return callback(JSON_ARRAY_END, NULL, --level);
}
Example #2
0
//-------------------------------------------------------------------
// Handle a single "@default VAR VALUE" entry.
//   p        - parameter record to fill in; may be NULL, in which case the
//              value is skipped without being stored
//   ptr      - text positioned before the VALUE token
//   isScript - non-zero when loading from the script file itself (rather
//              than a saved param set file); only then are the default,
//              range and range type recorded
// Returns the pointer produced by skip_token() for the VALUE token.
//-------------------------------------------------------------------
static const char* get_default(sc_param *p, const char *ptr, int isScript)
{
    int kind = MENUITEM_INT|MENUITEM_SCRIPT_PARAM;
    int limits = 0;
    int is_true;
    int is_false;

    ptr = skip_to_token(ptr);
    if (!p)
        return skip_token(ptr);

    // "true" / "false" are recognised by prefix; anything else is parsed as a number
    is_true = (strncmp(ptr, "true", 4) == 0);
    is_false = !is_true && (strncmp(ptr, "false", 5) == 0);

    if (is_true || is_false)
    {
        p->val = is_true;
        kind = MENUITEM_BOOL|MENUITEM_SCRIPT_PARAM;
        limits = MENU_MINMAX(1,0);  // Force boolean data type in Lua (ToDo: this is clunky, needs fixing)
    }
    else
    {
        p->val = strtol(ptr, NULL, 0);
    }

    p->old_val = p->val;
    if (isScript)
    {
        p->def_val = p->val;
        p->range = limits;
        p->range_type = kind;
    }

    return skip_token(ptr);
}
Example #3
0
/**
 * Skips the part of the input that belongs to an ignored tag.
 *
 * Tokens are read until one of the following ends the skip:
 *  - end of input;
 *  - a new begin tag while curr_tag is an Opt_PAIR tag (the lexer offset is
 *    stepped back by one so the caller sees that begin tag again);
 *  - an end tag: for a Must_PAIR tag only the end tag naming curr_tag itself
 *    counts, for every other pairing the first end tag does.
 * Comments met on the way are skipped as a whole.
 *
 * @param tok       lexer state, advanced past the skipped input
 * @param curr_tag  tag whose content is being ignored; must not be NULL
 * @return always 0
 */
int htmlParse::skip_ignore_part(stToken *tok, stTagProc *curr_tag)
{
	while (htmlLex::get_token_mem(tok) != TOK_EOF) {
		if (tok->tok_val == CMNT_BEGIN) {
			htmlLex::skip_to_cmnt_end(tok);
		}
		else if (tok->tok_val == BTAG_BEGIN && curr_tag->pair==Opt_PAIR) {
			// Optional end tag: the next begin tag closes it implicitly.
			// Give the begin-tag token back to the caller.
			tok->offset--;
			break;
		}
		else if (tok->tok_val == ETAG_BEGIN) {
			if (curr_tag->pair != Must_PAIR) {
				// Any end tag terminates a tag that is not Must_PAIR
				skip_to_token(tok, TAG_END);
				break;
			}

			// Must_PAIR: only the matching end tag terminates the skip
			skip_to_two_token(tok, STRING, TAG_END);
			if (tok->tok_val == STRING &&
				htmlTagEntity::tag_idx(tok->tok_str) == curr_tag) {
				skip_to_token(tok, TAG_END);
				break;
			}
		}
		else if (tok->tok_val == STRING) {
			// The lookup result was never used here. The call itself is kept
			// because tag_idx() is defined elsewhere and may have side
			// effects -- TODO confirm and drop.
			htmlTagEntity::tag_idx(tok->tok_str);
		}
	}
	return 0;
}
Example #4
0
// Parses the JSON value that starts at the next token.
// At nesting level 0 only an object or an array is accepted, and any further
// token after it is a syntax error. At every other level each kind of JSON
// value is dispatched to its dedicated parser.
// Returns true when the value was parsed successfully.
bool JSON::parse_json_value() {
  const int first = skip_to_token();
  if (first == -1) {
    return false;
  }

  if (level != 0) {
    // Nested value: the first character selects the parser
    switch (first) {
    case '{':
      return parse_json_object();

    case '[':
      return parse_json_array();

    case '"':
      return parse_json_string();

    case '-':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
      return parse_json_number();

    case 't':
      return parse_json_symbol("true", JSON_TRUE);

    case 'f':
      return parse_json_symbol("false", JSON_FALSE);

    case 'n':
      return parse_json_symbol("null", JSON_NULL);

    case 0:
      error(SYNTAX_ERROR, "EOS was encountered when expecting a json value.");
      return false;

    default:
      error(SYNTAX_ERROR, "Could not parse as a json value (did you forget to quote your strings?).");
      return false;
    }
  }

  // Top level: the document must be exactly one object or one array
  bool parsed;
  switch (first) {
  case '{':
    parsed = parse_json_object();
    break;

  case '[':
    parsed = parse_json_array();
    break;

  case 0:
    error(SYNTAX_ERROR, "EOS was encountered before any json declarations");
    return false;

  default:
    error(SYNTAX_ERROR, "Json must start with an object or an array.");
    return false;
  }

  if (!parsed) {
    return false;
  }

  // Nothing may follow the top level container
  const int trailing = skip_to_token();
  if (trailing > 0) {
    mark_pos();
    error(SYNTAX_ERROR, "Only one top level object/array is allowed.");
    return false;
  }

  // 0 means end of input was reached cleanly; a negative value is a failure
  return trailing == 0;
}
Example #5
0
// Parses a JSON object whose opening '{' is the next character to consume.
// Should only be called when we actually have the start of an object,
// otherwise it is an internal error.
// JSON_OBJECT_BEGIN / JSON_OBJECT_END are delivered through callback() around
// the members. Returns false as soon as any step fails, otherwise the result
// of the JSON_OBJECT_END callback.
bool JSON::parse_json_object() {
  NOT_PRODUCT(const char* prev_pos);
  int c;

  mark_pos();
  // Check that we are not called in error
  if (expect_any("{", "object start", INTERNAL_ERROR) <= 0) {
    return false;
  }

  // The begin callback sees the current level; members are parsed one deeper
  if (!callback(JSON_OBJECT_BEGIN, NULL, level++)) {
    return false;
  }

  for (;;) {
    mark_pos();
    c = skip_to_token();
    if (c == 0) {
      error(SYNTAX_ERROR, "EOS when expecting an object key or object end");
      return false;
    } else if (c < 0) {
      return false;
    } else if (c == '}') {
      // We got here from either empty object "{}" or ending comma "{a:1,}"
      next();
      break;
    }

    // Member key
    NOT_PRODUCT(prev_pos = pos);
    if (parse_json_key() == false) {
      return false;
    }
    assert(pos > prev_pos, "parsing stalled");

    // Key-value separator
    skip_to_token();
    mark_pos();
    if (expect_any(":", "object key-value separator") <= 0) {
      return false;
    }

    // Member value
    skip_to_token();
    mark_pos();
    NOT_PRODUCT(prev_pos = pos);
    if (parse_json_value() == false) {
      return false;
    }
    assert(pos > prev_pos, "parsing stalled");

    // Remember which separator follows before expect_any() consumes it
    c = skip_to_token();
    mark_pos();
    if (expect_any(",}", "value separator or object end") <= 0) {
      return false;
    }
    if (c == '}') {
      break;
    }
  }

  assert(c == '}', "object parsing ended without object end token ('}')");
  return callback(JSON_OBJECT_END, NULL, --level);
}
Example #6
0
/**
 * Copies `src` to `dst`, storing at most `room` characters followed by a
 * terminating NUL. htmlParse::parse() uses this instead of strcpy() so that
 * an over-long token or entity replacement is truncated rather than written
 * past the end of the output buffer.
 */
static void html_copy_bounded(char *dst, const char *src, int room)
{
	int len = (int)strlen(src);
	if (len > room) len = room;
	if (len < 0) len = 0;
	memcpy(dst, src, (size_t)len);
	dst[len] = 0;
}

/**
 * Parses an HTML document and converts it to text.
 *
 * Tokens are pulled from the lexer one at a time. Begin and end tags are
 * looked up in the tag table and turned into text decorations (newlines,
 * brackets, list markers, ...) according to the tag's `proc` / `newline`
 * settings, comments are skipped, entities are replaced through the entity
 * tables, and ordinary tokens are appended to the output.
 *
 * @param tok     lexer state; `offset` and `line_num` are reset here, and the
 *                input is read up to `src_len`
 * @param text    output buffer; always NUL-terminated on return
 * @param maxlen  size limit of `text`. 20 bytes are held back as slack for the
 *                few characters a single loop iteration may emit; token and
 *                entity text is additionally clipped to this limit.
 *                NOTE(review): assumed to be the capacity of `text` in bytes
 *                -- confirm against callers.
 * @return number of characters stored in `text`, excluding the final NUL
 */
int htmlParse::parse(stToken *tok, char *text, int maxlen)
{
	int	tok_val=0, prev_tok_val=0, prev_ch=0;
	int	is_in_tag=0;
	uint4	tmp=0;
	int	not_moved=0;
	char *start=text;
	const int	buf_len = maxlen; // limit as passed in, before the slack is taken off
	int	room=0; // characters that still fit before the final NUL

	stTagProc *curr_tag=0;
	//stTagProc **prev_tag=0;
	stEntityProc *ent_proc;
	int	tag_depth=0;
	int	is_in_BODY_tag=0;
	int	is_in_A_tag=0;
	int	is_in_PRE_tag=0; // 2003.11.8
	int	hlink_chars = 0;
	//int	newline_delayed=0; // TRUE when an Optional Pair && Newline tag has started
	//int	dont_newline=0; // TRUE means "dont write newline"
	int	pair_mark=0;
	int	words_in_line=0;
	int	b_tag_first_in_line=0; // set when a B tag appears at the start of a line
	int	glossary_marked=0;
	int	is_glossary=0;

	tok->offset = 0;
	tok->line_num=1;
	*text = 0;

	// Keep slack for the fixed characters one iteration may write after the
	// loop condition has been checked.
	maxlen -= 20;
	
	// Pointer difference instead of casting each pointer to an integer type:
	// the casts truncate addresses on 64-bit targets.
	while(tok->offset <  tok->src_len && (int)(text-start) < maxlen) {
	
		tmp = tok->offset;

		htmlLex::get_token_mem(tok);

		#ifdef BUG
		if (tok->tok_len>=20) {
			printf("long tok: line=%d, tok_val=%d, CH=%c\n",
			tok->line_num, tok->tok_val, tok->src_mem[tok->offset-1]);
			fflush(stdout);
			printf("tok=%s\n", tok->tok_str);
			fflush(stdout);
		}
		#endif
		
		#ifdef DEB
		/*
		if (tok->tok_val==0 || tok->line_num==1) {
			printf("line=%d, tok_val=%d, CH=%c\n",
			tok->line_num, tok->tok_val, tok->src_mem[tok->offset-1]);
			printf("offset=%d, text-start=%d\n", tok->offset, text-start);
		}
		*/
		#endif
		
		// prev_ch is sampled once per iteration, before anything is written
		if (text > start) {
			prev_ch = *(text-1);
			if (prev_ch=='\n') {
				words_in_line = 0;
				b_tag_first_in_line = 0;
				glossary_marked = 0;
			}
		}
		
		if (tok->tok_val == TOK_EOF) break;

		if (tok->tok_val == URL_CMNT) {
			//printf("%s", tok->tok_str);
			continue;
		}
		
		// The lexer made no progress: step over one character, and give up
		// the second time this happens (the counter is never reset).
		if (tok->offset==tmp) {
			#ifdef BUG
			//printf("parse(): offset not moved: line=%d, CH=%d(%c)\n",
			//	tok->line_num, tok->src_mem[tok->offset-1], tok->src_mem[tok->offset-1]);
			#endif
			tok->offset++;
			if (++not_moved >= 2) break;
			else continue;
		}
		tok_val = tok->tok_val;

	
		if (tok_val == BTAG_BEGIN) {	
			skip_to_two_token(tok, STRING, TAG_END);
			if (tok->tok_val != STRING) { // must be a HTML tag
				skip_to_token(tok, TAG_END);
				continue;			
			}
			
			//prn_tabs();
			//fprintf(log,"%s-->:%d :%d\n", tok->tok_str, tok->line_num, tok->offset);
			
			curr_tag = htmlTagEntity::tag_idx(tok->tok_str);
			
			// Unknown tag: drop it entirely
			if (curr_tag == NULL) {
				skip_to_token(tok, TAG_END);
				continue;
			}

			tag_depth++;

			#ifdef MAIN_TEXT_ONLY
			if (curr_tag->pair != Not_PAIR && is_in_BODY_tag && curr_tag!=htmlTagEntity::A_tag)
			push_tag(curr_tag, tag_depth, (int)(text-start));
			

			if (curr_tag==htmlTagEntity::BODY_tag) {
				is_in_BODY_tag = 1;
			}
			#endif
			
			if (curr_tag==htmlTagEntity::A_tag) {
				is_in_A_tag = 1;
			}
			else if (curr_tag==htmlTagEntity::PRE_tag) {
				is_in_PRE_tag = 1;
			}
			else if(curr_tag==htmlTagEntity::B_tag||curr_tag==htmlTagEntity::DT_tag) {
				if (words_in_line==0)
					b_tag_first_in_line = 1;
			}
			else if (curr_tag==htmlTagEntity::TITLE_tag) {							
				get_hyperlink_title(tok->src_mem + tok->offset  , htmlTitle, HTMLTITLE_LEN);
					
				#ifdef DEB
				PRN("tok->scr_mem=%p, offset=%d, title=%p\n", 
					(const void*)tok->src_mem, tok->offset, (const void*)htmlTitle);
				PRN("title: %s (%d)\n", htmlTitle, (int)strlen(htmlTitle) );
				#endif
				
				is_glossary =  is_glossay_mode(htmlTitle);
		
			}						
			else if (curr_tag==htmlTagEntity::BR_tag) {
				*text++ = '\n';
			}
	
			// proc == 0: the tag and, for paired tags, its content are ignored
			if (curr_tag->proc == 0) { // 0
				if (curr_tag->pair != Not_PAIR)
					skip_ignore_part(tok, curr_tag);
				else
					skip_to_token(tok, TAG_END);
				if (curr_tag->newline) {
					if (prev_ch != '\n') *text++ = '\n';
				}				
				continue;
			}

			// Opening decoration selected by the tag's proc value
			if (curr_tag==htmlTagEntity::TITLE_tag) {
				if (prev_ch != '\n') *text++ = '\n';
				*text++ = '[';
				*text++ = '[';
			}
			else if (curr_tag->proc == 1) {
			}
			else if (!pair_mark && curr_tag->proc == 2) {
				pair_mark = 2;
				*text++ = '<';
			}
			else if (!pair_mark && curr_tag->proc == 3) {
				pair_mark = 3;
				*text++ = '\'';
			}
			else if (!pair_mark && curr_tag->proc == 4) {
				pair_mark = 4;
				*text++ = '[';
			}
			else if (curr_tag->proc == 5) {
				if (prev_ch != '\n') *text++ = '\n';
				*text++ = '*';
				*text++ = ' ';
				words_in_line = 0;
			}
			else if (curr_tag->proc == 6) {
				if (prev_ch != '\n') *text++ = '\n';
				*text++ = '\n';
			}
			
				
			skip_to_token(tok, TAG_END);

			if (!pair_mark && prev_ch != ' ') *text++ = ' ';
			//if (!ISSPACE(*text)) *text++ = ' ';

		}// BTAG_BEGIN '<'
				
		else if (tok_val == ETAG_BEGIN) {
					
			skip_to_two_token(tok, STRING, TAG_END);
			if (tok->tok_val != STRING) { // must be a HTML tag
				skip_to_token(tok, TAG_END);
				continue;
			}
			
		
			curr_tag = htmlTagEntity::tag_idx(tok->tok_str);

			if (curr_tag == NULL) {
				skip_to_token(tok, TAG_END);
				continue;
			}

			#ifdef MAIN_TEXT_ONLY
			tag_depth--;

			if (curr_tag==htmlTagEntity::BODY_tag) {
				is_in_BODY_tag = 0;
			}
			
			if (curr_tag->pair != Not_PAIR && is_in_BODY_tag && curr_tag!=htmlTagEntity::A_tag)
			end_tag(curr_tag, (int)(text-start));
			#endif
			
			// Closing decoration, mirroring what the begin tag emitted
			if (curr_tag==htmlTagEntity::A_tag) {
				is_in_A_tag = 0;
				#ifdef MARK_A_LINK
				*text++ = '}';
				#endif
			}
			else if (curr_tag==htmlTagEntity::PRE_tag) {
				is_in_PRE_tag = 0;
			}
			else if(curr_tag==htmlTagEntity::DT_tag || 
				(curr_tag==htmlTagEntity::B_tag && (b_tag_first_in_line && is_glossary)))
			{
				// Mark the term of a glossary entry once per line
				if (glossary_marked==0) {
					*text++ = ' ';
					*text++ = ':';
					//*text++ = ' ';
					b_tag_first_in_line = 0;
					glossary_marked = 1;
				}
					
			}
			else if (curr_tag==htmlTagEntity::TITLE_tag) {
				*text++ = ']';
				*text++ = ']';
				*text++ = '\n';
			}
			else if (curr_tag->proc == 1) {
			}
			else if (pair_mark==2 && curr_tag->proc == 2) {
				pair_mark = 0;
				*text++ = '>';
			}
			else if (pair_mark==3 && curr_tag->proc == 3) {
				pair_mark = 0;
				*text++ = '\'';
			}
			else if (pair_mark==4 && curr_tag->proc == 4) {
				pair_mark = 0;
				*text++ = ']';
			}
			else if (curr_tag->proc == 5) {				
			}
			else if (curr_tag->proc == 6) {
				if (prev_ch != '\n') *text++ = '\n';
				*text++ = '\n';
			}

			if (curr_tag->newline) {
				*text++ = '\n';
			}			
		
	
			skip_to_token(tok, TAG_END);

		}// ETAG_BEGIN '</'
		
		else if (tok_val == STAG_END) {
			skip_to_token(tok, TAG_END);
			is_in_tag = 0;
			tag_depth--;
		}
		
		else if (tok_val == TAG_EXC) {			
			tmp = skip_to_token(tok, TAG_END);
			//fprintf(log,"<- > skipped %d\n", tmp);
		}
		
		else if (tok_val == CMNT_BEGIN) {
			//fprintf(log,"Cmnt -->: %d :%d\n", tok->line_num, tok->offset);
			htmlLex::skip_to_cmnt_end(tok);
			//fprintf(log,"Cmnt <--: %d :%d\n", tok->line_num, tok->offset);
		}		
		else if (tok_val == ENTITY_STR) {
			if (prev_ch !=' ') *text++ = ' '; //2002.12.2
			
			ent_proc = htmlTagEntity::entity_idx(tok->tok_str);

			if (ent_proc && ent_proc->conv[0]) {
				#ifdef BUG
				//prn_ent_proc(ent_proc);
				#endif
				// Clipped copy; advance by what was actually stored
				room = buf_len - 1 - (int)(text - start);
				html_copy_bounded(text, ent_proc->conv, room);
				text += strlen(text);
			}
			else {
				//fprintf(log,"ignored entity = %s\n", tok->tok_str);
			}
		
		}
		else if (tok_val == ENTITY_NUM) {
			if (prev_ch !=' ') *text++ = ' '; //2002.12.2
			
			ent_proc = htmlTagEntity::entity_id_idx(tok->tok_realval);

			if (ent_proc && ent_proc->conv[0]) {
				#ifdef BUG
				//prn_ent_proc(ent_proc);
				#endif
				// Clipped copy; advance by what was actually stored
				room = buf_len - 1 - (int)(text - start);
				html_copy_bounded(text, ent_proc->conv, room);
				text += strlen(text);
			}
			else {			
				// No table entry: emit 7-bit code points as they are
				if (tok->tok_realval < 0x80) {
				// 2005.7.19
					*text = (char)tok->tok_realval ;
					text++;
				}

			}

		}
		else {
			char *t = text;
			if (is_in_PRE_tag || tok->tok_len>1 || tok_val == STRING || tok_val == NUMBER) {
				//if (prev_tok_val == TAG_END && *text != ' ') *text++ = ' ';
				// 2003. 3.20
				//if (prev_tok_val == TAG_END && isalnum(prev_ch) ) 
				//	*text++ = ' ';
				words_in_line++;
				// Clipped copy; as before the cursor advances by tok_len,
				// now limited to the space that is left
				room = buf_len - 1 - (int)(text - start);
				html_copy_bounded(text, tok->tok_str, room);
				text += ((int)tok->tok_len < room) ? (int)tok->tok_len : room;
			}
			// 2002.10.16 Hangul (Korean text) is converted to a space
			else if (tok->tok_val == FR_STR) {
				if (do_prn_hangul) {
					if (prev_ch != ' ') *text++ = ' ';
					room = buf_len - 1 - (int)(text - start);
					html_copy_bounded(text, tok->tok_str, room);
					text += ((int)tok->tok_len < room) ? (int)tok->tok_len : room;
					words_in_line++;
				}
				else {
					if (prev_ch != ' ') *text++ = ' ';
				}
			}
			else if (tok->tok_len==1) {		
				
				if (curr_tag==htmlTagEntity::PRE_tag) {					
					#ifdef ODD_CHAR
					if ( (tok_val & 0xF0) != 0x90)
						*text++ = tok_val;
					else if (tok_val==0x92) {
						if (prev_tok_val != 0x92)
							*text++ = '\'';
					}
					#else
					*text++ = tok_val;
					#endif
				}
				//else if (tok_val == '\n' || tok_val == '\r') {
				else if (ISSPACE(tok_val)) {
					// Collapse whitespace to a single blank
					if (prev_ch != ' ') *text++ = ' ';
				}
								
				else {
					if (prev_ch==':' && tok_val==':') { }
					else *text++ = tok_val;
				}
				
			}
			if (is_in_A_tag)
				hlink_chars += (int)(text - t);
		}

		prev_tok_val = tok->tok_val;
		
	}// while(1)

	*text = 0;

	if ((int)(text-start) >= maxlen-5) {
		PRN("parse(): too far !! maxlen=%d, %d\n", maxlen, (int)(text-start));
	}
	// Pad with a few extra NULs when there is comfortably room for them
	if ((int)(text-start) < maxlen-5)
		memset(text, 0, 4);
	#ifdef DEB
		PRN("text=%p, start=%p, len=%d\n", 
			(void*)text, (void*)start, (int)(text-start));
		PRN("start[0]=%d text[0]=%d\n", start[0], text[0]);
	#endif

	return (int)(text-start);
}