示例#1
0
string
xml_html_parser::transcode (string s2) {
  // Re-encode the document s2 as UTF-8.  A leading "<?xml ... ?>" declaration
  // is scanned for an encoding="..." attribute; when none is found the input
  // is checked for UTF-8 and otherwise treated as ISO-8859-1.  Whenever a
  // conversion yields an empty result, s2 is returned unchanged.
  s= parse_string (s2);

  string declared;
  if (test (s, "<?")) {
    s += 2;
    string pi_target= parse_name ();
    skip_space ();
    if (pi_target == "xml") {
      // The html flag is deliberately left untouched here: html==true means
      // arbitrarily broken HTML is accepted, so an XML prolog alone is not
      // enough to clear it.
      while (s && !test (s, "?>")) {
        string key= parse_name ();
        skip_space ();
        // an attribute without '=' ends the scan
        if (!test (s, "=")) break;
        s += 1;
        skip_space ();

        // the value stays empty when it is not quoted
        string value;
        bool double_quoted= test (s, "\"");
        if (double_quoted || test (s, "'")) {
          s += 1;
          value= double_quoted ? parse_until ("\"") : parse_until ("'");
          skip_space ();
        }

        if (key == "encoding") {
          declared= upcase_all (value);
          break;
        }
      }
    }
  }

  if (N(declared) != 0) {
    // an encoding was declared: trust it
    string converted= convert (s2, declared, "UTF-8");
    if (N(converted) != 0) return converted;
    // conversion from the declared charset failed: keep the input as is
  }
  else if (!check_encoding (s2, "UTF-8")) {
    // nothing declared and the input does not look like UTF-8
    string converted= convert (s2, "ISO-8859-1", "UTF-8");
    if (N(converted) != 0) return converted;
  }

  return s2;
}
示例#2
0
string
xml_html_parser::parse_quoted () {
  // Read a value enclosed in double quotes ("\42" is the double-quote
  // character) or in single quotes.  Without an opening quote nothing is
  // consumed and the empty string is returned.
  bool double_quoted= test (s, "\42");
  if (!double_quoted && !test (s, "'"))
    return "";
  s += 1;
  return double_quoted ? parse_until ("\42") : parse_until ("'");
}
示例#3
0
/*
 * Read instructions from `stream` until listench() reports nothing left and
 * link them into a chain through indexes[0].
 *
 * Every instruction is created by init_inst(type, buf), where `buf` holds the
 * text fetched by getstr() and `type` is derived from its first/last
 * characters:
 *   '('  text starts with '?' and ends with '('
 *   '['  text starts with '['
 *   '{'  text starts with '{'
 *   '?'  any other text starting with '?' (becomes 'w' when the next
 *        character of the stream is '[')
 *   0    everything else
 *
 * Returns the first instruction of the chain, or NULL when the stream holds
 * no instruction or the initial allocation fails.
 */
ss_inst *init_insts(stream_t *stream) {
    char *buf;
    char type;
    long IP, new_IP;
    ss_inst *head, *ptr;
    ss_inst *indexes[4];

    new_IP = IP = getpos(stream);

    /* Sentinel node in front of the chain.  It is zero-initialised so that an
     * empty stream yields a NULL result instead of an indeterminate pointer. */
    head = ptr = calloc(1, sizeof(ss_inst));
    if (!head)
        return NULL;

    while (listench(stream)) {
        int len = get_parsable_length(stream);
        char last;

        buf = calloc(len + 1, sizeof(char));
        if (!buf)
            break;              /* out of memory: return what was built so far */
        getstr(stream, len, buf);

        /* Never read buf[-1] when nothing could be parsed. */
        last = (len > 0) ? buf[len - 1] : '\0';

        /* Start every instruction with empty branches so that neither
         * uninitialised slots nor branches of a previous instruction are
         * attached below. */
        indexes[1] = indexes[2] = indexes[3] = NULL;

        if (last == '(' && *buf == '?') {
            type = '(';
            indexes[1] = parse_until(stream, ')');
        } else if (*buf == '[') {
            type = '[';
            indexes[1] = parse_until(stream, ']');
        } else if (*buf == '{') {
            type = '{';
            indexes[1] = parse_until(stream, '}');
        } else if (*buf == '?') {
            type = '?';
            if (last == '(') {
                /* NOTE(review): not reachable, the first branch already
                 * catches a '?' ... '(' text. */
                indexes[1] = parse_until(stream, ')');/* {expr} */
            } else if (strchr(RETN, last)) {
                move_stream(stream, 1);
                new_IP += get_parsable_length(stream);
                move_stream(stream, -1);
            }
            if (listench(stream) == '[') type = 'w';
            move_stream(stream, new_IP - IP - 1);
            // TODO conseq & alt and loop body
        } else {
            type = 0;
        }

        indexes[0] = init_inst(type, buf);
        if (type) {
            /* NOTE(review): only indexes[1..3] exist; a branch_no above 4
             * would read past the array -- confirm the upper bound. */
            for (int i = 1; i < indexes[0]->branch_no; i ++)
                indexes[0]->indexes[i] = indexes[i];
            move_stream(stream, new_IP - IP - 1);
        }
        ptr->indexes[0] = indexes[0];
        ptr = ptr->indexes[0];
    }

    ss_inst *retn = head->indexes[0];
    free_inst(head);
    return retn;
}
示例#4
0
tree
xml_html_parser::parse_pi () {
  // Build ("pi", target, content) from a processing instruction.  The first
  // two characters are skipped unread (presumably the "<?" opener); the
  // content is whatever parse_until returns for the "?>" terminator.
  s += 2;
  string target= parse_name ();
  skip_space ();
  string content= parse_until ("?>");
  return tuple ("pi", target, content);
}
示例#5
0
tree
xml_html_parser::parse_closing () {
  // Build ("end", name) from a closing tag.  The first two characters are
  // skipped unread (presumably the "</" opener).
  s += 2;
  string tag= parse_name ();
  // whatever is read up to ">" after the name is thrown away
  (void) parse_until (">");
  return tuple ("end", tag);
}
示例#6
0
文件: sam.c 项目: ewqasd200g/vis
/* Fill argv[0..maxarg-1] with arguments read from *s.  Leading spaces are
 * skipped before each argument; one that starts with a double or single
 * quote is read with parse_delimited_text(), any other with parse_until()
 * using space, tab and newline as the stop set. */
static void parse_argv(const char **s, const char *argv[], size_t maxarg) {
	size_t slot = 0;
	while (slot < maxarg) {
		skip_spaces(s);
		int quoted = (**s == '"' || **s == '\'');
		argv[slot] = quoted ? parse_delimited_text(s)
		                    : parse_until(s, " \t\n");
		slot++;
	}
}
示例#7
0
文件: parse.c 项目: Meai1/libpsyc
static inline
#endif
/**
 * Parse the next index from state->buffer.
 *
 * The byte at the cursor selects the kind of index:
 *   '#'  list index, read with parse_length() into idx->length
 *   '.'  struct index, read with parse_keyword() into idx
 *   '{'  dict index: an optional "length SP" prefix followed by the key,
 *        read with parse_binary() when a length was given and with
 *        parse_until() up to '}' otherwise
 *
 * @param state Parser state; cursor, startc and part are updated.
 * @param idx   Receives the parsed index.
 * @return One of the PSYC_PARSE_INDEX_* codes; PSYC_PARSE_INDEX_END when the
 *         cursor is already at or past the end of the buffer.
 */
PsycParseIndexRC
psyc_parse_index (PsycParseIndexState *state, PsycString *idx)
{
    // Only PSYC_PARSE_INDEX_* values are stored here.
    PsycParseIndexRC ret;

    if (state->cursor >= state->buffer.length)
	return PSYC_PARSE_INDEX_END;

    state->startc = state->cursor;

    switch (state->part) {
    case PSYC_INDEX_PART_START:
    case PSYC_INDEX_PART_TYPE:
	idx->length = 0;
	idx->data = NULL;

	// The part is recorded before advancing, because the advance macro
	// may return when the buffer ends right after the type character.
	switch (state->buffer.data[state->cursor]) {
	case '#':
	    state->part = PSYC_INDEX_PART_LIST;
	    ADVANCE_STARTC_OR_RETURN(PSYC_PARSE_INDEX_INSUFFICIENT);
	    goto PSYC_INDEX_PART_LIST;
	case '.':
	    // Record the struct part, matching the label jumped to below.
	    state->part = PSYC_INDEX_PART_STRUCT;
	    ADVANCE_STARTC_OR_RETURN(PSYC_PARSE_INDEX_INSUFFICIENT);
	    goto PSYC_INDEX_PART_STRUCT;
	case '{':
	    // NOTE(review): the part is DICT although parsing continues at
	    // DICT_LENGTH; confirm which one a resumed call should enter.
	    state->part = PSYC_INDEX_PART_DICT;
	    ADVANCE_STARTC_OR_RETURN(PSYC_PARSE_INDEX_INSUFFICIENT);
	    goto PSYC_INDEX_PART_DICT_LENGTH;
	default:
	    return PSYC_PARSE_INDEX_ERROR_TYPE;
	}

    case PSYC_INDEX_PART_LIST:
    PSYC_INDEX_PART_LIST:
	switch (parse_length((ParseState*)state, &idx->length)) {
	case PARSE_SUCCESS: // list index is complete
	    state->part = PSYC_INDEX_PART_TYPE;
	    return PSYC_PARSE_INDEX_LIST;
	case PARSE_INSUFFICIENT: // list index at the end of buffer
	    return PSYC_PARSE_INDEX_LIST_LAST;
	case PARSE_ERROR: // no index
	    return PSYC_PARSE_INDEX_ERROR_LIST;
	default: // should not be reached
	    return PSYC_PARSE_INDEX_ERROR;
	}

    case PSYC_INDEX_PART_STRUCT:
    PSYC_INDEX_PART_STRUCT:
	switch (parse_keyword((ParseState*)state, idx)) {
	case PARSE_SUCCESS: // end of keyword
	    state->part = PSYC_INDEX_PART_TYPE;
	    return PSYC_PARSE_INDEX_STRUCT;
	case PARSE_INSUFFICIENT: // end of buffer
	    return PSYC_PARSE_INDEX_STRUCT_LAST;
	case PARSE_ERROR: // no keyword
	    return PSYC_PARSE_INDEX_ERROR_STRUCT;
	default: // should not be reached
	    return PSYC_PARSE_INDEX_ERROR;
	}

    case PSYC_INDEX_PART_DICT_LENGTH:
    PSYC_INDEX_PART_DICT_LENGTH:
	switch (parse_length((ParseState*)state, &state->elemlen)) {
	case PARSE_SUCCESS: // length is complete
	    state->elemlen_found = 1;
	    state->elem_parsed = 0;
	    idx->length = state->elemlen;
	    idx->data = NULL;

	    // a length must be followed by a single space
	    if (state->buffer.data[state->cursor] != ' ')
		return PSYC_PARSE_INDEX_ERROR_DICT_LENGTH;

	    state->part = PSYC_INDEX_PART_DICT;
	    ADVANCE_STARTC_OR_RETURN(PSYC_PARSE_INDEX_INSUFFICIENT);
	    break;
	case PARSE_INSUFFICIENT: // length is incomplete
	    return PSYC_PARSE_INDEX_INSUFFICIENT;
	case PARSE_ERROR: // no length
	    state->part = PSYC_INDEX_PART_DICT;
	    break;
	default: // should not be reached
	    return PSYC_PARSE_INDEX_ERROR;
	}
	// fall thru

    case PSYC_INDEX_PART_DICT:
	if (state->elemlen_found) {
	    // key with an explicit length
	    switch (parse_binary((ParseState*)state, state->elemlen, idx,
				 &state->elem_parsed)) {
	    case PARSE_SUCCESS:
		if (idx->length == state->elem_parsed)
		    ret = PSYC_PARSE_INDEX_DICT;
		else
		    ret = PSYC_PARSE_INDEX_DICT_END;
		break;
	    case PARSE_INCOMPLETE:
		if (idx->length == state->elem_parsed)
		    ret = PSYC_PARSE_INDEX_DICT_START;
		else
		    ret = PSYC_PARSE_INDEX_DICT_CONT;
		break;
	    default: // should not be reached
		return PSYC_PARSE_INDEX_ERROR_DICT;
	    }
	} else {
	    // key without a length: read up to the closing brace
	    switch (parse_until((ParseState*)state, '}', idx)) {
	    case PARSE_SUCCESS:
		ret = PSYC_PARSE_INDEX_DICT;
		break;
	    case PARSE_INSUFFICIENT:
		return PSYC_PARSE_INDEX_INSUFFICIENT;
	    default: // should not be reached
		return PSYC_PARSE_INDEX_ERROR_DICT;
	    }
	}

	state->part = PSYC_INDEX_PART_TYPE;
	state->cursor++;
	return ret;
    }

    return PSYC_PARSE_INDEX_ERROR; // should not be reached
}
示例#8
0
文件: parse.c 项目: Meai1/libpsyc
/**
 * Parse dictionary.
 *
 * dict		= [ type ] *dict-item
 * dict-item	= "{" ( dict-key / length SP OCTET) "}"
 *                ( type [ SP dict-value ] / [ length ] [ ":" type ] [ SP *OCTET ] )
 * dict-key	= %x00-7C / %x7E-FF	; any byte except "}"
 * dict-value	= %x00-7A / %x7C-FF	; any byte except "{"
 *
 * Each call resumes at state->part and returns after one element (the
 * default type, a key, or a value) has been read.
 *
 * @param state Parser state; cursor, startc, part and the elem* fields are
 *              updated.
 * @param type  Receives the keyword parsed as a type, if any.
 * @param elem  Receives the parsed key or value.
 * @return One of the PSYC_PARSE_DICT_* codes; PSYC_PARSE_DICT_END when the
 *         cursor is already at or past the end of the buffer.
 */
PsycParseDictRC
psyc_parse_dict (PsycParseDictState *state, PsycString *type, PsycString *elem)
{
    // Only PSYC_PARSE_DICT_* values are stored here.
    PsycParseDictRC ret;

    if (state->cursor >= state->buffer.length)
	return PSYC_PARSE_DICT_END;

    state->startc = state->cursor;

    switch (state->part) {
    case PSYC_DICT_PART_START:
	type->length = elem->length = 0;
	type->data = elem->data = NULL;

	state->part = PSYC_DICT_PART_TYPE;
	// fall thru

    case PSYC_DICT_PART_TYPE:
	switch (parse_keyword((ParseState*)state, type)) {
	case PARSE_SUCCESS: // end of keyword
	    state->part = PSYC_DICT_PART_KEY_START;
	    return PSYC_PARSE_DICT_TYPE;
	case PARSE_INSUFFICIENT: // end of buffer
	    return PSYC_PARSE_DICT_END;
	case PARSE_ERROR: // no keyword
	    state->part = PSYC_DICT_PART_KEY_START;
	    break;
	default: // should not be reached
	    return PSYC_PARSE_DICT_ERROR;
	}
	// fall thru

    case PSYC_DICT_PART_KEY_START:
	// every item starts with an opening brace
	if (state->buffer.data[state->cursor] != '{')
	    return PSYC_PARSE_DICT_ERROR_KEY_START;

	type->length = elem->length = 0;
	type->data = elem->data = NULL;

	state->elem_parsed = 0;
	state->elemlen_found = 0;

	state->part = PSYC_DICT_PART_KEY_LENGTH;
	ADVANCE_STARTC_OR_RETURN(PSYC_PARSE_DICT_INSUFFICIENT);
	// fall thru

    case PSYC_DICT_PART_KEY_LENGTH:
	switch (parse_length((ParseState*)state, &state->elemlen)) {
	case PARSE_SUCCESS: // length is complete
	    state->elemlen_found = 1;
	    state->elem_parsed = 0;
	    elem->length = state->elemlen;
	    elem->data = NULL;

	    // a length must be followed by a single space
	    if (state->buffer.data[state->cursor] != ' ')
		return PSYC_PARSE_DICT_ERROR_KEY_LENGTH;

	    state->part = PSYC_DICT_PART_KEY;
	    ADVANCE_STARTC_OR_RETURN(PSYC_PARSE_DICT_INSUFFICIENT);
	    break;
	case PARSE_INSUFFICIENT: // length is incomplete
	    return PSYC_PARSE_DICT_INSUFFICIENT;
	case PARSE_ERROR: // no length
	    state->part = PSYC_DICT_PART_KEY;
	    break;
	default: // should not be reached
	    return PSYC_PARSE_DICT_ERROR;
	}
	// fall thru

    case PSYC_DICT_PART_KEY:
	if (state->elemlen_found) {
	    // key with an explicit length
	    switch (parse_binary((ParseState*)state, state->elemlen, elem,
				 &state->elem_parsed)) {
	    case PARSE_SUCCESS:
		if (elem->length == state->elem_parsed)
		    ret = PSYC_PARSE_DICT_KEY;
		else
		    ret = PSYC_PARSE_DICT_KEY_END;
		break;
	    case PARSE_INCOMPLETE:
		if (elem->length == state->elem_parsed)
		    ret = PSYC_PARSE_DICT_KEY_START;
		else
		    ret = PSYC_PARSE_DICT_KEY_CONT;
		break;
	    default: // should not be reached
		return PSYC_PARSE_DICT_ERROR;
	    }
	} else {
	    // key without a length: read up to the closing brace
	    switch (parse_until((ParseState*)state, '}', elem)) {
	    case PARSE_SUCCESS:
		ret = PSYC_PARSE_DICT_KEY;
		break;
	    case PARSE_INSUFFICIENT:
		return PSYC_PARSE_DICT_INSUFFICIENT;
	    default: // should not be reached
		return PSYC_PARSE_DICT_ERROR;
	    }
	}

	state->part = PSYC_DICT_PART_VALUE_START;
	state->startc = state->cursor;
	return ret;

    case PSYC_DICT_PART_VALUE_START:
	// The key must be closed by '}'.  This has to be an `if`: a `switch`
	// whose body carries no case label never executes that body, so the
	// check would be skipped.
	if (state->buffer.data[state->cursor] != '}')
	    return PSYC_PARSE_DICT_ERROR_VALUE_START;

	type->length = elem->length = 0;
	type->data = elem->data = NULL;

	state->elem_parsed = 0;
	state->elemlen_found = 0;

	state->part = PSYC_DICT_PART_VALUE_TYPE;
	ADVANCE_STARTC_OR_RETURN(PSYC_PARSE_DICT_VALUE_LAST);
	// fall thru

    case PSYC_DICT_PART_VALUE_TYPE:
	// a '=' introduces a type keyword for the value
	if (state->buffer.data[state->cursor] == '=') {
	    ADVANCE_CURSOR_OR_RETURN(PSYC_PARSE_DICT_INSUFFICIENT);

	    switch (parse_keyword((ParseState*)state, type)) {
	    case PARSE_SUCCESS:
		switch (state->buffer.data[state->cursor]) {
		case ':': // a length follows
		    state->part = PSYC_DICT_PART_VALUE_LENGTH;
		    ADVANCE_STARTC_OR_RETURN(PSYC_PARSE_DICT_VALUE_LAST);
		    break;
		case ' ': // the value follows
		    state->part = PSYC_DICT_PART_VALUE;
		    ADVANCE_STARTC_OR_RETURN(PSYC_PARSE_DICT_VALUE_LAST);
		    goto PSYC_DICT_PART_VALUE;
		case '{': // no value, the next item starts
		    state->part = PSYC_DICT_PART_KEY_START;
		    return PSYC_PARSE_DICT_VALUE;
		default:
		    return PSYC_PARSE_DICT_ERROR_VALUE_TYPE;
		}
		break;
	    case PARSE_INSUFFICIENT: // end of buffer
		return PSYC_PARSE_DICT_VALUE_LAST;
	    case PARSE_ERROR:
		return PSYC_PARSE_DICT_ERROR_VALUE_TYPE;
	    default: // should not be reached
		return PSYC_PARSE_DICT_ERROR;
	    }
	}
	// fall thru

    case PSYC_DICT_PART_VALUE_LENGTH:
	switch (parse_length((ParseState*)state, &state->elemlen)) {
	case PARSE_SUCCESS: // length is complete
	    state->elemlen_found = 1;
	    state->elem_parsed = 0;
	    elem->length = state->elemlen;
	    elem->data = NULL;
	    break;
	case PARSE_INSUFFICIENT: // length is incomplete
	    return PSYC_PARSE_DICT_INSUFFICIENT;
	case PARSE_ERROR: // no length
	    break;
	default: // should not be reached
	    return PSYC_PARSE_DICT_ERROR;
	}

	switch (state->buffer.data[state->cursor]) {
	case ' ': // the value follows
	    state->part = PSYC_DICT_PART_VALUE;
	    ADVANCE_STARTC_OR_RETURN(PSYC_PARSE_DICT_VALUE_LAST);
	    break;
	case '{': // no value, the next item starts
	    state->part = PSYC_DICT_PART_KEY_START;
	    return PSYC_PARSE_DICT_VALUE;
	default:
	    return PSYC_PARSE_DICT_ERROR_VALUE_LENGTH;
	}
	// fall thru

    case PSYC_DICT_PART_VALUE:
    PSYC_DICT_PART_VALUE:
	if (state->elemlen_found) {
	    // value with an explicit length
	    switch (parse_binary((ParseState*)state, state->elemlen, elem,
				 &state->elem_parsed)) {
	    case PARSE_SUCCESS:
		if (elem->length == state->elem_parsed)
		    ret = PSYC_PARSE_DICT_VALUE;
		else
		    ret = PSYC_PARSE_DICT_VALUE_END;
		break;
	    case PARSE_INCOMPLETE:
		if (elem->length == state->elem_parsed)
		    ret = PSYC_PARSE_DICT_VALUE_START;
		else
		    ret = PSYC_PARSE_DICT_VALUE_CONT;
		break;
	    default: // should not be reached
		return PSYC_PARSE_DICT_ERROR;
	    }
	} else {
	    // value without a length: read up to the next opening brace
	    switch (parse_until((ParseState*)state, '{', elem)) {
	    case PARSE_SUCCESS:
		ret = PSYC_PARSE_DICT_VALUE;
		break;
	    case PARSE_INSUFFICIENT:
		return PSYC_PARSE_DICT_VALUE_LAST;
	    default: // should not be reached
		return PSYC_PARSE_DICT_ERROR;
	    }
	}

	state->part = PSYC_DICT_PART_KEY_START;
	return ret;
    }

    return PSYC_PARSE_DICT_ERROR; // should not be reached
}
示例#9
0
tree
xml_html_parser::parse_notation () {
  // Build ("notation", body).  Ten characters are skipped unread first
  // (presumably the "<!NOTATION" opener -- TODO confirm against the caller).
  s += 10;
  string body= parse_until (">");
  return tuple ("notation", body);
}
示例#10
0
tree
xml_html_parser::parse_attlist () {
  // Build ("attlist", body).  Nine characters are skipped unread first
  // (presumably the "<!ATTLIST" opener -- TODO confirm against the caller).
  s += 9;
  string body= parse_until (">");
  return tuple ("attlist", body);
}
示例#11
0
tree
xml_html_parser::parse_element () {
  // Build ("element", body).  Nine characters are skipped unread first
  // (presumably the "<!ELEMENT" opener -- TODO confirm against the caller).
  s += 9;
  string body= parse_until (">");
  return tuple ("element", body);
}
示例#12
0
tree
xml_html_parser::parse_cdata () {
  // Build ("cdata", body).  Nine characters are skipped unread first
  // (presumably the "<![CDATA[" opener -- TODO confirm against the caller);
  // the body is read against the "]]>" terminator.
  s += 9;
  string body= parse_until ("]]>");
  return tuple ("cdata", body);
}
示例#13
0
tree
xml_html_parser::parse_comment () {
  // Build ("comment", body).  Four characters are skipped unread first
  // (presumably the "<!--" opener -- TODO confirm against the caller);
  // the body is read against the "-->" terminator.
  s += 4;
  string body= parse_until ("-->");
  return tuple ("comment", body);
}
示例#14
0
文件: sam.c 项目: ewqasd200g/vis
/* Read a file name from *s after skipping leading spaces.  A name that
 * starts with a double or single quote is read with parse_delimited_text();
 * any other name is read with parse_until() using newline as the stop set. */
static char *parse_filename(const char **s) {
	skip_spaces(s);
	switch (**s) {
	case '"':
	case '\'':
		return parse_delimited_text(s);
	default:
		return parse_until(s, "\n");
	}
}
示例#15
0
文件: sam.c 项目: ewqasd200g/vis
/* Read a shell command from *s: leading spaces are skipped, then the text is
 * read with parse_until() using newline as the stop set. */
static char *parse_shellcmd(const char **s) {
	char *cmd;

	skip_spaces(s);
	cmd = parse_until(s, "\n");
	return cmd;
}