Exemplo n.º 1
0
static inline void
parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner)
{
	struct dom_scanner_token name;

	assert(dom_scanner_has_tokens(scanner)
	       && (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN
	           || (get_dom_stack_top(stack)->node->type == DOM_NODE_PROCESSING_INSTRUCTION)));

	if (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN)
		skip_dom_scanner_token(scanner);

	while (dom_scanner_has_tokens(scanner)) {
		struct dom_scanner_token *token = get_dom_scanner_token(scanner);

		assert(token);

		switch (token->type) {
		case SGML_TOKEN_TAG_END:
			skip_dom_scanner_token(scanner);
			/* and return */
		case SGML_TOKEN_ELEMENT:
		case SGML_TOKEN_ELEMENT_BEGIN:
		case SGML_TOKEN_ELEMENT_END:
		case SGML_TOKEN_ELEMENT_EMPTY_END:
			return;

		case SGML_TOKEN_IDENT:
			copy_struct(&name, token);

			/* Skip the attribute name token */
			token = get_next_dom_scanner_token(scanner);
			if (token && token->type == '=') {
				/* If the token is not a valid value token
				 * ignore it. */
				token = get_next_dom_scanner_token(scanner);
				if (token
				    && token->type != SGML_TOKEN_IDENT
				    && token->type != SGML_TOKEN_ATTRIBUTE
				    && token->type != SGML_TOKEN_STRING)
					token = NULL;
			} else {
				token = NULL;
			}

			add_sgml_attribute(stack, &name, token);

			/* Skip the value token */
			if (token)
				skip_dom_scanner_token(scanner);
			break;

		default:
			skip_dom_scanner_token(scanner);

		}
	}
}
Exemplo n.º 2
0
/* Debugging flavour of get_dom_scanner_token(): when the scanner has tokens,
 * dump its state with dump_dom_scanner() and then return the current token.
 * Returns NULL when the scanner has no tokens. */
struct dom_scanner_token *
get_dom_scanner_token_debug(struct dom_scanner *scanner)
{
	if (dom_scanner_has_tokens(scanner)) {
		dump_dom_scanner(scanner);

		/* Make sure we do not return invalid tokens */
		assert(!dom_scanner_has_tokens(scanner)
			|| scanner->current->type != 0);

		return get_dom_scanner_token(scanner);
	}

	return NULL;
}
Exemplo n.º 3
0
/* Format a one-line debug view of the scanner and pass it to elinks_wdebug().
 *
 * The line consists of up to four tokens starting at the current one, each
 * printed as "[text] ", a "... " marker when the token table holds more
 * tokens than were printed, and finally up to 50 characters of the source
 * text starting at the current token, with newline, carriage return and tab
 * shown as \n, \r and \t. The source excerpt ends in "...]" when it was cut
 * short and in "]" otherwise.
 *
 * Nothing is printed when the scanner has no tokens. Output that does not
 * fit in the local buffer is silently truncated. */
void
dump_dom_scanner(struct dom_scanner *scanner)
{
	unsigned char buffer[MAX_STR_LEN];
	/* The last byte is never written so the buffer stays NUL terminated. */
	unsigned char *bufend = buffer + sizeof(buffer) - 1;
	unsigned char *bufpos = buffer;
	struct dom_scanner_token *token;
	struct dom_scanner_token *table_end;
	unsigned char *srcpos;
	int src_lookahead = 50;
	int token_lookahead = 4;
	int srclen;
	int shown = 0;

	if (!dom_scanner_has_tokens(scanner)) return;

	/* Only touch the current token once it is known to be valid. */
	token = scanner->current;
	table_end = scanner->table + scanner->tokens;
	srcpos = token->string;

	memset(buffer, 0, sizeof(buffer));

	for (; token_lookahead > 0 && token < table_end; token++, token_lookahead--) {
		int room = (int) (bufend - bufpos) + 1;
		int added = snprintf((char *) bufpos, room, "[%.*s] ",
				     token->length, (const char *) token->string);

		/* snprintf() reports the length it wanted to write, so a
		 * result of room or more means the output was truncated. */
		if (added < 0) break;
		if (added >= room) {
			bufpos = bufend;
			break;
		}

		bufpos += added;
	}

	/* Tokens remain in the table beyond those printed above. */
	if (token < table_end && bufend - bufpos >= 4) {
		memcpy(bufpos, "... ", 4);
		bufpos += 4;
	}

	srclen = strlen((const char *) srcpos);
	int_upper_bound(&src_lookahead, srclen);

	if (bufpos < bufend)
		*bufpos++ = '[';

	/* Compress the lookahead string */
	for (; shown < src_lookahead; shown++, srcpos++) {
		unsigned char escape = 0;

		switch (*srcpos) {
		case '\n': escape = 'n'; break;
		case '\r': escape = 'r'; break;
		case '\t': escape = 't'; break;
		default: break;
		}

		if (escape) {
			/* An escaped character takes two bytes. */
			if (bufend - bufpos < 2) break;
			*bufpos++ = '\\';
			*bufpos++ = escape;
		} else {
			if (bufpos >= bufend) break;
			*bufpos++ = *srcpos;
		}
	}

	/* Flag the excerpt as cut short only if source text was left out. */
	if (srclen > shown) {
		if (bufend - bufpos >= 4)
			memcpy(bufpos, "...]", 4);
	} else if (bufpos < bufend) {
		*bufpos = ']';
	}

	errfile = scanner->file, errline = scanner->line;
	elinks_wdebug("%s", buffer);
}
Exemplo n.º 4
0
Arquivo: select.c Projeto: ezc/elinks
/* Parse a CSS3 selector and add selector nodes to the @select struct.
 *
 * @select	receives the first selector node in its selector member
 * @stack	stack used to link each new selector node to its parent
 * @string	the selector text to scan
 *
 * Scanning stops at the end of @string or at the first '{', '}', ';' or ','
 * token. Each selector fragment is collected in a local template node which
 * is copied into a freshly allocated node once its node type is known.
 *
 * Returns DOM_CODE_OK when at least one selector node was created,
 * DOM_CODE_SYNTAX_ERR for an unexpected token, DOM_CODE_ALLOC_ERR when a node
 * could not be allocated or added to its parent, any error code reported by
 * the attribute/pseudo parsers or push_dom_node(), and DOM_CODE_ERR when the
 * string produced no selector at all. */
static enum dom_code
parse_dom_select(struct dom_select *select, struct dom_stack *stack,
		 struct dom_string *string)
{
	struct dom_scanner scanner;
	struct dom_select_node sel;

	init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0, 1, 0, 0);

	memset(&sel, 0, sizeof(sel));

	while (dom_scanner_has_tokens(&scanner)) {
		struct dom_scanner_token *token = get_dom_scanner_token(&scanner);
		enum dom_code code;
		struct dom_select_node *select_node;

		assert(token);

		/* These tokens end the selector. */
		if (token->type == '{'
		    || token->type == '}'
		    || token->type == ';'
		    || token->type == ',')
			break;

		/* Examine the selector fragment */

		switch (token->type) {
		case CSS_TOKEN_IDENT:
			/* Element name; "*" is flagged as the universal selector. */
			sel.node.type = DOM_NODE_ELEMENT;
			copy_dom_string(&sel.node.string, &token->string);
			if (dom_scanner_token_contains(token, "*"))
				sel.match.element |= DOM_SELECT_ELEMENT_UNIVERSAL;
			break;

		case CSS_TOKEN_HASH:
		case CSS_TOKEN_HEX_COLOR:
			/* ID fragment */
			sel.node.type = DOM_NODE_ATTRIBUTE;
			sel.match.attribute |= DOM_SELECT_ATTRIBUTE_ID;
			/* Skip the leading '#'. */
			skip_dom_scanner_token_char(token);
			break;

		case '[':
			/* Attribute selector, handled by its own parser. */
			sel.node.type = DOM_NODE_ATTRIBUTE;
			code = parse_dom_select_attribute(&sel, &scanner);
			if (code != DOM_CODE_OK)
				return code;
			break;

		case '.':
			/* Class selector: the '.' must be followed by an
			 * identifier, which is matched against the "class"
			 * attribute. */
			token = get_next_dom_scanner_token(&scanner);
			if (!token || token->type != CSS_TOKEN_IDENT)
				return DOM_CODE_SYNTAX_ERR;

			sel.node.type = DOM_NODE_ATTRIBUTE;
			sel.match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST;
			set_dom_string(&sel.node.string, "class", -1);
			copy_dom_string(&sel.node.data.attribute.value, &token->string);
			break;

		case ':':
			/* Pseudo class or element, handled by its own parser. */
			code = parse_dom_select_pseudo(select, &sel, &scanner);
			if (code != DOM_CODE_OK)
				return code;
			break;

		/* Combinators: rejected when a relation other than descendant
		 * has already been recorded for the pending fragment. They do
		 * not set a node type, so the relation is kept in the
		 * template for the fragment that follows. */
		case '>':
			if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
				return DOM_CODE_SYNTAX_ERR;
			sel.match.element |= DOM_SELECT_RELATION_DIRECT_CHILD;
			break;

		case '+':
			if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
				return DOM_CODE_SYNTAX_ERR;
			sel.match.element |= DOM_SELECT_RELATION_DIRECT_ADJACENT;
			break;

		case '~':
			if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT)
				return DOM_CODE_SYNTAX_ERR;
			sel.match.element |= DOM_SELECT_RELATION_INDIRECT_ADJACENT;
			break;

		default:
			return DOM_CODE_SYNTAX_ERR;
		}

		skip_dom_scanner_token(&scanner);

		/* No node type yet: keep collecting into the same template. */
		if (sel.node.type == DOM_NODE_UNKNOWN)
			continue;

		select_node = mem_calloc(1, sizeof(*select_node));
		/* Do not copy into a failed allocation. */
		if (!select_node)
			return DOM_CODE_ALLOC_ERR;

		copy_struct(select_node, &sel);

		if (!dom_stack_is_empty(stack)) {
			/* Link the new node into the node on top of the stack. */
			struct dom_node *node = &select_node->node;
			struct dom_node *parent = get_dom_stack_top(stack)->node;
			struct dom_node_list **list = get_dom_node_list(parent, node);
			int sort = (node->type == DOM_NODE_ATTRIBUTE);
			int index;

			assertm(list != NULL, "Adding node to bad parent [%d -> %d]",
				node->type, parent->type);

			/* Attribute nodes are placed at the index reported by
			 * get_dom_node_map_index() when the list is not
			 * empty; every other case passes -1. */
			index = *list && (*list)->size > 0 && sort
				? get_dom_node_map_index(*list, node) : -1;

			if (!add_to_dom_node_list(list, node, index)) {
				done_dom_node(node);
				return DOM_CODE_ALLOC_ERR;
			}

			node->parent = parent;

		} else {
			/* The first node created becomes the selector root. */
			assert(!select->selector);
			select->selector = select_node;
		}

		code = push_dom_node(stack, &select_node->node);
		if (code != DOM_CODE_OK)
			return code;

		/* Only element nodes stay on the stack to act as parent of
		 * the nodes that follow. */
		if (select_node->node.type != DOM_NODE_ELEMENT)
			pop_dom_node(stack);

		/* Start the next fragment from a clean template. */
		memset(&sel, 0, sizeof(sel));
	}

	if (select->selector)
		return DOM_CODE_OK;

	return DOM_CODE_ERR;
}
Exemplo n.º 5
0
/* Main SGML parsing loop: consume tokens from @scanner until none are left
 * and update the DOM stack accordingly.
 *
 * Element tokens add an element (and parse its attributes), end tokens pop
 * nodes, comments, CDATA sections, entity references and text are added as
 * nodes of the matching type, processing instructions are added and popped
 * again, and the remaining notation tokens are skipped. */
static void
parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner)
{
	struct dom_scanner_token target;

	while (dom_scanner_has_tokens(scanner)) {
		struct dom_scanner_token *token = get_dom_scanner_token(scanner);

		assert(token);

		switch (token->type) {
		case SGML_TOKEN_ELEMENT:
		case SGML_TOKEN_ELEMENT_BEGIN:
			if (!add_sgml_element(stack, token)) {
				/* The element could not be added: drop it. For
				 * a begin token also skip up to the tag end. */
				if (token->type == SGML_TOKEN_ELEMENT) {
					skip_dom_scanner_token(scanner);
					break;
				}

				skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END);
				break;
			}

			if (token->type == SGML_TOKEN_ELEMENT_BEGIN) {
				parse_sgml_attributes(stack, scanner);
			} else {
				skip_dom_scanner_token(scanner);
			}

			break;

		case SGML_TOKEN_ELEMENT_EMPTY_END:
			pop_dom_node(stack);
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_ELEMENT_END:
			if (!token->string.length) {
				/* End tag without a name: pop the top node. */
				pop_dom_node(stack);
			} else {
				/* Named end tag: look up the element with that
				 * name on the stack and, when found, record the
				 * end token in its parser state and pop that
				 * state. Unmatched end tags are ignored. */
				struct dom_string string;
				struct dom_stack_state *state;

				set_dom_string(&string, token->string.string, token->string.length);
				state = search_dom_stack(stack, DOM_NODE_ELEMENT,
							 &string);
				if (state) {
					struct sgml_parser_state *pstate;

					pstate = get_sgml_parser_state(stack, state);
					copy_struct(&pstate->end_token, token);

					pop_dom_state(stack, state);
				}
			}
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_NOTATION_COMMENT:
			add_sgml_node(stack, DOM_NODE_COMMENT, token);
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_NOTATION_ATTLIST:
		case SGML_TOKEN_NOTATION_DOCTYPE:
		case SGML_TOKEN_NOTATION_ELEMENT:
		case SGML_TOKEN_NOTATION_ENTITY:
		case SGML_TOKEN_NOTATION:
			/* These notations are not added to the tree. */
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_CDATA_SECTION:
			add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token);
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_PROCESS_XML_STYLESHEET:
		case SGML_TOKEN_PROCESS_XML:
		case SGML_TOKEN_PROCESS:
			/* The target token is copied because advancing the
			 * scanner replaces the current token. */
			copy_struct(&target, token);

			/* Skip the target token */
			token = get_next_dom_scanner_token(scanner);
			if (!token) break;

			assert(token->type == SGML_TOKEN_PROCESS_DATA);

			if (add_sgml_proc_instruction(stack, &target, token)) {
				if ((target.type == SGML_TOKEN_PROCESS_XML
				     || target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET)
				    && token->string.length > 0) {
					/* Parse the <?xml data="attributes"?>. */
					struct dom_scanner attr_scanner;

					init_dom_scanner(&attr_scanner, &sgml_scanner_info,
							 &token->string, SGML_STATE_ELEMENT,
							 scanner->count_lines);

					if (dom_scanner_has_tokens(&attr_scanner))
						parse_sgml_attributes(stack, &attr_scanner);
				}

				/* Pop the instruction only when it was added, so
				 * a failed add cannot remove an unrelated node.
				 * NOTE(review): assumes a failing
				 * add_sgml_proc_instruction() leaves the stack
				 * unchanged - confirm against its definition. */
				pop_dom_node(stack);
			}

			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_ENTITY:
			add_sgml_node(stack, DOM_NODE_ENTITY_REFERENCE, token);
			skip_dom_scanner_token(scanner);
			break;

		case SGML_TOKEN_SPACE:
		case SGML_TOKEN_TEXT:
		default:
			/* Everything else is treated as text. */
			add_sgml_node(stack, DOM_NODE_TEXT, token);
			skip_dom_scanner_token(scanner);
		}
	}
}