static inline void parse_sgml_attributes(struct dom_stack *stack, struct dom_scanner *scanner) { struct dom_scanner_token name; assert(dom_scanner_has_tokens(scanner) && (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN || (get_dom_stack_top(stack)->node->type == DOM_NODE_PROCESSING_INSTRUCTION))); if (get_dom_scanner_token(scanner)->type == SGML_TOKEN_ELEMENT_BEGIN) skip_dom_scanner_token(scanner); while (dom_scanner_has_tokens(scanner)) { struct dom_scanner_token *token = get_dom_scanner_token(scanner); assert(token); switch (token->type) { case SGML_TOKEN_TAG_END: skip_dom_scanner_token(scanner); /* and return */ case SGML_TOKEN_ELEMENT: case SGML_TOKEN_ELEMENT_BEGIN: case SGML_TOKEN_ELEMENT_END: case SGML_TOKEN_ELEMENT_EMPTY_END: return; case SGML_TOKEN_IDENT: copy_struct(&name, token); /* Skip the attribute name token */ token = get_next_dom_scanner_token(scanner); if (token && token->type == '=') { /* If the token is not a valid value token * ignore it. */ token = get_next_dom_scanner_token(scanner); if (token && token->type != SGML_TOKEN_IDENT && token->type != SGML_TOKEN_ATTRIBUTE && token->type != SGML_TOKEN_STRING) token = NULL; } else { token = NULL; } add_sgml_attribute(stack, &name, token); /* Skip the value token */ if (token) skip_dom_scanner_token(scanner); break; default: skip_dom_scanner_token(scanner); } } }
/* Debugging variant of the token getter: dumps the scanner state through
 * dump_dom_scanner() before handing out the current token.
 *
 * Returns NULL when the scanner has no tokens, otherwise whatever
 * get_dom_scanner_token() returns. */
struct dom_scanner_token *
get_dom_scanner_token_debug(struct dom_scanner *scanner)
{
	if (dom_scanner_has_tokens(scanner)) {
		dump_dom_scanner(scanner);

		/* A token of type 0 must never be handed out. */
		assert(!dom_scanner_has_tokens(scanner) || scanner->current->type != 0);

		return get_dom_scanner_token(scanner);
	}

	return NULL;
}
void dump_dom_scanner(struct dom_scanner *scanner) { unsigned char buffer[MAX_STR_LEN]; struct dom_scanner_token *token = scanner->current; struct dom_scanner_token *table_end = scanner->table + scanner->tokens; unsigned char *srcpos = token->string, *bufpos = buffer; int src_lookahead = 50; int token_lookahead = 4; int srclen; if (!dom_scanner_has_tokens(scanner)) return; memset(buffer, 0, MAX_STR_LEN); for (; token_lookahead > 0 && token < table_end; token++, token_lookahead--) { int buflen = MAX_STR_LEN - (bufpos - buffer); int added = snprintf(bufpos, buflen, "[%.*s] ", token->length, token->string); bufpos += added; } if (scanner->tokens > token_lookahead) { memcpy(bufpos, "... ", 4); bufpos += 4; } srclen = strlen((const char *)srcpos); int_upper_bound(&src_lookahead, srclen); *bufpos++ = '['; /* Compress the lookahead string */ for (; src_lookahead > 0; src_lookahead--, srcpos++, bufpos++) { if (*srcpos == '\n' || *srcpos == '\r' || *srcpos == '\t') { *bufpos++ = '\\'; *bufpos = *srcpos == '\n' ? 'n' : (*srcpos == '\r' ? 'r' : 't'); } else { *bufpos = *srcpos; } } if (srclen > src_lookahead) memcpy(bufpos, "...]", 4); else memcpy(bufpos, "]", 2); errfile = scanner->file, errline = scanner->line; elinks_wdebug("%s", buffer); }
/* Parse a CSS3 selector and add selector nodes to the @select struct. */ static enum dom_code parse_dom_select(struct dom_select *select, struct dom_stack *stack, struct dom_string *string) { struct dom_scanner scanner; struct dom_select_node sel; init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0, 1, 0, 0); memset(&sel, 0, sizeof(sel)); while (dom_scanner_has_tokens(&scanner)) { struct dom_scanner_token *token = get_dom_scanner_token(&scanner); enum dom_code code; struct dom_select_node *select_node; assert(token); if (token->type == '{' || token->type == '}' || token->type == ';' || token->type == ',') break; /* Examine the selector fragment */ switch (token->type) { case CSS_TOKEN_IDENT: sel.node.type = DOM_NODE_ELEMENT; copy_dom_string(&sel.node.string, &token->string); if (dom_scanner_token_contains(token, "*")) sel.match.element |= DOM_SELECT_ELEMENT_UNIVERSAL; break; case CSS_TOKEN_HASH: case CSS_TOKEN_HEX_COLOR: /* ID fragment */ sel.node.type = DOM_NODE_ATTRIBUTE; sel.match.attribute |= DOM_SELECT_ATTRIBUTE_ID; /* Skip the leading '#'. 
*/ skip_dom_scanner_token_char(token); break; case '[': sel.node.type = DOM_NODE_ATTRIBUTE; code = parse_dom_select_attribute(&sel, &scanner); if (code != DOM_CODE_OK) return code; break; case '.': token = get_next_dom_scanner_token(&scanner); if (!token || token->type != CSS_TOKEN_IDENT) return DOM_CODE_SYNTAX_ERR; sel.node.type = DOM_NODE_ATTRIBUTE; sel.match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST; set_dom_string(&sel.node.string, "class", -1); copy_dom_string(&sel.node.data.attribute.value, &token->string); break; case ':': code = parse_dom_select_pseudo(select, &sel, &scanner); if (code != DOM_CODE_OK) return code; break; case '>': if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT) return DOM_CODE_SYNTAX_ERR; sel.match.element |= DOM_SELECT_RELATION_DIRECT_CHILD; break; case '+': if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT) return DOM_CODE_SYNTAX_ERR; sel.match.element |= DOM_SELECT_RELATION_DIRECT_ADJACENT; break; case '~': if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT) return DOM_CODE_SYNTAX_ERR; sel.match.element |= DOM_SELECT_RELATION_INDIRECT_ADJACENT; break; default: return DOM_CODE_SYNTAX_ERR; } skip_dom_scanner_token(&scanner); if (sel.node.type == DOM_NODE_UNKNOWN) continue; select_node = mem_calloc(1, sizeof(*select_node)); copy_struct(select_node, &sel); if (!dom_stack_is_empty(stack)) { struct dom_node *node = &select_node->node; struct dom_node *parent = get_dom_stack_top(stack)->node; struct dom_node_list **list = get_dom_node_list(parent, node); int sort = (node->type == DOM_NODE_ATTRIBUTE); int index; assertm(list != NULL, "Adding node to bad parent [%d -> %d]", node->type, parent->type); index = *list && (*list)->size > 0 && sort ? 
get_dom_node_map_index(*list, node) : -1; if (!add_to_dom_node_list(list, node, index)) { done_dom_node(node); return DOM_CODE_ALLOC_ERR; } node->parent = parent; } else { assert(!select->selector); select->selector = select_node; } code = push_dom_node(stack, &select_node->node); if (code != DOM_CODE_OK) return code; if (select_node->node.type != DOM_NODE_ELEMENT) pop_dom_node(stack); memset(&sel, 0, sizeof(sel)); } if (select->selector) return DOM_CODE_OK; return DOM_CODE_ERR; }
static void parse_sgml_plain(struct dom_stack *stack, struct dom_scanner *scanner) { struct dom_scanner_token target; while (dom_scanner_has_tokens(scanner)) { struct dom_scanner_token *token = get_dom_scanner_token(scanner); switch (token->type) { case SGML_TOKEN_ELEMENT: case SGML_TOKEN_ELEMENT_BEGIN: if (!add_sgml_element(stack, token)) { if (token->type == SGML_TOKEN_ELEMENT) { skip_dom_scanner_token(scanner); break; } skip_sgml_tokens(scanner, SGML_TOKEN_TAG_END); break; } if (token->type == SGML_TOKEN_ELEMENT_BEGIN) { parse_sgml_attributes(stack, scanner); } else { skip_dom_scanner_token(scanner); } break; case SGML_TOKEN_ELEMENT_EMPTY_END: pop_dom_node(stack); skip_dom_scanner_token(scanner); break; case SGML_TOKEN_ELEMENT_END: if (!token->string.length) { pop_dom_node(stack); } else { struct dom_string string; struct dom_stack_state *state; set_dom_string(&string, token->string.string, token->string.length); state = search_dom_stack(stack, DOM_NODE_ELEMENT, &string); if (state) { struct sgml_parser_state *pstate; pstate = get_sgml_parser_state(stack, state); copy_struct(&pstate->end_token, token); pop_dom_state(stack, state); } } skip_dom_scanner_token(scanner); break; case SGML_TOKEN_NOTATION_COMMENT: add_sgml_node(stack, DOM_NODE_COMMENT, token); skip_dom_scanner_token(scanner); break; case SGML_TOKEN_NOTATION_ATTLIST: case SGML_TOKEN_NOTATION_DOCTYPE: case SGML_TOKEN_NOTATION_ELEMENT: case SGML_TOKEN_NOTATION_ENTITY: case SGML_TOKEN_NOTATION: skip_dom_scanner_token(scanner); break; case SGML_TOKEN_CDATA_SECTION: add_sgml_node(stack, DOM_NODE_CDATA_SECTION, token); skip_dom_scanner_token(scanner); break; case SGML_TOKEN_PROCESS_XML_STYLESHEET: case SGML_TOKEN_PROCESS_XML: case SGML_TOKEN_PROCESS: copy_struct(&target, token); /* Skip the target token */ token = get_next_dom_scanner_token(scanner); if (!token) break; assert(token->type == SGML_TOKEN_PROCESS_DATA); if (add_sgml_proc_instruction(stack, &target, token) && (target.type == SGML_TOKEN_PROCESS_XML 
|| target.type == SGML_TOKEN_PROCESS_XML_STYLESHEET) && token->string.length > 0) { /* Parse the <?xml data="attributes"?>. */ struct dom_scanner attr_scanner; init_dom_scanner(&attr_scanner, &sgml_scanner_info, &token->string, SGML_STATE_ELEMENT, scanner->count_lines); if (dom_scanner_has_tokens(&attr_scanner)) parse_sgml_attributes(stack, &attr_scanner); } pop_dom_node(stack); skip_dom_scanner_token(scanner); break; case SGML_TOKEN_ENTITY: add_sgml_node(stack, DOM_NODE_ENTITY_REFERENCE, token); skip_dom_scanner_token(scanner); break; case SGML_TOKEN_SPACE: case SGML_TOKEN_TEXT: default: add_sgml_node(stack, DOM_NODE_TEXT, token); skip_dom_scanner_token(scanner); } } }