void pop_dom_node(struct dom_stack *stack) { struct dom_stack_state *state; int i; assert(stack); if (dom_stack_is_empty(stack)) return; state = get_dom_stack_top(stack); if (state->immutable) return; if (call_dom_stack_callbacks(stack, state, DOM_STACK_POP) || (stack->flags & DOM_STACK_FLAG_FREE_NODES)) done_dom_node(state->node); stack->depth--; assert(stack->depth >= 0); for (i = 0; i < stack->contexts_size; i++) { struct dom_stack_context *context = stack->contexts[i]; if (context->info->object_size) { void *state_data = get_dom_stack_state_data(context, state); memset(state_data, 0, context->info->object_size); } } memset(state, 0, sizeof(*state)); }
/* Look up a state on @stack via search_dom_stack() using @type and @string
 * and, when one is found, pop states with pop_dom_state() using it as the
 * target. An empty stack is left untouched. */
void
pop_dom_nodes(struct dom_stack *stack, enum dom_node_type type,
	      struct dom_string *string)
{
	struct dom_stack_state *match;

	assert(stack);

	if (!dom_stack_is_empty(stack)) {
		match = search_dom_stack(stack, type, string);
		if (match)
			pop_dom_state(stack, match);
	}
}
/* Pop states from @stack, iterating with foreachback_dom_stack_state(), until
 * @target has been popped. The walk also ends as soon as a state marked
 * immutable is met. A NULL @target or an empty stack is a no-op. */
void
pop_dom_state(struct dom_stack *stack, struct dom_stack_state *target)
{
	struct dom_stack_state *current;
	unsigned int pos;

	assert(stack);

	if (!target || dom_stack_is_empty(stack))
		return;

	foreachback_dom_stack_state (stack, current, pos) {
		int reached_target;

		/* Never pop past a state marked immutable. */
		if (current->immutable)
			break;

		/* Only the addresses are compared, so taking the result
		 * before the pop gives the same answer as taking it after. */
		reached_target = (current == target);

		pop_dom_node(stack);
		if (reached_target)
			break;
	}
}
/* Parse a CSS3 selector and add selector nodes to the @select struct. */ static enum dom_code parse_dom_select(struct dom_select *select, struct dom_stack *stack, struct dom_string *string) { struct dom_scanner scanner; struct dom_select_node sel; init_dom_scanner(&scanner, &dom_css_scanner_info, string, 0, 0, 1, 0, 0); memset(&sel, 0, sizeof(sel)); while (dom_scanner_has_tokens(&scanner)) { struct dom_scanner_token *token = get_dom_scanner_token(&scanner); enum dom_code code; struct dom_select_node *select_node; assert(token); if (token->type == '{' || token->type == '}' || token->type == ';' || token->type == ',') break; /* Examine the selector fragment */ switch (token->type) { case CSS_TOKEN_IDENT: sel.node.type = DOM_NODE_ELEMENT; copy_dom_string(&sel.node.string, &token->string); if (dom_scanner_token_contains(token, "*")) sel.match.element |= DOM_SELECT_ELEMENT_UNIVERSAL; break; case CSS_TOKEN_HASH: case CSS_TOKEN_HEX_COLOR: /* ID fragment */ sel.node.type = DOM_NODE_ATTRIBUTE; sel.match.attribute |= DOM_SELECT_ATTRIBUTE_ID; /* Skip the leading '#'. 
*/ skip_dom_scanner_token_char(token); break; case '[': sel.node.type = DOM_NODE_ATTRIBUTE; code = parse_dom_select_attribute(&sel, &scanner); if (code != DOM_CODE_OK) return code; break; case '.': token = get_next_dom_scanner_token(&scanner); if (!token || token->type != CSS_TOKEN_IDENT) return DOM_CODE_SYNTAX_ERR; sel.node.type = DOM_NODE_ATTRIBUTE; sel.match.attribute |= DOM_SELECT_ATTRIBUTE_SPACE_LIST; set_dom_string(&sel.node.string, "class", -1); copy_dom_string(&sel.node.data.attribute.value, &token->string); break; case ':': code = parse_dom_select_pseudo(select, &sel, &scanner); if (code != DOM_CODE_OK) return code; break; case '>': if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT) return DOM_CODE_SYNTAX_ERR; sel.match.element |= DOM_SELECT_RELATION_DIRECT_CHILD; break; case '+': if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT) return DOM_CODE_SYNTAX_ERR; sel.match.element |= DOM_SELECT_RELATION_DIRECT_ADJACENT; break; case '~': if (get_element_relation(&sel) != DOM_SELECT_RELATION_DESCENDANT) return DOM_CODE_SYNTAX_ERR; sel.match.element |= DOM_SELECT_RELATION_INDIRECT_ADJACENT; break; default: return DOM_CODE_SYNTAX_ERR; } skip_dom_scanner_token(&scanner); if (sel.node.type == DOM_NODE_UNKNOWN) continue; select_node = mem_calloc(1, sizeof(*select_node)); copy_struct(select_node, &sel); if (!dom_stack_is_empty(stack)) { struct dom_node *node = &select_node->node; struct dom_node *parent = get_dom_stack_top(stack)->node; struct dom_node_list **list = get_dom_node_list(parent, node); int sort = (node->type == DOM_NODE_ATTRIBUTE); int index; assertm(list != NULL, "Adding node to bad parent [%d -> %d]", node->type, parent->type); index = *list && (*list)->size > 0 && sort ? 
get_dom_node_map_index(*list, node) : -1; if (!add_to_dom_node_list(list, node, index)) { done_dom_node(node); return DOM_CODE_ALLOC_ERR; } node->parent = parent; } else { assert(!select->selector); select->selector = select_node; } code = push_dom_node(stack, &select_node->node); if (code != DOM_CODE_OK) return code; if (select_node->node.type != DOM_NODE_ELEMENT) pop_dom_node(stack); memset(&sel, 0, sizeof(sel)); } if (select->selector) return DOM_CODE_OK; return DOM_CODE_ERR; }
int main(int argc, char *argv[]) { struct sgml_parser *parser; enum sgml_document_type doctype = SGML_DOCTYPE_HTML; enum sgml_parser_flag flags = 0; enum sgml_parser_type type = SGML_PARSER_STREAM; enum dom_code code = 0; enum dom_config_flag normalize_flags = 0; struct dom_config config; int normalize = 0; int dump = 0; int complete = 1; size_t read_stdin = 0; struct dom_string uri = STATIC_DOM_STRING("dom://test"); struct dom_string source = STATIC_DOM_STRING("(no source)"); int i; for (i = 1; i < argc; i++) { char *arg = argv[i]; if (strncmp(arg, "--", 2)) break; arg += 2; if (get_test_opt(&arg, "uri", &i, argc, argv, "a URI")) { set_dom_string(&uri, arg, strlen((const char *)arg)); } else if (get_test_opt(&arg, "src", &i, argc, argv, "a string")) { set_dom_string(&source, arg, strlen((const char *)arg)); } else if (get_test_opt(&arg, "stdin", &i, argc, argv, "a number")) { read_stdin = atoi(arg); flags |= SGML_PARSER_INCREMENTAL; } else if (get_test_opt(&arg, "normalize", &i, argc, argv, "a string")) { normalize = 1; normalize_flags = parse_dom_config(arg, ','); type = SGML_PARSER_TREE; } else if (!strcmp(arg, "print-lines")) { flags |= SGML_PARSER_COUNT_LINES; } else if (!strcmp(arg, "incomplete")) { flags |= SGML_PARSER_INCREMENTAL; complete = 0; } else if (!strcmp(arg, "dump")) { type = SGML_PARSER_TREE; dump = 1; } else if (!strcmp(arg, "error")) { flags |= SGML_PARSER_DETECT_ERRORS; } else if (!strcmp(arg, "help")) { die(NULL); } else { die("Unknown argument '%s'", arg - 2); } } parser = init_sgml_parser(type, doctype, &uri, flags); if (!parser) return 1; parser->error_func = sgml_error_function; if (normalize) add_dom_config_normalizer(&parser->stack, &config, normalize_flags); else if (!dump) add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info); if (read_stdin > 0) { unsigned char *buffer; buffer = mem_alloc(read_stdin); if (!buffer) die("Cannot allocate buffer"); complete = 0; while (!complete) { size_t size = fread(buffer, 1, 
read_stdin, stdin); if (ferror(stdin)) die("error reading from stdin"); complete = feof(stdin); code = parse_sgml(parser, buffer, size, complete); switch (code) { case DOM_CODE_OK: break; case DOM_CODE_INCOMPLETE: if (!complete) break; /* Error */ default: complete = 1; } } mem_free(buffer); } else { code = parse_sgml(parser, source.string, source.length, complete); } if (parser->root) { assert(!complete || parser->stack.depth > 0); while (!dom_stack_is_empty(&parser->stack)) { get_dom_stack_top(&parser->stack)->immutable = 0; pop_dom_node(&parser->stack); } if (normalize || dump) { struct dom_stack stack; /* Note, that we cannot free nodes when walking the DOM * tree since walk_dom_node() uses an index to traverse * the tree. */ init_dom_stack(&stack, DOM_STACK_FLAG_NONE); /* XXX: This context needs to be added first because it * assumes the parser can be accessed via * stack->contexts[0].data. */ if (normalize) add_dom_stack_context(&stack, parser, &sgml_parser_test_context_info); else if (dump) add_sgml_file_dumper(&stack, stdout); walk_dom_nodes(&stack, parser->root); done_dom_stack(&stack); done_dom_node(parser->root); } } done_sgml_parser(parser); #ifdef DEBUG_MEMLEAK check_memory_leaks(); #endif return code != DOM_CODE_OK ? 1 : 0; }
/* FIXME: Instead of walking all nodes in the tree only visit those which are * of actual interest to the contexts on the stack. */ void walk_dom_nodes(struct dom_stack *stack, struct dom_node *root) { struct dom_stack_context *context; assert(root && stack); context = add_dom_stack_context(stack, NULL, &dom_stack_walk_context_info); if (!context) return; if (push_dom_node(stack, root) != DOM_CODE_OK) return; while (!dom_stack_is_empty(stack)) { struct dom_stack_state *state = get_dom_stack_top(stack); struct dom_stack_walk_state *wstate = get_dom_stack_state_data(context, state); struct dom_node_list *list = wstate->list; struct dom_node *node = state->node; switch (node->type) { case DOM_NODE_DOCUMENT: if (!list) list = node->data.document.children; break; case DOM_NODE_ELEMENT: if (!list) list = node->data.element.map; if (list == node->data.element.children) break; if (is_dom_node_list_member(list, wstate->index) && list == node->data.element.map) break; list = node->data.element.children; break; case DOM_NODE_PROCESSING_INSTRUCTION: if (!list) list = node->data.proc_instruction.map; break; case DOM_NODE_DOCUMENT_TYPE: if (!list) list = node->data.document_type.entities; if (list == node->data.document_type.notations) break; if (is_dom_node_list_member(list, wstate->index) && list == node->data.document_type.entities) break; list = node->data.document_type.notations; break; case DOM_NODE_ATTRIBUTE: case DOM_NODE_TEXT: case DOM_NODE_CDATA_SECTION: case DOM_NODE_COMMENT: case DOM_NODE_NOTATION: case DOM_NODE_DOCUMENT_FRAGMENT: case DOM_NODE_ENTITY_REFERENCE: case DOM_NODE_ENTITY: default: break; } /* Reset list state if it is a new list */ if (list != wstate->list) { wstate->list = list; wstate->index = 0; } /* If we have next child node */ if (is_dom_node_list_member(list, wstate->index)) { struct dom_node *child = list->entries[wstate->index++]; if (push_dom_node(stack, child) == DOM_CODE_OK) continue; } pop_dom_node(stack); } done_dom_stack_context(stack, 
context); }