struct sgml_parser * init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype, struct dom_string *uri) { struct sgml_parser *parser; enum dom_stack_flag flags = 0; parser = mem_calloc(1, sizeof(*parser)); if (!parser) return NULL; if (!init_dom_string(&parser->uri, uri->string, uri->length)) { mem_free(parser); return NULL; } parser->type = type; parser->info = get_sgml_info(doctype); if (type == SGML_PARSER_TREE) flags |= DOM_STACK_KEEP_NODES; init_dom_stack(&parser->stack, flags); /* FIXME: Some sgml backend specific callbacks? Handle HTML script tags, * and feed document.write() data back to the parser. */ add_dom_stack_context(&parser->stack, parser, &sgml_parser_context_info); /* Don't keep the 'fake' text nodes that holds the parsing data. */ init_dom_stack(&parser->parsing, 0); add_dom_stack_context(&parser->parsing, parser, &sgml_parsing_context_info); return parser; }
/* Basically this is just a wrapper for parse_dom_select() to ease error * handling. */ struct dom_select * init_dom_select(enum dom_select_syntax syntax, struct dom_string *string) { struct dom_select *select = mem_calloc(1, sizeof(select)); struct dom_stack stack; enum dom_code code; init_dom_stack(&stack, DOM_STACK_FLAG_NONE); add_dom_stack_tracer(&stack, "init-select: "); code = parse_dom_select(select, &stack, string); done_dom_stack(&stack); if (code == DOM_CODE_OK) return select; done_dom_select(select); return NULL; }
int main(int argc, char *argv[]) { struct sgml_parser *parser; enum sgml_document_type doctype = SGML_DOCTYPE_HTML; enum sgml_parser_flag flags = 0; enum sgml_parser_type type = SGML_PARSER_STREAM; enum dom_code code = 0; enum dom_config_flag normalize_flags = 0; struct dom_config config; int normalize = 0; int dump = 0; int complete = 1; size_t read_stdin = 0; struct dom_string uri = STATIC_DOM_STRING("dom://test"); struct dom_string source = STATIC_DOM_STRING("(no source)"); int i; for (i = 1; i < argc; i++) { char *arg = argv[i]; if (strncmp(arg, "--", 2)) break; arg += 2; if (get_test_opt(&arg, "uri", &i, argc, argv, "a URI")) { set_dom_string(&uri, arg, strlen((const char *)arg)); } else if (get_test_opt(&arg, "src", &i, argc, argv, "a string")) { set_dom_string(&source, arg, strlen((const char *)arg)); } else if (get_test_opt(&arg, "stdin", &i, argc, argv, "a number")) { read_stdin = atoi(arg); flags |= SGML_PARSER_INCREMENTAL; } else if (get_test_opt(&arg, "normalize", &i, argc, argv, "a string")) { normalize = 1; normalize_flags = parse_dom_config(arg, ','); type = SGML_PARSER_TREE; } else if (!strcmp(arg, "print-lines")) { flags |= SGML_PARSER_COUNT_LINES; } else if (!strcmp(arg, "incomplete")) { flags |= SGML_PARSER_INCREMENTAL; complete = 0; } else if (!strcmp(arg, "dump")) { type = SGML_PARSER_TREE; dump = 1; } else if (!strcmp(arg, "error")) { flags |= SGML_PARSER_DETECT_ERRORS; } else if (!strcmp(arg, "help")) { die(NULL); } else { die("Unknown argument '%s'", arg - 2); } } parser = init_sgml_parser(type, doctype, &uri, flags); if (!parser) return 1; parser->error_func = sgml_error_function; if (normalize) add_dom_config_normalizer(&parser->stack, &config, normalize_flags); else if (!dump) add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info); if (read_stdin > 0) { unsigned char *buffer; buffer = mem_alloc(read_stdin); if (!buffer) die("Cannot allocate buffer"); complete = 0; while (!complete) { size_t size = fread(buffer, 1, read_stdin, stdin); if (ferror(stdin)) die("error reading from stdin"); complete = feof(stdin); code = parse_sgml(parser, buffer, size, complete); switch (code) { case DOM_CODE_OK: break; case DOM_CODE_INCOMPLETE: if (!complete) break; /* Error */ default: complete = 1; } } mem_free(buffer); } else { code = parse_sgml(parser, source.string, source.length, complete); } if (parser->root) { assert(!complete || parser->stack.depth > 0); while (!dom_stack_is_empty(&parser->stack)) { get_dom_stack_top(&parser->stack)->immutable = 0; pop_dom_node(&parser->stack); } if (normalize || dump) { struct dom_stack stack; /* Note, that we cannot free nodes when walking the DOM * tree since walk_dom_node() uses an index to traverse * the tree. */ init_dom_stack(&stack, DOM_STACK_FLAG_NONE); /* XXX: This context needs to be added first because it * assumes the parser can be accessed via * stack->contexts[0].data. */ if (normalize) add_dom_stack_context(&stack, parser, &sgml_parser_test_context_info); else if (dump) add_sgml_file_dumper(&stack, stdout); walk_dom_nodes(&stack, parser->root); done_dom_stack(&stack); done_dom_node(parser->root); } } done_sgml_parser(parser); #ifdef DEBUG_MEMLEAK check_memory_leaks(); #endif return code != DOM_CODE_OK ? 1 : 0; }