Esempio n. 1
0
struct sgml_parser *
init_sgml_parser(enum sgml_parser_type type, enum sgml_document_type doctype,
		 struct dom_string *uri)
{
	struct sgml_parser *parser;
	enum dom_stack_flag flags = 0;

	parser = mem_calloc(1, sizeof(*parser));
	if (!parser) return NULL;

	if (!init_dom_string(&parser->uri, uri->string, uri->length)) {
		mem_free(parser);
		return NULL;
	}

	parser->type = type;
	parser->info = get_sgml_info(doctype);

	if (type == SGML_PARSER_TREE)
		flags |= DOM_STACK_KEEP_NODES;

	init_dom_stack(&parser->stack, flags);
	/* FIXME: Some sgml backend specific callbacks? Handle HTML script tags,
	 * and feed document.write() data back to the parser. */
	add_dom_stack_context(&parser->stack, parser, &sgml_parser_context_info);

	/* Don't keep the 'fake' text nodes that holds the parsing data. */
	init_dom_stack(&parser->parsing, 0);
	add_dom_stack_context(&parser->parsing, parser, &sgml_parsing_context_info);

	return parser;
}
Esempio n. 2
0
File: select.c Progetto: ezc/elinks
/* Basically this is just a wrapper for parse_dom_select() to ease error
 * handling. */
struct dom_select *
init_dom_select(enum dom_select_syntax syntax, struct dom_string *string)
{
	struct dom_select *select = mem_calloc(1, sizeof(select));
	struct dom_stack stack;
	enum dom_code code;

	init_dom_stack(&stack, DOM_STACK_FLAG_NONE);
	add_dom_stack_tracer(&stack, "init-select: ");

	code = parse_dom_select(select, &stack, string);
	done_dom_stack(&stack);

	if (code == DOM_CODE_OK)
		return select;

	done_dom_select(select);

	return NULL;
}
Esempio n. 3
0
/* Command line driver for the SGML parser.
 *
 * Leading arguments starting with "--" are treated as options; processing
 * stops at the first argument without that prefix:
 *
 *   --uri <uri>        URI handed to init_sgml_parser() (default "dom://test")
 *   --src <string>     source to parse (default "(no source)")
 *   --stdin <number>   read the source from stdin in chunks of <number> bytes
 *   --normalize <cfg>  build a tree and add a normalizer configured by <cfg>
 *   --print-lines      set SGML_PARSER_COUNT_LINES
 *   --incomplete       parse incrementally and flag the source as incomplete
 *   --dump             build a tree and dump it to stdout
 *   --error            set SGML_PARSER_DETECT_ERRORS
 *   --help             call die(NULL)
 *
 * Returns 0 when the last parse_sgml() call yielded DOM_CODE_OK and 1
 * otherwise, including when the parser could not be created. */
int
main(int argc, char *argv[])
{
	struct sgml_parser *parser;
	enum sgml_document_type doctype = SGML_DOCTYPE_HTML;
	enum sgml_parser_flag flags = 0;
	enum sgml_parser_type type = SGML_PARSER_STREAM;
	enum dom_code code = 0;
	enum dom_config_flag normalize_flags = 0;
	struct dom_config config;
	int normalize = 0;
	int dump = 0;
	int complete = 1;
	size_t read_stdin = 0;
	struct dom_string uri = STATIC_DOM_STRING("dom://test");
	struct dom_string source = STATIC_DOM_STRING("(no source)");
	int i;

	for (i = 1; i < argc; i++) {
		char *arg = argv[i];

		if (strncmp(arg, "--", 2))
			break;

		/* Skip the "--" prefix; error messages add it back. */
		arg += 2;

		if (get_test_opt(&arg, "uri", &i, argc, argv, "a URI")) {
			set_dom_string(&uri, arg, strlen((const char *)arg));

		} else if (get_test_opt(&arg, "src", &i, argc, argv, "a string")) {
			set_dom_string(&source, arg, strlen((const char *)arg));

		} else if (get_test_opt(&arg, "stdin", &i, argc, argv, "a number")) {
			char *end;
			long chunk_size = strtol(arg, &end, 10);

			/* Reject non-numeric and negative values up front: a
			 * negative number stored in the size_t below would
			 * turn into an enormous buffer size. */
			if (end == arg || *end != '\0' || chunk_size < 0)
				die("Invalid number '%s' for --stdin", arg);

			read_stdin = (size_t) chunk_size;
			flags |= SGML_PARSER_INCREMENTAL;

		} else if (get_test_opt(&arg, "normalize", &i, argc, argv, "a string")) {
			normalize = 1;
			normalize_flags = parse_dom_config(arg, ',');
			type = SGML_PARSER_TREE;

		} else if (!strcmp(arg, "print-lines")) {
			flags |= SGML_PARSER_COUNT_LINES;

		} else if (!strcmp(arg, "incomplete")) {
			flags |= SGML_PARSER_INCREMENTAL;
			complete = 0;

		} else if (!strcmp(arg, "dump")) {
			type = SGML_PARSER_TREE;
			dump = 1;

		} else if (!strcmp(arg, "error")) {
			flags |= SGML_PARSER_DETECT_ERRORS;

		} else if (!strcmp(arg, "help")) {
			die(NULL);

		} else {
			die("Unknown argument '%s'", arg - 2);
		}
	}

	parser = init_sgml_parser(type, doctype, &uri, flags);
	if (!parser) return 1;

	parser->error_func = sgml_error_function;
	if (normalize)
		add_dom_config_normalizer(&parser->stack, &config, normalize_flags);
	else if (!dump)
		add_dom_stack_context(&parser->stack, NULL, &sgml_parser_test_context_info);

	if (read_stdin > 0) {
		unsigned char *buffer;

		buffer = mem_alloc(read_stdin);
		if (!buffer)
			die("Cannot allocate buffer");

		/* When reading from stdin, completeness is decided by EOF
		 * rather than by the --incomplete option. */
		complete = 0;

		while (!complete) {
			size_t size = fread(buffer, 1, read_stdin, stdin);

			if (ferror(stdin))
				die("error reading from stdin");

			complete = feof(stdin);

			code = parse_sgml(parser, buffer, size, complete);
			switch (code) {
			case DOM_CODE_OK:
				break;

			case DOM_CODE_INCOMPLETE:
				if (!complete) break;
				/* Error: still incomplete at EOF. */
				/* fallthrough */
			default:
				/* Stop feeding the parser after any failure. */
				complete = 1;
			}
		}

		mem_free(buffer);

	} else {
		code = parse_sgml(parser, source.string, source.length, complete);
	}

	if (parser->root) {
		assert(!complete || parser->stack.depth > 0);

		/* Unwind whatever is left on the parser stack, clearing the
		 * immutable mark on each state before popping it. */
		while (!dom_stack_is_empty(&parser->stack)) {
			get_dom_stack_top(&parser->stack)->immutable = 0;
			pop_dom_node(&parser->stack);
		}

		if (normalize || dump) {
			struct dom_stack stack;

			/* Note, that we cannot free nodes when walking the DOM
			 * tree since walk_dom_node() uses an index to traverse
			 * the tree. */
			init_dom_stack(&stack, DOM_STACK_FLAG_NONE);
			/* XXX: This context needs to be added first because it
			 * assumes the parser can be accessed via
			 * stack->contexts[0].data. */
			if (normalize)
				add_dom_stack_context(&stack, parser, &sgml_parser_test_context_info);
			else if (dump)
				add_sgml_file_dumper(&stack, stdout);
			walk_dom_nodes(&stack, parser->root);
			done_dom_stack(&stack);
			done_dom_node(parser->root);
		}
	}

	done_sgml_parser(parser);
#ifdef DEBUG_MEMLEAK
	check_memory_leaks();
#endif

	return code != DOM_CODE_OK ? 1 : 0;
}