示例#1
0
文件: main.c 项目: gubaojian/trylearn
int main() {
    hubbub_parser *parser;
    hubbub_parser_optparams params;


    assert(hubbub_parser_create("UTF-8", false, &parser) == HUBBUB_OK);

    params.token_handler.handler = token_handler;
    params.token_handler.pw = NULL;




    char* utf8Html = readFile("/Users/furture/code/litehtml/hello.html");


    hubbub_parser_parse_chunk(parser,utf8Html, 14752 ) == HUBBUB_OK;


    const char *charset;
    hubbub_charset_source cssource;
    hubbub_parser_read_charset(parser, &cssource);
    printf("Charset: %s (from %d)\n", charset, cssource);

    hubbub_parser_destroy(parser);

    printf("Hello, World!\n");
    return 0;
}
示例#2
0
static int run_test(int argc, char **argv, unsigned int CHUNK_SIZE)
{
	hubbub_parser *parser;
	hubbub_parser_optparams params;
	FILE *fp;
	size_t len, origlen;
	uint8_t *buf = alloca(CHUNK_SIZE);
	const char *charset;
	hubbub_charset_source cssource;

	UNUSED(argc);

	assert(hubbub_parser_create("UTF-8", false, myrealloc, NULL, &parser) ==
			HUBBUB_OK);

	params.token_handler.handler = token_handler;
	params.token_handler.pw = NULL;
	assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TOKEN_HANDLER,
			&params) == HUBBUB_OK);

	fp = fopen(argv[1], "rb");
	if (fp == NULL) {
		printf("Failed opening %s\n", argv[1]);
		return 1;
	}

	fseek(fp, 0, SEEK_END);
	origlen = len = ftell(fp);
	fseek(fp, 0, SEEK_SET);

	while (len > 0) {
		ssize_t bytes_read = fread(buf, 1, CHUNK_SIZE, fp);
                
                if (bytes_read < 1)
                        break;
                
		assert(hubbub_parser_parse_chunk(parser,
				buf, bytes_read) == HUBBUB_OK);

		len -= bytes_read;
	}
        
        assert(len == 0);

	fclose(fp);

	charset = hubbub_parser_read_charset(parser, &cssource);

	printf("Charset: %s (from %d)\n", charset, cssource);

	hubbub_parser_destroy(parser);

	printf("PASS\n");

	return 0;
}
示例#3
0
/**
 * Tear down a Hubbub parser binding.
 *
 * Destroys the wrapped Hubbub parser, unreferences the document node if one
 * is attached, and frees the binding itself.
 *
 * \param parser  The binding to destroy; its storage is freed by this call
 */
void dom_hubbub_parser_destroy(dom_hubbub_parser *parser)
{
	/* Shut down the underlying Hubbub parser first */
	hubbub_parser_destroy(parser->parser);
	parser->parser = NULL;

	/* Then release the document node, when there is one */
	if (parser->doc) {
		struct dom_node *document = (struct dom_node *) parser->doc;

		dom_node_unref(document);
		parser->doc = NULL;
	}

	free(parser);
}
示例#4
0
void handle_hubbub_request(struct hubbubmsg_request *request) {
    hubbub_error err;
    hubbub_parser_optparams optparams;
    switch (request->type) {
    case HUBBUBMSG_CREATE_PARSER:
        if (parser)
            break;
        DEBUG(stderr, "create parser: %s\n", (char *)request->kind.create_parser_info.enc.data);
        err = hubbub_parser_create((char *)request->kind.create_parser_info.enc.data,
                                   request->kind.create_parser_info.fix_enc,
                                   myrealloc,
                                   NULL,
                                   &parser);
        DEBUG(stderr, "create parser error: %d\n", (int)err);

        optparams.tree_handler = &tree_handler;
        err = hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER, &optparams);
        DEBUG(stderr, "setopt tree handler error: %d\n", (int)err);

        optparams.document_node = (void *)1;
        err = hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE,
                                   &optparams);
        DEBUG(stderr, "setopt tree handler error: %d\n", (int)err);
        break;
    case HUBBUBMSG_DESTROY_PARSER:
        if (!parser)
            break;
        DEBUG(stderr, "destroy parser\n");
        hubbub_parser_destroy(parser);
        parser = NULL;
        break;
    case HUBBUBMSG_PARSE_CHUNK:
        DEBUG(stderr, "parse chunk: %s\n", (char *)request->kind.parse_chunk_info.data.data);
        if (!parser) {
            DEBUG(stderr, "no parser\n");
            break;
        }
        hubbub_parser_parse_chunk(parser,
                                  request->kind.parse_chunk_info.data.data,
                                  request->kind.parse_chunk_info.data.len);
    }
}
示例#5
0
int main(int argc, char **argv)
{
    FILE *fp;
    char buf[4096];
    size_t *chunks;
    size_t n_chunks;
    hubbub_parser *parser;
    uint32_t i;

    buf_t got = { NULL, 0, 0 };

    if (argc != 2) {
        printf("Usage: %s <filename>\n", argv[0]);
        return 1;
    }

    fp = fopen(argv[1], "rb");
    if (fp == NULL) {
        printf("Failed opening %s\n", argv[1]);
        return 1;
    }

    /* Format:
     * #chunks <n>
     * <n> lines
     * #data
     * <data>
     */

    assert(fgets(buf, sizeof(buf), fp) != NULL);
    assert(strncmp(buf, "#chunks ", sizeof("#chunks ") - 1) == 0);
    n_chunks = atoi(buf + sizeof("#chunks ") - 1);

    chunks = malloc(n_chunks * sizeof(size_t));
    assert(chunks != NULL);

    for (i = 0; i < n_chunks; i++) {
        assert(fgets(buf, sizeof(buf), fp) != NULL);
        chunks[i] = atoi(buf);
    }

    assert(fgets(buf, sizeof(buf), fp) != NULL);
    assert(strcmp(buf, "#data\n") == 0);

    parser = setup_parser();

    for (i = 0; i < n_chunks; i++) {
        ssize_t bytes_read;
        assert(chunks[i] <= sizeof(buf));

        bytes_read = fread(buf, 1, chunks[i], fp);
        assert((size_t)(bytes_read) == chunks[i]);

        assert(hubbub_parser_parse_chunk(parser, (uint8_t *) buf,
                                         chunks[i]) == HUBBUB_OK);
    }

    assert(hubbub_parser_completed(parser) == HUBBUB_OK);

    node_print(&got, Document, 0);
    printf("%s", got.buf);

    hubbub_parser_destroy(parser);
    while (Document) {
        node_t *victim = Document;
        Document = victim->next;
        delete_node(victim);
    }
    Document = NULL;

    printf("PASS\n");

    fclose(fp);

    free(got.buf);

    return 0;
}
示例#6
0
/**
 * Create a Hubbub parser instance
 *
 * Allocates the binding, creates the underlying Hubbub parser and an HTML
 * DOM document, and wires the parser's tree handler, document node and
 * scripting state to them. On any failure everything created so far is
 * released and nothing is written to the output parameters.
 *
 * \param params The binding creation parameters
 * \param parser Pointer to location to receive instance.
 * \param document Pointer to location to receive document.
 * \return DOM_HUBBUB_OK on success,
 *         DOM_HUBBUB_BADPARM if any argument is NULL,
 *         DOM_HUBBUB_NOMEM if the binding could not be allocated,
 *         DOM_HUBBUB_HUBBUB_ERR combined with the Hubbub error code if the
 *         Hubbub parser could not be created,
 *         DOM_HUBBUB_DOM if the document or the "id" string could not be
 *         created
 */
dom_hubbub_error
dom_hubbub_parser_create(dom_hubbub_parser_params *params,
			 dom_hubbub_parser **parser,
			 dom_document **document)
{
	dom_hubbub_parser *binding;
	hubbub_parser_optparams optparams;
	hubbub_error error;
	dom_exception err;
	dom_string *idname = NULL;

	/* check parameters; params is dereferenced below so it must be valid
	 * too */
	if (params == NULL || document == NULL || parser == NULL) {
		return DOM_HUBBUB_BADPARM;
	}

	/* setup binding parser context */
	binding = malloc(sizeof(*binding));
	if (binding == NULL) {
		return DOM_HUBBUB_NOMEM;
	}

	binding->parser = NULL;
	binding->doc = NULL;
	binding->encoding = params->enc;

	/* a caller-supplied encoding is recorded as coming from a header,
	 * otherwise it is recorded as detected */
	if (params->enc != NULL) {
		binding->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_HEADER;
	} else {
		binding->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_DETECTED;
	}

	binding->complete = false;

	/* ensure message function is valid or use the default */
	if (params->msg == NULL) {
		binding->msg = dom_hubbub_parser_default_msg;
	} else {
		binding->msg = params->msg;
	}
	binding->mctx = params->ctx;

	/* ensure script function is valid or use the default */
	if (params->script == NULL) {
		binding->script = dom_hubbub_parser_default_script;
	} else {
		binding->script = params->script;
	}

	/* create hubbub parser */
	error = hubbub_parser_create(binding->encoding,
				     params->fix_enc,
				     dom_hubbub_alloc,
				     NULL,
				     &binding->parser);
	if (error != HUBBUB_OK)	 {
		free(binding);
		return (DOM_HUBBUB_HUBBUB_ERR | error);
	}

	/* create DOM document */
	err = dom_implementation_create_document(DOM_IMPLEMENTATION_HTML,
						 NULL,
						 NULL,
						 NULL,
						 params->daf,
						 params->ctx,
						 &binding->doc);
	if (err != DOM_NO_ERR) {
		hubbub_parser_destroy(binding->parser);
		free(binding);
		return DOM_HUBBUB_DOM;
	}

	/* start from the shared tree handler and point its context at this
	 * binding */
	binding->tree_handler = tree_handler;
	binding->tree_handler.ctx = (void *)binding;

	/* set tree handler on parser */
	optparams.tree_handler = &binding->tree_handler;
	hubbub_parser_setopt(binding->parser,
			     HUBBUB_PARSER_TREE_HANDLER,
			     &optparams);

	/* set document node; the parser is handed its own reference */
	optparams.document_node = dom_node_ref((struct dom_node *)binding->doc);
	hubbub_parser_setopt(binding->parser,
			     HUBBUB_PARSER_DOCUMENT_NODE,
			     &optparams);

	/* set scripting state */
	optparams.enable_scripting = params->enable_script;
	hubbub_parser_setopt(binding->parser,
			     HUBBUB_PARSER_ENABLE_SCRIPTING,
			     &optparams);

	/* set the document id parameter before the parse so searches
	 * based on id succeed.
	 */
	err = dom_string_create_interned((const uint8_t *) "id",
					 SLEN("id"),
					 &idname);
	if (err != DOM_NO_ERR) {
		binding->msg(DOM_MSG_ERROR, binding->mctx, "Can't set DOM document id name");
		/* Release in the same order as dom_hubbub_parser_destroy:
		 * the parser first, then the binding's document reference,
		 * so the document is not leaked on this path.
		 * NOTE(review): this assumes destroying the parser releases
		 * the reference taken for the document node above -- confirm.
		 */
		hubbub_parser_destroy(binding->parser);
		dom_node_unref((struct dom_node *) binding->doc);
		free(binding);
		return DOM_HUBBUB_DOM;
	}
	_dom_document_set_id_name(binding->doc, idname);
	dom_string_unref(idname);

	/* set return parameters; the caller receives its own reference to
	 * the document */
	*document = (dom_document *)dom_node_ref(binding->doc);
	*parser = binding;

	return DOM_HUBBUB_OK;
}
示例#7
0
int main(int argc, char **argv)
{
	FILE *fp;
	char line[2048];

	bool reprocess = false;
	bool passed = true;

	hubbub_parser *parser = NULL;
	enum reading_state state = EXPECT_DATA;

	buf_t expected = { NULL, 0, 0 };
	buf_t got = { NULL, 0, 0 };


	if (argc != 2) {
		printf("Usage: %s <filename>\n", argv[0]);
		return 1;
	}

	fp = fopen(argv[1], "rb");
	if (fp == NULL) {
		printf("Failed opening %s\n", argv[1]);
		return 1;
	}

	/* We rely on lines not being anywhere near 2048 characters... */
	while (reprocess || (passed && fgets(line, sizeof line, fp) == line)) {
		reprocess = false;

		switch (state)
		{
		case ERASE_DATA:
			buf_clear(&got);
			buf_clear(&expected);

			hubbub_parser_destroy(parser);
			while (Document) {
				node_t *victim = Document;
				Document = victim->next;
				delete_node(victim);
			}
			Document = NULL;

			state = EXPECT_DATA;

 		case EXPECT_DATA:
			if (strcmp(line, "#data\n") == 0) {
				parser = setup_parser();
				state = READING_DATA;
			}
			break;

		case READING_DATA:
		case READING_DATA_AFTER_FIRST:
			if (strcmp(line, "#errors\n") == 0) {
				assert(hubbub_parser_completed(parser) == HUBBUB_OK);
				state = READING_ERRORS;
			} else {
				size_t len = strlen(line);

				if (state == READING_DATA_AFTER_FIRST) {
					assert(hubbub_parser_parse_chunk(parser,
						(uint8_t *)"\n",
						1) == HUBBUB_OK);
				} else {
					state = READING_DATA_AFTER_FIRST;
				}

				printf(": %s", line);
				assert(hubbub_parser_parse_chunk(parser, (uint8_t *)line,
						len - 1) == HUBBUB_OK);
			}
			break;


		case READING_ERRORS:
			if (strcmp(line, "#document-fragment\n") == 0) {
				state = ERASE_DATA;
				reprocess = true;
			}

			if (strcmp(line, "#document\n") == 0)
				state = READING_TREE;
			else {
			}
			break;

		case READING_TREE:
			if (strcmp(line, "#data\n") == 0) {
				node_print(&got, Document, 0);

				/* Trim off the last newline */
				expected.buf[strlen(expected.buf) - 1] = '\0';

				passed = !strcmp(got.buf, expected.buf);
				if (!passed) {
					printf("expected:\n");
					printf("%s", expected.buf);
					printf("got:\n");
					printf("%s", got.buf);
				}

				state = ERASE_DATA;
				reprocess = true;
			} else {
				buf_add(&expected, line);
			}
			break;
		}
	}

	if (Document != NULL) {
		node_print(&got, Document, 0);

		passed = !strcmp(got.buf, expected.buf);
		if (!passed) {
			printf("expected:\n");
			printf("%s", expected.buf);
			printf("got:\n");
			printf("%s", got.buf);
		}

		hubbub_parser_destroy(parser);
		while (Document) {
			node_t *victim = Document;
			Document = victim->next;
			delete_node(victim);
		}
	}

	printf("%s\n", passed ? "PASS" : "FAIL");

	fclose(fp);

	free(got.buf);
	free(expected.buf);

	return 0;
}