Exemple #1
0
int main() {
    hubbub_parser *parser;
    hubbub_parser_optparams params;


    assert(hubbub_parser_create("UTF-8", false, &parser) == HUBBUB_OK);

    params.token_handler.handler = token_handler;
    params.token_handler.pw = NULL;




    char* utf8Html = readFile("/Users/furture/code/litehtml/hello.html");


    hubbub_parser_parse_chunk(parser,utf8Html, 14752 ) == HUBBUB_OK;


    const char *charset;
    hubbub_charset_source cssource;
    hubbub_parser_read_charset(parser, &cssource);
    printf("Charset: %s (from %d)\n", charset, cssource);

    hubbub_parser_destroy(parser);

    printf("Hello, World!\n");
    return 0;
}
Exemple #2
0
static int run_test(int argc, char **argv, unsigned int CHUNK_SIZE)
{
	hubbub_parser *parser;
	hubbub_parser_optparams params;
	FILE *fp;
	size_t len, origlen;
	uint8_t *buf = alloca(CHUNK_SIZE);
	const char *charset;
	hubbub_charset_source cssource;

	UNUSED(argc);

	assert(hubbub_parser_create("UTF-8", false, myrealloc, NULL, &parser) ==
			HUBBUB_OK);

	params.token_handler.handler = token_handler;
	params.token_handler.pw = NULL;
	assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TOKEN_HANDLER,
			&params) == HUBBUB_OK);

	fp = fopen(argv[1], "rb");
	if (fp == NULL) {
		printf("Failed opening %s\n", argv[1]);
		return 1;
	}

	fseek(fp, 0, SEEK_END);
	origlen = len = ftell(fp);
	fseek(fp, 0, SEEK_SET);

	while (len > 0) {
		ssize_t bytes_read = fread(buf, 1, CHUNK_SIZE, fp);
                
                if (bytes_read < 1)
                        break;
                
		assert(hubbub_parser_parse_chunk(parser,
				buf, bytes_read) == HUBBUB_OK);

		len -= bytes_read;
	}
        
        assert(len == 0);

	fclose(fp);

	charset = hubbub_parser_read_charset(parser, &cssource);

	printf("Charset: %s (from %d)\n", charset, cssource);

	hubbub_parser_destroy(parser);

	printf("PASS\n");

	return 0;
}
Exemple #3
0
static hubbub_error change_encoding(void *parser, const char *charset)
{
	dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
	uint32_t source;
	const char *name;

	/* If we have an encoding here, it means we are *certain* */
	if (dom_parser->encoding != NULL) {
		return HUBBUB_OK;
	}

	/* Find the confidence otherwise (can only be from a BOM) */
	name = hubbub_parser_read_charset(dom_parser->parser, &source);

	if (source == HUBBUB_CHARSET_CONFIDENT) {
		dom_parser->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_DETECTED;
		dom_parser->encoding = charset;
		return HUBBUB_OK;
	}

	/* So here we have something of confidence tentative... */
	/* http://www.whatwg.org/specs/web-apps/current-work/#change */

	/* 2. "If the new encoding is identical or equivalent to the encoding
	 * that is already being used to interpret the input stream, then set
	 * the confidence to confident and abort these steps." */

	/* Whatever happens, the encoding should be set here; either for
	 * reprocessing with a different charset, or for confirming that the
	 * charset is in fact correct */
	dom_parser->encoding = charset;
	dom_parser->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_META;

	/* Equal encodings will have the same string pointers */
	return (charset == name) ? HUBBUB_OK : HUBBUB_ENCODINGCHANGE;
}