int main() { hubbub_parser *parser; hubbub_parser_optparams params; assert(hubbub_parser_create("UTF-8", false, &parser) == HUBBUB_OK); params.token_handler.handler = token_handler; params.token_handler.pw = NULL; char* utf8Html = readFile("/Users/furture/code/litehtml/hello.html"); hubbub_parser_parse_chunk(parser,utf8Html, 14752 ) == HUBBUB_OK; const char *charset; hubbub_charset_source cssource; hubbub_parser_read_charset(parser, &cssource); printf("Charset: %s (from %d)\n", charset, cssource); hubbub_parser_destroy(parser); printf("Hello, World!\n"); return 0; }
static int run_test(int argc, char **argv, unsigned int CHUNK_SIZE) { hubbub_parser *parser; hubbub_parser_optparams params; FILE *fp; size_t len, origlen; uint8_t *buf = alloca(CHUNK_SIZE); const char *charset; hubbub_charset_source cssource; UNUSED(argc); assert(hubbub_parser_create("UTF-8", false, myrealloc, NULL, &parser) == HUBBUB_OK); params.token_handler.handler = token_handler; params.token_handler.pw = NULL; assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TOKEN_HANDLER, ¶ms) == HUBBUB_OK); fp = fopen(argv[1], "rb"); if (fp == NULL) { printf("Failed opening %s\n", argv[1]); return 1; } fseek(fp, 0, SEEK_END); origlen = len = ftell(fp); fseek(fp, 0, SEEK_SET); while (len > 0) { ssize_t bytes_read = fread(buf, 1, CHUNK_SIZE, fp); if (bytes_read < 1) break; assert(hubbub_parser_parse_chunk(parser, buf, bytes_read) == HUBBUB_OK); len -= bytes_read; } assert(len == 0); fclose(fp); charset = hubbub_parser_read_charset(parser, &cssource); printf("Charset: %s (from %d)\n", charset, cssource); hubbub_parser_destroy(parser); printf("PASS\n"); return 0; }
static hubbub_error change_encoding(void *parser, const char *charset) { dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser; uint32_t source; const char *name; /* If we have an encoding here, it means we are *certain* */ if (dom_parser->encoding != NULL) { return HUBBUB_OK; } /* Find the confidence otherwise (can only be from a BOM) */ name = hubbub_parser_read_charset(dom_parser->parser, &source); if (source == HUBBUB_CHARSET_CONFIDENT) { dom_parser->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_DETECTED; dom_parser->encoding = charset; return HUBBUB_OK; } /* So here we have something of confidence tentative... */ /* http://www.whatwg.org/specs/web-apps/current-work/#change */ /* 2. "If the new encoding is identical or equivalent to the encoding * that is already being used to interpret the input stream, then set * the confidence to confident and abort these steps." */ /* Whatever happens, the encoding should be set here; either for * reprocessing with a different charset, or for confirming that the * charset is in fact correct */ dom_parser->encoding = charset; dom_parser->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_META; /* Equal encodings will have the same string pointers */ return (charset == name) ? HUBBUB_OK : HUBBUB_ENCODINGCHANGE; }