int main() { hubbub_parser *parser; hubbub_parser_optparams params; assert(hubbub_parser_create("UTF-8", false, &parser) == HUBBUB_OK); params.token_handler.handler = token_handler; params.token_handler.pw = NULL; char* utf8Html = readFile("/Users/furture/code/litehtml/hello.html"); hubbub_parser_parse_chunk(parser,utf8Html, 14752 ) == HUBBUB_OK; const char *charset; hubbub_charset_source cssource; hubbub_parser_read_charset(parser, &cssource); printf("Charset: %s (from %d)\n", charset, cssource); hubbub_parser_destroy(parser); printf("Hello, World!\n"); return 0; }
static int run_test(int argc, char **argv, unsigned int CHUNK_SIZE) { hubbub_parser *parser; hubbub_parser_optparams params; FILE *fp; size_t len, origlen; uint8_t *buf = alloca(CHUNK_SIZE); const char *charset; hubbub_charset_source cssource; UNUSED(argc); assert(hubbub_parser_create("UTF-8", false, myrealloc, NULL, &parser) == HUBBUB_OK); params.token_handler.handler = token_handler; params.token_handler.pw = NULL; assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TOKEN_HANDLER, ¶ms) == HUBBUB_OK); fp = fopen(argv[1], "rb"); if (fp == NULL) { printf("Failed opening %s\n", argv[1]); return 1; } fseek(fp, 0, SEEK_END); origlen = len = ftell(fp); fseek(fp, 0, SEEK_SET); while (len > 0) { ssize_t bytes_read = fread(buf, 1, CHUNK_SIZE, fp); if (bytes_read < 1) break; assert(hubbub_parser_parse_chunk(parser, buf, bytes_read) == HUBBUB_OK); len -= bytes_read; } assert(len == 0); fclose(fp); charset = hubbub_parser_read_charset(parser, &cssource); printf("Charset: %s (from %d)\n", charset, cssource); hubbub_parser_destroy(parser); printf("PASS\n"); return 0; }
void handle_hubbub_request(struct hubbubmsg_request *request) { hubbub_error err; hubbub_parser_optparams optparams; switch (request->type) { case HUBBUBMSG_CREATE_PARSER: if (parser) break; DEBUG(stderr, "create parser: %s\n", (char *)request->kind.create_parser_info.enc.data); err = hubbub_parser_create((char *)request->kind.create_parser_info.enc.data, request->kind.create_parser_info.fix_enc, myrealloc, NULL, &parser); DEBUG(stderr, "create parser error: %d\n", (int)err); optparams.tree_handler = &tree_handler; err = hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER, &optparams); DEBUG(stderr, "setopt tree handler error: %d\n", (int)err); optparams.document_node = (void *)1; err = hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE, &optparams); DEBUG(stderr, "setopt tree handler error: %d\n", (int)err); break; case HUBBUBMSG_DESTROY_PARSER: if (!parser) break; DEBUG(stderr, "destroy parser\n"); hubbub_parser_destroy(parser); parser = NULL; break; case HUBBUBMSG_PARSE_CHUNK: DEBUG(stderr, "parse chunk: %s\n", (char *)request->kind.parse_chunk_info.data.data); if (!parser) { DEBUG(stderr, "no parser\n"); break; } hubbub_parser_parse_chunk(parser, request->kind.parse_chunk_info.data.data, request->kind.parse_chunk_info.data.len); } }
/* * Create, initialise, and return, a parser instance. */ static hubbub_parser *setup_parser(void) { hubbub_parser *parser; hubbub_parser_optparams params; assert(hubbub_parser_create("UTF-8", false, myrealloc, NULL, &parser) == HUBBUB_OK); params.tree_handler = &tree_handler; assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER, ¶ms) == HUBBUB_OK); params.document_node = (void *)1; assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE, ¶ms) == HUBBUB_OK); params.enable_scripting = true; assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_ENABLE_SCRIPTING, ¶ms) == HUBBUB_OK); return parser; }
/**
 * Create a Hubbub parser instance
 *
 * Allocates the binding, creates the underlying hubbub parser and an HTML
 * DOM document, wires the binding's tree handler into the parser and sets
 * the document's id attribute name to "id".
 *
 * \param params    The binding creation parameters
 * \param parser    Pointer to location to receive instance.
 * \param document  Pointer to location to receive document; the caller is
 *                  given its own reference to the document node.
 * \return DOM_HUBBUB_OK on success,
 *         DOM_HUBBUB_BADPARM if any argument is NULL,
 *         DOM_HUBBUB_NOMEM if the binding cannot be allocated,
 *         DOM_HUBBUB_HUBBUB_ERR | error if hubbub parser creation fails,
 *         DOM_HUBBUB_DOM if a DOM operation fails.
 */
dom_hubbub_error
dom_hubbub_parser_create(dom_hubbub_parser_params *params,
		dom_hubbub_parser **parser,
		dom_document **document)
{
	dom_hubbub_parser *binding;
	hubbub_parser_optparams optparams;
	hubbub_error error;
	dom_exception err;
	dom_string *idname = NULL;

	/* check parameters; params is dereferenced throughout */
	if (params == NULL || parser == NULL || document == NULL) {
		return DOM_HUBBUB_BADPARM;
	}

	/* setup binding parser context */
	binding = malloc(sizeof(*binding));
	if (binding == NULL) {
		return DOM_HUBBUB_NOMEM;
	}

	binding->parser = NULL;
	binding->doc = NULL;
	binding->encoding = params->enc;

	/* a caller-supplied encoding is recorded as coming from a header */
	if (params->enc != NULL) {
		binding->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_HEADER;
	} else {
		binding->encoding_source = DOM_HUBBUB_ENCODING_SOURCE_DETECTED;
	}

	binding->complete = false;

	/* ensure message function is valid or use the default */
	if (params->msg == NULL) {
		binding->msg = dom_hubbub_parser_default_msg;
	} else {
		binding->msg = params->msg;
	}
	binding->mctx = params->ctx;

	/* ensure script function is valid or use the default */
	if (params->script == NULL) {
		binding->script = dom_hubbub_parser_default_script;
	} else {
		binding->script = params->script;
	}

	/* create hubbub parser */
	error = hubbub_parser_create(binding->encoding,
			params->fix_enc,
			dom_hubbub_alloc,
			NULL,
			&binding->parser);
	if (error != HUBBUB_OK) {
		free(binding);
		return (DOM_HUBBUB_HUBBUB_ERR | error);
	}

	/* create DOM document */
	err = dom_implementation_create_document(DOM_IMPLEMENTATION_HTML,
			NULL,
			NULL,
			NULL,
			params->daf,
			params->ctx,
			&binding->doc);
	if (err != DOM_NO_ERR) {
		hubbub_parser_destroy(binding->parser);
		free(binding);
		return DOM_HUBBUB_DOM;
	}

	/* copy the template handler and point its context at this binding */
	binding->tree_handler = tree_handler;
	binding->tree_handler.ctx = (void *)binding;

	/* set tree handler on parser */
	optparams.tree_handler = &binding->tree_handler;
	hubbub_parser_setopt(binding->parser,
			HUBBUB_PARSER_TREE_HANDLER,
			&optparams);

	/* set document node */
	optparams.document_node = dom_node_ref((struct dom_node *)binding->doc);
	hubbub_parser_setopt(binding->parser,
			HUBBUB_PARSER_DOCUMENT_NODE,
			&optparams);

	/* set scripting state */
	optparams.enable_scripting = params->enable_script;
	hubbub_parser_setopt(binding->parser,
			HUBBUB_PARSER_ENABLE_SCRIPTING,
			&optparams);

	/* set the document id parameter before the parse so searches
	 * based on id succeed.
	 */
	err = dom_string_create_interned((const uint8_t *) "id",
			SLEN("id"),
			&idname);
	if (err != DOM_NO_ERR) {
		binding->msg(DOM_MSG_ERROR, binding->mctx,
				"Can't set DOM document id name");

		/* The parser's tree handler context points at binding, so
		 * destroy the parser while binding is still alive. */
		hubbub_parser_destroy(binding->parser);

		/* Drop the binding's own document reference, which was
		 * previously leaked on this path.
		 * NOTE(review): assumes the reference returned by
		 * dom_implementation_create_document() is owned by the
		 * binding -- confirm against the binding's destructor. */
		dom_node_unref((struct dom_node *)binding->doc);

		free(binding);
		return DOM_HUBBUB_DOM;
	}

	_dom_document_set_id_name(binding->doc, idname);
	dom_string_unref(idname);

	/* set return parameters */
	*document = (dom_document *)dom_node_ref(binding->doc);
	*parser = binding;

	return DOM_HUBBUB_OK;
}