/**
 * Parse a fixed HTML file with Hubbub and print the charset the parser
 * settled on.
 *
 * \return 0 on success, 1 if the parser could not be created, the input
 *         could not be read, or parsing failed
 */
int main()
{
    hubbub_parser *parser;
    hubbub_parser_optparams params;
    const char *charset;
    hubbub_charset_source cssource;
    char *utf8Html;
    /* NOTE(review): hard-coded byte count; it must match the size of the
     * file read below -- confirm, or derive it from readFile(). */
    const size_t html_len = 14752;

    /* Checked explicitly rather than inside assert(): the standard assert
     * macro is compiled out under NDEBUG, which would skip the creation. */
    if (hubbub_parser_create("UTF-8", false, &parser) != HUBBUB_OK) {
        printf("Failed creating parser\n");
        return 1;
    }

    /* NOTE(review): params is populated but never handed to the parser in
     * this function, so token_handler is not installed here -- confirm
     * whether a setopt call is missing. */
    params.token_handler.handler = token_handler;
    params.token_handler.pw = NULL;

    utf8Html = readFile("/Users/furture/code/litehtml/hello.html");
    if (utf8Html == NULL) {
        printf("Failed reading input\n");
        hubbub_parser_destroy(parser);
        return 1;
    }

    /* The original compared the result with HUBBUB_OK and threw the
     * comparison away; act on it instead. */
    if (hubbub_parser_parse_chunk(parser, (const uint8_t *) utf8Html,
            html_len) != HUBBUB_OK) {
        printf("Failed parsing input\n");
        hubbub_parser_destroy(parser);
        return 1;
    }

    /* Capture the returned name: printing charset without assigning it
     * first reads an uninitialised pointer. */
    charset = hubbub_parser_read_charset(parser, &cssource);
    printf("Charset: %s (from %d)\n", charset, cssource);

    hubbub_parser_destroy(parser);

    printf("Hello, World!\n");

    return 0;
}
static int run_test(int argc, char **argv, unsigned int CHUNK_SIZE) { hubbub_parser *parser; hubbub_parser_optparams params; FILE *fp; size_t len, origlen; uint8_t *buf = alloca(CHUNK_SIZE); const char *charset; hubbub_charset_source cssource; UNUSED(argc); assert(hubbub_parser_create("UTF-8", false, myrealloc, NULL, &parser) == HUBBUB_OK); params.token_handler.handler = token_handler; params.token_handler.pw = NULL; assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TOKEN_HANDLER, ¶ms) == HUBBUB_OK); fp = fopen(argv[1], "rb"); if (fp == NULL) { printf("Failed opening %s\n", argv[1]); return 1; } fseek(fp, 0, SEEK_END); origlen = len = ftell(fp); fseek(fp, 0, SEEK_SET); while (len > 0) { ssize_t bytes_read = fread(buf, 1, CHUNK_SIZE, fp); if (bytes_read < 1) break; assert(hubbub_parser_parse_chunk(parser, buf, bytes_read) == HUBBUB_OK); len -= bytes_read; } assert(len == 0); fclose(fp); charset = hubbub_parser_read_charset(parser, &cssource); printf("Charset: %s (from %d)\n", charset, cssource); hubbub_parser_destroy(parser); printf("PASS\n"); return 0; }
/**
 * Hand a chunk of input to the underlying Hubbub parser
 *
 * \param parser  The parser object whose wrapped Hubbub parser is used
 * \param data    The bytes to parse
 * \param len     The number of bytes in \a data
 * \return DOM_HUBBUB_OK on success,
 *         DOM_HUBBUB_HUBBUB_ERR | <hubbub_error> on failure
 */
dom_hubbub_error dom_hubbub_parser_parse_chunk(dom_hubbub_parser *parser,
        const uint8_t *data, size_t len)
{
    hubbub_error status = hubbub_parser_parse_chunk(parser->parser,
            data, len);

    if (status == HUBBUB_OK)
        return DOM_HUBBUB_OK;

    /* Fold the Hubbub error code into the DOM-level error value. */
    return DOM_HUBBUB_HUBBUB_ERR | status;
}
void handle_hubbub_request(struct hubbubmsg_request *request) { hubbub_error err; hubbub_parser_optparams optparams; switch (request->type) { case HUBBUBMSG_CREATE_PARSER: if (parser) break; DEBUG(stderr, "create parser: %s\n", (char *)request->kind.create_parser_info.enc.data); err = hubbub_parser_create((char *)request->kind.create_parser_info.enc.data, request->kind.create_parser_info.fix_enc, myrealloc, NULL, &parser); DEBUG(stderr, "create parser error: %d\n", (int)err); optparams.tree_handler = &tree_handler; err = hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER, &optparams); DEBUG(stderr, "setopt tree handler error: %d\n", (int)err); optparams.document_node = (void *)1; err = hubbub_parser_setopt(parser, HUBBUB_PARSER_DOCUMENT_NODE, &optparams); DEBUG(stderr, "setopt tree handler error: %d\n", (int)err); break; case HUBBUBMSG_DESTROY_PARSER: if (!parser) break; DEBUG(stderr, "destroy parser\n"); hubbub_parser_destroy(parser); parser = NULL; break; case HUBBUBMSG_PARSE_CHUNK: DEBUG(stderr, "parse chunk: %s\n", (char *)request->kind.parse_chunk_info.data.data); if (!parser) { DEBUG(stderr, "no parser\n"); break; } hubbub_parser_parse_chunk(parser, request->kind.parse_chunk_info.data.data, request->kind.parse_chunk_info.data.len); } }
int main(int argc, char **argv) { FILE *fp; char buf[4096]; size_t *chunks; size_t n_chunks; hubbub_parser *parser; uint32_t i; buf_t got = { NULL, 0, 0 }; if (argc != 2) { printf("Usage: %s <filename>\n", argv[0]); return 1; } fp = fopen(argv[1], "rb"); if (fp == NULL) { printf("Failed opening %s\n", argv[1]); return 1; } /* Format: * #chunks <n> * <n> lines * #data * <data> */ assert(fgets(buf, sizeof(buf), fp) != NULL); assert(strncmp(buf, "#chunks ", sizeof("#chunks ") - 1) == 0); n_chunks = atoi(buf + sizeof("#chunks ") - 1); chunks = malloc(n_chunks * sizeof(size_t)); assert(chunks != NULL); for (i = 0; i < n_chunks; i++) { assert(fgets(buf, sizeof(buf), fp) != NULL); chunks[i] = atoi(buf); } assert(fgets(buf, sizeof(buf), fp) != NULL); assert(strcmp(buf, "#data\n") == 0); parser = setup_parser(); for (i = 0; i < n_chunks; i++) { ssize_t bytes_read; assert(chunks[i] <= sizeof(buf)); bytes_read = fread(buf, 1, chunks[i], fp); assert((size_t)(bytes_read) == chunks[i]); assert(hubbub_parser_parse_chunk(parser, (uint8_t *) buf, chunks[i]) == HUBBUB_OK); } assert(hubbub_parser_completed(parser) == HUBBUB_OK); node_print(&got, Document, 0); printf("%s", got.buf); hubbub_parser_destroy(parser); while (Document) { node_t *victim = Document; Document = victim->next; delete_node(victim); } Document = NULL; printf("PASS\n"); fclose(fp); free(got.buf); return 0; }
int main(int argc, char **argv) { FILE *fp; char line[2048]; bool reprocess = false; bool passed = true; hubbub_parser *parser = NULL; enum reading_state state = EXPECT_DATA; buf_t expected = { NULL, 0, 0 }; buf_t got = { NULL, 0, 0 }; if (argc != 2) { printf("Usage: %s <filename>\n", argv[0]); return 1; } fp = fopen(argv[1], "rb"); if (fp == NULL) { printf("Failed opening %s\n", argv[1]); return 1; } /* We rely on lines not being anywhere near 2048 characters... */ while (reprocess || (passed && fgets(line, sizeof line, fp) == line)) { reprocess = false; switch (state) { case ERASE_DATA: buf_clear(&got); buf_clear(&expected); hubbub_parser_destroy(parser); while (Document) { node_t *victim = Document; Document = victim->next; delete_node(victim); } Document = NULL; state = EXPECT_DATA; case EXPECT_DATA: if (strcmp(line, "#data\n") == 0) { parser = setup_parser(); state = READING_DATA; } break; case READING_DATA: case READING_DATA_AFTER_FIRST: if (strcmp(line, "#errors\n") == 0) { assert(hubbub_parser_completed(parser) == HUBBUB_OK); state = READING_ERRORS; } else { size_t len = strlen(line); if (state == READING_DATA_AFTER_FIRST) { assert(hubbub_parser_parse_chunk(parser, (uint8_t *)"\n", 1) == HUBBUB_OK); } else { state = READING_DATA_AFTER_FIRST; } printf(": %s", line); assert(hubbub_parser_parse_chunk(parser, (uint8_t *)line, len - 1) == HUBBUB_OK); } break; case READING_ERRORS: if (strcmp(line, "#document-fragment\n") == 0) { state = ERASE_DATA; reprocess = true; } if (strcmp(line, "#document\n") == 0) state = READING_TREE; else { } break; case READING_TREE: if (strcmp(line, "#data\n") == 0) { node_print(&got, Document, 0); /* Trim off the last newline */ expected.buf[strlen(expected.buf) - 1] = '\0'; passed = !strcmp(got.buf, expected.buf); if (!passed) { printf("expected:\n"); printf("%s", expected.buf); printf("got:\n"); printf("%s", got.buf); } state = ERASE_DATA; reprocess = true; } else { buf_add(&expected, line); } break; } } if (Document != NULL) { 
node_print(&got, Document, 0); passed = !strcmp(got.buf, expected.buf); if (!passed) { printf("expected:\n"); printf("%s", expected.buf); printf("got:\n"); printf("%s", got.buf); } hubbub_parser_destroy(parser); while (Document) { node_t *victim = Document; Document = victim->next; delete_node(victim); } } printf("%s\n", passed ? "PASS" : "FAIL"); fclose(fp); free(got.buf); free(expected.buf); return 0; }