/**
 * Run a single tokeniser test, once per requested content model.
 *
 * \param ctx  Test context carrying input buffer, expected output list and
 *             per-test options (content model(s), last start tag, CDATA flag)
 *
 * Note: `¶ms` in the original was HTML-entity corruption of `&params`
 * and has been repaired throughout.
 */
void run_test(context *ctx)
{
	parserutils_inputstream *stream;
	hubbub_tokeniser *tok;
	hubbub_tokeniser_optparams params;
	int i, max_i;
	struct array_list *outputsave = ctx->output;

	if (ctx->content_model == NULL) {
		max_i = 1;
	} else {
		max_i = array_list_length(ctx->content_model);
	}

	/* We test for each of the content models specified */
	for (i = 0; i < max_i; i++) {
		/* Reset expected output */
		ctx->output = outputsave;
		ctx->output_index = 0;
		ctx->char_off = 0;

		assert(parserutils_inputstream_create("UTF-8", 0, NULL,
				&stream) == PARSERUTILS_OK);

		assert(hubbub_tokeniser_create(stream, &tok) == HUBBUB_OK);

		if (ctx->last_start_tag != NULL) {
			/* Fake up a start tag, in PCDATA state */
			size_t len = strlen(ctx->last_start_tag) + 3;
			uint8_t *buf = malloc(len);

			assert(buf != NULL);

			snprintf((char *) buf, len, "<%s>",
					ctx->last_start_tag);

			/* len - 1: do not feed the NUL terminator */
			assert(parserutils_inputstream_append(stream,
					buf, len - 1) == PARSERUTILS_OK);

			assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);

			free(buf);
		}

		if (ctx->process_cdata) {
			params.process_cdata = ctx->process_cdata;
			assert(hubbub_tokeniser_setopt(tok,
					HUBBUB_TOKENISER_PROCESS_CDATA,
					&params) == HUBBUB_OK);
		}

		params.token_handler.handler = token_handler;
		params.token_handler.pw = ctx;
		assert(hubbub_tokeniser_setopt(tok,
				HUBBUB_TOKENISER_TOKEN_HANDLER,
				&params) == HUBBUB_OK);

		if (ctx->content_model == NULL) {
			params.content_model.model =
					HUBBUB_CONTENT_MODEL_PCDATA;
		} else {
			const char *cm = json_object_get_string(
					(struct json_object *)
					array_list_get_idx(
						ctx->content_model, i));

			if (strcmp(cm, "PCDATA") == 0) {
				params.content_model.model =
						HUBBUB_CONTENT_MODEL_PCDATA;
			} else if (strcmp(cm, "RCDATA") == 0) {
				params.content_model.model =
						HUBBUB_CONTENT_MODEL_RCDATA;
			} else if (strcmp(cm, "CDATA") == 0) {
				params.content_model.model =
						HUBBUB_CONTENT_MODEL_CDATA;
			} else {
				params.content_model.model =
						HUBBUB_CONTENT_MODEL_PLAINTEXT;
			}
		}
		assert(hubbub_tokeniser_setopt(tok,
				HUBBUB_TOKENISER_CONTENT_MODEL,
				&params) == HUBBUB_OK);

		assert(parserutils_inputstream_append(stream,
				ctx->input, ctx->input_len) ==
				PARSERUTILS_OK);

		/* Zero-length append signals end of input */
		assert(parserutils_inputstream_append(stream,
				NULL, 0) == PARSERUTILS_OK);

		printf("Input: '%.*s' (%d)\n", (int) ctx->input_len,
				(const char *) ctx->input,
				(int) ctx->input_len);

		assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);

		hubbub_tokeniser_destroy(tok);
		parserutils_inputstream_destroy(stream);
	}
}
/**
 * Configure a hubbub parser
 *
 * \param parser  Parser instance to configure
 * \param type    Option to set
 * \param params  Option-specific parameters
 * \return HUBBUB_OK on success, appropriate error otherwise
 */
hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
		hubbub_parser_opttype type,
		hubbub_parser_optparams *params)
{
	if (parser == NULL || params == NULL)
		return HUBBUB_BADPARM;

	switch (type) {
	case HUBBUB_PARSER_TOKEN_HANDLER:
		if (parser->tb != NULL) {
			/* Client is defining their own token handler,
			 * so we must destroy the default treebuilder */
			hubbub_treebuilder_destroy(parser->tb);
			parser->tb = NULL;
		}
		return hubbub_tokeniser_setopt(parser->tok,
				HUBBUB_TOKENISER_TOKEN_HANDLER,
				(hubbub_tokeniser_optparams *) params);

	case HUBBUB_PARSER_ERROR_HANDLER:
	{
		/* The error handler does not cascade, so tell both the
		 * treebuilder (if extant) and the tokeniser. */
		hubbub_error err = HUBBUB_OK;

		if (parser->tb != NULL) {
			err = hubbub_treebuilder_setopt(parser->tb,
					HUBBUB_TREEBUILDER_ERROR_HANDLER,
					(hubbub_treebuilder_optparams *)
							params);
		}

		if (err != HUBBUB_OK)
			return err;

		return hubbub_tokeniser_setopt(parser->tok,
				HUBBUB_TOKENISER_ERROR_HANDLER,
				(hubbub_tokeniser_optparams *) params);
	}

	case HUBBUB_PARSER_CONTENT_MODEL:
		return hubbub_tokeniser_setopt(parser->tok,
				HUBBUB_TOKENISER_CONTENT_MODEL,
				(hubbub_tokeniser_optparams *) params);

	case HUBBUB_PARSER_PAUSE:
		return hubbub_tokeniser_setopt(parser->tok,
				HUBBUB_TOKENISER_PAUSE,
				(hubbub_tokeniser_optparams *) params);

	case HUBBUB_PARSER_TREE_HANDLER:
		/* No treebuilder => nothing to configure; report success,
		 * matching the original accumulator behaviour */
		if (parser->tb == NULL)
			return HUBBUB_OK;

		return hubbub_treebuilder_setopt(parser->tb,
				HUBBUB_TREEBUILDER_TREE_HANDLER,
				(hubbub_treebuilder_optparams *) params);

	case HUBBUB_PARSER_DOCUMENT_NODE:
		if (parser->tb == NULL)
			return HUBBUB_OK;

		return hubbub_treebuilder_setopt(parser->tb,
				HUBBUB_TREEBUILDER_DOCUMENT_NODE,
				(hubbub_treebuilder_optparams *) params);

	case HUBBUB_PARSER_ENABLE_SCRIPTING:
		if (parser->tb == NULL)
			return HUBBUB_OK;

		return hubbub_treebuilder_setopt(parser->tb,
				HUBBUB_TREEBUILDER_ENABLE_SCRIPTING,
				(hubbub_treebuilder_optparams *) params);

	default:
		return HUBBUB_INVALID;
	}
}
int main(int argc, char **argv) { parserutils_inputstream *stream; hubbub_tokeniser *tok; hubbub_tokeniser_optparams params; FILE *fp; size_t len, origlen; #define CHUNK_SIZE (4096) uint8_t buf[CHUNK_SIZE]; if (argc != 2) { printf("Usage: %s <filename>\n", argv[0]); return 1; } assert(parserutils_inputstream_create("UTF-8", 0, NULL, myrealloc, NULL, &stream) == PARSERUTILS_OK); assert(hubbub_tokeniser_create(stream, myrealloc, NULL, &tok) == HUBBUB_OK); params.token_handler.handler = token_handler; params.token_handler.pw = NULL; assert(hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_TOKEN_HANDLER, ¶ms) == HUBBUB_OK); fp = fopen(argv[1], "rb"); if (fp == NULL) { printf("Failed opening %s\n", argv[1]); return 1; } fseek(fp, 0, SEEK_END); origlen = len = ftell(fp); fseek(fp, 0, SEEK_SET); while (len > 0) { ssize_t bytes_read = fread(buf, 1, CHUNK_SIZE, fp); if (bytes_read < 1) break; assert(parserutils_inputstream_append(stream, buf, bytes_read) == HUBBUB_OK); len -= bytes_read; assert(hubbub_tokeniser_run(tok) == HUBBUB_OK); } assert(len == 0); fclose(fp); hubbub_tokeniser_destroy(tok); parserutils_inputstream_destroy(stream); printf("PASS\n"); return 0; }