Example 1
void run_test(context *ctx)
{
	parserutils_inputstream *stream;
	hubbub_tokeniser *tok;
	hubbub_tokeniser_optparams params;
	int i, max_i;
	struct array_list *outputsave = ctx->output;

	if (ctx->content_model == NULL) {
		max_i = 1;
	} else {
		max_i = array_list_length(ctx->content_model);
	}

	/* We test for each of the content models specified */
	for (i = 0; i < max_i; i++) {
		/* Reset expected output */
		ctx->output = outputsave;
		ctx->output_index = 0;
		ctx->char_off = 0;

		assert(parserutils_inputstream_create("UTF-8", 0, NULL,
				&stream) == PARSERUTILS_OK);

		assert(hubbub_tokeniser_create(stream, &tok) == HUBBUB_OK);

		if (ctx->last_start_tag != NULL) {
			/* Fake up a start tag, in PCDATA state */
			size_t len = strlen(ctx->last_start_tag) + 3;
			uint8_t *buf = malloc(len);

			snprintf((char *) buf, len, "<%s>", 
					ctx->last_start_tag);

			assert(parserutils_inputstream_append(stream,
				buf, len - 1) == PARSERUTILS_OK);

			assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);

			free(buf);
		}

		if (ctx->process_cdata) {
			params.process_cdata = ctx->process_cdata;
			assert(hubbub_tokeniser_setopt(tok,
					HUBBUB_TOKENISER_PROCESS_CDATA,
					&params) == HUBBUB_OK);
		}

		params.token_handler.handler = token_handler;
		params.token_handler.pw = ctx;
		assert(hubbub_tokeniser_setopt(tok,
				HUBBUB_TOKENISER_TOKEN_HANDLER,
				&params) == HUBBUB_OK);

		if (ctx->content_model == NULL) {
			params.content_model.model =
					HUBBUB_CONTENT_MODEL_PCDATA;
		} else {
			const char *cm = json_object_get_string(
				(struct json_object *)
				array_list_get_idx(ctx->content_model, i));

			if (strcmp(cm, "PCDATA") == 0) {
				params.content_model.model =
						HUBBUB_CONTENT_MODEL_PCDATA;
			} else if (strcmp(cm, "RCDATA") == 0) {
				params.content_model.model =
						HUBBUB_CONTENT_MODEL_RCDATA;
			} else if (strcmp(cm, "CDATA") == 0) {
				params.content_model.model =
						HUBBUB_CONTENT_MODEL_CDATA;
			} else {
				params.content_model.model =
					HUBBUB_CONTENT_MODEL_PLAINTEXT;
			}
		}
		assert(hubbub_tokeniser_setopt(tok,
				HUBBUB_TOKENISER_CONTENT_MODEL,
				&params) == HUBBUB_OK);

		assert(parserutils_inputstream_append(stream,
				ctx->input, ctx->input_len) == PARSERUTILS_OK);

		assert(parserutils_inputstream_append(stream, NULL, 0) ==
				PARSERUTILS_OK);

		printf("Input: '%.*s' (%d)\n", (int) ctx->input_len,
				(const char *) ctx->input, 
				(int) ctx->input_len);

		assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);

		hubbub_tokeniser_destroy(tok);

		parserutils_inputstream_destroy(stream);
	}
}
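run_test() wires a token_handler callback into the tokeniser via params.token_handler.handler, but the handler itself lives elsewhere in the test file. Below is a minimal sketch of what such a handler could look like, assuming the hubbub_token_handler signature that returns hubbub_error (older releases returned void); the real test handler compares each token against the expected JSON output rather than printing it.

/* Sketch only: the real handler walks ctx->output and asserts that each
 * emitted token matches the expected test data. */
static hubbub_error token_handler(const hubbub_token *token, void *pw)
{
	context *ctx = (context *) pw;	/* passed via params.token_handler.pw */

	(void) ctx;

	printf("Got token of type %d\n", token->type);

	return HUBBUB_OK;
}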
Example 2
/**
 * Configure a hubbub parser
 *
 * \param parser  Parser instance to configure
 * \param type    Option to set
 * \param params  Option-specific parameters
 * \return HUBBUB_OK on success, appropriate error otherwise
 */
hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
		hubbub_parser_opttype type,
		hubbub_parser_optparams *params)
{
	hubbub_error result = HUBBUB_OK;

	if (parser == NULL || params == NULL)
		return HUBBUB_BADPARM;

	switch (type) {
	case HUBBUB_PARSER_TOKEN_HANDLER:
		if (parser->tb != NULL) {
			/* Client is defining their own token handler,
			 * so we must destroy the default treebuilder */
			hubbub_treebuilder_destroy(parser->tb);
			parser->tb = NULL;
		}
		result = hubbub_tokeniser_setopt(parser->tok,
				HUBBUB_TOKENISER_TOKEN_HANDLER,
				(hubbub_tokeniser_optparams *) params);
		break;

	case HUBBUB_PARSER_ERROR_HANDLER:
		/* The error handler does not cascade, so tell both the
		 * treebuilder (if extant) and the tokeniser. */
		if (parser->tb != NULL) {
			result = hubbub_treebuilder_setopt(parser->tb,
					HUBBUB_TREEBUILDER_ERROR_HANDLER,
					(hubbub_treebuilder_optparams *) params);
		}
		if (result == HUBBUB_OK) {
			result = hubbub_tokeniser_setopt(parser->tok,
					HUBBUB_TOKENISER_ERROR_HANDLER,
					(hubbub_tokeniser_optparams *) params);
		}
		break;

	case HUBBUB_PARSER_CONTENT_MODEL:
		result = hubbub_tokeniser_setopt(parser->tok,
				HUBBUB_TOKENISER_CONTENT_MODEL,
				(hubbub_tokeniser_optparams *) params);
		break;

	case HUBBUB_PARSER_PAUSE:
		result = hubbub_tokeniser_setopt(parser->tok,
				HUBBUB_TOKENISER_PAUSE,
				(hubbub_tokeniser_optparams *) params);
		break;

	case HUBBUB_PARSER_TREE_HANDLER:
		if (parser->tb != NULL) {
			result = hubbub_treebuilder_setopt(parser->tb,
					HUBBUB_TREEBUILDER_TREE_HANDLER,
					(hubbub_treebuilder_optparams *) params);
		}
		break;

	case HUBBUB_PARSER_DOCUMENT_NODE:
		if (parser->tb != NULL) {
			result = hubbub_treebuilder_setopt(parser->tb,
					HUBBUB_TREEBUILDER_DOCUMENT_NODE,
					(hubbub_treebuilder_optparams *) params);
		}
		break;

	case HUBBUB_PARSER_ENABLE_SCRIPTING:
		if (parser->tb != NULL) {
			result = hubbub_treebuilder_setopt(parser->tb,
					HUBBUB_TREEBUILDER_ENABLE_SCRIPTING,
					(hubbub_treebuilder_optparams *) params);
		}
		break;

	default:
		result = HUBBUB_INVALID;
	}

	return result;
}
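A brief usage sketch for the function above: enabling scripting and installing a tree handler on an existing parser instance. Here `parser` is assumed to be an already-created hubbub_parser and `my_tree_handler` a hypothetical client-supplied hubbub_tree_handler; error handling is abbreviated to asserts.

/* Sketch only: parser and my_tree_handler are assumed to exist. */
hubbub_parser_optparams params;

params.enable_scripting = true;
assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_ENABLE_SCRIPTING,
		&params) == HUBBUB_OK);

params.tree_handler = &my_tree_handler;
assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER,
		&params) == HUBBUB_OK);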
Example 3
int main(int argc, char **argv)
{
    parserutils_inputstream *stream;
    hubbub_tokeniser *tok;
    hubbub_tokeniser_optparams params;
    FILE *fp;
    size_t len;
#define CHUNK_SIZE (4096)
    uint8_t buf[CHUNK_SIZE];

    if (argc != 2) {
        printf("Usage: %s <filename>\n", argv[0]);
        return 1;
    }

    assert(parserutils_inputstream_create("UTF-8", 0, NULL,
                                          myrealloc, NULL, &stream) == PARSERUTILS_OK);

    assert(hubbub_tokeniser_create(stream, myrealloc, NULL, &tok) ==
           HUBBUB_OK);

    params.token_handler.handler = token_handler;
    params.token_handler.pw = NULL;
    assert(hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_TOKEN_HANDLER,
                                   &params) == HUBBUB_OK);

    fp = fopen(argv[1], "rb");
    if (fp == NULL) {
        printf("Failed opening %s\n", argv[1]);
        return 1;
    }

    fseek(fp, 0, SEEK_END);
    /* Determine the file length, then rewind for reading */
    len = (size_t) ftell(fp);
    fseek(fp, 0, SEEK_SET);

    while (len > 0) {
        size_t bytes_read = fread(buf, 1, CHUNK_SIZE, fp);

        if (bytes_read == 0)
            break;

        assert(parserutils_inputstream_append(stream,
                                              buf, bytes_read) == PARSERUTILS_OK);

        len -= bytes_read;

        assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);
    }

    assert(len == 0);

    fclose(fp);

    hubbub_tokeniser_destroy(tok);

    parserutils_inputstream_destroy(stream);

    printf("PASS\n");

    return 0;
}
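Example 3 targets an older hubbub/libparserutils API that still takes an allocator callback; myrealloc (like token_handler) is defined elsewhere in that test file. The allocator is conventionally a thin wrapper around realloc, along the lines of the following sketch.

/* Allocator callback passed to parserutils_inputstream_create() and
 * hubbub_tokeniser_create() in the older API: a plain realloc wrapper
 * that ignores the private word. */
static void *myrealloc(void *ptr, size_t len, void *pw)
{
    (void) pw;

    return realloc(ptr, len);
}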