Пример #1
0
bool handle_line(const char *data, size_t datalen, void *pw)
{
	line_ctx *ctx = (line_ctx *) pw;

	if (data[0] == '#') {
		if (ctx->inexp) {
			/* This marks end of testcase, so run it */

			if (ctx->buf[ctx->bufused - 1] == '\n')
				ctx->bufused -= 1;

			if (ctx->exp[ctx->expused - 1] == '\n')
				ctx->expused -= 1;

			run_test(ctx);

			ctx->buf[0] = '\0';
			ctx->exp[0] = '\0';
			ctx->bufused = 0;
			ctx->expused = 0;
			ctx->exp_ret = PARSERUTILS_OK;
		}

		if (strncasecmp(data+1, "data", 4) == 0) {
			parserutils_charset_codec_optparams params;
			const char *ptr = data + 6;

			ctx->indata = true;
			ctx->inexp = false;

			if (strncasecmp(ptr, "decode", 6) == 0)
				ctx->dir = DECODE;
			else if (strncasecmp(ptr, "encode", 6) == 0)
				ctx->dir = ENCODE;
			else
				ctx->dir = BOTH;

			ptr += 7;

			if (strncasecmp(ptr, "LOOSE", 5) == 0) {
				params.error_mode.mode =
					PARSERUTILS_CHARSET_CODEC_ERROR_LOOSE;
				ptr += 6;
			} else if (strncasecmp(ptr, "STRICT", 6) == 0) {
				params.error_mode.mode =
					PARSERUTILS_CHARSET_CODEC_ERROR_STRICT;
				ptr += 7;
			} else {
				params.error_mode.mode =
					PARSERUTILS_CHARSET_CODEC_ERROR_TRANSLIT;
				ptr += 9;
			}

			assert(parserutils_charset_codec_setopt(ctx->codec,
				PARSERUTILS_CHARSET_CODEC_ERROR_MODE,
				(parserutils_charset_codec_optparams *) &params)
				== PARSERUTILS_OK);
		} else if (strncasecmp(data+1, "expected", 8) == 0) {
			ctx->indata = false;
			ctx->inexp = true;

			ctx->exp_ret = parserutils_error_from_string(data + 10,
					datalen - 10 - 1 /* \n */);
		} else if (strncasecmp(data+1, "reset", 5) == 0) {
			ctx->indata = false;
			ctx->inexp = false;

			parserutils_charset_codec_reset(ctx->codec);
		} else if (strncasecmp(data+1, "enc", 3) == 0) {
			const char *enc = data + 5;
			const char *end;
			char *enc_name;

			for (end = enc; !isspace(*end); end++)
				;

			enc_name = malloc(end - enc + 1);
			memcpy(enc_name, enc, end - enc);
			enc_name[end - enc] = 0;

			assert(parserutils_charset_codec_create(enc_name,
					myrealloc, NULL, &ctx->codec) ==
					PARSERUTILS_OK);

			ctx->hadenc = true;

			free(enc_name);
		}
	} else {
		if (ctx->indata) {
			memcpy(ctx->buf + ctx->bufused, data, datalen);
			ctx->bufused += datalen;
		}
		if (ctx->inexp) {
			memcpy(ctx->exp + ctx->expused, data, datalen);
			ctx->expused += datalen;
		}
	}

	return true;
}
Пример #2
0
bool handle_line(const char *data, size_t datalen, void *pw)
{
	line_ctx *ctx = (line_ctx *) pw;

	if (data[0] == '#') {
		if (ctx->inexp) {
			/* This marks end of testcase, so run it */

			if (ctx->buf[ctx->bufused - 1] == '\n')
				ctx->bufused -= 1;

			if (ctx->exp[ctx->expused - 1] == '\n')
				ctx->expused -= 1;

			run_test(ctx);

			ctx->buf[0] = '\0';
			ctx->exp[0] = '\0';
			ctx->bufused = 0;
			ctx->expused = 0;
			ctx->exp_ret = PARSERUTILS_OK;
		}

		if (strncasecmp(data+1, "data", 4) == 0) {
			parserutils_charset_codec_optparams params;
			const char *ptr = data + 6;

			ctx->indata = true;
			ctx->inexp = false;

			if (strncasecmp(ptr, "decode", 6) == 0)
				ctx->dir = DECODE;
			else if (strncasecmp(ptr, "encode", 6) == 0)
				ctx->dir = ENCODE;
			else
				ctx->dir = BOTH;

			ptr += 7;

			if (strncasecmp(ptr, "LOOSE", 5) == 0) {
				params.error_mode.mode =
					PARSERUTILS_CHARSET_CODEC_ERROR_LOOSE;
				ptr += 6;
			} else if (strncasecmp(ptr, "STRICT", 6) == 0) {
				params.error_mode.mode =
					PARSERUTILS_CHARSET_CODEC_ERROR_STRICT;
				ptr += 7;
			} else {
				params.error_mode.mode =
					PARSERUTILS_CHARSET_CODEC_ERROR_TRANSLIT;
				ptr += 9;
			}

			assert(parserutils_charset_codec_setopt(ctx->codec,
				PARSERUTILS_CHARSET_CODEC_ERROR_MODE,
				(parserutils_charset_codec_optparams *) &params)
				== PARSERUTILS_OK);
		} else if (strncasecmp(data+1, "expected", 8) == 0) {
			ctx->indata = false;
			ctx->inexp = true;

			ctx->exp_ret = parserutils_error_from_string(data + 10,
					datalen - 10 - 1 /* \n */);
		} else if (strncasecmp(data+1, "reset", 5) == 0) {
			ctx->indata = false;
			ctx->inexp = false;

			parserutils_charset_codec_reset(ctx->codec);
		}
	} else {
		if (ctx->indata) {
			/* Process "&#xNNNN" as 16-bit code units.  */
			while (datalen) {
				uint16_t nCodePoint;

				if (data[0] == '\n') {
					ctx->buf[ctx->bufused++] = *data++;
					--datalen;
					continue;
				}
				assert(datalen >= sizeof ("&#xNNNN")-1 \
					&& data[0] == '&' && data[1] == '#' \
					&& data[2] == 'x' && isxdigit(data[3]) \
					&& isxdigit(data[4]) && isxdigit(data[5]) \
					&& isxdigit(data[6]));
				/* UTF-16 code is always host endian (different
				   than UCS-32 !).  */
				nCodePoint = (hex2digit(data[3]) << 12) | 
						(hex2digit(data[4]) <<  8) | 
						(hex2digit(data[5]) <<  4) | 
						hex2digit(data[6]);
				*((uint16_t *) (void *) (ctx->buf + ctx->bufused)) = 
						nCodePoint;
				ctx->bufused += 2;
				data += sizeof ("&#xNNNN")-1;
				datalen -= sizeof ("&#xNNNN")-1;
			}
		}
		if (ctx->inexp) {
			/* Process "&#xXXXXYYYY as 32-bit code units.  */
			while (datalen) {
				uint32_t nCodePoint;

				if (data[0] == '\n') {
					ctx->exp[ctx->expused++] = *data++;
					--datalen;
					continue;
				}
				assert(datalen >= sizeof ("&#xXXXXYYYY")-1 \
					&& data[0] == '&' && data[1] == '#' \
					&& data[2] == 'x' && isxdigit(data[3]) \
					&& isxdigit(data[4]) && isxdigit(data[5]) \
					&& isxdigit(data[6]) && isxdigit(data[7]) \
					&& isxdigit(data[8]) && isxdigit(data[9]) \
					&& isxdigit(data[10]));
				/* UCS-4 code is always big endian, so convert
				   host endian to big endian.  */
				nCodePoint =
					htonl((hex2digit(data[3]) << 28)
					| (hex2digit(data[4]) << 24)
					| (hex2digit(data[5]) << 20)
					| (hex2digit(data[6]) << 16)
					| (hex2digit(data[7]) << 12)
					| (hex2digit(data[8]) << 8)
					| (hex2digit(data[9]) << 4)
					| hex2digit(data[10]));
				*((uint32_t *) (void *) (ctx->exp + ctx->expused)) = 
						nCodePoint;
				ctx->expused += 4;
				data += sizeof ("&#xXXXXYYYY")-1;
				datalen -= sizeof ("&#xXXXXYYYY")-1;
			}
		}
	}

	return true;
}