bool handle_line(const char *data, size_t datalen, void *pw) { line_ctx *ctx = (line_ctx *) pw; if (data[0] == '#') { if (ctx->inexp) { /* This marks end of testcase, so run it */ if (ctx->buf[ctx->bufused - 1] == '\n') ctx->bufused -= 1; if (ctx->exp[ctx->expused - 1] == '\n') ctx->expused -= 1; run_test(ctx); ctx->buf[0] = '\0'; ctx->exp[0] = '\0'; ctx->bufused = 0; ctx->expused = 0; ctx->exp_ret = PARSERUTILS_OK; } if (strncasecmp(data+1, "data", 4) == 0) { parserutils_charset_codec_optparams params; const char *ptr = data + 6; ctx->indata = true; ctx->inexp = false; if (strncasecmp(ptr, "decode", 6) == 0) ctx->dir = DECODE; else if (strncasecmp(ptr, "encode", 6) == 0) ctx->dir = ENCODE; else ctx->dir = BOTH; ptr += 7; if (strncasecmp(ptr, "LOOSE", 5) == 0) { params.error_mode.mode = PARSERUTILS_CHARSET_CODEC_ERROR_LOOSE; ptr += 6; } else if (strncasecmp(ptr, "STRICT", 6) == 0) { params.error_mode.mode = PARSERUTILS_CHARSET_CODEC_ERROR_STRICT; ptr += 7; } else { params.error_mode.mode = PARSERUTILS_CHARSET_CODEC_ERROR_TRANSLIT; ptr += 9; } assert(parserutils_charset_codec_setopt(ctx->codec, PARSERUTILS_CHARSET_CODEC_ERROR_MODE, (parserutils_charset_codec_optparams *) ¶ms) == PARSERUTILS_OK); } else if (strncasecmp(data+1, "expected", 8) == 0) { ctx->indata = false; ctx->inexp = true; ctx->exp_ret = parserutils_error_from_string(data + 10, datalen - 10 - 1 /* \n */); } else if (strncasecmp(data+1, "reset", 5) == 0) { ctx->indata = false; ctx->inexp = false; parserutils_charset_codec_reset(ctx->codec); } else if (strncasecmp(data+1, "enc", 3) == 0) { const char *enc = data + 5; const char *end; char *enc_name; for (end = enc; !isspace(*end); end++) ; enc_name = malloc(end - enc + 1); memcpy(enc_name, enc, end - enc); enc_name[end - enc] = 0; assert(parserutils_charset_codec_create(enc_name, myrealloc, NULL, &ctx->codec) == PARSERUTILS_OK); ctx->hadenc = true; free(enc_name); } } else { if (ctx->indata) { memcpy(ctx->buf + ctx->bufused, data, datalen); ctx->bufused += datalen; } if (ctx->inexp) { memcpy(ctx->exp + ctx->expused, data, datalen); ctx->expused += datalen; } } return true; }
bool handle_line(const char *data, size_t datalen, void *pw) { line_ctx *ctx = (line_ctx *) pw; if (data[0] == '#') { if (ctx->inexp) { /* This marks end of testcase, so run it */ if (ctx->buf[ctx->bufused - 1] == '\n') ctx->bufused -= 1; if (ctx->exp[ctx->expused - 1] == '\n') ctx->expused -= 1; run_test(ctx); ctx->buf[0] = '\0'; ctx->exp[0] = '\0'; ctx->bufused = 0; ctx->expused = 0; ctx->exp_ret = PARSERUTILS_OK; } if (strncasecmp(data+1, "data", 4) == 0) { parserutils_charset_codec_optparams params; const char *ptr = data + 6; ctx->indata = true; ctx->inexp = false; if (strncasecmp(ptr, "decode", 6) == 0) ctx->dir = DECODE; else if (strncasecmp(ptr, "encode", 6) == 0) ctx->dir = ENCODE; else ctx->dir = BOTH; ptr += 7; if (strncasecmp(ptr, "LOOSE", 5) == 0) { params.error_mode.mode = PARSERUTILS_CHARSET_CODEC_ERROR_LOOSE; ptr += 6; } else if (strncasecmp(ptr, "STRICT", 6) == 0) { params.error_mode.mode = PARSERUTILS_CHARSET_CODEC_ERROR_STRICT; ptr += 7; } else { params.error_mode.mode = PARSERUTILS_CHARSET_CODEC_ERROR_TRANSLIT; ptr += 9; } assert(parserutils_charset_codec_setopt(ctx->codec, PARSERUTILS_CHARSET_CODEC_ERROR_MODE, (parserutils_charset_codec_optparams *) ¶ms) == PARSERUTILS_OK); } else if (strncasecmp(data+1, "expected", 8) == 0) { ctx->indata = false; ctx->inexp = true; ctx->exp_ret = parserutils_error_from_string(data + 10, datalen - 10 - 1 /* \n */); } else if (strncasecmp(data+1, "reset", 5) == 0) { ctx->indata = false; ctx->inexp = false; parserutils_charset_codec_reset(ctx->codec); } } else { if (ctx->indata) { /* Process "&#xNNNN" as 16-bit code units. */ while (datalen) { uint16_t nCodePoint; if (data[0] == '\n') { ctx->buf[ctx->bufused++] = *data++; --datalen; continue; } assert(datalen >= sizeof ("&#xNNNN")-1 \ && data[0] == '&' && data[1] == '#' \ && data[2] == 'x' && isxdigit(data[3]) \ && isxdigit(data[4]) && isxdigit(data[5]) \ && isxdigit(data[6])); /* UTF-16 code is always host endian (different than UCS-32 !). */ nCodePoint = (hex2digit(data[3]) << 12) | (hex2digit(data[4]) << 8) | (hex2digit(data[5]) << 4) | hex2digit(data[6]); *((uint16_t *) (void *) (ctx->buf + ctx->bufused)) = nCodePoint; ctx->bufused += 2; data += sizeof ("&#xNNNN")-1; datalen -= sizeof ("&#xNNNN")-1; } } if (ctx->inexp) { /* Process "&#xXXXXYYYY as 32-bit code units. */ while (datalen) { uint32_t nCodePoint; if (data[0] == '\n') { ctx->exp[ctx->expused++] = *data++; --datalen; continue; } assert(datalen >= sizeof ("&#xXXXXYYYY")-1 \ && data[0] == '&' && data[1] == '#' \ && data[2] == 'x' && isxdigit(data[3]) \ && isxdigit(data[4]) && isxdigit(data[5]) \ && isxdigit(data[6]) && isxdigit(data[7]) \ && isxdigit(data[8]) && isxdigit(data[9]) \ && isxdigit(data[10])); /* UCS-4 code is always big endian, so convert host endian to big endian. */ nCodePoint = htonl((hex2digit(data[3]) << 28) | (hex2digit(data[4]) << 24) | (hex2digit(data[5]) << 20) | (hex2digit(data[6]) << 16) | (hex2digit(data[7]) << 12) | (hex2digit(data[8]) << 8) | (hex2digit(data[9]) << 4) | hex2digit(data[10])); *((uint32_t *) (void *) (ctx->exp + ctx->expused)) = nCodePoint; ctx->expused += 4; data += sizeof ("&#xXXXXYYYY")-1; datalen -= sizeof ("&#xXXXXYYYY")-1; } } } return true; }