/*
 * Convert buf with cd in chunks of at most 16 output bytes and compare the
 * accumulated output with cmpbuf.
 *
 * expect_error:
 *   -1     : the conversion status is not checked at all
 *   other  : a failing conversion must report exactly this code from
 *            yaz_iconv_error(); a succeeding conversion requires the
 *            value to be 0
 *
 * Returns 1 when both the status check and the byte-wise output comparison
 * pass, 0 otherwise. On an output mismatch the input, the actual output and
 * the expected output are logged in escaped form.
 */
static int tst_convert_x(yaz_iconv_t cd, const char *buf, const char *cmpbuf,
                         int expect_error)
{
    char chunk[16];
    WRBUF converted = wrbuf_alloc();
    const char *src = buf;
    size_t src_left = strlen(buf);
    size_t got_len;
    int ok = 1;
    int pass = 0;

    /* Bounded number of passes so a converter that never makes progress
       cannot hang the test. */
    while (src_left && pass < (int) sizeof(chunk))
    {
        char *dst = chunk;
        size_t dst_left = sizeof(chunk);
        size_t rc = yaz_iconv(cd, (char **) &src, &src_left, &dst, &dst_left);

        wrbuf_write(converted, chunk, dst - chunk);

        if (rc != (size_t) (-1))
        {
            /* Input accepted: call once more with no input so that any
               pending output is written, then stop. */
            dst = chunk;
            dst_left = sizeof(chunk);
            rc = yaz_iconv(cd, 0, 0, &dst, &dst_left);
            wrbuf_write(converted, chunk, dst - chunk);

            if (expect_error != -1 && expect_error != 0)
                ok = 0;
            break;
        }
        else
        {
            int err = yaz_iconv_error(cd);

            /* YAZ_ICONV_E2BIG only means the chunk filled up; anything
               else ends the conversion. */
            if (err != YAZ_ICONV_E2BIG)
            {
                if (expect_error != -1 && err != expect_error)
                    ok = 0;
                break;
            }
        }
        pass++;
    }

    got_len = wrbuf_len(converted);
    if (got_len != strlen(cmpbuf)
        || memcmp(cmpbuf, wrbuf_buf(converted), got_len) != 0)
    {
        WRBUF esc = wrbuf_alloc();

        ok = 0;

        wrbuf_rewind(esc);
        wrbuf_puts_escaped(esc, buf);
        yaz_log(YLOG_LOG, "input %s", wrbuf_cstr(esc));

        wrbuf_rewind(esc);
        wrbuf_write_escaped(esc, wrbuf_buf(converted), wrbuf_len(converted));
        yaz_log(YLOG_LOG, "got %s", wrbuf_cstr(esc));

        wrbuf_rewind(esc);
        wrbuf_puts_escaped(esc, cmpbuf);
        yaz_log(YLOG_LOG, "exp %s", wrbuf_cstr(esc));

        wrbuf_destroy(esc);
    }

    wrbuf_destroy(converted);
    return ok;
}
static void process_text_file(struct config_t *p_config) { char *line = 0; char linebuf[1024]; xmlDoc *doc = xmlParseFile(p_config->conffile); xmlNode *xml_node = xmlDocGetRootElement(doc); long unsigned int token_count = 0; long unsigned int line_count = 0; UErrorCode status = U_ZERO_ERROR; if (!xml_node) { printf("Could not parse XML config file '%s' \n", p_config->conffile); exit(1); } p_config->chain = icu_chain_xml_config(xml_node, 1, &status); if (!p_config->chain || !U_SUCCESS(status)) { printf("Could not set up ICU chain from config file '%s' \n", p_config->conffile); exit(1); } if (p_config->xmloutput) fprintf(p_config->outfile, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<icu>\n" "<tokens>\n"); /* read input lines for processing */ while ((line=fgets(linebuf, sizeof(linebuf)-1, p_config->infile))) { WRBUF sw = wrbuf_alloc(); WRBUF cdata = wrbuf_alloc(); int success = icu_chain_assign_cstr(p_config->chain, line, &status); line_count++; while (success && icu_chain_next_token(p_config->chain, &status)) { if (U_FAILURE(status)) success = 0; else { size_t start, len; const char *org_string = 0; const char *sortkey = icu_chain_token_sortkey(p_config->chain); icu_chain_get_org_info2(p_config->chain, &start, &len, &org_string); wrbuf_rewind(sw); wrbuf_puts_escaped(sw, sortkey); token_count++; if (p_config->xmloutput) { fprintf(p_config->outfile, "<token id=\"%lu\" line=\"%lu\"", token_count, line_count); wrbuf_rewind(cdata); wrbuf_xmlputs(cdata, icu_chain_token_norm(p_config->chain)); fprintf(p_config->outfile, " norm=\"%s\"", wrbuf_cstr(cdata)); wrbuf_rewind(cdata); wrbuf_xmlputs(cdata, icu_chain_token_display(p_config->chain)); fprintf(p_config->outfile, " display=\"%s\"", wrbuf_cstr(cdata)); if (p_config->sortoutput) { wrbuf_rewind(cdata); wrbuf_xmlputs(cdata, wrbuf_cstr(sw)); fprintf(p_config->outfile, " sortkey=\"%s\"", wrbuf_cstr(cdata)); } fprintf(p_config->outfile, "/>\n"); } else { fprintf(p_config->outfile, "%lu %lu '%s' '%s'", token_count, 
line_count, icu_chain_token_norm(p_config->chain), icu_chain_token_display(p_config->chain)); if (p_config->sortoutput) { fprintf(p_config->outfile, " '%s'", wrbuf_cstr(sw)); } if (p_config->org_output) { fprintf(p_config->outfile, " %ld+%ld", (long) start, (long) len); fputc(' ', p_config->outfile); fwrite(org_string, 1, start, p_config->outfile); fputc('*', p_config->outfile); fwrite(org_string + start, 1, len, p_config->outfile); fputc('*', p_config->outfile); fputs(org_string + start + len, p_config->outfile); } fprintf(p_config->outfile, "\n"); } } } wrbuf_destroy(sw); wrbuf_destroy(cdata); } if (p_config->xmloutput) fprintf(p_config->outfile, "</tokens>\n" "</icu>\n"); icu_chain_destroy(p_config->chain); xmlFreeDoc(doc); if (line) free(line); }
static int test_iter(struct icu_chain *chain, const char *input, const char *expected) { yaz_icu_iter_t iter = icu_iter_create(chain); WRBUF result, second, sort_result; int success = 1; if (!iter) { yaz_log(YLOG_WARN, "test_iter: input=%s !iter", input); return 0; } if (icu_iter_next(iter)) { yaz_log(YLOG_WARN, "test_iter: expecting 0 before icu_iter_first"); return 0; } sort_result = wrbuf_alloc(); result = wrbuf_alloc(); icu_iter_first(iter, input); while (icu_iter_next(iter)) { const char *sort_str = icu_iter_get_sortkey(iter); if (sort_str) { wrbuf_puts(sort_result, "["); wrbuf_puts_escaped(sort_result, sort_str); wrbuf_puts(sort_result, "]"); } else { wrbuf_puts(sort_result, "[NULL]"); } wrbuf_puts(result, "["); wrbuf_puts(result, icu_iter_get_norm(iter)); wrbuf_puts(result, "]"); } yaz_log(YLOG_LOG, "sortkey=%s", wrbuf_cstr(sort_result)); second = wrbuf_alloc(); icu_iter_first(iter, input); while (icu_iter_next(iter)) { wrbuf_puts(second, "["); wrbuf_puts(second, icu_iter_get_norm(iter)); wrbuf_puts(second, "]"); } icu_iter_destroy(iter); if (strcmp(expected, wrbuf_cstr(result))) { yaz_log(YLOG_WARN, "test_iter: input=%s expected=%s got=%s", input, expected, wrbuf_cstr(result)); success = 0; } if (strcmp(expected, wrbuf_cstr(second))) { yaz_log(YLOG_WARN, "test_iter: input=%s expected=%s got=%s (2nd)", input, expected, wrbuf_cstr(second)); success = 0; } wrbuf_destroy(result); wrbuf_destroy(second); wrbuf_destroy(sort_result); return success; }