/*
 * Step the token's ICU iterator once.
 *
 * Returns the string from icu_iter_get_norm() for the new position, or 0
 * when icu_iter_next() reports that there is nothing further to read.
 */
static const char *pp2_charset_token_icu(pp2_charset_token_t prt)
{
    if (!icu_iter_next(prt->iter))
        return 0;
    return icu_iter_get_norm(prt->iter);
}
static void check_icu_iter1(void) { UErrorCode status = U_ZERO_ERROR; struct icu_chain *chain = 0; xmlNode *xml_node; yaz_icu_iter_t iter; const char *xml_str = "<icu locale=\"en\">" "<tokenize rule=\"w\"/>" "<transform rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>" "</icu>"; xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str)); YAZ_CHECK(doc); if (!doc) return; xml_node = xmlDocGetRootElement(doc); YAZ_CHECK(xml_node); if (!xml_node) return ; chain = icu_chain_xml_config(xml_node, 1, &status); xmlFreeDoc(doc); YAZ_CHECK(chain); iter = icu_iter_create(chain); icu_iter_first(iter, "a string with 15 tokens and 8 displays"); YAZ_CHECK(iter); if (!iter) return; while (icu_iter_next(iter)) { yaz_log(YLOG_LOG, "[%s]", icu_iter_get_norm(iter)); } icu_iter_destroy(iter); icu_chain_destroy(chain); }
/*
 * Return the normalized string of the chain's embedded iterator via
 * icu_iter_get_norm(), or 0 when the chain has no iterator attached.
 */
const char *icu_chain_token_norm(struct icu_chain *chain)
{
    return chain->iter ? icu_iter_get_norm(chain->iter) : 0;
}
/*
 * Feed one input string to the iterator and, for every token it yields,
 * check that a normalized string is available (logging it with its byte
 * length) and that the reported original start/length stay within the
 * sanity bound of 1000.
 */
static void check_norm_input(yaz_icu_iter_t it, const char *input)
{
    icu_iter_first(it, input);
    while (icu_iter_next(it))
    {
        const char *norm_str = icu_iter_get_norm(it);
        size_t start, len;

        YAZ_CHECK(norm_str);
        if (norm_str)
            yaz_log(YLOG_LOG, "norm_str len=%ld=%s",
                    (long) strlen(norm_str), norm_str);
        icu_iter_get_org_info(it, &start, &len);
        YAZ_CHECK(start <= 1000);
        YAZ_CHECK(len <= 1000);
    }
}

/*
 * Normalization test: configure a chain (control-character removal,
 * tokenize rule "l", whitespace/punctuation removal, lower-casing) from
 * inline XML and run two inputs containing a non-BMP character through it,
 * the second one with a leading newline.
 */
static void check_norm(void)
{
    UErrorCode status = U_ZERO_ERROR;
    struct icu_chain *chain = 0;
    xmlNode *xml_node;
    yaz_icu_iter_t it;
    const char *xml_str =
        " <icu_chain id=\"relevance\" locale=\"en\">"
        " <transform rule=\"[:Control:] Any-Remove\"/>"
        " <tokenize rule=\"l\"/>"
        " <transform rule=\"[[:WhiteSpace:][:Punctuation:]`] Remove\"/>"
        " <casemap rule=\"l\"/>"
        " </icu_chain>";

    xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
    YAZ_CHECK(doc);
    if (!doc)
        return;

    xml_node = xmlDocGetRootElement(doc);
    YAZ_CHECK(xml_node);
    if (!xml_node)
    {
        /* release the parsed document on this error path as well */
        xmlFreeDoc(doc);
        return;
    }

    chain = icu_chain_xml_config(xml_node, 1, &status);

    it = icu_iter_create(chain);
    if (it)
    {
        check_norm_input(it, " y😄");
        check_norm_input(it, "\n y😄");
    }
    icu_iter_destroy(it);
    icu_chain_destroy(chain);
    xmlFreeDoc(doc);
}
static int test_iter(struct icu_chain *chain, const char *input, const char *expected) { yaz_icu_iter_t iter = icu_iter_create(chain); WRBUF result, second, sort_result; int success = 1; if (!iter) { yaz_log(YLOG_WARN, "test_iter: input=%s !iter", input); return 0; } if (icu_iter_next(iter)) { yaz_log(YLOG_WARN, "test_iter: expecting 0 before icu_iter_first"); return 0; } sort_result = wrbuf_alloc(); result = wrbuf_alloc(); icu_iter_first(iter, input); while (icu_iter_next(iter)) { const char *sort_str = icu_iter_get_sortkey(iter); if (sort_str) { wrbuf_puts(sort_result, "["); wrbuf_puts_escaped(sort_result, sort_str); wrbuf_puts(sort_result, "]"); } else { wrbuf_puts(sort_result, "[NULL]"); } wrbuf_puts(result, "["); wrbuf_puts(result, icu_iter_get_norm(iter)); wrbuf_puts(result, "]"); } yaz_log(YLOG_LOG, "sortkey=%s", wrbuf_cstr(sort_result)); second = wrbuf_alloc(); icu_iter_first(iter, input); while (icu_iter_next(iter)) { wrbuf_puts(second, "["); wrbuf_puts(second, icu_iter_get_norm(iter)); wrbuf_puts(second, "]"); } icu_iter_destroy(iter); if (strcmp(expected, wrbuf_cstr(result))) { yaz_log(YLOG_WARN, "test_iter: input=%s expected=%s got=%s", input, expected, wrbuf_cstr(result)); success = 0; } if (strcmp(expected, wrbuf_cstr(second))) { yaz_log(YLOG_WARN, "test_iter: input=%s expected=%s got=%s (2nd)", input, expected, wrbuf_cstr(second)); success = 0; } wrbuf_destroy(result); wrbuf_destroy(second); wrbuf_destroy(sort_result); return success; }