Пример #1
0
/*
 * Point the chain's built-in iterator at a new input string.
 *
 * Any iterator already stored in chain->iter is destroyed, a new one is
 * created for the chain, and it is started on src8cstr.
 *
 * chain     chain whose iter member is replaced
 * src8cstr  input string handed to icu_iter_first
 *           (presumably UTF-8, going by the name; confirm with callers)
 * status    not read or written by this function
 *
 * Always returns 1.
 */
int icu_chain_assign_cstr(struct icu_chain *chain, const char *src8cstr,
                          UErrorCode *status)
{
    /* Drop the iterator left over from a previous assignment, if any. */
    if (chain->iter != 0)
    {
        icu_iter_destroy(chain->iter);
    }

    chain->iter = icu_iter_create(chain);
    icu_iter_first(chain->iter, src8cstr);

    return 1;
}
Пример #2
0
/*
 * Create a tokenizer state object bound to the charset handle pct.
 *
 * The object is obtained with xmalloc and gets two fresh WRBUFs (norm_str
 * and sort_str); cp and last_cp start out as 0.  When built with
 * YAZ_HAVE_ICU, an ICU iterator is created if pct carries an ICU chain,
 * otherwise the iter member is left as 0.
 *
 * pct must be non-null (asserted).  Returns the new token object.
 * NOTE(review): the matching destroy routine is not visible here; the
 * caller presumably owns the returned object -- confirm.
 */
pp2_charset_token_t pp2_charset_tokenize(pp2_charset_t pct)
{
    pp2_charset_token_t tok = xmalloc(sizeof *tok);

    assert(pct);

    tok->norm_str = wrbuf_alloc();
    tok->sort_str = wrbuf_alloc();

    tok->pct = pct;
    tok->cp = 0;
    tok->last_cp = 0;

#if YAZ_HAVE_ICU
    /* Only charsets configured with an ICU chain get an iterator. */
    tok->iter = pct->icu_chn ? icu_iter_create(pct->icu_chn) : 0;
#endif

    return tok;
}
Пример #3
0
/*
 * Test: build an ICU chain from an inline XML configuration (tokenize
 * rule "w", then a transform removing whitespace and punctuation), run a
 * sample sentence through an iterator and log each normalized token.
 *
 * Token values are only logged, not compared; the YAZ_CHECKs cover that
 * the XML document, its root element, the chain and the iterator could
 * all be obtained.  Every early exit releases what was acquired so far.
 */
static void check_icu_iter1(void)
{
    UErrorCode status = U_ZERO_ERROR;
    struct icu_chain *chain = 0;
    xmlNode *xml_node;
    yaz_icu_iter_t iter;

    const char *xml_str = "<icu locale=\"en\">"
        "<tokenize rule=\"w\"/>"
        "<transform rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>"
        "</icu>";

    xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
    YAZ_CHECK(doc);
    if (!doc)
        return;
    xml_node = xmlDocGetRootElement(doc);
    YAZ_CHECK(xml_node);
    if (!xml_node)
    {
        /* the parsed document must not leak on this failure path */
        xmlFreeDoc(doc);
        return;
    }

    chain = icu_chain_xml_config(xml_node, 1, &status);

    /* the document is only needed while configuring the chain */
    xmlFreeDoc(doc);
    YAZ_CHECK(chain);
    if (!chain)
        return;

    iter = icu_iter_create(chain);
    YAZ_CHECK(iter);
    if (!iter)
    {
        icu_chain_destroy(chain);
        return;
    }

    /* start iterating only once the iterator is known to be valid */
    icu_iter_first(iter, "a string with 15 tokens and 8 displays");
    while (icu_iter_next(iter))
    {
        yaz_log(YLOG_LOG, "[%s]", icu_iter_get_norm(iter));
    }
    icu_iter_destroy(iter);
    icu_chain_destroy(chain);
}
Пример #4
0
/*
 * Helper for check_norm: start the iterator on input and, for every token
 * produced, check that a normalized string is available (logging it with
 * its length) and that the reported original start offset and length are
 * at most 1000.
 *
 * The inputs used by check_norm are only a few bytes long, so the 1000
 * bound presumably serves as a sanity check against garbage offsets.
 */
static void check_norm_tokens(yaz_icu_iter_t it, const char *input)
{
    icu_iter_first(it, input);
    while (icu_iter_next(it))
    {
        const char *norm_str = icu_iter_get_norm(it);
        /* initialized so the checks below never read indeterminate values */
        size_t start = 0, len = 0;

        YAZ_CHECK(norm_str);
        if (norm_str)
            yaz_log(YLOG_LOG, "norm_str len=%ld=%s",
                    (long) strlen(norm_str), norm_str);
        icu_iter_get_org_info(it, &start, &len);
        YAZ_CHECK(start <= 1000);
        YAZ_CHECK(len <= 1000);
    }
}

/*
 * Test: configure a chain (control-character removal, tokenize rule "l",
 * whitespace/punctuation/backtick removal, lower-casing) and run two short
 * inputs containing a non-BMP character through it, one of them preceded
 * by a newline.  Each token is validated by check_norm_tokens.
 *
 * All resources (document, chain, iterator) are released on every path.
 */
static void check_norm(void)
{
    UErrorCode status = U_ZERO_ERROR;
    struct icu_chain *chain = 0;
    xmlNode *xml_node;
    yaz_icu_iter_t it;

    const char *xml_str =
        "  <icu_chain id=\"relevance\" locale=\"en\">"
        "    <transform rule=\"[:Control:] Any-Remove\"/>"
        "    <tokenize rule=\"l\"/>"
        "    <transform rule=\"[[:WhiteSpace:][:Punctuation:]`] Remove\"/>"
        "    <casemap rule=\"l\"/>"
        "  </icu_chain>";

    xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
    YAZ_CHECK(doc);
    if (!doc)
        return;
    xml_node = xmlDocGetRootElement(doc);
    YAZ_CHECK(xml_node);
    if (!xml_node)
    {
        /* the parsed document must not leak on this failure path */
        xmlFreeDoc(doc);
        return;
    }
    chain = icu_chain_xml_config(xml_node, 1, &status);

    /* report a failed configuration instead of iterating over a null chain */
    YAZ_CHECK(chain);
    if (chain)
    {
        it = icu_iter_create(chain);
        if (it)
        {
            check_norm_tokens(it, " y😄");
            check_norm_tokens(it, "\n y😄");
        }
        icu_iter_destroy(it);
        icu_chain_destroy(chain);
    }
    xmlFreeDoc(doc);
}
Пример #5
0
/*
 * Run input through a fresh iterator on chain and compare the normalized
 * tokens with expected.
 *
 * The tokens are rendered as a concatenation of "[token]" groups.  The
 * input is iterated twice with the same iterator (icu_iter_first is called
 * again for the second pass) and both renderings must equal expected, so
 * the test also covers restarting an iterator.  Sort keys are collected in
 * escaped form ("[NULL]" when a key is missing) and logged only; they are
 * not compared against anything.
 *
 * Before icu_iter_first has been called, icu_iter_next is expected to
 * return 0; anything else is a failure.
 *
 * chain     chain to create the iterator from
 * input     string to tokenize
 * expected  expected "[tok1][tok2]..." rendering
 *
 * Returns 1 if both passes match expected, 0 otherwise (including when the
 * iterator cannot be created).  Mismatches are logged at YLOG_WARN.
 */
static int test_iter(struct icu_chain *chain, const char *input,
                     const char *expected)
{
    yaz_icu_iter_t iter = icu_iter_create(chain);
    WRBUF result, second, sort_result;
    int success = 1;

    if (!iter)
    {
        yaz_log(YLOG_WARN, "test_iter: input=%s !iter", input);
        return 0;
    }

    if (icu_iter_next(iter))
    {
        yaz_log(YLOG_WARN, "test_iter: expecting 0 before icu_iter_first");
        /* release the iterator; it was leaked on this path before */
        icu_iter_destroy(iter);
        return 0;
    }

    /* first pass: collect sort keys and normalized tokens */
    sort_result = wrbuf_alloc();
    result = wrbuf_alloc();
    icu_iter_first(iter, input);
    while (icu_iter_next(iter))
    {
        const char *sort_str = icu_iter_get_sortkey(iter);
        if (sort_str)
        {
            wrbuf_puts(sort_result, "[");
            wrbuf_puts_escaped(sort_result, sort_str);
            wrbuf_puts(sort_result, "]");
        }
        else
        {
            wrbuf_puts(sort_result, "[NULL]");
        }
        wrbuf_puts(result, "[");
        wrbuf_puts(result, icu_iter_get_norm(iter));
        wrbuf_puts(result, "]");
    }
    yaz_log(YLOG_LOG, "sortkey=%s", wrbuf_cstr(sort_result));

    /* second pass: restart the same iterator and collect tokens again */
    second = wrbuf_alloc();
    icu_iter_first(iter, input);
    while (icu_iter_next(iter))
    {
        wrbuf_puts(second, "[");
        wrbuf_puts(second, icu_iter_get_norm(iter));
        wrbuf_puts(second, "]");
    }

    icu_iter_destroy(iter);

    if (strcmp(expected, wrbuf_cstr(result)))
    {
        yaz_log(YLOG_WARN, "test_iter: input=%s expected=%s got=%s",
                input, expected, wrbuf_cstr(result));
        success = 0;
    }

    if (strcmp(expected, wrbuf_cstr(second)))
    {
        yaz_log(YLOG_WARN, "test_iter: input=%s expected=%s got=%s (2nd)",
                input, expected, wrbuf_cstr(second));
        success = 0;
    }

    wrbuf_destroy(result);
    wrbuf_destroy(second);
    wrbuf_destroy(sort_result);
    return success;
}