Exemple #1
0
/*
 * Advance the ICU iterator held by `prt` and hand back the value that
 * icu_iter_get_norm() reports for the new position.
 *
 * Returns 0 when icu_iter_next() reports that nothing more is available.
 */
static const char *pp2_charset_token_icu(pp2_charset_token_t prt)
{
    /* Guard clause: iterator is exhausted, signal end with a null pointer. */
    if (!icu_iter_next(prt->iter))
        return 0;

    return icu_iter_get_norm(prt->iter);
}
Exemple #2
0
/*
 * Test case: build an ICU chain from an inline XML configuration, iterate
 * over a fixed sample string and log every value returned by
 * icu_iter_get_norm().
 *
 * Each setup step is verified with YAZ_CHECK; on failure the function
 * releases whatever it has acquired so far and returns.
 */
static void check_icu_iter1(void)
{
    UErrorCode status = U_ZERO_ERROR;
    struct icu_chain *chain = 0;
    xmlNode *xml_node;
    yaz_icu_iter_t iter;

    const char *xml_str = "<icu locale=\"en\">"
        "<tokenize rule=\"w\"/>"
        "<transform rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>"
        "</icu>";

    xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
    YAZ_CHECK(doc);
    if (!doc)
        return;
    xml_node = xmlDocGetRootElement(doc);
    YAZ_CHECK(xml_node);
    if (!xml_node)
    {
        /* The parsed document is still owned by us on this path. */
        xmlFreeDoc(doc);
        return;
    }

    chain = icu_chain_xml_config(xml_node, 1, &status);

    /* The XML document is not used after the chain has been configured. */
    xmlFreeDoc(doc);
    YAZ_CHECK(chain);
    if (!chain)
        return;

    iter = icu_iter_create(chain);
    YAZ_CHECK(iter);
    if (!iter)
    {
        /* Do not leak the chain when the iterator could not be created. */
        icu_chain_destroy(chain);
        return;
    }

    /* Only touch the iterator once it is known to be valid. */
    icu_iter_first(iter, "a string with 15 tokens and 8 displays");
    while (icu_iter_next(iter))
    {
        yaz_log(YLOG_LOG, "[%s]", icu_iter_get_norm(iter));
    }
    icu_iter_destroy(iter);
    icu_chain_destroy(chain);
}
Exemple #3
0
/*
 * Return what icu_iter_get_norm() reports for the iterator attached to
 * `chain`, or 0 when the chain has no iterator.
 */
const char *icu_chain_token_norm(struct icu_chain *chain)
{
    return chain->iter ? icu_iter_get_norm(chain->iter) : 0;
}
Exemple #4
0
/*
 * Helper for check_norm(): restart `it` on `input` and, for every step the
 * iterator yields, check that icu_iter_get_norm() returns a non-null string
 * (which is logged with its length) and that the start/len values reported
 * by icu_iter_get_org_info() do not exceed 1000.
 */
static void check_norm_input(yaz_icu_iter_t it, const char *input)
{
    icu_iter_first(it, input);
    while (icu_iter_next(it))
    {
        const char *norm_str = icu_iter_get_norm(it);
        /* Initialised so the checks below never read indeterminate values. */
        size_t start = 0, len = 0;

        YAZ_CHECK(norm_str);
        if (norm_str)
            yaz_log(YLOG_LOG, "norm_str len=%ld=%s",
                    (long) strlen(norm_str), norm_str);
        icu_iter_get_org_info(it, &start, &len);
        YAZ_CHECK(start <= 1000);
        YAZ_CHECK(len <= 1000);
    }
}

/*
 * Test case: configure an ICU chain from an inline XML snippet and run it
 * over two inputs containing a character outside the Basic Multilingual
 * Plane, one of them with a leading newline. The per-token checks are
 * performed by check_norm_input().
 */
static void check_norm(void)
{
    UErrorCode status = U_ZERO_ERROR;
    struct icu_chain *chain = 0;
    xmlNode *xml_node;
    yaz_icu_iter_t it;

    const char *xml_str =
        "  <icu_chain id=\"relevance\" locale=\"en\">"
        "    <transform rule=\"[:Control:] Any-Remove\"/>"
        "    <tokenize rule=\"l\"/>"
        "    <transform rule=\"[[:WhiteSpace:][:Punctuation:]`] Remove\"/>"
        "    <casemap rule=\"l\"/>"
        "  </icu_chain>";

    xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
    YAZ_CHECK(doc);
    if (!doc)
        return;
    xml_node = xmlDocGetRootElement(doc);
    YAZ_CHECK(xml_node);
    if (!xml_node)
    {
        /* The parsed document is still owned by us on this path. */
        xmlFreeDoc(doc);
        return;
    }
    chain = icu_chain_xml_config(xml_node, 1, &status);

    it = icu_iter_create(chain);
    if (it)
    {
        check_norm_input(it, " y😄");
        check_norm_input(it, "\n y😄");
    }
    icu_iter_destroy(it);
    icu_chain_destroy(chain);
    xmlFreeDoc(doc);
}
Exemple #5
0
/*
 * Run an iterator created from `chain` over `input` twice and compare the
 * bracketed concatenation of icu_iter_get_norm() values ("[a][b]...") of each
 * pass against `expected`. The sort keys seen on the first pass are logged.
 *
 * Also verifies that icu_iter_next() yields nothing before icu_iter_first()
 * has been called.
 *
 * Returns 1 when both passes match `expected`, 0 otherwise (including when
 * the iterator cannot be created).
 */
static int test_iter(struct icu_chain *chain, const char *input,
                     const char *expected)
{
    yaz_icu_iter_t iter = icu_iter_create(chain);
    WRBUF result, second, sort_result;
    int success = 1;

    if (!iter)
    {
        yaz_log(YLOG_WARN, "test_iter: input=%s !iter", input);
        return 0;
    }

    if (icu_iter_next(iter))
    {
        yaz_log(YLOG_WARN, "test_iter: expecting 0 before icu_iter_first");
        /* Release the iterator; it was leaked on this path before. */
        icu_iter_destroy(iter);
        return 0;
    }

    /* First pass: collect both the sort keys and the normalized values. */
    sort_result = wrbuf_alloc();
    result = wrbuf_alloc();
    icu_iter_first(iter, input);
    while (icu_iter_next(iter))
    {
        const char *sort_str = icu_iter_get_sortkey(iter);
        if (sort_str)
        {
            wrbuf_puts(sort_result, "[");
            wrbuf_puts_escaped(sort_result, sort_str);
            wrbuf_puts(sort_result, "]");
        }
        else
        {
            wrbuf_puts(sort_result, "[NULL]");
        }
        wrbuf_puts(result, "[");
        wrbuf_puts(result, icu_iter_get_norm(iter));
        wrbuf_puts(result, "]");
    }
    yaz_log(YLOG_LOG, "sortkey=%s", wrbuf_cstr(sort_result));

    /* Second pass: restarting the same iterator must give the same output. */
    second = wrbuf_alloc();
    icu_iter_first(iter, input);
    while (icu_iter_next(iter))
    {
        wrbuf_puts(second, "[");
        wrbuf_puts(second, icu_iter_get_norm(iter));
        wrbuf_puts(second, "]");
    }

    icu_iter_destroy(iter);

    if (strcmp(expected, wrbuf_cstr(result)))
    {
        yaz_log(YLOG_WARN, "test_iter: input=%s expected=%s got=%s",
                input, expected, wrbuf_cstr(result));
        success = 0;
    }

    if (strcmp(expected, wrbuf_cstr(second)))
    {
        yaz_log(YLOG_WARN, "test_iter: input=%s expected=%s got=%s (2nd)",
                input, expected, wrbuf_cstr(second));
        success = 0;
    }

    wrbuf_destroy(result);
    wrbuf_destroy(second);
    wrbuf_destroy(sort_result);
    return success;
}