Exemple #1
0
static int tst_convert_x(yaz_iconv_t cd, const char *buf, const char *cmpbuf,
                         int expect_error)
{
    int ret = 1;
    WRBUF b = wrbuf_alloc();
    char outbuf[16];
    size_t inbytesleft = strlen(buf);
    const char *inp = buf;
    int rounds = 0;
    for (rounds = 0; inbytesleft && rounds < (int) sizeof(outbuf); rounds++)
    {
        size_t outbytesleft = sizeof(outbuf);
        char *outp = outbuf;
        size_t r = yaz_iconv(cd, (char**) &inp,  &inbytesleft,
                             &outp, &outbytesleft);
        wrbuf_write(b, outbuf, outp - outbuf);
        if (r == (size_t) (-1))
        {
            int e = yaz_iconv_error(cd);
            if (e != YAZ_ICONV_E2BIG)
            {
                if (expect_error != -1)
                    if (e != expect_error)
                        ret = 0;
                break;
            }
        }
        else
        {
            size_t outbytesleft = sizeof(outbuf);
            char *outp = outbuf;
            r = yaz_iconv(cd, 0, 0, &outp, &outbytesleft);
            wrbuf_write(b, outbuf, outp - outbuf);
            if (expect_error != -1)
                if (expect_error)
                    ret = 0;
            break;
        }
    }
    if (wrbuf_len(b) == strlen(cmpbuf)
        && !memcmp(cmpbuf, wrbuf_buf(b), wrbuf_len(b)))
        ;
    else
    {
        WRBUF w = wrbuf_alloc();

        ret = 0;
        wrbuf_rewind(w);
        wrbuf_puts_escaped(w, buf);
        yaz_log(YLOG_LOG, "input %s", wrbuf_cstr(w));

        wrbuf_rewind(w);
        wrbuf_write_escaped(w, wrbuf_buf(b), wrbuf_len(b));
        yaz_log(YLOG_LOG, "got %s", wrbuf_cstr(w));

        wrbuf_rewind(w);
        wrbuf_puts_escaped(w, cmpbuf);
        yaz_log(YLOG_LOG, "exp %s", wrbuf_cstr(w));

        wrbuf_destroy(w);
    }

    wrbuf_destroy(b);
    return ret;
}
Exemple #2
0
static void process_text_file(struct config_t *p_config)
{
    char *line = 0;
    char linebuf[1024];

    xmlDoc *doc = xmlParseFile(p_config->conffile);
    xmlNode *xml_node = xmlDocGetRootElement(doc);

    long unsigned int token_count = 0;
    long unsigned int line_count = 0;

    UErrorCode status = U_ZERO_ERROR;

    if (!xml_node)
    {
        printf("Could not parse XML config file '%s' \n",
                p_config->conffile);
        exit(1);
    }

    p_config->chain = icu_chain_xml_config(xml_node, 1, &status);

    if (!p_config->chain || !U_SUCCESS(status))
    {
        printf("Could not set up ICU chain from config file '%s' \n",
                p_config->conffile);
        exit(1);
    }

    if (p_config->xmloutput)
        fprintf(p_config->outfile,
                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
                "<icu>\n"
                "<tokens>\n");

    /* read input lines for processing */
    while ((line=fgets(linebuf, sizeof(linebuf)-1, p_config->infile)))
    {
        WRBUF sw = wrbuf_alloc();
        WRBUF cdata = wrbuf_alloc();
        int success = icu_chain_assign_cstr(p_config->chain, line, &status);
        line_count++;

        while (success && icu_chain_next_token(p_config->chain, &status))
        {
            if (U_FAILURE(status))
                success = 0;
            else
            {
                size_t start, len;
                const char *org_string = 0;
                const char *sortkey = icu_chain_token_sortkey(p_config->chain);

                icu_chain_get_org_info2(p_config->chain, &start, &len,
                                        &org_string);
                wrbuf_rewind(sw);
                wrbuf_puts_escaped(sw, sortkey);
                token_count++;
                if (p_config->xmloutput)
                {
                    fprintf(p_config->outfile,
                            "<token id=\"%lu\" line=\"%lu\"",
                            token_count, line_count);

                    wrbuf_rewind(cdata);
                    wrbuf_xmlputs(cdata, icu_chain_token_norm(p_config->chain));
                    fprintf(p_config->outfile, " norm=\"%s\"",
                            wrbuf_cstr(cdata));

                    wrbuf_rewind(cdata);
                    wrbuf_xmlputs(cdata, icu_chain_token_display(p_config->chain));
                    fprintf(p_config->outfile, " display=\"%s\"",
                            wrbuf_cstr(cdata));

                    if (p_config->sortoutput)
                    {
                        wrbuf_rewind(cdata);
                        wrbuf_xmlputs(cdata, wrbuf_cstr(sw));
                        fprintf(p_config->outfile, " sortkey=\"%s\"",
                                wrbuf_cstr(cdata));
                    }
                    fprintf(p_config->outfile, "/>\n");
                }
                else
                {
                    fprintf(p_config->outfile, "%lu %lu '%s' '%s'",
                            token_count,
                            line_count,
                            icu_chain_token_norm(p_config->chain),
                            icu_chain_token_display(p_config->chain));
                    if (p_config->sortoutput)
                    {
                        fprintf(p_config->outfile, " '%s'", wrbuf_cstr(sw));
                    }
                    if (p_config->org_output)
                    {
                        fprintf(p_config->outfile, " %ld+%ld",
                                (long) start, (long) len);
                        fputc(' ', p_config->outfile);
                        fwrite(org_string, 1, start, p_config->outfile);
                        fputc('*', p_config->outfile);
                        fwrite(org_string + start, 1, len, p_config->outfile);
                        fputc('*', p_config->outfile);
                        fputs(org_string + start + len, p_config->outfile);
                    }
                    fprintf(p_config->outfile, "\n");
                }
            }
        }
        wrbuf_destroy(sw);
        wrbuf_destroy(cdata);
    }

    if (p_config->xmloutput)
        fprintf(p_config->outfile,
                "</tokens>\n"
                "</icu>\n");

    icu_chain_destroy(p_config->chain);
    xmlFreeDoc(doc);
    if (line)
        free(line);
}
Exemple #3
0
static int test_iter(struct icu_chain *chain, const char *input,
                     const char *expected)
{
    yaz_icu_iter_t iter = icu_iter_create(chain);
    WRBUF result, second, sort_result;
    int success = 1;

    if (!iter)
    {
        yaz_log(YLOG_WARN, "test_iter: input=%s !iter", input);
        return 0;
    }

    if (icu_iter_next(iter))
    {
        yaz_log(YLOG_WARN, "test_iter: expecting 0 before icu_iter_first");
        return 0;
    }

    sort_result = wrbuf_alloc();
    result = wrbuf_alloc();
    icu_iter_first(iter, input);
    while (icu_iter_next(iter))
    {
        const char *sort_str = icu_iter_get_sortkey(iter);
        if (sort_str)
        {
            wrbuf_puts(sort_result, "[");
            wrbuf_puts_escaped(sort_result, sort_str);
            wrbuf_puts(sort_result, "]");
        }
        else
        {
            wrbuf_puts(sort_result, "[NULL]");
        }
        wrbuf_puts(result, "[");
        wrbuf_puts(result, icu_iter_get_norm(iter));
        wrbuf_puts(result, "]");
    }
    yaz_log(YLOG_LOG, "sortkey=%s", wrbuf_cstr(sort_result));
    second = wrbuf_alloc();
    icu_iter_first(iter, input);
    while (icu_iter_next(iter))
    {
        wrbuf_puts(second, "[");
        wrbuf_puts(second, icu_iter_get_norm(iter));
        wrbuf_puts(second, "]");
    }

    icu_iter_destroy(iter);

    if (strcmp(expected, wrbuf_cstr(result)))
    {
        yaz_log(YLOG_WARN, "test_iter: input=%s expected=%s got=%s",
                input, expected, wrbuf_cstr(result));
        success = 0;
    }

    if (strcmp(expected, wrbuf_cstr(second)))
    {
        yaz_log(YLOG_WARN, "test_iter: input=%s expected=%s got=%s (2nd)",
                input, expected, wrbuf_cstr(second));
        success = 0;
    }

    wrbuf_destroy(result);
    wrbuf_destroy(second);
    wrbuf_destroy(sort_result);
    return success;
}