예제 #1
0
/*
 * Append the metadata values in ml to w as <md-NAME ...>value</md-NAME>
 * lines, one line per value.
 *
 * service  supplies the metadata definitions (name, type, brief flag);
 *          ml is indexed in step with service->metadata.
 * full     when zero, only definitions flagged as brief are written.
 * indent   number of space characters emitted before each element.
 */
static void write_metadata(WRBUF w, struct conf_service *service,
                           struct record_metadata **ml, int full, int indent)
{
    int field;

    for (field = 0; field < service->num_metadata; field++)
    {
        struct conf_metadata *def = &service->metadata[field];
        struct record_metadata *value;

        /* Non-brief fields are only shown when the full view is requested. */
        if (def->brief || full)
        {
            value = ml[field];
            while (value)
            {
                struct record_metadata_attr *a;
                int pad;

                for (pad = indent; pad > 0; pad--)
                    wrbuf_putc(w, ' ');
                wrbuf_printf(w, "<md-%s", def->name);

                /* Attribute values are XML-escaped; names are written as is. */
                a = value->attributes;
                while (a)
                {
                    wrbuf_printf(w, " %s=\"", a->name);
                    wrbuf_xmlputs(w, a->value);
                    wrbuf_puts(w, "\"");
                    a = a->next;
                }
                wrbuf_puts(w, ">");

                if (def->type == Metadata_type_generic)
                {
                    wrbuf_xmlputs(w, value->data.text.disp);
                }
                else if (def->type == Metadata_type_year)
                {
                    /* A single year, or "min-max" when the two differ. */
                    wrbuf_printf(w, "%d", value->data.number.min);
                    if (value->data.number.min != value->data.number.max)
                        wrbuf_printf(w, "-%d", value->data.number.max);
                }
                else
                {
                    wrbuf_puts(w, "[can't represent]");
                }
                wrbuf_printf(w, "</md-%s>\n", def->name);

                value = value->next;
            }
        }
    }
}
예제 #2
0
/*
 * Append one <location> element for record r to w.
 *
 * The element carries the client id, the PZ_NAME setting of the client's
 * database ("Unknown" when that setting is an empty string) and the record
 * checksum as attributes, followed by the record's metadata written with
 * an indent of 2. show_details is handed to write_metadata as its "full"
 * flag.
 */
static void write_subrecord(struct record *r, WRBUF w,
                            struct conf_service *service, int show_details)
{
    const char *label = session_setting_oneval(
                            client_get_database(r->client), PZ_NAME);

    if (*label == '\0')
        label = "Unknown";

    wrbuf_puts(w, " <location id=\"");
    wrbuf_xmlputs(w, client_get_id(r->client));
    wrbuf_puts(w, "\"\n"
                  "    name=\"");
    wrbuf_xmlputs(w, label);
    wrbuf_puts(w, "\" "
                  "checksum=\"");
    wrbuf_printf(w, "%u", r->checksum);
    wrbuf_puts(w, "\">\n");

    write_metadata(w, service, r->metadata, show_details, 2);

    wrbuf_puts(w, " </location>\n");
}
예제 #3
0
/*
 * Build the response for the "info" command on channel c.
 *
 * The response contains a <version> element (pazpar2 version, plus the
 * YAZ version compiled against and the one reported by yaz_version()),
 * an optional <host> element when HAVE_UNISTD_H is set and gethostname()
 * succeeds, and whatever info_services() adds for c->server.
 */
static void cmd_info(struct http_channel *c)
{
    char yaz_version_str[20];
    char yaz_sha1_str[42];

    response_open_no_status(c, "info");
    wrbuf_puts(c->wrbuf, "\n <version>\n");
    wrbuf_puts(c->wrbuf, "  <pazpar2");
#ifdef PAZPAR2_VERSION_SHA1
    wrbuf_printf(c->wrbuf, " sha1=\"%s\"", PAZPAR2_VERSION_SHA1);
#endif
    wrbuf_puts(c->wrbuf, ">");
    wrbuf_xmlputs(c->wrbuf, VERSION);
    wrbuf_puts(c->wrbuf, "</pazpar2>\n");

    yaz_version(yaz_version_str, yaz_sha1_str);
    wrbuf_puts(c->wrbuf, "  <yaz compiled=\"");
    wrbuf_xmlputs(c->wrbuf, YAZ_VERSION);
    wrbuf_puts(c->wrbuf, "\" sha1=\"");
    wrbuf_xmlputs(c->wrbuf, yaz_sha1_str);
    wrbuf_puts(c->wrbuf, "\">");
    wrbuf_xmlputs(c->wrbuf, yaz_version_str);
    wrbuf_puts(c->wrbuf, "</yaz>\n");

    wrbuf_puts(c->wrbuf, " </version>\n");
#if HAVE_UNISTD_H
    {
        char hostname_str[64];
        if (gethostname(hostname_str, sizeof(hostname_str)) == 0)
        {
            /* POSIX leaves it unspecified whether a truncated host name is
               NUL-terminated, so terminate explicitly before treating the
               buffer as a C string. */
            hostname_str[sizeof(hostname_str) - 1] = '\0';
            wrbuf_puts(c->wrbuf, " <host>");
            wrbuf_xmlputs(c->wrbuf, hostname_str);
            wrbuf_puts(c->wrbuf, "</host>\n");
        }
    }
#endif
    info_services(c->server, c->wrbuf);

    response_close(c, "info");
}
예제 #4
0
static void error(struct http_response *rs,
                  enum pazpar2_error_code code,
                  const char *addinfo)
{
    struct http_channel *c = rs->channel;
    WRBUF text = wrbuf_alloc();
    const char *http_status = "417";
    const char *msg = get_msg(code);

    rs->msg = nmem_strdup(c->nmem, msg);
    strcpy(rs->code, http_status);

    wrbuf_printf(text, HTTP_COMMAND_RESPONSE_PREFIX "<error code=\"%d\" msg=\"%s\">", (int) code,
                 msg);
    if (addinfo)
        wrbuf_xmlputs(text, addinfo);
    wrbuf_puts(text, "</error>");

    yaz_log(YLOG_WARN, "HTTP %s %s%s%s", http_status,
            msg, addinfo ? ": " : "" , addinfo ? addinfo : "");
    rs->payload = nmem_strdup(c->nmem, wrbuf_cstr(text));
    wrbuf_destroy(text);
    http_send_response(c);
}
예제 #5
0
/*
 * Tokenize `words` and fold the weights of the matching query terms into
 * the term-frequency vectors of `cluster`.
 *
 * r        relevance state: tokenizer (prt), term list (entries), scratch
 *          arrays (term_frequency_vec_tmp, term_pos) and tuning values
 *          (lead_decay, follow_factor, length_divide).
 * cluster  receives the results in term_frequency_vec / term_frequency_vecf;
 *          a textual trace of every step is appended to relevance_explain1.
 * words    the text to tokenize.
 * rank     passed to word_entry_match(); must not be NULL (asserted).
 * name     only used as the "field=" label in the explain trace.
 */
void relevance_countwords(struct relevance *r, struct record_cluster *cluster,
                          const char *words, const char *rank,
                          const char *name)
{
    int *w = r->term_frequency_vec_tmp;
    const char *norm_str;
    int i, length = 0;
    double lead_decay = r->lead_decay;
    struct word_entry *e;
    WRBUF wr = cluster->relevance_explain1;
    int printed_about_field = 0;

    pp2_charset_token_first(r->prt, words, 0);
    /* Clear the scratch weights and term positions for indices
       1..vec_len-1 (index 0 is not touched here). `e` is advanced
       alongside i but is not read inside this loop. */
    for (e = r->entries, i = 1; i < r->vec_len; i++, e = e->next)
    {
        w[i] = 0;
        r->term_pos[i] = 0;
    }

    assert(rank);
    /* `length` counts the tokens consumed so far, matched or not. */
    while ((norm_str = pp2_charset_token_next(r->prt)))
    {
        int local_weight = 0;
        e = word_entry_match(r, norm_str, rank, &local_weight);
        if (e)
        {
            int res = e->termno;
            int j;

            /* Emit the field header once, on the first match only. Text
               longer than 50 bytes is cut via wrbuf_xmlputs_n with a
               length of 49 and followed by " ...". */
            if (!printed_about_field)
            {
                printed_about_field = 1;
                wrbuf_printf(wr, "field=%s content=", name);
                if (strlen(words) > 50)
                {
                    wrbuf_xmlputs_n(wr, words, 49);
                    wrbuf_puts(wr, " ...");
                }
                else
                    wrbuf_xmlputs(wr, words);
                wrbuf_puts(wr, ";\n");
            }
            assert(res < r->vec_len);
            /* The weight shrinks the later the token appears. w[] is an
               int array, so the sum is converted back to int when stored. */
            w[res] += local_weight / (1 + log2(1 + lead_decay * length));
            wrbuf_printf(wr, "%s: w[%d] += w(%d) / "
                         "(1+log2(1+lead_decay(%f) * length(%d)));\n",
                         e->display_str, res, local_weight, lead_decay, length);
            /* Follow bonus: applies when the term numbered one lower has a
               recorded position; d is the distance from that position to
               the 1-based position of the current token. */
            j = res - 1;
            if (j > 0 && r->term_pos[j])
            {
                int d = length + 1 - r->term_pos[j];
                /* The trace shows w[res] as it is before the bonus is
                   added. NOTE(review): the explain text below has one more
                   '(' than ')'. */
                wrbuf_printf(wr, "%s: w[%d] += w[%d](%d) * follow(%f) / "
                             "(1+log2(d(%d));\n",
                             e->display_str, res, res, w[res],
                             r->follow_factor, d);
                w[res] += w[res] * r->follow_factor / (1 + log2(d));
            }
            /* Positions below the matched term number are cleared; the
               matched term and every higher index record the 1-based
               position of this token. */
            for (j = 0; j < r->vec_len; j++)
                r->term_pos[j] = j < res ? 0 : length + 1;
        }
        length++;
    }

    /* Fold the per-call weights into the cluster totals, walking the term
       list in step with the index so display_str matches w[i]. */
    for (e = r->entries, i = 1; i < r->vec_len; i++, e = e->next)
    {
        if (length == 0 || w[i] == 0)
            continue;
        wrbuf_printf(wr, "%s: tf[%d] += w[%d](%d)", e->display_str, i, i, w[i]);
        /* length_divide selects the normalisation: 0 = none,
           1 = divide by log2(1+length), 2 = divide by length. There is no
           default case, so any other value leaves term_frequency_vecf[i]
           unchanged. */
        switch (r->length_divide)
        {
        case 0:
            cluster->term_frequency_vecf[i] += (double) w[i];
            break;
        case 1:
            wrbuf_printf(wr, " / log2(1+length(%d))", length);
            cluster->term_frequency_vecf[i] +=
                (double) w[i] / log2(1 + length);
            break;
        case 2:
            wrbuf_printf(wr, " / length(%d)", length);
            cluster->term_frequency_vecf[i] += (double) w[i] / length;
        }
        cluster->term_frequency_vec[i] += w[i];
        wrbuf_printf(wr, " (%f);\n", cluster->term_frequency_vecf[i]);
    }

    /* Slot 0 accumulates the total number of tokens seen. */
    cluster->term_frequency_vec[0] += length;
}
예제 #6
0
파일: yaz-icu.c 프로젝트: funkymalc/yaz
/*
 * Run every line of p_config->infile through the ICU chain described by
 * the XML file p_config->conffile and print one entry per token to
 * p_config->outfile.
 *
 * With p_config->xmloutput set, the output is an <icu><tokens> document
 * with one <token> element per token; otherwise it is one plain-text line
 * per token. sortoutput adds the escaped sort key; org_output (plain-text
 * mode only) adds the token's start+length and the original string with
 * the token span wrapped in '*'.
 *
 * The process exits with status 1 when the configuration cannot be parsed
 * or the chain cannot be built. The chain stored in p_config->chain is
 * destroyed before returning.
 */
static void process_text_file(struct config_t *p_config)
{
    /* Lines are read into this stack buffer. The original code also kept a
       `char *line` alias to it and called free() on that alias after the
       loop; that pointer could only ever be linebuf or NULL, so the free()
       was at best dead code and at worst an invalid free of stack memory.
       Both the alias and the free() are gone. */
    char linebuf[1024];

    xmlDoc *doc = xmlParseFile(p_config->conffile);
    xmlNode *xml_node = xmlDocGetRootElement(doc);

    long unsigned int token_count = 0;
    long unsigned int line_count = 0;

    UErrorCode status = U_ZERO_ERROR;

    if (!xml_node)
    {
        printf("Could not parse XML config file '%s' \n",
                p_config->conffile);
        exit(1);
    }

    p_config->chain = icu_chain_xml_config(xml_node, 1, &status);

    if (!p_config->chain || !U_SUCCESS(status))
    {
        printf("Could not set up ICU chain from config file '%s' \n",
                p_config->conffile);
        exit(1);
    }

    if (p_config->xmloutput)
        fprintf(p_config->outfile,
                "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
                "<icu>\n"
                "<tokens>\n");

    /* read input lines for processing */
    while (fgets(linebuf, sizeof(linebuf)-1, p_config->infile))
    {
        WRBUF sw = wrbuf_alloc();
        WRBUF cdata = wrbuf_alloc();
        int success = icu_chain_assign_cstr(p_config->chain, linebuf, &status);
        line_count++;

        while (success && icu_chain_next_token(p_config->chain, &status))
        {
            if (U_FAILURE(status))
                success = 0;
            else
            {
                size_t start, len;
                const char *org_string = 0;
                const char *sortkey = icu_chain_token_sortkey(p_config->chain);

                icu_chain_get_org_info2(p_config->chain, &start, &len,
                                        &org_string);
                /* sw holds the escaped sort key for this token. */
                wrbuf_rewind(sw);
                wrbuf_puts_escaped(sw, sortkey);
                token_count++;
                if (p_config->xmloutput)
                {
                    fprintf(p_config->outfile,
                            "<token id=\"%lu\" line=\"%lu\"",
                            token_count, line_count);

                    /* cdata is reused as the XML-escaping scratch buffer
                       for each attribute value in turn. */
                    wrbuf_rewind(cdata);
                    wrbuf_xmlputs(cdata, icu_chain_token_norm(p_config->chain));
                    fprintf(p_config->outfile, " norm=\"%s\"",
                            wrbuf_cstr(cdata));

                    wrbuf_rewind(cdata);
                    wrbuf_xmlputs(cdata, icu_chain_token_display(p_config->chain));
                    fprintf(p_config->outfile, " display=\"%s\"",
                            wrbuf_cstr(cdata));

                    if (p_config->sortoutput)
                    {
                        wrbuf_rewind(cdata);
                        wrbuf_xmlputs(cdata, wrbuf_cstr(sw));
                        fprintf(p_config->outfile, " sortkey=\"%s\"",
                                wrbuf_cstr(cdata));
                    }
                    fprintf(p_config->outfile, "/>\n");
                }
                else
                {
                    fprintf(p_config->outfile, "%lu %lu '%s' '%s'",
                            token_count,
                            line_count,
                            icu_chain_token_norm(p_config->chain),
                            icu_chain_token_display(p_config->chain));
                    if (p_config->sortoutput)
                    {
                        fprintf(p_config->outfile, " '%s'", wrbuf_cstr(sw));
                    }
                    if (p_config->org_output)
                    {
                        /* Print "start+len", then the original string with
                           the token span bracketed by '*'. */
                        fprintf(p_config->outfile, " %ld+%ld",
                                (long) start, (long) len);
                        fputc(' ', p_config->outfile);
                        fwrite(org_string, 1, start, p_config->outfile);
                        fputc('*', p_config->outfile);
                        fwrite(org_string + start, 1, len, p_config->outfile);
                        fputc('*', p_config->outfile);
                        fputs(org_string + start + len, p_config->outfile);
                    }
                    fprintf(p_config->outfile, "\n");
                }
            }
        }
        wrbuf_destroy(sw);
        wrbuf_destroy(cdata);
    }

    if (p_config->xmloutput)
        fprintf(p_config->outfile,
                "</tokens>\n"
                "</icu>\n");

    icu_chain_destroy(p_config->chain);
    xmlFreeDoc(doc);
}
예제 #7
0
static void show_record(struct http_channel *c, struct http_session *s)
{
    struct http_response *rs = c->response;
    struct http_request *rq = c->request;
    struct record_cluster *rec, *prev_r, *next_r;
    struct conf_service *service;
    const char *idstr = http_argbyname(rq, "id");
    const char *offsetstr = http_argbyname(rq, "offset");
    const char *binarystr = http_argbyname(rq, "binary");
    const char *checksumstr = http_argbyname(rq, "checksum");

    if (!s)
        return;
    service = s->psession->service;
    if (!idstr)
    {
        error(rs, PAZPAR2_MISSING_PARAMETER, "id");
        return;
    }
    wrbuf_rewind(c->wrbuf);
    if (!(rec = show_single_start(s->psession, idstr, &prev_r, &next_r)))
    {
        if (session_active_clients(s->psession) == 0)
        {
            error(rs, PAZPAR2_RECORD_MISSING, idstr);
        }
        else if (session_set_watch(s->psession, SESSION_WATCH_RECORD,
                                   cmd_record_ready, c, c) != 0)
        {
            error(rs, PAZPAR2_RECORD_MISSING, idstr);
        }
        return;
    }
    if (offsetstr || checksumstr)
    {
        const char *syntax = http_argbyname(rq, "syntax");
        const char *esn = http_argbyname(rq, "esn");
        int i;
        struct record*r = rec->records;
        int binary = 0;
        const char *nativesyntax = http_argbyname(rq, "nativesyntax");

        if (binarystr && *binarystr != '0')
            binary = 1;

        if (checksumstr)
        {
            long v = atol(checksumstr);
            for (i = 0; r; r = r->next)
                if (v == r->checksum)
                    break;
            if (!r)
                error(rs, PAZPAR2_RECORD_FAIL, "no record");
        }
        else
        {
            int offset = atoi(offsetstr);
            for (i = 0; i < offset && r; r = r->next, i++)
                ;
            if (!r)
                error(rs, PAZPAR2_RECORD_FAIL, "no record at offset given");
        }
        if (r)
        {
            http_channel_observer_t obs =
                http_add_observer(c, r->client, show_raw_reset);
            int ret = client_show_raw_begin(r->client, r->position,
                                            syntax, esn,
                                            obs /* data */,
                                            show_raw_record_error,
                                            (binary ?
                                             show_raw_record_ok_binary :
                                             show_raw_record_ok),
                                            (binary ? 1 : 0),
                                            nativesyntax);
            if (ret == -1)
            {
                http_remove_observer(obs);
                error(rs, PAZPAR2_NO_SESSION, 0);
            }
        }
    }
    else
    {
        struct record *r;
        response_open_no_status(c, "record");
        wrbuf_puts(c->wrbuf, "\n <recid>");
        wrbuf_xmlputs(c->wrbuf, rec->recid);
        wrbuf_puts(c->wrbuf, "</recid>\n");
        if (prev_r)
        {
            wrbuf_puts(c->wrbuf, " <prevrecid>");
            wrbuf_xmlputs(c->wrbuf, prev_r->recid);
            wrbuf_puts(c->wrbuf, "</prevrecid>\n");
        }
        if (next_r)
        {
            wrbuf_puts(c->wrbuf, " <nextrecid>");
            wrbuf_xmlputs(c->wrbuf, next_r->recid);
            wrbuf_puts(c->wrbuf, "</nextrecid>\n");
        }
        wrbuf_printf(c->wrbuf, " <activeclients>%d</activeclients>\n",
                     session_active_clients(s->psession));
        write_metadata(c->wrbuf, service, rec->metadata, 1, 1);
        for (r = rec->records; r; r = r->next)
            write_subrecord(r, c->wrbuf, service, 2);
        response_close(c, "record");
    }
    show_single_stop(s->psession, rec);
}
예제 #8
0
static void bytarget_response(struct http_channel *c, struct http_session *s, const char *cmd_status) {
    int count, i;
    struct hitsbytarget *ht;
    struct http_request *rq = c->request;
    const char *settings = http_argbyname(rq, "settings");
    int version = get_version(rq);
    ht = get_hitsbytarget(s->psession, &count, c->nmem);
    if (!cmd_status)
        /* Old protocol, always ok */
        response_open(c, "bytarget");
    else {
        /* New protocol, OK or WARNING (...)*/
        response_open_no_status(c, "bytarget");
        wrbuf_printf(c->wrbuf, "<status>%s</status>", cmd_status);
    }

    if (count == 0)
        yaz_log(YLOG_WARN, "Empty bytarget Response. No targets found!");
    for (i = 0; i < count; i++)
    {
        wrbuf_puts(c->wrbuf, "\n<target>");

        wrbuf_puts(c->wrbuf, "<id>");
        wrbuf_xmlputs(c->wrbuf, ht[i].id);
        wrbuf_puts(c->wrbuf, "</id>\n");

        if (ht[i].name && ht[i].name[0])
        {
            wrbuf_puts(c->wrbuf, "<name>");
            wrbuf_xmlputs(c->wrbuf, ht[i].name);
            wrbuf_puts(c->wrbuf, "</name>\n");
        }

        wrbuf_printf(c->wrbuf, "<hits>" ODR_INT_PRINTF "</hits>\n", ht[i].hits);
        wrbuf_printf(c->wrbuf, "<diagnostic>%d</diagnostic>\n", ht[i].diagnostic);
        if (ht[i].diagnostic)
        {
            wrbuf_puts(c->wrbuf, "<message>");
            wrbuf_xmlputs(c->wrbuf, ht[i].message);
            wrbuf_puts(c->wrbuf, "</message>\n");
            wrbuf_puts(c->wrbuf, "<addinfo>");
            if (ht[i].addinfo)
                wrbuf_xmlputs(c->wrbuf, ht[i].addinfo);
            wrbuf_puts(c->wrbuf, "</addinfo>\n");
        }

        wrbuf_printf(c->wrbuf, "<records>%d</records>\n", ht[i].records - ht[i].filtered);
        if (version >= 2) {
            wrbuf_printf(c->wrbuf, "<filtered>%d</filtered>\n", ht[i].filtered);
            wrbuf_printf(c->wrbuf, "<approximation>" ODR_INT_PRINTF "</approximation>\n", ht[i].approximation);
        }
        wrbuf_puts(c->wrbuf, "<state>");
        wrbuf_xmlputs(c->wrbuf, ht[i].state);
        wrbuf_puts(c->wrbuf, "</state>\n");
        if (settings && *settings == '1')
        {
            wrbuf_puts(c->wrbuf, "<settings>\n");
            wrbuf_puts(c->wrbuf, ht[i].settings_xml);
            wrbuf_puts(c->wrbuf, "</settings>\n");
        }
        if (ht[i].suggestions_xml && ht[i].suggestions_xml[0]) {
            wrbuf_puts(c->wrbuf, "<suggestions>");
            wrbuf_puts(c->wrbuf, ht[i].suggestions_xml);
            wrbuf_puts(c->wrbuf, "</suggestions>");
        }
        wrbuf_puts(c->wrbuf, "</target>");
    }
    response_close(c, "bytarget");
}
예제 #9
0
static void show_records(struct http_channel *c, struct http_session *s, int active)
{
    struct http_request *rq = c->request;
    struct http_response *rs = c->response;
    struct record_cluster **rl;
    struct reclist_sortparms *sp;
    const char *start = http_argbyname(rq, "start");
    const char *num = http_argbyname(rq, "num");
    const char *sort = http_argbyname(rq, "sort");
    int version = get_version(rq);

    int startn = 0;
    int numn = 20;
    int total;
    Odr_int total_hits;
    Odr_int approx_hits;
    int i;
    struct conf_service *service = 0;
    if (!s)
        return;

    // We haven't counted clients yet if we're called on a block release
    if (active < 0)
        active = session_active_clients(s->psession);

    if (start)
        startn = atoi(start);
    if (num)
        numn = atoi(num);

    service = s->psession->service;
    if (!sort) {
        sort = service->default_sort;
    }
    if (!(sp = reclist_parse_sortparms(c->nmem, sort, service)))
    {
        error(rs, PAZPAR2_MALFORMED_PARAMETER_VALUE, "sort");
        return;

    }

    rl = show_range_start(s->psession, sp, startn, &numn, &total, &total_hits, &approx_hits);

    response_open(c, "show");
    wrbuf_printf(c->wrbuf, "\n<activeclients>%d</activeclients>\n", active);
    wrbuf_printf(c->wrbuf, "<merged>%d</merged>\n", total);
    wrbuf_printf(c->wrbuf, "<total>" ODR_INT_PRINTF "</total>\n", total_hits);
    if (version >= 2) {
        wrbuf_printf(c->wrbuf, "<approximation>" ODR_INT_PRINTF "</approximation>\n", approx_hits);
    }
    wrbuf_printf(c->wrbuf, "<start>%d</start>\n", startn);
    wrbuf_printf(c->wrbuf, "<num>%d</num>\n", numn);

    for (i = 0; i < numn; i++)
    {
        int ccount;
        struct record *p;
        struct record_cluster *rec = rl[i];
        struct conf_service *service = s->psession->service;

        wrbuf_puts(c->wrbuf, "<hit>\n");
        write_metadata(c->wrbuf, service, rec->metadata, 0, 1);
        for (ccount = 0, p = rl[i]->records; p;  p = p->next, ccount++)
            write_subrecord(p, c->wrbuf, service, 0); // subrecs w/o details
        wrbuf_printf(c->wrbuf, " <count>%d</count>\n", ccount);
        if (strstr(sort, "relevance"))
        {
            wrbuf_printf(c->wrbuf, " <relevance>%d</relevance>\n",
                         rec->relevance_score);
            if (service->rank_debug)
            {
                wrbuf_printf(c->wrbuf, " <relevance_info>\n");
                wrbuf_xmlputs(c->wrbuf, wrbuf_cstr(rec->relevance_explain1));
                wrbuf_xmlputs(c->wrbuf, wrbuf_cstr(rec->relevance_explain2));
                wrbuf_printf(c->wrbuf, " </relevance_info>\n");
            }
        }
        wrbuf_puts(c->wrbuf, " <recid>");
        wrbuf_xmlputs(c->wrbuf, rec->recid);
        wrbuf_puts(c->wrbuf, "</recid>\n");
        wrbuf_puts(c->wrbuf, "</hit>\n");
    }

    show_range_stop(s->psession, rl);

    response_close(c, "show");
}