/*
 * Append the metadata of one record (or record cluster) to w as a series
 * of <md-NAME ...>value</md-NAME> elements, one per line.
 *
 * w        output buffer
 * service  supplies the metadata field definitions (name, type, brief flag)
 * ml       array indexed by field number; each slot is a linked list of values
 * full     when zero, only fields marked brief are written
 * indent   number of spaces written in front of every element
 */
static void write_metadata(WRBUF w, struct conf_service *service,
                           struct record_metadata **ml, int full, int indent)
{
    int field_no;

    for (field_no = 0; field_no < service->num_metadata; field_no++)
    {
        struct conf_metadata *field = &service->metadata[field_no];
        struct record_metadata *value;

        /* Non-brief fields are only written in full mode. */
        if (!full && !field->brief)
            continue;

        value = ml[field_no];
        while (value)
        {
            struct record_metadata_attr *attr = value->attributes;
            int pad = indent;

            while (pad-- > 0)
                wrbuf_putc(w, ' ');

            /* Opening tag, with every attribute value XML-escaped. */
            wrbuf_printf(w, "<md-%s", field->name);
            while (attr)
            {
                wrbuf_printf(w, " %s=\"", attr->name);
                wrbuf_xmlputs(w, attr->value);
                wrbuf_puts(w, "\"");
                attr = attr->next;
            }
            wrbuf_puts(w, ">");

            /* Element content depends on the configured field type. */
            if (field->type == Metadata_type_generic)
            {
                wrbuf_xmlputs(w, value->data.text.disp);
            }
            else if (field->type == Metadata_type_year)
            {
                /* A single year, or "min-max" when the two differ. */
                wrbuf_printf(w, "%d", value->data.number.min);
                if (value->data.number.max != value->data.number.min)
                    wrbuf_printf(w, "-%d", value->data.number.max);
            }
            else
            {
                wrbuf_puts(w, "[can't represent]");
            }

            wrbuf_printf(w, "</md-%s>\n", field->name);
            value = value->next;
        }
    }
}
/*
 * Append one <location> element describing a single source record to w.
 *
 * The element carries the client id, the database name setting (PZ_NAME,
 * or "Unknown" when that setting is an empty string) and the record
 * checksum as attributes, followed by the record's metadata.
 * show_details is handed to write_metadata() as its "full" flag.
 */
static void write_subrecord(struct record *r, WRBUF w,
                            struct conf_service *service, int show_details)
{
    const char *name =
        session_setting_oneval(client_get_database(r->client), PZ_NAME);
    const char *shown_name;

    if (*name)
        shown_name = name;
    else
        shown_name = "Unknown";

    wrbuf_puts(w, " <location id=\"");
    wrbuf_xmlputs(w, client_get_id(r->client));
    wrbuf_puts(w, "\"\n"
                  " name=\"");
    wrbuf_xmlputs(w, shown_name);
    wrbuf_puts(w, "\" "
                  "checksum=\"");
    wrbuf_printf(w, "%u", r->checksum);
    wrbuf_puts(w, "\">\n");

    write_metadata(w, service, r->metadata, show_details, 2);

    wrbuf_puts(w, " </location>\n");
}
static void cmd_info(struct http_channel *c) { char yaz_version_str[20]; char yaz_sha1_str[42]; response_open_no_status(c, "info"); wrbuf_puts(c->wrbuf, "\n <version>\n"); wrbuf_puts(c->wrbuf, " <pazpar2"); #ifdef PAZPAR2_VERSION_SHA1 wrbuf_printf(c->wrbuf, " sha1=\"%s\"", PAZPAR2_VERSION_SHA1); #endif wrbuf_puts(c->wrbuf, ">"); wrbuf_xmlputs(c->wrbuf, VERSION); wrbuf_puts(c->wrbuf, "</pazpar2>\n"); yaz_version(yaz_version_str, yaz_sha1_str); wrbuf_puts(c->wrbuf, " <yaz compiled=\""); wrbuf_xmlputs(c->wrbuf, YAZ_VERSION); wrbuf_puts(c->wrbuf, "\" sha1=\""); wrbuf_xmlputs(c->wrbuf, yaz_sha1_str); wrbuf_puts(c->wrbuf, "\">"); wrbuf_xmlputs(c->wrbuf, yaz_version_str); wrbuf_puts(c->wrbuf, "</yaz>\n"); wrbuf_puts(c->wrbuf, " </version>\n"); #if HAVE_UNISTD_H { char hostname_str[64]; if (gethostname(hostname_str, sizeof(hostname_str)) == 0) { wrbuf_puts(c->wrbuf, " <host>"); wrbuf_xmlputs(c->wrbuf, hostname_str); wrbuf_puts(c->wrbuf, "</host>\n"); } } #endif info_services(c->server, c->wrbuf); response_close(c, "info"); }
static void error(struct http_response *rs, enum pazpar2_error_code code, const char *addinfo) { struct http_channel *c = rs->channel; WRBUF text = wrbuf_alloc(); const char *http_status = "417"; const char *msg = get_msg(code); rs->msg = nmem_strdup(c->nmem, msg); strcpy(rs->code, http_status); wrbuf_printf(text, HTTP_COMMAND_RESPONSE_PREFIX "<error code=\"%d\" msg=\"%s\">", (int) code, msg); if (addinfo) wrbuf_xmlputs(text, addinfo); wrbuf_puts(text, "</error>"); yaz_log(YLOG_WARN, "HTTP %s %s%s%s", http_status, msg, addinfo ? ": " : "" , addinfo ? addinfo : ""); rs->payload = nmem_strdup(c->nmem, wrbuf_cstr(text)); wrbuf_destroy(text); http_send_response(c); }
void relevance_countwords(struct relevance *r, struct record_cluster *cluster, const char *words, const char *rank, const char *name) { int *w = r->term_frequency_vec_tmp; const char *norm_str; int i, length = 0; double lead_decay = r->lead_decay; struct word_entry *e; WRBUF wr = cluster->relevance_explain1; int printed_about_field = 0; pp2_charset_token_first(r->prt, words, 0); for (e = r->entries, i = 1; i < r->vec_len; i++, e = e->next) { w[i] = 0; r->term_pos[i] = 0; } assert(rank); while ((norm_str = pp2_charset_token_next(r->prt))) { int local_weight = 0; e = word_entry_match(r, norm_str, rank, &local_weight); if (e) { int res = e->termno; int j; if (!printed_about_field) { printed_about_field = 1; wrbuf_printf(wr, "field=%s content=", name); if (strlen(words) > 50) { wrbuf_xmlputs_n(wr, words, 49); wrbuf_puts(wr, " ..."); } else wrbuf_xmlputs(wr, words); wrbuf_puts(wr, ";\n"); } assert(res < r->vec_len); w[res] += local_weight / (1 + log2(1 + lead_decay * length)); wrbuf_printf(wr, "%s: w[%d] += w(%d) / " "(1+log2(1+lead_decay(%f) * length(%d)));\n", e->display_str, res, local_weight, lead_decay, length); j = res - 1; if (j > 0 && r->term_pos[j]) { int d = length + 1 - r->term_pos[j]; wrbuf_printf(wr, "%s: w[%d] += w[%d](%d) * follow(%f) / " "(1+log2(d(%d));\n", e->display_str, res, res, w[res], r->follow_factor, d); w[res] += w[res] * r->follow_factor / (1 + log2(d)); } for (j = 0; j < r->vec_len; j++) r->term_pos[j] = j < res ? 
0 : length + 1; } length++; } for (e = r->entries, i = 1; i < r->vec_len; i++, e = e->next) { if (length == 0 || w[i] == 0) continue; wrbuf_printf(wr, "%s: tf[%d] += w[%d](%d)", e->display_str, i, i, w[i]); switch (r->length_divide) { case 0: cluster->term_frequency_vecf[i] += (double) w[i]; break; case 1: wrbuf_printf(wr, " / log2(1+length(%d))", length); cluster->term_frequency_vecf[i] += (double) w[i] / log2(1 + length); break; case 2: wrbuf_printf(wr, " / length(%d)", length); cluster->term_frequency_vecf[i] += (double) w[i] / length; } cluster->term_frequency_vec[i] += w[i]; wrbuf_printf(wr, " (%f);\n", cluster->term_frequency_vecf[i]); } cluster->term_frequency_vec[0] += length; }
static void process_text_file(struct config_t *p_config) { char *line = 0; char linebuf[1024]; xmlDoc *doc = xmlParseFile(p_config->conffile); xmlNode *xml_node = xmlDocGetRootElement(doc); long unsigned int token_count = 0; long unsigned int line_count = 0; UErrorCode status = U_ZERO_ERROR; if (!xml_node) { printf("Could not parse XML config file '%s' \n", p_config->conffile); exit(1); } p_config->chain = icu_chain_xml_config(xml_node, 1, &status); if (!p_config->chain || !U_SUCCESS(status)) { printf("Could not set up ICU chain from config file '%s' \n", p_config->conffile); exit(1); } if (p_config->xmloutput) fprintf(p_config->outfile, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" "<icu>\n" "<tokens>\n"); /* read input lines for processing */ while ((line=fgets(linebuf, sizeof(linebuf)-1, p_config->infile))) { WRBUF sw = wrbuf_alloc(); WRBUF cdata = wrbuf_alloc(); int success = icu_chain_assign_cstr(p_config->chain, line, &status); line_count++; while (success && icu_chain_next_token(p_config->chain, &status)) { if (U_FAILURE(status)) success = 0; else { size_t start, len; const char *org_string = 0; const char *sortkey = icu_chain_token_sortkey(p_config->chain); icu_chain_get_org_info2(p_config->chain, &start, &len, &org_string); wrbuf_rewind(sw); wrbuf_puts_escaped(sw, sortkey); token_count++; if (p_config->xmloutput) { fprintf(p_config->outfile, "<token id=\"%lu\" line=\"%lu\"", token_count, line_count); wrbuf_rewind(cdata); wrbuf_xmlputs(cdata, icu_chain_token_norm(p_config->chain)); fprintf(p_config->outfile, " norm=\"%s\"", wrbuf_cstr(cdata)); wrbuf_rewind(cdata); wrbuf_xmlputs(cdata, icu_chain_token_display(p_config->chain)); fprintf(p_config->outfile, " display=\"%s\"", wrbuf_cstr(cdata)); if (p_config->sortoutput) { wrbuf_rewind(cdata); wrbuf_xmlputs(cdata, wrbuf_cstr(sw)); fprintf(p_config->outfile, " sortkey=\"%s\"", wrbuf_cstr(cdata)); } fprintf(p_config->outfile, "/>\n"); } else { fprintf(p_config->outfile, "%lu %lu '%s' '%s'", token_count, 
line_count, icu_chain_token_norm(p_config->chain), icu_chain_token_display(p_config->chain)); if (p_config->sortoutput) { fprintf(p_config->outfile, " '%s'", wrbuf_cstr(sw)); } if (p_config->org_output) { fprintf(p_config->outfile, " %ld+%ld", (long) start, (long) len); fputc(' ', p_config->outfile); fwrite(org_string, 1, start, p_config->outfile); fputc('*', p_config->outfile); fwrite(org_string + start, 1, len, p_config->outfile); fputc('*', p_config->outfile); fputs(org_string + start + len, p_config->outfile); } fprintf(p_config->outfile, "\n"); } } } wrbuf_destroy(sw); wrbuf_destroy(cdata); } if (p_config->xmloutput) fprintf(p_config->outfile, "</tokens>\n" "</icu>\n"); icu_chain_destroy(p_config->chain); xmlFreeDoc(doc); if (line) free(line); }
static void show_record(struct http_channel *c, struct http_session *s) { struct http_response *rs = c->response; struct http_request *rq = c->request; struct record_cluster *rec, *prev_r, *next_r; struct conf_service *service; const char *idstr = http_argbyname(rq, "id"); const char *offsetstr = http_argbyname(rq, "offset"); const char *binarystr = http_argbyname(rq, "binary"); const char *checksumstr = http_argbyname(rq, "checksum"); if (!s) return; service = s->psession->service; if (!idstr) { error(rs, PAZPAR2_MISSING_PARAMETER, "id"); return; } wrbuf_rewind(c->wrbuf); if (!(rec = show_single_start(s->psession, idstr, &prev_r, &next_r))) { if (session_active_clients(s->psession) == 0) { error(rs, PAZPAR2_RECORD_MISSING, idstr); } else if (session_set_watch(s->psession, SESSION_WATCH_RECORD, cmd_record_ready, c, c) != 0) { error(rs, PAZPAR2_RECORD_MISSING, idstr); } return; } if (offsetstr || checksumstr) { const char *syntax = http_argbyname(rq, "syntax"); const char *esn = http_argbyname(rq, "esn"); int i; struct record*r = rec->records; int binary = 0; const char *nativesyntax = http_argbyname(rq, "nativesyntax"); if (binarystr && *binarystr != '0') binary = 1; if (checksumstr) { long v = atol(checksumstr); for (i = 0; r; r = r->next) if (v == r->checksum) break; if (!r) error(rs, PAZPAR2_RECORD_FAIL, "no record"); } else { int offset = atoi(offsetstr); for (i = 0; i < offset && r; r = r->next, i++) ; if (!r) error(rs, PAZPAR2_RECORD_FAIL, "no record at offset given"); } if (r) { http_channel_observer_t obs = http_add_observer(c, r->client, show_raw_reset); int ret = client_show_raw_begin(r->client, r->position, syntax, esn, obs /* data */, show_raw_record_error, (binary ? show_raw_record_ok_binary : show_raw_record_ok), (binary ? 
1 : 0), nativesyntax); if (ret == -1) { http_remove_observer(obs); error(rs, PAZPAR2_NO_SESSION, 0); } } } else { struct record *r; response_open_no_status(c, "record"); wrbuf_puts(c->wrbuf, "\n <recid>"); wrbuf_xmlputs(c->wrbuf, rec->recid); wrbuf_puts(c->wrbuf, "</recid>\n"); if (prev_r) { wrbuf_puts(c->wrbuf, " <prevrecid>"); wrbuf_xmlputs(c->wrbuf, prev_r->recid); wrbuf_puts(c->wrbuf, "</prevrecid>\n"); } if (next_r) { wrbuf_puts(c->wrbuf, " <nextrecid>"); wrbuf_xmlputs(c->wrbuf, next_r->recid); wrbuf_puts(c->wrbuf, "</nextrecid>\n"); } wrbuf_printf(c->wrbuf, " <activeclients>%d</activeclients>\n", session_active_clients(s->psession)); write_metadata(c->wrbuf, service, rec->metadata, 1, 1); for (r = rec->records; r; r = r->next) write_subrecord(r, c->wrbuf, service, 2); response_close(c, "record"); } show_single_stop(s->psession, rec); }
static void bytarget_response(struct http_channel *c, struct http_session *s, const char *cmd_status) { int count, i; struct hitsbytarget *ht; struct http_request *rq = c->request; const char *settings = http_argbyname(rq, "settings"); int version = get_version(rq); ht = get_hitsbytarget(s->psession, &count, c->nmem); if (!cmd_status) /* Old protocol, always ok */ response_open(c, "bytarget"); else { /* New protocol, OK or WARNING (...)*/ response_open_no_status(c, "bytarget"); wrbuf_printf(c->wrbuf, "<status>%s</status>", cmd_status); } if (count == 0) yaz_log(YLOG_WARN, "Empty bytarget Response. No targets found!"); for (i = 0; i < count; i++) { wrbuf_puts(c->wrbuf, "\n<target>"); wrbuf_puts(c->wrbuf, "<id>"); wrbuf_xmlputs(c->wrbuf, ht[i].id); wrbuf_puts(c->wrbuf, "</id>\n"); if (ht[i].name && ht[i].name[0]) { wrbuf_puts(c->wrbuf, "<name>"); wrbuf_xmlputs(c->wrbuf, ht[i].name); wrbuf_puts(c->wrbuf, "</name>\n"); } wrbuf_printf(c->wrbuf, "<hits>" ODR_INT_PRINTF "</hits>\n", ht[i].hits); wrbuf_printf(c->wrbuf, "<diagnostic>%d</diagnostic>\n", ht[i].diagnostic); if (ht[i].diagnostic) { wrbuf_puts(c->wrbuf, "<message>"); wrbuf_xmlputs(c->wrbuf, ht[i].message); wrbuf_puts(c->wrbuf, "</message>\n"); wrbuf_puts(c->wrbuf, "<addinfo>"); if (ht[i].addinfo) wrbuf_xmlputs(c->wrbuf, ht[i].addinfo); wrbuf_puts(c->wrbuf, "</addinfo>\n"); } wrbuf_printf(c->wrbuf, "<records>%d</records>\n", ht[i].records - ht[i].filtered); if (version >= 2) { wrbuf_printf(c->wrbuf, "<filtered>%d</filtered>\n", ht[i].filtered); wrbuf_printf(c->wrbuf, "<approximation>" ODR_INT_PRINTF "</approximation>\n", ht[i].approximation); } wrbuf_puts(c->wrbuf, "<state>"); wrbuf_xmlputs(c->wrbuf, ht[i].state); wrbuf_puts(c->wrbuf, "</state>\n"); if (settings && *settings == '1') { wrbuf_puts(c->wrbuf, "<settings>\n"); wrbuf_puts(c->wrbuf, ht[i].settings_xml); wrbuf_puts(c->wrbuf, "</settings>\n"); } if (ht[i].suggestions_xml && ht[i].suggestions_xml[0]) { wrbuf_puts(c->wrbuf, "<suggestions>"); 
wrbuf_puts(c->wrbuf, ht[i].suggestions_xml); wrbuf_puts(c->wrbuf, "</suggestions>"); } wrbuf_puts(c->wrbuf, "</target>"); } response_close(c, "bytarget"); }
static void show_records(struct http_channel *c, struct http_session *s, int active) { struct http_request *rq = c->request; struct http_response *rs = c->response; struct record_cluster **rl; struct reclist_sortparms *sp; const char *start = http_argbyname(rq, "start"); const char *num = http_argbyname(rq, "num"); const char *sort = http_argbyname(rq, "sort"); int version = get_version(rq); int startn = 0; int numn = 20; int total; Odr_int total_hits; Odr_int approx_hits; int i; struct conf_service *service = 0; if (!s) return; // We haven't counted clients yet if we're called on a block release if (active < 0) active = session_active_clients(s->psession); if (start) startn = atoi(start); if (num) numn = atoi(num); service = s->psession->service; if (!sort) { sort = service->default_sort; } if (!(sp = reclist_parse_sortparms(c->nmem, sort, service))) { error(rs, PAZPAR2_MALFORMED_PARAMETER_VALUE, "sort"); return; } rl = show_range_start(s->psession, sp, startn, &numn, &total, &total_hits, &approx_hits); response_open(c, "show"); wrbuf_printf(c->wrbuf, "\n<activeclients>%d</activeclients>\n", active); wrbuf_printf(c->wrbuf, "<merged>%d</merged>\n", total); wrbuf_printf(c->wrbuf, "<total>" ODR_INT_PRINTF "</total>\n", total_hits); if (version >= 2) { wrbuf_printf(c->wrbuf, "<approximation>" ODR_INT_PRINTF "</approximation>\n", approx_hits); } wrbuf_printf(c->wrbuf, "<start>%d</start>\n", startn); wrbuf_printf(c->wrbuf, "<num>%d</num>\n", numn); for (i = 0; i < numn; i++) { int ccount; struct record *p; struct record_cluster *rec = rl[i]; struct conf_service *service = s->psession->service; wrbuf_puts(c->wrbuf, "<hit>\n"); write_metadata(c->wrbuf, service, rec->metadata, 0, 1); for (ccount = 0, p = rl[i]->records; p; p = p->next, ccount++) write_subrecord(p, c->wrbuf, service, 0); // subrecs w/o details wrbuf_printf(c->wrbuf, " <count>%d</count>\n", ccount); if (strstr(sort, "relevance")) { wrbuf_printf(c->wrbuf, " <relevance>%d</relevance>\n", rec->relevance_score); 
if (service->rank_debug) { wrbuf_printf(c->wrbuf, " <relevance_info>\n"); wrbuf_xmlputs(c->wrbuf, wrbuf_cstr(rec->relevance_explain1)); wrbuf_xmlputs(c->wrbuf, wrbuf_cstr(rec->relevance_explain2)); wrbuf_printf(c->wrbuf, " </relevance_info>\n"); } } wrbuf_puts(c->wrbuf, " <recid>"); wrbuf_xmlputs(c->wrbuf, rec->recid); wrbuf_puts(c->wrbuf, "</recid>\n"); wrbuf_puts(c->wrbuf, "</hit>\n"); } show_range_stop(s->psession, rl); response_close(c, "show"); }