/*
 * Merge one query's retrieved documents with its preference judgments.
 *
 * On success prefs_and_ranks[0 .. trec_prefs->num_text_prefs) holds a copy of
 * every preference line (jg, jsg, rel_level, docno) together with a rank:
 *   - judged docnos that were retrieved get ranks 0 .. *num_judged_ret - 1,
 *     following their order in the retrieved list (only the first
 *     epi->max_num_docs_per_topic retrieved docs are considered);
 *   - judged docnos that were not retrieved get the ranks after that,
 *     handed out in docno order.
 * All lines that share a docno receive the same rank.  The array is finally
 * sorted with comp_prefs_and_ranks_jg_rel_level.
 *
 * The file-level scratch buffer docno_results is (re)sized through
 * te_chk_and_malloc.  The caller must already have reserved room for
 * trec_prefs->num_text_prefs entries in prefs_and_ranks.
 *
 * Outputs:
 *   *num_judged      number of distinct docnos in the preference lines
 *   *num_judged_ret  number of those docnos that were retrieved
 *
 * Returns 1 on success, UNDEF if the scratch buffer cannot be obtained or if
 * the retrieved list contains the same docno twice.
 */
static int form_prefs_and_ranks(const EPI *epi,
                                const TEXT_RESULTS_INFO *text_results_info,
                                const TEXT_PREFS_INFO *trec_prefs,
                                PREFS_AND_RANKS *prefs_and_ranks,
                                long *num_judged, long *num_judged_ret)
{
    long lnum_judged_ret;       /* local num_judged_ret */
    long next_unretrieved_rank;
    long i;
    long num_results;           /* Current number of results (changes as
                                   docs are thrown away from docno_results) */
    long num_prefs = trec_prefs->num_text_prefs;
    PREFS_AND_RANKS *ranks_ptr, *end_ranks, *start_ptr;

    /* Copy docno results and add ranks */
    num_results = text_results_info->num_text_results;
    if (NULL == (docno_results = te_chk_and_malloc(docno_results,
                                                   &max_docno_results,
                                                   num_results,
                                                   sizeof(DOCNO_RESULTS))))
        return (UNDEF);

    for (i = 0; i < num_results; i++) {
        docno_results[i].docno = text_results_info->text_results[i].docno;
        docno_results[i].sim = text_results_info->text_results[i].sim;
    }

    /* Sort results by sim, breaking ties lexicographically using docno.
       The element count is passed as size_t (qsort's parameter type) so a
       large long count is not truncated through int. */
    qsort(docno_results, (size_t) num_results, sizeof(DOCNO_RESULTS),
          comp_sim_docno);

    if (epi->debug_level >= 5)
        debug_print_docno_results(docno_results, num_results,
                                  "After input, before ranks");

    /* Only look at epi->max_num_docs_per_topic (not normally an issue) */
    if (num_results > epi->max_num_docs_per_topic)
        num_results = epi->max_num_docs_per_topic;

    /* Add ranks to docno_results (starting at 1) */
    for (i = 0; i < num_results; i++) {
        docno_results[i].rank = i + 1;
    }

    /* Sort docno_results by increasing docno */
    qsort(docno_results, (size_t) num_results, sizeof(DOCNO_RESULTS),
          comp_docno);

    /* Error checking for duplicates (adjacent after the docno sort) */
    for (i = 1; i < num_results; i++) {
        if (0 == strcmp(docno_results[i].docno, docno_results[i - 1].docno)) {
            fprintf(stderr,
                    "rec_eval.form_prefs_counts: duplicate docs %s\n",
                    docno_results[i].docno);
            return (UNDEF);
        }
    }

    if (epi->debug_level >= 5)
        debug_print_docno_results(docno_results, num_results,
                                  "After -M, ranks");

    /* Copy trec_prefs - sort by docno.  Space already reserved */
    for (i = 0; i < num_prefs; i++) {
        prefs_and_ranks[i].jg = trec_prefs->text_prefs[i].jg;
        prefs_and_ranks[i].jsg = trec_prefs->text_prefs[i].jsg;
        prefs_and_ranks[i].rel_level = trec_prefs->text_prefs[i].rel_level;
        prefs_and_ranks[i].docno = trec_prefs->text_prefs[i].docno;
    }
    qsort(prefs_and_ranks, (size_t) num_prefs, sizeof(PREFS_AND_RANKS),
          comp_prefs_and_ranks_docno);

    if (epi->debug_level >= 5)
        debug_print_prefs_and_ranks(prefs_and_ranks, num_prefs,
                                    "Input, before ranks");

    /* Go through docno_results and prefs_and_ranks in parallel (both sorted
       by docno) and assign ranks of -1 to those docs in docno_results that
       are not in prefs_and_ranks.  The walk stops as soon as the preference
       lines are exhausted; results not reached are dropped below, since they
       cannot be judged. */
    ranks_ptr = prefs_and_ranks;
    end_ranks = &prefs_and_ranks[num_prefs];
    for (i = 0; i < num_results && ranks_ptr < end_ranks; i++) {
        while (ranks_ptr < end_ranks
               && strcmp(ranks_ptr->docno, docno_results[i].docno) < 0)
            ranks_ptr++;
        if (ranks_ptr < end_ranks
            && strcmp(ranks_ptr->docno, docno_results[i].docno) == 0) {
            /* Judged: skip every preference line for this docno */
            do {
                ranks_ptr++;
            } while (ranks_ptr < end_ranks
                     && strcmp(ranks_ptr->docno, docno_results[i].docno) == 0);
        } else {
            /* docno_results[i] not judged */
            docno_results[i].rank = -1;
        }
    }

    /* sort docno_results[0..i] by increasing rank */
    num_results = i;
    qsort(docno_results, (size_t) num_results, sizeof(DOCNO_RESULTS),
          comp_results_inc_rank);

    if (epi->debug_level >= 5)
        debug_print_docno_results(docno_results, num_results,
                                  "After marking not judged");

    /* Assign new docid ranks starting at 0 for only judged docs.  Judged
       entries are compacted to the front of docno_results in place. */
    lnum_judged_ret = 0;
    for (i = 0; i < num_results; i++) {
        if (docno_results[i].rank > -1) {
            docno_results[lnum_judged_ret].docno = docno_results[i].docno;
            docno_results[lnum_judged_ret].rank = lnum_judged_ret;
            lnum_judged_ret++;
        }
    }
    num_results = lnum_judged_ret;

    /* Sort docno_results by increasing docno */
    qsort(docno_results, (size_t) num_results, sizeof(DOCNO_RESULTS),
          comp_docno);

    if (epi->debug_level >= 5)
        debug_print_docno_results(docno_results, num_results,
                                  "After assigning docid_ranks");

    /* Go through reduced docno_results and prefs_and_ranks in parallel and
       assign ranks in prefs_and_ranks from docno_results.
       Note all docnos in docno_results are now guaranteed to be in
       prefs_and_ranks; that is why the two loops that skip ahead to
       docno_results[i] need no end_ranks test: a matching entry always lies
       ahead of ranks_ptr. */
    ranks_ptr = prefs_and_ranks;
    end_ranks = &prefs_and_ranks[num_prefs];
    next_unretrieved_rank = num_results;
    for (i = 0; i < num_results; i++) {
        /* Judged docnos before docno_results[i] were not retrieved */
        while (strcmp(ranks_ptr->docno, docno_results[i].docno) < 0) {
            ranks_ptr->rank = next_unretrieved_rank++;
            start_ptr = ranks_ptr++;
            while (strcmp(ranks_ptr->docno, start_ptr->docno) == 0) {
                ranks_ptr->rank = start_ptr->rank;
                ranks_ptr++;
            }
        }
        /* ranks_ptr now matches docno_results[i]: copy the retrieved rank
           to every preference line of that docno */
        ranks_ptr->rank = docno_results[i].rank;
        start_ptr = ranks_ptr++;
        while (ranks_ptr < end_ranks
               && strcmp(ranks_ptr->docno, start_ptr->docno) == 0) {
            ranks_ptr->rank = start_ptr->rank;
            ranks_ptr++;
        }
    }
    /* Remaining judged docnos were not retrieved */
    while (ranks_ptr < end_ranks) {
        ranks_ptr->rank = next_unretrieved_rank++;
        start_ptr = ranks_ptr++;
        while (ranks_ptr < end_ranks
               && strcmp(ranks_ptr->docno, start_ptr->docno) == 0) {
            ranks_ptr->rank = start_ptr->rank;
            ranks_ptr++;
        }
    }

    /* Now sort prefs_and_ranks by jg, jsg, rel_level, docid_rank */
    qsort(prefs_and_ranks, (size_t) num_prefs, sizeof(PREFS_AND_RANKS),
          comp_prefs_and_ranks_jg_rel_level);

    if (epi->debug_level >= 4) {
        printf("Form_prefs: num_judged %ld, num_judged_ret %ld\n",
               next_unretrieved_rank, num_results);
        debug_print_prefs_and_ranks(prefs_and_ranks, num_prefs, "Final prefs");
    }

    *num_judged = next_unretrieved_rank;
    *num_judged_ret = num_results;
    return (1);
}
int form_prefs_counts(const EPI *epi, const REL_INFO *rel_info, const RESULTS *results, RESULTS_PREFS *results_prefs) { long i; char *jgid, *jsgid; long jg_ind; long num_jgs_with_subgroups; float rel_level; EC * ec_pool_ptr; float *rel_pool_ptr; long *rank_pool_ptr; unsigned char *pa_pool_ptr; unsigned char **pa_ptr_pool_ptr; long start_jg; long num_rel_level; long num_sub_group; TEXT_RESULTS_INFO *text_results_info; TEXT_PREFS_INFO *trec_prefs; if (epi->debug_level >= 3) printf("Debug: Form_prefs starting query '%s'\n", results->qid); if (0 == strcmp(current_query, results->qid)) { /* Have done this query already. Return cached values */ results_prefs->num_jgs = num_jgs; results_prefs->jgs = jgs; results_prefs->num_judged = num_judged; results_prefs->num_judged_ret = num_judged_ret; results_prefs->pref_counts = (COUNTS_ARRAY ) { ca_pool, ca_ptr_pool, num_judged }; if (epi->debug_level >= 3) printf("Returned Cached Form_prefs\n"); return (0); } /* Check that format type of result info and rel info are correct */ if ((strcmp("prefs", rel_info->rel_format) && strcmp("qrels_prefs", rel_info->rel_format)) || strcmp("trec_results", results->ret_format)) { fprintf(stderr, "rec_eval.form_prefs_info: prefs_info format not (prefs or qrels_prefs) or results format not trec_results\n"); return (UNDEF); } /* Make sure enough space for query and save copy */ i = strlen(results->qid) + 1; if (NULL == (current_query = te_chk_and_malloc(current_query, &max_current_query, i, sizeof(char)))) return (UNDEF); (void) strncpy(current_query, results->qid, i); text_results_info = (TEXT_RESULTS_INFO *) results->q_results; trec_prefs = (TEXT_PREFS_INFO *) rel_info->q_rel_info; /* Reserve space for returned and intermediate values, if needed */ if (NULL == (prefs_and_ranks = te_chk_and_malloc(prefs_and_ranks, &max_prefs_and_ranks, trec_prefs->num_text_prefs, sizeof(PREFS_AND_RANKS))) || NULL == (ec_pool = te_chk_and_malloc(ec_pool, &max_ec_pool, trec_prefs->num_text_prefs, sizeof(EC))) || 
NULL == (rank_pool = te_chk_and_malloc(rank_pool, &max_rank_pool, trec_prefs->num_text_prefs, sizeof(long)))) return (UNDEF); /* get prefs_and_ranks from results and prefs. Will be sorted by jg, jsg, rel_level, rank. Set num_judged, num_judged_ret */ if (UNDEF == form_prefs_and_ranks(epi, text_results_info, trec_prefs, prefs_and_ranks, &num_judged, &num_judged_ret)) return (UNDEF); /* Go through prefs_and ranks, count and reserve space for judgment groups. Also count number of JGs that have subgroups and will need preference arrays. */ jgid = ""; jsgid = ""; num_jgs = 0; num_jgs_with_subgroups = 0; for (i = 0; i < trec_prefs->num_text_prefs; i++) { if (strcmp(jgid, prefs_and_ranks[i].jg)) { /* New JG */ jgid = prefs_and_ranks[i].jg; jsgid = prefs_and_ranks[i].jsg; num_jgs++; } else if (strcmp(jsgid, prefs_and_ranks[i].jsg)) { jsgid = prefs_and_ranks[i].jsg; num_jgs_with_subgroups++; } } /* Reserve pool space for JGs, and final pref_counts */ if (NULL == (jgs = te_chk_and_malloc(jgs, &max_num_jgs, num_jgs, sizeof(JG))) || NULL == (ca_pool = te_chk_and_malloc(ca_pool, &max_ca_pool, num_judged * num_judged, sizeof(unsigned short))) || NULL == (ca_ptr_pool = te_chk_and_malloc(ca_ptr_pool, &max_ca_ptr_pool, num_judged, sizeof(unsigned short *)))) return (UNDEF); if (num_jgs_with_subgroups) { /* Reserve pool space for preference arrays, and rel_level arrays */ if (NULL == (rel_pool = te_chk_and_malloc(rel_pool, &max_rel_pool, num_judged * num_jgs_with_subgroups, sizeof(float))) || NULL == (pa_pool = te_chk_and_malloc(pa_pool, &max_pa_pool, num_judged * num_judged * num_jgs_with_subgroups, sizeof(unsigned char))) || NULL == (pa_ptr_pool = te_chk_and_malloc(pa_ptr_pool, &max_pa_ptr_pool, num_judged * num_jgs_with_subgroups, sizeof(unsigned char *)))) return (UNDEF); } ec_pool_ptr = ec_pool; rel_pool_ptr = rel_pool; rank_pool_ptr = rank_pool; pa_pool_ptr = pa_pool; pa_ptr_pool_ptr = pa_ptr_pool; /* setup returned results_prefs so its summary values can be filled in */ 
results_prefs->num_jgs = num_jgs; results_prefs->jgs = jgs; results_prefs->num_judged = num_judged; results_prefs->num_judged_ret = num_judged_ret; results_prefs->pref_counts = (COUNTS_ARRAY ) { ca_pool, ca_ptr_pool, num_judged }; init_counts_array(&results_prefs->pref_counts); /* Go through prefs_and_ranks, determine and construct appropriate JG preference format. Preferences are counted and add to summary values as each JG is handled. */ jg_ind = 0; start_jg = 0; num_rel_level = 0; num_sub_group = 0; rel_level = -3.0; /* Illegal rel_level */ jgid = prefs_and_ranks[0].jg; jsgid = ""; for (i = 0; i < trec_prefs->num_text_prefs; i++) { if (strcmp(jgid, prefs_and_ranks[i].jg)) { /* New judgment group. Form previous JG and initialize coounts for new JG */ if (num_sub_group > 1) { /* Preference array JG */ jgs[jg_ind].num_ecs = 0; /* Indicator thet prefs_array used */ jgs[jg_ind].prefs_array = (PREFS_ARRAY ) { pa_pool_ptr, pa_ptr_pool_ptr, num_judged }; pa_pool_ptr += num_judged * num_judged; pa_ptr_pool_ptr += num_judged; jgs[jg_ind].rel_array = rel_pool_ptr; rel_pool_ptr += num_judged; if (UNDEF == form_jg_pa(&prefs_and_ranks[start_jg], i - start_jg, &jgs[jg_ind], results_prefs)) return (UNDEF); } else { /* EC JG */ jgs[jg_ind].ecs = ec_pool_ptr; ec_pool_ptr += num_rel_level; jgs[jg_ind].num_ecs = num_rel_level; if (UNDEF == form_jg_ec(&prefs_and_ranks[start_jg], i - start_jg, rank_pool_ptr, &jgs[jg_ind], results_prefs)) return (UNDEF); rank_pool_ptr += i - start_jg; } jgid = prefs_and_ranks[i].jg; jg_ind++; jsgid = ""; start_jg = i; num_sub_group = 0; num_rel_level = 0; rel_level = -3.0; /* Illegal rel_level */ } if (strcmp(jsgid, prefs_and_ranks[i].jsg)) { num_sub_group++; jsgid = prefs_and_ranks[i].jsg; } if (rel_level != prefs_and_ranks[i].rel_level) { num_rel_level++; rel_level = prefs_and_ranks[i].rel_level; } } /* Form last JG */ if (num_sub_group > 1) { /* Preference array JG */ jgs[jg_ind].num_ecs = 0; /* Indicator thet prefs_array used */ 
jgs[jg_ind].prefs_array = (PREFS_ARRAY ) { pa_pool_ptr, pa_ptr_pool_ptr, num_judged }; pa_pool_ptr += num_judged * num_judged; pa_ptr_pool_ptr += num_judged; jgs[jg_ind].rel_array = rel_pool_ptr; rel_pool_ptr += num_judged; if (UNDEF == form_jg_pa(&prefs_and_ranks[start_jg], i - start_jg, &jgs[jg_ind], results_prefs)) return (UNDEF); } else { /* EC JG */ jgs[jg_ind].ecs = ec_pool_ptr; ec_pool_ptr += num_rel_level; jgs[jg_ind].num_ecs = num_rel_level; if (UNDEF == form_jg_ec(&prefs_and_ranks[start_jg], i - start_jg, rank_pool_ptr, &jgs[jg_ind], results_prefs)) return (UNDEF); } if (epi->debug_level >= 3) debug_print_results_prefs(results_prefs); return (1); }
/*
 * Extend the preference array pa with the preferences implied by
 * transitivity, in place.
 *
 * pa is repeatedly combined with itself through mult_and_check_change, using
 * two scratch arrays carved from temp_pa_pool / temp_pa_ptr_pool, until that
 * helper reports no change.  The diagonal is set to 1 for the duration of
 * the multiplication and cleared again afterwards.
 *
 * If the result prefers i over j and also j over i for some i != j, a
 * message is written to stderr and the process is aborted.
 *
 * Returns 1 on success, UNDEF if scratch space cannot be obtained.
 */
static int add_transitives(PREFS_ARRAY *pa)
{
    const long n = pa->num_judged;
    PREFS_ARRAY scratch_a;
    PREFS_ARRAY scratch_b;
    PREFS_ARRAY *src, *dst, *swap;
    long row, col;

    /* Two scratch arrays of the same size as pa live back to back in the
       pools: n*n cells and n row pointers each. */
    temp_pa_pool = te_chk_and_malloc(temp_pa_pool, &max_temp_pa_pool,
                                     2 * n * n, sizeof(unsigned char));
    if (NULL == temp_pa_pool)
        return (UNDEF);
    temp_pa_ptr_pool = te_chk_and_malloc(temp_pa_ptr_pool,
                                         &max_temp_pa_ptr_pool,
                                         2 * n, sizeof(unsigned char *));
    if (NULL == temp_pa_ptr_pool)
        return (UNDEF);

    scratch_a = (PREFS_ARRAY) { temp_pa_pool, temp_pa_ptr_pool, n };
    scratch_b = (PREFS_ARRAY) { temp_pa_pool + n * n, temp_pa_ptr_pool + n, n };

    /* Row pointers only have to be rebuilt when the array size differs from
       the one used on the previous call. */
    if (n != saved_num_judged) {
        saved_num_judged = n;
        for (row = 0; row < n; row++) {
            scratch_a.array[row] = scratch_a.full_array + row * n;
            scratch_b.array[row] = scratch_b.full_array + row * n;
        }
    }

    /* Put ones on the diagonal of pa */
    for (row = 0; row < n; row++)
        pa->array[row][row] = 1;

    /* Multiply until nothing changes.  The first product reads pa itself;
       later products ping-pong between the two scratch arrays. */
    if (mult_and_check_change(pa, pa, &scratch_a)) {
        src = &scratch_a;
        dst = &scratch_b;
        while (mult_and_check_change(pa, src, dst)) {
            swap = src;
            src = dst;
            dst = swap;
        }
        /* Something changed along the way: take over the last product */
        for (row = 0; row < n; row++) {
            for (col = 0; col < n; col++)
                pa->array[row][col] = dst->array[row][col];
        }
    }

    /* Clear the diagonal again */
    for (row = 0; row < n; row++)
        pa->array[row][row] = 0;

    /* Look for a pair preferred in both directions.  The test is symmetric
       in (row, col), so scanning col > row reports the same first pair as a
       scan over all ordered pairs. */
    for (row = 0; row < n; row++) {
        for (col = row + 1; col < n; col++) {
            if (pa->array[row][col] && pa->array[col][row]) {
                fprintf(stderr, "rec_eval.form_prefs_counts: Pref inconsistency found\n internal rank %ld and internal rank %ld are conflicted\n", row, col);
                abort();
                return (-1);
            }
        }
    }

    return (1);
}
/*
 * Combine one query's retrieved documents with its qrels and return, through
 * res_rels, the relevance value found at each retrieved rank together with
 * summary counts (num_ret, num_rel, num_rel_ret, num_nonpool,
 * num_unjudged_in_pool, rel_levels, num_rel_levels).
 *
 * The work is cached per query: if results->qid equals the query id saved by
 * the previous call, the saved RES_RELS is handed back without recomputing.
 * The arrays referenced from *res_rels are file-level buffers that are
 * reused by later calls.
 *
 * Returns 1 when the values were computed, 0 when cached values were
 * returned, and UNDEF on a format mismatch (rel_info->rel_format must be
 * "qrels", results->ret_format must be "trec_results"), on an allocation
 * failure, or when the results or the qrels contain a duplicate docno.
 *
 * NOTE(review): the query id is saved before the work below has succeeded,
 * so after a failing call a repeated call with the same qid takes the cached
 * branch.  Left as is; confirm callers never retry after UNDEF.
 */
int te_form_res_rels(const EPI *epi, const REL_INFO *rel_info,
                     const RESULTS *results, RES_RELS *res_rels)
{
    long i;
    long num_results;
    long max_rel;
    TEXT_RESULTS_INFO *text_results_info;
    TEXT_QRELS_INFO *trec_qrels;
    TEXT_QRELS *qrels_ptr, *end_qrels;

    if (0 == strcmp(current_query, results->qid)) {
        /* Have done this query already. Return cached values */
        *res_rels = saved_res_rels;
        return (0);
    }

    /* Check that format type of result info and rel info are correct */
    if (strcmp("qrels", rel_info->rel_format)
        || strcmp("trec_results", results->ret_format)) {
        fprintf(stderr, "trec_eval.form_res_qrels: rel_info format not qrels or results format not trec_results\n");
        return (UNDEF);
    }

    /* Make sure enough space for query and save copy.  i counts the
       terminating NUL, so the copy below is always terminated. */
    i = strlen(results->qid) + 1;
    if (NULL == (current_query = te_chk_and_malloc(current_query,
                                                   &max_current_query, i,
                                                   sizeof(char))))
        return (UNDEF);
    (void) strncpy(current_query, results->qid, i);

    text_results_info = (TEXT_RESULTS_INFO *) results->q_results;
    trec_qrels = (TEXT_QRELS_INFO *) rel_info->q_rel_info;

    num_results = text_results_info->num_text_results;

    /* Check and reserve space for output structure */
    /* Reserve space for temp structure copying results */
    if (NULL == (ranked_rel_list = te_chk_and_malloc(ranked_rel_list,
                                                     &max_ranked_rel_list,
                                                     num_results,
                                                     sizeof(long)))
        || NULL == (docno_info = te_chk_and_malloc(docno_info,
                                                   &max_docno_info,
                                                   num_results,
                                                   sizeof(DOCNO_INFO))))
        return (UNDEF);

    for (i = 0; i < num_results; i++) {
        docno_info[i].docno = text_results_info->text_results[i].docno;
        docno_info[i].sim = text_results_info->text_results[i].sim;
    }

    /* Sort results by sim, breaking ties lexicographically using docno.
       The element count is passed as size_t (qsort's parameter type) so a
       large long count is not truncated through int. */
    qsort(docno_info, (size_t) num_results, sizeof(DOCNO_INFO),
          comp_sim_docno);

    /* Only look at epi->max_num_docs_per_topic (not normally an issue) */
    if (num_results > epi->max_num_docs_per_topic)
        num_results = epi->max_num_docs_per_topic;

    /* Add ranks to docno_info (starting at 1) */
    for (i = 0; i < num_results; i++) {
        docno_info[i].rank = i + 1;
    }

    /* Sort trec_top lexicographically */
    qsort(docno_info, (size_t) num_results, sizeof(DOCNO_INFO), comp_docno);

    /* Error checking for duplicates */
    for (i = 1; i < num_results; i++) {
        if (0 == strcmp(docno_info[i].docno, docno_info[i - 1].docno)) {
            fprintf(stderr, "trec_eval.form_res_qrels: duplicate docs %s\n",
                    docno_info[i].docno);
            return (UNDEF);
        }
    }

    /* Find max_rel among qid, reserve and zero space for rel_levels.
       Check for duplicate docnos.
       max_rel starts at -1 so that an empty qrels list is never
       dereferenced and so that max_rel + 1 (the number of rel_levels slots)
       can never be negative, even when every rel value is below -1. */
    qrels_ptr = trec_qrels->text_qrels;
    end_qrels = &trec_qrels->text_qrels[trec_qrels->num_text_qrels];
    max_rel = -1;
    for (; qrels_ptr < end_qrels; qrels_ptr++) {
        if (max_rel < qrels_ptr->rel)
            max_rel = qrels_ptr->rel;
        if (qrels_ptr != trec_qrels->text_qrels
            && 0 == strcmp((qrels_ptr - 1)->docno, qrels_ptr->docno)) {
            fprintf(stderr, "trec_eval.form_res_rels: duplicate docs %s\n",
                    qrels_ptr->docno);
            return (UNDEF);
        }
    }
    if (NULL == (rel_levels = te_chk_and_malloc(rel_levels, &max_rel_levels,
                                                (max_rel + 1),
                                                sizeof(long))))
        return (UNDEF);
    (void) memset(rel_levels, 0, (size_t) (max_rel + 1) * sizeof(long));

    /* Construct rank_rel array and rel_levels */
    /* Go through docno_info, trec_qrels in parallel to determine relevance
       for each doc in docno_info.
       Note that trec_qrels already sorted by docno with no duplicates.
       Every qrels entry with rel >= 0 is counted in rel_levels exactly once,
       whether or not its document was retrieved. */
    qrels_ptr = trec_qrels->text_qrels;
    end_qrels = &trec_qrels->text_qrels[trec_qrels->num_text_qrels];
    for (i = 0; i < num_results; i++) {
        while (qrels_ptr < end_qrels
               && strcmp(qrels_ptr->docno, docno_info[i].docno) < 0) {
            if (qrels_ptr->rel >= 0)
                rel_levels[qrels_ptr->rel]++;
            qrels_ptr++;
        }
        if (qrels_ptr >= end_qrels
            || strcmp(qrels_ptr->docno, docno_info[i].docno) > 0) {
            /* Doc is non-judged */
            docno_info[i].rel = RELVALUE_NONPOOL;
        } else {
            /* Doc is in pool, assign relevance */
            if (qrels_ptr->rel < 0)
                /* In pool, but unjudged (eg, infAP uses a sample of pool) */
                docno_info[i].rel = RELVALUE_UNJUDGED;
            else {
                docno_info[i].rel = qrels_ptr->rel;
            }
            if (qrels_ptr->rel >= 0)
                rel_levels[qrels_ptr->rel]++;
            qrels_ptr++;
        }
    }
    /* Finish counting rels */
    while (qrels_ptr < end_qrels) {
        if (qrels_ptr->rel >= 0)
            rel_levels[qrels_ptr->rel]++;
        qrels_ptr++;
    }

    /* Construct ranked_rel_list and associated counts */
    saved_res_rels.num_rel_ret = 0;
    saved_res_rels.num_nonpool = 0;
    saved_res_rels.num_unjudged_in_pool = 0;
    saved_res_rels.results_rel_list = ranked_rel_list;
    saved_res_rels.rel_levels = rel_levels;

    if (epi->judged_docs_only_flag) {
        /* If judged_docs_only_flag, then must fix up ranks to reflect
           unjudged docs being thrown out.  Note: done this way to preserve
           original tie-breaking based on text docno */
        long rrl;

        /* Sort tuples by increasing rank among judged docs */
        qsort(docno_info, (size_t) num_results, sizeof(DOCNO_INFO),
              comp_rank_judged);
        rrl = 0;
        i = 0;
        /* Copy entries up to the first one with a negative rel value */
        while (i < num_results && docno_info[i].rel >= 0) {
            if (docno_info[i].rel >= epi->relevance_level)
                saved_res_rels.num_rel_ret++;
            saved_res_rels.results_rel_list[rrl++] = docno_info[i++].rel;
        }
        saved_res_rels.num_ret = rrl;
    } else {
        /* Normal path.  Assign rel value to appropriate rank */
        for (i = 0; i < num_results; i++) {
            saved_res_rels.results_rel_list[docno_info[i].rank - 1] =
                docno_info[i].rel;
            if (RELVALUE_NONPOOL == docno_info[i].rel)
                saved_res_rels.num_nonpool++;
            else if (RELVALUE_UNJUDGED == docno_info[i].rel)
                saved_res_rels.num_unjudged_in_pool++;
            else {
                if (docno_info[i].rel >= epi->relevance_level)
                    saved_res_rels.num_rel_ret++;
            }
        }
        saved_res_rels.num_ret = num_results;
    }

    /* num_rel_levels ends up as (highest level with a nonzero count) + 1.
       Reset it first: saved_res_rels persists between queries, and a query
       with no counted level would otherwise keep the previous value. */
    saved_res_rels.num_rel = 0;
    saved_res_rels.num_rel_levels = 0;
    for (i = 0; i <= max_rel; i++) {
        if (saved_res_rels.rel_levels[i]) {
            saved_res_rels.num_rel_levels = i + 1;
            if (i >= epi->relevance_level)
                saved_res_rels.num_rel += saved_res_rels.rel_levels[i];
        }
    }

    *res_rels = saved_res_rels;

    return (1);
}