int main(int argc, char *argv[]) { Index *ind; if (argc < 3) { cerr << "usage: dumpTerm <index_name> <internal/external termid> [-ext]" << endl; exit (1); } ind = IndexManager::openIndex(argv[1]); TERMID_T did; if (argc == 3) did = atoi(argv[2]); else did = ind->term(argv[2]); cout << ind->term(did) << endl; DocInfoList *tList = ind->docInfoList(did); if (tList == NULL) { cerr << ": empty docInfoList" << endl; exit (1); } DocInfo *info; tList->startIteration(); while (tList->hasMore()) { info = tList->nextEntry(); const LOC_T *pos = info->positions(); COUNT_T count = info->termCount(); cout << ind->document(info->docID()) << "(" << count << "): "; if (pos != NULL) { for (COUNT_T i = 0; i < count; i++) cout << pos[i] << " "; } cout << endl; } delete tList; delete(ind); return 0; }
void Retrieval(double *qryArr, IndexedRealVector &results, Index *ind){ //retrieve documents with respect to the array representation of the query lemur::retrieval::ArrayAccumulator scoreAccumulator(ind->docCount()); scoreAccumulator.reset(); for (int t=1; t<=ind->termCountUnique();t++) { if (qryArr[t]>0) { // fetch inverted entries for a specific query term DocInfoList *docList = ind->docInfoList(t); // iterate over all individual documents docList->startIteration(); while (docList->hasMore()) { DocInfo *matchInfo = docList->nextEntry(); // for each matched term, calculated the evidence double wt; if (strcmp(LocalParameter::weightScheme.c_str(),"RawTF")==0) { wt = computeRawTFWeight(matchInfo->docID(), // doc ID t, // term ID matchInfo->termCount(), // freq of term t in this doc qryArr[t], // freq of term t in the query ind); }else if (strcmp(LocalParameter::weightScheme.c_str(),"RawTFIDF")==0) { wt = computeRawTFIDFWeight(matchInfo->docID(), // doc ID t, // term ID matchInfo->termCount(), // freq of term t in this doc qryArr[t], // freq of term t in the query ind); }else if (strcmp(LocalParameter::weightScheme.c_str(),"LogTFIDF")==0) { wt = computeLogTFIDFWeight(matchInfo->docID(), // doc ID t, // term ID matchInfo->termCount(), // freq of term t in this doc qryArr[t], // freq of term t in the query ind); }else if (strcmp(LocalParameter::weightScheme.c_str(),"Okapi")==0) { wt = computeOkapiWeight(matchInfo->docID(), // doc ID t, // term ID matchInfo->termCount(), // freq of term t in this doc qryArr[t], // freq of term t in the query ind); }else if (strcmp(LocalParameter::weightScheme.c_str(),"Custom")==0){ wt = computeCustomWeight(matchInfo->docID(), // doc ID t, // term ID matchInfo->termCount(), // freq of term t in this doc qryArr[t], // freq of term t in the query ind); }else{ cerr<<"The weighting scheme of "<<LocalParameter::weightScheme.c_str()<<" is not supported"<<endl; exit(1); } scoreAccumulator.incScore(matchInfo->docID(),wt); } delete docList; } } // Adjust the scores for the documents when it is necessary double s; int d; for (d=1; d<=ind->docCount(); d++) { if (scoreAccumulator.findScore(d,s)) { } else { s=0; } if (strcmp(LocalParameter::weightScheme.c_str(),"RawTF")==0) { results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator d, // doc ID ind)); // index }else if (strcmp(LocalParameter::weightScheme.c_str(),"RawTFIDF")==0) { results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator d, // doc ID ind)); // index }else if (strcmp(LocalParameter::weightScheme.c_str(),"LogTFIDF")==0) { results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator d, // doc ID ind)); // index }else if (strcmp(LocalParameter::weightScheme.c_str(),"Okapi")==0) { results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator d, // doc ID ind)); // index }else if (strcmp(LocalParameter::weightScheme.c_str(),"Custom")==0){ results.PushValue(d, computeCustomAdjustedScore(s, // the score from the accumulator d, // doc ID ind)); // index }else{ cerr<<"The weighting scheme of "<<LocalParameter::weightScheme.c_str()<<" is not supported"<<endl; exit(1); } } }