// the most general, but probably also most inefficient implementation void lemur::api::RetrievalMethod::scoreCollection(const QueryRep &qry, IndexedRealVector &results) { results.clear(); for (DOCID_T id=1; id<=ind.docCount();id++) { results.PushValue(id, scoreDoc(qry, id)); } }
// keywordToDocs '+indexPath+' "'+query+'"','r' int main(int argc, const char* argv[]) { if (argc == 2) { if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { cout << endl << "Usage:" << endl << endl << "keywordToDocs indexPath \"query\"" << endl << endl << "Returns a list of doc numbers that contain the query" << endl << endl; return(0); } } Index *ind = IndexManager::openIndex(argv[1]); ArrayAccumulator accumulator(ind->docCount()); RetrievalMethod *myMethod = new TFIDFRetMethod(*ind, accumulator); IndexedRealVector results; StringQuery *q = new StringQuery(argv[2]); // construct a TextQuery QueryRep * qr = myMethod->computeQueryRep(*q); // compute the query representation // now score all documents myMethod->scoreCollection(*qr, results); results.Sort(); // sorting results, assume a higher score means more relevant IndexedRealVector::iterator it; it = results.begin(); while ((it != results.end())) { cout << (*it).ind // this is the document ID << endl; it++; } }
/// Score a set of documents w.r.t. a query rep (e.g. for re-ranking) void lemur::api::RetrievalMethod::scoreDocSet(const QueryRep &qry, const DocIDSet &docSet, IndexedRealVector &results) { results.clear(); docSet.startIteration(); while (docSet.hasMore()) { int docID; double prevScore; docSet.nextIDInfo(docID, prevScore); results.PushValue(docID, scoreDoc(qry, docID)); } }
/** Score a set of documents w.r.t. a query rep (e.g. for re-ranking) */ void LinkDistributionRetMethod::scoreDocSet(const QueryRep &qry, const DocIDSet &docSet, IndexedRealVector &results) { results.clear(); docSet.startIteration(); while(docSet.hasMore()) { int docID; double prevScore; docSet.nextIDInfo(docID, prevScore); double phi = lemur::api::ParamGetDouble("phi", 0); double currentScore = (1 - phi) * prevScore + phi * scoreDoc(qry, docID); results.PushValue(docID, currentScore); }//while }//end scoreDocSet
void LinkDistributionRetMethod::scoreDocSet(const Query &q, const DocIDSet &docSet, IndexedRealVector &results) { //assert(LinkDistributionParameter::docWindowSize>0); results.clear(); docSet.startIteration(); while(docSet.hasMore()) { int docID; double prevScore; docSet.nextIDInfo(docID, prevScore); //cerr<<"docID:"<<docID<<endl; double phi = lemur::api::ParamGetDouble("phi", 0); double currentScore = (1 - phi) * prevScore + phi * scoreDoc(q, docID); results.PushValue(docID, currentScore); }//while }
void lemur::api::StructQueryRetMethod::scoreInvertedIndex(const QueryRep &qRep, IndexedRealVector &scores, bool scoreAll) { COUNT_T numDocs = ind.docCount(); COUNT_T i; lemur::retrieval::QueryNode *queryRoot = ((lemur::retrieval::StructQueryRep *)(&qRep))->topnode(); scores.clear(); // eval the query StructQueryScoreFunc * scorer = (StructQueryScoreFunc *)scoreFunc(); for (i = 1; i <= numDocs; i++) { if (queryRoot->dList[i]) { DocumentRep *dRep = computeDocRep(i); scores.PushValue(i, scorer->evalQuery(queryRoot, dRep)); delete dRep; } else if (scoreAll) { scores.PushValue(i, scorer->adjustedScore(0, queryRoot)); } } }
void Retrieval(double *qryArr, IndexedRealVector &results, Index *ind){ //retrieve documents with respect to the array representation of the query lemur::retrieval::ArrayAccumulator scoreAccumulator(ind->docCount()); scoreAccumulator.reset(); for (int t=1; t<=ind->termCountUnique();t++) { if (qryArr[t]>0) { // fetch inverted entries for a specific query term DocInfoList *docList = ind->docInfoList(t); // iterate over all individual documents docList->startIteration(); while (docList->hasMore()) { DocInfo *matchInfo = docList->nextEntry(); // for each matched term, calculated the evidence double wt; if (strcmp(LocalParameter::weightScheme.c_str(),"RawTF")==0) { wt = computeRawTFWeight(matchInfo->docID(), // doc ID t, // term ID matchInfo->termCount(), // freq of term t in this doc qryArr[t], // freq of term t in the query ind); }else if (strcmp(LocalParameter::weightScheme.c_str(),"RawTFIDF")==0) { wt = computeRawTFIDFWeight(matchInfo->docID(), // doc ID t, // term ID matchInfo->termCount(), // freq of term t in this doc qryArr[t], // freq of term t in the query ind); }else if (strcmp(LocalParameter::weightScheme.c_str(),"LogTFIDF")==0) { wt = computeLogTFIDFWeight(matchInfo->docID(), // doc ID t, // term ID matchInfo->termCount(), // freq of term t in this doc qryArr[t], // freq of term t in the query ind); }else if (strcmp(LocalParameter::weightScheme.c_str(),"Okapi")==0) { wt = computeOkapiWeight(matchInfo->docID(), // doc ID t, // term ID matchInfo->termCount(), // freq of term t in this doc qryArr[t], // freq of term t in the query ind); }else if (strcmp(LocalParameter::weightScheme.c_str(),"Custom")==0){ wt = computeCustomWeight(matchInfo->docID(), // doc ID t, // term ID matchInfo->termCount(), // freq of term t in this doc qryArr[t], // freq of term t in the query ind); }else{ cerr<<"The weighting scheme of "<<LocalParameter::weightScheme.c_str()<<" is not supported"<<endl; exit(1); } scoreAccumulator.incScore(matchInfo->docID(),wt); } delete docList; } } // Adjust the scores for the documents when it is necessary double s; int d; for (d=1; d<=ind->docCount(); d++) { if (scoreAccumulator.findScore(d,s)) { } else { s=0; } if (strcmp(LocalParameter::weightScheme.c_str(),"RawTF")==0) { results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator d, // doc ID ind)); // index }else if (strcmp(LocalParameter::weightScheme.c_str(),"RawTFIDF")==0) { results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator d, // doc ID ind)); // index }else if (strcmp(LocalParameter::weightScheme.c_str(),"LogTFIDF")==0) { results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator d, // doc ID ind)); // index }else if (strcmp(LocalParameter::weightScheme.c_str(),"Okapi")==0) { results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator d, // doc ID ind)); // index }else if (strcmp(LocalParameter::weightScheme.c_str(),"Custom")==0){ results.PushValue(d, computeCustomAdjustedScore(s, // the score from the accumulator d, // doc ID ind)); // index }else{ cerr<<"The weighting scheme of "<<LocalParameter::weightScheme.c_str()<<" is not supported"<<endl; exit(1); } } }