Пример #1
0
// the most general, but probably also most inefficient implementation
void lemur::api::RetrievalMethod::scoreCollection(const QueryRep &qry, IndexedRealVector &results)
{
  results.clear();
  for (DOCID_T id=1; id<=ind.docCount();id++) {
    results.PushValue(id, scoreDoc(qry, id));
  }
}
Пример #2
0
// keywordToDocs '+indexPath+' "'+query+'"','r'
int main(int argc, const char* argv[]) 
{
	if (argc == 2) {
		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
			cout << endl << "Usage:" << endl << endl <<
				"keywordToDocs indexPath \"query\"" << endl << endl <<
				"Returns a list of doc numbers that contain the query" <<
				endl << endl;
			return(0);
		}
	}
	Index *ind = IndexManager::openIndex(argv[1]);

	ArrayAccumulator accumulator(ind->docCount());
	RetrievalMethod *myMethod = new TFIDFRetMethod(*ind, accumulator); 
	IndexedRealVector results; 
    StringQuery *q = new StringQuery(argv[2]); // construct a TextQuery
    QueryRep * qr = myMethod->computeQueryRep(*q); // compute the query representation 
    // now score all documents    
    myMethod->scoreCollection(*qr, results);
    results.Sort(); // sorting results, assume a higher score means more relevant
    IndexedRealVector::iterator it;
    it = results.begin();
    while ((it != results.end())) {
         cout << (*it).ind  // this is the document ID 
              << endl;
         it++;
    }
}
Пример #3
0
/// Score a set of documents w.r.t. a query rep (e.g. for re-ranking)
void lemur::api::RetrievalMethod::scoreDocSet(const QueryRep &qry, const DocIDSet &docSet, IndexedRealVector &results)
{
  results.clear();
  docSet.startIteration();
  while (docSet.hasMore()) {
    int docID;
    double prevScore;
    docSet.nextIDInfo(docID, prevScore);
    results.PushValue(docID, scoreDoc(qry, docID));
  }
}
Пример #4
0
/** Score a set of documents w.r.t. a query rep (e.g. for re-ranking) */
void LinkDistributionRetMethod::scoreDocSet(const QueryRep &qry, const DocIDSet &docSet, IndexedRealVector &results)
{
	results.clear();
	docSet.startIteration();

	while(docSet.hasMore())
	{
		int docID;
		double prevScore;
		docSet.nextIDInfo(docID, prevScore);
		
		double phi = lemur::api::ParamGetDouble("phi", 0);
		double currentScore = (1 - phi) * prevScore + phi * scoreDoc(qry, docID);
		results.PushValue(docID, currentScore);
	}//while
}//end scoreDocSet
Пример #5
0
void LinkDistributionRetMethod::scoreDocSet(const Query &q, const DocIDSet &docSet, IndexedRealVector &results)
{
	//assert(LinkDistributionParameter::docWindowSize>0);

	results.clear();
	docSet.startIteration();
	while(docSet.hasMore())
	{
		int docID;
		double prevScore;
		docSet.nextIDInfo(docID, prevScore);
		//cerr<<"docID:"<<docID<<endl;
		double phi = lemur::api::ParamGetDouble("phi", 0);
		double currentScore = (1 - phi) * prevScore + phi * scoreDoc(q, docID);
		results.PushValue(docID, currentScore);
	}//while

}
Пример #6
0
void lemur::api::StructQueryRetMethod::scoreInvertedIndex(const QueryRep &qRep, 
                                                          IndexedRealVector &scores, 
                                                          bool scoreAll) {
  COUNT_T numDocs = ind.docCount();
  COUNT_T i;
  lemur::retrieval::QueryNode *queryRoot = ((lemur::retrieval::StructQueryRep *)(&qRep))->topnode();
  scores.clear();  
  // eval the query
  StructQueryScoreFunc * scorer = (StructQueryScoreFunc *)scoreFunc();
  for (i = 1; i <= numDocs; i++) {
    if (queryRoot->dList[i]) {
      DocumentRep *dRep = computeDocRep(i);
      scores.PushValue(i, scorer->evalQuery(queryRoot, dRep));
      delete dRep;
    } else if (scoreAll) {
      scores.PushValue(i, scorer->adjustedScore(0, queryRoot));
    }
  }
}
Пример #7
0
void Retrieval(double *qryArr, IndexedRealVector &results, Index *ind){
  //retrieve documents with respect to the array representation of the query

  lemur::retrieval::ArrayAccumulator scoreAccumulator(ind->docCount());

  scoreAccumulator.reset();
  for (int t=1; t<=ind->termCountUnique();t++) {
    if (qryArr[t]>0) {      
      // fetch inverted entries for a specific query term
      DocInfoList *docList = ind->docInfoList(t);

      // iterate over all individual documents 
      docList->startIteration();
      while (docList->hasMore()) {
	DocInfo *matchInfo = docList->nextEntry();
	// for each matched term, calculated the evidence

	double wt;

	if (strcmp(LocalParameter::weightScheme.c_str(),"RawTF")==0) {
	  wt = computeRawTFWeight(matchInfo->docID(),  // doc ID
				  t, // term ID
				  matchInfo->termCount(), // freq of term t in this doc
				  qryArr[t], // freq of term t in the query
				  ind);	  
	}else if (strcmp(LocalParameter::weightScheme.c_str(),"RawTFIDF")==0) {
	  wt = computeRawTFIDFWeight(matchInfo->docID(),  // doc ID
				  t, // term ID
				  matchInfo->termCount(), // freq of term t in this doc
				  qryArr[t], // freq of term t in the query
				  ind);	  
	}else if (strcmp(LocalParameter::weightScheme.c_str(),"LogTFIDF")==0) {
	  wt = computeLogTFIDFWeight(matchInfo->docID(),  // doc ID
				  t, // term ID
				  matchInfo->termCount(), // freq of term t in this doc
				  qryArr[t], // freq of term t in the query
				  ind);	  
	}else if (strcmp(LocalParameter::weightScheme.c_str(),"Okapi")==0) {
	  wt = computeOkapiWeight(matchInfo->docID(),  // doc ID
				  t, // term ID
				  matchInfo->termCount(), // freq of term t in this doc
				  qryArr[t], // freq of term t in the query
				  ind);	  
	}else if (strcmp(LocalParameter::weightScheme.c_str(),"Custom")==0){
	  wt = computeCustomWeight(matchInfo->docID(),  // doc ID
				  t, // term ID
				  matchInfo->termCount(), // freq of term t in this doc
				  qryArr[t], // freq of term t in the query
				  ind);	  
	}else{
	  cerr<<"The weighting scheme of "<<LocalParameter::weightScheme.c_str()<<" is not supported"<<endl;
          exit(1);
	}
	scoreAccumulator.incScore(matchInfo->docID(),wt);  
      }
      delete docList;
    }
  }

  // Adjust the scores for the documents when it is necessary
  double s;
  int d;
  for (d=1; d<=ind->docCount(); d++) {
    if (scoreAccumulator.findScore(d,s)) {
    } else {
      s=0;
    }

    if (strcmp(LocalParameter::weightScheme.c_str(),"RawTF")==0) {
      results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator
						d, // doc ID
						ind)); // index
    }else if (strcmp(LocalParameter::weightScheme.c_str(),"RawTFIDF")==0) {
      results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator
						d, // doc ID
						ind)); // index
    }else if (strcmp(LocalParameter::weightScheme.c_str(),"LogTFIDF")==0) {
      results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator
						d, // doc ID
						ind)); // index
    }else if (strcmp(LocalParameter::weightScheme.c_str(),"Okapi")==0) {
      results.PushValue(d, computeAdjustedScore(s, // the score from the accumulator
						d, // doc ID
						ind)); // index
    }else if (strcmp(LocalParameter::weightScheme.c_str(),"Custom")==0){
      results.PushValue(d, computeCustomAdjustedScore(s, // the score from the accumulator
						      d, // doc ID
					      ind)); // index     
    }else{
      cerr<<"The weighting scheme of "<<LocalParameter::weightScheme.c_str()<<" is not supported"<<endl;
      exit(1);
    }
  }
}