Пример #1
0
int main(int argc, char *argv[]) {
  Index *ind;
  if (argc < 3) {
    cerr << "usage: dumpTerm <index_name> <internal/external termid> [-ext]" 
	 << endl;
    exit (1);
  }
  
  ind = IndexManager::openIndex(argv[1]);
  TERMID_T did;
  if (argc == 3)
    did = atoi(argv[2]);
  else did = ind->term(argv[2]);
  
  cout << ind->term(did) << endl;
  DocInfoList *tList = ind->docInfoList(did);
  if (tList == NULL) {
    cerr << ": empty docInfoList" << endl;
    exit (1);
  }
  
  DocInfo *info;
  tList->startIteration();
  while (tList->hasMore()) {
    info = tList->nextEntry();
    const LOC_T *pos = info->positions();
    COUNT_T count = info->termCount();
    cout << ind->document(info->docID()) << "(" << count << "): ";
    if (pos != NULL) {
      for (COUNT_T i = 0; i < count; i++)
	cout << pos[i] << " ";
    }
    cout << endl;
  }
  delete tList;
  delete(ind);
  return 0;
}
Пример #2
0
/// Scores every document of an index against a query given as a term array.
///
/// @param qryArr   query representation indexed by term id; the entries for
///                 ids 1..ind->termCountUnique() are read. A term takes part
///                 in scoring only when its entry is > 0, and that entry is
///                 passed to the weighting function as the query-side value.
/// @param results  receives one PushValue(d, score) call for every document
///                 id d in 1..ind->docCount(), in increasing id order.
///                 Documents with no accumulated score are adjusted from 0.
/// @param ind      index supplying the posting lists and collection counts.
///
/// The weighting scheme is taken from LocalParameter::weightScheme and must
/// be one of "RawTF", "RawTFIDF", "LogTFIDF", "Okapi" or "Custom". Any other
/// value prints a message to cerr and calls exit(1) the first time a term
/// weight or an adjusted score is actually needed.
void Retrieval(double *qryArr, IndexedRealVector &results, Index *ind){
  // Resolve the scheme once; it does not depend on the term or the document,
  // so there is no reason to repeat the string comparisons per posting.
  const char *const scheme = LocalParameter::weightScheme.c_str();
  const bool useRawTF    = strcmp(scheme, "RawTF") == 0;
  const bool useRawTFIDF = strcmp(scheme, "RawTFIDF") == 0;
  const bool useLogTFIDF = strcmp(scheme, "LogTFIDF") == 0;
  const bool useOkapi    = strcmp(scheme, "Okapi") == 0;
  const bool useCustom   = strcmp(scheme, "Custom") == 0;
  // The four built-in schemes share computeAdjustedScore.
  const bool useStandardAdjust =
    useRawTF || useRawTFIDF || useLogTFIDF || useOkapi;

  lemur::retrieval::ArrayAccumulator scoreAccumulator(ind->docCount());
  scoreAccumulator.reset();

  for (int t = 1; t <= ind->termCountUnique(); t++) {
    if (!(qryArr[t] > 0))
      continue;  // term is not part of the query

    // fetch inverted entries for a specific query term
    DocInfoList *docList = ind->docInfoList(t);
    if (docList == NULL)
      continue;  // no posting list for this term: nothing to accumulate

    // iterate over all individual documents
    docList->startIteration();
    while (docList->hasMore()) {
      DocInfo *matchInfo = docList->nextEntry();

      // for each matched term, calculate the evidence; every weighting
      // function receives (doc id, term id, frequency of the term in the
      // document, query value of the term, index).
      double wt = 0.0;
      if (useRawTF) {
        wt = computeRawTFWeight(matchInfo->docID(), t,
                                matchInfo->termCount(), qryArr[t], ind);
      } else if (useRawTFIDF) {
        wt = computeRawTFIDFWeight(matchInfo->docID(), t,
                                   matchInfo->termCount(), qryArr[t], ind);
      } else if (useLogTFIDF) {
        wt = computeLogTFIDFWeight(matchInfo->docID(), t,
                                   matchInfo->termCount(), qryArr[t], ind);
      } else if (useOkapi) {
        wt = computeOkapiWeight(matchInfo->docID(), t,
                                matchInfo->termCount(), qryArr[t], ind);
      } else if (useCustom) {
        wt = computeCustomWeight(matchInfo->docID(), t,
                                 matchInfo->termCount(), qryArr[t], ind);
      } else {
        cerr<<"The weighting scheme of "<<LocalParameter::weightScheme.c_str()<<" is not supported"<<endl;
        exit(1);
      }
      scoreAccumulator.incScore(matchInfo->docID(), wt);
    }
    delete docList;
  }

  // Adjust the scores for the documents when it is necessary
  for (int d = 1; d <= ind->docCount(); d++) {
    double s;
    if (!scoreAccumulator.findScore(d, s))
      s = 0;  // document matched no query term

    if (useStandardAdjust) {
      results.PushValue(d, computeAdjustedScore(s, d, ind));
    } else if (useCustom) {
      results.PushValue(d, computeCustomAdjustedScore(s, d, ind));
    } else {
      cerr<<"The weighting scheme of "<<LocalParameter::weightScheme.c_str()<<" is not supported"<<endl;
      exit(1);
    }
  }
}