示例#1
0
// Invoked as: keywordToDocs <indexPath> "<query>"
int main(int argc, const char* argv[]) 
{
	if (argc == 2) {
		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
			cout << endl << "Usage:" << endl << endl <<
				"keywordToDocs indexPath \"query\"" << endl << endl <<
				"Returns a list of doc numbers that contain the query" <<
				endl << endl;
			return(0);
		}
	}
	Index *ind = IndexManager::openIndex(argv[1]);

	ArrayAccumulator accumulator(ind->docCount());
	RetrievalMethod *myMethod = new TFIDFRetMethod(*ind, accumulator); 
	IndexedRealVector results; 
    StringQuery *q = new StringQuery(argv[2]); // construct a TextQuery
    QueryRep * qr = myMethod->computeQueryRep(*q); // compute the query representation 
    // now score all documents    
    myMethod->scoreCollection(*qr, results);
    results.Sort(); // sorting results, assume a higher score means more relevant
    IndexedRealVector::iterator it;
    it = results.begin();
    while ((it != results.end())) {
         cout << (*it).ind  // this is the document ID 
              << endl;
         it++;
    }
}
示例#2
0
/// A retrieval evaluation program.
///
/// Opens the index named by LocalParameter::databaseIndex and the query
/// stream named by LocalParameter::queryStream, then for every query document
/// builds a query weight array (ComputeQryArr), fills a result vector
/// (Retrieval), sorts it and writes it to LocalParameter::resultFile.
///
/// @param argc unused here
/// @param argv unused here
/// @return 0 on completion
/// @throws Exception when the index or the query file cannot be opened
int AppMain(int argc, char *argv[]) {
  

  //Step 1: Open the index file
  Index  *ind;

  try {
    ind  = IndexManager::openIndex(LocalParameter::databaseIndex);
  } 
  catch (Exception &ex) {
    ex.writeMessage();
    // NOTE(review): this module name is "RelEval" while the query-file error
    // below uses "RetEval" -- confirm which one is intended.
    throw Exception("RelEval", "Can't open index, check parameter index");
  }

  //Step 2: Open the query file
  DocStream *qryStream;
  try {
    qryStream = new lemur::parse::BasicDocStream(LocalParameter::queryStream);
  } 
  catch (Exception &ex) {
    ex.writeMessage(cerr);
    throw Exception("RetEval", 
                    "Can't open query file, check parameter textQuery");
  }

  //Step 3: Create the result file
  ofstream result(LocalParameter::resultFile.c_str());
  ResultFile resultFile(1);
  resultFile.openForWrite(result, *ind);


  // go through each query
  
  qryStream->startDocIteration();
  while (qryStream->hasMore()) {
    Document *qryDoc = qryStream->nextDoc();
    const char *queryID = qryDoc->getID();
    cout << "query: "<< queryID <<endl;

    // Weights of the original query's terms; sized termCountUnique()+1,
    // presumably so it can be indexed directly by term ID -- verify against
    // ComputeQryArr.
    double *queryArr = new double[ind->termCountUnique()+1];
    ComputeQryArr(qryDoc,queryArr, ind); 

    IndexedRealVector results(ind->docCount());
    results.clear();

    Retrieval(queryArr,results,ind);

    results.Sort();
    resultFile.writeResults(queryID, &results, LocalParameter::resultCount);

    // queryArr was allocated with new[], so it must be released with delete[].
    delete[] queryArr;
  }



  result.close();
  delete qryStream;
  delete ind;
  return 0;
}
示例#3
0
int AppMain(int argc, char *argv[]) {

  Index  *ind;

  try {
    ind = IndexManager::openIndex(RetrievalParameter::databaseIndex);
  } 
  catch (Exception &ex) {
    ex.writeMessage();
    throw Exception("QueryClarity", 
                    "Can't open index, check parameter index");
  }

  lemur::retrieval::ArrayAccumulator accumulator(ind->docCount());
  IndexedRealVector res(ind->docCount());
  ofstream os(LocalParameter::expandedQuery.c_str());

  lemur::retrieval::SimpleKLRetMethod *model;
  model =  new lemur::retrieval::SimpleKLRetMethod(*ind, SimpleKLParameter::smoothSupportFile, 
                                 accumulator);
  model->setDocSmoothParam(SimpleKLParameter::docPrm);
  model->setQueryModelParam(SimpleKLParameter::qryPrm);
  DocStream *qryStream;
  try {
    qryStream = new lemur::parse::BasicDocStream(RetrievalParameter::textQuerySet);
  } catch (Exception &ex) {
    ex.writeMessage(cerr);
    throw Exception("QueryClarity", "Can't open query file");
  }
  qryStream->startDocIteration();
  TextQuery *q;
  while (qryStream->hasMore()) {
    Document *d = qryStream->nextDoc();
    q = new TextQuery(*d);
    QueryRep *qr = model->computeQueryRep(*q);
    res.clear();
    QueryClarity(qr, q->id(), &res, model, os);     
    delete qr;
    delete q;
  }
  os.close();
  delete model;
  delete qryStream;
  delete ind;
  return 0;
}
示例#4
0
int AppMain(int argc, char *argv[]) {
  ofstream ofs;
  Index * dbIndex;
  try {
    dbIndex = IndexManager::openIndex(LocalParameter::index);
  } 
  catch (Exception &ex) {
    ex.writeMessage();
    throw Exception("GenL2Norm", "Can't open index, check parameter index");
  }
  // pre-compute IDF values
  double *idfV = new double[dbIndex->termCountUnique()+1];
  TERMID_T i;
  for (i=1; i<=dbIndex->termCountUnique(); i++) {
    idfV[i] = log((dbIndex->docCount()+1)/(0.5+dbIndex->docCount(i)));
  }
  ofs.open(LocalParameter::L2File.c_str(), ios::out | std::ios::binary);
  for (i = 1; i <= dbIndex->docCount(); i++) {
    TermInfoList *qList = dbIndex->termInfoList(i);
    TermInfo *qInfo;
    qList->startIteration();
    TERMID_T idx;
    COUNT_T dtf;
    double norm = 0, tmp;
    while (qList->hasMore()) {
      qInfo = qList->nextEntry();
      idx = qInfo->termID();
      dtf = qInfo->count();
      tmp = dtf * idfV[idx];
      norm += tmp * tmp;
    }
    delete qList;
    //  docNorms[docID] = sqrt(norm);
    norm = sqrt(norm);
    if (norm == 0) norm = 1;
    ofs << i << " " << norm << endl;
  }
  ofs.close();
  delete[](idfV);
  delete dbIndex;
  return 0;
}
示例#5
0
int AppMain(int argc, char * argv[]) {
  if (argc > 2) {
    cerr << "Usage: OfflineCluster <parameter file>" << endl;
    return -1;
  }
  COUNT_T i;
  Index *myIndex;
  try {
    myIndex  = IndexManager::openIndex(ClusterParam::databaseIndex);
  } catch (Exception &ex) {
    ex.writeMessage();
    throw Exception("OfflineCluster", 
                    "Can't open index, check parameter index");
  }
  // construct cluster method.
  lemur::cluster::OfflineCluster* clusterDB;
  clusterDB = new lemur::cluster::OfflineCluster(*myIndex,
                                                 ClusterParam::simType,
                                                 ClusterParam::clusterType,
                                                 ClusterParam::docMode);
  // crank through the collection
  COUNT_T numDocs = myIndex->docCount();
  if (numDocs > 100) numDocs = 100;
  vector <DOCID_T> toCluster;
  for (i = 1; i <= numDocs; i++) {
    toCluster.push_back(i);
  }

  cout << "Using kmeans on " << numDocs << " documents..." << endl;
  vector <lemur::cluster::Cluster *> *clusters;
  clusters = clusterDB->kMeans(toCluster, ClusterParam::numParts,
                               ClusterParam::maxIters);

  for (i = 0; i < clusters->size(); i++) {
    (*clusters)[i]->print();
    delete((*clusters)[i]);
  }
  delete(clusters);
  cout << "Using bisecting kmeans on " << numDocs << " documents..." << endl;
  clusters = clusterDB->bisecting_kMeans(toCluster, ClusterParam::numParts,
                                         ClusterParam::numIters,
                                         ClusterParam::maxIters);
  for (i = 0; i < clusters->size(); i++) {
    (*clusters)[i]->print();
    delete((*clusters)[i]);
  }
  delete(clusters);
  delete clusterDB;
  delete myIndex;
  return 0;
}