// keywordToDocs '+indexPath+' "'+query+'"','r' int main(int argc, const char* argv[]) { if (argc == 2) { if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { cout << endl << "Usage:" << endl << endl << "keywordToDocs indexPath \"query\"" << endl << endl << "Returns a list of doc numbers that contain the query" << endl << endl; return(0); } } Index *ind = IndexManager::openIndex(argv[1]); ArrayAccumulator accumulator(ind->docCount()); RetrievalMethod *myMethod = new TFIDFRetMethod(*ind, accumulator); IndexedRealVector results; StringQuery *q = new StringQuery(argv[2]); // construct a TextQuery QueryRep * qr = myMethod->computeQueryRep(*q); // compute the query representation // now score all documents myMethod->scoreCollection(*qr, results); results.Sort(); // sorting results, assume a higher score means more relevant IndexedRealVector::iterator it; it = results.begin(); while ((it != results.end())) { cout << (*it).ind // this is the document ID << endl; it++; } }
/// A retrieval evaluation program int AppMain(int argc, char *argv[]) { //Step 1: Open the index file Index *ind; try { ind = IndexManager::openIndex(LocalParameter::databaseIndex); } catch (Exception &ex) { ex.writeMessage(); throw Exception("RelEval", "Can't open index, check parameter index"); } //Step 2: Open the query file DocStream *qryStream; try { qryStream = new lemur::parse::BasicDocStream(LocalParameter::queryStream); } catch (Exception &ex) { ex.writeMessage(cerr); throw Exception("RetEval", "Can't open query file, check parameter textQuery"); } //Step 3: Create the result file ofstream result(LocalParameter::resultFile.c_str()); ResultFile resultFile(1); resultFile.openForWrite(result, *ind); // go through each query qryStream->startDocIteration(); while (qryStream->hasMore()) { Document *qryDoc = qryStream->nextDoc(); const char *queryID = qryDoc->getID(); cout << "query: "<< queryID <<endl; double *queryArr = new double[ind->termCountUnique()+1]; //the array that contains the weights of query terms; for orignial query ComputeQryArr(qryDoc,queryArr, ind); IndexedRealVector results(ind->docCount()); results.clear(); Retrieval(queryArr,results,ind); results.Sort(); resultFile.writeResults(queryID, &results, LocalParameter::resultCount); delete queryArr; } result.close(); delete qryStream; delete ind; return 0; }
int AppMain(int argc, char *argv[]) { Index *ind; try { ind = IndexManager::openIndex(RetrievalParameter::databaseIndex); } catch (Exception &ex) { ex.writeMessage(); throw Exception("QueryClarity", "Can't open index, check parameter index"); } lemur::retrieval::ArrayAccumulator accumulator(ind->docCount()); IndexedRealVector res(ind->docCount()); ofstream os(LocalParameter::expandedQuery.c_str()); lemur::retrieval::SimpleKLRetMethod *model; model = new lemur::retrieval::SimpleKLRetMethod(*ind, SimpleKLParameter::smoothSupportFile, accumulator); model->setDocSmoothParam(SimpleKLParameter::docPrm); model->setQueryModelParam(SimpleKLParameter::qryPrm); DocStream *qryStream; try { qryStream = new lemur::parse::BasicDocStream(RetrievalParameter::textQuerySet); } catch (Exception &ex) { ex.writeMessage(cerr); throw Exception("QueryClarity", "Can't open query file"); } qryStream->startDocIteration(); TextQuery *q; while (qryStream->hasMore()) { Document *d = qryStream->nextDoc(); q = new TextQuery(*d); QueryRep *qr = model->computeQueryRep(*q); res.clear(); QueryClarity(qr, q->id(), &res, model, os); delete qr; delete q; } os.close(); delete model; delete qryStream; delete ind; return 0; }
int AppMain(int argc, char *argv[]) { ofstream ofs; Index * dbIndex; try { dbIndex = IndexManager::openIndex(LocalParameter::index); } catch (Exception &ex) { ex.writeMessage(); throw Exception("GenL2Norm", "Can't open index, check parameter index"); } // pre-compute IDF values double *idfV = new double[dbIndex->termCountUnique()+1]; TERMID_T i; for (i=1; i<=dbIndex->termCountUnique(); i++) { idfV[i] = log((dbIndex->docCount()+1)/(0.5+dbIndex->docCount(i))); } ofs.open(LocalParameter::L2File.c_str(), ios::out | std::ios::binary); for (i = 1; i <= dbIndex->docCount(); i++) { TermInfoList *qList = dbIndex->termInfoList(i); TermInfo *qInfo; qList->startIteration(); TERMID_T idx; COUNT_T dtf; double norm = 0, tmp; while (qList->hasMore()) { qInfo = qList->nextEntry(); idx = qInfo->termID(); dtf = qInfo->count(); tmp = dtf * idfV[idx]; norm += tmp * tmp; } delete qList; // docNorms[docID] = sqrt(norm); norm = sqrt(norm); if (norm == 0) norm = 1; ofs << i << " " << norm << endl; } ofs.close(); delete[](idfV); delete dbIndex; return 0; }
int AppMain(int argc, char * argv[]) { if (argc > 2) { cerr << "Usage: OfflineCluster <parameter file>" << endl; return -1; } COUNT_T i; Index *myIndex; try { myIndex = IndexManager::openIndex(ClusterParam::databaseIndex); } catch (Exception &ex) { ex.writeMessage(); throw Exception("OfflineCluster", "Can't open index, check parameter index"); } // construct cluster method. lemur::cluster::OfflineCluster* clusterDB; clusterDB = new lemur::cluster::OfflineCluster(*myIndex, ClusterParam::simType, ClusterParam::clusterType, ClusterParam::docMode); // crank through the collection COUNT_T numDocs = myIndex->docCount(); if (numDocs > 100) numDocs = 100; vector <DOCID_T> toCluster; for (i = 1; i <= numDocs; i++) { toCluster.push_back(i); } cout << "Using kmeans on " << numDocs << " documents..." << endl; vector <lemur::cluster::Cluster *> *clusters; clusters = clusterDB->kMeans(toCluster, ClusterParam::numParts, ClusterParam::maxIters); for (i = 0; i < clusters->size(); i++) { (*clusters)[i]->print(); delete((*clusters)[i]); } delete(clusters); cout << "Using bisecting kmeans on " << numDocs << " documents..." << endl; clusters = clusterDB->bisecting_kMeans(toCluster, ClusterParam::numParts, ClusterParam::numIters, ClusterParam::maxIters); for (i = 0; i < clusters->size(); i++) { (*clusters)[i]->print(); delete((*clusters)[i]); } delete(clusters); delete clusterDB; delete myIndex; return 0; }