int main(int argc, char *argv[]) { Index *ind; if (argc < 3) { cerr << "usage: dumpDoc <index_name> <internal/external docid> [-ext]" << endl; exit (1); } ind = IndexManager::openIndex(argv[1]); DOCID_T did; if (argc == 3) did = atoi(argv[2]); else did = ind->document(argv[2]); cout << ind->document(did) << ": " << ind->docLength(did) << endl; TermInfoList *tList = ind->termInfoList(did); if (tList == NULL) { cerr << ": empty termInfoList" << endl; exit (1); } TermInfo *info; tList->startIteration(); while (tList->hasMore()) { info = tList->nextEntry(); cout << info->termID() << ":" << ind->term(info->termID()) << ": " << info->count() << endl; } delete tList; delete(ind); return 0; }
int AppMain(int argc, char *argv[]) { ofstream ofs; Index * dbIndex; try { dbIndex = IndexManager::openIndex(LocalParameter::index); } catch (Exception &ex) { ex.writeMessage(); throw Exception("GenL2Norm", "Can't open index, check parameter index"); } // pre-compute IDF values double *idfV = new double[dbIndex->termCountUnique()+1]; TERMID_T i; for (i=1; i<=dbIndex->termCountUnique(); i++) { idfV[i] = log((dbIndex->docCount()+1)/(0.5+dbIndex->docCount(i))); } ofs.open(LocalParameter::L2File.c_str(), ios::out | std::ios::binary); for (i = 1; i <= dbIndex->docCount(); i++) { TermInfoList *qList = dbIndex->termInfoList(i); TermInfo *qInfo; qList->startIteration(); TERMID_T idx; COUNT_T dtf; double norm = 0, tmp; while (qList->hasMore()) { qInfo = qList->nextEntry(); idx = qInfo->termID(); dtf = qInfo->count(); tmp = dtf * idfV[idx]; norm += tmp * tmp; } delete qList; // docNorms[docID] = sqrt(norm); norm = sqrt(norm); if (norm == 0) norm = 1; ofs << i << " " << norm << endl; } ofs.close(); delete[](idfV); delete dbIndex; return 0; }
// ROOTPATH+'docToKeywords '+indexPath+' '+str(docno) int main(int argc, const char* argv[]) { Index *ind; if (argc == 2) { if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) { cout << endl << "Usage:" << endl << endl << "docToKeywords indexPath docno" << endl << endl << "Returns an ordered list of {occurance, word} tuples from" << "the specified document number." << endl << endl; return(0); } } ind = IndexManager::openIndex(argv[1]); TermInfoList *termList = ind->termInfoList(atoi(argv[2])); // iterate over entries in termList, i.e., all words in the document termList->startIteration(); TermInfo *tEntry; while (termList->hasMore()) { tEntry = termList->nextEntry(); cout << tEntry->count() << " " << (ind->term(tEntry->termID())) << endl; } }