Пример #1
0
int main(int argc, char *argv[]) {
  Index *ind;
  if (argc < 3) {
    cerr << "usage: dumpDoc <index_name> <internal/external docid> [-ext]" 
	 << endl;
    exit (1);
  }
  ind = IndexManager::openIndex(argv[1]);
  DOCID_T did;
  if (argc == 3)
    did = atoi(argv[2]);
  else did = ind->document(argv[2]);
  cout << ind->document(did) << ": " << ind->docLength(did) << endl;
  TermInfoList *tList = ind->termInfoList(did);
  if (tList == NULL) {
    cerr << ": empty termInfoList" << endl;
    exit (1);
  }
  TermInfo *info;
  tList->startIteration();
  while (tList->hasMore()) {
    info = tList->nextEntry();
    cout << info->termID() << ":" << ind->term(info->termID()) << ": " << info->count() << endl;
  }
  delete tList;
  delete(ind);
  return 0;
}
Пример #2
0
int AppMain(int argc, char *argv[]) {
  ofstream ofs;
  Index * dbIndex;
  try {
    dbIndex = IndexManager::openIndex(LocalParameter::index);
  } 
  catch (Exception &ex) {
    ex.writeMessage();
    throw Exception("GenL2Norm", "Can't open index, check parameter index");
  }
  // pre-compute IDF values
  double *idfV = new double[dbIndex->termCountUnique()+1];
  TERMID_T i;
  for (i=1; i<=dbIndex->termCountUnique(); i++) {
    idfV[i] = log((dbIndex->docCount()+1)/(0.5+dbIndex->docCount(i)));
  }
  ofs.open(LocalParameter::L2File.c_str(), ios::out | std::ios::binary);
  for (i = 1; i <= dbIndex->docCount(); i++) {
    TermInfoList *qList = dbIndex->termInfoList(i);
    TermInfo *qInfo;
    qList->startIteration();
    TERMID_T idx;
    COUNT_T dtf;
    double norm = 0, tmp;
    while (qList->hasMore()) {
      qInfo = qList->nextEntry();
      idx = qInfo->termID();
      dtf = qInfo->count();
      tmp = dtf * idfV[idx];
      norm += tmp * tmp;
    }
    delete qList;
    //  docNorms[docID] = sqrt(norm);
    norm = sqrt(norm);
    if (norm == 0) norm = 1;
    ofs << i << " " << norm << endl;
  }
  ofs.close();
  delete[](idfV);
  delete dbIndex;
  return 0;
}
Пример #3
0
// ROOTPATH+'docToKeywords '+indexPath+' '+str(docno)
int main(int argc, const char* argv[])
{
	Index *ind;
	
	if (argc == 2) {
		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
			cout << endl << "Usage:" << endl << endl <<
				"docToKeywords indexPath docno" << endl << endl << 
				"Returns an ordered list of {occurance, word} tuples from" <<
				"the specified document number." << endl << endl;
			return(0);
		}
	}

	ind = IndexManager::openIndex(argv[1]);  
  	TermInfoList *termList = ind->termInfoList(atoi(argv[2]));
	// iterate over entries in termList, i.e., all words in the document
	termList->startIteration();   
	TermInfo *tEntry;
	while (termList->hasMore()) {
		tEntry = termList->nextEntry();
 		cout << tEntry->count() << " " << (ind->term(tEntry->termID())) << endl;
	}
}