Ejemplo n.º 1
0
  void TestHighFreqTerms(const char_t* index, int_t numTerms) {
	  long_t start = lucene::util::Misc::currentTimeMillis();

		IndexReader& reader = IndexReader::open(index);
    
        TermInfoQueue* tiq = new TermInfoQueue(numTerms);
		TermEnum* terms = &reader.getTerms();
    
        int_t minFreq = 0;
        while (terms->next()) {
          if (terms->DocFreq() > minFreq) {
			tiq->put(new TermInfo(terms->getTerm(), terms->DocFreq()));
            if (tiq->Size() > numTerms) {		  // if tiq overfull
              tiq->pop();				  // remove lowest in tiq
              minFreq = ((TermInfo*)tiq->top())->docFreq; // reset minFreq
            }
          }
        }
    
        while (tiq->Size() != 0) {
          TermInfo* termInfo = (TermInfo*)tiq->pop();
          //_cout << termInfo->term->toString() << _T(" ") << termInfo->docFreq << endl;
        }
    
        reader.close();
        delete &reader;

		_cout << (lucene::util::Misc::currentTimeMillis()-start) << _T(" milliseconds/") << numTerms << _T("Terms") << endl;
  }
Ejemplo n.º 2
0
	/**
     * Expands this range query into a BooleanQuery holding one TermQuery for
     * each enumerated term that falls inside the range.
     *
     * Terms are taken from <code>reader->terms(lowerTerm)</code> and consumed
     * while they belong to this query's field. When the range is not
     * inclusive, a term equal to the lower bound is skipped and a term equal
     * to the upper bound ends the scan. Each generated TermQuery receives
     * this query's boost.
     *
     * If anything throws during the scan, the partially built query and the
     * enumerator are released and the exception is rethrown.
     *
     * @param reader the <code>IndexReader</code> whose terms are enumerated
     * @return the newly allocated <code>BooleanQuery</code>
     */
    Query* RangeQuery::rewrite(IndexReader* reader){

        BooleanQuery* query = _CLNEW BooleanQuery;
        TermEnum* enumerator = reader->terms(lowerTerm);
        Term* lastTerm = NULL;
        try {
            // An exclusive range has to verify that a term is strictly
            // above the lower bound before accepting it.
            bool checkLower = !inclusive;

            const TCHAR* testField = getField();
            do {
                lastTerm = enumerator->term();

                // Stop at the end of the enumeration or when leaving our field.
                if (lastTerm == NULL || lastTerm->field() != testField)
                    break;

                const bool aboveLower =
                    !checkLower || _tcscmp(lastTerm->text(), lowerTerm->text()) > 0;
                if (aboveLower) {
                    // Once one term passed the lower bound, later ones need no check.
                    checkLower = false;

                    if (upperTerm != NULL) {
                        const int compare = _tcscmp(upperTerm->text(), lastTerm->text());
                        // Past the upper term, or equal to it in an exclusive range.
                        const bool pastUpper =
                            compare < 0 || (compare == 0 && !inclusive);
                        if (pastUpper)
                            break;
                    }

                    TermQuery* tq = _CLNEW TermQuery(lastTerm); // found a match
                    tq->setBoost(getBoost());                   // set the boost
                    query->add(tq, true, false, false);         // add to query
                }

                _CLDECDELETE(lastTerm);
            } while (enumerator->next());
        } catch (...) {
            _CLDECDELETE(lastTerm); // always need to release this
            _CLDELETE(query);       // on error, discard the partial query
            enumerator->close();
            _CLDELETE(enumerator);
            throw;                  // rethrow
        }

        // Normal exit: the loop may have left via break with a term still held.
        _CLDECDELETE(lastTerm);
        enumerator->close();
        _CLDELETE(enumerator);

        return query;
    }
Ejemplo n.º 3
0
void
indexdump(const char* dir) {
    IndexReader* indexreader = IndexReader::open(dir);
    int32_t max = indexreader->maxDoc();
    for (int i=0; i<max; ++i) {
        Document* doc = indexreader->document(i);
        if (doc) {
            docdump(doc);
        }
    }
    TermEnum* terms = indexreader->terms();
    Term* t = 0;
    while (terms->next()) {
        t = terms->term();
        printf("%s: %s\n", t2a(t->field()).c_str(), t2a(t->text()).c_str());
        _CLDECDELETE(t);
    }
}
Ejemplo n.º 4
0
   /**
    * Expands this prefix query into a BooleanQuery with one TermQuery per
    * enumerated term that is in the prefix's field and whose text starts
    * with the prefix text. Each TermQuery receives this query's boost.
    *
    * @param reader the IndexReader whose terms are enumerated
    */
   Query* PrefixQuery::rewrite(IndexReader* reader){
    BooleanQuery* query = _CLNEW BooleanQuery();
    TermEnum* enumerator = reader->terms(prefix);
	Term* lastTerm = NULL;
    try {
      const TCHAR* prefixText = prefix->text();
      const TCHAR* prefixField = prefix->field();
      const TCHAR* tmp;
      size_t i;
	  int32_t prefixLen = prefix->textLength();
      do {
        lastTerm = enumerator->term();
        // Fields are compared by pointer, not by content.
        // NOTE(review): presumably field names are interned so that equal
        // names share one pointer - confirm.
		if (lastTerm != NULL && lastTerm->field() == prefixField ){
		  
		  //now see if term->text() starts with prefixText
		  int32_t termLen = lastTerm->textLength();
		  if ( prefixLen>termLen )
			  break; //the prefix is longer than the term, can't be matched

            tmp = lastTerm->text();
            
            //check for prefix match in reverse, since most change will be at the end
            // i is a size_t, so the -1 in the condition converts to the
            // largest size_t value; the loop ends when --i wraps past zero
            // (and does not run at all when prefixLen is 0).
            for ( i=prefixLen-1;i!=-1;--i ){
                if ( tmp[i] != prefixText[i] ){
                    tmp=NULL;//signals inequality
                    break;
                }
            }
            // First non-matching term ends the scan.
            if ( tmp == NULL )
                break;

          TermQuery* tq = _CLNEW TermQuery(lastTerm);	  // found a match
          tq->setBoost(getBoost());                // set the boost
          query->add(tq,true,false, false);		  // add to query
        } else
          break;
        // Release the term consumed in this iteration before advancing.
		_CLDECDELETE(lastTerm);
      } while (enumerator->next());
    // Cleanup: closes and releases the enumerator and any term still held
    // after a break. NOTE(review): presumably _CLFINALLY runs this on both
    // normal and exceptional exit - confirm against the macro definition.
    }_CLFINALLY(
      enumerator->close();
	  _CLDELETE(enumerator);
	  _CLDECDELETE(lastTerm);
	);
Ejemplo n.º 5
0
/**
 * Collects distinct term texts that start with `keywordmatch`.
 *
 * For each field (the given `fieldnames`, or fieldNames() when that list is
 * empty) the index terms are enumerated starting from (field, prefix), and
 * every term text that is in that field and begins with the prefix is
 * gathered into an ordered set. Collection stops once `max` distinct texts
 * have been found.
 *
 * @param keywordmatch UTF-8 prefix the returned keywords must start with
 * @param fieldnames   fields to search; when empty, fieldNames() is used
 * @param max          upper bound on the number of keywords returned
 * @param offset       currently not used by this implementation
 * @return the collected keywords converted back with wchartoutf8, in the
 *         set's sorted order
 */
vector<string>
CLuceneIndexReader::keywords(const string& keywordmatch,
        const vector<string>& fieldnames, uint32_t max, uint32_t offset) {
    vector<string> fn;
    if (fieldnames.size()) {
        fn = fieldnames;
    } else {
        fn = fieldNames();
    }

    set<wstring> s;
    const wstring prefix = utf8toucs2(keywordmatch);
    const wchar_t* prefixtext = prefix.c_str();
    const wstring::size_type prefixLen = prefix.length();

    // Continue with the next field only while fewer than `max` keywords
    // were found (this was a shift, `<<`, which made the loop never run
    // for an empty set).
    for (vector<string>::const_iterator i = fn.begin();
            i != fn.end() && s.size() < max; ++i) {
        const wstring fieldname(utf8toucs2(*i));
        Term term(fieldname.c_str(), prefixtext);
        TermEnum* enumerator = reader->terms(&term);
        do {
            // NOTE(review): term(false) presumably returns a borrowed
            // pointer (no reference taken), so it is not released here,
            // as in the original code - confirm.
            Term* lastTerm = enumerator->term(false);
            if (lastTerm) {
                // Stop when the enumeration leaves the requested field or
                // reaches a term that no longer starts with the prefix.
                if (wcscmp(lastTerm->field(), fieldname.c_str()) != 0
                        || prefixLen > lastTerm->textLength()
                        || wcsncmp(lastTerm->text(), prefixtext, prefixLen)
                            != 0) {
                    break;
                }
                s.insert(lastTerm->text());
            }
        } while (enumerator->next() && s.size() < max);

        // The enumerator is created per field; release it per field.
        enumerator->close();
        delete enumerator;
    }

    vector<string> k;
    k.reserve(s.size());
    for (set<wstring>::const_iterator j = s.begin(); j != s.end(); ++j) {
        k.push_back(wchartoutf8(*j));
    }
    return k;
}