Пример #1
0
  TCHAR* FuzzyQuery::toString(const TCHAR* field) const{
	  StringBuffer buffer(100); // TODO: Have a better estimation for the initial buffer length
	  Term* term = getTerm(false); // no need to increase ref count
	  if ( field==NULL || _tcscmp(term->field(),field)!=0 ) {
		  buffer.append(term->field());
		  buffer.appendChar( _T(':'));
	  }
	  buffer.append(term->text());
	  buffer.appendChar( _T('~') );
	  buffer.appendFloat(minimumSimilarity,1);
	  buffer.appendBoost(getBoost());
	  return buffer.giveBuffer();
  }
Пример #2
0
	/**
     * FIXME: Describe <code>rewrite</code> method here.
     *
     * @param reader an <code>IndexReader</code> value
     * @return a <code>Query</code> value
     * @exception IOException if an error occurs
     */
    Query* RangeQuery::rewrite(IndexReader* reader){

        BooleanQuery* query = _CLNEW BooleanQuery;
        TermEnum* enumerator = reader->terms(lowerTerm);
		Term* lastTerm = NULL;
        try {
            bool checkLower = false;
            if (!inclusive) // make adjustments to set to exclusive
                checkLower = true;

            const TCHAR* testField = getField();
            do {
                lastTerm = enumerator->term();
                if (lastTerm != NULL && lastTerm->field() == testField ) {
                    if (!checkLower || _tcscmp(lastTerm->text(),lowerTerm->text()) > 0) {
                        checkLower = false;
                        if (upperTerm != NULL) {
                            int compare = _tcscmp(upperTerm->text(),lastTerm->text());
                            /* if beyond the upper term, or is exclusive and
                             * this is equal to the upper term, break out */
                            if ((compare < 0) || (!inclusive && compare == 0))
                                break;
                        }
                        TermQuery* tq = _CLNEW TermQuery(lastTerm); // found a match
                        tq->setBoost(getBoost()); // set the boost
                        query->add(tq, true, false, false); // add to query
                    }
                }else {
                    break;
                }
				_CLDECDELETE(lastTerm);
            }
            while (enumerator->next());
		}catch(...){
			_CLDECDELETE(lastTerm); //always need to delete this
			_CLDELETE(query); //in case of error, delete the query
            enumerator->close();
			_CLDELETE(enumerator);
			throw; //rethrow
		}
		_CLDECDELETE(lastTerm); //always need to delete this
		enumerator->close();
		_CLDELETE(enumerator);

        return query;
    }
Пример #3
0
void
indexdump(const char* dir) {
    IndexReader* indexreader = IndexReader::open(dir);
    int32_t max = indexreader->maxDoc();
    for (int i=0; i<max; ++i) {
        Document* doc = indexreader->document(i);
        if (doc) {
            docdump(doc);
        }
    }
    TermEnum* terms = indexreader->terms();
    Term* t = 0;
    while (terms->next()) {
        t = terms->term();
        printf("%s: %s\n", t2a(t->field()).c_str(), t2a(t->text()).c_str());
        _CLDECDELETE(t);
    }
}
Пример #4
0
   Query* PrefixQuery::rewrite(IndexReader* reader){
    BooleanQuery* query = _CLNEW BooleanQuery();
    TermEnum* enumerator = reader->terms(prefix);
	Term* lastTerm = NULL;
    try {
      const TCHAR* prefixText = prefix->text();
      const TCHAR* prefixField = prefix->field();
      const TCHAR* tmp;
      size_t i;
	  int32_t prefixLen = prefix->textLength();
      do {
        lastTerm = enumerator->term();
		if (lastTerm != NULL && lastTerm->field() == prefixField ){
		  
		  //now see if term->text() starts with prefixText
		  int32_t termLen = lastTerm->textLength();
		  if ( prefixLen>termLen )
			  break; //the prefix is longer than the term, can't be matched

            tmp = lastTerm->text();
            
            //check for prefix match in reverse, since most change will be at the end
            for ( i=prefixLen-1;i!=-1;--i ){
                if ( tmp[i] != prefixText[i] ){
                    tmp=NULL;//signals inequality
                    break;
                }
            }
            if ( tmp == NULL )
                break;

          TermQuery* tq = _CLNEW TermQuery(lastTerm);	  // found a match
          tq->setBoost(getBoost());                // set the boost
          query->add(tq,true,false, false);		  // add to query
        } else
          break;
		_CLDECDELETE(lastTerm);
      } while (enumerator->next());
    }_CLFINALLY(
      enumerator->close();
	  _CLDELETE(enumerator);
	  _CLDECDELETE(lastTerm);
	);
	void QueryTermExtractor::getTerms(const Query * query, WeightedTermList * terms, bool prohibited, const TCHAR* fieldName) 
	{
		if (query->instanceOf( BooleanQuery::getClassName() ))
        {
			getTermsFromBooleanQuery((BooleanQuery *) query, terms, prohibited, fieldName);
        }
// FilteredQuery not implemented yet
// 		else if (query->instanceOf( FilteredQuery::getClassName() ))
// 			getTermsFromFilteredQuery((FilteredQuery *) query, terms);
		else
        {
            TermSet nonWeightedTerms;
            query->extractTerms(&nonWeightedTerms);
            for (TermSet::iterator iter = nonWeightedTerms.begin(); iter != nonWeightedTerms.end(); iter++)
            {
                Term * term = (Term *)(*iter);
                if ( fieldName == NULL || term->field() == fieldName )
                    terms->insert(_CLNEW WeightedTerm(query->getBoost(), term->text()));
                _CLLDECDELETE( term );
            }
        }
	}