Пример #1
0
/**
 * Verifies term extraction from rewritten wildcard queries.
 *
 * Two patterns on the "data" field are rewritten against the test index and
 * the terms reported by extractTerms() are checked:
 *   - "aaaa?" must yield exactly 3 terms, each one of aaaaa / aaaab / aaaac;
 *   - "aaa*"  must yield exactly 5 terms, each starting with "aaa".
 */
void testExtractFromWildcardQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;

    // Scenario 1: single-character wildcard.
    {
        Term *          pPattern   = _CLNEW Term( _T("data"), _T("aaaa?") );
        WildcardQuery * pQuery     = _CLNEW WildcardQuery( pPattern );
        Query *         pRewritten = pQuery->rewrite( pReader );
        pRewritten->extractTerms( &termSet );
        _CLLDECDELETE( pPattern );

        assertEqualsMsg( _T( "wrong number of terms" ), 3, termSet.size() );

        TermSet::iterator itTerm = termSet.begin();
        while( itTerm != termSet.end() )
        {
            const TCHAR * text = (*itTerm)->text();
            const bool isExpected =    0 == _tcscmp( _T( "aaaaa" ), text )
                                    || 0 == _tcscmp( _T( "aaaab" ), text )
                                    || 0 == _tcscmp( _T( "aaaac" ), text );
            if( !isExpected )
            {
                assertTrueMsg( _T( "wrong term" ), false );
            }
            ++itTerm;
        }

        // Release the extracted terms, then the queries (rewrite() may
        // hand back the original query object, which must be freed once).
        clearTermSet( termSet );
        if( pRewritten != pQuery )
        {
            _CLDELETE( pRewritten );
        }
        _CLDELETE( pQuery );
    }

    // Scenario 2: trailing multi-character wildcard.
    {
        Term *          pPattern   = _CLNEW Term( _T("data"), _T("aaa*") );
        WildcardQuery * pQuery     = _CLNEW WildcardQuery( pPattern );
        Query *         pRewritten = pQuery->rewrite( pReader );
        pRewritten->extractTerms( &termSet );
        _CLLDECDELETE( pPattern );

        assertEqualsMsg( _T( "wrong number of terms" ), 5, termSet.size() );

        TermSet::iterator itTerm = termSet.begin();
        while( itTerm != termSet.end() )
        {
            Term * pCurrent = *itTerm;
            assertTrueMsg( _T( "wrong term" ), ( 0 == _tcsncmp( _T( "aaa" ), pCurrent->text(), 3 )));
            ++itTerm;
        }

        clearTermSet( termSet );
        if( pRewritten != pQuery )
        {
            _CLDELETE( pRewritten );
        }
        _CLDELETE( pQuery );
    }

    // Tear down the reader and the index.
    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}
Пример #2
0
/**
 * Releases every Term held by the set and then empties the set.
 *
 * Each element is handed to _CLLDECDELETE through a local pointer copy.
 */
void clearTermSet( TermSet& termSet )
{
    TermSet::iterator itCurrent = termSet.begin();
    while( itCurrent != termSet.end() )
    {
        Term * pReleased = *itCurrent;
        _CLLDECDELETE( pReleased );
        ++itCurrent;
    }

    termSet.clear();
}
Пример #3
0
bool TermSet::is_subset(const TermSet& y) const
{
  TermSet::iterator i, e, j;

  for(i=y.begin(), e=y.end(); i!=e; i++) {
    if ( (j=find(*i))==end() ) return false;  // term in y isn't here
    if ( !(*j).is_subset(*i) ) return false;  // y term isn't a subset of this term
  }

  return true;
}
Пример #4
0
/**
 * Verifies term extraction from rewritten TermQuery objects.
 *
 * For each of two single-term queries on the "data" field, extractTerms()
 * on the rewritten query must report exactly one term that compares equal
 * to the term the query was built from.
 */
void testExtractFromTermQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;

    Term *  pFirstTerm   = _CLNEW Term( _T("data"), _T("aaaaa") );
    Term *  pSecondTerm  = _CLNEW Term( _T("data"), _T("bbbbb") );
    Query * pFirstQuery  = _CLNEW TermQuery( pFirstTerm );
    Query * pSecondQuery = _CLNEW TermQuery( pSecondTerm );
    Query * pFirstRewritten  = pFirstQuery->rewrite( pReader );
    Query * pSecondRewritten = pSecondQuery->rewrite( pReader );

    // First query: one term, equal to pFirstTerm.
    pFirstRewritten->extractTerms( &termSet );
    assertEqualsMsg( _T( "wrong number of terms" ), 1, termSet.size() );
    assertEqualsMsg( _T( "wrong term" ), 0, pFirstTerm->compareTo( *(termSet.begin())) );
    clearTermSet( termSet );

    // Second query: one term, equal to pSecondTerm.
    pSecondRewritten->extractTerms( &termSet );
    assertEqualsMsg( _T( "wrong number of terms" ), 1, termSet.size() );
    assertEqualsMsg( _T( "wrong term" ), 0, pSecondTerm->compareTo( *(termSet.begin())) );
    clearTermSet( termSet );

    _CLLDECDELETE( pFirstTerm );
    _CLLDECDELETE( pSecondTerm );

    // rewrite() may return the query itself; free each object only once.
    if( pFirstQuery != pFirstRewritten )
    {
        _CLDELETE( pFirstRewritten );
    }
    _CLDELETE( pFirstQuery );

    if( pSecondQuery != pSecondRewritten )
    {
        _CLDELETE( pSecondRewritten );
    }
    _CLDELETE( pSecondQuery );

    // Tear down the reader and the index.
    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}
Пример #5
0
/**
 * Folds one production (nt -> rule) into first[nt].
 *
 * The rule is scanned left to right:
 *   - a terminal is added to first[nt] and ends the scan;
 *   - a non-terminal contributes its current first set; the scan continues
 *     past it only if that non-terminal is marked nullable;
 *   - any other symbol is reported on stdout and skipped.
 *
 * @param nt    non-terminal on the left-hand side of the production
 * @param rule  right-hand side of the production
 * @return true if first[nt] grew during this call, false otherwise
 */
bool Grammar::calculateFirstSet(NonTerminal nt, SententialForm * rule) {

    SententialForm::Node * node = rule->begin();
    bool changed = false;

    while(node) {
        VocabSymbol * sym = node->symbol;

        // Determine which kind of symbol this is.
        Terminal * term = dynamic_cast<Terminal *>(sym);
        NonTerminal * nonterm = dynamic_cast<NonTerminal *>(sym);

        if (term) {

            if (first[nt].find(*term) == first[nt].end()) {
                first[nt].insert(*term);
                changed = true;
            }

            // A terminal ends the scan. Return the accumulated flag rather
            // than a plain false: nullable non-terminals seen earlier in
            // this rule may already have extended first[nt].
            return changed;

        } else if (nonterm) {

            // FIXME: There are some really inefficient operations here.
            // Since this is a one time thing, I'll leave it for now... -Alan
            unsigned int oldSize = first[nt].size();
            TermSet newSet;
            insert_iterator<TermSet> newSetIterator(newSet, newSet.begin());
            set_union(first[nt].begin(), first[nt].end(),
                      first[*nonterm].begin(), first[*nonterm].end(),
                      newSetIterator);

            // The union can only grow, so a size difference means new
            // terminals were added.
            if (oldSize != newSet.size()) {
                changed = true;
                first[nt] = newSet;
            }

            // Symbols after a non-nullable non-terminal cannot start nt.
            if (!nullable[*nonterm]) {
                return changed;
            }

        } else {
            cout << "Found a symbol that is neither terminal nor non-terminal!" << endl;
        }

        // Check the next one.
        node = node->next;
    }
    return changed;
}
Пример #6
0
/**
 * Verifies term extraction from a rewritten FuzzyQuery.
 *
 * A fuzzy query for "aaaab" on the "data" field (minimum similarity 0.7)
 * is rewritten against the test index; extractTerms() must then report
 * exactly 4 terms, each one of aaaaa / aaaab / aaabb / aaaac.
 */
void testExtractFromFuzzyQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;

    Term *       pSeed      = _CLNEW Term( _T("data"), _T("aaaab") );
    FuzzyQuery * pQuery     = _CLNEW FuzzyQuery( pSeed, 0.7f );
    Query *      pRewritten = pQuery->rewrite( pReader );
    pRewritten->extractTerms( &termSet );
    _CLLDECDELETE( pSeed );

    assertEqualsMsg( _T( "wrong number of terms" ), 4, termSet.size() );

    TermSet::iterator itTerm = termSet.begin();
    while( itTerm != termSet.end() )
    {
        const TCHAR * text = (*itTerm)->text();
        const bool isExpected =    0 == _tcscmp( _T( "aaaaa" ), text )
                                || 0 == _tcscmp( _T( "aaaab" ), text )
                                || 0 == _tcscmp( _T( "aaabb" ), text )
                                || 0 == _tcscmp( _T( "aaaac" ), text );
        if( !isExpected )
        {
            assertTrueMsg( _T( "wrong term" ), false );
        }
        ++itTerm;
    }

    // Release the extracted terms, then the queries (rewrite() may hand
    // back the original query object, which must be freed only once).
    clearTermSet( termSet );
    if( pRewritten != pQuery )
    {
        _CLDELETE( pRewritten );
    }
    _CLDELETE( pQuery );

    // Tear down the reader and the index.
    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}
	void QueryTermExtractor::getTerms(const Query * query, WeightedTermList * terms, bool prohibited, const TCHAR* fieldName) 
	{
		if (query->instanceOf( BooleanQuery::getClassName() ))
        {
			getTermsFromBooleanQuery((BooleanQuery *) query, terms, prohibited, fieldName);
        }
// FilteredQuery not implemented yet
// 		else if (query->instanceOf( FilteredQuery::getClassName() ))
// 			getTermsFromFilteredQuery((FilteredQuery *) query, terms);
		else
        {
            TermSet nonWeightedTerms;
            query->extractTerms(&nonWeightedTerms);
            for (TermSet::iterator iter = nonWeightedTerms.begin(); iter != nonWeightedTerms.end(); iter++)
            {
                Term * term = (Term *)(*iter);
                if ( fieldName == NULL || term->field() == fieldName )
                    terms->insert(_CLNEW WeightedTerm(query->getBoost(), term->text()));
                _CLLDECDELETE( term );
            }
        }
	}
Пример #8
0
/**
 * Verifies term extraction from a rewritten BooleanQuery.
 *
 * Three SHOULD clauses are added, two of which use the same term text
 * ("aaabb"); extractTerms() on the rewritten query must report exactly
 * 2 terms, each comparing equal to one of the two distinct terms.
 */
void testExtractFromBooleanQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;

    Term * pTermA         = _CLNEW Term( _T("data"), _T("aaaab") );
    Term * pTermB         = _CLNEW Term( _T("data"), _T("aaabb") );
    Term * pTermBRepeated = _CLNEW Term( _T("data"), _T("aaabb") );

    BooleanQuery * pBoolean = _CLNEW BooleanQuery();
    pBoolean->add( _CLNEW TermQuery( pTermA ), true, BooleanClause::SHOULD );
    pBoolean->add( _CLNEW TermQuery( pTermB ), true, BooleanClause::SHOULD );
    pBoolean->add( _CLNEW TermQuery( pTermBRepeated ), true, BooleanClause::SHOULD );

    Query * pRewritten = pBoolean->rewrite( pReader );

    pRewritten->extractTerms( &termSet );
    assertEqualsMsg( _T( "wrong number of terms" ), 2, termSet.size() );

    TermSet::iterator itTerm = termSet.begin();
    while( itTerm != termSet.end() )
    {
        Term * pCurrent = *itTerm;
        assertTrueMsg( _T( "wrong term" ), ( 0 == pTermA->compareTo( pCurrent ) || 0 == pTermB->compareTo( pCurrent )));
        ++itTerm;
    }
    clearTermSet( termSet );

    _CLLDECDELETE( pTermA );
    _CLLDECDELETE( pTermB );
    _CLLDECDELETE( pTermBRepeated );

    // rewrite() may return the query itself; free each object only once.
    if( pRewritten != pBoolean )
    {
        _CLDELETE( pRewritten );
    }
    _CLDELETE( pBoolean );

    // Tear down the reader and the index.
    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}
Пример #9
0
// Strict ordering of term sets: element-wise lexicographical comparison of
// this set's range against y's range.
bool TermSet::operator<(const TermSet& y) const
{
  const bool precedes =
      std::lexicographical_compare(begin(), end(), y.begin(), y.end());
  return precedes;
}
Пример #10
0
// Two term sets are equal when they have the same size and their elements
// compare equal pairwise, in iteration order.
bool TermSet::operator==(const TermSet& y) const
{
  // The size check comes first so std::equal never reads past y's range.
  return size() == y.size() && std::equal(begin(), end(), y.begin());
}
Пример #11
0
/**
 * Folds one production (nt -> rule) into the follow sets of the
 * non-terminals that occur on its right-hand side.
 *
 * For every non-terminal B in the rule, the symbols after B are scanned:
 *   - a terminal is added to follow[B] and ends the scan;
 *   - a non-terminal contributes its first set to follow[B]; the scan
 *     continues past it only if it is marked nullable;
 *   - any other symbol is skipped.
 * If the scan runs off the end of the rule, follow[nt] is merged into
 * follow[B].
 *
 * @param nt    non-terminal on the left-hand side of the production
 * @param rule  right-hand side of the production
 * @return true if any follow set grew during this call, false otherwise
 */
bool Grammar::calculateFollowSet(NonTerminal nt, SententialForm * rule) {
	
	SententialForm::Node * node = rule->begin();
	bool changed = false;

	while(node) {
		VocabSymbol * sym = node->symbol;

		// Only non-terminals have follow sets to update.
		NonTerminal * nonterm = dynamic_cast<NonTerminal *>(sym);

		if (nonterm) {

			// FIXME: There are some really inefficient operations here.
			// Since this is a one time thing, I'll leave it for now... -Alan


			SententialForm::Node * canFollow = node->next;
			while (canFollow) {
				Terminal * fTerm = dynamic_cast<Terminal *>(canFollow->symbol);
				NonTerminal * fNonterm = dynamic_cast<NonTerminal *>(canFollow->symbol);

				// A terminal in the suffix after the non-terminal.
				if (fTerm) {

					// All other suffix vocab will not be in the follow
					// (at least not from this rule).
					// The flag is only ever raised, never cleared: an
					// already-present terminal must not erase changes
					// recorded earlier in this call.
					if (follow[*nonterm].find(*fTerm) == follow[*nonterm].end()) {
						follow[*nonterm].insert(*fTerm);
						changed = true;
					}
					break;

				} else if (fNonterm) {

					unsigned int oldSize = follow[*nonterm].size();
					// New follow set for comparison.
					TermSet newSet;
					insert_iterator<TermSet> newSetIterator(newSet, newSet.begin());

					set_union(follow[*nonterm].begin(), follow[*nonterm].end(),
					          first[*fNonterm].begin(), first[*fNonterm].end(),
					          newSetIterator);

					if (oldSize != newSet.size()) {
						changed = true;
						follow[*nonterm] = newSet;
					}

					if (nullable[*fNonterm]) {
						canFollow = canFollow->next;
					} else {
						break;
					}

				} else {

					// Neither terminal nor non-terminal: step over it so
					// the scan cannot spin forever on the same node.
					canFollow = canFollow->next;
				}

			}

			// If we ever reach the very end of the production,
			// we need to add the follow-set of the parent to the follow-set
			// of the current non-terminal.
			if (canFollow == 0) {

				// Add the parent set here.
				unsigned int oldSize = follow[*nonterm].size();
				// New follow set for comparison.
				TermSet newSet;
				insert_iterator<TermSet> newSetIterator(newSet, newSet.begin());

				set_union(follow[*nonterm].begin(), follow[*nonterm].end(),
				          follow[nt].begin(), follow[nt].end(),
				          newSetIterator);

				if (oldSize != newSet.size()) {
					changed = true;
					follow[*nonterm] = newSet;
				}

			}
		} 
		  
		// Check the next one.
		node = node->next;
	}

	return changed;
}