Beispiel #1
0
/**
 * Collects the "key" field of every document in the lexicon index.
 *
 * When hasIndex() reports that no index exists, buildIndex() is run first;
 * a non-zero result from it yields an empty list. Any exception raised while
 * opening or reading the index also yields an empty list.
 *
 * @return the keys in document order, or an empty list on failure
 */
QStringList ZefaniaLex::getAllKeys()
{
    // Closes and frees the reader on every exit path, including exceptions.
    struct ReaderGuard {
        IndexReader* reader;
        explicit ReaderGuard(IndexReader* r) : reader(r) {}
        ~ReaderGuard() {
            if(reader == 0) {
                return;
            }
            try {
                reader->close();
            } catch(...) {
                // a failing close() must not escape the destructor
            }
            delete reader;
        }
    };

    try {
        if(!hasIndex()) {
            if(buildIndex() != 0) {
                return QStringList();
            }
        }
        const QString index = indexPath();
        ReaderGuard guard(IndexReader::open(index.toStdString().c_str()));
        IndexReader* reader = guard.reader;
        QStringList ret;
        for(int i = 0; i < reader->numDocs(); i++) {
            Document doc;
            reader->document(i, doc);
    #ifdef OBV_USE_WSTRING
            ret.append(QString::fromWCharArray(doc.get(_T("key"))));
    #else
            ret.append(QString::fromUtf16((const ushort*)doc.get(_T("key"))));
    #endif
        }
        return ret;
    }
    catch(...)
    {
        return QStringList();
    }
}
Beispiel #2
0
/**
 * Writes a fresh 100-document index into `dir` and checks the object name of
 * the reader that IndexReader::open() returns for it.
 *
 * @param tc            test case used for the assertions
 * @param dir           directory that receives the index
 * @param multiSegment  when true the writer is flushed every tenth document
 *                      and a "MultiSegmentReader" is expected; when false the
 *                      index is optimized and a "SegmentReader" is expected
 */
void createIndex(CuTest* tc, Directory* dir, bool multiSegment) {
    WhitespaceAnalyzer analyzer;
    IndexWriter writer(dir, &analyzer, true);
    writer.setMergePolicy(_CLNEW LogDocMergePolicy());

    Document document;
    int docNo = 0;
    while (docNo < 100) {
        createDocument(document, docNo, 4);
        writer.addDocument(&document);
        const bool tenthDoc = (docNo % 10) == 0;
        if (multiSegment && tenthDoc) {
            writer.flush();
        }
        ++docNo;
    }

    if (!multiSegment) {
        writer.optimize();
    }
    writer.close();

    IndexReader* opened = IndexReader::open(dir);
    if (!multiSegment) {
        CuAssert(tc,_T("check is segment"), strcmp(opened->getObjectName(),"SegmentReader")==0);
    } else {
        CuAssert(tc,_T("check is multi"), strcmp(opened->getObjectName(),"MultiSegmentReader")==0);
    }
    opened->close();
    _CLDELETE(opened);
}
/**
 * Verifies that the index has the correct number of documents.
 */
void TestSpansAdvanced2::testVerifyIndex()
{
    IndexReader * reader = IndexReader::open( directory );
    assertEquals( 8, reader->numDocs() );
    reader->close();
    _CLDELETE( reader );
}
    void testEqualScores() 
    {
        // NOTE: uses index build in *this* setUp
        
        IndexReader * pReader = IndexReader::open( m_pSmall );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );

	    Hits * pResult;

        // some hits match more terms then others, score should be the same
        Query * q = csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true );
        pResult = pSearch->search( q );
        size_t numHits = pResult->length();
        assertEqualsMsg( _T( "wrong number of results" ), 6, numHits );
        float_t score = pResult->score( 0 );
        for( size_t i = 1; i < numHits; i++ )
        {
            assertTrueMsg( _T( "score was not the same" ), score == pResult->score( i ));
        }
        _CLDELETE( pResult );
        _CLDELETE( q );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
Beispiel #5
0
int
listFields(int argc, char** argv) {
    // parse arguments
    parseArguments(argc, argv);
    string backend = options['t'];
    string indexdir = options['d'];

    // check arguments: indexdir
    if (indexdir.length() == 0) {
        pe("Provide the directory with the index.\n");
        return usage(argc, argv);
    }

    // create an index manager
    IndexManager* manager = getIndexManager(backend, indexdir);
    if (manager == 0) {
        return usage(argc, argv);
    }
    IndexReader* reader = manager->indexReader();
    vector<string> fields = reader->fieldNames();
    vector<string>::const_iterator i;
    for (i=fields.begin(); i!=fields.end(); ++i) {
        printf("%s\n", i->c_str());
    }
    IndexPluginLoader::deleteIndexManager(manager);
    return 0;
}
Beispiel #6
0
/**
 * Returns the "key" field of every document in the lexicon index, caching the
 * result in m_entryList.
 *
 * A non-empty m_entryList is returned as is. Otherwise the index is built
 * when hasIndex() reports that none exists (a non-zero buildIndex() result
 * yields an empty list), every document is read, and the collected keys are
 * stored in m_entryList before being returned. A CLuceneError is logged via
 * myWarning(); it and any other exception yield an empty list.
 *
 * @return the keys in document order, or an empty list on failure
 */
QStringList ZefaniaLex::getAllKeys()
{
    // Closes and frees the reader on every exit path, including exceptions.
    struct ReaderGuard {
        IndexReader* reader;
        explicit ReaderGuard(IndexReader* r) : reader(r) {}
        ~ReaderGuard() {
            if(reader == 0) {
                return;
            }
            try {
                reader->close();
            } catch(...) {
                // a failing close() must not escape the destructor
            }
            delete reader;
        }
    };

    if(!m_entryList.isEmpty()) {
        return m_entryList;
    }
    try {
        if(!hasIndex()) {
            if(buildIndex() != 0) {
                return QStringList();
            }
        }
        const QString index = indexPath();
        ReaderGuard guard(IndexReader::open(index.toStdString().c_str()));
        IndexReader* reader = guard.reader;
        QStringList ret;
        for(int i = 0; i < reader->numDocs(); i++) {
            Document doc;
            reader->document(i, doc);

            ret.append(SearchTools::toQString(doc.get(_T("key"))));
        }
        m_entryList = ret;
        return ret;
    }
    catch(CLuceneError &err) {
        myWarning() << "clucene error = " << err.what();
        return QStringList();

    }
    catch(...) {
        return QStringList();
    }
}
Beispiel #7
0
/**
 * 初始化 全量index
 *
 * @param path     数据存放路径
 *
 * @return  0: success ;   -1: 程序处理失败
 */
int initFullIndex(const char * path)
{
    IndexReader * reader = IndexReader::getInstance();

    if ( NULL == reader )
    {
        TERR("IndexReader instance is null");
        return -1;
    }

    if ( (NULL == path) || strlen( path ) <= 0 )
    {
        TERR("index node's path attribute is null");
        return -1;
    }

    TLOG("begin to load full index! path:%s", path);

    if ( reader->open( path ) < 0)
    {
        TERR("load full index failed! path:%s", path);
        return -1;
    }

    TLOG("load full index success!");

    return 0;
}
Beispiel #8
0
int main()
{
	Parser p;
	IndexReader in;
    in.genIndexFromFile();
    Search s;
    string query;
	while (getline(cin,query)) {
        Query* q=p.parse(query);
        //cout<<"------"<<endl;
        //cout<<q->sign<<" "<<q->token<<endl;
        /*for(int i=0;i<q->size();i++)
        {
        	Query* s=q->get(i);
        	//cout<<s->sign<<" "<<s->token<<endl;
        	for(int j=0;j<s->size();j++)
        	{
        		Query* p=s->get(j);
        	//	cout<<p->sign<<" "<<p->token<<endl;
        	}
        }*/
        vector<vector<string> > l;
    	s.search(q,in,l);
    	s.show(q,in,l);
		delete q;
    }
}
// BK> the test functions differ only in the RAMDirectory constructor they use,
//     so the checks they share live here
void checkDir(CuTest *tc, MockRAMDirectory * ramDir) {

    // the tracked size must agree with the recomputed one
    CuAssertTrue(tc, ramDir->sizeInBytes == ramDir->getRecomputedSizeInBytes(), _T("RAMDir size"));

    // a reader gives us the document count
    IndexReader * indexReader = IndexReader::open(ramDir);
    CuAssertEquals(tc, docsToAdd, indexReader->numDocs(), _T("document count"));

    // fetch each document through a searcher and make sure it has a "content" field
    IndexSearcher * indexSearcher = _CLNEW IndexSearcher(indexReader);
    Document fetched;
    int docId = 0;
    while (docId < docsToAdd) {
        indexSearcher->doc(docId, fetched);
        CuAssertTrue(tc, fetched.getField(_T("content")) != NULL, _T("content is NULL"));
        ++docId;
    }

    // close both before freeing either
    indexReader->close();
    indexSearcher->close();
    _CLLDELETE(indexReader);
    _CLLDELETE(indexSearcher);
}
/**
 * Thread body that repeatedly opens a reader on the given directory until
 * ATOMIC_SEARCH_RUN_TIME_SEC seconds have passed or atomicSearchFailed is set.
 *
 * Each reader is expected to report exactly 100 documents; a different count
 * or a CLuceneError sets atomicSearchFailed. The reader opened in an
 * iteration is closed and freed before the loop continues or ends.
 *
 * @param _directory  the Directory* to read from, passed as an untyped pointer
 */
_LUCENE_THREAD_FUNC(atomicSearchTest, _directory){
  Directory* directory = (Directory*)_directory;

  uint64_t stopTime = Misc::currentTimeMillis() + 1000*ATOMIC_SEARCH_RUN_TIME_SEC;
  int count = 0;
  try {
    while(Misc::currentTimeMillis() < stopTime && !atomicSearchFailed) {
      IndexReader* r = IndexReader::open(directory);

      bool readThrew = false;
      try {
        if ( 100 != r->numDocs() ){
          fprintf(stderr, "err 2: 100 != %d \n", r->numDocs());
          atomicSearchFailed = true;
        }
      } catch (CLuceneError& e) {
        fprintf(stderr, "err 3: %d:%s\n", e.number(), e.what());
        atomicSearchFailed = true;
        readThrew = true;
      }

      // release the reader on the failure path as well as the normal one
      r->close();
      _CLDELETE(r);

      if ( readThrew ) {
        break;
      }

      count++;
    }
  } catch (CLuceneError& e) {
    fprintf(stderr, "err 4: #%d: %s\n", e.number(), e.what());
    atomicSearchFailed = true;
  }

  _LUCENE_THREAD_FUNC_RETURN(0);
}
    void testBoost()
    {
        // NOTE: uses index build in *this* setUp

        IndexReader * pReader = IndexReader::open( m_pSmall );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );
	    Hits * pResult;

        // test for correct application of query normalization
        // must use a non score normalizing method for this.
        Query * q = csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true );
        q->setBoost( 100 );
        pResult = pSearch->search( q );
        for( size_t i = 1; i < pResult->length(); i++ )
        {
            assertTrueMsg( _T( "score was not was not correct" ), 1.0f == pResult->score( i ));
        }
        _CLDELETE( pResult );
        _CLDELETE( q );


        //
        // Ensure that boosting works to score one clause of a query higher
        // than another.
        //
        Query * q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true );  // matches document #0
        q1->setBoost( .1f );
        Query * q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true );  // matches document #1
        BooleanQuery * bq = _CLNEW BooleanQuery( true );
        bq->add( q1, true, BooleanClause::SHOULD );
        bq->add( q2, true, BooleanClause::SHOULD );

        pResult = pSearch->search( bq );
        assertEquals( 1, pResult->id( 0 ));
        assertEquals( 0, pResult->id( 1 ));
        assertTrue( pResult->score( 0 ) > pResult->score( 1 ));
        _CLDELETE( pResult );
        _CLDELETE( bq );

        q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true );  // matches document #0
        q1->setBoost( 10.0f );
        q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true );  // matches document #1
        bq = _CLNEW BooleanQuery( true );
        bq->add( q1, true, BooleanClause::SHOULD );
        bq->add( q2, true, BooleanClause::SHOULD );

        pResult = pSearch->search( bq );
        assertEquals( 0, pResult->id( 0 ));
        assertEquals( 1, pResult->id( 1 ));
        assertTrue( pResult->score( 0 ) > pResult->score( 1 ));
        _CLDELETE( pResult );
        _CLDELETE( bq );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
Beispiel #12
0
/**
 * Rewrites two wildcard queries on the "data" field against a freshly set up
 * index and checks the terms extracted from each rewrite: "aaaa?" must give
 * exactly aaaaa, aaaab and aaaac, and "aaa*" must give five terms that all
 * start with "aaa".
 */
void testExtractFromWildcardQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;
    WildcardQuery * pattern;
    Term *          patternTerm;
    Query *         rewritten;

    // --- single-character wildcard ---------------------------------------
    patternTerm = _CLNEW Term( _T("data"), _T("aaaa?") );
    pattern = _CLNEW WildcardQuery( patternTerm );
    rewritten = pattern->rewrite( pReader );
    rewritten->extractTerms( &termSet );
    _CLLDECDELETE( patternTerm );

    assertEqualsMsg( _T( "wrong number of terms" ), 3, termSet.size() );
    for( TermSet::iterator it = termSet.begin(); it != termSet.end(); ++it )
    {
        Term * found = *it;
        const bool expected =    0 == _tcscmp( _T( "aaaaa" ), found->text())
                              || 0 == _tcscmp( _T( "aaaab" ), found->text())
                              || 0 == _tcscmp( _T( "aaaac" ), found->text());
        if( !expected )
        {
            assertTrueMsg( _T( "wrong term" ), false );
        }
    }

    clearTermSet( termSet );
    // rewrite() may hand back the query itself; free it only once
    if( rewritten != pattern )
        _CLDELETE( rewritten );
    _CLDELETE( pattern );

    // --- multi-character wildcard ----------------------------------------
    patternTerm = _CLNEW Term( _T("data"), _T("aaa*") );
    pattern = _CLNEW WildcardQuery( patternTerm );
    rewritten = pattern->rewrite( pReader );
    rewritten->extractTerms( &termSet );
    _CLLDECDELETE( patternTerm );

    assertEqualsMsg( _T( "wrong number of terms" ), 5, termSet.size() );
    for( TermSet::iterator it = termSet.begin(); it != termSet.end(); ++it )
    {
        Term * found = *it;
        assertTrueMsg( _T( "wrong term" ), ( 0 == _tcsncmp( _T( "aaa" ), found->text(), 3 )));
    }

    clearTermSet( termSet );
    if( rewritten != pattern )
        _CLDELETE( rewritten );
    _CLDELETE( pattern );

    // --- teardown ---------------------------------------------------------
    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}
/**
 * Asserts that an index reader opened on `dir` reports `numDocs` for both
 * maxDoc() and numDocs().
 */
static void verifyNumDocs(CuTest *tc, Directory * dir, int numDocs) {
    IndexReader * opened = IndexReader::open(dir);

    assertEquals(numDocs, opened->maxDoc());
    assertEquals(numDocs, opened->numDocs());

    opened->close();
    _CLLDELETE(opened);
}
/**
 * Calls close() on every reader held in `readers`, in index order.
 * The readers are only closed here, not freed or removed.
 */
void SegmentMerger::closeReaders()
{
    uint32_t pos = 0;
    while (pos < readers.size()) {
        readers[pos]->close();
        ++pos;
    }
}
/**
 * Builds the record source for this index from a set of index files.
 *
 * For every mapping in `init_para->fids` the matching name file is looked up
 * in `init_para->namefiles`, its index file name is derived with
 * dropping_name2index(), and an IndexReader is opened on it. Readers whose
 * first pop_front() yields a record are pushed onto a MinimumHeap; empty or
 * non-existent index files are skipped.
 *
 * @param init_para  pointer to an index_init_para_t
 * @param reader     receives the MinimumHeap on success; left untouched on
 *                   failure
 * @return 0 on success, otherwise the error from dropping_name2index() or
 *         pop_front(); on failure the heap is deleted
 */
int
SmallFileIndex::init_data_source(void *init_para,
                                 RecordReader **reader)
{
    int ret = 0;
    vector<plfs_pathback> &droppings = *(((index_init_para_t *)init_para)->namefiles);
    list<index_mapping_t> *fid = ((index_init_para_t *)init_para)->fids;
    MinimumHeap *min_heap = new MinimumHeap(fid->size(), index_compare_func);

    mlog(SMF_DAPI, "Start to build index %p.", this);
    if (fid->size() == 0) {
        // nothing to merge: hand back the empty heap
        *reader = min_heap;
        return 0;
    }
    unsigned int buf_size = get_read_buffer_size(fid->size());
    list<index_mapping_t>::const_iterator itr;
    for (itr = fid->begin(); itr != fid->end(); itr++ ) {
        string index_fname;
        IndexReader *indexfile;
        int pop_result;
        struct plfs_pathback entry;
        // check the index before it is used to subscript droppings
        assert(itr->second < droppings.size());
        entry.back = droppings[itr->second].back;
        ret = dropping_name2index(droppings[itr->second].bpath, entry.bpath);
        if (ret) {
            mlog(SMF_ERR, "Unable to get index file name from name file:%s.",
                 droppings[itr->second].bpath.c_str());
            break;
        }
        indexfile = new IndexReader(entry, *itr, buf_size);
        /* Only after this pop_front(), we can get the first record. */
        pop_result = indexfile->pop_front();
        if (pop_result == 1 && indexfile->front()) {
            mlog(SMF_DAPI, "Load index entries from %s.", entry.bpath.c_str());
            min_heap->push_back(indexfile);
        } else if (pop_result == 0 || pop_result == -ENOENT) {
            delete indexfile;
            mlog(SMF_DAPI, "Skip empty or non-existent index file:%s.",
                 entry.bpath.c_str());
        } else {
            delete indexfile;
            mlog(SMF_ERR, "Unable to read index entries from %s, err = %d!",
                 entry.bpath.c_str(), pop_result);
            ret = pop_result;
            break;
        }
    }
    if (ret == 0) {
        mlog(SMF_DAPI, "Successfully build index %p.", this);
        *reader = min_heap;
    } else {
        delete min_heap;
        mlog(SMF_DAPI, "Failed to build index %p. errno = %d.", this, ret);
    }
    return ret;
}
Beispiel #16
0
/**
 * Runs a single query against the "contents" field of an index and prints
 * the path and score of every hit, followed by timing information.
 *
 * @param index     location of the index to open
 * @param fobizzle  query string; only its first 79 characters are used
 */
void SearchFilesC(const char* index, const char* fobizzle){

    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);

    // strncpy() does not terminate the destination when the source fills it,
    // so copy one character less and terminate explicitly
    strncpy(line, fobizzle, sizeof(line) - 1);
    line[sizeof(line) - 1] = 0;

    // pick up changes made to the index since the reader was opened
    IndexReader* newreader = reader->reopen();
    if ( newreader != reader ){
        _CLLDELETE(reader);
        reader = newreader;
    }
    IndexSearcher s(reader);

    STRCPY_AtoT(tline,line,80);
    Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

    buf = q->toString(_T("contents"));

    _tprintf(_T("Searching for: %S\n\n"), buf);
    _CLDELETE_LCARRAY(buf);

    uint64_t str = Misc::currentTimeMillis();
    Hits* h = s.search(q);
    uint32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
    str = Misc::currentTimeMillis();
    for ( size_t i=0; i < h->length(); i++ ){
        Document* doc = &h->doc(i);
        // %d expects an int, so the size_t counter is converted explicitly
        _tprintf(_T("%d. %S - %f\n"), (int)i, doc->get(_T("path")), h->score(i));
    }

    printf("\n\nSearch took: %d ms.\n", srch);
    printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

    _CLLDELETE(h);
    _CLLDELETE(q);

    s.close();

    reader->close();
    _CLLDELETE(reader);
}
Beispiel #17
0
/**
 * Interactive search loop: prompts for a query on standard input, runs it
 * against the "contents" field of the index and prints path and score of
 * every hit plus timing information.
 *
 * The loop ends on an empty line, or when standard input reaches end of file
 * or fails.
 *
 * @param index  location of the index to open
 */
void SearchFiles(const char* index){
    standard::StandardAnalyzer analyzer;
    char line[80];
    TCHAR tline[80];
    TCHAR* buf;

    IndexReader* reader = IndexReader::open(index);
    while (true) {
        printf("Enter query string: ");
        char* tmp = fgets(line,80,stdin);
        // fgets() keeps returning NULL once input has ended, so retrying
        // would never terminate
        if ( tmp == NULL ) break;

        // drop the trailing newline, but keep the last character of an
        // over-long line that was cut off before its newline
        size_t len = strlen(line);
        if ( len > 0 && line[len-1] == '\n' )
            line[len-1]=0;

        // pick up changes made to the index since the last query
        IndexReader* newreader = reader->reopen();
        if ( newreader != reader ){
            _CLLDELETE(reader);
            reader = newreader;
        }
        IndexSearcher s(reader);

        if ( strlen(line) == 0 )
            break;
        STRCPY_AtoT(tline,line,80);
        Query* q = QueryParser::parse(tline,_T("contents"),&analyzer);

        buf = q->toString(_T("contents"));
        _tprintf(_T("Searching for: %s\n\n"), buf);
        _CLDELETE_LCARRAY(buf);

        uint64_t str = Misc::currentTimeMillis();
        Hits* h = s.search(q);
        uint32_t srch = (int32_t)(Misc::currentTimeMillis() - str);
        str = Misc::currentTimeMillis();

        for ( size_t i=0;i<h->length();i++ ){
            Document* doc = &h->doc(i);
            // %d expects an int, so the size_t counter is converted explicitly
            _tprintf(_T("%d. %s - %f\n"), (int)i, doc->get(_T("path")), h->score(i));
            //print result to web interface:
            LINH_PRINT_WEB(i,doc->get(_T("path")),h->score(i));
        }

        printf("\n\nSearch took: %d ms.\n", srch);
        printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

        _CLLDELETE(h);
        _CLLDELETE(q);

        s.close();
    }
    reader->close();
    _CLLDELETE(reader);
}
/**
 * Asserts that iterating the TermDocs of `term` in the index stored in `dir`
 * visits exactly `numDocs` entries.
 */
static void verifyTermDocs(CuTest *tc, Directory * dir, Term * term, int numDocs) {
    IndexReader * opened = IndexReader::open(dir);
    TermDocs * postings = opened->termDocs(term);

    // count how many times the enumeration advances
    int seen = 0;
    for (; postings->next(); ++seen) {
    }
    assertEquals(numDocs, seen);

    postings->close();
    _CLLDELETE(postings);
    opened->close();
    _CLLDELETE(opened);
}
Beispiel #19
0
    /**
     * Runs the search described by the baton stored in req->data.
     *
     * Opens (and reopens) a reader on baton->index, parses baton->search,
     * executes it and stores an array of {"id", "score"} objects in
     * baton->results. Failures are reported through baton->error. The reader,
     * query, hits and temporary string buffers are released before returning.
     *
     * NOTE(review): this function creates V8 handles; confirm it is only
     * invoked on a thread where that is allowed.
     *
     * @return always 0
     */
    static int EIO_Search(eio_req* req) 
    {
        search_baton_t* baton = static_cast<search_baton_t*>(req->data);

        standard::StandardAnalyzer analyzer;
        IndexReader* reader = 0;
        try {
            reader = IndexReader::open(*(*baton->index));
            // reopen() can throw as well, so it shares the handlers below
            IndexReader* newreader = reader->reopen();
            if ( newreader != reader ) {
                delete reader;
                reader = newreader;
            }
        } catch (CLuceneError& E) {
          baton->error.assign(E.what());
          delete reader;
          return 0;
        } catch(...) {
          baton->error = "Got an unknown exception";
          delete reader;
          return 0;
        }
        IndexSearcher s(reader);

        // declared outside the try block so they can be released on every path
        TCHAR* searchString = 0;
        Query* q = 0;
        Hits* hits = 0;
        try {
            searchString = STRDUP_AtoT(*(*baton->search));
            q = QueryParser::parse(searchString, _T(""), &analyzer);
            hits = s.search(q);

            HandleScope scope;
            // Build the result array
            Local<v8::Array> resultArray = v8::Array::New();
            for (size_t i=0; i < hits->length(); i++) {
                Document& doc(hits->doc(i));
                // {"id":"ab34", "score":1.0}
                Local<Object> resultObject = Object::New();
                // the converted id is a temporary copy; it is released with
                // free(), like the STRDUP_AtoT buffer above
                char* id = STRDUP_TtoA(doc.get(_T("_id")));
                resultObject->Set(String::New("id"), String::New(id));
                free(id);
                resultObject->Set(String::New("score"), Number::New(hits->score(i)));
                resultArray->Set(i, resultObject);
            }
            baton->results = Persistent<v8::Array>::New(resultArray);
        } catch (CLuceneError& E) {
          baton->error.assign(E.what());
        } catch(...) {
          baton->error = "Got an unknown exception";
        }

        // the hits are freed before the query they were produced from
        delete hits;
        delete q;
        free(searchString);

        try {
            s.close();
            reader->close();
        } catch(...) {
            // nothing sensible can be done about a failing close at this point
        }
        delete reader;

        return 0;
    }
Beispiel #20
0
int
get(int argc, char** argv) {
    // parse arguments
    parseArguments(argc, argv);
    string backend = options['t'];
    string indexdir = options['d'];

    // check arguments: indexdir
    if (indexdir.length() == 0) {
        pe("Provide the directory with the index.\n");
        return usage(argc, argv);
    }

    // check arguments: dirs
    if (arguments.size() == 0) {
        pe("'%s' '%s'\n", backend.c_str(), indexdir.c_str());
        pe("Provide one or more files to search.\n");
        return usage(argc, argv);
    }
    
    // create an index manager
    IndexManager* manager = getIndexManager(backend, indexdir);
    if (manager == 0) {
        return usage(argc, argv);
    }
    IndexReader* reader = manager->indexReader();
    QueryParser parser;
    
    for (vector<string>::iterator iter = arguments.begin();
         iter != arguments.end(); ++iter) {
        Query query = parser.buildQuery( "system.location:'"+ *iter + '\'');
        vector<IndexedDocument> matches = reader->query(query, 0, 10);
        if (matches.size() == 0)
            printf ("%s: is not indexed\n", iter->c_str());
        else
        {
            printf ("Information associated to %s:\n", iter->c_str());
            for (vector<IndexedDocument>::iterator it = matches.begin();
                 it != matches.end(); ++it)
            {
                printIndexedDocument(*it);
            }
        }
    }
    
    IndexPluginLoader::deleteIndexManager(manager);
    return 0;
}
Beispiel #21
0
void
indexdump(const char* dir) {
    IndexReader* indexreader = IndexReader::open(dir);
    int32_t max = indexreader->maxDoc();
    for (int i=0; i<max; ++i) {
        Document* doc = indexreader->document(i);
        if (doc) {
            docdump(doc);
        }
    }
    TermEnum* terms = indexreader->terms();
    Term* t = 0;
    while (terms->next()) {
        t = terms->term();
        printf("%s: %s\n", t2a(t->field()).c_str(), t2a(t->text()).c_str());
        _CLDECDELETE(t);
    }
}
Beispiel #22
0
/**
 * Writes the first retN retrieved entries to `OF`, one per line, as
 * tab-separated topic number, the result of theIndex->findDoc() for the
 * document id, and the document score.
 */
void RetManager::show(ostream &OF)
{
        unsigned row = 0;
        while(row < retN)
        {
                OF << topicNum << "\t";
                OF << theIndex->findDoc(retDocID[row]) << "\t";
                OF << retDocScore[row] << endl;
                ++row;
        }
}
// case 4: tail segments, invariants hold, copy, invariants not hold
//
// Deletes the first 20 documents of the auxiliary index, adds that index
// twice to the main one without optimizing, and checks the resulting counts.
void testMergeAfterCopy(CuTest * tc) {

    Directory * dir = _CLNEW RAMDirectory();   // main directory
    Directory * aux = _CLNEW RAMDirectory();   // auxiliary directory

    WhitespaceAnalyzer  an;

    setUpDirs(tc, dir, aux);

    // remove documents 0..19 from the auxiliary index
    IndexReader * auxReader = IndexReader::open(aux);
    int docId = 0;
    while (docId < 20) {
      auxReader->deleteDocument(docId);
      ++docId;
    }
    assertEquals(10, auxReader->numDocs());
    auxReader->close();
    _CLLDELETE(auxReader);

    IndexWriter4Test * mainWriter = newWriter(dir, &an, false);
    mainWriter->setMaxBufferedDocs(4);
    mainWriter->setMergeFactor(4);

    // the auxiliary directory is deliberately listed twice
    ValueArray<Directory*> sources(2);
    sources[0] = aux;
    sources[1] = aux;
    mainWriter->addIndexesNoOptimize(sources);

    assertEquals(1020, mainWriter->docCount());
    assertEquals(1000, mainWriter->getDocCount(0));
    mainWriter->close();
    _CLLDELETE(mainWriter);

    // make sure the index is correct
    verifyNumDocs(tc, dir, 1020);

    dir->close();
    _CLLDELETE(dir);

    aux->close();
    _CLLDELETE(aux);
}
    void testBooleanOrderUnAffected()
    {
        // NOTE: uses index build in *this* setUp

        IndexReader * pReader = IndexReader::open( m_pSmall );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );

        // first do a regular RangeQuery which uses term expansion so
        // docs with more terms in range get higher scores
        Term * pLower = _CLNEW Term( _T( "data" ), _T( "1" ));
        Term * pUpper = _CLNEW Term( _T( "data" ), _T( "4" ));
        Query * rq = _CLNEW RangeQuery( pLower, pUpper, true );
        _CLLDECDELETE( pUpper );
        _CLLDECDELETE( pLower );

        Hits * pExpected = pSearch->search( rq );
        size_t numHits = pExpected->length();
 
        // now do a boolean where which also contains a
        // ConstantScoreRangeQuery and make sure the order is the same
        
        BooleanQuery * q = _CLNEW BooleanQuery();
        q->add( rq, true, BooleanClause::MUST );
        q->add( csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true ), true, BooleanClause::MUST );
 
        Hits * pActual = pSearch->search( q );
        assertEqualsMsg( _T( "wrong number of hits" ), numHits, pActual->length() );
        for( size_t i = 0; i < numHits; i++ )
        {
            assertEqualsMsg( _T( "mismatch in docid for a hit" ), pExpected->id( i ), pActual->id( i ));
        }
        _CLDELETE( pActual );
        _CLDELETE( pExpected );
        _CLDELETE( q );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
Beispiel #25
0
/**
 * Rewrites a fuzzy query for "aaaab" (minimum similarity 0.7) on the "data"
 * field and checks that the extracted terms are exactly four, each one of
 * aaaaa, aaaab, aaabb or aaaac.
 */
void testExtractFromFuzzyQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;
    FuzzyQuery *    fuzzyQuery;
    Term *          seedTerm;
    Query *         rewritten;

    seedTerm = _CLNEW Term( _T("data"), _T("aaaab") );
    fuzzyQuery = _CLNEW FuzzyQuery( seedTerm, 0.7f );
    rewritten = fuzzyQuery->rewrite( pReader );
    rewritten->extractTerms( &termSet );
    _CLLDECDELETE( seedTerm );

    assertEqualsMsg( _T( "wrong number of terms" ), 4, termSet.size() );
    for( TermSet::iterator it = termSet.begin(); it != termSet.end(); ++it )
    {
        Term * found = *it;
        const bool expected =    0 == _tcscmp( _T( "aaaaa" ), found->text())
                              || 0 == _tcscmp( _T( "aaaab" ), found->text())
                              || 0 == _tcscmp( _T( "aaabb" ), found->text())
                              || 0 == _tcscmp( _T( "aaaac" ), found->text());
        if( !expected )
        {
            assertTrueMsg( _T( "wrong term" ), false );
        }
    }

    clearTermSet( termSet );
    // rewrite() may hand back the query itself; free it only once
    if( rewritten != fuzzyQuery )
        _CLDELETE( rewritten );
    _CLDELETE( fuzzyQuery );

    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}
// Merges the field definitions of all readers into a newly created FieldInfos.
// NOTE(review): only the first part of this function is visible here; the
// listing ends after the reader loop, before any return statement.
int32_t SegmentMerger::mergeFields()
{
    //Func - Merge the fields of all segments 
    //Pre  - true
    //Post - The field infos and field values of all segments have been merged.

    //Create a new FieldInfos
    fieldInfos = _CLNEW FieldInfos();		  // merge field names

    //Condition check to see if fieldInfos points to a valid instance
    CND_CONDITION(fieldInfos != NULL, "Memory allocation for fieldInfos failed");

    IndexReader* reader = NULL;

    // not used in the visible part of the function
    int32_t docCount = 0;

    //Iterate through all readers
    for (uint32_t i = 0; i < readers.size(); i++) {
        //get the i-th reader
        reader = readers[i];
        //Condition check to see if reader points to a valid instance
        CND_CONDITION(reader != NULL,"No IndexReader found");

        // scratch list, refilled for each field category below
        StringArrayWithDeletor tmp;

        // Each category is fetched and passed to addIndexed() with its own
        // combination of three flags.
        // NOTE(review): the flags presumably mean store-term-vector,
        // store-positions and store-offsets, in that order - confirm against
        // addIndexed().
        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION_OFFSET, tmp);
        addIndexed(reader, fieldInfos, tmp, true, true, true);

        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION, tmp);
        addIndexed(reader, fieldInfos, tmp, true, true, false);

        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_OFFSET, tmp);
        addIndexed(reader, fieldInfos, tmp, true, false, true);

        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR, tmp);
        addIndexed(reader, fieldInfos, tmp, true, false, false);

        tmp.clear();
        reader->getFieldNames(IndexReader::INDEXED, tmp);
        addIndexed(reader, fieldInfos, tmp, false, false, false);

        // unindexed fields are added to fieldInfos directly rather than
        // through addIndexed()
        tmp.clear();
        reader->getFieldNames(IndexReader::UNINDEXED, tmp);
        if (tmp.size() > 0) {
            // one slot more than there are names
            TCHAR** arr = _CL_NEWARRAY(TCHAR*,tmp.size()+1);
            tmp.toArray(arr);
            fieldInfos->add((const TCHAR**)arr, false);
            _CLDELETE_ARRAY(arr);
            //no need to delete the contents, since tmp is responsible for it
        }
    }
Beispiel #27
0
/**
 * Rewrites two TermQuery instances on the "data" field and checks that each
 * rewrite yields exactly one extracted term, equal to the term the query was
 * built from.
 */
void testExtractFromTermQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;

    Term * termA = _CLNEW Term( _T("data"), _T("aaaaa") );
    Term * termB = _CLNEW Term( _T("data"), _T("bbbbb") );
    Query * queryA = _CLNEW TermQuery( termA );
    Query * queryB = _CLNEW TermQuery( termB );
    Query * rewrittenA = queryA->rewrite( pReader );
    Query * rewrittenB = queryB->rewrite( pReader );

    // first query
    rewrittenA->extractTerms( &termSet );
    assertEqualsMsg( _T( "wrong number of terms" ), 1, termSet.size() );
    assertEqualsMsg( _T( "wrong term" ), 0, termA->compareTo( *(termSet.begin())) );
    clearTermSet( termSet );

    // second query
    rewrittenB->extractTerms( &termSet );
    assertEqualsMsg( _T( "wrong number of terms" ), 1, termSet.size() );
    assertEqualsMsg( _T( "wrong term" ), 0, termB->compareTo( *(termSet.begin())) );
    clearTermSet( termSet );

    _CLLDECDELETE( termA );
    _CLLDECDELETE( termB );

    // rewrite() may hand back the query itself; free each object only once
    if( rewrittenA != queryA )
        _CLDELETE( rewrittenA );
    _CLDELETE( queryA );

    if( rewrittenB != queryB )
        _CLDELETE( rewrittenB );
    _CLDELETE( queryB );

    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}
Beispiel #28
0
/**
 * Builds a BooleanQuery from three optional TermQuery clauses, two of which
 * use the same term text, and checks that the rewritten query yields exactly
 * two extracted terms, each equal to the first or the second term.
 */
void testExtractFromBooleanQuery( CuTest * tc )
{
    Directory *     pIndex  = setUpIndex();
    IndexReader *   pReader = IndexReader::open( pIndex );
    TermSet         termSet;

    // the second and third term carry the same text
    Term * termA = _CLNEW Term( _T("data"), _T("aaaab") );
    Term * termB = _CLNEW Term( _T("data"), _T("aaabb") );
    Term * termC = _CLNEW Term( _T("data"), _T("aaabb") );
    BooleanQuery * combined = _CLNEW BooleanQuery();
    combined->add( _CLNEW TermQuery( termA ), true, BooleanClause::SHOULD );
    combined->add( _CLNEW TermQuery( termB ), true, BooleanClause::SHOULD );
    combined->add( _CLNEW TermQuery( termC ), true, BooleanClause::SHOULD );

    Query * rewritten = combined->rewrite( pReader );
    rewritten->extractTerms( &termSet );

    assertEqualsMsg( _T( "wrong number of terms" ), 2, termSet.size() );
    TermSet::iterator it = termSet.begin();
    while( it != termSet.end() )
    {
        Term * found = *it;
        assertTrueMsg( _T( "wrong term" ), ( 0 == termA->compareTo( found ) || 0 == termB->compareTo( found )));
        it++;
    }
    clearTermSet( termSet );

    _CLLDECDELETE( termA );
    _CLLDECDELETE( termB );
    _CLLDECDELETE( termC );

    // rewrite() may hand back the query itself; free it only once
    if( rewritten != combined )
        _CLDELETE( rewritten );
    _CLDELETE( combined );

    pReader->close();
    _CLDELETE( pReader );

    closeIndex( pIndex );
    pIndex = NULL;
}
    /////////////////////////////////////////////////////////////////////////////
    // CLucene specific 
    // Visual Studio 2005 shows memory leaks for this test, but some other 
    // tools do not detect any memory leaks. So what is right?
    // IN VC80 shows memory leaks ONLY if both sub-queries are added as 
    // MUST BooleanClauses. 
    void testBooleanMemLeaks()
    {
        IndexReader * pReader = IndexReader::open( m_pSmall );
	    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );
 
        Query * q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true );  // matches document #0
        Query * q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true );  // matches document #1
        BooleanQuery * bq = _CLNEW BooleanQuery( true );
        bq->add( q1, true, BooleanClause::MUST );
        bq->add( q2, true, BooleanClause::MUST );

        Hits * pResult = pSearch->search( bq );

        _CLDELETE( pResult );
        _CLDELETE( bq );

        pSearch->close();
        _CLDELETE( pSearch );

        pReader->close();
        _CLDELETE( pReader );
    }
Beispiel #30
0
/**
 * Brings the index up to date with the directories delivered by `dirlister`.
 *
 * For each directory the files on disk are compared with the entries the
 * index reader reports for that path:
 *  - files missing from the index, and non-directory files whose mtime
 *    differs, are (re)indexed;
 *  - entries that are unchanged, and changed directories, are kept as they
 *    are;
 *  - every other index entry for the path is deleted, which includes the old
 *    entry of a file that is about to be reindexed.
 *
 * The loop stops when nextDir() returns a negative value or when `caller` is
 * set and its continueAnalysis() returns false. Any exception is caught and
 * reported as "Unknown error" on stderr.
 *
 * @param analyzer  analyzer handed to each AnalysisResult
 */
void
DirAnalyzer::Private::update(StreamAnalyzer* analyzer) {
    IndexReader* reader = manager.indexReader();
    vector<pair<string, struct stat> > dirfiles;
    map<string, time_t> dbdirfiles;
    vector<string> toDelete;
    vector<pair<string, struct stat> > toIndex;
    try {
        string path;
        // loop over all directories delivered by the lister
        int r = dirlister.nextDir(path, dirfiles);
        while (r >= 0 && (caller == 0 || caller->continueAnalysis())) {
            // get the files that are in the current database
            reader->getChildren(path, dbdirfiles);

            // get all files in this directory
            vector<pair<string, struct stat> >::const_iterator end
                = dirfiles.end();
            map<string, time_t>::const_iterator dbend = dbdirfiles.end();
            for (vector<pair<string, struct stat> >::const_iterator i
                    = dirfiles.begin(); i != end; ++i) {
                const string& filepath(i->first);
                time_t mtime = i->second.st_mtime;

                // check if this file is new or not
                map<string, time_t>::iterator j = dbdirfiles.find(filepath);
                bool newfile = j == dbend;
                bool updatedfile = !newfile && j->second != mtime;

                if (newfile || (updatedfile && !S_ISDIR(i->second.st_mode))) {
                    // if the file has not yet been indexed or if the mtime has
                    // changed, index it
                    // if a directory has been updated, this will not change the index
                    // so the entry is not removed from the index, nor reindexed
                    toIndex.push_back(make_pair(filepath, i->second));
                } else {
                    // files left in dbdirfiles after this loop will be deleted from the
                    // index. because this file has not changed, it should not be
                    // removed from the index
                    // (j is a valid entry here: newfile is false in this branch)
                    dbdirfiles.erase(j);
                }
            }
            // all the files left in dbdirfiles, are not in the current
            // directory and should be deleted
            for (map<string, time_t>::const_iterator i = dbdirfiles.begin();
                    i != dbend; ++i) {
                toDelete.push_back(i->first);
            }
            if (toDelete.size() > 0) {
                manager.indexWriter()->deleteEntries(toDelete);
            }
            vector<pair<string, struct stat> >::const_iterator fend
                = toIndex.end();
            for (vector<pair<string, struct stat> >::const_iterator i
                    = toIndex.begin(); i != fend; ++i) {
                AnalysisResult analysisresult(i->first, i->second.st_mtime,
                    *manager.indexWriter(), *analyzer, path);
                if (S_ISREG(i->second.st_mode)) {
                    // only regular files get their content streamed in
                    InputStream* file = FileInputStream::open(i->first.c_str());
                    analysisresult.index(file);
                    delete file;
                } else {
                    analysisresult.index(0);
                }
            }
            // start the next directory with empty work lists
            toDelete.clear();
            toIndex.clear();
            r = dirlister.nextDir(path, dirfiles);
        }
    } catch(...) {
        fprintf(stderr, "Unknown error\n");
    }
}