/**
 * Returns all keys stored in the lexicon's CLucene index.
 *
 * Builds the index first when it does not exist yet; returns an empty
 * list when building fails or any exception is thrown.
 */
QStringList ZefaniaLex::getAllKeys()
{
    try {
        if(!hasIndex()) {
            if(buildIndex() != 0) {
                return QStringList();
            }
        }
        const QString index = indexPath();
        IndexReader* reader = IndexReader::open(index.toStdString().c_str());
        QStringList ret;
        for(int i = 0; i < reader->numDocs(); i++) {
            Document doc;
            reader->document(i, doc);
#ifdef OBV_USE_WSTRING
            ret.append(QString::fromWCharArray(doc.get(_T("key"))));
#else
            ret.append(QString::fromUtf16((const ushort*)doc.get(_T("key"))));
#endif
        }
        // Fix: the reader was previously leaked on the success path —
        // close and delete it before returning.
        reader->close();
        _CLDELETE(reader);
        return ret;
    } catch(...) {
        return QStringList();
    }
}
// Builds a 100-document test index in `dir` and asserts that the reader
// opened over it has the expected concrete type.
//
// When `multiSegment` is true, the writer is flushed every 10 documents
// so several segments remain and a MultiSegmentReader is expected;
// otherwise the index is optimized down to a single segment and a
// SegmentReader is expected.
void createIndex(CuTest* tc, Directory* dir, bool multiSegment) {
    WhitespaceAnalyzer whitespaceAnalyzer;
    // `true` => create a fresh index, overwriting any existing one.
    IndexWriter w(dir, &whitespaceAnalyzer, true);
    w.setMergePolicy(_CLNEW LogDocMergePolicy());
    Document doc;
    for (int i = 0; i < 100; i++) {
        // createDocument() repopulates `doc` in place for document i.
        createDocument(doc, i, 4);
        w.addDocument(&doc);
        if (multiSegment && (i % 10) == 0) {
            w.flush();
        }
    }
    if (!multiSegment) {
        w.optimize();
    }
    w.close();
    // Verify the reader type matches the segment layout we built.
    IndexReader* r = IndexReader::open(dir);
    if (multiSegment) {
        CuAssert(tc,_T("check is multi"), strcmp(r->getObjectName(),"MultiSegmentReader")==0);
    } else {
        CuAssert(tc,_T("check is segment"), strcmp(r->getObjectName(),"SegmentReader")==0);
    }
    r->close();
    _CLDELETE(r);
}
/** * Verifies that the index has the correct number of documents. */ void TestSpansAdvanced2::testVerifyIndex() { IndexReader * reader = IndexReader::open( directory ); assertEquals( 8, reader->numDocs() ); reader->close(); _CLDELETE( reader ); }
void testEqualScores() { // NOTE: uses index build in *this* setUp IndexReader * pReader = IndexReader::open( m_pSmall ); IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader ); Hits * pResult; // some hits match more terms then others, score should be the same Query * q = csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true ); pResult = pSearch->search( q ); size_t numHits = pResult->length(); assertEqualsMsg( _T( "wrong number of results" ), 6, numHits ); float_t score = pResult->score( 0 ); for( size_t i = 1; i < numHits; i++ ) { assertTrueMsg( _T( "score was not the same" ), score == pResult->score( i )); } _CLDELETE( pResult ); _CLDELETE( q ); pSearch->close(); _CLDELETE( pSearch ); pReader->close(); _CLDELETE( pReader ); }
// CLI sub-command: print the name of every field present in the index,
// one per line, to stdout.
//
// Returns 0 on success, or the result of usage() when arguments are
// missing or the index manager cannot be created.
int listFields(int argc, char** argv) {
    // parse arguments
    parseArguments(argc, argv);
    string backend = options['t'];
    string indexdir = options['d'];
    // check arguments: indexdir
    if (indexdir.length() == 0) {
        pe("Provide the directory with the index.\n");
        return usage(argc, argv);
    }
    // create an index manager
    IndexManager* manager = getIndexManager(backend, indexdir);
    if (manager == 0) {
        return usage(argc, argv);
    }
    // NOTE(review): the reader is presumably owned by the manager and
    // released by deleteIndexManager() below — confirm against the
    // IndexManager API before changing cleanup here.
    IndexReader* reader = manager->indexReader();
    vector<string> fields = reader->fieldNames();
    vector<string>::const_iterator i;
    for (i=fields.begin(); i!=fields.end(); ++i) {
        printf("%s\n", i->c_str());
    }
    IndexPluginLoader::deleteIndexManager(manager);
    return 0;
}
/**
 * Returns all keys stored in the lexicon's CLucene index, caching the
 * result in m_entryList so later calls avoid re-reading the index.
 *
 * Builds the index first when it does not exist yet; returns an empty
 * list when building fails or an exception is thrown.
 */
QStringList ZefaniaLex::getAllKeys()
{
    // Serve the cached list when it has already been computed.
    if(!m_entryList.isEmpty()) {
        return m_entryList;
    }
    try {
        if(!hasIndex()) {
            if(buildIndex() != 0) {
                return QStringList();
            }
        }
        const QString index = indexPath();
        IndexReader* reader = IndexReader::open(index.toStdString().c_str());
        QStringList ret;
        for(int i = 0; i < reader->numDocs(); i++) {
            Document doc;
            reader->document(i, doc);
            ret.append(SearchTools::toQString(doc.get(_T("key"))));
        }
        // Fix: the reader was previously leaked on the success path —
        // close and delete it before returning.
        reader->close();
        _CLDELETE(reader);
        m_entryList = ret;
        return ret;
    } catch(CLuceneError &err) {
        myWarning() << "clucene error = " << err.what();
        return QStringList();
    } catch(...) {
        return QStringList();
    }
}
/**
 * Initialize the full index.
 *
 * @param path directory where the index data is stored
 *
 * @return 0 on success; -1 on failure
 */
int initFullIndex(const char * path) {
    IndexReader * reader = IndexReader::getInstance();
    if ( NULL == reader ) {
        TERR("IndexReader instance is null");
        return -1;
    }
    // Reject a NULL or empty path before attempting to open the index.
    if ( (NULL == path) || strlen( path ) <= 0 ) {
        TERR("index node's path attribute is null");
        return -1;
    }
    TLOG("begin to load full index! path:%s", path);
    if ( reader->open( path ) < 0) {
        TERR("load full index failed! path:%s", path);
        return -1;
    }
    TLOG("load full index success!");
    return 0;
}
int main() { Parser p; IndexReader in; in.genIndexFromFile(); Search s; string query; while (getline(cin,query)) { Query* q=p.parse(query); //cout<<"------"<<endl; //cout<<q->sign<<" "<<q->token<<endl; /*for(int i=0;i<q->size();i++) { Query* s=q->get(i); //cout<<s->sign<<" "<<s->token<<endl; for(int j=0;j<s->size();j++) { Query* p=s->get(j); // cout<<p->sign<<" "<<p->token<<endl; } }*/ vector<vector<string> > l; s.search(q,in,l); s.show(q,in,l); delete q; } }
// BK> all test functions are the same except RAMDirectory constructor, so shared code moved here void checkDir(CuTest *tc, MockRAMDirectory * ramDir) { // Check size CuAssertTrue(tc, ramDir->sizeInBytes == ramDir->getRecomputedSizeInBytes(), _T("RAMDir size")); // open reader to test document count IndexReader * reader = IndexReader::open(ramDir); CuAssertEquals(tc, docsToAdd, reader->numDocs(), _T("document count")); // open search to check if all doc's are there IndexSearcher * searcher = _CLNEW IndexSearcher(reader); // search for all documents Document doc; for (int i = 0; i < docsToAdd; i++) { searcher->doc(i, doc); CuAssertTrue(tc, doc.getField(_T("content")) != NULL, _T("content is NULL")); } // cleanup reader->close(); searcher->close(); _CLLDELETE(reader); _CLLDELETE(searcher); }
// Thread body for the atomic-operations stress test: repeatedly reopens
// the index and checks it always contains exactly 100 documents until
// the time budget expires or another thread reports a failure.
_LUCENE_THREAD_FUNC(atomicSearchTest, _directory){
    Directory* directory = (Directory*)_directory;
    uint64_t stopTime = Misc::currentTimeMillis() + 1000*ATOMIC_SEARCH_RUN_TIME_SEC;
    int count = 0;
    try {
        while(Misc::currentTimeMillis() < stopTime && !atomicSearchFailed) {
            IndexReader* r = IndexReader::open(directory);
            bool stop = false;
            try {
                if ( 100 != r->numDocs() ){
                    fprintf(stderr, "err 2: 100 != %d \n", r->numDocs());
                    atomicSearchFailed = true;
                }
            } catch (CLuceneError& e) {
                fprintf(stderr, "err 3: %d:%s\n", e.number(), e.what());
                atomicSearchFailed = true;
                // Fix: defer the loop exit so the reader below is still
                // closed and deleted (the old `break` here leaked it).
                stop = true;
            }
            r->close();
            _CLDELETE(r);
            if (stop) {
                break;
            }
            count++;
        }
    } catch (CLuceneError& e) {
        fprintf(stderr, "err 4: #%d: %s\n", e.number(), e.what());
        atomicSearchFailed = true;
    }
    _LUCENE_THREAD_FUNC_RETURN(0);
}
// Verifies that boost factors on constant-score range queries are
// applied correctly: first that query normalization yields a score of
// 1.0 despite a large boost, then that boosting one clause of a
// BooleanQuery over another reorders the results accordingly.
void testBoost() {
    // NOTE: uses index build in *this* setUp
    IndexReader * pReader = IndexReader::open( m_pSmall );
    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );
    Hits * pResult;

    // test for correct application of query normalization
    // must use a non score normalizing method for this.
    Query * q = csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true );
    q->setBoost( 100 );
    pResult = pSearch->search( q );
    for( size_t i = 1; i < pResult->length(); i++ ) {
        assertTrueMsg( _T( "score was not was not correct" ), 1.0f == pResult->score( i ));
    }
    _CLDELETE( pResult );
    _CLDELETE( q );

    //
    // Ensure that boosting works to score one clause of a query higher
    // than another.
    //
    // First: boost q2 far above q1, so document #1 must come first.
    Query * q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true ); // matches document #0
    q1->setBoost( .1f );
    Query * q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true ); // matches document #1
    BooleanQuery * bq = _CLNEW BooleanQuery( true );
    // `true` => the BooleanQuery takes ownership of the clause queries.
    bq->add( q1, true, BooleanClause::SHOULD );
    bq->add( q2, true, BooleanClause::SHOULD );
    pResult = pSearch->search( bq );
    assertEquals( 1, pResult->id( 0 ));
    assertEquals( 0, pResult->id( 1 ));
    assertTrue( pResult->score( 0 ) > pResult->score( 1 ));
    _CLDELETE( pResult );
    _CLDELETE( bq );

    // Second: invert the boosts, so document #0 must come first.
    q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true ); // matches document #0
    q1->setBoost( 10.0f );
    q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true ); // matches document #1
    bq = _CLNEW BooleanQuery( true );
    bq->add( q1, true, BooleanClause::SHOULD );
    bq->add( q2, true, BooleanClause::SHOULD );
    pResult = pSearch->search( bq );
    assertEquals( 0, pResult->id( 0 ));
    assertEquals( 1, pResult->id( 1 ));
    assertTrue( pResult->score( 0 ) > pResult->score( 1 ));
    _CLDELETE( pResult );
    _CLDELETE( bq );

    pSearch->close();
    _CLDELETE( pSearch );
    pReader->close();
    _CLDELETE( pReader );
}
// Verifies that extractTerms() on a rewritten WildcardQuery returns
// exactly the index terms matched by the pattern, for both the '?'
// (single character) and '*' (any suffix) wildcards.
void testExtractFromWildcardQuery( CuTest * tc ) {
    Directory * pIndex = setUpIndex();
    IndexReader * pReader = IndexReader::open( pIndex );
    TermSet termSet;
    WildcardQuery * wildcard;
    Term * t1;
    Query * rewrite;

    // "aaaa?" must expand to exactly the three 5-letter terms below.
    t1 = _CLNEW Term( _T("data"), _T("aaaa?") );
    wildcard = _CLNEW WildcardQuery( t1 );
    rewrite = wildcard->rewrite( pReader );
    rewrite->extractTerms( &termSet );
    _CLLDECDELETE( t1 );
    assertEqualsMsg( _T( "wrong number of terms" ), 3, termSet.size() );
    for( TermSet::iterator itTerms = termSet.begin(); itTerms != termSet.end(); itTerms++ ) {
        Term * pTerm = *itTerms;
        if( 0 != _tcscmp( _T( "aaaaa" ), pTerm->text())
            && 0 != _tcscmp( _T( "aaaab" ), pTerm->text())
            && 0 != _tcscmp( _T( "aaaac" ), pTerm->text())) {
            assertTrueMsg( _T( "wrong term" ), false );
        }
    }
    clearTermSet( termSet );
    // rewrite() may return the query itself; only delete it separately
    // when it is a distinct object.
    if( rewrite != wildcard ) _CLDELETE( rewrite );
    _CLDELETE( wildcard );

    // "aaa*" must expand to the five terms sharing the "aaa" prefix.
    t1 = _CLNEW Term( _T("data"), _T("aaa*") );
    wildcard = _CLNEW WildcardQuery( t1 );
    rewrite = wildcard->rewrite( pReader );
    rewrite->extractTerms( &termSet );
    _CLLDECDELETE( t1 );
    assertEqualsMsg( _T( "wrong number of terms" ), 5, termSet.size() );
    for( TermSet::iterator itTerms = termSet.begin(); itTerms != termSet.end(); itTerms++ ) {
        Term * pTerm = *itTerms;
        assertTrueMsg( _T( "wrong term" ), ( 0 == _tcsncmp( _T( "aaa" ), pTerm->text(), 3 )));
    }
    clearTermSet( termSet );
    if( rewrite != wildcard ) _CLDELETE( rewrite );
    _CLDELETE( wildcard );

    pReader->close();
    _CLDELETE( pReader );
    closeIndex( pIndex );
    pIndex = NULL;
}
// Asserts that `dir` holds an index with exactly `numDocs` documents
// and no deletions (maxDoc == numDocs).
static void verifyNumDocs(CuTest *tc, Directory * dir, int numDocs)
{
    IndexReader * idxReader = IndexReader::open(dir);
    assertEquals(numDocs, idxReader->maxDoc());
    assertEquals(numDocs, idxReader->numDocs());
    idxReader->close();
    _CLLDELETE(idxReader);
}
void SegmentMerger::closeReaders() { for (uint32_t i = 0; i < readers.size(); i++) { // close readers IndexReader* reader = readers[i]; reader->close(); } }
int SmallFileIndex::init_data_source(void *init_para, RecordReader **reader) { int ret = 0; vector<plfs_pathback> &droppings = *(((index_init_para_t *)init_para)->namefiles); list<index_mapping_t> *fid = ((index_init_para_t *)init_para)->fids; MinimumHeap *min_heap = new MinimumHeap(fid->size(), index_compare_func); mlog(SMF_DAPI, "Start to build index %p.", this); if (fid->size() == 0) { *reader = min_heap; return 0; } unsigned int buf_size = get_read_buffer_size(fid->size()); list<index_mapping_t>::const_iterator itr; for (itr = fid->begin(); itr != fid->end(); itr++ ) { string index_fname; IndexReader *indexfile; int pop_result; struct plfs_pathback entry; entry.back = droppings[itr->second].back; assert(itr->second < droppings.size()); ret = dropping_name2index(droppings[itr->second].bpath, entry.bpath); if (ret) { mlog(SMF_ERR, "Unable to get index file name from name file:%s.", droppings[itr->second].bpath.c_str()); break; } indexfile = new IndexReader(entry, *itr, buf_size); /* Only after this pop_front(), we can get the first record. */ pop_result = indexfile->pop_front(); if (pop_result == 1 && indexfile->front()) { mlog(SMF_DAPI, "Load index entries from %s.", entry.bpath.c_str()); min_heap->push_back(indexfile); } else if (pop_result == 0 || pop_result == -ENOENT) { delete indexfile; mlog(SMF_DAPI, "Skip empty or non-existent index file:%s.", entry.bpath.c_str()); } else { delete indexfile; mlog(SMF_ERR, "Unable to read index entries from %s, err = %d!", entry.bpath.c_str(), pop_result); ret = pop_result; break; } } if (ret == 0) { mlog(SMF_DAPI, "Successfully build index %p.", this); *reader = min_heap; } else { delete min_heap; mlog(SMF_DAPI, "Failed to build index %p. errno = %d.", this, ret); } return ret; }
void SearchFilesC(const char* index, const char* fobizzle){ standard::StandardAnalyzer analyzer; char line[80]; TCHAR tline[80]; TCHAR* buf; IndexReader* reader = IndexReader::open(index); //printf("Enter query string: "); strncpy(line,fobizzle,80); //line[strlen(line)-1]=0; IndexReader* newreader = reader->reopen(); if ( newreader != reader ){ _CLLDELETE(reader); reader = newreader; } IndexSearcher s(reader); STRCPY_AtoT(tline,line,80); Query* q = QueryParser::parse(tline,_T("contents"),&analyzer); buf = q->toString(_T("contents")); _tprintf(_T("Searching for: %S\n\n"), buf); _CLDELETE_LCARRAY(buf); uint64_t str = Misc::currentTimeMillis(); Hits* h = s.search(q); uint32_t srch = (int32_t)(Misc::currentTimeMillis() - str); str = Misc::currentTimeMillis(); //SearchData search[h->length()]; for ( size_t i=0; i < h->length(); i++ ){ Document* doc = &h->doc(i); //const TCHAR* buf = doc.get(_T("contents")); _tprintf(_T("%d. %S - %f\n"), i, doc->get(_T("path")), h->score(i)); //search[i].set_path(doc->get(_T("path"))); } printf("\n\nSearch took: %d ms.\n", srch); printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str)); _CLLDELETE(h); _CLLDELETE(q); s.close(); reader->close(); _CLLDELETE(reader); };
void SearchFiles(const char* index){ standard::StandardAnalyzer analyzer; char line[80]; TCHAR tline[80]; TCHAR* buf; IndexReader* reader = IndexReader::open(index); while (true) { printf("Enter query string: "); char* tmp = fgets(line,80,stdin); if ( tmp == NULL ) continue; line[strlen(line)-1]=0; IndexReader* newreader = reader->reopen(); if ( newreader != reader ){ _CLLDELETE(reader); reader = newreader; } IndexSearcher s(reader); if ( strlen(line) == 0 ) break; STRCPY_AtoT(tline,line,80); Query* q = QueryParser::parse(tline,_T("contents"),&analyzer); buf = q->toString(_T("contents")); _tprintf(_T("Searching for: %s\n\n"), buf); _CLDELETE_LCARRAY(buf); uint64_t str = Misc::currentTimeMillis(); Hits* h = s.search(q); uint32_t srch = (int32_t)(Misc::currentTimeMillis() - str); str = Misc::currentTimeMillis(); for ( size_t i=0;i<h->length();i++ ){ Document* doc = &h->doc(i); //const TCHAR* buf = doc.get(_T("contents")); _tprintf(_T("%d. %s - %f\n"), i, doc->get(_T("path")), h->score(i)); //print result to web interface: LINH_PRINT_WEB(i,doc->get(_T("path")),h->score(i)); } printf("\n\nSearch took: %d ms.\n", srch); printf("Screen dump took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str)); _CLLDELETE(h); _CLLDELETE(q); s.close(); } reader->close(); _CLLDELETE(reader); }
// Asserts that `term` occurs in exactly `numDocs` documents of the
// index stored in `dir`.
static void verifyTermDocs(CuTest *tc, Directory * dir, Term * term, int numDocs)
{
    IndexReader * idxReader = IndexReader::open(dir);
    TermDocs * docs = idxReader->termDocs(term);
    int hitCount = 0;
    while (docs->next())
    {
        ++hitCount;
    }
    assertEquals(numDocs, hitCount);
    docs->close();
    _CLLDELETE(docs);
    idxReader->close();
    _CLLDELETE(idxReader);
}
// eio worker: runs the search described by `req->data` (a
// search_baton_t) against the index and stores the hits as a persistent
// v8 array of {id, score} objects on the baton. Errors are reported via
// baton->error rather than thrown.
static int EIO_Search(eio_req* req) {
    search_baton_t* baton = static_cast<search_baton_t*>(req->data);
    standard::StandardAnalyzer analyzer;
    IndexReader* reader = 0;
    try {
        reader = IndexReader::open(*(*baton->index));
    } catch (CLuceneError& E) {
        baton->error.assign(E.what());
        return 0;
    } catch(...) {
        baton->error = "Got an unknown exception";
        return 0;
    }
    // Reopen to pick up any index changes since the reader was cached.
    IndexReader* newreader = reader->reopen();
    if ( newreader != reader ) {
        delete reader;
        reader = newreader;
    }
    IndexSearcher s(reader);
    // NOTE(review): `reader` is never closed/deleted before returning,
    // and `q` is never freed (the _CLDELETE(q) below is commented out)
    // — both look like leaks; confirm ownership against the callers.
    try {
        TCHAR* searchString = STRDUP_AtoT(*(*baton->search));
        Query* q = QueryParser::parse(searchString, _T(""), &analyzer);
        Hits* hits = s.search(q);
        HandleScope scope;
        //_CLDELETE(q);
        free(searchString);
        // Build the result array
        Local<v8::Array> resultArray = v8::Array::New();
        for (size_t i=0; i < hits->length(); i++) {
            Document& doc(hits->doc(i));
            // {"id":"ab34", "score":1.0}
            Local<Object> resultObject = Object::New();
            // TODO: This dup might be a leak
            resultObject->Set(String::New("id"), String::New(STRDUP_TtoA(doc.get(_T("_id")))));
            resultObject->Set(String::New("score"), Number::New(hits->score(i)));
            resultArray->Set(i, resultObject);
        }
        baton->results = Persistent<v8::Array>::New(resultArray);
    } catch (CLuceneError& E) {
        baton->error.assign(E.what());
    } catch(...) {
        baton->error = "Got an unknown exception";
    }
    return 0;
}
// CLI sub-command: for each file path given on the command line, query
// the index by system.location and print the associated indexed
// document(s), or a "not indexed" notice.
//
// Returns 0 on success, or the result of usage() on argument errors.
int get(int argc, char** argv) {
    // parse arguments
    parseArguments(argc, argv);
    string backend = options['t'];
    string indexdir = options['d'];
    // check arguments: indexdir
    if (indexdir.length() == 0) {
        pe("Provide the directory with the index.\n");
        return usage(argc, argv);
    }
    // check arguments: dirs
    if (arguments.size() == 0) {
        pe("'%s' '%s'\n", backend.c_str(), indexdir.c_str());
        pe("Provide one or more files to search.\n");
        return usage(argc, argv);
    }
    // create an index manager
    IndexManager* manager = getIndexManager(backend, indexdir);
    if (manager == 0) {
        return usage(argc, argv);
    }
    IndexReader* reader = manager->indexReader();
    QueryParser parser;
    for (vector<string>::iterator iter = arguments.begin();
            iter != arguments.end(); ++iter) {
        // Quote the path so it is matched as a single location value.
        Query query = parser.buildQuery( "system.location:'"+ *iter + '\'');
        // Fetch at most the first 10 matches for this location.
        vector<IndexedDocument> matches = reader->query(query, 0, 10);
        if (matches.size() == 0)
            printf ("%s: is not indexed\n", iter->c_str());
        else {
            printf ("Information associated to %s:\n", iter->c_str());
            for (vector<IndexedDocument>::iterator it = matches.begin();
                    it != matches.end(); ++it) {
                printIndexedDocument(*it);
            }
        }
    }
    IndexPluginLoader::deleteIndexManager(manager);
    return 0;
}
void indexdump(const char* dir) { IndexReader* indexreader = IndexReader::open(dir); int32_t max = indexreader->maxDoc(); for (int i=0; i<max; ++i) { Document* doc = indexreader->document(i); if (doc) { docdump(doc); } } TermEnum* terms = indexreader->terms(); Term* t = 0; while (terms->next()) { t = terms->term(); printf("%s: %s\n", t2a(t->field()).c_str(), t2a(t->text()).c_str()); _CLDECDELETE(t); } }
// Writes the retrieval results to `OF`, one line per retrieved
// document: topic number, external document id, and score,
// tab-separated.
void RetManager::show(ostream &OF)
{
    for (unsigned idx = 0; idx < retN; ++idx)
    {
        OF << topicNum << "\t"
           << theIndex->findDoc(retDocID[idx]) << "\t"
           << retDocScore[idx] << endl;
    }
}
// case 4: tail segments, invariants hold, copy, invariants not hold void testMergeAfterCopy(CuTest * tc) { // main directory Directory * dir = _CLNEW RAMDirectory(); // auxiliary directory Directory * aux = _CLNEW RAMDirectory(); WhitespaceAnalyzer an; setUpDirs(tc, dir, aux); IndexReader * reader = IndexReader::open(aux); for (int i = 0; i < 20; i++) { reader->deleteDocument(i); } assertEquals(10, reader->numDocs()); reader->close(); _CLLDELETE(reader); IndexWriter4Test * writer = newWriter(dir, &an, false); writer->setMaxBufferedDocs(4); writer->setMergeFactor(4); ValueArray<Directory*> dirs(2); dirs[0] = aux; dirs[1] = aux; writer->addIndexesNoOptimize(dirs); assertEquals(1020, writer->docCount()); assertEquals(1000, writer->getDocCount(0)); writer->close(); _CLLDELETE(writer); // make sure the index is correct verifyNumDocs(tc, dir, 1020); dir->close(); _CLLDELETE(dir); aux->close(); _CLLDELETE(aux); }
// Verifies that wrapping a RangeQuery in a BooleanQuery together with a
// ConstantScoreRangeQuery does not change the order of the hits
// produced by the RangeQuery alone.
void testBooleanOrderUnAffected() {
    // NOTE: uses index build in *this* setUp
    IndexReader * pReader = IndexReader::open( m_pSmall );
    IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader );

    // first do a regular RangeQuery which uses term expansion so
    // docs with more terms in range get higher scores
    Term * pLower = _CLNEW Term( _T( "data" ), _T( "1" ));
    Term * pUpper = _CLNEW Term( _T( "data" ), _T( "4" ));
    Query * rq = _CLNEW RangeQuery( pLower, pUpper, true );
    _CLLDECDELETE( pUpper );
    _CLLDECDELETE( pLower );
    Hits * pExpected = pSearch->search( rq );
    size_t numHits = pExpected->length();

    // now do a boolean where which also contains a
    // ConstantScoreRangeQuery and make sure the order is the same
    BooleanQuery * q = _CLNEW BooleanQuery();
    // `true` => the BooleanQuery takes ownership of rq and the csrq.
    q->add( rq, true, BooleanClause::MUST );
    q->add( csrq( _T( "data" ), _T( "1" ), _T( "6" ), true, true ), true, BooleanClause::MUST );
    Hits * pActual = pSearch->search( q );
    assertEqualsMsg( _T( "wrong number of hits" ), numHits, pActual->length() );
    for( size_t i = 0; i < numHits; i++ ) {
        assertEqualsMsg( _T( "mismatch in docid for a hit" ), pExpected->id( i ), pActual->id( i ));
    }

    _CLDELETE( pActual );
    _CLDELETE( pExpected );
    _CLDELETE( q );
    pSearch->close();
    _CLDELETE( pSearch );
    pReader->close();
    _CLDELETE( pReader );
}
// Verifies that extractTerms() on a rewritten FuzzyQuery returns
// exactly the index terms within the 0.7 similarity threshold of
// "aaaab".
void testExtractFromFuzzyQuery( CuTest * tc ) {
    Directory * pIndex = setUpIndex();
    IndexReader * pReader = IndexReader::open( pIndex );
    TermSet termSet;
    FuzzyQuery * fuzzy;
    Term * t1;
    Query * rewrite;

    t1 = _CLNEW Term( _T("data"), _T("aaaab") );
    fuzzy = _CLNEW FuzzyQuery( t1, 0.7f );
    rewrite = fuzzy->rewrite( pReader );
    rewrite->extractTerms( &termSet );
    _CLLDECDELETE( t1 );
    assertEqualsMsg( _T( "wrong number of terms" ), 4, termSet.size() );
    // Each extracted term must be one of the four expected neighbours.
    for( TermSet::iterator itTerms = termSet.begin(); itTerms != termSet.end(); itTerms++ ) {
        Term * pTerm = *itTerms;
        if( 0 != _tcscmp( _T( "aaaaa" ), pTerm->text())
            && 0 != _tcscmp( _T( "aaaab" ), pTerm->text())
            && 0 != _tcscmp( _T( "aaabb" ), pTerm->text())
            && 0 != _tcscmp( _T( "aaaac" ), pTerm->text())) {
            assertTrueMsg( _T( "wrong term" ), false );
        }
    }
    clearTermSet( termSet );
    // rewrite() may return the query itself; only delete it separately
    // when it is a distinct object.
    if( rewrite != fuzzy ) _CLDELETE( rewrite );
    _CLDELETE( fuzzy );

    pReader->close();
    _CLDELETE( pReader );
    closeIndex( pIndex );
    pIndex = NULL;
}
int32_t SegmentMerger::mergeFields() {
//Func - Merge the fields of all segments
//Pre  - true
//Post - The field infos and field values of all segments have been merged.
// NOTE: only the field-name merging portion of this function is visible
// here; the definition continues beyond this excerpt.

    //Create a new FieldInfos
    fieldInfos = _CLNEW FieldInfos(); // merge field names

    //Condition check to see if fieldInfos points to a valid instance
    CND_CONDITION(fieldInfos != NULL, "Memory allocation for fieldInfos failed");

    IndexReader* reader = NULL;
    int32_t docCount = 0;

    //Iterate through all readers
    for (uint32_t i = 0; i < readers.size(); i++) {
        //get the i-th reader
        reader = readers[i];
        //Condition check to see if reader points to a valid instance
        CND_CONDITION(reader != NULL,"No IndexReader found");

        // Register the field names of each reader, grouped by their
        // term-vector flavour, so indexing options are preserved.
        StringArrayWithDeletor tmp;
        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION_OFFSET, tmp);
        addIndexed(reader, fieldInfos, tmp, true, true, true);
        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_POSITION, tmp);
        addIndexed(reader, fieldInfos, tmp, true, true, false);
        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR_WITH_OFFSET, tmp);
        addIndexed(reader, fieldInfos, tmp, true, false, true);
        tmp.clear();
        reader->getFieldNames(IndexReader::TERMVECTOR, tmp);
        addIndexed(reader, fieldInfos, tmp, true, false, false);
        tmp.clear();
        reader->getFieldNames(IndexReader::INDEXED, tmp);
        addIndexed(reader, fieldInfos, tmp, false, false, false);
        tmp.clear();
        reader->getFieldNames(IndexReader::UNINDEXED, tmp);
        if (tmp.size() > 0) {
            TCHAR** arr = _CL_NEWARRAY(TCHAR*,tmp.size()+1);
            tmp.toArray(arr);
            fieldInfos->add((const TCHAR**)arr, false);
            //no need to delete the contents, since tmp is responsible for it
            _CLDELETE_ARRAY(arr);
        }
    }
// Verifies that extractTerms() on a rewritten TermQuery returns exactly
// the single term the query was built from.
void testExtractFromTermQuery( CuTest * tc ) {
    Directory * pIndex = setUpIndex();
    IndexReader * pReader = IndexReader::open( pIndex );
    TermSet termSet;

    Term * t1 = _CLNEW Term( _T("data"), _T("aaaaa") );
    Term * t2 = _CLNEW Term( _T("data"), _T("bbbbb") );
    Query * q1 = _CLNEW TermQuery( t1 );
    Query * q2 = _CLNEW TermQuery( t2 );
    Query * rewrite1 = q1->rewrite( pReader );
    Query * rewrite2 = q2->rewrite( pReader );

    rewrite1->extractTerms( &termSet );
    assertEqualsMsg( _T( "wrong number of terms" ), 1, termSet.size() );
    assertEqualsMsg( _T( "wrong term" ), 0, t1->compareTo( *(termSet.begin())) );
    clearTermSet( termSet );

    rewrite2->extractTerms( &termSet );
    assertEqualsMsg( _T( "wrong number of terms" ), 1, termSet.size() );
    assertEqualsMsg( _T( "wrong term" ), 0, t2->compareTo( *(termSet.begin())) );
    clearTermSet( termSet );

    _CLLDECDELETE( t1 );
    _CLLDECDELETE( t2 );
    // rewrite() may return the query itself; only delete separately
    // when it produced a distinct object.
    if( q1 != rewrite1 ) _CLDELETE( rewrite1 );
    _CLDELETE( q1 );
    if( q2 != rewrite2 ) _CLDELETE( rewrite2 );
    _CLDELETE( q2 );

    pReader->close();
    _CLDELETE( pReader );
    closeIndex( pIndex );
    pIndex = NULL;
}
// Verifies that extractTerms() on a rewritten BooleanQuery de-duplicates
// terms: three clauses, two of which share the same term, yield a set
// of two terms.
void testExtractFromBooleanQuery( CuTest * tc ) {
    Directory * pIndex = setUpIndex();
    IndexReader * pReader = IndexReader::open( pIndex );
    TermSet termSet;

    Term * t1 = _CLNEW Term( _T("data"), _T("aaaab") );
    Term * t2 = _CLNEW Term( _T("data"), _T("aaabb") );
    // t3 duplicates t2's text on purpose — it must not add a third term.
    Term * t3 = _CLNEW Term( _T("data"), _T("aaabb") );
    BooleanQuery * bq = _CLNEW BooleanQuery();
    // `true` => the BooleanQuery owns the clause TermQuery objects.
    bq->add( _CLNEW TermQuery( t1 ), true, BooleanClause::SHOULD );
    bq->add( _CLNEW TermQuery( t2 ), true, BooleanClause::SHOULD );
    bq->add( _CLNEW TermQuery( t3 ), true, BooleanClause::SHOULD );
    Query * rewrite = bq->rewrite( pReader );

    rewrite->extractTerms( &termSet );
    assertEqualsMsg( _T( "wrong number of terms" ), 2, termSet.size() );
    for( TermSet::iterator itTerms = termSet.begin(); itTerms != termSet.end(); itTerms++ ) {
        Term * pTerm = *itTerms;
        assertTrueMsg( _T( "wrong term" ), ( 0 == t1->compareTo( pTerm ) || 0 == t2->compareTo( pTerm )));
    }
    clearTermSet( termSet );

    _CLLDECDELETE( t1 );
    _CLLDECDELETE( t2 );
    _CLLDECDELETE( t3 );
    // rewrite() may return the query itself; only delete separately
    // when it produced a distinct object.
    if( rewrite != bq ) _CLDELETE( rewrite );
    _CLDELETE( bq );

    pReader->close();
    _CLDELETE( pReader );
    closeIndex( pIndex );
    pIndex = NULL;
}
///////////////////////////////////////////////////////////////////////////// // CLucene specific // Visual Studio 2005 shows memory leaks for this test, but some other // tools do not detect any memory leaks. So what is right? // IN VC80 shows memory leaks ONLY if both sub-queries are added as // MUST BooleanClauses. void testBooleanMemLeaks() { IndexReader * pReader = IndexReader::open( m_pSmall ); IndexSearcher * pSearch = _CLNEW IndexSearcher( pReader ); Query * q1 = csrq( _T( "data" ), _T( "A" ), _T( "A" ), true, true ); // matches document #0 Query * q2 = csrq( _T( "data" ), _T( "Z" ), _T( "Z" ), true, true ); // matches document #1 BooleanQuery * bq = _CLNEW BooleanQuery( true ); bq->add( q1, true, BooleanClause::MUST ); bq->add( q2, true, BooleanClause::MUST ); Hits * pResult = pSearch->search( bq ); _CLDELETE( pResult ); _CLDELETE( bq ); pSearch->close(); _CLDELETE( pSearch ); pReader->close(); _CLDELETE( pReader ); }
// Incrementally synchronizes the index with the filesystem: walks every
// directory from `dirlister`, compares its on-disk entries against the
// entries recorded in the index, deletes index entries for vanished
// files, and (re)indexes files that are new or whose mtime changed.
void DirAnalyzer::Private::update(StreamAnalyzer* analyzer) {
    IndexReader* reader = manager.indexReader();
    vector<pair<string, struct stat> > dirfiles;
    map<string, time_t> dbdirfiles;
    vector<string> toDelete;
    vector<pair<string, struct stat> > toIndex;
    try {
        string path;
        // loop over all files that exist in the index
        int r = dirlister.nextDir(path, dirfiles);
        while (r >= 0 && (caller == 0 || caller->continueAnalysis())) {
            if (r < 0) {
                continue;
            }
            // get the files that are in the current database
            reader->getChildren(path, dbdirfiles);
            // get all files in this directory
            vector<pair<string, struct stat> >::const_iterator end = dirfiles.end();
            map<string, time_t>::const_iterator dbend = dbdirfiles.end();
            for (vector<pair<string, struct stat> >::const_iterator i = dirfiles.begin(); i != end; ++i) {
                const string& filepath(i->first);
                time_t mtime = i->second.st_mtime;
                // check if this file is new or not
                map<string, time_t>::iterator j = dbdirfiles.find(filepath);
                bool newfile = j == dbend;
                bool updatedfile = !newfile && j->second != mtime;
                if (newfile || (updatedfile && !S_ISDIR(i->second.st_mode))) {
                    // if the file has not yet been indexed or if the mtime has
                    // changed, index it
                    // if a directory has been updated, this will not change the index
                    // so the entry is not removed from the index, nor reindexed
                    toIndex.push_back(make_pair(filepath, i->second));
                } else {
                    // files left in dbdirfiles after this loop will be deleted
                    // from the index; because this file has not changed, it
                    // should not be removed from the index
                    dbdirfiles.erase(j);
                }
            }
            // all the files left in dbdirfiles, are not in the current
            // directory and should be deleted
            for (map<string, time_t>::const_iterator i = dbdirfiles.begin(); i != dbend; ++i) {
                toDelete.push_back(i->first);
            }
            if (toDelete.size() > 0) {
                manager.indexWriter()->deleteEntries(toDelete);
            }
            vector<pair<string, struct stat> >::const_iterator fend = toIndex.end();
            for (vector<pair<string, struct stat> >::const_iterator i = toIndex.begin(); i != fend; ++i) {
                AnalysisResult analysisresult(i->first, i->second.st_mtime, *manager.indexWriter(), *analyzer, path);
                if (S_ISREG(i->second.st_mode)) {
                    // Regular files are fed to the analyzer via a stream.
                    InputStream* file = FileInputStream::open(i->first.c_str());
                    analysisresult.index(file);
                    delete file;
                } else {
                    // Non-regular entries are indexed without content.
                    analysisresult.index(0);
                }
            }
            toDelete.clear();
            toIndex.clear();
            r = dirlister.nextDir(path, dirfiles);
        }
    } catch(...) {
        fprintf(stderr, "Unknown error\n");
    }
}