/**
 * "add" sub-command: indexes a single file or a directory tree into a
 * CLucene index.
 *
 * Expected arguments: [-dontrecurse] [-optimize] [-store] <target-index> <path>
 * (the last two arguments are always the index directory and the path to add;
 * anything before them is scanned for option flags).
 *
 * Returns 1 unconditionally (kept for existing callers).
 *
 * Fix vs. original: the IndexWriter allocated with `new` was never freed,
 * leaking the writer (and whatever it holds) on every invocation.
 */
int cmd_add( int_t argc, char** argv ){
    if ( argc < 2 )
        usage();

    bool dontRecurse = false;
    bool optimize = false;
    bool store = false;
    char_t* path = NULL;

    // Scan leading arguments for option flags; the last two slots are
    // reserved for <target> and <path> and are skipped here.
    for ( int i = 0; i < argc-2; i++ ){
        if ( strcmp(argv[i], "-dontrecurse") == 0 )
            dontRecurse = true;
        else if ( strcmp(argv[i], "-optimize") == 0 )
            optimize = true;
        else if ( strcmp(argv[i], "-store") == 0 )
            store = true;
    }

    char_t* target = TO_CHAR_T(argv[argc-2]);
    path = TO_CHAR_T(argv[argc-1]);

    IndexWriter* writer = NULL;
    Directory* d = NULL;
    lucene::analysis::standard::StandardAnalyzer an;

    // Append to an existing index; otherwise create a fresh one.
    if ( IndexReader::indexExists(target) ){
        d = &FSDirectory::getDirectory( target, false );
        writer = new IndexWriter( *d, an, false );
    }else{
        d = &FSDirectory::getDirectory( target, true );
        writer = new IndexWriter( *d, an, true );
    }

    struct Struct_Stat buf;
    if ( Cmd_Stat(path, &buf) == 0 ){
        if ( buf.st_mode & S_IFREG ){
            // FileDocument() hands back a heap-allocated Document by
            // reference; we own it and must delete it after indexing.
            Document& doc = FileDocument( path, store );
            writer->addDocument( doc );
            delete &doc;
        }else if ( buf.st_mode & S_IFDIR ){
            indexDocs(writer, path, dontRecurse, store);
        }else{
            cerr << "File/directory is not valid.";
        }
    }else{
        printf( "File/directory does not exist." );
    }

    if ( optimize )
        writer->optimize();
    writer->close();
    delete writer;   // BUGFIX: the writer was leaked in the original
    // NOTE(review): `d` comes from FSDirectory::getDirectory(), which is
    // reference-counted in CLucene; confirm whether d->close() and a
    // _CLDECDELETE-style release are required here as well.
    delete[] target;
    delete[] path;
    return 1;
}
/**
 * node.js (V8) binding: indexes a single (path, contents) pair into the
 * CLucene index located at args[2].
 *
 * args[0] = document path (stored, untokenized)
 * args[1] = document contents (stored, tokenized)
 * args[2] = index directory
 *
 * Returns Undefined.
 *
 * Fix vs. original: a stray statement at the end constructed a temporary
 * `IndexWriter(*String::Utf8Value(args[2]), &an, false)` and immediately
 * discarded it, re-opening the just-closed index for no effect. Removed.
 */
static Handle<Value> IndexText(const Arguments& args) {
    HandleScope scope;

    IndexWriter* writer = NULL;
    lucene::analysis::WhitespaceAnalyzer an;

    // Open (and unlock, if a stale lock is present) an existing index,
    // otherwise create a new one.
    if (IndexReader::indexExists(*String::Utf8Value(args[2]))) {
        if (IndexReader::isLocked(*String::Utf8Value(args[2]))) {
            printf("Index was locked... unlocking it.\n");
            IndexReader::unlock(*String::Utf8Value(args[2]));
        }
        writer = _CLNEW IndexWriter(*String::Utf8Value(args[2]), &an, false);
    } else {
        writer = _CLNEW IndexWriter(*String::Utf8Value(args[2]), &an, true);
    }

    // We can tell the writer to flush at certain occasions
    //writer->setRAMBufferSizeMB(0.5);
    //writer->setMaxBufferedDocs(3);

    // To bypass a possible exception (we have no idea what we will be indexing...)
    writer->setMaxFieldLength(0x7FFFFFFFL); // LUCENE_INT32_MAX_SHOULDBE
    // Turn this off to make indexing faster; we'll turn it on later before optimizing
    writer->setUseCompoundFile(false);

    uint64_t str = Misc::currentTimeMillis();

    Document doc;
    doc.clear();
    TCHAR path[CL_MAX_DIR];
    STRCPY_AtoT(path, *String::Utf8Value(args[0]), CL_MAX_DIR);
    // NOTE(review): the contents buffer is also sized CL_MAX_DIR, so any
    // document text longer than a path is silently truncated — confirm
    // whether a dynamically sized buffer is needed here.
    TCHAR contents[CL_MAX_DIR];
    STRCPY_AtoT(contents, *String::Utf8Value(args[1]), CL_MAX_DIR);

    // The Document takes ownership of the Fields and frees them on destruction.
    (&doc)->add( *_CLNEW Field(_T("path"), path, Field::STORE_YES | Field::INDEX_UNTOKENIZED ) );
    (&doc)->add( *_CLNEW Field(_T("contents"), contents, Field::STORE_YES | Field::INDEX_TOKENIZED) );
    writer->addDocument( &doc );

    // Make the index use as little files as possible, and optimize it
    writer->setUseCompoundFile(true);
    writer->optimize();

    // Close and clean up
    writer->close();
    _CLLDELETE(writer);

    printf("Indexing took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

    return scope.Close(Undefined());
}
// Regression test for searcher shutdown ordering: builds a 1500-document RAM
// index, then runs a background search thread twice. The first scenario joins
// the thread before deleting the IndexSearcher (correct order); the second
// deliberately deletes the searcher after the search finishes but BEFORE the
// thread is joined, which is the sequence that historically produced a memory
// error (see the inline comments below).
// NOTE(review): relies on file-level searchMutex/searchCondition/
// deleteCondition globals shared with searchDocs() — not visible here.
void testEndThreadException(CuTest *tc) {
    const int MAX_DOCS=1500;
    RAMDirectory ram;
    WhitespaceAnalyzer an;
    IndexWriter* writer = _CLNEW IndexWriter(&ram, &an, true);

    // add some documents: doc i contains the number i spelled out in English
    Document doc;
    for (int i = 0; i < MAX_DOCS; i++) {
        TCHAR * tmp = English::IntToEnglish(i);
        doc.add(* new Field(_T("content"), tmp, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
        writer->addDocument(&doc);
        doc.clear();                 // releases the Field added above
        _CLDELETE_ARRAY( tmp );
    }
    CuAssertEquals(tc, MAX_DOCS, writer->docCount());
    writer->close();
    _CLLDELETE(writer);

    // this sequence is OK: delete searcher after search thread finish
    {
        IndexSearcher * searcher = _CLNEW IndexSearcher(&ram);
        _LUCENE_THREADID_TYPE thread = _LUCENE_THREAD_CREATE(&searchDocs, searcher);
        SCOPED_LOCK_MUTEX(searchMutex);
        // wait until the search thread signals that its search completed
        CONDITION_WAIT(searchMutex, searchCondition);
        // _LUCENE_SLEEP(9999); //make sure that deleteMutex is being waited on...
        CONDITION_NOTIFYALL(deleteCondition);
        _LUCENE_THREAD_JOIN(thread);
        searcher->close();
        _CLLDELETE(searcher);
    }

    // this produces memory exception: delete searcher after search finish but before thread finish
    {
        IndexSearcher * searcher = _CLNEW IndexSearcher(&ram);
        _LUCENE_THREADID_TYPE thread = _LUCENE_THREAD_CREATE(&searchDocs, searcher);
        SCOPED_LOCK_MUTEX(searchMutex);
        CONDITION_WAIT(searchMutex, searchCondition);
        // searcher destroyed while the worker thread is still alive
        searcher->close();
        _CLLDELETE(searcher);
        CONDITION_NOTIFYALL(deleteCondition);
        _LUCENE_THREAD_JOIN(thread);
    }
    ram.close();
}
/// Indexes three documents ("a 1", "a 2", "a 3") and checks that a
/// RangeFilter over ["3","3"] with both endpoints inclusive narrows a
/// 3-hit term query down to exactly one hit.
void testIncludeLowerTrue(CuTest* tc) {
    WhitespaceAnalyzer analyzer;
    RAMDirectory* dir = _CLNEW RAMDirectory();
    IndexWriter* indexWriter = _CLNEW IndexWriter(dir, &analyzer, true);

    // One document per value, all in the "Category" field.
    TCHAR* docValues[] = { _T("a 1"), _T("a 2"), _T("a 3"), NULL };
    Document doc;
    for (size_t n = 0; docValues[n] != NULL; n++) {
        doc.add(*_CLNEW Field(_T("Category"), docValues[n], Field::STORE_YES | Field::INDEX_TOKENIZED));
        indexWriter->addDocument(&doc);
        doc.clear();
    }
    indexWriter->close();
    _CLLDELETE(indexWriter);

    IndexSearcher* searcher = _CLNEW IndexSearcher(dir);
    Filter* rangeFilter = _CLNEW RangeFilter(_T("Category"), _T("3"), _T("3"), true, true);

    // Query matching all three docs ("a") and one matching only the last ("3").
    Term* term = _CLNEW Term(_T("Category"), _T("a"));
    Query* queryA = _CLNEW TermQuery(term);
    _CLLDECDELETE(term);
    term = _CLNEW Term(_T("Category"), _T("3"));
    Query* query3 = _CLNEW TermQuery(term);
    _CLLDECDELETE(term);

    Hits* hits = searcher->search(queryA);
    assertTrue(hits->length() == 3);
    _CLLDELETE(hits);

    hits = searcher->search(query3);
    assertTrue(hits->length() == 1);
    _CLLDELETE(hits);

    // The filter must cut the "a" query's three hits down to one.
    hits = searcher->search(queryA, rangeFilter);
    assertTrue(hits->length() == 1);
    _CLLDELETE(hits);

    searcher->close();
    _CLLDELETE(searcher);
    _CLLDELETE(queryA);
    _CLLDELETE(query3);
    _CLLDELETE(rangeFilter);
    dir->close();
    _CLLDECDELETE(dir);
}
void TestSpansAdvanced::setUp() { directory = _CLNEW RAMDirectory(); Analyzer * analyzer = _CLNEW StandardAnalyzer(); IndexWriter * writer = _CLNEW IndexWriter( directory, analyzer, true ); addDocuments( writer ); writer->close(); _CLDELETE( writer ); _CLDELETE( analyzer ); searcher = _CLNEW IndexSearcher( directory ); }
/**
 * node.js (V8) binding: recursively indexes the files under args[0] into the
 * CLucene index located at args[1] (delegating the walk to indexDocs()).
 *
 * args[0] = root directory/file to index
 * args[1] = index directory
 *
 * Returns the string "foo" (kept — existing JS callers may depend on it).
 *
 * Fix vs. original: a stray trailing statement constructed a temporary
 * `IndexWriter(*String::Utf8Value(args[1]), &an, false)` and immediately
 * discarded it, re-opening the just-closed index for no effect. Removed.
 */
static Handle<Value> IndexFiles(const Arguments& args) {
    HandleScope scope;

    IndexWriter* writer = NULL;
    lucene::analysis::WhitespaceAnalyzer an;

    // Open (and unlock, if a stale lock is present) an existing index,
    // otherwise create a new one.
    if (IndexReader::indexExists(*String::Utf8Value(args[1]))) {
        if (IndexReader::isLocked(*String::Utf8Value(args[1]))) {
            printf("Index was locked... unlocking it.\n");
            IndexReader::unlock(*String::Utf8Value(args[1]));
        }
        writer = _CLNEW IndexWriter(*String::Utf8Value(args[1]), &an, false);
    } else {
        writer = _CLNEW IndexWriter(*String::Utf8Value(args[1]), &an, true);
    }
    //writer->setInfoStream(&std::cout);

    // We can tell the writer to flush at certain occasions
    //writer->setRAMBufferSizeMB(0.5);
    //writer->setMaxBufferedDocs(3);

    // To bypass a possible exception (we have no idea what we will be indexing...)
    writer->setMaxFieldLength(0x7FFFFFFFL); // LUCENE_INT32_MAX_SHOULDBE
    // Turn this off to make indexing faster; we'll turn it on later before optimizing
    writer->setUseCompoundFile(false);

    uint64_t str = Misc::currentTimeMillis();

    indexDocs(writer, *String::Utf8Value(args[0]));

    // Make the index use as little files as possible, and optimize it
    writer->setUseCompoundFile(true);
    writer->optimize();

    // Close and clean up
    writer->close();
    _CLLDELETE(writer);

    printf("Indexing took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));

    //Lucene* lucene = ObjectWrap::Unwrap<Lucene>(args.This());
    return scope.Close(String::New("foo"));
}
/**
 * eio worker-thread entry point: adds the document carried in the baton to
 * the index named by the baton, creating the index if needed. Any failure is
 * reported through baton->error rather than by throwing; indexing duration
 * (ms) is recorded in baton->indexTime. Always returns 0.
 *
 * Fix vs. original: writer->close() was called OUTSIDE the try/catch, so a
 * CLuceneError thrown while flushing on close escaped the worker thread
 * instead of being captured in baton->error.
 */
static int EIO_Index(eio_req* req) {
    index_baton_t* baton = static_cast<index_baton_t*>(req->data);

    lucene::analysis::standard::StandardAnalyzer an;
    IndexWriter* writer = 0;
    bool writerOpen = false;

    try {
        // Append to an existing index (clearing any stale lock first),
        // otherwise create it from scratch.
        bool needsCreation = true;
        if (IndexReader::indexExists(*(*baton->index))) {
            if (IndexReader::isLocked(*(*baton->index))) {
                IndexReader::unlock(*(*baton->index));
            }
            needsCreation = false;
        }
        writer = new IndexWriter(*(*baton->index), &an, needsCreation);
        writerOpen = true;

        // To bypass a possible exception (we have no idea what we will be indexing...)
        writer->setMaxFieldLength(0x7FFFFFFFL); // LUCENE_INT32_MAX_SHOULDBE
        // Turn this off to make indexing faster; we'll turn it on later before optimizing
        writer->setUseCompoundFile(false);

        uint64_t start = Misc::currentTimeMillis();

        writer->addDocument(baton->doc->document());

        // Make the index use as little files as possible, and optimize it
        writer->setUseCompoundFile(true);
        writer->optimize();

        baton->indexTime = (Misc::currentTimeMillis() - start);
    } catch (CLuceneError& E) {
        baton->error.assign(E.what());
    } catch(...) {
        baton->error = "Got an unknown exception";
    }

    // Close and clean up. close() can itself fail (e.g. while flushing), so
    // capture its errors too instead of letting them escape this thread.
    if (writerOpen) {
        try {
            writer->close();
        } catch (CLuceneError& E) {
            baton->error.assign(E.what());
        } catch(...) {
            baton->error = "Got an unknown exception";
        }
    }
    delete writer;

    return 0;
}
/// TestBooleanScorer.java, ported 5/9/2009 void testBooleanScorer(CuTest *tc) { const TCHAR* FIELD = _T("category"); RAMDirectory directory; TCHAR* values[] = { _T("1"), _T("2"), _T("3"), _T("4"), NULL}; try { WhitespaceAnalyzer a; IndexWriter* writer = _CLNEW IndexWriter(&directory, &a, true); for (size_t i = 0; values[i]!=NULL; i++) { Document* doc = _CLNEW Document(); doc->add(*_CLNEW Field(FIELD, values[i], Field::STORE_YES | Field::INDEX_TOKENIZED)); writer->addDocument(doc); _CLLDELETE(doc); } writer->close(); _CLLDELETE(writer); BooleanQuery* booleanQuery1 = _CLNEW BooleanQuery(); Term *t = _CLNEW Term(FIELD, _T("1")); booleanQuery1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); _CLDECDELETE(t); t = _CLNEW Term(FIELD, _T("2")); booleanQuery1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD); _CLDECDELETE(t); BooleanQuery* query = _CLNEW BooleanQuery(); query->add(booleanQuery1, true, BooleanClause::MUST); t = _CLNEW Term(FIELD, _T("9")); query->add(_CLNEW TermQuery(t), true, BooleanClause::MUST_NOT); _CLDECDELETE(t); IndexSearcher *indexSearcher = _CLNEW IndexSearcher(&directory); Hits *hits = indexSearcher->search(query); CLUCENE_ASSERT(2 == hits->length()); // Number of matched documents _CLLDELETE(hits); _CLLDELETE(indexSearcher); _CLLDELETE(query); } catch (CLuceneError& e) { CuFail(tc, e.twhat()); } }
/// Appends one document per (id, name) pair from `fields` to the Lucene
/// index: the sortname of the value goes into a field named after `table`
/// (stored, untokenized), and the numeric id into a stored-only "id" field.
/// Errors are logged and asserted on; nothing is propagated to the caller.
void FuzzyIndex::appendFields( const QString& table, const QMap< unsigned int, QString >& fields )
{
    try
    {
        qDebug() << "Appending to index:" << fields.count();
        // Create the index only if none exists on disk yet; otherwise append.
        bool create = !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() );
        IndexWriter luceneWriter = IndexWriter( m_luceneDir, m_analyzer, create );

        Document doc;
        QMapIterator< unsigned int, QString > it( fields );
        while ( it.hasNext() )
        {
            it.next();
            unsigned int id = it.key();
            QString name = it.value();

            {
                // NOTE(review): toStdWString().c_str() yields a pointer to a
                // temporary that dies at the end of this statement — safe only
                // if Field copies its name/value on construction (CLucene's
                // default duplicate behavior); confirm for this version.
                Field* field = _CLNEW Field( table.toStdWString().c_str(), DatabaseImpl::sortname( name ).toStdWString().c_str(), Field::STORE_YES | Field::INDEX_UNTOKENIZED );
                doc.add( *field );
            }
            {
                // Stored-only id field: retrievable but not searchable.
                Field* field = _CLNEW Field( _T( "id" ), QString::number( id ).toStdWString().c_str(), Field::STORE_YES | Field::INDEX_NO );
                doc.add( *field );
            }
            luceneWriter.addDocument( &doc );
            doc.clear();     // frees the Fields added above; doc is reused
        }

        luceneWriter.close();
    }
    catch( CLuceneError& error )
    {
        qDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }
}
void TestBasics::setUp() { directory = _CLNEW RAMDirectory(); Analyzer * analyzer = _CLNEW SimpleAnalyzer(); IndexWriter * writer = _CLNEW IndexWriter( directory, analyzer, true ); TCHAR buffer[ 200 ]; for( int32_t i = 0; i < 1000; i++ ) { Document doc; English::IntToEnglish( i, buffer, 200 ); doc.add( * _CLNEW Field( _T( "field" ), buffer, Field::STORE_YES | Field::INDEX_TOKENIZED )); writer->addDocument( &doc ); } writer->close(); _CLDELETE( writer ); _CLDELETE( analyzer ); searcher = _CLNEW IndexSearcher( directory ); }
// Checks that MockRAMDirectory's incrementally-maintained sizeInBytes stays
// consistent with a full recomputation, before and after several threads
// concurrently add documents through a shared IndexWriter.
// NOTE(review): depends on file-level indexDir/numThreads/docsToAdd/
// docsPerThread globals and the indexDocs thread function — not visible here.
void testRAMDirectorySize(CuTest * tc)  {
    MockRAMDirectory * ramDir = _CLNEW MockRAMDirectory(indexDir);
    WhitespaceAnalyzer analyzer;
    // open for append (create == false): the index was built by the setup test
    IndexWriter * writer = _CLNEW IndexWriter(ramDir, &analyzer, false);
    writer->optimize();

    // tracked size must match a from-scratch recomputation
    CuAssertTrue(tc, ramDir->sizeInBytes == ramDir->getRecomputedSizeInBytes(), _T("RAMDir size"));

    // spin up numThreads writers sharing the same IndexWriter
    _LUCENE_THREADID_TYPE* threads = _CL_NEWARRAY(_LUCENE_THREADID_TYPE, numThreads);
    ThreadData * tdata = _CL_NEWARRAY(ThreadData, numThreads);
    for (int i=0; i<numThreads; i++) {
        tdata[i].num = i;
        tdata[i].dir = ramDir;
        tdata[i].tc = tc;
        tdata[i].writer = writer;
        threads[i] = _LUCENE_THREAD_CREATE(&indexDocs, &tdata[i]);
    }
    for (int i=0; i<numThreads; i++) {
        _LUCENE_THREAD_JOIN(threads[i]);
    }
    _CLDELETE_ARRAY(threads);
    _CLDELETE_ARRAY(tdata);

    writer->optimize();
    // size bookkeeping must still be consistent after concurrent writes
    CuAssertTrue(tc, ramDir->sizeInBytes == ramDir->getRecomputedSizeInBytes(), _T("RAMDir size"));
    // NOTE(review): each thread apparently contributes docsPerThread-1 docs —
    // verify against indexDocs() before relying on this count elsewhere.
    CuAssertEquals(tc, docsToAdd + (numThreads * (docsPerThread-1)), writer->docCount(), _T("document count"));

    writer->close();
    _CLLDELETE(writer);
    ramDir->close();
    _CLLDELETE(ramDir);
}
// setup the index void testRAMDirectorySetUp (CuTest *tc) { if (strlen(cl_tempDir) + 13 > CL_MAX_PATH) CuFail(tc, _T("Not enough space in indexDir buffer")); sprintf(indexDir, "%s/RAMDirIndex", cl_tempDir); WhitespaceAnalyzer analyzer; IndexWriter * writer = new IndexWriter(indexDir, &analyzer, true); // add some documents TCHAR * text; for (int i = 0; i < docsToAdd; i++) { Document doc; text = English::IntToEnglish(i); doc.add(* new Field(_T("content"), text, Field::STORE_YES | Field::INDEX_UNTOKENIZED)); writer->addDocument(&doc); _CLDELETE_ARRAY(text); } CuAssertEquals(tc, docsToAdd, writer->docCount(), _T("document count")); writer->close(); _CLDELETE( writer ); }
// Fixture setup: builds the small m_pSmall index of 8 documents. Every
// document gets an "id" field (its ordinal) and an "all" field; documents
// whose slot in `data` is non-NULL additionally get a tokenized "data"
// field — the NULL slots deliberately produce documents WITHOUT that field.
void setUp() {
    TCHAR tbuffer[16];
    // NULL entries mark documents that must have no "data" field
    const TCHAR* data[] = {
        _T( "A 1 2 3 4 5 6" ),
        _T( "Z 4 5 6" ),
        NULL,
        _T( "B 2 4 5 6" ),
        _T( "Y 3 5 6" ),
        NULL,
        _T( "C 3 6" ),
        _T( "X 4 5 6" )
    };

    m_pSmall = _CLNEW RAMDirectory();
    Analyzer * pAnalyzer = _CLNEW WhitespaceAnalyzer();
    IndexWriter * pWriter = _CLNEW IndexWriter( m_pSmall, pAnalyzer, true );

    for( size_t i = 0; i < sizeof( data ) / sizeof( data[0] ); i++ )
    {
        _itot( i, tbuffer, 10 );   // document id as decimal text
        Document doc;
        doc.add( * _CLNEW Field( _T( "id" ), tbuffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED ));
        doc.add( * _CLNEW Field( _T( "all" ), _T( "all" ), Field::STORE_YES | Field::INDEX_UNTOKENIZED ));
        if( data[ i ] )
            doc.add( * _CLNEW Field( _T( "data" ), data[ i ], Field::STORE_YES | Field::INDEX_TOKENIZED ));
        pWriter->addDocument( &doc );
    }

    pWriter->optimize();
    pWriter->close();
    _CLDELETE( pWriter );
    _CLDELETE( pAnalyzer );
}
/// Builds the full-text index for a Zefania XML lexicon module by streaming
/// m_modulePath with QXmlStreamReader. For each <item> it assembles an HTML
/// `content` string from the item's title/transliteration/pronunciation/
/// description children and indexes it together with the item's id as "key".
/// Returns 0 on success, 1 if the module file cannot be opened.
/// NOTE(review): the catch(...) silently swallows all CLucene errors, so a
/// failed/partial index still returns 0 — confirm callers can tolerate that.
int ZefaniaLex::buildIndex() {
    DEBUG_FUNC_NAME;
    myDebug() << "building index!!!";
    QFile file(m_modulePath);
    Document indexdoc;
    const QString index = indexPath();
    QDir dir("/");
    dir.mkpath(index);    // ensure the index directory exists
    m_refText.setSettings(m_settings);
    IndexWriter* writer = nullptr;
    const TCHAR* stop_words[] = { nullptr };   // analyzer with no stop words
    standard::StandardAnalyzer an(stop_words);
    //open the xml file
    if(!file.open(QFile::ReadOnly | QFile::Text))
        return 1;
    m_xml = new QXmlStreamReader(&file);
    try {
        // Clear a stale lock left by a previous crashed run, if any.
        if(IndexReader::indexExists(index.toStdString().c_str())) {
            if(IndexReader::isLocked(index.toStdString().c_str())) {
                myDebug() << "Index was locked... unlocking it.";
                IndexReader::unlock(index.toStdString().c_str());
            }
        }
        // create == true: the index is always rebuilt from scratch
        writer = new IndexWriter(index.toStdString().c_str() , &an, true);
        writer->setMaxFieldLength(0x7FFFFFFFL);
        writer->setUseCompoundFile(false);   // faster while bulk-indexing
        TCHAR *buffer = SearchTools::createBuffer();
        if(m_xml->readNextStartElement()) {
            if(cmp(m_xml->name(), "dictionary")) {
                // one pass over all <item> children of <dictionary>
                while(m_xml->readNextStartElement()) {
                    if(cmp(m_xml->name(), "item")) {
                        QString content;
                        const QString key = m_xml->attributes().value("id").toString();
                        bool hasTitle = false;
                        // consume everything up to the matching </item>,
                        // appending HTML fragments to `content` as we go
                        while(true) {
                            m_xml->readNext();
                            if(m_xml->tokenType() == QXmlStreamReader::EndElement && (cmp(m_xml->name(), QLatin1String("item"))))
                                break;
                            if(m_xml->tokenType() == QXmlStreamReader::Characters) {
                                content += m_xml->text().toString();
                            } else if(cmp(m_xml->name(), "title")) {
                                const QString title = parseTitle();
                                content += "<h3 class='title'>" + key;
                                if(!title.isEmpty()) {
                                    content.append(" - " + title);
                                }
                                content.append("</h3>");
                                hasTitle = true;
                            } else if(cmp(m_xml->name(), "transliteration")) {
                                const QString trans = parseTrans();
                                if(!trans.isEmpty()) {
                                    content += "<span class='transliteration'>" + trans + "</span>" ;
                                }
                            } else if(cmp(m_xml->name(), "pronunciation")) {
                                const QString pr = parsePron();
                                if(!pr.isEmpty()) {
                                    content += "<span class='pronunciation'>" + pr + "</span>";
                                }
                            } else if(cmp(m_xml->name(), "description")) {
                                content += "<span class='description'>" + parseDesc() + "</span>";
                            } else {
                                // unknown child: keep its text verbatim
                                content += m_xml->readElementText(QXmlStreamReader::IncludeChildElements);
                            }
                        }
                        // every entry gets a heading, even without a <title>
                        if(!hasTitle) {
                            content.prepend("<h3 class='title'>" + key + "</h3>");
                        }
                        indexdoc.clear();   // reuse one Document per item
                        indexdoc.add(*_CLNEW Field(_T("key"), SearchTools::toTCHAR(key, buffer), Field::STORE_YES | Field::INDEX_TOKENIZED));
                        indexdoc.add(*_CLNEW Field(_T("content"), SearchTools::toTCHAR(content, buffer), Field::STORE_YES | Field::INDEX_TOKENIZED));
                        writer->addDocument(&indexdoc);
                    } else {
                        m_xml->skipCurrentElement();
                    }
                }
            } else {
                myWarning() << "not a file";
            }
        }
        // compact the index now that bulk writing is done
        writer->setUseCompoundFile(true);
        writer->optimize();
        writer->close();
        delete writer;
    } catch(...) {
    }
    file.close();
    delete m_xml;
    m_xml = nullptr;
    return 0;
}
/// Builds the index for a BibleQuote dictionary module. The module consists
/// of a config file (key/byte-offset records, read line-wise) and a matching
/// .htm(l) file found next to it; each entry's text is sliced out of the html
/// stream by the byte offsets and indexed under its key.
/// Returns 0 on success, 1 if the module files cannot be found/opened.
int BibleQuoteDict::buildIndex() {
    DEBUG_FUNC_NAME
    // parse both and add docs to the indexwriter
    //myDebug() << m_modulePath;
    QFileInfo fileInfo(m_modulePath);
    //myDebug() << fileInfo.absoluteDir();
    QDir moduleDir(fileInfo.absoluteDir());
    moduleDir.setFilter(QDir::Files);
    QFileInfoList list = moduleDir.entryInfoList();
    QFileInfo htmlFileInfo;
    // locate the .html/.htm companion with the same base name as the module
    foreach(const QFileInfo & info, list) {
        if((info.suffix().compare("html", Qt::CaseInsensitive) == 0 || info.suffix().compare("htm", Qt::CaseInsensitive) == 0) && info.baseName().compare(fileInfo.baseName(), Qt::CaseInsensitive) == 0) {
            htmlFileInfo = info;
            break;
        }
    }
    //myDebug() << htmlFileInfo.absoluteFilePath();
    if(!htmlFileInfo.isReadable() || !fileInfo.isReadable()) {
        myWarning() << "cannot open file to build index";
        //todo: qmessagebox
        return 1;
    }
    QFile configFile(fileInfo.absoluteFilePath());
    QFile htmlFile(htmlFileInfo.absoluteFilePath());
    // both streams are decoded with the module's configured text encoding
    const QString encoding = m_settings->encoding;
    QTextCodec *codec = QTextCodec::codecForName(encoding.toStdString().c_str());
    if(!configFile.open(QIODevice::ReadOnly | QIODevice::Text) || !htmlFile.open(QIODevice::ReadOnly | QIODevice::Text)) {
        myWarning() << "cannot open file to build index";
        return 1;
    }
    QTextStream configIn(&configFile);
    configIn.setCodec(codec);
    QTextStream htmlIn(&htmlFile);
    htmlIn.setCodec(codec);
    const QString index = indexPath();
    QDir dir("/");
    dir.mkpath(index);   // ensure the index directory exists
    // NOTE(review): this dialog has range (0,0) and is never advanced or
    // shown explicitly — confirm it is still wanted here.
    QProgressDialog progress(QObject::tr("Build index"), QObject::tr("Cancel"), 0, 0);
    progress.setWindowModality(Qt::WindowModal);
    IndexWriter* writer = NULL;
    const TCHAR* stop_words[] = { NULL };   // analyzer with no stop words
    standard::StandardAnalyzer an(stop_words);
    // clear a stale lock left by a previous crashed run, if any
    if(IndexReader::indexExists(index.toStdString().c_str())) {
        if(IndexReader::isLocked(index.toStdString().c_str())) {
            myDebug() << "Index was locked... unlocking it.";
            IndexReader::unlock(index.toStdString().c_str());
        }
    }
    // create == true: the index is always rebuilt from scratch
    writer = new IndexWriter(index.toStdString().c_str() , &an, true);
    //writer->setMaxFieldLength(0x7FFFFFFFL);
    writer->setUseCompoundFile(false);   // faster while bulk-indexing
    //index
    Document indexdoc;
    // config layout: first line is the module title, then alternating
    // (key, offset) lines; offsets are positions into the html stream
    const QString title = configIn.readLine();
    QString id = configIn.readLine();
    long num = configIn.readLine().toLong();
    const QString pre = htmlIn.read(num - 1);   // skip html preamble before the first entry
    myDebug() << title << pre;
    TCHAR *buffer = SearchTools::createBuffer();
    while(!configIn.atEnd()) {
        // current entry spans [n, num) in the html stream
        long n = num;
        const QString key = id;
        id = configIn.readLine();
        num = configIn.readLine().toLong();
        const QString data = htmlIn.read(num - n - 1);
        if(key.isEmpty() || data.isEmpty())
            continue;
        indexdoc.clear();   // reuse one Document per entry
        indexdoc.add(*_CLNEW Field(_T("key"), SearchTools::toTCHAR(key, buffer), Field::STORE_YES | Field::INDEX_TOKENIZED));
        indexdoc.add(*_CLNEW Field(_T("content"), SearchTools::toTCHAR(data, buffer), Field::STORE_YES | Field::INDEX_TOKENIZED));
        writer->addDocument(&indexdoc);
    }
    // compact the index now that bulk writing is done
    writer->setUseCompoundFile(true);
    writer->optimize();
    writer->close();
    _CLLDELETE(writer);
    return 0;
}
/// TestBooleanPrefixQuery.java, ported 5/9/2009 void testBooleanPrefixQuery(CuTest* tc) { RAMDirectory directory; WhitespaceAnalyzer a; TCHAR* categories[] = {_T("food"), _T("foodanddrink"), _T("foodanddrinkandgoodtimes"), _T("food and drink"), NULL}; Query* rw1 = NULL; Query* rw2 = NULL; try { IndexWriter* writer = _CLNEW IndexWriter(&directory, &a, true); for (size_t i = 0; categories[i]!=NULL; i++) { Document* doc = new Document(); doc->add(*_CLNEW Field(_T("category"), categories[i], Field::STORE_YES | Field::INDEX_UNTOKENIZED)); writer->addDocument(doc); _CLLDELETE(doc); } writer->close(); _CLLDELETE(writer); IndexReader* reader = IndexReader::open(&directory); Term* t = _CLNEW Term(_T("category"), _T("foo")); PrefixQuery* query = _CLNEW PrefixQuery(t); _CLDECDELETE(t); rw1 = query->rewrite(reader); BooleanQuery* bq = _CLNEW BooleanQuery(); bq->add(query, true, BooleanClause::MUST); rw2 = bq->rewrite(reader); reader->close(); // TODO: check necessity (_CLLDELETE(reader) alone will not do the same cleanup) _CLLDELETE(reader); _CLLDELETE(bq); } catch (CLuceneError& e) { CuFail(tc, e.twhat()); } BooleanQuery* bq1 = NULL; if (rw1->instanceOf(BooleanQuery::getClassName())) { bq1 = (BooleanQuery*) rw1; } BooleanQuery* bq2 = NULL; if (rw2->instanceOf(BooleanQuery::getClassName())) { bq2 = (BooleanQuery*) rw2; } else { CuFail(tc, _T("Rewrite")); } bool bClausesMatch = bq1->getClauseCount() == bq2->getClauseCount(); _CLLDELETE(rw1); _CLLDELETE(rw2); if (!bClausesMatch) { CuFail(tc, _T("Number of Clauses Mismatch")); } }
/// Builds the index for a Zefania lexicon from an already-parsed KoXml DOM.
/// Walks the document's top-level children: the <INFORMATION> node supplies
/// the module title and UID; each <item> is flattened into an HTML `content`
/// string (title, transliteration, pronunciation, description with resolved
/// reflink/see cross-references) and indexed under its id as "key".
/// Also sniffs whether the dictionary looks like an RMAC morphology module
/// (specific grammar codes appearing as keys) to pick the default module type.
/// Returns the populated MetaInfo, or a default MetaInfo on any exception.
MetaInfo ZefaniaLex::buildIndexFromXmlDoc(KoXmlDocument *xmldoc)
{
    try {
        MetaInfo info;
        int couldBe = 0;//1 = RMac
        Document indexdoc;
        const QString index = indexPath();
        QString fileTitle;
        QString uid;
        QString type;
        QDir dir("/");
        dir.mkpath(index);    // ensure the index directory exists
        RefText refText;
        refText.setSettings(m_settings);
        IndexWriter* writer = NULL;
        const TCHAR* stop_words[] = { NULL };   // analyzer with no stop words
        standard::StandardAnalyzer an(stop_words);
        // clear a stale lock left by a previous crashed run, if any
        if(IndexReader::indexExists(index.toStdString().c_str())) {
            if(IndexReader::isLocked(index.toStdString().c_str())) {
                myDebug() << "Index was locked... unlocking it.";
                IndexReader::unlock(index.toStdString().c_str());
            }
        }
        // create == true: the index is always rebuilt from scratch
        writer = new IndexWriter(index.toStdString().c_str() , &an, true);
        writer->setMaxFieldLength(0x7FFFFFFFL);
        writer->setUseCompoundFile(false);   // faster while bulk-indexing
        KoXmlNode item = xmldoc->documentElement().firstChild();
        type = xmldoc->documentElement().toElement().attribute("type", "");
        for(int c = 0; !item.isNull();) {
            QString key = "";
            QString title = "";
            QString trans = "";
            QString pron = "";
            QString desc = "";
            KoXmlElement e = item.toElement();
            if(e.tagName().compare("INFORMATION", Qt::CaseInsensitive) == 0) {
                // module metadata node (shadows the outer `title` deliberately?
                // NOTE(review): inner KoXmlNode `title` hides the QString above)
                KoXmlNode title = item.namedItem("subject");
                KoXmlNode identifer = item.namedItem("identifier");
                fileTitle = title.toElement().text();
                uid = identifer.toElement().text();
            } else if(e.tagName().compare("item", Qt::CaseInsensitive) == 0) {
                key = e.attribute("id");
                // gather the item's child elements into the local strings
                KoXmlNode details = item.firstChild();
                while(!details.isNull()) {
                    KoXmlElement edetails = details.toElement();
                    if(edetails.tagName().compare("title", Qt::CaseInsensitive) == 0) {
                        title = edetails.text();
                    } else if(edetails.tagName().compare("transliteration", Qt::CaseInsensitive) == 0) {
                        trans = edetails.text();
                    } else if(edetails.tagName().compare("pronunciation", Qt::CaseInsensitive) == 0) {
                        // keep only <em> children, wrapped back in <em> tags
                        KoXmlNode em = details.firstChild();
                        while(!em.isNull()) {
                            if(em.toElement().tagName().compare("em", Qt::CaseInsensitive) == 0)
                                pron = "<em>" + em.toElement().text() + "</em>";
                            em = em.nextSibling();
                        }
                    } else if(edetails.tagName().compare("description", Qt::CaseInsensitive) == 0) {
                        // mixed content: text nodes are copied, element nodes
                        // are translated into hyperlinks where possible
                        KoXmlNode descNode = details.firstChild();
                        while(!descNode.isNull()) {
                            if(descNode.nodeType() == 2) {
                                desc += descNode.toText().data();
                            } else if(descNode.nodeType() == 1) {
                                KoXmlElement descElement = descNode.toElement();
                                if(descElement.tagName().compare("reflink", Qt::CaseInsensitive) == 0) {
                                    if(descElement.hasAttribute("mscope")) {
                                        // verse reference -> rendered link
                                        const QString mscope = descElement.attribute("mscope", ";;;");
                                        VerseUrl url;
                                        url.fromMscope(mscope);
                                        desc += " <a href=\"" + url.toString() + "\">" + refText.toString(url) + "</a> ";
                                    } else if(descElement.hasAttribute("target")) {
                                        desc += descElement.text();
                                    }
                                } else if(descElement.tagName().compare("see", Qt::CaseInsensitive) == 0) {
                                    const QString target = descElement.attribute("target", "");
                                    //todo: currently we assume target = x-self
                                    StrongUrl url;
                                    bool ok = url.fromText(descElement.text());
                                    if(ok)
                                        desc += " <a href=\"" + url.toString() + "\">" + descElement.text() + "</a> ";
                                }
                            }
                            descNode = descNode.nextSibling();
                        }
                        desc += "<hr />";
                    }
                    details = details.nextSibling();
                }
                // RMAC sniffing: these grammar codes only occur in morphology modules
                if(couldBe == 0) {
                    if(key.toUpper() == "A-APF" || key.toUpper() == "X-NSN" || key.toUpper() == "V-PAP-DPN") {
                        couldBe = 1;
                    }
                }
                // assemble the final HTML blob for this entry
                QString content = "<h3 class='strongTitle'>" + key + " - " + title + "</h3>";
                if(!trans.isEmpty()) {
                    content += " (" + trans + ") ";
                }
                if(!pron.isEmpty()) {
                    content += " [" + pron + "] ";
                }
                content += "<br />" + desc;
                indexdoc.clear();   // reuse one Document per item
#ifdef OBV_USE_WSTRING
                indexdoc.add(*_CLNEW Field(_T("key"), key.toStdWString().c_str(), Field::STORE_YES | Field::INDEX_TOKENIZED));
                indexdoc.add(*_CLNEW Field(_T("content"), content.toStdWString().c_str(), Field::STORE_YES | Field::INDEX_TOKENIZED));
#else
                // non-wstring build: QString::utf16() is reinterpreted as
                // wchar_t* — valid only where wchar_t is 16-bit (e.g. Windows)
                indexdoc.add(*_CLNEW Field(_T("key"), reinterpret_cast<const wchar_t *>(key.utf16()), Field::STORE_YES | Field::INDEX_TOKENIZED));
                indexdoc.add(*_CLNEW Field(_T("content"), reinterpret_cast<const wchar_t *>(content.utf16()), Field::STORE_YES | Field::INDEX_TOKENIZED));
#endif
                writer->addDocument(&indexdoc);
            }
            item = item.nextSibling();
            c++;
        }
        // compact the index now that bulk writing is done
        writer->setUseCompoundFile(true);
        writer->optimize();
        writer->close();
        delete writer;
        info.setName(fileTitle);
        info.setUID(uid);
        // choose default module/content type from the dictionary's declared type
        if(type == "x-strong") {
            info.setDefaultModule(OBVCore::DefaultStrongDictModule);
            info.setContent(OBVCore::StrongsContent);
        } else if(type == "x-dictionary") {
            if(couldBe == 1) {
                info.setDefaultModule(OBVCore::DefaultRMACDictModule);
                info.setContent(OBVCore::RMacContent);
            } else {
                info.setDefaultModule(OBVCore::DefaultDictModule);
            }
        }
        return info;
    } catch(...) {
        // swallow everything and signal failure via a default MetaInfo
        return MetaInfo();
    }
}