Пример #1
0
int cmd_add( int_t argc, char** argv ){
	if ( argc < 2 )
		usage();

	bool dontRecurse = false;
	bool optimize = false;
	bool store = false;
	char_t* path = NULL;
	
	for ( int i=0;i< argc-2;i++ ){
		if ( strcmp(argv[i], "-dontrecurse")==0 )
			dontRecurse = true;
		else if ( strcmp(argv[i],"-optimize")==0 )
			optimize = true;
		else if ( strcmp(argv[i],"-store")==0 )
			store = true;
	}
	
	char_t* target = TO_CHAR_T(argv[argc-2]);
	path = TO_CHAR_T(argv[argc-1]);

	IndexWriter* writer = NULL;
	Directory* d = NULL;
	lucene::analysis::standard::StandardAnalyzer an;
	
	if ( IndexReader::indexExists(target) ){
		d = &FSDirectory::getDirectory( target,false );
		writer = new IndexWriter( *d, an, false);
	}else{
		d = &FSDirectory::getDirectory(target,true);
		writer = new IndexWriter( *d ,an, true);
	}
	
	struct Struct_Stat buf;
	if ( Cmd_Stat(path,&buf) == 0 ){
		if ( buf.st_mode & S_IFREG ){
			Document& doc = FileDocument( path, store );
			writer->addDocument( doc );
			delete &doc;
		}else if ( buf.st_mode & S_IFDIR ){
			indexDocs(writer, path, dontRecurse, store);
		}else{
			cerr << "File/directory is not valid.";
		}
	}else{
		printf( "File/directory does not exist." );
	}
		if ( optimize )
			writer->optimize();

		writer->close();
		delete[] target;
		delete[] path;

	return 1;
}
Пример #2
0
void testEndThreadException(CuTest *tc) {

    const int MAX_DOCS=1500;
	RAMDirectory ram;
	WhitespaceAnalyzer an;
	IndexWriter* writer = _CLNEW IndexWriter(&ram, &an, true);

    // add some documents
    Document doc;
    for (int i = 0; i < MAX_DOCS; i++) {
        TCHAR * tmp = English::IntToEnglish(i);
        doc.add(* new Field(_T("content"), tmp, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
        writer->addDocument(&doc);
        doc.clear();
        _CLDELETE_ARRAY( tmp );
    }

    CuAssertEquals(tc, MAX_DOCS, writer->docCount());
    
    writer->close();
	_CLLDELETE(writer);
    
    // this sequence is OK: delete searcher after search thread finish
    {
        IndexSearcher * searcher = _CLNEW IndexSearcher(&ram);
        _LUCENE_THREADID_TYPE thread = _LUCENE_THREAD_CREATE(&searchDocs, searcher);
        SCOPED_LOCK_MUTEX(searchMutex);

        CONDITION_WAIT(searchMutex, searchCondition);
//        _LUCENE_SLEEP(9999); //make sure that deleteMutex is being waited on...
        CONDITION_NOTIFYALL(deleteCondition);

        _LUCENE_THREAD_JOIN(thread);

        searcher->close();
        _CLLDELETE(searcher);
    }

    // this produces memory exception: delete searcher after search finish but before thread finish
    {
        IndexSearcher * searcher = _CLNEW IndexSearcher(&ram);
        _LUCENE_THREADID_TYPE thread = _LUCENE_THREAD_CREATE(&searchDocs, searcher);
        SCOPED_LOCK_MUTEX(searchMutex);

        CONDITION_WAIT(searchMutex, searchCondition);
        searcher->close();
        _CLLDELETE(searcher);
        CONDITION_NOTIFYALL(deleteCondition);

        _LUCENE_THREAD_JOIN(thread);
    }


    ram.close();
}
Пример #3
0
void testIncludeLowerTrue(CuTest* tc)
{
    WhitespaceAnalyzer a;
    RAMDirectory* index = _CLNEW RAMDirectory();
    IndexWriter* writer = _CLNEW IndexWriter(index,
        &a, true);

    Document doc;
    doc.add(*_CLNEW Field(_T("Category"), _T("a 1"), Field::STORE_YES | Field::INDEX_TOKENIZED));
    writer->addDocument(&doc); doc.clear();

    doc.add(*_CLNEW Field(_T("Category"), _T("a 2"), Field::STORE_YES | Field::INDEX_TOKENIZED));
    writer->addDocument(&doc); doc.clear();

    doc.add(*_CLNEW Field(_T("Category"), _T("a 3"), Field::STORE_YES | Field::INDEX_TOKENIZED));
    writer->addDocument(&doc); doc.clear();

    writer->close();
    _CLLDELETE(writer);

    IndexSearcher* s = _CLNEW IndexSearcher(index);
    Filter* f = _CLNEW RangeFilter(_T("Category"), _T("3"), _T("3"), true, true);

    Term* t = _CLNEW Term(_T("Category"), _T("a"));
    Query* q1 = _CLNEW TermQuery(t);
    _CLLDECDELETE(t);

    t = _CLNEW Term(_T("Category"), _T("3"));
    Query* q2 = _CLNEW TermQuery(t);
    _CLLDECDELETE(t);

    Hits* h = s->search(q1);
    assertTrue(h->length() == 3);
    _CLLDELETE(h);

    h = s->search(q2);
    assertTrue(h->length() == 1);
    _CLLDELETE(h);

    h = s->search(q1, f);
    assertTrue(h->length() == 1);
    _CLLDELETE(h);

    s->close();
    _CLLDELETE(s);
    _CLLDELETE(q1);
    _CLLDELETE(q2);
    _CLLDELETE(f);

    index->close();
    _CLLDECDELETE(index);
}
Пример #4
0
void TestSpansAdvanced::setUp()
{
    directory = _CLNEW RAMDirectory();
    Analyzer * analyzer = _CLNEW StandardAnalyzer();
    IndexWriter * writer = _CLNEW IndexWriter( directory, analyzer, true );

    addDocuments( writer );

    writer->close();
    _CLDELETE( writer );
    _CLDELETE( analyzer );

    searcher = _CLNEW IndexSearcher( directory );
}
Пример #5
0
/// TestBooleanScorer.java, ported 5/9/2009
void testBooleanScorer(CuTest *tc) {
    const TCHAR* FIELD = _T("category");
    RAMDirectory directory;

    TCHAR* values[] = { _T("1"), _T("2"), _T("3"), _T("4"), NULL};

    try {
        WhitespaceAnalyzer a;
        IndexWriter* writer = _CLNEW IndexWriter(&directory, &a, true);
        for (size_t i = 0; values[i]!=NULL; i++) {
            Document* doc = _CLNEW Document();
            doc->add(*_CLNEW Field(FIELD, values[i], Field::STORE_YES | Field::INDEX_TOKENIZED));
            writer->addDocument(doc);
            _CLLDELETE(doc);
        }
        writer->close();
        _CLLDELETE(writer);

        BooleanQuery* booleanQuery1 = _CLNEW BooleanQuery();
        Term *t = _CLNEW Term(FIELD, _T("1"));
        booleanQuery1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD);
        _CLDECDELETE(t);
        t = _CLNEW Term(FIELD, _T("2"));
        booleanQuery1->add(_CLNEW TermQuery(t), true, BooleanClause::SHOULD);
        _CLDECDELETE(t);

        BooleanQuery* query = _CLNEW BooleanQuery();
        query->add(booleanQuery1, true, BooleanClause::MUST);
        t = _CLNEW Term(FIELD, _T("9"));
        query->add(_CLNEW TermQuery(t), true, BooleanClause::MUST_NOT);
        _CLDECDELETE(t);

        IndexSearcher *indexSearcher = _CLNEW IndexSearcher(&directory);
        Hits *hits = indexSearcher->search(query);
        CLUCENE_ASSERT(2 == hits->length()); // Number of matched documents
        _CLLDELETE(hits);
        _CLLDELETE(indexSearcher);

        _CLLDELETE(query);
    }
    catch (CLuceneError& e) {
        CuFail(tc, e.twhat());
    }
}
Пример #6
0
  static Handle<Value> IndexText(const Arguments& args)
  {
    HandleScope scope;
    	IndexWriter* writer = NULL;
	lucene::analysis::WhitespaceAnalyzer an;

	if (IndexReader::indexExists(*String::Utf8Value(args[2])) ){
		if ( IndexReader::isLocked(*String::Utf8Value(args[2])) ){
			printf("Index was locked... unlocking it.\n");
			IndexReader::unlock(*String::Utf8Value(args[2]));
		}

		writer = _CLNEW IndexWriter( *String::Utf8Value(args[2]), &an, false);
	}else{
		writer = _CLNEW IndexWriter( *String::Utf8Value(args[2]) ,&an, true);
	}
	// We can tell the writer to flush at certain occasions
    //writer->setRAMBufferSizeMB(0.5);
    //writer->setMaxBufferedDocs(3);

    // To bypass a possible exception (we have no idea what we will be indexing...)
    writer->setMaxFieldLength(0x7FFFFFFFL); // LUCENE_INT32_MAX_SHOULDBE

    // Turn this off to make indexing faster; we'll turn it on later before optimizing
    writer->setUseCompoundFile(false);

	uint64_t str = Misc::currentTimeMillis();

	Document doc;

    doc.clear();

    TCHAR path[CL_MAX_DIR];
    STRCPY_AtoT(path,*String::Utf8Value(args[0]),CL_MAX_DIR);

    TCHAR contents[CL_MAX_DIR];
    STRCPY_AtoT(contents,*String::Utf8Value(args[1]),CL_MAX_DIR);

    (&doc)->add( *_CLNEW Field(_T("path"), path, Field::STORE_YES | Field::INDEX_UNTOKENIZED ) );
    (&doc)->add( *_CLNEW Field(_T("contents"), contents, Field::STORE_YES | Field::INDEX_TOKENIZED) );

    writer->addDocument( &doc );

    // Make the index use as little files as possible, and optimize it
    writer->setUseCompoundFile(true);
    writer->optimize();

    // Close and clean up
    writer->close();
	_CLLDELETE(writer);

	printf("Indexing took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));IndexWriter(*String::Utf8Value(args[2]), &an, false);

    return scope.Close(Undefined());

  }
Пример #7
0
void
FuzzyIndex::appendFields( const QString& table, const QMap< unsigned int, QString >& fields )
{
    try
    {
        qDebug() << "Appending to index:" << fields.count();
        bool create = !IndexReader::indexExists( TomahawkUtils::appDataDir().absoluteFilePath( "tomahawk.lucene" ).toStdString().c_str() );
        IndexWriter luceneWriter = IndexWriter( m_luceneDir, m_analyzer, create );
        Document doc;

        QMapIterator< unsigned int, QString > it( fields );
        while ( it.hasNext() )
        {
            it.next();
            unsigned int id = it.key();
            QString name = it.value();

            {
                Field* field = _CLNEW Field( table.toStdWString().c_str(), DatabaseImpl::sortname( name ).toStdWString().c_str(),
                                            Field::STORE_YES | Field::INDEX_UNTOKENIZED );
                doc.add( *field );
            }

            {
                Field* field = _CLNEW Field( _T( "id" ), QString::number( id ).toStdWString().c_str(),
                Field::STORE_YES | Field::INDEX_NO );
                doc.add( *field );
            }

            luceneWriter.addDocument( &doc );
            doc.clear();
        }

        luceneWriter.close();
    }
    catch( CLuceneError& error )
    {
        qDebug() << "Caught CLucene error:" << error.what();
        Q_ASSERT( false );
    }
}
Пример #8
0
void TestBasics::setUp()
{
    directory = _CLNEW RAMDirectory();
    Analyzer * analyzer = _CLNEW SimpleAnalyzer();
    IndexWriter * writer = _CLNEW IndexWriter( directory, analyzer, true );
    TCHAR buffer[ 200 ];

    for( int32_t i = 0; i < 1000; i++ )
    {
        Document doc;
        English::IntToEnglish( i, buffer, 200 );
        doc.add( * _CLNEW Field( _T( "field" ), buffer, Field::STORE_YES | Field::INDEX_TOKENIZED ));
        writer->addDocument( &doc );
    }
    
    writer->close();
    _CLDELETE( writer );
    _CLDELETE( analyzer );

    searcher = _CLNEW IndexSearcher( directory );
}
Пример #9
0
void testRAMDirectorySize(CuTest * tc)  {

    MockRAMDirectory * ramDir = _CLNEW MockRAMDirectory(indexDir);
    WhitespaceAnalyzer analyzer;
    IndexWriter * writer = _CLNEW IndexWriter(ramDir, &analyzer, false);
    writer->optimize();

    CuAssertTrue(tc, ramDir->sizeInBytes == ramDir->getRecomputedSizeInBytes(), _T("RAMDir size"));

    _LUCENE_THREADID_TYPE* threads = _CL_NEWARRAY(_LUCENE_THREADID_TYPE, numThreads);
    ThreadData * tdata = _CL_NEWARRAY(ThreadData, numThreads);

    for (int i=0; i<numThreads; i++) {
        tdata[i].num = i;
        tdata[i].dir = ramDir;
        tdata[i].tc = tc;
        tdata[i].writer = writer;
        threads[i] = _LUCENE_THREAD_CREATE(&indexDocs, &tdata[i]);
    }

    for (int i=0; i<numThreads; i++) {
        _LUCENE_THREAD_JOIN(threads[i]);
    }

    _CLDELETE_ARRAY(threads);
    _CLDELETE_ARRAY(tdata);

    writer->optimize();
    CuAssertTrue(tc, ramDir->sizeInBytes == ramDir->getRecomputedSizeInBytes(), _T("RAMDir size"));

    CuAssertEquals(tc, docsToAdd + (numThreads * (docsPerThread-1)), writer->docCount(), _T("document count"));

    writer->close();
    _CLLDELETE(writer);

    ramDir->close();
    _CLLDELETE(ramDir);
}
Пример #10
0
// setup the index
void testRAMDirectorySetUp (CuTest *tc) {

    if (strlen(cl_tempDir) + 13 > CL_MAX_PATH)
        CuFail(tc, _T("Not enough space in indexDir buffer"));

    sprintf(indexDir, "%s/RAMDirIndex", cl_tempDir);
    WhitespaceAnalyzer analyzer;
    IndexWriter * writer  = new IndexWriter(indexDir, &analyzer, true);

    // add some documents
    TCHAR * text;
    for (int i = 0; i < docsToAdd; i++) {
        Document doc;
        text = English::IntToEnglish(i);
        doc.add(* new Field(_T("content"), text, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
        writer->addDocument(&doc);
        _CLDELETE_ARRAY(text);
    }

    CuAssertEquals(tc, docsToAdd, writer->docCount(), _T("document count"));
    writer->close();
    _CLDELETE( writer );
}
    void setUp()
    {
        TCHAR       tbuffer[16];
        const TCHAR*      data[] = 
        {   
            _T( "A 1 2 3 4 5 6" ),
            _T( "Z       4 5 6" ),
            NULL,
            _T( "B   2   4 5 6" ),
            _T( "Y     3   5 6" ),
            NULL,
            _T( "C     3     6" ),
            _T( "X       4 5 6" )
        };
        
        m_pSmall = _CLNEW RAMDirectory();
        Analyzer * pAnalyzer = _CLNEW WhitespaceAnalyzer();
        IndexWriter * pWriter = _CLNEW IndexWriter( m_pSmall, pAnalyzer, true );
        
        for( size_t i = 0; i < sizeof( data ) / sizeof( data[0] ); i++ )
        {
            _itot( i, tbuffer, 10 ); 
            Document doc;
            doc.add( * _CLNEW Field( _T( "id" ), tbuffer, Field::STORE_YES | Field::INDEX_UNTOKENIZED ));
            doc.add( * _CLNEW Field( _T( "all" ), _T( "all" ), Field::STORE_YES | Field::INDEX_UNTOKENIZED ));
            if( data[ i ] )
                doc.add( * _CLNEW Field( _T( "data" ), data[ i ], Field::STORE_YES | Field::INDEX_TOKENIZED ));

            pWriter->addDocument( &doc );
        }
        
        pWriter->optimize();
        pWriter->close();
        _CLDELETE( pWriter );
        _CLDELETE( pAnalyzer );
    }
Пример #12
0
    static int EIO_Index(eio_req* req) {
        index_baton_t* baton = static_cast<index_baton_t*>(req->data);

        lucene::analysis::standard::StandardAnalyzer an;
        IndexWriter* writer = 0;
        bool writerOpen = false;
        
        try {
          bool needsCreation = true;
          if (IndexReader::indexExists(*(*baton->index))) {
              if (IndexReader::isLocked(*(*baton->index))) {
                  IndexReader::unlock(*(*baton->index));
              }
              needsCreation = false;
          }
          writer = new IndexWriter(*(*baton->index), &an, needsCreation);
          writerOpen = true;
        
          // To bypass a possible exception (we have no idea what we will be indexing...)
          writer->setMaxFieldLength(0x7FFFFFFFL); // LUCENE_INT32_MAX_SHOULDBE
          // Turn this off to make indexing faster; we'll turn it on later before optimizing
          writer->setUseCompoundFile(false);
          uint64_t start = Misc::currentTimeMillis();

          writer->addDocument(baton->doc->document());
          
          // Make the index use as little files as possible, and optimize it
          writer->setUseCompoundFile(true);
          writer->optimize();

          baton->indexTime = (Misc::currentTimeMillis() - start);
        } catch (CLuceneError& E) {
          baton->error.assign(E.what());
        } catch(...) {
          baton->error = "Got an unknown exception";
        }
        
        // Close and clean up
        if (writerOpen == true) {
           writer->close();
        }       
        delete writer;
        //(*(*baton->index), &an, false);

        return 0;
    }
Пример #13
0
  static Handle<Value> IndexFiles(const Arguments& args)
  {
    HandleScope scope;
	IndexWriter* writer = NULL;
	lucene::analysis::WhitespaceAnalyzer an;
	
	if (IndexReader::indexExists(*String::Utf8Value(args[1])) ){
		if ( IndexReader::isLocked(*String::Utf8Value(args[1])) ){
			printf("Index was locked... unlocking it.\n");
			IndexReader::unlock(*String::Utf8Value(args[1]));
		}

		writer = _CLNEW IndexWriter( *String::Utf8Value(args[1]), &an, false);
	}else{
		writer = _CLNEW IndexWriter( *String::Utf8Value(args[1]) ,&an, true);
	}

    //writer->setInfoStream(&std::cout);

    // We can tell the writer to flush at certain occasions
    //writer->setRAMBufferSizeMB(0.5);
    //writer->setMaxBufferedDocs(3);

    // To bypass a possible exception (we have no idea what we will be indexing...)
    writer->setMaxFieldLength(0x7FFFFFFFL); // LUCENE_INT32_MAX_SHOULDBE
    
    // Turn this off to make indexing faster; we'll turn it on later before optimizing
    writer->setUseCompoundFile(false);

	uint64_t str = Misc::currentTimeMillis();

	indexDocs(writer, *String::Utf8Value(args[0]));
	
    // Make the index use as little files as possible, and optimize it
    writer->setUseCompoundFile(true);
    writer->optimize();
	
    // Close and clean up
    writer->close();
	_CLLDELETE(writer);

	printf("Indexing took: %d ms.\n\n", (int32_t)(Misc::currentTimeMillis() - str));IndexWriter(*String::Utf8Value(args[1]), &an, false);
    
    //Lucene* lucene = ObjectWrap::Unwrap<Lucene>(args.This());

    return scope.Close(String::New("foo"));
  }
Пример #14
0
/*
int
xesamquery(int argc, char** argv) {
    // parse arguments
    parseArguments(argc, argv);
    string backend = options['t'];
    string indexdir = options['d'];
    string ulfile = options['u'];
    string qlfile = options['q'];

    // check arguments: indexdir
    if (indexdir.length() == 0) {
        pe("Provide the directory with the index.\n");
        return usage(argc, argv);
    }

    if ((ulfile.length() == 0) && (qlfile.length() == 0)) {
        pe ("Provide at last one file containing the xesam query formulated ");
        pe ("with query language or user language.\n");
        return usage(argc, argv);
    }
    
    if (arguments.size() != 0) {
        pe("You do not have to provide other arguments.\n");
        return usage(argc, argv);
        return 1;
    }

    Xesam2Strigi xesam2strigi;

    if (qlfile.length() != 0) {
        printf ("processing xesam querylanguage query from file %s\n", qlfile.c_str());

        if (xesam2strigi.parse_file ( qlfile, Xesam2Strigi::QueryLanguage))
            printf ("Xesam query parsed successfully\n");
        else
        {
            pe ("error parsing query\n");
            return 1;
        }
    }
    else if (ulfile.length() != 0) {
      printf ("processing xesam userlanguage query from file %s\n", ulfile.c_str());

      if (xesam2strigi.parse_file ( ulfile, Xesam2Strigi::UserLanguage))
        printf ("Xesam query parsed successfully\n");
      else
      {
        pe ("error parsing query\n");
        return 1;
      }

    }
    
    // create an index manager
    IndexManager* manager = getIndexManager(backend, indexdir);
    if (manager == 0) {
        return usage(argc, argv);
    }
    IndexReader* reader = manager->indexReader();

    unsigned int results = 0;
    const uint batchsize = 10;
    Query query = xesam2strigi.query();
    vector<IndexedDocument> matches = reader->query(query, 0, batchsize);

    if (matches.size() != 0) {
        printf ("Search results:\n");
    } else {
        printf ("No results for search\n");
    }

    while (matches.size() > 0) {
        for (vector<IndexedDocument>::iterator it = matches.begin();
            it != matches.end(); ++it) {
                printf ("\"%s\" matched\n", it->uri.c_str());
                printIndexedDocument(*it);
            }

        results += (int)matches.size();

        if (matches.size() == batchsize) {
            // maybe there're other results
            matches = reader->query(query, results + 1, 10);
        } else {
            matches.clear(); // force while() exit
        }
    }

    if (results != 0)
        printf ("Query returned %i results\n", results);

    IndexPluginLoader::deleteIndexManager(manager);
    return 0;
}
*/
int
deindex(int argc, char** argv) {
    // parse arguments
    parseArguments(argc, argv);
    string backend = options['t'];
    string indexdir = options['d'];

    // check arguments: indexdir
    if (indexdir.length() == 0) {
        pe("Provide the directory with the index.\n");
        return usage(argc, argv);
    }

    // check arguments: dirs
    if (arguments.size() == 0) {
        pe("'%s' '%s'\n", backend.c_str(), indexdir.c_str());
        pe("Provide directories/files to deindex.\n");
        return usage(argc, argv);
    }
    
    AnalyzerConfiguration config;
    
    // create an index manager
    IndexManager* manager = getIndexManager(backend, indexdir);
    if (manager == 0) {
        return usage(argc, argv);
    }
    
    // retrieve all indexed files at level 0 (archives contents aren't returned)
    map<string, time_t> indexedFiles;
/* TODO: make this code not use indexReader->files(0)

    IndexReader* indexReader = manager->indexReader();
    indexedFiles = indexReader->files(0);
*/
    vector<string> toDelete;
    
    for (vector<string>::iterator iter = arguments.begin();
         iter != arguments.end(); ++iter)
    {
        // find all indexed files whose path starts with *iter
        FindIndexedFiles match (*iter);
        map<string, time_t>::iterator itBeg, itEnd, itMatch;
        itBeg = indexedFiles.begin();
        itEnd = indexedFiles.end();
        
        itMatch = find_if (itBeg, itEnd, match);
        while (itMatch != itEnd)
        {
            toDelete.push_back (itMatch->first);
            itBeg = ++itMatch;
            itMatch = find_if (itBeg, itEnd, match);
        }
    }
    
    if (toDelete.empty())
        printf ("no file will be deindexed\n");
    else
    {
        for (vector<string>::iterator iter = toDelete.begin();
             iter != toDelete.end(); ++iter)
        {
            printf ("%s will be deindex\n", iter->c_str());
        }
        IndexWriter* writer = manager->indexWriter();
        writer->deleteEntries(toDelete);
        writer->commit();
        writer->optimize();
    }
    
    IndexPluginLoader::deleteIndexManager(manager);
    return 0;
}
Пример #15
0
int ZefaniaLex::buildIndex()
{
    DEBUG_FUNC_NAME;

    myDebug() << "building index!!!";
    QFile file(m_modulePath);
    Document indexdoc;
    const QString index = indexPath();
    QDir dir("/");
    dir.mkpath(index);


    m_refText.setSettings(m_settings);
    IndexWriter* writer = nullptr;
    const TCHAR* stop_words[] = { nullptr };
    standard::StandardAnalyzer an(stop_words);

    //open the xml file
    if(!file.open(QFile::ReadOnly | QFile::Text))
        return 1;
    m_xml = new QXmlStreamReader(&file);

    try {

        if(IndexReader::indexExists(index.toStdString().c_str())) {
            if(IndexReader::isLocked(index.toStdString().c_str())) {
                myDebug() << "Index was locked... unlocking it.";
                IndexReader::unlock(index.toStdString().c_str());
            }
        }
        writer = new IndexWriter(index.toStdString().c_str() , &an, true);

        writer->setMaxFieldLength(0x7FFFFFFFL);
        writer->setUseCompoundFile(false);

        TCHAR *buffer = SearchTools::createBuffer();

        if(m_xml->readNextStartElement()) {
            if(cmp(m_xml->name(), "dictionary")) {
                while(m_xml->readNextStartElement()) {
                    if(cmp(m_xml->name(), "item")) {
                        QString content;
                        const QString key = m_xml->attributes().value("id").toString();
                        bool hasTitle = false;
                        while(true) {
                            m_xml->readNext();

                            if(m_xml->tokenType() == QXmlStreamReader::EndElement && (cmp(m_xml->name(), QLatin1String("item"))))
                                break;

                            if(m_xml->tokenType() == QXmlStreamReader::Characters) {
                                content += m_xml->text().toString();
                            } else if(cmp(m_xml->name(), "title")) {
                                const QString title = parseTitle();
                                content += "<h3 class='title'>" + key;
                                if(!title.isEmpty()) {
                                    content.append(" - " +  title);
                                }
                                content.append("</h3>");
                                hasTitle = true;
                            } else if(cmp(m_xml->name(), "transliteration")) {
                                const QString trans = parseTrans();
                                if(!trans.isEmpty()) {
                                    content += "<span class='transliteration'>" + trans + "</span>" ;
                                }
                            }  else if(cmp(m_xml->name(), "pronunciation")) {
                                const QString pr = parsePron();
                                if(!pr.isEmpty()) {
                                    content += "<span class='pronunciation'>" + pr + "</span>";
                                }
                            } else if(cmp(m_xml->name(), "description")) {
                                content += "<span class='description'>" + parseDesc() + "</span>";
                            } else {
                                content += m_xml->readElementText(QXmlStreamReader::IncludeChildElements);
                            }
                        }
                        if(!hasTitle) {
                            content.prepend("<h3 class='title'>" + key + "</h3>");
                        }
                        indexdoc.clear();
                        indexdoc.add(*_CLNEW Field(_T("key"), SearchTools::toTCHAR(key, buffer), Field::STORE_YES |  Field::INDEX_TOKENIZED));
                        indexdoc.add(*_CLNEW Field(_T("content"), SearchTools::toTCHAR(content, buffer), Field::STORE_YES |  Field::INDEX_TOKENIZED));
                        writer->addDocument(&indexdoc);

                    } else {
                        m_xml->skipCurrentElement();
                    }
                }
            } else {
                myWarning() << "not a file";
            }
        }

        writer->setUseCompoundFile(true);
        writer->optimize();

        writer->close();
        delete writer;
    }
    catch(...) {
    }

    file.close();
    delete m_xml;
    m_xml = nullptr;
    return 0;
}
Пример #16
0
/// TestBooleanPrefixQuery.java, ported 5/9/2009
void testBooleanPrefixQuery(CuTest* tc) {
    RAMDirectory directory;
    WhitespaceAnalyzer a;

    TCHAR* categories[] = {_T("food"), _T("foodanddrink"),
        _T("foodanddrinkandgoodtimes"), _T("food and drink"), NULL};

    Query* rw1 = NULL;
    Query* rw2 = NULL;
    try {
        IndexWriter* writer = _CLNEW IndexWriter(&directory, &a, true);
        for (size_t i = 0; categories[i]!=NULL; i++) {
            Document* doc = new Document();
            doc->add(*_CLNEW Field(_T("category"), categories[i], Field::STORE_YES | Field::INDEX_UNTOKENIZED));
            writer->addDocument(doc);
            _CLLDELETE(doc);
        }
        writer->close();
        _CLLDELETE(writer);

        IndexReader* reader = IndexReader::open(&directory);
        Term* t = _CLNEW Term(_T("category"), _T("foo"));
        PrefixQuery* query = _CLNEW PrefixQuery(t);
        _CLDECDELETE(t);

        rw1 = query->rewrite(reader);

        BooleanQuery* bq = _CLNEW BooleanQuery();
        bq->add(query, true, BooleanClause::MUST);

        rw2 = bq->rewrite(reader);

        reader->close(); // TODO: check necessity (_CLLDELETE(reader) alone will not do the same cleanup)

        _CLLDELETE(reader);
        _CLLDELETE(bq);
    } catch (CLuceneError& e) {
        CuFail(tc, e.twhat());
    }

    BooleanQuery* bq1 = NULL;
    if (rw1->instanceOf(BooleanQuery::getClassName())) {
        bq1 = (BooleanQuery*) rw1;
    }

    BooleanQuery* bq2 = NULL;
    if (rw2->instanceOf(BooleanQuery::getClassName())) {
        bq2 = (BooleanQuery*) rw2;
    } else {
        CuFail(tc, _T("Rewrite"));
    }

    bool bClausesMatch = bq1->getClauseCount() == bq2->getClauseCount();

    _CLLDELETE(rw1);
    _CLLDELETE(rw2);

    if (!bClausesMatch) {
        CuFail(tc, _T("Number of Clauses Mismatch"));
    }
}
Пример #17
0
int BibleQuoteDict::buildIndex()
{
    DEBUG_FUNC_NAME

    // parse both and add docs to the indexwriter
    //myDebug() << m_modulePath;
    QFileInfo fileInfo(m_modulePath);
    //myDebug() << fileInfo.absoluteDir();
    QDir moduleDir(fileInfo.absoluteDir());
    moduleDir.setFilter(QDir::Files);
    QFileInfoList list = moduleDir.entryInfoList();

    QFileInfo htmlFileInfo;
    foreach(const QFileInfo & info, list) {
        if((info.suffix().compare("html", Qt::CaseInsensitive) == 0 || info.suffix().compare("htm", Qt::CaseInsensitive) == 0) && info.baseName().compare(fileInfo.baseName(), Qt::CaseInsensitive) == 0) {
            htmlFileInfo = info;
            break;
        }
    }
    //myDebug() << htmlFileInfo.absoluteFilePath();
    if(!htmlFileInfo.isReadable() || !fileInfo.isReadable()) {
        myWarning() << "cannot open file to build index";
        //todo: qmessagebox
        return 1;
    }

    QFile configFile(fileInfo.absoluteFilePath());
    QFile htmlFile(htmlFileInfo.absoluteFilePath());

    const QString encoding = m_settings->encoding;
    QTextCodec *codec = QTextCodec::codecForName(encoding.toStdString().c_str());

    if(!configFile.open(QIODevice::ReadOnly | QIODevice::Text) || !htmlFile.open(QIODevice::ReadOnly | QIODevice::Text)) {
        myWarning() << "cannot open file to build index";
        return 1;
    }
    QTextStream configIn(&configFile);
    configIn.setCodec(codec);
    QTextStream htmlIn(&htmlFile);
    htmlIn.setCodec(codec);

    const QString index = indexPath();
    QDir dir("/");
    dir.mkpath(index);
    QProgressDialog progress(QObject::tr("Build index"), QObject::tr("Cancel"), 0, 0);
    progress.setWindowModality(Qt::WindowModal);

    IndexWriter* writer = NULL;
    const TCHAR* stop_words[] = { NULL };
    standard::StandardAnalyzer an(stop_words);

    if(IndexReader::indexExists(index.toStdString().c_str())) {
        if(IndexReader::isLocked(index.toStdString().c_str())) {
            myDebug() << "Index was locked... unlocking it.";
            IndexReader::unlock(index.toStdString().c_str());
        }
    }
    writer = new IndexWriter(index.toStdString().c_str() , &an, true);

    //writer->setMaxFieldLength(0x7FFFFFFFL);
    writer->setUseCompoundFile(false);

    //index
    Document indexdoc;

    const QString title = configIn.readLine();
    QString id = configIn.readLine();
    long num = configIn.readLine().toLong();
    const QString pre = htmlIn.read(num - 1);
    myDebug() << title << pre;
    TCHAR *buffer = SearchTools::createBuffer();
    while(!configIn.atEnd()) {
        long n = num;
        const QString key = id;
        id = configIn.readLine();
        num = configIn.readLine().toLong();
        const QString data = htmlIn.read(num - n - 1);
        if(key.isEmpty() || data.isEmpty())
            continue;

        indexdoc.clear();

        indexdoc.add(*_CLNEW Field(_T("key"), SearchTools::toTCHAR(key, buffer), Field::STORE_YES |  Field::INDEX_TOKENIZED));
        indexdoc.add(*_CLNEW Field(_T("content"), SearchTools::toTCHAR(data, buffer), Field::STORE_YES |  Field::INDEX_TOKENIZED));

        writer->addDocument(&indexdoc);

    }

    writer->setUseCompoundFile(true);
    writer->optimize();

    writer->close();
    _CLLDELETE(writer);
    return 0;
}
Пример #18
0
 void put(SerialNum serialNum, const search::DocumentIdT lid) {
     iw.put(serialNum, *dummyDoc, lid);
     iw.commit(serialNum, std::shared_ptr<IDestructorCallback>());
 }
Пример #19
0
 void remove(SerialNum serialNum, const search::DocumentIdT lid) {
     iw.remove(serialNum, lid);
     iw.commit(serialNum, std::shared_ptr<IDestructorCallback>());
 }
Пример #20
0
// IndexWriter methods
void addDocument1(IndexWriter & i, lucene::document::Document * d) 
{ i.addDocument(d); }
Пример #21
0
MetaInfo ZefaniaLex::buildIndexFromXmlDoc(KoXmlDocument *xmldoc)
{
    try {

    MetaInfo info;
    int couldBe = 0;//1 = RMac

    Document indexdoc;
    const QString index = indexPath();
    QString fileTitle;
    QString uid;
    QString type;

    QDir dir("/");
    dir.mkpath(index);

    RefText refText;
    refText.setSettings(m_settings);

    IndexWriter* writer = NULL;
    const TCHAR* stop_words[] = { NULL };
    standard::StandardAnalyzer an(stop_words);

    if(IndexReader::indexExists(index.toStdString().c_str())) {
        if(IndexReader::isLocked(index.toStdString().c_str())) {
            myDebug() << "Index was locked... unlocking it.";
            IndexReader::unlock(index.toStdString().c_str());
        }
    }
    writer = new IndexWriter(index.toStdString().c_str() , &an, true);

    writer->setMaxFieldLength(0x7FFFFFFFL);
    writer->setUseCompoundFile(false);

    KoXmlNode item = xmldoc->documentElement().firstChild();
    type = xmldoc->documentElement().toElement().attribute("type", "");

    for(int c = 0; !item.isNull();) {
        QString key = "";
        QString title = "";
        QString trans = "";
        QString pron = "";
        QString desc = "";
        KoXmlElement e = item.toElement();
        if(e.tagName().compare("INFORMATION", Qt::CaseInsensitive) == 0) {
            KoXmlNode title = item.namedItem("subject");
            KoXmlNode identifer = item.namedItem("identifier");

            fileTitle = title.toElement().text();
            uid = identifer.toElement().text();

        } else if(e.tagName().compare("item", Qt::CaseInsensitive) == 0) {
            key = e.attribute("id");
            KoXmlNode details = item.firstChild();
            while(!details.isNull()) {
                KoXmlElement edetails = details.toElement();
                if(edetails.tagName().compare("title", Qt::CaseInsensitive) == 0) {
                    title = edetails.text();
                } else if(edetails.tagName().compare("transliteration", Qt::CaseInsensitive) == 0) {
                    trans = edetails.text();
                } else if(edetails.tagName().compare("pronunciation", Qt::CaseInsensitive) == 0) {
                    KoXmlNode em = details.firstChild();
                    while(!em.isNull()) {
                        if(em.toElement().tagName().compare("em", Qt::CaseInsensitive) == 0)
                            pron = "<em>" + em.toElement().text() + "</em>";
                        em = em.nextSibling();
                    }
                } else if(edetails.tagName().compare("description", Qt::CaseInsensitive) == 0) {
                    KoXmlNode descNode = details.firstChild();
                    while(!descNode.isNull()) {
                        if(descNode.nodeType() == 2) {
                            desc += descNode.toText().data();
                        } else if(descNode.nodeType() == 1) {
                            KoXmlElement descElement = descNode.toElement();
                            if(descElement.tagName().compare("reflink", Qt::CaseInsensitive) == 0) {
                                if(descElement.hasAttribute("mscope")) {
                                    const QString mscope = descElement.attribute("mscope", ";;;");

                                    VerseUrl url;
                                    url.fromMscope(mscope);

                                    desc += " <a href=\"" + url.toString() + "\">" + refText.toString(url) + "</a> ";
                                } else if(descElement.hasAttribute("target")) {
                                    desc += descElement.text();
                                }

                            } else if(descElement.tagName().compare("see", Qt::CaseInsensitive) == 0) {
                                const QString target = descElement.attribute("target", "");
                                //todo: currently we assume target = x-self
                                StrongUrl url;
                                bool ok = url.fromText(descElement.text());
                                if(ok)
                                    desc += " <a href=\"" + url.toString() + "\">" + descElement.text() + "</a> ";
                            }
                        }

                        descNode = descNode.nextSibling();
                    }
                    desc += "<hr />";
                }
                details = details.nextSibling();
            }
            if(couldBe == 0) {
                if(key.toUpper() == "A-APF" || key.toUpper() == "X-NSN" || key.toUpper() == "V-PAP-DPN") {
                    couldBe = 1;
                }
            }
            QString content = "<h3 class='strongTitle'>" + key + " - " + title + "</h3>";
            if(!trans.isEmpty()) {
                content += " (" + trans + ") ";
            }
            if(!pron.isEmpty()) {
                content += " [" + pron + "] ";
            }
            content += "<br />" + desc;
            indexdoc.clear();
#ifdef OBV_USE_WSTRING
            indexdoc.add(*_CLNEW Field(_T("key"), key.toStdWString().c_str(), Field::STORE_YES |  Field::INDEX_TOKENIZED));
            indexdoc.add(*_CLNEW Field(_T("content"), content.toStdWString().c_str(), Field::STORE_YES |  Field::INDEX_TOKENIZED));
#else
            indexdoc.add(*_CLNEW Field(_T("key"), reinterpret_cast<const wchar_t *>(key.utf16()), Field::STORE_YES |  Field::INDEX_TOKENIZED));
            indexdoc.add(*_CLNEW Field(_T("content"), reinterpret_cast<const wchar_t *>(content.utf16()), Field::STORE_YES |  Field::INDEX_TOKENIZED));
#endif
            writer->addDocument(&indexdoc);

        }
        item = item.nextSibling();
        c++;
    }
    writer->setUseCompoundFile(true);
    writer->optimize();

    writer->close();
    delete writer;
    info.setName(fileTitle);
    info.setUID(uid);
    if(type == "x-strong") {
        info.setDefaultModule(OBVCore::DefaultStrongDictModule);
        info.setContent(OBVCore::StrongsContent);
    } else if(type == "x-dictionary") {
        if(couldBe == 1) {
            info.setDefaultModule(OBVCore::DefaultRMACDictModule);
            info.setContent(OBVCore::RMacContent);
        } else {
            info.setDefaultModule(OBVCore::DefaultDictModule);
        }
    }
    return info;
    }
    catch(...) {
        return MetaInfo();
    }
}