/**
 * Adds one text document to a CLucene index.
 *
 * Script arguments:
 *   args[0]  value stored untokenized in the "path" field
 *   args[1]  text stored and tokenized in the "contents" field
 *   args[2]  directory of the index; a new index is created there when
 *            IndexReader::indexExists() reports none
 *
 * Returns undefined on success. Too few arguments raise a TypeError, and a
 * failure raised while indexing is rethrown to the script as an Error.
 */
static Handle<Value> IndexText(const Arguments& args) {
    HandleScope scope;

    if (args.Length() < 3) {
        return ThrowException(Exception::TypeError(
            String::New("IndexText expects (path, contents, indexDirectory)")));
    }

    // Convert every argument exactly once and keep the converted buffers
    // alive for the whole call instead of re-converting at each use.
    String::Utf8Value pathArg(args[0]);
    String::Utf8Value contentsArg(args[1]);
    String::Utf8Value indexArg(args[2]);
    const char* pathUtf8 = *pathArg;
    const char* contentsUtf8 = *contentsArg;
    const char* indexDir = *indexArg;
    if (pathUtf8 == NULL || contentsUtf8 == NULL || indexDir == NULL) {
        return ThrowException(Exception::TypeError(
            String::New("IndexText arguments must be convertible to strings")));
    }

    lucene::analysis::WhitespaceAnalyzer an;
    IndexWriter* writer = NULL;
    TCHAR* contents = NULL;
    uint64_t startedAt = 0;
    Handle<Value> failure;  // stays empty unless indexing fails

    try {
        const bool indexExists = IndexReader::indexExists(indexDir);
        if (indexExists && IndexReader::isLocked(indexDir)) {
            printf("Index was locked... unlocking it.\n");
            IndexReader::unlock(indexDir);
        }
        // Append to an existing index, otherwise create a fresh one.
        writer = _CLNEW IndexWriter(indexDir, &an, !indexExists);

        // The writer could be told to flush at certain occasions, e.g. with
        // setRAMBufferSizeMB() or setMaxBufferedDocs().

        // To bypass a possible exception (we have no idea what we will be indexing...)
        writer->setMaxFieldLength(0x7FFFFFFFL); // LUCENE_INT32_MAX_SHOULDBE
        // Turn this off to make indexing faster; it is turned on again before optimizing
        writer->setUseCompoundFile(false);

        startedAt = Misc::currentTimeMillis();

        TCHAR path[CL_MAX_DIR] = { 0 };
        STRCPY_AtoT(path, pathUtf8, CL_MAX_DIR);
        path[CL_MAX_DIR - 1] = 0;  // guarantee termination if the path was cut off

        // Size the text buffer from the text itself: a CL_MAX_DIR buffer would
        // cut the indexed text down to the length of a path. The converted
        // string never needs more characters than the UTF-8 form has bytes.
        const size_t contentsCapacity = static_cast<size_t>(contentsArg.length()) + 1;
        contents = new TCHAR[contentsCapacity]();
        STRCPY_AtoT(contents, contentsUtf8, contentsCapacity);
        contents[contentsCapacity - 1] = 0;

        Document doc;
        doc.add(*_CLNEW Field(_T("path"), path, Field::STORE_YES | Field::INDEX_UNTOKENIZED));
        doc.add(*_CLNEW Field(_T("contents"), contents, Field::STORE_YES | Field::INDEX_TOKENIZED));
        writer->addDocument(&doc);

        // Make the index use as little files as possible, and optimize it
        writer->setUseCompoundFile(true);
        writer->optimize();
        writer->close();
    } catch (CLuceneError& E) {
        failure = Exception::Error(String::New(E.what()));
    } catch (...) {
        failure = Exception::Error(String::New("Got an unknown exception"));
    }

    // Clean up on success and on failure alike.
    delete[] contents;
    _CLLDELETE(writer);

    if (!failure.IsEmpty()) {
        return ThrowException(failure);
    }

    printf("Indexing took: %d ms.\n\n", static_cast<int32_t>(Misc::currentTimeMillis() - startedAt));
    return scope.Close(Undefined());
}
/**
 * Indexes a source location into a CLucene index by delegating to indexDocs().
 *
 * Script arguments:
 *   args[0]  location handed to indexDocs() (what it accepts is defined by
 *            indexDocs(), which is declared elsewhere)
 *   args[1]  directory of the index; a new index is created there when
 *            IndexReader::indexExists() reports none
 *
 * Returns the string "foo" on success. Too few arguments raise a TypeError,
 * and a failure raised while indexing is rethrown to the script as an Error.
 */
static Handle<Value> IndexFiles(const Arguments& args) {
    HandleScope scope;

    if (args.Length() < 2) {
        return ThrowException(Exception::TypeError(
            String::New("IndexFiles expects (source, indexDirectory)")));
    }

    // Convert every argument exactly once and keep the converted buffers
    // alive for the whole call instead of re-converting at each use.
    String::Utf8Value sourceArg(args[0]);
    String::Utf8Value indexArg(args[1]);
    // Kept non-const so it can be passed to indexDocs() whatever the
    // constness of its parameter is.
    char* source = *sourceArg;
    const char* indexDir = *indexArg;
    if (source == NULL || indexDir == NULL) {
        return ThrowException(Exception::TypeError(
            String::New("IndexFiles arguments must be convertible to strings")));
    }

    lucene::analysis::WhitespaceAnalyzer an;
    IndexWriter* writer = NULL;
    uint64_t startedAt = 0;
    Handle<Value> failure;  // stays empty unless indexing fails

    try {
        const bool indexExists = IndexReader::indexExists(indexDir);
        if (indexExists && IndexReader::isLocked(indexDir)) {
            printf("Index was locked... unlocking it.\n");
            IndexReader::unlock(indexDir);
        }
        // Append to an existing index, otherwise create a fresh one.
        writer = _CLNEW IndexWriter(indexDir, &an, !indexExists);

        // For diagnostics or tuning the writer could be given an info stream,
        // or be told to flush at certain occasions with setRAMBufferSizeMB()
        // or setMaxBufferedDocs().

        // To bypass a possible exception (we have no idea what we will be indexing...)
        writer->setMaxFieldLength(0x7FFFFFFFL); // LUCENE_INT32_MAX_SHOULDBE
        // Turn this off to make indexing faster; it is turned on again before optimizing
        writer->setUseCompoundFile(false);

        startedAt = Misc::currentTimeMillis();

        indexDocs(writer, source);

        // Make the index use as little files as possible, and optimize it
        writer->setUseCompoundFile(true);
        writer->optimize();
        writer->close();
    } catch (CLuceneError& E) {
        failure = Exception::Error(String::New(E.what()));
    } catch (...) {
        failure = Exception::Error(String::New("Got an unknown exception"));
    }

    // Clean up on success and on failure alike.
    _CLLDELETE(writer);

    if (!failure.IsEmpty()) {
        return ThrowException(failure);
    }

    printf("Indexing took: %d ms.\n\n", static_cast<int32_t>(Misc::currentTimeMillis() - startedAt));
    return scope.Close(String::New("foo"));
}
/**
 * Worker callback that adds the document carried by the request's baton to
 * the index named by the baton.
 *
 * Reads baton->index and baton->doc. On success baton->indexTime receives
 * the milliseconds spent adding and optimizing; on failure baton->error
 * receives a message. No exception leaves this function, including one
 * raised while closing the writer.
 *
 * Always returns 0; the outcome is reported through the baton.
 */
static int EIO_Index(eio_req* req) {
    index_baton_t* baton = static_cast<index_baton_t*>(req->data);

    lucene::analysis::standard::StandardAnalyzer an;
    IndexWriter* writer = 0;

    try {
        // Reuse an existing index (force-unlocking it first if necessary),
        // otherwise ask the writer to create one.
        bool needsCreation = true;
        if (IndexReader::indexExists(*(*baton->index))) {
            if (IndexReader::isLocked(*(*baton->index))) {
                IndexReader::unlock(*(*baton->index));
            }
            needsCreation = false;
        }
        writer = new IndexWriter(*(*baton->index), &an, needsCreation);

        // To bypass a possible exception (we have no idea what we will be indexing...)
        writer->setMaxFieldLength(0x7FFFFFFFL); // LUCENE_INT32_MAX_SHOULDBE
        // Turn this off to make indexing faster; it is turned on again before optimizing
        writer->setUseCompoundFile(false);

        uint64_t start = Misc::currentTimeMillis();

        writer->addDocument(baton->doc->document());

        // Make the index use as little files as possible, and optimize it
        writer->setUseCompoundFile(true);
        writer->optimize();

        baton->indexTime = (Misc::currentTimeMillis() - start);
    } catch (CLuceneError& E) {
        baton->error.assign(E.what());
    } catch (...) {
        baton->error = "Got an unknown exception";
    }

    // Close and clean up. close() can fail too, so it is guarded: an escaping
    // exception would leave this callback and skip the delete below. An error
    // recorded earlier is kept because it describes the root cause.
    // NOTE(review): assumes baton->error is a std::string (it is used with
    // assign() above) - confirm against index_baton_t.
    if (writer != 0) {
        try {
            writer->close();
        } catch (CLuceneError& E) {
            if (baton->error.empty()) {
                baton->error.assign(E.what());
            }
        } catch (...) {
            if (baton->error.empty()) {
                baton->error = "Got an unknown exception";
            }
        }
    }
    delete writer;

    return 0;
}
/**
 * Rebuilds the Lucene index of this lexicon from an already parsed XML
 * document and returns the meta information found in it.
 *
 * Every <item> child of the document element becomes one indexed document
 * with the fields "key" (the item's id attribute) and "content" (an HTML
 * snippet assembled from title, transliteration, pronunciation and
 * description). The <INFORMATION> child supplies the module name (subject)
 * and UID (identifier).
 *
 * The index at indexPath() is always created from scratch.
 *
 * @param xmldoc parsed lexicon document; it is dereferenced without a check
 * @return the filled MetaInfo, or a default-constructed MetaInfo when any
 *         exception is thrown while building the index
 */
MetaInfo ZefaniaLex::buildIndexFromXmlDoc(KoXmlDocument *xmldoc)
{
    try {
        MetaInfo info;
        int couldBe = 0;//1 = RMac
        Document indexdoc;
        const QString index = indexPath();
        QString fileTitle;
        QString uid;
        QString type;

        QDir dir("/");
        dir.mkpath(index);

        RefText refText;
        refText.setSettings(m_settings);

        const TCHAR* stop_words[] = { NULL };
        standard::StandardAnalyzer an(stop_words);

        // Convert the path once instead of at every CLucene call.
        const std::string indexDir = index.toStdString();
        if(IndexReader::indexExists(indexDir.c_str()) && IndexReader::isLocked(indexDir.c_str())) {
            myDebug() << "Index was locked... unlocking it.";
            IndexReader::unlock(indexDir.c_str());
        }

        IndexWriter *writer = new IndexWriter(indexDir.c_str(), &an, true);
        try {
            writer->setMaxFieldLength(0x7FFFFFFFL);
            writer->setUseCompoundFile(false);

            KoXmlNode item = xmldoc->documentElement().firstChild();
            type = xmldoc->documentElement().toElement().attribute("type", "");

            for(; !item.isNull(); item = item.nextSibling()) {
                const KoXmlElement e = item.toElement();

                if(e.tagName().compare("INFORMATION", Qt::CaseInsensitive) == 0) {
                    const KoXmlNode subjectNode = item.namedItem("subject");
                    const KoXmlNode identifierNode = item.namedItem("identifier");
                    fileTitle = subjectNode.toElement().text();
                    uid = identifierNode.toElement().text();
                } else if(e.tagName().compare("item", Qt::CaseInsensitive) == 0) {
                    const QString key = e.attribute("id");
                    QString title;
                    QString trans;
                    QString pron;
                    QString desc;

                    for(KoXmlNode details = item.firstChild(); !details.isNull(); details = details.nextSibling()) {
                        const KoXmlElement edetails = details.toElement();
                        const QString tag = edetails.tagName();

                        if(tag.compare("title", Qt::CaseInsensitive) == 0) {
                            title = edetails.text();
                        } else if(tag.compare("transliteration", Qt::CaseInsensitive) == 0) {
                            trans = edetails.text();
                        } else if(tag.compare("pronunciation", Qt::CaseInsensitive) == 0) {
                            // Each <em> child replaces the previous value, so the last one wins.
                            for(KoXmlNode em = details.firstChild(); !em.isNull(); em = em.nextSibling()) {
                                if(em.toElement().tagName().compare("em", Qt::CaseInsensitive) == 0)
                                    pron = "<em>" + em.toElement().text() + "</em>";
                            }
                        } else if(tag.compare("description", Qt::CaseInsensitive) == 0) {
                            for(KoXmlNode descNode = details.firstChild(); !descNode.isNull(); descNode = descNode.nextSibling()) {
                                // NOTE(review): 2 and 1 are presumably KoXmlNode::TextNode and
                                // KoXmlNode::ElementNode - confirm and use the enumerators.
                                if(descNode.nodeType() == 2) {
                                    desc += descNode.toText().data();
                                } else if(descNode.nodeType() == 1) {
                                    const KoXmlElement descElement = descNode.toElement();
                                    if(descElement.tagName().compare("reflink", Qt::CaseInsensitive) == 0) {
                                        if(descElement.hasAttribute("mscope")) {
                                            const QString mscope = descElement.attribute("mscope", ";;;");
                                            VerseUrl url;
                                            url.fromMscope(mscope);
                                            desc += " <a href=\"" + url.toString() + "\">" + refText.toString(url) + "</a> ";
                                        } else if(descElement.hasAttribute("target")) {
                                            desc += descElement.text();
                                        }
                                    } else if(descElement.tagName().compare("see", Qt::CaseInsensitive) == 0) {
                                        //todo: the "target" attribute is ignored; currently we assume target = x-self
                                        StrongUrl url;
                                        const bool ok = url.fromText(descElement.text());
                                        if(ok)
                                            desc += " <a href=\"" + url.toString() + "\">" + descElement.text() + "</a> ";
                                    }
                                }
                            }
                            desc += "<hr />";
                        }
                    }

                    // These keys make an "x-dictionary" module count as RMAC content below.
                    if(couldBe == 0) {
                        const QString upperKey = key.toUpper();
                        if(upperKey == "A-APF" || upperKey == "X-NSN" || upperKey == "V-PAP-DPN") {
                            couldBe = 1;
                        }
                    }

                    QString content = "<h3 class='strongTitle'>" + key + " - " + title + "</h3>";
                    if(!trans.isEmpty()) {
                        content += " (" + trans + ") ";
                    }
                    if(!pron.isEmpty()) {
                        content += " [" + pron + "] ";
                    }
                    content += "<br />" + desc;

                    indexdoc.clear();
#ifdef OBV_USE_WSTRING
                    indexdoc.add(*_CLNEW Field(_T("key"), key.toStdWString().c_str(), Field::STORE_YES | Field::INDEX_TOKENIZED));
                    indexdoc.add(*_CLNEW Field(_T("content"), content.toStdWString().c_str(), Field::STORE_YES | Field::INDEX_TOKENIZED));
#else
                    indexdoc.add(*_CLNEW Field(_T("key"), reinterpret_cast<const wchar_t *>(key.utf16()), Field::STORE_YES | Field::INDEX_TOKENIZED));
                    indexdoc.add(*_CLNEW Field(_T("content"), reinterpret_cast<const wchar_t *>(content.utf16()), Field::STORE_YES | Field::INDEX_TOKENIZED));
#endif
                    writer->addDocument(&indexdoc);
                }
            }

            writer->setUseCompoundFile(true);
            writer->optimize();
            writer->close();
        } catch(...) {
            // Release the writer while the analyzer it points to is still alive,
            // then let the outer handler produce the empty result.
            delete writer;
            throw;
        }
        delete writer;

        info.setName(fileTitle);
        info.setUID(uid);
        if(type == "x-strong") {
            info.setDefaultModule(OBVCore::DefaultStrongDictModule);
            info.setContent(OBVCore::StrongsContent);
        } else if(type == "x-dictionary") {
            if(couldBe == 1) {
                info.setDefaultModule(OBVCore::DefaultRMACDictModule);
                info.setContent(OBVCore::RMacContent);
            } else {
                info.setDefaultModule(OBVCore::DefaultDictModule);
            }
        }
        return info;
    } catch(...) {
        return MetaInfo();
    }
}
int ZefaniaLex::buildIndex() { DEBUG_FUNC_NAME; myDebug() << "building index!!!"; QFile file(m_modulePath); Document indexdoc; const QString index = indexPath(); QDir dir("/"); dir.mkpath(index); m_refText.setSettings(m_settings); IndexWriter* writer = nullptr; const TCHAR* stop_words[] = { nullptr }; standard::StandardAnalyzer an(stop_words); //open the xml file if(!file.open(QFile::ReadOnly | QFile::Text)) return 1; m_xml = new QXmlStreamReader(&file); try { if(IndexReader::indexExists(index.toStdString().c_str())) { if(IndexReader::isLocked(index.toStdString().c_str())) { myDebug() << "Index was locked... unlocking it."; IndexReader::unlock(index.toStdString().c_str()); } } writer = new IndexWriter(index.toStdString().c_str() , &an, true); writer->setMaxFieldLength(0x7FFFFFFFL); writer->setUseCompoundFile(false); TCHAR *buffer = SearchTools::createBuffer(); if(m_xml->readNextStartElement()) { if(cmp(m_xml->name(), "dictionary")) { while(m_xml->readNextStartElement()) { if(cmp(m_xml->name(), "item")) { QString content; const QString key = m_xml->attributes().value("id").toString(); bool hasTitle = false; while(true) { m_xml->readNext(); if(m_xml->tokenType() == QXmlStreamReader::EndElement && (cmp(m_xml->name(), QLatin1String("item")))) break; if(m_xml->tokenType() == QXmlStreamReader::Characters) { content += m_xml->text().toString(); } else if(cmp(m_xml->name(), "title")) { const QString title = parseTitle(); content += "<h3 class='title'>" + key; if(!title.isEmpty()) { content.append(" - " + title); } content.append("</h3>"); hasTitle = true; } else if(cmp(m_xml->name(), "transliteration")) { const QString trans = parseTrans(); if(!trans.isEmpty()) { content += "<span class='transliteration'>" + trans + "</span>" ; } } else if(cmp(m_xml->name(), "pronunciation")) { const QString pr = parsePron(); if(!pr.isEmpty()) { content += "<span class='pronunciation'>" + pr + "</span>"; } } else if(cmp(m_xml->name(), "description")) { content += "<span 
class='description'>" + parseDesc() + "</span>"; } else { content += m_xml->readElementText(QXmlStreamReader::IncludeChildElements); } } if(!hasTitle) { content.prepend("<h3 class='title'>" + key + "</h3>"); } indexdoc.clear(); indexdoc.add(*_CLNEW Field(_T("key"), SearchTools::toTCHAR(key, buffer), Field::STORE_YES | Field::INDEX_TOKENIZED)); indexdoc.add(*_CLNEW Field(_T("content"), SearchTools::toTCHAR(content, buffer), Field::STORE_YES | Field::INDEX_TOKENIZED)); writer->addDocument(&indexdoc); } else { m_xml->skipCurrentElement(); } } } else { myWarning() << "not a file"; } } writer->setUseCompoundFile(true); writer->optimize(); writer->close(); delete writer; } catch(...) { } file.close(); delete m_xml; m_xml = nullptr; return 0; }