Beispiel #1
0
/**
 * Builds the CLucene index for a Zefania lexicon from an already parsed XML document
 * and collects the module's meta information on the way.
 *
 * Every top-level "item" element becomes one index document with two stored,
 * tokenized fields: "key" (the item's id attribute) and "content" (an HTML
 * fragment assembled from title, transliteration, pronunciation and description).
 * The top-level "INFORMATION" element supplies the module name and UID.
 *
 * \param xmldoc The parsed lexicon document. A null pointer yields an empty MetaInfo.
 * \return The collected MetaInfo, or a default-constructed MetaInfo if any
 *         exception was thrown while indexing.
 */
MetaInfo ZefaniaLex::buildIndexFromXmlDoc(KoXmlDocument *xmldoc)
{
    // Nothing to index without a document; bail out before touching the index directory.
    if(xmldoc == NULL)
        return MetaInfo();

    try {
        MetaInfo info;
        int couldBe = 0;//1 = RMac

        Document indexdoc;
        const QString index = indexPath();
        // Convert the path once instead of once per CLucene call.
        const std::string indexDir = index.toStdString();
        QString fileTitle;
        QString uid;

        QDir dir("/");
        dir.mkpath(index);

        RefText refText;
        refText.setSettings(m_settings);

        // Empty stop word list: no word is excluded from the index.
        const TCHAR* stop_words[] = { NULL };
        standard::StandardAnalyzer an(stop_words);

        if(IndexReader::indexExists(indexDir.c_str())) {
            if(IndexReader::isLocked(indexDir.c_str())) {
                myDebug() << "Index was locked... unlocking it.";
                IndexReader::unlock(indexDir.c_str());
            }
        }

        IndexWriter *writer = new IndexWriter(indexDir.c_str(), &an, true);
        try {
            writer->setMaxFieldLength(0x7FFFFFFFL);
            writer->setUseCompoundFile(false);

            for(KoXmlNode item = xmldoc->documentElement().firstChild(); !item.isNull(); item = item.nextSibling()) {
                const KoXmlElement e = item.toElement();

                if(e.tagName().compare("INFORMATION", Qt::CaseInsensitive) == 0) {
                    const KoXmlNode subjectNode = item.namedItem("subject");
                    const KoXmlNode identifierNode = item.namedItem("identifier");

                    fileTitle = subjectNode.toElement().text();
                    uid = identifierNode.toElement().text();
                    continue;
                }
                if(e.tagName().compare("item", Qt::CaseInsensitive) != 0)
                    continue;

                const QString key = e.attribute("id");
                QString title = "";
                QString trans = "";
                QString pron = "";
                QString desc = "";

                for(KoXmlNode details = item.firstChild(); !details.isNull(); details = details.nextSibling()) {
                    const KoXmlElement edetails = details.toElement();
                    if(edetails.tagName().compare("title", Qt::CaseInsensitive) == 0) {
                        title = edetails.text();
                    } else if(edetails.tagName().compare("transliteration", Qt::CaseInsensitive) == 0) {
                        trans = edetails.text();
                    } else if(edetails.tagName().compare("pronunciation", Qt::CaseInsensitive) == 0) {
                        // If several <em> children exist, the last one wins.
                        for(KoXmlNode em = details.firstChild(); !em.isNull(); em = em.nextSibling()) {
                            if(em.toElement().tagName().compare("em", Qt::CaseInsensitive) == 0)
                                pron = "<em>" + em.toElement().text() + "</em>";
                        }
                    } else if(edetails.tagName().compare("description", Qt::CaseInsensitive) == 0) {
                        for(KoXmlNode descNode = details.firstChild(); !descNode.isNull(); descNode = descNode.nextSibling()) {
                            // NOTE(review): 2 and 1 are presumably KoXmlNode::TextNode and
                            // KoXmlNode::ElementNode - confirm against the KoXml headers.
                            if(descNode.nodeType() == 2) {
                                desc += descNode.toText().data();
                            } else if(descNode.nodeType() == 1) {
                                const KoXmlElement descElement = descNode.toElement();
                                if(descElement.tagName().compare("reflink", Qt::CaseInsensitive) == 0) {
                                    if(descElement.hasAttribute("mscope")) {
                                        const QString mscope = descElement.attribute("mscope", ";;;");

                                        VerseUrl url;
                                        url.fromMscope(mscope);

                                        desc += " <a href=\"" + url.toString() + "\">" + refText.toString(url) + "</a> ";
                                    } else if(descElement.hasAttribute("target")) {
                                        desc += descElement.text();
                                    }
                                } else if(descElement.tagName().compare("see", Qt::CaseInsensitive) == 0) {
                                    //todo: currently we assume target = x-self
                                    StrongUrl url;
                                    const bool ok = url.fromText(descElement.text());
                                    if(ok)
                                        desc += " <a href=\"" + url.toString() + "\">" + descElement.text() + "</a> ";
                                }
                            }
                        }
                        desc += "<hr />";
                    }
                }

                // The first item carrying one of these ids marks the module as RMac.
                if(couldBe == 0) {
                    const QString upperKey = key.toUpper();
                    if(upperKey == "A-APF" || upperKey == "X-NSN" || upperKey == "V-PAP-DPN") {
                        couldBe = 1;
                    }
                }

                QString content = "<h3 class='strongTitle'>" + key + " - " + title + "</h3>";
                if(!trans.isEmpty()) {
                    content += " (" + trans + ") ";
                }
                if(!pron.isEmpty()) {
                    content += " [" + pron + "] ";
                }
                content += "<br />" + desc;

                indexdoc.clear();
#ifdef OBV_USE_WSTRING
                indexdoc.add(*_CLNEW Field(_T("key"), key.toStdWString().c_str(), Field::STORE_YES |  Field::INDEX_TOKENIZED));
                indexdoc.add(*_CLNEW Field(_T("content"), content.toStdWString().c_str(), Field::STORE_YES |  Field::INDEX_TOKENIZED));
#else
                indexdoc.add(*_CLNEW Field(_T("key"), reinterpret_cast<const wchar_t *>(key.utf16()), Field::STORE_YES |  Field::INDEX_TOKENIZED));
                indexdoc.add(*_CLNEW Field(_T("content"), reinterpret_cast<const wchar_t *>(content.utf16()), Field::STORE_YES |  Field::INDEX_TOKENIZED));
#endif
                writer->addDocument(&indexdoc);
            }

            writer->setUseCompoundFile(true);
            writer->optimize();

            writer->close();
        } catch(...) {
            // Free the writer on the error path; previously it was leaked here.
            // It is deleted inside this scope so the analyzer it points to is still alive.
            delete writer;
            throw;
        }
        delete writer;

        const QString type = xmldoc->documentElement().toElement().attribute("type", "");
        info.setName(fileTitle);
        info.setUID(uid);
        if(type == "x-strong") {
            info.setDefaultModule(OBVCore::DefaultStrongDictModule);
            info.setContent(OBVCore::StrongsContent);
        } else if(type == "x-dictionary") {
            if(couldBe == 1) {
                info.setDefaultModule(OBVCore::DefaultRMACDictModule);
                info.setContent(OBVCore::RMacContent);
            } else {
                info.setDefaultModule(OBVCore::DefaultDictModule);
            }
        }
        return info;
    }
    catch(...) {
        // Any failure while indexing is reported as an empty MetaInfo.
        return MetaInfo();
    }
}
Beispiel #2
0
/**
 * Reads the meta information of the Zefania lexicon file \a name without indexing it.
 *
 * The basic MetaInfo comes from ZefaniaXmlReader::readMetaInfo(). The file is then
 * streamed a second time to read the root element's "type" attribute and, for
 * "x-dictionary" files, to look for item ids that mark the module as RMac.
 *
 * \param name Path of the lexicon XML file.
 * \return The MetaInfo of the module, or a default-constructed MetaInfo if the
 *         file cannot be opened.
 */
MetaInfo ZefaniaLex::readInfo(const QString &name)
{
    DEBUG_FUNC_NAME
    QFile file(name);
    int couldBe = 0;//1 = RMac
    // NOTE(review): uid is never filled in, so setUID() below always receives an
    // empty string - confirm this is intended.
    QString uid = "";
    QString type;
    ZefaniaXmlReader reader(name);
    MetaInfo info = reader.readMetaInfo();

    //open the xml file
    if(!file.open(QFile::ReadOnly | QFile::Text))
        return MetaInfo();
    m_xml = new QXmlStreamReader(&file);

    if(m_xml->readNextStartElement() && cmp(m_xml->name(), "dictionary")) {
        type = m_xml->attributes().value("type").toString();
        if(type != "x-dictionary") {
            couldBe = -1;// do not scan the keys
        }
        // Walk the direct children of <dictionary> until an RMac key is found.
        // Every visited child is skipped afterwards so the reader stays on the
        // sibling level; without the skip the next readNextStartElement() would
        // descend into the item and end the loop at its end tag, so only the
        // first item was ever examined.
        while(couldBe == 0 && m_xml->readNextStartElement()) {
            if(cmp(m_xml->name(), "item")) {
                const QString key = m_xml->attributes().value("id").toString().toUpper();
                if(key == "A-APF" || key == "X-NSN" || key == "V-PAP-DPN") {//todo: speed
                    couldBe = 1;
                    break;
                }
            }
            m_xml->skipCurrentElement();
        }
    }

    if(info.name().isEmpty() && !info.subject.isEmpty()) {
        info.setName(info.subject);
    }

    info.setUID(uid);
    if(type == "x-strong") {
        info.setDefaultModule(ModuleTools::DefaultStrongDictModule);
        info.setContent(ModuleTools::StrongsContent);
    } else if(type == "x-dictionary") {
        if(couldBe == 1) {
            info.setDefaultModule(ModuleTools::DefaultRMACDictModule);
            info.setContent(ModuleTools::RMACContent);
        } else {
            info.setDefaultModule(ModuleTools::DefaultDictModule);
            info.setContent(ModuleTools::DictionaryContent);
        }
    }

    file.close();
    delete m_xml;
    m_xml = nullptr;

    return info;
}