コード例 #1
0
ファイル: noteindexer.cpp プロジェクト: jeffkowalski/Nixnote2
void NoteIndexer::addTextIndex(int lid, QString content) {
    // Delete any old content
    NSqlQuery sql(db);
    sql.prepare("Delete from SearchIndex where lid=:lid and source=:source");
    sql.bindValue(":lid", lid);
    sql.bindValue(":source", "text");
    sql.exec();

    // Add the new content.  it is basically a text version of the note with a weight of 100.
    sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)");
    sql.bindValue(":lid", lid);
    sql.bindValue(":weight", 100);
    sql.bindValue(":source", "text");

    content = global.normalizeTermForSearchAndIndex(content);
    sql.bindValue(":content", content);

    sql.exec();

    sql.prepare("Delete from DataStore where lid=:lid and key=:key");
    sql.bindValue(":lid", lid);
    sql.bindValue(":key", NOTE_INDEX_NEEDED);
    sql.exec();
}
コード例 #2
0
ファイル: indexrunner.cpp プロジェクト: jeffkowalski/Nixnote2
// Index the recognition data of a resource.
//
// lid - local id the index entries are stored under.
// r   - resource whose attributes and recognition XML are examined.
//
// The resource file name and source URL (when set) are written straight to
// SearchIndex with weight 100.  Every non-empty <t> element of the recognition
// XML is turned into an IndexRecord and placed in indexHash.
//
// NOTE(review): indexHash is keyed by lid, so each <t> element replaces the
// record stored by the previous one and only the last element remains cached
// when the loop ends -- confirm whether this is intended.
void IndexRunner::indexRecognition(qint32 lid, Resource &r) {

    // Nothing is done while indexing is stopped or paused.
    if (!keepRunning || pauseIndexing) {
        return;
    }

    // Add filename or source url to search index
    if (r.attributes.isSet()) {
        NSqlQuery query(db);
        ResourceAttributes attrs = r.attributes;

        // Inserts one "recognition" row with weight 100 for the given text.
        auto insertRow = [&query, lid](const QString &value) {
            query.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)");
            query.bindValue(":lid", lid);
            query.bindValue(":weight", 100);
            query.bindValue(":source", "recognition");
            query.bindValue(":content", value);
            query.exec();
        };

        if (attrs.fileName.isSet())
            insertRow(QString(attrs.fileName));
        if (attrs.sourceURL.isSet())
            insertRow(QString(attrs.sourceURL));
    }

    // Without a recognition body there is nothing further to index.
    Data recoData;
    if (r.recognition.isSet())
        recoData = r.recognition;
    if (!recoData.body.isSet())
        return;

    QDomDocument xml;
    QString parseMessage;
    xml.setContent(recoData.body, &parseMessage);

    // Walk every <t> (text) element.  count() returns int in all supported
    // Qt versions, so a single loop serves where length() needed an #if.
    QDomNodeList textNodes = xml.documentElement().elementsByTagName("t");
    for (int idx = 0; keepRunning && !pauseIndexing && idx < textNodes.count(); ++idx) {
        QApplication::processEvents();
        QDomElement element = textNodes.at(idx).toElement();
        QString weightAttr = element.attribute("w");
        QString content = element.text();
        if (content.isEmpty())
            continue;

        IndexRecord *record = new IndexRecord();
        record->lid = lid;
        record->weight = weightAttr.toInt();
        record->source = "recognition";
        record->content = content;

        // Free the record previously cached for this lid before replacing it.
        if (indexHash->contains(lid)) {
            delete indexHash->value(lid);
            indexHash->remove(lid);
        }
        indexHash->insert(lid, record);
    }
}


// Index any PDFs that are attached.  Basically it turns the PDF into text and
// caches it in indexHash the same way as recognition text.
//
// lid - local id the extracted text is cached under.
// r   - resource whose guid is used to look up the resource lid; the PDF is
//       read from <dba dir>/<resource lid>.pdf.
//
// Nothing is indexed when local PDF indexing is disabled, indexing is stopped
// or paused, either lid is not positive, or the document cannot be loaded or
// is encrypted/locked.
void IndexRunner::indexPdf(qint32 lid, Resource &r) {
    if (!global.indexPDFLocally)
        return;
    if (!keepRunning || pauseIndexing) {
        //indexTimer->start();
        return;
    }
    ResourceTable rtable(db);
    qint32 reslid = rtable.getLid(r.guid);
    // Both ids are needed: lid keys the cache entry, reslid names the file.
    if (lid <= 0 || reslid <= 0) {
        //indexTimer->start();
        return;
    }
    QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +".pdf";

    // With this raw-pointer Poppler API the caller owns the returned document
    // and pages, so each must be deleted on every path.
    Poppler::Document *doc = Poppler::Document::load(file);
    if (doc == nullptr) {
        //indexTimer->start();
        return;
    }
    if (doc->isEncrypted() || doc->isLocked()) {
        delete doc;
        //indexTimer->start();
        return;
    }

    QString text = "";
    for (int i=0; keepRunning && !pauseIndexing && i<doc->numPages(); i++) {
        Poppler::Page *page = doc->page(i);
        if (page == nullptr)
            continue;   // page could not be created; skip it instead of crashing
        QRectF rect;    // default-constructed rect, as passed by the original code
        text += page->text(rect) + QString(" ");
        delete page;
    }
    delete doc;

    IndexRecord *rec = new IndexRecord();
    rec->content = text;
    rec->source = "recognition";
    rec->weight = 100;
    rec->lid = lid;
    // Free the record previously cached for this lid before replacing it.
    if (indexHash->contains(lid)) {
        delete indexHash->value(lid);
        indexHash->remove(lid);
    }
    indexHash->insert(lid, rec);
}




// Index any files that are attached.
//
// lid - local id the converted text is stored under in SearchIndex.
// r   - resource whose guid is used to look up the resource lid and whose
//       file name supplies the extension.
//
// The attachment <dba dir>/<resource lid><extension> is converted to text by
// running "soffice --headless --convert-to txt:Text"; the code then reads
// <tmp dir>/<resource lid>.txt, normalizes it and inserts it as a
// "recognition" row with weight 100.  When soffice cannot be started (or exits
// with code 255) officeFound is cleared, so later calls return immediately.
void IndexRunner::indexAttachment(qint32 lid, Resource &r) {
    if (!officeFound)
        return;
    QLOG_DEBUG() << "indexing attachment to note " << lid;
    if (!keepRunning || pauseIndexing) {
        //indexTimer->start();
        return;
    }
    ResourceTable rtable(db);
    qint32 reslid = rtable.getLid(r.guid);
    // Both ids are needed: lid keys the index row, reslid names the files.
    if (lid <= 0 || reslid <= 0) {
        //indexTimer->start();
        return;
    }
    QLOG_DEBUG() << "Resource " << reslid;

    // Take the extension from the LAST dot so names such as "my.report.docx"
    // yield ".docx" rather than ".report.docx".
    QString extension = "";
    ResourceAttributes attributes;
    if (r.attributes.isSet())
        attributes = r.attributes;
    if (attributes.fileName.isSet()) {
        extension = attributes.fileName;
        int i = extension.lastIndexOf(".");
        if (i != -1)
            extension = extension.mid(i);
    }

    // Only these extensions are handed to soffice.
    static const char *const supportedExtensions[] = {
        ".doc",  ".xls",  ".ppt",  ".docx", ".xlsx", ".pptx",
        ".pps",  ".pdf",  ".odt",  ".odf",  ".ott",  ".odm",
        ".html", ".txt",  ".oth",  ".ods",  ".ots",  ".odg",
        ".otg",  ".odp",  ".otp",  ".odb",  ".oxt",  ".htm",
        ".docm"
    };
    bool supported = false;
    for (const char *candidate : supportedExtensions) {
        if (extension == candidate) {
            supported = true;
            break;
        }
    }
    if (!supported)
        return;

    QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +extension;
    QFile dataFile(file);
    if (!dataFile.exists()) {
        // Fall back to any file stored under the resource lid.  The files are
        // named after reslid (see the path above), so that is the id to match.
        QDir dir(global.fileManager.getDbaDirPath());
        QStringList filterList;
        filterList.append(QString::number(reslid)+".*");
        QStringList list= dir.entryList(filterList, QDir::Files);
        if (list.size() > 0) {
            file = global.fileManager.getDbaDirPath()+list[0];
        } else {
            // Nothing to convert; do not launch soffice on a missing file.
            QLOG_DEBUG() << "No attachment file found for resource " << reslid;
            return;
        }
    }

    QString outDir = global.fileManager.getTmpDirPath();

    // Pass the arguments as a list so paths containing spaces or quotes reach
    // soffice intact instead of being re-split from a command string.
    QProcess sofficeProcess;
    QStringList arguments;
    arguments << "--headless"
              << "--convert-to" << "txt:Text"
              << "--outdir" << outDir
              << file;
    sofficeProcess.start("soffice", arguments,
                         QIODevice::ReadWrite|QIODevice::Unbuffered);

    QLOG_DEBUG() << "Starting soffice ";
    if (!sofficeProcess.waitForStarted()
            && sofficeProcess.error() == QProcess::FailedToStart) {
        // The program could not be launched at all; the exit code is not a
        // reliable indicator of this case.
        QLOG_ERROR() << "soffice not found.  Disabling attachment indexing.";
        this->officeFound = false;
        return;
    }
    QLOG_DEBUG() << "Waiting for completion";
    sofficeProcess.waitForFinished();
    int rc = sofficeProcess.exitCode();
    QLOG_DEBUG() << "soffice Errors:" << sofficeProcess.readAllStandardError();
    QLOG_DEBUG() << "soffice Output:" << sofficeProcess.readAllStandardOutput();
    QLOG_DEBUG() << "return code:" << rc;
    if (rc == 255) {
        QLOG_ERROR() << "soffice not found.  Disabling attachment indexing.";
        this->officeFound = false;
        return;
    }

    const QString txtPath = outDir+QString::number(reslid) +".txt";
    QFile txtFile(txtPath);
    if (txtFile.open(QIODevice::ReadOnly)) {
        QString text;
        text = txtFile.readAll();
        txtFile.close();
        // Normalize before taking the write lock to keep the locked section short.
        text = global.normalizeTermForSearchAndIndex(text);

        NSqlQuery sql(db);
        db->lockForWrite();
        sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, 'recognition', :content)");
        sql.bindValue(":lid", lid);
        sql.bindValue(":weight", 100);
        sql.bindValue(":content", text);

        QLOG_DEBUG() << "Adding note resource to index DB";
        sql.exec();
        db->unlock();
    }
    // Remove the temporary text file whether or not it could be read.
    QDir dir;
    dir.remove(txtPath);
}


// Write every cached IndexRecord in indexHash to the SearchIndex table and
// empty the cache.
//
// For each record the existing rows with the same lid and source are deleted
// and one new row with the normalized content is inserted.  The work runs
// under the database write lock, in transactions of at most 200 records.
// The loop stops early when indexing is stopped or paused; records not yet
// written at that point are discarded together with the rest of the cache.
void IndexRunner::flushCache() {
    if (indexHash->size() <= 0)
        return;
    QDateTime start = QDateTime::currentDateTimeUtc();
    NSqlQuery sql(db);
    db->lockForWrite();
    sql.exec("begin");
    QHash<qint32, IndexRecord*>::iterator i;

    // Start adding words to the index.  Every 200 records we do a commit
    const int recordsPerCommit = 200;
    int commitCount = recordsPerCommit;

    for (i=indexHash->begin(); keepRunning && !pauseIndexing && i!=indexHash->end(); ++i) {
        qint32 lid = i.key();
        IndexRecord *rec = i.value();
        qint32 weight = rec->weight;
        QString source = rec->source;
        QString content = rec->content;
        delete rec;

        // Delete any old content
        sql.prepare("Delete from SearchIndex where lid=:lid and source=:source");
        sql.bindValue(":lid", lid);
        sql.bindValue(":source", source);
        sql.exec();

        // Add the new content using the weight and source cached in the record.
        sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)");
        sql.bindValue(":lid", lid);
        sql.bindValue(":weight", weight);
        sql.bindValue(":source", source);

        content = global.normalizeTermForSearchAndIndex(content);
        sql.bindValue(":content", content);

        sql.exec();
        commitCount--;
        if (commitCount <= 0) {
            sql.exec("commit");
            // Open the next batch; without this the remaining statements would
            // run outside a transaction and the final commit would have
            // nothing to commit.
            sql.exec("begin");
            commitCount = recordsPerCommit;
        }
    }

    // If the loop stopped early, the remaining records were never reached.
    // Free them here, because clear() only drops the pointers.
    for (; i!=indexHash->end(); ++i)
        delete i.value();
    indexHash->clear();
    sql.exec("commit");

    sql.finish();
    db->unlock();
    QDateTime finish = QDateTime::currentDateTimeUtc();

    QLOG_DEBUG() << "Index Cache Flush Complete: " <<
                    finish.toMSecsSinceEpoch() - start.toMSecsSinceEpoch()
                    << " milliseconds.";
}



// Record the busy state and emit indexDone.
//
// value    - new value stored in iAmBusy.
// finished - forwarded unchanged as the argument of the indexDone signal.
void IndexRunner::busy(bool value, bool finished) {
    this->iAmBusy = value;
    emit indexDone(finished);
}
コード例 #3
0
ファイル: noteindexer.cpp プロジェクト: jeffkowalski/Nixnote2
// Index the recognition data of a resource.
//
// reslid - local id the index rows are stored under; must be positive.
// r      - resource to index; its noteGuid and guid must be set and its
//          recognition body must contain the recognition XML.
//
// Every non-empty <t> element of the recognition XML is normalized and
// inserted into SearchIndex as a "recognition" row, using the element's "w"
// attribute as weight.  All inserts run inside one begin/commit pair.
void NoteIndexer::indexRecognition(qint32 reslid, Resource &r) {

    QLOG_TRACE_IN();
    // Every early exit logs the matching trace-out so the trace stays balanced.
    if (!r.noteGuid.isSet() || !r.guid.isSet()) {
        QLOG_TRACE_OUT();
        return;
    }

    if (reslid <= 0) {
        QLOG_TRACE_OUT();
        return;
    }

    NSqlQuery sql(db);

    // Make sure we have something to look through.
    Data recognition;
    if (r.recognition.isSet())
        recognition = r.recognition;
    if (!recognition.body.isSet()) {
        QLOG_TRACE_OUT();
        return;
    }

    QDomDocument doc;
    QString emsg;
    // Report a parse failure but keep going: whatever elements are available
    // are still indexed, as before.
    if (!doc.setContent(recognition.body, &emsg))
        QLOG_ERROR() << "Unable to parse recognition data for resource " << reslid << ": " << emsg;

    // look for text tags
    QDomNodeList anchors = doc.documentElement().elementsByTagName("t");

    QLOG_TRACE() << "Beginning insertion of recognition:";
    QLOG_TRACE() << "Anchors found: " << anchors.length();
    sql.exec("begin;");
    // count() returns int in all supported Qt versions, so one loop suffices.
    for (int i=0; i<anchors.count(); i++) {
        QLOG_TRACE() << "Anchor: " << i;
        QApplication::processEvents();
        QDomElement enmedia = anchors.at(i).toElement();
        QString weight = enmedia.attribute("w");
        QString text = enmedia.text();
        if (text != "") {
            // Add the recognized text with the weight given by the element.
            sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)");
            sql.bindValue(":lid", reslid);
            // Bind the weight as a number, matching IndexRunner::indexRecognition.
            sql.bindValue(":weight", weight.toInt());
            sql.bindValue(":source", "recognition");

            text = global.normalizeTermForSearchAndIndex(text);
            sql.bindValue(":content", text);

            sql.exec();
        }
    }
    QLOG_TRACE() << "Committing";
    sql.exec("commit");
    QLOG_TRACE_OUT();
}



// Index any PDFs that are attached.  Basically it turns the PDF into text and
// adds it to SearchIndex as a "recognition" row with weight 100.
//
// reslid - local id of the resource; the PDF is read from
//          <dba dir>/<reslid>.pdf and the row is stored under this id.
//
// Nothing is indexed when local PDF indexing is disabled, reslid is not
// positive, or the document cannot be loaded or is encrypted/locked.
void NoteIndexer::indexPdf(qint32 reslid) {

    QLOG_TRACE_IN();
    // Every early exit logs the matching trace-out so the trace stays balanced.
    if (!global.indexPDFLocally) {
        QLOG_TRACE_OUT();
        return;
    }

    NSqlQuery sql(db);
    if (reslid <= 0) {
        QLOG_TRACE_OUT();
        return;
    }

    QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +".pdf";

    // With this raw-pointer Poppler API the caller owns the returned document
    // and pages, so each must be deleted on every path.
    Poppler::Document *doc = Poppler::Document::load(file);
    if (doc == nullptr) {
        QLOG_TRACE_OUT();
        return;
    }
    if (doc->isEncrypted() || doc->isLocked()) {
        delete doc;
        QLOG_TRACE_OUT();
        return;
    }

    QString text = "";
    for (int i=0; i<doc->numPages(); i++) {
        Poppler::Page *page = doc->page(i);
        if (page == nullptr)
            continue;   // page could not be created; skip it instead of crashing
        QRectF rect;    // default-constructed rect, as passed by the original code
        text += page->text(rect) + QString(" ");
        delete page;
    }
    delete doc;

    QLOG_TRACE() << "Adding PDF";
    // Add the extracted text as a single row with a weight of 100.
    sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)");
    sql.bindValue(":lid", reslid);
    sql.bindValue(":weight", 100);
    sql.bindValue(":source", "recognition");

    text = global.normalizeTermForSearchAndIndex(text);
    sql.bindValue(":content", text);

    sql.exec();
    QLOG_TRACE_OUT();
}