void NoteIndexer::addTextIndex(int lid, QString content) { // Delete any old content NSqlQuery sql(db); sql.prepare("Delete from SearchIndex where lid=:lid and source=:source"); sql.bindValue(":lid", lid); sql.bindValue(":source", "text"); sql.exec(); // Add the new content. it is basically a text version of the note with a weight of 100. sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)"); sql.bindValue(":lid", lid); sql.bindValue(":weight", 100); sql.bindValue(":source", "text"); content = global.normalizeTermForSearchAndIndex(content); sql.bindValue(":content", content); sql.exec(); sql.prepare("Delete from DataStore where lid=:lid and key=:key"); sql.bindValue(":lid", lid); sql.bindValue(":key", NOTE_INDEX_NEEDED); sql.exec(); }
// Index any resources void IndexRunner::indexRecognition(qint32 lid, Resource &r) { if (!keepRunning || pauseIndexing) { //indexTimer->start(); return; } // Add filename or source url to search index if (r.attributes.isSet()) { NSqlQuery sql(db); ResourceAttributes a = r.attributes; if (a.fileName.isSet()) { sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)"); sql.bindValue(":lid", lid); sql.bindValue(":weight", 100); sql.bindValue(":source", "recognition"); sql.bindValue(":content", QString(a.fileName)); sql.exec(); } if (a.sourceURL.isSet()) { sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)"); sql.bindValue(":lid", lid); sql.bindValue(":weight", 100); sql.bindValue(":source", "recognition"); sql.bindValue(":content", QString(a.sourceURL)); sql.exec(); } } // Make sure we have something to look through. Data recognition; if (r.recognition.isSet()) recognition = r.recognition; if (!recognition.body.isSet()) return; QDomDocument doc; QString emsg; doc.setContent(recognition.body, &emsg); // look for text tags QDomNodeList anchors = doc.documentElement().elementsByTagName("t"); #if QT_VERSION < 0x050000 for (unsigned int i=0; keepRunning && !pauseIndexing && i<anchors.length(); i++) { #else for (int i=0; keepRunning && !pauseIndexing && i<anchors.length(); i++) { #endif QApplication::processEvents(); QDomElement enmedia = anchors.at(i).toElement(); QString weight = enmedia.attribute("w"); QString text = enmedia.text(); if (text != "") { IndexRecord *rec = new IndexRecord(); rec->weight = weight.toInt(); rec->lid = lid; rec->content = text; rec->source = "recognition"; if (indexHash->contains(lid)) { delete indexHash->value(lid); indexHash->remove(lid); } indexHash->insert(lid, rec); } } } // Index any PDFs that are attached. Basically it turns the PDF into text and adds it the same // way as a note's body void IndexRunner::indexPdf(qint32 lid, Resource &r) { if (!global.indexPDFLocally) return; if (!keepRunning || pauseIndexing) { //indexTimer->start(); return; } ResourceTable rtable(db); qint32 reslid = rtable.getLid(r.guid); if (lid <= 0) { //indexTimer->start(); return; } QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +".pdf"; QString text = ""; Poppler::Document *doc = Poppler::Document::load(file); if (doc == nullptr || doc->isEncrypted() || doc->isLocked()) { //indexTimer->start(); return; } for (int i=0; keepRunning && !pauseIndexing && i<doc->numPages(); i++) { QRectF rect; text = text + doc->page(i)->text(rect) + QString(" "); } IndexRecord *rec = new IndexRecord(); rec->content = text; rec->source = "recognition"; rec->weight = 100; rec->lid = lid; if (indexHash->contains(lid)) { delete indexHash->value(lid); indexHash->remove(lid); } indexHash->insert(lid, rec); } // Index any files that are attached. void IndexRunner::indexAttachment(qint32 lid, Resource &r) { if (!officeFound) return; QLOG_DEBUG() << "indexing attachment to note " << lid; if (!keepRunning || pauseIndexing) { //indexTimer->start(); return; } ResourceTable rtable(db); qint32 reslid = rtable.getLid(r.guid); if (lid <= 0) { //indexTimer->start(); return; } QLOG_DEBUG() << "Resource " << reslid; QString extension = ""; ResourceAttributes attributes; if (r.attributes.isSet()) attributes = r.attributes; if (attributes.fileName.isSet()) { extension = attributes.fileName; int i = extension.indexOf("."); if (i != -1) extension = extension.mid(i); } if (extension != ".doc" && extension != ".xls" && extension != ".ppt" && extension != ".docx" && extension != ".xlsx" && extension != ".pptx" && extension != ".pps" && extension != ".pdf" && extension != ".odt" && extension != ".odf" && extension != ".ott" && extension != ".odm" && extension != ".html" && extension != ".txt" && extension != ".oth" && extension != ".ods" && extension != ".ots" && extension != ".odg" && extension != ".otg" && extension != ".odp" && extension != ".otp" && extension != ".odb" && extension != ".oxt" && extension != ".htm" && extension != ".docm") return; QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +extension; QFile dataFile(file); if (!dataFile.exists()) { QDir dir(global.fileManager.getDbaDirPath()); QStringList filterList; filterList.append(QString::number(lid)+".*"); QStringList list= dir.entryList(filterList, QDir::Files); if (list.size() > 0) { file = global.fileManager.getDbaDirPath()+list[0]; } } QString outDir = global.fileManager.getTmpDirPath(); QProcess sofficeProcess; QString cmd = "soffice --headless --convert-to txt:\"Text\" --outdir " +outDir + " " +file; sofficeProcess.start(cmd, QIODevice::ReadWrite|QIODevice::Unbuffered); QLOG_DEBUG() << "Starting soffice "; sofficeProcess.waitForStarted(); QLOG_DEBUG() << "Waiting for completion"; sofficeProcess.waitForFinished(); int rc = sofficeProcess.exitCode(); QLOG_DEBUG() << "soffice Errors:" << sofficeProcess.readAllStandardError(); QLOG_DEBUG() << "soffice Output:" << sofficeProcess.readAllStandardOutput(); QLOG_DEBUG() << "return code:" << rc; if (rc == 255) { QLOG_ERROR() << "soffice not found. Disabling attachment indexing."; this->officeFound = false; return; } QFile txtFile(outDir+QString::number(reslid) +".txt"); if (txtFile.open(QIODevice::ReadOnly)) { QString text; text = txtFile.readAll(); NSqlQuery sql(db); db->lockForWrite(); sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, 'recognition', :content)"); sql.bindValue(":lid", lid); sql.bindValue(":weight", 100); text = global.normalizeTermForSearchAndIndex(text); sql.bindValue(":content", text); QLOG_DEBUG() << "Adding note resource to index DB"; sql.exec(); db->unlock(); txtFile.close(); } QDir dir; dir.remove(outDir+QString::number(reslid) +".txt"); } void IndexRunner::flushCache() { if (indexHash->size() <= 0) return; QDateTime start = QDateTime::currentDateTimeUtc(); NSqlQuery sql(db); db->lockForWrite(); sql.exec("begin"); QHash<qint32, IndexRecord*>::iterator i; // Start adding words to the index. Every 200 sql insertions we do a commit int commitCount = 200; for (i=indexHash->begin(); keepRunning && !pauseIndexing && i!=indexHash->end(); ++i) { qint32 lid = i.key(); IndexRecord *rec = i.value(); qint32 weight = rec->weight; QString source = rec->source; QString content = rec->content; delete rec; // Delete any old content sql.prepare("Delete from SearchIndex where lid=:lid and source=:source"); sql.bindValue(":lid", lid); sql.bindValue(":source", source); sql.exec(); // Add the new content. it is basically a text version of the note with a weight of 100. sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)"); sql.bindValue(":lid", lid); sql.bindValue(":weight", weight); sql.bindValue(":source", source); content = global.normalizeTermForSearchAndIndex(content); sql.bindValue(":content", content); sql.exec(); commitCount--; if (commitCount <= 0) { sql.exec("commit"); commitCount = 200; } } indexHash->clear(); sql.exec("commit"); sql.finish(); db->unlock(); QDateTime finish = QDateTime::currentDateTimeUtc(); QLOG_DEBUG() << "Index Cache Flush Complete: " << finish.toMSecsSinceEpoch() - start.toMSecsSinceEpoch() << " milliseconds."; } void IndexRunner::busy(bool value, bool finished) { iAmBusy=value; emit(this->indexDone(finished)); }
// Index any resources void NoteIndexer::indexRecognition(qint32 reslid, Resource &r) { QLOG_TRACE_IN(); if (!r.noteGuid.isSet() || !r.guid.isSet()) return; if (reslid <= 0) return; NSqlQuery sql(db); // Make sure we have something to look through. Data recognition; if (r.recognition.isSet()) recognition = r.recognition; if (!recognition.body.isSet()) return; QDomDocument doc; QString emsg; doc.setContent(recognition.body, &emsg); // look for text tags QDomNodeList anchors = doc.documentElement().elementsByTagName("t"); QLOG_TRACE() << "Beginning insertion of recognition:"; QLOG_TRACE() << "Anchors found: " << anchors.length(); sql.exec("begin;"); #if QT_VERSION < 0x050000 for (unsigned int i=0; i<anchors.length(); i++) { #else for (int i=0; i<anchors.length(); i++) { #endif QLOG_TRACE() << "Anchor: " << i; QApplication::processEvents(); QDomElement enmedia = anchors.at(i).toElement(); QString weight = enmedia.attribute("w"); QString text = enmedia.text(); if (text != "") { // Add the new content. it is basically a text version of the note with a weight of 100. sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)"); sql.bindValue(":lid", reslid); sql.bindValue(":weight", weight); sql.bindValue(":source", "recognition"); text = global.normalizeTermForSearchAndIndex(text); sql.bindValue(":content", text); sql.exec(); } } QLOG_TRACE() << "Committing"; sql.exec("commit"); QLOG_TRACE_OUT(); } // Index any PDFs that are attached. Basically it turns the PDF into text and adds it the same // way as a note's body void NoteIndexer::indexPdf(qint32 reslid) { QLOG_TRACE_IN(); if (!global.indexPDFLocally) return; NSqlQuery sql(db); if (reslid <= 0) return; QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +".pdf"; QString text = ""; Poppler::Document *doc = Poppler::Document::load(file); if (doc == nullptr || doc->isEncrypted() || doc->isLocked()) return; for (int i=0; i<doc->numPages(); i++) { QRectF rect; text = text + doc->page(i)->text(rect) + QString(" "); } QLOG_TRACE() << "Adding PDF"; // Add the new content. it is basically a text version of the note with a weight of 100. sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)"); sql.bindValue(":lid", reslid); sql.bindValue(":weight", 100); sql.bindValue(":source", "recognition"); text = global.normalizeTermForSearchAndIndex(text); sql.bindValue(":content", text); sql.exec(); QLOG_TRACE_OUT(); }