void RenderThread::run() { double factor = qBound(0.01, m_zoomFactor, 10.0); if (!QFile::exists(m_pdfUrl)) return; Poppler::Document* document = Poppler::Document::load(m_pdfUrl); if (!document || document->isLocked()) { delete document; return; } // Access page of the PDF file document->setRenderHint(Poppler::Document::Antialiasing, true); document->setRenderHint(Poppler::Document::TextAntialiasing, true); Poppler::Page* pdfPage = document->page(0); // Document starts at page 0 if (pdfPage == 0) { return; } // Generate a QImage of the rendered page QImage image = pdfPage->renderToImage(factor*200.0, factor*200.0); emit previewReady(image); delete pdfPage; delete document; }
void TestPassword::password3() { Poppler::Document *doc; doc = Poppler::Document::load( QString::fromUtf8(TESTDATADIR "/unittestcases/PasswordEncrypted.pdf") ); QVERIFY( doc ); QVERIFY( doc->isLocked() ); QVERIFY( !doc->unlock( "", "password" ) ); QVERIFY( !doc->isLocked() ); delete doc; }
void TestPassword::password2b() { Poppler::Document *doc; doc = Poppler::Document::load(QString::fromUtf8(TESTDATADIR "/unittestcases/Gday garçon - owner.pdf") ); QVERIFY( doc ); QVERIFY( !doc->isLocked() ); QVERIFY( !doc->unlock( QString::fromUtf8("garçon").toLatin1(), "" ) ); QVERIFY( !doc->isLocked() ); delete doc; }
// Index any PDFs that are attached. Basically it turns the PDF into text and adds it the same // way as a note's body void NoteIndexer::indexPdf(qint32 reslid) { NSqlQuery sql(global.db); if (reslid <= 0) return; QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +".pdf"; QString text = ""; Poppler::Document *doc = Poppler::Document::load(file); if (doc == NULL || doc->isEncrypted() || doc->isLocked()) return; for (int i=0; i<doc->numPages(); i++) { QRectF rect; text = text + doc->page(i)->text(rect) + QString(" "); } // Add the new content. it is basically a text version of the note with a weight of 100. sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)"); sql.bindValue(":lid", reslid); sql.bindValue(":weight", 100); sql.bindValue(":source", "recognition"); sql.bindValue(":content", text); sql.exec(); }
// Index any PDFs that are attached. Basically it turns the PDF into text and adds it the same // way as a note's body void IndexRunner::indexPdf(qint32 lid, Resource &r) { if (!keepRunning || pauseIndexing) { indexTimer->start(); return; } ResourceTable rtable(db); qint32 reslid = rtable.getLid(r.guid); if (lid <= 0) { indexTimer->start(); return; } QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +".pdf"; QString text = ""; Poppler::Document *doc = Poppler::Document::load(file); if (doc == NULL || doc->isEncrypted() || doc->isLocked()) { indexTimer->start(); return; } for (int i=0; keepRunning && !pauseIndexing && i<doc->numPages(); i++) { QRectF rect; text = text + doc->page(i)->text(rect) + QString(" "); } IndexRecord *rec = new IndexRecord(); rec->content = text; rec->source = "recognition"; rec->weight = 100; rec->lid = lid; if (indexHash->contains(lid)) { delete indexHash->value(lid); indexHash->remove(lid); } indexHash->insert(lid, rec); }
//TODO think about storing the documents instead of the filenames in a list //Would avoid creation of the documents for each search but probably take up more memory bool MainWindow::searchPDFPostResults(const QFileInfo &fi, QString searchString) { bool nHitsExceeded = false; Poppler::Document *document = Poppler::Document::load(fi.absoluteFilePath()); if (!document || document->isLocked()) { //Oops, document empty => error message QMessageBox mbox; mbox.setText(tr("Could not read the file \n'") + fi.fileName() + tr("\ntherefore, possible matches might be missing")); mbox.exec(); delete document; return false; } //Get document title QString title = document->info("Title"); //If no title specified => use filename instead if(title == "") title = fi.fileName(); QString shortTitle = QString(title); if (shortTitle.length() > 16) { shortTitle = "..." + shortTitle.right(16); } ui->statusBar->showMessage("Durchsuche PDF: " + shortTitle); qDebug() << "Searching pdf: " << title; //Search the document for the searchString int hitCounter = 0; for (int page = 0; page < document->numPages(); page++) { //Parameters 1,2,3,4 of the search function //will contain the rectangle coordinates of where the text was found. 
//Since we are not interested in these coordinates, we just pass a single variable double coordinate = 0.0; if (document->page(page)->search(searchString, coordinate,coordinate,coordinate,coordinate, Poppler::Page::NextResult, Poppler::Page::CaseInsensitive)) { hitCounter++; ui->statusBar->showMessage(tr("Searching in ") + shortTitle + " - hit on page: " + QString::number(page)); //Yay, we found the search String on this page //=> Add this page to the result list addDocNPageToResultList(title, fi, page); // TODO make this limit a variable if(hitCounter >= maxHits) { QMessageBox mbox; mbox.setText(tr("Search has already reached the maximum of ") + QString::number(maxHits) + tr(" hits please choose a more specific criterion")); mbox.exec(); nHitsExceeded = true; break; } } } //Don't forget to delete the document delete document; //Return whether the max number of hits was reached return nHitsExceeded; }
/**
 * Scans the text of the first pages of a PDF for ISBNs.
 *
 * @param fileName  Path of the PDF to open.
 * @param ISBNList  Passed through to isbnMethods::findIsbns() together with
 *                  the text of each scanned page.
 *
 * At most the first 15 pages are scanned. Nothing is scanned when the file
 * cannot be loaded or is locked; pages that cannot be created are skipped.
 */
void guFolderInspector::extractIsbnsFromPdf(QString fileName, QList<QString> &ISBNList)
{
    Poppler::Document* document = Poppler::Document::load(fileName, 0, 0);
    if (!document || document->isLocked()) {
        // ... error message ....
        delete document;
        return;
    }

    isbnMethods find;

    // Limit the scan to the first 15 pages, or to the number of pages if smaller.
    const int numOfPages = document->numPages();
    const int numOfSearchPages = qMin(numOfPages, 15);

    for (int pageNumber = 0; pageNumber < numOfSearchPages; pageNumber++) {
        Poppler::Page* pdfPage = document->page(pageNumber);   // Document starts at page 0
        if (!pdfPage) {
            // ... error message ...
            continue;
        }

        // Extract the text of the whole page area.
        QString pageContent;
        pageContent = pdfPage->text(QRectF(QPointF(0, 0), pdfPage->pageSizeF()));

        // After the usage, the page must be deleted -- every page, not only the last one.
        delete pdfPage;

        find.findIsbns(pageContent, ISBNList);
    }

    // Finally, don't forget to destroy the document.
    delete document;
}
int main( int argc, char **argv ) { QApplication a( argc, argv ); // QApplication required! QTime t; t.start(); QDir directory( argv[1] ); foreach ( const QString &fileName, directory.entryList() ) { if (fileName.endsWith("pdf") ) { qDebug() << "Doing" << fileName.toLatin1().data() << ":"; Poppler::Document *doc = Poppler::Document::load( directory.canonicalPath()+"/"+fileName ); if (!doc) { qWarning() << "doc not loaded"; } else if ( doc->isLocked() ) { if (! doc->unlock( "", "password" ) ) { qWarning() << "couldn't unlock document"; delete doc; } } else { doc->pdfVersion(); doc->info("Title"); doc->info("Subject"); doc->info("Author"); doc->info("Keywords"); doc->info("Creator"); doc->info("Producer"); doc->date("CreationDate").toString(); doc->date("ModDate").toString(); doc->numPages(); doc->isLinearized(); doc->isEncrypted(); doc->okToPrint(); doc->okToCopy(); doc->okToChange(); doc->okToAddNotes(); doc->pageMode(); for( int index = 0; index < doc->numPages(); ++index ) { Poppler::Page *page = doc->page( index ); QImage image = page->renderToImage(); page->pageSize(); page->orientation(); delete page; std::cout << "."; std::cout.flush(); } std::cout << std::endl; delete doc; } } } std::cout << "Elapsed time: " << (t.elapsed()/1000) << "seconds" << std::endl; }
// Index any resources void IndexRunner::indexRecognition(qint32 lid, Resource &r) { if (!keepRunning || pauseIndexing) { //indexTimer->start(); return; } // Add filename or source url to search index if (r.attributes.isSet()) { NSqlQuery sql(db); ResourceAttributes a = r.attributes; if (a.fileName.isSet()) { sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)"); sql.bindValue(":lid", lid); sql.bindValue(":weight", 100); sql.bindValue(":source", "recognition"); sql.bindValue(":content", QString(a.fileName)); sql.exec(); } if (a.sourceURL.isSet()) { sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)"); sql.bindValue(":lid", lid); sql.bindValue(":weight", 100); sql.bindValue(":source", "recognition"); sql.bindValue(":content", QString(a.sourceURL)); sql.exec(); } } // Make sure we have something to look through. Data recognition; if (r.recognition.isSet()) recognition = r.recognition; if (!recognition.body.isSet()) return; QDomDocument doc; QString emsg; doc.setContent(recognition.body, &emsg); // look for text tags QDomNodeList anchors = doc.documentElement().elementsByTagName("t"); #if QT_VERSION < 0x050000 for (unsigned int i=0; keepRunning && !pauseIndexing && i<anchors.length(); i++) { #else for (int i=0; keepRunning && !pauseIndexing && i<anchors.length(); i++) { #endif QApplication::processEvents(); QDomElement enmedia = anchors.at(i).toElement(); QString weight = enmedia.attribute("w"); QString text = enmedia.text(); if (text != "") { IndexRecord *rec = new IndexRecord(); rec->weight = weight.toInt(); rec->lid = lid; rec->content = text; rec->source = "recognition"; if (indexHash->contains(lid)) { delete indexHash->value(lid); indexHash->remove(lid); } indexHash->insert(lid, rec); } } } // Index any PDFs that are attached. 
Basically it turns the PDF into text and adds it the same // way as a note's body void IndexRunner::indexPdf(qint32 lid, Resource &r) { if (!global.indexPDFLocally) return; if (!keepRunning || pauseIndexing) { //indexTimer->start(); return; } ResourceTable rtable(db); qint32 reslid = rtable.getLid(r.guid); if (lid <= 0) { //indexTimer->start(); return; } QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +".pdf"; QString text = ""; Poppler::Document *doc = Poppler::Document::load(file); if (doc == nullptr || doc->isEncrypted() || doc->isLocked()) { //indexTimer->start(); return; } for (int i=0; keepRunning && !pauseIndexing && i<doc->numPages(); i++) { QRectF rect; text = text + doc->page(i)->text(rect) + QString(" "); } IndexRecord *rec = new IndexRecord(); rec->content = text; rec->source = "recognition"; rec->weight = 100; rec->lid = lid; if (indexHash->contains(lid)) { delete indexHash->value(lid); indexHash->remove(lid); } indexHash->insert(lid, rec); } // Index any files that are attached. 
void IndexRunner::indexAttachment(qint32 lid, Resource &r) { if (!officeFound) return; QLOG_DEBUG() << "indexing attachment to note " << lid; if (!keepRunning || pauseIndexing) { //indexTimer->start(); return; } ResourceTable rtable(db); qint32 reslid = rtable.getLid(r.guid); if (lid <= 0) { //indexTimer->start(); return; } QLOG_DEBUG() << "Resource " << reslid; QString extension = ""; ResourceAttributes attributes; if (r.attributes.isSet()) attributes = r.attributes; if (attributes.fileName.isSet()) { extension = attributes.fileName; int i = extension.indexOf("."); if (i != -1) extension = extension.mid(i); } if (extension != ".doc" && extension != ".xls" && extension != ".ppt" && extension != ".docx" && extension != ".xlsx" && extension != ".pptx" && extension != ".pps" && extension != ".pdf" && extension != ".odt" && extension != ".odf" && extension != ".ott" && extension != ".odm" && extension != ".html" && extension != ".txt" && extension != ".oth" && extension != ".ods" && extension != ".ots" && extension != ".odg" && extension != ".otg" && extension != ".odp" && extension != ".otp" && extension != ".odb" && extension != ".oxt" && extension != ".htm" && extension != ".docm") return; QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +extension; QFile dataFile(file); if (!dataFile.exists()) { QDir dir(global.fileManager.getDbaDirPath()); QStringList filterList; filterList.append(QString::number(lid)+".*"); QStringList list= dir.entryList(filterList, QDir::Files); if (list.size() > 0) { file = global.fileManager.getDbaDirPath()+list[0]; } } QString outDir = global.fileManager.getTmpDirPath(); QProcess sofficeProcess; QString cmd = "soffice --headless --convert-to txt:\"Text\" --outdir " +outDir + " " +file; sofficeProcess.start(cmd, QIODevice::ReadWrite|QIODevice::Unbuffered); QLOG_DEBUG() << "Starting soffice "; sofficeProcess.waitForStarted(); QLOG_DEBUG() << "Waiting for completion"; sofficeProcess.waitForFinished(); int rc = 
sofficeProcess.exitCode(); QLOG_DEBUG() << "soffice Errors:" << sofficeProcess.readAllStandardError(); QLOG_DEBUG() << "soffice Output:" << sofficeProcess.readAllStandardOutput(); QLOG_DEBUG() << "return code:" << rc; if (rc == 255) { QLOG_ERROR() << "soffice not found. Disabling attachment indexing."; this->officeFound = false; return; } QFile txtFile(outDir+QString::number(reslid) +".txt"); if (txtFile.open(QIODevice::ReadOnly)) { QString text; text = txtFile.readAll(); NSqlQuery sql(db); db->lockForWrite(); sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, 'recognition', :content)"); sql.bindValue(":lid", lid); sql.bindValue(":weight", 100); text = global.normalizeTermForSearchAndIndex(text); sql.bindValue(":content", text); QLOG_DEBUG() << "Adding note resource to index DB"; sql.exec(); db->unlock(); txtFile.close(); } QDir dir; dir.remove(outDir+QString::number(reslid) +".txt"); } void IndexRunner::flushCache() { if (indexHash->size() <= 0) return; QDateTime start = QDateTime::currentDateTimeUtc(); NSqlQuery sql(db); db->lockForWrite(); sql.exec("begin"); QHash<qint32, IndexRecord*>::iterator i; // Start adding words to the index. Every 200 sql insertions we do a commit int commitCount = 200; for (i=indexHash->begin(); keepRunning && !pauseIndexing && i!=indexHash->end(); ++i) { qint32 lid = i.key(); IndexRecord *rec = i.value(); qint32 weight = rec->weight; QString source = rec->source; QString content = rec->content; delete rec; // Delete any old content sql.prepare("Delete from SearchIndex where lid=:lid and source=:source"); sql.bindValue(":lid", lid); sql.bindValue(":source", source); sql.exec(); // Add the new content. it is basically a text version of the note with a weight of 100. 
sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)"); sql.bindValue(":lid", lid); sql.bindValue(":weight", weight); sql.bindValue(":source", source); content = global.normalizeTermForSearchAndIndex(content); sql.bindValue(":content", content); sql.exec(); commitCount--; if (commitCount <= 0) { sql.exec("commit"); commitCount = 200; } } indexHash->clear(); sql.exec("commit"); sql.finish(); db->unlock(); QDateTime finish = QDateTime::currentDateTimeUtc(); QLOG_DEBUG() << "Index Cache Flush Complete: " << finish.toMSecsSinceEpoch() - start.toMSecsSinceEpoch() << " milliseconds."; } void IndexRunner::busy(bool value, bool finished) { iAmBusy=value; emit(this->indexDone(finished)); }
int main( int argc, char **argv ) { QApplication a( argc, argv ); // QApplication required! if ( argc < 2 || (argc == 3 && strcmp(argv[2], "-arthur") != 0) || argc > 3) { // use argument as file name qWarning() << "usage: test-poppler-qt4 filename [-arthur]"; exit(1); } Poppler::Document *doc = Poppler::Document::load(QFile::decodeName(argv[1])); if (!doc) { qWarning() << "doc not loaded"; exit(1); } if (doc->isLocked()) { qWarning() << "document locked (needs password)"; exit(0); } if (doc->numPages() <= 0) { delete doc; qDebug() << "Doc has no pages"; return 0; } QString backendString; if (argc == 3 && strcmp(argv[2], "-arthur") == 0) { backendString = "Arthur"; doc->setRenderBackend(Poppler::Document::ArthurBackend); } else { backendString = "Splash"; doc->setRenderBackend(Poppler::Document::SplashBackend); } doc->setRenderHint(Poppler::Document::Antialiasing, true); doc->setRenderHint(Poppler::Document::TextAntialiasing, true); for (int i = 0; i < doc->numPages(); ++i) { Poppler::Page *page = doc->page(i); if (page) { qDebug() << "Rendering page using" << backendString << "backend: " << i; QTime t = QTime::currentTime(); QImage image = page->renderToImage(); qDebug() << "Rendering took" << t.msecsTo(QTime::currentTime()) << "msecs"; image.save(QString("test-rennder-to-file%1.ppm").arg(i)); delete page; } } return 0; }
void PdfViewer::loadDocument(const QString &file, PdfView::PositionHandling keepPosition) { //QTime t = QTime::currentTime(); #ifndef QT_NO_CURSOR QApplication::setOverrideCursor(Qt::WaitCursor); #endif // QT_NO_CURSOR // TODO: close only when the new file fails to load const QString tempFileName = file; // we must copy file in a new variable because otherwise closeDocument() empties file if file == m_file (which is the case in slotReload()) closeDocument(); bool isLoaded = m_pdfView->load(tempFileName); if (!isLoaded) { QPointer<QMessageBox> msgBox = new QMessageBox(QMessageBox::Critical, tr("Open Error"), tr("Cannot open:\n") + tempFileName, QMessageBox::Ok, this); msgBox->exec(); delete msgBox; m_fileOpenRecentAction->removeFile(tempFileName); #ifndef QT_NO_CURSOR QApplication::restoreOverrideCursor(); #endif // QT_NO_CURSOR return; } Poppler::Document *doc = m_pdfView->document(); while (doc->isLocked()) { bool ok = true; QString password = QInputDialog::getText(this, tr("Document Password"), tr("Please insert the password of the document:"), QLineEdit::Password, QString(), &ok); if (!ok) { m_pdfView->close(); m_fileOpenRecentAction->removeFile(tempFileName); #ifndef QT_NO_CURSOR QApplication::restoreOverrideCursor(); #endif // QT_NO_CURSOR return; } doc->unlock(password.toLatin1(), password.toLatin1()); } m_file = QFileInfo(tempFileName).absoluteFilePath(); m_fileOpenRecentAction->addFile(m_file); // remove previous file from m_watcher and add new file if (m_watcher) { const QStringList files = m_watcher->files(); if (!files.isEmpty()) m_watcher->removePaths(files); m_watcher->addPath(m_file); } //qCritical() << t.msecsTo(QTime::currentTime()); Q_FOREACH(DocumentObserver *obs, m_observers) { if (keepPosition == PdfView::DontKeepPosition) obs->documentLoaded(); obs->pageChanged(m_currentPage, keepPosition); //qCritical() << t.msecsTo(QTime::currentTime()); } // set window title const QString docTitle = doc->info("Title"); setWindowTitle((docTitle.isEmpty() ? 
QFileInfo(m_file).fileName() : docTitle) + " - " + QCoreApplication::applicationName()); // enable actions m_fileSaveCopyAction->setEnabled(true); m_findAction->setEnabled(true); m_findNextAction->setEnabled(true); m_findPreviousAction->setEnabled(true); m_showPresentationAction->setEnabled(true); #ifndef QT_NO_CURSOR QApplication::restoreOverrideCursor(); #endif // QT_NO_CURSOR //qCritical() << "close and load document" << t.msecsTo(QTime::currentTime()); }