Esempio n. 1
1
// Thread body: renders the first page of the PDF at m_pdfUrl and emits
// previewReady() with the resulting image. Returns silently when the file
// does not exist, cannot be loaded, is locked, or has no first page.
void RenderThread::run()
{
    // Keep the zoom factor inside [0.01, 10.0] before scaling the resolution.
    const double factor = qBound(0.01, m_zoomFactor, 10.0);

    if (!QFile::exists(m_pdfUrl)) return;

    Poppler::Document* document = Poppler::Document::load(m_pdfUrl);
    if (!document || document->isLocked()) {
        delete document;
        return;
    }

    // Access page of the PDF file
    document->setRenderHint(Poppler::Document::Antialiasing, true);
    document->setRenderHint(Poppler::Document::TextAntialiasing, true);
    Poppler::Page* pdfPage = document->page(0);  // Document starts at page 0
    if (!pdfPage) {
        // The document is still owned here; release it before bailing out.
        delete document;
        return;
    }

    // Generate a QImage of the rendered page (base resolution 200, scaled by the zoom factor)
    const double resolution = factor * 200.0;
    QImage image = pdfPage->renderToImage(resolution, resolution);
    emit previewReady(image);

    delete pdfPage;
    delete document;
}
Esempio n. 2
0
void TestPassword::password3()
{
    Poppler::Document *doc;
    doc = Poppler::Document::load( QString::fromUtf8(TESTDATADIR "/unittestcases/PasswordEncrypted.pdf") );
    QVERIFY( doc );
    QVERIFY( doc->isLocked() );
    QVERIFY( !doc->unlock( "", "password" ) );
    QVERIFY( !doc->isLocked() );

    delete doc;
}
Esempio n. 3
0
void TestPassword::password2b()
{
    Poppler::Document *doc;
    doc = Poppler::Document::load(QString::fromUtf8(TESTDATADIR "/unittestcases/Gday garçon - owner.pdf") );
    QVERIFY( doc );
    QVERIFY( !doc->isLocked() );
    QVERIFY( !doc->unlock( QString::fromUtf8("garçon").toLatin1(), "" ) );
    QVERIFY( !doc->isLocked() );

    delete doc;
}
Esempio n. 4
0
// Index any PDFs that are attached.  Basically it turns the PDF into text and adds it the same
// way as a note's body.
//
// reslid: id of the resource; the PDF is read from "<dba dir><reslid>.pdf".
// Nothing is indexed when reslid is not positive or when the document cannot
// be loaded, is encrypted, or is locked.
void NoteIndexer::indexPdf(qint32 reslid) {

    NSqlQuery sql(global.db);
    if (reslid <= 0)
        return;

    QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +".pdf";

    QString text = "";
    Poppler::Document *doc = Poppler::Document::load(file);
    if (doc == NULL || doc->isEncrypted() || doc->isLocked()) {
        // A loaded-but-unusable document still has to be released.
        delete doc;
        return;
    }

    for (int i=0; i<doc->numPages(); i++) {
        // page() hands back a page object that the caller must delete.
        Poppler::Page *page = doc->page(i);
        if (page == NULL)
            continue;
        QRectF rect;  // null rectangle, as in the original call
        text = text + page->text(rect) + QString(" ");
        delete page;
    }
    delete doc;

    // Add the new content.  it is basically a text version of the note with a weight of 100.
    sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)");
    sql.bindValue(":lid", reslid);
    sql.bindValue(":weight", 100);
    sql.bindValue(":source", "recognition");
    sql.bindValue(":content", text);
    sql.exec();
}
Esempio n. 5
0
// Index any PDFs that are attached.  Basically it turns the PDF into text and adds it the same
// way as a note's body.
//
// lid: id under which the extracted text is stored in indexHash.
// r:   resource whose guid is used to look up the on-disk PDF file.
// On every early exit the index timer is restarted.
void IndexRunner::indexPdf(qint32 lid, Resource &r) {
    if (!keepRunning || pauseIndexing) {
        indexTimer->start();
        return;
    }
    ResourceTable rtable(db);
    qint32 reslid = rtable.getLid(r.guid);
    if (lid <= 0) {
        indexTimer->start();
        return;
    }
    QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +".pdf";

    QString text = "";
    Poppler::Document *doc = Poppler::Document::load(file);
    if (doc == NULL || doc->isEncrypted() || doc->isLocked()) {
        // A loaded-but-unusable document still has to be released.
        delete doc;
        indexTimer->start();
        return;
    }
    for (int i=0; keepRunning && !pauseIndexing && i<doc->numPages(); i++) {
        // page() hands back a page object that the caller must delete.
        Poppler::Page *page = doc->page(i);
        if (page == NULL)
            continue;
        QRectF rect;  // null rectangle, as in the original call
        text = text + page->text(rect) + QString(" ");
        delete page;
    }
    delete doc;

    IndexRecord *rec = new IndexRecord();
    rec->content = text;
    rec->source = "recognition";
    rec->weight = 100;
    rec->lid = lid;
    // Replace (and free) any record already cached for this lid.
    if (indexHash->contains(lid)) {
        delete indexHash->value(lid);
        indexHash->remove(lid);
    }
    indexHash->insert(lid, rec);
}
Esempio n. 6
0
//TODO think about storing the documents instead of the filenames in a list
//Would avoid creation of the documents for each search but probably take up more memory
bool MainWindow::searchPDFPostResults(const QFileInfo &fi, QString searchString)
{
    bool nHitsExceeded = false;
    Poppler::Document *document = Poppler::Document::load(fi.absoluteFilePath());
    if (!document || document->isLocked()) {
      //Oops, document empty => error message
      QMessageBox mbox;
      mbox.setText(tr("Could not read the file \n'") + fi.fileName() +
                   tr("\ntherefore, possible matches might be missing"));
      mbox.exec();
      delete document;
      return false;
    }
    //Get document title
    QString title = document->info("Title");
    //If no title specified => use filename instead
    if(title == "")
        title = fi.fileName();
    QString shortTitle = QString(title);
    if (shortTitle.length() > 16)
    {
        shortTitle = "..." + shortTitle.right(16);
    }
    ui->statusBar->showMessage("Durchsuche PDF: " + shortTitle);
    qDebug() << "Searching pdf: " << title;
    //Search the document for the searchString
    int hitCounter = 0;
    for (int page = 0; page < document->numPages(); page++) {
        //Parameters 1,2,3,4 of the search function
        //will contain the rectangle coordinates of where the text was found.
        //Since we are not interested in these coordinates, we just pass a single variable
        double coordinate = 0.0;
        if (document->page(page)->search(searchString, coordinate,coordinate,coordinate,coordinate,
            Poppler::Page::NextResult,
            Poppler::Page::CaseInsensitive)) {
            hitCounter++;
            ui->statusBar->showMessage(tr("Searching in ") + shortTitle +
                                       " - hit on page: " + QString::number(page));
            //Yay, we found the search String on this page
            //=> Add this page to the result list
            addDocNPageToResultList(title, fi, page);
            // TODO make this limit a variable
            if(hitCounter >= maxHits)
            {
                QMessageBox mbox;
                mbox.setText(tr("Search has already reached the maximum of ") + QString::number(maxHits)
                             + tr(" hits please choose a more specific criterion"));
                mbox.exec();
                nHitsExceeded = true;
                break;
            }
        }
    }

    //Don't forget to delete the document
    delete document;
    //Return whether the max number of hits was reached
    return nHitsExceeded;
}
// Extracts the text of the first pages of a PDF (at most 15) and passes each
// page's text to isbnMethods::findIsbns together with ISBNList.
//
// fileName: path of the PDF to inspect.
// ISBNList: list handed to findIsbns for every page scanned.
// Returns without touching ISBNList when the document cannot be loaded or is locked.
void guFolderInspector::extractIsbnsFromPdf(QString fileName, QList<QString> &ISBNList)
{
    Poppler::Document* document = Poppler::Document::load(fileName,0,0);

    if (!document || document->isLocked()) {
        // ... error message ....
        delete document;
        return;
    }

    isbnMethods find;

    // Only the first pages are scanned.
    int numOfPages = document->numPages(); // number of pages
    int numOfSearchPages = 15;
    if(numOfPages < numOfSearchPages)
    {
        numOfSearchPages = numOfPages;
    }
    for ( int pageNumber = 0 ; pageNumber < numOfSearchPages ; pageNumber++)
    {
        Poppler::Page* pdfPage = document->page(pageNumber);  // Document starts at page 0
        if (!pdfPage) {
            // ... error message ...
            continue;
        }

        // Take the text of the whole page area.
        QString pageContent;
        pageContent = pdfPage->text(QRectF(QPointF(0,0),pdfPage->pageSizeF()));

        find.findIsbns(pageContent, ISBNList);

        // after the usage, the page must be deleted; doing it per iteration
        // releases every page instead of only the last one
        delete pdfPage;
    }

    //Finally, don't forget to destroy the document:
    delete document;
}
// Stress test: for every entry of the directory given as argv[1] whose name
// ends in "pdf", load the document, query its metadata and render every page.
// Locked documents are only probed with unlock("", "password") and then
// released. Prints the elapsed time in seconds at the end.
int main( int argc, char **argv )
{
    QApplication a( argc, argv );               // QApplication required!

    QTime t;
    t.start();

    QDir directory( argv[1] );
    foreach ( const QString &fileName, directory.entryList() ) {
        if (fileName.endsWith("pdf") ) {
            qDebug() << "Doing" << fileName.toLatin1().data() << ":";
            Poppler::Document *doc = Poppler::Document::load( directory.canonicalPath()+"/"+fileName );
            if (!doc) {
                qWarning() << "doc not loaded";
            } else if ( doc->isLocked() ) {
                if (! doc->unlock( "", "password" ) ) {
                    qWarning() << "couldn't unlock document";
                }
                // Release the document whether or not unlocking worked;
                // nothing else is done with it on this path.
                delete doc;
            } else {
                // The results are discarded; the calls only exercise the API.
                doc->pdfVersion();
                doc->info("Title");
                doc->info("Subject");
                doc->info("Author");
                doc->info("Keywords");
                doc->info("Creator");
                doc->info("Producer");
                doc->date("CreationDate").toString();
                doc->date("ModDate").toString();
                doc->numPages();
                doc->isLinearized();
                doc->isEncrypted();
                doc->okToPrint();
                doc->okToCopy();
                doc->okToChange();
                doc->okToAddNotes();
                doc->pageMode();

                for( int index = 0; index < doc->numPages(); ++index ) {
                    Poppler::Page *page = doc->page( index );
                    if (page) {
                        QImage image = page->renderToImage();
                        page->pageSize();
                        page->orientation();
                        delete page;
                    }
                    // One progress dot per page index, as before.
                    std::cout << ".";
                    std::cout.flush();
                }
                std::cout << std::endl;
                delete doc;
            }
        }
    }

    std::cout << "Elapsed time: " << (t.elapsed()/1000) << "seconds" << std::endl;

}
Esempio n. 9
0
// Index any resources.
//
// Writes the resource's file name and source URL (when set) straight into the
// SearchIndex table, then walks the <t> elements of the recognition body and
// caches each non-empty text in indexHash under lid.
void IndexRunner::indexRecognition(qint32 lid, Resource &r) {

    const bool stopRequested = !keepRunning || pauseIndexing;
    if (stopRequested) {
        //indexTimer->start();
        return;
    }

    // Add filename or source url to search index
    if (r.attributes.isSet()) {
        NSqlQuery sql(db);
        ResourceAttributes attrs = r.attributes;
        if (attrs.fileName.isSet()) {
            sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)");
            sql.bindValue(":lid", lid);
            sql.bindValue(":weight", 100);
            sql.bindValue(":source", "recognition");
            sql.bindValue(":content", QString(attrs.fileName));
            sql.exec();
        }
        if (attrs.sourceURL.isSet()) {
            sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)");
            sql.bindValue(":lid", lid);
            sql.bindValue(":weight", 100);
            sql.bindValue(":source", "recognition");
            sql.bindValue(":content", QString(attrs.sourceURL));
            sql.exec();
        }
    }


    // Make sure we have something to look through.
    Data recognition;
    if (r.recognition.isSet())
        recognition = r.recognition;
    if (!recognition.body.isSet())
        return;

    QDomDocument recoDoc;
    QString parseError;
    recoDoc.setContent(recognition.body, &parseError);

    // look for text tags
    QDomNodeList textNodes = recoDoc.documentElement().elementsByTagName("t");
#if QT_VERSION < 0x050000
    for (unsigned int idx=0; keepRunning && !pauseIndexing && idx<textNodes.length(); idx++) {
#else
    for (int idx=0; keepRunning && !pauseIndexing && idx<textNodes.length(); idx++) {
#endif
        QApplication::processEvents();
        QDomElement textElement = textNodes.at(idx).toElement();
        const QString weightAttr = textElement.attribute("w");
        const QString content = textElement.text();
        if (content.isEmpty())
            continue;

        IndexRecord *record = new IndexRecord();
        record->weight = weightAttr.toInt();
        record->lid = lid;
        record->content = content;
        record->source = "recognition";
        // Drop (and free) whatever was cached for this lid before storing the new record.
        if (indexHash->contains(lid)) {
            delete indexHash->value(lid);
            indexHash->remove(lid);
        }
        indexHash->insert(lid, record);
    }
}


// Index any PDFs that are attached.  Basically it turns the PDF into text and adds it the same
// way as a note's body.
//
// lid: id under which the extracted text is stored in indexHash.
// r:   resource whose guid is used to look up the on-disk PDF file.
// Does nothing when local PDF indexing is disabled, indexing is stopped or
// paused, lid is not positive, or the document is missing, encrypted or locked.
void IndexRunner::indexPdf(qint32 lid, Resource &r) {
    if (!global.indexPDFLocally)
        return;
    if (!keepRunning || pauseIndexing) {
        //indexTimer->start();
        return;
    }
    ResourceTable rtable(db);
    qint32 reslid = rtable.getLid(r.guid);
    if (lid <= 0) {
        //indexTimer->start();
        return;
    }
    QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +".pdf";

    QString text = "";
    Poppler::Document *doc = Poppler::Document::load(file);
    if (doc == nullptr || doc->isEncrypted() || doc->isLocked()) {
        // A loaded-but-unusable document still has to be released.
        delete doc;
        //indexTimer->start();
        return;
    }
    for (int i=0; keepRunning && !pauseIndexing && i<doc->numPages(); i++) {
        // page() hands back a page object that the caller must delete.
        Poppler::Page *page = doc->page(i);
        if (page == nullptr)
            continue;
        QRectF rect;  // null rectangle, as in the original call
        text = text + page->text(rect) + QString(" ");
        delete page;
    }
    delete doc;

    IndexRecord *rec = new IndexRecord();
    rec->content = text;
    rec->source = "recognition";
    rec->weight = 100;
    rec->lid = lid;
    // Replace (and free) any record already cached for this lid.
    if (indexHash->contains(lid)) {
        delete indexHash->value(lid);
        indexHash->remove(lid);
    }
    indexHash->insert(lid, rec);
}




// Index any files that are attached.
//
// Converts the attachment to plain text by running soffice in headless mode,
// then inserts the normalized text into SearchIndex with weight 100 and source
// 'recognition'. Only a fixed set of office/text extensions is handled.
//
// lid: note id used for the SearchIndex row.
// r:   resource whose guid and file name determine the file to convert.
// If soffice exits with code 255, attachment indexing is disabled for
// subsequent calls (officeFound is cleared).
void IndexRunner::indexAttachment(qint32 lid, Resource &r) {
    if (!officeFound)
        return;
    QLOG_DEBUG() << "indexing attachment to note " << lid;
    if (!keepRunning || pauseIndexing) {
        //indexTimer->start();
        return;
    }
    ResourceTable rtable(db);
    qint32 reslid = rtable.getLid(r.guid);
    if (lid <= 0) {
        //indexTimer->start();
        return;
    }
    QLOG_DEBUG() << "Resource " << reslid;
    QString extension = "";
    ResourceAttributes attributes;
    if (r.attributes.isSet())
        attributes = r.attributes;
    if (attributes.fileName.isSet()) {
        extension = attributes.fileName;
        // NOTE(review): this keeps everything from the FIRST dot, so a name
        // like "a.b.docx" yields ".b.docx" and is skipped below - confirm intended.
        int i = extension.indexOf(".");
        if (i != -1)
            extension = extension.mid(i);
    }
    if (extension != ".doc"  && extension != ".xls"  && extension != ".ppt" &&
        extension != ".docx" && extension != ".xlsx" && extension != ".pptx" &&
        extension != ".pps"  && extension != ".pdf"  && extension != ".odt"  &&
        extension != ".odf"  && extension != ".ott"  && extension != ".odm"  &&
        extension != ".html" && extension != ".txt"  && extension != ".oth"  &&
        extension != ".ods"  && extension != ".ots"  && extension != ".odg"  &&
        extension != ".otg"  && extension != ".odp"  && extension != ".otp"  &&
        extension != ".odb"  && extension != ".oxt"  && extension != ".htm"  &&
        extension != ".docm")
                return;

    QString file = global.fileManager.getDbaDirPath() + QString::number(reslid) +extension;
    QFile dataFile(file);
    if (!dataFile.exists()) {
        // Fall back to the first file in the dba directory matching "<lid>.*".
        QDir dir(global.fileManager.getDbaDirPath());
        QStringList filterList;
        filterList.append(QString::number(lid)+".*");
        QStringList list= dir.entryList(filterList, QDir::Files);
        if (list.size() > 0) {
            file = global.fileManager.getDbaDirPath()+list[0];
        }
    }

    QString outDir = global.fileManager.getTmpDirPath();

    // Pass the arguments as a list rather than one command string, so that
    // directory or file names containing whitespace stay single arguments.
    QProcess sofficeProcess;
    QStringList sofficeArgs;
    sofficeArgs << "--headless"
                << "--convert-to" << "txt:Text"
                << "--outdir" << outDir
                << file;

    sofficeProcess.start("soffice", sofficeArgs,
                         QIODevice::ReadWrite|QIODevice::Unbuffered);

    QLOG_DEBUG() << "Starting soffice ";
    sofficeProcess.waitForStarted();
    QLOG_DEBUG() << "Waiting for completion";
    sofficeProcess.waitForFinished();
    int rc = sofficeProcess.exitCode();
    QLOG_DEBUG() << "soffice Errors:" << sofficeProcess.readAllStandardError();
    QLOG_DEBUG() << "soffice Output:" << sofficeProcess.readAllStandardOutput();
    QLOG_DEBUG() << "return code:" << rc;
    if (rc == 255) {
        QLOG_ERROR() << "soffice not found.  Disabling attachment indexing.";
        this->officeFound = false;
        return;
    }
    // The converted text is expected at "<tmp dir><reslid>.txt".
    QFile txtFile(outDir+QString::number(reslid) +".txt");
    if (txtFile.open(QIODevice::ReadOnly)) {
        QString text;
        text = txtFile.readAll();
        NSqlQuery sql(db);
        db->lockForWrite();
        sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, 'recognition', :content)");
        sql.bindValue(":lid", lid);
        sql.bindValue(":weight", 100);

        text = global.normalizeTermForSearchAndIndex(text);
        sql.bindValue(":content", text);

        QLOG_DEBUG() << "Adding note resource to index DB";
        sql.exec();
        db->unlock();
        txtFile.close();
    }
    // Remove the temporary text file whether or not it could be opened.
    QDir dir;
    dir.remove(outDir+QString::number(reslid) +".txt");
}


// Writes every cached IndexRecord in indexHash to the SearchIndex table
// (deleting any old row with the same lid and source first), then empties
// the cache. The loop stops early when indexing is stopped or paused; the
// records not yet written are freed and dropped along with the rest.
void IndexRunner::flushCache() {
    if (indexHash->size() <= 0)
        return;
    QDateTime start = QDateTime::currentDateTimeUtc();
    NSqlQuery sql(db);
    db->lockForWrite();
    sql.exec("begin");
    QHash<qint32, IndexRecord*>::iterator i;

    // Start adding words to the index.  Every 200 sql insertions we do a commit
    // NOTE(review): no new "begin" is issued after an intermediate commit - confirm intended.
    int commitCount = 200;

    for (i=indexHash->begin(); keepRunning && !pauseIndexing && i!=indexHash->end(); ++i) {
        qint32 lid = i.key();
        IndexRecord *rec = i.value();
        // Copy the fields out so the record can be released immediately.
        qint32 weight = rec->weight;
        QString source = rec->source;
        QString content = rec->content;
        delete rec;

        // Delete any old content
        sql.prepare("Delete from SearchIndex where lid=:lid and source=:source");
        sql.bindValue(":lid", lid);
        sql.bindValue(":source", source);
        sql.exec();

        // Add the new content.  it is basically a text version of the note with a weight of 100.
        sql.prepare("Insert into SearchIndex (lid, weight, source, content) values (:lid, :weight, :source, :content)");
        sql.bindValue(":lid", lid);
        sql.bindValue(":weight", weight);
        sql.bindValue(":source", source);

        content = global.normalizeTermForSearchAndIndex(content);
        sql.bindValue(":content", content);

        sql.exec();
        commitCount--;
        if (commitCount <= 0) {
            sql.exec("commit");
            commitCount = 200;
        }
    }

    // If the loop stopped early, `i` is the first record that was not written.
    // clear() only drops the pointers, so free those records here.
    for (; i!=indexHash->end(); ++i)
        delete i.value();
    indexHash->clear();
    sql.exec("commit");

    sql.finish();
    db->unlock();
    QDateTime finish = QDateTime::currentDateTimeUtc();

    QLOG_DEBUG() << "Index Cache Flush Complete: " <<
                    finish.toMSecsSinceEpoch() - start.toMSecsSinceEpoch()
                    << " milliseconds.";
}



// Stores the busy flag and emits indexDone() with the given finished state.
void IndexRunner::busy(bool value, bool finished) {
    this->iAmBusy = value;
    emit indexDone(finished);
}
Esempio n. 10
0
// Render test: loads the PDF named by argv[1], renders every page with the
// Splash backend (or Arthur when "-arthur" is given as second argument),
// logs the time taken per page and saves each page as a .ppm image.
// Returns 1 on bad usage or load failure, 0 otherwise.
int main( int argc, char **argv )
{
    QApplication a( argc, argv );               // QApplication required!

    if ( argc < 2 ||
        (argc == 3 && strcmp(argv[2], "-arthur") != 0) ||
        argc > 3)
    {
        // use argument as file name
        qWarning() << "usage: test-poppler-qt4 filename [-arthur]";
        return 1;
    }

    Poppler::Document *doc = Poppler::Document::load(QFile::decodeName(argv[1]));
    if (!doc)
    {
        qWarning() << "doc not loaded";
        return 1;
    }

    if (doc->isLocked())
    {
        qWarning() << "document locked (needs password)";
        delete doc;
        return 0;
    }

    if (doc->numPages() <= 0)
    {
        delete doc;
        qDebug() << "Doc has no pages";
        return 0;
    }

    // Pick the render backend from the optional second argument.
    QString backendString;
    if (argc == 3 && strcmp(argv[2], "-arthur") == 0)
    {
        backendString = "Arthur";
        doc->setRenderBackend(Poppler::Document::ArthurBackend);
    }
    else
    {
        backendString = "Splash";
        doc->setRenderBackend(Poppler::Document::SplashBackend);
    }
    doc->setRenderHint(Poppler::Document::Antialiasing, true);
    doc->setRenderHint(Poppler::Document::TextAntialiasing, true);

    for (int i = 0; i < doc->numPages(); ++i)
    {
        Poppler::Page *page = doc->page(i);
        if (page) {
            qDebug() << "Rendering page using" << backendString << "backend: " << i;
            QTime t = QTime::currentTime();
            QImage image = page->renderToImage();
            qDebug() << "Rendering took" << t.msecsTo(QTime::currentTime()) << "msecs";
            image.save(QString("test-rennder-to-file%1.ppm").arg(i));
            delete page;
        }
    }

    // Release the document on the normal path too, matching the no-pages path.
    delete doc;
    return 0;
}
Esempio n. 11
0
void PdfViewer::loadDocument(const QString &file, PdfView::PositionHandling keepPosition)
{
//QTime t = QTime::currentTime();
#ifndef QT_NO_CURSOR
	QApplication::setOverrideCursor(Qt::WaitCursor);
#endif // QT_NO_CURSOR

	// TODO: close only when the new file fails to load
	const QString tempFileName = file; // we must copy file in a new variable because otherwise closeDocument() empties file if file == m_file (which is the case in slotReload())
	closeDocument();

	bool isLoaded = m_pdfView->load(tempFileName);
	if (!isLoaded) {
		QPointer<QMessageBox> msgBox = new QMessageBox(QMessageBox::Critical,
		    tr("Open Error"), tr("Cannot open:\n") + tempFileName,
		    QMessageBox::Ok, this);
		msgBox->exec();
		delete msgBox;
		m_fileOpenRecentAction->removeFile(tempFileName);
#ifndef QT_NO_CURSOR
		QApplication::restoreOverrideCursor();
#endif // QT_NO_CURSOR
		return;
	}

	Poppler::Document *doc = m_pdfView->document();
	while (doc->isLocked()) {
		bool ok = true;
		QString password = QInputDialog::getText(this, tr("Document Password"),
		                                         tr("Please insert the password of the document:"),
		                                         QLineEdit::Password, QString(), &ok);
		if (!ok) {
			m_pdfView->close();
			m_fileOpenRecentAction->removeFile(tempFileName);
#ifndef QT_NO_CURSOR
			QApplication::restoreOverrideCursor();
#endif // QT_NO_CURSOR
			return;
		}
		doc->unlock(password.toLatin1(), password.toLatin1());
	}

	m_file = QFileInfo(tempFileName).absoluteFilePath();

	m_fileOpenRecentAction->addFile(m_file);
	// remove previous file from m_watcher and add new file
	if (m_watcher)
	{
		const QStringList files = m_watcher->files();
		if (!files.isEmpty())
			m_watcher->removePaths(files);
		m_watcher->addPath(m_file);
	}

//qCritical() << t.msecsTo(QTime::currentTime());
    Q_FOREACH(DocumentObserver *obs, m_observers) {
		if (keepPosition == PdfView::DontKeepPosition)
			obs->documentLoaded();
        obs->pageChanged(m_currentPage, keepPosition);
//qCritical() << t.msecsTo(QTime::currentTime());
    }

	// set window title
	const QString docTitle = doc->info("Title");
	setWindowTitle((docTitle.isEmpty() ? QFileInfo(m_file).fileName() : docTitle)
	    + " - " + QCoreApplication::applicationName());

	// enable actions
    m_fileSaveCopyAction->setEnabled(true);
	m_findAction->setEnabled(true);
	m_findNextAction->setEnabled(true);
	m_findPreviousAction->setEnabled(true);
	m_showPresentationAction->setEnabled(true);

#ifndef QT_NO_CURSOR
	QApplication::restoreOverrideCursor();
#endif // QT_NO_CURSOR
//qCritical() << "close and load document" << t.msecsTo(QTime::currentTime());
}