bool load(const char* fileName) { delete _pdfDoc; delete _outputDev; delete _nullOutputDev; for (int i=0; i<_bmpCache.size(); i++) delete _bmpCache[i]; _bmpCache.resize(0); for (int i=0; i<_textCache.size(); i++) delete _textCache[i]; _textCache.resize(0); _pdfDoc= new PDFDoc(new GooString(fileName), NULL, NULL, NULL); if (!_pdfDoc->isOk()) { printf("error loading pdf"); return false; } GBool bitmapTopDown = gTrue; SplashColor white; white[0]=0xff; white[1]=0xff; white[2]=0xff; // _outputDev = new SplashOutputDev_mod(splashModeRGB8, 4, gFalse, white, bitmapTopDown); _outputDev = new SplashOutputDev(splashModeRGB8, 4, gFalse, white, bitmapTopDown); if(!_outputDev) { printf("error loading pdf"); return false; } #ifdef USE_NULLOUTPUTDEV _nullOutputDev=new NullOutputDev(); #endif #ifdef POPPLER_VERSION _outputDev->startDoc(_pdfDoc); #else _outputDev->startDoc(_pdfDoc->getXRef()); #endif _bmpCache.resize(_pdfDoc->getNumPages()); for (int i=0; i<_bmpCache.size(); i++) _bmpCache[i]=NULL; _textCache.resize(_pdfDoc->getNumPages()); for (int i=0; i<_textCache.size(); i++) _textCache[i]=NULL; return true; }
int pdf_get_rect(char* filename, int page_num, int pdf_box, realrect* box) /* return the box converted to TeX points */ { GooString* name = new GooString(filename); PDFDoc* doc = new PDFDoc(name); if (!doc) { delete name; return -1; } /* if the doc got created, it now owns name, so we mustn't delete it! */ if (!doc->isOk()) { delete doc; return -1; } int pages = doc->getNumPages(); if (page_num > pages) page_num = pages; if (page_num < 0) page_num = pages + 1 + page_num; if (page_num < 1) page_num = 1; Page* page = doc->getCatalog()->getPage(page_num); PDFRectangle* r; switch (pdf_box) { default: case pdfbox_crop: r = page->getCropBox(); break; case pdfbox_media: r = page->getMediaBox(); break; case pdfbox_bleed: r = page->getBleedBox(); break; case pdfbox_trim: r = page->getTrimBox(); break; case pdfbox_art: r = page->getArtBox(); break; } box->x = 72.27 / 72 * my_fmin(r->x1, r->x2); box->y = 72.27 / 72 * my_fmin(r->y1, r->y2); box->wd = 72.27 / 72 * fabs(r->x2 - r->x1); box->ht = 72.27 / 72 * fabs(r->y2 - r->y1); delete doc; return 0; }
/**
 * Converts the filter chain's input PDF into an SVG file.
 *
 * Only the "application/pdf" -> "image/svg+xml" conversion is handled. All
 * pages are rendered at 72 DPI through an SvgOutputDev that writes to the
 * chain's output file.
 *
 * @param from source mime type.
 * @param to   destination mime type.
 * @return KoFilter::NotImplemented for any other mime type pair,
 *         KoFilter::StupidError when the PDF cannot be opened or the output
 *         device is not usable, KoFilter::OK otherwise.
 */
KoFilter::ConversionStatus PdfImport::convert(const QByteArray& from, const QByteArray& to)
{
    debugPdf << "to:" << to << " from:" << from;

    if (from != "application/pdf" || to != "image/svg+xml") {
        return KoFilter::NotImplemented;
    }

    // read config file
    globalParams = new GlobalParams();

    // pdfDoc is handed fname and released together with it below
    GooString * fname = new GooString(QFile::encodeName(m_chain->inputFile()).data());
    PDFDoc * pdfDoc = new PDFDoc(fname, 0, 0, 0);
    if (! pdfDoc->isOk()) {
        delete pdfDoc;
        // reset the global as well, so nothing is left pointing at freed memory
        delete globalParams;
        globalParams = 0;
        return KoFilter::StupidError;
    }

    double hDPI = 72.0;
    double vDPI = 72.0;

    int firstPage = 1;
    int lastPage = pdfDoc->getNumPages();

    debugPdf << "converting pages" << firstPage << "-" << lastPage;

    KoFilter::ConversionStatus status = KoFilter::OK;

    SvgOutputDev * dev = new SvgOutputDev(m_chain->outputFile());
    if (dev->isOk()) {
        int rotate = 0;
        GBool useMediaBox = gTrue;
        GBool crop = gFalse;
        GBool printing = gFalse;
        pdfDoc->displayPages(dev, firstPage, lastPage, hDPI, vDPI, rotate, useMediaBox, crop, printing);
        dev->dumpContent();
        debugPdf << "wrote file to" << m_chain->outputFile();
    } else {
        // nothing was rendered, so do not claim success
        status = KoFilter::StupidError;
    }

    delete dev;
    delete pdfDoc;
    delete globalParams;
    globalParams = 0;

    // check for memory leaks
    Object::memCheck(stderr);

    return status;
}
/**
 * Imports every usable page of a PDF file into a Dia diagram.
 *
 * Each page is rendered at 72 DPI through a DiaOutputDev; pages that are
 * missing or not OK are skipped.
 *
 * @param filename  path of the PDF file.
 * @param dia       diagram data handed to the DiaOutputDev.
 * @param ctx       context used for the error message.
 * @param user_data unused.
 * @return TRUE when the document could be opened, FALSE otherwise.
 */
gboolean import_pdf(const gchar *filename, DiagramData *dia, DiaContext *ctx, void* user_data)
{
  PDFDoc *doc;
  GooString *fileName = new GooString(filename);
  // no passwords yet
  GooString *ownerPW = NULL;
  GooString *userPW = NULL;
  gboolean ret = FALSE;

  // without this we will get strange crashes (at least with /O2 build)
  globalParams = new GlobalParams();

  doc = PDFDocFactory().createPDFDoc(*fileName, ownerPW, userPW);
  if (!doc || !doc->isOk()) {
    dia_context_add_message (ctx, _("PDF document not OK.\n%s"),
                             dia_context_get_filename (ctx));
  } else {
    const int numPages = doc->getNumPages();
    DiaOutputDev *diaOut = new DiaOutputDev(dia, numPages);

    for (int pg = 1; pg <= numPages; ++pg) {
      Page *page = doc->getPage (pg);
      if (!page || !page->isOk())
        continue;
      doc->displayPage(diaOut, pg,
                       72.0, 72.0, /* DPI, scaling elsewhere */
                       0, /* rotate */
                       gTrue, /* useMediaBox */
                       gTrue, /* Crop */
                       gFalse /* printing */
                       );
    }
    delete diaOut;
    ret = TRUE;
  }

  delete doc;
  // clear the global after freeing it so it does not dangle
  delete globalParams;
  globalParams = NULL;
  delete fileName;

  return ret;
}
int pdf_count_pages(char* filename) { int pages = 0; GooString* name = new GooString(filename); PDFDoc* doc = new PDFDoc(name); if (!doc) { delete name; return 0; } /* if the doc got created, it now owns name, so we mustn't delete it! */ if (doc->isOk()) pages = doc->getNumPages(); delete doc; return pages; }
/**
 * Imports the PDF file fn into m_Doc by rendering it through a SlaOutputDev.
 *
 * Optional content groups (OCGs) of the PDF are collected first. When the
 * importer creates a new document (LoadSavePlugin::lfCreateDoc), and
 * POPPLER_VERSION is defined, each group becomes a layer, the document info
 * (title, author, subject, keywords) is copied, and every page of the PDF is
 * added and rendered. Otherwise only the first page is rendered.
 *
 * If nothing was imported, the colors that were added to the document's
 * color list during the import are removed again.
 *
 * @param fn path of the PDF file.
 * @return always true.
 */
bool PdfPlug::convert(QString fn)
{
	bool firstPg = true;
	int currentLayer = m_Doc->activeLayer();
	int baseLayer = m_Doc->activeLayer();
	importedColors.clear();
	if (progressDialog)
	{
		progressDialog->setOverallProgress(2);
		progressDialog->setLabel("GI", tr("Generating Items"));
		qApp->processEvents();
	}
	oldDocItemCount = m_Doc->Items->count();
	if (progressDialog)
	{
		progressDialog->setBusyIndicator("GI");
		qApp->processEvents();
	}

	// A plain "new" never yields a null pointer, so the results are used unchecked.
	globalParams = new GlobalParams();
	GooString *fname = new GooString(QFile::encodeName(fn).data());
	globalParams->setErrQuiet(gTrue);
	GBool hasOcg = gFalse;
	QList<OptionalContentGroup*> ocgGroups;
//	globalParams->setPrintCommands(gTrue);
	PDFDoc *pdfDoc = new PDFDoc(fname, 0, 0, 0);
	if (pdfDoc->isOk())
	{
		double hDPI = 72.0;
		double vDPI = 72.0;
		int firstPage = 1;
		int lastPage = pdfDoc->getNumPages();
		SlaOutputDev *dev = new SlaOutputDev(m_Doc, &Elements, &importedColors, importerFlags);
		if (dev->isOk())
		{
			// Collect the optional content groups, without duplicate names.
			// Groups are prepended, so ocgGroups ends up in reverse visiting order.
			OCGs* ocg = pdfDoc->getOptContentConfig();
			if (ocg)
			{
				hasOcg = ocg->hasOCGs();
				if (hasOcg)
				{
					QStringList ocgNames;
					Array *order = ocg->getOrderArray();
					if (order)
					{
						for (int i = 0; i < order->getLength (); ++i)
						{
							Object orderItem;
							order->get(i, &orderItem);
							if (orderItem.isDict())
							{
								Object ref;
								order->getNF(i, &ref);
								if (ref.isRef())
								{
									OptionalContentGroup *oc = ocg->findOcgByRef(ref.getRef());
									// skip references that do not resolve to a known group
									if (oc)
									{
										QString ocgName = UnicodeParsedString(oc->getName());
										if (!ocgNames.contains(ocgName))
										{
											ocgGroups.prepend(oc);
											ocgNames.append(ocgName);
										}
									}
								}
								ref.free();
							}
							else
							{
								// Entry is not a plain group: take every group of the document.
								GooList *ocgs = ocg->getOCGs ();
								for (int j = 0; j < ocgs->getLength (); ++j)
								{
									OptionalContentGroup *oc = (OptionalContentGroup *)ocgs->get(j);
									QString ocgName = UnicodeParsedString(oc->getName());
									if (!ocgNames.contains(ocgName))
									{
										ocgGroups.prepend(oc);
										ocgNames.append(ocgName);
									}
								}
							}
							// release the fetched entry, like ref above
							orderItem.free();
						}
					}
					else
					{
						// No order array: take every group of the document.
						GooList *ocgs = ocg->getOCGs ();
						for (int j = 0; j < ocgs->getLength (); ++j)
						{
							OptionalContentGroup *oc = (OptionalContentGroup *)ocgs->get(j);
							QString ocgName = UnicodeParsedString(oc->getName());
							if (!ocgNames.contains(ocgName))
							{
								ocgGroups.prepend(oc);
								ocgNames.append(ocgName);
							}
						}
					}
				}
			}

			GBool useMediaBox = gTrue;
			GBool crop = gFalse;
			GBool printing = gFalse;
			dev->startDoc(pdfDoc, pdfDoc->getXRef(), pdfDoc->getCatalog());
			int rotate = pdfDoc->getPageRotate(firstPage);
			if (importerFlags & LoadSavePlugin::lfCreateDoc)
			{
// POPPLER_VERSION appeared in 0.19.0 first
#ifdef POPPLER_VERSION
				if (hasOcg)
				{
					// One layer per group; visibility and printability follow the group's state.
					QString actL = m_Doc->activeLayerName();
					for (int a = 0; a < ocgGroups.count(); a++)
					{
						OptionalContentGroup *oc = ocgGroups[a];
						if (actL != UnicodeParsedString(oc->getName()))
							currentLayer = m_Doc->addLayer(UnicodeParsedString(oc->getName()), false);
						else
							currentLayer = m_Doc->layerIDFromName(UnicodeParsedString(oc->getName()));
// POPPLER_VERSION appeared in 0.19.0 first
#ifdef POPPLER_VERSION
						if ((oc->getViewState() == OptionalContentGroup::ocUsageOn) || (oc->getViewState() == OptionalContentGroup::ocUsageUnset))
							m_Doc->setLayerVisible(currentLayer, true);
						else
							m_Doc->setLayerVisible(currentLayer, false);
						if ((oc->getPrintState() == OptionalContentGroup::ocUsageOn) || (oc->getPrintState() == OptionalContentGroup::ocUsageUnset))
							m_Doc->setLayerPrintable(currentLayer, true);
						else
							m_Doc->setLayerPrintable(currentLayer, false);
#else
						if (oc->getState() == OptionalContentGroup::On)
						{
							m_Doc->setLayerVisible(currentLayer, true);
							m_Doc->setLayerPrintable(currentLayer, true);
						}
						else
						{
							m_Doc->setLayerVisible(currentLayer, false);
							m_Doc->setLayerPrintable(currentLayer, false);
						}
#endif
						oc->setState(OptionalContentGroup::Off);
					}
					dev->layersSetByOCG = true;
				}
#endif
				// Copy the document information. obj is freed after every
				// lookup, whether or not the entry turned out to be a string.
				Object info;
				pdfDoc->getDocInfo(&info);
				if (info.isDict())
				{
					Object obj;
					Dict *infoDict = info.getDict();
					if (infoDict->lookup((char*)"Title", &obj )->isString())
						m_Doc->documentInfo().setTitle(UnicodeParsedString(obj.getString()));
					obj.free();
					if (infoDict->lookup((char*)"Author", &obj )->isString())
						m_Doc->documentInfo().setAuthor(UnicodeParsedString(obj.getString()));
					obj.free();
					if (infoDict->lookup((char*)"Subject", &obj )->isString())
						m_Doc->documentInfo().setSubject(UnicodeParsedString(obj.getString()));
					obj.free();
					if (infoDict->lookup((char*)"Keywords", &obj )->isString())
						m_Doc->documentInfo().setKeywords(UnicodeParsedString(obj.getString()));
					obj.free();
				}
				info.free();

				// The first PDF page goes onto the current page; every further
				// page gets a newly added document page of matching size.
				for (int pp = 0; pp < lastPage; pp++)
				{
					m_Doc->setActiveLayer(baseLayer);
					if (firstPg)
						firstPg = false;
					else
						m_Doc->addPage(pp);
					m_Doc->currentPage()->setInitialHeight(pdfDoc->getPageMediaHeight(pp + 1));
					m_Doc->currentPage()->setInitialWidth(pdfDoc->getPageMediaWidth(pp + 1));
					m_Doc->currentPage()->setHeight(pdfDoc->getPageMediaHeight(pp + 1));
					m_Doc->currentPage()->setWidth(pdfDoc->getPageMediaWidth(pp + 1));
					m_Doc->currentPage()->MPageNam = CommonStrings::trMasterPageNormal;
					m_Doc->currentPage()->m_pageSize = "Custom";
					m_Doc->setPageSize("Custom");
					m_Doc->reformPages(true);
					if (hasOcg)
					{
						for (int a = 0; a < ocgGroups.count(); a++)
						{
							OptionalContentGroup *oc = ocgGroups[a];
						//	m_Doc->setActiveLayer(UnicodeParsedString(oc->getName()));
						//	currentLayer = m_Doc->activeLayer();
							oc->setState(OptionalContentGroup::On);
						//	pdfDoc->displayPage(dev, pp + 1, hDPI, vDPI, rotate, useMediaBox, crop, printing);
						//	oc->setState(OptionalContentGroup::Off);
						}
						pdfDoc->displayPage(dev, pp + 1, hDPI, vDPI, rotate, useMediaBox, crop, printing);
					}
					else
						pdfDoc->displayPage(dev, pp + 1, hDPI, vDPI, rotate, useMediaBox, crop, printing);
				}
			}
			else
			{
				// Importing into an existing document: first page only, all groups switched on.
				if (hasOcg)
				{
					for (int a = 0; a < ocgGroups.count(); a++)
					{
						ocgGroups[a]->setState(OptionalContentGroup::On);
					}
				}
				pdfDoc->displayPage(dev, firstPage, hDPI, vDPI, rotate, useMediaBox, crop, printing);
			}
		}
		delete dev;
	}
	delete pdfDoc;
	delete globalParams;
	globalParams = 0;

//	qDebug() << "converting finished";
//	qDebug() << "Imported" << Elements.count() << "Elements";

	// Nothing imported: take the imported colors out of the document again.
	if (Elements.count() == 0)
	{
		if (importedColors.count() != 0)
		{
			for (int cd = 0; cd < importedColors.count(); cd++)
			{
				m_Doc->PageColors.remove(importedColors[cd]);
			}
		}
	}

	if (progressDialog)
		progressDialog->close();
	return true;
}
int main(int argc, char *argv[]) { // parse args bool ok = parseArgs(argDesc, &argc, argv); if (!ok || argc < 2 || argc > 3 || printHelp) { fprintf(stderr, "pdftoipe version %s\n", PDFTOIPE_VERSION); printUsage("pdftoipe", "<PDF-file> [<XML-file>]", argDesc); return 1; } GooString *fileName = new GooString(argv[1]); globalParams = new GlobalParams(); if (quiet) globalParams->setErrQuiet(quiet); GooString *ownerPW, *userPW; if (ownerPassword[0]) { ownerPW = new GooString(ownerPassword); } else { ownerPW = 0; } if (userPassword[0]) { userPW = new GooString(userPassword); } else { userPW = 0; } // open PDF file PDFDoc *doc = new PDFDoc(fileName, ownerPW, userPW); delete userPW; delete ownerPW; if (!doc->isOk()) return 1; // construct XML file name std::string xmlFileName; if (argc == 3) { xmlFileName = argv[2]; } else { const char *p = fileName->c_str() + fileName->getLength() - 4; if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) { xmlFileName = std::string(fileName->c_str(), fileName->getLength() - 4); } else { xmlFileName = fileName->c_str(); } xmlFileName += ".ipe"; } // get page range if (firstPage < 1) firstPage = 1; if (lastPage < 1 || lastPage > doc->getNumPages()) lastPage = doc->getNumPages(); // write XML file XmlOutputDev *xmlOut = new XmlOutputDev(xmlFileName, doc->getXRef(), doc->getCatalog(), firstPage, lastPage); // tell output device about text handling xmlOut->setTextHandling(math, notext, literal, mergeLevel, unicodeLevel); int exitCode = 2; if (xmlOut->isOk()) { doc->displayPages(xmlOut, firstPage, lastPage, // double hDPI, double vDPI, int rotate, // bool useMediaBox, bool crop, bool printing, 72.0, 72.0, 0, false, false, false); exitCode = 0; } if (xmlOut->hasUnicode()) { fprintf(stderr, "The document contains Unicode (non-ASCII) text.\n"); if (unicodeLevel <= 1) fprintf(stderr, "Unknown Unicode characters were replaced by [U+XXX].\n"); else fprintf(stderr, "UTF-8 was set as document encoding in the preamble.\n"); } // clean up delete xmlOut; delete 
doc; delete globalParams; return exitCode; }
int main(int argc, char *argv[]) { PDFDoc *doc; GString *fileName; GString *ownerPW, *userPW; SplashColor paperColor; SplashOutputDev *splashOut; GBool ok; int exitCode; int pg; exitCode = 99; // parse args ok = parseArgs(argDesc, &argc, argv); if (!ok || argc != 2 || printVersion || printHelp) { fprintf(stderr, "pdf2jpeg version %s\n", xpdfVersion); fprintf(stderr, "%s\n", xpdfCopyright); if (!printVersion) { printUsage("pdf2jpeg", "<PDF-file> -o <jpegfile>", argDesc); } goto err0; } fileName = new GString(argv[1]); // read config file globalParams = new GlobalParams(cfgFileName); globalParams->setupBaseFonts(NULL); // open PDF file if (ownerPassword[0]) { ownerPW = new GString(ownerPassword); } else { ownerPW = NULL; } if (userPassword[0]) { userPW = new GString(userPassword); } else { userPW = NULL; } doc = new PDFDoc(fileName, ownerPW, userPW); if (userPW) { delete userPW; } if (ownerPW) { delete ownerPW; } if (!doc->isOk()) { exitCode = 1; goto err1; } paperColor[0] = paperColor[1] = paperColor[2] = 0xff; splashOut = new SplashOutputDev(splashModeRGB8, 1, gFalse, paperColor); splashOut->startDoc(doc->getXRef()); if(page>=1 && page<=doc->getNumPages()) { double r = resolution; if(width) { int old_width = doc->getPageCropWidth(page); r = 72.0*width/old_width; } doc->displayPage(splashOut, page, r, r, 0, gFalse, gTrue, gFalse); SplashBitmap*bitmap = splashOut->getBitmap(); if(bitmap) { Guchar*rgb = bitmap->getDataPtr(); int width = bitmap->getWidth(); int height = bitmap->getHeight(); jpeg_save(rgb, width, height, quality, output); } } delete splashOut; exitCode = 0; // clean up err1: delete doc; delete globalParams; err0: // check for memory leaks Object::memCheck(stderr); gMemReport(stderr); return exitCode; }
int extract_images_from_pdf(char* filename, char* target, char* owner_password, char* user_password, char* range, char* format, int jpg_quality, GBool dump_jpg, GBool tiff_jpg) { if (user_cancelled) return gpret_user_cancelled; // load config xpdf_rc xrc; // open file xpdf_doc xdoc(filename, owner_password, user_password); PDFDoc* doc = xdoc.get_doc(); if (!doc->isOk()) return doc->getErrorCode() == errEncrypted ? gpret_pdf_encrypted : gpret_cant_open_pdf; // check for copy permission // if (!doc->okToCopy()) // return gpret_dont_allow_copy; // get page range page_range range_list(range); if (*range == '\0') { range_list.add_item(range_item(1, doc->getNumPages())); } if (user_cancelled) return gpret_user_cancelled; // write image files fi_loader fi; int progress = 0; image_extractor img_out(target, dump_jpg, format, jpg_quality, tiff_jpg); for (int i = 0; i < range_list.item_count(); i++) { range_item& item = range_list.get_item(i); for (int pg = item.first; pg <= min(item.last, doc->getNumPages()); pg++) { if (user_cancelled) return gpret_user_cancelled; doc->displayPage(&img_out, pg, 72, 72, 0, gFalse, gTrue, gFalse); printf("progress: %d\n", ++progress * 100 / range_list.page_count()); } } printf("image count: %d\n", img_out.get_image_number()); return gpret_success; }
/**
 * Extracts the text of the PDF at _documentPath into _documentTextBuffer and
 * returns it as an UnparsedDocument.
 *
 * If the PDF carries metadata containing an "<rdf" element, that element is
 * parsed and passed to appendPdfMetaData(); when a title or author is then
 * available, a small <head> block is written in front of the page text.
 * A document that is not OK, or not OK to copy, produces no text.
 * The result always gets "path" and "docno" metadata pairs, and
 * _documentPath is cleared afterwards.
 *
 * @return pointer to the internal _unparsedDocument, or 0 when
 *         _documentPath is empty.
 * @throws lemur::api::Exception (via LEMUR_RETHROW) when the metadata cannot
 *         be read by the XML reader.
 */
indri::parse::UnparsedDocument* indri::parse::PDFDocumentExtractor::nextDocument() {
  if( !_documentPath.length() )
    return 0;

  PDFDoc* doc = 0;
  TextOutputDev* textOut = 0;
  GString* gfilename = new GString(_documentPath.c_str());
  doc = new PDFDoc( gfilename );

  // if the doc is not ok, or ok to copy, it
  // will be a document of length 0.
  if( doc->isOk() && doc->okToCopy() ) {
    void* stream = &_documentTextBuffer;
    textOut = new TextOutputDev( buffer_write, stream, gFalse, gFalse);

    if ( textOut->isOk() ) {
      int firstPage = 1;
      int lastPage = doc->getNumPages();
      double hDPI=72.0;
      double vDPI=72.0;
      int rotate=0;
      GBool useMediaBox=gFalse;
      GBool crop=gTrue;
      GBool printing=gFalse;

      // Fetch the metadata once and release it after use, so the returned
      // string is not leaked.
      GString* rawMetaData = doc->readMetadata();
      if( rawMetaData != NULL ) {
        GString preparedMetaData="";

        // look for <rdf:RDF and stop at </rdf:RDF>
        for( int x=0; x<rawMetaData->getLength(); x++ ) {
          const char c = rawMetaData->getChar(x);
          if( c!='?' && c!=':' ) {
            // skip characters which the XMLReader doesn't understand
            preparedMetaData.append(c);
          }
        }
        delete rawMetaData;

        std::string metaData(preparedMetaData.getCString());

        // Without an "<rdf" element there is nothing to parse; calling
        // substr() with npos as start position would throw std::out_of_range.
        const std::string::size_type startbegin = metaData.find("<rdf");
        if( startbegin != std::string::npos ) {
          const std::string::size_type stopend = metaData.find(">", metaData.rfind("</rdf") );
          if( stopend == std::string::npos || stopend < startbegin ) {
            // no usable closing tag: keep everything from "<rdf" to the end
            metaData = metaData.substr( startbegin );
          } else {
            metaData = metaData.substr( startbegin, (stopend-startbegin)+1 );
          }

          indri::xml::XMLReader reader;
          try {
            std::auto_ptr<indri::xml::XMLNode> result( reader.read( metaData.c_str() ) );
            appendPdfMetaData( result.get() );
          } catch( lemur::api::Exception& e ) {
            // do not leak the device and the document when rethrowing
            delete textOut;
            delete doc;
            LEMUR_RETHROW( e, "Had trouble reading PDF metadata" );
          }

          if( _author.length()>0 || _title.length()>0 ) {
            std::string createdPdfHeader;
            createdPdfHeader="<head>\n";
            if(_title.length()>0) {
              createdPdfHeader+="<title>";
              createdPdfHeader+=_title;
              createdPdfHeader+="</title>\n";
            }
            if(_author.length()>0) {
              createdPdfHeader+="<author>";
              createdPdfHeader+=_author;
              createdPdfHeader+="</author>\n";
            }
            createdPdfHeader+="</head>\n";

            char *metastream = _documentTextBuffer.write( createdPdfHeader.length()+1 );
            strcpy(metastream, createdPdfHeader.c_str());
          }
        }
      }

      doc->displayPages(textOut, firstPage, lastPage, hDPI, vDPI, rotate, useMediaBox, crop, printing);
    }
  }

  delete textOut;
  delete doc;

  _unparsedDocument.textLength = _documentTextBuffer.position();
  // no null 0 if text is empty.
  _unparsedDocument.contentLength = _unparsedDocument.textLength ? _documentTextBuffer.position() - 1 : 0 ;

  // The path is stored after the text in the same buffer.
  char* docnoPoint = _documentTextBuffer.write( _documentPath.length()+1 );
  strcpy( docnoPoint, _documentPath.c_str() );
  _unparsedDocument.text = _documentTextBuffer.front();
  _unparsedDocument.content = _documentTextBuffer.front();
  _unparsedDocument.metadata.clear();

  indri::parse::MetadataPair pair;

  pair.key = "path";
  pair.value = docnoPoint;
  pair.valueLength = _documentPath.length()+1;
  _unparsedDocument.metadata.push_back( pair );

  _docnostring.assign(_documentPath.c_str() );
  cleanDocno();
  pair.value = _docnostring.c_str();
  pair.valueLength = _docnostring.length()+1;
  pair.key = "docno";
  _unparsedDocument.metadata.push_back( pair );

  _documentPath = "";

  return &_unparsedDocument;
}