/**
 * JNI implementation of com.foolabs.xpdf.PDFPage._getText.
 *
 * Renders the native page behind `obj` through a TextOutputDev whose output
 * callback is TextCollector::CollectText, so extracted text is handed to the
 * Java-side `javaCollector` while the page is being displayed.
 *
 * @param env            JNI environment.
 * @param obj            Java PDFPage object wrapping a native Page handle.
 * @param document       Java object wrapping the native PDFDoc handle.
 * @param javaCollector  Java object that receives the collected text.
 * @param physicalLayout Passed to TextOutputDev as its physical-layout flag.
 * @param fixedPitch     Accepted for signature compatibility; not used here.
 * @param rawOrder       Passed to TextOutputDev as its raw-order flag.
 */
JNIEXPORT void JNICALL Java_com_foolabs_xpdf_PDFPage__1getText
  (JNIEnv *env, jobject obj, jobject document, jobject javaCollector,
   jboolean physicalLayout, jdouble fixedPitch, jboolean rawOrder)
{
    (void)fixedPitch;

    Page *page = getHandle<Page>(env, obj);
    PDFDoc *doc = getHandle<PDFDoc>(env, document);
    // Dereferencing a missing native handle would crash the JVM, so bail out.
    // NOTE(review): assumes getHandle can yield a null pointer; consider
    // raising a Java exception here instead of returning silently.
    if (!page || !doc) {
        return;
    }

    TextCollector *collector = new TextCollector(env, javaCollector);

    GBool gPhysicalLayout = physicalLayout ? gTrue : gFalse;
    GBool gRawOrder = rawOrder ? gTrue : gFalse;
    TextOutputDev *outputDevice = new TextOutputDev(&TextCollector::CollectText,
                                                    collector,
                                                    gPhysicalLayout,
                                                    gRawOrder);

    if (outputDevice->isOk()) {
        const double hDPI = 72;
        const double vDPI = 72;
        const int rotate = 0;
        const GBool useMediaBox = gFalse;
        const GBool crop = gTrue;
        const GBool printing = gFalse;
        Catalog *catalog = doc->getCatalog();

        page->display(outputDevice, hDPI, vDPI, rotate,
                      useMediaBox, crop, printing, catalog);
    }

    // The output device was given a pointer to the collector, so destroy the
    // device first; the collector must outlive anything that may call into it.
    delete outputDevice;
    delete collector; // All text should already be in Java
}
// 0 <= pageNo <numPage CImage* getPage(int w, int h, int pageNo) { if(!cacheValid(w,h,pageNo)) { // poppler uses 1 indexing for pageNo. double DPI_W=calcDPI_width(w, pageNo); double DPI_H=calcDPI_height(h, pageNo); double DPI=MIN(DPI_W, DPI_H); delete _textCache[pageNo]; _textCache[pageNo]=new intmatrixn(); _textCacheState=pageNo; //_pdfDoc->getPageRotate(pageNo+1) _pdfDoc->displayPage(_outputDev, pageNo+1, DPI, DPI, 0, gFalse, gTrue, gFalse); #ifdef USE_NULLOUTPUTDEV _nullOutputDev->setInfo(_pdfDoc->getPageRotate(pageNo+1), _pdfDoc->getCatalog()->getPage(pageNo+1)->getCropBox()->x1, _pdfDoc->getCatalog()->getPage(pageNo+1)->getCropBox()->y1, _pdfDoc->getPageCropWidth(pageNo+1), _pdfDoc->getPageCropHeight(pageNo+1), _outputDev->getBitmap()->getWidth(), _outputDev->getBitmap()->getHeight()); _pdfDoc->displayPage(_nullOutputDev, pageNo+1, DPI, DPI, 0, gFalse, gTrue, gFalse); #endif _textCacheState=-1; /*//_pdfDoc->displayPageSlice(_outputDev, pageNo+1, DPI, DPI, 0, gFalse, gTrue, gFalse, 20,20, 400,400); if(zoom2<zoom1) { if(h==_outputDev->getBitmap()->getHeight()) bOkay=true; else { vDPI=72.0*h/_pdfDoc->getPageCropHeight(pageNo+1); } } else { if(w==_outputDev->getBitmap()->getWidth()) bOkay=true; else { double tt=72*w/_pdfDoc->getPageCropHeight(pageNo+1); } } } while(!bOkay);*/ SplashBitmap *temp=_outputDev->takeBitmap(); CImage* ptr=new CImage(); ptr->SetData(temp->getWidth(), temp->getHeight(), temp->getDataPtr(), temp->getRowSize()); delete _bmpCache[pageNo]; _bmpCache[pageNo]=ptr; delete temp; } return _bmpCache[pageNo]; }
int pdf_get_rect(char* filename, int page_num, int pdf_box, realrect* box) /* return the box converted to TeX points */ { GooString* name = new GooString(filename); PDFDoc* doc = new PDFDoc(name); if (!doc) { delete name; return -1; } /* if the doc got created, it now owns name, so we mustn't delete it! */ if (!doc->isOk()) { delete doc; return -1; } int pages = doc->getNumPages(); if (page_num > pages) page_num = pages; if (page_num < 0) page_num = pages + 1 + page_num; if (page_num < 1) page_num = 1; Page* page = doc->getCatalog()->getPage(page_num); PDFRectangle* r; switch (pdf_box) { default: case pdfbox_crop: r = page->getCropBox(); break; case pdfbox_media: r = page->getMediaBox(); break; case pdfbox_bleed: r = page->getBleedBox(); break; case pdfbox_trim: r = page->getTrimBox(); break; case pdfbox_art: r = page->getArtBox(); break; } box->x = 72.27 / 72 * my_fmin(r->x1, r->x2); box->y = 72.27 / 72 * my_fmin(r->y1, r->y2); box->wd = 72.27 / 72 * fabs(r->x2 - r->x1); box->ht = 72.27 / 72 * fabs(r->y2 - r->y1); delete doc; return 0; }
/**
 * Imports the PDF file `fn` into m_Doc by rendering it through an SlaOutputDev.
 *
 * Steps:
 *  - collect the document's optional content groups (OCGs), de-duplicated by
 *    name, preferring the order array when one is present;
 *  - when importerFlags contains lfCreateDoc: create/lookup one layer per OCG
 *    (only when POPPLER_VERSION is defined), copy Title/Author/Subject/Keywords
 *    from the info dictionary, and render every page onto its own document
 *    page sized from the PDF media box;
 *  - otherwise render only the first page;
 *  - if nothing was imported, remove the colours registered during the import.
 *
 * @param fn path of the PDF file to import.
 * @return always true; a document that fails to open simply imports nothing.
 */
bool PdfPlug::convert(QString fn)
{
	bool firstPg = true;
	int currentLayer = m_Doc->activeLayer();
	int baseLayer = m_Doc->activeLayer();
	importedColors.clear();
	if (progressDialog)
	{
		progressDialog->setOverallProgress(2);
		progressDialog->setLabel("GI", tr("Generating Items"));
		qApp->processEvents();
	}
	oldDocItemCount = m_Doc->Items->count();
	if (progressDialog)
	{
		progressDialog->setBusyIndicator("GI");
		qApp->processEvents();
	}
	globalParams = new GlobalParams();
	if (globalParams)
	{
		GooString *fname = new GooString(QFile::encodeName(fn).data());
		globalParams->setErrQuiet(gTrue);
		GBool hasOcg = gFalse;
		QList<OptionalContentGroup*> ocgGroups;
//		globalParams->setPrintCommands(gTrue);
		PDFDoc *pdfDoc = new PDFDoc(fname, 0, 0, 0);
		if (pdfDoc)
		{
			if (pdfDoc->isOk())
			{
				double hDPI = 72.0;
				double vDPI = 72.0;
				int firstPage = 1;
				int lastPage = pdfDoc->getNumPages();
				SlaOutputDev *dev = new SlaOutputDev(m_Doc, &Elements, &importedColors, importerFlags);
				if (dev->isOk())
				{
					OCGs* ocg = pdfDoc->getOptContentConfig();
					if (ocg)
					{
						hasOcg = ocg->hasOCGs();
						if (hasOcg)
						{
							// Names already collected; used to skip duplicates.
							QStringList ocgNames;
							Array *order = ocg->getOrderArray();
							if (order)
							{
								for (int i = 0; i < order->getLength (); ++i)
								{
									Object orderItem;
									order->get(i, &orderItem);
									if (orderItem.isDict())
									{
										Object ref;
										order->getNF(i, &ref);
										if (ref.isRef())
										{
											OptionalContentGroup *oc = ocg->findOcgByRef(ref.getRef());
											// The reference may not resolve to a known group; skip it
											// rather than dereferencing a null pointer.
											if (oc)
											{
												QString ocgName = UnicodeParsedString(oc->getName());
												if (!ocgNames.contains(ocgName))
												{
													ocgGroups.prepend(oc);
													ocgNames.append(ocgName);
												}
											}
										}
										ref.free();
									}
									else
									{
										// Entry is not a plain group dictionary: fall back to the
										// flat list of all groups (duplicates are filtered by name).
										GooList *ocgs = ocg->getOCGs ();
										for (int j = 0; j < ocgs->getLength (); ++j)
										{
											OptionalContentGroup *oc = (OptionalContentGroup *)ocgs->get(j);
											QString ocgName = UnicodeParsedString(oc->getName());
											if (!ocgNames.contains(ocgName))
											{
												ocgGroups.prepend(oc);
												ocgNames.append(ocgName);
											}
										}
									}
									// Release the fetched array element (was leaked before).
									orderItem.free();
								}
							}
							else
							{
								// No order array: take every group in list order.
								GooList *ocgs = ocg->getOCGs ();
								for (int i = 0; i < ocgs->getLength (); ++i)
								{
									OptionalContentGroup *oc = (OptionalContentGroup *)ocgs->get(i);
									QString ocgName = UnicodeParsedString(oc->getName());
									if (!ocgNames.contains(ocgName))
									{
										ocgGroups.prepend(oc);
										ocgNames.append(ocgName);
									}
								}
							}
						}
					}
					GBool useMediaBox = gTrue;
					GBool crop = gFalse;
					GBool printing = gFalse;
					dev->startDoc(pdfDoc, pdfDoc->getXRef(), pdfDoc->getCatalog());
					// NOTE(review): the rotation of the first page is applied to every page.
					int rotate = pdfDoc->getPageRotate(firstPage);
					if (importerFlags & LoadSavePlugin::lfCreateDoc)
					{
// POPPLER_VERSION appeared in 0.19.0 first
#ifdef POPPLER_VERSION
						if (hasOcg)
						{
							// Map each OCG onto a document layer, reusing the active layer
							// when it already carries the group's name.
							QString actL = m_Doc->activeLayerName();
							for (int a = 0; a < ocgGroups.count(); a++)
							{
								OptionalContentGroup *oc = ocgGroups[a];
								if (actL != UnicodeParsedString(oc->getName()))
									currentLayer = m_Doc->addLayer(UnicodeParsedString(oc->getName()), false);
								else
									currentLayer = m_Doc->layerIDFromName(UnicodeParsedString(oc->getName()));
// POPPLER_VERSION appeared in 0.19.0 first
// (this inner check repeats the enclosing one, so its #else branch is never compiled)
#ifdef POPPLER_VERSION
								if ((oc->getViewState() == OptionalContentGroup::ocUsageOn) || (oc->getViewState() == OptionalContentGroup::ocUsageUnset))
									m_Doc->setLayerVisible(currentLayer, true);
								else
									m_Doc->setLayerVisible(currentLayer, false);
								if ((oc->getPrintState() == OptionalContentGroup::ocUsageOn) || (oc->getPrintState() == OptionalContentGroup::ocUsageUnset))
									m_Doc->setLayerPrintable(currentLayer, true);
								else
									m_Doc->setLayerPrintable(currentLayer, false);
#else
								if (oc->getState() == OptionalContentGroup::On)
								{
									m_Doc->setLayerVisible(currentLayer, true);
									m_Doc->setLayerPrintable(currentLayer, true);
								}
								else
								{
									m_Doc->setLayerVisible(currentLayer, false);
									m_Doc->setLayerPrintable(currentLayer, false);
								}
#endif
								oc->setState(OptionalContentGroup::Off);
							}
							dev->layersSetByOCG = true;
						}
#endif
						// Copy the basic metadata. The looked-up object is freed after
						// every lookup, not only when it turned out to be a string.
						Object info;
						pdfDoc->getDocInfo(&info);
						if (info.isDict())
						{
							Object obj;
							Dict *infoDict = info.getDict();
							if (infoDict->lookup((char*)"Title", &obj )->isString())
								m_Doc->documentInfo().setTitle(UnicodeParsedString(obj.getString()));
							obj.free();
							if (infoDict->lookup((char*)"Author", &obj )->isString())
								m_Doc->documentInfo().setAuthor(UnicodeParsedString(obj.getString()));
							obj.free();
							if (infoDict->lookup((char*)"Subject", &obj )->isString())
								m_Doc->documentInfo().setSubject(UnicodeParsedString(obj.getString()));
							obj.free();
							if (infoDict->lookup((char*)"Keywords", &obj )->isString())
								m_Doc->documentInfo().setKeywords(UnicodeParsedString(obj.getString()));
							obj.free();
						}
						info.free();
						for (int pp = 0; pp < lastPage; pp++)
						{
							m_Doc->setActiveLayer(baseLayer);
							// The document already has its first page; add one for each further PDF page.
							if (firstPg)
								firstPg = false;
							else
								m_Doc->addPage(pp);
							m_Doc->currentPage()->setInitialHeight(pdfDoc->getPageMediaHeight(pp + 1));
							m_Doc->currentPage()->setInitialWidth(pdfDoc->getPageMediaWidth(pp + 1));
							m_Doc->currentPage()->setHeight(pdfDoc->getPageMediaHeight(pp + 1));
							m_Doc->currentPage()->setWidth(pdfDoc->getPageMediaWidth(pp + 1));
							m_Doc->currentPage()->MPageNam = CommonStrings::trMasterPageNormal;
							m_Doc->currentPage()->m_pageSize = "Custom";
							m_Doc->setPageSize("Custom");
							m_Doc->reformPages(true);
							if (hasOcg)
							{
								// Switch every group on so the whole page content is rendered.
								for (int a = 0; a < ocgGroups.count(); a++)
								{
									OptionalContentGroup *oc = ocgGroups[a];
								//	m_Doc->setActiveLayer(UnicodeParsedString(oc->getName()));
								//	currentLayer = m_Doc->activeLayer();
									oc->setState(OptionalContentGroup::On);
								//	pdfDoc->displayPage(dev, pp + 1, hDPI, vDPI, rotate, useMediaBox, crop, printing);
								//	oc->setState(OptionalContentGroup::Off);
								}
							}
							pdfDoc->displayPage(dev, pp + 1, hDPI, vDPI, rotate, useMediaBox, crop, printing);
						}
					}
					else
					{
						// Importing into an existing document: render the first page only.
						if (hasOcg)
						{
							for (int a = 0; a < ocgGroups.count(); a++)
							{
								ocgGroups[a]->setState(OptionalContentGroup::On);
							}
						}
						pdfDoc->displayPage(dev, firstPage, hDPI, vDPI, rotate, useMediaBox, crop, printing);
					}
				}
				delete dev;
			}
		}
		delete pdfDoc;
	}
	delete globalParams;
	globalParams = 0;

//	qDebug() << "converting finished";
//	qDebug() << "Imported" << Elements.count() << "Elements";

	// Nothing was imported: drop the colours that were registered along the way.
	if (Elements.count() == 0)
	{
		for (int cd = 0; cd < importedColors.count(); cd++)
		{
			m_Doc->PageColors.remove(importedColors[cd]);
		}
	}

	if (progressDialog)
		progressDialog->close();
	return true;
}
int main(int argc, char *argv[]) { // parse args bool ok = parseArgs(argDesc, &argc, argv); if (!ok || argc < 2 || argc > 3 || printHelp) { fprintf(stderr, "pdftoipe version %s\n", PDFTOIPE_VERSION); printUsage("pdftoipe", "<PDF-file> [<XML-file>]", argDesc); return 1; } GooString *fileName = new GooString(argv[1]); globalParams = new GlobalParams(); if (quiet) globalParams->setErrQuiet(quiet); GooString *ownerPW, *userPW; if (ownerPassword[0]) { ownerPW = new GooString(ownerPassword); } else { ownerPW = 0; } if (userPassword[0]) { userPW = new GooString(userPassword); } else { userPW = 0; } // open PDF file PDFDoc *doc = new PDFDoc(fileName, ownerPW, userPW); delete userPW; delete ownerPW; if (!doc->isOk()) return 1; // construct XML file name std::string xmlFileName; if (argc == 3) { xmlFileName = argv[2]; } else { const char *p = fileName->c_str() + fileName->getLength() - 4; if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) { xmlFileName = std::string(fileName->c_str(), fileName->getLength() - 4); } else { xmlFileName = fileName->c_str(); } xmlFileName += ".ipe"; } // get page range if (firstPage < 1) firstPage = 1; if (lastPage < 1 || lastPage > doc->getNumPages()) lastPage = doc->getNumPages(); // write XML file XmlOutputDev *xmlOut = new XmlOutputDev(xmlFileName, doc->getXRef(), doc->getCatalog(), firstPage, lastPage); // tell output device about text handling xmlOut->setTextHandling(math, notext, literal, mergeLevel, unicodeLevel); int exitCode = 2; if (xmlOut->isOk()) { doc->displayPages(xmlOut, firstPage, lastPage, // double hDPI, double vDPI, int rotate, // bool useMediaBox, bool crop, bool printing, 72.0, 72.0, 0, false, false, false); exitCode = 0; } if (xmlOut->hasUnicode()) { fprintf(stderr, "The document contains Unicode (non-ASCII) text.\n"); if (unicodeLevel <= 1) fprintf(stderr, "Unknown Unicode characters were replaced by [U+XXX].\n"); else fprintf(stderr, "UTF-8 was set as document encoding in the preamble.\n"); } // clean up delete xmlOut; delete 
doc; delete globalParams; return exitCode; }