Пример #1
0
/*
 * JNI entry point behind PDFPage._getText.
 *
 * Runs the native page attached to `obj` through a TextOutputDev whose output
 * callback is TextCollector::CollectText, bound to `javaCollector`.
 *
 *   document        Java object carrying the native PDFDoc handle
 *   javaCollector   Java object wrapped by the TextCollector
 *   physicalLayout  forwarded to TextOutputDev as a GBool
 *   fixedPitch      not used by this implementation
 *   rawOrder        forwarded to TextOutputDev as a GBool
 */
JNIEXPORT void JNICALL Java_com_foolabs_xpdf_PDFPage__1getText
	  (JNIEnv *env, jobject obj, jobject document, jobject javaCollector,
			  jboolean physicalLayout, jdouble fixedPitch, jboolean rawOrder) {
	Page *nativePage = getHandle<Page>(env, obj);
	PDFDoc *nativeDoc = getHandle<PDFDoc>(env, document);

	TextCollector *sink = new TextCollector(env, javaCollector);

	// The output device reports text to `sink` through the static callback.
	TextOutputDev *textDev = new TextOutputDev(
			&TextCollector::CollectText, sink,
			physicalLayout ? gTrue : gFalse,
			rawOrder ? gTrue : gFalse);

	if (textDev->isOk()) {
		Catalog *docCatalog = nativeDoc->getCatalog();

		nativePage->display(textDev,
				72,       // hDPI
				72,       // vDPI
				0,        // rotate
				gFalse,   // useMediaBox
				gTrue,    // crop
				gFalse,   // printing
				docCatalog);
	}

	// All text should already be in Java by now, so the collector goes first.
	delete sink;
	delete textDev;
}
Пример #2
0
	// Returns the bitmap for page `pageNo` (0-based: 0 <= pageNo < numPage),
	// rendering it first unless cacheValid(w, h, pageNo) says the cached copy
	// can be reused. The returned pointer is the entry stored in _bmpCache.
	CImage* getPage(int w, int h, int pageNo)
	{
		// Cached copy is still good: hand it back untouched.
		if(cacheValid(w,h,pageNo))
			return _bmpCache[pageNo];

		// poppler uses 1 indexing for page numbers.
		const int popplerPage=pageNo+1;

		// A single resolution is used for both axes: the MIN of the value
		// derived from the width and the one derived from the height.
		const double dpiFromWidth=calcDPI_width(w, pageNo);
		const double dpiFromHeight=calcDPI_height(h, pageNo);
		const double dpi=MIN(dpiFromWidth, dpiFromHeight);

		// Start from an empty text matrix for this page.
		delete _textCache[pageNo];
		_textCache[pageNo]=new intmatrixn();

		// _textCacheState holds the page number while rendering is in
		// progress and is reset to -1 once it is done.
		_textCacheState=pageNo;
		_pdfDoc->displayPage(_outputDev, popplerPage, dpi, dpi, 0, gFalse, gTrue, gFalse);
#ifdef USE_NULLOUTPUTDEV
		// Second pass through the null output device, which is first told the
		// page rotation, crop-box origin and size, and the rendered bitmap size.
		_nullOutputDev->setInfo(_pdfDoc->getPageRotate(popplerPage),
								_pdfDoc->getCatalog()->getPage(popplerPage)->getCropBox()->x1,
								_pdfDoc->getCatalog()->getPage(popplerPage)->getCropBox()->y1,
								_pdfDoc->getPageCropWidth(popplerPage),
								_pdfDoc->getPageCropHeight(popplerPage),
								_outputDev->getBitmap()->getWidth(),
								_outputDev->getBitmap()->getHeight());

		_pdfDoc->displayPage(_nullOutputDev, popplerPage, dpi, dpi, 0, gFalse, gTrue, gFalse);
#endif
		_textCacheState=-1;

		// Take the bitmap from the output device, hand its pixels to a new
		// CImage, and swap that image into the cache slot.
		SplashBitmap *rendered=_outputDev->takeBitmap();

		CImage* fresh=new CImage();
		fresh->SetData(rendered->getWidth(), rendered->getHeight(), rendered->getDataPtr(), rendered->getRowSize());
		delete _bmpCache[pageNo];
		_bmpCache[pageNo]=fresh;
		delete rendered;

		return _bmpCache[pageNo];
	}
Пример #3
0
/*
 * Looks up one box of one page of a PDF file and stores it in `box`,
 * converted from PDF points (1/72 in) to TeX points (1/72.27 in).
 *
 *   filename  path of the PDF file to open
 *   page_num  1-based page number; a negative value counts from the end
 *             (-1 is the last page). Out-of-range values are clamped to the
 *             nearest valid page.
 *   pdf_box   one of the pdfbox_* selectors; unknown values select the crop box
 *   box       receives origin (x, y) and size (wd, ht) of the selected box
 *
 * Returns 0 on success, -1 if the document cannot be opened or the requested
 * page cannot be loaded.
 */
int
pdf_get_rect(char* filename, int page_num, int pdf_box, realrect* box)
{
	/* The doc takes ownership of name, so we mustn't delete it ourselves.
	   Plain new never yields a null pointer, so only isOk() needs checking. */
	GooString*	name = new GooString(filename);
	PDFDoc*		doc = new PDFDoc(name);

	if (!doc->isOk()) {
		delete doc;
		return -1;
	}

	int			pages = doc->getNumPages();
	if (page_num > pages)
		page_num = pages;
	if (page_num < 0)
		page_num = pages + 1 + page_num;
	if (page_num < 1)
		page_num = 1;

	/* getPage() yields a null pointer when the page does not exist (e.g. a
	   document without pages, where page_num was clamped to 1) or cannot be
	   loaded; bail out instead of dereferencing it. */
	Page*		page = doc->getCatalog()->getPage(page_num);
	if (!page) {
		delete doc;
		return -1;
	}

	PDFRectangle*	r;
	switch (pdf_box) {
		default:
		case pdfbox_crop:
			r = page->getCropBox();
			break;
		case pdfbox_media:
			r = page->getMediaBox();
			break;
		case pdfbox_bleed:
			r = page->getBleedBox();
			break;
		case pdfbox_trim:
			r = page->getTrimBox();
			break;
		case pdfbox_art:
			r = page->getArtBox();
			break;
	}

	/* PDF points -> TeX points. The corners may come in either order, so take
	   the smaller coordinate as origin and the absolute difference as size. */
	const double	tex_per_pdf = 72.27 / 72;
	box->x  = tex_per_pdf * my_fmin(r->x1, r->x2);
	box->y  = tex_per_pdf * my_fmin(r->y1, r->y2);
	box->wd = tex_per_pdf * fabs(r->x2 - r->x1);
	box->ht = tex_per_pdf * fabs(r->y2 - r->y1);

	delete doc;

	return 0;
}
/**
 * Imports the PDF file named by fn into m_Doc by replaying its pages through
 * a SlaOutputDev.
 *
 * With LoadSavePlugin::lfCreateDoc set in importerFlags every page of the PDF
 * is imported: each Scribus page is sized from the PDF page's media box, the
 * document information (title, author, subject, keywords) is copied, and,
 * when POPPLER_VERSION is defined, one layer is set up per optional content
 * group. Without that flag only the first page is rendered.
 *
 * If no element was produced, the colours collected in importedColors are
 * removed from the document's PageColors again.
 *
 * @param fn  path of the PDF file to import
 * @return always true; a PDF that fails to open is skipped silently
 */
bool PdfPlug::convert(QString fn)
{
	bool firstPg = true;
	int currentLayer = m_Doc->activeLayer();
	int baseLayer = m_Doc->activeLayer();
	importedColors.clear();
	if (progressDialog)
	{
		progressDialog->setOverallProgress(2);
		progressDialog->setLabel("GI", tr("Generating Items"));
		qApp->processEvents();
	}
	oldDocItemCount = m_Doc->Items->count();
	if (progressDialog)
	{
		progressDialog->setBusyIndicator("GI");
		qApp->processEvents();
	}

	// Plain new never yields a null pointer, so the objects created below
	// are used without null checks.
	globalParams = new GlobalParams();
	GooString *fname = new GooString(QFile::encodeName(fn).data());
	globalParams->setErrQuiet(gTrue);
	GBool hasOcg = gFalse;
	// Optional content groups, most recently discovered first.
	QList<OptionalContentGroup*> ocgGroups;
	PDFDoc *pdfDoc = new PDFDoc(fname, 0, 0, 0);
	if (pdfDoc->isOk())
	{
		double hDPI = 72.0;
		double vDPI = 72.0;
		int firstPage = 1;
		int lastPage = pdfDoc->getNumPages();
		SlaOutputDev *dev = new SlaOutputDev(m_Doc, &Elements, &importedColors, importerFlags);
		if (dev->isOk())
		{
			OCGs* ocg = pdfDoc->getOptContentConfig();
			if (ocg)
			{
				hasOcg = ocg->hasOCGs();
				if (hasOcg)
				{
					// Names already collected; a group name is only taken once.
					QStringList ocgNames;
					Array *order = ocg->getOrderArray();
					if (order)
					{
						for (int i = 0; i < order->getLength (); ++i)
						{
							Object orderItem;
							order->get(i, &orderItem);
							if (orderItem.isDict())
							{
								Object ref;
								order->getNF(i, &ref);
								if (ref.isRef())
								{
									OptionalContentGroup *oc = ocg->findOcgByRef(ref.getRef());
									// The reference may not name a known group;
									// skip it rather than dereference null.
									if (oc)
									{
										QString ocgName = UnicodeParsedString(oc->getName());
										if (!ocgNames.contains(ocgName))
										{
											ocgGroups.prepend(oc);
											ocgNames.append(ocgName);
										}
									}
								}
								ref.free();
							}
							else
							{
								// Not a plain group entry: fall back to the
								// complete list of groups.
								GooList *ocgs = ocg->getOCGs ();
								for (int j = 0; j < ocgs->getLength (); ++j)
								{
									OptionalContentGroup *oc = (OptionalContentGroup *)ocgs->get(j);
									QString ocgName = UnicodeParsedString(oc->getName());
									if (!ocgNames.contains(ocgName))
									{
										ocgGroups.prepend(oc);
										ocgNames.append(ocgName);
									}
								}
							}
							// Release the fetched array entry in every case.
							orderItem.free();
						}
					}
					else
					{
						// No order array: take every group the document declares.
						GooList *ocgs = ocg->getOCGs ();
						for (int j = 0; j < ocgs->getLength (); ++j)
						{
							OptionalContentGroup *oc = (OptionalContentGroup *)ocgs->get(j);
							QString ocgName = UnicodeParsedString(oc->getName());
							if (!ocgNames.contains(ocgName))
							{
								ocgGroups.prepend(oc);
								ocgNames.append(ocgName);
							}
						}
					}
				}
			}
			GBool useMediaBox = gTrue;
			GBool crop = gFalse;
			GBool printing = gFalse;
			dev->startDoc(pdfDoc, pdfDoc->getXRef(), pdfDoc->getCatalog());
			// The rotation of the first page is applied to every rendered page.
			int rotate = pdfDoc->getPageRotate(firstPage);
			if (importerFlags & LoadSavePlugin::lfCreateDoc)
			{
// POPPLER_VERSION appeared in 0.19.0 first
#ifdef POPPLER_VERSION
				if (hasOcg)
				{
					// One layer per group; the currently active layer is
					// reused when it already carries the group's name.
					QString actL = m_Doc->activeLayerName();
					for (int a = 0; a < ocgGroups.count(); a++)
					{
						OptionalContentGroup *oc = ocgGroups[a];
						const QString layerName = UnicodeParsedString(oc->getName());
						if (actL != layerName)
							currentLayer = m_Doc->addLayer(layerName, false);
						else
							currentLayer = m_Doc->layerIDFromName(layerName);
						// A usage state of "unset" is treated like "on".
						const bool visible = (oc->getViewState() == OptionalContentGroup::ocUsageOn) || (oc->getViewState() == OptionalContentGroup::ocUsageUnset);
						const bool printable = (oc->getPrintState() == OptionalContentGroup::ocUsageOn) || (oc->getPrintState() == OptionalContentGroup::ocUsageUnset);
						m_Doc->setLayerVisible(currentLayer, visible);
						m_Doc->setLayerPrintable(currentLayer, printable);
						oc->setState(OptionalContentGroup::Off);
					}
					dev->layersSetByOCG = true;
				}
#endif
				// Copy the document information entries that are strings.
				Object info;
				pdfDoc->getDocInfo(&info);
				if (info.isDict())
				{
					Object obj;
					Dict *infoDict = info.getDict();
					// obj is released after every lookup, whatever its type.
					if (infoDict->lookup((char*)"Title", &obj )->isString())
						m_Doc->documentInfo().setTitle(UnicodeParsedString(obj.getString()));
					obj.free();
					if (infoDict->lookup((char*)"Author", &obj )->isString())
						m_Doc->documentInfo().setAuthor(UnicodeParsedString(obj.getString()));
					obj.free();
					if (infoDict->lookup((char*)"Subject", &obj )->isString())
						m_Doc->documentInfo().setSubject(UnicodeParsedString(obj.getString()));
					obj.free();
					if (infoDict->lookup((char*)"Keywords", &obj )->isString())
						m_Doc->documentInfo().setKeywords(UnicodeParsedString(obj.getString()));
					obj.free();
				}
				info.free();
				for (int pp = 0; pp < lastPage; pp++)
				{
					m_Doc->setActiveLayer(baseLayer);
					// The first PDF page goes onto the page that already
					// exists; later ones get a page of their own.
					if (firstPg)
						firstPg = false;
					else
						m_Doc->addPage(pp);
					m_Doc->currentPage()->setInitialHeight(pdfDoc->getPageMediaHeight(pp + 1));
					m_Doc->currentPage()->setInitialWidth(pdfDoc->getPageMediaWidth(pp + 1));
					m_Doc->currentPage()->setHeight(pdfDoc->getPageMediaHeight(pp + 1));
					m_Doc->currentPage()->setWidth(pdfDoc->getPageMediaWidth(pp + 1));
					m_Doc->currentPage()->MPageNam = CommonStrings::trMasterPageNormal;
					m_Doc->currentPage()->m_pageSize = "Custom";
					m_Doc->setPageSize("Custom");
					m_Doc->reformPages(true);
					// All groups are switched on and the page is rendered once.
					if (hasOcg)
					{
						for (int a = 0; a < ocgGroups.count(); a++)
							ocgGroups[a]->setState(OptionalContentGroup::On);
					}
					pdfDoc->displayPage(dev, pp + 1, hDPI, vDPI, rotate, useMediaBox, crop, printing);
				}
			}
			else
			{
				if (hasOcg)
				{
					for (int a = 0; a < ocgGroups.count(); a++)
						ocgGroups[a]->setState(OptionalContentGroup::On);
				}
				pdfDoc->displayPage(dev, firstPage, hDPI, vDPI, rotate, useMediaBox, crop, printing);
			}
		}
		delete dev;
	}
	delete pdfDoc;
	delete globalParams;
	globalParams = 0;

	// Nothing was imported: take the collected colours out of the document again.
	if (Elements.count() == 0)
	{
		for (int cd = 0; cd < importedColors.count(); cd++)
			m_Doc->PageColors.remove(importedColors[cd]);
	}

	if (progressDialog)
		progressDialog->close();
	return true;
}
Пример #5
0
int main(int argc, char *argv[])
{
  // parse args
  bool ok = parseArgs(argDesc, &argc, argv);
  if (!ok || argc < 2 || argc > 3 || printHelp) {
    fprintf(stderr, "pdftoipe version %s\n", PDFTOIPE_VERSION);
    printUsage("pdftoipe", "<PDF-file> [<XML-file>]", argDesc);
    return 1;
  }

  GooString *fileName = new GooString(argv[1]);

  globalParams = new GlobalParams();
  if (quiet)
    globalParams->setErrQuiet(quiet);

  GooString *ownerPW, *userPW;
  if (ownerPassword[0]) {
    ownerPW = new GooString(ownerPassword);
  } else {
    ownerPW = 0;
  }
  if (userPassword[0]) {
    userPW = new GooString(userPassword);
  } else {
    userPW = 0;
  }

  // open PDF file
  PDFDoc *doc = new PDFDoc(fileName, ownerPW, userPW);
  delete userPW;
  delete ownerPW;

  if (!doc->isOk())
    return 1;
  
  // construct XML file name
  std::string xmlFileName;
  if (argc == 3) {
    xmlFileName = argv[2];
  } else {
    const char *p = fileName->c_str() + fileName->getLength() - 4;
    if (!strcmp(p, ".pdf") || !strcmp(p, ".PDF")) {
        xmlFileName = std::string(fileName->c_str(),
                                  fileName->getLength() - 4);
    } else {
      xmlFileName = fileName->c_str();
    }
    xmlFileName += ".ipe";
  }

  // get page range
  if (firstPage < 1)
    firstPage = 1;

  if (lastPage < 1 || lastPage > doc->getNumPages())
    lastPage = doc->getNumPages();

  // write XML file
  XmlOutputDev *xmlOut = 
    new XmlOutputDev(xmlFileName, doc->getXRef(),
                     doc->getCatalog(), firstPage, lastPage);

  // tell output device about text handling
  xmlOut->setTextHandling(math, notext, literal, mergeLevel, unicodeLevel);
  
  int exitCode = 2;
  if (xmlOut->isOk()) {
    doc->displayPages(xmlOut, firstPage, lastPage, 
		      // double hDPI, double vDPI, int rotate,
		      // bool useMediaBox, bool crop, bool printing,
		      72.0, 72.0, 0, false, false, false);
    exitCode = 0;
  }

  if (xmlOut->hasUnicode()) {
    fprintf(stderr, "The document contains Unicode (non-ASCII) text.\n");
    if (unicodeLevel <= 1)
      fprintf(stderr, "Unknown Unicode characters were replaced by [U+XXX].\n");
    else
      fprintf(stderr, "UTF-8 was set as document encoding in the preamble.\n");
  }

  // clean up
  delete xmlOut;
  delete doc;
  delete globalParams;

  return exitCode;
}