Пример #1
0
/*
 * Fetch one of the page boxes of a PDF file and store it in *box,
 * converted to TeX points (the PDF coordinates are scaled by 72.27 / 72).
 *
 * filename  path of the PDF file to open
 * page_num  1-based page number; values past the end are clamped to the
 *           last page, negative values count back from the end
 *           (-1 is the last page), and anything still below 1 becomes 1
 * pdf_box   one of the pdfbox_* selectors; unknown values fall back to the
 *           crop box
 * box       receives the lower-left corner (x, y) and size (wd, ht)
 *
 * Returns 0 on success and -1 if an argument is NULL, the document cannot
 * be opened, or the requested page/box is not available.
 */
int
pdf_get_rect(char* filename, int page_num, int pdf_box, realrect* box)
{
	if (!filename || !box)
		return -1;

	GooString*	name = new GooString(filename);
	PDFDoc*		doc = new PDFDoc(name);

	/* the doc now owns name, so we mustn't delete it ourselves;
	   plain new never yields a null pointer, so no check is needed */

	if (!doc->isOk()) {
		delete doc;
		return -1;
	}

	int			pages = doc->getNumPages();
	if (page_num > pages)
		page_num = pages;
	if (page_num < 0)
		page_num = pages + 1 + page_num;
	if (page_num < 1)
		page_num = 1;

	/* the clamping above still yields page 1 for a document without
	   pages, so the lookup result has to be checked before use */
	Page*		page = doc->getCatalog()->getPage(page_num);
	if (!page) {
		delete doc;
		return -1;
	}

	PDFRectangle*	r;
	switch (pdf_box) {
		default:
		case pdfbox_crop:
			r = page->getCropBox();
			break;
		case pdfbox_media:
			r = page->getMediaBox();
			break;
		case pdfbox_bleed:
			r = page->getBleedBox();
			break;
		case pdfbox_trim:
			r = page->getTrimBox();
			break;
		case pdfbox_art:
			r = page->getArtBox();
			break;
	}

	if (!r) {
		delete doc;
		return -1;
	}

	/* the corners may be stored in either order, hence fmin/fabs */
	box->x  = 72.27 / 72 * my_fmin(r->x1, r->x2);
	box->y  = 72.27 / 72 * my_fmin(r->y1, r->y2);
	box->wd = 72.27 / 72 * fabs(r->x2 - r->x1);
	box->ht = 72.27 / 72 * fabs(r->y2 - r->y1);

	/* r points into the document, so doc may only be released now */
	delete doc;

	return 0;
}
Пример #2
0
/**
 * Converts the filter chain's input PDF file into an SVG file by rendering
 * all pages through an SvgOutputDev.
 *
 * @param from source mime type; must be "application/pdf"
 * @param to   target mime type; must be "image/svg+xml"
 * @return KoFilter::NotImplemented for any other mime type pair,
 *         KoFilter::StupidError if the PDF cannot be opened or the output
 *         device cannot be set up, KoFilter::OK once the SVG was dumped.
 */
KoFilter::ConversionStatus PdfImport::convert(const QByteArray& from, const QByteArray& to)
{
    debugPdf << "to:" << to << " from:" << from;

    if (from != "application/pdf" || to != "image/svg+xml") {
        return KoFilter::NotImplemented;
    }

    // read config file; plain new never returns null, so no check is needed
    globalParams = new GlobalParams();

    // pdfDoc is handed the file name string and is the only thing deleted below
    GooString * fname = new GooString(QFile::encodeName(m_chain->inputFile()).data());
    PDFDoc * pdfDoc = new PDFDoc(fname, 0, 0, 0);

    if (! pdfDoc->isOk()) {
        delete pdfDoc;
        delete globalParams;
        // never leave the global pointing at freed memory
        globalParams = 0;
        return KoFilter::StupidError;
    }

    double hDPI = 72.0;
    double vDPI = 72.0;

    int firstPage = 1;
    int lastPage = pdfDoc->getNumPages();

    debugPdf << "converting pages" << firstPage << "-" << lastPage;

    // stays an error unless the output device actually wrote the file
    KoFilter::ConversionStatus status = KoFilter::StupidError;

    SvgOutputDev * dev = new SvgOutputDev(m_chain->outputFile());
    if (dev->isOk()) {
        int rotate = 0;
        GBool useMediaBox = gTrue;
        GBool crop = gFalse;
        GBool printing = gFalse;
        pdfDoc->displayPages(dev, firstPage, lastPage, hDPI, vDPI, rotate, useMediaBox, crop, printing);
        dev->dumpContent();

        debugPdf << "wrote file to" << m_chain->outputFile();
        status = KoFilter::OK;
    }

    delete dev;
    delete pdfDoc;
    delete globalParams;
    globalParams = 0;

    // check for memory leaks
    Object::memCheck(stderr);

    return status;
}
Пример #3
0
	/**
	 * Replaces the currently loaded document with the PDF at fileName.
	 *
	 * All state of the previous document (PDFDoc, output devices, bitmap and
	 * text caches) is released first. On success the caches are resized to
	 * the page count and filled with NULL entries.
	 *
	 * @param fileName path of the PDF file to open
	 * @return true if the document was opened, false otherwise
	 */
	bool load(const char* fileName)
	{
		// Reset every pointer right after deleting it: this function can
		// return early (and _nullOutputDev is only re-created when
		// USE_NULLOUTPUTDEV is defined), so a stale pointer would be
		// deleted a second time on the next call.
		delete _pdfDoc;
		_pdfDoc = NULL;
		delete _outputDev;
		_outputDev = NULL;
		delete _nullOutputDev;
		_nullOutputDev = NULL;

		for (int i=0; i<_bmpCache.size(); i++)	delete _bmpCache[i];
		_bmpCache.resize(0);
		for (int i=0; i<_textCache.size(); i++)	delete _textCache[i];
		_textCache.resize(0);

		// The failed PDFDoc object is intentionally kept in _pdfDoc, as
		// before; it is released by the next load() call.
		_pdfDoc= new PDFDoc(new GooString(fileName), NULL, NULL, NULL);
		if (!_pdfDoc->isOk()) {
			printf("error loading pdf");
			return false;
		}

		GBool bitmapTopDown = gTrue;

		// paper colour: opaque white in RGB8
		SplashColor white;
		white[0]=0xff;
		white[1]=0xff;
		white[2]=0xff;

		// _outputDev = new SplashOutputDev_mod(splashModeRGB8, 4, gFalse, white, bitmapTopDown);
		// (plain new never returns null, so no check is needed here)
		_outputDev = new SplashOutputDev(splashModeRGB8, 4, gFalse, white, bitmapTopDown);
#ifdef USE_NULLOUTPUTDEV
		_nullOutputDev=new NullOutputDev();
#endif
#ifdef POPPLER_VERSION
		_outputDev->startDoc(_pdfDoc);
#else
		_outputDev->startDoc(_pdfDoc->getXRef());
#endif

		// one (initially empty) cache slot per page
		_bmpCache.resize(_pdfDoc->getNumPages());
		for (int i=0; i<_bmpCache.size(); i++)	_bmpCache[i]=NULL;
		_textCache.resize(_pdfDoc->getNumPages());
		for (int i=0; i<_textCache.size(); i++)	_textCache[i]=NULL;

		return true;
	}
/*
 * Try to unlock a password protected document by re-opening its file with
 * the given password.
 *
 * document  the document to unlock; may be NULL
 * password  password to try; NULL means "no password"
 *
 * Returns the resulting document->locked state, i.e. 0 when the document is
 * unlocked (or document is NULL) and non-zero when it is still locked.
 */
unsigned char
epdf_document_unlock (Epdf_Document *document, const char *password)
{
    if (!document)
        return 0;

    if (document->locked && document->pdfdoc && document->pdfdoc->getFileName()) {
        /* A PDFDoc owns the file name it is constructed with, so the new
         * document gets its own copy instead of sharing the old one's. */
        GooString *fname = document->pdfdoc->getFileName()->copy();
        GooString *pwd = password ? new GooString(password) : NULL;
        PDFDoc *pdfdoc = new PDFDoc(fname, pwd);
        delete pwd;
        if (pdfdoc->isOk()) {
            /* NOTE(review): the previous document->pdfdoc is not released
             * here because other members of Epdf_Document may still refer
             * to it - confirm and free it if nothing does. */
            document->pdfdoc = pdfdoc;
            document->locked = false;
        } else {
            /* wrong password or unreadable file: drop the failed attempt */
            delete pdfdoc;
        }
    }

    return document->locked;
}
Пример #5
0
/*
 * Import all pages of the PDF file `filename` into the diagram `dia` by
 * rendering them through a DiaOutputDev.
 *
 * filename   path of the PDF file
 * dia        diagram that receives the imported objects
 * ctx        context used to report errors to the user
 * user_data  unused
 *
 * Returns TRUE if the document could be opened (pages that are missing or
 * not OK are skipped), FALSE otherwise.
 */
gboolean
import_pdf(const gchar *filename, DiagramData *dia, DiaContext *ctx, void* user_data)
{
  PDFDoc *doc;
  // createPDFDoc() takes the name by reference, so a local object suffices
  GooString fileName(filename);
  // no passwords yet
  GooString *ownerPW = NULL;
  GooString *userPW = NULL;
  gboolean ret = FALSE;

  // without this we will get strange crashes (at least with /O2 build)
  globalParams = new GlobalParams();

  doc = PDFDocFactory().createPDFDoc(fileName, ownerPW, userPW);
  if (!doc || !doc->isOk()) {
    dia_context_add_message (ctx, _("PDF document not OK.\n%s"),
			     dia_context_get_filename (ctx));
  } else {
    const int numPages = doc->getNumPages();
    DiaOutputDev *diaOut = new DiaOutputDev(dia, numPages);

    for (int pg = 1; pg <= numPages; ++pg) {
      Page *page = doc->getPage (pg);
      if (!page || !page->isOk())
        continue;
      doc->displayPage(diaOut, pg,
		       72.0, 72.0, /* DPI, scaling elsewhere */
		       0, /* rotate */
		       gTrue, /* useMediaBox */
		       gTrue, /* Crop */
		       gFalse /* printing */
		       );
    }
    delete diaOut;
    ret = TRUE;
  }
  delete doc;
  delete globalParams;
  // do not leave the global pointing at freed memory for later callers
  globalParams = NULL;

  return ret;
}
Пример #6
0
int
pdf_count_pages(char* filename)
{
	int			pages = 0;
	GooString*	name = new GooString(filename);
	PDFDoc*		doc = new PDFDoc(name);

	if (!doc) {
		delete name;
		return 0;
	}

	/* if the doc got created, it now owns name, so we mustn't delete it! */

	if (doc->isOk())
		pages = doc->getNumPages();

	delete doc;

	return pages;
}
Пример #7
0
/**
 * Opens the PDF file and, on success, makes it the document wrapped by this
 * object (releasing the reference to the previously wrapped one).
 *
 * @param filename path of the PDF file (converted from UTF-8 to a wide
 *                 string on Windows); NULL makes the call fail
 * @param password optional password, used as both owner and user password;
 *                 valid UTF-8 is converted to ISO-8859-1 first
 * @param error    receives a description of the failure when the document
 *                 cannot be opened
 * @return true if the document was loaded, false otherwise
 */
bool XojPopplerDocument::load(const char * filename, const char * password, GError ** error) {
	XOJ_CHECK_TYPE(XojPopplerDocument);

	if (!globalParams) {
		globalParams = new GlobalParams();
	}

	if (!filename) {
		return false;
	}

	GooString * password_g = NULL;
	if (password != NULL) {
		gchar * password_latin = NULL;
		if (g_utf8_validate(password, -1, NULL)) {
			password_latin = g_convert(password, -1, "ISO-8859-1", "UTF-8", NULL, NULL, NULL);
		}
		// g_convert() yields NULL when the password has no Latin-1
		// representation; use the raw bytes instead of passing NULL on.
		password_g = new GooString(password_latin ? password_latin : password);
		g_free(password_latin);
	}

	PDFDoc * newDoc;
#ifdef G_OS_WIN32
	int wlen = MultiByteToWideChar(CP_UTF8, 0, filename, -1, NULL, 0);
	if (wlen <= 0) {
		// conversion failed; do not leak the password on the way out
		delete password_g;
		return false;
	}

	wchar_t * filenameW = new WCHAR[wlen];
	wlen = MultiByteToWideChar(CP_UTF8, 0, filename, -1, filenameW, wlen);

	newDoc = new PDFDoc(filenameW, wlen, password_g, password_g);
	// allocated with new[], so it has to be released with delete[]
	delete[] filenameW;
#else
	newDoc = new PDFDoc(new GooString(filename), password_g, password_g);
#endif
	delete password_g;

	if (!newDoc->isOk()) {
		int fopen_errno;
		switch (newDoc->getErrorCode()) {
		case errOpenFile:
			// If there was an error opening the file, count it as a G_FILE_ERROR
			// and set the GError parameters accordingly. (this assumes that the
			// only way to get an errOpenFile error is if newDoc was created using
			// a filename and thus fopen was called, which right now is true.
			fopen_errno = newDoc->getFopenErrno();
			g_set_error(error, G_FILE_ERROR, g_file_error_from_errno(fopen_errno), "%s", g_strerror(fopen_errno));
			break;
		case errBadCatalog:
			g_set_error(error, 0, 0, "Failed to read the document catalog");
			break;
		case errDamaged:
			g_set_error(error, 0, 0, "PDF document is damaged");
			break;
		case errEncrypted:
			g_set_error(error, 0, 0, "Document is encrypted");
			break;
		default:
			g_set_error(error, 0, 0, "Failed to load document");
		}

		delete newDoc;
		return false;
	}

	// swap in the new document only after it loaded successfully
	if (this->data) {
		this->data->unreference();
	}
	this->data = new _IntPopplerDocument(newDoc);

	return true;
}
/**
 * Imports the PDF file fn into m_Doc by rendering it through a SlaOutputDev.
 *
 * With LoadSavePlugin::lfCreateDoc set in importerFlags every page is
 * imported (a new page is added to m_Doc for each PDF page after the first,
 * sized to its media box), the optional content groups are turned into
 * layers and the Title/Author/Subject/Keywords entries of the info
 * dictionary are copied into the document info. Otherwise only the first
 * page is rendered.
 *
 * If nothing was imported, the colors registered during the import are
 * removed from m_Doc->PageColors again.
 *
 * @param fn path of the PDF file
 * @return always true
 */
bool PdfPlug::convert(QString fn)
{
	bool firstPg = true;
	int currentLayer = m_Doc->activeLayer();
	int baseLayer = m_Doc->activeLayer();
	importedColors.clear();
	if(progressDialog)
	{
		progressDialog->setOverallProgress(2);
		progressDialog->setLabel("GI", tr("Generating Items"));
		qApp->processEvents();
	}
	oldDocItemCount = m_Doc->Items->count();
	if (progressDialog)
	{
		progressDialog->setBusyIndicator("GI");
		qApp->processEvents();
	}

	globalParams = new GlobalParams();
	if (globalParams)
	{
		GooString *fname = new GooString(QFile::encodeName(fn).data());
		globalParams->setErrQuiet(gTrue);
		GBool hasOcg = gFalse;
		QList<OptionalContentGroup*> ocgGroups;
//		globalParams->setPrintCommands(gTrue);
		PDFDoc *pdfDoc = new PDFDoc(fname, 0, 0, 0);
		if (pdfDoc)
		{
			if (pdfDoc->isOk())
			{
				double hDPI = 72.0;
				double vDPI = 72.0;
				int firstPage = 1;
				int lastPage = pdfDoc->getNumPages();
				SlaOutputDev *dev = new SlaOutputDev(m_Doc, &Elements, &importedColors, importerFlags);
				if (dev->isOk())
				{
					// Collect the optional content groups once per name;
					// prepending reverses the order they are listed in.
					OCGs* ocg = pdfDoc->getOptContentConfig();
					if (ocg)
					{
						hasOcg = ocg->hasOCGs();
						if (hasOcg)
						{

							QStringList ocgNames;
							Array *order = ocg->getOrderArray();
							if (order)
							{
								for (int i = 0; i < order->getLength (); ++i)
								{
									Object orderItem;
									order->get(i, &orderItem);
									if (orderItem.isDict())
									{
										Object ref;
										order->getNF(i, &ref);
										if (ref.isRef())
										{
											OptionalContentGroup *oc = ocg->findOcgByRef(ref.getRef());
											// guard against a reference that does not resolve to a known group
											if (oc)
											{
												QString ocgName = UnicodeParsedString(oc->getName());
												if (!ocgNames.contains(ocgName))
												{
													ocgGroups.prepend(oc);
													ocgNames.append(ocgName);
												}
											}
										}
										ref.free();
									}
									else
									{
										// not a plain group entry: fall back to the complete group list
										GooList *ocgs = ocg->getOCGs ();
										for (int j = 0; j < ocgs->getLength (); ++j)
										{
											OptionalContentGroup *oc = (OptionalContentGroup *)ocgs->get(j);
											QString ocgName = UnicodeParsedString(oc->getName());
											if (!ocgNames.contains(ocgName))
											{
												ocgGroups.prepend(oc);
												ocgNames.append(ocgName);
											}
										}
									}
									// objects fetched with get() have to be released again
									orderItem.free();
								}
							}
							else
							{
								// no order array: take the groups as they are stored
								GooList *ocgs = ocg->getOCGs ();
								for (int j = 0; j < ocgs->getLength (); ++j)
								{
									OptionalContentGroup *oc = (OptionalContentGroup *)ocgs->get(j);
									QString ocgName = UnicodeParsedString(oc->getName());
									if (!ocgNames.contains(ocgName))
									{
										ocgGroups.prepend(oc);
										ocgNames.append(ocgName);
									}
								}
							}
						}
					}
					GBool useMediaBox = gTrue;
					GBool crop = gFalse;
					GBool printing = gFalse;
					dev->startDoc(pdfDoc, pdfDoc->getXRef(), pdfDoc->getCatalog());
					int rotate = pdfDoc->getPageRotate(firstPage);
					if (importerFlags & LoadSavePlugin::lfCreateDoc)
					{
// POPPLER_VERSION appeared in 0.19.0 first
#ifdef POPPLER_VERSION
						if (hasOcg)
						{
							// create (or reuse) one layer per group and mirror its view/print state
							QString actL = m_Doc->activeLayerName();
							for (int a = 0; a < ocgGroups.count(); a++)
							{
								OptionalContentGroup *oc = ocgGroups[a];
								if (actL != UnicodeParsedString(oc->getName()))
									currentLayer = m_Doc->addLayer(UnicodeParsedString(oc->getName()), false);
								else
									currentLayer = m_Doc->layerIDFromName(UnicodeParsedString(oc->getName()));
								if ((oc->getViewState() == OptionalContentGroup::ocUsageOn) || (oc->getViewState() == OptionalContentGroup::ocUsageUnset))
									m_Doc->setLayerVisible(currentLayer, true);
								else
									m_Doc->setLayerVisible(currentLayer, false);
								if ((oc->getPrintState() == OptionalContentGroup::ocUsageOn) || (oc->getPrintState() == OptionalContentGroup::ocUsageUnset))
									m_Doc->setLayerPrintable(currentLayer, true);
								else
									m_Doc->setLayerPrintable(currentLayer, false);
								oc->setState(OptionalContentGroup::Off);
							}
							dev->layersSetByOCG = true;
						}
#endif
						// copy the basic document information
						Object info;
						pdfDoc->getDocInfo(&info);
						if (info.isDict())
						{
							Object obj;
							Dict *infoDict = info.getDict();
							// obj is released after every lookup, whatever type it turned out to be
							if (infoDict->lookup((char*)"Title", &obj )->isString())
								m_Doc->documentInfo().setTitle(UnicodeParsedString(obj.getString()));
							obj.free();
							if (infoDict->lookup((char*)"Author", &obj )->isString())
								m_Doc->documentInfo().setAuthor(UnicodeParsedString(obj.getString()));
							obj.free();
							if (infoDict->lookup((char*)"Subject", &obj )->isString())
								m_Doc->documentInfo().setSubject(UnicodeParsedString(obj.getString()));
							obj.free();
							if (infoDict->lookup((char*)"Keywords", &obj )->isString())
								m_Doc->documentInfo().setKeywords(UnicodeParsedString(obj.getString()));
							obj.free();
						}
						info.free();
						for (int pp = 0; pp < lastPage; pp++)
						{
							m_Doc->setActiveLayer(baseLayer);
							// the first PDF page goes onto the already existing page
							if (firstPg)
								firstPg = false;
							else
								m_Doc->addPage(pp);
							m_Doc->currentPage()->setInitialHeight(pdfDoc->getPageMediaHeight(pp + 1));
							m_Doc->currentPage()->setInitialWidth(pdfDoc->getPageMediaWidth(pp + 1));
							m_Doc->currentPage()->setHeight(pdfDoc->getPageMediaHeight(pp + 1));
							m_Doc->currentPage()->setWidth(pdfDoc->getPageMediaWidth(pp + 1));
							m_Doc->currentPage()->MPageNam = CommonStrings::trMasterPageNormal;
							m_Doc->currentPage()->m_pageSize = "Custom";
							m_Doc->setPageSize("Custom");
							m_Doc->reformPages(true);
							if (hasOcg)
							{
								// switch every group on so that all content gets rendered
								for (int a = 0; a < ocgGroups.count(); a++)
								{
									OptionalContentGroup *oc = ocgGroups[a];
								//	m_Doc->setActiveLayer(UnicodeParsedString(oc->getName()));
								//	currentLayer = m_Doc->activeLayer();
									oc->setState(OptionalContentGroup::On);
								//	pdfDoc->displayPage(dev, pp + 1, hDPI, vDPI, rotate, useMediaBox, crop, printing);
								//	oc->setState(OptionalContentGroup::Off);
								}
							}
							pdfDoc->displayPage(dev, pp + 1, hDPI, vDPI, rotate, useMediaBox, crop, printing);
						}
					}
					else
					{
						if (hasOcg)
						{
							for (int a = 0; a < ocgGroups.count(); a++)
							{
								ocgGroups[a]->setState(OptionalContentGroup::On);
							}
						}
						pdfDoc->displayPage(dev, firstPage, hDPI, vDPI, rotate, useMediaBox, crop, printing);
					}
				}
				delete dev;
			}
		}
		delete pdfDoc;
	}
	delete globalParams;
	globalParams = 0;

//	qDebug() << "converting finished";
//	qDebug() << "Imported" << Elements.count() << "Elements";

	// nothing imported: take back the colors added during the import
	if (Elements.count() == 0)
	{
		if (importedColors.count() != 0)
		{
			for (int cd = 0; cd < importedColors.count(); cd++)
			{
				m_Doc->PageColors.remove(importedColors[cd]);
			}
		}
	}

	if (progressDialog)
		progressDialog->close();
	return true;
}
Пример #9
0
/**
 * pdftoipe entry point: converts a PDF file into an Ipe XML file.
 *
 * Usage: pdftoipe <PDF-file> [<XML-file>]. Without an explicit XML file
 * name, the output name is the input name with a trailing ".pdf"/".PDF"
 * replaced by ".ipe" (or with ".ipe" appended if there is no such suffix).
 *
 * @return 0 on success, 1 on a usage error or if the PDF cannot be opened,
 *         2 if the XML output device could not be set up.
 */
int main(int argc, char *argv[])
{
  // parse args
  bool ok = parseArgs(argDesc, &argc, argv);
  if (!ok || argc < 2 || argc > 3 || printHelp) {
    fprintf(stderr, "pdftoipe version %s\n", PDFTOIPE_VERSION);
    printUsage("pdftoipe", "<PDF-file> [<XML-file>]", argDesc);
    return 1;
  }

  GooString *fileName = new GooString(argv[1]);

  globalParams = new GlobalParams();
  if (quiet)
    globalParams->setErrQuiet(quiet);

  GooString *ownerPW, *userPW;
  if (ownerPassword[0]) {
    ownerPW = new GooString(ownerPassword);
  } else {
    ownerPW = 0;
  }
  if (userPassword[0]) {
    userPW = new GooString(userPassword);
  } else {
    userPW = 0;
  }

  // open PDF file
  PDFDoc *doc = new PDFDoc(fileName, ownerPW, userPW);
  delete userPW;
  delete ownerPW;

  if (!doc->isOk()) {
    delete doc;
    delete globalParams;
    return 1;
  }

  // construct XML file name
  std::string xmlFileName;
  if (argc == 3) {
    xmlFileName = argv[2];
  } else {
    xmlFileName = argv[1];
    // Only look at the last four characters when there are that many;
    // shorter names used to be read from before the start of the buffer.
    const std::string::size_type len = xmlFileName.size();
    if (len >= 4 &&
        (xmlFileName.compare(len - 4, 4, ".pdf") == 0 ||
         xmlFileName.compare(len - 4, 4, ".PDF") == 0)) {
      xmlFileName.erase(len - 4);
    }
    xmlFileName += ".ipe";
  }

  // get page range
  if (firstPage < 1)
    firstPage = 1;

  if (lastPage < 1 || lastPage > doc->getNumPages())
    lastPage = doc->getNumPages();

  // write XML file
  XmlOutputDev *xmlOut =
    new XmlOutputDev(xmlFileName, doc->getXRef(),
                     doc->getCatalog(), firstPage, lastPage);

  // tell output device about text handling
  xmlOut->setTextHandling(math, notext, literal, mergeLevel, unicodeLevel);

  int exitCode = 2;
  if (xmlOut->isOk()) {
    doc->displayPages(xmlOut, firstPage, lastPage,
		      // double hDPI, double vDPI, int rotate,
		      // bool useMediaBox, bool crop, bool printing,
		      72.0, 72.0, 0, false, false, false);
    exitCode = 0;
  }

  if (xmlOut->hasUnicode()) {
    fprintf(stderr, "The document contains Unicode (non-ASCII) text.\n");
    if (unicodeLevel <= 1)
      fprintf(stderr, "Unknown Unicode characters were replaced by [U+XXX].\n");
    else
      fprintf(stderr, "UTF-8 was set as document encoding in the preamble.\n");
  }

  // clean up
  delete xmlOut;
  delete doc;
  delete globalParams;

  return exitCode;
}
Пример #10
0
int main(int argc, char *argv[]) {
  PDFDoc *doc;
  GString *fileName;
  GString *ownerPW, *userPW;
  SplashColor paperColor;
  SplashOutputDev *splashOut;
  GBool ok;
  int exitCode;
  int pg;

  exitCode = 99;

  // parse args
  ok = parseArgs(argDesc, &argc, argv);
  
  if (!ok || argc != 2 || printVersion || printHelp) {
    fprintf(stderr, "pdf2jpeg version %s\n", xpdfVersion);
    fprintf(stderr, "%s\n", xpdfCopyright);
    if (!printVersion) {
      printUsage("pdf2jpeg", "<PDF-file> -o <jpegfile>", argDesc);
    }
    goto err0;
  }
  fileName = new GString(argv[1]);

  // read config file
  globalParams = new GlobalParams(cfgFileName);
  globalParams->setupBaseFonts(NULL);
  
  // open PDF file
  if (ownerPassword[0]) {
    ownerPW = new GString(ownerPassword);
  } else {
    ownerPW = NULL;
  }
  if (userPassword[0]) {
    userPW = new GString(userPassword);
  } else {
    userPW = NULL;
  }
  doc = new PDFDoc(fileName, ownerPW, userPW);
  if (userPW) {
    delete userPW;
  }
  if (ownerPW) {
    delete ownerPW;
  }
  if (!doc->isOk()) {
    exitCode = 1;
    goto err1;
  }

  paperColor[0] = paperColor[1] = paperColor[2] = 0xff;
  splashOut = new SplashOutputDev(splashModeRGB8, 1, gFalse, paperColor);
  
  splashOut->startDoc(doc->getXRef());

  if(page>=1 && page<=doc->getNumPages()) {
      double r = resolution;
      if(width) {
	int old_width = doc->getPageCropWidth(page);
	r = 72.0*width/old_width;
      }
      doc->displayPage(splashOut, page, r, r, 0, gFalse, gTrue, gFalse);
      SplashBitmap*bitmap = splashOut->getBitmap();
      if(bitmap) {
	Guchar*rgb = bitmap->getDataPtr();
	int width = bitmap->getWidth();
	int height = bitmap->getHeight();
	jpeg_save(rgb, width, height, quality, output);
      }
  }
  delete splashOut;

  exitCode = 0;

  // clean up
 err1:
  delete doc;
  delete globalParams;
 err0:

  // check for memory leaks
  Object::memCheck(stderr);
  gMemReport(stderr);

  return exitCode;
}
Пример #11
0
int extract_images_from_pdf(char* filename,
                            char* target,
                            char* owner_password,
                            char* user_password,
                            char* range,
                            char* format,
                            int jpg_quality,
                            GBool dump_jpg,
                            GBool tiff_jpg)
{
    if (user_cancelled)
        return gpret_user_cancelled;

    // load config
    xpdf_rc xrc;

    // open file
    xpdf_doc xdoc(filename, owner_password, user_password);
    PDFDoc* doc = xdoc.get_doc();
    if (!doc->isOk())
        return doc->getErrorCode() == errEncrypted ?
               gpret_pdf_encrypted :
               gpret_cant_open_pdf;

    // check for copy permission
    // if (!doc->okToCopy())
    //	 return gpret_dont_allow_copy;

    // get page range
    page_range range_list(range);
    if (*range == '\0')
    {
        range_list.add_item(range_item(1, doc->getNumPages()));
    }

    if (user_cancelled)
        return gpret_user_cancelled;

    // write image files
    fi_loader fi;
    int progress = 0;
    image_extractor img_out(target, dump_jpg, format, jpg_quality, tiff_jpg);
    for (int i = 0; i < range_list.item_count(); i++)
    {
        range_item& item = range_list.get_item(i);
        for (int pg = item.first;
                pg <= min(item.last, doc->getNumPages());
                pg++)
        {
            if (user_cancelled)
                return gpret_user_cancelled;
            doc->displayPage(&img_out, pg, 72, 72, 0, gFalse, gTrue, gFalse);
            printf("progress: %d\n",
                   ++progress * 100 / range_list.page_count());
        }
    }

    printf("image count: %d\n", img_out.get_image_number());

    return gpret_success;
}
Пример #12
0
/**
 * pdfcrop entry point: crops a PDF file and saves the result.
 *
 * Options:
 *   -d              increase the debug level
 *   -o              set opt_oreilly
 *   -p <page>       set opt_page
 *   -m "x1 y1 x2 y2" margin rectangle, stored in opt_margin
 * followed by the input and the output file name.
 *
 * @return EXIT_SUCCESS on success, EXIT_FAILURE on a usage error or when
 *         opening, cropping or saving the document fails.
 */
int main(int argc, char *argv[])
{
	int opt;
	PDFRectangle rect;
	bool bad_args = false;

	while ((opt = getopt(argc, argv, "dop:m:")) != -1) {
		switch (opt) {
		case 'd':
			opt_debug++;
			break;
		case 'o':
			opt_oreilly = 1;
			break;
		case 'p':
			opt_page = atoi(optarg);
			break;
		case 'm':
			// only accept the margin when all four numbers were parsed;
			// a partially filled rectangle must not be used silently
			if (sscanf(optarg, "%lf %lf %lf %lf",
				   &rect.x1, &rect.y1, &rect.x2, &rect.y2) == 4) {
				opt_margin = &rect;
			} else {
				fprintf(stderr, "invalid margin \"%s\", expected \"x1 y1 x2 y2\"\n", optarg);
				bad_args = true;
			}
			break;
		default:
			// unknown option or missing argument (getopt returned '?')
			bad_args = true;
			break;
		}
	}

	if (bad_args || optind + 2 > argc) {
		fprintf(stderr, "Usage: %s [infile] [outfile]\n", argv[0]);
		fprintf(stderr, "\n");
		return EXIT_FAILURE;
	}

	globalParams = new GlobalParams();
	GooString *in_file = new GooString(argv[optind]);
	GooString *out_file = new GooString(argv[optind+1]);

	if(opt_debug){
		printf("Input File: %s\n", in_file->getCString());
		printf("Output File: %s\n", out_file->getCString());
	}

	//GooString *owner_pw = new GooString("");
	PDFDoc *doc = new PDFDoc(in_file, NULL, NULL);
	if (!doc->isOk()) {
		// errno is not related to this failure, so report the PDF error code
		fprintf(stderr, "pdf open error: %d\n", doc->getErrorCode());
		delete doc;
		return EXIT_FAILURE;
	}

	int ret;
	ret = pdfcrop(doc);
	if (ret) {
		printf("crop failed: %d\n", ret);
		delete doc;
		return EXIT_FAILURE;
	}

	//ret = doc->saveAs(out_file, writeStandard);
	ret = doc->saveAs(out_file, writeForceRewrite);
	if (ret) {
		printf("save failed: %d\n", ret);
		delete doc;
		return EXIT_FAILURE;
	}

	delete doc;
	// in_file is not deleted here (the original author suspected a double
	// free when doing so).
	// NOTE(review): out_file and globalParams are deliberately left to
	// process exit, as before; deleting globalParams was observed to block
	// sometimes.
	//delete in_file;
	//delete out_file;
	//delete globalParams;
	return EXIT_SUCCESS;
}
Пример #13
0
/**
 * Extracts the text of the PDF file at _documentPath into
 * _documentTextBuffer and returns it as an UnparsedDocument.
 *
 * If the document carries metadata with an <rdf ...> ... </rdf...> block,
 * that block is parsed and passed to appendPdfMetaData(); when a title or
 * author is known afterwards, a small <head> section is written in front of
 * the page text. A document that is not OK, or that may not be copied,
 * yields a text of length 0. The "path" and "docno" metadata pairs are
 * always set, and _documentPath is cleared so that the next call returns 0.
 *
 * @return pointer to the internal _unparsedDocument, or 0 if no document
 *         path is pending
 * @throws lemur::api::Exception if the metadata block cannot be read
 */
indri::parse::UnparsedDocument* indri::parse::PDFDocumentExtractor::nextDocument() {
  if( !_documentPath.length() )
    return 0;

  PDFDoc* doc = 0;
  TextOutputDev* textOut = 0;
  GString* gfilename = new GString(_documentPath.c_str());
  doc = new PDFDoc( gfilename );
  // if the doc is not ok, or ok to copy, it
  // will be a document of length 0.
  if( doc->isOk() && doc->okToCopy() ) {
    void* stream = &_documentTextBuffer;
    textOut = new TextOutputDev( buffer_write, stream, gFalse, gFalse);
    if ( textOut->isOk() ) {
      int firstPage = 1;
      int lastPage = doc->getNumPages();
      double hDPI=72.0;
      double vDPI=72.0;
      int rotate=0;
      GBool useMediaBox=gFalse;
      GBool crop=gTrue;
      GBool printing=gFalse;

      // Read the metadata once and release it afterwards.
      // NOTE(review): assumes readMetadata() hands ownership of the
      // returned string to the caller - confirm against the xpdf headers.
      GString* rawMetaData = doc->readMetadata();
      if( rawMetaData != NULL ) {
        GString preparedMetaData="";

        // look for <rdf:RDF and stop at </rdf:RDF>
        for( int x=0; x<rawMetaData->getLength(); x++ ) {
          const char c = rawMetaData->getChar(x);
          if( c!='?' && c!=':' ) {
            //skip characters which the XMLReader doesn't understand
            preparedMetaData.append(c);
          }
        }
        delete rawMetaData;

        std::string metaData(preparedMetaData.getCString());
        const std::string::size_type startbegin = metaData.find("<rdf");
        const std::string::size_type closeTag = metaData.rfind("</rdf");
        const std::string::size_type stopend =
          ( closeTag == std::string::npos ) ? std::string::npos : metaData.find(">", closeTag);

        // Only parse when a complete rdf block is present; substr() would
        // throw std::out_of_range for metadata without one.
        if( startbegin != std::string::npos && stopend != std::string::npos && stopend >= startbegin ) {
          metaData = metaData.substr(startbegin, (stopend-startbegin)+1 );

          indri::xml::XMLReader reader;

          try {
            std::auto_ptr<indri::xml::XMLNode> result( reader.read( metaData.c_str() ) );
            appendPdfMetaData( result.get() );
          } catch( lemur::api::Exception& e ) {
            // release the PDF objects before the exception leaves this function
            delete textOut;
            delete doc;
            LEMUR_RETHROW( e, "Had trouble reading PDF metadata" );
          }
        }

        if( _author.length()>0 || _title.length()>0 )
        {
          std::string createdPdfHeader;
          createdPdfHeader="<head>\n";
          if(_title.length()>0) {
            createdPdfHeader+="<title>";
            createdPdfHeader+=_title;
            createdPdfHeader+="</title>\n";
          }
          if(_author.length()>0) {
            createdPdfHeader+="<author>";
            createdPdfHeader+=_author;
            createdPdfHeader+="</author>\n";
          }
          createdPdfHeader+="</head>\n";
          char *metastream = _documentTextBuffer.write( createdPdfHeader.length()+1 );
          strcpy(metastream, createdPdfHeader.c_str());
        }
      }
      doc->displayPages(textOut, firstPage, lastPage, hDPI, vDPI, rotate, useMediaBox, crop, printing);
    }
  }


  delete textOut;
  delete doc;

  _unparsedDocument.textLength = _documentTextBuffer.position();
  _unparsedDocument.contentLength = _unparsedDocument.textLength ? _documentTextBuffer.position() - 1 : 0 ; // no null 0 if text is empty.
  // the document path is stored behind the text and referenced by the metadata
  char* docnoPoint = _documentTextBuffer.write( _documentPath.length()+1 );
  strcpy( docnoPoint, _documentPath.c_str() );
  _unparsedDocument.text = _documentTextBuffer.front();
  _unparsedDocument.content = _documentTextBuffer.front();
  _unparsedDocument.metadata.clear();

  indri::parse::MetadataPair pair;

  pair.key = "path";
  pair.value = docnoPoint;
  pair.valueLength = _documentPath.length()+1;
  _unparsedDocument.metadata.push_back( pair );

  _docnostring.assign(_documentPath.c_str() );
  cleanDocno();
  pair.value = _docnostring.c_str();
  pair.valueLength = _docnostring.length()+1;
  pair.key = "docno";
  _unparsedDocument.metadata.push_back( pair );

  _documentPath = "";

  return &_unparsedDocument;
}