Exemplo n.º 1
0
/*
  put_pages_on_new_sheet() - put the contents of pages onto one page in dest_doc

  How does it work?
  1.  Create a page for the new sheet that will contain all source-pages
  2.  Analyze their sizes! If too small, just center. If too big, warn
  3.  Analye their userunit-value... if one of them != 1 we have to insert 
      scale-operations to all others and need to use userunit in the sheet too
  4.  Copy each entry of the resource-dicts of every page of the sheet 
      (if it has not already been copied for a previous sheet)
  5.  Create a dict that contains src-resource-name to dest-resource-name for 
      this sheet (this is an n:m relation where n >= m)
  6.  For each source-page-stream create a new content-stream and change
      scale (if user-unit is needed), translation and rotation... And add a 
      clipping-path around the page
  7.  Copy all source content-streams of the current page to this new content-
      stream renaming all its resources to the new names
  8.  TODO: Annots?
  9.  TODO: Preseparated pages
  10. TODO: Merge procedure sets 
*/
static int put_pages_on_new_sheet(fz_context *dest_ctx, pdf_document *dest_doc, 
	fz_context *src_ctx, pdf_document *src_doc, 
	struct pos_info *positions, size_t put_count)
{
	/* copy each entry of the resource dict */
	static const int RENAME_INITIAL_CAP = 128;
	struct put_info put_info = { dest_doc, src_doc, NULL, NULL, 0 };
	
	/* what destianation page is currently opened? */
	int sheet_pagenum = -1;
	pdf_page *sheet = NULL;
	
	size_t i;
	for(i = 0; i < put_count; i++) {
		/* if the current sheet changes, we need to close the current and open 
		   the new one*/
		if(sheet_pagenum != positions[i].sheet_pagenum) {
			if(sheet != NULL)
				pdf_drop_page(dest_ctx, sheet);
			sheet_pagenum = positions[i].sheet_pagenum;
			sheet = pdf_load_page(dest_ctx, dest_doc, sheet_pagenum);
			// TODO: We assume that the destination-page uses arrays for the
			// content and has a usable resoruce-dict... TODO: add those checks
		}
		
		/* load the source-page */
		pdf_page *src_page = pdf_load_page(src_ctx, src_doc, positions[i].src_pagenum);
		
		/* create a rename_dict */
		put_info.rename_dict = pdf_new_dict(dest_ctx, dest_doc, RENAME_INITIAL_CAP);
	
		/* copy all resources, adjust the page and finally copy the content */
		copy_and_rename_resources(dest_ctx, sheet->resources, 
			src_ctx, src_page->resources, &put_info);
		adjust_page_position(src_ctx, src_doc, src_page, positions + i);
		copy_content_streams_of_page(
			dest_ctx, sheet, src_ctx, src_page, &put_info, positions + i);

		/* free everything we created for that source-page */
		pdf_drop_obj(dest_ctx, put_info.rename_dict);
		pdf_drop_page(src_ctx, src_page);
	}

	if(sheet != NULL)
		pdf_drop_page(dest_ctx, sheet);

	return(0);
}
Exemplo n.º 2
0
static ErrorCode juggler_impose_create_sheet(fz_context *ctx, pdf_document *dest_doc, int width, int height)
{
	/* create and insert page to dest_doc */
	fz_rect rect = { 0, 0, width, height };// { 0, 0, 1390, 1684 };
	pdf_page *sheet = pdf_create_page(ctx, dest_doc, rect, 0, 0);

	sheet->resources = pdf_new_dict(ctx, dest_doc, 16);
	pdf_dict_puts(ctx, sheet->me, "Resources", sheet->resources); /* will be droped when freeing the page */
	sheet->contents = pdf_new_array(ctx, dest_doc, 8);
	pdf_dict_puts(ctx, sheet->me, "Contents", sheet->contents); /* will be droped when freeing the page */

	pdf_insert_page(ctx, dest_doc, sheet, INT_MAX);
	
	pdf_drop_page(ctx, sheet);
	
	return(NoError);
}
Exemplo n.º 3
0
void PDFDocument::PDFPageCache::Discard(
    const int& page, pdf_page* const& page_struct) {
  std::unique_lock<std::mutex> lock(_mutex);
  pdf_drop_page(_parent->_fz_context, _parent->_pdf_document, page_struct);
}
Exemplo n.º 4
0
static int make_fake_doc(pdfapp_t *app)
{
	fz_context *ctx = app->ctx;
	fz_matrix ctm = { 1, 0, 0, 1, 0, 0 };
	fz_rect bounds;
	pdf_page *newpage = NULL;
	pdf_document *pdf = NULL;
	fz_device *dev = NULL;
	fz_path *path = NULL;
	fz_stroke_state stroke = fz_default_stroke_state;
	float red[3] = { 1, 0, 0 };
	int i;

	fz_var(pdf);
	fz_var(dev);
	fz_var(newpage);

	fz_try(ctx)
	{
		pdf = pdf_create_document(ctx);
		app->doc = &pdf->super;
		bounds.x0 = 0;
		bounds.y0 = 0;
		bounds.x1 = app->winw;
		bounds.y1 = app->winh;

		newpage = pdf_create_page(ctx, pdf, bounds, 72, 0);

		dev = pdf_page_write(ctx, pdf, newpage);

		/* Now the page content */
		fz_begin_page(ctx, dev, &bounds, &ctm);

		path = fz_new_path(ctx);
		fz_moveto(ctx, path, 0, 0);
		fz_lineto(ctx, path, bounds.x1, bounds.y1);
		fz_moveto(ctx, path, 0, bounds.y1);
		fz_lineto(ctx, path, bounds.x1, 0);

		stroke.linewidth = fz_min(bounds.x1, bounds.y1)/4;

		fz_stroke_path(ctx, dev, path, &stroke, &ctm, fz_device_rgb(ctx), red, 1);

		fz_end_page(ctx, dev);

		fz_drop_device(ctx, dev);
		dev = NULL;

		/* Create enough copies of our blank(ish) page so that the
		 * page number is preserved if and when a subsequent load
		 * works. */
		for (i = 0; i < app->pagecount; i++)
			pdf_insert_page(ctx, pdf, newpage, INT_MAX);
	}
	fz_always(ctx)
	{
		fz_drop_path(ctx, path);
		pdf_drop_page(ctx, newpage);
		fz_drop_device(ctx, dev);
		dev = NULL;
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	return 0;
}
Exemplo n.º 5
0
std::string PDFDocument::GetPageText(int page, int line_sep) {
  // 1. Init MuPDF structures.
  pdf_page* page_struct = GetPage(page);

#if MUPDF_VERSION < 10012
  fz_stext_sheet* text_sheet = fz_new_stext_sheet(_fz_context);
#endif

  // 2. Render page.
#if MUPDF_VERSION >= 10012
  fz_stext_options stext_options = {0};
  // See #elif MUPDF_VERSION >= 10009 block below.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), &stext_options);
#elif MUPDF_VERSION >= 10010
  fz_stext_options stext_options = {0};
  // See #elif MUPDF_VERSION >= 10009 block below.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), text_sheet, &stext_options);
#elif MUPDF_VERSION >= 10009
  // The function below is a wrapper around fz_run_page that uses a fresh
  // device. We can't use pdf_run_page to gather the text for us.
  // These notes are also left in here in case MuPDF's API changes again.
  fz_stext_page* text_page = fz_new_stext_page_from_page(
      _fz_context, &(page_struct->super), text_sheet);
#else
  fz_stext_page* text_page = fz_new_text_page(_fz_context);
  fz_device* dev = fz_new_stext_device(_fz_context, text_sheet, text_page);
  // I've no idea what fz_{begin,end}_page do, but without them pdf_run_page
  // segfaults :-/
  fz_begin_page(_fz_context, dev, &fz_infinite_rect, &fz_identity);
  pdf_run_page(
      _fz_context, _pdf_document, page_struct, dev, &fz_identity, nullptr);
  fz_end_page(_fz_context, dev);
#endif

  // 3. Build text.
  std::string r;
#if MUPDF_VERSION >= 10012
  for (fz_stext_block* text_block = text_page->first_block;
       text_block != nullptr; text_block = text_block->next) {
    if (text_block->type != FZ_STEXT_BLOCK_TEXT) {
      continue;
    }
    for (fz_stext_line* text_line = text_block->u.t.first_line;
         text_line != nullptr; text_line = text_line->next) {
      for (fz_stext_char* text_char = text_line->first_char;
           text_char != nullptr; text_char = text_char->next) {
        {
          const int c = text_char->c;
#else
  for (fz_page_block* page_block = text_page->blocks;
       page_block < text_page->blocks + text_page->len; ++page_block) {
    assert(page_block != nullptr);
    if (page_block->type != FZ_PAGE_BLOCK_TEXT) {
      continue;
    }
    fz_stext_block* const text_block = page_block->u.text;
    assert(text_block != nullptr);
    for (fz_stext_line* text_line = text_block->lines;
         text_line < text_block->lines + text_block->len; ++text_line) {
      assert(text_line != nullptr);
      for (fz_stext_span* text_span = text_line->first_span;
           text_span != nullptr; text_span = text_span->next) {
        for (int i = 0; i < text_span->len; ++i) {
          const int c = text_span->text[i].c;
#endif
          // A single UTF-8 character cannot take more than 4 bytes, but let's
          // go for 8.
          char buffer[8];
          const int num_bytes = fz_runetochar(buffer, c);
          assert(num_bytes <= static_cast<int>(sizeof(buffer)));
          buffer[num_bytes] = '\0';
          r += buffer;
        }
      }
      if (!isspace(r.back())) {
        r += line_sep;
      }
    }
  }

  // 4. Clean up.
  fz_drop_stext_page(_fz_context, text_page);
#if MUPDF_VERSION < 10012
  fz_drop_stext_sheet(_fz_context, text_sheet);
#endif

  return r;
}

PDFDocument::PDFOutlineItem::~PDFOutlineItem() {}

PDFDocument::PDFOutlineItem::PDFOutlineItem(fz_outline* src) {
  if (src == nullptr) {
    _dest_page = -1;
  } else {
    _title = src->title;
#if MUPDF_VERSION >= 10010
    _dest_page = src->page;
#else
    _dest_page = src->dest.ld.gotor.page;
#endif
  }
}

int PDFDocument::PDFOutlineItem::GetDestPage() const { return _dest_page; }

PDFDocument::PDFOutlineItem* PDFDocument::PDFOutlineItem::Build(
    fz_context* ctx, fz_outline* src) {
  PDFOutlineItem* root = nullptr;
  std::vector<std::unique_ptr<OutlineItem>> items;
  BuildRecursive(src, &items);
  fz_drop_outline(ctx, src);
  if (items.empty()) {
    return nullptr;
  } else if (items.size() == 1) {
    root = dynamic_cast<PDFOutlineItem*>(items[0].release());
  } else {
    root = new PDFOutlineItem(nullptr);
    root->_title = DEFAULT_ROOT_OUTLINE_ITEM_TITLE;
    root->_children.swap(items);
  }
  return root;
}

void PDFDocument::PDFOutlineItem::BuildRecursive(
    fz_outline* src,
    std::vector<std::unique_ptr<Document::OutlineItem>>* output) {
  assert(output != nullptr);
  for (fz_outline* i = src; i != nullptr; i = i->next) {
    PDFOutlineItem* item = new PDFOutlineItem(i);
    if (i->down != nullptr) {
      BuildRecursive(i->down, &(item->_children));
    }
    output->push_back(std::unique_ptr<Document::OutlineItem>(item));
  }
}

PDFDocument::PDFPageCache::PDFPageCache(int cache_size, PDFDocument* parent)
    : Cache<int, pdf_page*>(cache_size), _parent(parent) {}

PDFDocument::PDFPageCache::~PDFPageCache() { Clear(); }

pdf_page* PDFDocument::PDFPageCache::Load(const int& page) {
  std::unique_lock<std::mutex> lock(_mutex);
  return pdf_load_page(_parent->_fz_context, _parent->_pdf_document, page);
}

void PDFDocument::PDFPageCache::Discard(
    const int& page, pdf_page* const& page_struct) {
  std::unique_lock<std::mutex> lock(_mutex);
  pdf_drop_page(_parent->_fz_context, _parent->_pdf_document, page_struct);
}