/*
 * Run a complete page through a device.
 *
 * The page bounds are queried and used to open the page on the device,
 * the page contents are run, then every annotation is run in list
 * order, and finally the page is closed on the device.
 *
 * When a cookie is supplied and its progress_max is not -1,
 * progress_max is increased by the number of annotations plus one.
 * While running annotations the cookie's abort flag is checked before
 * each annotation and its progress counter is bumped once per
 * annotation that is run.
 */
void fz_run_page(fz_document *doc, fz_page *page, fz_device *dev, const fz_matrix *transform, fz_cookie *cookie)
{
	fz_rect bounds;
	fz_annot *cur;

	fz_bound_page(doc, page, &bounds);
	fz_begin_page(dev, &bounds, transform);

	fz_run_page_contents(doc, page, dev, transform, cookie);

	/* Extend the progress range to account for the annotations. */
	if (cookie && cookie->progress_max != -1)
	{
		int extra = 1;

		cur = fz_first_annot(doc, page);
		while (cur)
		{
			extra++;
			cur = fz_next_annot(doc, cur);
		}
		cookie->progress_max += extra;
	}

	cur = fz_first_annot(doc, page);
	while (cur)
	{
		/* Honour an abort request before starting the next annotation. */
		if (cookie)
		{
			if (cookie->abort)
				break;
			cookie->progress++;
		}

		fz_run_annot(doc, page, cur, dev, transform, cookie);
		cur = fz_next_annot(doc, cur);
	}

	fz_end_page(dev);
}
/*
 * Load one page, run it into the output and release it again.
 *
 * `number` is converted to the index `number - 1` before being handed
 * to fz_load_page (presumably callers pass 1-based page numbers).
 * The page is run with the identity matrix and without a cookie into
 * the device returned by fz_begin_page for the file-level `out`.
 */
static void runpage(int number)
{
	fz_rect bbox;
	fz_device *target;
	fz_page *pg = fz_load_page(ctx, doc, number - 1);

	fz_bound_page(ctx, pg, &bbox);

	target = fz_begin_page(ctx, out, &bbox);
	fz_run_page(ctx, pg, target, &fz_identity, NULL);
	fz_end_page(ctx, out, target);

	fz_drop_page(ctx, pg);
}
std::string PDFDocument::GetPageText(int page, int line_sep) { // 1. Init MuPDF structures. pdf_page* page_struct = GetPage(page); fz_stext_sheet* text_sheet = fz_new_stext_sheet(_fz_context); // 2. Render page. #if MUPDF_VERSION >= 10010 fz_stext_options stext_options = { 0 }; // See #elif MUPDF_VERSION >= 10009 block below. fz_stext_page* text_page = fz_new_stext_page_from_page( _fz_context, &(page_struct->super), text_sheet, &stext_options); #elif MUPDF_VERSION >= 10009 // The function below is a wrapper around fz_run_page that uses a fresh // device. We can't use pdf_run_page to gather the text for us. // These notes are also left in here in case MuPDF's API changes again. fz_stext_page* text_page = fz_new_stext_page_from_page( _fz_context, &(page_struct->super), text_sheet); #else fz_stext_page* text_page = fz_new_text_page(_fz_context); fz_device* dev = fz_new_stext_device(_fz_context, text_sheet, text_page); // I've no idea what fz_{begin,end}_page do, but without them pdf_run_page // segfaults :-/ fz_begin_page(_fz_context, dev, &fz_infinite_rect, &fz_identity); pdf_run_page( _fz_context, _pdf_document, page_struct, dev, &fz_identity, nullptr); fz_end_page(_fz_context, dev); #endif // 3. Build text. std::string r; for (fz_page_block* page_block = text_page->blocks; page_block < text_page->blocks + text_page->len; ++page_block) { assert(page_block != nullptr); if (page_block->type != FZ_PAGE_BLOCK_TEXT) { continue; } fz_stext_block* const text_block = page_block->u.text; assert(text_block != nullptr); for (fz_stext_line* text_line = text_block->lines; text_line < text_block->lines + text_block->len; ++text_line) { assert(text_line != nullptr); for (fz_stext_span* text_span = text_line->first_span; text_span != nullptr; text_span = text_span->next) { for (int i = 0; i < text_span->len; ++i) { const int c = text_span->text[i].c; // A single UTF-8 character cannot take more than 4 bytes, but let's // go for 8. 
char buffer[8]; const int num_bytes = fz_runetochar(buffer, c); assert(num_bytes <= static_cast<int>(sizeof(buffer))); buffer[num_bytes] = '\0'; r += buffer; } } if (!isspace(r.back())) { r += line_sep; } } } // 4. Clean up. fz_drop_stext_page(_fz_context, text_page); fz_drop_stext_sheet(_fz_context, text_sheet); return r; }
/*
 * Replay a display list onto a device.
 *
 * list:    the recorded list of nodes to replay.
 * dev:     target device; its context is used for error handling.
 * top_ctm: matrix concatenated after each node's own matrix, and also
 *          applied to each node's rectangle before the visibility test.
 * scissor: area of interest; NULL means fz_infinite_rect (no culling
 *          by area).
 * cookie:  optional; progress_max is set to the list length, progress is
 *          updated per node, abort is polled once per node, and errors
 *          is incremented for every device call that throws.
 *
 * Errors raised by an individual device call are caught, counted and
 * reported as a warning; replay then continues with the next node.
 */
void fz_run_display_list(fz_display_list *list, fz_device *dev, const fz_matrix *top_ctm, const fz_rect *scissor, fz_cookie *cookie)
{
	fz_display_node *node;
	fz_matrix ctm;
	/* Number of skipped clip/mask/group openers whose closers must be skipped too. */
	int clipped = 0;
	/* Nesting depth of tiles currently being replayed. */
	int tiled = 0;
	int progress = 0;
	fz_context *ctx = dev->ctx;

	if (!scissor)
		scissor = &fz_infinite_rect;

	if (cookie)
	{
		cookie->progress_max = list->len;
		cookie->progress = 0;
	}

	for (node = list->first; node; node = node->next)
	{
		int empty;

		/* Node rectangle transformed by top_ctm, used for culling and
		 * passed on to the clip/mask/group/page calls below. */
		fz_rect node_rect = node->rect;
		fz_transform_rect(&node_rect, top_ctm);

		/* Check the cookie for aborting */
		if (cookie)
		{
			if (cookie->abort)
				break;
			cookie->progress = progress++;
		}

		/* cull objects to draw using a quick visibility test */

		/* Nodes inside a tile, and tile/page begin/end markers, are never culled. */
		if (tiled || node->cmd == FZ_CMD_BEGIN_TILE || node->cmd == FZ_CMD_END_TILE || node->cmd == FZ_CMD_BEGIN_PAGE || node->cmd == FZ_CMD_END_PAGE)
		{
			empty = 0;
		}
		else
		{
			fz_rect rect = node_rect;
			fz_intersect_rect(&rect, scissor);
			empty = fz_is_empty_rect(&rect);
		}

		/* Either this node is invisible, or we are inside a skipped
		 * clip/mask/group: keep the nesting count balanced instead of
		 * calling the device. */
		if (clipped || empty)
		{
			switch (node->cmd)
			{
			case FZ_CMD_CLIP_PATH:
			case FZ_CMD_CLIP_STROKE_PATH:
			case FZ_CMD_CLIP_STROKE_TEXT:
			case FZ_CMD_CLIP_IMAGE_MASK:
			case FZ_CMD_BEGIN_MASK:
			case FZ_CMD_BEGIN_GROUP:
				clipped++;
				continue;
			case FZ_CMD_CLIP_TEXT:
				/* Accumulated text has no extra pops */
				if (node->flag != 2)
					clipped++;
				continue;
			case FZ_CMD_POP_CLIP:
			case FZ_CMD_END_GROUP:
				/* Not inside a skipped opener: this closer belongs to
				 * something that was sent to the device, so forward it. */
				if (!clipped)
					goto visible;
				clipped--;
				continue;
			case FZ_CMD_END_MASK:
				/* Forwarded when nothing is being skipped; otherwise
				 * dropped without changing the count. */
				if (!clipped)
					goto visible;
				continue;
			default:
				continue;
			}
		}

visible:
		/* Node matrix first, then the caller's top-level matrix. */
		fz_concat(&ctm, &node->ctm, top_ctm);

		fz_try(ctx)
		{
			switch (node->cmd)
			{
			case FZ_CMD_BEGIN_PAGE:
				fz_begin_page(dev, &node_rect, &ctm);
				break;
			case FZ_CMD_END_PAGE:
				fz_end_page(dev);
				break;
			case FZ_CMD_FILL_PATH:
				fz_fill_path(dev, node->item.path, node->flag, &ctm, node->colorspace, node->color, node->alpha);
				break;
			case FZ_CMD_STROKE_PATH:
				fz_stroke_path(dev, node->item.path, node->stroke, &ctm, node->colorspace, node->color, node->alpha);
				break;
			case FZ_CMD_CLIP_PATH:
				fz_clip_path(dev, node->item.path, &node_rect, node->flag, &ctm);
				break;
			case FZ_CMD_CLIP_STROKE_PATH:
				fz_clip_stroke_path(dev, node->item.path, &node_rect, node->stroke, &ctm);
				break;
			case FZ_CMD_FILL_TEXT:
				fz_fill_text(dev, node->item.text, &ctm, node->colorspace, node->color, node->alpha);
				break;
			case FZ_CMD_STROKE_TEXT:
				fz_stroke_text(dev, node->item.text, node->stroke, &ctm, node->colorspace, node->color, node->alpha);
				break;
			case FZ_CMD_CLIP_TEXT:
				fz_clip_text(dev, node->item.text, &ctm, node->flag);
				break;
			case FZ_CMD_CLIP_STROKE_TEXT:
				fz_clip_stroke_text(dev, node->item.text, node->stroke, &ctm);
				break;
			case FZ_CMD_IGNORE_TEXT:
				fz_ignore_text(dev, node->item.text, &ctm);
				break;
			/* Shades and images are skipped when the device's hints ask for that. */
			case FZ_CMD_FILL_SHADE:
				if ((dev->hints & FZ_IGNORE_SHADE) == 0)
					fz_fill_shade(dev, node->item.shade, &ctm, node->alpha);
				break;
			case FZ_CMD_FILL_IMAGE:
				if ((dev->hints & FZ_IGNORE_IMAGE) == 0)
					fz_fill_image(dev, node->item.image, &ctm, node->alpha);
				break;
			case FZ_CMD_FILL_IMAGE_MASK:
				if ((dev->hints & FZ_IGNORE_IMAGE) == 0)
					fz_fill_image_mask(dev, node->item.image, &ctm, node->colorspace, node->color, node->alpha);
				break;
			case FZ_CMD_CLIP_IMAGE_MASK:
				if ((dev->hints & FZ_IGNORE_IMAGE) == 0)
					fz_clip_image_mask(dev, node->item.image, &node_rect, &ctm);
				break;
			case FZ_CMD_POP_CLIP:
				fz_pop_clip(dev);
				break;
			case FZ_CMD_BEGIN_MASK:
				fz_begin_mask(dev, &node_rect, node->flag, node->colorspace, node->color);
				break;
			case FZ_CMD_END_MASK:
				fz_end_mask(dev);
				break;
			case FZ_CMD_BEGIN_GROUP:
				/* The ISOLATED and KNOCKOUT bits of the flag are passed as separate booleans. */
				fz_begin_group(dev, &node_rect, (node->flag & ISOLATED) != 0, (node->flag & KNOCKOUT) != 0, node->item.blendmode, node->alpha);
				break;
			case FZ_CMD_END_GROUP:
				fz_end_group(dev);
				break;
			case FZ_CMD_BEGIN_TILE:
			{
				int cached;
				fz_rect tile_rect;
				tiled++;
				/* The tile parameters are stored in the node's color
				 * array: [2..5] hold the tile rectangle, [0] and [1]
				 * are passed as the following two arguments
				 * (presumably the x/y steps -- confirm against
				 * fz_begin_tile_id). */
				tile_rect.x0 = node->color[2];
				tile_rect.y0 = node->color[3];
				tile_rect.x1 = node->color[4];
				tile_rect.y1 = node->color[5];
				/* Note: the untransformed node->rect is passed here, not node_rect. */
				cached = fz_begin_tile_id(dev, &node->rect, &tile_rect, node->color[0], node->color[1], &ctm, node->flag);
				/* A non-zero result moves the loop cursor via
				 * skip_to_end_tile, which also receives the progress
				 * counter. */
				if (cached)
					node = skip_to_end_tile(node, &progress);
				break;
			}
			case FZ_CMD_END_TILE:
				tiled--;
				fz_end_tile(dev);
				break;
			/* SumatraPDF: support transfer functions */
			case FZ_CMD_APPLY_TRANSFER_FUNCTION:
				fz_apply_transfer_function(dev, node->item.tr, node->flag);
				break;
			}
		}
		fz_catch(ctx)
		{
			/* Swallow the error */
			if (cookie)
				cookie->errors++;
			fz_warn(ctx, "Ignoring error during interpretation");
		}
	}
}
/*
 * Build a placeholder PDF document and install it as app->doc.
 *
 * A new document is created with a single page sized to the window
 * (app->winw x app->winh) on which a thick red cross is stroked from
 * corner to corner. That page is then inserted app->pagecount times.
 *
 * Returns 0 on success. Any MuPDF error is rethrown to the caller after
 * the path, page and device have been dropped.
 */
static int make_fake_doc(pdfapp_t *app)
{
	fz_context *ctx = app->ctx;
	fz_matrix ctm = { 1, 0, 0, 1, 0, 0 };
	fz_rect bounds;
	pdf_page *newpage = NULL;
	pdf_document *pdf = NULL;
	fz_device *dev = NULL;
	fz_path *path = NULL;
	fz_stroke_state stroke = fz_default_stroke_state;
	float red[3] = { 1, 0, 0 };
	int i;

	/* Every local that is assigned inside fz_try and read in
	 * fz_always must be protected; path was previously missing. */
	fz_var(pdf);
	fz_var(dev);
	fz_var(newpage);
	fz_var(path);

	fz_try(ctx)
	{
		pdf = pdf_create_document(ctx);
		app->doc = &pdf->super;
		bounds.x0 = 0;
		bounds.y0 = 0;
		bounds.x1 = app->winw;
		bounds.y1 = app->winh;

		newpage = pdf_create_page(ctx, pdf, bounds, 72, 0);

		dev = pdf_page_write(ctx, pdf, newpage);

		/* Now the page content */
		fz_begin_page(ctx, dev, &bounds, &ctm);

		/* Two diagonals forming a cross over the whole page. */
		path = fz_new_path(ctx);
		fz_moveto(ctx, path, 0, 0);
		fz_lineto(ctx, path, bounds.x1, bounds.y1);
		fz_moveto(ctx, path, 0, bounds.y1);
		fz_lineto(ctx, path, bounds.x1, 0);

		/* Line width is a quarter of the shorter page side. */
		stroke.linewidth = fz_min(bounds.x1, bounds.y1)/4;

		fz_stroke_path(ctx, dev, path, &stroke, &ctm, fz_device_rgb(ctx), red, 1);

		fz_end_page(ctx, dev);

		/* Drop the device before the page is inserted; clearing the
		 * pointer keeps fz_always from dropping it a second time. */
		fz_drop_device(ctx, dev);
		dev = NULL;

		/* Create enough copies of our blank(ish) page so that the
		 * page number is preserved if and when a subsequent load
		 * works. */
		for (i = 0; i < app->pagecount; i++)
			pdf_insert_page(ctx, pdf, newpage, INT_MAX);
	}
	fz_always(ctx)
	{
		fz_drop_path(ctx, path);
		pdf_drop_page(ctx, newpage);
		fz_drop_device(ctx, dev);
		dev = NULL;
	}
	fz_catch(ctx)
	{
		fz_rethrow(ctx);
	}

	return 0;
}
std::string PDFDocument::GetPageText(int page, int line_sep) { // 1. Init MuPDF structures. pdf_page* page_struct = GetPage(page); #if MUPDF_VERSION < 10012 fz_stext_sheet* text_sheet = fz_new_stext_sheet(_fz_context); #endif // 2. Render page. #if MUPDF_VERSION >= 10012 fz_stext_options stext_options = {0}; // See #elif MUPDF_VERSION >= 10009 block below. fz_stext_page* text_page = fz_new_stext_page_from_page( _fz_context, &(page_struct->super), &stext_options); #elif MUPDF_VERSION >= 10010 fz_stext_options stext_options = {0}; // See #elif MUPDF_VERSION >= 10009 block below. fz_stext_page* text_page = fz_new_stext_page_from_page( _fz_context, &(page_struct->super), text_sheet, &stext_options); #elif MUPDF_VERSION >= 10009 // The function below is a wrapper around fz_run_page that uses a fresh // device. We can't use pdf_run_page to gather the text for us. // These notes are also left in here in case MuPDF's API changes again. fz_stext_page* text_page = fz_new_stext_page_from_page( _fz_context, &(page_struct->super), text_sheet); #else fz_stext_page* text_page = fz_new_text_page(_fz_context); fz_device* dev = fz_new_stext_device(_fz_context, text_sheet, text_page); // I've no idea what fz_{begin,end}_page do, but without them pdf_run_page // segfaults :-/ fz_begin_page(_fz_context, dev, &fz_infinite_rect, &fz_identity); pdf_run_page( _fz_context, _pdf_document, page_struct, dev, &fz_identity, nullptr); fz_end_page(_fz_context, dev); #endif // 3. Build text. 
std::string r; #if MUPDF_VERSION >= 10012 for (fz_stext_block* text_block = text_page->first_block; text_block != nullptr; text_block = text_block->next) { if (text_block->type != FZ_STEXT_BLOCK_TEXT) { continue; } for (fz_stext_line* text_line = text_block->u.t.first_line; text_line != nullptr; text_line = text_line->next) { for (fz_stext_char* text_char = text_line->first_char; text_char != nullptr; text_char = text_char->next) { { const int c = text_char->c; #else for (fz_page_block* page_block = text_page->blocks; page_block < text_page->blocks + text_page->len; ++page_block) { assert(page_block != nullptr); if (page_block->type != FZ_PAGE_BLOCK_TEXT) { continue; } fz_stext_block* const text_block = page_block->u.text; assert(text_block != nullptr); for (fz_stext_line* text_line = text_block->lines; text_line < text_block->lines + text_block->len; ++text_line) { assert(text_line != nullptr); for (fz_stext_span* text_span = text_line->first_span; text_span != nullptr; text_span = text_span->next) { for (int i = 0; i < text_span->len; ++i) { const int c = text_span->text[i].c; #endif // A single UTF-8 character cannot take more than 4 bytes, but let's // go for 8. char buffer[8]; const int num_bytes = fz_runetochar(buffer, c); assert(num_bytes <= static_cast<int>(sizeof(buffer))); buffer[num_bytes] = '\0'; r += buffer; } } if (!isspace(r.back())) { r += line_sep; } } } // 4. Clean up. 
fz_drop_stext_page(_fz_context, text_page); #if MUPDF_VERSION < 10012 fz_drop_stext_sheet(_fz_context, text_sheet); #endif return r; } PDFDocument::PDFOutlineItem::~PDFOutlineItem() {} PDFDocument::PDFOutlineItem::PDFOutlineItem(fz_outline* src) { if (src == nullptr) { _dest_page = -1; } else { _title = src->title; #if MUPDF_VERSION >= 10010 _dest_page = src->page; #else _dest_page = src->dest.ld.gotor.page; #endif } } int PDFDocument::PDFOutlineItem::GetDestPage() const { return _dest_page; } PDFDocument::PDFOutlineItem* PDFDocument::PDFOutlineItem::Build( fz_context* ctx, fz_outline* src) { PDFOutlineItem* root = nullptr; std::vector<std::unique_ptr<OutlineItem>> items; BuildRecursive(src, &items); fz_drop_outline(ctx, src); if (items.empty()) { return nullptr; } else if (items.size() == 1) { root = dynamic_cast<PDFOutlineItem*>(items[0].release()); } else { root = new PDFOutlineItem(nullptr); root->_title = DEFAULT_ROOT_OUTLINE_ITEM_TITLE; root->_children.swap(items); } return root; } void PDFDocument::PDFOutlineItem::BuildRecursive( fz_outline* src, std::vector<std::unique_ptr<Document::OutlineItem>>* output) { assert(output != nullptr); for (fz_outline* i = src; i != nullptr; i = i->next) { PDFOutlineItem* item = new PDFOutlineItem(i); if (i->down != nullptr) { BuildRecursive(i->down, &(item->_children)); } output->push_back(std::unique_ptr<Document::OutlineItem>(item)); } } PDFDocument::PDFPageCache::PDFPageCache(int cache_size, PDFDocument* parent) : Cache<int, pdf_page*>(cache_size), _parent(parent) {} PDFDocument::PDFPageCache::~PDFPageCache() { Clear(); } pdf_page* PDFDocument::PDFPageCache::Load(const int& page) { std::unique_lock<std::mutex> lock(_mutex); return pdf_load_page(_parent->_fz_context, _parent->_pdf_document, page); } void PDFDocument::PDFPageCache::Discard( const int& page, pdf_page* const& page_struct) { std::unique_lock<std::mutex> lock(_mutex); pdf_drop_page(_parent->_fz_context, _parent->_pdf_document, page_struct); }