/*
 * Load the PDF page selected by app->pageno (1-based; the loader is given
 * pageno - 1) and record it in the application state:
 *   - page_bbox / page_rotate are copied from the page's mediabox / rotate,
 *   - page_links takes over the page's link list (the page's own pointer is
 *     cleared so the list is not released together with the page),
 *   - page_list receives a fresh display list filled by running the page
 *     through a list device.
 * Errors are handed to pdfapp_error().
 * NOTE(review): the code after the first error check dereferences the page
 * unconditionally, so pdfapp_error() presumably does not return -- confirm.
 */
static void pdfapp_loadpage_pdf(pdfapp_t *app)
{
	pdf_page *pg;
	fz_device *list_dev;
	fz_error err;

	err = pdf_load_page(&pg, app->xref, app->pageno - 1);
	if (err)
		pdfapp_error(app, err);

	/* Copy the page geometry into the application state. */
	app->page_bbox = pg->mediabox;
	app->page_rotate = pg->rotate;

	/* Move the links over to the app; detach them from the page. */
	app->page_links = pg->links;
	pg->links = NULL;

	/* Create display list */
	app->page_list = fz_new_display_list();
	list_dev = fz_new_list_device(app->page_list);

	err = pdf_run_page(app->xref, pg, list_dev, fz_identity);
	if (err)
	{
		err = fz_rethrow(err, "cannot draw page %d in '%s'", app->pageno, app->doctitle);
		pdfapp_error(app, err);
	}

	fz_free_device(list_dev);
	pdf_free_page(pg);

	pdf_age_store(app->xref->store, 3);
}
/*
 * Untyped-pointer front end for pdf_load_page().
 *
 * p_ctx   opaque pointer that is cast to fz_context*
 * p_doc   opaque pointer that is cast to pdf_document*
 * number  page number, forwarded unchanged
 *
 * Returns whatever pdf_load_page() returns; the result is not checked here.
 */
pdf_page* j_mu_load_page(void* p_ctx, void* p_doc, int number)
{
	return pdf_load_page((fz_context*) p_ctx, (pdf_document*) p_doc, number);
}
int pdfsign_main(int argc, char **argv) { fz_context *ctx; pdf_document *doc; char *password = ""; int i, n, c; pdf_page *page = NULL; while ((c = fz_getopt(argc, argv, "p:")) != -1) { switch (c) { case 'p': password = fz_optarg; break; default: usage(); break; } } if (argc - fz_optind < 1) usage(); filename = argv[fz_optind++]; ctx = fz_new_context(NULL, NULL, FZ_STORE_UNLIMITED); if (!ctx) { fprintf(stderr, "cannot initialize context\n"); exit(1); } fz_var(page); doc = pdf_open_document(ctx, filename); fz_try(ctx) { if (pdf_needs_password(ctx, doc)) if (!pdf_authenticate_password(ctx, doc, password)) fz_warn(ctx, "cannot authenticate password: %s", filename); n = pdf_count_pages(ctx, doc); for (i = 0; i < n; ++i) { page = pdf_load_page(ctx, doc, i); verify_page(ctx, doc, i, page); fz_drop_page(ctx, (fz_page*)page); page = NULL; } } fz_always(ctx) pdf_drop_document(ctx, doc); fz_catch(ctx) { fz_drop_page(ctx, (fz_page*)page); fprintf(stderr, "error verify signatures: %s\n", fz_caught_message(ctx)); } fz_flush_warnings(ctx); fz_drop_context(ctx); return 0; }
/**
 * Render page @p i at 300 dpi into a 1-bit (Format_MonoLSB) QImage.
 *
 * The page is drawn into a gray pixmap cleared to 255, then thresholded:
 * samples below 128 become colour-table index 1 (black), everything else
 * index 0 (white).
 *
 * @param i  page number handed to pdf_load_page()
 * @return   the rendered image, or a null QImage if the page pointer is 0
 */
QImage Pdf::page(int i)
{
    pdf_page* page = pdf_load_page(xref, i);
    if (page == 0) {
        printf("cannot load page %d\n", i);
        return QImage();
    }

    static const float resolution = 300.0;
    const float zoom = resolution / 72.0;

    // Flip the y axis, scale to the target resolution, apply page rotation.
    fz_matrix ctm = fz_translate(0, -page->mediabox.y1);
    ctm = fz_concat(ctm, fz_scale(zoom, -zoom));
    ctm = fz_concat(ctm, fz_rotate(page->rotate));

    fz_bbox bbox = fz_round_rect(fz_transform_rect(ctm, page->mediabox));
    fz_pixmap* pix = fz_new_pixmap_with_rect(ctx, fz_device_gray, bbox);
    fz_clear_pixmap_with_color(pix, 255);

    fz_device* dev = fz_new_draw_device(ctx, cache, pix);
    pdf_run_page(xref, page, dev, ctm);
    fz_free_device(dev);

    const int w = pix->w;
    const int h = pix->h;

    QImage image(w, h, QImage::Format_MonoLSB);
    QVector<QRgb> ct(2);
    ct[0] = qRgb(255, 255, 255);
    ct[1] = qRgb(0, 0, 0);
    image.setColorTable(ct);

    // Start from all-white so padding bits and the trailing partial byte of
    // each scanline hold defined values.
    image.fill(0);

    // Samples per pixel. The previous code skipped exactly one extra byte
    // after each gray value; taking the count from the pixmap keeps that
    // behaviour without hard-coding it.
    // NOTE(review): assumes the first component of each pixel is the gray
    // value and rows are tightly packed (w * n bytes) -- confirm.
    const int n = pix->n;
    const int stride = image.bytesPerLine();
    uchar* const bits = image.bits();

    for (int line = 0; line < h; ++line) {
        // Re-derive both row pointers on every line. Previously the source
        // pointer simply kept advancing, so whenever w was not a multiple of
        // 8 the unread tail of a row was consumed as the start of the next
        // row and the picture sheared.
        const uchar* src = pix->samples + static_cast<size_t>(line) * w * n;
        uchar* dst = bits + static_cast<size_t>(stride) * line;

        for (int col = 0; col < w; ++col) {
            // MonoLSB: pixel 0 of a byte lives in bit 0.
            if (src[static_cast<size_t>(col) * n] < 128)
                dst[col >> 3] |= static_cast<uchar>(1u << (col & 7));
        }
    }

    fz_drop_pixmap(ctx, pix);
    pdf_free_page(ctx, page);
    return image;
}
QImage Document::RenderPage (int num, double xRes, double yRes) { auto page = WrapPage (pdf_load_page (MuDoc_, num), MuDoc_); if (!page) return QImage (); #if MUPDF_VERSION < 0x0102 const auto& rect = pdf_bound_page (MuDoc_, page.get ()); #else fz_rect rect; pdf_bound_page (MuDoc_, page.get (), &rect); #endif auto px = fz_new_pixmap (MuCtx_, fz_device_bgr, xRes * (rect.x1 - rect.x0), yRes * (rect.y1 - rect.y0)); fz_clear_pixmap (MuCtx_, px); auto dev = fz_new_draw_device (MuCtx_, px); #if MUPDF_VERSION < 0x0102 pdf_run_page (MuDoc_, page.get (), dev, fz_scale (xRes, yRes), NULL); #else fz_matrix matrix; pdf_run_page (MuDoc_, page.get (), dev, fz_scale (&matrix, xRes, yRes), NULL); #endif fz_free_device (dev); const int pxWidth = fz_pixmap_width (MuCtx_, px); const int pxHeight = fz_pixmap_height (MuCtx_, px); auto samples = fz_pixmap_samples (MuCtx_, px); QImage temp (samples, pxWidth, pxHeight, QImage::Format_ARGB32); QImage img (QSize (pxWidth, pxHeight), QImage::Format_ARGB32); for (int y = 0; y < pxHeight; ++y) { auto target = reinterpret_cast<QRgb*> (img.scanLine (y)); const auto source = reinterpret_cast<QRgb*> (temp.scanLine (y)); std::memcpy (target, source, sizeof (source [0]) * pxWidth); } fz_drop_pixmap (MuCtx_, px); temp = QImage (QSize (pxWidth, pxHeight), QImage::Format_ARGB32); QPainter p; p.begin (&temp); p.fillRect (QRect (QPoint (0, 0), temp.size ()), Qt::white); p.drawImage (0, 0, img); p.end (); return temp; }
/**
 * Report the size of page @p num, computed as the width and height of the
 * rectangle returned by pdf_bound_page().
 *
 * @return the page size, or a default-constructed QSize when the wrapped
 *         page is empty.
 */
QSize Document::GetPageSize (int num) const
{
	auto page = WrapPage (pdf_load_page (MuDoc_, num), MuDoc_);
	if (!page)
		return QSize ();

	// The bounding-box call changed from return-by-value to an out-parameter
	// at MUPDF_VERSION 0x0102.
#if MUPDF_VERSION < 0x0102
	const auto& rect = pdf_bound_page (MuDoc_, page.get ());
#else
	fz_rect rect;
	pdf_bound_page (MuDoc_, page.get (), &rect);
#endif

	return QSize (rect.x1 - rect.x0, rect.y1 - rect.y0);
}
/*
 * Return the page stored in self->pages[pageno], loading it on first use.
 *
 * When the slot is NULL, pdf_load_page() is asked to fill it; a non-zero
 * error code is logged through mume_error(). Whatever the slot holds
 * afterwards is returned, so the result after a failed load is whatever
 * pdf_load_page() left there.
 * NOTE(review): pageno is not range-checked here -- callers presumably
 * guarantee it is a valid index into self->pages.
 */
static pdf_page* _pdf_doc_get_page(struct _pdf_doc *self, int pageno)
{
    pdf_page **slot = &self->pages[pageno];

    if (NULL == *slot) {
        fz_error err = pdf_load_page(slot, self->xref, pageno);

        if (err) {
            mume_error(("pdf_load_page(%d): %d\n", pageno, err));
        }
    }

    return *slot;
}
/*
 * JNI entry point: make `page` the current page.
 *
 * Replaces the file-level display list (currentPageList) with one built
 * from the requested page and updates pagenum, currentMediabox, pageWidth
 * and pageHeight. The page size is the page bounds scaled by
 * resolution / 72 and rounded. If MuPDF throws, the failure is logged and
 * pageWidth/pageHeight stay at whatever was set before the throw (100x100
 * unless the size had already been computed).
 */
JNIEXPORT void JNICALL
Java_com_artifex_mupdf_MuPDFCore_gotoPageInternal(JNIEnv *env, jobject thiz, int page)
{
	pdf_page *loaded = NULL;
	fz_device *list_dev = NULL;
	fz_matrix scale_ctm;
	fz_bbox bounds;
	float scale;

	/* Both are assigned inside fz_try and released after it. */
	fz_var(list_dev);
	fz_var(loaded);

	/* In the event of an error, ensure we give a non-empty page */
	pageWidth = 100;
	pageHeight = 100;

	LOGE("Goto page %d...", page);
	fz_try(ctx)
	{
		/* Throw away the list of the previously shown page, if any. */
		if (currentPageList != NULL)
		{
			fz_free_display_list(ctx, currentPageList);
			currentPageList = NULL;
		}

		pagenum = page;
		loaded = pdf_load_page(xref, pagenum);

		scale = resolution / 72;
		currentMediabox = pdf_bound_page(xref, loaded);
		scale_ctm = fz_scale(scale, scale);
		bounds = fz_round_rect(fz_transform_rect(scale_ctm, currentMediabox));
		pageWidth = bounds.x1 - bounds.x0;
		pageHeight = bounds.y1 - bounds.y0;

		/* Render to list */
		currentPageList = fz_new_display_list(ctx);
		list_dev = fz_new_list_device(ctx, currentPageList);
		pdf_run_page(xref, loaded, list_dev, fz_identity, NULL);
	}
	fz_catch(ctx)
	{
		LOGE("cannot make displaylist from page %d", pagenum);
	}

	/* Released on both the success and the failure path. */
	pdf_free_page(ctx, loaded);
	loaded = NULL;
	fz_free_device(list_dev);
	list_dev = NULL;
}
/*
 * Load page `pagenum` (1-based; pagenum - 1 is passed to the loader) and log
 * how long the load took.
 *
 * Returns the loaded page, or NULL after logging an error when MuPDF throws.
 * No timing line is logged in the failure case.
 */
pdf_page *benchloadpage(fz_context *ctx, pdf_document *xref, int pagenum)
{
	mstimer load_timer;
	pdf_page *loaded = NULL;

	timerstart(&load_timer);
	fz_try(ctx)
	{
		loaded = pdf_load_page(xref, pagenum - 1);
	}
	fz_catch(ctx)
	{
		logbench("Error: failed to load page %d\n", pagenum);
		return NULL;
	}
	timerstop(&load_timer);

	logbench("pageload %3d: %.2f ms\n", pagenum, timeinms(&load_timer));
	return loaded;
}
// Loads the requested page from the parent document's MuPDF handles.
// _mutex is held for the duration of the pdf_load_page() call.
pdf_page* PDFDocument::PDFPageCache::Load(const int& page) {
  std::lock_guard<std::mutex> guard(_mutex);
  pdf_page* const loaded =
      pdf_load_page(_parent->_fz_context, _parent->_pdf_document, page);
  return loaded;
}
/*
 * Split every page of the document into a grid of sub-pages.
 *
 * The grid is x_factor columns by y_factor rows (file-level settings). When
 * both are 0 the page is cut in two along its longer edge; when only one is
 * 0 it is treated as 1. Each tile is a copy of the original page dictionary
 * with its own MediaBox; the last column/row reuses the original x1/y1 so
 * the tiles cover the page exactly. The page tree root is then rewritten so
 * that Kids holds only the tiles and Count matches.
 *
 * The catalog is reduced to its Type and Pages entries.
 *
 * Reference handling: the put/push/update calls used here retain their
 * argument (this function already drops `root` and `kids` after handing
 * them over), so every locally created object is dropped once it has been
 * stored. NOTE(review): objects are not released if a call throws midway.
 */
static void decimatepages(fz_context *ctx, pdf_document *doc)
{
	pdf_obj *oldroot, *root, *pages, *kids, *parent, *count;
	int num_pages = pdf_count_pages(ctx, doc);
	int page, kidcount;

	oldroot = pdf_dict_get(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root);
	pages = pdf_dict_get(ctx, oldroot, PDF_NAME_Pages);

	root = pdf_new_dict(ctx, doc, 2);
	pdf_dict_put(ctx, root, PDF_NAME_Type, pdf_dict_get(ctx, oldroot, PDF_NAME_Type));
	pdf_dict_put(ctx, root, PDF_NAME_Pages, pdf_dict_get(ctx, oldroot, PDF_NAME_Pages));

	pdf_update_object(ctx, doc, pdf_to_num(ctx, oldroot), root);

	pdf_drop_obj(ctx, root);

	/* Create a new kids array with our new pages in */
	parent = pdf_new_indirect(ctx, doc, pdf_to_num(ctx, pages), pdf_to_gen(ctx, pages));

	kids = pdf_new_array(ctx, doc, 1);

	kidcount = 0;
	for (page = 0; page < num_pages; page++)
	{
		pdf_page *page_details = pdf_load_page(ctx, doc, page);
		fz_rect mediabox;
		int xf = x_factor, yf = y_factor;
		int x, y;
		float w, h;

		/*
		 * Only the mediabox is needed. Copy it and release the page right
		 * away; previously the page was never released.
		 */
		mediabox = page_details->mediabox;
		fz_drop_page(ctx, (fz_page*)page_details);

		w = mediabox.x1 - mediabox.x0;
		h = mediabox.y1 - mediabox.y0;

		if (xf == 0 && yf == 0)
		{
			/* Nothing specified, so split along the long edge */
			if (w > h)
				xf = 2, yf = 1;
			else
				xf = 1, yf = 2;
		}
		else if (xf == 0)
			xf = 1;
		else if (yf == 0)
			yf = 1;

		/* Rows are emitted top-down (highest y first), columns left to right. */
		for (y = yf-1; y >= 0; y--)
		{
			for (x = 0; x < xf; x++)
			{
				pdf_obj *newpageobj, *newpageref, *newmediabox, *coord;
				fz_rect mb;
				float coords[4];
				int num, k;

				newpageobj = pdf_copy_dict(ctx, pdf_lookup_page_obj(ctx, doc, page));
				num = pdf_create_object(ctx, doc);
				pdf_update_object(ctx, doc, num, newpageobj);
				newpageref = pdf_new_indirect(ctx, doc, num, 0);

				newmediabox = pdf_new_array(ctx, doc, 4);

				/* Last column/row snaps to the page edge to avoid rounding gaps. */
				mb.x0 = mediabox.x0 + (w/xf)*x;
				if (x == xf-1)
					mb.x1 = mediabox.x1;
				else
					mb.x1 = mediabox.x0 + (w/xf)*(x+1);
				mb.y0 = mediabox.y0 + (h/yf)*y;
				if (y == yf-1)
					mb.y1 = mediabox.y1;
				else
					mb.y1 = mediabox.y0 + (h/yf)*(y+1);

				coords[0] = mb.x0;
				coords[1] = mb.y0;
				coords[2] = mb.x1;
				coords[3] = mb.y1;
				for (k = 0; k < 4; k++)
				{
					coord = pdf_new_real(ctx, doc, coords[k]);
					pdf_array_push(ctx, newmediabox, coord);
					pdf_drop_obj(ctx, coord);
				}

				pdf_dict_put(ctx, newpageobj, PDF_NAME_Parent, parent);
				pdf_dict_put(ctx, newpageobj, PDF_NAME_MediaBox, newmediabox);

				/* Store page object in new kids array */
				pdf_array_push(ctx, kids, newpageref);

				/* The document, the page dict and the kids array hold these now. */
				pdf_drop_obj(ctx, newmediabox);
				pdf_drop_obj(ctx, newpageref);
				pdf_drop_obj(ctx, newpageobj);

				kidcount++;
			}
		}
	}

	pdf_drop_obj(ctx, parent);

	/* Update page count and kids array */
	count = pdf_new_int(ctx, doc, kidcount);
	pdf_dict_put(ctx, pages, PDF_NAME_Count, count);
	pdf_drop_obj(ctx, count);
	pdf_dict_put(ctx, pages, PDF_NAME_Kids, kids);
	pdf_drop_obj(ctx, kids);
}
/*
 * Split every page of the document into a grid of sub-pages.
 *
 * The grid is x_factor columns by y_factor rows (file-level settings). When
 * both are 0 the page is cut in two along its longer edge; when only one is
 * 0 it is treated as 1. Each tile is a copy of the original page dictionary
 * with its own MediaBox; the last column/row reuses the original x1/y1 so
 * the tiles cover the page exactly. The page tree root is then rewritten so
 * that Kids holds only the tiles and Count matches.
 *
 * Reference handling: the puts/push/update calls used here retain their
 * argument (this function already drops `root` and `kids` after handing
 * them over), so every locally created object is dropped once it has been
 * stored. NOTE(review): objects are not released if a call throws midway.
 */
static void decimatepages(pdf_document *xref)
{
	pdf_obj *oldroot, *root, *pages, *kids, *parent, *count;
	fz_context *ctx = xref->ctx;
	int num_pages = pdf_count_pages(xref);
	int page, kidcount;

	/* Keep only pages/type and (reduced) dest entries to avoid
	 * references to unretained pages */
	oldroot = pdf_dict_gets(pdf_trailer(xref), "Root");
	pages = pdf_dict_gets(oldroot, "Pages");

	root = pdf_new_dict(ctx, 2);
	pdf_dict_puts(root, "Type", pdf_dict_gets(oldroot, "Type"));
	pdf_dict_puts(root, "Pages", pdf_dict_gets(oldroot, "Pages"));

	pdf_update_object(xref, pdf_to_num(oldroot), root);

	pdf_drop_obj(root);

	/* Create a new kids array holding the generated tile pages */
	parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref);

	kids = pdf_new_array(ctx, 1);

	kidcount = 0;
	for (page = 0; page < num_pages; page++)
	{
		pdf_page *page_details = pdf_load_page(xref, page);
		fz_rect mediabox;
		int xf = x_factor, yf = y_factor;
		int x, y;
		float w, h;

		/*
		 * Only the mediabox is needed. Copy it and release the page right
		 * away; previously the page was never released.
		 * NOTE(review): pdf_free_page(doc, page) is the release call that
		 * pairs with this pdf_load_page(doc, n) API generation -- confirm
		 * against the MuPDF headers in use.
		 */
		mediabox = page_details->mediabox;
		pdf_free_page(xref, page_details);

		w = mediabox.x1 - mediabox.x0;
		h = mediabox.y1 - mediabox.y0;

		if (xf == 0 && yf == 0)
		{
			/* Nothing specified, so split along the long edge */
			if (w > h)
				xf = 2, yf = 1;
			else
				xf = 1, yf = 2;
		}
		else if (xf == 0)
			xf = 1;
		else if (yf == 0)
			yf = 1;

		/* Rows are emitted top-down (highest y first), columns left to right. */
		for (y = yf-1; y >= 0; y--)
		{
			for (x = 0; x < xf; x++)
			{
				pdf_obj *newpageobj, *newpageref, *newmediabox, *coord;
				fz_rect mb;
				float coords[4];
				int num, k;

				newpageobj = pdf_copy_dict(ctx, xref->page_objs[page]);
				num = pdf_create_object(xref);
				pdf_update_object(xref, num, newpageobj);
				newpageref = pdf_new_indirect(ctx, num, 0, xref);

				newmediabox = pdf_new_array(ctx, 4);

				/* Last column/row snaps to the page edge to avoid rounding gaps. */
				mb.x0 = mediabox.x0 + (w/xf)*x;
				if (x == xf-1)
					mb.x1 = mediabox.x1;
				else
					mb.x1 = mediabox.x0 + (w/xf)*(x+1);
				mb.y0 = mediabox.y0 + (h/yf)*y;
				if (y == yf-1)
					mb.y1 = mediabox.y1;
				else
					mb.y1 = mediabox.y0 + (h/yf)*(y+1);

				coords[0] = mb.x0;
				coords[1] = mb.y0;
				coords[2] = mb.x1;
				coords[3] = mb.y1;
				for (k = 0; k < 4; k++)
				{
					coord = pdf_new_real(ctx, coords[k]);
					pdf_array_push(newmediabox, coord);
					pdf_drop_obj(coord);
				}

				pdf_dict_puts(newpageobj, "Parent", parent);
				pdf_dict_puts(newpageobj, "MediaBox", newmediabox);

				/* Store page object in new kids array */
				pdf_array_push(kids, newpageref);

				/* The document, the page dict and the kids array hold these now. */
				pdf_drop_obj(newmediabox);
				pdf_drop_obj(newpageref);
				pdf_drop_obj(newpageobj);

				kidcount++;
			}
		}
	}

	pdf_drop_obj(parent);

	/* Update page count and kids array */
	count = pdf_new_int(ctx, kidcount);
	pdf_dict_puts(pages, "Count", count);
	pdf_drop_obj(count);
	pdf_dict_puts(pages, "Kids", kids);
	pdf_drop_obj(kids);
}
std::string PDFDocument::GetPageText(int page, int line_sep) { // 1. Init MuPDF structures. pdf_page* page_struct = GetPage(page); #if MUPDF_VERSION < 10012 fz_stext_sheet* text_sheet = fz_new_stext_sheet(_fz_context); #endif // 2. Render page. #if MUPDF_VERSION >= 10012 fz_stext_options stext_options = {0}; // See #elif MUPDF_VERSION >= 10009 block below. fz_stext_page* text_page = fz_new_stext_page_from_page( _fz_context, &(page_struct->super), &stext_options); #elif MUPDF_VERSION >= 10010 fz_stext_options stext_options = {0}; // See #elif MUPDF_VERSION >= 10009 block below. fz_stext_page* text_page = fz_new_stext_page_from_page( _fz_context, &(page_struct->super), text_sheet, &stext_options); #elif MUPDF_VERSION >= 10009 // The function below is a wrapper around fz_run_page that uses a fresh // device. We can't use pdf_run_page to gather the text for us. // These notes are also left in here in case MuPDF's API changes again. fz_stext_page* text_page = fz_new_stext_page_from_page( _fz_context, &(page_struct->super), text_sheet); #else fz_stext_page* text_page = fz_new_text_page(_fz_context); fz_device* dev = fz_new_stext_device(_fz_context, text_sheet, text_page); // I've no idea what fz_{begin,end}_page do, but without them pdf_run_page // segfaults :-/ fz_begin_page(_fz_context, dev, &fz_infinite_rect, &fz_identity); pdf_run_page( _fz_context, _pdf_document, page_struct, dev, &fz_identity, nullptr); fz_end_page(_fz_context, dev); #endif // 3. Build text. 
std::string r; #if MUPDF_VERSION >= 10012 for (fz_stext_block* text_block = text_page->first_block; text_block != nullptr; text_block = text_block->next) { if (text_block->type != FZ_STEXT_BLOCK_TEXT) { continue; } for (fz_stext_line* text_line = text_block->u.t.first_line; text_line != nullptr; text_line = text_line->next) { for (fz_stext_char* text_char = text_line->first_char; text_char != nullptr; text_char = text_char->next) { { const int c = text_char->c; #else for (fz_page_block* page_block = text_page->blocks; page_block < text_page->blocks + text_page->len; ++page_block) { assert(page_block != nullptr); if (page_block->type != FZ_PAGE_BLOCK_TEXT) { continue; } fz_stext_block* const text_block = page_block->u.text; assert(text_block != nullptr); for (fz_stext_line* text_line = text_block->lines; text_line < text_block->lines + text_block->len; ++text_line) { assert(text_line != nullptr); for (fz_stext_span* text_span = text_line->first_span; text_span != nullptr; text_span = text_span->next) { for (int i = 0; i < text_span->len; ++i) { const int c = text_span->text[i].c; #endif // A single UTF-8 character cannot take more than 4 bytes, but let's // go for 8. char buffer[8]; const int num_bytes = fz_runetochar(buffer, c); assert(num_bytes <= static_cast<int>(sizeof(buffer))); buffer[num_bytes] = '\0'; r += buffer; } } if (!isspace(r.back())) { r += line_sep; } } } // 4. Clean up. 
fz_drop_stext_page(_fz_context, text_page); #if MUPDF_VERSION < 10012 fz_drop_stext_sheet(_fz_context, text_sheet); #endif return r; } PDFDocument::PDFOutlineItem::~PDFOutlineItem() {} PDFDocument::PDFOutlineItem::PDFOutlineItem(fz_outline* src) { if (src == nullptr) { _dest_page = -1; } else { _title = src->title; #if MUPDF_VERSION >= 10010 _dest_page = src->page; #else _dest_page = src->dest.ld.gotor.page; #endif } } int PDFDocument::PDFOutlineItem::GetDestPage() const { return _dest_page; } PDFDocument::PDFOutlineItem* PDFDocument::PDFOutlineItem::Build( fz_context* ctx, fz_outline* src) { PDFOutlineItem* root = nullptr; std::vector<std::unique_ptr<OutlineItem>> items; BuildRecursive(src, &items); fz_drop_outline(ctx, src); if (items.empty()) { return nullptr; } else if (items.size() == 1) { root = dynamic_cast<PDFOutlineItem*>(items[0].release()); } else { root = new PDFOutlineItem(nullptr); root->_title = DEFAULT_ROOT_OUTLINE_ITEM_TITLE; root->_children.swap(items); } return root; } void PDFDocument::PDFOutlineItem::BuildRecursive( fz_outline* src, std::vector<std::unique_ptr<Document::OutlineItem>>* output) { assert(output != nullptr); for (fz_outline* i = src; i != nullptr; i = i->next) { PDFOutlineItem* item = new PDFOutlineItem(i); if (i->down != nullptr) { BuildRecursive(i->down, &(item->_children)); } output->push_back(std::unique_ptr<Document::OutlineItem>(item)); } } PDFDocument::PDFPageCache::PDFPageCache(int cache_size, PDFDocument* parent) : Cache<int, pdf_page*>(cache_size), _parent(parent) {} PDFDocument::PDFPageCache::~PDFPageCache() { Clear(); } pdf_page* PDFDocument::PDFPageCache::Load(const int& page) { std::unique_lock<std::mutex> lock(_mutex); return pdf_load_page(_parent->_fz_context, _parent->_pdf_document, page); } void PDFDocument::PDFPageCache::Discard( const int& page, pdf_page* const& page_struct) { std::unique_lock<std::mutex> lock(_mutex); pdf_drop_page(_parent->_fz_context, _parent->_pdf_document, page_struct); }