static void _pdf_doc_extract_text( struct _pdf_doc *self, int pageno, char *tbuf, mume_rect_t *rbuf) { fz_display_list *list; fz_text_span *text, *span; fz_device *tdev; int i; list = _pdf_doc_get_list(self, pageno); text = fz_new_text_span(); tdev = fz_new_text_device(text); fz_execute_display_list( list, tdev, fz_identity, fz_infinite_bbox); for (span = text; span; span = span->next) { for (i = 0; i < span->len; i++) { *tbuf = span->text[i].c; if (*tbuf < 32) *tbuf = '?'; tbuf++; *rbuf++ = _fz_bbox_to_mume_rect(span->text[i].bbox); } if (!span->eol && span->next) continue; *tbuf++ = '\n'; *rbuf++ = mume_rect_empty; } fz_free_device(tdev); fz_free_text_span(text); }
static int _pdf_doc_text_length(struct _pdf_doc *self, int pageno) { fz_display_list *list; fz_text_span *text, *span; fz_device *tdev; int length = 0; list = _pdf_doc_get_list(self, pageno); text = fz_new_text_span(); tdev = fz_new_text_device(text); fz_execute_display_list( list, tdev, fz_identity, fz_infinite_bbox); for (span = text; span; span = span->next) { length += span->len; if (!span->eol && span->next) continue; /* End of line ? */ length += 1; } fz_free_device(tdev); fz_free_text_span(text); return length; }
std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>> MuPDFDoc::SearchText(const char* searchText) { fz_text_sheet *sheet = nullptr; fz_text_page *text = nullptr; fz_device *dev = nullptr; PageCache *pageCache = &m_pages[m_currentPage]; fz_var(sheet); fz_var(text); fz_var(dev); std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>> hints(new std::vector<std::shared_ptr<RectFloat>>()); fz_try(m_context) { int hitCount = 0; fz_matrix ctm = CalcConvertMatrix(); fz_rect mbrect = fz_transform_rect(ctm, pageCache->mediaBox); sheet = fz_new_text_sheet(m_context); text = fz_new_text_page(m_context, mbrect); dev = fz_new_text_device(m_context, sheet, text); fz_run_page(m_document, pageCache->page, dev, ctm, nullptr); fz_free_device(dev); dev = nullptr; int len = TextLen(text); for (int pos = 0; pos < len; pos++) { fz_bbox rr = fz_empty_bbox; int n = Match(text, searchText, pos); for (int i = 0; i < n; i++) rr = fz_union_bbox(rr, BBoxCharAt(text, pos + i)); if (!fz_is_empty_bbox(rr) && hitCount < MAX_SEARCH_HITS) { hints->push_back(std::shared_ptr<RectFloat>(new RectFloat(rr.x0, rr.y0, rr.x1, rr.y1))); if (++hitCount >= MAX_SEARCH_HITS) break; } } } fz_always(m_context) { fz_free_text_page(m_context, text); fz_free_text_sheet(m_context, sheet); fz_free_device(dev); } fz_catch(m_context) { return std::shared_ptr<std::vector<std::shared_ptr<RectFloat>>>(nullptr); } return hints; }
fz_text_page * fz_new_text_page_from_page(fz_context *ctx, fz_page *page, fz_text_sheet *sheet) { fz_text_page *text; fz_device *dev; text = fz_new_text_page(ctx); fz_try(ctx) { dev = fz_new_text_device(ctx, sheet, text); fz_run_page(ctx, page, dev, &fz_identity, NULL); } fz_always(ctx) { fz_drop_device(ctx, dev); } fz_catch(ctx) { fz_drop_text_page(ctx, text); fz_rethrow(ctx); } return text; }
static void drawpage(fz_context *ctx, fz_document *doc, int pagenum) { fz_page *page; fz_display_list *list = NULL; fz_device *dev = NULL; int start; fz_cookie cookie = { 0 }; int needshot = 0; fz_var(list); fz_var(dev); if (showtime) { start = gettime(); } fz_try(ctx) { page = fz_load_page(doc, pagenum - 1); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load page %d in file '%s'", pagenum, filename); } if (mujstest_file) { pdf_document *inter = pdf_specifics(doc); pdf_widget *widget = NULL; if (inter) widget = pdf_first_widget(inter, (pdf_page *)page); if (widget) { fprintf(mujstest_file, "GOTO %d\n", pagenum); needshot = 1; } for (;widget; widget = pdf_next_widget(widget)) { fz_rect rect; int w, h, len; int type = pdf_widget_get_type(widget); pdf_bound_widget(widget, &rect); w = (rect.x1 - rect.x0); h = (rect.y1 - rect.y0); ++mujstest_count; switch (type) { default: fprintf(mujstest_file, "%% UNKNOWN %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_PUSHBUTTON: fprintf(mujstest_file, "%% PUSHBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_CHECKBOX: fprintf(mujstest_file, "%% CHECKBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_RADIOBUTTON: fprintf(mujstest_file, "%% RADIOBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_TEXT: { int maxlen = pdf_text_widget_max_len(inter, widget); int texttype = pdf_text_widget_content_type(inter, widget); /* If height is low, assume a single row, and base * the width off that. */ if (h < 10) { w = (w+h-1) / (h ? h : 1); h = 1; } /* Otherwise, if width is low, work off height */ else if (w < 10) { h = (w+h-1) / (w ? w : 1); w = 1; } else { w = (w+9)/10; h = (h+9)/10; } len = w*h; if (len < 2) len = 2; if (len > maxlen) len = maxlen; fprintf(mujstest_file, "%% TEXT %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); switch (texttype) { default: case PDF_WIDGET_CONTENT_UNRESTRAINED: fprintf(mujstest_file, "TEXT %d ", mujstest_count); escape_string(mujstest_file, len-3, lorem); fprintf(mujstest_file, "\n"); break; case PDF_WIDGET_CONTENT_NUMBER: fprintf(mujstest_file, "TEXT %d\n", mujstest_count); break; case PDF_WIDGET_CONTENT_SPECIAL: #ifdef __MINGW32__ fprintf(mujstest_file, "TEXT %I64d\n", 46702919800LL + mujstest_count); #else fprintf(mujstest_file, "TEXT %lld\n", 46702919800LL + mujstest_count); #endif break; case PDF_WIDGET_CONTENT_DATE: fprintf(mujstest_file, "TEXT Jun %d 1979\n", 1 + ((13 + mujstest_count) % 30)); break; case PDF_WIDGET_CONTENT_TIME: ++mujstest_count; fprintf(mujstest_file, "TEXT %02d:%02d\n", ((mujstest_count/60) % 24), mujstest_count % 60); break; } break; } case PDF_WIDGET_TYPE_LISTBOX: fprintf(mujstest_file, "%% LISTBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case PDF_WIDGET_TYPE_COMBOBOX: fprintf(mujstest_file, "%% COMBOBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; } fprintf(mujstest_file, "CLICK %0.2f %0.2f\n", (rect.x0+rect.x1)/2, (rect.y0+rect.y1)/2); } } if (uselist) { fz_try(ctx) { list = fz_new_display_list(ctx); dev = fz_new_list_device(ctx, list); fz_run_page(doc, page, dev, &fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow_message(ctx, "cannot draw page %d in file '%s'", pagenum, filename); } } if (showxml) { fz_try(ctx) { dev = fz_new_trace_device(ctx); if (list) fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie); else fz_run_page(doc, page, dev, &fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } } if (showtext) { fz_text_page *text = NULL; fz_var(text); fz_try(ctx) { text = fz_new_text_page(ctx); dev = fz_new_text_device(ctx, sheet, text); if (showtext == TEXT_HTML) fz_disable_device_hints(dev, FZ_IGNORE_IMAGE); if (list) fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie); else fz_run_page(doc, page, dev, &fz_identity, &cookie); fz_free_device(dev); dev = NULL; if (showtext == TEXT_XML) { fz_print_text_page_xml(ctx, out, text); } else if (showtext == TEXT_HTML) { fz_analyze_text(ctx, sheet, text); fz_print_text_page_html(ctx, out, text); } else if (showtext == TEXT_PLAIN) { fz_print_text_page(ctx, out, text); fz_printf(out, "\f\n"); } } fz_always(ctx) { fz_free_device(dev); dev = NULL; fz_free_text_page(ctx, text); } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } } if (showmd5 || showtime) printf("page %s %d", filename, pagenum); if (pdfout) { fz_matrix ctm; fz_rect bounds, tbounds; pdf_page *newpage; fz_bound_page(doc, page, &bounds); fz_rotate(&ctm, rotation); tbounds = bounds; fz_transform_rect(&tbounds, &ctm); newpage = pdf_create_page(pdfout, bounds, 72, 0); fz_try(ctx) { dev = pdf_page_write(pdfout, newpage); if (list) fz_run_display_list(list, dev, &ctm, &tbounds, &cookie); else fz_run_page(doc, page, dev, &ctm, &cookie); fz_free_device(dev); dev = NULL; } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } pdf_insert_page(pdfout, newpage, INT_MAX); pdf_free_page(pdfout, newpage); } if (output && output_format == OUT_SVG) { float zoom; fz_matrix ctm; fz_rect bounds, tbounds; char buf[512]; FILE *file; fz_output *out; if (!strcmp(output, "-")) file = stdout; else { sprintf(buf, output, pagenum); file = fopen(buf, "wb"); if (file == NULL) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open file '%s': %s", buf, strerror(errno)); } out = fz_new_output_with_file(ctx, file); fz_bound_page(doc, page, &bounds); zoom = resolution / 72; fz_pre_rotate(fz_scale(&ctm, zoom, zoom), rotation); tbounds = bounds; fz_transform_rect(&tbounds, &ctm); fz_try(ctx) { dev = fz_new_svg_device(ctx, out, tbounds.x1-tbounds.x0, tbounds.y1-tbounds.y0); if (list) fz_run_display_list(list, dev, &ctm, &tbounds, &cookie); else fz_run_page(doc, page, dev, &ctm, &cookie); fz_free_device(dev); dev = NULL; } fz_always(ctx) { fz_free_device(dev); dev = NULL; fz_close_output(out); if (file != stdout) fclose(file); } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } }
static void drawpage(fz_context *ctx, fz_document *doc, int pagenum) { fz_page *page; fz_display_list *list = NULL; fz_device *dev = NULL; int start; fz_cookie cookie = { 0 }; fz_var(list); fz_var(dev); if (showtime) { start = gettime(); } fz_try(ctx) { page = fz_load_page(doc, pagenum - 1); } fz_catch(ctx) { fz_rethrow_message(ctx, "cannot load page %d in file '%s'", pagenum, filename); } if (uselist) { fz_try(ctx) { list = fz_new_display_list(ctx); dev = fz_new_list_device(ctx, list); fz_run_page(doc, page, dev, &fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow_message(ctx, "cannot draw page %d in file '%s'", pagenum, filename); } } if (showxml) { fz_try(ctx) { dev = fz_new_trace_device(ctx); if (list) fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie); else fz_run_page(doc, page, dev, &fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } } if (showtext) { fz_text_page *text = NULL; fz_var(text); fz_try(ctx) { text = fz_new_text_page(ctx); dev = fz_new_text_device(ctx, sheet, text); if (showtext == TEXT_HTML) fz_disable_device_hints(dev, FZ_IGNORE_IMAGE); if (list) fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, &cookie); else fz_run_page(doc, page, dev, &fz_identity, &cookie); fz_free_device(dev); dev = NULL; if (showtext == TEXT_XML) { fz_print_text_page_xml(ctx, out, text); } else if (showtext == TEXT_HTML) { fz_analyze_text(ctx, sheet, text); fz_print_text_page_html(ctx, out, text); } else if (showtext == TEXT_PLAIN) { fz_print_text_page(ctx, out, text); fz_printf(out, "\f\n"); } } fz_always(ctx) { fz_free_device(dev); dev = NULL; fz_free_text_page(ctx, text); } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } } if (showmd5 || showtime || showfeatures) printf("page %s %d", filename, pagenum); if (showfeatures) { int iscolor; dev = fz_new_test_device(ctx, &iscolor, 0.02f); fz_try(ctx) { if (list) fz_run_display_list(list, dev, &fz_identity, &fz_infinite_rect, NULL); else fz_run_page(doc, page, dev, &fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_rethrow(ctx); } printf(" %s", iscolor ? "color" : "grayscale"); } if (pdfout) { fz_matrix ctm; fz_rect bounds, tbounds; pdf_page *newpage; fz_bound_page(doc, page, &bounds); fz_rotate(&ctm, rotation); tbounds = bounds; fz_transform_rect(&tbounds, &ctm); newpage = pdf_create_page(pdfout, bounds, 72, 0); fz_try(ctx) { dev = pdf_page_write(pdfout, newpage); if (list) fz_run_display_list(list, dev, &ctm, &tbounds, &cookie); else fz_run_page(doc, page, dev, &ctm, &cookie); fz_free_device(dev); dev = NULL; } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } pdf_insert_page(pdfout, newpage, INT_MAX); pdf_free_page(pdfout, newpage); } if (output && output_format == OUT_SVG) { float zoom; fz_matrix ctm; fz_rect bounds, tbounds; char buf[512]; FILE *file; fz_output *out; if (!strcmp(output, "-")) file = stdout; else { sprintf(buf, output, pagenum); file = fopen(buf, "wb"); if (file == NULL) fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open file '%s': %s", buf, strerror(errno)); } out = fz_new_output_with_file(ctx, file); fz_bound_page(doc, page, &bounds); zoom = resolution / 72; fz_pre_rotate(fz_scale(&ctm, zoom, zoom), rotation); tbounds = bounds; fz_transform_rect(&tbounds, &ctm); fz_try(ctx) { dev = fz_new_svg_device(ctx, out, tbounds.x1-tbounds.x0, tbounds.y1-tbounds.y0); if (list) fz_run_display_list(list, dev, &ctm, &tbounds, &cookie); else fz_run_page(doc, page, dev, &ctm, &cookie); fz_free_device(dev); dev = NULL; } fz_always(ctx) { fz_free_device(dev); dev = NULL; fz_close_output(out); if (file != stdout) fclose(file); } fz_catch(ctx) { fz_drop_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } }
static void drawpage(fz_context *ctx, fz_document *doc, int pagenum) { fz_page *page; fz_display_list *list = NULL; fz_device *dev = NULL; int start; fz_cookie cookie = { 0 }; int needshot = 0; fz_var(list); fz_var(dev); if (showtime) { start = gettime(); } fz_try(ctx) { page = fz_load_page(doc, pagenum - 1); } fz_catch(ctx) { fz_throw(ctx, "cannot load page %d in file '%s'", pagenum, filename); } if (mujstest_file) { fz_interactive *inter = fz_interact(doc); fz_widget *widget = NULL; if (inter) widget = fz_first_widget(inter, page); if (widget) { fprintf(mujstest_file, "GOTO %d\n", pagenum); needshot = 1; } for (;widget; widget = fz_next_widget(inter, widget)) { fz_rect rect = fz_widget_bbox(widget); int w = (rect.x1-rect.x0); int h = (rect.y1-rect.y0); int len; int type = fz_widget_get_type(widget); ++mujstest_count; switch (type) { default: fprintf(mujstest_file, "%% UNKNOWN %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case FZ_WIDGET_TYPE_PUSHBUTTON: fprintf(mujstest_file, "%% PUSHBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case FZ_WIDGET_TYPE_CHECKBOX: fprintf(mujstest_file, "%% CHECKBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case FZ_WIDGET_TYPE_RADIOBUTTON: fprintf(mujstest_file, "%% RADIOBUTTON %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case FZ_WIDGET_TYPE_TEXT: { int maxlen = fz_text_widget_max_len(inter, widget); int texttype = fz_text_widget_content_type(inter, widget); /* If height is low, assume a single row, and base * the width off that. */ if (h < 10) { w = (w+h-1) / (h ? h : 1); h = 1; } /* Otherwise, if width is low, work off height */ else if (w < 10) { h = (w+h-1) / (w ? w : 1); w = 1; } else { w = (w+9)/10; h = (h+9)/10; } len = w*h; if (len < 2) len = 2; if (len > maxlen) len = maxlen; fprintf(mujstest_file, "%% TEXT %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); switch (texttype) { default: case FZ_WIDGET_CONTENT_UNRESTRAINED: fprintf(mujstest_file, "TEXT %d ", mujstest_count); escape_string(mujstest_file, len-3, lorem); fprintf(mujstest_file, "\n"); break; case FZ_WIDGET_CONTENT_NUMBER: fprintf(mujstest_file, "TEXT %d\n", mujstest_count); break; case FZ_WIDGET_CONTENT_SPECIAL: fprintf(mujstest_file, "TEXT %lld\n", 46702919800LL + mujstest_count); break; case FZ_WIDGET_CONTENT_DATE: fprintf(mujstest_file, "TEXT Jun %d 1979\n", 1 + ((13 + mujstest_count) % 30)); break; case FZ_WIDGET_CONTENT_TIME: ++mujstest_count; fprintf(mujstest_file, "TEXT %02d:%02d\n", ((mujstest_count/60) % 24), mujstest_count % 60); break; } break; } case FZ_WIDGET_TYPE_LISTBOX: fprintf(mujstest_file, "%% LISTBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; case FZ_WIDGET_TYPE_COMBOBOX: fprintf(mujstest_file, "%% COMBOBOX %0.2f %0.2f %0.2f %0.2f\n", rect.x0, rect.y0, rect.x1, rect.y1); break; } fprintf(mujstest_file, "CLICK %0.2f %0.2f\n", (rect.x0+rect.x1)/2, (rect.y0+rect.y1)/2); } } if (uselist) { fz_try(ctx) { list = fz_new_display_list(ctx); dev = fz_new_list_device(ctx, list); fz_run_page(doc, page, dev, fz_identity, &cookie); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_free_display_list(ctx, list); fz_free_page(doc, page); fz_throw(ctx, "cannot draw page %d in file '%s'", pagenum, filename); } } if (showxml) { fz_try(ctx) { dev = fz_new_trace_device(ctx); fz_printf(out, "<page number=\"%d\">\n", pagenum); if (list) fz_run_display_list(list, dev, fz_identity, fz_infinite_rect, &cookie); else fz_run_page(doc, page, dev, fz_identity, &cookie); fz_printf(out, "</page>\n"); } fz_always(ctx) { fz_free_device(dev); dev = NULL; } fz_catch(ctx) { fz_free_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } } if (showtext) { fz_text_page *text = NULL; fz_var(text); fz_try(ctx) { text = fz_new_text_page(ctx, fz_bound_page(doc, page)); dev = fz_new_text_device(ctx, sheet, text); if (list) fz_run_display_list(list, dev, fz_identity, fz_infinite_rect, &cookie); else fz_run_page(doc, page, dev, fz_identity, &cookie); fz_free_device(dev); dev = NULL; if (showtext == TEXT_XML) { fz_print_text_page_xml(ctx, out, text); } else if (showtext == TEXT_HTML) { fz_print_text_page_html(ctx, out, text); } else if (showtext == TEXT_PLAIN) { fz_print_text_page(ctx, out, text); fz_printf(out, "\f\n"); } } fz_always(ctx) { fz_free_device(dev); dev = NULL; fz_free_text_page(ctx, text); } fz_catch(ctx) { fz_free_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } } if (showmd5 || showtime) printf("page %s %d", filename, pagenum); #ifdef GDI_PLUS_BMP_RENDERER // hack: use -G0 to "enable GDI+" when saving as TGA if (output && (strstr(output, ".bmp") || strstr(output, ".tga") && !gamma_value)) drawbmp(ctx, doc, page, list, pagenum); else #endif if (output || showmd5 || showtime) { float zoom; fz_matrix ctm; fz_rect bounds, tbounds; fz_bbox ibounds; fz_pixmap *pix = NULL; int w, h; fz_var(pix); bounds = fz_bound_page(doc, page); zoom = resolution / 72; ctm = fz_scale(zoom, zoom); ctm = fz_concat(ctm, fz_rotate(rotation)); tbounds = fz_transform_rect(ctm, bounds); ibounds = fz_round_rect(tbounds); /* convert to integers */ /* Make local copies of our width/height */ w = width; h = height; /* If a resolution is specified, check to see whether w/h are * exceeded; if not, unset them. */ if (res_specified) { int t; t = ibounds.x1 - ibounds.x0; if (w && t <= w) w = 0; t = ibounds.y1 - ibounds.y0; if (h && t <= h) h = 0; } /* Now w or h will be 0 unless they need to be enforced. */ if (w || h) { float scalex = w / (tbounds.x1 - tbounds.x0); float scaley = h / (tbounds.y1 - tbounds.y0); if (fit) { if (w == 0) scalex = 1.0f; if (h == 0) scaley = 1.0f; } else { if (w == 0) scalex = scaley; if (h == 0) scaley = scalex; } if (!fit) { if (scalex > scaley) scalex = scaley; else scaley = scalex; } ctm = fz_concat(ctm, fz_scale(scalex, scaley)); tbounds = fz_transform_rect(ctm, bounds); } ibounds = fz_round_rect(tbounds); /* TODO: banded rendering and multi-page ppm */ fz_try(ctx) { pix = fz_new_pixmap_with_bbox(ctx, colorspace, ibounds); if (savealpha) fz_clear_pixmap(ctx, pix); else fz_clear_pixmap_with_value(ctx, pix, 255); dev = fz_new_draw_device(ctx, pix); if (list) fz_run_display_list(list, dev, ctm, tbounds, &cookie); else fz_run_page(doc, page, dev, ctm, &cookie); fz_free_device(dev); dev = NULL; if (invert) fz_invert_pixmap(ctx, pix); if (gamma_value != 1) fz_gamma_pixmap(ctx, pix, gamma_value); if (savealpha) fz_unmultiply_pixmap(ctx, pix); if (output) { char buf[512]; sprintf(buf, output, pagenum); if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm")) fz_write_pnm(ctx, pix, buf); else if (strstr(output, ".pam")) fz_write_pam(ctx, pix, buf, savealpha); else if (strstr(output, ".png")) fz_write_png(ctx, pix, buf, savealpha); else if (strstr(output, ".pbm")) { fz_bitmap *bit = fz_halftone_pixmap(ctx, pix, NULL); fz_write_pbm(ctx, bit, buf); fz_drop_bitmap(ctx, bit); } /* SumatraPDF: support TGA as output format */ else if (strstr(output, ".tga")) fz_write_tga(ctx, pix, buf, savealpha); } if (showmd5) { unsigned char digest[16]; int i; fz_md5_pixmap(pix, digest); printf(" "); for (i = 0; i < 16; i++) printf("%02x", digest[i]); } } fz_always(ctx) { fz_free_device(dev); dev = NULL; fz_drop_pixmap(ctx, pix); } fz_catch(ctx) { fz_free_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } }
static void pdfapp_showpage(pdfapp_t *app, int loadpage, int drawpage, int repaint) { char buf[256]; fz_device *idev; fz_device *tdev; fz_colorspace *colorspace; fz_matrix ctm; fz_bbox bbox; wincursor(app, WAIT); if (loadpage) { if (app->page_list) fz_free_display_list(app->page_list); if (app->page_text) fz_free_text_span(app->page_text); if (app->page_links) pdf_free_link(app->page_links); if (app->xref) pdfapp_loadpage_pdf(app); if (app->xps) pdfapp_loadpage_xps(app); /* Zero search hit position */ app->hit = -1; app->hitlen = 0; /* Extract text */ app->page_text = fz_new_text_span(); tdev = fz_new_text_device(app->page_text); fz_execute_display_list(app->page_list, tdev, fz_identity, fz_infinite_bbox); fz_free_device(tdev); } if (drawpage) { sprintf(buf, "%s - %d/%d (%d dpi)", app->doctitle, app->pageno, app->pagecount, app->resolution); wintitle(app, buf); ctm = pdfapp_viewctm(app); bbox = fz_round_rect(fz_transform_rect(ctm, app->page_bbox)); /* Draw */ if (app->image) fz_drop_pixmap(app->image); if (app->grayscale) colorspace = fz_device_gray; else #ifdef _WIN32 colorspace = fz_device_bgr; #else colorspace = fz_device_rgb; #endif app->image = fz_new_pixmap_with_rect(colorspace, bbox); fz_clear_pixmap_with_color(app->image, 255); idev = fz_new_draw_device(app->cache, app->image); fz_execute_display_list(app->page_list, idev, ctm, bbox); fz_free_device(idev); } if (repaint) { pdfapp_panview(app, app->panx, app->pany); if (app->shrinkwrap) { int w = app->image->w; int h = app->image->h; if (app->winw == w) app->panx = 0; if (app->winh == h) app->pany = 0; if (w > app->scrw * 90 / 100) w = app->scrw * 90 / 100; if (h > app->scrh * 90 / 100) h = app->scrh * 90 / 100; if (w != app->winw || h != app->winh) winresize(app, w, h); } winrepaint(app); wincursor(app, ARROW); } fz_flush_warnings(); }
DrPage * DrPDFExtractor::ExtractPage(unsigned int pageno) { fz_page * page = fz_load_page(m_doc, pageno); if (page == NULL) { return NULL; } DrPage * dpage = new DrPage; dpage->SetPageNo(pageno); std::list<DrChar *> charlist; std::list<DrPhrase *> phraselist; std::list<DrLine *> linelist; std::list<DrZone *> &zonelist = dpage->m_zonelist; fz_matrix transform; fz_rotate(&transform,0); fz_pre_scale(&transform, 1.0f, 1.0f); fz_rect bounds; fz_bound_page(m_doc, page, &bounds); fz_transform_rect(&bounds, &transform); fz_irect bbox; fz_round_rect(&bbox, &bounds); fz_matrix ttransform = transform; fz_pixmap *pix = fz_new_pixmap_with_bbox(m_ctx, fz_device_rgb(m_ctx), &bbox); fz_clear_pixmap_with_value(m_ctx, pix, 0xff); fz_device * dev = fz_new_draw_device(m_ctx,pix); fz_run_page(m_doc, page, dev, &transform, NULL); fz_free_device(dev); fz_text_sheet * sheet = fz_new_text_sheet(m_ctx); fz_text_page * tpage = fz_new_text_page(m_ctx); fz_device * cdev = fz_new_text_device(m_ctx, sheet, tpage); fz_run_page(m_doc, page, cdev, &ttransform, NULL); ExtractChars(charlist,tpage); fz_free_device(cdev); // DrThumbnail * thumb = new DrThumbnail(m_ctx,pix,pageno); // dpage->m_thumbnail = thumb; DrTextGrouper::TextGroup(phraselist, charlist); std::list<DrPhrase *>::iterator itphrase = phraselist.begin(); while (itphrase != phraselist.end()) { if ((*itphrase)->IsSpacePhrase()) { delete *itphrase; itphrase = phraselist.erase(itphrase); } else itphrase++; } DrTextGrouper::TextGroup(linelist, phraselist); DrTextGrouper::TextGroup(zonelist, linelist); dpage->CalculatePageBox(); // fz_free_text_sheet(m_ctx, tsheet); // fz_free_text_page(m_ctx, tpage); fz_free_page(m_doc, page); return dpage; }
static void drawpage(fz_context *ctx, fz_document *doc, int pagenum) { fz_page *page; fz_link *links; fz_display_list *list = NULL; fz_device *dev = NULL; int start; fz_var(list); fz_var(dev); if (showtime) { start = gettime(); } fz_try(ctx) { page = fz_load_page(doc, pagenum - 1); } fz_catch(ctx) { fz_throw(ctx, "cannot load page %d in file '%s'", pagenum, filename); } fz_try(ctx) { links = fz_load_links(doc, page); } fz_catch(ctx) { fz_throw(ctx, "cannot load links for page %d in file '%s'", pagenum, filename); } if (uselist) { fz_try(ctx) { list = fz_new_display_list(ctx); dev = fz_new_list_device(ctx, list); fz_run_page(doc, page, dev, fz_identity, NULL); } fz_catch(ctx) { fz_free_device(dev); fz_free_display_list(ctx, list); fz_free_page(doc, page); fz_throw(ctx, "cannot draw page %d in file '%s'", pagenum, filename); } fz_free_device(dev); dev = NULL; } if (showjson) { fz_text_page *text = NULL; fz_var(text); fz_try(ctx) { text = fz_new_text_page(ctx, fz_bound_page(doc, page)); dev = fz_new_text_device(ctx, sheet, text); if (list) fz_run_display_list(list, dev, fz_identity, fz_infinite_bbox, NULL); else fz_run_page(doc, page, dev, fz_identity, NULL); fz_free_device(dev); dev = NULL; fz_print_text_page_json(ctx, stdout, text, links); printf("\f\n"); } fz_catch(ctx) { fz_free_device(dev); fz_free_text_page(ctx, text); fz_free_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } fz_free_text_page(ctx, text); } if (showmd5 || showtime) printf("page %s %d", filename, pagenum); if (output || showmd5 || showtime) { float zoom; fz_matrix ctm; fz_rect bounds, bounds2; fz_bbox bbox; fz_pixmap *pix = NULL; int w, h; fz_var(pix); bounds = fz_bound_page(doc, page); zoom = resolution / 72; ctm = fz_scale(zoom, zoom); ctm = fz_concat(ctm, fz_rotate(rotation)); bounds2 = fz_transform_rect(ctm, bounds); bbox = fz_round_rect(bounds2); /* Make local copies of our width/height */ w = width; h = height; /* If a resolution is specified, check to see whether w/h are * exceeded; if not, unset them. */ if (res_specified) { int t; t = bbox.x1 - bbox.x0; if (w && t <= w) w = 0; t = bbox.y1 - bbox.y0; if (h && t <= h) h = 0; } /* Now w or h will be 0 unless then need to be enforced. */ if (w || h) { float scalex = w/(bounds2.x1-bounds2.x0); float scaley = h/(bounds2.y1-bounds2.y0); if (fit) { if (w == 0) scalex = 1.0f; if (h == 0) scaley = 1.0f; } else { if (w == 0) scalex = scaley; if (h == 0) scaley = scalex; } if (!fit) { if (scalex > scaley) scalex = scaley; else scaley = scalex; } ctm = fz_concat(ctm, fz_scale(scalex, scaley)); bounds2 = fz_transform_rect(ctm, bounds); } bbox = fz_round_rect(bounds2); /* TODO: banded rendering and multi-page ppm */ fz_try(ctx) { pix = fz_new_pixmap_with_bbox(ctx, colorspace, bbox); if (savealpha) fz_clear_pixmap(ctx, pix); else fz_clear_pixmap_with_value(ctx, pix, 255); dev = fz_new_draw_device(ctx, pix); if (list) fz_run_display_list(list, dev, ctm, bbox, NULL); else fz_run_page(doc, page, dev, ctm, NULL); fz_free_device(dev); dev = NULL; if (invert) fz_invert_pixmap(ctx, pix); if (gamma_value != 1) fz_gamma_pixmap(ctx, pix, gamma_value); if (savealpha) fz_unmultiply_pixmap(ctx, pix); if (output) { char buf[512]; sprintf(buf, output, pagenum); if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm")) fz_write_pnm(ctx, pix, buf); else if (strstr(output, ".pam")) fz_write_pam(ctx, pix, buf, savealpha); else if (strstr(output, ".png")) fz_write_png(ctx, pix, buf, savealpha); else if (strstr(output, ".pbm")) { fz_bitmap *bit = fz_halftone_pixmap(ctx, pix, NULL); fz_write_pbm(ctx, bit, buf); fz_drop_bitmap(ctx, bit); } } if (showmd5) { unsigned char digest[16]; int i; fz_md5_pixmap(pix, digest); printf(" "); for (i = 0; i < 16; i++) printf("%02x", digest[i]); } fz_drop_pixmap(ctx, pix); } fz_catch(ctx) { fz_free_device(dev); fz_drop_pixmap(ctx, pix); fz_free_display_list(ctx, list); fz_free_page(doc, page); fz_rethrow(ctx); } }
JNIEXPORT jobjectArray JNICALL Java_com_artifex_mupdf_MuPDFCore_searchPage(JNIEnv * env, jobject thiz, jstring jtext) { jclass rectClass; jmethodID ctor; jobjectArray arr; jobject rect; fz_text_sheet *sheet = NULL; fz_text_page *text = NULL; fz_device *dev = NULL; float zoom; fz_matrix ctm; int pos; int len; int i, n; int hit_count = 0; const char *str; page_cache *pc = &pages[current]; rectClass = (*env)->FindClass(env, "android/graphics/RectF"); if (rectClass == NULL) return NULL; ctor = (*env)->GetMethodID(env, rectClass, "<init>", "(FFFF)V"); if (ctor == NULL) return NULL; str = (*env)->GetStringUTFChars(env, jtext, NULL); if (str == NULL) return NULL; fz_var(sheet); fz_var(text); fz_var(dev); fz_try(ctx) { fz_rect rect; if (hit_bbox == NULL) hit_bbox = fz_malloc_array(ctx, MAX_SEARCH_HITS, sizeof(*hit_bbox)); zoom = resolution / 72; ctm = fz_scale(zoom, zoom); rect = fz_transform_rect(ctm, pc->media_box); sheet = fz_new_text_sheet(ctx); text = fz_new_text_page(ctx, rect); dev = fz_new_text_device(ctx, sheet, text); fz_run_page(doc, pc->page, dev, ctm, NULL); fz_free_device(dev); dev = NULL; len = textlen(text); for (pos = 0; pos < len; pos++) { fz_bbox rr = fz_empty_bbox; n = match(text, str, pos); for (i = 0; i < n; i++) rr = fz_union_bbox(rr, bboxcharat(text, pos + i)); if (!fz_is_empty_bbox(rr) && hit_count < MAX_SEARCH_HITS) hit_bbox[hit_count++] = rr; } } fz_always(ctx) { fz_free_text_page(ctx, text); fz_free_text_sheet(ctx, sheet); fz_free_device(dev); } fz_catch(ctx) { jclass cls; (*env)->ReleaseStringUTFChars(env, jtext, str); cls = (*env)->FindClass(env, "java/lang/OutOfMemoryError"); if (cls != NULL) (*env)->ThrowNew(env, cls, "Out of memory in MuPDFCore_searchPage"); (*env)->DeleteLocalRef(env, cls); return NULL; } (*env)->ReleaseStringUTFChars(env, jtext, str); arr = (*env)->NewObjectArray(env, hit_count, rectClass, NULL); if (arr == NULL) return NULL; for (i = 0; i < hit_count; i++) { rect = (*env)->NewObject(env, rectClass, ctor, (float) (hit_bbox[i].x0), (float) (hit_bbox[i].y0), (float) (hit_bbox[i].x1), (float) (hit_bbox[i].y1)); if (rect == NULL) return NULL; (*env)->SetObjectArrayElement(env, arr, i, rect); (*env)->DeleteLocalRef(env, rect); } return arr; }
static void drawpage(xps_context *ctx, int pagenum) { xps_page *page; fz_display_list *list; fz_device *dev; int start; int code; if (showtime) { start = gettime(); } code = xps_load_page(&page, ctx, pagenum - 1); if (code) die(fz_rethrow(code, "cannot load page %d in file '%s'", pagenum, filename)); list = NULL; if (uselist) { list = fz_new_display_list(); dev = fz_new_list_device(list); xps_run_page(ctx, page, dev, fz_identity); fz_free_device(dev); } if (showxml) { dev = fz_new_trace_device(); printf("<page number=\"%d\">\n", pagenum); if (list) fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox); else xps_run_page(ctx, page, dev, fz_identity); printf("</page>\n"); fz_free_device(dev); } if (showtext) { fz_text_span *text = fz_new_text_span(); dev = fz_new_text_device(text); if (list) fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox); else xps_run_page(ctx, page, dev, fz_identity); fz_free_device(dev); printf("[Page %d]\n", pagenum); if (showtext > 1) fz_debug_text_span_xml(text); else fz_debug_text_span(text); printf("\n"); fz_free_text_span(text); } if (showmd5 || showtime) printf("page %s %d", filename, pagenum); if (output || showmd5 || showtime) { float zoom; fz_matrix ctm; fz_rect rect; fz_bbox bbox; fz_pixmap *pix; rect.x0 = rect.y0 = 0; rect.x1 = page->width; rect.y1 = page->height; zoom = resolution / 96; ctm = fz_translate(0, -page->height); ctm = fz_concat(ctm, fz_scale(zoom, zoom)); bbox = fz_round_rect(fz_transform_rect(ctm, rect)); /* TODO: banded rendering and multi-page ppm */ pix = fz_new_pixmap_with_rect(colorspace, bbox); if (savealpha) fz_clear_pixmap(pix); else fz_clear_pixmap_with_color(pix, 255); dev = fz_new_draw_device(glyphcache, pix); if (list) fz_execute_display_list(list, dev, ctm, bbox); else xps_run_page(ctx, page, dev, ctm); fz_free_device(dev); if (output) { char buf[512]; sprintf(buf, output, pagenum); if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm")) fz_write_pnm(pix, buf); else if (strstr(output, ".pam")) fz_write_pam(pix, buf, savealpha); else if (strstr(output, ".png")) fz_write_png(pix, buf, savealpha); } if (showmd5) { fz_md5 md5; unsigned char digest[16]; int i; fz_md5_init(&md5); fz_md5_update(&md5, pix->samples, pix->w * pix->h * pix->n); fz_md5_final(&md5, digest); printf(" "); for (i = 0; i < 16; i++) printf("%02x", digest[i]); } fz_drop_pixmap(pix); } if (list) fz_free_display_list(list); if (showtime) { int end = gettime(); int diff = end - start; if (diff < timing.min) { timing.min = diff; timing.minpage = pagenum; } if (diff > timing.max) { timing.max = diff; timing.maxpage = pagenum; } timing.total += diff; timing.count ++; printf(" %dms", diff); } if (showmd5 || showtime) printf("\n"); }