static int _pdf_doc_text_length(struct _pdf_doc *self, int pageno) { fz_display_list *list; fz_text_span *text, *span; fz_device *tdev; int length = 0; list = _pdf_doc_get_list(self, pageno); text = fz_new_text_span(); tdev = fz_new_text_device(text); fz_execute_display_list( list, tdev, fz_identity, fz_infinite_bbox); for (span = text; span; span = span->next) { length += span->len; if (!span->eol && span->next) continue; /* End of line ? */ length += 1; } fz_free_device(tdev); fz_free_text_span(text); return length; }
/*
 * Extract the text of page `pageno` into two parallel arrays.
 *
 * For every character of every span one byte is written to `tbuf` and
 * one rectangle (the character's bounding box) to `rbuf`. After a span
 * that ends a line (flagged `eol`) or is the last span, a '\n' is
 * written to `tbuf` together with `mume_rect_empty` in `rbuf`.
 *
 * No NUL terminator is appended and no bounds are checked here: the
 * caller must supply buffers with enough room for every slot written.
 *
 * Code points below 32 or above 127 are written as '?'. The test is
 * made on the code point itself, before it is narrowed to `char`;
 * narrowing first would turn code points above 255 into whatever
 * character their low byte happens to be, and would make the handling
 * of 128..255 depend on whether plain `char` is signed.
 */
static void _pdf_doc_extract_text(
    struct _pdf_doc *self, int pageno, char *tbuf, mume_rect_t *rbuf)
{
    fz_display_list *list;
    fz_text_span *text, *span;
    fz_device *tdev;
    int i;

    list = _pdf_doc_get_list(self, pageno);
    text = fz_new_text_span();
    tdev = fz_new_text_device(text);

    /* Replay the page through the text device to populate the spans. */
    fz_execute_display_list(
        list, tdev, fz_identity, fz_infinite_bbox);

    for (span = text; span; span = span->next) {
        for (i = 0; i < span->len; i++) {
            int code = span->text[i].c;

            *tbuf++ = (code < 32 || code > 127) ? '?' : (char)code;
            *rbuf++ = _fz_bbox_to_mume_rect(span->text[i].bbox);
        }

        if (!span->eol && span->next)
            continue;

        /* End of line (or end of text): emit a line break slot. */
        *tbuf++ = '\n';
        *rbuf++ = mume_rect_empty;
    }

    fz_free_device(tdev);
    fz_free_text_span(text);
}
/*
 * Add character `c` (with bounding box `bbox`) to the span list whose
 * tail is `*last`.
 *
 * A tail span that has no font yet adopts `font` and `size`. If the
 * tail's font, size or writing mode differs from the requested ones
 * and `c` is not a space (32), a new span is linked after the tail and
 * `*last` is advanced to it.
 *
 * `c == -1` adds nothing. The Latin ligatures U+FB00..U+FB06 are
 * expanded into their component letters, each receiving an equal
 * slice of `bbox` via fz_split_bbox. Any other value is added as is.
 */
static void
fz_add_text_char(fz_context *ctx, fz_text_span **last, fz_font *font, float size, int wmode, int c, fz_bbox bbox)
{
	fz_text_span *cur = *last;
	const char *parts = NULL;
	int count = 0;
	int k;

	if (cur->font == NULL)
	{
		cur->font = fz_keep_font(ctx, font);
		cur->size = size;
	}

	if (c != 32 && (cur->font != font || cur->size != size || cur->wmode != wmode))
	{
		fz_text_span *fresh = fz_new_text_span(ctx);

		fresh->font = fz_keep_font(ctx, font);
		fresh->size = size;
		fresh->wmode = wmode;
		(*last)->next = fresh;
		*last = fresh;
		cur = fresh;
	}

	/* ignore when one unicode character maps to multiple glyphs */
	if (c == -1)
		return;

	switch (c)
	{
	case 0xFB00: /* ff */
		parts = "ff";
		count = 2;
		break;
	case 0xFB01: /* fi */
		parts = "fi";
		count = 2;
		break;
	case 0xFB02: /* fl */
		parts = "fl";
		count = 2;
		break;
	case 0xFB03: /* ffi */
		parts = "ffi";
		count = 3;
		break;
	case 0xFB04: /* ffl */
		parts = "ffl";
		count = 3;
		break;
	case 0xFB05: /* long st */
	case 0xFB06: /* st */
		parts = "st";
		count = 2;
		break;
	default:
		break;
	}

	if (parts == NULL)
	{
		/* Not a ligature: the character keeps the whole box. */
		fz_add_text_char_imp(ctx, cur, c, bbox);
		return;
	}

	/* Ligature: one component letter per equal slice of the box. */
	for (k = 0; k < count; k++)
		fz_add_text_char_imp(ctx, cur, parts[k], fz_split_bbox(bbox, k, count));
}
/*
 * Return the span of `text` that new glyphs with the given font,
 * writing mode and matrix should be appended to.
 *
 * An empty list gets its first span. Otherwise the tail is reused when
 * its font, wmode and the a/b/c/d components of its matrix all equal
 * the requested ones; if any differs, a new span is created, linked
 * after the tail and becomes the new tail.
 */
static fz_text_span *
fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, const fz_matrix *trm)
{
	fz_text_span *tail = text->tail;
	fz_text_span *fresh;

	if (tail == NULL)
	{
		fresh = fz_new_text_span(ctx, font, wmode, trm);
		text->tail = fresh;
		text->head = fresh;
		return text->tail;
	}

	if (tail->font == font &&
		tail->wmode == wmode &&
		tail->trm.a == trm->a &&
		tail->trm.b == trm->b &&
		tail->trm.c == trm->c &&
		tail->trm.d == trm->d)
	{
		/* Same font, mode and matrix: keep appending to the tail. */
		return tail;
	}

	fresh = fz_new_text_span(ctx, font, wmode, trm);
	tail->next = fresh;
	text->tail = fresh;
	return text->tail;
}
/*
 * Terminate the current line: flag the tail span `*last` as end-of-line,
 * link a newly created span (carrying `size` and `wmode`) after it, and
 * advance `*last` to that new span.
 */
static void
fz_add_text_newline(fz_text_span **last, float size, int wmode)
{
	fz_text_span *fresh = fz_new_text_span();
	fz_text_span *prev;

	fresh->size = size;
	fresh->wmode = wmode;

	prev = *last;
	prev->eol = 1;
	prev->next = fresh;

	*last = fresh;
}
/*
 * Terminate the current line: flag the tail span `*last` as end-of-line,
 * link a newly created span after it and advance `*last` to that span.
 * The new span takes a reference to `font` (via fz_keep_font) and
 * carries `size` and `wmode`.
 */
static void
fz_add_text_newline(fz_context *ctx, fz_text_span **last, fz_font *font, float size, int wmode)
{
	fz_text_span *fresh = fz_new_text_span(ctx);
	fz_text_span *prev;

	fresh->font = fz_keep_font(ctx, font);
	fresh->size = size;
	fresh->wmode = wmode;

	prev = *last;
	prev->eol = 1;
	prev->next = fresh;

	*last = fresh;
}
/*
 * Load, render and/or repaint the current page of the viewer.
 *
 * loadpage: free the previous page's display list, text spans and
 *           links, load the page through the PDF or XPS loader, reset
 *           the search hit, and re-extract the page text.
 * drawpage: update the window title and render the display list into
 *           a new pixmap stored in app->image.
 * repaint:  re-apply the pan position, optionally shrink-wrap the
 *           window to the image (capped at 90% of the screen), repaint
 *           and restore the arrow cursor.
 */
static void pdfapp_showpage(pdfapp_t *app, int loadpage, int drawpage, int repaint)
{
	char buf[256];
	fz_device *idev;
	fz_device *tdev;
	fz_colorspace *colorspace;
	fz_matrix ctm;
	fz_bbox bbox;

	wincursor(app, WAIT);

	if (loadpage)
	{
		if (app->page_list)
			fz_free_display_list(app->page_list);
		if (app->page_text)
			fz_free_text_span(app->page_text);
		if (app->page_links)
			pdf_free_link(app->page_links);

		if (app->xref)
			pdfapp_loadpage_pdf(app);
		if (app->xps)
			pdfapp_loadpage_xps(app);

		/* Zero search hit position */
		app->hit = -1;
		app->hitlen = 0;

		/* Extract text */
		app->page_text = fz_new_text_span();
		tdev = fz_new_text_device(app->page_text);
		fz_execute_display_list(app->page_list, tdev, fz_identity, fz_infinite_bbox);
		fz_free_device(tdev);
	}

	if (drawpage)
	{
		/*
		 * The document title has no length limit of its own, so the
		 * write into buf must be bounded; a long title is truncated
		 * instead of overrunning the stack buffer. The terminator is
		 * set explicitly in case snprintf maps to an implementation
		 * that does not terminate on truncation (e.g. legacy MSVC
		 * _snprintf).
		 */
		snprintf(buf, sizeof buf, "%s - %d/%d (%d dpi)",
				app->doctitle, app->pageno, app->pagecount, app->resolution);
		buf[sizeof buf - 1] = '\0';
		wintitle(app, buf);

		ctm = pdfapp_viewctm(app);
		bbox = fz_round_rect(fz_transform_rect(ctm, app->page_bbox));

		/* Draw */
		if (app->image)
			fz_drop_pixmap(app->image);
		if (app->grayscale)
			colorspace = fz_device_gray;
		else
#ifdef _WIN32
			colorspace = fz_device_bgr;
#else
			colorspace = fz_device_rgb;
#endif
		app->image = fz_new_pixmap_with_rect(colorspace, bbox);
		fz_clear_pixmap_with_color(app->image, 255);
		idev = fz_new_draw_device(app->cache, app->image);
		fz_execute_display_list(app->page_list, idev, ctm, bbox);
		fz_free_device(idev);
	}

	if (repaint)
	{
		pdfapp_panview(app, app->panx, app->pany);

		if (app->shrinkwrap)
		{
			/* NOTE(review): reads app->image; assumes a page has been drawn — confirm */
			int w = app->image->w;
			int h = app->image->h;

			if (app->winw == w)
				app->panx = 0;
			if (app->winh == h)
				app->pany = 0;

			/* Never grow the window beyond 90% of the screen. */
			if (w > app->scrw * 90 / 100)
				w = app->scrw * 90 / 100;
			if (h > app->scrh * 90 / 100)
				h = app->scrh * 90 / 100;

			if (w != app->winw || h != app->winh)
				winresize(app, w, h);
		}

		winrepaint(app);

		wincursor(app, ARROW);
	}

	fz_flush_warnings();
}
/*
 * Add glyph `c` to the text device, deciding whether it continues the
 * device's current span or starts a new one, and whether a space has
 * to be inserted in front of it.
 *
 * trm:   matrix of the glyph; its e/f components give the glyph's
 *        bottom left corner.
 * adv:   advance of the glyph along the direction of motion.
 * wmode: 0 for horizontal motion, anything else for vertical motion.
 */
static void
fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style, int c, fz_matrix *trm, float adv, int wmode)
{
	fz_text_span *cur = dev->cur_span;
	int horizontal = (wmode == 0);
	int can_append = 1;
	int add_space = 0;
	int same_layout;
	fz_point dir, ndir, start, stop, space_end;
	fz_point delta;
	float size;
	float spacing = 0;
	float base_offset = 0;

	/* dir = direction vector for motion. ndir = normalised(dir) */
	dir.x = horizontal ? 1 : 0;
	dir.y = horizontal ? 0 : -1;
	fz_transform_vector(&dir, trm);
	ndir = dir;
	fz_normalize_vector(&ndir);

	size = fz_matrix_expansion(trm);

	/*
	 * Work out where the glyph 'starts' and 'stops'. Each glyph holds
	 * its 'start' position; the next glyph in the span (or span->max
	 * if there is no next glyph) holds its 'end' position.
	 *
	 * For both kinds of motion trm->{e,f} is the bottom left corner
	 * of the glyph.
	 *
	 * Horizontal: start is bottom left, stop is bottom right.
	 * Vertical:   start is top left (where it advanced from),
	 *             stop is bottom left.
	 */
	if (horizontal)
	{
		start.x = trm->e;
		start.y = trm->f;
		stop.x = trm->e + adv * dir.x;
		stop.y = trm->f + adv * dir.y;
	}
	else
	{
		start.x = trm->e - adv * dir.x;
		start.y = trm->f - adv * dir.y;
		stop.x = trm->e;
		stop.y = trm->f;
	}

	/* A glyph can only join a span with the same matrix and wmode. */
	same_layout = cur != NULL &&
		trm->a == cur->transform.a &&
		trm->b == cur->transform.b &&
		trm->c == cur->transform.c &&
		trm->d == cur->transform.d &&
		cur->wmode == wmode;

	if (!same_layout)
	{
		/* The matrix or the wmode changed (or there is no span at
		 * all), so appending is impossible. */
#ifdef DEBUG_SPANS
		printf("Transform/WMode changed\n");
#endif
		can_append = 0;
	}
	else
	{
		/* How far have we moved since the end of the current span? */
		delta.x = start.x - cur->max.x;
		delta.y = start.y - cur->max.y;

		/* Same transform means same direction. Split the movement
		 * into the distance along the baseline from the expected
		 * position (spacing) and the distance off the previous
		 * baseline (base_offset). */
		spacing = ndir.x * delta.x + ndir.y * delta.y;
		base_offset = -ndir.y * delta.x + ndir.x * delta.y;

		spacing /= size * SPACE_DIST;
		spacing = fabsf(spacing);

		if (!(fabsf(base_offset) < size * 0.1))
		{
			/* Too far off the baseline. */
			can_append = 0;
#ifdef DEBUG_SPANS
			spacing = 0;
#endif
		}
		else if (spacing < 1.0)
		{
			/* Motion is in line, and small. */
		}
		else if (spacing < (SPACE_MAX_DIST/SPACE_DIST))
		{
			/* Motion is in line, but large enough to warrant a
			 * space. (spacing >= 1 is implied by the branch
			 * above not having been taken.) */
			if (dev->lastchar != ' ' && wmode == 0)
				add_space = 1;
		}
		else
		{
			/* Motion is in line, but too large - split to a new span */
			can_append = 0;
		}
	}

#ifdef DEBUG_SPANS
	printf("%c%c append=%d space=%d size=%g spacing=%g base_offset=%g\n", dev->lastchar, c, can_append, add_space, size, spacing, base_offset);
#endif

	if (can_append == 0)
	{
		/* Hand the finished span over and start a new one. cur_span
		 * is cleared before the new span is created so that it does
		 * not keep pointing at the span just handed over —
		 * presumably in case span creation does not return
		 * normally; TODO confirm. */
		add_span_to_soup(ctx, dev->spans, dev->cur_span);
		dev->cur_span = NULL;
		dev->cur_span = fz_new_text_span(ctx, &start, wmode, trm);
		dev->cur_span->spacing = 0;
	}

	if (add_space)
	{
		space_end.x = - 0.2f;
		space_end.y = 0;
		fz_transform_point(&space_end, trm);
		add_char_to_span(ctx, dev->cur_span, ' ', &start, &space_end, style);
	}

	add_char_to_span(ctx, dev->cur_span, c, &start, &stop, style);
}
/*
 * Process one page of an XPS document according to the tool's global
 * options: optionally record it into a display list (uselist), dump it
 * as a trace (showxml), dump its text (showtext), render it to a
 * pixmap and write it to a file (output), print the MD5 of the
 * rendered samples (showmd5) and record timing (showtime).
 *
 * `pagenum` is 1-based; the page is loaded with index pagenum - 1.
 * A page that cannot be loaded is reported through die().
 */
static void drawpage(xps_context *ctx, int pagenum)
{
	xps_page *page;
	fz_display_list *list;
	fz_device *dev;
	int start = 0; /* only meaningful when showtime is set */
	int code;

	if (showtime)
	{
		start = gettime();
	}

	code = xps_load_page(&page, ctx, pagenum - 1);
	if (code)
		die(fz_rethrow(code, "cannot load page %d in file '%s'", pagenum, filename));

	list = NULL;

	if (uselist)
	{
		list = fz_new_display_list();
		dev = fz_new_list_device(list);
		xps_run_page(ctx, page, dev, fz_identity);
		fz_free_device(dev);
	}

	if (showxml)
	{
		dev = fz_new_trace_device();
		printf("<page number=\"%d\">\n", pagenum);
		if (list)
			fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox);
		else
			xps_run_page(ctx, page, dev, fz_identity);
		printf("</page>\n");
		fz_free_device(dev);
	}

	if (showtext)
	{
		fz_text_span *text = fz_new_text_span();
		dev = fz_new_text_device(text);
		if (list)
			fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox);
		else
			xps_run_page(ctx, page, dev, fz_identity);
		fz_free_device(dev);
		printf("[Page %d]\n", pagenum);
		if (showtext > 1)
			fz_debug_text_span_xml(text);
		else
			fz_debug_text_span(text);
		printf("\n");
		fz_free_text_span(text);
	}

	if (showmd5 || showtime)
		printf("page %s %d", filename, pagenum);

	if (output || showmd5 || showtime)
	{
		float zoom;
		fz_matrix ctm;
		fz_rect rect;
		fz_bbox bbox;
		fz_pixmap *pix;

		rect.x0 = rect.y0 = 0;
		rect.x1 = page->width;
		rect.y1 = page->height;

		/* NOTE(review): if `resolution` has an integer type this
		 * division truncates — confirm it is floating point. */
		zoom = resolution / 96;
		ctm = fz_translate(0, -page->height);
		ctm = fz_concat(ctm, fz_scale(zoom, zoom));
		bbox = fz_round_rect(fz_transform_rect(ctm, rect));

		/* TODO: banded rendering and multi-page ppm */

		pix = fz_new_pixmap_with_rect(colorspace, bbox);

		if (savealpha)
			fz_clear_pixmap(pix);
		else
			fz_clear_pixmap_with_color(pix, 255);

		dev = fz_new_draw_device(glyphcache, pix);
		if (list)
			fz_execute_display_list(list, dev, ctm, bbox);
		else
			xps_run_page(ctx, page, dev, ctm);
		fz_free_device(dev);

		if (output)
		{
			char buf[512];
			int n;

			/*
			 * `output` is a caller-supplied pattern that is used
			 * as a format string with the page number as its only
			 * argument. Its expansion has no inherent length
			 * limit, so the write is bounded and a name that does
			 * not fit is rejected rather than overrunning buf or
			 * being written under a truncated name.
			 */
			n = snprintf(buf, sizeof buf, output, pagenum);
			if (n < 0 || n >= (int)sizeof buf)
				fprintf(stderr, "output file name too long for page %d; not written\n", pagenum);
			else if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm"))
				fz_write_pnm(pix, buf);
			else if (strstr(output, ".pam"))
				fz_write_pam(pix, buf, savealpha);
			else if (strstr(output, ".png"))
				fz_write_png(pix, buf, savealpha);
		}

		if (showmd5)
		{
			fz_md5 md5;
			unsigned char digest[16];
			int i;

			fz_md5_init(&md5);
			fz_md5_update(&md5, pix->samples, pix->w * pix->h * pix->n);
			fz_md5_final(&md5, digest);

			printf(" ");
			for (i = 0; i < 16; i++)
				printf("%02x", digest[i]);
		}

		fz_drop_pixmap(pix);
	}

	if (list)
		fz_free_display_list(list);

	if (showtime)
	{
		int end = gettime();
		int diff = end - start;

		if (diff < timing.min)
		{
			timing.min = diff;
			timing.minpage = pagenum;
		}
		if (diff > timing.max)
		{
			timing.max = diff;
			timing.maxpage = pagenum;
		}
		timing.total += diff;
		timing.count ++;

		printf(" %dms", diff);
	}

	if (showmd5 || showtime)
		printf("\n");
}