Exemplo n.º 1
0
/*
 * Count the text slots needed for page `pageno`.
 *
 * The page's display list is replayed through a text device, and the
 * resulting span list is walked: every span contributes its `len`
 * characters, plus one extra slot (for a line break) whenever the span is
 * flagged `eol` or is the last span in the list.
 *
 * Returns the total number of slots.
 */
static int _pdf_doc_text_length(struct _pdf_doc *self, int pageno)
{
    fz_display_list *page_list = _pdf_doc_get_list(self, pageno);
    fz_text_span *head = fz_new_text_span();
    fz_device *text_dev = fz_new_text_device(head);
    fz_text_span *cur;
    int total = 0;

    fz_execute_display_list(
        page_list, text_dev, fz_identity, fz_infinite_bbox);

    cur = head;
    while (cur) {
        total += cur->len;

        /* A line ends on an eol-flagged span or on the final span. */
        if (cur->eol || !cur->next)
            total += 1;

        cur = cur->next;
    }

    fz_free_device(text_dev);
    fz_free_text_span(head);

    return total;
}
Exemplo n.º 2
0
/*
 * Extract the text of page `pageno` into two parallel output arrays.
 *
 * The page's display list is replayed through a text device. For every
 * character of every span one byte is written to `tbuf` and one rectangle
 * (the character's bbox converted by _fz_bbox_to_mume_rect) to `rbuf`.
 * After each span that is flagged `eol`, or that is the last span, a '\n'
 * is written to `tbuf` and `mume_rect_empty` to `rbuf`.
 *
 * Both buffers must be large enough for every slot written; no bounds are
 * checked here and `tbuf` is NOT NUL-terminated by this function.
 *
 * Characters that cannot be stored in a single 7-bit byte (code points
 * below 32 or above 127) are replaced by '?'. The decision is made on the
 * full code point, not on the value after narrowing to `char`: narrowing
 * first would map code points above 255 to an unrelated byte and would make
 * the treatment of 128..255 depend on whether plain `char` is signed.
 */
static void _pdf_doc_extract_text(
    struct _pdf_doc *self, int pageno, char *tbuf, mume_rect_t *rbuf)
{
    fz_display_list *list;
    fz_text_span *text, *span;
    fz_device *tdev;
    int i;

    list = _pdf_doc_get_list(self, pageno);
    text = fz_new_text_span();
    tdev = fz_new_text_device(text);
    fz_execute_display_list(
        list, tdev, fz_identity, fz_infinite_bbox);

    for (span = text; span; span = span->next) {
        for (i = 0; i < span->len; i++) {
            /* Test the code point itself before narrowing to char. */
            int c = span->text[i].c;

            if (c < 32 || c > 127)
                *tbuf = '?';
            else
                *tbuf = (char)c;

            tbuf++;
            *rbuf++ = _fz_bbox_to_mume_rect(span->text[i].bbox);
        }

        /* Only an eol-flagged span or the final span ends a line. */
        if (!span->eol && span->next)
            continue;

        *tbuf++ = '\n';
        *rbuf++ = mume_rect_empty;
    }

    fz_free_device(tdev);
    fz_free_text_span(text);
}
Exemplo n.º 3
0
/*
 * Add the first `count` characters of `parts` to `span`, one call to
 * fz_add_text_char_imp per character, each with the box returned by
 * fz_split_bbox(bbox, index, count).
 */
static void
fz_add_text_ligature_parts(fz_context *ctx, fz_text_span *span, const char *parts, int count, fz_bbox bbox)
{
	int k;

	for (k = 0; k < count; k++)
		fz_add_text_char_imp(ctx, span, parts[k], fz_split_bbox(bbox, k, count));
}

/*
 * Add character `c` to the span list whose tail is `*last`.
 *
 * If the tail span has no font yet, it adopts `font` and `size`. If the
 * tail's font, size or wmode then differs from the arguments and `c` is not
 * a space (32), a new span carrying the requested font/size/wmode is linked
 * after the tail and `*last` is advanced to it.
 *
 * `c == -1` adds nothing. The Latin ligatures U+FB00..U+FB06 are expanded
 * into their component letters, each with a sub-box obtained from
 * fz_split_bbox. Any other value is added as-is with the full `bbox`.
 */
static void
fz_add_text_char(fz_context *ctx, fz_text_span **last, fz_font *font, float size, int wmode, int c, fz_bbox bbox)
{
	fz_text_span *cur = *last;
	int style_differs;

	if (cur->font == NULL)
	{
		cur->font = fz_keep_font(ctx, font);
		cur->size = size;
	}

	style_differs = (cur->font != font || cur->size != size || cur->wmode != wmode);
	if (style_differs && c != 32)
	{
		fz_text_span *fresh = fz_new_text_span(ctx);

		fresh->font = fz_keep_font(ctx, font);
		fresh->size = size;
		fresh->wmode = wmode;
		(*last)->next = fresh;
		*last = fresh;
		cur = fresh;
	}

	switch (c)
	{
	case -1: /* ignore when one unicode character maps to multiple glyphs */
		break;
	case 0xFB00: /* ff */
		fz_add_text_ligature_parts(ctx, cur, "ff", 2, bbox);
		break;
	case 0xFB01: /* fi */
		fz_add_text_ligature_parts(ctx, cur, "fi", 2, bbox);
		break;
	case 0xFB02: /* fl */
		fz_add_text_ligature_parts(ctx, cur, "fl", 2, bbox);
		break;
	case 0xFB03: /* ffi */
		fz_add_text_ligature_parts(ctx, cur, "ffi", 3, bbox);
		break;
	case 0xFB04: /* ffl */
		fz_add_text_ligature_parts(ctx, cur, "ffl", 3, bbox);
		break;
	case 0xFB05: /* long st */
	case 0xFB06: /* st */
		fz_add_text_ligature_parts(ctx, cur, "st", 2, bbox);
		break;
	default:
		fz_add_text_char_imp(ctx, cur, c, bbox);
		break;
	}
}
Exemplo n.º 4
0
/*
 * Return the span of `text` that a glyph with the given font, writing mode
 * and transform should be added to.
 *
 * - If `text` has no spans, a new one is created and becomes both head and
 *   tail.
 * - If the tail span has the same font, the same wmode and the same a/b/c/d
 *   matrix components as `trm`, the tail is reused (the e/f components are
 *   not compared).
 * - Otherwise a new span is created, linked after the tail, and becomes the
 *   new tail.
 */
static fz_text_span *
fz_add_text_span(fz_context *ctx, fz_text *text, fz_font *font, int wmode, const fz_matrix *trm)
{
	fz_text_span *tail = text->tail;
	fz_text_span *fresh;

	if (tail == NULL)
	{
		fresh = fz_new_text_span(ctx, font, wmode, trm);
		text->head = fresh;
		text->tail = fresh;
		return fresh;
	}

	if (tail->font == font &&
		tail->wmode == wmode &&
		tail->trm.a == trm->a &&
		tail->trm.b == trm->b &&
		tail->trm.c == trm->c &&
		tail->trm.d == trm->d)
	{
		return tail;
	}

	fresh = fz_new_text_span(ctx, font, wmode, trm);
	tail->next = fresh;
	text->tail = fresh;
	return fresh;
}
Exemplo n.º 5
0
/*
 * End the current line: mark the tail span `*last` as end-of-line, then
 * link a newly created span (carrying `size` and `wmode`) after it and make
 * that new span the tail.
 */
static void
fz_add_text_newline(fz_text_span **last, float size, int wmode)
{
	fz_text_span *fresh = fz_new_text_span();
	fz_text_span *prev;

	fresh->size = size;
	fresh->wmode = wmode;

	prev = *last;
	prev->eol = 1;
	prev->next = fresh;

	*last = fresh;
}
Exemplo n.º 6
0
/*
 * End the current line: mark the tail span `*last` as end-of-line, then
 * link a newly created span after it and make that new span the tail.
 * The new span takes a kept reference to `font` plus the given `size` and
 * `wmode`.
 */
static void
fz_add_text_newline(fz_context *ctx, fz_text_span **last, fz_font *font, float size, int wmode)
{
	fz_text_span *fresh = fz_new_text_span(ctx);
	fz_text_span *prev;

	fresh->font = fz_keep_font(ctx, font);
	fresh->size = size;
	fresh->wmode = wmode;

	prev = *last;
	prev->eol = 1;
	prev->next = fresh;

	*last = fresh;
}
Exemplo n.º 7
0
/*
 * Bring the viewer up to date for the current page. Each of the three
 * flags enables one stage; stages run in this order:
 *
 *   loadpage - free the cached display list, text spans and links, reload
 *              the page through the PDF and/or XPS loader (depending on
 *              which of app->xref / app->xps is set), reset the search hit,
 *              and re-extract the page text from the display list.
 *   drawpage - update the window title and render the display list into a
 *              freshly allocated app->image using the current view matrix.
 *   repaint  - re-apply the pan position, optionally resize the window to
 *              fit the image (shrinkwrap, capped at 90% of the screen),
 *              repaint, and restore the arrow cursor.
 *
 * The wait cursor is set unconditionally on entry but is only switched back
 * to the arrow when `repaint` is non-zero.
 */
static void pdfapp_showpage(pdfapp_t *app, int loadpage, int drawpage, int repaint)
{
	char buf[256];
	fz_device *idev;
	fz_device *tdev;
	fz_colorspace *colorspace;
	fz_matrix ctm;
	fz_bbox bbox;

	wincursor(app, WAIT);

	if (loadpage)
	{
		if (app->page_list)
			fz_free_display_list(app->page_list);
		if (app->page_text)
			fz_free_text_span(app->page_text);
		if (app->page_links)
			pdf_free_link(app->page_links);

		if (app->xref)
			pdfapp_loadpage_pdf(app);
		if (app->xps)
			pdfapp_loadpage_xps(app);

		/* Zero search hit position */
		app->hit = -1;
		app->hitlen = 0;

		/* Extract text */
		app->page_text = fz_new_text_span();
		tdev = fz_new_text_device(app->page_text);
		fz_execute_display_list(app->page_list, tdev, fz_identity, fz_infinite_bbox);
		fz_free_device(tdev);
	}

	if (drawpage)
	{
		/* The title comes from the document and has no length limit, so
		 * format with an explicit bound; an over-long title is truncated
		 * instead of overflowing buf. */
		snprintf(buf, sizeof buf, "%s - %d/%d (%d dpi)", app->doctitle,
				app->pageno, app->pagecount, app->resolution);
		wintitle(app, buf);

		ctm = pdfapp_viewctm(app);
		bbox = fz_round_rect(fz_transform_rect(ctm, app->page_bbox));

		/* Draw */
		if (app->image)
			fz_drop_pixmap(app->image);
		if (app->grayscale)
			colorspace = fz_device_gray;
		else
#ifdef _WIN32
			colorspace = fz_device_bgr;
#else
			colorspace = fz_device_rgb;
#endif
		app->image = fz_new_pixmap_with_rect(colorspace, bbox);
		fz_clear_pixmap_with_color(app->image, 255);
		idev = fz_new_draw_device(app->cache, app->image);
		fz_execute_display_list(app->page_list, idev, ctm, bbox);
		fz_free_device(idev);
	}

	if (repaint)
	{
		pdfapp_panview(app, app->panx, app->pany);

		if (app->shrinkwrap)
		{
			int w = app->image->w;
			int h = app->image->h;
			/* Window already matches the image: no panning needed. */
			if (app->winw == w)
				app->panx = 0;
			if (app->winh == h)
				app->pany = 0;
			/* Never grow the window beyond 90% of the screen. */
			if (w > app->scrw * 90 / 100)
				w = app->scrw * 90 / 100;
			if (h > app->scrh * 90 / 100)
				h = app->scrh * 90 / 100;
			if (w != app->winw || h != app->winh)
				winresize(app, w, h);
		}

		winrepaint(app);

		wincursor(app, ARROW);
	}

	fz_flush_warnings();
}
Exemplo n.º 8
0
/*
 * Add character `c` to the text device's current span, starting a new span
 * first when the character cannot be appended to the current one.
 *
 *   ctx   - context passed through to the span helpers.
 *   dev   - text device; reads dev->cur_span, dev->spans and dev->lastchar,
 *           and may replace dev->cur_span.
 *   style - passed through unchanged to add_char_to_span.
 *   c     - character to add.
 *   trm   - glyph transform; a/b/c/d are compared against the current
 *           span's transform and e/f are used as the glyph position.
 *   adv   - advance, multiplied by the transformed direction vector to get
 *           the distance between the glyph's start and end points.
 *   wmode - 0 is treated as horizontal, anything else as vertical.
 *
 * The character is NOT appended to the current span (a new span is started)
 * when any of the following holds:
 *   - there is no current span, or its a/b/c/d transform or wmode differs;
 *   - the glyph start is at least size * 0.1 off the current span's
 *     baseline;
 *   - the gap along the baseline is at least SPACE_MAX_DIST (in units of
 *     size).
 * When the gap is between SPACE_DIST and SPACE_MAX_DIST (in units of size),
 * the previous character was not a space and wmode is 0, a ' ' is inserted
 * before `c`.
 *
 * dev->lastchar is only read here; it is not updated by this function.
 */
static void
fz_add_text_char_imp(fz_context *ctx, fz_text_device *dev, fz_text_style *style, int c, fz_matrix *trm, float adv, int wmode)
{
	int can_append = 1;
	int add_space = 0;
	fz_point dir, ndir, p, q, r;
	float size;
	fz_point delta;
	float spacing = 0;
	float base_offset = 0;

	if (wmode == 0)
	{
		dir.x = 1;
		dir.y = 0;
	}
	else
	{
		dir.x = 0;
		dir.y = -1;
	}
	fz_transform_vector(&dir, trm);
	ndir = dir;
	fz_normalize_vector(&ndir);
	/* dir = direction vector for motion. ndir = normalised(dir) */

	size = fz_matrix_expansion(trm);

	/* We need to identify where glyphs 'start' (p) and 'stop' (q).
	 * Each glyph holds its 'start' position, and the next glyph in the
	 * span (or span->max if there is no next glyph) holds its 'end'
	 * position.
	 *
	 * For both horizontal and vertical motion, trm->{e,f} gives the
	 * bottom left corner of the glyph.
	 *
	 * In horizontal mode:
	 *   + p is bottom left.
	 *   + q is the bottom right
	 * In vertical mode:
	 *   + p is top left (where it advanced from)
	 *   + q is bottom left
	 */
	if (wmode == 0)
	{
		p.x = trm->e;
		p.y = trm->f;
		q.x = trm->e + adv * dir.x;
		q.y = trm->f + adv * dir.y;
	}
	else
	{
		p.x = trm->e - adv * dir.x;
		p.y = trm->f - adv * dir.y;
		q.x = trm->e;
		q.y = trm->f;
	}

	if (dev->cur_span == NULL ||
		trm->a != dev->cur_span->transform.a || trm->b != dev->cur_span->transform.b ||
		trm->c != dev->cur_span->transform.c || trm->d != dev->cur_span->transform.d ||
		dev->cur_span->wmode != wmode)
	{
		/* If the matrix has changed, or the wmode is different (or
		 * if we don't have a span at all), then we can't append. */
#ifdef DEBUG_SPANS
		printf("Transform/WMode changed\n");
#endif
		can_append = 0;
	}
	else
	{
		/* Calculate how far we've moved since the end of the current
		 * span. */
		delta.x = p.x - dev->cur_span->max.x;
		delta.y = p.y - dev->cur_span->max.y;

		/* The transform has not changed, so we know we're in the same
		 * direction. Calculate 2 distances; how far off the previous
		 * baseline we are, together with how far along the baseline
		 * we are from the expected position. */
		/* spacing: component of delta along ndir (dot product).
		 * base_offset: component of delta perpendicular to ndir. */
		spacing = ndir.x * delta.x + ndir.y * delta.y;
		base_offset = -ndir.y * delta.x + ndir.x * delta.y;

		/* Rescale so that 1.0 corresponds to a gap of size * SPACE_DIST;
		 * the sign is dropped, so backward and forward motion are treated
		 * alike. SPACE_DIST and SPACE_MAX_DIST are defined outside this
		 * function. */
		spacing /= size * SPACE_DIST;
		spacing = fabsf(spacing);
		if (fabsf(base_offset) < size * 0.1)
		{
			/* Only a small amount off the baseline - we'll take this */
			if (spacing < 1.0)
			{
				/* Motion is in line, and small. */
			}
			else if (spacing >= 1 && spacing < (SPACE_MAX_DIST/SPACE_DIST))
			{
				/* Motion is in line, but large enough
				 * to warrant us adding a space */
				if (dev->lastchar != ' ' && wmode == 0)
					add_space = 1;
			}
			else
			{
				/* Motion is in line, but too large - split to a new span */
				can_append = 0;
			}
		}
		else
		{
			/* Too far off the baseline - split to a new span. */
			can_append = 0;
#ifdef DEBUG_SPANS
			spacing = 0;
#endif
		}
	}

#ifdef DEBUG_SPANS
	printf("%c%c append=%d space=%d size=%g spacing=%g base_offset=%g\n", dev->lastchar, c, can_append, add_space, size, spacing, base_offset);
#endif

	if (can_append == 0)
	{
		/* Start a new span */
		/* NOTE(review): dev->cur_span can be NULL here (no span yet);
		 * presumably add_span_to_soup accepts NULL - confirm. */
		add_span_to_soup(ctx, dev->spans, dev->cur_span);
		/* NOTE(review): cleared before the allocation, presumably so that
		 * cur_span does not keep pointing at the span just handed to the
		 * soup if fz_new_text_span fails - confirm. */
		dev->cur_span = NULL;
		dev->cur_span = fz_new_text_span(ctx, &p, wmode, trm);
		dev->cur_span->spacing = 0;
	}
	if (add_space)
	{
		/* The inserted space is added at p with r = trm applied to
		 * (-0.2, 0) as its second point.
		 * NOTE(review): the meaning of the -0.2 constant is not evident
		 * from this function - confirm against add_char_to_span. */
		r.x = - 0.2f;
		r.y = 0;
		fz_transform_point(&r, trm);
		add_char_to_span(ctx, dev->cur_span, ' ', &p, &r, style);
	}
	add_char_to_span(ctx, dev->cur_span, c, &p, &q, style);
}
Exemplo n.º 9
0
/*
 * Load page `pagenum` (1-based; the loader is called with pagenum - 1) and
 * process it according to the option variables declared outside this
 * function:
 *
 *   uselist  - record the page into a display list first and replay that
 *              list for every later stage instead of re-running the page.
 *   showxml  - run the page through a trace device between <page> tags.
 *   showtext - extract text and print it (XML form when showtext > 1).
 *   output   - printf-style file name pattern taking the page number; the
 *              rendered pixmap is written as PNM, PAM or PNG depending on
 *              which extension the pattern contains. Any other extension
 *              writes nothing.
 *   showmd5  - print the MD5 digest of the rendered samples.
 *   showtime - measure the time spent and update the `timing` statistics.
 *
 * Rendering happens when any of output, showmd5 or showtime is set.
 * A page that fails to load terminates the program through die().
 */
static void drawpage(xps_context *ctx, int pagenum)
{
	xps_page *page;
	fz_display_list *list;
	fz_device *dev;
	/* Only meaningful when showtime is set; initialised so that it is never
	 * read indeterminate. */
	int start = 0;
	int code;

	if (showtime)
	{
		start = gettime();
	}

	code = xps_load_page(&page, ctx, pagenum - 1);
	if (code)
		die(fz_rethrow(code, "cannot load page %d in file '%s'", pagenum, filename));

	list = NULL;

	if (uselist)
	{
		list = fz_new_display_list();
		dev = fz_new_list_device(list);
		xps_run_page(ctx, page, dev, fz_identity);
		fz_free_device(dev);
	}

	if (showxml)
	{
		dev = fz_new_trace_device();
		printf("<page number=\"%d\">\n", pagenum);
		if (list)
			fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox);
		else
			xps_run_page(ctx, page, dev, fz_identity);
		printf("</page>\n");
		fz_free_device(dev);
	}

	if (showtext)
	{
		fz_text_span *text = fz_new_text_span();
		dev = fz_new_text_device(text);
		if (list)
			fz_execute_display_list(list, dev, fz_identity, fz_infinite_bbox);
		else
			xps_run_page(ctx, page, dev, fz_identity);
		fz_free_device(dev);
		printf("[Page %d]\n", pagenum);
		if (showtext > 1)
			fz_debug_text_span_xml(text);
		else
			fz_debug_text_span(text);
		printf("\n");
		fz_free_text_span(text);
	}

	if (showmd5 || showtime)
		printf("page %s %d", filename, pagenum);

	if (output || showmd5 || showtime)
	{
		float zoom;
		fz_matrix ctm;
		fz_rect rect;
		fz_bbox bbox;
		fz_pixmap *pix;

		rect.x0 = rect.y0 = 0;
		rect.x1 = page->width;
		rect.y1 = page->height;

		/* Scale relative to 96 dpi, after translating by -page->height. */
		zoom = resolution / 96;
		ctm = fz_translate(0, -page->height);
		ctm = fz_concat(ctm, fz_scale(zoom, zoom));
		bbox = fz_round_rect(fz_transform_rect(ctm, rect));

		/* TODO: banded rendering and multi-page ppm */

		pix = fz_new_pixmap_with_rect(colorspace, bbox);

		if (savealpha)
			fz_clear_pixmap(pix);
		else
			fz_clear_pixmap_with_color(pix, 255);

		dev = fz_new_draw_device(glyphcache, pix);
		if (list)
			fz_execute_display_list(list, dev, ctm, bbox);
		else
			xps_run_page(ctx, page, dev, ctm);
		fz_free_device(dev);

		if (output)
		{
			char buf[512];
			/* `output` is deliberately used as the format string (it is
			 * the file name pattern), so its length is not under our
			 * control: bound the write so a long pattern is truncated
			 * rather than overflowing buf.
			 * NOTE(review): the pattern is trusted to contain at most one
			 * integer conversion - confirm where `output` is set. */
			snprintf(buf, sizeof buf, output, pagenum);
			if (strstr(output, ".pgm") || strstr(output, ".ppm") || strstr(output, ".pnm"))
				fz_write_pnm(pix, buf);
			else if (strstr(output, ".pam"))
				fz_write_pam(pix, buf, savealpha);
			else if (strstr(output, ".png"))
				fz_write_png(pix, buf, savealpha);
		}

		if (showmd5)
		{
			fz_md5 md5;
			unsigned char digest[16];
			int i;

			fz_md5_init(&md5);
			fz_md5_update(&md5, pix->samples, pix->w * pix->h * pix->n);
			fz_md5_final(&md5, digest);

			printf(" ");
			for (i = 0; i < 16; i++)
				printf("%02x", digest[i]);
		}

		fz_drop_pixmap(pix);
	}

	if (list)
		fz_free_display_list(list);

	if (showtime)
	{
		int end = gettime();
		int diff = end - start;

		if (diff < timing.min)
		{
			timing.min = diff;
			timing.minpage = pagenum;
		}
		if (diff > timing.max)
		{
			timing.max = diff;
			timing.maxpage = pagenum;
		}
		timing.total += diff;
		timing.count ++;

		printf(" %dms", diff);
	}

	if (showmd5 || showtime)
		printf("\n");
}