Пример #1
0
/*
	fz_text_char_at: Look up the character stored at a page-wide index.

	Characters are numbered in reading order across every block, line
	and span of the page. One extra index position is reserved after
	each line (the pseudo-newline).

	cab: Caller-supplied record that receives the result.

	page: The text page to search.

	idx: Zero-based character index.

	Returns cab. For a real character, cab holds its code and bounding
	box. For a pseudo-newline position, a negative idx, or an idx past
	the last position, cab holds character 0 and fz_empty_rect.
*/
fz_char_and_box *fz_text_char_at(fz_char_and_box *cab, fz_text_page *page, int idx)
{
	fz_text_block *block;
	fz_text_line *line;
	int ofs = 0;

	/* A negative index can never match; without this guard the first
	 * span would be read at a negative offset. */
	if (idx >= 0)
	{
		for (block = page->blocks; block < page->blocks + page->len; block++)
		{
			for (line = block->lines; line < block->lines + block->len; line++)
			{
				int span_num;
				for (span_num = 0; span_num < line->len; span_num++)
				{
					fz_text_span *span = line->spans[span_num];
					if (idx < ofs + span->len)
					{
						cab->c = span->text[idx - ofs].c;
						fz_text_char_bbox(&cab->bbox, span, idx - ofs);
						return cab;
					}
					ofs += span->len;
				}
				/* pseudo-newline: it occupies index 'ofs' here. Comparing
				 * against ofs (not 0) stops the search; otherwise ofs would
				 * step past idx and the next span would be read at -1. */
				if (idx == ofs)
				{
					cab->bbox = fz_empty_rect;
					cab->c = 0;
					return cab;
				}
				ofs++;
			}
		}
	}
	cab->bbox = fz_empty_rect;
	cab->c = 0;
	return cab;
}
Пример #2
0
/* Information down to the character level */
/*
	mGetTextCharacter: Fetch one character of a line and its bounding box.

	page: A fz_text_page pointer passed as void *.

	block_num, line_num: Indices of the block within the page and of
	the line within that block.

	item_num: Index of the character within the line, counted across
	all of the line's spans.

	top_x, top_y, height, width: Receive the origin and size of the
	character's bounding box. They are set to 0 when the lookup fails.

	Returns the character code, or -1 if any index is out of range.

	NOTE(review): the block is read through u.text without checking its
	type; callers are presumed to pass text block indices only - confirm.
*/
SYMBOL_DECLSPEC int __stdcall mGetTextCharacter(void *page, int block_num, int line_num,
	int item_num, double *top_x, double *top_y, double *height, double *width)
{
	fz_text_page *text = (fz_text_page*)page;
	fz_text_block *block;
	fz_text_line *line;
	fz_text_span *span;
	fz_char_and_box cab;
	int index = item_num;

	/* Give the outputs defined values on every path. */
	*top_x = 0;
	*top_y = 0;
	*height = 0;
	*width = 0;

	if (!text || block_num < 0 || block_num >= text->len)
		return -1;
	block = text->blocks[block_num].u.text;
	if (!block || line_num < 0 || line_num >= block->len)
		return -1;
	line = &block->lines[line_num];

	/* A negative index would read before the first span's text. */
	if (index < 0)
		return -1;

	span = line->first_span;
	while (span && index >= span->len)
	{
		index = index - span->len;  /* Reset to start of next span */
		span = span->next;  /* Get next span */
	}
	/* Ran off the end of the span list: item_num is past the line's end. */
	if (!span)
		return -1;

	cab.c = span->text[index].c;
	fz_text_char_bbox(&(cab.bbox), span, index);
	*top_x = cab.bbox.x0;
	*top_y = cab.bbox.y0;
	*height = cab.bbox.y1 - *top_y;
	*width = cab.bbox.x1 - *top_x;

	return cab.c;
}
Пример #3
0
/*
	fz_copy_selection: Gather the text of every character whose bounding
	box touches rect.

	Only text blocks are visited. Character codes below 32 are written
	as '?'. A '\n' is written before a span when the last span of the
	previous line contributed at least one character.

	Returns the buffer's data pointer, terminated by a 0 byte. The
	fz_buffer wrapper itself is released with fz_free before returning.

	NOTE(review): if a buffer write can throw, the buffer is not released
	on that path - confirm against the buffer API.
*/
char *
fz_copy_selection(fz_context *ctx, fz_text_page *page, fz_rect rect)
{
	fz_buffer *out;
	char *result;
	int b, l, k;
	int pending_newline = 0;

	float left = rect.x0;
	float right = rect.x1;
	float top = rect.y0;
	float bottom = rect.y1;

	out = fz_new_buffer(ctx, 1024);

	for (b = 0; b < page->len; b++)
	{
		fz_text_block *block;

		if (page->blocks[b].type != FZ_PAGE_BLOCK_TEXT)
			continue;
		block = page->blocks[b].u.text;

		for (l = 0; l < block->len; l++)
		{
			fz_text_line *line = &block->lines[l];
			fz_text_span *span = line->first_span;

			while (span)
			{
				int span_hit = 0;

				/* Line break owed by the previous line's final span. */
				if (pending_newline)
					fz_write_buffer_byte(ctx, out, '\n');

				for (k = 0; k < span->len; k++)
				{
					fz_rect box;
					int ch;
					int inside;

					fz_text_char_bbox(&box, span, k);
					ch = span->text[k].c;
					if (ch < 32)
						ch = '?';

					inside = (box.x1 >= left && box.x0 <= right &&
						box.y1 >= top && box.y0 <= bottom);
					if (inside)
					{
						fz_write_buffer_rune(ctx, out, ch);
						span_hit = 1;
					}
				}

				/* Only a hit in the line's last span schedules a newline. */
				pending_newline = (span_hit && span == line->last_span);
				span = span->next;
			}
		}
	}

	fz_write_buffer_byte(ctx, out, 0);

	/* Hand the raw data to the caller and discard only the wrapper. */
	result = (char*)out->data;
	fz_free(ctx, out);
	return result;
}
Пример #4
0
/*
	fz_copy_selection: Gather the text of every character whose bounding
	box touches rect.

	Character codes below 32 are written as '?'. A '\n' is written
	before a span when the last span of the previous line contributed
	at least one character.

	Returns the buffer's data pointer, terminated by a 0 byte. The
	fz_buffer wrapper itself is released with fz_free before returning.

	NOTE(review): if a buffer write can throw, the buffer is not released
	on that path - confirm against the buffer API.
*/
char *
fz_copy_selection(fz_context *ctx, fz_text_page *page, fz_rect rect)
{
	fz_buffer *out;
	char *result;
	int b, l, s, k;
	int pending_newline = 0;

	float left = rect.x0;
	float right = rect.x1;
	float top = rect.y0;
	float bottom = rect.y1;

	out = fz_new_buffer(ctx, 1024);

	for (b = 0; b < page->len; b++)
	{
		fz_text_block *block = &page->blocks[b];

		for (l = 0; l < block->len; l++)
		{
			fz_text_line *line = &block->lines[l];

			for (s = 0; s < line->len; s++)
			{
				fz_text_span *span = line->spans[s];
				int span_hit = 0;

				/* Line break owed by the previous line's final span. */
				if (pending_newline)
					fz_write_buffer_byte(ctx, out, '\n');

				for (k = 0; k < span->len; k++)
				{
					fz_rect box;
					int ch;
					int inside;

					fz_text_char_bbox(&box, span, k);
					ch = span->text[k].c;
					if (ch < 32)
						ch = '?';

					inside = (box.x1 >= left && box.x0 <= right &&
						box.y1 >= top && box.y0 <= bottom);
					if (inside)
					{
						fz_write_buffer_rune(ctx, out, ch);
						span_hit = 1;
					}
				}

				/* Only a hit in the line's last span schedules a newline. */
				pending_newline = (span_hit && s + 1 == line->len);
			}
		}
	}

	fz_write_buffer_byte(ctx, out, 0);

	/* Hand the raw data to the caller and discard only the wrapper. */
	result = (char*)out->data;
	fz_free(ctx, out);
	return result;
}
Пример #5
0
/*
	fz_highlight_selection: Compute highlight rectangles for the
	characters whose bounding boxes touch rect.

	Only text blocks are visited. Within a line, touching characters are
	merged into one rectangle while they share the same y0 and each
	starts within 5 units of the current rectangle's right edge;
	otherwise a new rectangle is started.

	hit_bbox: Receives the rectangles.

	hit_max: Capacity of hit_bbox. Rectangles found after the array is
	full are dropped.

	Returns the number of rectangles written.
*/
int
fz_highlight_selection(fz_context *ctx, fz_text_page *page, fz_rect rect, fz_rect *hit_bbox, int hit_max)
{
	int found = 0;
	int b, l, k;

	float left = rect.x0;
	float right = rect.x1;
	float top = rect.y0;
	float bottom = rect.y1;

	for (b = 0; b < page->len; b++)
	{
		fz_text_block *block;

		if (page->blocks[b].type != FZ_PAGE_BLOCK_TEXT)
			continue;
		block = page->blocks[b].u.text;

		for (l = 0; l < block->len; l++)
		{
			fz_text_line *line = &block->lines[l];
			fz_text_span *span = line->first_span;
			fz_rect run = fz_empty_rect;

			while (span)
			{
				for (k = 0; k < span->len; k++)
				{
					fz_rect box;
					int starts_new_run;

					fz_text_char_bbox(&box, span, k);
					if (!(box.x1 >= left && box.x0 <= right && box.y1 >= top && box.y0 <= bottom))
						continue;

					starts_new_run = (box.y0 != run.y0 || fz_abs(box.x0 - run.x1) > 5);
					if (!starts_new_run)
					{
						fz_union_rect(&run, &box);
					}
					else
					{
						/* Emit the finished run, if there is one and room for it. */
						if (!fz_is_empty_rect(&run) && found < hit_max)
							hit_bbox[found++] = run;
						run = box;
					}
				}
				span = span->next;
			}

			/* Emit whatever run is still open at the end of the line. */
			if (!fz_is_empty_rect(&run) && found < hit_max)
				hit_bbox[found++] = run;
		}
	}

	return found;
}
Пример #6
0
/*
	fz_highlight_selection: Compute highlight rectangles for the
	characters whose bounding boxes touch rect.

	Within a line, touching characters are merged into one rectangle
	while they share the same y0 and each starts within 5 units of the
	current rectangle's right edge; otherwise a new rectangle is started.

	hit_bbox: Receives the rectangles.

	hit_max: Capacity of hit_bbox. Rectangles found after the array is
	full are dropped.

	Returns the number of rectangles written.
*/
int
fz_highlight_selection(fz_context *ctx, fz_text_page *page, fz_rect rect, fz_rect *hit_bbox, int hit_max)
{
	int found = 0;
	int b, l, s, k;

	float left = rect.x0;
	float right = rect.x1;
	float top = rect.y0;
	float bottom = rect.y1;

	for (b = 0; b < page->len; b++)
	{
		fz_text_block *block = &page->blocks[b];

		for (l = 0; l < block->len; l++)
		{
			fz_text_line *line = &block->lines[l];
			fz_rect run = fz_empty_rect;

			for (s = 0; s < line->len; s++)
			{
				fz_text_span *span = line->spans[s];

				for (k = 0; k < span->len; k++)
				{
					fz_rect box;
					int starts_new_run;

					fz_text_char_bbox(&box, span, k);
					if (!(box.x1 >= left && box.x0 <= right && box.y1 >= top && box.y0 <= bottom))
						continue;

					starts_new_run = (box.y0 != run.y0 || fz_abs(box.x0 - run.x1) > 5);
					if (!starts_new_run)
					{
						fz_union_rect(&run, &box);
					}
					else
					{
						/* Emit the finished run, if there is one and room for it. */
						if (!fz_is_empty_rect(&run) && found < hit_max)
							hit_bbox[found++] = run;
						run = box;
					}
				}
			}

			/* Emit whatever run is still open at the end of the line. */
			if (!fz_is_empty_rect(&run) && found < hit_max)
				hit_bbox[found++] = run;
		}
	}

	return found;
}
Пример #7
0
/**
 * Appends one newly allocated DrChar to charlist for every character found
 * in the text blocks of tpage, in block / line / span / character order.
 *
 * Each DrChar is initialized with the character's bounding box, its code and
 * a font descriptor looked up in (or first added to) m_fontcache by font
 * name, size and bold/italic style.
 *
 * @param charlist  List that receives the DrChar pointers (allocated with new).
 * @param tpage     Text page to read; non-text blocks are ignored.
 */
void DrPDFExtractor::ExtractChars(std::list<DrChar *> &charlist, fz_text_page * tpage)
{
	for (int i = 0; i < tpage->len; i++) {
		fz_page_block * pb = tpage->blocks+i;
		if (pb->type != FZ_PAGE_BLOCK_TEXT)
			continue;

		fz_text_block * tb = pb->u.text;
		for (int j = 0; j < tb->len; j++) {
			fz_text_line * line = tb->lines+j;
			// Stop when span itself is NULL. Testing span->next instead would
			// skip the last span of every line and crash on a line with no spans.
			for (fz_text_span * span = line->first_span; span != NULL; span = span->next) {
				for (int k = 0; k < span->len; k++) {
					fz_rect bbox;
					fz_text_char_bbox(&bbox, span, k);
					fz_text_char * t_char = span->text+k;
					fz_text_style * style = t_char->style;
					DrBox charbox(bbox.x0, bbox.y0, bbox.x1, bbox.y1);

					// Map the font's bold/italic flags onto a descriptor style.
					int fontstyle = DrFontDescriptor::FS_NONE;
					if (style->font->ft_bold == 1 && style->font->ft_italic == 0) {
						fontstyle = DrFontDescriptor::FS_BOLD;
					}
					else if (style->font->ft_bold == 0 && style->font->ft_italic == 1) {
						fontstyle = DrFontDescriptor::FS_ITALIC;
					}
					else if (style->font->ft_bold == 1 && style->font->ft_italic == 1) {
						fontstyle = DrFontDescriptor::FS_BOLD_ITALIC;
					}

					// Reuse a cached descriptor; create it on first sight.
					DrFontDescriptor * fd = m_fontcache->FindDescriptor(style->font->name, style->size, fontstyle);
					if (fd == NULL) {
						m_fontcache->AddDescriptor(style->font->name, style->size, fontstyle);
						fd = m_fontcache->FindDescriptor(style->font->name, style->size, fontstyle);
					}

					DrChar * achar = new DrChar();
					achar->Initialize(charbox, t_char->c,fd);
					charlist.push_back(achar);
				}
			}
		}
	}
}
Пример #8
0
/*
	fz_text_char_at: Look up the character stored at a page-wide index.

	Characters are numbered in reading order across every text block,
	line and span of the page. One extra index position is reserved
	after each line (the pseudo-newline). Non-text blocks are skipped.

	cab: Caller-supplied record that receives the result.

	page: The text page to search.

	idx: Zero-based character index.

	Returns cab. For a real character, cab holds its code and bounding
	box. For a pseudo-newline position, a negative idx, or an idx past
	the last position, cab holds character 0 and fz_empty_rect.
*/
fz_char_and_box *fz_text_char_at(fz_char_and_box *cab, fz_text_page *page, int idx)
{
	int block_num;
	/* Running offset for the whole page. It must not restart per block,
	 * or indices beyond the first text block resolve to the wrong place. */
	int ofs = 0;

	/* A negative index can never match; without this guard the first
	 * span would be read at a negative offset. */
	if (idx >= 0)
	{
		for (block_num = 0; block_num < page->len; block_num++)
		{
			fz_text_block *block;
			fz_text_line *line;
			fz_text_span *span;

			if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
				continue;
			block = page->blocks[block_num].u.text;
			for (line = block->lines; line < block->lines + block->len; line++)
			{
				for (span = line->first_span; span; span = span->next)
				{
					if (idx < ofs + span->len)
					{
						cab->c = span->text[idx - ofs].c;
						fz_text_char_bbox(&cab->bbox, span, idx - ofs);
						return cab;
					}
					ofs += span->len;
				}
				/* pseudo-newline: it occupies index 'ofs' here. Comparing
				 * against ofs (not 0) stops the search; otherwise ofs would
				 * step past idx and the next span would be read at -1. */
				if (idx == ofs)
				{
					cab->bbox = fz_empty_rect;
					cab->c = 0;
					return cab;
				}
				ofs++;
			}
		}
	}
	cab->bbox = fz_empty_rect;
	cab->c = 0;
	return cab;
}
Пример #9
0
/*
	fz_print_text_page_xml: Write a text page to out as XML.

	The output is a <page> element sized from the page's mediabox,
	containing one <block> per text block, one <line> per line, and
	<span> elements wrapping <char> elements. A new <span> is opened
	whenever the character style changes within a span of the page.
	The font name is printed from just after the first '+' in the
	name, if there is one.

	The characters < > & " ' are written as named entities, codes in
	32..127 are written literally, and all other codes are written as
	hexadecimal character references. Non-text blocks produce no output.
*/
void
fz_print_text_page_xml(fz_context *ctx, fz_output *out, fz_text_page *page)
{
	int b;

	fz_printf(out, "<page width=\"%g\" height=\"%g\">\n",
		page->mediabox.x1 - page->mediabox.x0,
		page->mediabox.y1 - page->mediabox.y0);

	for (b = 0; b < page->len; b++)
	{
		fz_text_block *block;
		fz_text_line *line;

		/* Image blocks (and any other type) are not printed. */
		if (page->blocks[b].type != FZ_PAGE_BLOCK_TEXT)
			continue;

		block = page->blocks[b].u.text;
		fz_printf(out, "<block bbox=\"%g %g %g %g\">\n",
			block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);

		for (line = block->lines; line < block->lines + block->len; line++)
		{
			fz_text_span *span;

			fz_printf(out, "<line bbox=\"%g %g %g %g\">\n",
				line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1);

			for (span = line->first_span; span; span = span->next)
			{
				/* Style of the currently open <span>, or NULL if none is open. */
				fz_text_style *current = NULL;
				int k;

				for (k = 0; k < span->len; k++)
				{
					fz_text_char *ch = &span->text[k];
					fz_rect box;

					if (ch->style != current)
					{
						char *font_name;

						if (current)
							fz_printf(out, "</span>\n");
						current = ch->style;

						/* Print the name from after the first '+', if any. */
						font_name = strchr(current->font->name, '+');
						font_name = font_name ? font_name + 1 : current->font->name;
						fz_printf(out, "<span bbox=\"%g %g %g %g\" font=\"%s\" size=\"%g\">\n",
							span->bbox.x0, span->bbox.y0, span->bbox.x1, span->bbox.y1,
							font_name, current->size);
					}

					fz_text_char_bbox(&box, span, k);
					fz_printf(out, "<char bbox=\"%g %g %g %g\" x=\"%g\" y=\"%g\" c=\"",
						box.x0, box.y0, box.x1, box.y1, ch->p.x, ch->p.y);

					if (ch->c == '<')
						fz_printf(out, "&lt;");
					else if (ch->c == '>')
						fz_printf(out, "&gt;");
					else if (ch->c == '&')
						fz_printf(out, "&amp;");
					else if (ch->c == '"')
						fz_printf(out, "&quot;");
					else if (ch->c == '\'')
						fz_printf(out, "&apos;");
					else if (ch->c >= 32 && ch->c <= 127)
						fz_printf(out, "%c", ch->c);
					else
						fz_printf(out, "&#x%x;", ch->c);

					fz_printf(out, "\"/>\n");
				}

				if (current)
					fz_printf(out, "</span>\n");
			}
			fz_printf(out, "</line>\n");
		}
		fz_printf(out, "</block>\n");
	}
	fz_printf(out, "</page>\n");
}
Пример #10
0
/*
	fz_new_buffer_from_text_page: Build a buffer holding the text of
	every character whose bounding box touches the selection.

	text: The text page to read; non-text blocks are skipped.

	sel: Selection rectangle. An infinite rectangle selects everything.

	crlf: If non-zero, line breaks are written as "\r\n" instead of "\n".

	Character codes below 32 are written as '?'. A line break is
	written before the first selected character that follows a line
	which contributed text, so the result never ends with a break.

	Returns the new buffer. If an exception is thrown while filling it,
	the buffer is dropped and the exception rethrown.
*/
fz_buffer *
fz_new_buffer_from_text_page(fz_context *ctx, fz_text_page *text, const fz_rect *sel, int crlf)
{
	fz_buffer *buf;
	float left, top, right, bottom;
	int pending_break = 0;
	int b, l, k;

	if (!fz_is_infinite_rect(sel))
	{
		left = sel->x0;
		top = sel->y0;
		right = sel->x1;
		bottom = sel->y1;
	}
	else
	{
		left = top = -FLT_MAX;
		right = bottom = FLT_MAX;
	}

	buf = fz_new_buffer(ctx, 256);
	fz_try(ctx)
	{
		for (b = 0; b < text->len; b++)
		{
			fz_text_block *block;

			if (text->blocks[b].type != FZ_PAGE_BLOCK_TEXT)
				continue;

			block = text->blocks[b].u.text;
			for (l = 0; l < block->len; l++)
			{
				fz_text_line *line = &block->lines[l];
				fz_text_span *span;
				int line_had_text = 0;

				for (span = line->first_span; span; span = span->next)
				{
					for (k = 0; k < span->len; k++)
					{
						fz_rect box;
						int ch;

						fz_text_char_bbox(ctx, &box, span, k);
						ch = span->text[k].c;
						if (ch < 32)
							ch = '?';

						if (!(box.x1 >= left && box.x0 <= right && box.y1 >= top && box.y0 <= bottom))
							continue;

						line_had_text = 1;
						/* Emit the break owed by an earlier line before this character. */
						if (pending_break)
						{
							if (crlf)
								fz_write_buffer_rune(ctx, buf, '\r');
							fz_write_buffer_rune(ctx, buf, '\n');
							pending_break = 0;
						}
						fz_write_buffer_rune(ctx, buf, ch);
					}
				}

				if (line_had_text)
					pending_break = 1;
			}
		}
	}
	fz_catch(ctx)
	{
		fz_drop_buffer(ctx, buf);
		fz_rethrow(ctx);
	}

	return buf;
}