/*
	Copy the text of every character whose bounding box touches rect
	into a freshly allocated, NUL-terminated UTF-8 string.

	Characters with a code below 32 are replaced by U+FFFD. A '\n' is
	written in front of a span only when the previous span contributed
	at least one character and was the last span of its line.

	The returned pointer is the storage extracted from the internal
	buffer; presumably the caller releases it with fz_free (confirm
	against the fz_buffer_extract contract).

	If a buffer write throws, the buffer is dropped before the
	exception is passed on, so nothing leaks on the error path.
*/
char *
fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_rect rect)
{
	fz_buffer *buffer;
	fz_rect hitbox;
	int c, i, block_num, seen = 0;
	unsigned char *s = NULL;

	float x0 = rect.x0;
	float x1 = rect.x1;
	float y0 = rect.y0;
	float y1 = rect.y1;

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		for (block_num = 0; block_num < page->len; block_num++)
		{
			fz_stext_block *block;
			fz_stext_line *line;
			fz_stext_span *span;

			if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
				continue;
			block = page->blocks[block_num].u.text;

			for (line = block->lines; line < block->lines + block->len; line++)
			{
				for (span = line->first_span; span; span = span->next)
				{
					/* The previous span ended a line and had selected text. */
					if (seen)
					{
						fz_write_buffer_byte(ctx, buffer, '\n');
					}

					seen = 0;

					for (i = 0; i < span->len; i++)
					{
						fz_stext_char_bbox(ctx, &hitbox, span, i);
						c = span->text[i].c;
						if (c < 32)
							c = 0xFFFD;
						if (hitbox.x1 >= x0 && hitbox.x0 <= x1 && hitbox.y1 >= y0 && hitbox.y0 <= y1)
						{
							fz_write_buffer_rune(ctx, buffer, c);
							seen = 1;
						}
					}

					/* Only a line-final span may trigger a newline. */
					seen = (seen && span == line->last_span);
				}
			}
		}

		fz_write_buffer_byte(ctx, buffer, 0);
	}
	fz_catch(ctx)
	{
		fz_drop_buffer(ctx, buffer);
		fz_rethrow(ctx);
	}

	/* Take over the buffer's data, then release the now-empty buffer. */
	fz_buffer_extract(ctx, buffer, &s);
	fz_drop_buffer(ctx, buffer);
	return (char*)s;
}
/**
 * @brief Bounding box of a single unicode character of this text box.
 *
 * The box is obtained from fz_stext_char_bbox() for the span held by
 * this object and converted to a QRectF spanning (x0, y0) to (x1, y1).
 *
 * @param index index of the unicode char (starting from 0)
 * @return the character's bounding rectangle
 */
QRectF TextBox::charBoundingBox(int index) const
{
    fz_rect bbox;
    fz_stext_char_bbox(d->ctx, &bbox, d->text_span, index);

    const QPointF topLeft(bbox.x0, bbox.y0);
    const QPointF bottomRight(bbox.x1, bbox.y1);
    return QRectF(topLeft, bottomRight);
}
/*
	Copy the text of every character whose bounding box touches rect
	into a freshly allocated, NUL-terminated UTF-8 string.

	Characters with a code below 32 are replaced by
	FZ_REPLACEMENT_CHARACTER. A '\n' is written in front of a line
	only when the previous line contributed at least one character
	and was the last line of its block.

	The returned pointer is the storage extracted from the internal
	buffer; presumably the caller releases it with fz_free (confirm
	against the fz_buffer_extract contract).

	If a buffer append throws, the buffer is dropped before the
	exception is passed on, so nothing leaks on the error path.
*/
char *
fz_copy_selection(fz_context *ctx, fz_stext_page *page, fz_rect rect)
{
	fz_buffer *buffer;
	fz_rect hitbox;
	int c, seen = 0;
	unsigned char *s = NULL;
	fz_stext_block *block;
	fz_stext_line *line;
	fz_stext_char *ch;

	float x0 = rect.x0;
	float x1 = rect.x1;
	float y0 = rect.y0;
	float y1 = rect.y1;

	buffer = fz_new_buffer(ctx, 1024);

	fz_try(ctx)
	{
		for (block = page->first_block; block; block = block->next)
		{
			if (block->type != FZ_STEXT_BLOCK_TEXT)
				continue;

			for (line = block->u.t.first_line; line; line = line->next)
			{
				/* The previous line ended a block and had selected text. */
				if (seen)
				{
					fz_append_byte(ctx, buffer, '\n');
				}

				seen = 0;

				for (ch = line->first_char; ch; ch = ch->next)
				{
					fz_stext_char_bbox(ctx, &hitbox, line, ch);
					c = ch->c;
					if (c < 32)
						c = FZ_REPLACEMENT_CHARACTER;
					if (hitbox.x1 >= x0 && hitbox.x0 <= x1 && hitbox.y1 >= y0 && hitbox.y0 <= y1)
					{
						fz_append_rune(ctx, buffer, c);
						seen = 1;
					}
				}

				/* Only a block-final line may trigger a newline. */
				seen = (seen && line == block->u.t.last_line);
			}
		}

		fz_terminate_buffer(ctx, buffer);
	}
	fz_catch(ctx)
	{
		fz_drop_buffer(ctx, buffer);
		fz_rethrow(ctx);
	}

	fz_buffer_extract(ctx, buffer, &s); /* take over the data */
	fz_drop_buffer(ctx, buffer);
	return (char*)s;
}
/*
	Fill hit_bbox with rectangles covering the characters that touch
	rect, merging neighbouring characters into runs.

	Within each line, a selected character extends the current run when
	its y0 equals the run's y0 and its x0 lies within 5 units of the
	run's x1; otherwise the current run is stored and a new one starts.
	At most hit_max rectangles are stored; further runs are discarded.

	Returns the number of rectangles written to hit_bbox.
*/
int
fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_rect rect, fz_rect *hit_bbox, int hit_max)
{
	const float sel_x0 = rect.x0;
	const float sel_x1 = rect.x1;
	const float sel_y0 = rect.y0;
	const float sel_y1 = rect.y1;
	int n_hits = 0;
	int bi;

	for (bi = 0; bi < page->len; bi++)
	{
		fz_stext_block *blk;
		fz_stext_line *ln;

		if (page->blocks[bi].type != FZ_PAGE_BLOCK_TEXT)
			continue;
		blk = page->blocks[bi].u.text;

		for (ln = blk->lines; ln < blk->lines + blk->len; ln++)
		{
			fz_stext_span *sp;
			fz_rect run = fz_empty_rect;

			for (sp = ln->first_span; sp; sp = sp->next)
			{
				int ci;

				for (ci = 0; ci < sp->len; ci++)
				{
					fz_rect glyph;
					int selected;

					fz_stext_char_bbox(ctx, &glyph, sp, ci);

					selected = glyph.x1 >= sel_x0 && glyph.x0 <= sel_x1 &&
						glyph.y1 >= sel_y0 && glyph.y0 <= sel_y1;
					if (!selected)
						continue;

					if (glyph.y0 != run.y0 || fz_abs(glyph.x0 - run.x1) > 5)
					{
						/* Gap or baseline change: store the finished run, start anew. */
						if (!fz_is_empty_rect(&run) && n_hits < hit_max)
							hit_bbox[n_hits++] = run;
						run = glyph;
					}
					else
					{
						fz_union_rect(&run, &glyph);
					}
				}
			}

			/* Store whatever run is still open at the end of the line. */
			if (!fz_is_empty_rect(&run) && n_hits < hit_max)
				hit_bbox[n_hits++] = run;
		}
	}

	return n_hits;
}
/*
	Fill hit_bbox with rectangles covering the characters that touch
	rect, merging neighbouring characters into runs.

	Within each line, a selected character extends the current run when
	its y0 equals the run's y0 and its x0 lies within 5 units of the
	run's x1; otherwise the current run is stored and a new one starts.
	At most hit_max rectangles are stored; further runs are discarded.

	Returns the number of rectangles written to hit_bbox.
*/
int
fz_highlight_selection(fz_context *ctx, fz_stext_page *page, fz_rect rect, fz_rect *hit_bbox, int hit_max)
{
	const float sel_x0 = rect.x0;
	const float sel_x1 = rect.x1;
	const float sel_y0 = rect.y0;
	const float sel_y1 = rect.y1;
	fz_stext_block *blk;
	int n_hits = 0;

	for (blk = page->first_block; blk; blk = blk->next)
	{
		fz_stext_line *ln;

		if (blk->type != FZ_STEXT_BLOCK_TEXT)
			continue;

		for (ln = blk->u.t.first_line; ln; ln = ln->next)
		{
			fz_stext_char *glyph;
			fz_rect run = fz_empty_rect;

			for (glyph = ln->first_char; glyph; glyph = glyph->next)
			{
				fz_rect box;
				int selected;

				fz_stext_char_bbox(ctx, &box, ln, glyph);

				selected = box.x1 >= sel_x0 && box.x0 <= sel_x1 &&
					box.y1 >= sel_y0 && box.y0 <= sel_y1;
				if (!selected)
					continue;

				if (box.y0 != run.y0 || fz_abs(box.x0 - run.x1) > 5)
				{
					/* Gap or baseline change: store the finished run, start anew. */
					if (!fz_is_empty_rect(&run) && n_hits < hit_max)
						hit_bbox[n_hits++] = run;
					run = box;
				}
				else
				{
					fz_union_rect(&run, &box);
				}
			}

			/* Store whatever run is still open at the end of the line. */
			if (!fz_is_empty_rect(&run) && n_hits < hit_max)
				hit_bbox[n_hits++] = run;
		}
	}

	return n_hits;
}
/*
	Look up the idx-th character of the page, counting characters
	across all text blocks in order and counting one extra
	"pseudo-newline" position at the end of every line.

	On a real character, cab receives its code and bounding box.
	On a pseudo-newline position, cab receives ' ' and an empty box.
	If idx is negative or past the end of the page, cab receives
	code 0 and an empty box.

	Returns cab.
*/
fz_char_and_box *
fz_stext_char_at(fz_context *ctx, fz_char_and_box *cab, fz_stext_page *page, int idx)
{
	int block_num;
	int ofs = 0;

	/*
		A negative index would satisfy "idx < ofs + span->len" below and
		index span->text[] before its start, so reject it up front with
		the same "not found" result used for indices past the end.
	*/
	if (idx < 0)
	{
		cab->bbox = fz_empty_rect;
		cab->c = 0;
		return cab;
	}

	for (block_num = 0; block_num < page->len; block_num++)
	{
		fz_stext_block *block;
		fz_stext_line *line;
		fz_stext_span *span;

		if (page->blocks[block_num].type != FZ_PAGE_BLOCK_TEXT)
			continue;
		block = page->blocks[block_num].u.text;

		for (line = block->lines; line < block->lines + block->len; line++)
		{
			for (span = line->first_span; span; span = span->next)
			{
				if (idx < ofs + span->len)
				{
					cab->c = span->text[idx - ofs].c;
					fz_stext_char_bbox(ctx, &cab->bbox, span, idx - ofs);
					return cab;
				}
				ofs += span->len;
			}

			/* pseudo-newline */
			if (idx == ofs)
			{
				cab->bbox = fz_empty_rect;
				cab->c = ' ';
				return cab;
			}
			ofs++;
		}
	}

	cab->bbox = fz_empty_rect;
	cab->c = 0;
	return cab;
}
/*
	Look up the idx-th character of the page, counting characters
	across all text blocks in order and counting one extra
	"pseudo-newline" position at the end of every line.

	On a real character, cab receives its code and bounding box.
	On a pseudo-newline position, cab receives ' ' and an empty box.
	If no position matches idx, cab receives code 0 and an empty box.

	Returns cab.
*/
fz_char_and_box *
fz_stext_char_at(fz_context *ctx, fz_char_and_box *cab, fz_stext_page *page, int idx)
{
	fz_stext_block *blk;
	int pos = 0;

	for (blk = page->first_block; blk != NULL; blk = blk->next)
	{
		fz_stext_line *ln;

		if (blk->type != FZ_STEXT_BLOCK_TEXT)
			continue;

		for (ln = blk->u.t.first_line; ln != NULL; ln = ln->next)
		{
			fz_stext_char *glyph = ln->first_char;

			/* Walk the line until the wanted position or the line end. */
			while (glyph != NULL && pos != idx)
			{
				glyph = glyph->next;
				pos++;
			}

			if (glyph != NULL)
			{
				cab->c = glyph->c;
				fz_stext_char_bbox(ctx, &cab->bbox, ln, glyph);
				return cab;
			}

			/* pseudo-newline occupying one position after the line */
			if (pos == idx)
			{
				cab->bbox = fz_empty_rect;
				cab->c = ' ';
				return cab;
			}
			pos++;
		}
	}

	cab->bbox = fz_empty_rect;
	cab->c = 0;
	return cab;
}
/*
	Build a new buffer holding the text of all characters on the page
	whose bounding boxes touch the rectangle sel.

	If sel is the infinite rectangle, every character matches.
	Characters with a code below 32 are written as '?'. Lines that
	contributed text are separated by "\n", or by "\r\n" when crlf is
	non-zero; the separator is only written once a later character is
	emitted, so the result has no trailing line break.

	On error the partially filled buffer is dropped and the exception
	is rethrown. Returns the new buffer.
*/
fz_buffer *
fz_new_buffer_from_stext_page(fz_context *ctx, fz_stext_page *text, const fz_rect *sel, int crlf)
{
	fz_buffer *out;
	float left, top, right, bottom;
	int pending_newline = 0;

	if (!fz_is_infinite_rect(sel))
	{
		left = sel->x0;
		top = sel->y0;
		right = sel->x1;
		bottom = sel->y1;
	}
	else
	{
		left = top = -FLT_MAX;
		right = bottom = FLT_MAX;
	}

	out = fz_new_buffer(ctx, 256);
	fz_try(ctx)
	{
		int bi;

		for (bi = 0; bi < text->len; bi++)
		{
			fz_stext_block *blk;
			fz_stext_line *ln;

			if (text->blocks[bi].type != FZ_PAGE_BLOCK_TEXT)
				continue;
			blk = text->blocks[bi].u.text;

			for (ln = blk->lines; ln < blk->lines + blk->len; ln++)
			{
				fz_stext_span *sp;
				int line_has_text = 0;

				for (sp = ln->first_span; sp; sp = sp->next)
				{
					int ci;

					for (ci = 0; ci < sp->len; ci++)
					{
						fz_rect box;
						int rune, inside;

						fz_stext_char_bbox(ctx, &box, sp, ci);

						inside = box.x1 >= left && box.x0 <= right &&
							box.y1 >= top && box.y0 <= bottom;
						if (!inside)
							continue;

						line_has_text = 1;

						/* Deferred line break from an earlier line. */
						if (pending_newline)
						{
							if (crlf)
								fz_write_buffer_rune(ctx, out, '\r');
							fz_write_buffer_rune(ctx, out, '\n');
							pending_newline = 0;
						}

						rune = sp->text[ci].c;
						fz_write_buffer_rune(ctx, out, rune < 32 ? '?' : rune);
					}
				}

				if (line_has_text)
					pending_newline = 1;
			}
		}
	}
	fz_catch(ctx)
	{
		fz_drop_buffer(ctx, out);
		fz_rethrow(ctx);
	}

	return out;
}
/*
	Write the structured text page to out as XML.

	Layout of the output:
	  <page width height>
	    <block bbox>                      (one per text block)
	      <line bbox wmode dir>
	        <font name size>              (reopened whenever font or size changes)
	          <char bbox x y c/>
	    <image bbox />                    (one per image block)

	The character value in the c attribute is escaped: the five XML
	special characters are written as predefined entities, codes in
	32..127 are written literally, and everything else is written as
	a hexadecimal character reference.
*/
void
fz_print_stext_page_as_xml(fz_context *ctx, fz_output *out, fz_stext_page *page)
{
	fz_stext_block *block;
	fz_stext_line *line;
	fz_stext_char *ch;

	fz_write_printf(ctx, out, "<page width=\"%g\" height=\"%g\">\n",
		page->mediabox.x1 - page->mediabox.x0,
		page->mediabox.y1 - page->mediabox.y0);

	for (block = page->first_block; block; block = block->next)
	{
		switch (block->type)
		{
		case FZ_STEXT_BLOCK_TEXT:
			fz_write_printf(ctx, out, "<block bbox=\"%g %g %g %g\">\n",
				block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
			for (line = block->u.t.first_line; line; line = line->next)
			{
				/* Font and size of the currently open <font> element, if any. */
				fz_font *font = NULL;
				float size = 0;
				const char *name = NULL;
				fz_rect rect;

				fz_write_printf(ctx, out, "<line bbox=\"%g %g %g %g\" wmode=\"%d\" dir=\"%g %g\">\n",
					line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1,
					line->wmode,
					line->dir.x, line->dir.y);

				for (ch = line->first_char; ch; ch = ch->next)
				{
					if (ch->font != font || ch->size != size)
					{
						/* Style changed: close the previous <font> and open a new one. */
						if (font)
							fz_write_string(ctx, out, "</font>\n");
						font = ch->font;
						size = ch->size;
						name = font_full_name(ctx, font);
						fz_write_printf(ctx, out, "<font name=\"%s\" size=\"%g\">\n", name, size);
					}

					fz_stext_char_bbox(ctx, &rect, line, ch);
					fz_write_printf(ctx, out, "<char bbox=\"%g %g %g %g\" x=\"%g\" y=\"%g\" c=\"",
						rect.x0, rect.y0, rect.x1, rect.y1,
						ch->origin.x, ch->origin.y);

					/* Escape the character so the attribute value stays well-formed. */
					switch (ch->c)
					{
					case '<': fz_write_string(ctx, out, "&lt;"); break;
					case '>': fz_write_string(ctx, out, "&gt;"); break;
					case '&': fz_write_string(ctx, out, "&amp;"); break;
					case '"': fz_write_string(ctx, out, "&quot;"); break;
					case '\'': fz_write_string(ctx, out, "&apos;"); break;
					default:
						if (ch->c >= 32 && ch->c <= 127)
							fz_write_printf(ctx, out, "%c", ch->c);
						else
							fz_write_printf(ctx, out, "&#x%x;", ch->c);
						break;
					}
					fz_write_string(ctx, out, "\"/>\n");
				}

				if (font)
					fz_write_string(ctx, out, "</font>\n");

				fz_write_string(ctx, out, "</line>\n");
			}
			fz_write_string(ctx, out, "</block>\n");
			break;

		case FZ_STEXT_BLOCK_IMAGE:
			fz_write_printf(ctx, out, "<image bbox=\"%g %g %g %g\" />\n",
				block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
			break;

		default:
			/* Other block types produce no output. */
			break;
		}
	}
	fz_write_string(ctx, out, "</page>\n");
}
/*
	Write the structured text page to out as XML.

	Layout of the output:
	  <page width height>
	    <block bbox>                      (one per text block)
	      <line bbox>
	        <span bbox font size>         (reopened whenever the char style changes)
	          <char bbox x y c/>

	Image blocks produce no output. The font name is printed without
	any prefix up to and including the first '+'.

	The character value in the c attribute is escaped: the five XML
	special characters are written as predefined entities, codes in
	32..127 are written literally, and everything else is written as
	a hexadecimal character reference.
*/
void
fz_print_stext_page_xml(fz_context *ctx, fz_output *out, fz_stext_page *page)
{
	int block_n;

	fz_printf(ctx, out, "<page width=\"%g\" height=\"%g\">\n",
		page->mediabox.x1 - page->mediabox.x0,
		page->mediabox.y1 - page->mediabox.y0);

	for (block_n = 0; block_n < page->len; block_n++)
	{
		switch (page->blocks[block_n].type)
		{
		case FZ_PAGE_BLOCK_TEXT:
		{
			fz_stext_block *block = page->blocks[block_n].u.text;
			fz_stext_line *line;
			const char *s;

			fz_printf(ctx, out, "<block bbox=\"%g %g %g %g\">\n",
				block->bbox.x0, block->bbox.y0, block->bbox.x1, block->bbox.y1);
			for (line = block->lines; line < block->lines + block->len; line++)
			{
				fz_stext_span *span;
				fz_printf(ctx, out, "<line bbox=\"%g %g %g %g\">\n",
					line->bbox.x0, line->bbox.y0, line->bbox.x1, line->bbox.y1);
				for (span = line->first_span; span; span = span->next)
				{
					/* Style of the currently open <span> element, if any. */
					fz_stext_style *style = NULL;
					const char *name = NULL;
					int char_num;

					for (char_num = 0; char_num < span->len; char_num++)
					{
						fz_stext_char *ch = &span->text[char_num];
						if (ch->style != style)
						{
							/* Style changed: close the previous <span> and open a new one. */
							if (style)
							{
								fz_printf(ctx, out, "</span>\n");
							}
							style = ch->style;
							name = fz_font_name(ctx, style->font);
							/* Skip everything up to and including the first '+'. */
							s = strchr(name, '+');
							s = s ? s + 1 : name;
							fz_printf(ctx, out, "<span bbox=\"%g %g %g %g\" font=\"%s\" size=\"%g\">\n",
								span->bbox.x0, span->bbox.y0, span->bbox.x1, span->bbox.y1,
								s, style->size);
						}
						{
							fz_rect rect;
							fz_stext_char_bbox(ctx, &rect, span, char_num);
							fz_printf(ctx, out, "<char bbox=\"%g %g %g %g\" x=\"%g\" y=\"%g\" c=\"",
								rect.x0, rect.y0, rect.x1, rect.y1, ch->p.x, ch->p.y);
						}

						/* Escape the character so the attribute value stays well-formed. */
						switch (ch->c)
						{
						case '<': fz_printf(ctx, out, "&lt;"); break;
						case '>': fz_printf(ctx, out, "&gt;"); break;
						case '&': fz_printf(ctx, out, "&amp;"); break;
						case '"': fz_printf(ctx, out, "&quot;"); break;
						case '\'': fz_printf(ctx, out, "&apos;"); break;
						default:
							if (ch->c >= 32 && ch->c <= 127)
								fz_printf(ctx, out, "%c", ch->c);
							else
								fz_printf(ctx, out, "&#x%x;", ch->c);
							break;
						}
						fz_printf(ctx, out, "\"/>\n");
					}
					if (style)
						fz_printf(ctx, out, "</span>\n");
				}
				fz_printf(ctx, out, "</line>\n");
			}
			fz_printf(ctx, out, "</block>\n");
			break;
		}
		case FZ_PAGE_BLOCK_IMAGE:
		{
			/* Image blocks are not represented in this output. */
			break;
		}
		}
	}
	fz_printf(ctx, out, "</page>\n");
}