/*
 * Try to match the UTF-8 search string 's' against the page text,
 * starting at page character index 'n'.
 *
 * Characters are compared after folding both sides with fz_tolower().
 * When a whitespace rune in the search string meets whitespace on the
 * page, the whole whitespace run on the page and any further whitespace
 * runes in the search string are consumed together.
 *
 * Returns the number of page characters covered by the match, or 0 as
 * soon as a character pair disagrees.
 */
static int match_stext(fz_context *ctx, fz_stext_page *page, const char *s, int n)
{
	int start = n;
	int rune;

	while (*s)
	{
		s += fz_chartorune(&rune, (char *)s);

		if (!iswhite(rune) || !iswhite(charat(ctx, page, n)))
		{
			/* Ordinary character: both sides must agree, ignoring case. */
			if (fz_tolower(rune) != fz_tolower(charat(ctx, page, n)))
				return 0;
			n++;
			continue;
		}

		/* Skip over whitespace in the document */
		n++;
		while (iswhite(charat(ctx, page, n)))
			n++;

		/* Skip over multiple whitespace in the search string */
		for (;;)
		{
			const char *after = s + fz_chartorune(&rune, (char *)s);
			if (!iswhite(rune))
				break;
			s = after;
		}
	}

	return n - start;
}
/*
 * Append a "[ ... ] TJ" operator to 'buf' that shows the UTF-8 text in
 * [a, b) one character per comb cell of width 'cell_w'.
 *
 * Each character is converted with fz_windows_1252_from_unicode()
 * (REPLACEMENT is used when that fails), measured with the given font,
 * and preceded by a numeric adjustment made of the previous character's
 * padding plus its own, so that every glyph is centred in its cell.
 */
static void write_comb_string(fz_context *ctx, fz_buffer *buf, const char *a, const char *b, fz_font *font, float cell_w)
{
	float glyph_w, pad;
	float prev_pad = 0;
	int ch, gid;

	fz_append_byte(ctx, buf, '[');

	while (a < b)
	{
		a += fz_chartorune(&ch, a);
		ch = fz_windows_1252_from_unicode(ch);
		if (ch < 0)
			ch = REPLACEMENT;

		/* Half of the unused cell width goes on each side of the glyph. */
		gid = fz_encode_character(ctx, font, ch);
		glyph_w = fz_advance_glyph(ctx, font, gid, 0) * 1000;
		pad = (cell_w - glyph_w) / 2;
		fz_append_printf(ctx, buf, "%g", -(prev_pad + pad));
		prev_pad = pad;

		/* Emit the character as a one-byte literal string, escaping delimiters. */
		fz_append_byte(ctx, buf, '(');
		if (ch == '(' || ch == ')' || ch == '\\')
			fz_append_byte(ctx, buf, '\\');
		fz_append_byte(ctx, buf, ch);
		fz_append_byte(ctx, buf, ')');
	}

	fz_append_string(ctx, buf, "] TJ\n");
}
/*
 * Split a text flow node in two after 'offset' UTF-8 characters.
 *
 * An offset of zero leaves the node untouched and returns it. Otherwise
 * a copy of the node is allocated from 'pool' and linked in directly
 * after 'flow'; the copy receives a pool-allocated copy of the text
 * following the split point, and the original text is terminated at
 * the split point in place.
 *
 * Returns the node that holds the tail of the text.
 */
static fz_html_flow *split_flow(fz_context *ctx, fz_pool *pool, fz_html_flow *flow, size_t offset)
{
	fz_html_flow *tail;
	char *cut;
	size_t tail_len;

	if (offset == 0)
		return flow;

	/* The struct copy carries over flow->next, so the new node is already chained to the successor. */
	tail = fz_pool_alloc(ctx, pool, sizeof *flow);
	*tail = *flow;
	flow->next = tail;

	/* Walk forward 'offset' runes (or to the end of the string). */
	for (cut = flow->content.text; *cut && offset; offset--)
	{
		int rune;
		cut += fz_chartorune(&rune, cut);
	}

	tail_len = strlen(cut);
	tail->content.text = fz_pool_alloc(ctx, pool, tail_len + 1);
	memcpy(tail->content.text, cut, tail_len + 1);

	/* Truncate the head's text at the split point. */
	*cut = 0;

	return tail;
}
/*
 * Compute the layout metrics of a word node.
 *
 * The effective em size is resolved from the node's font-size style,
 * the height from its line-height style, and the width is the sum of
 * the glyph advances scaled by em. A character that the primary font
 * cannot encode (glyph 0) is measured in the style's fallback font.
 * The node position is reset to the origin.
 */
static void measure_word(fz_context *ctx, fz_html_flow *node, float em)
{
	const char *p;
	float width = 0;
	int rune, gid;

	em = fz_from_css_number(node->style->font_size, em, em);

	node->x = 0;
	node->y = 0;
	node->h = fz_from_css_number_scale(node->style->line_height, em, em, em);

	for (p = node->text; *p; )
	{
		p += fz_chartorune(&rune, p);

		gid = fz_encode_character(ctx, node->style->font, rune);
		if (!gid)
		{
			/* Not in the primary font: use the fallback font instead. */
			gid = fz_encode_character(ctx, node->style->fallback, rune);
			width += fz_advance_glyph(ctx, node->style->fallback, gid) * em;
			continue;
		}

		width += fz_advance_glyph(ctx, node->style->font, gid) * em;
	}

	node->w = width;
	node->em = em;
}
/*
 * Measure one line's worth of the UTF-8 string 'a' at the given font
 * size, breaking at a space once the width exceeds 'maxw'.
 *
 * Runes of 256 and above are replaced by REPLACEMENT. Scanning stops
 * after a '\n' or '\r' has been consumed, or at the end of the string.
 *
 * If the running width exceeds 'maxw' and a space has been seen, *endp
 * is set just past the most recent space and the width measured up to
 * (not including) that space is returned. Otherwise *endp is set to
 * where scanning stopped and the accumulated width is returned.
 */
static float break_simple_string(fz_context *ctx, fz_font *font, float size, const char *a, const char **endp, float maxw)
{
	const char *after_space = NULL;
	float width_at_space = 0;
	float width = 0;
	int rune, gid;

	while (*a)
	{
		a += fz_chartorune(&rune, a);
		if (rune >= 256)
			rune = REPLACEMENT;

		if (rune == '\n' || rune == '\r')
			break;

		if (rune == ' ')
		{
			/* Remember the latest break opportunity. */
			after_space = a;
			width_at_space = width;
		}

		gid = fz_encode_character(ctx, font, rune);
		width += fz_advance_glyph(ctx, font, gid, 0) * size;

		if (after_space && width > maxw)
		{
			*endp = after_space;
			return width_at_space;
		}
	}

	*endp = a;
	return width;
}
/*
 * Open the named file for binary reading and wrap it in a stream.
 *
 * On Windows the name is first tried as-is with open(). If that fails
 * it is decoded as UTF-8, converted to UTF-16 and retried with _wopen().
 * Runes above U+FFFF are written as surrogate pairs; storing them in a
 * single 16-bit wchar_t would silently truncate them and produce the
 * wrong file name.
 *
 * Throws FZ_ERROR_GENERIC if the file cannot be opened.
 */
fz_stream *
fz_open_file(fz_context *ctx, const char *name)
{
#ifdef _WIN32
	char *s = (char*)name;
	wchar_t *wname, *d;
	int c, fd;

	/* SumatraPDF: prefer ANSI to UTF-8 for consistency with remaining API */
	fd = open(name, O_BINARY | O_RDONLY, 0);
	if (fd == -1)
	{
		/*
		 * One wchar_t per input byte is enough: a rune that needs two
		 * UTF-16 units is expected to occupy four UTF-8 bytes.
		 */
		d = wname = fz_malloc(ctx, (strlen(name)+1) * sizeof(wchar_t));
		while (*s)
		{
			s += fz_chartorune(&c, s);
			if (c >= 0x10000 && c <= 0x10FFFF)
			{
				/* Supplementary plane: split into high and low surrogate. */
				c -= 0x10000;
				*d++ = (wchar_t)(0xD800 + (c >> 10));
				*d++ = (wchar_t)(0xDC00 + (c & 0x3FF));
			}
			else
			{
				*d++ = (wchar_t)c;
			}
		}
		*d = 0;
		fd = _wopen(wname, O_BINARY | O_RDONLY, 0);
		fz_free(ctx, wname);
	}
#else
	int fd = open(name, O_BINARY | O_RDONLY, 0);
#endif
	if (fd == -1)
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open %s", name);
	return fz_open_fd(ctx, fd);
}
/*
 * Draw the flow content of 'box' that lies on the current page.
 *
 * An image node is skipped when it starts below 'page_bot' or ends
 * above 'page_top'; any other node is skipped when its y position is
 * outside [page_top, page_bot]. Word nodes are converted to a text
 * object, one glyph per character, and filled in the style's colour;
 * image nodes are filled into a rectangle given by the node position
 * and size.
 */
static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm)
{
	fz_html_flow *node;

	for (node = box->flow_head; node; node = node->next)
	{
		if (node->type == FLOW_IMAGE)
		{
			fz_matrix local_ctm;

			if (node->y > page_bot || node->y + node->h < page_top)
				continue;

			/* Map the unit square onto the image's box. */
			local_ctm = *ctm;
			fz_pre_translate(&local_ctm, node->x, node->y);
			fz_pre_scale(&local_ctm, node->w, node->h);
			fz_fill_image(ctx, dev, node->image, &local_ctm, 1);
			continue;
		}

		if (node->y > page_bot || node->y < page_top)
			continue;

		if (node->type == FLOW_WORD)
		{
			fz_matrix trm;
			fz_text *text;
			const char *p;
			float rgb[3];
			float pen_x, pen_y;
			int rune, gid;

			fz_scale(&trm, node->em, -node->em);
			text = fz_new_text(ctx, node->style->font, &trm, 0);

			pen_x = node->x;
			pen_y = node->y;
			for (p = node->text; *p; )
			{
				p += fz_chartorune(&rune, p);
				gid = fz_encode_character(ctx, node->style->font, rune);
				fz_add_text(ctx, text, gid, rune, pen_x, pen_y);
				pen_x += fz_advance_glyph(ctx, node->style->font, gid) * node->em;
			}

			/* Colour components are stored as 0..255; the device wants 0..1. */
			rgb[0] = node->style->color.r / 255.0f;
			rgb[1] = node->style->color.g / 255.0f;
			rgb[2] = node->style->color.b / 255.0f;

			fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), rgb, 1);
			fz_drop_text(ctx, text);
		}
	}
}
/*
 * Return 1 if the first UTF-8 character of 's' has a Unicode general
 * category in the range LL..LU or ND..NO (as ordered by the UCDN
 * category constants), and 0 otherwise.
 */
static inline int myisalnum(char *s)
{
	int rune;
	int category;

	fz_chartorune(&rune, s);
	category = ucdn_get_general_category(rune);

	return (category >= UCDN_GENERAL_CATEGORY_LL && category <= UCDN_GENERAL_CATEGORY_LU)
		|| (category >= UCDN_GENERAL_CATEGORY_ND && category <= UCDN_GENERAL_CATEGORY_NO);
}
/*
 * Return the total width of the UTF-8 text in [s, e), obtained by
 * summing ui_measure_character() over each decoded character.
 */
static float measure_string_part(const char *s, const char *e)
{
	const char *p = s;
	float total = 0;
	int rune;

	while (p < e)
	{
		p += fz_chartorune(&rune, p);
		total += ui_measure_character(ctx, rune);
	}

	return total;
}
/*
 * Draw the UTF-8 text in [s, e) starting at (x, y), with the vertical
 * position offset by ui.baseline. The pen advances by the value
 * returned from ui_draw_character() after each character.
 */
static void draw_string_part(float x, float y, const char *s, const char *e)
{
	const char *p;
	int rune;

	ui_begin_text(ctx);
	for (p = s; p < e; )
	{
		p += fz_chartorune(&rune, p);
		x += ui_draw_character(ctx, rune, x, y + ui.baseline);
	}
	ui_end_text(ctx);
}
/*
 * Find the character boundary in the UTF-8 text [s, e) closest to the
 * horizontal position 'x', given that the text starts at position 'w'.
 *
 * Returns a pointer to the first character whose midpoint is at or
 * beyond 'x', or 'e' if no such character exists.
 */
static char *find_string_location(char *s, char *e, float w, float x)
{
	char *p;
	int rune;

	for (p = s; p < e; )
	{
		int len = fz_chartorune(&rune, p);
		float char_w = ui_measure_character(ctx, rune);

		if (w + (char_w / 2) >= x)
			return p;

		w += char_w;
		p += len;
	}

	return e;
}
/*
 * Return the sum of the glyph advances of the UTF-8 string 'text' in
 * 'font'. Each character is converted with
 * fz_windows_1252_from_unicode() first, with REPLACEMENT substituted
 * when that conversion fails.
 */
static float measure_simple_string(fz_context *ctx, fz_font *font, const char *text)
{
	float total = 0;
	int ch, gid;

	while (*text)
	{
		text += fz_chartorune(&ch, text);

		ch = fz_windows_1252_from_unicode(ch);
		if (ch < 0)
			ch = REPLACEMENT;

		gid = fz_encode_character(ctx, font, ch);
		total += fz_advance_glyph(ctx, font, gid, 0);
	}

	return total;
}
/*
 * Convert a UTF-8 string to a newly malloc'ed, zero-terminated wide
 * string. The caller owns the result and releases it with free().
 *
 * Where wchar_t is 16 bits wide, runes above U+FFFF are written as a
 * UTF-16 surrogate pair; storing them directly would truncate them to
 * an unrelated character.
 *
 * Returns NULL if the allocation fails.
 */
wchar_t *
fz_wchar_from_utf8(const char *s)
{
	wchar_t *d, *r;
	int c;

	/*
	 * One wchar_t per input byte is enough: a rune that needs two
	 * UTF-16 units is expected to occupy four UTF-8 bytes.
	 */
	r = d = malloc((strlen(s) + 1) * sizeof(wchar_t));
	if (!r)
		return NULL;

	while (*s)
	{
		s += fz_chartorune(&c, s);
		if (sizeof(wchar_t) == 2 && c >= 0x10000 && c <= 0x10FFFF)
		{
			/* Supplementary plane: split into high and low surrogate. */
			c -= 0x10000;
			*d++ = (wchar_t)(0xD800 + (c >> 10));
			*d++ = (wchar_t)(0xDC00 + (c & 0x3FF));
		}
		else
		{
			*d++ = (wchar_t)c;
		}
	}
	*d = 0;

	return r;
}
/*
 * Draw the zero-terminated UTF-8 string 'str' starting at (x, y).
 * Returns the x position reached after the last character.
 */
float ui_draw_string(fz_context *ctx, float x, float y, const char *str)
{
	const char *p;
	int rune;

	ui_begin_text(ctx);
	for (p = str; *p; )
	{
		p += fz_chartorune(&rune, p);
		x += ui_draw_character(ctx, rune, x, y);
	}
	ui_end_text(ctx);

	return x;
}
/*
 * Append the UTF-8 text in [a, b) to 'buf' as a parenthesised literal
 * string. Each character is converted with
 * fz_windows_1252_from_unicode() (REPLACEMENT on failure) and written
 * as a single byte; '(', ')' and '\' are preceded by a backslash.
 */
static void write_simple_string(fz_context *ctx, fz_buffer *buf, const char *a, const char *b)
{
	int ch;

	fz_append_byte(ctx, buf, '(');

	while (a < b)
	{
		a += fz_chartorune(&ch, a);

		ch = fz_windows_1252_from_unicode(ch);
		if (ch < 0)
			ch = REPLACEMENT;

		switch (ch)
		{
		case '(':
		case ')':
		case '\\':
			fz_append_byte(ctx, buf, '\\');
			break;
		default:
			break;
		}
		fz_append_byte(ctx, buf, ch);
	}

	fz_append_byte(ctx, buf, ')');
}
/*
 * Set the frame window's title from the UTF-8 string 'title'.
 *
 * The title is converted to UTF-16 in a fixed 256-element buffer and
 * silently truncated if it does not fit. Runes above U+FFFF are written
 * as surrogate pairs (they were previously truncated to 16 bits), and a
 * pair is never split by the truncation.
 */
void wintitle(pdfapp_t *app, char *title)
{
	wchar_t wide[256], *dp;
	wchar_t *limit = wide + 255; /* last slot is reserved for the terminator */
	char *sp;
	int rune;

	dp = wide;
	sp = title;
	while (*sp && dp < limit)
	{
		sp += fz_chartorune(&rune, sp);
		if (rune >= 0x10000 && rune <= 0x10FFFF)
		{
			/* Need room for both halves; stop rather than emit a lone surrogate. */
			if (dp + 1 >= limit)
				break;
			rune -= 0x10000;
			*dp++ = (wchar_t)(0xD800 + (rune >> 10));
			*dp++ = (wchar_t)(0xDC00 + (rune & 0x3FF));
		}
		else
		{
			*dp++ = (wchar_t)rune;
		}
	}
	*dp = 0;

	SetWindowTextW(hwndframe, wide);
}
/*
 * Return the total width of the zero-terminated UTF-8 string 'str',
 * obtained by summing ui_measure_character() over each character.
 * The measurement is bracketed by ui_begin_text()/ui_end_text().
 */
float ui_measure_string(fz_context *ctx, char *str)
{
	char *p;
	float total = 0;
	int rune;

	ui_begin_text(ctx);
	for (p = str; *p; )
	{
		p += fz_chartorune(&rune, p);
		total += ui_measure_character(ctx, rune);
	}
	ui_end_text(ctx);

	return total;
}
/*
 * Try to match the UTF-8 search string 's' against the page text,
 * starting at page character index 'n'.
 *
 * A space in the search string that meets a space on the page consumes
 * the whole run of spaces on the page. Any other character must equal
 * the page character, ignoring case for values tolower() can handle.
 *
 * Returns the number of page characters covered by the match, or 0 as
 * soon as a character pair disagrees.
 */
static int match(fz_text_page *page, const char *s, int n)
{
	int orig = n;
	int c;

	while (*s)
	{
		s += fz_chartorune(&c, (char *)s);
		if (c == ' ' && charat(page, n) == ' ')
		{
			while (charat(page, n) == ' ')
				n++;
		}
		else
		{
			int d = charat(page, n);

			if (c != d)
			{
				/*
				 * tolower() is only defined for unsigned char values
				 * and EOF, so runes outside 0..255 are compared exactly
				 * instead of being passed to it.
				 */
				if (c < 0 || c > 255 || d < 0 || d > 255)
					return 0;
				if (tolower(c) != tolower(d))
					return 0;
			}
			n++;
		}
	}

	return n - orig;
}
/*
 * Add the zero-terminated UTF-8 string 's' to 'text', one glyph per
 * character.
 *
 * Each character is encoded with fz_encode_character_with_fallback(),
 * which also selects the font actually used for it. After every glyph
 * 'trm' is advanced in place by the glyph's advance: along x when
 * 'wmode' is 0, otherwise along negative y.
 */
void fz_show_string(fz_context *ctx, fz_text *text, fz_font *user_font, fz_matrix *trm, const char *s, int wmode)
{
	fz_font *used_font;
	const char *p;
	int rune, glyph;
	float advance;

	for (p = s; *p; )
	{
		p += fz_chartorune(&rune, p);

		glyph = fz_encode_character_with_fallback(ctx, user_font, rune, 0, &used_font);
		fz_show_glyph(ctx, text, used_font, trm, glyph, rune, wmode);

		advance = fz_advance_glyph(ctx, used_font, glyph, wmode);
		if (wmode != 0)
			fz_pre_translate(trm, 0, -advance);
		else
			fz_pre_translate(trm, advance, 0);
	}
}
/*
 * Try to match the UTF-8 search string 'str' against the page text,
 * starting at page character index 'n'.
 *
 * A space in the search string that meets a space on the page consumes
 * the whole run of spaces on the page. Any other character must equal
 * the page character, ignoring case for values tolower() can handle.
 *
 * Returns the number of page characters covered by the match, or 0 as
 * soon as a character pair disagrees.
 */
static int Match(fz_text_page *page, const char *str, int n)
{
	int orig = n;
	int c;

	while (*str)
	{
		str += fz_chartorune(&c, (char *)str);
		if (c == ' ' && CharAt(page, n) == ' ')
		{
			while (CharAt(page, n) == ' ')
				n++;
		}
		else
		{
			int d = CharAt(page, n);

			if (c != d)
			{
				/*
				 * tolower() is only defined for unsigned char values
				 * and EOF, so runes outside 0..255 are compared exactly
				 * instead of being passed to it.
				 */
				if (c < 0 || c > 255 || d < 0 || d > 255)
					return 0;
				if (tolower(c) != tolower(d))
					return 0;
			}
			n++;
		}
	}

	return n - orig;
}
/*
 * Open the named file for binary reading and wrap it in a stream.
 *
 * On Windows the name is decoded as UTF-8, converted to UTF-16 and
 * opened with _wopen(). Runes above U+FFFF are written as surrogate
 * pairs; storing them in a single 16-bit wchar_t would silently
 * truncate them and produce the wrong file name.
 *
 * Throws if the file cannot be opened.
 */
fz_stream *
fz_open_file(fz_context *ctx, const char *name)
{
#ifdef _WIN32
	char *s = (char*)name;
	wchar_t *wname, *d;
	int c, fd;

	/*
	 * One wchar_t per input byte is enough: a rune that needs two
	 * UTF-16 units is expected to occupy four UTF-8 bytes.
	 */
	d = wname = fz_malloc(ctx, (strlen(name)+1) * sizeof(wchar_t));
	while (*s)
	{
		s += fz_chartorune(&c, s);
		if (c >= 0x10000 && c <= 0x10FFFF)
		{
			/* Supplementary plane: split into high and low surrogate. */
			c -= 0x10000;
			*d++ = (wchar_t)(0xD800 + (c >> 10));
			*d++ = (wchar_t)(0xDC00 + (c & 0x3FF));
		}
		else
		{
			*d++ = (wchar_t)c;
		}
	}
	*d = 0;
	fd = _wopen(wname, O_BINARY | O_RDONLY, 0);
	fz_free(ctx, wname);
#else
	int fd = open(name, O_BINARY | O_RDONLY, 0);
#endif
	if (fd == -1)
		fz_throw(ctx, "cannot open %s", name);
	return fz_open_fd(ctx, fd);
}
/*
 * Open the named file for binary reading and wrap it in a stream.
 *
 * On Windows the name is decoded as UTF-8, converted to UTF-16 and
 * opened with _wfopen(). Runes above U+FFFF are written as surrogate
 * pairs; storing them in a single 16-bit wchar_t would silently
 * truncate them and produce the wrong file name. Elsewhere the name is
 * passed to fz_fopen() unchanged.
 *
 * Throws FZ_ERROR_GENERIC, including the strerror() text, if the file
 * cannot be opened.
 */
fz_stream *
fz_open_file(fz_context *ctx, const char *name)
{
	FILE *f;
#if defined(_WIN32) || defined(_WIN64)
	char *s = (char*)name;
	wchar_t *wname, *d;
	int c;

	/*
	 * One wchar_t per input byte is enough: a rune that needs two
	 * UTF-16 units is expected to occupy four UTF-8 bytes.
	 */
	d = wname = fz_malloc(ctx, (strlen(name)+1) * sizeof(wchar_t));
	while (*s)
	{
		s += fz_chartorune(&c, s);
		if (c >= 0x10000 && c <= 0x10FFFF)
		{
			/* Supplementary plane: split into high and low surrogate. */
			c -= 0x10000;
			*d++ = (wchar_t)(0xD800 + (c >> 10));
			*d++ = (wchar_t)(0xDC00 + (c & 0x3FF));
		}
		else
		{
			*d++ = (wchar_t)c;
		}
	}
	*d = 0;
	f = _wfopen(wname, L"rb");
	fz_free(ctx, wname);
#else
	f = fz_fopen(name, "rb");
#endif
	if (f == NULL)
		fz_throw(ctx, FZ_ERROR_GENERIC, "cannot open %s: %s", name, strerror(errno));
	return fz_open_file_ptr(ctx, f);
}
/*
 * Draw the flow content of 'box' that lies on the current page.
 *
 * An image node is skipped when it starts at or below 'page_bot' or
 * ends at or above 'page_top'; any other node is skipped when its y
 * position is outside [page_top, page_bot].
 *
 * Word nodes are laid out glyph by glyph, with a per-character fallback
 * font. For right-to-left words the total width is measured first and
 * the glyphs are then placed from the right edge backwards, optionally
 * mirroring characters. Glyphs and images are only emitted when the
 * style's visibility is V_VISIBLE.
 */
static void draw_flow_box(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm)
{
	fz_html_flow *node;

	for (node = box->flow_head; node; node = node->next)
	{
		if (node->type == FLOW_IMAGE)
		{
			if (node->y >= page_bot || node->y + node->h <= page_top)
				continue;

			if (node->style->visibility == V_VISIBLE)
			{
				/* Map the unit square onto the image's box. */
				fz_matrix local_ctm = *ctm;
				fz_pre_translate(&local_ctm, node->x, node->y);
				fz_pre_scale(&local_ctm, node->w, node->h);
				fz_fill_image(ctx, dev, node->content.image, &local_ctm, 1);
			}
			continue;
		}

		if (node->y > page_bot || node->y < page_top)
			continue;

		if (node->type == FLOW_WORD)
		{
			fz_font *glyph_font;
			fz_text *text;
			fz_matrix trm;
			const char *p;
			float rgb[3];
			int rune, gid;
			int visible = (node->style->visibility == V_VISIBLE);

			fz_scale(&trm, node->em, -node->em);

			/* Colour components are stored as 0..255; the device wants 0..1. */
			rgb[0] = node->style->color.r / 255.0f;
			rgb[1] = node->style->color.g / 255.0f;
			rgb[2] = node->style->color.b / 255.0f;

			/* TODO: reuse text object if color is unchanged */
			text = fz_new_text(ctx);

			trm.e = node->x;
			trm.f = node->y;

			if (node->char_r2l)
			{
				float total = 0;

				/* First pass: measure the whole word. */
				for (p = node->content.text; *p; )
				{
					p += fz_chartorune(&rune, p);
					if (node->mirror)
						rune = ucdn_mirror(rune);
					gid = fz_encode_character_with_fallback(ctx, node->style->font, rune, 0, &glyph_font);
					total += fz_advance_glyph(ctx, glyph_font, gid) * node->em;
				}

				/* Second pass: start at the right edge and step leftwards. */
				trm.e += total;
				for (p = node->content.text; *p; )
				{
					p += fz_chartorune(&rune, p);
					if (node->mirror)
						rune = ucdn_mirror(rune);
					gid = fz_encode_character_with_fallback(ctx, node->style->font, rune, 0, &glyph_font);
					trm.e -= fz_advance_glyph(ctx, glyph_font, gid) * node->em;
					if (visible)
						fz_add_text(ctx, text, glyph_font, 0, &trm, gid, rune);
				}
				trm.e += total;
			}
			else
			{
				for (p = node->content.text; *p; )
				{
					p += fz_chartorune(&rune, p);
					gid = fz_encode_character_with_fallback(ctx, node->style->font, rune, 0, &glyph_font);
					if (visible)
						fz_add_text(ctx, text, glyph_font, 0, &trm, gid, rune);
					trm.e += fz_advance_glyph(ctx, glyph_font, gid) * node->em;
				}
			}

			if (text)
			{
				fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), rgb, 1);
				fz_drop_text(ctx, text);
			}
		}
	}
}
/*
 * Split 'text' into flow nodes and append them to the flow box that
 * encloses 'box', honouring the box's white-space style flags.
 *
 *  - With WS_FORCE_BREAK_NEWLINE, each "\r\n", '\r' or '\n' becomes a
 *    flow break.
 *  - A whitespace character (the whole run with WS_COLLAPSE) becomes a
 *    single-space glue node with WS_ALLOW_BREAK_SPACE, or else a word
 *    node holding the whitespace itself.
 *  - A run of non-whitespace becomes a word node, except that every CJK
 *    character ends the current word so that lines can break around it;
 *    empty glue is inserted between two such words when not_at_eol()
 *    and not_at_bol() both allow a break there.
 */
static void generate_text(fz_context *ctx, fz_pool *pool, fz_html *box, const char *text)
{
	fz_html *flow;
	int collapse = box->style.white_space & WS_COLLAPSE;
	int bsp = box->style.white_space & WS_ALLOW_BREAK_SPACE;
	int bnl = box->style.white_space & WS_FORCE_BREAK_NEWLINE;

	/* Climb to the enclosing flow box. */
	for (flow = box; flow->type != BOX_FLOW; flow = flow->up)
		;

	while (*text)
	{
		const char *mark;
		int rune;
		int addglue;

		if (bnl && (*text == '\n' || *text == '\r'))
		{
			/* Treat CR LF as a single line break. */
			text += (text[0] == '\r' && text[1] == '\n') ? 2 : 1;
			add_flow_break(ctx, pool, flow, &box->style);
			continue;
		}

		if (iswhite(*text))
		{
			mark = text++;
			if (collapse)
			{
				while (iswhite(*text))
					++text;
			}
			/* TODO: tabs */
			if (bsp)
				add_flow_glue(ctx, pool, flow, &box->style, " ", 1);
			else
				add_flow_word(ctx, pool, flow, &box->style, mark, text);
			continue;
		}

		mark = text;
		addglue = 0;
		while (*text && !iswhite(*text))
		{
			/* TODO: Unicode Line Breaking Algorithm (UAX #14) */
			text += fz_chartorune(&rune, text);

			if (!iscjk(rune))
			{
				addglue = 0;
				continue;
			}

			{
				int cat = ucdn_get_general_category(rune);

				if (addglue && !not_at_bol(cat, rune))
					add_flow_glue(ctx, pool, flow, &box->style, "", 0);
				add_flow_word(ctx, pool, flow, &box->style, mark, text);
				if (!not_at_eol(cat, rune))
					addglue = 1;
				mark = text;
			}
		}

		/* Flush whatever is left after the last CJK split. */
		if (mark != text)
			add_flow_word(ctx, pool, flow, &box->style, mark, text);
	}
}
/*
 * Decode one UTF-8 character from 's', store its canon() form in '*c',
 * and return the number of bytes consumed.
 */
static inline int chartocanon(int *c, const char *s)
{
	int len = fz_chartorune(c, s);

	*c = canon(*c);
	return len;
}
/*
 * Draw the list item marker (number or bullet text) for 'box'.
 *
 * The marker's baseline is taken from the line returned by
 * find_list_mark_anchor(); when there is none, it is derived from the
 * box's line height, using 0.8 em above and 0.2 em below the baseline.
 * Nothing is drawn when that baseline lies outside
 * [page_top, page_bot].
 *
 * The marker text comes from format_list_number() for item number 'n'.
 * It is measured first so that it can be placed ending at the box's
 * left edge, then filled in the box's colour.
 *
 * The text object is created only once it is certain to be used;
 * creating it before the off-page check leaked it on that early return.
 */
static void draw_list_mark(fz_context *ctx, fz_html *box, float page_top, float page_bot, fz_device *dev, const fz_matrix *ctm, int n)
{
	fz_font *font;
	fz_text *text;
	fz_matrix trm;
	fz_html_flow *line;
	float y, w;
	float color[3];
	const char *s;
	char buf[40];
	int c, g;

	fz_scale(&trm, box->em, -box->em);

	line = find_list_mark_anchor(ctx, box);
	if (line)
	{
		y = line->y;
	}
	else
	{
		float h = fz_from_css_number_scale(box->style.line_height, box->em, box->em, box->em);
		float a = box->em * 0.8;
		float d = box->em * 0.2;
		if (a + d > h)
			h = a + d;
		y = box->y + a + (h - a - d) / 2;
	}

	if (y > page_bot || y < page_top)
		return;

	format_list_number(ctx, box->style.list_style_type, n, buf, sizeof buf);

	/* Measure the marker so it can be placed ending at the box's left edge. */
	s = buf;
	w = 0;
	while (*s)
	{
		s += fz_chartorune(&c, s);
		g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font);
		w += fz_advance_glyph(ctx, font, g) * box->em;
	}

	text = fz_new_text(ctx);

	s = buf;
	trm.e = box->x - w;
	trm.f = y;
	while (*s)
	{
		s += fz_chartorune(&c, s);
		g = fz_encode_character_with_fallback(ctx, box->style.font, c, UCDN_SCRIPT_LATIN, &font);
		fz_add_text(ctx, text, font, 0, &trm, g, c);
		trm.e += fz_advance_glyph(ctx, font, g) * box->em;
	}

	/* Colour components are stored as 0..255; the device wants 0..1. */
	color[0] = box->style.color.r / 255.0f;
	color[1] = box->style.color.g / 255.0f;
	color[2] = box->style.color.b / 255.0f;

	fz_fill_text(ctx, dev, text, ctm, fz_device_rgb(ctx), color, 1);

	fz_drop_text(ctx, text);
}
/* * Parse unicode and indices strings and encode glyphs. * Calculate metrics for positioning. */ static fz_text * xps_parse_glyphs_imp(fz_context *ctx, xps_document *doc, const fz_matrix *ctm, fz_font *font, float size, float originx, float originy, int is_sideways, int bidi_level, char *indices, char *unicode) { xps_glyph_metrics mtx; fz_text *text; fz_matrix tm; float e, f; float x = originx; float y = originy; char *us = unicode; char *is = indices; int un = 0; if (!unicode && !indices) fz_warn(ctx, "glyphs element with neither characters nor indices"); if (us) { if (us[0] == '{' && us[1] == '}') us = us + 2; un = strlen(us); } if (is_sideways) { fz_pre_scale(fz_rotate(&tm, 90), -size, size); } else fz_scale(&tm, size, -size); text = fz_new_text(ctx, font, &tm, is_sideways); while ((us && un > 0) || (is && *is)) { int char_code = '?'; int code_count = 1; int glyph_count = 1; if (is && *is) { is = xps_parse_cluster_mapping(is, &code_count, &glyph_count); } if (code_count < 1) code_count = 1; if (glyph_count < 1) glyph_count = 1; /* TODO: add code chars with cluster mappings for text extraction */ while (code_count--) { if (us && un > 0) { int t = fz_chartorune(&char_code, us); us += t; un -= t; } } while (glyph_count--) { int glyph_index = -1; float u_offset = 0; float v_offset = 0; float advance; if (is && *is) is = xps_parse_glyph_index(is, &glyph_index); if (glyph_index == -1) glyph_index = xps_encode_font_char(font, char_code); xps_measure_font_glyph(ctx, doc, font, glyph_index, &mtx); if (is_sideways) advance = mtx.vadv * 100; else if (bidi_level & 1) advance = -mtx.hadv * 100; else advance = mtx.hadv * 100; if (font->ft_bold) advance *= 1.02f; if (is && *is) { is = xps_parse_glyph_metrics(is, &advance, &u_offset, &v_offset); if (*is == ';') is ++; } if (bidi_level & 1) u_offset = -mtx.hadv * 100 - u_offset; u_offset = u_offset * 0.01f * size; v_offset = v_offset * 0.01f * size; if (is_sideways) { e = x + u_offset + (mtx.vorg * size); f = y - v_offset + 
(mtx.hadv * 0.5f * size); } else { e = x + u_offset; f = y - v_offset; } fz_add_text(ctx, text, glyph_index, char_code, e, f); x += advance * 0.01f * size; } } return text; }
/*
	Search for occurrences of 'needle' in a text page.

	Returns the number of hit quads, which are stored in the caller's
	'quads' array; 'max_quads' is recorded as that array's capacity.
	An empty needle returns 0 immediately.

	How it works: the page is flattened into a buffer with
	fz_new_buffer_from_stext_page() and find_string() locates the next
	match as the range [begin, end) in that string. The page's text
	blocks, lines and characters are then walked in step with the
	'haystack' pointer, which advances one UTF-8 character per page
	character; the asserts check that a '\n' follows every line and
	every text block in the string. Every character whose string
	position falls inside the current match is handed to
	on_highlight_char(). Once the position passes 'end', the search
	resumes from the current position and the same character is tested
	again (goto try_new_match), because it may start the next match.

	The buffer is dropped in fz_always, so it is released on both the
	normal and the error path; errors are rethrown to the caller.

	NOTE: This is an experimental interface and subject to change
	without notice.
*/
int fz_search_stext_page(fz_context *ctx, fz_stext_page *page, const char *needle, fz_quad *quads, int max_quads) { struct highlight hits; fz_stext_block *block; fz_stext_line *line; fz_stext_char *ch; fz_buffer *buffer; const char *haystack, *begin, *end; int c, inside; if (strlen(needle) == 0) return 0; hits.len = 0; hits.cap = max_quads; hits.box = quads; hits.hfuzz = 0.5f; hits.vfuzz = 0.1f; buffer = fz_new_buffer_from_stext_page(ctx, page); fz_try(ctx) { haystack = fz_string_from_buffer(ctx, buffer); begin = find_string(haystack, needle, &end); if (!begin) goto no_more_matches; inside = 0; for (block = page->first_block; block; block = block->next) { if (block->type != FZ_STEXT_BLOCK_TEXT) continue; for (line = block->u.t.first_line; line; line = line->next) { for (ch = line->first_char; ch; ch = ch->next) { try_new_match: if (!inside) { if (haystack >= begin) inside = 1; } if (inside) { if (haystack < end) on_highlight_char(ctx, &hits, line, ch); else { inside = 0; begin = find_string(haystack, needle, &end); if (!begin) goto no_more_matches; else goto try_new_match; } } haystack += fz_chartorune(&c, haystack); } assert(*haystack == '\n'); ++haystack; } assert(*haystack == '\n'); ++haystack; } no_more_matches:; } fz_always(ctx) fz_drop_buffer(ctx, buffer); fz_catch(ctx) fz_rethrow(ctx); return hits.len; }