/* Convert Unicode/PdfDocEncoding string into utf-8 */ char * pdf_to_utf8(fz_context *ctx, pdf_document *doc, pdf_obj *src) { fz_buffer *strmbuf = NULL; unsigned char *srcptr; char *dstptr, *dst; int srclen; int dstlen = 0; int ucs; int i; fz_var(strmbuf); fz_try(ctx) { if (pdf_is_string(ctx, src)) { srcptr = (unsigned char *) pdf_to_str_buf(ctx, src); srclen = pdf_to_str_len(ctx, src); } else if (pdf_is_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src))) { strmbuf = pdf_load_stream(ctx, doc, pdf_to_num(ctx, src), pdf_to_gen(ctx, src)); srclen = fz_buffer_storage(ctx, strmbuf, (unsigned char **)&srcptr); } else { srclen = 0; } if (srclen >= 2 && srcptr[0] == 254 && srcptr[1] == 255) { for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] << 8 | srcptr[i+1]; dstlen += fz_runelen(ucs); } dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] << 8 | srcptr[i+1]; dstptr += fz_runetochar(dstptr, ucs); } } else if (srclen >= 2 && srcptr[0] == 255 && srcptr[1] == 254) { for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] | srcptr[i+1] << 8; dstlen += fz_runelen(ucs); } dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 2; i + 1 < srclen; i += 2) { ucs = srcptr[i] | srcptr[i+1] << 8; dstptr += fz_runetochar(dstptr, ucs); } } else { for (i = 0; i < srclen; i++) dstlen += fz_runelen(pdf_doc_encoding[srcptr[i]]); dstptr = dst = fz_malloc(ctx, dstlen + 1); for (i = 0; i < srclen; i++) { ucs = pdf_doc_encoding[srcptr[i]]; dstptr += fz_runetochar(dstptr, ucs); } } } fz_always(ctx) { fz_drop_buffer(ctx, strmbuf); } fz_catch(ctx) { fz_rethrow(ctx); } *dstptr = '\0'; return dst; }
/* When resetting or submitting a form, the fields to act upon are defined by an array of either field references or field names, plus a flag determining whether to act upon the fields in the array, or all fields other than those in the array. specified_fields interprets this information and produces the array of fields to be acted upon. */ static pdf_obj *specified_fields(fz_context *ctx, pdf_document *doc, pdf_obj *fields, int exclude) { pdf_obj *form = pdf_dict_getl(ctx, pdf_trailer(ctx, doc), PDF_NAME_Root, PDF_NAME_AcroForm, PDF_NAME_Fields, NULL); int i, n; pdf_obj *result = pdf_new_array(ctx, doc, 0); pdf_obj *nil = NULL; fz_var(nil); fz_try(ctx) { /* The 'fields' array not being present signals that all fields * should be acted upon, so handle it using the exclude case - excluding none */ if (exclude || !fields) { /* mark the fields we don't want to act upon */ nil = pdf_new_null(ctx, doc); n = pdf_array_len(ctx, fields); for (i = 0; i < n; i++) { pdf_obj *field = pdf_array_get(ctx, fields, i); if (pdf_is_string(ctx, field)) field = pdf_lookup_field(ctx, form, pdf_to_str_buf(ctx, field)); if (field) pdf_dict_put(ctx, field, PDF_NAME_Exclude, nil); } /* Act upon all unmarked fields */ n = pdf_array_len(ctx, form); for (i = 0; i < n; i++) add_field_hierarchy_to_array(ctx, result, pdf_array_get(ctx, form, i)); /* Unmark the marked fields */ n = pdf_array_len(ctx, fields); for (i = 0; i < n; i++) { pdf_obj *field = pdf_array_get(ctx, fields, i); if (pdf_is_string(ctx, field)) field = pdf_lookup_field(ctx, form, pdf_to_str_buf(ctx, field)); if (field) pdf_dict_del(ctx, field, PDF_NAME_Exclude); } } else { n = pdf_array_len(ctx, fields); for (i = 0; i < n; i++) { pdf_obj *field = pdf_array_get(ctx, fields, i); if (pdf_is_string(ctx, field)) field = pdf_lookup_field(ctx, form, pdf_to_str_buf(ctx, field)); if (field) add_field_hierarchy_to_array(ctx, result, field); } } } fz_always(ctx) { pdf_drop_obj(ctx, nil); } fz_catch(ctx) { pdf_drop_obj(ctx, result); fz_rethrow(ctx); } return result; }
char * pdf_parse_link_dest(fz_context *ctx, pdf_document *doc, pdf_obj *dest) { pdf_obj *obj; char buf[256]; const char *ld; int page; int x, y; dest = resolve_dest(ctx, doc, dest); if (dest == NULL) { fz_warn(ctx, "undefined link destination"); return NULL; } if (pdf_is_name(ctx, dest)) { ld = pdf_to_name(ctx, dest); return fz_strdup(ctx, ld); } else if (pdf_is_string(ctx, dest)) { ld = pdf_to_str_buf(ctx, dest); return fz_strdup(ctx, ld); } obj = pdf_array_get(ctx, dest, 0); if (pdf_is_int(ctx, obj)) page = pdf_to_int(ctx, obj); else { fz_try(ctx) page = pdf_lookup_page_number(ctx, doc, obj); fz_catch(ctx) page = -1; } x = y = 0; obj = pdf_array_get(ctx, dest, 1); if (pdf_name_eq(ctx, obj, PDF_NAME_XYZ)) { x = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2)); y = pdf_to_int(ctx, pdf_array_get(ctx, dest, 3)); } else if (pdf_name_eq(ctx, obj, PDF_NAME_FitR)) { x = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2)); y = pdf_to_int(ctx, pdf_array_get(ctx, dest, 5)); } else if (pdf_name_eq(ctx, obj, PDF_NAME_FitH) || pdf_name_eq(ctx, obj, PDF_NAME_FitBH)) y = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2)); else if (pdf_name_eq(ctx, obj, PDF_NAME_FitV) || pdf_name_eq(ctx, obj, PDF_NAME_FitBV)) x = pdf_to_int(ctx, pdf_array_get(ctx, dest, 2)); if (page >= 0) { if (x != 0 || y != 0) fz_snprintf(buf, sizeof buf, "#%d,%d,%d", page + 1, x, y); else fz_snprintf(buf, sizeof buf, "#%d", page + 1); return fz_strdup(ctx, buf); } return NULL; }
char * pdf_parse_file_spec(fz_context *ctx, pdf_document *doc, pdf_obj *file_spec, pdf_obj *dest) { pdf_obj *filename=NULL; char *path = NULL; char *uri = NULL; char buf[256]; size_t n; if (pdf_is_string(ctx, file_spec)) filename = file_spec; if (pdf_is_dict(ctx, file_spec)) { #if defined(_WIN32) || defined(_WIN64) filename = pdf_dict_get(ctx, file_spec, PDF_NAME_DOS); #else filename = pdf_dict_get(ctx, file_spec, PDF_NAME_Unix); #endif if (!filename) filename = pdf_dict_geta(ctx, file_spec, PDF_NAME_UF, PDF_NAME_F); } if (!pdf_is_string(ctx, filename)) { fz_warn(ctx, "cannot parse file specification"); return NULL; } path = pdf_to_utf8(ctx, filename); #if defined(_WIN32) || defined(_WIN64) if (strcmp(pdf_to_name(ctx, pdf_dict_gets(ctx, file_spec, "FS")), "URL") != 0) { /* move the file name into the expected place and use the expected path separator */ char *c; if (path[0] == '/' && (('A' <= path[1] && path[1] <= 'Z') || ('a' <= path[1] && path[1] <= 'z')) && path[2] == '/') { path[0] = path[1]; path[1] = ':'; } for (c = path; *c; c++) { if (*c == '/') *c = '\\'; } } #endif if (pdf_is_array(ctx, dest)) fz_snprintf(buf, sizeof buf, "#page=%d", pdf_to_int(ctx, pdf_array_get(ctx, dest, 0)) + 1); else if (pdf_is_name(ctx, dest)) fz_snprintf(buf, sizeof buf, "#%s", pdf_to_name(ctx, dest)); else if (pdf_is_string(ctx, dest)) fz_snprintf(buf, sizeof buf, "#%s", pdf_to_str_buf(ctx, dest)); else buf[0] = 0; n = 7 + strlen(path) + strlen(buf) + 1; uri = fz_malloc(ctx, n); fz_strlcpy(uri, "file://", n); fz_strlcat(uri, path, n); fz_strlcat(uri, buf, n); fz_free(ctx, path); return uri; }
static fz_colorspace * load_indexed(pdf_document *xref, pdf_obj *array) { struct indexed *idx = NULL; fz_context *ctx = xref->ctx; pdf_obj *baseobj = pdf_array_get(array, 1); pdf_obj *highobj = pdf_array_get(array, 2); pdf_obj *lookup = pdf_array_get(array, 3); fz_colorspace *base = NULL; fz_colorspace *cs = NULL; int i, n; fz_var(idx); fz_var(base); fz_var(cs); fz_try(ctx) { base = pdf_load_colorspace(xref, baseobj); idx = fz_malloc_struct(ctx, struct indexed); idx->lookup = NULL; idx->base = base; idx->high = pdf_to_int(highobj); idx->high = fz_clampi(idx->high, 0, 255); n = base->n * (idx->high + 1); idx->lookup = fz_malloc_array(ctx, 1, n); cs = fz_new_colorspace(ctx, "Indexed", 1); cs->to_rgb = indexed_to_rgb; cs->free_data = free_indexed; cs->data = idx; cs->size += sizeof(*idx) + n + (base ? base->size : 0); if (pdf_is_string(lookup) && pdf_to_str_len(lookup) == n) { unsigned char *buf = (unsigned char *) pdf_to_str_buf(lookup); for (i = 0; i < n; i++) idx->lookup[i] = buf[i]; } else if (pdf_is_indirect(lookup)) { fz_stream *file = NULL; fz_try(ctx) { file = pdf_open_stream(xref, pdf_to_num(lookup), pdf_to_gen(lookup)); } fz_catch(ctx) { fz_throw(ctx, "cannot open colorspace lookup table (%d 0 R)", pdf_to_num(lookup)); } /* SumatraPDF: fix memory leak */ fz_try(ctx) { i = fz_read(file, idx->lookup, n); } fz_always(ctx) { fz_close(file); } fz_catch(ctx) { fz_throw(ctx, "cannot read colorspace lookup table (%d 0 R)", pdf_to_num(lookup)); } } else { fz_throw(ctx, "cannot parse colorspace lookup table"); } }
static pdf_font_desc * pdf_load_simple_font(pdf_document *doc, pdf_obj *dict) { pdf_obj *descriptor; pdf_obj *encoding; pdf_obj *widths; unsigned short *etable = NULL; pdf_font_desc *fontdesc = NULL; char *subtype; FT_Face face; FT_CharMap cmap; int symbolic; int kind; char *basefont; char *estrings[256]; char ebuffer[256][32]; int i, k, n; int fterr; int has_lock = 0; fz_context *ctx = doc->ctx; fz_var(fontdesc); fz_var(etable); fz_var(has_lock); basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont")); /* Load font file */ fz_try(ctx) { fontdesc = pdf_new_font_desc(ctx); descriptor = pdf_dict_gets(dict, "FontDescriptor"); if (descriptor) pdf_load_font_descriptor(fontdesc, doc, descriptor, NULL, basefont, 0); else pdf_load_builtin_font(ctx, fontdesc, basefont); /* Some chinese documents mistakenly consider WinAnsiEncoding to be codepage 936 */ if (descriptor && pdf_is_string(pdf_dict_gets(descriptor, "FontName")) && !pdf_dict_gets(dict, "ToUnicode") && !strcmp(pdf_to_name(pdf_dict_gets(dict, "Encoding")), "WinAnsiEncoding") && pdf_to_int(pdf_dict_gets(descriptor, "Flags")) == 4) { char *cp936fonts[] = { "\xCB\xCE\xCC\xE5", "SimSun,Regular", "\xBA\xDA\xCC\xE5", "SimHei,Regular", "\xBF\xAC\xCC\xE5_GB2312", "SimKai,Regular", "\xB7\xC2\xCB\xCE_GB2312", "SimFang,Regular", "\xC1\xA5\xCA\xE9", "SimLi,Regular", NULL }; for (i = 0; cp936fonts[i]; i += 2) if (!strcmp(basefont, cp936fonts[i])) break; if (cp936fonts[i]) { fz_warn(ctx, "workaround for S22PDF lying about chinese font encodings"); pdf_drop_font(ctx, fontdesc); fontdesc = NULL; fontdesc = pdf_new_font_desc(ctx); pdf_load_font_descriptor(fontdesc, doc, descriptor, "Adobe-GB1", cp936fonts[i+1], 0); fontdesc->encoding = pdf_load_system_cmap(ctx, "GBK-EUC-H"); fontdesc->to_unicode = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2"); face = fontdesc->font->ft_face; kind = ft_kind(face); goto skip_encoding; } } face = fontdesc->font->ft_face; kind = ft_kind(face); /* Encoding */ symbolic = fontdesc->flags & 4; if (face->num_charmaps > 0) cmap = face->charmaps[0]; else cmap = NULL; for (i = 0; i < face->num_charmaps; i++) { FT_CharMap test = face->charmaps[i]; if (kind == TYPE1) { if (test->platform_id == 7) cmap = test; } if (kind == TRUETYPE) { if (test->platform_id == 1 && test->encoding_id == 0) cmap = test; if (test->platform_id == 3 && test->encoding_id == 1) cmap = test; if (symbolic && test->platform_id == 3 && test->encoding_id == 0) cmap = test; } } if (cmap) { fterr = FT_Set_Charmap(face, cmap); if (fterr) fz_warn(ctx, "freetype could not set cmap: %s", ft_error_string(fterr)); } else fz_warn(ctx, "freetype could not find any cmaps"); etable = fz_malloc_array(ctx, 256, sizeof(unsigned short)); fontdesc->size += 256 * sizeof(unsigned short); for (i = 0; i < 256; i++) { estrings[i] = NULL; etable[i] = 0; } encoding = pdf_dict_gets(dict, "Encoding"); if (encoding) { if (pdf_is_name(encoding)) pdf_load_encoding(estrings, pdf_to_name(encoding)); if (pdf_is_dict(encoding)) { pdf_obj *base, *diff, *item; base = pdf_dict_gets(encoding, "BaseEncoding"); if (pdf_is_name(base)) pdf_load_encoding(estrings, pdf_to_name(base)); else if (!fontdesc->is_embedded && !symbolic) pdf_load_encoding(estrings, "StandardEncoding"); diff = pdf_dict_gets(encoding, "Differences"); if (pdf_is_array(diff)) { n = pdf_array_len(diff); k = 0; for (i = 0; i < n; i++) { item = pdf_array_get(diff, i); if (pdf_is_int(item)) k = pdf_to_int(item); if (pdf_is_name(item) && k >= 0 && k < nelem(estrings)) estrings[k++] = pdf_to_name(item); } } } } /* start with the builtin encoding */ for (i = 0; i < 256; i++) etable[i] = ft_char_index(face, i); fz_lock(ctx, FZ_LOCK_FREETYPE); has_lock = 1; /* built-in and substitute fonts may be a different type than what the document expects */ subtype = pdf_to_name(pdf_dict_gets(dict, "Subtype")); if (!strcmp(subtype, "Type1")) kind = TYPE1; else if (!strcmp(subtype, "MMType1")) kind = TYPE1; else if (!strcmp(subtype, "TrueType")) kind = TRUETYPE; else if (!strcmp(subtype, "CIDFontType0")) kind = TYPE1; else if (!strcmp(subtype, "CIDFontType2")) kind = TRUETYPE; /* encode by glyph name where we can */ if (kind == TYPE1) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) { int aglcode = pdf_lookup_agl(estrings[i]); const char **dupnames = pdf_lookup_agl_duplicates(aglcode); while (*dupnames) { etable[i] = FT_Get_Name_Index(face, (char*)*dupnames); if (etable[i]) break; dupnames++; } } } } } /* encode by glyph name where we can */ if (kind == TRUETYPE) { /* Unicode cmap */ if (!symbolic && face->charmap && face->charmap->platform_id == 3) { for (i = 0; i < 256; i++) { if (estrings[i]) { int aglcode = pdf_lookup_agl(estrings[i]); if (!aglcode) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, aglcode); } } } /* MacRoman cmap */ else if (!symbolic && face->charmap && face->charmap->platform_id == 1) { for (i = 0; i < 256; i++) { if (estrings[i]) { k = lookup_mre_code(estrings[i]); if (k <= 0) etable[i] = FT_Get_Name_Index(face, estrings[i]); else etable[i] = ft_char_index(face, k); } } } /* Symbolic cmap */ else if (!face->charmap || face->charmap->encoding != FT_ENCODING_MS_SYMBOL) { for (i = 0; i < 256; i++) { if (estrings[i]) { etable[i] = FT_Get_Name_Index(face, estrings[i]); if (etable[i] == 0) etable[i] = ft_char_index(face, i); } } } } /* try to reverse the glyph names from the builtin encoding */ for (i = 0; i < 256; i++) { if (etable[i] && !estrings[i]) { if (FT_HAS_GLYPH_NAMES(face)) { fterr = FT_Get_Glyph_Name(face, etable[i], ebuffer[i], 32); if (fterr) fz_warn(ctx, "freetype get glyph name (gid %d): %s", etable[i], ft_error_string(fterr)); if (ebuffer[i][0]) estrings[i] = ebuffer[i]; } else { estrings[i] = (char*) pdf_win_ansi[i]; /* discard const */ } } } /* symbolic Type 1 fonts with an implicit encoding and non-standard glyph names */ if (kind == TYPE1 && symbolic) { for (i = 0; i < 256; i++) if (etable[i] && estrings[i] && !pdf_lookup_agl(estrings[i])) estrings[i] = (char*) pdf_standard[i]; } fz_unlock(ctx, FZ_LOCK_FREETYPE); has_lock = 0; fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 1); fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding); fontdesc->cid_to_gid_len = 256; fontdesc->cid_to_gid = etable; fz_try(ctx) { pdf_load_to_unicode(doc, fontdesc, estrings, NULL, pdf_dict_gets(dict, "ToUnicode")); } fz_catch(ctx) { /* FIXME: TryLater */ fz_warn(ctx, "cannot load ToUnicode CMap"); } skip_encoding: /* Widths */ pdf_set_default_hmtx(ctx, fontdesc, fontdesc->missing_width); widths = pdf_dict_gets(dict, "Widths"); if (widths) { int first, last; first = pdf_to_int(pdf_dict_gets(dict, "FirstChar")); last = pdf_to_int(pdf_dict_gets(dict, "LastChar")); if (first < 0 || last > 255 || first > last) first = last = 0; for (i = 0; i < last - first + 1; i++) { int wid = pdf_to_int(pdf_array_get(widths, i)); pdf_add_hmtx(ctx, fontdesc, i + first, i + first, wid); } } else { fz_lock(ctx, FZ_LOCK_FREETYPE); has_lock = 1; fterr = FT_Set_Char_Size(face, 1000, 1000, 72, 72); if (fterr) fz_warn(ctx, "freetype set character size: %s", ft_error_string(fterr)); for (i = 0; i < 256; i++) { pdf_add_hmtx(ctx, fontdesc, i, i, ft_width(ctx, fontdesc, i)); } fz_unlock(ctx, FZ_LOCK_FREETYPE); has_lock = 0; } pdf_end_hmtx(ctx, fontdesc); } fz_catch(ctx) { if (has_lock) fz_unlock(ctx, FZ_LOCK_FREETYPE); if (fontdesc && etable != fontdesc->cid_to_gid) fz_free(ctx, etable); pdf_drop_font(ctx, fontdesc); fz_rethrow_message(ctx, "cannot load simple font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict)); } return fontdesc; }
char * pdf_parse_link_dest(fz_context *ctx, pdf_document *doc, pdf_obj *dest) { pdf_obj *obj, *pageobj; fz_rect mediabox; fz_matrix pagectm; const char *ld; int page, x, y, h; dest = resolve_dest(ctx, doc, dest); if (dest == NULL) { fz_warn(ctx, "undefined link destination"); return NULL; } if (pdf_is_name(ctx, dest)) { ld = pdf_to_name(ctx, dest); return fz_strdup(ctx, ld); } else if (pdf_is_string(ctx, dest)) { ld = pdf_to_str_buf(ctx, dest); return fz_strdup(ctx, ld); } pageobj = pdf_array_get(ctx, dest, 0); if (pdf_is_int(ctx, pageobj)) { page = pdf_to_int(ctx, pageobj); pageobj = pdf_lookup_page_obj(ctx, doc, page); } else { fz_try(ctx) page = pdf_lookup_page_number(ctx, doc, pageobj); fz_catch(ctx) page = -1; } if (page < 0) return NULL; obj = pdf_array_get(ctx, dest, 1); if (obj) { /* Link coords use a coordinate space that does not seem to respect Rotate or UserUnit. */ /* All we need to do is figure out the page height to flip the coordinate space. */ pdf_page_obj_transform(ctx, pageobj, &mediabox, &pagectm); mediabox = fz_transform_rect(mediabox, pagectm); h = mediabox.y1 - mediabox.y0; if (pdf_name_eq(ctx, obj, PDF_NAME(XYZ))) { x = pdf_array_get_int(ctx, dest, 2); y = h - pdf_array_get_int(ctx, dest, 3); } else if (pdf_name_eq(ctx, obj, PDF_NAME(FitR))) { x = pdf_array_get_int(ctx, dest, 2); y = h - pdf_array_get_int(ctx, dest, 5); } else if (pdf_name_eq(ctx, obj, PDF_NAME(FitH)) || pdf_name_eq(ctx, obj, PDF_NAME(FitBH))) { x = 0; y = h - pdf_array_get_int(ctx, dest, 2); } else if (pdf_name_eq(ctx, obj, PDF_NAME(FitV)) || pdf_name_eq(ctx, obj, PDF_NAME(FitBV))) { x = pdf_array_get_int(ctx, dest, 2); y = 0; } else { x = 0; y = 0; } return fz_asprintf(ctx, "#%d,%d,%d", page + 1, x, y); } return fz_asprintf(ctx, "#%d", page + 1); }