/*
 * Insert all pages of `src` into the page tree of `dest`.
 *
 * The root pages-node of `src` is copied (together with everything it
 * references) into `dest` and hooked as a single child into the "Kids"
 * array of the pages-node that find_destination_pages() selects for
 * `dest_index`.
 *
 * Returns ERROR_INVALID_RANGE when the selected destination node is not an
 * indirect dictionary with a "Kids" array, ERROR_NO_PAGES when the root
 * pages-node of `src` is not an indirect dictionary, NoError otherwise.
 */
ErrorCode juggler_add_pages_from_file(juggler_t *dest, juggler_t *src, int dest_index)
{
	/* Default target: append to the root pages-node of dest. */
	pdf_obj *target_node = pdf_dict_getp(dest->ctx,
		pdf_trailer(dest->ctx, dest->pdf), "Root/Pages");
	int insert_at = pdf_array_len(dest->ctx,
		pdf_dict_gets(dest->ctx, target_node, "Kids"));

	/* Note: find_destination_pages() leaves both variables untouched
	   if the page index is greater than the number of pages. */
	find_destination_pages(dest->ctx, target_node, dest_index,
		&target_node, &insert_at);

	pdf_obj *target_kids = pdf_dict_gets(dest->ctx, target_node, "Kids");
	if(!(pdf_is_indirect(dest->ctx, target_node)
		&& pdf_is_dict(dest->ctx, target_node)
		&& pdf_is_array(dest->ctx, target_kids)))
	{
		return(ERROR_INVALID_RANGE);
	}

	pdf_obj *src_root = pdf_dict_getp(src->ctx,
		pdf_trailer(src->ctx, src->pdf), "Root/Pages");
	if(!(pdf_is_indirect(src->ctx, src_root) && pdf_is_dict(src->ctx, src_root)))
	{
		return(ERROR_NO_PAGES);
	}

	/* Copying the root pages-node and the objects it references copies
	   all pages and every object those pages need. */
	pdf_obj *subtree = copy_object_single(dest->ctx, dest->pdf,
		src->ctx, src->pdf, src_root);

	/* Hook the copied sub-tree into the destination node. */
	pdf_array_insert_drop(dest->ctx, target_kids, subtree, insert_at);

	/* Point the copied sub-tree back at its new parent. */
	int parent_num = pdf_to_num(dest->ctx, target_node);
	int parent_gen = pdf_to_gen(dest->ctx, target_node);
	pdf_obj *parent_ref = pdf_new_indirect(dest->ctx, dest->pdf, parent_num, parent_gen);
	pdf_dict_puts_drop(dest->ctx, subtree, "Parent", parent_ref);

	/* TODO: if the destination node carries inheritable entries that the
	   new node does not have, empty entries must be inserted into the new
	   node to prevent this inheritance. */

	/* Account for the added pages in the destination node's "Count". */
	int old_count = pdf_to_int(dest->ctx,
		pdf_dict_gets(dest->ctx, target_node, "Count"));
	int total = old_count + src->pagecount;
	pdf_dict_puts_drop(dest->ctx, target_node, "Count",
		pdf_new_int(dest->ctx, dest->pdf, total));

	/* Let MuPDF rebuild the page tree. */
	pdf_finish_edit(dest->ctx, dest->pdf);
	dest->pdf->page_count = total;

	/* Bring juggler's own bookkeeping up to date. */
	juggler_page_tree_changed_due_to_insert(dest, dest_index, src->pagecount);

	return(NoError);
}
/*
 * Follow a link destination until it is an array or an indirect reference.
 *
 * Names and strings are looked up with pdf_lookup_dest(); dictionaries are
 * replaced by their /D entry. Every such step counts as one level of depth;
 * once `depth` exceeds 10 the search gives up, which protects against
 * destinations that refer to each other in a cycle.
 *
 * Returns the resolved object, or NULL if nothing usable was found.
 */
static pdf_obj *
resolve_dest_rec(fz_context *ctx, pdf_document *doc, pdf_obj *dest, int depth)
{
	/* Each loop iteration corresponds to one level of the former recursion. */
	for (; depth <= 10; depth++)
	{
		if (pdf_is_name(ctx, dest) || pdf_is_string(ctx, dest))
		{
			dest = pdf_lookup_dest(ctx, doc, dest);
			continue;
		}

		if (pdf_is_array(ctx, dest))
			return dest;

		if (pdf_is_dict(ctx, dest))
		{
			dest = pdf_dict_get(ctx, dest, PDF_NAME_D);
			continue;
		}

		if (pdf_is_indirect(ctx, dest))
			return dest;

		return NULL;
	}

	/* Depth limit (arbitrary) reached. */
	return NULL;
}
/* returns the number of attachments saved */ int save_attachments(int pageno, char *targetdir) { pdf_page *page = pdf_load_page(doc, pageno-1); pdf_annot *annot; int saved_count = 0; for (annot = page->annots; annot ; annot = annot->next) { pdf_obj *fs_obj = pdf_dict_gets(annot->obj, "FS"); if (fs_obj) { pdf_obj *ef_obj; char *name = basename(strdup(pdf_to_str_buf(pdf_dict_gets(fs_obj, "F")))); ef_obj = pdf_dict_gets(fs_obj, "EF"); if (ef_obj) { pdf_obj *f_obj = pdf_dict_gets(ef_obj, "F"); if (f_obj && pdf_is_indirect(f_obj)) { static char pathname[PATH_MAX]; sprintf(pathname, "%s/%s", targetdir, name); FILE *fout = fopen(pathname, "w"); if (!fout) { fprintf(stderr, "extr: cannot write to file %s\n", pathname); exit(1); } dump_stream(pdf_to_num(f_obj), fout); fclose(fout); saved_count++; } } } } return saved_count; }
/*
 * Follow a link destination until it is an array or an indirect reference.
 *
 * Names and strings are looked up with pdf_lookup_dest(), dictionaries are
 * replaced by their "D" entry; both steps recurse with depth + 1.
 * Returns NULL when `depth` exceeds 10 (an arbitrary bound that stops
 * infinite recursion) or when `dest` is none of the handled kinds.
 */
static pdf_obj *
resolve_dest_rec(pdf_document *xref, pdf_obj *dest, int depth)
{
	if (depth > 10)
		return NULL;

	if (pdf_is_name(dest) || pdf_is_string(dest))
		return resolve_dest_rec(xref, pdf_lookup_dest(xref, dest), depth + 1);

	if (pdf_is_array(dest))
		return dest;

	if (pdf_is_dict(dest))
		return resolve_dest_rec(xref, pdf_dict_gets(dest, "D"), depth + 1);

	return pdf_is_indirect(dest) ? dest : NULL;
}
/* * Create a filter given a name and param dictionary. */ static fz_stream * build_filter(fz_context *ctx, fz_stream *chain, pdf_document *doc, pdf_obj *f, pdf_obj *p, int num, int gen, fz_compression_params *params) { fz_compression_params local_params; if (params == NULL) params = &local_params; build_compression_params(ctx, f, p, params); /* If we were using params we were passed in, and we successfully * recognised the image type, we can use the existing filter and * shortstop here. */ if (params != &local_params && params->type != FZ_IMAGE_RAW) return chain; if (params->type != FZ_IMAGE_RAW) return fz_open_image_decomp_stream(ctx, chain, params, NULL); if (pdf_name_eq(ctx, f, PDF_NAME_ASCIIHexDecode) || pdf_name_eq(ctx, f, PDF_NAME_AHx)) return fz_open_ahxd(ctx, chain); else if (pdf_name_eq(ctx, f, PDF_NAME_ASCII85Decode) || pdf_name_eq(ctx, f, PDF_NAME_A85)) return fz_open_a85d(ctx, chain); else if (pdf_name_eq(ctx, f, PDF_NAME_JBIG2Decode)) { fz_jbig2_globals *globals = NULL; pdf_obj *obj = pdf_dict_get(ctx, p, PDF_NAME_JBIG2Globals); if (pdf_is_indirect(ctx, obj)) globals = pdf_load_jbig2_globals(ctx, doc, obj); /* fz_open_jbig2d takes possession of globals */ return fz_open_jbig2d(ctx, chain, globals); } else if (pdf_name_eq(ctx, f, PDF_NAME_JPXDecode)) return chain; /* JPX decoding is special cased in the image loading code */ else if (pdf_name_eq(ctx, f, PDF_NAME_Crypt)) { pdf_obj *name; if (!doc->crypt) { fz_warn(ctx, "crypt filter in unencrypted document"); return chain; } name = pdf_dict_get(ctx, p, PDF_NAME_Name); if (pdf_is_name(ctx, name)) return pdf_open_crypt_with_filter(ctx, chain, doc->crypt, name, num, gen); return chain; } fz_warn(ctx, "unknown filter name (%s)", pdf_to_name(ctx, f)); return chain; }
/*
 * Print a store key to `out`: indirect references as "(num gen R) ",
 * anything else via pdf_print_obj().
 */
static void
pdf_print_key(fz_context *ctx, fz_output *out, void *key_)
{
	pdf_obj *ref = key_;

	if (!pdf_is_indirect(ctx, ref))
	{
		pdf_print_obj(ctx, out, ref, 0);
		return;
	}

	fz_printf(ctx, out, "(%d %d R) ", pdf_to_num(ctx, ref), pdf_to_gen(ctx, ref));
}
/*
 * Print a store key to stdout: indirect references as "(num gen R) ",
 * anything else via pdf_print_obj().
 */
static void
pdf_debug_key(void *key_)
{
	pdf_obj *ref = key_;

	if (!pdf_is_indirect(ref))
	{
		pdf_print_obj(ref);
		return;
	}

	printf("(%d %d R) ", pdf_to_num(ref), pdf_to_gen(ref));
}
/*
 * Fill `hash` from a store key.
 *
 * Only indirect references are hashable: their object number goes into
 * i0 and their generation into i1. Returns 1 when the hash was filled in,
 * 0 when the key is not an indirect reference.
 */
static int
pdf_make_hash_key(fz_store_hash *hash, void *key_)
{
	pdf_obj *ref = key_;

	if (pdf_is_indirect(ref))
	{
		hash->u.i.i0 = pdf_to_num(ref);
		hash->u.i.i1 = pdf_to_gen(ref);
		return 1;
	}

	return 0;
}
/*
 * Print a store key to `out`: indirect references as "(num gen R) ",
 * anything else via pdf_fprint_obj().
 */
static void
pdf_debug_key(FILE *out, void *key_)
{
	pdf_obj *ref = key_;

	if (!pdf_is_indirect(ref))
	{
		pdf_fprint_obj(out, ref, 0);
		return;
	}

	fprintf(out, "(%d %d R) ", pdf_to_num(ref), pdf_to_gen(ref));
}
/*
 * Fill `hash` from a store key.
 *
 * Only indirect references are hashable: object number, generation and
 * the document the reference belongs to are stored in i0, i1 and ptr.
 * Returns 1 when the hash was filled in, 0 when the key is not an
 * indirect reference.
 */
static int
pdf_make_hash_key(fz_context *ctx, fz_store_hash *hash, void *key_)
{
	pdf_obj *ref = key_;

	if (pdf_is_indirect(ctx, ref))
	{
		hash->u.i.i0 = pdf_to_num(ctx, ref);
		hash->u.i.i1 = pdf_to_gen(ctx, ref);
		hash->u.i.ptr = pdf_get_indirect_document(ctx, ref);
		return 1;
	}

	return 0;
}
// just copy one of the resource sub-entries (e.g. /Font) static int copy_and_rename_resource(fz_context *dest_ctx, pdf_obj *dest, fz_context *src_ctx, pdf_obj *src, char *prefix, struct put_info *info) { char new_name[64]; /* this buffer is big enough up to hold all digits for two 16-bit numbers */ int i; for(i = 0; i < pdf_dict_len(src_ctx, src); i++) { pdf_obj *src_key = pdf_dict_get_key(src_ctx, src, i); pdf_obj *src_val = pdf_dict_get_val(src_ctx, src, i); if(!pdf_is_name(src_ctx, src_key)) { return(2); } /* if this is an inline resource, just copy this object into the new resource dict */ if(!pdf_is_indirect(src_ctx, src_val)) { if(snprintf(new_name, sizeof(new_name) / sizeof(new_name[0]), "%sinline_%d", prefix, info->next_inline_id++) >= sizeof(new_name) / sizeof(new_name[0])) return(1); // not enough space pdf_obj *new_res = copy_unassigned_object_continue(dest_ctx, info->dest_doc, src_ctx, info->src_doc, src_val, &info->new_ids); //pdf_obj *new_res = pdf_new_dict(dest_ctx, info->dest_doc, 10); printf("dump it...\n"); pdf_fprint_obj(dest_ctx, stdout, new_res, 0); /* now reference this new object in the resource object of this sheet */ pdf_obj *dest_key = pdf_new_name(dest_ctx, info->dest_doc, new_name); pdf_dict_put(dest_ctx, dest, dest_key, new_res); pdf_drop_obj(dest_ctx, dest_key); pdf_drop_obj(dest_ctx, new_res); } else { /* The new name of resource objects is always the num/gen of the referenced object in the src-file. Thus we can check by that name if the object was already referenced by another page of this sheet. */ if(snprintf(new_name, sizeof(new_name) / sizeof(new_name[0]), "%s%d_%d", prefix, pdf_to_num(dest_ctx, src_val), pdf_to_gen(dest_ctx, src_val)) >= sizeof(new_name) / sizeof(new_name[0])) return(1); // not enough space if(pdf_dict_gets(dest_ctx, dest, new_name) == NULL) { /* if this resource is not inlined and not already in the resource-dict of the current sheet... */ /* ...copy the referenced resource to the new document! 
If this object has copied already (for another sheet in dest_doc), copy_object_continue() will do nothing */ pdf_obj *new_res = copy_object_continue(dest_ctx, info->dest_doc, src_ctx, info->src_doc, src_val, &info->new_ids); /* now reference this new object in the resource object of this sheet */ pdf_obj *dest_key = pdf_new_name(dest_ctx, info->dest_doc, new_name); pdf_dict_put(dest_ctx, dest, dest_key, new_res); pdf_drop_obj(dest_ctx, dest_key); pdf_drop_obj(dest_ctx, new_res); } } /* even if it was used on another sheet or on this sheet, add it to the rename-dict for this sheet! Because it could have different names on different source-pages */ pdf_obj *rename_key = pdf_new_name(dest_ctx, info->dest_doc, pdf_to_name(dest_ctx, src_key)); pdf_obj *rename_val = pdf_new_name(dest_ctx, info->dest_doc, new_name); pdf_dict_put(dest_ctx, info->rename_dict, rename_key, rename_val); pdf_drop_obj(dest_ctx, rename_key); pdf_drop_obj(dest_ctx, rename_val); } return(0); }
/*
 * Examine one object during xref repair. The caller has just consumed
 * "<int> <int> obj" from doc->file.
 *
 * On entry *stmofsp is reset to 0 and, if stmlenp is non-NULL, *stmlenp to
 * -1. If the object is a dictionary, selected entries are handed back
 * through the optional out-parameters:
 *   encrypt, id, root - taken from a dictionary whose /Type is /XRef
 *   page              - the dictionary itself if its /Type is /Page and the
 *                       document is being read linearly
 *
 * NOTE: only the beginning of this function is visible in this part of the
 * file; the handling of the stream body and the return value is not
 * covered by these comments.
 */
int
pdf_repair_obj(fz_context *ctx, pdf_document *doc, pdf_lexbuf *buf, fz_off_t *stmofsp, int *stmlenp, pdf_obj **encrypt, pdf_obj **id, pdf_obj **page, fz_off_t *tmpofs, pdf_obj **root)
{
	fz_stream *file = doc->file;
	pdf_token tok;
	int stm_len;

	*stmofsp = 0;
	if (stmlenp)
		*stmlenp = -1;

	stm_len = 0;

	/* On entry to this function, we know that we've just seen
	 * '<int> <int> obj'. We expect the next thing we see to be a
	 * pdf object. Regardless of the type of thing we meet next
	 * we only need to fully parse it if it is a dictionary. */
	tok = pdf_lex(ctx, file, buf);

	if (tok == PDF_TOK_OPEN_DICT)
	{
		pdf_obj *dict, *obj;

		fz_try(ctx)
		{
			dict = pdf_parse_dict(ctx, doc, file, buf);
		}
		fz_catch(ctx)
		{
			fz_rethrow_if(ctx, FZ_ERROR_TRYLATER);
			/* Don't let a broken object at EOF overwrite a good one */
			if (file->eof)
				fz_rethrow(ctx);
			/* Silently swallow the error and continue with an empty
			 * dictionary so the lookups below simply find nothing. */
			dict = pdf_new_dict(ctx, NULL, 2);
		}

		/* We must be careful not to try to resolve any indirections
		 * here. We have just read dict, so we know it to be a non
		 * indirected dictionary. Before we look at any values that
		 * we get back from looking up in it, we need to check they
		 * aren't indirected.
		 */

		if (encrypt || id || root)
		{
			/* Only a cross-reference stream dictionary (/Type /XRef)
			 * is consulted for these entries. */
			obj = pdf_dict_get(ctx, dict, PDF_NAME_Type);
			if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_XRef))
			{
				if (encrypt)
				{
					obj = pdf_dict_get(ctx, dict, PDF_NAME_Encrypt);
					if (obj)
					{
						/* Replace a previously found value. */
						pdf_drop_obj(ctx, *encrypt);
						*encrypt = pdf_keep_obj(ctx, obj);
					}
				}

				if (id)
				{
					obj = pdf_dict_get(ctx, dict, PDF_NAME_ID);
					if (obj)
					{
						/* Replace a previously found value. */
						pdf_drop_obj(ctx, *id);
						*id = pdf_keep_obj(ctx, obj);
					}
				}

				/* NOTE(review): unlike encrypt and id above, the previous
				 * *root is neither dropped nor preserved when the entry is
				 * missing - confirm that this cannot leak a reference or
				 * overwrite a good root. */
				if (root)
					*root = pdf_keep_obj(ctx, pdf_dict_get(ctx, dict, PDF_NAME_Root));
			}
		}

		/* Remember a direct integer /Length as the stream length. */
		obj = pdf_dict_get(ctx, dict, PDF_NAME_Length);
		if (!pdf_is_indirect(ctx, obj) && pdf_is_int(ctx, obj))
			stm_len = pdf_to_int(ctx, obj);

		if (doc->file_reading_linearly && page)
		{
			/* Hand back the dictionary itself if it is a page object. */
			obj = pdf_dict_get(ctx, dict, PDF_NAME_Type);
			if (!pdf_is_indirect(ctx, obj) && pdf_name_eq(ctx, obj, PDF_NAME_Page))
			{
				pdf_drop_obj(ctx, *page);
				*page = pdf_keep_obj(ctx, dict);
			}
		}

		pdf_drop_obj(ctx, dict);
	}
/*
 * Remove `annot` from `page`.
 *
 * The annotation is unlinked from the page's annotation list and moved to
 * the page's list of deleted annotations; its appearance stream is dropped.
 * The page's /Annots array is rebuilt without the annotation and, if the
 * annotation object is indirect, that object is deleted from the document.
 * The document is marked dirty.
 *
 * Does nothing if `annot` is NULL or does not belong to `page`.
 */
void
pdf_delete_annot(fz_context *ctx, pdf_document *doc, pdf_page *page, pdf_annot *annot)
{
	pdf_annot **link;
	pdf_obj *page_annots;
	pdf_obj *kept;

	if (annot == NULL)
		return;

	/* Find the link that points at annot. */
	link = &page->annots;
	while (*link != NULL && *link != annot)
		link = &(*link)->next;

	/* The annotation is not on this page. */
	if (*link == NULL)
		return;

	/* Unlink; if it was the last element the tail pointer moves back. */
	*link = annot->next;
	if (*link == NULL)
		page->annot_tailp = link;

	/* Push onto the deleted list. */
	annot->next = page->deleted_annots;
	page->deleted_annots = annot;

	pdf_drop_xobject(ctx, annot->ap);
	annot->ap = NULL;

	/* Rebuild the "Annots" array without this annotation. */
	page_annots = pdf_dict_get(ctx, page->me, PDF_NAME_Annots);
	if (page_annots != NULL)
	{
		int count = pdf_array_len(ctx, page_annots);
		int idx;

		kept = pdf_new_array(ctx, doc, count != 0 ? count - 1 : 0);

		fz_try(ctx)
		{
			for (idx = 0; idx < count; idx++)
			{
				pdf_obj *entry = pdf_array_get(ctx, page_annots, idx);
				if (entry == annot->obj)
					continue;
				pdf_array_push(ctx, kept, entry);
			}

			/* An indirect array is replaced in place, a direct one is
			 * replaced in the page dictionary. */
			if (!pdf_is_indirect(ctx, page_annots))
				pdf_dict_put(ctx, page->me, PDF_NAME_Annots, kept);
			else
				pdf_update_object(ctx, doc, pdf_to_num(ctx, page_annots), kept);

			if (pdf_is_indirect(ctx, annot->obj))
				pdf_delete_object(ctx, doc, pdf_to_num(ctx, annot->obj));
		}
		fz_always(ctx)
		{
			pdf_drop_obj(ctx, kept);
		}
		fz_catch(ctx)
		{
			fz_rethrow(ctx);
		}
	}

	pdf_drop_obj(ctx, annot->obj);
	annot->obj = NULL;

	doc->dirty = 1;
}
/*
 * Load a CMap from a stream object of the PDF file.
 *
 * A CMap already present in the store is returned directly. Otherwise the
 * stream is parsed, /WMode is applied if it is an integer, and /UseCMap is
 * honoured: a name selects a system CMap, an indirect reference is loaded
 * recursively. The result is put into the store under stmobj.
 *
 * Throws if stmobj is already marked (recursion) or if loading fails.
 */
pdf_cmap *
pdf_load_embedded_cmap(fz_context *ctx, pdf_document *doc, pdf_obj *stmobj)
{
	fz_stream *stm = NULL;
	pdf_cmap *cmap = NULL;
	pdf_cmap *used = NULL;
	pdf_obj *entry;

	fz_var(stm);
	fz_var(cmap);
	fz_var(used);

	if (pdf_obj_marked(ctx, stmobj))
		fz_throw(ctx, FZ_ERROR_GENERIC, "Recursion in embedded cmap");

	/* Cached from an earlier load? */
	cmap = pdf_find_item(ctx, pdf_drop_cmap_imp, stmobj);
	if (cmap != NULL)
		return cmap;

	fz_try(ctx)
	{
		stm = pdf_open_stream(ctx, stmobj);
		cmap = pdf_load_cmap(ctx, stm);

		entry = pdf_dict_get(ctx, stmobj, PDF_NAME_WMode);
		if (pdf_is_int(ctx, entry))
		{
			pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(ctx, entry));
		}

		entry = pdf_dict_get(ctx, stmobj, PDF_NAME_UseCMap);
		if (pdf_is_name(ctx, entry))
		{
			used = pdf_load_system_cmap(ctx, pdf_to_name(ctx, entry));
			pdf_set_usecmap(ctx, cmap, used);
		}
		else if (pdf_is_indirect(ctx, entry))
		{
			/* The mark guards against CMaps that use each other in a
			 * cycle; it is removed again whatever the outcome. */
			if (pdf_mark_obj(ctx, entry))
				fz_throw(ctx, FZ_ERROR_GENERIC, "recursive CMap");
			fz_try(ctx)
			{
				used = pdf_load_embedded_cmap(ctx, doc, entry);
			}
			fz_always(ctx)
			{
				pdf_unmark_obj(ctx, entry);
			}
			fz_catch(ctx)
			{
				fz_rethrow(ctx);
			}
			pdf_set_usecmap(ctx, cmap, used);
		}

		pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap));
	}
	fz_always(ctx)
	{
		fz_drop_stream(ctx, stm);
		pdf_drop_cmap(ctx, used);
	}
	fz_catch(ctx)
	{
		pdf_drop_cmap(ctx, cmap);
		fz_rethrow(ctx);
	}

	return cmap;
}
/*
 * Turn the page object of page `pageno` (1-based) into a Form XObject,
 * in place.
 *
 * Steps, in this order:
 *   1. All content streams of the page are handed to
 *      add_to_srcpage_stream() for the page object itself.
 *   2. stream_deflate() is run on that object; its result decides whether
 *      a /Filter /FlateDecode entry is written.
 *   3. Every dictionary entry except "Resources" (compared
 *      case-insensitively) is deleted from the page object.
 *   4. The entries of a Form XObject are written: Type, Subtype, FormType,
 *      optionally Filter, Length, BBox (from wmupdf_page_bbox) and an
 *      identity Matrix.
 */
static void wmupdf_convert_single_page_to_form(pdf_document *xref,fz_context *ctx,int pageno)

    {
    pdf_obj *array,*srcpageobj,*srcpagecontents;
    int i,len,streamlen,pageref,pagegen,compressed;
    double bbox_array[4];
    double matrix[6];

    /* New source page, so get the source page objects */
    srcpageobj = xref->page_objs[pageno-1];
    pageref=pdf_to_num(xref->page_refs[pageno-1]);
    pagegen=pdf_to_gen(xref->page_refs[pageno-1]);
    wmupdf_page_bbox(srcpageobj,bbox_array);
    /* Identity matrix: 1 0 0 1 0 0 */
    for (i=0;i<6;i++)
        matrix[i]=0.;
    matrix[0]=matrix[3]=1.;
    srcpagecontents=pdf_dict_gets(srcpageobj,"Contents");
    /* Concatenate all indirect streams from source page directly into it. */
    /* streamlen keeps the value returned by the LAST call to */
    /* add_to_srcpage_stream(). */
    /* NOTE(review): presumably that value is the accumulated length--confirm. */
    streamlen=0;
    if (pdf_is_array(srcpagecontents))
        {
        int k;
        for (k=0;k<pdf_array_len(srcpagecontents);k++)
            {
            pdf_obj *obj;
            obj=pdf_array_get(srcpagecontents,k);
            /* NOTE(review): the result of pdf_resolve_indirect() is discarded here */
            if (pdf_is_indirect(obj))
                pdf_resolve_indirect(obj);
            streamlen=add_to_srcpage_stream(xref,ctx,pageref,pagegen,obj);
            }
        }
    else
        {
        /* NOTE(review): the result of pdf_resolve_indirect() is discarded here */
        if (pdf_is_indirect(srcpagecontents))
            pdf_resolve_indirect(srcpagecontents);
        streamlen=add_to_srcpage_stream(xref,ctx,pageref,pagegen,srcpagecontents);
        }
    compressed=stream_deflate(xref,ctx,pageref,pagegen,&streamlen);
    /* Fetch the page object and its object number again after the stream work */
    srcpageobj = xref->page_objs[pageno-1];
    pageref=pdf_to_num(xref->page_refs[pageno-1]);
    len=pdf_dict_len(srcpageobj);
    for (i=0;i<len;i++)
        {
        pdf_obj *key;

        key=pdf_dict_get_key(srcpageobj,i);
        /* Keep same resources */
        if (!pdf_is_name(key))
            continue;
        if (pdf_is_name(key) && !stricmp("Resources",pdf_to_name(key)))
            continue;
        /* Drop dictionary entry otherwise. Deleting changes the */
        /* dictionary, so the scan restarts from the beginning */
        /* (i becomes 0 after the loop increment). */
        pdf_dict_del(srcpageobj,key);
        i=-1;
        len=pdf_dict_len(srcpageobj);
        }
    pdf_dict_puts(srcpageobj,"Type",pdf_new_name(ctx,"XObject"));
    pdf_dict_puts(srcpageobj,"Subtype",pdf_new_name(ctx,"Form"));
    pdf_dict_puts(srcpageobj,"FormType",pdf_new_int(ctx,1));
    if (compressed)
        pdf_dict_puts(srcpageobj,"Filter",pdf_new_name(ctx,"FlateDecode"));
    pdf_dict_puts(srcpageobj,"Length",pdf_new_int(ctx,streamlen));
    array=pdf_new_array(ctx,4);
    for (i=0;i<4;i++)
        pdf_array_push(array,pdf_new_real(ctx,bbox_array[i]));
    pdf_dict_puts(srcpageobj,"BBox",array);
    array=pdf_new_array(ctx,6);
    for (i=0;i<6;i++)
        pdf_array_push(array,pdf_new_real(ctx,matrix[i]));
    pdf_dict_puts(srcpageobj,"Matrix",array);
    /* (It's no longer a "page"--it's a Form-type XObject) */
    /* The original author chose not to call pdf_update_object() here, */
    /* noting that it would call fz_drop_object on srcpageobj. */
    }
/*
 * Load CMap stream in PDF file
 *
 * A CMap already present in the store is returned directly. Otherwise the
 * stream is parsed, "WMode" is applied if it is an integer, and "UseCMap"
 * is honoured: a name selects a system CMap, an indirect reference is
 * loaded recursively. The result is put into the store under stmobj.
 *
 * `phase` records how far loading got, so that the catch block can pick a
 * matching error message:
 *   0  stream not opened yet
 *   1  stream opened, CMap not parsed yet
 *   2  CMap parsed (also covers loading a system usecmap)
 *   3  embedded usecmap being loaded; obj is marked
 *   4  embedded usecmap loaded; obj is unmarked again
 *
 * Throws if stmobj is already marked (recursion) or if loading fails.
 */
pdf_cmap *
pdf_load_embedded_cmap(pdf_document *doc, pdf_obj *stmobj)
{
	fz_stream *file = NULL;
	pdf_cmap *cmap = NULL;
	pdf_cmap *usecmap;
	pdf_obj *wmode;
	pdf_obj *obj = NULL;
	fz_context *ctx = doc->ctx;
	int phase = 0;

	/* These are changed inside fz_try and read in fz_catch. */
	fz_var(phase);
	fz_var(obj);
	fz_var(file);
	fz_var(cmap);

	if (pdf_obj_marked(stmobj))
		fz_throw(ctx, FZ_ERROR_GENERIC, "Recursion in embedded cmap");

	/* Cached from an earlier load? */
	if ((cmap = pdf_find_item(ctx, pdf_free_cmap_imp, stmobj)) != NULL)
	{
		return cmap;
	}

	fz_try(ctx)
	{
		file = pdf_open_stream(doc, pdf_to_num(stmobj), pdf_to_gen(stmobj));
		phase = 1;
		cmap = pdf_load_cmap(ctx, file);
		phase = 2;
		/* The stream is no longer needed; clearing the pointer keeps
		 * the catch block from closing it a second time. */
		fz_close(file);
		file = NULL;

		wmode = pdf_dict_gets(stmobj, "WMode");
		if (pdf_is_int(wmode))
			pdf_set_cmap_wmode(ctx, cmap, pdf_to_int(wmode));
		obj = pdf_dict_gets(stmobj, "UseCMap");
		if (pdf_is_name(obj))
		{
			usecmap = pdf_load_system_cmap(ctx, pdf_to_name(obj));
			pdf_set_usecmap(ctx, cmap, usecmap);
			pdf_drop_cmap(ctx, usecmap);
		}
		else if (pdf_is_indirect(obj))
		{
			/* Mark obj while it is being loaded so that a CMap which
			 * (indirectly) uses itself is detected by the check at the
			 * top of this function. */
			phase = 3;
			pdf_mark_obj(obj);
			usecmap = pdf_load_embedded_cmap(doc, obj);
			pdf_unmark_obj(obj);
			phase = 4;
			pdf_set_usecmap(ctx, cmap, usecmap);
			pdf_drop_cmap(ctx, usecmap);
		}

		pdf_store_item(ctx, stmobj, cmap, pdf_cmap_size(ctx, cmap));
	}
	fz_catch(ctx)
	{
		if (file)
			fz_close(file);
		if (cmap)
			pdf_drop_cmap(ctx, cmap);
		if (phase < 1)
			fz_rethrow_message(ctx, "cannot open cmap stream (%d %d R)", pdf_to_num(stmobj), pdf_to_gen(stmobj));
		else if (phase < 2)
			fz_rethrow_message(ctx, "cannot parse cmap stream (%d %d R)", pdf_to_num(stmobj), pdf_to_gen(stmobj));
		else if (phase < 3)
			fz_rethrow_message(ctx, "cannot load system usecmap '%s'", pdf_to_name(obj));
		else
		{
			/* The recursive load failed before obj could be unmarked. */
			if (phase == 3)
				pdf_unmark_obj(obj);
			fz_rethrow_message(ctx, "cannot load embedded usecmap (%d %d R)", pdf_to_num(obj), pdf_to_gen(obj));
		}
	}

	return cmap;
}
/*
 * Copy one content stream of a source page into the destination document,
 * wrapped so that it is clipped and positioned as described by `pos`.
 *
 * dest  the content-streams array of the new page (must be an array); a
 *       reference to the newly created stream is appended to it
 * src   an indirect reference to one source content stream
 * info  supplies the source/destination documents and the rename
 *       dictionary applied to resource names in the stream
 * pos   clip rectangles, position and rotation of the page on the sheet
 *
 * The generated stream has the form
 *     q
 *     <outer clip> re W n
 *     <placement matrix> cm
 *     [<bleed clip> re W n]
 *     <renamed source content>
 *     Q
 *
 * Placement matrices by rotation:
 *     translation:  1  0  0  1 dx dy
 *     rotation 0:   1  0  0  1  0  0
 *     rotation 90:  0  1 -1  0  0  0
 *     rotation 180: -1 0  0 -1  0  0
 *     rotation 270: 0 -1  1  0  0  0
 * Any other rotation value emits no placement matrix.
 *
 * Returns 0 on success, -1 if `dest` is not an array or `src` is not an
 * indirect reference.
 */
int copy_content_stream_of_page(fz_context *dest_ctx, pdf_obj *dest, fz_context *src_ctx, pdf_obj *src, struct put_info *info, struct pos_info *pos)
{
	/* Both arguments have to be usable; either one being wrong is an error. */
	if(!pdf_is_array(dest_ctx, dest) || !pdf_is_indirect(src_ctx, src))
		return(-1);

	fz_buffer *buffer = fz_new_buffer(dest_ctx, 1024);
	fz_output *output = fz_new_output_with_buffer(dest_ctx, buffer);

	fz_printf(dest_ctx, output, "q\n");

	/* set the outer clip region */
	fz_printf(dest_ctx, output, "%f %f %f %f re W n\n",
		pos->outer_clip_x, pos->outer_clip_y, pos->outer_clip_width, pos->outer_clip_height);

	/* position the page correctly */
	if(pos->rotate == 0) {
		fz_printf(dest_ctx, output, "1 0 0 1 %f %f cm\n",
			pos->x + pos->content_translate_x, pos->y + pos->content_translate_y);
	} else if(pos->rotate == 90) {
		fz_printf(dest_ctx, output, "0 1 -1 0 %f %f cm\n", pos->x + pos->width, pos->y);
	} else if(pos->rotate == 180) {
		fz_printf(dest_ctx, output, "-1 0 0 -1 %f %f cm\n",
			pos->width + pos->x - pos->content_translate_x,
			pos->height + pos->y - pos->content_translate_y);
	} else if(pos->rotate == 270) {
		fz_printf(dest_ctx, output, "0 -1 1 0 %f %f cm\n", pos->x, pos->y + pos->height);
	}

	/* an all-zero bleed clip means "no bleed clip" */
	if(pos->bleed_clip_x != 0.0 || pos->bleed_clip_y != 0.0 ||
		pos->bleed_clip_width != 0.0 || pos->bleed_clip_height != 0.0)
	{
		fz_printf(dest_ctx, output, "%f %f %f %f re W n\n",
			pos->bleed_clip_x, pos->bleed_clip_y, pos->bleed_clip_width, pos->bleed_clip_height);
	}

	/* append the source content with its resource names rewritten */
	int src_num = pdf_to_num(src_ctx, src);
	int src_gen = pdf_to_gen(src_ctx, src);
	fz_stream *input = pdf_open_stream(src_ctx, info->src_doc, src_num, src_gen);

	rename_res_in_content_stream(src_ctx, input, dest_ctx, output, info->rename_dict);

	fz_printf(dest_ctx, output, "Q");

	fz_drop_output(dest_ctx, output);
	/* the stream was opened through src_ctx, so it is released through it as well */
	fz_drop_stream(src_ctx, input);

	/* create the stream object in the destination document */
	int new_num = pdf_create_object(dest_ctx, info->dest_doc);
	pdf_obj *new_ref = pdf_new_indirect(dest_ctx, info->dest_doc, new_num, 0);

	/* each stream has a dict containing at least its length... */
	pdf_obj *stream_info_dict = pdf_new_dict(dest_ctx, info->dest_doc, 1);
	pdf_dict_puts_drop(dest_ctx, stream_info_dict, "Length",
		pdf_new_int(dest_ctx, info->dest_doc, buffer->len));
	pdf_update_object(dest_ctx, info->dest_doc, new_num, stream_info_dict);
	pdf_drop_obj(dest_ctx, stream_info_dict);

	pdf_update_stream(dest_ctx, info->dest_doc, new_ref, buffer, 0);
	fz_drop_buffer(dest_ctx, buffer);

	/* finally reference the new stream from the page's content array */
	pdf_array_push(dest_ctx, dest, new_ref);
	pdf_drop_obj(dest_ctx, new_ref);

	return(0);
}
/*
 * Build a font descriptor for a CID font.
 *
 * dict        the CID font dictionary (BaseFont, CIDSystemInfo,
 *             FontDescriptor, CIDToGIDMap, DW, W, DW2, W2 are read from it)
 * encoding    a CMap name or an indirect reference to an embedded CMap
 * to_unicode  passed on to pdf_load_to_unicode()
 *
 * Returns the new font descriptor. On any failure the partially built
 * descriptor is dropped and an error naming the font object is thrown.
 */
static pdf_font_desc *
load_cid_font(pdf_document *xref, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
{
	pdf_obj *widths;
	pdf_obj *descriptor;
	pdf_font_desc *fontdesc = NULL;
	FT_Face face;
	int kind;
	char collection[256];
	char *basefont;
	int i, k, fterr;
	pdf_obj *obj;
	int dw;
	fz_context *ctx = xref->ctx;

	/* fontdesc is assigned inside fz_try and dropped in fz_catch. */
	fz_var(fontdesc);

	fz_try(ctx)
	{
		/* Get font name and CID collection */

		basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont"));

		{
			pdf_obj *cidinfo;
			char tmpstr[64];
			int tmplen;

			cidinfo = pdf_dict_gets(dict, "CIDSystemInfo");
			if (!cidinfo)
				fz_throw(ctx, "cid font is missing info");

			/* collection = "<Registry>-<Ordering>"; each part is cut to
			 * at most 63 bytes before it is appended. */
			obj = pdf_dict_gets(cidinfo, "Registry");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcpy(collection, tmpstr, sizeof collection);

			fz_strlcat(collection, "-", sizeof collection);

			obj = pdf_dict_gets(cidinfo, "Ordering");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcat(collection, tmpstr, sizeof collection);
		}

		/* Load font file */

		fontdesc = pdf_new_font_desc(ctx);

		descriptor = pdf_dict_gets(dict, "FontDescriptor");
		if (!descriptor)
			fz_throw(ctx, "syntaxerror: missing font descriptor");
		pdf_load_font_descriptor(fontdesc, xref, descriptor, collection, basefont);

		face = fontdesc->font->ft_face;
		kind = ft_kind(face);

		/* Encoding */

		if (pdf_is_name(encoding))
		{
			/* The two identity encodings are built directly, any other
			 * name is looked up as a system CMap. */
			if (!strcmp(pdf_to_name(encoding), "Identity-H"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2);
			else if (!strcmp(pdf_to_name(encoding), "Identity-V"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2);
			else
				fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding));
		}
		else if (pdf_is_indirect(encoding))
		{
			fontdesc->encoding = pdf_load_embedded_cmap(xref, encoding);
		}
		else
		{
			fz_throw(ctx, "syntaxerror: font missing encoding");
		}

		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
		pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));

		if (kind == TRUETYPE)
		{
			pdf_obj *cidtogidmap;

			cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap");
			if (pdf_is_indirect(cidtogidmap))
			{
				fz_buffer *buf;

				buf = pdf_load_stream(xref, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap));

				/* The stream is read as a table of 16-bit values, high
				 * byte first; a trailing odd byte is ignored. */
				fontdesc->cid_to_gid_len = (buf->len) / 2;
				fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short));
				fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
				for (i = 0; i < fontdesc->cid_to_gid_len; i++)
					fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1];

				fz_drop_buffer(ctx, buf);
			}

			/* if truetype font is external, cidtogidmap should not be identity */
			/* so we map from cid to unicode and then map that through the (3 1) */
			/* unicode cmap to get a glyph id */
			else if (fontdesc->font->ft_substitute)
			{
				fterr = FT_Select_Charmap(face, ft_encoding_unicode);
				if (fterr)
				{
					fz_throw(ctx, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
				}

				/* Pick the CID -> UCS2 CMap matching the collection;
				 * other collections get no to_ttf_cmap. */
				if (!strcmp(collection, "Adobe-CNS1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
				else if (!strcmp(collection, "Adobe-GB1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan2"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
				else if (!strcmp(collection, "Adobe-Korea1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
			}
		}

		pdf_load_to_unicode(xref, fontdesc, NULL, collection, to_unicode);

		/* Horizontal */

		/* Default width is 1000 unless DW is present. */
		dw = 1000;
		obj = pdf_dict_gets(dict, "DW");
		if (obj)
			dw = pdf_to_int(obj);
		pdf_set_default_hmtx(ctx, fontdesc, dw);

		/* W holds two kinds of records:
		 *   c0 [w0 w1 ...]  widths for the CIDs c0, c0+1, ...
		 *   c0 c1 w         one width for the CID range c0..c1 */
		widths = pdf_dict_gets(dict, "W");
		if (widths)
		{
			int c0, c1, w, n, m;

			n = pdf_array_len(widths);
			for (i = 0; i < n; )
			{
				c0 = pdf_to_int(pdf_array_get(widths, i));
				obj = pdf_array_get(widths, i + 1);
				if (pdf_is_array(obj))
				{
					m = pdf_array_len(obj);
					for (k = 0; k < m; k++)
					{
						w = pdf_to_int(pdf_array_get(obj, k));
						pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
					}
					i += 2;
				}
				else
				{
					c1 = pdf_to_int(obj);
					w = pdf_to_int(pdf_array_get(widths, i + 2));
					pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
					i += 3;
				}
			}
		}

		pdf_end_hmtx(ctx, fontdesc);

		/* Vertical */

		/* Vertical metrics are only read when the encoding CMap has
		 * writing mode 1. */
		if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
		{
			/* Defaults used when DW2 is absent. */
			int dw2y = 880;
			int dw2w = -1000;

			obj = pdf_dict_gets(dict, "DW2");
			if (obj)
			{
				dw2y = pdf_to_int(pdf_array_get(obj, 0));
				dw2w = pdf_to_int(pdf_array_get(obj, 1));
			}

			pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);

			/* W2 holds two kinds of records:
			 *   c0 [w x y  w x y ...]  one triple per CID c0, c0+1, ...
			 *   c0 c1 w x y            one triple for the range c0..c1 */
			widths = pdf_dict_gets(dict, "W2");
			if (widths)
			{
				int c0, c1, w, x, y, n;

				n = pdf_array_len(widths);
				for (i = 0; i < n; )
				{
					c0 = pdf_to_int(pdf_array_get(widths, i));
					obj = pdf_array_get(widths, i + 1);
					if (pdf_is_array(obj))
					{
						int m = pdf_array_len(obj);
						for (k = 0; k * 3 < m; k ++)
						{
							w = pdf_to_int(pdf_array_get(obj, k * 3 + 0));
							x = pdf_to_int(pdf_array_get(obj, k * 3 + 1));
							y = pdf_to_int(pdf_array_get(obj, k * 3 + 2));
							pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
						}
						i += 2;
					}
					else
					{
						c1 = pdf_to_int(obj);
						w = pdf_to_int(pdf_array_get(widths, i + 2));
						x = pdf_to_int(pdf_array_get(widths, i + 3));
						y = pdf_to_int(pdf_array_get(widths, i + 4));
						pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
						i += 5;
					}
				}
			}

			pdf_end_vmtx(ctx, fontdesc);
		}
	}
	fz_catch(ctx)
	{
		/* Release the partially built descriptor before reporting. */
		pdf_drop_font(ctx, fontdesc);
		fz_throw(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict));
	}

	return fontdesc;
}
/*
 * Build a font descriptor for a CID font.
 *
 * dict        the CID font dictionary (BaseFont, CIDSystemInfo,
 *             FontDescriptor, Subtype, CIDToGIDMap, DW, W, DW2, W2 are
 *             read from it)
 * encoding    a CMap name or an indirect reference to an embedded CMap
 * to_unicode  passed on to pdf_load_to_unicode()
 *
 * Returns the new font descriptor. A failure to load the ToUnicode CMap
 * only produces a warning; on any other failure the partially built
 * descriptor is dropped and the error is rethrown with a message naming
 * the font object.
 */
static pdf_font_desc *
load_cid_font(pdf_document *doc, pdf_obj *dict, pdf_obj *encoding, pdf_obj *to_unicode)
{
	pdf_obj *widths;
	pdf_obj *descriptor;
	pdf_font_desc *fontdesc = NULL;
	FT_Face face;
	int kind;
	char collection[256];
	char *basefont;
	int i, k, fterr;
	pdf_obj *obj;
	int dw;
	fz_context *ctx = doc->ctx;

	/* fontdesc is assigned inside fz_try and dropped in fz_catch. */
	fz_var(fontdesc);

	fz_try(ctx)
	{
		/* Get font name and CID collection */

		basefont = pdf_to_name(pdf_dict_gets(dict, "BaseFont"));

		{
			pdf_obj *cidinfo;
			char tmpstr[64];
			int tmplen;

			cidinfo = pdf_dict_gets(dict, "CIDSystemInfo");
			if (!cidinfo)
				fz_throw(ctx, FZ_ERROR_GENERIC, "cid font is missing info");

			/* collection = "<Registry>-<Ordering>"; each part is cut to
			 * at most 63 bytes before it is appended. */
			obj = pdf_dict_gets(cidinfo, "Registry");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcpy(collection, tmpstr, sizeof collection);

			fz_strlcat(collection, "-", sizeof collection);

			obj = pdf_dict_gets(cidinfo, "Ordering");
			tmplen = fz_mini(sizeof tmpstr - 1, pdf_to_str_len(obj));
			memcpy(tmpstr, pdf_to_str_buf(obj), tmplen);
			tmpstr[tmplen] = '\0';
			fz_strlcat(collection, tmpstr, sizeof collection);
		}

		/* Load font file */

		fontdesc = pdf_new_font_desc(ctx);

		descriptor = pdf_dict_gets(dict, "FontDescriptor");
		if (!descriptor)
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: missing font descriptor");
		pdf_load_font_descriptor(fontdesc, doc, descriptor, collection, basefont, 1, 1);

		face = fontdesc->font->ft_face;
		kind = ft_kind(face);

		/* Encoding */

		if (pdf_is_name(encoding))
		{
			/* The two identity encodings are built directly, any other
			 * name is looked up as a system CMap. */
			if (!strcmp(pdf_to_name(encoding), "Identity-H"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 0, 2);
			else if (!strcmp(pdf_to_name(encoding), "Identity-V"))
				fontdesc->encoding = pdf_new_identity_cmap(ctx, 1, 2);
			else
				fontdesc->encoding = pdf_load_system_cmap(ctx, pdf_to_name(encoding));
		}
		else if (pdf_is_indirect(encoding))
		{
			fontdesc->encoding = pdf_load_embedded_cmap(doc, encoding);
		}
		else
		{
			fz_throw(ctx, FZ_ERROR_GENERIC, "syntaxerror: font missing encoding");
		}

		fontdesc->size += pdf_cmap_size(ctx, fontdesc->encoding);
		pdf_set_font_wmode(ctx, fontdesc, pdf_cmap_wmode(ctx, fontdesc->encoding));

		/* The CID -> glyph mapping is also set up for fonts that are not
		 * detected as TrueType but declare Subtype CIDFontType2 or carry
		 * an indirect CIDToGIDMap. */
		if (kind == TRUETYPE ||
			/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1565 */
			!strcmp(pdf_to_name(pdf_dict_gets(dict, "Subtype")), "CIDFontType2") ||
			/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1997 */
			pdf_is_indirect(pdf_dict_gets(dict, "CIDToGIDMap")))
		{
			pdf_obj *cidtogidmap;

			cidtogidmap = pdf_dict_gets(dict, "CIDToGIDMap");
			if (pdf_is_indirect(cidtogidmap))
			{
				fz_buffer *buf;

				buf = pdf_load_stream(doc, pdf_to_num(cidtogidmap), pdf_to_gen(cidtogidmap));

				/* The stream is read as a table of 16-bit values, high
				 * byte first; a trailing odd byte is ignored. */
				fontdesc->cid_to_gid_len = (buf->len) / 2;
				fontdesc->cid_to_gid = fz_malloc_array(ctx, fontdesc->cid_to_gid_len, sizeof(unsigned short));
				fontdesc->size += fontdesc->cid_to_gid_len * sizeof(unsigned short);
				for (i = 0; i < fontdesc->cid_to_gid_len; i++)
					fontdesc->cid_to_gid[i] = (buf->data[i * 2] << 8) + buf->data[i * 2 + 1];

				fz_drop_buffer(ctx, buf);
			}

			/* if truetype font is external, cidtogidmap should not be identity */
			/* so we map from cid to unicode and then map that through the (3 1) */
			/* unicode cmap to get a glyph id */
			else if (fontdesc->font->ft_substitute)
			{
				fterr = FT_Select_Charmap(face, ft_encoding_unicode);
				if (fterr)
				{
					fz_throw(ctx, FZ_ERROR_GENERIC, "fonterror: no unicode cmap when emulating CID font: %s", ft_error_string(fterr));
				}

				/* Pick the CID -> UCS2 CMap matching the collection. */
				if (!strcmp(collection, "Adobe-CNS1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-CNS1-UCS2");
				else if (!strcmp(collection, "Adobe-GB1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-GB1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan1-UCS2");
				else if (!strcmp(collection, "Adobe-Japan2"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Japan2-UCS2");
				else if (!strcmp(collection, "Adobe-Korea1"))
					fontdesc->to_ttf_cmap = pdf_load_system_cmap(ctx, "Adobe-Korea1-UCS2");
				/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=2318 */
				/* An Adobe-Identity font that has a font file is no
				 * longer flagged as a substitute. */
				else if (!strcmp(collection, "Adobe-Identity") && fontdesc->font->ft_file)
					fontdesc->font->ft_substitute = 0;
			}
		}

		/* cf. http://code.google.com/p/sumatrapdf/issues/detail?id=1961 */
		/* A broken ToUnicode CMap must not make the whole font fail. */
		fz_try(ctx)
		{
			pdf_load_to_unicode(doc, fontdesc, NULL, collection, to_unicode);
		}
		fz_catch(ctx)
		{
			fz_warn(ctx, "cannot load ToUnicode CMap");
		}

		/* If we have an identity encoding, we're supposed to use the glyph ids directly.
		 * If we only have a substitute font, that won't work.
		 * Make a last ditch attempt by using
		 * the ToUnicode table if it exists to map via the substitute font's cmap. */
		if (strstr(fontdesc->encoding->cmap_name, "Identity-") && fontdesc->font->ft_substitute)
		{
			fz_warn(ctx, "non-embedded font using identity encoding: %s", basefont);
			if (fontdesc->to_unicode && !fontdesc->to_ttf_cmap)
				fontdesc->to_ttf_cmap = pdf_keep_cmap(ctx, fontdesc->to_unicode);
		}

		/* Horizontal */

		/* Default width is 1000 unless DW is present. */
		dw = 1000;
		obj = pdf_dict_gets(dict, "DW");
		if (obj)
			dw = pdf_to_int(obj);
		pdf_set_default_hmtx(ctx, fontdesc, dw);

		/* W holds two kinds of records:
		 *   c0 [w0 w1 ...]  widths for the CIDs c0, c0+1, ...
		 *   c0 c1 w         one width for the CID range c0..c1 */
		widths = pdf_dict_gets(dict, "W");
		if (widths)
		{
			int c0, c1, w, n, m;

			n = pdf_array_len(widths);
			for (i = 0; i < n; )
			{
				c0 = pdf_to_int(pdf_array_get(widths, i));
				obj = pdf_array_get(widths, i + 1);
				if (pdf_is_array(obj))
				{
					m = pdf_array_len(obj);
					for (k = 0; k < m; k++)
					{
						w = pdf_to_int(pdf_array_get(obj, k));
						pdf_add_hmtx(ctx, fontdesc, c0 + k, c0 + k, w);
					}
					i += 2;
				}
				else
				{
					c1 = pdf_to_int(obj);
					w = pdf_to_int(pdf_array_get(widths, i + 2));
					pdf_add_hmtx(ctx, fontdesc, c0, c1, w);
					i += 3;
				}
			}
		}

		pdf_end_hmtx(ctx, fontdesc);

		/* Vertical */

		/* Vertical metrics are only read when the encoding CMap has
		 * writing mode 1. */
		if (pdf_cmap_wmode(ctx, fontdesc->encoding) == 1)
		{
			/* Defaults used when DW2 is absent. */
			int dw2y = 880;
			int dw2w = -1000;

			obj = pdf_dict_gets(dict, "DW2");
			if (obj)
			{
				dw2y = pdf_to_int(pdf_array_get(obj, 0));
				dw2w = pdf_to_int(pdf_array_get(obj, 1));
			}

			pdf_set_default_vmtx(ctx, fontdesc, dw2y, dw2w);

			/* W2 holds two kinds of records:
			 *   c0 [w x y  w x y ...]  one triple per CID c0, c0+1, ...
			 *   c0 c1 w x y            one triple for the range c0..c1 */
			widths = pdf_dict_gets(dict, "W2");
			if (widths)
			{
				int c0, c1, w, x, y, n;

				n = pdf_array_len(widths);
				for (i = 0; i < n; )
				{
					c0 = pdf_to_int(pdf_array_get(widths, i));
					obj = pdf_array_get(widths, i + 1);
					if (pdf_is_array(obj))
					{
						int m = pdf_array_len(obj);
						for (k = 0; k * 3 < m; k ++)
						{
							w = pdf_to_int(pdf_array_get(obj, k * 3 + 0));
							x = pdf_to_int(pdf_array_get(obj, k * 3 + 1));
							y = pdf_to_int(pdf_array_get(obj, k * 3 + 2));
							pdf_add_vmtx(ctx, fontdesc, c0 + k, c0 + k, x, y, w);
						}
						i += 2;
					}
					else
					{
						c1 = pdf_to_int(obj);
						w = pdf_to_int(pdf_array_get(widths, i + 2));
						x = pdf_to_int(pdf_array_get(widths, i + 3));
						y = pdf_to_int(pdf_array_get(widths, i + 4));
						pdf_add_vmtx(ctx, fontdesc, c0, c1, x, y, w);
						i += 5;
					}
				}
			}

			pdf_end_vmtx(ctx, fontdesc);
		}
	}
	fz_catch(ctx)
	{
		/* Release the partially built descriptor before reporting. */
		pdf_drop_font(ctx, fontdesc);
		fz_rethrow_message(ctx, "cannot load cid font (%d %d R)", pdf_to_num(dict), pdf_to_gen(dict));
	}

	return fontdesc;
}