/* ximage here is the result. DONT USE IT FOR PASSING OPTIONS! */ int pdf_include_page (pdf_ximage *ximage, FILE *image_file, const char *ident, load_options options) { pdf_file *pf; xform_info info; pdf_obj *contents = NULL, *catalog; pdf_obj *page = NULL, *resources = NULL, *markinfo = NULL; pf = pdf_open(ident, image_file); if (!pf) return -1; if (pdf_file_get_version(pf) > pdf_get_version()) { WARN("Trying to include PDF file which has newer version number " \ "than output PDF: 1.%d.", pdf_get_version()); } pdf_ximage_init_form_info(&info); if (options.page_no == 0) options.page_no = 1; page = pdf_doc_get_page(pf, options.page_no, options.bbox_type, &info.bbox, &resources); if(!page) goto error_silent; catalog = pdf_file_get_catalog(pf); markinfo = pdf_deref_obj(pdf_lookup_dict(catalog, "MarkInfo")); if (markinfo) { pdf_obj *tmp = pdf_deref_obj(pdf_lookup_dict(markinfo, "Marked")); pdf_release_obj(markinfo); if (!PDF_OBJ_BOOLEANTYPE(tmp)) { if (tmp) pdf_release_obj(tmp); goto error; } else if (pdf_boolean_value(tmp)) { WARN("PDF file is tagged... Ignoring tags."); } pdf_release_obj(tmp); } contents = pdf_deref_obj(pdf_lookup_dict(page, "Contents")); pdf_release_obj(page); page = NULL; /* * Handle page content stream. */ { pdf_obj *content_new; if (!contents) { /* * Empty page */ content_new = pdf_new_stream(0); /* TODO: better don't include anything if the page is empty */ } else if (PDF_OBJ_STREAMTYPE(contents)) { /* * We must import the stream because its dictionary * may contain indirect references. */ content_new = pdf_import_object(contents); } else if (PDF_OBJ_ARRAYTYPE(contents)) { /* * Concatenate all content streams. */ int idx, len = pdf_array_length(contents); content_new = pdf_new_stream(STREAM_COMPRESS); for (idx = 0; idx < len; idx++) { pdf_obj *content_seg = pdf_deref_obj(pdf_get_array(contents, idx)); if (!PDF_OBJ_STREAMTYPE(content_seg) || pdf_concat_stream(content_new, content_seg) < 0) { pdf_release_obj(content_seg); pdf_release_obj(content_new); goto error; } pdf_release_obj(content_seg); } } else { goto error; } if (contents) pdf_release_obj(contents); contents = content_new; } /* * Add entries to contents stream dictionary. */ { pdf_obj *contents_dict, *bbox, *matrix; contents_dict = pdf_stream_dict(contents); pdf_add_dict(contents_dict, pdf_new_name("Type"), pdf_new_name("XObject")); pdf_add_dict(contents_dict, pdf_new_name("Subtype"), pdf_new_name("Form")); pdf_add_dict(contents_dict, pdf_new_name("FormType"), pdf_new_number(1.0)); bbox = pdf_new_array(); pdf_add_array(bbox, pdf_new_number(info.bbox.llx)); pdf_add_array(bbox, pdf_new_number(info.bbox.lly)); pdf_add_array(bbox, pdf_new_number(info.bbox.urx)); pdf_add_array(bbox, pdf_new_number(info.bbox.ury)); pdf_add_dict(contents_dict, pdf_new_name("BBox"), bbox); matrix = pdf_new_array(); pdf_add_array(matrix, pdf_new_number(1.0)); pdf_add_array(matrix, pdf_new_number(0.0)); pdf_add_array(matrix, pdf_new_number(0.0)); pdf_add_array(matrix, pdf_new_number(1.0)); pdf_add_array(matrix, pdf_new_number(0.0)); pdf_add_array(matrix, pdf_new_number(0.0)); pdf_add_dict(contents_dict, pdf_new_name("Matrix"), matrix); pdf_add_dict(contents_dict, pdf_new_name("Resources"), pdf_import_object(resources)); pdf_release_obj(resources); } pdf_close(pf); pdf_ximage_set_form(ximage, &info, contents); return 0; error: WARN("Cannot parse document. Broken PDF file?"); error_silent: if (resources) pdf_release_obj(resources); if (markinfo) pdf_release_obj(markinfo); if (page) pdf_release_obj(page); if (contents) pdf_release_obj(contents); pdf_close(pf); return -1; }
int pdf_copy_clip (FILE *image_file, int pageNo, double x_user, double y_user) { pdf_obj *page_tree, *contents; int depth = 0, top = -1; const char *clip_path, *end_path; char *save_path, *temp; pdf_tmatrix M; double stack[6]; pdf_file *pf; pf = pdf_open(NULL, image_file); if (!pf) return -1; pdf_dev_currentmatrix(&M); pdf_invertmatrix(&M); M.e += x_user; M.f += y_user; page_tree = pdf_get_page_obj (pf, pageNo, NULL, NULL); if (!page_tree) { pdf_close(pf); return -1; } contents = pdf_get_page_content(page_tree); pdf_release_obj(page_tree); if (!contents) { pdf_close(pf); return -1; } pdf_doc_add_page_content(" ", 1); save_path = malloc(pdf_stream_length(contents) + 1); strncpy(save_path, (const char *) pdf_stream_dataptr(contents), pdf_stream_length(contents)); clip_path = save_path; end_path = clip_path + pdf_stream_length(contents); depth = 0; for (; clip_path < end_path; clip_path++) { int color_dimen = 0; /* silence uninitialized warning */ char *token; skip_white(&clip_path, end_path); if (clip_path == end_path) break; if (depth > 1) { if (*clip_path == 'q') depth++; if (*clip_path == 'Q') depth--; parse_ident(&clip_path, end_path); continue; } else if (*clip_path == '-' || *clip_path == '+' || *clip_path == '.' || isdigit((unsigned char)*clip_path)) { stack[++top] = strtod(clip_path, &temp); clip_path = temp; } else if (*clip_path == '[') { /* Ignore, but put a dummy value on the stack (in case of d operator) */ parse_pdf_array(&clip_path, end_path, pf); stack[++top] = 0; } else if (*clip_path == '/') { if (strncmp("/DeviceGray", clip_path, 11) == 0 || strncmp("/Indexed", clip_path, 8) == 0 || strncmp("/CalGray", clip_path, 8) == 0) { color_dimen = 1; continue; } else if (strncmp("/DeviceRGB", clip_path, 10) == 0 || strncmp("/CalRGB", clip_path, 7) == 0 || strncmp("/Lab", clip_path, 4) == 0) { color_dimen = 3; continue; } else if (strncmp("/DeviceCMYK", clip_path, 11) == 0) { color_dimen = 4; continue; } else { clip_path++; parse_ident(&clip_path, end_path); skip_white(&clip_path, end_path); token = parse_ident(&clip_path, end_path); if (strcmp(token, "gs") == 0) { continue; } return -1; } } else { int j; pdf_tmatrix T; pdf_coord p0, p1, p2, p3; token = parse_ident(&clip_path, end_path); for (j = 0; j < sizeof(pdf_operators) / sizeof(pdf_operators[0]); j++) if (strcmp(token, pdf_operators[j].token) == 0) break; if (j == sizeof(pdf_operators) / sizeof(pdf_operators[0])) { return -1; } switch (pdf_operators[j].opcode) { case 0: case -1: case -2: case -3: case -4: /* Just pop the stack and do nothing. */ top += pdf_operators[j].opcode; if (top < -1) return -1; break; case OP_SETCOLOR: top -= color_dimen; if (top < -1) return -1; break; case OP_CLOSEandCLIP: pdf_dev_closepath(); case OP_CLIP: #if 0 pdf_dev_clip(); #else pdf_dev_flushpath('W', PDF_FILL_RULE_NONZERO); #endif break; case OP_CONCATMATRIX: if (top < 5) return -1; T.f = stack[top--]; T.e = stack[top--]; T.d = stack[top--]; T.c = stack[top--]; T.b = stack[top--]; T.a = stack[top--]; pdf_concatmatrix(&M, &T); break; case OP_SETCOLORSPACE: /* Do nothing. */ break; case OP_RECTANGLE: if (top < 3) return -1; p1.y = stack[top--]; p1.x = stack[top--]; p0.y = stack[top--]; p0.x = stack[top--]; if (M.b == 0 && M.c == 0) { pdf_tmatrix M0; M0.a = M.a; M0.b = M.b; M0.c = M.c; M0.d = M.d; M0.e = 0; M0.f = 0; pdf_dev_transform(&p0, &M); pdf_dev_transform(&p1, &M0); pdf_dev_rectadd(p0.x, p0.y, p1.x, p1.y); } else { p2.x = p0.x + p1.x; p2.y = p0.y + p1.y; p3.x = p0.x; p3.y = p0.y + p1.y; p1.x += p0.x; p1.y = p0.y; pdf_dev_transform(&p0, &M); pdf_dev_transform(&p1, &M); pdf_dev_transform(&p2, &M); pdf_dev_transform(&p3, &M); pdf_dev_moveto(p0.x, p0.y); pdf_dev_lineto(p1.x, p1.y); pdf_dev_lineto(p2.x, p2.y); pdf_dev_lineto(p3.x, p3.y); pdf_dev_closepath(); } break; case OP_CURVETO: if (top < 5) return -1; p0.y = stack[top--]; p0.x = stack[top--]; pdf_dev_transform(&p0, &M); p1.y = stack[top--]; p1.x = stack[top--]; pdf_dev_transform(&p1, &M); p2.y = stack[top--]; p2.x = stack[top--]; pdf_dev_transform(&p2, &M); pdf_dev_curveto(p2.x, p2.y, p1.x, p1.y, p0.x, p0.y); break; case OP_CLOSEPATH: pdf_dev_closepath(); break; case OP_LINETO: if (top < 1) return -1; p0.y = stack[top--]; p0.x = stack[top--]; pdf_dev_transform(&p0, &M); pdf_dev_lineto(p0.x, p0.y); break; case OP_MOVETO: if (top < 1) return -1; p0.y = stack[top--]; p0.x = stack[top--]; pdf_dev_transform(&p0, &M); pdf_dev_moveto(p0.x, p0.y); break; case OP_NOOP: pdf_doc_add_page_content(" n", 2); break; case OP_GSAVE: depth++; break; case OP_GRESTORE: depth--; break; case OP_CURVETO1: if (top < 3) return -1; p0.y = stack[top--]; p0.x = stack[top--]; pdf_dev_transform(&p0, &M); p1.y = stack[top--]; p1.x = stack[top--]; pdf_dev_transform(&p1, &M); pdf_dev_vcurveto(p1.x, p1.y, p0.x, p0.y); break; case OP_CURVETO2: if (top < 3) return -1; p0.y = stack[top--]; p0.x = stack[top--]; pdf_dev_transform(&p0, &M); p1.y = stack[top--]; p1.x = stack[top--]; pdf_dev_transform(&p1, &M); pdf_dev_ycurveto(p1.x, p1.y, p0.x, p0.y); break; default: return -1; } } } free(save_path); pdf_release_obj(contents); pdf_close(pf); return 0; }
pdf_obj *pdf_include_page(FILE *image_file, struct xform_info *p, char *res_name) { pdf_obj *trailer = NULL, *catalog = NULL, *page_tree = NULL; pdf_obj *kids_ref, *kids; pdf_obj *media_box = NULL, *crop_box = NULL, *resources = NULL, *contents = NULL, *contents_ref = NULL; pdf_obj *tmp1; #ifdef MEM_DEBUG MEM_START #endif if (!(trailer = pdf_open (image_file))) { fprintf (stderr, "\nCorrupt PDF file?\n"); } /* Now just lookup catalog location */ /* Deref catalog */ if (trailer && (catalog = pdf_deref_obj(pdf_lookup_dict (trailer,"Root"))) == NULL) { fprintf (stderr, "\nCatalog isn't where I expect it.\n"); } if (trailer) pdf_release_obj (trailer); /* Lookup page tree in catalog */ if (catalog) { page_tree = pdf_deref_obj (pdf_lookup_dict (catalog, "Pages")); /* Should be finished with catalog */ pdf_release_obj (catalog); } /* Media box and resources can be inherited so start looking for them here */ if (page_tree) { if ((tmp1 = pdf_lookup_dict (page_tree, "CropBox"))) crop_box = pdf_deref_obj (tmp1); if ((tmp1 = pdf_lookup_dict (page_tree, "MediaBox"))) media_box = pdf_deref_obj (tmp1); resources = pdf_deref_obj (pdf_lookup_dict (page_tree, "Resources")); if (resources == NULL) { resources = pdf_new_dict(); } while ((kids_ref = pdf_lookup_dict (page_tree, "Kids")) != NULL) { kids = pdf_deref_obj (kids_ref); pdf_release_obj (page_tree); page_tree = pdf_deref_obj (pdf_get_array(kids, 0)); pdf_release_obj (kids); /* Replace MediaBox if it's here */ tmp1 = pdf_deref_obj(pdf_lookup_dict (page_tree, "MediaBox")); if (tmp1 && media_box) pdf_release_obj (media_box); if (tmp1) media_box = tmp1; /* Do same for CropBox */ tmp1 = pdf_deref_obj(pdf_lookup_dict (page_tree, "CropBox")); if (tmp1 && crop_box) pdf_release_obj (crop_box); if (tmp1) crop_box = tmp1; /* Add resources if they're here */ tmp1 = pdf_deref_obj (pdf_lookup_dict (page_tree, "Resources")); if (tmp1) { pdf_merge_dict (tmp1, resources); pdf_release_obj (resources); resources = tmp1; } } /* At this point, page_tree contains the first page. media_box, crop_box, and resources should also be set. */ /* If there's a crop_box, replace media_box with crop_box. The rest of this routine assumes crop_box has been released */ if (crop_box) { pdf_release_obj (media_box); media_box = crop_box; crop_box = NULL; } /* This gets bit confusing. In the following code, media_box is the box the image is cropped to. The bounding box is the box the image is scaled to */ /* If user did not supply bounding box, use media_box (which may really be cropbox) as bounding box */ /* Set the crop box parameters in the xform_info structure */ p->c_llx = pdf_number_value (pdf_get_array (media_box, 0)); p->c_lly = pdf_number_value (pdf_get_array (media_box, 1)); p->c_urx = pdf_number_value (pdf_get_array (media_box, 2)); p->c_ury = pdf_number_value (pdf_get_array (media_box, 3)); /* Adjust scaling and clipping information as necessary */ pdf_scale_image (p); /* Set the media box to whatever pdf_scale_image() decided for the crop box (which may be unchanged) */ pdf_release_obj (media_box); media_box = pdf_new_array (); pdf_add_array (media_box, pdf_new_number (p->c_llx)); pdf_add_array (media_box, pdf_new_number (p->c_lly)); pdf_add_array (media_box, pdf_new_number (p->c_urx)); pdf_add_array (media_box, pdf_new_number (p->c_ury)); if ((contents = pdf_deref_obj(pdf_lookup_dict(page_tree,"Contents")))==NULL) { fprintf (stderr, "\nNo Contents found\n"); return NULL; } pdf_release_obj (page_tree); } /* Arrays of contents must be handled very differently (not implemented) */ if (contents && contents -> type != PDF_ARRAY) { doc_make_form_xobj (contents, media_box, p->user_bbox? p->u_llx: 0.0, p->user_bbox? p->u_lly: 0.0, 1.0, 1.0, resources, res_name); } else { fprintf (stderr, "\nIgnoring stream with with multiple segments\n"); contents = NULL; } if (contents) { contents_ref = pdf_ref_obj (contents); pdf_release_obj (contents); } pdf_close (); #ifdef MEM_DEBUG MEM_END #endif return (contents_ref); }