/*
** Insert copies of every page region in src into dst, starting at array
** position index.  Entries of dst at position index and above are moved up
** by src->n slots to make room.  The dst array is enlarged when it cannot
** hold the combined count.  Nothing happens if src holds no regions.
*/
void pageregions_insert(PAGEREGIONS *dst,int index,PAGEREGIONS *src)

    {
    static char *funcname="pageregions_insert";
    int k,count,total;

    count=src->n;
    if (count<1)
        return;
    total=dst->n+count;

    /* Grow the destination array (by doubling) until everything fits */
    if (total > dst->na)
        {
        int capacity;

        if (dst->na<16)
            capacity=32;
        else
            capacity=dst->na*2;
        while (capacity < total)
            capacity *= 2;
        willus_mem_realloc_robust_warn((void **)&dst->pageregion,capacity*sizeof(PAGEREGION),
                                       dst->na*sizeof(PAGEREGION),funcname,10);
        dst->na=capacity;
        }

    /* The slots about to come into use must be initialized before copying into them */
    for (k=dst->n;k<total;k++)
        pageregion_init(&dst->pageregion[k]);

    /* Shift the tail of dst upward, highest entry first so nothing is overwritten early */
    k=total-1;
    while (k-count>=index)
        {
        pageregion_copy(&dst->pageregion[k],&dst->pageregion[k-count]);
        k--;
        }

    /* Drop the source regions into the gap that was opened */
    for (k=0;k<count;k++)
        pageregion_copy(&dst->pageregion[index+k],&src->pageregion[k]);

    dst->n = total;
    }
/*
** Append a copy of *word to the end of the words array, enlarging the array
** first if it is full.
**
** The structure is copied by value, but new space is allocated for the text
** buffer, and for the character-position array when the source word has one,
** so the stored entry does not point at the caller's text or cpos buffers.
** The stored character count (n) is recomputed from the text with
** utf8_to_unicode().
*/
void ocrwords_add_word(OCRWORDS *words,OCRWORD *word)

    {
    static char *funcname="ocrwords_add_word";
    OCRWORD *slot;
    int i;

    if (words->n>=words->na)
        {
        int capacity;

        if (words->na<512)
            capacity=1024;
        else
            capacity=words->na*2;
        willus_mem_realloc_robust_warn((void **)&words->word,capacity*sizeof(OCRWORD),
                                       words->na*sizeof(OCRWORD),funcname,10);
        /* Initialize every entry that was just added to the array */
        for (i=words->na;i<capacity;i++)
            ocrword_init(&words->word[i]);
        words->na=capacity;
        }

    slot=&words->word[words->n];
    (*slot)=(*word);

    /* Private copy of the text */
    slot->text=NULL;
    willus_mem_alloc_warn((void **)&slot->text,strlen(word->text)+1,funcname,10);
    strcpy(slot->text,word->text);

    /* Copy char positions */
    slot->n=utf8_to_unicode(NULL,word->text,1000000);
    if (word->cpos==NULL)
        slot->cpos=NULL;
    else
        {
        willus_mem_alloc_warn((void **)&slot->cpos,slot->n*sizeof(double),funcname,10);
        for (i=0;i<slot->n;i++)
            slot->cpos[i] = word->cpos[i];
        }
    words->n++;
    }
/*
** Make sure the buffer in sbuf has room for at least n bytes.
**
** When n exceeds the current allocation (sbuf->na), the buffer is
** reallocated to exactly n bytes.  If there was no prior allocation
** (sbuf->na==0), the new buffer is set to the empty string.
*/
void strbuf_ensure(STRBUF *sbuf,int n)

    {
    static char *funcname="strbuf_ensure";

    /* Already big enough--nothing to do */
    if (n<=sbuf->na)
        return;
    willus_mem_realloc_robust_warn((void**)&sbuf->s,n,sbuf->na,funcname,10);
    if (sbuf->na==0)
        sbuf->s[0]='\0';
    sbuf->na=n;
    }
/*
** Append a by-value copy of *wrectmap to the wrectmaps array.
**
** When the array is full it is enlarged first:  to 256 entries if the
** current allocation is below 128, otherwise to twice its current size.
*/
void wrectmaps_add_wrectmap(WRECTMAPS *wrectmaps,WRECTMAP *wrectmap)

    {
    static char *funcname="wrectmaps_add_wrectmap";

    if (wrectmaps->n>=wrectmaps->na)
        {
        int capacity;

        if (wrectmaps->na < 128)
            capacity=256;
        else
            capacity=wrectmaps->na*2;
        willus_mem_realloc_robust_warn((void **)&wrectmaps->wrectmap,capacity*sizeof(WRECTMAP),
                                       wrectmaps->na*sizeof(WRECTMAP),funcname,10);
        wrectmaps->na=capacity;
        }
    wrectmaps->wrectmap[wrectmaps->n]=(*wrectmap);
    wrectmaps->n++;
    }
/*
** Append a by-value copy of *box to the boxes array.
**
** When the array is full it is enlarged first:  to 2048 entries if the
** current allocation is below 1024, otherwise to twice its current size.
*/
void wpdfboxes_add_box(WPDFBOXES *boxes,WPDFBOX *box)

    {
    static char *funcname="wpdfboxes_add_box";

    if (boxes->n>=boxes->na)
        {
        int capacity;

        if (boxes->na < 1024)
            capacity=2048;
        else
            capacity=boxes->na*2;
        willus_mem_realloc_robust_warn((void **)&boxes->box,capacity*sizeof(WPDFBOX),
                                       boxes->na*sizeof(WPDFBOX),funcname,10);
        boxes->na=capacity;
        }
    boxes->box[boxes->n]=(*box);
    boxes->n++;
    }
/*
** Append a newly allocated copy of the string filename to the k2files list.
**
** When the pointer array is full it is enlarged first:  to 256 entries if
** the current allocation is below 128, otherwise to twice its current size.
*/
void k2pdfopt_files_add_file(K2PDFOPT_FILES *k2files,char *filename)

    {
    static char *funcname="k2pdfopt_files_add_file";
    char **slot;

    if (k2files->n >= k2files->na)
        {
        int capacity;

        if (k2files->na<128)
            capacity=256;
        else
            capacity=k2files->na*2;
        willus_mem_realloc_robust_warn((void **)&k2files->file,sizeof(char *)*capacity,
                                       sizeof(char *)*k2files->na,funcname,10);
        k2files->na=capacity;
        }

    /* Allocate room for the name (plus terminator) and copy it in */
    slot=&k2files->file[k2files->n];
    willus_mem_alloc_warn((void **)slot,strlen(filename)+1,funcname,10);
    strcpy(*slot,filename);
    k2files->n++;
    }
/*
** Append a new page region to the regions array.
**
** The new entry is initialized with pageregion_init(), its bitmap region is
** filled from bmpregion using bmpregion_copy(), and its level, fullspan, and
** notes fields are set from the corresponding arguments.
**
** When the array is full it is enlarged first:  to 32 entries if the current
** allocation is below 16, otherwise to twice its current size.
**
** regions    Array to append to (n = entries in use, na = entries allocated).
** bmpregion  Source bitmap region for the new entry.
** level      Stored in the new entry's level field.
** fullspan   Stored in the new entry's fullspan field.
** notes      Stored in the new entry's notes field.
*/
void pageregions_add_pageregion(PAGEREGIONS *regions,BMPREGION *bmpregion,int level,
                                int fullspan,int notes)

    {
    static char *funcname="pageregions_add_pageregion";
    PAGEREGION *region;

    if (regions->n>=regions->na)
        {
        int newsize;

        newsize = regions->na<16 ? 32 : regions->na*2;
        willus_mem_realloc_robust_warn((void **)&regions->pageregion,newsize*sizeof(PAGEREGION),
                                       regions->na*sizeof(PAGEREGION),funcname,10);
        regions->na=newsize;
        }
    /* Take the slot's address only after any reallocation, which may move the array */
    region=&regions->pageregion[regions->n];
    pageregion_init(region);
    /* NOTE(review): the final argument (1) is passed through unchanged; its meaning */
    /* is defined by bmpregion_copy(), which is not visible here.                    */
    bmpregion_copy(&region->bmpregion,bmpregion,1);
    region->level=level;
    region->fullspan=fullspan;
    region->notes=notes;
    regions->n++;
    }
/*
** Append string s to the buffer (*list), growing the buffer when needed.
**
** s is copied to offset str0_len(*list) within the buffer, and one extra
** '\0' is written immediately after the copied string's own terminator, so
** the buffer ends with two consecutive NUL bytes.
**
** (*na) is the allocated size of the buffer in bytes and is updated when the
** buffer is reallocated.  A NULL or empty s is ignored.
*/
static void str0_addstring(char **list,int *na,char *s)

    {
    static char *funcname="str0_addstring";
    char *buffer;
    int offset,required;
    size_t len;

    if (s==NULL || s[0]=='\0')
        return;
    len=strlen(s);
    offset=str0_len(*list);
    /* Room for what is already there, the new string, and two terminators */
    required=offset+len+2;
    if ((*list)==NULL || (required > (*na)))
        {
        int capacity;

        if ((*na)<512)
            capacity=1024;
        else
            capacity=(*na)*2;
        while (capacity < required)
            capacity *= 2;
        willus_mem_realloc_robust_warn((void **)list,capacity,(*na),funcname,10);
        (*na)=capacity;
        }
    buffer=(*list);
    strcpy(&buffer[offset],s);
    buffer[offset+len+1]='\0';
    }
/*
** Rebuild the page tree of the PDF document xref so that its /Pages object
** holds only the destination pages described by pageinfo->boxes.
**
** The boxes array is walked in order.  Consecutive boxes with the same
** dstpage value make up one destination page.  For every box, a content
** stream fragment is generated which sets a transformation matrix and a
** clipping path so that only the desired part of the source page shows up at
** the desired place on the destination page.
**
** use_forms != 0:  Each destination page gets a single content stream which
**                  draws the source pages via "/<name> Do".  The source pages
**                  that were used are converted to XObject forms afterwards
**                  by wmupdf_convert_pages_to_forms().
** use_forms == 0:  The source page content streams are pushed directly into
**                  the destination page's /Contents array, bracketed by the
**                  generated "q ... cm ... W n" stream and a shared "Q"
**                  stream, and the source page resources are merged into the
**                  destination page resources.
**
** Leading boxes whose dstpage is <= 0 are removed from pageinfo->boxes.
** Boxes whose source page number is outside 1..pagecount are skipped.
**
** The "out" argument is not used by this function.
**
** Always returns 0.
*/
static int wmupdf_pdfdoc_newpages(pdf_document *xref,fz_context *ctx,WPDFPAGEINFO *pageinfo,
                                  int use_forms,FILE *out)

    {
    static char *funcname="wmupdf_pdfdoc_newpages";
    pdf_obj *root,*oldroot,*pages,*kids,*countobj,*parent,*olddests;
    pdf_obj *srcpageobj,*srcpagecontents;
    pdf_obj *destpageobj,*destpagecontents,*destpageresources;
    double srcx0,srcy0;
    int qref,i,i0,pagecount,srccount,destpageref,nbb;
    int *srcpageused;
    char *bigbuf;
    double srcpagerot;

    /* Avoid compiler warning */
    destpageref = 0;
    destpageobj = NULL;
    srcx0=srcy0=0.;
    srcpageused=NULL;
    bigbuf=NULL;
    nbb=0;
    /* Keep only pages/type and (reduced) dest entries to avoid references to unretained pages */
    pagecount = pdf_count_pages(xref);
    if (use_forms)
        {
        willus_mem_alloc_warn((void **)&srcpageused,sizeof(int)*(pagecount+1),funcname,10);
        /* Mark all source pages as "not done" */
        for (i=0;i<=pagecount;i++)
            srcpageused[i]=0;
        /* bigbuf accumulates the content stream of the current destination page */
        nbb=4096;
        willus_mem_alloc_warn((void **)&bigbuf,nbb,funcname,10);
        bigbuf[0]='\0';
        }
    oldroot = pdf_dict_gets(xref->trailer,"Root");
    /*
    ** pages points to /Pages object in PDF file.
    ** Has:  /Type /Pages, /Count <numpages>, /Kids [ obj obj obj obj ]
    */
    pages = pdf_dict_gets(oldroot,"Pages");
    olddests = pdf_load_name_tree(xref,"Dests");

    /*
    ** Create new root object with only /Pages and /Type (and reduced dest entries)
    ** to avoid references to unretained pages.
    */
    root = pdf_new_dict(ctx,4);
    pdf_dict_puts(root,"Type",pdf_dict_gets(oldroot,"Type"));
    pdf_dict_puts(root,"Pages",pages);
    pdf_update_object(xref,pdf_to_num(oldroot),root);
    pdf_drop_obj(root);

    /* Parent indirectly references the /Pages object in the file */
    /* (Each new page we create has to point to this.)            */
    parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref);
    /* Create a new kids array with only the pages we want to keep */
    kids = pdf_new_array(ctx, 1);

    qref=0;
    /* Avoid compiler warnings */
    destpageresources=NULL;
    destpagecontents=NULL;
    srcpagecontents=NULL;
    srcpagerot=0.;

    /*
    ** Discard leading boxes that have no destination page (dstpage <= 0).
    ** The scan stops at boxes.n so that no element past the end is read.
    */
    for (i=0;i<pageinfo->boxes.n;i++)
        if (pageinfo->boxes.box[i].dstpage>0)
            break;
    if (i>0)
        {
        /* Move the (n-i) remaining boxes to the front of the array */
        if (i<pageinfo->boxes.n)
            memmove(&pageinfo->boxes.box[0],&pageinfo->boxes.box[i],
                    sizeof(WPDFBOX)*(pageinfo->boxes.n-i));
        pageinfo->boxes.n -= i;
        }

    /*
    ** Walk through the WPDFBOX array.  The loop deliberately runs one step
    ** past the last box (i==boxes.n) so that the final destination page is
    ** stored; no box is accessed on that last pass.
    */
    for (i=srccount=i0=0;i<=pageinfo->boxes.n;i++)
        {
        WPDFBOX *box;
        int j,k,newsrc;
        static char buf[512];
        pdf_obj *s1indirect,*qindirect,*rotobj;
        static double cpm[3][3],m[3][3],m1[3][3];
        static double xclip[4],yclip[4];

        /* Check to see if we are done with an output page */
        if (srccount>0 && (i==pageinfo->boxes.n
               || (i>0 && pageinfo->boxes.box[i].dstpage!=pageinfo->boxes.box[i-1].dstpage)))
            {
            pdf_obj *newpageref;

            /*
            ** Store destination page into document structure
            */
            if (use_forms)
                {
                pdf_obj *dest_stream;

                /* Create new object in document for destination page stream */
                dest_stream = pdf_new_indirect(ctx,new_stream_object(xref,ctx,bigbuf),
                                               0,(void *)xref);
                /* Store this into the destination page contents array */
                pdf_array_push(destpagecontents,dest_stream);
                pdf_drop_obj(dest_stream);
                }
            newpageref=pdf_new_indirect(ctx,destpageref,0,(void *)xref);
            /* Reference parent list of pages */
            pdf_dict_puts(destpageobj,"Parent",parent);
            pdf_dict_puts(destpageobj,"Contents",destpagecontents);
            pdf_dict_puts(destpageobj,"Resources",destpageresources);
            /* Store page object in document's kids array */
            pdf_array_push(kids,newpageref);
            /* Update document with new page */
            pdf_update_object(xref,destpageref,destpageobj);
            /* Clean up */
            pdf_drop_obj(newpageref);
            pdf_drop_obj(destpageresources);
            pdf_drop_obj(destpagecontents);
            pdf_drop_obj(destpageobj);
            /* Reset source page and index to start of new destination page */
            i0=i;
            srccount=0;
            }

        /* Quit loop if beyond last box */
        if (i>=pageinfo->boxes.n)
            break;

        box=&pageinfo->boxes.box[i];
        /* Ignore boxes that refer to a source page which does not exist */
        if (box->srcbox.pageno<1 || box->srcbox.pageno>pagecount)
            continue;

        /* Is this a source page we haven't processed yet (for this destination page)? */
        for (newsrc=1,j=i0;j<i;j++)
            {
            if (pageinfo->boxes.box[j].srcbox.pageno==box->srcbox.pageno)
                {
                newsrc=0;
                break;
                }
            }
        if (newsrc)
            {
            double v[4];

            srccount++;
            if (use_forms)
                srcpageused[box->srcbox.pageno]=1;
            if (srccount==1)
                {
                /*
                ** Start a new destination page.
                **
                ** Each new page object is a dict type with:
                ** /Type /Page
                ** /Contents (array of objects)
                ** /Resources (dict)
                ** /MediaBox [0 0 612 792]
                ** /Parent <PagesObj>
                ** [Can have /Rotate 90, for example.]
                **
                */
                destpageobj=start_new_destpage(ctx,box->dst_width_pts,box->dst_height_pts);
                destpageresources=pdf_new_dict(ctx,1);
                /* NOTE(review): the dict created here is never dropped in this function; */
                /* confirm whether pdf_dict_puts() takes over the reference.              */
                if (use_forms)
                    pdf_dict_puts(destpageresources,"XObject",pdf_new_dict(ctx,1));
                destpageref=pdf_create_object(xref);
                destpagecontents=pdf_new_array(ctx,1);
                /* Init the destination page stream for forms */
                if (use_forms)
                    bigbuf[0]='\0';
                }
            /* New source page, so get the source page objects */
            srcpageobj = xref->page_objs[box->srcbox.pageno-1];
            wmupdf_page_bbox(srcpageobj,v);
            srcx0=v[0];
            srcy0=v[1];
            rotobj=pdf_dict_gets(srcpageobj,"Rotate");
            srcpagerot = rotobj!=NULL ? pdf_to_real(rotobj) : 0.;
            srcpagecontents=pdf_dict_gets(srcpageobj,"Contents");
            if (use_forms)
                {
                pdf_obj *xobjdict;
                int pageno;

                /* Register the source page under its XObject name in the page resources */
                xobjdict=pdf_dict_gets(destpageresources,"XObject");
                pageno=box->srcbox.pageno;
                pdf_dict_puts(xobjdict,xobject_name(pageno),xref->page_refs[pageno-1]);
                pdf_dict_puts(destpageresources,"XObject",xobjdict);
                }
            else
                {
                pdf_obj *srcpageresources;

                /* Merge source page resources into destination page resources */
                srcpageresources=pdf_dict_gets(srcpageobj,"Resources");
                wmupdf_dict_merge(ctx,"Resources",destpageresources,srcpageresources);
                }
            }

        /*
        ** Process this source box:
        **
        ** Create a transformation matrix and clipping path to only show the
        ** desired part of the source page at the appropriate place on the
        ** destination page.
        **
        ** How the transformation matrix works:
        ** - Translations shall be specified as [ 1 0 0 1 tx ty ], where tx and ty
        **   shall be the distances to translate the origin of the coordinate system
        **   in the horizontal and vertical dimensions, respectively.
        **
        ** - Scaling shall be obtained by [ sx 0 0 sy 0 0 ]. This scales the coordinates
        **   so that 1 unit in the horizontal and vertical dimensions of the new coordinate
        **   system is the same size as sx and sy units, respectively, in the previous
        **   coordinate system.
        **
        ** - Rotations shall be produced by [ cos q sin q -sin q cos q 0 0 ], which has the
        **   effect of rotating the coordinate system axes by an angle q counter-clockwise.
        **
        ** - Skew shall be specified by [ 1 tan a tan b 1 0 0 ], which skews the x axis by
        **   an angle a and the y axis by an angle b.
        **
        */
        wpdfbox_determine_original_source_position(box);
        if (fabs(srcpagerot) > 1.0e-4)
            wpdfbox_unrotate(box,srcpagerot);

        /* Transformation matrix (source page coords -> destination page coords) */
        matrix_unity(m,1.);
        matrix_translate(m1,-box->x0-srcx0,-box->y0-srcy0);
        matrix_mul(m,m1);
        matrix_rotate(m1,-box->srcrot_deg+box->dstrot_deg);
        matrix_mul(m,m1);
        matrix_unity(m1,box->scale);
        matrix_mul(m,m1);
        matrix_translate(m1,box->x1,box->y1);
        matrix_mul(m,m1);
        matrix_zero_round(m);

        /* Clip path matrix */
        matrix_rotate(cpm,box->srcrot_deg);
        matrix_translate(m1,box->x0+srcx0,box->y0+srcy0);
        matrix_mul(cpm,m1);
        set_clip_array(xclip,yclip,box->srcrot_deg,box->w,box->h);
        for (k=0;k<4;k++)
            matrix_xymul(cpm,&xclip[k],&yclip[k]);

        /* Build "q <matrix> cm <clip path> W n" */
        strcpy(buf,"q");
        for (k=0;k<=2;k++)
            {
            cat_pdf_double(buf,m[k][0]);
            cat_pdf_double(buf,m[k][1]);
            }
        strcat(buf," cm");
        /* Five points:  the fifth (k&3 == 0) closes the path at the first corner */
        for (k=0;k<=4;k++)
            {
            cat_pdf_double(buf,xclip[k&3]);
            cat_pdf_double(buf,yclip[k&3]);
            strcat(buf,k==0 ? " m" : " l");
            }
        strcat(buf," W n");
        if (use_forms)
            {
            size_t needed;

            /* FORM METHOD */
            sprintf(&buf[strlen(buf)]," /%s Do Q\n",xobject_name(box->srcbox.pageno));
            /* Must have room for both strings plus the terminating '\0' */
            needed=strlen(bigbuf)+strlen(buf)+1;
            if (needed > (size_t)nbb)
                {
                int newsize;

                newsize=nbb*2;
                while ((size_t)newsize < needed)
                    newsize*=2;
                willus_mem_realloc_robust_warn((void **)&bigbuf,newsize,nbb,funcname,10);
                nbb=newsize;
                }
            strcat(bigbuf,buf);
            }
        else
            {
            /* NO-FORMS METHOD */
            strcat(buf,"\n");
            /* Create new objects in document for tx matrix and restore matrix */
            s1indirect = pdf_new_indirect(ctx,new_stream_object(xref,ctx,buf),0,(void *)xref);
            /* The "Q" (restore) stream is created once and shared by all boxes */
            if (qref==0)
                qref=new_stream_object(xref,ctx,"Q\n");
            qindirect = pdf_new_indirect(ctx,qref,0,(void *)xref);
            /* Store this region into the destination page contents array */
            pdf_array_push(destpagecontents,s1indirect);
            if (pdf_is_array(srcpagecontents))
                {
                int c;

                for (c=0;c<pdf_array_len(srcpagecontents);c++)
                    pdf_array_push(destpagecontents,pdf_array_get(srcpagecontents,c));
                }
            else
                pdf_array_push(destpagecontents,srcpagecontents);
            pdf_array_push(destpagecontents,qindirect);
            pdf_drop_obj(s1indirect);
            pdf_drop_obj(qindirect);
            }
        }
    pdf_drop_obj(parent);

    /* For forms, convert all original source pages to XObject Forms */
    if (use_forms)
        wmupdf_convert_pages_to_forms(xref,ctx,srcpageused);

    /* Update page count and kids array */
    countobj = pdf_new_int(ctx, pdf_array_len(kids));
    pdf_dict_puts(pages, "Count", countobj);
    pdf_drop_obj(countobj);
    pdf_dict_puts(pages, "Kids", kids);
    pdf_drop_obj(kids);

    /* Also preserve the (partial) Dests name tree */
    if (olddests)
        wmupdf_preserve_old_dests(olddests,ctx,xref,pages);
    if (use_forms)
        {
        /* Free memory */
        willus_mem_free((double **)&bigbuf,funcname);
        willus_mem_free((double **)&srcpageused,funcname);
        }
    return(0);
    }