Beispiel #1
0
void pageregions_insert(PAGEREGIONS *dst,int index,PAGEREGIONS *src)

    {
    static char *funcname="pageregions_insert";
    int i;

    if (src->n<1)
        return;
    if (dst->n + src->n > dst->na)
        {
        int newsize;

        newsize = dst->na<16 ? 32 : dst->na*2;
        while (newsize < dst->n + src->n)
            newsize *= 2;
        willus_mem_realloc_robust_warn((void **)&dst->pageregion,newsize*sizeof(PAGEREGION),
                                     dst->na*sizeof(PAGEREGION),funcname,10);
        dst->na=newsize;
        }
    /* Must initialize the new array elements that will be used */
    for (i=dst->n;i<dst->n+src->n;i++)
        pageregion_init(&dst->pageregion[i]);
    for (i=dst->n+src->n-1;i-src->n>=index;i--)
        pageregion_copy(&dst->pageregion[i],&dst->pageregion[i-src->n]);
    for (i=0;i<src->n;i++)
        pageregion_copy(&dst->pageregion[i+index],&src->pageregion[i]);
    dst->n += src->n;
    }
Beispiel #2
0
/*
** Allocates new space for text buffer.
*/
void ocrwords_add_word(OCRWORDS *words,OCRWORD *word)

    {
    static char *funcname="ocrwords_add_word";
    int i;

    if (words->n>=words->na)
        {
        int newsize;
      
        newsize = words->na<512 ? 1024 : words->na*2;
        willus_mem_realloc_robust_warn((void **)&words->word,newsize*sizeof(OCRWORD),
                                    words->na*sizeof(OCRWORD),funcname,10);
        for (i=words->na;i<newsize;i++)
            ocrword_init(&words->word[i]);
        words->na=newsize;
        }
    words->word[words->n]=(*word);
    words->word[words->n].text=NULL;
    willus_mem_alloc_warn((void **)&words->word[words->n].text,strlen(word->text)+1,funcname,10);
    strcpy(words->word[words->n].text,word->text);
    /* Copy char positions */
    words->word[words->n].n=utf8_to_unicode(NULL,word->text,1000000);
    if (word->cpos!=NULL)
        {
        willus_mem_alloc_warn((void **)&words->word[words->n].cpos,
                       words->word[words->n].n*sizeof(double),funcname,10);
        for (i=0;i<words->word[words->n].n;i++)
            words->word[words->n].cpos[i] = word->cpos[i];
        }
    else
        words->word[words->n].cpos=NULL;
    words->n++;
    }
Beispiel #3
0
void strbuf_ensure(STRBUF *sbuf,int n)

    {
    static char *funcname="strbuf_ensure";
    if (n>sbuf->na)
        {
        willus_mem_realloc_robust_warn((void**)&sbuf->s,n,sbuf->na,funcname,10);
        if (sbuf->na==0)
            sbuf->s[0]='\0';
        sbuf->na=n;
        }
    }
Beispiel #4
0
void wrectmaps_add_wrectmap(WRECTMAPS *wrectmaps,WRECTMAP *wrectmap)

    {
    static char *funcname="wrectmaps_add_wrectmap";

    if (wrectmaps->n>=wrectmaps->na)
        {
        int newsize;
        newsize = wrectmaps->na < 128 ? 256 : wrectmaps->na*2;
        willus_mem_realloc_robust_warn((void **)&wrectmaps->wrectmap,newsize*sizeof(WRECTMAP),
                                    wrectmaps->na*sizeof(WRECTMAP),funcname,10);
        wrectmaps->na=newsize;
        }
    wrectmaps->wrectmap[wrectmaps->n++]=(*wrectmap);
    }
Beispiel #5
0
void wpdfboxes_add_box(WPDFBOXES *boxes,WPDFBOX *box)

    {
    static char *funcname="wpdfboxes_add_box";

    if (boxes->n>=boxes->na)
        {
        int newsize;

        newsize = boxes->na < 1024 ? 2048 : boxes->na*2;
        willus_mem_realloc_robust_warn((void **)&boxes->box,newsize*sizeof(WPDFBOX),
                                      boxes->na*sizeof(WPDFBOX),funcname,10);
        boxes->na=newsize;
        }
    boxes->box[boxes->n++]=(*box);
    }
Beispiel #6
0
void k2pdfopt_files_add_file(K2PDFOPT_FILES *k2files,char *filename)

    {
    static char *funcname="k2pdfopt_files_add_file";

    if (k2files->n >= k2files->na)
        {
        int newsize;
        newsize = k2files->na<128 ? 256 : k2files->na*2;
        willus_mem_realloc_robust_warn((void **)&k2files->file,sizeof(char *)*newsize,
                                       sizeof(char *)*k2files->na,funcname,10);
        k2files->na=newsize;
        }
    willus_mem_alloc_warn((void **)&k2files->file[k2files->n],strlen(filename)+1,funcname,10);
    strcpy(k2files->file[k2files->n],filename);
    k2files->n++;
    }
Beispiel #7
0
void pageregions_add_pageregion(PAGEREGIONS *regions,BMPREGION *bmpregion,int level,
                                       int fullspan,int notes)

    {
    static char *funcname="pageregions_add_pageregion";

    if (regions->n>=regions->na)
        {
        int newsize;

        newsize = regions->na<16 ? 32 : regions->na*2;
        willus_mem_realloc_robust_warn((void **)&regions->pageregion,newsize*sizeof(PAGEREGION),
                                     regions->na*sizeof(PAGEREGION),funcname,10);
        regions->na=newsize;
        }
    pageregion_init(&regions->pageregion[regions->n]);
    bmpregion_copy(&regions->pageregion[regions->n].bmpregion,bmpregion,1);
    regions->pageregion[regions->n].level=level;
    regions->pageregion[regions->n].fullspan=fullspan;
    regions->pageregion[regions->n].notes=notes;
    regions->n++;
    }
Beispiel #8
0
static void str0_addstring(char **list,int *na,char *s)

    {
    char *d;
    int i,needed;
    static char *funcname="str0_addstring";

    if (s==NULL || s[0]=='\0')
        return;
    i=str0_len(*list);
    needed=i+strlen(s)+2;
    if ((*list)==NULL || (needed > (*na)))
        {
        int newsize;
        newsize = (*na)<512 ? 1024 : (*na)*2;
        while (newsize < needed)
            newsize *= 2;
        willus_mem_realloc_robust_warn((void **)list,newsize,(*na),funcname,10);
        (*na)=newsize;
        }
    d=(*list);
    strcpy(&d[i],s);
    d[i+strlen(s)+1]='\0';
    }
Beispiel #9
0
static int wmupdf_pdfdoc_newpages(pdf_document *xref,fz_context *ctx,WPDFPAGEINFO *pageinfo,
                                  int use_forms,FILE *out)

    {
    static char *funcname="wmupdf_pdfdoc_newpages";
    pdf_obj *root,*oldroot,*pages,*kids,*countobj,*parent,*olddests;
    pdf_obj *srcpageobj,*srcpagecontents;
    pdf_obj *destpageobj,*destpagecontents,*destpageresources;
    double srcx0,srcy0;
    int qref,i,i0,pagecount,srccount,destpageref,nbb;
    int *srcpageused;
    char *bigbuf;
    double srcpagerot;

    /* Avoid compiler warning */
    destpageref = 0;
    destpageobj = NULL;
    srcx0=srcy0=0.;
    /* Keep only pages/type and (reduced) dest entries to avoid references to unretained pages */
    pagecount = pdf_count_pages(xref);
    if (use_forms)
        {
        willus_mem_alloc_warn((void **)&srcpageused,sizeof(int)*(pagecount+1),funcname,10);
        /* Mark all source pages as "not done" */
        for (i=0;i<=pagecount;i++)
            srcpageused[i]=0;
        nbb=4096;
        willus_mem_alloc_warn((void **)&bigbuf,nbb,funcname,10);
        bigbuf[0]='\0';
        }
    oldroot = pdf_dict_gets(xref->trailer,"Root");
    /*
    ** pages points to /Pages object in PDF file.
    ** Has:  /Type /Pages, /Count <numpages>, /Kids [ obj obj obj obj ]
    */
    pages = pdf_dict_gets(oldroot,"Pages");
    olddests = pdf_load_name_tree(xref,"Dests");

    /*
    ** Create new root object with only /Pages and /Type (and reduced dest entries)
    ** to avoid references to unretained pages.
    */
    root = pdf_new_dict(ctx,4);
    pdf_dict_puts(root,"Type",pdf_dict_gets(oldroot,"Type"));
    pdf_dict_puts(root,"Pages",pages);
    pdf_update_object(xref,pdf_to_num(oldroot),root);
    pdf_drop_obj(root);

    /* Parent indirectly references the /Pages object in the file */
    /* (Each new page we create has to point to this.)            */
    parent = pdf_new_indirect(ctx, pdf_to_num(pages), pdf_to_gen(pages), xref);
    /* Create a new kids array with only the pages we want to keep */
    kids = pdf_new_array(ctx, 1);


    qref=0;
    /* Avoid compiler warnings */
    destpageresources=NULL;
    destpagecontents=NULL;
    srcpagecontents=NULL;
    srcpagerot=0.;
    for (i=0;i<=pageinfo->boxes.n;i++)
        if (pageinfo->boxes.box[i].dstpage>0)
            break;
    if (i>0)
        {
        if (i<pageinfo->boxes.n)
            memmove(&pageinfo->boxes.box[0],&pageinfo->boxes.box[i],sizeof(WPDFBOX)*pageinfo->boxes.n-i);
        pageinfo->boxes.n -= i;
        }
    /* Walk through PFDBOXES array */
    for (i=srccount=i0=0;i<=pageinfo->boxes.n;i++)
        {
        WPDFBOX *box;
        int j,k,newsrc;
        static char buf[512];
        pdf_obj *s1indirect,*qindirect,*rotobj;
        static double cpm[3][3],m[3][3],m1[3][3];
        static double xclip[4],yclip[4];

/*
printf("box[%d]\n",i);
if (i<pageinfo->boxes.n)
{
box=&pageinfo->boxes.box[i];
printf("    srcpage=%d, dstpage=%d\n",box->srcbox.pageno,box->dstpage);
printf("    x0=%g, y0=%g\n",box->x0,box->y0);
printf("    w=%g, h=%g\n",box->w,box->h);
printf("    x1=%g, y1=%g\n",box->x1,box->y1);
printf("    sr=%g, dr=%g\n",box->srcrot_deg,box->dstrot_deg);
printf("    scale=%g\n",box->scale);
}
*/
        /* Check to see if we are done with an output page */
        if (srccount>0 && (i==pageinfo->boxes.n
               || (i>0 && pageinfo->boxes.box[i].dstpage!=pageinfo->boxes.box[i-1].dstpage)))
            {
            pdf_obj *newpageref;
            /*
            ** Store destination page into document structure
            */
/*
printf("    ADDING NEW PAGE. (srccount=%d)\n",srccount);
*/
            if (use_forms)
                {
                pdf_obj *dest_stream;

                /* Create new object in document for destination page stream */
                dest_stream = pdf_new_indirect(ctx,new_stream_object(xref,ctx,bigbuf),
                                               0,(void *)xref);
                /* Store this into the destination page contents array */
                pdf_array_push(destpagecontents,dest_stream);
                pdf_drop_obj(dest_stream);
                }
            newpageref=pdf_new_indirect(ctx,destpageref,0,(void *)xref);
            /* Reference parent list of pages */
            pdf_dict_puts(destpageobj,"Parent",parent);
            pdf_dict_puts(destpageobj,"Contents",destpagecontents);
            pdf_dict_puts(destpageobj,"Resources",destpageresources);
            /* Store page object in document's kids array */
            pdf_array_push(kids,newpageref);
            /* Update document with new page */
            pdf_update_object(xref,destpageref,destpageobj);
            /* Clean up */
            pdf_drop_obj(newpageref);
            pdf_drop_obj(destpageresources);
            pdf_drop_obj(destpagecontents);
            pdf_drop_obj(destpageobj);
            /* Reset source page and index to start of new destination page */
            i0=i;
            srccount=0;
            }
        /* Quit loop if beyond last box */
        if (i>=pageinfo->boxes.n)
            break;
        box=&pageinfo->boxes.box[i];
        if (box->srcbox.pageno<1 || box->srcbox.pageno>pagecount)
            continue;
        /* Is this a source page we haven't processed yet (for this destination page)? */
        for (newsrc=1,j=i0;j<i;j++)
            {
            if (pageinfo->boxes.box[j].srcbox.pageno==box->srcbox.pageno)
                {
                newsrc=0;
                break;
                }
            }
        if (newsrc)
            {
            double v[4];

            srccount++;
            if (use_forms)
                srcpageused[box->srcbox.pageno]=1;
/*
printf("    NEW SOURCE PAGE (srccount=%d)\n",srccount);
*/
            if (srccount==1)
                {
                /*
                ** Start a new destination page.
                **
                ** Each new page object is a dict type with:
                ** /Type /Page
                ** /Contents (array of objects)
                ** /Resources (dict)
                ** /MediaBox [0 0 612 792]
                ** /Parent <PagesObj>
                ** [Can have /Rotate 90, for example.]
                **
                */
/*
printf("        (STARTING NEW DEST. PAGE)\n");
*/
                destpageobj=start_new_destpage(ctx,box->dst_width_pts,box->dst_height_pts);
                destpageresources=pdf_new_dict(ctx,1);
                if (use_forms)
                    pdf_dict_puts(destpageresources,"XObject",pdf_new_dict(ctx,1));
                destpageref=pdf_create_object(xref);
                destpagecontents=pdf_new_array(ctx,1);
                /* Init the destination page stream for forms */
                if (use_forms)
                    bigbuf[0]='\0';
                }
            /* New source page, so get the source page objects */
            srcpageobj = xref->page_objs[box->srcbox.pageno-1];
            wmupdf_page_bbox(srcpageobj,v);
            srcx0=v[0];
            srcy0=v[1];
/*
printf("SRCX0=%g, SRCY0=%g\n",srcx0,srcy0);
*/
            rotobj=pdf_dict_gets(srcpageobj,"Rotate");
            srcpagerot = rotobj!=NULL ? pdf_to_real(rotobj) : 0.;
/*
printf("Page rotation = %g\n",srcpagerot);
*/
            srcpagecontents=pdf_dict_gets(srcpageobj,"Contents");
/*
if (pdf_is_array(srcpagecontents))
{
int k;
printf("    source page contents = array.\n");
for (k=0;k<pdf_array_len(srcpagecontents);k++)
{
pdf_obj *obj;
obj=pdf_array_get(srcpagecontents,k);
if (pdf_is_indirect(obj))
{
printf("    contents[%d] = indirect (%d)\n",k,pdf_to_num(obj));
pdf_resolve_indirect(obj);
}
}
}
*/
            if (use_forms)
                {
                pdf_obj *xobjdict;
                int pageno;

                xobjdict=pdf_dict_gets(destpageresources,"XObject");
                pageno=box->srcbox.pageno;
                pdf_dict_puts(xobjdict,xobject_name(pageno),xref->page_refs[pageno-1]);
                pdf_dict_puts(destpageresources,"XObject",xobjdict);
                }
            else
                {
                pdf_obj *srcpageresources;

                /* Merge source page resources into destination page resources */
                srcpageresources=pdf_dict_gets(srcpageobj,"Resources");
/*
printf("box->dstpage=%d, srcpage=%d (ind.#=%d)\n",box->dstpage,box->srcbox.pageno,pdf_to_num(xref->page_refs[box->srcbox.pageno-1]));
*/
                wmupdf_dict_merge(ctx,"Resources",destpageresources,srcpageresources);
                }
            }
        /*
        ** Process this source box:
        **
        ** Create a tranformation matrix and clipping path to only show the
        ** desired part of the source page at the appropriate place on the
        ** destination page.
        **
        ** How the tranformation matrix works:
        ** - Translations shall be specified as [ 1 0 0 1 tx ty ], where tx and ty
        **   shall be the distances to translate the origin of the coordinate system
        **   in the horizontal and vertical dimensions, respectively.
        **
        ** - Scaling shall be obtained by [ sx 0 0 sy 0 0 ]. This scales the coordinates
        **   so that 1 unit in the horizontal and vertical dimensions of the new coordinate
        **   system is the same size as sx and sy units, respectively, in the previous
        **   coordinate system.
        **
        ** - Rotations shall be produced by [ cos q sin q -sin q cos q 0 0 ], which has the
        **   effect of rotating the coordinate system axes by an angle q counter-clockwise.
        **
        ** - Skew shall be specified by [ 1 tan a tan b 1 0 0 ], which skews the x axis by
        **   an angle a and the y axis by an angle b.
        **
        */
        wpdfbox_determine_original_source_position(box);
/*
printf("Before unrotate.\n");
printf("box->srcrot=%g\n",box->srcrot_deg);
printf("box->x0=%g, box->y0=%g\n",box->x0,box->y0);
printf("box->w=%g, box->h=%g\n",box->w,box->h);
printf("box->pw=%g, box->ph=%g\n",box->src_width_pts,box->src_height_pts);
*/
        if (fabs(srcpagerot) > 1.0e-4)
            wpdfbox_unrotate(box,srcpagerot);
/*
printf("box->srcrot=%g\n",box->srcrot_deg);
printf("box->x0=%g, box->y0=%g\n",box->x0,box->y0);
printf("box->w=%g, box->h=%g\n",box->w,box->h);
printf("box->pw=%g, box->ph=%g\n",box->src_width_pts,box->src_height_pts);
*/
        matrix_unity(m,1.);
/*
printf("xfmatrix = [  %9.6f   %9.6f   %9.6f  ]\n"
       "           [  %9.6f   %9.6f   %9.6f  ]\n"
       "           [  %9.6f   %9.6f   %9.6f  ]\n",
        m[0][0],m[0][1],m[0][2],
        m[1][0],m[1][1],m[1][2],
        m[2][0],m[2][1],m[2][2]);
*/
        matrix_translate(m1,-box->x0-srcx0,-box->y0-srcy0);
        matrix_mul(m,m1);
        matrix_rotate(m1,-box->srcrot_deg+box->dstrot_deg);
        matrix_mul(m,m1);
        matrix_unity(m1,box->scale);
        matrix_mul(m,m1);
        matrix_translate(m1,box->x1,box->y1);
        matrix_mul(m,m1);
        matrix_zero_round(m);
        matrix_rotate(cpm,box->srcrot_deg);
        matrix_translate(m1,box->x0+srcx0,box->y0+srcy0);
        matrix_mul(cpm,m1);
/*
printf("Clip matrix:\n");
printf("xfmatrix = [  %9.6f   %9.6f   %9.6f  ]\n"
       "           [  %9.6f   %9.6f   %9.6f  ]\n"
       "           [  %9.6f   %9.6f   %9.6f  ]\n",
        cpm[0][0],cpm[0][1],cpm[0][2],
        cpm[1][0],cpm[1][1],cpm[1][2],
        cpm[2][0],cpm[2][1],cpm[2][2]);
*/


        set_clip_array(xclip,yclip,box->srcrot_deg,box->w,box->h);
        for (k=0;k<4;k++)
            matrix_xymul(cpm,&xclip[k],&yclip[k]);
/*
printf("Clip path:\n    %7.2f %7.2f\n    %7.2f,%7.2f\n    %7.2f,%7.2f\n"
                   "    %7.2f %7.2f\n    %7.2f,%7.2f\n",
                xclip[0],yclip[0],xclip[1],yclip[1],xclip[2],yclip[2],
                xclip[3],yclip[3],xclip[0],yclip[0]);
*/
        strcpy(buf,"q");
        for (k=0;k<=2;k++)
            {
            cat_pdf_double(buf,m[k][0]);
            cat_pdf_double(buf,m[k][1]);
            }
        strcat(buf," cm");
        for (k=0;k<=4;k++)
            {
            cat_pdf_double(buf,xclip[k&3]);
            cat_pdf_double(buf,yclip[k&3]);
            strcat(buf,k==0 ? " m" : " l");
            }
        strcat(buf," W n");
        if (use_forms)
            {
            /* FORM METHOD */
            sprintf(&buf[strlen(buf)]," /%s Do Q\n",xobject_name(box->srcbox.pageno));
            if (strlen(bigbuf)+strlen(buf) > nbb)
                {
                int newsize;
                newsize=nbb*2;
                willus_mem_realloc_robust_warn((void **)&bigbuf,newsize,nbb,funcname,10);
                nbb=newsize;
                }
            strcat(bigbuf,buf);
            }
        else
            {
            /* NO-FORMS METHOD */
            strcat(buf,"\n");
            /* Create new objects in document for tx matrix and restore matrix */
            s1indirect = pdf_new_indirect(ctx,new_stream_object(xref,ctx,buf),0,(void *)xref);
            if (qref==0)
                qref=new_stream_object(xref,ctx,"Q\n");
            qindirect = pdf_new_indirect(ctx,qref,0,(void *)xref);
            /* Store this region into the destination page contents array */
            pdf_array_push(destpagecontents,s1indirect);
            if (pdf_is_array(srcpagecontents))
                {
                int k;
                for (k=0;k<pdf_array_len(srcpagecontents);k++)
                    pdf_array_push(destpagecontents,pdf_array_get(srcpagecontents,k));
                }
            else
                pdf_array_push(destpagecontents,srcpagecontents);
            pdf_array_push(destpagecontents,qindirect);
            pdf_drop_obj(s1indirect);
            pdf_drop_obj(qindirect);
            }
        }
    pdf_drop_obj(parent);

    /* For forms, convert all original source pages to XObject Forms */
    if (use_forms)
        wmupdf_convert_pages_to_forms(xref,ctx,srcpageused);

    /* Update page count and kids array */
    countobj = pdf_new_int(ctx, pdf_array_len(kids));
    pdf_dict_puts(pages, "Count", countobj);
    pdf_drop_obj(countobj);
    pdf_dict_puts(pages, "Kids", kids);
    pdf_drop_obj(kids);

    /* Also preserve the (partial) Dests name tree */
    if (olddests)
        wmupdf_preserve_old_dests(olddests,ctx,xref,pages);
    if (use_forms)
        {
        /* Free memory */
        willus_mem_free((double **)&bigbuf,funcname);
        willus_mem_free((double **)&srcpageused,funcname);
        }
    return(0);
    }