/*
** Maintain a running history of normalized inter-word gaps so that an
** out-of-family gap can be recognized against the median.
**
** textwords    = words whose gaps are to be recorded (may be NULL).
** lcheight     = height of a lowercase letter; each gap is divided by it.
** median_gap   = if not NULL, receives the median of the stored gaps, or
**                0.7 if no gap has been stored yet.
** word_spacing = only normalized gaps >= this value are stored.
**
** Calling with textwords==NULL and median_gap==NULL clears the history.
** The history is a 1024-entry ring buffer held in static storage; once it
** is full the oldest entries are overwritten.
*/
void textwords_add_word_gaps(TEXTWORDS *textwords,int lcheight,double *median_gap,
                             double word_spacing)
{
    static int ngaps=0;
    static double gaphist[1024];
    static char *funcname="word_gaps_add";
    double *sorted; /* v2.02--this variable is no longer static */
    int count;

    /* Both pointers NULL is the "reset" request */
    if (textwords==NULL && median_gap==NULL) {
        ngaps=0;
        return;
    }

    /* Record the gap that follows every word except the last one */
    if (textwords!=NULL && textwords->n>1) {
        int iw;

        for (iw=0;iw<textwords->n-1;iw++) {
            double normgap;

            normgap=(double)textwords->textrow[iw].gap/lcheight;
            if (!(normgap>=word_spacing))
                continue;
            gaphist[ngaps&0x3ff]=normgap;
            ngaps++;
        }
    }

    if (median_gap==NULL)
        return;
    if (ngaps<=0) {
        /* Nothing stored yet: report the default */
        (*median_gap)=0.7;
        return;
    }

    /* Sort a scratch copy of the valid part of the ring and take the middle */
    count=ngaps;
    if (count>1024)
        count=1024;
    willus_dmem_alloc_warn(28,(void **)&sorted,sizeof(double)*count,funcname,10);
    memcpy(sorted,gaphist,count*sizeof(double));
    sortd(sorted,count);
    (*median_gap)=sorted[count/2];
    willus_dmem_free(28,&sorted,funcname);
}
/*
** Scan 10 horizontal bands of a greyscale bitmap and return the largest
** value that inflection_count() reports for any of them.
**
** srcgrey    = source bitmap; read one byte per pixel.
** ndivisions = each band is srcgrey->height/ndivisions rows tall.
**              Must be non-zero (it is used as an integer divisor).
** delta      = passed through to inflection_count().
** wthresh    = in/out white threshold.  If (*wthresh)<0 on entry, each band
**              lets inflection_count() choose its own threshold, and on
**              return (*wthresh) is the largest threshold chosen by a band
**              that scored >= 3 (or -1 if there was no such band).
**              If (*wthresh)>=0 it is used as-is and left unchanged.
**
** Only the middle two-thirds of the columns (width/6 .. width-width/6)
** are examined.  For each band, every column is averaged over the band's
** rows and the resulting profile is handed to inflection_count().
*/
double bmp_inflections_horizontal(WILLUSBITMAP *srcgrey,int ndivisions,int delta,int *wthresh)
{
    int x0,x1,nx,bw,i,nh,nisum,ni,wt,wtmax;
    double *g;
    /* Was "bmp_inflections_vertical" (copy/paste), which mislabeled diagnostics */
    char *funcname="bmp_inflections_horizontal";

    nh=srcgrey->height/ndivisions;
    x0=srcgrey->width/6;
    x1=srcgrey->width-x0;
    nx=x1-x0;
    bw=bmp_bytewidth(srcgrey);
    willus_dmem_alloc_warn(22,(void **)&g,nx*sizeof(double),funcname,10);
    wtmax=-1;
    for (nisum=0,i=0;i<10;i++) {
        int y0,y1,ny,j;

        /* Band tops are spread over fractions 2/13 .. 11/13 of the usable height */
        y0=(srcgrey->height-nh)*(i+2)/13;
        y1=y0+nh;
        if (y1>srcgrey->height)
            y1=srcgrey->height;
        ny=y1-y0;
        /*
        ** An empty band (bitmap shorter than ndivisions rows) would make
        ** every average 0/0 = NaN, so skip it instead of analyzing garbage.
        */
        if (ny<=0)
            continue;
        for (j=x0;j<x1;j++) {
            int k,rsum;
            unsigned char *p;

            /* Walk down column j one row (bw bytes) at a time */
            p=bmp_rowptr_from_top(srcgrey,y0)+j;
            for (rsum=k=0;k<ny;k++,p+=bw)
                rsum+=p[0];
            g[j-x0]=(double)rsum/ny;
        }
        wt=(*wthresh);
        ni=inflection_count(g,nx,delta,&wt);
        if ((*wthresh)<0 && ni>=3 && wt>wtmax)
            wtmax=wt;
        /* Despite its name, nisum tracks the maximum, not a sum */
        if (ni>nisum)
            nisum=ni;
    }
    willus_dmem_free(22,&g,funcname);
    if ((*wthresh)<0)
        (*wthresh)=wtmax;
    return(nisum);
}
/*
** Scan 10 vertical strips of a greyscale bitmap and return the largest
** value that inflection_count() reports for any of them.
**
** srcgrey    = source bitmap; read one byte per pixel.
** ndivisions = each strip is srcgrey->width/ndivisions columns wide.
**              Must be non-zero (it is used as an integer divisor).
** delta      = passed through to inflection_count().
** wthresh    = in/out white threshold.  If (*wthresh)<0 on entry, each strip
**              lets inflection_count() choose its own threshold, and on
**              return (*wthresh) is the largest threshold chosen by a strip
**              that scored >= 3 (or -1 if there was no such strip).
**              If (*wthresh)>=0 it is used as-is and left unchanged.
**
** Only the middle two-thirds of the rows (height/6 .. height-height/6)
** are examined.  For each strip, every row is averaged over the strip's
** columns and the resulting profile is handed to inflection_count().
*/
double bmp_inflections_vertical(WILLUSBITMAP *srcgrey,int ndivisions,int delta,int *wthresh)
{
    int y0,y1,ny,i,nw,nisum,ni,wt,wtmax;
    double *g;
    char *funcname="bmp_inflections_vertical";

    nw=srcgrey->width/ndivisions;
    y0=srcgrey->height/6;
    y1=srcgrey->height-y0;
    ny=y1-y0;
    willus_dmem_alloc_warn(21,(void **)&g,ny*sizeof(double),funcname,10);
    wtmax=-1;
    for (nisum=0,i=0;i<10;i++) {
        int x0,x1,nx,j;

        /* Strip left edges are spread over fractions 2/13 .. 11/13 of the usable width */
        x0=(srcgrey->width-nw)*(i+2)/13;
        x1=x0+nw;
        if (x1>srcgrey->width)
            x1=srcgrey->width;
        nx=x1-x0;
        /*
        ** An empty strip (bitmap narrower than ndivisions columns) would make
        ** every average 0/0 = NaN, so skip it instead of analyzing garbage.
        */
        if (nx<=0)
            continue;
        for (j=y0;j<y1;j++) {
            int k,rsum;
            unsigned char *p;

            /* Sum nx consecutive bytes of row j starting at column x0 */
            p=bmp_rowptr_from_top(srcgrey,j)+x0;
            for (rsum=k=0;k<nx;k++,p++)
                rsum+=p[0];
            g[j-y0]=(double)rsum/nx;
        }
        wt=(*wthresh);
        ni=inflection_count(g,ny,delta,&wt);
        if ((*wthresh)<0 && ni>=3 && wt>wtmax)
            wtmax=wt;
        /* Despite its name, nisum tracks the maximum, not a sum */
        if (ni>nisum)
            nisum=ni;
    }
    willus_dmem_free(21,&g,funcname);
    if ((*wthresh)<0)
        (*wthresh)=wtmax;
    return(nisum);
}
/*
** Send the bitmap accumulated in masterinfo->wrapbmp to the master output
** (via bmpregion_add) and reset the wrap state so a new line can be started.
**
** masterinfo               = owner of the wrapbmp structure and the output.
** k2settings               = settings; last_rowbase_internal,
**                            vertical_line_spacing, src_dpi and
**                            gap_override_internal are read here, and
**                            gap_override_internal is reset to -1 if it
**                            was used.
** allow_full_justification = if zero, bits 0x30 of wrapbmp->just are
**                            replaced by 0x20 before the region is added.
** use_bgi                  = if 1, a positive beginning_gap_internal is
**                            emitted as a gap; if non-zero,
**                            just_flushed_internal is set to 1.
**
** If the wrap bitmap is empty (width<=0), only the beginning-gap handling
** and the internal-state reset are performed.
*/
void wrapbmp_flush(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings,
                   int allow_full_justification,int use_bgi)
{
    BMPREGION region;
    WILLUSBITMAP *bmp8,_bmp8;
    int gap,just,nomss,dh;
    int *colcount,*rowcount;
    static char *funcname="wrapbmp->flush";
    WRAPBMP *wrapbmp;

    bmp8=NULL;
    wrapbmp=&masterinfo->wrapbmp;

    /* Nothing accumulated: just handle the pending gap and reset state */
    if (wrapbmp->bmp.width<=0) {
        if (use_bgi==1 && wrapbmp->beginning_gap_internal > 0)
            masterinfo_add_gap_src_pixels(masterinfo,k2settings,
                                          wrapbmp->beginning_gap_internal,"wrapbmp->bgi0");
        wrapbmp->beginning_gap_internal=-1;
        wrapbmp->last_h5050_internal=-1;
        if (use_bgi)
            wrapbmp->just_flushed_internal=1;
        return;
    }
#ifdef WILLUSDEBUG
    printf("@wrapbmp->flush()\n");
#endif

    /* Scratch arrays handed to bmpregion_add (16 elements of slack each) */
    colcount=rowcount=NULL;
    willus_dmem_alloc_warn(19,(void **)&colcount,(wrapbmp->bmp.width+16)*sizeof(int),funcname,10);
    willus_dmem_alloc_warn(20,(void **)&rowcount,(wrapbmp->bmp.height+16)*sizeof(int),funcname,10);

    /* The region initially covers the entire wrap bitmap */
    region.c1=0;
    region.c2=wrapbmp->bmp.width-1;
    region.r1=0;
    region.r2=wrapbmp->bmp.height-1;
    region.rowbase=wrapbmp->base;
    region.bmp=&wrapbmp->bmp;
    region.bgcolor=wrapbmp->bgcolor;
    region.dpi=k2settings->src_dpi;
#ifdef WILLUSDEBUG
    printf("Bitmap is %d x %d (baseline=%d)\n",wrapbmp->bmp.width,wrapbmp->bmp.height,wrapbmp->base);
#endif

    /* Sanity check on row spacing -- don't let it be too large. */
    nomss = wrapbmp->rhmax*1.7; /* Nominal single-spaced height for this row */
    if (k2settings->last_rowbase_internal<0)
        dh = 0;
    else {
        dh=(int)(wrapbmp->line_spacing-k2settings->last_rowbase_internal
                 - 1.2*fabs(k2settings->vertical_line_spacing)*nomss +.5);
        /* For negative vertical_line_spacing, use the larger of two estimates */
        if (k2settings->vertical_line_spacing < 0.) {
            int dh1;

            if (wrapbmp->maxgap > 0)
                dh1 = region.rowbase+1-wrapbmp->rhmax-wrapbmp->maxgap;
            else
                dh1=(int)(wrapbmp->line_spacing-k2settings->last_rowbase_internal
                          - 1.2*nomss+.5);
            if (dh1 > dh)
                dh =dh1;
        }
    }
    /* A positive dh drops that many rows from the top of the region */
    if (dh>0) {
#ifdef WILLUSDEBUG
        aprintf(ANSI_YELLOW "dh > 0 = %d" ANSI_NORMAL "\n",dh);
        printf(" wrapbmp->line_spacing=%d\n",wrapbmp->line_spacing);
        printf(" nomss = %d\n",nomss);
        printf(" vls = %g\n",k2settings->vertical_line_spacing);
        printf(" lrbi=%d\n",k2settings->last_rowbase_internal);
        printf(" wrapbmp->maxgap=%d\n",wrapbmp->maxgap);
        printf(" wrapbmp->rhmax=%d\n",wrapbmp->rhmax);
#endif
        region.r1 = dh;
    }

    /* bmpregion_add gets a greyscale companion; convert only for 24-bit input */
    if (wrapbmp->bmp.bpp==24) {
        bmp8=&_bmp8;
        bmp_init(bmp8);
        bmp_convert_to_greyscale_ex(bmp8,&wrapbmp->bmp);
        region.bmp8=bmp8;
    }
    else
        region.bmp8=&wrapbmp->bmp;

    /* A pending gap override replaces both the top row and the gap (one-shot) */
    if (k2settings->gap_override_internal > 0) {
        region.r1=wrapbmp->base-wrapbmp->rhmax+1;
        if (region.r1<0)
            region.r1=0;
        if (region.r1>wrapbmp->base)
            region.r1=wrapbmp->base;
        gap=k2settings->gap_override_internal;
        k2settings->gap_override_internal = -1;
    }
    else {
        if (wrapbmp->height_extended)
            gap = wrapbmp->gap;
        else
            gap = 0;
    }
#ifdef WILLUSDEBUG
    printf("wf: gap=%d\n",gap);
#endif
    if (gap>0)
        masterinfo_add_gap_src_pixels(masterinfo,k2settings,gap,"wrapbmp");
    if (!allow_full_justification)
        just = (wrapbmp->just & 0xcf) | 0x20;
    else
        just = wrapbmp->just;
    /*
    ** For now, set pageinfo=NULL in calls to bmpregion_add because the
    ** pageinfo processing assumes that the BMPREGION structure it is working
    ** with is using the original source bitmap, not the wrapbmp bitmap.
    ** This means that word wrapping can't use the pageinfo structure for now.
    */
    bmpregion_add(&region,k2settings,NULL,masterinfo,0,0,0,-1.0,just,2,
                  colcount,rowcount,0xf,wrapbmp->bmp.height-1-wrapbmp->base);
    if (wrapbmp->bmp.bpp==24)
        bmp_free(bmp8);
    willus_dmem_free(20,(double **)&rowcount,funcname);
    willus_dmem_free(19,(double **)&colcount,funcname);

    /* Mark the wrap bitmap as empty and clear the per-line bookkeeping */
    wrapbmp->bmp.width=0;
    wrapbmp->bmp.height=0;
    wrapbmp->line_spacing=-1;
    wrapbmp->gap=-1;
    wrapbmp->rhmax=-1;
    wrapbmp->thmax=-1;
    wrapbmp->hyphen.ch=-1;
    if (use_bgi==1 && wrapbmp->beginning_gap_internal > 0)
        masterinfo_add_gap_src_pixels(masterinfo,k2settings,
                                      wrapbmp->beginning_gap_internal,"wrapbmp->bgi1");
    wrapbmp->beginning_gap_internal = -1;
    wrapbmp->last_h5050_internal = -1;
    if (use_bgi)
        wrapbmp->just_flushed_internal=1;
}
/*
** Trace a (possibly slanted) vertical line through pixel (row0,col0) and
** erase it.  Tracing runs upward and then downward from row0; in each
** direction it stops at the bitmap edge or after nw consecutive rows with no
** dark pixel near the expected column.  Within a row, the line's extent is
** terminated by nw consecutive white pixels on either side.
**
** bmp          = bitmap that is traced; read and written one byte per pixel.
**                A pixel is "dark" if its value is < white_thresh.
** cbmp         = optional (may be NULL) companion bitmap that receives the
**                same erasure; 3 bytes per pixel if cbmp->bpp==24, else 1.
** tmp          = bitmap whose traced area is set to 255 when the traced
**                feature is rejected as a line (written one byte per pixel).
**                NOTE(review): tmp is dereferenced without a NULL check on
**                that path even though the debug code tests it for NULL --
**                confirm callers always pass a valid bitmap.
** row0,col0    = starting pixel.
** tanth        = column shift per row: expected column at row r is
**                col0+(r-row0)*tanth.
** minheight_in = minimum number of traced rows, in inches (scaled by dpi).
** maxwidth_in  = maximum line width, in inches (scaled by dpi).
** white_thresh = pixel values below this are treated as dark.
** dpi          = dots per inch; also sets nw = max(2,round(dpi/100)).
** erase_vertical_lines = when 1, the widest traced row must also be within
**                maxwidth_in for the feature to be accepted as a line.
**
** Always returns 1.
**
** v2.10--handle cbmp 8-bit correctly.
*/
static int vert_line_erase(WILLUSBITMAP *bmp,WILLUSBITMAP *cbmp,WILLUSBITMAP *tmp,
                    int row0,int col0,double tanth,double minheight_in,
                    /* double minwidth_in,*/ double maxwidth_in,int white_thresh,
                    double dpi,int erase_vertical_lines)
    {
    int lw,cc,maxdev,nw,dir,i,n,cbpp;
    int *c1,*c2,*w;
    static char *funcname="vert_line_erase";

#if (WILLUSDEBUGX & 0x8000)
printf("@vert_line_erase(row0=%d,col0=%d,tanth=%g,minheight_in=%g\n"
       " maxwidth_in=%g,white_thresh=%d,dpi=%g,evl=%d\n",
       row0,col0,tanth,minheight_in,
       maxwidth_in,white_thresh,dpi,erase_vertical_lines);
printf(" bmp = %d x %d x %d\n",bmp->width,bmp->height,bmp->bpp);
if (cbmp!=NULL)
printf(" cbmp = %d x %d x %d\n",cbmp->width,cbmp->height,cbmp->bpp);
if (tmp!=NULL)
printf(" tmp = %d x %d x %d\n",tmp->width,tmp->height,tmp->bpp);
#endif
    /* Bytes per pixel in the companion bitmap */
    cbpp = (cbmp!=NULL && cbmp->bpp==24) ? 3 : 1;
    /*
    ** One allocation holds three per-row arrays of bmp->height ints each:
    **     c1[r], c2[r] = left / right column of the span found in row r
    **                    (-1 if row r was not part of the trace)
    **     w[k]         = width of the k-th traced row (sorted later)
    */
    willus_dmem_alloc_warn(26,(void **)&c1,sizeof(int)*3*bmp->height,funcname,10);
    c2=&c1[bmp->height];
    w=&c2[bmp->height];
    /*
    maxdev = (int)((double)bmp->height / minheight_in +.5);
    if (maxdev < 3)
        maxdev=3;
    */
    /* nw = run of white pixels (or empty rows) that ends the line */
    nw = (int)(dpi/100.+.5);
    if (nw<2)
        nw=2;
    /* maxdev = how far from the expected column a dark pixel is searched for */
    maxdev=nw;
    for (i=0;i<bmp->height;i++)
        c1[i]=c2[i]=-1;
    n=0;
    /* dir=-1 traces row0 and the rows above it; dir=+1 traces the rows below */
    for (dir=-1;dir<=1;dir+=2)
        {
        int del,brc;

#if (WILLUSDEBUGX & 0x8000)
printf("dir=%d\n",dir);
#endif
        /* brc = count of consecutive rows where no dark pixel was found */
        brc = 0;
        /* Start at del=0 for the upward pass so row0 is visited exactly once */
        for (del=(dir==-1)?0:1;1;del++)
            {
            int r,c;
            unsigned char *p;

#if (WILLUSDEBUGX & 0x8000)
printf("del=%d\n",del);
#endif
            r=row0+dir*del;
            if (r<0 || r>bmp->height-1)
                break;
            /* Expected column of the line in row r */
            c=col0+(r-row0)*tanth;
            if (c<0 || c>bmp->width-1)
                break;
            p=bmp_rowptr_from_top(bmp,r);
            /* Look for a dark pixel at c or up to maxdev columns to the right */
            for (i=c;i<=c+maxdev && i<bmp->width;i++)
                if (p[i]<white_thresh)
                    break;
            if (i>c+maxdev || i>=bmp->width)
                {
                /* None to the right--look up to maxdev columns to the left */
                for (i=c-1;i>=c-maxdev && i>=0;i--)
                    if (p[i]<white_thresh)
                        break;
                if (i<c-maxdev || i<0)
                    {
                    /* Row has no dark pixel near c; give up after nw such rows */
                    brc++;
                    if (brc>=nw)
                        break;
                    continue;
                    }
                }
            brc=0;
            /* i is now a dark pixel.  Scan right until nw whites in a row. */
            for (c=i,cc=0;i<bmp->width;i++)
                if (p[i]<white_thresh)
                    cc=0;
                else
                    {
                    cc++;
                    if (cc>=nw)
                        break;
                    }
            /* Back off the trailing white run */
            c2[r]=i-cc;
            if (c2[r]>bmp->width-1)
                c2[r]=bmp->width-1;
            /* Same scan to the left, starting again from the dark pixel */
            for (cc=0,i=c;i>=0;i--)
                if (p[i]<white_thresh)
                    cc=0;
                else
                    {
                    cc++;
                    if (cc>=nw)
                        break;
                    }
            c1[r]=i+cc;
            if (c1[r]<0)
                c1[r]=0;
            /* Width of the dark span in this row */
            w[n++]=c2[r]-c1[r]+1;
            /*
            ** Widen the stored span on both sides by cc, which at this point
            ** is the white-run count from the leftward scan.
            */
            c1[r]-=cc;
            if (c1[r]<0)
                c1[r]=0;
            c2[r]+=cc;
            if (c2[r]>bmp->width-1)
                c2[r]=bmp->width-1;
            }
        }
#if (WILLUSDEBUGX & 0x8000)
printf("n=%d\n",n);
#endif
    /* Sort the row widths so that quartiles can be read off directly */
    if (n>1)
        sorti(w,n);
    /*
    printf("n=%d, w[%d]=%d, w[%d]=%d (mw=%g)\n",n,n/4,w[n/4],3*n/4,w[3*n/4],maxwidth_in*dpi);
    */
    /*
    ** Reject the feature as a line if too few rows were traced, if the
    ** lower-quartile width is < 1, if the upper-quartile width exceeds the
    ** maximum, or (erase_vertical_lines==1 only) if the widest row exceeds
    ** the maximum.  The n<10 test comes first, so w[] is never indexed
    ** when n is 0.
    */
    if (n < 10 || n < minheight_in*dpi
               || w[n/4] < 1 /* (int)(minwidth_in*dpi + .5) */
               || w[3*n/4] > (int)(maxwidth_in*dpi)
               || (erase_vertical_lines==1 && w[n-1] > maxwidth_in*dpi))
        {
#if (WILLUSDEBUGX & 0x8000)
printf("Erasing area in temp bitmap.\n");
#endif
        /* Erase area in temp bitmap */
        for (i=0;i<bmp->height;i++)
            {
            unsigned char *p;
            int cmax;

            /* Skip rows that were not part of the trace */
            if (c1[i]<0 || c2[i]<0)
                continue;
            /* cmax is used here as a countdown of columns to set to 255 */
            cmax=(c2[i]-c1[i])+1;
            p=bmp_rowptr_from_top(tmp,i)+c1[i];
            for (;cmax>0;cmax--,p++)
                (*p)=255;
            }
        }
    else
        {
#if (WILLUSDEBUGX & 0x8000)
printf("Erasing line width in source\n");
#endif
        /* Erase line width in source bitmap */
        /* Half-width to erase: upper-quartile width plus margin, capped */
        lw=w[3*n/4]+nw*2;
#if (WILLUSDEBUGX & 0x8000)
printf("1. lw=%d\n",lw);
#endif
        if (lw > maxwidth_in*dpi/2)
            lw=maxwidth_in*dpi/2;
#if (WILLUSDEBUGX & 0x8000)
printf("2. lw=%d\n",lw);
#endif
        for (i=0;i<bmp->height;i++)
            {
            unsigned char *p;
            int c0,cmin,cmax,count,white;

#if (WILLUSDEBUGX & 0x8000)
printf("i=%d\n",i);
#endif
            /* Skip rows that were not part of the trace */
            if (c1[i]<0 || c2[i]<0)
                continue;
            /* Erase window: lw+1 either side of the expected column, */
            /* clipped to the span recorded for this row.             */
            c0=col0+(i-row0)*tanth;
            cmin=c0-lw-1;
            if (cmin<c1[i])
                cmin=c1[i];
            cmax=c0+lw+1;
            if (cmax>c2[i])
                cmax=c2[i];
#if (WILLUSDEBUGX & 0x8000)
printf("A\n");
#endif
            p=bmp_rowptr_from_top(bmp,i);
            /* c0 is re-used: it now selects the brighter end of the window */
            c0 = (p[cmin] > p[cmax]) ? cmin : cmax;
            white=p[c0];
#if (WILLUSDEBUGX & 0x8000)
printf("B\n");
#endif
            /* The fill value must itself count as white (> white_thresh) */
            if (white <= white_thresh)
                white = white_thresh+1;
            if (white>255)
                white=255;
#if (WILLUSDEBUGX & 0x8000)
printf("C\n");
#endif
            count=(cmax-cmin)+1;
            p=&p[cmin];
#if (WILLUSDEBUGX & 0x8000)
printf("D\n");
#endif
            for (;count>0;count--,p++)
                (*p)=white;
#if (WILLUSDEBUGX & 0x8000)
printf("E\n");
#endif
            if (cbmp!=NULL)
                {
                unsigned char *p0;

                /* Fill the same window in cbmp with cbmp's own pixel at c0 */
                p=bmp_rowptr_from_top(cbmp,i);
                p0=p+c0*cbpp;
                p=p+cmin*cbpp;
                count=(cmax-cmin)+1;
#if (WILLUSDEBUGX & 0x8000)
printf("F width=%d, ht=%d, bpp=%d, c0=%d, cmin=%d, i=%d, count=%d\n",cbmp->width,cbmp->height,cbmp->bpp,c0,cmin,i,count);
#endif
                if (cbpp==3)
                    for (;count>0;count--,p+=3)
                        {
                        p[0]=p0[0];
                        p[1]=p0[1];
                        p[2]=p0[2];
                        }
                else
                    memset(p,p0[0],count);
#if (WILLUSDEBUGX & 0x8000)
printf("G\n");
#endif
                }
            }
#if (WILLUSDEBUGX & 0x8000)
printf(" done.\n");
#endif
        }
    /* c1 is the base of the single allocation that also holds c2 and w */
    willus_dmem_free(26,(double **)&c1,funcname);
    return(1);
    }
/*
** Score how many regularly spaced dark-to-white transitions ("inflections")
** occur in a 1-D brightness profile.
**
** x       = profile samples (expected range 0..255).
** n       = number of samples in x.
** delta   = hysteresis: a smoothed sample <= wt-delta arms the detector,
**           and the next smoothed sample >= wt counts as a transition.
** wthresh = in/out white threshold wt.  If (*wthresh)<0 on entry, wt is
**           derived from the histogram of x (walking down from 255 until
**           more than 15% of the samples have been passed, minus 10, but
**           never below 192) and stored back in (*wthresh).  Otherwise
**           (*wthresh) is used as-is.
**
** Returns (int)(f1*f2*ni) where
**     ni = number of transition-to-transition intervals measured,
**     f1 = mean interval / n, capped at 0.15,
**     f2 = mean interval / std. deviation of intervals (20 if the relative
**          deviation is under 5%), or 1 if ni<=2.
** Returns 0 if the profile is too short (n<2) to be smoothed.
*/
static int inflection_count(double *x,int n,int delta,int *wthresh)
{
    int i,i0,ni,ww,wt,mode,npts;
    double meandi,meandisq,f1,f2,stdev;
    double *xs;
    static char *funcname="inflection_count";

    /* Find threshold white value that peaks must exceed */
    if ((*wthresh)<0) {
        int *hist;
        int c,ct;

        /* Allocate memory for hist[] array rather than using static array */
        /* v2.13 fix */
        hist=NULL;
        willus_dmem_alloc_warn(34,(void **)&hist,sizeof(int)*256,funcname,10);
        for (i=0;i<256;i++)
            hist[i]=0;
        for (i=0;i<n;i++) {
            double v;
            int bin;

            /*
            ** Clamp before converting: a negative or NaN sample must not
            ** index outside hist[], and converting an out-of-range double
            ** to int is undefined.
            */
            v=x[i];
            if (!(v>=0.))
                bin=0;
            else if (v>=255.)
                bin=255;
            else
                bin=(int)floor(v);
            hist[bin]++;
        }
        ct=n*.15;
        for (c=0,i=255;i>=0;i--) {
            c+=hist[i];
            if (c>ct)
                break;
        }
        wt=i-10;
        if (wt<192)
            wt=192;
#ifdef DEBUG
        k2printf("wt=%d\n",wt);
#endif
        (*wthresh)=wt;
        willus_dmem_free(34,(double **)&hist,funcname);
    }
    else
        wt=(*wthresh);

    /* Smoothing window: 1/150th of the profile, at least one sample */
    ww=n/150;
    if (ww<1)
        ww=1;
    npts=n-ww;
    /* With n<2 there is no smoothed sample at all; xs[0] must not be read */
    if (npts<1)
        return(0);

    willus_dmem_alloc_warn(23,(void **)&xs,sizeof(double)*n,funcname,10);
    /*
    ** Moving average.  Note that the window for xs[i] is x[i+1]..x[i+ww]
    ** (x[0] is never used); this matches the long-standing behavior and
    ** stays within x[0..n-1].
    */
    for (i=0;i<npts;i++) {
        int j;

        xs[i]=0.;
        for (j=1;j<=ww;j++)
            xs[i]+=x[i+j];
        xs[i] /= ww;
    }

    /* mode==1 means "armed": a dark sample was seen since the last transition */
    meandi=meandisq=0.;
    if (xs[0]<=wt-delta)
        mode=1;
    else if (xs[0]>=wt)
        mode=-1;
    else
        mode=0;
    for (i0=0,ni=0,i=1;i<npts;i++) {
        if (mode==1 && xs[i]>=wt) {
            /* The first transition only establishes the reference position */
            if (i0>0) {
                meandi+=i-i0;
                meandisq+=(i-i0)*(i-i0);
                ni++;
            }
            i0=i;
            mode=-1;
            continue;
        }
        if (xs[i]<=wt-delta)
            mode=1;
    }
    stdev = 1.0; /* Avoid compiler warning */
    if (ni>0) {
        meandi /= ni;
        meandisq /= ni;
        stdev = sqrt(fabs(meandi*meandi-meandisq));
    }
    f1=meandi/n;
    if (f1>.15)
        f1=.15;
    if (ni>2) {
        if (stdev/meandi < .05)
            f2=20.;
        else
            f2=meandi/stdev;
    }
    else
        f2=1.;
#ifdef DEBUG
    k2printf(" ni=%3d, f1=%8.4f, f2=%8.4f, f1*f2*ni=%8.4f\n",ni,f1,f2,f1*f2*ni);
    {
        static int count=0;
        FILE *f;
        int k;

        f=fopen("inf.ep",count==0?"w":"a");
        count++;
        if (f!=NULL) {
            fprintf(f,"/sa l \"%d\" 1\n",ni);
            for (k=0;k<npts;k++)
                fprintf(f,"%g\n",xs[k]);
            fprintf(f,"//nc\n");
            fclose(f);
        }
    }
#endif /* DEBUG */
    willus_dmem_free(23,&xs,funcname);
    return((int)(f1*f2*ni));
}