/*
** Adjust the contrast of a source page.
**
** src      Color (or grayscale) version of the page.  Only modified if
**          k2settings->dst_color is set.
** srcgrey  Grayscale version of the page.  Modified in place.
** white    In/out "white" threshold.  If <= 0 on entry it is set to 192.
**
** If k2settings->contrast_max is negative, its magnitude is applied as a
** fixed contrast value and the function returns.
**
** Otherwise the contrast is stepped up from 1.0 in 5% increments (never
** exceeding contrast_max by more than .01).  After each step the histogram
** of the adjusted grayscale bitmap is examined and the search stops as soon
** as the fraction of pixels in [252,255] reaches 94% of the fraction of
** pixels that were >= (*white) in the un-adjusted bitmap.
**
** The contrast that was actually applied to the grayscale bitmap is the one
** that is reported and applied to the color bitmap.
*/
void bmp_adjust_contrast(WILLUSBITMAP *src,WILLUSBITMAP *srcgrey,
                         K2PDFOPT_SETTINGS *k2settings,int *white)
{
    int i,j,tries,wc,tc,have_dst,hist[256];
    double contrast,applied,rat0;
    WILLUSBITMAP *dst,_dst;

    if (k2settings->debug && k2settings->verbose)
        k2printf("\nAt adjust_contrast.\n");
    if ((*white) <= 0)
        (*white)=192;
    /* If contrast_max negative, use it as fixed contrast adjustment. */
    if (k2settings->contrast_max < 0.) {
        bmp_contrast_adjust(srcgrey,srcgrey,-k2settings->contrast_max);
        if (k2settings->dst_color && fabs(k2settings->contrast_max+1.0)>1e-4)
            bmp_contrast_adjust(src,src,-k2settings->contrast_max);
        return;
    }
    dst=&_dst;
    bmp_init(dst);
    wc=0; /* Avoid compiler warning */
    tc=srcgrey->width*srcgrey->height;
    rat0=0.5; /* Avoid compiler warning */
    applied=1.0;  /* Contrast most recently applied to dst */
    have_dst=0;   /* Set once dst holds an adjusted copy of srcgrey */
    for (contrast=1.0,tries=0;contrast<k2settings->contrast_max+.01;tries++) {
        if (fabs(contrast-1.0)>1e-4)
            bmp_contrast_adjust(dst,srcgrey,contrast);
        else
            bmp_copy(dst,srcgrey);
        applied=contrast;
        have_dst=1;

        /* Get bitmap histogram */
        for (i=0;i<256;i++)
            hist[i]=0;
        for (j=0;j<dst->height;j++) {
            unsigned char *p;

            p=bmp_rowptr_from_top(dst,j);
            for (i=0;i<dst->width;i++,p++)
                hist[p[0]]++;
        }
        /* First pass (contrast 1.0): reference fraction of "white" pixels */
        if (tries==0) {
            int h1;

            for (h1=0,j=(*white);j<256;j++)
                h1+=hist[j];
            rat0=(double)h1/tc;
            if (k2settings->debug && k2settings->verbose)
                k2printf(" rat0 = rat[%d-255]=%.4f\n",(*white),rat0);
        }
        /* Find white ratio */
        for (wc=0,j=252;j<=255;j++)
            wc += hist[j];
        if (k2settings->debug && k2settings->verbose)
            k2printf(" %2d. Contrast=%7.2f, rat[252-255]/rat0=%.4f\n",
                     tries+1,contrast,(double)wc/tc/rat0);
        if ((double)wc/tc >= rat0*0.94)
            break;
        contrast *= 1.05;
    }
    if (k2settings->debug)
        k2printf("Contrast=%7.2f, rat[252-255]/rat0=%.4f\n",
                 applied,(double)wc/tc/rat0);
    /*
    ** Only copy back if at least one pass ran.  Otherwise dst is still the
    ** empty bitmap from bmp_init() and copying it would wipe out srcgrey.
    */
    if (have_dst)
        bmp_copy(srcgrey,dst);
    /* Maybe don't adjust the contrast for the color bitmap? */
    if (k2settings->dst_color && fabs(applied-1.0)>1e-4)
        bmp_contrast_adjust(src,src,applied);
    bmp_free(dst);
}
/*
** Write out every output page that the master bitmap can currently supply.
**
** Pages are pulled one at a time with masterinfo_get_next_output_page()
** until it reports that no more are available.  Each page is then handled
** in one of these ways:
**
**   - Preview mode (masterinfo->preview_bitmap != NULL):  nothing is written.
**     If the page is the requested preview page it is copied into the
**     preview bitmap, preview_captured is set, and the loop ends.
**   - Crop-box mode (k2settings->use_crop_boxes):  nothing is written here;
**     per the original note, the crop boxes all get written after all pages
**     have been processed.
**   - Otherwise the page bitmap is added to masterinfo->outfile, with its
**     OCR words if k2settings->dst_ocr is set (HAVE_OCR_LIB builds only).
**
** flushall is passed through to masterinfo_get_next_output_page().
*/
void masterinfo_publish(MASTERINFO *masterinfo,K2PDFOPT_SETTINGS *k2settings,int flushall)
{
#ifdef HAVE_MUPDF_LIB
    static WTEXTCHARS *textchars=NULL;
    static WTEXTCHARS textchars_storage;
#endif
    WILLUSBITMAP page_storage,*page;
    double page_dpi;
    int npages_out,reduction;
#ifdef HAVE_OCR_LIB
    OCRWORDS *words,words_storage;
#else
    void *words;
#endif

#ifdef HAVE_MUPDF_LIB
    /* One-time setup of the static text-character buffer */
    if (textchars==NULL) {
        textchars=&textchars_storage;
        wtextchars_init(textchars);
    }
#endif
#ifdef HAVE_OCR_LIB
    if (k2settings->dst_ocr) {
        words=&words_storage;
        ocrwords_init(words);
    }
    else
#endif
        words=NULL;
    page=&page_storage;
    bmp_init(page);
    npages_out=0;
    for (;;) {
        if (masterinfo_get_next_output_page(masterinfo,k2settings,flushall,page,
                                            &page_dpi,&reduction,words)<=0)
            break;
        npages_out++;

        /* Preview mode:  capture the requested page and write nothing. */
        if (masterinfo->preview_bitmap!=NULL) {
            int wanted;

            /* Don't really need this, but just for insurance */
            if (words!=NULL)
                ocrwords_free(words);
            wanted = !k2settings->show_marked_source
                       && abs(k2settings->preview_page)==masterinfo->published_pages;
            if (!wanted)
                continue;
            bmp_copy(masterinfo->preview_bitmap,page);
            masterinfo->preview_captured=1;
            break;
        }

        /* v2.16, outline / bookmark check done in separate function. */
        k2publish_outline_check(masterinfo,k2settings,0);

        /*
        ** Nothing to do inside loop if using crop boxes -- they all
        ** get written after all pages have been processed.
        */
        if (k2settings->use_crop_boxes)
            continue;

#ifdef HAVE_OCR_LIB
        if (k2settings->dst_ocr) {
            int extra_flags;

            /*
            ** For dst_ocr=='m':  don't re-sort--messes up the copy/paste flow.
            ** (As of v2.20 the text of the word boxes has already been
            ** determined by k2ocr_ocrwords_get_from_ocrlayer() in k2ocr.c.)
            */
            extra_flags = (k2settings->dst_ocr=='m') ? 0x20 : 0;
            if (masterinfo->ocrfilename[0]!='\0')
                ocrwords_to_textfile(words,masterinfo->ocrfilename,
                                     masterinfo->published_pages>1);
#if (WILLUSDEBUGX & 0x400)
            printf("Calling pdffile_add_bitmap_with_ocrwords.\n");
#endif
#if (WILLUSDEBUGX & 0x10000)
            if (words!=NULL) {
                int iw;

                printf("flags_extra= %d\n",extra_flags);
                printf("PAGE OF WORDS\n");
                for (iw=0;iw<words->n;iw++)
                    printf("%3d. '%s'\n",iw,words->word[iw].text);
            }
#endif
            pdffile_add_bitmap_with_ocrwords(&masterinfo->outfile,page,page_dpi,
                                 k2settings->jpeg_quality,reduction,
                                 words,k2settings->dst_ocr_visibility_flags | extra_flags);
#if (WILLUSDEBUGX & 0x400)
            printf("Back from pdffile_add_bitmap_with_ocrwords.\n");
#endif
            masterinfo->wordcount += words->n;
            ocrwords_free(words);
            continue;
        }
#endif
        pdffile_add_bitmap(&masterinfo->outfile,page,page_dpi,
                           k2settings->jpeg_quality,reduction);
    }
    /*
    ** v2.16 bug fix:  If no destination output generated, we still have to
    ** call outline_check().
    */
    if (npages_out==0)
        k2publish_outline_check(masterinfo,k2settings,1);
    bmp_free(page);
}
/*
** Search a page for long, nearly vertical dark lines and hand each one found
** to vert_line_erase().
**
** bmp must be grayscale!  (cbmp might be color, might be grayscale, can be
** null.)  Handles cbmp either 8-bit or 24-bit in v2.10.
**
** The page is sampled every "rowstep" rows (dpi/40, at least 2).  For each
** candidate angle between -anglemax_deg and +anglemax_deg, and for each
** starting column, the code walks down the page along that slope and counts
** the longest run of consecutive samples that are darker than white_thresh
** in both bmp and a working copy of bmp.  If the longest run over all
** angles and columns covers at least minheight_in inches, the line is passed
** to vert_line_erase() and the search is repeated (at most 100 times).  The
** search ends when no long-enough run is found or vert_line_erase() returns
** zero.
*/
void bmp_detect_vertical_lines(WILLUSBITMAP *bmp,WILLUSBITMAP *cbmp,
                               double dpi,/* double minwidth_in, */
                               double maxwidth_in,double minheight_in,double anglemax_deg,
                               int white_thresh,int erase_vertical_lines,int debug,int verbose)
{
    int pass;
    int halfw,rowbytes;
    int stride_rows,stride_bytes,nsteps;
    int nangles,min_run;
    double dtheta;
    WILLUSBITMAP work_storage,*work;
    unsigned char *bmp_top,*work_top;

    if (debug)
        k2printf("At bmp_detect_vertical_lines...\n");
    if (!bmp_is_grayscale(bmp)) {
        k2printf("Internal error. bmp_detect_vertical_lines passed a non-grayscale bitmap.\n");
        exit(10);
    }

    /* Working copy of the page; handed to vert_line_erase() with bmp. */
    work=&work_storage;
    bmp_init(work);
    bmp_copy(work,bmp);
    bmp_top=bmp_rowptr_from_top(bmp,0);
    work_top=bmp_rowptr_from_top(work,0);
    rowbytes=bmp_bytewidth(bmp);

    /* Angular resolution:  one pixel of drift over the minimum line height */
    halfw=1;
    dtheta=atan2((double)halfw/dpi,minheight_in);
    nangles=(int)((anglemax_deg*PI/180.)/dtheta+.5);
    if (nangles<1)
        nangles=1;

    /* Vertical sampling interval and the run length that counts as a line */
    stride_rows=(int)(dpi/40.+.5);
    if (stride_rows<2)
        stride_rows=2;
    nsteps=bmp->height/stride_rows;
    stride_bytes=rowbytes*stride_rows;
    min_run=(int)(minheight_in*dpi/stride_rows+.5);
    if (min_run<2)
        min_run=2;
    if (debug && verbose)
        k2printf(" na = %d, rowstep = %d, ccthresh = %d, white_thresh = %d, nrsteps=%d\n",nangles,stride_rows,min_run,white_thresh,nsteps);

    for (pass=0;pass<100;pass++) {
        int best_run,best_col,best_row;
        double best_tan;
        int ia;

        best_run=-1;
        best_col=0;
        best_row=0;
        best_tan=0.;
        for (ia=0;ia<=nangles;ia++) {
            int sgn;

            for (sgn=1;sgn>=-1;sgn-=2) {
                double theta,tanth,tan_per_step;
                int first_col,last_col,startcol;

                /* Angle zero only needs to be scanned once */
                if (ia==0 && sgn==-1)
                    continue;
                theta=(PI/180.)*ia*sgn*fabs(anglemax_deg)/nangles;
                tanth=tan(theta);
                tan_per_step=tanth*stride_rows;
                if (sgn==1) {
                    first_col=-(int)(bmp->height*tanth+1.);
                    last_col=bmp->width-1;
                }
                else {
                    int shift;

                    shift=(int)(-bmp->height*tanth+1.);
                    first_col=shift;
                    last_col=bmp->width-1+shift;
                }
                for (startcol=first_col;startcol<=last_col;startcol++) {
                    unsigned char *p,*t;
                    int step,run,run_col,run_row;

                    p=bmp_top;
                    t=work_top;
                    step=0;
                    /*
                    ** If the path starts off the bitmap, skip down to the
                    ** first sample that lands inside it.
                    */
                    if (startcol<0 || startcol>bmp->width-1) {
                        while (step<nsteps) {
                            int c;

                            c=startcol+step*tan_per_step;
                            if (c>=0 && c<bmp->width)
                                break;
                            step++;
                            p+=stride_bytes;
                            t+=stride_bytes;
                        }
                    }
                    run=0;
                    run_col=0;
                    run_row=0;
                    for (;step<nsteps;step++,p+=stride_bytes,t+=stride_bytes) {
                        int c,dark;

                        c=startcol+step*tan_per_step;
                        if (c<0 || c>=bmp->width)
                            break;
                        /* Sample (or the pixel one row below) dark in both bitmaps? */
                        dark = (p[c]<white_thresh || p[c+rowbytes]<white_thresh)
                                 && (t[c]<white_thresh || t[c+rowbytes]<white_thresh);
                        if (!dark) {
                            run=0;
                            continue;
                        }
                        if (run==0) {
                            run_col=c;
                            run_row=step*stride_rows;
                        }
                        run++;
                        if (run>best_run) {
                            best_run=run;
                            best_tan=tanth;
                            best_col=run_col;
                            best_row=run_row;
                        }
                    }
                }
            }
        }
        if (best_run<min_run)
            break;
        if (debug)
            k2printf(" Vert line detected: ccmax=%d (pix=%d), tanthmax=%g, ic0max=%d, ir0max=%d\n",best_run,best_run*stride_rows,best_tan,best_col,best_row);
        if (!vert_line_erase(bmp,cbmp,work,best_row,best_col,best_tan,minheight_in,
                             /*minwidth_in,*/ maxwidth_in,white_thresh,dpi,erase_vertical_lines))
            break;
    }
    /* v2.20--fix memory leak here */
    bmp_free(work);
}
/*
** k2pdfopt_proc_one() is the main source file processing function in k2pdfopt.
**
** Depending on the value of rot_deg, it either determines the correct rotation of
** the passed file, or it processes it and converts it.
**
** The basic idea is to parse the source document into rectangular regions
** (held in the BMPREGION structures) and then to place these regions into
** the master destination bitmap (kept track of in MASTERINFO structure).
** You can think of this bitmap as a sort of "infinitely scrolling" output
** bitmap which is then cut into output pages.
**
** The bmpregion_source_page_add() function parses the source file.
**
** The masterinfo_publish() cuts the output bitmap into destination pages.
**
** If rot_deg == SRCROT_AUTO, then the rotation correction of the source
** file is computed and returned, but no other processing is done.
**
** Otherwise, the source file is processed.
**
** Parameters:
**     k2settings0  Caller's settings.  Copied into a private (static) copy
**                  which this function is free to modify.
**     filename     Source file or folder of bitmaps.
**     rot_deg      Rotation to apply, or SRCROT_AUTO to detect orientation.
**     k2out        Receives status, the output file name (allocated with
**                  willus_mem_alloc), and optionally the preview bitmap.
**
** Returns 270. from an orientation-detect pass that decides the document
** needs rotating; 0. in every other case.
**
** k2out->status values set here:  0 = completed, 1 = in progress / returned
** early, 2 = no bitmaps in folder, 3 = DjVu not supported by this build,
** 4 = output file could not be (over)written, 5 = no pages selected.
*/
static double k2pdfopt_proc_one(K2PDFOPT_SETTINGS *k2settings0,char *filename,double rot_deg,
                                K2PDFOPT_OUTPUT *k2out)
{
    static K2PDFOPT_SETTINGS _k2settings,*k2settings;
    static MASTERINFO _masterinfo,*masterinfo;
    static PDFFILE _mpdf,*mpdf;
    char dstfile[MAXFILENAMELEN];
    char markedfile[MAXFILENAMELEN];
    char rotstr[128];
    WILLUSBITMAP _src,*src;
    WILLUSBITMAP _srcgrey,*srcgrey;
    WILLUSBITMAP _marked,*marked;
    WILLUSBITMAP preview_internal;
    int i,status,pw,np,src_type,second_time_through,or_detect,orep_detect,preview;
    int pagecount,pagestep,pages_done,local_tocwrites;
    int errcnt,pixwarn;
    FILELIST *fl,_fl;
    int folder,dpi;
    double size,bormean;
    char *mupdffilename;
    extern int k2mark_page_count;
    static char *funcname="k2pdfopt_proc_one";
    static char *readerr=TTEXT_WARN "\a\n ** ERROR reading page %d from " TTEXT_BOLD2 "%s" TTEXT_WARN ".\n\n" TTEXT_NORMAL;
    static char *readlimit=TTEXT_WARN "\a\n ** (No more read errors will be echoed for file %s.)\n\n" TTEXT_NORMAL;
#ifdef HAVE_MUPDF_LIB
    /* Space added before "using" so the file name does not run into the text. */
    static char *mupdferr_trygs=TTEXT_WARN "\a\n ** ERROR reading from " TTEXT_BOLD2 "%s"
                                TTEXT_WARN " using MuPDF. Trying Ghostscript...\n\n" TTEXT_NORMAL;
#endif

#if (WILLUSDEBUGX & 1)
    printf("@k2pdfopt_proc_one(%s)\n",filename);
#endif
    local_tocwrites=0;
    k2out->status = 1;
    k2settings=&_k2settings;
    k2pdfopt_settings_copy(k2settings,k2settings0);
#ifdef HAVE_K2GUI
    if (k2gui_active())
        k2gui_cbox_set_filename(filename);
#endif
    mpdf=&_mpdf;
    /* Must be called once per conversion to init margins / devsize / output size */
    k2pdfopt_settings_sanity_check(k2settings);
    k2pdfopt_settings_new_source_document_init(k2settings);
    errcnt=0;
    pixwarn=0;
    mupdffilename=_masterinfo.srcfilename;
    strncpy(mupdffilename,filename,MAXFILENAMELEN-1);
    mupdffilename[MAXFILENAMELEN-1]='\0';
    or_detect=OR_DETECT(rot_deg);
    orep_detect=OREP_DETECT(k2settings);
    if ((fabs(k2settings->src_rot-SRCROT_AUTO)<.5 || orep_detect) && !or_detect)
        second_time_through=1;
    else
        second_time_through=0;
    /* Don't care about rotation if just echoing page count */
    if (k2settings->echo_source_page_count && second_time_through==0)
        return(0.);
    /* Orientation detection never needs more than 300 dpi */
    if (or_detect && k2settings->src_dpi>300)
        dpi=300;
    else
        dpi=k2settings->src_dpi;
    folder=(wfile_status(filename)==2);
    fl=&_fl;
    filelist_init(fl);
    if (folder) {
        char basename[MAXFILENAMELEN];
        static char *iolist[]={"*.png","*.jpg",""};
        static char *eolist[]={""};

        wfile_basespec(basename,filename);
        if (!second_time_through)
            k2printf("Searching folder " TTEXT_BOLD2 "%s" TTEXT_NORMAL " ... ",basename);
        fflush(stdout);
        filelist_fill_from_disk(fl,filename,iolist,eolist,0,0);
        if (fl->n<=0) {
            if (!second_time_through)
                k2printf(TTEXT_WARN "\n** No bitmaps found in folder %s.\n\n" TTEXT_NORMAL,filename);
            k2out->status=2;
            return(0.);
        }
        if (!second_time_through)
            k2printf("%d bitmaps found in %s.\n",(int)fl->n,filename);
        filelist_sort_by_name(fl);
    }
    src=&_src;
    srcgrey=&_srcgrey;
    marked=&_marked;
    bmp_init(src);
    bmp_init(srcgrey);
    bmp_init(marked);
    pw=0;

    /*
    ** Determine source type
    */
    if (folder)
        src_type = SRC_TYPE_BITMAPFOLDER;
    else if (!stricmp(wfile_ext(filename),"pdf"))
        src_type = SRC_TYPE_PDF;
    else if (!stricmp(wfile_ext(filename),"djvu"))
        src_type = SRC_TYPE_DJVU;
    else if (!stricmp(wfile_ext(filename),"djv"))
        src_type = SRC_TYPE_DJVU;
    else if (!stricmp(wfile_ext(filename),"ps"))
        src_type = SRC_TYPE_PS;
    else if (!stricmp(wfile_ext(filename),"eps"))
        src_type = SRC_TYPE_PS;
    else
        src_type = SRC_TYPE_OTHER;
#ifndef HAVE_DJVU_LIB
    if (src_type==SRC_TYPE_DJVU) {
        if (!or_detect)
            k2printf(TTEXT_WARN
                     "\a\n\n** DjVuLibre not compiled into this version of k2pdfopt. **\n\n"
                     "** Cannot process file %s. **\n\n" TTEXT_NORMAL,filename);
        k2out->status=3;
        return(0.);
    }
#endif
    if (src_type==SRC_TYPE_PS)
        k2settings->usegs=1;
    /*
    ** Turn off native PDF output if source is not PDF
    */
    if (src_type!=SRC_TYPE_PDF) {
        if (k2settings->use_crop_boxes && !or_detect)
            k2printf(TTEXT_WARN
                     "\n** Native PDF output mode turned off on file %s. **\n"
                     "** (It is not a PDF file.) **\n\n",filename);
        k2settings->use_crop_boxes=0;
#ifdef HAVE_OCR_LIB
        if (k2settings->dst_ocr=='m')
            k2settings->dst_ocr=0;
#endif
    }
    masterinfo=&_masterinfo;
    masterinfo_init(masterinfo,k2settings);
    /* Preview mode:  render into the caller's bitmap if given, else a local one */
    if (k2settings->preview_page!=0 && !or_detect) {
        preview=1;
        if (k2out->bmp!=NULL)
            masterinfo->preview_bitmap=k2out->bmp;
        else {
            masterinfo->preview_bitmap=&preview_internal;
            bmp_init(masterinfo->preview_bitmap);
        }
    }
    else
        preview=0;

    /*
    ** Real conversion (not orientation detection, not preview):
    ** work out the output file names and make sure they can be written.
    */
    if (!or_detect && !preview) {
        static int dstfilecount=0;

        wfile_newext(dstfile,filename,"");
        dstfilecount++;
        filename_substitute(dstfile,k2settings->dst_opname_format,filename,dstfilecount,"pdf");
#ifdef HAVE_OCR_LIB
        if (k2settings->ocrout[0]!='\0' && k2settings->dst_ocr)
            filename_substitute(masterinfo->ocrfilename,k2settings->ocrout,filename,dstfilecount,"txt");
        else
#endif
            masterinfo->ocrfilename[0]='\0';
        if (!filename_comp(dstfile,filename)) {
            k2printf(TTEXT_WARN "\n\aSource file and ouput file have the same name!" TTEXT_NORMAL "\n\n");
            k2printf(" Source file = '%s'\n",filename);
            k2printf(" Output file = '%s'\n",dstfile);
            k2printf(" Output file name format string = '%s'\n",k2settings->dst_opname_format);
            k2printf("\nOperation aborted.\n");
            k2sys_exit(k2settings,50);
        }
        if ((status=overwrite_fail(dstfile,k2settings->overwrite_minsize_mb))!=0) {
            masterinfo_free(masterinfo,k2settings);
            if (folder)
                filelist_free(fl);
            if (status<0)
                k2sys_exit(k2settings,20);
            k2out->status=4;
            return(0.);
        }
        {
            int can_write;

            if (!k2settings->use_crop_boxes)
                can_write = (pdffile_init(&masterinfo->outfile,dstfile,1)!=NULL);
            else {
                /* Native (crop box) mode:  just probe that the file can be created */
                FILE *f1;

                f1 = wfile_fopen_utf8(dstfile,"w");
                can_write = (f1!=NULL);
                if (f1!=NULL) {
                    fclose(f1);
                    wfile_remove_utf8(dstfile);
                }
            }
            /*
            ** This check must cover BOTH branches above.  It used to sit
            ** inside the crop-box branch only, so a failed pdffile_init()
            ** on the normal output path went undetected.
            */
            if (!can_write) {
                k2printf(TTEXT_WARN "\n\aCannot open PDF file %s for output!"
                         TTEXT_NORMAL "\n\n",dstfile);
#ifdef HAVE_K2GUI
                if (k2gui_active()) {
                    k2gui_okay("Failed to open output file",
                               "Cannot open PDF file %s for output!\n"
                               "Maybe another application has it open already?\n"
                               "Conversion failed!",dstfile);
                    k2out->status=4;
                    return(0.);
                }
#endif
                k2sys_exit(k2settings,30);
            }
        }
        k2out->outname=NULL;
        /* Return output file name in k2out for GUI */
        willus_mem_alloc((double **)&k2out->outname,(long)(strlen(dstfile)+1),funcname);
        if (k2out->outname!=NULL)
            strcpy(k2out->outname,dstfile);
        if (k2settings->use_crop_boxes)
            pdffile_close(&masterinfo->outfile);
        if (k2settings->show_marked_source) {
            filename_substitute(markedfile,"%s_marked",filename,0,"pdf");
            if (pdffile_init(mpdf,markedfile,1)==NULL) {
                k2printf(TTEXT_WARN "\n\aCannot open PDF file %s for marked output!" TTEXT_NORMAL "\n\n",markedfile);
                k2sys_exit(k2settings,40);
            }
        }
    }

    /*
    ** Determine the number of source pages (np).  np = -1 means unknown.
    */
    if (src_type==SRC_TYPE_PDF || src_type==SRC_TYPE_DJVU) {
        wsys_set_decimal_period(1);
#ifdef HAVE_MUPDF_LIB
        if (src_type==SRC_TYPE_PDF) {
            np=wmupdf_numpages(mupdffilename);
#ifdef HAVE_WIN32_API
            /* On failure, retry with the 8.3 form of the file name */
            if (np<0) {
                int ns;

                ns=wsys_filename_8dot3(mupdffilename,filename,MAXFILENAMELEN-1);
                if (ns>0 && stricmp(filename,mupdffilename))
                    np=wmupdf_numpages(mupdffilename);
                else
                    strcpy(mupdffilename,filename);
            }
#endif
            /* Get bookmarks / outline from PDF file */
            if (!or_detect && k2settings->use_toc!=0 && !toclist_valid(k2settings->toclist,NULL)) {
                masterinfo->outline=wpdfoutline_read_from_pdf_file(mupdffilename);
                /* Save TOC if requested */
                if (k2settings->tocsavefile[0]!='\0') {
                    FILE *f;

                    /* First write truncates the file, later ones append */
                    f=fopen(k2settings->tocsavefile,tocwrites==0?"w":"a");
                    if (f!=NULL) {
                        int k;

                        fprintf(f,"%sFILE: %s\n",tocwrites==0?"":"\n\n",mupdffilename);
                        /* Underline the "FILE: <name>" heading */
                        for (k=strlen(mupdffilename)+6;k>0;k--)
                            fputc('-',f);
                        fprintf(f,"\n");
                        if (masterinfo->outline!=NULL)
                            wpdfoutline_echo2(masterinfo->outline,0,f);
                        else
                            fprintf(f,"(No outline info in file.)\n");
                        fclose(f);
                        tocwrites++;
                        local_tocwrites++;
                    }
                }
            }
        }
        else
#endif
#ifdef HAVE_DJVU_LIB
        if (src_type==SRC_TYPE_DJVU)
            np=bmpdjvu_numpages(filename);
        else
#endif
            np=-1;
        wsys_set_decimal_period(1);
#ifdef HAVE_MUPDF_LIB
        if (np==-1 && (k2settings->usegs<=0) && src_type==SRC_TYPE_PDF) {
            k2printf(mupdferr_trygs,filename);
            if (k2settings->usegs==0)
                k2settings->usegs=1;
        }
#endif
#ifdef HAVE_Z_LIB
        if (np<=0 && src_type==SRC_TYPE_PDF)
            np=pdf_numpages(filename);
#endif
    }
    else if (src_type==SRC_TYPE_BITMAPFOLDER)
        np=fl->n;
    else
        np=-1;
    if (k2settings->echo_source_page_count) {
        printf("\"%s\" page count = %d\n",mupdffilename,np);
        masterinfo_free(masterinfo,k2settings);
        if (folder)
            filelist_free(fl);
        return(0.);
    }
    masterinfo->srcpages = np;
    /* A user-supplied TOC (page list or text file) takes the place of the PDF outline */
    if (!or_detect && toclist_valid(k2settings->toclist,stdout)) {
        if (pagelist_valid_page_range(k2settings->toclist))
            masterinfo->outline=wpdfoutline_from_pagelist(k2settings->toclist,masterinfo->srcpages);
        else
            masterinfo->outline=wpdfoutline_read_from_text_file(k2settings->toclist);
    }
    pagecount = np<0 ? -1 : pagelist_count(k2settings->pagelist,np);
#ifdef HAVE_K2GUI
    if (k2gui_active()) {
        k2gui_cbox_set_num_pages(pagecount<0 ? 1 : pagecount);
        k2gui_cbox_set_pages_completed(0,NULL);
    }
#endif
    /* Orientation detection samples roughly 10 pages spread over the document */
    if (pagecount<0 || !or_detect)
        pagestep=1;
    else {
        pagestep=pagecount/10;
        if (pagestep<1)
            pagestep=1;
    }
    pages_done=0;
    if (np>0 && pagecount==0) {
        if (!second_time_through)
            k2printf("\a\n" TTEXT_WARN "No %ss to convert (-p %s)!" TTEXT_NORMAL "\n\n",
                     folder?"file":"page",k2settings->pagelist);
        masterinfo_free(masterinfo,k2settings);
        if (folder)
            filelist_free(fl);
        k2out->status=5;
        return(0.);
    }
    if (!second_time_through) {
        k2printf("Reading ");
        if (pagecount>0) {
            if (pagecount<np)
                k2printf("%d out of %d %s%s",pagecount,np,folder?"file":"page",np>1?"s":"");
            else
                k2printf("%d %s%s",np,folder?"file":"page",np>1?"s":"");
        }
        else
            k2printf("%ss",folder?"file":"page");
        k2printf(" from " TTEXT_BOLD2 "%s" TTEXT_NORMAL " ...\n",filename);
    }
    if (or_detect)
        k2printf("\nDetecting document orientation ... ");
    bormean=1.0;

    /*
    ** Main loop over the selected source pages.
    */
    for (i=0;1;i+=pagestep) {
        char bmpfile[MAXFILENAMELEN];
        int pageno;

        pageno=0;
        if (pagecount>0 && i+1>pagecount)
            break;
        pageno = pagelist_page_by_index(k2settings->pagelist,i,np);
        /*
        ** NOTE(review): this passes a page NUMBER where the call above passes
        ** an INDEX.  It looks like it may have been meant as a "page is in
        ** the list" test -- confirm against the pagelist API.
        */
        if (!pagelist_page_by_index(k2settings->pagelist,pageno,np))
            continue;
        if (folder) {
            if (pageno-1>=fl->n)
                continue;
            wfile_fullname(bmpfile,fl->dir,fl->entry[pageno-1].name);
            status=bmp_read(src,bmpfile,stdout);
            if (status<0) {
                if (!second_time_through)
                    k2printf(TTEXT_WARN "\n\aCould not read file %s.\n" TTEXT_NORMAL,bmpfile);
                continue;
            }
        }
        else {
            double npix;

            /* If not a PDF/DJVU/PS file, only read it once. */
            if (i>0 && src_type!=SRC_TYPE_PDF && src_type!=SRC_TYPE_DJVU
                    && src_type!=SRC_TYPE_PS)
                break;

            /* Pre-read at low dpi to check bitmap size */
            wsys_set_decimal_period(1);
            status=bmp_get_one_document_page(src,k2settings,src_type,mupdffilename,pageno,10.,8,
                                             stdout);
            wsys_set_decimal_period(1);
            if (status<0) {
                errcnt++;
                if (errcnt<=10) {
                    k2printf(readerr,pageno,filename);
                    if (errcnt==10)
                        k2printf(readlimit,filename);
                }
                /* Error reading PS probably means we've run out of pages. */
                if (src_type==SRC_TYPE_PS)
                    break;
                continue;
            }

            /* Sanity check the bitmap size */
            npix = (double)(dpi/10.)*(dpi/10.)*src->width*src->height;
            if (npix > 2.5e8 && !pixwarn) {
                int ww,hh;

                ww=(int)((double)(dpi/10.)*src->width+.5);
                hh=(int)((double)(dpi/10.)*src->height+.5);
                k2printf("\a\n" TTEXT_WARN "\n\a ** Source resolution is very high (%d x %d pixels)!\n"
                         " You may want to reduce the -odpi or -idpi setting!\n"
                         " k2pdfopt may crash when reading the source file..."
                         TTEXT_NORMAL "\n\n",ww,hh);
                pixwarn=1;
            }

            /* Read again at nominal source dpi */
            wsys_set_decimal_period(1);
            if (k2settings->dst_color)
                status=bmp_get_one_document_page(src,k2settings,src_type,mupdffilename,pageno,
                                                 dpi,24,stdout);
            else
                status=bmp_get_one_document_page(src,k2settings,src_type,mupdffilename,pageno,
                                                 dpi,8,stdout);
            wsys_set_decimal_period(1);
            if (status<0) {
                errcnt++;
                if (errcnt<=10) {
                    k2printf(readerr,pageno,filename);
                    /* Uses k2printf, same as the pre-read error path above */
                    if (errcnt==10)
                        k2printf(readlimit,filename);
                }
                /* Error reading PS probably means we've run out of pages. */
                if (src_type==SRC_TYPE_PS)
                    break;
                continue;
            }
        }
        k2mark_page_count = i+1;
        {
            BMPREGION region;

            /* Got Good Page Render */
            bmpregion_init(&region);
            if (masterinfo_new_source_page_init(masterinfo,k2settings,src,srcgrey,marked,
                                                &region,rot_deg,&bormean,rotstr,pageno,stdout)==0) {
                /* v2.15 -- memory leak fix */
                bmpregion_free(&region);
                pages_done++;
                continue;
            }
            k2printf("\n" TTEXT_HEADER "SOURCE PAGE %d",pageno);
            if (pagecount>0) {
                if (k2settings->pagelist[0]!='\0')
                    k2printf(" (%d of %d)",pages_done+1,pagecount);
                else
                    k2printf(" of %d",pagecount);
            }
            k2printf(TTEXT_NORMAL " (%.1f x %.1f in) ... %s",(double)srcgrey->width/k2settings->src_dpi,
                     (double)srcgrey->height/k2settings->src_dpi,rotstr);
            fflush(stdout);

            /* Parse the source bitmap for viewable regions */
            bmpregion_source_page_add(&region,k2settings,masterinfo,1,pages_done++);
            /* v2.15 memory leak fix */
            bmpregion_free(&region);
        } /* End declaration of BMPREGION region */
#ifdef HAVE_K2GUI
        if (k2gui_active())
            k2gui_cbox_set_pages_completed(pages_done,NULL);
#endif
        if (k2settings->verbose) {
            k2printf(" master->rows=%d\n",masterinfo->rows);
            k2printf("Publishing...\n");
        }
        /* Reset the display order for this source page */
        if (k2settings->show_marked_source)
            mark_source_page(k2settings,masterinfo,NULL,0,0xf);
        /*
        ** v2.10 Call masterinfo_publish() no matter what.  If we've just kicked out a
        ** page, it doesn't matter.  It will do nothing.
        */
        masterinfo_publish(masterinfo,k2settings,
                           masterinfo_should_flush(masterinfo,k2settings));
        if (preview && k2_handle_preview(k2settings,masterinfo,k2mark_page_count,
                                         k2settings->dst_color?marked:src,k2out)) {
            bmp_free(marked);
            bmp_free(srcgrey);
            bmp_free(src);
            masterinfo_free(masterinfo,k2settings);
            if (folder)
                filelist_free(fl);
            k2out->status=0;
            return(0.);
        }
        if (k2settings->show_marked_source && !preview)
            publish_marked_page(mpdf,k2settings->dst_color ? marked : src,k2settings->src_dpi);
        k2printf("%d new pages saved.\n",masterinfo->published_pages-pw);
        pw=masterinfo->published_pages;
    }

    /* Didn't find the preview page yet--push out final page. */
    if (preview) {
        masterinfo_flush(masterinfo,k2settings);
        if (!k2_handle_preview(k2settings,masterinfo,k2mark_page_count,
                               k2settings->dst_color?marked:src,k2out)) {
            /* No preview bitmap--return zero-width bitmap */
            if (k2out->bmp==NULL)
                bmp_free(masterinfo->preview_bitmap);
            else
                k2out->bmp->width=0;
        }
        bmp_free(marked);
        bmp_free(srcgrey);
        bmp_free(src);
        masterinfo_free(masterinfo,k2settings);
        if (folder)
            filelist_free(fl);
        k2out->status=0;
        return(0.);
    }
    bmp_free(marked);
    bmp_free(srcgrey);
    bmp_free(src);

    /* Determine orientation of document */
    if (or_detect) {
        if (pages_done>0) {
            double thresh;

            /*
            ** bormean = 1.0 means neutral
            ** bormean >> 1.0 means document is likely portrait (no rotation necessary)
            ** bormean << 1.0 means document is likely landscape (need to rotate it)
            */
            bormean = pow(bormean,1./pages_done);
            thresh=10.-(double)pages_done/2.;
            if (thresh<5.)
                thresh=5.;
            if (bormean < 1./thresh) {
                k2printf("Rotating clockwise.\n");
                masterinfo_free(masterinfo,k2settings);
                if (folder)
                    filelist_free(fl);
                k2out->status=0;
                return(270.);
            }
        }
        k2printf("No rotation necessary.\n");
        masterinfo_free(masterinfo,k2settings);
        if (folder)
            filelist_free(fl);
        k2out->status=0;
        return(0.);
    }

    /*
    ** v2.10 -- Calling masterinfo_flush() without checking if a page has just been
    **          been flushed is fine at the end.  If there is nothing left
    **          in the master output bitmap, it won't do anything.
    */
    masterinfo_flush(masterinfo,k2settings);
    {
        char cdate[128],author[256],title[256];

#ifdef HAVE_MUPDF_LIB
        /* Carry the source PDF's info fields over to the output */
        if (src_type==SRC_TYPE_PDF) {
            if (wmupdf_info_field(mupdffilename,"Author",author,255)<0)
                author[0]='\0';
            if (wmupdf_info_field(mupdffilename,"CreationDate",cdate,127)<0)
                cdate[0]='\0';
            if (wmupdf_info_field(mupdffilename,"Title",title,255)<0)
                title[0]='\0';
        }
        else
#endif
            author[0]=title[0]=cdate[0]='\0';
        if (!k2settings->use_crop_boxes) {
            if (masterinfo->outline!=NULL) {
                if (k2settings->debug)
                    wpdfoutline_echo(masterinfo->outline,1,1,stdout);
                pdffile_add_outline(&masterinfo->outfile,masterinfo->outline);
            }
            pdffile_finish(&masterinfo->outfile,title,author,masterinfo->pageinfo.producer,cdate);
            pdffile_close(&masterinfo->outfile);
        }
        else {
            /* Re-write PDF file using crop boxes */
#if (WILLUSDEBUGX & 64)
            wpdfboxes_echo(&masterinfo->pageinfo.boxes,stdout);
#endif
#ifdef HAVE_MUPDF_LIB
            /* v2.20 bug fix -- need to compensate for document_scale_factor if its not 1.0 */
            wpdfpageinfo_scale_source_boxes(&masterinfo->pageinfo,1./k2settings->document_scale_factor);
            wmupdf_remake_pdf(mupdffilename,dstfile,&masterinfo->pageinfo,1,masterinfo->outline,stdout);
#endif
        }
        if (k2settings->show_marked_source) {
            pdffile_finish(mpdf,title,author,masterinfo->pageinfo.producer,cdate);
            pdffile_close(mpdf);
        }
    } /* cdate, author, title selection */

    if (k2settings->debug || k2settings->verbose)
        k2printf("Cleaning up ...\n\n");

    /* Summary of what was written */
    size=wfile_size(dstfile);
    k2printf("\n" TTEXT_BOLD "%d pages" TTEXT_NORMAL,masterinfo->published_pages);
    if (masterinfo->wordcount>0)
        k2printf(" (%d words)",masterinfo->wordcount);
    k2printf(" written to " TTEXT_MAGENTA "%s" TTEXT_NORMAL " (%.1f MB).\n\n",
             dstfile,size/1024./1024.);
#ifdef HAVE_GHOSTSCRIPT
    if (k2settings->ppgs)
        gs_postprocess(dstfile);
#endif
    if (k2settings->show_marked_source) {
        size=wfile_size(markedfile);
        k2printf(TTEXT_BOLD "%d pages" TTEXT_NORMAL " written to " TTEXT_MAGENTA "%s"
                 TTEXT_NORMAL " (%.1f MB).\n\n",pages_done,markedfile,size/1024./1024.);
    }
#ifdef HAVE_OCR_LIB
    if (k2settings->dst_ocr && masterinfo->ocrfilename[0]!='\0'
            && wfile_status(masterinfo->ocrfilename)==1) {
        size=wfile_size(masterinfo->ocrfilename);
        k2printf(TTEXT_BOLD "%d words" TTEXT_NORMAL " written to " TTEXT_MAGENTA "%s"
                 TTEXT_NORMAL " (%.1f MB).\n\n",masterinfo->wordcount,masterinfo->ocrfilename,size/1024./1024.);
    }
#endif
    if (local_tocwrites>0)
        k2printf(TTEXT_BOLD "%d bytes" TTEXT_NORMAL " written to " TTEXT_MAGENTA "%s"
                 TTEXT_NORMAL ".\n\n",(int)(wfile_size(k2settings->tocsavefile)+.5),k2settings->tocsavefile);
    masterinfo_free(masterinfo,k2settings);
    if (folder)
        filelist_free(fl);
    k2out->status=0;
    return(0.);
}