/** Takes profiles of numStrips vertical strips (plus numStrips-1 overlapping strips) and uses them to estimate the avg textline height **/ void DTextlineSeparator::getTextlineRects(DImage &img, int *numTextlines, DRect **rgTextlineRects, int *spacingEst, char *stDebugBaseName){ int w, h; D_uint8 *pu8; DProfile prof; DProfile profSmear;// profile of smeared image char stTmp[1024]; w = img.width(); h = img.height(); pu8 = img.dataPointer_u8(); for(int y=0, idx=0; y < h; ++y){ for(int x=0; x < w; ++x, ++idx){ if((pu8[idx] > 0) && (pu8[idx] < 255)){ fprintf(stderr, "DTextlineSeparator::getTextlineRects() requires " "BINARY image with values of 0 or 255!\n"); exit(1); } } } DProfile profWeightedStrokeDist; int *rgBlackSpacingHist; rgBlackSpacingHist=new int[200]; D_CHECKPTR(rgBlackSpacingHist); memset(rgBlackSpacingHist,0,sizeof(int)*200); int *rgPeakYs; int numPeaks; int *rgValleyYs; int numValleys; rgPeakYs = new int[h]; D_CHECKPTR(rgPeakYs); rgValleyYs = new int[h]; D_CHECKPTR(rgValleyYs); numPeaks = 0; numValleys = 0; { prof.getImageVerticalProfile(img,false); prof.smoothAvg(2); double *pdbl; pdbl = prof.dataPointer(); for(int j=0; j < h; ++j) pdbl[j] /= 255; // now the profile is number of white pixels (was GS prof) unsigned int profMax; profMax = (unsigned int)prof.max(); //use original image to create histogram of horizontal foreground spacing //(distance from black pixel to next black pixel) weighted by profile value //inverse (number of black pixels instead of white pixels) for(int y=2; y < (h-2); ++y){//ignore 2 on each end (smoothing boundaries) int lastBlackX; int runlength; int x; int weight; x=0; lastBlackX = -1; runlength = 0; for(x=0 ; x < w; ++x){ if(pu8[y*w+x] == 0){//black runlength = x - lastBlackX; if((runlength >= 2) && (runlength < 200)){ weight = (int)profMax - (int)pdbl[y];//inverse of profile value at y rgBlackSpacingHist[runlength] += weight; } lastBlackX=x; } } } } //to get the spacing estimate, get the max, then find the next position //that is less than 1/3 of the max. Use that as the estimate to determine //scale int spacingMax; int spacingEstimate; spacingMax = 2; for(int j=3; j<200; ++j){ if(rgBlackSpacingHist[j] > rgBlackSpacingHist[spacingMax]) spacingMax = j; } spacingEstimate = spacingMax; for(int j=spacingMax+1; j < 200; ++j){ if(rgBlackSpacingHist[j] < (rgBlackSpacingHist[spacingMax] / 3)){ spacingEstimate = j; break; } } printf(" spacing estimate = *** %d pixels\n",spacingEstimate); if(NULL != spacingEst){ (*spacingEst) = spacingEstimate; } // now smear the image based on the spacing estimate, then take new profiles DImage imgSmear; D_uint8 *psmear; imgSmear = img; psmear = imgSmear.dataPointer_u8(); for(int y=0; y < h; ++y){ int lastBlackX; int runlength; lastBlackX = w; for(int x=0; x < w; ++x){ if(pu8[y*w+x] == 0){//black runlength = x - lastBlackX; if((runlength < 2*spacingEstimate) && (runlength >0)){ // fill in the white since last black pixel with black for(int xp=lastBlackX+1; xp < x; ++xp){ psmear[(y*w+xp)] = 128; } } lastBlackX = x; } } } sprintf(stTmp,"%s_smear.ppm",stDebugBaseName); imgSmear.save(stTmp); // now recalculate all of the profiles int rgSmearThresh; { double *pdbl; unsigned int profMax; // imgSmear.copy_(imgStrip, stripLeft, 0, stripW, h); profSmear.getImageVerticalProfile(img,false); // invert the profile so black is 255 and white is zero before smoothing pdbl = profSmear.dataPointer(); profMax = (unsigned int)profSmear.max(); for(int y=0; y < h; ++y) pdbl[y] = profMax - pdbl[y]; profSmear.smoothAvg(spacingEstimate*2/3); profMax = (unsigned int)profSmear.max();//new max after smoothing // decide where peak/valleys in profile are { int prevSign = 0; double deriv; double *pdbl; int numZeros = 0; pdbl = profSmear.dataPointer(); //use profile derivative and dist from last peak/valley //to decide where peaks and valleys are for(int y=1; y < (h-1); ++y){ int left, right; right = y + spacingEstimate/2; if(right > h) right = h; left = y - spacingEstimate/2; if(left < 0) left = 0; // deriv = pdbl[y+1] - pdbl[y-1]; deriv = pdbl[right] - pdbl[left]; if(deriv > 0.){//rising if(prevSign <= 0){//valley rgValleyYs[numValleys] = y-numZeros/2;//(middle of plateaus) ++numValleys; } prevSign = 1; numZeros = 0; } else if(deriv < 0.){//falling if(prevSign >= 0){//peak rgPeakYs[numPeaks] = y-numZeros/2;//(middle of plateaus) ++numPeaks; } prevSign = -1; numZeros = 0; } else{ // zero slope ++numZeros; } }//end for(y=... } #if 0 // refine valleys so they are at true minima for(int v=0; v < numValleys; ++v){ bool fRefined = false; int origY; origY = rgValleyYs[v]; for(int offs=1; offs < spacingEstimate/2; ++offs){ int checkY; checkY = rgValleyYs[v]-offs; if(checkY>=0){ if(pdbl[checkY] < pdbl[rgValleyYs[v]]){ rgValleyYs[v] = checkY; fRefined = true; } } checkY = rgValleyYs[v]+offs; if(checkY<h){ if(pdbl[checkY] < pdbl[rgValleyYs[v]]){ rgValleyYs[v] = checkY; fRefined = true; } } } if(fRefined) printf(" >>refined valley%d from y=%d to y=%d\n", v,origY,rgValleyYs[v]); } #endif // #if 0 // // get rid of false peaks (those that have very low prominence) // { // // figure out weighted avg prominence (weight by prominence of each peak) // double sumProm = 0.; // int numProm = 0; // for(int p=0; p < numPeaks; ++p){ // int numSides; // will be 1, 2, or 0 // // double prom = pdbl[rgPeakYs[p] // } // } // // combine peaks that are too close to each other // { // int numPeaksRemoved = 0; // bool fRemoved; // fRemoved = true; // while(fRemoved && (numPeaks>1)){ // fRemoved = false; // int deletePeak=0; // int deleteValley=0; // for(int j=1; j < numPeaks; ++j){ // if(abs(rgPeakYs[j-1]-rgPeakYs[j])<spacingEstimate*2/3){//too close // if(pdbl[rgPeakYs[j]] > pdbl[rgPeakYs[j-1]]){ // printf(" A remove peak %d at y=%d\n",j-1,(int)pdbl[rgPeakYs[j-1]]); // deletePeak = j-1; // } // else{ // printf(" B remove peak %d at y=%d\n",j,(int)pdbl[rgPeakYs[j]]); // deletePeak = j; // } // deleteValley = -1; // if(numValleys > 0){ // if(rgPeakYs[0] < rgValleyYs[0]){//peak was first // deleteValley = deletePeak; // } // else{//valley was first // deleteValley = deletePeak+1; // } // } // //delete the peak // for(int k=deletePeak+1; k < numPeaks; ++k){ // rgPeakYs[k-1] = rgPeakYs[k]; // } // --numPeaks; // //delete the valley (if in range) // if((deleteValley>=0) && (deleteValley < numValleys)){ // for(int k=deleteValley+1; k < numValleys; ++k){ // rgValleyYs[k-1] = rgValleyYs[k]; // } // } // fRemoved = true; // ++numPeaksRemoved; // break; // }//if(abs(... // }//for(int j=... // }//while(fRemoved) // } // {//figure out peak-to-valley topographic prominence threshold // } // #endif } printf("fPeakFirst = %d\n",(int)(rgPeakYs[0] < rgValleyYs[0])); printf("numPeaks=%d numValleys=%d\n", numPeaks, numValleys); for(int p=0; (p < numPeaks) || (p<numValleys); ++p){ printf("\t%d:\t",p); if(p< numPeaks) printf("p%4d\t",rgPeakYs[p]); else printf("p----\t"); if(p< numValleys) printf("v%4d\t",rgValleyYs[p]); else printf("v----\t"); printf("\n"); } (*numTextlines) = numPeaks; (*rgTextlineRects) = new DRect[numPeaks]; D_CHECKPTR((*rgTextlineRects)); bool fPeakFirst; fPeakFirst = rgPeakYs[0] < rgValleyYs[0]; for(int p = 0; p < numPeaks; ++p){ int topIdx, botIdx; if(fPeakFirst){ topIdx = p-1; botIdx = p; } else{ topIdx = p; botIdx = p+1; } (*rgTextlineRects)[p].x = 0; (*rgTextlineRects)[p].w = w-1; if(topIdx < 0) (*rgTextlineRects)[p].y = 0; else if(topIdx >= numValleys){ fprintf(stderr, "This shouldn't happen!(%s:%d)\n", __FILE__, __LINE__); (*rgTextlineRects)[p].y = 0; } else{ (*rgTextlineRects)[p].y = rgValleyYs[topIdx]; } if(botIdx < 0){ fprintf(stderr, "This shouldn't happen!(%s:%d)\n", __FILE__, __LINE__); (*rgTextlineRects)[p].h = h-((*rgTextlineRects)[p].y)-1; } else if(botIdx >= numValleys){ (*rgTextlineRects)[p].h = h-((*rgTextlineRects)[p].y)-1; } else{ (*rgTextlineRects)[p].h = rgValleyYs[botIdx]-((*rgTextlineRects)[p].y)-1; } } // now remove any textlines that seem empty { //avg # of pixels within a textline (weighted by # pxls in that textline) double sumPxls; double sumWeights; sumPxls = 0; sumWeights = 0; long *rgNumPixels; long pxlThresh; rgNumPixels = (long*)calloc(*numTextlines, sizeof(long)); D_CHECKPTR(rgNumPixels); sumPxls = 0; sumWeights = 0; for(int p=0; p < (*numTextlines); ++p){ long numPxls; numPxls = 0; for(int y=(*rgTextlineRects)[p].y; y < ((*rgTextlineRects)[p].y+(*rgTextlineRects)[p].h); ++y){ for(int x=(*rgTextlineRects)[p].x; x < ((*rgTextlineRects)[p].x+(*rgTextlineRects)[p].w); ++x){ if(pu8[y*w+x]==0){//black pixel ++numPxls; } } } printf(" line%d numPxls=%ld\n",p,numPxls); rgNumPixels[p]=numPxls; sumPxls += numPxls * numPxls; sumWeights += numPxls; } printf(" sumPxls=%f sumWeights=%f\n",sumPxls, sumWeights); if(sumWeights > 0) sumPxls /= sumWeights; else sumPxls = 0; printf(" weighted avg number of pixels per line:%f\n",sumPxls); pxlThresh = sumPxls/10; printf(" pixel threshold=%ld\n",pxlThresh); //now get rid of lines with few pixels for(int p=(*numTextlines)-1; p >=0; --p){ if(rgNumPixels[p] < pxlThresh){// one-twentieth of weighted avg printf(" remove textline %d (y=%d to y=%d) with %ld pixels\n",p, (*rgTextlineRects)[p].y,(*rgTextlineRects)[p].y+ (*rgTextlineRects)[p].h, rgNumPixels[p]); for(int r=p; r < ((*numTextlines)-1); ++r){ (*rgTextlineRects)[r] = (*rgTextlineRects)[r+1]; } --(*numTextlines); } } free(rgNumPixels); } printf(" There are now %d textlines\n", (*numTextlines)); //debug: save an image with the textline rectangles drawn { DImage imgTextlines; imgTextlines = img.convertedImgType(DImage::DImage_RGB); for(int p = 0; p < (*numTextlines); ++p){ int colorR, colorG, colorB; printf("\trect%d: x,y wxh=%d,%d %dx%d\n",p,(*rgTextlineRects)[p].x, (*rgTextlineRects)[p].y, (*rgTextlineRects)[p].w,(*rgTextlineRects)[p].h); colorR = ((p+1)*127) % 255; colorG = (p*127) % 255; colorB = (p) % 255; imgTextlines.drawRect((*rgTextlineRects)[p].x,(*rgTextlineRects)[p].y, (*rgTextlineRects)[p].x+(*rgTextlineRects)[p].w-1, (*rgTextlineRects)[p].y+(*rgTextlineRects)[p].h, colorR, colorG, colorB); imgTextlines.drawRect((*rgTextlineRects)[p].x+1,(*rgTextlineRects)[p].y+1, (*rgTextlineRects)[p].x+(*rgTextlineRects)[p].w-1-1, (*rgTextlineRects)[p].y+(*rgTextlineRects)[p].h-1, colorR, colorG, colorB); imgTextlines.drawRect((*rgTextlineRects)[p].x+2,(*rgTextlineRects)[p].y+2, (*rgTextlineRects)[p].x+(*rgTextlineRects)[p].w-1-2, (*rgTextlineRects)[p].y+(*rgTextlineRects)[p].h-2, colorR, colorG, colorB); } sprintf(stTmp,"%s_tl_rects.pgm",stDebugBaseName); imgTextlines.save(stTmp); } // // now get x-height estimate using profiles (or black runlengths of smears) // #if 0 // //debug: save an image with all of the profiles // { // DImage imgProfsAll; // imgProfsAll = prof.toDImage(500,true); // imgProfsAll = imgProfsAll.convertedImgType(DImage::DImage_RGB); // sprintf(stTmp,"%s_allprofs.pgm",stDebugBaseName); // imgProfsAll.save(stTmp); // } // //debug: save an image with all of the smeared profiles // { // DImage imgProfsAll; // imgProfsAll = profSmear.toDImage(500,true); // imgProfsAll = imgProfsAll.convertedImgType(DImage::DImage_RGB); // for(int j=0; j < numPeaks; ++j){ // int ypos; // ypos = rgPeakYs[j]; // imgProfsAll.drawLine(0,ypos,499,ypos,255,0,0); // } // for(int j=0; j < numValleys; ++j){ // int ypos; // ypos = rgValleyYs[j]; // imgProfsAll.drawLine(0,ypos,499,ypos,0,255,0); // } // sprintf(stTmp,"%s_allsmearprofs.pgm",stDebugBaseName); // imgProfsAll.save(stTmp); // } // //debug: save a gnuplot of the histograms of black spacing weighted by profile // // the image has the histogram for each strip followed by the sum histogram // // a value of -10 is placed at positions 0,1 of each histogram as a separator // { // DImage imgSpacingHists; // FILE *fout; // sprintf(stTmp,"%s_spacing_profs.dat",stDebugBaseName); // fout = fopen(stTmp,"wb"); // if(!fout){ // fprintf(stderr, "couldn't open debug file '%s' for output\n",stTmp); // exit(1); // } // for(int j=0; j < 200; ++j){ // int val; // val = rgBlackSpacingHist[j]; // if(j<2) // val = -10; // fprintf(fout,"%d\t%d\n",j, val); // } // fclose(fout); // } // #endif // now at the otsu x-position in the profile, get avg black runlength to // guess at peak (textline) height. // Do the same for white to guess at valley (spacing) height. // After getting it for each strip's profile, take the avg for the whole // page. Use that to determine a smoothing value and a window size for the // transition count map (TCM). (maybe use median instead of avg?) delete [] rgPeakYs; delete [] rgValleyYs; delete [] rgBlackSpacingHist; return; }