/**Renders this profile as an 8-bit grayscale bar chart.
 *
 * One bar is drawn per profile entry.  The chart is first built numPixels
 * wide and dataLen() high: the bar for entry y is painted with fg from x=0
 * through x = rgProf[y]*numPixels/max (max being the largest profile value,
 * never less than zero) and the rest of that row is painted with bg.
 *
 * If fVertical is true that image is returned as-is.  If fVertical is false
 * it is first passed through rotate90_(..., -90), so the result is dataLen()
 * wide and numPixels high.
 *
 * fg is the gray level for pixels that are part of the profile, bg is the
 * gray level for pixels that are NOT part of the profile.
 */
DImage DProfile::toDImage(int numPixels, bool fVertical,
                          D_uint8 fg, D_uint8 bg){
  DImage imgBars;
  imgBars.create(numPixels, len, DImage::DImage_u8);
  D_uint8 *pOut = imgBars.dataPointer_u8();

  // the tallest entry determines the scale of every bar
  double peak = 0.;
  for(int i = 0; i < len; ++i){
    if(rgProf[i] > peak)
      peak = rgProf[i];
  }

  // paint the bars one row at a time, walking the buffer linearly
  for(int row = 0; row < len; ++row){
    const double barEnd = rgProf[row] * numPixels / peak;
    for(int col = 0; col < numPixels; ++col, ++pOut){
      *pOut = (col <= barEnd) ? fg : bg;
    }
  }

  if(fVertical)
    return imgBars;

  DImage imgRotated;
  imgBars.rotate90_(imgRotated, -90);
  return imgRotated;
}
/**Each row of img is projected onto the vertical axis. Resulting data length will be equal to the height of img. The profile is a summation of the grayscale values in each row. If fNormalize is true, then each value is divided by img.width() so it is the average grayscale value for the row instead of the sum. If fNormalize is true, the resulting profile values are divided by the image width. */ void DProfile::getImageVerticalProfile(const DImage &img, bool fNormalize){ int w, h; w = img.width(); h = img.height(); // allocate the rgProf array if(NULL == rgProf){ rgProf = (double*)malloc(h * sizeof(double)); D_CHECKPTR(rgProf); len = h; } else{ if(len != h){ rgProf = (double*)realloc(rgProf,h*sizeof(double)); D_CHECKPTR(rgProf); len = h; } } switch(img.getImageType()){ case DImage::DImage_u8: { D_uint8 *pu8; pu8=img.dataPointer_u8(); for(int y = 0, idx=0; y < h; ++y){ rgProf[y] = 0.; for(int x = 0; x < w; ++x, ++idx){ rgProf[y] += pu8[idx]; } if(fNormalize) rgProf[y] /= w; } } break; case DImage::DImage_flt_multi: { float *pflt; if(img.numChannels() > 1){ fprintf(stderr,"DProfile::getImageVerticalProfile() floats only " "supported with a single channel\n"); abort(); } pflt=img.dataPointer_flt(0); for(int y = 0, idx=0; y < h; ++y){ rgProf[y] = 0.; for(int x = 0; x < w; ++x, ++idx){ rgProf[y] += pflt[idx]; } if(fNormalize) rgProf[y] /= w; } } break; default: fprintf(stderr, "Not yet implemented!\n"); abort(); }//end switch(img.getImageType()) }
/** Copies the 8-bit pixel buffer of dimg into *mat.
 *
 * *mat is replaced by a freshly allocated unsigned-char matrix with
 * dimg.height() rows and dimg.width() columns, and width*height bytes are
 * then copied from dimg.dataPointer_u8() in linear order.
 */
void convertDImageToMat(DImage& dimg, Mat* mat) {
  *mat = Mat_<unsigned char>(dimg.height(), dimg.width());

  const unsigned char* from = dimg.dataPointer_u8();
  unsigned char* to = mat->data;
  const int numPixels = dimg.width() * dimg.height();
  for (int k = 0; k < numPixels; ++k) {
    *to++ = *from++;
  }
}
/** Returns a new CV_8U matrix holding a copy of the 8-bit pixels of src.
 *
 * The matrix has src.height() rows and src.width() columns; width*height
 * bytes are copied from src.dataPointer_u8() in linear order.
 */
Mat DImageToMat(const DImage& src) {
  Mat result(src.height(), src.width(), CV_8U);

  const unsigned char* from = src.dataPointer_u8();
  unsigned char* to = result.data;
  const int numPixels = src.height() * src.width();
  for (int k = 0; k < numPixels; ++k) {
    *to++ = *from++;
  }
  return result;
}
/**Max runlength in each column of img is projected onto the horizontal axis. Resulting data length will be equal to the width of img. If fNormalize is true, each profile value will be divided by image height, so the value is a fraction of the image height instead of a number of pixels. */ void DProfile::getHorizMaxRunlengthProfile(const DImage &img, D_uint32 rgbVal, bool fNormalize){ int w, h; unsigned int *rgRunlengths; w = img.width(); h = img.height(); // allocate the rgProf array if(NULL == rgProf){ rgProf = (double*)malloc(w * sizeof(double)); D_CHECKPTR(rgProf); len = w; } else{ if(len != w){ rgProf = (double*)realloc(rgProf,w*sizeof(double)); D_CHECKPTR(rgProf); len = w; } } rgRunlengths = (unsigned int*)malloc(sizeof(unsigned int)*w); D_CHECKPTR(rgRunlengths); memset(rgRunlengths, 0, sizeof(unsigned int)*w); memset(rgProf, 0, sizeof(double) * w); switch(img.getImageType()){ case DImage::DImage_u8: { D_uint8 *pu8; pu8=img.dataPointer_u8(); for(int y = 0, idx=0; y < h; ++y){ for(int x = 0; x < w; ++x, ++idx){ if((D_uint8)rgbVal == pu8[idx]){//increment run length for this col ++(rgRunlengths[x]); if(rgRunlengths[x] > rgProf[x]) rgProf[x] = (double)rgRunlengths[x]; } else{ rgRunlengths[x] = 0; } } } if(fNormalize){ for(int x = 0; x < w; ++x) rgProf[x] /= h; } } break; default: fprintf(stderr, "Not yet implemented!\n"); abort(); }//end switch(img.getImageType()) free(rgRunlengths); }
/** Returns a DImage holding a copy of the pixels of src.
 *
 * src is treated as a single-channel 8-bit matrix: src.cols * src.rows
 * bytes are written into the DImage buffer in row-major order.
 *
 * A cv::Mat is not necessarily stored contiguously (for example a
 * sub-matrix/ROI view, or a matrix with padded rows), so the source is only
 * read linearly through src.data when src.isContinuous() is true; otherwise
 * every row is fetched through src.ptr() so the row stride is respected.
 *
 * NOTE(review): the destination is prepared with setLogicalSize() exactly as
 * before; this presumably leaves img with a buffer of at least cols*rows
 * bytes behind dataPointer_u8() -- confirm against DImage.
 */
DImage MatToDImage(const Mat& src) {
  DImage img;
  img.setLogicalSize(src.cols, src.rows);
  unsigned char* to = img.dataPointer_u8();

  if (src.isContinuous()) {
    // rows are back to back: a single linear copy is valid
    const unsigned char* from = src.data;
    const int numPixels = src.cols * src.rows;
    for (int k = 0; k < numPixels; ++k) {
      to[k] = from[k];
    }
  }
  else {
    // rows are separated by padding: copy each row from its own start
    for (int r = 0; r < src.rows; ++r) {
      const unsigned char* fromRow = src.ptr<unsigned char>(r);
      unsigned char* toRow = to + r * src.cols;
      for (int c = 0; c < src.cols; ++c) {
        toRow[c] = fromRow[c];
      }
    }
  }
  return img;
}
/**Avg runlength in each column of img is projected onto the vertical axis. Resulting data length will be equal to the height of img. If fNormalize is true, each profile value will be divided by image width, so the value is a fraction of the image width instead of a number of pixels. */ void DProfile::getVertAvgRunlengthProfile(const DImage &img, D_uint32 rgbVal, bool fNormalize){ int w, h; unsigned int runLength, numRuns; w = img.width(); h = img.height(); // allocate the rgProf array if(NULL == rgProf){ rgProf = (double*)malloc(h * sizeof(double)); D_CHECKPTR(rgProf); len = h; } else{ if(len != h){ rgProf = (double*)realloc(rgProf,h*sizeof(double)); D_CHECKPTR(rgProf); len = h; } } switch(img.getImageType()){ case DImage::DImage_u8: { D_uint8 *pu8; pu8=img.dataPointer_u8(); for(int y = 0, idx=0; y < h; ++y){ rgProf[y] = 0.; runLength = 0; numRuns = 0; for(int x = 0; x < w; ++x, ++idx){ if((D_uint8)rgbVal == pu8[idx]){//increment run length for this row if(0==runLength) ++numRuns; ++runLength; ++(rgProf[y]); } else{ runLength = 0; } } if(numRuns > 0) rgProf[y] /= numRuns; //(we have sum and need to divide for avg) if(fNormalize) rgProf[y] /= w; } } break; default: fprintf(stderr, "Not yet implemented!\n"); abort(); }//end switch(img.getImageType()) }
/**Each column of img is projected onto the horizontal axis. Resulting data length will be equal to the width of img. If fNormalize is true, the resulting profile values are divided by the image height. */ void DProfile::getImageHorizontalProfile(const DImage &img, bool fNormalize){ int w, h; w = img.width(); h = img.height(); // allocate the rgProf array if(NULL == rgProf){ rgProf = (double*)malloc(w * sizeof(double)); D_CHECKPTR(rgProf); len = w; } else{ if(len != w){ rgProf = (double*)realloc(rgProf,w*sizeof(double)); D_CHECKPTR(rgProf); len = w; } } memset(rgProf, 0, sizeof(double) * w); switch(img.getImageType()){ case DImage::DImage_u8: { D_uint8 *pu8; pu8=img.dataPointer_u8(); for(int y = 0, idx=0; y < h; ++y){ for(int x = 0; x < w; ++x, ++idx){ rgProf[x] += pu8[idx]; } } if(fNormalize){ for(int x = 0; x < w; ++x) rgProf[x] /= h; } } break; default: fprintf(stderr, "Not yet implemented!\n"); abort(); }//end switch(img.getImageType()) }
/**The slant angle is assumed to be between 60 and -45 degrees (0 deg=vertical, * negative values are left-slanted, positive values right-slanted). * To determine slant: at each x-position, the longest runlength at each angle * is found and its squared value is added into the accumulator for that angle. * The histogram is smoothed, and the angle corresponding to the highest value * in the histogram is the returned angle (in degrees). * * Runlengths of less than rlThresh pixels are ignored. * * The image should be black(0) and white(255). The portion of the image * specified by x0,y0 - x1,y1 is considered to be the textline of interest. * If no coordinates are specified, then the entire image is used as the * textline. * * If weight is not NULL, it will be the sum of max runlengths (not squared) at * all 120 angles. Weights are used in determination of weighted average angle * for all textlines in getAllTextlinesSlantAngleDeg() before adjusting angles. * * If rgSlantHist is not NULL, the squared max RL values in the angle histogram * will be copied into the rgSlantHist array. It must already be allocated to * 120*sizeof(unsigned int). * * if imgAngleHist is not NULL, then the image is set to w=120 and h=y1-y0+1. * It is a (grayscale) graphical representation of what is in rgSlantHist. 
*/ double DSlantAngle::getTextlineSlantAngleDeg(DImage &imgBW, int rlThresh, int x0,int y0,int x1,int y1, double *weight, unsigned int *rgSlantHist, DImage *imgAngleHist){ int *rgLineSlantAngles; int lineH; int slantOffset, slantAngle, angle; unsigned int rgSlantSums[120]; unsigned int rgSlantSumsTmp[120]; int runlen, maxrl; /* maximum slant runlen */ double slantDx; int w, h; D_uint8 *p8; double dblWeight = 0; w = imgBW.width(); h = imgBW.height(); p8 = imgBW.dataPointer_u8(); if(-1 == x1) x1 = w-1; if(-1 == y1) y1 = h-1; lineH = y1-y0+1; /* estimate the predominant slant angle (0=vertical, +right, -left) */ slantOffset = (int)(0.5+ (lineH / 2.0) / tan(DMath::degreesToRadians(30.))); for(int j = 0; j < 120; ++j){ rgSlantSums[j] = 0; rgSlantSumsTmp[j] = 0; } for(angle = -45; angle <= 60; angle += 1){ /* at each x-position, sum the maximum run length at that angle into the accumulator */ if(0 == angle) /* vertical, so tangent is infinity */ slantDx = 0.; else slantDx = -1.0 / tan(DMath::degreesToRadians(90-angle)); // for(j = slantOffset; j < (hdr.w-slantOffset); ++j){ for(int j = x0; j <= x1; ++j){ maxrl = 0; runlen = 0; for(int y = 0; y < lineH; ++y){ int x; x = (int)(0.5+ j + y * slantDx); if( (x>=x0) && (x <= x1)){ /* make sure we are within bounds */ int idxtmp; idxtmp = (y+y0)*w+x; // imgCoded[idxtmp*3] = 0; if(0 == p8[idxtmp]){ ++runlen; if(runlen > maxrl){ maxrl = runlen; } } else runlen = 0; } /* end if in bounds */ else{ runlen = 0; /* ignore runs that go off edge of image */ } } if(maxrl > rlThresh){ rgSlantSums[angle+45] += maxrl*maxrl; dblWeight += maxrl; } } /* end for j */ } /* end for angle */ //smooth the histogram rgSlantSumsTmp[0] = (rgSlantSums[0] + rgSlantSums[1]) / 2; for(int aa = 1; aa < 119; ++aa){ rgSlantSumsTmp[aa]=(rgSlantSums[aa-1]+rgSlantSums[aa]+rgSlantSums[aa+1])/3; } for(int aa = 0; aa < 120; ++aa){ rgSlantSums[aa] = rgSlantSumsTmp[aa]; } //use the histogram peak as the slant angle slantAngle = 0; for(angle = -45; angle <= 60; angle 
+= 1){ if(rgSlantSums[angle+45] > rgSlantSums[slantAngle+45]){ slantAngle = angle; } } /* end for angle */ if(NULL != weight) (*weight) = dblWeight; if(NULL != rgSlantHist){ for(int aa = 0; aa < 120; ++aa){ rgSlantHist[aa] = rgSlantSums[aa]; } } if(NULL != imgAngleHist){//debug tool- return an image of the angle histogram //DProfile prof; int max = 0; int htmp; D_uint8 *p8ang; htmp = y1-y0+1; imgAngleHist->create(120,htmp,DImage::DImage_u8); imgAngleHist->clear(); p8ang = imgAngleHist->dataPointer_u8(); for(int i=0; i < 120; ++i){ if(rgSlantSums[i] > max) max = rgSlantSums[i]; } if(0==max) max = 1; // for(int y=0, idx=0; y < htmp; ++y){ // for(int x=0; x < 120; ++x, ++idx){ // if((rgSlantSums[x]/(double)max) >= ((htmp-y)/(double)htmp)) // p8ang[idx] = 0xee; // else // p8ang[idx] = 0x88; // } // } // printf("htmp=%d\n", htmp); for(int x=0; x < 120; ++x){ double pct; pct = 1.-rgSlantSums[x] / (double)max; imgAngleHist->drawLine(x,htmp-1,x,(int)(pct*(htmp-1)), 128); } } return (double)slantAngle; }
void* DSlantAngle::getSlant_thread_func(void *params){ SLANT_THREAD_PARMS *pparms; int numThreads; int w, h; D_uint8 *p8; int runlen, maxrl; /* maximum slant runlen */ double slantDx; int lineH; int slantOffset, slantAngle, angle; double dblWeight; DImage *pimg; int rlThresh; pparms = (SLANT_THREAD_PARMS*)params; numThreads = pparms->numThreads; pimg = pparms->pImgSrc; rlThresh = pparms->rlThresh; w = pimg->width(); h = pimg->height(); p8 = pimg->dataPointer_u8(); for(int i=0; i < 120; ++i) pparms->rgSlantSums[i] = 0; for(int tl=pparms->threadNum; tl < (pparms->numTextlines); tl+=numThreads){ int x0, y0, x1, y1; unsigned int rgSlantSums[120]; x0 = pparms->rgRects[tl].x; y0 = pparms->rgRects[tl].y; x1 = pparms->rgRects[tl].x + pparms->rgRects[tl].w - 1; y1 = pparms->rgRects[tl].y + pparms->rgRects[tl].h - 1; lineH = y1-y0+1; memset(rgSlantSums, 0, sizeof(int)*120); dblWeight = 0.; for(angle = -45; angle <= 60; angle += 1){ /* at each x-position, sum the maximum run length at that angle into the accumulator */ if(0 == angle) /* vertical, so tangent is infinity */ slantDx = 0.; else slantDx = -1.0 / tan(DMath::degreesToRadians(90-angle)); // for(j = slantOffset; j < (hdr.w-slantOffset); ++j){ for(int j = x0; j <= x1; ++j){ maxrl = 0; runlen = 0; for(int y = 0; y < lineH; ++y){ int x; x = (int)(0.5+ j + y * slantDx); if( (x>=x0) && (x <= x1)){ /* make sure we are within bounds */ int idxtmp; idxtmp = (y+y0)*w+x; // imgCoded[idxtmp*3] = 0; if(0 == p8[idxtmp]){ ++runlen; if(runlen > maxrl){ maxrl = runlen; } } else runlen = 0; } /* end if in bounds */ else{ runlen = 0; /* ignore runs that go off edge of image */ } } if(maxrl > rlThresh){ rgSlantSums[angle+45] += maxrl*maxrl; dblWeight += maxrl; } } /* end for j */ } /* end for angle */ for(int i=0; i < 120; ++i) pparms->rgSlantSums[i] += rgSlantSums[i]; if(NULL != (pparms->rgWeights)){ pparms->rgWeights[tl] = dblWeight; } if(NULL != (pparms->rgAngles)){ // need to independently figure out the angle for this particular 
textline unsigned int rgSlantSumsTmp[120]; //smooth the histogram rgSlantSumsTmp[0] = (rgSlantSums[0] + rgSlantSums[1]) / 2; for(int aa = 1; aa < 119; ++aa){ rgSlantSumsTmp[aa]=(rgSlantSums[aa-1]+rgSlantSums[aa]+rgSlantSums[aa+1])/3; } // for(int aa = 0; aa < 120; ++aa){ // rgSlantSums[aa] = rgSlantSumsTmp[aa]; // } //use the smoothed histogram peak as the slant angle slantAngle = 0; for(angle = -45; angle <= 60; angle += 1){ if(rgSlantSumsTmp[angle+45] > rgSlantSumsTmp[slantAngle+45]){ slantAngle = angle; } } /* end for angle */ pparms->rgAngles[tl] = slantAngle; } } }
/** imgDst will be 2*radius pixels less wide and high than imgSrc * because of the padding that is added before calling this function. * This function requires that imgDst.create() has already been called * with the proper w,h,imgType,etc. */ void DMaxFilter::maxFiltHuang_u8_square(DImage &imgDst, const DImage &imgSrc, int radiusX, int radiusY, int wKern, int hKern, D_uint8 *rgKern, int numKernPxls, DProgress *pProg, int progStart, int progMax, int threadNumber, int numThreads){ int rgHist[256]; int max; unsigned char valTmp; int idxDst; int idx3; D_uint8 *pTmp; // pointer to padded image data int wTmp, hTmp; // width, height of imgSrc int w, h; // width, height of imgDst D_uint8 *pDst; wTmp = imgSrc.width(); hTmp = imgSrc.height(); w = wTmp - radiusX*2; h = hTmp - radiusY*2; pDst = imgDst.dataPointer_u8(); pTmp = imgSrc.dataPointer_u8(); for(int y = threadNumber; y < h; y += numThreads){ // update progress report and check if user cancelled the operation if((NULL != pProg) && (0 == (y & 0x0000003f))){ if(0 != pProg->reportStatus(progStart + y, 0, progMax)){ // the operation has been cancelled pProg->reportStatus(-1, 0, progMax); // report cancel acknowledged return; } } // position window at the beginning of a new row and fill the kernel, hist memset(rgHist, 0, sizeof(int)*256); for(int kr = 0, kidx =0; kr < hKern; ++kr){ for(int kc = 0; kc < wKern; ++kc, ++kidx){ if(rgKern[kidx]){ // pixel is part of the kernel mask ++(rgHist[pTmp[(y+kr)*wTmp+kc]]);//add pixel val to histogram } } } // calculate max for first spot for(max = 255; (max > 0) && (0==rgHist[max]); --max){ // do nothing } // put the max in the spot we're at idxDst = y*w; pDst[idxDst] = (unsigned char)max; // remove pixels from leftmost column idx3 = y*wTmp+radiusX; for(int ky = 0; ky < hKern; ++ky){ valTmp = pTmp[idx3 - wKern]; --(rgHist[valTmp]); if((valTmp==max)&&(0 == rgHist[valTmp])){//update the max for(;(max>0)&&(0==rgHist[max]); --max){ //do nothing } } idx3 += wTmp; } for(int x=1; x < w; ++x){ 
++idxDst; // add pixels from the right-hand side of kernel (after moving over one) idx3 = y*wTmp+x+radiusX; for(int ky = 0; ky < hKern; ++ky){ valTmp = pTmp[idx3 + wKern]; if(valTmp > max)//update the max max = valTmp; ++(rgHist[valTmp]); idx3 += wTmp; } // put the max value in the destination pixel pDst[idxDst] = (unsigned char)max; // remove pixels from leftmost column for next time through loop if(x < (w-1)){//don't need to remove left edge if going to a new row idx3 = y*wTmp+x+radiusX; for(int ky = 0; ky < hKern; ++ky){ valTmp = pTmp[idx3 - wKern]; --(rgHist[valTmp]); if((valTmp==max)&&(0 == rgHist[valTmp])){//update the max for(;(max>0)&&(0==rgHist[max]); --max){ //do nothing } } idx3 += wTmp; } // end for(ky... } // end if } // end for (x=1; ... }// end for(y=0... // report progress if(NULL != pProg){ pProg->reportStatus(progStart + h, 0, progMax); } }
/**Computes the projection profile of img onto an axis with angle axisAngDeg. Horizontal and vertical profiles (when axisAngDeg is 0 or 90 degress, respectively) are special-cased for speed by calling getImageVerticalProfile() or getImageHorizontalProfile(). I have seen inconsistent usage of the terms "vertical profile" and "horizontal profile" since some people describe the direction of projection instead of the direction of the axis onto which the image is projected. It seems more common, however, to use the direction of the axis. Therefore, when I say "vertical profile," I mean that the profile length is equal to the height of the image, and the Rth value in the profile array is the sum of the values in the Rth row of the image. Likewise, a horizontal profile has length equal to the image width, and each value is the projection of the corresponding image column onto the horizontal axis. For a horizontal profile, set axisAngDeg to 0. For a vertical profile, set axisAngDeg to 90. Note that angles increase clockwise since the y-coordinate of images increases from top to bottom. For RGB images and multi-channel float or double images, the sum of all channels is used (a single profile is calculated). If this is not what is desired, you could create separate images for each channel and take the profiles seperately. Complex images are not supported directly, so they must be converted into a different type before a profile can be taken. If fInterp is false (default), then the nearest image pixel value is used when the position the profile passes through is between pixels. If fInterp is true, bilinear interpolation is used to estimate the value that should go in the profile.*/ void DProfile::getImageProfile(const DImage &img, double axisAngDeg, bool fNormalize, bool fInterp){ int w, h; double wm1, hm1; int numPixels; double dblSum; double theta; double dblTmp; int alen;//length of anchor segment. 
The anchor segment is the //segment that passes through the center of the image and //is oriented perpendicular to axisAngDeg. The length of //the profile will be equal to alen. int olen;//length of offset segments. The offset segments are the //integration paths parallel to axisAngDeg (one segment per //profile value. double asx, asy;//start (x,y) of the anchor segment double adx, ady;//deltaX and deltaY of the anchor segment double ax, ay; // current anchor point (x,y along the anchor segment) double osx, osy;//starty x,y offset from ax,ay for an offset segment double odx, ody;//deltaX and deltaY of offset segments double ox, oy; // current offset point (x,y along the offset segment) if(DImage::DImage_cmplx == img.getImageType()){ fprintf(stderr, "DProfile::getImageProfile() does not support complex images\n"); abort(); } // if(((axisAngDeg > -0.00001) && (axisAngDeg < 0.00001)) || // ((axisAngDeg > 179.00001) && (axisAngDeg < 180.00001)) || // ((axisAngDeg > 359.00001) && (axisAngDeg < 360.00001))){ // getImageHorizontalProfile(img, fNormalize); // return; // } // if(((axisAngDeg > 89.00001) && (axisAngDeg < 90.00001)) || // ((axisAngDeg > 269.00001) && (axisAngDeg < 270.00001)) || // ((axisAngDeg < -269.00001) && (axisAngDeg > -270.00001)) || // ((axisAngDeg < -89.00001) && (axisAngDeg > -90.00001))){ // getImageVerticalProfile(img, fNormalize); // return; // } w = img.width(); h = img.height(); wm1 = w-1; hm1 = h-1; theta = DMath::degreesToRadians(axisAngDeg); //figure out how long rgProf should be and how wide the integration should be odx = cos(theta); ody = sin(theta); adx = -1. 
* ody; ady = odx; olen = (int)(2*DMath::distPointLine(0, 0, w/2., h/2., w/2.+adx, h/2.+ady)); dblTmp = 2*DMath::distPointLine(w, 0, w/2., h/2., w/2.+adx, h/2.+ady); if(dblTmp > olen) olen = (int)dblTmp; alen = (int)(2*DMath::distPointLine(0, 0, w/2., h/2., w/2.+odx, h/2.+ody)); dblTmp = 2*DMath::distPointLine(w, 0, w/2., h/2., w/2.+odx, h/2.+ody); if(dblTmp > alen) alen = (int)dblTmp; // printf("w=%d h=%d\n", w, h); // printf("adx=%.2f ady=%.2f alen=%d\n", adx, ady, alen); // printf("odx=%.2f ody=%.2f olen=%d\n", odx, ody, olen); // allocate the rgProf array if(NULL == rgProf){ rgProf = (double*)malloc(alen * sizeof(double)); D_CHECKPTR(rgProf); len = alen; } else{ if(len != alen){ rgProf = (double*)realloc(rgProf,alen*sizeof(double)); D_CHECKPTR(rgProf); len = alen; } } ax = asx = w/2.-(alen/2*adx); ay = asy = h/2.-(alen/2*ady); osx = -(olen/2*odx); osy = -(olen/2*ody); switch(img.getImageType()){ case DImage::DImage_u8: { D_uint8 *pu8; pu8=img.dataPointer_u8(); if(fInterp){ fprintf(stderr, "WARNING! 
DProfile::getImageProfile() may be buggy " "when fInterp is true\n"); for(int aa=0; aa < alen; ++aa){ int ix, iy; double w1, w2; //weight of left vs right pixels double w3, w4;//weight of top vs bottom pixels double left, right; int oidx; ox = ax+osx; oy = ay+osy; numPixels = 0; dblSum = 0.; for(int oo=0; oo < olen; ++oo){ if( (ox>=0) && (ox < wm1) && (oy>=0) && (oy<hm1)){ ix = (int)ox; iy = (int)oy; oidx = iy*w + ix; w2 = ox-ix; w1 = 1.-w2; w4 = oy-iy; w3 = 1.-w4; left = (pu8[oidx] * w3 + pu8[oidx+w] * w4); right = (pu8[oidx+1] * w3 + pu8[oidx+w+1] * w4); dblSum += left*w1 + right*w2; ++numPixels; } else if((ox==wm1) || (oy == hm1)){ // don't interpolate on the edges, just approximate ix = (int)ox; iy = (int)oy; oidx = iy*w + ix; dblSum += pu8[oidx]; ++numPixels; } ox += odx; oy += ody; } ax += adx; ay += ady; if(fNormalize && (numPixels>0)) rgProf[aa] = dblSum / numPixels; else rgProf[aa] = dblSum; } } else{ for(int aa=0; aa < alen; ++aa){ int ix, iy; int oidx; ox = ax+osx; oy = ay+osy; numPixels = 0; dblSum = 0.; for(int oo=0; oo < olen; ++oo){ if( (ox>=0) && (ox <w) && (oy>=0) && (oy<h)){ ix = (int)ox; iy = (int)oy; oidx = iy*w + ix; dblSum += pu8[oidx]; ++numPixels; } ox += odx; oy += ody; } ax += adx; ay += ady; if(fNormalize && (numPixels>0)) rgProf[aa] = dblSum / numPixels; else rgProf[aa] = dblSum; } } } break; default: fprintf(stderr, "Not yet implemented!\n"); abort(); }//end switch(img.getImageType()) }
/**This function calculates the vertical profile (the length of * profile will be the same as the height of the image). However, * instead of projecting pixels straight across to the vertical axis, * the projection is taken using lines angled through the middle * (x=width/2) of the image. Linear interpolation is used for * sampling the image pixel values since the y-position on the * projection lines will normally be "between" two pixels for any * given x-position. The function is intended only for angles between * -45 and 45 degrees, since otherwise the slope will be too steep for * the assumption I am making (I am steping through the x-values and * calculating the y-values. If the slope is steeper, I should do the * opposite. As a "to-do," maybe we should just check the angle and * handle the two cases individually). */ void DProfile::getAngledVertProfile(const DImage &img, double ang, int fNormalize){ double m; int x; double y, val; int hm1; double xc; int numPixels; D_uint8 *pimg; int w, h; int initialOffset=0; int *rgYoffsets; double *rgBotWeights; int yTop; int yTopPrev=0; int imglen; if(DImage::DImage_u8 != img.getImageType()){ fprintf(stderr, "DProfile::getAngledVertProfile() currently only supports 8-bit " "grayscale images\n"); abort(); } w=img.width(); h=img.height(); pimg = img.dataPointer_u8(); if(NULL == rgProf){ rgProf = (double*)malloc(h * sizeof(double)); D_CHECKPTR(rgProf); len = h; } else{ if(len != h){ rgProf = (double*)realloc(rgProf,h*sizeof(double)); D_CHECKPTR(rgProf); len = h; } } // memset(rgProf, 0, sizeof(double) * h); rgYoffsets = (int*)malloc(sizeof(int)*w); D_CHECKPTR(rgYoffsets); rgBotWeights = (double*)malloc(sizeof(double)*w); D_CHECKPTR(rgBotWeights); m = 1 * tan(DMath::degreesToRadians(ang)); /* dy per dx=1 */ xc = w / 2.; // initialOffset is the y-offset of first pixel, rgYoffsets[i] for i=1...w-1 // are the relative offsets from rgYoffsets[i-1] of the top pixel // rgBotWeights[i] is the interpolation weight of the bottom pixel, 
while // 1.-rgBotWeights[i] is the interpolation weight of the top pixel. for(x = 0; x < w; ++x){ y = m * ((double)x - xc); yTop = (int)(floor(y)); rgBotWeights[x] = y - (double)(yTop); if(0 == x){ rgYoffsets[0] = 0; initialOffset = yTop; } else rgYoffsets[x] = (yTop - yTopPrev); yTopPrev = yTop; } hm1 = h - 1; imglen = w * hm1; for(int i = 0; i < h; ++i){ /* profile index */ int idx; rgProf[i] = 0.; numPixels = 0; idx = w * (i + initialOffset); for(x = 0; x < w; ++x, ++idx){ idx += (w*rgYoffsets[x]);// go to next row if needed if((idx <0) || (idx >= imglen)) // out of bounds continue; ++numPixels; val = rgBotWeights[x] * pimg[idx] + (1.-rgBotWeights[x]) * pimg[idx+w]; rgProf[i] += val; } if((numPixels > 0) && fNormalize) rgProf[i] /= numPixels; } free(rgYoffsets); free(rgBotWeights); }
/** Takes profiles of numStrips vertical strips (plus numStrips-1 overlapping strips) and uses them to estimate the avg textline height **/ void DTextlineSeparator::getTextlineRects(DImage &img, int *numTextlines, DRect **rgTextlineRects, int *spacingEst, char *stDebugBaseName){ int w, h; D_uint8 *pu8; DProfile prof; DProfile profSmear;// profile of smeared image char stTmp[1024]; w = img.width(); h = img.height(); pu8 = img.dataPointer_u8(); for(int y=0, idx=0; y < h; ++y){ for(int x=0; x < w; ++x, ++idx){ if((pu8[idx] > 0) && (pu8[idx] < 255)){ fprintf(stderr, "DTextlineSeparator::getTextlineRects() requires " "BINARY image with values of 0 or 255!\n"); exit(1); } } } DProfile profWeightedStrokeDist; int *rgBlackSpacingHist; rgBlackSpacingHist=new int[200]; D_CHECKPTR(rgBlackSpacingHist); memset(rgBlackSpacingHist,0,sizeof(int)*200); int *rgPeakYs; int numPeaks; int *rgValleyYs; int numValleys; rgPeakYs = new int[h]; D_CHECKPTR(rgPeakYs); rgValleyYs = new int[h]; D_CHECKPTR(rgValleyYs); numPeaks = 0; numValleys = 0; { prof.getImageVerticalProfile(img,false); prof.smoothAvg(2); double *pdbl; pdbl = prof.dataPointer(); for(int j=0; j < h; ++j) pdbl[j] /= 255; // now the profile is number of white pixels (was GS prof) unsigned int profMax; profMax = (unsigned int)prof.max(); //use original image to create histogram of horizontal foreground spacing //(distance from black pixel to next black pixel) weighted by profile value //inverse (number of black pixels instead of white pixels) for(int y=2; y < (h-2); ++y){//ignore 2 on each end (smoothing boundaries) int lastBlackX; int runlength; int x; int weight; x=0; lastBlackX = -1; runlength = 0; for(x=0 ; x < w; ++x){ if(pu8[y*w+x] == 0){//black runlength = x - lastBlackX; if((runlength >= 2) && (runlength < 200)){ weight = (int)profMax - (int)pdbl[y];//inverse of profile value at y rgBlackSpacingHist[runlength] += weight; } lastBlackX=x; } } } } //to get the spacing estimate, get the max, then find the next position 
//that is less than 1/3 of the max. Use that as the estimate to determine //scale int spacingMax; int spacingEstimate; spacingMax = 2; for(int j=3; j<200; ++j){ if(rgBlackSpacingHist[j] > rgBlackSpacingHist[spacingMax]) spacingMax = j; } spacingEstimate = spacingMax; for(int j=spacingMax+1; j < 200; ++j){ if(rgBlackSpacingHist[j] < (rgBlackSpacingHist[spacingMax] / 3)){ spacingEstimate = j; break; } } printf(" spacing estimate = *** %d pixels\n",spacingEstimate); if(NULL != spacingEst){ (*spacingEst) = spacingEstimate; } // now smear the image based on the spacing estimate, then take new profiles DImage imgSmear; D_uint8 *psmear; imgSmear = img; psmear = imgSmear.dataPointer_u8(); for(int y=0; y < h; ++y){ int lastBlackX; int runlength; lastBlackX = w; for(int x=0; x < w; ++x){ if(pu8[y*w+x] == 0){//black runlength = x - lastBlackX; if((runlength < 2*spacingEstimate) && (runlength >0)){ // fill in the white since last black pixel with black for(int xp=lastBlackX+1; xp < x; ++xp){ psmear[(y*w+xp)] = 128; } } lastBlackX = x; } } } sprintf(stTmp,"%s_smear.ppm",stDebugBaseName); imgSmear.save(stTmp); // now recalculate all of the profiles int rgSmearThresh; { double *pdbl; unsigned int profMax; // imgSmear.copy_(imgStrip, stripLeft, 0, stripW, h); profSmear.getImageVerticalProfile(img,false); // invert the profile so black is 255 and white is zero before smoothing pdbl = profSmear.dataPointer(); profMax = (unsigned int)profSmear.max(); for(int y=0; y < h; ++y) pdbl[y] = profMax - pdbl[y]; profSmear.smoothAvg(spacingEstimate*2/3); profMax = (unsigned int)profSmear.max();//new max after smoothing // decide where peak/valleys in profile are { int prevSign = 0; double deriv; double *pdbl; int numZeros = 0; pdbl = profSmear.dataPointer(); //use profile derivative and dist from last peak/valley //to decide where peaks and valleys are for(int y=1; y < (h-1); ++y){ int left, right; right = y + spacingEstimate/2; if(right > h) right = h; left = y - spacingEstimate/2; if(left < 
0) left = 0; // deriv = pdbl[y+1] - pdbl[y-1]; deriv = pdbl[right] - pdbl[left]; if(deriv > 0.){//rising if(prevSign <= 0){//valley rgValleyYs[numValleys] = y-numZeros/2;//(middle of plateaus) ++numValleys; } prevSign = 1; numZeros = 0; } else if(deriv < 0.){//falling if(prevSign >= 0){//peak rgPeakYs[numPeaks] = y-numZeros/2;//(middle of plateaus) ++numPeaks; } prevSign = -1; numZeros = 0; } else{ // zero slope ++numZeros; } }//end for(y=... } #if 0 // refine valleys so they are at true minima for(int v=0; v < numValleys; ++v){ bool fRefined = false; int origY; origY = rgValleyYs[v]; for(int offs=1; offs < spacingEstimate/2; ++offs){ int checkY; checkY = rgValleyYs[v]-offs; if(checkY>=0){ if(pdbl[checkY] < pdbl[rgValleyYs[v]]){ rgValleyYs[v] = checkY; fRefined = true; } } checkY = rgValleyYs[v]+offs; if(checkY<h){ if(pdbl[checkY] < pdbl[rgValleyYs[v]]){ rgValleyYs[v] = checkY; fRefined = true; } } } if(fRefined) printf(" >>refined valley%d from y=%d to y=%d\n", v,origY,rgValleyYs[v]); } #endif // #if 0 // // get rid of false peaks (those that have very low prominence) // { // // figure out weighted avg prominence (weight by prominence of each peak) // double sumProm = 0.; // int numProm = 0; // for(int p=0; p < numPeaks; ++p){ // int numSides; // will be 1, 2, or 0 // // double prom = pdbl[rgPeakYs[p] // } // } // // combine peaks that are too close to each other // { // int numPeaksRemoved = 0; // bool fRemoved; // fRemoved = true; // while(fRemoved && (numPeaks>1)){ // fRemoved = false; // int deletePeak=0; // int deleteValley=0; // for(int j=1; j < numPeaks; ++j){ // if(abs(rgPeakYs[j-1]-rgPeakYs[j])<spacingEstimate*2/3){//too close // if(pdbl[rgPeakYs[j]] > pdbl[rgPeakYs[j-1]]){ // printf(" A remove peak %d at y=%d\n",j-1,(int)pdbl[rgPeakYs[j-1]]); // deletePeak = j-1; // } // else{ // printf(" B remove peak %d at y=%d\n",j,(int)pdbl[rgPeakYs[j]]); // deletePeak = j; // } // deleteValley = -1; // if(numValleys > 0){ // if(rgPeakYs[0] < rgValleyYs[0]){//peak 
was first // deleteValley = deletePeak; // } // else{//valley was first // deleteValley = deletePeak+1; // } // } // //delete the peak // for(int k=deletePeak+1; k < numPeaks; ++k){ // rgPeakYs[k-1] = rgPeakYs[k]; // } // --numPeaks; // //delete the valley (if in range) // if((deleteValley>=0) && (deleteValley < numValleys)){ // for(int k=deleteValley+1; k < numValleys; ++k){ // rgValleyYs[k-1] = rgValleyYs[k]; // } // } // fRemoved = true; // ++numPeaksRemoved; // break; // }//if(abs(... // }//for(int j=... // }//while(fRemoved) // } // {//figure out peak-to-valley topographic prominence threshold // } // #endif } printf("fPeakFirst = %d\n",(int)(rgPeakYs[0] < rgValleyYs[0])); printf("numPeaks=%d numValleys=%d\n", numPeaks, numValleys); for(int p=0; (p < numPeaks) || (p<numValleys); ++p){ printf("\t%d:\t",p); if(p< numPeaks) printf("p%4d\t",rgPeakYs[p]); else printf("p----\t"); if(p< numValleys) printf("v%4d\t",rgValleyYs[p]); else printf("v----\t"); printf("\n"); } (*numTextlines) = numPeaks; (*rgTextlineRects) = new DRect[numPeaks]; D_CHECKPTR((*rgTextlineRects)); bool fPeakFirst; fPeakFirst = rgPeakYs[0] < rgValleyYs[0]; for(int p = 0; p < numPeaks; ++p){ int topIdx, botIdx; if(fPeakFirst){ topIdx = p-1; botIdx = p; } else{ topIdx = p; botIdx = p+1; } (*rgTextlineRects)[p].x = 0; (*rgTextlineRects)[p].w = w-1; if(topIdx < 0) (*rgTextlineRects)[p].y = 0; else if(topIdx >= numValleys){ fprintf(stderr, "This shouldn't happen!(%s:%d)\n", __FILE__, __LINE__); (*rgTextlineRects)[p].y = 0; } else{ (*rgTextlineRects)[p].y = rgValleyYs[topIdx]; } if(botIdx < 0){ fprintf(stderr, "This shouldn't happen!(%s:%d)\n", __FILE__, __LINE__); (*rgTextlineRects)[p].h = h-((*rgTextlineRects)[p].y)-1; } else if(botIdx >= numValleys){ (*rgTextlineRects)[p].h = h-((*rgTextlineRects)[p].y)-1; } else{ (*rgTextlineRects)[p].h = rgValleyYs[botIdx]-((*rgTextlineRects)[p].y)-1; } } // now remove any textlines that seem empty { //avg # of pixels within a textline (weighted by # pxls in that 
textline) double sumPxls; double sumWeights; sumPxls = 0; sumWeights = 0; long *rgNumPixels; long pxlThresh; rgNumPixels = (long*)calloc(*numTextlines, sizeof(long)); D_CHECKPTR(rgNumPixels); sumPxls = 0; sumWeights = 0; for(int p=0; p < (*numTextlines); ++p){ long numPxls; numPxls = 0; for(int y=(*rgTextlineRects)[p].y; y < ((*rgTextlineRects)[p].y+(*rgTextlineRects)[p].h); ++y){ for(int x=(*rgTextlineRects)[p].x; x < ((*rgTextlineRects)[p].x+(*rgTextlineRects)[p].w); ++x){ if(pu8[y*w+x]==0){//black pixel ++numPxls; } } } printf(" line%d numPxls=%ld\n",p,numPxls); rgNumPixels[p]=numPxls; sumPxls += numPxls * numPxls; sumWeights += numPxls; } printf(" sumPxls=%f sumWeights=%f\n",sumPxls, sumWeights); if(sumWeights > 0) sumPxls /= sumWeights; else sumPxls = 0; printf(" weighted avg number of pixels per line:%f\n",sumPxls); pxlThresh = sumPxls/10; printf(" pixel threshold=%ld\n",pxlThresh); //now get rid of lines with few pixels for(int p=(*numTextlines)-1; p >=0; --p){ if(rgNumPixels[p] < pxlThresh){// one-twentieth of weighted avg printf(" remove textline %d (y=%d to y=%d) with %ld pixels\n",p, (*rgTextlineRects)[p].y,(*rgTextlineRects)[p].y+ (*rgTextlineRects)[p].h, rgNumPixels[p]); for(int r=p; r < ((*numTextlines)-1); ++r){ (*rgTextlineRects)[r] = (*rgTextlineRects)[r+1]; } --(*numTextlines); } } free(rgNumPixels); } printf(" There are now %d textlines\n", (*numTextlines)); //debug: save an image with the textline rectangles drawn { DImage imgTextlines; imgTextlines = img.convertedImgType(DImage::DImage_RGB); for(int p = 0; p < (*numTextlines); ++p){ int colorR, colorG, colorB; printf("\trect%d: x,y wxh=%d,%d %dx%d\n",p,(*rgTextlineRects)[p].x, (*rgTextlineRects)[p].y, (*rgTextlineRects)[p].w,(*rgTextlineRects)[p].h); colorR = ((p+1)*127) % 255; colorG = (p*127) % 255; colorB = (p) % 255; imgTextlines.drawRect((*rgTextlineRects)[p].x,(*rgTextlineRects)[p].y, (*rgTextlineRects)[p].x+(*rgTextlineRects)[p].w-1, (*rgTextlineRects)[p].y+(*rgTextlineRects)[p].h, 
colorR, colorG, colorB); imgTextlines.drawRect((*rgTextlineRects)[p].x+1,(*rgTextlineRects)[p].y+1, (*rgTextlineRects)[p].x+(*rgTextlineRects)[p].w-1-1, (*rgTextlineRects)[p].y+(*rgTextlineRects)[p].h-1, colorR, colorG, colorB); imgTextlines.drawRect((*rgTextlineRects)[p].x+2,(*rgTextlineRects)[p].y+2, (*rgTextlineRects)[p].x+(*rgTextlineRects)[p].w-1-2, (*rgTextlineRects)[p].y+(*rgTextlineRects)[p].h-2, colorR, colorG, colorB); } sprintf(stTmp,"%s_tl_rects.pgm",stDebugBaseName); imgTextlines.save(stTmp); } // // now get x-height estimate using profiles (or black runlengths of smears) // #if 0 // //debug: save an image with all of the profiles // { // DImage imgProfsAll; // imgProfsAll = prof.toDImage(500,true); // imgProfsAll = imgProfsAll.convertedImgType(DImage::DImage_RGB); // sprintf(stTmp,"%s_allprofs.pgm",stDebugBaseName); // imgProfsAll.save(stTmp); // } // //debug: save an image with all of the smeared profiles // { // DImage imgProfsAll; // imgProfsAll = profSmear.toDImage(500,true); // imgProfsAll = imgProfsAll.convertedImgType(DImage::DImage_RGB); // for(int j=0; j < numPeaks; ++j){ // int ypos; // ypos = rgPeakYs[j]; // imgProfsAll.drawLine(0,ypos,499,ypos,255,0,0); // } // for(int j=0; j < numValleys; ++j){ // int ypos; // ypos = rgValleyYs[j]; // imgProfsAll.drawLine(0,ypos,499,ypos,0,255,0); // } // sprintf(stTmp,"%s_allsmearprofs.pgm",stDebugBaseName); // imgProfsAll.save(stTmp); // } // //debug: save a gnuplot of the histograms of black spacing weighted by profile // // the image has the histogram for each strip followed by the sum histogram // // a value of -10 is placed at positions 0,1 of each histogram as a separator // { // DImage imgSpacingHists; // FILE *fout; // sprintf(stTmp,"%s_spacing_profs.dat",stDebugBaseName); // fout = fopen(stTmp,"wb"); // if(!fout){ // fprintf(stderr, "couldn't open debug file '%s' for output\n",stTmp); // exit(1); // } // for(int j=0; j < 200; ++j){ // int val; // val = rgBlackSpacingHist[j]; // if(j<2) // val = 
-10; // fprintf(fout,"%d\t%d\n",j, val); // } // fclose(fout); // } // #endif // now at the otsu x-position in the profile, get avg black runlength to // guess at peak (textline) height. // Do the same for white to guess at valley (spacing) height. // After getting it for each strip's profile, take the avg for the whole // page. Use that to determine a smoothing value and a window size for the // transition count map (TCM). (maybe use median instead of avg?) delete [] rgPeakYs; delete [] rgValleyYs; delete [] rgBlackSpacingHist; return; }
/** Takes profiles of numStrips vertical strips (plus numStrips-1 overlapping strips) and uses them to estimate the avg textline height **/ int DTextlineSeparator::estimateAvgHeight2(DImage &imgBinary, int numStrips, char *stDebugBaseName){ int w, h; D_uint8 *pu8; DProfile prof; DProfile *rgProfs;// profiles of overlapping strips of image DProfile *rgProfsRL;//avg white RL profile DProfile *rgProfsSmear;// profiles of overlapping strips of image after smear char stTmp[1024]; int *rgPeakThresh; int *rgPeakThreshRL; double *rgPeakLineOffs; rgProfs = new DProfile[numStrips*2-1]; D_CHECKPTR(rgProfs); rgProfsRL = new DProfile[numStrips*2-1]; D_CHECKPTR(rgProfsRL); rgProfsSmear = new DProfile[numStrips*2-1]; D_CHECKPTR(rgProfsSmear); rgPeakThresh = new int[numStrips*2-1]; D_CHECKPTR(rgPeakThresh); rgPeakThreshRL = new int[numStrips*2-1]; D_CHECKPTR(rgPeakThreshRL); rgPeakLineOffs = new double[numStrips*2-1]; D_CHECKPTR(rgPeakLineOffs); w = imgBinary.width(); h = imgBinary.height(); pu8 = imgBinary.dataPointer_u8(); for(int y=0, idx=0; y < h; ++y){ for(int x=0; x < w; ++x, ++idx){ if((pu8[idx] > 0) && (pu8[idx] < 255)){ fprintf(stderr, "DTextlineSeparator::estimateAvgHeight() requires " "BINARY image with values of 0 or 255!\n"); exit(1); } } } DImage imgStrip; int stripW, stripLeft; DProfile profWeightedStrokeDist; int **rgBlackSpacingHist; rgBlackSpacingHist = new int*[numStrips*2-1]; D_CHECKPTR(rgBlackSpacingHist); rgBlackSpacingHist[0]=new int[200*(numStrips*2-1)]; D_CHECKPTR(rgBlackSpacingHist[0]); memset(rgBlackSpacingHist[0],0,sizeof(int)*200*(numStrips*2-1)); for(int i=1; i < (numStrips*2-1); ++i){ rgBlackSpacingHist[i] = &(rgBlackSpacingHist[i-1][200]);//only 2-199 are valid spacings } int **rgPeakYs; int *rgNumPeaks; int **rgValleyYs; int *rgNumValleys; rgPeakYs = new int*[numStrips*2-1]; D_CHECKPTR(rgPeakYs); rgPeakYs[0] = new int[(numStrips*2-1)*h]; D_CHECKPTR(rgPeakYs[0]); rgValleyYs = new int*[numStrips*2-1]; D_CHECKPTR(rgValleyYs); rgValleyYs[0] = new 
int[(numStrips*2-1)*h]; D_CHECKPTR(rgValleyYs); for(int i = 1; i < (numStrips*2-1); ++i){ rgPeakYs[i] = &(rgPeakYs[i-1][h]); rgValleyYs[i] = &(rgValleyYs[i-1][h]); } rgNumPeaks = new int[numStrips*2-1]; D_CHECKPTR(rgNumPeaks); rgNumValleys = new int[numStrips*2-1]; D_CHECKPTR(rgNumValleys); for(int i=0; i < (numStrips*2-1); ++i){ rgNumPeaks[i] = 0; rgNumValleys[i] = 0; } stripW = (w + numStrips-1) / numStrips; printf("w=%d h=%d stripW=%d\n",w,h,stripW); for(int i=0; i < numStrips*2-1; ++i){ stripLeft = i * stripW/2; if(i == numStrips*2-2){//last strip may have slightly different width stripW = w - stripLeft - 1; } imgBinary.copy_(imgStrip, stripLeft, 0, stripW, h); rgProfs[i].getImageVerticalProfile(imgStrip,false); rgProfs[i].smoothAvg(2); rgProfsRL[i].getVertAvgRunlengthProfile(imgStrip,0xff,true); rgProfsRL[i].smoothAvg(2); double *pdbl; pdbl = rgProfs[i].dataPointer(); for(int j=0; j < h; ++j) pdbl[j] /= 255; // now the profile is number of white pixels (was GS prof) unsigned int profMax; profMax = (unsigned int)rgProfs[i].max(); //use original image to create histogram of horizontal foreground spacing //(distance from black pixel to next black pixel) weighted by profile value //inverse (number of black pixels instead of white pixels) for(int y=2; y < (h-2); ++y){//ignore 2 on each end (smoothing boundaries) int lastBlackX; int runlength; int x; int weight; x = stripLeft-199; if(x < 0) x=0; lastBlackX = x; runlength = 0; for( ; (x<stripLeft+stripW+199) && (x < w); ++x){ if(pu8[y*w+x] == 0){//black runlength = x - lastBlackX; if((runlength >= 2) && (runlength < 200)){ weight = (int)profMax - (int)pdbl[y];//inverse of profile value at y rgBlackSpacingHist[0/*i*/][runlength] += weight; } lastBlackX=x; } } } //now multiply the values by the avg runlength double *pdblRL; pdblRL = rgProfsRL[i].dataPointer(); // for(int j=0; j < h; ++j) // pdbl[j] *= pdblRL[j]; //now get a histogram of the profile values and use otsu to determine //a threshold between peaks and 
valleys unsigned int *rgProfHist; double peakThresh; rgProfHist = (unsigned int*)calloc(profMax+1,sizeof(unsigned int)); D_CHECKPTR(rgProfHist); for(int j=0; j < h; ++j) ++(rgProfHist[(int)(pdbl[j])]); peakThresh = DThresholder::getOtsuThreshVal(rgProfHist, profMax+1); rgPeakLineOffs[i] = peakThresh / (double)stripW;//now a fraction of stripW //choose a threshold between peaks and valleys as the thresh that maximizes //how many peaks there are that are between 2 and 200 pixels high // unsigned int max,min; // max = 0; // min = rgProfHist[0]; // for(int j=0; j < stripW; ++j){ // if(rgProfHist[j] > max) // max = rgProfHist[j]; // if(rgProfHist[j] < min) // min = rgProfHist[j]; // } // rgPeakLineOffs[i] = peakThresh / (double)max; // printf("peakThresh=%lf rgPeakLineOffs=%f\n", // peakThresh,rgPeakLineOffs[i]); free(rgProfHist); rgPeakThresh[i] = (int)peakThresh; } //to get the spacing estimate, get the max, then find the next position //that is less than 1/3 of the max. Use that as the estimate to determine //scale int spacingMax; int spacingEstimate; spacingMax = 2; for(int j=3; j<200; ++j){ if(rgBlackSpacingHist[0][j] > rgBlackSpacingHist[0][spacingMax]) spacingMax = j; } spacingEstimate = spacingMax; for(int j=spacingMax+1; j < 200; ++j){ if(rgBlackSpacingHist[0][j] < (rgBlackSpacingHist[0][spacingMax] / 3)){ spacingEstimate = j; break; } } printf(" spacing estimate = *** %d pixels\n",spacingEstimate); // now smear the image based on the spacing estimate, then take new profiles DImage imgSmear; D_uint8 *psmear; imgSmear = imgBinary; psmear = imgSmear.dataPointer_u8(); for(int y=0; y < h; ++y){ int lastBlackX; int runlength; lastBlackX = w; for(int x=0; x < w; ++x){ if(pu8[y*w+x] == 0){//black runlength = x - lastBlackX; if((runlength < 2*spacingEstimate) && (runlength >0)){ // fill in the white since last black pixel with black for(int xp=lastBlackX+1; xp < x; ++xp){ psmear[(y*w+xp)] = 128; } } lastBlackX = x; } } } sprintf(stTmp,"%s_smear.ppm",stDebugBaseName); 
imgSmear.save(stTmp); // now recalculate all of the profiles stripW = (w + numStrips-1) / numStrips; int *rgSmearThresh; rgSmearThresh = new int[numStrips*2-1]; D_CHECKPTR(rgSmearThresh); for(int i=0; i < numStrips*2-1; ++i){ double *pdbl; unsigned int profMax; stripLeft = i * stripW/2; if(i == numStrips*2-2){//last strip may have slightly different width stripW = w - stripLeft - 1; } // imgSmear.copy_(imgStrip, stripLeft, 0, stripW, h); imgBinary.copy_(imgStrip, stripLeft, 0, stripW, h); rgProfsSmear[i].getImageVerticalProfile(imgStrip,false); // invert the profile so black is 255 and white is zero before smoothing pdbl = rgProfsSmear[i].dataPointer(); profMax = (unsigned int)rgProfsSmear[i].max(); for(int y=0; y < h; ++y) pdbl[y] = profMax - pdbl[y]; rgProfsSmear[i].smoothAvg(spacingEstimate*2/3); profMax = (unsigned int)rgProfsSmear[i].max();//new max after smoothing // decide where peak/valleys in profile are { int prevSign = 0; double deriv; double *pdbl; int numZeros = 0; pdbl = rgProfsSmear[i].dataPointer(); //use profile derivative and dist from last peak/valley //to decide where peaks and valleys are for(int y=1; y < (h-1); ++y){ deriv = pdbl[y+1] - pdbl[y-1]; if(deriv > 0.){//rising if(prevSign <= 0){//valley rgValleyYs[i][rgNumValleys[i]] = y-numZeros/2;//(middle of plateaus) ++(rgNumValleys[i]); } prevSign = 1; numZeros = 0; } else if(deriv < 0.){//falling if(prevSign >= 0){//peak rgPeakYs[i][rgNumPeaks[i]] = y-numZeros/2;//(middle of plateaus) ++(rgNumPeaks[i]); } prevSign = -1; numZeros = 0; } else{ // zero slope ++numZeros; } }//end for(y=... 
} // combine peaks that are too close to each other { int numPeaksRemoved = 0; bool fRemoved; fRemoved = true; while(fRemoved && (rgNumPeaks[i]>1)){ fRemoved = false; int deletePeak=0; int deleteValley=0; for(int j=1; j < rgNumPeaks[i]; ++j){ if(abs(rgPeakYs[i][j-1]-rgPeakYs[i][j])<spacingEstimate*2/3){//too close if(pdbl[rgPeakYs[i][j]] > pdbl[rgPeakYs[i][j-1]]){ printf(" A remove peak %d at y=%d\n",j-1,(int)pdbl[rgPeakYs[i][j-1]]); deletePeak = j-1; } else{ printf(" B remove peak %d at y=%d\n",j,(int)pdbl[rgPeakYs[i][j]]); deletePeak = j; } deleteValley = -1; if(rgNumValleys[i] > 0){ if(rgPeakYs[i][0] < rgValleyYs[i][0]){//peak was first deleteValley = deletePeak; } else{//valley was first deleteValley = deletePeak+1; } } //delete the peak for(int k=deletePeak+1; k < rgNumPeaks[i]; ++k){ rgPeakYs[i][k-1] = rgPeakYs[i][k]; } --(rgNumPeaks[i]); //delete the valley (if in range) if((deleteValley>=0) && (deleteValley < rgNumValleys[i])){ for(int k=deleteValley+1; k < rgNumValleys[i]; ++k){ rgValleyYs[i][k-1] = rgValleyYs[i][k]; } } fRemoved = true; ++numPeaksRemoved; break; }//if(abs(... }//for(int j=... 
}//while(fRemoved) } #if 0 rgSmearThresh[i] = 0; //choose threshold that maximizes the number of peaks int bestNumPeaks; int bestNumPeaksThresh; int peaksThresh; int numPeaks; // double peaksProfMax = 0.; // for(int y=spacingEstimate/2-1; y < h-(spacingEstimate/2-1); ++y) // if(pdbl[y] > peaksProfMax) // peaksProfMax = pdbl[y]; bestNumPeaksThresh = 0; bestNumPeaks = 0; numPeaks = 1; for(int peaksThresh = 0; peaksThresh <= profMax; ++peaksThresh){ numPeaks = 0; int fLead = -1; for(int y=0; y < h;++y){ if(fLead>=0){ if((pdbl[y] <= peaksThresh)){ if((y-fLead) >= spacingEstimate/2) ++numPeaks; fLead = -1; } } else{ if(pdbl[y] > peaksThresh) fLead = y; } } if(numPeaks >= bestNumPeaks){ bestNumPeaks = numPeaks; bestNumPeaksThresh = peaksThresh; } } rgSmearThresh[i] = bestNumPeaksThresh; #endif } // now get x-height estimate using profiles (or black runlengths of smears) //debug: save an image with all of the profiles { DImage imgProfsAll; DImage imgProfsRLAll; imgProfsAll.create(w,h,DImage::DImage_u8); stripW = (w + numStrips-1) / numStrips; for(int i=0; i < numStrips*2-1; ++i){ DImage imgTmp; imgTmp = rgProfs[i].toDImage(stripW/2,true); imgProfsAll.pasteFromImage(i*stripW/2,0,imgTmp,0,0,stripW/2,h); } imgProfsAll = imgProfsAll.convertedImgType(DImage::DImage_RGB); for(int i=0; i < numStrips*2-1; ++i){ int peakLineOffs; // peakLineOffs = stripW/2 * rgPeakThresh[i] / rgProfs[i].max(); peakLineOffs = (int)(rgPeakLineOffs[i]*stripW/2); // printf(" rgPeakLineOffs[%d]=%lf peakLineOffs=%d\n",i,rgPeakLineOffs[i], // peakLineOffs); imgProfsAll.drawLine(i*stripW/2 + peakLineOffs+1, 0, i*stripW/2 + peakLineOffs+1, h-1, 255-i,0,0); imgProfsAll.drawLine(i*stripW/2 + peakLineOffs, 0, i*stripW/2 + peakLineOffs, h-1, 255-i,0,0); imgProfsAll.drawLine(i*stripW/2, 0, i*stripW/2, h-1, 0, 255-i,0); } sprintf(stTmp,"%s_allprofs.pgm",stDebugBaseName); imgProfsAll.save(stTmp); } //debug: save an image with all of the smeared profiles { DImage imgProfsAll; DImage imgProfsRLAll; 
imgProfsAll.create(w,h,DImage::DImage_u8); stripW = (w + numStrips-1) / numStrips; for(int i=0; i < numStrips*2-1; ++i){ DImage imgTmp; imgTmp = rgProfsSmear[i].toDImage(stripW/2,true); imgProfsAll.pasteFromImage(i*stripW/2,0,imgTmp,0,0,stripW/2,h); } imgProfsAll = imgProfsAll.convertedImgType(DImage::DImage_RGB); for(int i=0; i < numStrips*2-1; ++i){ for(int j=0; j < rgNumPeaks[i]; ++j){ int ypos; ypos = rgPeakYs[i][j]; imgProfsAll.drawLine(i*stripW/2,ypos,(i+1)*stripW/2-1,ypos,255,0,0); } for(int j=0; j < rgNumValleys[i]; ++j){ int ypos; ypos = rgValleyYs[i][j]; imgProfsAll.drawLine(i*stripW/2,ypos,(i+1)*stripW/2-1,ypos,0,255,0); } #if 0 int prevSign = 0; int lastPeakY, lastValleyY, lastTurnY; double deriv; double *pdbl; pdbl = rgProfsSmear[i].dataPointer(); lastPeakY = lastValleyY = lastTurnY = 0-spacingEstimate; //use profile derivative and dist from last peak/valley //to decide where peaks and valleys are for(int y=1; y < (h-1); ++y){ deriv = pdbl[y+1] - pdbl[y-1]; if(deriv > 0.){//rising // imgProfsAll.setPixel(i*stripW/2,y,0,255,0); if((prevSign <= 0) && ((y-lastTurnY)>spacingEstimate/2)){//valley imgProfsAll.drawLine(i*stripW/2,y,(i+1)*stripW/2-1,y,0,255,0); lastValleyY = lastTurnY = y; prevSign = 1; } } else if(deriv < 0.){//falling // imgProfsAll.setPixel(i*stripW/2,y,255,0,0); if(prevSign >= 0){ if(((y-lastTurnY)>spacingEstimate/2) || ((lastPeakY>=0)&&(pdbl[y] > pdbl[lastPeakY]))){//peak if((lastPeakY>=0)&&(pdbl[y] > pdbl[lastPeakY])){//correct previous imgProfsAll.drawLine(i*stripW/2,lastPeakY, (i+1)*stripW/2-1,lastPeakY,0,0,0); } imgProfsAll.drawLine(i*stripW/2,y,(i+1)*stripW/2-1,y,255,0,0); lastPeakY = lastTurnY = y; prevSign = -1; } } } else{ // zero slope (do nothing) // imgProfsAll.setPixel(i*stripW/2,y,0,0,0); // do nothing } } // int peakLineOffs; // peakLineOffs = rgSmearThresh[i] * stripW/2 / rgProfsSmear[i].max(); // imgProfsAll.drawLine(i*stripW/2 + peakLineOffs+1, 0, // i*stripW/2 + peakLineOffs+1, h-1, 255-i,0,0); // 
imgProfsAll.drawLine(i*stripW/2 + peakLineOffs, 0, // i*stripW/2 + peakLineOffs, h-1, 255-i,0,0); // imgProfsAll.drawLine(i*stripW/2, 0, // i*stripW/2, h-1, 0, 255-i,0); #endif } sprintf(stTmp,"%s_allsmearprofs.pgm",stDebugBaseName); imgProfsAll.save(stTmp); } //debug: save an image with all of the RL profiles { DImage imgProfsAll; imgProfsAll.create(w,h,DImage::DImage_u8); stripW = (w + numStrips-1) / numStrips; for(int i=0; i < numStrips*2-1; ++i){ DImage imgTmp; imgTmp = rgProfsRL[i].toDImage(stripW/2,true); imgProfsAll.pasteFromImage(i*stripW/2,0,imgTmp,0,0,stripW/2,h); } imgProfsAll = imgProfsAll.convertedImgType(DImage::DImage_RGB); sprintf(stTmp,"%s_allprofsRL.pgm",stDebugBaseName); imgProfsAll.save(stTmp); } //debug: save a gnuplot of the histograms of black spacing weighted by profile // the image has the histogram for each strip followed by the sum histogram // a value of -10 is placed at positions 0,1 of each histogram as a separator { DImage imgSpacingHists; FILE *fout; sprintf(stTmp,"%s_spacing_profs.dat",stDebugBaseName); fout = fopen(stTmp,"wb"); if(!fout){ fprintf(stderr, "couldn't open debug file '%s' for output\n",stTmp); exit(1); } for(int i=0; i < 1/*numStrips*2-1*/; ++i){ for(int j=0; j < 200; ++j){ int val; val = rgBlackSpacingHist[i][j]; if(j<2) val = -10; fprintf(fout,"%d\t%d\n",i*200+j, val); } } fclose(fout); } // now at the otsu x-position in the profile, get avg black runlength to // guess at peak (textline) height. // Do the same for white to guess at valley (spacing) height. // After getting it for each strip's profile, take the avg for the whole // page. Use that to determine a smoothing value and a window size for the // transition count map (TCM). (maybe use median instead of avg?) 
delete [] rgPeakYs[0]; delete [] rgPeakYs; delete [] rgNumPeaks; delete [] rgValleyYs[0]; delete [] rgValleyYs; delete [] rgNumValleys; delete [] rgProfs; delete [] rgProfsRL; delete [] rgPeakThresh; delete [] rgPeakThreshRL; delete [] rgPeakLineOffs; delete rgBlackSpacingHist[0]; delete [] rgBlackSpacingHist; // exit(1); // prof.getImageVerticalProfile(imgROI,true); // DImage imgTmp; // imgTmp = prof.toDImage(100,true); // sprintf(stTmp,"%s_prof.pgm",stDebugBaseName); // imgTmp.save(stTmp); // prof.smoothAvg(2); // imgTmp = prof.toDImage(100,true); // sprintf(stTmp,"%s_prof_smooth.pgm",stDebugBaseName); // imgTmp.save(stTmp); // prof.getVertAvgRunlengthProfile(imgROI,0x00,false); // imgTmp = prof.toDImage(100,true); // sprintf(stTmp,"%s_prof_rle.pgm",stDebugBaseName); // imgTmp.save(stTmp); // prof.smoothAvg(2); // imgTmp = prof.toDImage(100,true); // sprintf(stTmp,"%s_prof_rle_smooth.pgm",stDebugBaseName); // imgTmp.save(stTmp); // // find a radiusX that gives a good histogram from the TCM // // (we want the TCM to give responses of about // printf(" *creating TCM histograms...\n");fflush(stdout); // int rgHists[40][256]; // memset(rgHists,0,sizeof(int)*40*256); // for(int rx = 10; rx < 400; rx +=10){ // DImage imgTCM; // D_uint8 *p8; // int max = 0; // int ry; // ry = rx/6; // if(ry < 1) // ry = 1; // DTCM::getImageTCM_(imgTCM, imgROI, rx,ry, false,NULL); // p8 = imgTCM.dataPointer_u8(); // for(int y = 0, idx=0; y < h; ++y){ // for(int x = 0; x < w; ++x,++idx){ // rgHists[rx/10][p8[idx]] += 1; // } // } // rgHists[rx/10][0] = 0; // max = 0; // for(int i=0;i<256;++i) // if(rgHists[rx/10][i] > max) // max =rgHists[rx/10][i]; // for(int i=0;i<256;++i){//scale from 0 to 255 // rgHists[rx/10][i] = rgHists[rx/10][i] * 255 / max; // } // } // //now save the histograms as an image // DImage imgTCMhists; // imgTCMhists.create(256,40,DImage::DImage_u8); // D_uint8 *p8; // p8 = imgTCMhists.dataPointer_u8(); // for(int y=0, idx=0; y < 40; ++y){ // for(int x=0; x < 256; ++x, 
++idx){ // p8[idx] = (D_uint8)(rgHists[y][x]); // } // } // sprintf(stTmp, "%s_tcmhist.pgm",stDebugBaseName); // imgTCMhists.save(stTmp); // printf(" *done creating TCM histograms...\n"); // int radiusX, radiusY; // radiusX = imgROI.width() / 20; // if(radiusX < 10) // radiusX = 10; // if(radiusX > 200) // radiusX = 200; // radiusY = radiusX / 5; // // if(radiusY < 2) // radiusY = 2; // printf(" TCM radiusX=%d radiusY=%d\n", radiusX,radiusY); // DTCM::getImageTCM_(imgTmp, imgROI, radiusX,radiusY, false,stDebugBaseName); // // DTCM::getImageTCM_(imgTmp, imgROI, 1,1, false); // // double *rgProf; // // rgProf = prof.dataPointer(); // // for(int i=100; i < 500; ++i){ // // if(rgProf[i] > 0.) // // printf("[%d]=%f ",i, rgProf[i]); // // } // // printf("\n"); return 0; }
///assumes that the image is BINARY with bg=255 int DTextlineSeparator::estimateAvgHeight(DImage &imgBinary, int ROIx0, int ROIy0, int ROIx1, int ROIy1, char *stDebugBaseName){ DImage imgROI; int w, h; D_uint8 *pu8; DProfile prof; if(-1 == ROIx1) ROIx1 = imgBinary.width()-1; if(-1 == ROIy1) ROIy1 = imgBinary.height()-1; imgBinary.copy_(imgROI,ROIx0,ROIy0,ROIx1-ROIx0+1,ROIy1-ROIy0+1); char stTmp[1024]; sprintf(stTmp, "%s_roi.pgm",stDebugBaseName); imgROI.save(stTmp); w = imgROI.width(); h = imgROI.height(); pu8 = imgROI.dataPointer_u8(); for(int y=0, idx=0; y < h; ++y){ for(int x=0; x < w; ++x, ++idx){ if((pu8[idx] > 0) && (pu8[idx] < 255)){ fprintf(stderr, "DTextlineSeparator::estimateAvgHeight() requires " "BINARY image!\n"); exit(1); } } } prof.getImageVerticalProfile(imgROI,true); DImage imgTmp; imgTmp = prof.toDImage(100,true); sprintf(stTmp,"%s_prof.pgm",stDebugBaseName); imgTmp.save(stTmp); prof.smoothAvg(2); imgTmp = prof.toDImage(100,true); sprintf(stTmp,"%s_prof_smooth.pgm",stDebugBaseName); imgTmp.save(stTmp); prof.getVertAvgRunlengthProfile(imgROI,0x00,false); imgTmp = prof.toDImage(100,true); sprintf(stTmp,"%s_prof_rle.pgm",stDebugBaseName); imgTmp.save(stTmp); prof.smoothAvg(2); imgTmp = prof.toDImage(100,true); sprintf(stTmp,"%s_prof_rle_smooth.pgm",stDebugBaseName); imgTmp.save(stTmp); // find a radiusX that gives a good histogram from the TCM // (we want the TCM to give responses of about printf(" *creating TCM histograms...\n");fflush(stdout); int rgHists[40][256]; memset(rgHists,0,sizeof(int)*40*256); for(int rx = 10; rx < 400; rx +=10){ DImage imgTCM; D_uint8 *p8; int max = 0; int ry; ry = rx/6; if(ry < 1) ry = 1; DTCM::getImageTCM_(imgTCM, imgROI, rx,ry, false,NULL); p8 = imgTCM.dataPointer_u8(); for(int y = 0, idx=0; y < h; ++y){ for(int x = 0; x < w; ++x,++idx){ rgHists[rx/10][p8[idx]] += 1; } } rgHists[rx/10][0] = 0; max = 0; for(int i=0;i<256;++i) if(rgHists[rx/10][i] > max) max =rgHists[rx/10][i]; for(int i=0;i<256;++i){//scale from 0 to 
255 if (max!=0) rgHists[rx/10][i] = rgHists[rx/10][i] * 255 / max; } } //now save the histograms as an image DImage imgTCMhists; imgTCMhists.create(256,40,DImage::DImage_u8); D_uint8 *p8; p8 = imgTCMhists.dataPointer_u8(); for(int y=0, idx=0; y < 40; ++y){ for(int x=0; x < 256; ++x, ++idx){ p8[idx] = (D_uint8)(rgHists[y][x]); } } sprintf(stTmp, "%s_tcmhist.pgm",stDebugBaseName); imgTCMhists.save(stTmp); printf(" *done creating TCM histograms...\n"); int radiusX, radiusY; radiusX = imgROI.width() / 20; if(radiusX < 10) radiusX = 10; if(radiusX > 200) radiusX = 200; radiusY = radiusX / 5; // if(radiusY < 2) radiusY = 2; printf(" TCM radiusX=%d radiusY=%d\n", radiusX,radiusY); DTCM::getImageTCM_(imgTmp, imgROI, radiusX,radiusY, false,stDebugBaseName); // DTCM::getImageTCM_(imgTmp, imgROI, 1,1, false); // double *rgProf; // rgProf = prof.dataPointer(); // for(int i=100; i < 500; ++i){ // if(rgProf[i] > 0.) // printf("[%d]=%f ",i, rgProf[i]); // } // printf("\n"); return 0; }