void rectangleDetection(IplImage* inputImage,IplImage* srcImage,CvRect iRect,int iColor,vector<ShapeRecResult> &v)//p1为前行位,p2为左转位 { const int iWidth = inputImage->width; const int iHeight = inputImage->height; //水平和竖直状态 //bool VerticalReturnStatus = false; //bool HorzReturnStatus=false; //横向检测框 int HorzRectHeight=(iRect.width+iRect.height)/2 + 6; int HorzRectWidth=3*(HorzRectHeight-4)+3; int HorzRectX1=0, HorzRectY1=0; int HorzRectX2=0, HorzRectY2=0; //thresholding for graylevel differences between seedpoints and its neibours const int grayThresholding =80;//70 const int RatioThreshold = 55;//检测框中黑色像素所占比例 //纵向检测框 //int iDrawRectWidth = (iRect.width+iRect.height)/2 + 6; int iDrawRectWidth = (iRect.width + iRect.height)/2 *5/ 3; //int iDrawRectHeight = 3*(iDrawRectWidth-4)+6; int iDrawRectHeight; int iDrawRectX1=0, iDrawRectY1=0; int iDrawRectX2=0, iDrawRectY2=0; if(iColor==RED_PIXEL_LABEL){ iDrawRectHeight = iDrawRectWidth * 7 / 3; iDrawRectY1 = iRect.y - iDrawRectWidth /4; HorzRectX1= iRect.x-3; ///iDrawRectHeight = iDrawRectWidth * 7 / 3; } else if(iColor == GREEN_PIXEL_LABEL){ iDrawRectHeight = iDrawRectWidth * 8 / 3; iDrawRectY1 = iRect.y-iDrawRectHeight/3*2; HorzRectX1=iRect.x-HorzRectWidth/3*2; } //竖直检测窗设置 iDrawRectY2 = iDrawRectY1 + iDrawRectHeight; iDrawRectX1 = iRect.x - iDrawRectWidth/5; iDrawRectX2 = iDrawRectX1 + iDrawRectWidth; //水平检测框设置 HorzRectX2= HorzRectX1+HorzRectWidth; HorzRectY1= iRect.y-3; HorzRectY2= HorzRectY1+HorzRectHeight; if(HorzRectX1<0 || HorzRectY1<0 || HorzRectX2>=iWidth || HorzRectY2>=iHeight) { //cvReleaseImage(&imageGrayScale);//when return the result, the image must be released, otherwise,the memory will be leaked return; } if( iDrawRectX1<0 || iDrawRectY1<0 || iDrawRectX2>=iWidth || iDrawRectY2>=iHeight) { //cvReleaseImage(&imageGrayScale);//when return the result, the image must be released, otherwise,the memory will be leaked return; } //竖直方向统计黑色像素比例 CvRect VerticalRect; VerticalRect.x=iDrawRectX1; VerticalRect.y=iDrawRectY1; VerticalRect.width=iDrawRectWidth; VerticalRect.height=iDrawRectHeight; IplImage*VerticalLight = cvCreateImage(cvSize(iDrawRectWidth,iDrawRectHeight),srcImage->depth,srcImage->nChannels); GetImageRect(srcImage,VerticalRect,VerticalLight); IplImage *VerticalGrayLight=cvCreateImage(cvSize(iDrawRectWidth,iDrawRectHeight),IPL_DEPTH_8U,1); cvCvtColor(VerticalLight,VerticalGrayLight,CV_BGR2GRAY); cvThreshold(VerticalGrayLight,VerticalGrayLight,0,255,CV_THRESH_OTSU); //get the other two blocks black ration (vertical) bool verticalBlackLimit = checkOtherBlocksBlackRatio(VerticalGrayLight, iColor,true); ShapeRecResult TLbox; //若检测出的矩形框符合条件,则将坚持到的矩形框放入v中,在外面统一显示 //if(VerticalBlackRatio>=RatioThreshold&&VerticalBlackRatio<=93) //if (verticalBlackLimit&&isTL(srcImage, VerticalRect, true)){ if (verticalBlackLimit == true && isTL(srcImage, VerticalRect, true)) { TLbox.box = VerticalRect; //TLbox.shape = 1;//表示竖向 if (iColor == GREEN_PIXEL_LABEL) { //cvRectangle(srcImage,cvPoint(iDrawRectX1,iDrawRectY1),cvPoint(iDrawRectX2,iDrawRectY2),cvScalar(0,255,0),2); TLbox.color = GREEN_PIXEL_LABEL; v.push_back(TLbox); } else if (iColor == RED_PIXEL_LABEL) { //cvRectangle(srcImage,cvPoint(iDrawRectX1,iDrawRectY1),cvPoint(iDrawRectX2,iDrawRectY2),cvScalar(0,0,255),2); TLbox.color = RED_PIXEL_LABEL; //识别信号灯指向 int result = RecognizeLight(srcImage, iRect); switch (result) { case 0://圆形 TLbox.shape = 0; break; case 1://禁止左转 TLbox.shape = 1; break; case 2://前行箭头 TLbox.shape = 0; break; default: break; } v.push_back(TLbox); } } else{ //水平方向统计黑色像素比例 CvRect HorzRect; HorzRect.x = HorzRectX1; HorzRect.y = HorzRectY1; HorzRect.width = HorzRectWidth; HorzRect.height = HorzRectHeight; IplImage*HorzLight = cvCreateImage(cvSize(HorzRectWidth, HorzRectHeight), srcImage->depth, srcImage->nChannels); GetImageRect(srcImage, HorzRect, HorzLight); IplImage *HorzGrayLight = cvCreateImage(cvSize(HorzRectWidth, HorzRectHeight), IPL_DEPTH_8U, 1); cvCvtColor(HorzLight, HorzGrayLight, CV_BGR2GRAY); cvThreshold(HorzGrayLight, HorzGrayLight, 0, 255, CV_THRESH_OTSU); /* int HorzWidthStep = HorzGrayLight->widthStep; int HorzSum=0; int HorzGrayValue=0; unsigned char* pDataHorz; for(int j=0; j<HorzRectHeight; j++){ pDataHorz = (unsigned char*)HorzGrayLight->imageData + j*HorzWidthStep; for(int i=0; i<HorzRectWidth; i++){ HorzGrayValue = pDataHorz[i]; //if((HorzGrayValue<=grayThresholding)) if((HorzGrayValue==0)) HorzSum++; } } */ /*int cvHorzSum=cvCountNonZero(HorzGrayLight); int horzBlackNum=HorzRectWidth*HorzRectHeight-cvHorzSum;*/ //get the other two blocks black ration (horizental) bool horizBlackLimit = checkOtherBlocksBlackRatio(HorzGrayLight, iColor, false); //bool horizBlackLimit = true; //else if (HorzBlackRatio>=RatioThreshold&&HorzBlackRatio<=90) //else if (horizBlackLimit&&isTL(srcImage, HorzRect, false)) if (horizBlackLimit&&isTL(srcImage, HorzRect, false)) { //横向检测 TLbox.box.x = HorzRectX1; TLbox.box.y = HorzRectY1; TLbox.box.width = HorzRectWidth; TLbox.box.height = HorzRectHeight; //TLbox.shape = 0;//表示横向 if (iColor == GREEN_PIXEL_LABEL) { //cvRectangle(srcImage,cvPoint(HorzRectX1,HorzRectY1),cvPoint(HorzRectX2,HorzRectY2),cvScalar(0,255,0),2); TLbox.color = GREEN_PIXEL_LABEL; v.push_back(TLbox); } else if (iColor == RED_PIXEL_LABEL) { //cvRectangle(srcImage,cvPoint(HorzRectX1,HorzRectY1),cvPoint(HorzRectX2,HorzRectY2),cvScalar(0,0,255),2); //*p1=*p1+1; TLbox.color = RED_PIXEL_LABEL; int result = RecognizeLight(srcImage, iRect); switch (result) { case 0://圆形 TLbox.shape = 0; break; case 1://禁止左转 TLbox.shape = 1; break; case 2://前行箭头 TLbox.shape = 0; break; default: break; } v.push_back(TLbox); } } cvReleaseImage(&HorzLight); cvReleaseImage(&HorzGrayLight); } /* int iWidthStep = VerticalGrayLight->widthStep; int sum=0; int VerticalGrayValue=0; unsigned char* pDataVertical; for(int j=0; j<iDrawRectHeight; j++){ pDataVertical = (unsigned char*)VerticalGrayLight->imageData + j*iWidthStep; for(int i=0; i<iDrawRectWidth; i++){ VerticalGrayValue = pDataVertical[i]; if((VerticalGrayValue<=grayThresholding)) sum++; } }*/ //int cvVerticalSum=cvCountNonZero(VerticalGrayLight); //int verticalBlackNum=iDrawRectWidth*iDrawRectHeight-cvVerticalSum;//黑色像素点个数 //int VerticalBlackRatio = (float)verticalBlackNum*100/(float)((iDrawRectWidth+1)*((float)iDrawRectHeight+1));//矩形框中黑色像素所占比例 //int HorzBlackRatio=(float)horzBlackNum*100/(float)((HorzRectWidth+1)*((float)HorzRectHeight+1));//矩形框中黑色像素所占比例 #if ISDEBUG_TL ofstream outfile; outfile.open(debugTLPath,ios::app);//ios::app: 以追加的方式打开文件 outfile<<"===black VerticalBlackRatio===:"<<VerticalBlackRatio<<endl;//输出到调试文件中 cout<<"===black VerticalBlackRatio===:"<<VerticalBlackRatio<<endl;//输出到控制台 outfile.close(); #endif #if ISDEBUG_TL Mat grayMat(imageGrayScale); Rect drawRect; drawRect.x=iDrawRectX1; drawRect.y=iDrawRectY1; drawRect.width=iDrawRectX2-iDrawRectX1; drawRect.height=iDrawRectY2-iDrawRectY1; Mat tmpMat=grayMat(drawRect); isLighInBox(tmpMat); #endif //int DetectResult=isTL(srcImage,iRect,); //int DetectResult = isTL(srcImage, VerticalRect); cvReleaseImage(&VerticalLight); cvReleaseImage(&VerticalGrayLight); //DetectResult = 1; return; }
int TextRecognizer::recognize(IplImage *input, const struct TextDetectionParams ¶ms, std::string svmModel, std::vector<Chain> &chains, std::vector<std::pair<Point2d, Point2d> > &compBB, std::vector<std::pair<CvPoint, CvPoint> > &chainBB, std::vector<std::string>& text) { // Convert to grayscale IplImage * grayImage = cvCreateImage(cvGetSize(input), IPL_DEPTH_8U, 1); cvCvtColor(input, grayImage, CV_RGB2GRAY); for (unsigned int i = 0; i < chainBB.size(); i++) { cv::Point center = cv::Point( (chainBB[i].first.x + chainBB[i].second.x) / 2, (chainBB[i].first.y + chainBB[i].second.y) / 2); /* work out if total width of chain is large enough */ if (chainBB[i].second.x - chainBB[i].first.x < input->width / params.maxImgWidthToTextRatio) { LOGL(LOG_TXT_ORIENT, "Reject chain #" << i << " width=" << (chainBB[i].second.x - chainBB[i].first.x) << "<" << (input->width / params.maxImgWidthToTextRatio)); continue; } /* eliminate chains with components of lower height than required minimum */ int minHeight = chainBB[i].second.y - chainBB[i].first.y; for (unsigned j = 0; j < chains[i].components.size(); j++) { minHeight = std::min(minHeight, compBB[chains[i].components[j]].second.y - compBB[chains[i].components[j]].first.y); } if (minHeight < params.minCharacterheight) { LOGL(LOG_CHAINS, "Reject chain # " << i << " minHeight=" << minHeight << "<" << params.minCharacterheight); continue; } /* invert direction if angle is in 3rd/4th quadrants */ if (chains[i].direction.x < 0) { chains[i].direction.x = -chains[i].direction.x; chains[i].direction.y = -chains[i].direction.y; } /* work out chain angle */ double theta_deg = 180 * atan2(chains[i].direction.y, chains[i].direction.x) / PI; if (absd(theta_deg) > params.maxAngle) { LOGL(LOG_TXT_ORIENT, "Chain angle " << theta_deg << " exceeds max " << params.maxAngle); continue; } if ((chainBB.size() == 2) && (absd(theta_deg) > 5)) continue; LOGL(LOG_TXT_ORIENT, "Chain #" << i << " Angle: " << theta_deg << " degrees"); /* create copy of input image including only the selected components */ cv::Mat inputMat = cv::Mat(input); cv::Mat grayMat = cv::Mat(grayImage); cv::Mat componentsImg = cv::Mat::zeros(grayMat.rows, grayMat.cols, grayMat.type()); std::vector<cv::Point> compCoords; for (unsigned int j = 0; j < chains[i].components.size(); j++) { int component_id = chains[i].components[j]; cv::Rect roi = cv::Rect(compBB[component_id].first.x, compBB[component_id].first.y, compBB[component_id].second.x - compBB[component_id].first.x, compBB[component_id].second.y - compBB[component_id].first.y); cv::Mat componentRoi = grayMat(roi); compCoords.push_back( cv::Point(compBB[component_id].first.x, compBB[component_id].first.y)); compCoords.push_back( cv::Point(compBB[component_id].second.x, compBB[component_id].second.y)); compCoords.push_back( cv::Point(compBB[component_id].first.x, compBB[component_id].second.y)); compCoords.push_back( cv::Point(compBB[component_id].second.x, compBB[component_id].first.y)); cv::Mat thresholded; cv::threshold(componentRoi, thresholded, 0 // the value doesn't matter for Otsu thresholding , 255 // we could choose any non-zero value. 255 (white) makes it easy to see the binary image , cv::THRESH_OTSU | cv::THRESH_BINARY_INV); #if 0 cv::Moments mu = cv::moments(thresholded, true); std::cout << "mu02=" << mu.mu02 << " mu11=" << mu.mu11 << " skew=" << mu.mu11 / mu.mu02 << std::endl; #endif cv::imwrite("thresholded.png", thresholded); cv::threshold(componentRoi, componentsImg(roi), 0 // the value doesn't matter for Otsu thresholding , 255 // we could choose any non-zero value. 255 (white) makes it easy to see the binary image , cv::THRESH_OTSU | cv::THRESH_BINARY_INV); } cv::imwrite("bib-components.png", componentsImg); cv::Mat rotMatrix = cv::getRotationMatrix2D(center, theta_deg, 1.0); cv::Mat rotatedMat = cv::Mat::zeros(grayMat.rows, grayMat.cols, grayMat.type()); cv::warpAffine(componentsImg, rotatedMat, rotMatrix, rotatedMat.size()); cv::imwrite("bib-rotated.png", rotatedMat); /* rotate each component coordinates */ const int border = 3; cv::transform(compCoords, compCoords, rotMatrix); /* find bounding box of rotated components */ cv::Rect roi = getBoundingBox(compCoords, cv::Size(input->width, input->height)); /* ROI area can be null if outside of clipping area */ if ((roi.width == 0) || (roi.height == 0)) continue; LOGL(LOG_TEXTREC, "ROI = " << roi); cv::Mat mat = cv::Mat::zeros(roi.height + 2 * border, roi.width + 2 * border, grayMat.type()); cv::Mat tmp = rotatedMat(roi); #if 0 cv::Mat roiMat = inputMat(roi); char *filename_roi; asprintf(&filename_roi, "bib-%05d-%d.png", this->bsid+1, i); cv::imwrite(filename_roi, roiMat); free(filename_roi); #endif /* copy bounded box from rotated mat to new mat with borders - borders are needed * to improve OCR success rate */ tmp.copyTo( mat( cv::Rect(cv::Point(border, border), cv::Point(roi.width + border, roi.height + border)))); /* resize image to improve OCR success rate */ float upscale = 3.0; cv::resize(mat, mat, cvSize(0, 0), upscale, upscale); /* erode text to get rid of thin joints */ int s = (int) (0.05 * mat.rows); /* 5% of up-scaled size) */ cv::Mat elem = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(2 * s + 1, 2 * s + 1), cv::Point(s, s)); cv::erode(mat, mat, elem); cv::imwrite("bib-tess-input.png", mat); // Pass it to Tesseract API tess.SetImage((uchar*) mat.data, mat.cols, mat.rows, 1, mat.step1()); // Get the text char* out = tess.GetUTF8Text(); do { if (strlen(out) == 0) { break; } std::string s_out(out); boost::algorithm::trim(s_out); if (s_out.size() != chains[i].components.size()) { LOGL(LOG_TEXTREC, "Text size mismatch: expected " << chains[i].components.size() << " digits, got '" << s_out << "' (" << s_out.size() << " digits)"); break; } /* if first character is a '0' we have a partially occluded number */ if (s_out[0] == '0') { LOGL(LOG_TEXTREC, "Text begins with '0' (partially occluded)"); break; } if (!is_number(s_out)) { LOGL(LOG_TEXTREC, "Text is not a number ('" << s_out << "')"); //break; } /* adjust width to size of 6 digits */ int charWidth = (chainBB[i].second.x - chainBB[i].first.x) / s_out.size(); int width = 6 * charWidth; /* adjust to 2 width/height aspect ratio */ int height = width / 2; int midx = center.x; int midy = center.y; cv::Rect roi = cv::Rect(midx - width / 2, midy - height / 2, width, height); if ((roi.x >= 0) && (roi.y >= 0) && (roi.x + roi.width < inputMat.cols) && (roi.y + roi.height < inputMat.rows)) { cv::Mat bibMat = inputMat(roi); if (s_out.size() <= (unsigned) params.modelVerifLenCrit) { if (svmModel.empty()) { LOGL(LOG_TEXTREC, "Reject " << s_out << " on no model"); break; } if (minHeight < params.modelVerifMinHeight) { LOGL(LOG_TEXTREC, "Reject " << s_out << " on small height"); break; } /* if we have an SVM Model, predict */ CvSVM svm; cv::HOGDescriptor hog(cv::Size(128, 64), /* windows size */ cv::Size(16, 16), /* block size */ cv::Size(8, 8), /* block stride */ cv::Size(8, 8), /* cell size */ 9 /* nbins */ ); std::vector<float> descriptor; /* resize to HOGDescriptor dimensions */ cv::Mat resizedMat; cv::resize(bibMat, resizedMat, hog.winSize, 0, 0); hog.compute(resizedMat, descriptor); /* load SVM model */ svm.load(svmModel.c_str()); float prediction = svm.predict(cv::Mat(descriptor).t()); LOGL(LOG_SVM, "Prediction=" << prediction); if (prediction < 0.5) { LOGL(LOG_TEXTREC, "Reject " << s_out << " on low SVM prediction"); break; } } /* symmetry check */ if ( //(i == 4) && (1)) { cv::Mat inputRotated = cv::Mat::zeros(inputMat.rows, inputMat.cols, inputMat.type()); cv::warpAffine(inputMat, inputRotated, rotMatrix, inputRotated.size()); int minOffset = 0; double min = 1e6; //width = 12 * charWidth; for (int offset = -50; offset < 30; offset += 2) { /* resize to HOGDescriptor dimensions */ cv::Mat straightMat; cv::Mat flippedMat; /* extract shifted ROI */ cv::Rect roi = cv::Rect(midx - width / 2 + offset, midy - height / 2, width, height); if ((roi.x >= 0) && (roi.y >= 0) && (roi.x + roi.width < inputMat.cols) && (roi.y + roi.height < inputMat.rows)) { straightMat = inputRotated(roi); cv::flip(straightMat, flippedMat, 1); cv::Scalar mssimV = getMSSIM(straightMat, flippedMat); double avgMssim = (mssimV.val[0] + mssimV.val[1] + mssimV.val[2]) * 100 / 3; double dist = 1 / (avgMssim + 1); LOGL(LOG_SYMM_CHECK, "offset=" << offset << " dist=" << dist); if (dist < min) { min = dist; minOffset = offset; cv::imwrite("symm-max.png", straightMat); cv::Mat visualImage; } } } LOGL(LOG_SYMM_CHECK, "MinOffset = " << minOffset << " charWidth=" << charWidth); if (absd(minOffset) > charWidth / 3) { LOGL(LOG_TEXTREC, "Reject " << s_out << " on asymmetry"); std::cout << "Reject " << s_out << " on asymmetry" << std::endl; break; } } /* save for training only if orientation is ~horizontal */ if (abs(theta_deg) < 7) { char *filename; std::cout << " ------ " << s_out << std::endl; asprintf(&filename, "bib-%05d-%s.png", this->bsid++, s_out.c_str()); cv::imwrite(filename, bibMat); free(filename); } } else { LOGL(LOG_TEXTREC, "Reject as ROI outside boundaries"); break; } /* all fine, add this bib number */ text.push_back(s_out); LOGL(LOG_TEXTREC, "Bib number: '" << s_out << "'"); } while (0); free(out); } cvReleaseImage(&grayImage); return 0; }