// 检查图片违规2,下载图片、比较图片 BOOL DoCheckImageIllegal(vector<CString>& imgs, CString& msg) { for (const CString& img : imgs) { CString imgName = GetImageName(img); // 检查缓存结果 if (g_leagalImage.find(imgName) != g_leagalImage.end()) continue; if (g_illegalImage.find(imgName) != g_illegalImage.end()) return TRUE; // 读取图片 Mat image; if (PathFileExists(IMG_CACHE_PATH + imgName)) { // 读取图片缓存 ReadImage(IMG_CACHE_PATH + imgName, image); } else { // 下载图片 unique_ptr<BYTE[]> buffer; ULONG size; if (HTTPGetRaw(img, &buffer, &size, FALSE) == NET_SUCCESS) { ReadImage(buffer.get(), size, image); CreateDir(IMG_CACHE_PATH); CFile file; if (file.Open(IMG_CACHE_PATH + imgName, CFile::modeCreate | CFile::modeWrite)) file.Write(buffer.get(), size); } } if (image.data == NULL || image.cols < 30 || image.rows < 30) // 尺寸太小不比较 continue; // 判断和违规图片比较大于阈值 g_plan.m_optionsLock.Lock(); for (const NameImage& i : g_plan.m_images) { if (i.img.cols < 30 || i.img.rows < 30) // 尺寸太小不比较 continue; double mssim = getMSSIM(image, i.img); if (mssim > g_plan.m_SSIMThreshold) { msg.Format(_T("<font color=red> 触发违规图片 </font>%s<font color=red> 相似度%.3lf</font>"), (LPCTSTR)i.name, mssim); g_illegalImage.insert(imgName); g_plan.m_optionsLock.Unlock(); return TRUE; } } g_leagalImage.insert(imgName); g_plan.m_optionsLock.Unlock(); } return FALSE; }
int TextRecognizer::recognize(IplImage *input, const struct TextDetectionParams ¶ms, std::string svmModel, std::vector<Chain> &chains, std::vector<std::pair<Point2d, Point2d> > &compBB, std::vector<std::pair<CvPoint, CvPoint> > &chainBB, std::vector<std::string>& text) { // Convert to grayscale IplImage * grayImage = cvCreateImage(cvGetSize(input), IPL_DEPTH_8U, 1); cvCvtColor(input, grayImage, CV_RGB2GRAY); for (unsigned int i = 0; i < chainBB.size(); i++) { cv::Point center = cv::Point( (chainBB[i].first.x + chainBB[i].second.x) / 2, (chainBB[i].first.y + chainBB[i].second.y) / 2); /* work out if total width of chain is large enough */ if (chainBB[i].second.x - chainBB[i].first.x < input->width / params.maxImgWidthToTextRatio) { LOGL(LOG_TXT_ORIENT, "Reject chain #" << i << " width=" << (chainBB[i].second.x - chainBB[i].first.x) << "<" << (input->width / params.maxImgWidthToTextRatio)); continue; } /* eliminate chains with components of lower height than required minimum */ int minHeight = chainBB[i].second.y - chainBB[i].first.y; for (unsigned j = 0; j < chains[i].components.size(); j++) { minHeight = std::min(minHeight, compBB[chains[i].components[j]].second.y - compBB[chains[i].components[j]].first.y); } if (minHeight < params.minCharacterheight) { LOGL(LOG_CHAINS, "Reject chain # " << i << " minHeight=" << minHeight << "<" << params.minCharacterheight); continue; } /* invert direction if angle is in 3rd/4th quadrants */ if (chains[i].direction.x < 0) { chains[i].direction.x = -chains[i].direction.x; chains[i].direction.y = -chains[i].direction.y; } /* work out chain angle */ double theta_deg = 180 * atan2(chains[i].direction.y, chains[i].direction.x) / PI; if (absd(theta_deg) > params.maxAngle) { LOGL(LOG_TXT_ORIENT, "Chain angle " << theta_deg << " exceeds max " << params.maxAngle); continue; } if ((chainBB.size() == 2) && (absd(theta_deg) > 5)) continue; LOGL(LOG_TXT_ORIENT, "Chain #" << i << " Angle: " << theta_deg << " degrees"); /* create copy of input image including only the selected components */ cv::Mat inputMat = cv::Mat(input); cv::Mat grayMat = cv::Mat(grayImage); cv::Mat componentsImg = cv::Mat::zeros(grayMat.rows, grayMat.cols, grayMat.type()); std::vector<cv::Point> compCoords; for (unsigned int j = 0; j < chains[i].components.size(); j++) { int component_id = chains[i].components[j]; cv::Rect roi = cv::Rect(compBB[component_id].first.x, compBB[component_id].first.y, compBB[component_id].second.x - compBB[component_id].first.x, compBB[component_id].second.y - compBB[component_id].first.y); cv::Mat componentRoi = grayMat(roi); compCoords.push_back( cv::Point(compBB[component_id].first.x, compBB[component_id].first.y)); compCoords.push_back( cv::Point(compBB[component_id].second.x, compBB[component_id].second.y)); compCoords.push_back( cv::Point(compBB[component_id].first.x, compBB[component_id].second.y)); compCoords.push_back( cv::Point(compBB[component_id].second.x, compBB[component_id].first.y)); cv::Mat thresholded; cv::threshold(componentRoi, thresholded, 0 // the value doesn't matter for Otsu thresholding , 255 // we could choose any non-zero value. 255 (white) makes it easy to see the binary image , cv::THRESH_OTSU | cv::THRESH_BINARY_INV); #if 0 cv::Moments mu = cv::moments(thresholded, true); std::cout << "mu02=" << mu.mu02 << " mu11=" << mu.mu11 << " skew=" << mu.mu11 / mu.mu02 << std::endl; #endif cv::imwrite("thresholded.png", thresholded); cv::threshold(componentRoi, componentsImg(roi), 0 // the value doesn't matter for Otsu thresholding , 255 // we could choose any non-zero value. 255 (white) makes it easy to see the binary image , cv::THRESH_OTSU | cv::THRESH_BINARY_INV); } cv::imwrite("bib-components.png", componentsImg); cv::Mat rotMatrix = cv::getRotationMatrix2D(center, theta_deg, 1.0); cv::Mat rotatedMat = cv::Mat::zeros(grayMat.rows, grayMat.cols, grayMat.type()); cv::warpAffine(componentsImg, rotatedMat, rotMatrix, rotatedMat.size()); cv::imwrite("bib-rotated.png", rotatedMat); /* rotate each component coordinates */ const int border = 3; cv::transform(compCoords, compCoords, rotMatrix); /* find bounding box of rotated components */ cv::Rect roi = getBoundingBox(compCoords, cv::Size(input->width, input->height)); /* ROI area can be null if outside of clipping area */ if ((roi.width == 0) || (roi.height == 0)) continue; LOGL(LOG_TEXTREC, "ROI = " << roi); cv::Mat mat = cv::Mat::zeros(roi.height + 2 * border, roi.width + 2 * border, grayMat.type()); cv::Mat tmp = rotatedMat(roi); #if 0 cv::Mat roiMat = inputMat(roi); char *filename_roi; asprintf(&filename_roi, "bib-%05d-%d.png", this->bsid+1, i); cv::imwrite(filename_roi, roiMat); free(filename_roi); #endif /* copy bounded box from rotated mat to new mat with borders - borders are needed * to improve OCR success rate */ tmp.copyTo( mat( cv::Rect(cv::Point(border, border), cv::Point(roi.width + border, roi.height + border)))); /* resize image to improve OCR success rate */ float upscale = 3.0; cv::resize(mat, mat, cvSize(0, 0), upscale, upscale); /* erode text to get rid of thin joints */ int s = (int) (0.05 * mat.rows); /* 5% of up-scaled size) */ cv::Mat elem = cv::getStructuringElement(cv::MORPH_ELLIPSE, cv::Size(2 * s + 1, 2 * s + 1), cv::Point(s, s)); cv::erode(mat, mat, elem); cv::imwrite("bib-tess-input.png", mat); // Pass it to Tesseract API tess.SetImage((uchar*) mat.data, mat.cols, mat.rows, 1, mat.step1()); // Get the text char* out = tess.GetUTF8Text(); do { if (strlen(out) == 0) { break; } std::string s_out(out); boost::algorithm::trim(s_out); if (s_out.size() != chains[i].components.size()) { LOGL(LOG_TEXTREC, "Text size mismatch: expected " << chains[i].components.size() << " digits, got '" << s_out << "' (" << s_out.size() << " digits)"); break; } /* if first character is a '0' we have a partially occluded number */ if (s_out[0] == '0') { LOGL(LOG_TEXTREC, "Text begins with '0' (partially occluded)"); break; } if (!is_number(s_out)) { LOGL(LOG_TEXTREC, "Text is not a number ('" << s_out << "')"); //break; } /* adjust width to size of 6 digits */ int charWidth = (chainBB[i].second.x - chainBB[i].first.x) / s_out.size(); int width = 6 * charWidth; /* adjust to 2 width/height aspect ratio */ int height = width / 2; int midx = center.x; int midy = center.y; cv::Rect roi = cv::Rect(midx - width / 2, midy - height / 2, width, height); if ((roi.x >= 0) && (roi.y >= 0) && (roi.x + roi.width < inputMat.cols) && (roi.y + roi.height < inputMat.rows)) { cv::Mat bibMat = inputMat(roi); if (s_out.size() <= (unsigned) params.modelVerifLenCrit) { if (svmModel.empty()) { LOGL(LOG_TEXTREC, "Reject " << s_out << " on no model"); break; } if (minHeight < params.modelVerifMinHeight) { LOGL(LOG_TEXTREC, "Reject " << s_out << " on small height"); break; } /* if we have an SVM Model, predict */ CvSVM svm; cv::HOGDescriptor hog(cv::Size(128, 64), /* windows size */ cv::Size(16, 16), /* block size */ cv::Size(8, 8), /* block stride */ cv::Size(8, 8), /* cell size */ 9 /* nbins */ ); std::vector<float> descriptor; /* resize to HOGDescriptor dimensions */ cv::Mat resizedMat; cv::resize(bibMat, resizedMat, hog.winSize, 0, 0); hog.compute(resizedMat, descriptor); /* load SVM model */ svm.load(svmModel.c_str()); float prediction = svm.predict(cv::Mat(descriptor).t()); LOGL(LOG_SVM, "Prediction=" << prediction); if (prediction < 0.5) { LOGL(LOG_TEXTREC, "Reject " << s_out << " on low SVM prediction"); break; } } /* symmetry check */ if ( //(i == 4) && (1)) { cv::Mat inputRotated = cv::Mat::zeros(inputMat.rows, inputMat.cols, inputMat.type()); cv::warpAffine(inputMat, inputRotated, rotMatrix, inputRotated.size()); int minOffset = 0; double min = 1e6; //width = 12 * charWidth; for (int offset = -50; offset < 30; offset += 2) { /* resize to HOGDescriptor dimensions */ cv::Mat straightMat; cv::Mat flippedMat; /* extract shifted ROI */ cv::Rect roi = cv::Rect(midx - width / 2 + offset, midy - height / 2, width, height); if ((roi.x >= 0) && (roi.y >= 0) && (roi.x + roi.width < inputMat.cols) && (roi.y + roi.height < inputMat.rows)) { straightMat = inputRotated(roi); cv::flip(straightMat, flippedMat, 1); cv::Scalar mssimV = getMSSIM(straightMat, flippedMat); double avgMssim = (mssimV.val[0] + mssimV.val[1] + mssimV.val[2]) * 100 / 3; double dist = 1 / (avgMssim + 1); LOGL(LOG_SYMM_CHECK, "offset=" << offset << " dist=" << dist); if (dist < min) { min = dist; minOffset = offset; cv::imwrite("symm-max.png", straightMat); cv::Mat visualImage; } } } LOGL(LOG_SYMM_CHECK, "MinOffset = " << minOffset << " charWidth=" << charWidth); if (absd(minOffset) > charWidth / 3) { LOGL(LOG_TEXTREC, "Reject " << s_out << " on asymmetry"); std::cout << "Reject " << s_out << " on asymmetry" << std::endl; break; } } /* save for training only if orientation is ~horizontal */ if (abs(theta_deg) < 7) { char *filename; std::cout << " ------ " << s_out << std::endl; asprintf(&filename, "bib-%05d-%s.png", this->bsid++, s_out.c_str()); cv::imwrite(filename, bibMat); free(filename); } } else { LOGL(LOG_TEXTREC, "Reject as ROI outside boundaries"); break; } /* all fine, add this bib number */ text.push_back(s_out); LOGL(LOG_TEXTREC, "Bib number: '" << s_out << "'"); } while (0); free(out); } cvReleaseImage(&grayImage); return 0; }