Example #1
0
File: main.cpp Project: tungph/GNUT
/**
 * Prints a "Hello world. " greeting followed by the given text to std::cout.
 *
 * @param string       NUL-terminated text appended to the greeting; must not be null.
 * @param toUppercase  When false (default) the text is printed once, unchanged.
 *                     When true the text is upper-cased and printed twice, once for
 *                     each of the two demonstrated implementations
 *                     (std::transform on a std::string, and a lambda filling a buffer).
 */
void sayHello(const char* string, bool toUppercase = false)
{
    if (!toUppercase)
    {
        std::cout << "Hello world. " << string << std::endl;
        return;
    }

    // toupper() is undefined for negative char values, so always feed it an unsigned char.
    const auto upper = [](unsigned char c) { return static_cast<char>(toupper(c)); };

    std::string string1(string);
    std::transform(string1.begin(), string1.end(), string1.begin(), upper);
    std::cout << "Hello world. " << string1.c_str() << std::endl;

    // other implementation (use lambda)
    // The returned temporary owns the buffer until the end of the full expression,
    // so the pointer obtained with get() is still valid while it is streamed.
    std::cout << "Hello world. " << [&upper](const char* s){
        const std::size_t len = strlen(s) + 1; // +1 copies the terminating NUL as well
        // Array form of unique_ptr so that the buffer is released with delete[].
        std::unique_ptr<char[]> s_out(new char[len]);
        for (std::size_t i = 0; i < len; ++i)
        {
            s_out[i] = upper(static_cast<unsigned char>(s[i]));
        }

        return s_out;
    }(string).get() << std::endl;
}
Example #2
0
/**
 * Runs OCR on every candidate chain and appends the accepted strings to `text`.
 *
 * For each entry of `chainBB` the function:
 *  - rejects the chain if it is too narrow, if one of its components is too
 *    short, or if its angle exceeds the configured maximum;
 *  - builds a binary image holding only the chain's components (Otsu
 *    threshold), rotates it by the chain angle and crops it;
 *  - adds a border, up-scales and erodes the crop, then passes it to Tesseract;
 *  - validates the recognised string (length, leading '0'), optionally verifies
 *    it with a HOG + SVM model, and runs a left/right symmetry check;
 *  - on success saves a crop of the input image and pushes the string to `text`.
 *
 * Several debug images are written to fixed file names in the current
 * directory on every call (e.g. "bib-components.png", "bib-tess-input.png").
 *
 * @param input     source image; converted to grayscale with CV_RGB2GRAY
 * @param params    thresholds used by the rejection tests
 * @param svmModel  path of the SVM model file; may be empty, in which case
 *                  strings short enough to require verification are rejected
 * @param chains    chains, indexed with the same index as `chainBB`; the
 *                  direction of a chain may be flipped in place
 * @param compBB    bounding boxes of the components, indexed by component id
 * @param chainBB   bounding boxes of the chains
 * @param text      output: accepted strings are appended
 * @return always 0
 */
int TextRecognizer::recognize(IplImage *input,
		const struct TextDetectionParams &params, std::string svmModel,
		std::vector<Chain> &chains,
		std::vector<std::pair<Point2d, Point2d> > &compBB,
		std::vector<std::pair<CvPoint, CvPoint> > &chainBB,
		std::vector<std::string>& text) {

	// Convert to grayscale
	IplImage * grayImage = cvCreateImage(cvGetSize(input), IPL_DEPTH_8U, 1);
	cvCvtColor(input, grayImage, CV_RGB2GRAY);

	for (unsigned int i = 0; i < chainBB.size(); i++) {
		cv::Point center = cv::Point(
				(chainBB[i].first.x + chainBB[i].second.x) / 2,
				(chainBB[i].first.y + chainBB[i].second.y) / 2);

		/* work out if total width of chain is large enough */
		if (chainBB[i].second.x - chainBB[i].first.x
				< input->width / params.maxImgWidthToTextRatio) {
			LOGL(LOG_TXT_ORIENT,
					"Reject chain #" << i << " width=" << (chainBB[i].second.x - chainBB[i].first.x) << "<" << (input->width / params.maxImgWidthToTextRatio));
			continue;
		}

		/* eliminate chains with components of lower height than required minimum */
		int minHeight = chainBB[i].second.y - chainBB[i].first.y;
		for (unsigned j = 0; j < chains[i].components.size(); j++) {
			minHeight = std::min(minHeight,
					compBB[chains[i].components[j]].second.y
							- compBB[chains[i].components[j]].first.y);
		}
		if (minHeight < params.minCharacterheight) {
			LOGL(LOG_CHAINS,
					"Reject chain # " << i << " minHeight=" << minHeight << "<" << params.minCharacterheight);
			continue;
		}

		/* invert direction if angle is in 3rd/4th quadrants */
		if (chains[i].direction.x < 0) {
			chains[i].direction.x = -chains[i].direction.x;
			chains[i].direction.y = -chains[i].direction.y;
		}
		/* work out chain angle */
		double theta_deg = 180
				* atan2(chains[i].direction.y, chains[i].direction.x) / PI;

		if (absd(theta_deg) > params.maxAngle) {
			LOGL(LOG_TXT_ORIENT,
					"Chain angle " << theta_deg << " exceeds max " << params.maxAngle);
			continue;
		}
		/* with exactly two chains a tighter 5 degree limit is applied */
		if ((chainBB.size() == 2) && (absd(theta_deg) > 5))
			continue;
		LOGL(LOG_TXT_ORIENT,
				"Chain #" << i << " Angle: " << theta_deg << " degrees");

		/* create copy of input image including only the selected components */
		cv::Mat inputMat = cv::Mat(input);
		cv::Mat grayMat = cv::Mat(grayImage);
		cv::Mat componentsImg = cv::Mat::zeros(grayMat.rows, grayMat.cols,
				grayMat.type());

		/* the four corners of every component box, rotated later on */
		std::vector<cv::Point> compCoords;

		for (unsigned int j = 0; j < chains[i].components.size(); j++) {
			int component_id = chains[i].components[j];
			cv::Rect roi = cv::Rect(compBB[component_id].first.x,
					compBB[component_id].first.y,
					compBB[component_id].second.x
							- compBB[component_id].first.x,
					compBB[component_id].second.y
							- compBB[component_id].first.y);
			cv::Mat componentRoi = grayMat(roi);

			compCoords.push_back(
					cv::Point(compBB[component_id].first.x,
							compBB[component_id].first.y));
			compCoords.push_back(
					cv::Point(compBB[component_id].second.x,
							compBB[component_id].second.y));
			compCoords.push_back(
					cv::Point(compBB[component_id].first.x,
							compBB[component_id].second.y));
			compCoords.push_back(
					cv::Point(compBB[component_id].second.x,
							compBB[component_id].first.y));

			cv::Mat thresholded;
			cv::threshold(componentRoi, thresholded, 0 // the value doesn't matter for Otsu thresholding
					, 255 // we could choose any non-zero value. 255 (white) makes it easy to see the binary image
					, cv::THRESH_OTSU | cv::THRESH_BINARY_INV);

#if 0
			cv::Moments mu = cv::moments(thresholded, true);
			std::cout << "mu02=" << mu.mu02 << " mu11=" << mu.mu11 << " skew="
			<< mu.mu11 / mu.mu02 << std::endl;
#endif
			cv::imwrite("thresholded.png", thresholded);

			/* same threshold again, this time written straight into the components image */
			cv::threshold(componentRoi, componentsImg(roi), 0 // the value doesn't matter for Otsu thresholding
					, 255 // we could choose any non-zero value. 255 (white) makes it easy to see the binary image
					, cv::THRESH_OTSU | cv::THRESH_BINARY_INV);
		}
		cv::imwrite("bib-components.png", componentsImg);

		cv::Mat rotMatrix = cv::getRotationMatrix2D(center, theta_deg, 1.0);

		cv::Mat rotatedMat = cv::Mat::zeros(grayMat.rows, grayMat.cols,
				grayMat.type());
		cv::warpAffine(componentsImg, rotatedMat, rotMatrix, rotatedMat.size());
		cv::imwrite("bib-rotated.png", rotatedMat);

		/* rotate each component coordinates */
		const int border = 3;
		cv::transform(compCoords, compCoords, rotMatrix);
		/* find bounding box of rotated components */
		cv::Rect roi = getBoundingBox(compCoords,
				cv::Size(input->width, input->height));
		/* ROI area can be null if outside of clipping area */
		if ((roi.width == 0) || (roi.height == 0))
			continue;
		LOGL(LOG_TEXTREC, "ROI = " << roi);
		cv::Mat mat = cv::Mat::zeros(roi.height + 2 * border,
				roi.width + 2 * border, grayMat.type());
		cv::Mat tmp = rotatedMat(roi);
#if 0
        cv::Mat roiMat = inputMat(roi);
        char *filename_roi;
        asprintf(&filename_roi, "bib-%05d-%d.png", this->bsid+1, i);
        cv::imwrite(filename_roi, roiMat);
        free(filename_roi);
#endif
		/* copy bounded box from rotated mat to new mat with borders - borders are needed
		 * to improve OCR success rate
		 */
		tmp.copyTo(
				mat(
						cv::Rect(cv::Point(border, border),
								cv::Point(roi.width + border,
										roi.height + border))));

		/* resize image to improve OCR success rate */
		float upscale = 3.0;
		cv::resize(mat, mat, cvSize(0, 0), upscale, upscale);
		/* erode text to get rid of thin joints */
		int s = (int) (0.05 * mat.rows); /* 5% of up-scaled size) */
		cv::Mat elem = cv::getStructuringElement(cv::MORPH_ELLIPSE,
				cv::Size(2 * s + 1, 2 * s + 1), cv::Point(s, s));
		cv::erode(mat, mat, elem);
		cv::imwrite("bib-tess-input.png", mat);

		// Pass it to Tesseract API
		tess.SetImage((uchar*) mat.data, mat.cols, mat.rows, 1, mat.step1());
		// Get the text
		char* out = tess.GetUTF8Text();
		/* single-pass loop: every 'break' below rejects the chain and falls
		 * through to the release of 'out' */
		do {
			/* GetUTF8Text() may return a null pointer when recognition fails */
			if (!out || strlen(out) == 0) {
				break;
			}
			std::string s_out(out);
			boost::algorithm::trim(s_out);
			/* nothing left after trimming; also protects the division by
			 * s_out.size() further down */
			if (s_out.empty()) {
				break;
			}

			if (s_out.size() != chains[i].components.size()) {
				LOGL(LOG_TEXTREC,
						"Text size mismatch: expected " << chains[i].components.size() << " digits, got '" << s_out << "' (" << s_out.size() << " digits)");
				break;
			}
			/* if first character is a '0' we have a partially occluded number */
			if (s_out[0] == '0') {
				LOGL(LOG_TEXTREC, "Text begins with '0' (partially occluded)");
				break;
			}
			/* non-numeric text is only logged, not rejected */
			if (!is_number(s_out)) {
				LOGL(LOG_TEXTREC, "Text is not a number ('" << s_out << "')");
				//break;
			}

			/* adjust width to size of 6 digits */
			int charWidth = (chainBB[i].second.x - chainBB[i].first.x)
					/ s_out.size();
			int width = 6 * charWidth;
			/* adjust to 2 width/height aspect ratio */
			int height = width / 2;
			int midx = center.x;
			int midy = center.y;

			cv::Rect roi = cv::Rect(midx - width / 2, midy - height / 2, width,
					height);
			if ((roi.x >= 0) && (roi.y >= 0)
					&& (roi.x + roi.width < inputMat.cols)
					&& (roi.y + roi.height < inputMat.rows)) {
				cv::Mat bibMat = inputMat(roi);

				/* short strings need an additional verification by the SVM model */
				if (s_out.size() <= (unsigned) params.modelVerifLenCrit) {

					if (svmModel.empty()) {
						LOGL(LOG_TEXTREC, "Reject " << s_out << " on no model");
						break;
					}

					if (minHeight < params.modelVerifMinHeight) {
						LOGL(LOG_TEXTREC,
								"Reject " << s_out << " on small height");
						break;
					}

					/* if we have an SVM Model, predict */

					CvSVM svm;
					cv::HOGDescriptor hog(cv::Size(128, 64), /* windows size */
					cv::Size(16, 16), /* block size */
					cv::Size(8, 8), /* block stride */
					cv::Size(8, 8), /* cell size */
					9 /* nbins */
					);
					std::vector<float> descriptor;

					/* resize to HOGDescriptor dimensions */
					cv::Mat resizedMat;
					cv::resize(bibMat, resizedMat, hog.winSize, 0, 0);
					hog.compute(resizedMat, descriptor);

					/* load SVM model */
					svm.load(svmModel.c_str());
					float prediction = svm.predict(cv::Mat(descriptor).t());
					LOGL(LOG_SVM, "Prediction=" << prediction);
					if (prediction < 0.5) {
						LOGL(LOG_TEXTREC,
								"Reject " << s_out << " on low SVM prediction");
						break;
					}
				}

				/* symmetry check */
				if (   //(i == 4) &&
						(1)) {
					cv::Mat inputRotated = cv::Mat::zeros(inputMat.rows,
							inputMat.cols, inputMat.type());
					cv::warpAffine(inputMat, inputRotated, rotMatrix,
							inputRotated.size());

					/* slide the window horizontally and keep the offset at which
					 * the crop is most similar to its own mirror image */
					int minOffset = 0;
					double min = 1e6;
					//width = 12 * charWidth;
					for (int offset = -50; offset < 30; offset += 2) {

						/* resize to HOGDescriptor dimensions */
						cv::Mat straightMat;
						cv::Mat flippedMat;

						/* extract shifted ROI */
						cv::Rect roi = cv::Rect(midx - width / 2 + offset,
								midy - height / 2, width, height);

						if ((roi.x >= 0) && (roi.y >= 0)
								&& (roi.x + roi.width < inputMat.cols)
								&& (roi.y + roi.height < inputMat.rows)) {
							straightMat = inputRotated(roi);
							cv::flip(straightMat, flippedMat, 1);
							cv::Scalar mssimV = getMSSIM(straightMat,
									flippedMat);
							double avgMssim = (mssimV.val[0] + mssimV.val[1]
									+ mssimV.val[2]) * 100 / 3;
							double dist = 1 / (avgMssim + 1);
							LOGL(LOG_SYMM_CHECK, "offset=" << offset << " dist=" << dist);
							if (dist < min) {
								min = dist;
								minOffset = offset;
								cv::imwrite("symm-max.png", straightMat);
							}
						}
					}

					LOGL(LOG_SYMM_CHECK, "MinOffset = " << minOffset
							<< " charWidth=" << charWidth);

					if (absd(minOffset) > charWidth / 3) {
						LOGL(LOG_TEXTREC,
								"Reject " << s_out << " on asymmetry");
						std::cout << "Reject " << s_out << " on asymmetry"
								<< std::endl;
						break;
					}
				}

				/* save for training only if orientation is ~horizontal */
				if (absd(theta_deg) < 7) {
					char *filename;
                    std::cout << " ------ " << s_out << std::endl;
					/* on failure asprintf leaves 'filename' undefined, so only
					 * use and free it when the call succeeded */
					if (asprintf(&filename, "bib-%05d-%s.png", this->bsid++, s_out.c_str()) >= 0) {
						cv::imwrite(filename, bibMat);
						free(filename);
					}
				}

			} else {
				LOGL(LOG_TEXTREC, "Reject as ROI outside boundaries");
				break;
			}

			/* all fine, add this bib number */
			text.push_back(s_out);
			LOGL(LOG_TEXTREC, "Bib number: '" << s_out << "'");

		} while (0);
		/* Tesseract allocates the result with new[], so it must be released
		 * with delete[] (safe on a null pointer) */
		delete[] out;
	}

	cvReleaseImage(&grayImage);

	return 0;

}