Code example #1
LandmarkCollection BobotLandmarkSource::getLandmarks() const {
	LandmarkCollection collection;
	if (index >= 0 && index < static_cast<int>(positions.size())) {
		const cv::Mat& image = imageSource->getImage();
		const cv::Rect_<float>& relativePosition = positions[index];
		if (relativePosition.x == 0 && relativePosition.y == 0 && relativePosition.width == 0 && relativePosition.height == 0) {
			collection.insert(make_shared<RectLandmark>(landmarkName)); // invisible landmark
		} else {
			cv::Rect_<float> rect(relativePosition.x * image.cols, relativePosition.y * image.rows,
					relativePosition.width * image.cols, relativePosition.height * image.rows);
			collection.insert(make_shared<RectLandmark>(landmarkName, rect));
		}
	} // Note: should this throw instead? It should never happen in regular operation now that get() was removed.
	return collection;
}
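BOBOT-style annotations store the bounding box as fractions of the image size, which is why the source above scales by the image dimensions to get pixel coordinates. A minimal, self-contained sketch of just that conversion (the 640x480 image and the sample rectangle are hypothetical values):

#include <opencv2/core/core.hpp>
#include <iostream>

int main() {
	int imageCols = 640, imageRows = 480; // hypothetical image size
	cv::Rect_<float> relative(0.25f, 0.3f, 0.5f, 0.4f); // hypothetical normalized box
	// The same scaling as in BobotLandmarkSource::getLandmarks():
	cv::Rect_<float> rect(relative.x * imageCols, relative.y * imageRows,
			relative.width * imageCols, relative.height * imageRows);
	std::cout << rect.x << " " << rect.y << " "
			<< rect.width << " " << rect.height << std::endl; // 160 144 320 192
	return 0;
}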
Code example #2
pair<path, LandmarkCollection> LfpwLandmarkFormatParser::readLine(const vector<string>& line, const string header)
{
	path imageName = path(line[0]).filename();
	LandmarkCollection landmarks;

	vector<string> headerTokens; // split the header
	boost::split(headerTokens, header, boost::is_any_of("\t"));
	int offset = -1; // offset from the landmark number to the corresponding entry in the 'line' vector (signed, to avoid relying on unsigned wrap-around)
	for (unsigned int i = 1; i <= 35; ++i) { // i corresponds to the landmark number of LFPW (see their picture)
		string landmarkName = headerTokens[3*i+offset].substr(0, headerTokens[3*i+offset].length()-2); // cuts off the last two characters ('_x')
		string tlmsName = lfpwToTlmsName(landmarkName);
		bool visible = true; // Todo: Could add "obscured" to Landmark class
		if (boost::lexical_cast<int>(line[3*i+offset+2]) == 2 || boost::lexical_cast<int>(line[3*i+offset+2]) == 3) {
			visible = false;
		}
		cv::Vec3f position(boost::lexical_cast<float>(line[3*i+offset]), boost::lexical_cast<float>(line[3*i+offset+1]), 0.0f);
		shared_ptr<Landmark> lm = make_shared<ModelLandmark>(tlmsName, position, visible);
		if (lm->getName().length() != 0) { // Todo: Find better solution
			landmarks.insert(lm);
		}
	}
	
	return make_pair(imageName, landmarks);
}
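The index arithmetic above assumes the LFPW export lays out one (x, y, visibility) triple per landmark starting at column 2; this layout is inferred from the code, not from the LFPW documentation. Spelled out:

// For landmark number i (1..35), with offset = -1:
//   x column          = 3*i + offset      -> i = 1 gives column 2
//   y column          = 3*i + offset + 1  -> i = 1 gives column 3
//   visibility column = 3*i + offset + 2  -> i = 1 gives column 4 (codes 2 and 3 mean "not visible")
// Column 0 holds the image name; the header token for the x column ends in "_x", which is
// why the last two characters are cut off to recover the landmark name.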
Code example #3
int main(int argc, char *argv[])
{
	#ifdef WIN32
	//_CrtSetDbgFlag ( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF ); // dump leaks at return
	//_CrtSetBreakAlloc(287);
	#endif
	
	string verboseLevelConsole;
	int deviceId = 0, kinectId = 0; // initialized so the camera fallback below never reads an indeterminate value
	bool useCamera = false, useKinect = false;
	bool useFileList = false;
	bool useImgs = false;
	bool useDirectory = false;
	bool useLandmarkFiles = false;
	vector<path> inputPaths;
	path inputFilelist;
	path inputDirectory;
	vector<path> inputFilenames;
	path configFilename;
	shared_ptr<ImageSource> imageSource;
	path landmarksDir; // TODO: Make more dynamic wrt landmark format. a) What about the loading-flags (1_Per_Folder etc) we have? b) Expose those flags to cmdline? c) Make a LmSourceLoader and he knows about a LM_TYPE (each corresponds to a Parser/Loader class?)
	string landmarkType;
	path sdmModelFile;
	path faceDetectorFilename;
	bool trackingMode;

	try {
		po::options_description desc("Allowed options");
		desc.add_options()
			("help,h",
				"produce help message")
			("verbose,v", po::value<string>(&verboseLevelConsole)->implicit_value("DEBUG")->default_value("INFO","show messages with INFO loglevel or below."),
				  "specify the verbosity of the console output: PANIC, ERROR, WARN, INFO, DEBUG or TRACE")
			("config,c", po::value<path>(&configFilename)->required(), 
				"path to a config (.cfg) file")
			("input,i", po::value<vector<path>>(&inputPaths)/*->required()*/, 
				"input from one or more files, a directory, or a  .lst/.txt-file containing a list of images")
			("device,d", po::value<int>(&deviceId)->implicit_value(0), 
				"A camera device ID for use with the OpenCV camera driver")
			("kinect,k", po::value<int>(&kinectId)->implicit_value(0), 
				"Windows only: Use a Kinect as camera. Optionally specify a device ID.")
			("model,m", po::value<path>(&sdmModelFile)->required(),
				"A SDM model file to load.")
			("face-detector,f", po::value<path>(&faceDetectorFilename)->required(),
				"Path to an XML CascadeClassifier from OpenCV.")
			("landmarks,l", po::value<path>(&landmarksDir), 
				"load landmark files from the given folder")
			("landmark-type,t", po::value<string>(&landmarkType), 
				"specify the type of landmarks to load: ibug")
			("tracking-mode,r", po::value<bool>(&trackingMode)->default_value(false)->implicit_value(true),
				"If on, V&J will be run to initialize the model only and after the model lost tracking. If off, V&J will be run on every frame/image.")
		;

		po::positional_options_description p;
		p.add("input", -1);

		po::variables_map vm;
		po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
		po::notify(vm);

		if (vm.count("help")) {
			cout << "Usage: fitter [options]\n";
			cout << desc;
			return EXIT_SUCCESS;
		}
		if (vm.count("landmarks")) {
			useLandmarkFiles = true;
			if (!vm.count("landmark-type")) {
				cout << "You have specified to use landmark files. Please also specify the type of the landmarks to load via --landmark-type or -t." << endl;
				return EXIT_FAILURE;
			}
		}

	} catch(std::exception& e) {
		cout << e.what() << endl;
		return EXIT_FAILURE;
	}

	loglevel logLevel;
	if(boost::iequals(verboseLevelConsole, "PANIC")) logLevel = loglevel::PANIC;
	else if(boost::iequals(verboseLevelConsole, "ERROR")) logLevel = loglevel::ERROR;
	else if(boost::iequals(verboseLevelConsole, "WARN")) logLevel = loglevel::WARN;
	else if(boost::iequals(verboseLevelConsole, "INFO")) logLevel = loglevel::INFO;
	else if(boost::iequals(verboseLevelConsole, "DEBUG")) logLevel = loglevel::DEBUG;
	else if(boost::iequals(verboseLevelConsole, "TRACE")) logLevel = loglevel::TRACE;
	else {
		cout << "Error: Invalid loglevel." << endl;
		return EXIT_FAILURE;
	}
	
	Loggers->getLogger("shapemodels").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Loggers->getLogger("render").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Loggers->getLogger("sdmTracking").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Logger appLogger = Loggers->getLogger("sdmTracking");

	appLogger.debug("Verbose level for console output: " + logging::loglevelToString(logLevel));
	appLogger.debug("Using config: " + configFilename.string());

	if (inputPaths.size() > 1) {
		// We assume the user has given several, valid images
		useImgs = true;
		inputFilenames = inputPaths;
	} else if (inputPaths.size() == 1) {
		// We assume the user has given either an image, directory, or a .lst-file
		if (inputPaths[0].extension().string() == ".lst" || inputPaths[0].extension().string() == ".txt") { // check for .lst or .txt first
			useFileList = true;
			inputFilelist = inputPaths.front();
		} else if (boost::filesystem::is_directory(inputPaths[0])) { // check if it's a directory
			useDirectory = true;
			inputDirectory = inputPaths.front();
		} else { // it must be an image
			useImgs = true;
			inputFilenames = inputPaths;
		}
	} else {
		// todo see HeadTracking.cpp
		useCamera = true;
		//appLogger.error("Please either specify one or several files, a directory, or a .lst-file containing a list of images to run the program!");
		//return EXIT_FAILURE;
	}

	if (useFileList) {
		appLogger.info("Using file-list as input: " + inputFilelist.string());
		shared_ptr<ImageSource> fileListImgSrc; // TODO VS2013 change to unique_ptr, rest below also
		try {
			fileListImgSrc = make_shared<FileListImageSource>(inputFilelist.string());
		} catch(const std::runtime_error& e) {
			appLogger.error(e.what());
			return EXIT_FAILURE;
		}
		imageSource = fileListImgSrc;
	}
	if (useImgs) {
		//imageSource = make_shared<FileImageSource>(inputFilenames);
		//imageSource = make_shared<RepeatingFileImageSource>("C:\\Users\\Patrik\\GitHub\\data\\firstrun\\ws_8.png");
		appLogger.info("Using input images: ");
		vector<string> inputFilenamesStrings;	// Hack until we use vector<path> (?)
		for (const auto& fn : inputFilenames) {
			appLogger.info(fn.string());
			inputFilenamesStrings.push_back(fn.string());
		}
		shared_ptr<ImageSource> fileImgSrc;
		try {
			fileImgSrc = make_shared<FileImageSource>(inputFilenamesStrings);
		} catch(const std::runtime_error& e) {
			appLogger.error(e.what());
			return EXIT_FAILURE;
		}
		imageSource = fileImgSrc;
	}
	if (useDirectory) {
		appLogger.info("Using input images from directory: " + inputDirectory.string());
		try {
			imageSource = make_shared<DirectoryImageSource>(inputDirectory.string());
		} catch(const std::runtime_error& e) {
			appLogger.error(e.what());
			return EXIT_FAILURE;
		}
	}
	if (useCamera) {
		imageSource = make_shared<CameraImageSource>(deviceId);
	}
	// Load the ground truth
	// Either a) use if/else for imageSource or labeledImageSource, or b) use an EmptyLandmarkSource
	shared_ptr<LabeledImageSource> labeledImageSource;
	shared_ptr<NamedLandmarkSource> landmarkSource;
	if (useLandmarkFiles) {
		vector<path> groundtruthDirs; groundtruthDirs.push_back(landmarksDir); // Todo: Make cmdline use a vector<path>
		shared_ptr<LandmarkFormatParser> landmarkFormatParser;
		if(boost::iequals(landmarkType, "lst")) {
			//landmarkFormatParser = make_shared<LstLandmarkFormatParser>();
			//landmarkSource = make_shared<DefaultNamedLandmarkSource>(LandmarkFileGatherer::gather(imageSource, string(), GatherMethod::SEPARATE_FILES, groundtruthDirs), landmarkFormatParser);
		} else if(boost::iequals(landmarkType, "ibug")) {
			landmarkFormatParser = make_shared<IbugLandmarkFormatParser>();
			landmarkSource = make_shared<DefaultNamedLandmarkSource>(LandmarkFileGatherer::gather(imageSource, ".pts", GatherMethod::ONE_FILE_PER_IMAGE_SAME_DIR, groundtruthDirs), landmarkFormatParser);
		} else {
			cout << "Error: Invalid ground truth type." << endl;
			return EXIT_FAILURE;
		}
	} else {
		landmarkSource = make_shared<EmptyLandmarkSource>();
	}
	labeledImageSource = make_shared<NamedLabeledImageSource>(imageSource, landmarkSource);
	ptree pt;
	try {
		boost::property_tree::info_parser::read_info(configFilename.string(), pt);
	} catch(const boost::property_tree::ptree_error& error) {
		appLogger.error(error.what());
		return EXIT_FAILURE;
	}
	
	std::chrono::time_point<std::chrono::system_clock> start, end;
	Mat img;
	const string windowName = "win";

	vector<imageio::ModelLandmark> landmarks;

	cv::namedWindow(windowName);

	SdmLandmarkModel lmModel = SdmLandmarkModel::load(sdmModelFile);
	SdmLandmarkModelFitting modelFitter(lmModel);

	// faceDetectorFilename: e.g. opencv\\sources\\data\\haarcascades\\haarcascade_frontalface_alt2.xml
	cv::CascadeClassifier faceCascade;
	if (!faceCascade.load(faceDetectorFilename.string()))
	{
		cout << "Error loading face detection model." << endl;
		return EXIT_FAILURE;
	}
	
	bool runRigidAlign = true;

	std::ofstream resultsFile("C:\\Users\\Patrik\\Documents\\GitHub\\sdm_lfpw_tr_68lm_10s_5c_RESULTS.txt");
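	// iBug-68 ids (assumed mapping): 9 = chin tip, 31 = nose tip, 37/40 and 43/46 = right/left eye corners, 49/55 = mouth corners, 63/67 = inner-lip midpoints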
	vector<string> comparisonLandmarks({ "9", "31", "37", "40", "43", "46", "49", "55", "63", "67" });

	while(labeledImageSource->next()) {
		start = std::chrono::system_clock::now();
		appLogger.info("Starting to process " + labeledImageSource->getName().string());
		img = labeledImageSource->getImage();
		Mat landmarksImage = img.clone();

		LandmarkCollection groundtruth = labeledImageSource->getLandmarks();
		vector<shared_ptr<Landmark>> lmv = groundtruth.getLandmarks();
		for (const auto& l : lmv) {
			cv::circle(landmarksImage, l->getPoint2D(), 3, Scalar(255.0f, 0.0f, 0.0f));
		}

		// iBug 68 points. No eye-centers. Calculate them:
		cv::Point2f reye_c = (groundtruth.getLandmark("37")->getPosition2D() + groundtruth.getLandmark("40")->getPosition2D()) / 2.0f;
		cv::Point2f leye_c = (groundtruth.getLandmark("43")->getPosition2D() + groundtruth.getLandmark("46")->getPosition2D()) / 2.0f;
		cv::circle(landmarksImage, reye_c, 3, Scalar(255.0f, 0.0f, 127.0f));
		cv::circle(landmarksImage, leye_c, 3, Scalar(255.0f, 0.0f, 127.0f));
		cv::Scalar interEyeDistance = cv::norm(Vec2f(reye_c), Vec2f(leye_c), cv::NORM_L2);

		Mat imgGray;
		cvtColor(img, imgGray, cv::COLOR_BGR2GRAY);
		vector<cv::Rect> faces;
		
		// face detection
		faceCascade.detectMultiScale(img, faces, 1.2, 2, 0, cv::Size(50, 50));
		//faces.push_back({ 172, 199, 278, 278 });
		if (faces.empty()) {
			runRigidAlign = true;
			cv::imshow(windowName, landmarksImage);
			cv::waitKey(5);
			continue;
		}
		for (const auto& f : faces) {
			cv::rectangle(landmarksImage, f, cv::Scalar(0.0f, 0.0f, 255.0f));
		}

		// Check if the face corresponds to the ground-truth:
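		// (the landmark centroid, shifted 30px upwards since the 68 points cluster on the lower face, must lie within 30px of the first detection's centre)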
		Mat gtLmsRowX(1, lmv.size(), CV_32FC1);
		Mat gtLmsRowY(1, lmv.size(), CV_32FC1);
		int idx = 0;
		for (const auto& l : lmv) {
			gtLmsRowX.at<float>(idx) = l->getX();
			gtLmsRowY.at<float>(idx) = l->getY();
			++idx;
		}
		double minWidth, maxWidth, minHeight, maxHeight;
		cv::minMaxIdx(gtLmsRowX, &minWidth, &maxWidth);
		cv::minMaxIdx(gtLmsRowY, &minHeight, &maxHeight);
		float cx = cv::mean(gtLmsRowX)[0];
		float cy = cv::mean(gtLmsRowY)[0] - 30.0f;
		// do this in relation to the IED, not absolute pixel values
		if (std::abs(cx - (faces[0].x+faces[0].width/2.0f)) > 30.0f || std::abs(cy - (faces[0].y+faces[0].height/2.0f)) > 30.0f) {
			//cv::imshow(windowName, landmarksImage);
			//cv::waitKey();
			continue;
		}
		
		Mat modelShape = lmModel.getMeanShape();
		//if (runRigidAlign) {
			modelShape = modelFitter.alignRigid(modelShape, faces[0]);
			//runRigidAlign = false;
		//}
		
	
		// print the mean initialization
		for (int i = 0; i < lmModel.getNumLandmarks(); ++i) {
			cv::circle(landmarksImage, Point2f(modelShape.at<float>(i, 0), modelShape.at<float>(i + lmModel.getNumLandmarks(), 0)), 3, Scalar(255.0f, 0.0f, 255.0f));
		}
		modelShape = modelFitter.optimize(modelShape, imgGray);
		for (int i = 0; i < lmModel.getNumLandmarks(); ++i) {
			cv::circle(landmarksImage, Point2f(modelShape.at<float>(i, 0), modelShape.at<float>(i + lmModel.getNumLandmarks(), 0)), 3, Scalar(0.0f, 255.0f, 0.0f));
		}

		imwrite("C:\\Users\\Patrik\\Documents\\GitHub\\out_sdm_lms\\" + labeledImageSource->getName().filename().string(), landmarksImage);

		resultsFile << "# " << labeledImageSource->getName() << std::endl;
		for (const auto& lmId : comparisonLandmarks) {

			shared_ptr<Landmark> gtlm = groundtruth.getLandmark(lmId); // Todo: Handle case when LM not found
			cv::Point2f gt = gtlm->getPoint2D();
			cv::Point2f det = lmModel.getLandmarkAsPoint(lmId, modelShape);

			float dx = (gt.x - det.x);
			float dy = (gt.y - det.y);
			float diff = std::sqrt(dx*dx + dy*dy);
			diff = diff / interEyeDistance[0]; // normalize by the IED

			resultsFile << diff << " # " << lmId << std::endl;
		}

		
		end = std::chrono::system_clock::now();
		int elapsed_mseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count();
		appLogger.info("Finished processing. Elapsed time: " + lexical_cast<string>(elapsed_mseconds) + "ms.\n");
		
		//cv::imshow(windowName, landmarksImage);
		//cv::waitKey(5);

	}

	resultsFile.close();

	return 0;
}
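The per-landmark error written to resultsFile is the Euclidean distance between ground truth and detection, normalised by the inter-eye distance:

$$ e_i = \frac{\lVert \mathbf{x}^{gt}_i - \mathbf{x}^{det}_i \rVert_2}{d_{IED}}, \qquad d_{IED} = \lVert \mathbf{c}_{reye} - \mathbf{c}_{leye} \rVert_2, $$

where the eye centres are the midpoints of landmarks 37/40 and 43/46, as computed at the top of the loop.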
Code example #4
File: fitter2.cpp  Project: fulrbuaa/FeatureDetection
int main(int argc, char *argv[])
{
	#ifdef WIN32
	//_CrtSetDbgFlag ( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF ); // dump leaks at return
	//_CrtSetBreakAlloc(287);
	#endif
	
	string verboseLevelConsole;
	path inputFilename;
	path configFilename;
	path inputLandmarks;
	string landmarkType;
	path outputPath(".");

	try {
		po::options_description desc("Allowed options");
		desc.add_options()
			("help,h",
				"produce help message")
			("verbose,v", po::value<string>(&verboseLevelConsole)->implicit_value("DEBUG")->default_value("INFO","show messages with INFO loglevel or below."),
				  "specify the verbosity of the console output: PANIC, ERROR, WARN, INFO, DEBUG or TRACE")
			("config,c", po::value<path>(&configFilename)->required(), 
				"path to a config (.cfg) file")
			("input,i", po::value<path>(&inputFilename)->required(),
				"input filename")
			("landmarks,l", po::value<path>(&inputLandmarks)->required(),
				"input landmarks")
			("landmark-type,t", po::value<string>(&landmarkType)->required(),
				"specify the type of landmarks: ibug")
		;

		po::variables_map vm;
		po::store(po::command_line_parser(argc, argv).options(desc).run(), vm); // style(po::command_line_style::unix_style | po::command_line_style::allow_long_disguise)
		if (vm.count("help")) {
			cout << "Usage: fitter [options]\n";
			cout << desc;
			return EXIT_SUCCESS;
		}
		po::notify(vm);

	}
	catch (po::error& e) {
		cout << "Error while parsing command-line arguments: " << e.what() << endl;
		cout << "Use --help to display a list of options." << endl;
		return EXIT_FAILURE;
	}

	LogLevel logLevel;
	if(boost::iequals(verboseLevelConsole, "PANIC")) logLevel = LogLevel::Panic;
	else if(boost::iequals(verboseLevelConsole, "ERROR")) logLevel = LogLevel::Error;
	else if(boost::iequals(verboseLevelConsole, "WARN")) logLevel = LogLevel::Warn;
	else if(boost::iequals(verboseLevelConsole, "INFO")) logLevel = LogLevel::Info;
	else if(boost::iequals(verboseLevelConsole, "DEBUG")) logLevel = LogLevel::Debug;
	else if(boost::iequals(verboseLevelConsole, "TRACE")) logLevel = LogLevel::Trace;
	else {
		cout << "Error: Invalid LogLevel." << endl;
		return EXIT_FAILURE;
	}
	
	Loggers->getLogger("morphablemodel").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Loggers->getLogger("render").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Loggers->getLogger("fitter").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Logger appLogger = Loggers->getLogger("fitter");

	appLogger.debug("Verbose level for console output: " + logging::logLevelToString(logLevel));
	appLogger.debug("Using config: " + configFilename.string());

	// Load the image
	shared_ptr<ImageSource> imageSource;
	try {
		imageSource = make_shared<FileImageSource>(inputFilename.string());
	} catch(const std::runtime_error& e) {
		appLogger.error(e.what());
		return EXIT_FAILURE;
	}

	// Load the ground truth
	shared_ptr<LabeledImageSource> labeledImageSource;
	shared_ptr<NamedLandmarkSource> landmarkSource;
	
	shared_ptr<LandmarkFormatParser> landmarkFormatParser;
	if(boost::iequals(landmarkType, "ibug")) {
		landmarkFormatParser = make_shared<IbugLandmarkFormatParser>();
		landmarkSource = make_shared<DefaultNamedLandmarkSource>(vector<path>{inputLandmarks}, landmarkFormatParser);
	} else if (boost::iequals(landmarkType, "did")) {
		landmarkFormatParser = make_shared<DidLandmarkFormatParser>();
		landmarkSource = make_shared<DefaultNamedLandmarkSource>(vector<path>{inputLandmarks}, landmarkFormatParser);
	} else {
		cout << "Error: Invalid ground truth type." << endl;
		return EXIT_FAILURE;
	}
	labeledImageSource = make_shared<NamedLabeledImageSource>(imageSource, landmarkSource);
	
	// Load the config file
	ptree pt;
	try {
		boost::property_tree::info_parser::read_info(configFilename.string(), pt);
	} catch(const boost::property_tree::ptree_error& error) {
		appLogger.error(error.what());
		return EXIT_FAILURE;
	}
	// Load the Morphable Model
	morphablemodel::MorphableModel morphableModel;
	try {
		morphableModel = morphablemodel::MorphableModel::load(pt.get_child("morphableModel"));
	} catch (const boost::property_tree::ptree_error& error) {
		appLogger.error(error.what());
		return EXIT_FAILURE;
	}
	catch (const std::runtime_error& error) {
		appLogger.error(error.what());
		return EXIT_FAILURE;
	}
	
	//const string windowName = "win";

	// Create the output directory if it doesn't exist yet
	if (!boost::filesystem::exists(outputPath)) {
		boost::filesystem::create_directory(outputPath);
	}
	

	std::chrono::time_point<std::chrono::system_clock> start, end;
	Mat img;
	morphablemodel::OpenCVCameraEstimation epnpCameraEstimation(morphableModel); // todo: this can all go to only init once
	morphablemodel::AffineCameraEstimation affineCameraEstimation(morphableModel);
	vector<imageio::ModelLandmark> landmarks;


	labeledImageSource->next();
	start = std::chrono::system_clock::now();
	appLogger.info("Starting to process " + labeledImageSource->getName().string());
	img = labeledImageSource->getImage();

	LandmarkCollection lms = labeledImageSource->getLandmarks();
	vector<shared_ptr<Landmark>> lmsv = lms.getLandmarks();
	landmarks.clear();
	Mat landmarksImage = img.clone(); // blue rect = the used landmarks
	for (const auto& lm : lmsv) {
		lm->draw(landmarksImage);
		//if (lm->getName() == "right.eye.corner_outer" || lm->getName() == "right.eye.corner_inner" || lm->getName() == "left.eye.corner_outer" || lm->getName() == "left.eye.corner_inner" || lm->getName() == "center.nose.tip" || lm->getName() == "right.lips.corner" || lm->getName() == "left.lips.corner") {
		landmarks.emplace_back(imageio::ModelLandmark(lm->getName(), lm->getPosition2D()));
		cv::rectangle(landmarksImage, cv::Point(cvRound(lm->getX() - 2.0f), cvRound(lm->getY() - 2.0f)), cv::Point(cvRound(lm->getX() + 2.0f), cvRound(lm->getY() + 2.0f)), cv::Scalar(255, 0, 0));
		//}
	}

	// Start affine camera estimation (Aldrian paper)
	Mat affineCamLandmarksProjectionImage = landmarksImage.clone(); // the affine LMs are currently not used (don't know how to render without z-vals)
	Mat affineCam = affineCameraEstimation.estimate(landmarks);
	for (const auto& lm : landmarks) {
		Vec3f tmp = morphableModel.getShapeModel().getMeanAtPoint(lm.getName());
		Mat p(4, 1, CV_32FC1);
		p.at<float>(0, 0) = tmp[0];
		p.at<float>(1, 0) = tmp[1];
		p.at<float>(2, 0) = tmp[2];
		p.at<float>(3, 0) = 1;
		Mat p2d = affineCam * p;
		Point2f pp(p2d.at<float>(0, 0), p2d.at<float>(1, 0)); // Todo: check
		cv::circle(affineCamLandmarksProjectionImage, pp, 4.0f, Scalar(0.0f, 255.0f, 0.0f));
	}
	// End Affine est.

	// Estimate the shape coefficients
	vector<float> fittedCoeffs;
	{
		// $\hat{V} \in R^{3N\times m-1}$, subselect the rows of the eigenvector matrix $V$ associated with the $N$ feature points
		// And we insert a row of zeros after every third row, resulting in matrix $\hat{V}_h \in R^{4N\times m-1}$:
		Mat V_hat_h = Mat::zeros(4 * landmarks.size(), morphableModel.getShapeModel().getNumberOfPrincipalComponents(), CV_32FC1);
		int rowIndex = 0;
		for (const auto& lm : landmarks) {
			Mat basisRows = morphableModel.getShapeModel().getPcaBasis(lm.getName()); // getPcaBasis should return the not-normalized basis I think
			basisRows.copyTo(V_hat_h.rowRange(rowIndex, rowIndex + 3));
			rowIndex += 4; // replace 3 rows and skip the 4th one, it has all zeros
		}
		// Form a block diagonal matrix $P \in R^{3N\times 4N}$ in which the camera matrix C (P_Affine, affineCam) is placed on the diagonal:
		Mat P = Mat::zeros(3 * landmarks.size(), 4 * landmarks.size(), CV_32FC1);
		for (int i = 0; i < landmarks.size(); ++i) {
			Mat submatrixToReplace = P.colRange(4 * i, (4 * i) + 4).rowRange(3 * i, (3 * i) + 3);
			affineCam.copyTo(submatrixToReplace);
		}
		// The variances: We set the 3D and 2D variances to one static value for now. $sigma^2_2D = sqrt(1) + sqrt(3)^2 = 4$
		float sigma_2D = std::sqrt(4);
		Mat Sigma = Mat::zeros(3 * landmarks.size(), 3 * landmarks.size(), CV_32FC1);
		for (int i = 0; i < 3 * landmarks.size(); ++i) {
			Sigma.at<float>(i, i) = 1.0f / sigma_2D;
		}
		Mat Omega = Sigma.t() * Sigma;
		// The landmarks in matrix notation (in homogeneous coordinates), $3N\times 1$
		Mat y = Mat::ones(3 * landmarks.size(), 1, CV_32FC1);
		for (int i = 0; i < landmarks.size(); ++i) {
			y.at<float>(3 * i, 0) = landmarks[i].getX();
			y.at<float>((3 * i) + 1, 0) = landmarks[i].getY();
			// the position (3*i)+2 stays 1 (homogeneous coordinate)
		}
		// The mean, with an added homogeneous coordinate (x_1, y_1, z_1, 1, x_2, ...)^t
		Mat v_bar = Mat::ones(4 * landmarks.size(), 1, CV_32FC1);
		for (int i = 0; i < landmarks.size(); ++i) {
			Vec3f modelMean = morphableModel.getShapeModel().getMeanAtPoint(landmarks[i].getName());
			v_bar.at<float>(4 * i, 0) = modelMean[0];
			v_bar.at<float>((4 * i) + 1, 0) = modelMean[1];
			v_bar.at<float>((4 * i) + 2, 0) = modelMean[2];
			// the position (4*i)+3 stays 1 (homogeneous coordinate)
		}

		// Bring into standard regularised quadratic form with diagonal distance matrix Omega
		Mat A = P * V_hat_h;
		Mat b = P * v_bar - y;
		//Mat c_s; // The x, we solve for this! (the variance-normalized shape parameter vector, $c_s = [a_1/sigma_{s,1} , ..., a_m-1/sigma_{s,m-1}]^t$
		float lambda = 0.1f; // lambdaIn; //0.01f; // The weight of the regularisation
		int numShapePc = morphableModel.getShapeModel().getNumberOfPrincipalComponents();
		Mat AtOmegaA = A.t() * Omega * A;
		Mat AtOmegaAReg = AtOmegaA + lambda * Mat::eye(numShapePc, numShapePc, CV_32FC1);
		Mat AtOmegaARegInv = AtOmegaAReg.inv(/*cv::DECOMP_SVD*/); // check if invertible. Use Eigen? Regularisation? (see SDM). Maybe we need pseudo-inverse.
		Mat AtOmegatb = A.t() * Omega.t() * b;
		Mat c_s = -AtOmegaARegInv * AtOmegatb;
		fittedCoeffs = vector<float>(c_s);
	}
	

	// End estimate the shape coefficients

	//std::shared_ptr<render::Mesh> meanMesh = std::make_shared<render::Mesh>(morphableModel.getMean());
	//render::Mesh::writeObj(*meanMesh.get(), "C:\\Users\\Patrik\\Documents\\GitHub\\mean.obj");

	//const float aspect = (float)img.cols / (float)img.rows; // 640/480
	//render::Camera camera(Vec3f(0.0f, 0.0f, 0.0f), /*horizontalAngle*/0.0f*(CV_PI / 180.0f), /*verticalAngle*/0.0f*(CV_PI / 180.0f), render::Frustum(-1.0f*aspect, 1.0f*aspect, -1.0f, 1.0f, /*zNear*/-0.1f, /*zFar*/-100.0f));
	/*render::SoftwareRenderer r(img.cols, img.rows, camera); // 640, 480
	r.perspectiveDivision = render::SoftwareRenderer::PerspectiveDivision::None;
	r.doClippingInNDC = false;
	r.directToScreenTransform = true;
	r.doWindowTransform = false;
	r.setObjectToScreenTransform(shapemodels::AffineCameraEstimation::calculateFullMatrix(affineCam));
	r.draw(meshToDraw, nullptr);
	Mat buff = r.getImage();*/

	/*
	std::ofstream myfile;
	path coeffsFilename = outputPath / labeledImageSource->getName().stem();
	myfile.open(coeffsFilename.string() + ".txt");
	for (int i = 0; i < fittedCoeffs.size(); ++i) {
	myfile << fittedCoeffs[i] * std::sqrt(morphableModel.getShapeModel().getEigenvalue(i)) << std::endl;

	}
	myfile.close();
	*/

	//std::shared_ptr<render::Mesh> meshToDraw = std::make_shared<render::Mesh>(morphableModel.drawSample(fittedCoeffs, vector<float>(morphableModel.getColorModel().getNumberOfPrincipalComponents(), 0.0f)));
	//render::Mesh::writeObj(*meshToDraw.get(), "C:\\Users\\Patrik\\Documents\\GitHub\\fittedMesh.obj");

	//r.resetBuffers();
	//r.draw(meshToDraw, nullptr);
	// TODO: REPROJECT THE POINTS FROM THE C_S MODEL HERE AND SEE IF THE LMS REALLY GO FURTHER OUT OR JUST THE REST OF THE MESH

	//cv::imshow(windowName, img);
	//cv::waitKey(5);


	end = std::chrono::system_clock::now();
	int elapsed_mseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
	appLogger.info("Finished processing. Elapsed time: " + lexical_cast<string>(elapsed_mseconds)+"ms.\n");


	return 0;
}
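The coefficient-estimation block in the middle of this example solves the regularised linear system of Aldrian's shape-to-landmarks fitting. In the notation of the code it computes

$$ \mathbf{c}_s = -\left(A^\top \Omega A + \lambda I\right)^{-1} A^\top \Omega\, \mathbf{b}, \qquad A = P \hat{V}_h, \quad \mathbf{b} = P \bar{\mathbf{v}} - \mathbf{y}, \quad \Omega = \Sigma^\top \Sigma, $$

which is the minimiser of $\lVert \Sigma (A \mathbf{c} + \mathbf{b}) \rVert^2 + \lambda \lVert \mathbf{c} \rVert^2$ with $\Sigma = \frac{1}{\sigma_{2D}} I$ and $\lambda = 0.1$.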
Code example #5
void Benchmark::run(
		shared_ptr<LabeledImageSource> source, shared_ptr<FeatureExtractor> extractor, shared_ptr<TrainableProbabilisticClassifier> classifier,
		float confidenceThreshold, size_t negatives, size_t initialNegatives, ostream& frameOut, ostream& resultOut) const {

	uint32_t frameCount = 0;
	uint64_t extractedPatchCountSum = 0; // total amount of extracted patches
	uint64_t classifiedPatchCountSum = 0; // total amount of classified patches
	uint64_t negativePatchCountSum = 0; // amount of negative patches of all frames except the first ones (where classifier is not usable)
	uint32_t positivePatchCountSum = 0;
	uint64_t updateTimeSum = 0;
	uint64_t extractTimeSum = 0;
	uint64_t falseRejectionsSum = 0;
	uint64_t falseAcceptancesSum = 0;
	uint64_t classifyTimeSum = 0;
	double locationAccuracySum = 0;
	float worstLocationAccuracy = 1;
	uint32_t positiveTrainingCountSum = 0;
	uint32_t negativeTrainingCountSum = 0;
	uint64_t trainingTimeSum = 0;
	cv::Size patchSize;

	bool initialized = false;
	float aspectRatio = 1;
	while (source->next()) {
		Mat image = source->getImage();
		const LandmarkCollection landmarks = source->getLandmarks();
		if (landmarks.isEmpty())
			continue;
		const shared_ptr<Landmark> landmark = landmarks.getLandmark();
		bool hasGroundTruth = landmark->isVisible();
		if (!hasGroundTruth && !initialized)
			continue;

		Rect_<float> groundTruth;
		if (hasGroundTruth) {
			groundTruth = landmark->getRect();
			aspectRatio = groundTruth.width / groundTruth.height;
			if (!initialized) {
				DirectPyramidFeatureExtractor* pyramidExtractor = dynamic_cast<DirectPyramidFeatureExtractor*>(extractor.get());
				if (pyramidExtractor != nullptr) {
					patchSize = pyramidExtractor->getPatchSize();
					double dimension = patchSize.width * patchSize.height;
					double patchHeight = sqrt(dimension / aspectRatio);
					double patchWidth = aspectRatio * patchHeight;
					pyramidExtractor->setPatchSize(cvRound(patchWidth), cvRound(patchHeight));
				}
				initialized = true;
			}
		}

		steady_clock::time_point extractionStart = steady_clock::now();
		extractor->update(image);
		steady_clock::time_point extractionBetween = steady_clock::now();
		shared_ptr<Patch> positivePatch;
		if (hasGroundTruth) {
			positivePatch = extractor->extract(
					cvRound(groundTruth.x + 0.5f * groundTruth.width),
					cvRound(groundTruth.y + 0.5f * groundTruth.height),
					cvRound(groundTruth.width),
					cvRound(groundTruth.height));
			if (!positivePatch)
				continue;
		}
		vector<shared_ptr<Patch>> negativePatches;
		vector<shared_ptr<Patch>> neutralPatches;
		int patchCount = hasGroundTruth ? 1 : 0;
		for (float size = sizeMin; size <= sizeMax; size *= sizeScale) {
			float realHeight = size * image.rows;
			float realWidth = aspectRatio * realHeight;
			int width = cvRound(realWidth);
			int height = cvRound(realHeight);
			int minX = width / 2;
			int minY = height / 2;
			int maxX = image.cols - width + width / 2;
			int maxY = image.rows - height + height / 2;
			float stepX = std::max(1.f, step * realWidth);
			float stepY = std::max(1.f, step * realHeight);
			for (float x = minX; cvRound(x) < maxX; x += stepX) {
				for (float y = minY; cvRound(y) < maxY; y += stepY) {
					shared_ptr<Patch> patch = extractor->extract(cvRound(x), cvRound(y), width, height);
					if (patch) {
						patchCount++;
						if (!hasGroundTruth || computeOverlap(groundTruth, patch->getBounds()) <= allowedOverlap)
							negativePatches.push_back(patch);
						else
							neutralPatches.push_back(patch);
					}
				}
			}
		}
		steady_clock::time_point extractionEnd = steady_clock::now();
		milliseconds updateDuration = duration_cast<milliseconds>(extractionBetween - extractionStart);
		milliseconds extractDuration = duration_cast<milliseconds>(extractionEnd - extractionBetween);

		frameOut << source->getName().filename().generic_string() << ": " << updateDuration.count() << "ms " << patchCount << " " << extractDuration.count() << "ms";
		frameCount++;
		extractedPatchCountSum += patchCount;
		updateTimeSum += updateDuration.count();
		extractTimeSum += extractDuration.count();

		if (classifier->isUsable()) {
			classifiedPatchCountSum += patchCount;
			if (hasGroundTruth)
				positivePatchCountSum++; // amount needed for false acceptance rate, therefore only positive patches that are tested
			negativePatchCountSum += negativePatches.size(); // amount needed for false acceptance rate, therefore only negative patches that are tested

			steady_clock::time_point classificationStart = steady_clock::now();
			// positive patch
			shared_ptr<ClassifiedPatch> classifiedPositivePatch;
			if (hasGroundTruth)
				classifiedPositivePatch = make_shared<ClassifiedPatch>(positivePatch, classifier->classify(positivePatch->getData()));
			// negative patches
			vector<shared_ptr<ClassifiedPatch>> classifiedNegativePatches;
			classifiedNegativePatches.reserve(negativePatches.size());
			for (const shared_ptr<Patch>& patch : negativePatches)
				classifiedNegativePatches.push_back(make_shared<ClassifiedPatch>(patch, classifier->classify(patch->getData())));
			// neutral patches
			vector<shared_ptr<ClassifiedPatch>> classifiedNeutralPatches;
			classifiedNeutralPatches.reserve(neutralPatches.size());
			for (const shared_ptr<Patch>& patch : neutralPatches)
				classifiedNeutralPatches.push_back(make_shared<ClassifiedPatch>(patch, classifier->classify(patch->getData())));
			steady_clock::time_point classificationEnd = steady_clock::now();
			milliseconds classificationDuration = duration_cast<milliseconds>(classificationEnd - classificationStart);

			int falseRejections = !hasGroundTruth || classifiedPositivePatch->isPositive() ? 0 : 1;
			int falseAcceptances = std::count_if(classifiedNegativePatches.begin(), classifiedNegativePatches.end(), [](shared_ptr<ClassifiedPatch>& patch) {
				return patch->isPositive();
			});

			frameOut << " " << falseRejections << "/" << (hasGroundTruth ? "1 " : "0 ") << falseAcceptances << "/" << negativePatches.size() << " " << classificationDuration.count() << "ms";
			falseRejectionsSum += falseRejections;
			falseAcceptancesSum += falseAcceptances;
			classifyTimeSum += classificationDuration.count();

			if (hasGroundTruth) {
				shared_ptr<ClassifiedPatch> bestNegativePatch = *std::max_element(classifiedNegativePatches.begin(), classifiedNegativePatches.end(), [](shared_ptr<ClassifiedPatch>& a, shared_ptr<ClassifiedPatch>& b) {
					return a->getProbability() < b->getProbability(); // max_element expects a less-than comparator; '>' would select the weakest patch
				});
				shared_ptr<ClassifiedPatch> bestNeutralPatch = *std::max_element(classifiedNeutralPatches.begin(), classifiedNeutralPatches.end(), [](shared_ptr<ClassifiedPatch>& a, shared_ptr<ClassifiedPatch>& b) {
					return a->getProbability() < b->getProbability(); // less-than comparator, as above
				});
				float overlap = 0;
				if (classifiedPositivePatch->getProbability() >= bestNeutralPatch->getProbability() && classifiedPositivePatch->getProbability() >= bestNegativePatch->getProbability())
					overlap = computeOverlap(groundTruth, classifiedPositivePatch->getPatch()->getBounds());
				else if (bestNeutralPatch->getProbability() >= bestNegativePatch->getProbability())
					overlap = computeOverlap(groundTruth, bestNeutralPatch->getPatch()->getBounds());
				else
					overlap = computeOverlap(groundTruth, bestNegativePatch->getPatch()->getBounds());

				frameOut << " " << cvRound(100 * overlap) << "%";
				locationAccuracySum += overlap;
				worstLocationAccuracy = std::min(worstLocationAccuracy, overlap);
			}

			vector<shared_ptr<ClassifiedPatch>> negativeTrainingCandidates;
			for (shared_ptr<ClassifiedPatch>& patch : classifiedNegativePatches) {
				if (patch->isPositive() || patch->getProbability() > 1 - confidenceThreshold)
					negativeTrainingCandidates.push_back(patch);
			}
			if (negativeTrainingCandidates.size() > negatives) {
				std::partial_sort(negativeTrainingCandidates.begin(), negativeTrainingCandidates.begin() + negatives, negativeTrainingCandidates.end(), [](shared_ptr<ClassifiedPatch>& a, shared_ptr<ClassifiedPatch>& b) {
					return a->getProbability() > b->getProbability();
				});
				negativeTrainingCandidates.resize(negatives);
			}
			vector<Mat> negativeTrainingExamples;
			negativeTrainingExamples.reserve(negativeTrainingCandidates.size());
			for (const shared_ptr<ClassifiedPatch>& patch : negativeTrainingCandidates)
				negativeTrainingExamples.push_back(patch->getPatch()->getData());
			vector<Mat> positiveTrainingExamples;
			if (hasGroundTruth && (!classifiedPositivePatch->isPositive() || classifiedPositivePatch->getProbability() < confidenceThreshold))
				positiveTrainingExamples.push_back(classifiedPositivePatch->getPatch()->getData());

			steady_clock::time_point trainingStart = steady_clock::now();
			classifier->retrain(positiveTrainingExamples, negativeTrainingExamples);
			steady_clock::time_point trainingEnd = steady_clock::now();
			milliseconds trainingDuration = duration_cast<milliseconds>(trainingEnd - trainingStart);

			frameOut << " " << positiveTrainingExamples.size() << "+" << negativeTrainingExamples.size() << " " << trainingDuration.count() << "ms";
			positiveTrainingCountSum += positiveTrainingExamples.size();
			negativeTrainingCountSum += negativeTrainingExamples.size();
			trainingTimeSum += trainingDuration.count();
		} else {
			int step = std::max<int>(1, negativePatches.size() / initialNegatives); // guard against a zero step (fewer negatives than requested), which would loop forever
			int first = step / 2;
			vector<Mat> negativeTrainingExamples;
			negativeTrainingExamples.reserve(initialNegatives);
			for (size_t i = first; i < negativePatches.size(); i += step)
				negativeTrainingExamples.push_back(negativePatches[i]->getData());
			vector<Mat> positiveTrainingExamples;
			if (hasGroundTruth)
				positiveTrainingExamples.push_back(positivePatch->getData());

			steady_clock::time_point trainingStart = steady_clock::now();
			classifier->retrain(positiveTrainingExamples, negativeTrainingExamples);
			steady_clock::time_point trainingEnd = steady_clock::now();
			milliseconds trainingDuration = duration_cast<milliseconds>(trainingEnd - trainingStart);

			frameOut << " " << positiveTrainingExamples.size() << "+" << negativeTrainingExamples.size() << " " << trainingDuration.count() << "ms";
			positiveTrainingCountSum += positiveTrainingExamples.size();
			negativeTrainingCountSum += negativeTrainingExamples.size();
			trainingTimeSum += trainingDuration.count();
		}
		frameOut << endl;
	}

	DirectPyramidFeatureExtractor* pyramidExtractor = dynamic_cast<DirectPyramidFeatureExtractor*>(extractor.get());
	if (pyramidExtractor != nullptr) {
		pyramidExtractor->setPatchSize(patchSize.width, patchSize.height);
	}

	if (frameCount == 0) {
		frameOut << "no valid frames" << endl;
		resultOut << "no valid frames" << endl;
	} else if (frameCount == 1 || positivePatchCountSum < 1) {
		frameOut << "too few valid frames" << endl;
		resultOut << "too few valid frames" << endl;
	} else if (extractedPatchCountSum == 0) {
		frameOut << "no valid patches" << endl;
		resultOut << "no valid patches" << endl;
	} else if (negativePatchCountSum == 0) {
		frameOut << "no valid negative patches" << endl;
		resultOut << "no valid negative patches" << endl;
	} else {
		frameOut << frameCount << " " << extractedPatchCountSum << " " << classifiedPatchCountSum << " " << negativePatchCountSum << " " << positivePatchCountSum << " "
				<< updateTimeSum << "ms " << extractTimeSum << "ms " << falseRejectionsSum << " " << falseAcceptancesSum << " "
				<< classifyTimeSum << "ms " << cvRound(100 * locationAccuracySum / positivePatchCountSum) << "% " << cvRound(100 * worstLocationAccuracy) << "% "
				<< positiveTrainingCountSum << " " << negativeTrainingCountSum << " " << trainingTimeSum << "ms" << endl;

		float falseRejectionRate = static_cast<float>(falseRejectionsSum) / static_cast<float>(frameCount - 1);
		float falseAcceptanceRate = static_cast<float>(falseAcceptancesSum) / static_cast<float>(negativePatchCountSum);
		float avgPositiveTrainingCount = static_cast<float>(positiveTrainingCountSum) / static_cast<float>(frameCount);
		float avgNegativeTrainingCount = static_cast<float>(negativeTrainingCountSum) / static_cast<float>(frameCount);
		uint64_t normalizedExtractionTime = updateTimeSum / frameCount + (1000 * extractTimeSum) / extractedPatchCountSum;
		uint64_t normalizedClassificationTime = (1000 * classifyTimeSum) / classifiedPatchCountSum;
		uint64_t normalizedTrainingTime = trainingTimeSum / frameCount;
		uint64_t normalizedFrameTime = normalizedExtractionTime + normalizedClassificationTime + normalizedTrainingTime;
		resultOut.setf(std::ios::fixed);
		resultOut.precision(2);
		resultOut << falseRejectionRate << " " << falseAcceptanceRate << " "
				<< cvRound(100 * locationAccuracySum / positivePatchCountSum) << "% " << cvRound(100 * worstLocationAccuracy) << "% "
				<< avgPositiveTrainingCount << " " << avgNegativeTrainingCount << " "
				<< normalizedFrameTime << "ms (" << normalizedExtractionTime << "ms " << normalizedClassificationTime << "ms " << normalizedTrainingTime << "ms)" << endl;
	}
}
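computeOverlap() is called above to separate negative from neutral patches but is not part of this listing. A minimal sketch, assuming it computes the Jaccard overlap (intersection over union) of the two rectangles; if the actual implementation uses a different measure, substitute accordingly:

#include <opencv2/core/core.hpp>

// Hypothetical stand-in for the computeOverlap used by Benchmark::run.
float computeOverlap(cv::Rect_<float> a, cv::Rect_<float> b) {
	float intersection = (a & b).area(); // cv::Rect_ overloads & as rectangle intersection
	float unionArea = a.area() + b.area() - intersection;
	return unionArea > 0.f ? intersection / unionArea : 0.f;
}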
Code example #6
int main(int argc, char *argv[])
{
	#ifdef WIN32
	//_CrtSetDbgFlag ( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF ); // dump leaks at return
	//_CrtSetBreakAlloc(287);
	#endif
	
	string verboseLevelConsole;
	bool useFileList = false;
	bool useImgs = false;
	bool useDirectory = false;
	bool useLandmarkFiles = false;
	vector<path> inputPaths;
	path inputFilelist;
	path inputDirectory;
	vector<path> inputFilenames;
	shared_ptr<ImageSource> imageSource;
	path landmarksDir; // TODO: Make more dynamic wrt landmark format. a) What about the loading-flags (1_Per_Folder etc) we have? b) Expose those flags to cmdline? c) Make a LmSourceLoader and he knows about a LM_TYPE (each corresponds to a Parser/Loader class?)
	string landmarkType;
	int forwardDelay;

	try {
		po::options_description desc("Allowed options");
		desc.add_options()
			("help,h",
				"produce help message")
			("verbose,v", po::value<string>(&verboseLevelConsole)->implicit_value("DEBUG")->default_value("INFO","show messages with INFO loglevel or below."),
				  "specify the verbosity of the console output: PANIC, ERROR, WARN, INFO, DEBUG or TRACE")
			("input,i", po::value<vector<path>>(&inputPaths)->required(), 
				"input from one or more files, a directory, or a  .lst/.txt-file containing a list of images")
			("landmarks,l", po::value<path>(&landmarksDir), 
				"load landmark files from the given folder")
			("landmark-type,t", po::value<string>(&landmarkType), 
				"specify the type of landmarks to load: ibug")
			("forward-delay,d", po::value<int>(&forwardDelay)->default_value(0)->implicit_value(1000),
				"Automatically show the next image after the given time in ms. If the option is omitted or zero, the application will wait for a keypress before showing the next image.")
		;

		po::positional_options_description p;
		p.add("input", -1);

		po::variables_map vm;
		po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
		po::notify(vm);

		if (vm.count("help")) {
			cout << "Usage: landmarkVisualiser [options]\n";
			cout << desc;
			return EXIT_SUCCESS;
		}
		if (vm.count("landmarks")) {
			useLandmarkFiles = true;
			if (!vm.count("landmark-type")) {
				cout << "You have specified to use landmark files. Please also specify the type of the landmarks to load via --landmark-type or -t." << endl;
				return EXIT_FAILURE;
			}
		}

	} catch(std::exception& e) {
		cout << e.what() << endl;
		return EXIT_FAILURE;
	}

	loglevel logLevel;
	if(boost::iequals(verboseLevelConsole, "PANIC")) logLevel = loglevel::PANIC;
	else if(boost::iequals(verboseLevelConsole, "ERROR")) logLevel = loglevel::ERROR;
	else if(boost::iequals(verboseLevelConsole, "WARN")) logLevel = loglevel::WARN;
	else if(boost::iequals(verboseLevelConsole, "INFO")) logLevel = loglevel::INFO;
	else if(boost::iequals(verboseLevelConsole, "DEBUG")) logLevel = loglevel::DEBUG;
	else if(boost::iequals(verboseLevelConsole, "TRACE")) logLevel = loglevel::TRACE;
	else {
		cout << "Error: Invalid loglevel." << endl;
		return EXIT_FAILURE;
	}
	
	Loggers->getLogger("imageio").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Loggers->getLogger("landmarkVisualiser").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Logger appLogger = Loggers->getLogger("landmarkVisualiser");

	appLogger.debug("Verbose level for console output: " + logging::loglevelToString(logLevel));

	if (inputPaths.size() > 1) {
		// We assume the user has given several, valid images
		useImgs = true;
		inputFilenames = inputPaths;
	} else if (inputPaths.size() == 1) {
		// We assume the user has given either an image, directory, or a .lst-file
		if (inputPaths[0].extension().string() == ".lst" || inputPaths[0].extension().string() == ".txt") { // check for .lst or .txt first
			useFileList = true;
			inputFilelist = inputPaths.front();
		} else if (boost::filesystem::is_directory(inputPaths[0])) { // check if it's a directory
			useDirectory = true;
			inputDirectory = inputPaths.front();
		} else { // it must be an image
			useImgs = true;
			inputFilenames = inputPaths;
		}
	} else {
		appLogger.error("Please either specify one or several files, a directory, or a .lst-file containing a list of images to run the program!");
		return EXIT_FAILURE;
	}

	if (useFileList) {
		appLogger.info("Using file-list as input: " + inputFilelist.string());
		shared_ptr<ImageSource> fileListImgSrc; // TODO VS2013 change to unique_ptr, rest below also
		try {
			fileListImgSrc = make_shared<FileListImageSource>(inputFilelist.string(), "C:\\Users\\Patrik\\Documents\\GitHub\\data\\fddb\\originalPics\\", ".jpg");
		} catch(const std::runtime_error& e) {
			appLogger.error(e.what());
			return EXIT_FAILURE;
		}
		imageSource = fileListImgSrc;
	}
	if (useImgs) {
		//imageSource = make_shared<FileImageSource>(inputFilenames);
		//imageSource = make_shared<RepeatingFileImageSource>("C:\\Users\\Patrik\\GitHub\\data\\firstrun\\ws_8.png");
		appLogger.info("Using input images: ");
		vector<string> inputFilenamesStrings;	// Hack until we use vector<path> (?)
		for (const auto& fn : inputFilenames) {
			appLogger.info(fn.string());
			inputFilenamesStrings.push_back(fn.string());
		}
		shared_ptr<ImageSource> fileImgSrc;
		try {
			fileImgSrc = make_shared<FileImageSource>(inputFilenamesStrings);
		} catch(const std::runtime_error& e) {
			appLogger.error(e.what());
			return EXIT_FAILURE;
		}
		imageSource = fileImgSrc;
	}
	if (useDirectory) {
		appLogger.info("Using input images from directory: " + inputDirectory.string());
		try {
			imageSource = make_shared<DirectoryImageSource>(inputDirectory.string());
		} catch(const std::runtime_error& e) {
			appLogger.error(e.what());
			return EXIT_FAILURE;
		}
	}
	// Load the ground truth
	// Either a) use if/else for imageSource or labeledImageSource, or b) use an EmptyLandmarkSource
	shared_ptr<LabeledImageSource> labeledImageSource;
	shared_ptr<NamedLandmarkSource> landmarkSource;
	if (useLandmarkFiles) {
		vector<path> groundtruthDirs; groundtruthDirs.push_back(landmarksDir); // Todo: Make cmdline use a vector<path>
		shared_ptr<LandmarkFormatParser> landmarkFormatParser;
		if(boost::iequals(landmarkType, "lst")) {
			//landmarkFormatParser = make_shared<LstLandmarkFormatParser>();
			//landmarkSource = make_shared<DefaultNamedLandmarkSource>(LandmarkFileGatherer::gather(imageSource, string(), GatherMethod::SEPARATE_FILES, groundtruthDirs), landmarkFormatParser);
		} else if(boost::iequals(landmarkType, "ibug")) {
			landmarkFormatParser = make_shared<IbugLandmarkFormatParser>();
			landmarkSource = make_shared<DefaultNamedLandmarkSource>(LandmarkFileGatherer::gather(imageSource, ".pts", GatherMethod::ONE_FILE_PER_IMAGE_SAME_DIR, groundtruthDirs), landmarkFormatParser);
		} else {
			cout << "Error: Invalid ground truth type." << endl;
			return EXIT_FAILURE;
		}
	} else {
		landmarkSource = make_shared<EmptyLandmarkSource>();
	}
	labeledImageSource = make_shared<NamedLabeledImageSource>(imageSource, landmarkSource);
	
	std::chrono::time_point<std::chrono::system_clock> start, end;
	Mat img;
	const string windowName = "win";

	vector<imageio::ModelLandmark> landmarks;

	cv::namedWindow(windowName);
	
	while(labeledImageSource->next()) {
		start = std::chrono::system_clock::now();
		appLogger.info("Starting to process " + labeledImageSource->getName().string());
		img = labeledImageSource->getImage();
		
		LandmarkCollection lms = labeledImageSource->getLandmarks();
		vector<shared_ptr<Landmark>> lmsv = lms.getLandmarks();

		for (const auto& lm : lmsv) {
			lm->draw(img);
			//cv::rectangle(landmarksImage, cv::Point(cvRound(lm->getX() - 2.0f), cvRound(lm->getY() - 2.0f)), cv::Point(cvRound(lm->getX() + 2.0f), cvRound(lm->getY() + 2.0f)), cv::Scalar(255, 0, 0));
		}

		cv::imshow(windowName, img);
		cv::waitKey(forwardDelay);
		
		end = std::chrono::system_clock::now();
		int elapsed_mseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count();
		appLogger.info("Finished processing. Elapsed time: " + lexical_cast<string>(elapsed_mseconds) + "ms.\n");

	}

	return 0;
}
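The --forward-delay option maps directly onto cv::waitKey semantics: a positive argument waits at most that many milliseconds before the loop advances, while 0 blocks until a keypress, which is exactly what the option's help text describes:

// forwardDelay == 0 -> cv::waitKey(0) blocks until a key is pressed (manual stepping)
// forwardDelay  > 0 -> cv::waitKey(n) returns after at most n milliseconds (slideshow mode)
cv::waitKey(forwardDelay);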
Code example #7
int main(int argc, char *argv[])
{
	#ifdef WIN32
	//_CrtSetDbgFlag ( _CRTDBG_ALLOC_MEM_DF | _CRTDBG_LEAK_CHECK_DF ); // dump leaks at return
	//_CrtSetBreakAlloc(287);
	#endif
	
	string verboseLevelConsole;
	path inputFilename;
	path configFilename;
	path inputLandmarks;
	string landmarkType;
	path landmarkMappings;
	path outputPath;

	try {
		po::options_description desc("Allowed options");
		desc.add_options()
			("help,h",
				"produce help message")
			("verbose,v", po::value<string>(&verboseLevelConsole)->implicit_value("DEBUG")->default_value("INFO","show messages with INFO loglevel or below."),
				  "specify the verbosity of the console output: PANIC, ERROR, WARN, INFO, DEBUG or TRACE")
			("config,c", po::value<path>(&configFilename)->required(), 
				"path to a config (.cfg) file")
			("input,i", po::value<path>(&inputFilename)->required(),
				"input filename")
			("landmarks,l", po::value<path>(&inputLandmarks)->required(),
				"input landmarks")
			("landmark-type,t", po::value<string>(&landmarkType)->required(),
				"specify the type of landmarks: ibug")
			("landmark-mappings,m", po::value<path>(&landmarkMappings)->required(),
				"a mapping-file that maps from the input landmarks to landmark identifiers in the model's format")
			("output,o", po::value<path>(&outputPath)->default_value("."),
				"path to an output folder")
		;

		po::variables_map vm;
		po::store(po::command_line_parser(argc, argv).options(desc).run(), vm); // style(po::command_line_style::unix_style | po::command_line_style::allow_long_disguise)
		if (vm.count("help")) {
			cout << "Usage: fitter [options]\n";
			cout << desc;
			return EXIT_SUCCESS;
		}
		po::notify(vm);

	}
	catch (po::error& e) {
		cout << "Error while parsing command-line arguments: " << e.what() << endl;
		cout << "Use --help to display a list of options." << endl;
		return EXIT_FAILURE;
	}

	LogLevel logLevel;
	if(boost::iequals(verboseLevelConsole, "PANIC")) logLevel = LogLevel::Panic;
	else if(boost::iequals(verboseLevelConsole, "ERROR")) logLevel = LogLevel::Error;
	else if(boost::iequals(verboseLevelConsole, "WARN")) logLevel = LogLevel::Warn;
	else if(boost::iequals(verboseLevelConsole, "INFO")) logLevel = LogLevel::Info;
	else if(boost::iequals(verboseLevelConsole, "DEBUG")) logLevel = LogLevel::Debug;
	else if(boost::iequals(verboseLevelConsole, "TRACE")) logLevel = LogLevel::Trace;
	else {
		cout << "Error: Invalid LogLevel." << endl;
		return EXIT_FAILURE;
	}
	
	Loggers->getLogger("imageio").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Loggers->getLogger("morphablemodel").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Loggers->getLogger("render").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Loggers->getLogger("fitter").addAppender(make_shared<logging::ConsoleAppender>(logLevel));
	Logger appLogger = Loggers->getLogger("fitter");

	appLogger.debug("Verbose level for console output: " + logging::logLevelToString(logLevel));
	appLogger.debug("Using config: " + configFilename.string());

	// Load the image
	shared_ptr<ImageSource> imageSource;
	try {
		imageSource = make_shared<FileImageSource>(inputFilename.string());
	} catch(const std::runtime_error& e) {
		appLogger.error(e.what());
		return EXIT_FAILURE;
	}

	// Load the ground truth
	shared_ptr<LabeledImageSource> labeledImageSource;
	shared_ptr<NamedLandmarkSource> landmarkSource;
	
	shared_ptr<LandmarkFormatParser> landmarkFormatParser;
	if(boost::iequals(landmarkType, "ibug")) {
		landmarkFormatParser = make_shared<IbugLandmarkFormatParser>();
		landmarkSource = make_shared<DefaultNamedLandmarkSource>(vector<path>{inputLandmarks}, landmarkFormatParser);
	} else if (boost::iequals(landmarkType, "did")) {
		landmarkFormatParser = make_shared<DidLandmarkFormatParser>();
		landmarkSource = make_shared<DefaultNamedLandmarkSource>(vector<path>{inputLandmarks}, landmarkFormatParser);
	} else {
		cout << "Error: Invalid ground truth type." << endl;
		return EXIT_FAILURE;
	}
	labeledImageSource = make_shared<NamedLabeledImageSource>(imageSource, landmarkSource);
	
	// Read the config file
	ptree config;
	try {
		boost::property_tree::info_parser::read_info(configFilename.string(), config);
	} catch(const boost::property_tree::ptree_error& error) {
		appLogger.error(error.what());
		return EXIT_FAILURE;
	}
	// Load the Morphable Model
	morphablemodel::MorphableModel morphableModel;
	try {
		morphableModel = morphablemodel::MorphableModel::load(config.get_child("morphableModel"));
	} catch (const boost::property_tree::ptree_error& error) {
		appLogger.error(error.what());
		return EXIT_FAILURE;
	}
	catch (const std::runtime_error& error) {
		appLogger.error(error.what());
		return EXIT_FAILURE;
	}

	// Create the output directory if it doesn't exist yet
	if (!boost::filesystem::exists(outputPath)) {
		boost::filesystem::create_directory(outputPath);
	}
	
	std::chrono::time_point<std::chrono::system_clock> start, end;
	Mat img;
	vector<imageio::ModelLandmark> landmarks;
	float lambda = 15.0f;

	LandmarkMapper landmarkMapper(landmarkMappings);

	labeledImageSource->next();
	start = std::chrono::system_clock::now();
	appLogger.info("Starting to process " + labeledImageSource->getName().string());
	img = labeledImageSource->getImage();

	LandmarkCollection lms = labeledImageSource->getLandmarks();
	LandmarkCollection didLms = landmarkMapper.convert(lms);
	landmarks.clear();
	Mat landmarksImage = img.clone(); // blue rect = the used landmarks
	for (const auto& lm : didLms.getLandmarks()) {
		lm->draw(landmarksImage);
		landmarks.emplace_back(imageio::ModelLandmark(lm->getName(), lm->getPosition2D()));
		cv::rectangle(landmarksImage, cv::Point(cvRound(lm->getX() - 2.0f), cvRound(lm->getY() - 2.0f)), cv::Point(cvRound(lm->getX() + 2.0f), cvRound(lm->getY() + 2.0f)), cv::Scalar(255, 0, 0));
	}

	// Start affine camera estimation (Aldrian paper)
	Mat affineCamLandmarksProjectionImage = landmarksImage.clone(); // the affine LMs are currently not used (don't know how to render without z-vals)
	
	// Convert the landmarks to clip-space
	vector<imageio::ModelLandmark> landmarksClipSpace;
	for (const auto& lm : landmarks) {
		cv::Vec2f clipCoords = render::utils::screenToClipSpace(lm.getPosition2D(), img.cols, img.rows);
		imageio::ModelLandmark lmcs(lm.getName(), Vec3f(clipCoords[0], clipCoords[1], 0.0f), lm.isVisible());
		landmarksClipSpace.push_back(lmcs);
	}
	
	Mat affineCam = fitting::estimateAffineCamera(landmarksClipSpace, morphableModel);

	// Render the mean-face landmarks projected using the estimated camera:
	for (const auto& lm : landmarks) {
		Vec3f modelPoint = morphableModel.getShapeModel().getMeanAtPoint(lm.getName());
		cv::Vec2f screenPoint = fitting::projectAffine(modelPoint, affineCam, img.cols, img.rows);
		cv::circle(affineCamLandmarksProjectionImage, Point2f(screenPoint), 4.0f, Scalar(0.0f, 255.0f, 0.0f));
	}

	// Estimate the shape coefficients:
	// Detector variances: Should not be in pixels. Should be normalised by the IED. Normalise by the image dimensions is not a good idea either, it has nothing to do with it. See comment in fitShapeToLandmarksLinear().
	// Let's just use the hopefully reasonably set default value for now (around 3 pixels)
	vector<float> fittedCoeffs = fitting::fitShapeToLandmarksLinear(morphableModel, affineCam, landmarksClipSpace, lambda);

	// Obtain the full mesh and render it using the estimated camera:
	Mesh mesh = morphableModel.drawSample(fittedCoeffs, vector<float>()); // takes standard-normal (not-normalised) coefficients

	render::SoftwareRenderer softwareRenderer(img.cols, img.rows);
	Mat fullAffineCam = fitting::calculateAffineZDirection(affineCam);
	fullAffineCam.at<float>(2, 3) = fullAffineCam.at<float>(2, 2); // Todo: Find out and document why this is necessary!
	fullAffineCam.at<float>(2, 2) = 1.0f;
	softwareRenderer.doBackfaceCulling = true;
	auto framebuffer = softwareRenderer.render(mesh, fullAffineCam); // hmm, do we have the z-test disabled?
	


	// Write:
	// ptree .fit file: cam-params, model-file, model-params-fn(alphas)
	// alphas
	// texmap

	ptree fittingFile;
	fittingFile.put("camera", string("affine"));
	fittingFile.put("camera.matrix", string("2 5.4 232.22"));

	fittingFile.put("imageWidth", img.cols);
	fittingFile.put("imageHeight", img.rows);

	fittingFile.put("fittingParameters.lambda", lambda);

	fittingFile.put("textureMap", path("C:/bla/texture.png").filename().string());
	
	fittingFile.put("model", config.get_child("morphableModel").get<string>("filename")); // This can throw, but the filename should really exist.
	
	// alphas:
	fittingFile.put("shapeCoefficients", "");
	for (size_t i = 0; i < fittedCoeffs.size(); ++i) {
		fittingFile.put("shapeCoefficients." + std::to_string(i), fittedCoeffs[i]);
	}

	path fittingFileName = outputPath / labeledImageSource->getName().stem();
	fittingFileName += ".txt";
	boost::property_tree::write_info(fittingFileName.string(), fittingFile);

	// Additional optional output, as set in the config file:
	if (config.get_child("output", ptree()).get<bool>("copyInputImage", false)) {
		path outInputImage = outputPath / labeledImageSource->getName().filename();
		cv::imwrite(outInputImage.string(), img);
	}
	if (config.get_child("output", ptree()).get<bool>("landmarksImage", false)) {
		path outLandmarksImage = outputPath / labeledImageSource->getName().stem();
		outLandmarksImage += "_landmarks.png";
		cv::imwrite(outLandmarksImage.string(), landmarksImage);
	}
	if (config.get_child("output", ptree()).get<bool>("writeObj", false)) {
		path outMesh = outputPath / labeledImageSource->getName().stem();
		outMesh.replace_extension("obj");
		Mesh::writeObj(mesh, outMesh.string());
	}
	if (config.get_child("output", ptree()).get<bool>("renderResult", false)) {
		path outRenderResult = outputPath / labeledImageSource->getName().stem();
		outRenderResult += "_render.png";
		cv::imwrite(outRenderResult.string(), framebuffer.first);
	}
	if (config.get_child("output", ptree()).get<bool>("frontalRendering", false)) {
		throw std::runtime_error("Not implemented yet, please disable.");
	}
	

	end = std::chrono::system_clock::now();
	int elapsed_mseconds = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
	appLogger.info("Finished processing. Elapsed time: " + lexical_cast<string>(elapsed_mseconds)+"ms.\n");

	return 0;
}
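For reference, the fitting file is written in Boost.PropertyTree's INFO format. With hypothetical values (a 640x480 input, three shape coefficients, and a made-up model filename; the real one is read from the config), the output would look roughly as follows. Note that camera.matrix is currently a fixed placeholder string, not the estimated affineCam:

camera affine
{
	matrix "2 5.4 232.22"
}
imageWidth 640
imageHeight 480
fittingParameters
{
	lambda 15
}
textureMap texture.png
model myMorphableModel.scm
shapeCoefficients ""
{
	0 0.0813
	1 -0.2371
	2 0.0042
}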