// This is the one where the actual work gets done, other DetectLandmarksInImage calls lead to this one bool LandmarkDetector::DetectLandmarksInImage(const cv::Mat_<uchar> &grayscale_image, const cv::Mat_<float> depth_image, const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params) { // Can have multiple hypotheses vector<cv::Vec3d> rotation_hypotheses; if(params.multi_view) { // Try out different orientation initialisations // It is possible to add other orientation hypotheses easilly by just pushing to this vector rotation_hypotheses.push_back(cv::Vec3d(0,0,0)); rotation_hypotheses.push_back(cv::Vec3d(0,0.5236,0)); rotation_hypotheses.push_back(cv::Vec3d(0,-0.5236,0)); rotation_hypotheses.push_back(cv::Vec3d(0.5236,0,0)); rotation_hypotheses.push_back(cv::Vec3d(-0.5236,0,0)); } else { // Assume the face is close to frontal rotation_hypotheses.push_back(cv::Vec3d(0,0,0)); } // Use the initialisation size for the landmark detection params.window_sizes_current = params.window_sizes_init; // Store the current best estimate double best_likelihood; cv::Vec6d best_global_parameters; cv::Mat_<double> best_local_parameters; cv::Mat_<double> best_detected_landmarks; cv::Mat_<double> best_landmark_likelihoods; bool best_success; // The hierarchical model parameters vector<double> best_likelihood_h(clnf_model.hierarchical_models.size()); vector<cv::Vec6d> best_global_parameters_h(clnf_model.hierarchical_models.size()); vector<cv::Mat_<double> > best_local_parameters_h(clnf_model.hierarchical_models.size()); vector<cv::Mat_<double> > best_detected_landmarks_h(clnf_model.hierarchical_models.size()); vector<cv::Mat_<double> > best_landmark_likelihoods_h(clnf_model.hierarchical_models.size()); for(size_t hypothesis = 0; hypothesis < rotation_hypotheses.size(); ++hypothesis) { // Reset the potentially set clnf_model parameters clnf_model.params_local.setTo(0.0); for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part) { clnf_model.hierarchical_models[part].params_local.setTo(0.0); } // calculate the local and global parameters from the generated 2D shape (mapping from the 2D to 3D because camera params are unknown) clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local, rotation_hypotheses[hypothesis]); bool success = clnf_model.DetectLandmarks(grayscale_image, depth_image, params); if(hypothesis == 0 || best_likelihood < clnf_model.model_likelihood) { best_likelihood = clnf_model.model_likelihood; best_global_parameters = clnf_model.params_global; best_local_parameters = clnf_model.params_local.clone(); best_detected_landmarks = clnf_model.detected_landmarks.clone(); best_landmark_likelihoods = clnf_model.landmark_likelihoods.clone(); best_success = success; } for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part) { if (hypothesis == 0 || best_likelihood < clnf_model.hierarchical_models[part].model_likelihood) { best_likelihood_h[part] = clnf_model.hierarchical_models[part].model_likelihood; best_global_parameters_h[part] = clnf_model.hierarchical_models[part].params_global; best_local_parameters_h[part] = clnf_model.hierarchical_models[part].params_local.clone(); best_detected_landmarks_h[part] = clnf_model.hierarchical_models[part].detected_landmarks.clone(); best_landmark_likelihoods_h[part] = clnf_model.hierarchical_models[part].landmark_likelihoods.clone(); } } } // Store the best estimates in the clnf_model clnf_model.model_likelihood = best_likelihood; clnf_model.params_global = best_global_parameters; clnf_model.params_local = best_local_parameters.clone(); clnf_model.detected_landmarks = best_detected_landmarks.clone(); clnf_model.detection_success = best_success; clnf_model.landmark_likelihoods = best_landmark_likelihoods.clone(); for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part) { clnf_model.hierarchical_models[part].params_global = best_global_parameters_h[part]; clnf_model.hierarchical_models[part].params_local = best_local_parameters_h[part].clone(); clnf_model.hierarchical_models[part].detected_landmarks = best_detected_landmarks_h[part].clone(); clnf_model.hierarchical_models[part].landmark_likelihoods = best_landmark_likelihoods_h[part].clone(); } return best_success; }
bool LandmarkDetector::DetectLandmarksInVideo(const cv::Mat_<uchar> &grayscale_image, const cv::Mat_<float> &depth_image, CLNF& clnf_model, FaceModelParameters& params) { // First need to decide if the landmarks should be "detected" or "tracked" // Detected means running face detection and a larger search area, tracked means initialising from previous step // and using a smaller search area // Indicating that this is a first detection in video sequence or after restart TS(DetectLandmarksInVideo); bool initial_detection = !clnf_model.tracking_initialised; // Only do it if there was a face detection at all if(clnf_model.tracking_initialised) { // The area of interest search size will depend if the previous track was successful if(!clnf_model.detection_success) { params.window_sizes_current = params.window_sizes_init; } else { params.window_sizes_current = params.window_sizes_small; } // Before the expensive landmark detection step apply a quick template tracking approach if(params.use_face_template && !clnf_model.face_template.empty() && clnf_model.detection_success) { CorrectGlobalParametersVideo(grayscale_image, clnf_model, params); } TS(DetectLandmarks); bool track_success = clnf_model.DetectLandmarks(grayscale_image, depth_image, params); TE(DetectLandmarks); if(!track_success) { // Make a record that tracking failed clnf_model.failures_in_a_row++; } else { // indicate that tracking is a success clnf_model.failures_in_a_row = -1; UpdateTemplate(grayscale_image, clnf_model); } } // This is used for both detection (if it the tracking has not been initialised yet) or if the tracking failed (however we do this every n frames, for speed) // This also has the effect of an attempt to reinitialise just after the tracking has failed, which is useful during large motions if((!clnf_model.tracking_initialised && (clnf_model.failures_in_a_row + 1) % (params.reinit_video_every * 6) == 0) || (clnf_model.tracking_initialised && !clnf_model.detection_success && params.reinit_video_every > 0 && clnf_model.failures_in_a_row % params.reinit_video_every == 0)) { cv::Rect_<double> bounding_box; cv::Point preference_det(-1, -1); if(clnf_model.preference_det.x != -1 && clnf_model.preference_det.y != -1) { preference_det.x = clnf_model.preference_det.x * grayscale_image.cols; preference_det.y = clnf_model.preference_det.y * grayscale_image.rows; clnf_model.preference_det = cv::Point(-1, -1); } bool face_detection_success; double confidence; TS(DetectSingleFaceHOG); face_detection_success = LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence, preference_det); TE(DetectSingleFaceHOG); // Attempt to detect landmarks using the detected face (if unseccessful the detection will be ignored) if(face_detection_success) { // Indicate that tracking has started as a face was detected clnf_model.tracking_initialised = true; // Keep track of old model values so that they can be restored if redetection fails cv::Vec6d params_global_init = clnf_model.params_global; cv::Mat_<double> params_local_init = clnf_model.params_local.clone(); double likelihood_init = clnf_model.model_likelihood; cv::Mat_<double> detected_landmarks_init = clnf_model.detected_landmarks.clone(); cv::Mat_<double> landmark_likelihoods_init = clnf_model.landmark_likelihoods.clone(); // Use the detected bounding box and empty local parameters clnf_model.params_local.setTo(0); clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local); // Make sure the search size is large params.window_sizes_current = params.window_sizes_init; // Do the actual landmark detection (and keep it only if successful) bool landmark_detection_success = clnf_model.DetectLandmarks(grayscale_image, depth_image, params); // If landmark reinitialisation unsucessful continue from previous estimates // if it's initial detection however, do not care if it was successful as the validator might be wrong, so continue trackig // regardless if(!initial_detection && !landmark_detection_success) { // Restore previous estimates clnf_model.params_global = params_global_init; clnf_model.params_local = params_local_init.clone(); clnf_model.pdm.CalcShape2D(clnf_model.detected_landmarks, clnf_model.params_local, clnf_model.params_global); clnf_model.model_likelihood = likelihood_init; clnf_model.detected_landmarks = detected_landmarks_init.clone(); clnf_model.landmark_likelihoods = landmark_likelihoods_init.clone(); TE(DetectLandmarksInVideo); return false; } else { clnf_model.failures_in_a_row = -1; UpdateTemplate(grayscale_image, clnf_model); TE(DetectLandmarksInVideo); return true; } } } // if the model has not been initialised yet class it as a failure if(!clnf_model.tracking_initialised) { clnf_model.failures_in_a_row++; } // un-initialise the tracking if( clnf_model.failures_in_a_row > 100) { clnf_model.tracking_initialised = false; } TE(DetectLandmarksInVideo); return clnf_model.detection_success; }
bool DetectLandmarksInImageMultiHypBasic(const cv::Mat_<uchar> &grayscale_image, vector<cv::Vec3d> rotation_hypotheses, const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params) { // Use the initialisation size for the landmark detection params.window_sizes_current = params.window_sizes_init; // Store the current best estimate float best_likelihood; float best_detection_certainty; cv::Vec6f best_global_parameters; cv::Mat_<float> best_local_parameters; cv::Mat_<float> best_detected_landmarks; cv::Mat_<float> best_landmark_likelihoods; bool best_success; // The hierarchical model parameters vector<float> best_likelihood_h(clnf_model.hierarchical_models.size()); vector<cv::Vec6f> best_global_parameters_h(clnf_model.hierarchical_models.size()); vector<cv::Mat_<float>> best_local_parameters_h(clnf_model.hierarchical_models.size()); vector<cv::Mat_<float>> best_detected_landmarks_h(clnf_model.hierarchical_models.size()); vector<cv::Mat_<float>> best_landmark_likelihoods_h(clnf_model.hierarchical_models.size()); for (size_t hypothesis = 0; hypothesis < rotation_hypotheses.size(); ++hypothesis) { // Reset the potentially set clnf_model parameters clnf_model.params_local.setTo(0.0); for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part) { clnf_model.hierarchical_models[part].params_local.setTo(0.0); } // calculate the local and global parameters from the generated 2D shape (mapping from the 2D to 3D because camera params are unknown) clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local, rotation_hypotheses[hypothesis]); bool success = clnf_model.DetectLandmarks(grayscale_image, params); if (hypothesis == 0 || best_likelihood < clnf_model.model_likelihood) { best_likelihood = clnf_model.model_likelihood; best_global_parameters = clnf_model.params_global; best_local_parameters = clnf_model.params_local.clone(); best_detected_landmarks = clnf_model.detected_landmarks.clone(); best_landmark_likelihoods = clnf_model.landmark_likelihoods.clone(); best_detection_certainty = clnf_model.detection_certainty; best_success = success; for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part) { best_likelihood_h[part] = clnf_model.hierarchical_models[part].model_likelihood; best_global_parameters_h[part] = clnf_model.hierarchical_models[part].params_global; best_local_parameters_h[part] = clnf_model.hierarchical_models[part].params_local.clone(); best_detected_landmarks_h[part] = clnf_model.hierarchical_models[part].detected_landmarks.clone(); best_landmark_likelihoods_h[part] = clnf_model.hierarchical_models[part].landmark_likelihoods.clone(); } } } // Store the best estimates in the clnf_model clnf_model.model_likelihood = best_likelihood; clnf_model.params_global = best_global_parameters; clnf_model.params_local = best_local_parameters.clone(); clnf_model.detected_landmarks = best_detected_landmarks.clone(); clnf_model.detection_success = best_success; clnf_model.landmark_likelihoods = best_landmark_likelihoods.clone(); clnf_model.detection_certainty = best_detection_certainty; for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part) { clnf_model.hierarchical_models[part].params_global = best_global_parameters_h[part]; clnf_model.hierarchical_models[part].params_local = best_local_parameters_h[part].clone(); clnf_model.hierarchical_models[part].detected_landmarks = best_detected_landmarks_h[part].clone(); clnf_model.hierarchical_models[part].landmark_likelihoods = best_landmark_likelihoods_h[part].clone(); } return best_success; }
bool DetectLandmarksInImageMultiHypEarlyTerm(const cv::Mat_<uchar> &grayscale_image, vector<cv::Vec3d> rotation_hypotheses, const cv::Rect_<double> bounding_box, CLNF& clnf_model, FaceModelParameters& params) { FaceModelParameters old_params(params); // Use the initialisation size for the landmark detection params.window_sizes_current = params.window_sizes_init; bool early_term = false; // Setup the parameters accordingly // Only do the first iteration for (size_t i = 1; i < params.window_sizes_current.size(); ++i) { params.window_sizes_current[i] = 0; } params.refine_hierarchical = false; params.validate_detections = false; bool success = false; // Keeping track of converges vector<float> likelihoods; vector<cv::Vec6f> global_parameters; vector<cv::Mat_<float>> local_parameters; for (size_t hypothesis = 0; hypothesis < rotation_hypotheses.size(); ++hypothesis) { // Reset the potentially set clnf_model parameters clnf_model.params_local.setTo(0.0); for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part) { clnf_model.hierarchical_models[part].params_local.setTo(0.0); } // calculate the local and global parameters from the generated 2D shape (mapping from the 2D to 3D because camera params are unknown) clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local, rotation_hypotheses[hypothesis]); // Perform landmark detection in first scale clnf_model.DetectLandmarks(grayscale_image, params); float lhood = clnf_model.model_likelihood * clnf_model.patch_experts.early_term_weights[clnf_model.view_used] + clnf_model.patch_experts.early_term_biases[clnf_model.view_used]; // If likelihood higher than cutoff continue on this model if (lhood > clnf_model.patch_experts.early_term_cutoffs[clnf_model.view_used]) { params.refine_hierarchical = old_params.refine_hierarchical; params.window_sizes_current = params.window_sizes_init; params.window_sizes_current[0] = 0; params.validate_detections = old_params.validate_detections; success = clnf_model.DetectLandmarks(grayscale_image, params); early_term = true; break; } else { likelihoods.push_back(lhood); global_parameters.push_back(clnf_model.params_global); local_parameters.push_back(clnf_model.params_local); } } if (!early_term) { // Store the current best estimate float best_likelihood; cv::Vec6f best_global_parameters; cv::Mat_<float> best_local_parameters; cv::Mat_<float> best_detected_landmarks; cv::Mat_<float> best_landmark_likelihoods; bool best_success; // The hierarchical model parameters vector<float> best_likelihood_h(clnf_model.hierarchical_models.size()); vector<cv::Vec6f> best_global_parameters_h(clnf_model.hierarchical_models.size()); vector<cv::Mat_<float>> best_local_parameters_h(clnf_model.hierarchical_models.size()); vector<cv::Mat_<float>> best_detected_landmarks_h(clnf_model.hierarchical_models.size()); vector<cv::Mat_<float>> best_landmark_likelihoods_h(clnf_model.hierarchical_models.size()); // Sort the likelihoods and pick the best top 3 models vector<size_t> indices = sort_indexes(likelihoods); // Pick 3 best hypotheses and complete them size_t max = indices.size() >= 3 ? 3 : indices.size(); params.refine_hierarchical = old_params.refine_hierarchical; params.window_sizes_current = params.window_sizes_init; params.window_sizes_current[0] = 0; params.validate_detections = old_params.validate_detections; for (size_t i = 0; i < max; ++i) { // Reset the potentially set clnf_model parameters clnf_model.params_local = local_parameters[indices[i]]; clnf_model.params_global = global_parameters[indices[i]]; for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part) { clnf_model.hierarchical_models[part].params_local.setTo(0.0); } // Perform landmark detection in first scale success = clnf_model.DetectLandmarks(grayscale_image, params); if (i == 0 || best_likelihood < clnf_model.model_likelihood) { best_likelihood = clnf_model.model_likelihood; best_global_parameters = clnf_model.params_global; best_local_parameters = clnf_model.params_local.clone(); best_detected_landmarks = clnf_model.detected_landmarks.clone(); best_landmark_likelihoods = clnf_model.landmark_likelihoods.clone(); best_success = success; for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part) { best_likelihood_h[part] = clnf_model.hierarchical_models[part].model_likelihood; best_global_parameters_h[part] = clnf_model.hierarchical_models[part].params_global; best_local_parameters_h[part] = clnf_model.hierarchical_models[part].params_local.clone(); best_detected_landmarks_h[part] = clnf_model.hierarchical_models[part].detected_landmarks.clone(); best_landmark_likelihoods_h[part] = clnf_model.hierarchical_models[part].landmark_likelihoods.clone(); } } } // Store the best estimates in the clnf_model clnf_model.model_likelihood = best_likelihood; clnf_model.params_global = best_global_parameters; clnf_model.params_local = best_local_parameters.clone(); clnf_model.detected_landmarks = best_detected_landmarks.clone(); clnf_model.detection_success = best_success; clnf_model.landmark_likelihoods = best_landmark_likelihoods.clone(); for (size_t part = 0; part < clnf_model.hierarchical_models.size(); ++part) { clnf_model.hierarchical_models[part].params_global = best_global_parameters_h[part]; clnf_model.hierarchical_models[part].params_local = best_local_parameters_h[part].clone(); clnf_model.hierarchical_models[part].detected_landmarks = best_detected_landmarks_h[part].clone(); clnf_model.hierarchical_models[part].landmark_likelihoods = best_landmark_likelihoods_h[part].clone(); } } params = old_params; return success; }
bool LandmarkDetector::DetectLandmarksInVideo(const cv::Mat &rgb_image, CLNF& clnf_model, FaceModelParameters& params, cv::Mat& grayscale_image) { // First need to decide if the landmarks should be "detected" or "tracked" // Detected means running face detection and a larger search area, tracked means initialising from previous step // and using a smaller search area if(grayscale_image.empty()) { Utilities::ConvertToGrayscale_8bit(rgb_image, grayscale_image); } // Indicating that this is a first detection in video sequence or after restart bool initial_detection = !clnf_model.tracking_initialised; // Only do it if there was a face detection at all if(clnf_model.tracking_initialised) { // The area of interest search size will depend if the previous track was successful if(!clnf_model.detection_success) { params.window_sizes_current = params.window_sizes_init; } else { params.window_sizes_current = params.window_sizes_small; } // Before the expensive landmark detection step apply a quick template tracking approach if(params.use_face_template && !clnf_model.face_template.empty() && clnf_model.detection_success) { CorrectGlobalParametersVideo(grayscale_image, clnf_model, params); } bool track_success = clnf_model.DetectLandmarks(grayscale_image, params); if(!track_success) { // Make a record that tracking failed clnf_model.failures_in_a_row++; } else { // indicate that tracking is a success clnf_model.failures_in_a_row = -1; if(params.use_face_template) { UpdateTemplate(grayscale_image, clnf_model); } } } // This is used for both detection (if it the tracking has not been initialised yet) or if the tracking failed (however we do this every n frames, for speed) // This also has the effect of an attempt to reinitialise just after the tracking has failed, which is useful during large motions if((!clnf_model.tracking_initialised && (clnf_model.failures_in_a_row + 1) % (params.reinit_video_every * 6) == 0) || (clnf_model.tracking_initialised && !clnf_model.detection_success && params.reinit_video_every > 0 && clnf_model.failures_in_a_row % params.reinit_video_every == 0)) { cv::Rect_<float> bounding_box; // If the face detector has not been initialised and we're using it, then read it in if(clnf_model.face_detector_HAAR.empty() && params.curr_face_detector == params.HAAR_DETECTOR) { clnf_model.face_detector_HAAR.load(params.haar_face_detector_location); clnf_model.haar_face_detector_location = params.haar_face_detector_location; } if (clnf_model.face_detector_MTCNN.empty() && params.curr_face_detector == params.MTCNN_DETECTOR) { clnf_model.face_detector_MTCNN.Read(params.mtcnn_face_detector_location); clnf_model.mtcnn_face_detector_location = params.mtcnn_face_detector_location; // If the model is still empty default to HOG if (clnf_model.face_detector_MTCNN.empty()) { cout << "INFO: defaulting to HOG-SVM face detector" << endl; params.curr_face_detector = LandmarkDetector::FaceModelParameters::HOG_SVM_DETECTOR; } } cv::Point preference_det(-1, -1); if(clnf_model.preference_det.x != -1 && clnf_model.preference_det.y != -1) { preference_det.x = clnf_model.preference_det.x * grayscale_image.cols; preference_det.y = clnf_model.preference_det.y * grayscale_image.rows; clnf_model.preference_det = cv::Point(-1, -1); } bool face_detection_success; if(params.curr_face_detector == FaceModelParameters::HOG_SVM_DETECTOR) { float confidence; face_detection_success = LandmarkDetector::DetectSingleFaceHOG(bounding_box, grayscale_image, clnf_model.face_detector_HOG, confidence, preference_det); } else if(params.curr_face_detector == FaceModelParameters::HAAR_DETECTOR) { face_detection_success = LandmarkDetector::DetectSingleFace(bounding_box, grayscale_image, clnf_model.face_detector_HAAR, preference_det); } else if (params.curr_face_detector == FaceModelParameters::MTCNN_DETECTOR) { float confidence; face_detection_success = LandmarkDetector::DetectSingleFaceMTCNN(bounding_box, rgb_image, clnf_model.face_detector_MTCNN, confidence, preference_det); } // Attempt to detect landmarks using the detected face (if unseccessful the detection will be ignored) if(face_detection_success) { // Indicate that tracking has started as a face was detected clnf_model.tracking_initialised = true; // Keep track of old model values so that they can be restored if redetection fails cv::Vec6f params_global_init = clnf_model.params_global; cv::Mat_<float> params_local_init = clnf_model.params_local.clone(); float likelihood_init = clnf_model.model_likelihood; cv::Mat_<float> detected_landmarks_init = clnf_model.detected_landmarks.clone(); cv::Mat_<float> landmark_likelihoods_init = clnf_model.landmark_likelihoods.clone(); // Use the detected bounding box and empty local parameters clnf_model.params_local.setTo(0); clnf_model.pdm.CalcParams(clnf_model.params_global, bounding_box, clnf_model.params_local); // Make sure the search size is large params.window_sizes_current = params.window_sizes_init; // TODO rem (should the multi-hyp version be only for CEN and not CLNF?), otherwise poss too slow, and poss not accurate //bool landmark_detection_success = clnf_model.DetectLandmarks(grayscale_image, params); // Do the actual landmark detection (and keep it only if successful) // Perform multi-hypothesis detection here (as face detector can pick up multiple of them) params.multi_view = true; bool landmark_detection_success = DetectLandmarksInImage(rgb_image, bounding_box, clnf_model, params, grayscale_image); params.multi_view = false; // If landmark reinitialisation unsucessful continue from previous estimates // if it's initial detection however, do not care if it was successful as the validator might be wrong, so continue trackig // regardless if(!initial_detection && !landmark_detection_success) { // Restore previous estimates clnf_model.params_global = params_global_init; clnf_model.params_local = params_local_init.clone(); clnf_model.pdm.CalcShape2D(clnf_model.detected_landmarks, clnf_model.params_local, clnf_model.params_global); clnf_model.model_likelihood = likelihood_init; clnf_model.detected_landmarks = detected_landmarks_init.clone(); clnf_model.landmark_likelihoods = landmark_likelihoods_init.clone(); return false; } else { clnf_model.failures_in_a_row = -1; if(params.use_face_template) { UpdateTemplate(grayscale_image, clnf_model); } return true; } } } // if the model has not been initialised yet class it as a failure if(!clnf_model.tracking_initialised) { clnf_model.failures_in_a_row++; } // un-initialise the tracking if( clnf_model.failures_in_a_row > 100) { clnf_model.tracking_initialised = false; } return clnf_model.detection_success; }