/**
 * Single-layer direct (photometric) pose estimation.
 *
 * Estimates the relative pose T21 (frame1 -> frame2) by minimising the
 * photometric error of 8x8 patches around the reference pixels, using
 * Gauss-Newton on se(3).
 *
 * @param img1      reference grayscale image
 * @param img2      current grayscale image
 * @param px_ref    pixel locations in img1 to track
 * @param depth_ref depth of each reference pixel (same index as px_ref)
 * @param T21       [in/out] pose estimate, updated in place
 */
void DirectPoseEstimationSingleLayer(
        const cv::Mat &img1,
        const cv::Mat &img2,
        const VecVector2d &px_ref,
        const vector<double> depth_ref,
        Sophus::SE3 &T21) {

    // parameters
    const int half_patch_size = 4;
    const int iterations = 100;

    double cost = 0, lastCost = 0;
    int nGood = 0;               // number of points that project inside img2
    VecVector2d goodProjection;  // their projected positions (for display)

    for (int iter = 0; iter < iterations; iter++) {
        nGood = 0;
        goodProjection.clear();
        // FIX: cost must be reset every Gauss-Newton iteration; the original
        // accumulated it across iterations, corrupting the convergence test.
        cost = 0;

        // 6x6 Hessian (sum of J*J^T) and 6x1 bias of the normal equations
        Matrix6d H = Matrix6d::Zero();
        Vector6d b = Vector6d::Zero();

        for (size_t i = 0; i < px_ref.size(); i++) {
            // Back-project the reference pixel with its depth, transform into
            // the second camera frame and project into img2.
            const Eigen::Vector2d p1 = px_ref[i];
            const Eigen::Vector3d pw((p1(0) - cx) / fx * depth_ref[i],
                                     (p1(1) - cy) / fy * depth_ref[i],
                                     depth_ref[i]);
            const Eigen::Vector3d pc = T21 * pw;
            // FIX: skip points that end up behind the camera (division by
            // pc(2) below would otherwise produce garbage projections).
            if (pc(2) <= 0)
                continue;
            const Eigen::Vector2d p2(fx * pc(0) / pc(2) + cx,
                                     fy * pc(1) / pc(2) + cy);
            const float u = p2(0), v = p2(1);

            // Reject projections whose patch (plus the +/-1 px needed by the
            // central-difference gradient) would leave the image.
            // FIX: the lower bound on v was "half_patch_size - 1" (asymmetric
            // with u), which let out-of-bounds reads through GetPixelValue.
            if (u <= half_patch_size + 1 || u >= img2.cols - half_patch_size - 1 ||
                v <= half_patch_size + 1 || v >= img2.rows - half_patch_size - 1) {
                continue;
            }
            nGood++;
            goodProjection.push_back(Eigen::Vector2d(u, v));

            // Jacobian of the projection w.r.t. se(3). It must be evaluated
            // at the point expressed in the SECOND camera frame.
            // FIX: the original evaluated it with reference-frame coordinates.
            const double X = pc(0), Y = pc(1), Z = pc(2);
            Matrix26d J_pixel_xi;  // d(u,v)/d(xi)
            J_pixel_xi(0, 0) = fx / Z;
            J_pixel_xi(0, 1) = 0;
            J_pixel_xi(0, 2) = -fx * X / (Z * Z);
            J_pixel_xi(0, 3) = -fx * X * Y / (Z * Z);
            J_pixel_xi(0, 4) = fx + fx * X * X / (Z * Z);
            J_pixel_xi(0, 5) = -fx * Y / Z;
            J_pixel_xi(1, 0) = 0;
            J_pixel_xi(1, 1) = fy / Z;
            J_pixel_xi(1, 2) = -fy * Y / (Z * Z);
            J_pixel_xi(1, 3) = -fy - fy * Y * Y / (Z * Z);
            J_pixel_xi(1, 4) = fy * X * Y / (Z * Z);
            J_pixel_xi(1, 5) = fy * X / Z;

            // Accumulate photometric error and jacobian over the patch.
            for (int x = -half_patch_size; x < half_patch_size; x++) {
                for (int y = -half_patch_size; y < half_patch_size; y++) {
                    const double error =
                            GetPixelValue(img1, p1(0) + x, p1(1) + y) -
                            GetPixelValue(img2, p2(0) + x, p2(1) + y);

                    // image gradient of img2 at the projected patch pixel
                    Eigen::Vector2d J_img_pixel;
                    J_img_pixel[0] = (GetPixelValue(img2, p2(0) + x + 1, p2(1) + y) -
                                      GetPixelValue(img2, p2(0) + x - 1, p2(1) + y)) / 2;
                    J_img_pixel[1] = (GetPixelValue(img2, p2(0) + x, p2(1) + y + 1) -
                                      GetPixelValue(img2, p2(0) + x, p2(1) + y - 1)) / 2;

                    // total jacobian of the residual (error = I1 - I2, hence
                    // the minus sign on the chain rule through img2)
                    const Vector6d J = -J_img_pixel.transpose() * J_pixel_xi;

                    H += J * J.transpose();
                    b += -error * J;
                    cost += error * error;
                }
            }
        }

        // FIX: guard against division by zero when nothing projected inside.
        if (nGood == 0) {
            cout << "no good projections" << endl;
            break;
        }

        // Solve the normal equations H * update = b.
        // FIX: use an LDLT factorization instead of an explicit inverse, and
        // check for NaN BEFORE applying the update so T21 is not corrupted
        // (the original applied the bad update first).
        const Vector6d update = H.ldlt().solve(b);
        if (isnan(update[0])) {
            // happens with a uniform (black/white) patch: H is singular
            cout << "update is nan" << endl;
            break;
        }
        T21 = Sophus::SE3::exp(update) * T21;

        cost /= nGood;
        if (iter > 0 && cost > lastCost) {
            cout << "cost increased: " << cost << ", " << lastCost << endl;
            break;
        }
        lastCost = cost;
        cout << "cost = " << cost << ", good = " << nGood << endl;
    }

    cout << "good projection: " << nGood << endl;
    cout << "T21 = \n" << T21.matrix() << endl;

    // Debug visualisation: reference points in img1, surviving projections
    // in img2, drawn as small green rectangles.
    cv::Mat img1_show, img2_show;
    cv::cvtColor(img1, img1_show, CV_GRAY2BGR);
    cv::cvtColor(img2, img2_show, CV_GRAY2BGR);
    for (auto &px : px_ref) {
        cv::rectangle(img1_show, cv::Point2f(px[0] - 2, px[1] - 2),
                      cv::Point2f(px[0] + 2, px[1] + 2), cv::Scalar(0, 250, 0));
    }
    for (auto &px : goodProjection) {
        cv::rectangle(img2_show, cv::Point2f(px[0] - 2, px[1] - 2),
                      cv::Point2f(px[0] + 2, px[1] + 2), cv::Scalar(0, 250, 0));
    }
    cv::imshow("reference", img1_show);
    cv::imshow("current", img2_show);
    cv::waitKey();
}
int main( int /*argc*/, char* argv[] ) { const static string ui_file = "app.clicks"; // Load configuration data pangolin::ParseVarsFile("app.cfg"); InputRecordRepeat input("ui."); input.LoadBuffer(ui_file); // Target to track from Target target; target.GenerateRandom(60,25/(842.0/297.0),75/(842.0/297.0),15/(842.0/297.0),Vector2d(297,210)); // target.GenerateCircular(60,20,50,15,makeVector(210,210)); // target.GenerateEmptyCircle(60,25,75,15,200,makeVector(297,210)); target.SaveEPS("target_A4.eps"); cout << "Calibration target saved as: target_A4.eps" << endl; // Setup Video Var<string> video_uri("video_uri"); VideoRecordRepeat video(video_uri, "video.pvn", 1024*1024*200); // VideoInput video(video_uri); const unsigned w = video.Width(); const unsigned h = video.Height(); // Create Glut window pangolin::CreateGlutWindowAndBind("Main",2*w+PANEL_WIDTH,h); // Pangolin 3D Render state pangolin::OpenGlRenderState s_cam; s_cam.Set(ProjectionMatrixRDF_TopLeft(640,480,420,420,320,240,1,1E6)); s_cam.Set(FromTooN(toTooN(Sophus::SE3(Sophus::SO3(),Vector3d(-target.Size()[0]/2,-target.Size()[1]/2,500) )))); pangolin::Handler3D handler(s_cam); // Create viewport for video with fixed aspect View& vPanel = pangolin::CreatePanel("ui").SetBounds(1.0,0.0,0,PANEL_WIDTH); View& vVideo = pangolin::Display("Video").SetAspect((float)w/h); View& v3D = pangolin::Display("3D").SetAspect((float)w/h).SetHandler(&handler); // View& vDebug = pangolin::Display("Debug").SetAspect((float)w/h); Display("Container") .SetBounds(1.0,0.0,PANEL_WIDTH,1.0,false) .SetLayout(LayoutEqual) .AddDisplay(vVideo) .AddDisplay(v3D) // .AddDisplay(vDebug) ; // OpenGl Texture for video frame GlTexture texRGB(w,h,GL_RGBA8); GlTexture tex(w,h,GL_LUMINANCE8); // Declare Image buffers CVD::ImageRef size(w,h); Image<Rgb<byte> > Irgb(size); Image<byte> I(size); Image<float> intI(size); Image<short> lI(size); Image<std::array<float,2> > dI(size); Image<byte> tI(size); // Camera parameters Matrix<float,5,1> cam_params; // = 
Var<Matrix<float,5,1> >("cam_params"); cerr << "Not loading params properly" << endl; cam_params << 0.694621, 0.925258, 0.505055, 0.484551, 0.968455; FovCamera cam( w,h, w*cam_params[0],h*cam_params[1], w*cam_params[2],h*cam_params[3], cam_params[4] ); // Last good pose Sophus::SE3 T_gw; std::clock_t last_good; int good_frames; // Pose hypothesis Sophus::SE3 T_hw; // Fixed mirrored pose Sophus::SE3 T_0w; // Stored keyframes boost::ptr_vector<Keyframe> keyframes; // Variables Var<bool> record("ui.Record",false,false); Var<bool> play("ui.Play",false,false); Var<bool> source("ui.Source",false,false); Var<bool> add_keyframe("ui.Add Keyframe",false,false); Var<bool> use_mirror("ui.Use Mirror",false,true); Var<bool> draw_mirror("ui.Draw Mirror",false,true); // Var<bool> calc_mirror_pose("ui.Calculate Mirrored Pose",false,false); Var<bool> disp_thresh("ui.Display Thresh",false); Var<float> at_threshold("ui.Adap Threshold",1.0,0,1.0); Var<int> at_window("ui.Adapt Window",w/3,1,200); Var<float> conic_min_area(".Conic min area",25, 0, 100); Var<float> conic_max_area(".Conic max area",4E4, 0, 1E5); Var<float> conic_min_density(".Conic min density",0.4, 0, 1.0); Var<float> conic_min_aspect(".Conic min aspect",0.1, 0, 1.0); Var<int> target_match_neighbours(".Match Descriptor Neighbours",10, 5, 20); Var<int> target_ransac_its("ui.Ransac Its", 100, 20, 500); Var<int> target_ransac_min_pts("ui.Ransac Min Pts", 5, 5, 10); Var<float> target_ransac_max_inlier_err_mm("ui.Ransac Max Inlier Err (mm)", 15, 0, 50); Var<float> target_plane_inlier_thresh("ui.Plane inlier thresh", 1.5, 0.1, 10); Var<double> rms("ui.RMS", 0); Var<double> max_rms("ui.max RMS", 1.0, 0.01, 10); Var<double> robust_4pt_inlier_tol("ui.Ransac 4Pt Inlier", 0.006, 1E-3, 1E-2); Var<int> robust_4pt_its("ui.Ransac 4pt its", 100, 10, 500); Var<double> max_mmps("ui.max mms per sec", 1500, 100, 2000); Var<bool> lock_to_cam("ui.AR",false); for(int frame=0; !pangolin::ShouldQuit(); ++frame) { Viewport::DisableScissor(); 
glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT); // Get newest frame from camera and upload to GPU as texture video.GrabNewest((byte*)Irgb.data(),true); // Associate input with this video frame input.SetIndex(video.FrameId()); // Generic processing ConvertRGBtoI(Irgb,I); gradient(w,h,I.data(),dI.data()); integral_image(w,h,I.data(),intI.data()); // Threshold and label image AdaptiveThreshold(w,h,I.data(),intI.data(),tI.data(),at_threshold,at_window,(byte)0,(byte)255); vector<PixelClass> labels; Label(w,h,tI.data(),lI.data(),labels); // Find conics vector<PixelClass> candidates; vector<Conic> conics; FindCandidateConicsFromLabels( w,h,labels,candidates, conic_min_area,conic_max_area,conic_min_density, conic_min_aspect ); FindConics(w,h,candidates,dI.data(),conics ); // Generate map and point structures vector<int> conics_target_map(conics.size(),-1); vector<Vector2d > ellipses; for( size_t i=0; i < conics.size(); ++i ) ellipses.push_back(conics[i].center); // Update pose given correspondences T_hw = FindPose(cam,target.circles3D(),ellipses,conics_target_map,robust_4pt_inlier_tol,robust_4pt_its); rms = ReprojectionErrorRMS(cam,T_hw,target.circles3D(),ellipses,conics_target_map); int inliers =0; for( int i=0; i < conics_target_map.size(); ++i) if( conics_target_map[i] >=0 ) inliers++; if( !isfinite((double)rms) || rms > max_rms || (good_frames < 5 && inliers < 6) ) { // Undistort Conics vector<Conic> conics_camframe; for( unsigned int i=0; i<conics.size(); ++i ) conics_camframe.push_back(UnmapConic(conics[i],cam)); // Find target given (approximately) undistorted conics const static LinearCamera idcam(-1,-1,1,1,0,0); target.FindTarget( idcam,conics_camframe, conics_target_map, target_match_neighbours, target_ransac_its, target_ransac_min_pts, target_ransac_max_inlier_err_mm, target_plane_inlier_thresh,use_mirror ); // Estimate camera pose relative to target coordinate system T_hw = 
FindPose(cam,target.circles3D(),ellipses,conics_target_map,robust_4pt_inlier_tol,robust_4pt_its); rms = ReprojectionErrorRMS(cam,T_hw,target.circles3D(),ellipses,conics_target_map); } if( isfinite((double)rms) && rms < max_rms ) { // seconds since good const double t = std::difftime(clock(),last_good) / (double) CLOCKS_PER_SEC; const double dx = (T_hw.inverse().translation() - T_gw.inverse().translation()).norm(); if( dx / t < max_mmps || last_good == 0) { good_frames++; }else{ good_frames = 0; } if( good_frames > 5 ) { T_gw = T_hw; last_good = clock(); } }else{ good_frames = 0; } if( good_frames > 5 ) { if( lock_to_cam ) { s_cam.Set(FromTooN(toTooN(T_gw))); } if( pangolin::Pushed(add_keyframe) ) { Keyframe* kf = new Keyframe(); kf->T_kw = T_gw; kf->conics.insert(kf->conics.begin(),conics.begin(),conics.end()); kf->conics_target_map.insert(kf->conics_target_map.begin(),conics_target_map.begin(),conics_target_map.end()); keyframes.push_back(kf); } } // Display Live Image glColor3f(1,1,1); vVideo.ActivateScissorAndClear(); if(!disp_thresh) { texRGB.Upload(Irgb.data(),GL_RGB,GL_UNSIGNED_BYTE); texRGB.RenderToViewportFlipY(); }else{ tex.Upload(tI.data(),GL_LUMINANCE,GL_UNSIGNED_BYTE); tex.RenderToViewportFlipY(); } // Display detected ellipses glOrtho(-0.5,w-0.5,h-0.5,-0.5,0,1.0); for( int i=0; i<ellipses.size(); ++i ) { glColorBin(conics_target_map[i],target.circles().size()); DrawCross(ellipses[i],2); } glColor3f(0,1,0); DrawLabels(candidates); // Display 3D Vis glEnable(GL_DEPTH_TEST); v3D.ActivateScissorAndClear(s_cam); glDepthFunc(GL_LEQUAL); glDrawAxis(30); DrawTarget(target,Vector2d(0,0),1,0.2,0.2); DrawTarget(conics_target_map,target,Vector2d(0,0),1); // Draw Camera glColor3f(1,0,0); glDrawFrustrum(cam.Kinv(),w,h,T_gw.inverse(),10); Vector3d r_w = Vector3d::Zero(); if( keyframes.size() >= 3 ) { // Method 1 // As described in this paper // "Camera Pose Estimation using Images of Planar Mirror Reflections" ECCV 2010 // Rui Rodrigues, Joao Barreto, Urbano Nunes 
const unsigned Nkf = keyframes.size() -1; const Sophus::SE3 T_w0 = keyframes[0].T_kw.inverse(); MatrixXd As(Nkf*3,4); As.setZero(); for( int i=0; i<Nkf; ++i ) { const Sophus::SE3 T_iw = keyframes[i+1].T_kw * T_w0; const Vector3d t = T_iw.translation(); const Vector3d theta_omega = T_iw.so3().log(); const double theta = theta_omega.norm(); const Vector3d omega = theta_omega / theta; const double tanthby2 = tan(theta/2.0); const Matrix3d skew_t = SkewSym(t); As.block<3,3>(3*i,0) = skew_t + tanthby2 * omega * t.transpose(); As.block<3,1>(3*i,3) = -2 * tanthby2 * omega; } // Solve system using SVD Eigen::JacobiSVD<MatrixXd> svd(As, ComputeFullV); // Get mirror plane for virtual camera 0 in cam0 FoR const Vector4d _N_0 = svd.matrixV().col(3); Vector4d N_0 = _N_0 / (_N_0.head<3>()).norm(); // d has different meaning in paper. Negate to match my thesis. N_0(3) *= -1; // Render plane corresponding to first keyframe glSetFrameOfReferenceF(keyframes[0].T_kw.inverse()); glColor3f(0.2,0,0); DrawPlane(N_0,10,100); glUnsetFrameOfReference(); // Attempt to render real camera. // We seem to have the correct translation. Rotation is wrong. // It's because the real camera (from the paper), relates to the real // points, Q, but we're using Qhat which doesn't match the real Q. 
{ const Vector4d Nz = Vector4d(0,0,1,0); const Matrix4d T_wr(SymmetryTransform(Nz) * T_w0.matrix() * SymmetryTransform(N_0)); glColor3f(0,0,1); glMatrixMode(GL_MODELVIEW); glPushMatrix(); glMultMatrix( T_wr ); glDrawFrustrum(cam.Kinv(),w,h,30); glPopMatrix(); } // Draw live mirror if( draw_mirror ) { Vector3d l_w = T_gw.inverse().translation(); l_w[2] *= -1; const Vector3d N = l_w - r_w; const double dist = N.norm(); const Vector3d n = N / dist; const double d = -(r_w + N/2.0).dot(n); glColor3f(1,0,0); DrawCross(l_w); glColor4f(0.2,0.2,0.2,0.2); DrawPlane(Vector4d(n[0],n[1],n[2],d),10,100); } } // Keyframes glColor3f(0.5,0.5,0.5); // foreach (Keyframe& kf, keyframes) { // glDrawFrustrum(cam.Kinv(),w,h,kf.T_kw.inverse(),30); // } if(keyframes.size() > 0 ) { glSetFrameOfReferenceF(keyframes[0].T_kw.inverse()); glDrawAxis(30); glUnsetFrameOfReference(); glColor3f(1,0.5,0.5); glDrawFrustrum(cam.Kinv(),w,h,keyframes[0].T_kw.inverse(),30); } if(pangolin::Pushed(record)) { video.Record(); input.Record(); } if(pangolin::Pushed(play)) { video.Play(true); input.PlayBuffer(0,input.Size()-1); input.SaveBuffer(ui_file); } if(pangolin::Pushed(source)) { video.Source(); input.Stop(); input.SaveBuffer(ui_file); } vPanel.Render(); // Swap back buffer with front glutSwapBuffers(); // Process window events via GLUT glutMainLoopEvent(); } return 0; }