Example #1
void DirectPoseEstimationSingleLayer(
        const cv::Mat &img1,
        const cv::Mat &img2,
        const VecVector2d &px_ref,
        const vector<double> &depth_ref,
        Sophus::SE3 &T21
) {

    // parameters
    int half_patch_size = 4;
    int iterations = 100;

    double cost = 0, lastCost = 0;
    int nGood = 0;  // good projections
    VecVector2d goodProjection;

    for (int iter = 0; iter < iterations; iter++) {
        nGood = 0;
        cost = 0;  // reset the accumulated photometric cost each iteration
        goodProjection.clear();

        // Define Hessian and bias
        Matrix6d H = Matrix6d::Zero();  // 6x6 Hessian
        Vector6d b = Vector6d::Zero();  // 6x1 bias
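        // Gauss-Newton: accumulate H = sum J J^T and b = -sum e J over all
        // patch pixels below, then solve H * update = b for the pose increment.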

        for (size_t i = 0; i < px_ref.size(); i++) {

            // compute the projection in the second image
            // TODO START YOUR CODE HERE
            double u = 0, v = 0;
            Eigen::Vector2d p1 = px_ref[i];

            Eigen::Vector3d pw((p1(0) - cx) / fx * depth_ref[i], (p1(1) - cy) / fy * depth_ref[i], depth_ref[i]);
            Eigen::Vector3d pc = T21 * pw;
            if (pc(2) < 0)  // point projects behind the current camera
                continue;
            Eigen::Vector2d p2(fx * pc(0) / pc(2) + cx, fy * pc(1) / pc(2) + cy);

            u = p2(0);
            v = p2(1);

            if (u <= half_patch_size + 1 || u >= img2.cols - half_patch_size - 1 ||
                v <= half_patch_size + 1 || v >= img2.rows - half_patch_size - 1) {
                continue;
            }
            nGood++;
            goodProjection.push_back(Eigen::Vector2d(u, v));

            // and compute error and jacobian
            for (int x = -half_patch_size; x < half_patch_size; x++)
                for (int y = -half_patch_size; y < half_patch_size; y++) {

                    double error = GetPixelValue(img1, p1(0)+x, p1(1)+y) - GetPixelValue(img2, p2(0)+x, p2(1)+y);
                    // evaluate the projection Jacobian at the transformed point in the current frame
                    double X = pc(0), Y = pc(1), Z = pc(2);

                    Matrix26d J_pixel_xi;   // pixel to \xi in Lie algebra
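                    // For P' = (X, Y, Z) = T21 * P in the current frame, the 2x6
                    // Jacobian of the projected pixel (u, v) w.r.t. a left
                    // perturbation of T21 is:
                    //   du/dxi = [fx/Z, 0, -fx*X/Z^2, -fx*X*Y/Z^2, fx + fx*X^2/Z^2, -fx*Y/Z]
                    //   dv/dxi = [0, fy/Z, -fy*Y/Z^2, -fy - fy*Y^2/Z^2, fy*X*Y/Z^2, fy*X/Z]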
                    J_pixel_xi(0, 0) = fx / Z;
                    J_pixel_xi(0, 1) = 0;
                    J_pixel_xi(0, 2) = -fx * X / Z / Z;
                    J_pixel_xi(0, 3) = -fx * X * Y / Z / Z;
                    J_pixel_xi(0, 4) = fx + fx * X * X / Z / Z;
                    J_pixel_xi(0, 5) = -fx * Y / Z;
                    J_pixel_xi(1, 0) = 0;
                    J_pixel_xi(1, 1) = fy / Z;
                    J_pixel_xi(1, 2) = -fy * Y / Z / Z;
                    J_pixel_xi(1, 3) = -fy - fy * Y * Y / Z / Z;
                    J_pixel_xi(1, 4) = fy * X * Y / Z / Z;
                    J_pixel_xi(1, 5) = fy * X / Z;
                    Eigen::Vector2d J_img_pixel;    // image gradients
                    J_img_pixel[0] = (GetPixelValue(img2, p2(0)+x+1, p2(1)+y) - GetPixelValue(img2,p2(0)+x-1,p2(1)+y))/2;
                    J_img_pixel[1] = (GetPixelValue(img2, p2(0)+x, p2(1)+y+1) - GetPixelValue(img2,p2(0)+x,p2(1)+y-1))/2;

                    // total jacobian
                    Vector6d J = -(J_img_pixel.transpose() * J_pixel_xi).transpose();

                    H += J * J.transpose();
                    b += -error * J;
                    cost += error * error;
                }
            // END YOUR CODE HERE
        }

        // solve update and put it into estimation
        // TODO START YOUR CODE HERE
        Vector6d update = H.ldlt().solve(b);   // solve H * update = b; LDLT is more stable than H.inverse()
        T21 = Sophus::SE3::exp(update) * T21;  // left-multiplicative update, matching the left perturbation
        // END YOUR CODE HERE

        if (nGood > 0)
            cost /= nGood;

        if (std::isnan(update[0])) {
            // sometimes occurs when the patch is uniformly black or white and H is singular
            cout << "update is nan" << endl;
            break;
        }
        if (iter > 0 && cost > lastCost) {
            cout << "cost increased: " << cost << ", " << lastCost << endl;
            break;
        }
        lastCost = cost;
        cout << "cost = " << cost << ", good = " << nGood << endl;
    }
    cout << "good projection: " << nGood << endl;
    cout << "T21 = \n" << T21.matrix() << endl;

    // in order to help you debug, we plot the projected pixels here
    cv::Mat img1_show, img2_show;
    cv::cvtColor(img1, img1_show, CV_GRAY2BGR);
    cv::cvtColor(img2, img2_show, CV_GRAY2BGR);
    for (auto &px: px_ref) {
        cv::rectangle(img1_show, cv::Point2f(px[0] - 2, px[1] - 2), cv::Point2f(px[0] + 2, px[1] + 2),
                      cv::Scalar(0, 250, 0));
    }
    for (auto &px: goodProjection) {
        cv::rectangle(img2_show, cv::Point2f(px[0] - 2, px[1] - 2), cv::Point2f(px[0] + 2, px[1] + 2),
                      cv::Scalar(0, 250, 0));
    }
    cv::imshow("reference", img1_show);
    cv::imshow("current", img2_show);
    cv::waitKey();
}
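
In practice the single-layer routine is wrapped in a coarse-to-fine loop over an image pyramid. The sketch below is a minimal version of such a wrapper, assuming fx, fy, cx, cy are the mutable global intrinsics read by DirectPoseEstimationSingleLayer and reusing the VecVector2d type from the listing above.

// A minimal coarse-to-fine sketch: run the single-layer estimator from the
// coarsest pyramid level to the finest, rescaling the intrinsics per level.
void DirectPoseEstimationMultiLayer(
        const cv::Mat &img1,
        const cv::Mat &img2,
        const VecVector2d &px_ref,
        const vector<double> &depth_ref,
        Sophus::SE3 &T21
) {
    const int pyramids = 4;
    const double scales[] = {1.0, 0.5, 0.25, 0.125};

    // build image pyramids by repeated half-size resizing
    vector<cv::Mat> pyr1, pyr2;
    pyr1.push_back(img1);
    pyr2.push_back(img2);
    for (int i = 1; i < pyramids; i++) {
        cv::Mat down1, down2;
        cv::resize(pyr1[i - 1], down1, cv::Size(), 0.5, 0.5);
        cv::resize(pyr2[i - 1], down2, cv::Size(), 0.5, 0.5);
        pyr1.push_back(down1);
        pyr2.push_back(down2);
    }

    const double fxG = fx, fyG = fy, cxG = cx, cyG = cy;  // backup full-resolution intrinsics
    for (int level = pyramids - 1; level >= 0; level--) {
        // scale the keypoints and the (global) intrinsics to this level
        VecVector2d px_ref_pyr;
        for (auto &px : px_ref)
            px_ref_pyr.push_back(scales[level] * px);
        fx = fxG * scales[level];
        fy = fyG * scales[level];
        cx = cxG * scales[level];
        cy = cyG * scales[level];
        DirectPoseEstimationSingleLayer(pyr1[level], pyr2[level], px_ref_pyr, depth_ref, T21);
    }
    fx = fxG; fy = fyG; cx = cxG; cy = cyG;  // restore
}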
Example #2
int main( int /*argc*/, char* argv[] )
{
    const static string ui_file = "app.clicks";

    // Load configuration data
    pangolin::ParseVarsFile("app.cfg");

    InputRecordRepeat input("ui.");
    input.LoadBuffer(ui_file);

    // Target to track from
    Target target;
    target.GenerateRandom(60,25/(842.0/297.0),75/(842.0/297.0),15/(842.0/297.0),Vector2d(297,210));
    //  target.GenerateCircular(60,20,50,15,makeVector(210,210));
    //  target.GenerateEmptyCircle(60,25,75,15,200,makeVector(297,210));
    target.SaveEPS("target_A4.eps");
    cout << "Calibration target saved as: target_A4.eps" << endl;

    // Setup Video
    Var<string> video_uri("video_uri");
    VideoRecordRepeat video(video_uri, "video.pvn", 1024*1024*200);
//    VideoInput video(video_uri);

    const unsigned w = video.Width();
    const unsigned h = video.Height();

    // Create Glut window
    pangolin::CreateGlutWindowAndBind("Main",2*w+PANEL_WIDTH,h);

    // Pangolin 3D Render state
    pangolin::OpenGlRenderState s_cam;
    s_cam.Set(ProjectionMatrixRDF_TopLeft(640,480,420,420,320,240,1,1E6));
    s_cam.Set(FromTooN(toTooN(Sophus::SE3(Sophus::SO3(),Vector3d(-target.Size()[0]/2,-target.Size()[1]/2,500) ))));
    pangolin::Handler3D handler(s_cam);

    // Create viewport for video with fixed aspect
    View& vPanel = pangolin::CreatePanel("ui").SetBounds(1.0,0.0,0,PANEL_WIDTH);
    View& vVideo = pangolin::Display("Video").SetAspect((float)w/h);
    View& v3D    = pangolin::Display("3D").SetAspect((float)w/h).SetHandler(&handler);
    //  View& vDebug = pangolin::Display("Debug").SetAspect((float)w/h);

    Display("Container")
            .SetBounds(1.0,0.0,PANEL_WIDTH,1.0,false)
            .SetLayout(LayoutEqual)
            .AddDisplay(vVideo)
            .AddDisplay(v3D)
            //      .AddDisplay(vDebug)
            ;

    // OpenGl Texture for video frame
    GlTexture texRGB(w,h,GL_RGBA8);
    GlTexture tex(w,h,GL_LUMINANCE8);

    // Declare Image buffers
    CVD::ImageRef size(w,h);
    Image<Rgb<byte> > Irgb(size);
    Image<byte> I(size);
    Image<float> intI(size);
    Image<short> lI(size);
    Image<std::array<float,2> > dI(size);
    Image<byte> tI(size);

    // Camera parameters
    Matrix<float,5,1> cam_params; // = Var<Matrix<float,5,1> >("cam_params");
    cerr << "Not loading params properly" << endl;
    cam_params << 0.694621, 0.925258, 0.505055, 0.484551, 0.968455;
    FovCamera cam( w,h, w*cam_params[0],h*cam_params[1], w*cam_params[2],h*cam_params[3], cam_params[4] );

    // Last good pose
    Sophus::SE3 T_gw;
    std::clock_t last_good = 0;  // both are read before first assignment below
    int good_frames = 0;

    // Pose hypothesis
    Sophus::SE3 T_hw;

    // Fixed mirrored pose
    Sophus::SE3 T_0w;

    // Stored keyframes
    boost::ptr_vector<Keyframe> keyframes;

    // Variables
    Var<bool> record("ui.Record",false,false);
    Var<bool> play("ui.Play",false,false);
    Var<bool> source("ui.Source",false,false);

    Var<bool> add_keyframe("ui.Add Keyframe",false,false);

    Var<bool> use_mirror("ui.Use Mirror",false,true);
    Var<bool> draw_mirror("ui.Draw Mirror",false,true);
//    Var<bool> calc_mirror_pose("ui.Calculate Mirrored Pose",false,false);

    Var<bool> disp_thresh("ui.Display Thresh",false);
    Var<float> at_threshold("ui.Adap Threshold",1.0,0,1.0);
    Var<int> at_window("ui.Adapt Window",w/3,1,200);
    Var<float> conic_min_area(".Conic min area",25, 0, 100);
    Var<float> conic_max_area(".Conic max area",4E4, 0, 1E5);
    Var<float> conic_min_density(".Conic min density",0.4, 0, 1.0);
    Var<float> conic_min_aspect(".Conic min aspect",0.1, 0, 1.0);
    Var<int> target_match_neighbours(".Match Descriptor Neighbours",10, 5, 20);
    Var<int> target_ransac_its("ui.Ransac Its", 100, 20, 500);
    Var<int> target_ransac_min_pts("ui.Ransac Min Pts", 5, 5, 10);
    Var<float> target_ransac_max_inlier_err_mm("ui.Ransac Max Inlier Err (mm)", 15, 0, 50);
    Var<float> target_plane_inlier_thresh("ui.Plane inlier thresh", 1.5, 0.1, 10);
    Var<double> rms("ui.RMS", 0);
    Var<double> max_rms("ui.max RMS", 1.0, 0.01, 10);
    Var<double> robust_4pt_inlier_tol("ui.Ransac 4Pt Inlier", 0.006, 1E-3, 1E-2);
    Var<int>    robust_4pt_its("ui.Ransac 4pt its", 100, 10, 500);
    Var<double> max_mmps("ui.max mms per sec", 1500, 100, 2000);

    Var<bool> lock_to_cam("ui.AR",false);

    for(int frame=0; !pangolin::ShouldQuit(); ++frame)
    {
        Viewport::DisableScissor();
        glClear(GL_DEPTH_BUFFER_BIT | GL_COLOR_BUFFER_BIT);

        // Get newest frame from camera and upload to GPU as texture
        video.GrabNewest((byte*)Irgb.data(),true);

        // Associate input with this video frame
        input.SetIndex(video.FrameId());

        // Generic processing
        ConvertRGBtoI(Irgb,I);
        gradient(w,h,I.data(),dI.data());
        integral_image(w,h,I.data(),intI.data());

        // Threshold and label image
        AdaptiveThreshold(w,h,I.data(),intI.data(),tI.data(),at_threshold,at_window,(byte)0,(byte)255);
        vector<PixelClass> labels;
        Label(w,h,tI.data(),lI.data(),labels);

        // Find conics
        vector<PixelClass> candidates;
        vector<Conic> conics;
        FindCandidateConicsFromLabels(
                    w,h,labels,candidates,
                    conic_min_area,conic_max_area,conic_min_density, conic_min_aspect
                    );
        FindConics(w,h,candidates,dI.data(),conics );

        // Generate map and point structures
        vector<int> conics_target_map(conics.size(),-1);
        vector<Vector2d > ellipses;
        for( size_t i=0; i < conics.size(); ++i )
            ellipses.push_back(conics[i].center);

        // Update pose given correspondences
        T_hw = FindPose(cam,target.circles3D(),ellipses,conics_target_map,robust_4pt_inlier_tol,robust_4pt_its);
        rms = ReprojectionErrorRMS(cam,T_hw,target.circles3D(),ellipses,conics_target_map);

        int inliers = 0;
        for( size_t i=0; i < conics_target_map.size(); ++i)
            if( conics_target_map[i] >= 0 ) inliers++;

        if( !isfinite((double)rms) || rms > max_rms || (good_frames < 5 && inliers < 6) )
        {
            // Undistort Conics
            vector<Conic> conics_camframe;
            for( unsigned int i=0; i<conics.size(); ++i )
                conics_camframe.push_back(UnmapConic(conics[i],cam));

            // Find target given (approximately) undistorted conics
            const static LinearCamera idcam(-1,-1,1,1,0,0);
            target.FindTarget(
                        idcam,conics_camframe, conics_target_map, target_match_neighbours,
                        target_ransac_its, target_ransac_min_pts, target_ransac_max_inlier_err_mm,
                        target_plane_inlier_thresh,use_mirror
                        );

            // Estimate camera pose relative to target coordinate system
            T_hw = FindPose(cam,target.circles3D(),ellipses,conics_target_map,robust_4pt_inlier_tol,robust_4pt_its);
            rms = ReprojectionErrorRMS(cam,T_hw,target.circles3D(),ellipses,conics_target_map);
        }

        if( isfinite((double)rms) && rms < max_rms )
        {
            // seconds since good
            const double t = double(clock() - last_good) / CLOCKS_PER_SEC;
            const double dx = (T_hw.inverse().translation() - T_gw.inverse().translation()).norm();
            if( dx / t < max_mmps || last_good == 0) {
                good_frames++;
            }else{
                good_frames = 0;
            }
            if( good_frames > 5 )
            {
                T_gw = T_hw;
                last_good = clock();
            }
        }else{
            good_frames = 0;
        }

        if( good_frames > 5 )
        {
            if( lock_to_cam ) {
                s_cam.Set(FromTooN(toTooN(T_gw)));
            }

            if( pangolin::Pushed(add_keyframe) ) {
                Keyframe* kf = new Keyframe();
                kf->T_kw = T_gw;
                kf->conics.insert(kf->conics.begin(),conics.begin(),conics.end());
                kf->conics_target_map.insert(kf->conics_target_map.begin(),conics_target_map.begin(),conics_target_map.end());
                keyframes.push_back(kf);
            }
        }

        // Display Live Image
        glColor3f(1,1,1);
        vVideo.ActivateScissorAndClear();

        if(!disp_thresh) {
            texRGB.Upload(Irgb.data(),GL_RGB,GL_UNSIGNED_BYTE);
            texRGB.RenderToViewportFlipY();
        }else{
            tex.Upload(tI.data(),GL_LUMINANCE,GL_UNSIGNED_BYTE);
            tex.RenderToViewportFlipY();
        }

        // Display detected ellipses
        glOrtho(-0.5,w-0.5,h-0.5,-0.5,0,1.0);
        for( size_t i=0; i<ellipses.size(); ++i ) {
            glColorBin(conics_target_map[i],target.circles().size());
            DrawCross(ellipses[i],2);
        }

        glColor3f(0,1,0);
        DrawLabels(candidates);

        // Display 3D Vis
        glEnable(GL_DEPTH_TEST);
        v3D.ActivateScissorAndClear(s_cam);
        glDepthFunc(GL_LEQUAL);
        glDrawAxis(30);
        DrawTarget(target,Vector2d(0,0),1,0.2,0.2);
        DrawTarget(conics_target_map,target,Vector2d(0,0),1);

        // Draw Camera
        glColor3f(1,0,0);
        glDrawFrustrum(cam.Kinv(),w,h,T_gw.inverse(),10);

        Vector3d r_w = Vector3d::Zero();

        if( keyframes.size() >= 3 )
        {
            // Method 1
            // As described in this paper
            // "Camera Pose Estimation using Images of Planar Mirror Reflections" ECCV 2010
            // Rui Rodrigues, Joao Barreto, Urbano Nunes

            const unsigned Nkf = keyframes.size() -1;
            const Sophus::SE3 T_w0 = keyframes[0].T_kw.inverse();

            MatrixXd As(Nkf*3,4);
            As.setZero();
            for( unsigned i=0; i<Nkf; ++i )
            {
                const Sophus::SE3 T_iw = keyframes[i+1].T_kw * T_w0;
                const Vector3d t = T_iw.translation();
                const Vector3d theta_omega = T_iw.so3().log();
                const double theta = theta_omega.norm();
                const Vector3d omega = theta_omega / theta;
                const double tanthby2 = tan(theta/2.0);
                const Matrix3d skew_t = SkewSym(t);
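                // Each keyframe pair constrains the mirror plane (n, d) via
                //   (skew(t) + tan(theta/2) * omega * t^T) * n = 2 * tan(theta/2) * omega * d,
                // stacked below as As * [n; d] = 0 and solved by SVD.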

                As.block<3,3>(3*i,0) = skew_t + tanthby2 * omega * t.transpose();
                As.block<3,1>(3*i,3) = -2 * tanthby2 * omega;
            }

            // Solve system using SVD
            Eigen::JacobiSVD<MatrixXd> svd(As, Eigen::ComputeFullV);

            // Get mirror plane for virtual camera 0 in cam0 FoR
            const Vector4d _N_0 = svd.matrixV().col(3);
            Vector4d N_0 = _N_0 / (_N_0.head<3>()).norm();
            // d has different meaning in paper. Negate to match my thesis.
            N_0(3) *= -1;

            // Render plane corresponding to first keyframe
            glSetFrameOfReferenceF(keyframes[0].T_kw.inverse());
            glColor3f(0.2,0,0);
            DrawPlane(N_0,10,100);
            glUnsetFrameOfReference();

            // Attempt to render real camera.
            // We seem to have the correct translation. Rotation is wrong.
            // It's because the real camera (from the paper), relates to the real
            // points, Q, but we're using Qhat which doesn't match the real Q.
            {
                const Vector4d Nz = Vector4d(0,0,1,0);
                const Matrix4d T_wr(SymmetryTransform(Nz) * T_w0.matrix() * SymmetryTransform(N_0));
                glColor3f(0,0,1);

                glMatrixMode(GL_MODELVIEW);
                glPushMatrix();
                glMultMatrix( T_wr );
                glDrawFrustrum(cam.Kinv(),w,h,30);
                glPopMatrix();
            }

            // Draw live mirror
            if( draw_mirror )
            {
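                // The mirror plane is the perpendicular bisector of the segment
                // joining the reference point r_w and the reflected camera center l_w.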
                Vector3d l_w = T_gw.inverse().translation();
                l_w[2] *= -1;
                const Vector3d N = l_w - r_w;
                const double dist = N.norm();
                const Vector3d n = N / dist;
                const double d = -(r_w + N/2.0).dot(n);

                glColor3f(1,0,0);
                DrawCross(l_w);

                glColor4f(0.2,0.2,0.2,0.2);
                DrawPlane(Vector4d(n[0],n[1],n[2],d),10,100);
            }
        }

        // Keyframes
        glColor3f(0.5,0.5,0.5);
//        foreach (Keyframe& kf, keyframes) {
//            glDrawFrustrum(cam.Kinv(),w,h,kf.T_kw.inverse(),30);
//        }
        if(keyframes.size() > 0 )
        {
          glSetFrameOfReferenceF(keyframes[0].T_kw.inverse());
          glDrawAxis(30);
          glUnsetFrameOfReference();

          glColor3f(1,0.5,0.5);
          glDrawFrustrum(cam.Kinv(),w,h,keyframes[0].T_kw.inverse(),30);
        }

        if(pangolin::Pushed(record)) {
            video.Record();
            input.Record();
        }

        if(pangolin::Pushed(play)) {
            video.Play(true);
            input.PlayBuffer(0,input.Size()-1);
            input.SaveBuffer(ui_file);
        }

        if(pangolin::Pushed(source)) {
            video.Source();
            input.Stop();
            input.SaveBuffer(ui_file);
        }

        vPanel.Render();

        // Swap back buffer with front
        glutSwapBuffers();

        // Process window events via GLUT
        glutMainLoopEvent();
    }

    return 0;
}
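
SkewSym and SymmetryTransform are used above but not defined in this listing. The following is a minimal sketch of one standard construction, assuming the plane is stored as N = (n, d) with n of unit length; the original helpers may adopt a different sign convention for d (the listing itself negates N_0(3) to switch conventions).

#include <Eigen/Dense>

// Skew-symmetric matrix such that SkewSym(a) * b == a.cross(b).
Eigen::Matrix3d SkewSym(const Eigen::Vector3d &a)
{
    Eigen::Matrix3d S;
    S <<     0, -a(2),  a(1),
          a(2),     0, -a(0),
         -a(1),  a(0),     0;
    return S;
}

// Reflection across the plane { x : n.dot(x) + d = 0 }, with n unit length:
//   x' = x - 2 * (n.dot(x) + d) * n.
Eigen::Matrix4d SymmetryTransform(const Eigen::Vector4d &N)
{
    const Eigen::Vector3d n = N.head<3>();
    const double d = N(3);
    Eigen::Matrix4d M = Eigen::Matrix4d::Identity();
    M.topLeftCorner<3,3>() -= 2.0 * n * n.transpose();
    M.topRightCorner<3,1>() = -2.0 * d * n;
    return M;
}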