void VideoSource::GetAndFillFrameBWandRGB(CVD::Image<CVD::byte> &imBW, CVD::Image<CVD::Rgb<CVD::byte> > &imRGB)
{
    Error error;

    imRGB.resize(mirSize);
    imBW.resize(mirSize);
    static FlyCapture2::Image imCap;
    error = cam.RetrieveBuffer(&imCap);
    if (error != PGRERROR_OK)
    {
        PrintErrorAndExit(error);
  
    }

  
    unsigned char *pImage = imCap.GetData();

    for (int y = 0; y<mirSize.y; y++) {
        for (int x = 0; x<mirSize.x; x++) {
            imRGB[y][x].blue = *pImage;
            pImage++;

            imRGB[y][x].green =*pImage;
            imBW[y][x] = *pImage;
            pImage++;

            imRGB[y][x].red = *pImage;
            pImage++;
        }
    }

}
예제 #2
0
void Intrinsic::UndistortionMap::Set(const Intrinsic &K) {
  const float sx = float(FTR_UNDIST_LUT_SIZE - 1) / K.w();
  const float sy = float(FTR_UNDIST_LUT_SIZE - 1) / K.h();
  m_fx = sx * K.k().m_fx; m_fy = sy * K.k().m_fy;
  m_cx = sx * K.k().m_cx; m_cy = sy * K.k().m_cy;
  const float fxI = 1.0f / m_fx, fyI = 1.0f / m_fy;
  m_xns.Resize(FTR_UNDIST_LUT_SIZE, FTR_UNDIST_LUT_SIZE);

#ifdef FTR_UNDIST_VERBOSE
  UT::PrintSeparator();
#endif
  Point2D xd, xn;
  float v;
  int x[2] = {FTR_UNDIST_LUT_SIZE / 2, FTR_UNDIST_LUT_SIZE / 2};
  int b[2][2] = {{x[0] - 1, x[0] + 1}, {x[1] - 1, x[1] + 1}};
  const int d[2] = {-1, 1};
  const int N = m_xns.Size();
  for(int i = 0, ix = 0, id = 0; i < N; ++i) {
    xd.x() = fxI * (x[0] - m_cx);
    xd.y() = fyI * (x[1] - m_cy);
    if (i == 0)
      xn = xd;
    else
      xn = xd * v;
    if (!K.Undistort(xd, &xn, NULL, NULL, true))
      exit(0);
    v = sqrtf(xn.SquaredLength() / xd.SquaredLength());
//#ifdef CFG_DEBUG
#if 0
    xn.x() = v;
#endif
    m_xns[x[1]][x[0]] = xn;
//#ifdef CFG_DEBUG
#if 0
    //UT::Print("%d %d\n", x[0], x[1]);
    const float s = 0.5f;
    const int _w = K.w(), _h = K.h();
    const float _fx = K.fx() * s, _fy = K.fy() * s;
    const float _cx = (_w - 1) * 0.5f, _cy = (_h - 1) * 0.5f;
    CVD::Image<CVD::Rgb<ubyte> > I;
    I.resize(CVD::ImageRef(_w, _h));
    I.zero();
    UT::ImageDrawCross(I, int(_fx * xd.x() + _cx), int(_fy * xd.y() + _cy), 5, CVD::Rgb<ubyte>(255, 0, 0));
    UT::ImageDrawCross(I, int(_fx * xn.x() + _cx), int(_fy * xn.y() + _cy), 5, CVD::Rgb<ubyte>(0, 255, 0));
    UT::ImageSave(UT::String("D:/tmp/test%04d.jpg", i), I);
#endif
    if (x[ix] == b[ix][id]) {
      b[ix][id] += d[id];
      ix = 1 - ix;
      if (ix == 0)
        id = 1 - id;
    }
    x[ix] += d[id];
  }
#ifdef FTR_UNDIST_VERBOSE
  UT::PrintSeparator();
#endif
}
예제 #3
0
	void VrmlGame::detect_corners(std::string &markerdata, std::vector<std::vector<Wml::Vector2d> > &corners)
	{
		bool ret = g_markerkeybase.SetFile(markerdata);

		if (!ret)
		{
			return;
		}

		//////////////////////////////////////////////////////////////////////////
		/// \brief Generated features.
		//////////////////////////////////////////////////////////////////////////
		std::vector<FeatureSpace::ArtificialFeature> _features;

		//////////////////////////////////////////////////////////////////////////
		/// \brief Found, <feature index, key index>
		//////////////////////////////////////////////////////////////////////////
		std::map<int, int> _featurekey;

		corners.resize(mpMap->vpKeyFrames.size());
		int i = 0;
		vector<KeyFrame *>::iterator kf;
		for( kf = mpMap->vpKeyFrames.begin(); kf != mpMap->vpKeyFrames.end(); kf++ )
		{
			corners[i].clear();

			CSimpleImageb simage;

			CVD::Image<CVD::byte> tempimage = (*kf)->aLevels[0].im;
			int height,width;
			height = tempimage.size().y;
			width = tempimage.size().x;
			//convert coordinate
			unsigned char* tempdata = (unsigned char*)malloc(sizeof(unsigned char)*width*height);
			for (int j=0; j<height; j++)
			{
				memcpy(tempdata+width*(height-1-j),(CVD::byte*)tempimage.data()+width*j,width*sizeof(unsigned char));
			}

			simage.set_data(tempdata, width, height, 1);
			free(tempdata);
			Pattern::CSimpleMarkerDetector smd;
			std::vector<pattern_parameter<MARKER_FLOAT> > patterns;

			bool ret = smd.Fit(simage, patterns);


			//////////////////////////////////////////////////////////////////////////
			std::vector<Pattern::RectHomography<MARKER_FLOAT> > recthomos;
			recthomos.resize(patterns.size());


			// use the pattern to extract the image features.
			std::vector<pattern_parameter<MARKER_FLOAT> >::iterator it = patterns.begin();

			for (int p = 0; it != patterns.end(); ++ it, ++ p)
			{
				Pattern::RectHomography<MARKER_FLOAT> &obj = recthomos[p];

				obj.corners = it->corners;
				assert(obj.corners.size() == 4);  
			}

			//////////////////////////////////////////////////////////////////////////
			CSimpleImagef grey_image;
			SimpleImage::grayscale(simage, grey_image);

			SimpleImage::flip(grey_image);


			// convert the marker region image to feature description.
			CArtificialFeatureMatch::Pattern2Feature(grey_image, recthomos, _features);

			//////////////////////////////////////////////////////////////////////////
			CArtificialFeatureMatch::SearchForFeature(_features, g_markerkeybase, _featurekey);


			//////////////////////////////////////////////////////////////////////////
			for(int c = 0; c < _features.size(); ++c)
			{
				if (_features[c]._state == FeatureSpace::CALIBRATED)
				{
					assert (_features[c]._corners.size() == 4);

					for(int p = 0; p < _features[c]._corners.size(); ++p)
					{
						corners[i].push_back(Wml::Vector2d(_features[c]._corners[p].x2, _features[c]._corners[p].y2));
						std::cout<<_features[c]._corners[p].x2<<","<<_features[c]._corners[p].y2<<std::endl;
					}
					std::cout<<i<<" image marker found\n";
				}
				break;
			}
			i++;
		}
	}
예제 #4
0
void EdgeDetector::mark_edge_point_along_search_path( std::vector<int>& viEdgeIndex,
    std::vector<TooN::Vector<3> > & vv3EdgeStrs, const CVD::Image<REAL_TYPE>& image,
    const TooN::Vector<2>& v2ImagePoint, const TooN::Vector<2>& v2Normal )
{
  TooN::Vector<3> edgeStr;
  int i, j;
  register int k, chkpt, nSearch, nSearch_2nw, nSearch_nw, TwoiMaxSearchRange;
  TooN::Vector<2> v2Xn;
  viEdgeIndex.clear();
  TwoiMaxSearchRange = 2*miMaxSearchRange;
  // apply mask to find edgeStr in both magnitude and direction for a whole range of search path.
  for( nSearch = 0; nSearch <= TwoiMaxSearchRange; ++nSearch )
  {
    v2Xn = v2ImagePoint + ( nSearch - miMaxSearchRange ) * v2Normal;
    if( !image.in_image_with_border( CVD::ir( v2Xn ), 10 ) ) break;
    edgeStrength( edgeStr, image, int( v2Xn[0] ), int( v2Xn[1] ) );
    vv3EdgeStrs[nSearch] = edgeStr;
  }
  // 1D NMS for (2nw+1)-Neighborhood
  // Efficient Non-Maximum Suppression, Alexander Neubeck and Luc Van Gool, ICPR, 2006
  const int nw = 2; // test nw = 2, must be more than or equal to 2;
  i = nw;
  CompPartialMax( vv3EdgeStrs, 0, i - 1 );
  chkpt = -1;
  nSearch_2nw = nSearch - 2*nw; // nSearch may not be equal to TwoiMaxSearchRange if it is reach an image border.
  nSearch_nw = nSearch - nw;
  while( i <= nSearch_2nw )
  {
    j = CompPartialMax( vv3EdgeStrs, i, i + nw );
    k = CompPartialMax( vv3EdgeStrs, i + nw + 1, j + nw );
    if( i == j || vv3EdgeStrs[j][0] > vv3EdgeStrs[k][0] )
    {
      if( ( chkpt <= j - nw || vv3EdgeStrs[j][0] >= mvrPMAX[chkpt] )
          && ( j - nw == i || vv3EdgeStrs[j][0] >= mvrPMAX[j - nw] ) )
      {
        if( vv3EdgeStrs[j][0] > mfEdgeThreshold ) viEdgeIndex.push_back( j );
      }
      if( i < j ) chkpt = i + nw + 1;
      i = j + nw + 1;
    }
    else
    {
      i = k;
      chkpt = j + nw + 1;
      while( i <= nSearch_nw )
      {
        j = CompPartialMax( vv3EdgeStrs, chkpt, i + nw );
        if( vv3EdgeStrs[i][0] > vv3EdgeStrs[j][0] )
        {
          if( vv3EdgeStrs[i][0] > mfEdgeThreshold ) viEdgeIndex.push_back( i );
          i = i + nw + 1;
          break;
        }
        else
        {
          chkpt = i + nw - 1;
          i = j;
        }
      }
    }
  }
}
예제 #5
0
void EdgeDetector::search_for_edge_point( EdgeData& ed, std::vector<TooN::Vector<3> > & vv3EdgeStrs, const CVD::Image<REAL_TYPE> & image,
        const TooN::Vector<2> & v2ImagePoint, const TooN::Vector<2>& v2Normal )
{
  TooN::Vector<3> edgeStr;
  TooN::Vector<2> v2Xn;
  ed.v2ExpectedImagePoint = v2ImagePoint;
  ed.v2Normal = v2Normal;
  register int i, j, k, chkpt, nSearch, nSearch_2nw, nSearch_nw, TwoiMaxSearchRange;
  TwoiMaxSearchRange = 2*miMaxSearchRange;
  // apply mask to find edgeStr in both magnitude and direction for a whole range of search path.
  for( nSearch = 0; nSearch <= TwoiMaxSearchRange; ++nSearch )
  {
    v2Xn = v2ImagePoint + ( nSearch - miMaxSearchRange ) * v2Normal;
    if( !image.in_image_with_border( CVD::ir( v2Xn ), 10 ) ) break; // reach boundary.
    edgeStrength( edgeStr, image, int( v2Xn[0] ), int( v2Xn[1] ) );
    vv3EdgeStrs[nSearch] = edgeStr;
  }
  // 1D NMS for (2nw+1)-Neighborhood
  // Efficient Non-Maximum Suppression, Alexander Neubeck and Luc Van Gool, ICPR, 2006
  REAL_TYPE subpixel, distance;
  const int nw = 2; // test nw = 2, must be more than or equal to 2;
  i = nw;
  CompPartialMax( vv3EdgeStrs, 0, i - 1 );
  chkpt = -1;
  nSearch_2nw = nSearch - 2*nw;
  nSearch_nw = nSearch - nw;
  while( i <= nSearch_2nw )
  {
    j = CompPartialMax( vv3EdgeStrs, i, i + nw );
    k = CompPartialMax( vv3EdgeStrs, i + nw + 1, j + nw );
    if( i == j || vv3EdgeStrs[j][0] > vv3EdgeStrs[k][0] )
    {
      if( ( chkpt <= j - nw || vv3EdgeStrs[j][0] >= mvrPMAX[chkpt] )
          && ( j - nw == i || vv3EdgeStrs[j][0] >= mvrPMAX[j - nw] ) )
      {
        if( vv3EdgeStrs[j][0] > mfEdgeThreshold ) // found edge
        {
          if( fabs(v2Normal*vv3EdgeStrs[j].slice(1,2) ) > COSANGLE ) // check angle
          {
            distance = j - miMaxSearchRange;
// subpixel m = a-c/(2*(a-2b-c) where the parabola passing though (-1,a), (0,b), (1,c)
// A Non-Maxima Suppression Method for Edge Detection with Sub-Pixel Accuracy.
            REAL_TYPE den = ( 2.0 * ( vv3EdgeStrs[j - 1][0] - 2.0 * vv3EdgeStrs[j][0] + vv3EdgeStrs[j + 1][0] ) );
            if( j >= 1 && den != 0 )
              subpixel = ( vv3EdgeStrs[j - 1][0] - vv3EdgeStrs[j + 1][0] ) / den;
            else subpixel = 0.0;
            distance += subpixel;
            ed.vrDistance.push_back( distance );
            ed.vrDistanceSquared.push_back( distance*distance );
            ++ed.no_candidate;
          }
        }
      }
      if( i < j ) chkpt = i + nw + 1;
      i = j + nw + 1;
    }
    else
    {
      i = k;
      chkpt = j + nw + 1;
      while( i <= nSearch_nw )
      {
        j = CompPartialMax( vv3EdgeStrs, chkpt, i + nw );
        if( vv3EdgeStrs[i][0] > vv3EdgeStrs[j][0] )
        {
          if( vv3EdgeStrs[i][0] > mfEdgeThreshold ) // found edge
          {
            if( fabs(v2Normal*vv3EdgeStrs[i].slice(1,2)) > COSANGLE ) // check angle
            {
              distance = i - miMaxSearchRange;
// subpixel m = a-c/(2*(a-2b-c) where the parabola passing though (-1,a), (0,b), (1,c)
// A Non-Maxima Suppression Method for Edge Detection with Sub-Pixel Accuracy.
              REAL_TYPE den = ( 2.0 * ( vv3EdgeStrs[i - 1][0] - 2.0 * vv3EdgeStrs[i][0] + vv3EdgeStrs[i + 1][0] ) );
              if( i >= 1 && den != 0 )
                subpixel = ( vv3EdgeStrs[i - 1][0] - vv3EdgeStrs[i + 1][0] ) / den;
              else subpixel = 0.0;
              distance += subpixel;
              ed.vrDistance.push_back( distance );
              ed.vrDistanceSquared.push_back( distance*distance );
              ++ed.no_candidate;
            }
          }
          i = i + nw + 1; // in the paper, it is i = i + nw - 1, it is wrong.
          break;
        }
        else
        {
          chkpt = i + nw - 1;
          i = j;
        }
      }
    }
  }
}
예제 #6
0
void EdgeDetector::search_for_closest_edge_point_with_pole( ClosestEdge& result, std::vector<TooN::Vector<3> > & vv3EdgeStrs, 
    const EdgeFeature & edgeFeature, const CVD::Image<REAL_TYPE>& image, const TooN::Vector<2> & v2ImagePoint, const TooN::Vector<2>& v2Normal )
{
  TooN::Vector<3> edgeStr;
  TooN::Vector<2> v2Xn;
  register int i, j, k, chkpt, nSearch, nSearch_2nw, nSearch_nw, TwoiMaxSearchRange;
  // apply mask to find edgeStr in both magnitude and direction for a whole range of search path.
  TwoiMaxSearchRange = 2*miMaxSearchRange;
  for( nSearch = 0; nSearch <= TwoiMaxSearchRange; ++nSearch )
  {
    v2Xn = v2ImagePoint + ( nSearch - miMaxSearchRange ) * v2Normal;
    if( !image.in_image_with_border( CVD::ir( v2Xn ), 10 ) ) break;
    edgeStrength( edgeStr, image, int( v2Xn[0] ), int( v2Xn[1] ) );
    vv3EdgeStrs[nSearch] = edgeStr;
  }
  // 1D NMS for (2nw+1)-Neighborhood
  // Efficient Non-Maximum Suppression, Alexander Neubeck and Luc Van Gool, ICPR, 2006
  result.bNotFound = true;
  REAL_TYPE subpixel, distance, test;
  result.rDistance = 2 * miMaxSearchRange + 1.f; 
  const int nw = 2; // test nw = 2, must be more than or equal to 2;
  i = nw;
  CompPartialMax( vv3EdgeStrs, 0, i - 1 );
  chkpt = -1;
  nSearch_2nw = nSearch - 2*nw;
  nSearch_nw = nSearch - nw;
  while( i <= nSearch_2nw )
  {
    j = CompPartialMax( vv3EdgeStrs, i, i + nw );
    k = CompPartialMax( vv3EdgeStrs, i + nw + 1, j + nw );
    if( i == j || vv3EdgeStrs[j][0] > vv3EdgeStrs[k][0] )
    {
      if( ( chkpt <= j - nw || vv3EdgeStrs[j][0] >= mvrPMAX[chkpt] )
          && ( j - nw == i || vv3EdgeStrs[j][0] >= mvrPMAX[j - nw] ) )
      {
        if( vv3EdgeStrs[j][0] > mfEdgeThreshold ) // found edge
        {
          test = v2Normal*vv3EdgeStrs[j].slice(1,2);
          if( ( IS_SET_BIT( edgeFeature.ucBits, EDGE_POLE ) && test > 0 ) || ( !IS_SET_BIT( edgeFeature.ucBits, EDGE_POLE ) && test <= 0 ) )
          {
            if( fabs(test) > COSANGLE ) // check angle
            {
              distance = j - miMaxSearchRange;
              if( fabs( result.rDistance ) >= fabs( distance ) )
              {
                result.rDistance = distance;
                result.iEdgeIndex = j;
                result.bNotFound = false;
              }
              else
                goto exit_label;
            }
          }
        }
      }
      if( i < j ) chkpt = i + nw + 1;
      i = j + nw + 1;
    }
    else
    {
      i = k;
      chkpt = j + nw + 1;
      while( i <= nSearch_nw )
      {
        j = CompPartialMax( vv3EdgeStrs, chkpt, i + nw );
        if( vv3EdgeStrs[i][0] > vv3EdgeStrs[j][0] )
        {
          if( vv3EdgeStrs[i][0] > mfEdgeThreshold ) // found edge
          {
            test = v2Normal*vv3EdgeStrs[i].slice(1,2);
            if( ( IS_SET_BIT( edgeFeature.ucBits, EDGE_POLE ) && test > 0 ) || ( !IS_SET_BIT( edgeFeature.ucBits, EDGE_POLE ) && test <= 0 ) )
            {
              if( fabs(test) > COSANGLE ) // check angle
              {
                distance = i - miMaxSearchRange;
                if( fabs( result.rDistance ) >= fabs( distance) )
                {
                  result.rDistance = distance;
                  result.iEdgeIndex = i;
                  result.bNotFound = false;
                }
                else
                  goto exit_label;
              }
            }
          }
          i = i + nw + 1;
          break;
        }
        else
        {
          chkpt = i + nw - 1;
          i = j;
        }
      }
    }
  }
  exit_label:
  if( !result.bNotFound )
  {
// subpixel m = a-c/(2*(a-2b-c) where the parabola passing though (-1,a), (0,b), (1,c)
// A Non-Maxima Suppression Method for Edge Detection with Sub-Pixel Accuracy.
    REAL_TYPE den = ( 2.0 * ( vv3EdgeStrs[result.iEdgeIndex - 1][0] - 2.0 * vv3EdgeStrs[result.iEdgeIndex][0] + vv3EdgeStrs[result.iEdgeIndex + 1][0] ) );
    if( result.iEdgeIndex >= 1 && den != 0 )
      subpixel = ( vv3EdgeStrs[result.iEdgeIndex - 1][0] - vv3EdgeStrs[result.iEdgeIndex + 1][0] ) / den;
    else subpixel = 0.0;
    result.rDistance += subpixel;
  }
}
예제 #7
0
int main( int argc, char* argv[] )
{




    srand ( time(NULL) );


    cudaGLSetGLDevice(cutGetMaxGflopsDeviceId());
    CreateGlutWindowAndBind("Main",width_window*2,height_window*2);
    glewInit();


    CVD::Image<float> input_image_low;
    img_load(input_image_low,"../data/images/car_003.pgm");

    CVD::Image<float> input_image_up;
    img_load(input_image_up,"../data/images/car_003_up.pgm");

    cout<< "height_up_image ="<< input_image_up.size().y << endl;
    cout<< " width_up_image ="<< input_image_up.size().x << endl;



    int N_cols_low_img = input_image_low.size().x;
    int N_rows_low_img = input_image_low.size().y;

    int size_have  = N_rows_low_img*N_cols_low_img;
    float scale = 2;

    int N_rows_upimg = (int)(scale*N_rows_low_img);
    int N_cols_upimg = (int)(scale*N_cols_low_img);

    int size_wanted  = N_rows_upimg*N_cols_upimg;


    int input_vector_size = size_wanted;
    float *input_image_data_up = new float[size_wanted];

//    for(int i = 0 ; i < input_vector_size ; i++)
//    {
//        input_image_data_up[i] = i;
//    }

    float *input_image_data_down = new float[size_have];

//    for(int i = 0 ; i < size_have ; i++)
//    {
//        input_image_data_down[i] = i;
//    }

    input_image_data_up = input_image_up.data();






    //############################### Computing the W^{T}'s for all the flow vectors ###########################

    int N_imgs = 9;
    std::vector< std::map <int, float> > h_vectorofMaps(N_imgs);

    int test_size = 8*8;
    float *hflow = new float [test_size];
    float *vflow = new float [test_size];

    buildWMatrixBilinearInterpolation(N_imgs, size_wanted, N_rows_upimg, N_cols_upimg, h_vectorofMaps, hflow, vflow);//,WMatT );
//    buildWMatrixBilinearInterpolation(N_imgs, test_size, 8, 8, h_vectorofMaps, hflow, vflow);//,WMatT );



    float **WMatvalPtr;
    WMatvalPtr = (float**)malloc(sizeof(float*)*N_imgs);

    int **WMatcolPtr;
    WMatcolPtr = (int**)malloc(sizeof(int*)*N_imgs);

    int **WMatrowPtr;
    WMatrowPtr = (int**)malloc(sizeof(int*)*N_imgs);

    int **h_nnzPerRow;
    h_nnzPerRow = (int**)malloc(sizeof(int*)*N_imgs);



    float *d_csrWMatStackedval;
    int *d_csrWMatStackedrow;
    int *d_csrWMatStackedcol;




    int total_nonzeros=0;

    for(int i = 0 ; i < N_imgs ; i++)
    {
        std::map<int, float>::iterator it;
        std::map<int, float> mapW = h_vectorofMaps[i];
        total_nonzeros += (int)mapW.size();

    }






//    float *h_csrWMatStackedval = new float[total_nonzeros];
//    int *h_csrWMatStackedrow = new int[size_wanted*N_imgs+1];
//    int *h_csrWMatStackedcol = new int[total_nonzeros];


//    float *d_WMatvalPtr;
//    int *d_WMatrowPtr;
//    int *d_WMatcolPtr;


//    for(int i = 0 ; i < N_imgs ; i++)
//    {
//        std::map<int, float>::iterator it;
//        std::map<int, float> mapW = h_vectorofMaps[i];

//        int NnzWMat = (int)mapW.size();

//        WMatvalPtr[i] = new float[NnzWMat];
//        WMatcolPtr[i] = new int[NnzWMat];
//        WMatrowPtr[i] = new int[size_wanted+1];
//        h_nnzPerRow[i] = new int[size_wanted];

//        int index = 0, prev_row=-1;

//        cout<< "NnzWMat = "<<NnzWMat <<endl;

//        WMatrowPtr[i][0] = 0;
//        h_nnzPerRow[i][0] = 0;

//        for(int j = 1 ; j < size_wanted; j++)
//        {
//            WMatrowPtr[i][j] =-1;
//            h_nnzPerRow[i][j] = 0;
//        }
//        WMatrowPtr[i][size_wanted] = NnzWMat;

//        cout<<"have initialised the matrices"<<endl;

//        index = 0;
//        for(it = mapW.begin(); it != mapW.end() ; it++)
//        {

//            WMatcolPtr[i][index] = (it->first)%size_wanted;
//            WMatvalPtr[i][index] = (it->second);

//            int row = ((it->first) - (it->first)%size_wanted)/size_wanted;
//            h_nnzPerRow[i][row]++;

//            if ( WMatrowPtr[i][row] == -1 && prev_row != row)
//            {
//                WMatrowPtr[i][row] = index;
//                prev_row = row;
//            }
//            index++;

//        }

//        index = 1;
//        while(1)
//        {
//            if( index > size_wanted)
//                break;

//            int startindex = index;
//            while( WMatrowPtr[i][index] == -1)
//            {
//                index++;
//            }
//            for(int j = startindex; j <= index-1 ; j++)
//            {
//                WMatrowPtr[i][j] = WMatrowPtr[i][index];
//            }
//            index++;
//        }
//        WMatrowPtr[i][0] = 0;


//        if ( i == 0)
//        {

//            cutilSafeCall(cudaMalloc((void**)&d_WMatvalPtr, NnzWMat*sizeof (float)));
//            cutilSafeCall(cudaMalloc((void**)&d_WMatcolPtr, (NnzWMat)*sizeof(int)));
//            cutilSafeCall(cudaMalloc((void**)&d_WMatrowPtr, (size_wanted+1)*sizeof (int)));

//            cudaMemcpy(d_WMatvalPtr, WMatvalPtr[i], NnzWMat*sizeof(float), cudaMemcpyHostToDevice);
//            cudaMemcpy(d_WMatcolPtr, WMatcolPtr[i], NnzWMat*sizeof(int), cudaMemcpyHostToDevice);
//            cudaMemcpy(d_WMatrowPtr, WMatrowPtr[i], (size_wanted+1)*sizeof(int), cudaMemcpyHostToDevice);

//        }

//        static int total_non_zeros_so_far = 0;

//        for(int index = 0 ; index < NnzWMat ; index++)
//        {
//            h_csrWMatStackedval[total_non_zeros_so_far + index] = WMatvalPtr[i][index];
//            h_csrWMatStackedcol[total_non_zeros_so_far + index] = WMatcolPtr[i][index];
//        }

//        for(int rv = 0 ; rv < size_wanted ; rv++)
//        {
//            h_csrWMatStackedrow[rv+size_wanted*i] = WMatrowPtr[i][rv] + total_non_zeros_so_far;
//        }

//        total_non_zeros_so_far = total_non_zeros_so_far +NnzWMat;
//    }

//    h_csrWMatStackedrow[size_wanted*N_imgs] = total_nonzeros;






//    for(int i = 0 ; i < N_imgs ; i++)
//    {
//        cout<<"WMatval, i = "<<i<<endl;

//        int NnzWMat = h_vectorofMaps[i].size();

//        char filevalname[50];
//        sprintf(filevalname,"WMatval_%d.txt",i);
//        ofstream wMatfileval(filevalname);

//        for(int j = 0 ; j < NnzWMat ; j++ )
//        {
//            wMatfileval << WMatvalPtr[i][j] << " ";
//        }
//        wMatfileval.close();


//        char filecolname[50];
//        sprintf(filecolname,"WMatcol_%d.txt",i);
//        ofstream wMatfilecol(filecolname);
//        for(int j = 0 ; j < NnzWMat ; j++ )
//        {
//            wMatfilecol<<WMatcolPtr[i][j] << "  ";
//        }
//        wMatfilecol.close();

//        char filerowname[50];
//        sprintf(filerowname,"WMatrow_%d.txt",i);
//        ofstream wMatfilerow(filerowname);
//        for(int j = 0 ; j < size_wanted+1 ; j++ )
//        {
//            wMatfilerow<<WMatrowPtr[i][j] << "  ";
//        }
//        wMatfilerow.close();

//    }




    ifstream Waifile("WStackai.txt");
    float aival;
    int total_nonzeros_wi=0;
    while(1)
    {
        Waifile >> aival;
        if ( Waifile.eof())
            break;
        total_nonzeros_wi++;

    }
    Waifile.close();

    float *h_csrWMatStackedval = new float[total_nonzeros_wi];
    int *h_csrWMatStackedrow = new int[size_wanted*N_imgs+1];
    int *h_csrWMatStackedcol = new int[total_nonzeros_wi];


    Waifile.open("WStackai.txt");
    int aipos=0;
    while(1)
    {
        Waifile >> aival;
        if ( Waifile.eof())
            break;
        h_csrWMatStackedval[aipos]= aival;
        aipos++;
    }
    Waifile.close();

    ifstream Wrpfile("WStackrp.txt");
    int rppos=0;
    while(1)
    {
        Wrpfile >> aival;
        if ( Wrpfile.eof())
            break;
        h_csrWMatStackedrow[rppos]= (int)aival;
        rppos++;
    }
    Wrpfile.close();


    ifstream Wcifile("WStackci.txt");
    int cipos=0;
    while(1)
    {
        Wcifile >> aival;
        if ( Wcifile.eof())
            break;
        h_csrWMatStackedcol[cipos]= (int)aival;
        cipos++;
    }
    Wcifile.close();



    total_nonzeros  = total_nonzeros_wi;
    cout<<"total_nonzeros_wi = "<<total_nonzeros_wi<<endl;
//    cout<<"total_nonzeros = "<<total_nonzeros<<endl;




//    exit(1);
















    cutilSafeCall(cudaMalloc((void**)&d_csrWMatStackedval, total_nonzeros*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_csrWMatStackedcol, (total_nonzeros)*sizeof(int)));
    cutilSafeCall(cudaMalloc((void**)&d_csrWMatStackedrow, (N_imgs*size_wanted+1)*sizeof (int)));

    cudaMemcpy(d_csrWMatStackedval, h_csrWMatStackedval, total_nonzeros*sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_csrWMatStackedcol, h_csrWMatStackedcol, total_nonzeros*sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_csrWMatStackedrow, h_csrWMatStackedrow, (size_wanted*N_imgs+1)*sizeof(int), cudaMemcpyHostToDevice);

   //###########################################################################################################

    float sigma_kernel = 0.25* ( sqrt(scale*scale-1) );

    int kernel_size = (int)ceil(3*(0.25* ( sqrt(scale*scale-1) )));

    if ( kernel_size % 2 == 0 )
    {
        kernel_size = kernel_size+1;
    }

    int kernelhalfwidth = kernel_size/2;

    cout << "kernel_size = " << kernel_size << endl;
    cout << "blurWidth = " << kernelhalfwidth << endl;

    float blur_kernel_h[kernel_size*kernel_size];

    float* blur_kernel_d;

    float sum_kernel_values = 0;

    cout << "sigma_kernel = " << sigma_kernel << endl;

    for(int i = 0 ; i < kernel_size; i++)
    {
        for(int j = 0 ; j < kernel_size; j++)
        {
            float val = (i-kernelhalfwidth)*(i-kernelhalfwidth) + (j-kernelhalfwidth)*(j-kernelhalfwidth);


            val = val / (2*sigma_kernel*sigma_kernel);

            blur_kernel_h[i*kernel_size+j] = exp(-val)/(2*M_PI*sigma_kernel*sigma_kernel);

            sum_kernel_values += blur_kernel_h[i*kernel_size+j];

        }
    }



  //###################### ALL RELATED TO DATAMATRIX ###################################
    std::map<int,float>matindex_matval;
    std::map<int,float>matindex_matvalT;

    buildDMatrixLebesgueMeasure(size_have,size_wanted, N_rows_upimg, N_cols_upimg, scale, /*A,*/ matindex_matval,matindex_matvalT);

    int NnzDMat = (int)matindex_matval.size();
    int NnzDMatT = (int)matindex_matvalT.size();


    float *DMatvalPtr = new float[NnzDMat];
    float *DMatvalPtrT = new float[NnzDMatT];

    int *DMatcolPtr = new int[NnzDMat];
    int *DMatcolPtrT = new int[NnzDMatT];

    int *DMatrowPtr = new int[size_have+1];
    int *DMatrowPtrT = new int[size_wanted+1];


    int index = 0, prev_row = -1;


    cout << "In here" << endl;


    std::map<int,float>::iterator it;
    cout << "Map begins " << endl;
    cout << "NnzDMat   = "<<NnzDMat << endl;
    cout << "NnzDMatT   = "<<NnzDMatT << endl;

    for(int i = 0 ; i < size_have ; i++)
    {
        DMatrowPtr[i] = 0;
    }
    DMatrowPtr[size_have] = NnzDMat;

    for(int i = 0 ; i < size_wanted ; i++)
    {
        DMatrowPtrT[i] = 0;
    }
    DMatrowPtrT[size_wanted] = NnzDMatT;


    cout << "Transpose data begins" << endl;
    index = 0; prev_row = -1;
    for(it = matindex_matvalT.begin(); it != matindex_matvalT.end(); it++ )
    {
        DMatcolPtrT[index] = (it->first)%size_have;
        DMatvalPtrT[index]  = it->second;
        int row = ((it->first) - (it->first)%size_have)/size_have;

        if ( DMatrowPtrT[row] == 0 && prev_row != row)
        {
            DMatrowPtrT[row] = index;
            prev_row = row;
        }

        index++;
    }


    index = 0; prev_row=-1;
    for(it = matindex_matval.begin(); it != matindex_matval.end(); it++ )
    {
//        cout << (it->first)%size_wanted << " " ;//<< it->second << endl;
        DMatvalPtr[index] = (it->second);
        DMatcolPtr[index] = (it->first)%size_wanted;

        int row = ((it->first) - (it->first)%size_wanted)/size_wanted;

        if ( DMatrowPtr[row] == 0 && prev_row != row)
        {
            DMatrowPtr[row] = index;
            prev_row = row;
        }

        index++;
    }
    cout << endl;





    cout<<"DMatval "<<endl;
    ofstream dMatfileval("DMatval.txt");
    for(int i = 0 ; i < NnzDMat ; i++ )
    {
        dMatfileval << DMatvalPtr[i] << " ";
    }
    dMatfileval.close();


    cout<<"BMatcol "<<endl;
    ofstream dMatfilecol("DMatcol.txt");
    for(int i = 0 ; i < NnzDMat ; i++ )
    {
        dMatfilecol<<DMatcolPtr[i] << "  ";
    }
    dMatfilecol.close();


    ofstream dMatfilerow("DMatrow.txt");
    cout<<"BMatrow "<<endl;
    for(int i = 0 ; i < size_have+1 ; i++ )
    {
        dMatfilerow<<DMatrowPtr[i] << " ";
    }
    dMatfilerow.close();









  //######################################################################################


  //############### ALL RELATED TO BLUR MATRIX ############################################
    cout << "Into the Blur function!" << endl;

//    TooN::Matrix<>B(size_wanted,size_wanted);
//    B = TooN::Zeros(size_wanted,size_wanted);

    std::map<int,float>Blurmatindex_matval;
    std::map<int,float>Blurmatindex_matvalT;


    buildBlurMatrixFromKernel(size_wanted, N_rows_upimg, N_cols_upimg, blur_kernel_h, kernel_size, /*B,*/ Blurmatindex_matval,Blurmatindex_matvalT);


    int NnzBlurMat  = (int)Blurmatindex_matval.size();
    int NnzBlurMatT = (int)Blurmatindex_matvalT.size();

    float *BMatvalPtr = new float[NnzBlurMat];
    float *BMatvalPtrT = new float[NnzBlurMatT];

    int *BMatcolPtr = new int[NnzBlurMat];
    int *BMatcolPtrT = new int[NnzBlurMatT];

    int *BMatrowPtr = new int[size_wanted+1];
    int *BMatrowPtrT = new int[size_wanted+1];



    index = 0; prev_row = -1;

//    std::map<int,float>::iterator it;
    cout << "Map begins " << endl;
    cout << "NnzBlurMat   = "<<NnzBlurMat << endl;
    cout << "NnzBlurMatT   = "<<NnzBlurMatT << endl;

    for(int i = 0 ; i < size_wanted ; i++)
    {
        BMatrowPtr[i] = 0;
    }
    BMatrowPtr[size_wanted] = NnzBlurMat;

    for(int i = 0 ; i < size_wanted ; i++)
    {
        BMatrowPtrT[i] = 0;
    }
    BMatrowPtrT[size_wanted] = NnzBlurMatT;


    cout << "Transpose data begins" << endl;
    index = 0; prev_row = -1;

    for(it = Blurmatindex_matvalT.begin(); it != Blurmatindex_matvalT.end(); it++ )
    {
        BMatcolPtrT[index] = (it->first)%size_wanted;
        BMatvalPtrT[index]  = it->second;
        int row = ((it->first) - (it->first)%size_wanted)/size_wanted;

        if ( BMatrowPtrT[row] == 0 && prev_row != row)
        {
            BMatrowPtrT[row] = index;
            prev_row = row;
        }

        index++;
    }

    index = 0; prev_row=-1;
    for(it = Blurmatindex_matval.begin(); it != Blurmatindex_matval.end(); it++ )
    {
        BMatvalPtr[index] = (it->second);
        BMatcolPtr[index] = (it->first)%size_wanted;

        int row = ((it->first) - (it->first)%size_wanted)/size_wanted;

        if ( BMatrowPtr[row] == 0 && prev_row != row)
        {
            BMatrowPtr[row] = index;
            prev_row = row;
        }

        index++;
    }
    cout << endl;


    cout<<"BMatval "<<endl;
    ofstream bMatfileval("BMatval.txt");
    for(int i = 0 ; i < NnzBlurMat ; i++ )
    {
        bMatfileval << BMatvalPtr[i] << " ";
    }
    bMatfileval.close();


    cout<<"BMatcol "<<endl;
    ofstream bMatfilecol("BMatcol.txt");
    for(int i = 0 ; i < NnzBlurMat ; i++ )
    {
        bMatfilecol<<BMatcolPtr[i] << "  ";
    }
    bMatfilecol.close();


    ofstream bMatfilerow("BMatrow.txt");
    cout<<"BMatrow "<<endl;
    for(int i = 0 ; i < size_wanted+1 ; i++ )
    {
        bMatfilerow<<BMatrowPtr[i] << " ";
    }
    bMatfilerow.close();



   //##########################################################################################



    cout << "Going to initialise the sparse matrix thing!"<<endl;
    cusparseHandle_t handle = 0;
    cusparseStatus_t status;
    status = cusparseCreate(&handle);

    if ( status == CUSPARSE_STATUS_SUCCESS )
    {
        cout<<"Hooray! Success, Success, Success!" <<endl;
    }
    if (status != CUSPARSE_STATUS_SUCCESS) {
        fprintf( stderr, "!!!! CUSPARSE initialization error\n" );
        return EXIT_FAILURE;
    }


    cusparseMatDescr_t descr = 0;
    status = cusparseCreateMatDescr(&descr);
    if (status != CUSPARSE_STATUS_SUCCESS) {
        fprintf( stderr, "!!!! CUSPARSE cusparseCreateMatDescr error\n" );
        return EXIT_FAILURE;
    }
    cusparseSetMatType(descr,CUSPARSE_MATRIX_TYPE_GENERAL);
    cusparseSetMatIndexBase(descr,CUSPARSE_INDEX_BASE_ZERO);

    cout<< "cu sparse initialised!"<<endl;


    float* d_cscWMatvalPtr;
    int* d_cscWMatcolPtr;
    int* d_cscWMatrowPtr;

    float* d_cscBMatvalPtr;
    int* d_cscBMatcolPtr;
    int* d_cscBMatrowPtr;



    // Remember this is csc so col and rows are swapped!
    cutilSafeCall(cudaMalloc((void**)&d_cscWMatvalPtr, total_nonzeros_wi*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_cscWMatrowPtr, (total_nonzeros_wi)*sizeof(int)));
    cutilSafeCall(cudaMalloc((void**)&d_cscWMatcolPtr, (size_wanted+1)*sizeof (int)));



    // Conversion required to compute the W^{T}

    {
        ScopedCuTimer cuTime("csr2csc conversion time");
        cusparseScsr2csc(handle,size_wanted*N_imgs, size_wanted, d_csrWMatStackedval, d_csrWMatStackedrow, d_csrWMatStackedcol, d_cscWMatvalPtr,
                     d_cscWMatrowPtr, d_cscWMatcolPtr, 1, CUSPARSE_INDEX_BASE_ZERO);
    }





//    {
//        ScopedCuTimer cuTime("csr2csc conversion time");
//        cusparseScsr2csc(handle, size_wanted, size_wanted*N_imgs, d_cscWMatvalPtr, d_cscWMatcolPtr, d_cscWMatrowPtr, d_csrWMatStackedval,
//                     d_csrWMatStackedcol, d_csrWMatStackedrow, 1, CUSPARSE_INDEX_BASE_ZERO);

//        if (status != CUSPARSE_STATUS_SUCCESS) {
//            fprintf( stderr, "!!!! CSC2CSR Conversion error\n" );
//            return EXIT_FAILURE;
//        }
//    }




    float *d_horizontal_velocity_all;
    float *d_vertical_velocity_all;

    float *d_DMatvalPtr;
    int *d_DMatcolPtr;
    int *d_DMatrowPtr;

    float *d_BMatvalPtr;
    int *d_BMatcolPtr;
    int *d_BMatrowPtr;

    float *d_BMatvalPtrT;
    int *d_BMatcolPtrT;
    int *d_BMatrowPtrT;

    float *d_DMatvalPtrT;
    int *d_DMatcolPtrT;
    int *d_DMatrowPtrT;


    float *d_img;
    float *d_imgT;

    float *d_Ax;
    float *d_Bx;

    float *d_AxT;

    float *d_A_copy;

    float *h_Ax;
    float *h_Bx = new float[size_wanted];

    float *h_A;
    float *h_A_copy = new float[size_have*size_wanted];
    float *h_AxT;

    size_t upImageFloatPitch;


    int output_vector_size = size_have;
    size_t velFloatPitch;


    cutilSafeCall(cudaMalloc((void**)&d_DMatvalPtr, NnzDMat*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_DMatcolPtr, NnzDMat*sizeof (int)));
    cutilSafeCall(cudaMalloc((void**)&d_DMatrowPtr, (size_have+1)*sizeof(int)));

    cutilSafeCall(cudaMalloc((void**)&d_DMatvalPtrT, NnzDMatT*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_DMatcolPtrT, NnzDMatT*sizeof (int)));
    cutilSafeCall(cudaMalloc((void**)&d_DMatrowPtrT, (size_wanted+1)*sizeof(int)));


    cutilSafeCall(cudaMalloc((void**)&d_BMatvalPtr, NnzBlurMat*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_BMatcolPtr, NnzBlurMat*sizeof (int)));
    cutilSafeCall(cudaMalloc((void**)&d_BMatrowPtr, (size_wanted+1)*sizeof(int)));


    cutilSafeCall(cudaMalloc((void**)&d_Bx, (size_wanted)*sizeof(float)));
    cutilSafeCall(cudaMalloc((void**)&d_Ax, (output_vector_size)*sizeof(float)));
    cutilSafeCall(cudaMalloc((void**)&d_AxT, (size_wanted)*sizeof(float)));



    cutilSafeCall(cudaMalloc((void**)&d_A_copy, (size_have)*sizeof(float)*size_wanted));
    cutilSafeCall(cudaMallocPitch(&d_img,&upImageFloatPitch,sizeof(float)*N_cols_upimg,N_rows_upimg));




    cutilSafeCall(cudaMallocPitch(&d_horizontal_velocity_all,&velFloatPitch,sizeof(float)*size_wanted,N_imgs));
    cutilSafeCall(cudaMallocPitch(&d_vertical_velocity_all,&velFloatPitch,sizeof(float)*size_wanted,N_imgs));


    cutilSafeCall(cudaMalloc((void**)&d_imgT, sizeof(float)*size_have));




    cudaMemcpy(d_DMatvalPtr, DMatvalPtr, NnzDMat*sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_DMatcolPtr, DMatcolPtr, NnzDMat*sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_DMatrowPtr, DMatrowPtr, (size_have+1)*sizeof(int), cudaMemcpyHostToDevice);

    cudaMemcpy(d_BMatvalPtr, BMatvalPtr, NnzBlurMat*sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_BMatcolPtr, BMatcolPtr, NnzBlurMat*sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_BMatrowPtr, BMatrowPtr, (size_wanted+1)*sizeof(int), cudaMemcpyHostToDevice);

    cudaMemcpy(d_BMatvalPtrT, BMatvalPtrT, NnzBlurMatT*sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_BMatcolPtrT, BMatcolPtrT, NnzBlurMatT*sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_BMatrowPtrT, BMatrowPtrT, (size_wanted+1)*sizeof(int), cudaMemcpyHostToDevice);


    cudaMemcpy(d_DMatvalPtrT, DMatvalPtrT, NnzDMatT*sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_DMatcolPtrT, DMatcolPtrT, NnzDMatT*sizeof(int), cudaMemcpyHostToDevice);
    cudaMemcpy(d_DMatrowPtrT, DMatrowPtrT, (size_wanted+1)*sizeof(int), cudaMemcpyHostToDevice);


    cudaMemcpy(d_img, input_image_data_up, (size_wanted)*sizeof(float), cudaMemcpyHostToDevice);
    cudaMemcpy(d_imgT, input_image_data_down, (size_have)*sizeof(float), cudaMemcpyHostToDevice);



    cutilSafeCall(cudaMalloc((void**)&d_cscBMatvalPtr, NnzBlurMat*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_cscBMatrowPtr, (NnzBlurMat)*sizeof(int)));
    cutilSafeCall(cudaMalloc((void**)&d_cscBMatcolPtr, (size_wanted+1)*sizeof (int)));


    // Check if the Blur is working...

    {
        ScopedCuTimer cuTime("blur multiplication time");
        status = cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted, size_wanted, 1.0, descr, d_BMatvalPtr, d_BMatrowPtr, d_BMatcolPtr, d_img, 0.0, d_Bx);

    }
    cudaMemcpy(h_Bx,d_Bx,sizeof(float)*size_wanted,cudaMemcpyDeviceToHost);

    CVD::Image<CVD::byte> blurredImage = CVD::Image<CVD::byte>(ImageRef(N_cols_upimg,N_rows_upimg));

    for(int row = 0 ; row < N_rows_upimg ; row++)
    {
        for(int col = 0 ; col < N_cols_upimg ; col++)
        {
           blurredImage[ImageRef(col,row)] = (unsigned char)(h_Bx[row*N_cols_upimg+col]*255.0f);
        }
    }

    img_save(blurredImage,"blurredImage.png");


    {
        ScopedCuTimer cuTime("csr2csc conversion time for blur");
        cusparseScsr2csc(handle,size_wanted, size_wanted, d_BMatvalPtr, d_BMatrowPtr, d_BMatcolPtr, d_cscBMatvalPtr,
                     d_cscBMatrowPtr, d_cscBMatcolPtr, 1, CUSPARSE_INDEX_BASE_ZERO);
    }




    // Checked!



    // Check for matrix multiplication which is [W1; W2; W3; ... ;Wn]*u

    float* h_Wis_u_ = new float[size_wanted*N_imgs];
    float* d_Wis_u_;
    cutilSafeCall(cudaMalloc((void**)&d_Wis_u_, (size_wanted*N_imgs)*sizeof(float)));


    {

        ScopedCuTimer cuTime("warping multiplication time");
        status = cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted*N_imgs, size_wanted, 1.0, descr, d_csrWMatStackedval,
                                                        d_csrWMatStackedrow, d_csrWMatStackedcol, d_img, 0.0, d_Wis_u_);

    }

    {
        ScopedCuTimer cuTime("Memcpy time");
        cudaMemcpy(h_Wis_u_,d_Wis_u_,sizeof(float)*size_wanted*N_imgs,cudaMemcpyDeviceToHost);
    }





    cout << "N_rows_low_img = " << N_rows_low_img << endl;

    CVD::Image<CVD::byte> WarpedImage = CVD::Image<CVD::byte>(ImageRef(N_cols_upimg,N_rows_upimg));

    for(int i = 0 ; i < N_imgs ; i++)
    {

        for(int row = 0 ; row < N_rows_upimg ; row++)
        {
            for(int col = 0 ; col < N_cols_upimg ; col++)
            {
                WarpedImage[ImageRef(col,row)] = (unsigned char)(h_Wis_u_[row*N_cols_upimg+col + i*(size_wanted)]*255.0f);
            }
        }

        char fileName[40];
        sprintf(fileName,"WarpedImage_%02d.png",i);
        img_save(WarpedImage,fileName);
    }

//    exit(1);

    cout<< "After initialising the image!"<<endl;


    h_Ax          = (float*)malloc(sizeof(float)*size_have);
    h_AxT         = (float*)malloc(sizeof(float)*size_wanted);


    {
        ScopedCuTimer cuTime("multiplication time");
        status = cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_have, size_wanted, 1.0, descr, d_DMatvalPtr, d_DMatrowPtr, d_DMatcolPtr, d_Bx, 0.0, d_Ax);
    }

    {
        ScopedCuTimer cuTime("transpose multiplication time");
        status = cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted, size_have, 1.0, descr, d_DMatvalPtrT, d_DMatrowPtrT, d_DMatcolPtrT, d_imgT, 0.0, d_AxT);
    }



    cudaMemcpy(h_Ax,d_Ax,sizeof(float)*output_vector_size,cudaMemcpyDeviceToHost);


    cout<< endl<<"cusparse multiplication result!"<<endl;


    cudaMemcpy(h_AxT,d_AxT,sizeof(float)*size_wanted,cudaMemcpyDeviceToHost);

    cout<< endl<<"cusparse multiplication transpose result!"<<endl;



    cout << "N_rows_low_img = " << N_rows_low_img << endl;

    CVD::Image<CVD::byte> dsampledImage = CVD::Image<CVD::byte>(ImageRef(N_cols_low_img,N_rows_low_img));

    for(int row = 0 ; row < N_rows_low_img ; row++)
    {
        for(int col = 0 ; col < N_cols_low_img ; col++)
        {
            dsampledImage[ImageRef(col,row)] = (unsigned char)(h_Ax[row*N_cols_low_img+col]*255.0f);
        }
    }

    img_save(dsampledImage,"dsampledImage.png");


    cout << "Image is downsampled and saved!" << endl;
//    exit(1);
// ######################### Uncomment from here #########################



//    cudaMemset2D(zero, zerop, 0.f, sizeof(float)*(M+2), (N+2));

    //Copy horizontal_velocities and vertical velocities

//    float *d_u_;
//    float *d_u_;
//    float *d_u_;


    size_t downImageFloatPitch;
    size_t imgVectorsFloatPitch;
    size_t WTBTDTqFloatPitch;
    size_t qVectorsFloatPitch;

    float *d_px;
    float *d_py;

    float *d_ux_;
    float *d_uy_;

    float *d_u_;
    float *d_u;

    float *d_ydcopyqi;
    float *d_Wiu_;
    float *d_Wiu_copy;

    float *d_DTqi_copy;
    float *d_BTDTqi;
    float *d_blur_result;
    float *d_res;
    float *d_dual_save_WTBTDTq;
    float *d_dual_save_BTDTq;

    float *d_fi; // All Input Images

    float *d_qi;
    float *d_res_stacked;



    cutilSafeCall(cudaMalloc((void**)&d_px, size_wanted*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_py, size_wanted*sizeof (float)));




    cutilSafeCall(cudaMalloc((void**)&d_ux_, size_wanted*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_uy_, size_wanted*sizeof (float)));

    cutilSafeCall(cudaMalloc((void**)&d_u_, size_wanted*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_u, size_wanted*sizeof (float)));




    int upImageStrideFloat    = upImageFloatPitch/sizeof(float);



    cutilSafeCall(cudaMalloc((void**)&d_ydcopyqi, size_have*sizeof (float)));




    cutilSafeCall(cudaMalloc((void**)&d_DTqi_copy, size_wanted*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_blur_result, size_wanted*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_BTDTqi, size_wanted*sizeof (float)));


    cutilSafeCall(cudaMalloc((void**)&d_Wiu_, size_wanted*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_Wiu_copy, size_wanted*sizeof (float)));

    cutilSafeCall(cudaMalloc((void**)&d_res, size_have*sizeof (float)));


    cutilSafeCall(cudaMalloc((void**)&d_fi, size_have*N_imgs*sizeof (float)));
    cutilSafeCall(cudaMalloc((void**)&d_dual_save_WTBTDTq, size_wanted*sizeof (float)));


    cutilSafeCall(cudaMalloc((void**)&d_dual_save_BTDTq, (size_wanted)*sizeof(float)*N_imgs));



    cutilSafeCall(cudaMalloc((void**)&d_qi, (size_have)*sizeof(float)*N_imgs));
    cutilSafeCall(cudaMemset(d_qi,0,(size_have)*sizeof(float)*N_imgs));
//    cutilSafeCall(cudaMemset(d_qi,1,(size_have)*sizeof(float)*N_imgs));


    cutilSafeCall(cudaMalloc((void**)&d_res_stacked, (size_have)*sizeof(float)*N_imgs));

//    float *h_res_stacked = new float[size_have*N_imgs];
//    for(int i = 0 ; i < size_have*N_imgs ; i++)
//    {
//        h_res_stacked[i]=i;
//    }

//    cudaMemcpy(d_res_stacked,h_res_stacked,sizeof(float)*size_have*N_imgs,cudaMemcpyHostToDevice);
//    cudaMemcpy(d_qi,h_res_stacked,sizeof(float)*size_have*N_imgs,cudaMemcpyHostToDevice);



    cutilSafeCall(cudaMemset(d_res_stacked,0,sizeof(float)*size_have*N_imgs));



    cout << "Memory Initialisation already!" <<endl;




    //Read Input Images

    for(int i = 0 ; i < N_imgs ; i++)
    {
        CVD::Image<float> inputImage;// = CVD::Image<float>(ImageRef(N_cols_low_img,N_rows_low_img));

        char imgName[100];
        sprintf(imgName,"../data/images/car_%03d.pgm",i+1);

        img_load(inputImage,imgName);
        // Image Loaded!

        cudaMemcpy(d_fi+(size_have)*i,inputImage.data(),sizeof(float)*size_have,cudaMemcpyHostToDevice);

    }
    cout << "Images have been loaded!" <<endl;


    ifstream tau_file("tau.txt");
    float* h_tau = new float[size_wanted];
    float* d_tau;

    char readlinedata[300];
    int taupos= 0;


    while ( !tau_file.eof() ) { // keep reading until end-of-file
        float tauval;
        tau_file >> tauval; // sets EOF flag if no value found
        h_tau[taupos++] = tauval;

    }
    tau_file.close();


//    while(1)
//    {
////        tau_file.getline(readlinedata, 300);
//        if ( tau_file.eof())
//            break;
//        tau_file >> val;
//        h_tau[taupos++] = val;
//    }


    ifstream dT_file("btranspose.txt");
    float* h_dT = new float[size_wanted];

//    char readlinedata[300];
//    pos = 0;
    while(1)
    {
        dT_file.getline(readlinedata, 300);
        if ( dT_file.eof())
            break;

        istringstream iss(readlinedata);

        float val;
        iss >> val;

        static int pos = 0;

        h_dT[pos] = val;
//        cout<< "h_dT = "<< h_dT[pos] << endl;
        pos++;
    }

    dT_file.close();


    cutilSafeCall(cudaMallocPitch(&d_tau,&upImageFloatPitch,sizeof(float)*N_cols_upimg,N_rows_upimg));
    cudaMemcpy(d_tau,h_tau,sizeof(float)*size_wanted,cudaMemcpyHostToDevice);

//    cout << "Here after reading the file contents!" << endl;




//    cutilSafeCall(cudaMemset(d_qi,1,(size_have)*sizeof(float)*N_imgs));


//    float* h_Wiu_copy = new float[size_wanted];

//    for(int i = 0 ; i < N_imgs ; i++)
//    {

//        for(int row = 0 ; row < N_rows_upimg ; row++)
//        {
//            for(int col = 0 ; col < N_cols_upimg ; col++)
//            {
//                cudaMemcpy(h_Wiu_copy,d_Wis_u_+(size_wanted)*i,sizeof(float)*size_wanted,cudaMemcpyDeviceToHost);
////                cudaMemcpy(d_Wiu_copy,d_Wis_u_+(size_wanted)*i,sizeof(float)*size_wanted,cudaMemcpyDeviceToDevice);


//                WarpedImage[ImageRef(col,row)] = (unsigned char)(h_Wiu_copy[row*N_cols_upimg+col]*255.0f);
//            }
//        }

//        char fileName[40];
//        sprintf(fileName,"WarpedImageMemcpyone_%02d.png",i);
//        img_save(WarpedImage,fileName);
//    }





    float *d_low_img;
    cutilSafeCall(cudaMallocPitch(&d_low_img,&downImageFloatPitch,sizeof(float)*N_cols_low_img,N_rows_low_img));
    cudaMemcpy(d_low_img,input_image_low.data(),sizeof(float)*size_have,cudaMemcpyHostToDevice);

//    float *h_low_img =;



    // Add images into d_fi;


    int downImageStrideFloat  = downImageFloatPitch/sizeof(float);
    int imgVectorsStrideFloat = imgVectorsFloatPitch/sizeof(float);
    int WTBTDTqStrideFloat    = WTBTDTqFloatPitch/sizeof(float);
    int qVectorsStrideFloat   = qVectorsFloatPitch/sizeof(float);


    cout << "upImageStrideFloat = " << upImageStrideFloat << endl;

    int velStrideFloat   = velFloatPitch/sizeof(float);

    int width_up   = N_cols_upimg;
    int height_up  = N_rows_upimg;

    int width_down   = N_cols_low_img;
    int height_down  = N_rows_low_img;

    double xisqr = scale*scale;




    cutilSafeCall(cudaMalloc((void**)&blur_kernel_d, (kernel_size)*sizeof(float)*kernel_size));
    cudaMemcpy(blur_kernel_d,blur_kernel_h,sizeof(float)*kernel_size*kernel_size,cudaMemcpyHostToDevice);






    cout <<" Blurred the kernel" << endl;



//    int width = width_window;
//    int height = height_window;



    int imgWidth = input_image_up.size().x;
    int imgHeight = input_image_up.size().y;

    int width = imgWidth;
    int height = imgHeight;

    GlTexture greyTexture(width,height,GL_LUMINANCE32F_ARB);
    GlBufferCudaPtr greypbo( GlPixelUnpackBuffer, size_wanted*sizeof(float), cudaGraphicsMapFlagsNone, GL_STREAM_DRAW );

//    int imgWidth = input_image_up.size().x;
//    int imgHeight = input_image_up.size().y;

    float aspect = 72.0f/121.0f;

    View& view_image0 = Display("image0").SetAspect(aspect);
    View& view_image1 = Display("image1").SetAspect(aspect);
    View& view_image2 = Display("image2").SetAspect(aspect);
    View& view_image3 = Display("image3").SetAspect(aspect);


    View& d_panel = pangolin::CreatePanel("ui")
                    .SetBounds(1.0, 0.0, 0, 150);


    
    View& d_imgs = pangolin::Display("images")
                 .SetBounds(1.0, 0.5, 150, 1.0, true)
                   .SetLayout(LayoutEqual)
                   .AddDisplay(view_image0)
                   .AddDisplay(view_image1)
                   .AddDisplay(view_image2)
                   .AddDisplay(view_image3)
                   ;


//    int imageStrideFloat=imagePitchFloat/sizeof(float);

    cout <<" Pangolin initialised!"<<endl;

    float lambda    = 0.1;
    float epsilon_u = 0.0;
    float epsilon_d = 0.0;


//    float L = sqrt(43);
//    float tau = scale/sqrt(L*L*lambda);

//    float sigma_p = 1.0f/sqrt(L*L*tau);
    float sigma_q   = 1.0f;

    float sigma_p   = 1/(2*lambda);

//    float sigma_q = 1.0f/L;




//    cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted, size_have, 1.0, descr, d_DMatvalPtrT, d_DMatrowPtrT,
//                                                        d_DMatcolPtrT, d_low_img, 0.0, d_DTqi_copy);

//    cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted, size_wanted, 1.0, descr, d_cscBMatvalPtr, d_cscBMatcolPtr,
//                   d_cscBMatrowPtr, d_DTqi_copy, 0.0, d_BTDTqi);


//    cudaMemcpy(h_AxT,d_BTDTqi,sizeof(float)*size_wanted,cudaMemcpyDeviceToHost);


//    for(int row = 0 ; row < N_rows_upimg ; row++)
//    {
//        for(int col = 0 ; col < N_cols_upimg ; col++)
//        {
//            cout<<"("<< h_dT[col*N_rows_upimg+row]<<", "<< h_AxT[row*N_cols_upimg+col]<<") ";
//        }
//        cout<<endl;
//    }


    cutilSafeCall(cudaMemset(d_px,0,sizeof(float)*size_wanted));
    cutilSafeCall(cudaMemset(d_py,0,sizeof(float)*size_wanted));

    cutilSafeCall(cudaMemset(d_ux_,0,sizeof(float)*size_wanted));
    cutilSafeCall(cudaMemset(d_uy_,0,sizeof(float)*size_wanted));



    ifstream indata;
    indata.open("u_stored.txt");

    float val;

//    float array[size_wanted];

    int i = 0;
    while ( !indata.eof() ) { // keep reading until end-of-file
        indata >> val; // sets EOF flag if no value found
        input_image_data_up[i++] = val;

    }
    indata.close();

    for(int i = 0 ; i < 100 ; i++)
        cout<< input_image_data_up[i] << " ";





    cudaMemcpy(d_u_,input_image_data_up,sizeof(float)*size_wanted,cudaMemcpyHostToDevice);
    cudaMemcpy(d_u,input_image_data_up,sizeof(float)*size_wanted,cudaMemcpyHostToDevice);


//    cutilSafeCall(cudaMemcpy2D(d_u, upImageFloatPitch, input_image_up.data(), sizeof(float)*N_cols_upimg, sizeof(float)*N_cols_upimg, N_rows_upimg, cudaMemcpyHostToDevice));
//    cutilSafeCall(cudaMemcpy2D(d_u_, upImageFloatPitch, input_image_up.data(), sizeof(float)*N_cols_upimg, sizeof(float)*N_cols_upimg, N_rows_upimg, cudaMemcpyHostToDevice));


//    cusparseScsr2dense(handle,test_size,test_size)

//    cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted, size_wanted, 1.0, descr, d_WMatvalPtr,
//                                                    d_WMatrowPtr, d_WMatcolPtr, d_u_, 0.0, d_Wiu_copy);
//    exit(1);





    CVD::Image<float>bicubicsampledImage;
    img_load(bicubicsampledImage,"car_003_up.pgm");


    float *d_bicubic;
    cutilSafeCall(cudaMalloc((void**)&d_bicubic, (size_wanted)*sizeof(float)));
    cudaMemcpy(d_bicubic,bicubicsampledImage.data(),sizeof(float)*size_wanted,cudaMemcpyHostToDevice);



    float* h_dual_save_WTBTDTq = new float [size_wanted];

    ifstream Atqdata("Atq_it_1.txt");

    int Atqpos = 0;


    while ( !Atqdata.eof() ) {
        Atqdata >> val;
        h_dual_save_WTBTDTq[Atqpos]=val;
        Atqpos++;
    }
    Atqdata.close();

//    for(int row = 0 ; row < N_rows_upimg ; row++)
//    {
//        cout<<"row ="<<row<<endl;
//        for( int col = 0 ; col < N_cols_upimg ; col++)
//        {
////            cout<<h_dual_save_WTBTDTq[row*N_cols_upimg+col]<<" ";
//            cout<<h_tau[row*N_cols_upimg+col]<<" ";
//        }
//        cout<<endl;
//    }

//    cudaMemcpy(d_dual_save_WTBTDTq,h_dual_save_WTBTDTq,sizeof(float)*size_wanted,cudaMemcpyHostToDevice);
    cutilSafeCall(cudaMemset(d_dual_save_WTBTDTq,0,sizeof(float)*size_wanted));







    float* h_res = new float [size_wanted];
    ifstream pxfile("px_it_1.txt");

    int pxpos = 0;
    while ( !pxfile.eof() ) { // keep reading until end-of-file
        pxfile >> val; // sets EOF flag if no value found
        h_res[pxpos++] = val;

    }
    pxfile.close();


    float *h_qi  = new float[size_have];

//    exit(1);
    long doIt =0;

    while(!pangolin::ShouldQuit())
    {
        glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
//        cout << "Iteration = "<< doIt << endl;

        if( doIt%100 == 0)
        {
            ScopedCuTimer cuTime("TOTAL TIME PER ITERATION ");
            cout<<"width up = " << width_up <<endl;
            cout<<"height up = " << height_up <<endl;

            int stride = upImageFloatPitch/sizeof(float);
            cout<<"stride = "<<stride <<endl;
            cout<< "sigma_p = "<<sigma_p <<endl;
            cout<<"lambda = "<<lambda<<endl;

            launch_kernel_derivative_u(d_ux_,d_uy_,d_u_,stride,width_up, height_up);

            launch_kernel_dual_variable_p(d_px,d_py,d_ux_,d_uy_,epsilon_u, sigma_p, lambda, upImageStrideFloat,width_up,height_up);

            // What is that we want to try out in this image?
            // We want to do the optimisation steps with respect to q
            // That is:
            // q^{n+1} = \frac{q^n + \sigma \xi^{2} (DBWu_ - f)}{ 1 + epsilon_d*sigma_q/xisqr}
            // q^{n+1} =  max(-xisqr, min(xisqr, q^{n+1}))

            cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted*N_imgs, size_wanted, 1.0, descr, d_csrWMatStackedval,
                                                            d_csrWMatStackedrow, d_csrWMatStackedcol, d_u_, 0.0, d_Wis_u_);

            cout<<"Going to do DBW thing!"<<endl;
            for (int i = 0 ; i < N_imgs ; i++)
            {
                // copy
                cudaMemcpy(d_Wiu_copy,d_Wis_u_+(size_wanted)*i,sizeof(float)*size_wanted,cudaMemcpyDeviceToDevice);

                // Do B*(d_Wis_u_)
                cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted, size_wanted, 1.0, descr, d_BMatvalPtr, d_BMatrowPtr, d_BMatcolPtr, d_Wiu_copy, 0.0, d_Bx);

                // Do D*(B*(d_Wis_u_))
                cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_have, size_wanted, 1.0, descr, d_DMatvalPtr, d_DMatrowPtr, d_DMatcolPtr, d_Bx, 0.0, d_res);

                //copy to d_res_stacked_vector
                cudaMemcpy(d_res_stacked +(size_have)*i,d_res,sizeof(float)*size_have,cudaMemcpyDeviceToDevice);
            }



//            for (int i = 0 ; i < N_imgs ; i++)
//            {
//                // copy
//                cudaMemcpy(h_Ax,d_res_stacked+(size_have)*i,sizeof(float)*size_have,cudaMemcpyDeviceToHost);

////                CVD::Image<CVD::byte>img2generate = CVD::Image<CVD::byte>(ImageRef(N_cols_low_img,N_rows_low_img));

//                char DBWfileName[40];
//                sprintf(DBWfileName,"DBWfile_%d.txt",i);
//                ofstream DBWfile(DBWfileName);

//                for(int row = 0 ; row < N_rows_low_img; row++)
//                {
//                    for(int col = 0 ; col < N_cols_low_img; col++)
//                    {
//                        DBWfile<<h_Ax[row*N_cols_low_img+col]<<" ";
////                        img2generate[ImageRef(col,row)] = (unsigned char)(h_Ax[row*N_cols_low_img+col]*255.0f);
//                    }
//                    DBWfile<<endl;
//                }
//                DBWfile.close();

////                static int imgno=0;
////                char downfileName[34];
////                sprintf(downfileName,"DBWImageCUDA_%03d.png",imgno++);
////                img_save(img2generate,downfileName);
////                cout<<"d_u is printed!"<<endl;
//            }
//            exit(1);





            // d_res_stacked is set to 1...!!!

            cout<<"Going to do subtration!"<<endl;

            launch_kernel_subtract(d_fi, imgVectorsStrideFloat, d_res_stacked, qVectorsStrideFloat, size_have*N_imgs, N_cols_low_img, N_rows_low_img*N_imgs);
            launch_kernel_q_SubtractDBWiu_fAdd_yAndReproject(d_qi, qVectorsStrideFloat,
                                                             d_res_stacked,qVectorsStrideFloat,
                                                             sigma_q,xisqr,epsilon_d,
                                                             N_cols_low_img, N_rows_low_img,N_imgs);


            for(int i = 0 ; i < N_imgs ; i++)
            {
                //copy_qi_to_yu;
                cudaMemcpy(d_ydcopyqi,d_qi+(size_have)*i,sizeof(float)*size_have,cudaMemcpyDeviceToDevice);

                //do D^{T}*yu;
                cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted, size_have, 1.0, descr, d_DMatvalPtrT, d_DMatrowPtrT,
                                                    d_DMatcolPtrT, d_ydcopyqi, 0.0, d_DTqi_copy);

                //do B^{T}*(D^{T}*yu);
                cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted, size_wanted, 1.0, descr, d_cscBMatvalPtr, d_cscBMatcolPtr,
                                                    d_cscBMatrowPtr, d_DTqi_copy, 0.0, d_BTDTqi);

                //copy the contents to d_dual_save_BTDT_q
                cudaMemcpy(d_dual_save_BTDTq +(size_wanted)*i,d_BTDTqi,sizeof(float)*size_wanted,cudaMemcpyDeviceToDevice);

            }


            // do batch Wi^{T}yu;

            cusparseScsrmv(handle,CUSPARSE_OPERATION_NON_TRANSPOSE, size_wanted, N_imgs*size_wanted, 1.0, descr, d_cscWMatvalPtr, d_cscWMatcolPtr, d_cscWMatrowPtr,
                           d_dual_save_BTDTq, 0.0, d_dual_save_WTBTDTq);

            //launch kernel u ;
            // Remeber to remove this WTBTDTqStrideFloat thing!
            launch_kernel_primal_u(d_px,d_py,d_u_,d_u, upImageStrideFloat, epsilon_u,d_tau,xisqr, lambda, d_dual_save_WTBTDTq, WTBTDTqStrideFloat,width_up,height_up,N_imgs);
            cudaMemcpy(h_AxT,d_u,sizeof(float)*size_wanted,cudaMemcpyDeviceToHost);

            CVD::Image<CVD::byte>img2store = CVD::Image<CVD::byte>(ImageRef(N_cols_upimg,N_rows_upimg));

            float maxval = -99;
            float minval =  99;

            for(int row = 0 ; row < N_rows_upimg; row++)
            {
                for(int col = 0 ; col < N_cols_upimg; col++)
                {
                    if( maxval < h_AxT[row*N_cols_upimg+col])
                    {
                        maxval = h_AxT[row*N_cols_upimg+col];
                    }

                    if( minval > h_AxT[row*N_cols_upimg+col])
                    {
                        minval = h_AxT[row*N_cols_upimg+col];
                    }
                }
            }




            cout<<"minval = "<<minval<<endl;
            cout<<"maxval = "<<maxval<<endl;

            for(int row = 0 ; row < N_rows_upimg; row++)
            {
                for(int col = 0 ; col < N_cols_upimg; col++)
                {
                    img2store[ImageRef(col,row)] = (unsigned char)((h_AxT[row*N_cols_upimg+col]-minval)*255.0f/(maxval - minval));

                }
            }

            static int imgno=0;
            char superfileName[34];
            sprintf(superfileName,"super_resolution_%03d.png",imgno++);
            img_save(img2store,superfileName);
            cout<<"d_u is printed!"<<endl;

        }
        doIt++;

        {
            //do some display stuff
            {

//                view_image1.ActivateScissorAndClear();
//                view_image1.Activate();
//                DisplayFloatDeviceMem(&view_image1, d_img,upImageFloatPitch, greypbo,greyTexture);

////                view_image2.ActivateScissorAndClear();
//                view_image2.Activate();
//                DisplayFloatDeviceMem(&view_image2, d_img,upImageFloatPitch, greypbo,greyTexture);

            }
        }



        glutSwapBuffers();
        glutMainLoopEvent();
    }


    return 0;
}
예제 #8
0
    // The image callback function for the master camera/ downward looking camera
    void imageCallback(const sensor_msgs::ImageConstPtr& img_msg) {
        if (isPTAMshouldstop) {
            cout << "PTAM SHOULD STOP ASSUMING OUTLIERS OR LANDING ALREADY!" << endl;
//            return;
        }

        // TODO: synchronise dual images, could be simply assuming that their stamps are close to each other
        // main function should start when both imgs have come.

        ros::Time msg_stamp = img_msg->header.stamp;
        frame_stamp_ = img_msg->header.stamp.toSec();
//        ROS_INFO("MASTER IMG TIMESTAMP: %f", frame_stamp_);

        if (ini_one_circle_ && !isFinishIniPTAMwithcircle)
        {
            if ( !isimage_ini_call || !isviconcall) return;
            else{
                convertImage(image_ini, 1);
                msg_stamp = image_ini->header.stamp;

                CVD::img_save(frameBW_, "inimage.jpg");
            }
        }
        else
            convertImage(img_msg, 1);

        // if ptam is initialised, forward image is now usable
        cursecimg_good = false;
        if ( isdualcam && secondimgcame )// && isFinishIniPTAMwithcircle)
        {
            double imginterval = fabs(img_msg->header.stamp.toSec()-img_secmsg->header.stamp.toSec());
            if ( imginterval < intervalmax_imgdual)
            {
                secondcamlock.lock();
                convertImage(img_secmsg, 2);
                secondcamlock.unlock();
                cursecimg_good = true;// only when img is synchronised and ptam ini already
            }
        }

        // pass image frame along to PTAM
        if (use_circle_ini_ && !isFinishIniPTAMwithcircle){
            // use ini pose and image from another node: simple_landing node
            if ((ini_one_circle_ || ini_two_circle_) && !tracker_->circle_pose_get ){
                tracker_->circle_pose_get = true;
                tracker_->se3CfromW = tfs2se3(vicon_state);// just need Twc
                tracker_->time_stamp_circle = vicon_state.header.stamp.toSec();
                tracker_->time_stamp_now = ros::Time::now().toSec();
            }

            // use ground information and IMU/EKF info.
            else if (ini_ground_ && !isEKFattitudeInicall && useEKFiniAtti){
                cout << "Waiting for attitude from EKF node..." << endl;
                return;
            }
            else if (ini_ground_ && !isattitudecall && !useEKFiniAtti){
                cout << "Waiting for attitude from IMU..." << endl;
                return;
            }
            else if (ini_ground_ && useEKFiniAtti && isEKFattitudeInicall ){
                if (!tracker_->attitude_get){

                    tracker_->se3IMU_camfromW = IMU2camWorldfromQuat(iniAttiEKF);//Tcw from Twi IMU calculated attitude

                    cout << "PTAM attitude got from EKF: "<< iniAttiEKF.w() <<", "
                                                << iniAttiEKF.x() <<", "
                                                << iniAttiEKF.y() <<", "
                                                << iniAttiEKF.z() <<endl;

                    tracker_->attitude_get = true;
                }
            }
            else if (ini_ground_ && !useEKFiniAtti && isattitudecall){
                if (!tracker_->attitude_get){
                    mAttitudelock.lock();
                    tracker_->se3IMU_camfromW = imu2camWorld(attitude_data_);//Twc from IMU calculated attitude
                    mAttitudelock.unlock();

                    cout << "PTAM pose got from IMU: "<< attitude_data_.roll <<", "
                            << attitude_data_.pitch<<endl;

                    tracker_->attitude_get = true;
                }
            }
        }

        if (use_ekf_pose && ispose_predicted_ekfcall) {
            mPosePredictedlock.lock();
            tracker_->se3CfromW_predicted_ekf.get_translation()[0] = pose_predicted_ekf_state_.pose.pose.position.x;
            tracker_->se3CfromW_predicted_ekf.get_translation()[1] = pose_predicted_ekf_state_.pose.pose.position.y;
            tracker_->se3CfromW_predicted_ekf.get_translation()[2] = pose_predicted_ekf_state_.pose.pose.position.z;
            tracker_->se3CfromW_predicted_ekf.get_rotation() = orientation_ekf_;
            tracker_->scale_ekf = scale_ekf_;// This is the scale of the pose estimation in the previous image frame
            mPosePredictedlock.unlock();
        }

        ros::Time time1 = ros::Time::now();


        // for debuging
//        static int published_num = 0;
//        if (published_num == 2)
//            return;

        // Should be able to input dual image in the callback function
        if (!cursecimg_good)
            tracker_->TrackFrame(frameBW_);
        else
            tracker_->TrackFrame(frameBW_, frameBW_sec);

        if (!isFinishIniPTAMwithcircle){
            camPoselast = tracker_->GetCurrentPose();
            camPosethis = camPoselast;
            camPose4pub = camPoselast;
        }
        else{
            camPosethis = tracker_->GetCurrentPose();
            camPose4pub = camPosethis;
        }
        isFinishIniPTAMwithcircle = true;

        ros::Time time2 = ros::Time::now();
        ros::Duration time3 = time2 - time1;
//        ROS_INFO("PTAM time cost: %f", time3.toSec());

        if (use_circle_ini_)
            cout << tracker_->GetMessageForUser() << " " << endl;
        else
            cout << tracker_->GetMessageForUser() << endl;

        // ignore those too low information, useful when landing
        // also currently, when outliers happen, should stop PTAM for safty.
        if ((ini_one_circle_ && tracker_->GetCurrentPose().inverse().get_translation()[2] < 0.3)
                ||(sqrt((camPosethis.get_translation()-camPoselast.get_translation())*
                        (camPosethis.get_translation()-camPoselast.get_translation()))>0.5))
        {
            isPTAMshouldstop = true;
//            return;
            // camposelast keep still, just publish it
            camPose4pub = camPoselast;
        }
        else
            camPoselast = camPosethis;
        // publish current pose in map in different cases
        if ( (use_circle_ini_ && !use_ekf_pose) || (use_ekf_pose && isEKFattitudeInicall)){
                publishPose(msg_stamp);
                publishPosewithCovariance(msg_stamp);
                inipose_published = true;

                // yang, publish landing object pose to set-point node?
                if (tracker_->isLandingpadPoseGet){
                    publish_inilandingpadpose(msg_stamp);
                    if (!tracker_->isFinishPadDetection)
                        publish_finallandingpadpose(msg_stamp);
                }
        }

        if (use_circle_ini_){
            static geometry_msgs::TransformStamped ptam_state;
            if (tracker_->ini_circle_finished)
                ptam_state.transform.rotation.w = 1;//only for state sign recognition
            else
                ptam_state.transform.rotation.w = 0;
            ini_circle_pub_.publish(ptam_state);
        }

        if (sendvisual){// && !isPTAMshouldstop){
            if (cam_marker_pub_.getNumSubscribers() > 0 || point_marker_pub_.getNumSubscribers() > 0)
                map_viz_->publishMapVisualization(map_,tracker_,cam_marker_pub_,point_marker_pub_, cam_marker_pub_sec, isdualcam);
            if ((landingpad_marker_pub_.getNumSubscribers() > 0) && tracker_->isLandingpadPoseGet)
                map_viz_->publishlandingpad(tracker_, landingpad_marker_pub_);
        }

        if (show_debug_image_) {
//            cv::Mat rgb_cv(480, 640, CV_8UC3, frameRGB_.data());
//            map_viz_->renderDebugImage(rgb_cv,tracker_,map_);
//            cv::imshow("downwards",rgb_cv);
//            cv::waitKey(10);
//            static int savedebugimg = 0;
//            if (savedebugimg == 0){
//                cv::imwrite("inimagedebug.jpg", rgb_cv);
//                savedebugimg = 1;
//            }
            if (cursecimg_good){
                cv::Mat rgb_cv(480, 640, CV_8UC3, frameRGB_sec.data());
                map_viz_->renderDebugImageSec(rgb_cv,tracker_,map_);
                cv::imshow("forwards",rgb_cv);
                cv::waitKey(10);
            }

            if (false)//(istrackPad)
            {
                CVD::Image<CVD::Rgb<CVD::byte> > frameRef;
                CVD::Image<CVD::byte> frameRefbw;
                frameRefbw.resize(tracker_->GetRefFrame().aLevels[0].im.size());
                frameRef.resize(tracker_->GetRefFrame().aLevels[0].im.size());
                CVD::copy(tracker_->GetRefFrame().aLevels[0].im, frameRefbw);
                //            memcpy(frameRefbw.data(), tracker_->GetRefFrame().aLevels[0].im.data(), tracker_->GetRefFrame().aLevels[0].im.totalsize());
                CVD::convert_image(frameRefbw, frameRef);
                cv::Mat rgb_ref(frameRef.size().y, frameRef.size().x, CV_8UC3, frameRef.data());
                map_viz_->renderRefImage(rgb_ref, tracker_);
                cv::imshow("ref", rgb_ref);
                cv::waitKey(10);
            }

        }
//        isviconcall = false;
    }
예제 #9
0
bool ofxCalibImage::makeFromImage(CVD::Image<CVD::byte> &im, ofxDataPackege * _data){
    data = _data;
    mvCorners.clear();
    mvGridCorners.clear();
    
    mim = im;
    mim.make_unique();
    
    // Find potential corners..
    // This works better on a blurred image, so make a blurred copy
    // and run the corner finding on that.
    {
        CVD::Image<CVD::byte> imBlurred = mim;
        imBlurred.make_unique();
        CVD::convolveGaussian(imBlurred, data->BlurSigma);
        CVD::ImageRef irTopLeft(5,5);
        CVD::ImageRef irBotRight = mim.size() - irTopLeft;
        CVD::ImageRef ir = irTopLeft;
        
        ofSetColor(255, 0, 255);
        int nGate = data->MeanGate;
        ofPushStyle();
        ofNoFill();
        do
            if( IsCorner(imBlurred, ir, nGate)){
                mvCorners.push_back(ir);
                ofCircle(ir.x,ir.y, 2);
            }
        while( ir.next(irTopLeft, irBotRight));
        ofPopStyle();
    }
    
    // If there's not enough corners, i.e. camera pointing somewhere random, abort.
    if((int) mvCorners.size() < 20)
        return false;
    
    // Pick a central corner point...
    CVD::ImageRef irCenterOfImage = mim.size()  / 2;
    CVD::ImageRef irBestCenterPos;
    unsigned int nBestDistSquared = 99999999;
    for(unsigned int i=0; i<mvCorners.size(); i++){
        unsigned int nDist = (mvCorners[i] - irCenterOfImage).mag_squared();
        if(nDist < nBestDistSquared){
            nBestDistSquared = nDist;
            irBestCenterPos = mvCorners[i];
        }
    }
    
    // ... and try to fit a corner-patch to that.
    PTAMM::CalibCornerPatch Patch( data->CornerPatchSize);
    PTAMM::CalibCornerPatch::Params Params;
    Params.v2Pos = vec(irBestCenterPos);
    Params.v2Angles = GuessInitialAngles(mim, irBestCenterPos); 
    Params.dGain = 80.0;
    Params.dMean = 120.0;
    
    if(!Patch.IterateOnImageWithDrawing(Params, mim))
        return false;
    
    // The first found corner patch becomes the origin of the detected grid.
    ofxCalibGridCorner cFirst;
    cFirst.Params = Params;
    mvGridCorners.push_back(cFirst);
    cFirst.draw();
    
    // Next, go in two compass directions from the origin patch, and see if 
    // neighbors can be found.
    if(!(expandByAngle(0,0) || expandByAngle(0,2)))
        return false;
    if(!(expandByAngle(0,1) || expandByAngle(0,3)))
        return false;
    
    mvGridCorners[1].mInheritedSteps = mvGridCorners[2].mInheritedSteps = mvGridCorners[0].GetSteps(mvGridCorners);
    
    // The three initial grid elements are enough to find the rest of the grid.
    int nNext;
    int nSanityCounter = 0; // Stop it getting stuck in an infinite loop...
    const int nSanityCounterLimit = 500;
    while((nNext = nextToExpand()) >= 0 && nSanityCounter < nSanityCounterLimit ){
        expandByStep(nNext);
        nSanityCounter++;
    }
    if(nSanityCounter == nSanityCounterLimit)
        return false;
    
    drawImageGrid();
    return true;
}
예제 #10
0
// Publish the small preview image
void SystemFrontendBase::PublishSmallImage()
{
  ROS_ASSERT(mpTracker);

  // Don't even bother doing image conversion if nobody's listening
  if (mSmallImagePub.getNumSubscribers() == 0)
    return;

  // We're going to create a small tiled image, based on mmSmallImageOffsets, and copy
  // individual downsampled images into specific locations in the image
  CVD::Image<CVD::byte> imSmall(mirSmallImageSize, 0);

  // Pointcloud to hold measurements. Z axis corresponds to pyramid level
  pcl::PointCloud<pcl::PointXYZ>::Ptr pointMsg(new pcl::PointCloud<pcl::PointXYZ>());

  for (ImageRefMap::iterator offset_it = mmSmallImageOffsets.begin(); offset_it != mmSmallImageOffsets.end();
       ++offset_it)
  {
    std::string camName = offset_it->first;
    CVD::ImageRef irOffset = offset_it->second;

    CVD::Image<CVD::byte> imTracker = mpTracker->GetKeyFrameImage(camName, mnSmallImageLevel);

    // When tracker just added an MKF to the map, it will have gotten rid of the MKF it is holding, so
    // imTracker will be an empty image. CVD::copy doesn't like this too much.
    if (imTracker.totalsize() > 0)
      CVD::copy(imTracker, imSmall, imTracker.size(), CVD::ImageRef(), irOffset);

    for (unsigned l = 0; l < LEVELS; ++l)
    {
      std::vector<TooN::Vector<2>> vMeas = mpTracker->GetKeyFrameSimpleMeas(camName, l);

      for (unsigned i = 0; i < vMeas.size(); ++i)
      {
        TooN::Vector<2> v2MeasInSmallImage = CVD::vec(irOffset) + LevelNPos(vMeas[i], mnSmallImageLevel);
        pcl::PointXYZ pclPoint;

        pclPoint.x = v2MeasInSmallImage[0];
        pclPoint.y = v2MeasInSmallImage[1];
        pclPoint.z = l;

        pointMsg->points.push_back(pclPoint);
      }
    }
  }

  ROS_DEBUG_STREAM("Collected " << pointMsg->points.size() << " simple measurements and put them in a point cloud");

  ros::Time timestamp = mpTracker->GetCurrentTimestamp();

  pointMsg->header.frame_id = "tracker_small_image";
  pointMsg->width = pointMsg->points.size();
  pointMsg->height = 1;
  pointMsg->is_dense = false;

#if ROS_VERSION_MINIMUM(1, 9, 56)  // Hydro or above, uses new PCL library
  pointMsg->header.stamp = timestamp.toNSec();
#else
  pointMsg->header.stamp = timestamp;
#endif

  sensor_msgs::Image imageMsg;
  imageMsg.header.stamp = timestamp;
  util::ImageToMsg(imSmall, imageMsg);

  mSmallImagePointsPub.publish(pointMsg);
  mSmallImagePub.publish(imageMsg);
}