template <template <typename> class Storage> void 
    cloud_cb (const boost::shared_ptr<openni_wrapper::Image>& image,
              const boost::shared_ptr<openni_wrapper::DepthImage>& depth_image, 
              float constant)
    {
      static unsigned count = 0;
      static double last = pcl::getTime ();
      double now = pcl::getTime ();
      if (++count == 30 || (now - last) > 5)
      {
        std::cerr << "Average framerate: " << double(count)/double(now - last) << " Hz" <<  std::endl;
        count = 0;
        last = now;
      }
      //static unsigned int nr_planes_last_time = 0;

      std::cerr << "received callback" << std::endl;
      typename PointCloudAOS<Storage>::Ptr data;
      typename SampleConsensusModel1PointPlane<Storage>::Ptr sac_model;
      {
      pcl::ScopeTime timer ("All: ");
      // Compute the PointCloud on the device
      d2c.compute<Storage> (depth_image, image, constant, data, true, 2);

      {
        pcl::ScopeTime t ("creating sac_model");
        sac_model.reset (new SampleConsensusModel1PointPlane<Storage> (data));
      }
      MultiRandomSampleConsensus<Storage> sac (sac_model);
      sac.setMinimumCoverage (0.90); // at least 95% points should be explained by planes
      sac.setMaximumBatches (5);
      sac.setIerationsPerBatch (1000);
      sac.setDistanceThreshold (0.05);

      {
        pcl::ScopeTime timer ("computeModel: ");
        if (!sac.computeModel (0))
        {
          std::cerr << "Failed to compute model" << std::endl;
        }
        else
        {
          if (use_viewer)
          {
            std::cerr << "getting inliers.. ";
            
            std::vector<typename SampleConsensusModel1PointPlane<Storage>::IndicesPtr> planes;
//            thrust::host_vector<int> regions_host = region_mask;
//            std::copy (regions_host.begin (), regions_host.end(), std::ostream_iterator<int>(std::cerr, " "));
            {
              ScopeTime t ("retrieving inliers");
              planes = sac.getAllInliers ();
            }
            
            typename Storage<int>::type region_mask;
            markInliers<Storage> (data, region_mask, planes);
            std::cerr << "image_size: " << data->width << " x " << data->height << " = " << region_mask.size () << std::endl;
            
            typename StoragePointer<Storage,uchar>::type ptr = StorageAllocator<Storage,uchar>::alloc (data->width * data->height);
              
            createIndicesImage<Storage> (ptr, region_mask);

            typename ImageType<Storage>::type dst (data->height, data->width, CV_8UC1, thrust::raw_pointer_cast<char4>(ptr), data->width);
            cv::imshow ("Result", cv::Mat(dst));
            cv::waitKey (2);

            std::vector<int> planes_inlier_counts = sac.getAllInlierCounts ();
            std::vector<float4> coeffs = sac.getAllModelCoefficients ();
            std::vector<float3> centroids = sac.getAllModelCentroids ();
            std::cerr << "Found " << planes_inlier_counts.size () << " planes" << std::endl;
            int best_plane = 0;
            int best_plane_inliers_count = -1;

            for (unsigned int i = 0; i < planes.size (); i++)
            {
              if (planes_inlier_counts[i] > best_plane_inliers_count)
              {
                best_plane = i;
                best_plane_inliers_count = planes_inlier_counts[i];
              }

              typename SampleConsensusModel1PointPlane<Storage>::IndicesPtr inliers_stencil;
              inliers_stencil = planes[i];//sac.getInliersStencil ();

              OpenNIRGB color;
              double trand = 255 / (RAND_MAX + 1.0);
              //int idx = (int)(rand () * trand);

              color.r = (int)(rand () * trand);
              color.g = (int)(rand () * trand);
              color.b = (int)(rand () * trand);
              {
                ScopeTime t ("coloring planes");
                colorIndices<Storage> (data, inliers_stencil, color);
              }
            }
          }

          //{
          //    ScopeTime t ("copying & transforming logo");
          //for (unsigned int i = 0; i < planes.size (); i++)
          //{
//        //    if (i == best_plane) // assume first plane is biggest plane
          //  {
          //    if (logo_cloud_)
          //    {
          //      boost::shared_ptr<pcl::PointCloud<pcl::PointXYZRGB> > transformed_logo (new pcl::PointCloud<pcl::PointXYZRGB>);
          //      Eigen::Affine3f transformation;

          //      Eigen::Vector3f plane_normal (coeffs[i].x, coeffs[i].y, coeffs[i].z);
          //      plane_normal.normalize ();
          //      if (plane_normal.dot (Eigen::Vector3f::Zero()) - coeffs[i].w > 0)
          //        plane_normal = -plane_normal;

          //      Eigen::Vector3f logo_normal (0,0,-1);

          //      Eigen::Vector3f trans (Eigen::Vector3f(centroids[i].x, centroids[i].y, centroids[i].z) * 0.97);
          //      Eigen::AngleAxisf rot (acos (logo_normal.dot (plane_normal)), logo_normal.cross (plane_normal).normalized ());

          //      transformation = Eigen::Affine3f::Identity();// = ....;
          //      transformation.translate (trans);// = ....;
          //      transformation.rotate (rot);// = ....;
          //      transformation.scale (0.001 * sqrt (planes_inlier_counts[i]));// = ....;

          //      std::cerr << "Plane centroid " << centroids[i].x << " " <<  centroids[i].y << " " << centroids[i].z << std::endl;
          //      pcl::getTransformedPointCloud<pcl::PointCloud<pcl::PointXYZRGB> > (*logo_cloud_, transformation, *transformed_logo);

          //      std::stringstream ss;
          //      ss << "logo" << i;
          //      //viewer.showCloud (transformed_logo, ss.str ());
          //    }
          //  }
          //}
          //}

          //boost::shared_ptr<pcl::PointCloud<pcl::PointXYZRGB> > dummy_cloud (new pcl::PointCloud<pcl::PointXYZRGB>);
          //for (unsigned int i = planes.size (); i < nr_planes_last_time; i++)
          //{
          //  std::stringstream ss;
          //  ss << "logo" << i;
          //  viewer.showCloud (dummy_cloud, ss.str ());
          //}
          //nr_planes_last_time = planes.size ();
        }
      }

    }
      if (use_viewer)
      {
        typename Storage<float4>::type normals = sac_model->getNormals ();

        boost::mutex::scoped_lock l(m_mutex);
        normal_cloud.reset (new pcl::PointCloud<pcl::PointXYZRGBNormal>);
        pcl_cuda::toPCL (*data, normals, *normal_cloud);
        new_cloud = true;
      }


      //pcl::PointCloud<pcl::PointXYZRGB>::Ptr output (new pcl::PointCloud<pcl::PointXYZRGB>);
      //pcl_cuda::toPCL (*data, *output);
      //viewer.showCloud (output);
    }
    template <template <typename> class Storage> void 
    cloud_cb (const boost::shared_ptr<openni_wrapper::Image>& image,
              const boost::shared_ptr<openni_wrapper::DepthImage>& depth_image, 
              float constant)
    {
      static unsigned count = 0;
      static double last = getTime ();
      double now = getTime ();
      //if (++count == 30 || (now - last) > 5)
      {
        std::cout << std::endl;
        count = 1;
        std::cout << "Average framerate: " << double(count)/double(now - last) << " Hz --- ";
        last = now;
      }

      static int smoothing_nr_iterations = 10;
      static int smoothing_filter_size = 2;
      static int enable_visualization = 1;
      static int enable_mean_shift = 0;
      static int enable_plane_fitting = 0;
      static int meanshift_sp=8;
      static int meanshift_sr=20;
      static int meanshift_minsize=100;

      pcl::PointCloud<pcl::PointXYZRGB>::Ptr output (new pcl::PointCloud<pcl::PointXYZRGB>);
      typename PointCloudAOS<Storage>::Ptr data;

      //ScopeTimeCPU timer ("All: ");
      ScopeTimeCPU time ("everything");
      // Compute the PointCloud on the device
      {
        ScopeTimeCPU time ("disparity smoothing");
        d2c.compute<Storage> (depth_image, image, constant, data, false, 1, smoothing_nr_iterations, smoothing_filter_size);
      }

      boost::shared_ptr<typename Storage<float4>::type> normals;
      float focallength = 580/2.0;
      {
        ScopeTimeCPU time ("Normal Estimation");
        if (normal_method == 1)
          normals = computeFastPointNormals<Storage> (data);
        else
          normals = computePointNormals<Storage> (data->points.begin (), data->points.end (), focallength, data, radius_cm / 100.0f, nr_neighbors);
        cudaThreadSynchronize ();
      }

      // retrieve normals as an image..
      typename ImageType<Storage>::type normal_image;
      typename StoragePointer<Storage,char4>::type ptr;
      {
        ScopeTimeCPU time ("Matrix Creation");
        ImageType<Storage>::createContinuous ((int)data->height, (int)data->width, CV_8UC4, normal_image);
        ptr = typename StoragePointer<Storage,char4>::type ((char4*)normal_image.data);
        createNormalsImage<Storage> (ptr, *normals);
      }

      //TODO: this breaks for pcl::cuda::Host
      cv::Mat seg;
      {
        ScopeTimeCPU time ("Mean Shift");
        if (enable_mean_shift == 1)
        {
          cv::gpu::meanShiftSegmentation (normal_image, seg, meanshift_sp, meanshift_sr, meanshift_minsize);
          typename Storage<char4>::type new_colors ((char4*)seg.datastart, (char4*)seg.dataend);
          colorCloud<Storage> (data, new_colors);
        }
      }

      typename SampleConsensusModel1PointPlane<Storage>::Ptr sac_model;
      if (enable_plane_fitting == 1)
      {
        // Create sac_model
        {
          ScopeTimeCPU t ("creating sac_model");
          sac_model.reset (new SampleConsensusModel1PointPlane<Storage> (data));
        }
        sac_model->setNormals (normals);

        MultiRandomSampleConsensus<Storage> sac (sac_model);
        sac.setMinimumCoverage (0.90); // at least 95% points should be explained by planes
        sac.setMaximumBatches (1);
        sac.setIerationsPerBatch (1024);
        sac.setDistanceThreshold (0.05);

        {
          ScopeTimeCPU timer ("computeModel: ");
          if (!sac.computeModel (0))
          {
            std::cerr << "Failed to compute model" << std::endl;
          }
          else
          {
            if (enable_visualization)
            {
//              std::cerr << "getting inliers.. ";
              
              std::vector<typename SampleConsensusModel1PointPlane<Storage>::IndicesPtr> planes;
              typename Storage<int>::type region_mask;
              markInliers<Storage> (data, region_mask, planes);
              thrust::host_vector<int> regions_host;
              std::copy (regions_host.begin (), regions_host.end(), std::ostream_iterator<int>(std::cerr, " "));
              {
                ScopeTimeCPU t ("retrieving inliers");
                planes = sac.getAllInliers ();
              }
              std::vector<int> planes_inlier_counts = sac.getAllInlierCounts ();
              std::vector<float4> coeffs = sac.getAllModelCoefficients ();
              std::vector<float3> centroids = sac.getAllModelCentroids ();
              std::cerr << "Found " << planes_inlier_counts.size () << " planes" << std::endl;
              int best_plane = 0;
              int best_plane_inliers_count = -1;

              for (unsigned int i = 0; i < planes.size (); i++)
              {
                if (planes_inlier_counts[i] > best_plane_inliers_count)
                {
                  best_plane = i;
                  best_plane_inliers_count = planes_inlier_counts[i];
                }

                typename SampleConsensusModel1PointPlane<Storage>::IndicesPtr inliers_stencil;
                inliers_stencil = planes[i];//sac.getInliersStencil ();

                OpenNIRGB color;
                //double trand = 255 / (RAND_MAX + 1.0);

                //color.r = (int)(rand () * trand);
                //color.g = (int)(rand () * trand);
                //color.b = (int)(rand () * trand);
                color.r = (1.0f + coeffs[i].x) * 128;
                color.g = (1.0f + coeffs[i].y) * 128;
                color.b = (1.0f + coeffs[i].z) * 128;
                {
                  ScopeTimeCPU t ("coloring planes");
                  colorIndices<Storage> (data, inliers_stencil, color);
                }
              }
            }
          }
        }
      }

      {
        ScopeTimeCPU time ("Vis");
        cv::namedWindow("NormalImage", CV_WINDOW_NORMAL);
        cv::namedWindow("Parameters", CV_WINDOW_NORMAL);
        cvCreateTrackbar( "iterations", "Parameters", &smoothing_nr_iterations, 50, NULL);
        cvCreateTrackbar( "filter_size", "Parameters", &smoothing_filter_size, 10, NULL);
        cvCreateTrackbar( "enable_visualization", "Parameters", &enable_visualization, 1, NULL);
        cvCreateTrackbar( "enable_color", "Parameters", &enable_color, 1, NULL);
        cvCreateTrackbar( "normal_method", "Parameters", &normal_method, 1, NULL);
        cvCreateTrackbar( "neighborhood_radius", "Parameters", &radius_cm, 50, NULL);
        cvCreateTrackbar( "nr_neighbors", "Parameters", &nr_neighbors, 400, NULL);
        cvCreateTrackbar( "normal_viz_step", "Parameters", &normal_viz_step, 1000, NULL);
        cvCreateTrackbar( "enable_mean_shift", "Parameters", &enable_mean_shift, 1, NULL);
        cvCreateTrackbar( "meanshift_sp", "Parameters", &meanshift_sp, 100, NULL);
        cvCreateTrackbar( "meanshift_sr", "Parameters", &meanshift_sr, 100, NULL);
        cvCreateTrackbar( "meanshift_minsize", "Parameters", &meanshift_minsize, 500, NULL);
        cvCreateTrackbar( "enable_plane_fitting", "Parameters", &enable_plane_fitting, 1, NULL);
        cvCreateTrackbar( "enable_normal_viz", "Parameters", &enable_normal_viz, 1, NULL);
        if (enable_visualization == 1)
        {

          cv::Mat temp;
          if (enable_mean_shift == 1)
            cv::cvtColor(seg,temp,CV_BGR2RGB);
          else
            cv::cvtColor(cv::Mat(normal_image),temp,CV_BGR2RGB);
          cv::imshow ("NormalImage", temp);

          boost::mutex::scoped_lock l(m_mutex);
          normal_cloud.reset (new pcl::PointCloud<pcl::PointXYZRGBNormal>);
          toPCL (*data, *normals, *normal_cloud);
          new_cloud = true;
        }
        cv::waitKey (2);
      }
    }
    // callback function from the OpenNIGrabber
    template <template <typename> class Storage> void 
    cloud_cb (const boost::shared_ptr<openni_wrapper::Image>& image,
              const boost::shared_ptr<openni_wrapper::DepthImage>& depth_image, 
              float constant)
    {
      // TIMING
      static unsigned count = 0;
      static double last = getTime ();
      double now = getTime ();

      //if (++count == 30 || (now - last) > 5)
      {
        std::cout << std::endl;
        count = 1;
        std::cout << "Average framerate: " << double(count)/double(now - last) << " Hz --- ";
        last = now;
      }

      // PARAMETERS
      static int smoothing_nr_iterations = 10;
      static int smoothing_filter_size = 2;
      static int enable_visualization = 0;
      static int enable_mean_shift = 1;
      static int enable_plane_fitting = 0;
      static int meanshift_sp=8;
      static int meanshift_sr=20;
      static int meanshift_minsize=100;

      // CPU AND GPU POINTCLOUD OBJECTS
      pcl::PointCloud<pcl::PointXYZRGB>::Ptr output (new pcl::PointCloud<pcl::PointXYZRGB>);
      typename PointCloudAOS<Storage>::Ptr data;

      ScopeTimeCPU time ("everything");
      {
        ScopeTimeCPU time ("disparity smoothing");
        // Compute the PointCloud on the device
        d2c.compute<Storage> (depth_image, image, constant, data, false, 1, smoothing_nr_iterations, smoothing_filter_size);
      }

      // GPU NORMAL CLOUD
      boost::shared_ptr<typename Storage<float4>::type> normals;
      float focallength = 580/2.0;
      {
        ScopeTimeCPU time ("Normal Estimation");
        if (normal_method == 1)
          // NORMAL ESTIMATION USING DIRECT PIXEL NEIGHBORS
          normals = computeFastPointNormals<Storage> (data);
        else
          // NORMAL ESTIMATION USING NEIGHBORHOODS OF RADIUS "radius"
          normals = computePointNormals<Storage> (data->points.begin (), data->points.end (), focallength, data, radius_cm / 100.0f, nr_neighbors);
        cudaThreadSynchronize ();
      }

      // RETRIEVE NORMALS AS AN IMAGE
      typename ImageType<Storage>::type normal_image;
      typename StoragePointer<Storage,char4>::type ptr;
      {
        ScopeTimeCPU time ("Matrix Creation");
        ImageType<Storage>::createContinuous ((int)data->height, (int)data->width, CV_8UC4, normal_image);
        ptr = typename StoragePointer<Storage,char4>::type ((char4*)normal_image.data);
        createNormalsImage<Storage> (ptr, *normals);
      }

      //urdf_filter->filter (data);

      //TODO: this breaks for pcl::cuda::Host, meaning we have to run this on the GPU
      std::vector<int> reg_labels;
      pcl::cuda::detail::DjSets comps(0);
      cv::Mat seg;
      {
        ScopeTimeCPU time ("Mean Shift");
        if (enable_mean_shift == 1)
        {
          // USE GPU MEAN SHIFT SEGMENTATION FROM OPENCV
          pcl::cuda::meanShiftSegmentation (normal_image, seg, reg_labels, meanshift_sp, meanshift_sr, meanshift_minsize, comps);
          typename Storage<char4>::type new_colors ((char4*)seg.datastart, (char4*)seg.dataend);
          colorCloud<Storage> (data, new_colors);
        }
      }

      typename SampleConsensusModel1PointPlane<Storage>::Ptr sac_model;
      if (enable_plane_fitting == 1)
      {
        // Create sac_model
        {
          ScopeTimeCPU t ("creating sac_model");
          sac_model.reset (new SampleConsensusModel1PointPlane<Storage> (data));
        }
        sac_model->setNormals (normals);

        MultiRandomSampleConsensus<Storage> sac (sac_model);
        sac.setMinimumCoverage (0.90); // at least 95% points should be explained by planes
        sac.setMaximumBatches (1);
        sac.setIerationsPerBatch (1024);
        sac.setDistanceThreshold (0.05);

        {
          ScopeTimeCPU timer ("computeModel: ");
          if (!sac.computeModel (0))
          {
            std::cerr << "Failed to compute model" << std::endl;
          }
          else
          {
            if (enable_visualization)
            {
//              std::cerr << "getting inliers.. ";
              
              std::vector<typename SampleConsensusModel1PointPlane<Storage>::IndicesPtr> planes;
              typename Storage<int>::type region_mask;
              markInliers<Storage> (data, region_mask, planes);
              thrust::host_vector<int> regions_host;
              std::copy (regions_host.begin (), regions_host.end(), std::ostream_iterator<int>(std::cerr, " "));
              {
                ScopeTimeCPU t ("retrieving inliers");
                planes = sac.getAllInliers ();
              }
              std::vector<int> planes_inlier_counts = sac.getAllInlierCounts ();
              std::vector<float4> coeffs = sac.getAllModelCoefficients ();
              std::vector<float3> centroids = sac.getAllModelCentroids ();
              std::cerr << "Found " << planes_inlier_counts.size () << " planes" << std::endl;
              int best_plane = 0;
              int best_plane_inliers_count = -1;

              for (unsigned int i = 0; i < planes.size (); i++)
              {
                if (planes_inlier_counts[i] > best_plane_inliers_count)
                {
                  best_plane = i;
                  best_plane_inliers_count = planes_inlier_counts[i];
                }

                typename SampleConsensusModel1PointPlane<Storage>::IndicesPtr inliers_stencil;
                inliers_stencil = planes[i];//sac.getInliersStencil ();

                OpenNIRGB color;
                //double trand = 255 / (RAND_MAX + 1.0);

                //color.r = (int)(rand () * trand);
                //color.g = (int)(rand () * trand);
                //color.b = (int)(rand () * trand);
                color.r = (1.0f + coeffs[i].x) * 128;
                color.g = (1.0f + coeffs[i].y) * 128;
                color.b = (1.0f + coeffs[i].z) * 128;
                {
                  ScopeTimeCPU t ("coloring planes");
                  colorIndices<Storage> (data, inliers_stencil, color);
                }
              }
            }
          }
        }
      }

      if (gui)
      {
        ScopeTimeCPU time ("Vis");
        cv::namedWindow("NormalImage", CV_WINDOW_NORMAL);
        cv::namedWindow("Parameters", CV_WINDOW_NORMAL);
        cvCreateTrackbar( "iterations", "Parameters", &smoothing_nr_iterations, 50, NULL);
        cvCreateTrackbar( "filter_size", "Parameters", &smoothing_filter_size, 10, NULL);
        cvCreateTrackbar( "enable_visualization", "Parameters", &enable_visualization, 1, NULL);
        cvCreateTrackbar( "enable_color", "Parameters", &enable_color, 1, NULL);
        cvCreateTrackbar( "normal_method", "Parameters", &normal_method, 1, NULL);
        cvCreateTrackbar( "neighborhood_radius", "Parameters", &radius_cm, 50, NULL);
        cvCreateTrackbar( "nr_neighbors", "Parameters", &nr_neighbors, 400, NULL);
        cvCreateTrackbar( "normal_viz_step", "Parameters", &normal_viz_step, 1000, NULL);
        cvCreateTrackbar( "enable_mean_shift", "Parameters", &enable_mean_shift, 1, NULL);
        cvCreateTrackbar( "meanshift_sp", "Parameters", &meanshift_sp, 100, NULL);
        cvCreateTrackbar( "meanshift_sr", "Parameters", &meanshift_sr, 100, NULL);
        cvCreateTrackbar( "meanshift_minsize", "Parameters", &meanshift_minsize, 500, NULL);
        cvCreateTrackbar( "enable_plane_fitting", "Parameters", &enable_plane_fitting, 1, NULL);
        if (enable_visualization == 1)
        {

          if (enable_mean_shift == 1)
            cv::imshow ("NormalImage", seg);
          else
            cv::imshow ("NormalImage", cv::Mat(normal_image));

          boost::mutex::scoped_lock l(m_mutex);
          normal_cloud.reset (new pcl::PointCloud<pcl::PointXYZRGBNormal>);
          toPCL (*data, *normals, *normal_cloud);
          new_cloud = true;
        }
        cv::waitKey (2);
      }
      else
      {
        {
          ScopeTimeCPU c_p ("Copying");
          boost::mutex::scoped_lock l(m_mutex);
          region_cloud.reset (new pcl::PointCloud<PointXYZRGBNormalRegion>);
          region_cloud->header.frame_id = "/openni_rgb_optical_frame";
          toPCL (*data, *normals, *region_cloud);
          if (enable_mean_shift)
          {
            for (int cp = 0; cp < region_cloud->points.size (); cp++)
            {
              region_cloud->points[cp].region = reg_labels[cp];
            }
          }
          new_cloud = true;
        }
        ScopeTimeCPU c_p ("Publishing");
        publish_cloud ();
      }
    }