// [ref] ${VLFEAT_HOME}/toolbox/quickshift/vl_quickshift.c
void quick_shift()
{
	const vl_qs_type kernelSize = 2.0;
	const vl_qs_type maxDist = 20.0;
	const vl_bool medoid = false;  // true to use kernelized medoid shift, false (default) uses quick shift.
	const vl_qs_type ratio = 0.5;  // tradeoff between spatial consistency and color consistency.

	// read image data
	const std::string input_filename = "./data/machine_vision/vlfeat/roofs1.jpg";
	//const std::string input_filename = "./data/machine_vision/opencv/fruits.jpg";

	const cv::Mat input_img = cv::imread(input_filename, CV_LOAD_IMAGE_COLOR);
	//const cv::Mat input_img = cv::imread(input_filename, CV_LOAD_IMAGE_GRAYSCALE);
	if (input_img.empty())
	{
		std::cerr << "file not found: " << input_filename << std::endl;
		return;
	}

	double minVal = 0.0, maxVal = 0.0;
	cv::minMaxLoc(input_img, &minVal, &maxVal);

	//
	const int img_width = input_img.cols;
	const int img_height = input_img.rows;
	const int img_numChannels = input_img.channels();

	// channels * width * height + width * row + col
	std::vector<vl_qs_type> image(img_width * img_height * img_numChannels, 0.0);
	{
		if (1 == img_numChannels)
		{
			for (int r = 0; r < img_height; ++r)
				for (int c = 0; c < img_width; ++c)
				{
					//image[img_width * r + c] = input_img.at<unsigned char>(r, c);
					//image[img_width * r + c] = ratio * (input_img.at<unsigned char>(r, c) + cv::randu<vl_qs_type>() / 2550.0);  // not correctly working
					image[img_width * r + c] = ratio * (input_img.at<unsigned char>(r, c) + cv::randu<vl_qs_type>() / 10.0);
				}
		}
		else if (3 == img_numChannels)
		{
			for (int r = 0; r < img_height; ++r)
				for (int c = 0; c < img_width; ++c)
				{
					const cv::Vec3b &pix = input_img.at<cv::Vec3b>(r, c);
					for (int ch = 0; ch < img_numChannels; ++ch)
					{
						//image[ch * img_width * img_height + img_width * r + c] = pix[ch];
						//image[ch * img_width * img_height + img_width * r + c] = ratio * (pix[ch] / 255.0 + cv::randu<vl_qs_type>() / 2550.0);  // not correctly working
						image[ch * img_width * img_height + img_width * r + c] = ratio * (pix[ch] + cv::randu<vl_qs_type>() / 10.0);
					}
				}
		}
		else
		{
			std::cerr << "the number of image's channels is improper ..." << std::endl;
			return;
		}
	}

	//
	VlQS *qs = vl_quickshift_new(&image[0], img_width, img_height, img_numChannels);

	vl_quickshift_set_kernel_size(qs, kernelSize);
	vl_quickshift_set_max_dist(qs, maxDist);
	vl_quickshift_set_medoid(qs, medoid);

	vl_quickshift_process(qs);

	//
	const int *parentsi = vl_quickshift_get_parents(qs);
	const vl_qs_type *dists = vl_quickshift_get_dists(qs);
	const vl_qs_type *density = vl_quickshift_get_density(qs);

	// flatten a tree.
	std::vector<int> parentsv(parentsi, parentsi + img_width * img_height);
	{
		for (int i = 0; i < img_width * img_height; ++i)
			while (parentsv[i] != parentsv[parentsv[i]])
				parentsv[i] = parentsv[parentsv[i]];
	}
/*
	{
		std::vector<int> indexes(parentsv.begin(), parentsv.end());
		std::sort(indexes.begin(), indexes.end());
		std::vector<int>::iterator itEndNew = std::unique(indexes.begin(), indexes.end());
		//const std::size_t labelCount = std::distance(indexes.begin(), itEndNew);
		std::map<int, int> lblMap;
		int idx = 0;
		for (std::vector<int>::iterator it = indexes.begin(); it != itEndNew; ++it, ++idx)
			lblMap[*it] = idx;

		for (std::vector<int>::iterator it = parentsv.begin(); it != parentsv.end(); ++it)
			*it = lblMap[*it];
	}
*/

	// visualize
	cv::Mat result_img(input_img.size(), input_img.type());
	{
		// draw boundary.
		if (1 == result_img.channels())
		{
			vl_size idx = 0;
			for (int r = 0; r < result_img.rows; ++r)
				for (int c = 0; c < result_img.cols; ++c, ++idx)
				{
					const int lbl = parentsv[idx];
					if (r - 1 >= 0 && lbl != parentsv[(r - 1) * result_img.cols + c])
						result_img.at<unsigned char>(r, c) = 255;
					else if (c - 1 >= 0 && lbl != parentsv[r * result_img.cols + (c - 1)])
						result_img.at<unsigned char>(r, c) = 255;
/*
					else if (r + 1 < result_img.rows && lbl != parentsv[(r + 1) * result_img.cols + c])
						result_img.at<unsigned char>(r, c) = 255;
					else if (c + 1 < result_img.cols && lbl != parentsv[r * result_img.cols + (c + 1)])
						result_img.at<unsigned char>(r, c) = 255;
*/
					else
						result_img.at<unsigned char>(r, c) = 0;
				}
		}
		else if (3 == result_img.channels())
		{
			vl_size idx = 0;
			for (int r = 0; r < result_img.rows; ++r)
				for (int c = 0; c < result_img.cols; ++c, ++idx)
				{
					const int lbl = parentsv[idx];
					if (r - 1 >= 0 && lbl != parentsv[(r - 1) * result_img.cols + c])
						result_img.at<cv::Vec3b>(r, c) = cv::Vec3b(255, 255, 255);
					else if (c - 1 >= 0 && lbl != parentsv[r * result_img.cols + (c - 1)])
						result_img.at<cv::Vec3b>(r, c) = cv::Vec3b(255, 255, 255);
/*
					else if (r + 1 < result_img.rows && lbl != parentsv[(r + 1) * result_img.cols + c])
						result_img.at<cv::Vec3b>(r, c) = cv::Vec3b(255, 255, 255);
					else if (c + 1 < result_img.cols && lbl != parentsv[r * result_img.cols + (c + 1)])
						result_img.at<cv::Vec3b>(r, c) = cv::Vec3b(255, 255, 255);
*/
					else
						result_img.at<cv::Vec3b>(r, c) = cv::Vec3b(0, 0, 0);
				}
		}
		else
		{
			std::cerr << "the number of image's channels is improper ..." << std::endl;
			return;
		}
	}

	cv::imshow("quick shift result", result_img);
	cv::waitKey(0);
	cv::destroyAllWindows();

	// clean-up
	vl_quickshift_delete(qs);
}
void
mexFunction(int nout, mxArray *out[],
            int nin, const mxArray *in[])
{
  enum {
    IN_I=0,     /* Input image */
    IN_KERNEL_SIZE,  /* The bandwidth parameter for density estimation */
    IN_MAX_DIST,     /* The maximum distance to a neighbor which increases
                   the density */
    IN_END
  } ;
  enum {
    OUT_PARENTS=0, /* parents (same size as I) */
    OUT_DISTS,     /* dists (same size as I) */
    OUT_DENSITY    /* density (same size as I) */
  } ;

  int             verb = 0 ;
  int             opt ;
  int             next = IN_END ;
  mxArray const  *optarg ;

  double const *I ;
  double *parents, *dists, *density ;
  int *parentsi;
  double sigma ;
  double tau ;

  int K,N1,N2;

  int medoid = 0 ;

  mwSize const *dims ;
  int ndims ;

  int i;

  VlQS * q;

  VL_USE_MATLAB_ENV ;

  /* -----------------------------------------------------------------
   *                                                   Check arguments
   * -------------------------------------------------------------- */

  if (nin < 2) {
    mexErrMsgTxt("At least two arguments.") ;
  }

  if (nout > 3) {
    mexErrMsgTxt("At most three output arguments.") ;
  }

  ndims = mxGetNumberOfDimensions(in[IN_I]) ;
  dims  = mxGetDimensions(in[IN_I]) ;

  if (ndims > 3) {
    mexErrMsgTxt("I must have at most 3 dimensions.") ;
  }

  if (mxGetClassID(in[IN_I]) != mxDOUBLE_CLASS) {
    mexErrMsgTxt("I must be DOUBLE.")  ;
  }

  N1 = dims [0] ;
  N2 = dims [1] ;
  K = (ndims == 3) ? dims [2] : 1 ;

  I     =  mxGetPr (in[IN_I]) ;
  sigma = *mxGetPr (in[IN_KERNEL_SIZE]) ;
  tau   = 3*sigma;
  if (nin > 2)
    tau = *mxGetPr (in[IN_MAX_DIST]) ;

  while ((opt = vlmxNextOption (in, nin, options, &next, &optarg)) >= 0) {
    switch (opt) {
    case opt_medoid: /* Do medoid shift instead of mean shift */
      medoid = 1 ;
      break ;
    case opt_verbose :
      ++ verb ;
      break ;
    }
  } /* while opts */

  /* Create outputs */
  out[OUT_PARENTS] = mxCreateDoubleMatrix(N1, N2, mxREAL) ;
  parents          = mxGetPr (out[OUT_PARENTS]) ;

  out[OUT_DISTS]   = mxCreateDoubleMatrix(N1, N2, mxREAL) ;
  dists            = mxGetPr (out[OUT_DISTS]) ;

  out[OUT_DENSITY] = mxCreateDoubleMatrix(N1, N2, mxREAL) ;
  density          = mxGetPr (out[OUT_DENSITY]) ;

  if (verb) {
    mexPrintf("quickshift: [N1,N2,K]: [%d,%d,%d]\n", N1,N2,K) ;
    mexPrintf("quickshift: type: %s\n", medoid ? "medoid" : "quick");
    mexPrintf("quickshift: kernel size:  %g\n", sigma) ;
    mexPrintf("quickshift: maximum gap:  %g\n", tau) ;
  }

  /* Do job */
  q = vl_quickshift_new(I, N1, N2, K);

  vl_quickshift_set_kernel_size (q, sigma) ;
  vl_quickshift_set_max_dist     (q, tau) ;
  vl_quickshift_set_medoid      (q, medoid) ;

  vl_quickshift_process(q);

  parentsi = vl_quickshift_get_parents(q);
  /* Copy results */
  for(i = 0; i < N1*N2; i++) parents[i] = parentsi[i] + 1;
  memcpy(dists, vl_quickshift_get_dists(q), sizeof(double)*N1*N2);
  memcpy(density, vl_quickshift_get_density(q), sizeof(double)*N1*N2);

  /* Delete quick shift object */
  vl_quickshift_delete(q);
}