int
gsl_multifit_robust(const gsl_matrix * X,
                    const gsl_vector * y,
                    gsl_vector * c,
                    gsl_matrix * cov,
                    gsl_multifit_robust_workspace *w)
{
  /* check matrix and vector sizes */
  if (X->size1 != y->size)
    {
      GSL_ERROR
        ("number of observations in y does not match rows of matrix X",
         GSL_EBADLEN);
    }
  else if (X->size2 != c->size)
    {
      GSL_ERROR ("number of parameters c does not match columns of matrix X",
                 GSL_EBADLEN);
    }
  else if (cov->size1 != cov->size2)
    {   
      GSL_ERROR ("covariance matrix is not square", GSL_ENOTSQR);
    }   
  else if (c->size != cov->size1)
    {   
      GSL_ERROR
        ("number of parameters does not match size of covariance matrix",
         GSL_EBADLEN);
    }
  else if (X->size1 != w->n || X->size2 != w->p)
    {
      GSL_ERROR
        ("size of workspace does not match size of observation matrix",
         GSL_EBADLEN);
    }
  else
    {
      int s;
      double chisq;
      const double tol = GSL_SQRT_DBL_EPSILON;
      int converged = 0;
      size_t numit = 0;
      const size_t n = y->size;
      double sigy = gsl_stats_sd(y->data, y->stride, n);
      double sig_lower;
      size_t i;

      /*
       * if the initial fit is very good, then finding outliers by comparing
       * them to the residual standard deviation is difficult. Therefore we
       * set a lower bound on the standard deviation estimate that is a small
       * fraction of the standard deviation of the data values
       */
      sig_lower = 1.0e-6 * sigy;
      if (sig_lower == 0.0)
        sig_lower = 1.0;

      /* compute initial estimates using ordinary least squares */
      s = gsl_multifit_linear(X, y, c, cov, &chisq, w->multifit_p);
      if (s)
        return s;

      /* save Q S^{-1} of original matrix */
      gsl_matrix_memcpy(w->QSI, w->multifit_p->QSI);
      gsl_vector_memcpy(w->D, w->multifit_p->D);

      /* compute statistical leverage of each data point */
      s = gsl_linalg_SV_leverage(w->multifit_p->A, w->resfac);
      if (s)
        return s;

      /* correct residuals with factor 1 / sqrt(1 - h) */
      for (i = 0; i < n; ++i)
        {
          double h = gsl_vector_get(w->resfac, i);

          if (h > 0.9999)
            h = 0.9999;

          gsl_vector_set(w->resfac, i, 1.0 / sqrt(1.0 - h));
        }

      /* compute residuals from OLS fit r = y - X c */
      s = gsl_multifit_linear_residuals(X, y, c, w->r);
      if (s)
        return s;

      /* compute estimate of sigma from ordinary least squares */
      w->stats.sigma_ols = gsl_blas_dnrm2(w->r) / sqrt((double) w->stats.dof);

      while (!converged && ++numit <= w->maxiter)
        {
          double sig;

          /* adjust residuals by statistical leverage (see DuMouchel and O'Brien) */
          s = gsl_vector_mul(w->r, w->resfac);
          if (s)
            return s;

          /* compute estimate of standard deviation using MAD */
          sig = robust_madsigma(w->r, w);

          /* scale residuals by standard deviation and tuning parameter */
          gsl_vector_scale(w->r, 1.0 / (GSL_MAX(sig, sig_lower) * w->tune));

          /* compute weights using these residuals */
          s = w->type->wfun(w->r, w->weights);
          if (s)
            return s;

          gsl_vector_memcpy(w->c_prev, c);

          /* solve weighted least squares with new weights */
          s = gsl_multifit_wlinear(X, w->weights, y, c, cov, &chisq, w->multifit_p);
          if (s)
            return s;

          /* compute new residuals r = y - X c */
          s = gsl_multifit_linear_residuals(X, y, c, w->r);
          if (s)
            return s;

          converged = robust_test_convergence(w->c_prev, c, tol);
        }

      /* compute final MAD sigma */
      w->stats.sigma_mad = robust_madsigma(w->r, w);

      /* compute robust estimate of sigma */
      w->stats.sigma_rob = robust_robsigma(w->r, w->stats.sigma_mad, w->tune, w);

      /* compute final estimate of sigma */
      w->stats.sigma = robust_sigma(w->stats.sigma_ols, w->stats.sigma_rob, w);

      /* store number of iterations */
      w->stats.numit = numit;

      {
        double dof = (double) w->stats.dof;
        double rnorm = w->stats.sigma * sqrt(dof); /* see DuMouchel, sec 4.2 */
        double ss_err = rnorm * rnorm;
        double ss_tot = gsl_stats_tss(y->data, y->stride, n);

        /* compute R^2 */
        w->stats.Rsq = 1.0 - ss_err / ss_tot;

        /* compute adjusted R^2 */
        w->stats.adj_Rsq = 1.0 - (1.0 - w->stats.Rsq) * (n - 1.0) / dof;

        /* compute rmse */
        w->stats.rmse = sqrt(ss_err / dof);

        /* store SSE */
        w->stats.sse = ss_err;
      }

      /* calculate covariance matrix = sigma^2 (X^T X)^{-1} */
      s = robust_covariance(w->stats.sigma, cov, w);
      if (s)
        return s;

      /* raise an error if not converged */
      if (numit > w->maxiter)
        {
          GSL_ERROR("maximum iterations exceeded", GSL_EMAXITER);
        }

      return s;
    }
} /* gsl_multifit_robust() */
Beispiel #2
0
void filter_matches_by_robust_distribution(const std::vector<cv::Point2f> &src_points,
		const std::vector<cv::Point2f> &dst_points,
		const std::vector<cv::DMatch> &matches,
		std::vector<char> &mask) {

	if(matches.size() != mask.size()) return;

	// filter in horizontal direction first (reduces the computation cost of vertical filter)
	std::vector<float> x_distances; x_distances.reserve( src_points.size() );
	for(size_t i = 0; i < matches.size(); i++) {
		if(mask[i] == 0) continue;
		const unsigned int src_index = matches[i].queryIdx;
		const unsigned int dst_index = matches[i].trainIdx;
		const unsigned int x_distance = abs(src_points[src_index].x - dst_points[dst_index].x);
		x_distances.push_back(x_distance);
	}
	if(x_distances.empty()) return;

	// calculate robust standard deviation
	float median;
	float sigma = robust_sigma(x_distances, median);
	if( sigma <= std::numeric_limits<float>::min() )
		return;

	size_t distance_index = 0;
	for(size_t i = 0; i < matches.size(); i++) {
		if(mask[i] == 0) continue;

		// scale distance to fit standard normal distribution
		const float z = abs(x_distances[distance_index] - median) / sigma;
		//use 0,95 interval for z
		if(z > 1.96)
			mask[i] = 0;
		distance_index++;
	}

	// filter in vertical direction
	std::vector<float> y_distances; y_distances.reserve( src_points.size() );
	for(size_t i = 0; i < matches.size(); i++) {
		if(mask[i] == 0) continue;
		const unsigned int src_index = matches[i].queryIdx;
		const unsigned int dst_index = matches[i].trainIdx;
		const unsigned int y_distance = abs(src_points[src_index].y - dst_points[dst_index].y);
		y_distances.push_back(y_distance);
	}
	if(y_distances.size() == 0) return;

	// calculate robust standard deviation
	sigma = robust_sigma(y_distances, median);
	if( sigma <= std::numeric_limits<float>::min() )
		return;

	distance_index = 0;
	for(size_t i = 0; i < matches.size(); i++) {
		if(mask[i] == 0) continue;

		// scale distance to fit standard normal distribution
		const float z = abs(y_distances[distance_index] - median) / sigma;
		//use 0,95 interval for z
		if(z > 1.96)
			mask[i] = 0;

		distance_index++;
	}
}