Example #1
0
/** \brief Estimate by ML the effect size of the genotype, the std deviation 
 *  of the errors and the std error of the estimated effect size in the 
 *  multiple linear regression Y = XB + E with E~MVN(0,sigma^2I)
 *  \note genotype supposed to be 2nd column of X
 */
  void FitSingleGeneWithSingleSnp(const gsl_matrix * X,
				  const gsl_vector * y,
				  double & pve,
				  double & sigmahat,
				  double & betahat_geno,
				  double & sebetahat_geno,
				  double & betapval_geno)
  {
    size_t N = X->size1, P = X->size2, rank;
    double rss;
    gsl_vector * Bhat = gsl_vector_alloc(P);
    gsl_matrix * covBhat = gsl_matrix_alloc(P, P);
    gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc(N, P);
    gsl_multifit_linear_svd(X, y, GSL_DBL_EPSILON, &rank, Bhat, covBhat,
			    &rss, work);
    pve = 1 - rss / gsl_stats_tss(y->data, y->stride, y->size);
    sigmahat = sqrt(rss / (double)(N-rank));
    betahat_geno = gsl_vector_get(Bhat, 1);
    sebetahat_geno = sqrt(gsl_matrix_get (covBhat, 1, 1));
    betapval_geno = 2 * gsl_cdf_tdist_Q(fabs(betahat_geno / sebetahat_geno),
					N-rank);
    gsl_vector_free(Bhat);
    gsl_matrix_free(covBhat);
    gsl_multifit_linear_free(work);
  }
Example #2
0
/*
 * gsl_multifit_robust()
 *   Robust linear fit of y = X c by iteratively reweighted least squares.
 *
 * The routine starts from an ordinary least squares solution, then
 * repeatedly (1) scales the leverage-adjusted residuals by a MAD-based
 * sigma and the tuning constant, (2) turns them into weights with the
 * workspace's weight function and (3) solves the weighted problem, until
 * the coefficients stop changing or w->maxiter iterations have been used.
 * Fit statistics are stored in w->stats.
 *
 * Inputs: X   - observation matrix
 *         y   - observation vector (one entry per row of X)
 *         c   - (output) fitted coefficients (one entry per column of X)
 *         cov - (output) square covariance matrix of the coefficients
 *         w   - workspace sized for X
 *
 * Return: GSL_EBADLEN / GSL_ENOTSQR when the dimensions are inconsistent,
 *         the status of the first helper call that fails,
 *         GSL_EMAXITER when the iteration did not converge (outputs and
 *         statistics are still filled in), otherwise the status of the
 *         final covariance computation.
 */
int
gsl_multifit_robust(const gsl_matrix * X,
                    const gsl_vector * y,
                    gsl_vector * c,
                    gsl_matrix * cov,
                    gsl_multifit_robust_workspace *w)
{
  const double tol = GSL_SQRT_DBL_EPSILON;
  int status;
  int converged = 0;
  size_t iter = 0;
  size_t n;
  size_t i;
  double chisq;
  double sigy;
  double sig_floor;

  /* validate dimensions; each GSL_ERROR returns from the function */
  if (X->size1 != y->size)
    {
      GSL_ERROR
        ("number of observations in y does not match rows of matrix X",
         GSL_EBADLEN);
    }

  if (X->size2 != c->size)
    {
      GSL_ERROR ("number of parameters c does not match columns of matrix X",
                 GSL_EBADLEN);
    }

  if (cov->size1 != cov->size2)
    {
      GSL_ERROR ("covariance matrix is not square", GSL_ENOTSQR);
    }

  if (c->size != cov->size1)
    {
      GSL_ERROR
        ("number of parameters does not match size of covariance matrix",
         GSL_EBADLEN);
    }

  if (X->size1 != w->n || X->size2 != w->p)
    {
      GSL_ERROR
        ("size of workspace does not match size of observation matrix",
         GSL_EBADLEN);
    }

  n = y->size;
  sigy = gsl_stats_sd(y->data, y->stride, n);

  /*
   * A near-perfect initial fit yields a tiny residual scale, which makes
   * every point look like an outlier. Keep the scale estimate above a
   * small fraction of the spread of y (or above 1 if y has no spread).
   */
  sig_floor = 1.0e-6 * sigy;
  if (sig_floor == 0.0)
    sig_floor = 1.0;

  /* starting point: ordinary least squares */
  status = gsl_multifit_linear(X, y, c, cov, &chisq, w->multifit_p);
  if (status)
    return status;

  /* keep Q S^{-1} and D of the unweighted problem for later use */
  gsl_matrix_memcpy(w->QSI, w->multifit_p->QSI);
  gsl_vector_memcpy(w->D, w->multifit_p->D);

  /* statistical leverage h_i of every observation */
  status = gsl_linalg_SV_leverage(w->multifit_p->A, w->resfac);
  if (status)
    return status;

  /* turn leverages into residual correction factors 1 / sqrt(1 - h) */
  for (i = 0; i < n; ++i)
    {
      const double lev = gsl_vector_get(w->resfac, i);
      /* cap h so the factor stays finite */
      const double h = (lev > 0.9999) ? 0.9999 : lev;

      gsl_vector_set(w->resfac, i, 1.0 / sqrt(1.0 - h));
    }

  /* OLS residuals r = y - X c */
  status = gsl_multifit_linear_residuals(X, y, c, w->r);
  if (status)
    return status;

  /* sigma estimate of the ordinary least squares fit */
  w->stats.sigma_ols = gsl_blas_dnrm2(w->r) / sqrt((double) w->stats.dof);

  /* iter ends at maxiter + 1 when the loop exits without converging */
  while (!converged && ++iter <= w->maxiter)
    {
      double sig;

      /* leverage adjustment of the residuals (see DuMouchel and O'Brien) */
      status = gsl_vector_mul(w->r, w->resfac);
      if (status)
        return status;

      /* MAD-based estimate of the residual standard deviation */
      sig = robust_madsigma(w->r, w);

      /* normalise residuals by the (floored) scale and the tuning constant */
      gsl_vector_scale(w->r, 1.0 / (GSL_MAX(sig, sig_floor) * w->tune));

      /* weights from the normalised residuals */
      status = w->type->wfun(w->r, w->weights);
      if (status)
        return status;

      /* remember the current coefficients for the convergence test */
      gsl_vector_memcpy(w->c_prev, c);

      /* weighted least squares with the updated weights */
      status = gsl_multifit_wlinear(X, w->weights, y, c, cov, &chisq, w->multifit_p);
      if (status)
        return status;

      /* refresh residuals r = y - X c */
      status = gsl_multifit_linear_residuals(X, y, c, w->r);
      if (status)
        return status;

      converged = robust_test_convergence(w->c_prev, c, tol);
    }

  /* MAD sigma of the final residuals */
  w->stats.sigma_mad = robust_madsigma(w->r, w);

  /* robust sigma estimate */
  w->stats.sigma_rob = robust_robsigma(w->r, w->stats.sigma_mad, w->tune, w);

  /* final sigma, combined from the OLS and robust estimates */
  w->stats.sigma = robust_sigma(w->stats.sigma_ols, w->stats.sigma_rob, w);

  /* iteration count */
  w->stats.numit = iter;

  {
    double dof = (double) w->stats.dof;
    double rnorm = w->stats.sigma * sqrt(dof); /* see DuMouchel, sec 4.2 */
    double ss_err = rnorm * rnorm;
    double ss_tot = gsl_stats_tss(y->data, y->stride, n);

    /* R^2 */
    w->stats.Rsq = 1.0 - ss_err / ss_tot;

    /* adjusted R^2 */
    w->stats.adj_Rsq = 1.0 - (1.0 - w->stats.Rsq) * (n - 1.0) / dof;

    /* root mean squared error */
    w->stats.rmse = sqrt(ss_err / dof);

    /* sum of squared errors */
    w->stats.sse = ss_err;
  }

  /* covariance matrix = sigma^2 (X^T X)^{-1} */
  status = robust_covariance(w->stats.sigma, cov, w);
  if (status)
    return status;

  /* report non-convergence only after all outputs have been filled in */
  if (iter > w->maxiter)
    {
      GSL_ERROR("maximum iterations exceeded", GSL_EMAXITER);
    }

  return status;
} /* gsl_multifit_robust() */
Example #3
0
/**
 * Scores one half of the descender path.
 *
 * The half is the run of path nodes on one side of divideIndex. Its score is
 * the sum of two terms:
 *   - a fit term, computed from a quadratic fit of x as a function of y over
 *     the nodes: it grows with the distance between the fitted curvature and
 *     the expected mean curvature of that half, is divided by the fit's R^2
 *     (floored at 0.1), and is weighted 10 when the curvature has the
 *     expected sign (or is nearly zero) and 15 otherwise;
 *   - 0.1 times the mean, over consecutive node triples, of
 *     max(getRelAngle(...), PI*0.6666).
 * Both terms are 0 when the half has fewer than three nodes.
 *
 * @param upper true to score path[divideIndex..end], false to score
 *              path[0..divideIndex] (or the whole path when divideIndex <= 0)
 * @param print when true, the individual terms and the total are printed to
 *              stdout
 * @return the combined score of the selected half
 */
double DescenderPath::computeHalfScore(bool upper, bool print) const
{
    double meanCurve;
    double stdDevCurve;
    int startIndex;
    int endIndex;
    if (upper)
    {
        meanCurve = NEW_UPPER_MEAN_CURVE;
        stdDevCurve = NEW_UPPER_STD_DEV_CURVE;
        startIndex=divideIndex;
        endIndex=path.size()-1;
        if (print)
            printf("Upper:\t");
    }
    else
    {
        meanCurve = NEW_LOWER_MEAN_CURVE;
        stdDevCurve = NEW_LOWER_STD_DEV_CURVE;
        startIndex=0;
        endIndex=divideIndex>0?divideIndex:path.size()-1;
        if (print)
            printf("Lower:\t");
    }
    
    const int sampleSize = 1+endIndex-startIndex;
    
    // An empty range gives sampleSize <= 0; keep the array length positive.
    int bufferLen = sampleSize>0 ? sampleSize : 1;
    double x[bufferLen];
    double y[bufferLen];
    
    // Must start at zero: it is accumulated below.
    double avgAngle = 0;
    
    for (int i=startIndex; i<=endIndex; i++)
    {
        x[i-startIndex]=(*skeleton)[path[i]].x;
        y[i-startIndex]=(*skeleton)[path[i]].y;
        
        // from the third node on, add the (floored) angle at the middle node
        if (i-startIndex>1)
        {
            avgAngle += std::max(getRelAngle(path[i-2], path[i-1], path[i]),PI*0.6666);
        }
    }
    
    // sampleSize nodes yield sampleSize-2 angles
    if (sampleSize>2)
    {
        avgAngle /= sampleSize-2;
    }
    else
    {
        avgAngle = 0;
    }
    
    double halfScore=0;

    if (sampleSize>2)
    {
        // NOTE(review): tss is 0 when every node has the same x, which makes
        // rsqCurve non-finite — confirm whether callers can hit this.
        const double tss = gsl_stats_tss(x,1,sampleSize);
        double cov[9];
        
        // NOTE(review): the outputs of this linear fit are not used; it is
        // kept only in case polynomialfit has side effects — drop it once
        // that is confirmed not to be the case.
        double linOut[2];
        polynomialfit(sampleSize,2,y,x,linOut,cov);
        
        double quadOut[3];
        const double chisqCurve = polynomialfit(sampleSize,3,y,x,quadOut,cov);
        const double curvature=quadOut[2];
        const double rsqCurve=1-chisqCurve/tss;
        
        // heavier penalty when the curve bends the wrong way
        const bool expectedBend = copysign(1.0, curvature) == copysign(1.0, meanCurve) ||
                                  fabs(curvature)<0.001;
        const double weight = expectedBend ? 10 : 15;
        
        halfScore = weight*(1/std::max(rsqCurve,0.1))*std::max(fabs(curvature-meanCurve),2*stdDevCurve)/(2*stdDevCurve);
    }
    
    if(print)
        printf("fit=%f\tangle=%f\t",halfScore,.1*avgAngle);
    
    halfScore += .1*avgAngle;
    
    if(print)
        printf("total=%f\n",halfScore);
    
    return halfScore;
}