/** \brief Estimate by ML the effect size of the genotype, the std deviation * of the errors and the std error of the estimated effect size in the * multiple linear regression Y = XB + E with E~MVN(0,sigma^2I) * \note genotype supposed to be 2nd column of X */ void FitSingleGeneWithSingleSnp(const gsl_matrix * X, const gsl_vector * y, double & pve, double & sigmahat, double & betahat_geno, double & sebetahat_geno, double & betapval_geno) { size_t N = X->size1, P = X->size2, rank; double rss; gsl_vector * Bhat = gsl_vector_alloc(P); gsl_matrix * covBhat = gsl_matrix_alloc(P, P); gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc(N, P); gsl_multifit_linear_svd(X, y, GSL_DBL_EPSILON, &rank, Bhat, covBhat, &rss, work); pve = 1 - rss / gsl_stats_tss(y->data, y->stride, y->size); sigmahat = sqrt(rss / (double)(N-rank)); betahat_geno = gsl_vector_get(Bhat, 1); sebetahat_geno = sqrt(gsl_matrix_get (covBhat, 1, 1)); betapval_geno = 2 * gsl_cdf_tdist_Q(fabs(betahat_geno / sebetahat_geno), N-rank); gsl_vector_free(Bhat); gsl_matrix_free(covBhat); gsl_multifit_linear_free(work); }
int gsl_multifit_robust(const gsl_matrix * X, const gsl_vector * y, gsl_vector * c, gsl_matrix * cov, gsl_multifit_robust_workspace *w) { /* check matrix and vector sizes */ if (X->size1 != y->size) { GSL_ERROR ("number of observations in y does not match rows of matrix X", GSL_EBADLEN); } else if (X->size2 != c->size) { GSL_ERROR ("number of parameters c does not match columns of matrix X", GSL_EBADLEN); } else if (cov->size1 != cov->size2) { GSL_ERROR ("covariance matrix is not square", GSL_ENOTSQR); } else if (c->size != cov->size1) { GSL_ERROR ("number of parameters does not match size of covariance matrix", GSL_EBADLEN); } else if (X->size1 != w->n || X->size2 != w->p) { GSL_ERROR ("size of workspace does not match size of observation matrix", GSL_EBADLEN); } else { int s; double chisq; const double tol = GSL_SQRT_DBL_EPSILON; int converged = 0; size_t numit = 0; const size_t n = y->size; double sigy = gsl_stats_sd(y->data, y->stride, n); double sig_lower; size_t i; /* * if the initial fit is very good, then finding outliers by comparing * them to the residual standard deviation is difficult. Therefore we * set a lower bound on the standard deviation estimate that is a small * fraction of the standard deviation of the data values */ sig_lower = 1.0e-6 * sigy; if (sig_lower == 0.0) sig_lower = 1.0; /* compute initial estimates using ordinary least squares */ s = gsl_multifit_linear(X, y, c, cov, &chisq, w->multifit_p); if (s) return s; /* save Q S^{-1} of original matrix */ gsl_matrix_memcpy(w->QSI, w->multifit_p->QSI); gsl_vector_memcpy(w->D, w->multifit_p->D); /* compute statistical leverage of each data point */ s = gsl_linalg_SV_leverage(w->multifit_p->A, w->resfac); if (s) return s; /* correct residuals with factor 1 / sqrt(1 - h) */ for (i = 0; i < n; ++i) { double h = gsl_vector_get(w->resfac, i); if (h > 0.9999) h = 0.9999; gsl_vector_set(w->resfac, i, 1.0 / sqrt(1.0 - h)); } /* compute residuals from OLS fit r = y - X c */ s = gsl_multifit_linear_residuals(X, y, c, w->r); if (s) return s; /* compute estimate of sigma from ordinary least squares */ w->stats.sigma_ols = gsl_blas_dnrm2(w->r) / sqrt((double) w->stats.dof); while (!converged && ++numit <= w->maxiter) { double sig; /* adjust residuals by statistical leverage (see DuMouchel and O'Brien) */ s = gsl_vector_mul(w->r, w->resfac); if (s) return s; /* compute estimate of standard deviation using MAD */ sig = robust_madsigma(w->r, w); /* scale residuals by standard deviation and tuning parameter */ gsl_vector_scale(w->r, 1.0 / (GSL_MAX(sig, sig_lower) * w->tune)); /* compute weights using these residuals */ s = w->type->wfun(w->r, w->weights); if (s) return s; gsl_vector_memcpy(w->c_prev, c); /* solve weighted least squares with new weights */ s = gsl_multifit_wlinear(X, w->weights, y, c, cov, &chisq, w->multifit_p); if (s) return s; /* compute new residuals r = y - X c */ s = gsl_multifit_linear_residuals(X, y, c, w->r); if (s) return s; converged = robust_test_convergence(w->c_prev, c, tol); } /* compute final MAD sigma */ w->stats.sigma_mad = robust_madsigma(w->r, w); /* compute robust estimate of sigma */ w->stats.sigma_rob = robust_robsigma(w->r, w->stats.sigma_mad, w->tune, w); /* compute final estimate of sigma */ w->stats.sigma = robust_sigma(w->stats.sigma_ols, w->stats.sigma_rob, w); /* store number of iterations */ w->stats.numit = numit; { double dof = (double) w->stats.dof; double rnorm = w->stats.sigma * sqrt(dof); /* see DuMouchel, sec 4.2 */ double ss_err = rnorm * rnorm; double ss_tot = gsl_stats_tss(y->data, y->stride, n); /* compute R^2 */ w->stats.Rsq = 1.0 - ss_err / ss_tot; /* compute adjusted R^2 */ w->stats.adj_Rsq = 1.0 - (1.0 - w->stats.Rsq) * (n - 1.0) / dof; /* compute rmse */ w->stats.rmse = sqrt(ss_err / dof); /* store SSE */ w->stats.sse = ss_err; } /* calculate covariance matrix = sigma^2 (X^T X)^{-1} */ s = robust_covariance(w->stats.sigma, cov, w); if (s) return s; /* raise an error if not converged */ if (numit > w->maxiter) { GSL_ERROR("maximum iterations exceeded", GSL_EMAXITER); } return s; } } /* gsl_multifit_robust() */
double DescenderPath::computeHalfScore(bool upper, bool print) const { double meanCurve; double stdDevCurve; double meanSlope; double stdDevSlope; int startIndex; int endIndex; // const QVector<unsigned int>* path; if (upper) { meanCurve = NEW_UPPER_MEAN_CURVE; stdDevCurve = NEW_UPPER_STD_DEV_CURVE; meanSlope = NEW_UPPER_MEAN_SLOPE; stdDevSlope = NEW_UPPER_STD_DEV_SLOPE; // path = &upperPath; startIndex=divideIndex; endIndex=path.size()-1; if (print) printf("Upper:\t"); } else { meanCurve = NEW_LOWER_MEAN_CURVE; stdDevCurve = NEW_LOWER_STD_DEV_CURVE; meanSlope = NEW_LOWER_MEAN_SLOPE; stdDevSlope = NEW_LOWER_STD_DEV_SLOPE; // path = &lowerPath; startIndex=0; endIndex=divideIndex>0?divideIndex:path.size()-1; if (print) printf("Lower:\t"); } // double relativeAngle = getRelAngle(skeleton, currentPath->at(currentPath->size()-2), currentPath->last(), nextIndex); // double distance = getDist(skeleton, currentPath->last(), nextIndex); // double newClockwiseScore = (clockwiseScore/1.5)+std::min(PI-relativeAngle,PI/3)*distance; double avgAngle; int sampleSize = 1+endIndex-startIndex; double x[sampleSize]; double y[sampleSize]; // QVector<double> x; // QVector<double> y; for (int i=startIndex; i<=endIndex; i++) { x[i-startIndex]=(*skeleton)[path[i]].x; y[i-startIndex]=(*skeleton)[path[i]].y; // foreach (QPoint p, (*skeleton).getRegion(path[i])) // { // if ((*skeleton).pixel(p.x(),p.y())) // { // x.append((double)p.x()); // y.append((double)p.y()); // } // } // if (i>startIndex) // { // int curX=(*skeleton)[path[i-1]].x; // int curY=(*skeleton)[path[i-1]].y; // int nextX=(*skeleton)[path[i]].x; // int nextY=(*skeleton)[path[i]].y; // QVector<QPoint> line; // QPoint start(curX,curY); // line.append(start); // if (curX==nextX || fabs((curY-nextY)/((double)curX-nextX)) > 1) // { // double slope = ((double)curX-nextX)/(curY-nextY); // double intersect = curX-curY*slope; // int inc = copysign(1.0, nextY-curY); // for (int y=curY+inc; inc*y<inc*nextY; y+=inc) // { // QPoint toAdd(y*slope+intersect,y); // if (((BPixelCollection*)skeleton)->pixel(toAdd)) // line.append(toAdd); // else // { // // return; // } // } // } // else // { // double slope = (curY-nextY)/((double)curX-nextX); // double intersect = curY-curX*slope; // int inc = copysign(1.0, nextX-curX); // for (int x=curX+inc; inc*x<inc*nextX; x+=inc) // { // QPoint toAdd(x,slope*x+intersect); // if (((BPixelCollection*)skeleton)->pixel(toAdd)) // line.append(toAdd); // else // { // // return; // } // } // } // QPoint end(nextX,nextY); // line.append(end); // foreach (QPoint p, line) // { // x.append((double)p.x()); // y.append((double)p.y()); // } // } if (i-startIndex>1) { // std::min(PI-relativeAngle,PI/3) avgAngle += std::max(getRelAngle(path[i-2], path[i-1], path[i]),PI*0.6666); } } // int sampleSize = x.size(); if (sampleSize>2) { avgAngle /= sampleSize-2; } else { avgAngle = 0; } double halfScore=0; if (sampleSize>2) { double tss = gsl_stats_tss(x,1,sampleSize); double cov[9]; double linOut[2]; double chisqSlope= polynomialfit(sampleSize,2,y,x,linOut,cov); double slope=linOut[1]; double rsqSlope=1-chisqSlope/tss; double quadOut[3]; double chisqCurve = polynomialfit(sampleSize,3,y,x,quadOut,cov); double curvature=quadOut[2]; double rsqCurve=1-chisqCurve/tss; double yOfVertex = quadOut[1]/(2*quadOut[2]); halfScore = (copysign(1.0, curvature) == copysign(1.0, meanCurve) || fabs(curvature)<0.001) ? 10*(1/std::max(rsqCurve,0.1))*std::max(fabs(curvature-meanCurve),2*stdDevCurve)/(2*stdDevCurve) : 15*(1/std::max(rsqCurve,0.1))*std::max(fabs(curvature-meanCurve),2*stdDevCurve)/(2*stdDevCurve); // halfScore = 10*(1/std::max(rsqCurve,0.1))*std::max(fabs(curvature-meanCurve),2*stdDevCurve)/(2*stdDevCurve) + .1*std::max(fabs(slope-meanSlope),2*stdDevSlope)/(2*stdDevSlope); } if(print) printf("fit=%f\tangle=%f\t",halfScore,.1*avgAngle); halfScore += .1*avgAngle; if(print) printf("total=%f\n",halfScore); return halfScore; }