double MlSldaState::OptimizeRegression() { assert((corpus_->num_train() - num_topics_) > 0); train_topic_assignments(regression_space_.x_.get()); // Use gsl's implementation of OLS to find the new nu parameter const gsl_vector* y = static_cast<lib_corpora::ReviewCorpus*> (corpus_.get())->train_ratings(); train_topic_assignments(regression_space_.x_.get()); gsl_multifit_linear(regression_space_.x_.get(), y, nu_.get(), regression_space_.covariance_.get(), ®ression_space_.chi_squared_, regression_space_.space_.get()); // Estimatee sigma squared from the sum of the squared residuals cout << "Did new regression with " << corpus_->num_train() << " documents " " and sum sq training error " << regression_space_.chi_squared_ << " (" << regression_space_.chi_squared_ / (double)corpus_->num_train() << ")" << endl; if (sigma_squared_ < FLAGS_min_variance) { sigma_squared_ = FLAGS_min_variance; cout << "WARNING! VARIANCE WENT TO ZERO. SOMETHING CRAZY IS GOING ON. "; cout << "WE'LL CARRY ON WITH VARIANCE=" << sigma_squared_ << endl; } // Now we update the predictions so that the numbers are correct UpdateAllPredictions(); return regression_space_.chi_squared_; }
//-------------------------------------------------------------- bool polynomialfit(int obs, int degree, double *dx, double *dy, double *store){ /* n, p */ gsl_multifit_linear_workspace *ws; gsl_matrix *cov, *X; gsl_vector *y, *c; double chisq; X = gsl_matrix_alloc(obs, degree); y = gsl_vector_alloc(obs); c = gsl_vector_alloc(degree); cov = gsl_matrix_alloc(degree, degree); for(int i=0; i<obs; i++){ gsl_matrix_set(X, i, 0, 1.0); for(int j=0; j<degree; j++){ gsl_matrix_set(X, i, j, pow(dx[i], j)); } gsl_vector_set(y, i, dy[i]); } ws = gsl_multifit_linear_alloc(obs, degree); gsl_multifit_linear(X, y, c, cov, &chisq, ws); /* store result ... */ for(int i=0; i< degree; i++){ store[i] = gsl_vector_get(c, i); } gsl_multifit_linear_free(ws); gsl_matrix_free(X); gsl_matrix_free(cov); gsl_vector_free(y); gsl_vector_free(c); return true; /* we do not "analyse" the result (cov matrix mainly) to know if the fit is "good" */ }
double polynomialfit(int obs, int degree, double *dx, double *dy, double *store) { gsl_multifit_linear_workspace *ws; gsl_matrix *cov, *X; gsl_vector *y, *c; double chisq; int i, j; X = gsl_matrix_alloc(obs, degree); y = gsl_vector_alloc(obs); c = gsl_vector_alloc(degree); cov = gsl_matrix_alloc(degree, degree); for(i=0; i < obs; i++) { gsl_matrix_set(X, i, 0, 1.0); for(j=0; j < degree; j++) { gsl_matrix_set(X, i, j, pow(dx[i], j)); } gsl_vector_set(y, i, dy[i]); } ws = gsl_multifit_linear_alloc(obs, degree); gsl_multifit_linear(X, y, c, cov, &chisq, ws); for(i=0; i < degree; i++) { store[i] = gsl_vector_get(c, i); } gsl_multifit_linear_free(ws); gsl_matrix_free(X); gsl_matrix_free(cov); gsl_vector_free(y); gsl_vector_free(c); return chisq; // return error }
int curve_fit_quad(uint8_t count, FittingData f_data[], double *aa, double *bb, double *cc) { int i, n; double xi, yi, chisq; gsl_matrix *X, *cov; gsl_vector *y, *c; n = count; X = gsl_matrix_alloc (n, 3); y = gsl_vector_alloc (n); c = gsl_vector_alloc (3); cov = gsl_matrix_alloc (3, 3); for (i = 0; i < n; i++) { xi = f_data[i].x; yi = f_data[i].y; ////printf ("%g %g\n", xi, yi); gsl_matrix_set (X, i, 0, 1.0); gsl_matrix_set (X, i, 1, xi); gsl_matrix_set (X, i, 2, xi*xi); gsl_vector_set (y, i, yi); } gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (n, 3); gsl_multifit_linear (X, y, c, cov, &chisq, work); gsl_multifit_linear_free (work); //printf ("# best fit: Y = %g + %g X + %g X^2\n", // C(0), C(1), C(2)); //printf ("# covariance matrix:\n"); //printf ("[ %+.5e, %+.5e, %+.5e \n", // COV(0,0), COV(0,1), COV(0,2)); //printf (" %+.5e, %+.5e, %+.5e \n", // COV(1,0), COV(1,1), COV(1,2)); //printf (" %+.5e, %+.5e, %+.5e ]\n", // COV(2,0), COV(2,1), COV(2,2)); //printf ("# chisq = %g\n", chisq); *aa = C(0); *bb = C(1); *cc = C(2); gsl_matrix_free (X); gsl_vector_free (y); gsl_vector_free (c); gsl_matrix_free (cov); return 0; }
void BSplineInterpolation::fitFromData(const Samples &samples) { /* preprocess samples and extract some info */ Samples ssamples = samples; std::sort(ssamples.begin(), ssamples.end()); const int numSamples = ssamples.size(); const float minSampleX = ssamples[0].first; const float maxSampleX = ssamples.back().first; /* prepare fitting data */ gsl_vector *x = gsl_vector_alloc(ssamples.size()); gsl_vector *y = gsl_vector_alloc(ssamples.size()); for (int i=0; i<ssamples.size(); i++) { gsl_vector_set(x, i, ssamples[i].first); gsl_vector_set(y, i, ssamples[i].second); } /* uniform knots distributed in sample range */ gsl_bspline_knots_uniform(minSampleX, maxSampleX, bSplineWorkspace); /* construct a fit matrix */ gsl_matrix *fitMatrix = gsl_matrix_alloc(numSamples, nCoeffs); for (int i=0; i<numSamples; i++) { /* compute B_j(xi) for all j */ double xi = gsl_vector_get(x, i); gsl_bspline_eval(xi, bSpline, bSplineWorkspace); /* fill in row i */ for (int j=0; j<nCoeffs; j++) { double Bj = gsl_vector_get(bSpline, j); gsl_matrix_set(fitMatrix, i, j, Bj); } } /* fit spline to data */ gsl_multifit_linear_workspace *mws = gsl_multifit_linear_alloc(numSamples, nCoeffs); double chisq; size_t rank; double tol = 0.1; gsl_multifit_linear(fitMatrix, y, cParameters, covMatrix, &chisq, mws); //gsl_multifit_linear_svd(fitMatrix, y, tol, // &rank, cParameters, covMatrix, &chisq, mws); splineMinX = minSampleX; splineMaxX = maxSampleX; /* clean up */ gsl_vector_free(x); gsl_vector_free(y); gsl_matrix_free(fitMatrix); gsl_multifit_linear_free(mws); }
/*
 * Regularized solver consistency check: with lambda = 0 the regularized
 * solution must agree with the ordinary (or weighted, when wts != NULL)
 * least squares solution, both in chi^2 and coefficient by coefficient.
 */
static void
test_reg1(const gsl_matrix * X, const gsl_vector * y,
          const gsl_vector * wts, const double tol,
          gsl_multifit_linear_workspace * w, const char * desc)
{
  const size_t n = X->size1;
  const size_t p = X->size2;
  gsl_vector *c_ols = gsl_vector_alloc(p);  /* reference OLS/WLS solution */
  gsl_vector *c_reg = gsl_vector_alloc(p);  /* lambda = 0 regularized solution */
  gsl_matrix *cov = gsl_matrix_alloc(p, p);
  double chisq, rnorm, snorm;
  size_t k;

  if (wts == NULL)
    {
      /* unweighted problem: solve directly on (X, y) */
      gsl_multifit_linear(X, y, c_ols, cov, &chisq, w);

      gsl_multifit_linear_svd(X, w);
      gsl_multifit_linear_solve(0.0, X, y, c_reg, &rnorm, &snorm, w);
    }
  else
    {
      /* weighted problem: convert to standard form first */
      gsl_matrix *Xs = gsl_matrix_alloc(n, p);
      gsl_vector *ys = gsl_vector_alloc(n);

      gsl_multifit_wlinear(X, wts, y, c_ols, cov, &chisq, w);

      gsl_multifit_linear_wstdform1(NULL, X, wts, y, Xs, ys, w);
      gsl_multifit_linear_svd(Xs, w);
      gsl_multifit_linear_solve(0.0, Xs, ys, c_reg, &rnorm, &snorm, w);

      gsl_matrix_free(Xs);
      gsl_vector_free(ys);
    }

  /* residual norm squared must match chi^2 of the reference fit */
  gsl_test_rel(rnorm*rnorm, chisq, tol,
               "test_reg1: %s, lambda = 0, n=%zu p=%zu chisq", desc, n, p);

  /* every coefficient must match */
  for (k = 0; k < p; ++k)
    {
      const double expected = gsl_vector_get(c_ols, k);
      const double actual = gsl_vector_get(c_reg, k);

      gsl_test_rel(actual, expected, tol,
                   "test_reg1: %s, lambda = 0, n=%zu p=%zu c0/c1", desc, n, p);
    }

  gsl_vector_free(c_ols);
  gsl_vector_free(c_reg);
  gsl_matrix_free(cov);
}
/**
 * Least-squares polynomial baseline fit of a time series.
 *
 * Column j of the design matrix is (m_timeStep * i)^j for j >= 2; the
 * constant and linear columns are zeroed so those terms take no part in
 * the fit.
 *
 * @param term   number of polynomial terms (asserted to be >= 3)
 * @param series samples, one per time step
 * @return the `term` fitted coefficients
 */
const QVector<double> TimeSeriesMotion::baselineFit( const int term, const QVector<double> & series ) const
{
    Q_ASSERT(term >= 3);

    const int count = series.size();

    gsl_matrix * design = gsl_matrix_alloc(count, term);
    gsl_vector * rhs = gsl_vector_alloc(count);

    for (int row = 0; row < count; ++row) {
        gsl_vector_set(rhs, row, series.at(row));

        for (int col = 0; col < term; ++col) {
            if (col >= 2) {
                gsl_matrix_set(design, row, col, pow(m_timeStep * row, col));
            } else {
                // Exclude the first two terms from the fit
                gsl_matrix_set(design, row, col, 0);
            }
        }
    }

    // Outputs of the solver: covariance matrix and coefficient vector
    gsl_matrix * covariance = gsl_matrix_alloc(term, term);
    gsl_vector * solution = gsl_vector_alloc(term);

    gsl_multifit_linear_workspace * workspace = gsl_multifit_linear_alloc(count, term);
    double chisq = 0;
    gsl_multifit_linear(design, rhs, solution, covariance, &chisq, workspace);

    // Copy the solution into the Qt container returned to the caller
    QVector<double> coeffs(term);
    for (int k = 0; k < term; ++k) {
        coeffs[k] = gsl_vector_get(solution, k);
    }

    // Release all GSL resources
    gsl_multifit_linear_free(workspace);
    gsl_matrix_free(covariance);
    gsl_vector_free(solution);
    gsl_vector_free(rhs);
    gsl_matrix_free(design);

    return coeffs;
}
/* * adapted from http://rosettacode.org/wiki/Polynomial_regression * * @return m_coeffs[0] + m_coeffs[1]*x + m_coeffs[2]*x^2 + ... * */ std::vector<double> polynomialfit(int degree, std::vector<double> dx, std::vector<double> dy) { std::vector<double> store(degree); gsl_multifit_linear_workspace *ws; gsl_matrix *cov, *X; gsl_vector *y, *c; double chisq; int i, j; if (dx.size() != dx.size()) throw; int obs = dx.size(); // number of values X = gsl_matrix_alloc(obs, degree); y = gsl_vector_alloc(obs); c = gsl_vector_alloc(degree); cov = gsl_matrix_alloc(degree, degree); for (i = 0; i < obs; i++) { gsl_matrix_set(X, i, 0, 1.0); for (j = 0; j < degree; j++) { gsl_matrix_set(X, i, j, pow(dx[i], j)); } gsl_vector_set(y, i, dy[i]); } ws = gsl_multifit_linear_alloc(obs, degree); gsl_multifit_linear(X, y, c, cov, &chisq, ws); /* store result ... */ for (i = 0; i < degree; i++) { store[i] = gsl_vector_get(c, i); } gsl_multifit_linear_free(ws); gsl_matrix_free(X); gsl_matrix_free(cov); gsl_vector_free(y); gsl_vector_free(c); return store; }
/**
 * Fits a 2D quadric to the `n` stored points by linear least squares.
 *
 * Solves for the five coefficients of
 *   a*x^2 + b*2xy + c*2x + d*y^2 + e*2y = 1
 * and returns three identical Quadric2D objects built from them (with the
 * last constructor argument fixed to -1.0).
 */
QVector<Quadric2D> processaQUAD()
{
    const int kTerms = 5;

    gsl_matrix * A = gsl_matrix_alloc (n, kTerms);
    for (int row = 0; row < n; ++row)
    {
        const auto px = points[0][row];
        const auto py = points[1][row];

        gsl_matrix_set (A, row, 0, px*px);       // x^2
        gsl_matrix_set (A, row, 1, 2.0*px*py);   // 2xy
        gsl_matrix_set (A, row, 2, 2.0*px);      // 2x
        gsl_matrix_set (A, row, 3, py*py);       // y^2
        gsl_matrix_set (A, row, 4, 2.0*py);      // 2y
    }

    gsl_matrix * cov = gsl_matrix_alloc (kTerms, kTerms);
    gsl_vector * B = gsl_vector_alloc (n);
    gsl_vector * x = gsl_vector_alloc (kTerms);

    // Right-hand side is all ones
    gsl_vector_set_all (B, 1.0);

    real chisq = 0.0;
    gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (n, kTerms);
    gsl_multifit_linear (A, B, x, cov, &chisq, work);
    gsl_multifit_linear_free (work);

    // The coefficients are narrowed to float before building the quadrics
    float coef[kTerms];
    for (int k = 0; k < kTerms; ++k)
    {
        coef[k] = gsl_vector_get (x, k);
    }

    QVector<Quadric2D> resp;
    for (int copy = 0; copy < 3; ++copy)
    {
        resp.push_back (Quadric2D (coef[0], coef[1], coef[2], coef[3], coef[4], -1.0));
    }

    gsl_matrix_free (A);
    gsl_vector_free (B);
    gsl_vector_free (x);
    gsl_matrix_free (cov);

    return resp;
}
void linear_fit_quadratic(const std::vector<double> &Xin, const std::vector<double> &Yin, double &c_0, double &c_1, double &c_2) { assert (Xin.size() == Yin.size()); int i, n; double xi, yi, ei, chisq; gsl_matrix *X, *cov; gsl_vector *y, *c; n = Xin.size(); X = gsl_matrix_alloc (n, 3); y = gsl_vector_alloc (n); c = gsl_vector_alloc (3); cov = gsl_matrix_alloc (3, 3); for (i = 0; i < n; i++) { double xi = Xin[i], yi = Yin[i]; gsl_matrix_set (X, i, 0, 1.0); gsl_matrix_set (X, i, 1, xi); gsl_matrix_set (X, i, 2, xi*xi); gsl_vector_set (y, i, yi); } // do the actual fitting gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (n, 3); gsl_multifit_linear (X, y, c, cov, &chisq, work); gsl_multifit_linear_free (work); c_0 = gsl_vector_get(c,0); c_1 = gsl_vector_get(c,1); c_2 = gsl_vector_get(c,2); gsl_matrix_free (X); gsl_vector_free (y); gsl_vector_free (c); gsl_matrix_free (cov); }
void PolynomialFit::fit() { if (d_init_err) return; if (d_p > d_n){ QMessageBox::critical((ApplicationWindow *)parent(), tr("QtiPlot - Fit Error"), tr("You need at least %1 data points for this fit operation. Operation aborted!").arg(d_p)); return; } gsl_matrix *X = gsl_matrix_alloc (d_n, d_p); for (int i = 0; i <d_n; i++){ for (int j= 0; j < d_p; j++) gsl_matrix_set (X, i, j, pow(d_x[i],j)); } gsl_vector_view y = gsl_vector_view_array (d_y, d_n); gsl_vector_view w = gsl_vector_view_array (d_w, d_n); gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (d_n, d_p); if (d_weighting == NoWeighting) gsl_multifit_linear (X, &y.vector, d_param_init, covar, &chi_2, work); else gsl_multifit_wlinear (X, &w.vector, &y.vector, d_param_init, covar, &chi_2, work); for (int i = 0; i < d_p; i++) d_results[i] = gsl_vector_get(d_param_init, i); gsl_multifit_linear_free (work); gsl_matrix_free (X); generateFitCurve(); if (show_legend) showLegend(); ApplicationWindow *app = (ApplicationWindow *)parent(); if (app->writeFitResultsToLog) app->updateLog(logFitInfo(0, 0)); }
/*
 * OCaml stub for linear least squares.
 *
 * wo  : optional weight vector; when it is Val_none the unweighted
 *       gsl_multifit_linear is called, otherwise its first field is used as
 *       the weight vector for gsl_multifit_wlinear.
 * x   : design matrix, y : observations.
 * c   : coefficient vector and cov : covariance matrix, both passed to GSL
 *       as outputs.
 * ws  : multifit workspace (unwrapped with MultifitWS_val).
 *
 * Returns chisq boxed with copy_double().
 *
 * NOTE(review): the _DECLARE_* / _CONVERT_* macros are defined elsewhere;
 * judging by the call below they introduce the locals m_x, m_cov, v_y, v_c
 * (and v_w) as GSL views of the OCaml values -- confirm against the macro
 * definitions.
 */
CAMLprim value ml_gsl_multifit_linear(value wo, value x, value y, value c, value cov, value ws)
{
  double chisq;
  _DECLARE_MATRIX2(x,cov);
  _DECLARE_VECTOR2(y,c);
  _CONVERT_MATRIX2(x,cov);
  _CONVERT_VECTOR2(y,c);
  /* no weights supplied: ordinary least squares */
  if(wo == Val_none)
    gsl_multifit_linear(&m_x, &v_y, &v_c, &m_cov, &chisq, MultifitWS_val(ws));
  else {
    /* unwrap the option payload and run the weighted fit */
    value w=Field(wo, 0);
    _DECLARE_VECTOR(w);
    _CONVERT_VECTOR(w);
    gsl_multifit_wlinear(&m_x, &v_w, &v_y, &v_c, &m_cov, &chisq, MultifitWS_val(ws));
  }
  return copy_double(chisq);
}
//Fits the maxima y and maxima x position to a parabola, returns result int sonar_quadfit::fit(gsl_vector* y, int* maxima_x, double &result) { //generate matricies X, y for(int k=0; k<n; k++) { temp=maxima_x[k]; gsl_matrix_set(X,k,1,temp); gsl_matrix_set(X,k,2,temp*temp); } //fit a parabola to the values status=gsl_multifit_linear(X, y, c, cov, chisq, workspace); //find the parabola center center=-gsl_vector_get(c,1)/(2*gsl_vector_get(c,2)); //Optional: implement an algorithm that will try to find the peak closest to the center. But it seems unnecessary for now, since it will work out just as well fir multiple data points result=center; return status; }
/*
 * Pure wrapper around gsl_multifit_linear.
 *
 * X is the n x p design matrix; the observations are taken from row 0 of y.
 * Returns a 3-element Pure list: the fitted coefficients (p values), the
 * p x p covariance matrix, and chisq.
 *
 * GSL requires the coefficient vector to have p = X->size2 entries and the
 * covariance matrix to be p x p; the previous code sized both by the number
 * of observations, which GSL rejects unless X happens to be square.
 */
pure_expr* wrap_gsl_multifit_linear(gsl_matrix* X, gsl_matrix* y)
{
  const size_t n = X->size1; /* number of observations */
  const size_t p = X->size2; /* number of parameters */
  size_t i;
  double chisq;
  pure_expr *cx[p];
  pure_expr *res;

  gsl_vector* c = gsl_vector_alloc(p);
  gsl_vector* yt = gsl_vector_alloc(n);
  gsl_matrix_get_row(yt, y, 0);
  gsl_matrix* cov = gsl_matrix_alloc(p, p);

  gsl_multifit_linear_workspace* w = gsl_multifit_linear_alloc(n, p);
  gsl_multifit_linear(X, yt, c, cov, &chisq, w);
  gsl_multifit_linear_free(w);
  gsl_vector_free(yt);

  /* gsl_vector_get honours the vector stride */
  for (i = 0; i < p; ++i)
    cx[i] = pure_double(gsl_vector_get(c, i));
  /* the values were copied into Pure doubles, so c can be released */
  gsl_vector_free(c);

  /* NOTE(review): cov is not freed here on the assumption that
     pure_double_matrix() takes ownership of it -- confirm against the
     Pure runtime API. */
  res = pure_listl(3, pure_matrix_columnsv(p, cx),
                   pure_double_matrix(cov),
                   pure_double(chisq));
  return res;
}
void calibrator::calculateWeights(vector <ofPoint> eyePoints, vector <ofPoint> screenPoints){ int length = eyePoints.size(); int nTerms = 6; gsl_matrix * x = gsl_matrix_alloc(length,nTerms); gsl_vector * yx = gsl_vector_alloc(length); gsl_vector * yy = gsl_vector_alloc(length); gsl_vector * w = gsl_vector_alloc(nTerms); double * ptr; double * ptrScreenX; double * ptrScreenY; ptr = gsl_matrix_ptr(x,0,0); ptrScreenX = gsl_vector_ptr(yx,0); ptrScreenY = gsl_vector_ptr(yy,0); for (int i = 0; i < length; i++){ float xPosEye = eyePoints[i].x; float yPosEye = eyePoints[i].y; // Ax + Bx^2 + Cy + Dy^2 + Exy + Fx^3 + Gy^3 + H *ptr++ = xPosEye; *ptr++ = xPosEye*xPosEye; *ptr++ = yPosEye; *ptr++ = yPosEye*yPosEye; *ptr++ = xPosEye*yPosEye; //*ptr++ = xPosEye*xPosEye*xPosEye; //*ptr++ = yPosEye*yPosEye*yPosEye; *ptr++ = 1; *ptrScreenX++ = screenPoints[i].x; *ptrScreenY++ = screenPoints[i].y; } gsl_vector *cx = gsl_vector_calloc(nTerms); gsl_vector *cy = gsl_vector_calloc(nTerms); gsl_matrix *cov = gsl_matrix_calloc(nTerms, nTerms); double chisq; gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc(length, nTerms); int res = gsl_multifit_linear (x, yx, cx, cov, &chisq, work); int res2 = gsl_multifit_linear (x, yy, cy, cov, &chisq, work); printf("-------------------------------------------- \n"); double * xptr = gsl_vector_ptr(cx,0); double * yptr = gsl_vector_ptr(cy,0); for (int i = 0; i < nTerms; i++){ printf("cx %i = %f \n", i, xptr[i]); cxfit[i] = xptr[i]; } for (int i = 0; i < nTerms; i++){ printf("cy %i = %f \n", i, yptr[i]); cyfit[i] = yptr[i]; } bBeenFit = true; printf("-------------------------------------------- \n"); //return ; }
// ${LSHKIT_HOME}/tools/fitdata.cpp void fitdata_example() { const std::string data_file("./data/search_algorithm/lshkit/audio.data"); const unsigned N = 0; // number of points to use. const unsigned P = 50000; // number of pairs to sample. unsigned Q = 1000; // number of queries to sample. unsigned K = 100; // search for K nearest neighbors. const unsigned F = 10; // divide the sample to F folds. // load matrix. lshkit::Matrix<float> data(data_file); std::vector<unsigned> idx(data.getSize()); for (unsigned i = 0; i < idx.size(); ++i) idx[i] = i; random_shuffle(idx.begin(), idx.end()); if (N > 0 && N < data.getSize()) idx.resize(N); lshkit::metric::l2sqr<float> l2sqr(data.getDim()); lshkit::DefaultRng rng; boost::variate_generator<lshkit::DefaultRng &, lshkit::UniformUnsigned> gen(rng, lshkit::UniformUnsigned(0, idx.size()-1)); double gM = 0.0; double gG = 0.0; { // sample P pairs of points for (unsigned k = 0; k < P; ++k) { double dist, logdist; for (;;) { unsigned i = gen(); unsigned j = gen(); if (i == j) continue; dist = l2sqr(data[idx[i]], data[idx[j]]); logdist = std::log(dist); if (local::is_good_value(logdist)) break; } gM += dist; gG += logdist; } gM /= P; gG /= P; gG = std::exp(gG); } if (Q > idx.size()) Q = idx.size(); if (K > idx.size() - Q) K = idx.size() - Q; // sample query. std::vector<unsigned> qry(Q); lshkit::SampleQueries(&qry, idx.size(), rng); // do the queries. std::vector<lshkit::Topk<unsigned> > topks(Q); for (unsigned i = 0; i < Q; ++i) topks[i].reset(K); /* ... 
*/ gsl_matrix *X = gsl_matrix_alloc(F * K, 3); gsl_vector *yM = gsl_vector_alloc(F * K); gsl_vector *yG = gsl_vector_alloc(F * K); gsl_vector *pM = gsl_vector_alloc(3); gsl_vector *pG = gsl_vector_alloc(3); gsl_matrix *cov = gsl_matrix_alloc(3,3); std::vector<double> M(K); std::vector<double> G(K); boost::progress_display progress(F, std::cerr); unsigned m = 0; for (unsigned l = 0; l < F; l++) { // Scan for (unsigned i = l; i< idx.size(); i += F) { for (unsigned j = 0; j < Q; j++) { int id = qry[j]; if (i != id) { float d = l2sqr(data[idx[id]], data[idx[i]]); if (local::is_good_value(std::log(double(d)))) topks[j] << lshkit::Topk<unsigned>::Element(i, d); } } } std::fill(M.begin(), M.end(), 0.0); std::fill(G.begin(), G.end(), 0.0); for (unsigned i = 0; i < Q; i++) { for (unsigned k = 0; k < K; k++) { M[k] += topks[i][k].dist; G[k] += std::log(topks[i][k].dist); } } for (unsigned k = 0; k < K; k++) { M[k] = std::log(M[k]/Q); G[k] /= Q; gsl_matrix_set(X, m, 0, 1.0); gsl_matrix_set(X, m, 1, std::log(double(data.getSize() * (l + 1)) / double(F))); gsl_matrix_set(X, m, 2, std::log(double(k + 1))); gsl_vector_set(yM, m, M[k]); gsl_vector_set(yG, m, G[k]); ++m; } ++progress; } gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc(F * K, 3); double chisq; gsl_multifit_linear(X, yM, pM, cov, &chisq, work); gsl_multifit_linear(X, yG, pG, cov, &chisq, work); std::cout << gM << '\t' << gG << std::endl; std::cout << gsl_vector_get(pM, 0) << '\t' << gsl_vector_get(pM, 1) << '\t' << gsl_vector_get(pM, 2) << std::endl; std::cout << gsl_vector_get(pG, 0) << '\t' << gsl_vector_get(pG, 1) << '\t' << gsl_vector_get(pG, 2) << std::endl; gsl_matrix_free(X); gsl_matrix_free(cov); gsl_vector_free(pM); gsl_vector_free(pG); gsl_vector_free(yM); gsl_vector_free(yG); }
/*
 * Regression test on the Longley data set (longley_x, longley_y,
 * longley_n, longley_p are defined elsewhere).
 *
 * Runs the unweighted, robust (OLS weight function) and unit-weighted
 * least squares solvers and compares coefficients, standard deviations,
 * chi^2 and, for the weighted fit, the full covariance matrix against
 * the reference values below.
 */
void
test_longley ()
{
  gsl_multifit_linear_workspace * work =
    gsl_multifit_linear_alloc (longley_n, longley_p);
  gsl_multifit_robust_workspace * work_rob =
    gsl_multifit_robust_alloc (gsl_multifit_robust_ols, longley_n, longley_p);

  gsl_matrix_view X = gsl_matrix_view_array (longley_x, longley_n, longley_p);
  gsl_vector_view y = gsl_vector_view_array (longley_y, longley_n);
  gsl_vector * c = gsl_vector_alloc (longley_p);
  gsl_vector * r = gsl_vector_alloc (longley_n);
  gsl_matrix * cov = gsl_matrix_alloc (longley_p, longley_p);

  double chisq, chisq_res;

  double expected_c[7] = {  -3482258.63459582,
                            15.0618722713733,
                            -0.358191792925910E-01,
                            -2.02022980381683,
                            -1.03322686717359,
                            -0.511041056535807E-01,
                            1829.15146461355 };

  double expected_sd[7] = {  890420.383607373,
                             84.9149257747669,
                             0.334910077722432E-01,
                             0.488399681651699,
                             0.214274163161675,
                             0.226073200069370,
                             455.478499142212 } ;

  double expected_chisq = 836424.055505915;

  /* diag is a view onto cov, so it reflects whichever fit ran last */
  gsl_vector_view diag = gsl_matrix_diagonal (cov);
  gsl_vector_view exp_c = gsl_vector_view_array(expected_c, longley_p);
  gsl_vector_view exp_sd = gsl_vector_view_array(expected_sd, longley_p);

  /* test unweighted least squares */
  gsl_multifit_linear (&X.matrix, &y.vector, c, cov, &chisq, work);
  gsl_multifit_linear_residuals(&X.matrix, &y.vector, c, r);
  gsl_blas_ddot(r, r, &chisq_res);

  test_longley_results("longley gsl_multifit_linear",
                       c, &exp_c.vector,
                       &diag.vector, &exp_sd.vector,
                       chisq, chisq_res, expected_chisq);

  /* test robust least squares */
  gsl_multifit_robust (&X.matrix, &y.vector, c, cov, work_rob);

  test_longley_results("longley gsl_multifit_robust",
                       c, &exp_c.vector,
                       &diag.vector, &exp_sd.vector,
                       1.0, 1.0, 1.0);

  /* test weighted least squares */
  {
    size_t i, j;

    gsl_vector * w = gsl_vector_alloc (longley_n);

    double expected_cov[7][7] = { { 8531122.56783558,
-166.727799925578, 0.261873708176346, 3.91188317230983,
1.1285582054705, -0.889550869422687, -4362.58709870581},

{-166.727799925578, 0.0775861253030891, -1.98725210399982e-05,
-0.000247667096727256, -6.82911920718824e-05, 0.000136160797527761,
0.0775255245956248},

{0.261873708176346, -1.98725210399982e-05, 1.20690316701888e-08,
1.66429546772984e-07, 3.61843600487847e-08, -6.78805814483582e-08,
-0.00013158719037715},

{3.91188317230983, -0.000247667096727256, 1.66429546772984e-07,
2.56665052544717e-06, 6.96541409215597e-07, -9.00858307771567e-07,
-0.00197260370663974},

{1.1285582054705, -6.82911920718824e-05, 3.61843600487847e-08,
6.96541409215597e-07, 4.94032602583969e-07, -9.8469143760973e-08,
-0.000576921112208274},

{-0.889550869422687, 0.000136160797527761, -6.78805814483582e-08,
-9.00858307771567e-07, -9.8469143760973e-08, 5.49938542664952e-07,
0.000430074434198215},

{-4362.58709870581, 0.0775255245956248, -0.00013158719037715,
-0.00197260370663974, -0.000576921112208274, 0.000430074434198215,
2.23229587481535 }} ;

    /* unit weights: the weighted fit must reproduce the unweighted result */
    gsl_vector_set_all (w, 1.0);

    gsl_multifit_wlinear (&X.matrix, w, &y.vector, c, cov, &chisq, work);
    gsl_multifit_linear_residuals(&X.matrix, &y.vector, c, r);
    gsl_blas_ddot(r, r, &chisq_res);

    test_longley_results("longley gsl_multifit_wlinear",
                         c, &exp_c.vector,
                         NULL, NULL,
                         chisq, chisq_res, expected_chisq);

    for (i = 0; i < longley_p; i++)
      {
        for (j = 0; j < longley_p; j++)
          {
            /* i and j are size_t, so the format must use %zu (not %d) */
            gsl_test_rel (gsl_matrix_get(cov,i,j), expected_cov[i][j], 1e-7,
                          "longley gsl_multifit_wlinear cov(%zu,%zu)", i, j) ;
          }
      }

    gsl_vector_free(w);
  }

  gsl_vector_free(c);
  gsl_vector_free(r);
  gsl_matrix_free(cov);
  gsl_multifit_linear_free (work);
  gsl_multifit_robust_free (work_rob);
} /* test_longley() */
/*
 * Bootstrap estimate of the standard deviation of OLS coefficients.
 *
 * x      : predictors, read as a row-major n x d array (element (i, j) at
 *          x[j + i*d])
 * y      : responses, n entries
 * result : output, d entries; result[j] receives the standard deviation of
 *          coefficient j across the B bootstrap repetitions
 * b      : passed through to samp_k_from_n()
 * B      : number of bootstrap repetitions
 * n      : number of observations
 * d      : number of predictors
 *
 * All scalar arguments are passed by pointer. The random number generator
 * is created on the first call, seeded from time(NULL), and reused on
 * later calls.
 */
void bootstrap(double x[], double y[], double* result, int* b, int* B, int *n, int* d)
{
  static gsl_rng *restrict r = NULL;

  if(r == NULL) { // First call to this function, setup RNG
    gsl_rng_env_setup();
    r = gsl_rng_alloc(gsl_rng_mt19937);
    gsl_rng_set(r, time(NULL));
  }

  //a stores the sampled indices
  int a[ *n ];

  //allocate memory for the regression step
  gsl_matrix * pred = gsl_matrix_alloc ( *n, *d );
  gsl_vector * resp = gsl_vector_alloc( *n );
  gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc ( *n, *d );
  gsl_vector* coef = gsl_vector_alloc ( *d );
  gsl_matrix* cov = gsl_matrix_alloc ( *d, *d );
  gsl_matrix * T_boot = gsl_matrix_alloc ( *B, *d );
  double chisq;

  //create bootstrap samples
  for ( int rep = 0; rep < *B; rep++ ) {

    //sample the indices
    samp_k_from_n( n, b, a, r);
    printf("dfdfdfd");

    //transfer x to a matrix pred and y to a vector resp
    for ( int i = 0; i < *n; i++ ) {
      gsl_vector_set (resp, i, y[ a[i] ]);

      for (int j = 0; j < *d; j++)
        gsl_matrix_set (pred, i, j, x[ j + ( a[ i ] * (*d) ) ]);
    }

    //linear regression
    gsl_multifit_linear ( pred, resp, coef, cov, &chisq, work );

    //pass the elements of coef to row `rep` of T_boot
    gsl_matrix_set_row ( T_boot, rep, coef );
  }

  //compute the standard deviation of each coefficient across the bootstrap
  //repetitions. Column j of the row-major matrix T_boot is strided by the
  //row length (tda), not by 1; a stride of 1 would walk along the rows.
  for ( int j = 0; j < *d; j++) {
    result[ j ] = sqrt( gsl_stats_variance( gsl_matrix_ptr ( T_boot, 0, j ), T_boot->tda, *B ) );
  }

  //free the memory
  gsl_matrix_free (pred);
  gsl_vector_free(resp);
  gsl_multifit_linear_free ( work);
  gsl_vector_free (coef);
  gsl_matrix_free (cov);
  gsl_matrix_free (T_boot); // was previously leaked

  printf("\nI AM DONE\n\n");
}
/*
 * Regression test on the Pontius data set (pontius_x, pontius_y,
 * pontius_n, pontius_p are defined elsewhere).
 *
 * Fits a polynomial in pontius_x, first with the unweighted solver and then
 * with the weighted solver using unit weights, and compares coefficients,
 * covariance, chi^2 and the residual sum of squares with reference values.
 */
void
test_pontius ()
{
  size_t i, j;

  /* unweighted least squares */
  {
    gsl_multifit_linear_workspace * work =
      gsl_multifit_linear_alloc (pontius_n, pontius_p);

    gsl_matrix * X = gsl_matrix_alloc (pontius_n, pontius_p);
    gsl_vector_view y = gsl_vector_view_array (pontius_y, pontius_n);
    gsl_vector * c = gsl_vector_alloc (pontius_p);
    gsl_vector * r = gsl_vector_alloc (pontius_n);
    gsl_matrix * cov = gsl_matrix_alloc (pontius_p, pontius_p);
    gsl_vector_view diag;

    double chisq;

    double expected_c[3] = { 0.673565789473684E-03,
                             0.732059160401003E-06,
                            -0.316081871345029E-14};

    double expected_sd[3] = { 0.107938612033077E-03,
                              0.157817399981659E-09,
                              0.486652849992036E-16 };

    double expected_chisq = 0.155761768796992E-05;

    /* design matrix of powers: X(i,j) = x_i^j */
    for (i = 0 ; i < pontius_n; i++)
      {
        for (j = 0; j < pontius_p; j++)
          {
            gsl_matrix_set(X, i, j, pow(pontius_x[i], j));
          }
      }

    gsl_multifit_linear (X, &y.vector, c, cov, &chisq, work);

    gsl_test_rel (gsl_vector_get(c,0), expected_c[0], 1e-10, "pontius gsl_fit_multilinear c0") ;
    gsl_test_rel (gsl_vector_get(c,1), expected_c[1], 1e-10, "pontius gsl_fit_multilinear c1") ;
    gsl_test_rel (gsl_vector_get(c,2), expected_c[2], 1e-10, "pontius gsl_fit_multilinear c2") ;

    /* the diagonal of cov holds the squared standard deviations */
    diag = gsl_matrix_diagonal (cov);

    gsl_test_rel (gsl_vector_get(&diag.vector,0), pow(expected_sd[0],2.0), 1e-10, "pontius gsl_fit_multilinear cov00") ;
    gsl_test_rel (gsl_vector_get(&diag.vector,1), pow(expected_sd[1],2.0), 1e-10, "pontius gsl_fit_multilinear cov11") ;
    gsl_test_rel (gsl_vector_get(&diag.vector,2), pow(expected_sd[2],2.0), 1e-10, "pontius gsl_fit_multilinear cov22") ;

    gsl_test_rel (chisq, expected_chisq, 1e-10, "pontius gsl_fit_multilinear chisq") ;

    /* chisq is reused for the explicitly computed residual sum of squares */
    gsl_multifit_linear_residuals(X, &y.vector, c, r);
    gsl_blas_ddot(r, r, &chisq);
    gsl_test_rel (chisq, expected_chisq, 1e-10, "pontius gsl_fit_multilinear residuals") ;

    gsl_vector_free(c);
    gsl_vector_free(r);
    gsl_matrix_free(cov);
    gsl_matrix_free(X);
    gsl_multifit_linear_free (work);
  }

  /* weighted least squares with unit weights */
  {
    gsl_multifit_linear_workspace * work =
      gsl_multifit_linear_alloc (pontius_n, pontius_p);

    gsl_matrix * X = gsl_matrix_alloc (pontius_n, pontius_p);
    gsl_vector_view y = gsl_vector_view_array (pontius_y, pontius_n);
    gsl_vector * w = gsl_vector_alloc (pontius_n);
    gsl_vector * c = gsl_vector_alloc (pontius_p);
    gsl_vector * r = gsl_vector_alloc (pontius_n);
    gsl_matrix * cov = gsl_matrix_alloc (pontius_p, pontius_p);

    double chisq;

    double expected_c[3] = {  0.673565789473684E-03,
                               0.732059160401003E-06,
                               -0.316081871345029E-14};

    double expected_chisq = 0.155761768796992E-05;

    double expected_cov[3][3] ={
      {2.76754385964916e-01 , -3.59649122807024e-07,   9.74658869395731e-14},
      {-3.59649122807024e-07,   5.91630591630603e-13,  -1.77210703526497e-19},
      {9.74658869395731e-14,  -1.77210703526497e-19,   5.62573661988878e-26} };

    for (i = 0 ; i < pontius_n; i++)
      {
        for (j = 0; j < pontius_p; j++)
          {
            gsl_matrix_set(X, i, j, pow(pontius_x[i], j));
          }
      }

    gsl_vector_set_all (w, 1.0);

    gsl_multifit_wlinear (X, w, &y.vector, c, cov, &chisq, work);

    gsl_test_rel (gsl_vector_get(c,0), expected_c[0], 1e-10, "pontius gsl_fit_multilinear c0") ;
    gsl_test_rel (gsl_vector_get(c,1), expected_c[1], 1e-10, "pontius gsl_fit_multilinear c1") ;
    gsl_test_rel (gsl_vector_get(c,2), expected_c[2], 1e-10, "pontius gsl_fit_multilinear c2") ;

    for (i = 0; i < pontius_p; i++)
      {
        for (j = 0; j < pontius_p; j++)
          {
            /* i and j are size_t, so the format must use %zu (not %d) */
            gsl_test_rel (gsl_matrix_get(cov,i,j), expected_cov[i][j], 1e-10,
                          "pontius gsl_fit_wmultilinear cov(%zu,%zu)", i, j) ;
          }
      }

    gsl_test_rel (chisq, expected_chisq, 1e-10, "pontius gsl_fit_wmultilinear chisq") ;

    gsl_multifit_linear_residuals(X, &y.vector, c, r);
    gsl_blas_ddot(r, r, &chisq);
    gsl_test_rel (chisq, expected_chisq, 1e-10, "pontius gsl_fit_wmultilinear residuals") ;

    gsl_vector_free(w);
    gsl_vector_free(c);
    gsl_vector_free(r);
    gsl_matrix_free(cov);
    gsl_matrix_free(X);
    gsl_multifit_linear_free (work);
  }
}
/*
 * gsl_multifit_robust()
 *   Robust linear least squares by iteratively reweighted least squares.
 *
 * Inputs:  X   - design matrix (n-by-p)
 *          y   - observation vector (length n)
 *          w   - robust workspace; its n and p must match X
 *
 * Outputs: c   - coefficient vector (length p)
 *          cov - covariance matrix (p-by-p)
 *          w->stats is filled with sigma_ols, sigma_mad, sigma_rob, sigma,
 *          numit, Rsq, adj_Rsq, rmse and sse.
 *
 * Return: GSL_EBADLEN / GSL_ENOTSQR (via GSL_ERROR) on a dimension
 *         mismatch; the nonzero status of any failing sub-step;
 *         GSL_EMAXITER (via GSL_ERROR) if the iteration count exceeded
 *         w->maxiter; otherwise the status of robust_covariance().
 *
 * Outline: an ordinary least squares fit provides the starting point; each
 * iteration then rescales the residuals by leverage and a MAD-based sigma,
 * turns them into weights with w->type->wfun, and solves the weighted
 * problem, until robust_test_convergence() reports convergence.
 */
int
gsl_multifit_robust(const gsl_matrix * X,
                    const gsl_vector * y,
                    gsl_vector * c,
                    gsl_matrix * cov,
                    gsl_multifit_robust_workspace *w)
{
  /* check matrix and vector sizes */
  if (X->size1 != y->size)
    {
      GSL_ERROR
        ("number of observations in y does not match rows of matrix X",
         GSL_EBADLEN);
    }
  else if (X->size2 != c->size)
    {
      GSL_ERROR ("number of parameters c does not match columns of matrix X",
                 GSL_EBADLEN);
    }
  else if (cov->size1 != cov->size2)
    {
      GSL_ERROR ("covariance matrix is not square", GSL_ENOTSQR);
    }
  else if (c->size != cov->size1)
    {
      GSL_ERROR
        ("number of parameters does not match size of covariance matrix",
         GSL_EBADLEN);
    }
  else if (X->size1 != w->n || X->size2 != w->p)
    {
      GSL_ERROR
        ("size of workspace does not match size of observation matrix",
         GSL_EBADLEN);
    }
  else
    {
      int s;
      double chisq;
      const double tol = GSL_SQRT_DBL_EPSILON;
      int converged = 0;
      size_t numit = 0;
      const size_t n = y->size;
      double sigy = gsl_stats_sd(y->data, y->stride, n);
      double sig_lower;
      size_t i;

      /*
       * if the initial fit is very good, then finding outliers by comparing
       * them to the residual standard deviation is difficult. Therefore we
       * set a lower bound on the standard deviation estimate that is a small
       * fraction of the standard deviation of the data values
       */
      sig_lower = 1.0e-6 * sigy;
      if (sig_lower == 0.0)
        sig_lower = 1.0;

      /* compute initial estimates using ordinary least squares */
      s = gsl_multifit_linear(X, y, c, cov, &chisq, w->multifit_p);
      if (s)
        return s;

      /* save Q S^{-1} of original matrix */
      gsl_matrix_memcpy(w->QSI, w->multifit_p->QSI);
      gsl_vector_memcpy(w->D, w->multifit_p->D);

      /* compute statistical leverage of each data point */
      s = gsl_linalg_SV_leverage(w->multifit_p->A, w->resfac);
      if (s)
        return s;

      /* correct residuals with factor 1 / sqrt(1 - h) */
      for (i = 0; i < n; ++i)
        {
          double h = gsl_vector_get(w->resfac, i);

          /* cap the leverage so the factor below stays finite */
          if (h > 0.9999)
            h = 0.9999;

          gsl_vector_set(w->resfac, i, 1.0 / sqrt(1.0 - h));
        }

      /* compute residuals from OLS fit r = y - X c */
      s = gsl_multifit_linear_residuals(X, y, c, w->r);
      if (s)
        return s;

      /* compute estimate of sigma from ordinary least squares */
      w->stats.sigma_ols = gsl_blas_dnrm2(w->r) / sqrt((double) w->stats.dof);

      /*
       * numit is incremented only while not yet converged, so
       * numit > maxiter after the loop means the iteration limit was hit
       */
      while (!converged && ++numit <= w->maxiter)
        {
          double sig;

          /* adjust residuals by statistical leverage (see DuMouchel and O'Brien) */
          s = gsl_vector_mul(w->r, w->resfac);
          if (s)
            return s;

          /* compute estimate of standard deviation using MAD */
          sig = robust_madsigma(w->r, w);

          /* scale residuals by standard deviation and tuning parameter */
          gsl_vector_scale(w->r, 1.0 / (GSL_MAX(sig, sig_lower) * w->tune));

          /* compute weights using these residuals */
          s = w->type->wfun(w->r, w->weights);
          if (s)
            return s;

          /* remember the previous coefficients for the convergence test */
          gsl_vector_memcpy(w->c_prev, c);

          /* solve weighted least squares with new weights */
          s = gsl_multifit_wlinear(X, w->weights, y, c, cov, &chisq, w->multifit_p);
          if (s)
            return s;

          /* compute new residuals r = y - X c */
          s = gsl_multifit_linear_residuals(X, y, c, w->r);
          if (s)
            return s;

          converged = robust_test_convergence(w->c_prev, c, tol);
        }

      /* compute final MAD sigma */
      w->stats.sigma_mad = robust_madsigma(w->r, w);

      /* compute robust estimate of sigma */
      w->stats.sigma_rob = robust_robsigma(w->r, w->stats.sigma_mad, w->tune, w);

      /* compute final estimate of sigma */
      w->stats.sigma = robust_sigma(w->stats.sigma_ols, w->stats.sigma_rob, w);

      /* store number of iterations */
      w->stats.numit = numit;

      {
        double dof = (double) w->stats.dof;
        double rnorm = w->stats.sigma * sqrt(dof); /* see DuMouchel, sec 4.2 */
        double ss_err = rnorm * rnorm;
        double ss_tot = gsl_stats_tss(y->data, y->stride, n);

        /* compute R^2 */
        w->stats.Rsq = 1.0 - ss_err / ss_tot;

        /* compute adjusted R^2 */
        w->stats.adj_Rsq = 1.0 - (1.0 - w->stats.Rsq) * (n - 1.0) / dof;

        /* compute rmse */
        w->stats.rmse = sqrt(ss_err / dof);

        /* store SSE */
        w->stats.sse = ss_err;
      }

      /* calculate covariance matrix = sigma^2 (X^T X)^{-1} */
      s = robust_covariance(w->stats.sigma, cov, w);
      if (s)
        return s;

      /* raise an error if not converged */
      if (numit > w->maxiter)
        {
          GSL_ERROR("maximum iterations exceeded", GSL_EMAXITER);
        }

      return s;
    }
} /* gsl_multifit_robust() */
/*
  Solves the over-determined system (a system with more equations than
  strictly needed), giving the solution that minimizes the error. That is,
  it minimizes the sum of the squared errors between each point and the
  corresponding value computed through the solution matrix.

  a_data: NUM_EQU x 11 coefficient matrix, row-major.
  b_data: NUM_EQU right-hand-side values.

  The 11 fitted values are stored in solution_matrix[0..10],
  solution_matrix[11] is set to 1, the camera matrix entries rt11..rt44 are
  filled from them, and reverse_update_camera_matrix() is called.
*/
void Module_DLT::linear_multiple_regression(double a_data[NUM_EQU*11], double b_data[NUM_EQU]){

  int i,j,k,aux;
  double chisq;
  gsl_matrix *X, *cov;
  gsl_vector *y, *c;
  gsl_multifit_linear_workspace * work;

  X = gsl_matrix_alloc(NUM_EQU,11);
  y = gsl_vector_alloc(NUM_EQU);
  c = gsl_vector_alloc (11);
  cov = gsl_matrix_alloc (11,11);

  /* Prepare the sample matrix.

     NOTE: the system of equations to solve has 11 unknowns instead of 12.
     The 12th unknown equals alpha (a constant) and must be fixed,
     otherwise the solution would always be the null vector.
  */
  for (i=0; i<NUM_EQU; i++){
    for (j=0; j<11; j++) {
      aux = i*11+j;
      gsl_matrix_set(X,i,j,a_data[aux]);
    }
  }

  /* Initialize the sample vector */
  for (k=0; k<NUM_EQU; k++) {
    gsl_vector_set(y,k,b_data[k]);
  }

  /* Set up and solve the over-determined system */
  work = gsl_multifit_linear_alloc (NUM_EQU,11);
  gsl_multifit_linear (X, y, c, cov, &chisq, work);
  gsl_multifit_linear_free (work);

  /* copy the solution */
  for (i=0; i<11; i++) {
    solution_matrix[i] = gsl_vector_get(c,i);
  }

  /* the GSL objects are no longer needed; previously they were leaked */
  gsl_matrix_free (X);
  gsl_vector_free (y);
  gsl_vector_free (c);
  gsl_matrix_free (cov);

  solution_matrix[11] = 1;

  this->camera.rt11 = solution_matrix[0];
  this->camera.rt12 = solution_matrix[1];
  this->camera.rt13 = solution_matrix[2];
  this->camera.rt14 = solution_matrix[3];

  this->camera.rt21 = solution_matrix[4];
  this->camera.rt22 = solution_matrix[5];
  this->camera.rt23 = solution_matrix[6];
  this->camera.rt24 = solution_matrix[7];

  this->camera.rt31 = solution_matrix[8];
  this->camera.rt32 = solution_matrix[9];
  this->camera.rt33 = solution_matrix[10];
  this->camera.rt34 = solution_matrix[11];

  this->camera.rt41 = 0;
  this->camera.rt42 = 0;
  this->camera.rt43 = 0;
  this->camera.rt44 = 1;

  reverse_update_camera_matrix(&(this->camera));
}
void test_filip () { size_t i, j; { gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (filip_n, filip_p); gsl_matrix * X = gsl_matrix_alloc (filip_n, filip_p); gsl_vector_view y = gsl_vector_view_array (filip_y, filip_n); gsl_vector * c = gsl_vector_alloc (filip_p); gsl_matrix * cov = gsl_matrix_alloc (filip_p, filip_p); gsl_vector_view diag; double chisq; double expected_c[11] = { -1467.48961422980, -2772.17959193342, -2316.37108160893, -1127.97394098372, -354.478233703349, -75.1242017393757, -10.8753180355343, -1.06221498588947, -0.670191154593408E-01, -0.246781078275479E-02, -0.402962525080404E-04 }; double expected_sd[11] = { 298.084530995537, 559.779865474950, 466.477572127796, 227.204274477751, 71.6478660875927, 15.2897178747400, 2.23691159816033, 0.221624321934227, 0.142363763154724E-01, 0.535617408889821E-03, 0.896632837373868E-05 }; double expected_chisq = 0.795851382172941E-03; for (i = 0 ; i < filip_n; i++) { for (j = 0; j < filip_p; j++) { gsl_matrix_set(X, i, j, pow(filip_x[i], j)); } } gsl_multifit_linear (X, &y.vector, c, cov, &chisq, work); gsl_test_rel (gsl_vector_get(c,0), expected_c[0], 1e-7, "filip gsl_fit_multilinear c0") ; gsl_test_rel (gsl_vector_get(c,1), expected_c[1], 1e-7, "filip gsl_fit_multilinear c1") ; gsl_test_rel (gsl_vector_get(c,2), expected_c[2], 1e-7, "filip gsl_fit_multilinear c2") ; gsl_test_rel (gsl_vector_get(c,3), expected_c[3], 1e-7, "filip gsl_fit_multilinear c3") ; gsl_test_rel (gsl_vector_get(c,4), expected_c[4], 1e-7, "filip gsl_fit_multilinear c4") ; gsl_test_rel (gsl_vector_get(c,5), expected_c[5], 1e-7, "filip gsl_fit_multilinear c5") ; gsl_test_rel (gsl_vector_get(c,6), expected_c[6], 1e-7, "filip gsl_fit_multilinear c6") ; gsl_test_rel (gsl_vector_get(c,7), expected_c[7], 1e-7, "filip gsl_fit_multilinear c7") ; gsl_test_rel (gsl_vector_get(c,8), expected_c[8], 1e-7, "filip gsl_fit_multilinear c8") ; gsl_test_rel (gsl_vector_get(c,9), expected_c[9], 1e-7, "filip gsl_fit_multilinear c9") ; 
gsl_test_rel (gsl_vector_get(c,10), expected_c[10], 1e-7, "filip gsl_fit_multilinear c10") ; diag = gsl_matrix_diagonal (cov); gsl_test_rel (gsl_vector_get(&diag.vector,0), pow(expected_sd[0],2.0), 1e-6, "filip gsl_fit_multilinear cov00") ; gsl_test_rel (gsl_vector_get(&diag.vector,1), pow(expected_sd[1],2.0), 1e-6, "filip gsl_fit_multilinear cov11") ; gsl_test_rel (gsl_vector_get(&diag.vector,2), pow(expected_sd[2],2.0), 1e-6, "filip gsl_fit_multilinear cov22") ; gsl_test_rel (gsl_vector_get(&diag.vector,3), pow(expected_sd[3],2.0), 1e-6, "filip gsl_fit_multilinear cov33") ; gsl_test_rel (gsl_vector_get(&diag.vector,4), pow(expected_sd[4],2.0), 1e-6, "filip gsl_fit_multilinear cov44") ; gsl_test_rel (gsl_vector_get(&diag.vector,5), pow(expected_sd[5],2.0), 1e-6, "filip gsl_fit_multilinear cov55") ; gsl_test_rel (gsl_vector_get(&diag.vector,6), pow(expected_sd[6],2.0), 1e-6, "filip gsl_fit_multilinear cov66") ; gsl_test_rel (gsl_vector_get(&diag.vector,7), pow(expected_sd[7],2.0), 1e-6, "filip gsl_fit_multilinear cov77") ; gsl_test_rel (gsl_vector_get(&diag.vector,8), pow(expected_sd[8],2.0), 1e-6, "filip gsl_fit_multilinear cov88") ; gsl_test_rel (gsl_vector_get(&diag.vector,9), pow(expected_sd[9],2.0), 1e-6, "filip gsl_fit_multilinear cov99") ; gsl_test_rel (gsl_vector_get(&diag.vector,10), pow(expected_sd[10],2.0), 1e-6, "filip gsl_fit_multilinear cov1010") ; gsl_test_rel (chisq, expected_chisq, 1e-7, "filip gsl_fit_multilinear chisq") ; gsl_vector_free(c); gsl_matrix_free(cov); gsl_matrix_free(X); gsl_multifit_linear_free (work); } { gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (filip_n, filip_p); gsl_matrix * X = gsl_matrix_alloc (filip_n, filip_p); gsl_vector_view y = gsl_vector_view_array (filip_y, filip_n); gsl_vector * w = gsl_vector_alloc (filip_n); gsl_vector * c = gsl_vector_alloc (filip_p); gsl_matrix * cov = gsl_matrix_alloc (filip_p, filip_p); double chisq; double expected_c[11] = { -1467.48961422980, -2772.17959193342, 
-2316.37108160893, -1127.97394098372, -354.478233703349, -75.1242017393757, -10.8753180355343, -1.06221498588947, -0.670191154593408E-01, -0.246781078275479E-02, -0.402962525080404E-04 }; /* computed using GNU Calc */ double expected_cov[11][11] ={ { 7.9269341767252183262588583867942e9, 1.4880416622254098343441063389706e10, 1.2385811858111487905481427591107e10, 6.0210784406215266653697715794241e9, 1.8936652526181982747116667336389e9, 4.0274900618493109653998118587093e8, 5.8685468011819735806180092394606e7, 5.7873451475721689084330083708901e6, 3.6982719848703747920663262917032e5, 1.3834818802741350637527054170891e4, 2.301758578713219280719633494302e2 }, { 1.4880416622254098334697515488559e10, 2.7955091668548290835529555438088e10, 2.3286604504243362691678565997033e10, 1.132895006796272983689297219686e10, 3.5657281653312473123348357644683e9, 7.5893300392314445528176646366087e8, 1.1066654886143524811964131660002e8, 1.0921285448484575110763947787775e7, 6.9838139975394769253353547606971e5, 2.6143091775349597218939272614126e4, 4.3523386330348588614289505633539e2 }, { 1.2385811858111487890788272968677e10, 2.3286604504243362677757802422747e10, 1.9412787917766676553608636489674e10, 9.4516246492862131849077729250098e9, 2.9771226694709917550143152097252e9, 6.3413035086730038062129508949859e8, 9.2536164488309401636559552742339e7, 9.1386304643423333815338760248027e6, 5.8479478338916429826337004060941e5, 2.1905933113294737443808429764554e4, 3.6493161325305557266196635180155e2 }, { 6.0210784406215266545770691532365e9, 1.1328950067962729823273441573365e10, 9.4516246492862131792040001429636e9, 4.6053152992000107509329772255094e9, 1.4517147860312147098138030287038e9, 3.0944988323328589376402579060072e8, 4.5190223822292688669369522708712e7, 4.4660958693678497534529855690752e6, 2.8599340736122198213681258676423e5, 1.0720394998549386596165641244705e4, 1.7870937745661967319298031044424e2 }, { 1.8936652526181982701620450132636e9, 3.5657281653312473058825073094524e9, 
2.9771226694709917514149924058297e9, 1.451714786031214708936087401632e9, 4.5796563896564815123074920050827e8, 9.7693972414561515534525103622773e7, 1.427717861635658545863942948444e7, 1.4120161287735817621354292900338e6, 9.0484361228623960006818614875557e4, 3.394106783764852373199087455398e3, 5.6617406468519495376287407526295e1 }, { 4.0274900618493109532650887473599e8, 7.589330039231444534478894935778e8, 6.3413035086730037947153564986653e8, 3.09449883233285893390542947998e8, 9.7693972414561515475770399055121e7, 2.0855726248311948992114244257719e7, 3.0501263034740400533872858749566e6, 3.0187475839310308153394428784224e5, 1.9358204633534233524477930175632e4, 7.2662989867560017077361942813911e2, 1.2129002231061036467607394277965e1 }, { 5.868546801181973559370854830868e7, 1.1066654886143524778548044386795e8, 9.2536164488309401413296494869777e7, 4.5190223822292688587853853162072e7, 1.4277178616356585441556046753562e7, 3.050126303474040051574715592746e6, 4.4639982579046340884744460329946e5, 4.4212093985989836047285007760238e4, 2.8371395028774486687625333589972e3, 1.0656694507620102300567296504381e2, 1.7799982046359973175080475654123e0 }, { 5.7873451475721688839974153925406e6, 1.0921285448484575071271480643397e7, 9.1386304643423333540728480344578e6, 4.4660958693678497427674903565664e6, 1.4120161287735817596182229182587e6, 3.0187475839310308117812257613082e5, 4.4212093985989836021482392757677e4, 4.3818874017028389517560906916315e3, 2.813828775753142855163154605027e2, 1.0576188138416671883232607188969e1, 1.7676976288918295012452853715408e-1 }, { 3.6982719848703747742568351456818e5, 6.9838139975394768959780068745979e5, 5.8479478338916429616547638954781e5, 2.8599340736122198128717796825489e5, 9.0484361228623959793493985226792e4, 1.9358204633534233490579641064343e4, 2.8371395028774486654873647731797e3, 2.8138287757531428535592907878017e2, 1.8081118503579798222896804627964e1, 6.8005074291434681866415478598732e-1, 1.1373581557749643543869665860719e-2 }, { 
1.3834818802741350562839757244708e4, 2.614309177534959709397445440919e4, 2.1905933113294737352721470167247e4, 1.0720394998549386558251721913182e4, 3.3941067837648523632905604575131e3, 7.2662989867560016909534954790835e2, 1.0656694507620102282337905013451e2, 1.0576188138416671871337685672492e1, 6.8005074291434681828743281967838e-1, 2.5593857187900736057022477529078e-2, 4.2831487599116264442963102045936e-4 }, { 2.3017585787132192669801658674163e2, 4.3523386330348588381716460685124e2, 3.6493161325305557094116270974735e2, 1.7870937745661967246233792737255e2, 5.6617406468519495180024059284629e1, 1.2129002231061036433003571679329e1, 1.7799982046359973135014027410646e0, 1.7676976288918294983059118597214e-1, 1.137358155774964353146460100337e-2, 4.283148759911626442000316269063e-4, 7.172253875245080423800933453952e-6 } }; double expected_chisq = 0.795851382172941E-03; for (i = 0 ; i < filip_n; i++) { for (j = 0; j < filip_p; j++) { gsl_matrix_set(X, i, j, pow(filip_x[i], j)); } } gsl_vector_set_all (w, 1.0); gsl_multifit_wlinear (X, w, &y.vector, c, cov, &chisq, work); gsl_test_rel (gsl_vector_get(c,0), expected_c[0], 1e-7, "filip gsl_fit_multilinear c0") ; gsl_test_rel (gsl_vector_get(c,1), expected_c[1], 1e-7, "filip gsl_fit_multilinear c1") ; gsl_test_rel (gsl_vector_get(c,2), expected_c[2], 1e-7, "filip gsl_fit_multilinear c2") ; gsl_test_rel (gsl_vector_get(c,3), expected_c[3], 1e-7, "filip gsl_fit_multilinear c3") ; gsl_test_rel (gsl_vector_get(c,4), expected_c[4], 1e-7, "filip gsl_fit_multilinear c4") ; gsl_test_rel (gsl_vector_get(c,5), expected_c[5], 1e-7, "filip gsl_fit_multilinear c5") ; gsl_test_rel (gsl_vector_get(c,6), expected_c[6], 1e-7, "filip gsl_fit_multilinear c6") ; gsl_test_rel (gsl_vector_get(c,7), expected_c[7], 1e-7, "filip gsl_fit_multilinear c7") ; gsl_test_rel (gsl_vector_get(c,8), expected_c[8], 1e-7, "filip gsl_fit_multilinear c8") ; gsl_test_rel (gsl_vector_get(c,9), expected_c[9], 1e-7, "filip gsl_fit_multilinear c9") ; gsl_test_rel 
(gsl_vector_get(c,10), expected_c[10], 1e-7, "filip gsl_fit_multilinear c10") ; for (i = 0; i < filip_p; i++) { for (j = 0; j < filip_p; j++) { gsl_test_rel (gsl_matrix_get(cov,i,j), expected_cov[i][j], 1e-6, "filip gsl_fit_wmultilinear cov(%d,%d)", i, j) ; } } gsl_test_rel (chisq, expected_chisq, 1e-7, "filip gsl_fit_multilinear chisq") ; gsl_vector_free(w); gsl_vector_free(c); gsl_matrix_free(cov); gsl_matrix_free(X); gsl_multifit_linear_free (work); } }
void calibrationManager::calculateWeights(vector <ofPoint> trackedPoints, vector <ofPoint> knownPoints){ int length = trackedPoints.size(); int nTerms = 6; gsl_matrix * x = gsl_matrix_alloc(length,nTerms); gsl_vector * yx = gsl_vector_alloc(length); gsl_vector * yy = gsl_vector_alloc(length); gsl_vector * w = gsl_vector_alloc(nTerms); double * ptr; double * ptrScreenX; double * ptrScreenY; ptr = gsl_matrix_ptr(x,0,0); ptrScreenX = gsl_vector_ptr(yx,0); ptrScreenY = gsl_vector_ptr(yy,0); for (int i = 0; i < length; i++){ float xPosEye = trackedPoints[i].x; float yPosEye = trackedPoints[i].y; // was -- Ax + Bx^2 + Cy + Dy^2 + Exy + Fx^3 + Gy^3 + H // now -- Ax + Bx^2 + Cy + Dy^2 + Exy + F *ptr++ = xPosEye; *ptr++ = xPosEye*xPosEye; *ptr++ = yPosEye; *ptr++ = yPosEye*yPosEye; *ptr++ = xPosEye*yPosEye; //*ptr++ = xPosEye*xPosEye*xPosEye; // the cubed term was too much, it seemed like. //*ptr++ = yPosEye*yPosEye*yPosEye; *ptr++ = 1; *ptrScreenX++ = knownPoints[i].x; *ptrScreenY++ = knownPoints[i].y; } gsl_vector *cx = gsl_vector_calloc(nTerms); gsl_vector *cy = gsl_vector_calloc(nTerms); gsl_matrix *cov = gsl_matrix_calloc(nTerms, nTerms); double chisq; gsl_multifit_linear_workspace *work = gsl_multifit_linear_alloc(length, nTerms); int res = gsl_multifit_linear (x, yx, cx, cov, &chisq, work); int res2 = gsl_multifit_linear (x, yy, cy, cov, &chisq, work); double * xptr = gsl_vector_ptr(cx,0); double * yptr = gsl_vector_ptr(cy,0); printf("-------------------------------------------- \n"); for (int i = 0; i < nTerms; i++){ printf("cx %i = %f \n", i, xptr[i]); cxfit[i] = xptr[i]; } for (int i = 0; i < nTerms; i++){ printf("cy %i = %f \n", i, yptr[i]); cyfit[i] = yptr[i]; } printf("-------------------------------------------- \n"); bBeenFit = true; //std::exit(0); //return ; }
int main (int argc, char **argv) { int i=0, j=0, n=0, nl=0, k=0, posi=0, posj=0, posk=0, ncol=0, nrow=0; double xi=0.0, yi=0.0, yy=0.0, ei=0.0, sumsq=0.0, med=0.0; gsl_matrix *X=NULL, *cov=NULL; gsl_vector *y=NULL, *w=NULL, *c=NULL; MRI_IMAGE *im = NULL; double *dar = NULL; gsl_multifit_linear_workspace *work=NULL; if (argc != 2) { fprintf (stderr,"usage: fitanje_1sign data > outfile\n"); exit (-1); } /* slower than specific code you had but more convenient. It allows you to use all the column and row selections we can do with filenames. Also, keeps you fron worrying about dimensions. The problem with your code was assuming you had 13 columns always That was not the case for stat5_fitcoef. OK, that was caused by a problem in the scripts. That is fixed, but I leave this change here anyway. */ fprintf(stderr,"Patience, reading %s... ", argv[1]); im = mri_read_double_1D (argv[1]); if (!im) { fprintf(stderr,"Error: Failed to read matrix data from %s\n", argv[1]); return(-1); } ncol = im->ny; nrow = im->nx; fprintf (stderr,"Have %d cols, %d rows\nNow fitting...", ncol, nrow); n = ncol-3; /* now just get the array and kill the rest */ dar = MRI_DOUBLE_PTR(im); /* make sure that pointer is set to NULL in im, or risk hell */ mri_clear_data_pointer(im) ; if (im) mri_free(im); im = NULL; /* now kill im */ X = gsl_matrix_alloc (n, 5); y = gsl_vector_alloc (n); c = gsl_vector_alloc (5); cov = gsl_matrix_alloc (5, 5); for (i = 0; i < n; i++) { xi = i+1; gsl_matrix_set (X, i, 0, 1.0); gsl_matrix_set (X, i, 1, xi); gsl_matrix_set (X, i, 2, xi*xi); gsl_matrix_set (X, i, 3, xi*xi*xi); gsl_matrix_set (X, i, 4, xi*xi*xi*xi); // printf ("%lg ",xi); } /*make header printf ("matrvola\n"); ZSS: By adding # to the text line, I made the output file be a .1D format */ fprintf(stdout, "#%s_0\t%s_1\t%s_2\t%s_3\t%s_4\n", argv[1],argv[1],argv[1],argv[1],argv[1]); // go by lines - signatures /* pre-allocate, I think this should be just fine, there should be no need to reinitialize work all the 
time */ work = gsl_multifit_linear_alloc (n, 5); for (nl=0; nl<nrow; ++nl) { posi = (int)dar[nl]; posj = (int)dar[nl+ nrow]; posk = (int)dar[nl+2*nrow]; for (k = 3; k < ncol; k++) { gsl_vector_set (y, k-3, dar[nl+k*nrow]); } gsl_multifit_linear (X, y, c, cov, &sumsq, work); /* printf ( "\n # best fit: Y = %g + %g X + %g X^2 +%g X^3 + %g X^4\n", C(0), C(1), C(2), C(3), C(4)); printf ("# sumsq = %g\n", sumsq); */ fprintf (stdout, "%11g\t%11g\t%11g\t%11g\t%11g\n", C(0), C(1), C(2), C(3), C(4)); /* printf ("# covariance matrix:\n"); printf ("[ %+.5e, %+.5e, %+.5e \n", COV(0,0), COV(0,1), COV(0,2)); printf (" %+.5e, %+.5e, %+.5e \n", COV(1,0), COV(1,1), COV(1,2)); printf (" %+.5e, %+.5e, %+.5e ]\n", COV(2,0), COV(2,1), COV(2,2)); printf ("# chisq = %g\n", chisq); */ } gsl_multifit_linear_free (work); work = NULL; free(dar); dar = NULL; /* done with input array */ gsl_vector_free (y); gsl_vector_free (c); gsl_matrix_free (cov); gsl_matrix_free (X); //gsl_vector_free (w); fprintf (stderr,"\n"); return 0; }
void test_longley () { size_t i, j; { gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (longley_n, longley_p); gsl_matrix_view X = gsl_matrix_view_array (longley_x, longley_n, longley_p); gsl_vector_view y = gsl_vector_view_array (longley_y, longley_n); gsl_vector * c = gsl_vector_alloc (longley_p); gsl_matrix * cov = gsl_matrix_alloc (longley_p, longley_p); gsl_vector_view diag; double chisq; double expected_c[7] = { -3482258.63459582, 15.0618722713733, -0.358191792925910E-01, -2.02022980381683, -1.03322686717359, -0.511041056535807E-01, 1829.15146461355 }; double expected_sd[7] = { 890420.383607373, 84.9149257747669, 0.334910077722432E-01, 0.488399681651699, 0.214274163161675, 0.226073200069370, 455.478499142212 } ; double expected_chisq = 836424.055505915; gsl_multifit_linear (&X.matrix, &y.vector, c, cov, &chisq, work); gsl_test_rel (gsl_vector_get(c,0), expected_c[0], 1e-10, "longley gsl_fit_multilinear c0") ; gsl_test_rel (gsl_vector_get(c,1), expected_c[1], 1e-10, "longley gsl_fit_multilinear c1") ; gsl_test_rel (gsl_vector_get(c,2), expected_c[2], 1e-10, "longley gsl_fit_multilinear c2") ; gsl_test_rel (gsl_vector_get(c,3), expected_c[3], 1e-10, "longley gsl_fit_multilinear c3") ; gsl_test_rel (gsl_vector_get(c,4), expected_c[4], 1e-10, "longley gsl_fit_multilinear c4") ; gsl_test_rel (gsl_vector_get(c,5), expected_c[5], 1e-10, "longley gsl_fit_multilinear c5") ; gsl_test_rel (gsl_vector_get(c,6), expected_c[6], 1e-10, "longley gsl_fit_multilinear c6") ; diag = gsl_matrix_diagonal (cov); gsl_test_rel (gsl_vector_get(&diag.vector,0), pow(expected_sd[0],2.0), 1e-10, "longley gsl_fit_multilinear cov00") ; gsl_test_rel (gsl_vector_get(&diag.vector,1), pow(expected_sd[1],2.0), 1e-10, "longley gsl_fit_multilinear cov11") ; gsl_test_rel (gsl_vector_get(&diag.vector,2), pow(expected_sd[2],2.0), 1e-10, "longley gsl_fit_multilinear cov22") ; gsl_test_rel (gsl_vector_get(&diag.vector,3), pow(expected_sd[3],2.0), 1e-10, "longley gsl_fit_multilinear 
cov33") ; gsl_test_rel (gsl_vector_get(&diag.vector,4), pow(expected_sd[4],2.0), 1e-10, "longley gsl_fit_multilinear cov44") ; gsl_test_rel (gsl_vector_get(&diag.vector,5), pow(expected_sd[5],2.0), 1e-10, "longley gsl_fit_multilinear cov55") ; gsl_test_rel (gsl_vector_get(&diag.vector,6), pow(expected_sd[6],2.0), 1e-10, "longley gsl_fit_multilinear cov66") ; gsl_test_rel (chisq, expected_chisq, 1e-10, "longley gsl_fit_multilinear chisq") ; gsl_vector_free(c); gsl_matrix_free(cov); gsl_multifit_linear_free (work); } { gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (longley_n, longley_p); gsl_matrix_view X = gsl_matrix_view_array (longley_x, longley_n, longley_p); gsl_vector_view y = gsl_vector_view_array (longley_y, longley_n); gsl_vector * w = gsl_vector_alloc (longley_n); gsl_vector * c = gsl_vector_alloc (longley_p); gsl_matrix * cov = gsl_matrix_alloc (longley_p, longley_p); double chisq; double expected_c[7] = { -3482258.63459582, 15.0618722713733, -0.358191792925910E-01, -2.02022980381683, -1.03322686717359, -0.511041056535807E-01, 1829.15146461355 }; double expected_cov[7][7] = { { 8531122.56783558, -166.727799925578, 0.261873708176346, 3.91188317230983, 1.1285582054705, -0.889550869422687, -4362.58709870581}, {-166.727799925578, 0.0775861253030891, -1.98725210399982e-05, -0.000247667096727256, -6.82911920718824e-05, 0.000136160797527761, 0.0775255245956248}, {0.261873708176346, -1.98725210399982e-05, 1.20690316701888e-08, 1.66429546772984e-07, 3.61843600487847e-08, -6.78805814483582e-08, -0.00013158719037715}, {3.91188317230983, -0.000247667096727256, 1.66429546772984e-07, 2.56665052544717e-06, 6.96541409215597e-07, -9.00858307771567e-07, -0.00197260370663974}, {1.1285582054705, -6.82911920718824e-05, 3.61843600487847e-08, 6.96541409215597e-07, 4.94032602583969e-07, -9.8469143760973e-08, -0.000576921112208274}, {-0.889550869422687, 0.000136160797527761, -6.78805814483582e-08, -9.00858307771567e-07, -9.8469143760973e-08, 
5.49938542664952e-07, 0.000430074434198215}, {-4362.58709870581, 0.0775255245956248, -0.00013158719037715, -0.00197260370663974, -0.000576921112208274, 0.000430074434198215, 2.23229587481535 }} ; double expected_chisq = 836424.055505915; gsl_vector_set_all (w, 1.0); gsl_multifit_wlinear (&X.matrix, w, &y.vector, c, cov, &chisq, work); gsl_test_rel (gsl_vector_get(c,0), expected_c[0], 1e-10, "longley gsl_fit_wmultilinear c0") ; gsl_test_rel (gsl_vector_get(c,1), expected_c[1], 1e-10, "longley gsl_fit_wmultilinear c1") ; gsl_test_rel (gsl_vector_get(c,2), expected_c[2], 1e-10, "longley gsl_fit_wmultilinear c2") ; gsl_test_rel (gsl_vector_get(c,3), expected_c[3], 1e-10, "longley gsl_fit_wmultilinear c3") ; gsl_test_rel (gsl_vector_get(c,4), expected_c[4], 1e-10, "longley gsl_fit_wmultilinear c4") ; gsl_test_rel (gsl_vector_get(c,5), expected_c[5], 1e-10, "longley gsl_fit_wmultilinear c5") ; gsl_test_rel (gsl_vector_get(c,6), expected_c[6], 1e-10, "longley gsl_fit_wmultilinear c6") ; for (i = 0; i < longley_p; i++) { for (j = 0; j < longley_p; j++) { gsl_test_rel (gsl_matrix_get(cov,i,j), expected_cov[i][j], 1e-7, "longley gsl_fit_wmultilinear cov(%d,%d)", i, j) ; } } gsl_test_rel (chisq, expected_chisq, 1e-10, "longley gsl_fit_wmultilinear chisq") ; gsl_vector_free(w); gsl_vector_free(c); gsl_matrix_free(cov); gsl_multifit_linear_free (work); } }
int main(int argc, char* argv[]) { // parameters that you can set. string delim = "\t "; string chipFile = ""; vector<string> ctrlFiles; string outFile = ""; int readLen = 50; int chunkSize = 100000; int windowSize = 5; int interval = 5; bool talk = false; string errorLine = "usage " + string(argv[0]) + " [Parameters]\n" + "\t-i <infile, BED-formated file containing the ChIP-reads, sorted on chromosome and position.>\n" + "\t-c <space\\tab separataed list of infile(s), BED-formated file(s) \n" + "\t containing the control-reads (e.g. Input/IgG et cetera), sorted as the file given in '-i' \n" + "\t-o <outfile, BED-formated file of resulting reads after normalization, \n" + "\t with read lengths as defined by -l>\n" + "\t-rl <read length, defaults to 50 >\n" + "\t-cs <chunk size, number of bp considered at a time when building the model>\n" + "\t-ws <window size, at every point used to build the model a window of +/- \n" + "\t this size is averaged to create an observed data point.>\n" + "\t-iv <interval, the step size determining the distance between points \n" + "\t used as observations in the regression model.>\n" + "\t-v <set verbose>\n" + "example: \n" + string(argv[0]) + " -i myreads.bed -c input.bed igg.bed noise.bed -o normalized.bed -rl 50 -cs 100000 -ws 5 -iv 5 \n" ; bool fail = false; bool ctrlfiles = false; string failmessage = ""; for (int i=1;i<argc;i++) { if(strcmp(argv[i],"-i") == 0) { chipFile.assign(argv[++i]); ctrlfiles = false; } else if(strcmp(argv[i],"-o") == 0) { outFile.assign(argv[++i]); ctrlfiles = false; } else if(strcmp(argv[i],"-c") == 0) { ctrlfiles = true; } else if(strcmp(argv[i],"-rl") == 0) { readLen = atoi(argv[++i]); ctrlfiles = false; } else if(strcmp(argv[i],"-cs") == 0) { chunkSize = atoi(argv[++i]); ctrlfiles = false; } else if(strcmp(argv[i],"-ws") == 0) { windowSize = atoi(argv[++i]); ctrlfiles = false; } else if(strcmp(argv[i],"-iv") == 0) { interval = atoi(argv[++i]); ctrlfiles = false; } else if(strcmp(argv[i],"-v") == 0) 
{ talk = true; ctrlfiles = false; } else { if(ctrlfiles) // assume that all things not parsable after -c are control files. Check for existance/readability below. { ctrlFiles.push_back(argv[i]); } else { failmessage.assign("Unknown argument: "); failmessage.append(argv[i]); failmessage.append("\n"); fail = true; } } } // Check infile and readability. if(chipFile == "") { failmessage.append("infile (-i) must be specified.\n"); fail = true; } ifstream inf; inf.open(chipFile.c_str()); if(!inf) { failmessage.append("Could not open infile '" + chipFile + "' (does the file exist?)\n"); fail = true; } // Check control files. if(ctrlFiles.size() < 1) { failmessage.append("at least one control file (-c) must be specified.\n"); fail = true; } ifstream infc[ctrlFiles.size()]; if(!fail) for (int i = 0;i<ctrlFiles.size();i++) { infc[i].open(ctrlFiles[i].c_str()); if(!infc[i]) { failmessage.append("Could not open ctrlfile '" + ctrlFiles[i] + "' (does the file exist?)\n"); fail = true; } } // Check outfile and readability. if(outFile == "") { failmessage.append("outfile (-o) must be specified.\n"); fail = true; } ofstream outf; if(!fail) outf.open(outFile.c_str(),ios::trunc); if(!outf) { failmessage.append("Could not open outfile '" + outFile + "' (do we have permission ?)\n"); fail = true; } // are we ok so far? if (fail) { cerr << endl << failmessage.c_str() << endl << errorLine << endl; //try and close any opened files inf.close(); for (int i = 0;i<ctrlFiles.size();i++) infc[i].close(); outf.close(); return(1); } /* * Get some initial parameters. 
*/ map <string,seqStats> seqMapChip; map <string,seqStats> *seqMapCtrls; seqMapCtrls = new map<string,seqStats>[ctrlFiles.size()]; map <string,seqStats>::iterator it; map <int,int*>::iterator valIt; // Read the reference sequences and the range of each file cout<<"Reading BED-files."<<endl; int nlinesChIP, nlinesCtrl=0; cout<<"ChIP file.."<<endl; nlinesChIP = initControlBEDlite(&inf,&seqMapChip,0,1,2,5,true); cout<<"Control file(s) .."<<endl; for (int i = 0;i<ctrlFiles.size();i++) nlinesCtrl += initControlBEDlite(&infc[i],&seqMapCtrls[i],0,1,2,5,true); cout<<"ChIP-data consists of "<<nlinesChIP<<" mapped fragments."<<endl; cout<<"Control-data consists of "<<nlinesCtrl<<" mapped fragments."<<endl; // print some stats. cout <<"ChIP Read Statistics::"<<endl; cout <<setw(10)<<"Name\t"<<setw(10)<<"minCrd\t"<<setw(10)<<"maxCrd\t"<<setw(10)<<"F_counts\t"<<setw(10)<<"R_counts\t"<<endl; for ( it=seqMapChip.begin() ; it != seqMapChip.end(); it++ ) { cout <<setw(10)<< (*it).first << "\t" <<setw(10)<< (*it).second.minPos << "\t" << setw(10)<<(*it).second.maxPos<<"\t"; cout <<setw(10)<< (*it).second.countF << "\t" <<setw(10)<< (*it).second.countR<<endl; } cout <<"Control Statistics::"<<endl; for (int i = 0;i<ctrlFiles.size();i++) { cout<<ctrlFiles[i]<<endl; cout <<setw(10)<<"Name\t"<<setw(10)<<"minCrd\t"<<setw(10)<<"maxCrd\t"<<setw(10)<<"F_counts\t"<<setw(10)<<"R_counts\t"<<endl; for ( it=seqMapCtrls[i].begin() ; it != seqMapCtrls[i].end(); it++ ) { cout <<setw(10)<< (*it).first << "\t" <<setw(10)<< (*it).second.minPos << "\t" << setw(10)<<(*it).second.maxPos<<"\t"; cout <<setw(10)<< (*it).second.countF << "\t" <<setw(10)<< (*it).second.countR<<endl; } } cout<<"Processing reads in chunks of "<<chunkSize<<" bp."<<endl; int lowPos,highPos; int chunkRange; int chunkObs; int obsCount; int *winSumF = new int[ctrlFiles.size()+1]; int *winSumR = new int[ctrlFiles.size()+1]; double chisqF,chisqR; gsl_matrix *XF, *covF,*XR, *covR; gsl_vector *yF,*cF,*rF,*yR,*cR,*rR; double 
*resiF,*resiR; int *cntF,*cntR; int posOff; // initialize. string line; inputLine chipLine; inputLine *ctrlLines; ctrlLines = new inputLine[ctrlFiles.size()]; // read first line from each file. check position and chr. assume that the files are ordered inside chr. i.e don't loop // over chr by seqMap but over info in the files. retrieve min/max position from the seqMap depending on file contents. // also assume that the chr ordering is the same in chip & control files. getline(inf,line); parseBEDline(line,&chipLine,0,1,2,5); for(int i=0;i<ctrlFiles.size();i++) { getline(infc[i],line); parseBEDline(line,&ctrlLines[i],0,1,2,5); } // initialize with the "first" chromosome and its min/max pos. string currChr = chipLine.seq; int chrMinPos,chrMaxPos; int chrMinPosCtrl,chrMaxPosCtrl; int currLine = 1; int memNeeded; int chrPosChip,chrPosCtrl; int ctrlIndex; // tmp. storage for the chip/control signals. unsigned short *chipF,*chipR,*ctrlF,*ctrlR; // introduce curr pos, curr Chr etc. and a loop on !EOF in the chip file. // no point in normalizing where there are no signals in chip... while(currLine <= nlinesChIP) { chrMinPos = seqMapChip.find(currChr)->second.minPos; chrMaxPos = seqMapChip.find(currChr)->second.maxPos;; if(talk) cout<<"ChIP: "<<chipLine.seq<<" "<<chrMinPos<<" "<<chrMaxPos<<endl; chrPosChip = chrMaxPos - chrMinPos +1; // check the min/max for this chr in ctrl-data chrMinPosCtrl = INT_MAX; chrMaxPosCtrl = -1; for(int i = 0;i<ctrlFiles.size();i++) { if(seqMapCtrls[i].count(currChr)) { chrMinPosCtrl = min(chrMinPosCtrl,seqMapCtrls[i][currChr].minPos); chrMaxPosCtrl = max(chrMaxPosCtrl,seqMapCtrls[i][currChr].maxPos); } } if(talk) cout<<"Control: "<<chipLine.seq<<" "<<chrMinPosCtrl<<" "<<chrMaxPosCtrl<<endl; chrPosCtrl = chrMaxPosCtrl - chrMinPosCtrl +1; memNeeded = sizeof(unsigned short)*(chrPosChip + ctrlFiles.size()*chrPosCtrl); // allocate memory to hold the entire chromosome, do the regression in chunks. 
try{ cout<<"Trying to allocate: "; if(memNeeded > 1000000000) cout<<memNeeded/1000000000<<" Gb for "<<currChr<<"."; else if (memNeeded > 1000000) cout<<memNeeded/1000000<<" Mb for "<<currChr<<"."; else if (memNeeded > 1000) cout<<memNeeded/1000<<" kb for "<<currChr<<"."; else cout<<memNeeded<<" bytes for raw signals"<<currChr<<"."; chipF = new unsigned short[chrPosChip]; chipR = new unsigned short[chrPosChip]; ctrlF = new unsigned short[chrPosCtrl*ctrlFiles.size()]; // these will need to be accessed in a "[i + chrPosCtrl*j]"-type of fashion. ctrlR = new unsigned short[chrPosCtrl*ctrlFiles.size()]; cout<<" Done."<<endl; }catch (std::bad_alloc &f){ cerr<<string(argv[0])<<" couldn't allocate as much memory as it wanted. Failure: '"<<f.what()<<endl; // close files. inf.close(); for (int i = 0;i<ctrlFiles.size();i++) infc[i].close(); outf.close(); delete[] chipF; delete[] chipR; delete[] ctrlF; delete[] ctrlR; delete[] resiF; delete[] resiR; delete[] cntF; delete[] cntR; return(-1); } // make sure it's all zeroes. for(int i=0;i<chrPosChip;i++) { chipF[i] = 0; chipR[i] = 0; } for(int i=0;i<chrPosCtrl;i++) for(int j = 0;j<ctrlFiles.size();j++) { ctrlF[i + j*chrPosCtrl] = 0; ctrlR[i + j*chrPosCtrl] = 0; } // read in the sought chip-data while((chipLine.seq == currChr) && !(inf.eof())) // chip-file { //cout<<chipLine.seq<<"\t"<<line<<endl; // update previous line's data. if(chipLine.strand == 1) chipF[chipLine.pos-chrMinPos]++; else chipR[chipLine.pos-chrMinPos+chipLine.len]++; // read in the nextline. getline(inf,line); parseBEDline(line,&chipLine,0,1,2,5); currLine++; } if((chipLine.seq == currChr) && (inf.eof())) // chip-file, last read, ok chr, use. 
{ //cout<<"Last line of the ChipFIle"<<endl; //cout<<chipLine.seq<<"\t"<<line<<endl; if(chipLine.strand == 1) chipF[chipLine.pos-chrMinPos]++; else chipR[chipLine.pos-chrMinPos+chipLine.len-1]++; } // read in the sought ctrl-data for(int i = 0;i<ctrlFiles.size();i++) { // is there data at all for this chr in this control file? if(seqMapCtrls[i].count(currChr) == 1) { // we're assuming that the chromosomes are in the same order in the chip & ctrl files. // cases: // chr on current line is not the same as in chip // => we know that we should have chr data on this chr & that chrs comes in the same order. this can prob. only // happen for a chr-specific chromosome, e.g. its safe to read past and check again. // chr on current line is the same as in chip // => this is good. last time (either preFirst or not) should have read prev. chr completely. so just start reading until we hit // another chr. // if(ctrlLines[i].seq != currChr) { // read past th "wrong" chromosome(s). while(!infc[i].eof() && ctrlLines[i].seq != currChr) { getline(infc[i],line); parseBEDline(line,&ctrlLines[i],0,1,2,5); } } // now we have the first line of the the correct chr in 'ctrlLines[i]' // Read in the complete chr and store the data accordingly. while(!infc[i].eof() && ctrlLines[i].seq == currChr) { if(ctrlLines[i].strand == 1) ctrlF[ctrlLines[i].pos-chrMinPosCtrl + i*chrPosCtrl]++; else ctrlR[ctrlLines[i].pos-chrMinPosCtrl+ctrlLines[i].len + i*chrPosCtrl-1]++; getline(infc[i],line); parseBEDline(line,&ctrlLines[i],0,1,2,5); } } } cout<<"Analysing "<<currChr<<endl; currChr = chipLine.seq; // store "next" chromosome // now all data for this chr is read. Start analysing in chunks. lowPos = chrMinPos; while(lowPos < chrMaxPos) // loop over this chromosome data in chunks. { if(!talk) { cout<<lowPos<<" of "<<chrMaxPos<<"\r"; } highPos = lowPos + chunkSize-1; if(highPos >= (chrMaxPos - 0.5*chunkSize)) // less than 0.8 of a chunk left. merge. 
highPos = chrMaxPos; chunkRange = highPos - lowPos + 1; if(talk) cout<<"["<<lowPos<<","<<highPos<<"]\tsize: "<<chunkRange<<endl; resiF = new double[chunkRange]; resiR = new double[chunkRange]; cntF = new int[chunkRange]; cntR = new int[chunkRange]; for(int i=0;i<chunkRange;i++) { resiF[i] = 0.0; resiR[i] = 0.0; cntF[i] = 0; cntR[i] = 0; } // for each chunk, step forward in 'interval' steps and average signals in that window. chunkObs = (chunkRange-2*windowSize)/interval + 1; if (talk) cout<<"\tsampling this chunk at "<<chunkObs<<" positions."<<endl; // Storage for the signals on '+' XF = gsl_matrix_alloc (chunkObs, ctrlFiles.size()); yF = gsl_vector_alloc (chunkObs); rF = gsl_vector_alloc (chunkObs); cF = gsl_vector_alloc (ctrlFiles.size()); covF = gsl_matrix_alloc (ctrlFiles.size(), ctrlFiles.size()); // Storage for the signals on '-' XR = gsl_matrix_alloc (chunkObs, ctrlFiles.size()); yR = gsl_vector_alloc (chunkObs); rR = gsl_vector_alloc (chunkObs); cR = gsl_vector_alloc (ctrlFiles.size()); covR = gsl_matrix_alloc (ctrlFiles.size(), ctrlFiles.size()); // loop over the signals in interval steps and average in +/- windowSize. fill in the matrices. obsCount = 0; for (int i = lowPos+windowSize;i<(highPos-windowSize);) { // collect sums over each signal in the sough window for (int j = 0;j < ctrlFiles.size() + 1;j++) { winSumF[j] = 0; winSumR[j] = 0; } for (int j = -windowSize;j<=windowSize;j++) { winSumF[0] += chipF[i - chrMinPos + j]; winSumR[0] += chipR[i - chrMinPos + j]; for(int k = 0;k<ctrlFiles.size();k++) { ctrlIndex = i - chrMinPosCtrl + j + k*chrPosCtrl; if(ctrlIndex >= 0 && ctrlIndex <chrPosCtrl) // is there ctrl data for this position? 
{ winSumF[1+k] += ctrlF[ctrlIndex]; winSumR[1+k] += ctrlR[ctrlIndex]; } } } // the chip signal gsl_vector_set (yF, obsCount, (double)winSumF[0]/(double)(2*windowSize+1)); gsl_vector_set (yR, obsCount, (double)winSumR[0]/(double)(2*windowSize+1)); // the control signals for (int j = 0;j < ctrlFiles.size();j++) { gsl_matrix_set (XF, obsCount, j, (double)winSumF[j+1]/(double)(2*windowSize+1)); gsl_matrix_set (XR, obsCount, j, (double)winSumR[j+1]/(double)(2*windowSize+1)); } obsCount++; i+=interval; } // fit the models. gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (chunkObs, ctrlFiles.size()); /* * '+' Strand */ gsl_multifit_linear (XF, yF, cF, covF,&chisqF, work); if(talk) { cout<<"\t'+' chisq: "<<chisqF<<"\t"<<"c's:"; for (int j = 0;j < ctrlFiles.size();j++) cout<<gsl_vector_get(cF,j)<<" "; cout<<endl; } /* * '-' Strand */ gsl_multifit_linear (XR, yR, cR, covR,&chisqR, work); if(talk) { cout<<"\t'-' chisq: "<<chisqR<<"\t"<<"c's:"; for (int j = 0;j < ctrlFiles.size();j++) cout<<gsl_vector_get(cR,j)<<" "; cout<<endl; } gsl_multifit_linear_free (work); // calculate residuals. if(talk) cout<<"\tCaclulating residuals.."; gsl_multifit_linear_residuals (XF,yF,cF,rF); gsl_multifit_linear_residuals (XR,yR,cR,rR); if(talk) cout<<"done."<<endl<<"\tRebuilding signal.."; // rebuild a per-bp-signal for (int i=0;i<chunkObs;i++) { // center of this observation. posOff = 1+(i+1)*interval; if(posOff > chunkRange) // outside of our chunk, should never happen. 
continue; if(gsl_vector_get(rF,i) > 0.5) // original R-code used 'round' on the residuals, ceiling(x-0.5) does the same thing for (int j = -windowSize;j<=windowSize;j++) { resiF[j+posOff] += ceil(gsl_vector_get(rF,i)-0.5); cntF[j+posOff] += 1; } if(gsl_vector_get(rR,i) > 0.5) for (int j = -windowSize;j<=windowSize;j++) { resiR[j+posOff] += ceil(gsl_vector_get(rR,i)-0.5); cntR[j+posOff] += 1; } } if(talk) cout<<"done."<<endl<<"\tWriting output.."; for (int i=0;i<chunkRange;i++) { if(cntF[i] > 0) { resiF[i] = resiF[i]/(double)cntF[i]; if(resiF[i] > 0) { for(int j=0;j<ceil(resiF[i]);j++) { outf<<currChr<<"\t"<<lowPos + i-1<<"\t"<<lowPos+i+readLen-2<<"\tDUMMY\t"; // bed is zero-based, halfopen (ie.-1/-2) outf<<resiF[i]<<"\t+"<<endl; } } } if(cntR[i] > 0) { resiR[i] = resiR[i]/(double)cntR[i]; if(resiR[i] > 0) { for(int j=0;j<ceil(resiR[i]);j++) { outf<<currChr<<"\t"<<lowPos + i-readLen-2<<"\t"<<lowPos+i-1<<"\tDUMMY\t"; outf<<resiR[i]<<"\t-"<<endl; } } } } if(talk) cout<<"done."<<endl; lowPos = highPos+1; delete[] resiF; delete[] resiR; delete[] cntF; delete[] cntR; gsl_matrix_free(XF); gsl_vector_free(yF); gsl_vector_free(rF); gsl_vector_free(cF); gsl_matrix_free(covF); gsl_matrix_free(XR); gsl_vector_free(yR); gsl_vector_free(rR); gsl_vector_free(cR); gsl_matrix_free(covR); } if(!talk) cout<<endl; } // close files. 
inf.close(); for (int i = 0;i<ctrlFiles.size();i++) infc[i].close(); outf.close(); string statFname = "readStats.txt"; bool writeStats = true; ofstream ofc; ofc.open(statFname.c_str(),ios::trunc); if (ofc.fail()) { failmessage.clear(); failmessage.append("ERROR: Output file \""); failmessage.append(statFname.c_str()); failmessage.append("\" could not be created, skipping.\n"); writeStats = false; } if(writeStats) { ofc <<"Chip reads"<<endl<<"Name\t"<<"minCrd\t"<<"maxCrd\t"<<"F_counts\t"<<"R_counts\t"<<endl; for ( it=seqMapChip.begin() ; it != seqMapChip.end(); it++ ) { ofc << (*it).first << "\t" << (*it).second.minPos << "\t" << (*it).second.maxPos<<"\t"; ofc << (*it).second.countF << "\t" << (*it).second.countR; ofc <<endl; } for (int i = 0;i<ctrlFiles.size();i++) { ofc<<ctrlFiles[i]<<endl; ofc <<"Control reads"<<endl<<"Name\t"<<"minCrd\t"<<"maxCrd\t"<<"F_counts\t"<<"R_counts\t"<<endl; for ( it=seqMapCtrls[i].begin() ; it != seqMapCtrls[i].end(); it++ ) { ofc << (*it).first << "\t" << (*it).second.minPos << "\t" << (*it).second.maxPos<<"\t"; ofc << (*it).second.countF << "\t" << (*it).second.countR; ofc <<endl; } } }else{ cerr<<failmessage.c_str()<<endl; } ofc.close(); return(0); }
void test_pontius () { size_t i, j; gsl_multifit_linear_workspace * work = gsl_multifit_linear_alloc (pontius_n, pontius_p); gsl_multifit_robust_workspace * work_rob = gsl_multifit_robust_alloc (gsl_multifit_robust_ols, pontius_n, pontius_p); gsl_matrix * X = gsl_matrix_alloc (pontius_n, pontius_p); gsl_vector_view y = gsl_vector_view_array (pontius_y, pontius_n); gsl_vector * c = gsl_vector_alloc (pontius_p); gsl_vector * r = gsl_vector_alloc (pontius_n); gsl_matrix * cov = gsl_matrix_alloc (pontius_p, pontius_p); double chisq, chisq_res; double expected_c[3] = { 0.673565789473684E-03, 0.732059160401003E-06, -0.316081871345029E-14}; double expected_sd[3] = { 0.107938612033077E-03, 0.157817399981659E-09, 0.486652849992036E-16 }; double expected_chisq = 0.155761768796992E-05; gsl_vector_view diag = gsl_matrix_diagonal (cov); gsl_vector_view exp_c = gsl_vector_view_array(expected_c, pontius_p); gsl_vector_view exp_sd = gsl_vector_view_array(expected_sd, pontius_p); for (i = 0 ; i < pontius_n; i++) { for (j = 0; j < pontius_p; j++) { gsl_matrix_set(X, i, j, pow(pontius_x[i], j)); } } /* test unweighted least squares */ gsl_multifit_linear (X, &y.vector, c, cov, &chisq, work); gsl_multifit_linear_residuals(X, &y.vector, c, r); gsl_blas_ddot(r, r, &chisq_res); test_pontius_results("pontius gsl_multifit_linear", c, &exp_c.vector, &diag.vector, &exp_sd.vector, chisq, chisq_res, expected_chisq); /* test robust least squares */ gsl_multifit_robust (X, &y.vector, c, cov, work_rob); test_pontius_results("pontius gsl_multifit_robust", c, &exp_c.vector, &diag.vector, &exp_sd.vector, 1.0, 1.0, 1.0); /* test weighted least squares */ { gsl_vector * w = gsl_vector_alloc (pontius_n); double expected_cov[3][3] ={ {2.76754385964916e-01 , -3.59649122807024e-07, 9.74658869395731e-14}, {-3.59649122807024e-07, 5.91630591630603e-13, -1.77210703526497e-19}, {9.74658869395731e-14, -1.77210703526497e-19, 5.62573661988878e-26} }; gsl_vector_set_all (w, 1.0); gsl_multifit_wlinear (X, w, 
&y.vector, c, cov, &chisq, work); gsl_multifit_linear_residuals(X, &y.vector, c, r); gsl_blas_ddot(r, r, &chisq_res); test_pontius_results("pontius gsl_multifit_wlinear", c, &exp_c.vector, NULL, NULL, chisq, chisq_res, expected_chisq); for (i = 0; i < pontius_p; i++) { for (j = 0; j < pontius_p; j++) { gsl_test_rel (gsl_matrix_get(cov,i,j), expected_cov[i][j], 1e-10, "pontius gsl_multifit_wlinear cov(%d,%d)", i, j) ; } } gsl_vector_free(w); } gsl_vector_free(c); gsl_vector_free(r); gsl_matrix_free(cov); gsl_matrix_free(X); gsl_multifit_linear_free (work); gsl_multifit_robust_free (work_rob); }
/*! \brief out_set = thd_polyfit( in_set, mask, polorder, prefix, verb); fits a polynomial model of order polorder to the time series of voxels in in_set \param in_set (THD_3dim_dataset* ) An AFNI dset pointer to input data \param mask (byte *) if mask is not NULL then voxel i will be processed if mask[i] != 0. if mask is NULL then all voxels are processed. \param polorder (int) polynomial order \param prefix (char *) prefix of output dset \param verb (int) verbosity flag \return out_set (THD_3dim_dataset* ) Dset containing polynomial fits. */ THD_3dim_dataset *thd_polyfit(THD_3dim_dataset *in_set, byte *mask, int polorder, char *prefix, int verb) { int i=0, j=0, nl=0, k=0, posi=0, posj=0, posk=0, nrow=0, ncol = 0; double xi=0.0, yi=0.0, yy=0.0, ei=0.0, sumsq=0.0, med=0.0; gsl_matrix *X=NULL, *cov=NULL; gsl_vector *y=NULL, *w=NULL, *c=NULL; MRI_IMAGE *im = NULL; THD_3dim_dataset *out_set=NULL; double *dar = NULL; float *cbuf=NULL; float *dvec = NULL; gsl_multifit_linear_workspace *work=NULL; ENTRY("thd_polyfit"); /* prepare output */ out_set = EDIT_empty_copy(in_set) ; EDIT_dset_items( out_set , ADN_nvals , polorder , ADN_ntt , polorder , ADN_datum_all , MRI_float , ADN_brick_fac , NULL , ADN_prefix , prefix ? prefix : "OMG!" 
, ADN_none ) ; for( j=0 ; j < polorder ; j++ ) /* create empty bricks to be filled below */ EDIT_substitute_brick( out_set , j , MRI_float , NULL ) ; /* do the fitting */ if (verb) fprintf (stderr,"Now fitting...\n"); ncol = DSET_NVALS(in_set); nrow = DSET_NVOX(in_set); X = gsl_matrix_alloc (ncol, polorder); y = gsl_vector_alloc (ncol); c = gsl_vector_alloc (polorder); cov = gsl_matrix_alloc (polorder, polorder); for (i = 0; i < ncol; i++) { xi = i+1; gsl_matrix_set (X, i, 0, 1.0); gsl_matrix_set (X, i, 1, xi); gsl_matrix_set (X, i, 2, xi*xi); gsl_matrix_set (X, i, 3, xi*xi*xi); gsl_matrix_set (X, i, 4, xi*xi*xi*xi); // printf ("%lg ",xi); } /*make header printf ("matrvola\n"); ZSS: By adding # to the text line, I made the output file be a .1D format */ if (verb > 1) fprintf(stdout, "#%s_0\t%s_1\t%s_2\t%s_3\t%s_4\n", DSET_PREFIX(in_set),DSET_PREFIX(in_set), DSET_PREFIX(in_set),DSET_PREFIX(in_set), DSET_PREFIX(in_set)); // go by lines - signatures /* pre-allocate, I think this should be just fine, there should be no need to reinitialize work all the time */ work = gsl_multifit_linear_alloc (ncol, polorder); dvec = (float * )malloc(sizeof(float)*ncol) ; /* array to hold signature */ cbuf = (float *)malloc(sizeof(float)*polorder) ; /* array to hold fit */ for (nl=0; nl<nrow; ++nl) { if (!mask || mask[nl]) { posi = -1; posj = -1; posk = -1; THD_extract_array( nl , in_set , 0 , dvec ) ; /*get signature from voxel */ for (k = 0; k < ncol; k++) { gsl_vector_set (y, k, dvec[k]); } gsl_multifit_linear (X, y, c, cov, &sumsq, work); /* printf ( "\n # best fit: Y = %g + %g X + %g X^2 +%g X^3 + %g X^4\n", C(0), C(1), C(2), C(3), C(4)); printf ("# sumsq = %g\n", sumsq); */ for (i=0;i<polorder;++i) cbuf[i] = (float)C(i); THD_insert_series( nl , out_set , polorder , MRI_float , cbuf , 1 ) ; /* stick result in output */ if (verb > 1) fprintf (stdout, "%11g\t%11g\t%11g\t%11g\t%11g\n", C(0), C(1), C(2), C(3), C(4)); /* printf ("# covariance matrix:\n"); printf ("[ %+.5e, %+.5e, %+.5e 
\n", COV(0,0), COV(0,1), COV(0,2)); printf (" %+.5e, %+.5e, %+.5e \n", COV(1,0), COV(1,1), COV(1,2)); printf (" %+.5e, %+.5e, %+.5e ]\n", COV(2,0), COV(2,1), COV(2,2)); printf ("# chisq = %g\n", chisq); */ } } gsl_multifit_linear_free (work); work = NULL; free(dvec); dvec = NULL; free(cbuf); cbuf = NULL; gsl_vector_free (y); gsl_vector_free (c); gsl_matrix_free (cov); gsl_matrix_free (X); //gsl_vector_free (w); free(dvec); dvec = NULL; RETURN(out_set); }