Exemple #1
0
void
test_pontius ()
{
  size_t i, j;
  gsl_multifit_linear_workspace * work = 
    gsl_multifit_linear_alloc (pontius_n, pontius_p);

  gsl_multifit_robust_workspace * work_rob = 
    gsl_multifit_robust_alloc (gsl_multifit_robust_ols, pontius_n, pontius_p);

  gsl_matrix * X = gsl_matrix_alloc (pontius_n, pontius_p);
  gsl_vector_view y = gsl_vector_view_array (pontius_y, pontius_n);
  gsl_vector * c = gsl_vector_alloc (pontius_p);
  gsl_vector * r = gsl_vector_alloc (pontius_n);
  gsl_matrix * cov = gsl_matrix_alloc (pontius_p, pontius_p);

  double chisq, chisq_res;

  double expected_c[3] = { 0.673565789473684E-03,
                           0.732059160401003E-06,
                          -0.316081871345029E-14};

  double expected_sd[3] = { 0.107938612033077E-03,
                            0.157817399981659E-09,
                            0.486652849992036E-16 };

  double expected_chisq = 0.155761768796992E-05;

  gsl_vector_view diag = gsl_matrix_diagonal (cov);
  gsl_vector_view exp_c = gsl_vector_view_array(expected_c, pontius_p);
  gsl_vector_view exp_sd = gsl_vector_view_array(expected_sd, pontius_p);

  for (i = 0 ; i < pontius_n; i++) 
    {
      for (j = 0; j < pontius_p; j++) 
        {
          gsl_matrix_set(X, i, j, pow(pontius_x[i], j));
        }
    }

  /* test unweighted least squares */
  gsl_multifit_linear (X, &y.vector, c, cov, &chisq, work);
  gsl_multifit_linear_residuals(X, &y.vector, c, r);
  gsl_blas_ddot(r, r, &chisq_res);

  test_pontius_results("pontius gsl_multifit_linear",
                       c, &exp_c.vector,
                       &diag.vector, &exp_sd.vector,
                       chisq, chisq_res, expected_chisq);

  /* test robust least squares */
  gsl_multifit_robust (X, &y.vector, c, cov, work_rob);

  test_pontius_results("pontius gsl_multifit_robust",
                       c, &exp_c.vector,
                       &diag.vector, &exp_sd.vector,
                       1.0, 1.0, 1.0);

  /* test weighted least squares */
  {
    gsl_vector * w = gsl_vector_alloc (pontius_n);

    double expected_cov[3][3] ={ 
      {2.76754385964916e-01 , -3.59649122807024e-07,   9.74658869395731e-14},
      {-3.59649122807024e-07,   5.91630591630603e-13,  -1.77210703526497e-19},
      {9.74658869395731e-14,  -1.77210703526497e-19,   5.62573661988878e-26} };

    gsl_vector_set_all (w, 1.0);

    gsl_multifit_wlinear (X, w, &y.vector, c, cov, &chisq, work);
    gsl_multifit_linear_residuals(X, &y.vector, c, r);
    gsl_blas_ddot(r, r, &chisq_res);

    test_pontius_results("pontius gsl_multifit_wlinear",
                         c, &exp_c.vector,
                         NULL, NULL,
                         chisq, chisq_res, expected_chisq);

    for (i = 0; i < pontius_p; i++) 
      {
        for (j = 0; j < pontius_p; j++)
          {
            gsl_test_rel (gsl_matrix_get(cov,i,j), expected_cov[i][j], 1e-10, 
                          "pontius gsl_multifit_wlinear cov(%d,%d)", i, j) ;
          }
      }

    gsl_vector_free(w);
  }

  gsl_vector_free(c);
  gsl_vector_free(r);
  gsl_matrix_free(cov);
  gsl_matrix_free(X);
  gsl_multifit_linear_free (work);
  gsl_multifit_robust_free (work_rob);
}
Exemple #2
0
/*
 * test_longley()
 *   Fit the longley data (longley_x, longley_y) with the ordinary, robust
 *   and weighted least squares solvers and compare the coefficients,
 *   covariance and chisq against the reference values hard-coded below.
 */
void 
test_longley ()
{     
  gsl_multifit_linear_workspace * work = 
    gsl_multifit_linear_alloc (longley_n, longley_p);

  gsl_multifit_robust_workspace * work_rob =
    gsl_multifit_robust_alloc (gsl_multifit_robust_ols, longley_n, longley_p);

  /* X and y are views onto the static data arrays; nothing to free */
  gsl_matrix_view X = gsl_matrix_view_array (longley_x, longley_n, longley_p);
  gsl_vector_view y = gsl_vector_view_array (longley_y, longley_n);
  gsl_vector * c = gsl_vector_alloc (longley_p);
  gsl_vector * r = gsl_vector_alloc (longley_n);
  gsl_matrix * cov = gsl_matrix_alloc (longley_p, longley_p);

  double chisq, chisq_res;

  /* reference coefficients */
  double expected_c[7] = {  -3482258.63459582,
                            15.0618722713733,
                            -0.358191792925910E-01,
                            -2.02022980381683,
                            -1.03322686717359,
                            -0.511041056535807E-01,
                            1829.15146461355 };

  /* reference standard deviations of the coefficients */
  double expected_sd[7]  = {  890420.383607373,      
                              84.9149257747669,      
                              0.334910077722432E-01, 
                              0.488399681651699,     
                              0.214274163161675,     
                              0.226073200069370,     
                              455.478499142212 } ;  

  double expected_chisq = 836424.055505915;

  /* diag is a view: it tracks whatever the solvers write into cov */
  gsl_vector_view diag = gsl_matrix_diagonal (cov);
  gsl_vector_view exp_c = gsl_vector_view_array(expected_c, longley_p);
  gsl_vector_view exp_sd = gsl_vector_view_array(expected_sd, longley_p);

  /* test unweighted least squares */
  gsl_multifit_linear (&X.matrix, &y.vector, c, cov, &chisq, work);
  gsl_multifit_linear_residuals(&X.matrix, &y.vector, c, r);
  /* chisq_res = r . r, an independent check of the reported chisq */
  gsl_blas_ddot(r, r, &chisq_res);

  test_longley_results("longley gsl_multifit_linear",
                       c, &exp_c.vector,
                       &diag.vector, &exp_sd.vector,
                       chisq, chisq_res, expected_chisq);

  /* test robust least squares */
  gsl_multifit_robust (&X.matrix, &y.vector, c, cov, work_rob);

  /*
   * gsl_multifit_robust() has no chisq output, so identical placeholder
   * values are passed for the three chisq arguments
   */
  test_longley_results("longley gsl_multifit_robust",
                       c, &exp_c.vector,
                       &diag.vector, &exp_sd.vector,
                       1.0, 1.0, 1.0);

  /* test weighted least squares */
  {
    size_t i, j;

    gsl_vector * w = gsl_vector_alloc (longley_n);

    double expected_cov[7][7] = { { 8531122.56783558,
-166.727799925578, 0.261873708176346, 3.91188317230983,
1.1285582054705, -0.889550869422687, -4362.58709870581},

{-166.727799925578, 0.0775861253030891, -1.98725210399982e-05,
-0.000247667096727256, -6.82911920718824e-05, 0.000136160797527761,
0.0775255245956248},

{0.261873708176346, -1.98725210399982e-05, 1.20690316701888e-08,
1.66429546772984e-07, 3.61843600487847e-08, -6.78805814483582e-08,
-0.00013158719037715},

{3.91188317230983, -0.000247667096727256, 1.66429546772984e-07,
2.56665052544717e-06, 6.96541409215597e-07, -9.00858307771567e-07,
-0.00197260370663974},

{1.1285582054705, -6.82911920718824e-05, 3.61843600487847e-08,
6.96541409215597e-07, 4.94032602583969e-07, -9.8469143760973e-08,
-0.000576921112208274},

{-0.889550869422687, 0.000136160797527761, -6.78805814483582e-08,
-9.00858307771567e-07, -9.8469143760973e-08, 5.49938542664952e-07,
0.000430074434198215},

{-4362.58709870581, 0.0775255245956248, -0.00013158719037715,
-0.00197260370663974, -0.000576921112208274, 0.000430074434198215,
2.23229587481535 }} ;

    /* unit weights: the same expected_c / expected_chisq are reused */
    gsl_vector_set_all (w, 1.0);

    gsl_multifit_wlinear (&X.matrix, w, &y.vector, c, cov, &chisq, work);
    gsl_multifit_linear_residuals(&X.matrix, &y.vector, c, r);
    gsl_blas_ddot(r, r, &chisq_res);

    /* no sd vectors here; the full covariance is compared below instead */
    test_longley_results("longley gsl_multifit_wlinear",
                         c, &exp_c.vector,
                         NULL, NULL,
                         chisq, chisq_res, expected_chisq);

    for (i = 0; i < longley_p; i++) 
      {
        for (j = 0; j < longley_p; j++)
          {
            /*
             * i and j are size_t: cast them so the variadic arguments
             * match the conversion specifiers exactly
             */
            gsl_test_rel (gsl_matrix_get(cov,i,j), expected_cov[i][j], 1e-7, 
                          "longley gsl_multifit_wlinear cov(%lu,%lu)",
                          (unsigned long) i, (unsigned long) j) ;
          }
      }

    gsl_vector_free(w);
  }

  gsl_vector_free(c);
  gsl_vector_free(r);
  gsl_matrix_free(cov);
  gsl_multifit_linear_free (work);
  gsl_multifit_robust_free (work_rob);
} /* test_longley() */
/*
 * test_pontius()
 *   Fit the pontius data (pontius_x, pontius_y) with a polynomial model
 *   whose design matrix columns are x^0 .. x^(pontius_p - 1).  The first
 *   section checks gsl_multifit_linear(), the second checks
 *   gsl_multifit_wlinear() with unit weights; each section allocates and
 *   frees its own workspace and buffers.
 */
void
test_pontius ()
{
  size_t i, j;

  /* unweighted fit */
  {
    gsl_multifit_linear_workspace * work = 
      gsl_multifit_linear_alloc (pontius_n, pontius_p);

    gsl_matrix * X = gsl_matrix_alloc (pontius_n, pontius_p);
    gsl_vector_view y = gsl_vector_view_array (pontius_y, pontius_n);
    gsl_vector * c = gsl_vector_alloc (pontius_p);
    gsl_vector * r = gsl_vector_alloc (pontius_n);
    gsl_matrix * cov = gsl_matrix_alloc (pontius_p, pontius_p);
    gsl_vector_view diag;

    double chisq;

    double expected_c[3] = { 0.673565789473684E-03,
                             0.732059160401003E-06,
                            -0.316081871345029E-14};

    double expected_sd[3] = { 0.107938612033077E-03,
                              0.157817399981659E-09,
                              0.486652849992036E-16 };

    double expected_chisq = 0.155761768796992E-05;

    /* build the design matrix X(i,j) = x_i^j */
    for (i = 0 ; i < pontius_n; i++) 
      {
        for (j = 0; j < pontius_p; j++) 
          {
            gsl_matrix_set(X, i, j, pow(pontius_x[i], j));
          }
      }

    gsl_multifit_linear (X, &y.vector, c, cov, &chisq, work);

    gsl_test_rel (gsl_vector_get(c,0), expected_c[0], 1e-10, "pontius gsl_fit_multilinear c0") ;
    gsl_test_rel (gsl_vector_get(c,1), expected_c[1], 1e-10, "pontius gsl_fit_multilinear c1") ;
    gsl_test_rel (gsl_vector_get(c,2), expected_c[2], 1e-10, "pontius gsl_fit_multilinear c2") ;

    diag = gsl_matrix_diagonal (cov);

    /* the covariance diagonal holds variances, hence the squared sd */
    gsl_test_rel (gsl_vector_get(&diag.vector,0), pow(expected_sd[0],2.0), 1e-10, "pontius gsl_fit_multilinear cov00") ;
    gsl_test_rel (gsl_vector_get(&diag.vector,1), pow(expected_sd[1],2.0), 1e-10, "pontius gsl_fit_multilinear cov11") ;
    gsl_test_rel (gsl_vector_get(&diag.vector,2), pow(expected_sd[2],2.0), 1e-10, "pontius gsl_fit_multilinear cov22") ;

    gsl_test_rel (chisq, expected_chisq, 1e-10, "pontius gsl_fit_multilinear chisq") ;

    /* recompute chisq as r . r from the residuals (overwrites chisq) */
    gsl_multifit_linear_residuals(X, &y.vector, c, r);
    gsl_blas_ddot(r, r, &chisq);
    gsl_test_rel (chisq, expected_chisq, 1e-10, "pontius gsl_fit_multilinear residuals") ;

    gsl_vector_free(c);
    gsl_vector_free(r);
    gsl_matrix_free(cov);
    gsl_matrix_free(X);
    gsl_multifit_linear_free (work);
  }


  /* weighted fit with unit weights */
  {
    gsl_multifit_linear_workspace * work = 
      gsl_multifit_linear_alloc (pontius_n, pontius_p);

    gsl_matrix * X = gsl_matrix_alloc (pontius_n, pontius_p);
    gsl_vector_view y = gsl_vector_view_array (pontius_y, pontius_n);
    gsl_vector * w = gsl_vector_alloc (pontius_n);
    gsl_vector * c = gsl_vector_alloc (pontius_p);
    gsl_vector * r = gsl_vector_alloc (pontius_n);
    gsl_matrix * cov = gsl_matrix_alloc (pontius_p, pontius_p);

    double chisq;

    double expected_c[3] = {  0.673565789473684E-03,
                               0.732059160401003E-06,
                               -0.316081871345029E-14};

    double expected_chisq = 0.155761768796992E-05;

    double expected_cov[3][3] ={ 
      {2.76754385964916e-01 , -3.59649122807024e-07,   9.74658869395731e-14},
      {-3.59649122807024e-07,   5.91630591630603e-13,  -1.77210703526497e-19},
      {9.74658869395731e-14,  -1.77210703526497e-19,   5.62573661988878e-26} };


    /* build the design matrix X(i,j) = x_i^j */
    for (i = 0 ; i < pontius_n; i++) 
      {
        for (j = 0; j < pontius_p; j++) 
          {
            gsl_matrix_set(X, i, j, pow(pontius_x[i], j));
          }
      }

    gsl_vector_set_all (w, 1.0);

    gsl_multifit_wlinear (X, w, &y.vector, c, cov, &chisq, work);

    /* labelled "wmultilinear" like the other checks in this section */
    gsl_test_rel (gsl_vector_get(c,0), expected_c[0], 1e-10, "pontius gsl_fit_wmultilinear c0") ;
    gsl_test_rel (gsl_vector_get(c,1), expected_c[1], 1e-10, "pontius gsl_fit_wmultilinear c1") ;
    gsl_test_rel (gsl_vector_get(c,2), expected_c[2], 1e-10, "pontius gsl_fit_wmultilinear c2") ;


    for (i = 0; i < pontius_p; i++) 
      {
        for (j = 0; j < pontius_p; j++)
          {
            /*
             * i and j are size_t: cast them so the variadic arguments
             * match the conversion specifiers exactly
             */
            gsl_test_rel (gsl_matrix_get(cov,i,j), expected_cov[i][j], 1e-10, 
                          "pontius gsl_fit_wmultilinear cov(%lu,%lu)",
                          (unsigned long) i, (unsigned long) j) ;
          }
      }

    gsl_test_rel (chisq, expected_chisq, 1e-10, "pontius gsl_fit_wmultilinear chisq") ;

    /* recompute chisq as r . r from the residuals (overwrites chisq) */
    gsl_multifit_linear_residuals(X, &y.vector, c, r);
    gsl_blas_ddot(r, r, &chisq);
    gsl_test_rel (chisq, expected_chisq, 1e-10, "pontius gsl_fit_wmultilinear residuals") ;

    gsl_vector_free(w);
    gsl_vector_free(c);
    gsl_vector_free(r);
    gsl_matrix_free(cov);
    gsl_matrix_free(X);
    gsl_multifit_linear_free (work);
  }
}
/*
 * gsl_multifit_robust()
 *   Robust linear fit of y = X c by iteratively reweighted least squares.
 *
 * Inputs: X   - observation matrix, w->n by w->p
 *         y   - observation vector, length w->n
 *         c   - (output) fitted coefficients, length w->p
 *         cov - (output) covariance matrix, w->p by w->p
 *         w   - robust workspace; fit statistics are stored in w->stats
 *
 * Return: 0 on success; GSL_EBADLEN / GSL_ENOTSQR on mismatched
 *         dimensions; GSL_EMAXITER if the iteration did not converge
 *         within w->maxiter steps; otherwise the non-zero status of the
 *         first helper routine that failed.
 */
int
gsl_multifit_robust(const gsl_matrix * X,
                    const gsl_vector * y,
                    gsl_vector * c,
                    gsl_matrix * cov,
                    gsl_multifit_robust_workspace *w)
{
  const double tol = GSL_SQRT_DBL_EPSILON;
  int status;
  int converged = 0;
  size_t numit = 0;
  size_t n;
  size_t k;
  double chisq;
  double sd_y;
  double sd_floor;

  /* validate dimensions; each GSL_ERROR returns from the function */
  if (X->size1 != y->size)
    {
      GSL_ERROR
        ("number of observations in y does not match rows of matrix X",
         GSL_EBADLEN);
    }

  if (X->size2 != c->size)
    {
      GSL_ERROR ("number of parameters c does not match columns of matrix X",
                 GSL_EBADLEN);
    }

  if (cov->size1 != cov->size2)
    {
      GSL_ERROR ("covariance matrix is not square", GSL_ENOTSQR);
    }

  if (c->size != cov->size1)
    {
      GSL_ERROR
        ("number of parameters does not match size of covariance matrix",
         GSL_EBADLEN);
    }

  if (X->size1 != w->n || X->size2 != w->p)
    {
      GSL_ERROR
        ("size of workspace does not match size of observation matrix",
         GSL_EBADLEN);
    }

  n = y->size;
  sd_y = gsl_stats_sd(y->data, y->stride, n);

  /*
   * A very good initial fit leaves tiny residuals, which makes outlier
   * detection against the residual scale unreliable.  Floor the scale
   * estimate at a small fraction of the spread of the data itself, and
   * fall back to 1 when that spread is exactly zero.
   */
  sd_floor = 1.0e-6 * sd_y;
  if (sd_floor == 0.0)
    sd_floor = 1.0;

  /* starting point: ordinary least squares solution */
  status = gsl_multifit_linear(X, y, c, cov, &chisq, w->multifit_p);
  if (status != 0)
    return status;

  /* keep Q S^{-1} and D of the unweighted problem for later use */
  gsl_matrix_memcpy(w->QSI, w->multifit_p->QSI);
  gsl_vector_memcpy(w->D, w->multifit_p->D);

  /* statistical leverage h_i of every observation */
  status = gsl_linalg_SV_leverage(w->multifit_p->A, w->resfac);
  if (status != 0)
    return status;

  /* turn leverages into residual correction factors 1 / sqrt(1 - h_i) */
  for (k = 0; k < n; ++k)
    {
      const double lev = gsl_vector_get(w->resfac, k);
      const double lev_capped = (lev > 0.9999) ? 0.9999 : lev;

      gsl_vector_set(w->resfac, k, 1.0 / sqrt(1.0 - lev_capped));
    }

  /* residuals of the OLS solution, r = y - X c */
  status = gsl_multifit_linear_residuals(X, y, c, w->r);
  if (status != 0)
    return status;

  /* sigma estimate of the OLS fit */
  w->stats.sigma_ols = gsl_blas_dnrm2(w->r) / sqrt((double) w->stats.dof);

  /* numit is only incremented while the fit has not yet converged */
  while (!converged && ++numit <= w->maxiter)
    {
      double mad_sigma;

      /* leverage-adjust the residuals (see DuMouchel and O'Brien) */
      status = gsl_vector_mul(w->r, w->resfac);
      if (status != 0)
        return status;

      /* scale estimate from the median absolute deviation */
      mad_sigma = robust_madsigma(w->r, w);

      /* normalize residuals by the floored scale and tuning constant */
      gsl_vector_scale(w->r, 1.0 / (GSL_MAX(mad_sigma, sd_floor) * w->tune));

      /* map normalized residuals to weights */
      status = w->type->wfun(w->r, w->weights);
      if (status != 0)
        return status;

      /* remember the current coefficients for the convergence test */
      gsl_vector_memcpy(w->c_prev, c);

      /* weighted least squares step with the updated weights */
      status = gsl_multifit_wlinear(X, w->weights, y, c, cov, &chisq,
                                    w->multifit_p);
      if (status != 0)
        return status;

      /* residuals of the updated solution, r = y - X c */
      status = gsl_multifit_linear_residuals(X, y, c, w->r);
      if (status != 0)
        return status;

      converged = robust_test_convergence(w->c_prev, c, tol);
    }

  /* sigma estimates: MAD, robust, then the final combined value */
  w->stats.sigma_mad = robust_madsigma(w->r, w);
  w->stats.sigma_rob = robust_robsigma(w->r, w->stats.sigma_mad, w->tune, w);
  w->stats.sigma = robust_sigma(w->stats.sigma_ols, w->stats.sigma_rob, w);

  /* iteration count (w->maxiter + 1 when the loop ran out of steps) */
  w->stats.numit = numit;

  /* goodness-of-fit statistics derived from the final sigma */
  {
    double nu = (double) w->stats.dof;
    double resid_norm = w->stats.sigma * sqrt(nu); /* see DuMouchel, sec 4.2 */
    double sse = resid_norm * resid_norm;
    double sst = gsl_stats_tss(y->data, y->stride, n);

    w->stats.Rsq = 1.0 - sse / sst;
    w->stats.adj_Rsq = 1.0 - (1.0 - w->stats.Rsq) * (n - 1.0) / nu;
    w->stats.rmse = sqrt(sse / nu);
    w->stats.sse = sse;
  }

  /* covariance matrix = sigma^2 (X^T X)^{-1} */
  status = robust_covariance(w->stats.sigma, cov, w);
  if (status != 0)
    return status;

  /* statistics and covariance are filled in even when this error is raised */
  if (numit > w->maxiter)
    {
      GSL_ERROR("maximum iterations exceeded", GSL_EMAXITER);
    }

  return status;
} /* gsl_multifit_robust() */
int main(int argc, char* argv[]) 
{
  // parameters that you can set. 
  string delim      = "\t ";
  string chipFile   = "";
  vector<string> ctrlFiles;
  string outFile    = "";
  int readLen       = 50;
  int chunkSize     = 100000;
  int windowSize    = 5;
  int interval      = 5;
  bool talk         = false;

  string errorLine = "usage " + 
    string(argv[0]) + 
    " [Parameters]\n" +  
    "\t-i  <infile, BED-formated file containing the ChIP-reads, sorted on chromosome and position.>\n" +
	"\t-c  <space\\tab separataed list of infile(s), BED-formated file(s) \n" + 
    "\t    containing the control-reads (e.g. Input/IgG et cetera), sorted as the file given in '-i' \n" +  
    "\t-o  <outfile, BED-formated file of resulting reads after normalization, \n" + 
	"\t    with read lengths as defined by -l>\n" + 
    "\t-rl <read length, defaults to 50 >\n" + 
    "\t-cs <chunk size, number of bp considered at a time when building the model>\n" + 
	"\t-ws <window size, at every point used to build the model a window of +/- \n" + 
	"\t    this size is averaged to create an observed data point.>\n" +
    "\t-iv <interval, the step size determining the distance between points \n" + 
	"\t     used as observations in the regression model.>\n" +
    "\t-v  <set verbose>\n" + 
 	"example: \n" +  string(argv[0]) + " -i myreads.bed -c input.bed igg.bed noise.bed -o normalized.bed -rl 50 -cs 100000 -ws 5 -iv 5 \n"
   ;


  bool fail = false;
  bool ctrlfiles = false;
  string failmessage = "";
  
  for (int i=1;i<argc;i++)
    {
      if(strcmp(argv[i],"-i") == 0)
		{
		  chipFile.assign(argv[++i]);
		  ctrlfiles = false;
		}
	  else if(strcmp(argv[i],"-o") == 0)
		{
		  outFile.assign(argv[++i]);
    	  ctrlfiles = false;
		}
	  else if(strcmp(argv[i],"-c") == 0)
		{
		  ctrlfiles = true;
		}
      else if(strcmp(argv[i],"-rl") == 0)
		{
		  readLen = atoi(argv[++i]);
		  ctrlfiles = false;
		}
	  else if(strcmp(argv[i],"-cs") == 0)
		{
		  chunkSize = atoi(argv[++i]);
    	  ctrlfiles = false;
		}
	  else if(strcmp(argv[i],"-ws") == 0)
		{
		  windowSize = atoi(argv[++i]);
    	  ctrlfiles = false;
		}
	  else if(strcmp(argv[i],"-iv") == 0)
		{
		  interval = atoi(argv[++i]);
		  ctrlfiles = false;
		}
	  else if(strcmp(argv[i],"-v") == 0)
		{
		  talk = true;
		  ctrlfiles = false;
		}
	  else
		{
    
		  if(ctrlfiles) // assume that all things not parsable after -c are control files. Check for existance/readability below.
			{
			  ctrlFiles.push_back(argv[i]);
			}
		  else
			{
			  failmessage.assign("Unknown argument: ");
			  failmessage.append(argv[i]);
			  failmessage.append("\n");
			  fail = true;
			}
		}
    }
  
  // Check infile and readability. 

  if(chipFile == "")
    {
      failmessage.append("infile (-i) must be specified.\n");
      fail = true;
    }
  
  ifstream inf;
  inf.open(chipFile.c_str());
  
  if(!inf)
    {
      failmessage.append("Could not open infile '" + chipFile + "' (does the file exist?)\n");
      fail = true;
    }
  
  // Check control files. 
  if(ctrlFiles.size() < 1)
    {
      failmessage.append("at least one control file (-c) must be specified.\n");
      fail = true;
    }

  
  ifstream infc[ctrlFiles.size()];
  if(!fail)
	for (int i = 0;i<ctrlFiles.size();i++)
	  {
		infc[i].open(ctrlFiles[i].c_str());
		if(!infc[i])
		  {
			failmessage.append("Could not open ctrlfile '" + ctrlFiles[i]  + "' (does the file exist?)\n");
			fail = true;
		  }
	  }
	  
  // Check outfile and readability. 

  if(outFile == "")
    {
      failmessage.append("outfile (-o) must be specified.\n");
      fail = true;
    }
  ofstream outf;  
  if(!fail)
	outf.open(outFile.c_str(),ios::trunc);
  if(!outf)
    {
      failmessage.append("Could not open outfile '" + outFile + "' (do we have permission ?)\n");
      fail = true;
    }

  // are we ok so far? 
  if (fail)
    {
      cerr << endl << failmessage.c_str() << endl << errorLine << endl;
	  //try and close any opened files
	  inf.close();
	  for (int i = 0;i<ctrlFiles.size();i++)  
		infc[i].close();
	  outf.close(); 
	  return(1);
    }
  
  /*
   * Get some initial parameters. 
   */
  map <string,seqStats> seqMapChip; 
  map <string,seqStats> *seqMapCtrls;
  seqMapCtrls = new map<string,seqStats>[ctrlFiles.size()];
  
  map <string,seqStats>::iterator it;
  map <int,int*>::iterator valIt;
  
  // Read the reference sequences and the range of each file
  cout<<"Reading BED-files."<<endl;
  int nlinesChIP, nlinesCtrl=0;
  cout<<"ChIP file.."<<endl;
  nlinesChIP = initControlBEDlite(&inf,&seqMapChip,0,1,2,5,true);
  cout<<"Control file(s) .."<<endl;
  for (int i = 0;i<ctrlFiles.size();i++)  
	nlinesCtrl += initControlBEDlite(&infc[i],&seqMapCtrls[i],0,1,2,5,true);
  
  cout<<"ChIP-data consists of "<<nlinesChIP<<" mapped fragments."<<endl;
  cout<<"Control-data consists of "<<nlinesCtrl<<" mapped fragments."<<endl;
  
  // print some stats. 
  cout <<"ChIP Read Statistics::"<<endl;
  cout <<setw(10)<<"Name\t"<<setw(10)<<"minCrd\t"<<setw(10)<<"maxCrd\t"<<setw(10)<<"F_counts\t"<<setw(10)<<"R_counts\t"<<endl;
  for ( it=seqMapChip.begin() ; it != seqMapChip.end(); it++ )
    {
      cout <<setw(10)<< (*it).first << "\t" <<setw(10)<< (*it).second.minPos << "\t" << setw(10)<<(*it).second.maxPos<<"\t";
      cout <<setw(10)<< (*it).second.countF << "\t" <<setw(10)<< (*it).second.countR<<endl;
    }
  cout <<"Control Statistics::"<<endl;
  for (int i = 0;i<ctrlFiles.size();i++)
	{
	  cout<<ctrlFiles[i]<<endl;
	  cout <<setw(10)<<"Name\t"<<setw(10)<<"minCrd\t"<<setw(10)<<"maxCrd\t"<<setw(10)<<"F_counts\t"<<setw(10)<<"R_counts\t"<<endl;
	  for ( it=seqMapCtrls[i].begin() ; it != seqMapCtrls[i].end(); it++ )
		{
		  cout <<setw(10)<< (*it).first << "\t" <<setw(10)<< (*it).second.minPos << "\t" << setw(10)<<(*it).second.maxPos<<"\t";
		  cout <<setw(10)<< (*it).second.countF << "\t" <<setw(10)<< (*it).second.countR<<endl;
		}
	}
  
  cout<<"Processing reads in chunks of "<<chunkSize<<" bp."<<endl;
  int lowPos,highPos;
  int chunkRange;
  int chunkObs;
  int obsCount;
  int *winSumF = new int[ctrlFiles.size()+1];
  int *winSumR = new int[ctrlFiles.size()+1];
  double chisqF,chisqR;
  gsl_matrix *XF, *covF,*XR, *covR;
  gsl_vector *yF,*cF,*rF,*yR,*cR,*rR;
  double *resiF,*resiR;
  int *cntF,*cntR;
  int posOff;
  

  // initialize. 
  string line;
  inputLine chipLine;
  inputLine *ctrlLines;
  ctrlLines = new inputLine[ctrlFiles.size()];
  
  // read first line from each file. check position and chr. assume that the files are ordered inside chr. i.e don't loop
  // over chr by seqMap but over info in the files. retrieve min/max position from the seqMap depending on file contents. 
  // also assume that the chr ordering is the same in chip & control files.
  getline(inf,line);
  parseBEDline(line,&chipLine,0,1,2,5);
  for(int i=0;i<ctrlFiles.size();i++)
	{
	  getline(infc[i],line);
	  parseBEDline(line,&ctrlLines[i],0,1,2,5);
	}

  // initialize with the "first" chromosome and its min/max pos.
  string currChr = chipLine.seq;
  int chrMinPos,chrMaxPos;
  int chrMinPosCtrl,chrMaxPosCtrl;
  int currLine = 1;
  int memNeeded;
  int chrPosChip,chrPosCtrl;
  int ctrlIndex;

  // tmp. storage for the chip/control signals.
  unsigned short *chipF,*chipR,*ctrlF,*ctrlR;

  // introduce curr pos, curr Chr etc. and a loop on !EOF in the chip file. 
  // no point in normalizing where there are no signals in chip...

  while(currLine <= nlinesChIP)
    {
	  chrMinPos = seqMapChip.find(currChr)->second.minPos;
	  chrMaxPos = seqMapChip.find(currChr)->second.maxPos;;
	  if(talk)
		cout<<"ChIP: "<<chipLine.seq<<" "<<chrMinPos<<" "<<chrMaxPos<<endl;
	  chrPosChip = chrMaxPos - chrMinPos +1;
	  // check the min/max for this chr in ctrl-data
	  chrMinPosCtrl = INT_MAX;
	  chrMaxPosCtrl = -1;
	  for(int i = 0;i<ctrlFiles.size();i++)
		{
		  if(seqMapCtrls[i].count(currChr))
			{
			  chrMinPosCtrl = min(chrMinPosCtrl,seqMapCtrls[i][currChr].minPos);
			  chrMaxPosCtrl = max(chrMaxPosCtrl,seqMapCtrls[i][currChr].maxPos);
			} 
		}
	  if(talk)
		cout<<"Control: "<<chipLine.seq<<" "<<chrMinPosCtrl<<" "<<chrMaxPosCtrl<<endl;
	  chrPosCtrl = chrMaxPosCtrl - chrMinPosCtrl +1;
	  memNeeded = sizeof(unsigned short)*(chrPosChip + ctrlFiles.size()*chrPosCtrl);
	  // allocate memory to hold the entire chromosome, do the regression in chunks. 
	  try{
		cout<<"Trying to allocate: ";	
		if(memNeeded > 1000000000)
		  cout<<memNeeded/1000000000<<" Gb for "<<currChr<<".";
		else if (memNeeded > 1000000)
		  cout<<memNeeded/1000000<<" Mb for "<<currChr<<".";
		else if (memNeeded > 1000)
		  cout<<memNeeded/1000<<" kb for "<<currChr<<".";
		else
		  cout<<memNeeded<<" bytes for raw signals"<<currChr<<".";
		
		chipF = new unsigned short[chrPosChip];
		chipR = new unsigned short[chrPosChip];
		ctrlF = new unsigned short[chrPosCtrl*ctrlFiles.size()]; // these will need to be accessed in a "[i + chrPosCtrl*j]"-type of fashion.
		ctrlR = new unsigned short[chrPosCtrl*ctrlFiles.size()];
		cout<<" Done."<<endl;
	  }catch (std::bad_alloc  &f){
		cerr<<string(argv[0])<<" couldn't allocate as much memory as it wanted. Failure: '"<<f.what()<<endl;
		// close files. 
		inf.close();
		for (int i = 0;i<ctrlFiles.size();i++)  
		  infc[i].close();
		outf.close();	
		
		delete[] chipF;
		delete[] chipR;
		delete[] ctrlF;
		delete[] ctrlR;
		delete[] resiF;
		delete[] resiR;
		delete[] cntF;
		delete[] cntR;
		return(-1);
	  }

	  // make sure it's all zeroes.
	  for(int i=0;i<chrPosChip;i++)
		{
		  chipF[i] = 0;
		  chipR[i] = 0;
		}
	  for(int i=0;i<chrPosCtrl;i++)
		for(int j = 0;j<ctrlFiles.size();j++)
		  {
			ctrlF[i + j*chrPosCtrl] = 0;
			ctrlR[i + j*chrPosCtrl] = 0;
		  }
			
	  
	  // read in the sought chip-data
	  while((chipLine.seq == currChr)  && !(inf.eof())) // chip-file
		{
		  //cout<<chipLine.seq<<"\t"<<line<<endl;
		  // update previous line's data.
		  if(chipLine.strand == 1)
			chipF[chipLine.pos-chrMinPos]++;
		  else
			chipR[chipLine.pos-chrMinPos+chipLine.len]++;
		  // read in the nextline.
		  getline(inf,line);
		  parseBEDline(line,&chipLine,0,1,2,5);
		  currLine++;
		}
	   if((chipLine.seq == currChr)  && (inf.eof())) // chip-file, last read, ok chr, use.
		{
		  //cout<<"Last line of the ChipFIle"<<endl;
		  //cout<<chipLine.seq<<"\t"<<line<<endl;
		  if(chipLine.strand == 1)
			chipF[chipLine.pos-chrMinPos]++;
		  else
			chipR[chipLine.pos-chrMinPos+chipLine.len-1]++;
		  
	}
	  
	  // read in the sought ctrl-data
	  for(int i = 0;i<ctrlFiles.size();i++)
		{
		  // is there data at all for this chr in this control file?
		  if(seqMapCtrls[i].count(currChr) == 1)
			{
			  // we're assuming that the chromosomes are in the same order in the chip & ctrl files. 
			  // cases:
			  // chr on current line is not the same as in chip
              //    => we know that we should have chr data on this chr & that chrs comes in the same order. this can prob. only 
			  //       happen for a chr-specific chromosome, e.g. its safe to read past and check again. 
			  // chr on current line is the same as in chip
			  //   => this is good. last time (either preFirst or not) should have read prev. chr completely. so just start reading until we hit 
			  //      another chr. 
			  // 
			 
			  if(ctrlLines[i].seq != currChr)
				{
				  // read past th "wrong" chromosome(s). 
				  while(!infc[i].eof() && ctrlLines[i].seq != currChr)
					{
					  getline(infc[i],line);
					  parseBEDline(line,&ctrlLines[i],0,1,2,5);
					}
				}
			  
			  // now we have the first line of the the correct chr in 'ctrlLines[i]'
			  // Read in the complete chr and store the data accordingly.
			  while(!infc[i].eof() && ctrlLines[i].seq == currChr)
				{
				  if(ctrlLines[i].strand == 1)
					ctrlF[ctrlLines[i].pos-chrMinPosCtrl + i*chrPosCtrl]++;
				  else
					ctrlR[ctrlLines[i].pos-chrMinPosCtrl+ctrlLines[i].len + i*chrPosCtrl-1]++;
				  getline(infc[i],line);
				  parseBEDline(line,&ctrlLines[i],0,1,2,5);
				}
			}
		  
		}
	  cout<<"Analysing "<<currChr<<endl;
	  currChr = chipLine.seq; // store "next" chromosome
	  // now all data for this chr is read. Start analysing in chunks. 
	  lowPos = chrMinPos;
	  while(lowPos < chrMaxPos) // loop over this chromosome data in chunks.
		{ 
		  if(!talk)
			{
			  cout<<lowPos<<" of "<<chrMaxPos<<"\r";
			}
		  highPos = lowPos + chunkSize-1;
		  if(highPos >= (chrMaxPos - 0.5*chunkSize)) // less than 0.8 of a chunk left. merge.
			highPos =  chrMaxPos;
		  chunkRange = highPos - lowPos + 1;
		  if(talk)
			cout<<"["<<lowPos<<","<<highPos<<"]\tsize: "<<chunkRange<<endl;
		  
		  resiF = new double[chunkRange];
		  resiR = new double[chunkRange];
		  cntF = new int[chunkRange];
		  cntR = new int[chunkRange];
		  for(int i=0;i<chunkRange;i++)
			{
			  resiF[i] = 0.0;
			  resiR[i] = 0.0;
			  cntF[i] = 0;
			  cntR[i] = 0;
			}
		  
		  // for each chunk, step forward in 'interval' steps and average signals in that window.
		  chunkObs = (chunkRange-2*windowSize)/interval + 1;
		  if (talk) 
			cout<<"\tsampling this chunk at "<<chunkObs<<" positions."<<endl;  
		  // Storage for the signals on '+'
		  XF = gsl_matrix_alloc (chunkObs, ctrlFiles.size());
		  yF = gsl_vector_alloc (chunkObs);
		  rF = gsl_vector_alloc (chunkObs);
		  cF = gsl_vector_alloc (ctrlFiles.size());
		  covF = gsl_matrix_alloc (ctrlFiles.size(), ctrlFiles.size());
		  // Storage for the signals on '-'
		  XR = gsl_matrix_alloc (chunkObs, ctrlFiles.size());
		  yR = gsl_vector_alloc (chunkObs);
		  rR = gsl_vector_alloc (chunkObs);
		  cR = gsl_vector_alloc (ctrlFiles.size());
		  covR = gsl_matrix_alloc (ctrlFiles.size(), ctrlFiles.size());
		  
		  // loop over the signals in interval steps and average in +/- windowSize. fill in the matrices. 
		  obsCount = 0;
		  for (int i = lowPos+windowSize;i<(highPos-windowSize);)
			{
			  // collect sums over each signal in the sough window
			  for (int j = 0;j < ctrlFiles.size() + 1;j++)
				{
				  winSumF[j] = 0;
				  winSumR[j] = 0;
				}
			  for (int j = -windowSize;j<=windowSize;j++)
				{				
				  winSumF[0] += chipF[i - chrMinPos + j];
				  winSumR[0] += chipR[i - chrMinPos + j];
				  for(int k = 0;k<ctrlFiles.size();k++)
					{
					  ctrlIndex = i - chrMinPosCtrl + j + k*chrPosCtrl;
					  if(ctrlIndex >= 0 && ctrlIndex <chrPosCtrl) // is there ctrl data for this position?
						{
						  winSumF[1+k] += ctrlF[ctrlIndex];
						  winSumR[1+k] += ctrlR[ctrlIndex];
						}
					}
				}
			  // the chip signal
			  gsl_vector_set (yF, obsCount, (double)winSumF[0]/(double)(2*windowSize+1));
			  gsl_vector_set (yR, obsCount, (double)winSumR[0]/(double)(2*windowSize+1));
			  
			  // the control signals
			  for (int j = 0;j < ctrlFiles.size();j++)
				{
				  gsl_matrix_set (XF, obsCount, j, (double)winSumF[j+1]/(double)(2*windowSize+1));
				  gsl_matrix_set (XR, obsCount, j, (double)winSumR[j+1]/(double)(2*windowSize+1));
				}
			  obsCount++;
			  i+=interval; 
			}
		  // fit the models.
		  gsl_multifit_linear_workspace * work  = gsl_multifit_linear_alloc (chunkObs, ctrlFiles.size());
		  /* 
		   * '+' Strand
		   */
		  gsl_multifit_linear (XF, yF, cF, covF,&chisqF, work);
		  if(talk)
			{
			  cout<<"\t'+' chisq: "<<chisqF<<"\t"<<"c's:";
			  for (int j = 0;j < ctrlFiles.size();j++)
				cout<<gsl_vector_get(cF,j)<<" ";
			  cout<<endl;
			}
		  /* 
		   * '-' Strand
		   */
		  gsl_multifit_linear (XR, yR, cR, covR,&chisqR, work);
		  if(talk)
			{
			  cout<<"\t'-' chisq: "<<chisqR<<"\t"<<"c's:";
			  for (int j = 0;j < ctrlFiles.size();j++)
				cout<<gsl_vector_get(cR,j)<<" ";
			  cout<<endl;
			}
		  
		  gsl_multifit_linear_free (work);
		  // calculate residuals.
		  if(talk)
			cout<<"\tCaclulating residuals..";
		  gsl_multifit_linear_residuals (XF,yF,cF,rF);
		  gsl_multifit_linear_residuals (XR,yR,cR,rR);
		  if(talk)
			cout<<"done."<<endl<<"\tRebuilding signal..";
		  // rebuild a per-bp-signal 
		  for (int i=0;i<chunkObs;i++)
			{
			  // center of this observation. 
			  posOff = 1+(i+1)*interval;
			  if(posOff > chunkRange) // outside of our chunk, should never happen.
				continue;
			  if(gsl_vector_get(rF,i) > 0.5)  // original R-code used 'round' on the residuals, ceiling(x-0.5) does the same thing
				for (int j = -windowSize;j<=windowSize;j++)
				  {
					resiF[j+posOff] += ceil(gsl_vector_get(rF,i)-0.5);
					cntF[j+posOff] += 1;
				  }
			  if(gsl_vector_get(rR,i) > 0.5)
				for (int j = -windowSize;j<=windowSize;j++)
				  {
					resiR[j+posOff] += ceil(gsl_vector_get(rR,i)-0.5);
					cntR[j+posOff] += 1;
				  }
			}
		  
		  if(talk)
			cout<<"done."<<endl<<"\tWriting output..";
		  for (int i=0;i<chunkRange;i++)
			{
			  if(cntF[i] > 0)
				{
				  resiF[i] = resiF[i]/(double)cntF[i];
				  if(resiF[i] > 0) 
					{
					  for(int j=0;j<ceil(resiF[i]);j++)
						{
						  outf<<currChr<<"\t"<<lowPos + i-1<<"\t"<<lowPos+i+readLen-2<<"\tDUMMY\t"; // bed is zero-based, halfopen (ie.-1/-2)
						  outf<<resiF[i]<<"\t+"<<endl;
						}
					}
				}
			  
			  if(cntR[i] > 0)
				{
				  resiR[i] = resiR[i]/(double)cntR[i];
				  if(resiR[i] > 0)
					{
					  for(int j=0;j<ceil(resiR[i]);j++)
						{
						  outf<<currChr<<"\t"<<lowPos + i-readLen-2<<"\t"<<lowPos+i-1<<"\tDUMMY\t";
						  outf<<resiR[i]<<"\t-"<<endl;
						}
					}

				}
			}
		  if(talk)
			cout<<"done."<<endl;
		  lowPos = highPos+1;
		  delete[] resiF;
		  delete[] resiR;
		  delete[] cntF;
		  delete[] cntR;
		  gsl_matrix_free(XF);
		  gsl_vector_free(yF);
		  gsl_vector_free(rF);
		  gsl_vector_free(cF);
		  gsl_matrix_free(covF);

		  gsl_matrix_free(XR);
		  gsl_vector_free(yR);
		  gsl_vector_free(rR);
		  gsl_vector_free(cR);
		  gsl_matrix_free(covR);
		}
	  if(!talk)
		cout<<endl;
	}


  // close files. 
  inf.close();
  for (int i = 0;i<ctrlFiles.size();i++)  
	infc[i].close();
  outf.close();
  string statFname = "readStats.txt";
  bool writeStats = true;
  ofstream ofc;
  ofc.open(statFname.c_str(),ios::trunc);
  if (ofc.fail())
    {
      failmessage.clear();
      failmessage.append("ERROR: Output file \"");
      failmessage.append(statFname.c_str());
      failmessage.append("\" could not be created, skipping.\n");
      writeStats = false;
    }
  
  if(writeStats)
    {
	  ofc <<"Chip reads"<<endl<<"Name\t"<<"minCrd\t"<<"maxCrd\t"<<"F_counts\t"<<"R_counts\t"<<endl;
      for ( it=seqMapChip.begin() ; it != seqMapChip.end(); it++ )
		{
		  ofc << (*it).first << "\t" << (*it).second.minPos << "\t" << (*it).second.maxPos<<"\t";
		  ofc << (*it).second.countF << "\t" << (*it).second.countR;
		  ofc <<endl;
		}
	  for (int i = 0;i<ctrlFiles.size();i++)
		{
		  ofc<<ctrlFiles[i]<<endl;
		  ofc <<"Control reads"<<endl<<"Name\t"<<"minCrd\t"<<"maxCrd\t"<<"F_counts\t"<<"R_counts\t"<<endl;
		  for ( it=seqMapCtrls[i].begin() ; it != seqMapCtrls[i].end(); it++ )
			{
			  ofc << (*it).first << "\t" << (*it).second.minPos << "\t" << (*it).second.maxPos<<"\t";
			  ofc << (*it).second.countF << "\t" << (*it).second.countR;
			  ofc <<endl;
			}
		}
    }else{
    cerr<<failmessage.c_str()<<endl;
  }
  ofc.close();
  
  return(0);
}
Exemple #6
0
/*
 * Regression test on the Longley data set (longley_x, longley_y,
 * longley_n, longley_p are defined elsewhere in this file).
 *
 * Two fits are run and compared against the same reference values:
 *   1. gsl_multifit_linear   (unweighted)
 *   2. gsl_multifit_wlinear  (all weights set to 1.0)
 *
 * For each fit the coefficients, the covariance matrix (diagonal only for
 * the unweighted fit, full matrix for the weighted fit), the reported
 * chi-squared and the chi-squared recomputed from the residuals are
 * checked with gsl_test_rel.
 *
 * The reference arrays hold 7 entries, so longley_p is expected to be 7.
 */
void 
test_longley ()
{     
  size_t i, j;

  /* Reference results shared by the unweighted and the unit-weight fit. */
  const double expected_c[7] = {  -3482258.63459582,
                                  15.0618722713733,
                                  -0.358191792925910E-01,
                                  -2.02022980381683,
                                  -1.03322686717359,
                                  -0.511041056535807E-01,
                                  1829.15146461355 };

  const double expected_chisq = 836424.055505915;

  /* unweighted least squares */
  {
    gsl_multifit_linear_workspace * work = 
      gsl_multifit_linear_alloc (longley_n, longley_p);

    gsl_matrix_view X = gsl_matrix_view_array (longley_x, longley_n, longley_p);
    gsl_vector_view y = gsl_vector_view_array (longley_y, longley_n);
    gsl_vector * c = gsl_vector_alloc (longley_p);
    gsl_vector * r = gsl_vector_alloc (longley_n);
    gsl_matrix * cov = gsl_matrix_alloc (longley_p, longley_p);
    gsl_vector_view diag = gsl_matrix_diagonal (cov);

    double chisq;

    /* Expected standard deviations; cov(i,i) is compared to their squares. */
    const double expected_sd[7]  = {  890420.383607373,      
                                      84.9149257747669,      
                                      0.334910077722432E-01, 
                                      0.488399681651699,     
                                      0.214274163161675,     
                                      0.226073200069370,     
                                      455.478499142212 } ;  

    gsl_multifit_linear (&X.matrix, &y.vector, c, cov, &chisq, work);

    /* Indices are cast to int because the descriptions use %d and the
       arguments travel through a variadic call. */
    for (i = 0; i < longley_p; i++)
      {
        gsl_test_rel (gsl_vector_get(c,i), expected_c[i], 1e-10,
                      "longley gsl_fit_multilinear c%d", (int) i) ;
      }

    for (i = 0; i < longley_p; i++)
      {
        gsl_test_rel (gsl_vector_get(&diag.vector,i), pow(expected_sd[i],2.0), 1e-10,
                      "longley gsl_fit_multilinear cov%d%d", (int) i, (int) i) ;
      }

    gsl_test_rel (chisq, expected_chisq, 1e-10, "longley gsl_fit_multilinear chisq") ;

    /* chi-squared recomputed as r.r from the residual vector */
    gsl_multifit_linear_residuals(&X.matrix, &y.vector, c, r);
    gsl_blas_ddot(r, r, &chisq);
    gsl_test_rel (chisq, expected_chisq, 1e-10, "longley gsl_fit_multilinear residuals") ;

    gsl_vector_free(c);
    gsl_vector_free(r);
    gsl_matrix_free(cov);
    gsl_multifit_linear_free (work);
  }


  /* weighted least squares with all weights equal to 1.0 */
  {
    gsl_multifit_linear_workspace * work = 
      gsl_multifit_linear_alloc (longley_n, longley_p);

    gsl_matrix_view X = gsl_matrix_view_array (longley_x, longley_n, longley_p);
    gsl_vector_view y = gsl_vector_view_array (longley_y, longley_n);
    gsl_vector * w = gsl_vector_alloc (longley_n);
    gsl_vector * c = gsl_vector_alloc (longley_p);
    gsl_vector * r = gsl_vector_alloc (longley_n);
    gsl_matrix * cov = gsl_matrix_alloc (longley_p, longley_p);

    double chisq;

    const double expected_cov[7][7] = { { 8531122.56783558,
-166.727799925578, 0.261873708176346, 3.91188317230983,
1.1285582054705, -0.889550869422687, -4362.58709870581},

{-166.727799925578, 0.0775861253030891, -1.98725210399982e-05,
-0.000247667096727256, -6.82911920718824e-05, 0.000136160797527761,
0.0775255245956248},

{0.261873708176346, -1.98725210399982e-05, 1.20690316701888e-08,
1.66429546772984e-07, 3.61843600487847e-08, -6.78805814483582e-08,
-0.00013158719037715},

{3.91188317230983, -0.000247667096727256, 1.66429546772984e-07,
2.56665052544717e-06, 6.96541409215597e-07, -9.00858307771567e-07,
-0.00197260370663974},

{1.1285582054705, -6.82911920718824e-05, 3.61843600487847e-08,
6.96541409215597e-07, 4.94032602583969e-07, -9.8469143760973e-08,
-0.000576921112208274},

{-0.889550869422687, 0.000136160797527761, -6.78805814483582e-08,
-9.00858307771567e-07, -9.8469143760973e-08, 5.49938542664952e-07,
0.000430074434198215},

{-4362.58709870581, 0.0775255245956248, -0.00013158719037715,
-0.00197260370663974, -0.000576921112208274, 0.000430074434198215,
2.23229587481535 }} ;

    gsl_vector_set_all (w, 1.0);

    gsl_multifit_wlinear (&X.matrix, w, &y.vector, c, cov, &chisq, work);

    for (i = 0; i < longley_p; i++)
      {
        gsl_test_rel (gsl_vector_get(c,i), expected_c[i], 1e-10,
                      "longley gsl_fit_wmultilinear c%d", (int) i) ;
      }

    /* Full covariance matrix, checked at a looser tolerance (1e-7). */
    for (i = 0; i < longley_p; i++) 
      {
        for (j = 0; j < longley_p; j++)
          {
            /* i and j are size_t; passing them straight to %d is a
               type mismatch in a variadic call, hence the casts. */
            gsl_test_rel (gsl_matrix_get(cov,i,j), expected_cov[i][j], 1e-7, 
                          "longley gsl_fit_wmultilinear cov(%d,%d)",
                          (int) i, (int) j) ;
          }
      }

    gsl_test_rel (chisq, expected_chisq, 1e-10, "longley gsl_fit_wmultilinear chisq") ;

    /* chi-squared recomputed as r.r from the residual vector */
    gsl_multifit_linear_residuals(&X.matrix, &y.vector, c, r);
    gsl_blas_ddot(r, r, &chisq);
    gsl_test_rel (chisq, expected_chisq, 1e-10, "longley gsl_fit_wmultilinear residuals") ;

    gsl_vector_free(w);
    gsl_vector_free(c);
    gsl_vector_free(r);
    gsl_matrix_free(cov);
    gsl_multifit_linear_free (work);
  }
}