Example #1
void base_reg::base_score(const mematrix<double>& resid,
                          const int model,
                          const int interaction,
                          const int ngpreds,
                          const masked_matrix& invvarmatrix,
                          const int nullmodel) {
    mematrix<double> oX = reg_data.extract_genotypes();
    mematrix<double> X = apply_model(oX, model, interaction, ngpreds,
            reg_data.is_interaction_excluded, false, nullmodel);
    beta.reinit(X.ncol, 1);
    sebeta.reinit(X.ncol, 1);
    int length_beta = X.ncol;
    double N = static_cast<double>(resid.nrow);
    mematrix<double> tX = transpose(X);
    if (invvarmatrix.length_of_mask != 0) {
        tX = tX * invvarmatrix.masked_data;
    }

    mematrix<double> u = tX * resid;
    mematrix<double> v = tX * X;
    mematrix<double> csum = column_sum(X);
    csum = transpose(csum) * csum;
    csum = csum * (1. / N);
    v = v - csum;
    // use cholesky to invert

    LDLT<MatrixXd> Ch = LDLT<MatrixXd>(v.data.selfadjointView<Lower>());
    // Previously this was computed with an explicit inverse:
    //   mematrix<double> v_i = invert(v);
    beta.data = Ch.solve(u.data);  // beta = v^{-1} u
    // TODO(maartenk): set size of v_i directly or remove mematrix class
    mematrix<double> v_i = v;
    v_i.data = Ch.solve(MatrixXd::Identity(length_beta, length_beta));

    double sr = 0.;
    double srr = 0.;
    for (int i = 0; i < resid.nrow; i++)
    {
        sr += resid[i];
        srr += resid[i] * resid[i];
    }
    double mean_r = sr / N;
    double sigma2_internal = (srr - N * mean_r * mean_r) / (N - beta.nrow);
    for (int i = 0; i < beta.nrow; i++)
        sebeta[i] = sqrt(v_i.get(i, i) * sigma2_internal);

    mematrix<double> chi2 = transpose(u) * v_i * u;
    chi2 = chi2 * (1. / sigma2_internal);
    chi2_score = chi2[0];
}
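The core pattern above is to factor the symmetric matrix v once with LDLT and then reuse the factorization for the coefficient solve and the chi-square statistic u^T v^{-1} u / sigma^2. A minimal, self-contained sketch of that pattern; the matrix names and sizes below are illustrative stand-ins, not ProbABEL's mematrix wrappers:

#include <Eigen/Dense>
#include <iostream>

int main() {
    using Eigen::MatrixXd;
    using Eigen::VectorXd;

    // Toy stand-ins for v = X^T X (symmetric PD) and u = X^T resid.
    MatrixXd A = MatrixXd::Random(10, 2);
    MatrixXd v = A.transpose() * A;
    VectorXd u = VectorXd::Random(2);
    double sigma2 = 1.5;  // assumed residual variance

    Eigen::LDLT<MatrixXd> Ch(v.selfadjointView<Eigen::Lower>());
    VectorXd beta = Ch.solve(u);          // beta = v^{-1} u
    double chi2 = u.dot(beta) / sigma2;   // u^T v^{-1} u / sigma^2

    std::cout << "beta:\n" << beta << "\nchi2: " << chi2 << "\n";
    return 0;
}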
Example #2
/**
 * \brief Perform least-squares regression
 *
 * We solve the following linear system in the least-squares sense:
 * \f[
 * \mathbf{y} = \mathbf{X} \boldsymbol{\beta}.
 * \f]
 *
 * This function also computes the residual sum of squares used to
 * estimate \f$\sigma^2\f$, the variance of the error term. An
 * estimator of \f$\sigma^2\f$ is given by:
 * \f[
 * \hat\sigma^2 = \frac{1}{n-p}||\mathbf{y} - \mathbf{X} \boldsymbol{\beta}||^2,
 * \f]
 * with \f$n\f$ the number of rows of \f$\mathbf{X}\f$ and \f$p\f$ the
 * number of columns of \f$\mathbf{X}\f$. Note that only the squared
 * norm is stored in \c sigma2 here; the division by \f$n-p\f$ is
 * performed by the caller.
 *
 * @param X The design matrix
 * @param Ch Reference to the LDLT decomposition of
 * \f$\mathbf{X}^T\mathbf{X}\f$; it is filled here so the caller can
 * reuse it.
 */
void linear_reg::LeastSquaredRegression(const mematrix<double>& X,
                                        LDLT<MatrixXd>& Ch) {
    int m = X.ncol;
    // Form X^T X with a symmetric rank update that only writes the
    // lower triangle.
    MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>().rankUpdate(
            X.data.adjoint());
    Ch = LDLT<MatrixXd>(txx.selfadjointView<Lower>());
    // Solve the normal equations (X^T X) beta = X^T y.
    beta.data = Ch.solve(X.data.adjoint() * reg_data.Y.data);
    // Residual sum of squares; divided by n - p in the caller.
    sigma2 = (reg_data.Y.data - (X.data * beta.data)).squaredNorm();
}
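A standalone sketch of the same normal-equations pattern: build X^T X with a rank update that touches only the lower triangle, solve with LDLT, and divide the residual sum of squares by n - p as the caller does. All names and sizes here are illustrative:

#include <Eigen/Dense>
#include <iostream>

int main() {
    using Eigen::MatrixXd;
    using Eigen::VectorXd;

    MatrixXd X = MatrixXd::Random(20, 3);   // toy design matrix
    VectorXd y = VectorXd::Random(20);      // toy response

    // Form X^T X via a symmetric rank update on the lower triangle.
    int m = static_cast<int>(X.cols());
    MatrixXd txx = MatrixXd(m, m).setZero()
                       .selfadjointView<Eigen::Lower>()
                       .rankUpdate(X.adjoint());

    Eigen::LDLT<MatrixXd> Ch(txx.selfadjointView<Eigen::Lower>());
    VectorXd beta = Ch.solve(X.adjoint() * y);   // normal equations
    double rss = (y - X * beta).squaredNorm();   // residual sum of squares
    double sigma2_hat = rss / (X.rows() - m);    // divide by n - p

    std::cout << "beta:\n" << beta << "\nsigma2_hat: " << sigma2_hat << "\n";
    return 0;
}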
Example #3
bool pdsolve(const SQMATTP& M,const VCTTP& b,VCTTP& res,double* logDet)
{
	/* static */ LDLT<SQMATTP> MSr;

	MSr = M.ldlt();      // Maybe replace this with a better rank-revealing criterion
	if (MSr.info()!=Success) return false;
	res = MSr.solve(b);
	if (logDet) {
		// log(det(M)) is the sum of the logs of the diagonal of D in the
		// LDLT decomposition: L has a unit diagonal and the permutation's
		// determinant (+/-1) appears squared, so det(M) = det(D).
		Diagonal<const SQMATTP> MSrdiag(MSr.vectorD());
		*logDet = log(MSrdiag(0));
		for (int i=1;i<M.rows();i++) *logDet += log(MSrdiag(i));
	}
	return true;
}
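A hedged usage sketch of pdsolve()'s core, assuming SQMATTP = Eigen::MatrixXd and VCTTP = Eigen::VectorXd. The log-determinant loop can equivalently be written with Eigen's array API:

#include <Eigen/Dense>
#include <iostream>

int main() {
    using Eigen::MatrixXd;
    using Eigen::VectorXd;

    MatrixXd A = MatrixXd::Random(5, 5);
    MatrixXd M = A * A.transpose() + 5.0 * MatrixXd::Identity(5, 5);  // SPD
    VectorXd b = VectorXd::Random(5);

    Eigen::LDLT<MatrixXd> MSr(M);
    if (MSr.info() != Eigen::Success) return 1;

    VectorXd res = MSr.solve(b);
    double logDet = MSr.vectorD().array().log().sum();  // sum of log(D_ii)

    std::cout << "res:\n" << res << "\nlogDet: " << logDet << "\n";
    return 0;
}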
Example #4
bool pdsolve(const SQMATTP& M,SQMATTP& MInv,double* logDet)
{
	/* static */ LDLT<SQMATTP> MSr;
	/* static */ SQMATTP Ip;
	/* static */ int Ipdim(0);
	int p(M.rows());

	if (Ipdim!=p) SetIdentity(Ip,p,&Ipdim);

	MSr = M.ldlt();      // Maybe replace this with a better rank-revealing criterion
	if (MSr.info()!=Success) return false;
	MInv = MSr.solve(Ip);
	if (logDet) {
		Diagonal<const SQMATTP> MSrdiag(MSr.vectorD());      
		*logDet = log(MSrdiag(0));
		for (int i=1;i<M.rows();i++) *logDet += log(MSrdiag(i));
	}
	return true;
}
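The inverse-returning overload admits the same kind of sketch. Eigen's solve() accepts a matrix right-hand side, so solving against the identity yields M^{-1} column by column (again assuming SQMATTP = Eigen::MatrixXd):

#include <Eigen/Dense>
#include <iostream>

int main() {
    using Eigen::MatrixXd;

    MatrixXd A = MatrixXd::Random(4, 4);
    MatrixXd M = A * A.transpose() + 4.0 * MatrixXd::Identity(4, 4);  // SPD

    Eigen::LDLT<MatrixXd> MSr(M);
    if (MSr.info() != Eigen::Success) return 1;

    MatrixXd MInv = MSr.solve(MatrixXd::Identity(4, 4));

    // Sanity check: M * MInv should be close to the identity.
    std::cout << "||M * MInv - I|| = "
              << (M * MInv - MatrixXd::Identity(4, 4)).norm() << "\n";
    return 0;
}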
Example #5
Image smooth_detector(const Image& source, Interpolation level, int r) {
  Image output(source.rows(), source.columns(), 1, numeric_limits<float>::max());
  const MatrixXf reg_matrix = ComputeRegMatrix(level, r);
  const LDLT<MatrixXf> solver = (reg_matrix.transpose() * reg_matrix).ldlt();
  for (int pr = 0; pr <= source.rows() - r; ++pr) {
    for (int pc = 0; pc <= source.columns() - r; ++pc) {
      VectorXf dist = VectorXf::Zero(r * r);
      for (int ch = 0; ch < source.channels(); ++ch) {
        EigenImage y = ExtractPatch(source, r, pr, pc, ch);
        VectorXf reg_surf = solver.solve(reg_matrix.transpose() * y.asvector());
        dist += (reg_matrix * reg_surf - y.asvector()).cwiseAbs2();
      }
      dist = dist.cwiseSqrt();
      for (int row = pr; row < min(output.rows(), pr + r); ++row) {
        for (int col = pc; col < min(output.columns(), pc + r); ++col) {
          output.val(col, row) = min(output.val(col, row), dist((row - pr) * r + col - pc));
        }
      }
    }
  }
  return output;
}
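The performance-relevant idea above is that the LDLT factorization of reg_matrix^T * reg_matrix is computed once, outside the pixel loops, and solve() is then called with a fresh right-hand side per patch and channel. A minimal sketch of that reuse pattern, with illustrative names and sizes:

#include <Eigen/Dense>

int main() {
    using Eigen::MatrixXf;
    using Eigen::VectorXf;

    // e.g. an 8x8 patch flattened to 64 pixels, fit with 6 basis functions
    MatrixXf reg_matrix = MatrixXf::Random(64, 6);

    // Factor once...
    const Eigen::LDLT<MatrixXf> solver =
        (reg_matrix.transpose() * reg_matrix).ldlt();

    // ...then solve many small least-squares problems against it.
    for (int patch = 0; patch < 1000; ++patch) {
        VectorXf y = VectorXf::Random(64);  // one patch as a vector
        VectorXf coeffs = solver.solve(reg_matrix.transpose() * y);
        VectorXf residual = reg_matrix * coeffs - y;  // per-pixel fit error
        (void)residual;  // a real detector would aggregate this
    }
    return 0;
}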
Example #6
/**
 * \brief Solve the linear system in case the --mmscore option was
 * specified.
 *
 * Specifying the --mmscore command line option requires a file name
 * argument as well. This file should contain the inverse
 * variance-covariance matrix. This function is run when linear
 * regression is done in combination with the mmscore option. It
 * solves the 'mmscore' equation as specified in Eq. (5) in section
 * 8.2.1 of the ProbABEL manual:
 * \f[
 *    \hat{\beta}_g = (\mathbf{X}^T_g
 *    \mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}
 *    \mathbf{X}_g)^{-1}
 *    \mathbf{X}^T_g \mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}
 *    \mathbf{R}_{\hat{\beta}_x},
 * \f]
 * where \f$\mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}\f$ is the
 * inverse variance-covariance matrix, and
 * \f$\mathbf{R}_{\hat{\beta}_x}\f$ is the vector containing the
 * residuals obtained from the base regression model i.e. the
 * phenotype. In this function, the phenotype is stored in the
 * variable \c Y.
 *
 * @param X The design matrix \f$X_g\f$. \c X should only contain the
 * parts involving genotype data (including any interactions involving
 * a genetic term), all other covariates should have been regressed out
 * before running ProbABEL.
 * @param W_masked The inverse variance-covariance matrix
 * \f$\mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}\f$.
 * @param Ch Reference to the LDLT Cholesky decomposition of the
 * matrix to be inverted to get \f$\hat\beta_g\f$:
 * \f[
 * \mathbf{X}^T_g
 *    \mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}
 *    \mathbf{X}_g.
 * \f]
 * On return this variable contains said matrix.
 */
void linear_reg::mmscore_regression(const mematrix<double>& X,
                                    const masked_matrix& W_masked,
                                    LDLT<MatrixXd>& Ch) {
    VectorXd Y = reg_data.Y.data.col(0);
    /*
     In ProbABEL <0.50 this calculation was performed as t(X)*W.
     This was changed to W*X, which vectorizes better because the
     left-hand side has more rows. It introduces an additional
     transpose, but that cost is negligible compared to the speedup
     (about a factor of 2 for palinear with one predictor).

     This function solves the system
        (X^T W X) beta = X^T W Y.
     Since W is symmetric, (WX)^T = X^T W, so this can be rewritten as
        (WX)^T X beta = (WX)^T Y,
     which is solved using the LDLT Cholesky decomposition.
     */
    MatrixXd WX = W_masked.masked_data->data * X.data;
    MatrixXd XWT = WX.transpose();
    Ch = LDLT<MatrixXd>(XWT * X.data);
    VectorXd beta_vec = Ch.solve(XWT * Y);
    sigma2 = (Y - WX * beta_vec).squaredNorm();
    beta.data = beta_vec;
}
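A self-contained sketch of the weighted (generalized least squares) solve performed above: (X^T W X) beta = X^T W Y, computed as W*X so the product runs over the taller operand. All names and dimensions here are illustrative:

#include <Eigen/Dense>
#include <iostream>

int main() {
    using Eigen::MatrixXd;
    using Eigen::VectorXd;

    const int n = 50, p = 2;
    MatrixXd X = MatrixXd::Random(n, p);
    VectorXd Y = VectorXd::Random(n);

    // A symmetric positive definite weight matrix standing in for V^{-1}.
    MatrixXd A = MatrixXd::Random(n, n);
    MatrixXd W = A * A.transpose() + MatrixXd::Identity(n, n);

    MatrixXd WX = W * X;            // W*X rather than t(X)*W
    MatrixXd XWT = WX.transpose();  // equals X^T W because W is symmetric
    Eigen::LDLT<MatrixXd> Ch(XWT * X);
    VectorXd beta = Ch.solve(XWT * Y);  // solves (X^T W X) beta = X^T W Y

    std::cout << "beta:\n" << beta << "\n";
    return 0;
}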
Example #7
/**
 * \brief Estimate the parameters for linear regression.
 *
 * @param verbose Turns verbose printing of various matrices on if
 * non-zero.
 * @param model The number of the genetic model (e.g. additive,
 * recessive, ...) that is to be applied by the apply_model() function.
 * @param interaction Interaction term specification; passed on to the
 * apply_model() function.
 * @param ngpreds Number of genomic predictors (1 for dosages, 2 for
 * probabilities).
 * @param invvarmatrixin The (masked) inverse variance-covariance
 * matrix; if its mask is non-empty, mmscore regression is performed
 * instead of ordinary least squares.
 * @param robust If non-zero calculate robust standard errors.
 * @param nullmodel If non-zero calculate the null model (excluding
 * SNP information).
 */
void linear_reg::estimate(const int verbose,
                          const int model,
                          const int interaction,
                          const int ngpreds,
                          masked_matrix& invvarmatrixin,
                          const int robust,
                          const int nullmodel) {
    // The interaction parameter and the model selection come in here.
    //regdata rdata = rdatain.get_unmasked_data();

    if (verbose)
    {
        cout << reg_data.is_interaction_excluded
                << " <-rdata.is_interaction_excluded\n";
        // std::cout << "invvarmatrix:\n";
        // invvarmatrixin.masked_data->print();
        std::cout << "rdata.X:\n";
        reg_data.X.print();
    }

    mematrix<double> X = apply_model(reg_data.X, model, interaction, ngpreds,
            reg_data.is_interaction_excluded, false, nullmodel);
    if (verbose)
    {
        std::cout << "X:\n";
        X.print();
        std::cout << "Y:\n";
        reg_data.Y.print();
    }

    int length_beta = X.ncol;
    beta.reinit(length_beta, 1);
    sebeta.reinit(length_beta, 1);
    //Han Chen
    if (length_beta > 1)
    {
        if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2)
        {
            covariance.reinit(length_beta - 2, 1);
        }
        else
        {
            covariance.reinit(length_beta - 1, 1);
        }
    }

    double sigma2_internal;

    LDLT <MatrixXd> Ch;
    if (invvarmatrixin.length_of_mask != 0)
    {
        //retrieve masked data W
        invvarmatrixin.update_mask(reg_data.masked_data);
        mmscore_regression(X, invvarmatrixin, Ch);
        double N = X.nrow;
        //sigma2_internal = sigma2 / (N - static_cast<double>(length_beta));
        // Ugly fix to the fact that if we do mmscore, sigma2 is already
        //  in the matrix...
        //      YSA, 2009.07.20
        sigma2_internal = 1.0;
        sigma2 /= N;
    }
    else  // NO mm-score regression : normal least square regression
    {
        LeastSquaredRegression(X, Ch);
        double N = static_cast<double>(X.nrow);
        double P = static_cast<double>(length_beta);
        sigma2_internal = sigma2 / (N - P);
        sigma2 /= N;
    }
    /*
     loglik = 0.;
     double ss=0;
     for (int i=0;i<rdata.nids;i++) {
     double resid = rdata.Y[i] - beta.get(0,0); // intercept
     for (int j=1;j<beta.nrow;j++) resid -= beta.get(j,0)*X.get(i,j);
     // residuals[i] = resid;
     ss += resid*resid;
     }
     sigma2 = ss/N;
     */
    //cout << "estimate " << rdata.nids << "\n";
    //(rdata.X).print();
    //for (int i=0;i<rdata.nids;i++) cout << rdata.masked_data[i] << " ";
    //cout << endl;
    logLikelihood(X);

    MatrixXd tXX_inv = Ch.solve(MatrixXd::Identity(length_beta, length_beta));
    mematrix<double> robust_sigma2(X.ncol, X.ncol);

    int offset = X.ncol - 1;
    // If additive model with interaction, two genomic predictors and
    // more than two betas:
    if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2) {
        offset = X.ncol - 2;
    }

    if (robust)
    {
        RobustSEandCovariance(X, robust_sigma2, tXX_inv, offset);
    }
    else
    {
        PlainSEandCovariance(sigma2_internal, tXX_inv, offset);
    }
}
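The standard-error step near the end of estimate() recovers (X^T X)^{-1} from the already-computed LDLT by solving against the identity; the plain (non-robust) standard errors are then sqrt(sigma2 * diagonal). A sketch of that step under ordinary least squares, with illustrative data:

#include <Eigen/Dense>
#include <iostream>

int main() {
    using Eigen::MatrixXd;
    using Eigen::VectorXd;

    MatrixXd X = MatrixXd::Random(30, 3);
    VectorXd y = VectorXd::Random(30);

    Eigen::LDLT<MatrixXd> Ch(X.transpose() * X);
    VectorXd beta = Ch.solve(X.transpose() * y);
    double sigma2_internal =
        (y - X * beta).squaredNorm() / static_cast<double>(X.rows() - X.cols());

    // Reuse the factorization to get (X^T X)^{-1}, as done with tXX_inv above.
    MatrixXd tXX_inv = Ch.solve(MatrixXd::Identity(X.cols(), X.cols()));
    VectorXd sebeta = (sigma2_internal * tXX_inv.diagonal()).cwiseSqrt();

    std::cout << "sebeta:\n" << sebeta << "\n";
    return 0;
}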