Example #1
void base_reg::base_score(const mematrix<double>& resid,
                          const int model,
                          const int interaction,
                          const int ngpreds,
                          const masked_matrix& invvarmatrix,
                          const int nullmodel) {
    mematrix<double> oX = reg_data.extract_genotypes();
    mematrix<double> X = apply_model(oX, model, interaction, ngpreds,
            reg_data.is_interaction_excluded, false, nullmodel);
    beta.reinit(X.ncol, 1);
    sebeta.reinit(X.ncol, 1);
    int length_beta = X.ncol;
    double N = static_cast<double>(resid.nrow);
    mematrix<double> tX = transpose(X);
    if (invvarmatrix.length_of_mask != 0){
        tX = tX * invvarmatrix.masked_data;
    }

    mematrix<double> u = tX * resid;
    mematrix<double> v = tX * X;
    mematrix<double> csum = column_sum(X);
    csum = transpose(csum) * csum;
    csum = csum * (1. / N);
    v = v - csum;
    // Use an LDLT Cholesky factorization instead of explicitly inverting v.
    // before was: mematrix<double> v_i = invert(v);
    LDLT<MatrixXd> Ch(v.data.selfadjointView<Lower>());
    // beta = v^{-1} u, so the right-hand side of the solve is u itself;
    // solving against v.data.adjoint() * u.data would reduce to u for
    // symmetric v (v^{-1} v u = u) and never apply the inverse.
    beta.data = Ch.solve(u.data);
    // TODO(maartenk): set size of v_i directly or remove mematrix class
    mematrix<double> v_i = v;
    v_i.data = Ch.solve(MatrixXd::Identity(length_beta, length_beta));

    double sr = 0.;
    double srr = 0.;
    for (int i = 0; i < resid.nrow; i++)
    {
        sr += resid[i];
        srr += resid[i] * resid[i];
    }
    double mean_r = sr / N;
    double sigma2_internal = (srr - N * mean_r * mean_r) / (N - beta.nrow);
    for (int i = 0; i < beta.nrow; i++)
        sebeta[i] = sqrt(v_i.get(i, i) * sigma2_internal);

    mematrix<double> chi2 = transpose(u) * v_i * u;
    chi2 = chi2 * (1. / sigma2_internal);
    chi2_score = chi2[0];
}
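The score statistic above is chi2 = u' v^{-1} u / sigma2, with one LDLT factorization reused for beta, the standard errors, and the statistic. A minimal self-contained sketch of the same pattern with plain Eigen types (score_chi2 and its signature are illustrative, not ProbABEL's API):

// Minimal sketch, assuming plain Eigen types; score_chi2 is a hypothetical
// helper, not part of ProbABEL.
#include <Eigen/Dense>

double score_chi2(const Eigen::MatrixXd& X, const Eigen::VectorXd& resid) {
    using namespace Eigen;
    const double N = static_cast<double>(X.rows());
    VectorXd u = X.transpose() * resid;             // score vector u = X'r
    MatrixXd v = X.transpose() * X;                 // information matrix
    RowVectorXd csum = X.colwise().sum();
    v -= csum.transpose() * csum / N;               // centering, as in base_score()
    LDLT<MatrixXd> Ch(v.selfadjointView<Lower>());  // factor once
    const double mean_r = resid.mean();
    const double sigma2 = (resid.squaredNorm() - N * mean_r * mean_r)
                          / (N - static_cast<double>(X.cols()));
    VectorXd vi_u = Ch.solve(u);                    // v^{-1} u
    return u.dot(vi_u) / sigma2;                    // chi2 = u' v^{-1} u / sigma2
}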
Example #2
bool pdsolve(const SQMATTP& M, const VCTTP& b, VCTTP& res, double* logDet)
{
	/* static */ LDLT<SQMATTP> MSr;

	MSr = M.ldlt();      // Maybe replace this by a better rank-revealing criterion
	if (MSr.info() != Success) return false;
	res = MSr.solve(b);
	if (logDet) {
		// det(M) = det(D): the unit-triangular L contributes 1 and the two
		// permutation factors cancel, so log|M| is the sum of log(d_i).
		Diagonal<const SQMATTP> MSrdiag(MSr.vectorD());
		*logDet = log(MSrdiag(0));
		for (int i = 1; i < M.rows(); i++) *logDet += log(MSrdiag(i));
	}
	return true;
}
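Assuming SQMATTP and VCTTP are Eigen dense types (e.g. MatrixXd/VectorXd), the log-determinant loop can be cross-checked with a one-line array expression over vectorD(). A small usage sketch:

// Usage sketch, assuming SQMATTP/VCTTP correspond to Eigen::MatrixXd and
// Eigen::VectorXd. Verifies the log-determinant against Eigen directly.
#include <Eigen/Dense>
#include <cmath>
#include <iostream>

int main() {
    using namespace Eigen;
    MatrixXd A = MatrixXd::Random(5, 5);
    MatrixXd M = A * A.transpose() + 5.0 * MatrixXd::Identity(5, 5);  // SPD
    LDLT<MatrixXd> MSr = M.ldlt();
    // Same quantity as the loop above: log|M| = sum_i log(d_i).
    double logDet = MSr.vectorD().array().log().sum();
    std::cout << logDet << " vs " << std::log(M.determinant()) << "\n";
}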
Example #3
/**
 * \brief Perform least squares regression
 *
 * Basically we solve the following linear system here:
 * \f[
 * \mathbf{y} = \mathbf{X} \boldsymbol{\beta}
 * \f]
 *
 * This function also estimates \f$\sigma^2\f$, the variance of the
 * error term. An estimator of \f$\sigma^2\f$ is given by:
 * \f[
 * \hat\sigma^2 = \frac{1}{n-p}\|\mathbf{y} - \mathbf{X} \boldsymbol{\beta}\|^2,
 * \f]
 * with \f$n\f$ the number of rows of \f$\mathbf{X}\f$ and \f$p\f$
 * the number of columns of \f$\mathbf{X}\f$.
 *
 * @param X The design matrix
 * @param Ch Reference to an LDLT object; on return it holds the
 * decomposition of \f$\mathbf{X}^T \mathbf{X}\f$.
 */
void linear_reg::LeastSquaredRegression(const mematrix<double>& X,
                                        LDLT<MatrixXd>& Ch) {
    int m = X.ncol;
    MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>().rankUpdate(
            X.data.adjoint());
    Ch = LDLT<MatrixXd>(txx.selfadjointView<Lower>());
    beta.data = Ch.solve(X.data.adjoint() * reg_data.Y.data);
    // Note: sigma2 is left as the raw residual sum of squares here; the
    // caller divides by (n - p) (see linear_reg::estimate()).
    sigma2 = (reg_data.Y.data - (X.data * beta.data)).squaredNorm();
}
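A standalone sketch of the same normal-equations solve with the doc comment's estimator applied in place (least_squares is an illustrative name, not ProbABEL's API):

// Minimal sketch with plain Eigen types: solves (X'X) beta = X'y via LDLT
// and applies the sigma^2 estimator from the doc comment above.
#include <Eigen/Dense>

Eigen::VectorXd least_squares(const Eigen::MatrixXd& X, const Eigen::VectorXd& y,
                              double& sigma2_hat) {
    using namespace Eigen;
    const int n = static_cast<int>(X.rows());
    const int p = static_cast<int>(X.cols());
    // Form X'X touching only the lower triangle, as LeastSquaredRegression() does.
    MatrixXd txx = MatrixXd(p, p).setZero()
                       .selfadjointView<Lower>()
                       .rankUpdate(X.adjoint());
    LDLT<MatrixXd> Ch(txx.selfadjointView<Lower>());
    VectorXd beta = Ch.solve(X.adjoint() * y);
    sigma2_hat = (y - X * beta).squaredNorm() / (n - p);  // \hat\sigma^2
    return beta;
}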
Example #4
bool pdsolve(const SQMATTP& M, SQMATTP& MInv, double* logDet)
{
	/* static */ LDLT<SQMATTP> MSr;
	/* static */ SQMATTP Ip;
	/* static */ int Ipdim(0);
	int p(M.rows());

	if (Ipdim != p) SetIdentity(Ip, p, &Ipdim);

	MSr = M.ldlt();      // Maybe replace this by a better rank-revealing criterion
	if (MSr.info() != Success) return false;
	// Solving against the identity yields the explicit inverse column by column.
	MInv = MSr.solve(Ip);
	if (logDet) {
		Diagonal<const SQMATTP> MSrdiag(MSr.vectorD());
		*logDet = log(MSrdiag(0));
		for (int i = 1; i < M.rows(); i++) *logDet += log(MSrdiag(i));
	}
	}
	return true;
}
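A small usage sketch, assuming SQMATTP corresponds to Eigen::MatrixXd: solving against the identity, as pdsolve() does, reproduces the explicit inverse.

#include <Eigen/Dense>
#include <iostream>

int main() {
    using namespace Eigen;
    MatrixXd A = MatrixXd::Random(4, 4);
    MatrixXd M = A * A.transpose() + 4.0 * MatrixXd::Identity(4, 4);  // SPD
    MatrixXd MInv = M.ldlt().solve(MatrixXd::Identity(4, 4));
    std::cout << (MInv - M.inverse()).norm() << "\n";  // ~machine epsilon
}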
Example #5
Image smooth_detector(const Image& source, Interpolation level, int r) {
  Image output(source.rows(), source.columns(), 1, numeric_limits<float>::max());
  const MatrixXf reg_matrix = ComputeRegMatrix(level, r);
  const LDLT<MatrixXf> solver = (reg_matrix.transpose() * reg_matrix).ldlt();
  for (int pr = 0; pr <= source.rows() - r; ++pr) {
    for (int pc = 0; pc <= source.columns() - r; ++pc) {
      VectorXf dist = VectorXf::Zero(r * r);
      for (int ch = 0; ch < source.channels(); ++ch) {
        EigenImage y = ExtractPatch(source, r, pr, pc, ch);
        VectorXf reg_surf = solver.solve(reg_matrix.transpose() * y.asvector());
        dist += (reg_matrix * reg_surf - y.asvector()).cwiseAbs2();
      }
      dist = dist.cwiseSqrt();
      for (int row = pr; row < min(output.rows(), pr + r); ++row) {
        for (int col = pc; col < min(output.columns(), pc + r); ++col) {
          output.val(col, row) = min(output.val(col, row), dist((row - pr) * r + col - pc));
        }
      }
    }
  }
  return output;
}
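The design choice worth noting above: the LDLT factorization depends only on reg_matrix, so it is computed once outside the pixel loops and reused for every patch and channel; each cached solve is quadratic in the system size rather than the cubic cost of refactorizing. A reduced sketch of that factor-once, solve-many pattern (illustrative names):

// Sketch of the factor-once / solve-many pattern; names are illustrative.
// Each column of rhs is an independent right-hand side (one per patch/channel).
#include <Eigen/Dense>

Eigen::MatrixXf solve_many(const Eigen::MatrixXf& reg_matrix,
                           const Eigen::MatrixXf& rhs) {
    // Factor the normal-equations matrix once...
    const Eigen::LDLT<Eigen::MatrixXf> solver =
        (reg_matrix.transpose() * reg_matrix).ldlt();
    // ...then solve for many right-hand sides against the same factorization.
    return solver.solve(reg_matrix.transpose() * rhs);
}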
Example #6
    inline bool
    check_pos_definite(const char* function,
                       const char* name,
                       const Eigen::Matrix<T_y, Dynamic, Dynamic>& y) {
      check_symmetric(function, name, y);
      check_positive_size(function, name, "rows", y.rows());

      if (y.rows() == 1 && !(y(0, 0) > CONSTRAINT_TOLERANCE))
        domain_error(function, name, y, "is not positive definite: ");

      using Eigen::LDLT;
      using Eigen::Matrix;
      using Eigen::Dynamic;
      LDLT< Matrix<double, Dynamic, Dynamic> > cholesky
        = value_of_rec(y).ldlt();
      if (cholesky.info() != Eigen::Success
          || !cholesky.isPositive()
          || (cholesky.vectorD().array() <= CONSTRAINT_TOLERANCE).any())
        domain_error(function, name, y, "is not positive definite:\n");
      check_not_nan(function, name, y);
      return true;
    }
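Outside Stan's error-handling framework the same test reduces to a boolean predicate; a minimal sketch with plain Eigen types (the default tolerance is an assumption; Stan's CONSTRAINT_TOLERANCE may differ):

// Minimal plain-Eigen positive-definiteness check via LDLT; tol is assumed.
#include <Eigen/Dense>

bool is_pos_definite(const Eigen::MatrixXd& y, double tol = 1e-8) {
    // Symmetry and non-empty square shape are preconditions, as in Stan.
    if (y.rows() != y.cols() || y.rows() == 0 || !y.isApprox(y.transpose()))
        return false;
    Eigen::LDLT<Eigen::MatrixXd> ldlt = y.ldlt();
    return ldlt.info() == Eigen::Success
        && ldlt.isPositive()
        && (ldlt.vectorD().array() > tol).all();
}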
Example #7
/**
 * \brief Solve the linear system in case the --mmscore option was
 * specified.
 *
 * Specifying the --mmscore command line option requires a file name
 * argument as well. This file should contain the inverse
 * variance-covariance matrix file. This function is run when Linear
 * regression is done in combination with the mmscore option. It
 * solves the 'mmscore' equation as specified in Eq. (5) in section
 * 8.2.1 of the ProbABEL manual:
 * \f[
 *    \hat{\beta}_g = (\mathbf{X}^T_g
 *    \mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}
 *    \mathbf{X}_g)^{-1}
 *    \mathbf{X}^T_g \mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}
 *    \mathbf{R}_{\hat{\beta}_x},
 * \f]
 * where \f$\mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}\f$ is the
 * inverse variance-covariance matrix, and
 * \f$\mathbf{R}_{\hat{\beta}_x}\f$ is the vector containing the
 * residuals obtained from the base regression model i.e. the
 * phenotype. In this function, the phenotype is stored in the
 * variable \c Y.
 *
 * @param X The design matrix \f$X_g\f$. \c X should only contain the
 * parts involving genotype data (including any interactions involving
 * a genetic term), all other covariates should have been regressed out
 * before running ProbABEL.
 * @param W_masked The inverse variance-covariance matrix
 * \f$\mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}\f$.
 * @param Ch Reference to the LDLT Cholesky decomposition of the
 * matrix to be inverted to get \f$\hat\beta_g\f$:
 * \f[
 * \mathbf{X}^T_g
 *    \mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}
 *    \mathbf{X}_g.
 * \f]
 * On return this variable contains the LDLT decomposition of said matrix.
 */
void linear_reg::mmscore_regression(const mematrix<double>& X,
                                    const masked_matrix& W_masked,
                                    LDLT<MatrixXd>& Ch) {
    VectorXd Y = reg_data.Y.data.col(0);
    /*
     In ProbABEL <0.50 this calculation was performed as t(X)*W.
     This was changed to W*X, which vectorizes better because the left-hand
     side has more rows. It introduces an extra transpose, but that cost is
     negligible compared to the speedup it brings (about a factor of 2 for
     palinear with one predictor).

     This function solves the system
        (X^T W X) beta = X^T W Y.
     Since W is symmetric, (WX)^T = X^T W, so this can be rewritten as
        (WX)^T X beta = (WX)^T Y,
     which is solved using an LDLT Cholesky decomposition.
     */
    MatrixXd WX = W_masked.masked_data->data * X.data;
    MatrixXd XWT = WX.transpose();
    Ch = LDLT<MatrixXd>(XWT * X.data);
    VectorXd beta_vec = Ch.solve(XWT * Y);
    sigma2 = (Y - WX * beta_vec).squaredNorm();
    beta.data = beta_vec;
}
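Stripped of the mematrix/masked_matrix wrappers, this is ordinary generalized least squares; a hedged sketch with plain Eigen types (gls_solve is an illustrative name):

// Sketch of the same GLS solve: computing WX once serves both sides of
// (WX)' X beta = (WX)' Y. Assumes W is symmetric.
#include <Eigen/Dense>

Eigen::VectorXd gls_solve(const Eigen::MatrixXd& X, const Eigen::MatrixXd& W,
                          const Eigen::VectorXd& Y) {
    using namespace Eigen;
    MatrixXd WX = W * X;             // W symmetric, so (WX)' = X'W
    MatrixXd tXW = WX.transpose();
    LDLT<MatrixXd> Ch(tXW * X);      // factors X'WX
    return Ch.solve(tXW * Y);        // beta = (X'WX)^{-1} X'WY
}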
Example #8
            int run(const log4cxx::LoggerPtr &logger, KernelEVD<Scalar> &builder) const {
                KQP_SCALAR_TYPEDEFS(Scalar);
                
                KQP_LOG_INFO_F(logger, "Kernel EVD with dense vectors and builder \"%s\" (pre-images = %d, linear combination = %d)", %KQP_DEMANGLE(builder) %max_preimages %max_lc);
                
                ScalarMatrix matrix(n,n);
                matrix.setConstant(0);
                
                // Construction
                for(int i = 0; i < nb_add; i++) {
                    
                    Scalar alpha = Eigen::internal::abs(Eigen::internal::random_impl<Scalar>::run()) + 1e-3;
                    
                    int k = (int)std::abs(Eigen::internal::random_impl<double>::run() * (double)(max_preimages-min_preimages)) + min_preimages;
                    int p = (int)std::abs(Eigen::internal::random_impl<double>::run() * (double)(max_lc-min_lc)) + min_lc;
                    KQP_LOG_INFO(logger, boost::format("Pre-images (%dx%d) and linear combination (%dx%d)") % n % k % k % p);
                    
                    // Generate a number of pre-images
                    ScalarMatrix m = ScalarMatrix::Random(n, k);
                    
                    // Generate the linear combination matrix
                    ScalarMatrix mA = ScalarMatrix::Random(k, p);
                    
                    matrix.template selfadjointView<Eigen::Lower>().rankUpdate(m * mA, alpha);

                    builder.add(alpha, FMatrixPtr(new Dense<Scalar>(m)), mA);
                }
                
                // Computing via EVD
                KQP_LOG_INFO(logger, "Computing an LDLT decomposition");
                
                typedef Eigen::LDLT<ScalarMatrix> LDLT;
                LDLT ldlt = matrix.template selfadjointView<Eigen::Lower>().ldlt();
                ScalarMatrix mL = ldlt.matrixL();
                mL = ldlt.transpositionsP().transpose() * mL;
                FMatrixPtr  mU(Dense<Scalar>::create(mL));
                Eigen::Matrix<Scalar,Dynamic,1> mU_d = ldlt.vectorD();

                // Comparing the results
                KQP_LOG_INFO(logger, "Retrieving the decomposition");
                
                auto kevd = builder.getDecomposition();
                
                ScalarAltMatrix mUY = Eigen::Identity<Scalar>(mL.rows(), mL.rows());
                
                KQP_LOG_DEBUG(logger, "=== Decomposition ===");
//                KQP_LOG_DEBUG(logger, "X = " << kevd.mX);
                KQP_LOG_DEBUG(logger, "Y = " << kevd.mY);
                KQP_LOG_DEBUG(logger, "D = " << kevd.mD);
                
                
                // Computing the difference between operators || U1 - U2 ||^2
                
                KQP_LOG_INFO(logger, "Comparing the decompositions");
                double error = KernelOperators<Scalar>::difference(kevd.fs, kevd.mX, kevd.mY, kevd.mD, mU, mUY, mU_d);
                
                KQP_LOG_INFO_F(logger, "Squared error is %e", %error);
                return error < tolerance ? 0 : 1;
            }
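The factor extraction above works because Eigen's pivoted LDLT computes A = P^T L D L^T P, so with mL = P^T * L the original matrix is recovered as mL * D * mL^T. A small verification sketch with plain Eigen types:

// Verifies the reconstruction identity used by the test above.
#include <Eigen/Dense>
#include <iostream>

int main() {
    using namespace Eigen;
    MatrixXd A = MatrixXd::Random(6, 6);
    MatrixXd M = A * A.transpose();                 // symmetric PSD
    LDLT<MatrixXd> ldlt = M.selfadjointView<Lower>().ldlt();
    MatrixXd mL = ldlt.matrixL();
    mL = ldlt.transpositionsP().transpose() * mL;   // undo the pivoting
    VectorXd d = ldlt.vectorD();
    MatrixXd R = mL * d.asDiagonal() * mL.transpose();
    std::cout << (R - M).norm() << "\n";            // ~machine epsilon
}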
Example #9
/**
 * \brief Estimate the parameters for linear regression.
 *
 * @param verbose Turns verbose printing of various matrices on if
 * non-zero.
 * @param model The number of the genetic model (e.g. additive,
 * recessive, ...) that is to be applied by the apply_model() function.
 * @param interaction
 * @param ngpreds Number of genomic predictors (1 for dosages, 2 for
 * probabilities).
 * @param invvarmatrixin
 * @param robust If non-zero calculate robust standard errors.
 * @param nullmodel If non-zero calculate the null model (excluding
 * SNP information).
 */
void linear_reg::estimate(const int verbose,
                          const int model,
                          const int interaction,
                          const int ngpreds,
                          masked_matrix& invvarmatrixin,
                          const int robust,
                          const int nullmodel) {
    // interaction parameter and model should come in here
    //regdata rdata = rdatain.get_unmasked_data();

    if (verbose)
    {
        std::cout << reg_data.is_interaction_excluded
                  << " <-rdata.is_interaction_excluded\n";
        // std::cout << "invvarmatrix:\n";
        // invvarmatrixin.masked_data->print();
        std::cout << "rdata.X:\n";
        reg_data.X.print();
    }

    mematrix<double> X = apply_model(reg_data.X, model, interaction, ngpreds,
            reg_data.is_interaction_excluded, false, nullmodel);
    if (verbose)
    {
        std::cout << "X:\n";
        X.print();
        std::cout << "Y:\n";
        reg_data.Y.print();
    }

    int length_beta = X.ncol;
    beta.reinit(length_beta, 1);
    sebeta.reinit(length_beta, 1);
    //Han Chen
    if (length_beta > 1)
    {
        if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2)
        {
            covariance.reinit(length_beta - 2, 1);
        }
        else
        {
            covariance.reinit(length_beta - 1, 1);
        }
    }

    double sigma2_internal;

    LDLT <MatrixXd> Ch;
    if (invvarmatrixin.length_of_mask != 0)
    {
        //retrieve masked data W
        invvarmatrixin.update_mask(reg_data.masked_data);
        mmscore_regression(X, invvarmatrixin, Ch);
        double N = static_cast<double>(X.nrow);
        //sigma2_internal = sigma2 / (N - static_cast<double>(length_beta));
        // Ugly fix to the fact that if we do mmscore, sigma2 is already
        //  in the matrix...
        //      YSA, 2009.07.20
        sigma2_internal = 1.0;
        sigma2 /= N;
    }
    else  // NO mm-score regression : normal least square regression
    {
        LeastSquaredRegression(X, Ch);
        double N = static_cast<double>(X.nrow);
        double P = static_cast<double>(length_beta);
        sigma2_internal = sigma2 / (N - P);
        sigma2 /= N;
    }
    /*
     loglik = 0.;
     double ss=0;
     for (int i=0;i<rdata.nids;i++) {
     double resid = rdata.Y[i] - beta.get(0,0); // intercept
     for (int j=1;j<beta.nrow;j++) resid -= beta.get(j,0)*X.get(i,j);
     // residuals[i] = resid;
     ss += resid*resid;
     }
     sigma2 = ss/N;
     */
    //cout << "estimate " << rdata.nids << "\n";
    //(rdata.X).print();
    //for (int i=0;i<rdata.nids;i++) cout << rdata.masked_data[i] << " ";
    //cout << endl;
    logLikelihood(X);

    MatrixXd tXX_inv = Ch.solve(MatrixXd::Identity(length_beta, length_beta));
    mematrix<double> robust_sigma2(X.ncol, X.ncol);

    int offset = X.ncol - 1;
    // if additive and interaction and 2 predictors and more than 2 betas
    if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2) {
        offset = X.ncol - 2;
    }

    if (robust)
    {
        RobustSEandCovariance(X, robust_sigma2, tXX_inv, offset);
    }
    else
    {
        PlainSEandCovariance(sigma2_internal, tXX_inv, offset);
    }
}
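RobustSEandCovariance() and PlainSEandCovariance() are not shown in this example. As a reference point only, the textbook Huber-White sandwich estimator on which robust standard errors are conventionally based looks like the sketch below; this illustrates the general technique, not ProbABEL's exact implementation:

// Textbook sandwich: Var(beta) = (X'X)^{-1} X' diag(e_i^2) X (X'X)^{-1}.
// Names are illustrative; tXX_inv plays the role of the matrix computed above.
#include <Eigen/Dense>

Eigen::VectorXd robust_se(const Eigen::MatrixXd& X, const Eigen::VectorXd& resid,
                          const Eigen::MatrixXd& tXX_inv) {
    using namespace Eigen;
    VectorXd e2 = resid.array().square().matrix();            // squared residuals
    MatrixXd meat = X.transpose() * e2.asDiagonal() * X;      // X' diag(e^2) X
    MatrixXd sandwich = tXX_inv * meat * tXX_inv;
    return sandwich.diagonal().array().sqrt().matrix();       // robust SEs
}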