void base_reg::base_score(const mematrix<double>& resid,
                          const int model,
                          const int interaction,
                          const int ngpreds,
                          const masked_matrix& invvarmatrix,
                          const int nullmodel)
{
    mematrix<double> oX = reg_data.extract_genotypes();
    mematrix<double> X = apply_model(oX, model, interaction, ngpreds,
                                     reg_data.is_interaction_excluded,
                                     false, nullmodel);
    beta.reinit(X.ncol, 1);
    sebeta.reinit(X.ncol, 1);

    int length_beta = X.ncol;
    double N = static_cast<double>(resid.nrow);

    mematrix<double> tX = transpose(X);
    if (invvarmatrix.length_of_mask != 0)
    {
        tX = tX * invvarmatrix.masked_data;
    }

    mematrix<double> u = tX * resid;
    mematrix<double> v = tX * X;

    // Centre v: subtract the outer product of the column sums of X,
    // scaled by 1/N.
    mematrix<double> csum = column_sum(X);
    csum = transpose(csum) * csum;
    csum = csum * (1. / N);
    v = v - csum;

    // Use an LDLT (Cholesky) decomposition to invert v.
    // Before this was:
    //   mematrix<double> v_i = invert(v);
    LDLT<MatrixXd> Ch = LDLT<MatrixXd>(v.data.selfadjointView<Lower>());
    // Solve v * beta = u for beta.
    beta.data = Ch.solve(u.data);

    // TODO(maartenk): set size of v_i directly or remove mematrix class
    mematrix<double> v_i = v;
    v_i.data = Ch.solve(MatrixXd::Identity(length_beta, length_beta));

    // Estimate the residual variance.
    double sr = 0.;
    double srr = 0.;
    for (int i = 0; i < resid.nrow; i++)
    {
        sr += resid[i];
        srr += resid[i] * resid[i];
    }
    double mean_r = sr / N;
    double sigma2_internal = (srr - N * mean_r * mean_r) / (N - beta.nrow);

    for (int i = 0; i < beta.nrow; i++)
    {
        sebeta[i] = sqrt(v_i.get(i, i) * sigma2_internal);
    }

    // Score-test statistic: chi^2 = u^T v^{-1} u / sigma^2.
    mematrix<double> chi2 = transpose(u) * v_i * u;
    chi2 = chi2 * (1. / sigma2_internal);
    chi2_score = chi2[0];
}
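
// Stripped of the mematrix/masked_matrix wrappers, the score-test algebra
// above boils down to a few Eigen calls. The following stand-alone sketch
// (toy data, no masking, everything beyond the variable names u, v, X and
// resid is illustrative) mirrors the computation: u = X^T r, v = X^T X
// centred by the column sums, beta = v^{-1} u, chi^2 = u^T v^{-1} u / sigma^2.
#include <Eigen/Dense>
#include <iostream>

using namespace Eigen;

int main()
{
    // Toy data: 6 observations, 1 genotype column.
    MatrixXd X(6, 1);
    X << 0, 1, 2, 1, 0, 2;
    VectorXd resid(6);              // residuals of the base model
    resid << 0.3, -0.1, 0.8, 0.2, -0.5, 0.6;

    const double N = static_cast<double>(resid.size());

    VectorXd u = X.transpose() * resid;          // score vector
    MatrixXd v = X.transpose() * X;              // information matrix
    RowVectorXd csum = X.colwise().sum();
    v -= csum.transpose() * csum / N;            // centre, as above

    LDLT<MatrixXd> Ch(v);                        // LDLT Cholesky
    VectorXd beta = Ch.solve(u);                 // one-step estimate
    MatrixXd v_i = Ch.solve(MatrixXd::Identity(v.rows(), v.cols()));

    double mean_r = resid.mean();
    double sigma2 = (resid.squaredNorm() - N * mean_r * mean_r)
                    / (N - beta.size());

    MatrixXd chi2m = u.transpose() * v_i * u;
    std::cout << "beta = " << beta.transpose()
              << ", chi2 = " << chi2m(0, 0) / sigma2 << "\n";
}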
/**
 * \brief Perform least-squares regression.
 *
 * Basically we solve the following linear system here:
 * \f[
 * \mathbf{y} = \mathbf{X} \mathbf{\beta}.
 * \f]
 *
 * This function also estimates \f$\sigma^2\f$, the variance of the
 * error term. An estimator of \f$\sigma^2\f$ is given by:
 * \f[
 * \hat\sigma^2 = \frac{1}{n-p}
 *     ||\mathbf{y} - \mathbf{X} \mathbf{\beta}||^2,
 * \f]
 * with \f$n\f$ the number of rows of \f$\mathbf{X}\f$ and \f$p\f$
 * the number of columns of \f$\mathbf{X}\f$. Note that this function
 * only stores the residual sum of squares in \c sigma2; the division
 * by \f$n-p\f$ is done in linear_reg::estimate().
 *
 * @param X The design matrix.
 * @param Ch Reference to the LDLT Cholesky decomposition of
 * \f$\mathbf{X}^T \mathbf{X}\f$; it is filled by this function.
 */
void linear_reg::LeastSquaredRegression(const mematrix<double>& X,
                                        LDLT<MatrixXd>& Ch)
{
    int m = X.ncol;

    // Compute X^T X, filling only the lower triangle, then solve the
    // normal equations X^T X beta = X^T y via an LDLT decomposition.
    MatrixXd txx = MatrixXd(m, m).setZero().selfadjointView<Lower>()
        .rankUpdate(X.data.adjoint());
    Ch = LDLT<MatrixXd>(txx.selfadjointView<Lower>());
    beta.data = Ch.solve(X.data.adjoint() * reg_data.Y.data);

    // Residual sum of squares; divided by (n - p) in estimate().
    sigma2 = (reg_data.Y.data - (X.data * beta.data)).squaredNorm();
}
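
// As a compile-checked illustration of the pattern used above (rankUpdate
// to form X^T X in one triangle, then an LDLT solve of the normal
// equations), the following stand-alone snippet fits a small made-up
// regression and recovers hat{sigma}^2 = RSS / (n - p).
#include <Eigen/Dense>
#include <iostream>

using namespace Eigen;

int main()
{
    // y = 1 + 2*x plus a little noise; design matrix with intercept.
    MatrixXd X(5, 2);
    X << 1, 0,
         1, 1,
         1, 2,
         1, 3,
         1, 4;
    VectorXd y(5);
    y << 1.1, 2.9, 5.2, 6.8, 9.1;

    const int m = X.cols();

    // Form X^T X in the lower triangle only, as in the function above.
    MatrixXd txx = MatrixXd(m, m).setZero()
                       .selfadjointView<Lower>()
                       .rankUpdate(X.adjoint());
    LDLT<MatrixXd> Ch(txx.selfadjointView<Lower>());

    VectorXd beta = Ch.solve(X.adjoint() * y);

    // hat{sigma}^2 = ||y - X beta||^2 / (n - p).
    double rss = (y - X * beta).squaredNorm();
    double sigma2hat = rss / (X.rows() - X.cols());

    std::cout << "beta = " << beta.transpose()
              << ", sigma^2 = " << sigma2hat << "\n";
}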
bool pdsolve(const SQMATTP& M, const VCTTP& b, VCTTP& res, double* logDet)
{
    /* static */ LDLT<SQMATTP> MSr;

    MSr = M.ldlt();
    // Maybe replace this by a better rank-revealing criterion
    if (MSr.info() != Success)
        return false;
    res = MSr.solve(b);
    if (logDet)
    {
        // log|M| = sum of the logs of the diagonal of D in the LDLT
        // decomposition (valid because M is positive definite).
        Diagonal<const SQMATTP> MSrdiag(MSr.vectorD());
        *logDet = log(MSrdiag(0));
        for (int i = 1; i < M.rows(); i++)
            *logDet += log(MSrdiag(i));
    }
    return true;
}
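
// SQMATTP and VCTTP are presumably typedefs for a square-matrix and a
// vector type; the stand-alone sketch below shows the same pattern with
// MatrixXd/VectorXd and verifies the vectorD()-based log-determinant
// against log(det(M)) for a positive-definite test matrix.
#include <Eigen/Dense>
#include <cmath>
#include <iostream>

using namespace Eigen;

int main()
{
    // Build a positive-definite matrix M = A^T A + I.
    MatrixXd A = MatrixXd::Random(4, 4);
    MatrixXd M = A.transpose() * A + MatrixXd::Identity(4, 4);
    VectorXd b = VectorXd::Random(4);

    LDLT<MatrixXd> ldlt(M);
    if (ldlt.info() != Success)
        return 1;

    VectorXd x = ldlt.solve(b);

    // log|M| = sum_i log D_ii, valid because M is positive definite.
    double logDet = ldlt.vectorD().array().log().sum();

    std::cout << "residual norm: " << (M * x - b).norm() << "\n"
              << "logDet check : "
              << std::abs(logDet - std::log(M.determinant())) << "\n";
}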
bool pdsolve(const SQMATTP& M, SQMATTP& MInv, double* logDet)
{
    /* static */ LDLT<SQMATTP> MSr;
    /* static */ SQMATTP Ip;
    /* static */ int Ipdim(0);
    int p(M.rows());

    if (Ipdim != p)
        SetIdentity(Ip, p, &Ipdim);
    MSr = M.ldlt();
    // Maybe replace this by a better rank-revealing criterion
    if (MSr.info() != Success)
        return false;
    // Invert M by solving M * MInv = I.
    MInv = MSr.solve(Ip);
    if (logDet)
    {
        Diagonal<const SQMATTP> MSrdiag(MSr.vectorD());
        *logDet = log(MSrdiag(0));
        for (int i = 1; i < M.rows(); i++)
            *logDet += log(MSrdiag(i));
    }
    return true;
}
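
// The inverse overload computes M^{-1} by solving against the identity,
// which reuses the factorisation instead of forming an explicit inverse;
// the commented-out /* static */ qualifiers suggest the identity was
// meant to be cached across calls. A minimal sketch with MatrixXd:
#include <Eigen/Dense>
#include <iostream>

using namespace Eigen;

int main()
{
    MatrixXd A = MatrixXd::Random(3, 3);
    MatrixXd M = A.transpose() * A + MatrixXd::Identity(3, 3);

    LDLT<MatrixXd> ldlt(M);
    if (ldlt.info() != Success)
        return 1;

    // Invert by solving M * MInv = I, as the overload above does.
    MatrixXd MInv = ldlt.solve(MatrixXd::Identity(3, 3));

    std::cout << "||M * MInv - I|| = "
              << (M * MInv - MatrixXd::Identity(3, 3)).norm() << "\n";
}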
Image smooth_detector(const Image& source, Interpolation level, int r)
{
    Image output(source.rows(), source.columns(), 1,
                 numeric_limits<float>::max());
    const MatrixXf reg_matrix = ComputeRegMatrix(level, r);
    const LDLT<MatrixXf> solver =
        (reg_matrix.transpose() * reg_matrix).ldlt();
    // Slide an r x r window over the image and fit a regression
    // surface to each patch.
    for (int pr = 0; pr <= source.rows() - r; ++pr)
    {
        for (int pc = 0; pc <= source.columns() - r; ++pc)
        {
            VectorXf dist = VectorXf::Zero(r * r);
            for (int ch = 0; ch < source.channels(); ++ch)
            {
                EigenImage y = ExtractPatch(source, r, pr, pc, ch);
                // Solve the normal equations for this patch.
                VectorXf reg_surf =
                    solver.solve(reg_matrix.transpose() * y.asvector());
                // Accumulate the squared fitting residuals per pixel.
                dist += (reg_matrix * reg_surf - y.asvector()).cwiseAbs2();
            }
            dist = dist.cwiseSqrt();
            // Each pixel keeps the smallest residual over all patches
            // that contain it.
            for (int row = pr; row < min(output.rows(), pr + r); ++row)
            {
                for (int col = pc; col < min(output.columns(), pc + r); ++col)
                {
                    output.val(col, row) =
                        min(output.val(col, row),
                            dist((row - pr) * r + col - pc));
                }
            }
        }
    }
    return output;
}
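
// ComputeRegMatrix() is not shown here. As an illustration only, the
// sketch below fits a bilinear surface a + b*x + c*y to one 3x3 patch
// and reports the per-pixel residuals, i.e. the same solve-and-residual
// step the inner loop performs. The bilinear basis is an assumption,
// not necessarily what Interpolation/ComputeRegMatrix produce.
#include <Eigen/Dense>
#include <iostream>

using namespace Eigen;

int main()
{
    const int r = 3;

    // Hypothetical regression matrix: basis (1, x, y) per pixel of an
    // r x r patch, flattened row-major as in smooth_detector().
    MatrixXf reg_matrix(r * r, 3);
    for (int row = 0; row < r; ++row)
        for (int col = 0; col < r; ++col)
            reg_matrix.row(row * r + col) << 1.0f, float(col), float(row);

    // A toy patch; the last pixel breaks the plane slightly.
    VectorXf y(r * r);
    y << 1, 2, 3,
         2, 3, 4,
         3, 4, 6;

    // The LDLT factorisation of R^T R is computed once and reused for
    // every patch, exactly as in smooth_detector().
    LDLT<MatrixXf> solver(reg_matrix.transpose() * reg_matrix);
    VectorXf reg_surf = solver.solve(reg_matrix.transpose() * y);

    // Per-pixel absolute fitting residual, as accumulated in `dist`.
    VectorXf dist = (reg_matrix * reg_surf - y).cwiseAbs2().cwiseSqrt();
    std::cout << "residuals: " << dist.transpose() << "\n";
}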
/**
 * \brief Solve the linear system in case the --mmscore option was
 * specified.
 *
 * Specifying the --mmscore command line option requires a file name
 * argument as well. This file should contain the inverse
 * variance-covariance matrix. This function is run when linear
 * regression is done in combination with the mmscore option. It
 * solves the 'mmscore' equation as specified in Eq. (5) in section
 * 8.2.1 of the ProbABEL manual:
 * \f[
 * \hat{\beta}_g = (\mathbf{X}^T_g
 * \mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}
 * \mathbf{X}_g)^{-1}
 * \mathbf{X}^T_g \mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}
 * \mathbf{R}_{\hat{\beta}_x},
 * \f]
 * where \f$\mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}\f$ is the
 * inverse variance-covariance matrix, and
 * \f$\mathbf{R}_{\hat{\beta}_x}\f$ is the vector containing the
 * residuals obtained from the base regression model, i.e. the
 * phenotype. In this function, the phenotype is stored in the
 * variable \c Y.
 *
 * @param X The design matrix \f$X_g\f$. \c X should only contain the
 * parts involving genotype data (including any interactions involving
 * a genetic term); all other covariates should have been regressed out
 * before running ProbABEL.
 * @param W_masked The inverse variance-covariance matrix
 * \f$\mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}\f$.
 * @param Ch Reference to the LDLT Cholesky decomposition of the
 * matrix to be inverted to get \f$\hat\beta_g\f$:
 * \f[
 * \mathbf{X}^T_g
 * \mathbf{V}^{-1}_{\hat{\sigma}^2,\hat{h}^2}
 * \mathbf{X}_g.
 * \f]
 * On return this variable contains said matrix.
 */
void linear_reg::mmscore_regression(const mematrix<double>& X,
                                    const masked_matrix& W_masked,
                                    LDLT<MatrixXd>& Ch)
{
    VectorXd Y = reg_data.Y.data.col(0);
    /*
      In ProbABEL <0.50 this calculation was performed like t(X)*W.
      This was changed to W*X, since the latter vectorizes better:
      the left-hand side has more rows. This introduces an additional
      transpose, but that is negligible compared to the speedup it
      brings (about a factor 2 for palinear with 1 predictor).

      This function solves the system (X^T W X) beta = X^T W Y.
      Since W is symmetric, (WX)^T = X^T W, so this can be rewritten
      as (WX)^T X beta = (WX)^T Y, which is solved using an LDLT
      Cholesky decomposition.
    */
    MatrixXd WX = W_masked.masked_data->data * X.data;
    MatrixXd XWT = WX.transpose();

    Ch = LDLT<MatrixXd>(XWT * X.data);
    VectorXd beta_vec = Ch.solve(XWT * Y);
    sigma2 = (Y - WX * beta_vec).squaredNorm();
    beta.data = beta_vec;
}
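
// A stand-alone sketch of the mmscore solve above: (X^T W X) beta = X^T W Y
// via the (WX)^T trick, with a toy symmetric positive-definite W standing
// in for V^{-1}. All data below are made up for illustration.
#include <Eigen/Dense>
#include <iostream>

using namespace Eigen;

int main()
{
    // 4 observations, 1 genotype column.
    MatrixXd X(4, 1);
    X << 0, 1, 2, 1;
    VectorXd Y(4);
    Y << 0.2, 0.9, 2.1, 1.1;
    MatrixXd W(4, 4);
    W << 2.0, 0.5, 0.0, 0.0,
         0.5, 2.0, 0.5, 0.0,
         0.0, 0.5, 2.0, 0.5,
         0.0, 0.0, 0.5, 2.0;

    // Compute W*X once; since W is symmetric, (WX)^T = X^T W.
    MatrixXd WX = W * X;
    MatrixXd XWT = WX.transpose();

    // Solve (X^T W X) beta = X^T W Y via LDLT.
    LDLT<MatrixXd> Ch(XWT * X);
    VectorXd beta = Ch.solve(XWT * Y);

    std::cout << "beta_g = " << beta.transpose() << "\n";
}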
/**
 * \brief Estimate the parameters for linear regression.
 *
 * @param verbose Turns verbose printing of various matrices on if
 * non-zero.
 * @param model The number of the genetic model (e.g. additive,
 * recessive, ...) that is to be applied by the apply_model() function.
 * @param interaction Indicates which covariate is used for the
 * interaction with the SNP.
 * @param ngpreds Number of genomic predictors (1 for dosages, 2 for
 * probabilities).
 * @param invvarmatrixin The inverse variance-covariance matrix, used
 * when the --mmscore option is specified.
 * @param robust If non-zero calculate robust standard errors.
 * @param nullmodel If non-zero calculate the null model (excluding
 * SNP information).
 */
void linear_reg::estimate(const int verbose, const int model,
                          const int interaction, const int ngpreds,
                          masked_matrix& invvarmatrixin, const int robust,
                          const int nullmodel)
{
    // The interaction parameter and model should come here.
    //regdata rdata = rdatain.get_unmasked_data();
    if (verbose)
    {
        cout << reg_data.is_interaction_excluded
             << " <-rdata.is_interaction_excluded\n";
        // std::cout << "invvarmatrix:\n";
        // invvarmatrixin.masked_data->print();
        std::cout << "rdata.X:\n";
        reg_data.X.print();
    }

    mematrix<double> X = apply_model(reg_data.X, model, interaction, ngpreds,
                                     reg_data.is_interaction_excluded,
                                     false, nullmodel);
    if (verbose)
    {
        std::cout << "X:\n";
        X.print();
        std::cout << "Y:\n";
        reg_data.Y.print();
    }

    int length_beta = X.ncol;
    beta.reinit(length_beta, 1);
    sebeta.reinit(length_beta, 1);
    // Han Chen
    if (length_beta > 1)
    {
        if (model == 0 && interaction != 0 && ngpreds == 2
            && length_beta > 2)
        {
            covariance.reinit(length_beta - 2, 1);
        }
        else
        {
            covariance.reinit(length_beta - 1, 1);
        }
    }

    double sigma2_internal;
    LDLT<MatrixXd> Ch;
    if (invvarmatrixin.length_of_mask != 0)
    {
        // Retrieve masked data W
        invvarmatrixin.update_mask(reg_data.masked_data);
        mmscore_regression(X, invvarmatrixin, Ch);
        double N = X.nrow;
        //sigma2_internal = sigma2 / (N - static_cast<double>(length_beta));
        // Ugly fix to the fact that if we do mmscore, sigma2 is
        // already in the matrix...
        // YSA, 2009.07.20
        sigma2_internal = 1.0;
        sigma2 /= N;
    }
    else  // No mmscore regression: normal least-squares regression
    {
        LeastSquaredRegression(X, Ch);
        double N = static_cast<double>(X.nrow);
        double P = static_cast<double>(length_beta);
        sigma2_internal = sigma2 / (N - P);
        sigma2 /= N;
    }

    /*
      loglik = 0.;
      double ss = 0;
      for (int i = 0; i < rdata.nids; i++) {
          double resid = rdata.Y[i] - beta.get(0, 0);  // intercept
          for (int j = 1; j < beta.nrow; j++)
              resid -= beta.get(j, 0) * X.get(i, j);
          // residuals[i] = resid;
          ss += resid * resid;
      }
      sigma2 = ss / N;
    */
    //cout << "estimate " << rdata.nids << "\n";
    //(rdata.X).print();
    //for (int i = 0; i < rdata.nids; i++) cout << rdata.masked_data[i] << " ";
    //cout << endl;
    logLikelihood(X);

    MatrixXd tXX_inv = Ch.solve(MatrixXd::Identity(length_beta,
                                                   length_beta));

    mematrix<double> robust_sigma2(X.ncol, X.ncol);
    int offset = X.ncol - 1;
    // If additive and interaction and 2 predictors and more than 2 betas
    if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2)
    {
        offset = X.ncol - 2;
    }

    if (robust)
    {
        RobustSEandCovariance(X, robust_sigma2, tXX_inv, offset);
    }
    else
    {
        PlainSEandCovariance(sigma2_internal, tXX_inv, offset);
    }
}
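
// RobustSEandCovariance() and PlainSEandCovariance() are not shown here.
// As an illustration of the standard estimators they presumably implement,
// the sketch below computes both the plain covariance sigma^2 (X^T X)^{-1}
// and the White/sandwich robust covariance
// (X^T X)^{-1} X^T diag(e_i^2) X (X^T X)^{-1} on toy data; treat it as a
// sketch of the general technique, not as the ProbABEL internals.
#include <Eigen/Dense>
#include <iostream>

using namespace Eigen;

int main()
{
    // Toy fit: y = X beta + e with an intercept and one covariate.
    MatrixXd X(5, 2);
    X << 1, 0,
         1, 1,
         1, 2,
         1, 3,
         1, 4;
    VectorXd y(5);
    y << 1.0, 3.5, 4.8, 7.2, 8.9;

    LDLT<MatrixXd> Ch(X.transpose() * X);
    VectorXd beta = Ch.solve(X.transpose() * y);
    MatrixXd tXX_inv = Ch.solve(MatrixXd::Identity(2, 2));

    // Plain covariance: sigma^2 (X^T X)^{-1}, sigma^2 = RSS / (n - p).
    VectorXd e = y - X * beta;
    double sigma2 = e.squaredNorm() / (X.rows() - X.cols());
    MatrixXd plain_cov = sigma2 * tXX_inv;

    // Sandwich (robust) covariance:
    // (X^T X)^{-1} X^T diag(e_i^2) X (X^T X)^{-1}.
    MatrixXd meat =
        X.transpose() * e.array().square().matrix().asDiagonal() * X;
    MatrixXd robust_cov = tXX_inv * meat * tXX_inv;

    std::cout << "plain  SE: "
              << plain_cov.diagonal().cwiseSqrt().transpose() << "\n"
              << "robust SE: "
              << robust_cov.diagonal().cwiseSqrt().transpose() << "\n";
}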