void linear_reg::estimate(regdata& rdatain, int verbose, double tol_chol, int model, int interaction, int ngpreds, masked_matrix& invvarmatrixin, int robust, int nullmodel) { //suda ineraction parameter // model should come here regdata rdata = rdatain.get_unmasked_data(); if (invvarmatrixin.length_of_mask != 0) { invvarmatrixin.update_mask(rdatain.masked_data); // invvarmatrixin.masked_data->print(); } if (verbose) { cout << rdata.is_interaction_excluded << " <-irdata.is_interaction_excluded\n"; // std::cout << "invvarmatrix:\n"; // invvarmatrixin.masked_data->print(); std::cout << "rdata.X:\n"; rdata.X.print(); } mematrix<double> X = apply_model(rdata.X, model, interaction, ngpreds, rdata.is_interaction_excluded, false, nullmodel); if (verbose) { std::cout << "X:\n"; X.print(); std::cout << "Y:\n"; rdata.Y.print(); } int length_beta = X.ncol; beta.reinit(length_beta, 1); sebeta.reinit(length_beta, 1); //Han Chen if (length_beta > 1) { if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2) { covariance.reinit(length_beta - 2, 1); } else { covariance.reinit(length_beta - 1, 1); } } //Oct 26, 2009 mematrix<double> tX = transpose(X); if (invvarmatrixin.length_of_mask != 0) { tX = tX * invvarmatrixin.masked_data; //!check if quicker //tX = productXbySymM(tX,invvarmatrix); // = invvarmatrix*X; // std::cout<<"new tX.nrow="<<X.nrow<<" tX.ncol="<<X.ncol<<"\n"; } mematrix<double> tXX = tX * X; double N = X.nrow; #if EIGEN_COMMENTEDOUT MatrixXd Xeigen = X.data; MatrixXd tXeigen = Xeigen.transpose(); MatrixXd tXXeigen = tXeigen * Xeigen; VectorXd Yeigen = rdata.Y.data; VectorXd tXYeigen = tXeigen * Yeigen; // Solve X^T * X * beta = X^T * Y for beta: VectorXd betaeigen = tXXeigen.fullPivLu().solve(tXYeigen); beta.data = betaeigen; if (verbose) { std::cout << setprecision(9) << "Xeigen:\n" << Xeigen << endl; std::cout << setprecision(9) << "tX:\n" << tXeigen << endl; std::cout << setprecision(9) << "tXX:\n" << tXXeigen << endl; std::cout << setprecision(9) << "tXY:\n" << tXYeigen << endl; std::cout << setprecision(9) << "beta:\n"<< betaeigen << endl; printf("----\n"); printf("beta[0] = %e\n", betaeigen.data()[0]); printf("----\n"); // (beta).print(); double relative_error = (tXXeigen * betaeigen - tXYeigen).norm() / tXYeigen.norm(); // norm() is L2 norm cout << "The relative error is:\n" << relative_error << endl; } // This one is needed later on in this function mematrix<double> tXX_i = invert(tXX); #else // // use cholesky to invert // mematrix<double> tXX_i = tXX; cholesky2_mm(tXX_i, tol_chol); chinv2_mm(tXX_i); // before was // mematrix<double> tXX_i = invert(tXX); mematrix<double> tXY = tX * (rdata.Y); beta = tXX_i * tXY; if (verbose) { std::cout << "tX:\n"; tX.print(); std::cout << "tXX:\n"; tXX.print(); std::cout << "chole tXX:\n"; tXX_i.print(); std::cout << "tXX-1:\n"; tXX_i.print(); std::cout << "tXY:\n"; tXY.print(); std::cout << "beta:\n"; (beta).print(); } #endif // now compute residual variance sigma2 = 0.; mematrix<double> ttX = transpose(tX); mematrix<double> sigma2_matrix = rdata.Y; mematrix<double> sigma2_matrix1 = ttX * beta; // std::cout << "sigma2_matrix\n"; // sigma2_matrix.print(); // // std::cout << "sigma2_matrix1\n"; // sigma2_matrix1.print(); sigma2_matrix = sigma2_matrix - sigma2_matrix1; // std::cout << "sigma2_matrix\n"; // sigma2_matrix.print(); static double val; // std::cout << "sigma2_matrix.nrow=" << sigma2_matrix.nrow // << "sigma2_matrix.ncol" << sigma2_matrix.ncol // <<"\n"; for (int i = 0; i < sigma2_matrix.nrow; i++) { val = sigma2_matrix.get(i, 0); // std::cout << "val = " << val << "\n"; sigma2 += val * val; // std::cout << "sigma2+= " << sigma2 << "\n"; } double sigma2_internal = sigma2 / (N - static_cast<double>(length_beta)); // now compute residual variance // sigma2 = 0.; // for (int i =0;i<(rdata.Y).nrow;i++) // sigma2 += ((rdata.Y).get(i,0))*((rdata.Y).get(i,0)); // for (int i=0;i<length_beta;i++) // sigma2 -= 2. * (beta.get(i,0)) * tXY.get(i,0); // for (int i=0;i<(length_beta);i++) // for (int j=0;j<(length_beta);j++) // sigma2 += (beta.get(i,0)) * (beta.get(j,0)) * tXX.get(i,j); // std::cout<<"sigma2="<<sigma2<<"\n"; // std::cout<<"sigma2_internal="<<sigma2_internal<<"\n"; // replaced for ML // sigma2_internal = sigma2/(N - double(length_beta) - 1); // std::cout << "sigma2/=N = "<< sigma2 << "\n"; sigma2 /= N; // std::cout<<"N="<<N<<", length_beta="<<length_beta<<"\n"; if (verbose) { std::cout << "sigma2 = " << sigma2 << "\n"; } /* loglik = 0.; double ss=0; for (int i=0;i<rdata.nids;i++) { double resid = rdata.Y[i] - beta.get(0,0); // intercept for (int j=1;j<beta.nrow;j++) resid -= beta.get(j,0)*X.get(i,j); // residuals[i] = resid; ss += resid*resid; } sigma2 = ss/N; */ //cout << "estimate " << rdata.nids << "\n"; //(rdata.X).print(); //for (int i=0;i<rdata.nids;i++) cout << rdata.masked_data[i] << " "; //cout << endl; loglik = 0.; double halfrecsig2 = .5 / sigma2; for (int i = 0; i < rdata.nids; i++) { double resid = rdata.Y[i] - beta.get(0, 0); // intercept for (int j = 1; j < beta.nrow; j++) resid -= beta.get(j, 0) * X.get(i, j); residuals[i] = resid; loglik -= halfrecsig2 * resid * resid; } loglik -= static_cast<double>(rdata.nids) * log(sqrt(sigma2)); // cout << "estimate " << rdata.nids << "\n"; // // Ugly fix to the fact that if we do mmscore, sigma2 is already // in the matrix... // YSA, 2009.07.20 // //cout << "estimate 0\n"; if (invvarmatrixin.length_of_mask != 0) sigma2_internal = 1.0; mematrix<double> robust_sigma2(X.ncol, X.ncol); if (robust) { mematrix<double> XbyR = X; for (int i = 0; i < X.nrow; i++) for (int j = 0; j < X.ncol; j++) { double tmpval = XbyR.get(i, j) * residuals[i]; XbyR.put(tmpval, i, j); } XbyR = transpose(XbyR) * XbyR; robust_sigma2 = tXX_i * XbyR; robust_sigma2 = robust_sigma2 * tXX_i; } //cout << "estimate 0\n"; for (int i = 0; i < (length_beta); i++) { if (robust) { // cout << "estimate :robust\n"; double value = sqrt(robust_sigma2.get(i, i)); sebeta.put(value, i, 0); //Han Chen if (i > 0) { if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2) { if (i > 1) { double covval = robust_sigma2.get(i, i - 2); covariance.put(covval, i - 2, 0); } } else { double covval = robust_sigma2.get(i, i - 1); covariance.put(covval, i - 1, 0); } } //Oct 26, 2009 } else { // cout << "estimate :non-robust\n"; double value = sqrt(sigma2_internal * tXX_i.get(i, i)); sebeta.put(value, i, 0); //Han Chen if (i > 0) { if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2) { if (i > 1) { double covval = sigma2_internal * tXX_i.get(i, i - 2); covariance.put(covval, i - 2, 0); } } else { double covval = sigma2_internal * tXX_i.get(i, i - 1); covariance.put(covval, i - 1, 0); } } //Oct 26, 2009 } } //cout << "estimate E\n"; if (verbose) { std::cout << "sebeta (" << sebeta.nrow << "):\n"; sebeta.print(); } }
void logistic_reg::estimate(regdata& rdatain, int verbose, int maxiter, double eps, double tol_chol, int model, int interaction, int ngpreds, masked_matrix& invvarmatrixin, int robust, int nullmodel) { // In contrast to the 'linear' case 'invvarmatrix' contains the // inverse of correlation matrix (not the inverse of var-cov matrix) // h2.object$InvSigma * h.object2$h2an$estimate[length(h2$h2an$estimate)] // the inverse of var-cov matrix scaled by total variance regdata rdata = rdatain.get_unmasked_data(); // a lot of code duplicated between linear and logistic... // e.g. a piece below... mematrix<double> invvarmatrix; if (invvarmatrixin.length_of_mask != 0) { invvarmatrixin.update_mask(rdatain.masked_data); } mematrix<double> X = apply_model(rdata.X, model, interaction, ngpreds, rdata.is_interaction_excluded, false, nullmodel); int length_beta = X.ncol; beta.reinit(length_beta, 1); sebeta.reinit(length_beta, 1); //Han Chen if (length_beta > 1) { if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2) { covariance.reinit(length_beta - 2, 1); } else { covariance.reinit(length_beta - 1, 1); } } //Oct 26, 2009 mematrix<double> W((X).nrow, 1); mematrix<double> z((X).nrow, 1); mematrix<double> tXWX(length_beta, length_beta); mematrix<double> tXWX_i(length_beta, length_beta); mematrix<double> tXWz(length_beta, 1); double prev = (rdata.Y).column_mean(0); if (prev >= 1. || prev <= 0.) { std::cerr << "prevalence not within (0,1)\n"; exit(1); } for (int i = 0; i < length_beta; i++) beta.put(0., i, 0); beta.put(log(prev / (1. - prev)), 0, 0); mematrix<double> tX = transpose(X); if (invvarmatrix.nrow != 0 && invvarmatrix.ncol != 0) { //TODO(maarten):invvarmatix is symmetric:is there an more effective way? tX = tX * invvarmatrix; } /* std::cout << "\n"; std::cout << "X " << X.get(0,0) << " " << X.get(0,1) << " " << X.get(0,2) << "\n"; if (X.ncol==4) std::cout << "X[4] " << X.get(0,3) << "\n"; std::cout << "Inv " << invvarmatrix.get(0,0) << " " << invvarmatrix.get(0,1) << " " << invvarmatrix.get(0,2) << "\n"; if (X.ncol==4) std::cout << ,"X[4] " << invvarmatrix.get(0,3) << "\n"; std::cout << "tXInv " << tX.get(0,0) << " " << tX.get(1,0) << " " << tX.get(2,0) << "%f\n"; if (X.ncol==4) std::cout << "X[4] " << tX.get(3,0) << "\n"; */ niter = 0; double delta = 1.; double prevlik = 0.; while (niter < maxiter && delta > eps) { mematrix<double> eMu = (X) * beta; mematrix<double> eMu_us = eMu; for (int i = 0; i < eMu.nrow; i++) { double emu = eMu.get(i, 0); double value = emu; double zval = 0.; value = exp(value) / (1. + exp(value)); residuals[i] = (rdata.Y).get(i, 0) - value; eMu.put(value, i, 0); W.put(value * (1. - value), i, 0); zval = emu + (1. / (value * (1. - value))) * (((rdata.Y).get(i, 0)) - value); z.put(zval, i, 0); } mematrix<double> tmp = productMatrDiag(tX, W); if (verbose) { std::cout << "tXW:\n"; tmp.print(); } mematrix<double> tXWX = tmp * (X); //N = tXWX.get(0, 0); if (verbose) { std::cout << "tXWX:\n"; tXWX.print(); } // std::cout << "tXWX:\n";tXWX.print(); // // use cholesky to invert // // tXWX_i = tXWX; //cholesky2_mm(tXWX_i,tol_chol); //if (verbose) {std::cout << "chole tXWX:\n"; tXWX_i.print();} //std::cout << "chole tXWX:\n"; tXWX_i.print(); //chinv2_mm(tXWX_i); // was before tXWX_i = invert(tXWX); if (verbose) { std::cout << "tXWX-1:\n"; tXWX_i.print(); } // std::cout << "*** tXWX_i\n"; tXWX_i.print(); mematrix<double> tmp1 = productMatrDiag(tX, W); mematrix<double> tXWz = tmp1 * z; if (verbose) { std::cout << "tXWz:\n"; tXWz.print(); } beta = tXWX_i * tXWz; // std::cout << "*** res: " << residuals[0] << " " // << residuals[1] << " " << residuals[2] << "\n"; //mematrix<double> txres = tx * residuals; // std::cout << "*** txres\n";txres.print(); //beta = txwx_i* txres; if (verbose) { std::cout << "beta:\n"; beta.print(); } // std::cout << "beta:\n"; beta.print(); // compute likelihood prevlik = loglik; loglik = 0.; for (int i = 0; i < eMu.nrow; i++) loglik += rdata.Y[i] * eMu_us[i] - log(1. + exp(eMu_us[i])); delta = fabs(1. - (prevlik / loglik)); niter++; } sigma2 = 0.; mematrix<double> robust_sigma2(X.ncol, X.ncol); if (robust) { mematrix<double> XbyR = X; for (int i = 0; i < X.nrow; i++) for (int j = 0; j < X.ncol; j++) { double tmpval = XbyR.get(i, j) * residuals[i]; XbyR.put(tmpval, i, j); } XbyR = transpose(XbyR) * XbyR; robust_sigma2 = tXWX_i * XbyR; robust_sigma2 = robust_sigma2 * tXWX_i; } for (int i = 0; i < (length_beta); i++) { if (robust) { double value = sqrt(robust_sigma2.get(i, i)); sebeta.put(value, i, 0); //Han Chen if (i > 0) { if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2) { if (i > 1) { double covval = robust_sigma2.get(i, i - 2); covariance.put(covval, i - 2, 0); } } else { double covval = robust_sigma2.get(i, i - 1); covariance.put(covval, i - 1, 0); } } //Oct 26, 2009 } else { double value = sqrt(tXWX_i.get(i, i)); sebeta.put(value, i, 0); //Han Chen if (i > 0) { if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2) { if (i > 1) { double covval = tXWX_i.get(i, i - 2); covariance.put(covval, i - 2, 0); } } else { double covval = tXWX_i.get(i, i - 1); covariance.put(covval, i - 1, 0); } } //Oct 26, 2009 } } if (verbose) { std::cout << "sebeta (" << sebeta.nrow << "):\n"; sebeta.print(); } // std::cout << "beta (" << beta.nrow << "):\n"; beta.print(); // std::cout << "sebeta (" << sebeta.nrow << "):\n"; sebeta.print(); // exit(1); }
/** * \brief Estimate the parameters for linear regression. * * @param verbose Turns verbose printing of various matrices on if * non-zero. * @param model The number of the genetic model (e.g. additive, * recessive, ...) that is to be applied by the apply_model() function. * @param interaction * @param ngpreds Number of genomic predictors (1 for dosages, 2 for * probabilities). * @param invvarmatrixin * @param robust If non-zero calculate robust standard errors. * @param nullmodel If non-zero calculate the null model (excluding * SNP information). */ void linear_reg::estimate(const int verbose, const int model, const int interaction, const int ngpreds, masked_matrix& invvarmatrixin, const int robust, const int nullmodel) { // suda interaction parameter // model should come here //regdata rdata = rdatain.get_unmasked_data(); if (verbose) { cout << reg_data.is_interaction_excluded << " <-rdata.is_interaction_excluded\n"; // std::cout << "invvarmatrix:\n"; // invvarmatrixin.masked_data->print(); std::cout << "rdata.X:\n"; reg_data.X.print(); } mematrix<double> X = apply_model(reg_data.X, model, interaction, ngpreds, reg_data.is_interaction_excluded, false, nullmodel); if (verbose) { std::cout << "X:\n"; X.print(); std::cout << "Y:\n"; reg_data.Y.print(); } int length_beta = X.ncol; beta.reinit(length_beta, 1); sebeta.reinit(length_beta, 1); //Han Chen if (length_beta > 1) { if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2) { covariance.reinit(length_beta - 2, 1); } else { covariance.reinit(length_beta - 1, 1); } } double sigma2_internal; LDLT <MatrixXd> Ch; if (invvarmatrixin.length_of_mask != 0) { //retrieve masked data W invvarmatrixin.update_mask(reg_data.masked_data); mmscore_regression(X, invvarmatrixin, Ch); double N = X.nrow; //sigma2_internal = sigma2 / (N - static_cast<double>(length_beta)); // Ugly fix to the fact that if we do mmscore, sigma2 is already // in the matrix... // YSA, 2009.07.20 sigma2_internal = 1.0; sigma2 /= N; } else // NO mm-score regression : normal least square regression { LeastSquaredRegression(X, Ch); double N = static_cast<double>(X.nrow); double P = static_cast<double>(length_beta); sigma2_internal = sigma2 / (N - P); sigma2 /= N; } /* loglik = 0.; double ss=0; for (int i=0;i<rdata.nids;i++) { double resid = rdata.Y[i] - beta.get(0,0); // intercept for (int j=1;j<beta.nrow;j++) resid -= beta.get(j,0)*X.get(i,j); // residuals[i] = resid; ss += resid*resid; } sigma2 = ss/N; */ //cout << "estimate " << rdata.nids << "\n"; //(rdata.X).print(); //for (int i=0;i<rdata.nids;i++) cout << rdata.masked_data[i] << " "; //cout << endl; logLikelihood(X); MatrixXd tXX_inv = Ch.solve(MatrixXd(length_beta, length_beta). Identity(length_beta, length_beta)); mematrix<double> robust_sigma2(X.ncol, X.ncol); int offset = X.ncol- 1; //if additive and interaction and 2 predictors and more than 2 betas if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2){ offset = X.ncol - 2; } if (robust) { RobustSEandCovariance(X, robust_sigma2, tXX_inv, offset); } else { PlainSEandCovariance(sigma2_internal, tXX_inv, offset); } }