Beispiel #1
0
void linear_reg::estimate(regdata& rdatain, int verbose, double tol_chol,
                          int model, int interaction, int ngpreds,
                          masked_matrix& invvarmatrixin, int robust,
                          int nullmodel)
{
    //suda ineraction parameter
    // model should come here
    regdata rdata = rdatain.get_unmasked_data();
    if (invvarmatrixin.length_of_mask != 0)
    {
        invvarmatrixin.update_mask(rdatain.masked_data);
        //  invvarmatrixin.masked_data->print();
    }
    if (verbose)
    {
        cout << rdata.is_interaction_excluded
                << " <-irdata.is_interaction_excluded\n";
        // std::cout << "invvarmatrix:\n";
        // invvarmatrixin.masked_data->print();
        std::cout << "rdata.X:\n";
        rdata.X.print();
    }
    mematrix<double> X = apply_model(rdata.X, model, interaction, ngpreds,
                                     rdata.is_interaction_excluded, false,
                                     nullmodel);
    if (verbose)
    {
        std::cout << "X:\n";
        X.print();
        std::cout << "Y:\n";
        rdata.Y.print();
    }
    int length_beta = X.ncol;
    beta.reinit(length_beta, 1);
    sebeta.reinit(length_beta, 1);
    //Han Chen
    if (length_beta > 1)
    {
        if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2)
        {
            covariance.reinit(length_beta - 2, 1);
        }
        else
        {
            covariance.reinit(length_beta - 1, 1);
        }
    }

    //Oct 26, 2009
    mematrix<double> tX = transpose(X);
    if (invvarmatrixin.length_of_mask != 0)
    {
        tX = tX * invvarmatrixin.masked_data;
        //!check if quicker
        //tX = productXbySymM(tX,invvarmatrix);
        // = invvarmatrix*X;
        // std::cout<<"new tX.nrow="<<X.nrow<<" tX.ncol="<<X.ncol<<"\n";
    }

    mematrix<double> tXX = tX * X;
    double N = X.nrow;

#if EIGEN_COMMENTEDOUT
    MatrixXd Xeigen    = X.data;
    MatrixXd tXeigen   = Xeigen.transpose();
    MatrixXd tXXeigen  = tXeigen * Xeigen;
    VectorXd Yeigen    = rdata.Y.data;
    VectorXd tXYeigen  = tXeigen * Yeigen;
    // Solve X^T * X * beta = X^T * Y for beta:
    VectorXd betaeigen = tXXeigen.fullPivLu().solve(tXYeigen);
    beta.data = betaeigen;

    if (verbose)
    {
        std::cout << setprecision(9) << "Xeigen:\n"  << Xeigen  << endl;
        std::cout << setprecision(9) << "tX:\n"  << tXeigen  << endl;
        std::cout << setprecision(9) << "tXX:\n" << tXXeigen << endl;
        std::cout << setprecision(9) << "tXY:\n" << tXYeigen << endl;
        std::cout << setprecision(9) << "beta:\n"<< betaeigen << endl;
        printf("----\n");
        printf("beta[0] = %e\n", betaeigen.data()[0]);
        printf("----\n");
//        (beta).print();
        double relative_error = (tXXeigen * betaeigen - tXYeigen).norm() / tXYeigen.norm(); // norm() is L2 norm
        cout << "The relative error is:\n" << relative_error << endl;

    }

    // This one is needed later on in this function
    mematrix<double> tXX_i = invert(tXX);
#else
    //
    // use cholesky to invert
    //
    mematrix<double> tXX_i = tXX;
    cholesky2_mm(tXX_i, tol_chol);
    chinv2_mm(tXX_i);
    // before was
    // mematrix<double> tXX_i = invert(tXX);

    mematrix<double> tXY = tX * (rdata.Y);
    beta = tXX_i * tXY;

    if (verbose)
    {
        std::cout << "tX:\n";
        tX.print();
        std::cout << "tXX:\n";
        tXX.print();
        std::cout << "chole tXX:\n";
        tXX_i.print();
        std::cout << "tXX-1:\n";
        tXX_i.print();
        std::cout << "tXY:\n";
        tXY.print();
        std::cout << "beta:\n";
        (beta).print();
    }
#endif
    // now compute residual variance
    sigma2 = 0.;
    mematrix<double> ttX = transpose(tX);
    mematrix<double> sigma2_matrix = rdata.Y;
    mematrix<double> sigma2_matrix1 = ttX * beta;
    //        std::cout << "sigma2_matrix\n";
    //        sigma2_matrix.print();
    //
    //        std::cout << "sigma2_matrix1\n";
    //        sigma2_matrix1.print();
    sigma2_matrix = sigma2_matrix - sigma2_matrix1;
    //        std::cout << "sigma2_matrix\n";
    //        sigma2_matrix.print();
    static double val;
    //  std::cout << "sigma2_matrix.nrow=" << sigma2_matrix.nrow
    //            << "sigma2_matrix.ncol" << sigma2_matrix.ncol
    //            <<"\n";

    for (int i = 0; i < sigma2_matrix.nrow; i++)
    {
        val = sigma2_matrix.get(i, 0);
        //            std::cout << "val = " << val << "\n";
        sigma2 += val * val;
        //            std::cout << "sigma2+= " << sigma2 << "\n";
    }

    double sigma2_internal = sigma2 / (N - static_cast<double>(length_beta));
    // now compute residual variance
    //      sigma2 = 0.;
    //      for (int i =0;i<(rdata.Y).nrow;i++)
    //          sigma2 += ((rdata.Y).get(i,0))*((rdata.Y).get(i,0));
    //      for (int i=0;i<length_beta;i++)
    //          sigma2 -= 2. * (beta.get(i,0)) * tXY.get(i,0);
    //      for (int i=0;i<(length_beta);i++)
    //      for (int j=0;j<(length_beta);j++)
    //          sigma2 += (beta.get(i,0)) * (beta.get(j,0)) * tXX.get(i,j);
    //  std::cout<<"sigma2="<<sigma2<<"\n";
    //  std::cout<<"sigma2_internal="<<sigma2_internal<<"\n";
    //      replaced for ML
    //      sigma2_internal = sigma2/(N - double(length_beta) - 1);
    //        std::cout << "sigma2/=N = "<< sigma2 << "\n";
    sigma2 /= N;
    //  std::cout<<"N="<<N<<", length_beta="<<length_beta<<"\n";
    if (verbose)
    {
        std::cout << "sigma2 = " << sigma2 << "\n";
    }
    /*
     loglik = 0.;
     double ss=0;
     for (int i=0;i<rdata.nids;i++) {
     double resid = rdata.Y[i] - beta.get(0,0); // intercept
     for (int j=1;j<beta.nrow;j++) resid -= beta.get(j,0)*X.get(i,j);
     // residuals[i] = resid;
     ss += resid*resid;
     }
     sigma2 = ss/N;
     */
    //cout << "estimate " << rdata.nids << "\n";
    //(rdata.X).print();
    //for (int i=0;i<rdata.nids;i++) cout << rdata.masked_data[i] << " ";
    //cout << endl;
    loglik = 0.;
    double halfrecsig2 = .5 / sigma2;
    for (int i = 0; i < rdata.nids; i++)
    {
        double resid = rdata.Y[i] - beta.get(0, 0); // intercept
        for (int j = 1; j < beta.nrow; j++)
            resid -= beta.get(j, 0) * X.get(i, j);
        residuals[i] = resid;
        loglik -= halfrecsig2 * resid * resid;
    }
    loglik -= static_cast<double>(rdata.nids) * log(sqrt(sigma2));
    // cout << "estimate " << rdata.nids << "\n";
    //
    // Ugly fix to the fact that if we do mmscore, sigma2 is already
    //  in the matrix...
    //      YSA, 2009.07.20
    //
    //cout << "estimate 0\n";
    if (invvarmatrixin.length_of_mask != 0)
        sigma2_internal = 1.0;

    mematrix<double> robust_sigma2(X.ncol, X.ncol);
    if (robust)
    {
        mematrix<double> XbyR = X;
        for (int i = 0; i < X.nrow; i++)
            for (int j = 0; j < X.ncol; j++)
            {
                double tmpval = XbyR.get(i, j) * residuals[i];
                XbyR.put(tmpval, i, j);
            }
        XbyR = transpose(XbyR) * XbyR;
        robust_sigma2 = tXX_i * XbyR;
        robust_sigma2 = robust_sigma2 * tXX_i;
    }
    //cout << "estimate 0\n";
    for (int i = 0; i < (length_beta); i++)
    {
        if (robust)
        {
            // cout << "estimate :robust\n";
            double value = sqrt(robust_sigma2.get(i, i));
            sebeta.put(value, i, 0);
            //Han Chen
            if (i > 0)
            {
                if (model == 0 && interaction != 0 && ngpreds == 2
                        && length_beta > 2)
                {
                    if (i > 1)
                    {
                        double covval = robust_sigma2.get(i, i - 2);
                        covariance.put(covval, i - 2, 0);
                    }
                }
                else
                {
                    double covval = robust_sigma2.get(i, i - 1);
                    covariance.put(covval, i - 1, 0);
                }
            }
            //Oct 26, 2009
        }
        else
        {
            // cout << "estimate :non-robust\n";
            double value = sqrt(sigma2_internal * tXX_i.get(i, i));
            sebeta.put(value, i, 0);
            //Han Chen
            if (i > 0)
            {
                if (model == 0 && interaction != 0 && ngpreds == 2
                        && length_beta > 2)
                {
                    if (i > 1)
                    {
                        double covval = sigma2_internal * tXX_i.get(i, i - 2);
                        covariance.put(covval, i - 2, 0);
                    }
                }
                else
                {
                    double covval = sigma2_internal * tXX_i.get(i, i - 1);
                    covariance.put(covval, i - 1, 0);
                }
            }
            //Oct 26, 2009
        }
    }
    //cout << "estimate E\n";
    if (verbose)
    {
        std::cout << "sebeta (" << sebeta.nrow << "):\n";
        sebeta.print();
    }
}
Beispiel #2
0
void logistic_reg::estimate(regdata& rdatain, int verbose, int maxiter,
                            double eps, double tol_chol, int model,
                            int interaction, int ngpreds,
                            masked_matrix& invvarmatrixin, int robust,
                            int nullmodel)
{
    // In contrast to the 'linear' case 'invvarmatrix' contains the
    // inverse of correlation matrix (not the inverse of var-cov matrix)
    // h2.object$InvSigma * h.object2$h2an$estimate[length(h2$h2an$estimate)]
    // the inverse of var-cov matrix scaled by total variance
    regdata rdata = rdatain.get_unmasked_data();
    // a lot of code duplicated between linear and logistic...
    // e.g. a piece below...
    mematrix<double> invvarmatrix;
    if (invvarmatrixin.length_of_mask != 0)
    {
        invvarmatrixin.update_mask(rdatain.masked_data);
    }

    mematrix<double> X = apply_model(rdata.X, model, interaction, ngpreds,
                                     rdata.is_interaction_excluded, false,
                                     nullmodel);
    int length_beta = X.ncol;
    beta.reinit(length_beta, 1);
    sebeta.reinit(length_beta, 1);
    //Han Chen
    if (length_beta > 1)
    {
        if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2)
        {
            covariance.reinit(length_beta - 2, 1);
        }
        else
        {
            covariance.reinit(length_beta - 1, 1);
        }
    }
    //Oct 26, 2009
    mematrix<double> W((X).nrow, 1);
    mematrix<double> z((X).nrow, 1);
    mematrix<double> tXWX(length_beta, length_beta);
    mematrix<double> tXWX_i(length_beta, length_beta);
    mematrix<double> tXWz(length_beta, 1);
    double prev = (rdata.Y).column_mean(0);
    if (prev >= 1. || prev <= 0.)
    {
        std::cerr << "prevalence not within (0,1)\n";
        exit(1);
    }
    for (int i = 0; i < length_beta; i++)
        beta.put(0., i, 0);

    beta.put(log(prev / (1. - prev)), 0, 0);
    mematrix<double> tX = transpose(X);
    if (invvarmatrix.nrow != 0 && invvarmatrix.ncol != 0)
    {
        //TODO(maarten):invvarmatix is symmetric:is there an more effective way?
        tX = tX * invvarmatrix;
    }
    /*
      std::cout << "\n";
      std::cout << "X " << X.get(0,0) << " " << X.get(0,1) << " "
      << X.get(0,2) << "\n";
      if (X.ncol==4) std::cout << "X[4] " << X.get(0,3) << "\n";
      std::cout << "Inv " << invvarmatrix.get(0,0) << " "
                << invvarmatrix.get(0,1) << " "
                << invvarmatrix.get(0,2) << "\n";

      if (X.ncol==4) std::cout << ,"X[4] " << invvarmatrix.get(0,3) << "\n";
      std::cout << "tXInv " << tX.get(0,0) << " " << tX.get(1,0) << " "
                << tX.get(2,0) << "%f\n";
      if (X.ncol==4) std::cout << "X[4] " << tX.get(3,0) << "\n";
    */
    niter = 0;
    double delta = 1.;
    double prevlik = 0.;
    while (niter < maxiter && delta > eps)
    {
        mematrix<double> eMu = (X) * beta;
        mematrix<double> eMu_us = eMu;
        for (int i = 0; i < eMu.nrow; i++)
        {
            double emu = eMu.get(i, 0);
            double value = emu;
            double zval = 0.;
            value = exp(value) / (1. + exp(value));
            residuals[i] = (rdata.Y).get(i, 0) - value;
            eMu.put(value, i, 0);
            W.put(value * (1. - value), i, 0);
            zval = emu
                    + (1. / (value * (1. - value)))
                            * (((rdata.Y).get(i, 0)) - value);
            z.put(zval, i, 0);
        }

        mematrix<double> tmp = productMatrDiag(tX, W);
        if (verbose)
        {
            std::cout << "tXW:\n";
            tmp.print();
        }
        mematrix<double> tXWX = tmp * (X);
        //N = tXWX.get(0, 0);
        if (verbose)
        {
            std::cout << "tXWX:\n";
            tXWX.print();
        }
        // std::cout << "tXWX:\n";tXWX.print();
        //
        // use cholesky to invert
        //
        // tXWX_i = tXWX;
        //cholesky2_mm(tXWX_i,tol_chol);
        //if (verbose) {std::cout << "chole tXWX:\n"; tXWX_i.print();}
        //std::cout << "chole tXWX:\n"; tXWX_i.print();
        //chinv2_mm(tXWX_i);
        // was before
        tXWX_i = invert(tXWX);
        if (verbose)
        {
            std::cout << "tXWX-1:\n";
            tXWX_i.print();
        }
        // std::cout << "*** tXWX_i\n"; tXWX_i.print();
        mematrix<double> tmp1 = productMatrDiag(tX, W);
        mematrix<double> tXWz = tmp1 * z;
        if (verbose)
        {
            std::cout << "tXWz:\n";
            tXWz.print();
        }
        beta = tXWX_i * tXWz;
        // std::cout << "*** res: " << residuals[0] << " "
        //           << residuals[1] << " " << residuals[2] << "\n";
        //mematrix<double> txres = tx * residuals;
        // std::cout << "*** txres\n";txres.print();
        //beta = txwx_i* txres;
        if (verbose)
        {
            std::cout << "beta:\n";
            beta.print();
        }
        // std::cout << "beta:\n"; beta.print();
        // compute likelihood
        prevlik = loglik;
        loglik = 0.;
        for (int i = 0; i < eMu.nrow; i++)
            loglik += rdata.Y[i] * eMu_us[i] - log(1. + exp(eMu_us[i]));

        delta = fabs(1. - (prevlik / loglik));
        niter++;
    }
    sigma2 = 0.;
    mematrix<double> robust_sigma2(X.ncol, X.ncol);
    if (robust)
    {
        mematrix<double> XbyR = X;
        for (int i = 0; i < X.nrow; i++)
            for (int j = 0; j < X.ncol; j++)
            {
                double tmpval = XbyR.get(i, j) * residuals[i];
                XbyR.put(tmpval, i, j);
            }
        XbyR = transpose(XbyR) * XbyR;
        robust_sigma2 = tXWX_i * XbyR;
        robust_sigma2 = robust_sigma2 * tXWX_i;
    }
    for (int i = 0; i < (length_beta); i++)
    {
        if (robust)
        {
            double value = sqrt(robust_sigma2.get(i, i));
            sebeta.put(value, i, 0);
            //Han Chen
            if (i > 0)
            {
                if (model == 0 && interaction != 0 && ngpreds == 2
                        && length_beta > 2)
                {
                    if (i > 1)
                    {
                        double covval = robust_sigma2.get(i, i - 2);
                        covariance.put(covval, i - 2, 0);
                    }
                }
                else
                {
                    double covval = robust_sigma2.get(i, i - 1);
                    covariance.put(covval, i - 1, 0);
                }
            }
            //Oct 26, 2009
        }
        else
        {
            double value = sqrt(tXWX_i.get(i, i));
            sebeta.put(value, i, 0);
            //Han Chen
            if (i > 0)
            {
                if (model == 0 && interaction != 0 && ngpreds == 2
                        && length_beta > 2)
                {
                    if (i > 1)
                    {
                        double covval = tXWX_i.get(i, i - 2);
                        covariance.put(covval, i - 2, 0);
                    }
                }
                else
                {
                    double covval = tXWX_i.get(i, i - 1);
                    covariance.put(covval, i - 1, 0);
                }
            }
            //Oct 26, 2009
        }
    }
    if (verbose)
    {
        std::cout << "sebeta (" << sebeta.nrow << "):\n";
        sebeta.print();
    }
    // std::cout << "beta (" << beta.nrow << "):\n"; beta.print();
    // std::cout << "sebeta (" << sebeta.nrow << "):\n"; sebeta.print();
    // exit(1);
}
Beispiel #3
0
/**
 * \brief Estimate the parameters for linear regression.
 *
 * @param verbose Turns verbose printing of various matrices on if
 * non-zero.
 * @param model The number of the genetic model (e.g. additive,
 * recessive, ...) that is to be applied by the apply_model() function.
 * @param interaction
 * @param ngpreds Number of genomic predictors (1 for dosages, 2 for
 * probabilities).
 * @param invvarmatrixin
 * @param robust If non-zero calculate robust standard errors.
 * @param nullmodel If non-zero calculate the null model (excluding
 * SNP information).
 */
void linear_reg::estimate(const int verbose,
                          const int model,
                          const int interaction,
                          const int ngpreds,
                          masked_matrix& invvarmatrixin,
                          const int robust,
                          const int nullmodel) {
    // suda interaction parameter
    // model should come here
    //regdata rdata = rdatain.get_unmasked_data();

    if (verbose)
    {
        cout << reg_data.is_interaction_excluded
                << " <-rdata.is_interaction_excluded\n";
        // std::cout << "invvarmatrix:\n";
        // invvarmatrixin.masked_data->print();
        std::cout << "rdata.X:\n";
        reg_data.X.print();
    }

    mematrix<double> X = apply_model(reg_data.X, model, interaction, ngpreds,
            reg_data.is_interaction_excluded, false, nullmodel);
    if (verbose)
    {
        std::cout << "X:\n";
        X.print();
        std::cout << "Y:\n";
        reg_data.Y.print();
    }

    int length_beta = X.ncol;
    beta.reinit(length_beta, 1);
    sebeta.reinit(length_beta, 1);
    //Han Chen
    if (length_beta > 1)
    {
        if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2)
        {
            covariance.reinit(length_beta - 2, 1);
        }
        else
        {
            covariance.reinit(length_beta - 1, 1);
        }
    }

    double sigma2_internal;

    LDLT <MatrixXd> Ch;
    if (invvarmatrixin.length_of_mask != 0)
    {
        //retrieve masked data W
        invvarmatrixin.update_mask(reg_data.masked_data);
        mmscore_regression(X, invvarmatrixin, Ch);
        double N = X.nrow;
        //sigma2_internal = sigma2 / (N - static_cast<double>(length_beta));
        // Ugly fix to the fact that if we do mmscore, sigma2 is already
        //  in the matrix...
        //      YSA, 2009.07.20
        sigma2_internal = 1.0;
        sigma2 /= N;
    }
    else  // NO mm-score regression : normal least square regression
    {
        LeastSquaredRegression(X, Ch);
        double N = static_cast<double>(X.nrow);
        double P = static_cast<double>(length_beta);
        sigma2_internal = sigma2 / (N - P);
        sigma2 /= N;
    }
    /*
     loglik = 0.;
     double ss=0;
     for (int i=0;i<rdata.nids;i++) {
     double resid = rdata.Y[i] - beta.get(0,0); // intercept
     for (int j=1;j<beta.nrow;j++) resid -= beta.get(j,0)*X.get(i,j);
     // residuals[i] = resid;
     ss += resid*resid;
     }
     sigma2 = ss/N;
     */
    //cout << "estimate " << rdata.nids << "\n";
    //(rdata.X).print();
    //for (int i=0;i<rdata.nids;i++) cout << rdata.masked_data[i] << " ";
    //cout << endl;
    logLikelihood(X);

    MatrixXd tXX_inv = Ch.solve(MatrixXd(length_beta, length_beta).
                                Identity(length_beta, length_beta));
    mematrix<double> robust_sigma2(X.ncol, X.ncol);

    int offset = X.ncol- 1;
     //if additive and interaction and 2 predictors and more than 2 betas
     if (model == 0 && interaction != 0 && ngpreds == 2 && length_beta > 2){
         offset = X.ncol - 2;
     }

    if (robust)
    {
        RobustSEandCovariance(X, robust_sigma2, tXX_inv, offset);
    }
    else
    {
        PlainSEandCovariance(sigma2_internal, tXX_inv, offset);
    }
}