Пример #1
0
bool FirthRegression::FitFirthModel(Matrix & X, Vector & y, int nrrounds)
{
  this-> Reset(X);

  G_to_Eigen(X, &this->w->X);
  G_to_Eigen(y, &this->w->y);

  int rounds = 0;
  // double lastDeviance, currentDeviance;
  Eigen::MatrixXf xw; // W^(1/2) * X
  // Newton-Raphson  
  while (rounds < nrrounds) {
    // std::cout << "beta = " << this->w->beta << "\n";
    this->w->eta = this->w->X * this->w->beta;
    this->w->p = (1.0 + (-this->w->eta.array()).exp()).inverse();
    this->w->V = this->w->p.array() * (1.0 - this->w->p.array());

    xw = (this->w->V.array().sqrt().matrix().asDiagonal() * this->w->X).eval(); // W^(1/2) * X 
    this->w->D = xw.transpose() * xw; // X' V X
    this->w->covB = this->w->D.eval().ldlt().solve(Eigen::MatrixXf::Identity(this->w->D.rows(), this->w->D.rows()));

    // double rel = ((this->w->D * this->w->covB).array() - Eigen::MatrixXf::Identity(this->w->D.rows(), this->w->D.rows()).array()).matrix().norm() / this->w->D.rows() / this->w->D.rows();
    // // printf("norm = %g\n", rel);
    // if (rel > 1e-6) { // use relative accuracy to evalute convergence
    if ((this->w->D * this->w->covB - Eigen::MatrixXf::Identity(this->w->D.rows(), this->w->D.rows())).norm() > 1e-3) {      
      // cannot inverse
      // printToFile(this->w->D, "matD", "D");
      // printToFile(this->w->covB, "matCovB", "B");
      return false;
    }
    this->w->h = (xw * this->w->covB * xw.transpose()).diagonal();
    this->w->r = this->w->X.transpose() * (this->w->y - this->w->p + (this->w->h.array() * (0.5 - this->w->p.array())).matrix()); // X' (y-mu)
    this->w->delta_beta = this->w->covB * this->w->r;
    this->w->beta += this->w->delta_beta;
    // printf("norm = %g\n", this->w->delta_beta.norm());
    // use relative accuracy to evalute convergence
    if (rounds > 1 && (this->w->beta.norm() > 0 && this->w->delta_beta.norm() / this->w->beta.norm() < 1e-6)) {
      rounds = 0;
      break;
    }
    rounds ++;
  }
  if (rounds == nrrounds)
  {
    printf("Not enough iterations!");
    return false;
  }
  // this->w->covB = this->w->D.eval().llt().solve(Eigen::MatrixXf::Identity(this->w->D.rows(), this->w->D.rows()));

  Eigen_to_G(this->w->beta, &B);
  Eigen_to_G(this->w->covB, &covB);
  Eigen_to_G(this->w->p, &p);
  Eigen_to_G(this->w->V, &V);

  return true;
}
Пример #2
0
bool FirthRegression::FitFirthModel(Matrix & X, Vector & succ, Vector& total, int nrrounds) {
  this-> Reset(X);

  G_to_Eigen(X, &this->w->X);
  G_to_Eigen(succ, &this->w->succ);
  G_to_Eigen(total, &this->w->total);

  int rounds = 0;
  // double lastDeviance, currentDeviance;
  Eigen::MatrixXf xw; // W^(1/2) * X
  // Newton-Raphson
  while (rounds < nrrounds) {
    // beta = beta + solve( t(X)%*%diag(p*(1-p)) %*%X) %*% t(X) %*% (Y-p);
    this->w->eta = this->w->X * this->w->beta;
    this->w->p = (-this->w->eta.array().exp() + 1.0).inverse();
    this->w->V = this->w->p.array() * (1.0 - this->w->p.array()) * this->w->total.array();

    xw = (this->w->V.array().sqrt().matrix().asDiagonal() * this->w->X).eval();
    this->w->D = xw.transpose() * xw; // this->w->X.transpose() * this->w->V.asDiagonal() * this->w->X; // X' V X
    this->w->covB = this->w->D.eval().llt().solve(Eigen::MatrixXf::Identity(this->w->D.rows(), this->w->D.rows()));
    // double rel = ((this->w->D * this->w->covB).array() - Eigen::MatrixXf::Identity(this->w->D.rows(), this->w->D.rows()).array()).matrix().norm() / this->w->D.rows() / this->w->D.rows();
    // if (rel > 1e-6) { // use relative accuracy to evalute convergence
    if ((this->w->D * this->w->covB - Eigen::MatrixXf::Identity(this->w->D.rows(), this->w->D.rows())).norm() > 1e-3) {
      // cannot inverse
      return false;
    }
    this->w->h = (xw.transpose() * this->w->covB * xw).diagonal();
    this->w->r = this->w->X.transpose() * (this->w->succ.array() - this->w->total.array() * this->w->p.array()
                                           + this->w->total.array() * this->w->h.array() * (0.5 - this->w->p.array())).matrix();
    this->w->delta_beta = this->w->covB * this->w->r;
    this->w->beta += this->w->delta_beta;
    if (rounds > 1 && (this->w->beta.norm() > 0 && this->w->delta_beta.norm() / this->w->beta.norm() < 1e-6)) {
      rounds = 0;
      break;
    }
    rounds ++;
  }
  if (rounds == nrrounds)
  {
    printf("Not enough iterations!");
    return false;
  }

  Eigen_to_G(this->w->beta, &B);
  Eigen_to_G(this->w->covB, &covB);
  Eigen_to_G(this->w->p, &p);
  Eigen_to_G(this->w->V, &V);

  return true;
}
Пример #3
0
void CholeskyInverseMatrix(Matrix& in, Matrix* out) {
  if (in.rows != in.cols) return;

  const int n = in.rows;
  Eigen::MatrixXf x, res;
  G_to_Eigen(in, &x);
  res = x.ldlt().solve(Eigen::MatrixXf::Identity(n, n));
  Eigen_to_G(res, out);
}
Пример #4
0
  double GetQFromNewResidual(
      Vector& res_G)  // e.g. permuted residual under NULL
  {
    // calculate Q
    Eigen::VectorXf res;
    G_to_Eigen(res_G, &res);
    double Q = (this->K_sqrt * res).squaredNorm();

    return Q;
  }
Пример #5
0
 int TestCovariate(Matrix& Xnull, Matrix& y, Matrix& Xcol) {
   G_to_Eigen(Xcol, &this->g);
   U = (resid.transpose() * sigmaMatInv * g).sum();
   V_GG = (g.transpose() * sigmaMatInv * g);
   V_XG = (x.transpose() * sigmaMatInv * g);
   V_XX = (x.transpose() * sigmaMatInv * x);
   const int m = V_XX.rows();
   V = (V_GG - V_XG.transpose() * V_XX.ldlt().solve(Eigen::MatrixXf::Identity(m, m)) * V_XG).sum();
   
   return 0;
 }
Пример #6
0
  int FitNullModel(Matrix& Xnull, Matrix& Y) {
    G_to_Eigen(Xnull, &this->x);
    G_to_Eigen(Y, &this->y);

    // append identity matrix
    AppendIdentity();
    
    // initialize beta and sigma2
    sigma2.resize(kinship.size());
    for (size_t i = 0; i < sigma2.size(); ++i) {
      sigma2[i] = 1.0;
    }
    calculateSigmaMat();
    calculateBeta();
    
    // fit null model
    calculateLLK();
    double oldLLK = llk;
    int time = 1;
    double diff;
    while (true) {
      calculateSigma2();
      diff = llk - oldLLK;
      if (diff < 1e-6) {
#ifdef DEBUG        
        fprintf(stderr, "Model converges or llk cannot be improved\n");
#endif
        break;
      }
      oldLLK = llk;
      time ++;
      if (time > 100) {
#ifdef DEBUG                
        fprintf(stderr, "Model probably do not converge at 100 times, llk = %g", llk);
#endif
        break;
      }      
    }  
    return 0;
  }
Пример #7
0
  int TestCovariate(Matrix& Xnull, Matrix& Y, Matrix& Xcol,
                    const EigenMatrix& kinshipU, const EigenMatrix& kinshipS){
    Eigen::MatrixXf g;
    G_to_Eigen(Xcol, &g);

    // store U'*G for computing AF later.
    const Eigen::MatrixXf& U = kinshipU.mat;
    this->ug = U.transpose() * g;

    Eigen::RowVectorXf g_mean = g.colwise().mean();
    g = g.rowwise() - g_mean;

    double gTg = g.array().square().sum();
    double t_new = (g.array() * this->transformedY.array()).sum();
    t_new = t_new * t_new / gTg;
    double t_score = t_new / this->gamma;
    this->betaG = (g.transpose() * this->transformedY).sum() / gTg / this->gamma;
    this->betaGVar = this->ySigmaY / gTg / this->gamma;

    this->pvalue = gsl_cdf_chisq_Q(t_score, 1.0);
    return 0;
  }
Пример #8
0
  int Fit(Vector& res_G,  // residual under NULL -- may change when permuting
          Vector& v_G,    // variance under NULL -- may change when permuting
          Matrix& X_G,    // covariance
          Matrix& G_G,    // genotype
          Vector& w_G)    // weight
  {
    this->nPeople = X_G.rows;
    this->nMarker = G_G.cols;
    this->nCovariate = X_G.cols;

    // calculation w_sqrt
    G_to_Eigen(w_G, &this->w_sqrt);
    w_sqrt = w_sqrt.cwiseSqrt();

    // calculate K = G * W * G'
    Eigen::MatrixXf G;
    G_to_Eigen(G_G, &G);
    this->K_sqrt.noalias() = w_sqrt.asDiagonal() * G.transpose();

    // calculate Q = ||res * K||
    Eigen::VectorXf res;
    G_to_Eigen(res_G, &res);
    this->Q = (this->K_sqrt * res).squaredNorm();

    // calculate P0 = V - V X (X' V X)^(-1) X' V
    Eigen::VectorXf v;
    G_to_Eigen(v_G, &v);
    if (this->nCovariate == 1) {
      P0 = -v * v.transpose() / v.sum();
      // printf("dim(P0) = %d, %d\n", P0.rows(), P0.cols());
      // printf("dim(v) = %d\n", v.size());
      P0.diagonal() += v;
      // printf("dim(v) = %d\n", v.size());
    } else {
      Eigen::MatrixXf X;
      G_to_Eigen(X_G, &X);
      Eigen::MatrixXf XtV;  // X^t V
      XtV.noalias() = X.transpose() * v.asDiagonal();
      P0 = -XtV.transpose() * ((XtV * X).inverse()) * XtV;
      P0.diagonal() += v;
    }
    // dump();
    // Eigen::MatrixXf tmp = K_sqrt * P0 * K_sqrt.transpose();
    // dumpToFile(tmp, "out.tmp");
    // eigen decomposition
    Eigen::SelfAdjointEigenSolver<Eigen::MatrixXf> es;
    es.compute(K_sqrt * P0 * K_sqrt.transpose());

#ifdef DEBUG
    std::ofstream k("K");
    k << K_sqrt;
    k.close();
#endif
    // std::ofstream p("P0");
    // p << P0;
    // p.close();

    this->mixChiSq.reset();
    int r_ub = std::min(nPeople, nMarker);
    int r = 0;  // es.eigenvalues().size();
    int eigen_len = es.eigenvalues().size();
    for (int i = eigen_len - 1; i >= 0; i--) {
      if (es.eigenvalues()[i] > ZBOUND && r < r_ub) {
        this->mixChiSq.addLambda(es.eigenvalues()[i]);
        r++;
      } else
        break;
    }
    // calculate p-value
    this->pValue = this->mixChiSq.getPvalue(this->Q);
    if (this->pValue == 0.0 || this->pValue == 1.0) {
      this->pValue = this->mixChiSq.getLiuPvalue(this->Q);
    }
    return 0;
  };
Пример #9
0
 int AppendKinship(Matrix& k) {
   Eigen::MatrixXf mat;
   G_to_Eigen(k, &mat);
   return AppendKinship(mat);
 }
Пример #10
0
  int FitNullModel(Matrix& mat_Xnull, Matrix& mat_y,
                   const EigenMatrix& kinshipU, const EigenMatrix& kinshipS){
    // type conversion
    Eigen::MatrixXf x;
    Eigen::MatrixXf y;
    G_to_Eigen(mat_Xnull, &x);
    G_to_Eigen(mat_y, &y);
    this->lambda = kinshipS.mat;
    const Eigen::MatrixXf& U = kinshipU.mat;
    // rotate
    this->ux = U.transpose() * x;
    this->uy = U.transpose() * y;

    // get beta, sigma2_g and delta
    // where delta = sigma2_e / sigma2_g
    double loglik[101];
    int maxIndex = -1;
    double maxLogLik = 0;
    for (int i = 0; i <= 100; ++i ){
      double d = exp(-10 + i * 0.2);
      getBetaSigma2(d);
      loglik[i] = getLogLikelihood(d);
      // fprintf(stderr, "%d\tdelta=%g\tll=%lf\n", i, delta, loglik[i]);
      if (std::isnan(loglik[i])) {
        continue;
      }
      if (maxIndex < 0 || loglik[i] > maxLogLik) {
        maxIndex = i;
        maxLogLik = loglik[i];
      }
    }
    if (maxIndex < -1) {
      fprintf(stderr, "Cannot optimize\n");
      return -1;
    }
    if (maxIndex == 0 || maxIndex == 100) {
      // on the boundary
      // do not try maximize it.
    } else {
      gsl_function F;
      F.function = goalFunction;
      F.params = this;

      Minimizer minimizer;
      double lb = exp(-10 + (maxIndex-1) * 0.2);
      double ub = exp(-10 + (maxIndex+1) * 0.2);
      double start =  exp(-10 + maxIndex * 0.2);
      if (minimizer.minimize(F, start, lb, ub)) {
        // fprintf(stderr, "Minimization failed, fall back to initial guess.\n");
        this->delta = start;
      } else {
        this->delta = minimizer.getX();
        // fprintf(stderr, "minimization succeed when delta = %g, sigma2_g = %g\n", this->delta, this->sigma2_g);
      }
    }
    // store some intermediate results
    // fprintf(stderr, "maxIndex = %d, delta = %g, Try brent\n", maxIndex, delta);
    // fprintf(stderr, "beta[%d][%d] = %g\n", (int)beta.rows(), (int)beta.cols(), beta(0,0));
    this->h2 =  1.0 /(1.0 + this->delta);
    this->sigma2 = this->sigma2_g * this->h2;
    
    // we derive different formular to replace original eqn (7)
    this->gamma = (this->lambda.array() / (this->lambda.array() + this->delta)).sum() / this->sigma2_g / (this->ux.rows() - 1 ) ;
    // fprintf(stderr, "gamma = %g\n", this->gamma);
    // transformedY = \Sigma^{-1} * (y_tilda) and y_tilda = y - X * \beta
    // since \Sigma = (\sigma^2_g * h^2 ) * (U * (\lambda + delta) * U')
    // transformedY = 1 / (\sigma^2_g * h^2 ) * (U * (\lambda+delta)^{-1} * U' * (y_tilda))
    //              = 1 / (\sigma^2_g * h^2 ) * (U * \lambda^{-1} * (uResid))
    // since h^2 = 1 / (1+delta)
    // transformedY = (1 + delta/ (\sigma^2_g ) * (U * \lambda^{-1} * (uResid))
    Eigen::MatrixXf resid = y - x * (x.transpose() * x).eval().ldlt().solve(x.transpose() * y); // this is y_tilda
            
    this->transformedY.noalias() =  U.transpose() * resid;
    this->transformedY = (this->lambda.array() + this->delta).inverse().matrix().asDiagonal() * this->transformedY;
    this->transformedY = U * this->transformedY;
    this->transformedY /= this->sigma2_g;
    // fprintf(stderr, "transformedY(0,0) = %g\n", transformedY(0,0));
    
    this->ySigmaY= (resid.array() * transformedY.array()).sum();
    return 0;
  }
Пример #11
0
  int FitNullModel(Matrix& mat_Xnull, Matrix& mat_y,
                   const EigenMatrix& kinshipU, const EigenMatrix& kinshipS){
    // sanity check
    if (mat_Xnull.rows != mat_y.rows) return -1;
    if (mat_Xnull.rows != kinshipU.mat.rows()) return -1;
    if (mat_Xnull.rows != kinshipS.mat.rows()) return -1;
    
    // type conversion
    G_to_Eigen(mat_Xnull, &this->ux);
    G_to_Eigen(mat_y, &this->uy);
    this->lambda = kinshipS.mat;
    const Eigen::MatrixXf& U = kinshipU.mat;

    // rotate
    this->ux = U.transpose() * this->ux;
    this->uy = U.transpose() * this->uy;

    // get beta, sigma and delta
    // where delta = sigma2_e / sigma2_g
    double loglik[101];
    int maxIndex = -1;
    double maxLogLik = 0;
    for (int i = 0; i <= 100; ++i ){
      delta = exp(-10 + i * 0.2);
      getBetaSigma2(delta);
      loglik[i] = getLogLikelihood(delta);
#ifdef DEBUG
      fprintf(stderr, "%d\tdelta=%g\tll=%lf\n", i, delta, loglik[i]);
      fprintf(stderr, "beta(0)=%lf\tsigma2=%lf\n",
              beta(0), sigma2);
#endif
      if (std::isnan(loglik[i])) {
        continue;
      }
      if (maxIndex < 0 || loglik[i] > maxLogLik) {
        maxIndex = i;
        maxLogLik = loglik[i];
      }
    }
    if (maxIndex < -1) {
      fprintf(stderr, "Cannot optimize\n");
      return -1;
    }
#if 0
    fprintf(stderr, "maxIndex = %d\tll=%lf\t\tbeta(0)=%lf\tsigma2=%lf\n",
            maxIndex, maxLogLik, beta(0), sigma2);
#endif

    if (maxIndex == 0 || maxIndex == 100) {
      // on the boundary
      // do not try maximize it.
    } else {
      gsl_function F;
      F.function = goalFunction;
      F.params = this;

      Minimizer minimizer;
      double lb = exp(-10 + (maxIndex-1) * 0.2);
      double ub = exp(-10 + (maxIndex+1) * 0.2);
      double start =  exp(-10 + maxIndex * 0.2);
      if (minimizer.minimize(F, start, lb, ub)) {
        fprintf(stderr, "Minimization failed, fall back to initial guess.\n");
        this->delta = start;
      } else {
        this->delta = minimizer.getX();
#ifdef DEBUG       
        fprintf(stderr, "minimization succeed when delta = %g, sigma2 = %g\n", this->delta, this->sigma2);
#endif
      }
    }
    // store some intermediate results
#ifdef DEBUG       
    fprintf(stderr, "delta = sigma2_e/sigma2_g, and sigma2 is sigma2_g\n");
    fprintf(stderr, "maxIndex = %d, delta = %g, Try brent\n", maxIndex, delta);
    fprintf(stderr, "beta[0][0] = %g\t sigma2_g = %g\tsigma2_e = %g\n", beta(0,0), this->sigma2, delta * sigma2);
#endif
    // if (this->test == MetaCov::LRT) {
    // this->nullLikelihood = getLogLikelihood(this->delta);
    // } else if (this->test == MetaCov::SCORE) {
    //   this->uResid = this->uy - this->ux * this->beta;
    // }
    return 0;
  }