Esempio n. 1
0
int KinshipHolder::loadDecomposed() {
  LineReader lr(this->eigenFileName);
  int lineNo = 0;
  int fieldLen = 0;
  std::vector<std::string> fd;
  std::vector<int> columnToExtract;
  std::vector<std::string> header;  // header line of the kinship eigen file
  Eigen::MatrixXf& matK = this->matK->mat;
  Eigen::MatrixXf& matS = this->matS->mat;
  Eigen::MatrixXf& matU = this->matU->mat;
  const std::vector<std::string>& names = *this->pSample;
  const int NumSample = (int)names.size();
  std::map<std::string, int> nameMap;
  makeMap(names, &nameMap);
  std::map<std::string, int> headerMap;

  while (lr.readLineBySep(&fd, "\t ")) {
    ++lineNo;
    if (lineNo == 1) {  // check header
      header = fd;
      fieldLen = fd.size();
      if (fieldLen < 3) {  // at least three columns: IID, Lambda, U1
        logger->error(
            "Insufficient column number (<3) in the first line of kinsihp "
            "file!");
        return -1;
      };
      for (size_t i = 0; i != fd.size(); ++i) {
        fd[i] = tolower(fd[i]);
      }
      makeMap(fd, &headerMap);
      if (fd.size() != headerMap.size()) {
        logger->error("Kinship file have duplicated headers!");
        return -1;
      }

      // check IID, Lambda, U1, U2, ... U(N) where (N) is the sample size
      if (headerMap.count("iid") == 0) {
        logger->error("Missing 'IID' column!");
        return -1;
      }
      columnToExtract.push_back(headerMap["iid"]);

      if (headerMap.count("lambda") == 0) {
        logger->error("Missing 'Lambda' column!");
        return -1;
      }
      columnToExtract.push_back(headerMap["lambda"]);

      std::string s;
      for (int i = 0; i < NumSample; ++i) {
        s = "u";
        s += toString(i + 1);
        if (headerMap.count(s) == 0) {
          logger->error("Missing '%s' column!", s.c_str());
          return -1;
        }
        columnToExtract.push_back(headerMap[s]);
      }
      s = "u";
      s += toString(NumSample + 1);
      if (headerMap.count(s) != 0) {
        logger->error("Unexpected column '%s'!", s.c_str());
        return -1;
      }

      matS.resize(NumSample, 1);
      matU.resize(NumSample, NumSample);
      continue;
    }
    // body lines
    if ((int)fd.size() != fieldLen) {
      logger->error(
          "Inconsistent column number [ %zu ] (used to be [ %d ])in kinship "
          "file line [ %d ] - skip this file!",
          fd.size(), fieldLen, lineNo);
      return -1;
    }

    const int iidColumn = columnToExtract[0];
    const std::string& iid = fd[iidColumn];
    if (nameMap.count(iid) == 0) {
      logger->error("Unexpected sample [ %s ]!", iid.c_str());
      return -1;
    }
    const int row = nameMap[iid];

    const int lambdaColumn = columnToExtract[1];
    double temp = 0.0;
    if (!str2double(fd[lambdaColumn], &temp)) {
      logger->warn("Invalid numeric value [ %s ] treated as zero!",
                   fd[lambdaColumn].c_str());
    }
    matS(lineNo - 2, 0) = temp;

    for (int i = 0; i < NumSample; ++i) {
      int uColumn = columnToExtract[i + 2];
      if (!str2double(fd[uColumn], &temp)) {
        logger->warn("Invalid numeric value [ %s ] treated as zero!",
                     fd[lambdaColumn].c_str());
      }
      matU(row, i) = temp;
    }
  }

  // verify eigen decomposition results make senses
  // check largest eigen vector and eigen value
  Eigen::MatrixXf v1 = matK * matU.col(0);
  Eigen::MatrixXf v2 = matS(0, 0) * matU.col(0);
  if (matS(0, 0) > 0.5 && v1.col(0).norm() > .5 && v2.col(0).norm() > 0.5 &&
      corr(v1, v2) < 0.8) {
    logger->warn("Cannot verify spectral decompose results!");
    return -1;
  }

  // check the min(10, NumSample) random eigen vector and eigen value
  int randomCol = 10;
  if (randomCol > NumSample - 1) {
    randomCol = NumSample - 1;
  }
  v1 = matK * matU.col(randomCol);
  v2 = matS(randomCol, 0) * matU.col(randomCol);
  if (matS(randomCol, 0) > 0.5 && v1.col(0).norm() > 0.5 &&
      v2.col(0).norm() > 0.5 && corr(v1, v2) < 0.8) {
    logger->warn("Cannot verify spectral decompose results!");
    return -1;
  }

#ifdef DEBUG
  std::string tmp = fn;
  tmp += ".tmp";
  std::ofstream ofs(tmp.c_str(), std::ofstream::out);
  ofs << mat;
  ofs.close();
#endif

  // fprintf(stderr, "Kinship matrix [ %d x %d ] loaded", (int)mat.rows(),
  // (int)mat.cols());

  if (this->matK) {
    delete this->matK;
    this->matK = NULL;
  }

  return 0;
}
Esempio n. 2
0
// Constructor (1): builds the "tilde" quantities from the joint info 'jj' and
// the z info 'z'.
//
// Steps performed in the body:
//   1. take the U matrix from the SVD of *z.m_Cmat and report its size and
//      its rank at two tolerances (1.e-8 and 1.e-14);
//   2. when m_env.checkingLevel() >= 1, fill the upper triangle of a check
//      matrix with the scalar products of the columns of U and print it;
//   3. copy the first z.m_Cmat_rank columns of U into m_Cmat_tilde;
//   4. write m_Cmat_tilde out if this sub-environment id is listed in
//      gcmOptionsObj.m_ov.m_dataOutputAllowedSet;
//   5. call m_Cmat_tilde.svdSolve(*z.m_Cmat, m_Lmat) to form m_Lmat
//      (presumably solving m_Cmat_tilde * L = Cmat -- confirm against
//      svdSolve()), then hand over to commonConstructor(z).
//
// All progress messages go to m_env.subDisplayFile() when it is non-null.
GcmZTildeInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>::GcmZTildeInfo(
  const GpmsaComputerModelOptions&                     gcmOptionsObj,
  const GcmJointInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>& jj,
  const GcmZInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>&     z)
  :
  m_env                     (jj.m_env),
  m_Cmat_tilde              (m_env,jj.m_omega_space.map(),z.m_Cmat_rank),
  m_z_tilde_space           (m_env, "z_tilde_", z.m_Cmat_rank, NULL),
  m_Lmat                    (m_env,m_z_tilde_space.map(),z.m_Cmat->numCols()),
  m_Lmat_t                  (m_env,z.m_z_space.map(),z.m_Cmat_rank),
  m_Zvec_tilde_hat          (m_z_tilde_space.zeroVector()),
  m_tmp_Smat_z_tilde        (m_z_tilde_space.zeroVector()),
  m_tmp_Smat_extra_tilde    (m_z_tilde_space.zeroVector()),
  m_tmp_Smat_z_tilde_hat    (m_z_tilde_space.zeroVector()),
  m_tmp_Smat_z_tilde_hat_inv(m_z_tilde_space.zeroVector())
{
  if (m_env.subDisplayFile()) {
    *m_env.subDisplayFile()
      << "Entering GcmZTildeInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>::constructor(1)"
      << std::endl;
  }

  // ---- Step 1: U factor of the SVD of Cmat, plus rank diagnostics ----
  D_M svdU(z.m_Cmat->svdMatU());
  const unsigned int rankTol8  = svdU.rank(0.,1.e-8 ); // todo: should be an option
  const unsigned int rankTol14 = svdU.rank(0.,1.e-14);
  if (m_env.subDisplayFile()) {
    *m_env.subDisplayFile()
      << "In GcmZTildeInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>::constructor(1)"
      << ": matU.numRowsLocal() = "  << svdU.numRowsLocal()
      << ", matU.numCols() = "       << svdU.numCols()
      << ", matU.rank(0.,1.e-8) = "  << rankTol8
      << ", matU.rank(0.,1.e-14) = " << rankTol14
      << std::endl;
  }

  // ---- Step 2 (optional): scalar products between the columns of U ----
  if (m_env.checkingLevel() >= 1) {
    D_M gramCheck(z.m_z_space.zeroVector());
    D_V leftCol (jj.m_omega_space.zeroVector());
    D_V rightCol(jj.m_omega_space.zeroVector());
    const unsigned int numUCols = svdU.numCols();
    for (unsigned int a = 0; a < numUCols; ++a) {
      svdU.getColumn(a,leftCol);
      // Only the upper triangle (b >= a) is filled.
      for (unsigned int b = a; b < numUCols; ++b) {
        svdU.getColumn(b,rightCol);
        gramCheck(a,b) = scalarProduct(leftCol,rightCol);
      }
    }
    gramCheck.setPrintHorizontally(false);
    if (m_env.subDisplayFile()) {
      *m_env.subDisplayFile()
        << "In GcmZTildeInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>::constructor(1)"
        << ": m_Cmat_rank = "              << z.m_Cmat_rank
        << ", matUcheck.numRowsLocal() = " << gramCheck.numRowsLocal()
        << ", matUcheck.numCols() = "      << gramCheck.numCols()
        << ", matUcheck =\n"               << gramCheck
        << std::endl;
    }
  }

  // ---- Step 3: naive formation of 'm_Cmat_tilde' ----
  D_V columnBuf(jj.m_omega_space.zeroVector());
  for (unsigned int col = 0; col < z.m_Cmat_rank; ++col) {
    svdU.getColumn(col,columnBuf);
    m_Cmat_tilde.setColumn(col,columnBuf);
  }

  // ---- Step 4: optional dump of 'm_Cmat_tilde' ----
  const bool outputAllowed =
    (gcmOptionsObj.m_ov.m_dataOutputAllowedSet.find(m_env.subId()) !=
     gcmOptionsObj.m_ov.m_dataOutputAllowedSet.end());
  if (outputAllowed) {
    // The write is restricted to this sub-environment's id.
    std::set<unsigned int> ownSubIdSet;
    ownSubIdSet.insert(m_env.subId());
    m_Cmat_tilde.subWriteContents("Ctilde", "Ctilde2", "m", ownSubIdSet);
  }
  if (m_env.subDisplayFile()) {
    *m_env.subDisplayFile()
      << "In GcmZTildeInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>::constructor(1)"
      << ": m_Cmat_tilde formed (2)"
      << std::endl;
  }

  // ---- Step 5: naive formation of 'm_Lmat' ----
  m_Cmat_tilde.svdSolve(*z.m_Cmat,m_Lmat);

  commonConstructor(z);
}