int KinshipHolder::loadDecomposed() { LineReader lr(this->eigenFileName); int lineNo = 0; int fieldLen = 0; std::vector<std::string> fd; std::vector<int> columnToExtract; std::vector<std::string> header; // header line of the kinship eigen file Eigen::MatrixXf& matK = this->matK->mat; Eigen::MatrixXf& matS = this->matS->mat; Eigen::MatrixXf& matU = this->matU->mat; const std::vector<std::string>& names = *this->pSample; const int NumSample = (int)names.size(); std::map<std::string, int> nameMap; makeMap(names, &nameMap); std::map<std::string, int> headerMap; while (lr.readLineBySep(&fd, "\t ")) { ++lineNo; if (lineNo == 1) { // check header header = fd; fieldLen = fd.size(); if (fieldLen < 3) { // at least three columns: IID, Lambda, U1 logger->error( "Insufficient column number (<3) in the first line of kinsihp " "file!"); return -1; }; for (size_t i = 0; i != fd.size(); ++i) { fd[i] = tolower(fd[i]); } makeMap(fd, &headerMap); if (fd.size() != headerMap.size()) { logger->error("Kinship file have duplicated headers!"); return -1; } // check IID, Lambda, U1, U2, ... 
U(N) where (N) is the sample size if (headerMap.count("iid") == 0) { logger->error("Missing 'IID' column!"); return -1; } columnToExtract.push_back(headerMap["iid"]); if (headerMap.count("lambda") == 0) { logger->error("Missing 'Lambda' column!"); return -1; } columnToExtract.push_back(headerMap["lambda"]); std::string s; for (int i = 0; i < NumSample; ++i) { s = "u"; s += toString(i + 1); if (headerMap.count(s) == 0) { logger->error("Missing '%s' column!", s.c_str()); return -1; } columnToExtract.push_back(headerMap[s]); } s = "u"; s += toString(NumSample + 1); if (headerMap.count(s) != 0) { logger->error("Unexpected column '%s'!", s.c_str()); return -1; } matS.resize(NumSample, 1); matU.resize(NumSample, NumSample); continue; } // body lines if ((int)fd.size() != fieldLen) { logger->error( "Inconsistent column number [ %zu ] (used to be [ %d ])in kinship " "file line [ %d ] - skip this file!", fd.size(), fieldLen, lineNo); return -1; } const int iidColumn = columnToExtract[0]; const std::string& iid = fd[iidColumn]; if (nameMap.count(iid) == 0) { logger->error("Unexpected sample [ %s ]!", iid.c_str()); return -1; } const int row = nameMap[iid]; const int lambdaColumn = columnToExtract[1]; double temp = 0.0; if (!str2double(fd[lambdaColumn], &temp)) { logger->warn("Invalid numeric value [ %s ] treated as zero!", fd[lambdaColumn].c_str()); } matS(lineNo - 2, 0) = temp; for (int i = 0; i < NumSample; ++i) { int uColumn = columnToExtract[i + 2]; if (!str2double(fd[uColumn], &temp)) { logger->warn("Invalid numeric value [ %s ] treated as zero!", fd[lambdaColumn].c_str()); } matU(row, i) = temp; } } // verify eigen decomposition results make senses // check largest eigen vector and eigen value Eigen::MatrixXf v1 = matK * matU.col(0); Eigen::MatrixXf v2 = matS(0, 0) * matU.col(0); if (matS(0, 0) > 0.5 && v1.col(0).norm() > .5 && v2.col(0).norm() > 0.5 && corr(v1, v2) < 0.8) { logger->warn("Cannot verify spectral decompose results!"); return -1; } // check the min(10, 
NumSample) random eigen vector and eigen value int randomCol = 10; if (randomCol > NumSample - 1) { randomCol = NumSample - 1; } v1 = matK * matU.col(randomCol); v2 = matS(randomCol, 0) * matU.col(randomCol); if (matS(randomCol, 0) > 0.5 && v1.col(0).norm() > 0.5 && v2.col(0).norm() > 0.5 && corr(v1, v2) < 0.8) { logger->warn("Cannot verify spectral decompose results!"); return -1; } #ifdef DEBUG std::string tmp = fn; tmp += ".tmp"; std::ofstream ofs(tmp.c_str(), std::ofstream::out); ofs << mat; ofs.close(); #endif // fprintf(stderr, "Kinship matrix [ %d x %d ] loaded", (int)mat.rows(), // (int)mat.cols()); if (this->matK) { delete this->matK; this->matK = NULL; } return 0; }
// Constructor (1).
//
// Sizes every member from `jj` and `z` in the initializer list, then:
//   1. takes the matrix returned by z.m_Cmat->svdMatU() and reports its rank
//      at tolerances 1.e-8 and 1.e-14 on the sub display file (if any);
//   2. when the environment's checking level is >= 1, fills the upper triangle
//      of a check matrix with the scalar products of every pair of columns of
//      that matrix and prints it;
//   3. copies its first z.m_Cmat_rank columns into m_Cmat_tilde, optionally
//      writing m_Cmat_tilde out when this sub-environment id is in
//      gcmOptionsObj.m_ov.m_dataOutputAllowedSet;
//   4. calls m_Cmat_tilde.svdSolve(*(z.m_Cmat), m_Lmat) to form m_Lmat;
//   5. finishes with commonConstructor(z).
GcmZTildeInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>::GcmZTildeInfo(
  const GpmsaComputerModelOptions&                     gcmOptionsObj,
  const GcmJointInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>& jj,
  const GcmZInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>&     z)
  :
  m_env                     (jj.m_env),
  m_Cmat_tilde              (m_env,jj.m_omega_space.map(),z.m_Cmat_rank),
  m_z_tilde_space           (m_env, "z_tilde_", z.m_Cmat_rank, NULL),
  m_Lmat                    (m_env,m_z_tilde_space.map(),z.m_Cmat->numCols()),
  m_Lmat_t                  (m_env,z.m_z_space.map(),z.m_Cmat_rank),
  m_Zvec_tilde_hat          (m_z_tilde_space.zeroVector()),
  m_tmp_Smat_z_tilde        (m_z_tilde_space.zeroVector()),
  m_tmp_Smat_extra_tilde    (m_z_tilde_space.zeroVector()),
  m_tmp_Smat_z_tilde_hat    (m_z_tilde_space.zeroVector()),
  m_tmp_Smat_z_tilde_hat_inv(m_z_tilde_space.zeroVector())
{
  // Shared suffix of every trace message emitted by this constructor.
  const char* const ctorTag = "GcmZTildeInfo<S_V,S_M,D_V,D_M,P_V,P_M,Q_V,Q_M>::constructor(1)";

  if (m_env.subDisplayFile()) {
    *m_env.subDisplayFile() << "Entering " << ctorTag
                            << std::endl;
  }

  // Only this sub-environment's id is handed to subWriteContents() below.
  std::set<unsigned int> writerIds;
  writerIds.insert(m_env.subId());

  // Naive formation of 'm_Cmat_tilde'
  D_M matU(z.m_Cmat->svdMatU());
  unsigned int rankAtTol8  = matU.rank(0.,1.e-8 ); // todo: should be an option
  unsigned int rankAtTol14 = matU.rank(0.,1.e-14);
  if (m_env.subDisplayFile()) {
    *m_env.subDisplayFile() << "In " << ctorTag
                            << ": matU.numRowsLocal() = "
                            << matU.numRowsLocal()
                            << ", matU.numCols() = "
                            << matU.numCols()
                            << ", matU.rank(0.,1.e-8) = "
                            << rankAtTol8
                            << ", matU.rank(0.,1.e-14) = "
                            << rankAtTol14
                            << std::endl;
  }

  // Optional diagnostic: pairwise scalar products of the columns of matU
  // (upper triangle only, since j starts at i).
  if (m_env.checkingLevel() >= 1) {
    D_M matUcheck(z.m_z_space.zeroVector());
    D_V leftCol (jj.m_omega_space.zeroVector());
    D_V rightCol(jj.m_omega_space.zeroVector());
    for (unsigned int rowIdx = 0; rowIdx < matU.numCols(); ++rowIdx) {
      matU.getColumn(rowIdx,leftCol);
      for (unsigned int colIdx = rowIdx; colIdx < matU.numCols(); ++colIdx) {
        matU.getColumn(colIdx,rightCol);
        matUcheck(rowIdx,colIdx) = scalarProduct(leftCol,rightCol);
      }
    }
    matUcheck.setPrintHorizontally(false);
    if (m_env.subDisplayFile()) {
      *m_env.subDisplayFile() << "In " << ctorTag
                              << ": m_Cmat_rank = "
                              << z.m_Cmat_rank
                              << ", matUcheck.numRowsLocal() = "
                              << matUcheck.numRowsLocal()
                              << ", matUcheck.numCols() = "
                              << matUcheck.numCols()
                              << ", matUcheck =\n"
                              << matUcheck
                              << std::endl;
    }
  }

  // Keep only the leading z.m_Cmat_rank columns of matU.
  D_V columnBuf(jj.m_omega_space.zeroVector());
  for (unsigned int colIdx = 0; colIdx < z.m_Cmat_rank; ++colIdx) {
    matU.getColumn(colIdx,columnBuf);
    m_Cmat_tilde.setColumn(colIdx,columnBuf);
  }

  const bool outputAllowed =
    gcmOptionsObj.m_ov.m_dataOutputAllowedSet.find(m_env.subId()) !=
    gcmOptionsObj.m_ov.m_dataOutputAllowedSet.end();
  if (outputAllowed) {
    m_Cmat_tilde.subWriteContents("Ctilde",
                                  "Ctilde2",
                                  "m",
                                  writerIds);
  }

  if (m_env.subDisplayFile()) {
    *m_env.subDisplayFile() << "In " << ctorTag
                            << ": m_Cmat_tilde formed (2)"
                            << std::endl;
  }

  // Naive formation of 'm_Lmat'
  m_Cmat_tilde.svdSolve(*(z.m_Cmat),m_Lmat);

  commonConstructor(z);
}