/**
 * Append the columns of @param geno to the right of @param covariate,
 * reordering the genotype rows so that they follow
 * @param phenotypeNameInOrder.
 *
 * @param covariate matrix extended in place; on success it has
 *        phenotypeNameInOrder.size() rows and (old cols + geno.cols) columns
 * @param phenotypeNameInOrder is the row names for @param covariate
 * @param geno genotype matrix whose columns (and column labels) are appended
 * @param rowLabel is the row names for @param geno; duplicates are rejected
 * @return 0 if succeed; -1 when @param covariate is NULL, when
 *         @param rowLabel contains duplicated names, or when a name in
 *         @param phenotypeNameInOrder cannot be found in @param rowLabel.
 *         All checks run before @param covariate is resized, so a failing
 *         call leaves it unmodified.
 */
int appendGenotype(Matrix* covariate,
                   const std::vector<std::string>& phenotypeNameInOrder,
                   Matrix& geno, const std::vector<std::string>& rowLabel) {
  if (!covariate) {
    return -1;
  }

  Indexer indexer(rowLabel);
  if (indexer.hasDuplication()) {
    return -1;
  }

  // Resolve each person to a genotype row once (the lookup does not depend
  // on the column), and do it before the covariate matrix is touched.
  const int numRows = static_cast<int>(phenotypeNameInOrder.size());
  std::vector<int> genoRow(numRows);
  for (int i = 0; i < numRows; ++i) {
    genoRow[i] = indexer[phenotypeNameInOrder[i]];
    if (genoRow[i] < 0) {  // did not find a person
      return -1;
    }
  }

  Matrix& m = *covariate;
  const int baseCols = m.cols;
  const int genoCols = geno.cols;
  m.Dimension(numRows, baseCols + genoCols);

  for (int i = 0; i < numRows; ++i) {
    // Iterate over the genotype columns only. m.cols already includes
    // baseCols after Dimension(), so using it as the bound would read and
    // write past the end of both matrices.
    for (int j = 0; j < genoCols; ++j) {
      m[i][baseCols + j] = geno[genoRow[i]][j];
      if (i == 0) {
        m.SetColumnLabel(baseCols + j, geno.GetColumnLabel(j));
      }
    }
  }
  return 0;
}
/**
 * Copy row labels and column labels from @param M into this matrix.
 * Only the rows and columns that exist in both matrices are relabelled;
 * any extra rows/columns of this matrix keep their current labels.
 */
void Matrix::CopyLabels(Matrix& M) {
  // Row labels: limited to the rows present in both matrices.
  const int sharedRows = (rows < M.rows) ? rows : M.rows;
  for (int r = 0; r < sharedRows; ++r) {
    data[r]->SetLabel(M[r].label);
  }

  // Column labels: limited to the columns present in both matrices.
  const int sharedCols = (cols < M.cols) ? cols : M.cols;
  for (int c = 0; c < sharedCols; ++c) {
    SetColumnLabel(c, M.GetColumnLabel(c));
  }
}
int DataLoader::loadMarkerAsCovariate(const std::string& inVcf, const std::string& marker) { this->FLAG_inVcf = inVcf; this->FLAG_condition = marker; Matrix geno; VCFGenotypeExtractor ge(FLAG_inVcf); ge.excludeAllPeople(); ge.includePeople(phenotype.getRowName()); ge.setRangeList(marker); if (ge.extractMultipleGenotype(&geno) != GenotypeExtractor::SUCCEED) { logger->error("Load conditional markers [ %s ] from [ %s ] failed", FLAG_condition.c_str(), FLAG_inVcf.c_str()); exit(1); } std::vector<double> d(geno.rows); for (int i = 0; i < geno.cols; ++i) { for (int j = 0; j < geno.rows; ++j) { d[j] = geno[j][i]; } covariate.appendCol(d, geno.GetColumnLabel(i)); } // // load conditional markers // if (!FLAG_condition.empty()) { // Matrix geno; // std::vector<std::string> rowLabel; // if (loadMarkerFromVCF(FLAG_inVcf, FLAG_condition, &rowLabel, &geno) < 0) // { // logger->error("Load conditional markers [ %s ] from [ %s ] failed.", // FLAG_condition.c_str(), FLAG_inVcf.c_str()); // exit(1); // } // if (appendGenotype(&covariate, phenotypeNameInOrder, geno, rowLabel) < 0) // { // logger->error( // "Failed to combine conditional markers [ %s ] from [ %s ] failed.", // FLAG_condition.c_str(), FLAG_inVcf.c_str()); // exit(1); // } // } return 0; }
int DataLoader::useResidualAsPhenotype() { if (binaryPhenotype) { logger->warn( "WARNING: Skip transforming binary phenotype, although you want to " "use residual as phenotype!"); return 0; } LinearRegression lr; Vector pheno; Matrix covAndInt; const int numCovariate = covariate.ncol(); copyPhenotype(phenotype, &pheno); copyCovariateAndIntercept(pheno.Length(), covariate, &covAndInt); if (!lr.FitLinearModel(covAndInt, pheno)) { if (numCovariate > 0) { logger->error( "Cannot fit model: [ phenotype ~ 1 + covariates ], now use the " "original phenotype"); } else { logger->error( "Cannot fit model: [ phenotype ~ 1 ], now use the " "original phenotype"); } } else { // linear model fitted successfully copyVectorToMatrixColumn(lr.GetResiduals(), &phenotype, 0); // const int n = lr.GetResiduals().Length(); // for (int i = 0; i < n; ++i) { // // phenotypeInOrder[i] = lr.GetResiduals()[i]; // phenotype[i][0] = lr.GetResiduals()[i]; // } covariate.clear(); if (numCovariate > 0) { logger->info( "DONE: Fit model [ phenotype ~ 1 + covariates ] and model " "residuals will be used as responses"); } else { logger->info("DONE: Use residual as phenotype by centerng it"); } // store fitting results Vector& beta = lr.GetCovEst(); Matrix& betaSd = lr.GetCovB(); const int n = beta.Length(); for (int i = 0; i < n; ++i) { addFittedParameter(covAndInt.GetColumnLabel(i), beta[i], betaSd[i][i]); } addFittedParameter("Sigma2", lr.GetSigma2(), NAN); } #if 0 if (covariate.ncol() > 0) { LinearRegression lr; Vector pheno; Matrix covAndInt; copyPhenotype(phenotype, &pheno); copyCovariateAndIntercept(covariate.nrow(), covariate, &covAndInt); if (!lr.FitLinearModel(covAndInt, pheno)) { logger->error( "Cannot fit model: [ phenotype ~ 1 + covariates ], now use the " "original phenotype"); } else { const int n = lr.GetResiduals().Length(); for (int i = 0; i < n; ++i) { // phenotypeInOrder[i] = lr.GetResiduals()[i]; phenotype[i][0] = lr.GetResiduals()[i]; } covariate.clear(); logger->info( "DONE: Fit 
model [ phenotype ~ 1 + covariates ] and model " "residuals will be used as responses"); } storeFittedModel(lr); } else { // no covaraites // centerVector(&phenotypeInOrder); std::vector<double> v; phenotype.extractCol(0, &v); centerVector(&v); phenotype.setCol(0, v); logger->info("DONE: Use residual as phenotype by centerng it"); } #endif return 0; }
/**
 * Fill @param dict with a mapping from each column label of @param m to its
 * column index.
 *
 * Entries already in @param dict are kept unless a column label matches
 * their key. When the same label appears on several columns, the largest
 * column index is stored, because later assignments overwrite earlier ones.
 */
void makeColNameToDict(Matrix& m, std::map<std::string, int>* dict) {
  const int numCols = m.cols;
  for (int col = 0; col < numCols; ++col) {
    (*dict)[m.GetColumnLabel(col)] = col;
  }
}