Exemple #1
0
/**
 * Append the columns of @param geno to the right of @param covariate,
 * reordering the genotype rows so that they follow @param phenotypeNameInOrder.
 *
 * @param covariate matrix extended in place; on success it has
 *        phenotypeNameInOrder.size() rows and (old cols + geno.cols) columns
 * @param phenotypeNameInOrder is the row names for @param covariate
 * @param geno genotype matrix whose columns are appended
 * @param rowLabel is the row names for @param geno
 * @return 0 if succeed; -1 if @param covariate is null, if @param rowLabel
 *         has duplicated names, or if a name in @param phenotypeNameInOrder
 *         cannot be found in @param rowLabel. On failure @param covariate is
 *         not modified.
 */
int appendGenotype(Matrix* covariate,
                   const std::vector<std::string>& phenotypeNameInOrder,
                   Matrix& geno, const std::vector<std::string>& rowLabel) {
  if (!covariate) {
    return -1;
  }

  Indexer indexer(rowLabel);
  if (indexer.hasDuplication()) {
    return -1;
  }

  // Resolve every sample to its genotype row before touching the covariate
  // matrix, so a failed lookup does not leave it half-updated.
  const size_t numSample = phenotypeNameInOrder.size();
  std::vector<int> genoRow(numSample);
  for (size_t i = 0; i < numSample; ++i) {
    genoRow[i] = indexer[phenotypeNameInOrder[i]];
    if (genoRow[i] < 0) {  // did not find a person
      return -1;
    }
  }

  Matrix& m = *covariate;
  // Capture both widths before resizing: m.cols changes in Dimension().
  const int baseCols = m.cols;
  const int genoCols = geno.cols;
  m.Dimension(numSample, baseCols + genoCols);

  for (size_t i = 0; i < numSample; ++i) {
    // Iterate over the genotype columns only (not the enlarged m.cols),
    // otherwise both geno and m would be indexed out of range.
    for (int j = 0; j < genoCols; ++j) {
      m[i][baseCols + j] = geno[genoRow[i]][j];

      if (i == 0) {  // column labels need to be copied only once
        m.SetColumnLabel(baseCols + j, geno.GetColumnLabel(j));
      }
    }
  }
  return 0;
}
Exemple #2
0
/**
 * Copy row labels and column labels from @param M onto this matrix.
 * Only the rows/columns whose index exists in both matrices are labelled;
 * the remaining labels of this matrix are left as they are.
 */
void Matrix::CopyLabels(Matrix &M) {
  // Row labels: limited to the rows both matrices have
  const int sharedRows = (rows < M.rows) ? rows : M.rows;
  for (int r = 0; r < sharedRows; ++r) {
    data[r]->SetLabel(M[r].label);
  }

  // Column labels: limited to the columns both matrices have
  const int sharedCols = (cols < M.cols) ? cols : M.cols;
  for (int c = 0; c < sharedCols; ++c) {
    SetColumnLabel(c, M.GetColumnLabel(c));
  }
}
Exemple #3
0
int DataLoader::loadMarkerAsCovariate(const std::string& inVcf,
                                      const std::string& marker) {
  this->FLAG_inVcf = inVcf;
  this->FLAG_condition = marker;

  Matrix geno;
  VCFGenotypeExtractor ge(FLAG_inVcf);
  ge.excludeAllPeople();
  ge.includePeople(phenotype.getRowName());
  ge.setRangeList(marker);

  if (ge.extractMultipleGenotype(&geno) != GenotypeExtractor::SUCCEED) {
    logger->error("Load conditional markers [ %s ] from [ %s ] failed",
                  FLAG_condition.c_str(), FLAG_inVcf.c_str());
    exit(1);
  }

  std::vector<double> d(geno.rows);
  for (int i = 0; i < geno.cols; ++i) {
    for (int j = 0; j < geno.rows; ++j) {
      d[j] = geno[j][i];
    }
    covariate.appendCol(d, geno.GetColumnLabel(i));
  }

  // // load conditional markers
  // if (!FLAG_condition.empty()) {
  //   Matrix geno;
  //   std::vector<std::string> rowLabel;
  //   if (loadMarkerFromVCF(FLAG_inVcf, FLAG_condition, &rowLabel, &geno) < 0)
  //   {
  //     logger->error("Load conditional markers [ %s ] from [ %s ] failed.",
  //                   FLAG_condition.c_str(), FLAG_inVcf.c_str());
  //     exit(1);
  //   }
  //   if (appendGenotype(&covariate, phenotypeNameInOrder, geno, rowLabel) < 0)
  //   {
  //     logger->error(
  //         "Failed to combine conditional markers [ %s ] from [ %s ] failed.",
  //         FLAG_condition.c_str(), FLAG_inVcf.c_str());
  //     exit(1);
  //   }
  // }
  return 0;
}
Exemple #4
0
int DataLoader::useResidualAsPhenotype() {
  if (binaryPhenotype) {
    logger->warn(
        "WARNING: Skip transforming binary phenotype, although you want to "
        "use residual as phenotype!");
    return 0;
  }

  LinearRegression lr;
  Vector pheno;
  Matrix covAndInt;
  const int numCovariate = covariate.ncol();

  copyPhenotype(phenotype, &pheno);
  copyCovariateAndIntercept(pheno.Length(), covariate, &covAndInt);
  if (!lr.FitLinearModel(covAndInt, pheno)) {
    if (numCovariate > 0) {
      logger->error(
          "Cannot fit model: [ phenotype ~ 1 + covariates ], now use the "
          "original phenotype");
    } else {
      logger->error(
          "Cannot fit model: [ phenotype ~ 1 ], now use the "
          "original phenotype");
    }
  } else {  // linear model fitted successfully
    copyVectorToMatrixColumn(lr.GetResiduals(), &phenotype, 0);
    // const int n = lr.GetResiduals().Length();
    // for (int i = 0; i < n; ++i) {
    //   // phenotypeInOrder[i] = lr.GetResiduals()[i];
    //   phenotype[i][0] = lr.GetResiduals()[i];
    // }
    covariate.clear();
    if (numCovariate > 0) {
      logger->info(
          "DONE: Fit model [ phenotype ~ 1 + covariates ] and model "
          "residuals will be used as responses");
    } else {
      logger->info("DONE: Use residual as phenotype by centerng it");
    }

    // store fitting results
    Vector& beta = lr.GetCovEst();
    Matrix& betaSd = lr.GetCovB();
    const int n = beta.Length();
    for (int i = 0; i < n; ++i) {
      addFittedParameter(covAndInt.GetColumnLabel(i), beta[i], betaSd[i][i]);
    }
    addFittedParameter("Sigma2", lr.GetSigma2(), NAN);
  }

#if 0
  if (covariate.ncol() > 0) {
    LinearRegression lr;
    Vector pheno;
    Matrix covAndInt;
    copyPhenotype(phenotype, &pheno);
    copyCovariateAndIntercept(covariate.nrow(), covariate, &covAndInt);
    if (!lr.FitLinearModel(covAndInt, pheno)) {
      logger->error(
          "Cannot fit model: [ phenotype ~ 1 + covariates ], now use the "
          "original phenotype");
    } else {
      const int n = lr.GetResiduals().Length();
      for (int i = 0; i < n; ++i) {
        // phenotypeInOrder[i] = lr.GetResiduals()[i];
        phenotype[i][0] = lr.GetResiduals()[i];
      }
      covariate.clear();
      logger->info(
          "DONE: Fit model [ phenotype ~ 1 + covariates ] and model "
          "residuals will be used as responses");
    }
    storeFittedModel(lr);
  } else {  // no covaraites
    // centerVector(&phenotypeInOrder);
    std::vector<double> v;
    phenotype.extractCol(0, &v);
    centerVector(&v);
    phenotype.setCol(0, v);

    logger->info("DONE: Use residual as phenotype by centerng it");
  }
#endif

  return 0;
}
/**
 * Record the column index of every column label of @param m in @param dict.
 * Existing entries of @param dict are kept unless a label matches, in which
 * case the entry is overwritten; for repeated labels the last column wins.
 */
void makeColNameToDict(Matrix& m, std::map<std::string, int>* dict) {
  const int numCol = m.cols;
  for (int col = 0; col < numCol; ++col) {
    (*dict)[m.GetColumnLabel(col)] = col;
  }
}