Ejemplo n.º 1
0
// Matrix product: returns (*this) * M as a new matrix of size nrow x (columns of M).
// Throws an `error` carrying OUFFF_ERROR_DIMENSIONDISAGREE when the number of
// rows of M differs from the number of columns of this matrix.
Matrix Matrix::operator* (Matrix M)
{
   // the inner dimensions must agree before anything is allocated
   int rhsRows, rhsCols;
   M.Dimension(&rhsRows, &rhsCols);
   if (ncol != rhsRows)
   {
      throw error(OUFFF_ERROR_DIMENSIONDISAGREE, "Matrix::*(matrix)", "Try to make product between matrix of bad size");
   }

   Matrix product(nrow, rhsCols);
   for (int r = 0; r < nrow; ++r)
   {
      for (int c = 0; c < rhsCols; ++c)
      {
         // entry (r,c) is row r of this matrix times column c of M;
         // start from an explicit zero and accumulate in place
         product(r, c) = 0;
         for (int t = 0; t < ncol; ++t)
         {
            product(r, c) += value[r][t] * M(t, c);
         }
      }
   }
   return product;
}
Ejemplo n.º 2
0
void LoadMatrix(const char* fn, Matrix& m) {
  LineReader lr(fn);
  std::vector<std::string> s;
  int lineNo = 0;
  while (lr.readLineBySep(&s, " \t")) {
    lineNo++;
    m.Dimension(lineNo, s.size());
    for (int j = 0; j < s.size(); j++) {
      m(lineNo - 1, j) = atof(s[j].c_str());
    }
  }
};
Ejemplo n.º 3
0
// Element-wise sum: returns (*this) + M as a new nrow x ncol matrix.
// Throws an `error` carrying OUFFF_ERROR_DIMENSIONDISAGREE when M does not
// have exactly the same dimensions as this matrix.
Matrix Matrix::operator+ (Matrix M)
{
   int rhsRows, rhsCols;
   M.Dimension(&rhsRows, &rhsCols);
   const bool sameShape = (rhsRows == nrow) && (rhsCols == ncol);
   if (!sameShape)
   {
      throw error(OUFFF_ERROR_DIMENSIONDISAGREE, "Matrix::+(matrix)", "Try to make sum with matrix of different size");
   }

   Matrix sum(nrow, ncol);
   // Both operands are walked through their single-index accessor.
   // NOTE(review): value[0][k] presumably relies on all rows being stored
   // contiguously behind value[0] - confirm against the class definition.
   for (int k = 0; k < nrow * ncol; ++k)
   {
      sum(k) = value[0][k] + M(k);
   }
   return sum;
}
/**
 * Split column `col` out of `x`.
 *
 * @param x     input matrix (read only through x.rows, x.cols and x[i][j])
 * @param col   zero-based index of the column to extract
 * @param xnull output: resized to x.rows by (x.cols - 1); receives every
 *              column of x except `col`, in their original order
 * @param xcol  output: resized to x.rows; receives column `col` of x
 *
 * A warning is printed when x has fewer than two columns (xnull then ends up
 * with no columns). When `col` is outside [0, x.cols) a message is printed
 * and the function returns without touching xnull or xcol.
 */
void LinearRegressionPermutationTest::splitMatrix(Matrix& x, int col,
                                                  Matrix& xnull, Vector& xcol) {
  if (x.cols < 2) {
    printf("input matrix has too few cols!\n");
  }
  // An out-of-range column index would make the copy below write one past the
  // last column of xnull (col >= x.cols) or before its first (col < 0).
  if (col < 0 || col >= x.cols) {
    printf("input column index is out of range!\n");
    return;
  }
  xnull.Dimension(x.rows, x.cols - 1);
  xcol.Dimension(x.rows);
  for (int i = 0; i < x.rows; i++) {
    // columns left of `col` keep their position
    for (int j = 0; j < col; j++) {
      xnull[i][j] = x[i][j];
    }
    xcol[i] = x[i][col];
    // columns right of `col` shift one position to the left
    for (int j = col + 1; j < x.cols; j++) {
      xnull[i][j - 1] = x[i][j];
    }
  }
}
Ejemplo n.º 5
0
// Fill the design matrix X and the response vectors from mismatchTable,
// one row per table entry (in table iteration order).
//
//   X      column 0 is the constant 1.0; the remaining columns hold the
//          covariates derived from the entry's key (layout described below)
//   succ   per entry: it->second.m
//   total  per entry: it->second.mm + it->second.m
//   binarizeFlag  selects the covariate layout used for X
//
// Nothing is written when ourUseFast is set, and the outputs are not resized
// when mismatchTable is empty (sizing happens while handling the first entry).
void HashErrorModel::setDataforPrediction(Matrix & X, Vector & succ, Vector & total,bool binarizeFlag)
{
    if(ourUseFast)
    {
        return;
    }

    BaseData keyData;  // one object, re-parsed from every key
    int row = 0;
    HashMatch::const_iterator entry = mismatchTable.begin();
    while(entry != mismatchTable.end())
    {
        keyData.parseKey(entry->first);
        Covariates cov;
        cov.setCovariates(keyData);

        // The outputs are sized once, using the covariate count of the first entry.
        if(row == 0)
        {
            uint32_t rows = mismatchTable.size();

            succ.Dimension(rows);
            total.Dimension(rows);
            X.Dimension(rows, cov.covariates.size() + 1);
            X.Zero();
        }

        // The first column of the design matrix is the constant one.
        X[row][0] = 1.0;

        const int numCovariates = static_cast<int>(cov.covariates.size());
        for(int k = 0; k < numCovariates; ++k)
        {
            const uint16_t covValue = cov.covariates[k];
            if(binarizeFlag && k == 1)
            {
                // The covariate at index 1 is turned into an indicator:
                // column (value + 7) is set to 1; the offset is hardcoded.
                // NOTE(review): X has covariates.size() + 1 columns; whether
                // value + 7 always fits is not visible here - confirm.
                uint16_t pos = covValue;
                pos += 7;
                X[row][pos] = 1;
            }
            else if(binarizeFlag && k > 1)
            {
                // later covariates land in the column equal to their own index
                X[row][k] = covValue;
            }
            else
            {
                // plain layout: covariate k goes right after the constant column
                X[row][k + 1] = covValue;
            }
        }

        total[row] = entry->second.mm + entry->second.m;
        succ[row] = entry->second.m;

        ++row;
        ++entry;
    }
}
Ejemplo n.º 6
0
// Convert a VCF file into Merlin-style text files sharing the --outMerlin prefix:
//   <prefix>.map  CHROMOSOME / MARKER / POSITION, one line per VCF record
//   <prefix>.dat  one "M <marker>" line per VCF record
//   <prefix>.ped  one line per person: five leading columns, then one genotype per marker
//   <prefix>.pid  one person ID per line, in the same order as the PED file
// Range and people filters given on the command line are applied to the VCF
// reader before any record is read.
// Returns 0 on success; aborts on unparsed arguments, when an output file
// cannot be opened, or when a record has no GT field.
int main(int argc, char** argv){
    time_t currentTime = time(0);
    fprintf(stderr, "Analysis started at: %s", ctime(&currentTime));

    ////////////////////////////////////////////////
    BEGIN_PARAMETER_LIST(pl)
        ADD_PARAMETER_GROUP(pl, "Input/Output")
        ADD_STRING_PARAMETER(pl, inVcf, "--inVcf", "input VCF File")
        ADD_STRING_PARAMETER(pl, outMerlin, "--outMerlin", "output prefix")
        ADD_PARAMETER_GROUP(pl, "People Filter")
        ADD_STRING_PARAMETER(pl, peopleIncludeID, "--peopleIncludeID", "give IDs of people that will be included in study")
        ADD_STRING_PARAMETER(pl, peopleIncludeFile, "--peopleIncludeFile", "from given file, set IDs of people that will be included in study")
        ADD_STRING_PARAMETER(pl, peopleExcludeID, "--peopleExcludeID", "give IDs of people that will be excluded from study")
        ADD_STRING_PARAMETER(pl, peopleExcludeFile, "--peopleExcludeFile", "from given file, set IDs of people that will be excluded from study")
        ADD_PARAMETER_GROUP(pl, "Site Filter")
        ADD_STRING_PARAMETER(pl, rangeList, "--rangeList", "Specify some ranges to use, please use chr:begin-end format.")
        ADD_STRING_PARAMETER(pl, rangeFile, "--rangeFile", "Specify the file containing ranges, please use chr:begin-end format.")
        END_PARAMETER_LIST(pl)
        ;

    pl.Read(argc, argv);
    pl.Status();

    if (FLAG_REMAIN_ARG.size() > 0){
        fprintf(stderr, "Unparsed arguments: ");
        for (unsigned int i = 0; i < FLAG_REMAIN_ARG.size(); i++){
            fprintf(stderr, " %s", FLAG_REMAIN_ARG[i].c_str());
        }
        fprintf(stderr, "\n");
        abort();
    }

    REQUIRE_STRING_PARAMETER(FLAG_inVcf, "Please provide input file using: --inVcf");

    const char* fn = FLAG_inVcf.c_str();
    VCFInputFile vin(fn);

    // set range filters here
    // e.g.
    // vin.setRangeList("1:69500-69600");
    vin.setRangeList(FLAG_rangeList.c_str());
    vin.setRangeFile(FLAG_rangeFile.c_str());

    // set people filters here: when an include list is given, start from
    // nobody and add the listed people; exclusions are applied afterwards
    if (FLAG_peopleIncludeID.size() || FLAG_peopleIncludeFile.size()) {
        vin.excludeAllPeople();
        vin.includePeople(FLAG_peopleIncludeID.c_str());
        vin.includePeopleFromFile(FLAG_peopleIncludeFile.c_str());
    }
    vin.excludePeople(FLAG_peopleExcludeID.c_str());
    vin.excludePeopleFromFile(FLAG_peopleExcludeFile.c_str());

    // let's write it out.
    // fMap: CHROMOSOME   MARKER          POSITION
    // fDat: A some_disease
    //       T some_trait
    //       M some_marker
    //       M another_marker
    // fPed: first 5 column: FID, IID, PID, MID, SEX; then follow Dat file
    // fPid: Person ID file, (extra for Merlin), including all people ID as they are in PED file.
    FILE* fMap = fopen( (FLAG_outMerlin + ".map").c_str(), "wt");
    FILE* fDat = fopen( (FLAG_outMerlin + ".dat").c_str(), "wt");
    FILE* fPed = fopen( (FLAG_outMerlin + ".ped").c_str(), "wt");
    FILE* fPid = fopen( (FLAG_outMerlin + ".pid").c_str(), "wt");
    // Explicit check instead of assert(): the test must survive NDEBUG builds.
    if (!fMap || !fDat || !fPed || !fPid) {
        fprintf(stderr, "Cannot open output files using prefix: %s\n", FLAG_outMerlin.c_str());
        abort();
    }

    std::string marker;
    std::vector<std::string> allMarker;
    Matrix geno; // marker x people
    fputs("CHROMOSOME\tMARKER\tPOSITION\n", fMap);

    while (vin.readRecord()){
        VCFRecord& r = vin.getVCFRecord();
        VCFPeople& people = r.getPeople();

        // write map and dat files; records without an ID are named chrom:pos
        marker = r.getID();
        if ( marker == "." ) {
            fprintf(fMap, "%s\t%s:%d\t%d\n", r.getChrom(), r.getChrom(), r.getPos(), r.getPos());
            fprintf(fDat, "M\t%s:%d\n", r.getChrom(), r.getPos());
        } else {
            fprintf(fMap, "%s\t%s\t%d\n", r.getChrom(), marker.c_str(), r.getPos());
            fprintf(fDat, "M\t%s\n", marker.c_str());
        }
        allMarker.push_back(marker);

        // grow the genotype matrix by one row for this record
        geno.Dimension(allMarker.size(), people.size());

        // e.g.: get TAG from INFO field
        // fprintf(stderr, "%s\n", r.getInfoTag("ANNO"));

        const int m = allMarker.size() - 1;
        // The GT index belongs to the record, so look it up once per record
        // rather than once per person.
        const int GTidx = r.getFormatIndex("GT");
        // e.g.: Loop each (selected) people in the same order as in the VCF
        for (int i = 0; i < people.size(); i++) {
            VCFIndividual* indv = people[i];
            if (GTidx >= 0) {
                geno[m][i] = (*indv)[GTidx].getGenotype();
            }else {
                fprintf(stderr, "Cannot find GT field!\n");
                abort();
            }
        }
    }
    VCFHeader* h = vin.getVCFHeader();
    std::vector< std::string> peopleId;
    h->getPeopleName(&peopleId);

    // dump PED and PID file
    for (int p = 0; p < peopleId.size(); p++){
        fprintf(fPed, "%s\t%s\t0\t0\t0", peopleId[p].c_str(), peopleId[p].c_str());
        for (int m = 0; m < allMarker.size(); m++){
            int g = (int)geno[m][p];
            switch (g){
            case 0:
                fputs("\t0/0", fPed);
                break;
            case 1:
                fputs("\t0/1", fPed);
                break;
            case 2:
                fputs("\t1/1", fPed);
                break;
            default:
                // any other value: keep the tab separator so the columns
                // of the PED line stay aligned
                fputs("\tx/x", fPed);
                break;
            }
        }
        fputs("\n", fPed);

        fprintf(fPid, "%s\n", peopleId[p].c_str());
    }

    // release the output files explicitly
    fclose(fMap);
    fclose(fDat);
    fclose(fPed);
    fclose(fPid);
    return 0;
}
Ejemplo n.º 7
0
// Read every remaining record from this->vin and collect the genotypes of the
// records that pass the frequency cutoffs.
//
// @param g  output: receives the transposed genotype matrix (people by
//           marker); column i is labelled "chrom:pos" of the i-th kept record.
// @return   SUCCEED, or -1 when a record lacks the requested format field
//           (the dosage tag when one is configured, "GT" otherwise).
//
// Side effects visible here: this->hemiRegion is cleared and then receives one
// flag per kept record; this->counter receives one GenotypeCounter per kept
// record (it is not cleared in this function).
int GenotypeExtractor::extractMultipleGenotype(Matrix* g) {
  static Matrix m;  // make it static to reduce memory allocation
  int row = 0;
  std::vector<std::string> colNames;
  std::string name;
  this->hemiRegion.clear();
  GenotypeCounter genoCounter;
  while (this->vin->readRecord()) {
    VCFRecord& r = this->vin->getVCFRecord();
    VCFPeople& people = r.getPeople();

    // Row `row` is scratch space for the current record; it is only kept
    // (by advancing `row`) when the record passes the frequency cutoffs.
    m.Dimension(row + 1, people.size());
    genoCounter.reset();

    const bool useDosage = (!this->dosageTag.empty());
    const int genoIdx =
        r.getFormatIndex(useDosage ? dosageTag.c_str() : "GT");
    const int GDidx = r.getFormatIndex("GD");
    const int GQidx = r.getFormatIndex("GQ");
    assert(this->parRegion);
    // Named isHemi so that it does not shadow the member this->hemiRegion.
    const bool isHemi =
        this->parRegion->isHemiRegion(r.getChrom(), r.getPos());

    // e.g.: Loop each (selected) people in the same order as in the VCF
    const int numPeople = static_cast<int>(people.size());
    for (int i = 0; i < numPeople; i++) {
      VCFIndividual* indv = people[i];
      if (genoIdx < 0) {
        logger->error("Cannot find %s field!",
                      this->dosageTag.empty() ? "GT" : dosageTag.c_str());
        return -1;
      }

      if (useDosage) {
        if (!isHemi) {
          m[row][i] = indv->justGet(genoIdx).toDouble();
        } else if ((*sex)[i] == PLINK_MALE) {
          // for male hemi region, imputated dosage is usually between 0 and 1
          // need to multiply by 2.0
          m[row][i] = indv->justGet(genoIdx).toDouble() * 2.0;
        } else if ((*sex)[i] == PLINK_FEMALE) {
          // Previously left unassigned, so a stale value from the static
          // matrix was used; mirror the GT branch: females use the value as is.
          m[row][i] = indv->justGet(genoIdx).toDouble();
        } else {
          // unknown sex in a hemi region: treated as missing, as for GT
          m[row][i] = MISSING_GENOTYPE;
        }
      } else {
        if (!isHemi) {
          m[row][i] = indv->justGet(genoIdx).getGenotype();
        } else if ((*sex)[i] == PLINK_MALE) {
          m[row][i] = indv->justGet(genoIdx).getMaleNonParGenotype02();
        } else if ((*sex)[i] == PLINK_FEMALE) {
          m[row][i] = indv->justGet(genoIdx).getGenotype();
        } else {
          m[row][i] = MISSING_GENOTYPE;
        }
      }
      // a failed GD or GQ check overrides whatever was read above
      if (!checkGD(indv, GDidx) || !checkGQ(indv, GQidx)) {
        m[row][i] = MISSING_GENOTYPE;
      }
      genoCounter.add(m[row][i]);
    }

    // check frequency cutoffs (a cutoff of 0 or less is not applied);
    // a rejected record leaves `row` unchanged, so its scratch row is reused
    const double maf = genoCounter.getMAF();
    if (this->freqMin > 0. && this->freqMin > maf) continue;
    if (this->freqMax > 0. && this->freqMax < maf) continue;

    // store genotype results
    name = r.getChrom();
    name += ":";
    name += r.getPosStr();
    colNames.push_back(name);
    ++row;

    // reuse the flag computed above instead of querying parRegion again
    this->hemiRegion.push_back(isHemi);
    this->counter.push_back(genoCounter);
  }  // end while (this->vin->readRecord())

  // delete rows (ugly code here, as we may allocate extra row in previous
  // loop)
  m.Dimension(row, m.cols);

  // now transpose (marker by people -> people by marker)
  g->Transpose(m);
  for (int i = 0; i < row; ++i) {
    g->SetColumnLabel(i, colNames[i].c_str());
  }
  return SUCCEED;
}  // end GenotypeExtractor