コード例 #1
0
/**
 * Write clusters of consecutive well-covered positions to a tab-separated file.
 *
 * A cluster is a maximal run of consecutive indices i for which
 * depth[i] >= minDepth. A run is written only if it covers at least
 * minClusterSize positions and its mean depth is at least minAvgDepth.
 * Each output row holds: chromosome, start, end, mean depth (2 decimals) and
 * the value returned by CalcRegionGCContent() for the run (2 decimals).
 *
 * @param outFile         path of the file to (over)write; error() is called
 *                        when it cannot be opened
 * @param minDepth        per-position depth needed to extend a run
 * @param minAvgDepth     minimum mean depth of a run for it to be reported
 * @param minClusterSize  minimum number of positions in a reported run
 */
void GenomeRegionSeqStats::OutputClusters(String &outFile, int minDepth, double minAvgDepth, int minClusterSize)
{
 FILE *fh = fopen(outFile.c_str(), "w");
 if(fh==NULL) error("Open cluster output file %s failed!\n", outFile.c_str());

 const uint32_t nPositions = depth.size();
 bool inCluster = false;
 int winSize = 0;          // number of positions in the current run
 uint64_t totalDepth = 0;  // 64-bit so long, deep runs cannot overflow the sum
 uint32_t cstart = 0;      // first index of the current run

 // Iterate one step past the last position: the extra step is treated as
 // "below threshold" so that a run reaching the end of depth is still closed
 // and reported instead of being silently dropped.
 for(uint32_t i=0; i<=nPositions; i++)
 {
  const bool covered = (i<nPositions) && (depth[i]>=minDepth);
  if(covered)
  {
   if(!inCluster)
   {
    inCluster = true;
    cstart = i;
   }
   winSize++;
   totalDepth += depth[i];
   continue;
  }

  if(!inCluster) continue;

  // The run ended at i-1: capture its statistics, then reset the accumulators
  // before deciding whether it is reported.
  const int clusterSize = winSize;
  const double avgDepth = double(totalDepth)/clusterSize;
  winSize = 0;
  totalDepth = 0;
  inCluster = false;
  if(clusterSize<minClusterSize || avgDepth<minAvgDepth) continue;

  uint32_t cend = i-1;
  int atCnt, gcCnt;
  double gc = CalcRegionGCContent(referencegenome, cstart, cend, atCnt, gcCnt);

  // getChromosomeAndIndex() fills chr with a ':'-separated string; the first
  // token is used as the chromosome name and the second as the start position.
  String chr;
  referencegenome.getChromosomeAndIndex(chr, cstart);

  StringArray tokens;
  tokens.ReplaceTokens(chr, ":");
  if(tokens.Length()<2)
   error("Unexpected chromosome position format: %s\n", chr.c_str());
  chr = tokens[0];
  int start = tokens[1].AsInteger();
  int end = start + (cend-cstart);

  fprintf(fh, "%s\t%d\t%d\t%.2f\t%.2f\n", chr.c_str(), start, end, avgDepth, gc);
 }
 fclose(fh);
}
コード例 #2
0
/**
 * Load a list of genomic regions from a text file.
 *
 * Empty lines and lines starting with '#' are skipped. Every other line is
 * split with StringArray::ReplaceTokens() (default separators) and must have
 * at least three columns: contig name, start, end. An optional fourth column
 * names a group the region belongs to.
 *
 * For each region the raw line and the (start, end) pair are appended to the
 * per-contig containers, and the per-contig current index is reset to 0.
 * When a group column is present, the group's segment count and total length
 * are updated and the group is recorded under the key "contig:start:end".
 * Afterwards `contigs` and `groups` are rebuilt from the map keys, and the
 * regions of every contig are checked to have non-decreasing start positions.
 *
 * error() is called when the file cannot be opened, a line has fewer than
 * three columns, a region has end <= start, or starts are out of order.
 *
 * @param inputList  path of the region list file
 */
void GenomeRegionSeqStats::LoadRegionList(String &inputList)
{
  FILE *regionFile = fopen(inputList.c_str(), "r");
  if(regionFile==NULL) error("Open region input file %s failed!\n", inputList.c_str());

  String line;
  StringArray fields;
  while(!feof(regionFile))
    {
      line.ReadLine(regionFile);
      if (line.IsEmpty() || line[0] == '#') continue;

      fields.ReplaceTokens(line);
      if(fields.Length()<3)
	error("Too few columns: %s\n", line.c_str());

      const String &contig = fields[0];
      std::pair<int, int> span(fields[1].AsInteger(), fields[2].AsInteger());

      // Positions are 0-based, so an end equal to the start is rejected too
      // (with 1-based positions equality would be a valid region).
      if(!(span.first<span.second))
	error("Region end is equal or smaller than the start: %s!\n", line.c_str());

      genomeRegions_lines[contig].push_back(line);
      genomeRegions[contig].push_back(span);
      genomeRegions_currentIndex[contig] = 0;

      // Optional 4th column: group label (e.g. a gene name).
      if(fields.Length()>3) {
	String regionKey = fields[0]+":"+fields[1]+":"+fields[2];
	Stats &groupStat = groupStats[fields[3]];
	groupStat.segCount++;
	groupStat.totalLen += (span.second - span.first);
	genomeRegionGroups[regionKey].push_back(fields[3]);
      }
    }

  fclose(regionFile);

  // Rebuild the contig list and verify that the regions of each contig are
  // ordered by start position.
  contigs.clear();
  std::map<String, vector<std::pair<int, int> > >::iterator contigIt;
  for(contigIt=genomeRegions.begin(); contigIt!=genomeRegions.end(); ++contigIt)
    {
      contigs.push_back(contigIt->first);
      const vector<std::pair<int, int> > &regions = contigIt->second;
      for(unsigned int i=1; i<regions.size(); i++)
	if(regions[i].first<regions[i-1].first)
	  error("Input coordinates are not in order: %s %d %d!\n", contigIt->first.c_str(),regions[i].first,regions[i].second);
    }

  // Rebuild the list of group names (such as gene names).
  groups.clear();
  std::map<String, Stats>::iterator groupIt;
  for(groupIt=groupStats.begin(); groupIt!=groupStats.end(); ++groupIt)
    groups.push_back(groupIt->first);
}
コード例 #3
0
ファイル: VerifyBamID.cpp プロジェクト: statgen/verifyBamID
void VerifyBamID::loadSubsetInds(const char* subsetFile) {
  if ( ( pPile == NULL ) && ( pGenotypes == NULL ) ) {
    if ( subsetInds.size() > 0 ) {
      Logger::gLogger->error("VerifyBamID::loadSubsetInds() called multiple times");
    }

    IFILE f = ifopen(subsetFile,"rb");
    String line;
    StringArray tok;
    while( line.ReadLine(f) > 0 ) {
      tok.ReplaceTokens(line,"\t \n\r");
      subsetInds.push_back(tok[0].c_str());
    }
  }
  else {
    Logger::gLogger->error("VerifyBamID::loadSubsetInds() called after VerifyBamID::loadFiles()");
  }
}
コード例 #4
0
/**
 * Load per-interval crossover rates from a text file into R.
 *
 * The file is accepted only when it has exactly `markers` lines. Line 0 is
 * skipped (presumably a header -- TODO confirm); each following line i+1 is
 * tokenized and, when it has at least two columns, its second column is
 * stored in R[i]. That gives at most markers-1 values, R[0..markers-2].
 *
 * @param filename  path of the crossover rate file
 * @return true when the line count matched and the rates were read,
 *         false otherwise (R is left untouched)
 */
bool MarkovParameters::ReadCrossoverRates(const char * filename)
   {
   StringArray tokens;
   StringArray rec;
   rec.Read(filename);

   if (rec.Length() != markers)
      return false;

   // Load estimated crossover rates
   printf("  Updating crossover rates using data in %s ...\n", filename);

   // rec has `markers` lines, so the last valid index is markers - 1.
   // Stopping at i < markers - 1 keeps rec[i+1] in range.
   for (int i = 0; i < markers - 1; i++)
      {
      tokens.ReplaceTokens(rec[i+1]);

      if (tokens.Length() >= 2) R[i] = tokens[1].AsDouble();
      }

   return true;
   }
コード例 #5
0
ファイル: VcfFile.cpp プロジェクト: amarawi/gotcloud
/**
 * Open a BED/BIM/FAM file trio and the reference sequence for reading.
 *
 * Steps performed:
 *  - reset() the object and open the BED file, verifying its 3-byte header;
 *  - open the BIM and FAM files;
 *  - read the FAM file, creating one VcfInd per line (at least 5 columns are
 *    required) and appending it to vpVcfInds;
 *  - (re)allocate pBedBuffer with ceil(#individuals / 4) bytes;
 *  - pre-allocate nbuf VcfMarker objects unless nbuf is 0;
 *  - open the reference genome, creating its index first when the initial
 *    open fails.
 *
 * @param bedFile  path of the binary BED file
 * @param bimFile  path of the BIM file
 * @param famFile  path of the FAM file
 * @param refFile  reference sequence name passed to genomeSequence
 * @param nbuf     number of marker buffers to pre-allocate; 0 means the
 *                 marker vector is not pre-sized
 * @throws VcfFileException when a file cannot be opened, the BED header is
 *         missing or wrong, a FAM line has fewer than 5 columns, or the
 *         reference index cannot be created or opened
 */
void BedFile::openForRead(const char* bedFile, const char* bimFile, const char* famFile, const char* refFile, int nbuf) {
  StringArray tokens;

  reset();

  iFile = ifopen(bedFile,"rb");
  if ( iFile == NULL ) {
    throw VcfFileException("Failed opening file %s - %s",bedFile,strerror(errno));
  }

  // read magic numbers
  const char magicNumbers[3] = {0x6c,0x1b,0x01};
  char firstThreeBytes[3];
  // A short read would leave firstThreeBytes partly uninitialized, so the
  // byte count is checked before comparing.
  if ( ifread( iFile, firstThreeBytes, 3 ) != 3 ) {
    throw VcfFileException("Failed reading the magic numbers from BED file %s",bedFile);
  }
  for(int i=0; i < 3; ++i) {
    if ( firstThreeBytes[i] != magicNumbers[i] ) {
      throw VcfFileException("The magic numbers do not match in BED file %s",bedFile);
    }
  }

  iBimFile = ifopen(bimFile,"rb");
  if ( iBimFile == NULL ) {
    throw VcfFileException("Failed opening file %s - %s",bimFile,strerror(errno));
  }
  iFamFile = ifopen(famFile,"rb");
  if ( iFamFile == NULL ) {
    throw VcfFileException("Failed opening file %s - %s",famFile,strerror(errno));
  }
  sRefFile = refFile;

  // One individual per FAM line; stop at the first line ReadLine reports as
  // empty or end-of-file.
  while( 1 ) {
    int ret = line.ReadLine(iFamFile);
    if ( ret <= 0 ) break;
    tokens.ReplaceTokens(line, " \t\r\n");
    if ( tokens.Length() < 5 ) {
      throw VcfFileException("Less then 5 columns are observed in FAM file");
    }
    // Note the argument order: column 2 is passed first, then column 1.
    VcfInd* p = new VcfInd(tokens[1],tokens[0],tokens[2],tokens[3],tokens[4]);
    vpVcfInds.push_back(p);
  }

  //Logger::gLogger->writeLog("Finished loading %d individuals from FAM file",(int)vpVcfInds.size());

  // Buffer size is the number of individuals divided by 4, rounded up.
  nBytes = (vpVcfInds.size()+3)/4;
  if ( pBedBuffer != NULL ) { delete[] pBedBuffer; }
  pBedBuffer = new char[nBytes];

  nBuffers = nbuf;
  nNumMarkers = 0;
  nHead = 0;

  bParseGenotypes = true;
  bParseDosages = false;
  bParseValues = false;

  if ( nBuffers == 0 ) { // infinite buffer size
    // do not set size of markers
  }
  else {
    vpVcfMarkers.resize( nBuffers );
    for(int i=0; i < nBuffers; ++i) {
      VcfMarker* p = new VcfMarker;
      vpVcfMarkers[i] = p;
    }
  }

  genomeSequence.setReferenceName(sRefFile.c_str());
  genomeSequence.useMemoryMap(true);

  //Logger::gLogger->writeLog("Loading reference file %s",sRefFile.c_str());

  // open() and create() are treated as returning non-zero on failure: a failed
  // first open triggers index creation followed by a second open attempt.
  if ( genomeSequence.open() ) {
    // write a message that new index file is being created
    if ( genomeSequence.create(false) ) {
      throw VcfFileException("Failed creating index file of the reference. Please check the file permission");
    }
    if ( genomeSequence.open() ) {
      throw VcfFileException("Failed opening index file of the reference.");
    }
  }
}