Example #1
0
bool BamInterface::writeHeader(IFILE filePtr, SamFileHeader& header,
                               SamStatus& status)
{
    if((filePtr == NULL) || (filePtr->isOpen() == false))
    {
        // File is not open, return false.
        status.setStatus(SamStatus::FAIL_ORDER,
                         "Cannot write header since the file pointer is null");
        return(false);
    }

    char magic[4];
    magic[0] = 'B';
    magic[1] = 'A';
    magic[2] = 'M';
    magic[3] = 1;

    // Write magic to the file.
    ifwrite(filePtr, magic, 4);

    ////////////////////////////////
    // Write the header to the file.
    ////////////////////////////////
    // Construct a string containing the entire header.
    std::string headerString = "";
    header.getHeaderString(headerString);

    int32_t headerLen = headerString.length();
    int numWrite = 0;

    // Write the header length.
    numWrite = ifwrite(filePtr, &headerLen, sizeof(int32_t));
    if(numWrite != sizeof(int32_t))
    {
        status.setStatus(SamStatus::FAIL_IO,
                         "Failed to write the BAM header length.");
        return(false);
    }

    // Write the header to the file.
    numWrite = ifwrite(filePtr, headerString.c_str(), headerLen);
    if(numWrite != headerLen)
    {
        status.setStatus(SamStatus::FAIL_IO,
                         "Failed to write the BAM header.");
        return(false);
    }

    ////////////////////////////////////////////////////////
    // Write the Reference Information.
    const SamReferenceInfo& refInfo = header.getReferenceInfo();

    // Get the number of sequences.
    int32_t numSeq = refInfo.getNumEntries();
    ifwrite(filePtr, &numSeq, sizeof(int32_t));

    // Write each reference sequence
    for (int i = 0; i < numSeq; i++)
    {
        const char* refName = refInfo.getReferenceName(i);
        // Add one for the null value.
        int32_t nameLength = strlen(refName) + 1;
        // Write the length of the reference name.
        ifwrite(filePtr, &nameLength, sizeof(int32_t));

        // Write the name.
        ifwrite(filePtr, refName, nameLength);
        // Write the length of the reference sequence.
        int32_t refLen = refInfo.getReferenceLength(i);
        ifwrite(filePtr, &refLen, sizeof(int32_t));
    }

    return(true);
}
Example #2
0
// main function
int MergeBam::execute(int argc, char ** argv)
{
  static struct option getopt_long_options[] = 
    {
      // Input options
      { "list", required_argument, NULL, 'l'},
      { "in", required_argument, NULL, 'i'},
      { "out", required_argument, NULL, 'o'},
      { "verbose", no_argument, NULL, 'v'},
      { "log", required_argument, NULL, 'L'},
      { NULL, 0, NULL, 0 },
    };

  // Adjust the arguments since it is called as ./bam mergeBam instead of
  // just mergeBam.
  --argc;
  ++argv;

  int n_option_index = 0;
  char c;
  bool b_verbose = false;
  vector<std::string> vs_in_bam_files; // input BAM files

  std::string s_list, s_out, s_logger;

  while ( ( c = getopt_long(argc, argv, "l:i:o:vL:", getopt_long_options, &n_option_index) ) != -1 ) {
    switch(c) {
    case 'i':
      vs_in_bam_files.push_back(optarg);
      break;
    case 'l':
      s_list = optarg;
      break;
    case 'o':
      s_out = optarg;
      break;
    case 'v':
      b_verbose = true;
      break;
    case 'L':
      s_logger = optarg;
      break;
    default:
      fprintf(stderr,"Unrecognized option %s",getopt_long_options[n_option_index].name);
      abort();
    }
  }

  if ( s_logger.empty() ) {
      if(s_out.empty())
      {
          s_logger = "-";
      }
      else
      {
          s_logger = s_out + ".log";
      }
  }

  // create a logger object, now possible to write logs/warnings/errors
  Logger::gLogger = new Logger(s_logger.c_str(), b_verbose);

  // every argument must correspond to an option
  if ( optind < argc ) {
    usage();
    Logger::gLogger->error("non-option argument exist");
  }

  // check the required arguments are nonempty
  if ( (vs_in_bam_files.empty() && s_list.empty()) || s_out.empty() ) {
    usage();
    Logger::gLogger->error("At least one of the required argument is missing");
  }

  if(!vs_in_bam_files.empty() && !s_list.empty())
  {
      Logger::gLogger->error("Cannot specify both --in/-i and --list/-l");
  }

  if(!s_list.empty())
  {
      Logger::gLogger->writeLog("Input list file : %s",s_list.c_str());
  }
  else
  {
      std::string bamList = "";
      for(unsigned int i = 0; i < vs_in_bam_files.size(); i++)
      {
          if(i != 0)
          {
              bamList += ", ";
          }
          bamList += vs_in_bam_files[i];
      }
      Logger::gLogger->writeLog("Input list file : %s", bamList.c_str());
  }
  Logger::gLogger->writeLog("Output BAM file : %s",s_out.c_str());
  Logger::gLogger->writeLog("Output log file : %s",s_logger.c_str());
  Logger::gLogger->writeLog("Verbose mode    : %s",b_verbose ? "On" : "Off");
  
  vector<ReadGroup> v_readgroups;      // readGroups corresponding to BAM file
  vector<ReadGroup> v_uniq_readgroups; // unique readGroups written to header

  // If the list file is being used instead of the individual bams, parse it.
  if(!s_list.empty())
  {
      // parse the list file and fill the vectors above
      if ( parseListFile(s_list, vs_in_bam_files, v_readgroups, v_uniq_readgroups) == false ) {
          Logger::gLogger->error("Error in parsing the list file %s",s_list.c_str());
      }
      if ( vs_in_bam_files.size() != v_readgroups.size() ) {
          Logger::gLogger->error("parseListFile gave different size for vs_in_bam_files, v_readgroups: %d, %d", vs_in_bam_files.size(), v_readgroups.size());
      }
  }

  // sanity check
  uint32_t n_bams = vs_in_bam_files.size();
  Logger::gLogger->writeLog("Total of %d BAM files are being merged",n_bams);

  if ( n_bams < 2 )
  {
      Logger::gLogger->error("At least two BAM files must be specified for merging");
  }

  // create SamFile and SamFileHeader object for each BAM file
  SamFile *p_in_bams = new SamFile[n_bams];
  SamFileHeader *p_headers = new SamFileHeader[n_bams];

  // read each BAM file and its header, 
  // making sure that the headers are identical

  std::string firstHeaderNoRGPG = "";
  std::string headerNoRGPG = "";
  SamFileHeader newHeader;

  std::string firstHeaderString = "";
  for(uint32_t i=0; i < n_bams; ++i)
  {
      if ( ! p_in_bams[i].OpenForRead(vs_in_bam_files[i].c_str()) )
      {
          Logger::gLogger->error("Cannot open BAM file %s for reading",vs_in_bam_files[i].c_str());
      }
      p_in_bams[i].setSortedValidation(SamFile::COORDINATE);
      
      p_in_bams[i].ReadHeader(p_headers[i]);

      // Extract the RGs from this header.
      if(i == 0)
      {
          // First header, so store it as the first header
          newHeader = p_headers[i];
          // Determine the header without RG.
          parseOutRG(p_headers[i], firstHeaderNoRGPG, NULL);
      }
      else
      {
          parseOutRG(p_headers[i], headerNoRGPG, &newHeader);
          if(firstHeaderNoRGPG != headerNoRGPG)
          {
              Logger::gLogger->error("The headers are not identical at index %d",i);
          }
          if(newHeader.getReferenceInfo() != p_headers[i].getReferenceInfo())
          {
              Logger::gLogger->error("The headers are not identical at index %d",i);
          }
      }
  }

  // first header will be the new header to be written to output
  // adding all possible readGroups to the new header
  for(uint32_t i=0; i < v_uniq_readgroups.size(); ++i)
  {
    addReadGroupToHeader(newHeader, v_uniq_readgroups[i]);
  }

  // Write an output file with new headers
  SamFile bam_out;
  if ( !bam_out.OpenForWrite(s_out.c_str()) )
  {
    Logger::gLogger->error("Cannot open BAM file %s for writing",s_out.c_str());
  }
  bam_out.setSortedValidation(SamFile::COORDINATE);
  bam_out.WriteHeader(newHeader);

  // create SamRecords and GenomicCoordinates for each input BAM file
  SamRecord* p_records = new SamRecord[n_bams];
  uint64_t* p_gcoordinates = new uint64_t[n_bams];

  // read the first record for every input BAM file
  for(uint32_t i=0; i < n_bams; ++i) {
    if ( p_in_bams[i].ReadRecord(p_headers[i],p_records[i]) ) {
      if ( p_records[i].isValid(p_headers[i]) ) {
	p_gcoordinates[i] = getGenomicCoordinate(p_records[i]);
      }
      else {
	Logger::gLogger->error("Invalid record found at the first line of file %u. Failure code is %d", i, static_cast<int>(p_in_bams[i].GetFailure()));
      }
    }
    else {
      if ( p_in_bams[i].GetFailure() == SamStatus::NO_MORE_RECS ) {
	// the BAM file has no record
	p_gcoordinates[i] = MAX_GENOMIC_COORDINATE;
      }
      else {
	Logger::gLogger->error("Invalid record found at the first line of file %u. Failure code is %d", i, static_cast<int>(p_in_bams[i].GetFailure()));
      }
    }
  }

  // Routine for writing output BAM file
  uint32_t nWrittenRecords = 0; // number of written BAM records
  while(true) {
    // scan the minimum index of genomic coordinate
    int min_idx = -1;
    uint64_t min_gcoordinate = MAX_GENOMIC_COORDINATE;
    for(uint32_t i=0; i < n_bams; ++i) {
      if ( min_gcoordinate > p_gcoordinates[i] ) {
	min_gcoordinate = p_gcoordinates[i];
	min_idx = static_cast<int>(i);
      }
    }

    // If every file reached EOF, exit the loop
    if ( min_idx < 0 ) break;


    // If adding read groups, add the tag.
    if(!v_readgroups.empty())
    {
        // add readGroup tag to the record to write and write to output BAM file
        //Logger::gLogger->writeLog("%d",min_idx);
        addReadGroupTag(p_records[min_idx], v_readgroups[min_idx]);
    }
    bam_out.WriteRecord(newHeader, p_records[min_idx]);
    ++nWrittenRecords;
    if ( nWrittenRecords % 1000000 == 0 ) {
      Logger::gLogger->writeLog("Writing %u records to the output file",nWrittenRecords);
    }

    // Read a record from the input BAM file 
    if ( p_in_bams[min_idx].ReadRecord(p_headers[min_idx], p_records[min_idx]) ) {
      if ( p_records[min_idx].isValid(p_headers[min_idx]) ) {
	p_gcoordinates[min_idx] = getGenomicCoordinate(p_records[min_idx]);
      }
      else { // if invalid record found
	Logger::gLogger->error("Invalid record found at recordCount %d of file %d. Failure code is %d", p_in_bams[min_idx].GetCurrentRecordCount(), min_idx, static_cast<int>(p_in_bams[min_idx].GetFailure()));
      }
    }
    else {
      if ( p_in_bams[min_idx].GetFailure() == SamStatus::NO_MORE_RECS ) {
	p_gcoordinates[min_idx] = MAX_GENOMIC_COORDINATE; // Mark that all record has been read
      }
      else {
	Logger::gLogger->error("Cannot read record at recordCount %d of file %d. Failure code is %d", p_in_bams[min_idx].GetCurrentRecordCount(), min_idx, static_cast<int>(p_in_bams[min_idx].GetFailure()));
      }
    }
  }

  // close files and free allocated memory
  Logger::gLogger->writeLog("Finished writing %d records into the output BAM file",bam_out.GetCurrentRecordCount());
  bam_out.Close();
  for(uint32_t i=0; i < n_bams; ++i) {
    p_in_bams[i].Close();
  }
  delete[] p_records;
  delete[] p_in_bams;
  delete[] p_headers;
  delete[] p_gcoordinates;
  delete Logger::gLogger;
  return 0;
}
Example #3
0
int Stats::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    String indexFile = "";
    bool basic = false;
    bool noeof = false;
    bool params = false;
    bool qual = false;
    bool phred = false;
    int maxNumReads = -1;
    bool unmapped = false;
    String pBaseQC = "";
    String cBaseQC = "";
    String regionList = "";
    int excludeFlags = 0;
    int requiredFlags = 0;
    bool withinRegion = false;
    int minMapQual = 0;
    String dbsnp = "";
    PosList *dbsnpListPtr = NULL;
    bool baseSum = false;
    int bufferSize = PileupHelper::DEFAULT_WINDOW_SIZE;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_PARAMETER_GROUP("Required Parameters")
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_PARAMETER_GROUP("Types of Statistics")
        LONG_PARAMETER("basic", &basic)
        LONG_PARAMETER("qual", &qual)
        LONG_PARAMETER("phred", &phred)
        LONG_STRINGPARAMETER("pBaseQC", &pBaseQC)
        LONG_STRINGPARAMETER("cBaseQC", &cBaseQC)
        LONG_PARAMETER_GROUP("Optional Parameters")
        LONG_INTPARAMETER("maxNumReads", &maxNumReads)
        LONG_PARAMETER("unmapped", &unmapped)
        LONG_STRINGPARAMETER("bamIndex", &indexFile)
        LONG_STRINGPARAMETER("regionList", &regionList)
        LONG_INTPARAMETER("excludeFlags", &excludeFlags)
        LONG_INTPARAMETER("requiredFlags", &requiredFlags)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("params", &params)
        LONG_PARAMETER_GROUP("Optional phred/qual Only Parameters")
        LONG_PARAMETER("withinRegion", &withinRegion)
        LONG_PARAMETER_GROUP("Optional BaseQC Only Parameters")
        LONG_PARAMETER("baseSum", &baseSum)
        LONG_INTPARAMETER("bufferSize", &bufferSize)
        LONG_INTPARAMETER("minMapQual", &minMapQual)
        LONG_STRINGPARAMETER("dbsnp", &dbsnp)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    inputParameters.Read(argc-1, &(argv[1]));

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument for stats, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    // Use the index file if unmapped or regionList is not empty.
    bool useIndex = (unmapped|| (!regionList.IsEmpty()));

    // IndexFile is required, so check to see if it has been set.
    if(useIndex && (indexFile == ""))
    {
        // In file was not specified, so set it to the in file
        // + ".bai"
        indexFile = inFile + ".bai";
    }
    ////////////////////////////////////////
    // Setup in case pileup is used.
    Pileup<PileupElementBaseQCStats> pileup(bufferSize);
    // Initialize start/end positions.
    myStartPos = 0;
    myEndPos = -1;
    
    // Open the output qc file if applicable.
    IFILE baseQCPtr = NULL;
    if(!pBaseQC.IsEmpty() && !cBaseQC.IsEmpty())
    {
        usage();
        inputParameters.Status();
        // Cannot specify both types of baseQC.
        std::cerr << "Cannot specify both --pBaseQC & --cBaseQC." << std::endl;
        return(-1);
    }
    else if(!pBaseQC.IsEmpty())
    {
        baseQCPtr = ifopen(pBaseQC, "w");
        PileupElementBaseQCStats::setPercentStats(true);
    }
    else if(!cBaseQC.IsEmpty())
    {
        baseQCPtr = ifopen(cBaseQC, "w");
        PileupElementBaseQCStats::setPercentStats(false);
    }

    if(baseQCPtr != NULL)
    {
        PileupElementBaseQCStats::setOutputFile(baseQCPtr);
        PileupElementBaseQCStats::printHeader();
    }
    if((baseQCPtr != NULL) || baseSum)
    {
        PileupElementBaseQCStats::setMapQualFilter(minMapQual);
        PileupElementBaseQCStats::setBaseSum(baseSum);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the file for reading.
    SamFile samIn;
    if(!samIn.OpenForRead(inFile))
    {
        fprintf(stderr, "%s\n", samIn.GetStatusMessage());
        return(samIn.GetStatus());
    }

    samIn.SetReadFlags(requiredFlags, excludeFlags);

    // Set whether or not basic statistics should be generated.
    samIn.GenerateStatistics(basic);

    // Read the sam header.
    SamFileHeader samHeader;
    if(!samIn.ReadHeader(samHeader))
    {
        fprintf(stderr, "%s\n", samIn.GetStatusMessage());
        return(samIn.GetStatus());
    }

    // Open the bam index file for reading if we are
    // doing unmapped reads (also set the read section).
    if(useIndex)
    {
        samIn.ReadBamIndex(indexFile);

        if(unmapped)
        {
            samIn.SetReadSection(-1);
        }

        if(!regionList.IsEmpty())
        {
            myRegionList = ifopen(regionList, "r");
        }
    }

    //////////////////////////
    // Read dbsnp if specified and doing baseQC
    if(((baseQCPtr != NULL) || baseSum) && (!dbsnp.IsEmpty()))
    {
        // Read the dbsnp file.
        IFILE fdbSnp;
        fdbSnp = ifopen(dbsnp,"r");
        // Determine how many entries.
        const SamReferenceInfo& refInfo = samHeader.getReferenceInfo();
        int maxRefLen = 0;
        for(int i = 0; i < refInfo.getNumEntries(); i++)
        {
            int refLen = refInfo.getReferenceLength(i);
            if(refLen >= maxRefLen)
            {
                maxRefLen = refLen + 1;
            }
        }
        
        dbsnpListPtr = new PosList(refInfo.getNumEntries(),maxRefLen);

        if(fdbSnp==NULL)
        {
            std::cerr << "Open dbSNP file " << dbsnp.c_str() << " failed!\n";
        }
        else if(dbsnpListPtr == NULL)
        {
            std::cerr << "Failed to init the memory allocation for the dbsnpList.\n";
        }
        else
        {
            // Read the dbsnp file.
            StringArray tokens;
            String buffer;
            int position = 0;
            int refID = 0;

            // Loop til the end of the file.
            while (!ifeof(fdbSnp))
            {
                // Read the next line.
                buffer.ReadLine(fdbSnp);
                // If it does not have at least 2 columns, 
                // continue to the next line.
                if (buffer.IsEmpty() || buffer[0] == '#') continue;
                tokens.AddTokens(buffer);
                if(tokens.Length() < 2) continue;

                if(!tokens[1].AsInteger(position))
                {
                    std::cerr << "Improperly formatted region line, start position "
                              << "(2nd column) is not an integer: "
                              << tokens[1]
                              << "; Skipping to the next line.\n";         
                    continue;
                }

                // Look up the reference name.
                refID = samHeader.getReferenceID(tokens[0]);
                if(refID != SamReferenceInfo::NO_REF_ID)
                {
                    // Reference id was found, so add it to the dbsnp
                    dbsnpListPtr->addPosition(refID, position);
                }
        
                tokens.Clear();
                buffer.Clear();
            }
        }
        ifclose(fdbSnp);
    }

    // Read the sam records.
    SamRecord samRecord;

    int numReads = 0;

    //////////////////////
    // Setup in case doing a quality count.
    // Quality histogram.
    const int MAX_QUAL = 126;
    const int START_QUAL = 33;
    uint64_t qualCount[MAX_QUAL+1];
    for(int i = 0; i <= MAX_QUAL; i++)
    {
        qualCount[i] = 0;
    }
    
    const int START_PHRED = 0;
    const int PHRED_DIFF = START_QUAL - START_PHRED;
    const int MAX_PHRED = MAX_QUAL - PHRED_DIFF;
    uint64_t phredCount[MAX_PHRED+1];
    for(int i = 0; i <= MAX_PHRED; i++)
    {
        phredCount[i] = 0;
    }
    
    int refPos = 0;
    Cigar* cigarPtr = NULL;
    char cigarChar = '?';
    // Exclude clips from the qual/phred counts if unmapped reads are excluded.
    bool qualExcludeClips = excludeFlags & SamFlag::UNMAPPED;

    //////////////////////////////////
    // When not reading by sections, getNextSection returns true
    // the first time, then false the next time.
    while(getNextSection(samIn))
    {
        // Keep reading records from the file until SamFile::ReadRecord
        // indicates to stop (returns false).
        while(((maxNumReads < 0) || (numReads < maxNumReads)) && samIn.ReadRecord(samHeader, samRecord))
        {
            // Another record was read, so increment the number of reads.
            ++numReads;
            // See if the quality histogram should be genereated.
            if(qual || phred)
            {
                // Get the quality.
                const char* qual = samRecord.getQuality();
                // Check for no quality ('*').
                if((qual[0] == '*') && (qual[1] == 0))
                {
                    // This record does not have a quality string, so no 
                    // quality processing is necessary.
                }
                else
                {
                    int index = 0;
                    cigarPtr = samRecord.getCigarInfo();
                    cigarChar = '?';
                    refPos = samRecord.get0BasedPosition();
                    if(!qualExcludeClips && (cigarPtr != NULL))
                    {
                        // Offset the reference position by any soft clips
                        // by subtracting the queryIndex of this start position.
                        // refPos is now the start position of the clips.
                        refPos -= cigarPtr->getQueryIndex(0);
                    }

                    while(qual[index] != 0)
                    {
                        // Skip this quality if it is clipped and we are skipping clips.
                        if(cigarPtr != NULL)
                        {
                            cigarChar = cigarPtr->getCigarCharOpFromQueryIndex(index);
                        }
                        if(qualExcludeClips && Cigar::isClip(cigarChar))
                        {
                            // Skip a clipped quality.
                            ++index;
                            // Increment the position.
                            continue;
                        }

                        if(withinRegion && (myEndPos != -1) && (refPos >= myEndPos))
                        {
                            // We have hit the end of the region, stop processing this
                            // quality string.
                            break;
                        }

                        if(withinRegion && (refPos < myStartPos))
                        {
                            // This position is not in the target.
                            ++index;
                            // Update the position if this is found in the reference or a clip.
                            if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                            {
                                ++refPos;
                            }
                            continue;
                        }

                        // Check for valid quality.
                        if((qual[index] < START_QUAL) || (qual[index] > MAX_QUAL))
                        {
                            if(qual)
                            {
                                std::cerr << "Invalid Quality found: " << qual[index] 
                                          << ".  Must be between "
                                          << START_QUAL << " and " << MAX_QUAL << ".\n";
                            }
                            if(phred)
                            {
                                std::cerr << "Invalid Phred Quality found: " << qual[index] - PHRED_DIFF
                                          << ".  Must be between "
                                          << START_QUAL << " and " << MAX_QUAL << ".\n";
                            }
                            // Skip an invalid quality.
                            ++index;
                            // Update the position if this is found in the reference or a clip.
                            if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                            {
                                ++refPos;
                            }
                            continue;
                        }
                        
                        // Increment the count for this quality.
                        ++(qualCount[(int)(qual[index])]);
                        ++(phredCount[(int)(qual[index]) - PHRED_DIFF]);
                        // Update the position if this is found in the reference or a clip.
                        if(Cigar::foundInReference(cigarChar) || Cigar::isClip(cigarChar))
                        {
                            ++refPos;
                        }
                        ++index;
                    }
                }
            }

            // Check the next thing to do for the read.
            if((baseQCPtr != NULL) || baseSum)
            {
                // Pileup the bases for this read.
                pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr);
            }
        }

        // Done with a section, move on to the next one.

        // New section, so flush the pileup.
        pileup.flushPileup();
    }

    // Flush the rest of the pileup.
    if((baseQCPtr != NULL) || baseSum)
    {
        // Pileup the bases.
        pileup.processAlignmentRegion(samRecord, myStartPos, myEndPos, dbsnpListPtr);
        PileupElementBaseQCStats::printSummary();
        ifclose(baseQCPtr);
    }

    std::cerr << "Number of records read = " << 
        samIn.GetCurrentRecordCount() << std::endl;

    if(basic)
    {
        std::cerr << std::endl;
        samIn.PrintStatistics();
    }

    // Print the quality stats.
    if(qual)
    {
        std::cerr << std::endl;
        std::cerr << "Quality\tCount\n";
        for(int i = START_QUAL; i <= MAX_QUAL; i++)
        {
            std::cerr << i << "\t" << qualCount[i] << std::endl;
        }
    }
    // Print the phred quality stats.
    if(phred)
    {
        std::cerr << std::endl;
        std::cerr << "Phred\tCount\n";
        for(int i = START_PHRED; i <= MAX_PHRED; i++)
        {
            std::cerr << i << "\t" << phredCount[i] << std::endl;
        }
    }

    SamStatus::Status status = samIn.GetStatus();
    if(status == SamStatus::NO_MORE_RECS)
    {
        // A status of NO_MORE_RECS means that all reads were successful.
        status = SamStatus::SUCCESS;
    }

    return(status);
}
Example #4
0
// Dump the reference information from specified SAM/BAM file.
int DumpRefInfo::execute(int argc, char **argv)
{
    // Extract command line arguments.
    String inFile = "";
    bool noeof = false;
    bool printRecordRefs = false;
    bool params = false;

    ParameterList inputParameters;
    BEGIN_LONG_PARAMETERS(longParameterList)
        LONG_STRINGPARAMETER("in", &inFile)
        LONG_PARAMETER("noeof", &noeof)
        LONG_PARAMETER("printRecordRefs", &printRecordRefs)
        LONG_PARAMETER("params", &params)
        LONG_PHONEHOME(VERSION)
        END_LONG_PARAMETERS();
   
    inputParameters.Add(new LongParameters ("Input Parameters", 
                                            longParameterList));

    // parameters start at index 2 rather than 1.
    inputParameters.Read(argc, argv, 2);

    // If no eof block is required for a bgzf file, set the bgzf file type to 
    // not look for it.
    if(noeof)
    {
        // Set that the eof block is not required.
        BgzfFileType::setRequireEofBlock(false);
    }

    // Check to see if the in file was specified, if not, report an error.
    if(inFile == "")
    {
        usage();
        inputParameters.Status();
        // In file was not specified but it is mandatory.
        std::cerr << "--in is a mandatory argument, "
                  << "but was not specified" << std::endl;
        return(-1);
    }

    if(params)
    {
        inputParameters.Status();
    }

    // Open the input file for reading.
    SamFile samIn;
    samIn.OpenForRead(inFile);

    // Read the sam header.
    SamFileHeader samHeader;
    samIn.ReadHeader(samHeader);

    const SamReferenceInfo& refInfo = samHeader.getReferenceInfo();
    int numReferences = refInfo.getNumEntries();
    
    for(int i = 0; i < numReferences; i++)
    {
        std::cout << "Reference Index " << i;
        std::cout << "; Name: " << refInfo.getReferenceName(i)
                  << std::endl;
    }
    if(numReferences == 0)
    {
        // There is no reference info.
        std::cerr << "The header contains no reference information.\n";
    }

    // If we are to print the references as found in the records, loop
    // through reading the records.
    if(printRecordRefs)
    {
        SamRecord samRecord;

        // Track the prev name/id.
        std::string prevName = "";
        int prevID = -2;
        int recCount = 0; // track the num records in a ref.
        // Keep reading records until ReadRecord returns false.
        while(samIn.ReadRecord(samHeader, samRecord))
        {
            const char* name = samRecord.getReferenceName();
            int id = samRecord.getReferenceID();
            if((strcmp(name, prevName.c_str()) != 0) || (id != prevID))
            {
                if(prevID != -2)
                {
                    std::cout << "\tRef ID: " << prevID
                              << "\tRef Name: " << prevName 
                              << "\tNumRecs: " << recCount
                              << std::endl;
                }
                recCount = 0;
                prevID = id;
                prevName = name;
            }
            ++recCount;
        }
        // Print the last index.
        if(prevID != -2)
        {
            std::cout << "\tRef ID: " << prevID
                      << "\tRef Name: " << prevName 
                      << "\tNumRecs: " << recCount
                      << std::endl;
        }
    }
    return(SamStatus::SUCCESS);
}