/*! \fn bool BamWriter::Open(const std::string& filename,
                             const SamHeader& samHeader,
                             const RefVector& referenceSequences)
    \brief Opens a BAM file for writing, taking the header as a SamHeader object.

    This is an overloaded function. It first passes m_numThreads to the
    implementation object's SetParallel(), then serializes \p samHeader with
    SamHeader::ToString() and forwards everything to the implementation's Open().

    Will overwrite the BAM file if it already exists.

    \param[in] filename           name of output BAM file
    \param[in] samHeader          header data, wrapped in SamHeader object
    \param[in] referenceSequences list of reference entries
    \return \c true if opened successfully

    \sa Close(), IsOpen(), BamReader::GetHeader(), BamReader::GetReferenceData()
*/
bool BamWriter::Open(const std::string& filename,
                     const SamHeader& samHeader,
                     const RefVector& referenceSequences)
{
    // configure the implementation before the file is opened
    d->SetParallel(m_numThreads);

    // the implementation works on the plain-text form of the header
    const std::string headerText = samHeader.ToString();
    return d->Open(filename, headerText, referenceSequences);
}
// makes a virtual, unified header for all the bam files in the multireader string BamMultiReaderPrivate::GetHeaderText(void) const { // N.B. - right now, simply copies all header data from first BAM, // and then appends RG's from other BAM files // TODO: make this more intelligent wrt other header lines/fields // if no readers open const size_t numReaders = m_readers.size(); if ( numReaders == 0 ) return string(); // retrieve first reader's header const MergeItem& firstItem = m_readers.front(); const BamReader* reader = firstItem.Reader; if ( reader == 0 ) return string(); SamHeader mergedHeader = reader->GetHeader(); // Add filename to read ID name so we know // which file it came from for (SamReadGroupIterator it = mergedHeader.ReadGroups.Begin(); it != mergedHeader.ReadGroups.End(); it++) { it->ID = it->ID + "-" + reader->GetFilename(); } // iterate over any remaining readers (skipping the first) for ( size_t i = 1; i < numReaders; ++i ) { const MergeItem& item = m_readers.at(i); const BamReader* reader = item.Reader; if ( reader == 0 ) continue; // retrieve current reader's header SamHeader currentHeader = reader->GetHeader(); // Add filename to read ID name so we know // which file it came from for (SamReadGroupIterator it = currentHeader.ReadGroups.Begin(); it != currentHeader.ReadGroups.End(); it++) { it->ID = it->ID + "-" + reader->GetFilename(); } // append current reader's RG entries to merged header // N.B. - SamReadGroupDictionary handles duplicate-checking mergedHeader.ReadGroups.Add(currentHeader.ReadGroups); // TODO: merge anything else?? } // return stringified header return mergedHeader.ToString(); }
//{{{bool sort_inter_chrom_bam(string in_file_name, bool sort_inter_chrom_bam(string in_file_name, string out_file_name) { // open input BAM file BamReader reader; if ( !reader.Open(in_file_name) ) { cerr << "sort ERROR: could not open " << in_file_name << " for reading... Aborting." << endl; return false; } SamHeader header = reader.GetHeader(); if ( !header.HasVersion() ) header.Version = Constants::SAM_CURRENT_VERSION; string header_text = header.ToString(); RefVector ref = reader.GetReferenceData(); // set up alignments buffer BamAlignment al; vector<BamAlignment> buffer; buffer.reserve( (size_t)(SORT_DEFAULT_MAX_BUFFER_COUNT*1.1) ); bool bufferFull = false; int buff_count = 0; // iterate through file while ( reader.GetNextAlignment(al)) { // check buffer's usage bufferFull = ( buffer.size() >= SORT_DEFAULT_MAX_BUFFER_COUNT ); // store alignments until buffer is "full" if ( !bufferFull ) buffer.push_back(al); // if buffer is "full" else { // so create a sorted temp file with current buffer contents // then push "al" into fresh buffer create_sorted_temp_file(buffer, out_file_name, buff_count, header_text, ref); ++buff_count; buffer.push_back(al); } } // handle any leftover buffer contents if ( !buffer.empty() ) { create_sorted_temp_file(buffer, out_file_name, buff_count, header_text, ref); ++buff_count; } reader.Close(); return merge_sorted_files(out_file_name, buff_count, header_text, ref); /* for (int i = 0; i < buff_count; ++i) { stringstream temp_name; temp_name << out_file_name << i; } */ }
// generates mutiple sorted temp BAM files from single unsorted BAM file bool SortTool::SortToolPrivate::GenerateSortedRuns(void) { // open input BAM file BamReader reader; if ( !reader.Open(m_settings->InputBamFilename) ) { cerr << "bamtools sort ERROR: could not open " << m_settings->InputBamFilename << " for reading... Aborting." << endl; return false; } // get basic data that will be shared by all temp/output files SamHeader header = reader.GetHeader(); header.SortOrder = ( m_settings->IsSortingByName ? Constants::SAM_HD_SORTORDER_QUERYNAME : Constants::SAM_HD_SORTORDER_COORDINATE ); m_headerText = header.ToString(); m_references = reader.GetReferenceData(); // set up alignments buffer BamAlignment al; vector<BamAlignment> buffer; buffer.reserve( (size_t)(m_settings->MaxBufferCount*1.1) ); bool bufferFull = false; // if sorting by name, we need to generate full char data // so can't use GetNextAlignmentCore() if ( m_settings->IsSortingByName ) { // iterate through file while ( reader.GetNextAlignment(al)) { // check buffer's usage bufferFull = ( buffer.size() >= m_settings->MaxBufferCount ); // store alignments until buffer is "full" if ( !bufferFull ) buffer.push_back(al); // if buffer is "full" else { // push any unmapped reads into buffer, // don't want to split these into a separate temp file if ( !al.IsMapped() ) buffer.push_back(al); // "al" is mapped, so create a sorted temp file with current buffer contents // then push "al" into fresh buffer else { CreateSortedTempFile(buffer); buffer.push_back(al); } } } } // sorting by position, can take advantage of GNACore() speedup else { // iterate through file while ( reader.GetNextAlignmentCore(al) ) { // check buffer's usage bufferFull = ( buffer.size() >= m_settings->MaxBufferCount ); // store alignments until buffer is "full" if ( !bufferFull ) buffer.push_back(al); // if buffer is "full" else { // push any unmapped reads into buffer, // don't want to split these into a separate temp file if ( !al.IsMapped() 
) buffer.push_back(al); // "al" is mapped, so create a sorted temp file with current buffer contents // then push "al" into fresh buffer else { CreateSortedTempFile(buffer); buffer.push_back(al); } } } } // handle any leftover buffer contents if ( !buffer.empty() ) CreateSortedTempFile(buffer); // close reader & return success reader.Close(); return true; }
/*! \fn bool BamWriter::Open(const std::string& filename,
                             const SamHeader& samHeader,
                             const RefVector& referenceSequences)
    \brief Opens a BAM file for writing, taking the header as a SamHeader object.

    This is an overloaded function. The header is serialized with
    SamHeader::ToString() and the call is forwarded to the implementation
    object's Open().

    Will overwrite the BAM file if it already exists.

    \param filename           name of output BAM file
    \param samHeader          header data, wrapped in SamHeader object
    \param referenceSequences list of reference entries
    \return \c true if opened successfully

    \sa Close(), IsOpen(), BamReader::GetHeader(), BamReader::GetReferenceData()
*/
bool BamWriter::Open(const std::string& filename,
                     const SamHeader& samHeader,
                     const RefVector& referenceSequences)
{
    // the implementation works on the plain-text form of the header
    const std::string headerText = samHeader.ToString();
    return d->Open(filename, headerText, referenceSequences);
}