//------------------------------------------------------------------------- void XList::sortByElementNumber(String order){ // Get the number of sessions per speaker LKVector spk(0,0); for(unsigned long i=0;i<_vector.size();i++){ LKVector::type sps; sps.idx = i; sps.lk = _vector.getObject(i).getElementCount(); spk.addValue(sps); } // Sort Xlines of the temporary XList by element number spk.descendingSort(); // Copy the current RefVector<XLine> into a temporary one RefVector<XLine> tmpX; for(unsigned long i=0;i<_vector.size();i++){ XLine *ll = new XLine(_vector.getObject(i)); tmpX.addObject(*ll); } // Remove all elements from the XList _vector.deleteAllObjects(); // Fill the XList according to the number of elements if(order == "descend"){ for(unsigned long i=0;i<tmpX.size();i++){ _vector.addObject(tmpX.getObject(spk[i].idx)); } } else if(order == "ascend"){ for(long i=tmpX.size()-1;i>=0;i--){ _vector.addObject(tmpX.getObject(spk[i].idx)); } } }
// Accumulates RefLength plus Parameter max_dist for the references located
// before index `id`, stopping early when `ref` has fewer than `id` entries.
// Returns the accumulated sum (0 when nothing is visited).
long get_ref_lengths(int id, RefVector ref) {
    const size_t limit = static_cast<size_t>(id);
    long total = 0;
    size_t idx = 0;
    while (idx < limit && idx < ref.size()) {
        total += static_cast<long>(ref[idx].RefLength) + static_cast<long>(Parameter::Instance()->max_dist);
        ++idx;
    }
    return total;
}
// Builds the genome description from a vector of reference entries:
// each RefName is appended to _chromList (in input order) and mapped to its
// RefLength in _chromSizes.
GenomeFile::GenomeFile(const RefVector &genome) {
    RefVector::const_iterator entry = genome.begin();
    const RefVector::const_iterator last = genome.end();
    while (entry != last) {
        const string chrom = entry->RefName;
        const int length = entry->RefLength;
        _chromSizes[chrom] = length;
        _chromList.push_back(chrom);
        ++entry;
    }
}
long Breakpoint::calc_pos(long pos, RefVector ref) { size_t i = 0; pos -= ref[i].RefLength; while (i < ref.size() && pos >= 0) { i++; pos -= ref[i].RefLength; } return pos + ref[i].RefLength; }
long fuck_off(long pos, RefVector ref, std::string &chr) { size_t i = 0; pos -= (ref[i].RefLength + Parameter::Instance()->max_dist); while (i < ref.size() && pos >= 0) { i++; pos -= ((long) ref[i].RefLength + (long) Parameter::Instance()->max_dist); } chr = ref[i].RefName; return pos + ref[i].RefLength + (long) Parameter::Instance()->max_dist; }
std::string Breakpoint::get_chr(long pos, RefVector ref) { // std::cout << "pos: " << pos << std::endl; size_t id = 0; while (id < ref.size() && pos >= 0) { pos -= (long) ref[id].RefLength; // std::cout << id << std::endl; id++; } return ref[id - 1].RefName; }
bool RandomTool::RandomToolPrivate::Run(void) { // set to default stdin if no input files provided if ( !m_settings->HasInput && !m_settings->HasInputFilelist ) m_settings->InputFiles.push_back(Options::StandardIn()); // add files in the filelist to the input file list if ( m_settings->HasInputFilelist ) { ifstream filelist(m_settings->InputFilelist.c_str(), ios::in); if ( !filelist.is_open() ) { cerr << "bamtools random ERROR: could not open input BAM file list... Aborting." << endl; return false; } string line; while ( getline(filelist, line) ) m_settings->InputFiles.push_back(line); } // open our reader BamMultiReader reader; if ( !reader.Open(m_settings->InputFiles) ) { cerr << "bamtools random ERROR: could not open input BAM file(s)... Aborting." << endl; return false; } // look up index files for all BAM files reader.LocateIndexes(); // make sure index data is available if ( !reader.HasIndexes() ) { cerr << "bamtools random ERROR: could not load index data for all input BAM file(s)... Aborting." << endl; reader.Close(); return false; } // get BamReader metadata const string headerText = reader.GetHeaderText(); const RefVector references = reader.GetReferenceData(); if ( references.empty() ) { cerr << "bamtools random ERROR: no reference data available... Aborting." << endl; reader.Close(); return false; } // determine compression mode for BamWriter bool writeUncompressed = ( m_settings->OutputFilename == Options::StandardOut() && !m_settings->IsForceCompression ); BamWriter::CompressionMode compressionMode = BamWriter::Compressed; if ( writeUncompressed ) compressionMode = BamWriter::Uncompressed; // open BamWriter BamWriter writer; writer.SetCompressionMode(compressionMode); if ( !writer.Open(m_settings->OutputFilename, headerText, references) ) { cerr << "bamtools random ERROR: could not open " << m_settings->OutputFilename << " for writing... Aborting." 
<< endl; reader.Close(); return false; } // if user specified a REGION constraint, attempt to parse REGION string BamRegion region; if ( m_settings->HasRegion && !Utilities::ParseRegionString(m_settings->Region, reader, region) ) { cerr << "bamtools random ERROR: could not parse REGION: " << m_settings->Region << endl; cerr << "Check that REGION is in valid format (see documentation) and that the coordinates are valid" << endl; reader.Close(); writer.Close(); return false; } // seed our random number generator srand( time(NULL) ); // grab random alignments BamAlignment al; unsigned int i = 0; while ( i < m_settings->AlignmentCount ) { int randomRefId = 0; int randomPosition = 0; // use REGION constraints to select random refId & position if ( m_settings->HasRegion ) { // select a random refId randomRefId = getRandomInt(region.LeftRefID, region.RightRefID); // select a random position based on randomRefId const int lowerBoundPosition = ( (randomRefId == region.LeftRefID) ? region.LeftPosition : 0 ); const int upperBoundPosition = ( (randomRefId == region.RightRefID) ? region.RightPosition : (references.at(randomRefId).RefLength - 1) ); randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); } // otherwise select from all possible random refId & position else { // select random refId randomRefId = getRandomInt(0, (int)references.size() - 1); // select random position based on randomRefId const int lowerBoundPosition = 0; const int upperBoundPosition = references.at(randomRefId).RefLength - 1; randomPosition = getRandomInt(lowerBoundPosition, upperBoundPosition); } // if jump & read successful, save first alignment that overlaps random refId & position if ( reader.Jump(randomRefId, randomPosition) ) { while ( reader.GetNextAlignmentCore(al) ) { if ( al.RefID == randomRefId && al.Position >= randomPosition ) { writer.SaveAlignment(al); ++i; break; } } } } // cleanup & exit reader.Close(); writer.Close(); return true; }
// this has been copied from bamtools utilities, since it isn't in the API. Original file is bamtools_utilities.cpp. // Like the rest of Bamtools, it is under the BSD license. bool Filter::ParseRegionString(const string& regionString, BamRegion& region) { // ------------------------------- // parse region string // check first for empty string if ( regionString.empty() ) return false; // non-empty string, look for a colom size_t foundFirstColon = regionString.find(':'); // store chrom strings, and numeric positions string chrom; int startPos; int stopPos; // no colon found // going to use entire contents of requested chromosome // just store entire region string as startChrom name // use BamReader methods to check if its valid for current BAM file if ( foundFirstColon == string::npos ) { chrom = regionString; startPos = 0; stopPos = -1; } // colon found, so we at least have some sort of startPos requested else { // store start chrom from beginning to first colon chrom = regionString.substr(0,foundFirstColon); // look for ".." after the colon size_t foundRangeDots = regionString.find("..", foundFirstColon+1); // no dots found // so we have a startPos but no range // store contents before colon as startChrom, after as startPos if ( foundRangeDots == string::npos ) { startPos = atoi( regionString.substr(foundFirstColon+1).c_str() ); stopPos = -1; } // ".." found, so we have some sort of range selected else { // store startPos between first colon and range dots ".." 
startPos = atoi( regionString.substr(foundFirstColon+1, foundRangeDots-foundFirstColon-1).c_str() ); // look for second colon size_t foundSecondColon = regionString.find(':', foundRangeDots+1); // no second colon found // so we have a "standard" chrom:start..stop input format (on single chrom) if ( foundSecondColon == string::npos ) { stopPos = atoi( regionString.substr(foundRangeDots+2).c_str() ); } else { return false; } } } // ------------------------------- // validate reference IDs & genomic positions const RefVector references = getReferences(); int RefID = -1; for(int i = 0; i < references.size(); i++) { if(references[i].RefName == chrom) RefID = i; } // if startRefID not found, return false if ( RefID == -1 ) { cerr << "Can't find chromosome'" << chrom << "'" << endl; return false; } // startPos cannot be greater than or equal to reference length const RefData& startReference = references.at(RefID); if ( startPos >= startReference.RefLength ) { cerr << "Start position (" << startPos << ") after end of the reference sequence (" << startReference.RefLength << ")" << endl; return false; } // stopPosition cannot be larger than reference length const RefData& stopReference = references.at(RefID); if ( stopPos > stopReference.RefLength ) { cerr << "Start position (" << stopPos << ") after end of the reference sequence (" << stopReference.RefLength << ")" << endl; return false; } // if no stopPosition specified, set to reference end if ( stopPos == -1 ) stopPos = stopReference.RefLength; // ------------------------------- // set up Region struct & return region.LeftRefID = RefID; region.LeftPosition = startPos; region.RightRefID = RefID;; region.RightPosition = stopPos; return true; }
bool check(const PropertyFilter& filter, const BamAlignment& al) { bool keepAlignment = true; const PropertyMap& properties = filter.Properties; PropertyMap::const_iterator propertyIter = properties.begin(); PropertyMap::const_iterator propertyEnd = properties.end(); for ( ; propertyIter != propertyEnd; ++propertyIter ) { // check alignment data field depending on propertyName const string& propertyName = (*propertyIter).first; const PropertyFilterValue& valueFilter = (*propertyIter).second; if ( propertyName == ALIGNMENTFLAG_PROPERTY ) keepAlignment &= valueFilter.check(al.AlignmentFlag); else if ( propertyName == CIGAR_PROPERTY ) { stringstream cigarSs; const vector<CigarOp>& cigarData = al.CigarData; if ( !cigarData.empty() ) { vector<CigarOp>::const_iterator cigarBegin = cigarData.begin(); vector<CigarOp>::const_iterator cigarIter = cigarBegin; vector<CigarOp>::const_iterator cigarEnd = cigarData.end(); for ( ; cigarIter != cigarEnd; ++cigarIter ) { const CigarOp& op = (*cigarIter); cigarSs << op.Length << op.Type; } keepAlignment &= valueFilter.check(cigarSs.str()); } } else if ( propertyName == INSERTSIZE_PROPERTY ) keepAlignment &= valueFilter.check(al.InsertSize); else if ( propertyName == ISDUPLICATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsDuplicate()); else if ( propertyName == ISFAILEDQC_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFailedQC()); else if ( propertyName == ISFIRSTMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsFirstMate()); else if ( propertyName == ISMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMapped()); else if ( propertyName == ISMATEMAPPED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateMapped()); else if ( propertyName == ISMATEREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsMateReverseStrand()); else if ( propertyName == ISPAIRED_PROPERTY ) keepAlignment &= valueFilter.check(al.IsPaired()); else if ( propertyName == ISPRIMARYALIGNMENT_PROPERTY ) keepAlignment &= 
valueFilter.check(al.IsPrimaryAlignment()); else if ( propertyName == ISPROPERPAIR_PROPERTY ) keepAlignment &= valueFilter.check(al.IsProperPair()); else if ( propertyName == ISREVERSESTRAND_PROPERTY ) keepAlignment &= valueFilter.check(al.IsReverseStrand()); else if ( propertyName == ISSECONDMATE_PROPERTY ) keepAlignment &= valueFilter.check(al.IsSecondMate()); else if ( propertyName == ISSINGLETON_PROPERTY ) { const bool isSingleton = al.IsPaired() && al.IsMapped() && !al.IsMateMapped(); keepAlignment &= valueFilter.check(isSingleton); } else if ( propertyName == MAPQUALITY_PROPERTY ) keepAlignment &= valueFilter.check(al.MapQuality); else if ( propertyName == MATEPOSITION_PROPERTY ) keepAlignment &= ( al.IsPaired() && al.IsMateMapped() && valueFilter.check(al.MateRefID) ); else if ( propertyName == MATEREFERENCE_PROPERTY ) { if ( !al.IsPaired() || !al.IsMateMapped() ) return false; BAMTOOLS_ASSERT_MESSAGE( (al.MateRefID>=0 && (al.MateRefID<(int)filterToolReferences.size())), "Invalid MateRefID"); const string& refName = filterToolReferences.at(al.MateRefID).RefName; keepAlignment &= valueFilter.check(refName); } else if ( propertyName == NAME_PROPERTY ) keepAlignment &= valueFilter.check(al.Name); else if ( propertyName == POSITION_PROPERTY ) keepAlignment &= valueFilter.check(al.Position); else if ( propertyName == QUERYBASES_PROPERTY ) keepAlignment &= valueFilter.check(al.QueryBases); else if ( propertyName == REFERENCE_PROPERTY ) { BAMTOOLS_ASSERT_MESSAGE( (al.RefID>=0 && (al.RefID<(int)filterToolReferences.size())), "Invalid RefID"); const string& refName = filterToolReferences.at(al.RefID).RefName; keepAlignment &= valueFilter.check(refName); } else if ( propertyName == TAG_PROPERTY ) keepAlignment &= checkAlignmentTag(valueFilter, al); else BAMTOOLS_ASSERT_UNREACHABLE; // if alignment fails at ANY point, just quit and return false if ( !keepAlignment ) return false; } BAMTOOLS_ASSERT_MESSAGE( keepAlignment, "Error in BamAlignmentChecker... 
keepAlignment should be true here"); return keepAlignment; }
// ValidateReaders checks that all the readers point to BAM files representing // alignments against the same set of reference sequences, and that the // sequences are identically ordered. If these checks fail the operation of // the multireader is undefined, so we force program exit. bool BamMultiReaderPrivate::ValidateReaders() const { m_errorString.clear(); // skip if 0 or 1 readers opened if (m_readers.empty() || (m_readers.size() == 1)) return true; // retrieve first reader const MergeItem& firstItem = m_readers.front(); const BamReader* firstReader = firstItem.Reader; if (firstReader == 0) return false; // retrieve first reader's header data const SamHeader& firstReaderHeader = firstReader->GetHeader(); const std::string& firstReaderSortOrder = firstReaderHeader.SortOrder; // retrieve first reader's reference data const RefVector& firstReaderRefData = firstReader->GetReferenceData(); const int firstReaderRefCount = firstReader->GetReferenceCount(); const int firstReaderRefSize = firstReaderRefData.size(); // iterate over all readers std::vector<MergeItem>::const_iterator readerIter = m_readers.begin(); std::vector<MergeItem>::const_iterator readerEnd = m_readers.end(); for (; readerIter != readerEnd; ++readerIter) { const MergeItem& item = (*readerIter); BamReader* reader = item.Reader; if (reader == 0) continue; // get current reader's header data const SamHeader& currentReaderHeader = reader->GetHeader(); const std::string& currentReaderSortOrder = currentReaderHeader.SortOrder; // check compatible sort order if (currentReaderSortOrder != firstReaderSortOrder) { const std::string message = std::string("mismatched sort order in ") + reader->GetFilename() + ", expected " + firstReaderSortOrder + ", but found " + currentReaderSortOrder; SetErrorString("BamMultiReader::ValidateReaders", message); return false; } // get current reader's reference data const RefVector currentReaderRefData = reader->GetReferenceData(); const int currentReaderRefCount = 
reader->GetReferenceCount(); const int currentReaderRefSize = currentReaderRefData.size(); // init reference data iterators RefVector::const_iterator firstRefIter = firstReaderRefData.begin(); RefVector::const_iterator firstRefEnd = firstReaderRefData.end(); RefVector::const_iterator currentRefIter = currentReaderRefData.begin(); // compare reference counts from BamReader ( & container size, in case of BR error) if ((currentReaderRefCount != firstReaderRefCount) || (firstReaderRefSize != currentReaderRefSize)) { std::stringstream s; s << "mismatched reference count in " << reader->GetFilename() << ", expected " << firstReaderRefCount << ", but found " << currentReaderRefCount; SetErrorString("BamMultiReader::ValidateReaders", s.str()); return false; } // this will be ok; we just checked above that we have identically-sized sets of references // here we simply check if they are all, in fact, equal in content while (firstRefIter != firstRefEnd) { const RefData& firstRef = (*firstRefIter); const RefData& currentRef = (*currentRefIter); // compare reference name & length if ((firstRef.RefName != currentRef.RefName) || (firstRef.RefLength != currentRef.RefLength)) { std::stringstream s; s << "mismatched references found in" << reader->GetFilename() << "expected: " << std::endl; // print first reader's reference data RefVector::const_iterator refIter = firstReaderRefData.begin(); RefVector::const_iterator refEnd = firstReaderRefData.end(); for (; refIter != refEnd; ++refIter) { const RefData& entry = (*refIter); std::stringstream s; s << entry.RefName << ' ' << std::endl; } s << "but found: " << std::endl; // print current reader's reference data refIter = currentReaderRefData.begin(); refEnd = currentReaderRefData.end(); for (; refIter != refEnd; ++refIter) { const RefData& entry = (*refIter); s << entry.RefName << ' ' << entry.RefLength << std::endl; } SetErrorString("BamMultiReader::ValidateReaders", s.str()); return false; } // update iterators ++firstRefIter; 
++currentRefIter; } } // if we get here, everything checks out return true; }