/**
 * Constructor for a pair of alignments. Resets this object, calls
 * fillFragPair(p,q,header,RE) and, if requested, keeps clones of both
 * alignments in this object.
 *
 * NOTE(review): reset() acts on this object while fillFragPair() fills the
 * RE argument; presumably callers pass the object under construction as RE
 * -- confirm against callers.
 *
 * @param p first alignment of the pair
 * @param q second alignment of the pair
 * @param header BAM header, forwarded to fillFragPair
 * @param RE object forwarded to fillFragPair as the fill target
 * @param copyAlignment if true then the results of p.sclone() and q.sclone()
 *        are stored in this->p and this->q
 **/
ReadEnds(
	::libmaus::bambam::BamAlignment const & p,
	::libmaus::bambam::BamAlignment const & q,
	::libmaus::bambam::BamHeader const & header,
	::libmaus::bambam::ReadEnds & RE,
	bool const copyAlignment = false
)
{
	reset();
	fillFragPair(p,q,header,RE);

	// nothing more to do unless the caller asked for copies of the alignments
	if ( ! copyAlignment )
		return;

	this->p = p.sclone();
	this->q = q.sclone();
}
/** * put rank aux tag in current alignment **/ void putRank() { uint64_t const lrank = rank++; if ( putrank ) { alignment.putRank("ZR",lrank /*,bamheader */); } }
/**
 * Remove a numeric prefix from the name of an alignment.
 *
 * The name is searched for its first underscore. If there is one, all
 * characters in front of it are decimal digits and exactly one of
 * isRead1() / isRead2() holds for the alignment, then the name is replaced
 * by the part following that underscore. In all other cases the name is
 * left as it is.
 *
 * @param algn alignment whose name is modified in place
 * @return true (in every case)
 **/
bool strip12(libmaus::bambam::BamAlignment & algn)
{
	char const * const name = algn.getName();

	// find the first underscore (or the terminating null byte)
	char const * u1 = name;
	while ( *u1 && *u1 != '_' )
		++u1;

	// no underscore in the name, nothing to strip
	if ( ! *u1 )
		return true;

	// the prefix in front of the underscore has to consist of digits only;
	// isdigit is only defined for values representable as unsigned char,
	// so convert before calling it
	bool ok = true;
	for ( char const * t1 = name; ok && t1 != u1; ++t1 )
		ok = ( isdigit(static_cast<unsigned char>(*t1)) != 0 );

	int const read1 = algn.isRead1() ? 1 : 0;
	int const read2 = algn.isRead2() ? 1 : 0;

	if ( (read1+read2 == 1) && ok )
	{
		// copy the suffix before replacing the name; u1 points into the
		// buffer returned by getName(), which replaceName presumably rewrites
		std::string const upname(u1+1);
		algn.replaceName(upname.begin(),upname.size());
	}

	return true;
}
static bool readAlignmentGz( stream_type & GZ, ::libmaus::bambam::BamAlignment & alignment, ::libmaus::bambam::BamHeader const * bamheader = 0, bool const validate = true ) { /* read alignment block size */ int64_t const bs0 = GZ.get(); int64_t const bs1 = GZ.get(); int64_t const bs2 = GZ.get(); int64_t const bs3 = GZ.get(); if ( bs3 < 0 ) // reached end of file return false; /* assemble block size as LE integer */ alignment.blocksize = (bs0 << 0) | (bs1 << 8) | (bs2 << 16) | (bs3 << 24) ; /* read alignment block */ if ( alignment.blocksize > alignment.D.size() ) alignment.D = ::libmaus::bambam::BamAlignment::D_array_type(alignment.blocksize,false); GZ.read(reinterpret_cast<char *>(alignment.D.begin()),alignment.blocksize); if ( static_cast<int64_t>(GZ.gcount()) != static_cast<int64_t>(alignment.blocksize) ) { ::libmaus::exception::LibMausException se; se.getStream() << "Invalid alignment (EOF in alignment block of length " << alignment.blocksize << ")" << std::endl; se.finish(); throw se; } if ( validate ) { libmaus_bambam_alignment_validity const validity = bamheader ? alignment.valid(*bamheader) : alignment.valid(); if ( validity != ::libmaus::bambam::libmaus_bambam_alignment_validity_ok ) { ::libmaus::exception::LibMausException se; se.getStream() << "Invalid alignment: " << validity << std::endl; se.finish(); throw se; } } return true; }
/**
 * Filter predicate for an alignment based on its read group.
 *
 * The read group of the alignment is mapped to an id via
 * header.getReadGroupId(). A negative id yields false; otherwise the result
 * is the value pBV->get() reports for that id.
 *
 * @param algn alignment to be tested
 * @return true iff the read group id is non-negative and pBV->get(id) is true
 **/
virtual bool operator()(libmaus::bambam::BamAlignment const & algn) const
{
	int64_t const groupid = header.getReadGroupId(algn.getReadGroup());

	// negative id: pBV is not consulted
	if ( groupid < 0 )
		return false;

	return static_cast<bool>(pBV->get(groupid));
}
bool checkCigarValid( ::libmaus::bambam::BamAlignment const & alignment, ::libmaus::bambam::BamHeader const & bamheader, ::libmaus::autoarray::AutoArray < ::libmaus::autoarray::AutoArray<uint8_t>::unique_ptr_type > const & text ) { if ( alignment.isUnmap() ) return true; if ( ! alignment.isCigarLengthConsistent() ) { std::cerr << "[E] inconsistent cigar " << alignment.getCigarString() << " for " << alignment.getName() << std::endl; return false; } if ( alignment.getRefID() < 0 || alignment.getRefID() >= static_cast<int64_t>(bamheader.chromosomes.size()) ) { std::cerr << "[E] reference id " << alignment.getRefID() << " out of range for " << alignment.getName() << std::endl; return false; } ::libmaus::autoarray::AutoArray<uint8_t> const & ctext = *(text[alignment.getRefID()]); int64_t refpos = alignment.getPos(); int64_t seqpos = 0; bool alok = true; std::string const read = alignment.getRead(); for ( uint64_t i = 0; alok && i < alignment.getNCigar(); ++i ) { char const cop = alignment.getCigarFieldOpAsChar(i); int64_t const clen = alignment.getCigarFieldLength(i); switch ( cop ) { // match/mismatch, increment both case '=': case 'X': case 'M': { for ( int64_t j = 0; alok && j < clen; ++j, ++refpos, ++ seqpos ) { if ( refpos < 0 || refpos >= static_cast<int64_t>(ctext.size()) ) { std::cerr << "[E] " << cop << " operation outside of chromosome coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } else if ( seqpos >= alignment.getLseq() ) { std::cerr << "[E] " << cop << " operation outside of sequence coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } else if ( cop == '=' && toupper(ctext[refpos]) != toupper(read[seqpos]) ) { std::cerr << "[E] " << cop << " operation but mismatch between reference and query." << std::endl; alok = false; } else if ( cop == 'X' && toupper(ctext[refpos]) == toupper(read[seqpos]) ) { std::cerr << "[E] " << cop << " operation but mismatch between reference and query." 
<< std::endl; alok = false; } } break; } // insert into reference, increment seq case 'P': case 'I': { for ( int64_t j = 0; alok && j < clen; ++j, ++seqpos ) { if ( seqpos >= alignment.getLseq() ) { std::cerr << "[E] " << cop << " operation outside of sequence coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } } break; } // delete from reference, increment ref case 'D': { for ( int64_t j = 0; alok && j < clen; ++j, ++refpos ) { if ( refpos < 0 || refpos >= static_cast<int64_t>(ctext.size()) ) { std::cerr << "[E] " << cop << " operation outside of reference coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } } break; } // soft clipping, increment seq case 'S': { for ( int64_t j = 0; alok && j < clen; ++j, ++seqpos ) { if ( seqpos >= alignment.getLseq() ) { std::cerr << "[E] " << cop << " operation outside of sequence coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } } break; } // hard clipping, do nothing case 'H': { break; } // skip region in reference, increment ref case 'N': { for ( int64_t j = 0; alok && j < clen; ++j, ++refpos ) { if ( refpos < 0 || refpos >= static_cast<int64_t>(ctext.size()) ) { std::cerr << "[E] " << cop << " operation outside of reference coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } } break; } } } return alok; }
static void fillFragPair( ::libmaus::bambam::BamAlignment const & p, ::libmaus::bambam::BamAlignment const & q, ::libmaus::bambam::BamHeader const & header, ::libmaus::bambam::ReadEnds & RE ) { RE.read1Sequence = p.getRefIDChecked() + 1; RE.read1Coordinate = p.getCoordinate() + 1; RE.read2Sequence = q.getRefIDChecked() + 1; RE.read2Coordinate = q.getCoordinate() + 1; if ( ! p.isReverse() ) if ( ! q.isReverse() ) RE.orientation = ::libmaus::bambam::ReadEnds::FF; else RE.orientation = ::libmaus::bambam::ReadEnds::FR; else if ( ! q.isReverse() ) RE.orientation = ::libmaus::bambam::ReadEnds::RF; else RE.orientation = ::libmaus::bambam::ReadEnds::RR; RE.read1IndexInFile = p.getRank(); RE.read2IndexInFile = q.getRank(); RE.score = p.getScore() + q.getScore(); if ( p.isPaired() && (!p.isMateUnmap()) ) RE.read2Sequence = p.getNextRefIDChecked() + 1; char const * readname = p.getName(); char const * readnamee = readname + (p.getLReadName()-1); int cnt[2] = { 0,0 }; for ( char const * c = readname; c != readnamee; ++c ) cnt [ (static_cast<int>(*c) - ':') == 0 ] ++; bool const rnparseok = (cnt[1] == 4); // parse tile, x, y if ( rnparseok ) { uint8_t const * sem[4]; uint8_t const ** psem = &sem[0]; for ( uint8_t const * c = reinterpret_cast<uint8_t const *>(readname); c != reinterpret_cast<uint8_t const *>(readnamee); ++c ) if ( *c == ':' ) *(psem++) = c+1; uint8_t const * t = sem[1]; while ( D[*t] ) { RE.tile *= 10; RE.tile += *(t++)-'0'; } RE.tile += 1; t = sem[2]; while ( D[*t] ) { RE.x *= 10; RE.x += *(t++)-'0'; } t = sem[3]; while ( D[*t] ) { RE.y *= 10; RE.y += *(t++)-'0'; } } int64_t const rg = p.getReadGroupId(header); RE.readGroup = rg + 1; }