bool checkCigarValid( ::libmaus::bambam::BamAlignment const & alignment, ::libmaus::bambam::BamHeader const & bamheader, ::libmaus::autoarray::AutoArray < ::libmaus::autoarray::AutoArray<uint8_t>::unique_ptr_type > const & text ) { if ( alignment.isUnmap() ) return true; if ( ! alignment.isCigarLengthConsistent() ) { std::cerr << "[E] inconsistent cigar " << alignment.getCigarString() << " for " << alignment.getName() << std::endl; return false; } if ( alignment.getRefID() < 0 || alignment.getRefID() >= static_cast<int64_t>(bamheader.chromosomes.size()) ) { std::cerr << "[E] reference id " << alignment.getRefID() << " out of range for " << alignment.getName() << std::endl; return false; } ::libmaus::autoarray::AutoArray<uint8_t> const & ctext = *(text[alignment.getRefID()]); int64_t refpos = alignment.getPos(); int64_t seqpos = 0; bool alok = true; std::string const read = alignment.getRead(); for ( uint64_t i = 0; alok && i < alignment.getNCigar(); ++i ) { char const cop = alignment.getCigarFieldOpAsChar(i); int64_t const clen = alignment.getCigarFieldLength(i); switch ( cop ) { // match/mismatch, increment both case '=': case 'X': case 'M': { for ( int64_t j = 0; alok && j < clen; ++j, ++refpos, ++ seqpos ) { if ( refpos < 0 || refpos >= static_cast<int64_t>(ctext.size()) ) { std::cerr << "[E] " << cop << " operation outside of chromosome coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } else if ( seqpos >= alignment.getLseq() ) { std::cerr << "[E] " << cop << " operation outside of sequence coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } else if ( cop == '=' && toupper(ctext[refpos]) != toupper(read[seqpos]) ) { std::cerr << "[E] " << cop << " operation but mismatch between reference and query." << std::endl; alok = false; } else if ( cop == 'X' && toupper(ctext[refpos]) == toupper(read[seqpos]) ) { std::cerr << "[E] " << cop << " operation but mismatch between reference and query." << std::endl; alok = false; } } break; } // insert into reference, increment seq case 'P': case 'I': { for ( int64_t j = 0; alok && j < clen; ++j, ++seqpos ) { if ( seqpos >= alignment.getLseq() ) { std::cerr << "[E] " << cop << " operation outside of sequence coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } } break; } // delete from reference, increment ref case 'D': { for ( int64_t j = 0; alok && j < clen; ++j, ++refpos ) { if ( refpos < 0 || refpos >= static_cast<int64_t>(ctext.size()) ) { std::cerr << "[E] " << cop << " operation outside of reference coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } } break; } // soft clipping, increment seq case 'S': { for ( int64_t j = 0; alok && j < clen; ++j, ++seqpos ) { if ( seqpos >= alignment.getLseq() ) { std::cerr << "[E] " << cop << " operation outside of sequence coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } } break; } // hard clipping, do nothing case 'H': { break; } // skip region in reference, increment ref case 'N': { for ( int64_t j = 0; alok && j < clen; ++j, ++refpos ) { if ( refpos < 0 || refpos >= static_cast<int64_t>(ctext.size()) ) { std::cerr << "[E] " << cop << " operation outside of reference coordinate range " << " for " << alignment.getName() << std::endl; alok = false; } } break; } } } return alok; }