//{{{ void process_intra_chrom_pair(const BamAlignment &curr, void SV_Pair:: process_intra_chrom_pair(const BamAlignment &curr, const RefVector refs, BamWriter &inter_chrom_reads, map<string, BamAlignment> &mapped_pairs, UCSCBins<SV_BreakPoint*> &r_bin, int weight, int ev_id, SV_PairReader *reader) { if (curr.RefID == curr.MateRefID) { process_pair(curr, refs, mapped_pairs, r_bin, weight, ev_id, reader); } else if (curr.IsMapped() && curr.IsMateMapped() && (curr.RefID >= 0) && (curr.MateRefID >= 0) ) { BamAlignment al = curr; string x = reader->get_source_file_name(); al.AddTag("LS","Z",x); inter_chrom_reads.SaveAlignment(al); } }
void TagBam::Tag() { // open the annotations files for processing; OpenAnnoFiles(); // open the BAM file BamReader reader; BamWriter writer; if (!reader.Open(_bamFile)) { cerr << "Failed to open BAM file " << _bamFile << endl; exit(1); } // get header & reference information string bamHeader = reader.GetHeaderText(); RefVector refs = reader.GetReferenceData(); // set compression mode BamWriter::CompressionMode compressionMode = BamWriter::Compressed; // if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed; writer.SetCompressionMode(compressionMode); // open our BAM writer writer.Open("stdout", bamHeader, refs); // rip through the BAM file and test for overlaps with each annotation file. BamAlignment al; vector<BED> hits; while (reader.GetNextAlignment(al)) { if (al.IsMapped() == true) { BED a; a.chrom = refs.at(al.RefID).RefName; a.start = al.Position; a.end = al.GetEndPosition(false, false); a.strand = "+"; if (al.IsReverseStrand()) a.strand = "-"; ostringstream annotations; // annotate the BAM file based on overlaps with the annotation files. for (size_t i = 0; i < _annoFiles.size(); ++i) { // grab the current annotation file. BedFile *anno = _annoFiles[i]; if (!_useNames && !_useScores && !_useIntervals) { // add the label for this annotation file to tag if there is overlap if (anno->anyHits(a.chrom, a.start, a.end, a.strand, _sameStrand, _diffStrand, _overlapFraction, false)) { annotations << _annoLabels[i] << ";"; } } // use the score field else if (!_useNames && _useScores && !_useIntervals) { anno->allHits(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand, 0.0, false); for (size_t i = 0; i < hits.size(); ++i) { annotations << hits[i].score; if (i < hits.size() - 1) annotations << ","; } if (hits.size() > 0) annotations << ";"; hits.clear(); } // use the name field from the annotation files to populate tag else if (_useNames && !_useScores && !_useIntervals) { anno->allHits(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand, 0.0, false); for (size_t j = 0; j < hits.size(); ++j) { annotations << hits[j].name; if (j < hits.size() - 1) annotations << ","; } if (hits.size() > 0) annotations << ";"; hits.clear(); } // use the full interval information annotation files to populate tag else if (!_useNames && !_useScores && _useIntervals) { anno->allHits(a.chrom, a.start, a.end, a.strand, hits, _sameStrand, _diffStrand, 0.0, false); for (size_t j = 0; j < hits.size(); ++j) { annotations << _annoLabels[i] << ":" << hits[j].chrom << ":" << hits[j].start << "-" << hits[j].end << "," << hits[j].name << "," << hits[j].score << "," << hits[j].strand; if (j < hits.size() - 1) annotations << ","; } if (hits.size() > 0) annotations << ";"; hits.clear(); } } // were there any overlaps with which to make a tag? if (annotations.str().size() > 0) { al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";" } } writer.SaveAlignment(al); } reader.Close(); writer.Close(); // close the annotations files; CloseAnnoFiles(); }
void ReadContainerTest::test_GetBamTags() { // TODO BamAlignment aln; string str_result; float float_result; string rg = "test"; float xc = 12.0; aln.AddTag("RG","Z",rg); aln.AddTag("XD","i",0); aln.AddTag("XC","f",xc); // GetStringBamTag CPPUNIT_ASSERT(_read_container->GetStringBamTag(aln, "RG", &str_result)); CPPUNIT_ASSERT(str_result == "test"); CPPUNIT_ASSERT(!_read_container->GetStringBamTag(aln, "XX", &str_result)); // GetFloatBamTag CPPUNIT_ASSERT(_read_container->GetFloatBamTag(aln, "XC", &float_result)); CPPUNIT_ASSERT(float_result == 12.0); CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XX", &float_result)); // GetIntBamTag int int_result; int d = 0; aln.AddTag("XA","i",d); CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result)); CPPUNIT_ASSERT_EQUAL(int_result, 0); aln.RemoveTag("XA"); int8_t d8 = 10; aln.AddTag("XA","c", d8); CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result)); CPPUNIT_ASSERT_EQUAL(int_result, 10); aln.RemoveTag("XA"); aln.AddTag("XA","c",rg); CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XA", &float_result)); aln.RemoveTag("XA"); uint8_t ud8 = 9; aln.AddTag("XA","C", ud8); CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result)); CPPUNIT_ASSERT_EQUAL(int_result, 9); aln.RemoveTag("XA"); aln.AddTag("XA","C",rg); CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XA", &float_result)); aln.RemoveTag("XA"); int16_t d16 = 8; aln.AddTag("XA","s",d16); CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result)); CPPUNIT_ASSERT_EQUAL(int_result, 8); aln.RemoveTag("XA"); aln.AddTag("XA","s",rg); CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XA", &float_result)); aln.RemoveTag("XA"); uint16_t ud16 = 7; aln.AddTag("XA","S",ud16); CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result)); CPPUNIT_ASSERT_EQUAL(int_result, 7); aln.RemoveTag("XA"); aln.AddTag("XA","S",rg); CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XA", &float_result)); aln.RemoveTag("XA"); uint32_t ud32 = 6; aln.AddTag("XA","I",ud32); CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result)); CPPUNIT_ASSERT_EQUAL(int_result, 6); aln.RemoveTag("XA"); aln.AddTag("XA","I",rg); CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XA", &float_result)); aln.RemoveTag("XA"); }
void ReadContainerTest::test_ParseRead() { map<pair<string,int>, string> ref_ext_nucleotides; BamAlignment aln; std::string rg = "test"; std::string repseq = "AC"; AlignedRead aligned_read; float copynum = 10; include_flank = false; // Test valid allele length aln.Name = "test"; aln.QueryBases = "NNNNN"; aln.Qualities = "NNNNN"; aln.SetIsReverseStrand(true); aln.Position = 0; aln.SetIsSecondMate(false); aln.AddTag("RG","Z",rg); aln.AddTag("XS","i",0); aln.AddTag("XE","i",20); aln.AddTag("XR", "Z", repseq); aln.AddTag("XC", "f", copynum); aln.RefID = 0; // No XD aln.RemoveTag("XD"); CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides))); // Test more valid allele lengths aln.AddTag("XD","i",20); CPPUNIT_ASSERT(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides)); aln.RemoveTag("XD"); aln.AddTag("XD","i",-19); CPPUNIT_ASSERT(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides)); // Test invalid allele length aln.RemoveTag("XD"); aln.AddTag("XD","i",-31); CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides))); // Exceed max_diff_ref aln.RemoveTag("XD"); aln.AddTag("XD","i",100); CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides))); aln.RemoveTag("XD"); aln.AddTag("XD","i",0); // Exceed max mate dist aln.AddTag("XM","i",1000000); CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides))); aln.RemoveTag("XM"); // Exceed max mapq aln.AddTag("XQ","i",10000); CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides))); aln.RemoveTag("XQ"); // Read is mate aln.SetIsSecondMate(true); CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides))); aln.SetIsSecondMate(false); // Read is partial aln.AddTag("XP","i",1); CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides))); aln.RemoveTag("XP"); // Non-unit unit = true; aln.RemoveTag("XD"); aln.AddTag("XD","i",5); CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides))); unit = false; }
void setMateInfo( BamAlignment & rec1, BamAlignment & rec2, SamHeader & header) { const int NO_ALIGNMENT_REFERENCE_INDEX = -1; const int NO_ALIGNMENT_START = -1; // If neither read is unmapped just set their mate info if (rec1.IsMapped() && rec2.IsMapped()) { rec1.MateRefID = rec2.MateRefID; rec1.MatePosition = rec2.Position; rec1.SetIsReverseStrand(rec2.IsReverseStrand()); rec1.SetIsMapped(true); rec1.AddTag("MQ", "i", rec2.MapQuality); rec2.MateRefID = rec1.RefID; rec2.MatePosition = rec1.Position; rec2.SetIsReverseStrand( rec1.IsReverseStrand() ); rec2.SetIsMapped(true); rec2.AddTag("MQ", "i", rec1.MapQuality); } // Else if they're both unmapped set that straight else if (!rec1.IsMapped() && !rec2.IsMapped()) { rec1.RefID = NO_ALIGNMENT_REFERENCE_INDEX; rec1.Position = NO_ALIGNMENT_START; rec1.MateRefID = NO_ALIGNMENT_REFERENCE_INDEX; rec1.MatePosition = NO_ALIGNMENT_START; rec1.SetIsReverseStrand(rec2.IsReverseStrand()); rec1.SetIsMapped(false); rec2.RemoveTag("MQ"); rec1.Length = 0; rec2.RefID = NO_ALIGNMENT_REFERENCE_INDEX; rec2.Position = NO_ALIGNMENT_START; rec2.MateRefID = NO_ALIGNMENT_REFERENCE_INDEX; rec2.MatePosition = NO_ALIGNMENT_START; rec2.SetIsReverseStrand(rec1.IsReverseStrand()); rec2.SetIsMapped(false); rec2.RemoveTag("MQ"); rec2.Length = 0; } // And if only one is mapped copy it's coordinate information to the mate else { BamAlignment & mapped = rec1.IsMapped() ? rec1 : rec2; BamAlignment & unmapped = rec1.IsMapped() ? rec2 : rec1; unmapped.RefID = mapped.RefID; unmapped.Position = mapped.Position; mapped.MateRefID = unmapped.RefID; mapped.MatePosition = unmapped.Position; mapped.SetIsMateReverseStrand(unmapped.IsReverseStrand()); mapped.SetIsMateMapped(false); mapped.Length = 0; unmapped.MateRefID = mapped.RefID; unmapped.MatePosition = mapped.Position; unmapped.SetIsMateReverseStrand(mapped.IsReverseStrand()); unmapped.SetIsMateMapped(true); unmapped.Length = 0; } const int insertSize = computeInsertSize(rec1, rec2); rec1.Length = insertSize; rec2.Length = -insertSize; }