Ejemplo n.º 1
0
//{{{ void process_intra_chrom_pair(const BamAlignment &curr,
void
SV_Pair::
process_intra_chrom_pair(const BamAlignment &curr,
                         const RefVector refs,
                         BamWriter &inter_chrom_reads,
                         map<string, BamAlignment> &mapped_pairs,
                         UCSCBins<SV_BreakPoint*> &r_bin,
                         int weight,
                         int ev_id,
                         SV_PairReader *reader)
{
    if (curr.RefID == curr.MateRefID) {

        process_pair(curr,
                     refs,
                     mapped_pairs,
                     r_bin,
                     weight,
                     ev_id,
                     reader);

    } else if (curr.IsMapped() &&
               curr.IsMateMapped() &&
               (curr.RefID >= 0) &&
               (curr.MateRefID >= 0) ) {
        BamAlignment al = curr;
        string x = reader->get_source_file_name();
        al.AddTag("LS","Z",x);
        inter_chrom_reads.SaveAlignment(al);
    }
}
Ejemplo n.º 2
0
void TagBam::Tag() {

    // open the annotations files for processing;
    OpenAnnoFiles();

    // open the BAM file
    BamReader reader;
    BamWriter writer;
	if (!reader.Open(_bamFile)) {
        cerr << "Failed to open BAM file " << _bamFile << endl;
        exit(1);
    }
    
    // get header & reference information
    string bamHeader  = reader.GetHeaderText();
    RefVector refs = reader.GetReferenceData();

    // set compression mode
    BamWriter::CompressionMode compressionMode = BamWriter::Compressed;
//    if ( _isUncompressedBam ) compressionMode = BamWriter::Uncompressed;
    writer.SetCompressionMode(compressionMode);
    // open our BAM writer
    writer.Open("stdout", bamHeader, refs);

    // rip through the BAM file and test for overlaps with each annotation file.
    BamAlignment al;
    vector<BED> hits;

    while (reader.GetNextAlignment(al)) {
        if (al.IsMapped() == true) {
            BED a;
            a.chrom = refs.at(al.RefID).RefName;
            a.start = al.Position;
            a.end   = al.GetEndPosition(false, false);
            a.strand = "+";
            if (al.IsReverseStrand()) a.strand = "-";
            
            ostringstream annotations;
            // annotate the BAM file based on overlaps with the annotation files.
            for (size_t i = 0; i < _annoFiles.size(); ++i) 
            {
                // grab the current annotation file.
                BedFile *anno = _annoFiles[i];
                
                if (!_useNames && !_useScores && !_useIntervals) {
                    // add the label for this annotation file to tag if there is overlap
                    if (anno->anyHits(a.chrom, a.start, a.end, a.strand, 
                                      _sameStrand, _diffStrand, _overlapFraction, false))
                    {
                        annotations << _annoLabels[i] << ";";
                    }
                }
                // use the score field
                else if (!_useNames && _useScores && !_useIntervals) {
                    anno->allHits(a.chrom, a.start, a.end, a.strand, 
                                  hits, _sameStrand, _diffStrand, 0.0, false);
                    for (size_t i = 0; i < hits.size(); ++i) {
                        annotations << hits[i].score;
                        if (i < hits.size() - 1) annotations << ",";
                    }
                    if (hits.size() > 0) annotations << ";";
                    hits.clear();
                }
                // use the name field from the annotation files to populate tag
                else if (_useNames && !_useScores && !_useIntervals) {
                    anno->allHits(a.chrom, a.start, a.end, a.strand, 
                                  hits, _sameStrand, _diffStrand, 0.0, false);
                    for (size_t j = 0; j < hits.size(); ++j) {
                        annotations << hits[j].name;
                        if (j < hits.size() - 1) annotations << ",";
                    }
                    if (hits.size() > 0) annotations << ";";
                    hits.clear();
                }
                // use the full interval information annotation files to populate tag
                else if (!_useNames && !_useScores && _useIntervals) {
                    anno->allHits(a.chrom, a.start, a.end, a.strand, 
                                  hits, _sameStrand, _diffStrand,  0.0, false);
                    for (size_t j = 0; j < hits.size(); ++j) {
                        annotations << _annoLabels[i]  << ":" << 
                                        hits[j].chrom  << ":" <<
                                        hits[j].start  << "-" <<
                                        hits[j].end    << "," <<
                                        hits[j].name   << "," <<
                                        hits[j].score  << "," <<
                                        hits[j].strand;
                        if (j < hits.size() - 1) annotations << ",";
                    }
                    if (hits.size() > 0) annotations << ";";
                    hits.clear();
                }
            }
            // were there any overlaps with which to make a tag?
            if (annotations.str().size() > 0) {
                al.AddTag(_tag, "Z", annotations.str().substr(0, annotations.str().size() - 1)); // get rid of the last ";"
            }
        }
        writer.SaveAlignment(al);
    }
    reader.Close();
    writer.Close();
    // close the annotations files;
    CloseAnnoFiles();
}
// Checks the GetStringBamTag / GetFloatBamTag / GetIntBamTag accessors of the
// read container against tags added to a scratch alignment.
//
// For the integer accessor the same tag name ("XA") is re-added with each
// BAM integer type code ("i", "c", "C", "s", "S", "I") and must be read back
// as the stored value. After each width, a string is offered under that
// numeric type code and a float lookup on "XA" is expected to fail.
//
// CPPUNIT_ASSERT_EQUAL takes (expected, actual); the arguments are passed in
// that order so failure messages label the two values correctly.
void ReadContainerTest::test_GetBamTags() {
  // TODO
  BamAlignment aln;
  string str_result;
  float float_result;
  string rg = "test";
  float xc = 12.0;
  aln.AddTag("RG","Z",rg);
  aln.AddTag("XD","i",0);
  aln.AddTag("XC","f",xc);
  // GetStringBamTag: present tag is returned, absent tag reports failure
  CPPUNIT_ASSERT(_read_container->GetStringBamTag(aln, "RG", &str_result));
  CPPUNIT_ASSERT(str_result == "test");
  CPPUNIT_ASSERT(!_read_container->GetStringBamTag(aln, "XX", &str_result));
  // GetFloatBamTag: present tag is returned, absent tag reports failure
  CPPUNIT_ASSERT(_read_container->GetFloatBamTag(aln, "XC", &float_result));
  CPPUNIT_ASSERT(float_result == 12.0);
  CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XX", &float_result));
  // GetIntBamTag, type code "i" (added from an int)
  int int_result;
  int d = 0;
  aln.AddTag("XA","i",d);
  CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result));
  CPPUNIT_ASSERT_EQUAL(0, int_result);
  aln.RemoveTag("XA");
  // type code "c" (added from an int8_t)
  int8_t d8 = 10;
  aln.AddTag("XA","c", d8);
  CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result));
  CPPUNIT_ASSERT_EQUAL(10, int_result);
  aln.RemoveTag("XA");
  // NOTE(review): this and the matching checks below call GetFloatBamTag
  // inside the GetIntBamTag section -- confirm that is the intended accessor.
  aln.AddTag("XA","c",rg);
  CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XA", &float_result));
  aln.RemoveTag("XA");
  // type code "C" (added from a uint8_t)
  uint8_t ud8 = 9;
  aln.AddTag("XA","C", ud8);
  CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result));
  CPPUNIT_ASSERT_EQUAL(9, int_result);
  aln.RemoveTag("XA");
  aln.AddTag("XA","C",rg);
  CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XA", &float_result));
  aln.RemoveTag("XA");
  // type code "s" (added from an int16_t)
  int16_t d16 = 8;
  aln.AddTag("XA","s",d16);
  CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result));
  CPPUNIT_ASSERT_EQUAL(8, int_result);
  aln.RemoveTag("XA");
  aln.AddTag("XA","s",rg);
  CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XA", &float_result));
  aln.RemoveTag("XA");
  // type code "S" (added from a uint16_t)
  uint16_t ud16 = 7;
  aln.AddTag("XA","S",ud16);
  CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result));
  CPPUNIT_ASSERT_EQUAL(7, int_result);
  aln.RemoveTag("XA");
  aln.AddTag("XA","S",rg);
  CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XA", &float_result));
  aln.RemoveTag("XA");
  // type code "I" (added from a uint32_t)
  uint32_t ud32 = 6;
  aln.AddTag("XA","I",ud32);
  CPPUNIT_ASSERT(_read_container->GetIntBamTag(aln, "XA", &int_result));
  CPPUNIT_ASSERT_EQUAL(6, int_result);
  aln.RemoveTag("XA");
  aln.AddTag("XA","I",rg);
  CPPUNIT_ASSERT(!_read_container->GetFloatBamTag(aln, "XA", &float_result));
  aln.RemoveTag("XA");
}
void ReadContainerTest::test_ParseRead() {
  map<pair<string,int>, string> ref_ext_nucleotides;
  BamAlignment aln;
  std::string rg = "test";
  std::string repseq = "AC";
  AlignedRead aligned_read;
  float copynum = 10;
  include_flank = false;
  // Test valid allele length
  aln.Name = "test";
  aln.QueryBases = "NNNNN";
  aln.Qualities = "NNNNN";
  aln.SetIsReverseStrand(true);
  aln.Position = 0;
  aln.SetIsSecondMate(false);
  aln.AddTag("RG","Z",rg);
  aln.AddTag("XS","i",0);
  aln.AddTag("XE","i",20);
  aln.AddTag("XR", "Z", repseq);
  aln.AddTag("XC", "f", copynum);
  aln.RefID = 0;
  // No XD
  aln.RemoveTag("XD");
  CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides))); 
  // Test more valid allele lengths
  aln.AddTag("XD","i",20);
  CPPUNIT_ASSERT(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides));
  aln.RemoveTag("XD");
  aln.AddTag("XD","i",-19);
  CPPUNIT_ASSERT(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides));
  // Test invalid allele length
  aln.RemoveTag("XD");
  aln.AddTag("XD","i",-31);
  CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides)));
  // Exceed max_diff_ref
  aln.RemoveTag("XD");
  aln.AddTag("XD","i",100);
  CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides)));
  aln.RemoveTag("XD");
  aln.AddTag("XD","i",0);
  // Exceed max mate dist
  aln.AddTag("XM","i",1000000);
  CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides)));
  aln.RemoveTag("XM");
  // Exceed max mapq
  aln.AddTag("XQ","i",10000);
  CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides)));
  aln.RemoveTag("XQ");
  // Read is mate
  aln.SetIsSecondMate(true);
  CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides)));
  aln.SetIsSecondMate(false);
  // Read is partial
  aln.AddTag("XP","i",1);
  CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides)));
  aln.RemoveTag("XP");
  // Non-unit
  unit = true;
  aln.RemoveTag("XD");
  aln.AddTag("XD","i",5);
  CPPUNIT_ASSERT(!(_read_container->ParseRead(aln, &aligned_read, ref_ext_nucleotides)));
  unit = false;
}
Ejemplo n.º 5
0
/*
 * Make the mate-related fields of two paired alignments agree with each
 * other.
 *
 *  - Both mapped: each record receives the other's reference ID, position
 *    and strand as its mate information, is flagged as having a mapped
 *    mate, and gets an integer "MQ" tag holding the other's mapping quality.
 *  - Both unmapped: own and mate reference ID / position are reset to -1,
 *    the mate flags are updated, and any "MQ" tag is removed from both.
 *  - Exactly one mapped: the unmapped record is placed at the mapped
 *    record's reference ID / position and the mate fields and flags of both
 *    records are set accordingly.
 *
 * Finally the insert size returned by computeInsertSize(rec1, rec2) is
 * stored in rec1 and its negation in rec2.
 *
 * A record's own strand and mapped flags are never modified here; only the
 * mate-strand / mate-mapped flags are. The insert size goes to InsertSize,
 * leaving Length (the query length) untouched.
 *
 * @param rec1    first alignment of the pair (modified in place)
 * @param rec2    second alignment of the pair (modified in place)
 * @param header  not used by this function
 */
void setMateInfo( BamAlignment & rec1, BamAlignment & rec2, SamHeader & header) {
    const int NO_ALIGNMENT_REFERENCE_INDEX = -1;
    const int NO_ALIGNMENT_START = -1;
    // If neither read is unmapped just set their mate info
    if (rec1.IsMapped() && rec2.IsMapped()) {

        rec1.MateRefID = rec2.RefID;
        rec1.MatePosition = rec2.Position;
        rec1.SetIsMateReverseStrand(rec2.IsReverseStrand());
        rec1.SetIsMateMapped(true);
        // AddTag() does not overwrite an existing tag, so drop any stale MQ
        // first. The value is widened to int so its size matches the 32-bit
        // "i" type code.
        rec1.RemoveTag("MQ");
        rec1.AddTag("MQ", "i", static_cast<int>(rec2.MapQuality));

        rec2.MateRefID = rec1.RefID;
        rec2.MatePosition = rec1.Position;
        rec2.SetIsMateReverseStrand(rec1.IsReverseStrand());
        rec2.SetIsMateMapped(true);
        rec2.RemoveTag("MQ");
        rec2.AddTag("MQ", "i", static_cast<int>(rec1.MapQuality));
    }
    // Else if they're both unmapped set that straight
    else if (!rec1.IsMapped() && !rec2.IsMapped()) {
        rec1.RefID = NO_ALIGNMENT_REFERENCE_INDEX;
        rec1.Position = NO_ALIGNMENT_START;
        rec1.MateRefID = NO_ALIGNMENT_REFERENCE_INDEX;
        rec1.MatePosition = NO_ALIGNMENT_START;
        rec1.SetIsMateReverseStrand(rec2.IsReverseStrand());
        rec1.SetIsMateMapped(false);
        rec1.RemoveTag("MQ");
        rec1.InsertSize = 0;

        rec2.RefID = NO_ALIGNMENT_REFERENCE_INDEX;
        rec2.Position = NO_ALIGNMENT_START;
        rec2.MateRefID = NO_ALIGNMENT_REFERENCE_INDEX;
        rec2.MatePosition = NO_ALIGNMENT_START;
        rec2.SetIsMateReverseStrand(rec1.IsReverseStrand());
        rec2.SetIsMateMapped(false);
        rec2.RemoveTag("MQ");
        rec2.InsertSize = 0;
    }
    // And if only one is mapped copy its coordinate information to the mate
    else {
        BamAlignment & mapped   = rec1.IsMapped() ? rec1 : rec2;
        BamAlignment & unmapped = rec1.IsMapped() ? rec2 : rec1;
        unmapped.RefID = mapped.RefID;
        unmapped.Position = mapped.Position;

        mapped.MateRefID = unmapped.RefID;
        mapped.MatePosition = unmapped.Position;
        mapped.SetIsMateReverseStrand(unmapped.IsReverseStrand());
        mapped.SetIsMateMapped(false);
        mapped.InsertSize = 0;

        unmapped.MateRefID = mapped.RefID;
        unmapped.MatePosition = mapped.Position;
        unmapped.SetIsMateReverseStrand(mapped.IsReverseStrand());
        unmapped.SetIsMateMapped(true);
        unmapped.InsertSize = 0;
    }

    // Signed insert size: positive on rec1, mirrored on rec2.
    const int insertSize = computeInsertSize(rec1, rec2);
    rec1.InsertSize = insertSize;
    rec2.InsertSize = -insertSize;
}