Ejemplo n.º 1
0
/*
 * Build a ReadGroup from an alignment.
 *
 *   al            - alignment whose read group names this object; it is also
 *                   passed to witness() once construction is done
 *   max_isize     - copied into the max_isize member
 *   isize_samples - copied into the isize_samples member
 *   prefix        - copied into the prefix member and used as the leading
 *                   path component of the two output files
 *   blacklist     - read group names for which no output files are opened
 *
 * When al.GetReadGroup() reports failure the group is named "none".
 * For a group that is not blacklisted, f1 and f2 are opened on
 * <prefix>/<name>_1.fq.gz and <prefix>/<name>_2.fq.gz respectively.
 */
ReadGroup::ReadGroup(BamAlignment &al, int max_isize, int isize_samples,
	string prefix, list<string> blacklist) :
	max_isize(max_isize),
	isize_samples(isize_samples),
	prefix(prefix),
	blacklisted(false)
{
	nreads = 0;

	/* Fall back to a fixed name when the alignment yields no read group */
	if (!al.GetReadGroup(name))
		name = "none";

	/* Walk the blacklist until the name turns up or the list runs out */
	list<string>::iterator entry = blacklist.begin();
	while (!blacklisted && entry != blacklist.end()) {
		blacklisted = (*entry == name);
		++entry;
	}

	/* Output files are only opened for groups that are not blacklisted */
	if (!blacklisted) {
		const string stem = prefix + "/" + name;
		f1.open((stem + "_1.fq.gz").c_str());
		f2.open((stem + "_2.fq.gz").c_str());
	}

	witness(al);
}
// Count, for every SNP held in snps_by_chrom and every read group, how many
// reads show the reference allele, the alternate allele, or some other base,
// kept separately for forward- and reverse-strand reads.
//
// all_args is forwarded unchanged to Init(). The BAM file named by bam_file is
// opened together with its "<bam_file>.bai" index, and a tab-separated table
// is written to stdout:
//   #CHROM  POS  REF  ALT  <one column per read group ID in the BAM header>
// Each read-group cell holds six comma-separated counts:
//   fwd_ref,fwd_alt,fwd_other,rev_ref,rev_alt,rev_other
// Progress messages go to stderr.
//
// Reads are ignored when MapQuality < min_map_qual or when they are unmapped;
// individual bases are ignored when the aligned base is '-' or its quality
// (after subtracting ascii_offset) is below min_base_qual.
//
// NOTE(review): the tail of this function (its return statement and closing
// brace) is not present in this file; only the part shown here was reviewed.
int main_asequantmultirg(const vector<string> &all_args)
{
    Init(all_args);

    cerr << "* Reading bam file " << endl;
    OpenBam(bam_reader, bam_file);
    bam_reader.OpenIndex(bam_file + ".bai");

    // The read group IDs declared in the BAM header define the output columns.
    vector<string> readGroupVector;
    SamHeader header = bam_reader.GetHeader();
    SamReadGroupDictionary headerRG = header.ReadGroups;
    for (SamReadGroupIterator it = headerRG.Begin(); it != headerRG.End(); ++it)
    {
        readGroupVector.push_back(it -> ID);
    }

    vector<RefData> chroms = bam_reader.GetReferenceData();

    // Header row
    cout << "#CHROM" << "\t" << "POS" << "\t" << "REF" << "\t" << "ALT";
    for (vector<string>::iterator it = readGroupVector.begin(); it != readGroupVector.end(); ++it)
    {
        cout << "\t" << *it;
    }
    cout << endl;

    StlFor(chrom_idx, chroms)
    {
        string &chrom = chroms[chrom_idx].RefName;
        // Working copy: counts are accumulated into and printed from this copy.
        vector<Snp> snps = snps_by_chrom[chrom];

        size_t s = 0; // Index of the first SNP that may still overlap upcoming reads

        BamAlignment bam;
        bam_reader.Jump(chrom_idx);

        string align;
        string qualities;

        cerr << "* On chrom " << chrom << endl;

        while (bam_reader.GetNextAlignment(bam) && bam.RefID == chrom_idx)
        {
            if (bam.MapQuality < min_map_qual || !bam.IsMapped())
                continue;

            // Fetch the read group outside the assertion so the lookup still
            // happens if Assert is ever compiled out.
            string currentRG;
            const bool hasReadGroup = bam.GetReadGroup(currentRG);
            Assert(hasReadGroup);
            (void) hasReadGroup; // keeps the variable "used" if Assert expands to nothing

            int start = AlignStart(bam);
            int end = AlignEnd(bam);

            // Advance the SNP cursor past SNPs that lie before this read's
            // start; this relies on alignments arriving sorted by start.
            while (s < snps.size() && snps[s].pos < start)
                ++s;

            // Nothing left to count on this chromosome once every SNP has been passed
            if (s >= snps.size())
                break;

            // Number of SNPs, starting at s, whose position is before the read's end
            size_t n = 0;
            while ((s + n) < snps.size() && snps[s + n].pos < end)
                ++n;

            AlignedString(bam, align);
            AlignedQualities(bam, qualities);
            Assert(align.size() == qualities.size());

            // Tally one vote per overlapped SNP
            for (size_t i = 0; i < n; ++i)
            {
                Snp &snp = snps[s + i];
                char base = align[snp.pos - start]; // Base from the read
                int qual = int(qualities[snp.pos - start]) - ascii_offset; // Quality of that base

                //AssertMsg(qual >= 0 && qual <= 100, ToStr(qual) + "\n" + bam.Name + "\n" + CigarToStr(bam.CigarData) + "\n" + bam.QueryBases + "\n" + bam.Qualities);

                if (base == '-' || qual < min_base_qual)
                    continue;

                map<string, Counts> &RG_counts = bam.IsReverseStrand() ? snp.rev : snp.fwd;

                // Locate this read group's counters, creating them with
                // explicit zeros the first time the group is seen here.
                Counts *tally;
                map<string, Counts>::iterator searchIt = RG_counts.find(currentRG);
                if (searchIt == RG_counts.end())
                {
                    tally = &RG_counts[currentRG];
                    tally -> num_ref = 0;
                    tally -> num_alt = 0;
                    tally -> num_other = 0;
                }
                else
                {
                    tally = &(searchIt -> second);
                }

                if (base == snp.ref)
                {
                    tally -> num_ref += 1;
                }
                else if (base == snp.alt)
                {
                    tally -> num_alt += 1;
                }
                else
                {
                    tally -> num_other += 1;
                }
            }
        }

        // Output counts: one row per SNP, one cell per header read group
        for (size_t row = 0; row < snps.size(); ++row)
        {
            Snp &snp = snps[row];
            cout << chrom << "\t" << snp.pos + 1 << "\t" << snp.ref << "\t" << snp.alt;
            for (vector<string>::iterator it = readGroupVector.begin(); it != readGroupVector.end(); ++it)
            {
                // Forward-strand triple (with trailing comma)
                map<string, Counts>::iterator searchIt = snp.fwd.find(*it);
                if (searchIt != snp.fwd.end())
                {
                    cout << "\t" << searchIt -> second.num_ref << "," << searchIt -> second.num_alt << "," << searchIt -> second.num_other << ",";
                }
                else
                {
                    cout << "\t" << "0,0,0,";
                }

                // Reverse-strand triple
                searchIt = snp.rev.find(*it);
                if (searchIt != snp.rev.end())
                {
                    cout << searchIt -> second.num_ref << "," << searchIt -> second.num_alt << "," << searchIt -> second.num_other;
                }
                else
                {
                    cout << "0,0,0";
                }
            }
            cout << "\n";
        }
        // One flush per chromosome instead of one per SNP row
        cout.flush();
    }
int main_aseregion(const vector<string> &all_args)
{
    Init(all_args);
    
    cerr << "* Reading bam file " << endl;
    OpenBam(bam_reader, bam_file);
    bam_reader.OpenIndex(bam_file + ".bai");
    
    vector<string> readGroupVector; //Obtain all the readgroups.
    SamHeader header = bam_reader.GetHeader();
    SamReadGroupDictionary headerRG = header.ReadGroups;
    for (SamReadGroupIterator it = headerRG.Begin(); it != headerRG.End(); it ++)
    {
        readGroupVector.push_back(it -> ID);
    }
    
    cout << "#CHROM" << "\t" << "StartPos" << "\t" << "EndPos";
    for (vector<string>::iterator it = readGroupVector.begin(); it != readGroupVector.end(); it ++)
    {
        cout << "\t" << *it;
    }
    cout << endl;
    
    vector<RefData> chroms = bam_reader.GetReferenceData();
    
    StlFor(chrom_idx, chroms)
    {
        string &chrom = chroms[chrom_idx].RefName;
        cerr << "* On chrom " << chrom << endl;
        
        map<string, vector<GenomicRegion> >::iterator searchIt = chrom_genomicRegions.find(chrom);
        
        BamAlignment startPointer; // This pointer will point to the region immediately before the start of current regions under inspection.
        bam_reader.Jump(chrom_idx);
        if (!bam_reader.GetNextAlignment(startPointer))
            break;
        
        int count = 0;
        // For each region, walk through all the reads correspoinding to this region and count the reads.
        for (vector<GenomicRegion>::iterator it = searchIt -> second.begin(); it != searchIt -> second.end(); ++it)
        {
            bam_reader.Jump(chrom_idx, startPointer.Position); // Fix the reading pointer.
            if (!bam_reader.GetNextAlignment(startPointer))
                break;
            int flag = 0;
            while (true)
            {
                int startEnd = startPointer.GetEndPosition();
                if (startEnd < it -> start)
                {
                    if (!bam_reader.GetNextAlignment(startPointer))
                    {
                        flag = 1;
                        break;
                    }
                }
                else
                {
                    break;
                }
            }
            
            if (flag == 1)
            {
                break;
            }
            // Now startPointer assumes its rightful position.
            BamAlignment nextPointer = startPointer; //This pointer traverse through all reads that align to the current genomic region in bed file and the iteration ends when this pointer pass through the end of the region.
            
            while (true)
            {
                int nextStart = nextPointer.Position;
                if (nextStart > it -> end)
                {
                    break; // This iteration is done.
                }
                
                if (nextPointer.MapQuality < min_map_qual)
                {
                    if (!bam_reader.GetNextAlignment(nextPointer))
                    {
                        break;
                    }
                    continue;
                }
                
                string currentRG;
                Assert(nextPointer.GetReadGroup(currentRG));
                
                map<string, int> &RG_counts = nextPointer.IsReverseStrand() ? it -> revCounts : it -> fwdCounts;
                map<string, int>::iterator searchItForRG = RG_counts.find(currentRG);
                if (searchItForRG == RG_counts.end())
                {
                    RG_counts[currentRG] = 1;
                }
                else
                {
                    ++ RG_counts[currentRG];
                }
                if (!bam_reader.GetNextAlignment(nextPointer))
                {
                    break;
                }
            }
            count ++;
            if (count % 1000 == 0)
                cerr << "Processed" << "\t" << count << endl;
        }
        
        // Output the counts
        for (vector<GenomicRegion>::iterator it = searchIt -> second.begin(); it != searchIt -> second.end(); ++it)
        {
            cout << chrom << "\t" << it -> start << "\t" << it -> end;
            for (vector<string>::iterator subIt = readGroupVector.begin(); subIt != readGroupVector.end(); ++subIt)
            {
                map<string, int>::iterator searchItForRG = it -> fwdCounts.find(*subIt);
                if (searchItForRG != it -> fwdCounts.end())
                {
                    cout << "\t" << searchItForRG -> second << ",";
                }
                else
                {
                    cout << "\t" << "0,";
                }
                searchItForRG = it -> revCounts.find(*subIt);
                if (searchItForRG != it -> revCounts.end())
                {
                    cout << searchItForRG -> second;
                }
                else
                {
                    cout << "0";
                }
            }
            cout << endl;
        }
    }