Пример #1
0
int readerToMeth(BamTools::BamReader & reader1, 
                 BamTools::BamReader & reader2, 
                 std::map<std::string, std::vector<std::string> > & lociMeth1, 
                 std::map<std::string, std::vector<std::string> > & lociMeth2, 
                 BamTools::RefVector::const_iterator i, 
                 int d, 
                 const BamTools::RefVector refs,
                 std::string sample)
{
    const int r1=reader1.GetReferenceID(i->RefName);
    const int r2=reader2.GetReferenceID(i->RefName);
    const int rl=i->RefLength;
    if(reader1.SetRegion(r1,0, r1, rl) & reader2.SetRegion(r2,0, r2, rl))
    {
        std::cerr << "Processing " << i->RefName << std::endl;
        BamTools::BamAlignment al;            
        while (reader1.GetNextAlignment(al)){
            //std::cout << al.RefID << std::endl;
            byread(al, d, lociMeth1, refs, sample);
        }
        while (reader2.GetNextAlignment(al)){
            //std::cout << al.RefID << std::endl;
            byread(al, d, lociMeth2, refs, sample);
        }
    }
    reader1.Rewind();
    reader2.Rewind();            
    return 0;
}
Пример #2
0
std::string getQuickStats(const std::string &bamFile, std::map< std::string, int > &keyLen, unsigned int &nFlowFZ, unsigned int &nFlowZM) {
	std::string errMsg = "";
	BamTools::BamReader bamReader;
	if(!bamReader.Open(bamFile)) {
		errMsg += "Failed to open bam " + bamFile + "\n";
		return(errMsg);
	}
	BamTools::SamHeader samHeader = bamReader.GetHeader();
	for (BamTools::SamReadGroupIterator itr = samHeader.ReadGroups.Begin(); itr != samHeader.ReadGroups.End(); ++itr ) {
		if(itr->HasID())
			keyLen[itr->ID] = itr->HasKeySequence() ? itr->KeySequence.length() : 0;
		if(itr->HasFlowOrder())
			nFlowZM = std::max(nFlowZM,(unsigned int) itr->FlowOrder.length());
	}
	BamTools::BamAlignment alignment;
	std::vector<uint16_t> flowIntFZ;
	while(bamReader.GetNextAlignment(alignment)) {
		if(alignment.GetTag("FZ", flowIntFZ))
			nFlowFZ = flowIntFZ.size();
		break;
	}
	bamReader.Close();
	if(nFlowFZ==0)
		std::cout << "NOTE: bam file has no flow signals in FZ tag: " + bamFile + "\n";
	if(nFlowZM==0)
		std::cout << "NOTE: bam file has no flow signals in ZM tag: " + bamFile + "\n";
	return(errMsg);
}
Пример #3
0
void bamParser::parse(Reads & reads, string & filename,
		vector<string> & chrs_to_parse) {
	BamTools::BamReader bam;
	BamTools::BamAlignment read;

	string chr;
	uint64_t readCnt = 0;
	uint32_t meanReadLen = 0;

	if (!(bam.Open(filename))) {
		throw FileNotGood(filename);
	}

	const BamTools::RefVector refvec = bam.GetReferenceData();

	while (bam.GetNextAlignment(read)) {
		chr = getR1Chr(read, refvec);
		if (isGoodRead(read)) {
			if (isChrToParse(chrs_to_parse, chr)) {
				updateAvgReadLength(readCnt, meanReadLen, read);
				insertRead(read, reads, chr);
			}
		}
	}
	reads.setReadlength(meanReadLen);
}
Пример #4
0
bool getNextAlignment(BamTools::BamAlignment &alignment, BamTools::BamReader &bamReader, const std::map<std::string, int> &groupID, std::vector< BamTools::BamAlignment > &alignmentSample, std::map<std::string, int> &wellIndex, unsigned int nSample) {
	if(nSample > 0) {
		// We are randomly sampling, so next read should come from the sample that was already taken from the bam file
		if(alignmentSample.size() > 0) {
			alignment = alignmentSample.back();
			alignmentSample.pop_back();
			alignment.BuildCharData();
			return(true);
		} else {
			return(false);
		}
	} else {
		// No random sampling, so we're either returning everything or we're looking for specific read names
		bool storeRead = false;
		while(bamReader.GetNextAlignment(alignment)) {
			if(groupID.size() > 0) {
				std::string thisReadGroupID = "";
				if( !alignment.GetTag("RG", thisReadGroupID) || (groupID.find(thisReadGroupID)==groupID.end()) );
					continue;
			}
			storeRead=true;
			if(wellIndex.size() > 0) {
				// We are filtering by position, so check if we should skip or keep the read
				int thisCol,thisRow;
				if(1 != ion_readname_to_rowcol(alignment.Name.c_str(), &thisRow, &thisCol))
					std::cerr << "Error parsing read name: " << alignment.Name << "\n";
				std::stringstream wellIdStream;
				wellIdStream << thisCol << ":" << thisRow;
				std::map<std::string, int>::iterator wellIndexIter;
				wellIndexIter = wellIndex.find(wellIdStream.str());
				if(wellIndexIter != wellIndex.end()) {
					// If the read ID matches we should keep, unless its a duplicate
					if(wellIndexIter->second >= 0) {
						storeRead=true;
						wellIndexIter->second=-1;
					} else {
						storeRead=false;
						std::cerr << "WARNING: found extra instance of readID " << wellIdStream.str() << ", keeping only first\n";
					}
				} else {
					// read ID is not one we should keep
					storeRead=false;
				}
			}
			if(storeRead)
				break;
		}
		return(storeRead);
	}
}
	uint32_t BamAlignmentReader::GetReadLength(const std::string& bamPath)
	{
		uint32_t bamReadLength = 300;
		BamTools::BamReader bamReader;
		if (!bamReader.Open(bamPath))
		{
			throw "Unable to open bam file";
		}
		BamTools::BamAlignment bamAlignment;
		while(bamReader.GetNextAlignment(bamAlignment))
		{
			if (bamAlignment.IsPrimaryAlignment())
			{
				bamReadLength = bamAlignment.QueryBases.size();
				break;
			}
		}
		bamReader.Close();
		return bamReadLength;
	}
Пример #6
0
void Config::InitializationClustering() {
    struct stat st;
    if(stat(Workspace.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Workspace directory already present");
    else if (mkdir(Workspace.c_str(), 0755) != 0) {
        Log("[Error] Could not create workspace directory: " + Workspace);
        exit(1);
    }
    RunningTasksFile = Workspace + "/" + FilePrefix + "running.tasks";
    StatsFile = Workspace + "/" + FilePrefix + "stats";
    BinClusterFile = Workspace + "/" + FilePrefix + "bpc";
    clusterFile = new ClusterFile(BinClusterFile);
    clusterDir = Workspace + "/clusters/";
    if(stat(clusterDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Cluster directory already present");
    else if (mkdir(clusterDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create cluster directory: " + clusterDir);
        exit(1);
    }
    insertsizeDir = Workspace + "/insertsize/";
    if(stat(insertsizeDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Insertsize directory already present");
    else if (mkdir(insertsizeDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create insertsize directory: " + insertsizeDir);
        exit(1);
    }
    coverageDir = Workspace + "/coverage/";
    if(stat(coverageDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Coverage directory already present");
    else if (mkdir(coverageDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create coverage directory: " + coverageDir);
        exit(1);
    }
    
    if (!ForwardBam.empty() && !ReverseBam.empty() && PairedBam.empty()) {
        UsePairedBam = false;
    } else if (ForwardBam.empty() && ReverseBam.empty() && !PairedBam.empty()) {
        UsePairedBam = true;
    } else {
        Log("[Error] No correct bam file(s)");
        exit(1);
    }
    
    BamTools::BamAlignment alignment;
    BamTools::BamReader BamReader;
    
    if (UsePairedBam) {
        BamReader.Open(PairedBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open paired bam");
            exit(1);
        }
        if (PairedIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                PairedIndex = PairedBam.substr(0,PairedBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(PairedIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader header = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = header.ReadGroups.Begin(); it != header.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            readNameConverter.AddReadGroup(readgroup->ID);
        }
        long int count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (not NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in reads but not in header");
                    count = 0;
                }
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
    } else {
        BamReader.Open(ForwardBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open first/forward bam");
            exit(1);
        }
        if (ForwardIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                ForwardIndex = ForwardBam.substr(0,ForwardBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(ForwardIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for forward bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader forwardheader = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = forwardheader.ReadGroups.Begin(); it != forwardheader.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            readNameConverter.AddReadGroup(readgroup->ID);
        }
        long int count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (!NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in forward reads but not in header");
                    count = 0;
                }
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
        BamReader.Open(ReverseBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open second/reverse bam");
            exit(1);
        }
        if (ReverseIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                ReverseIndex = ReverseBam.substr(0,ReverseBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(ReverseIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for reverse bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader reverseheader = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = reverseheader.ReadGroups.Begin(); it != reverseheader.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            if (readNameConverter.AddReadGroup(readgroup->ID)) {
                Log("[Warning] Readgroup '" + readgroup->ID + "' found in reverse but not in forward");
            }
        }
        count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (!NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in reverse reads but not in header");
                    count = 0;
                } 
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
    }
    
    for(map<string, int>::iterator it = readNameConverter.ReadGroups.begin(); it!=readNameConverter.ReadGroups.end(); ++it) {
        ostringstream logBuffer;
        logBuffer << "Readgroup found: " << it->second << " - " << it->first;
        Log(logBuffer.str());
    }
    
    writeConfigFile(Workspace + FilePrefix + "config");
}