Beispiel #1
0
std::string getQuickStats(const std::string &bamFile, std::map< std::string, int > &keyLen, unsigned int &nFlowFZ, unsigned int &nFlowZM) {
	std::string errMsg = "";
	BamTools::BamReader bamReader;
	if(!bamReader.Open(bamFile)) {
		errMsg += "Failed to open bam " + bamFile + "\n";
		return(errMsg);
	}
	BamTools::SamHeader samHeader = bamReader.GetHeader();
	for (BamTools::SamReadGroupIterator itr = samHeader.ReadGroups.Begin(); itr != samHeader.ReadGroups.End(); ++itr ) {
		if(itr->HasID())
			keyLen[itr->ID] = itr->HasKeySequence() ? itr->KeySequence.length() : 0;
		if(itr->HasFlowOrder())
			nFlowZM = std::max(nFlowZM,(unsigned int) itr->FlowOrder.length());
	}
	BamTools::BamAlignment alignment;
	std::vector<uint16_t> flowIntFZ;
	while(bamReader.GetNextAlignment(alignment)) {
		if(alignment.GetTag("FZ", flowIntFZ))
			nFlowFZ = flowIntFZ.size();
		break;
	}
	bamReader.Close();
	if(nFlowFZ==0)
		std::cout << "NOTE: bam file has no flow signals in FZ tag: " + bamFile + "\n";
	if(nFlowZM==0)
		std::cout << "NOTE: bam file has no flow signals in ZM tag: " + bamFile + "\n";
	return(errMsg);
}
Beispiel #2
0
void BamHeaderHelper::GetRefID(BamTools::BamReader &bamReader)
{ 
  BamTools::SamHeader samHeader = bamReader.GetHeader();
  for (BamTools::SamSequenceIterator itr = samHeader.Sequences.Begin(); itr != samHeader.Sequences.End(); ++itr) {
    string bamseq = itr->Name;
    bam_sequence_names.push_back(bamseq);
   }
}
Beispiel #3
0
void BamHeaderHelper::GetFlowOrder(BamTools::BamReader &bamReader){
  BamTools::SamHeader samHeader = bamReader.GetHeader();
  if (!samHeader.HasReadGroups()) {
    //bamReader.Close();
    cerr << "ERROR: there is no read group in " << "this file" << endl;
    //exit(1);
  }
    for (BamTools::SamReadGroupIterator itr = samHeader.ReadGroups.Begin(); itr != samHeader.ReadGroups.End(); ++itr) {
    if (itr->HasFlowOrder()) {
      flow_order_set.push_back(itr->FlowOrder);
      //flowKey = itr->KeySequence;
    }
  }
}
	std::vector< Sample::SharedPtr > BamAlignmentReader::GetBamReaderSamples(const std::string& bamPath)
	{
		std::vector< Sample::SharedPtr > samplePtrs;
		BamTools::BamReader bamReader;
		if (!bamReader.Open(bamPath))
		{
			throw "Unable to open bam file";
		}
		auto readGroups = bamReader.GetHeader().ReadGroups;
		auto iter = readGroups.Begin();
		for (; iter != readGroups.End(); ++iter)
		{
			auto samplePtr = std::make_shared< Sample >((*iter).Sample, (*iter).ID, bamPath);
			samplePtrs.emplace_back(samplePtr);
		}
		bamReader.Close();
		return samplePtrs;
	}
Beispiel #5
0
void Config::InitializationClustering() {
    struct stat st;
    if(stat(Workspace.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Workspace directory already present");
    else if (mkdir(Workspace.c_str(), 0755) != 0) {
        Log("[Error] Could not create workspace directory: " + Workspace);
        exit(1);
    }
    RunningTasksFile = Workspace + "/" + FilePrefix + "running.tasks";
    StatsFile = Workspace + "/" + FilePrefix + "stats";
    BinClusterFile = Workspace + "/" + FilePrefix + "bpc";
    clusterFile = new ClusterFile(BinClusterFile);
    clusterDir = Workspace + "/clusters/";
    if(stat(clusterDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Cluster directory already present");
    else if (mkdir(clusterDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create cluster directory: " + clusterDir);
        exit(1);
    }
    insertsizeDir = Workspace + "/insertsize/";
    if(stat(insertsizeDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Insertsize directory already present");
    else if (mkdir(insertsizeDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create insertsize directory: " + insertsizeDir);
        exit(1);
    }
    coverageDir = Workspace + "/coverage/";
    if(stat(coverageDir.c_str(),&st) == 0 and st.st_mode and S_IFDIR != 0) Log("[Warning] Coverage directory already present");
    else if (mkdir(coverageDir.c_str(), 0755) != 0) {
        Log("[Error] Could not create coverage directory: " + coverageDir);
        exit(1);
    }
    
    if (!ForwardBam.empty() && !ReverseBam.empty() && PairedBam.empty()) {
        UsePairedBam = false;
    } else if (ForwardBam.empty() && ReverseBam.empty() && !PairedBam.empty()) {
        UsePairedBam = true;
    } else {
        Log("[Error] No correct bam file(s)");
        exit(1);
    }
    
    BamTools::BamAlignment alignment;
    BamTools::BamReader BamReader;
    
    if (UsePairedBam) {
        BamReader.Open(PairedBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open paired bam");
            exit(1);
        }
        if (PairedIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                PairedIndex = PairedBam.substr(0,PairedBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(PairedIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader header = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = header.ReadGroups.Begin(); it != header.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            readNameConverter.AddReadGroup(readgroup->ID);
        }
        long int count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (not NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in reads but not in header");
                    count = 0;
                }
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
    } else {
        BamReader.Open(ForwardBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open first/forward bam");
            exit(1);
        }
        if (ForwardIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                ForwardIndex = ForwardBam.substr(0,ForwardBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(ForwardIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for forward bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader forwardheader = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = forwardheader.ReadGroups.Begin(); it != forwardheader.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            readNameConverter.AddReadGroup(readgroup->ID);
        }
        long int count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (!NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in forward reads but not in header");
                    count = 0;
                }
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
        BamReader.Open(ReverseBam);
        if (not BamReader.IsOpen()) {
            Log("[Error] Could not open second/reverse bam");
            exit(1);
        }
        if (ReverseIndex.empty()) {
            if (not BamReader.LocateIndex(BamTools::BamIndex::STANDARD)) {
                ReverseIndex = ReverseBam.substr(0,ReverseBam.find_last_of(".bam")-3) + ".bai";
                BamReader.OpenIndex(ReverseIndex);
            }
            if (not BamReader.HasIndex()) {
                Log("[Error] No index for reverse bamfile");
                exit(1);
            }
        }
        BamTools::SamHeader reverseheader = BamReader.GetHeader();
        for (BamTools::SamReadGroupIterator it = reverseheader.ReadGroups.Begin(); it != reverseheader.ReadGroups.End(); it++) {
            BamTools::SamReadGroup* readgroup = &*it;
            readNameConverter.TrimName(readgroup->ID);
            if (readNameConverter.AddReadGroup(readgroup->ID)) {
                Log("[Warning] Readgroup '" + readgroup->ID + "' found in reverse but not in forward");
            }
        }
        count = 0;
        while (BamReader.GetNextAlignment(alignment)) {
            string RG;
            if (alignment.GetTag("RG", RG)) {
                if (!NameTrim.empty()) readNameConverter.TrimName(RG);
                if (readNameConverter.AddReadGroup(RG)) {
                    Log("[Warning] Readgroup '" + RG + "' found in reverse reads but not in header");
                    count = 0;
                } 
            }
            count++;
            if (count > 10000) break;
        }
        BamReader.Close();
    }
    
    for(map<string, int>::iterator it = readNameConverter.ReadGroups.begin(); it!=readNameConverter.ReadGroups.end(); ++it) {
        ostringstream logBuffer;
        logBuffer << "Readgroup found: " << it->second << " - " << it->first;
        Log(logBuffer.str());
    }
    
    writeConfigFile(Workspace + FilePrefix + "config");
}
Beispiel #6
0
void MyBamGroup::ReadGroup(char *bamFile){

	BamTools::BamReader bamReader;
	if(!bamReader.Open(std::string(bamFile))) {
		 errMsg = "Failed to open bam " + std::string(bamFile) + "\n";
	} else {
		BamTools::SamHeader samHeader = bamReader.GetHeader();
		for (BamTools::SamReadGroupIterator itr = samHeader.ReadGroups.Begin(); itr != samHeader.ReadGroups.End(); ++itr ) {
			if(itr->HasID()) {
				ID.push_back(itr->ID);
			} else {
				ID.push_back("");
			}
			if(itr->HasFlowOrder()) {
				FlowOrder.push_back(itr->FlowOrder);
			} else {
				FlowOrder.push_back("");
			}
			if(itr->HasKeySequence()) {
				KeySequence.push_back(itr->KeySequence);
			} else {
				KeySequence.push_back("");
			}
			if(itr->HasDescription()) {
				Description.push_back(itr->Description);
			} else {
				Description.push_back("");
			}
			if(itr->HasLibrary()) {
				Library.push_back(itr->Library);
			} else {
				Library.push_back("");
			}
			if(itr->HasPlatformUnit()) {
				PlatformUnit.push_back(itr->PlatformUnit);
			} else {
				PlatformUnit.push_back("");
			}
			if(itr->HasPredictedInsertSize()) {
				PredictedInsertSize.push_back(itr->PredictedInsertSize);
			} else {
				PredictedInsertSize.push_back("");
			}
			if(itr->HasProductionDate()) {
				ProductionDate.push_back(itr->ProductionDate);
			} else {
				ProductionDate.push_back("");
			}
			if(itr->HasProgram()) {
				Program.push_back(itr->Program);
			} else {
				Program.push_back("");
			}
			if(itr->HasSample()) {
				Sample.push_back(itr->Sample);
			} else {
				Sample.push_back("");
			}
			if(itr->HasSequencingCenter()) {
				SequencingCenter.push_back(itr->SequencingCenter);
			} else {
				SequencingCenter.push_back("");
			}
			if(itr->HasSequencingTechnology()) {
				SequencingTechnology.push_back(itr->SequencingTechnology);
			} else {
				SequencingTechnology.push_back("");
			}
		}
		bamReader.Close();
	}

}