/**
 * Default constructor.
 *
 * Allocates the two 256 entry lookup tables T0 and T1, sets nm to zero and
 * marks the MD and NM tags in auxvec.
 *
 * Both tables map A/a, C/c, G/g and T/t to 0, 1, 2 and 3 respectively.
 * Every other index is set to 4 in T0 and to 5 in T1, so the two tables
 * never agree on an index that is not one of these eight letters.
 **/
MdStringComputationContext()
: T0(256,false), T1(256,false), nm(0)
{
	// default codes for all indices which are not one of the letters below
	std::fill(T0.begin(),T0.end(),4);
	std::fill(T1.begin(),T1.end(),5);

	// the position of a letter in these arrays is the code stored for it
	static char const uppercase[] = { 'A', 'C', 'G', 'T' };
	static char const lowercase[] = { 'a', 'c', 'g', 't' };

	for ( int code = 0; code < 4; ++code )
	{
		T0[uppercase[code]] = code;
		T0[lowercase[code]] = code;
		T1[uppercase[code]] = code;
		T1[lowercase[code]] = code;
	}

	// tags registered in the auxiliary field filter
	auxvec.set("MD");
	auxvec.set("NM");
}
void init() { rgfilter.set("RG"); pgfilter.set("PG"); std::vector < std::vector<libmaus2::bambam::Chromosome> const * > V; std::vector < std::vector<libmaus2::bambam::ReadGroup> const * > R; std::vector< std::string const * > H; for ( uint64_t i = 0; i < inputbamheaders.size(); ++i ) { libmaus2::bambam::BamHeader const & header = *inputbamheaders[i]; V.push_back( & (header.getChromosomes()) ); R.push_back( & (header.getReadGroups()) ); H.push_back( & (header.text) ); std::string const SO = libmaus2::bambam::BamHeader::getSortOrderStatic(header.text); orderedCoordinates = orderedCoordinates && (SO == "coordinate"); orderedNames = orderedNames && (SO == "queryname"); } libmaus2::bambam::ChromosomeVectorMerge::unique_ptr_type tchromosomeMergeInfo(new libmaus2::bambam::ChromosomeVectorMerge(V)); chromosomeMergeInfo = UNIQUE_PTR_MOVE(tchromosomeMergeInfo); libmaus2::bambam::ReadGroupVectorMerge::unique_ptr_type treadGroupMergeInfo(new libmaus2::bambam::ReadGroupVectorMerge(R)); readGroupMergeInfo = UNIQUE_PTR_MOVE(treadGroupMergeInfo); libmaus2::bambam::ProgramHeaderLinesMerge::unique_ptr_type tprogramHeaderLinesMergeInfo(new libmaus2::bambam::ProgramHeaderLinesMerge(H)); programHeaderLinesMergeInfo = UNIQUE_PTR_MOVE(tprogramHeaderLinesMergeInfo); // get HD line fields std::vector < std::pair<std::string,std::string> > VHDP; for ( uint64_t i = 0; i < inputbamheaders.size(); ++i ) { std::vector<libmaus2::bambam::HeaderLine> VHD = libmaus2::bambam::HeaderLine::extractLinesByType(inputbamheaders[i]->text,"HD"); if ( VHD.size() ) { libmaus2::bambam::HeaderLine const & H = VHD.front(); for ( std::map<std::string,std::string>::const_iterator ita = H.M.begin(); ita != H.M.end(); ++ita ) VHDP.push_back(*ita); } } // sort by tag std::sort(VHDP.begin(),VHDP.end()); // extract consistent tags present in all HD lines std::map<std::string,std::string> MHD; uint64_t l = 0; while ( l < VHDP.size() ) { uint64_t h = l+1; while ( h < VHDP.size() && VHDP[l].first == VHDP[h].first ) ++h; 
// we have the right number if ( h-l == inputbamheaders.size() ) { // check for consistent value bool eq = true; for ( uint64_t i = l+1; i < h; ++i ) eq = eq && (VHDP[i].second == VHDP[l].second); if ( eq ) MHD[VHDP[l].first] = VHDP[l].second; } l = h; } std::string const VN = (MHD.find("VN") != MHD.end()) ? MHD.find("VN")->second : "1.5"; std::ostringstream headertextstr; headertextstr << "@HD\tVN:" << VN; if ( inputbamheaders.size() == 1 ) headertextstr << "\tSO:" << libmaus2::bambam::BamHeader::getSortOrderStatic(inputbamheaders[0]->text); else headertextstr << "\tSO:unknown"; for ( std::map<std::string,std::string>::const_iterator ita = MHD.begin(); ita != MHD.end(); ++ita ) { std::string const & key = ita->first; if ( key != "VN" && key != "SO" ) headertextstr << "\t" << key << ":" << ita->second; } headertextstr << "\n"; for ( uint64_t i = 0; i < chromosomeMergeInfo->chromosomes.size(); ++i ) headertextstr << chromosomeMergeInfo->chromosomes[i].createLine() << "\n"; for ( uint64_t i = 0; i < readGroupMergeInfo->readgroups.size(); ++i ) headertextstr << readGroupMergeInfo->readgroups[i].createLine() << "\n"; headertextstr << programHeaderLinesMergeInfo->PGtext; std::vector<std::string> otherlines; for ( uint64_t i = 0; i < inputbamheaders.size(); ++i ) { std::vector<libmaus2::bambam::HeaderLine> lines = libmaus2::bambam::HeaderLine::extractLines(inputbamheaders[i]->text); for ( uint64_t j = 0; j < lines.size(); ++j ) { libmaus2::bambam::HeaderLine const & line = lines[j]; if ( line.type != "HD" && line.type != "SQ" && line.type != "RG" && line.type != "PG" ) { otherlines.push_back(line.line); } } } std::set<std::string> otherlinesseen; for ( uint64_t i = 0; i < otherlines.size(); ++i ) if ( otherlinesseen.find(otherlines[i]) == otherlinesseen.end() ) { headertextstr << otherlines[i] << std::endl; otherlinesseen.insert(otherlines[i]); } // std::cerr << std::string(80,'-') << std::endl; std::string const headertext = headertextstr.str(); 
::libmaus2::bambam::BamHeader::unique_ptr_type tbamheader(new ::libmaus2::bambam::BamHeader(headertext)); bamheader = UNIQUE_PTR_MOVE(tbamheader); // std::cerr << "topologically sorted: " << chromosomeMergeInfo->topological << std::endl; // std::cerr << bamheader->text; }