Exemplo n.º 1
0
			// Set up the two 256 entry symbol tables and register the "MD" and "NM" tags.
			//
			// Every entry of T0 starts out as 4 and every entry of T1 as 5; the
			// nucleotide letters A,C,G,T (upper and lower case) are then mapped to
			// the codes 0,1,2,3 in both tables. nm is initialised to 0.
			MdStringComputationContext()
			: T0(256,false), T1(256,false), nm(0)
			{
				// default codes for all symbols which are not A,C,G or T
				std::fill(T0.begin(),T0.end(),4);
				std::fill(T1.begin(),T1.end(),5);

				// position of a letter in these arrays is its code
				char const uppersyms[4] = { 'A', 'C', 'G', 'T' };
				char const lowersyms[4] = { 'a', 'c', 'g', 't' };

				for ( unsigned int code = 0; code < 4; ++code )
				{
					T0[uppersyms[code]] = code;
					T0[lowersyms[code]] = code;
					T1[uppersyms[code]] = code;
					T1[lowersyms[code]] = code;
				}

				// tags handled via auxvec (presumably the aux fields this context recomputes; confirm against users of auxvec)
				auxvec.set("MD");
				auxvec.set("NM");
			}
Exemplo n.º 2
0
			void init()
			{
				rgfilter.set("RG");
				pgfilter.set("PG");

				std::vector < std::vector<libmaus2::bambam::Chromosome> const * > V;
				std::vector < std::vector<libmaus2::bambam::ReadGroup> const * > R;
				std::vector< std::string const * > H;
				for ( uint64_t i = 0; i < inputbamheaders.size(); ++i )
				{
					libmaus2::bambam::BamHeader const & header = *inputbamheaders[i];

					V.push_back( & (header.getChromosomes()) );
					R.push_back( & (header.getReadGroups()) );
					H.push_back( & (header.text) );

					std::string const SO = libmaus2::bambam::BamHeader::getSortOrderStatic(header.text);
					orderedCoordinates = orderedCoordinates && (SO == "coordinate");
					orderedNames = orderedNames && (SO == "queryname");
				}

				libmaus2::bambam::ChromosomeVectorMerge::unique_ptr_type tchromosomeMergeInfo(new libmaus2::bambam::ChromosomeVectorMerge(V));
				chromosomeMergeInfo = UNIQUE_PTR_MOVE(tchromosomeMergeInfo);

				libmaus2::bambam::ReadGroupVectorMerge::unique_ptr_type treadGroupMergeInfo(new libmaus2::bambam::ReadGroupVectorMerge(R));
				readGroupMergeInfo = UNIQUE_PTR_MOVE(treadGroupMergeInfo);

				libmaus2::bambam::ProgramHeaderLinesMerge::unique_ptr_type tprogramHeaderLinesMergeInfo(new libmaus2::bambam::ProgramHeaderLinesMerge(H));
				programHeaderLinesMergeInfo = UNIQUE_PTR_MOVE(tprogramHeaderLinesMergeInfo);

				// get HD line fields
				std::vector < std::pair<std::string,std::string> > VHDP;
				for ( uint64_t i = 0; i < inputbamheaders.size(); ++i )
				{
					std::vector<libmaus2::bambam::HeaderLine> VHD = libmaus2::bambam::HeaderLine::extractLinesByType(inputbamheaders[i]->text,"HD");
					if ( VHD.size() )
					{
						libmaus2::bambam::HeaderLine const & H = VHD.front();
						for ( std::map<std::string,std::string>::const_iterator ita = H.M.begin(); ita != H.M.end(); ++ita )
							VHDP.push_back(*ita);
					}
				}

				// sort by tag
				std::sort(VHDP.begin(),VHDP.end());

				// extract consistent tags present in all HD lines
				std::map<std::string,std::string> MHD;
				uint64_t l = 0;
				while ( l < VHDP.size() )
				{
					uint64_t h = l+1;
					while ( h < VHDP.size() && VHDP[l].first == VHDP[h].first )
						++h;

					// we have the right number
					if ( h-l == inputbamheaders.size() )
					{
						// check for consistent value
						bool eq = true;
						for ( uint64_t i = l+1; i < h; ++i )
							eq = eq && (VHDP[i].second == VHDP[l].second);
						if ( eq )
							MHD[VHDP[l].first] = VHDP[l].second;
					}

					l = h;
				}

				std::string const VN = (MHD.find("VN") != MHD.end()) ? MHD.find("VN")->second : "1.5";

				std::ostringstream headertextstr;
				headertextstr << "@HD\tVN:" << VN;

				if ( inputbamheaders.size() == 1 )
					headertextstr << "\tSO:" << libmaus2::bambam::BamHeader::getSortOrderStatic(inputbamheaders[0]->text);
				else
					headertextstr << "\tSO:unknown";

				for ( std::map<std::string,std::string>::const_iterator ita = MHD.begin(); ita != MHD.end(); ++ita )
				{
					std::string const & key = ita->first;

					if ( key != "VN" && key != "SO" )
						headertextstr << "\t" << key << ":" << ita->second;
				}

				headertextstr << "\n";

				for ( uint64_t i = 0; i < chromosomeMergeInfo->chromosomes.size(); ++i )
					headertextstr << chromosomeMergeInfo->chromosomes[i].createLine() << "\n";

				for ( uint64_t i = 0; i < readGroupMergeInfo->readgroups.size(); ++i )
					headertextstr << readGroupMergeInfo->readgroups[i].createLine() << "\n";

				headertextstr << programHeaderLinesMergeInfo->PGtext;

				std::vector<std::string> otherlines;
				for ( uint64_t i = 0; i < inputbamheaders.size(); ++i )
				{
					std::vector<libmaus2::bambam::HeaderLine> lines = libmaus2::bambam::HeaderLine::extractLines(inputbamheaders[i]->text);

					for ( uint64_t j = 0; j < lines.size(); ++j )
					{
						libmaus2::bambam::HeaderLine const & line = lines[j];

						if (
							line.type != "HD" &&
							line.type != "SQ" &&
							line.type != "RG" &&
							line.type != "PG"
						)
						{
							otherlines.push_back(line.line);
						}
					}
				}
				std::set<std::string> otherlinesseen;

				for ( uint64_t i = 0; i < otherlines.size(); ++i )
					if ( otherlinesseen.find(otherlines[i]) == otherlinesseen.end() )
					{
						headertextstr << otherlines[i] << std::endl;
						otherlinesseen.insert(otherlines[i]);
					}

				// std::cerr << std::string(80,'-') << std::endl;
				std::string const headertext = headertextstr.str();

				::libmaus2::bambam::BamHeader::unique_ptr_type tbamheader(new ::libmaus2::bambam::BamHeader(headertext));
				bamheader = UNIQUE_PTR_MOVE(tbamheader);

				// std::cerr << "topologically sorted: " << chromosomeMergeInfo->topological << std::endl;
				// std::cerr << bamheader->text;
			}