Esempio n. 1
0
			/**
			 * open a file as a CheckedInputStream and seek to a given offset
			 *
			 * @param filename name of the file to be opened
			 * @param pos offset handed to seekg (relative to std::ios::beg)
			 * @return pointer to the newly allocated stream
			 **/
			static ::libmaus::aio::CheckedInputStream::unique_ptr_type openFileAtPosition(std::string const & filename, uint64_t const pos)
			{
				typedef ::libmaus::aio::CheckedInputStream checked_stream_type;

				checked_stream_type::unique_ptr_type Pstream(new checked_stream_type(filename));
				Pstream->seekg(pos,std::ios::beg);

				return UNIQUE_PTR_MOVE(Pstream);
			}
Esempio n. 2
0
/**
 * main routine of bamclipreinsert
 *
 * A BAM stream is decoded from standard input, clipReinsert is called on each
 * alignment and the result is written as BAM to standard output. The header
 * text is extended by a PG line for bamclipreinsert. If md5 (resp. index) is
 * switched on and md5filename (resp. indexfilename) is given, then an MD5
 * digest (resp. a BAM index) is produced via block output callbacks.
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 **/
int bamclipreinsert(::libmaus2::util::ArgInfo const & arginfo)
{
	// binary input is not read from a terminal
	if ( isatty(STDIN_FILENO) )
	{
		::libmaus2::exception::LibMausException ex;
		ex.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
		ex.finish();
		throw ex;
	}

	// binary output is not written to a terminal
	if ( isatty(STDOUT_FILENO) )
	{
		::libmaus2::exception::LibMausException ex;
		ex.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
		ex.finish();
		throw ex;
	}

	int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(
		arginfo.getValue<int>("level",getDefaultLevel())
	);
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());

	// input decoder and its header
	::libmaus2::bambam::BamDecoder dec(std::cin,false);
	::libmaus2::bambam::BamHeader const & inheader = dec.getHeader();
	std::string const inheadertext(inheader.text);

	// header text extended by a PG line for this program
	std::string const outheadertext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLine(
		inheadertext,
		"bamclipreinsert", // ID
		"bamclipreinsert", // PN
		arginfo.commandline, // CL
		::libmaus2::bambam::ProgramHeaderLineSet(inheadertext).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);

	// header object for the output file
	libmaus2::bambam::BamHeader const outheader(outheadertext);

	/*
	 * set up callbacks for md5 and index
	 */
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus2::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > cbs;

	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	bool const md5requested = arginfo.getValue<unsigned int>("md5",getDefaultMD5());
	if ( md5requested )
	{
		// md5filename stays empty if the argument is missing or empty
		if ( arginfo.hasArg("md5filename") )
			md5filename = arginfo.getUnparsedValue("md5filename","");

		if ( md5filename.empty() )
		{
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		}
		else
		{
			::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}

	libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	bool const indexrequested = arginfo.getValue<unsigned int>("index",getDefaultIndex());
	if ( indexrequested )
	{
		// indexfilename stays empty if the argument is missing or empty
		if ( arginfo.hasArg("indexfilename") )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");

		if ( indexfilename.empty() )
		{
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;
		}
		else
		{
			libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}

	// the writer gets a null pointer if there are no callbacks
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * Pcbs = cbs.size() ? &cbs : 0;
	/*
	 * callbacks are set up
	 */

	::libmaus2::bambam::BamWriter::unique_ptr_type writer(new ::libmaus2::bambam::BamWriter(std::cout,outheader,level,Pcbs));
	libmaus2::bambam::BamAuxFilterVector bafv;

	libmaus2::bambam::BamAlignment & algn = dec.getAlignment();

	// scratch space handed to clipReinsert
	libmaus2::autoarray::AutoArray < std::pair<uint8_t,uint8_t> > auxtags;
	libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> cigop;
	std::stack < libmaus2::bambam::cigar_operation > hardstack;
	libmaus2::bambam::BamAlignment::D_array_type Tcigar;
	libmaus2::bambam::BamAuxFilterVector auxfilterout;
	auxfilterout.set('q','s');
	auxfilterout.set('q','q');

	// number of alignments processed, progress is printed every 2^20 alignments
	uint64_t const progressmod = 1024*1024;
	uint64_t numalgn = 0;

	for ( ; dec.readAlignment(); ++numalgn )
	{
		// put clipped parts back in and add soft clipping cigar operations where required
		clipReinsert(algn,auxtags,bafv,cigop,Tcigar,hardstack,auxfilterout);

		algn.serialise(writer->getStream());

		uint64_t const done = numalgn+1;

		if ( verbose && (done % progressmod) == 0 )
			std::cerr << "[V] " << done/progressmod << std::endl;
	}

	// destroy the writer before the callback results are stored
	writer.reset();

	if ( Pmd5cb )
		Pmd5cb->saveDigestAsFile(md5filename);

	if ( Pindex )
		Pindex->flush(std::string(indexfilename));

	return EXIT_SUCCESS;
}
Esempio n. 3
0
/**
 * main routine of bamfixmatecoordinates
 *
 * A BAM stream is read from standard input through a CollatingBamDecoder which
 * delivers the alignments as pairs. Whenever getMapCnt counts exactly one
 * mapped end for a pair, reference id and position are copied to the
 * coordinate and mate coordinate fields of the ends present. All alignments
 * are written as BAM to standard output. The header gets an additional PG
 * line and its sort order is set to "unknown" unless it is "queryname".
 * If requested an MD5 digest and a BAM index of the output are stored.
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 * @throws ::libmaus::exception::LibMausException if the compression level is not supported
 **/
int bamfixmatecoordinates(::libmaus::util::ArgInfo const & arginfo)
{
	::libmaus::util::TempFileRemovalContainer::setup();
	::libmaus::timing::RealTimeClock rtc; rtc.start();

	bool const verbose = arginfo.getValue<unsigned int>("verbose",getDefaultVerbose());
	unsigned int const colhashbits = arginfo.getValue<unsigned int>("colhashbits",getDefaultColHashBits());
	unsigned int const collistsize = arginfo.getValue<unsigned int>("collistsize",getDefaultColListSize());
	int const level = arginfo.getValue<int>("level",getDefaultLevel());
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());

	// only the zlib levels named below are accepted
	switch ( level )
	{
		case Z_NO_COMPRESSION:
		case Z_BEST_SPEED:
		case Z_BEST_COMPRESSION:
		case Z_DEFAULT_COMPRESSION:
			break;
		default:
		{
			::libmaus::exception::LibMausException se;
			se.getStream()
				<< "Unknown compression level, please use"
				<< " level=" << Z_DEFAULT_COMPRESSION << " (default) or"
				<< " level=" << Z_BEST_SPEED << " (fast) or"
				<< " level=" << Z_BEST_COMPRESSION << " (best) or"
				<< " level=" << Z_NO_COMPRESSION << " (no compression)" << std::endl;
			se.finish();
			throw se;
		}
	}

	std::string const tmpfilename = tmpfilenamebase + "_bamcollate";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfilename);

	::libmaus::bambam::CollatingBamDecoder CBD(std::cin,tmpfilename,false /* put rank */,colhashbits/*hash bits*/,collistsize/*size of output list*/);
	::libmaus::bambam::BamFormatAuxiliary auxdata;
	::libmaus::bambam::BamHeader const & bamheader = CBD.getHeader();

	// add PG line to header
	std::string const upheadtext = ::libmaus::bambam::ProgramHeaderLineSet::addProgramLine(
		bamheader.text,
		"bamfixmatecoordinates", // ID
		"bamfixmatecoordinates", // PN
		arginfo.commandline, // CL
		::libmaus::bambam::ProgramHeaderLineSet(bamheader.text).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);
	// construct new header
	::libmaus::bambam::BamHeader uphead(upheadtext);

	// the output is written in collation order, so any sort order but queryname is dropped
	if ( uphead.getSortOrder() != "queryname" )
		uphead.changeSortOrder("unknown");

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;
	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		if ( arginfo.hasArg("md5filename") &&  arginfo.getUnparsedValue("md5filename","") != "" )
			md5filename = arginfo.getUnparsedValue("md5filename","");
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( md5filename.size() )
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}
	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		if ( arginfo.hasArg("indexfilename") &&  arginfo.getUnparsedValue("indexfilename","") != "" )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( indexfilename.size() )
		{
			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( cbs.size() )
		Pcbs = &cbs;
	/*
	 * end md5/index callbacks
	 */

	// setup bam writer
	::libmaus::bambam::BamWriter::unique_ptr_type writer(new ::libmaus::bambam::BamWriter(std::cout,uphead,level,Pcbs));

	typedef ::libmaus::bambam::CollatingBamDecoder::alignment_ptr_type alignment_ptr_type;
	std::pair<alignment_ptr_type,alignment_ptr_type> P;
	// progress is reported whenever proc passes a multiple of mod
	uint64_t const mod = 1024*1024;
	uint64_t proc = 0;
	uint64_t lastproc = 0;
	uint64_t paircnt = 0;

	while ( CBD.tryPair(P) )
	{
		// getMapCnt is defined elsewhere; presumably it yields 1 for a present and mapped end and 0 otherwise
		uint64_t const mapcnt = getMapCnt(P.first) + getMapCnt(P.second);

		if ( mapcnt == 1 )
		{
			int32_t refid = -1;
			int32_t pos = -1;

			// NOTE(review): the coordinates are taken from the first end whenever it is
			// present, even if the second end is the mapped one; confirm this is intended
			if ( P.first )
			{
				refid = P.first->getRefID();
				pos = P.first->getPos();
			}
			else
			{
				assert ( P.second );

				refid = P.second->getRefID();
				pos = P.second->getPos();
			}

			// either end may be missing (see the checks below), so only
			// touch the ends which are present instead of dereferencing
			// a null pointer
			if ( P.first )
			{
				P.first->putRefId(refid);
				P.first->putPos(pos);
				P.first->putNextRefId(refid);
				P.first->putNextPos(pos);
			}
			if ( P.second )
			{
				P.second->putRefId(refid);
				P.second->putPos(pos);
				P.second->putNextRefId(refid);
				P.second->putNextPos(pos);
			}
		}

		if ( P.first )
		{
			P.first->serialise(writer->getStream());
			++proc;
		}
		if ( P.second )
		{
			P.second->serialise(writer->getStream());
			++proc;
		}
		if ( P.first && P.second )
		{
			paircnt++;
		}

		if ( verbose && (proc/mod != lastproc/mod) )
		{
			std::cerr
				<< "Processed " << proc << " fragments, " << paircnt << " pairs, "
				<< proc/rtc.getElapsedSeconds() << " al/s"
				<< std::endl;
			lastproc = proc;
		}
	}

	if ( verbose )
		std::cerr
			<< "Processed " << proc << " fragments, " << paircnt << " pairs, "
			<< proc/rtc.getElapsedSeconds() << " al/s"
			<< std::endl;

	// destroy the writer before the callback results are stored
	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
Esempio n. 4
0
			void init()
			{
				rgfilter.set("RG");
				pgfilter.set("PG");

				std::vector < std::vector<libmaus2::bambam::Chromosome> const * > V;
				std::vector < std::vector<libmaus2::bambam::ReadGroup> const * > R;
				std::vector< std::string const * > H;
				for ( uint64_t i = 0; i < inputbamheaders.size(); ++i )
				{
					libmaus2::bambam::BamHeader const & header = *inputbamheaders[i];

					V.push_back( & (header.getChromosomes()) );
					R.push_back( & (header.getReadGroups()) );
					H.push_back( & (header.text) );

					std::string const SO = libmaus2::bambam::BamHeader::getSortOrderStatic(header.text);
					orderedCoordinates = orderedCoordinates && (SO == "coordinate");
					orderedNames = orderedNames && (SO == "queryname");
				}

				libmaus2::bambam::ChromosomeVectorMerge::unique_ptr_type tchromosomeMergeInfo(new libmaus2::bambam::ChromosomeVectorMerge(V));
				chromosomeMergeInfo = UNIQUE_PTR_MOVE(tchromosomeMergeInfo);

				libmaus2::bambam::ReadGroupVectorMerge::unique_ptr_type treadGroupMergeInfo(new libmaus2::bambam::ReadGroupVectorMerge(R));
				readGroupMergeInfo = UNIQUE_PTR_MOVE(treadGroupMergeInfo);

				libmaus2::bambam::ProgramHeaderLinesMerge::unique_ptr_type tprogramHeaderLinesMergeInfo(new libmaus2::bambam::ProgramHeaderLinesMerge(H));
				programHeaderLinesMergeInfo = UNIQUE_PTR_MOVE(tprogramHeaderLinesMergeInfo);

				// get HD line fields
				std::vector < std::pair<std::string,std::string> > VHDP;
				for ( uint64_t i = 0; i < inputbamheaders.size(); ++i )
				{
					std::vector<libmaus2::bambam::HeaderLine> VHD = libmaus2::bambam::HeaderLine::extractLinesByType(inputbamheaders[i]->text,"HD");
					if ( VHD.size() )
					{
						libmaus2::bambam::HeaderLine const & H = VHD.front();
						for ( std::map<std::string,std::string>::const_iterator ita = H.M.begin(); ita != H.M.end(); ++ita )
							VHDP.push_back(*ita);
					}
				}

				// sort by tag
				std::sort(VHDP.begin(),VHDP.end());

				// extract consistent tags present in all HD lines
				std::map<std::string,std::string> MHD;
				uint64_t l = 0;
				while ( l < VHDP.size() )
				{
					uint64_t h = l+1;
					while ( h < VHDP.size() && VHDP[l].first == VHDP[h].first )
						++h;

					// we have the right number
					if ( h-l == inputbamheaders.size() )
					{
						// check for consistent value
						bool eq = true;
						for ( uint64_t i = l+1; i < h; ++i )
							eq = eq && (VHDP[i].second == VHDP[l].second);
						if ( eq )
							MHD[VHDP[l].first] = VHDP[l].second;
					}

					l = h;
				}

				std::string const VN = (MHD.find("VN") != MHD.end()) ? MHD.find("VN")->second : "1.5";

				std::ostringstream headertextstr;
				headertextstr << "@HD\tVN:" << VN;

				if ( inputbamheaders.size() == 1 )
					headertextstr << "\tSO:" << libmaus2::bambam::BamHeader::getSortOrderStatic(inputbamheaders[0]->text);
				else
					headertextstr << "\tSO:unknown";

				for ( std::map<std::string,std::string>::const_iterator ita = MHD.begin(); ita != MHD.end(); ++ita )
				{
					std::string const & key = ita->first;

					if ( key != "VN" && key != "SO" )
						headertextstr << "\t" << key << ":" << ita->second;
				}

				headertextstr << "\n";

				for ( uint64_t i = 0; i < chromosomeMergeInfo->chromosomes.size(); ++i )
					headertextstr << chromosomeMergeInfo->chromosomes[i].createLine() << "\n";

				for ( uint64_t i = 0; i < readGroupMergeInfo->readgroups.size(); ++i )
					headertextstr << readGroupMergeInfo->readgroups[i].createLine() << "\n";

				headertextstr << programHeaderLinesMergeInfo->PGtext;

				std::vector<std::string> otherlines;
				for ( uint64_t i = 0; i < inputbamheaders.size(); ++i )
				{
					std::vector<libmaus2::bambam::HeaderLine> lines = libmaus2::bambam::HeaderLine::extractLines(inputbamheaders[i]->text);

					for ( uint64_t j = 0; j < lines.size(); ++j )
					{
						libmaus2::bambam::HeaderLine const & line = lines[j];

						if (
							line.type != "HD" &&
							line.type != "SQ" &&
							line.type != "RG" &&
							line.type != "PG"
						)
						{
							otherlines.push_back(line.line);
						}
					}
				}
				std::set<std::string> otherlinesseen;

				for ( uint64_t i = 0; i < otherlines.size(); ++i )
					if ( otherlinesseen.find(otherlines[i]) == otherlinesseen.end() )
					{
						headertextstr << otherlines[i] << std::endl;
						otherlinesseen.insert(otherlines[i]);
					}

				// std::cerr << std::string(80,'-') << std::endl;
				std::string const headertext = headertextstr.str();

				::libmaus2::bambam::BamHeader::unique_ptr_type tbamheader(new ::libmaus2::bambam::BamHeader(headertext));
				bamheader = UNIQUE_PTR_MOVE(tbamheader);

				// std::cerr << "topologically sorted: " << chromosomeMergeInfo->topological << std::endl;
				// std::cerr << bamheader->text;
			}
			PlainOrGzipStreamBufferWrapper(int const rfd, uint64_t const bufsize = 64*1024, uint64_t const pushbacksize = 64*1024)
			{
				libmaus::aio::PosixFdInputStream::unique_ptr_type TPFIS(new libmaus::aio::PosixFdInputStream(rfd,bufsize,pushbacksize));
				PFIS = UNIQUE_PTR_MOVE(TPFIS);	
				init(*PFIS,bufsize,pushbacksize);
			}
Esempio n. 6
0
/**
 * copy a BAM file while keeping only the reference sequences marked in IBV
 *
 * The header is written via serialiseSequenceSubset. Reference ids in the
 * alignments are replaced by their rank among the marked sequences
 * (IBV.rank1(id)-1), ids of unmapped alignments are set to -1.
 *
 * @param arginfo parsed command line arguments (verbose, md5, md5filename, level)
 * @param in compressed BAM input stream
 * @param IBV bit vector marking the reference sequences to be kept
 * @param out stream receiving the compressed BAM output
 **/
static void filterBamUsedSequences(
	libmaus::util::ArgInfo const & arginfo,
	std::istream & in,
	::libmaus::bitio::IndexedBitVector const & IBV,
	std::ostream & out
)
{
	typedef libmaus::lz::BgzfDeflate<std::ostream> deflate_type;

	// decompressor and header of the input
	libmaus::lz::BgzfInflateStream inflatestr(in);
	libmaus::bambam::BamHeaderLowMem::unique_ptr_type Pheader(
		libmaus::bambam::BamHeaderLowMem::constructFromBAM(inflatestr)
	);

	bool const verbose = arginfo.getValue<unsigned int>("verbose",getDefaultVerbose());

	// md5 callback
	// NOTE(review): cbs is filled here but not handed to the deflater anywhere in this function - confirm
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;
	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	std::string md5filename;
	bool const md5requested = arginfo.getValue<unsigned int>("md5",getDefaultMD5());
	if ( md5requested )
	{
		// md5filename stays empty if the argument is missing or empty
		if ( arginfo.hasArg("md5filename") )
			md5filename = arginfo.getUnparsedValue("md5filename","");

		if ( md5filename.empty() )
		{
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		}
		else
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}

	// compressor for the output
	int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(
		arginfo.getValue<int>("level",getDefaultLevel())
	);
	deflate_type::unique_ptr_type Pdeflate(new deflate_type(out,level));
	deflate_type & deflatestr = *Pdeflate;

	// header restricted to the marked sequences
	if ( verbose )
		std::cerr << "[V] writing filtered header...";
	Pheader->serialiseSequenceSubset(
		deflatestr,IBV,
		"bamfilterheader2" /* id */,
		"bamfilterheader2" /* pn */,
		arginfo.commandline /* pgCL */,
		PACKAGE_VERSION /* pgVN */
	);
	if ( verbose )
		std::cerr << "done." << std::endl;

	::libmaus::bambam::BamAlignment algn;
	// alignment counter, only maintained in verbose mode
	uint64_t numalgn = 0;

	while ( libmaus::bambam::BamAlignmentDecoder::readAlignmentGz(inflatestr,algn) )
	{
		// reference id of the alignment
		if ( ! algn.isMapped() )
		{
			algn.putRefId(-1);
		}
		else
		{
			int64_t const oldrefid = algn.getRefID();
			assert ( oldrefid >= 0 );
			assert ( IBV.get(oldrefid) );
			algn.putRefId(IBV.rank1(oldrefid)-1);
		}

		// reference id of the mate
		// NOTE(review): this tests isMapped() of the alignment itself, not of its mate - confirm this is intended
		if ( ! ( algn.isPaired() && algn.isMapped() ) )
		{
			algn.putNextRefId(-1);
		}
		else
		{
			int64_t const oldnextrefid = algn.getNextRefID();
			assert ( oldnextrefid >= 0 );
			assert ( IBV.get(oldnextrefid) );
			algn.putNextRefId(IBV.rank1(oldnextrefid)-1);
		}

		algn.serialise(deflatestr);

		// progress message every 2^20 alignments
		if ( verbose )
		{
			++numalgn;

			if ( (numalgn & (1024*1024-1)) == 0 )
				std::cerr << "[V] " << numalgn/(1024*1024) << std::endl;
		}
	}

	// finish the output file
	deflatestr.flush();
	deflatestr.addEOFBlock();

	Pdeflate.reset();

	if ( Pmd5cb )
		Pmd5cb->saveDigestAsFile(md5filename);
}
Esempio n. 7
0
			/**
			 * constructor; builds the levels of the tree on top of the base layer rB
			 *
			 * Starting from the n values of the base layer each pass of the main
			 * loop turns in values into out = (in+k-1) >> klog values of the next
			 * level. For each level an index array I[level] (out entries of klog
			 * bits each) and a value array C[level] are produced by fillSubArrays,
			 * where C[level] is created by a generator set up from a histogram
			 * computed by fillSubHistogram. S stores the number of elements per
			 * level (S[0] = n, S[i+1] = size of I[i]).
			 *
			 * If rmmtreedebug is set, then the levels are checked against a
			 * direct scan of the base layer using assert.
			 *
			 * @param rB base layer
			 * @param rn number of elements in the base layer
			 **/
			RMMTree(base_layer_type const & rB, uint64_t const rn)
			: B(rB), n(rn), numlevels(computeNumLevels(n)), I(numlevels), C(numlevels), S(numlevels+1)
			{
				// number of values on the layer currently used as input
				uint64_t in = n;
				unsigned int level = 0;
				
				while ( in > 1 )
				{
					// number of values on the next level (in/k rounded up)
					uint64_t const out = (in+k-1) >> klog;
					
					// minimal indices for next level
					libmaus::bitio::CompactArray::unique_ptr_type tIlevel(
                                                new libmaus::bitio::CompactArray(out,klog));
					I[level] = UNIQUE_PTR_MOVE(tIlevel);

					libmaus::util::Histogram::unique_ptr_type subhist;

					// histogram computed from the layer below (base layer for level 0)
					if ( level == 0 )
					{
						libmaus::util::Histogram::unique_ptr_type tsubhist(fillSubHistogram(B.begin(),in));
						subhist = UNIQUE_PTR_MOVE(tsubhist);
					}
					else
					{
						libmaus::util::Histogram::unique_ptr_type tsubhist(fillSubHistogram(C[level-1]->begin(),in));
						subhist = UNIQUE_PTR_MOVE(tsubhist);
					}
					
					// generator for the value array of this level
					C_type::generator_type impgen(*subhist);

					// fill index array and generator from the layer below
					if ( level == 0 )
						fillSubArrays(B.begin(),in,*(I[level]),impgen);
					else
						fillSubArrays(C[level-1]->begin(),in,*(I[level]),impgen);
						
					C_ptr_type tClevel(impgen.createFinal());
					C[level] = UNIQUE_PTR_MOVE(tClevel);
					
					// the level just produced is the input of the next pass
					in = out;
					++level;
				}
				
				// level sizes
				S[0] = n;
				for ( uint64_t i = 0; i < numlevels; ++i )
					S[i+1] = I[i]->size();
				
				// debugging: compare each level with a scan of the base layer,
				// kk is the number of base layer values covered by one entry of the level
				if ( rmmtreedebug )
					for ( uint64_t kk = k, level = 0; kk < n; kk *= k, ++level )
					{
						uint64_t low = 0;
						uint64_t z = 0;
						while ( low < n )
						{
							uint64_t const high = std::min(low+kk,n);
							
							// leftmost minimum in B[low,high)
							uint64_t minv = B[low];
							uint64_t mini = low;
							for ( uint64_t i = low+1; i < high; ++i )
								if ( B[i] < minv )
								{
									minv = B[i];
									mini = i;
								}
								
							// entry z stores the minimum and the index of the sub block containing it
							assert ( (*C[level])[z] == minv );
							assert ( (*I[level])[z] == ((mini-low)*k)/kk );
						
							++z;	
							low = high;
						}
					}
			}
Esempio n. 8
0
/**
 * print base statistics for the mapped reads of the input
 *
 * For all mapped alignments (if regular expressions are available and the
 * name argument is given, then only for those whose name matches it) the
 * cigar operations are evaluated. The number of bases in the reads (sequence
 * length plus hard clipped bases), the number of clipped bases (hard and
 * soft) and the number of aligned bases (match, insertion, equal, diff)
 * are printed on standard error.
 *
 * @param arginfo parsed command line arguments
 **/
void bamalignfrac(::libmaus2::util::ArgInfo const & arginfo)
{
	libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type Pwrapper(
		libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo)
	);
	::libmaus2::bambam::BamAlignmentDecoder & decoder = Pwrapper->getDecoder();
	::libmaus2::bambam::BamAlignment const & algn = decoder.getAlignment();
	libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> cigop;

	// counters
	uint64_t alignedbases = 0;
	uint64_t clippedbases = 0;
	uint64_t readbases = 0;

	#if defined(LIBMAUS2_HAVE_REGEX_H)
	// optional filter on the read name
	std::string const namepattern = arginfo.getUnparsedValue("name","");
	libmaus2::util::unique_ptr<libmaus2::regex::PosixRegex>::type regex_ptr;
	if ( ! namepattern.empty() )
	{
		libmaus2::util::unique_ptr<libmaus2::regex::PosixRegex>::type tregex_ptr(new libmaus2::regex::PosixRegex(namepattern));
		regex_ptr = UNIQUE_PTR_MOVE(tregex_ptr);
	}
	#endif

	while ( decoder.readAlignment() )
	{
		// unmapped reads are not considered
		if ( ! algn.isMapped() )
			continue;

		#if defined(LIBMAUS2_HAVE_REGEX_H)
		// skip reads whose name does not match the filter
		if ( regex_ptr && (regex_ptr->findFirstMatch(algn.getName()) == -1) )
			continue;
		#endif

		uint32_t const numcig = algn.getCigarOperations(cigop);

		readbases += algn.getLseq();

		for ( uint64_t i = 0; i < numcig; ++i )
		{
			libmaus2::bambam::cigar_operation const & op = cigop[i];

			switch ( op.first )
			{
				// operations consuming read bases in the alignment
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CMATCH:
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CINS:
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CEQUAL:
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CDIFF:
					alignedbases += op.second;
					break;
				// hard clipped bases are added to the read bases as well
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CHARD_CLIP:
					readbases += op.second;
					clippedbases += op.second;
					break;
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CSOFT_CLIP:
					clippedbases += op.second;
					break;
				// not counted
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CDEL:
				case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CREF_SKIP:
					break;
			}
		}
	}

	std::cerr << "total bases in mapped reads\t" << readbases << std::endl;
	std::cerr << "clipped (hard and soft) bases in mapped reads\t" << clippedbases << std::endl;
	std::cerr << "aligned bases in mapped reads\t" << alignedbases << std::endl;
}
Esempio n. 9
0
			/**
			 * allocate a new object of this type constructed with parameter slog+1
			 *
			 * @return pointer to the new object
			 **/
			unique_ptr_type extendEmpty() const
			{
				unique_ptr_type Pextended(new this_type(slog+1));

				return UNIQUE_PTR_MOVE(Pextended);
			}
Esempio n. 10
0
			/**
			 * allocate an object of this type from an input stream
			 *
			 * @param textstr input stream handed to the constructor
			 * @return pointer to the new object
			 **/
			static unique_ptr_type construct(std::istream & textstr)
			{
				unique_ptr_type Pobject(new this_type(textstr));

				return UNIQUE_PTR_MOVE(Pobject);
			}
Esempio n. 11
0
			CompactFastQContainer(std::istream & textstr)
			: T(textstr), dict(new ::libmaus::fastx::CompactFastQContainerDictionary(textstr)), H(), C()
			{
				GetObject G(T.begin());
				H = UNIQUE_PTR_MOVE(::libmaus::fastx::CompactFastQHeader::unique_ptr_type(new ::libmaus::fastx::CompactFastQHeader(G)));
			}
Esempio n. 12
0
/**
 * main routine of bamsplitmod
 *
 * A BAM file is read from standard input and its alignments are distributed
 * over div output files in round robin fashion (alignment number c goes to
 * file c mod div). The output files are named prefix_NNNNNN.bam where NNNNNN
 * is the zero padded number of the file.
 *
 * @param arginfo parsed command line arguments (level, verbose, div, prefix)
 * @return EXIT_SUCCESS
 * @throws ::libmaus::exception::LibMausException if standard input is a terminal
 *         or div is not a positive number
 **/
int bamsplitmod(libmaus::util::ArgInfo const & arginfo)
{
    if ( isatty(STDIN_FILENO) )
    {
        ::libmaus::exception::LibMausException se;
        se.getStream() << "Refusing read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
        se.finish();
        throw se;
    }

    int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
    int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
    // keep the parsed value signed so negative numbers can be detected
    // before they are converted to an (enormous) unsigned file count
    int const divarg = arginfo.getValue<int>("div",getDefaultDiv());
    std::string const prefix = arginfo.getUnparsedValue("prefix",getDefaultFilePrefix(arginfo));

    if ( ! divarg )
    {
        ::libmaus::exception::LibMausException se;
        se.getStream() << "div cannot be 0." << std::endl;
        se.finish();
        throw se;
    }
    if ( divarg < 0 )
    {
        ::libmaus::exception::LibMausException se;
        se.getStream() << "div cannot be negative." << std::endl;
        se.finish();
        throw se;
    }

    // number of output files
    uint64_t const div = static_cast<uint64_t>(divarg);

    libmaus::bambam::BamDecoder bamdec(std::cin);
    libmaus::bambam::BamAlignment const & algn = bamdec.getAlignment();
    libmaus::bambam::BamHeader const & header = bamdec.getHeader();
    ::libmaus::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header));

    // one output stream and one BAM writer per output file
    libmaus::autoarray::AutoArray<libmaus::aio::CheckedOutputStream::unique_ptr_type> COS(div);
    libmaus::autoarray::AutoArray<libmaus::bambam::BamWriter::unique_ptr_type> writers(div);
    for ( uint64_t i = 0; i < div; ++i )
    {
        std::ostringstream ostr;
        ostr << prefix << "_" << std::setw(6) << std::setfill('0') << i << std::setw(0) << ".bam";

        libmaus::aio::CheckedOutputStream::unique_ptr_type tCOS(new libmaus::aio::CheckedOutputStream(ostr.str()));
        COS[i] = UNIQUE_PTR_MOVE(tCOS);
        libmaus::bambam::BamWriter::unique_ptr_type twriter(new libmaus::bambam::BamWriter(*COS[i],*uphead,level));
        writers[i] = UNIQUE_PTR_MOVE(twriter);
    }

    // number of alignments processed
    uint64_t c = 0;
    if ( verbose )
    {
        while ( bamdec.readAlignment() )
        {
            algn.serialise ( writers [ (c++) % div ] -> getStream() );

            // progress message every 2^20 alignments
            if ( ((c) & ((1ull<<20)-1)) == 0 )
                std::cerr << "[V] " << c << std::endl;
        }
        std::cerr << "[V] " << c << std::endl;
    }
    else
    {
        while ( bamdec.readAlignment() )
            algn.serialise ( writers [ (c++) % div ] -> getStream() );
    }

    // destroy each writer before flushing and closing its underlying stream
    for ( uint64_t i = 0; i < div; ++i )
    {
        writers[i].reset();
        COS[i]->flush();
        COS[i].reset();
    }

    return EXIT_SUCCESS;
}
Esempio n. 13
0
			/**
			 * load an object of this type from a file
			 *
			 * @param filename name of the file the object is constructed from
			 * @return pointer to the new object
			 **/
			static unique_ptr_type load(std::string const & filename)
			{
				libmaus::aio::CheckedInputStream istr(filename);

				unique_ptr_type Pobject(new this_type(istr));

				return UNIQUE_PTR_MOVE(Pobject);
			}
Esempio n. 14
0
			/**
			 * allocate a decoder for an entry of this object
			 *
			 * @param filename file name handed to the decoder
			 * @param id index of the entry, (*this)[id] is handed to the decoder
			 * @return pointer to the new decoder
			 **/
			FastABgzfDecoder::unique_ptr_type getStream(std::string const & filename, uint64_t const id) const
			{
				FastABgzfDecoder::unique_ptr_type Pdecoder(
					new FastABgzfDecoder(filename,(*this)[id],blocksize)
				);

				return UNIQUE_PTR_MOVE(Pdecoder);
			}
Esempio n. 15
0
/**
 * constructor; redirects standard output and standard error of the calling
 * process to a log server via a forked helper process
 *
 * The constructor connects a socket to serverhostname:port and sends the
 * session id, the numerical id and the string "log". It then creates one pipe
 * each for standard output and standard error, replaces STDOUT_FILENO and
 * STDERR_FILENO by the write ends of these pipes and forks. The child reads
 * the pipes using select/read and forwards the data through the socket until
 * both pipes are closed, sends a termination message and exits. The parent
 * closes its copies of the read ends and returns.
 *
 * @param serverhostname host name of the log server
 * @param port port of the log server
 * @param sid session id sent to the server
 * @param id numerical id sent to the server
 * @throws ::libmaus2::exception::LibMausException if pipe, close, dup2 or fork fail
 **/
libmaus2::util::LogPipeMultiplexGeneric::LogPipeMultiplexGeneric(
    std::string const & serverhostname,
    unsigned short port,
    std::string const & sid,
    uint64_t const id
)
    : pid(-1)
{
    // reset
    stdoutpipe[0] = stdoutpipe[1] = -1;
    stderrpipe[0] = stderrpipe[1] = -1;

    // connect
    ::libmaus2::network::ClientSocket::unique_ptr_type tsock(
        new ::libmaus2::network::ClientSocket(
            port,serverhostname.c_str()
        )
    );
    sock = UNIQUE_PTR_MOVE(tsock);

    // no delay on socket
    sock->setNoDelay();
    // write session id
    sock->writeString(0,sid);

    // id
    sock->writeSingle<uint64_t>(id);
    // connection type
    sock->writeString("log");

    // create pipe for standard out
    if ( pipe(&stdoutpipe[0]) != 0 )
    {
        closeFds();
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "pipe() failed: " << strerror(errno) << std::endl;
        se.finish();
        throw se;
    }
    //create pipe for standard error
    if ( pipe(&stderrpipe[0]) != 0 )
    {
        closeFds();
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "pipe() failed: " << strerror(errno) << std::endl;
        se.finish();
        throw se;
    }
    // close previous standard output
    if ( close(STDOUT_FILENO) != 0 )
    {
        closeFds();
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "close() failed: " << strerror(errno) << std::endl;
        se.finish();
        throw se;
    }
    // close previous standard error
    if ( close(STDERR_FILENO) != 0 )
    {
        closeFds();
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "close() failed: " << strerror(errno) << std::endl;
        se.finish();
        throw se;
    }
    // make the write end of the stdout pipe the new standard output
    if ( dup2(stdoutpipe[1],STDOUT_FILENO) == -1 )
    {
        closeFds();
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "dup2() failed: " << strerror(errno) << std::endl;
        se.finish();
        throw se;
    }
    // make the write end of the stderr pipe the new standard error
    if ( dup2(stderrpipe[1],STDERR_FILENO) == -1 )
    {
        closeFds();
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "dup2() failed: " << strerror(errno) << std::endl;
        se.finish();
        throw se;
    }

    pid = fork();

    if ( pid < 0 )
    {
        closeFds();
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "fork() failed: " << strerror(errno) << std::endl;
        se.finish();
        throw se;
    }
    // child: forward data from the pipes to the socket
    else if ( pid == 0 )
    {
        // close write end
        close(stdoutpipe[1]);
        stdoutpipe[1] = -1;
        close(stderrpipe[1]);
        stderrpipe[1] = -1;
        // close copies
        close(STDOUT_FILENO);
        close(STDERR_FILENO);

        bool running = true;

        try
        {
            // loop as long as at least one of the read ends is open
            while ( running )
            {
                running = false;
                fd_set fds;
                int maxfd = -1;
                FD_ZERO(&fds);

                // register the read ends which are still open
                if ( stdoutpipe[0] != -1 )
                {
                    FD_SET(stdoutpipe[0],&fds);
                    maxfd = std::max(maxfd,stdoutpipe[0]);
                }
                if ( stderrpipe[0] != -1 )
                {
                    FD_SET(stderrpipe[0],&fds);
                    maxfd = std::max(maxfd,stderrpipe[0]);
                }

                running = (maxfd != -1);

                if ( running )
                {
                    // wait without timeout until one of the pipes is readable
                    // NOTE(review): a return value <= 0 (e.g. an error) is not handled, the loop just calls select again - confirm
                    int r = ::select(maxfd+1,&fds,0,0,0);

                    try
                    {
                        if ( r > 0 )
                        {
                            if ( (stdoutpipe[0] != -1) && FD_ISSET(stdoutpipe[0],&fds) )
                            {
                                ::libmaus2::autoarray::AutoArray<char> B(1024,false);
                                ssize_t red = read(stdoutpipe[0],B.get(),B.size());
                                // NOTE(review): red == 0 (end of file) is reported like an error here and
                                // errno may not be meaningful in that case - confirm
                                if ( red <= 0 )
                                {
                                    std::ostringstream errstream;
                                    errstream << "Failed to read from stdout pipe: " << strerror(errno) << std::endl;
                                    std::string errstring = errstream.str();

                                    // no more data from this pipe
                                    close(stdoutpipe[0]);
                                    stdoutpipe[0] = -1;

                                    // send message tagged as standard error, then read one uint64_t from the
                                    // socket (presumably an acknowledgement by the server - TODO confirm)
                                    sock->writeMessage<char>(STDERR_FILENO,errstring.c_str(),errstring.size());
                                    sock->readSingle<uint64_t>();
                                }
                                else
                                {
                                    // forward data tagged as standard output
                                    sock->writeMessage<char>(STDOUT_FILENO,B.get(),red);
                                    sock->readSingle<uint64_t>();
                                }
                            }
                            if ( stderrpipe[0] != -1 && FD_ISSET(stderrpipe[0],&fds) )
                            {
                                ::libmaus2::autoarray::AutoArray<char> B(1024,false);
                                ssize_t red = read(stderrpipe[0],B.get(),B.size());
                                if ( red <= 0 )
                                {
                                    std::ostringstream errstream;
                                    errstream << "Failed to read from stderr pipe: " << strerror(errno) << std::endl;
                                    std::string errstring = errstream.str();

                                    // no more data from this pipe
                                    close(stderrpipe[0]);
                                    stderrpipe[0] = -1;

                                    sock->writeMessage<char>(STDERR_FILENO,errstring.c_str(),errstring.size());
                                    sock->readSingle<uint64_t>();
                                }
                                else
                                {
                                    // forward data tagged as standard error
                                    sock->writeMessage<char>(STDERR_FILENO,B.get(),red);
                                    sock->readSingle<uint64_t>();
                                }
                            }
                        }
                    }
                    // exceptions derived from std::exception thrown while forwarding are dropped
                    // and the loop continues
                    catch(std::exception const & ex)
                    {
                    }
                }
            }
        }
        // NOTE(review): STDERR_FILENO was closed above in this process, so these messages
        // are presumably lost - confirm
        catch(std::exception const & ex)
        {
            std::cerr << "LogPipeMultiplexGeneric " << ex.what() << std::endl;
        }
        catch(...)
        {
            std::cerr << "LogPipeMultiplexGeneric caught unknown exception." << std::endl;
        }

        // send the termination message, failures are ignored
        try
        {
            std::ostringstream quitmsgstr;
            quitmsgstr << "\nLog process for id " << id << " is terminating." << std::endl;
            std::string const quitmsg = quitmsgstr.str();

            // the tag used is different from both STDOUT_FILENO and STDERR_FILENO
            sock->writeMessage<char>(std::max(STDOUT_FILENO,STDERR_FILENO)+1,quitmsg.c_str(),quitmsg.size());
            sock->readSingle<uint64_t>();
        }
        catch(...)
        {

        }

        // terminate the child without returning to the caller of the constructor
        _exit(0);
    }
    // parent
    else
    {
        // close read ends
        close(stdoutpipe[0]);
        stdoutpipe[0] = -1;
        close(stderrpipe[0]);
        stderrpipe[0] = -1;
    }
}
Esempio n. 16
0
		/**
		 * Rearrange the elements of the compact array C in place, one bit level
		 * at a time (most significant bit first), collecting the bits of the
		 * current level in the bit vector B.
		 *
		 * For every bit level ib the elements of each interval [l,r) in the
		 * queue Q are stably partitioned by bit ib (elements with the bit
		 * cleared first, elements with the bit set second). The two resulting
		 * sub intervals are queued for the next level. After all intervals of a
		 * level are processed the collected bits in B are written back into bit
		 * position sb of the elements of C->D.
		 *
		 * NOTE(review): the definition is cut off in this file after the outer
		 * loop; the return statement and the closing brace are not visible
		 * here.
		 *
		 * @param C compact array to be processed (modified in place)
		 * @param verbose if true then progress markers are printed on std::cerr
		 * @param numthreads number of threads for the OpenMP loops (the
		 *        parameter is only named if _OPENMP is defined)
		 **/
		::libmaus2::autoarray::AutoArray<uint64_t> toWaveletTreeBitsParallel(
			::libmaus2::bitio::CompactArray * C, bool const verbose, uint64_t const
				#if defined(_OPENMP)
				numthreads
				#endif
		)
		{
			// number of elements rounded up to a multiple of 64, B holds one bit per element
			// (presumably the second constructor argument disables erasing the array -- TODO confirm)
			uint64_t const pn = ((C->n + 63) / 64)*64;
			::libmaus2::autoarray::AutoArray<uint64_t> B( pn/64 , false );
			// lock guarding the bit wise (non word aligned) writes to B
			::libmaus2::parallel::OMPLock block;

			// queue of intervals [first,second) still to be partitioned, initially the whole array
			typedef std::pair<uint64_t, uint64_t> qtype;
			std::deque < qtype > Q;
			Q.push_back( qtype(0,C->n) );

			if ( verbose )
				std::cerr << "(Sorting bits...";
			// process bit levels from most significant (getB()-1) to least significant (0)
			for ( int ib = (C->getB())-1; ib>=0; --ib )
			{
				// intervals for the next level
				std::deque < qtype > Q2;
				// number of levels already processed
				uint64_t const sb = (C->getB()-ib-1);

				// mask selecting bit ib of an element
				uint64_t const mask = (1ull << ib);
				if ( verbose )
					std::cerr << "(l=" << ib << ")";

				// view on C->D used for reading and writing elements below
				// NOTE(review): presumably this accesses the C->getB()-sb low bits of each
				// element (bit offset sb, element stride C->getB()) -- confirm against CompactSparseArray
				::libmaus2::bitio::CompactSparseArray S(C->D,C->n, C->getB() - sb , sb , C->getB());

				while ( Q.size() )
				{
					// next interval [l,r)
					uint64_t l = Q.front().first, r = Q.front().second;
					Q.pop_front();

					// std::cerr << "[" << l << "," << r << "]" << std::endl;

					// split the interval into numpackets packets of (at most) packetsize elements
					uint64_t const numpackets = getMaxThreads() * 2;
					// per packet counts of ones/zeroes, turned into prefix sums further below
					::libmaus2::autoarray::AutoArray < uint64_t > aones(numpackets+1);
					::libmaus2::autoarray::AutoArray < uint64_t > azeroes(numpackets+1);
					uint64_t const intervalsize = r-l;
					uint64_t const packetsize = ( intervalsize + numpackets - 1 ) / numpackets;

					/**
					 * phase 1: for each packet copy bit ib of every element to B
					 * and count the number of one and zero bits
					 **/
					if ( verbose )
						std::cerr << "(c01/b";
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic,1) num_threads(numthreads)
#endif
					for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h )
					{
						uint64_t ones = 0;
						// packet range [low,high), clipped to the interval
						uint64_t low = std::min ( l + h * packetsize, r );
						uint64_t const rlow = low;
						uint64_t const high = std::min ( low + packetsize, r );
						// first multiple of 64 at or after low (clipped to high)
						uint64_t const low64 = std::min ( ((low+63)/64)*64, high );
						// last multiple of 64 at or before high
						uint64_t const high64 = high & (~(63ull));

						// std::cerr << "low=" << low << " low64=" << low64 << std::endl;

						/**
						 * align low to 64
						 * (single bit writes, performed while holding the lock as
						 * the affected word of B can also belong to another packet)
						 **/
						block.lock();
						for ( ;  low != low64 ; ++low )
						{
							uint64_t const v = (C->get(low)&mask)>>ib;
							ones += v;
							::libmaus2::bitio::putBit(B.get(), low, v);
						}
						block.unlock();

						/**
						 * handle full blocks of 64 values
						 * (each word of B written here is covered completely by
						 * this packet, no lock is taken)
						 **/
						if ( low != high )
						{
							assert ( low % 64 == 0 );
							assert ( high64 >= low );

							uint64_t * Bptr = B.get() + (low/64);

							while ( low != high64 )
							{
								// assemble 64 bits, the first element ends up in the most significant bit
								uint64_t vb = 0;
								uint64_t const lh = low+64;

								for ( ; low != lh ; ++low )
								{
									uint64_t const v = (C->get(low)&mask)>>ib;
									ones += v;
									vb <<= 1;
									vb |= v;
								}

								(*Bptr++) = vb;
							}
						}

						/**
						 * handle rest
						 * (single bit writes again, performed while holding the lock)
						 **/
						block.lock();
						for ( ; (low != high) ; ++low )
						{
							uint64_t const v = (C->get(low)&mask)>>ib;
							ones += v;
							::libmaus2::bitio::putBit(B.get(), low, v);
						}
						block.unlock();

						uint64_t const zeroes = (high-rlow)-ones;

						aones [ h ] = ones;
						azeroes [ h ] = zeroes;
					}

					if ( verbose )
						std::cerr << ")";

					/**
					 * compute prefix sums for zeroes and ones
					 * (exclusive sums, entry numpackets holds the total)
					 **/
					{
						uint64_t c = 0;

						for ( uint64_t i = 0; i < numpackets + 1; ++i )
						{
							uint64_t const t = aones[i];
							aones[i] = c;
							c += t;
						}
					}
					{
						uint64_t c = 0;

						for ( uint64_t i = 0; i < numpackets + 1; ++i )
						{
							uint64_t const t = azeroes[i];
							azeroes[i] = c;
							c += t;
						}
					}

					// total number of ones and zeroes in [l,r)
					uint64_t const ones = aones[numpackets];
					uint64_t const zeros = (r-l)-ones;

					/**
					 * phase 2: allocate per packet temporary arrays for the
					 * elements with bit ib cleared (ACZ) and set (ACO)
					 **/
					::libmaus2::autoarray::AutoArray < ::libmaus2::bitio::CompactArray::unique_ptr_type > ACZ(numpackets);
					::libmaus2::autoarray::AutoArray < ::libmaus2::bitio::CompactArray::unique_ptr_type > ACO(numpackets);

					if ( verbose )
						std::cerr << "(a";
					for ( uint64_t h = 0; h < numpackets; ++h )
					{
						::libmaus2::bitio::CompactArray::unique_ptr_type tACZ(
							new ::libmaus2::bitio::CompactArray( azeroes [ h+1 ] - azeroes[ h ], C->getB() - sb )
						);
						ACZ[h] = UNIQUE_PTR_MOVE(tACZ);
						::libmaus2::bitio::CompactArray::unique_ptr_type tACO(
							new ::libmaus2::bitio::CompactArray( aones [ h+1 ] - aones[ h ], C->getB() - sb )
						);
						ACO[h] = UNIQUE_PTR_MOVE(tACO);
					}
					if ( verbose )
						std::cerr << ")";

					/**
					 * phase 3: distribute the elements of each packet to its
					 * zero/one array, keeping their relative order
					 **/
					if ( verbose )
						std::cerr << "(d";
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic,1) num_threads(numthreads)
#endif
					for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h )
					{
						uint64_t const low = std::min ( l + h * packetsize, r );
						uint64_t const high = std::min ( low + packetsize, r );
						// fill pointers for the zero and one array of this packet
						uint64_t zp = 0;
						uint64_t op = 0;

						::libmaus2::bitio::CompactArray & CO = *ACO[h];
						::libmaus2::bitio::CompactArray & CZ = *ACZ[h];

						for ( uint64_t i = low; i != high; ++i )
						{
							uint64_t const v = S.get(i);

							if ( v & mask )
								CO.set ( op++, v);
							else
								CZ.set ( zp++, v);
						}

						assert ( zp == azeroes[h+1]-azeroes[h] );
						assert ( op == aones[h+1]-aones[h] );
					}
					if ( verbose )
						std::cerr << ")";

					/**
					 * phase 4: compute the target ranges for copying back;
					 * zeroes of packet h go to l+azeroes[h], ones are placed
					 * behind all zeroes of the interval
					 **/
					std::vector < CopyBackPacket > zpacketstodo;
					for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h )
					{
						uint64_t const low = l + azeroes[h];
						uint64_t const high = low + (azeroes[h+1]-azeroes[h]);

						if ( high-low )
							zpacketstodo.push_back ( CopyBackPacket(h,low,high) );
					}
					std::vector < CopyBackPacket > opacketstodo;
					for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h )
					{
						uint64_t const low = l + azeroes[numpackets ] + aones[h];
						uint64_t const high = low + (aones[h+1]-aones[h]);

						if ( high-low )
							opacketstodo.push_back ( CopyBackPacket(h,low,high) );
					}

					/**
					 * group the copy back packets into rounds such that no
					 * packet overlaps (as decided by CopyBackPacket::overlap)
					 * with the packet added to the same round before it;
					 * overlapping packets are deferred to a later round
					 * NOTE(review): presumably overlap means sharing a word of
					 * the underlying bit array -- confirm in CopyBackPacket
					 **/
					std::vector < std::vector < CopyBackPacket > > zpackets;
					while ( zpacketstodo.size() )
					{
						std::vector < CopyBackPacket > zpacketsnewtodo;

						std::vector < CopyBackPacket > nlist;
						nlist.push_back(zpacketstodo.front());

						for ( uint64_t i = 1; i < zpacketstodo.size(); ++i )
							if ( CopyBackPacket::overlap(nlist.back(), zpacketstodo[i], C->getB()) )
								zpacketsnewtodo.push_back(zpacketstodo[i]);
							else
								nlist.push_back(zpacketstodo[i]);

						zpackets.push_back(nlist);

						zpacketstodo = zpacketsnewtodo;
					}

					// same grouping for the packets containing the ones
					std::vector < std::vector < CopyBackPacket > > opackets;
					while ( opacketstodo.size() )
					{
						std::vector < CopyBackPacket > opacketsnewtodo;

						std::vector < CopyBackPacket > nlist;
						nlist.push_back(opacketstodo.front());

						for ( uint64_t i = 1; i < opacketstodo.size(); ++i )
							if ( CopyBackPacket::overlap(nlist.back(), opacketstodo[i], C->getB()) )
								opacketsnewtodo.push_back(opacketstodo[i]);
							else
								nlist.push_back(opacketstodo[i]);

						opackets.push_back(nlist);

						opacketstodo = opacketsnewtodo;
					}

					// std::cerr << "zpackets: " << zpackets.size() << " opackets: " << opackets.size() << std::endl;

					/**
					 * phase 5: copy the partitioned elements back, rounds are
					 * processed one after another, packets of one round in parallel
					 **/
					if ( verbose )
						std::cerr << "(cb";
					for ( uint64_t q = 0; q < zpackets.size(); ++q )
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic,1) num_threads(numthreads)
#endif
						for ( int64_t j = 0; j < static_cast<int64_t>(zpackets[q].size()); ++j )
						{
							CopyBackPacket const CBP = zpackets[q][j];
							uint64_t ac = CBP.low;
							::libmaus2::bitio::CompactArray & CZ = *ACZ[CBP.h];

							for ( uint64_t zc = 0 ; zc != CBP.high-CBP.low; ++zc )
								S.set ( ac++ , CZ.get(zc) );
						}

					for ( uint64_t q = 0; q < opackets.size(); ++q )
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic,1) num_threads(numthreads)
#endif
						for ( int64_t j = 0; j < static_cast<int64_t>(opackets[q].size()); ++j )
						{
							CopyBackPacket const CBP = opackets[q][j];
							uint64_t ac = CBP.low;
							::libmaus2::bitio::CompactArray & CO = *ACO[CBP.h];

							for ( uint64_t oc = 0 ; oc != CBP.high-CBP.low; ++oc )
								S.set ( ac++ , CO.get(oc) );
						}
					if ( verbose )
						std::cerr << ")";

					// queue the non empty zero and one sub intervals for the next level
					if ( zeros )
						Q2.push_back ( qtype(l,l+zeros) );
					if ( ones )
						Q2.push_back ( qtype(r-ones,r) );
				}
				// std::cerr << std::endl;

				/**
				 * all intervals of this level are done: write the bits
				 * collected in B to bit position sb of each element in C->D,
				 * using the same packet/round scheme as for copying back
				 **/
				uint64_t const numpackets = getMaxThreads() * 2;
				uint64_t const intervalsize = C->n;
				uint64_t const packetsize = ( intervalsize + numpackets - 1 ) / numpackets;

				std::vector < CopyBackPacket > packetstodo;
				for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h )
				{
					uint64_t const low = std::min(h*packetsize,C->n);
					uint64_t const high = std::min(low+packetsize,C->n);

					if ( high-low )
						packetstodo.push_back ( CopyBackPacket(h,low,high) );
				}
				std::vector < std::vector < CopyBackPacket > > packets;
				while ( packetstodo.size() )
				{
					std::vector < CopyBackPacket > packetsnewtodo;

					std::vector < CopyBackPacket > nlist;
					nlist.push_back(packetstodo.front());

					for ( uint64_t i = 1; i < packetstodo.size(); ++i )
						if ( CopyBackPacket::overlap(nlist.back(), packetstodo[i], C->getB()) )
							packetsnewtodo.push_back(packetstodo[i]);
						else
							nlist.push_back(packetstodo[i]);

					packets.push_back(nlist);

					packetstodo = packetsnewtodo;
				}

				for ( uint64_t q = 0; q < packets.size(); ++q )
#if defined(_OPENMP)
#pragma omp parallel for schedule(dynamic,1) num_threads(numthreads)
#endif
					for ( int64_t h = 0; h < static_cast<int64_t>(packets[q].size()); ++h )
					{
						CopyBackPacket const CBP = packets[q][h];

						for ( uint64_t i = CBP.low; i < CBP.high; ++i )
							::libmaus2::bitio::putBit ( C->D , i*C->getB() + sb , ::libmaus2::bitio::getBit(B.get(), i) );
					}

				// continue with the sub intervals on the next bit level
				Q = Q2;
			}
			/**
			 * create a copy of this object
			 *
			 * @return unique pointer owning a copy constructed clone of *this
			 **/
			unique_ptr_type uclone() const
			{
				unique_ptr_type clonedObject(new this_type(*this));
				return UNIQUE_PTR_MOVE(clonedObject);
			}
/**
 * Fix mate coordinates in a name collated BAM stream read from standard
 * input and write the result as BAM to standard output.
 *
 * Two consecutive alignments which are both flagged as paired and carry the
 * same read name are treated as a pair. If exactly one of the two is mapped
 * then reference id, position, mate reference id and mate position of both
 * alignments are set to the coordinates of the mapped one. All other
 * alignments are copied unchanged.
 *
 * Used arguments: verbose, level, tmpfile, md5, md5filename, index,
 * indexfilename.
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 **/
int bamfixmatecoordinatesnamesorted(::libmaus::util::ArgInfo const & arginfo)
{
	bool const verbose = arginfo.getValue<unsigned int>("verbose",getDefaultVerbose());

	::libmaus::timing::RealTimeClock rtc; rtc.start();

	// gzip compression level for output
	int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));

	::libmaus::bambam::BamDecoder bamfile(std::cin);
	std::string const headertext(bamfile.getHeader().text);

	// add PG line to header
	std::string const upheadtext = ::libmaus::bambam::ProgramHeaderLineSet::addProgramLine(
		headertext,
		"bamfixmatecoordinatesnamesorted", // ID
		"bamfixmatecoordinatesnamesorted", // PN
		arginfo.commandline, // CL
		::libmaus::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);
	// construct new header
	::libmaus::bambam::BamHeader uphead(upheadtext);

	// the output is only declared sorted if the input was sorted by query name
	if ( uphead.getSortOrder() != "queryname" )
		uphead.changeSortOrder("unknown");

	std::string const & finalheadtext = uphead.text;
	::libmaus::bambam::BamHeader finalheader(finalheadtext);

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;
	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		if ( arginfo.hasArg("md5filename") &&  arginfo.getUnparsedValue("md5filename","") != "" )
			md5filename = arginfo.getUnparsedValue("md5filename","");
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( md5filename.size() )
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}
	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		if ( arginfo.hasArg("indexfilename") &&  arginfo.getUnparsedValue("indexfilename","") != "" )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( indexfilename.size() )
		{
			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( cbs.size() )
		Pcbs = &cbs;
	/*
	 * end md5/index callbacks
	 */

	::libmaus::bambam::BamWriter::unique_ptr_type writer(new ::libmaus::bambam::BamWriter(std::cout,finalheader,level,Pcbs));

	// look ahead window of two alignments; the bool of each slot states
	// whether the slot currently holds an alignment read from the input
	typedef std::pair< ::libmaus::bambam::BamAlignment::shared_ptr_type, bool > slot_type;
	std::pair< slot_type, slot_type > P(
		slot_type(::libmaus::bambam::BamAlignment::shared_ptr_type(),false),
		slot_type(::libmaus::bambam::BamAlignment::shared_ptr_type(),false)
	);

	// try to read two alignments
	P.first.second  = bamfile.readAlignment();
	if ( P.first.second )
	{
		P.first.first   = bamfile.salignment();
		P.second.second = bamfile.readAlignment();
		if ( P.second.second )
			P.second.first  = bamfile.salignment();
	}

	uint64_t single = 0, pairs = 0;
	uint64_t proc = 0;
	uint64_t lastproc = 0;
	uint64_t const mod = 1024*1024;

	// while we have two alignments
	while ( P.first.second && P.second.second )
	{
		uint32_t const aflags = P.first.first->getFlags();
		uint32_t const bflags = P.second.first->getFlags();

		// same name?
		if (
			(aflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FPAIRED)
			&&
			(bflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FPAIRED)
			&&
			(! strcmp(P.first.first->getName(),P.second.first->getName()))
		)
		{
			unsigned int const amap = (aflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FUNMAP) ? 0 : 1;
			unsigned int const bmap = (bflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FUNMAP) ? 0 : 1;

			// if exactly one of the two is mapped
			if ( amap + bmap == 1 )
			{
				::libmaus::bambam::BamAlignment::shared_ptr_type mapped = amap ? P.first.first : P.second.first;
				int64_t const tid = mapped->getRefID();
				int64_t const pos = mapped->getPos();

				// set all tid and pos values
				P.first.first->putRefId(tid);
				P.first.first->putPos(pos);
				P.first.first->putNextRefId(tid);
				P.first.first->putNextPos(pos);
				P.second.first->putRefId(tid);
				P.second.first->putPos(pos);
				P.second.first->putNextRefId(tid);
				P.second.first->putNextPos(pos);
			}

			// write alignments
			P.first.first->serialise(writer->getStream());
			P.second.first->serialise(writer->getStream());

			// Both slots have been written and are empty now. Clear the flag
			// of the second slot explicitly: it must not keep its previous
			// value if the input ends here, otherwise the assertion after
			// the loop fails for any input ending on a complete pair.
			P.second.second = false;

			// read new alignments
			P.first.second = bamfile.readAlignment();
			if ( P.first.second )
			{
				P.first.first = bamfile.salignment();
				P.second.second = bamfile.readAlignment();
				if ( P.second.second )
					P.second.first = bamfile.salignment();
			}

			pairs++;
			proc += 2;
		}
		// different names
		else
		{
			// write first alignment
			P.first.first->serialise(writer->getStream());
			// move second to first
			std::swap(P.first,P.second);
			// read new second
			P.second.second = P.first.second && bamfile.readAlignment();
			if ( P.second.second )
				P.second.first = bamfile.salignment();

			single++;
			proc += 1;
		}

		// progress report whenever another block of mod alignments is done
		if ( verbose && (proc/mod != lastproc/mod) )
		{
			std::cerr << proc << "\t" << single << "\t" << pairs << "\t" <<
				proc/rtc.getElapsedSeconds() << "al/s"
				<< std::endl;
			lastproc = proc;
		}
	}

	// a single alignment may be left over in the first slot
	if ( P.first.second )
	{
		P.first.first->serialise(writer->getStream());
		single++;
		proc += 1;
	}

	if ( verbose )
		std::cerr << proc << "\t" << single << "\t" << pairs << "\t" <<
			proc/rtc.getElapsedSeconds() << "al/s"
			<< std::endl;

	// the second slot is empty whenever the loop above terminates
	assert ( ! P.second.second );

	// destroy the writer before the md5/index callbacks are finalised
	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
Esempio n. 19
0
/**
 * constructor: initialise member A from the pointer object rA
 *
 * @param rA unique pointer to an Array832 object; it is passed through
 *        UNIQUE_PTR_MOVE into A (presumably ownership is transferred and rA
 *        is left empty -- confirm against the definition of UNIQUE_PTR_MOVE)
 **/
libmaus2::util::NegativeDifferenceArray32::NegativeDifferenceArray32(::libmaus2::util::Array832::unique_ptr_type & rA)
: A(UNIQUE_PTR_MOVE(rA))
{

}
Esempio n. 20
0
/**
 * constructor: connect to a log server, redirect standard output and
 * standard error of the calling process into two pipes and fork a child
 * process which forwards the data arriving on these pipes to the server
 *
 * After connecting the session id is sent to the server and a string is
 * read back which is stored in cmdline.
 *
 * The child never returns from this constructor, it leaves via _exit(0)
 * once both pipes have been marked as finished.
 *
 * @param serverhostname host name of the log server
 * @param port port of the log server
 * @param sid session id sent to the server
 * @throws ::libmaus::exception::LibMausException if pipe(), close(), dup2()
 *         or fork() fails
 **/
libmaus::util::LogPipeMultiplex::LogPipeMultiplex(
	std::string const & serverhostname,
	unsigned short port,
	std::string const & sid
	)
: pid(-1)
{
	// connect
	::libmaus::network::ClientSocket::unique_ptr_type tsock(
		new ::libmaus::network::ClientSocket(
			port,serverhostname.c_str()
		)
	);
	sock = UNIQUE_PTR_MOVE(tsock);
	// no delay on socket
	sock->setNoDelay();
	// write session id
	sock->writeString(0,sid);
	uint64_t stag;
	cmdline = sock->readString(stag);

	// create the pipes replacing standard output and standard error
	if ( pipe(&stdoutpipe[0]) != 0 )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "pipe() failed: " << strerror(errno) << std::endl;
		se.finish();
		throw se;
	}
	if ( pipe(&stderrpipe[0]) != 0 )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "pipe() failed: " << strerror(errno) << std::endl;
		se.finish();
		throw se;
	}
	if ( close(STDOUT_FILENO) != 0 )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "close() failed: " << strerror(errno) << std::endl;
		se.finish();
		throw se;
	}
	if ( close(STDERR_FILENO) != 0 )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "close() failed: " << strerror(errno) << std::endl;
		se.finish();
		throw se;
	}
	// make the write ends of the pipes the new standard output/error
	if ( dup2(stdoutpipe[1],STDOUT_FILENO) == -1 )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "dup2() failed: " << strerror(errno) << std::endl;
		se.finish();
		throw se;
	}
	if ( dup2(stderrpipe[1],STDERR_FILENO) == -1 )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "dup2() failed: " << strerror(errno) << std::endl;
		se.finish();
		throw se;
	}

	pid = fork();

	if ( pid < 0 )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "fork() failed: " << strerror(errno) << std::endl;
		se.finish();
		throw se;
	}
	else if ( pid == 0 )
	{
		// child: forward pipe data to the server

		// close write end
		close(stdoutpipe[1]);
		close(stderrpipe[1]);
		// close copies
		close(STDOUT_FILENO);
		close(STDERR_FILENO);

		bool running = true;

		try
		{
			while ( running )
			{
				running = false;
				fd_set fds;
				int maxfd = -1;
				FD_ZERO(&fds);

				// watch every pipe which is not marked as finished (-1)
				if ( stdoutpipe[0] != -1 )
				{
					FD_SET(stdoutpipe[0],&fds);
					maxfd = std::max(maxfd,stdoutpipe[0]);
				}
				if ( stderrpipe[0] != -1 )
				{
					FD_SET(stderrpipe[0],&fds);
					maxfd = std::max(maxfd,stderrpipe[0]);
				}

				// we are done as soon as both pipes are finished
				running = (maxfd != -1);

				if ( running )
				{
					int const r = select(maxfd+1,&fds,0,0,0);

					// A select() error other than an interruption by a signal
					// would repeat on every iteration, so stop instead of
					// spinning in a busy loop.
					if ( r < 0 && errno != EINTR )
						running = false;

					try
					{
						if ( r > 0 )
						{
							if ( stdoutpipe[0] != -1 && FD_ISSET(stdoutpipe[0],&fds) )
							{
								::libmaus::autoarray::AutoArray<char> B(1024,false);
								ssize_t red = read(stdoutpipe[0],B.get(),B.size());
								if ( red <= 0 )
								{
									std::ostringstream errstream;
									errstream << "Failed to read from stdout pipe: " << strerror(errno) << std::endl;
									std::string errstring = errstream.str();

									// Mark the pipe as finished BEFORE talking
									// to the server. If one of the socket calls
									// below throws, the exception is swallowed
									// by the handler further down and the loop
									// would otherwise select on this
									// descriptor forever.
									close(stdoutpipe[0]);
									stdoutpipe[0] = -1;

									sock->writeMessage<char>(STDERR_FILENO,errstring.c_str(),errstring.size());
									uint64_t stag, n;
									sock->readMessage<uint64_t>(stag,0,n);
								}
								else
								{
									// forward data, then read the server's reply
									sock->writeMessage<char>(STDOUT_FILENO,B.get(),red);
									uint64_t stag, n;
									sock->readMessage<uint64_t>(stag,0,n);
								}
							}
							if ( stderrpipe[0] != -1 && FD_ISSET(stderrpipe[0],&fds) )
							{
								::libmaus::autoarray::AutoArray<char> B(1024,false);
								ssize_t red = read(stderrpipe[0],B.get(),B.size());
								if ( red <= 0 )
								{
									std::ostringstream errstream;
									errstream << "Failed to read from stderr pipe: " << strerror(errno) << std::endl;
									std::string errstring = errstream.str();

									// see the stdout case above for the ordering
									close(stderrpipe[0]);
									stderrpipe[0] = -1;

									sock->writeMessage<char>(STDERR_FILENO,errstring.c_str(),errstring.size());
									uint64_t stag, n;
									sock->readMessage<uint64_t>(stag,0,n);
								}
								else
								{
									// forward data, then read the server's reply
									sock->writeMessage<char>(STDERR_FILENO,B.get(),red);
									uint64_t stag, n;
									sock->readMessage<uint64_t>(stag,0,n);
								}
							}
						}
					}
					catch(std::exception const &)
					{
						// best effort: a failed transfer to the server must
						// not stop us from draining the pipes
					}
				}
			}
		}
		catch(...)
		{
			std::cerr << "Caught exception in LogPipeMultiplex" << std::endl;
		}

		_exit(0);
	}
	else
	{
		// parent

		// close read ends
		close(stdoutpipe[0]);
		close(stderrpipe[0]);
	}
}
			/**
			 * allocate a new compressor object
			 *
			 * @return unique pointer to a new ZlibCompressorObject constructed from level
			 **/
			virtual CompressorObject::unique_ptr_type operator()()
			{
				CompressorObject::unique_ptr_type compressor(new ZlibCompressorObject(level));
				return UNIQUE_PTR_MOVE(compressor);
			}
			/**
			 * allocate a decoder object and return it wrapped in a unique pointer
			 *
			 * @param rindex block index (vector of pairs of 64 bit numbers) passed to the constructor
			 * @param fn file name passed to the constructor
			 * @return unique pointer to the new decoder object
			 **/
			static unique_ptr_type construct(std::vector < std::pair < uint64_t, uint64_t > > const & rindex, std::string const & fn)
			{
				unique_ptr_type decoder(new this_type(rindex,fn));
				return UNIQUE_PTR_MOVE(decoder);
			}
Esempio n. 23
0
/**
 * Copy alignments from the BAM stream on standard input to a BAM stream on
 * standard output, keeping only those alignments which satisfy the filter
 * given on the command line.
 *
 * For each alignment a score is computed as the number of the following
 * conditions which hold: the alignment is not flagged as unmapped, its mate
 * is not flagged as unmapped, it is flagged as proper pair. An alignment is
 * kept if this score lies in [minmapped,maxmapped] and its sequence length
 * is at least minlen.
 *
 * Used arguments: minmapped, maxmapped, minlen, level, tmpfile, md5,
 * md5filename, index, indexfilename.
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 **/
int bamfilter(libmaus::util::ArgInfo const & arginfo)
{
	// filter thresholds and output compression level
	uint64_t const minmapped = arginfo.getValue<uint64_t>("minmapped",getDefaultMinMapped());
	uint64_t const maxmapped = arginfo.getValue<uint64_t>("maxmapped",getDefaultMaxMapped());
	uint64_t const minlen = arginfo.getValue<uint64_t>("minlen",getDefaultMinLen());
	int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));

	// input decoder, its header and the alignment object it decodes into
	::libmaus::bambam::BamDecoder decoder(std::cin);
	::libmaus::bambam::BamHeader const & inputheader = decoder.getHeader();
	::libmaus::bambam::BamAlignment & algn = decoder.getAlignment();

	/*
	 * set up the optional md5 and index callbacks for the output stream
	 */
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;

	// md5 checksum of the output (needs a file name to be stored in)
	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		bool const havemd5name =
			arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "";

		if ( ! havemd5name )
		{
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		}
		else
		{
			md5filename = arginfo.getUnparsedValue("md5filename","");

			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type md5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(md5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}

	// BAM index for the output (needs a file name to be stored in)
	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		bool const haveindexname =
			arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "";

		if ( ! haveindexname )
		{
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;
		}
		else
		{
			indexfilename = arginfo.getUnparsedValue("indexfilename","");

			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type indexcb(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(indexcb);
			cbs.push_back(Pindex.get());
		}
	}

	// the writer gets a null pointer if there are no callbacks
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * const Pcbs = cbs.size() ? &cbs : 0;

	// updated header and output writer
	::libmaus::bambam::BamHeader::unique_ptr_type uphead(libmaus::bambam::BamHeaderUpdate::updateHeader(arginfo,inputheader,"bamfilter",std::string(PACKAGE_VERSION)));
	::libmaus::bambam::BamWriter::unique_ptr_type writer(new ::libmaus::bambam::BamWriter(std::cout,*uphead,level,Pcbs));

	while ( decoder.readAlignment() )
	{
		uint32_t const flags = algn.getFlags();

		// count: read mapped, mate mapped, proper pair
		uint64_t nummapped = 0;
		if ( ! (flags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FUNMAP) )
			nummapped += 1;
		if ( ! (flags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FMUNMAP) )
			nummapped += 1;
		if ( flags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FPROPER_PAIR )
			nummapped += 1;

		bool const keep =
			minmapped <= nummapped &&
			nummapped <= maxmapped &&
			algn.getLseq() >= static_cast<int64_t>(minlen);

		if ( keep )
			algn.serialise(writer->getStream());
	}

	// destroy the writer before the callbacks are finalised
	writer.reset();

	if ( Pmd5cb )
		Pmd5cb->saveDigestAsFile(md5filename);
	if ( Pindex )
		Pindex->flush(std::string(indexfilename));

	return EXIT_SUCCESS;
}
			/**
			 * constructor
			 *
			 * @param rindex block index
			 * @param fn file name
			 **/
			SnappyAlignmentMergeInput(
				std::vector < std::pair < uint64_t, uint64_t > > const & rindex,
				std::string const & fn)
			: index(rindex), streams(index.size()), data(index.size()), namecomp(static_cast<uint8_t const *>(0)), heapcomp(namecomp,data.begin()), Q(heapcomp)
			{
				bool openok = true;
			
				try
				{
					for ( uint64_t i = 0; i < index.size(); ++i )
						if ( index[i].second )
						{
							libmaus::lz::SnappyOffsetFileInputStream::unique_ptr_type tstreamsi(
                                	                                        new libmaus::lz::SnappyOffsetFileInputStream(fn,index[i].first)
                                        	                        );
							streams [ i ] = UNIQUE_PTR_MOVE(tstreamsi);
						}
				}
				catch(std::exception const & ex)
				{
					openok = false;
				}
				
				if ( ! openok )
				{
					std::cerr << "[V] failed to open a file handle for each single collation block, trying to merge through a single file handle" << std::endl;

					for ( uint64_t i = 0; i < index.size(); ++i )
						if ( index[i].second )
							streams[i].reset();
					
					libmaus::aio::CheckedInputStream::unique_ptr_type TCIS(new libmaus::aio::CheckedInputStream(fn));
					Psingle	= UNIQUE_PTR_MOVE(TCIS);

					for ( uint64_t i = 0; i < index.size(); ++i )
						if ( index[i].second )
						{
							libmaus::lz::SnappyOffsetFileInputStream::unique_ptr_type tstreamsi
							(
                                	                	new libmaus::lz::SnappyOffsetFileInputStream(*Psingle,index[i].first)
                                        	        );
							streams [ i ] = UNIQUE_PTR_MOVE(tstreamsi);
						}
				}
					
				for ( uint64_t i = 0; i < index.size(); ++i )
					if ( index[i].second )
					{
						index[i].second -= 1;

						#if !defined(NDEBUG)
						bool const alok = 
						#endif
						        libmaus::bambam::BamDecoder::readAlignmentGz(*(streams[i]),data[i],0,false);
						        
						#if !defined(NDEBUG)
						assert ( alok );
						#endif
						
						Q.push(i);
					}
			}
Esempio n. 25
0
int main(int argc, char * argv[])
{
	try
	{
		::libmaus::util::ArgInfo arginfo(argc,argv);
		::libmaus::util::TempFileRemovalContainer::setup();
		::std::vector<std::string> const & inputfilenames = arginfo.restargs;
		char const * fasuffixes[] = { ".fa", ".fasta", 0 };
		
		std::string deftmpname = libmaus::util::OutputFileNameTools::endClipLcp(inputfilenames,&fasuffixes[0]) + ".fa.tmp";
		while ( ::libmaus::util::GetFileSize::fileExists(deftmpname) )
			deftmpname += "_";
		std::string defoutname = libmaus::util::OutputFileNameTools::endClipLcp(inputfilenames,&fasuffixes[0]) + ".fa.recoded";
		while ( ::libmaus::util::GetFileSize::fileExists(defoutname) )
			defoutname += "_";

		std::string const tempfilename = arginfo.getValue<std::string>("tempfilename",deftmpname);
		std::string const outfilename = arginfo.getValue<std::string>("outputfilename",defoutname);
		std::string const indexfilename = tempfilename + ".index";
		unsigned int const addterm = arginfo.getValue<unsigned int>("addterm",0);
		unsigned int const termadd = addterm ? 1 : 0;

		::libmaus::util::TempFileRemovalContainer::addTempFile(tempfilename);
		::libmaus::util::TempFileRemovalContainer::addTempFile(indexfilename);
		
		std::cerr << "temp file name " << tempfilename << std::endl;
		std::cerr << "output file name " << outfilename << std::endl;
		
		/* uint64_t const numseq = */ ::libmaus::fastx::FastAReader::rewriteFiles(inputfilenames,tempfilename,indexfilename);
		uint64_t curpos = 0;
		::libmaus::aio::CheckedOutputStream COS(outfilename);
		
		// 0,A,C,G,T,N
		// map forward
		::libmaus::autoarray::AutoArray<char> cmap(256,false);
		std::fill(cmap.begin(),cmap.end(),5+termadd);
		cmap['\n'] = 0 + termadd;
		cmap['a'] = cmap['A'] = 1 + termadd;
		cmap['c'] = cmap['C'] = 2 + termadd;
		cmap['g'] = cmap['G'] = 3 + termadd;
		cmap['t'] = cmap['T'] = 4 + termadd;
		cmap['n'] = cmap['N'] = 5 + termadd;

		// map to reverse complement
		::libmaus::autoarray::AutoArray<char> rmap(256,false);
		std::fill(rmap.begin(),rmap.end(),5+termadd);
		rmap['\n'] = 0 + termadd;
		rmap['a'] = rmap['A'] = 4 + termadd;
		rmap['c'] = rmap['C'] = 3 + termadd;
		rmap['g'] = rmap['G'] = 2 + termadd;
		rmap['t'] = rmap['T'] = 1 + termadd;
		rmap['n'] = rmap['N'] = 5 + termadd;

		// reverse complement for mapped data
		::libmaus::autoarray::AutoArray<char> xmap(256,false);
		std::fill(xmap.begin(),xmap.end(),5+termadd);
		xmap[0] = 0 + termadd;
		xmap[1] = 4 + termadd;
		xmap[2] = 3 + termadd;
		xmap[3] = 2 + termadd;
		xmap[4] = 1 + termadd;
		xmap[5] = 5 + termadd;

		::libmaus::autoarray::AutoArray<char> imap(256,false);
		for ( uint64_t i = 0; i < imap.size(); ++i )
			imap[i] = static_cast<char>(i);
		
		::libmaus::fastx::FastAReader::RewriteInfoDecoder::unique_ptr_type infodec(new ::libmaus::fastx::FastAReader::RewriteInfoDecoder(indexfilename));
		::libmaus::fastx::FastAReader::RewriteInfo info;
		uint64_t maxseqlen = 0;
		while ( infodec->get(info) )
			maxseqlen = std::max(maxseqlen,info.seqlen);
			
		std::cerr << "[V] max seq len " << maxseqlen << std::endl;

		::libmaus::fastx::FastAReader::RewriteInfoDecoder::unique_ptr_type tinfodec(new ::libmaus::fastx::FastAReader::RewriteInfoDecoder(indexfilename));
		infodec = UNIQUE_PTR_MOVE(tinfodec);
		
		if ( maxseqlen <= 256*1024 )
		{
			::libmaus::aio::CheckedInputStream CIS(tempfilename);
			::libmaus::autoarray::AutoArray<uint8_t> B(maxseqlen+1,false);

			while ( infodec->get(info) )
			{
				// skip id
				CIS.ignore(info.idlen+2);
				// read sequence plus following terminator
				CIS.read(reinterpret_cast<char *>(B.begin()), info.seqlen+1);
				// map
				for ( uint64_t i = 0; i < info.seqlen+1; ++i )
					B[i] = cmap[B[i]];
				// write
				COS.write(reinterpret_cast<char const *>(B.begin()),info.seqlen+1);
				// remap
				for ( uint64_t i = 0; i < info.seqlen+1; ++i )
					B[i] = xmap[B[i]];
				// reverse
				std::reverse(B.begin(),B.begin()+info.seqlen);
				// write
				COS.write(reinterpret_cast<char const *>(B.begin()),info.seqlen+1);
			}
		}
		else
		{
			while ( infodec->get(info) )
			{
				// std::cerr << info.valid << "\t" << info.idlen << "\t" << info.seqlen << "\t" << info.getIdPrefix() << std::endl;
				uint64_t const seqbeg = curpos + (info.idlen+2);
				uint64_t const seqend = seqbeg + info.seqlen;
				
				::libmaus::aio::CheckedInputStream CIS(tempfilename); CIS.seekg(seqbeg);
				::libmaus::util::GetFileSize::copyMap(CIS,COS,cmap.begin(),seqend-seqbeg+1);
				
				::libmaus::aio::CircularReverseWrapper CRW(tempfilename,seqend);
				::libmaus::util::GetFileSize::copyMap(CRW,COS,rmap.begin(),seqend-seqbeg+1);
				
				curpos += (info.idlen+2) + (info.seqlen+1);
			}		
		}
		
		if ( addterm )
			COS.put(0);

		return EXIT_SUCCESS;
	}
	catch(std::exception const & ex)
	{
		std::cerr << ex.what() << std::endl;
		return EXIT_FAILURE;
	}
}
			/**
			 * read the next compressed block from the concatenation of intervals
			 *
			 * If no stream is open then the next non empty interval is
			 * opened first; if there is none then block.eof is set and
			 * nothing is read. The stream is closed again after the block
			 * starting at the end position of the current interval was read.
			 *
			 * @param block object receiving the block meta data and the compressed data
			 * @return true if end of input was detected before reading or if
			 *         the complete compressed block was read, false if fewer
			 *         than block.compsize bytes were obtained
			 **/
			bool readBlock(SimpleCompressedInputBlockConcatBlock & block)
			{
				// reset block state
				block.eof = false;
				block.currentInterval = currentInterval;
				block.uncompsize = 0;
				block.compsize = 0;

				// open the next file if there is no open stream
				if ( ! Pcis.get() )
				{
					// skip over empty intervals
					for ( ; intervalsIt != intervals.end() && intervalsIt->empty(); ++intervalsIt )
					{
					}

					// no interval left, we are done
					if ( intervalsIt == intervals.end() )
					{
						block.eof = true;
						return true;
					}

					// take the interval and advance the iterator
					currentInterval = &(*intervalsIt);
					++intervalsIt;
					block.currentInterval = currentInterval;

					// open file
					libmaus2::aio::InputStreamInstance::unique_ptr_type openedStream(new libmaus2::aio::InputStreamInstance(currentInterval->name));
					Pcis = UNIQUE_PTR_MOVE(openedStream);

					// seek to start of interval
					Pcis->seekg(currentInterval->start.first);
					streampos = currentInterval->start.first;
				}

				// position of this block in the stream
				block.blockstreampos = streampos;

				// read the two header numbers and count the bytes of their encoding
				libmaus2::util::CountPutObject headerByteCounter;
				block.uncompsize = libmaus2::util::UTF8::decodeUTF8(*Pcis);
				::libmaus2::util::UTF8::encodeUTF8(block.uncompsize,headerByteCounter);
				block.compsize = ::libmaus2::util::NumberSerialisation::deserialiseNumber(*Pcis);
				::libmaus2::util::NumberSerialisation::serialiseNumber(headerByteCounter,block.compsize);
				block.metasize = headerByteCounter.c;

				// enlarge the input buffer if it is too small
				if ( block.I.size() < block.compsize )
					block.I = libmaus2::autoarray::AutoArray<uint8_t>(block.compsize,false);

				// read the compressed data and check whether we got all of it
				Pcis->read(reinterpret_cast<char *>(block.I.begin()),block.compsize);
				bool const gcountok = Pcis->gcount() == static_cast<int64_t>(block.compsize);

				streampos += (block.metasize+block.compsize);

				// this was the last block of the current interval
				if ( block.blockstreampos == currentInterval->end.first )
				{
					Pcis.reset();

					// skip over empty intervals
					for ( ; intervalsIt != intervals.end() && intervalsIt->empty(); ++intervalsIt )
					{
					}

					if ( intervalsIt == intervals.end() )
						block.eof = true;
				}

				return gcountok;
			}
Esempio n. 27
0
int bamheap2(libmaus::util::ArgInfo const & arginfo)
{
	bool const verbose = arginfo.getValue("verbose",getDefaultVerbose());
	std::string const reference = arginfo.getUnparsedValue("reference",std::string());
	std::string const outputprefix = arginfo.getUnparsedValue("outputprefix",std::string());
	
	libmaus::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper(
		libmaus::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo));
	::libmaus::bambam::BamAlignmentDecoder * ppdec = &(decwrapper->getDecoder());
	::libmaus::bambam::BamAlignmentDecoder & dec = *ppdec;
	::libmaus::bambam::BamHeader const & header = dec.getHeader();	
	::libmaus::bambam::BamAlignment const & algn = dec.getAlignment();
	
	double const damult = arginfo.getValue<double>("amult",1);
	double const dcmult = arginfo.getValue<double>("cmult",1);
	double const dgmult = arginfo.getValue<double>("gmult",1);
	double const dtmult = arginfo.getValue<double>("tmult",1);
	double const dpadmult = arginfo.getValue<double>("padmult",1);
	
	double maxmult = 0;
	maxmult = std::max(damult,maxmult);
	maxmult = std::max(dcmult,maxmult);
	maxmult = std::max(dgmult,maxmult);
	maxmult = std::max(dtmult,maxmult);
	maxmult = std::max(dpadmult,maxmult);
	
	uint64_t const amult = std::floor((damult / maxmult) * (1ull<<16) + 0.5);
	uint64_t const cmult = std::floor((dcmult / maxmult) * (1ull<<16) + 0.5);
	uint64_t const gmult = std::floor((dgmult / maxmult) * (1ull<<16) + 0.5);
	uint64_t const tmult = std::floor((dtmult / maxmult) * (1ull<<16) + 0.5);
	uint64_t const padmult = std::floor((dpadmult / maxmult) * (1ull<<16) + 0.5);
	
	libmaus::fastx::FastAIndex::unique_ptr_type Pindex;
	libmaus::aio::CheckedInputStream::unique_ptr_type PCIS;
	if ( reference.size() )
	{
		libmaus::fastx::FastAIndex::unique_ptr_type Tindex(
			libmaus::fastx::FastAIndex::load(reference+".fai")
		);
		Pindex = UNIQUE_PTR_MOVE(Tindex);
		
		libmaus::aio::CheckedInputStream::unique_ptr_type TCIS(new libmaus::aio::CheckedInputStream(reference));
		PCIS = UNIQUE_PTR_MOVE(TCIS);
	}

	libmaus::autoarray::AutoArray<libmaus::bambam::cigar_operation> cigop;
	libmaus::autoarray::AutoArray<char> bases;
	
	int64_t prevrefid = -1;
	std::string refidname = "*";
	
	std::map< uint64_t, HeapEntry > M;
	uint64_t alcnt = 0;
	std::vector< std::pair<char,uint8_t> > pendinginserts;
	int64_t loadedRefId = -1;
	int64_t streamRefId = -1;
	libmaus::autoarray::AutoArray<char> refseqbases;
	ConsensusAccuracy * consacc = 0;
	std::map<uint64_t,ConsensusAccuracy> Mconsacc;
	typedef libmaus::util::shared_ptr<std::ostringstream>::type stream_ptr_type;
	stream_ptr_type Pstream;
	ConsensusAux Caux;
	
	Caux.M['a'] = Caux.M['A'] = amult;
	Caux.M['c'] = Caux.M['C'] = cmult;
	Caux.M['g'] = Caux.M['G'] = gmult;
	Caux.M['t'] = Caux.M['T'] = tmult;
	Caux.M[padsym] = padmult;
	
	while ( dec.readAlignment() )
	{
		if ( algn.isMapped() && (!algn.isQCFail()) )
		{
			assert ( ! pendinginserts.size() );
		
			uint32_t const numcigop = algn.getCigarOperations(cigop);
			uint64_t readpos = 0;
			uint64_t refpos = algn.getPos();
			uint64_t const seqlen = algn.decodeRead(bases);
			uint8_t const * qual = libmaus::bambam::BamAlignmentDecoderBase::getQual(algn.D.begin());
			
			// handle finished columns
			if ( algn.getRefID() != prevrefid )
			{
				while ( M.size() )
				{
					HeapEntry & H = M.begin()->second;
					
					if ( outputprefix.size() && (streamRefId != prevrefid) )
					{
						if ( Pstream )
						{
							std::ostringstream fnostr;
							fnostr << outputprefix << "_" << header.getRefIDName(streamRefId);
							libmaus::aio::PosixFdOutputStream PFOS(fnostr.str());
							PFOS << ">" << header.getRefIDName(streamRefId) << '\n';
							PFOS << Pstream->str() << '\n';
							
							Pstream.reset();
						}
						
						stream_ptr_type Tstream(new std::ostringstream);
						Pstream = Tstream;
						streamRefId = prevrefid;
					}
					
					if ( Pindex && (loadedRefId != prevrefid) )
					{
						refseqbases = Pindex->readSequence(*PCIS, Pindex->getSequenceIdByName(refidname));
						loadedRefId = prevrefid;
						
						if ( Mconsacc.find(loadedRefId) == Mconsacc.end() )
							Mconsacc[loadedRefId] = ConsensusAccuracy(refseqbases.size());
						
						consacc = &(Mconsacc[loadedRefId]);
					}
					
					H.toStream(std::cout,M.begin()->first,refidname,(M.begin()->first < refseqbases.size()) ? static_cast<int>(refseqbases[M.begin()->first]) : -1,Caux,consacc,Pstream.get());
					
					M.erase(M.begin());
				}
			
				prevrefid = algn.getRefID();
				refidname = header.getRefIDName(prevrefid);
			}
			else
			{
				while ( M.size() && M.begin()->first < refpos )
				{
					HeapEntry & H = M.begin()->second;

					if ( outputprefix.size() && (streamRefId != prevrefid) )
					{
						if ( Pstream )
						{
							std::ostringstream fnostr;
							fnostr << outputprefix << "_" << header.getRefIDName(streamRefId);
							libmaus::aio::PosixFdOutputStream PFOS(fnostr.str());
							PFOS << ">" << header.getRefIDName(streamRefId) << '\n';
							PFOS << Pstream->str() << '\n';

							Pstream.reset();
						}
						
						stream_ptr_type Tstream(new std::ostringstream);
						Pstream = Tstream;
						streamRefId = prevrefid;
					}

					if ( Pindex && (loadedRefId != prevrefid) )
					{
						refseqbases = Pindex->readSequence(*PCIS, Pindex->getSequenceIdByName(refidname));
						loadedRefId = prevrefid;

						if ( Mconsacc.find(loadedRefId) == Mconsacc.end() )
							Mconsacc[loadedRefId] = ConsensusAccuracy(refseqbases.size());

						consacc = &(Mconsacc[loadedRefId]);
					}
					
					H.toStream(std::cout,M.begin()->first,refidname,(M.begin()->first < refseqbases.size()) ? static_cast<int>(refseqbases[M.begin()->first]) : -1,Caux,consacc,Pstream.get());
					
					M.erase(M.begin());				
				}
			}
			
			for ( uint64_t ci = 0; ci < numcigop; ++ci )
			{
				uint64_t const ciglen = cigop[ci].second;
				
				switch ( cigop[ci].first )
				{
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CMATCH:
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CEQUAL:
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CDIFF:
					{
						if ( pendinginserts.size() )
						{
							M[refpos].I.push_back(pendinginserts);
							pendinginserts.resize(0);
						}
					
						for ( uint64_t i = 0; i < ciglen; ++i )
						{
							M[refpos].V.push_back(std::make_pair(bases[readpos],qual[readpos]));
							readpos++;
							refpos++;
						}
						break;
					}
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CINS:
					{
						for ( uint64_t i = 0; i < ciglen; ++i, ++readpos )
							pendinginserts.push_back(std::make_pair(bases[readpos],qual[readpos]));
						break;
					}
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CDEL:
						// handle pending inserts
						if ( pendinginserts.size() )
						{
							M[refpos].I.push_back(pendinginserts);
							pendinginserts.resize(0);
						}
						
						// deleting bases from the reference
						for ( uint64_t i = 0; i < ciglen; ++i, ++refpos )
							M[refpos].V.push_back(std::make_pair(padsym,0));
						break;
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CREF_SKIP:
						// handle pending inserts
						if ( pendinginserts.size() )
						{
							M[refpos].I.push_back(pendinginserts);
							pendinginserts.resize(0);
						}

						// skip bases on reference
						for ( uint64_t i = 0; i < ciglen; ++i )
						{
							refpos++;
						}
						break;
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CSOFT_CLIP:
						// skip bases on read
						for ( uint64_t i = 0; i < ciglen; ++i )
						{
							readpos++;
						}
						break;
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CHARD_CLIP:
						break;
					case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CPAD:
					{
						for ( uint64_t i = 0; i < ciglen; ++i, ++readpos )
							pendinginserts.push_back(std::make_pair(padsym,0));
						break;
					}
				}
			}

			if ( pendinginserts.size() )
			{
				M[refpos].I.push_back(pendinginserts);
				M[refpos].iadd++;
				pendinginserts.resize(0);
			}

			assert ( readpos == seqlen );
		}
		
		if ( verbose && ((++alcnt % (1024*1024)) == 0) )
			std::cerr << "[V] " << alcnt << std::endl;
	}

	while ( M.size() )
	{
		HeapEntry & H = M.begin()->second;

		if ( outputprefix.size() && (streamRefId != prevrefid) )
		{
			if ( Pstream )
			{
				std::ostringstream fnostr;
				fnostr << outputprefix << "_" << header.getRefIDName(streamRefId);
				libmaus::aio::PosixFdOutputStream PFOS(fnostr.str());
				PFOS << ">" << header.getRefIDName(streamRefId) << '\n';
				PFOS << Pstream->str() << '\n';

				Pstream.reset();
			}
			
			stream_ptr_type Tstream(new std::ostringstream);
			Pstream = Tstream;
			streamRefId = prevrefid;
		}

		if ( Pindex && (loadedRefId != prevrefid) )
		{
			refseqbases = Pindex->readSequence(*PCIS, Pindex->getSequenceIdByName(refidname));
			loadedRefId = prevrefid;

			if ( Mconsacc.find(loadedRefId) == Mconsacc.end() )
				Mconsacc[loadedRefId] = ConsensusAccuracy(refseqbases.size());

			consacc = &(Mconsacc[loadedRefId]);
		}
			
		H.toStream(std::cout,M.begin()->first,refidname,(M.begin()->first < refseqbases.size()) ? static_cast<int>(refseqbases[M.begin()->first]) : -1,Caux,consacc,Pstream.get());
		
		M.erase(M.begin());
	}
	
	if ( Pstream )
	{
		std::ostringstream fnostr;
		fnostr << outputprefix << "_" << header.getRefIDName(streamRefId);
		libmaus::aio::PosixFdOutputStream PFOS(fnostr.str());
		PFOS << ">" << header.getRefIDName(streamRefId) << '\n';
		PFOS << Pstream->str() << '\n';

		Pstream.reset();
	}
	
	ConsensusAccuracy constotal;
	for ( std::map<uint64_t,ConsensusAccuracy>::const_iterator ita = Mconsacc.begin(); ita != Mconsacc.end(); ++ita )
	{
		std::cerr << header.getRefIDName(ita->first) << "\t" << ita->second << std::endl;

		std::map<uint64_t,uint64_t> const M = ita->second.depthhistogram.get();
		uint64_t total = 0;
		uint64_t preavg = 0;
		for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita )
		{
			total += aita->second;
			preavg += aita->first * aita->second;
		}

		uint64_t acc = 0;		
		for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita )
		{
			acc += aita->second;
			std::cerr << "H[" << header.getRefIDName(ita->first) << "," << aita->first << ",+]"
				<< "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total
				<< "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl;
		}
		acc = 0;
		for ( std::map<uint64_t,uint64_t>::const_reverse_iterator aita = M.rbegin(); aita != M.rend(); ++aita )
		{
			acc += aita->second;
			std::cerr << "H[" << header.getRefIDName(ita->first) << "," << aita->first << ",-]"
				<< "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total
				<< "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl;
		}
		
		std::cerr << "H[" << header.getRefIDName(ita->first) << ",avg]\t" << 
			static_cast<double>(preavg)/total << std::endl;
		
		constotal += ita->second;
	}
	if ( Mconsacc.size() )
	{
		std::cerr << "all\t" << constotal << std::endl;

		std::map<uint64_t,uint64_t> const M = constotal.depthhistogram.get();
		uint64_t total = 0;
		uint64_t preavg = 0;
		for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita )
		{
			total += aita->second;
			preavg += aita->first * aita->second;
		}

		uint64_t acc = 0;		
		for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita )
		{
			acc += aita->second;
			std::cerr << "H[" << "all" << "," << aita->first << ",+]"
				<< "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total
				<< "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl;
		}
		acc = 0;
		for ( std::map<uint64_t,uint64_t>::const_reverse_iterator aita = M.rbegin(); aita != M.rend(); ++aita )
		{
			acc += aita->second;
			std::cerr << "H[" << "all" << "," << aita->first << ",-]"
				<< "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total
				<< "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl;
		}
		
		std::cerr << "H[all,avg]\t" << static_cast<double>(preavg) / total << std::endl;
		
	}

	return EXIT_SUCCESS;
}
			/**
			 * factory call operator
			 *
			 * @return unique pointer owning a newly allocated ZlibDecompressorObject
			 **/
			virtual DecompressorObject::unique_ptr_type operator()()
			{
				DecompressorObject::unique_ptr_type Tdecompressor(
					new ZlibDecompressorObject
				);

				return UNIQUE_PTR_MOVE(Tdecompressor);
			}
Esempio n. 29
0
			/**
			 * construct a decoder for one entry of this object
			 *
			 * @param in input stream handed to the decoder
			 * @param id index of the entry (as accepted by operator[]) to be decoded
			 * @return unique pointer owning a new FastABgzfDecoder built from in,
			 *         the entry with index id and the stored block size
			 **/
			FastABgzfDecoder::unique_ptr_type getStream(std::istream & in, uint64_t const id) const
			{
				FastABgzfDecoder::unique_ptr_type Tdecoder(
					new FastABgzfDecoder(in,this->operator[](id),blocksize)
				);

				return UNIQUE_PTR_MOVE(Tdecoder);
			}
Esempio n. 30
0
			static libmaus::autoarray::AutoArray<libmaus::bambam::BamRange::unique_ptr_type> parse(std::string const & ranges, libmaus::bambam::BamHeader const & header)
			{
				std::vector < std::string > const outertokens = splitSpace(ranges);
				libmaus::autoarray::AutoArray<libmaus::bambam::BamRange::unique_ptr_type> A(outertokens.size());

				for ( uint64_t i = 0; i < outertokens.size(); ++i )
				{
					std::string const & outertoken = outertokens[i];
					
					uint64_t sempos = outertoken.size();
					
					for ( uint64_t j = 0; j < outertoken.size(); ++j )
						if ( outertoken[j] == ':' )
							sempos = j;
							
					if ( sempos == outertoken.size() )
					{
						libmaus::bambam::BamRange::unique_ptr_type tAi(new libmaus::bambam::BamRangeChromosome(outertoken,header));
						A[i] = UNIQUE_PTR_MOVE(tAi);
					}
					else
					{
						std::string const refname = outertoken.substr(0,sempos);
						std::string const rest = outertoken.substr(sempos+1);
						
						// std::cerr << "refname=" << refname << " rest=" << rest << std::endl;
						
						uint64_t dashpos = rest.size();
						
						for ( uint64_t j = 0; j < rest.size(); ++j )
							if ( rest[j] == '-' )
								dashpos = j;
								
						if ( dashpos == rest.size() )
						{
							int64_t num = 0;
							for ( uint64_t j = 0; j < rest.size(); ++j )
								if ( isdigit(rest[j]) )
								{
									num *= 10;
									num += rest[j]-'0';
								}
								else if ( rest[j] == ',' )
								{
								
								}
								else
								{
									libmaus::exception::LibMausException se;
									se.getStream() << "Found invalid range character in " << rest << std::endl;
									se.finish();
									throw se;
								}
								
							libmaus::bambam::BamRange::unique_ptr_type tAi(new libmaus::bambam::BamRangeHalfOpen(refname,num-1,header));
							A[i] = UNIQUE_PTR_MOVE(tAi);
						}
						else
						{
							std::string const sstart = rest.substr(0,dashpos);
							std::string const send = rest.substr(dashpos+1);
						
							int64_t start = 0;
							for ( uint64_t j = 0; j < sstart.size(); ++j )
								if ( isdigit(sstart[j]) )
								{
									start *= 10;
									start += sstart[j]-'0';
								}
								else if ( sstart[j] == ',' )
								{
								
								}
								else
								{
									libmaus::exception::LibMausException se;
									se.getStream() << "Found invalid range character in " << sstart << std::endl;
									se.finish();
									throw se;
								}
							int64_t end = 0;
							for ( uint64_t j = 0; j < send.size(); ++j )
								if ( isdigit(send[j]) )
								{
									end *= 10;
									end += send[j]-'0';
								}
								else if ( send[j] == ',' )
								{
								
								}
								else
								{
									libmaus::exception::LibMausException se;
									se.getStream() << "Found invalid range character in " << send << std::endl;
									se.finish();
									throw se;
								}
								
							libmaus::bambam::BamRange::unique_ptr_type tAi(new libmaus::bambam::BamRangeInterval(refname,start-1,end,header));
							A[i] = UNIQUE_PTR_MOVE(tAi);						
						}
					}
				}
				
				return A;
			}