/**
 * @brief Return the id of the log instance called _name, registering it first
 *        if no entry with that name exists yet.
 * @param[in] _name Name of the log instance to look up or to create.
 * @return Index of the matching (or newly appended) entry in getList().
 */
int32_t elog::registerInstance(const std::string& _name) {
	// Reuse the slot of an already registered name.
	size_t index = 0;
	while (index < getList().size()) {
		if (getList()[index].first == _name) {
			return index;
		}
		++index;
	}
	// Unknown name: append a new entry carrying the default level.
	getList().push_back(std::make_pair(_name, getDefaultLevel()));
	// Keep the stored name width larger than the length of this name.
	if (getNameSizeLog() <= _name.size()) {
		getNameSizeLog() = _name.size()+1;
	}
	//std::cout << "register log : '" << _name << "'=" << getList().size()-1 << std::endl;
	return getList().size()-1;
}
int main(int argc, char * argv[]) { try { ::libmaus::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report" ) ); V.push_back ( std::pair<std::string,std::string> ( "colhashbits=<["+::biobambam::Licensing::formatNumber(getDefaultColHashBits())+"]>", "log_2 of size of hash table used for collation" ) ); V.push_back ( std::pair<std::string,std::string> ( "collistsize=<["+::biobambam::Licensing::formatNumber(getDefaultColListSize())+"]>", "output list size for collation" ) ); V.push_back ( std::pair<std::string,std::string> ( "tmpfile=<filename>", "prefix for temporary files, default: create files in current directory" ) ); V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam::Licensing::formatNumber(getDefaultLevel())+"]>", "compression settings for output bam file (0=uncompressed,1=fast,9=best,-1=zlib default)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5=<["+::biobambam::Licensing::formatNumber(getDefaultMD5())+"]>", "create md5 check sum (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5filename=<filename>", "file name for md5 check sum (default: extend output file name)" ) ); V.push_back ( std::pair<std::string,std::string> ( "index=<["+::biobambam::Licensing::formatNumber(getDefaultIndex())+"]>", "create BAM index (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( 
"indexfilename=<filename>", "file name for BAM index file (default: extend output file name)" ) ); ::biobambam::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bamfixmatecoordinates(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
int main(int argc, char * argv[]) { try { ::libmaus::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam::Licensing::formatNumber(getDefaultLevel())+"]>", "compression settings for output bam file (0=uncompressed,1=fast,9=best,-1=zlib default)" ) ); V.push_back ( std::pair<std::string,std::string> ( "SO=<["+getDefaultSortOrder()+"]>", "sorting order (coordinate or queryname)" ) ); V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report" ) ); V.push_back ( std::pair<std::string,std::string> ( "blockmb=<["+::biobambam::Licensing::formatNumber(getDefaultBlockSize())+"]>", "size of internal memory buffer used for sorting in MiB" ) ); V.push_back ( std::pair<std::string,std::string> ( "disablevalidation=<["+::biobambam::Licensing::formatNumber(getDefaultDisableValidation())+"]>", "disable input validation (default is 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "tmpfile=<filename>", "prefix for temporary files, default: create files in current directory" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5=<["+::biobambam::Licensing::formatNumber(getDefaultMD5())+"]>", "create md5 check sum (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5filename=<filename>", "file name for md5 check sum" ) ); V.push_back ( std::pair<std::string,std::string> ( 
"index=<["+::biobambam::Licensing::formatNumber(getDefaultIndex())+"]>", "create BAM index (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "indexfilename=<filename>", "file name for BAM index file" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("inputformat=<[")+getDefaultInputFormat()+"]>", std::string("input format (") + libmaus::bambam::BamMultiAlignmentDecoderFactory::getValidInputFormats() + ")" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("outputformat=<[")+libmaus::bambam::BamBlockWriterBaseFactory::getDefaultOutputFormat()+"]>", std::string("output format (") + libmaus::bambam::BamBlockWriterBaseFactory::getValidOutputFormats() + ")" ) ); V.push_back ( std::pair<std::string,std::string> ( "I=<[stdin]>", "input filename (standard input if unset)" ) ); V.push_back ( std::pair<std::string,std::string> ( "inputthreads=<[1]>", "input helper threads (for inputformat=bam only, default: 1)" ) ); V.push_back ( std::pair<std::string,std::string> ( "reference=<>", "reference FastA (.fai file required, for cram i/o only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "range=<>", "coordinate range to be processed (for coordinate sorted indexed BAM input only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "outputthreads=<[1]>", "output helper threads (for outputformat=bam only, default: 1)" ) ); V.push_back ( std::pair<std::string,std::string> ( "O=<[stdout]>", "output filename (standard output if unset)" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("fixmates=<[")+::biobambam::Licensing::formatNumber(getDefaultFixMates())+"]>", "fix mate information (for name collated input only, disabled by default)" ) ); ::biobambam::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bamsort(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
int main(int argc, char * argv[]) { try { ::libmaus2::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam2::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam2::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam2::Licensing::formatNumber(getDefaultLevel())+"]>", libmaus2::bambam::BamBlockWriterBaseFactory::getBamOutputLevelHelpText() ) ); V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam2::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress information" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5=<["+::biobambam2::Licensing::formatNumber(getDefaultMD5())+"]>", "create md5 check sum (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5filename=<filename>", "file name for md5 check sum (default: extend output file name)" ) ); V.push_back ( std::pair<std::string,std::string> ( "index=<["+::biobambam2::Licensing::formatNumber(getDefaultIndex())+"]>", "create BAM index (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "indexfilename=<filename>", "file name for BAM index file (default: extend output file name)" ) ); V.push_back ( std::pair<std::string,std::string> ( "tmpfile=<filename>", "prefix for temporary files, default: create files in current directory" ) ); ::biobambam2::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bamclipreinsert(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
int main(int argc, char * argv[]) { try { libmaus::timing::RealTimeClock rtc; rtc.start(); ::libmaus::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam::Licensing::license() << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( std::string("cols=<[")+libmaus::util::NumberSerialisation::formatNumber(getDefaultCols(),0)+"]>", "column width" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("bgzf=<[")+libmaus::util::NumberSerialisation::formatNumber(getDefaultBgzf(),0)+"]>", "compress output" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("index=<>"), "file name for index if bgzf=1 (no index is created if key is not given)" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("level=<[")+::biobambam::Licensing::formatNumber(getDefaultLevel())+"]>", std::string("compression level if bgzf=1 (") + libmaus::bambam::BamBlockWriterBaseFactory::getLevelHelpText() + std::string(")") ) ); ::biobambam::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } normalisefasta(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
int main(int argc, char * argv[]) { try { libmaus::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam::Licensing::formatNumber(getDefaultLevel())+"]>", libmaus::bambam::BamBlockWriterBaseFactory::getBamOutputLevelHelpText() ) ); V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report" ) ); V.push_back ( std::pair<std::string,std::string> ( "I=<[input filename]>", "name of the input file (mandatory)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5=<["+::biobambam::Licensing::formatNumber(getDefaultMD5())+"]>", "create md5 check sum (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5filename=<filename>", "file name for md5 check sum (default: extend output file name)" ) ); ::biobambam::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bamfilterheader2(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
int main(int argc, char * argv[]) { try { ::libmaus::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam::Licensing::formatNumber(getDefaultLevel())+"]>", libmaus::bambam::BamBlockWriterBaseFactory::getBamOutputLevelHelpText() ) ); V.push_back ( std::pair<std::string,std::string> ( "minmapped=<["+::biobambam::Licensing::formatNumber(getDefaultMinMapped())+"]>", "minimum number of mapped fragments in a template" ) ); V.push_back ( std::pair<std::string,std::string> ( "maxmapped=<["+::biobambam::Licensing::formatNumber(getDefaultMaxMapped())+"]>", "maximum number of mapped fragments in a template" ) ); V.push_back ( std::pair<std::string,std::string> ( "minlen=<["+::biobambam::Licensing::formatNumber(getDefaultMinLen())+"]>", "minimum sequence length" ) ); V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report (default: 1)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5=<["+::biobambam::Licensing::formatNumber(getDefaultMD5())+"]>", "create md5 check sum (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5filename=<filename>", "file name for md5 check sum (default: extend output file name)" ) ); V.push_back ( std::pair<std::string,std::string> ( "index=<["+::biobambam::Licensing::formatNumber(getDefaultIndex())+"]>", "create BAM index (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( 
"indexfilename=<filename>", "file name for BAM index file (default: extend output file name)" ) ); ::biobambam::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bamfilter(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
/*
 * Read a BAM stream from standard input, pass every alignment through
 * resetAlignment() and write the alignments it reports as kept to standard
 * output as BAM, using a rewritten header.
 *
 * Keys read from arginfo in this function: level, verbose, resetsortorder,
 * resetheadertext, tmpfile, exclude, md5, md5filename, index, indexfilename
 * and resetaux; arginfo is additionally handed to
 * BamAuxFilterVector::parseAuxFilterList().
 *
 * Throws a LibMausException if standard input or standard output is a
 * terminal. Returns EXIT_SUCCESS after the stream has been processed.
 */
int bamreset(::libmaus2::util::ArgInfo const & arginfo)
{
	// binary data is neither read from nor written to a terminal
	if ( isatty(STDIN_FILENO) )
	{
		::libmaus2::exception::LibMausException ex;
		ex.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
		ex.finish();
		throw ex;
	}
	if ( isatty(STDOUT_FILENO) )
	{
		::libmaus2::exception::LibMausException ex;
		ex.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
		ex.finish();
		throw ex;
	}

	int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(
		arginfo.getValue<int>("level",getDefaultLevel())
	);
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	int const resetsortorder = arginfo.getValue<int>("resetsortorder",getDefaultResetSortOrder());

	::libmaus2::bambam::BamDecoder dec(std::cin,false);
	::libmaus2::bambam::BamHeader const & header = dec.getHeader();
	std::string headertext = header.text;

	if ( arginfo.hasArg("resetheadertext") )
	{
		// replacement header given: use the complete content of that file as header text
		std::string const headerfilename = arginfo.getUnparsedValue("resetheadertext","");
		uint64_t const headerlen = libmaus2::util::GetFileSize::getFileSize(headerfilename);
		libmaus2::aio::CheckedInputStream CIS(headerfilename);
		libmaus2::autoarray::AutoArray<char> ctext(headerlen,false);
		CIS.read(ctext.begin(),headerlen);
		headertext = std::string(ctext.begin(),ctext.end());
	}
	else
	{
		// no replacement header given: keep the input header without its SQ lines
		std::vector<libmaus2::bambam::HeaderLine> allheaderlines =
			libmaus2::bambam::HeaderLine::extractLines(headertext);

		std::ostringstream filtered;
		for ( uint64_t lineidx = 0; lineidx < allheaderlines.size(); ++lineidx )
		{
			if ( allheaderlines[lineidx].type != "SQ" )
			{
				filtered << allheaderlines[lineidx].line << std::endl;
			}
		}

		headertext = filtered.str();
	}

	// append a PG line for this program
	headertext = libmaus2::bambam::ProgramHeaderLineSet::addProgramLine(
		headertext,
		"bamreset", // ID
		"bamreset", // PN
		arginfo.commandline, // CL
		::libmaus2::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);

	// header object for the output
	libmaus2::bambam::BamHeader uphead(headertext);
	if ( resetsortorder )
	{
		uphead.changeSortOrder("unknown");
	}

	// temporary file handed to the index callback
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus2::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	uint32_t const excludeflags = libmaus2::bambam::BamFlagBase::stringToFlags(
		arginfo.getValue<std::string>("exclude",getDefaultExcludeFlags())
	);

	// optional md5/index callbacks attached to the BAM writer
	std::string md5filename;
	std::string indexfilename;
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > cbs;

	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		bool const havemd5name =
			arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "";

		if ( havemd5name )
		{
			md5filename = arginfo.getUnparsedValue("md5filename","");
		}
		else
		{
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		}

		if ( !md5filename.empty() )
		{
			::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}

	libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		bool const haveindexname =
			arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "";

		if ( haveindexname )
		{
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		}
		else
		{
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;
		}

		if ( !indexfilename.empty() )
		{
			libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}

	// the writer receives a null pointer if no callback was set up
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( !cbs.empty() )
	{
		Pcbs = &cbs;
	}

	::libmaus2::bambam::BamWriter::unique_ptr_type writer(
		new ::libmaus2::bambam::BamWriter(std::cout,uphead,level,Pcbs)
	);

	libmaus2::timing::RealTimeClock rtc;
	rtc.start();

	libmaus2::bambam::BamAlignment & algn = dec.getAlignment();
	uint64_t c = 0;

	bool const resetaux = arginfo.getValue<int>("resetaux",getDefaultResetAux());
	libmaus2::bambam::BamAuxFilterVector::unique_ptr_type const prgfilter(
		libmaus2::bambam::BamAuxFilterVector::parseAuxFilterList(arginfo)
	);
	libmaus2::bambam::BamAuxFilterVector const * rgfilter = prgfilter.get();

	while ( dec.readAlignment() )
	{
		// write the alignment only if resetAlignment reports it as kept
		if ( resetAlignment(algn,resetaux /* reset aux */,excludeflags,rgfilter) )
		{
			algn.serialise(writer->getStream());
		}

		// the counter is only maintained in verbose mode; report every 2^20 alignments
		if ( verbose )
		{
			++c;
			if ( (c & (1024*1024-1)) == 0 )
			{
				std::cerr << "[V] " << c/(1024*1024) << " " << (c / rtc.getElapsedSeconds()) << std::endl;
			}
		}
	}

	// release the writer before the callback results are stored
	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
/*
 * Read a BAM stream from standard input, pass every alignment through
 * split12() and write the alignments for which it returns true to standard
 * output as BAM. The output header carries an additional PG line and has its
 * sort order set to "unknown".
 *
 * Keys read from arginfo in this function: level, verbose, tmpfile, md5,
 * md5filename, index and indexfilename; the presence of keep and remove is
 * checked as well.
 *
 * Throws a LibMausException if standard input or standard output is a
 * terminal, or if both keep and remove are given. Returns EXIT_SUCCESS after
 * the stream has been processed.
 */
int bam12split(::libmaus::util::ArgInfo const & arginfo)
{
	::libmaus::util::TempFileRemovalContainer::setup();

	// binary data is neither read from nor written to a terminal
	if ( isatty(STDIN_FILENO) )
	{
		::libmaus::exception::LibMausException ex;
		ex.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
		ex.finish();
		throw ex;
	}
	if ( isatty(STDOUT_FILENO) )
	{
		::libmaus::exception::LibMausException ex;
		ex.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
		ex.finish();
		throw ex;
	}

	// NOTE(review): keep/remove are only checked for exclusivity here, neither key is read afterwards in this function
	if ( arginfo.hasArg("keep") && arginfo.hasArg("remove") )
	{
		::libmaus::exception::LibMausException ex;
		ex.getStream() << "The keep and remove keys are mutually exclusive." << std::endl;
		ex.finish();
		throw ex;
	}

	int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(
		arginfo.getValue<int>("level",getDefaultLevel())
	);
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());

	::libmaus::bambam::BamDecoder dec(std::cin,false);
	::libmaus::bambam::BamHeader const & header = dec.getHeader();
	std::string const headertext(header.text);

	// append a PG line for this program
	std::string const upheadtext = ::libmaus::bambam::ProgramHeaderLineSet::addProgramLine(
		headertext,
		"bam12split", // ID
		"bam12split", // PN
		arginfo.commandline, // CL
		::libmaus::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);

	// header object for the output
	libmaus::bambam::BamHeader uphead(upheadtext);
	uphead.changeSortOrder("unknown");

	// temporary file handed to the index callback
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	// optional md5/index callbacks attached to the BAM writer
	std::string md5filename;
	std::string indexfilename;
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;

	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		bool const havemd5name =
			arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "";

		if ( havemd5name )
		{
			md5filename = arginfo.getUnparsedValue("md5filename","");
		}
		else
		{
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		}

		if ( !md5filename.empty() )
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}

	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		bool const haveindexname =
			arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "";

		if ( haveindexname )
		{
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		}
		else
		{
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;
		}

		if ( !indexfilename.empty() )
		{
			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}

	// the writer receives a null pointer if no callback was set up
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( !cbs.empty() )
	{
		Pcbs = &cbs;
	}

	::libmaus::bambam::BamWriter::unique_ptr_type writer(
		new ::libmaus::bambam::BamWriter(std::cout,uphead,level,Pcbs)
	);

	libmaus::bambam::BamAlignment & algn = dec.getAlignment();
	uint64_t c = 0;

	while ( dec.readAlignment() )
	{
		// write the alignment only if split12 returns true for it
		if ( split12(algn) )
		{
			algn.serialise(writer->getStream());
		}

		// the counter is only maintained in verbose mode; report every 2^20 alignments
		if ( verbose )
		{
			++c;
			if ( (c & (1024*1024-1)) == 0 )
			{
				std::cerr << "[V] " << c/(1024*1024) << std::endl;
			}
		}
	}

	// release the writer before the callback results are stored
	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
/*
 * Read a BAM stream from standard input, AND the flags of every alignment
 * with the mask (maskpos & ~maskneg) and write the alignments to standard
 * output as BAM. If resetmatecoord is set, the next reference id and next
 * position of every alignment are set to -1 as well.
 *
 * Keys read from arginfo in this function: maskpos, maskneg, level,
 * resetmatecoord, tmpfile, md5, md5filename, index and indexfilename.
 *
 * A summary of the kept and erased flags is printed on std::cerr before the
 * stream is processed. Returns EXIT_SUCCESS.
 */
int bammaskflags(::libmaus2::util::ArgInfo const & arginfo)
{
	uint64_t const maskpos = arginfo.getValue<uint64_t>("maskpos",0xFFFFUL);
	uint64_t const maskneg = arginfo.getValue<uint64_t>("maskneg",getDefaultMaskNeg());

	// a flag bit survives if it is set in maskpos and not set in maskneg
	uint64_t const mask = maskpos & (~maskneg);

	if ( mask == 0 )
	{
		std::cerr << "Erasing all flags." << std::endl;
	}
	else
	{
		// report the flags up to and including FSUPPLEMENTARY which are kept ...
		std::cerr << "Keeping flags ";
		for ( uint64_t flagbit = 1; flagbit <= ::libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_FSUPPLEMENTARY; flagbit <<= 1 )
		{
			if ( (mask & flagbit) != 0 )
			{
				std::cerr << static_cast< ::libmaus2::bambam::BamFlagBase::bam_flags >(flagbit) << ";";
			}
		}
		std::cerr << std::endl;

		// ... and those which are erased
		std::cerr << "Erasing flags ";
		for ( uint64_t flagbit = 1; flagbit <= ::libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_FSUPPLEMENTARY; flagbit <<= 1 )
		{
			if ( (mask & flagbit) == 0 )
			{
				std::cerr << static_cast< ::libmaus2::bambam::BamFlagBase::bam_flags >(flagbit) << ";";
			}
		}
		std::cerr << std::endl;
	}

	int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(
		arginfo.getValue<int>("level",getDefaultLevel())
	);
	int const resetmatecoord = arginfo.getValue<int>("resetmatecoord",0);

	::libmaus2::bambam::BamDecoder BD(std::cin);
	::libmaus2::bambam::BamHeader const & bamheader = BD.getHeader();
	std::string const headertext(bamheader.text);

	// append a PG line for this program
	std::string const upheadtext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLine(
		headertext,
		"bammaskflags", // ID
		"bammaskflags", // PN
		arginfo.commandline, // CL
		::libmaus2::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);

	// header object for the output
	::libmaus2::bambam::BamHeader uphead(upheadtext);
	::libmaus2::bambam::BamAlignment & alignment = BD.getAlignment();

	// temporary file handed to the index callback
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus2::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	// optional md5/index callbacks attached to the BAM writer
	std::string md5filename;
	std::string indexfilename;
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > cbs;

	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		bool const havemd5name =
			arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "";

		if ( havemd5name )
		{
			md5filename = arginfo.getUnparsedValue("md5filename","");
		}
		else
		{
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		}

		if ( !md5filename.empty() )
		{
			::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}

	libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		bool const haveindexname =
			arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "";

		if ( haveindexname )
		{
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		}
		else
		{
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;
		}

		if ( !indexfilename.empty() )
		{
			libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}

	// the writer receives a null pointer if no callback was set up
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( !cbs.empty() )
	{
		Pcbs = &cbs;
	}

	::libmaus2::bambam::BamWriter::unique_ptr_type writer(
		new ::libmaus2::bambam::BamWriter(std::cout,uphead,level,Pcbs)
	);

	while ( BD.readAlignment() )
	{
		// drop all flag bits which are not part of the mask
		alignment.putFlags(alignment.getFlags() & mask);

		if ( resetmatecoord )
		{
			alignment.putNextRefId(-1);
			alignment.putNextPos(-1);
		}

		alignment.serialise(writer->getStream());
	}

	// release the writer before the callback results are stored
	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
int main(int argc, char * argv[]) { try { ::libmaus2::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam2::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam2::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam2::Licensing::formatNumber(getDefaultLevel())+"]>", libmaus2::bambam::BamBlockWriterBaseFactory::getBamOutputLevelHelpText() ) ); V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam2::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress information" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5=<["+::biobambam2::Licensing::formatNumber(getDefaultMD5())+"]>", "create md5 check sum (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5filename=<filename>", "file name for md5 check sum" ) ); V.push_back ( std::pair<std::string,std::string> ( "resetheadertext=[<>]", "replacement SAM header text file (default: filter header in source BAM file)" ) ); V.push_back ( std::pair<std::string,std::string> ( "exclude=["+getDefaultExcludeFlags()+"]", "drop alignments having any of the given flags set" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("resetaux=<[")+::biobambam2::Licensing::formatNumber(getDefaultResetAux())+"]>", "reset auxiliary fields" ) ); V.push_back ( std::pair<std::string,std::string> ( "auxfilter=[<>]", "comma separated list of aux tags to keep if resetaux=0 (default: keep all)" ) ); V.push_back ( std::pair<std::string,std::string> ( "resetsortorder=<["+::biobambam2::Licensing::formatNumber(getDefaultResetSortOrder())+"]>", "set sort 
order to unknown (default: 1)" ) ); ::biobambam2::Licensing::printMap(std::cerr,V); std::cerr << std::endl; std::cerr << "Alignment flags: PAIRED,PROPER_PAIR,UNMAP,MUNMAP,REVERSE,MREVERSE,READ1,READ2,SECONDARY,QCFAIL,DUP,SUPPLEMENTARY" << std::endl; return EXIT_SUCCESS; } return bamreset(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
int main(int argc, char * argv[]) { try { ::libmaus2::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam2::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam2::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam2::Licensing::formatNumber(getDefaultVerbose())+"]>", "print stats at the end of a successfull run" ) ); V.push_back ( std::pair<std::string,std::string> ( "basequalhist=<["+::biobambam2::Licensing::formatNumber(getDefaultBaseQualHist())+"]>", "print base quality histogram at end of a successfull run" ) ); V.push_back ( std::pair<std::string,std::string> ( "passthrough=<["+::biobambam2::Licensing::formatNumber(getDefaultPassThrough())+"]>", "write alignments to standard output (default: do not pass through)" ) ); V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam2::Licensing::formatNumber(getDefaultLevel())+"]>", libmaus2::bambam::BamBlockWriterBaseFactory::getBamOutputLevelHelpText() ) ); V.push_back ( std::pair<std::string,std::string> ( "tmpfile=<filename>", "prefix for temporary files, default: create files in current directory (passthrough=1, index=1 only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5=<["+::biobambam2::Licensing::formatNumber(getDefaultMD5())+"]>", "create md5 check sum (default: 0, passthrough=1 only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5filename=<filename>", "file name for md5 check sum" ) ); V.push_back ( std::pair<std::string,std::string> ( "index=<["+::biobambam2::Licensing::formatNumber(getDefaultIndex())+"]>", "create BAM index (default: 0, 
passthrough=1 only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "indexfilename=<filename>", "file name for BAM index file" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("inputformat=<[")+getDefaultInputFormat()+"]>", std::string("input format (") + libmaus2::bambam::BamMultiAlignmentDecoderFactory::getValidInputFormats() + ")" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("outputformat=<[")+libmaus2::bambam::BamBlockWriterBaseFactory::getDefaultOutputFormat()+"]>", std::string("output format (") + libmaus2::bambam::BamBlockWriterBaseFactory::getValidOutputFormats() + ", passthrough=1 only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "I=<[stdin]>", "input filename (standard input if unset)" ) ); V.push_back ( std::pair<std::string,std::string> ( "inputthreads=<[1]>", "input helper threads (for inputformat=bam only, default: 1)" ) ); V.push_back ( std::pair<std::string,std::string> ( "reference=<>", "reference FastA (.fai file required, for cram i/o only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "range=<>", "coordinate range to be processed (for coordinate sorted indexed BAM input only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "outputthreads=<[1]>", "output helper threads (for outputformat=bam only, default: 1, passthrough=1 only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "O=<[stdout]>", "output filename (standard output if unset, passthrough=1 only)" ) ); ::biobambam2::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bamvalidate(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
int main(int argc, char * argv[]) { try { libmaus2::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam2::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam2::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam2::Licensing::formatNumber(getDefaultLevel())+"]>", libmaus2::bambam::BamBlockWriterBaseFactory::getBamOutputLevelHelpText() ) ); V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam2::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("inputformat=<[")+getDefaultInputFormat()+"]>", std::string("input format (") + libmaus2::bambam::BamMultiAlignmentDecoderFactory::getValidInputFormats() + ")" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("outputformat=<[")+libmaus2::bambam::BamBlockWriterBaseFactory::getDefaultOutputFormat()+"]>", std::string("output format (") + libmaus2::bambam::BamBlockWriterBaseFactory::getValidOutputFormats() + ")" ) ); V.push_back ( std::pair<std::string,std::string> ( "I=<[stdin]>", "input filename (standard input if unset)" ) ); V.push_back ( std::pair<std::string,std::string> ( "inputthreads=<[1]>", "input helper threads (for inputformat=bam only, default: 1)" ) ); V.push_back ( std::pair<std::string,std::string> ( "reference=<>", "reference FastA (.fai file required, for cram i/o only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "range=<>", "coordinate range to be processed (for coordinate sorted indexed BAM input only)" ) ); V.push_back ( 
std::pair<std::string,std::string> ( "outputthreads=<[1]>", "output helper threads (for outputformat=bam only, default: 1)" ) ); V.push_back ( std::pair<std::string,std::string> ( "O=<[stdout]>", "output filename (standard output if unset)" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("prefix=<[")+getDefaultPrefix()+"]>", "prefix of output file names" ) ); V.push_back ( std::pair<std::string,std::string> ( "thres=<["+::biobambam2::Licensing::formatNumber(getDefaultSizeThres())+"]>", "size threshold for the creation of next file" ) ); ::biobambam2::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bamexplode(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
int bamsort(::libmaus::util::ArgInfo const & arginfo) { ::libmaus::util::TempFileRemovalContainer::setup(); bool const inputisstdin = (!arginfo.hasArg("I")) || (arginfo.getUnparsedValue("I","-") == "-"); bool const outputisstdout = (!arginfo.hasArg("O")) || (arginfo.getUnparsedValue("O","-") == "-"); if ( isatty(STDIN_FILENO) && inputisstdin && (arginfo.getValue<std::string>("inputformat","bam") != "sam") ) { ::libmaus::exception::LibMausException se; se.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl; se.finish(); throw se; } if ( isatty(STDOUT_FILENO) && outputisstdout && (arginfo.getValue<std::string>("outputformat","bam") != "sam") ) { ::libmaus::exception::LibMausException se; se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl; se.finish(); throw se; } int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose()); bool const disablevalidation = arginfo.getValue<int>("disablevalidation",getDefaultDisableValidation()); std::string const inputformat = arginfo.getUnparsedValue("inputformat",getDefaultInputFormat()); int const level = arginfo.getValue<int>("level",getDefaultLevel()); switch ( level ) { case Z_NO_COMPRESSION: case Z_BEST_SPEED: case Z_BEST_COMPRESSION: case Z_DEFAULT_COMPRESSION: break; default: { ::libmaus::exception::LibMausException se; se.getStream() << "Unknown compression level, please use" << " level=" << Z_DEFAULT_COMPRESSION << " (default) or" << " level=" << Z_BEST_SPEED << " (fast) or" << " level=" << Z_BEST_COMPRESSION << " (best) or" << " level=" << Z_NO_COMPRESSION << " (no compression)" << std::endl; se.finish(); throw se; } break; } // prefix for tmp files std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName()); std::string const tmpfilenameout = tmpfilenamebase + "_bamsort"; 
::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfilenameout); uint64_t blockmem = arginfo.getValue<uint64_t>("blockmb",getDefaultBlockSize())*1024*1024; std::string const sortorder = arginfo.getValue<std::string>("SO","coordinate"); bool const fixmates = arginfo.getValue<int>("fixmates",getDefaultFixMates()); uint64_t sortthreads = arginfo.getValue<uint64_t>("sortthreads",getDefaultSortThreads()); // input decoder wrapper libmaus::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper( libmaus::bambam::BamMultiAlignmentDecoderFactory::construct( arginfo,false // put rank ) ); ::libmaus::bambam::BamAlignmentDecoder * ppdec = &(decwrapper->getDecoder()); ::libmaus::bambam::BamAlignmentDecoder & dec = *ppdec; if ( disablevalidation ) dec.disableValidation(); ::libmaus::bambam::BamHeader const & header = dec.getHeader(); std::string const headertext(header.text); // add PG line to header std::string const upheadtext = ::libmaus::bambam::ProgramHeaderLineSet::addProgramLine( headertext, "bamsort", // ID "bamsort", // PN arginfo.commandline, // CL ::libmaus::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP std::string(PACKAGE_VERSION) // VN ); // construct new header ::libmaus::bambam::BamHeader uphead(upheadtext); /* * start index/md5 callbacks */ std::string const tmpfileindex = tmpfilenamebase + "_index"; ::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex); std::string md5filename; std::string indexfilename; std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs; ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb; if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) ) { if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" ) md5filename = arginfo.getUnparsedValue("md5filename",""); else std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl; if ( md5filename.size() ) { ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type 
Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5); Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb); cbs.push_back(Pmd5cb.get()); } } libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex; if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) ) { if ( arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "" ) indexfilename = arginfo.getUnparsedValue("indexfilename",""); else std::cerr << "[V] no filename for index given, not creating index" << std::endl; if ( indexfilename.size() ) { libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex)); Pindex = UNIQUE_PTR_MOVE(Tindex); cbs.push_back(Pindex.get()); } } std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0; if ( cbs.size() ) Pcbs = &cbs; /* * end md5/index callbacks */ if ( sortorder != "queryname" ) uphead.changeSortOrder("coordinate"); else uphead.changeSortOrder("queryname"); libmaus::bambam::BamBlockWriterBase::unique_ptr_type Pout ( libmaus::bambam::BamBlockWriterBaseFactory::construct(uphead, arginfo, Pcbs) ); if ( fixmates ) { if ( sortorder != "queryname" ) { ::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentPosComparator > BEC(blockmem,tmpfilenameout,sortthreads); if ( verbose ) std::cerr << "[V] Reading alignments from source." 
<< std::endl; uint64_t incnt = 0; // current alignment libmaus::bambam::BamAlignment & curalgn = dec.getAlignment(); // previous alignment libmaus::bambam::BamAlignment prevalgn; // previous alignment valid bool prevalgnvalid = false; // MQ field filter libmaus::bambam::BamAuxFilterVector MQfilter; MQfilter.set("MQ"); while ( dec.readAlignment() ) { if ( curalgn.isSecondary() || curalgn.isSupplementary() ) { BEC.putAlignment(curalgn); } else if ( prevalgnvalid ) { // different name if ( strcmp(curalgn.getName(),prevalgn.getName()) ) { BEC.putAlignment(prevalgn); curalgn.swap(prevalgn); } // same name else { libmaus::bambam::BamAlignment::fixMateInformation(prevalgn,curalgn,MQfilter); BEC.putAlignment(prevalgn); BEC.putAlignment(curalgn); prevalgnvalid = false; } } else { prevalgn.swap(curalgn); prevalgnvalid = true; } if ( verbose && ( ( ++incnt & ((1ull<<20)-1) ) == 0 ) ) std::cerr << "[V] " << incnt << std::endl; } if ( prevalgnvalid ) { BEC.putAlignment(prevalgn); prevalgnvalid = false; } if ( verbose ) std::cerr << "[V] read " << incnt << " alignments" << std::endl; // BEC.createOutput(std::cout, uphead, level, verbose, Pcbs); BEC.createOutput(*Pout, verbose); } else { ::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentNameComparator > BEC(blockmem,tmpfilenameout,sortthreads); if ( verbose ) std::cerr << "[V] Reading alignments from source." 
<< std::endl; uint64_t incnt = 0; // current alignment libmaus::bambam::BamAlignment & curalgn = dec.getAlignment(); // previous alignment libmaus::bambam::BamAlignment prevalgn; // previous alignment valid bool prevalgnvalid = false; // MQ field filter libmaus::bambam::BamAuxFilterVector MQfilter; MQfilter.set("MQ"); while ( dec.readAlignment() ) { if ( curalgn.isSecondary() || curalgn.isSupplementary() ) { BEC.putAlignment(curalgn); } else if ( prevalgnvalid ) { // different name if ( strcmp(curalgn.getName(),prevalgn.getName()) ) { BEC.putAlignment(prevalgn); curalgn.swap(prevalgn); } // same name else { libmaus::bambam::BamAlignment::fixMateInformation(prevalgn,curalgn,MQfilter); BEC.putAlignment(prevalgn); BEC.putAlignment(curalgn); prevalgnvalid = false; } } else { prevalgn.swap(curalgn); prevalgnvalid = true; } if ( verbose && ( ( ++incnt & ((1ull<<20)-1) ) == 0 ) ) std::cerr << "[V] " << incnt << std::endl; } if ( prevalgnvalid ) { BEC.putAlignment(prevalgn); prevalgnvalid = false; } if ( verbose ) std::cerr << "[V] read " << incnt << " alignments" << std::endl; // BEC.createOutput(std::cout, uphead, level, verbose, Pcbs); BEC.createOutput(*Pout, verbose); } } else { if ( sortorder != "queryname" ) { ::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentPosComparator > BEC(blockmem,tmpfilenameout,sortthreads); if ( verbose ) std::cerr << "[V] Reading alignments from source." 
<< std::endl; uint64_t incnt = 0; while ( dec.readAlignment() ) { BEC.putAlignment(dec.getAlignment()); incnt++; if ( verbose && (incnt % (1024*1024) == 0) ) std::cerr << "[V] " << incnt/(1024*1024) << "M" << std::endl; } if ( verbose ) std::cerr << "[V] read " << incnt << " alignments" << std::endl; // BEC.createOutput(std::cout, uphead, level, verbose, Pcbs); BEC.createOutput(*Pout, verbose); } else { ::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentNameComparator > BEC(blockmem,tmpfilenameout,sortthreads); if ( verbose ) std::cerr << "[V] Reading alignments from source." << std::endl; uint64_t incnt = 0; while ( dec.readAlignment() ) { BEC.putAlignment(dec.getAlignment()); incnt++; if ( verbose && (incnt % (1024*1024) == 0) ) std::cerr << "[V] " << incnt/(1024*1024) << "M" << std::endl; } if ( verbose ) std::cerr << "[V] read " << incnt << " alignments" << std::endl; // BEC.createOutput(std::cout, uphead, level, verbose, Pcbs); BEC.createOutput(*Pout, verbose); } } // flush encoder so callbacks see all output data Pout.reset(); if ( Pmd5cb ) { Pmd5cb->saveDigestAsFile(md5filename); } if ( Pindex ) { Pindex->flush(std::string(indexfilename)); } return EXIT_SUCCESS; }
/**
 * Apply one log level globally: store it as the default level and assign
 * it to every entry currently held in the instance list.
 *
 * @param _level level to apply
 */
void elog::setLevel(enum level _level) {
	getDefaultLevel() = _level;
	// overwrite the level stored next to each registered name
	const size_t nbInstance = getList().size();
	for (size_t idx = 0; idx != nbInstance; ++idx) {
		getList()[idx].second = _level;
	}
}
/**
 * Concatenate BAM inputs and write one BAM stream to standard output.
 *
 * Input file names are taken from all I keys followed by the positional
 * arguments. The header passed to the writer is produced by updateHeader().
 * If md5/index are switched on and a non-empty md5filename/indexfilename is
 * given, the corresponding output callback is attached to the writer and
 * its result is stored after the writer has been destroyed.
 *
 * @param arginfo parsed command line
 * @return EXIT_SUCCESS
 * @throws ::libmaus::exception::LibMausException if standard output is a
 *         terminal or level is not one of the four accepted zlib constants
 */
int bamcat(libmaus::util::ArgInfo const & arginfo)
{
	// refuse to write binary output to a terminal
	if ( isatty(STDOUT_FILENO) )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	int const level = arginfo.getValue<int>("level",getDefaultLevel());
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());

	bool const levelvalid =
		(level == Z_NO_COMPRESSION) ||
		(level == Z_BEST_SPEED) ||
		(level == Z_BEST_COMPRESSION) ||
		(level == Z_DEFAULT_COMPRESSION);

	if ( ! levelvalid )
	{
		::libmaus::exception::LibMausException se;
		se.getStream()
			<< "Unknown compression level, please use"
			<< " level=" << Z_DEFAULT_COMPRESSION << " (default) or"
			<< " level=" << Z_BEST_SPEED << " (fast) or"
			<< " level=" << Z_BEST_COMPRESSION << " (best) or"
			<< " level=" << Z_NO_COMPRESSION << " (no compression)" << std::endl;
		se.finish();
		throw se;
	}

	// I=... values first, then the positional arguments
	std::vector<std::string> inputfilenames = arginfo.getPairValues("I");
	for ( uint64_t restidx = 0; restidx < arginfo.restargs.size(); ++restidx )
		inputfilenames.push_back(arginfo.restargs[restidx]);

	libmaus::bambam::BamCat bamdec(inputfilenames /* ,true */);
	libmaus::bambam::BamAlignment const & algn = bamdec.getAlignment();
	libmaus::bambam::BamHeader const & header = bamdec.getHeader();
	::libmaus::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header));

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;

	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		bool const havemd5name =
			arginfo.hasArg("md5filename") && (arginfo.getUnparsedValue("md5filename","") != "");

		if ( havemd5name )
		{
			md5filename = arginfo.getUnparsedValue("md5filename","");

			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
		else
		{
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
		}
	}

	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		bool const haveindexname =
			arginfo.hasArg("indexfilename") && (arginfo.getUnparsedValue("indexfilename","") != "");

		if ( haveindexname )
		{
			indexfilename = arginfo.getUnparsedValue("indexfilename","");

			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
		else
		{
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;
		}
	}

	// null pointer when no callback was set up
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * const Pcbs = cbs.empty() ? 0 : &cbs;
	/*
	 * end md5/index callbacks
	 */

	::libmaus::bambam::BamWriter::unique_ptr_type writer(new ::libmaus::bambam::BamWriter(std::cout,*uphead,level,Pcbs));
	libmaus::bambam::BamWriter::stream_type & bamoutstr = writer->getStream();

	uint64_t numwritten = 0;
	while ( bamdec.readAlignment() )
	{
		algn.serialise(bamoutstr);
		++numwritten;

		// progress line every 2^20 alignments
		if ( verbose && ( (numwritten % (1ull<<20)) == 0 ) )
			std::cerr << "[V] " << numwritten << std::endl;
	}
	if ( verbose )
		std::cerr << "[V] " << numwritten << std::endl;

	// destroy the writer before the callback results are stored
	writer.reset();

	if ( Pmd5cb )
		Pmd5cb->saveDigestAsFile(md5filename);
	if ( Pindex )
		Pindex->flush(std::string(indexfilename));

	return EXIT_SUCCESS;
}
/**
 * Copy a BAM stream from standard input to standard output and, for every
 * two consecutive records that are both flagged as paired and carry the
 * same name, give an unmapped mate the coordinates of its mapped partner.
 *
 * When exactly one record of such a pair is mapped, the reference id and
 * position of the mapped record are written into the own and the mate
 * coordinate fields of both records. All other records are passed through
 * unchanged. A PG line is added to the header and the sort order is set to
 * "unknown" unless it already is "queryname".
 *
 * Keys read from arginfo: verbose, level, tmpfile, md5, md5filename, index
 * and indexfilename.
 *
 * @param arginfo parsed command line
 * @return EXIT_SUCCESS
 */
int bamfixmatecoordinatesnamesorted(::libmaus::util::ArgInfo const & arginfo)
{
	bool const verbose = arginfo.getValue<unsigned int>("verbose",getDefaultVerbose());

	::libmaus::timing::RealTimeClock rtc;
	rtc.start();

	// gzip compression level for output
	int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));

	::libmaus::bambam::BamDecoder bamfile(std::cin);
	std::string const headertext(bamfile.getHeader().text);

	// add PG line to header
	std::string const upheadtext = ::libmaus::bambam::ProgramHeaderLineSet::addProgramLine(
		headertext,
		"bamfixmatecoordinatesnamesorted", // ID
		"bamfixmatecoordinatesnamesorted", // PN
		arginfo.commandline, // CL
		::libmaus::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);

	// construct new header
	::libmaus::bambam::BamHeader uphead(upheadtext);
	if ( uphead.getSortOrder() != "queryname" )
		uphead.changeSortOrder("unknown");

	std::string const & finalheadtext = uphead.text;
	::libmaus::bambam::BamHeader finalheader(finalheadtext);

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;
	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" )
			md5filename = arginfo.getUnparsedValue("md5filename","");
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( md5filename.size() )
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}
	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		if ( arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "" )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( indexfilename.size() )
		{
			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}
	// the writer gets a null pointer when no callback is active
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( cbs.size() )
		Pcbs = &cbs;
	/*
	 * end md5/index callbacks
	 */

	::libmaus::bambam::BamWriter::unique_ptr_type writer(new ::libmaus::bambam::BamWriter(std::cout,finalheader,level,Pcbs));

	// one slot = (alignment, valid flag); P holds a look-ahead window of two
	typedef std::pair< ::libmaus::bambam::BamAlignment::shared_ptr_type, bool> slot_type;
	std::pair< slot_type, slot_type > P(
		slot_type(::libmaus::bambam::BamAlignment::shared_ptr_type(),false),
		slot_type(::libmaus::bambam::BamAlignment::shared_ptr_type(),false)
	);

	// try to read two alignments
	P.first.second = bamfile.readAlignment();
	if ( P.first.second )
	{
		P.first.first = bamfile.salignment();
		P.second.second = bamfile.readAlignment();
		P.second.first = bamfile.salignment();
	}

	uint64_t single = 0, pairs = 0;
	uint64_t proc = 0;
	uint64_t lastproc = 0;
	uint64_t const mod = 1024*1024;

	// while we have two alignments
	while ( P.first.second && P.second.second )
	{
		uint32_t const aflags = P.first.first->getFlags();
		uint32_t const bflags = P.second.first->getFlags();

		// both flagged as paired and same name?
		if (
			(aflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FPAIRED)
			&&
			(bflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FPAIRED)
			&&
			(! strcmp(P.first.first->getName(),P.second.first->getName()))
		)
		{
			unsigned int const amap = (aflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FUNMAP) ? 0 : 1;
			unsigned int const bmap = (bflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FUNMAP) ? 0 : 1;

			// if exactly one of the two is mapped
			if ( amap + bmap == 1 )
			{
				::libmaus::bambam::BamAlignment::shared_ptr_type mapped = amap ? P.first.first : P.second.first;

				int64_t const tid = mapped->getRefID();
				int64_t const pos = mapped->getPos();

				// set all tid and pos values
				P.first.first->putRefId(tid);
				P.first.first->putPos(pos);
				P.first.first->putNextRefId(tid);
				P.first.first->putNextPos(pos);

				P.second.first->putRefId(tid);
				P.second.first->putPos(pos);
				P.second.first->putNextRefId(tid);
				P.second.first->putNextPos(pos);
			}

			// write alignments
			P.first.first->serialise(writer->getStream());
			P.second.first->serialise(writer->getStream());

			// Both slots have been written. Invalidate the second one before
			// refilling, otherwise its flag would stay true when the input
			// ends right after this pair and the final assertion would fail.
			P.second.second = false;

			// read new alignments
			P.first.second = bamfile.readAlignment();
			if ( P.first.second )
			{
				P.first.first = bamfile.salignment();
				P.second.second = bamfile.readAlignment();
				P.second.first = bamfile.salignment();
			}

			pairs++;
			proc += 2;
		}
		// not a pair
		else
		{
			// write first alignment
			P.first.first->serialise(writer->getStream());
			// move second to first
			std::swap(P.first,P.second);
			// read new second
			P.second.second = P.first.second && bamfile.readAlignment();
			if ( P.second.second )
				P.second.first = bamfile.salignment();

			single++;
			proc += 1;
		}

		// progress line whenever proc crosses a multiple of mod
		if ( verbose && (proc/mod != lastproc/mod) )
		{
			std::cerr
				<< proc << "\t" << single << "\t" << pairs << "\t"
				<< proc/rtc.getElapsedSeconds() << "al/s"
				<< std::endl;
			lastproc = proc;
		}
	}

	// a single alignment may be left over in the first slot
	if ( P.first.second )
	{
		P.first.first->serialise(writer->getStream());
		single++;
		proc += 1;
	}

	if ( verbose )
		std::cerr
			<< proc << "\t" << single << "\t" << pairs << "\t"
			<< proc/rtc.getElapsedSeconds() << "al/s"
			<< std::endl;

	assert ( ! P.second.second );

	// destroy the writer before the callback results are stored
	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
int main(int argc, char * argv[]) { try { ::libmaus2::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam2::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam2::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam2::Licensing::formatNumber(getDefaultLevel())+"]>", libmaus2::bambam::BamBlockWriterBaseFactory::getBamOutputLevelHelpText() ) ); V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam2::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report" ) ); V.push_back ( std::pair<std::string,std::string> ( "mod=<["+::biobambam2::Licensing::formatNumber(getDefaultMod())+"]>", "print progress for every mod'th alignment if verbose" ) ); V.push_back ( std::pair<std::string,std::string> ( "ranksplit=<["+::biobambam2::Licensing::formatNumber(getDefaultRankSplit())+"]>", "split rank pairs in names (see bam12split command)" ) ); V.push_back ( std::pair<std::string,std::string> ( "rankstrip=<["+::biobambam2::Licensing::formatNumber(getDefaultRankStrip())+"]>", "strip ranks of names (see bam12strip command)" ) ); V.push_back ( std::pair<std::string,std::string> ( "clipreinsert=<["+::biobambam2::Licensing::formatNumber(getDefaultClipReinsert())+"]>", "reinsert clipped sequence fragments (see bamclipreinsert command)" ) ); V.push_back ( std::pair<std::string,std::string> ( "zztoname=<["+::biobambam2::Licensing::formatNumber(getDefaultZZToName())+"]>", "move rank from zz to name aux field to name (see bamzztoname command)" ) ); V.push_back ( std::pair<std::string,std::string> ( 
"md5=<["+::biobambam2::Licensing::formatNumber(getDefaultMD5())+"]>", "create md5 check sum (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5filename=<filename>", "file name for md5 check sum (default: extend output file name)" ) ); V.push_back ( std::pair<std::string,std::string> ( "index=<["+::biobambam2::Licensing::formatNumber(getDefaultIndex())+"]>", "create BAM index (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "indexfilename=<filename>", "file name for BAM index file (default: extend output file name)" ) ); V.push_back ( std::pair<std::string,std::string> ( "tmpfile=<filename>", "prefix for temporary files, default: create files in current directory" ) ); V.push_back ( std::pair<std::string,std::string> ( "sanity=<["+::biobambam2::Licensing::formatNumber(getDefaultSanity())+"]>", "extra checking of reads" ) ); ::biobambam2::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bam12auxmerge(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
static void filterBamUsedSequences( libmaus::util::ArgInfo const & arginfo, std::istream & in, ::libmaus::bitio::IndexedBitVector const & IBV, std::ostream & out ) { libmaus::lz::BgzfInflateStream bgzfin(in); libmaus::bambam::BamHeaderLowMem::unique_ptr_type PBHLM ( libmaus::bambam::BamHeaderLowMem::constructFromBAM(bgzfin)); bool const verbose = arginfo.getValue<unsigned int>("verbose",getDefaultVerbose()); std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs; ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb; std::string md5filename; if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) ) { if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" ) md5filename = arginfo.getUnparsedValue("md5filename",""); else std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl; if ( md5filename.size() ) { ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5); Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb); cbs.push_back(Pmd5cb.get()); } } int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel())); libmaus::lz::BgzfDeflate<std::ostream>::unique_ptr_type Pbgzfout( new libmaus::lz::BgzfDeflate<std::ostream>( out,level ) ); libmaus::lz::BgzfDeflate<std::ostream> & bgzfout = *Pbgzfout; if ( verbose ) std::cerr << "[V] writing filtered header..."; PBHLM->serialiseSequenceSubset(bgzfout,IBV,"bamfilterheader2" /* id */,"bamfilterheader2" /* pn */, arginfo.commandline /* pgCL */, PACKAGE_VERSION /* pgVN */ ); if ( verbose ) std::cerr << "done." 
<< std::endl; ::libmaus::bambam::BamAlignment algn; uint64_t c = 0; while ( libmaus::bambam::BamAlignmentDecoder::readAlignmentGz(bgzfin,algn) ) { if ( algn.isMapped() ) { int64_t const refid = algn.getRefID(); assert ( refid >= 0 ); assert ( IBV.get(refid) ); algn.putRefId(IBV.rank1(refid)-1); } else { algn.putRefId(-1); } if ( algn.isPaired() && algn.isMapped() ) { int64_t const refid = algn.getNextRefID(); assert ( refid >= 0 ); assert ( IBV.get(refid) ); algn.putNextRefId(IBV.rank1(refid)-1); } else { algn.putNextRefId(-1); } algn.serialise(bgzfout); if ( verbose && ( ((++c) & (1024*1024-1)) == 0 ) ) std::cerr << "[V] " << c/(1024*1024) << std::endl; } bgzfout.flush(); bgzfout.addEOFBlock(); Pbgzfout.reset(); if ( Pmd5cb ) Pmd5cb->saveDigestAsFile(md5filename); }
/**
 * Merge auxiliary tags from an unaligned ("pre") BAM file into aligner output.
 *
 * The aligned BAM file is read from standard input. Each of its read names
 * is expected to start with a decimal rank followed by '_'; the rank selects
 * the record of the pre file (first rest argument) the alignment belongs to.
 * Aux tags present in the pre record but not in the aligned record are
 * copied over and the QC fail flag of the pre record is propagated. The
 * result is written as BAM to standard output.
 *
 * The output header consists of the pre header without its sequence lines,
 * the PG lines of the aligned header re-chained behind the last PG id of the
 * pre header, and the SQ lines of the aligned header. Its sort order is set
 * to "unknown".
 *
 * @param arginfo parsed command line; keys read are level, verbose, ranksplit,
 *                rankstrip, clipreinsert, zztoname, sanity, mod, tmpfile, md5,
 *                md5filename, index and indexfilename
 * @return EXIT_SUCCESS
 * @throws libmaus2::exception::LibMausException if stdin or stdout is a
 *         terminal, if the pre file ends early or if a sanity check fails
 **/
int bam12auxmerge(::libmaus2::util::ArgInfo const & arginfo)
{
    if ( isatty(STDIN_FILENO) )
    {
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
        se.finish();
        throw se;
    }

    if ( isatty(STDOUT_FILENO) )
    {
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
        se.finish();
        throw se;
    }

    std::string const prefilename = arginfo.getRestArg<std::string>(0);
    libmaus2::bambam::BamDecoder bampredec(prefilename);

    int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
    int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
    int const ranksplit = arginfo.getValue<int>("ranksplit",getDefaultRankSplit());
    // NOTE(review): rankstrip uses the ranksplit default; confirm this is intended.
    int const rankstrip = arginfo.getValue<int>("rankstrip",getDefaultRankSplit());
    int const clipreinsert = arginfo.getValue<int>("clipreinsert",getDefaultClipReinsert());
    int const zztoname = arginfo.getValue<int>("zztoname",getDefaultZZToName());
    int const sanity = arginfo.getValue<int>("sanity",getDefaultSanity());
    // progress is printed whenever the pre file record counter is a multiple of bmod
    uint64_t const mod = arginfo.getValue<int>("mod",getDefaultMod());
    uint64_t const bmod = libmaus2::math::nextTwoPow(mod);
    uint64_t const bmask = bmod-1;

    ::libmaus2::bambam::BamDecoder bamdec(std::cin,false);
    ::libmaus2::bambam::BamHeader const & header = bamdec.getHeader();
    ::libmaus2::bambam::BamHeader const & preheader = bampredec.getHeader();

    /*
     * construct the output header
     */
    std::string const headertext(header.text);
    std::string const preheadertext(libmaus2::bambam::HeaderLine::removeSequenceLines(preheader.text));

    libmaus2::bambam::ProgramHeaderLineSet headerlines(headertext);
    libmaus2::bambam::ProgramHeaderLineSet preheaderlines(preheadertext);

    std::vector<libmaus2::bambam::HeaderLine> allheaderlines = libmaus2::bambam::HeaderLine::extractLines(headertext);

    std::string const lastid = preheaderlines.getLastIdInChain();

    // (PG line index, id of its parent) pairs still to be appended
    std::stack < std::pair<uint64_t,std::string> > pgtodo;
    for ( uint64_t i = 0; i < headerlines.roots.size(); ++i )
        pgtodo.push(std::pair<uint64_t,std::string>(headerlines.roots[i],lastid));

    std::string upheadtext = preheadertext;

    while ( pgtodo.size() )
    {
        uint64_t const hid = pgtodo.top().first;
        std::string const PP = pgtodo.top().second;
        pgtodo.pop();

        libmaus2::bambam::HeaderLine const & line = headerlines.lines[hid];

        // ID, PP, PN, CL, VN
        // ID is deliberately non const, it is handed to addProgramLineRef
        // and afterwards used as the parent id of the children
        std::string ID = (line.M.find("ID") != line.M.end()) ? line.M.find("ID")->second : "";
        std::string const PN = (line.M.find("PN") != line.M.end()) ? line.M.find("PN")->second : "";
        std::string const CL = (line.M.find("CL") != line.M.end()) ? line.M.find("CL")->second : "";
        std::string const VN = (line.M.find("VN") != line.M.end()) ? line.M.find("VN")->second : "";

        upheadtext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLineRef(
            upheadtext,
            ID,
            PN,
            CL,
            PP,
            VN
        );

        if ( headerlines.edges.find(hid) != headerlines.edges.end() )
        {
            std::vector<uint64_t> const & children = headerlines.edges.find(hid)->second;

            for ( uint64_t j = 0; j < children.size(); ++j )
                pgtodo.push(std::pair<uint64_t,std::string>(children[j],ID));
        }
    }

    /* copy SQ lines */
    std::ostringstream sqconcstr;
    sqconcstr << upheadtext;
    for ( uint64_t i = 0; i < allheaderlines.size(); ++i )
        if ( allheaderlines[i].type == "SQ" )
            sqconcstr << allheaderlines[i].line << "\n";
    upheadtext = sqconcstr.str();

    ::libmaus2::bambam::BamHeader uphead(upheadtext);
    uphead.changeSortOrder("unknown");

    /*
     * start index/md5 callbacks
     */
    std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
    std::string const tmpfileindex = tmpfilenamebase + "_index";
    ::libmaus2::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

    std::string md5filename;
    std::string indexfilename;

    std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > cbs;
    ::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
    if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
    {
        if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" )
            md5filename = arginfo.getUnparsedValue("md5filename","");
        else
            std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

        if ( md5filename.size() )
        {
            ::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
            Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
            cbs.push_back(Pmd5cb.get());
        }
    }
    libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
    if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
    {
        if ( arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "" )
            indexfilename = arginfo.getUnparsedValue("indexfilename","");
        else
            std::cerr << "[V] no filename for index given, not creating index" << std::endl;

        if ( indexfilename.size() )
        {
            libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
            Pindex = UNIQUE_PTR_MOVE(Tindex);
            cbs.push_back(Pindex.get());
        }
    }
    std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
    if ( cbs.size() )
        Pcbs = &cbs;
    /*
     * end md5/index callbacks
     */

    ::libmaus2::bambam::BamWriter::unique_ptr_type writer(new ::libmaus2::bambam::BamWriter(std::cout,uphead,level,Pcbs));

    ::libmaus2::bambam::BamAlignment & algn = bamdec.getAlignment();
    ::libmaus2::bambam::BamAlignment & prealgn = bampredec.getAlignment();
    // index of the record currently held in prealgn (-1: none read yet)
    int64_t curid = -1;

    libmaus2::autoarray::AutoArray< std::pair<uint8_t,uint8_t> > auxpre;
    libmaus2::autoarray::AutoArray< std::pair<uint8_t,uint8_t> > auxnew;
    libmaus2::bambam::BamAuxFilterVector auxfilter;

    // helpers for clipReinsert
    libmaus2::autoarray::AutoArray < std::pair<uint8_t,uint8_t> > auxtags;
    libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> cigop;
    std::stack < libmaus2::bambam::cigar_operation > hardstack;
    libmaus2::bambam::BamAlignment::D_array_type Tcigar;
    libmaus2::bambam::BamAuxFilterVector bafv;
    libmaus2::bambam::BamAuxFilterVector auxfilterout;
    auxfilterout.set('q','s');
    auxfilterout.set('q','q');

    // helpers for zztoname
    libmaus2::bambam::BamAuxFilterVector zzbafv;
    zzbafv.set('z','z');

    // tag filters for secondary/supplementary reads
    libmaus2::bambam::BamAuxFilterVector auxfiltersec;
    auxfiltersec.set('q','s');
    auxfiltersec.set('q','q');
    auxfiltersec.set('a','s');
    auxfiltersec.set('a','h');
    auxfiltersec.set('a','a');
    auxfiltersec.set('a','f');
    auxfiltersec.set('a','r');
    auxfiltersec.set('a','3');

    // loop over aligned BAM file
    while ( bamdec.readAlignment() )
    {
        if ( ranksplit )
            split12(algn);

        // extract rank
        char const * name = algn.getName();
        char const * u1 = name;
        bool ok = true;
        uint64_t rank = 0;
        for ( ; *u1 && *u1 != '_'; ++u1 )
        {
            // isdigit is only defined for unsigned char values and EOF,
            // so a plain (possibly negative) char must be converted first
            ok = ok && isdigit(static_cast<unsigned char>(*u1));
            rank *= 10;
            rank += (*u1-'0');
        }

        // unable to find rank? write out as is and continue
        if ( ! ok )
        {
            algn.serialise(writer->getStream());
            continue;
        }

        // loop over unaligned BAM file
        while ( curid != static_cast<int64_t>(rank) )
        {
            bool const a_ok = bampredec.readAlignment();

            if ( ! a_ok )
            {
                libmaus2::exception::LibMausException se;
                se.getStream() << "Found unexpected EOF on file " << prefilename << std::endl;
                se.finish();
                throw se;
            }

            ++curid;

            if ( verbose && (! (curid & bmask)) )
                std::cerr << "[V] " << (curid / bmod) << std::endl;
        }

        if ( verbose > 1 )
            std::cerr << "Merging:\n" << algn.formatAlignment(header) << "\n" << prealgn.formatAlignment(preheader) << std::endl;

        uint64_t pretagnum = prealgn.enumerateAuxTags(auxpre);
        uint64_t newtagnum = algn.enumerateAuxTags(auxnew);

        // do some sanity checking
        if ( sanity )
        {
            // first do a name check
            char const * prename = prealgn.getName();
            // put on the first letter of readname; if the name carries no
            // '_' separator u1 already sits on the terminator and must not
            // be moved beyond the end of the string
            if ( *u1 == '_' )
                u1++;

            if ( verbose > 1 )
                std::cerr << "Sanity: comparing " << name << " and " << prename << std::endl;

            if ( !is_suffix(prename, u1) ) // names do not match
            {
                libmaus2::exception::LibMausException se;
                se.getStream() << "Sanity check failed on read names, found " << name << " and " << prename << std::endl;
                se.finish();
                throw se;
            }

            // now the names match so try the flags
            if ( !(algn.isPaired() == prealgn.isPaired() && algn.isRead1() == prealgn.isRead1() && algn.isRead2() == prealgn.isRead2()) )
            {
                libmaus2::exception::LibMausException se;
                se.getStream() << "Sanity check failed on flags, " << std::endl
                    << "Aligned " << name << " paired " << algn.isPaired() << " first " << algn.isRead1() << " last " << algn.isRead2() << std::endl
                    << "Unaligned " << prename << " paired " << prealgn.isPaired() << " first " << prealgn.isRead1() << " last " << prealgn.isRead2() << std::endl;
                se.finish();
                throw se;
            }

            if ( verbose > 1 )
                std::cerr << "Sanity check on flags: " << std::endl
                    << "Aligned " << name << " paired " << algn.isPaired() << " first " << algn.isRead1() << " last " << algn.isRead2() << std::endl
                    << "Unaligned " << prename << " paired " << prealgn.isPaired() << " first " << prealgn.isRead1() << " last " << prealgn.isRead2() << std::endl;
        }

        std::sort(auxpre.begin(),auxpre.begin()+pretagnum);
        std::sort(auxnew.begin(),auxnew.begin()+newtagnum);

        if ( verbose > 1 )
            std::cerr << "pretagnum=" << pretagnum << " newtagnum=" << newtagnum << std::endl;

        // merge the two sorted tag lists in place; afterwards auxpre holds
        // the tags found in the pre record only
        std::pair<uint8_t,uint8_t> * prec = auxpre.begin();
        std::pair<uint8_t,uint8_t> * pree = prec + pretagnum;
        std::pair<uint8_t,uint8_t> * preo = prec;
        std::pair<uint8_t,uint8_t> * newc = auxnew.begin();
        std::pair<uint8_t,uint8_t> * newe = newc + newtagnum;
        std::pair<uint8_t,uint8_t> * newo = newc;

        while ( prec != pree && newc != newe )
        {
            // pre which is not in new
            if ( *prec < *newc )
            {
                *(preo++) = *(prec++);
            }
            // tag in both, drop pre
            else if ( *prec == *newc )
            {
                *(newo++) = *(newc++);
                prec++;
            }
            // new not in pre
            else
            {
                *(newo++) = *(newc++);
            }
        }

        while ( prec != pree )
            *(preo++) = *(prec++);
        while ( newc != newe )
            *(newo++) = *(newc++);

        pretagnum = preo-auxpre.begin();
        newtagnum = newo-auxnew.begin();

        // mark the surviving pre tags, copy them and unmark them again so
        // the filter is empty for the next record
        for ( uint64_t i = 0; i < pretagnum; ++i )
            auxfilter.set(auxpre[i].first,auxpre[i].second);

        algn.copyAuxTags(prealgn, auxfilter);

        for ( uint64_t i = 0; i < pretagnum; ++i )
            auxfilter.clear(auxpre[i].first,auxpre[i].second);

        if ( verbose > 1 )
        {
            std::cerr << "pretagnum=" << pretagnum << " newtagnum=" << newtagnum << std::endl;
            std::cerr << "result: " << algn.formatAlignment(header) << std::endl;
        }

        if ( algn.isSecondary() || algn.isSupplementary() )
        {
            // adding adapter clip data to secondary/supplementary reads
            // can lead to incorrect clip reinserts so remove these tags
            algn.filterOutAux(auxfiltersec);
        }

        // copy QC fail flag from original file to aligner output
        if ( prealgn.isQCFail() )
            algn.putFlags( algn.getFlags() | libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_FQCFAIL );

        if ( rankstrip )
            strip12(algn);

        if ( clipreinsert )
            clipReinsert(algn,auxtags,bafv,cigop,Tcigar,hardstack,auxfilterout);

        if ( zztoname )
            zzToRank(algn,zzbafv);

        algn.serialise(writer->getStream());
    }

    // destroy the writer before digest and index are stored
    writer.reset();

    if ( Pmd5cb )
    {
        Pmd5cb->saveDigestAsFile(md5filename);
    }
    if ( Pindex )
    {
        Pindex->flush(std::string(indexfilename));
    }

    return EXIT_SUCCESS;
}
/**
 * Filter a BAM stream from standard input to standard output.
 *
 * For each alignment a score is computed as the number of the following
 * conditions that hold: the unmapped flag is clear, the mate unmapped flag
 * is clear, the proper pair flag is set. The alignment is kept if the score
 * lies in [minmapped,maxmapped] and getLseq() is at least minlen.
 *
 * @param arginfo parsed command line; keys read are minmapped, maxmapped,
 *                minlen, level, tmpfile, md5, md5filename, index and
 *                indexfilename
 * @return EXIT_SUCCESS
 **/
int bamfilter(libmaus::util::ArgInfo const & arginfo)
{
    uint64_t const minmapped = arginfo.getValue<uint64_t>("minmapped",getDefaultMinMapped());
    uint64_t const maxmapped = arginfo.getValue<uint64_t>("maxmapped",getDefaultMaxMapped());
    uint64_t const minlen = arginfo.getValue<uint64_t>("minlen",getDefaultMinLen());
    int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(
        arginfo.getValue<int>("level",getDefaultLevel())
    );

    ::libmaus::bambam::BamDecoder BD(std::cin);
    ::libmaus::bambam::BamHeader const & bamheader = BD.getHeader();
    ::libmaus::bambam::BamAlignment & alignment = BD.getAlignment();

    // temporary file used by the index callback
    std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
    std::string const tmpfileindex = tmpfilenamebase + "_index";
    ::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

    std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;

    // optional md5 digest of the output
    std::string md5filename;
    ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
    if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
    {
        if ( arginfo.hasArg("md5filename") )
            md5filename = arginfo.getUnparsedValue("md5filename","");

        if ( md5filename.empty() )
        {
            std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
        }
        else
        {
            ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
            Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
            cbs.push_back(Pmd5cb.get());
        }
    }

    // optional BAM index of the output
    std::string indexfilename;
    libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
    if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
    {
        if ( arginfo.hasArg("indexfilename") )
            indexfilename = arginfo.getUnparsedValue("indexfilename","");

        if ( indexfilename.empty() )
        {
            std::cerr << "[V] no filename for index given, not creating index" << std::endl;
        }
        else
        {
            libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
            Pindex = UNIQUE_PTR_MOVE(Tindex);
            cbs.push_back(Pindex.get());
        }
    }

    // the writer expects a null pointer if there are no callbacks
    std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = cbs.size() ? &cbs : 0;

    ::libmaus::bambam::BamHeader::unique_ptr_type uphead(
        libmaus::bambam::BamHeaderUpdate::updateHeader(arginfo,bamheader,"bamfilter",std::string(PACKAGE_VERSION))
    );
    ::libmaus::bambam::BamWriter::unique_ptr_type writer(
        new ::libmaus::bambam::BamWriter(std::cout,*uphead,level,Pcbs)
    );

    while ( BD.readAlignment() )
    {
        uint64_t nummapped = 0;

        if ( !(alignment.getFlags() & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FUNMAP) )
            ++nummapped;
        if ( !(alignment.getFlags() & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FMUNMAP) )
            ++nummapped;
        if ( alignment.getFlags() & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FPROPER_PAIR )
            ++nummapped;

        if ( nummapped < minmapped || nummapped > maxmapped )
            continue;
        if ( alignment.getLseq() < static_cast<int64_t>(minlen) )
            continue;

        alignment.serialise(writer->getStream());
    }

    // destroy the writer before digest and index are stored
    writer.reset();

    if ( Pmd5cb )
        Pmd5cb->saveDigestAsFile(md5filename);
    if ( Pindex )
        Pindex->flush(std::string(indexfilename));

    return EXIT_SUCCESS;
}
/**
 * Recompress a BGZF stream from standard input to standard output using a
 * parallel inflate/deflate object.
 *
 * Data is moved in chunks of 64KiB. If verbose is set a progress line is
 * printed to standard error whenever the amount of data copied crosses a
 * 64MiB boundary, plus a summary line at the end.
 *
 * @param arginfo parsed command line; keys read are level, verbose,
 *                numthreads, tmpfile, md5, md5filename, index, indexfilename
 * @return 0
 **/
uint64_t bamrecompress(libmaus2::util::ArgInfo const & arginfo)
{
    int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(
        arginfo.getValue<int>("level",getDefaultLevel())
    );
    int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
    // at least one thread
    int const numthreads = std::max(1,arginfo.getValue<int>("numthreads",getDefaultNumThreads()));

    // temporary file used by the index callback
    std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
    std::string const tmpfileindex = tmpfilenamebase + "_index";
    ::libmaus2::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

    std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > cbs;

    // optional md5 digest of the output
    std::string md5filename;
    ::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
    if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
    {
        if ( arginfo.hasArg("md5filename") )
            md5filename = arginfo.getUnparsedValue("md5filename","");

        if ( md5filename.empty() )
        {
            std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;
        }
        else
        {
            ::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
            Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
            cbs.push_back(Pmd5cb.get());
        }
    }

    // optional BAM index of the output
    std::string indexfilename;
    libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
    if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
    {
        if ( arginfo.hasArg("indexfilename") )
            indexfilename = arginfo.getUnparsedValue("indexfilename","");

        if ( indexfilename.empty() )
        {
            std::cerr << "[V] no filename for index given, not creating index" << std::endl;
        }
        else
        {
            libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
            Pindex = UNIQUE_PTR_MOVE(Tindex);
            cbs.push_back(Pindex.get());
        }
    }

    libmaus2::lz::BgzfInflateDeflateParallel::unique_ptr_type BIDP(
        new libmaus2::lz::BgzfInflateDeflateParallel(std::cin,std::cout,level,numthreads,4*numthreads)
    );
    for ( uint64_t i = 0; i < cbs.size(); ++i )
        BIDP->registerBlockOutputCallback(cbs[i]);

    libmaus2::autoarray::AutoArray<char> B(64*1024,false);

    uint64_t const mod = 64*1024*1024;
    // total bytes copied
    uint64_t t = 0;
    // value of t at the previous progress report
    uint64_t last = std::numeric_limits<uint64_t>::max();
    // bytes copied since the previous progress report
    uint64_t lcnt = 0;

    // rtc measures the whole run, lrtc the interval since the last report
    libmaus2::timing::RealTimeClock rtc;
    rtc.start();
    libmaus2::timing::RealTimeClock lrtc;
    lrtc.start();

    for ( ; ; )
    {
        int const r = BIDP->read(B.begin(),B.size());

        if ( !r )
            break;

        BIDP->write(B.begin(),r);

        lcnt += r;
        t += r;

        // still inside the same 64MiB window, nothing to report
        if ( t/mod == last/mod )
            continue;

        if ( verbose )
        {
            // on a terminal the progress line is overwritten in place
            bool const tty = isatty(STDERR_FILENO);

            if ( tty )
                std::cerr << "\r" << std::string(60,' ') << "\r";

            std::cerr
                << rtc.formatTime(rtc.getElapsedSeconds())
                << " " << t/(1024*1024) << "MB, "
                << (lcnt/lrtc.getElapsedSeconds())/(1024.0*1024.0) << "MB/s";

            if ( tty )
                std::cerr << std::flush;
            else
                std::cerr << std::endl;
        }

        lrtc.start();
        last = t;
        lcnt = 0;
    }

    if ( verbose )
    {
        if ( isatty(STDERR_FILENO) )
            std::cerr << "\r" << std::string(60,' ') << "\r";

        std::cerr
            << rtc.formatTime(rtc.getElapsedSeconds())
            << " " << t/(1024*1024) << "MB, "
            << (t/rtc.getElapsedSeconds())/(1024.0*1024.0) << "MB/s";
        std::cerr << std::endl;
    }

    // destroy the recompressor before digest and index are stored
    BIDP.reset();

    if ( Pmd5cb )
        Pmd5cb->saveDigestAsFile(md5filename);
    if ( Pindex )
        Pindex->flush(std::string(indexfilename));

    return 0;
}
int bamclipreinsert(::libmaus2::util::ArgInfo const & arginfo) { if ( isatty(STDIN_FILENO) ) { ::libmaus2::exception::LibMausException se; se.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl; se.finish(); throw se; } if ( isatty(STDOUT_FILENO) ) { ::libmaus2::exception::LibMausException se; se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl; se.finish(); throw se; } int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel())); int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose()); ::libmaus2::bambam::BamDecoder dec(std::cin,false); ::libmaus2::bambam::BamHeader const & header = dec.getHeader(); std::string const headertext(header.text); // add PG line to header std::string const upheadtext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLine( headertext, "bamclipreinsert", // ID "bamclipreinsert", // PN arginfo.commandline, // CL ::libmaus2::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP std::string(PACKAGE_VERSION) // VN ); // construct new header libmaus2::bambam::BamHeader const uphead(upheadtext); /* * start index/md5 callbacks */ std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName()); std::string const tmpfileindex = tmpfilenamebase + "_index"; ::libmaus2::util::TempFileRemovalContainer::addTempFile(tmpfileindex); std::string md5filename; std::string indexfilename; std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > cbs; ::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb; if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) ) { if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" ) md5filename = arginfo.getUnparsedValue("md5filename",""); else std::cerr << "[V] no filename for md5 
given, not creating hash" << std::endl; if ( md5filename.size() ) { ::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5); Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb); cbs.push_back(Pmd5cb.get()); } } libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex; if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) ) { if ( arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "" ) indexfilename = arginfo.getUnparsedValue("indexfilename",""); else std::cerr << "[V] no filename for index given, not creating index" << std::endl; if ( indexfilename.size() ) { libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex)); Pindex = UNIQUE_PTR_MOVE(Tindex); cbs.push_back(Pindex.get()); } } std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * Pcbs = 0; if ( cbs.size() ) Pcbs = &cbs; /* * end md5/index callbacks */ ::libmaus2::bambam::BamWriter::unique_ptr_type writer(new ::libmaus2::bambam::BamWriter(std::cout,uphead,level,Pcbs)); libmaus2::bambam::BamAuxFilterVector bafv; // bafv.set('z','z'); // std::vector<uint8_t> R(8); // std::string const zz("zz"); libmaus2::bambam::BamAlignment & algn = dec.getAlignment(); uint64_t c = 0; libmaus2::autoarray::AutoArray < std::pair<uint8_t,uint8_t> > auxtags; libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> cigop; std::stack < libmaus2::bambam::cigar_operation > hardstack; libmaus2::bambam::BamAlignment::D_array_type Tcigar; libmaus2::bambam::BamAuxFilterVector auxfilterout; auxfilterout.set('q','s'); auxfilterout.set('q','q'); while ( dec.readAlignment() ) { // reinsert clipped parts and attach soft clipping cigar operations as needed clipReinsert(algn,auxtags,bafv,cigop,Tcigar,hardstack,auxfilterout); algn.serialise(writer->getStream()); ++c; if ( verbose && (c & (1024*1024-1)) == 0 ) std::cerr << "[V] " << 
c/(1024*1024) << std::endl; } writer.reset(); if ( Pmd5cb ) { Pmd5cb->saveDigestAsFile(md5filename); } if ( Pindex ) { Pindex->flush(std::string(indexfilename)); } return EXIT_SUCCESS; }
int main(int argc, char * argv[]) { try { ::libmaus::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "div=<["+::biobambam::Licensing::formatNumber(getDefaultDiv())+"]>", "divisor" ) ); V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam::Licensing::formatNumber(getDefaultLevel())+"]>", libmaus::bambam::BamBlockWriterBaseFactory::getBamOutputLevelHelpText() ) ); V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report" ) ); V.push_back ( std::pair<std::string,std::string> ( "prefix=<["+getDefaultFilePrefix(arginfo)+"]>", "default output file prefix" ) ); ::biobambam::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bamsplitmod(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
/**
 * Read FastA from standard input and write it BGZF compressed to out, one
 * sequence per line, while collecting an index of compressed offsets.
 *
 * Each sequence is written as a name line, the sequence data cut into
 * pieces of at most defl.getInputBufferSize() bytes and a newline; every
 * piece is passed to writeSyncedCount on its own. For every record the
 * index stores name, short name, length, the compressed offset of the
 * record, the number of pieces and the compressed offset of each piece plus
 * the offset behind the last one. The index is stored in the file named by
 * the "index" key if that key is non empty.
 *
 * @param arginfo parsed command line; keys read are level and index
 * @param out     stream receiving the compressed FastA data
 **/
void normalisefastaBgzf(libmaus::util::ArgInfo const & arginfo, std::ostream & out)
{
    libmaus::fastx::StreamFastAReaderWrapper in(std::cin);
    libmaus::fastx::StreamFastAReaderWrapper::pattern_type pattern;

    int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(
        arginfo.getValue("level",getDefaultLevel())
    );
    std::string const indexfn = arginfo.getUnparsedValue("index","");

    // second validation of the level as in the original code, result unused
    ::libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(level);

    libmaus::lz::BgzfDeflate<std::ostream> defl(out,level,false /* full flush */);
    uint64_t const inbufsize = defl.getInputBufferSize();

    // running offset in the compressed output
    uint64_t zoffset = 0;
    // running offset in the serialised index
    uint64_t ioffset = 0;

    std::vector<libmaus::fastx::BgzfFastAIndexEntry> index;
    std::ostringstream indexstr;

    // the index starts with the block size
    ioffset += libmaus::util::NumberSerialisation::serialiseNumber(indexstr,inbufsize);

    for ( uint64_t patid = 0; in.getNextPatternUnlocked(pattern); ++patid )
    {
        std::string const name = pattern.getStringId();
        std::string const shortname = stripName(name);
        std::string const & spat = pattern.spattern;
        uint64_t const patlen = spat.size();
        uint64_t const numblocks = (patlen + inbufsize - 1)/inbufsize;

        // remember where this record starts inside the index
        index.push_back(libmaus::fastx::BgzfFastAIndexEntry(shortname,patid,ioffset));

        // record header
        ioffset += libmaus::util::StringSerialisation::serialiseString(indexstr,name);
        ioffset += libmaus::util::StringSerialisation::serialiseString(indexstr,shortname);
        ioffset += libmaus::util::NumberSerialisation::serialiseNumber(indexstr,patlen);
        ioffset += libmaus::util::NumberSerialisation::serialiseNumber(indexstr,zoffset);
        ioffset += libmaus::util::NumberSerialisation::serialiseNumber(indexstr,numblocks);

        // name line
        std::string const nameser = std::string(1,'>') + name + std::string(1,'\n');
        zoffset += defl.writeSyncedCount(nameser.c_str(),nameser.size()).second;

        // sequence data, one synced write per piece
        uint64_t o = 0;
        while ( o != patlen )
        {
            assert ( o % inbufsize == 0 );

            uint64_t const towrite = std::min(patlen-o,inbufsize);

            // compressed offset at which this piece starts
            ioffset += libmaus::util::NumberSerialisation::serialiseNumber(indexstr,zoffset);

            std::pair<uint64_t,uint64_t> const written = defl.writeSyncedCount(spat.c_str()+o,towrite);
            zoffset += written.second;

            o += towrite;
        }

        // compressed offset behind the last piece
        ioffset += libmaus::util::NumberSerialisation::serialiseNumber(indexstr,zoffset);

        // line terminator
        zoffset += defl.writeSyncedCount("\n",1).second;
    }

    defl.flush();
    out << std::flush;

    // meta data: number of records, index offset of each record and finally
    // the offset of the meta data itself
    uint64_t const imetaoffset = ioffset;
    ioffset += libmaus::util::NumberSerialisation::serialiseNumber(indexstr,index.size());
    for ( uint64_t i = 0; i < index.size(); ++i )
        ioffset += libmaus::util::NumberSerialisation::serialiseNumber(indexstr,index[i].ioffset);
    libmaus::util::NumberSerialisation::serialiseNumber(indexstr,imetaoffset);

    if ( indexfn.size() )
    {
        std::string const sindex = indexstr.str();
        libmaus::aio::CheckedOutputStream indexCOS(indexfn);
        indexCOS.write(sindex.c_str(),sindex.size());
        indexCOS.flush();
        indexCOS.close();
    }
}
/**
 * Distribute the alignments of a BAM stream on standard input round robin
 * over div output files.
 *
 * Alignment number c (counted from 0) is written to file number c % div.
 * The files are named <prefix>_<i>.bam where i is the file number printed
 * with six digits and leading zeroes. Each file gets the header produced by
 * updateHeader.
 *
 * @param arginfo parsed command line; keys read are level, verbose, div and
 *                prefix
 * @return EXIT_SUCCESS
 * @throws libmaus::exception::LibMausException if stdin is a terminal or
 *         if div is not positive
 **/
int bamsplitmod(libmaus::util::ArgInfo const & arginfo)
{
    if ( isatty(STDIN_FILENO) )
    {
        ::libmaus::exception::LibMausException se;
        se.getStream() << "Refusing read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
        se.finish();
        throw se;
    }

    int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
    int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
    // keep the value signed until it is validated; converting a negative
    // number to uint64_t would yield a huge file count
    int const sdiv = arginfo.getValue<int>("div",getDefaultDiv());
    std::string const prefix = arginfo.getUnparsedValue("prefix",getDefaultFilePrefix(arginfo));

    if ( sdiv <= 0 )
    {
        ::libmaus::exception::LibMausException se;
        if ( sdiv )
            se.getStream() << "div cannot be negative." << std::endl;
        else
            se.getStream() << "div cannot be 0." << std::endl;
        se.finish();
        throw se;
    }

    uint64_t const div = sdiv;

    libmaus::bambam::BamDecoder bamdec(std::cin);
    libmaus::bambam::BamAlignment const & algn = bamdec.getAlignment();
    libmaus::bambam::BamHeader const & header = bamdec.getHeader();
    ::libmaus::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header));

    // one output stream and one BAM writer per output file
    libmaus::autoarray::AutoArray<libmaus::aio::CheckedOutputStream::unique_ptr_type> COS(div);
    libmaus::autoarray::AutoArray<libmaus::bambam::BamWriter::unique_ptr_type> writers(div);

    for ( uint64_t i = 0; i < div; ++i )
    {
        std::ostringstream ostr;
        ostr << prefix << "_" << std::setw(6) << std::setfill('0') << i << std::setw(0) << ".bam";

        libmaus::aio::CheckedOutputStream::unique_ptr_type tCOS(new libmaus::aio::CheckedOutputStream(ostr.str()));
        COS[i] = UNIQUE_PTR_MOVE(tCOS);

        libmaus::bambam::BamWriter::unique_ptr_type twriter(new libmaus::bambam::BamWriter(*COS[i],*uphead,level));
        writers[i] = UNIQUE_PTR_MOVE(twriter);
    }

    uint64_t c = 0;

    while ( bamdec.readAlignment() )
    {
        algn.serialise ( writers [ (c++) % div ] -> getStream() );

        // progress report once per 2^20 alignments
        if ( verbose && ((c) & ((1ull<<20)-1)) == 0 )
            std::cerr << "[V] " << c << std::endl;
    }

    if ( verbose )
        std::cerr << "[V] " << c << std::endl;

    // destroy each writer before its stream is flushed and closed
    for ( uint64_t i = 0; i < div; ++i )
    {
        writers[i].reset();
        COS[i]->flush();
        COS[i].reset();
    }

    return EXIT_SUCCESS;
}
/**
 * Read BAM from standard input, collate the alignments into pairs and
 * write them to standard output. For a pair in which getMapCnt counts
 * exactly one end, reference id and position of that end are stored as
 * reference id, position, next reference id and next position of both ends.
 *
 * Alignments delivered without a mate are written unchanged.
 *
 * A PG line with id and program name "bamfixmatecoordinates" is added to
 * the header. The sort order is set to "unknown" unless it is "queryname".
 *
 * @param arginfo parsed command line; keys read are verbose, colhashbits,
 *                collistsize, level, tmpfile, md5, md5filename, index and
 *                indexfilename
 * @return EXIT_SUCCESS
 * @throws libmaus::exception::LibMausException if level is not one of the
 *         zlib constants accepted below
 **/
int bamfixmatecoordinates(::libmaus::util::ArgInfo const & arginfo)
{
    ::libmaus::util::TempFileRemovalContainer::setup();

    ::libmaus::timing::RealTimeClock rtc;
    rtc.start();

    bool const verbose = arginfo.getValue<unsigned int>("verbose",getDefaultVerbose());
    unsigned int const colhashbits = arginfo.getValue<unsigned int>("colhashbits",getDefaultColHashBits());
    unsigned int const collistsize = arginfo.getValue<unsigned int>("collistsize",getDefaultColListSize());
    int const level = arginfo.getValue<int>("level",getDefaultLevel());
    std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());

    switch ( level )
    {
        case Z_NO_COMPRESSION:
        case Z_BEST_SPEED:
        case Z_BEST_COMPRESSION:
        case Z_DEFAULT_COMPRESSION:
            break;
        default:
        {
            ::libmaus::exception::LibMausException se;
            se.getStream()
                << "Unknown compression level, please use"
                << " level=" << Z_DEFAULT_COMPRESSION << " (default) or"
                << " level=" << Z_BEST_SPEED << " (fast) or"
                << " level=" << Z_BEST_COMPRESSION << " (best) or"
                << " level=" << Z_NO_COMPRESSION << " (no compression)" << std::endl;
            se.finish();
            throw se;
        }
    }

    std::string const tmpfilename = tmpfilenamebase + "_bamcollate";
    ::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfilename);

    ::libmaus::bambam::CollatingBamDecoder CBD(std::cin,tmpfilename,false /* put rank */,colhashbits/*hash bits*/,collistsize/*size of output list*/);
    ::libmaus::bambam::BamHeader const & bamheader = CBD.getHeader();

    // add PG line to header
    std::string const upheadtext = ::libmaus::bambam::ProgramHeaderLineSet::addProgramLine(
        bamheader.text,
        "bamfixmatecoordinates", // ID
        "bamfixmatecoordinates", // PN
        arginfo.commandline, // CL
        ::libmaus::bambam::ProgramHeaderLineSet(bamheader.text).getLastIdInChain(), // PP
        std::string(PACKAGE_VERSION) // VN
    );
    // construct new header
    ::libmaus::bambam::BamHeader uphead(upheadtext);
    if ( uphead.getSortOrder() != "queryname" )
        uphead.changeSortOrder("unknown");

    /*
     * start index/md5 callbacks
     */
    std::string const tmpfileindex = tmpfilenamebase + "_index";
    ::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

    std::string md5filename;
    std::string indexfilename;

    std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;
    ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
    if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
    {
        if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" )
            md5filename = arginfo.getUnparsedValue("md5filename","");
        else
            std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

        if ( md5filename.size() )
        {
            ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
            Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
            cbs.push_back(Pmd5cb.get());
        }
    }
    libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
    if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
    {
        if ( arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "" )
            indexfilename = arginfo.getUnparsedValue("indexfilename","");
        else
            std::cerr << "[V] no filename for index given, not creating index" << std::endl;

        if ( indexfilename.size() )
        {
            libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
            Pindex = UNIQUE_PTR_MOVE(Tindex);
            cbs.push_back(Pindex.get());
        }
    }
    std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
    if ( cbs.size() )
        Pcbs = &cbs;
    /*
     * end md5/index callbacks
     */

    // setup bam writer
    ::libmaus::bambam::BamWriter::unique_ptr_type writer(new ::libmaus::bambam::BamWriter(std::cout,uphead,level,Pcbs));

    typedef ::libmaus::bambam::CollatingBamDecoder::alignment_ptr_type alignment_ptr_type;
    std::pair<alignment_ptr_type,alignment_ptr_type> P;

    // progress is reported whenever proc crosses a multiple of mod
    uint64_t const mod = 1024*1024;
    uint64_t proc = 0;
    uint64_t lastproc = 0;
    uint64_t paircnt = 0;

    while ( CBD.tryPair(P) )
    {
        uint64_t const mapcnt = getMapCnt(P.first) + getMapCnt(P.second);

        // Only complete pairs can be fixed; either pointer may be null for
        // an alignment delivered without its mate and must not be
        // dereferenced in that case.
        if ( mapcnt == 1 && P.first && P.second )
        {
            // take the coordinates from the end which was counted, not
            // simply from the first one
            alignment_ptr_type const & source = getMapCnt(P.first) ? P.first : P.second;
            int32_t const refid = source->getRefID();
            int32_t const pos = source->getPos();

            P.first->putRefId(refid);
            P.first->putPos(pos);
            P.first->putNextRefId(refid);
            P.first->putNextPos(pos);

            P.second->putRefId(refid);
            P.second->putPos(pos);
            P.second->putNextRefId(refid);
            P.second->putNextPos(pos);
        }

        if ( P.first )
        {
            P.first->serialise(writer->getStream());
            ++proc;
        }
        if ( P.second )
        {
            P.second->serialise(writer->getStream());
            ++proc;
        }
        if ( P.first && P.second )
        {
            paircnt++;
        }

        if ( verbose && (proc/mod != lastproc/mod) )
        {
            std::cerr
                << "Processed " << proc << " fragments, " << paircnt << " pairs, "
                << proc/rtc.getElapsedSeconds() << " al/s"
                << std::endl;
            lastproc = proc;
        }
    }

    if ( verbose )
        std::cerr
            << "Processed " << proc << " fragments, " << paircnt << " pairs, "
            << proc/rtc.getElapsedSeconds() << " al/s"
            << std::endl;

    // destroy the writer before digest and index are stored
    writer.reset();

    if ( Pmd5cb )
    {
        Pmd5cb->saveDigestAsFile(md5filename);
    }
    if ( Pindex )
    {
        Pindex->flush(std::string(indexfilename));
    }

    return EXIT_SUCCESS;
}
/**
 * Split a BAM stream on standard input into files of n alignments each.
 *
 * A new output file is opened for alignment number 0, n, 2n, ... The files
 * are named <prefix>_<f>.bam where f is the running file number printed
 * with six digits and leading zeroes. Each file gets the header produced by
 * updateHeader. No file is created if the input has no alignments.
 *
 * @param arginfo parsed command line; keys read are level, verbose, n and
 *                prefix
 * @return EXIT_SUCCESS
 * @throws libmaus2::exception::LibMausException if stdin is a terminal or
 *         if n is 0
 **/
int bamsplit(libmaus2::util::ArgInfo const & arginfo)
{
    if ( isatty(STDIN_FILENO) )
    {
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "Refusing read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
        se.finish();
        throw se;
    }

    int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
    int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
    // NOTE(review): a negative value wraps around to a very large count
    // here, which puts all alignments into a single file; left unchanged.
    uint64_t const n = arginfo.getValue<int>("n",getDefaultN());
    std::string const prefix = arginfo.getUnparsedValue("prefix",getDefaultFilePrefix(arginfo));

    // n is used as a divisor below, so zero has to be rejected
    if ( ! n )
    {
        ::libmaus2::exception::LibMausException se;
        se.getStream() << "n cannot be 0." << std::endl;
        se.finish();
        throw se;
    }

    libmaus2::bambam::BamDecoder bamdec(std::cin);
    libmaus2::bambam::BamAlignment const & algn = bamdec.getAlignment();
    libmaus2::bambam::BamHeader const & header = bamdec.getHeader();
    ::libmaus2::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header));

    // stream and writer of the output file currently being filled
    libmaus2::aio::OutputStreamInstance::unique_ptr_type COS;
    libmaus2::bambam::BamWriter::unique_ptr_type writer;

    // number of alignments seen
    uint64_t c = 0;
    // number of files opened
    uint64_t f = 0;

    while ( bamdec.readAlignment() )
    {
        if ( c++ % n == 0 )
        {
            // finish the previous file, writer first, then the stream
            writer.reset();
            if ( COS )
                COS->flush();
            COS.reset();

            std::ostringstream fnostr;
            fnostr << prefix << "_" << std::setw(6) << std::setfill('0') << f++ << std::setw(0) << ".bam";
            std::string const fn = fnostr.str();

            libmaus2::aio::OutputStreamInstance::unique_ptr_type tCOS(new libmaus2::aio::OutputStreamInstance(fn));
            COS = UNIQUE_PTR_MOVE(tCOS);

            libmaus2::bambam::BamWriter::unique_ptr_type twriter(new libmaus2::bambam::BamWriter(*COS,*uphead,level));
            writer = UNIQUE_PTR_MOVE(twriter);

            if ( verbose )
                std::cerr << "[V] opened file " << fn << std::endl;
        }

        algn.serialise(writer->getStream());
    }

    // finish the last file
    writer.reset();
    if ( COS )
        COS->flush();
    COS.reset();

    return EXIT_SUCCESS;
}