int bamsort(::libmaus::util::ArgInfo const & arginfo) { ::libmaus::util::TempFileRemovalContainer::setup(); bool const inputisstdin = (!arginfo.hasArg("I")) || (arginfo.getUnparsedValue("I","-") == "-"); bool const outputisstdout = (!arginfo.hasArg("O")) || (arginfo.getUnparsedValue("O","-") == "-"); if ( isatty(STDIN_FILENO) && inputisstdin && (arginfo.getValue<std::string>("inputformat","bam") != "sam") ) { ::libmaus::exception::LibMausException se; se.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl; se.finish(); throw se; } if ( isatty(STDOUT_FILENO) && outputisstdout && (arginfo.getValue<std::string>("outputformat","bam") != "sam") ) { ::libmaus::exception::LibMausException se; se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl; se.finish(); throw se; } int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose()); bool const disablevalidation = arginfo.getValue<int>("disablevalidation",getDefaultDisableValidation()); std::string const inputformat = arginfo.getUnparsedValue("inputformat",getDefaultInputFormat()); int const level = arginfo.getValue<int>("level",getDefaultLevel()); switch ( level ) { case Z_NO_COMPRESSION: case Z_BEST_SPEED: case Z_BEST_COMPRESSION: case Z_DEFAULT_COMPRESSION: break; default: { ::libmaus::exception::LibMausException se; se.getStream() << "Unknown compression level, please use" << " level=" << Z_DEFAULT_COMPRESSION << " (default) or" << " level=" << Z_BEST_SPEED << " (fast) or" << " level=" << Z_BEST_COMPRESSION << " (best) or" << " level=" << Z_NO_COMPRESSION << " (no compression)" << std::endl; se.finish(); throw se; } break; } // prefix for tmp files std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName()); std::string const tmpfilenameout = tmpfilenamebase + "_bamsort"; ::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfilenameout); uint64_t blockmem = arginfo.getValue<uint64_t>("blockmb",getDefaultBlockSize())*1024*1024; std::string const sortorder = arginfo.getValue<std::string>("SO","coordinate"); bool const fixmates = arginfo.getValue<int>("fixmates",getDefaultFixMates()); uint64_t sortthreads = arginfo.getValue<uint64_t>("sortthreads",getDefaultSortThreads()); // input decoder wrapper libmaus::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper( libmaus::bambam::BamMultiAlignmentDecoderFactory::construct( arginfo,false // put rank ) ); ::libmaus::bambam::BamAlignmentDecoder * ppdec = &(decwrapper->getDecoder()); ::libmaus::bambam::BamAlignmentDecoder & dec = *ppdec; if ( disablevalidation ) dec.disableValidation(); ::libmaus::bambam::BamHeader const & header = dec.getHeader(); std::string const headertext(header.text); // add PG line to header std::string const upheadtext = ::libmaus::bambam::ProgramHeaderLineSet::addProgramLine( headertext, "bamsort", // ID "bamsort", // PN arginfo.commandline, // CL ::libmaus::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP std::string(PACKAGE_VERSION) // VN ); // construct new header ::libmaus::bambam::BamHeader uphead(upheadtext); /* * start index/md5 callbacks */ std::string const tmpfileindex = tmpfilenamebase + "_index"; ::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex); std::string md5filename; std::string indexfilename; std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs; ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb; if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) ) { if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" ) md5filename = arginfo.getUnparsedValue("md5filename",""); else std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl; if ( md5filename.size() ) { ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5); Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb); cbs.push_back(Pmd5cb.get()); } } libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex; if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) ) { if ( arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "" ) indexfilename = arginfo.getUnparsedValue("indexfilename",""); else std::cerr << "[V] no filename for index given, not creating index" << std::endl; if ( indexfilename.size() ) { libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex)); Pindex = UNIQUE_PTR_MOVE(Tindex); cbs.push_back(Pindex.get()); } } std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0; if ( cbs.size() ) Pcbs = &cbs; /* * end md5/index callbacks */ if ( sortorder != "queryname" ) uphead.changeSortOrder("coordinate"); else uphead.changeSortOrder("queryname"); libmaus::bambam::BamBlockWriterBase::unique_ptr_type Pout ( libmaus::bambam::BamBlockWriterBaseFactory::construct(uphead, arginfo, Pcbs) ); if ( fixmates ) { if ( sortorder != "queryname" ) { ::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentPosComparator > BEC(blockmem,tmpfilenameout,sortthreads); if ( verbose ) std::cerr << "[V] Reading alignments from source." << std::endl; uint64_t incnt = 0; // current alignment libmaus::bambam::BamAlignment & curalgn = dec.getAlignment(); // previous alignment libmaus::bambam::BamAlignment prevalgn; // previous alignment valid bool prevalgnvalid = false; // MQ field filter libmaus::bambam::BamAuxFilterVector MQfilter; MQfilter.set("MQ"); while ( dec.readAlignment() ) { if ( curalgn.isSecondary() || curalgn.isSupplementary() ) { BEC.putAlignment(curalgn); } else if ( prevalgnvalid ) { // different name if ( strcmp(curalgn.getName(),prevalgn.getName()) ) { BEC.putAlignment(prevalgn); curalgn.swap(prevalgn); } // same name else { libmaus::bambam::BamAlignment::fixMateInformation(prevalgn,curalgn,MQfilter); BEC.putAlignment(prevalgn); BEC.putAlignment(curalgn); prevalgnvalid = false; } } else { prevalgn.swap(curalgn); prevalgnvalid = true; } if ( verbose && ( ( ++incnt & ((1ull<<20)-1) ) == 0 ) ) std::cerr << "[V] " << incnt << std::endl; } if ( prevalgnvalid ) { BEC.putAlignment(prevalgn); prevalgnvalid = false; } if ( verbose ) std::cerr << "[V] read " << incnt << " alignments" << std::endl; // BEC.createOutput(std::cout, uphead, level, verbose, Pcbs); BEC.createOutput(*Pout, verbose); } else { ::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentNameComparator > BEC(blockmem,tmpfilenameout,sortthreads); if ( verbose ) std::cerr << "[V] Reading alignments from source." << std::endl; uint64_t incnt = 0; // current alignment libmaus::bambam::BamAlignment & curalgn = dec.getAlignment(); // previous alignment libmaus::bambam::BamAlignment prevalgn; // previous alignment valid bool prevalgnvalid = false; // MQ field filter libmaus::bambam::BamAuxFilterVector MQfilter; MQfilter.set("MQ"); while ( dec.readAlignment() ) { if ( curalgn.isSecondary() || curalgn.isSupplementary() ) { BEC.putAlignment(curalgn); } else if ( prevalgnvalid ) { // different name if ( strcmp(curalgn.getName(),prevalgn.getName()) ) { BEC.putAlignment(prevalgn); curalgn.swap(prevalgn); } // same name else { libmaus::bambam::BamAlignment::fixMateInformation(prevalgn,curalgn,MQfilter); BEC.putAlignment(prevalgn); BEC.putAlignment(curalgn); prevalgnvalid = false; } } else { prevalgn.swap(curalgn); prevalgnvalid = true; } if ( verbose && ( ( ++incnt & ((1ull<<20)-1) ) == 0 ) ) std::cerr << "[V] " << incnt << std::endl; } if ( prevalgnvalid ) { BEC.putAlignment(prevalgn); prevalgnvalid = false; } if ( verbose ) std::cerr << "[V] read " << incnt << " alignments" << std::endl; // BEC.createOutput(std::cout, uphead, level, verbose, Pcbs); BEC.createOutput(*Pout, verbose); } } else { if ( sortorder != "queryname" ) { ::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentPosComparator > BEC(blockmem,tmpfilenameout,sortthreads); if ( verbose ) std::cerr << "[V] Reading alignments from source." << std::endl; uint64_t incnt = 0; while ( dec.readAlignment() ) { BEC.putAlignment(dec.getAlignment()); incnt++; if ( verbose && (incnt % (1024*1024) == 0) ) std::cerr << "[V] " << incnt/(1024*1024) << "M" << std::endl; } if ( verbose ) std::cerr << "[V] read " << incnt << " alignments" << std::endl; // BEC.createOutput(std::cout, uphead, level, verbose, Pcbs); BEC.createOutput(*Pout, verbose); } else { ::libmaus::bambam::BamEntryContainer< ::libmaus::bambam::BamAlignmentNameComparator > BEC(blockmem,tmpfilenameout,sortthreads); if ( verbose ) std::cerr << "[V] Reading alignments from source." << std::endl; uint64_t incnt = 0; while ( dec.readAlignment() ) { BEC.putAlignment(dec.getAlignment()); incnt++; if ( verbose && (incnt % (1024*1024) == 0) ) std::cerr << "[V] " << incnt/(1024*1024) << "M" << std::endl; } if ( verbose ) std::cerr << "[V] read " << incnt << " alignments" << std::endl; // BEC.createOutput(std::cout, uphead, level, verbose, Pcbs); BEC.createOutput(*Pout, verbose); } } // flush encoder so callbacks see all output data Pout.reset(); if ( Pmd5cb ) { Pmd5cb->saveDigestAsFile(md5filename); } if ( Pindex ) { Pindex->flush(std::string(indexfilename)); } return EXIT_SUCCESS; }
int main(int argc, char * argv[]) { try { libmaus::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report" ) ); V.push_back ( std::pair<std::string,std::string> ( "disablevalidation=<["+::biobambam::Licensing::formatNumber(getDefaultDisableValidation())+"]>", "disable input validation (default is 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("inputformat=<[")+getDefaultInputFormat()+"]>", std::string("input format (") + libmaus::bambam::BamMultiAlignmentDecoderFactory::getValidInputFormats() + ")" ) ); V.push_back ( std::pair<std::string,std::string> ( "I=<[stdin]>", "input filename (standard input if unset)" ) ); V.push_back ( std::pair<std::string,std::string> ( "inputthreads=<[1]>", "input helper threads (for inputformat=bam only, default: 1)" ) ); V.push_back ( std::pair<std::string,std::string> ( "reference=<>", "reference FastA (.fai file required, for cram i/o only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "range=<>", "coordinate range to be processed (for coordinate sorted indexed BAM input only)" ) ); ::biobambam::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bamheap2(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
int main(int argc, char * argv[]) { try { ::libmaus::util::ArgInfo const arginfo(argc,argv); for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) if ( arginfo.restargs[i] == "-v" || arginfo.restargs[i] == "--version" ) { std::cerr << ::biobambam::Licensing::license(); return EXIT_SUCCESS; } else if ( arginfo.restargs[i] == "-h" || arginfo.restargs[i] == "--help" ) { std::cerr << ::biobambam::Licensing::license(); std::cerr << std::endl; std::cerr << "Key=Value pairs:" << std::endl; std::cerr << std::endl; std::vector< std::pair<std::string,std::string> > V; V.push_back ( std::pair<std::string,std::string> ( "level=<["+::biobambam::Licensing::formatNumber(getDefaultLevel())+"]>", "compression settings for output bam file (0=uncompressed,1=fast,9=best,-1=zlib default)" ) ); V.push_back ( std::pair<std::string,std::string> ( "SO=<["+getDefaultSortOrder()+"]>", "sorting order (coordinate or queryname)" ) ); V.push_back ( std::pair<std::string,std::string> ( "verbose=<["+::biobambam::Licensing::formatNumber(getDefaultVerbose())+"]>", "print progress report" ) ); V.push_back ( std::pair<std::string,std::string> ( "blockmb=<["+::biobambam::Licensing::formatNumber(getDefaultBlockSize())+"]>", "size of internal memory buffer used for sorting in MiB" ) ); V.push_back ( std::pair<std::string,std::string> ( "disablevalidation=<["+::biobambam::Licensing::formatNumber(getDefaultDisableValidation())+"]>", "disable input validation (default is 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "tmpfile=<filename>", "prefix for temporary files, default: create files in current directory" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5=<["+::biobambam::Licensing::formatNumber(getDefaultMD5())+"]>", "create md5 check sum (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "md5filename=<filename>", "file name for md5 check sum" ) ); V.push_back ( std::pair<std::string,std::string> ( "index=<["+::biobambam::Licensing::formatNumber(getDefaultIndex())+"]>", "create BAM index (default: 0)" ) ); V.push_back ( std::pair<std::string,std::string> ( "indexfilename=<filename>", "file name for BAM index file" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("inputformat=<[")+getDefaultInputFormat()+"]>", std::string("input format (") + libmaus::bambam::BamMultiAlignmentDecoderFactory::getValidInputFormats() + ")" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("outputformat=<[")+libmaus::bambam::BamBlockWriterBaseFactory::getDefaultOutputFormat()+"]>", std::string("output format (") + libmaus::bambam::BamBlockWriterBaseFactory::getValidOutputFormats() + ")" ) ); V.push_back ( std::pair<std::string,std::string> ( "I=<[stdin]>", "input filename (standard input if unset)" ) ); V.push_back ( std::pair<std::string,std::string> ( "inputthreads=<[1]>", "input helper threads (for inputformat=bam only, default: 1)" ) ); V.push_back ( std::pair<std::string,std::string> ( "reference=<>", "reference FastA (.fai file required, for cram i/o only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "range=<>", "coordinate range to be processed (for coordinate sorted indexed BAM input only)" ) ); V.push_back ( std::pair<std::string,std::string> ( "outputthreads=<[1]>", "output helper threads (for outputformat=bam only, default: 1)" ) ); V.push_back ( std::pair<std::string,std::string> ( "O=<[stdout]>", "output filename (standard output if unset)" ) ); V.push_back ( std::pair<std::string,std::string> ( std::string("fixmates=<[")+::biobambam::Licensing::formatNumber(getDefaultFixMates())+"]>", "fix mate information (for name collated input only, disabled by default)" ) ); ::biobambam::Licensing::printMap(std::cerr,V); std::cerr << std::endl; return EXIT_SUCCESS; } return bamsort(arginfo); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }