/**
 * Split the BAM stream on standard input into consecutive files holding
 * n alignments each (the last file may hold fewer).
 *
 * Output files are named <prefix>_<six digit zero padded index>.bam and each
 * one is written with the header returned by updateHeader().
 *
 * Arguments read from arginfo:
 *  - level:   compression level, validated by checkCompressionLevel
 *  - verbose: if non-zero, report each opened file on standard error
 *  - n:       number of alignments per output file, must not be 0
 *  - prefix:  output file name prefix
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 * @throws libmaus2::exception::LibMausException if standard input is a
 *         terminal or if n is 0
 */
int bamsplit(libmaus2::util::ArgInfo const & arginfo)
{
	if ( isatty(STDIN_FILENO) )
	{
		::libmaus2::exception::LibMausException se;
		se.getStream() << "Refusing read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	uint64_t const n = arginfo.getValue<int>("n",getDefaultN());
	std::string const prefix = arginfo.getUnparsedValue("prefix",getDefaultFilePrefix(arginfo));

	// n is used as a modulus below, so n == 0 would be a division by zero
	if ( ! n )
	{
		::libmaus2::exception::LibMausException se;
		se.getStream() << "n cannot be 0." << std::endl;
		se.finish();
		throw se;
	}

	libmaus2::bambam::BamDecoder bamdec(std::cin);
	libmaus2::bambam::BamAlignment const & algn = bamdec.getAlignment();
	libmaus2::bambam::BamHeader const & header = bamdec.getHeader();
	::libmaus2::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header));

	// currently open output file and the BAM writer on top of it
	libmaus2::aio::OutputStreamInstance::unique_ptr_type COS;
	libmaus2::bambam::BamWriter::unique_ptr_type writer;

	// c: number of alignments seen so far, f: index of the next output file
	uint64_t c = 0;
	uint64_t f = 0;

	while ( bamdec.readAlignment() )
	{
		// every n'th alignment starts a new output file
		if ( c++ % n == 0 )
		{
			// release the writer before flushing and closing the stream it writes to
			writer.reset();
			if ( COS )
				COS->flush();
			COS.reset();

			std::ostringstream fnostr;
			fnostr << prefix << "_" << std::setw(6) << std::setfill('0') << f++ << std::setw(0) << ".bam";
			std::string const fn = fnostr.str();

			libmaus2::aio::OutputStreamInstance::unique_ptr_type tCOS(new libmaus2::aio::OutputStreamInstance(fn));
			COS = UNIQUE_PTR_MOVE(tCOS);

			libmaus2::bambam::BamWriter::unique_ptr_type twriter(new libmaus2::bambam::BamWriter(*COS,*uphead,level));
			writer = UNIQUE_PTR_MOVE(twriter);

			if ( verbose )
				std::cerr << "[V] opened file " << fn << std::endl;
		}

		algn.serialise(writer->getStream());
	}

	// close the last output file (if any alignment was read at all)
	writer.reset();
	if ( COS )
		COS->flush();
	COS.reset();

	return EXIT_SUCCESS;
}
int main() { try { ::libmaus2::bambam::BamDecoder bamdec(std::cin); // ::libmaus2::lz::BufferedGzipStream bgs(std::cin); ::libmaus2::autoarray::AutoArray<char> C(32*1024*1024,false); // double const errfreq = 1e-6; // srand(time(0)); ::libmaus2::random::Random::setup(); ::libmaus2::bambam::BamWriter writer(std::cout,bamdec.getHeader()); uint64_t red = 0; uint64_t total = 0; while ( (bamdec.getStream().read(C.begin(),C.size())) && (red=bamdec.getStream().gcount()) ) { uint64_t off = 0; while ( off < red ) { uint64_t const skip = std::min(red-off, ::libmaus2::random::Random::rand64() % (64*1024) ); // ::libmaus2::random::Random::rand64() % (red-off+1); C [ off + skip ] = ::libmaus2::random::Random::rand8(); off += skip; // std::cerr << "changed offset " << total + off << std::endl; } writer.getStream().write(C.begin(),red); total += red; } } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; } }
int main(int argc, char * argv[]) { try { libmaus::util::ArgInfo const arginfo(argc,argv); std::vector<std::string> const & inputfilenames = arginfo.restargs; libmaus::bambam::BamMergeCoordinate bamdec(inputfilenames /* ,true */); libmaus::bambam::BamAlignment const & algn = bamdec.getAlignment(); libmaus::bambam::BamHeader const & header = bamdec.getHeader(); ::libmaus::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header)); libmaus::bambam::BamWriter writer(std::cout,*uphead); libmaus::bambam::BamWriter::stream_type & bamoutstr = writer.getStream(); while ( bamdec.readAlignment() ) algn.serialise(bamoutstr); } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
int main(int argc, char * argv[]) { uint64_t cnt = 0; try { ::libmaus::util::ArgInfo const arginfo(argc,argv); #if 0 ::libmaus::bambam::BamFormatAuxiliary auxiliary; std::string const tmpfilename = arginfo.getDefaultTmpFileName(); ::libmaus::util::TempFileRemovalContainer::setup(); ::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfilename); ::libmaus::bambam::CollatingBamDecoder bamdec(std::cin,tmpfilename); ::libmaus::bambam::CollatingBamDecoder::alignment_ptr_type algn; ::libmaus::autoarray::AutoArray<char> B; uint64_t c = 0; while ( (algn=bamdec.get()) ) { uint64_t const fqlen = algn->getFastQLength(); if ( fqlen > B.size() ) B = ::libmaus::autoarray::AutoArray<char>(fqlen); char * pe = algn->putFastQ(B.begin()); // std::cout.write(B.begin(),pe-B.begin()); assert ( pe == B.begin() + fqlen ); std::string const reffq = algn->formatFastq(auxiliary); // std::cerr << "expecting " << fqlen << " got " << reffq.size() << std::endl; assert ( reffq.size() == fqlen ); for ( uint64_t i = 0; i < fqlen; ++i ) assert ( reffq[i] == B[i] ); if ( ++c % (1024*1024) == 0 ) { std::cerr << "[V] " << c/(1024*1024) << std::endl; } } #else ::libmaus::bambam::BamDecoder reader(std::cin); ::libmaus::bambam::BamHeader & header = reader.bamheader; ::libmaus::bambam::BamFormatAuxiliary aux; // std::cout << header.text; while ( reader.readAlignment() ) { // std::cout << reader.alignment.formatAlignment(header,aux) << std::endl; if ( ++cnt % (1024*1024) == 0 ) std::cerr << "[V] " << cnt/(1024*1024) << std::endl; } #endif } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; std::cerr << "cnt=" << cnt << std::endl; } }
/**
 * Distribute the alignments of the BAM stream on standard input over div
 * output files in round robin order: the i'th alignment (counting from 0)
 * goes to file number i modulo div.
 *
 * Output files are named <prefix>_<six digit zero padded index>.bam and are
 * all opened before the first alignment is read.
 *
 * Arguments read from arginfo: level, verbose, div, prefix.
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 * @throws libmaus::exception::LibMausException if standard input is a
 *         terminal or if div is 0
 */
int bamsplitmod(libmaus::util::ArgInfo const & arginfo)
{
	if ( isatty(STDIN_FILENO) )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "Refusing read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(
		arginfo.getValue<int>("level",getDefaultLevel())
	);
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	uint64_t const div = arginfo.getValue<int>("div",getDefaultDiv());
	std::string const prefix = arginfo.getUnparsedValue("prefix",getDefaultFilePrefix(arginfo));

	if ( div == 0 )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "div cannot be 0." << std::endl;
		se.finish();
		throw se;
	}

	libmaus::bambam::BamDecoder decoder(std::cin);
	libmaus::bambam::BamAlignment const & current = decoder.getAlignment();
	::libmaus::bambam::BamHeader::unique_ptr_type outheader(
		updateHeader(arginfo,decoder.getHeader())
	);

	// one output stream and one BAM writer per target file
	libmaus::autoarray::AutoArray<libmaus::aio::CheckedOutputStream::unique_ptr_type> streams(div);
	libmaus::autoarray::AutoArray<libmaus::bambam::BamWriter::unique_ptr_type> bamwriters(div);

	for ( uint64_t id = 0; id < div; ++id )
	{
		std::ostringstream namestr;
		namestr << prefix << "_" << std::setw(6) << std::setfill('0') << id << std::setw(0) << ".bam";

		libmaus::aio::CheckedOutputStream::unique_ptr_type newstream(
			new libmaus::aio::CheckedOutputStream(namestr.str())
		);
		streams[id] = UNIQUE_PTR_MOVE(newstream);

		libmaus::bambam::BamWriter::unique_ptr_type newwriter(
			new libmaus::bambam::BamWriter(*streams[id],*outheader,level)
		);
		bamwriters[id] = UNIQUE_PTR_MOVE(newwriter);
	}

	// progress is reported every 2^20 alignments in verbose mode
	uint64_t const progressmask = (1ull<<20)-1;
	uint64_t processed = 0;

	while ( decoder.readAlignment() )
	{
		current.serialise ( bamwriters [ processed % div ] -> getStream() );
		++processed;

		if ( verbose && ((processed & progressmask) == 0) )
			std::cerr << "[V] " << processed << std::endl;
	}

	if ( verbose )
		std::cerr << "[V] " << processed << std::endl;

	// release each writer before flushing and closing its stream
	for ( uint64_t id = 0; id < div; ++id )
	{
		bamwriters[id].reset();
		streams[id]->flush();
		streams[id].reset();
	}

	return EXIT_SUCCESS;
}
/**
 * Merge auxiliary information from a "pre" BAM file (first positional
 * argument, referred to as the unaligned file in the comments below) into
 * the BAM stream read from standard input (the aligned file) and write the
 * result as BAM to standard output.
 *
 * Each input alignment name is expected to start with a decimal rank
 * followed by '_'; the rank selects the record of the pre file (counted
 * from 0) the alignment is merged with. Alignments whose name prefix is not
 * numeric are written out unchanged.
 *
 * Arguments read from arginfo:
 *  - level:          compression level, validated by checkCompressionLevel
 *  - verbose:        progress output (> 1 prints per record details)
 *  - ranksplit:      call split12() on each input alignment
 *  - rankstrip:      call strip12() on each merged alignment
 *  - clipreinsert:   call clipReinsert() on each merged alignment
 *  - zztoname:       call zzToRank() on each merged alignment
 *  - sanity:         compare names and pairing flags before merging
 *  - mod:            progress interval, used as bmod = nextTwoPow(mod)
 *  - tmpfile:        base name for the temporary index file
 *  - md5, md5filename, index, indexfilename: optional checksum/index output
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 * @throws libmaus2::exception::LibMausException if standard input or output
 *         is a terminal, if the pre file ends before the requested rank is
 *         reached, or if a sanity check fails
 */
int bam12auxmerge(::libmaus2::util::ArgInfo const & arginfo)
{
	if ( isatty(STDIN_FILENO) )
	{
		::libmaus2::exception::LibMausException se;
		se.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	if ( isatty(STDOUT_FILENO) )
	{
		::libmaus2::exception::LibMausException se;
		se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	// decoder for the pre file given as first positional argument
	std::string const prefilename = arginfo.getRestArg<std::string>(0);
	libmaus2::bambam::BamDecoder bampredec(prefilename);

	int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	int const ranksplit = arginfo.getValue<int>("ranksplit",getDefaultRankSplit());
	// NOTE(review): the default for rankstrip is taken from getDefaultRankSplit();
	// presumably a rankstrip specific default was intended - confirm
	int const rankstrip = arginfo.getValue<int>("rankstrip",getDefaultRankSplit());
	int const clipreinsert = arginfo.getValue<int>("clipreinsert",getDefaultClipReinsert());
	int const zztoname = arginfo.getValue<int>("zztoname",getDefaultZZToName());
	int const sanity = arginfo.getValue<int>("sanity",getDefaultSanity());
	uint64_t const mod = arginfo.getValue<int>("mod",getDefaultMod());
	// bmask is used below to test whether curid is a multiple of bmod
	uint64_t const bmod = libmaus2::math::nextTwoPow(mod);
	uint64_t const bmask = bmod-1;

	// NOTE(review): Aread is not referenced anywhere else in this function
	libmaus2::autoarray::AutoArray<char> Aread;

	// decoder for the aligned file on standard input
	::libmaus2::bambam::BamDecoder bamdec(std::cin,false);
	::libmaus2::bambam::BamHeader const & header = bamdec.getHeader();
	::libmaus2::bambam::BamHeader const & preheader = bampredec.getHeader();

	// the output header starts from the pre file header without its sequence lines
	std::string const headertext(header.text);
	std::string const preheadertext(libmaus2::bambam::HeaderLine::removeSequenceLines(preheader.text));

	libmaus2::bambam::ProgramHeaderLineSet headerlines(headertext);
	libmaus2::bambam::ProgramHeaderLineSet preheaderlines(preheadertext);

	std::vector<libmaus2::bambam::HeaderLine> allheaderlines = libmaus2::bambam::HeaderLine::extractLines(headertext);

	// append the program lines of the aligned file to the program chain of the
	// pre file: roots get the last id of the pre chain as previous program,
	// children get the id of their parent
	std::string const lastid = preheaderlines.getLastIdInChain();
	std::stack < std::pair<uint64_t,std::string> > pgtodo;
	for ( uint64_t i = 0; i < headerlines.roots.size(); ++i )
		pgtodo.push(std::pair<uint64_t,std::string>(headerlines.roots[i],lastid));

	std::string upheadtext = preheadertext;

	while ( pgtodo.size() )
	{
		// hid: index of the program line, PP: id of the previous program
		uint64_t const hid = pgtodo.top().first;
		std::string const PP = pgtodo.top().second;
		pgtodo.pop();

		libmaus2::bambam::HeaderLine const & line = headerlines.lines[hid];
		// ID, PP, PN, CL, VN (missing fields are replaced by the empty string)
		// ID is not const; presumably addProgramLineRef may change it - confirm
		std::string ID = (line.M.find("ID") != line.M.end()) ? line.M.find("ID")->second : "";
		std::string const PN = (line.M.find("PN") != line.M.end()) ? line.M.find("PN")->second : "";
		std::string const CL = (line.M.find("CL") != line.M.end()) ? line.M.find("CL")->second : "";
		std::string const VN = (line.M.find("VN") != line.M.end()) ? line.M.find("VN")->second : "";

		upheadtext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLineRef(
			upheadtext,
			ID,
			PN,
			CL,
			PP,
			VN
		);

		// schedule the children of this line with ID as their previous program
		if ( headerlines.edges.find(hid) != headerlines.edges.end() )
		{
			std::vector<uint64_t> const & children = headerlines.edges.find(hid)->second;

			for ( uint64_t j = 0; j < children.size(); ++j )
				pgtodo.push(std::pair<uint64_t,std::string>(children[j],ID));
		}
	}

	/* copy SQ lines from the header of the aligned file */
	std::ostringstream sqconcstr;
	sqconcstr << upheadtext;

	for ( uint64_t i = 0; i < allheaderlines.size(); ++i )
		if ( allheaderlines[i].type == "SQ" )
			sqconcstr << allheaderlines[i].line << "\n";

	upheadtext = sqconcstr.str();

	::libmaus2::bambam::BamHeader uphead(upheadtext);
	uphead.changeSortOrder("unknown");

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus2::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	// callbacks handed to the BAM writer; md5 and index are only added when
	// they are switched on and a file name was given
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > cbs;
	::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" )
			md5filename = arginfo.getUnparsedValue("md5filename","");
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( md5filename.size() )
		{
			::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}
	libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		if ( arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "" )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( indexfilename.size() )
		{
			libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}
	// a null pointer is passed to the writer when no callback is active
	std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( cbs.size() )
		Pcbs = &cbs;
	/*
	 * end md5/index callbacks
	 */

	::libmaus2::bambam::BamWriter::unique_ptr_type writer(new ::libmaus2::bambam::BamWriter(std::cout,uphead,level,Pcbs));

	::libmaus2::bambam::BamAlignment & algn = bamdec.getAlignment();
	::libmaus2::bambam::BamAlignment & prealgn = bampredec.getAlignment();
	// number (counted from 0) of the record of the pre file currently held in
	// prealgn, -1 while none has been read
	int64_t curid = -1;

	// aux tag lists of the pre and the aligned record and the filter used for copying
	libmaus2::autoarray::AutoArray< std::pair<uint8_t,uint8_t> > auxpre;
	libmaus2::autoarray::AutoArray< std::pair<uint8_t,uint8_t> > auxnew;
	libmaus2::bambam::BamAuxFilterVector auxfilter;

	// helpers for clipReinsert
	libmaus2::autoarray::AutoArray < std::pair<uint8_t,uint8_t> > auxtags;
	libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> cigop;
	std::stack < libmaus2::bambam::cigar_operation > hardstack;
	libmaus2::bambam::BamAlignment::D_array_type Tcigar;
	libmaus2::bambam::BamAuxFilterVector bafv;
	libmaus2::bambam::BamAuxFilterVector auxfilterout;
	auxfilterout.set('q','s');
	auxfilterout.set('q','q');

	// helpers for zztoname
	libmaus2::bambam::BamAuxFilterVector zzbafv;
	zzbafv.set('z','z');

	// tag filters for secondary/supplementary reads
	libmaus2::bambam::BamAuxFilterVector auxfiltersec;
	auxfiltersec.set('q','s');
	auxfiltersec.set('q','q');
	auxfiltersec.set('a','s');
	auxfiltersec.set('a','h');
	auxfiltersec.set('a','a');
	auxfiltersec.set('a','f');
	auxfiltersec.set('a','r');
	auxfiltersec.set('a','3');

	// loop over aligned BAM file
	while ( bamdec.readAlignment() )
	{
		if ( ranksplit )
			split12(algn);

		// extract rank: parse the characters in front of the first '_' (or the
		// end of the name) as a decimal number, ok records whether all of them
		// were digits
		char const * name = algn.getName();
		char const * u1 = name;
		bool ok = true;
		uint64_t rank = 0;
		while ( *u1 && *u1 != '_' )
		{
			rank *= 10;
			rank += (*u1-'0');
			ok = ok && isdigit(*u1);
			++u1;
		}

		// unable to find rank? write out as is and continue
		if ( ! ok )
		{
			algn.serialise(writer->getStream());
			continue;
		}

		// loop over unaligned BAM file: read records until the one with number
		// rank is in prealgn
		while ( curid != static_cast<int64_t>(rank) )
		{
			bool const a_ok = bampredec.readAlignment();

			if ( ! a_ok )
			{
				libmaus2::exception::LibMausException se;
				se.getStream() << "Found unexpected EOF on file " << prefilename << std::endl;
				se.finish();
				throw se;
			}
			// cannot fail, the case ! a_ok has thrown above
			assert ( a_ok );
			++curid;

			if ( verbose && (! (curid & bmask)) )
				std::cerr << "[V] " << (curid / bmod) << std::endl;
		}

		if ( verbose > 1 )
			std::cerr << "Merging:\n" << algn.formatAlignment(header) << "\n" << prealgn.formatAlignment(preheader) << std::endl;

		uint64_t pretagnum = prealgn.enumerateAuxTags(auxpre);
		uint64_t newtagnum = algn.enumerateAuxTags(auxnew);

		// do some sanity checking
		if ( sanity )
		{
			// first do a name check
			char const * prename = prealgn.getName();
			// NOTE(review): if the name contained no '_' then u1 points to the
			// terminator here and the increment moves past it - confirm that
			// such names cannot reach this point
			u1++; // put on the first letter of readname

			if ( verbose > 1 )
				std::cerr << "Sanity: comparing " << name << " and " << prename << std::endl;

			if ( !is_suffix(prename, u1) ) // names do not match
			{
				libmaus2::exception::LibMausException se;
				se.getStream() << "Sanity check failed on read names, found " << name << " and " << prename << std::endl;
				se.finish();
				throw se;
			}

			// now the names match so try the flags
			if ( !(algn.isPaired() == prealgn.isPaired() && algn.isRead1() == prealgn.isRead1() && algn.isRead2() == prealgn.isRead2()) )
			{
				libmaus2::exception::LibMausException se;
				se.getStream() << "Sanity check failed on flags, " << std::endl
					<< "Aligned " << name << " paired " << algn.isPaired() << " first " << algn.isRead1() << " last " << algn.isRead2() << std::endl
					<< "Unaligned " << prename << " paired " << prealgn.isPaired() << " first " << prealgn.isRead1() << " last " << prealgn.isRead2() << std::endl;
				se.finish();
				throw se;
			}

			if ( verbose > 1 )
				std::cerr << "Sanity check on flags: " << std::endl
					<< "Aligned " << name << " paired " << algn.isPaired() << " first " << algn.isRead1() << " last " << algn.isRead2() << std::endl
					<< "Unaligned " << prename << " paired " << prealgn.isPaired() << " first " << prealgn.isRead1() << " last " << prealgn.isRead2() << std::endl;
		}

		// sort both tag lists so they can be compared by a linear merge
		std::sort(auxpre.begin(),auxpre.begin()+pretagnum);
		std::sort(auxnew.begin(),auxnew.begin()+newtagnum);

		if ( verbose > 1 )
			std::cerr << "pretagnum=" << pretagnum << " newtagnum=" << newtagnum << std::endl;

		// in place merge: prec/newc are read positions, pree/newe the ends and
		// preo/newo the write positions; afterwards auxpre holds the tags which
		// occur in the pre record only
		std::pair<uint8_t,uint8_t> * prec = auxpre.begin();
		std::pair<uint8_t,uint8_t> * pree = prec + pretagnum;
		std::pair<uint8_t,uint8_t> * preo = prec;
		std::pair<uint8_t,uint8_t> * newc = auxnew.begin();
		std::pair<uint8_t,uint8_t> * newe = newc + newtagnum;
		std::pair<uint8_t,uint8_t> * newo = newc;

		while ( prec != pree && newc != newe )
		{
			// pre which is not in new
			if ( *prec < *newc )
			{
				*(preo++) = *(prec++);
			}
			// tag in both, drop pre
			else if ( *prec == *newc )
			{
				*(newo++) = *(newc++);
				prec++;
			}
			// new not in pre
			else
			{
				*(newo++) = *(newc++);
			}
		}

		// copy whatever is left in either list
		while ( prec != pree )
			*(preo++) = *(prec++);
		while ( newc != newe )
			*(newo++) = *(newc++);

		pretagnum = preo-auxpre.begin();
		newtagnum = newo-auxnew.begin();

		// mark the remaining pre tags in the filter, copy them from the pre
		// record and clear the filter again for the next record
		for ( uint64_t i = 0; i < pretagnum; ++i )
			auxfilter.set(auxpre[i].first,auxpre[i].second);

		algn.copyAuxTags(prealgn, auxfilter);

		for ( uint64_t i = 0; i < pretagnum; ++i )
			auxfilter.clear(auxpre[i].first,auxpre[i].second);

		if ( verbose > 1 )
		{
			std::cerr << "pretagnum=" << pretagnum << " newtagnum=" << newtagnum << std::endl;
			std::cerr << "result: " << algn.formatAlignment(header) << std::endl;
		}

		if ( algn.isSecondary() || algn.isSupplementary() )
		{
			// adding adapter clip data to secondary/supplementary reads
			// can lead to incorrect clip reinserts so remove these tags
			algn.filterOutAux(auxfiltersec);
		}

		// copy QC fail flag from original file to aligner output
		if ( prealgn.isQCFail() )
			algn.putFlags( algn.getFlags() | libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_FQCFAIL );

		if ( rankstrip )
			strip12(algn);

		if ( clipreinsert )
			clipReinsert(algn,auxtags,bafv,cigop,Tcigar,hardstack,auxfilterout);

		if ( zztoname )
			zzToRank(algn,zzbafv);

		algn.serialise(writer->getStream());
	}

	// release the writer before the digest and the index are stored;
	// presumably this completes the output seen by the callbacks - confirm
	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
/**
 * Concatenate BAM files and write the result as a single BAM stream to
 * standard output.
 *
 * The input files are the values of all I=<file> arguments followed by the
 * positional arguments. The output header is the one returned by
 * updateHeader() for the header of the BamCat decoder.
 *
 * Arguments read from arginfo: level, verbose, I, tmpfile, md5,
 * md5filename, index, indexfilename.
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 * @throws libmaus::exception::LibMausException if standard output is a
 *         terminal or level is not one of the supported zlib levels
 */
int bamcat(libmaus::util::ArgInfo const & arginfo)
{
	if ( isatty(STDOUT_FILENO) )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	int const level = arginfo.getValue<int>("level",getDefaultLevel());
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());

	// only the four named zlib levels are accepted
	bool const levelknown =
		(level == Z_NO_COMPRESSION) ||
		(level == Z_BEST_SPEED) ||
		(level == Z_BEST_COMPRESSION) ||
		(level == Z_DEFAULT_COMPRESSION);

	if ( ! levelknown )
	{
		::libmaus::exception::LibMausException se;
		se.getStream()
			<< "Unknown compression level, please use"
			<< " level=" << Z_DEFAULT_COMPRESSION << " (default) or"
			<< " level=" << Z_BEST_SPEED << " (fast) or"
			<< " level=" << Z_BEST_COMPRESSION << " (best) or"
			<< " level=" << Z_NO_COMPRESSION << " (no compression)" << std::endl;
		se.finish();
		throw se;
	}

	// I= values first, positional arguments after them
	std::vector<std::string> inputfilenames = arginfo.getPairValues("I");
	inputfilenames.insert(inputfilenames.end(),arginfo.restargs.begin(),arginfo.restargs.end());

	libmaus::bambam::BamCat catdec(inputfilenames /* ,true */);
	libmaus::bambam::BamAlignment const & current = catdec.getAlignment();
	::libmaus::bambam::BamHeader::unique_ptr_type outheader(
		updateHeader(arginfo,catdec.getHeader())
	);

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > callbacks;

	// md5 callback: only if requested and a file name was given
	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		bool const havename =
			arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "";

		if ( havename )
			md5filename = arginfo.getUnparsedValue("md5filename","");
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( md5filename.size() )
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type newmd5cb(
				new ::libmaus::lz::BgzfDeflateOutputCallbackMD5
			);
			Pmd5cb = UNIQUE_PTR_MOVE(newmd5cb);
			callbacks.push_back(Pmd5cb.get());
		}
	}

	// index callback: only if requested and a file name was given
	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		bool const havename =
			arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "";

		if ( havename )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( indexfilename.size() )
		{
			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type newindex(
				new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex)
			);
			Pindex = UNIQUE_PTR_MOVE(newindex);
			callbacks.push_back(Pindex.get());
		}
	}

	// the writer gets a null pointer when there are no callbacks
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = callbacks.size() ? &callbacks : 0;
	/*
	 * end md5/index callbacks
	 */

	::libmaus::bambam::BamWriter::unique_ptr_type writer(
		new ::libmaus::bambam::BamWriter(std::cout,*outheader,level,Pcbs)
	);
	libmaus::bambam::BamWriter::stream_type & out = writer->getStream();

	// progress is reported every 2^20 alignments in verbose mode
	uint64_t const progressmask = (1ull<<20)-1;
	uint64_t written = 0;

	while ( catdec.readAlignment() )
	{
		current.serialise(out);
		++written;

		if ( verbose && ((written & progressmask) == 0) )
			std::cerr << "[V] " << written << std::endl;
	}

	if ( verbose )
		std::cerr << "[V] " << written << std::endl;

	// release the writer before the digest and the index are stored
	writer.reset();

	if ( Pmd5cb )
		Pmd5cb->saveDigestAsFile(md5filename);

	if ( Pindex )
		Pindex->flush(std::string(indexfilename));

	return EXIT_SUCCESS;
}
int main(int argc, char * argv[]) { try { libmaus2::util::ArgInfo const arginfo(argc,argv); libmaus2::timing::RealTimeClock rtc; uint64_t const runs = 10; std::pair <libmaus2::bambam::BamAlignment const *, libmaus2::bambam::BamAlignment const *> P; for ( uint64_t i = 0; i < arginfo.restargs.size(); ++i ) { std::string const fn = arginfo.restargs[i]; double srate = 0, drate = 0; for ( uint64_t j = 0; j < runs; ++j ) { rtc.start(); libmaus2::bambam::BamDecoder bamdec(fn); uint64_t cnt = 0; while ( bamdec.readAlignment() ) ++cnt; double const lela = rtc.getElapsedSeconds(); std::cerr << "[S] " << "cnt=" << cnt << " ela=" << lela << " rate=" << cnt/lela << std::endl; srate += cnt/lela; } for ( uint64_t j = 0; j < runs; ++j ) { rtc.start(); libmaus2::aio::InputStreamInstance CIS(fn); libmaus2::bambam::BamCircularHashCollatingBamDecoder bamdec(CIS,"tmpfile"); uint64_t cnt = 0; while ( bamdec.tryPair(P) ) { if ( P.first ) ++cnt; if ( P.second ) ++cnt; } libmaus2::aio::FileRemoval::removeFile("tmpfile"); double const lela = rtc.getElapsedSeconds(); std::cerr << "[D] " << "cnt=" << cnt << " ela=" << lela << " rate=" << cnt/lela << std::endl; drate += cnt/lela; } srate /= runs; drate /= runs; std::cerr << "[Q] " << srate/drate << std::endl; } } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
/**
 * Check that the alignments of the BAM stream on standard input are in the
 * order named by the sort order field of its header.
 *
 * For "coordinate" the reference id and position of consecutive alignments
 * are compared after conversion to uint32_t, for "queryname"
 * BamAlignmentNameComparator is used. Any other sort order is not checked.
 * Nothing is checked or printed if the stream contains no alignments.
 *
 * @param arginfo parsed command line arguments (verbose)
 * @return EXIT_SUCCESS
 * @throws libmaus2::exception::LibMausException on the first pair of
 *         alignments which is out of order
 */
int bamchecksort(libmaus2::util::ArgInfo const & arginfo)
{
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());

	libmaus2::bambam::BamDecoder bamdec(std::cin);
	libmaus2::bambam::BamAlignment & algn = bamdec.getAlignment();
	libmaus2::bambam::BamHeader const & header = bamdec.getHeader();
	std::string const sortorder = libmaus2::bambam::BamHeader::getSortOrderStatic(header.text);

	// holds the alignment preceding the one in algn
	libmaus2::bambam::BamAlignment prevalgn;

	// empty input: nothing to check
	if ( ! bamdec.readAlignment() )
		return EXIT_SUCCESS;

	prevalgn.swap(algn);

	// progress is reported every 2^20 alignments in verbose mode
	uint64_t const progressmask = (1ull<<20)-1;

	if ( sortorder == "coordinate" )
	{
		uint64_t c = 0;

		while ( bamdec.readAlignment() )
		{
			uint32_t const currefid = static_cast<uint32_t>(algn.getRefID());
			uint32_t const prevrefid = static_cast<uint32_t>(prevalgn.getRefID());

			bool ok = (currefid > prevrefid);

			// same reference sequence: position must not decrease
			if ( (! ok) && (currefid == prevrefid) )
				ok = static_cast<uint32_t>(algn.getPos()) >= static_cast<uint32_t>(prevalgn.getPos());

			if ( ! ok )
			{
				libmaus2::exception::LibMausException se;
				se.getStream() << "Broken order:";
				se.getStream() << prevalgn.formatAlignment(header) << std::endl;
				se.getStream() << algn.formatAlignment(header) << std::endl;
				se.finish();
				throw se;
			}

			prevalgn.swap(algn);

			// the counter is only maintained in verbose mode
			if ( verbose )
			{
				++c;
				if ( (c & progressmask) == 0 )
					std::cerr << "[V] " << c << std::endl;
			}
		}

		if ( verbose )
			std::cerr << "[V] " << c << std::endl;

		std::cerr << "Alignments sorted by coordinate." << std::endl;
	}
	else if ( sortorder == "queryname" )
	{
		uint64_t c = 0;

		while ( bamdec.readAlignment() )
		{
			// bool const ok = libmaus2::bambam::BamAlignmentNameComparator::compareInt(prevalgn,algn) <= 0;
			bool const outoforder = libmaus2::bambam::BamAlignmentNameComparator::compare(algn,prevalgn);

			if ( outoforder )
			{
				libmaus2::exception::LibMausException se;
				se.getStream() << "Broken order:";
				se.getStream() << prevalgn.formatAlignment(header) << std::endl;
				se.getStream() << algn.formatAlignment(header) << std::endl;
				se.getStream() << libmaus2::bambam::BamAlignmentNameComparator::compareInt(prevalgn,algn) << std::endl;
				se.finish();
				throw se;
			}

			prevalgn.swap(algn);

			// the counter is only maintained in verbose mode
			if ( verbose )
			{
				++c;
				if ( (c & progressmask) == 0 )
					std::cerr << "[V] " << c << std::endl;
			}
		}

		if ( verbose )
			std::cerr << "[V] " << c << std::endl;

		std::cerr << "Alignments sorted by query name." << std::endl;
	}
	else
	{
		std::cerr << "[V] not checking order for \"" << sortorder << "\"" << std::endl;
	}

	return EXIT_SUCCESS;
}
void testInplaceReverseComplement() { ::libmaus2::bambam::BamHeader header; header.addChromosome("chr1",2000); std::ostringstream ostr; { ::libmaus2::bambam::BamWriter bamwriter(ostr,header); bamwriter.encodeAlignment("name",0,1000,30, ::libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_FUNMAP, "", 0, 1500, -1, "ACGTATGCA", "HGHGHGHGH" ); bamwriter.commit(); } std::istringstream istr(ostr.str()); libmaus2::bambam::BamDecoder bamdec(istr); while ( bamdec.readAlignment() ) { libmaus2::bambam::BamAlignment & algn = bamdec.getAlignment(); for ( unsigned int k = 0; k <= 10; ++k ) { for ( uint64_t i = 0; i < (1ull << (2*k)); ++i ) { std::string s(k,'A'); uint64_t t = i; for ( uint64_t j = 0; j < k; ++j, t>>=2 ) { switch ( t & 0x3 ) { case 0: s[j] = 'A'; break; case 1: s[j] = 'C'; break; case 2: s[j] = 'G'; break; case 3: s[j] = 'T'; break; } } std::reverse(s.begin(),s.end()); algn.replaceSequence(s,std::string(k,'H')); libmaus2::bambam::BamAlignment::unique_ptr_type ualgn(algn.uclone()); // std::cout << algn.formatAlignment(header) << std::endl; ualgn->reverseComplementInplace(); // std::cout << ualgn->formatAlignment(header) << std::endl; assert ( algn.getReadRC() == ualgn->getRead() ); } std::cerr << "k=" << k << std::endl; } } }