/**
 * Open a file as a checked input stream and position it at a given offset.
 *
 * @param filename name of the file to open
 * @param pos offset handed to seekg, interpreted relative to std::ios::beg
 * @return owning pointer to the opened stream after the seek
 **/
static ::libmaus::aio::CheckedInputStream::unique_ptr_type openFileAtPosition(std::string const & filename, uint64_t const pos)
{
	typedef ::libmaus::aio::CheckedInputStream stream_type;

	stream_type::unique_ptr_type Pstream(new stream_type(filename));
	Pstream->seekg(pos,std::ios::beg);

	return UNIQUE_PTR_MOVE(Pstream);
}
int bamclipreinsert(::libmaus2::util::ArgInfo const & arginfo) { if ( isatty(STDIN_FILENO) ) { ::libmaus2::exception::LibMausException se; se.getStream() << "Refusing to read binary data from terminal, please redirect standard input to pipe or file." << std::endl; se.finish(); throw se; } if ( isatty(STDOUT_FILENO) ) { ::libmaus2::exception::LibMausException se; se.getStream() << "Refusing write binary data to terminal, please redirect standard output to pipe or file." << std::endl; se.finish(); throw se; } int const level = libmaus2::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel())); int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose()); ::libmaus2::bambam::BamDecoder dec(std::cin,false); ::libmaus2::bambam::BamHeader const & header = dec.getHeader(); std::string const headertext(header.text); // add PG line to header std::string const upheadtext = ::libmaus2::bambam::ProgramHeaderLineSet::addProgramLine( headertext, "bamclipreinsert", // ID "bamclipreinsert", // PN arginfo.commandline, // CL ::libmaus2::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP std::string(PACKAGE_VERSION) // VN ); // construct new header libmaus2::bambam::BamHeader const uphead(upheadtext); /* * start index/md5 callbacks */ std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName()); std::string const tmpfileindex = tmpfilenamebase + "_index"; ::libmaus2::util::TempFileRemovalContainer::addTempFile(tmpfileindex); std::string md5filename; std::string indexfilename; std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > cbs; ::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb; if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) ) { if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" ) md5filename = arginfo.getUnparsedValue("md5filename",""); else std::cerr << "[V] no filename for md5 
given, not creating hash" << std::endl; if ( md5filename.size() ) { ::libmaus2::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus2::lz::BgzfDeflateOutputCallbackMD5); Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb); cbs.push_back(Pmd5cb.get()); } } libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex; if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) ) { if ( arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "" ) indexfilename = arginfo.getUnparsedValue("indexfilename",""); else std::cerr << "[V] no filename for index given, not creating index" << std::endl; if ( indexfilename.size() ) { libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus2::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex)); Pindex = UNIQUE_PTR_MOVE(Tindex); cbs.push_back(Pindex.get()); } } std::vector< ::libmaus2::lz::BgzfDeflateOutputCallback * > * Pcbs = 0; if ( cbs.size() ) Pcbs = &cbs; /* * end md5/index callbacks */ ::libmaus2::bambam::BamWriter::unique_ptr_type writer(new ::libmaus2::bambam::BamWriter(std::cout,uphead,level,Pcbs)); libmaus2::bambam::BamAuxFilterVector bafv; // bafv.set('z','z'); // std::vector<uint8_t> R(8); // std::string const zz("zz"); libmaus2::bambam::BamAlignment & algn = dec.getAlignment(); uint64_t c = 0; libmaus2::autoarray::AutoArray < std::pair<uint8_t,uint8_t> > auxtags; libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> cigop; std::stack < libmaus2::bambam::cigar_operation > hardstack; libmaus2::bambam::BamAlignment::D_array_type Tcigar; libmaus2::bambam::BamAuxFilterVector auxfilterout; auxfilterout.set('q','s'); auxfilterout.set('q','q'); while ( dec.readAlignment() ) { // reinsert clipped parts and attach soft clipping cigar operations as needed clipReinsert(algn,auxtags,bafv,cigop,Tcigar,hardstack,auxfilterout); algn.serialise(writer->getStream()); ++c; if ( verbose && (c & (1024*1024-1)) == 0 ) std::cerr << "[V] " << 
c/(1024*1024) << std::endl; } writer.reset(); if ( Pmd5cb ) { Pmd5cb->saveDigestAsFile(md5filename); } if ( Pindex ) { Pindex->flush(std::string(indexfilename)); } return EXIT_SUCCESS; }
/**
 * Read BAM from standard input, collate the alignments into pairs and write
 * them as BAM to standard output. For every pair in which exactly one end
 * is counted by getMapCnt(), the reference id and position of that end are
 * copied to the coordinates and mate coordinates of all ends present.
 *
 * Keys read from arginfo: verbose, colhashbits, collistsize, level,
 * tmpfile, md5, md5filename, index and indexfilename.
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 * @throws ::libmaus::exception::LibMausException if level is not one of
 *         the supported zlib compression levels
 **/
int bamfixmatecoordinates(::libmaus::util::ArgInfo const & arginfo)
{
	::libmaus::util::TempFileRemovalContainer::setup();

	::libmaus::timing::RealTimeClock rtc;
	rtc.start();

	bool const verbose = arginfo.getValue<unsigned int>("verbose",getDefaultVerbose());
	unsigned int const colhashbits = arginfo.getValue<unsigned int>("colhashbits",getDefaultColHashBits());
	unsigned int const collistsize = arginfo.getValue<unsigned int>("collistsize",getDefaultColListSize());
	int const level = arginfo.getValue<int>("level",getDefaultLevel());
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());

	// only the named zlib levels are accepted
	switch ( level )
	{
		case Z_NO_COMPRESSION:
		case Z_BEST_SPEED:
		case Z_BEST_COMPRESSION:
		case Z_DEFAULT_COMPRESSION:
			break;
		default:
		{
			::libmaus::exception::LibMausException se;
			se.getStream()
				<< "Unknown compression level, please use"
				<< " level=" << Z_DEFAULT_COMPRESSION << " (default) or"
				<< " level=" << Z_BEST_SPEED << " (fast) or"
				<< " level=" << Z_BEST_COMPRESSION << " (best) or"
				<< " level=" << Z_NO_COMPRESSION << " (no compression)" << std::endl;
			se.finish();
			throw se;
		}
	}

	std::string const tmpfilename = tmpfilenamebase + "_bamcollate";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfilename);

	::libmaus::bambam::CollatingBamDecoder CBD(std::cin,tmpfilename,false /* put rank */,colhashbits/*hash bits*/,collistsize/*size of output list*/);
	::libmaus::bambam::BamFormatAuxiliary auxdata;
	::libmaus::bambam::BamHeader const & bamheader = CBD.getHeader();

	// add PG line to header
	std::string const upheadtext = ::libmaus::bambam::ProgramHeaderLineSet::addProgramLine(
		bamheader.text,
		"bamfixmatecoordinates", // ID
		"bamfixmatecoordinates", // PN
		arginfo.commandline, // CL
		::libmaus::bambam::ProgramHeaderLineSet(bamheader.text).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);
	// construct new header; any sort order other than queryname is replaced by unknown
	::libmaus::bambam::BamHeader uphead(upheadtext);
	if ( uphead.getSortOrder() != "queryname" )
		uphead.changeSortOrder("unknown");

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;
	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" )
			md5filename = arginfo.getUnparsedValue("md5filename","");
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( md5filename.size() )
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}
	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		if ( arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "" )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( indexfilename.size() )
		{
			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( cbs.size() )
		Pcbs = &cbs;
	/*
	 * end md5/index callbacks
	 */

	// setup bam writer
	::libmaus::bambam::BamWriter::unique_ptr_type writer(new ::libmaus::bambam::BamWriter(std::cout,uphead,level,Pcbs));

	typedef ::libmaus::bambam::CollatingBamDecoder::alignment_ptr_type alignment_ptr_type;
	std::pair<alignment_ptr_type,alignment_ptr_type> P;

	// progress is printed whenever proc crosses a multiple of mod
	uint64_t const mod = 1024*1024;
	uint64_t proc = 0;
	uint64_t lastproc = 0;
	uint64_t paircnt = 0;

	while ( CBD.tryPair(P) )
	{
		uint64_t const firstmapcnt = getMapCnt(P.first);
		uint64_t const secondmapcnt = getMapCnt(P.second);
		uint64_t const mapcnt = firstmapcnt + secondmapcnt;

		if ( mapcnt == 1 )
		{
			// Take the coordinates from the end counted by getMapCnt()
			// (assumes getMapCnt() yields 1 for a present, mapped alignment
			// and 0 otherwise - TODO confirm against its definition).
			// Either pointer may be null for an incomplete pair, so every
			// dereference below is guarded.
			alignment_ptr_type const & src = (P.first && firstmapcnt) ? P.first : P.second;
			assert ( src );

			if ( src )
			{
				// read both values before any alignment is modified, src aliases P.first or P.second
				int32_t const refid = src->getRefID();
				int32_t const pos = src->getPos();

				if ( P.first )
				{
					P.first->putRefId(refid);
					P.first->putPos(pos);
					P.first->putNextRefId(refid);
					P.first->putNextPos(pos);
				}
				if ( P.second )
				{
					P.second->putRefId(refid);
					P.second->putPos(pos);
					P.second->putNextRefId(refid);
					P.second->putNextPos(pos);
				}
			}
		}

		if ( P.first )
		{
			P.first->serialise(writer->getStream());
			++proc;
		}
		if ( P.second )
		{
			P.second->serialise(writer->getStream());
			++proc;
		}
		if ( P.first && P.second )
		{
			paircnt++;
		}

		if ( verbose && (proc/mod != lastproc/mod) )
		{
			std::cerr
				<< "Processed " << proc << " fragments, " << paircnt << " pairs, "
				<< proc/rtc.getElapsedSeconds() << " al/s"
				<< std::endl;
			lastproc = proc;
		}
	}

	if ( verbose )
		std::cerr
			<< "Processed " << proc << " fragments, " << paircnt << " pairs, "
			<< proc/rtc.getElapsedSeconds() << " al/s"
			<< std::endl;

	// destroy the writer before digest and index are stored
	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
void init() { rgfilter.set("RG"); pgfilter.set("PG"); std::vector < std::vector<libmaus2::bambam::Chromosome> const * > V; std::vector < std::vector<libmaus2::bambam::ReadGroup> const * > R; std::vector< std::string const * > H; for ( uint64_t i = 0; i < inputbamheaders.size(); ++i ) { libmaus2::bambam::BamHeader const & header = *inputbamheaders[i]; V.push_back( & (header.getChromosomes()) ); R.push_back( & (header.getReadGroups()) ); H.push_back( & (header.text) ); std::string const SO = libmaus2::bambam::BamHeader::getSortOrderStatic(header.text); orderedCoordinates = orderedCoordinates && (SO == "coordinate"); orderedNames = orderedNames && (SO == "queryname"); } libmaus2::bambam::ChromosomeVectorMerge::unique_ptr_type tchromosomeMergeInfo(new libmaus2::bambam::ChromosomeVectorMerge(V)); chromosomeMergeInfo = UNIQUE_PTR_MOVE(tchromosomeMergeInfo); libmaus2::bambam::ReadGroupVectorMerge::unique_ptr_type treadGroupMergeInfo(new libmaus2::bambam::ReadGroupVectorMerge(R)); readGroupMergeInfo = UNIQUE_PTR_MOVE(treadGroupMergeInfo); libmaus2::bambam::ProgramHeaderLinesMerge::unique_ptr_type tprogramHeaderLinesMergeInfo(new libmaus2::bambam::ProgramHeaderLinesMerge(H)); programHeaderLinesMergeInfo = UNIQUE_PTR_MOVE(tprogramHeaderLinesMergeInfo); // get HD line fields std::vector < std::pair<std::string,std::string> > VHDP; for ( uint64_t i = 0; i < inputbamheaders.size(); ++i ) { std::vector<libmaus2::bambam::HeaderLine> VHD = libmaus2::bambam::HeaderLine::extractLinesByType(inputbamheaders[i]->text,"HD"); if ( VHD.size() ) { libmaus2::bambam::HeaderLine const & H = VHD.front(); for ( std::map<std::string,std::string>::const_iterator ita = H.M.begin(); ita != H.M.end(); ++ita ) VHDP.push_back(*ita); } } // sort by tag std::sort(VHDP.begin(),VHDP.end()); // extract consistent tags present in all HD lines std::map<std::string,std::string> MHD; uint64_t l = 0; while ( l < VHDP.size() ) { uint64_t h = l+1; while ( h < VHDP.size() && VHDP[l].first == VHDP[h].first ) ++h; 
// we have the right number if ( h-l == inputbamheaders.size() ) { // check for consistent value bool eq = true; for ( uint64_t i = l+1; i < h; ++i ) eq = eq && (VHDP[i].second == VHDP[l].second); if ( eq ) MHD[VHDP[l].first] = VHDP[l].second; } l = h; } std::string const VN = (MHD.find("VN") != MHD.end()) ? MHD.find("VN")->second : "1.5"; std::ostringstream headertextstr; headertextstr << "@HD\tVN:" << VN; if ( inputbamheaders.size() == 1 ) headertextstr << "\tSO:" << libmaus2::bambam::BamHeader::getSortOrderStatic(inputbamheaders[0]->text); else headertextstr << "\tSO:unknown"; for ( std::map<std::string,std::string>::const_iterator ita = MHD.begin(); ita != MHD.end(); ++ita ) { std::string const & key = ita->first; if ( key != "VN" && key != "SO" ) headertextstr << "\t" << key << ":" << ita->second; } headertextstr << "\n"; for ( uint64_t i = 0; i < chromosomeMergeInfo->chromosomes.size(); ++i ) headertextstr << chromosomeMergeInfo->chromosomes[i].createLine() << "\n"; for ( uint64_t i = 0; i < readGroupMergeInfo->readgroups.size(); ++i ) headertextstr << readGroupMergeInfo->readgroups[i].createLine() << "\n"; headertextstr << programHeaderLinesMergeInfo->PGtext; std::vector<std::string> otherlines; for ( uint64_t i = 0; i < inputbamheaders.size(); ++i ) { std::vector<libmaus2::bambam::HeaderLine> lines = libmaus2::bambam::HeaderLine::extractLines(inputbamheaders[i]->text); for ( uint64_t j = 0; j < lines.size(); ++j ) { libmaus2::bambam::HeaderLine const & line = lines[j]; if ( line.type != "HD" && line.type != "SQ" && line.type != "RG" && line.type != "PG" ) { otherlines.push_back(line.line); } } } std::set<std::string> otherlinesseen; for ( uint64_t i = 0; i < otherlines.size(); ++i ) if ( otherlinesseen.find(otherlines[i]) == otherlinesseen.end() ) { headertextstr << otherlines[i] << std::endl; otherlinesseen.insert(otherlines[i]); } // std::cerr << std::string(80,'-') << std::endl; std::string const headertext = headertextstr.str(); 
::libmaus2::bambam::BamHeader::unique_ptr_type tbamheader(new ::libmaus2::bambam::BamHeader(headertext)); bamheader = UNIQUE_PTR_MOVE(tbamheader); // std::cerr << "topologically sorted: " << chromosomeMergeInfo->topological << std::endl; // std::cerr << bamheader->text; }
PlainOrGzipStreamBufferWrapper(int const rfd, uint64_t const bufsize = 64*1024, uint64_t const pushbacksize = 64*1024) { libmaus::aio::PosixFdInputStream::unique_ptr_type TPFIS(new libmaus::aio::PosixFdInputStream(rfd,bufsize,pushbacksize)); PFIS = UNIQUE_PTR_MOVE(TPFIS); init(*PFIS,bufsize,pushbacksize); }
static void filterBamUsedSequences( libmaus::util::ArgInfo const & arginfo, std::istream & in, ::libmaus::bitio::IndexedBitVector const & IBV, std::ostream & out ) { libmaus::lz::BgzfInflateStream bgzfin(in); libmaus::bambam::BamHeaderLowMem::unique_ptr_type PBHLM ( libmaus::bambam::BamHeaderLowMem::constructFromBAM(bgzfin)); bool const verbose = arginfo.getValue<unsigned int>("verbose",getDefaultVerbose()); std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs; ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb; std::string md5filename; if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) ) { if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" ) md5filename = arginfo.getUnparsedValue("md5filename",""); else std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl; if ( md5filename.size() ) { ::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5); Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb); cbs.push_back(Pmd5cb.get()); } } int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel())); libmaus::lz::BgzfDeflate<std::ostream>::unique_ptr_type Pbgzfout( new libmaus::lz::BgzfDeflate<std::ostream>( out,level ) ); libmaus::lz::BgzfDeflate<std::ostream> & bgzfout = *Pbgzfout; if ( verbose ) std::cerr << "[V] writing filtered header..."; PBHLM->serialiseSequenceSubset(bgzfout,IBV,"bamfilterheader2" /* id */,"bamfilterheader2" /* pn */, arginfo.commandline /* pgCL */, PACKAGE_VERSION /* pgVN */ ); if ( verbose ) std::cerr << "done." 
<< std::endl; ::libmaus::bambam::BamAlignment algn; uint64_t c = 0; while ( libmaus::bambam::BamAlignmentDecoder::readAlignmentGz(bgzfin,algn) ) { if ( algn.isMapped() ) { int64_t const refid = algn.getRefID(); assert ( refid >= 0 ); assert ( IBV.get(refid) ); algn.putRefId(IBV.rank1(refid)-1); } else { algn.putRefId(-1); } if ( algn.isPaired() && algn.isMapped() ) { int64_t const refid = algn.getNextRefID(); assert ( refid >= 0 ); assert ( IBV.get(refid) ); algn.putNextRefId(IBV.rank1(refid)-1); } else { algn.putNextRefId(-1); } algn.serialise(bgzfout); if ( verbose && ( ((++c) & (1024*1024-1)) == 0 ) ) std::cerr << "[V] " << c/(1024*1024) << std::endl; } bgzfout.flush(); bgzfout.addEOFBlock(); Pbgzfout.reset(); if ( Pmd5cb ) Pmd5cb->saveDigestAsFile(md5filename); }
/**
 * Construct the tree levels on top of the base layer rB of length rn.
 *
 * Starting from the base layer, each round groups the values of the
 * previous layer and stores per group an entry in I[level] (created with
 * klog bits per entry) and an entry in C[level]; rounds are repeated until
 * a layer has at most one element. S[0] is set to n and S[i+1] to the
 * size of I[i].
 *
 * If rmmtreedebug is set, the result is checked against a scan of B:
 * entry z of C[level] has to equal the minimum of the z'th block of
 * k^(level+1) base values and entry z of I[level] has to equal the index
 * of the sub block containing the leftmost minimum.
 *
 * @param rB base layer
 * @param rn number of elements in the base layer
 **/
RMMTree(base_layer_type const & rB, uint64_t const rn)
: B(rB), n(rn), numlevels(computeNumLevels(n)), I(numlevels), C(numlevels), S(numlevels+1)
{
	unsigned int level = 0;

	for ( uint64_t levelsize = n; levelsize > 1; ++level )
	{
		// number of elements on the next level
		uint64_t const nextsize = (levelsize+k-1) >> klog;

		// minimal indices for next level
		libmaus::bitio::CompactArray::unique_ptr_type tIlevel(
			new libmaus::bitio::CompactArray(nextsize,klog)
		);
		I[level] = UNIQUE_PTR_MOVE(tIlevel);

		// histogram computed from the layer below (B for level 0, C[level-1] otherwise)
		libmaus::util::Histogram::unique_ptr_type subhist;
		if ( level != 0 )
		{
			libmaus::util::Histogram::unique_ptr_type tsubhist(fillSubHistogram(C[level-1]->begin(),levelsize));
			subhist = UNIQUE_PTR_MOVE(tsubhist);
		}
		else
		{
			libmaus::util::Histogram::unique_ptr_type tsubhist(fillSubHistogram(B.begin(),levelsize));
			subhist = UNIQUE_PTR_MOVE(tsubhist);
		}

		// fill I[level] and feed the generator for C[level]
		C_type::generator_type impgen(*subhist);
		if ( level != 0 )
			fillSubArrays(C[level-1]->begin(),levelsize,*(I[level]),impgen);
		else
			fillSubArrays(B.begin(),levelsize,*(I[level]),impgen);

		C_ptr_type tClevel(impgen.createFinal());
		C[level] = UNIQUE_PTR_MOVE(tClevel);

		levelsize = nextsize;
	}

	// layer sizes
	S[0] = n;
	for ( uint64_t i = 0; i < numlevels; ++i )
		S[i+1] = I[i]->size();

	if ( rmmtreedebug )
	{
		// blocksize is k^(checklevel+1)
		uint64_t checklevel = 0;
		for ( uint64_t blocksize = k; blocksize < n; blocksize *= k, ++checklevel )
		{
			uint64_t blockid = 0;

			for ( uint64_t blocklow = 0; blocklow < n; ++blockid )
			{
				uint64_t const blockhigh = std::min(blocklow+blocksize,n);

				// leftmost minimum of B in [blocklow,blockhigh)
				uint64_t minvalue = B[blocklow];
				uint64_t minindex = blocklow;
				for ( uint64_t i = blocklow+1; i < blockhigh; ++i )
					if ( B[i] < minvalue )
					{
						minvalue = B[i];
						minindex = i;
					}

				assert ( (*C[checklevel])[blockid] == minvalue );
				assert ( (*I[checklevel])[blockid] == ((minindex-blocklow)*k)/blocksize );

				blocklow = blockhigh;
			}
		}
	}
}
void bamalignfrac(::libmaus2::util::ArgInfo const & arginfo) { libmaus2::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper( libmaus2::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo)); ::libmaus2::bambam::BamAlignmentDecoder * ppdec = &(decwrapper->getDecoder()); ::libmaus2::bambam::BamAlignmentDecoder & dec = *ppdec; ::libmaus2::bambam::BamAlignment const & algn = dec.getAlignment(); libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> cigop; uint64_t basealgn = 0; uint64_t clip = 0; uint64_t totalbases = 0; #if defined(LIBMAUS2_HAVE_REGEX_H) std::string const regexs = arginfo.getUnparsedValue("name",""); libmaus2::util::unique_ptr<libmaus2::regex::PosixRegex>::type regex_ptr; if ( regexs.size() ) { libmaus2::util::unique_ptr<libmaus2::regex::PosixRegex>::type tregex_ptr(new libmaus2::regex::PosixRegex(regexs)); regex_ptr = UNIQUE_PTR_MOVE(tregex_ptr); } #endif while ( dec.readAlignment() ) { if ( algn.isMapped() #if defined(LIBMAUS2_HAVE_REGEX_H) && ( (!regex_ptr) || (regex_ptr->findFirstMatch(algn.getName()) != -1) ) #endif ) { uint32_t const numcig = algn.getCigarOperations(cigop); totalbases += algn.getLseq(); for ( uint64_t i = 0; i < numcig; ++i ) { switch ( cigop[i].first ) { case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CMATCH: case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CINS: case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CEQUAL: case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CDIFF: basealgn += cigop[i].second; break; case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CSOFT_CLIP: clip += cigop[i].second; break; case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CHARD_CLIP: totalbases += cigop[i].second; clip += cigop[i].second; break; case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CDEL: case libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CREF_SKIP: break; } } } } std::cerr << "total bases in mapped reads\t" << totalbases << std::endl; std::cerr << "clipped (hard and soft) bases in mapped 
reads\t" << clip << std::endl; std::cerr << "aligned bases in mapped reads\t" << basealgn << std::endl; }
/**
 * Allocate a new object of this type constructed with slog+1.
 *
 * @return owning pointer to the newly constructed object
 **/
unique_ptr_type extendEmpty() const
{
	unique_ptr_type Pextended(new this_type(slog+1));
	return UNIQUE_PTR_MOVE(Pextended);
}
/**
 * Allocate a new object of this type constructed from the stream textstr.
 *
 * @param textstr input stream handed to the constructor
 * @return owning pointer to the newly constructed object
 **/
static unique_ptr_type construct(std::istream & textstr)
{
	unique_ptr_type Pobject(new this_type(textstr));
	return UNIQUE_PTR_MOVE(Pobject);
}
CompactFastQContainer(std::istream & textstr) : T(textstr), dict(new ::libmaus::fastx::CompactFastQContainerDictionary(textstr)), H(), C() { GetObject G(T.begin()); H = UNIQUE_PTR_MOVE(::libmaus::fastx::CompactFastQHeader::unique_ptr_type(new ::libmaus::fastx::CompactFastQHeader(G))); }
/**
 * Read BAM from standard input and distribute the alignments round robin
 * over div output BAM files: alignment number c (counted from 0) is
 * written to file number c modulo div. The files are named
 * prefix_NNNNNN.bam where NNNNNN is the file number padded with zeroes to
 * six digits.
 *
 * Keys read from arginfo: level, verbose, div and prefix.
 *
 * @param arginfo parsed command line arguments
 * @return EXIT_SUCCESS
 * @throws ::libmaus::exception::LibMausException if standard input is a
 *         terminal or if div is zero or negative
 **/
int bamsplitmod(libmaus::util::ArgInfo const & arginfo)
{
	if ( isatty(STDIN_FILENO) )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "Refusing read binary data from terminal, please redirect standard input to pipe or file." << std::endl;
		se.finish();
		throw se;
	}

	int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));
	int const verbose = arginfo.getValue<int>("verbose",getDefaultVerbose());
	// div is parsed as a signed number so a negative value can be
	// rejected instead of wrapping around to a huge unsigned count
	int const sdiv = arginfo.getValue<int>("div",getDefaultDiv());
	std::string const prefix = arginfo.getUnparsedValue("prefix",getDefaultFilePrefix(arginfo));

	if ( sdiv < 0 )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "div cannot be negative." << std::endl;
		se.finish();
		throw se;
	}

	uint64_t const div = sdiv;

	if ( ! div )
	{
		::libmaus::exception::LibMausException se;
		se.getStream() << "div cannot be 0." << std::endl;
		se.finish();
		throw se;
	}

	libmaus::bambam::BamDecoder bamdec(std::cin);
	libmaus::bambam::BamAlignment const & algn = bamdec.getAlignment();
	libmaus::bambam::BamHeader const & header = bamdec.getHeader();
	::libmaus::bambam::BamHeader::unique_ptr_type uphead(updateHeader(arginfo,header));

	// one output stream and one BAM writer per output file
	libmaus::autoarray::AutoArray<libmaus::aio::CheckedOutputStream::unique_ptr_type> COS(div);
	libmaus::autoarray::AutoArray<libmaus::bambam::BamWriter::unique_ptr_type> writers(div);

	for ( uint64_t i = 0; i < div; ++i )
	{
		std::ostringstream ostr;
		ostr << prefix << "_" << std::setw(6) << std::setfill('0') << i << std::setw(0) << ".bam";

		libmaus::aio::CheckedOutputStream::unique_ptr_type tCOS(new libmaus::aio::CheckedOutputStream(ostr.str()));
		COS[i] = UNIQUE_PTR_MOVE(tCOS);

		libmaus::bambam::BamWriter::unique_ptr_type twriter(new libmaus::bambam::BamWriter(*COS[i],*uphead,level));
		writers[i] = UNIQUE_PTR_MOVE(twriter);
	}

	// number of alignments written so far
	uint64_t c = 0;

	if ( verbose )
	{
		while ( bamdec.readAlignment() )
		{
			algn.serialise ( writers [ (c++) % div ] -> getStream() );

			// progress output every 2^20 alignments
			if ( ((c) & ((1ull<<20)-1)) == 0 )
				std::cerr << "[V] " << c << std::endl;
		}

		std::cerr << "[V] " << c << std::endl;
	}
	else
	{
		while ( bamdec.readAlignment() )
			algn.serialise ( writers [ (c++) % div ] -> getStream() );
	}

	// destroy each writer before its underlying stream is flushed and closed
	for ( uint64_t i = 0; i < div; ++i )
	{
		writers[i].reset();
		COS[i]->flush();
		COS[i].reset();
	}

	return EXIT_SUCCESS;
}
/**
 * Open the file filename as a checked input stream and construct a new
 * object of this type from that stream.
 *
 * @param filename name of the file to read from
 * @return owning pointer to the newly constructed object
 **/
static unique_ptr_type load(std::string const & filename)
{
	libmaus::aio::CheckedInputStream instream(filename);

	unique_ptr_type Pobject(new this_type(instream));

	return UNIQUE_PTR_MOVE(Pobject);
}
/**
 * Create a decoder for entry id of this object on the file filename.
 *
 * @param filename name of the file handed to the decoder
 * @param id index of the entry, (*this)[id] is handed to the decoder
 * @return owning pointer to a FastABgzfDecoder constructed from filename,
 *         (*this)[id] and blocksize
 **/
FastABgzfDecoder::unique_ptr_type getStream(std::string const & filename, uint64_t const id) const
{
	FastABgzfDecoder::unique_ptr_type Pdecoder(
		new FastABgzfDecoder(filename,(*this)[id],blocksize)
	);

	return UNIQUE_PTR_MOVE(Pdecoder);
}
libmaus2::util::LogPipeMultiplexGeneric::LogPipeMultiplexGeneric( std::string const & serverhostname, unsigned short port, std::string const & sid, uint64_t const id ) : pid(-1) { // reset stdoutpipe[0] = stdoutpipe[1] = -1; stderrpipe[0] = stderrpipe[1] = -1; // connect ::libmaus2::network::ClientSocket::unique_ptr_type tsock( new ::libmaus2::network::ClientSocket( port,serverhostname.c_str() ) ); sock = UNIQUE_PTR_MOVE(tsock); // no delay on socket sock->setNoDelay(); // write session id sock->writeString(0,sid); // id sock->writeSingle<uint64_t>(id); // connection type sock->writeString("log"); // create pipe for standard out if ( pipe(&stdoutpipe[0]) != 0 ) { closeFds(); ::libmaus2::exception::LibMausException se; se.getStream() << "pipe() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } //create pipe for standard error if ( pipe(&stderrpipe[0]) != 0 ) { closeFds(); ::libmaus2::exception::LibMausException se; se.getStream() << "pipe() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } // close previous standard output if ( close(STDOUT_FILENO) != 0 ) { closeFds(); ::libmaus2::exception::LibMausException se; se.getStream() << "close() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } if ( close(STDERR_FILENO) != 0 ) { closeFds(); ::libmaus2::exception::LibMausException se; se.getStream() << "close() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } if ( dup2(stdoutpipe[1],STDOUT_FILENO) == -1 ) { closeFds(); ::libmaus2::exception::LibMausException se; se.getStream() << "dup2() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } if ( dup2(stderrpipe[1],STDERR_FILENO) == -1 ) { closeFds(); ::libmaus2::exception::LibMausException se; se.getStream() << "dup2() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } pid = fork(); if ( pid < 0 ) { closeFds(); ::libmaus2::exception::LibMausException se; se.getStream() << "fork() failed: " << strerror(errno) << std::endl; 
se.finish(); throw se; } else if ( pid == 0 ) { // close write end close(stdoutpipe[1]); stdoutpipe[1] = -1; close(stderrpipe[1]); stderrpipe[1] = -1; // close copies close(STDOUT_FILENO); close(STDERR_FILENO); bool running = true; try { while ( running ) { running = false; fd_set fds; int maxfd = -1; FD_ZERO(&fds); if ( stdoutpipe[0] != -1 ) { FD_SET(stdoutpipe[0],&fds); maxfd = std::max(maxfd,stdoutpipe[0]); } if ( stderrpipe[0] != -1 ) { FD_SET(stderrpipe[0],&fds); maxfd = std::max(maxfd,stderrpipe[0]); } running = (maxfd != -1); if ( running ) { int r = ::select(maxfd+1,&fds,0,0,0); try { if ( r > 0 ) { if ( (stdoutpipe[0] != -1) && FD_ISSET(stdoutpipe[0],&fds) ) { ::libmaus2::autoarray::AutoArray<char> B(1024,false); ssize_t red = read(stdoutpipe[0],B.get(),B.size()); if ( red <= 0 ) { std::ostringstream errstream; errstream << "Failed to read from stdout pipe: " << strerror(errno) << std::endl; std::string errstring = errstream.str(); close(stdoutpipe[0]); stdoutpipe[0] = -1; sock->writeMessage<char>(STDERR_FILENO,errstring.c_str(),errstring.size()); sock->readSingle<uint64_t>(); } else { sock->writeMessage<char>(STDOUT_FILENO,B.get(),red); sock->readSingle<uint64_t>(); } } if ( stderrpipe[0] != -1 && FD_ISSET(stderrpipe[0],&fds) ) { ::libmaus2::autoarray::AutoArray<char> B(1024,false); ssize_t red = read(stderrpipe[0],B.get(),B.size()); if ( red <= 0 ) { std::ostringstream errstream; errstream << "Failed to read from stderr pipe: " << strerror(errno) << std::endl; std::string errstring = errstream.str(); close(stderrpipe[0]); stderrpipe[0] = -1; sock->writeMessage<char>(STDERR_FILENO,errstring.c_str(),errstring.size()); sock->readSingle<uint64_t>(); } else { sock->writeMessage<char>(STDERR_FILENO,B.get(),red); sock->readSingle<uint64_t>(); } } } } catch(std::exception const & ex) { } } } } catch(std::exception const & ex) { std::cerr << "LogPipeMultiplexGeneric " << ex.what() << std::endl; } catch(...) 
{ std::cerr << "LogPipeMultiplexGeneric caught unknown exception." << std::endl; } try { std::ostringstream quitmsgstr; quitmsgstr << "\nLog process for id " << id << " is terminating." << std::endl; std::string const quitmsg = quitmsgstr.str(); sock->writeMessage<char>(std::max(STDOUT_FILENO,STDERR_FILENO)+1,quitmsg.c_str(),quitmsg.size()); sock->readSingle<uint64_t>(); } catch(...) { } _exit(0); } else { // close read ends close(stdoutpipe[0]); stdoutpipe[0] = -1; close(stderrpipe[0]); stderrpipe[0] = -1; } }
::libmaus2::autoarray::AutoArray<uint64_t> toWaveletTreeBitsParallel( ::libmaus2::bitio::CompactArray * C, bool const verbose, uint64_t const #if defined(_OPENMP) numthreads #endif ) { uint64_t const pn = ((C->n + 63) / 64)*64; ::libmaus2::autoarray::AutoArray<uint64_t> B( pn/64 , false ); ::libmaus2::parallel::OMPLock block; typedef std::pair<uint64_t, uint64_t> qtype; std::deque < qtype > Q; Q.push_back( qtype(0,C->n) ); if ( verbose ) std::cerr << "(Sorting bits..."; for ( int ib = (C->getB())-1; ib>=0; --ib ) { std::deque < qtype > Q2; uint64_t const sb = (C->getB()-ib-1); uint64_t const mask = (1ull << ib); if ( verbose ) std::cerr << "(l=" << ib << ")"; ::libmaus2::bitio::CompactSparseArray S(C->D,C->n, C->getB() - sb , sb , C->getB()); while ( Q.size() ) { uint64_t l = Q.front().first, r = Q.front().second; Q.pop_front(); // std::cerr << "[" << l << "," << r << "]" << std::endl; uint64_t const numpackets = getMaxThreads() * 2; ::libmaus2::autoarray::AutoArray < uint64_t > aones(numpackets+1); ::libmaus2::autoarray::AutoArray < uint64_t > azeroes(numpackets+1); uint64_t const intervalsize = r-l; uint64_t const packetsize = ( intervalsize + numpackets - 1 ) / numpackets; if ( verbose ) std::cerr << "(c01/b"; #if defined(_OPENMP) #pragma omp parallel for schedule(dynamic,1) num_threads(numthreads) #endif for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h ) { uint64_t ones = 0; uint64_t low = std::min ( l + h * packetsize, r ); uint64_t const rlow = low; uint64_t const high = std::min ( low + packetsize, r ); uint64_t const low64 = std::min ( ((low+63)/64)*64, high ); uint64_t const high64 = high & (~(63ull)); // std::cerr << "low=" << low << " low64=" << low64 << std::endl; /** * align low to 64 **/ block.lock(); for ( ; low != low64 ; ++low ) { uint64_t const v = (C->get(low)&mask)>>ib; ones += v; ::libmaus2::bitio::putBit(B.get(), low, v); } block.unlock(); /** * handle full blocks of 64 values **/ if ( low != high ) { assert ( low % 64 == 0 ); 
assert ( high64 >= low ); uint64_t * Bptr = B.get() + (low/64); while ( low != high64 ) { uint64_t vb = 0; uint64_t const lh = low+64; for ( ; low != lh ; ++low ) { uint64_t const v = (C->get(low)&mask)>>ib; ones += v; vb <<= 1; vb |= v; } (*Bptr++) = vb; } } /** * handle rest **/ block.lock(); for ( ; (low != high) ; ++low ) { uint64_t const v = (C->get(low)&mask)>>ib; ones += v; ::libmaus2::bitio::putBit(B.get(), low, v); } block.unlock(); uint64_t const zeroes = (high-rlow)-ones; aones [ h ] = ones; azeroes [ h ] = zeroes; } if ( verbose ) std::cerr << ")"; /** * compute prefix sums for zeroes and ones **/ { uint64_t c = 0; for ( uint64_t i = 0; i < numpackets + 1; ++i ) { uint64_t const t = aones[i]; aones[i] = c; c += t; } } { uint64_t c = 0; for ( uint64_t i = 0; i < numpackets + 1; ++i ) { uint64_t const t = azeroes[i]; azeroes[i] = c; c += t; } } uint64_t const ones = aones[numpackets]; uint64_t const zeros = (r-l)-ones; ::libmaus2::autoarray::AutoArray < ::libmaus2::bitio::CompactArray::unique_ptr_type > ACZ(numpackets); ::libmaus2::autoarray::AutoArray < ::libmaus2::bitio::CompactArray::unique_ptr_type > ACO(numpackets); if ( verbose ) std::cerr << "(a"; for ( uint64_t h = 0; h < numpackets; ++h ) { ::libmaus2::bitio::CompactArray::unique_ptr_type tACZ( new ::libmaus2::bitio::CompactArray( azeroes [ h+1 ] - azeroes[ h ], C->getB() - sb ) ); ACZ[h] = UNIQUE_PTR_MOVE(tACZ); ::libmaus2::bitio::CompactArray::unique_ptr_type tACO( new ::libmaus2::bitio::CompactArray( aones [ h+1 ] - aones[ h ], C->getB() - sb ) ); ACO[h] = UNIQUE_PTR_MOVE(tACO); } if ( verbose ) std::cerr << ")"; if ( verbose ) std::cerr << "(d"; #if defined(_OPENMP) #pragma omp parallel for schedule(dynamic,1) num_threads(numthreads) #endif for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h ) { uint64_t const low = std::min ( l + h * packetsize, r ); uint64_t const high = std::min ( low + packetsize, r ); uint64_t zp = 0; uint64_t op = 0; ::libmaus2::bitio::CompactArray & CO = 
*ACO[h]; ::libmaus2::bitio::CompactArray & CZ = *ACZ[h]; for ( uint64_t i = low; i != high; ++i ) { uint64_t const v = S.get(i); if ( v & mask ) CO.set ( op++, v); else CZ.set ( zp++, v); } assert ( zp == azeroes[h+1]-azeroes[h] ); assert ( op == aones[h+1]-aones[h] ); } if ( verbose ) std::cerr << ")"; std::vector < CopyBackPacket > zpacketstodo; for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h ) { uint64_t const low = l + azeroes[h]; uint64_t const high = low + (azeroes[h+1]-azeroes[h]); if ( high-low ) zpacketstodo.push_back ( CopyBackPacket(h,low,high) ); } std::vector < CopyBackPacket > opacketstodo; for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h ) { uint64_t const low = l + azeroes[numpackets ] + aones[h]; uint64_t const high = low + (aones[h+1]-aones[h]); if ( high-low ) opacketstodo.push_back ( CopyBackPacket(h,low,high) ); } std::vector < std::vector < CopyBackPacket > > zpackets; while ( zpacketstodo.size() ) { std::vector < CopyBackPacket > zpacketsnewtodo; std::vector < CopyBackPacket > nlist; nlist.push_back(zpacketstodo.front()); for ( uint64_t i = 1; i < zpacketstodo.size(); ++i ) if ( CopyBackPacket::overlap(nlist.back(), zpacketstodo[i], C->getB()) ) zpacketsnewtodo.push_back(zpacketstodo[i]); else nlist.push_back(zpacketstodo[i]); zpackets.push_back(nlist); zpacketstodo = zpacketsnewtodo; } std::vector < std::vector < CopyBackPacket > > opackets; while ( opacketstodo.size() ) { std::vector < CopyBackPacket > opacketsnewtodo; std::vector < CopyBackPacket > nlist; nlist.push_back(opacketstodo.front()); for ( uint64_t i = 1; i < opacketstodo.size(); ++i ) if ( CopyBackPacket::overlap(nlist.back(), opacketstodo[i], C->getB()) ) opacketsnewtodo.push_back(opacketstodo[i]); else nlist.push_back(opacketstodo[i]); opackets.push_back(nlist); opacketstodo = opacketsnewtodo; } // std::cerr << "zpackets: " << zpackets.size() << " opackets: " << opackets.size() << std::endl; if ( verbose ) std::cerr << "(cb"; for ( uint64_t q = 0; 
q < zpackets.size(); ++q ) #if defined(_OPENMP) #pragma omp parallel for schedule(dynamic,1) num_threads(numthreads) #endif for ( int64_t j = 0; j < static_cast<int64_t>(zpackets[q].size()); ++j ) { CopyBackPacket const CBP = zpackets[q][j]; uint64_t ac = CBP.low; ::libmaus2::bitio::CompactArray & CZ = *ACZ[CBP.h]; for ( uint64_t zc = 0 ; zc != CBP.high-CBP.low; ++zc ) S.set ( ac++ , CZ.get(zc) ); } for ( uint64_t q = 0; q < opackets.size(); ++q ) #if defined(_OPENMP) #pragma omp parallel for schedule(dynamic,1) num_threads(numthreads) #endif for ( int64_t j = 0; j < static_cast<int64_t>(opackets[q].size()); ++j ) { CopyBackPacket const CBP = opackets[q][j]; uint64_t ac = CBP.low; ::libmaus2::bitio::CompactArray & CO = *ACO[CBP.h]; for ( uint64_t oc = 0 ; oc != CBP.high-CBP.low; ++oc ) S.set ( ac++ , CO.get(oc) ); } if ( verbose ) std::cerr << ")"; if ( zeros ) Q2.push_back ( qtype(l,l+zeros) ); if ( ones ) Q2.push_back ( qtype(r-ones,r) ); } // std::cerr << std::endl; uint64_t const numpackets = getMaxThreads() * 2; uint64_t const intervalsize = C->n; uint64_t const packetsize = ( intervalsize + numpackets - 1 ) / numpackets; std::vector < CopyBackPacket > packetstodo; for ( int64_t h = 0; h < static_cast<int64_t>(numpackets); ++h ) { uint64_t const low = std::min(h*packetsize,C->n); uint64_t const high = std::min(low+packetsize,C->n); if ( high-low ) packetstodo.push_back ( CopyBackPacket(h,low,high) ); } std::vector < std::vector < CopyBackPacket > > packets; while ( packetstodo.size() ) { std::vector < CopyBackPacket > packetsnewtodo; std::vector < CopyBackPacket > nlist; nlist.push_back(packetstodo.front()); for ( uint64_t i = 1; i < packetstodo.size(); ++i ) if ( CopyBackPacket::overlap(nlist.back(), packetstodo[i], C->getB()) ) packetsnewtodo.push_back(packetstodo[i]); else nlist.push_back(packetstodo[i]); packets.push_back(nlist); packetstodo = packetsnewtodo; } for ( uint64_t q = 0; q < packets.size(); ++q ) #if defined(_OPENMP) #pragma omp parallel for 
schedule(dynamic,1) num_threads(numthreads) #endif for ( int64_t h = 0; h < static_cast<int64_t>(packets[q].size()); ++h ) { CopyBackPacket const CBP = packets[q][h]; for ( uint64_t i = CBP.low; i < CBP.high; ++i ) ::libmaus2::bitio::putBit ( C->D , i*C->getB() + sb , ::libmaus2::bitio::getBit(B.get(), i) ); } Q = Q2; }
/**
 * create a heap allocated copy of this object using the copy constructor
 *
 * @return unique pointer owning the copy
 **/
unique_ptr_type uclone() const
{
	unique_ptr_type copyOfThis(new this_type(*this));
	return UNIQUE_PTR_MOVE(copyOfThis);
}
/**
 * Copy a BAM stream from standard input to standard output. Whenever two
 * consecutive records are both flagged as paired and carry the same name,
 * and exactly one of the two is mapped, the reference id and position of the
 * mapped record are stored as position and mate position in both records.
 * All other records are copied unchanged.
 *
 * The output header gets an additional PG line; its sort order is set to
 * "unknown" unless it is "queryname". Optionally an md5 checksum and a BAM
 * index of the output are produced via deflate callbacks.
 *
 * @param arginfo parsed command line (keys read here: verbose, level,
 *        tmpfile, md5, md5filename, index, indexfilename)
 * @return EXIT_SUCCESS
 **/
int bamfixmatecoordinatesnamesorted(::libmaus::util::ArgInfo const & arginfo)
{
	bool const verbose = arginfo.getValue<unsigned int>("verbose",getDefaultVerbose());

	::libmaus::timing::RealTimeClock rtc;
	rtc.start();

	// gzip compression level for output
	int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(arginfo.getValue<int>("level",getDefaultLevel()));

	::libmaus::bambam::BamDecoder bamfile(std::cin);
	std::string const headertext(bamfile.getHeader().text);

	// add PG line to header
	std::string const upheadtext = ::libmaus::bambam::ProgramHeaderLineSet::addProgramLine(
		headertext,
		"bamfixmatecoordinatesnamesorted", // ID
		"bamfixmatecoordinatesnamesorted", // PN
		arginfo.commandline, // CL
		::libmaus::bambam::ProgramHeaderLineSet(headertext).getLastIdInChain(), // PP
		std::string(PACKAGE_VERSION) // VN
	);

	// construct new header
	::libmaus::bambam::BamHeader uphead(upheadtext);
	if ( uphead.getSortOrder() != "queryname" )
		uphead.changeSortOrder("unknown");
	std::string const & finalheadtext = uphead.text;
	::libmaus::bambam::BamHeader finalheader(finalheadtext);

	/*
	 * start index/md5 callbacks
	 */
	std::string const tmpfilenamebase = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const tmpfileindex = tmpfilenamebase + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(tmpfileindex);

	std::string md5filename;
	std::string indexfilename;

	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > cbs;
	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	if ( arginfo.getValue<unsigned int>("md5",getDefaultMD5()) )
	{
		if ( arginfo.hasArg("md5filename") && arginfo.getUnparsedValue("md5filename","") != "" )
			md5filename = arginfo.getUnparsedValue("md5filename","");
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( md5filename.size() )
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(new ::libmaus::lz::BgzfDeflateOutputCallbackMD5);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			cbs.push_back(Pmd5cb.get());
		}
	}
	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	if ( arginfo.getValue<unsigned int>("index",getDefaultIndex()) )
	{
		if ( arginfo.hasArg("indexfilename") && arginfo.getUnparsedValue("indexfilename","") != "" )
			indexfilename = arginfo.getUnparsedValue("indexfilename","");
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( indexfilename.size() )
		{
			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(tmpfileindex));
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			cbs.push_back(Pindex.get());
		}
	}
	// the writer expects a null pointer if there are no callbacks
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * Pcbs = 0;
	if ( cbs.size() )
		Pcbs = &cbs;
	/*
	 * end md5/index callbacks
	 */

	::libmaus::bambam::BamWriter::unique_ptr_type writer(new ::libmaus::bambam::BamWriter(std::cout,finalheader,level,Pcbs));

	// look-ahead window of two records; the bool of a slot tells whether
	// the slot currently holds a record which has not been written yet
	typedef ::libmaus::bambam::BamAlignment::shared_ptr_type alignment_ptr_type;
	typedef std::pair< alignment_ptr_type, bool > slot_type;
	std::pair< slot_type, slot_type > P(
		slot_type(alignment_ptr_type(),false),
		slot_type(alignment_ptr_type(),false)
	);

	// try to read two alignments
	P.first.second = bamfile.readAlignment();
	if ( P.first.second )
	{
		P.first.first = bamfile.salignment();
		P.second.second = bamfile.readAlignment();
		if ( P.second.second )
			P.second.first = bamfile.salignment();
	}

	uint64_t single = 0, pairs = 0;
	uint64_t proc = 0;
	uint64_t lastproc = 0;
	uint64_t const mod = 1024*1024;

	// while we have two alignments
	while ( P.first.second && P.second.second )
	{
		uint32_t const aflags = P.first.first->getFlags();
		uint32_t const bflags = P.second.first->getFlags();

		// both marked as paired and same name?
		if (
			(aflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FPAIRED)
			&&
			(bflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FPAIRED)
			&&
			(! strcmp(P.first.first->getName(),P.second.first->getName()))
		)
		{
			unsigned int const amap = (aflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FUNMAP) ? 0 : 1;
			unsigned int const bmap = (bflags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FUNMAP) ? 0 : 1;

			// if exactly one of the two is mapped
			if ( amap + bmap == 1 )
			{
				::libmaus::bambam::BamAlignment::shared_ptr_type mapped = amap ? P.first.first : P.second.first;

				int64_t const tid = mapped->getRefID();
				int64_t const pos = mapped->getPos();

				// set all tid and pos values
				P.first.first->putRefId(tid);
				P.first.first->putPos(pos);
				P.first.first->putNextRefId(tid);
				P.first.first->putNextPos(pos);
				P.second.first->putRefId(tid);
				P.second.first->putPos(pos);
				P.second.first->putNextRefId(tid);
				P.second.first->putNextPos(pos);
			}

			// write alignments
			P.first.first->serialise(writer->getStream());
			P.second.first->serialise(writer->getStream());

			// Both slots have been written. Invalidate the second slot before
			// refilling so that it does not keep a stale "valid" flag when the
			// input ends here (the final assertion below relies on this).
			P.second.second = false;

			// read new alignments
			P.first.second = bamfile.readAlignment();
			if ( P.first.second )
			{
				P.first.first = bamfile.salignment();
				P.second.second = bamfile.readAlignment();
				if ( P.second.second )
					P.second.first = bamfile.salignment();
			}

			pairs++;
			proc += 2;
		}
		// different names
		else
		{
			// write first alignment
			P.first.first->serialise(writer->getStream());
			// move second to first
			std::swap(P.first,P.second);
			// read new second
			P.second.second = P.first.second && bamfile.readAlignment();
			if ( P.second.second )
				P.second.first = bamfile.salignment();

			single++;
			proc += 1;
		}

		// progress report each time proc crosses a multiple of mod
		if ( verbose && (proc/mod != lastproc/mod) )
		{
			std::cerr
				<< proc << "\t" << single << "\t" << pairs << "\t"
				<< proc/rtc.getElapsedSeconds() << "al/s"
				<< std::endl;
			lastproc = proc;
		}
	}

	// a single record may be left over in the first slot
	if ( P.first.second )
	{
		P.first.first->serialise(writer->getStream());
		single++;
		proc += 1;
	}

	if ( verbose )
		std::cerr
			<< proc << "\t" << single << "\t" << pairs << "\t"
			<< proc/rtc.getElapsedSeconds() << "al/s"
			<< std::endl;

	assert ( ! P.second.second );

	// destroy the writer before the callback results are stored
	writer.reset();

	if ( Pmd5cb )
	{
		Pmd5cb->saveDigestAsFile(md5filename);
	}
	if ( Pindex )
	{
		Pindex->flush(std::string(indexfilename));
	}

	return EXIT_SUCCESS;
}
/**
 * constructor taking over an Array832 object
 *
 * @param rA unique pointer to the array; it is handed to member A through
 *        UNIQUE_PTR_MOVE
 **/
libmaus2::util::NegativeDifferenceArray32::NegativeDifferenceArray32(
	::libmaus2::util::Array832::unique_ptr_type & rA
)
: A(UNIQUE_PTR_MOVE(rA))
{
}
libmaus::util::LogPipeMultiplex::LogPipeMultiplex( std::string const & serverhostname, unsigned short port, std::string const & sid ) : pid(-1) { // connect ::libmaus::network::ClientSocket::unique_ptr_type tsock( new ::libmaus::network::ClientSocket( port,serverhostname.c_str() ) ); sock = UNIQUE_PTR_MOVE(tsock); // no delay on socket sock->setNoDelay(); // write session id sock->writeString(0,sid); uint64_t stag; cmdline = sock->readString(stag); if ( pipe(&stdoutpipe[0]) != 0 ) { ::libmaus::exception::LibMausException se; se.getStream() << "pipe() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } if ( pipe(&stderrpipe[0]) != 0 ) { ::libmaus::exception::LibMausException se; se.getStream() << "pipe() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } if ( close(STDOUT_FILENO) != 0 ) { ::libmaus::exception::LibMausException se; se.getStream() << "close() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } if ( close(STDERR_FILENO) != 0 ) { ::libmaus::exception::LibMausException se; se.getStream() << "close() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } if ( dup2(stdoutpipe[1],STDOUT_FILENO) == -1 ) { ::libmaus::exception::LibMausException se; se.getStream() << "dup2() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } if ( dup2(stderrpipe[1],STDERR_FILENO) == -1 ) { ::libmaus::exception::LibMausException se; se.getStream() << "dup2() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } pid = fork(); if ( pid < 0 ) { ::libmaus::exception::LibMausException se; se.getStream() << "fork() failed: " << strerror(errno) << std::endl; se.finish(); throw se; } else if ( pid == 0 ) { // close write end close(stdoutpipe[1]); close(stderrpipe[1]); // close copies close(STDOUT_FILENO); close(STDERR_FILENO); bool running = true; try { while ( running ) { running = false; fd_set fds; int maxfd = -1; FD_ZERO(&fds); if ( stdoutpipe[0] != -1 ) { FD_SET(stdoutpipe[0],&fds); maxfd 
= std::max(maxfd,stdoutpipe[0]); } if ( stderrpipe[0] != -1 ) { FD_SET(stderrpipe[0],&fds); maxfd = std::max(maxfd,stderrpipe[0]); } running = (maxfd != -1); if ( running ) { int r = select(maxfd+1,&fds,0,0,0); try { if ( r > 0 ) { if ( stdoutpipe[0] != -1 && FD_ISSET(stdoutpipe[0],&fds) ) { ::libmaus::autoarray::AutoArray<char> B(1024,false); ssize_t red = read(stdoutpipe[0],B.get(),B.size()); if ( red <= 0 ) { std::ostringstream errstream; errstream << "Failed to read from stdout pipe: " << strerror(errno) << std::endl; std::string errstring = errstream.str(); sock->writeMessage<char>(STDERR_FILENO,errstring.c_str(),errstring.size()); uint64_t stag, n; sock->readMessage<uint64_t>(stag,0,n); stdoutpipe[0] = -1; } else { sock->writeMessage<char>(STDOUT_FILENO,B.get(),red); uint64_t stag, n; sock->readMessage<uint64_t>(stag,0,n); } } if ( stderrpipe[0] != -1 && FD_ISSET(stderrpipe[0],&fds) ) { ::libmaus::autoarray::AutoArray<char> B(1024,false); ssize_t red = read(stderrpipe[0],B.get(),B.size()); if ( red <= 0 ) { std::ostringstream errstream; errstream << "Failed to read from stderr pipe: " << strerror(errno) << std::endl; std::string errstring = errstream.str(); sock->writeMessage<char>(STDERR_FILENO,errstring.c_str(),errstring.size()); uint64_t stag, n; sock->readMessage<uint64_t>(stag,0,n); stderrpipe[0] = -1; } else { sock->writeMessage<char>(STDERR_FILENO,B.get(),red); uint64_t stag, n; sock->readMessage<uint64_t>(stag,0,n); } } } } catch(std::exception const & ex) { } } } } catch(...) { std::cerr << "Caught exception in LogPipeMultiplex" << std::endl; } _exit(0); } else { // close read ends close(stdoutpipe[0]); close(stderrpipe[0]); } }
/**
 * allocate a ZlibCompressorObject constructed with the stored level
 *
 * @return unique pointer owning the new compressor object
 **/
virtual CompressorObject::unique_ptr_type operator()()
{
	CompressorObject::unique_ptr_type compressor(new ZlibCompressorObject(level));
	return UNIQUE_PTR_MOVE(compressor);
}
/**
 * factory function allocating a decoder object
 *
 * @param rindex block index, passed on to the constructor
 * @param fn file name, passed on to the constructor
 * @return unique pointer owning the new decoder object
 **/
static unique_ptr_type construct(
	std::vector < std::pair < uint64_t, uint64_t > > const & rindex,
	std::string const & fn
)
{
	unique_ptr_type decoder(new this_type(rindex,fn));
	return UNIQUE_PTR_MOVE(decoder);
}
/**
 * Filter a BAM stream read from standard input and write the records which
 * pass the filter to standard output.
 *
 * For each record a count is computed as the sum of three indicators: the
 * unmapped flag is not set, the mate unmapped flag is not set, the proper
 * pair flag is set. The record is kept if this count lies in
 * [minmapped,maxmapped] and getLseq() is at least minlen.
 *
 * Optionally an md5 checksum and a BAM index of the output are produced via
 * deflate callbacks.
 *
 * @param arginfo parsed command line (keys read here: minmapped, maxmapped,
 *        minlen, level, tmpfile, md5, md5filename, index, indexfilename)
 * @return EXIT_SUCCESS
 **/
int bamfilter(libmaus::util::ArgInfo const & arginfo)
{
	// filter thresholds
	uint64_t const minmapped = arginfo.getValue<uint64_t>("minmapped",getDefaultMinMapped());
	uint64_t const maxmapped = arginfo.getValue<uint64_t>("maxmapped",getDefaultMaxMapped());
	uint64_t const minlen = arginfo.getValue<uint64_t>("minlen",getDefaultMinLen());

	// compression level for output
	int const level = libmaus::bambam::BamBlockWriterBaseFactory::checkCompressionLevel(
		arginfo.getValue<int>("level",getDefaultLevel())
	);

	// input
	::libmaus::bambam::BamDecoder decoder(std::cin);
	::libmaus::bambam::BamHeader const & inputheader = decoder.getHeader();
	::libmaus::bambam::BamAlignment & algn = decoder.getAlignment();

	/*
	 * set up md5/index callbacks
	 */
	std::string const tmpprefix = arginfo.getValue<std::string>("tmpfile",arginfo.getDefaultTmpFileName());
	std::string const indextmpfile = tmpprefix + "_index";
	::libmaus::util::TempFileRemovalContainer::addTempFile(indextmpfile);

	std::string md5filename;
	std::string indexfilename;
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > callbacks;

	::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Pmd5cb;
	bool const md5requested = arginfo.getValue<unsigned int>("md5",getDefaultMD5());
	if ( md5requested )
	{
		std::string requestedname;
		if ( arginfo.hasArg("md5filename") )
			requestedname = arginfo.getUnparsedValue("md5filename","");

		if ( requestedname != "" )
			md5filename = requestedname;
		else
			std::cerr << "[V] no filename for md5 given, not creating hash" << std::endl;

		if ( ! md5filename.empty() )
		{
			::libmaus::lz::BgzfDeflateOutputCallbackMD5::unique_ptr_type Tmd5cb(
				new ::libmaus::lz::BgzfDeflateOutputCallbackMD5
			);
			Pmd5cb = UNIQUE_PTR_MOVE(Tmd5cb);
			callbacks.push_back(Pmd5cb.get());
		}
	}

	libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Pindex;
	bool const indexrequested = arginfo.getValue<unsigned int>("index",getDefaultIndex());
	if ( indexrequested )
	{
		std::string requestedname;
		if ( arginfo.hasArg("indexfilename") )
			requestedname = arginfo.getUnparsedValue("indexfilename","");

		if ( requestedname != "" )
			indexfilename = requestedname;
		else
			std::cerr << "[V] no filename for index given, not creating index" << std::endl;

		if ( ! indexfilename.empty() )
		{
			libmaus::bambam::BgzfDeflateOutputCallbackBamIndex::unique_ptr_type Tindex(
				new libmaus::bambam::BgzfDeflateOutputCallbackBamIndex(indextmpfile)
			);
			Pindex = UNIQUE_PTR_MOVE(Tindex);
			callbacks.push_back(Pindex.get());
		}
	}

	// the writer gets a null pointer if no callback is active
	std::vector< ::libmaus::lz::BgzfDeflateOutputCallback * > * const Pcallbacks =
		callbacks.empty() ? 0 : &callbacks;
	/*
	 * callbacks are set up
	 */

	// output header and writer
	::libmaus::bambam::BamHeader::unique_ptr_type uphead(
		libmaus::bambam::BamHeaderUpdate::updateHeader(arginfo,inputheader,"bamfilter",std::string(PACKAGE_VERSION))
	);
	::libmaus::bambam::BamWriter::unique_ptr_type writer(
		new ::libmaus::bambam::BamWriter(std::cout,*uphead,level,Pcallbacks)
	);

	while ( decoder.readAlignment() )
	{
		uint32_t const flags = algn.getFlags();

		// sum of the three indicators described above
		uint64_t nummapped = 0;
		if ( ! (flags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FUNMAP) )
			nummapped += 1;
		if ( ! (flags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FMUNMAP) )
			nummapped += 1;
		if ( flags & ::libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_FPROPER_PAIR )
			nummapped += 1;

		bool const countok = (minmapped <= nummapped) && (nummapped <= maxmapped);

		if ( countok && algn.getLseq() >= static_cast<int64_t>(minlen) )
			algn.serialise(writer->getStream());
	}

	// destroy the writer before the callback results are stored
	writer.reset();

	if ( Pmd5cb )
		Pmd5cb->saveDigestAsFile(md5filename);
	if ( Pindex )
		Pindex->flush(std::string(indexfilename));

	return EXIT_SUCCESS;
}
/** * constructor * * @param rindex block index * @param fn file name **/ SnappyAlignmentMergeInput( std::vector < std::pair < uint64_t, uint64_t > > const & rindex, std::string const & fn) : index(rindex), streams(index.size()), data(index.size()), namecomp(static_cast<uint8_t const *>(0)), heapcomp(namecomp,data.begin()), Q(heapcomp) { bool openok = true; try { for ( uint64_t i = 0; i < index.size(); ++i ) if ( index[i].second ) { libmaus::lz::SnappyOffsetFileInputStream::unique_ptr_type tstreamsi( new libmaus::lz::SnappyOffsetFileInputStream(fn,index[i].first) ); streams [ i ] = UNIQUE_PTR_MOVE(tstreamsi); } } catch(std::exception const & ex) { openok = false; } if ( ! openok ) { std::cerr << "[V] failed to open a file handle for each single collation block, trying to merge through a single file handle" << std::endl; for ( uint64_t i = 0; i < index.size(); ++i ) if ( index[i].second ) streams[i].reset(); libmaus::aio::CheckedInputStream::unique_ptr_type TCIS(new libmaus::aio::CheckedInputStream(fn)); Psingle = UNIQUE_PTR_MOVE(TCIS); for ( uint64_t i = 0; i < index.size(); ++i ) if ( index[i].second ) { libmaus::lz::SnappyOffsetFileInputStream::unique_ptr_type tstreamsi ( new libmaus::lz::SnappyOffsetFileInputStream(*Psingle,index[i].first) ); streams [ i ] = UNIQUE_PTR_MOVE(tstreamsi); } } for ( uint64_t i = 0; i < index.size(); ++i ) if ( index[i].second ) { index[i].second -= 1; #if !defined(NDEBUG) bool const alok = #endif libmaus::bambam::BamDecoder::readAlignmentGz(*(streams[i]),data[i],0,false); #if !defined(NDEBUG) assert ( alok ); #endif Q.push(i); } }
int main(int argc, char * argv[]) { try { ::libmaus::util::ArgInfo arginfo(argc,argv); ::libmaus::util::TempFileRemovalContainer::setup(); ::std::vector<std::string> const & inputfilenames = arginfo.restargs; char const * fasuffixes[] = { ".fa", ".fasta", 0 }; std::string deftmpname = libmaus::util::OutputFileNameTools::endClipLcp(inputfilenames,&fasuffixes[0]) + ".fa.tmp"; while ( ::libmaus::util::GetFileSize::fileExists(deftmpname) ) deftmpname += "_"; std::string defoutname = libmaus::util::OutputFileNameTools::endClipLcp(inputfilenames,&fasuffixes[0]) + ".fa.recoded"; while ( ::libmaus::util::GetFileSize::fileExists(defoutname) ) defoutname += "_"; std::string const tempfilename = arginfo.getValue<std::string>("tempfilename",deftmpname); std::string const outfilename = arginfo.getValue<std::string>("outputfilename",defoutname); std::string const indexfilename = tempfilename + ".index"; unsigned int const addterm = arginfo.getValue<unsigned int>("addterm",0); unsigned int const termadd = addterm ? 
1 : 0; ::libmaus::util::TempFileRemovalContainer::addTempFile(tempfilename); ::libmaus::util::TempFileRemovalContainer::addTempFile(indexfilename); std::cerr << "temp file name " << tempfilename << std::endl; std::cerr << "output file name " << outfilename << std::endl; /* uint64_t const numseq = */ ::libmaus::fastx::FastAReader::rewriteFiles(inputfilenames,tempfilename,indexfilename); uint64_t curpos = 0; ::libmaus::aio::CheckedOutputStream COS(outfilename); // 0,A,C,G,T,N // map forward ::libmaus::autoarray::AutoArray<char> cmap(256,false); std::fill(cmap.begin(),cmap.end(),5+termadd); cmap['\n'] = 0 + termadd; cmap['a'] = cmap['A'] = 1 + termadd; cmap['c'] = cmap['C'] = 2 + termadd; cmap['g'] = cmap['G'] = 3 + termadd; cmap['t'] = cmap['T'] = 4 + termadd; cmap['n'] = cmap['N'] = 5 + termadd; // map to reverse complement ::libmaus::autoarray::AutoArray<char> rmap(256,false); std::fill(rmap.begin(),rmap.end(),5+termadd); rmap['\n'] = 0 + termadd; rmap['a'] = rmap['A'] = 4 + termadd; rmap['c'] = rmap['C'] = 3 + termadd; rmap['g'] = rmap['G'] = 2 + termadd; rmap['t'] = rmap['T'] = 1 + termadd; rmap['n'] = rmap['N'] = 5 + termadd; // reverse complement for mapped data ::libmaus::autoarray::AutoArray<char> xmap(256,false); std::fill(xmap.begin(),xmap.end(),5+termadd); xmap[0] = 0 + termadd; xmap[1] = 4 + termadd; xmap[2] = 3 + termadd; xmap[3] = 2 + termadd; xmap[4] = 1 + termadd; xmap[5] = 5 + termadd; ::libmaus::autoarray::AutoArray<char> imap(256,false); for ( uint64_t i = 0; i < imap.size(); ++i ) imap[i] = static_cast<char>(i); ::libmaus::fastx::FastAReader::RewriteInfoDecoder::unique_ptr_type infodec(new ::libmaus::fastx::FastAReader::RewriteInfoDecoder(indexfilename)); ::libmaus::fastx::FastAReader::RewriteInfo info; uint64_t maxseqlen = 0; while ( infodec->get(info) ) maxseqlen = std::max(maxseqlen,info.seqlen); std::cerr << "[V] max seq len " << maxseqlen << std::endl; ::libmaus::fastx::FastAReader::RewriteInfoDecoder::unique_ptr_type tinfodec(new 
::libmaus::fastx::FastAReader::RewriteInfoDecoder(indexfilename)); infodec = UNIQUE_PTR_MOVE(tinfodec); if ( maxseqlen <= 256*1024 ) { ::libmaus::aio::CheckedInputStream CIS(tempfilename); ::libmaus::autoarray::AutoArray<uint8_t> B(maxseqlen+1,false); while ( infodec->get(info) ) { // skip id CIS.ignore(info.idlen+2); // read sequence plus following terminator CIS.read(reinterpret_cast<char *>(B.begin()), info.seqlen+1); // map for ( uint64_t i = 0; i < info.seqlen+1; ++i ) B[i] = cmap[B[i]]; // write COS.write(reinterpret_cast<char const *>(B.begin()),info.seqlen+1); // remap for ( uint64_t i = 0; i < info.seqlen+1; ++i ) B[i] = xmap[B[i]]; // reverse std::reverse(B.begin(),B.begin()+info.seqlen); // write COS.write(reinterpret_cast<char const *>(B.begin()),info.seqlen+1); } } else { while ( infodec->get(info) ) { // std::cerr << info.valid << "\t" << info.idlen << "\t" << info.seqlen << "\t" << info.getIdPrefix() << std::endl; uint64_t const seqbeg = curpos + (info.idlen+2); uint64_t const seqend = seqbeg + info.seqlen; ::libmaus::aio::CheckedInputStream CIS(tempfilename); CIS.seekg(seqbeg); ::libmaus::util::GetFileSize::copyMap(CIS,COS,cmap.begin(),seqend-seqbeg+1); ::libmaus::aio::CircularReverseWrapper CRW(tempfilename,seqend); ::libmaus::util::GetFileSize::copyMap(CRW,COS,rmap.begin(),seqend-seqbeg+1); curpos += (info.idlen+2) + (info.seqlen+1); } } if ( addterm ) COS.put(0); return EXIT_SUCCESS; } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }
/**
 * read the next compressed block from the concatenation of intervals
 *
 * If no stream is open, the next non empty interval is opened and the stream
 * is positioned at the start of that interval. If there is no such interval,
 * block.eof is set and true is returned without reading anything.
 *
 * @param block object receiving the sizes, the stream position, the interval
 *        and the compressed data of the block
 * @return true if the end of the input was detected before reading or the
 *         stream delivered all block.compsize bytes of payload
 **/
bool readBlock(SimpleCompressedInputBlockConcatBlock & block)
{
	block.compsize = 0;
	block.uncompsize = 0;
	block.currentInterval = currentInterval;
	block.eof = false;

	// no open stream: move on to the next interval
	if ( Pcis.get() == 0 )
	{
		// empty intervals contain no blocks
		for ( ; intervalsIt != intervals.end() && intervalsIt->empty(); ++intervalsIt )
		{
		}

		// nothing left to read
		if ( intervalsIt == intervals.end() )
		{
			block.eof = true;
			return true;
		}

		// take the interval and advance the iterator
		currentInterval = &(*intervalsIt);
		++intervalsIt;
		block.currentInterval = currentInterval;

		// open the file of the interval
		libmaus2::aio::InputStreamInstance::unique_ptr_type newstream(
			new libmaus2::aio::InputStreamInstance(currentInterval->name)
		);
		Pcis = UNIQUE_PTR_MOVE(newstream);

		// go to the first block of the interval
		Pcis->seekg(currentInterval->start.first);
		streampos = currentInterval->start.first;
	}

	block.blockstreampos = streampos;

	// read the two size fields; encoding them again into a counting object
	// yields the number of bytes they take up in the file
	libmaus2::util::CountPutObject metacounter;
	block.uncompsize = libmaus2::util::UTF8::decodeUTF8(*Pcis);
	::libmaus2::util::UTF8::encodeUTF8(block.uncompsize,metacounter);
	block.compsize = ::libmaus2::util::NumberSerialisation::deserialiseNumber(*Pcis);
	::libmaus2::util::NumberSerialisation::serialiseNumber(metacounter,block.compsize);
	block.metasize = metacounter.c;

	// enlarge the input buffer if the payload does not fit
	if ( block.I.size() < block.compsize )
		block.I = libmaus2::autoarray::AutoArray<uint8_t>(block.compsize,false);

	// read the payload
	Pcis->read(reinterpret_cast<char *>(block.I.begin()),block.compsize);
	streampos += (block.metasize+block.compsize);

	bool const payloadcomplete = ( Pcis->gcount() == static_cast<int64_t>(block.compsize) );

	// this block started at the end position of the interval:
	// close the stream and check whether any non empty interval follows
	if ( block.blockstreampos == currentInterval->end.first )
	{
		Pcis.reset();

		for ( ; intervalsIt != intervals.end() && intervalsIt->empty(); ++intervalsIt )
		{
		}

		if ( intervalsIt == intervals.end() )
			block.eof = true;
	}

	return payloadcomplete;
}
int bamheap2(libmaus::util::ArgInfo const & arginfo) { bool const verbose = arginfo.getValue("verbose",getDefaultVerbose()); std::string const reference = arginfo.getUnparsedValue("reference",std::string()); std::string const outputprefix = arginfo.getUnparsedValue("outputprefix",std::string()); libmaus::bambam::BamAlignmentDecoderWrapper::unique_ptr_type decwrapper( libmaus::bambam::BamMultiAlignmentDecoderFactory::construct(arginfo)); ::libmaus::bambam::BamAlignmentDecoder * ppdec = &(decwrapper->getDecoder()); ::libmaus::bambam::BamAlignmentDecoder & dec = *ppdec; ::libmaus::bambam::BamHeader const & header = dec.getHeader(); ::libmaus::bambam::BamAlignment const & algn = dec.getAlignment(); double const damult = arginfo.getValue<double>("amult",1); double const dcmult = arginfo.getValue<double>("cmult",1); double const dgmult = arginfo.getValue<double>("gmult",1); double const dtmult = arginfo.getValue<double>("tmult",1); double const dpadmult = arginfo.getValue<double>("padmult",1); double maxmult = 0; maxmult = std::max(damult,maxmult); maxmult = std::max(dcmult,maxmult); maxmult = std::max(dgmult,maxmult); maxmult = std::max(dtmult,maxmult); maxmult = std::max(dpadmult,maxmult); uint64_t const amult = std::floor((damult / maxmult) * (1ull<<16) + 0.5); uint64_t const cmult = std::floor((dcmult / maxmult) * (1ull<<16) + 0.5); uint64_t const gmult = std::floor((dgmult / maxmult) * (1ull<<16) + 0.5); uint64_t const tmult = std::floor((dtmult / maxmult) * (1ull<<16) + 0.5); uint64_t const padmult = std::floor((dpadmult / maxmult) * (1ull<<16) + 0.5); libmaus::fastx::FastAIndex::unique_ptr_type Pindex; libmaus::aio::CheckedInputStream::unique_ptr_type PCIS; if ( reference.size() ) { libmaus::fastx::FastAIndex::unique_ptr_type Tindex( libmaus::fastx::FastAIndex::load(reference+".fai") ); Pindex = UNIQUE_PTR_MOVE(Tindex); libmaus::aio::CheckedInputStream::unique_ptr_type TCIS(new libmaus::aio::CheckedInputStream(reference)); PCIS = UNIQUE_PTR_MOVE(TCIS); } 
libmaus::autoarray::AutoArray<libmaus::bambam::cigar_operation> cigop; libmaus::autoarray::AutoArray<char> bases; int64_t prevrefid = -1; std::string refidname = "*"; std::map< uint64_t, HeapEntry > M; uint64_t alcnt = 0; std::vector< std::pair<char,uint8_t> > pendinginserts; int64_t loadedRefId = -1; int64_t streamRefId = -1; libmaus::autoarray::AutoArray<char> refseqbases; ConsensusAccuracy * consacc = 0; std::map<uint64_t,ConsensusAccuracy> Mconsacc; typedef libmaus::util::shared_ptr<std::ostringstream>::type stream_ptr_type; stream_ptr_type Pstream; ConsensusAux Caux; Caux.M['a'] = Caux.M['A'] = amult; Caux.M['c'] = Caux.M['C'] = cmult; Caux.M['g'] = Caux.M['G'] = gmult; Caux.M['t'] = Caux.M['T'] = tmult; Caux.M[padsym] = padmult; while ( dec.readAlignment() ) { if ( algn.isMapped() && (!algn.isQCFail()) ) { assert ( ! pendinginserts.size() ); uint32_t const numcigop = algn.getCigarOperations(cigop); uint64_t readpos = 0; uint64_t refpos = algn.getPos(); uint64_t const seqlen = algn.decodeRead(bases); uint8_t const * qual = libmaus::bambam::BamAlignmentDecoderBase::getQual(algn.D.begin()); // handle finished columns if ( algn.getRefID() != prevrefid ) { while ( M.size() ) { HeapEntry & H = M.begin()->second; if ( outputprefix.size() && (streamRefId != prevrefid) ) { if ( Pstream ) { std::ostringstream fnostr; fnostr << outputprefix << "_" << header.getRefIDName(streamRefId); libmaus::aio::PosixFdOutputStream PFOS(fnostr.str()); PFOS << ">" << header.getRefIDName(streamRefId) << '\n'; PFOS << Pstream->str() << '\n'; Pstream.reset(); } stream_ptr_type Tstream(new std::ostringstream); Pstream = Tstream; streamRefId = prevrefid; } if ( Pindex && (loadedRefId != prevrefid) ) { refseqbases = Pindex->readSequence(*PCIS, Pindex->getSequenceIdByName(refidname)); loadedRefId = prevrefid; if ( Mconsacc.find(loadedRefId) == Mconsacc.end() ) Mconsacc[loadedRefId] = ConsensusAccuracy(refseqbases.size()); consacc = &(Mconsacc[loadedRefId]); } 
H.toStream(std::cout,M.begin()->first,refidname,(M.begin()->first < refseqbases.size()) ? static_cast<int>(refseqbases[M.begin()->first]) : -1,Caux,consacc,Pstream.get()); M.erase(M.begin()); } prevrefid = algn.getRefID(); refidname = header.getRefIDName(prevrefid); } else { while ( M.size() && M.begin()->first < refpos ) { HeapEntry & H = M.begin()->second; if ( outputprefix.size() && (streamRefId != prevrefid) ) { if ( Pstream ) { std::ostringstream fnostr; fnostr << outputprefix << "_" << header.getRefIDName(streamRefId); libmaus::aio::PosixFdOutputStream PFOS(fnostr.str()); PFOS << ">" << header.getRefIDName(streamRefId) << '\n'; PFOS << Pstream->str() << '\n'; Pstream.reset(); } stream_ptr_type Tstream(new std::ostringstream); Pstream = Tstream; streamRefId = prevrefid; } if ( Pindex && (loadedRefId != prevrefid) ) { refseqbases = Pindex->readSequence(*PCIS, Pindex->getSequenceIdByName(refidname)); loadedRefId = prevrefid; if ( Mconsacc.find(loadedRefId) == Mconsacc.end() ) Mconsacc[loadedRefId] = ConsensusAccuracy(refseqbases.size()); consacc = &(Mconsacc[loadedRefId]); } H.toStream(std::cout,M.begin()->first,refidname,(M.begin()->first < refseqbases.size()) ? 
static_cast<int>(refseqbases[M.begin()->first]) : -1,Caux,consacc,Pstream.get()); M.erase(M.begin()); } } for ( uint64_t ci = 0; ci < numcigop; ++ci ) { uint64_t const ciglen = cigop[ci].second; switch ( cigop[ci].first ) { case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CMATCH: case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CEQUAL: case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CDIFF: { if ( pendinginserts.size() ) { M[refpos].I.push_back(pendinginserts); pendinginserts.resize(0); } for ( uint64_t i = 0; i < ciglen; ++i ) { M[refpos].V.push_back(std::make_pair(bases[readpos],qual[readpos])); readpos++; refpos++; } break; } case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CINS: { for ( uint64_t i = 0; i < ciglen; ++i, ++readpos ) pendinginserts.push_back(std::make_pair(bases[readpos],qual[readpos])); break; } case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CDEL: // handle pending inserts if ( pendinginserts.size() ) { M[refpos].I.push_back(pendinginserts); pendinginserts.resize(0); } // deleting bases from the reference for ( uint64_t i = 0; i < ciglen; ++i, ++refpos ) M[refpos].V.push_back(std::make_pair(padsym,0)); break; case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CREF_SKIP: // handle pending inserts if ( pendinginserts.size() ) { M[refpos].I.push_back(pendinginserts); pendinginserts.resize(0); } // skip bases on reference for ( uint64_t i = 0; i < ciglen; ++i ) { refpos++; } break; case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CSOFT_CLIP: // skip bases on read for ( uint64_t i = 0; i < ciglen; ++i ) { readpos++; } break; case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CHARD_CLIP: break; case libmaus::bambam::BamFlagBase::LIBMAUS_BAMBAM_CPAD: { for ( uint64_t i = 0; i < ciglen; ++i, ++readpos ) pendinginserts.push_back(std::make_pair(padsym,0)); break; } } } if ( pendinginserts.size() ) { M[refpos].I.push_back(pendinginserts); M[refpos].iadd++; pendinginserts.resize(0); } assert ( readpos == seqlen ); } if ( verbose && ((++alcnt % 
(1024*1024)) == 0) ) std::cerr << "[V] " << alcnt << std::endl; } while ( M.size() ) { HeapEntry & H = M.begin()->second; if ( outputprefix.size() && (streamRefId != prevrefid) ) { if ( Pstream ) { std::ostringstream fnostr; fnostr << outputprefix << "_" << header.getRefIDName(streamRefId); libmaus::aio::PosixFdOutputStream PFOS(fnostr.str()); PFOS << ">" << header.getRefIDName(streamRefId) << '\n'; PFOS << Pstream->str() << '\n'; Pstream.reset(); } stream_ptr_type Tstream(new std::ostringstream); Pstream = Tstream; streamRefId = prevrefid; } if ( Pindex && (loadedRefId != prevrefid) ) { refseqbases = Pindex->readSequence(*PCIS, Pindex->getSequenceIdByName(refidname)); loadedRefId = prevrefid; if ( Mconsacc.find(loadedRefId) == Mconsacc.end() ) Mconsacc[loadedRefId] = ConsensusAccuracy(refseqbases.size()); consacc = &(Mconsacc[loadedRefId]); } H.toStream(std::cout,M.begin()->first,refidname,(M.begin()->first < refseqbases.size()) ? static_cast<int>(refseqbases[M.begin()->first]) : -1,Caux,consacc,Pstream.get()); M.erase(M.begin()); } if ( Pstream ) { std::ostringstream fnostr; fnostr << outputprefix << "_" << header.getRefIDName(streamRefId); libmaus::aio::PosixFdOutputStream PFOS(fnostr.str()); PFOS << ">" << header.getRefIDName(streamRefId) << '\n'; PFOS << Pstream->str() << '\n'; Pstream.reset(); } ConsensusAccuracy constotal; for ( std::map<uint64_t,ConsensusAccuracy>::const_iterator ita = Mconsacc.begin(); ita != Mconsacc.end(); ++ita ) { std::cerr << header.getRefIDName(ita->first) << "\t" << ita->second << std::endl; std::map<uint64_t,uint64_t> const M = ita->second.depthhistogram.get(); uint64_t total = 0; uint64_t preavg = 0; for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita ) { total += aita->second; preavg += aita->first * aita->second; } uint64_t acc = 0; for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita ) { acc += aita->second; std::cerr << "H[" << 
header.getRefIDName(ita->first) << "," << aita->first << ",+]" << "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total << "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl; } acc = 0; for ( std::map<uint64_t,uint64_t>::const_reverse_iterator aita = M.rbegin(); aita != M.rend(); ++aita ) { acc += aita->second; std::cerr << "H[" << header.getRefIDName(ita->first) << "," << aita->first << ",-]" << "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total << "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl; } std::cerr << "H[" << header.getRefIDName(ita->first) << ",avg]\t" << static_cast<double>(preavg)/total << std::endl; constotal += ita->second; } if ( Mconsacc.size() ) { std::cerr << "all\t" << constotal << std::endl; std::map<uint64_t,uint64_t> const M = constotal.depthhistogram.get(); uint64_t total = 0; uint64_t preavg = 0; for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita ) { total += aita->second; preavg += aita->first * aita->second; } uint64_t acc = 0; for ( std::map<uint64_t,uint64_t>::const_iterator aita = M.begin(); aita != M.end(); ++aita ) { acc += aita->second; std::cerr << "H[" << "all" << "," << aita->first << ",+]" << "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total << "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl; } acc = 0; for ( std::map<uint64_t,uint64_t>::const_reverse_iterator aita = M.rbegin(); aita != M.rend(); ++aita ) { acc += aita->second; std::cerr << "H[" << "all" << "," << aita->first << ",-]" << "\t" << aita->second << "\t" << static_cast<double>(aita->second)/total << "\t" << acc << "\t" << static_cast<double>(acc)/total << std::endl; } std::cerr << "H[all,avg]\t" << static_cast<double>(preavg) / total << std::endl; } return EXIT_SUCCESS; }
/**
 * Factory call operator: allocate a new ZlibDecompressorObject and hand
 * ownership to the caller through the DecompressorObject pointer type.
 *
 * @return pointer owning the newly created decompressor object
 **/
virtual DecompressorObject::unique_ptr_type operator()()
{
	DecompressorObject::unique_ptr_type decompressor(
		new ZlibDecompressorObject
	);

	return UNIQUE_PTR_MOVE(decompressor);
}
/**
 * Create a decoder for the entry with index id, reading from in.
 *
 * The decoder is constructed from the stream, the element (*this)[id]
 * and the blocksize member of this object.
 *
 * @param in input stream the decoder reads from
 * @param id index of the entry, used as argument of operator[]
 * @return pointer owning the newly created decoder
 **/
FastABgzfDecoder::unique_ptr_type getStream(std::istream & in, uint64_t const id) const
{
	FastABgzfDecoder::unique_ptr_type decoder(
		new FastABgzfDecoder(in,(*this)[id],blocksize)
	);

	return UNIQUE_PTR_MOVE(decoder);
}
static libmaus::autoarray::AutoArray<libmaus::bambam::BamRange::unique_ptr_type> parse(std::string const & ranges, libmaus::bambam::BamHeader const & header) { std::vector < std::string > const outertokens = splitSpace(ranges); libmaus::autoarray::AutoArray<libmaus::bambam::BamRange::unique_ptr_type> A(outertokens.size()); for ( uint64_t i = 0; i < outertokens.size(); ++i ) { std::string const & outertoken = outertokens[i]; uint64_t sempos = outertoken.size(); for ( uint64_t j = 0; j < outertoken.size(); ++j ) if ( outertoken[j] == ':' ) sempos = j; if ( sempos == outertoken.size() ) { libmaus::bambam::BamRange::unique_ptr_type tAi(new libmaus::bambam::BamRangeChromosome(outertoken,header)); A[i] = UNIQUE_PTR_MOVE(tAi); } else { std::string const refname = outertoken.substr(0,sempos); std::string const rest = outertoken.substr(sempos+1); // std::cerr << "refname=" << refname << " rest=" << rest << std::endl; uint64_t dashpos = rest.size(); for ( uint64_t j = 0; j < rest.size(); ++j ) if ( rest[j] == '-' ) dashpos = j; if ( dashpos == rest.size() ) { int64_t num = 0; for ( uint64_t j = 0; j < rest.size(); ++j ) if ( isdigit(rest[j]) ) { num *= 10; num += rest[j]-'0'; } else if ( rest[j] == ',' ) { } else { libmaus::exception::LibMausException se; se.getStream() << "Found invalid range character in " << rest << std::endl; se.finish(); throw se; } libmaus::bambam::BamRange::unique_ptr_type tAi(new libmaus::bambam::BamRangeHalfOpen(refname,num-1,header)); A[i] = UNIQUE_PTR_MOVE(tAi); } else { std::string const sstart = rest.substr(0,dashpos); std::string const send = rest.substr(dashpos+1); int64_t start = 0; for ( uint64_t j = 0; j < sstart.size(); ++j ) if ( isdigit(sstart[j]) ) { start *= 10; start += sstart[j]-'0'; } else if ( sstart[j] == ',' ) { } else { libmaus::exception::LibMausException se; se.getStream() << "Found invalid range character in " << sstart << std::endl; se.finish(); throw se; } int64_t end = 0; for ( uint64_t j = 0; j < send.size(); ++j ) if ( 
isdigit(send[j]) ) { end *= 10; end += send[j]-'0'; } else if ( send[j] == ',' ) { } else { libmaus::exception::LibMausException se; se.getStream() << "Found invalid range character in " << send << std::endl; se.finish(); throw se; } libmaus::bambam::BamRange::unique_ptr_type tAi(new libmaus::bambam::BamRangeInterval(refname,start-1,end,header)); A[i] = UNIQUE_PTR_MOVE(tAi); } } } return A; }