uint64_t decodeBinList(uint64_t const id, libmaus2::autoarray::AutoArray< std::pair<uint64_t,uint64_t> > & A) const { typedef std::vector< std::pair<uint64_t,uint64_t> >::const_iterator it; std::pair<it,it> const P = std::equal_range(T.begin(),T.end(),std::pair<uint64_t,uint64_t>(id,0),PairFirstComparator()); if ( P.second == P.first ) return 0; assert ( P.second-P.first == 1 ); libmaus2::aio::InputStreamInstance ISI(indexfn); ISI.clear(); ISI.seekg(P.first->second,std::ios::beg); uint64_t const rid = libmaus2::util::NumberSerialisation::deserialiseNumber(ISI); assert ( rid == id ); uint64_t const n = libmaus2::util::NumberSerialisation::deserialiseNumber(ISI); uint64_t o = 0; for ( uint64_t i = 0; i < n; ++i ) { uint64_t const a = libmaus2::util::NumberSerialisation::deserialiseNumber(ISI); uint64_t const b = libmaus2::util::NumberSerialisation::deserialiseNumber(ISI); A.push(o, std::pair<uint64_t,uint64_t>(a,b) ); } return n; }
matrix<long> sir(MLNetworkSharedPtr& mnet, double beta, int tau, long num_iterations) { std::vector<std::string> statuses = {"S","I","R"}; std::vector<double> init_distribution({1,0,0}); std::string seed = "I"; beta_transition ISI("I","S",beta,"I"); tau_transition IR("I",tau,"R"); std::vector<transition*> transitions = {&ISI,&IR}; return run(mnet, statuses, init_distribution, seed, transitions, num_iterations); }
std::vector< std::pair<uint64_t,uint64_t> > getTable() const { libmaus2::aio::InputStreamInstance ISI(indexfn); std::pair<uint64_t,uint64_t> const P = getTableInfo(ISI); ISI.clear(); ISI.seekg(P.first,std::ios::beg); std::vector< std::pair<uint64_t,uint64_t> > V; for ( uint64_t i = 0; i < P.second; ++i ) { uint64_t const a = libmaus2::util::NumberSerialisation::deserialiseNumber(ISI); uint64_t const b = libmaus2::util::NumberSerialisation::deserialiseNumber(ISI); V.push_back(std::pair<uint64_t,uint64_t>(a,b)); } return V; }
GammaPDIndexDecoder(std::vector<std::string> const & rVfn) : Vfn(rVfn), valuesPerFile(0), blocksPerFile(0), indexEntriesPerFile(0) { uint64_t o = 0; for ( uint64_t i = 0; i < Vfn.size(); ++i ) { libmaus2::aio::InputStreamInstance ISI(Vfn[i]); uint64_t const vpf = getNumValues(ISI); if ( vpf ) { valuesPerFile.push_back(vpf); blocksPerFile.push_back(getNumBlocks(ISI)); indexEntriesPerFile.push_back(blocksPerFile.back()+1); indexOffset.push_back(getIndexOffset(ISI)); Vfn[o++] = Vfn[i]; } } // for prefix sum valuesPerFile.push_back(0); Vfn.resize(o); libmaus2::util::PrefixSums::prefixSums(valuesPerFile.begin(),valuesPerFile.end()); }
static int dispatch(std::string const & fn, uint64_t const i, std::string const & scriptfn) { libmaus2::aio::InputStreamInstance ISI(fn); return dispatch(ISI,i,scriptfn); }
static NWDSCCODE __NWDSGetNDSStatistics( NWCONN_HANDLE conn, nuint32 requestMask, size_t statsInfoLen, NDSStatsInfo_T* statsInfo ) { nuint8 rqbuffer[5]; nuint32 rpbuffer[32]; NW_FRAGMENT rp; NWDSCCODE dserr; BSET(rqbuffer, 0, DS_NCP_GET_DS_STATISTICS); DSET_LH(rqbuffer, 1, requestMask | 0x00000001); rp.fragAddr.rw = rpbuffer; rp.fragSize = sizeof(rpbuffer); dserr = NWRequestSimple(conn, DS_NCP_VERB, rqbuffer, 5, &rp); if (!dserr) { size_t rplen; rplen = rp.fragSize; if (rplen < 4) { dserr = NWE_INVALID_NCP_PACKET_LENGTH; } else { size_t pos; int i; nuint32 flags; flags = DVAL_LH(rpbuffer, 0); statsInfo->statsVersion = flags; pos = 4; for (i = 1; i < 32; i++) { nuint32 val; if (flags & (1 << i)) { if (pos + 4 > rplen) { dserr = NWE_INVALID_NCP_PACKET_LENGTH; break; } val = DVAL_LH(rpbuffer, pos); pos += 4; } else val = 0; switch (i) { #define endof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER + sizeof(((TYPE *)0)->MEMBER)) #define ISI(v,x) case v: if (endof(NDSStatsInfo_T, x) <= statsInfoLen) statsInfo->x = val; break ISI(1, noSuchEntry); ISI(2, localEntry); ISI(3, typeReferral); ISI(4, aliasReferral); ISI(5, requestCount); ISI(6, requestDataSize); ISI(7, replyDataSize); ISI(8, resetTime); ISI(9, transportReferral); ISI(10, upReferral); ISI(11, downReferral); /* 12..31 are reserved */ #undef ISI } } } } return dserr; }
static unique_ptr_type load(std::string const & s) { libmaus2::aio::InputStreamInstance ISI(s); unique_ptr_type tptr(new this_type(ISI)); return UNIQUE_PTR_MOVE(tptr); }
static int64_t getNovl(std::string const & aligns) { libmaus2::aio::InputStreamInstance ISI(aligns); this_type algn(ISI); return algn.novl; }
int main(int argc, char ** argv) { try { libmaus2::util::ArgInfo const arginfo(argc,argv); uint64_t randseed = arginfo.getValueUnsignedNumeric<uint64_t>("randomseed",time(0)); libmaus2::random::Random::setup(randseed); libmaus2::bambam::BamSeqEncodeTable const seqenc; ::libmaus2::fastx::UCharBuffer UB; // substitution error fraction double substrate = arginfo.getValue<double>("substrate",.2); // deletion error fraction double delrate = arginfo.getValue<double>("delrate",.3); // insertion error fraction double insrate = arginfo.getValue<double>("insrate",.5); // insertion homopolymer error fraction double inshomopolrate = arginfo.getValue<double>("inshomopolrate",.0); // low error rate double eratelow = arginfo.getValue<double>("eratelow",.15); // high error rate double eratehigh = arginfo.getValue<double>("eratehigh",.25); // std dev for low error rate double eratelowstddev = arginfo.getValue<double>("eratelowstddev",0.03); // std dev for high error rate double eratehighstddev = arginfo.getValue<double>("eratehighstddev",0.04); // 8581.35 6953.55 // drop rate double const droprate = arginfo.getValue<double>("droprate",0.00); // number of traversals per input sequence uint64_t const numtraversals = arginfo.getValueUnsignedNumeric<uint64_t>("numtraversals",1); // average read length double const readlenavg = arginfo.getValue<double>("readlenavg", 8500); // read length std dev double const readlenstddev = arginfo.getValue<double>("readlenstddev", 6953); // probability to stay in low error rate mode double const keeplowstate = arginfo.getValue<double>("keeplowstate", 0.9998); // probability to stay in high error rate mode double const keephighstate = arginfo.getValue<double>("keephighstate", 0.995); // probability to start in low error rate mode double const startlowprob = arginfo.getValue<double>("startlowprob", 0.7); // number of random bases appended at front and back uint64_t const randlen = arginfo.getValueUnsignedNumeric<uint64_t>("randlen",0); bool const placerandom = arginfo.getValueUnsignedNumeric<uint64_t>("placerandom",0); bool const nthres = arginfo.getValueUnsignedNumeric<uint64_t>("nthres",0); uint64_t const minlen = arginfo.getValueUnsignedNumeric<uint64_t>("minlen",0); std::pair<double,double> const lognormalP = libmaus2::random::LogNormalRandom::computeParameters(readlenavg,readlenstddev); // std::floor(libmaus2::random::LogNormalRandom::random(lognormalP.second, lognormalP.first) + 0.5); // noise spiker object libmaus2::random::DNABaseNoiseSpiker DBNS(substrate, delrate, insrate, inshomopolrate, eratelow, eratehigh, eratelowstddev, eratehighstddev, keeplowstate, keephighstate, startlowprob); if ( arginfo.restargs.size() < 2 ) { std::cerr << "usage: " << arginfo.progname << " <text.fa> <out.fasta> >out.bam" << std::endl; return EXIT_FAILURE; } // input reference name (FastA format) std::string const reffn = arginfo.restargs.at(0); // output fasta file name std::string const fafn = arginfo.restargs.at(1); // construct name of FAI file std::string const fainame = reffn + ".fai"; libmaus2::fastx::FastAIndexGenerator::generate(reffn,fainame,1); // load FAI index for reference FastA libmaus2::fastx::FastAIndex::unique_ptr_type PFAI(libmaus2::fastx::FastAIndex::load(fainame)); // open output fasta file libmaus2::aio::OutputStreamInstance faOSI(fafn); // create SAM header std::ostringstream samheaderstr; samheaderstr << "@HD\tVN:1.5\tSO:unknown\n"; for ( uint64_t i = 0; i < PFAI->size(); ++i ) { libmaus2::fastx::FastAIndexEntry const & entry = (*PFAI)[i]; samheaderstr << "@SQ\tSN:" << entry.name << "\tLN:" << entry.length << "\n"; } libmaus2::bambam::BamHeader header(samheaderstr.str()); // bam writer libmaus2::bambam::BamBlockWriterBase::unique_ptr_type writer( libmaus2::bambam::BamBlockWriterBaseFactory::construct(header, arginfo) ); // id of run uint64_t const runid = 0; // current well id uint64_t wellid = 0; libmaus2::aio::InputStreamInstance ISI(reffn); libmaus2::fastx::StreamFastAReaderWrapper SFAR(ISI); libmaus2::fastx::StreamFastAReaderWrapper::pattern_type pattern; for ( uint64_t seqid = 0; SFAR.getNextPatternUnlocked(pattern); ++seqid ) { std::cerr << "[V] " << pattern.sid << std::endl; std::ostringstream seqstr; seqstr << pattern.spattern; std::string const seq = seqstr.str(); for ( uint64_t trav = 0; trav < numtraversals; ++trav ) { std::cerr << "\t[V] traversal " << trav << std::endl; uint64_t pp = 0; typedef std::pair<uint64_t,uint64_t> upair; std::vector<upair> poslenvec; if ( placerandom ) while ( pp < seq.size() ) { int64_t len = -1; while ( len <= 0 ) len = std::floor(libmaus2::random::LogNormalRandom::random(lognormalP.second, lognormalP.first) + 0.5); // libmaus2::random::GaussianRandom::random(readlenstddev,readlenavg); len = std::min(len,static_cast<int64_t>(seq.size())); uint64_t p = libmaus2::random::Random::rand64() % (seq.size()-len+1); poslenvec.push_back(upair(p,len)); pp += len; } else while ( pp < seq.size() ) { int64_t len = -1; while ( len <= 0 ) len = std::floor(libmaus2::random::LogNormalRandom::random(lognormalP.second, lognormalP.first) + 0.5); // libmaus2::random::GaussianRandom::random(readlenstddev,readlenavg); len = std::min(len,static_cast<int64_t>(seq.size()-pp)); poslenvec.push_back(upair(pp,len)); pp += len; } for ( uint64_t z = 0; z < poslenvec.size(); ++z ) { if ( libmaus2::random::UniformUnitRandom::uniformUnitRandom() < droprate ) continue; uint64_t const len = poslenvec[z].second; if ( len < minlen ) continue; uint64_t const pos = poslenvec[z].first; bool const strand = libmaus2::random::Random::rand8() & 1; bool rc = !strand; std::string sub = seq.substr(pos,len); uint64_t ncnt = 0; for ( uint64_t i = 0; i < sub.size(); ++i ) if ( sub[i] == 'N' ) ncnt++; if ( ncnt > nthres ) continue; libmaus2::random::DNABaseNoiseSpiker::ErrorStats E; std::pair<std::string,std::string> const P = DBNS.modifyAndComment(sub,&E); // std::cerr << P.second << std::endl; std::string errintv = P.second; if ( errintv.find("ERRINTV=[[") != std::string::npos ) { errintv = errintv.substr(errintv.find("ERRINTV=[[" )+ strlen("ERRINTV=[[")); if ( errintv.find("]]") != std::string::npos ) { errintv = errintv.substr(0,errintv.find("]]")); #if 0 std::deque<std::string> Vintv(libmaus2::util::stringFunctions::tokenize<std::string>(errintv,";")); for ( uint64_t i = 0; i < Vintv.size(); ++i ) std::cerr << Vintv[i] << std::endl; #endif } else { errintv = std::string(); } } else { errintv = std::string(); } std::string mod = P.first; std::string const r = getRandom(randlen) + mod + getRandom(randlen); std::string cig = P.second; if ( cig.find("CIGAR=[") != std::string::npos ) cig = cig.substr(cig.find("CIGAR=[")+strlen("CIGAR=[")); if ( cig.find("]") != std::string::npos ) cig = cig.substr(0,cig.find("]")); std::vector<libmaus2::bambam::cigar_operation> Vcigop = libmaus2::bambam::CigarStringParser::parseCigarString(cig); libmaus2::lcs::AlignmentTraceContainer ATC; libmaus2::bambam::CigarStringParser::cigarToTrace(Vcigop.begin(),Vcigop.end(),ATC); uint64_t frontdel = 0; uint64_t backdel = 0; while ( ATC.ta != ATC.te && ATC.ta[0] == libmaus2::lcs::AlignmentTraceContainer::STEP_DEL ) { ++frontdel; ++ATC.ta; } while ( ATC.ta != ATC.te && ATC.te[-1] == libmaus2::lcs::AlignmentTraceContainer::STEP_DEL ) { ++backdel; --ATC.te; } libmaus2::autoarray::AutoArray< std::pair<libmaus2::lcs::AlignmentTraceContainer::step_type,uint64_t> > Aopblocks; libmaus2::autoarray::AutoArray<libmaus2::bambam::cigar_operation> Aop; uint64_t const ncig = libmaus2::bambam::CigarStringParser::traceToCigar(ATC,Aopblocks,Aop,0,0,0,0); std::string const upcig = libmaus2::bambam::CigarStringParser::cigarBlocksToString(Aop.begin(),ncig); #if 0 uint64_t delshift = 0; for ( uint64_t ind = 0; ind < Vcigop.size() && Vcigop[ind].first != libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CMATCH && Vcigop[ind].first != libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CEQUAL && Vcigop[ind].first != libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CDIFF ; ++ind ) { if ( Vcigop[ind].first == libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_CDEL ) delshift += Vcigop[ind].second; } #endif std::ostringstream padcigstr; if ( randlen ) padcigstr << randlen << "S"; padcigstr << upcig; if ( randlen ) padcigstr << randlen << "S"; cig = padcigstr.str(); std::ostringstream namestr; std::string const rfasta = strand ? r : libmaus2::fastx::reverseComplementUnmapped(r); namestr << 'L' << runid << '/' << (wellid++) << '/' << 0 << '_' << rfasta.size(); std::string const name = namestr.str(); UB.reset(); std::vector<char> Vqual(r.size(),'H'); try { libmaus2::bambam::BamAlignmentEncoderBase::encodeAlignment ( UB, seqenc, name, seqid, pos + frontdel, 255, rc ? libmaus2::bambam::BamFlagBase::LIBMAUS2_BAMBAM_FREVERSE : 0, cig, -1, -1, 0, r, std::string(Vqual.begin(),Vqual.end()), 33,true ); } catch(std::exception const & lme) { std::cerr << lme.what() << std::endl; continue; } ::libmaus2::bambam::MdStringComputationContext mdnmcontext; libmaus2::bambam::BamAlignmentDecoderBase::calculateMd(UB.buffer,UB.length,mdnmcontext,seq.begin()+pos /* itref */); libmaus2::bambam::BamAlignmentEncoderBase::putAuxString(UB,"MD",mdnmcontext.md.get()); libmaus2::bambam::BamAlignmentEncoderBase::putAuxNumber(UB,"NM",'i',mdnmcontext.nm); if ( errintv.size() ) { libmaus2::bambam::BamAlignmentEncoderBase::putAuxString( UB, "er", errintv.c_str() ); #if 0 std::vector<Intv> Vintv = Intv::parse(errintv); std::vector<Intv> Rintv = Intv::computeRIntv(Vintv,UB.buffer); std::ostringstream ostr; for ( uint64_t i = 0; i < Rintv.size(); ++i ) ostr << Rintv[i] << ";"; libmaus2::bambam::BamAlignmentEncoderBase::putAuxString( UB, "ee", ostr.str().c_str() ); #endif } libmaus2::bambam::BamAlignmentEncoderBase::putAuxString(UB,"oc",cig.c_str()); faOSI << '>' << name << " RQ=0.851" << " POS=[" << pos << "] REFID=[" << seqid << "] RC=[" << rc << "]\n"; for ( uint64_t i = 0; i < rfasta.size(); ) { uint64_t toprint = std::min(static_cast<int>(rfasta.size()-i),static_cast<int>(80)); faOSI.write(rfasta.c_str()+i,toprint); i += toprint; faOSI.put('\n'); } writer->writeBamBlock(UB.buffer,UB.length); } } } } catch(std::exception const & ex) { std::cerr << ex.what() << std::endl; return EXIT_FAILURE; } }