int main(int argc, char* argv[]) { START_EASYLOGGINGPP(argc, argv); parseArgs(argc, argv); LOG(INFO) << "Initializing alignment provider"; DazAlnProvider* ap; ap = new DazAlnProvider(popts); TrgBuf trgBuf(20); CnsBuf cnsBuf(10); std::thread writerThread(Writer, std::ref(cnsBuf)); std::vector<std::thread> cnsThreads; for (int i=0; i < popts.threads; i++) { std::thread ct(Consensus, i, std::ref(trgBuf), std::ref(cnsBuf)); cnsThreads.push_back(std::move(ct)); } std::thread readerThread(Reader, std::ref(trgBuf), ap); writerThread.join(); std::vector<std::thread>::iterator it; for (it = cnsThreads.begin(); it != cnsThreads.end(); ++it) it->join(); readerThread.join(); delete ap; return 0; }
void Genome::read ( std::ifstream& input ) { log("Genome: reading file..."); unsigned int L(1); // line counter LockFreeQueue<std::vector<std::string>> q; auto readerTask = [&q,&input]() { debug("Starting reader thread..."); for (std::string line; getline(input, line);) { q.push(strsplit(line, "\t")); } q.done(); debug("All data read and tokenized, closing reader rhread"); }; auto parserTask = [&q,&L,this]() { debug("Starting parser thread..."); std::vector<std::string> splits; while(q.pop(splits)) { assume(parseDataLine(splits), "Could not parse data in line " + std::to_string(L), false); L++; } debug("All tokens processed, closing parser thread"); }; // read and parse header lines sequentially for (std::string line; getline(input, line);) { // std::cout << "Line #" << L << std::endl; if (line[0] == '@') { assume(parseHeaderLine(line), "Could not parse header in line " + std::to_string(L), false); } else { break; // header lines parsed, break and begin threaded processing } L++; } // init and start threads std::thread readerThread(readerTask); std::thread parserThread(parserTask); // wait for threads to finish readerThread.join(); parserThread.join(); if (multistrand != nullptr) { log("Found " + std::to_string(multistrand->size()) + " strand switching events"); } if (circular != nullptr) { log("Found " + std::to_string(circular->size()) + " circular transcripts"); } }
void DnarchModule::Bin2Dnarch(const std::string &inBinFile_, const std::string &outDnarchFile_, const CompressorParams& params_, uint32 threadsNum_, bool verboseMode_) { BinModuleConfig conf; BinFileExtractor* extractor = new BinFileExtractor(params_.minBinSize); extractor->StartDecompress(inBinFile_, conf); DnarchFileWriter* dnarch = new DnarchFileWriter(); dnarch->StartCompress(outDnarchFile_, conf.minimizer, params_); if (threadsNum_ > 1) { const uint32 partNum = threadsNum_ + (threadsNum_ >> 1);//threadsNum_ * 2; const uint64 dnaBufferSize = 1 << 20; const uint64 outBufferSize = 1 << 20; MinimizerPartsPool* inPool = new MinimizerPartsPool(partNum, dnaBufferSize); MinimizerPartsQueue* inQueue = new MinimizerPartsQueue(partNum, 1); CompressedDnaPartsPool* outPool = new CompressedDnaPartsPool(partNum, outBufferSize); CompressedDnaPartsQueue* outQueue = new CompressedDnaPartsQueue(partNum, threadsNum_); BinPartsExtractor* inReader = new BinPartsExtractor(extractor, inQueue, inPool); DnarchPartsWriter* outWriter = new DnarchPartsWriter(dnarch, outQueue, outPool); // preprocess small bins and N bin <-- this should be done internally // { DnaCompressor compressor(conf.minimizer, params_); DnaPacker packer(conf.minimizer); uint32 signatureId = 0; CompressedDnaBlock compBin; BinaryBinBlock binBin; std::vector<const BinFileExtractor::BlockDescriptor*> descriptors = extractor->GetSmallBlockDescriptors(); uint64 totalDnaBufferSize = 0; uint64 totalRecords = 0; for (uint32 i = 0; i < descriptors.size(); ++i) { totalDnaBufferSize += descriptors[i]->rawDnaSize; totalRecords += descriptors[i]->recordsCount; } const BinFileExtractor::BlockDescriptor* nd = extractor->GetNBlockDescriptor(); totalDnaBufferSize += nd->rawDnaSize; totalRecords += nd->recordsCount; if (compBin.workBuffers.dnaBuffer.data.Size() < totalDnaBufferSize) compBin.workBuffers.dnaBuffer.data.Extend(totalDnaBufferSize); // extract and unpack small bins // while (extractor->ExtractNextSmallBin(binBin, signatureId)) { ASSERT(binBin.metaSize != 0); packer.UnpackFromBin(binBin, compBin.workBuffers.dnaBin, signatureId, compBin.workBuffers.dnaBuffer, true); } if (extractor->ExtractNBin(binBin, signatureId) && binBin.metaSize > 0) packer.UnpackFromBin(binBin, compBin.workBuffers.dnaBin, signatureId, compBin.workBuffers.dnaBuffer, true); // un-reverse-compliment records // { char rcBuf[DnaRecord::MaxDnaLen]; DnaRecord rcRec; rcRec.dna = rcBuf; rcRec.reverse = true; for (uint64 i = 0; i < compBin.workBuffers.dnaBin.Size(); ++i) { DnaRecord& r = compBin.workBuffers.dnaBin[i]; if (r.reverse) { r.ComputeRC(rcRec); std::copy(rcRec.dna, rcRec.dna + r.len, r.dna); r.reverse = false; r.minimizerPos = 0; } } } // compress all bins together // const uint32 nSignature = conf.minimizer.TotalMinimizersCount(); compressor.CompressDna(compBin.workBuffers.dnaBin, nSignature, totalDnaBufferSize, compBin.workBuffers.dnaWorkBin, compBin); dnarch->WriteNextBin(&compBin); } // launch stuff // mt::thread readerThread(mt::ref(*inReader)); std::vector<IOperator*> operators; operators.resize(threadsNum_); #ifdef USE_BOOST_THREAD boost::thread_group opThreadGroup; for (uint32 i = 0; i < threadsNum_; ++i) { operators[i] = new BinPartsCompressor(conf.minimizer, params_, inQueue, inPool, outQueue, outPool); opThreadGroup.create_thread(mt::ref(*operators[i])); } (*outWriter)(); readerThread.join(); opThreadGroup.join_all(); #else std::vector<mt::thread> opThreadGroup; for (uint32 i = 0; i < threadsNum_; ++i) { operators[i] = new BinPartsCompressor(conf.minimizer, params_, inQueue, inPool, outQueue, outPool); opThreadGroup.push_back(mt::thread(mt::ref(*operators[i]))); } (*outWriter)(); readerThread.join(); for (mt::thread& t : opThreadGroup) { t.join(); } #endif for (uint32 i = 0; i < threadsNum_; ++i) { delete operators[i]; } TFREE(outWriter); TFREE(inReader); TFREE(outQueue); TFREE(outPool); TFREE(inQueue); TFREE(inPool); } else {
void BinModule::Fastq2Bin(const std::vector<std::string> &inFastqFiles_, const std::string &outBinFile_, uint32 threadNum_, bool compressedInput_, bool verboseMode_) { // TODO: try/catch to free resources // IFastqStreamReader* fastqFile = NULL; if (compressedInput_) fastqFile = new MultiFastqFileReaderGz(inFastqFiles_); else fastqFile = new MultiFastqFileReader(inFastqFiles_); BinFileWriter binFile; binFile.StartCompress(outBinFile_, config); const uint32 minimizersCount = config.minimizer.TotalMinimizersCount(); if (threadNum_ > 1) { FastqChunkPool* fastqPool = NULL; FastqChunkQueue* fastqQueue = NULL; BinaryPartsPool* binPool = NULL; BinaryPartsQueue* binQueue = NULL; FastqChunkReader* fastqReader = NULL; BinChunkWriter* binWriter = NULL; const uint32 partNum = threadNum_ * 4; fastqPool = new FastqChunkPool(partNum, config.fastqBlockSize); fastqQueue = new FastqChunkQueue(partNum, 1); binPool = new BinaryPartsPool(partNum, minimizersCount); binQueue = new BinaryPartsQueue(partNum, threadNum_); fastqReader = new FastqChunkReader(fastqFile, fastqQueue, fastqPool); binWriter = new BinChunkWriter(&binFile, binQueue, binPool); // launch stuff // mt::thread readerThread(mt::ref(*fastqReader)); std::vector<IOperator*> operators; operators.resize(threadNum_); #ifdef USE_BOOST_THREAD boost::thread_group opThreadGroup; for (uint32 i = 0; i < threadNum_; ++i) { operators[i] = new BinEncoder(config.minimizer, config.catParams, fastqQueue, fastqPool, binQueue, binPool); opThreadGroup.create_thread(mt::ref(*operators[i])); } (*binWriter)(); readerThread.join(); opThreadGroup.join_all(); #else std::vector<mt::thread> opThreadGroup; for (uint32 i = 0; i < threadNum_; ++i) { operators[i] = new BinEncoder(config.minimizer, config.catParams, fastqQueue, fastqPool, binQueue, binPool); opThreadGroup.push_back(mt::thread(mt::ref(*operators[i]))); } (*binWriter)(); readerThread.join(); for (mt::thread& t : opThreadGroup) { t.join(); } #endif for (uint32 i = 0; i < threadNum_; ++i) { delete operators[i]; } TFREE(binWriter); TFREE(fastqReader); TFREE(binQueue); TFREE(binPool); TFREE(fastqQueue); TFREE(fastqPool); } else { DnaParser parser; DnaCategorizer categorizer(config.minimizer, config.catParams); DnaPacker packer(config.minimizer); DataChunk fastqChunk(config.fastqBlockSize); std::vector<DnaRecord> records; records.resize(1 << 10); DnaBinBlock dnaBins(minimizersCount); BinaryBinBlock binBins; DataChunk dnaBuffer; while (fastqFile->ReadNextChunk(&fastqChunk)) { uint64 recordsCount = 0; parser.ParseFrom(fastqChunk, dnaBuffer, records, recordsCount); ASSERT(recordsCount > 0); categorizer.Categorize(records, recordsCount, dnaBins); packer.PackToBins(dnaBins, binBins); binFile.WriteNextBlock(&binBins); } } binFile.FinishCompress(); if (verboseMode_) { std::vector<uint64> recordCounts; binFile.GetBinStats(recordCounts); std::cout << "Signatures count: " << recordCounts.size() << std::endl; std::cout << "Records distribution in bins by signature:\n"; for (uint32 i = 0; i < recordCounts.size(); ++i) { if (recordCounts[i] > 0) std::cout << i << " : " << recordCounts[i] << '\n'; } std::cout << std::endl; } delete fastqFile; }