示例#1
0
int main(int argc, char* argv[]) {

    START_EASYLOGGINGPP(argc, argv);
    parseArgs(argc, argv);

    LOG(INFO) << "Initializing alignment provider";
    DazAlnProvider* ap;
    ap = new DazAlnProvider(popts);
    TrgBuf trgBuf(20);
    CnsBuf cnsBuf(10);

    std::thread writerThread(Writer, std::ref(cnsBuf));

    std::vector<std::thread> cnsThreads;
    for (int i=0; i < popts.threads; i++) {
        std::thread ct(Consensus, i, std::ref(trgBuf), std::ref(cnsBuf));
        cnsThreads.push_back(std::move(ct));
    }

    std::thread readerThread(Reader, std::ref(trgBuf), ap);

    writerThread.join();

    std::vector<std::thread>::iterator it;
    for (it = cnsThreads.begin(); it != cnsThreads.end(); ++it)
        it->join();

    readerThread.join();

    delete ap;

    return 0;
}
示例#2
0
/**
 * Reads the whole input stream: header lines first, then data lines.
 *
 * Leading lines that start with '@' are parsed sequentially with
 * parseHeaderLine(). All remaining lines are processed by two threads:
 * a reader that tokenizes each line on tabs and pushes the tokens into a
 * queue, and a parser that pops token vectors and hands them to
 * parseDataLine(). Parse failures are reported through assume() together
 * with the current line number.
 *
 * @param input  stream to read from; it is consumed until getline() fails.
 */
void Genome::read ( std::ifstream& input ) {
  log("Genome: reading file...");
  unsigned int L(1);                         // line counter
  LockFreeQueue<std::vector<std::string>> q;

  // producer: tokenizes every remaining line and signals completion
  auto readerTask = [&q,&input]() {
    debug("Starting reader thread...");
    for (std::string line; getline(input, line);) {
      q.push(strsplit(line, "\t"));
    }
    q.done();
    debug("All data read and tokenized, closing reader rhread");
  };

  // consumer: parses token vectors until the queue reports no more data
  auto parserTask = [&q,&L,this]() {
    debug("Starting parser thread...");
    std::vector<std::string> splits;
    while(q.pop(splits)) {
      assume(parseDataLine(splits), "Could not parse data in line " + std::to_string(L), false);
      L++;
    }
    debug("All tokens processed, closing parser thread");
  };

  // read and parse header lines sequentially
  std::string line;
  bool pendingDataLine = false;   // true if `line` holds an unprocessed non-header line
  while (getline(input, line)) {
    if (line.empty() || line[0] != '@') {
      // header lines parsed, stop here and begin threaded processing
      pendingDataLine = true;
      break;
    }
    assume(parseHeaderLine(line), "Could not parse header in line " + std::to_string(L), false);
    L++;
  }

  // The line that ended the header section has already been taken out of the
  // stream, so the reader thread will never see it. Queue it here, before the
  // threads start, so the first data record is not silently dropped and L
  // keeps matching the real line number.
  if (pendingDataLine) {
    q.push(strsplit(line, "\t"));
  }

  // init and start threads
  std::thread readerThread(readerTask);
  std::thread parserThread(parserTask);

  // wait for threads to finish
  readerThread.join();
  parserThread.join();

  if (multistrand != nullptr) {
    log("Found " + std::to_string(multistrand->size()) + " strand switching events");
  }
  if (circular != nullptr) {
    log("Found " + std::to_string(circular->size()) + " circular transcripts");
  }

}
示例#3
0
void DnarchModule::Bin2Dnarch(const std::string &inBinFile_, const std::string &outDnarchFile_, const CompressorParams& params_,
							  uint32 threadsNum_, bool verboseMode_)
{
	BinModuleConfig conf;
	BinFileExtractor* extractor = new BinFileExtractor(params_.minBinSize);

	extractor->StartDecompress(inBinFile_, conf);

	DnarchFileWriter* dnarch = new DnarchFileWriter();
	dnarch->StartCompress(outDnarchFile_, conf.minimizer, params_);

	if (threadsNum_ > 1)
	{
		const uint32 partNum = threadsNum_ + (threadsNum_ >> 1);//threadsNum_ * 2;
		const uint64 dnaBufferSize = 1 << 20;
		const uint64 outBufferSize = 1 << 20;

		MinimizerPartsPool* inPool = new MinimizerPartsPool(partNum, dnaBufferSize);
		MinimizerPartsQueue* inQueue = new MinimizerPartsQueue(partNum, 1);

		CompressedDnaPartsPool* outPool = new CompressedDnaPartsPool(partNum, outBufferSize);
		CompressedDnaPartsQueue* outQueue = new CompressedDnaPartsQueue(partNum, threadsNum_);

		BinPartsExtractor* inReader = new BinPartsExtractor(extractor, inQueue, inPool);
		DnarchPartsWriter* outWriter = new DnarchPartsWriter(dnarch, outQueue, outPool);


		// preprocess small bins and N bin <-- this should be done internally
		//
		{
			DnaCompressor compressor(conf.minimizer, params_);
			DnaPacker packer(conf.minimizer);

			uint32 signatureId = 0;

			CompressedDnaBlock compBin;
			BinaryBinBlock binBin;

			std::vector<const BinFileExtractor::BlockDescriptor*> descriptors = extractor->GetSmallBlockDescriptors();

			uint64 totalDnaBufferSize = 0;
			uint64 totalRecords = 0;
			for (uint32 i = 0; i < descriptors.size(); ++i)
			{
				totalDnaBufferSize += descriptors[i]->rawDnaSize;
				totalRecords += descriptors[i]->recordsCount;
			}
			const BinFileExtractor::BlockDescriptor* nd = extractor->GetNBlockDescriptor();
			totalDnaBufferSize += nd->rawDnaSize;
			totalRecords += nd->recordsCount;

			if (compBin.workBuffers.dnaBuffer.data.Size() < totalDnaBufferSize)
				compBin.workBuffers.dnaBuffer.data.Extend(totalDnaBufferSize);


			// extract and unpack small bins
			//
			while (extractor->ExtractNextSmallBin(binBin, signatureId))
			{
				ASSERT(binBin.metaSize != 0);

				packer.UnpackFromBin(binBin, compBin.workBuffers.dnaBin, signatureId, compBin.workBuffers.dnaBuffer, true);
			}

			if (extractor->ExtractNBin(binBin, signatureId) && binBin.metaSize > 0)
				packer.UnpackFromBin(binBin, compBin.workBuffers.dnaBin, signatureId, compBin.workBuffers.dnaBuffer, true);


			// un-reverse-compliment records
			//
			{
				char rcBuf[DnaRecord::MaxDnaLen];
				DnaRecord rcRec;
				rcRec.dna = rcBuf;
				rcRec.reverse = true;

				for (uint64 i = 0; i < compBin.workBuffers.dnaBin.Size(); ++i)
				{
					DnaRecord& r = compBin.workBuffers.dnaBin[i];
					if (r.reverse)
					{
						r.ComputeRC(rcRec);
						std::copy(rcRec.dna, rcRec.dna + r.len, r.dna);
						r.reverse = false;
						r.minimizerPos = 0;
					}
				}
			}

			// compress all bins together
			//
			const uint32 nSignature = conf.minimizer.TotalMinimizersCount();
			compressor.CompressDna(compBin.workBuffers.dnaBin, nSignature, totalDnaBufferSize, compBin.workBuffers.dnaWorkBin, compBin);

			dnarch->WriteNextBin(&compBin);
		}



		// launch stuff
		//
		mt::thread readerThread(mt::ref(*inReader));

		std::vector<IOperator*> operators;
		operators.resize(threadsNum_);

#ifdef USE_BOOST_THREAD
		boost::thread_group opThreadGroup;

		for (uint32 i = 0; i < threadsNum_; ++i)
		{
			operators[i] = new BinPartsCompressor(conf.minimizer, params_, 
												  inQueue, inPool,
												  outQueue, outPool);
			opThreadGroup.create_thread(mt::ref(*operators[i]));
		}

		(*outWriter)();

		readerThread.join();
		opThreadGroup.join_all();


#else
		std::vector<mt::thread> opThreadGroup;

		for (uint32 i = 0; i < threadsNum_; ++i)
		{
			operators[i] = new BinPartsCompressor(conf.minimizer, params_,
												  inQueue, inPool,
												  outQueue, outPool);
			opThreadGroup.push_back(mt::thread(mt::ref(*operators[i])));
		}

		(*outWriter)();

		readerThread.join();

		for (mt::thread& t : opThreadGroup)
		{
			t.join();
		}

#endif

		for (uint32 i = 0; i < threadsNum_; ++i)
		{
			delete operators[i];
		}

		TFREE(outWriter);
		TFREE(inReader);

		TFREE(outQueue);
		TFREE(outPool);
		TFREE(inQueue);
		TFREE(inPool);
	}
	else
	{
示例#4
0
文件: BinModule.cpp 项目: lrog/orcom
void BinModule::Fastq2Bin(const std::vector<std::string> &inFastqFiles_, const std::string &outBinFile_,
						  uint32 threadNum_,  bool compressedInput_, bool verboseMode_)
{
	// TODO: try/catch to free resources
	//
	IFastqStreamReader* fastqFile = NULL;
	if (compressedInput_)
		fastqFile = new MultiFastqFileReaderGz(inFastqFiles_);
	else
		fastqFile = new MultiFastqFileReader(inFastqFiles_);


	BinFileWriter binFile;
	binFile.StartCompress(outBinFile_, config);

	const uint32 minimizersCount = config.minimizer.TotalMinimizersCount();
	if (threadNum_ > 1)
	{
		FastqChunkPool* fastqPool = NULL;
		FastqChunkQueue* fastqQueue = NULL;
		BinaryPartsPool* binPool = NULL;
		BinaryPartsQueue* binQueue = NULL;

		FastqChunkReader* fastqReader = NULL;
		BinChunkWriter* binWriter = NULL;

		const uint32 partNum = threadNum_ * 4;
		fastqPool = new FastqChunkPool(partNum, config.fastqBlockSize);
		fastqQueue = new FastqChunkQueue(partNum, 1);

		binPool = new BinaryPartsPool(partNum, minimizersCount);
		binQueue = new BinaryPartsQueue(partNum, threadNum_);

		fastqReader = new FastqChunkReader(fastqFile, fastqQueue, fastqPool);
		binWriter = new BinChunkWriter(&binFile, binQueue, binPool);

		// launch stuff
		//
		mt::thread readerThread(mt::ref(*fastqReader));

		std::vector<IOperator*> operators;
		operators.resize(threadNum_);

#ifdef USE_BOOST_THREAD
		boost::thread_group opThreadGroup;

		for (uint32 i = 0; i < threadNum_; ++i)
		{
			operators[i] = new BinEncoder(config.minimizer, config.catParams,
										  fastqQueue, fastqPool,
										  binQueue, binPool);
			opThreadGroup.create_thread(mt::ref(*operators[i]));
		}

		(*binWriter)();

		readerThread.join();
		opThreadGroup.join_all();


#else
		std::vector<mt::thread> opThreadGroup;

		for (uint32 i = 0; i < threadNum_; ++i)
		{
			operators[i] = new BinEncoder(config.minimizer, config.catParams,
										  fastqQueue, fastqPool, binQueue, binPool);
			opThreadGroup.push_back(mt::thread(mt::ref(*operators[i])));
		}

		(*binWriter)();

		readerThread.join();

		for (mt::thread& t : opThreadGroup)
		{
			t.join();
		}

#endif

		for (uint32 i = 0; i < threadNum_; ++i)
		{
			delete operators[i];
		}

		TFREE(binWriter);
		TFREE(fastqReader);

		TFREE(binQueue);
		TFREE(binPool);
		TFREE(fastqQueue);
		TFREE(fastqPool);
	}
	else
	{
		DnaParser parser;
		DnaCategorizer categorizer(config.minimizer, config.catParams);
		DnaPacker packer(config.minimizer);

		DataChunk fastqChunk(config.fastqBlockSize);
		std::vector<DnaRecord> records;
		records.resize(1 << 10);

		DnaBinBlock dnaBins(minimizersCount);
		BinaryBinBlock binBins;
		DataChunk dnaBuffer;

		while (fastqFile->ReadNextChunk(&fastqChunk))
		{
			uint64 recordsCount = 0;
			parser.ParseFrom(fastqChunk, dnaBuffer, records, recordsCount);

			ASSERT(recordsCount > 0);
			categorizer.Categorize(records, recordsCount, dnaBins);

			packer.PackToBins(dnaBins, binBins);

			binFile.WriteNextBlock(&binBins);
		}
	}

	binFile.FinishCompress();

	if (verboseMode_)
	{
		std::vector<uint64> recordCounts;
		binFile.GetBinStats(recordCounts);

		std::cout << "Signatures count: " << recordCounts.size() << std::endl;
		std::cout << "Records distribution in bins by signature:\n";
		for (uint32 i = 0; i < recordCounts.size(); ++i)
		{
			if (recordCounts[i] > 0)
				std::cout << i << " : " << recordCounts[i] << '\n';
		}
		std::cout << std::endl;
	}

	delete fastqFile;
}