示例#1
0
// Compresses a FASTQ input into a DSRC output using a three-stage threaded pipeline:
// one reader thread (FastqReader), args_.threadNum compressor threads (DsrcCompressor)
// and the calling thread acting as the writer (DsrcWriter).
//
// Fields of args_ read here: useFastqStdIo, inputFilename, outputFilename,
// fastqBufferSizeMB, threadNum, calculateCrc32, qualityOffset (plus whatever
// GetCompressionSettings() consumes).
//
// Returns true when no error has been recorded on this object (i.e. !IsError()).
//
// A std::runtime_error thrown while the pipeline is being set up is caught and stored
// through AddError(); other exception types thrown there are not caught by this function.
// Errors reported by the worker threads are collected from errorHandler after the joins.
// On success a summary of compressed vs. raw stream sizes is passed to AddLog().
bool DsrcCompressorMT::Process(const InputParameters &args_)
{
	// Endpoints of the pipeline: FASTQ source and DSRC sink.
	IFastqStreamReader* fileReader = NULL;
	DsrcFileWriter* fileWriter = NULL;

	// make reusable
	// (original author's marker: these pools/queues/handler are allocated and freed on
	// every call; the same marker brackets their release at the end of the function)
	FastqDataPool* fastqPool = NULL;
	FastqDataQueue* fastqQueue = NULL;
	DsrcDataPool* dsrcPool = NULL;
	DsrcDataQueue* dsrcQueue = NULL;
	ErrorHandler* errorHandler = NULL;
	//
	//

	// Pipeline stages wrapping the file endpoints.
	FastqReader* dataReader = NULL;
	DsrcWriter* dataWriter = NULL;

	FastqDatasetType datasetType;
	CompressionSettings compSettings = GetCompressionSettings(args_);

	// Setup phase: every allocation below is released by the TFree() calls at the end
	// of the function, whether or not this block completes.
	try
	{
		// Pick the input source: standard input or a named file.
		if (args_.useFastqStdIo)
			fileReader = new FastqStdIoReader();
		else
			fileReader = new FastqFileReader(args_.inputFilename);

		fileWriter = new DsrcFileWriter();
		fileWriter->StartCompress(args_.outputFilename);

		// Number of buffer parts in flight: 4 per thread for buffers below 128 MB,
		// otherwise 2 per thread. The '<< 20' below scales the configured size by 2^20
		// (megabytes to bytes, going by the field name).
		const uint32 partNum = (args_.fastqBufferSizeMB < 128) ? args_.threadNum * 4 : args_.threadNum * 2;
		fastqPool = new FastqDataPool(partNum, args_.fastqBufferSizeMB << 20);		// maxPart, bufferPartSize
		fastqQueue = new FastqDataQueue(partNum, 1);								// maxPart, threadCount

		// presumably threadCount is the number of producers feeding the queue (one reader
		// above, threadNum compressors here) -- TODO confirm against the queue class
		dsrcPool = new DsrcDataPool(partNum, args_.fastqBufferSizeMB << 20);
		dsrcQueue = new DsrcDataQueue(partNum, args_.threadNum);

		// NOTE(review): the handler type is selected by calculateCrc32, although the handler
		// is shared by all pipeline threads in either case -- confirm this is intended.
		if (args_.calculateCrc32)
			errorHandler = new MultithreadedErrorHandler();
		else
			errorHandler = new ErrorHandler();

		dataReader = new FastqReader(*fileReader, *fastqQueue, *fastqPool, *errorHandler);
		dataWriter = new DsrcWriter(*fileWriter, *dsrcQueue, *dsrcPool, *errorHandler);

		// analyze file
		//
		// When the caller supplied an explicit quality offset it is stored up front;
		// otherwise findQOffset is true and is forwarded to AnalyzeFirstChunk().
		const bool findQOffset = args_.qualityOffset == fq::FastqDatasetType::AutoQualityOffset;
		if (!findQOffset)
			datasetType.qualityOffset = args_.qualityOffset;

		if (!dataReader->AnalyzeFirstChunk(datasetType, findQOffset))
		{
			throw DsrcException("Error analyzing FASTQ dataset");
		}

		// Hand the analysis result and the compression settings to the file writer.
		fileWriter->SetDatasetType(datasetType);
		fileWriter->SetCompressionSettings(compSettings);
	}
	catch (const std::runtime_error& e_)
	{
		// presumably DsrcException derives from std::runtime_error, so the throw above
		// lands here -- verify against its declaration
		AddError(e_.what());
	}

	// Run the pipeline only if setup completed without a recorded error.
	// NOTE(review): nothing in this section is inside a try block; if th::thread is
	// std::thread, an exception escaping after readerThread is started would destroy a
	// joinable thread -- confirm whether that can happen in practice.
	if (!IsError())
	{
		const uint32 threadsNum = args_.threadNum;

		// launch threads
		//
		// The reader is passed by reference wrapper, so the thread runs *dataReader itself.
		th::thread readerThread(th::ref(*dataReader));

		// One compressor object per worker thread; owned here and deleted after the joins.
		std::vector<DsrcCompressor*> operators;
		operators.resize(threadsNum);

#ifdef USE_BOOST_THREAD
		boost::thread_group opThreadGroup;		// why C++11 does not have thread_group? ://

		for (uint32 i = 0; i < threadsNum; ++i)
		{
			operators[i] = new DsrcCompressor(*fastqQueue, *fastqPool, *dsrcQueue, *dsrcPool, *errorHandler, datasetType, compSettings);
			opThreadGroup.create_thread(th::ref(*operators[i]));
		}

		(*dataWriter)();			// main thread works as writer

		// The writer has returned; wait for the reader and all compressor threads.
		readerThread.join();
		opThreadGroup.join_all();

#else
		// Same sequence as the Boost branch, with a plain vector standing in for thread_group.
		std::vector<th::thread> opThreadGroup;

		for (uint32 i = 0; i < threadsNum; ++i)
		{
			operators[i] = new DsrcCompressor(*fastqQueue, *fastqPool, *dsrcQueue, *dsrcPool, *errorHandler, datasetType, compSettings);
			opThreadGroup.push_back(th::thread(th::ref(*operators[i])));
		}

		(*dataWriter)();			// main thread works as writer

		readerThread.join();

		// find difference: 'for (std::vector<th::thread>::iterator i = opThreadGroup.begin(); i != opThreadGroup.end(); ++i)'
		// --> 'for (auto& t : opThreadGroup)'
		for (th::thread& t : opThreadGroup)
		{
			t.join();
		}

#endif

		// check for errors
		//
		// Copy an error reported through the shared handler into this object's error state.
		if (errorHandler->IsError())
			AddError(errorHandler->GetError());


		// free resources, cleanup
		//
		// All threads have been joined at this point, so the queues and compressor
		// objects are no longer in use by any worker.
		fastqQueue->Reset();
		dsrcQueue->Reset();

		for (uint32 i = 0; i < threadsNum; ++i)
		{
			delete operators[i];
		}

		// Executed even when a worker reported an error above.
		fileReader->Close();
		fileWriter->FinishCompress();


		// set log
		//
		// Each line shows compressed size / raw size for one stream; the TAG line adds the
		// compressed MetaStream size to the compressed TagStream size.
		fq::StreamsInfo rawSize = fileWriter->GetFastqStreamInfo();
		fq::StreamsInfo compSize = fileWriter->GetDsrcStreamInfo();

		std::ostringstream ss;
		ss << "Compressed streams sizes (in bytes)\n";
		ss << "TAG: " << std::setw(16) << compSize.sizes[fq::StreamsInfo::MetaStream] + compSize.sizes[fq::StreamsInfo::TagStream]
					  << " / " << std::setw(16) << rawSize.sizes[fq::StreamsInfo::TagStream] << '\n';
		ss << "DNA: " << std::setw(16) << compSize.sizes[fq::StreamsInfo::DnaStream]
					  << " / " << std::setw(16) << rawSize.sizes[fq::StreamsInfo::DnaStream] << '\n';
		ss << "QUA: " << std::setw(16) << compSize.sizes[fq::StreamsInfo::QualityStream]
					  << " / " << std::setw(16) << rawSize.sizes[fq::StreamsInfo::QualityStream] << '\n';
		AddLog(ss.str());
	}

	// Release everything in reverse order of construction. On the failure path some of
	// these pointers may still be NULL (presumably TFree() tolerates that -- TODO confirm).
	TFree(dataWriter);
	TFree(dataReader);
	TFree(errorHandler);

	// make reusable
	// (counterpart of the marker at the top: per-call pools and queues are released here)
	TFree(dsrcQueue);
	TFree(dsrcPool);
	TFree(fastqQueue);
	TFree(fastqPool);
	//
	//

	TFree(fileWriter);
	TFree(fileReader);

	return !IsError();
}