Пример #1
0
/**
 * Build a histogram of the symbols obtained by UTF-8 decoding the byte array A.
 *
 * The array is processed in parts whose boundaries come from
 * computePartStarts(A,numthreads); consecutive entries of that array are used
 * as [start,end) byte offsets into A. Each part is decoded into its own local
 * histogram, which is then merged into the shared result while holding a lock.
 *
 * @param A byte array to be decoded
 * @return pointer to the merged histogram over all parts
 **/
::libmaus2::util::Histogram::unique_ptr_type libmaus2::util::Utf8String::getHistogram(::libmaus2::autoarray::AutoArray<uint8_t> const & A)
{
	// number of parts requested: maximum OpenMP thread count, or 1 without OpenMP
	#if defined(_OPENMP)
	uint64_t const numthreads = omp_get_max_threads();
	#else
	uint64_t const numthreads = 1;
	#endif

	::libmaus2::autoarray::AutoArray<uint64_t> const partstarts = computePartStarts(A,numthreads);
	// partstarts holds one more entry than there are parts
	uint64_t const numparts = partstarts.size()-1;

	::libmaus2::util::Histogram::unique_ptr_type hist(new ::libmaus2::util::Histogram);
	::libmaus2::parallel::OMPLock lock;

	#if defined(_OPENMP)
	#pragma omp parallel for
	#endif
	for ( int64_t part = 0; part < static_cast<int64_t>(numparts); ++part )
	{
		// byte range covered by this part
		uint64_t const partbegin = partstarts[part];
		uint64_t const partbytes = partstarts[part+1]-partbegin;

		// histogram private to this part, so decoding needs no synchronisation
		::libmaus2::util::Histogram::unique_ptr_type parthist(new ::libmaus2::util::Histogram);
		::libmaus2::util::GetObject<uint8_t const *> in(A.begin()+partbegin);

		// decode until the whole part is used up; consumed is passed to
		// decodeUTF8, which presumably advances it by the bytes it reads
		for ( uint64_t consumed = 0; consumed != partbytes; )
			(*parthist)(::libmaus2::util::UTF8::decodeUTF8(in,consumed));

		// fold the local counts into the shared histogram under the lock
		lock.lock();
		hist->merge(*parthist);
		lock.unlock();
	}

	return UNIQUE_PTR_MOVE(hist);
}
Пример #2
0
/**
 * compute character histogram in parallel
 *
 * The file is cut into one contiguous slice per thread (a single slice
 * without OpenMP). Each non-empty slice is read through its own input stream
 * in blocks of at most 8192 bytes and counted into a local table; the local
 * table is then added to the result while holding a lock.
 *
 * @param inputfile name of the file to be scanned
 * @return array of 256 counters, entry c being the number of bytes with value c
 **/
::libmaus::autoarray::AutoArray<uint64_t> computeCharHist(std::string const & inputfile)
{
	uint64_t const n = ::libmaus::util::GetFileSize::getFileSize(inputfile);

	#if defined(_OPENMP)
	uint64_t const numthreads = omp_get_max_threads();
	#else
	uint64_t const numthreads = 1;
	#endif

	// bytes per slice, rounded up so that numthreads slices cover the file
	uint64_t const slicesize = (n + numthreads-1)/numthreads;

	::libmaus::parallel::OMPLock lock;
	::libmaus::autoarray::AutoArray<uint64_t> ghist(256);
	#if defined(_OPENMP)
	#pragma omp parallel for
	#endif
	for ( int64_t slice = 0; slice < static_cast<int64_t>(numthreads); ++slice )
	{
		// clamp the slice to the file size; trailing slices may be empty
		uint64_t const low  = std::min(n,slice*slicesize);
		uint64_t const high = std::min(n,low+slicesize);

		// nothing to read for an empty slice
		if ( high == low )
			continue;

		::libmaus::autoarray::AutoArray<uint64_t> slicehist(ghist.size());
		::libmaus::aio::CheckedInputStream CIS(inputfile);
		CIS.seekg(low);
		uint64_t const blocksize = 8192;
		::libmaus::autoarray::AutoArray<uint8_t> buffer(blocksize);

		// walk the slice front to back, one block at a time
		for ( uint64_t pos = low; pos < high; )
		{
			// the last block of a slice may be shorter than blocksize
			uint64_t const chunk = std::min(blocksize,high-pos);
			CIS.read ( reinterpret_cast<char *>(buffer.begin()), chunk );
			assert ( CIS.gcount() == static_cast<int64_t>(chunk) );

			for ( uint64_t i = 0; i < chunk; ++i )
				++slicehist[buffer[i]];

			pos += chunk;
		}

		// add this slice's counts to the shared histogram under the lock
		lock.lock();
		for ( uint64_t c = 0; c < slicehist.size(); ++c )
			ghist[c] += slicehist[c];
		lock.unlock();
	}

	return ghist;
}