Esempio n. 1
0
void BenchmarkScale(unsigned int in_w, unsigned int in_h, unsigned int out_w, unsigned int out_h) {

	std::mt19937 rng(12345);
	bool use_ssse3 = (CPUFeatures::HasMMX() && CPUFeatures::HasSSE() && CPUFeatures::HasSSE2() && CPUFeatures::HasSSE3() && CPUFeatures::HasSSSE3());

	// the queue needs to use enough memory to make sure that the CPU cache is flushed
	unsigned int pixels = std::max(in_w * in_h, out_w * out_h);
	unsigned int queue_size = 1 + 20000000 / pixels;
	unsigned int run_size = queue_size * 20;

	// create queue
	std::vector<std::unique_ptr<ImageGeneric> > queue_in(queue_size);
	std::vector<std::unique_ptr<ImageGeneric> > queue_out(queue_size);
	for(unsigned int i = 0; i < queue_size; ++i) {
		queue_in[i] = NewImageBGRA(in_w, in_h, rng);
		queue_out[i] = NewImageBGRA(out_w, out_h, rng);
	}

	// run test
	unsigned int time_fallback = 0, time_ssse3 = 0;
	{
		int64_t t1 = hrt_time_micro();
		for(unsigned int i = 0; i < run_size; ++i) {
			unsigned int ii = i % queue_size;
			Scale_BGRA_Fallback(in_w, in_h, queue_in[ii]->m_data[0], queue_in[ii]->m_stride[0],
								out_w, out_h, queue_out[ii]->m_data[0], queue_out[ii]->m_stride[0]);
		}
		int64_t t2 = hrt_time_micro();
		time_fallback = (t2 - t1) / run_size;
	}
	if(use_ssse3) {
		int64_t t1 = hrt_time_micro();
		for(unsigned int i = 0; i < run_size; ++i) {
			unsigned int ii = i % queue_size;
			Scale_BGRA_SSSE3(in_w, in_h, queue_in[ii]->m_data[0], queue_in[ii]->m_stride[0],
							 out_w, out_h, queue_out[ii]->m_data[0], queue_out[ii]->m_stride[0]);
		}
		int64_t t2 = hrt_time_micro();
		time_ssse3 = (t2 - t1) / run_size;
	}

	// print result
	QString in_size = QString("%1x%2").arg(in_w).arg(in_h);
	QString out_size = QString("%1x%2").arg(out_w).arg(out_h);
	Logger::LogInfo("[BenchmarkScale] " + Logger::tr("BGRA %1 to BGRA %2  |  Fallback %3 ms  |  SSSE3 %4 ms  |  %5%")
					.arg(in_size, 9).arg(out_size, 9).arg(time_fallback, 6).arg(time_ssse3, 6).arg(100 * time_ssse3 / time_fallback, 3));

}
Esempio n. 2
0
void BenchmarkScale(unsigned int in_w, unsigned int in_h, unsigned int out_w, unsigned int out_h) {

	std::mt19937 rng(12345);
#if SSR_USE_X86_ASM
	bool use_ssse3 = (CPUFeatures::HasMMX() && CPUFeatures::HasSSE() && CPUFeatures::HasSSE2() && CPUFeatures::HasSSE3() && CPUFeatures::HasSSSE3());
#endif

	// the queue needs to use enough memory to make sure that the CPU cache is flushed
	unsigned int pixels = std::max(in_w * in_h, out_w * out_h);
	unsigned int queue_size = 1 + 20000000 / pixels;
	unsigned int run_size = queue_size * 20;

	// create queue
	std::vector<std::unique_ptr<ImageGeneric> > queue_in(queue_size);
	std::vector<std::unique_ptr<ImageGeneric> > queue_out(queue_size);
	for(unsigned int i = 0; i < queue_size; ++i) {
		queue_in[i] = NewImageBGRA(in_w, in_h, rng);
		queue_out[i] = NewImageBGRA(out_w, out_h, rng);
	}

	// run test
	unsigned int time_swscale = 0, time_fallback = 0, time_ssse3 = 0;
	{
		SwsContext *sws = sws_getCachedContext(NULL,
											   in_w, in_h, AV_PIX_FMT_BGRA,
											   out_w, out_h, AV_PIX_FMT_BGRA,
											   SWS_BILINEAR, NULL, NULL, NULL);
		if(sws == NULL) {
			Logger::LogError("[BenchmarkScale] " + Logger::tr("Error: Can't get swscale context!", "Don't translate 'swscale'"));
			throw LibavException();
		}
		sws_setColorspaceDetails(sws,
								 sws_getCoefficients(SWS_CS_ITU709), 0,
								 sws_getCoefficients(SWS_CS_DEFAULT), 0,
								 0, 1 << 16, 1 << 16);
		int64_t t1 = hrt_time_micro();
		for(unsigned int i = 0; i < run_size / 2; ++i) {
			unsigned int ii = i % queue_size;
			sws_scale(sws, queue_in[ii]->m_data.data(), queue_in[ii]->m_stride.data(), 0, in_h, queue_out[ii]->m_data.data(), queue_out[ii]->m_stride.data());
		}
		int64_t t2 = hrt_time_micro();
		time_swscale = (t2 - t1) / (run_size / 2);
	}
	{
		int64_t t1 = hrt_time_micro();
		for(unsigned int i = 0; i < run_size; ++i) {
			unsigned int ii = i % queue_size;
			Scale_BGRA_Fallback(in_w, in_h, queue_in[ii]->m_data[0], queue_in[ii]->m_stride[0],
								out_w, out_h, queue_out[ii]->m_data[0], queue_out[ii]->m_stride[0]);
		}
		int64_t t2 = hrt_time_micro();
		time_fallback = (t2 - t1) / run_size;
	}
#if SSR_USE_X86_ASM
	if(use_ssse3) {
		int64_t t1 = hrt_time_micro();
		for(unsigned int i = 0; i < run_size; ++i) {
			unsigned int ii = i % queue_size;
			Scale_BGRA_SSSE3(in_w, in_h, queue_in[ii]->m_data[0], queue_in[ii]->m_stride[0],
							 out_w, out_h, queue_out[ii]->m_data[0], queue_out[ii]->m_stride[0]);
		}
		int64_t t2 = hrt_time_micro();
		time_ssse3 = (t2 - t1) / run_size;
	}
#endif

	// print result
	QString in_size = QString("%1x%2").arg(in_w).arg(in_h);
	QString out_size = QString("%1x%2").arg(out_w).arg(out_h);
	Logger::LogInfo("[BenchmarkScale] " + Logger::tr("BGRA %1 to BGRA %2  |  SWScale %3 us  |  Fallback %4 us (%5%)  |  SSSE3 %6 us (%7%)")
					.arg(in_size, 9).arg(out_size, 9)
					.arg(time_swscale, 6)
					.arg(time_fallback, 6).arg(100 * time_fallback / time_swscale, 3)
					.arg(time_ssse3, 6).arg(100 * time_ssse3 / time_fallback, 3));

}