/*
 * wavelet_speed_cuda:
 * @frame: frame handed to schro_gpuframe_iwt_transform() on every iteration
 * @filter: value stored in params.wavelet_filter_index
 * @width: picture width used to derive the luma transform width
 * @height: picture height used to derive the luma transform height
 *
 * Times schro_gpuframe_iwt_transform(): runs 10 rounds of 10 transforms
 * each and, after every round, prints the mean time per transform and the
 * number of transforms per time unit (labelled "fps").  The time unit is
 * whatever gettime() returns -- presumably seconds; confirm against its
 * definition.
 *
 * NOTE(review): only the fields assigned below are set; every other field
 * of the local SchroParams is left uninitialized.
 */
void
wavelet_speed_cuda (SchroFrame *frame, int filter, int width, int height)
{
  int i;
  int j;
  double start, stop;
  SchroParams params;

  params.transform_depth = 4;
  params.iwt_luma_width = ROUND_UP_POW2 (width, params.transform_depth);
  params.iwt_luma_height = ROUND_UP_POW2 (height, params.transform_depth);
  /* Chroma is half the luma width but the full luma height. */
  params.iwt_chroma_width = params.iwt_luma_width / 2;
  params.iwt_chroma_height = params.iwt_luma_height;
  params.wavelet_filter_index = filter;

  for (j = 0; j < 10; j++) {
    start = gettime ();
    for (i = 0; i < 10; i++) {
      /* Pass the parameter block by address (the '&' had been garbled). */
      schro_gpuframe_iwt_transform (frame, &params);
    }
    stop = gettime ();

    printf ("time %g, %g fps\n", (stop - start) / 10.0, 10.0 / (stop - start));
  }
}
/**
 * schro_params_calculate_iwt_sizes:
 * @params: pointer to the #SchroParams structure to update
 *
 * Derives the dimensions of the arrays used for the wavelet transform.
 * The picture luma and chroma sizes are queried from @params->video_format
 * and each dimension is passed through ROUND_UP_POW2() together with
 * @params->transform_depth.
 *
 * Fields written: iwt_luma_width, iwt_luma_height, iwt_chroma_width,
 * iwt_chroma_height.  The resulting sizes are also logged via SCHRO_DEBUG.
 */
void
schro_params_calculate_iwt_sizes (SchroParams * params)
{
  SchroVideoFormat *format = params->video_format;
  const int depth = params->transform_depth;
  int luma_w, luma_h;
  int chroma_w, chroma_h;

  /* Luma plane. */
  schro_video_format_get_picture_luma_size (format, &luma_w, &luma_h);
  params->iwt_luma_width = ROUND_UP_POW2 (luma_w, depth);
  params->iwt_luma_height = ROUND_UP_POW2 (luma_h, depth);

  /* Chroma planes. */
  schro_video_format_get_picture_chroma_size (format, &chroma_w, &chroma_h);
  params->iwt_chroma_width = ROUND_UP_POW2 (chroma_w, depth);
  params->iwt_chroma_height = ROUND_UP_POW2 (chroma_h, depth);

  SCHRO_DEBUG ("iwt chroma size %d x %d",
      params->iwt_chroma_width, params->iwt_chroma_height);
  SCHRO_DEBUG ("iwt luma size %d x %d",
      params->iwt_luma_width, params->iwt_luma_height);
}
/*
 * get_buffer_size:
 * @schro_dec: decoder whose width, height and fourcc are read
 *
 * Returns the number of bytes needed for one output frame in the decoder's
 * current fourcc:
 *   I420: one luma plane plus two chroma planes, with the row and height
 *         padding expressed through ROUND_UP_POW2()/ROUND_UP_SHIFT()
 *   YUY2: padded width * 2 bytes * height
 *   AYUV: width * 4 bytes * height
 *
 * (ROUND_UP_POW2(x, n) presumably rounds x up to a multiple of 2^n and
 * ROUND_UP_SHIFT(x, n) presumably computes ceil(x / 2^n) -- confirm against
 * the macro definitions.)
 *
 * An unknown fourcc trips g_assert_not_reached().  When assertions are
 * compiled out, 0 is returned rather than an indeterminate value.
 */
static int
get_buffer_size (GstSchroDec *schro_dec)
{
  /* Start from 0 so the default branch never returns garbage. */
  int size = 0;
  int width = schro_dec->width;
  int height = schro_dec->height;

  switch (schro_dec->fourcc) {
    case GST_MAKE_FOURCC('I','4','2','0'):
      /* Luma plane ... */
      size = ROUND_UP_POW2 (width, 2) * ROUND_UP_POW2 (height, 1);
      /* ... followed by the two chroma planes. */
      size += 2 * ROUND_UP_POW2 (ROUND_UP_SHIFT (width, 1), 2) *
          ROUND_UP_SHIFT (height, 1);
      break;
    case GST_MAKE_FOURCC('Y','U','Y','2'):
      size = ROUND_UP_POW2 (width, 2) * 2 * height;
      break;
    case GST_MAKE_FOURCC('A','Y','U','V'):
      size = width * 4 * height;
      break;
    default:
      g_assert_not_reached ();
      break;
  }

  return size;
}
// Builds a stream buffer of the given buffer type.
//
// type: stored as-is in m_buffertype.
// size: requested size; the stored m_size is ROUND_UP_POW2(size)
//       (presumably the next power of two -- confirm against the macro).
//
// m_bit_per_slot is IntLog2 of the rounded size divided by SYNC_POINTS.
// The buffer handle comes from GenBuffer().
StreamBuffer::StreamBuffer(u32 type, u32 size)
    : m_buffer(GenBuffer()),
      m_buffertype(type),
      m_size(ROUND_UP_POW2(size)),
      m_bit_per_slot(IntLog2(ROUND_UP_POW2(size) / SYNC_POINTS))
{
  // All three cursors start at the beginning of the buffer.
  m_free_iterator = 0;
  m_used_iterator = 0;
  m_iterator = 0;
}
// Builds a stream buffer of the given buffer type with an aligned size.
//
// type:            stored as-is in m_buffertype.
// size:            requested size; first passed through ROUND_UP_POW2().
// align_size:      second argument to Common::AlignUpSizePow2(), applied to
//                  the rounded size to produce m_size.
// need_cpu_buffer: not referenced anywhere in this constructor.
//
// m_bit_per_slot is IntLog2 of the final size divided by SYNC_POINTS.
// The buffer handle comes from GenBuffer().
StreamBuffer::StreamBuffer(u32 type, u32 size, u32 align_size, bool need_cpu_buffer)
    : m_buffer(GenBuffer()),
      m_buffertype(type),
      m_size(Common::AlignUpSizePow2(ROUND_UP_POW2(size), align_size)),
      m_bit_per_slot(
          IntLog2(Common::AlignUpSizePow2(ROUND_UP_POW2(size), align_size) / SYNC_POINTS))
{
  // All three cursors start at the beginning of the buffer.
  m_free_iterator = 0;
  m_used_iterator = 0;
  m_iterator = 0;

  // Clear the first SYNC_POINTS fence entries.
  for (int slot = 0; slot < SYNC_POINTS; ++slot)
  {
    m_fences[slot] = 0;
  }
}
// returns true if size was changed bool FixTextureSize(int& width, int& height) { int oldw = width, oldh = height; // conditional nonpow2 support should work fine for us if ((caps.TextureCaps & D3DPTEXTURECAPS_POW2) && !(caps.TextureCaps & D3DPTEXTURECAPS_NONPOW2CONDITIONAL)) { // all texture dimensions need to be powers of two width = (int)ROUND_UP_POW2((u32)width); height = (int)ROUND_UP_POW2((u32)height); } if (caps.TextureCaps & D3DPTEXTURECAPS_SQUAREONLY) { width = height = std::max(width, height); } width = std::min(width, (int)caps.MaxTextureWidth); height = std::min(height, (int)caps.MaxTextureHeight); return (width != oldw) || (height != oldh); }