/* ------------------------------------------------------------------------- */
/**
 * Fill a buffer of 32-bit words with a single value.
 *
 * @param val   value stored into every element
 * @param pDst  destination buffer; must hold at least len elements
 * @param len   number of UINT32 elements to set; values <= 0 are a no-op
 *
 * @return PRIMITIVES_SUCCESS in all cases.
 */
PRIM_STATIC pstatus_t general_set_32u(
	UINT32 val,
	UINT32 *pDst,
	INT32 len)
{
	UINT32 *dptr = (UINT32 *) pDst;
	size_t span, remaining;
	primitives_t *prims;

	/* A negative count would make the countdown loop below run far past
	 * the end of the buffer, so treat it like an empty request. */
	if (len <= 0)
		return PRIMITIVES_SUCCESS;

	/* Short buffers: a plain store loop. */
	if (len < 256)
	{
		while (len--)
			*dptr++ = val;

		return PRIMITIVES_SUCCESS;
	}

	/* Otherwise use a doubling copy: seed the first element, then keep
	 * copying the already-filled prefix onto the region right behind it,
	 * so the filled span doubles on every pass. */
	span = 1;
	*dptr = val;
	remaining = len - 1;
	prims = primitives_get();

	while (remaining)
	{
		size_t thiswidth = span;

		/* The last pass may need less than a full span. */
		if (thiswidth > remaining)
			thiswidth = remaining;

		/* copy_8u takes a byte count, hence the multiply by 4. */
		prims->copy_8u((BYTE *) dptr, (BYTE *) (dptr + span), thiswidth<<2);
		remaining -= thiswidth;
		span <<= 1;
	}

	return PRIMITIVES_SUCCESS;
}
/* stride is bytes between rows in the output buffer. */
/*
 * Convert three 64x64 planes of 16-bit R, G and B samples into the packed
 * 8-bit layout selected by pixel_format and write them to dst_buf.
 * The 32-bit formats are delegated to the RGBToRGB_16s8u_P3AC4R primitive;
 * the 24-bit formats are packed here by truncating each sample to a BYTE.
 * Unknown formats leave dst_buf untouched.
 */
static void rfx_decode_format_rgb(INT16* r_buf, INT16* g_buf, INT16* b_buf,
	RDP_PIXEL_FORMAT pixel_format, BYTE* dst_buf, int stride)
{
	static const prim_size_t roi_64x64 = { 64, 64 };
	primitives_t *prims = primitives_get();
	INT16* planes[3];
	BYTE* line = dst_buf;
	int row, col;
	int idx = 0;

	switch (pixel_format)
	{
		case RDP_PIXEL_FORMAT_B8G8R8A8:
			planes[0] = r_buf;
			planes[1] = g_buf;
			planes[2] = b_buf;
			prims->RGBToRGB_16s8u_P3AC4R(
				(const INT16 **) planes, 64*sizeof(INT16),
				dst_buf, stride, &roi_64x64);
			break;

		case RDP_PIXEL_FORMAT_R8G8B8A8:
			/* Same primitive, with the red and blue planes swapped. */
			planes[0] = b_buf;
			planes[1] = g_buf;
			planes[2] = r_buf;
			prims->RGBToRGB_16s8u_P3AC4R(
				(const INT16 **) planes, 64*sizeof(INT16),
				dst_buf, stride, &roi_64x64);
			break;

		case RDP_PIXEL_FORMAT_B8G8R8:
			/* Packed 24-bit output, blue byte first. */
			for (row = 0; row < 64; row++)
			{
				BYTE* out = line;

				for (col = 0; col < 64; col++, idx++)
				{
					out[0] = (BYTE) b_buf[idx];
					out[1] = (BYTE) g_buf[idx];
					out[2] = (BYTE) r_buf[idx];
					out += 3;
				}

				line += stride;
			}
			break;

		case RDP_PIXEL_FORMAT_R8G8B8:
			/* Packed 24-bit output, red byte first. */
			for (row = 0; row < 64; row++)
			{
				BYTE* out = line;

				for (col = 0; col < 64; col++, idx++)
				{
					out[0] = (BYTE) r_buf[idx];
					out[1] = (BYTE) g_buf[idx];
					out[2] = (BYTE) b_buf[idx];
					out += 3;
				}

				line += stride;
			}
			break;

		default:
			break;
	}
}
/* ------------------------------------------------------------------------- */ int test_sign16s_func(void) { INT16 ALIGN(src[65535]), ALIGN(d1[65535]), ALIGN(d2[65535]); int failed = 0; int i; UINT32 pflags = primitives_get_flags(primitives_get()); char testStr[256]; /* Test when we can reach 16-byte alignment */ testStr[0] = '\0'; get_random_data(src, sizeof(src)); general_sign_16s(src+1, d1+1, 65535); #ifdef _M_IX86_AMD64 if (pflags & PRIM_X86_SSSE3_AVAILABLE) { strcat(testStr, " SSSE3"); ssse3_sign_16s(src+1, d2+1, 65535); for (i=1; i<65535; ++i) { if (d1[i] != d2[i]) { printf("SIGN16s-SSE-aligned FAIL[%d] of %d: want %d, got %d\n", i, src[i], d1[i], d2[i]); ++failed; } } } #endif /* i386 */ /* Test when we cannot reach 16-byte alignment */ get_random_data(src, sizeof(src)); general_sign_16s(src+1, d1+2, 65535); #ifdef _M_IX86_AMD64 if (pflags & PRIM_X86_SSSE3_AVAILABLE) { ssse3_sign_16s(src+1, d2+2, 65535); for (i=2; i<65535; ++i) { if (d1[i] != d2[i]) { printf("SIGN16s-SSE-unaligned FAIL[%d] of %d: want %d, got %d\n", i, src[i-1], d1[i], d2[i]); ++failed; } } } #endif /* i386 */ if (!failed) printf("All sign16s tests passed (%s).\n", testStr); return (failed > 0) ? FAILURE : SUCCESS; }
/*
 * Run rfx_quantization_decode_block over each of the ten sub-bands stored
 * back to back in buffer. Every band is passed its own entry of quantVals,
 * reduced by one.
 */
void rfx_quantization_decode(INT16* buffer, const UINT32* quantVals)
{
	/* One row per sub-band, listed in the order the bands occupy buffer. */
	static const struct
	{
		int offset;     /* index of the band's first coefficient */
		int count;      /* number of coefficients in the band */
		int quantIndex; /* which quantVals entry applies to the band */
	} bands[] =
	{
		{    0, 1024, 8 }, /* HL1 */
		{ 1024, 1024, 7 }, /* LH1 */
		{ 2048, 1024, 9 }, /* HH1 */
		{ 3072,  256, 5 }, /* HL2 */
		{ 3328,  256, 4 }, /* LH2 */
		{ 3584,  256, 6 }, /* HH2 */
		{ 3840,   64, 2 }, /* HL3 */
		{ 3904,   64, 1 }, /* LH3 */
		{ 3968,   64, 3 }, /* HH3 */
		{ 4032,   64, 0 }  /* LL3 */
	};
	const primitives_t* prims = primitives_get();
	unsigned int band;

	for (band = 0; band < sizeof(bands) / sizeof(bands[0]); band++)
	{
		rfx_quantization_decode_block(prims, &buffer[bands[band].offset],
			bands[band].count, quantVals[bands[band].quantIndex] - 1);
	}
}
/* stride is bytes between rows in the output buffer. */
/**
 * Decode one tile into rgb_buffer.
 *
 * The Y, Cb and Cr components of the tile are decoded into three work
 * buffers taken from the context's buffer pool, converted in place to RGB
 * and finally packed into the context's pixel format.
 *
 * @param context     codec context (quantization tables, buffer pool, profilers)
 * @param tile        tile providing the component data and quantization indices
 * @param rgb_buffer  output pixel buffer
 * @param stride      bytes between rows in rgb_buffer
 *
 * @return TRUE on success, FALSE if a work buffer could not be obtained
 *         from the pool. Buffers already taken are returned in either case.
 */
BOOL rfx_decode_rgb(RFX_CONTEXT* context, RFX_TILE* tile, BYTE* rgb_buffer, int stride)
{
	BOOL rc = FALSE;
	int i;
	int taken;
	INT16* pSrcDst[3]; /* [0] y_r_buffer, [1] cb_g_buffer, [2] cr_b_buffer */
	UINT32 *y_quants, *cb_quants, *cr_quants;
	static const prim_size_t roi_64x64 = { 64, 64 };
	const primitives_t *prims = primitives_get();

	PROFILER_ENTER(context->priv->prof_rfx_decode_rgb);

	/* Each quantization table occupies 10 consecutive entries. */
	y_quants = context->quants + (tile->quantIdxY * 10);
	cb_quants = context->quants + (tile->quantIdxCb * 10);
	cr_quants = context->quants + (tile->quantIdxCr * 10);

	/* Take the three work buffers; the usable area starts 16 bytes in.
	 * 'taken' counts how many must be handed back on exit. */
	for (taken = 0; taken < 3; taken++)
	{
		BYTE* raw = (BYTE*) BufferPool_Take(context->priv->BufferPool, -1);

		if (!raw)
			goto out;

		pSrcDst[taken] = (INT16*)(raw + 16);
	}

	rfx_decode_component(context, y_quants, tile->YData, tile->YLen, pSrcDst[0]); /* YData */
	rfx_decode_component(context, cb_quants, tile->CbData, tile->CbLen, pSrcDst[1]); /* CbData */
	rfx_decode_component(context, cr_quants, tile->CrData, tile->CrLen, pSrcDst[2]); /* CrData */

	/* In-place colour conversion: the same planes are source and destination. */
	PROFILER_ENTER(context->priv->prof_rfx_ycbcr_to_rgb);
	prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16),
		pSrcDst, 64 * sizeof(INT16), &roi_64x64);
	PROFILER_EXIT(context->priv->prof_rfx_ycbcr_to_rgb);

	PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb);
	rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2],
		context->pixel_format, rgb_buffer, stride);
	PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb);

	rc = TRUE;

out:
	PROFILER_EXIT(context->priv->prof_rfx_decode_rgb);

	/* Undo the 16-byte offset before handing the buffers back. */
	for (i = 0; i < taken; i++)
		BufferPool_Return(context->priv->BufferPool, (BYTE*) pSrcDst[i] - 16);

	return rc;
}
/**
 * Convert a source image to YUV420 and hand it to the H.264 subsystem
 * encoder.
 *
 * Temporary Y, U and V planes are allocated for the duration of the call,
 * sized for the source dimensions rounded up to even values, and released
 * again before returning.
 *
 * @param h264                   codec context; its subsystem must provide Compress
 * @param pSrcData               source pixels
 * @param SrcFormat              source pixel format (not used by this implementation)
 * @param nSrcStep               bytes per source row
 * @param nSrcWidth              source width in pixels
 * @param nSrcHeight             source height in pixels
 * @param TargetFrameSizeInBits  forwarded to the subsystem encoder
 * @param ppDstData              forwarded to the subsystem encoder
 * @param pDstSize               forwarded to the subsystem encoder
 *
 * @return the subsystem's Compress status, or -1 if the context is NULL,
 *         no encoder is available, the dimensions are not positive, or
 *         the plane buffers could not be allocated.
 */
int h264_compress(H264_CONTEXT* h264, BYTE* pSrcData, DWORD SrcFormat,
		int nSrcStep, int nSrcWidth, int nSrcHeight,
		UINT32 TargetFrameSizeInBits, BYTE** ppDstData, UINT32* pDstSize)
{
	int status = -1;
	prim_size_t roi;
	int nWidth, nHeight;
	primitives_t *prims = primitives_get();

	if (!h264)
		return -1;

	if (!h264->subsystem->Compress)
		return -1;

	/* A frame without pixels cannot be converted or encoded. */
	if ((nSrcWidth <= 0) || (nSrcHeight <= 0))
		return -1;

	/* Round both dimensions up to even so the chroma planes are exactly
	 * half the luma size in each direction. */
	nWidth = (nSrcWidth + 1) & ~1;
	nHeight = (nSrcHeight + 1) & ~1;

	h264->pYUVData[0] = (BYTE*) malloc(nWidth * nHeight);
	h264->iStride[0] = nWidth;
	h264->pYUVData[1] = (BYTE*) malloc(nWidth * nHeight / 4);
	h264->iStride[1] = nWidth / 2;
	h264->pYUVData[2] = (BYTE*) malloc(nWidth * nHeight / 4);
	h264->iStride[2] = nWidth / 2;
	h264->width = nWidth;
	h264->height = nHeight;

	/* Only convert and encode when all three planes were allocated;
	 * otherwise fall through to the cleanup with status still -1. */
	if (h264->pYUVData[0] && h264->pYUVData[1] && h264->pYUVData[2])
	{
		roi.width = nSrcWidth;
		roi.height = nSrcHeight;

		prims->RGBToYUV420_8u_P3AC4R(pSrcData, nSrcStep,
			h264->pYUVData, h264->iStride, &roi);

		status = h264->subsystem->Compress(h264, TargetFrameSizeInBits,
			ppDstData, pDstSize);
	}

	/* The planes are only valid during this call; free(NULL) is a no-op. */
	free(h264->pYUVData[0]);
	free(h264->pYUVData[1]);
	free(h264->pYUVData[2]);
	h264->pYUVData[0] = NULL;
	h264->pYUVData[1] = NULL;
	h264->pYUVData[2] = NULL;

	return status;
}
/* ------------------------------------------------------------------------- */ int test_copy8u_func(void) { primitives_t *prims = primitives_get(); BYTE ALIGN(data[COPY_TESTSIZE+15]); int i, soff; int failed = 0; char testStr[256]; BYTE ALIGN(dest[COPY_TESTSIZE+15]); testStr[0] = '\0'; get_random_data(data, sizeof(data)); strcat(testStr, " ptr"); for (soff=0; soff<16; ++soff) { int doff; for (doff=0; doff<16; ++doff) { int length; for (length=1; length<=COPY_TESTSIZE-doff; ++length) { memset(dest, 0, sizeof(dest)); prims->copy_8u(data+soff, dest+doff, length); for (i=0; i<length; ++i) { if (dest[i+doff] != data[i+soff]) { printf("COPY8U FAIL: off=%d len=%d, dest[%d]=0x%02x" "data[%d]=0x%02x\n", doff, length, i+doff, dest[i+doff], i+soff, data[i+soff]); failed = 1; } } } } } if (!failed) printf("All copy8 tests passed (%s).\n", testStr); return (failed > 0) ? FAILURE : SUCCESS; }
/* Copy a block of pixels from one buffer to another.
 * The addresses are assumed to have been already offset to the upper-left
 * corners of the source and destination region of interest.
 *
 * Rows are width * sizeof(UINT32) bytes long and are copied one at a time.
 * When the two regions overlap the rows go through prims->copy, otherwise
 * through memcpy. A zero width or height is a successful no-op.
 */
PRIM_STATIC pstatus_t general_copy_8u_AC4r(
	const BYTE *pSrc, INT32 srcStep,
	BYTE *pDst, INT32 dstStep,
	INT32 width, INT32 height)
{
	primitives_t *prims = primitives_get();
	const BYTE *srcRow = (const BYTE *) pSrc;
	BYTE *dstRow = (BYTE *) pDst;
	int rowbytes = width * sizeof(UINT32);

	if ((width == 0) || (height == 0))
		return PRIMITIVES_SUCCESS;

	if (memory_regions_overlap_2d(pSrc, srcStep, sizeof(UINT32),
			pDst, dstStep, sizeof(UINT32), width, height))
	{
		for (;;)
		{
			prims->copy(srcRow, dstRow, rowbytes);

			if (--height == 0)
				break;

			srcRow += srcStep;
			dstRow += dstStep;
		}
	}
	else
	{
		/* TODO: do it in one operation when the rowdata is adjacent. */
		for (;;)
		{
			/* If we find a replacement for memcpy that is consistently
			 * faster, this could be replaced with that.
			 */
			memcpy(dstRow, srcRow, rowbytes);

			if (--height == 0)
				break;

			srcRow += srcStep;
			dstRow += dstStep;
		}
	}

	return PRIMITIVES_SUCCESS;
}
RFX_CONTEXT* rfx_context_new(void) { HKEY hKey; LONG status; DWORD dwType; DWORD dwSize; DWORD dwValue; SYSTEM_INFO sysinfo; RFX_CONTEXT* context; context = (RFX_CONTEXT*) malloc(sizeof(RFX_CONTEXT)); ZeroMemory(context, sizeof(RFX_CONTEXT)); context->priv = (RFX_CONTEXT_PRIV*) malloc(sizeof(RFX_CONTEXT_PRIV)); ZeroMemory(context->priv, sizeof(RFX_CONTEXT_PRIV)); context->priv->TilePool = Queue_New(TRUE, -1, -1); context->priv->TileQueue = Queue_New(TRUE, -1, -1); /* * align buffers to 16 byte boundary (needed for SSE/NEON instructions) * * y_r_buffer, cb_g_buffer, cr_b_buffer: 64 * 64 * 4 = 16384 (0x4000) * dwt_buffer: 32 * 32 * 2 * 2 * 4 = 16384, maximum sub-band width is 32 */ context->priv->BufferPool = BufferPool_New(TRUE, 16384, 16); #ifdef _WIN32 { BOOL isVistaOrLater; OSVERSIONINFOA verinfo; ZeroMemory(&verinfo, sizeof(OSVERSIONINFOA)); verinfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFOA); GetVersionExA(&verinfo); isVistaOrLater = ((verinfo.dwMajorVersion >= 6) && (verinfo.dwMinorVersion >= 0)) ? TRUE : FALSE; context->priv->UseThreads = isVistaOrLater; } #else context->priv->UseThreads = TRUE; #endif GetNativeSystemInfo(&sysinfo); context->priv->MinThreadCount = sysinfo.dwNumberOfProcessors; context->priv->MaxThreadCount = 0; status = RegOpenKeyEx(HKEY_LOCAL_MACHINE, _T("Software\\FreeRDP\\RemoteFX"), 0, KEY_READ | KEY_WOW64_64KEY, &hKey); if (status == ERROR_SUCCESS) { if (RegQueryValueEx(hKey, _T("UseThreads"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS) context->priv->UseThreads = dwValue ? 
1 : 0; if (RegQueryValueEx(hKey, _T("MinThreadCount"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS) context->priv->MinThreadCount = dwValue; if (RegQueryValueEx(hKey, _T("MaxThreadCount"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS) context->priv->MaxThreadCount = dwValue; RegCloseKey(hKey); } if (context->priv->UseThreads) { /* Call primitives_get here in order to avoid race conditions when using primitives_get */ /* from multiple threads. This call will initialize all function pointers correctly */ /* before any decoding threads are started */ primitives_get(); context->priv->ThreadPool = CreateThreadpool(NULL); InitializeThreadpoolEnvironment(&context->priv->ThreadPoolEnv); SetThreadpoolCallbackPool(&context->priv->ThreadPoolEnv, context->priv->ThreadPool); if (context->priv->MinThreadCount) SetThreadpoolThreadMinimum(context->priv->ThreadPool, context->priv->MinThreadCount); if (context->priv->MaxThreadCount) SetThreadpoolThreadMaximum(context->priv->ThreadPool, context->priv->MaxThreadCount); } /* initialize the default pixel format */ rfx_context_set_pixel_format(context, RDP_PIXEL_FORMAT_B8G8R8A8); /* create profilers for default decoding routines */ rfx_profiler_create(context); /* set up default routines */ context->quantization_decode = rfx_quantization_decode; context->quantization_encode = rfx_quantization_encode; context->dwt_2d_decode = rfx_dwt_2d_decode; context->dwt_2d_encode = rfx_dwt_2d_encode; RFX_INIT_SIMD(context); return context; }
/*
 * Round-trip test for the RGB <-> YUV primitives.
 *
 * Fills a BGRA32 image with a constant colour, converts it to YUV (4:4:4
 * when use444 is set, 4:2:0 otherwise), converts it back, and checks that
 * the result is similar to the input. After each conversion the guard
 * padding around every buffer is verified.
 *
 * Returns TRUE when all conversions and checks succeed, FALSE otherwise
 * (including when the required primitives are not available).
 */
static BOOL TestPrimitiveYUV(BOOL use444)
{
	primitives_t* prims = primitives_get();
	size_t padding = 10000;
	BOOL rc = FALSE;
	BOOL converted;
	BYTE* rgb = NULL;
	BYTE* rgb_dst = NULL;
	BYTE* yuv[3] = {0};
	UINT32 yuv_step[3];
	UINT32 awidth, aheight;
	UINT32 row, col;
	size_t stride, size;
	size_t uvwidth, uvsize;
	prim_size_t roi;

	get_size(&roi.width, &roi.height);

	/* Buffers need to be 16x16 aligned. */
	awidth = roi.width + 16 - roi.width % 16;
	aheight = roi.height + 16 - roi.height % 16;
	stride = awidth * sizeof(UINT32);
	size = awidth * aheight;

	/* Chroma plane geometry depends on the subsampling mode. */
	if (use444)
	{
		uvwidth = awidth;
		uvsize = size;
	}
	else
	{
		uvwidth = (awidth + 1) / 2;
		uvsize = (aheight + 1) / 2 * uvwidth;
	}

	if (!prims)
		return FALSE;

	if (use444)
	{
		if (!prims->RGBToYUV444_8u_P3AC4R || !prims->YUV444ToRGB_8u_P3AC4R)
			return FALSE;
	}
	else
	{
		if (!prims->RGBToYUV420_8u_P3AC4R || !prims->YUV420ToRGB_8u_P3AC4R)
			return FALSE;
	}

	fprintf(stderr, "Running AVC%s on frame size %"PRIu32"x%"PRIu32"\n", use444 ? "444" : "420",
	        roi.width, roi.height);

	/* Allocate source, destination and the three planes, each with padding. */
	rgb = set_padding(size * sizeof(UINT32), padding);
	if (!rgb)
		goto fail;

	rgb_dst = set_padding(size * sizeof(UINT32), padding);
	if (!rgb_dst)
		goto fail;

	yuv[0] = set_padding(size, padding);
	if (!yuv[0])
		goto fail;

	yuv[1] = set_padding(uvsize, padding);
	if (!yuv[1])
		goto fail;

	yuv[2] = set_padding(uvsize, padding);
	if (!yuv[2])
		goto fail;

	/* Paint the region of interest with one constant 4-byte pixel. */
	for (row = 0; row < roi.height; row++)
	{
		BYTE* px = &rgb[row * stride];

		for (col = 0; col < roi.width; col++)
		{
			px[0] = 0x81;
			px[1] = 0x33;
			px[2] = 0xAB;
			px[3] = 0xFF;
			px += 4;
		}
	}

	yuv_step[0] = awidth;
	yuv_step[1] = uvwidth;
	yuv_step[2] = uvwidth;

	/* Forward conversion: RGB -> YUV. */
	if (use444)
		converted = (prims->RGBToYUV444_8u_P3AC4R(rgb, PIXEL_FORMAT_BGRA32,
		             stride, yuv, yuv_step, &roi) == PRIMITIVES_SUCCESS);
	else
		converted = (prims->RGBToYUV420_8u_P3AC4R(rgb, PIXEL_FORMAT_BGRA32,
		             stride, yuv, yuv_step, &roi) == PRIMITIVES_SUCCESS);

	if (!converted)
		goto fail;

	if (!check_padding(rgb, size * sizeof(UINT32), padding, "rgb"))
		goto fail;

	if (!check_padding(yuv[0], size, padding, "Y"))
		goto fail;

	if (!check_padding(yuv[1], uvsize, padding, "U"))
		goto fail;

	if (!check_padding(yuv[2], uvsize, padding, "V"))
		goto fail;

	/* Backward conversion: YUV -> RGB. */
	if (use444)
		converted = (prims->YUV444ToRGB_8u_P3AC4R((const BYTE**)yuv, yuv_step, rgb_dst,
		             stride, PIXEL_FORMAT_BGRA32, &roi) == PRIMITIVES_SUCCESS);
	else
		converted = (prims->YUV420ToRGB_8u_P3AC4R((const BYTE**)yuv, yuv_step, rgb_dst,
		             stride, PIXEL_FORMAT_BGRA32, &roi) == PRIMITIVES_SUCCESS);

	if (!converted)
		goto fail;

	if (!check_padding(rgb_dst, size * sizeof(UINT32), padding, "rgb dst"))
		goto fail;

	if (!check_padding(yuv[0], size, padding, "Y"))
		goto fail;

	if (!check_padding(yuv[1], uvsize, padding, "U"))
		goto fail;

	if (!check_padding(yuv[2], uvsize, padding, "V"))
		goto fail;

	/* Compare the round-tripped image with the input, row by row. */
	for (row = 0; row < roi.height; row++)
	{
		if (!similar(&rgb[row * stride], &rgb_dst[row * stride],
		             roi.width * sizeof(UINT32)))
			goto fail;
	}

	rc = TRUE;
fail:
	free_padding(rgb, padding);
	free_padding(rgb_dst, padding);
	free_padding(yuv[0], padding);
	free_padding(yuv[1], padding);
	free_padding(yuv[2], padding);
	return rc;
}
/* Create 2 pseudo YUV420 frames of same size. * Combine them and check, if the data is at the expected position. */ static BOOL TestPrimitiveYUVCombine(void) { UINT32 x, y, i; UINT32 awidth, aheight; BOOL rc = FALSE; BYTE* luma[3] = { 0 }; BYTE* chroma[3] = { 0 }; BYTE* yuv[3] = { 0 }; BYTE* pmain[3] = { 0 }; BYTE* paux[3] = { 0 }; UINT32 lumaStride[3]; UINT32 chromaStride[3]; UINT32 yuvStride[3]; size_t padding = 10000; prim_size_t roi; primitives_t* prims = primitives_get(); get_size(&roi.width, &roi.height); awidth = roi.width + 16 - roi.width % 16; aheight = roi.height + 16 - roi.height % 16; fprintf(stderr, "Running YUVCombine on frame size %"PRIu32"x%"PRIu32" [%"PRIu32"x%"PRIu32"]\n", roi.width, roi.height, awidth, aheight); if (!prims || !prims->YUV420CombineToYUV444) goto fail; for (x = 0; x < 3; x++) { size_t halfStride = ((x > 0) ? awidth / 2 : awidth); size_t size = aheight * awidth; size_t halfSize = ((x > 0) ? halfStride * aheight / 2 : awidth * aheight); yuvStride[x] = awidth; if (!(yuv[x] = set_padding(size, padding))) goto fail; lumaStride[x] = halfStride; if (!(luma[x] = set_padding(halfSize, padding))) goto fail; if (!(pmain[x] = set_padding(halfSize, padding))) goto fail; chromaStride[x] = halfStride; if (!(chroma[x] = set_padding(halfSize, padding))) goto fail; if (!(paux[x] = set_padding(halfSize, padding))) goto fail; memset(luma[x], 0xAB + 3 * x, halfSize); memset(chroma[x], 0x80 + 2 * x, halfSize); if (!check_padding(luma[x], halfSize, padding, "luma")) goto fail; if (!check_padding(chroma[x], halfSize, padding, "chroma")) goto fail; if (!check_padding(pmain[x], halfSize, padding, "main")) goto fail; if (!check_padding(paux[x], halfSize, padding, "aux")) goto fail; if (!check_padding(yuv[x], size, padding, "yuv")) goto fail; } if (prims->YUV420CombineToYUV444((const BYTE**)luma, lumaStride, (const BYTE**)chroma, chromaStride, yuv, yuvStride, &roi) != PRIMITIVES_SUCCESS) goto fail; for (x = 0; x < 3; x++) { size_t halfStride = ((x > 0) ? 
awidth / 2 : awidth); size_t size = aheight * awidth; size_t halfSize = ((x > 0) ? halfStride * aheight / 2 : awidth * aheight); if (!check_padding(luma[x], halfSize, padding, "luma")) goto fail; if (!check_padding(chroma[x], halfSize, padding, "chroma")) goto fail; if (!check_padding(yuv[x], size, padding, "yuv")) goto fail; } if (prims->YUV444SplitToYUV420((const BYTE**)yuv, yuvStride, pmain, lumaStride, paux, chromaStride, &roi) != PRIMITIVES_SUCCESS) goto fail; for (x = 0; x < 3; x++) { size_t halfStride = ((x > 0) ? awidth / 2 : awidth); size_t size = aheight * awidth; size_t halfSize = ((x > 0) ? halfStride * aheight / 2 : awidth * aheight); if (!check_padding(pmain[x], halfSize, padding, "main")) goto fail; if (!check_padding(paux[x], halfSize, padding, "aux")) goto fail; if (!check_padding(yuv[x], size, padding, "yuv")) goto fail; } for (i = 0; i < 3; i++) { for (y = 0; y < roi.height; y++) { UINT32 w = roi.width; UINT32 lstride = lumaStride[i]; UINT32 cstride = chromaStride[i]; if (i > 0) { w = (roi.width + 3) / 4; if (roi.height > (roi.height + 1) / 2) continue; } if (!similar(luma[i] + y * lstride, pmain[i] + y * lstride, w)) goto fail; /* Need to ignore lines of destination Y plane, * if the lines are not a multiple of 16 * as the UV planes are packed in 8 line stripes. */ if (i == 0) { /* TODO: This check is not perfect, it does not * include the last V lines packed to the Y * frame. */ UINT32 rem = roi.height % 16; if (y > roi.height - rem) continue; } if (!similar(chroma[i] + y * cstride, paux[i] + y * cstride, w)) goto fail; } } rc = TRUE; fail: for (x = 0; x < 3; x++) { free_padding(yuv[x], padding); free_padding(luma[x], padding); free_padding(chroma[x], padding); free_padding(pmain[x], padding); free_padding(paux[x], padding); } return rc; }
int h264_decompress(H264_CONTEXT* h264, BYTE* pSrcData, UINT32 SrcSize, BYTE** ppDstData, DWORD DstFormat, int nDstStep, int nDstWidth, int nDstHeight, RDPGFX_RECT16* regionRects, int numRegionRects) { int index; int status; int* iStride; BYTE* pDstData; BYTE* pDstPoint; prim_size_t roi; BYTE** pYUVData; int width, height; BYTE* pYUVPoint[3]; RDPGFX_RECT16* rect; primitives_t *prims = primitives_get(); if (!h264) return -1; #if 0 WLog_INFO(TAG, "h264_decompress: pSrcData=%p, SrcSize=%u, pDstData=%p, nDstStep=%d, nDstHeight=%d, numRegionRects=%d", pSrcData, SrcSize, *ppDstData, nDstStep, nDstHeight, numRegionRects); #endif if (!(pDstData = *ppDstData)) return -1; if ((status = h264->subsystem->Decompress(h264, pSrcData, SrcSize)) < 0) return status; pYUVData = h264->pYUVData; iStride = h264->iStride; for (index = 0; index < numRegionRects; index++) { rect = &(regionRects[index]); /* Check, if the ouput rectangle is valid in decoded h264 frame. */ if ((rect->right > h264->width) || (rect->left > h264->width)) return -1; if ((rect->top > h264->height) || (rect->bottom > h264->height)) return -1; /* Check, if the output rectangle is valid in destination buffer. */ if ((rect->right > nDstWidth) || (rect->left > nDstWidth)) return -1; if ((rect->bottom > nDstHeight) || (rect->top > nDstHeight)) return -1; width = rect->right - rect->left; height = rect->bottom - rect->top; pDstPoint = pDstData + rect->top * nDstStep + rect->left * 4; pYUVPoint[0] = pYUVData[0] + rect->top * iStride[0] + rect->left; pYUVPoint[1] = pYUVData[1] + rect->top/2 * iStride[1] + rect->left/2; pYUVPoint[2] = pYUVData[2] + rect->top/2 * iStride[2] + rect->left/2; #if 0 WLog_INFO(TAG, "regionRect: x: %d y: %d width: %d height: %d", rect->left, rect->top, width, height); #endif roi.width = width; roi.height = height; prims->YUV420ToRGB_8u_P3AC4R((const BYTE**) pYUVPoint, iStride, pDstPoint, nDstStep, &roi); } return 1; }
/**
 * Allocate and initialise a RemoteFX codec context.
 *
 * Sets up logging, the tile object pool (with encoder or decoder tile
 * constructors depending on the encoder flag), the work buffer pool and,
 * when threading is enabled, a thread pool. Threading defaults can be
 * overridden through the HKLM\Software\FreeRDP\RemoteFX registry key.
 * Finally the default pixel format, profilers and quantization/DWT
 * routines are installed and RFX_INIT_SIMD is applied.
 *
 * @param encoder  TRUE to create an encoder context, FALSE for a decoder
 *
 * @return the new context, or NULL on failure; everything acquired up to
 *         the point of failure is released again.
 */
RFX_CONTEXT* rfx_context_new(BOOL encoder)
{
	HKEY hKey;
	LONG status;
	DWORD dwType;
	DWORD dwSize;
	DWORD dwValue;
	SYSTEM_INFO sysinfo;
	RFX_CONTEXT* context;
	wObject *pool;
	RFX_CONTEXT_PRIV *priv;

	context = (RFX_CONTEXT*)calloc(1, sizeof(RFX_CONTEXT));
	if (!context)
		return NULL;

	context->encoder = encoder;
	context->priv = priv = (RFX_CONTEXT_PRIV *)calloc(1, sizeof(RFX_CONTEXT_PRIV) );
	if (!priv)
		goto error_priv;

	WLog_Init();

	priv->log = WLog_Get("com.freerdp.codec.rfx");
	WLog_OpenAppender(priv->log);

#ifdef WITH_DEBUG_RFX
	WLog_SetLogLevel(priv->log, WLOG_DEBUG);
#endif

	priv->TilePool = ObjectPool_New(TRUE);
	if (!priv->TilePool)
		goto error_tilePool;

	pool = ObjectPool_Object(priv->TilePool);
	pool->fnObjectInit = (OBJECT_INIT_FN) rfx_tile_init;

	if (context->encoder)
	{
		pool->fnObjectNew = (OBJECT_NEW_FN) rfx_encoder_tile_new;
		pool->fnObjectFree = (OBJECT_FREE_FN) rfx_encoder_tile_free;
	}
	else
	{
		pool->fnObjectNew = (OBJECT_NEW_FN) rfx_decoder_tile_new;
		pool->fnObjectFree = (OBJECT_FREE_FN) rfx_decoder_tile_free;
	}

	/*
	 * align buffers to 16 byte boundary (needed for SSE/NEON instructions)
	 *
	 * y_r_buffer, cb_g_buffer, cr_b_buffer: 64 * 64 * sizeof(INT16) = 8192 (0x2000)
	 * dwt_buffer: 32 * 32 * 2 * 2 * sizeof(INT16) = 8192, maximum sub-band width is 32
	 *
	 * Additionally we add 32 bytes (16 in front and 16 at the back of the buffer)
	 * in order to allow optimized functions (SSE, NEON) to read from positions
	 * that are actually in front/beyond the buffer. Offset calculations are
	 * performed at the BufferPool_Take function calls in rfx_encode/decode.c.
	 *
	 * We then multiply by 3 to use a single, partitioned buffer for all 3 channels.
	 */
	priv->BufferPool = BufferPool_New(TRUE, (8192 + 32) * 3, 16);
	if (!priv->BufferPool)
		goto error_BufferPool;

#ifdef _WIN32
	{
		BOOL isVistaOrLater;
		OSVERSIONINFOA verinfo;

		ZeroMemory(&verinfo, sizeof(OSVERSIONINFOA));
		verinfo.dwOSVersionInfoSize = sizeof(OSVERSIONINFOA);

		GetVersionExA(&verinfo);
		isVistaOrLater = ((verinfo.dwMajorVersion >= 6) && (verinfo.dwMinorVersion >= 0)) ? TRUE : FALSE;

		/* Threads are only enabled by default on major version 6 and up. */
		priv->UseThreads = isVistaOrLater;
	}
#else
	priv->UseThreads = TRUE;
#endif

	GetNativeSystemInfo(&sysinfo);

	/* Defaults: one thread per processor as minimum, no explicit maximum. */
	priv->MinThreadCount = sysinfo.dwNumberOfProcessors;
	priv->MaxThreadCount = 0;

	status = RegOpenKeyEx(HKEY_LOCAL_MACHINE, _T("Software\\FreeRDP\\RemoteFX"), 0, KEY_READ | KEY_WOW64_64KEY, &hKey);

	if (status == ERROR_SUCCESS)
	{
		/* dwSize is an in/out parameter that every query overwrites, so
		 * it is reset to the size of dwValue before each call. Otherwise
		 * a later query could be told the buffer is larger than it is. */
		dwSize = sizeof(dwValue);

		if (RegQueryValueEx(hKey, _T("UseThreads"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS)
			priv->UseThreads = dwValue ? 1 : 0;

		dwSize = sizeof(dwValue);

		if (RegQueryValueEx(hKey, _T("MinThreadCount"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS)
			priv->MinThreadCount = dwValue;

		dwSize = sizeof(dwValue);

		if (RegQueryValueEx(hKey, _T("MaxThreadCount"), NULL, &dwType, (BYTE*) &dwValue, &dwSize) == ERROR_SUCCESS)
			priv->MaxThreadCount = dwValue;

		RegCloseKey(hKey);
	}

	if (priv->UseThreads)
	{
		/* Call primitives_get here in order to avoid race conditions when using primitives_get */
		/* from multiple threads. This call will initialize all function pointers correctly     */
		/* before any decoding threads are started */
		primitives_get();

		priv->ThreadPool = CreateThreadpool(NULL);
		if (!priv->ThreadPool)
			goto error_threadPool;

		InitializeThreadpoolEnvironment(&priv->ThreadPoolEnv);
		SetThreadpoolCallbackPool(&priv->ThreadPoolEnv, priv->ThreadPool);

		/* A count of zero means "leave the pool's own setting alone". */
		if (priv->MinThreadCount)
			if (!SetThreadpoolThreadMinimum(priv->ThreadPool, priv->MinThreadCount))
				goto error_threadPool_minimum;

		if (priv->MaxThreadCount)
			SetThreadpoolThreadMaximum(priv->ThreadPool, priv->MaxThreadCount);
	}

	/* initialize the default pixel format */
	rfx_context_set_pixel_format(context, RDP_PIXEL_FORMAT_B8G8R8A8);

	/* create profilers for default decoding routines */
	rfx_profiler_create(context);

	/* set up default routines */
	context->quantization_decode = rfx_quantization_decode;
	context->quantization_encode = rfx_quantization_encode;
	context->dwt_2d_decode = rfx_dwt_2d_decode;
	context->dwt_2d_encode = rfx_dwt_2d_encode;

	RFX_INIT_SIMD(context);

	context->state = RFX_STATE_SEND_HEADERS;
	return context;

	/* Error unwinding: each label releases what was acquired before the
	 * failing step and falls through to the earlier ones. */
error_threadPool_minimum:
	CloseThreadpool(priv->ThreadPool);
error_threadPool:
	BufferPool_Free(priv->BufferPool);
error_BufferPool:
	ObjectPool_Free(priv->TilePool);
error_tilePool:
	free(priv);
error_priv:
	free(context);
	return NULL;
}
/* stride is bytes between rows in the output buffer. */ BOOL rfx_decode_rgb(RFX_CONTEXT* context, wStream* data_in, int y_size, const UINT32* y_quants, int cb_size, const UINT32* cb_quants, int cr_size, const UINT32* cr_quants, BYTE* rgb_buffer, int stride) { INT16* pSrcDst[3]; static const prim_size_t roi_64x64 = { 64, 64 }; const primitives_t *prims = primitives_get(); PROFILER_ENTER(context->priv->prof_rfx_decode_rgb); pSrcDst[0] = (INT16*)((BYTE*)BufferPool_Take(context->priv->BufferPool, -1) + 16); /* y_r_buffer */ pSrcDst[1] = (INT16*)((BYTE*)BufferPool_Take(context->priv->BufferPool, -1) + 16); /* cb_g_buffer */ pSrcDst[2] = (INT16*)((BYTE*)BufferPool_Take(context->priv->BufferPool, -1) + 16); /* cr_b_buffer */ #if 0 if (context->priv->UseThreads) { PTP_WORK work_objects[3]; RFX_COMPONENT_WORK_PARAM params[3]; params[0].context = context; params[0].quantization_values = y_quants; params[0].buffer = stream_get_tail(data_in); params[0].capacity = y_size; params[0].buffer = pSrcDst[0]; stream_seek(data_in, y_size); params[1].context = context; params[1].quantization_values = cb_quants; params[1].buffer = stream_get_tail(data_in); params[1].capacity = cb_size; params[1].buffer = pSrcDst[1]; stream_seek(data_in, cb_size); params[2].context = context; params[2].quantization_values = cr_quants; params[2].buffer = stream_get_tail(data_in); params[2].capacity = cr_size; params[2].buffer = pSrcDst[2]; stream_seek(data_in, cr_size); work_objects[0] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_decode_component_work_callback, (void*) ¶ms[0], &context->priv->ThreadPoolEnv); work_objects[1] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_decode_component_work_callback, (void*) ¶ms[1], &context->priv->ThreadPoolEnv); work_objects[2] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_decode_component_work_callback, (void*) ¶ms[2], &context->priv->ThreadPoolEnv); SubmitThreadpoolWork(work_objects[0]); SubmitThreadpoolWork(work_objects[1]); SubmitThreadpoolWork(work_objects[2]); 
WaitForThreadpoolWorkCallbacks(work_objects[0], FALSE); WaitForThreadpoolWorkCallbacks(work_objects[1], FALSE); WaitForThreadpoolWorkCallbacks(work_objects[2], FALSE); } else #endif { if (stream_get_left(data_in) < y_size+cb_size+cr_size) { DEBUG_WARN("rfx_decode_rgb: packet too small for y_size+cb_size+cr_size"); return FALSE; } rfx_decode_component(context, y_quants, stream_get_tail(data_in), y_size, pSrcDst[0]); /* YData */ stream_seek(data_in, y_size); rfx_decode_component(context, cb_quants, stream_get_tail(data_in), cb_size, pSrcDst[1]); /* CbData */ stream_seek(data_in, cb_size); rfx_decode_component(context, cr_quants, stream_get_tail(data_in), cr_size, pSrcDst[2]); /* CrData */ stream_seek(data_in, cr_size); } prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16), pSrcDst, 64 * sizeof(INT16), &roi_64x64); PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb); rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2], context->pixel_format, rgb_buffer, stride); PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb); PROFILER_EXIT(context->priv->prof_rfx_decode_rgb); BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[0] - 16); BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[1] - 16); BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[2] - 16); return TRUE; }