예제 #1
0
void 
EasyIocp::InitThreadPool()
{
	//初始化线程池
	int num = GetProcessorsNum();

	InitializeThreadpoolEnvironment(&tpCBE_);

	worksNum_ = num;
	ptpWorks_ = (PTP_WORK*)SysMalloc(sizeof(PTP_WORK) * worksNum_);
	workArgs_ = (EasyIocpStruct::WORK_ARG*)SysMalloc(sizeof(EasyIocpStruct::WORK_ARG) * worksNum_);
	if(!ptpWorks_ || !workArgs_) {
		print("EasyIocp::Construct: SysMalloc failed.");
	}
	assert(ptpWorks_ != NULL);
	assert(workArgs_ != NULL);

	for(int i = 0; i < worksNum_; ++i) {
		workArgs_[i].id = i;
		workArgs_[i].iocpModel = this;
		ptpWorks_[i] = CreateThreadpoolWork(TPCallBack, (PVOID)&workArgs_[i], &tpCBE_);
		SubmitThreadpoolWork(ptpWorks_[i]);
	}

	print("EasyIocp::InitThreadPool: init thread pool successfully. num: %d.", worksNum_);
}
예제 #2
0
bool WorkItem<T>::StartWork()
{
	work_ = CreateThreadpoolWork(callback, this, callback_env_); 
	if(work_ == nullptr)
	{
		LOG_FATAL();
		return false;
	}

	SubmitThreadpoolWork(work_);
	return true;
}
예제 #3
0
    virtual void schedule(pplx::TaskProc_t proc, void* param)
    {
        pplx::details::atomic_increment(s_flag);
        auto schedulerParam = std::unique_ptr<_Scheduler_Param>(new _Scheduler_Param(proc, param));
        auto work = CreateThreadpoolWork(DefaultWorkCallbackTest, schedulerParam.get(), NULL);

        if (work == nullptr)
        {
            throw utility::details::create_system_error(GetLastError());
        }

        SubmitThreadpoolWork(work);
        CloseThreadpoolWork(work);
        schedulerParam.release();
    }
예제 #4
0
파일: 11Batch.cpp 프로젝트: H2-T23/garage
	BOOL	OnCreate( LPCREATESTRUCT lpCreateStruct ){
		m_pWorkItem	= CreateThreadpoolWork( WorkerThread, this, NULL );
		if( m_pWorkItem == NULL ){
			return FALSE;
		}

		m_btnStart.Create(this, 10, 10, 100, 25, IDC_START_BTN, _T("START"));
		m_btnStart.SetFont();

		m_ListBox.Create(this, 10, 30, 200, 200, IDC_LISTBOX);
		m_ListBox.SetFont();

		cmd.Initialize( this );
		cmd.Register( IDC_START_BTN, &CMainForm::OnStartBatch );

		msg.Initialize( this );
		msg.Register( WM_APP_COMPLETED, &CMainForm::OnCompleted );

		return TRUE;
	}
예제 #5
0
BOOL CService::UnregisterNotification(NOTIFY_HANDLE Handle)
{
    BOOL ret = FALSE;

    if (Handle)
    {
#ifdef UNIVERSAL
        PTP_WORK work;

        work = CreateThreadpoolWork(UnregisterNotificationWork, Handle, NULL);
        if (work != NULL)
        {
            SubmitThreadpoolWork(work);
        }

        ret = TRUE;
#else // UNIVERSAL
        ret = ::UnregisterDeviceNotification(Handle);
#endif // UNIVERSAL
    }

    return ret;
}
예제 #6
0
__declspec(noinline) bool benchmark_ntp_fs_stat() {
	TP_CALLBACK_ENVIRON env;
	InitializeThreadpoolEnvironment(&env);

	PTP_POOL pool{nullptr};
	pool = CreateThreadpool(nullptr);
	SetThreadpoolThreadMaximum(pool, 48);
	SetThreadpoolThreadMinimum(pool, 12);

	PTP_CLEANUP_GROUP group = CreateThreadpoolCleanupGroup();
	SetThreadpoolCallbackPool(&env, pool);
	SetThreadpoolCallbackCleanupGroup(&env, group, nullptr);

	PTP_WORK work_fs_stat = CreateThreadpoolWork(WorkCallback_fs_stat, nullptr, &env);


	SubmitThreadpoolWork(work_fs_stat);
	WaitForThreadpoolWorkCallbacks(work_fs_stat, false);

	CloseThreadpoolWork(work_fs_stat);

	CloseThreadpool(pool);
	return false;
}
예제 #7
0
파일: Server.cpp 프로젝트: aliakseis/IOCP
bool Server::Create(short port, int maxPostAccept)
{
    assert(maxPostAccept > 0);

    m_MaxPostAccept = maxPostAccept;

    // Create Client Work Thread Env for using cleaning group. We need this for shutting down
    // properly.
    InitializeThreadpoolEnvironment(&m_ClientTPENV);
    m_ClientTPCLEAN = CreateThreadpoolCleanupGroup();
    if (m_ClientTPCLEAN == NULL)
    {
        ERROR_CODE(GetLastError(), "Could not create client cleaning group.");
        return false;
    }
    SetThreadpoolCallbackCleanupGroup(&m_ClientTPENV, m_ClientTPCLEAN, NULL);

    // Create Listen Socket
    m_listenSocket = Network::CreateSocket(true, port);
    if (m_listenSocket == INVALID_SOCKET)
    {
        return false;
    }

    // Make the address re-usable to re-run the same server instantly.
    bool reuseAddr = true;
    if (setsockopt(m_listenSocket, SOL_SOCKET, SO_REUSEADDR,
                   reinterpret_cast<const char*>(&reuseAddr), sizeof(reuseAddr)) == SOCKET_ERROR)
    {
        ERROR_CODE(WSAGetLastError(), "setsockopt() failed with SO_REUSEADDR.");
        Destroy();
        return false;
    }

    // Create & Start ThreaddPool for socket IO
    m_pTPIO = CreateThreadpoolIo(reinterpret_cast<HANDLE>(m_listenSocket),
                                 Server::IoCompletionCallback, NULL, NULL);
    if (m_pTPIO == NULL)
    {
        ERROR_CODE(WSAGetLastError(), "Could not assign the listen socket to the IOCP handle.");
        Destroy();
        return false;
    }

    // Start listening
    StartThreadpoolIo(m_pTPIO);
    if (listen(m_listenSocket, SOMAXCONN) == SOCKET_ERROR)
    {
        ERROR_CODE(WSAGetLastError(), "listen() failed.");
        return false;
    }

    // Create critical sections for m_Clients
    InitializeCriticalSection(&m_CSForClients);

    // Create Accept worker
    m_AcceptTPWORK = CreateThreadpoolWork(Server::WorkerPostAccept, this, NULL);
    if (m_AcceptTPWORK == NULL)
    {
        ERROR_CODE(GetLastError(), "Could not create AcceptEx worker TPIO.");
        Destroy();
        return false;
    }

    m_ShuttingDown = false;

    SubmitThreadpoolWork(m_AcceptTPWORK);

    return true;
}
예제 #8
0
int TestPoolWork(int argc, char* argv[])
{
	int index;
	PTP_POOL pool;
	PTP_WORK work;
	PTP_CLEANUP_GROUP cleanupGroup;
	TP_CALLBACK_ENVIRON environment;

	printf("Global Thread Pool\n");

	work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", NULL);

	if (!work)
	{
		printf("CreateThreadpoolWork failure\n");
		return -1;
	}

	/**
	 * You can post a work object one or more times (up to MAXULONG) without waiting for prior callbacks to complete.
	 * The callbacks will execute in parallel. To improve efficiency, the thread pool may throttle the threads.
	 */

	for (index = 0; index < 10; index++)
		SubmitThreadpoolWork(work);

	WaitForThreadpoolWorkCallbacks(work, FALSE);
	CloseThreadpoolWork(work);

	printf("Private Thread Pool\n");

	if (!(pool = CreateThreadpool(NULL)))
	{
		printf("CreateThreadpool failure\n");
		return -1;
	}

	if (!SetThreadpoolThreadMinimum(pool, 4))
	{
		printf("SetThreadpoolThreadMinimum failure\n");
		return -1;
	}

	SetThreadpoolThreadMaximum(pool, 8);

	InitializeThreadpoolEnvironment(&environment);
	SetThreadpoolCallbackPool(&environment, pool);

	cleanupGroup = CreateThreadpoolCleanupGroup();

	if (!cleanupGroup)
	{
		printf("CreateThreadpoolCleanupGroup failure\n");
		return -1;
	}

	SetThreadpoolCallbackCleanupGroup(&environment, cleanupGroup, NULL);

	work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", &environment);

	if (!work)
	{
		printf("CreateThreadpoolWork failure\n");
		return -1;
	}

	for (index = 0; index < 10; index++)
		SubmitThreadpoolWork(work);

	WaitForThreadpoolWorkCallbacks(work, FALSE);

	CloseThreadpoolCleanupGroupMembers(cleanupGroup, TRUE, NULL);

	CloseThreadpoolCleanupGroup(cleanupGroup);

	DestroyThreadpoolEnvironment(&environment);

	/**
	 * See Remarks at https://msdn.microsoft.com/en-us/library/windows/desktop/ms682043(v=vs.85).aspx
	 * If there is a cleanup group associated with the work object,
	 * it is not necessary to call CloseThreadpoolWork !
	 * calling the CloseThreadpoolCleanupGroupMembers function releases the work, wait,
	 * and timer objects associated with the cleanup group.
	 */

	/* CloseThreadpoolWork(work); // this would segfault, see comment above. */

	CloseThreadpool(pool);

	return 0;
}
예제 #9
0
NamedPipe::NamedPipe()
    : work_(CreateThreadpoolWork(OnRequested, this, nullptr)),
      pipe_(INVALID_HANDLE_VALUE),
      io_(nullptr) {}
예제 #10
0
파일: wcMT_VTP.c 프로젝트: oneminot/wsp
int main (int argc, char * argv[])
{
	DWORD nchar = 0, nword = 0, nline = 0;
    PTP_WORK *pWorkObjects;
    WORK_OBJECT_ARG ** pWorkObjArgsArray, *pObjectArg;
	TP_CALLBACK_ENVIRON cbe;  // Callback environment
	int nThread, iThrd;
	
    if (!WindowsVersionOK (6, 0)) 
        ReportError ("This program requires Windows NT 6.0 or greater", 1, TRUE);

	if (argc < 2) {
		printf ("Usage: wcMT_vtp filename ... filename\n");
		return 1;
	}

	/* Create a worker thread for each file on the command line */
	nThread = (DWORD)argc - 1;
    pWorkObjects = malloc (nThread * sizeof(PTP_WORK));
	if (pWorkObjects != NULL)
		pWorkObjArgsArray = malloc (nThread * sizeof(WORK_OBJECT_ARG *));
	if (pWorkObjects == NULL || pWorkObjArgsArray == NULL)
        ReportError ("Cannot allocate working memory for worke item or argument array.", 2, TRUE);

	InitializeThreadpoolEnvironment (&cbe);

	/* Create a work object argument for each file on the command line.
	   First put the file names in the thread arguments.	*/
	for (iThrd = 0; iThrd < nThread; iThrd++) {
		pObjectArg = (pWorkObjArgsArray[iThrd] = _aligned_malloc (sizeof(WORK_OBJECT_ARG), CACHE_LINE_SIZE));
		if (NULL == pObjectArg)
			ReportError ("Cannot allocate memory for a thread argument structure.", 3, TRUE);
		pObjectArg->filename = argv[iThrd+1];
		pObjectArg->kword = pObjectArg->kchar = pObjectArg->kline = 0;
		pWorkObjects[iThrd] = CreateThreadpoolWork (wcfunc, pObjectArg, &cbe);
        if (pWorkObjects[iThrd] == NULL) 
            ReportError ("Cannot create consumer thread", 4, TRUE);
		SubmitThreadpoolWork (pWorkObjects[iThrd]);
	}
	
	/* Worker objects are all submitted. Wait for them 	*/
	/* to complete and accumulate the results			*/
	for (iThrd = 0; iThrd < nThread; iThrd++) {
		/* Wait for the thread pool work item to complete */
		WaitForThreadpoolWorkCallbacks (pWorkObjects[iThrd], FALSE);
		CloseThreadpoolWork(pWorkObjects[iThrd]);
	}
    free (pWorkObjects);
	
	/* Accumulate the results							*/
	for (iThrd = 0; iThrd < argc - 1; iThrd++) {
		pObjectArg = pWorkObjArgsArray[iThrd]; 
		nchar += pObjectArg->kchar;
		nword += pObjectArg->kword;
		nline += pObjectArg->kline;
		printf ("%10d %9d %9d %s\n", pObjectArg->kline,
			pObjectArg->kword, pObjectArg->kchar,
			pObjectArg->filename);
	}
	free (pWorkObjArgsArray);
	printf ("%10d %9d %9d \n", nline, nword, nchar);
	return 0;
}
예제 #11
0
파일: rfx.c 프로젝트: C4rt/FreeRDP
static BOOL rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* message, wStream* s, UINT16* pExpecedBlockType)
{
	BOOL rc;
	int i, close_cnt;
	int pos;
	BYTE quant;
	RFX_TILE* tile;
	UINT32* quants;
	UINT16 subtype;
	UINT32 blockLen;
	UINT32 blockType;
	UINT32 tilesDataSize;
	PTP_WORK* work_objects = NULL;
	RFX_TILE_PROCESS_WORK_PARAM* params = NULL;
	void *pmem;

	if (*pExpecedBlockType != WBT_EXTENSION)
	{
		WLog_ERR(TAG, "%s: message unexpeced", __FUNCTION__);
		return FALSE;
	}
	*pExpecedBlockType = WBT_FRAME_END;

	if (Stream_GetRemainingLength(s) < 14)
	{
		WLog_ERR(TAG, "RfxMessageTileSet packet too small");
		return FALSE;
	}

	Stream_Read_UINT16(s, subtype); /* subtype (2 bytes) must be set to CBT_TILESET (0xCAC2) */

	if (subtype != CBT_TILESET)
	{
		WLog_ERR(TAG, "invalid subtype, expected CBT_TILESET.");
		return FALSE;
	}

	Stream_Seek_UINT16(s); /* idx (2 bytes), must be set to 0x0000 */
	Stream_Seek_UINT16(s); /* properties (2 bytes) */

	Stream_Read_UINT8(s, context->numQuant); /* numQuant (1 byte) */
	Stream_Seek_UINT8(s); /* tileSize (1 byte), must be set to 0x40 */

	if (context->numQuant < 1)
	{
		WLog_ERR(TAG, "no quantization value.");
		return FALSE;
	}

	Stream_Read_UINT16(s, message->numTiles); /* numTiles (2 bytes) */

	if (message->numTiles < 1)
	{
		WLog_ERR(TAG, "no tiles.");
		return FALSE;
	}

	Stream_Read_UINT32(s, tilesDataSize); /* tilesDataSize (4 bytes) */

	if (!(pmem = realloc((void*) context->quants, context->numQuant * 10 * sizeof(UINT32))))
		return FALSE;

	quants = context->quants = (UINT32*) pmem;

	/* quantVals */
	if (Stream_GetRemainingLength(s) < (size_t) (context->numQuant * 5))
	{
		WLog_ERR(TAG, "RfxMessageTileSet packet too small for num_quants=%d", context->numQuant);
		return FALSE;
	}

	for (i = 0; i < context->numQuant; i++)
	{
		/* RFX_CODEC_QUANT */
		Stream_Read_UINT8(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		Stream_Read_UINT8(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		Stream_Read_UINT8(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		Stream_Read_UINT8(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		Stream_Read_UINT8(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);

		WLog_Print(context->priv->log, WLOG_DEBUG, "quant %d (%d %d %d %d %d %d %d %d %d %d).",
			i, context->quants[i * 10], context->quants[i * 10 + 1],
			context->quants[i * 10 + 2], context->quants[i * 10 + 3],
			context->quants[i * 10 + 4], context->quants[i * 10 + 5],
			context->quants[i * 10 + 6], context->quants[i * 10 + 7],
			context->quants[i * 10 + 8], context->quants[i * 10 + 9]);
	}

	if (!(message->tiles = (RFX_TILE**) calloc(message->numTiles, sizeof(RFX_TILE*))))
	{
		message->numTiles = 0;
		return FALSE;
	}

	if (context->priv->UseThreads)
	{
		work_objects = (PTP_WORK*) calloc(message->numTiles, sizeof(PTP_WORK));
		params = (RFX_TILE_PROCESS_WORK_PARAM*) calloc(message->numTiles, sizeof(RFX_TILE_PROCESS_WORK_PARAM));

		if (!work_objects)
		{
			free(params);
			return FALSE;
		}

		if (!params)
		{
			free(work_objects);
			return FALSE;
		}
	}

	/* tiles */
	close_cnt = 0;
	rc = TRUE;
	for (i = 0; i < message->numTiles; i++)
	{
		if (!(tile = (RFX_TILE*) ObjectPool_Take(context->priv->TilePool)))
		{
			WLog_ERR(TAG, "RfxMessageTileSet failed to get tile from object pool");
			rc = FALSE;
			break;
		}

		message->tiles[i] = tile;

		/* RFX_TILE */
		if (Stream_GetRemainingLength(s) < 6)
		{
			WLog_ERR(TAG, "RfxMessageTileSet packet too small to read tile %d/%d", i, message->numTiles);
			rc = FALSE;
			break;
		}

		Stream_Read_UINT16(s, blockType); /* blockType (2 bytes), must be set to CBT_TILE (0xCAC3) */
		Stream_Read_UINT32(s, blockLen); /* blockLen (4 bytes) */

		if (Stream_GetRemainingLength(s) < blockLen - 6)
		{
			WLog_ERR(TAG, "RfxMessageTileSet not enough bytes to read tile %d/%d with blocklen=%d",
					 i, message->numTiles, blockLen);
			rc = FALSE;
			break;
		}

		pos = Stream_GetPosition(s) - 6 + blockLen;

		if (blockType != CBT_TILE)
		{
			WLog_ERR(TAG, "unknown block type 0x%X, expected CBT_TILE (0xCAC3).", blockType);
			rc = FALSE;
			break;
		}

		Stream_Read_UINT8(s, tile->quantIdxY); /* quantIdxY (1 byte) */
		Stream_Read_UINT8(s, tile->quantIdxCb); /* quantIdxCb (1 byte) */
		Stream_Read_UINT8(s, tile->quantIdxCr); /* quantIdxCr (1 byte) */
		Stream_Read_UINT16(s, tile->xIdx); /* xIdx (2 bytes) */
		Stream_Read_UINT16(s, tile->yIdx); /* yIdx (2 bytes) */
		Stream_Read_UINT16(s, tile->YLen); /* YLen (2 bytes) */
		Stream_Read_UINT16(s, tile->CbLen); /* CbLen (2 bytes) */
		Stream_Read_UINT16(s, tile->CrLen); /* CrLen (2 bytes) */

		Stream_GetPointer(s, tile->YData);
		Stream_Seek(s, tile->YLen);
		Stream_GetPointer(s, tile->CbData);
		Stream_Seek(s, tile->CbLen);
		Stream_GetPointer(s, tile->CrData);
		Stream_Seek(s, tile->CrLen);

		tile->x = tile->xIdx * 64;
		tile->y = tile->yIdx * 64;

		if (context->priv->UseThreads)
		{
			assert(params);

			params[i].context = context;
			params[i].tile = message->tiles[i];

			if (!(work_objects[i] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_process_message_tile_work_callback,
					(void*) &params[i], &context->priv->ThreadPoolEnv)))
			{
				WLog_ERR(TAG, "CreateThreadpoolWork failed.");
				rc = FALSE;
				break;
			}

			SubmitThreadpoolWork(work_objects[i]);
			close_cnt = i + 1;
		}
		else
		{
			rfx_decode_rgb(context, tile, tile->data, 64 * 4);
		}

		Stream_SetPosition(s, pos);
	}

	if (context->priv->UseThreads)
	{
		for (i = 0; i < close_cnt; i++)
		{
			WaitForThreadpoolWorkCallbacks(work_objects[i], FALSE);
			CloseThreadpoolWork(work_objects[i]);
		}

		free(work_objects);
		free(params);
	}

	for (i = 0; i < message->numTiles; i++)
	{
		if (!(tile = message->tiles[i]))
			continue;
		tile->YLen = tile->CbLen = tile->CrLen = 0;
		tile->YData = tile->CbData = tile->CrData = NULL;
	}

	return rc;
}
예제 #12
0
파일: rfx.c 프로젝트: C4rt/FreeRDP
RFX_MESSAGE* rfx_encode_message(RFX_CONTEXT* context, const RFX_RECT* rects, int numRects,
		BYTE* data, int width, int height, int scanline)
{
	int i, maxNbTiles, maxTilesX, maxTilesY;
	int xIdx, yIdx, regionNbRects;
	int gridRelX, gridRelY, ax, ay, bytesPerPixel;
	RFX_TILE* tile;
	RFX_RECT* rfxRect;
	RFX_MESSAGE* message = NULL;
	PTP_WORK* workObject = NULL;
	RFX_TILE_COMPOSE_WORK_PARAM *workParam = NULL;
	BOOL success = FALSE;

	REGION16 rectsRegion, tilesRegion;
	RECTANGLE_16 currentTileRect;
	const RECTANGLE_16 *regionRect;
	const RECTANGLE_16 *extents;

	assert(data);
	assert(rects);
	assert(numRects > 0);
	assert(width > 0);
	assert(height > 0);
	assert(scanline > 0);

	if (!(message = (RFX_MESSAGE*)calloc(1, sizeof(RFX_MESSAGE))))
		return NULL;

	region16_init(&tilesRegion);
	region16_init(&rectsRegion);

	if (context->state == RFX_STATE_SEND_HEADERS)
		rfx_update_context_properties(context);

	message->frameIdx = context->frameIdx++;

	if (!context->numQuant)
	{
		if (!(context->quants = (UINT32*) malloc(sizeof(rfx_default_quantization_values))))
			goto skip_encoding_loop;

		CopyMemory(context->quants, &rfx_default_quantization_values, sizeof(rfx_default_quantization_values));
		context->numQuant = 1;
		context->quantIdxY = 0;
		context->quantIdxCb = 0;
		context->quantIdxCr = 0;
	}

	message->numQuant = context->numQuant;
	message->quantVals = context->quants;

	bytesPerPixel = (context->bits_per_pixel / 8);

	if (!computeRegion(rects, numRects, &rectsRegion, width, height))
		goto skip_encoding_loop;

	extents = region16_extents(&rectsRegion);
	assert(extents->right - extents->left > 0);
	assert(extents->bottom - extents->top > 0);

	maxTilesX = 1 + TILE_NO(extents->right - 1) - TILE_NO(extents->left);
	maxTilesY = 1 + TILE_NO(extents->bottom - 1) - TILE_NO(extents->top);
	maxNbTiles = maxTilesX * maxTilesY;

	if (!(message->tiles = calloc(maxNbTiles, sizeof(RFX_TILE*))))
		goto skip_encoding_loop;

	if (!setupWorkers(context, maxNbTiles))
		goto skip_encoding_loop;

	if (context->priv->UseThreads)
	{
		workObject = context->priv->workObjects;
		workParam = context->priv->tileWorkParams;
	}

	regionRect = region16_rects(&rectsRegion, &regionNbRects);

	if (!(message->rects = calloc(regionNbRects, sizeof(RFX_RECT))))
		goto skip_encoding_loop;

	message->numRects = regionNbRects;

	for (i = 0, rfxRect = message->rects; i < regionNbRects; i++, regionRect++, rfxRect++)
	{
		int startTileX = regionRect->left / 64;
		int endTileX = (regionRect->right - 1) / 64;

		int startTileY = regionRect->top / 64;
		int endTileY = (regionRect->bottom - 1) / 64;

		rfxRect->x = regionRect->left;
		rfxRect->y = regionRect->top;
		rfxRect->width = (regionRect->right - regionRect->left);
		rfxRect->height = (regionRect->bottom - regionRect->top);


		for (yIdx = startTileY, gridRelY = startTileY * 64; yIdx <= endTileY; yIdx++, gridRelY += 64 )
		{
			int tileHeight = 64;

			if ((yIdx == endTileY) && (gridRelY + 64 > height))
				tileHeight = height - gridRelY;

			currentTileRect.top = gridRelY;
			currentTileRect.bottom = gridRelY + tileHeight;

			for (xIdx = startTileX, gridRelX = startTileX * 64; xIdx <= endTileX; xIdx++, gridRelX += 64)
			{
				int tileWidth = 64;

				if ((xIdx == endTileX) && (gridRelX + 64 > width))
					tileWidth = width - gridRelX;

				currentTileRect.left = gridRelX;
				currentTileRect.right = gridRelX + tileWidth;

				/* checks if this tile is already treated */
				if (region16_intersects_rect(&tilesRegion, &currentTileRect))
					continue;

				if (!(tile = (RFX_TILE*) ObjectPool_Take(context->priv->TilePool)))
					goto skip_encoding_loop;

				tile->xIdx = xIdx;
				tile->yIdx = yIdx;
				tile->x = gridRelX;
				tile->y = gridRelY;
				tile->scanline = scanline;
				tile->width = tileWidth;
				tile->height = tileHeight;

				ax = gridRelX;
				ay = gridRelY;

				if (tile->data && tile->allocated)
				{
					free(tile->data);
					tile->allocated = FALSE;
				}
				tile->data = &data[(ay * scanline) + (ax * bytesPerPixel)];

				tile->quantIdxY = context->quantIdxY;
				tile->quantIdxCb = context->quantIdxCb;
				tile->quantIdxCr = context->quantIdxCr;

				tile->YLen = tile->CbLen = tile->CrLen = 0;

				if (!(tile->YCbCrData = (BYTE *)BufferPool_Take(context->priv->BufferPool, -1)))
					goto skip_encoding_loop;

				tile->YData = (BYTE*) &(tile->YCbCrData[((8192 + 32) * 0) + 16]);
				tile->CbData = (BYTE*) &(tile->YCbCrData[((8192 + 32) * 1) + 16]);
				tile->CrData = (BYTE*) &(tile->YCbCrData[((8192 + 32) * 2) + 16]);

				message->tiles[message->numTiles] = tile;
				message->numTiles++;

				if (context->priv->UseThreads)
				{
					workParam->context = context;
					workParam->tile = tile;

					if (!(*workObject = CreateThreadpoolWork(
							(PTP_WORK_CALLBACK)rfx_compose_message_tile_work_callback,
							(void*) workParam,
							&context->priv->ThreadPoolEnv)))
					{
						goto skip_encoding_loop;
					}

					SubmitThreadpoolWork(*workObject);

					workObject++;
					workParam++;
				}
				else
				{
					rfx_encode_rgb(context, tile);
				}

				if (!region16_union_rect(&tilesRegion, &tilesRegion, &currentTileRect))
					goto skip_encoding_loop;
			} /* xIdx */
		}  /* yIdx */
	}  /* rects */

	success = TRUE;

skip_encoding_loop:

	if (success && message->numTiles != maxNbTiles)
	{
		void* pmem = realloc((void*) message->tiles, sizeof(RFX_TILE*) * message->numTiles);

		if (pmem)
			message->tiles = (RFX_TILE**) pmem;
		else
			success = FALSE;
	}

	/* when using threads ensure all computations are done */
	message->tilesDataSize = 0;
	workObject = context->priv->workObjects;

	for (i = 0; i < message->numTiles; i++)
	{
		tile = message->tiles[i];
		if (context->priv->UseThreads)
		{
			if (*workObject)
			{
				WaitForThreadpoolWorkCallbacks(*workObject, FALSE);
				CloseThreadpoolWork(*workObject);
			}
			workObject++;
		}

		message->tilesDataSize += rfx_tile_length(tile);
	}

	region16_uninit(&tilesRegion);
	region16_uninit(&rectsRegion);

	if (success)
		return message;

	WLog_ERR(TAG, "%s: failed", __FUNCTION__);

	message->freeRects = TRUE;
	rfx_message_free(context, message);
	return NULL;
}
예제 #13
0
int TestPoolWork(int argc, char* argv[])
{
	int index;
	PTP_POOL pool;
	PTP_WORK work;
	PTP_CLEANUP_GROUP cleanupGroup;
	TP_CALLBACK_ENVIRON environment;

	printf("Global Thread Pool\n");

	work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", NULL);

	if (!work)
	{
		printf("CreateThreadpoolWork failure\n");
		return -1;
	}

	/**
	 * You can post a work object one or more times (up to MAXULONG) without waiting for prior callbacks to complete.
	 * The callbacks will execute in parallel. To improve efficiency, the thread pool may throttle the threads.
	 */

	for (index = 0; index < 10; index++)
		SubmitThreadpoolWork(work);

	WaitForThreadpoolWorkCallbacks(work, FALSE);
	CloseThreadpoolWork(work);

	printf("Private Thread Pool\n");

	pool = CreateThreadpool(NULL);

	SetThreadpoolThreadMinimum(pool, 4);
	SetThreadpoolThreadMaximum(pool, 8);

	InitializeThreadpoolEnvironment(&environment);
	SetThreadpoolCallbackPool(&environment, pool);

	cleanupGroup = CreateThreadpoolCleanupGroup();

	if (!cleanupGroup)
	{
		printf("CreateThreadpoolCleanupGroup failure\n");
		return -1;
	}

	SetThreadpoolCallbackCleanupGroup(&environment, cleanupGroup, NULL);

	work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", &environment);

	if (!work)
	{
		printf("CreateThreadpoolWork failure\n");
		return -1;
	}

	for (index = 0; index < 10; index++)
		SubmitThreadpoolWork(work);

	WaitForThreadpoolWorkCallbacks(work, FALSE);

	CloseThreadpoolCleanupGroupMembers(cleanupGroup, TRUE, NULL);

	CloseThreadpoolCleanupGroup(cleanupGroup);

	DestroyThreadpoolEnvironment(&environment);

	CloseThreadpoolWork(work);
	CloseThreadpool(pool);

	return 0;
}
//
// This function is invoked only with Airplane mode change, but not with NFC radio state change.
//
STDMETHODIMP CNfcRadioManager::OnSystemRadioStateChange(
        _In_opt_ SYSTEM_RADIO_STATE sysRadioState,
        _In_ UINT32 uTimeoutSec)
{
    UNREFERENCED_PARAMETER(sysRadioState);
    UNREFERENCED_PARAMETER(uTimeoutSec);

    TRACE_METHOD_ENTRY(LEVEL_VERBOSE);

    HRESULT hr = S_OK;
    PTP_POOL threadPool = NULL;
    TP_CALLBACK_ENVIRON callbackEnviron;
    PTP_CLEANUP_GROUP ptpCleanupGroup = NULL;
    
    // Create threadpool to handle all of the radios
    threadPool = CreateThreadpool(NULL);
    if (NULL == threadPool)
    {
        hr = HRESULT_FROM_WIN32(GetLastError());
    }

    if (SUCCEEDED(hr))
    {
        SetThreadpoolThreadMaximum(threadPool, 50);
        InitializeThreadpoolEnvironment(&callbackEnviron);
        SetThreadpoolCallbackPool(&callbackEnviron, threadPool);
        ptpCleanupGroup = CreateThreadpoolCleanupGroup();

        if (NULL == ptpCleanupGroup)
        {
            hr = HRESULT_FROM_WIN32(GetLastError());
        }
        else
        {
            // Associate the cleanup group with our thread pool
            SetThreadpoolCallbackCleanupGroup(&callbackEnviron,
                                                ptpCleanupGroup,
                                                NULL );
        }
    }

    // Lock so that any adds or removes will not cause list changes during system airplane mode
    EnterCriticalSection(&m_csAddRemoveLock);

    if (SUCCEEDED(hr))
    {
        UINT32 i, count = 0;
        SYSTEM_STATE_SWITCH_CONTEXT* pContext = NULL;

        hr = m_nfcRadioCollection->GetCount(&count);

        if (count > 0)
        {
            if (SUCCEEDED(hr))
            {
                pContext = (SYSTEM_STATE_SWITCH_CONTEXT*)malloc(count * sizeof(SYSTEM_STATE_SWITCH_CONTEXT));
                if (NULL == pContext)
                {
                    hr = E_OUTOFMEMORY;
                }
            }
            
            for (i = 0; (SUCCEEDED(hr) && (i < count)); i++)
            {
                PTP_WORK ptpThreadWork = NULL;
                
                pContext[i].sysRadioState = sysRadioState;
                m_nfcRadioCollection->GetAt(i, (IRadioInstance**)&(pContext[i].pRadioInstance));
                
                ptpThreadWork = CreateThreadpoolWork( 
                                     (PTP_WORK_CALLBACK)&CNfcRadioManager::AsyncRadioChange, 
                                     &(pContext[i]), 
                                     &(callbackEnviron) );
                
                if (ptpThreadWork == NULL)
                {   
                    // pRadioInstance context was not successfully added to threadpool.
                    hr = pContext[i].pRadioInstance->SetSystemState(sysRadioState);
                    pContext[i].pRadioInstance->Release();
                }
                else
                {
                    ::SubmitThreadpoolWork(ptpThreadWork);
                }
            }
        }

        // Wait for all threadpools to drain and clean up threads
        CloseThreadpoolCleanupGroupMembers(ptpCleanupGroup, FALSE, NULL);

        if (NULL != pContext)
        {
            free(pContext);
        }
    }
    else
    {
        // Failed to set up threadpool for parallel system radio change. Only choice is to do it serially now
        UINT32 i, count = 0;
        CNfcRadioInstance* pRadioInstance = NULL;

        hr = m_nfcRadioCollection->GetCount(&count);
        for (i = 0; (SUCCEEDED(hr) && (i < count)); i++)
        {
            hr = m_nfcRadioCollection->GetAt(i, (IRadioInstance**)&pRadioInstance);
            if (SUCCEEDED(hr))
            {
                hr = pRadioInstance->SetSystemState(sysRadioState);
            }
            
            pRadioInstance->Release();
        }
    }

    LeaveCriticalSection(&m_csAddRemoveLock);

    DestroyThreadpoolEnvironment(&callbackEnviron);

    if(ptpCleanupGroup)
    {
        CloseThreadpoolCleanupGroup(ptpCleanupGroup);
        ptpCleanupGroup = NULL;
    }
  
    if(threadPool)
    {
        CloseThreadpool(threadPool);
        threadPool = NULL;
    }

    TRACE_METHOD_EXIT_HR(LEVEL_COND, hr);
    return hr;
}
void *LLW_train_thread(void *th_data) {

	// Recover data
	struct ThreadData *data =  (struct ThreadData *)th_data;
	const int thread_id = data->thread_id;
	const int nprocs = data->nprocs;	
	struct Model *model = data->model;
	struct KernelCache *kernelcache = data->kernelcache;
	long chunk_size = data->chunk_size;
	const double accuracy = data->accuracy;
	double **gradient = data->gradient;
	double **H_alpha = data->H_alpha;
	double *best_primal_upper_bound = data->best_primal_upper_bound;
	int *activeset = data->activeset;
	long *nb_SV = data->nb_SV;	
	double *lp_rhs = data->lp_rhs;
	FILE *fp = data->logfile_ptr;	
	
	pthread_mutex_unlock(&thread_data_mutex);	// Release thread_data for next thread 
	 
	// Local variables
	int do_eval;
	char yesno;
	long long return_status = -1;	
	
	// Prepare the cache
	struct TrainingCache cache;
	cache.chunk_size =  chunk_size;
	LLW_alloc_memory(&cache, model->Q, model->nb_data, chunk_size);
	cache.kc = kernelcache;
	cache.activeset = activeset;
	cache.lp_rhs = lp_rhs;
	
	double **delta = matrix(chunk_size, model->Q);
	double previous_ratio = 0.0;
	double improvement = 1.0;	
	double theta_opt;
	int jump = false;
			
	if(accuracy == 0)
		do_eval = 0;
	else 
		do_eval = 1;
	
	/*
		Prepare parallel gradient computations:
		- the gradient vector is split into NUMTHREADS_GRAD parts (along i)
		- each part is updated by a different thread
	*/
	// max number of threads for gradient updates is nprocs
	pthread_t *grad_threads = (pthread_t *)malloc(sizeof(pthread_t) * nprocs); 

	// start with 1 thread (main load on kernel evaluations)
	int numthreads_grad = 1;		

	void *status; 			
	int rc; 		
	long k;	
	struct ThreadGradient_data *grad_data = (struct ThreadGradient_data *)malloc(sizeof(struct ThreadGradient_data) * nprocs);


	// Disable parallel gradient computation for small data sets
	int parallel_gradient_update = 1;
	if(model->nb_data < 5000 || nprocs == 1)
		parallel_gradient_update = 0;

	if(parallel_gradient_update) {
		for(k=0;k<nprocs;k++) {
			grad_data[k].gradient = gradient;
			grad_data[k].H_alpha = H_alpha;
			grad_data[k].cache = &cache;
			grad_data[k].model = model;
		}		
		grad_data[0].start_i = 1;
		grad_data[0].end_i = model->nb_data / numthreads_grad;	
		for(k=1;k<numthreads_grad-1;k++) {	
			grad_data[k].start_i = grad_data[k-1].end_i + 1;
			grad_data[k].end_i = grad_data[k].start_i + model->nb_data / numthreads_grad -1;
		}
		if(numthreads_grad>1) {
			grad_data[numthreads_grad-1].start_i = grad_data[numthreads_grad-2].end_i + 1;
			grad_data[numthreads_grad-1].end_i = model->nb_data;
		}	
	}
#ifdef _WIN32
	// Init POOL
	TP_WORK ** work;
	
	if(parallel_gradient_update) {
		
		work = malloc(sizeof(TP_WORK *) * nprocs);
		for(k=0;k<nprocs;k++)
			work[k] = CreateThreadpoolWork(LLW_update_gradient_thread2, (void *) &grad_data[k], NULL);
	}
#endif
		
	// Switch to nprocs/4 threads for gradient update when 25% of the kernel matrix is cached
	int percentage_step = 1;
	long percentage = model->nb_data / 4;
	int next_numthreads_grad = nprocs/4;
	if(next_numthreads_grad == 0) 
		next_numthreads_grad = 1;
	
	// Main loop
	int thread_stop = 0;
	do {	
	  	if((TRAIN_SMALL_STEP < TRAIN_STEP) && (model->iter%TRAIN_SMALL_STEP) == 0) {
		    	printf(".");
			fflush(stdout);
	  	}
	  
 	  	// Select a random chunk of data to optimize 
		select_random_chunk(&cache,model);
				
		// Compute the kernel submatrix for this chunk
  		compute_K(&cache,model);			
  	
		// Enter Critical Section (using and modifying the model)
		pthread_mutex_lock(&(model->mutex)); 
		
		jump = LLW_solve_lp(gradient, &cache, model);
	  	
	  	if(jump == false)
	    		jump = LLW_check_opt_sol(gradient,&cache,model);
	    		
		if(jump == false) {
			
	      	LLW_compute_delta(delta,&cache,model);
	    	theta_opt = LLW_compute_theta_opt(delta, &cache, model);
	    	
	    	if (theta_opt > 0.0) { 
			
				*nb_SV += LLW_compute_new_alpha(theta_opt,&cache,model);
				
				if(parallel_gradient_update) {
				
					// Update gradient in parallel 
		   			for(k=0;k<numthreads_grad;k++) {
					#ifdef _WIN32
						SubmitThreadpoolWork(work[k]);
					#else
						rc = pthread_create(&grad_threads[k], NULL, LLW_update_gradient_thread, (void *) &grad_data[k]);	
					#endif
					}			
					// Wait for gradient computations to terminate
					for(k=0;k<numthreads_grad;k++) {
					#ifdef _WIN32
						WaitForThreadpoolWorkCallbacks(work[k], FALSE);
					#else
						rc = pthread_join(grad_threads[k],&status);
					#endif
					}
				}
				else {
					// old-style non-threaded gradient update (for small data sets)
					LLW_update_gradient(gradient,H_alpha, &cache,model); 
				}
			}
   		}
				    
		if((do_eval && (model->iter%TRAIN_STEP) == 0) || EVAL || STOP || (do_eval && model->ratio >= accuracy) )  
		    {    	   	
			if(fp != NULL)
				fprintf(fp,"%ld ",model->iter);
	
			if(EVAL)
				printf("\n\n*** Evaluating the model at iteration %ld...\n",model->iter);
								 
			// Evaluate how far we are in the optimization
			// (prints more info if interrutped by user)
			previous_ratio = model->ratio;
			model->ratio = MSVM_eval(best_primal_upper_bound, gradient, H_alpha, NULL, model, EVAL, fp);

			print_training_info(*nb_SV, model);
		
			improvement = model->ratio - previous_ratio;			

			if(EVAL) // if interrupted by user (otherwise let the ratio decide if we go on training)
			  {			  	
				printf("\n *** Do you want to continue training ([y]/n)? ");
				yesno = getchar();
				if(yesno=='n') {
					STOP = 1;
				}
				EVAL = 0; // reset interruption trigger
			  }		
		    }
	    
	    	// Release kernel submatrix in cache
		release_K(&cache);
							
		// Check if a sufficient % of the kernel matrix is cached
		if( parallel_gradient_update && cache.kc->max_idx >= percentage ) {	
			// and switch thread to compute gradient upates instead of kernel rows if it is		
			thread_stop = switch_thread(nprocs, &numthreads_grad, &next_numthreads_grad, &percentage,  &percentage_step, grad_data, thread_id, model->nb_data);				
			// (threads are actually stopped to leave the CPUs
			//  to other threads that will compute gradient updates)
		}				
	
  		model->iter++;

		// Release mutex: End of critical section
		pthread_mutex_unlock(&(model->mutex));			
   		
	} while(model->iter <= MSVM_TRAIN_MAXIT && (!do_eval || (model->ratio < accuracy && improvement != 0.0)) && !STOP && !thread_stop);  
 	
  	// Release mutex: End of critical section (see below)
	pthread_mutex_unlock(&(model->mutex));

#ifdef _WIN32
	if(parallel_gradient_update){
		for(k=0;k<numthreads_grad;k++)
			CloseThreadpoolWork(work[k]);
	}	
#endif
  	// compute return_status
	if(do_eval && (model->ratio >= accuracy || improvement==0.0))
		return_status = 0; // optimum reached or no more improvement. 
		
  	// Free memory
	LLW_free_memory(&cache);
	free(delta[1]);free(delta);
	free(grad_threads);
	free(grad_data);
	
	pthread_exit((void*)return_status);
}
예제 #16
0
NamedPipeChannel::NamedPipeChannel()
    : work_(CreateThreadpoolWork(OnRequested, this, nullptr)),
      handle_(INVALID_HANDLE_VALUE),
      io_(nullptr),
      end_point_(EndPoint::kUnknown) {}
예제 #17
0
파일: rfx.c 프로젝트: nour8394/FreeRDP
static BOOL rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* message, STREAM* s)
{
	int i;
	int pos;
	BYTE quant;
	UINT32* quants;
	UINT16 subtype;
	UINT32 blockLen;
	UINT32 blockType;
	UINT32 tilesDataSize;
	PTP_WORK* work_objects = NULL;
	RFX_TILE_WORK_PARAM* params = NULL;

	if (stream_get_left(s) < 14)
	{
		DEBUG_WARN("RfxMessageTileSet packet too small");
		return FALSE;
	}

	stream_read_UINT16(s, subtype); /* subtype (2 bytes) must be set to CBT_TILESET (0xCAC2) */

	if (subtype != CBT_TILESET)
	{
		DEBUG_WARN("invalid subtype, expected CBT_TILESET.");
		return FALSE;
	}

	stream_seek_UINT16(s); /* idx (2 bytes), must be set to 0x0000 */
	stream_seek_UINT16(s); /* properties (2 bytes) */

	stream_read_BYTE(s, context->num_quants); /* numQuant (1 byte) */
	stream_seek_BYTE(s); /* tileSize (1 byte), must be set to 0x40 */

	if (context->num_quants < 1)
	{
		DEBUG_WARN("no quantization value.");
		return TRUE;
	}

	stream_read_UINT16(s, message->num_tiles); /* numTiles (2 bytes) */

	if (message->num_tiles < 1)
	{
		DEBUG_WARN("no tiles.");
		return TRUE;
	}

	stream_read_UINT32(s, tilesDataSize); /* tilesDataSize (4 bytes) */

	if (context->quants != NULL)
		context->quants = (UINT32*) realloc((void*) context->quants, context->num_quants * 10 * sizeof(UINT32));
	else
		context->quants = (UINT32*) malloc(context->num_quants * 10 * sizeof(UINT32));
	quants = context->quants;

	/* quantVals */
	if (stream_get_left(s) < context->num_quants * 5)
	{
		DEBUG_WARN("RfxMessageTileSet packet too small for num_quants=%d", context->num_quants);
		return FALSE;
	}

	for (i = 0; i < context->num_quants; i++)
	{
		/* RFX_CODEC_QUANT */
		stream_read_BYTE(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		stream_read_BYTE(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		stream_read_BYTE(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		stream_read_BYTE(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		stream_read_BYTE(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);

		DEBUG_RFX("quant %d (%d %d %d %d %d %d %d %d %d %d).",
			i, context->quants[i * 10], context->quants[i * 10 + 1],
			context->quants[i * 10 + 2], context->quants[i * 10 + 3],
			context->quants[i * 10 + 4], context->quants[i * 10 + 5],
			context->quants[i * 10 + 6], context->quants[i * 10 + 7],
			context->quants[i * 10 + 8], context->quants[i * 10 + 9]);
	}

	message->tiles = (RFX_TILE**) malloc(sizeof(RFX_TILE*) * message->num_tiles);
	ZeroMemory(message->tiles, sizeof(RFX_TILE*) * message->num_tiles);

	if (context->priv->UseThreads)
	{
		work_objects = (PTP_WORK*) malloc(sizeof(PTP_WORK) * message->num_tiles);
		params = (RFX_TILE_WORK_PARAM*) malloc(sizeof(RFX_TILE_WORK_PARAM) * message->num_tiles);
	}

	/* tiles */
	for (i = 0; i < message->num_tiles; i++)
	{
		/* RFX_TILE */
		if (stream_get_left(s) < 6)
		{
			DEBUG_WARN("RfxMessageTileSet packet too small to read tile %d/%d", i, message->num_tiles);
			return FALSE;
		}

		stream_read_UINT16(s, blockType); /* blockType (2 bytes), must be set to CBT_TILE (0xCAC3) */
		stream_read_UINT32(s, blockLen); /* blockLen (4 bytes) */

		if (stream_get_left(s) < blockLen - 6)
		{
			DEBUG_WARN("RfxMessageTileSet not enough bytes to read tile %d/%d with blocklen=%d", i, message->num_tiles, blockLen);
			return FALSE;
		}

		pos = stream_get_pos(s) - 6 + blockLen;

		if (blockType != CBT_TILE)
		{
			DEBUG_WARN("unknown block type 0x%X, expected CBT_TILE (0xCAC3).", blockType);
			break;
		}

		message->tiles[i] = rfx_tile_pool_take(context);

		if (context->priv->UseThreads)
		{
			params[i].context = context;
			params[i].tile = message->tiles[i];
			CopyMemory(&(params[i].s), s, sizeof(STREAM));

			work_objects[i] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_process_message_tile_work_callback,
					(void*) &params[i], &context->priv->ThreadPoolEnv);

			SubmitThreadpoolWork(work_objects[i]);
		}
		else
		{
			rfx_process_message_tile(context, message->tiles[i], s);
		}

		stream_set_pos(s, pos);
	}

	if (context->priv->UseThreads)
	{
		for (i = 0; i < message->num_tiles; i++)
			WaitForThreadpoolWorkCallbacks(work_objects[i], FALSE);

		free(work_objects);
		free(params);
	}
	return TRUE;
}
예제 #18
0
파일: rfx_decode.c 프로젝트: akboom/FreeRDP
/* stride is bytes between rows in the output buffer. */
BOOL rfx_decode_rgb(RFX_CONTEXT* context, wStream* data_in,
	int y_size, const UINT32* y_quants,
	int cb_size, const UINT32* cb_quants,
	int cr_size, const UINT32* cr_quants, BYTE* rgb_buffer, int stride)
{
	INT16* pSrcDst[3];
	static const prim_size_t roi_64x64 = { 64, 64 };
	const primitives_t *prims = primitives_get();

	PROFILER_ENTER(context->priv->prof_rfx_decode_rgb);

	pSrcDst[0] = (INT16*)((BYTE*)BufferPool_Take(context->priv->BufferPool, -1) + 16); /* y_r_buffer */
	pSrcDst[1] = (INT16*)((BYTE*)BufferPool_Take(context->priv->BufferPool, -1) + 16); /* cb_g_buffer */
	pSrcDst[2] = (INT16*)((BYTE*)BufferPool_Take(context->priv->BufferPool, -1) + 16); /* cr_b_buffer */

#if 0
	if (context->priv->UseThreads)
	{
		PTP_WORK work_objects[3];
		RFX_COMPONENT_WORK_PARAM params[3];

		params[0].context = context;
		params[0].quantization_values = y_quants;
		params[0].buffer = stream_get_tail(data_in);
		params[0].capacity = y_size;
		params[0].buffer = pSrcDst[0];
		stream_seek(data_in, y_size);

		params[1].context = context;
		params[1].quantization_values = cb_quants;
		params[1].buffer = stream_get_tail(data_in);
		params[1].capacity = cb_size;
		params[1].buffer = pSrcDst[1];
		stream_seek(data_in, cb_size);

		params[2].context = context;
		params[2].quantization_values = cr_quants;
		params[2].buffer = stream_get_tail(data_in);
		params[2].capacity = cr_size;
		params[2].buffer = pSrcDst[2];
		stream_seek(data_in, cr_size);

		work_objects[0] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_decode_component_work_callback,
				(void*) &params[0], &context->priv->ThreadPoolEnv);
		work_objects[1] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_decode_component_work_callback,
				(void*) &params[1], &context->priv->ThreadPoolEnv);
		work_objects[2] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_decode_component_work_callback,
				(void*) &params[2], &context->priv->ThreadPoolEnv);

		SubmitThreadpoolWork(work_objects[0]);
		SubmitThreadpoolWork(work_objects[1]);
		SubmitThreadpoolWork(work_objects[2]);

		WaitForThreadpoolWorkCallbacks(work_objects[0], FALSE);
		WaitForThreadpoolWorkCallbacks(work_objects[1], FALSE);
		WaitForThreadpoolWorkCallbacks(work_objects[2], FALSE);
	}
	else
#endif
	{
		if (stream_get_left(data_in) < y_size+cb_size+cr_size)
		{
			DEBUG_WARN("rfx_decode_rgb: packet too small for y_size+cb_size+cr_size");
			return FALSE;
		}
		rfx_decode_component(context, y_quants, stream_get_tail(data_in), y_size, pSrcDst[0]); /* YData */
		stream_seek(data_in, y_size);

		rfx_decode_component(context, cb_quants, stream_get_tail(data_in), cb_size, pSrcDst[1]); /* CbData */
		stream_seek(data_in, cb_size);

		rfx_decode_component(context, cr_quants, stream_get_tail(data_in), cr_size, pSrcDst[2]); /* CrData */
		stream_seek(data_in, cr_size);
	}

	prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16),
			pSrcDst, 64 * sizeof(INT16), &roi_64x64);

	PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb);
		rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2],
			context->pixel_format, rgb_buffer, stride);
	PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb);
	
	PROFILER_EXIT(context->priv->prof_rfx_decode_rgb);

	BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[0] - 16);
	BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[1] - 16);
	BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[2] - 16);
	return TRUE;
}