/**
 * Thread pool work callback that unregisters a Configuration Manager
 * notification and then closes the work object that is running it.
 *
 * @param Instance  Callback instance supplied by the thread pool (not used here).
 * @param Context   The HCMNOTIFICATION to unregister, passed as an opaque pointer.
 * @param Work      The work object executing this callback; closed before returning.
 */
VOID WINAPI UnregisterNotificationWork(PTP_CALLBACK_INSTANCE Instance, PVOID Context, PTP_WORK Work)
{
    // The notification handle travels through the generic context pointer.
    CM_Unregister_Notification(static_cast<HCMNOTIFICATION>(Context));

    // The work object is released from inside its own callback.
    CloseThreadpoolWork(Work);
}
// Closes the pipe, then waits for and releases the pending thread pool work
// object, if there is one.
NamedPipe::~NamedPipe() {
  Close();

  base::AutoLock guard(lock_);
  if (work_ == nullptr)
    return;

  // Take the work object out of the member while the lock is still held.
  auto pending_work = work_;
  work_ = nullptr;

  {
    // The wait and the close run with lock_ released; it is re-acquired when
    // this scope ends.
    base::AutoUnlock unlock(lock_);
    WaitForThreadpoolWorkCallbacks(pending_work, FALSE);
    CloseThreadpoolWork(pending_work);
  }
}
/// Queues `proc(param)` on the Win32 thread pool.
///
/// Increments `s_flag`, wraps the procedure and its argument in a heap
/// allocated `_Scheduler_Param`, and submits a work object whose callback is
/// `DefaultWorkCallbackTest`.
///
/// @param proc   Task procedure to run.
/// @param param  Opaque argument forwarded with `proc`.
/// @throws The error built by `utility::details::create_system_error` from
///         `GetLastError()` when the work object cannot be created.
virtual void schedule(pplx::TaskProc_t proc, void* param)
{
    pplx::details::atomic_increment(s_flag);

    // Owned here until the work item has been handed to the pool, so a failed
    // CreateThreadpoolWork does not leak it.
    std::unique_ptr<_Scheduler_Param> ownedParam(new _Scheduler_Param(proc, param));

    auto workItem = CreateThreadpoolWork(DefaultWorkCallbackTest, ownedParam.get(), NULL);
    if (workItem != nullptr)
    {
        SubmitThreadpoolWork(workItem);
        CloseThreadpoolWork(workItem);

        // Give up ownership: presumably DefaultWorkCallbackTest frees the
        // parameter block -- TODO confirm against the callback.
        ownedParam.release();
        return;
    }

    throw utility::details::create_system_error(GetLastError());
}
void EasyIocp::FreeThreadPool() { int i; for(i = 0; i < worksNum_; ++i) PostQueuedCompletionStatus(hCompPort_, 0, (ULONG_PTR)0, NULL); for(i = 0; i < worksNum_; ++i) { WaitForThreadpoolWorkCallbacks(ptpWorks_[i], FALSE); CloseThreadpoolWork(ptpWorks_[i]); } SysFree(ptpWorks_); ptpWorks_ = NULL; print("EasyIocp::FreeThreadPool: free thread pool."); }
void Server::Destroy() { m_ShuttingDown = true; if (m_AcceptTPWORK != NULL) { WaitForThreadpoolWorkCallbacks(m_AcceptTPWORK, true); CloseThreadpoolWork(m_AcceptTPWORK); m_AcceptTPWORK = NULL; } if (m_listenSocket != INVALID_SOCKET) { Network::CloseSocket(m_listenSocket); CancelIoEx(reinterpret_cast<HANDLE>(m_listenSocket), NULL); m_listenSocket = INVALID_SOCKET; } if (m_pTPIO != NULL) { WaitForThreadpoolIoCallbacks(m_pTPIO, true); CloseThreadpoolIo(m_pTPIO); m_pTPIO = NULL; } if (m_ClientTPCLEAN != NULL) { CloseThreadpoolCleanupGroupMembers(m_ClientTPCLEAN, false, NULL); CloseThreadpoolCleanupGroup(m_ClientTPCLEAN); DestroyThreadpoolEnvironment(&m_ClientTPENV); m_ClientTPCLEAN = NULL; } EnterCriticalSection(&m_CSForClients); for (auto client : m_Clients) { delete client; } m_Clients.clear(); LeaveCriticalSection(&m_CSForClients); DeleteCriticalSection(&m_CSForClients); }
/**
 * Runs WorkCallback_fs_stat once on a private thread pool (12 to 48 threads)
 * and waits for it to finish.
 *
 * Changes relative to the previous version:
 *  - the results of CreateThreadpool, CreateThreadpoolCleanupGroup and
 *    CreateThreadpoolWork are checked before use;
 *  - the cleanup group and the callback environment are released (they were
 *    leaked on every call).
 *
 * @return Always false, as before.
 */
__declspec(noinline) bool benchmark_ntp_fs_stat()
{
    TP_CALLBACK_ENVIRON env;
    InitializeThreadpoolEnvironment(&env);

    PTP_CLEANUP_GROUP group = nullptr;
    PTP_POOL pool = CreateThreadpool(nullptr);

    if (pool != nullptr)
    {
        SetThreadpoolThreadMaximum(pool, 48);
        SetThreadpoolThreadMinimum(pool, 12);
        group = CreateThreadpoolCleanupGroup();
    }

    if (pool != nullptr && group != nullptr)
    {
        SetThreadpoolCallbackPool(&env, pool);
        SetThreadpoolCallbackCleanupGroup(&env, group, nullptr);

        PTP_WORK work_fs_stat = CreateThreadpoolWork(WorkCallback_fs_stat, nullptr, &env);
        if (work_fs_stat != nullptr)
        {
            SubmitThreadpoolWork(work_fs_stat);
            WaitForThreadpoolWorkCallbacks(work_fs_stat, false);

            // Released individually, so the group is empty when it is closed.
            CloseThreadpoolWork(work_fs_stat);
        }
    }

    // Tear down in reverse order of creation.
    if (group != nullptr)
        CloseThreadpoolCleanupGroup(group);
    DestroyThreadpoolEnvironment(&env);
    if (pool != nullptr)
        CloseThreadpool(pool);

    return false;
}
void *LLW_train_thread(void *th_data) { // Recover data struct ThreadData *data = (struct ThreadData *)th_data; const int thread_id = data->thread_id; const int nprocs = data->nprocs; struct Model *model = data->model; struct KernelCache *kernelcache = data->kernelcache; long chunk_size = data->chunk_size; const double accuracy = data->accuracy; double **gradient = data->gradient; double **H_alpha = data->H_alpha; double *best_primal_upper_bound = data->best_primal_upper_bound; int *activeset = data->activeset; long *nb_SV = data->nb_SV; double *lp_rhs = data->lp_rhs; FILE *fp = data->logfile_ptr; pthread_mutex_unlock(&thread_data_mutex); // Release thread_data for next thread // Local variables int do_eval; char yesno; long long return_status = -1; // Prepare the cache struct TrainingCache cache; cache.chunk_size = chunk_size; LLW_alloc_memory(&cache, model->Q, model->nb_data, chunk_size); cache.kc = kernelcache; cache.activeset = activeset; cache.lp_rhs = lp_rhs; double **delta = matrix(chunk_size, model->Q); double previous_ratio = 0.0; double improvement = 1.0; double theta_opt; int jump = false; if(accuracy == 0) do_eval = 0; else do_eval = 1; /* Prepare parallel gradient computations: - the gradient vector is split into NUMTHREADS_GRAD parts (along i) - each part is updated by a different thread */ // max number of threads for gradient updates is nprocs pthread_t *grad_threads = (pthread_t *)malloc(sizeof(pthread_t) * nprocs); // start with 1 thread (main load on kernel evaluations) int numthreads_grad = 1; void *status; int rc; long k; struct ThreadGradient_data *grad_data = (struct ThreadGradient_data *)malloc(sizeof(struct ThreadGradient_data) * nprocs); // Disable parallel gradient computation for small data sets int parallel_gradient_update = 1; if(model->nb_data < 5000 || nprocs == 1) parallel_gradient_update = 0; if(parallel_gradient_update) { for(k=0;k<nprocs;k++) { grad_data[k].gradient = gradient; grad_data[k].H_alpha = H_alpha; grad_data[k].cache = 
&cache; grad_data[k].model = model; } grad_data[0].start_i = 1; grad_data[0].end_i = model->nb_data / numthreads_grad; for(k=1;k<numthreads_grad-1;k++) { grad_data[k].start_i = grad_data[k-1].end_i + 1; grad_data[k].end_i = grad_data[k].start_i + model->nb_data / numthreads_grad -1; } if(numthreads_grad>1) { grad_data[numthreads_grad-1].start_i = grad_data[numthreads_grad-2].end_i + 1; grad_data[numthreads_grad-1].end_i = model->nb_data; } } #ifdef _WIN32 // Init POOL TP_WORK ** work; if(parallel_gradient_update) { work = malloc(sizeof(TP_WORK *) * nprocs); for(k=0;k<nprocs;k++) work[k] = CreateThreadpoolWork(LLW_update_gradient_thread2, (void *) &grad_data[k], NULL); } #endif // Switch to nprocs/4 threads for gradient update when 25% of the kernel matrix is cached int percentage_step = 1; long percentage = model->nb_data / 4; int next_numthreads_grad = nprocs/4; if(next_numthreads_grad == 0) next_numthreads_grad = 1; // Main loop int thread_stop = 0; do { if((TRAIN_SMALL_STEP < TRAIN_STEP) && (model->iter%TRAIN_SMALL_STEP) == 0) { printf("."); fflush(stdout); } // Select a random chunk of data to optimize select_random_chunk(&cache,model); // Compute the kernel submatrix for this chunk compute_K(&cache,model); // Enter Critical Section (using and modifying the model) pthread_mutex_lock(&(model->mutex)); jump = LLW_solve_lp(gradient, &cache, model); if(jump == false) jump = LLW_check_opt_sol(gradient,&cache,model); if(jump == false) { LLW_compute_delta(delta,&cache,model); theta_opt = LLW_compute_theta_opt(delta, &cache, model); if (theta_opt > 0.0) { *nb_SV += LLW_compute_new_alpha(theta_opt,&cache,model); if(parallel_gradient_update) { // Update gradient in parallel for(k=0;k<numthreads_grad;k++) { #ifdef _WIN32 SubmitThreadpoolWork(work[k]); #else rc = pthread_create(&grad_threads[k], NULL, LLW_update_gradient_thread, (void *) &grad_data[k]); #endif } // Wait for gradient computations to terminate for(k=0;k<numthreads_grad;k++) { #ifdef _WIN32 
WaitForThreadpoolWorkCallbacks(work[k], FALSE); #else rc = pthread_join(grad_threads[k],&status); #endif } } else { // old-style non-threaded gradient update (for small data sets) LLW_update_gradient(gradient,H_alpha, &cache,model); } } } if((do_eval && (model->iter%TRAIN_STEP) == 0) || EVAL || STOP || (do_eval && model->ratio >= accuracy) ) { if(fp != NULL) fprintf(fp,"%ld ",model->iter); if(EVAL) printf("\n\n*** Evaluating the model at iteration %ld...\n",model->iter); // Evaluate how far we are in the optimization // (prints more info if interrutped by user) previous_ratio = model->ratio; model->ratio = MSVM_eval(best_primal_upper_bound, gradient, H_alpha, NULL, model, EVAL, fp); print_training_info(*nb_SV, model); improvement = model->ratio - previous_ratio; if(EVAL) // if interrupted by user (otherwise let the ratio decide if we go on training) { printf("\n *** Do you want to continue training ([y]/n)? "); yesno = getchar(); if(yesno=='n') { STOP = 1; } EVAL = 0; // reset interruption trigger } } // Release kernel submatrix in cache release_K(&cache); // Check if a sufficient % of the kernel matrix is cached if( parallel_gradient_update && cache.kc->max_idx >= percentage ) { // and switch thread to compute gradient upates instead of kernel rows if it is thread_stop = switch_thread(nprocs, &numthreads_grad, &next_numthreads_grad, &percentage, &percentage_step, grad_data, thread_id, model->nb_data); // (threads are actually stopped to leave the CPUs // to other threads that will compute gradient updates) } model->iter++; // Release mutex: End of critical section pthread_mutex_unlock(&(model->mutex)); } while(model->iter <= MSVM_TRAIN_MAXIT && (!do_eval || (model->ratio < accuracy && improvement != 0.0)) && !STOP && !thread_stop); // Release mutex: End of critical section (see below) pthread_mutex_unlock(&(model->mutex)); #ifdef _WIN32 if(parallel_gradient_update){ for(k=0;k<numthreads_grad;k++) CloseThreadpoolWork(work[k]); } #endif // compute return_status 
if(do_eval && (model->ratio >= accuracy || improvement==0.0)) return_status = 0; // optimum reached or no more improvement. // Free memory LLW_free_memory(&cache); free(delta[1]);free(delta); free(grad_threads); free(grad_data); pthread_exit((void*)return_status); }
void OnDestroy( void ){ CloseThreadpoolWork( m_pWorkItem ); }
int TestPoolWork(int argc, char* argv[]) { int index; PTP_POOL pool; PTP_WORK work; PTP_CLEANUP_GROUP cleanupGroup; TP_CALLBACK_ENVIRON environment; printf("Global Thread Pool\n"); work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", NULL); if (!work) { printf("CreateThreadpoolWork failure\n"); return -1; } /** * You can post a work object one or more times (up to MAXULONG) without waiting for prior callbacks to complete. * The callbacks will execute in parallel. To improve efficiency, the thread pool may throttle the threads. */ for (index = 0; index < 10; index++) SubmitThreadpoolWork(work); WaitForThreadpoolWorkCallbacks(work, FALSE); CloseThreadpoolWork(work); printf("Private Thread Pool\n"); if (!(pool = CreateThreadpool(NULL))) { printf("CreateThreadpool failure\n"); return -1; } if (!SetThreadpoolThreadMinimum(pool, 4)) { printf("SetThreadpoolThreadMinimum failure\n"); return -1; } SetThreadpoolThreadMaximum(pool, 8); InitializeThreadpoolEnvironment(&environment); SetThreadpoolCallbackPool(&environment, pool); cleanupGroup = CreateThreadpoolCleanupGroup(); if (!cleanupGroup) { printf("CreateThreadpoolCleanupGroup failure\n"); return -1; } SetThreadpoolCallbackCleanupGroup(&environment, cleanupGroup, NULL); work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", &environment); if (!work) { printf("CreateThreadpoolWork failure\n"); return -1; } for (index = 0; index < 10; index++) SubmitThreadpoolWork(work); WaitForThreadpoolWorkCallbacks(work, FALSE); CloseThreadpoolCleanupGroupMembers(cleanupGroup, TRUE, NULL); CloseThreadpoolCleanupGroup(cleanupGroup); DestroyThreadpoolEnvironment(&environment); /** * See Remarks at https://msdn.microsoft.com/en-us/library/windows/desktop/ms682043(v=vs.85).aspx * If there is a cleanup group associated with the work object, * it is not necessary to call CloseThreadpoolWork ! 
* calling the CloseThreadpoolCleanupGroupMembers function releases the work, wait, * and timer objects associated with the cleanup group. */ /* CloseThreadpoolWork(work); // this would segfault, see comment above. */ CloseThreadpool(pool); return 0; }
static BOOL rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* message, wStream* s, UINT16* pExpecedBlockType) { BOOL rc; int i, close_cnt; int pos; BYTE quant; RFX_TILE* tile; UINT32* quants; UINT16 subtype; UINT32 blockLen; UINT32 blockType; UINT32 tilesDataSize; PTP_WORK* work_objects = NULL; RFX_TILE_PROCESS_WORK_PARAM* params = NULL; void *pmem; if (*pExpecedBlockType != WBT_EXTENSION) { WLog_ERR(TAG, "%s: message unexpeced", __FUNCTION__); return FALSE; } *pExpecedBlockType = WBT_FRAME_END; if (Stream_GetRemainingLength(s) < 14) { WLog_ERR(TAG, "RfxMessageTileSet packet too small"); return FALSE; } Stream_Read_UINT16(s, subtype); /* subtype (2 bytes) must be set to CBT_TILESET (0xCAC2) */ if (subtype != CBT_TILESET) { WLog_ERR(TAG, "invalid subtype, expected CBT_TILESET."); return FALSE; } Stream_Seek_UINT16(s); /* idx (2 bytes), must be set to 0x0000 */ Stream_Seek_UINT16(s); /* properties (2 bytes) */ Stream_Read_UINT8(s, context->numQuant); /* numQuant (1 byte) */ Stream_Seek_UINT8(s); /* tileSize (1 byte), must be set to 0x40 */ if (context->numQuant < 1) { WLog_ERR(TAG, "no quantization value."); return FALSE; } Stream_Read_UINT16(s, message->numTiles); /* numTiles (2 bytes) */ if (message->numTiles < 1) { WLog_ERR(TAG, "no tiles."); return FALSE; } Stream_Read_UINT32(s, tilesDataSize); /* tilesDataSize (4 bytes) */ if (!(pmem = realloc((void*) context->quants, context->numQuant * 10 * sizeof(UINT32)))) return FALSE; quants = context->quants = (UINT32*) pmem; /* quantVals */ if (Stream_GetRemainingLength(s) < (size_t) (context->numQuant * 5)) { WLog_ERR(TAG, "RfxMessageTileSet packet too small for num_quants=%d", context->numQuant); return FALSE; } for (i = 0; i < context->numQuant; i++) { /* RFX_CODEC_QUANT */ Stream_Read_UINT8(s, quant); *quants++ = (quant & 0x0F); *quants++ = (quant >> 4); Stream_Read_UINT8(s, quant); *quants++ = (quant & 0x0F); *quants++ = (quant >> 4); Stream_Read_UINT8(s, quant); *quants++ = (quant & 0x0F); *quants++ = 
(quant >> 4); Stream_Read_UINT8(s, quant); *quants++ = (quant & 0x0F); *quants++ = (quant >> 4); Stream_Read_UINT8(s, quant); *quants++ = (quant & 0x0F); *quants++ = (quant >> 4); WLog_Print(context->priv->log, WLOG_DEBUG, "quant %d (%d %d %d %d %d %d %d %d %d %d).", i, context->quants[i * 10], context->quants[i * 10 + 1], context->quants[i * 10 + 2], context->quants[i * 10 + 3], context->quants[i * 10 + 4], context->quants[i * 10 + 5], context->quants[i * 10 + 6], context->quants[i * 10 + 7], context->quants[i * 10 + 8], context->quants[i * 10 + 9]); } if (!(message->tiles = (RFX_TILE**) calloc(message->numTiles, sizeof(RFX_TILE*)))) { message->numTiles = 0; return FALSE; } if (context->priv->UseThreads) { work_objects = (PTP_WORK*) calloc(message->numTiles, sizeof(PTP_WORK)); params = (RFX_TILE_PROCESS_WORK_PARAM*) calloc(message->numTiles, sizeof(RFX_TILE_PROCESS_WORK_PARAM)); if (!work_objects) { free(params); return FALSE; } if (!params) { free(work_objects); return FALSE; } } /* tiles */ close_cnt = 0; rc = TRUE; for (i = 0; i < message->numTiles; i++) { if (!(tile = (RFX_TILE*) ObjectPool_Take(context->priv->TilePool))) { WLog_ERR(TAG, "RfxMessageTileSet failed to get tile from object pool"); rc = FALSE; break; } message->tiles[i] = tile; /* RFX_TILE */ if (Stream_GetRemainingLength(s) < 6) { WLog_ERR(TAG, "RfxMessageTileSet packet too small to read tile %d/%d", i, message->numTiles); rc = FALSE; break; } Stream_Read_UINT16(s, blockType); /* blockType (2 bytes), must be set to CBT_TILE (0xCAC3) */ Stream_Read_UINT32(s, blockLen); /* blockLen (4 bytes) */ if (Stream_GetRemainingLength(s) < blockLen - 6) { WLog_ERR(TAG, "RfxMessageTileSet not enough bytes to read tile %d/%d with blocklen=%d", i, message->numTiles, blockLen); rc = FALSE; break; } pos = Stream_GetPosition(s) - 6 + blockLen; if (blockType != CBT_TILE) { WLog_ERR(TAG, "unknown block type 0x%X, expected CBT_TILE (0xCAC3).", blockType); rc = FALSE; break; } Stream_Read_UINT8(s, tile->quantIdxY); /* 
quantIdxY (1 byte) */ Stream_Read_UINT8(s, tile->quantIdxCb); /* quantIdxCb (1 byte) */ Stream_Read_UINT8(s, tile->quantIdxCr); /* quantIdxCr (1 byte) */ Stream_Read_UINT16(s, tile->xIdx); /* xIdx (2 bytes) */ Stream_Read_UINT16(s, tile->yIdx); /* yIdx (2 bytes) */ Stream_Read_UINT16(s, tile->YLen); /* YLen (2 bytes) */ Stream_Read_UINT16(s, tile->CbLen); /* CbLen (2 bytes) */ Stream_Read_UINT16(s, tile->CrLen); /* CrLen (2 bytes) */ Stream_GetPointer(s, tile->YData); Stream_Seek(s, tile->YLen); Stream_GetPointer(s, tile->CbData); Stream_Seek(s, tile->CbLen); Stream_GetPointer(s, tile->CrData); Stream_Seek(s, tile->CrLen); tile->x = tile->xIdx * 64; tile->y = tile->yIdx * 64; if (context->priv->UseThreads) { assert(params); params[i].context = context; params[i].tile = message->tiles[i]; if (!(work_objects[i] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_process_message_tile_work_callback, (void*) ¶ms[i], &context->priv->ThreadPoolEnv))) { WLog_ERR(TAG, "CreateThreadpoolWork failed."); rc = FALSE; break; } SubmitThreadpoolWork(work_objects[i]); close_cnt = i + 1; } else { rfx_decode_rgb(context, tile, tile->data, 64 * 4); } Stream_SetPosition(s, pos); } if (context->priv->UseThreads) { for (i = 0; i < close_cnt; i++) { WaitForThreadpoolWorkCallbacks(work_objects[i], FALSE); CloseThreadpoolWork(work_objects[i]); } free(work_objects); free(params); } for (i = 0; i < message->numTiles; i++) { if (!(tile = message->tiles[i])) continue; tile->YLen = tile->CbLen = tile->CrLen = 0; tile->YData = tile->CbData = tile->CrData = NULL; } return rc; }
/**
 * Encodes the given rectangles of an image into a newly allocated RFX_MESSAGE.
 *
 * The rectangles are merged into a region; every 64x64 grid tile touched by
 * that region is encoded once, either inline (rfx_encode_rgb) or through one
 * thread pool work item per tile when context->priv->UseThreads is set. All
 * work items are waited for and closed before the function returns.
 *
 * Change relative to the previous version: three address-of expressions that
 * had been garbled (&regionNbRects, and &currentTileRect twice) are restored.
 *
 * @param context   Codec context. Default quantization values are installed
 *                  when context->numQuant is 0.
 * @param rects     Rectangles to encode (asserted non-NULL).
 * @param numRects  Number of entries in rects (asserted > 0).
 * @param data      Source pixels; tiles point directly into this buffer.
 * @param width     Image width in pixels (asserted > 0).
 * @param height    Image height in pixels (asserted > 0).
 * @param scanline  Bytes per source row (asserted > 0).
 * @return The encoded message, or NULL on failure (the partially built
 *         message is released through rfx_message_free).
 */
RFX_MESSAGE* rfx_encode_message(RFX_CONTEXT* context, const RFX_RECT* rects, int numRects, BYTE* data, int width, int height, int scanline)
{
	int i, maxNbTiles, maxTilesX, maxTilesY;
	int xIdx, yIdx, regionNbRects;
	int gridRelX, gridRelY, ax, ay, bytesPerPixel;
	RFX_TILE* tile;
	RFX_RECT* rfxRect;
	RFX_MESSAGE* message = NULL;
	PTP_WORK* workObject = NULL;
	RFX_TILE_COMPOSE_WORK_PARAM *workParam = NULL;
	BOOL success = FALSE;
	REGION16 rectsRegion, tilesRegion;
	RECTANGLE_16 currentTileRect;
	const RECTANGLE_16 *regionRect;
	const RECTANGLE_16 *extents;

	assert(data);
	assert(rects);
	assert(numRects > 0);
	assert(width > 0);
	assert(height > 0);
	assert(scanline > 0);

	if (!(message = (RFX_MESSAGE*)calloc(1, sizeof(RFX_MESSAGE))))
		return NULL;

	region16_init(&tilesRegion);
	region16_init(&rectsRegion);

	if (context->state == RFX_STATE_SEND_HEADERS)
		rfx_update_context_properties(context);

	message->frameIdx = context->frameIdx++;

	if (!context->numQuant)
	{
		/* No quantization table configured: install the defaults. */
		if (!(context->quants = (UINT32*) malloc(sizeof(rfx_default_quantization_values))))
			goto skip_encoding_loop;

		CopyMemory(context->quants, &rfx_default_quantization_values, sizeof(rfx_default_quantization_values));
		context->numQuant = 1;
		context->quantIdxY = 0;
		context->quantIdxCb = 0;
		context->quantIdxCr = 0;
	}

	message->numQuant = context->numQuant;
	message->quantVals = context->quants;

	bytesPerPixel = (context->bits_per_pixel / 8);

	if (!computeRegion(rects, numRects, &rectsRegion, width, height))
		goto skip_encoding_loop;

	extents = region16_extents(&rectsRegion);
	assert(extents->right - extents->left > 0);
	assert(extents->bottom - extents->top > 0);

	/* Upper bound on the tile count: every grid tile inside the bounding box. */
	maxTilesX = 1 + TILE_NO(extents->right - 1) - TILE_NO(extents->left);
	maxTilesY = 1 + TILE_NO(extents->bottom - 1) - TILE_NO(extents->top);
	maxNbTiles = maxTilesX * maxTilesY;

	if (!(message->tiles = calloc(maxNbTiles, sizeof(RFX_TILE*))))
		goto skip_encoding_loop;

	if (!setupWorkers(context, maxNbTiles))
		goto skip_encoding_loop;

	if (context->priv->UseThreads)
	{
		workObject = context->priv->workObjects;
		workParam = context->priv->tileWorkParams;
	}

	regionRect = region16_rects(&rectsRegion, &regionNbRects);

	if (!(message->rects = calloc(regionNbRects, sizeof(RFX_RECT))))
		goto skip_encoding_loop;

	message->numRects = regionNbRects;

	for (i = 0, rfxRect = message->rects; i < regionNbRects; i++, regionRect++, rfxRect++)
	{
		int startTileX = regionRect->left / 64;
		int endTileX = (regionRect->right - 1) / 64;
		int startTileY = regionRect->top / 64;
		int endTileY = (regionRect->bottom - 1) / 64;

		rfxRect->x = regionRect->left;
		rfxRect->y = regionRect->top;
		rfxRect->width = (regionRect->right - regionRect->left);
		rfxRect->height = (regionRect->bottom - regionRect->top);

		for (yIdx = startTileY, gridRelY = startTileY * 64; yIdx <= endTileY; yIdx++, gridRelY += 64 )
		{
			int tileHeight = 64;

			/* Clip the last tile row to the image height. */
			if ((yIdx == endTileY) && (gridRelY + 64 > height))
				tileHeight = height - gridRelY;

			currentTileRect.top = gridRelY;
			currentTileRect.bottom = gridRelY + tileHeight;

			for (xIdx = startTileX, gridRelX = startTileX * 64; xIdx <= endTileX; xIdx++, gridRelX += 64)
			{
				int tileWidth = 64;

				/* Clip the last tile column to the image width. */
				if ((xIdx == endTileX) && (gridRelX + 64 > width))
					tileWidth = width - gridRelX;

				currentTileRect.left = gridRelX;
				currentTileRect.right = gridRelX + tileWidth;

				/* checks if this tile is already treated */
				if (region16_intersects_rect(&tilesRegion, &currentTileRect))
					continue;

				if (!(tile = (RFX_TILE*) ObjectPool_Take(context->priv->TilePool)))
					goto skip_encoding_loop;

				tile->xIdx = xIdx;
				tile->yIdx = yIdx;
				tile->x = gridRelX;
				tile->y = gridRelY;
				tile->scanline = scanline;
				tile->width = tileWidth;
				tile->height = tileHeight;

				ax = gridRelX;
				ay = gridRelY;

				/* The tile will point into the caller's buffer; drop any
				 * buffer it owned from a previous use. */
				if (tile->data && tile->allocated)
				{
					free(tile->data);
					tile->allocated = FALSE;
				}

				tile->data = &data[(ay * scanline) + (ax * bytesPerPixel)];

				tile->quantIdxY = context->quantIdxY;
				tile->quantIdxCb = context->quantIdxCb;
				tile->quantIdxCr = context->quantIdxCr;

				tile->YLen = tile->CbLen = tile->CrLen = 0;

				/* NOTE(review): on this failure path the tile taken above is not
				 * yet stored in message->tiles, so it does not appear to be
				 * returned to the pool -- confirm. */
				if (!(tile->YCbCrData = (BYTE *)BufferPool_Take(context->priv->BufferPool, -1)))
					goto skip_encoding_loop;

				/* Three component planes inside one pooled buffer, spaced
				 * (8192 + 32) bytes apart, each starting 16 bytes in. */
				tile->YData = (BYTE*) &(tile->YCbCrData[((8192 + 32) * 0) + 16]);
				tile->CbData = (BYTE*) &(tile->YCbCrData[((8192 + 32) * 1) + 16]);
				tile->CrData = (BYTE*) &(tile->YCbCrData[((8192 + 32) * 2) + 16]);

				message->tiles[message->numTiles] = tile;
				message->numTiles++;

				if (context->priv->UseThreads)
				{
					workParam->context = context;
					workParam->tile = tile;

					/* On failure the slot holds NULL, which the wait loop below skips. */
					if (!(*workObject = CreateThreadpoolWork(
							(PTP_WORK_CALLBACK)rfx_compose_message_tile_work_callback,
							(void*) workParam,
							&context->priv->ThreadPoolEnv)))
					{
						goto skip_encoding_loop;
					}

					SubmitThreadpoolWork(*workObject);
					workObject++;
					workParam++;
				}
				else
				{
					rfx_encode_rgb(context, tile);
				}

				if (!region16_union_rect(&tilesRegion, &tilesRegion, &currentTileRect))
					goto skip_encoding_loop;
			} /* xIdx */
		} /* yIdx */
	} /* rects */

	success = TRUE;

skip_encoding_loop:
	/* Shrink the tile array to the number of tiles actually produced. */
	if (success && message->numTiles != maxNbTiles)
	{
		void* pmem = realloc((void*) message->tiles, sizeof(RFX_TILE*) * message->numTiles);

		if (pmem)
			message->tiles = (RFX_TILE**) pmem;
		else
			success = FALSE;
	}

	/* when using threads ensure all computations are done */
	message->tilesDataSize = 0;
	workObject = context->priv->workObjects;

	for (i = 0; i < message->numTiles; i++)
	{
		tile = message->tiles[i];

		if (context->priv->UseThreads)
		{
			if (*workObject)
			{
				WaitForThreadpoolWorkCallbacks(*workObject, FALSE);
				CloseThreadpoolWork(*workObject);
			}

			workObject++;
		}

		message->tilesDataSize += rfx_tile_length(tile);
	}

	region16_uninit(&tilesRegion);
	region16_uninit(&rectsRegion);

	if (success)
		return message;

	WLog_ERR(TAG, "%s: failed", __FUNCTION__);
	message->freeRects = TRUE;
	rfx_message_free(context, message);
	return NULL;
}
int TestPoolWork(int argc, char* argv[]) { int index; PTP_POOL pool; PTP_WORK work; PTP_CLEANUP_GROUP cleanupGroup; TP_CALLBACK_ENVIRON environment; printf("Global Thread Pool\n"); work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", NULL); if (!work) { printf("CreateThreadpoolWork failure\n"); return -1; } /** * You can post a work object one or more times (up to MAXULONG) without waiting for prior callbacks to complete. * The callbacks will execute in parallel. To improve efficiency, the thread pool may throttle the threads. */ for (index = 0; index < 10; index++) SubmitThreadpoolWork(work); WaitForThreadpoolWorkCallbacks(work, FALSE); CloseThreadpoolWork(work); printf("Private Thread Pool\n"); pool = CreateThreadpool(NULL); SetThreadpoolThreadMinimum(pool, 4); SetThreadpoolThreadMaximum(pool, 8); InitializeThreadpoolEnvironment(&environment); SetThreadpoolCallbackPool(&environment, pool); cleanupGroup = CreateThreadpoolCleanupGroup(); if (!cleanupGroup) { printf("CreateThreadpoolCleanupGroup failure\n"); return -1; } SetThreadpoolCallbackCleanupGroup(&environment, cleanupGroup, NULL); work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", &environment); if (!work) { printf("CreateThreadpoolWork failure\n"); return -1; } for (index = 0; index < 10; index++) SubmitThreadpoolWork(work); WaitForThreadpoolWorkCallbacks(work, FALSE); CloseThreadpoolCleanupGroupMembers(cleanupGroup, TRUE, NULL); CloseThreadpoolCleanupGroup(cleanupGroup); DestroyThreadpoolEnvironment(&environment); CloseThreadpoolWork(work); CloseThreadpool(pool); return 0; }
int main (int argc, char * argv[]) { DWORD nchar = 0, nword = 0, nline = 0; PTP_WORK *pWorkObjects; WORK_OBJECT_ARG ** pWorkObjArgsArray, *pObjectArg; TP_CALLBACK_ENVIRON cbe; // Callback environment int nThread, iThrd; if (!WindowsVersionOK (6, 0)) ReportError ("This program requires Windows NT 6.0 or greater", 1, TRUE); if (argc < 2) { printf ("Usage: wcMT_vtp filename ... filename\n"); return 1; } /* Create a worker thread for each file on the command line */ nThread = (DWORD)argc - 1; pWorkObjects = malloc (nThread * sizeof(PTP_WORK)); if (pWorkObjects != NULL) pWorkObjArgsArray = malloc (nThread * sizeof(WORK_OBJECT_ARG *)); if (pWorkObjects == NULL || pWorkObjArgsArray == NULL) ReportError ("Cannot allocate working memory for worke item or argument array.", 2, TRUE); InitializeThreadpoolEnvironment (&cbe); /* Create a work object argument for each file on the command line. First put the file names in the thread arguments. */ for (iThrd = 0; iThrd < nThread; iThrd++) { pObjectArg = (pWorkObjArgsArray[iThrd] = _aligned_malloc (sizeof(WORK_OBJECT_ARG), CACHE_LINE_SIZE)); if (NULL == pObjectArg) ReportError ("Cannot allocate memory for a thread argument structure.", 3, TRUE); pObjectArg->filename = argv[iThrd+1]; pObjectArg->kword = pObjectArg->kchar = pObjectArg->kline = 0; pWorkObjects[iThrd] = CreateThreadpoolWork (wcfunc, pObjectArg, &cbe); if (pWorkObjects[iThrd] == NULL) ReportError ("Cannot create consumer thread", 4, TRUE); SubmitThreadpoolWork (pWorkObjects[iThrd]); } /* Worker objects are all submitted. 
Wait for them */ /* to complete and accumulate the results */ for (iThrd = 0; iThrd < nThread; iThrd++) { /* Wait for the thread pool work item to complete */ WaitForThreadpoolWorkCallbacks (pWorkObjects[iThrd], FALSE); CloseThreadpoolWork(pWorkObjects[iThrd]); } free (pWorkObjects); /* Accumulate the results */ for (iThrd = 0; iThrd < argc - 1; iThrd++) { pObjectArg = pWorkObjArgsArray[iThrd]; nchar += pObjectArg->kchar; nword += pObjectArg->kword; nline += pObjectArg->kline; printf ("%10d %9d %9d %s\n", pObjectArg->kline, pObjectArg->kword, pObjectArg->kchar, pObjectArg->filename); } free (pWorkObjArgsArray); printf ("%10d %9d %9d \n", nline, nword, nchar); return 0; }
/**
 * Parses a tileset block from the stream and processes its tiles.
 *
 * Reads the tileset header and the quantization table (stored in
 * context->quants, ten values per entry), then processes each tile block
 * either inline (rfx_process_message_tile) or through one thread pool work
 * item per tile when context->priv->UseThreads is set.
 *
 * Changes relative to the previous version:
 *  - the work item context is &params[i] again (the expression was garbled);
 *  - every allocation is checked, and a failed realloc no longer overwrites
 *    context->quants with NULL;
 *  - error exits inside the tile loop no longer return while work items are
 *    still queued: all submitted items are waited for and closed, and the
 *    helper arrays are freed, on every path;
 *  - only work items that were actually created are waited for (stopping at
 *    an unknown block type used to wait on uninitialized entries);
 *  - a block length smaller than the 6-byte block header is rejected
 *    explicitly.
 *
 * @param context  Codec context; num_quants and quants are updated.
 * @param message  Receives num_tiles and the tiles array.
 * @param s        Input stream positioned after the block header.
 * @return FALSE on a malformed or truncated packet or on allocation failure;
 *         TRUE otherwise, including the "no quantization value", "no tiles"
 *         and unknown-block-type cases, as before.
 */
static BOOL rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* message, wStream* s)
{
	int i;
	int pos;
	int submitted = 0; /* number of work items created and submitted */
	BOOL rc = TRUE;
	BYTE quant;
	UINT32* quants;
	UINT16 subtype;
	UINT32 blockLen;
	UINT32 blockType;
	UINT32 tilesDataSize;
	PTP_WORK* work_objects = NULL;
	RFX_TILE_WORK_PARAM* params = NULL;

	if (Stream_GetRemainingLength(s) < 14)
	{
		DEBUG_WARN("RfxMessageTileSet packet too small");
		return FALSE;
	}

	Stream_Read_UINT16(s, subtype); /* subtype (2 bytes) must be set to CBT_TILESET (0xCAC2) */

	if (subtype != CBT_TILESET)
	{
		DEBUG_WARN("invalid subtype, expected CBT_TILESET.");
		return FALSE;
	}

	Stream_Seek_UINT16(s); /* idx (2 bytes), must be set to 0x0000 */
	Stream_Seek_UINT16(s); /* properties (2 bytes) */
	Stream_Read_UINT8(s, context->num_quants); /* numQuant (1 byte) */
	Stream_Seek_UINT8(s); /* tileSize (1 byte), must be set to 0x40 */

	if (context->num_quants < 1)
	{
		DEBUG_WARN("no quantization value.");
		return TRUE;
	}

	Stream_Read_UINT16(s, message->num_tiles); /* numTiles (2 bytes) */

	if (message->num_tiles < 1)
	{
		DEBUG_WARN("no tiles.");
		return TRUE;
	}

	Stream_Read_UINT32(s, tilesDataSize); /* tilesDataSize (4 bytes) */

	/* realloc(NULL, n) behaves like malloc(n), so one call covers both cases.
	 * The result goes through a temporary so the old table survives a failure. */
	quants = (UINT32*) realloc((void*) context->quants, context->num_quants * 10 * sizeof(UINT32));

	if (!quants)
		return FALSE;

	context->quants = quants;

	/* quantVals */
	if (Stream_GetRemainingLength(s) < context->num_quants * 5)
	{
		DEBUG_WARN("RfxMessageTileSet packet too small for num_quants=%d", context->num_quants);
		return FALSE;
	}

	for (i = 0; i < context->num_quants; i++)
	{
		/* RFX_CODEC_QUANT: five bytes, each holding two 4-bit values (low nibble first) */
		Stream_Read_UINT8(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		Stream_Read_UINT8(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		Stream_Read_UINT8(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		Stream_Read_UINT8(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);
		Stream_Read_UINT8(s, quant);
		*quants++ = (quant & 0x0F);
		*quants++ = (quant >> 4);

		DEBUG_RFX("quant %d (%d %d %d %d %d %d %d %d %d %d).",
			i, context->quants[i * 10], context->quants[i * 10 + 1],
			context->quants[i * 10 + 2], context->quants[i * 10 + 3],
			context->quants[i * 10 + 4], context->quants[i * 10 + 5],
			context->quants[i * 10 + 6], context->quants[i * 10 + 7],
			context->quants[i * 10 + 8], context->quants[i * 10 + 9]);
	}

	message->tiles = (RFX_TILE**) malloc(sizeof(RFX_TILE*) * message->num_tiles);

	if (!message->tiles)
	{
		message->num_tiles = 0;
		return FALSE;
	}

	ZeroMemory(message->tiles, sizeof(RFX_TILE*) * message->num_tiles);

	if (context->priv->UseThreads)
	{
		work_objects = (PTP_WORK*) malloc(sizeof(PTP_WORK) * message->num_tiles);
		params = (RFX_TILE_WORK_PARAM*) malloc(sizeof(RFX_TILE_WORK_PARAM) * message->num_tiles);

		if (!work_objects || !params)
		{
			free(work_objects);
			free(params);
			return FALSE;
		}
	}

	/* tiles */
	for (i = 0; i < message->num_tiles; i++)
	{
		/* RFX_TILE */
		if (Stream_GetRemainingLength(s) < 6)
		{
			DEBUG_WARN("RfxMessageTileSet packet too small to read tile %d/%d", i, message->num_tiles);
			rc = FALSE;
			break;
		}

		Stream_Read_UINT16(s, blockType); /* blockType (2 bytes), must be set to CBT_TILE (0xCAC3) */
		Stream_Read_UINT32(s, blockLen); /* blockLen (4 bytes) */

		/* blockLen includes the 6 header bytes just read; smaller values
		 * would wrap around in the subtraction below. */
		if ((blockLen < 6) || (Stream_GetRemainingLength(s) < blockLen - 6))
		{
			DEBUG_WARN("RfxMessageTileSet not enough bytes to read tile %d/%d with blocklen=%d",
				i, message->num_tiles, blockLen);
			rc = FALSE;
			break;
		}

		/* Start of the next block, used to resynchronise after this tile. */
		pos = Stream_GetPosition(s) - 6 + blockLen;

		if (blockType != CBT_TILE)
		{
			DEBUG_WARN("unknown block type 0x%X, expected CBT_TILE (0xCAC3).", blockType);
			break;
		}

		message->tiles[i] = rfx_tile_pool_take(context);

		if (context->priv->UseThreads)
		{
			params[i].context = context;
			params[i].tile = message->tiles[i];
			/* Each work item gets its own copy of the stream state. */
			CopyMemory(&(params[i].s), s, sizeof(wStream));

			work_objects[i] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_process_message_tile_work_callback,
					(void*) &params[i], &context->priv->ThreadPoolEnv);

			if (!work_objects[i])
			{
				DEBUG_WARN("CreateThreadpoolWork failed.");
				rc = FALSE;
				break;
			}

			SubmitThreadpoolWork(work_objects[i]);
			submitted = i + 1;
		}
		else
		{
			rfx_process_message_tile(context, message->tiles[i], s);
		}

		Stream_SetPosition(s, pos);
	}

	if (context->priv->UseThreads)
	{
		/* Runs on every path: the callbacks read from params, so they must
		 * have finished before it is freed. */
		for (i = 0; i < submitted; i++)
		{
			WaitForThreadpoolWorkCallbacks(work_objects[i], FALSE);
			CloseThreadpoolWork(work_objects[i]);
		}

		free(work_objects);
		free(params);
	}

	return rc;
}