void OnStartBatch(void)
{
    m_btnStart.Enable(FALSE);
    AddMessage(_T("---- start a new batch ----"));

    SubmitThreadpoolWork(m_pWorkItem);
    SubmitThreadpoolWork(m_pWorkItem);
    SubmitThreadpoolWork(m_pWorkItem);
    SubmitThreadpoolWork(m_pWorkItem);

    AddMessage(_T("4 tasks are submitted."));
}
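The handler above relies on the fact that each SubmitThreadpoolWork call on the same PTP_WORK queues one more callback invocation. A minimal, self-contained sketch of that behavior; the callback name and the g_pending counter are illustrative, not taken from the snippet above:

#include <windows.h>
#include <stdio.h>

static volatile LONG g_pending = 4;

/* Runs once per SubmitThreadpoolWork call, possibly on several pool threads in parallel. */
VOID CALLBACK BatchCallback(PTP_CALLBACK_INSTANCE instance, PVOID context, PTP_WORK work)
{
    UNREFERENCED_PARAMETER(instance);
    UNREFERENCED_PARAMETER(context);
    UNREFERENCED_PARAMETER(work);
    /* ... one task's worth of work ... */
    if (InterlockedDecrement(&g_pending) == 0)
        printf("batch done\n");
}

int main(void)
{
    PTP_WORK work = CreateThreadpoolWork(BatchCallback, NULL, NULL);
    if (!work)
        return 1;
    for (int i = 0; i < 4; ++i)
        SubmitThreadpoolWork(work);            /* queue four callbacks */
    WaitForThreadpoolWorkCallbacks(work, FALSE); /* wait, don't cancel */
    CloseThreadpoolWork(work);
    return 0;
}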
void EasyIocp::InitThreadPool()
{
    // Initialize the thread pool: one work object per processor.
    int num = GetProcessorsNum();
    InitializeThreadpoolEnvironment(&tpCBE_);
    worksNum_ = num;
    ptpWorks_ = (PTP_WORK*)SysMalloc(sizeof(PTP_WORK) * worksNum_);
    workArgs_ = (EasyIocpStruct::WORK_ARG*)SysMalloc(sizeof(EasyIocpStruct::WORK_ARG) * worksNum_);
    if(!ptpWorks_ || !workArgs_)
    {
        print("EasyIocp::InitThreadPool: SysMalloc failed.");
        return; // don't fall through and dereference a null array in release builds
    }
    for(int i = 0; i < worksNum_; ++i)
    {
        workArgs_[i].id = i;
        workArgs_[i].iocpModel = this;
        ptpWorks_[i] = CreateThreadpoolWork(TPCallBack, (PVOID)&workArgs_[i], &tpCBE_);
        SubmitThreadpoolWork(ptpWorks_[i]);
    }
    print("EasyIocp::InitThreadPool: init thread pool successfully. num: %d.", worksNum_);
}
template <typename T>
bool WorkItem<T>::StartWork()
{
    work_ = CreateThreadpoolWork(callback, this, callback_env_);
    if(work_ == nullptr)
    {
        LOG_FATAL();
        return false;
    }
    SubmitThreadpoolWork(work_);
    return true;
}
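The callback passed to CreateThreadpoolWork above must match PTP_WORK_CALLBACK, with the object recovered from the context pointer. A sketch of what such a static trampoline might look like; the Run() member is an assumption, not taken from the class above:

template <typename T>
VOID CALLBACK WorkItem<T>::callback(PTP_CALLBACK_INSTANCE /*instance*/,
                                    PVOID context, PTP_WORK /*work*/)
{
    // CreateThreadpoolWork was given `this` as the context parameter.
    auto* self = static_cast<WorkItem<T>*>(context);
    self->Run(); // hypothetical worker method
}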
virtual void schedule(pplx::TaskProc_t proc, void* param)
{
    pplx::details::atomic_increment(s_flag);
    auto schedulerParam = std::unique_ptr<_Scheduler_Param>(new _Scheduler_Param(proc, param));
    auto work = CreateThreadpoolWork(DefaultWorkCallbackTest, schedulerParam.get(), NULL);
    if (work == nullptr)
    {
        throw utility::details::create_system_error(GetLastError());
    }
    SubmitThreadpoolWork(work);
    CloseThreadpoolWork(work);
    schedulerParam.release();
}
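Closing the work object immediately after submitting is legal: CloseThreadpoolWork defers the actual free until the outstanding callback has completed. The release() call then hands ownership of the parameter to the callback. The body of DefaultWorkCallbackTest is not shown in the source, so the following is only an illustrative sketch, and the m_proc/m_param field names are assumptions:

static VOID CALLBACK DefaultWorkCallbackTest(PTP_CALLBACK_INSTANCE,
                                             PVOID context, PTP_WORK)
{
    // Take back the ownership that schedulerParam.release() gave up.
    std::unique_ptr<_Scheduler_Param> p(static_cast<_Scheduler_Param*>(context));
    p->m_proc(p->m_param); // hypothetical field names
}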
HRESULT NamedPipeChannel::DispatchRequest(std::unique_ptr<Request>&& request)
{
    lock_.AssertAcquired();
    try
    {
        if (work_ == nullptr)
            return E_HANDLE;
        queue_.push(std::move(request));
        if (queue_.size() == 1)
            SubmitThreadpoolWork(work_);
        return S_OK;
    }
    catch (...)
    {
        return E_UNEXPECTED;
    }
}
void NamedPipe::OnCompleted(PTP_CALLBACK_INSTANCE /*callback*/, void* context,
                            void* overlapped, ULONG error, ULONG_PTR bytes,
                            PTP_IO /*io*/)
{
    std::unique_ptr<Request> request(
        static_cast<Request*>(static_cast<OVERLAPPED*>(overlapped)));
    request->Internal = HRESULT_FROM_WIN32(error);
    request->InternalHigh = bytes;
    request->completed_command = request->command;
    request->command = Command::kNotify;

    auto instance = static_cast<NamedPipe*>(context);
    base::AutoLock guard(instance->lock_);
    instance->queue_.push_back(std::move(request));
    if (instance->queue_.size() == 1)
        SubmitThreadpoolWork(instance->work_);
}
HRESULT NamedPipe::DispatchRequest(
    std::unique_ptr<Request>&& request) {  // NOLINT(build/c++11)
  try {
    base::AutoLock guard(lock_);
    if (!IsValid())
      return E_HANDLE;
    queue_.push_back(std::move(request));
    if (queue_.size() == 1)
      SubmitThreadpoolWork(work_);
    return S_OK;
  } catch (...) {
    return E_FAIL;
  }
}
HRESULT NamedPipe::Accept(Listener* listener) {
  if (listener == nullptr)
    return E_INVALIDARG;
  try {
    auto request = CreateRequest(Command::kAccept, listener);
    if (request == nullptr)
      return E_OUTOFMEMORY;
    base::AutoLock guard(lock_);
    if (!IsValid())
      return E_HANDLE;
    queue_.push_back(std::move(request));
    if (queue_.size() == 1)
      SubmitThreadpoolWork(work_);
  } catch (...) {
    return E_FAIL;
  }
  return S_OK;
}
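These dispatchers all share one pattern: the work object is submitted only when the queue goes from empty to one entry, and the consumer callback resubmits it while entries remain (see OnRequested further down), so at most one callback processes the queue at a time. A stripped-down sketch of the pattern with hypothetical names and a std::mutex standing in for base::Lock:

#include <windows.h>
#include <deque>
#include <mutex>

struct SerialQueue
{
    std::mutex mutex_;
    std::deque<int> queue_;   // int stands in for a request type
    PTP_WORK work_ = nullptr; // created elsewhere with OnWork as the callback body

    // Producer side: one submission per empty->non-empty transition.
    void Enqueue(int item)
    {
        std::lock_guard<std::mutex> guard(mutex_);
        queue_.push_back(item);
        if (queue_.size() == 1)           // consumer was idle: wake it
            SubmitThreadpoolWork(work_);
    }

    // Consumer side: drain one item per invocation and resubmit while
    // more remain, so items are processed serially.
    void OnWork(PTP_WORK work)
    {
        std::unique_lock<std::mutex> guard(mutex_);
        int item = queue_.front();
        queue_.pop_front();
        if (!queue_.empty())
            SubmitThreadpoolWork(work);
        guard.unlock();
        Process(item);
    }

    void Process(int /*item*/) {} // hypothetical handler
};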
BOOL CService::UnregisterNotification(NOTIFY_HANDLE Handle)
{
    BOOL ret = FALSE;
    if (Handle)
    {
#ifdef UNIVERSAL
        PTP_WORK work;
        work = CreateThreadpoolWork(UnregisterNotificationWork, Handle, NULL);
        if (work != NULL)
        {
            SubmitThreadpoolWork(work);
        }
        ret = TRUE;
#else // UNIVERSAL
        ret = ::UnregisterDeviceNotification(Handle);
#endif // UNIVERSAL
    }
    return ret;
}
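Note that the UNIVERSAL branch above never closes the work object, so it leaks one PTP_WORK per call. For fire-and-forget submissions, the simple-callback API avoids owning a work object at all. A sketch of that alternative; the callback body is hypothetical:

#include <windows.h>

static VOID CALLBACK UnregisterSimple(PTP_CALLBACK_INSTANCE instance, PVOID context)
{
    UNREFERENCED_PARAMETER(instance);
    /* context carries the notification handle; there is no PTP_WORK to close. */
    /* ... perform the blocking unregister here ... */
    UNREFERENCED_PARAMETER(context);
}

BOOL QueueUnregister(PVOID handle)
{
    /* The pool owns the callback's bookkeeping; nothing to close on success. */
    return TrySubmitThreadpoolCallback(UnregisterSimple, handle, NULL);
}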
__declspec(noinline) bool benchmark_ntp_fs_stat()
{
    TP_CALLBACK_ENVIRON env;
    InitializeThreadpoolEnvironment(&env);

    PTP_POOL pool = CreateThreadpool(nullptr);
    if (!pool)
        return false;
    SetThreadpoolThreadMaximum(pool, 48);
    SetThreadpoolThreadMinimum(pool, 12);

    PTP_CLEANUP_GROUP group = CreateThreadpoolCleanupGroup();
    SetThreadpoolCallbackPool(&env, pool);
    SetThreadpoolCallbackCleanupGroup(&env, group, nullptr);

    PTP_WORK work_fs_stat = CreateThreadpoolWork(WorkCallback_fs_stat, nullptr, &env);
    SubmitThreadpoolWork(work_fs_stat);
    WaitForThreadpoolWorkCallbacks(work_fs_stat, false);

    // The cleanup group owns the work object: closing it individually and
    // then closing the group members would release it twice. Tear everything
    // down in reverse order of creation instead.
    CloseThreadpoolCleanupGroupMembers(group, FALSE, nullptr);
    CloseThreadpoolCleanupGroup(group);
    CloseThreadpool(pool);
    DestroyThreadpoolEnvironment(&env);
    return false;
}
void NamedPipeChannel::OnRequested(PTP_WORK work)
{
    lock_.Acquire();
    auto request = std::move(queue_.front());
    queue_.pop();
    if (!queue_.empty())
        SubmitThreadpoolWork(work);

    do
    {
        base::AutoLock guard(lock_, base::AutoLock::AlreadyAcquired());
        if (request->command == Command::kNotify)
            break;
        if (handle_ == INVALID_HANDLE_VALUE || io_ == nullptr)
        {
            request->result = E_HANDLE;
            break;
        }

        StartThreadpoolIo(io_);

        bool succeeded;
        switch (request->command)
        {
            case Command::kConnectAsync:
                succeeded = ConnectNamedPipe(handle_, request.get()) != FALSE;
                break;
            case Command::kReadAsync:
                succeeded = ReadFile(handle_, request->output, request->output_length,
                                     &request->length, request.get()) != FALSE;
                break;
            case Command::kWriteAsync:
                succeeded = WriteFile(handle_, request->input, request->input_length,
                                      &request->length, request.get()) != FALSE;
                break;
            case Command::kTransactAsync:
                succeeded = TransactNamedPipe(handle_, request->input, request->input_length,
                                              request->output, request->output_length,
                                              &request->length, request.get()) != FALSE;
                break;
            default:
                LOG(FATAL) << "Invalid command: " << static_cast<int>(request->command);
                succeeded = false;
                SetLastError(HRESULT_CODE(E_UNEXPECTED));
                break;
        }

        auto error = GetLastError();
        if (succeeded || error == ERROR_IO_PENDING || error == ERROR_MORE_DATA)
        {
            request.release();
            return;
        }

        request->result = HRESULT_FROM_WIN32(error);
        CancelThreadpoolIo(io_);
    } while (false);

    if (request->command != Command::kNotify)
    {
        request->completed_command = request->command;
        request->command = Command::kNotify;
    }

    switch (request->completed_command)
    {
        case Command::kConnectAsync:
            if (HRESULT_CODE(request->result) == ERROR_PIPE_CONNECTED)
                request->result = HRESULT_CODE(request->result);
            request->listener->OnConnected(this, request->result);
            break;
        case Command::kReadAsync:
            if (HRESULT_CODE(request->result) == ERROR_MORE_DATA)
                request->result = HRESULT_CODE(request->result);
            request->channel_listener->OnRead(this, request->result, request->output,
                                              request->length);
            break;
        case Command::kWriteAsync:
            request->channel_listener->OnWritten(this, request->result, request->input,
                                                 request->length);
            break;
        case Command::kTransactAsync:
            if (HRESULT_CODE(request->result) == ERROR_MORE_DATA)
                request->result = HRESULT_CODE(request->result);
            request->listener->OnTransacted(this, request->result, request->input,
                                            request->output, request->length);
            break;
        default:
            LOG(FATAL) << "Invalid command: "
                       << static_cast<int>(request->completed_command);
            break;
    }
}
bool Server::Create(short port, int maxPostAccept)
{
    assert(maxPostAccept > 0);
    m_MaxPostAccept = maxPostAccept;

    // Create the client work thread environment with a cleanup group.
    // We need this for shutting down properly.
    InitializeThreadpoolEnvironment(&m_ClientTPENV);
    m_ClientTPCLEAN = CreateThreadpoolCleanupGroup();
    if (m_ClientTPCLEAN == NULL)
    {
        ERROR_CODE(GetLastError(), "Could not create client cleaning group.");
        return false;
    }
    SetThreadpoolCallbackCleanupGroup(&m_ClientTPENV, m_ClientTPCLEAN, NULL);

    // Create the listen socket.
    m_listenSocket = Network::CreateSocket(true, port);
    if (m_listenSocket == INVALID_SOCKET)
    {
        return false;
    }

    // Make the address reusable so the same server can be restarted instantly.
    BOOL reuseAddr = TRUE; // SO_REUSEADDR expects an int-sized value, not a bool
    if (setsockopt(m_listenSocket, SOL_SOCKET, SO_REUSEADDR,
                   reinterpret_cast<const char*>(&reuseAddr), sizeof(reuseAddr)) == SOCKET_ERROR)
    {
        ERROR_CODE(WSAGetLastError(), "setsockopt() failed with SO_REUSEADDR.");
        Destroy();
        return false;
    }

    // Create and start the thread pool I/O object for socket I/O.
    m_pTPIO = CreateThreadpoolIo(reinterpret_cast<HANDLE>(m_listenSocket),
                                 Server::IoCompletionCallback, NULL, NULL);
    if (m_pTPIO == NULL)
    {
        ERROR_CODE(WSAGetLastError(), "Could not assign the listen socket to the IOCP handle.");
        Destroy();
        return false;
    }

    // Start listening.
    StartThreadpoolIo(m_pTPIO);
    if (listen(m_listenSocket, SOMAXCONN) == SOCKET_ERROR)
    {
        ERROR_CODE(WSAGetLastError(), "listen() failed.");
        return false;
    }

    // Create the critical section guarding m_Clients.
    InitializeCriticalSection(&m_CSForClients);

    // Create the accept worker.
    m_AcceptTPWORK = CreateThreadpoolWork(Server::WorkerPostAccept, this, NULL);
    if (m_AcceptTPWORK == NULL)
    {
        ERROR_CODE(GetLastError(), "Could not create AcceptEx worker TPIO.");
        Destroy();
        return false;
    }

    m_ShuttingDown = false;
    SubmitThreadpoolWork(m_AcceptTPWORK);
    return true;
}
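Server::IoCompletionCallback must match PTP_WIN32_IO_CALLBACK, and StartThreadpoolIo must be called before every asynchronous operation on the bound handle (with CancelThreadpoolIo if the operation fails immediately). A sketch of the expected shape as a free function; the real one is a static member, and the dispatch body is not shown in the source:

VOID CALLBACK IoCompletionCallback(PTP_CALLBACK_INSTANCE instance, PVOID context,
                                   PVOID overlapped, ULONG ioResult,
                                   ULONG_PTR bytes, PTP_IO io)
{
    UNREFERENCED_PARAMETER(instance);
    UNREFERENCED_PARAMETER(io);
    // ioResult is a Win32 error code; bytes is the transfer count; overlapped
    // identifies which AcceptEx/WSARecv this completion belongs to.
    // ... dispatch to per-connection handling here (hypothetical) ...
    UNREFERENCED_PARAMETER(context);
    UNREFERENCED_PARAMETER(overlapped);
    UNREFERENCED_PARAMETER(ioResult);
    UNREFERENCED_PARAMETER(bytes);
}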
int TestPoolWork(int argc, char* argv[])
{
    int index;
    PTP_POOL pool;
    PTP_WORK work;
    PTP_CLEANUP_GROUP cleanupGroup;
    TP_CALLBACK_ENVIRON environment;

    printf("Global Thread Pool\n");

    work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", NULL);
    if (!work)
    {
        printf("CreateThreadpoolWork failure\n");
        return -1;
    }

    /**
     * You can post a work object one or more times (up to MAXULONG) without
     * waiting for prior callbacks to complete. The callbacks will execute in
     * parallel. To improve efficiency, the thread pool may throttle the threads.
     */
    for (index = 0; index < 10; index++)
        SubmitThreadpoolWork(work);

    WaitForThreadpoolWorkCallbacks(work, FALSE);
    CloseThreadpoolWork(work);

    printf("Private Thread Pool\n");

    if (!(pool = CreateThreadpool(NULL)))
    {
        printf("CreateThreadpool failure\n");
        return -1;
    }
    if (!SetThreadpoolThreadMinimum(pool, 4))
    {
        printf("SetThreadpoolThreadMinimum failure\n");
        return -1;
    }
    SetThreadpoolThreadMaximum(pool, 8);

    InitializeThreadpoolEnvironment(&environment);
    SetThreadpoolCallbackPool(&environment, pool);

    cleanupGroup = CreateThreadpoolCleanupGroup();
    if (!cleanupGroup)
    {
        printf("CreateThreadpoolCleanupGroup failure\n");
        return -1;
    }
    SetThreadpoolCallbackCleanupGroup(&environment, cleanupGroup, NULL);

    work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", &environment);
    if (!work)
    {
        printf("CreateThreadpoolWork failure\n");
        return -1;
    }

    for (index = 0; index < 10; index++)
        SubmitThreadpoolWork(work);

    WaitForThreadpoolWorkCallbacks(work, FALSE);

    CloseThreadpoolCleanupGroupMembers(cleanupGroup, TRUE, NULL);
    CloseThreadpoolCleanupGroup(cleanupGroup);
    DestroyThreadpoolEnvironment(&environment);

    /**
     * See Remarks at https://msdn.microsoft.com/en-us/library/windows/desktop/ms682043(v=vs.85).aspx
     * If there is a cleanup group associated with the work object,
     * it is not necessary to call CloseThreadpoolWork!
     * Calling the CloseThreadpoolCleanupGroupMembers function releases the
     * work, wait, and timer objects associated with the cleanup group.
     */
    /* CloseThreadpoolWork(work); // this would segfault, see comment above. */

    CloseThreadpool(pool);
    return 0;
}
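The FALSE argument to WaitForThreadpoolWorkCallbacks waits for all queued callbacks to run; passing TRUE instead discards submissions that have not started yet, while still waiting for any callback already in progress. A small fragment showing the difference, reusing the work and index variables from the function above:

/* Submit 10, then cancel: callbacks not yet started are discarded,
 * while any already running are still waited on. */
for (index = 0; index < 10; index++)
    SubmitThreadpoolWork(work);
WaitForThreadpoolWorkCallbacks(work, TRUE); /* fCancelPendingCallbacks = TRUE */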
int TestPoolWork(int argc, char* argv[])
{
    int index;
    PTP_POOL pool;
    PTP_WORK work;
    PTP_CLEANUP_GROUP cleanupGroup;
    TP_CALLBACK_ENVIRON environment;

    printf("Global Thread Pool\n");

    work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", NULL);
    if (!work)
    {
        printf("CreateThreadpoolWork failure\n");
        return -1;
    }

    /**
     * You can post a work object one or more times (up to MAXULONG) without
     * waiting for prior callbacks to complete. The callbacks will execute in
     * parallel. To improve efficiency, the thread pool may throttle the threads.
     */
    for (index = 0; index < 10; index++)
        SubmitThreadpoolWork(work);

    WaitForThreadpoolWorkCallbacks(work, FALSE);
    CloseThreadpoolWork(work);

    printf("Private Thread Pool\n");

    pool = CreateThreadpool(NULL);
    SetThreadpoolThreadMinimum(pool, 4);
    SetThreadpoolThreadMaximum(pool, 8);

    InitializeThreadpoolEnvironment(&environment);
    SetThreadpoolCallbackPool(&environment, pool);

    cleanupGroup = CreateThreadpoolCleanupGroup();
    if (!cleanupGroup)
    {
        printf("CreateThreadpoolCleanupGroup failure\n");
        return -1;
    }
    SetThreadpoolCallbackCleanupGroup(&environment, cleanupGroup, NULL);

    work = CreateThreadpoolWork((PTP_WORK_CALLBACK) test_WorkCallback, "world", &environment);
    if (!work)
    {
        printf("CreateThreadpoolWork failure\n");
        return -1;
    }

    for (index = 0; index < 10; index++)
        SubmitThreadpoolWork(work);

    WaitForThreadpoolWorkCallbacks(work, FALSE);

    CloseThreadpoolCleanupGroupMembers(cleanupGroup, TRUE, NULL);
    CloseThreadpoolCleanupGroup(cleanupGroup);
    DestroyThreadpoolEnvironment(&environment);
    /* Do NOT call CloseThreadpoolWork(work) here: CloseThreadpoolCleanupGroupMembers
     * has already released the work object, so closing it again frees it twice
     * (see the remarks in the previous variant of this function). */
    CloseThreadpool(pool);
    return 0;
}
void NamedPipe::OnRequested(PTP_WORK work)
{
    lock_.Acquire();
    auto request = std::move(queue_.front());
    queue_.pop_front();
    if (!queue_.empty())
        SubmitThreadpoolWork(work);

    do
    {
        base::AutoLock guard(lock_, base::AutoLock::AlreadyAcquired());
        if (request->command == Command::kNotify)
            break;
        if (pipe_ == INVALID_HANDLE_VALUE || io_ == nullptr)
        {
            request->Internal = static_cast<ULONG_PTR>(E_HANDLE);
            break;
        }

        StartThreadpoolIo(io_);

        auto succeeded = false;
        switch (request->command)
        {
            case Command::kAccept:
                succeeded = ConnectNamedPipe(pipe_, request.get()) != FALSE;
                break;
            case Command::kRead:
                succeeded = ReadFile(pipe_, request->buffer,
                                     static_cast<DWORD>(request->InternalHigh),
                                     nullptr, request.get()) != FALSE;
                break;
            case Command::kWrite:
                succeeded = WriteFile(pipe_, request->buffer,
                                      static_cast<DWORD>(request->InternalHigh),
                                      nullptr, request.get()) != FALSE;
                break;
            default:
                CHECK(false) << "This must not occur.";
        }

        auto error = GetLastError();
        if (succeeded || error == ERROR_IO_PENDING)
        {
            request.release();
            return;
        }

        if (request->command == Command::kAccept && error == ERROR_PIPE_CONNECTED)
            error = ERROR_SUCCESS;

        CancelThreadpoolIo(io_);
        request->Internal = HRESULT_FROM_WIN32(error);
        request->completed_command = request->command;
        request->command = Command::kNotify;
    } while (false);

    switch (request->completed_command)
    {
        case Command::kAccept:
            request->listener->OnAccepted(this, static_cast<HRESULT>(request->Internal));
            break;
        case Command::kRead:
            request->listener->OnRead(this, static_cast<HRESULT>(request->Internal),
                                      request->buffer, request->InternalHigh);
            break;
        case Command::kWrite:
            request->listener->OnWritten(this, static_cast<HRESULT>(request->Internal),
                                         request->buffer, request->InternalHigh);
            break;
        default:
            CHECK(false) << "This must not occur.";
    }
}
int main(int argc, char* argv[])
{
    DWORD nchar = 0, nword = 0, nline = 0;
    PTP_WORK* pWorkObjects;
    WORK_OBJECT_ARG** pWorkObjArgsArray = NULL, *pObjectArg;
    TP_CALLBACK_ENVIRON cbe; // Callback environment
    int nThread, iThrd;

    if (!WindowsVersionOK(6, 0))
        ReportError("This program requires Windows NT 6.0 or greater", 1, TRUE);

    if (argc < 2)
    {
        printf("Usage: wcMT_vtp filename ... filename\n");
        return 1;
    }

    /* Create a work object for each file on the command line */
    nThread = argc - 1;
    pWorkObjects = malloc(nThread * sizeof(PTP_WORK));
    if (pWorkObjects != NULL)
        pWorkObjArgsArray = malloc(nThread * sizeof(WORK_OBJECT_ARG*));
    if (pWorkObjects == NULL || pWorkObjArgsArray == NULL)
        ReportError("Cannot allocate working memory for work item or argument array.", 2, TRUE);

    InitializeThreadpoolEnvironment(&cbe);

    /* Create a work object argument for each file on the command line.
       First put the file names in the work object arguments. */
    for (iThrd = 0; iThrd < nThread; iThrd++)
    {
        pObjectArg = (pWorkObjArgsArray[iThrd] =
                          _aligned_malloc(sizeof(WORK_OBJECT_ARG), CACHE_LINE_SIZE));
        if (NULL == pObjectArg)
            ReportError("Cannot allocate memory for a thread argument structure.", 3, TRUE);
        pObjectArg->filename = argv[iThrd + 1];
        pObjectArg->kword = pObjectArg->kchar = pObjectArg->kline = 0;
        pWorkObjects[iThrd] = CreateThreadpoolWork(wcfunc, pObjectArg, &cbe);
        if (pWorkObjects[iThrd] == NULL)
            ReportError("Cannot create consumer thread", 4, TRUE);
        SubmitThreadpoolWork(pWorkObjects[iThrd]);
    }

    /* Work objects are all submitted. Wait for them
       to complete, then accumulate the results. */
    for (iThrd = 0; iThrd < nThread; iThrd++)
    {
        /* Wait for the thread pool work item to complete */
        WaitForThreadpoolWorkCallbacks(pWorkObjects[iThrd], FALSE);
        CloseThreadpoolWork(pWorkObjects[iThrd]);
    }
    free(pWorkObjects);

    /* Accumulate the results */
    for (iThrd = 0; iThrd < argc - 1; iThrd++)
    {
        pObjectArg = pWorkObjArgsArray[iThrd];
        nchar += pObjectArg->kchar;
        nword += pObjectArg->kword;
        nline += pObjectArg->kline;
        printf("%10d %9d %9d %s\n", pObjectArg->kline, pObjectArg->kword,
               pObjectArg->kchar, pObjectArg->filename);
        _aligned_free(pObjectArg); /* matches the _aligned_malloc above */
    }
    free(pWorkObjArgsArray);

    printf("%10d %9d %9d \n", nline, nword, nchar);
    return 0;
}
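wcfunc itself is not shown. A PTP_WORK_CALLBACK for this program would look roughly like the sketch below (needs <stdio.h> and <ctype.h>); the counting loop is a simplified stand-in for the real word-count logic, not the author's code:

VOID CALLBACK wcfunc(PTP_CALLBACK_INSTANCE instance, PVOID context, PTP_WORK work)
{
    WORK_OBJECT_ARG* arg = (WORK_OBJECT_ARG*)context;
    FILE* fp = fopen(arg->filename, "r");
    int c, inWord = 0;
    UNREFERENCED_PARAMETER(instance);
    UNREFERENCED_PARAMETER(work);
    if (fp == NULL)
        return;
    while ((c = getc(fp)) != EOF) {
        arg->kchar++;
        if (c == '\n') arg->kline++;
        if (isspace(c)) inWord = 0;
        else if (!inWord) { inWord = 1; arg->kword++; }
    }
    fclose(fp);
}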
void *LLW_train_thread(void *th_data)
{
    // Recover the data structure
    struct ThreadData *data = (struct ThreadData *)th_data;
    const int thread_id = data->thread_id;
    const int nprocs = data->nprocs;
    struct Model *model = data->model;
    struct KernelCache *kernelcache = data->kernelcache;
    long chunk_size = data->chunk_size;
    const double accuracy = data->accuracy;
    double **gradient = data->gradient;
    double **H_alpha = data->H_alpha;
    double *best_primal_upper_bound = data->best_primal_upper_bound;
    int *activeset = data->activeset;
    long *nb_SV = data->nb_SV;
    double *lp_rhs = data->lp_rhs;
    FILE *fp = data->logfile_ptr;

    pthread_mutex_unlock(&thread_data_mutex); // Release thread_data for the next thread

    // Local variables
    int do_eval;
    char yesno;
    long long return_status = -1;

    // Prepare the cache
    struct TrainingCache cache;
    cache.chunk_size = chunk_size;
    LLW_alloc_memory(&cache, model->Q, model->nb_data, chunk_size);
    cache.kc = kernelcache;
    cache.activeset = activeset;
    cache.lp_rhs = lp_rhs;

    double **delta = matrix(chunk_size, model->Q);
    double previous_ratio = 0.0;
    double improvement = 1.0;
    double theta_opt;
    int jump = false;

    do_eval = (accuracy == 0) ? 0 : 1;

    /*
       Prepare parallel gradient computations:
       - the gradient vector is split into NUMTHREADS_GRAD parts (along i)
       - each part is updated by a different thread
    */
    // The maximum number of threads for gradient updates is nprocs
    pthread_t *grad_threads = (pthread_t *)malloc(sizeof(pthread_t) * nprocs);
    // Start with 1 thread (the main load is on kernel evaluations)
    int numthreads_grad = 1;
    void *status;
    int rc;
    long k;
    struct ThreadGradient_data *grad_data =
        (struct ThreadGradient_data *)malloc(sizeof(struct ThreadGradient_data) * nprocs);

    // Disable parallel gradient computation for small data sets
    int parallel_gradient_update = 1;
    if(model->nb_data < 5000 || nprocs == 1)
        parallel_gradient_update = 0;

    if(parallel_gradient_update)
    {
        for(k=0;k<nprocs;k++)
        {
            grad_data[k].gradient = gradient;
            grad_data[k].H_alpha = H_alpha;
            grad_data[k].cache = &cache;
            grad_data[k].model = model;
        }
        grad_data[0].start_i = 1;
        grad_data[0].end_i = model->nb_data / numthreads_grad;
        for(k=1;k<numthreads_grad-1;k++)
        {
            grad_data[k].start_i = grad_data[k-1].end_i + 1;
            grad_data[k].end_i = grad_data[k].start_i + model->nb_data / numthreads_grad - 1;
        }
        if(numthreads_grad > 1)
        {
            grad_data[numthreads_grad-1].start_i = grad_data[numthreads_grad-2].end_i + 1;
            grad_data[numthreads_grad-1].end_i = model->nb_data;
        }
    }

#ifdef _WIN32
    // Initialize the pool: one work object per gradient-update slice
    TP_WORK **work;
    if(parallel_gradient_update)
    {
        work = malloc(sizeof(TP_WORK *) * nprocs);
        for(k=0;k<nprocs;k++)
            work[k] = CreateThreadpoolWork(LLW_update_gradient_thread2, (void *)&grad_data[k], NULL);
    }
#endif

    // Switch to nprocs/4 threads for gradient updates once 25% of the kernel matrix is cached
    int percentage_step = 1;
    long percentage = model->nb_data / 4;
    int next_numthreads_grad = nprocs / 4;
    if(next_numthreads_grad == 0)
        next_numthreads_grad = 1;

    // Main loop
    int thread_stop = 0;
    do
    {
        if((TRAIN_SMALL_STEP < TRAIN_STEP) && (model->iter % TRAIN_SMALL_STEP) == 0)
        {
            printf(".");
            fflush(stdout);
        }

        // Select a random chunk of data to optimize
        select_random_chunk(&cache, model);

        // Compute the kernel submatrix for this chunk
        compute_K(&cache, model);

        // Enter critical section (using and modifying the model)
        pthread_mutex_lock(&(model->mutex));

        jump = LLW_solve_lp(gradient, &cache, model);
        if(jump == false)
            jump = LLW_check_opt_sol(gradient, &cache, model);

        if(jump == false)
        {
            LLW_compute_delta(delta, &cache, model);
            theta_opt = LLW_compute_theta_opt(delta, &cache, model);
            if (theta_opt > 0.0)
            {
                *nb_SV += LLW_compute_new_alpha(theta_opt, &cache, model);

                if(parallel_gradient_update)
                {
                    // Update the gradient in parallel
                    for(k=0;k<numthreads_grad;k++)
                    {
#ifdef _WIN32
                        SubmitThreadpoolWork(work[k]);
#else
                        rc = pthread_create(&grad_threads[k], NULL,
                                            LLW_update_gradient_thread, (void *)&grad_data[k]);
#endif
                    }
                    // Wait for the gradient computations to terminate
                    for(k=0;k<numthreads_grad;k++)
                    {
#ifdef _WIN32
                        WaitForThreadpoolWorkCallbacks(work[k], FALSE);
#else
                        rc = pthread_join(grad_threads[k], &status);
#endif
                    }
                }
                else
                {
                    // Old-style non-threaded gradient update (for small data sets)
                    LLW_update_gradient(gradient, H_alpha, &cache, model);
                }
            }
        }

        if((do_eval && (model->iter % TRAIN_STEP) == 0) || EVAL || STOP
           || (do_eval && model->ratio >= accuracy))
        {
            if(fp != NULL)
                fprintf(fp, "%ld ", model->iter);
            if(EVAL)
                printf("\n\n*** Evaluating the model at iteration %ld...\n", model->iter);

            // Evaluate how far we are in the optimization
            // (prints more info if interrupted by the user)
            previous_ratio = model->ratio;
            model->ratio = MSVM_eval(best_primal_upper_bound, gradient, H_alpha,
                                     NULL, model, EVAL, fp);
            print_training_info(*nb_SV, model);
            improvement = model->ratio - previous_ratio;

            if(EVAL) // if interrupted by the user (otherwise let the ratio decide if we go on training)
            {
                printf("\n *** Do you want to continue training ([y]/n)? ");
                yesno = getchar();
                if(yesno == 'n')
                {
                    STOP = 1;
                }
                EVAL = 0; // reset the interruption trigger
            }
        }

        // Release the kernel submatrix in the cache
        release_K(&cache);

        // Check whether a sufficient % of the kernel matrix is cached...
        if(parallel_gradient_update && cache.kc->max_idx >= percentage)
        {
            // ...and if so, switch this thread to computing gradient updates instead of kernel rows
            thread_stop = switch_thread(nprocs, &numthreads_grad, &next_numthreads_grad,
                                        &percentage, &percentage_step, grad_data,
                                        thread_id, model->nb_data);
            // (threads are actually stopped to leave the CPUs
            //  to other threads that will compute gradient updates)
        }

        model->iter++;

        // Release the mutex: end of critical section
        pthread_mutex_unlock(&(model->mutex));

    } while(model->iter <= MSVM_TRAIN_MAXIT
            && (!do_eval || (model->ratio < accuracy && improvement != 0.0))
            && !STOP && !thread_stop);

    // Release the mutex: end of critical section (see below)
    pthread_mutex_unlock(&(model->mutex));

#ifdef _WIN32
    if(parallel_gradient_update)
    {
        // All nprocs work objects were created above, so close them all
        // (numthreads_grad may still be smaller than nprocs at this point).
        for(k=0;k<nprocs;k++)
            CloseThreadpoolWork(work[k]);
        free(work);
    }
#endif

    // Compute return_status
    if(do_eval && (model->ratio >= accuracy || improvement == 0.0))
        return_status = 0; // optimum reached or no more improvement

    // Free memory
    LLW_free_memory(&cache);
    free(delta[1]);
    free(delta);
    free(grad_threads);
    free(grad_data);

    pthread_exit((void *)return_status);
}
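On Windows, the trainer above creates its per-slice work objects once and reuses them on every iteration (submit, then wait), whereas the POSIX path creates and joins fresh threads each time. That reuse is the main point of a PTP_WORK. A condensed sketch of the fan-out/barrier pattern; NSLICES, UpdateSliceCallback, args, and nactive are illustrative names, not from the source:

/* Created once, outside the training loop: */
PTP_WORK slice[NSLICES];
for (int k = 0; k < NSLICES; k++)
    slice[k] = CreateThreadpoolWork(UpdateSliceCallback, &args[k], NULL);

/* Per iteration: fan out, then barrier. Much cheaper than pthread_create/join. */
for (int k = 0; k < nactive; k++)
    SubmitThreadpoolWork(slice[k]);
for (int k = 0; k < nactive; k++)
    WaitForThreadpoolWorkCallbacks(slice[k], FALSE);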
RFX_MESSAGE* rfx_encode_message(RFX_CONTEXT* context, const RFX_RECT* rects,
                                int numRects, BYTE* data, int width, int height,
                                int scanline)
{
    int i, maxNbTiles, maxTilesX, maxTilesY;
    int xIdx, yIdx, regionNbRects;
    int gridRelX, gridRelY, ax, ay, bytesPerPixel;
    RFX_TILE* tile;
    RFX_RECT* rfxRect;
    RFX_MESSAGE* message = NULL;
    PTP_WORK* workObject = NULL;
    RFX_TILE_COMPOSE_WORK_PARAM* workParam = NULL;
    BOOL success = FALSE;
    REGION16 rectsRegion, tilesRegion;
    RECTANGLE_16 currentTileRect;
    const RECTANGLE_16* regionRect;
    const RECTANGLE_16* extents;

    assert(data);
    assert(rects);
    assert(numRects > 0);
    assert(width > 0);
    assert(height > 0);
    assert(scanline > 0);

    if (!(message = (RFX_MESSAGE*)calloc(1, sizeof(RFX_MESSAGE))))
        return NULL;

    region16_init(&tilesRegion);
    region16_init(&rectsRegion);

    if (context->state == RFX_STATE_SEND_HEADERS)
        rfx_update_context_properties(context);

    message->frameIdx = context->frameIdx++;

    if (!context->numQuant)
    {
        if (!(context->quants = (UINT32*) malloc(sizeof(rfx_default_quantization_values))))
            goto skip_encoding_loop;
        CopyMemory(context->quants, &rfx_default_quantization_values,
                   sizeof(rfx_default_quantization_values));
        context->numQuant = 1;
        context->quantIdxY = 0;
        context->quantIdxCb = 0;
        context->quantIdxCr = 0;
    }

    message->numQuant = context->numQuant;
    message->quantVals = context->quants;
    bytesPerPixel = (context->bits_per_pixel / 8);

    if (!computeRegion(rects, numRects, &rectsRegion, width, height))
        goto skip_encoding_loop;

    extents = region16_extents(&rectsRegion);
    assert(extents->right - extents->left > 0);
    assert(extents->bottom - extents->top > 0);

    maxTilesX = 1 + TILE_NO(extents->right - 1) - TILE_NO(extents->left);
    maxTilesY = 1 + TILE_NO(extents->bottom - 1) - TILE_NO(extents->top);
    maxNbTiles = maxTilesX * maxTilesY;

    if (!(message->tiles = calloc(maxNbTiles, sizeof(RFX_TILE*))))
        goto skip_encoding_loop;

    if (!setupWorkers(context, maxNbTiles))
        goto skip_encoding_loop;

    if (context->priv->UseThreads)
    {
        workObject = context->priv->workObjects;
        workParam = context->priv->tileWorkParams;
    }

    regionRect = region16_rects(&rectsRegion, &regionNbRects);

    if (!(message->rects = calloc(regionNbRects, sizeof(RFX_RECT))))
        goto skip_encoding_loop;
    message->numRects = regionNbRects;

    for (i = 0, rfxRect = message->rects; i < regionNbRects; i++, regionRect++, rfxRect++)
    {
        int startTileX = regionRect->left / 64;
        int endTileX = (regionRect->right - 1) / 64;
        int startTileY = regionRect->top / 64;
        int endTileY = (regionRect->bottom - 1) / 64;

        rfxRect->x = regionRect->left;
        rfxRect->y = regionRect->top;
        rfxRect->width = (regionRect->right - regionRect->left);
        rfxRect->height = (regionRect->bottom - regionRect->top);

        for (yIdx = startTileY, gridRelY = startTileY * 64; yIdx <= endTileY;
             yIdx++, gridRelY += 64)
        {
            int tileHeight = 64;
            if ((yIdx == endTileY) && (gridRelY + 64 > height))
                tileHeight = height - gridRelY;

            currentTileRect.top = gridRelY;
            currentTileRect.bottom = gridRelY + tileHeight;

            for (xIdx = startTileX, gridRelX = startTileX * 64; xIdx <= endTileX;
                 xIdx++, gridRelX += 64)
            {
                int tileWidth = 64;
                if ((xIdx == endTileX) && (gridRelX + 64 > width))
                    tileWidth = width - gridRelX;

                currentTileRect.left = gridRelX;
                currentTileRect.right = gridRelX + tileWidth;

                /* Check whether this tile has already been treated */
                if (region16_intersects_rect(&tilesRegion, &currentTileRect))
                    continue;

                if (!(tile = (RFX_TILE*) ObjectPool_Take(context->priv->TilePool)))
                    goto skip_encoding_loop;

                tile->xIdx = xIdx;
                tile->yIdx = yIdx;
                tile->x = gridRelX;
                tile->y = gridRelY;
                tile->scanline = scanline;
                tile->width = tileWidth;
                tile->height = tileHeight;

                ax = gridRelX;
                ay = gridRelY;

                if (tile->data && tile->allocated)
                {
                    free(tile->data);
                    tile->allocated = FALSE;
                }
                tile->data = &data[(ay * scanline) + (ax * bytesPerPixel)];

                tile->quantIdxY = context->quantIdxY;
                tile->quantIdxCb = context->quantIdxCb;
                tile->quantIdxCr = context->quantIdxCr;
                tile->YLen = tile->CbLen = tile->CrLen = 0;

                if (!(tile->YCbCrData = (BYTE*) BufferPool_Take(context->priv->BufferPool, -1)))
                    goto skip_encoding_loop;

                tile->YData = (BYTE*) &(tile->YCbCrData[((8192 + 32) * 0) + 16]);
                tile->CbData = (BYTE*) &(tile->YCbCrData[((8192 + 32) * 1) + 16]);
                tile->CrData = (BYTE*) &(tile->YCbCrData[((8192 + 32) * 2) + 16]);

                message->tiles[message->numTiles] = tile;
                message->numTiles++;

                if (context->priv->UseThreads)
                {
                    workParam->context = context;
                    workParam->tile = tile;

                    if (!(*workObject = CreateThreadpoolWork(
                              (PTP_WORK_CALLBACK) rfx_compose_message_tile_work_callback,
                              (void*) workParam, &context->priv->ThreadPoolEnv)))
                    {
                        goto skip_encoding_loop;
                    }
                    SubmitThreadpoolWork(*workObject);
                    workObject++;
                    workParam++;
                }
                else
                {
                    rfx_encode_rgb(context, tile);
                }

                if (!region16_union_rect(&tilesRegion, &tilesRegion, &currentTileRect))
                    goto skip_encoding_loop;
            } /* xIdx */
        } /* yIdx */
    } /* rects */

    success = TRUE;

skip_encoding_loop:

    if (success && message->numTiles != maxNbTiles)
    {
        void* pmem = realloc((void*) message->tiles, sizeof(RFX_TILE*) * message->numTiles);
        if (pmem)
            message->tiles = (RFX_TILE**) pmem;
        else
            success = FALSE;
    }

    /* When using threads, ensure all computations are done */
    message->tilesDataSize = 0;
    workObject = context->priv->workObjects;
    for (i = 0; i < message->numTiles; i++)
    {
        tile = message->tiles[i];
        if (context->priv->UseThreads)
        {
            if (*workObject)
            {
                WaitForThreadpoolWorkCallbacks(*workObject, FALSE);
                CloseThreadpoolWork(*workObject);
            }
            workObject++;
        }
        message->tilesDataSize += rfx_tile_length(tile);
    }

    region16_uninit(&tilesRegion);
    region16_uninit(&rectsRegion);

    if (success)
        return message;

    WLog_ERR(TAG, "%s: failed", __FUNCTION__);
    message->freeRects = TRUE;
    rfx_message_free(context, message);
    return NULL;
}
static BOOL rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* message,
                                        wStream* s, UINT16* pExpecedBlockType)
{
    BOOL rc;
    int i, close_cnt;
    int pos;
    BYTE quant;
    RFX_TILE* tile;
    UINT32* quants;
    UINT16 subtype;
    UINT32 blockLen;
    UINT32 blockType;
    UINT32 tilesDataSize;
    PTP_WORK* work_objects = NULL;
    RFX_TILE_PROCESS_WORK_PARAM* params = NULL;
    void* pmem;

    if (*pExpecedBlockType != WBT_EXTENSION)
    {
        WLog_ERR(TAG, "%s: message unexpected", __FUNCTION__);
        return FALSE;
    }
    *pExpecedBlockType = WBT_FRAME_END;

    if (Stream_GetRemainingLength(s) < 14)
    {
        WLog_ERR(TAG, "RfxMessageTileSet packet too small");
        return FALSE;
    }

    Stream_Read_UINT16(s, subtype); /* subtype (2 bytes) must be set to CBT_TILESET (0xCAC2) */
    if (subtype != CBT_TILESET)
    {
        WLog_ERR(TAG, "invalid subtype, expected CBT_TILESET.");
        return FALSE;
    }

    Stream_Seek_UINT16(s); /* idx (2 bytes), must be set to 0x0000 */
    Stream_Seek_UINT16(s); /* properties (2 bytes) */
    Stream_Read_UINT8(s, context->numQuant); /* numQuant (1 byte) */
    Stream_Seek_UINT8(s); /* tileSize (1 byte), must be set to 0x40 */

    if (context->numQuant < 1)
    {
        WLog_ERR(TAG, "no quantization value.");
        return FALSE;
    }

    Stream_Read_UINT16(s, message->numTiles); /* numTiles (2 bytes) */
    if (message->numTiles < 1)
    {
        WLog_ERR(TAG, "no tiles.");
        return FALSE;
    }

    Stream_Read_UINT32(s, tilesDataSize); /* tilesDataSize (4 bytes) */

    if (!(pmem = realloc((void*) context->quants, context->numQuant * 10 * sizeof(UINT32))))
        return FALSE;
    quants = context->quants = (UINT32*) pmem;

    /* quantVals */
    if (Stream_GetRemainingLength(s) < (size_t) (context->numQuant * 5))
    {
        WLog_ERR(TAG, "RfxMessageTileSet packet too small for num_quants=%d",
                 context->numQuant);
        return FALSE;
    }

    for (i = 0; i < context->numQuant; i++)
    {
        /* RFX_CODEC_QUANT */
        Stream_Read_UINT8(s, quant);
        *quants++ = (quant & 0x0F);
        *quants++ = (quant >> 4);
        Stream_Read_UINT8(s, quant);
        *quants++ = (quant & 0x0F);
        *quants++ = (quant >> 4);
        Stream_Read_UINT8(s, quant);
        *quants++ = (quant & 0x0F);
        *quants++ = (quant >> 4);
        Stream_Read_UINT8(s, quant);
        *quants++ = (quant & 0x0F);
        *quants++ = (quant >> 4);
        Stream_Read_UINT8(s, quant);
        *quants++ = (quant & 0x0F);
        *quants++ = (quant >> 4);

        WLog_Print(context->priv->log, WLOG_DEBUG,
                   "quant %d (%d %d %d %d %d %d %d %d %d %d).", i,
                   context->quants[i * 10], context->quants[i * 10 + 1],
                   context->quants[i * 10 + 2], context->quants[i * 10 + 3],
                   context->quants[i * 10 + 4], context->quants[i * 10 + 5],
                   context->quants[i * 10 + 6], context->quants[i * 10 + 7],
                   context->quants[i * 10 + 8], context->quants[i * 10 + 9]);
    }

    if (!(message->tiles = (RFX_TILE**) calloc(message->numTiles, sizeof(RFX_TILE*))))
    {
        message->numTiles = 0;
        return FALSE;
    }

    if (context->priv->UseThreads)
    {
        work_objects = (PTP_WORK*) calloc(message->numTiles, sizeof(PTP_WORK));
        params = (RFX_TILE_PROCESS_WORK_PARAM*) calloc(message->numTiles,
                                                       sizeof(RFX_TILE_PROCESS_WORK_PARAM));
        if (!work_objects)
        {
            free(params);
            return FALSE;
        }
        if (!params)
        {
            free(work_objects);
            return FALSE;
        }
    }

    /* tiles */
    close_cnt = 0;
    rc = TRUE;

    for (i = 0; i < message->numTiles; i++)
    {
        if (!(tile = (RFX_TILE*) ObjectPool_Take(context->priv->TilePool)))
        {
            WLog_ERR(TAG, "RfxMessageTileSet failed to get tile from object pool");
            rc = FALSE;
            break;
        }
        message->tiles[i] = tile;

        /* RFX_TILE */
        if (Stream_GetRemainingLength(s) < 6)
        {
            WLog_ERR(TAG, "RfxMessageTileSet packet too small to read tile %d/%d",
                     i, message->numTiles);
            rc = FALSE;
            break;
        }

        Stream_Read_UINT16(s, blockType); /* blockType (2 bytes), must be set to CBT_TILE (0xCAC3) */
        Stream_Read_UINT32(s, blockLen); /* blockLen (4 bytes) */

        if (Stream_GetRemainingLength(s) < blockLen - 6)
        {
            WLog_ERR(TAG, "RfxMessageTileSet not enough bytes to read tile %d/%d with blocklen=%d",
                     i, message->numTiles, blockLen);
            rc = FALSE;
            break;
        }

        pos = Stream_GetPosition(s) - 6 + blockLen;

        if (blockType != CBT_TILE)
        {
            WLog_ERR(TAG, "unknown block type 0x%X, expected CBT_TILE (0xCAC3).", blockType);
            rc = FALSE;
            break;
        }

        Stream_Read_UINT8(s, tile->quantIdxY); /* quantIdxY (1 byte) */
        Stream_Read_UINT8(s, tile->quantIdxCb); /* quantIdxCb (1 byte) */
        Stream_Read_UINT8(s, tile->quantIdxCr); /* quantIdxCr (1 byte) */
        Stream_Read_UINT16(s, tile->xIdx); /* xIdx (2 bytes) */
        Stream_Read_UINT16(s, tile->yIdx); /* yIdx (2 bytes) */
        Stream_Read_UINT16(s, tile->YLen); /* YLen (2 bytes) */
        Stream_Read_UINT16(s, tile->CbLen); /* CbLen (2 bytes) */
        Stream_Read_UINT16(s, tile->CrLen); /* CrLen (2 bytes) */
        Stream_GetPointer(s, tile->YData);
        Stream_Seek(s, tile->YLen);
        Stream_GetPointer(s, tile->CbData);
        Stream_Seek(s, tile->CbLen);
        Stream_GetPointer(s, tile->CrData);
        Stream_Seek(s, tile->CrLen);

        tile->x = tile->xIdx * 64;
        tile->y = tile->yIdx * 64;

        if (context->priv->UseThreads)
        {
            assert(params);
            params[i].context = context;
            params[i].tile = message->tiles[i];

            if (!(work_objects[i] = CreateThreadpoolWork(
                      (PTP_WORK_CALLBACK) rfx_process_message_tile_work_callback,
                      (void*) &params[i], &context->priv->ThreadPoolEnv)))
            {
                WLog_ERR(TAG, "CreateThreadpoolWork failed.");
                rc = FALSE;
                break;
            }
            SubmitThreadpoolWork(work_objects[i]);
            close_cnt = i + 1;
        }
        else
        {
            rfx_decode_rgb(context, tile, tile->data, 64 * 4);
        }

        Stream_SetPosition(s, pos);
    }

    if (context->priv->UseThreads)
    {
        for (i = 0; i < close_cnt; i++)
        {
            WaitForThreadpoolWorkCallbacks(work_objects[i], FALSE);
            CloseThreadpoolWork(work_objects[i]);
        }
        free(work_objects);
        free(params);
    }

    for (i = 0; i < message->numTiles; i++)
    {
        if (!(tile = message->tiles[i]))
            continue;
        tile->YLen = tile->CbLen = tile->CrLen = 0;
        tile->YData = tile->CbData = tile->CrData = NULL;
    }

    return rc;
}
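Note how close_cnt tracks how many work objects were actually created and submitted, so the wait/close loop stays correct when the tile loop breaks early. The skeleton of this per-item fan-out/fan-in pattern, reduced to its essentials (N, TileCallback, params, and env are illustrative names):

PTP_WORK works[N];
int created = 0;

for (int i = 0; i < N; i++) {
    works[i] = CreateThreadpoolWork(TileCallback, &params[i], &env);
    if (!works[i])
        break;              /* only the first `created` objects exist */
    SubmitThreadpoolWork(works[i]);
    created = i + 1;
}
for (int i = 0; i < created; i++) {
    WaitForThreadpoolWorkCallbacks(works[i], FALSE); /* join */
    CloseThreadpoolWork(works[i]);
}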
static BOOL rfx_process_message_tileset(RFX_CONTEXT* context, RFX_MESSAGE* message, STREAM* s)
{
    int i;
    int pos;
    BYTE quant;
    UINT32* quants;
    UINT16 subtype;
    UINT32 blockLen;
    UINT32 blockType;
    UINT32 tilesDataSize;
    PTP_WORK* work_objects = NULL;
    RFX_TILE_WORK_PARAM* params = NULL;

    if (stream_get_left(s) < 14)
    {
        DEBUG_WARN("RfxMessageTileSet packet too small");
        return FALSE;
    }

    stream_read_UINT16(s, subtype); /* subtype (2 bytes) must be set to CBT_TILESET (0xCAC2) */
    if (subtype != CBT_TILESET)
    {
        DEBUG_WARN("invalid subtype, expected CBT_TILESET.");
        return FALSE;
    }

    stream_seek_UINT16(s); /* idx (2 bytes), must be set to 0x0000 */
    stream_seek_UINT16(s); /* properties (2 bytes) */
    stream_read_BYTE(s, context->num_quants); /* numQuant (1 byte) */
    stream_seek_BYTE(s); /* tileSize (1 byte), must be set to 0x40 */

    if (context->num_quants < 1)
    {
        DEBUG_WARN("no quantization value.");
        return TRUE;
    }

    stream_read_UINT16(s, message->num_tiles); /* numTiles (2 bytes) */
    if (message->num_tiles < 1)
    {
        DEBUG_WARN("no tiles.");
        return TRUE;
    }

    stream_read_UINT32(s, tilesDataSize); /* tilesDataSize (4 bytes) */

    if (context->quants != NULL)
        context->quants = (UINT32*) realloc((void*) context->quants,
                                            context->num_quants * 10 * sizeof(UINT32));
    else
        context->quants = (UINT32*) malloc(context->num_quants * 10 * sizeof(UINT32));
    quants = context->quants;

    /* quantVals */
    if (stream_get_left(s) < context->num_quants * 5)
    {
        DEBUG_WARN("RfxMessageTileSet packet too small for num_quants=%d",
                   context->num_quants);
        return FALSE;
    }

    for (i = 0; i < context->num_quants; i++)
    {
        /* RFX_CODEC_QUANT */
        stream_read_BYTE(s, quant);
        *quants++ = (quant & 0x0F);
        *quants++ = (quant >> 4);
        stream_read_BYTE(s, quant);
        *quants++ = (quant & 0x0F);
        *quants++ = (quant >> 4);
        stream_read_BYTE(s, quant);
        *quants++ = (quant & 0x0F);
        *quants++ = (quant >> 4);
        stream_read_BYTE(s, quant);
        *quants++ = (quant & 0x0F);
        *quants++ = (quant >> 4);
        stream_read_BYTE(s, quant);
        *quants++ = (quant & 0x0F);
        *quants++ = (quant >> 4);

        DEBUG_RFX("quant %d (%d %d %d %d %d %d %d %d %d %d).", i,
                  context->quants[i * 10], context->quants[i * 10 + 1],
                  context->quants[i * 10 + 2], context->quants[i * 10 + 3],
                  context->quants[i * 10 + 4], context->quants[i * 10 + 5],
                  context->quants[i * 10 + 6], context->quants[i * 10 + 7],
                  context->quants[i * 10 + 8], context->quants[i * 10 + 9]);
    }

    message->tiles = (RFX_TILE**) malloc(sizeof(RFX_TILE*) * message->num_tiles);
    ZeroMemory(message->tiles, sizeof(RFX_TILE*) * message->num_tiles);

    if (context->priv->UseThreads)
    {
        work_objects = (PTP_WORK*) malloc(sizeof(PTP_WORK) * message->num_tiles);
        params = (RFX_TILE_WORK_PARAM*) malloc(sizeof(RFX_TILE_WORK_PARAM) * message->num_tiles);
    }

    /* tiles */
    for (i = 0; i < message->num_tiles; i++)
    {
        /* RFX_TILE */
        if (stream_get_left(s) < 6)
        {
            DEBUG_WARN("RfxMessageTileSet packet too small to read tile %d/%d",
                       i, message->num_tiles);
            return FALSE;
        }

        stream_read_UINT16(s, blockType); /* blockType (2 bytes), must be set to CBT_TILE (0xCAC3) */
        stream_read_UINT32(s, blockLen); /* blockLen (4 bytes) */

        if (stream_get_left(s) < blockLen - 6)
        {
            DEBUG_WARN("RfxMessageTileSet not enough bytes to read tile %d/%d with blocklen=%d",
                       i, message->num_tiles, blockLen);
            return FALSE;
        }

        pos = stream_get_pos(s) - 6 + blockLen;

        if (blockType != CBT_TILE)
        {
            DEBUG_WARN("unknown block type 0x%X, expected CBT_TILE (0xCAC3).", blockType);
            break;
        }

        message->tiles[i] = rfx_tile_pool_take(context);

        if (context->priv->UseThreads)
        {
            params[i].context = context;
            params[i].tile = message->tiles[i];
            CopyMemory(&(params[i].s), s, sizeof(STREAM));

            work_objects[i] = CreateThreadpoolWork(
                (PTP_WORK_CALLBACK) rfx_process_message_tile_work_callback,
                (void*) &params[i], &context->priv->ThreadPoolEnv);
            SubmitThreadpoolWork(work_objects[i]);
        }
        else
        {
            rfx_process_message_tile(context, message->tiles[i], s);
        }

        stream_set_pos(s, pos);
    }

    if (context->priv->UseThreads)
    {
        for (i = 0; i < message->num_tiles; i++)
        {
            WaitForThreadpoolWorkCallbacks(work_objects[i], FALSE);
            CloseThreadpoolWork(work_objects[i]); /* was missing: each created work object must be closed */
        }
        free(work_objects);
        free(params);
    }

    return TRUE;
}
/* stride is the number of bytes between rows in the output buffer. */
BOOL rfx_decode_rgb(RFX_CONTEXT* context, wStream* data_in,
                    int y_size, const UINT32* y_quants,
                    int cb_size, const UINT32* cb_quants,
                    int cr_size, const UINT32* cr_quants,
                    BYTE* rgb_buffer, int stride)
{
    INT16* pSrcDst[3];
    static const prim_size_t roi_64x64 = { 64, 64 };
    const primitives_t* prims = primitives_get();

    PROFILER_ENTER(context->priv->prof_rfx_decode_rgb);

    pSrcDst[0] = (INT16*)((BYTE*)BufferPool_Take(context->priv->BufferPool, -1) + 16); /* y_r_buffer */
    pSrcDst[1] = (INT16*)((BYTE*)BufferPool_Take(context->priv->BufferPool, -1) + 16); /* cb_g_buffer */
    pSrcDst[2] = (INT16*)((BYTE*)BufferPool_Take(context->priv->BufferPool, -1) + 16); /* cr_b_buffer */

#if 0
    if (context->priv->UseThreads)
    {
        PTP_WORK work_objects[3];
        RFX_COMPONENT_WORK_PARAM params[3];

        params[0].context = context;
        params[0].quantization_values = y_quants;
        params[0].buffer = stream_get_tail(data_in);
        params[0].capacity = y_size;
        params[0].buffer = pSrcDst[0]; /* note: overwrites the stream pointer assigned two lines above */
        stream_seek(data_in, y_size);

        params[1].context = context;
        params[1].quantization_values = cb_quants;
        params[1].buffer = stream_get_tail(data_in);
        params[1].capacity = cb_size;
        params[1].buffer = pSrcDst[1]; /* note: same overwrite as above */
        stream_seek(data_in, cb_size);

        params[2].context = context;
        params[2].quantization_values = cr_quants;
        params[2].buffer = stream_get_tail(data_in);
        params[2].capacity = cr_size;
        params[2].buffer = pSrcDst[2]; /* note: same overwrite as above */
        stream_seek(data_in, cr_size);

        work_objects[0] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_decode_component_work_callback,
                                               (void*) &params[0], &context->priv->ThreadPoolEnv);
        work_objects[1] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_decode_component_work_callback,
                                               (void*) &params[1], &context->priv->ThreadPoolEnv);
        work_objects[2] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_decode_component_work_callback,
                                               (void*) &params[2], &context->priv->ThreadPoolEnv);

        SubmitThreadpoolWork(work_objects[0]);
        SubmitThreadpoolWork(work_objects[1]);
        SubmitThreadpoolWork(work_objects[2]);

        WaitForThreadpoolWorkCallbacks(work_objects[0], FALSE);
        WaitForThreadpoolWorkCallbacks(work_objects[1], FALSE);
        WaitForThreadpoolWorkCallbacks(work_objects[2], FALSE);
    }
    else
#endif
    {
        if (stream_get_left(data_in) < y_size + cb_size + cr_size)
        {
            DEBUG_WARN("rfx_decode_rgb: packet too small for y_size+cb_size+cr_size");
            /* return the scratch buffers before bailing out, or they leak */
            BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[0] - 16);
            BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[1] - 16);
            BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[2] - 16);
            return FALSE;
        }

        rfx_decode_component(context, y_quants, stream_get_tail(data_in), y_size, pSrcDst[0]); /* YData */
        stream_seek(data_in, y_size);
        rfx_decode_component(context, cb_quants, stream_get_tail(data_in), cb_size, pSrcDst[1]); /* CbData */
        stream_seek(data_in, cb_size);
        rfx_decode_component(context, cr_quants, stream_get_tail(data_in), cr_size, pSrcDst[2]); /* CrData */
        stream_seek(data_in, cr_size);
    }

    prims->yCbCrToRGB_16s16s_P3P3((const INT16**) pSrcDst, 64 * sizeof(INT16),
                                  pSrcDst, 64 * sizeof(INT16), &roi_64x64);

    PROFILER_ENTER(context->priv->prof_rfx_decode_format_rgb);
    rfx_decode_format_rgb(pSrcDst[0], pSrcDst[1], pSrcDst[2],
                          context->pixel_format, rgb_buffer, stride);
    PROFILER_EXIT(context->priv->prof_rfx_decode_format_rgb);

    PROFILER_EXIT(context->priv->prof_rfx_decode_rgb);

    BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[0] - 16);
    BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[1] - 16);
    BufferPool_Return(context->priv->BufferPool, (BYTE*)pSrcDst[2] - 16);
    return TRUE;
}
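The disabled block also never closes its three work objects. If that path were revived, each component decode should follow the full create/submit/wait/close pattern. A minimal sketch under the assumptions visible in the disabled code (the callback name and the params array come from it; the loop form is illustrative):

PTP_WORK works[3];
for (int n = 0; n < 3; n++)
    works[n] = CreateThreadpoolWork((PTP_WORK_CALLBACK) rfx_decode_component_work_callback,
                                    (void*) &params[n], &context->priv->ThreadPoolEnv);
for (int n = 0; n < 3; n++)
    SubmitThreadpoolWork(works[n]);
for (int n = 0; n < 3; n++)
{
    WaitForThreadpoolWorkCallbacks(works[n], FALSE);
    CloseThreadpoolWork(works[n]); /* missing from the disabled block above */
}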