static void release_clobj(void) { cl_int ret_code; ret_code = clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_partial_hashes, calculated_hash, 0, NULL, NULL); HANDLE_CLERROR(ret_code, "Error Ummapping out_hashes"); ret_code = clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_keys, plaintext, 0, NULL, NULL); HANDLE_CLERROR(ret_code, "Error Ummapping saved_plain"); ret_code = clReleaseMemObject(salt_buffer); HANDLE_CLERROR(ret_code, "Error Releasing data_info"); ret_code = clReleaseMemObject(pass_buffer); HANDLE_CLERROR(ret_code, "Error Releasing buffer_keys"); ret_code = clReleaseMemObject(hash_buffer); HANDLE_CLERROR(ret_code, "Error Releasing buffer_out"); ret_code = clReleaseMemObject(work_buffer); HANDLE_CLERROR(ret_code, "Error Releasing work_out"); ret_code = clReleaseMemObject(pinned_saved_keys); HANDLE_CLERROR(ret_code, "Error Releasing pinned_saved_keys"); ret_code = clReleaseMemObject(pinned_partial_hashes); HANDLE_CLERROR(ret_code, "Error Releasing pinned_partial_hashes"); }
static void release_clobj(void) { HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], cl_aes_key, aes_key, 0, NULL, NULL), "Error Unmapping aes_key"); HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], cl_aes_iv, aes_iv, 0, NULL, NULL), "Error Unmapping aes_iv"); HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], cl_saved_key, saved_key, 0, NULL, NULL), "Error Unmapping saved_key"); HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], cl_saved_len, saved_len, 0, NULL, NULL), "Error Unmapping saved_len"); HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], cl_salt, saved_salt, 0, NULL, NULL), "Error Unmapping saved_salt"); aes_key = NULL; aes_iv = NULL; saved_key = NULL; saved_len = NULL; saved_salt = NULL; }
void allochostptr_roundtrip_func() { timer.Start(timer_id); cl_int err; // Create buffers with CL_MEM_ALLOC_HOST_PTR for zero copy buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, (buffer_.lda_*buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(T), NULL, &err); buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T), NULL, &err); buffer_.buf_c_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, (buffer_.ldc_ * buffer_.c_num_vectors_ + buffer_.offC_) * sizeof(T), NULL, &err); // map the buffers to pointers at host device T *map_a,*map_b,*map_c; map_a = (T*)clEnqueueMapBuffer(queues_[0], buffer_.buf_a_, CL_TRUE, CL_MAP_WRITE, 0, (buffer_.lda_*buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(T), 0, NULL, NULL, &err); map_b = (T*)clEnqueueMapBuffer(queues_[0], buffer_.buf_b_, CL_TRUE, CL_MAP_WRITE, 0, (buffer_.ldb_*buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T), 0, NULL, NULL, &err); map_c = (T*)clEnqueueMapBuffer(queues_[0], buffer_.buf_c_, CL_TRUE, CL_MAP_WRITE, 0, (buffer_.lda_*buffer_.c_num_vectors_ + buffer_.offC_) * sizeof(T), 0, NULL, NULL, &err); // memcpy the input A, B, C to the host pointers memcpy( map_a, buffer_.a_, ( buffer_.lda_*buffer_.a_num_vectors_ + buffer_.offA_) * sizeof( T ) ); memcpy( map_b, buffer_.b_, ( buffer_.ldb_*buffer_.b_num_vectors_ + buffer_.offB_) * sizeof( T ) ); memcpy( map_c, buffer_.c_, ( buffer_.ldc_*buffer_.c_num_vectors_ + buffer_.offC_) * sizeof( T ) ); // unmap the buffers clEnqueueUnmapMemObject(queues_[0], buffer_.buf_a_, map_a, 0, NULL, NULL); clEnqueueUnmapMemObject(queues_[0], buffer_.buf_b_, map_b, 0, NULL, NULL); clEnqueueUnmapMemObject(queues_[0], buffer_.buf_c_, map_c, 0, NULL, NULL); // calling clBLAS xGemm_Function(false); // map the C buffer again to read output map_c = (T*)clEnqueueMapBuffer(queues_[0], buffer_.buf_c_, CL_TRUE, CL_MAP_READ, 0, 
(buffer_.lda_*buffer_.c_num_vectors_ + buffer_.offC_) * sizeof(T), 0, NULL, NULL, &err); memcpy( map_c, buffer_.c_, ( buffer_.ldc_*buffer_.c_num_vectors_ + buffer_.offC_) * sizeof( T ) ); clEnqueueUnmapMemObject(queues_[0], buffer_.buf_c_, map_c, 0, NULL, &event_); clWaitForEvents(1, &event_); timer.Stop(timer_id); }
static void release_clobj(void){ HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_partial_hashes, partial_hashes, 0,NULL,NULL), "Error Unmapping partial_hashes"); HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_keys, saved_plain, 0, NULL, NULL), "Error Unmapping saved_plain"); HANDLE_CLERROR(clReleaseMemObject(buffer_keys), "Error Releasing buffer_keys"); HANDLE_CLERROR(clReleaseMemObject(buffer_out), "Error Releasing buffer_out"); HANDLE_CLERROR(clReleaseMemObject(pinned_saved_keys), "Error Releasing pinned_saved_keys"); HANDLE_CLERROR(clReleaseMemObject(pinned_partial_hashes), "Error Releasing pinned_partial_hashes"); MEM_FREE(res_hashes); }
/*
 * Run one FFT over `input`: upload the samples through a mapped buffer,
 * launch the kernel chain via runInternal(), then map the output buffer
 * and convert it back to complex values.
 *
 * Fix: the per-launch kernel events were held in a raw new[] array that
 * leaked if any CL_CHECK_ERR check bailed out before the trailing
 * delete[]; a vector now owns them for the whole function.
 */
vector<cpx> cl_fft<cpx>::run(const vector<cpx> &input)
{
	cl_event upload_unmap_evt, start_evt, download_map_evt;
	vector<cl_event> kernel_evts(launches.size());
	cl_int err;

	// Upload: map the sample buffer and fill it from `input`
	cl_float2 *input_buffer = (cl_float2*)clEnqueueMapBuffer(command_queue, v_samples, CL_TRUE, CL_MAP_WRITE,
	                                                         0, samplesMemSize, 0, NULL, NULL, &err);
	CL_CHECK_ERR("clEnqueueMapBuffer", err);
	for (int i = 0; i < samplesPerRun; i++) {
		input_buffer[i].x = real(input[i]);
		input_buffer[i].y = imag(input[i]);
	}
	CL_CHECK_ERR("clEnqueueUnmapMemObject",
	             clEnqueueUnmapMemObject(command_queue, v_samples, input_buffer, 0, NULL, &upload_unmap_evt));

	// Compute the FFT
	cl_mem v_out = runInternal(v_samples, &start_evt, kernel_evts.data());

	// Download: wait on the last kernel, then map the output buffer
	vector<cpx> result(samplesPerRun);
	cl_float2 *output_buffer = (cl_float2*)clEnqueueMapBuffer(command_queue, v_out, CL_TRUE, CL_MAP_READ,
	                                                          0, tmpMemSize, 1, &kernel_evts[launches.size() - 1],
	                                                          &download_map_evt, &err);
	CL_CHECK_ERR("clEnqueueMapBuffer", err);
	for (int i = 0; i < samplesPerRun; i++)
		result[i] = cpx(output_buffer[i].x, output_buffer[i].y);
	CL_CHECK_ERR("clEnqueueUnmapMemObject",
	             clEnqueueUnmapMemObject(command_queue, v_out, output_buffer, 0, NULL, NULL));

	printStatsAndReleaseEvents(upload_unmap_evt, start_evt, kernel_evts.data(), download_map_evt);
	return result;
}
/*
 * Initialize the host arrays (x1, x2, y_1, y_2, a) for the benchmark.
 *
 * Without ALOCACAO_NORMAL the device buffers are mapped with blocking
 * writes and initialized through the mapped pointers as well, so host
 * and device start with identical data; time spent in map/unmap and in
 * the fill loop is accumulated into separate counters.
 */
void init()
{
	int i, j;
	double t_start_init, t_end_init;
#ifndef ALOCACAO_NORMAL
	/* Map each device buffer for writing (CL_TRUE = blocking map). */
	t_start_init = rtclock();
	cl_float* x1_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, x1_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * N, 0, NULL, NULL, &errcode);
	cl_float* x2_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, x2_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * N, 0, NULL, NULL, &errcode);
	cl_float* y_1_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, y1_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * N, 0, NULL, NULL, &errcode);
	cl_float* y_2_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, y2_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * N, 0, NULL, NULL, &errcode);
	cl_float* a_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, a_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * N * N, 0, NULL, NULL, &errcode);
	t_end_init = rtclock();
	tmp_clEnqueueMapBuffer += t_end_init - t_start_init;
#else
	/* Normal allocation: the "mapped" pointers alias the host arrays. */
	DATA_TYPE *y_1_input = y_1;
	DATA_TYPE *y_2_input = y_2;
	DATA_TYPE *a_input = a;
#endif
	/* Fill: vectors to zero, matrix a[i][j] = (i+j+1)/N. */
	t_start_init = rtclock();
	for (i=0; i<N; i++) {
		x1[i] = 0.0;
		x2[i] = 0.0;
#ifndef ALOCACAO_NORMAL
		x1_input[i] = 0.0;
		x2_input[i] = 0.0;
#endif
		y_1_input[i] = 0.0;
		y_2_input[i] = 0.0;
		for (j=0; j<N; j++) {
			a_input[i*N + j] = (DATA_TYPE)(i+j+1.0)/N;
		}
	}
	t_end_init = rtclock();
	tmp_init += t_end_init - t_start_init;
#ifndef ALOCACAO_NORMAL
	/* Hand the initialized regions back to the OpenCL runtime. */
	t_start_init = rtclock();
	clEnqueueUnmapMemObject(clCommandQue, x1_mem_obj, x1_input, 0, NULL, NULL);
	clEnqueueUnmapMemObject(clCommandQue, x2_mem_obj, x2_input, 0, NULL, NULL);
	clEnqueueUnmapMemObject(clCommandQue, y1_mem_obj, y_1_input, 0, NULL, NULL);
	clEnqueueUnmapMemObject(clCommandQue, y2_mem_obj, y_2_input, 0, NULL, NULL);
	clEnqueueUnmapMemObject(clCommandQue, a_mem_obj, a_input, 0, NULL, NULL);
	t_end_init = rtclock();
	tmp_clEnqueueUnmapMemObject += t_end_init - t_start_init;
#endif
}
/*
 * Release the host mapping of this vector, if any.
 *
 * Two builds exist: the direct-map build unmaps the OpenCL buffer, while
 * the QT_CL_COPY_VECTOR build instead writes the host shadow copy back
 * to the device (non-blocking) when the data currently lives on the
 * host. In both cases every co-owner of the shared data is told the
 * mapping is gone so no stale host pointer survives.
 */
void QCLVectorBase::unmap() const
{
    if (m_mapped) {
#ifndef QT_CL_COPY_VECTOR
        cl_int error = clEnqueueUnmapMemObject
            (d_ptr->context->activeQueue(), d_ptr->id,
             m_mapped, 0, 0, 0);
        d_ptr->context->reportError("QCLVector<T>::unmap:", error);
#else
        // Write the local copy back to the OpenCL device.
        // CL_FALSE: the write is queued, not awaited here.
        if (d_ptr->hostCopy && d_ptr->state == State_InHost) {
            cl_int error = clEnqueueWriteBuffer
                (d_ptr->context->activeQueue(), d_ptr->id,
                 CL_FALSE, 0, m_size * m_elemSize, d_ptr->hostCopy, 0, 0, 0);
            d_ptr->context->reportError("QCLVector<T>::unmap(write):", error);
        }
        d_ptr->state = State_InKernel;
#endif
        m_mapped = 0;
        // Update all of the other owners with the unmap state.
        if (d_ptr->owners.size() > 1) {
            QList<QCLVectorBase *>::Iterator it;
            for (it = d_ptr->owners.begin(); it != d_ptr->owners.end(); ++it) {
                if (*it != this)
                    (*it)->m_mapped = 0;
            }
        }
    }
}
/* Measure how long an unmap of `buf` takes. The unmap is enqueued with
 * an event, the queue is flushed, and we spin until the event completes
 * so the timed interval covers the whole operation. Timing is reported
 * through the global log unless `quiet` is set. */
void timedBufUnmap( cl_command_queue queue, cl_mem buf, void **ptr, bool quiet )
{
    cl_event done;
    cl_int status;
    CPerfCounter stopwatch;

    stopwatch.Reset();
    stopwatch.Start();

    status = clEnqueueUnmapMemObject( queue, buf, *ptr, 0, NULL, &done );
    ASSERT_CL_RETURN( status );

    clFlush( queue );
    spinForEventsComplete( 1, &done );

    stopwatch.Stop();

    if( quiet )
        return;

    tlog->Timer( "%32s %lf s [ %8.2lf GB/s ]\n",
                 "clEnqueueUnmapMemObject():",
                 stopwatch.GetElapsedTime(), nBytes, 1 );
}
const std::vector<cl_uint> AgentMover::getAgentCounts() const { cl_int status; cl_uint* buffer = static_cast<cl_uint*> ( clEnqueueMapBuffer(context->queue, agentCounts, CL_TRUE, CL_MAP_READ, 0, sizeof(cl_uint) * 8, 0, NULL, NULL, &status) ); CL_ERROR(status); std::vector<cl_uint> agentCountsVector(8); cl_uint i = 0; std::vector<cl_uint>::iterator iterator; for (iterator = agentCountsVector.begin(); iterator != agentCountsVector.end(); ++iterator) { *iterator = buffer[i]; i++; } clEnqueueUnmapMemObject(context->queue, agentCounts, buffer, 0, NULL, NULL); return agentCountsVector; }
void* Alg::eval(std::vector<uint8_t>* pic, std::vector<uint8_t>* picprev){ uint8_t* t = reinterpret_cast<uint8_t*>(pic->data()); uint8_t* tp = reinterpret_cast<uint8_t*>(picprev->data()); // Copy frame to the device float* result = (float*)clEnqueueMapBuffer(queue, clm_res, CL_FALSE, CL_MAP_READ, 0, 6*sizeof(cl_float), 0, NULL, NULL, &status); for (int i = 0; i < 6; i++) { result[i] = 0; } // Copy frame to the device status = clEnqueueWriteBuffer(queue, clm_pic, CL_FALSE, 0, cols * rows * sizeof(cl_uchar), t, 0, NULL, NULL); status = clEnqueueWriteBuffer(queue, clm_picprev, CL_FALSE, 0, cols * rows * sizeof(cl_uchar), tp, 0, NULL, NULL); status = clEnqueueWriteBuffer(queue, clm_res, CL_FALSE, 0, 6 * sizeof(float), result, 0, NULL, NULL); // Execute the OpenCL kernel on the list status = clEnqueueNDRangeKernel(queue, kern, 1, NULL, &global_threads, NULL, 0, NULL, NULL); status = clEnqueueReadBuffer(queue, clm_res, CL_FALSE, 0, 6*sizeof(float), result, 0, NULL, NULL); // Clean up and wait for all the comands to complete. status = clFlush(queue); status = clFinish(queue); for (int i = 0; i < 2; i++) { percentage[i] = result[i]*100.0 / (cols*rows); } for (int i = 0; i < 2; i++) { percentage[i + 2] = result[i + 2] / (cols*rows); } if (result[5]) percentage[4] = result[4] / result[5]; else percentage[4] = result[4] / 4; clEnqueueUnmapMemObject(queue, clm_res, result, 0, NULL, NULL); return percentage; }
void AgentMover::setAgentCounts(const std::vector<cl_uint>& agentCountsVector) { cl_int status; cl_uint* buffer = static_cast<cl_uint*> ( clEnqueueMapBuffer(context->queue, agentCounts, CL_TRUE, CL_MAP_WRITE, 0, sizeof(cl_uint) * 8, 0, NULL, NULL, &status) ); CL_ERROR(status); cl_uint i = 0; std::vector<cl_uint>::const_iterator iterator; for (iterator = agentCountsVector.begin(); iterator != agentCountsVector.end(); ++iterator) { buffer[i] = *iterator; i++; } clEnqueueUnmapMemObject(context->queue, agentCounts, buffer, 0, NULL, NULL); }
/* Unmap a previously mapped host pointer from a device buffer,
 * logging the call and validating the OpenCL return code. */
void mclUnmap(mclContext ctx, mclDeviceData dd, void* ptr) {
    logOclCall("clEnqueueUnmapMemObject");
    cl_int status = clEnqueueUnmapMemObject(ctx.command_queue, dd.data, ptr,
                                            0, NULL, NULL);
    MCL_VALIDATE(status, "mclUnmap: Error unmapping data");
}
static void release_clobj(void) { HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_result, cracked, 0, NULL, NULL), "Error Unmapping cracked"); HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_key, saved_key, 0, NULL, NULL), "Error Unmapping saved_key"); HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_idx, saved_idx, 0, NULL, NULL), "Error Unmapping saved_idx"); HANDLE_CLERROR(clFinish(queue[gpu_id]), "Error releasing memory mappings"); HANDLE_CLERROR(clReleaseMemObject(pinned_result), "Release pinned result buffer"); HANDLE_CLERROR(clReleaseMemObject(pinned_key), "Release pinned key buffer"); HANDLE_CLERROR(clReleaseMemObject(pinned_idx), "Release pinned index buffer"); HANDLE_CLERROR(clReleaseMemObject(cl_salt), "Release salt buffer"); HANDLE_CLERROR(clReleaseMemObject(cl_result), "Release result buffer"); HANDLE_CLERROR(clReleaseMemObject(cl_saved_key), "Release key buffer"); HANDLE_CLERROR(clReleaseMemObject(cl_saved_idx), "Release index buffer"); HANDLE_CLERROR(clReleaseMemObject(cl_mid_key), "Release state buffer"); }
void* Optzd::eval(std::vector<uint8_t>* t){ cl_float* sh = (cl_float*)clEnqueueMapBuffer(queue, clm_sh, CL_FALSE, CL_MAP_WRITE, 0, 2*sizeof(cl_float), 0, NULL, NULL, &status); uint8_t* pic = reinterpret_cast<uint8_t*>(t->data()); sh[0] = 0; sh[1] = 0; // Copy frame to the device status = clEnqueueWriteBuffer(queue, clm_pic, CL_TRUE, 0, cols * rows * sizeof(cl_uchar), pic, 0, NULL, NULL); status = clEnqueueWriteBuffer(queue, clm_sh, CL_TRUE, 0, 2 * sizeof(cl_float), sh, 0, NULL, NULL); // Execute the OpenCL kernel on the list status = clEnqueueNDRangeKernel(queue, kern, 1, NULL, &global_threads, NULL, 0, NULL, NULL); status = clEnqueueReadBuffer(queue, clm_sh, CL_TRUE, 0, 2*sizeof(float), sh, 0, NULL, NULL); // Clean up and wait for all the comands to complete. status = clFlush(queue); status = clFinish(queue); //eval-ing BS float shb = sh[1], shnb = sh[0]; // for (int i = 0; i < (global_threads / 2); i++) { // shb += sh[i*2]; // shnb += sh[(i*2)+1]; // } clEnqueueUnmapMemObject(queue, clm_sh, sh, 0, NULL, NULL); if (shnb == 0) shnb = 4; *BS = shb/(shnb); return BS; }
/*
 * Unmap a previously mapped host region of this 3D image.
 *
 * Validates the image, queue and mapped-pointer arguments, enqueues the
 * unmap (optionally gated on `pWaitList`), nulls the caller's mapped
 * pointer on success, and hands the completion event to `pNewEvent` if
 * the caller asked for one. Returns false on any validation or OpenCL
 * failure (via the CL_CPP_* macros), true otherwise.
 */
bool CL_Image3D::Unmap(const CL_CommandQueue* pCommandQueue, void** ppMappedData, CL_Event* pNewEvent, const CL_EventPool* pWaitList)
{
    // Argument validation: each macro returns false when the check fails.
    CL_CPP_CONDITIONAL_RETURN_FALSE(!m_Image);
    CL_CPP_CONDITIONAL_RETURN_FALSE(!pCommandQueue);
    CL_CPP_CONDITIONAL_RETURN_FALSE(!ppMappedData);

    // Optional wait list the unmap must be ordered after.
    cl_uint uNumWaitEvents = pWaitList ? pWaitList->GetNumEvents() : 0;
    const cl_event* pWaitEvents = pWaitList ? pWaitList->GetEventPool() : NULL;

    cl_event NewEvent = NULL;

    // Unmap a location in host memory from a buffer object.
    const cl_command_queue CommandQueue = pCommandQueue->GetCommandQueue();
    cl_int iErrorCode = clEnqueueUnmapMemObject(CommandQueue, m_Image, *ppMappedData, uNumWaitEvents, pWaitEvents, &NewEvent);
    CL_CPP_CATCH_ERROR(iErrorCode);
    CL_CPP_ON_ERROR_RETURN_FALSE(iErrorCode);

    // The mapping is gone; make sure the caller cannot reuse the pointer.
    (*ppMappedData) = NULL;

    // Transfer the completion event to the caller (if requested), then
    // drop our own reference to it.
    if(NewEvent)
    {
        if(pNewEvent)
            pNewEvent->SetEvent(NewEvent);
        clReleaseEvent(NewEvent);
    }

    return true;
}
/*
 * Initialize the (M+1)x(N+1) data matrix for the benchmark with
 * data[i][j] = i*j/M over the first M rows and N columns.
 *
 * Without ALOCACAO_NORMAL the device buffer is also mapped (blocking,
 * write) and filled identically through the mapped pointer; time spent
 * in map/unmap and in the fill loop goes into separate accumulators.
 * NOTE(review): the buffer is sized (M+1)*(N+1) but only indices
 * i<M, j<N are written — the extra row/column appears intentionally
 * left for the kernel; confirm against the caller.
 */
void init()
{
	int i, j;
	double t_start_init, t_end_init;
#ifndef ALOCACAO_NORMAL
	/* Blocking map of the device buffer for host-side initialization. */
	t_start_init = rtclock();
	cl_float* data_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, data_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * (M+1) * (N+1), 0, NULL, NULL, &errcode);
	t_end_init = rtclock();
	tmp_clEnqueueMapBuffer += t_end_init - t_start_init;
#endif
	t_start_init = rtclock();
	for (i = 0; i < M; i++)
	{
		for (j = 0; j < N; j++)
		{
			data[i*(N+1) + j] = ((DATA_TYPE) i*j) / M;
#ifndef ALOCACAO_NORMAL
			data_input[i*(N+1) + j] = ((DATA_TYPE) i*j)/ M;
#endif
		}
	}
	t_end_init = rtclock();
	tmp_init += t_end_init - t_start_init;
#ifndef ALOCACAO_NORMAL
	/* Hand the initialized region back to the OpenCL runtime. */
	t_start_init = rtclock();
	clEnqueueUnmapMemObject(clCommandQue, data_mem_obj, data_input, 0, NULL, NULL);
	t_end_init = rtclock();
	tmp_clEnqueueUnmapMemObject += t_end_init - t_start_init;
#endif
}
/* Unmap the currently active VRAM transfer buffer. A NULL pointer is
 * rejected up front; otherwise the unmap is enqueued and its completion
 * event recorded in event_uploadvram[1] for later synchronization. */
cl_int GLCLDraw::UnMapTransferBuffer(Uint8 *p)
{
	if (p == NULL)
		return CL_INVALID_MEM_OBJECT;
	return clEnqueueUnmapMemObject(command_queue, inbuf[inbuf_bank], p,
	                               0, NULL, &event_uploadvram[1]);
}
/*
 * Time a full TRSM round trip using CL_MEM_USE_PERSISTENT_MEM_AMD
 * buffers (AMD-only): create A/B, map + fill them, run xTrsm, then map
 * B again and copy the result back to the host.
 *
 * Fixes vs. the previous version:
 *  - the map of buffer A was sized with B's dimensions
 *    (ldb_*b_num_vectors_+offB_) instead of A's own;
 *  - the final memcpy copied host B *into* the mapped region, clobbering
 *    the TRSM output instead of reading it back;
 *  - clWaitForEvents(1, &event_) waited on an event this function never
 *    set; the last unmap now produces that event.
 */
void usepersismem_roundtrip_func()
{
#if defined(CL_MEM_USE_PERSISTENT_MEM_AMD)
    timer.Start(timer_id);

    // set up buffers
    cl_int err;
    buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY | CL_MEM_USE_PERSISTENT_MEM_AMD,
                                    (buffer_.lda_ * buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(T), NULL, &err);
    buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE | CL_MEM_USE_PERSISTENT_MEM_AMD,
                                    (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T), NULL, &err);

    // Map the buffers to host pointers
    T *map_a, *map_b;
    // BUG FIX: size A's mapping with A's dimensions, not B's
    map_a = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_a_, CL_TRUE, CL_MAP_WRITE, 0,
                                   (buffer_.lda_ * buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(T), 0, NULL, NULL, &err);
    map_b = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_b_, CL_TRUE, CL_MAP_WRITE, 0,
                                   (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T), 0, NULL, NULL, &err);

    // memcpy the input A, B to the mapped regions
    memcpy(map_a, buffer_.a_, (buffer_.lda_ * buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(T));
    memcpy(map_b, buffer_.b_, (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T));

    // unmap the buffers
    clEnqueueUnmapMemObject(queue_, buffer_.buf_a_, map_a, 0, NULL, NULL);
    clEnqueueUnmapMemObject(queue_, buffer_.buf_b_, map_b, 0, NULL, NULL);

    // call func
    xTrsm_Function(false);

    // map the B buffer again to read the output
    map_b = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_b_, CL_TRUE, CL_MAP_READ, 0,
                                   (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T), 0, NULL, NULL, &err);
    // BUG FIX: read back device -> host (was host -> device)
    memcpy(buffer_.b_, map_b, (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T));
    // BUG FIX: record the unmap's event so the wait below is meaningful
    clEnqueueUnmapMemObject(queue_, buffer_.buf_b_, map_b, 0, NULL, &event_);
    clWaitForEvents(1, &event_);

    timer.Stop(timer_id);
#else
    std::cout<<"CL_MEM_USE_PERSISTENT_MEM_AMD is only supported on AMD hardware"<<std::endl;
#endif
}
/* Thin wine thunk: trace the call, then forward every argument
 * unchanged to the native clEnqueueUnmapMemObject. */
cl_int WINAPI wine_clEnqueueUnmapMemObject(cl_command_queue command_queue, cl_mem memobj, void * mapped_ptr,
                                           cl_uint num_events_in_wait_list, cl_event * event_wait_list, cl_event * event)
{
    TRACE("\n");
    return clEnqueueUnmapMemObject(command_queue, memobj, mapped_ptr,
                                   num_events_in_wait_list, event_wait_list, event);
}
// Function to clean up and exit //***************************************************************************** void Cleanup(int iExitCode) { // Cleanup allocated objects shrLog("\nStarting Cleanup...\n\n"); // Release all the OpenCL Objects if(cpProgram)clReleaseProgram(cpProgram); for (cl_uint i = 0; i < GpuDevMngr->uiUsefulDevCt; i++) { if(ckSobel[i])clReleaseKernel(ckSobel[i]); if(cmDevBufIn[i])clReleaseMemObject(cmDevBufIn[i]); if(cmDevBufOut[i])clReleaseMemObject(cmDevBufOut[i]); } if(uiInput)clEnqueueUnmapMemObject(cqCommandQueue[0], cmPinnedBufIn, (void*)uiInput, 0, NULL, NULL); if(uiOutput)clEnqueueUnmapMemObject(cqCommandQueue[0], cmPinnedBufOut, (void*)uiOutput, 0, NULL, NULL); if(cmPinnedBufIn)clReleaseMemObject(cmPinnedBufIn); if(cmPinnedBufOut)clReleaseMemObject(cmPinnedBufOut); for (cl_uint i = 0; i < GpuDevMngr->uiUsefulDevCt; i++) { if(cqCommandQueue[i])clReleaseCommandQueue(cqCommandQueue[i]); } if(cxGPUContext)clReleaseContext(cxGPUContext); // free the host allocs if(cSourceCL)free(cSourceCL); if(cPathAndName)free(cPathAndName); if(cmDevBufIn) delete [] cmDevBufIn; if(cmDevBufOut) delete [] cmDevBufOut; if(szAllocDevBytes) delete [] szAllocDevBytes; if(uiInHostPixOffsets) delete [] uiInHostPixOffsets; if(uiOutHostPixOffsets) delete [] uiOutHostPixOffsets; if(uiDevImageHeight) delete [] uiDevImageHeight; if(GpuDevMngr) delete GpuDevMngr; if(cqCommandQueue) delete [] cqCommandQueue; // Cleanup GL objects if used if (!bQATest) { DeInitGL(); } shrLogEx(LOGBOTH | CLOSELOG, 0, "%s Exiting...\n", cExecutableName); shrQAFinishExit2(bQATest, *pArgc, (const char **)pArgv, ( iExitCode == EXIT_SUCCESS ) ? QA_PASSED : QA_FAILED); }
/* Release the MySQL-SHA1 format's OpenCL objects: unmap the pinned host
 * regions, release all buffers, then free the host-side result array. */
static void release_clobj(void)
{
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pin_part_msha_hashes,
	                                       par_msha_hashes, 0, NULL, NULL),
	               "Error Ummapping par_msha_hashes");
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_msha_keys,
	                                       mysqlsha_plain, 0, NULL, NULL),
	               "Error Ummapping mysqlsha_plain");

	HANDLE_CLERROR(clReleaseMemObject(buf_msha_keys), "Error Releasing buf_msha_keys");
	HANDLE_CLERROR(clReleaseMemObject(buf_msha_out), "Error Releasing buf_msha_out");
	HANDLE_CLERROR(clReleaseMemObject(data_info), "Error Releasing data_info");
	HANDLE_CLERROR(clReleaseMemObject(pinned_msha_keys), "Error Releasing pinned_msha_keys");
	HANDLE_CLERROR(clReleaseMemObject(pin_part_msha_hashes), "Error Releasing pin_part_msha_hashes");

	free(res_hashes);
}
/* Release this format's OpenCL objects: unmap the pinned host regions,
 * release all buffers, then free the host-side result array. */
static void release_clobj(void)
{
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_partial_hashes,
	                                       partial_hashes, 0, NULL, NULL),
	               "Error Ummapping partial_hashes");
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_keys,
	                                       saved_plain, 0, NULL, NULL),
	               "Error Ummapping saved_plain");

	HANDLE_CLERROR(clReleaseMemObject(buffer_keys), "Error Releasing buffer_keys");
	HANDLE_CLERROR(clReleaseMemObject(buffer_out), "Error Releasing buffer_out");
	HANDLE_CLERROR(clReleaseMemObject(data_info), "Error Releasing data_info");
	HANDLE_CLERROR(clReleaseMemObject(pinned_saved_keys), "Error Releasing pinned_saved_keys");
	HANDLE_CLERROR(clReleaseMemObject(pinned_partial_hashes), "Error Releasing pinned_partial_hashes");

	free(res_hashes);
}
void allochostptr_roundtrip_func() { timer.Start(timer_id); //set up buffer cl_int err; buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, (buffer_.lda_ * buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(T), NULL, &err); buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T), NULL, &err); // Map the buffers to pointers at host device T *map_a,*map_b; map_a = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_a_, CL_TRUE, CL_MAP_WRITE, 0, (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T), 0, NULL, NULL, &err); map_b = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_b_, CL_TRUE, CL_MAP_WRITE, 0, (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T), 0, NULL, NULL, &err); // memcpy the input A, B to the mapped regions memcpy( map_a, buffer_.a_, ( buffer_.lda_*buffer_.a_num_vectors_ + buffer_.offA_) * sizeof( T ) ); memcpy( map_b, buffer_.b_, ( buffer_.ldb_*buffer_.b_num_vectors_ + buffer_.offB_) * sizeof( T ) ); // unmap the buffers clEnqueueUnmapMemObject(queue_, buffer_.buf_a_, map_a, 0, NULL, NULL); clEnqueueUnmapMemObject(queue_, buffer_.buf_b_, map_b, 0, NULL, NULL); //call func xTrsm_Function(false); // map the B buffer again to read the output map_b = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_b_, CL_TRUE, CL_MAP_READ, 0, (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T), 0, NULL, NULL, &err); memcpy( map_b, buffer_.b_, ( buffer_.ldb_*buffer_.b_num_vectors_ + buffer_.offB_) * sizeof( T ) ); clEnqueueUnmapMemObject(queue_, buffer_.buf_b_, map_b, 0, NULL, NULL); clWaitForEvents(1, &event_); timer.Stop(timer_id); }
static void release_clobj(void) { HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_key, key, 0, NULL, NULL), "Error Unmapping key"); HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_key, saved_key, 0, NULL, NULL), "Error Unmapping saved_key"); HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_len, saved_len, 0, NULL, NULL), "Error Unmapping saved_len"); HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_salt, saved_salt, 0, NULL, NULL), "Error Unmapping saved_salt"); HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "Error releasing memory mappings"); HANDLE_CLERROR(clReleaseMemObject(pinned_key), "Release GPU buffer"); HANDLE_CLERROR(clReleaseMemObject(pinned_saved_key), "Release GPU buffer"); HANDLE_CLERROR(clReleaseMemObject(pinned_saved_len), "Release GPU buffer"); HANDLE_CLERROR(clReleaseMemObject(pinned_salt), "Release GPU buffer"); HANDLE_CLERROR(clReleaseMemObject(cl_key), "Release GPU buffer"); HANDLE_CLERROR(clReleaseMemObject(cl_saved_key), "Release GPU buffer"); HANDLE_CLERROR(clReleaseMemObject(cl_saved_len), "Release GPU buffer"); HANDLE_CLERROR(clReleaseMemObject(cl_salt), "Release GPU buffer"); HANDLE_CLERROR(clReleaseMemObject(cl_pwhash), "Release GPU buffer"); MEM_FREE(cracked); }
// buffer unmap (asynchronous) void HGPU_GPU_buffer_unmap_async(HGPU_GPU_buffer* buffer,cl_command_queue queue,void* data_ptr){ if (!buffer){ HGPU_GPU_error_note(HGPU_ERROR_BAD_BUFFER,"Trying to use null-buffer!"); return; } cl_event buffer_event = NULL; HGPU_GPU_error_message(clEnqueueUnmapMemObject(queue,buffer->buffer,data_ptr,0,NULL,&buffer_event),"clEnqueueUnmapMemObject failed"); buffer->buffer_write_event = buffer_event; buffer->buffer_write_number_of++; }
/*! \param mem cl_mem object
    \param ptr A host pointer that points to the mapped region
*/
// NOTE: profiling of unmap operations is intentionally not wired up;
// AMD's runtime did not support it when this was written.
void cl_unmapBuffer(cl_mem mem, void *ptr)
{
    cl_int rc = clEnqueueUnmapMemObject(commandQueue, mem, ptr, 0, NULL, NULL);
    cl_errChk(rc, "Error unmapping a buffer or image", true);
}
/* Release this format's OpenCL objects: unmap the pinned host regions,
 * release all buffers (including the salt), then free the host-side
 * output copy. */
static void release_clobj(void)
{
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_partial_hashes,
	                                       outbuffer, 0, NULL, NULL),
	               "Error Ummapping outbuffer");
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_saved_keys,
	                                       inbuffer, 0, NULL, NULL),
	               "Error Ummapping inbuffer");

	HANDLE_CLERROR(clReleaseMemObject(buffer_keys), "Error Releasing buffer_keys");
	HANDLE_CLERROR(clReleaseMemObject(buffer_out), "Error Releasing buffer_out");
	HANDLE_CLERROR(clReleaseMemObject(data_info), "Error Releasing data_info");
	HANDLE_CLERROR(clReleaseMemObject(mysalt), "Error Releasing mysalt");
	HANDLE_CLERROR(clReleaseMemObject(pinned_saved_keys), "Error Releasing pinned_saved_keys");
	HANDLE_CLERROR(clReleaseMemObject(pinned_partial_hashes), "Error Releasing pinned_partial_hashes");

	free(outbuffer2);
}
/* Debug shim: log the interesting arguments, then forward the call
 * unchanged to the real clEnqueueUnmapMemObject and return its result. */
cl_int clEnqueueUnmapMemObject_test(cl_command_queue command_queue, cl_mem memobj, void * mapped_ptr,
                                    cl_uint num_events_in_wait_list, const cl_event * event_wait_list,
                                    cl_event * event)
{
    printf("clEnqueueUnmapMemObject_test: memobj==%p ptr==%p event==%p\n", memobj, mapped_ptr, event);
    cl_int rc = clEnqueueUnmapMemObject(command_queue, memobj, mapped_ptr,
                                        num_events_in_wait_list, event_wait_list, event);
    return rc;
}
/* Unmap `hostPointer` from `deviceBuffer` on the benchmark's queue.
 * Returns SDK_SUCCESS on success; CHECK_OPENCL_ERROR handles failures. */
int ComputeBench::unmapBuffer(cl_mem deviceBuffer, void* hostPointer)
{
    const cl_int status = clEnqueueUnmapMemObject(commandQueue, deviceBuffer,
                                                  hostPointer, 0, NULL, NULL);
    CHECK_OPENCL_ERROR(status, "clEnqueueUnmapMemObject failed");
    return SDK_SUCCESS;
}
/*
 * Unmap a previously mapped pixel buffer pointer, waiting on the
 * release event, then block until the queue drains.
 *
 * Fixes vs. the previous version:
 *  - `int ret;` was OR-ed into (`ret |= ...`) while still uninitialized
 *    — undefined behavior; it is now initialized to 0;
 *  - the dead `#if 0` wrapper around the whole body is removed.
 *
 * Returns 0 when p is NULL or the unmap succeeded, otherwise the
 * OpenCL error code from clEnqueueUnmapMemObject.
 */
int GLCLDraw::ReleasePixelBuffer(Uint32 *p)
{
	int ret = 0;

	if (p == NULL)
		return 0;
	ret |= clEnqueueUnmapMemObject(command_queue, outbuf, p, 1, &event_release, NULL);
	clFinish(command_queue);
	return ret;
}