static void release_clobj(void) {
    cl_int ret_code;

    ret_code = clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_partial_hashes,
            calculated_hash, 0, NULL, NULL);
    HANDLE_CLERROR(ret_code, "Error Ummapping out_hashes");

    ret_code = clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_keys,
            plaintext, 0, NULL, NULL);
    HANDLE_CLERROR(ret_code, "Error Ummapping saved_plain");

    ret_code = clReleaseMemObject(salt_buffer);
    HANDLE_CLERROR(ret_code, "Error Releasing data_info");
    ret_code = clReleaseMemObject(pass_buffer);
    HANDLE_CLERROR(ret_code, "Error Releasing buffer_keys");
    ret_code = clReleaseMemObject(hash_buffer);
    HANDLE_CLERROR(ret_code, "Error Releasing buffer_out");
    ret_code = clReleaseMemObject(work_buffer);
    HANDLE_CLERROR(ret_code, "Error Releasing work_out");

    ret_code = clReleaseMemObject(pinned_saved_keys);
    HANDLE_CLERROR(ret_code, "Error Releasing pinned_saved_keys");

    ret_code = clReleaseMemObject(pinned_partial_hashes);
    HANDLE_CLERROR(ret_code, "Error Releasing pinned_partial_hashes");
}
static void release_clobj(void)
{
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], cl_aes_key, aes_key, 0, NULL, NULL), "Error Unmapping aes_key");
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], cl_aes_iv, aes_iv, 0, NULL, NULL), "Error Unmapping aes_iv");
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], cl_saved_key, saved_key, 0, NULL, NULL), "Error Unmapping saved_key");
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], cl_saved_len, saved_len, 0, NULL, NULL), "Error Unmapping saved_len");
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], cl_salt, saved_salt, 0, NULL, NULL), "Error Unmapping saved_salt");
	aes_key = NULL; aes_iv = NULL; saved_key = NULL; saved_len = NULL; saved_salt = NULL;
}
	/*
	 * Timed round trip through CL_MEM_ALLOC_HOST_PTR buffers: create A, B
	 * and C, fill them from the host arrays through map/memcpy/unmap, call
	 * xGemm_Function(false), then map C again and copy the result back to
	 * the host array buffer_.c_.
	 *
	 * Fixes relative to the previous version:
	 *  - C was mapped with a size derived from lda_ although the buffer is
	 *    created (and copied) with a size derived from ldc_; all three
	 *    uses now share one size.
	 *  - The final memcpy wrote host data into a region mapped with
	 *    CL_MAP_READ; it now copies the mapped result into buffer_.c_.
	 *
	 * NOTE(review): the status returned through `err` and by the unmap
	 * calls is still not inspected, as before.
	 */
	void allochostptr_roundtrip_func()
	{
		timer.Start(timer_id);

		cl_int err;

		// Byte sizes of the three matrices, computed once so that create,
		// map and memcpy can never disagree.
		const size_t bytes_a = (buffer_.lda_ * buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(T);
		const size_t bytes_b = (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T);
		const size_t bytes_c = (buffer_.ldc_ * buffer_.c_num_vectors_ + buffer_.offC_) * sizeof(T);

		// Create buffers with CL_MEM_ALLOC_HOST_PTR for zero copy
		buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
										bytes_a, NULL, &err);
		buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
										bytes_b, NULL, &err);
		buffer_.buf_c_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
										bytes_c, NULL, &err);

		// Map the buffers for writing (blocking maps)
		T *map_a, *map_b, *map_c;
		map_a = (T*)clEnqueueMapBuffer(queues_[0], buffer_.buf_a_, CL_TRUE, CL_MAP_WRITE, 0,
									   bytes_a, 0, NULL, NULL, &err);
		map_b = (T*)clEnqueueMapBuffer(queues_[0], buffer_.buf_b_, CL_TRUE, CL_MAP_WRITE, 0,
									   bytes_b, 0, NULL, NULL, &err);
		map_c = (T*)clEnqueueMapBuffer(queues_[0], buffer_.buf_c_, CL_TRUE, CL_MAP_WRITE, 0,
									   bytes_c, 0, NULL, NULL, &err);

		// Copy the inputs A, B, C into the mapped regions
		memcpy( map_a, buffer_.a_, bytes_a );
		memcpy( map_b, buffer_.b_, bytes_b );
		memcpy( map_c, buffer_.c_, bytes_c );

		// Unmap the buffers
		clEnqueueUnmapMemObject(queues_[0], buffer_.buf_a_, map_a, 0, NULL, NULL);
		clEnqueueUnmapMemObject(queues_[0], buffer_.buf_b_, map_b, 0, NULL, NULL);
		clEnqueueUnmapMemObject(queues_[0], buffer_.buf_c_, map_c, 0, NULL, NULL);

		// Call clBLAS
		xGemm_Function(false);

		// Map C again (read-only, blocking) and copy the output to the host array
		map_c = (T*)clEnqueueMapBuffer(queues_[0], buffer_.buf_c_, CL_TRUE, CL_MAP_READ, 0,
									   bytes_c, 0, NULL, NULL, &err);
		memcpy( buffer_.c_, map_c, bytes_c );
		clEnqueueUnmapMemObject(queues_[0], buffer_.buf_c_, map_c, 0, NULL, &event_);
		clWaitForEvents(1, &event_);

		timer.Stop(timer_id);
	}
static void release_clobj(void){
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_partial_hashes, partial_hashes, 0,NULL,NULL), "Error Unmapping partial_hashes");
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_keys, saved_plain, 0, NULL, NULL), "Error Unmapping saved_plain");

	HANDLE_CLERROR(clReleaseMemObject(buffer_keys), "Error Releasing buffer_keys");
	HANDLE_CLERROR(clReleaseMemObject(buffer_out), "Error Releasing buffer_out");
	HANDLE_CLERROR(clReleaseMemObject(pinned_saved_keys), "Error Releasing pinned_saved_keys");
	HANDLE_CLERROR(clReleaseMemObject(pinned_partial_hashes), "Error Releasing pinned_partial_hashes");
	MEM_FREE(res_hashes);
}
Exemple #5
0
/*
 * Runs one transform on `input`.
 *
 * The first samplesPerRun elements of `input` are written into v_samples
 * through a blocking write-map, runInternal() is invoked, and the first
 * samplesPerRun elements of the buffer it returns are read back through a
 * blocking read-map that waits on the last kernel event. The collected
 * events are passed to printStatsAndReleaseEvents() before returning.
 */
vector<cpx> cl_fft<cpx>::run(const vector<cpx> &input)
{
	cl_int err;
	cl_event evt_upload_unmap;
	cl_event evt_start;
	cl_event evt_download_map;
	cl_event *evt_kernels = new cl_event[launches.size()];

	// ---- Upload ----
	cl_float2 *host_in = (cl_float2*)clEnqueueMapBuffer(
		command_queue, v_samples, CL_TRUE, CL_MAP_WRITE,
		0, samplesMemSize,
		0, NULL, NULL,
		&err);
	CL_CHECK_ERR("clEnqueueMapBuffer", err);

	for (int s = 0; s < samplesPerRun; ++s)
	{
		host_in[s].x = real(input[s]);
		host_in[s].y = imag(input[s]);
	}

	CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(command_queue, v_samples, host_in, 0, NULL, &evt_upload_unmap));

	// ---- Compute the FFT ----
	cl_mem v_out = runInternal(v_samples, &evt_start, evt_kernels);

	// ---- Download ----
	vector<cpx> result(samplesPerRun);
	cl_float2 *host_out = (cl_float2*)clEnqueueMapBuffer(
		command_queue, v_out, CL_TRUE, CL_MAP_READ,
		0, tmpMemSize,
		1, &evt_kernels[launches.size() - 1],	// wait for the last kernel launch
		&evt_download_map,
		&err);
	CL_CHECK_ERR("clEnqueueMapBuffer", err);

	for (int s = 0; s < samplesPerRun; ++s)
	{
		result[s] = cpx(host_out[s].x, host_out[s].y);
	}

	CL_CHECK_ERR("clEnqueueUnmapMemObject", clEnqueueUnmapMemObject(command_queue, v_out, host_out, 0, NULL, NULL));

	printStatsAndReleaseEvents(evt_upload_unmap, evt_start, evt_kernels, evt_download_map);

	delete[] evt_kernels;

	return result;
}
Exemple #6
0
void init()
{
    int i, j;
    double t_start_init, t_end_init;

#ifndef ALOCACAO_NORMAL
    t_start_init = rtclock();	
    cl_float* x1_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, x1_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * N, 0, NULL, NULL, &errcode);
    cl_float* x2_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, x2_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * N, 0, NULL, NULL, &errcode);
    cl_float* y_1_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, y1_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * N, 0, NULL, NULL, &errcode);
    cl_float* y_2_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, y2_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * N, 0, NULL, NULL, &errcode);
    cl_float* a_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, a_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * N * N, 0, NULL, NULL, &errcode);
    t_end_init = rtclock();
    tmp_clEnqueueMapBuffer += t_end_init - t_start_init;
#else
    DATA_TYPE *y_1_input = y_1;
    DATA_TYPE *y_2_input = y_2;
    DATA_TYPE *a_input = a;
#endif	

    t_start_init = rtclock();
    for (i=0; i<N; i++) 
    {
        x1[i] = 0.0;
        x2[i] = 0.0;
#ifndef ALOCACAO_NORMAL	
        x1_input[i] = 0.0;
        x2_input[i] = 0.0;
#endif
        y_1_input[i] = 0.0;
        y_2_input[i] = 0.0;

        for (j=0; j<N; j++)
        {
            a_input[i*N + j] = (DATA_TYPE)(i+j+1.0)/N;
        }
    }
    t_end_init = rtclock();
    tmp_init += t_end_init - t_start_init;
#ifndef ALOCACAO_NORMAL
    t_start_init = rtclock();
    clEnqueueUnmapMemObject(clCommandQue, x1_mem_obj, x1_input, 0, NULL, NULL);
    clEnqueueUnmapMemObject(clCommandQue, x2_mem_obj, x2_input, 0, NULL, NULL);
    clEnqueueUnmapMemObject(clCommandQue, y1_mem_obj, y_1_input, 0, NULL, NULL);
    clEnqueueUnmapMemObject(clCommandQue, y2_mem_obj, y_2_input, 0, NULL, NULL);
    clEnqueueUnmapMemObject(clCommandQue, a_mem_obj, a_input, 0, NULL, NULL);
    t_end_init = rtclock();
    tmp_clEnqueueUnmapMemObject += t_end_init - t_start_init; 
#endif
}
Exemple #7
0
void QCLVectorBase::unmap() const
{
    if (m_mapped) {
#ifndef QT_CL_COPY_VECTOR
        cl_int error = clEnqueueUnmapMemObject
            (d_ptr->context->activeQueue(), d_ptr->id, m_mapped, 0, 0, 0);
        d_ptr->context->reportError("QCLVector<T>::unmap:", error);
#else
        // Write the local copy back to the OpenCL device.
        if (d_ptr->hostCopy && d_ptr->state == State_InHost) {
            cl_int error = clEnqueueWriteBuffer
                (d_ptr->context->activeQueue(), d_ptr->id, CL_FALSE,
                 0, m_size * m_elemSize, d_ptr->hostCopy, 0, 0, 0);
            d_ptr->context->reportError("QCLVector<T>::unmap(write):", error);
        }
        d_ptr->state = State_InKernel;
#endif
        m_mapped = 0;

        // Update all of the other owners with the unmap state.
        if (d_ptr->owners.size() > 1) {
            QList<QCLVectorBase *>::Iterator it;
            for (it = d_ptr->owners.begin(); it != d_ptr->owners.end(); ++it) {
                if (*it != this)
                    (*it)->m_mapped = 0;
            }
        }
    }
}
/*
 * Unmaps *ptr from buf on the given queue and measures how long the
 * unmap takes to complete.
 *
 * The counter runs from just before the enqueue until
 * spinForEventsComplete() returns for the unmap event. Unless `quiet` is
 * set, the elapsed time is reported through tlog->Timer().
 */
void timedBufUnmap( cl_command_queue queue,
                    cl_mem buf,
                    void **ptr,
                    bool quiet )
{
    CPerfCounter stopwatch;
    cl_int ret;
    cl_event unmapDone;

    stopwatch.Reset();
    stopwatch.Start();

    ret = clEnqueueUnmapMemObject( queue, buf, (void *) *ptr,
                                   0, NULL, &unmapDone );
    ASSERT_CL_RETURN( ret );

    // Submit the command, then wait on its event.
    clFlush( queue );
    spinForEventsComplete( 1, &unmapDone );

    stopwatch.Stop();

    if( quiet )
        return;

    tlog->Timer( "%32s  %lf s [ %8.2lf GB/s ]\n", "clEnqueueUnmapMemObject():", stopwatch.GetElapsedTime(), nBytes, 1 );
}
Exemple #9
0
const std::vector<cl_uint> AgentMover::getAgentCounts() const
{
    cl_int status;

    cl_uint* buffer = static_cast<cl_uint*> ( clEnqueueMapBuffer(context->queue,
                                                                 agentCounts,
                                                                 CL_TRUE,
                                                                 CL_MAP_READ,
                                                                 0,
                                                                 sizeof(cl_uint) * 8,
                                                                 0,
                                                                 NULL,
                                                                 NULL,
                                                                 &status) );
    CL_ERROR(status);

    std::vector<cl_uint> agentCountsVector(8);

    cl_uint i = 0;

    std::vector<cl_uint>::iterator iterator;

    for (iterator = agentCountsVector.begin(); iterator != agentCountsVector.end(); ++iterator)
    {
        *iterator = buffer[i];

        i++;
    }

    clEnqueueUnmapMemObject(context->queue, agentCounts, buffer, 0, NULL, NULL);

    return agentCountsVector;
}
Exemple #10
0
void*
Alg::eval(std::vector<uint8_t>* pic, std::vector<uint8_t>* picprev){
  uint8_t* t = reinterpret_cast<uint8_t*>(pic->data());
  uint8_t* tp = reinterpret_cast<uint8_t*>(picprev->data());
  // Copy frame to the device
  float* result = (float*)clEnqueueMapBuffer(queue, clm_res, CL_FALSE, CL_MAP_READ, 0, 6*sizeof(cl_float), 0, NULL, NULL, &status);
  for (int i = 0; i < 6; i++) {
    result[i] = 0;
  }
  // Copy frame to the device
  status = clEnqueueWriteBuffer(queue, clm_pic, CL_FALSE, 0, cols * rows * sizeof(cl_uchar), t, 0, NULL, NULL);
  status = clEnqueueWriteBuffer(queue, clm_picprev, CL_FALSE, 0, cols * rows * sizeof(cl_uchar), tp, 0, NULL, NULL);
  status = clEnqueueWriteBuffer(queue, clm_res, CL_FALSE, 0, 6 * sizeof(float), result, 0, NULL, NULL);
  // Execute the OpenCL kernel on the list
  status = clEnqueueNDRangeKernel(queue, kern, 1, NULL, &global_threads, NULL, 0, NULL, NULL);
  status = clEnqueueReadBuffer(queue, clm_res, CL_FALSE, 0, 6*sizeof(float), result, 0, NULL, NULL);
  // Clean up and wait for all the comands to complete.
  status = clFlush(queue);
  status = clFinish(queue);
  for (int i = 0; i < 2; i++) {
    percentage[i] = result[i]*100.0 / (cols*rows);
  }
  for (int i = 0; i < 2; i++) {
    percentage[i + 2] = result[i + 2] / (cols*rows);
  }
  if (result[5])
    percentage[4] = result[4] / result[5];
  else
    percentage[4] = result[4] / 4;
  clEnqueueUnmapMemObject(queue, clm_res, result, 0, NULL, NULL);
  return percentage;
}
Exemple #11
0
void AgentMover::setAgentCounts(const std::vector<cl_uint>& agentCountsVector)
{
    cl_int status;

    cl_uint* buffer = static_cast<cl_uint*> ( clEnqueueMapBuffer(context->queue,
                                                                 agentCounts,
                                                                 CL_TRUE,
                                                                 CL_MAP_WRITE,
                                                                 0,
                                                                 sizeof(cl_uint) * 8,
                                                                 0,
                                                                 NULL,
                                                                 NULL,
                                                                 &status) );
    CL_ERROR(status);

    cl_uint i = 0;

    std::vector<cl_uint>::const_iterator iterator;

    for (iterator = agentCountsVector.begin(); iterator != agentCountsVector.end(); ++iterator)
    {
        buffer[i] = *iterator;

        i++;
    }

    clEnqueueUnmapMemObject(context->queue, agentCounts, buffer, 0, NULL, NULL);
}
Exemple #12
0
/*
 * Logs the call, enqueues an unmap of `ptr` from dd.data on the context's
 * command queue and passes the status to MCL_VALIDATE.
 */
void mclUnmap(mclContext ctx, mclDeviceData dd, void* ptr)
{
  cl_int ret;

  logOclCall("clEnqueueUnmapMemObject");

  /* No wait list and no completion event are requested. */
  ret = clEnqueueUnmapMemObject(ctx.command_queue, dd.data, ptr,
                                0, NULL, NULL);
  MCL_VALIDATE(ret, "mclUnmap: Error unmapping data");
}
static void release_clobj(void)
{
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_result, cracked, 0, NULL, NULL), "Error Unmapping cracked");
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_key, saved_key, 0, NULL, NULL), "Error Unmapping saved_key");
	HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[gpu_id], pinned_idx, saved_idx, 0, NULL, NULL), "Error Unmapping saved_idx");
	HANDLE_CLERROR(clFinish(queue[gpu_id]), "Error releasing memory mappings");

	HANDLE_CLERROR(clReleaseMemObject(pinned_result), "Release pinned result buffer");
	HANDLE_CLERROR(clReleaseMemObject(pinned_key), "Release pinned key buffer");
	HANDLE_CLERROR(clReleaseMemObject(pinned_idx), "Release pinned index buffer");
	HANDLE_CLERROR(clReleaseMemObject(cl_salt), "Release salt buffer");
	HANDLE_CLERROR(clReleaseMemObject(cl_result), "Release result buffer");
	HANDLE_CLERROR(clReleaseMemObject(cl_saved_key), "Release key buffer");
	HANDLE_CLERROR(clReleaseMemObject(cl_saved_idx), "Release index buffer");
	HANDLE_CLERROR(clReleaseMemObject(cl_mid_key), "Release state buffer");
}
Exemple #14
0
/*
 * Uploads the frame, runs the kernel and stores sh[1] / sh[0] in *BS,
 * using a divisor of 4 when sh[0] is zero. Returns BS.
 *
 * Returns NULL if the two-float buffer cannot be mapped.
 *
 * Fixes relative to the previous version:
 *  - The buffer was mapped with blocking_map == CL_FALSE and the returned
 *    pointer was used immediately; OpenCL only allows that once the map
 *    command has completed, so the map is now blocking.
 *  - The region was mapped with CL_MAP_WRITE only although it is read
 *    afterwards; it is now mapped for reading and writing.
 *  - A failed map (NULL pointer) is no longer dereferenced.
 *
 * NOTE(review): clm_sh stays mapped while it is also the target of
 * write/read/kernel commands, as before; the OpenCL specification leaves
 * that combination undefined — consider a plain host array instead.
 */
void*
Optzd::eval(std::vector<uint8_t>* t){
  cl_float* sh = (cl_float*)clEnqueueMapBuffer(queue, clm_sh, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, 0, 2*sizeof(cl_float), 0, NULL, NULL, &status);
  if (sh == NULL) {
    return NULL;
  }
  uint8_t* pic = reinterpret_cast<uint8_t*>(t->data());
  sh[0] = 0;
  sh[1] = 0;
  // Copy the frame and the cleared accumulators to the device
  status = clEnqueueWriteBuffer(queue, clm_pic, CL_TRUE, 0, cols * rows * sizeof(cl_uchar), pic, 0, NULL, NULL);
  status = clEnqueueWriteBuffer(queue, clm_sh, CL_TRUE, 0, 2 * sizeof(cl_float), sh, 0, NULL, NULL);
  // Execute the OpenCL kernel and read the accumulators back
  status = clEnqueueNDRangeKernel(queue, kern, 1, NULL, &global_threads, NULL, 0, NULL, NULL);
  status = clEnqueueReadBuffer(queue, clm_sh, CL_TRUE, 0, 2*sizeof(float), sh, 0, NULL, NULL);
  // Wait for all the commands to complete
  status = clFlush(queue);
  status = clFinish(queue);
  // Take copies before the mapping is released
  float shb = sh[1], shnb = sh[0];

  clEnqueueUnmapMemObject(queue, clm_sh, sh, 0, NULL, NULL);
  // Avoid dividing by zero
  if (shnb == 0) shnb = 4;
  *BS = shb/(shnb);
  return BS;
}
Exemple #15
0
bool CL_Image3D::Unmap(const CL_CommandQueue* pCommandQueue, void** ppMappedData, CL_Event* pNewEvent, const CL_EventPool* pWaitList)
{
	CL_CPP_CONDITIONAL_RETURN_FALSE(!m_Image);
	CL_CPP_CONDITIONAL_RETURN_FALSE(!pCommandQueue);
	CL_CPP_CONDITIONAL_RETURN_FALSE(!ppMappedData);

	cl_uint uNumWaitEvents = pWaitList ? pWaitList->GetNumEvents() : 0;
	const cl_event* pWaitEvents = pWaitList ? pWaitList->GetEventPool() : NULL;
	cl_event NewEvent = NULL;

	//	Unmap a location in host memory from a buffer object.
	const cl_command_queue CommandQueue = pCommandQueue->GetCommandQueue();
	cl_int iErrorCode = clEnqueueUnmapMemObject(CommandQueue, m_Image, *ppMappedData, uNumWaitEvents, pWaitEvents, &NewEvent);

	CL_CPP_CATCH_ERROR(iErrorCode);
	CL_CPP_ON_ERROR_RETURN_FALSE(iErrorCode);

	(*ppMappedData) = NULL;

	if(NewEvent)
	{
		if(pNewEvent)
			pNewEvent->SetEvent(NewEvent);

		clReleaseEvent(NewEvent);
	}

	return true;
}
void init()
{
    int i, j;
    double t_start_init, t_end_init;

#ifndef ALOCACAO_NORMAL	
    t_start_init = rtclock();
    cl_float* data_input = (cl_float*)clEnqueueMapBuffer(clCommandQue, data_mem_obj, CL_TRUE, CL_MAP_WRITE, 0, sizeof(DATA_TYPE) * (M+1) * (N+1), 0, NULL, NULL, &errcode);
    t_end_init = rtclock();
    tmp_clEnqueueMapBuffer += t_end_init - t_start_init;
#endif


    t_start_init = rtclock();
    for (i = 0; i < M; i++)
    {
        for (j = 0; j < N; j++)
        {
            data[i*(N+1) + j] = ((DATA_TYPE) i*j) / M;
#ifndef ALOCACAO_NORMAL
            data_input[i*(N+1) + j] = ((DATA_TYPE) i*j)/ M;
#endif	
        }
    }
    t_end_init = rtclock();
    tmp_init += t_end_init - t_start_init;

#ifndef ALOCACAO_NORMAL
    t_start_init = rtclock();
    clEnqueueUnmapMemObject(clCommandQue, data_mem_obj, data_input, 0, NULL, NULL);
    t_end_init = rtclock();
    tmp_clEnqueueUnmapMemObject += t_end_init - t_start_init;
#endif
}
Exemple #17
0
/*
 * Enqueues an unmap of `p` from inbuf[inbuf_bank]; the completion event
 * is stored in event_uploadvram[1].
 *
 * Returns CL_INVALID_MEM_OBJECT when `p` is NULL, otherwise the status of
 * clEnqueueUnmapMemObject.
 */
cl_int GLCLDraw::UnMapTransferBuffer(Uint8 *p)
{
  if(p == NULL) {
    return CL_INVALID_MEM_OBJECT;
  }

  return clEnqueueUnmapMemObject(command_queue, inbuf[inbuf_bank],
				 p, 0, NULL, &event_uploadvram[1]);
}
Exemple #18
0
    /*
     * Timed round trip through CL_MEM_USE_PERSISTENT_MEM_AMD buffers:
     * create A and B, fill them from the host arrays through
     * map/memcpy/unmap, call xTrsm_Function(false), then map B again and
     * copy the result back to the host array buffer_.b_.
     * When the flag is not defined, only a message is printed.
     *
     * Fixes relative to the previous version:
     *  - A was mapped with B's size although it is created and copied with
     *    its own size; each buffer now uses one size throughout.
     *  - The final memcpy wrote host data into a region mapped with
     *    CL_MAP_READ; it now copies the mapped result into buffer_.b_.
     *
     * NOTE(review): `err` and the unmap statuses are still not inspected,
     * and event_ is presumably set by xTrsm_Function — confirm.
     */
    void usepersismem_roundtrip_func()
    {
#if defined(CL_MEM_USE_PERSISTENT_MEM_AMD)
        timer.Start(timer_id);

        cl_int err;

        // Byte sizes, computed once so create, map and memcpy agree.
        const size_t bytes_a = (buffer_.lda_ * buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(T);
        const size_t bytes_b = (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T);

        // Set up the buffers
        buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY | CL_MEM_USE_PERSISTENT_MEM_AMD,
                                        bytes_a, NULL, &err);
        buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE | CL_MEM_USE_PERSISTENT_MEM_AMD,
                                        bytes_b, NULL, &err);

        // Map the buffers for writing (blocking maps)
        T *map_a, *map_b;
        map_a = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_a_, CL_TRUE, CL_MAP_WRITE, 0,
                                       bytes_a, 0, NULL, NULL, &err);
        map_b = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_b_, CL_TRUE, CL_MAP_WRITE, 0,
                                       bytes_b, 0, NULL, NULL, &err);

        // Copy the inputs A, B into the mapped regions
        memcpy( map_a, buffer_.a_, bytes_a );
        memcpy( map_b, buffer_.b_, bytes_b );

        // Unmap the buffers
        clEnqueueUnmapMemObject(queue_, buffer_.buf_a_, map_a, 0, NULL, NULL);
        clEnqueueUnmapMemObject(queue_, buffer_.buf_b_, map_b, 0, NULL, NULL);

        // Call the function under test
        xTrsm_Function(false);

        // Map B again (read-only, blocking) and copy the output to the host array
        map_b = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_b_, CL_TRUE, CL_MAP_READ, 0,
                                       bytes_b, 0, NULL, NULL, &err);
        memcpy( buffer_.b_, map_b, bytes_b );
        clEnqueueUnmapMemObject(queue_, buffer_.buf_b_, map_b, 0, NULL, NULL);
        clWaitForEvents(1, &event_);

        timer.Stop(timer_id);
#else
        std::cout<<"CL_MEM_USE_PERSISTENT_MEM_AMD is only supported on AMD hardware"<<std::endl;
#endif
    }
Exemple #19
0
/*
 * Thin forwarder: emits a trace line and passes every argument unchanged
 * to clEnqueueUnmapMemObject, returning its status.
 */
cl_int WINAPI wine_clEnqueueUnmapMemObject(cl_command_queue command_queue, cl_mem memobj, void * mapped_ptr,
                                           cl_uint num_events_in_wait_list, cl_event * event_wait_list, cl_event * event)
{
    TRACE("\n");
    return clEnqueueUnmapMemObject(command_queue, memobj, mapped_ptr,
                                   num_events_in_wait_list, event_wait_list, event);
}
// Function to clean up and exit
//*****************************************************************************
void Cleanup(int iExitCode)
{
    // Cleanup allocated objects
    shrLog("\nStarting Cleanup...\n\n");

    // Release all the OpenCL Objects
    if(cpProgram)clReleaseProgram(cpProgram);
    for (cl_uint i = 0; i < GpuDevMngr->uiUsefulDevCt; i++)
    {
        if(ckSobel[i])clReleaseKernel(ckSobel[i]);
        if(cmDevBufIn[i])clReleaseMemObject(cmDevBufIn[i]);
        if(cmDevBufOut[i])clReleaseMemObject(cmDevBufOut[i]);
    }
    if(uiInput)clEnqueueUnmapMemObject(cqCommandQueue[0], cmPinnedBufIn, (void*)uiInput, 0, NULL, NULL);
    if(uiOutput)clEnqueueUnmapMemObject(cqCommandQueue[0], cmPinnedBufOut, (void*)uiOutput, 0, NULL, NULL);
    if(cmPinnedBufIn)clReleaseMemObject(cmPinnedBufIn);
    if(cmPinnedBufOut)clReleaseMemObject(cmPinnedBufOut);
    for (cl_uint i = 0; i < GpuDevMngr->uiUsefulDevCt; i++)
    {
        if(cqCommandQueue[i])clReleaseCommandQueue(cqCommandQueue[i]);
    }
    if(cxGPUContext)clReleaseContext(cxGPUContext);

    // free the host allocs
    if(cSourceCL)free(cSourceCL);
    if(cPathAndName)free(cPathAndName);
    if(cmDevBufIn) delete [] cmDevBufIn;
    if(cmDevBufOut) delete [] cmDevBufOut;
    if(szAllocDevBytes) delete [] szAllocDevBytes;
    if(uiInHostPixOffsets) delete [] uiInHostPixOffsets;
    if(uiOutHostPixOffsets) delete [] uiOutHostPixOffsets;
    if(uiDevImageHeight) delete [] uiDevImageHeight;
    if(GpuDevMngr) delete GpuDevMngr;
    if(cqCommandQueue) delete [] cqCommandQueue;

    // Cleanup GL objects if used
    if (!bQATest)
    {
        DeInitGL();
    }

    shrLogEx(LOGBOTH | CLOSELOG, 0, "%s Exiting...\n", cExecutableName);

    shrQAFinishExit2(bQATest, *pArgc, (const char **)pArgv, ( iExitCode == EXIT_SUCCESS ) ? QA_PASSED : QA_FAILED);
}
static void release_clobj(void){
    cl_int ret_code;

    ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pin_part_msha_hashes, par_msha_hashes, 0,NULL,NULL);
    HANDLE_CLERROR(ret_code, "Error Ummapping par_msha_hashes");
    ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_msha_keys, mysqlsha_plain, 0, NULL, NULL);
    HANDLE_CLERROR(ret_code, "Error Ummapping mysqlsha_plain");
    ret_code = clReleaseMemObject(buf_msha_keys);
    HANDLE_CLERROR(ret_code, "Error Releasing buf_msha_keys");
    ret_code = clReleaseMemObject(buf_msha_out);
    HANDLE_CLERROR(ret_code, "Error Releasing buf_msha_out");
    ret_code = clReleaseMemObject(data_info);
    HANDLE_CLERROR(ret_code, "Error Releasing data_info");
    ret_code = clReleaseMemObject(pinned_msha_keys);
    HANDLE_CLERROR(ret_code, "Error Releasing pinned_msha_keys");
    ret_code = clReleaseMemObject(pin_part_msha_hashes);
    HANDLE_CLERROR(ret_code, "Error Releasing pin_part_msha_hashes");
    free(res_hashes);
}
static void release_clobj(void){
	cl_int ret_code;

	ret_code = clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_partial_hashes, partial_hashes, 0,NULL,NULL);
	HANDLE_CLERROR(ret_code, "Error Ummapping partial_hashes");
	ret_code = clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_keys, saved_plain, 0, NULL, NULL);
	HANDLE_CLERROR(ret_code, "Error Ummapping saved_plain");
	ret_code = clReleaseMemObject(buffer_keys);
	HANDLE_CLERROR(ret_code, "Error Releasing buffer_keys");
	ret_code = clReleaseMemObject(buffer_out);
	HANDLE_CLERROR(ret_code, "Error Releasing buffer_out");
	ret_code = clReleaseMemObject(data_info);
	HANDLE_CLERROR(ret_code, "Error Releasing data_info");
	ret_code = clReleaseMemObject(pinned_saved_keys);
	HANDLE_CLERROR(ret_code, "Error Releasing pinned_saved_keys");
	ret_code = clReleaseMemObject(pinned_partial_hashes);
	HANDLE_CLERROR(ret_code, "Error Releasing pinned_partial_hashes");
	free(res_hashes);
}
Exemple #23
0
    /*
     * Timed round trip through CL_MEM_ALLOC_HOST_PTR buffers: create A and
     * B, fill them from the host arrays through map/memcpy/unmap, call
     * xTrsm_Function(false), then map B again and copy the result back to
     * the host array buffer_.b_.
     *
     * Fixes relative to the previous version:
     *  - A was mapped with B's size although it is created and copied with
     *    its own size; each buffer now uses one size throughout.
     *  - The final memcpy wrote host data into a region mapped with
     *    CL_MAP_READ; it now copies the mapped result into buffer_.b_.
     *
     * NOTE(review): `err` and the unmap statuses are still not inspected,
     * and event_ is presumably set by xTrsm_Function — confirm.
     */
    void allochostptr_roundtrip_func()
    {
        timer.Start(timer_id);

        cl_int err;

        // Byte sizes, computed once so create, map and memcpy agree.
        const size_t bytes_a = (buffer_.lda_ * buffer_.a_num_vectors_ + buffer_.offA_) * sizeof(T);
        const size_t bytes_b = (buffer_.ldb_ * buffer_.b_num_vectors_ + buffer_.offB_) * sizeof(T);

        // Set up the buffers
        buffer_.buf_a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
                                        bytes_a, NULL, &err);
        buffer_.buf_b_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
                                        bytes_b, NULL, &err);

        // Map the buffers for writing (blocking maps)
        T *map_a, *map_b;
        map_a = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_a_, CL_TRUE, CL_MAP_WRITE, 0,
                                       bytes_a, 0, NULL, NULL, &err);
        map_b = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_b_, CL_TRUE, CL_MAP_WRITE, 0,
                                       bytes_b, 0, NULL, NULL, &err);

        // Copy the inputs A, B into the mapped regions
        memcpy( map_a, buffer_.a_, bytes_a );
        memcpy( map_b, buffer_.b_, bytes_b );

        // Unmap the buffers
        clEnqueueUnmapMemObject(queue_, buffer_.buf_a_, map_a, 0, NULL, NULL);
        clEnqueueUnmapMemObject(queue_, buffer_.buf_b_, map_b, 0, NULL, NULL);

        // Call the function under test
        xTrsm_Function(false);

        // Map B again (read-only, blocking) and copy the output to the host array
        map_b = (T*)clEnqueueMapBuffer(queue_, buffer_.buf_b_, CL_TRUE, CL_MAP_READ, 0,
                                       bytes_b, 0, NULL, NULL, &err);
        memcpy( buffer_.b_, map_b, bytes_b );
        clEnqueueUnmapMemObject(queue_, buffer_.buf_b_, map_b, 0, NULL, NULL);
        clWaitForEvents(1, &event_);

        timer.Stop(timer_id);
    }
static void release_clobj(void)
{
    HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_key, key, 0, NULL, NULL), "Error Unmapping key");
    HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_key, saved_key, 0, NULL, NULL), "Error Unmapping saved_key");
    HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_saved_len, saved_len, 0, NULL, NULL), "Error Unmapping saved_len");
    HANDLE_CLERROR(clEnqueueUnmapMemObject(queue[ocl_gpu_id], pinned_salt, saved_salt, 0, NULL, NULL), "Error Unmapping saved_salt");
    HANDLE_CLERROR(clFinish(queue[ocl_gpu_id]), "Error releasing memory mappings");

    HANDLE_CLERROR(clReleaseMemObject(pinned_key), "Release GPU buffer");
    HANDLE_CLERROR(clReleaseMemObject(pinned_saved_key), "Release GPU buffer");
    HANDLE_CLERROR(clReleaseMemObject(pinned_saved_len), "Release GPU buffer");
    HANDLE_CLERROR(clReleaseMemObject(pinned_salt), "Release GPU buffer");
    HANDLE_CLERROR(clReleaseMemObject(cl_key), "Release GPU buffer");
    HANDLE_CLERROR(clReleaseMemObject(cl_saved_key), "Release GPU buffer");
    HANDLE_CLERROR(clReleaseMemObject(cl_saved_len), "Release GPU buffer");
    HANDLE_CLERROR(clReleaseMemObject(cl_salt), "Release GPU buffer");
    HANDLE_CLERROR(clReleaseMemObject(cl_pwhash), "Release GPU buffer");

    MEM_FREE(cracked);
}
Exemple #25
0
// Asynchronous buffer unmap: enqueues the unmap of data_ptr without
// waiting for it. The event of the command is stored in
// buffer->buffer_write_event and buffer->buffer_write_number_of is
// incremented. A NULL buffer is reported and otherwise ignored.
void
HGPU_GPU_buffer_unmap_async(HGPU_GPU_buffer* buffer,cl_command_queue queue,void* data_ptr){
    if (buffer == NULL){
        HGPU_GPU_error_note(HGPU_ERROR_BAD_BUFFER,"Trying to use null-buffer!");
        return;
    }

    cl_event unmap_event = NULL;
    cl_int status = clEnqueueUnmapMemObject(queue,buffer->buffer,data_ptr,0,NULL,&unmap_event);
    HGPU_GPU_error_message(status,"clEnqueueUnmapMemObject failed");

    // Remember the pending command on the buffer.
    buffer->buffer_write_event = unmap_event;
    buffer->buffer_write_number_of += 1;
}
Exemple #26
0
/*!
    Enqueues an unmap of a previously mapped region on the global
    commandQueue and reports failures through cl_errChk.

    \param mem cl_mem object the region was mapped from
    \param ptr Host pointer that points to the mapped region
*/
void cl_unmapBuffer(cl_mem mem, void *ptr)
{
    // TODO AMD does not appear to support profiling of unmap commands yet,
    //      so no event is requested here.

    const cl_int status =
        clEnqueueUnmapMemObject(commandQueue, mem, ptr, 0, NULL, NULL);

    cl_errChk(status, "Error unmapping a buffer or image", true);
}
static void release_clobj(void){
    cl_int ret_code;

    ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_partial_hashes, outbuffer, 0,NULL,NULL);
    HANDLE_CLERROR(ret_code, "Error Ummapping outbuffer");
    ret_code = clEnqueueUnmapMemObject(queue[gpu_id], pinned_saved_keys, inbuffer, 0, NULL, NULL);
    HANDLE_CLERROR(ret_code, "Error Ummapping inbuffer");
    ret_code = clReleaseMemObject(buffer_keys);
    HANDLE_CLERROR(ret_code, "Error Releasing buffer_keys");
    ret_code = clReleaseMemObject(buffer_out);
    HANDLE_CLERROR(ret_code, "Error Releasing buffer_out");
    ret_code = clReleaseMemObject(data_info);
    HANDLE_CLERROR(ret_code, "Error Releasing data_info");
    ret_code = clReleaseMemObject(mysalt);
    HANDLE_CLERROR(ret_code, "Error Releasing mysalt");
    ret_code = clReleaseMemObject(pinned_saved_keys);
    HANDLE_CLERROR(ret_code, "Error Releasing pinned_saved_keys");
    ret_code = clReleaseMemObject(pinned_partial_hashes);
    HANDLE_CLERROR(ret_code, "Error Releasing pinned_partial_hashes");
    free(outbuffer2);
}
/*
 * Logging wrapper around clEnqueueUnmapMemObject: prints the memory
 * object, the mapped pointer and the event pointer, then forwards all
 * arguments unchanged and returns the status of the real call.
 *
 * The pointers are cast to void * for printing because the %p conversion
 * is only defined for arguments of type pointer to void.
 */
cl_int
clEnqueueUnmapMemObject_test(cl_command_queue command_queue,
                             cl_mem           memobj,
                             void *           mapped_ptr,
                             cl_uint          num_events_in_wait_list,
                             const cl_event * event_wait_list,
                             cl_event *       event)
{
   printf("clEnqueueUnmapMemObject_test: memobj==%p ptr==%p event==%p\n",
          (void *) memobj, mapped_ptr, (void *) event);
   return clEnqueueUnmapMemObject(command_queue, memobj, mapped_ptr,
                                  num_events_in_wait_list, event_wait_list, event);

}
Exemple #29
0
/*
 * Enqueues an unmap of hostPointer from deviceBuffer on commandQueue.
 * The status is checked with CHECK_OPENCL_ERROR; SDK_SUCCESS is returned
 * when execution reaches the end of the function.
 */
int
ComputeBench::unmapBuffer(cl_mem deviceBuffer, void* hostPointer)
{
    // No wait list and no completion event are requested.
    cl_int status = clEnqueueUnmapMemObject(commandQueue, deviceBuffer, hostPointer,
                                            0, NULL, NULL);
    CHECK_OPENCL_ERROR(status, "clEnqueueUnmapMemObject failed");

    return SDK_SUCCESS;
}
Exemple #30
0
/*
 * Enqueues an unmap of `p` from outbuf, waiting on event_release, and
 * blocks with clFinish until the queue has drained.
 *
 * Returns 0 when `p` is NULL, otherwise the status of
 * clEnqueueUnmapMemObject.
 *
 * The status variable used to be OR-ed into while still uninitialised, so
 * the returned value was indeterminate; it is now assigned directly.
 */
int GLCLDraw::ReleasePixelBuffer(Uint32 *p)
{
#if 0
   return 0;
#else
  int ret;
  if(p == NULL) return 0;
//  clFlush(command_queue);
  ret = clEnqueueUnmapMemObject(command_queue, outbuf,
				 p, 1, &event_release, NULL);
  clFinish(command_queue);
  return ret;
#endif
}