//----------------------------------------------------------------------------// double OpenCLImpl::Copy(Buffer::Ptr buffer, Buffer::CopyOperation op, void * data, std::string * error) { cl_event event; cl_int cl_err = CL_SUCCESS; //set to success to get rid of compiler warnings if (op == Buffer::COPY_FROM_GPU) { cl_err = clEnqueueReadBuffer( _queue, _clBuffers[buffer->name], CL_TRUE /* function call returns when copy is done */ , 0, _BufferSize(buffer), data, 0 , NULL, &event); } else if (op == Buffer::COPY_TO_GPU) { cl_err = clEnqueueWriteBuffer( _queue, _clBuffers[buffer->name], CL_TRUE /* function call returns when copy is done */ , 0, _BufferSize(buffer), data, 0 , NULL, &event); } if (_clErrorCopy(cl_err, error, buffer->name, op)) { return GPUIP_ERROR; } clWaitForEvents(1, &event); cl_ulong start,end; clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start, NULL); clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end, NULL); return (double)(end-start) * 1.0e-6 ; }
//----------------------------------------------------------------------------// double CUDAImpl::Allocate(std::string * err) { _StartTimer(); if (!_FreeBuffers(err)) { return GPUIP_ERROR; } std::map<std::string,Buffer::Ptr>::const_iterator it; for(it = _buffers.begin(); it != _buffers.end(); ++it) { _cudaBuffers[it->second->name] = NULL; cudaError_t c_err = cudaMalloc(&_cudaBuffers[it->second->name], _BufferSize(it->second)); if(_cudaErrorMalloc(c_err, err)) { return GPUIP_ERROR; } } return _StopTimer(); }
//----------------------------------------------------------------------------// double CUDAImpl::Copy(Buffer::Ptr buffer, Buffer::CopyOperation op, void * data, std::string * err) { _StartTimer(); cudaError_t e = cudaSuccess; const size_t size = _BufferSize(buffer); if (op == Buffer::COPY_FROM_GPU) { e =cudaMemcpy(data, _cudaBuffers[buffer->name], size, cudaMemcpyDeviceToHost); } else if (op == Buffer::COPY_TO_GPU) { e = cudaMemcpy(_cudaBuffers[buffer->name],data, size, cudaMemcpyHostToDevice); } if (_cudaErrorCopy(e, err, buffer->name, op)) { return GPUIP_ERROR; } return _StopTimer(); }
//----------------------------------------------------------------------------// double OpenCLImpl::Allocate(std::string * err) { const std::clock_t start = std::clock(); if(!_ReleaseBuffers(err)) { return GPUIP_ERROR; } cl_int cl_err; std::map<std::string,Buffer::Ptr>::const_iterator it; for (it = _buffers.begin(); it != _buffers.end(); ++it) { _clBuffers[it->second->name] = clCreateBuffer( _ctx, CL_MEM_READ_WRITE, _BufferSize(it->second), NULL, &cl_err); if (_clErrorInitBuffers(cl_err, err)) { return GPUIP_ERROR; } } return ( std::clock() - start ) / (long double) CLOCKS_PER_SEC; }