예제 #1
0
cl_int MemoryObjectWrapper::createSubBuffer (cl_mem_flags aFlags,
                                             RegionWrapper const& aRegion,
                                             MemoryObjectWrapper** aResultOut) {
#if CL_WRAPPER_CL_VERSION_SUPPORT >= 110
    D_METHOD_START;
    cl_int err = CL_SUCCESS;
    VALIDATE_ARG_POINTER (aResultOut, &err, err);

    cl_buffer_region region;
    region.origin = aRegion.origin;
    region.size = aRegion.size;

    cl_mem mem = clCreateSubBuffer (mWrapped, aFlags, CL_BUFFER_CREATE_TYPE_REGION,
                                 (void const*)&region, &err);
    if (err != CL_SUCCESS || !mem)
        D_LOG (LOG_LEVEL_ERROR, "clCreateSubBuffer failed. (error %d)", err);

    // NOTE: clCreateSubBuffer can return an existing handle.
    *aResultOut = MemoryObjectWrapper::getNewOrExisting (mem);
    return err;
#else // CL_WRAPPER_CL_VERSION_SUPPORT >= 110
    (void)aFlags; (void)aRegion; (void)aResultOut;
    D_LOG (LOG_LEVEL_ERROR, "CLWrapper support for OpenCL 1.1 API was not enabled at build time.");
    return CL_INVALID_VALUE;
#endif
}
예제 #2
0
파일: memory.c 프로젝트: dche/rcl
/*
 * call-seq:
 *      Memory.create_subbuffer(buffer, , [10, 100])
 *
 */
static VALUE
rcl_mem_create_subbuffer(VALUE mod, VALUE buffer,
                                     VALUE flags,
                                     VALUE region)
{
    EXPECT_RCL_TYPE(buffer, Memory);
    EXPECT_FIXNUM(flags);
    EXPECT_ARRAY(region);

    long i = RARRAY_LEN(region);
    if (i != 2) {
        rb_raise(rb_eArgError, "Expected the parameter region has 2 items, got (%ld).", i);
    }
    VALUE sz = rb_ary_entry(region, 0);
    EXTRACT_SIZE(sz, origin);
    sz = rb_ary_entry(region, 1);
    EXTRACT_SIZE(sz, offset);

    cl_buffer_region br;
    br.origin = origin;
    br.size = offset;

    cl_mem buf = MemoryPtr(buffer);
    cl_mem_flags mf = FIX2INT(flags);

    cl_int res = CL_SUCCESS;
    cl_mem subbuf = clCreateSubBuffer(buf, mf, CL_BUFFER_CREATE_TYPE_REGION, &br, &res);
    CHECK_AND_RAISE(res);

    return RMemory(subbuf);
}
예제 #3
0
/*!
    Creates a new buffer that refers to the \a size bytes,
    starting at \a offset within this buffer.  The data in
    the new buffer will be accessed according to \a access.

    Sub-buffers are an OpenCL 1.1 feature.  On OpenCL 1.0,
    this function will return a null buffer.

    \sa parentBuffer(), offset()
*/
QCLBuffer QCLBuffer::createSubBuffer
(size_t offset, size_t size, QCLMemoryObject::Access access)
{
#ifdef QT_OPENCL_1_1
    cl_int error;
    cl_buffer_region region;
    region.origin = offset;
    region.size = size;
    cl_mem mem = clCreateSubBuffer
                 (memoryId(), cl_mem_flags(access),
                  CL_BUFFER_CREATE_TYPE_REGION, &region, &error);
    context()->reportError("QCLBuffer::createSubBuffer:", error);
    return QCLBuffer(context(), mem);
#else
    Q_UNUSED(offset);
    Q_UNUSED(size);
    Q_UNUSED(access);
    return QCLBuffer();
#endif
}
예제 #4
0
boost::shared_ptr<DeviceMatrixCL> makeDeviceMatrixCL(DeviceMatrixCL3D& src, const int slice)
{
	const int height = src.dim_y;
	const int width = src.dim_x;

	DeviceMatrixCL* mat = new DeviceMatrixCL();
	mat->width = width;
	mat->height = height;

	//size_t mem_size = width * height 

	size_t buffer_region[2] = {src.pitch_t * slice, src.pitch_t};

	cl_int err;

	mat->dataMatrix = clCreateSubBuffer(src.dataMatrix, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, buffer_region, &err);
	
	mat->pitch = src.pitch_y;

	return boost::shared_ptr<DeviceMatrixCL>(mat, deleteDeviceMatrixCL);
}
예제 #5
0
void ConvolutionLayerSpatial<Dtype>::setBufferKernelArg(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top,
    viennacl::ocl::kernel *kernel,
    const cl_uint &argIdx,
    viennacl::ocl::context *ctx,
    cl_mem buffer, size_t offset,
    size_t size, bool readOnly,
    bool preserved) {

  if (offset == 0) {
    kernel->arg(argIdx, WrapHandle((cl_mem) buffer, ctx));
    return;
  }

  if (preserved &&
    subBufferMap.find(std::make_tuple(buffer, offset, size))
      != subBufferMap.end()) {
    kernel->arg(argIdx,
      WrapHandle(subBufferMap.find
                   (std::make_tuple(buffer, offset, size))->second, ctx));
    return;
  }
  cl_buffer_region region;
  region.origin = offset * sizeof(Dtype);
  region.size = size * sizeof(Dtype);
  cl_mem_flags memFlags = readOnly ? CL_MEM_READ_ONLY : CL_MEM_READ_WRITE;
  cl_int error;
  cl_mem sub_buffer = clCreateSubBuffer(buffer, memFlags,
                        CL_BUFFER_CREATE_TYPE_REGION,
                        &region, &error);
  CHECK_EQ(error, CL_SUCCESS) << "Failed to create sub buffer." << std::endl;
  kernel->arg(argIdx, WrapHandle(sub_buffer, ctx));
  if (preserved)
    subBufferMap.insert(std::make_pair(std::make_tuple(buffer, offset, size),
                        sub_buffer));
  else
    tmpSubBuffers.push_back(sub_buffer);
}
예제 #6
0
파일: ocl.c 프로젝트: nasa/QuIP
static void _ocl_offset_data(QSP_ARG_DECL  Data_Obj *dp, index_t offset)
{
#ifndef USE_OPENCL_SUBREGION
	/* The original code used subBuffers, but overlapping subregions
	 * don't work...
	 * So instead we use a common memory buffer, but keep track
	 * of the starting offset (in elements).  This offset has
	 * to be passed to the kernels.
	 */

//fprintf(stderr,"ocl_offset_data:  obj %s, offset = %d\n",OBJ_NAME(dp),offset);
//fprintf(stderr,"\tparent obj %s, parent offset = %d\n",OBJ_NAME(OBJ_PARENT(dp)),
//OBJ_OFFSET(OBJ_PARENT(dp)));

	if( IS_COMPLEX(dp) ){
		assert( (offset & 1) == 0 );
		offset /= 2;
//fprintf(stderr,"Adjusted offset (%d) for complex object %s\n",offset,OBJ_NAME(dp));
	} else if( IS_QUAT(dp) ){
		assert( (offset & 3) == 0 );
		offset /= 4;
	}

	SET_OBJ_DATA_PTR(dp,OBJ_DATA_PTR(OBJ_PARENT(dp)));
	SET_OBJ_OFFSET( dp, OBJ_OFFSET(OBJ_PARENT(dp)) + offset );

#else // USE_OPENCL_SUBREGION
	cl_mem buf;
	cl_mem parent_buf;
	cl_buffer_region reg;
	cl_int status;
	int extra_offset;

	parent_buf = find_parent_buf(OBJ_PARENT(dp),&extra_offset);
	assert( parent_buf != NULL );

	reg.origin = (offset+extra_offset) * ELEMENT_SIZE(dp);

	// No - the region has to be big enough for all of the elements.
	// The safest thing is to include everything from the start
	// of the subregion to the end of the parent.  Note that this
	// cannot handle negative increments!?
	// reg.size = OBJ_N_MACH_ELTS(dp) * ELEMENT_SIZE(dp);

	//   p p p p p p p
	//   p p c c c p p
	//   p p p p p p p
	//   p p c c c p p

	reg.size =	  OBJ_SEQ_INC(dp)*(OBJ_SEQS(dp)-1)
			+ OBJ_FRM_INC(dp)*(OBJ_FRAMES(dp)-1)
			+ OBJ_ROW_INC(dp)*(OBJ_ROWS(dp)-1)
			+ OBJ_PXL_INC(dp)*(OBJ_COLS(dp)-1)
			+ OBJ_COMP_INC(dp)*(OBJ_COMPS(dp)-1)
			+ 1;
	reg.size *= ELEMENT_SIZE(dp);
//fprintf(stderr,"requesting subregion of %ld bytes at offset %ld\n",
//reg.size,reg.origin);

	buf = clCreateSubBuffer ( parent_buf,
				CL_MEM_READ_WRITE,
				CL_BUFFER_CREATE_TYPE_REGION,
		&reg,
			&status);
	if( status != CL_SUCCESS ){
		report_ocl_error(status, "clCreateSubBuffer");
		SET_OBJ_DATA_PTR(dp,OBJ_DATA_PTR(OBJ_PARENT(dp)));
	} else {
		SET_OBJ_DATA_PTR(dp,buf);
	}
	// BUG - Because this object doesn't "own" the data, the sub-buffer
	// won't be released when the object is destroyed, a possible memory
	// leak...
	// We need to add a special case, or make data releasing a
	// platform-specific function...
#endif // USE_OPENCL_SUBREGION
}
enum piglit_result
piglit_cl_test(const int argc,
	       const char **argv,
	       const struct piglit_cl_custom_test_config *config,
	       const struct piglit_cl_custom_test_env *env)
{
	piglit_cl_context piglit_cl_context = NULL;
	cl_command_queue queue = NULL;
	cl_mem buffer = NULL, sub_buffer = NULL;
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	unsigned i;
	size_t global_size = 1, local_size = 1;
	cl_buffer_region region = {PAD_SIZE, SUB_BUFFER_SIZE };
	cl_int err;
	char *sub_data = malloc(BUFFER_SIZE);
	char *padding = malloc(PAD_SIZE);
	char data_byte = (char)DATA_BYTE;
	char pad_byte = 0xcd;
	char *out_data = malloc(BUFFER_SIZE);

	assert(SUB_BUFFER_SIZE % 4 == 0);
	memset(sub_data, data_byte, SUB_BUFFER_SIZE);
	memset(padding, pad_byte, PAD_SIZE);

	piglit_cl_context = piglit_cl_create_context(env->platform_id,
							&env->device_id, 1);
	queue = piglit_cl_context->command_queues[0];
	buffer = piglit_cl_create_buffer(piglit_cl_context, CL_MEM_READ_WRITE,
                                         BUFFER_SIZE);
	sub_buffer = clCreateSubBuffer(buffer, CL_MEM_READ_WRITE,
                                       CL_BUFFER_CREATE_TYPE_REGION,
                                       &region, &err);
	if (err != CL_SUCCESS) {
		fprintf(stderr, "clCreateSubBuffer() failed.");
		return PIGLIT_FAIL;
	}

	clEnqueueWriteBuffer(queue, buffer, CL_FALSE, 0, PAD_SIZE, padding,
                             0, NULL, NULL);
	clEnqueueWriteBuffer(queue, buffer, CL_FALSE, BUFFER_SIZE - PAD_SIZE,
                             PAD_SIZE, padding, 0, NULL, NULL);
	clFinish(queue);

	program = piglit_cl_build_program_with_source(piglit_cl_context, 1,
                                                      &source, "");
	kernel = piglit_cl_create_kernel(program, "test");

	if (!piglit_cl_set_kernel_arg(kernel, 0, sizeof(cl_mem), &sub_buffer)) {
		return PIGLIT_FAIL;
	}

	if (!piglit_cl_enqueue_ND_range_kernel(queue, kernel, 1, &global_size,
						&local_size)) {
		return PIGLIT_FAIL;
	}
	clFinish(queue);

	clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, BUFFER_SIZE, out_data,
                            0, NULL, NULL);
	for (i = 0; i < PAD_SIZE; i++) {
		if (!piglit_cl_probe_integer(out_data[i], pad_byte, 0)) {
			fprintf(stderr, "Failed at offset %u\n", i);
			return PIGLIT_FAIL;
		}
	}

	for (i = BUFFER_SIZE - PAD_SIZE; i < BUFFER_SIZE; i++) {
		if (!piglit_cl_probe_integer(out_data[i], pad_byte, 0)) {
			fprintf(stderr, "Failed at offset %u\n", i);
			return PIGLIT_FAIL;
		}
	}

	for (i = PAD_SIZE; i < BUFFER_SIZE - PAD_SIZE; i++) {
		if (!piglit_cl_probe_integer(out_data[i], data_byte, 0)) {
			fprintf(stderr, "Failed at offset %u\n", i);
			return PIGLIT_FAIL;
		}
	}
	return PIGLIT_PASS;
}
/**
 * \related cl_Mem_Object_t
 *
 * This function allocates memory for child Memory Object with generic data
 * container & sets function pointers
 *
 * @param[in] self pointer to existing Memory Object, in which function pointer
 * 'Make_Child' is defined to point on this function.
 * @param[in] mem_flags OpenCL memory flags, which will be used for OpenCL
 * sub-buffer creation
 * @param[in] buffer_create_type information about sub-buffer creation type
 * @param[in] buffer_create_info structure with sub-buffer origin & size
 * values.
 *
 * @return pointer to allocated structure in case of success,
 * \ref VOID_MEM_OBJ_PTR otherwise. In case of error it sets error code, which
 * is available via 'error' structure.
 *
 * @warning always use 'Destroy' function pointer to free memory, allocated
 * by this function.
 */
static scow_Mem_Object* Buffer_Make_Sub_Buffer(scow_Mem_Object *self,
        cl_mem_flags flags, cl_buffer_create_type buffer_create_type,
        const void *buffer_create_info)
{
    OCL_CHECK_EXISTENCE(self, VOID_MEM_OBJ_PTR);

    cl_int ret;
    scow_Mem_Object* child;

    /* Sub-buffer creation is defined only for Buffer, which isn't someone's
     * child. */
    if (self->obj_mem_type != BUFFER)
    {
        ret = INVALID_ARG_TYPE;
        self->error->Set_Last_Code(self->error, ret);
        OCL_DIE_ON_ERROR(ret, CL_SUCCESS, NULL, VOID_MEM_OBJ_PTR);
    }

    if (self->obj_paternity != PARENT_OBJECT)
    {
        ret = WRONG_PARENT_OBJECT;
        self->error->Set_Last_Code(self->error, ret);
        OCL_DIE_ON_ERROR(ret, CL_SUCCESS, NULL, VOID_MEM_OBJ_PTR);
    }

    child = (scow_Mem_Object*) calloc(1, sizeof(*child));
    OCL_CHECK_EXISTENCE(child, VOID_MEM_OBJ_PTR);

    child->obj_mem_type = BUFFER;
    child->obj_paternity = CHILD_OBJECT;
    child->mem_flags = flags;
    child->parent_thread = self->parent_thread;

    child->error = Make_Error();
    child->timer = Make_Timer(VOID_KERNEL_PTR);

    child->Get_Mem_Obj = Mem_Object_Get_Mem_Obj;
    child->Destroy = Mem_Object_Destroy;
    child->Swap = Mem_Object_Swap;
    child->Unmap = Mem_Object_Unmap;

    child->Map = Buffer_Map;
    child->Write = Buffer_Send_To_Device;
    child->Read = Buffer_Get_From_Device;
    child->Copy = Buffer_Copy;
    child->Erase = Buffer_Erase;
    child->Sync = Mem_Object_Sync;

    child->Get_Height = Buffer_Get_Height;
    child->Get_Width = Buffer_Get_Width;
    child->Get_Row_Pitch = Buffer_Get_Row_Pitch;
    child->Make_Child = Buffer_Make_Sub_Buffer;

    child->cl_mem_object = clCreateSubBuffer(self->cl_mem_object, flags,
            buffer_create_type, buffer_create_info, &ret);

    OCL_DIE_ON_ERROR(ret, CL_SUCCESS, child->Destroy(child), VOID_MEM_OBJ_PTR);

    child->size = ((cl_buffer_region*) buffer_create_info)->size;
    child->origin = ((cl_buffer_region*) buffer_create_info)->origin;

    if (self->host_ptr)
    {
        child->host_ptr = (unsigned char*)self->host_ptr + child->origin;
    }

    return child;
}
예제 #9
0
파일: simple.cpp 프로젝트: BeauJoh/Simple
// main() for simple buffer and sub-buffer example
//
int main(int argc, char** argv)
{
    cl_int errNum;
    cl_uint numPlatforms;
    cl_uint numDevices;
    cl_platform_id * platformIDs;
    cl_device_id * deviceIDs;
    cl_context context;
    cl_program program;
    std::vector<cl_kernel> kernels;
    std::vector<cl_command_queue> queues;
    std::vector<cl_mem> buffers;
    int * inputOutput;
    std::cout << "Simple buffer and sub-buffer Example" << std::endl;
    // First, select an OpenCL platform to run on.
    errNum = clGetPlatformIDs(0, NULL, &numPlatforms);
    checkErr(
             (errNum != CL_SUCCESS) ?
             errNum : (numPlatforms <= 0 ? -1 : CL_SUCCESS),
             "clGetPlatformIDs");
    platformIDs = (cl_platform_id *)alloca(sizeof(cl_platform_id) * numPlatforms);
    std::cout << "Number of platforms: \t" << numPlatforms << std::endl;
    errNum = clGetPlatformIDs(numPlatforms, platformIDs, NULL);
    checkErr(
             (errNum != CL_SUCCESS) ?
             errNum : (numPlatforms <= 0 ? -1 : CL_SUCCESS),
             "clGetPlatformIDs");
    std::ifstream srcFile("simple.cl");
    
    checkErr(srcFile.is_open() ? CL_SUCCESS : -1, "reading simple.cl");
    
    std::string srcProg(
                        std::istreambuf_iterator<char>(srcFile),
                        (std::istreambuf_iterator<char>()));
    const char * src = srcProg.c_str();
    size_t length = srcProg.length();
    deviceIDs = NULL;
    DisplayPlatformInfo(
                        platformIDs[PLATFORM_INDEX],
                        CL_PLATFORM_VENDOR,
                        "CL_PLATFORM_VENDOR");
    errNum = clGetDeviceIDs(
                            platformIDs[PLATFORM_INDEX],
                            CL_DEVICE_TYPE_ALL,
                            0,
                            NULL,
                            &numDevices);
    if (errNum != CL_SUCCESS && errNum != CL_DEVICE_NOT_FOUND){
        checkErr(errNum, "clGetDeviceIDs");
    }
    
    deviceIDs = (cl_device_id *)alloca(
                                       sizeof(cl_device_id) * numDevices);
    errNum = clGetDeviceIDs(
                            platformIDs[PLATFORM_INDEX],
                            CL_DEVICE_TYPE_ALL,
                            numDevices,
                            &deviceIDs[0],
                            NULL);
    checkErr(errNum, "clGetDeviceIDs");
    
    cl_context_properties contextProperties[] =
    {
        CL_CONTEXT_PLATFORM,
        (cl_context_properties)platformIDs[PLATFORM_INDEX],
        0
    };
    
    context = clCreateContext(
                              contextProperties,
                              numDevices,
                              deviceIDs,
                              NULL,
                              NULL,
                              &errNum);
    
    checkErr(errNum, "clCreateContext");
    // Create program from source
    program = clCreateProgramWithSource(
                                        context,
                                        1,
                                        &src,
                                        &length,
                                        &errNum);
    checkErr(errNum, "clCreateProgramWithSource");
    
    // Build program
    errNum = clBuildProgram(
                            program,
                            numDevices,
                            deviceIDs,
                            "-I.",
                            NULL,
                            NULL);

    if (errNum != CL_SUCCESS){
        // Determine the reason for the error
        char buildLog[16384];
        clGetProgramBuildInfo(
                              program,
                              deviceIDs[0],
                              CL_PROGRAM_BUILD_LOG,
                              sizeof(buildLog),
                              buildLog,
                              NULL);
        std::cerr << "Error in OpenCL C source: " << std::endl;
        std::cerr << buildLog;
        checkErr(errNum, "clBuildProgram");
    }
        // create buffers and sub-buffers
        inputOutput = new int[NUM_BUFFER_ELEMENTS * numDevices];
        for (unsigned int i = 0; i < NUM_BUFFER_ELEMENTS * numDevices; i++)
        {
            inputOutput[i] = i;
        }
        
        // create a single buffer to cover all the input data
        cl_mem buffer = clCreateBuffer(
                                       context,
                                       CL_MEM_READ_WRITE,
                                       sizeof(int) * NUM_BUFFER_ELEMENTS * numDevices,
                                       NULL,
                                       &errNum);
        checkErr(errNum, "clCreateBuffer");
        buffers.push_back(buffer);
        // now for all devices other than the first create a sub-buffer
        for (unsigned int i = 1; i < numDevices; i++)
        {
            cl_buffer_region region =
            {
                NUM_BUFFER_ELEMENTS * i * sizeof(int),
                NUM_BUFFER_ELEMENTS * sizeof(int)
            };
            buffer = clCreateSubBuffer(
                                       buffers[0],
                                       CL_MEM_READ_WRITE,
                                       CL_BUFFER_CREATE_TYPE_REGION,
                                       &region,
                                       &errNum);
            checkErr(errNum, "clCreateSubBuffer");
            buffers.push_back(buffer);
        }
        // Create command queues
        for (int i = 0; i < numDevices; i++)
        {
            InfoDevice<cl_device_type>::display(deviceIDs[i], CL_DEVICE_TYPE, "CL_DEVICE_TYPE");
            cl_command_queue queue =
            clCreateCommandQueue(
                                 context,
                                 deviceIDs[i],
                                 0,
                                 &errNum);
            checkErr(errNum, "clCreateCommandQueue");
            queues.push_back(queue);
            cl_kernel kernel = clCreateKernel(
                                              program,
                                              "square",
                                              &errNum);
            checkErr(errNum, "clCreateKernel(square)");
            errNum = clSetKernelArg(
                                    kernel,
                                    0,
                                    sizeof(cl_mem), (void *)&buffers[i]);
            checkErr(errNum, "clSetKernelArg(square)");
            kernels.push_back(kernel);
            // Write input data
            clEnqueueWriteBuffer(
                                 queues[0],
                                 buffers[0],
                                 CL_TRUE,
                                 0,
                                 sizeof(int) * NUM_BUFFER_ELEMENTS * numDevices,
                                 (void*)inputOutput,
                                 0,
                                 NULL,
                                 NULL);
            std::vector<cl_event> events;
            // call kernel for each device
            for (int i = 0; i < queues.size(); i++)
            {
                cl_event event;
                size_t gWI = NUM_BUFFER_ELEMENTS;
                errNum = clEnqueueNDRangeKernel(
                                                queues[i],
                                                kernels[i],
                                                1,
                                                NULL,
                                                (const size_t*)&gWI,
                                                (const size_t*)NULL,
                                                0,
                                                0,
                                                &event);
                events.push_back(event);
            }
            // Technically don't need this as we are doing a blocking read
            // with in-order queue.
            clWaitForEvents(events.size(), events.data());
            // Read back computed data
            clEnqueueReadBuffer(
                                queues[0],
                                buffers[0],
                                CL_TRUE,
                                0,
                                sizeof(int) * NUM_BUFFER_ELEMENTS * numDevices,
                                (void*)inputOutput,
                                0,
                                NULL,
                                NULL);
            // Display output in rows
            for (unsigned i = 0; i < numDevices; i++)
            {
                for (unsigned elems = i * NUM_BUFFER_ELEMENTS;
                     elems < ((i+1) * NUM_BUFFER_ELEMENTS);
                     elems++)
                {
                    std::cout << " " << inputOutput[elems];
                }
                std::cout << std::endl;
            }
            std::cout << "Program completed successfully" << std::endl;
            return 0; 
        }
}