示例#1
0
文件: bluestein.cpp 项目: rzel/clfft
/**
 * Allocates the host-side buffers for a Bluestein run and, when
 * `deviceCount` is non-zero, initialises the OpenCL GPU context and
 * creates one command queue per device index below `deviceCount`.
 *
 * @param size forwarded to allocateHostMemoryBluesteins
 * @param m    forwarded to allocateHostMemoryBluesteins
 * @return 0 when the context reports no devices, 1 otherwise
 */
unsigned
initExecutionBluesteins(const unsigned size, const unsigned m)
{
    allocateHostMemoryBluesteins(size, m);

    // No devices requested: host memory is all there is to prepare.
    if (!deviceCount) {
        return 1;
    }

    printf("Initializing device(s).." );
    // Create the OpenCL context on the available GPU devices.
    init_cl_context(CL_DEVICE_TYPE_GPU);

    const cl_uint ciDeviceCount = getDeviceCount();
    if (ciDeviceCount == 0) {
        printf("No opencl specific devices!\n");
        return 0;
    }

    printf("Creating Command Queue...\n");
    // One command queue for every device index in [0, deviceCount).
    unsigned queueIndex = 0;
    while (queueIndex < deviceCount) {
        createCommandQueue(queueIndex);
        ++queueIndex;
    }

    return 1;
}
示例#2
0
/**
 * Constructs the wrapper for the given OpenCL device type.
 *
 * Stores `device_type` in `_device_type`, then runs the setup steps in a
 * fixed order (platform, device, context, command queue) and finally prints
 * the OpenCL information.
 *
 * NOTE(review): presumably each setup step relies on the state produced by
 * the previous one, so the call order must be kept - confirm against the
 * helper implementations.
 */
ClWrapper::ClWrapper(cl_device_type device_type) : _device_type(device_type)
{
  createPlatform();
  createDevice();
  createContext();
  createCommandQueue();

  // Report what was set up.
  printOpenCLInfo();
}
int main(int argc, char** argv) {
	xdl::IPXdevLCore core = xdl::createCore();
	core->plug(xdl::XdevLPluginName("XdevLComputeDeviceCL"), xdl::XdevLVersion(0,1,0));

	auto computeDevice = xdl::createModule<xdl::IPXdevLComputeDevice>(core, xdl::XdevLModuleName("XdevLComputeDevice"), xdl::XdevLID("MyComputeDevice"));

	// We need a context.
	auto context = computeDevice->createContext();

	// We need a command queue to run commands.
	auto commandQueue = context->createCommandQueue();


	auto program = context->createProgram();
	auto inBuffer = context->createBuffer(xdl::XDEVL_COMPUTE_BUFFER_READ_ONLY, sizeof(float) * 10);
	auto outBuffer = context->createBuffer(xdl::XDEVL_COMPUTE_BUFFER_WRITE_ONLY, sizeof(float) * 10);

	//
	// Load and build the kernel.
	//
	auto kernel = program->compileFromFile(xdl::XdevLFileName("compute_device_demo.cl"), xdl::XdevLString("calculate_sqrt"));


	for(int a = 0; a < 100; a++) {
		kernel->setArgumentBuffer(0, inBuffer);
		kernel->setArgumentBuffer(1, outBuffer);
		kernel->setArgumentFloat(2, 2);

		inBuffer->upload(commandQueue.get(), sizeof(float) * 10, (xdl::xdl_uint8*)data);

		xdl::XdevLComputeExecuteParameter para(commandQueue.get(), kernel.get(), {32});
		program->execute(para);


//		std::cout << "Before: " << std::endl;
//		for(auto item : data) {
//			std::cout << item << " : ";
//		}
//		std::cout << std::endl;

		outBuffer->download(commandQueue.get(), sizeof(float) * 10, (xdl::xdl_uint8*)data);

		std::cout << "After: " << std::endl;
		for(auto item : data) {
			std::cout << item << " : ";
		}
		std::cout << std::endl;
	}
	xdl::destroyCore(core);
}
示例#4
0
float sgemmMain(int rowa,int cola,int colb)
{
	 cl_context context = 0;
	 cl_command_queue commandQueue = 0;
	 cl_program program = 0;
	 cl_device_id device = 0;
	 cl_kernel kernel = 0;
	 const unsigned int numberOfMemoryObjects = 3;
	 cl_mem memoryObjectsa = 0;
	 cl_mem memoryObjectsb = 0;
	 cl_mem memoryObjectsc = 0;
	 cl_int errorNumber;
	 cl_uint clrowa = rowa;
	 cl_uint clcola = cola;
	 cl_uint clcolb = colb;
	 int err;
	 err = createContext(&context);
	 LOGD("create context");
	 err = createCommandQueue(context, &commandQueue, &device);
	 err = createProgram(context, device, "/mnt/sdcard/kernel/sgemm.cl", &program);
	 kernel = clCreateKernel(program, "sgemm", &errorNumber);
	 LOGD("createKernel code %d",errorNumber);
	 LOGD("start computing");
	 float alpha = 1;
	 float beta = 0.1;

	 /* Create the matrices. */
	 size_t matrixSizea = rowa * cola;
	 size_t matrixSizeb = cola * colb;
	 size_t matrixSizec = rowa * colb;

	 /* As all the matrices have the same size, the buffer size is common. */
	 size_t bufferSizea = matrixSizea * sizeof(float);
	 size_t bufferSizeb = matrixSizeb * sizeof(float);
	 size_t bufferSizec = matrixSizec * sizeof(float);

	 /* Create buffers for the matrices used in the kernel. */
	 int createMemoryObjectsSuccess = 0;
	 memoryObjectsa = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSizea, NULL, &errorNumber);
	 createMemoryObjectsSuccess &= errorNumber;
	 memoryObjectsb = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, bufferSizeb, NULL, &errorNumber);
	 createMemoryObjectsSuccess &= errorNumber;
	 memoryObjectsc = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, bufferSizec, NULL, &errorNumber);
	 createMemoryObjectsSuccess &= errorNumber;
	 LOGD("create memory err %d",createMemoryObjectsSuccess);
	 int mapMemoryObjectsSuccess = 0;
	 cl_float* matrixA = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjectsa, CL_TRUE, CL_MAP_WRITE, 0, bufferSizea, 0, NULL, NULL, &errorNumber);
	 mapMemoryObjectsSuccess &= errorNumber;
	 cl_float* matrixB = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjectsb, CL_TRUE, CL_MAP_WRITE, 0, bufferSizeb, 0, NULL, NULL, &errorNumber);
	 mapMemoryObjectsSuccess &= errorNumber;
	 cl_float* matrixC = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjectsc, CL_TRUE, CL_MAP_WRITE, 0, bufferSizec, 0, NULL, NULL, &errorNumber);
	 mapMemoryObjectsSuccess &= errorNumber;
	 LOGD("map memory err %d",mapMemoryObjectsSuccess);

	 sgemmInitialize(rowa,cola,colb, matrixA, matrixB, matrixC);
	 LOGD("data initial finish");
	 int unmapMemoryObjectsSuccess = 0;
	 errorNumber = clEnqueueUnmapMemObject(commandQueue, memoryObjectsa, matrixA, 0, NULL, NULL);
	 LOGD("memory code %d",errorNumber);
	 unmapMemoryObjectsSuccess &= errorNumber;
	 errorNumber = clEnqueueUnmapMemObject(commandQueue, memoryObjectsb, matrixB, 0, NULL, NULL);
	 LOGD("memory code %d",errorNumber);
	 unmapMemoryObjectsSuccess &= errorNumber;
	 errorNumber = clEnqueueUnmapMemObject(commandQueue, memoryObjectsc, matrixC, 0, NULL, NULL);
	 LOGD("memory code %d",errorNumber);
	 unmapMemoryObjectsSuccess &= errorNumber;
	 LOGD("unmap memory err %d",unmapMemoryObjectsSuccess);

	 int setKernelArgumentsSuccess = 0;
	 errorNumber = clSetKernelArg(kernel, 0, sizeof(cl_mem), &memoryObjectsa);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 1, sizeof(cl_mem), &memoryObjectsb);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 2, sizeof(cl_mem), &memoryObjectsc);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 3, sizeof(cl_uint), &clrowa);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 4, sizeof(cl_uint), &clcola);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 5, sizeof(cl_uint), &clcolb);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 6, sizeof(cl_float), &alpha);
	 setKernelArgumentsSuccess &= errorNumber;
	 errorNumber = clSetKernelArg(kernel, 7, sizeof(cl_float), &beta);
	 setKernelArgumentsSuccess &= errorNumber;
	 LOGD("setKernel err %d",setKernelArgumentsSuccess);

	 LOGD("start running kernel");
	 clock_t start_t,end_t;
	 float cost_time;
	 start_t = clock();
	 cl_event event = 0;
	 size_t globalWorksize[2] = {rowa, colb};
	 errorNumber = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, globalWorksize, NULL, 0, NULL, &event);
	 //LOGD("Enqueue err code %d",errorNumber);
	 errorNumber = clFinish(commandQueue);
	 end_t = clock();
	 cost_time = (float)(end_t-start_t)/CLOCKS_PER_SEC*1000;
	 LOGD("Finish err code %d",errorNumber);
	 float time;
	 time = printProfilingInfo(event);
	 LOGT("using CPU clock: %f ms",cost_time);
	 LOGT("using GPU clock: %f ms",time);
	 clReleaseEvent(event);
	 matrixC = (cl_float*)clEnqueueMapBuffer(commandQueue, memoryObjectsc, CL_TRUE, CL_MAP_READ, 0, bufferSizec, 0, NULL, NULL, &errorNumber);
	 clEnqueueUnmapMemObject(commandQueue, memoryObjectsc, matrixC, 0, NULL, NULL);
	 LOGD("read out matrixC finish");
	 LOGD("matrixC value C(0,0): %f",matrixC[0]);
	 cleanUpOpenCL(context, commandQueue, program, kernel, memoryObjectsa, memoryObjectsb,memoryObjectsc,numberOfMemoryObjects);
	 LOGD("RUNNING finsh");
	 return time;
}
// Convenience overload taking only the exception state: forwards to the
// three-argument createCommandQueue with a null device and a properties
// value of 0.
PassRefPtr<WebCLCommandQueue> WebCLContext::createCommandQueue(ExceptionState& es)
{
    return createCommandQueue(nullptr, 0, es);
}
// Convenience overload for an explicit device: forwards to the
// three-argument createCommandQueue with `device` and a properties value
// of 0.
PassRefPtr<WebCLCommandQueue> WebCLContext::createCommandQueue(WebCLDevice* device, ExceptionState& es)
{
    return createCommandQueue(device, 0, es);
}
// Convenience overload for explicit properties: forwards to the
// three-argument createCommandQueue with a null device and `properties`.
PassRefPtr<WebCLCommandQueue> WebCLContext::createCommandQueue(int properties, ExceptionState& es)
{
    return createCommandQueue(nullptr, properties, es);
}