Пример #1
0
/**
 * Reads the full contents of `stream` into memory and forwards the text to
 * loadKernelSource() together with `name` and `kernel`.
 *
 * @param stream  Stream holding the kernel source; its getLength() decides how
 *                many bytes are requested.
 * @param name    Passed through unchanged to loadKernelSource().
 * @param kernel  Passed through unchanged to loadKernelSource().
 * @return false if fewer bytes than getLength() could be read; otherwise the
 *         result of loadKernelSource().
 */
bool SkCLImageDiffer::loadKernelStream(SkStream* stream, const char name[], cl_kernel* kernel) {
    // Size the destination up front so the stream can be read straight into it.
    SkString kernelText;
    kernelText.resize(stream->getLength());

    const size_t wanted = kernelText.size();
    const size_t received = stream->read(kernelText.writable_str(), wanted);

    // Only a complete read is handed on.
    if (received == wanted) {
        return loadKernelSource(kernelText.c_str(), name, kernel);
    }

    SkDebugf("Failed to read kernel source file");
    return false;
}
Пример #2
0
// Constructs the GPU-backed autoencoder.
//
// After the base autoencoder() constructor has run, this:
//   1. initialises the OpenCL environment via gpu_init(gpu_env, 0);
//   2. allocates device buffers for the eight weight matrices, the eight bias
//      vectors, per-layer activation/error buffers (plus a state buffer for
//      layer 4), the weight/bias deltas and the error vector;
//   3. builds the OpenCL program from "../src/gpu_rbm.cl" and creates the kernels;
//   4. uploads the host-side weight0..weight7 and bias0..bias7 arrays.
//
// If the program fails to compile, the build log is printed and the process
// exits with a failure status.
//
// NOTE(review): gpu_env.status is overwritten by every clCreateBuffer /
// clCreateKernel / clEnqueueWriteBuffer call below and is never inspected, so
// a failed allocation or upload goes unnoticed here.
// NOTE(review): weight0..weight7 / bias0..bias7 are presumably set up by the
// base-class constructor -- confirm.
autoencoder_GPU::autoencoder_GPU():autoencoder(){

	// initialize the OpenCL environment
	gpu_init(gpu_env, 0);

	// weight matrices between consecutive layers
	d_weight0 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize0 * nLayerSize1 * sizeof(floatType), NULL, &gpu_env.status);
	d_weight1 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize1 * nLayerSize2 * sizeof(floatType), NULL, &gpu_env.status);
	d_weight2 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize2 * nLayerSize3 * sizeof(floatType), NULL, &gpu_env.status);
	d_weight3 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize3 * nLayerSize4 * sizeof(floatType), NULL, &gpu_env.status);
	d_weight4 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize4 * nLayerSize5 * sizeof(floatType), NULL, &gpu_env.status);
	d_weight5 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize5 * nLayerSize6 * sizeof(floatType), NULL, &gpu_env.status);
	d_weight6 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize6 * nLayerSize7 * sizeof(floatType), NULL, &gpu_env.status);
	d_weight7 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize7 * nLayerSize8 * sizeof(floatType), NULL, &gpu_env.status);

	// bias vectors, sized by the layer each one feeds into
	d_bias0 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize1 * sizeof(floatType), NULL, &gpu_env.status);
	d_bias1 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize2 * sizeof(floatType), NULL, &gpu_env.status);
	d_bias2 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize3 * sizeof(floatType), NULL, &gpu_env.status);
	d_bias3 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize4 * sizeof(floatType), NULL, &gpu_env.status);
	d_bias4 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize5 * sizeof(floatType), NULL, &gpu_env.status);
	d_bias5 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize6 * sizeof(floatType), NULL, &gpu_env.status);
	d_bias6 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize7 * sizeof(floatType), NULL, &gpu_env.status);
	d_bias7 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize8 * sizeof(floatType), NULL, &gpu_env.status);

	// per-layer activation / error buffers, one column per vector in the batch
	d_layer0act = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize0 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer0err = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize0 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer1act = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize1 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer1err = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize1 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer2act = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize2 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer2err = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize2 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer3act = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize3 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer3err = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize3 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer4act = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize4 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer4err = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize4 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer4state = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize4 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer5act = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize5 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer5err = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize5 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer6act = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize6 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer6err = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize6 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer7act = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize7 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer7err = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize7 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer8act = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize8 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);
	d_layer8err = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize8 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);

	// weight deltas, same shapes as the weight matrices
	d_delta_weight0 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize0 * nLayerSize1 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_weight1 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize1 * nLayerSize2 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_weight2 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize2 * nLayerSize3 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_weight3 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize3 * nLayerSize4 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_weight4 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize4 * nLayerSize5 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_weight5 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize5 * nLayerSize6 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_weight6 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize6 * nLayerSize7 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_weight7 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize7 * nLayerSize8 * sizeof(floatType), NULL, &gpu_env.status);

	// bias deltas, same shapes as the bias vectors
	d_delta_bias0 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize1 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_bias1 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize2 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_bias2 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize3 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_bias3 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize4 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_bias4 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize5 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_bias5 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize6 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_bias6 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize7 * sizeof(floatType), NULL, &gpu_env.status);
	d_delta_bias7 = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize8 * sizeof(floatType), NULL, &gpu_env.status);

	// error vector
	d_error = clCreateBuffer(gpu_env.ctx, CL_MEM_READ_WRITE, nLayerSize0 * nVectorPerBatch * sizeof(floatType), NULL, &gpu_env.status);

	// build OpenCL kernels
	// The buffer is zero-filled so that a failed or short load still yields a
	// NUL-terminated (possibly empty) string; clCreateProgramWithSource is
	// called with lengths == NULL and therefore relies on the terminator.
	char* source = new char[KERNEL_SOURCE_LENGTH]();
	loadKernelSource("../src/gpu_rbm.cl", source);
	gpu_env.prog = clCreateProgramWithSource(gpu_env.ctx, 1, (const char**)&source, NULL, &gpu_env.status);

	// The program object holds its own copy of the source text, so the host
	// buffer is no longer needed.
	delete[] source;
	source = NULL;

	gpu_env.status = clBuildProgram(gpu_env.prog, 0, NULL, NULL, NULL, NULL);

	if (gpu_env.status == CL_BUILD_PROGRAM_FAILURE) {
		// Determine the size of the log
		size_t log_size = 0;
		clGetProgramBuildInfo(gpu_env.prog, gpu_env.device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);

		// Allocate zeroed memory for the log; the extra byte keeps the text
		// terminated even if the query below does not fill the buffer
		char *log = (char *) calloc(log_size + 1, 1);

		if (log != NULL) {
			// Get the log
			clGetProgramBuildInfo(gpu_env.prog, gpu_env.device, CL_PROGRAM_BUILD_LOG, log_size, log, NULL);

			// Print the log
			printf("%s\n", log);

			free(log);
		}

		// A failed kernel build is an error: report it through the exit status
		exit(EXIT_FAILURE);
	} else if (gpu_env.status != CL_SUCCESS) {
		// Any other build error used to pass silently; make it visible.
		fprintf(stderr, "clBuildProgram failed with status %d\n", (int)gpu_env.status);
	}

	// create one kernel object per entry point used by the autoencoder
	squareError		= clCreateKernel(gpu_env.prog, "squareError", &gpu_env.status);
	sigmoid			= clCreateKernel(gpu_env.prog, "sigmoid", &gpu_env.status);
	addBias			= clCreateKernel(gpu_env.prog, "addBias", &gpu_env.status);
	sumBatch		= clCreateKernel(gpu_env.prog, "sumBatch", &gpu_env.status);
	add				= clCreateKernel(gpu_env.prog, "add", &gpu_env.status);
	getStates		= clCreateKernel(gpu_env.prog, "getStates", &gpu_env.status);
	updateWeights	= clCreateKernel(gpu_env.prog, "updateWeights", &gpu_env.status);
	updateBias		= clCreateKernel(gpu_env.prog, "updateBias", &gpu_env.status);
	randNum			= clCreateKernel(gpu_env.prog, "PRNG_threefry4x32", &gpu_env.status);
	randn			= clCreateKernel(gpu_env.prog, "PRNGn_threefry4x32", &gpu_env.status);
	reset			= clCreateKernel(gpu_env.prog, "reset", &gpu_env.status);
	rounding		= clCreateKernel(gpu_env.prog, "rounding", &gpu_env.status);
	subtract		= clCreateKernel(gpu_env.prog, "subtract", &gpu_env.status);
	deriv			= clCreateKernel(gpu_env.prog, "deriv", &gpu_env.status);
	updateAE		= clCreateKernel(gpu_env.prog, "update", &gpu_env.status);

	// transfer the host-side weights to the GPU (blocking writes)
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_weight0, CL_TRUE, 0, nLayerSize0 * nLayerSize1 * sizeof(floatType), (void*)weight0, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_weight1, CL_TRUE, 0, nLayerSize1 * nLayerSize2 * sizeof(floatType), (void*)weight1, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_weight2, CL_TRUE, 0, nLayerSize2 * nLayerSize3 * sizeof(floatType), (void*)weight2, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_weight3, CL_TRUE, 0, nLayerSize3 * nLayerSize4 * sizeof(floatType), (void*)weight3, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_weight4, CL_TRUE, 0, nLayerSize4 * nLayerSize5 * sizeof(floatType), (void*)weight4, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_weight5, CL_TRUE, 0, nLayerSize5 * nLayerSize6 * sizeof(floatType), (void*)weight5, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_weight6, CL_TRUE, 0, nLayerSize6 * nLayerSize7 * sizeof(floatType), (void*)weight6, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_weight7, CL_TRUE, 0, nLayerSize7 * nLayerSize8 * sizeof(floatType), (void*)weight7, 0, NULL, NULL);

	// transfer the host-side biases to the GPU (blocking writes)
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_bias0, CL_TRUE, 0, nLayerSize1 * sizeof(floatType), (void*)bias0, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_bias1, CL_TRUE, 0, nLayerSize2 * sizeof(floatType), (void*)bias1, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_bias2, CL_TRUE, 0, nLayerSize3 * sizeof(floatType), (void*)bias2, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_bias3, CL_TRUE, 0, nLayerSize4 * sizeof(floatType), (void*)bias3, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_bias4, CL_TRUE, 0, nLayerSize5 * sizeof(floatType), (void*)bias4, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_bias5, CL_TRUE, 0, nLayerSize6 * sizeof(floatType), (void*)bias5, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_bias6, CL_TRUE, 0, nLayerSize7 * sizeof(floatType), (void*)bias6, 0, NULL, NULL);
	gpu_env.status = clEnqueueWriteBuffer(gpu_env.queue, d_bias7, CL_TRUE, 0, nLayerSize8 * sizeof(floatType), (void*)bias7, 0, NULL, NULL);

}
Пример #3
0
int main(int argc, char ** argv)
{
    cl_float4 * x = initializePositions();

    std::string* source = loadKernelSource("src/kernels.cl");

    // Get available platforms
    std::vector<cl::Platform> platforms;
    cl::Platform::get(&platforms);

    // Select the default platform and create a context using this platform and the GPU
    cl_context_properties cps[3] = { 
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)(platforms[0])(), 
        0 
    };

    cl::Context context(CL_DEVICE_TYPE_GPU, cps);

    // Get a list of devices on this platform
    std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();

    // Create a command queue and use the first device
    cl::CommandQueue queue = cl::CommandQueue(context, devices[0]);
    cl::Program::Sources sources(1, std::make_pair(source->c_str(), source->length()+1));

    // Make program of the source code in the context
    cl::Program program = cl::Program(context, sources);

    // Build program for these specific devices
    try{
        program.build(devices);
    } catch(cl::Error error) {
        std::cout << error.what() << "(" << error.err() << ")" << std::endl;

        std::string build_log;
        build_log = program.getBuildInfo<CL_PROGRAM_BUILD_STATUS>(devices[0]);
        std::cout << "Build status: " << build_log << std::endl;

        build_log = program.getBuildInfo<CL_PROGRAM_BUILD_OPTIONS>(devices[0]);
        std::cout << "Build options: " << build_log << std::endl;

        build_log = program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(devices[0]);
        std::cout << "Build log: " << build_log << std::endl;
        exit(0);
    }

    // Create buffers for points, centers of mass, and bins
    
    cl::Buffer pointsBuffer = 
      cl::Buffer(context, CL_MEM_READ_ONLY, POINTS * sizeof(cl_float4));
    cl::Buffer cmBuffer =  cl::Buffer(context, CL_MEM_READ_WRITE, BINS * sizeof(cl_float4));
    cl::Buffer binsBuffer = 
      cl::Buffer(context, CL_MEM_READ_WRITE, POINTS * sizeof(unsigned int));
    cl::Buffer offsetsBuffer = 
      cl::Buffer(context, CL_MEM_READ_WRITE, BINS * sizeof(unsigned int));

    // Upload points to GPU and compute the centers of mass

    queue.enqueueWriteBuffer(pointsBuffer, CL_TRUE, 0, POINTS * sizeof(cl_float4), x);
    computeBins(context, queue, program, &pointsBuffer, &cmBuffer);

    // Generate bin offsets

    genOffsets(context, queue, program, &cmBuffer, &offsetsBuffer);

    // Sort the bins, and then compute all forces

    sortBins(context, queue, program, &pointsBuffer, &offsetsBuffer, &binsBuffer);
    cl_float4* a = 
      forces(context, queue, program, &pointsBuffer, &cmBuffer, &offsetsBuffer, &binsBuffer);

    for(int i = 0; i < POINTS; i++){
        printf("(%2.2f,%2.2f,%2.2f,%2.2f) (%2.3f,%2.3f,%2.3f)\n", 
                   x[i].x, x[i].y, x[i].z, x[i].w,
                   a[i].x, a[i].y, a[i].z);
    }

    free(x);
    return 0;
}