Exemplo n.º 1
0
int setupCL(const char *filename)
{
    //connect to a compute device
    err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_CPU, 1, &devices, NULL);
    //err = clGetDeviceIDs(NULL,CL_DEVICE_TYPE_GPU, 1, &devices, NULL);
    
    //get info about device
    size_t returned_size = 0;
    cl_char vendor_name[1024] = {0};
    cl_char device_name[1024] = {0};
    err = clGetDeviceInfo(devices, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);
    err|= clGetDeviceInfo(devices, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);
    printf("Connecting to %s %s...\n", vendor_name, device_name);
    
    //read the program
    printf("Loading program '%s'\n\n", filename);
    char *program_source = loadProgramSource(filename);
    
    //create the context and command queue
    context = clCreateContext(0, 1, &devices, NULL, NULL, &err);
    cmd_queue = clCreateCommandQueue(context, devices, 0, NULL);
    
    //create program from .cl file
    program = clCreateProgramWithSource(context,1, (const char**)&program_source, NULL, &err);
    
    //build the kernel program
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    printf("Error: %d\n", err);
    char build[2048];
    clGetProgramBuildInfo(program, devices, CL_PROGRAM_BUILD_LOG, 2048, build, NULL);
    printf("Build Log:\n%s\n",build);
    
    //create the kernel
    kernel = clCreateKernel(program, "rhs", &err);
    
    //get size of buffers
    size_t w_n_buffer_size = sizeof(float)*KSIZE*X*Y;
    size_t K_buffer_size = sizeof(float)*KSIZE*X*Y;
    size_t p_buffer_size = sizeof(float)*X*Y;
    size_t H0_buffer_size = sizeof(float)*X*Y;
    
    //create buffers
    w_n_mem = clCreateBuffer(context, CL_MEM_READ_ONLY,  w_n_buffer_size, NULL, NULL);
    p_mem   = clCreateBuffer(context, CL_MEM_READ_ONLY,  p_buffer_size,   NULL, NULL);
    H0_mem  = clCreateBuffer(context, CL_MEM_READ_ONLY,  H0_buffer_size,  NULL, NULL);
    K_mem   = clCreateBuffer(context, CL_MEM_READ_WRITE, K_buffer_size,   NULL, NULL);
    
    //set kernel arguments
    err  = clSetKernelArg(kernel,  0, sizeof(cl_mem), &w_n_mem);
    err |= clSetKernelArg(kernel,  1, sizeof(cl_mem), &K_mem);
    err |= clSetKernelArg(kernel,  2, sizeof(cl_mem), &p_mem);
    err |= clSetKernelArg(kernel,  3, sizeof(cl_mem), &H0_mem);
    err |= clSetKernelArg(kernel,  4, sizeof(int),    &Y);
    err |= clSetKernelArg(kernel,  5, sizeof(int),    &X);
    err |= clSetKernelArg(kernel,  6, sizeof(float),  &dy);
    err |= clSetKernelArg(kernel,  7, sizeof(float),  &dx);
    err |= clSetKernelArg(kernel,  8, sizeof(float),  &alpha);
    
    return CL_SUCCESS;
}
Exemplo n.º 2
0
/*------------------------------------------------------------------------------------------------------------
 * 指定したソースコードファイルからプログラムオブジェクとを作成する
 * filename:カーネルのソースファイル
 */
void ClHelper::preloadProgram(const char* filename)
{
    cl_int status;
    char *source = 0;
    
    // ファイルからプログラムを読み込む
    try {
        source = loadProgramSource(filename);
    } catch (MyError err) {
        fprintf(stderr, "Error: %s\n", err.cstr());
        throw MyError("failed to load compute program from file.", __FUNCTION__);
    }
    
    // プログラムオブジェクトを作成する
    mProgram = clCreateProgramWithSource(mContext,
                                         1,
                                         (const char **)&source,
                                         NULL,
                                         &status);
    if (mProgram ==(cl_program)0) {
        printError(status);
        delete [] source;
        throw MyError("failed to create program object", __FUNCTION__);
    }
    
    // プログラムをビルドする
    cl_device_id devices[1];
    devices[0] = mDevaiceId;
    
    status = clBuildProgram(mProgram, 1, devices, NULL, NULL, NULL);
    if (status != CL_SUCCESS) {
        printError(status);
        showBuildingLog(mProgram, devices[0]);
        delete [] source;
        throw MyError("failed to build program object.", __FUNCTION__);
    }
    
    delete [] source;
}
Exemplo n.º 3
0
int main()
{
    cl_platform_id platform_id = NULL;
    cl_device_id device_id = NULL;
    cl_context context = NULL;
    cl_command_queue command_queue = NULL;
    cl_mem objA = NULL;
    cl_mem objB = NULL;
    cl_mem objC = NULL;
    cl_program program = NULL;
    cl_kernel kernel = NULL;
    cl_uint ret_num_devices;
    cl_uint ret_num_platforms;
    cl_int ret;
 
    cl_event event1;
 
    int i, j;
    float *A;
    float *B;
    float *C;
 
    A = (float *)malloc(4*4*sizeof(float));
    B = (float *)malloc(4*4*sizeof(float));
    C = (float *)malloc(4*4*sizeof(float));
 
    /* Initialize input data */
    for (i=0; i<4; i++) {
        for (j=0; j<4; j++) {
            A[i*4+j] = i*4+j+1;
            B[i*4+j] = j*4+i+1;
        }
    }
 
    /* Get Platform/Device Information*/
    ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
 
    /* Create OpenCL Context */
    context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
 
    /* Create command queue */
    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
 
    /* Create Buffer Object */
    objA = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret);
    objB = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret);
    objC = clCreateBuffer(context, CL_MEM_READ_WRITE, 4*4*sizeof(float), NULL, &ret);
 
    /*
     * Creating an user event
     * As a user event is created, its execution status is set to be CL_SUBMITTED
     * and we tag the event to a callback so when event reaches CL_COMPLETE, it will 
     * execute postProcess
     */ 
    event1 = clCreateUserEvent(context, &ret);
    clSetEventCallback(event1, CL_COMPLETE, &postProcess, "Looks like its done.");

    /* Copy input data to the memory buffer */
 
    ret = clEnqueueWriteBuffer(command_queue, objA, CL_TRUE, 0, 4*4*sizeof(float), A, 0, NULL, NULL );
    printf("A has been written\n");
 
    /* The next command will wait for event1 according to its status*/
    ret = clEnqueueWriteBuffer(command_queue, objB, CL_TRUE, 0, 4*4*sizeof(float), B, 1, &event1, NULL);
    printf("B has been written\n");

    /* Tell event1 to complete */
    clSetUserEventStatus(event1, CL_COMPLETE);
	const char *file_names[] = {"sample_kernel.cl"}; 
	const int NUMBER_OF_FILES = 1;
	char* buffer[NUMBER_OF_FILES];
	size_t sizes[NUMBER_OF_FILES];
	loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes);
	
    /* Create kernel program from source file*/
    program = clCreateProgramWithSource(context, 1, (const char **)buffer, sizes, &ret);
    ret     = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
 
    /* Create data parallel OpenCL kernel */
    kernel = clCreateKernel(program, "sample", &ret);
 
    /* Set OpenCL kernel arguments */
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&objA);
    ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&objB);
    ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&objC);
 
    size_t global_item_size = 4;
    size_t local_item_size = 1;
 
    /* Execute OpenCL kernel as data parallel */
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, 
                                 &global_item_size, &local_item_size, 0, NULL, NULL);
 
    /* Transfer result to host */
    ret = clEnqueueReadBuffer(command_queue, objC, CL_TRUE, 0, 4*4*sizeof(float), C, 0, NULL, NULL);
 
    /* Display Results */
    for (i=0; i<4; i++) {
        for (j=0; j<4; j++) {
            printf("%7.2f ", C[i*4+j]);
        }
        printf("\n");
    }
 
 
    /* Finalization */
    ret = clFlush(command_queue);
    ret = clFinish(command_queue);
    ret = clReleaseKernel(kernel);
    ret = clReleaseProgram(program);
    ret = clReleaseMemObject(objA);
    ret = clReleaseMemObject(objB);
    ret = clReleaseMemObject(objC);
    ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);
 
    free(A);
    free(B);
    free(C);
 
    return 0;
}
Exemplo n.º 4
0
int main(int argc, char** argv) {

   /* OpenCL 1.1 data structures */
   cl_platform_id* platforms;
   cl_program program;
   cl_device_id device;
   cl_context context;

   /* OpenCL 1.1 scalar data types */
   cl_uint numOfPlatforms;
   cl_int  error;

   /* 
      Get the number of platforms 
      Remember that for each vendor's SDK installed on the computer,
      the number of available platform also increased. 
    */
   error = clGetPlatformIDs(0, NULL, &numOfPlatforms);
   if(error != CL_SUCCESS) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }

   platforms = (cl_platform_id*) alloca(sizeof(cl_platform_id) * numOfPlatforms);
   printf("Number of OpenCL platforms found: %d\n", numOfPlatforms);

   error = clGetPlatformIDs(numOfPlatforms, platforms, NULL);
   if(error != CL_SUCCESS) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }
   // Search for a CPU/GPU device through the installed platforms
   // Build a OpenCL program and do not run it.
   for(cl_uint i = 0; i < numOfPlatforms; i++ ) {
       // Get the GPU device
       error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 1, &device, NULL);
       if(error != CL_SUCCESS) {
          // Otherwise, get the CPU
          error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU, 1, &device, NULL);
       }
        if(error != CL_SUCCESS) {
            perror("Can't locate any OpenCL compliant device");
            exit(1);
        }
        /* Create a context */
        context = clCreateContext(NULL, 1, &device, NULL, NULL, &error);
        if(error != CL_SUCCESS) {
            perror("Can't create a valid OpenCL context");
            exit(1);
        }

        /* Load the two source files into temporary datastores */
        const char *file_names[] = {"simple.cl", "simple_2.cl"};
        const int NUMBER_OF_FILES = 2;
        char* buffer[NUMBER_OF_FILES];
        size_t sizes[NUMBER_OF_FILES];
        loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes);

        /* Create the OpenCL program object */
        program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error);				
	    if(error != CL_SUCCESS) {
	      perror("Can't create the OpenCL program object");
	      exit(1);   
	    }
        /* Build OpenCL program object and dump the error message, if any */
        char *program_log;
        const char options[] = "-cl-finite-math-only -cl-no-signed-zeros";  
        size_t log_size;
        //error = clBuildProgram(program, 1, &device, argv[1], NULL, NULL);		
        // Uncomment the line below, comment the line above; re-build the program to use build options statically
        error = clBuildProgram(program, 1, &device, options, NULL, NULL);		
	    if(error != CL_SUCCESS) {
	      // If there's an error whilst building the program, dump the log
	      clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
	      program_log = (char*) malloc(log_size+1);
	      program_log[log_size] = '\0';
	      clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
	            log_size+1, program_log, NULL);
	      printf("\n=== ERROR ===\n\n%s\n=============\n", program_log);
	      free(program_log);
	      exit(1);
	    }
  
        /* Query the program as to how many kernels were detected */
        cl_uint numOfKernels;
        error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels);
        if (error != CL_SUCCESS) {
            perror("Unable to retrieve kernel count from program");
            exit(1);
        }
        cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels);
        error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL);
        for(cl_uint i = 0; i < numOfKernels; i++) {
            char kernelName[32];
            cl_uint argCnt;
            clGetKernelInfo(kernels[i], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL);
            clGetKernelInfo(kernels[i], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL);
            printf("Kernel name: %s with arity: %d\n", kernelName, argCnt);
        }

        /* Clean up */
        for(cl_uint i = 0; i < numOfKernels; i++) { clReleaseKernel(kernels[i]); }
        for(i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); }
        clReleaseProgram(program);
        clReleaseContext(context);
   }
}
int main(int argc, char** argv) {
    /* OpenCL 1.1 data structures */
    cl_platform_id* platforms;
    cl_program program;
    cl_device_id device;
    cl_context context;
    cl_command_queue queue;
    cl_uint numOfPlatforms;
    cl_int  error;

    cl_mem matrixAMemObj; // input matrix A mem buffer
    cl_mem matrixBMemObj; // input matrix B mem buffer
    cl_mem matrixCMemObj; // input matrix C mem buffer
    cl_int* matrixA;      // input matrix A
    cl_int* matrixB;      // input matrix B
    cl_int* matrixC;      // input matrix C
    cl_uint widthA = WIDTH_G;
    cl_uint heightA = HEIGHT_G;
    cl_uint widthB = WIDTH_G;
    cl_uint heightB = HEIGHT_G;

	{
	    // allocate memory for input and output matrices 
        // based on whatever matrix theory i know.
	    matrixA = (cl_int*)malloc(widthA * heightA * sizeof(cl_int));
	    matrixB = (cl_int*)malloc(widthB * heightB * sizeof(cl_int));
	    matrixC = (cl_int*)malloc(widthB * heightA * sizeof(cl_int));

	    memset(matrixA, 0, widthA * heightA * sizeof(cl_int));
	    memset(matrixB, 0, widthB * heightB * sizeof(cl_int));
	    memset(matrixC, 0, widthB * heightA * sizeof(cl_int));
        
        fillRandom(matrixA, widthA, heightA, 643);
        fillRandom(matrixB, widthB, heightB, 991);
    }

    /*
     Get the number of platforms
     Remember that for each vendor's SDK installed on the computer,
     the number of available platform also increased.
     */
    error = clGetPlatformIDs(0, NULL, &numOfPlatforms);
    if(error != CL_SUCCESS) {
        perror("Unable to find any OpenCL platforms");
        exit(1);
    }
    
    platforms = (cl_platform_id*) alloca(sizeof(cl_platform_id) * numOfPlatforms);
    printf("Number of OpenCL platforms found: %d\n", numOfPlatforms);
    
    error = clGetPlatformIDs(numOfPlatforms, platforms, NULL);
    if(error != CL_SUCCESS) {
        perror("Unable to find any OpenCL platforms");
        exit(1);
    }
    // Search for a GPU device through the installed platforms
    // Build a OpenCL program and do not run it.
    for(cl_int i = 0; i < numOfPlatforms; i++ ) {
        // Get the GPU device
        error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 1, &device, NULL);

        if(error != CL_SUCCESS) {
            perror("Can't locate a OpenCL compliant device i.e. GPU");
            exit(1);
        }
        /* Create a context */
        context = clCreateContext(NULL, 1, &device, NULL, NULL, &error);
        if(error != CL_SUCCESS) {
            perror("Can't create a valid OpenCL context");
            exit(1);
        }
        
        /* Load the two source files into temporary datastores */
        const char *file_names[] = {"simple_mm_mult.cl"};
        const int NUMBER_OF_FILES = 1;
        char* buffer[NUMBER_OF_FILES];
        size_t sizes[NUMBER_OF_FILES];
        loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes);
        
        /* Create the OpenCL program object */
        program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error);
	    if(error != CL_SUCCESS) {
            perror("Can't create the OpenCL program object");
            exit(1);
	    }
        /* Build OpenCL program object and dump the error message, if any */
        char *program_log;
        const char options[] = "";
        size_t log_size;

        error = clBuildProgram(program, 1, &device, options, NULL, NULL);
	    if(error != CL_SUCCESS) {
            // If there's an error whilst building the program, dump the log
            clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
            program_log = (char*) malloc(log_size+1);
            program_log[log_size] = '\0';
            clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,
                                  log_size+1, program_log, NULL);
            printf("\n=== ERROR ===\n\n%s\n=============\n", program_log);
            free(program_log);
            exit(1);
	    }
       
        // Queue is created with profiling enabled 
        cl_command_queue_properties props;
        props |= CL_QUEUE_PROFILING_ENABLE;

        queue = clCreateCommandQueue(context, device, props, &error);

        cl_kernel kernel = clCreateKernel(program, "mmmult", &error);

        matrixAMemObj = clCreateBuffer(context,
                                       CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
                                       widthA * heightA * sizeof(cl_int),
                                       matrixA,
                                       &error);

        matrixBMemObj = clCreateBuffer(context,
                                       CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR,
                                       widthB * heightB * sizeof(cl_int),
                                       matrixB,
                                       &error);

        matrixCMemObj = clCreateBuffer(context,
                                       CL_MEM_WRITE_ONLY|CL_MEM_ALLOC_HOST_PTR,
                                       widthB * heightA * sizeof(cl_int),
                                       0,
                                       &error);

        clSetKernelArg(kernel, 0, sizeof(cl_int),(void*)&widthB);
        clSetKernelArg(kernel, 1, sizeof(cl_int),(void*)&heightA);
        clSetKernelArg(kernel, 2, sizeof(cl_mem),(void*)&matrixAMemObj);
        clSetKernelArg(kernel, 3, sizeof(cl_mem),(void*)&matrixBMemObj);
        clSetKernelArg(kernel, 4, sizeof(cl_mem),(void*)&matrixCMemObj);
         
        size_t globalThreads[] = {widthB, heightA};

		cl_event exeEvt; 
        cl_ulong executionStart, executionEnd;
		error = clEnqueueNDRangeKernel(queue,
		                               kernel,
		                               2,
		                               NULL,
                                       globalThreads,
                                       NULL, 
                                       0,
                                       NULL,
                                       &exeEvt);
		clWaitForEvents(1, &exeEvt);
		if(error != CL_SUCCESS) {
			printf("Kernel execution failure!\n");
			exit(-22);
		}	

        // let's understand how long it took?
        clGetEventProfilingInfo(exeEvt, CL_PROFILING_COMMAND_START, sizeof(executionStart), &executionStart, NULL);
        clGetEventProfilingInfo(exeEvt, CL_PROFILING_COMMAND_END, sizeof(executionEnd), &executionEnd, NULL);
        clReleaseEvent(exeEvt);

//        printf("Execution the matrix-matrix multiplication took %lu.%lu s\n", (executionEnd - executionStart)/1000000000, (executionEnd - executionStart)%1000000000);
        printf("Execution the matrix-matrix multiplication took %lu s\n", (executionEnd - executionStart));
        clEnqueueReadBuffer(queue,
                            matrixCMemObj,
                            CL_TRUE,
                            0,
                            widthB * heightA * sizeof(cl_int),
                            matrixC,
                            0,
                            NULL,
                            NULL);
       
        if (compare(matrixC, matrixA, matrixB, heightA, widthA, widthB))
            printf("Passed!\n");
        else 
            printf("Failed!\n");
 
        /* Clean up */
        for(i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); }
        clReleaseProgram(program);
        clReleaseContext(context);
        clReleaseMemObject(matrixAMemObj);
        clReleaseMemObject(matrixBMemObj);
        clReleaseMemObject(matrixCMemObj);
    }
    
    free(matrixA);
    free(matrixB);
    free(matrixC);
}
Exemplo n.º 6
0
int main(int argc, char** argv) {

   /* OpenCL 1.1 data structures */
   cl_platform_id* platforms;
   cl_program program;
   cl_device_id device;
   cl_context context;

   /* OpenCL 1.1 scalar data types */
   cl_uint numOfPlatforms;
   cl_int  error;

   /*
    Prepare an array of UserData via dynamic memory allocation
   */
   UserData* ud_in = (UserData*) malloc( sizeof(UserData) * DATA_SIZE); // input to device
   UserData* ud_out = (UserData*) malloc( sizeof(UserData) * DATA_SIZE); // output from device
   for( int i = 0; i < DATA_SIZE; ++i) {
      (ud_in + i)->x = i;
      (ud_in + i)->y = i;
      (ud_in + i)->z = i;
      (ud_in + i)->w = 3 * i;
   }
   /* 
      Get the number of platforms 
      Remember that for each vendor's SDK installed on the computer,
      the number of available platform also increased. 
    */
   error = clGetPlatformIDs(0, NULL, &numOfPlatforms);
   if(error != CL_SUCCESS ) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }

   platforms = (cl_platform_id*) alloca(sizeof(cl_platform_id) * numOfPlatforms);
   printf("Number of OpenCL platforms found: %d\n", numOfPlatforms);

   error = clGetPlatformIDs(numOfPlatforms, platforms, NULL);
   if(error != CL_SUCCESS ) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }
   // Search for a CPU/GPU device through the installed platforms
   // Build a OpenCL program and do not run it.
   for(cl_uint i = 0; i < numOfPlatforms; i++ ) {
       // Get the GPU device
       error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 1, &device, NULL);
       if(error != CL_SUCCESS) {
          // Otherwise, get the CPU
          error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_CPU, 1, &device, NULL);
       }
        if(error != CL_SUCCESS) {
            perror("Can't locate any OpenCL compliant device");
            exit(1);
        }
        /* Create a context */
        context = clCreateContext(NULL, 1, &device, NULL, NULL, &error);
        if(error != CL_SUCCESS) {
            perror("Can't create a valid OpenCL context");
            exit(1);
        }

        /* Load the two source files into temporary datastores */
        const char *file_names[] = {"user_test.cl"}; 
        const int NUMBER_OF_FILES = 1;
        char* buffer[NUMBER_OF_FILES];
        size_t sizes[NUMBER_OF_FILES];
        loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes);

        /* Create the OpenCL program object */
        program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error);				
	    if(error != CL_SUCCESS) {
	      perror("Can't create the OpenCL program object");
	      exit(1);   
	    }
        /* Build OpenCL program object and dump the error message, if any */
        char *program_log;
        size_t log_size;
        error = clBuildProgram(program, 1, &device, NULL, NULL, NULL);		
	    if(error != CL_SUCCESS) {
	      // If there's an error whilst building the program, dump the log
	      clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
	      program_log = (char*) malloc(log_size+1);
	      program_log[log_size] = '\0';
	      clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 
	            log_size+1, program_log, NULL);
	      printf("\n=== ERROR ===\n\n%s\n=============\n", program_log);
	      free(program_log);
	      exit(1);
	    }
  
        /* Query the program as to how many kernels were detected */
        cl_uint numOfKernels;
        error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels);
        if (error != CL_SUCCESS) {
            perror("Unable to retrieve kernel count from program");
            exit(1);
        }
        cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels);
        error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL);
        for(cl_uint i = 0; i < numOfKernels; i++) {
            char kernelName[32];
            cl_uint argCnt;
            clGetKernelInfo(kernels[i], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL);
            clGetKernelInfo(kernels[i], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL);
            printf("Kernel name: %s with arity: %d\n", kernelName, argCnt);
            printf("About to create command queue and enqueue this kernel...\n");

            /* Create a command queue */
            cl_command_queue cQ = clCreateCommandQueue(context, device, 0, &error);
            if (error != CL_SUCCESS) { 
                perror("Unable to create command-queue");
                exit(1);
            }

            /* Create a OpenCL buffer object */
            cl_mem UDObj = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, 
                                           sizeof(UserData) * DATA_SIZE, ud_in, &error);
            if (error != CL_SUCCESS) { 
                perror("Unable to create buffer object");
                exit(1);
            }

            /* Let OpenCL know that the kernel is suppose to receive an argument */
            error = clSetKernelArg(kernels[i], 0, sizeof(cl_mem), &UDObj);
            if (error != CL_SUCCESS) { 
                perror("Unable to set buffer object as kernel argument");
                exit(1);
            }

            /* Enqueue the kernel to the command queue */
            error = clEnqueueTask(cQ, kernels[i], 0, NULL, NULL);
            if (error != CL_SUCCESS) { 
                perror("Unable to enqueue task to command-queue");
                exit(1);
            }
            printf("Task has been enqueued successfully!\n");

            /* Enqueue the read-back from device to host */
            error = clEnqueueReadBuffer(cQ, UDObj,
                                         CL_TRUE,                    // blocking read
                                         0,                          // write from the start
                                         sizeof(UserData) * DATA_SIZE, // how much to copy
                                         ud_out, 0, NULL, NULL);
            if ( valuesOK(ud_in, ud_out) ) {
                printf("Check passed!\n");
            } else printf("Check failed!\n");

            /* Release the command queue */
            clReleaseCommandQueue(cQ);
            clReleaseMemObject(UDObj);
        }

        /* Clean up */
        
        for(cl_uint i = 0; i < numOfKernels; i++) { clReleaseKernel(kernels[i]); }
        for(i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); }
        clReleaseProgram(program);
        clReleaseContext(context);
   }

   free(ud_in);
   free(ud_out);
}
Exemplo n.º 7
0
int main(int argc, char** argv) {

   /* OpenCL 1.1 data structures */
   cl_platform_id* platforms;
   cl_program program;
   cl_context context;

   /* OpenCL 1.1 scalar data types */
   cl_uint numOfPlatforms;
   cl_int  error;

   cl_float16* ud_in = (cl_float16*) malloc( sizeof(cl_float16) * DATA_SIZE); // input to device
   cl_float16* ud_out = (cl_float16*) malloc( sizeof(cl_float16) * DATA_SIZE); // output from device
   for( int i = 0; i < DATA_SIZE; ++i) {
       ud_in[i] = (cl_float16){ (float)i,(float)i,(float)i,(float)i, (float)i,(float)i,(float)i,(float)i, (float)i,(float)i,(float)i,(float)i, (float)i,(float)i,(float)i,(float)i };
   }

   /* 
      Get the number of platforms 
      Remember that for each vendor's SDK installed on the computer,
      the number of available platform also increased. 
    */
   error = clGetPlatformIDs(0, NULL, &numOfPlatforms);
   if(error != CL_SUCCESS ) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }

   platforms = (cl_platform_id*) alloca(sizeof(cl_platform_id) * numOfPlatforms);
   printf("Number of OpenCL platforms found: %d\n", numOfPlatforms);

   error = clGetPlatformIDs(numOfPlatforms, platforms, NULL);
   if(error != CL_SUCCESS ) {			
      perror("Unable to find any OpenCL platforms");
      exit(1);
   }
   // Search for a CPU/GPU device through the installed platforms
   // Build a OpenCL program and do not run it.
   for(cl_uint i = 0; i < numOfPlatforms; i++ ) {

        cl_uint numOfDevices = 0;

        /* Determine how many devices are connected to your platform */
        error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &numOfDevices);
        if (error != CL_SUCCESS ) { 
            perror("Unable to obtain any OpenCL compliant device info");
            exit(1);
        }
        cl_device_id* devices = (cl_device_id*) alloca(sizeof(cl_device_id) * numOfDevices);

        /* Load the information about your devices into the variable 'devices' */
        error = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, numOfDevices, devices, NULL);
        if (error != CL_SUCCESS ) { 
            perror("Unable to obtain any OpenCL compliant device info");
            exit(1);
        }
        printf("Number of detected OpenCL devices: %d\n", numOfDevices);

	    /* Create a context */
        cl_context_properties ctx[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platforms[i], 0 };
	    context = clCreateContext(ctx, numOfDevices, devices, NULL, NULL, &error);
	    if(error != CL_SUCCESS) {
	        perror("Can't create a valid OpenCL context");
	        exit(1);
	    }

	    /* For each device, create a buffer and partition that data among the devices for compute! */
	    cl_mem inobj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
	                                  sizeof(cl_float16) * DATA_SIZE, ud_in, &error);
	    if(error != CL_SUCCESS) {
	        perror("Can't create a buffer");
	        exit(1);
	    }

        int offset = 0; 
        for(int i = 0; i < numOfDevices; ++i, ++offset ) {
	        /* Load the two source files into temporary datastores */
	        const char *file_names[] = {"vector_load.cl"}; 
	        const int NUMBER_OF_FILES = 1;
	        char* buffer[NUMBER_OF_FILES];
	        size_t sizes[NUMBER_OF_FILES];
	        loadProgramSource(file_names, NUMBER_OF_FILES, buffer, sizes);
	
	        /* Create the OpenCL program object */
	        program = clCreateProgramWithSource(context, NUMBER_OF_FILES, (const char**)buffer, sizes, &error);				
		    if(error != CL_SUCCESS) {
		      perror("Can't create the OpenCL program object");
		      exit(1);   
		    }

	        /* Build OpenCL program object and dump the error message, if any */
	        char *program_log;
	        size_t log_size;
	        error = clBuildProgram(program, 1, &devices[i], NULL, NULL, NULL);		
		    if(error != CL_SUCCESS) {
		      // If there's an error whilst building the program, dump the log
		      clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
		      program_log = (char*) malloc(log_size+1);
		      program_log[log_size] = '\0';
		      clGetProgramBuildInfo(program, devices[i], CL_PROGRAM_BUILD_LOG, 
		            log_size+1, program_log, NULL);
		      printf("\n=== ERROR ===\n\n%s\n=============\n", program_log);
		      free(program_log);
		      exit(1);
		    }
	  
	        /* Query the program as to how many kernels were detected */
	        cl_uint numOfKernels;
	        error = clCreateKernelsInProgram(program, 0, NULL, &numOfKernels);
	        if (error != CL_SUCCESS) {
	            perror("Unable to retrieve kernel count from program");
	            exit(1);
	        }
	        cl_kernel* kernels = (cl_kernel*) alloca(sizeof(cl_kernel) * numOfKernels);
	        error = clCreateKernelsInProgram(program, numOfKernels, kernels, NULL);

            /* Loop thru each kernel and execute on device */
	        for(cl_uint j = 0; j < numOfKernels; j++) {
	            char kernelName[32];
	            cl_uint argCnt;
	            clGetKernelInfo(kernels[j], CL_KERNEL_FUNCTION_NAME, sizeof(kernelName), kernelName, NULL);
	            clGetKernelInfo(kernels[j], CL_KERNEL_NUM_ARGS, sizeof(argCnt), &argCnt, NULL);
	            printf("Kernel name: %s with arity: %d\n", kernelName, argCnt);
	            printf("About to create command queue and enqueue this kernel...\n");
	
	            /* Create a command queue */
	            cl_command_queue cQ = clCreateCommandQueue(context, devices[i], 0, &error);
	            if (error != CL_SUCCESS) { 
	                perror("Unable to create command-queue");
	                exit(1);
	            }
	
                /* Create a buffer and copy the data from the main buffer */
	            cl_mem outobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
	                                           sizeof(cl_float16) * DATA_SIZE, 0, &error);
	            if (error != CL_SUCCESS) { 
	                perror("Unable to create sub-buffer object");
	                exit(1);
	            }

	            /* Let OpenCL know that the kernel is suppose to receive an argument */
	            error = clSetKernelArg(kernels[j], 0, sizeof(cl_mem), &inobj);
	            error = clSetKernelArg(kernels[j], 1, sizeof(cl_mem), &outobj);
	            if (error != CL_SUCCESS) { 
	                perror("Unable to set buffer object in kernel");
	                exit(1);
	            }
	
	            /* Enqueue the kernel to the command queue */
                size_t threadsPerGroup[] = {4}; 
                size_t numOfGroups[] = { DATA_SIZE / threadsPerGroup[0] };
                error = clEnqueueNDRangeKernel(cQ,
                                               kernels[j],
                                               1,
                                               0,
                                               numOfGroups,
                                               threadsPerGroup,0, NULL, NULL);
	            if (error != CL_SUCCESS) { 
	                perror("Unable to enqueue task to command-queue");
	                exit(1);
	            }
	            printf("Task has been enqueued successfully!\n");

	            /* Enqueue the read-back from device to host */
	            error = clEnqueueReadBuffer(cQ, outobj,
	                                        CL_TRUE,               // blocking read
	                                        0,                      // read from the start
	                                        sizeof(cl_float16)*DATA_SIZE,          // how much to copy
	                                        ud_out, 0, NULL, NULL);
                /* Check the returned data */
	            if ( valuesOK(ud_in, ud_out, DATA_SIZE) ) {
	                printf("Check passed!\n");
	            } else printf("Check failed!\n");
	
	            /* Release the command queue */
	            clReleaseCommandQueue(cQ);
	            clReleaseMemObject(outobj);
	        } 

        /* Clean up */
        
        for(cl_uint i = 0; i < numOfKernels; i++) { clReleaseKernel(kernels[i]); }
        for(int i=0; i< NUMBER_OF_FILES; i++) { free(buffer[i]); }
        clReleaseProgram(program);
    }// end of device loop and execution
    
	    clReleaseMemObject(inobj);
        clReleaseContext(context);
   }// end of platform loop

   free(ud_in);
   free(ud_out);
}