Пример #1
0
static int
hwloc_opencl_discover(struct hwloc_backend *backend)
{
  struct hwloc_topology *topology = backend->topology;
  cl_platform_id *platform_ids = NULL;
  cl_uint nr_platforms;
  cl_int clret;
  unsigned j, res = 0;

  if (!(hwloc_topology_get_flags(topology) & (HWLOC_TOPOLOGY_FLAG_IO_DEVICES|HWLOC_TOPOLOGY_FLAG_WHOLE_IO)))
    return 0;

  if (!hwloc_topology_is_thissystem(topology)) {
    hwloc_debug("%s", "\nno OpenCL detection (not thissystem)\n");
    return 0;
  }

  clret = clGetPlatformIDs(0, NULL, &nr_platforms);
  if (CL_SUCCESS != clret || !nr_platforms)
    return 0;
  hwloc_debug("%u OpenCL platforms\n", nr_platforms);
  platform_ids = malloc(nr_platforms * sizeof(*platform_ids));
  if (!platform_ids)
    return 0;
  clret = clGetPlatformIDs(nr_platforms, platform_ids, &nr_platforms);
  if (CL_SUCCESS != clret || !nr_platforms) {
    free(platform_ids);
    return 0;
  }

  for(j=0; j<nr_platforms; j++) {
    cl_device_id *device_ids = NULL;
    cl_uint nr_devices;
    unsigned i;

    clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, 0, NULL, &nr_devices);
    if (CL_SUCCESS != clret)
      continue;
    device_ids = malloc(nr_devices * sizeof(*device_ids));
    clret = clGetDeviceIDs(platform_ids[j], CL_DEVICE_TYPE_ALL, nr_devices, device_ids, &nr_devices);
    if (CL_SUCCESS != clret) {
      free(device_ids);
      continue;
    }

    for(i=0; i<nr_devices; i++) {
      cl_platform_id platform_id = 0;
      cl_device_type type;
#ifdef CL_DEVICE_TOPOLOGY_AMD
      cl_device_topology_amd amdtopo;
#endif
      cl_ulong globalmemsize;
      cl_uint computeunits;
      hwloc_obj_t osdev, parent;
      char buffer[64];

      hwloc_debug("This is opencl%dd%d\n", j, i);

#ifdef CL_DEVICE_TOPOLOGY_AMD
      clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
      if (CL_SUCCESS != clret) {
	hwloc_debug("no AMD-specific device information: %d\n", clret);
	continue;
      } else if (CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD != amdtopo.raw.type) {
	hwloc_debug("AMD-specific device topology reports non-PCIe device type: %u\n", amdtopo.raw.type);
	continue;
      }
#else
      continue;
#endif

      osdev = hwloc_alloc_setup_object(HWLOC_OBJ_OS_DEVICE, -1);
      snprintf(buffer, sizeof(buffer), "opencl%dd%d", j, i);
      osdev->name = strdup(buffer);
      osdev->depth = (unsigned) HWLOC_TYPE_DEPTH_UNKNOWN;
      osdev->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;

      hwloc_obj_add_info(osdev, "CoProcType", "OpenCL");
      hwloc_obj_add_info(osdev, "Backend", "OpenCL");

      clGetDeviceInfo(device_ids[i], CL_DEVICE_TYPE, sizeof(type), &type, NULL);
      if (type == CL_DEVICE_TYPE_GPU)
	hwloc_obj_add_info(osdev, "OpenCLDeviceType", "GPU");
      else if (type == CL_DEVICE_TYPE_ACCELERATOR)
	hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Accelerator");
      else if (type == CL_DEVICE_TYPE_CPU)
	hwloc_obj_add_info(osdev, "OpenCLDeviceType", "CPU");
      else if (type == CL_DEVICE_TYPE_CUSTOM)
	hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Custom");
      else
	hwloc_obj_add_info(osdev, "OpenCLDeviceType", "Unknown");

      buffer[0] = '\0';
      clGetDeviceInfo(device_ids[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL);
      if (buffer[0] != '\0')
	hwloc_obj_add_info(osdev, "GPUVendor", buffer);

      buffer[0] = '\0';
#ifdef CL_DEVICE_BOARD_NAME_AMD
      clGetDeviceInfo(device_ids[i], CL_DEVICE_BOARD_NAME_AMD, sizeof(buffer), buffer, NULL);
#else
      clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
#endif
      if (buffer[0] != '\0')
	hwloc_obj_add_info(osdev, "GPUModel", buffer);

      snprintf(buffer, sizeof(buffer), "%u", j);
      hwloc_obj_add_info(osdev, "OpenCLPlatformIndex", buffer);

      buffer[0] = '\0';
      clret = clGetDeviceInfo(device_ids[i], CL_DEVICE_PLATFORM, sizeof(platform_id), &platform_id, NULL);
      if (CL_SUCCESS == clret) {
	clGetPlatformInfo(platform_id, CL_PLATFORM_NAME, sizeof(buffer), buffer, NULL);
	if (buffer[0] != '\0')
	  hwloc_obj_add_info(osdev, "OpenCLPlatformName", buffer);
      }

      snprintf(buffer, sizeof(buffer), "%u", i);
      hwloc_obj_add_info(osdev, "OpenCLPlatformDeviceIndex", buffer);

      clGetDeviceInfo(device_ids[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(computeunits), &computeunits, NULL);
      snprintf(buffer, sizeof(buffer), "%u", computeunits);
      hwloc_obj_add_info(osdev, "OpenCLComputeUnits", buffer);

      clGetDeviceInfo(device_ids[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(globalmemsize), &globalmemsize, NULL);
      snprintf(buffer, sizeof(buffer), "%llu", (unsigned long long) globalmemsize / 1024);
      hwloc_obj_add_info(osdev, "OpenCLGlobalMemorySize", buffer);

      parent = NULL;
#ifdef CL_DEVICE_TOPOLOGY_AMD
      parent = hwloc_pci_belowroot_find_by_busid(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function);
      if (!parent)
	parent = hwloc_pci_find_busid_parent(topology, 0, amdtopo.pcie.bus, amdtopo.pcie.device, amdtopo.pcie.function);
#endif
      if (!parent)
	parent = hwloc_get_root_obj(topology);

      hwloc_insert_object_by_parent(topology, parent, osdev);
      res++;
    }
    free(device_ids);
  }
  free(platform_ids);
  return res;
}
Пример #2
0
//------------------------------------------------------------------------------
void print_platforms() {
    cl_uint numPlatforms = 0;
    cl_platform_id platform = 0;
    cl_int status = clGetPlatformIDs(0, 0, &numPlatforms);
    if(status != CL_SUCCESS) {
        std::cerr << "ERROR - clGetPlatformIDs()" << std::endl;
        exit(EXIT_FAILURE);
    }
    if(numPlatforms < 1) {
        std::cout << "No OpenCL platform detected" << std::endl;
        exit(EXIT_SUCCESS);
    }
    typedef std::vector< cl_platform_id > PlatformIds;
    PlatformIds platforms(numPlatforms);
    status = clGetPlatformIDs(platforms.size(), &platforms[0], 0);
    if(status != CL_SUCCESS) {
        std::cerr << "ERROR - clGetPlatformIDs()" << std::endl;
        exit(EXIT_FAILURE);
    }
    std::vector< char > buf(0x10000, char(0));
    int p = 0;
    std::cout << "\n***************************************************\n";  
    std::cout << "Number of platforms: " << platforms.size() << std::endl;
    for(PlatformIds::const_iterator i = platforms.begin();
        i != platforms.end(); ++i, ++p) {
        
        std::cout << "\n-----------\n"; 
        std::cout << "Platform " << p << std::endl;
        std::cout << "-----------\n";  
        status = ::clGetPlatformInfo(*i, CL_PLATFORM_VENDOR,
                                     buf.size(), &buf[ 0 ], 0 );
        if(status != CL_SUCCESS) {
            std::cerr << "ERROR - clGetPlatformInfo(): " << std::endl;
            exit(EXIT_FAILURE);    
        }
        std::cout << "Vendor: " << &buf[ 0 ] << '\n'; 
        status = ::clGetPlatformInfo(*i, CL_PLATFORM_PROFILE,
                                     buf.size(), &buf[ 0 ], 0 );
        if(status != CL_SUCCESS) {
            std::cerr << "ERROR - clGetPlatformInfo(): " << std::endl;
            exit(EXIT_FAILURE);
        }
        std::cout << "Profile: " << &buf[ 0 ] << '\n'; 
        status = ::clGetPlatformInfo(*i, CL_PLATFORM_VERSION,
                                     buf.size(), &buf[ 0 ], 0 );
        if(status != CL_SUCCESS) {
            std::cerr << "ERROR - clGetPlatformInfo(): " << std::endl;
            exit(EXIT_FAILURE);
        }
        std::cout << "Version: " << &buf[ 0 ] << '\n';     
        status = ::clGetPlatformInfo(*i, CL_PLATFORM_NAME,
                                     buf.size(), &buf[ 0 ], 0 );
        if(status != CL_SUCCESS) {
            std::cerr << "ERROR - clGetPlatformInfo(): " << std::endl;
            exit(EXIT_FAILURE);
        }
        std::cout << "Name: " << &buf[ 0 ] << '\n';  
        status = ::clGetPlatformInfo(*i, CL_PLATFORM_EXTENSIONS,
                                     buf.size(), &buf[ 0 ], 0 );
        if(status != CL_SUCCESS) {
            std::cerr << "ERROR - clGetPlatformInfo(): " << std::endl;
            exit(EXIT_FAILURE);
        }
        std::cout << "Extensions: " << &buf[ 0 ] << '\n';
        print_devices(*i);
        std::cout << "\n===================================================\n"; 

    }
}
Пример #3
0
int main()
{
	int i,j,k;
	// nb of operations:
	const int dsize = 512;
	int nthreads = 1;
	int nbOfAverages = 1e4;
	int opsMAC = 2; // operations per MAC
	cl_float4 *in, *out;
	cl_float *ck;
	double tops; //total ops

#define NQUEUES 1
	cl_int err;
	cl_platform_id platform = 0;
	cl_device_id device = 0;
	cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
	cl_context ctx = 0;
	cl_command_queue queues[NQUEUES];
	cl_mem bufin, bufck, bufout;
	cl_event event = NULL;
	cl_program program;
	cl_kernel kernel;
	size_t global[2], local[2];
	size_t param[5];
	char version[300];
  
	// allocate matrices
	
	in = (cl_float4 *) calloc(dsize*dsize, sizeof(*in));
	out = (cl_float4 *) calloc(dsize*dsize, sizeof(*out));
	ck = (cl_float *) calloc(9*9, sizeof(*ck));
	in[0].x = 2.0f;
	in[1].x = 3.0f;
	in[dsize].x = 1.0;
	ck[0] = 1.0f;
	ck[1] = 0.5f;
	ck[9] = 0.001f;

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs( 1, &platform, NULL );
    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL );

    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
    for(i = 0; i < NQUEUES; i++)
    	queues[i] = clCreateCommandQueue( ctx, device, 0, &err );

	// Print some info about the system
	clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(version), version, NULL);
	printf("CL_DEVICE_VERSION=%s\n", version);
	clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(version), version, NULL);
	printf("CL_DRIVER_VERSION=%s\n", version);
	program = clCreateProgramWithSource(ctx, 1, (const char **)&source, NULL, &err);
	clGetDeviceInfo(device, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(param[0]), param, NULL);
	printf("CL_DEVICE_LOCAL_MEM_SIZE=%d\n", (int)param[0]);
	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(param[0]), param, NULL);
	printf("CL_DEVICE_MAX_WORK_GROUP_SIZE=%d\n", (int)param[0]);
	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(param[0]), param, NULL);
	printf("CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS=%d\n", (int)param[0]);
	j = param[0];
	clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(param[0])*j, param, NULL);
	printf("CL_DEVICE_MAX_WORK_ITEM_SIZES=");
	for(i = 0; i < j; i++)
		printf("%d ", (int)param[i]);
	printf("\n");
        clGetDeviceInfo(device, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(param[0]), param, NULL);
        printf("CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE=%d\n", (int)param[0]);
		
		
	program = clCreateProgramWithSource(ctx, 1, (const char **)&source, NULL, &err);
	if(!program)
	{
		printf("Error creating program\n");
		return -1;
	}
	err = clBuildProgram(program, 0, 0, 0, 0, 0);
	if(err != CL_SUCCESS)
	{
		char buffer[20000];
		size_t len;
		
		clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
		puts(buffer);
		return -1;
	}
	kernel = clCreateKernel(program, "conv9x9", &err);
	if(!kernel || err != CL_SUCCESS)
	{
		printf("Error creating kernel\n");
		return -1;
	}

    /* Prepare OpenCL memory objects and place matrices inside them. */
	cl_image_format fmt = {CL_RGBA, CL_FLOAT};
	cl_int rc;
	bufin = clCreateImage2D(ctx, CL_MEM_READ_ONLY, &fmt, dsize, dsize, 0, 0, &rc);
	bufout = clCreateImage2D(ctx, CL_MEM_WRITE_ONLY, &fmt, dsize, dsize, 0, 0, &rc);
    bufck = clCreateBuffer( ctx, CL_MEM_READ_ONLY, 9 * 9 * sizeof(*ck),
                          NULL, &err );

	size_t origin[3] = {0,0,0};
	size_t region[3] = {dsize, dsize, 1};
    err = clEnqueueWriteImage(queues[0], bufin, CL_TRUE, origin, region, dsize * sizeof(*in), 0, in, 0, NULL, NULL );
    err = clEnqueueWriteBuffer( queues[0], bufck, CL_TRUE, 0, 9 * 9 * sizeof( *ck ), ck, 0, NULL, NULL );
	clSetKernelArg(kernel, 0, sizeof(int), &dsize);
	clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufin);
	clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufck);
	clSetKernelArg(kernel, 3, sizeof(cl_mem), &bufout);
	local[0] = 8;
	local[1] = 8;
	global[0] = global[1] = dsize-32;
    usleep(100000);

	struct timeval start,end;
	gettimeofday(&start, NULL);

	for (k=0; k<nthreads; k++) {
		//printf("Hello from thread %d, nthreads %d\n", omp_get_thread_num(), omp_get_num_threads());
		for(i=0;i<nbOfAverages;i++) {
		// do the 2D convolution
			err = clEnqueueNDRangeKernel(queues[0], kernel, 2, NULL, global, local, 0, NULL, NULL);
			if(err != CL_SUCCESS)
			{
				printf("clEnqueueNDRangeKernel error %d\n", err);
				return -1;
			}
		}
	}

	clFinish(queues[0]);
	gettimeofday(&end, NULL);
	double t = ((double) (end.tv_sec - start.tv_sec))
	+ ((double) (end.tv_usec - start.tv_usec)) / 1e6; //reports time in [s] - verified!

    /* Wait for calculations to be finished. */

    /* Fetch results of calculations from GPU memory. */
    err = clEnqueueReadImage(queues[0], bufout, CL_TRUE, origin, region, dsize * sizeof(*out), 0, out, 0, NULL, NULL );
	clFinish(queues[0]);
	
	printf("%f %f %f %f\n", out[0].x, out[1].x, out[dsize].x, out[dsize+1].x);

    /* Release OpenCL memory objects. */
    clReleaseMemObject( bufin );
    clReleaseMemObject( bufck );
    clReleaseMemObject( bufout );

    /* Release OpenCL working objects. */
    for(i = 0; i < NQUEUES; i++)
    	clReleaseCommandQueue( queues[i] );
    clReleaseContext( ctx );
	
	// report performance:
	tops = 4 * nthreads * opsMAC * (dsize-32)*(dsize-32)*9*9; // total ops
	printf("Total M ops = %.0lf, # of threads = %d", nbOfAverages*tops*1e-6, nthreads);
	printf("\nTime in s: %lf:", t);
	printf("\nTest performance [G OP/s] %lf:", tops*nbOfAverages/t*1e-9);
	printf("\n");
	return(0);
}
Пример #4
0
void
setup_opencl(const char* cl_source_filename, const char* cl_source_main, cl_device_id* device_id,
             cl_kernel* kernel, cl_context* context, cl_command_queue* queue)
{
        cl_int err;					// error code returned from api calls

        cl_platform_id platform_id;			// compute device id
        cl_program program;				// compute program
        cl_device_id devices[MAX_RESOURCES];
        cl_platform_id platforms[MAX_RESOURCES];


        unsigned int best_platform = 0;
        unsigned int best_device = 0;
        print_devices(0);

        if(!get_best_device(&best_platform, &best_device)) {
                printf("No suitable device was found! Try using an OpenCL1.1 compatible device.\n");
                exit(1);
        }
        printf("Initiating platform-%d device-%d.\n", best_platform, best_device);



        // Platform
        err = clGetPlatformIDs(MAX_RESOURCES, platforms, NULL);
	ocl_error("Getting platform id", err);

        platform_id = platforms[best_platform];

        // Device
        err = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, sizeof(devices), devices, NULL); //NULL, ignore number returned devices.
	ocl_error("Getting device ids", err);

        *device_id = devices[best_device];

        // Context
        *context = clCreateContext(0, 1, device_id, NULL, NULL, &err);
	ocl_error("Creating context", err);

        // Command-queue
        *queue = clCreateCommandQueue(*context, *device_id, 0, &err);
	ocl_error("Creating command queue", err);


        // Read .cl source into memory
        int cl_source_len = 0;
        char* cl_source = file_contents(cl_source_filename, &cl_source_len);


        // Create thes compute program from the source buffer
        program = clCreateProgramWithSource(*context, 1, (const char **) &cl_source, NULL, &err);
	ocl_error("Failed to create compute program", err);


        // Build the program executable
        err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
        if (err != CL_SUCCESS) {
                char* build_log;
                size_t log_size;
                // First call to know the proper size
                clGetProgramBuildInfo(program, *device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
                build_log = malloc(sizeof(char)*(log_size+1));
                if(log_size > 0 && build_log != NULL) {
	                // Second call to get the log
	                clGetProgramBuildInfo(program, *device_id, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
	                build_log[log_size] = '\0';
	                printf("%s\n", build_log);
	                free(build_log);
                }

                exit(err);
        }


        // Create the compute kernel in the program we wish to run
        *kernel = clCreateKernel(program, cl_source_main, &err);
	ocl_error("Failed to create compute kernel", err);
}
Пример #5
0
PIGLIT_CL_API_TEST_CONFIG_END


enum piglit_result
piglit_cl_test(const int argc,
               const char** argv,
               const struct piglit_cl_api_test_config* config,
               const struct piglit_cl_api_test_env* env)
{
	enum piglit_result result = PIGLIT_PASS;

	int i;
	cl_int errNo;
	cl_uint num_platforms;
	cl_platform_id* platforms = NULL;

	/*** Normal usage ***/

	/* get number of platforms */
	errNo = clGetPlatformIDs(0, NULL, &num_platforms);
	if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
		piglit_cl_check_error(errNo, CL_SUCCESS);
		fprintf(stderr,
		        "Failed (error code: %s): Get size of platform list.\n",
		        piglit_cl_get_error_name(errNo));
		piglit_merge_result(&result, PIGLIT_FAIL);
	} else {
		/*
		 * Get platform list.
		 * Try returning from 1 to num_platforms platforms.
		 */
		for(i = 1; i <= num_platforms; i++) {
			platforms = malloc(i * sizeof(cl_platform_id));
			errNo = clGetPlatformIDs(i, platforms, NULL);
			if(!piglit_cl_check_error(errNo, CL_SUCCESS)) {
				fprintf(stderr,
				        "Failed (error code: %s): Get platform list.\n",
				        piglit_cl_get_error_name(errNo));
				piglit_merge_result(&result, PIGLIT_FAIL);
			}
			free(platforms);
		}
	}
	
	/*** Errors ***/

	/*
	 * CL_INVALID_VALUE if num_entries is equal
	 * to zero and platforms is not NULL, or if both num_platforms 
	 * and platforms are NULL.
	 */
	errNo = clGetPlatformIDs(0, platforms, NULL);
	if(!piglit_cl_check_error(errNo, CL_INVALID_VALUE)) {
		fprintf(stderr,
		        "Failed (error code: %s): Trigger CL_INVALID_VALUE if num_entries is equeal to zero and platforms is not NULL.\n",
		        piglit_cl_get_error_name(errNo));
		piglit_merge_result(&result, PIGLIT_FAIL);
	}
	errNo = clGetPlatformIDs(100, NULL, NULL);
	if(!piglit_cl_check_error(errNo, CL_INVALID_VALUE)) {
		fprintf(stderr,
		        "Failed (error code: %s): Trigger CL_INVALID_VALUE if both num_platforms and platforms are NULL.\n",
		        piglit_cl_get_error_name(errNo));
		piglit_merge_result(&result, PIGLIT_FAIL);
	}

	return result;
}
Пример #6
0
  bool Filter::initCL(const Params& params,
                      const char *source, const char *options)
  {
    // Ensure no existing context
    releaseCL();

    cl_int err;
    cl_uint numPlatforms, numDevices;

    cl_platform_id platform, platforms[params.platformIndex+1];
    err = clGetPlatformIDs(params.platformIndex+1, platforms, &numPlatforms);
    CHECK_ERROR_OCL(err, "getting platforms", return false);
    if (params.platformIndex >= numPlatforms)
    {
      reportStatus("Platform index %d out of range (%d platforms found)",
        params.platformIndex, numPlatforms);
      return false;
    }
    platform = platforms[params.platformIndex];

    cl_device_id devices[params.deviceIndex+1];
    err = clGetDeviceIDs(platform, params.type,
                         params.deviceIndex+1, devices, &numDevices);
    CHECK_ERROR_OCL(err, "getting devices", return false);
    if (params.deviceIndex >= numDevices)
    {
      reportStatus("Device index %d out of range (%d devices found)",
        params.deviceIndex, numDevices);
      return false;
    }
    m_device = devices[params.deviceIndex];

    char name[64];
    clGetDeviceInfo(m_device, CL_DEVICE_NAME, 64, name, NULL);
    reportStatus("Using device: %s", name);

    m_context = clCreateContext(NULL, 1, &m_device, NULL, NULL, &err);
    CHECK_ERROR_OCL(err, "creating context", return false);

    m_queue = clCreateCommandQueue(m_context, m_device,
                                   CL_QUEUE_PROFILING_ENABLE, &err);
    CHECK_ERROR_OCL(err, "creating command queue", return false);

    m_program = clCreateProgramWithSource(m_context, 1, &source, NULL, &err);
    CHECK_ERROR_OCL(err, "creating program", return false);

    err = clBuildProgram(m_program, 1, &m_device, options, NULL, NULL);
    if (err == CL_BUILD_PROGRAM_FAILURE)
    {
      size_t sz;
      clGetProgramBuildInfo(
        m_program, m_device, CL_PROGRAM_BUILD_LOG, 0, NULL, &sz);
      char *log = (char*)malloc(++sz);
      clGetProgramBuildInfo(
        m_program, m_device, CL_PROGRAM_BUILD_LOG, sz, log, NULL);
      reportStatus(log);
      free(log);
    }
    CHECK_ERROR_OCL(err, "building program", return false);

    reportStatus("OpenCL context initialised.");
    return true;
  }
Пример #7
0
int main(int argc, char *argv[])
{
    cl_platform_id platform;
    cl_device_id device;
    cl_context context;
    cl_command_queue queue;
    cl_program program;
    cl_kernel kernel_one, kernel_path;
    cl_mem d_mt_state, d_mt_emit, d_max_prob_old;
    cl_mem d_max_prob_new, d_path, v_prob, v_path;

    int wg_size = 256;
    int n_state = 256*16;
    int n_emit = 128;
    int n_obs = 100;

    size_t init_prob_size = sizeof(float) * n_state;
    size_t mt_state_size = sizeof(float) * n_state * n_state;
    size_t mt_emit_size = sizeof(float) * n_emit * n_state;

    float *init_prob = (float *) malloc(init_prob_size);
    float *mt_state = (float *) malloc(mt_state_size);
    float *mt_emit = (float *) malloc(mt_emit_size);
    int *obs = (int *) malloc(sizeof(int) * n_obs);
    int *viterbi_gpu = (int *) malloc(sizeof(int) * n_obs);

    srand(2012);
    initHMM(init_prob, mt_state, mt_emit, n_state, n_emit);

    int i;
    for (i = 0; i < n_obs; i++) {
        obs[i] = i % 15;
    }

    const char *source = load_program_source("Viterbi.cl");
    size_t source_len = strlen(source);;
    cl_uint err = 0;

    char *flags = "-cl-fast-relaxed-math";

    clGetPlatformIDs(1, &platform, NULL);
    printf("platform %p err %d\n", platform, err);

    clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &device, &err);
    printf("device %p err %d\n", device, err);

    context = clCreateContext(0, 1, &device, NULL, NULL, &err);
    printf("context %p err %d\n", context, err);

    queue = clCreateCommandQueue(context, device, 0, &err);
    printf("queue %p err %d\n", queue, err);

    program = clCreateProgramWithSource(context, 1, &source, &source_len, &err);
    printf("program %p err %d\n", program, err);

    err = clBuildProgram(program, 0, NULL, flags, NULL, NULL);
    printf("err %d\n", err);

    /*
    char tmp[102400];
    clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(tmp),
        tmp, NULL);

    printf("error %s\n", tmp);
    */

    kernel_one = clCreateKernel(program, "ViterbiOneStep", &err);
    printf("kernel %p err %d\n", kernel_one, err);

    kernel_path = clCreateKernel(program, "ViterbiPath", &err);
    printf("kernel %p err %d\n", kernel_path, err);

    d_mt_state = clCreateBuffer(context, CL_MEM_READ_ONLY, mt_state_size, 
        NULL, &err);
    printf("buffer %p\n", d_mt_state);

    d_mt_emit = clCreateBuffer(context, CL_MEM_READ_ONLY, mt_emit_size, 
        NULL, &err);
    printf("buffer %p\n", d_mt_emit);

    d_max_prob_new = clCreateBuffer(context, CL_MEM_READ_WRITE, 
        init_prob_size, NULL, &err);
    printf("buffer %p\n", d_max_prob_new);

    d_max_prob_old = clCreateBuffer(context, CL_MEM_READ_WRITE, 
        init_prob_size, NULL, &err);
    printf("buffer %p\n", d_max_prob_old);

    d_path = clCreateBuffer(context, CL_MEM_READ_WRITE, 
        sizeof(int)*(n_obs-1)*n_state, NULL, &err);
    printf("buffer %p\n", d_path);

    v_prob = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float),
        NULL, &err);
    printf("buffer %p\n", v_prob);

    v_path = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int)*n_obs,
        NULL, &err);
    printf("buffer %p\n", v_prob);

    err = clEnqueueWriteBuffer(queue, d_mt_state, CL_TRUE, 0, mt_state_size,
        mt_state, 0, NULL, NULL);
    printf("err %d\n", err);

    err = clEnqueueWriteBuffer(queue, d_mt_emit, CL_TRUE, 0, mt_emit_size,
        mt_emit, 0, NULL, NULL);
    printf("err %d\n", err);

    err = clEnqueueWriteBuffer(queue, d_max_prob_old, CL_TRUE, 0, init_prob_size,
        init_prob, 0, NULL, NULL);
    printf("err %d\n", err);

    // max_wg_size is 1024 for Intel Core 2 CPU
    size_t max_wg_size;
    err = clGetKernelWorkGroupInfo(kernel_one, device, 
        CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &max_wg_size, NULL);
    printf("max_wg_size %d\n", max_wg_size);

    size_t local_work_size[2], global_work_size[2];
    local_work_size[0] = wg_size;
    local_work_size[1] = 1;
    global_work_size[0] = local_work_size[0] * 256;
    global_work_size[1] = n_state/256;

    for (i = 1; i < n_obs; i++) {
        err = clSetKernelArg(kernel_one, 0, sizeof(cl_mem), 
            (void*)&d_max_prob_new);
        printf("err %d\n", err);

        err = clSetKernelArg(kernel_one, 1, sizeof(cl_mem), 
            (void*)&d_path);
        printf("err %d\n", err);

        err = clSetKernelArg(kernel_one, 2, sizeof(cl_mem), 
            (void*)&d_max_prob_old);
        printf("err %d\n", err);

        err = clSetKernelArg(kernel_one, 3, sizeof(cl_mem), 
            (void*)&d_mt_state);
        printf("err %d\n", err);

        err = clSetKernelArg(kernel_one, 4, sizeof(cl_mem),
            (void*)&d_mt_emit);
        printf("err %d\n", err);

        err = clSetKernelArg(kernel_one, 5, sizeof(float)*local_work_size[0],
            NULL);
        printf("err %d\n", err);

        err = clSetKernelArg(kernel_one, 6, sizeof(int)*local_work_size[0],
            NULL);
        printf("err %d\n", err);

        err = clSetKernelArg(kernel_one, 7, sizeof(int),
            (void*)&n_state);
        printf("err %d\n", err);

        err = clSetKernelArg(kernel_one, 8, sizeof(int),
            (void*)&(obs[i]));
        printf("err %d\n", err);

        err = clSetKernelArg(kernel_one, 9, sizeof(int),
            (void*)&i);
        printf("err %d\n", err);


        err = clEnqueueNDRangeKernel(queue, kernel_one, 2, NULL, 
            global_work_size, local_work_size, 0, NULL, NULL);
        printf("err %d\n", err);

        err = clEnqueueCopyBuffer(queue, d_max_prob_new, d_max_prob_old, 0, 0,
            sizeof(float)*n_state, 0, NULL, NULL);
        printf("err %d\n", err);
    }

    local_work_size[0] = 1;
    global_work_size[0] = 1;

    err = clSetKernelArg(kernel_path, 0, sizeof(cl_mem), (void*)&v_prob);
    printf("err %d\n", err);

    err = clSetKernelArg(kernel_path, 1, sizeof(cl_mem), (void*)&v_path);
    printf("err %d\n", err);

    err = clSetKernelArg(kernel_path, 2, sizeof(cl_mem), 
        (void*)&d_max_prob_new);
    printf("err %d\n", err);

    err = clSetKernelArg(kernel_path, 3, sizeof(cl_mem), (void*)&d_path);
    printf("err %d\n", err);

    err = clSetKernelArg(kernel_path, 4, sizeof(int), (void*)&n_state);
    printf("err %d\n", err);

    err = clSetKernelArg(kernel_path, 5, sizeof(int), (void*)&n_obs);
    printf("err %d\n", err);

    err = clEnqueueNDRangeKernel(queue, kernel_path, 1, NULL, 
        global_work_size, local_work_size, 0, NULL, NULL);
    printf("err %d\n", err);

    clFinish(queue);
    printf("finish done\n");

    err = clEnqueueReadBuffer(queue, v_path, CL_TRUE, 0, sizeof(int)*n_obs, 
        viterbi_gpu, 0, NULL, NULL);
    printf("err %d\n", err);

    for (i = 0; i < n_obs; i++) {
        printf("%d %d\n", i, viterbi_gpu[i]);
    }

    clReleaseMemObject(d_mt_state);
    clReleaseMemObject(d_mt_emit);
    clReleaseMemObject(d_max_prob_old);
    clReleaseMemObject(d_max_prob_new);
    clReleaseMemObject(d_path);
    clReleaseMemObject(v_prob);
    clReleaseMemObject(v_path);
    clReleaseProgram(program);
    clReleaseKernel(kernel_one);
    clReleaseKernel(kernel_path);
    clReleaseCommandQueue(queue);
}
Пример #8
0
static int 
SetupComputeDevices(int gpu)
{
	int err;
	size_t returned_size;
	ComputeDeviceType = gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;

#if (USE_GL_ATTACHMENTS)

	printf(SEPARATOR);
	printf("Using active OpenGL context...\n");

	// Bind to platform
	cl_platform_id platform_id;

	cl_uint numPlatforms;
	cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
	if (status != CL_SUCCESS)
	{
		printf("clGetPlatformIDs Failed\n");
		return EXIT_FAILURE;
	}

	if (0 < numPlatforms)
	{
		cl_platform_id* platforms = (cl_platform_id*)calloc(numPlatforms, sizeof(cl_platform_id));

		status = clGetPlatformIDs(numPlatforms, platforms, NULL);

		char platformName[100];
		for (unsigned i = 0; i < numPlatforms; ++i)
		{
			status = clGetPlatformInfo(platforms[i],
				CL_PLATFORM_VENDOR,
				sizeof(platformName),
				platformName,
				NULL);
			platform_id = platforms[i];
			if (!strcmp(platformName, "Advanced Micro Devices, Inc."))
			{
				break;
			}
		}
		printf("Platform found : %s\n", platformName);
		free(platforms);
	}
	if(NULL == platform_id)
	{
		printf("NULL platform found so Exiting Application.\n");
		return EXIT_FAILURE;
	}

	// Get ID for the device
	err = clGetDeviceIDs(platform_id, ComputeDeviceType, 1, &ComputeDeviceId, NULL);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to locate compute device!\n");
		return EXIT_FAILURE;
	}

	// Create a context  
	cl_context_properties properties[] =
	{
		CL_GL_CONTEXT_KHR, (cl_context_properties)glXGetCurrentContext(),
		CL_GLX_DISPLAY_KHR, (cl_context_properties)glXGetCurrentDisplay(),
		CL_CONTEXT_PLATFORM, (cl_context_properties)(platform_id),
		0
	};

	// Create a context from a CGL share group
	//
	ComputeContext = clCreateContext(properties, 1, &ComputeDeviceId, NULL, 0, 0);
	if (!ComputeContext)
	{
		printf("Error: Failed to create a compute context!\n");
		return EXIT_FAILURE;
	}

#else

	// Bind to platform
	cl_platform_id platform_id;

	cl_uint numPlatforms;
	cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);
	if (status != CL_SUCCESS)
	{
		printf("clGetPlatformIDs Failed\n");
		return EXIT_FAILURE;
	}

	if (0 < numPlatforms)
	{
		cl_platform_id* platforms = (cl_platform_id*)calloc(numPlatforms, sizeof(cl_platform_id));

		status = clGetPlatformIDs(numPlatforms, platforms, NULL);

		char platformName[100];
		for (unsigned i = 0; i < numPlatforms; ++i)
		{
			status = clGetPlatformInfo(platforms[i],
				CL_PLATFORM_VENDOR,
				sizeof(platformName),
				platformName,
				NULL);
			platform_id = platforms[i];
			if (!strcmp(platformName, "Advanced Micro Devices, Inc."))
			{
				break;
			}
		}
		printf("Platform found : %s\n", platformName);
		free(platforms);
	}
	if(NULL == platform_id)
	{
		printf("NULL platform found so Exiting Application.\n");
		return EXIT_FAILURE;
	}

	// Get ID for the device
	err = clGetDeviceIDs(platform_id, ComputeDeviceType, 1, &ComputeDeviceId, NULL);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to locate compute device!\n");
		return EXIT_FAILURE;
	}

	// Create a context containing the compute device(s)
	//
	ComputeContext = clCreateContext(0, 1, &ComputeDeviceId, NULL, NULL, &err);
	if (!ComputeContext)
	{
		printf("Error: Failed to create a compute context!\n");
		return EXIT_FAILURE;
	}

#endif

	unsigned int device_count;
	cl_device_id device_ids[16];

	err = clGetContextInfo(ComputeContext, CL_CONTEXT_DEVICES, sizeof(device_ids), device_ids, &returned_size);
	if(err)
	{
		printf("Error: Failed to retrieve compute devices for context!\n");
		return EXIT_FAILURE;
	}

	device_count = returned_size / sizeof(cl_device_id);

	unsigned int i = 0;
	int device_found = 0;
	cl_device_type device_type; 
	for(i = 0; i < device_count; i++) 
	{
		clGetDeviceInfo(device_ids[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &device_type, NULL);
		if(device_type == ComputeDeviceType) 
		{
			ComputeDeviceId = device_ids[i];
			device_found = 1;
			break;
		} 
	}

	if(!device_found)
	{
		printf("Error: Failed to locate compute device!\n");
		return EXIT_FAILURE;
	}

	// Create a command queue
	//
	ComputeCommands = clCreateCommandQueue(ComputeContext, ComputeDeviceId, 0, &err);
	if (!ComputeCommands)
	{
		printf("Error: Failed to create a command queue!\n");
		return EXIT_FAILURE;
	}

	// Report the device vendor and device name
	// 
	cl_char vendor_name[1024] = {0};
	cl_char device_name[1024] = {0};
	err = clGetDeviceInfo(ComputeDeviceId, CL_DEVICE_VENDOR, sizeof(vendor_name), vendor_name, &returned_size);
	err|= clGetDeviceInfo(ComputeDeviceId, CL_DEVICE_NAME, sizeof(device_name), device_name, &returned_size);
	if (err != CL_SUCCESS)
	{
		printf("Error: Failed to retrieve device info!\n");
		return EXIT_FAILURE;
	}

	printf(SEPARATOR);
	printf("Connecting to %s %s...\n", vendor_name, device_name);

	return CL_SUCCESS;
}
Пример #9
0
int main(int argc, char*argv[]) {

	if (argc != 4) {
		printf("Usage: %s #m #n #k\n", argv[0]);
		exit(1);
	}
	int *m,*n,*k,i;
	m = (int *) malloc(sizeof(int));
	n = (int *) malloc(sizeof(int));
	k = (int *) malloc(sizeof(int));

	//Initilizing the matrix dimensions
	m[0] = atoi(argv[1]);
	n[0] = atoi(argv[2]);
	k[0] = atoi(argv[3]);

	double time = 0;

	clock_t begin = clock();
	cl_device_id deviceId = NULL;
	cl_context context = NULL;
	cl_command_queue commandQueue = NULL;

	double *alpha, *beta;

	//allocating memory for the vectors
	
	alpha = (double *) malloc(sizeof(double));
	beta = (double *) malloc(sizeof(double));

    double *A, *B, *C;
	
    A = (double *) malloc(m[0]*k[0]*sizeof(double));
    B = (double *) malloc(k[0]*n[0]*sizeof(double));
    C = (double *) malloc(m[0]*n[0]*sizeof(double));

	//initializing values of alpha and beta
	alpha[0] = 1.0;
	beta[0] = 0.0;

	clock_t end = clock();

	time += (double)(end - begin) * 1000 / CLOCKS_PER_SEC;

	//printf (" Intializing matrix data \n\n");
	begin = clock();
	for (i = 0; i < (m[0]*k[0]); i++) {
		A[i] = (double)(i+1);
	}

	for (i = 0; i < (k[0]*n[0]); i++) {
		B[i] = (double)(-i-1);
	}

	for (i = 0; i < (m[0]*n[0]); i++) {
		C[i] = 0.0;
	}

	//Memory objects for kernel parameters
	cl_mem AMemobj = NULL;
	cl_mem BMemobj = NULL;
	cl_mem CMemobj = NULL;
	cl_mem mMemobj = NULL;
	cl_mem nMemobj = NULL;
	cl_mem kMemobj = NULL;
	cl_mem alphaMemobj = NULL;
	cl_mem betaMemobj = NULL;

	//Some opencl objects
	cl_program program = NULL;
	cl_kernel kernel = NULL;
	cl_platform_id platformId = NULL;
	cl_uint numDevices;
	cl_uint numPlatforms;
	cl_int ret;
	size_t contextDescriptorSize;

	//reading kernel from file
	FILE *file;
	char fileName[] = "./dgemm.cl";
	char *kernelSource;
	size_t sourceSize;
	
	//Load the source code from file
	file = fopen(fileName, "r");
	if(!file) {
		printf("Failed to load the kernel file. \n");
		exit(1);
	}

	kernelSource = (char *) malloc(SOURCE_SIZE_MAX);
	sourceSize = fread(kernelSource, 1, SOURCE_SIZE_MAX, file);
	fclose(file);

	//Get platform information
	ret = clGetPlatformIDs(1, &platformId, &numPlatforms);

	//get list of devices
	ret = clGetDeviceIDs(platformId, CL_DEVICE_TYPE_DEFAULT, 1, &deviceId, &numDevices);
	
	//create opencl device context
	context = clCreateContext(NULL, 1, &deviceId, NULL, NULL, &ret);

	//get device context
	clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, 0, &contextDescriptorSize); 

	//command queue for the first device
	commandQueue = clCreateCommandQueue(context, deviceId, 0, &ret);

	//Create memory object
	AMemobj = clCreateBuffer(context, CL_MEM_READ_ONLY, m[0]*k[0] * sizeof(double), A, &ret);
	BMemobj = clCreateBuffer(context, CL_MEM_READ_ONLY, k[0]*n[0] * sizeof(double), B, &ret);
	CMemobj = clCreateBuffer(context, CL_MEM_READ_WRITE, m[0]*n[0] * sizeof(double), C, &ret);
	mMemobj = clCreateBuffer(context, CL_MEM_READ_ONLY, 1 * sizeof(int), m, &ret);
	nMemobj = clCreateBuffer(context, CL_MEM_READ_ONLY, 1 * sizeof(int), n, &ret);
	kMemobj = clCreateBuffer(context, CL_MEM_READ_ONLY, 1 * sizeof(int), k, &ret);
	alphaMemobj = clCreateBuffer(context, CL_MEM_READ_ONLY, 1 * sizeof(double), alpha, &ret);
	betaMemobj = clCreateBuffer(context, CL_MEM_READ_ONLY, 1 * sizeof(double), beta, &ret);

	//Create kernel program from source
	program = clCreateProgramWithSource(context, 1, (const char **)&kernelSource,(const size_t *)&sourceSize, &ret);

	//Write data to the buffer
	ret = clEnqueueWriteBuffer(commandQueue, AMemobj, CL_TRUE, 0, sizeof(double) * m[0]*k[0], A, 0, NULL, NULL);
	ret = clEnqueueWriteBuffer(commandQueue, BMemobj, CL_TRUE, 0, sizeof(double) * k[0]*n[0], B, 0, NULL, NULL);
	ret = clEnqueueWriteBuffer(commandQueue, CMemobj, CL_TRUE, 0, sizeof(double) * m[0]*n[0], C, 0, NULL, NULL);
	ret = clEnqueueWriteBuffer(commandQueue, mMemobj, CL_TRUE, 0, 1 * sizeof(int), m, 0, NULL, NULL);
	ret = clEnqueueWriteBuffer(commandQueue, nMemobj, CL_TRUE, 0, 1 * sizeof(int), n, 0, NULL, NULL);
	ret = clEnqueueWriteBuffer(commandQueue, kMemobj, CL_TRUE, 0, 1 * sizeof(int), k, 0, NULL, NULL);
	ret = clEnqueueWriteBuffer(commandQueue, alphaMemobj, CL_TRUE, 0, 1 * sizeof(double), alpha, 0, NULL, NULL);
	ret = clEnqueueWriteBuffer(commandQueue, betaMemobj, CL_TRUE, 0, 1 * sizeof(double), beta, 0, NULL, NULL);

	//Build the kernel program
	ret = clBuildProgram(program, 1, &deviceId, "-I ./", NULL, NULL);

	//Create a opencl kernel
	kernel = clCreateKernel(program, "dgemm", &ret);	

	//Pass arguments to kernel
	ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&AMemobj);
	ret = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&BMemobj);
	ret = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&CMemobj);
	ret = clSetKernelArg(kernel, 3, sizeof(cl_mem), (void *)&mMemobj);
	ret = clSetKernelArg(kernel, 4, sizeof(cl_mem), (void *)&nMemobj);
	ret = clSetKernelArg(kernel, 5, sizeof(cl_mem), (void *)&kMemobj);
	ret = clSetKernelArg(kernel, 6, sizeof(cl_mem), (void *)&alphaMemobj);
	ret = clSetKernelArg(kernel, 7, sizeof(cl_mem), (void *)&betaMemobj);

	// Execute OpenCL kernel in data parallel
    const int TS = 32;
	const size_t local[2] = { TS, TS };
	const size_t global[2] = { (int)(pow(2,ceil(log(m[0])/log(2)))), (int)(pow(2,ceil(log(n[0])/log(2)))) };

    //Execute opencl kernel
    ret = clEnqueueNDRangeKernel( commandQueue, kernel, 2, NULL, global, local, 0, 0, 0 );

	//copy the result back
	ret = clEnqueueReadBuffer(commandQueue, CMemobj, CL_TRUE, 0, m[0]*n[0]*sizeof(double), C, 0, NULL, NULL);

	//Before program termination
	ret = clFlush(commandQueue);
	ret = clFinish(commandQueue);
	ret = clReleaseKernel(kernel);
	ret = clReleaseProgram(program);
	ret = clReleaseMemObject(AMemobj);
	ret = clReleaseMemObject(BMemobj);
	ret = clReleaseMemObject(CMemobj);
	ret = clReleaseMemObject(mMemobj);
	ret = clReleaseMemObject(nMemobj);
	ret = clReleaseMemObject(kMemobj);
	ret = clReleaseMemObject(alphaMemobj);
	ret = clReleaseMemObject(betaMemobj);
	ret = clReleaseCommandQueue(commandQueue);
	ret = clReleaseContext(context);
	 
	free(kernelSource);
	free(A);
	free(B);
	free(C);
	free(m);
	free(n);
	free(k);
	free(alpha);
	free(beta);

	//Printing timing result
	end = clock();
	time += (double)(end - begin) *1000 / CLOCKS_PER_SEC;

	printf("%lf\n", time);

	return 0;
}
Пример #10
0
void OCL_base::init_kernel(const char* kernel_source, const char* kernel_name, 
			   std::string define_statements, bool compile_source)
{
  if(!compile_source){
    Rprintf("Binary sources not supported yet\n");
    return;
  }  

  char* kernel_buffer = 0;
  size_t k_buffer_size=0;
  // if the kernel source has been predefined in "../oCL_Kernels/oCL_Kernels.h"
  // then use that.
  if(!strcmp(kernel_source, "move_deltoids")){  // strcmp returns 0 for equal
    kernel_buffer = make_source(define_statements, move_deltoids, k_buffer_size);
  }
  if(!strcmp(kernel_source, "move_deltoids_2")){
    kernel_buffer = make_source(define_statements, move_deltoids_2, k_buffer_size);
  }
  if(!strcmp(kernel_source, "move_deltoids_dummy")){
    kernel_buffer = make_source(define_statements, move_deltoids_dummy, k_buffer_size);
  }
    

  // if kernel buffer is not defined yet, try to read it from a file
  if(!kernel_buffer){
    std::ifstream in(kernel_source, std::ios::binary);
    if(!in){
      Rprintf("Unable to open kernel source file\n");
      return;
    }
    in.seekg(0, std::ios::end);
    ssize_t end_pos = in.tellg();
    Rprintf("in.tellg() reports : %d\n", end_pos);
    in.seekg(0, std::ios::beg);
    if(!end_pos){
      Rprintf("Unable to read from kernel source file\n");
      return;
    }
    // prepend #define statements to the kernal source.
    // add one extra \n to the 
    
    k_buffer_size = 1 + define_statements.size() + (size_t)end_pos + 1;
    kernel_buffer = new char[ k_buffer_size ];
    memset((void*)kernel_buffer, 0, sizeof(char) * k_buffer_size);
    //  kernel_buffer[k_buffer_size] = 0;  // this may not be needed.
    
    size_t copied_bytes = define_statements.copy(kernel_buffer, define_statements.size());
    if(copied_bytes != define_statements.size()){
      Rprintf("Unable to copy the full define statements\n");
      delete []kernel_buffer;
      return;
    }

    kernel_buffer[ define_statements.size() ] = '\n'; // add a new line for safety

    in.read((kernel_buffer + 1 + define_statements.size()), end_pos);
    if(in.gcount() != end_pos){
      Rprintf("Unable to read to end of kernel source file: %d != %d\n",
	      end_pos, in.gcount());
      delete []kernel_buffer;
      return;
    }
  }
  // at this point kernel_buffer should be defined.
  // and we can use it to compile the kernel.

  cl_int ret = 0; // return value. Use the same for all.
  
  ret = clGetPlatformIDs(1, &platform_id, &num_platforms);
  if(ret) report_error_pf("clGetPlatformIDs", ret);
  ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_GPU, 1,
			&device_id, &num_devices );
  if(ret) report_error_pf("clGetDeviceIDs", ret);
  if(ret != CL_SUCCESS){
    Rprintf("clGetDevice returned with error: %d\n", (int)ret);
    return;
  }

  context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
  report_error_pf("clCreateContext", ret);

  command_que = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret);
  report_error_pf("clCreateCommandQueue", ret);

  program = clCreateProgramWithSource(context, 1, (const char**)&kernel_buffer, 
				      (const size_t*)&k_buffer_size, &ret);
  report_error_pf("clCreateProgramWithSource", ret);

  
  clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
  report_error_pf("clBuildProgram", ret);

  char* build_log = NULL;
  size_t log_size = 1000;
  ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, build_log, &log_size);
  report_error_pf("clGetProgramBuildInfo", ret);
  build_log = new char[log_size+1];
  ret = clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, build_log, NULL);
  report_error_pf("clGetProgramBuildInfo", ret);
  
  // the below doesn't make much sense. But log_size is never 0, so, need to do something
  Rprintf("After building the kernel the log size is : %d\n", log_size);
  if(log_size > 2){
    Rprintf("clBuildProgram Error encountered:\n%s\n", build_log);
    Rprintf(".....\n%s\n....\n", kernel_buffer);
  }
  delete []build_log;

  kernel = clCreateKernel(program, kernel_name, &ret);
  report_error_pf("clCreateKernel", ret);
  
  delete []kernel_buffer;
  
}
Пример #11
0
LIBSTDCL_API CONTEXT* 
clcontext_create( 
	const char* platform_name, 
	int devtyp, 
	size_t ndevmax,
	cl_context_properties* ctxprop_ext, 
	int lock_key
)
{

	int n;
	int err = 0;
	int i;
	size_t devlist_sz;
	CONTEXT* cp = 0;
	cl_platform_id* platforms = 0;
	cl_uint nplatforms;
	char info[1024];
	cl_platform_id platformid;
	int nctxprop = 0;
	cl_context_properties* ctxprop;
	size_t sz;
	cl_uint ndev = 0;
	cl_command_queue_properties prop = 0;

	DEBUG(__FILE__,__LINE__,"clcontext_create() called");


//	if (ndevmax) 
//		WARN(__FILE__,__LINE__,"__clcontext_create(): ndevmax argument ignored");


	/***
	 *** allocate CONTEXT struct
	 ***/

	DEBUG(__FILE__,__LINE__,
		"clcontext_create: sizeof CONTEXT %d",sizeof(CONTEXT));

//	cp = (CONTEXT*)malloc(sizeof(CONTEXT));
	assert(sizeof(CONTEXT)<getpagesize());
#ifdef _WIN64
	cp = (CONTEXT*)_aligned_malloc(sizeof(CONTEXT),getpagesize());
	if (!cp) {
		WARN(__FILE__,__LINE__,"memalign failed");
	}
#else
	if (posix_memalign((void**)&cp,getpagesize(),sizeof(CONTEXT))) {
		WARN(__FILE__,__LINE__,"posix_memalign failed");
	}
#endif

	DEBUG(__FILE__,__LINE__,"clcontext_create: context_ptr=%p",cp);
	
	if ((intptr_t)cp & (getpagesize()-1)) {
		ERROR(__FILE__,__LINE__,
			"clcontext_create: fatal error: unaligned context_ptr");
		exit(-1);
	}

	if (!cp) { errno=ENOMEM; return(0); }

	

   /***
    *** get platform id
    ***/



   clGetPlatformIDs(0,0,&nplatforms);

//	printf("XXX %d\n",nplatforms);

   if (nplatforms) {

      platforms = (cl_platform_id*)malloc(nplatforms*sizeof(cl_platform_id));
      clGetPlatformIDs(nplatforms,platforms,0);

      for(i=0;i<nplatforms;i++) { 

         char info[1024];

         DEBUG(__FILE__,__LINE__,"_libstdcl_init: available platform:");

         clGetPlatformInfo(platforms[i],CL_PLATFORM_PROFILE,1024,info,0);
         DEBUG(__FILE__,__LINE__,
            "_libstdcl_init: [%p]CL_PLATFORM_PROFILE=%s",platforms[i],info);

         clGetPlatformInfo(platforms[i],CL_PLATFORM_VERSION,1024,info,0);
         DEBUG(__FILE__,__LINE__,
            "_libstdcl_init: [%p]CL_PLATFORM_VERSION=%s",platforms[i],info);

         clGetPlatformInfo(platforms[i],CL_PLATFORM_NAME,1024,info,0);
         DEBUG(__FILE__,__LINE__,
            "_libstdcl_init: [%p]CL_PLATFORM_NAME=%s",platforms[i],info);

         clGetPlatformInfo(platforms[i],CL_PLATFORM_VENDOR,1024,info,0);
         DEBUG(__FILE__,__LINE__,
            "_libstdcl_init: [%p]CL_PLATFORM_VENDOR=%s",platforms[i],info);

         clGetPlatformInfo(platforms[i],CL_PLATFORM_EXTENSIONS,1024,info,0);
         DEBUG(__FILE__,__LINE__,
            "_libstdcl_init: [%p]CL_PLATFORM_EXTENSIONS=%s",platforms[i],info);

      }

   } else {

      WARN(__FILE__,__LINE__,
         "_libstdcl_init: no platforms found, continue and hope for the best");

   }

	platformid 
		= __get_platformid(nplatforms, platforms, platform_name);

	DEBUG(__FILE__,__LINE__,"clcontext_create: platformid=%p",platformid);

	

	/***
	 *** create context
	 ***/

	

	while (ctxprop_ext != 0 && ctxprop_ext[nctxprop] != 0) ++nctxprop;

//	cl_context_properties ctxprop[3] = {
//		(cl_context_properties)CL_CONTEXT_PLATFORM,
//		(cl_context_properties)platformid,
//		(cl_context_properties)0
//	};

	nctxprop += 3;

	ctxprop = (cl_context_properties*)malloc(nctxprop*sizeof(cl_context_properties));

	ctxprop[0] = (cl_context_properties)CL_CONTEXT_PLATFORM;
	ctxprop[1] = (cl_context_properties)platformid;

	for(i=0;i<nctxprop-3;i++) ctxprop[2+i] = ctxprop_ext[i];

	ctxprop[nctxprop-1] =  (cl_context_properties)0;

	

	clGetPlatformInfo(platformid,CL_PLATFORM_PROFILE,0,0,&sz);
	cp->platform_profile = (char*)malloc(sz);
	clGetPlatformInfo(platformid,CL_PLATFORM_PROFILE,sz,cp->platform_profile,0);

	clGetPlatformInfo(platformid,CL_PLATFORM_VERSION,0,0,&sz);
	cp->platform_version = (char*)malloc(sz);
	clGetPlatformInfo(platformid,CL_PLATFORM_VERSION,sz,cp->platform_version,0);

	clGetPlatformInfo(platformid,CL_PLATFORM_NAME,0,0,&sz);
	cp->platform_name = (char*)malloc(sz);
	clGetPlatformInfo(platformid,CL_PLATFORM_NAME,sz,cp->platform_name,0);

	clGetPlatformInfo(platformid,CL_PLATFORM_VENDOR,0,0,&sz);
	cp->platform_vendor = (char*)malloc(sz);
	clGetPlatformInfo(platformid,CL_PLATFORM_VENDOR,sz,cp->platform_vendor,0);

	clGetPlatformInfo(platformid,CL_PLATFORM_EXTENSIONS,0,0,&sz);
	cp->platform_extensions = (char*)malloc(sz);
	clGetPlatformInfo(platformid,CL_PLATFORM_EXTENSIONS,sz,
		cp->platform_extensions,0);


#ifdef _WIN64
	cp->ctx = clCreateContextFromType(ctxprop,devtyp,0,0,&err);
#else

	if (lock_key > 0) {

		if (ndevmax == 0) ndevmax = 1;

		cl_uint platform_ndev;
		err = clGetDeviceIDs(platformid,devtyp,0,0,&platform_ndev);
//		cl_uint platform_vndev = 2*platform_ndev;
		cl_uint platform_vndev = platform_ndev;

//DEBUG(__FILE__,__LINE__,"%d %d",platform_ndev,platform_vndev);
	
		cl_device_id* platform_dev 
			= (cl_device_id*)malloc(platform_ndev*sizeof(cl_device_id));

		err = clGetDeviceIDs(platformid,devtyp,platform_ndev,platform_dev,0);

		DEBUG(__FILE__,__LINE__,"clcontext_create: lock_key=%d",lock_key);

		pid_t pid = getpid();

		size_t sz_page = getpagesize();

//		system("ls /dev/shm");

		char shmobj[64];
		snprintf(shmobj,64,"/stdcl_ctx_lock%d.%d",devtyp,lock_key);

		DEBUG(__FILE__,__LINE__,
			"clcontext_create: attempt master shm_open %s from %d",shmobj,pid);

		int fd = shm_open(shmobj,O_RDWR|O_CREAT|O_EXCL,0);
		void* p0;

		struct timeval t0,t1;
		int timeout = 0;

		int noff = 0;

		if (fd < 0) {
			
			DEBUG(__FILE__,__LINE__,
				"clcontext_create: master shm_open failed from %d (%d)",pid,fd);

			DEBUG(__FILE__,__LINE__,
				"clcontext_create: attempt slave shm_open from %d",pid);

			timeout = 0;
			gettimeofday(&t0,0);
			t0.tv_sec += 10;

			do {
	
				fd = shm_open(shmobj,O_RDWR,0);
				gettimeofday(&t1,0);

				if (t1.tv_sec > t0.tv_sec && t1.tv_usec > t0.tv_usec) timeout = 1;

			} while (fd < 0 && !timeout);

			if (timeout) {

				ERROR(__FILE__,__LINE__,"clcontext_create: shm_open timeout");

			}

			ftruncate(fd,sz_page);

			p0 = mmap(0,sz_page,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0);

			if (!p0) return(0);

			__ctx_lock = (struct __ctx_lock_struct*)p0;

			pthread_mutex_lock(&__ctx_lock->mtx);
//			if (__ctx_lock->refc < platform_ndev) {
			if (__ctx_lock->refc < platform_vndev) {
				noff = __ctx_lock->refc;
//				ndev = min(ndevmax,platform_ndev-noff);
				ndev = min(ndevmax,platform_vndev-noff);
				__ctx_lock->refc += ndev;
			}
			pthread_mutex_unlock(&__ctx_lock->mtx);

			close(fd);

		} else {

			DEBUG(__FILE__,__LINE__,
				"clcontext_create: master shm_open succeeded from %d",pid);

			ftruncate(fd,sz_page);

			p0 = mmap(0,sz_page,PROT_READ|PROT_WRITE,MAP_SHARED,fd,0);

			if (!p0) return(0);

			__ctx_lock = (struct __ctx_lock_struct*)p0;

			__ctx_lock->magic = 20110415;
			__ctx_lock->key = lock_key;
			pthread_mutex_init(&__ctx_lock->mtx,0);
//			ndev = min(ndevmax,platform_ndev);
			ndev = min(ndevmax,platform_vndev);
		DEBUG(__FILE__,__LINE__,"ndev=%d %d %d",ndev,ndevmax,platform_vndev);
			__ctx_lock->refc = ndev;

			fchmod(fd,S_IRUSR|S_IWUSR);

			close(fd);

		}

		DEBUG(__FILE__,__LINE__,"ndev=%d",ndev);

//		if (noff < platform_ndev) {
		if (noff < platform_vndev) {

//			cp->ctx = clCreateContext(ctxprop,ndev,platform_dev + noff,0,0,&err);
			cp->ctx = clCreateContext(ctxprop,ndev,platform_dev + noff%platform_ndev,0,0,&err);

			DEBUG(__FILE__,__LINE__,
				"clcontext_create: platform_ndev=%d ndev=%d noffset=%d",
				platform_ndev,ndev,noff);

			if (platform_dev) free(platform_dev);

		} else {

			cp->ctx = 0;

		}

	} else {

		cp->ctx = clCreateContextFromType(ctxprop,devtyp,0,0,&err);

	}
#endif

	if (cp->ctx) {

		cp->devtyp = devtyp;
		err = clGetContextInfo(cp->ctx,CL_CONTEXT_DEVICES,0,0,&devlist_sz);
		cp->ndev = devlist_sz/sizeof(cl_device_id);
		cp->dev = (cl_device_id*)malloc(10*devlist_sz);
		err=clGetContextInfo(cp->ctx,CL_CONTEXT_DEVICES,devlist_sz,cp->dev,0);

//		cp->devtyp = devtyp;
//		err = clGetDeviceIDs(platformid,devtyp,0,0,&(cp->ndev));
//		DEBUG(__FILE__,__LINE__,"xxx %d",err);
//		DEBUG(__FILE__,__LINE__,"number of devices %d",cp->ndev);
//		cp->dev = (cl_device_id*)malloc(cp->ndev * sizeof(cl_device_id) );
//		err = clGetDeviceIDs(platformid,devtyp,cp->ndev,cp->dev,&(cp->ndev));
//		DEBUG(__FILE__,__LINE__,"xxx %d",err);
//		DEBUG(__FILE__,__LINE__," %p device[0]",cp->dev[0]);

		
	} else {

		WARN(__FILE__,__LINE__,"clcontext_create: failed");

#ifndef _WIN64
		if (lock_key > 0 && ndev > 0) {
			pthread_mutex_lock(&__ctx_lock->mtx);
			__ctx_lock->refc -= ndev;
			pthread_mutex_unlock(&__ctx_lock->mtx);
		}

		free(cp);
#else
		_aligned_free(cp);
#endif

      return((CONTEXT*)0);

   }


	DEBUG(__FILE__,__LINE__,"number of devices %d",cp->ndev);

		
	/* XXX XXX TESTING ONLY!!!! */
	//_aligned_free(cp);
	//DEBUG(__FILE__,__LINE__,"MADE IT"); return (CONTEXT*)0;


	/***
	 *** create command queues
	 ***/

	cp->cmdq = (cl_command_queue*)malloc(sizeof(cl_command_queue)*cp->ndev);

	DEBUG(__FILE__,__LINE__,"will try to create cmdq");

	

// XXX something is broken in clCreateCommandQueue, using lazy creation
// XXX as a workaround -DAR
//	{
		//cl_command_queue_properties prop = 00;
		//prop |= CL_QUEUE_PROFILING_ENABLE; /* XXX this should be choice -DAR */
		for(i=0;i<cp->ndev;i++) {
			//DEBUG(__FILE__,__LINE__,"%d calling clCreateCommandQueue(%p,%p,%x,%p)",i,cp->ctx,cp->dev[i],prop,&err);
#ifdef _WIN64
			cp->cmdq[i] = 0; /* have to defer, dllmain limitations */
#else
			cp->cmdq[i] = clCreateCommandQueue(cp->ctx,cp->dev[i],prop,&err);
			//cp->cmdq[i] = clCreateCommandQueue(cp->ctx,cp->dev[i],0,&err);
			//cl_command_queue cmdq = clCreateCommandQueue(cp->ctx,cp->dev[0],0,&err);

			DEBUG(__FILE__,__LINE__,"clcontext_create: error from create cmdq %d (%p)\n",
				err,cp->cmdq[i]);
#endif
			//DEBUG(__FILE__,__LINE__,"MADE IT"); return (CONTEXT*)0;
		}
//	}
//	printf("WARNING CMDQs NOT CREATED\n");
//	for(i=0;i<cp->ndev;i++) cp->cmdq[i] = (cl_command_queue)0;



	/***
	 *** init context resources
	 ***/

	LIST_INIT(&cp->prgs_listhead);

	LIST_INIT(&cp->txt_listhead);

	LIST_INIT(&cp->memd_listhead);


//	struct _prgs_struct* prgs 
//		= (struct _prgs_struct*)malloc(sizeof(struct _prgs_struct));
//	prgs->len=-1;
//	LIST_INSERT_HEAD(&cp->prgs_listhead, prgs, prgs_list);
//
//	prgs = (struct _prgs_struct*)malloc(sizeof(struct _prgs_struct));
//	prgs->len=-2;
//	LIST_INSERT_HEAD(&cp->prgs_listhead, prgs, prgs_list);

/*
	printf("%p searching _proc_cl for prgs...\n",_proc_cl.clstrtab);
	printf("%s\n",&_proc_cl.clstrtab[1]);
	struct clprgs_entry* sp;
	for(n=0,sp=_proc_cl.clprgs;n<_proc_cl.clprgs_n;n++,sp++) {
		printf("found %s (%d bytes)\n",&_proc_cl.clstrtab[sp->e_name],sp->e_size);
		struct _prgs_struct* prgs = (struct _prgs_struct*)
			clload(cp,_proc_cl.cltexts+sp->e_offset,sp->e_size,0);
	}
*/


	/*** 
	 *** initialize event lists
	 ***/
	
//	cp->nkev = cp->kev_first = cp->kev_free = 0;
	cp->kev = (struct _event_list_struct*)
		malloc(cp->ndev*sizeof(struct _event_list_struct));

	for(i=0;i<cp->ndev;i++) {
		cp->kev[i].nev = cp->kev[i].ev_first = cp->kev[i].ev_free = 0;
	}

//	cp->nmev = cp->mev_first = cp->mev_free = 0;
	cp->mev = (struct _event_list_struct*)
		malloc(cp->ndev*sizeof(struct _event_list_struct));

	for(i=0;i<cp->ndev;i++) {
		cp->mev[i].nev = cp->mev[i].ev_first = cp->mev[i].ev_free = 0;
	}

//#ifdef ENABLE_CLEXPORT
//	cp->ndev_v = 0;
//	cp->extd = 0;
//	cp->imtd = 0;
//#endif


	if (platforms) free(platforms);

	return(cp);

}
cl_int OCL_Environment::init(OCL_Environment_Desc desc)
{

	OCL_LoadLibrary();

	// Get number of  available platforms
	OCL_RETURN_ON_ERR( clGetPlatformIDs( 0, NULL, &uiNumPlatforms ) );

	if( !(uiNumPlatforms > 0) )
	{
		printf("No available platform!");
		abort();
	}

	// Create platforms
	cl_platform_id*	pPlatformIDs = (cl_platform_id*)calloc( sizeof(cl_platform_id), uiNumPlatforms );
	
	OCL_RETURN_ON_ERR( clGetPlatformIDs( uiNumPlatforms, pPlatformIDs, NULL ) );

	// Alloc OCL_Platforms
	if( desc.sPlatformName )
		mpPlatforms = new OCL_Platform[1];
	else
		mpPlatforms = new OCL_Platform[uiNumPlatforms];
	
	bool founddev = false;

	for( cl_uint i=0; i<uiNumPlatforms; i++ )
	{
		char* sPlatname;
		size_t nameSize;
		OCL_RETURN_ON_ERR( clGetPlatformInfo( pPlatformIDs[i], CL_PLATFORM_NAME, 0, NULL, &nameSize ) );

		sPlatname = new char[nameSize];
		OCL_RETURN_ON_ERR( clGetPlatformInfo( pPlatformIDs[i], CL_PLATFORM_NAME, nameSize, sPlatname, NULL ) );

		if( desc.sPlatformName )
		{
			if( strcmp(sPlatname,desc.sPlatformName ) == 0 )
			{
				uiNumPlatforms = 1;
				OCL_RETURN_ON_ERR( mpPlatforms[0].init(pPlatformIDs[i],desc) );
                founddev = true;
                break;
			}
		}
		else
		{
			OCL_RETURN_ON_ERR( mpPlatforms[i].init(pPlatformIDs[i],desc) );
			if(mpPlatforms[i].uiNumDevices > 0)
				founddev = true;//at least 1 compatible device found
		}
	}

	delete pPlatformIDs;
	//if no device found return the error
	if(!founddev)
		OCL_RETURN_ON_ERR(CL_DEVICE_NOT_FOUND);

	return CL_SUCCESS;
}
Пример #13
0
static cl::Context PlatformContext(cl_device_type device_type, char* platform_vendor_name, bool enable_gl_interop = false)
    {
        cl_uint numPlatforms;
        cl_platform_id platform = NULL;
        clGetPlatformIDs(0, NULL, &numPlatforms);
        if (numPlatforms > 0)
        {
            cl_platform_id* platforms = new cl_platform_id[numPlatforms];
            clGetPlatformIDs(numPlatforms, platforms, NULL);
            for (unsigned i = 0; i < numPlatforms; ++i)
            {
                char pbuf[100];
                clGetPlatformInfo(platforms[i],
                                   CL_PLATFORM_VENDOR,
                                   sizeof(pbuf),
                                   pbuf,
                                   NULL);

                platform = platforms[i];
                std::cout << "platform: " << pbuf << std::endl;
                if (!strcmp(pbuf, platform_vendor_name))
                {
                    break;
                }
            }
            delete[] platforms;
        }

        if (enable_gl_interop)
        {
        // Define OS-specific context properties and create the OpenCL context
        #if defined (__APPLE__)
            CGLContextObj kCGLContext = CGLGetCurrentContext();
            CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext);
            cl_context_properties cps[] =
            {
                CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)kCGLShareGroup,
                0
            };
        #else
            #if defined(linux)
                cl_context_properties cps[] =
                {
                    CL_GL_CONTEXT_KHR, cl_context_properties(glXGetCurrentContext()),
                    CL_GLX_DISPLAY_KHR, cl_context_properties(glXGetCurrentDisplay()),
                    CL_CONTEXT_PLATFORM, cl_context_properties(platform),
                    0
                };
            #else // Win32
                cl_context_properties cps[] =
                {
                    CL_GL_CONTEXT_KHR, (cl_context_properties)wglGetCurrentContext(),
                    CL_WGL_HDC_KHR, (cl_context_properties)wglGetCurrentDC(),
                    CL_CONTEXT_PLATFORM, (cl_context_properties)platform,
                    0
                };
            #endif
        #endif

        cl::Platform _platform(platform);
        cl::vector<cl::Device> *_devices = new cl::vector<cl::Device>();
        _platform.getDevices(CL_DEVICE_TYPE_GPU, _devices);

        if(_devices->size() > 1)
            _devices->pop_back();
        if (platform == NULL)
            return cl::Context(device_type, NULL);
        else
            return cl::Context(*_devices,cps);

        return (NULL == platform) ? cl::Context(device_type, NULL) :  cl::Context(*_devices, cps);

        }else //no opengl interoperability
        {
            cl_context_properties cps[] =
            {
                CL_CONTEXT_PLATFORM, cl_context_properties(platform),
                0
            };
            return (NULL == platform) ? cl::Context(device_type, NULL) : cl::Context(device_type, cps);
        }
    }
Пример #14
0
int main() {
    // This code executes on the OpenCL host
    
    // Host data
    int *A = NULL;  // Input array
    int *B = NULL;  // Input array
    int *C = NULL;  // Output array
    
    // Elements in each array
    const int elements = 2048;   
    
    // Compute the size of the data 
    size_t datasize = sizeof(int)*elements;

    // Allocate space for input/output data
    A = (int*)malloc(datasize);
    B = (int*)malloc(datasize);
    C = (int*)malloc(datasize);

    // Initialize the input data
    int i;
    for(i = 0; i < elements; i++) {
        A[i] = i;
        B[i] = i;
    }

    // Use this to check the output of each API call
    cl_int status;  
     
    // Retrieve the number of platforms
    cl_uint numPlatforms = 0;
    status = clGetPlatformIDs(0, NULL, &numPlatforms);

    // Allocate enough space for each platform
    cl_platform_id *platforms = NULL;
    platforms = (cl_platform_id*)malloc(
        numPlatforms*sizeof(cl_platform_id));
 
    // Fill in the platforms
    status = clGetPlatformIDs(numPlatforms, platforms, NULL);

	cl_int platform_index = -1;
	char cBuffer[1024];
	for (i=0; i<numPlatforms; i++) {
		clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, sizeof(cBuffer), cBuffer, NULL);
		if (strstr(cBuffer, "Intel") != NULL) {
			platform_index = i;
			break;
		}
	}
	if (platform_index < 0) {
		printf("Cannot find platforms support OpenCL.\n");
		return -1;
	}
	else {
		printf("Selected platform '%s'. %d\n", cBuffer, platform_index);
	}

    // Retrieve the number of devices
    cl_uint numDevices = 0;
    status = clGetDeviceIDs(platforms[platform_index], CL_DEVICE_TYPE_CPU, 0, 
        NULL, &numDevices);

    // Allocate enough space for each device
    cl_device_id *devices;
    devices = (cl_device_id*)malloc(
        numDevices*sizeof(cl_device_id));

    // Fill in the devices 
    status = clGetDeviceIDs(platforms[platform_index], CL_DEVICE_TYPE_ALL,        
        numDevices, devices, NULL);

    // Create a context and associate it with the devices
    cl_context context;
    context = clCreateContext(NULL, numDevices, devices, NULL, 
        NULL, &status);

    // Create a command queue and associate it with the device 
    cl_command_queue cmdQueue;
    cmdQueue = clCreateCommandQueue(context, devices[0], 0, 
        &status);

    // Create a buffer object that will contain the data 
    // from the host array A
    cl_mem bufA;
    bufA = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize,                       
       NULL, &status);

    // Create a buffer object that will contain the data 
    // from the host array B
    cl_mem bufB;
    bufB = clCreateBuffer(context, CL_MEM_READ_ONLY, datasize,                        
        NULL, &status);

    // Create a buffer object that will hold the output data
    cl_mem bufC;
    bufC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, datasize,
        NULL, &status); 
    
    // Write input array A to the device buffer bufferA
    status = clEnqueueWriteBuffer(cmdQueue, bufA, CL_FALSE, 
        0, datasize, A, 0, NULL, NULL);
    
    // Write input array B to the device buffer bufferB
    status = clEnqueueWriteBuffer(cmdQueue, bufB, CL_FALSE, 
        0, datasize, B, 0, NULL, NULL);

    // Create a program with source code
    cl_program program = clCreateProgramWithSource(context, 1, 
        (const char**)&programSource, NULL, &status);

    // Build (compile) the program for the device
    status = clBuildProgram(program, numDevices, devices, 
        NULL, NULL, NULL);

	// Create the vector addition kernel
    cl_kernel kernel;
    kernel = clCreateKernel(program, "vecadd", &status);

    // Associate the input and output buffers with the kernel 
    status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &bufA);
    status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &bufB);
    status = clSetKernelArg(kernel, 2, sizeof(cl_mem), &bufC);

    // Define an index space (global work size) of work 
    // items for execution. A workgroup size (local work size) 
    // is not required, but can be used.
    size_t globalWorkSize[1];   
 
    // There are 'elements' work-items 
    globalWorkSize[0] = elements;

    // Execute the kernel for execution
    status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, 
        globalWorkSize, NULL, 0, NULL, NULL);

    // Read the device output buffer to the host output array
    clEnqueueReadBuffer(cmdQueue, bufC, CL_TRUE, 0, 
        datasize, C, 0, NULL, NULL);

    // Verify the output
    int result = 1;
    for(i = 0; i < elements; i++) {
        if(C[i] != i+i) {
            result = 0;
            break;
        }
    }
    if(result) {
        printf("Output is correct\n");
    } else {
        printf("Output is incorrect\n");
    }

    // Free OpenCL resources
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(cmdQueue);
    clReleaseMemObject(bufA);
    clReleaseMemObject(bufB);
    clReleaseMemObject(bufC);
    clReleaseContext(context);

    // Free host resources
    free(A);
    free(B);
    free(C);
    free(platforms);
    free(devices);

    return 0;
}
Пример #15
0
struct cl_package initFPGA( const char* xclbin, const char* kernel_name )
{
	/*****************************************/
	/* Initialize OpenCL */
	/*****************************************/

	// Retrieve the number of platforms
    cl_uint numPlatforms = 0;
    cl_int status = clGetPlatformIDs(0, NULL, &numPlatforms);

	//printf("Found %d platforms support OpenCL, return code %d.\n", numPlatforms, status);
 
    // Allocate enough space for each platform
    cl_platform_id *platforms = (cl_platform_id*)malloc( numPlatforms*sizeof(cl_platform_id));
 
    status = clGetPlatformIDs(numPlatforms, platforms, NULL);
	if (status != CL_SUCCESS)
		printf("clGetPlatformIDs error(%d)\n", status);
	
	// Retrieve the number of devices
    cl_uint numDevices = 0;
#ifndef FPGA_DEVICE
    status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);
#else
    status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ACCELERATOR, 0, NULL, &numDevices);
#endif
	printf("Found %d devices support OpenCL.\n", numDevices);

    // Allocate enough space for each device
    cl_device_id *devices = (cl_device_id*)malloc( numDevices*sizeof(cl_device_id));

    // Fill in the devices 
#ifndef FPGA_DEVICE
    status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL);
#else
    status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ACCELERATOR, numDevices, devices, NULL);
#endif
	
	if (status != CL_SUCCESS)
		printf("clGetDeviceIDs error(%d)\n", status);

    // Create a context and associate it with the devices
    cl_context context;
    context = clCreateContext(NULL, numDevices, devices, NULL, NULL, &status);
	if (status != CL_SUCCESS)
		printf("clCreateContext error(%d)\n", status);


	//Create a command-queue
	cl_command_queue clCommandQue = clCreateCommandQueue(context, devices[0], 0, &status);

	if (status != CL_SUCCESS)
		printf("clCreateCommandQueue error(%d)\n", status);

	// 6. Load and build OpenCL kernel
	
#ifndef FPGA_DEVICE
	// Create a program with source code
    cl_program program = clCreateProgramWithSource(context, 1, 
        (const char**)&logistic_cl, NULL, &status);
	if (status != 0)
		printf("clCreateProgramWithSource error(%d)\n", status);

    // Build (compile) the program for the device
    status = clBuildProgram(program, 1, devices, NULL, NULL, NULL);
#else
	// Load binary from disk
	unsigned char *kernelbinary;
	printf("loading %s\n", xclbin);
	int n_i = load_file_to_memory(xclbin, (char **) &kernelbinary);
	if (n_i < 0) {
		printf("ERROR: failed to load kernel from xclbin: %s\n", xclbin);
		exit(1);
	}
	size_t n_bit = n_i;

	// Create the compute program from offline
	cl_program program = clCreateProgramWithBinary(context, 1, &devices[0], &n_bit,
			(const unsigned char **) &kernelbinary, NULL, &status);
	if ((!program) || (status != CL_SUCCESS)) {
		printf("Error: Failed to create compute program from binary %d!\n", status);
		exit(1);
	}

	// Build the program executable
	status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
#endif

	if (status != 0) {
		char errmsg[2048];
		size_t sizemsg = 0;

		status = clGetProgramBuildInfo(program, devices[0], CL_PROGRAM_BUILD_LOG, 2048*sizeof(char), errmsg, &sizemsg);

		printf("clBuildProgram error(%d)\n", status);
		printf("Compilation messages: \n %s", errmsg);
	}

	cl_kernel clKernel = clCreateKernel(program, kernel_name, &status);
	if (status != CL_SUCCESS)
		printf("clCreateKernel error(%d)\n", status);

	// TODO: parameterize the size of buffers
	cl_mem d_gradient = clCreateBuffer(context, CL_MEM_READ_WRITE, FEATURE_SIZE*LABEL_SIZE*GROUP_SIZE*sizeof(float), NULL, &status);
	if (status != CL_SUCCESS)
		printf("d_gradient clCreateBuffer error(%d)\n", status);

	cl_mem d_weights = clCreateBuffer(context, CL_MEM_READ_ONLY, FEATURE_SIZE*LABEL_SIZE*sizeof(float), NULL, &status);
	if (status != CL_SUCCESS)
		printf("d_weights clCreateBuffer error(%d)\n", status);

	cl_mem d_data = clCreateBuffer(context, CL_MEM_READ_ONLY, (FEATURE_SIZE+LABEL_SIZE)*CHUNK_SIZE*sizeof(float), NULL, &status);
	if (status != CL_SUCCESS)
		printf("d_data clCreateBuffer error(%d)\n", status);

    struct cl_package result;
    result.context = context;
    result.kernel = clKernel;
    result.commandQueue = clCommandQue;
    result.d_gradient = d_gradient;
    result.d_weights = d_weights;
    result.d_data = d_data;

    return result;
}
Пример #16
0
int main(int argc, char ** argv) {
	if(argc != 3){
		printf("wrong option... \n");
		return 0;
	}
    char *needleData = argv[1];
    int needleLen = strlen(needleData);
    int cccharsPerItem;
    int ggcharsPerItem;
    int characterSetSize = 128;//ASCii set
    int *skipTable; // skipTable
    char *needle = &needleData[0];//!!!Pass it in a wrong way
    skipTable = HorspoolPrecomputation(needle, characterSetSize); 

    char *cckernelFile = "./kernel/HS_CPU_LocalCounter_OpenCL_Kernel.cl";//the kernel for CPU
    char *ggkernelFile = "./kernel/HS_GPU_LocalMem_LocalCounter_OpenCL_Kernel.cl";//the kernel for GPU
    char *cckernelSrc = LoadKernelSrcFromFile(cckernelFile);
    char *ggkernelSrc = LoadKernelSrcFromFile(ggkernelFile);
    char *fileName = argv[2];
    
    //Load the haystacks from file
    FILE *filePtr;
    filePtr = fopen(fileName, "r");
    if(!filePtr) {
        fprintf(stderr, "can not open the text file!\n");
    }
    fseek(filePtr, 0 , SEEK_END);
    int fileSize = ftell(filePtr);
    int ggfileSize = fileSize*0.5;
    int ccfileSize = fileSize - ggfileSize;
    rewind(filePtr);
    //Split the file then I need overlaping!
    char *gghaystackData = (char*)calloc(ggfileSize, sizeof(char));
    char *cchaystackData = (char*)calloc(ccfileSize, sizeof(char));
    int textLength;
    textLength = fread(gghaystackData, sizeof(char), ggfileSize, filePtr);
    textLength += fread(cchaystackData, sizeof(char), ccfileSize, filePtr);
    if(textLength != fileSize) {
        fprintf(stderr, "reading error");
    }
    fclose(filePtr);
    //~ char *cchaystackData = LoadHaystackDataFromFile(fileName);
    int cchaystackLen = strlen(cchaystackData);
    int gghaystackLen = strlen(gghaystackData);


    cl_int err;
    cl_platform_id platform;
    cl_device_id cdevice;
    cl_device_id ggdevice;

    //Find a platform
    err = clGetPlatformIDs(1, &platform, NULL);
    if(err != CL_SUCCESS) {
        printf("cant find a platform! \n");
    }
    
    //Set up devices
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 1, &cdevice, NULL);
    if(err != CL_SUCCESS) {
        printf("cant find CPU! \n");
    }
    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &ggdevice, NULL);
    if(err != CL_SUCCESS) {
        printf("cant find GPU! \n");
    }
    
    //Create context
    cl_context ccontext = clCreateContext(NULL, 1, &cdevice, NULL, NULL, &err);
    if(err != CL_SUCCESS) {
        printf("cant create a ccontext! \n");
    }
    cl_context ggcontext = clCreateContext(NULL, 1, &ggdevice, NULL, NULL, &err);
    if(err != CL_SUCCESS) {
        printf("cant create a ggcontext! \n");
    }
    
    //Create command queues
    cl_command_queue cqueue = clCreateCommandQueue(ccontext, cdevice, 0, &err);
    if(err != CL_SUCCESS) {
        printf("cant create a  cqueue! \n");
    }
    cl_command_queue ggqueue = clCreateCommandQueue(ggcontext, ggdevice, 0, &err);
    if(err != CL_SUCCESS) {
        printf("cant create a  ggqueue! \n");
    }

    
    //Create the programm object
    cl_program cprogram = clCreateProgramWithSource(ccontext, 1, (const char**)&cckernelSrc, NULL, &err);
    if(err != CL_SUCCESS) {
        printf("cant build the program! \n");
    }
    cl_program ggprogram = clCreateProgramWithSource(ggcontext, 1, (const char**)&ggkernelSrc, NULL, &err);
    if(err != CL_SUCCESS) {
        printf("cant build the program! \n");
    }

    
    //Build the programm executable
    err = clBuildProgram(cprogram, 0, NULL, NULL, NULL, NULL);
    if(err != CL_SUCCESS) {
        printf("cant build the cprogramm exe! \n");
    }
    err = clBuildProgram(ggprogram, 0, NULL, NULL, NULL, NULL);
    if(err != CL_SUCCESS) {
        printf("cant build the ggprogramm exe! \n");
    }

    
    //Create the kernel
    cl_kernel ckernel = clCreateKernel(cprogram, "QSMatch", &err);
    if(err != CL_SUCCESS) {
        printf("cant create the ckernel! \n");
    }
    cl_kernel ggkernel = clCreateKernel(ggprogram, "QSMatch", &err);
    if(err != CL_SUCCESS) {
        printf("cant create the ggkernel! \n");
    }

    
    //Create the haystack buffer
    cl_mem cchaystackBuffer = clCreateBuffer(ccontext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, cchaystackLen, cchaystackData, &err);
    if(err != CL_SUCCESS) {
        printf("couldn't create the cchaystackBuffer \n");
    }
    cl_mem gghaystackBuffer = clCreateBuffer(ggcontext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, gghaystackLen, gghaystackData, &err);
    if(err != CL_SUCCESS) {
        printf("couldn't create the gghaystackBuffer \n");
    }

    
    //Create the results buffer
    int ccres[4] = {0};
    int ggres[80] = {0};
    cl_mem ccresBuffer = clCreateBuffer(ccontext, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(ccres), ccres, &err);
    if(err != CL_SUCCESS){
		printf("couldn't create the ccresBuffer");
	}
	cl_mem ggresBuffer = clCreateBuffer(ggcontext, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(ggres), ggres, &err);
    if(err != CL_SUCCESS){
		printf("couldn't create the ggresBuffer");
	}
	
    //Create the needleBuffer
    cl_mem ccneedleBuffer = clCreateBuffer(ccontext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(char)*needleLen, needleData, &err);
    if(err != CL_SUCCESS){
		printf("couldn't create the ccneedleBuffer \n");
	}
	cl_mem ggneedleBuffer = clCreateBuffer(ggcontext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(char)*needleLen, needleData, &err);
    if(err != CL_SUCCESS){
		printf("couldn't create the ggneedleBuffer \n");
	}
	
    //Create the skipTableBuffer
    cl_mem ccskipTableBuffer = clCreateBuffer(ccontext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int)*characterSetSize, skipTable, &err);
    if(err != CL_SUCCESS){
		printf("couldn't create the ccskipTableBuffer \n");
	}
	cl_mem ggskipTableBuffer = clCreateBuffer(ggcontext, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(int)*characterSetSize, skipTable, &err);
    if(err != CL_SUCCESS){
		printf("couldn't create the ggskipTableBuffer \n");
	}
	
	//Determine global size and local size
    size_t cclocalSize = 64;
    size_t gglocalSize = 256;
    size_t ccGlobalSize = 64*4;
    size_t ggGlobalSize = 256*5*8;
    cccharsPerItem = cchaystackLen/ccGlobalSize + 1;//Add 1 it important otherwise some patterns will be lost
    ggcharsPerItem = cchaystackLen/ggGlobalSize + 1;
    
    //Set the kernel arguments for CPU
    err = clSetKernelArg(ckernel, 0, sizeof(cl_mem), &cchaystackBuffer);
    err |= clSetKernelArg(ckernel, 1, sizeof(cl_mem), &ccneedleBuffer);
    err |= clSetKernelArg(ckernel, 2, sizeof(cl_mem), &ccskipTableBuffer);
    err |= clSetKernelArg(ckernel, 3, sizeof(int)*1, NULL);
    err |= clSetKernelArg(ckernel, 4, sizeof(needleLen), &needleLen);
    err |= clSetKernelArg(ckernel, 5, sizeof(cccharsPerItem), &cccharsPerItem);
    err |= clSetKernelArg(ckernel, 6, sizeof(cl_mem), &ccresBuffer);
    if(err != CL_SUCCESS) {
        printf("couldn't set the ckernel arguments \n");
    }
    
    //Set the kernel arguments for GPU
    err = clSetKernelArg(ggkernel, 0, sizeof(cl_mem), &gghaystackBuffer);
    err |= clSetKernelArg(ggkernel, 1, sizeof(cl_mem), &ggneedleBuffer);
    err |= clSetKernelArg(ggkernel, 2, sizeof(char)*needleLen, NULL);
    err |= clSetKernelArg(ggkernel, 3, sizeof(cl_mem), &ggskipTableBuffer);
    err |= clSetKernelArg(ggkernel, 4, sizeof(int)*128, NULL);
    err |= clSetKernelArg(ggkernel, 5, sizeof(int)*1, NULL);
    err |= clSetKernelArg(ggkernel, 6, sizeof(needleLen), &needleLen);
    err |= clSetKernelArg(ggkernel, 7, sizeof(ggcharsPerItem), &ggcharsPerItem);
    err |= clSetKernelArg(ggkernel, 8, sizeof(cl_mem), &ggresBuffer);
    if(err != CL_SUCCESS) {
        printf("couldn't set the ggkernel arguments \n");
    }
    
    //Execut the kernel
    //On CPU
    err = clEnqueueNDRangeKernel(cqueue, ckernel, 1, NULL, &ccGlobalSize, &cclocalSize, 0, NULL, NULL);
    if(err != CL_SUCCESS) {
        printf("ckernel could not be executed... \n");
    } 
    else {
        printf("ckernel has been executed successfully! \n");
    }
      
    //~ //On GPU
    err = clEnqueueNDRangeKernel(ggqueue, ggkernel, 1, NULL, &ggGlobalSize, &gglocalSize, 0, NULL, NULL);
    if(err != CL_SUCCESS) {
        printf("ggkernel could not be executed... \n");
    } 
    else {
        printf("ggkernel has been executed successfully! \n");
    }
    
    //Finish the queue lists
    clFinish(ggqueue);
    clFinish(cqueue);
    

    //Copy the results
    clEnqueueReadBuffer(cqueue, ccresBuffer, CL_TRUE, 0, sizeof(ccres), ccres, 0, NULL, NULL);
    clEnqueueReadBuffer(ggqueue, ggresBuffer, CL_TRUE, 0, sizeof(ggres), ggres, 0, NULL, NULL);
    PrintTheResults(ccres, ggres);
    
    //Clean up
    free(skipTable);
    free(cckernelSrc);
    free(ggkernelSrc);
    free(cchaystackData);
    free(gghaystackData);
    clReleaseMemObject(cchaystackBuffer);
    clReleaseMemObject(ccneedleBuffer);
    clReleaseMemObject(ccskipTableBuffer);
    clReleaseMemObject(ccresBuffer);
    clReleaseMemObject(gghaystackBuffer);
    clReleaseMemObject(ggneedleBuffer);
    clReleaseMemObject(ggskipTableBuffer);
    clReleaseMemObject(ggresBuffer);
    clReleaseKernel(ckernel);
    clReleaseKernel(ggkernel);
    clReleaseProgram(cprogram);
    clReleaseProgram(ggprogram);
    clReleaseCommandQueue(cqueue);
    clReleaseCommandQueue(ggqueue);
    clReleaseContext(ccontext);
    clReleaseContext(ggcontext);

    return 0;
}
void mandelbrot(int m, int n)
{
  cl_platform_id   *platform;
  cl_device_type   dev_type = CL_DEVICE_TYPE_GPU;
  cl_device_id     *devs = NULL;
  cl_context       context;
  cl_command_queue *cmd_queues;
  cl_program       program;
  cl_kernel        *kernels;
  cl_mem           *mem_R;
  cl_mem		   *mem_G;
  cl_mem		   *mem_B;
  cl_int           err;
  cl_uint          num_platforms;
  cl_uint          num_devs = 0;
  cl_event		   *ev_kernels;

	
		
  int count_max = COUNT_MAX;
  int i, j, jhi, jlo;
  char *output_filename = "mandelbrot.ppm";
  FILE *output_unit;
  double wtime;

  float x_max =   1.25;
  float x_min = - 2.25;
//  float x;
//  float x1;
//  float x2;
  float y_max =   1.75;
  float y_min = - 1.75;
  //float y;
  //float y1;
  //float y2;

  size_t size_color;

  size_color = sizeof(int) * m * n;

  int (*r)[n] = (int (*)[n])calloc(m * n, sizeof(int));
  int (*g)[n] = (int (*)[n])calloc(m * n, sizeof(int));
  int (*b)[n] = (int (*)[n])calloc(m * n, sizeof(int));

  printf( "  Sequential C version\n" );
  printf( "\n" );
  printf( "  Create an ASCII PPM image of the Mandelbrot set.\n" );
  printf( "\n" );
  printf( "  For each point C = X + i*Y\n" );
  printf( "  with X range [%g,%g]\n", x_min, x_max );
  printf( "  and  Y range [%g,%g]\n", y_min, y_max );
  printf( "  carry out %d iterations of the map\n", count_max );
  printf( "  Z(n+1) = Z(n)^2 + C.\n" );
  printf( "  If the iterates stay bounded (norm less than 2)\n" );
  printf( "  then C is taken to be a member of the set.\n" );
  printf( "\n" );
  printf( "  An ASCII PPM image of the set is created using\n" );
  printf( "    M = %d pixels in the X direction and\n", m );
  printf( "    N = %d pixels in the Y direction.\n", n );

  timer_init();
  timer_start(0);

  // Platform
  err = clGetPlatformIDs(0, NULL, &num_platforms);
  CHECK_ERROR(err);
  if (num_platforms == 0) {
    fprintf(stderr, "[%s:%d] ERROR: No OpenCL platform\n", __FILE__,__LINE__);
    exit(EXIT_FAILURE);
  }

  printf("Number of platforms: %u\n", num_platforms);
  platform = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms);
  err = clGetPlatformIDs(num_platforms, platform, NULL);
  CHECK_ERROR(err);
  
  // Device
  for (i = 0; i < num_platforms; i++) {
    err = clGetDeviceIDs(platform[i], dev_type, 0, NULL, &num_devs);
    if (err != CL_DEVICE_NOT_FOUND) CHECK_ERROR(err);
	num_devs = 1; //**
    if (num_devs >= 1)
	{
		devs = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devs);

		err = clGetDeviceIDs(platform[i], dev_type, num_devs, devs, NULL);
		break;
	}
  }
  if ( devs == NULL || num_devs < 1) {
    fprintf(stderr, "[%s:%d] ERROR: No device\n", __FILE__, __LINE__);
    exit(EXIT_FAILURE);
  }

  for( i = 0; i < num_devs; ++i ) {
	printf("dev[%d] : ", i);
  	print_device_name(devs[i]);
  }

  // Context
  context = clCreateContext(NULL, num_devs, devs, NULL, NULL, &err);
  CHECK_ERROR(err);

  // Command queue
  cmd_queues = (cl_command_queue*)malloc(sizeof(cl_command_queue)*num_devs);
  for( i = 0; i < num_devs; ++i) {
	cmd_queues[i] = clCreateCommandQueue(context, devs[i], 0, &err);
  	CHECK_ERROR(err);
  }

  // Create a program.
  size_t source_len;
  char *source_code = get_source_code("./mandelbrot_kernel.cl", &source_len);
  program = clCreateProgramWithSource(context,
                                      1,
                                      (const char **)&source_code,
                                      &source_len,
                                      &err);
  free(source_code);
  CHECK_ERROR(err);

  // Build the program.
  char build_opts[200];
  sprintf(build_opts, "-Dm=%d -Dn=%d -Dnum_devs=%d", m, n, num_devs);
  err = clBuildProgram(program, num_devs, devs, build_opts, NULL, NULL);
  if (err != CL_SUCCESS) {
    print_build_log(program, devs[0]);
    CHECK_ERROR(err);
  }
  
  // Kernel
  kernels = (cl_kernel*)malloc(sizeof(cl_kernel)*num_devs);
  for (i = 0; i < num_devs; i++) {
	  kernels[i] = clCreateKernel(program, "mandelbrot_kernel", NULL);
  }
 
  // Buffers  
  mem_R = (cl_mem*)malloc(sizeof(cl_mem)*num_devs);
  mem_G = (cl_mem*)malloc(sizeof(cl_mem)*num_devs);
  mem_B = (cl_mem*)malloc(sizeof(cl_mem)*num_devs);

  for(i = 0; i < num_devs; i++) {
	  mem_R[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                         size_color / num_devs, NULL, NULL);
	  mem_G[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                         size_color / num_devs, NULL, NULL);
	  mem_B[i] = clCreateBuffer(context, CL_MEM_READ_WRITE,
                         size_color / num_devs, NULL, NULL);
  }

/*
  // Write to Buffers
  for(i = 0; i < num_devs; i++) {
  	clEnqueueWriteBuffer(cmd_queues[i],
                         mem_CHECK[i], 
                         CL_FALSE, 0,
                         size_CHECK / num_devs,
                         (CHECK + (N / num_devs) * i),
                         0, NULL, NULL);
  }
*/

  // Set the arguments.
  for(i = 0; i < num_devs; i++) {
//	  flag = i * (m * n / num_devs);
  	clSetKernelArg(kernels[i], 0, sizeof(cl_mem), (void*) &mem_R[i]);
	clSetKernelArg(kernels[i], 1, sizeof(cl_mem), (void*) &mem_G[i]);
  	clSetKernelArg(kernels[i], 2, sizeof(cl_mem), (void*) &mem_B[i]);

	clSetKernelArg(kernels[i], 3, sizeof(int), &count_max);
	clSetKernelArg(kernels[i], 4, sizeof(float), &x_max);
	clSetKernelArg(kernels[i], 5, sizeof(float), &x_min);
	clSetKernelArg(kernels[i], 6, sizeof(float), &y_max);
	clSetKernelArg(kernels[i], 7, sizeof(float), &y_min);
  }

  // Enqueue the kernel.
  size_t lws[1] = {256};
  size_t gws[1] = { m * n /num_devs };
  gws[0] = (size_t)ceil((double)m * n / lws[0]) * lws[0];
  ev_kernels = (cl_event*)malloc(sizeof(cl_event)*num_devs);
  for(i = 0; i < num_devs; i++) {
 	 err = clEnqueueNDRangeKernel(cmd_queues[i], kernels[i], 1, NULL, gws, lws, 0, NULL, &ev_kernels[i]);
  	 CHECK_ERROR(err);
  }

  // Read the result.
  for(i = 0; i < num_devs; i++) {
  	err = clEnqueueReadBuffer(cmd_queues[i],
                            mem_R[i],
                            CL_TRUE, 0,
                            size_color / num_devs,
                            r,
                            1, &ev_kernels[i], NULL);
  	err = clEnqueueReadBuffer(cmd_queues[i],
                            mem_G[i],
                            CL_TRUE, 0,
                            size_color / num_devs,
                            g,
							1, &ev_kernels[i], NULL);
   	err = clEnqueueReadBuffer(cmd_queues[i],
                            mem_B[i],
                            CL_TRUE, 0,
							size_color / num_devs,
							b,
                            1, &ev_kernels[i], NULL);
  }

  // Release
  for( i = 0; i < num_devs; ++i ) {
  clFinish(cmd_queues[i]); 
  clReleaseMemObject(mem_R[i]);
  clReleaseMemObject(mem_G[i]);
  clReleaseMemObject(mem_B[i]);
  clReleaseKernel(kernels[i]);
  clReleaseCommandQueue(cmd_queues[i]);
  clReleaseEvent(ev_kernels[i]);
  }
  clReleaseProgram(program);
  clReleaseContext(context);
  free(mem_R);
  free(mem_G);
  free(mem_B);
  free(cmd_queues);
  free(kernels);
  free(devs);
  free(ev_kernels);
  free(platform);

  timer_stop(0);
  wtime = timer_read(0);
  printf( "\n" );
  printf( "  Time = %lf seconds.\n", wtime );

  // Write data to an ASCII PPM file.
  output_unit = fopen( output_filename, "wt" );

  fprintf( output_unit, "P3\n" );
  fprintf( output_unit, "%d  %d\n", n, m );
  fprintf( output_unit, "%d\n", 255 );
  for ( i = 0; i < m; i++ )
  {
    for ( jlo = 0; jlo < n; jlo = jlo + 4 )
    {
      jhi = MIN( jlo + 4, n );
      for ( j = jlo; j < jhi; j++ )
      {
        fprintf( output_unit, "  %d  %d  %d", r[i][j], g[i][j], b[i][j] );
      }
      fprintf( output_unit, "\n" );
    }
  }

  fclose( output_unit );
  printf( "\n" );
  printf( "  Graphics data written to \"%s\".\n\n", output_filename );

  // Terminate.
  free(r);
  free(g);
  free(b);
}
Пример #18
0
int main() {
    // This code executes on the OpenCL host
    
    // Host data
    int *A = NULL;  // Input array
    int *B = NULL;  // Input array
    int *C = NULL;  // Output array
    
    // Elements in each array
    const int elements = 2048;   
    
    // Compute the size of the data 
    size_t datasize = sizeof(int)*elements;

    // Allocate space for input/output data
    A = (int*)malloc(datasize);
    B = (int*)malloc(datasize);
    C = (int*)malloc(datasize);
    // Initialize the input data
    for(int i = 0; i < elements; i++) {
        A[i] = i;
        B[i] = i;
    }

    // Use this to check the output of each API call
    cl_int status;  
     
    //-----------------------------------------------------
    // STEP 1: Discover and initialize the platforms
    //-----------------------------------------------------
    
    cl_uint numPlatforms = 0;
    cl_platform_id *platforms = NULL;
    
    // Use clGetPlatformIDs() to retrieve the number of 
    // platforms
    status = clGetPlatformIDs(0, NULL, &numPlatforms);
 
    // Allocate enough space for each platform
    platforms =   
        (cl_platform_id*)malloc(
            numPlatforms*sizeof(cl_platform_id));
 
    // Fill in platforms with clGetPlatformIDs()
    status = clGetPlatformIDs(numPlatforms, platforms, 
                NULL);

    //-----------------------------------------------------
    // STEP 2: Discover and initialize the devices
    //----------------------------------------------------- 
    
    cl_uint numDevices = 0;
    cl_device_id *devices = NULL;

    // Use clGetDeviceIDs() to retrieve the number of 
    // devices present
    status = clGetDeviceIDs(
        platforms[0], 
        CL_DEVICE_TYPE_ALL, 
        0, 
        NULL, 
        &numDevices);

    // Allocate enough space for each device
    devices = 
        (cl_device_id*)malloc(
            numDevices*sizeof(cl_device_id));

    // Fill in devices with clGetDeviceIDs()
    status = clGetDeviceIDs(
        platforms[0], 
        CL_DEVICE_TYPE_ALL,        
        numDevices, 
        devices, 
        NULL);

    //-----------------------------------------------------
    // STEP 3: Create a context
    //----------------------------------------------------- 
    
    cl_context context = NULL;

    // Create a context using clCreateContext() and 
    // associate it with the devices
    context = clCreateContext(
        NULL, 
        numDevices, 
        devices, 
        NULL, 
        NULL, 
        &status);

    //-----------------------------------------------------
    // STEP 4: Create a command queue
    //----------------------------------------------------- 
    
    cl_command_queue cmdQueue;

    // Create a command queue using clCreateCommandQueue(),
    // and associate it with the device you want to execute 
    // on
    cmdQueue = clCreateCommandQueue(
        context, 
        devices[0], 
        0, 
        &status);

    //-----------------------------------------------------
    // STEP 5: Create device buffers
    //----------------------------------------------------- 
    
    cl_mem bufferA;  // Input array on the device
    cl_mem bufferB;  // Input array on the device
    cl_mem bufferC;  // Output array on the device

    // Use clCreateBuffer() to create a buffer object (d_A) 
    // that will contain the data from the host array A
    bufferA = clCreateBuffer(
        context, 
        CL_MEM_READ_ONLY,                         
        datasize, 
        NULL, 
        &status);

    // Use clCreateBuffer() to create a buffer object (d_B)
    // that will contain the data from the host array B
    bufferB = clCreateBuffer(
        context, 
        CL_MEM_READ_ONLY,                         
        datasize, 
        NULL, 
        &status);

    // Use clCreateBuffer() to create a buffer object (d_C) 
    // with enough space to hold the output data
    bufferC = clCreateBuffer(
        context, 
        CL_MEM_WRITE_ONLY,                 
        datasize, 
        NULL, 
        &status);
    
    //-----------------------------------------------------
    // STEP 6: Write host data to device buffers
    //----------------------------------------------------- 
    
    // Use clEnqueueWriteBuffer() to write input array A to
    // the device buffer bufferA
    status = clEnqueueWriteBuffer(
        cmdQueue, 
        bufferA, 
        CL_FALSE, 
        0, 
        datasize,                         
        A, 
        0, 
        NULL, 
        NULL);
    
    // Use clEnqueueWriteBuffer() to write input array B to 
    // the device buffer bufferB
    status = clEnqueueWriteBuffer(
        cmdQueue, 
        bufferB, 
        CL_FALSE, 
        0, 
        datasize,                                  
        B, 
        0, 
        NULL, 
        NULL);

    //-----------------------------------------------------
    // STEP 7: Create and compile the program
    //----------------------------------------------------- 
     
    // Create a program using clCreateProgramWithSource()
    cl_program program = clCreateProgramWithSource(
        context, 
        1, 
        (const char**)&programSource,                                 
        NULL, 
        &status);

    // Build (compile) the program for the devices with
    // clBuildProgram()
    status = clBuildProgram(
        program, 
        numDevices, 
        devices, 
        NULL, 
        NULL, 
        NULL);
   
    //-----------------------------------------------------
    // STEP 8: Create the kernel
    //----------------------------------------------------- 

    cl_kernel kernel = NULL;

    // Use clCreateKernel() to create a kernel from the 
    // vector addition function (named "vecadd")
    kernel = clCreateKernel(program, "vecadd", &status);

    //-----------------------------------------------------
    // STEP 9: Set the kernel arguments
    //----------------------------------------------------- 
    
    // Associate the input and output buffers with the 
    // kernel 
    // using clSetKernelArg()
    status  = clSetKernelArg(
        kernel, 
        0, 
        sizeof(cl_mem), 
        &bufferA);
    status |= clSetKernelArg(
        kernel, 
        1, 
        sizeof(cl_mem), 
        &bufferB);
    status |= clSetKernelArg(
        kernel, 
        2, 
        sizeof(cl_mem), 
        &bufferC);

    //-----------------------------------------------------
    // STEP 10: Configure the work-item structure
    //----------------------------------------------------- 
    
    // Define an index space (global work size) of work 
    // items for 
    // execution. A workgroup size (local work size) is not 
    // required, 
    // but can be used.
    size_t globalWorkSize[1];    
    // There are 'elements' work-items 
    globalWorkSize[0] = elements;

    //-----------------------------------------------------
    // STEP 11: Enqueue the kernel for execution
    //----------------------------------------------------- 
    
    // Execute the kernel by using 
    // clEnqueueNDRangeKernel().
    // 'globalWorkSize' is the 1D dimension of the 
    // work-items
    status = clEnqueueNDRangeKernel(
        cmdQueue, 
        kernel, 
        1, 
        NULL, 
        globalWorkSize, 
        NULL, 
        0, 
        NULL, 
        NULL);

    //-----------------------------------------------------
    // STEP 12: Read the output buffer back to the host
    //----------------------------------------------------- 
    
    // Use clEnqueueReadBuffer() to read the OpenCL output  
    // buffer (bufferC) 
    // to the host output array (C)
    clEnqueueReadBuffer(
        cmdQueue, 
        bufferC, 
        CL_TRUE, 
        0, 
        datasize, 
        C, 
        0, 
        NULL, 
        NULL);

    // Verify the output
    bool result = true;
    for(int i = 0; i < elements; i++) {
        if(C[i] != i+i) {
            result = false;
            break;
        }
    }
    if(result) {
        printf("Output is correct\n");
    } else {
        printf("Output is incorrect\n");
    }

    //-----------------------------------------------------
    // STEP 13: Release OpenCL resources
    //----------------------------------------------------- 
    
    // Free OpenCL resources
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseCommandQueue(cmdQueue);
    clReleaseMemObject(bufferA);
    clReleaseMemObject(bufferB);
    clReleaseMemObject(bufferC);
    clReleaseContext(context);

    // Free host resources
    free(A);
    free(B);
    free(C);
    free(platforms);
    free(devices);
}
Пример #19
0
/**
 * @brief Main program function.
 * 
 * @param argc Number of cli parameters. If argc > 1, more detailed information will be shown.
 * @param argv Not relevant.
 * @return 
 */
int main(int argc, char ** argv) {
	
	/* Program variables. */
	GError* err = NULL;                      /* Error reporting object. */
	cl_int status;                           /* Program/function return status variable. */
	cl_uint numPlatforms;                    /* Number of platforms. */
	cl_platform_id* platforms = NULL;        /* Array of platform IDs. */
	cl_uint numDevices;                      /* Number of devices. */
	cl_device_id devices[MAX_DEVICES_QUERY]; /* Array of devices for a given platform. */

	/* Auxiliary variables for getting information about platforms and devices. */
	char pbuff[MAX_INFO_STRING];
	size_t sizetaux;
	cl_uint uintaux;
	cl_ulong ulongaux, ulongaux2;
	cl_device_type dtypeaux;
	cl_device_local_mem_type dlmt;
	cl_bool boolaux;
	cl_command_queue_properties cqpaux;
	
	/* Avoid compiler warning. */
	argv = argv;

	/* Get number of platforms. */
	status = clGetPlatformIDs(0, NULL, &numPlatforms);
	gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get number of platforms.", status);
	
	/* Get platform IDs. */
	platforms = (cl_platform_id*) malloc(numPlatforms * sizeof(cl_platform_id));
	gef_if_error_create_goto(err, CLU_UTILS_ERROR, platforms == NULL, CLU_ERROR_NOALLOC, error_handler, "Unable to allocate memory for list of platform IDs.");

	status = clGetPlatformIDs(numPlatforms, platforms, NULL);
	gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get list of platform IDs.", status);
	
	/* Print number of platforms. */
	printf("Number of platforms: %d\n", numPlatforms);

	/* Cycle through platforms */
	for(unsigned int i = 0; i < numPlatforms; i++) {
	
		/* Get platform vendor. */
		status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
		gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get platform vendor.", status);
		
		/* Print plaform vendor. */
		printf("Platform #%d: %s\n", i, pbuff);
		
		/* Get devices in platform */
		status = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, MAX_DEVICES_QUERY, devices, &numDevices);
		gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to platform devices.", status);

		/* Cycle through devices in current platform. */
		for (unsigned int j = 0; j < numDevices; j++) {
			
			/* Device name. */
			status = clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get device name.", status);

			printf("\tDevice #%d: %s\n", j, pbuff);
			
			/* Device vendor. */
			status = clGetDeviceInfo(devices[j], CL_DEVICE_VENDOR, sizeof(pbuff), pbuff, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get device vendor.", status);

			printf("\t           Vendor: %s\n", pbuff);

			/* Device type. */
			status = clGetDeviceInfo(devices[j], CL_DEVICE_TYPE, sizeof(cl_device_type), &dtypeaux, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get device type.", status);

			printf("\t           Type: %s\n", clu_device_type_str_get(dtypeaux, 0, pbuff, MAX_INFO_STRING));

			/* OpenCL C version. */
			status = clGetDeviceInfo(devices[j], CL_DEVICE_OPENCL_C_VERSION, sizeof(pbuff), pbuff, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get device OpenCL C version.", status);
			
			printf("\t           %s\n", pbuff);
			
			/* Max. compute units. */
			status = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(uintaux), &uintaux, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get device max. compute units.", status);

			printf("\t           Max. Compute units: %d\n", uintaux);

			/* Global memory info. */
			status = clGetDeviceInfo(devices[j], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(ulongaux), &ulongaux, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get device global memory size.", status);

			status = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(ulongaux2), &ulongaux2, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get device maximum allocable memory.", status);

			status = clGetDeviceInfo(devices[j], CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(boolaux), &boolaux, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get determine if device memory is unified with host.", status);

			printf("\t           Global mem. size: %ld Mb %s (max. alloc. %ld Mb)\n", (unsigned long int) ulongaux / 1024l / 1024l, boolaux ? "shared with host" : "dedicated", (unsigned long int) ulongaux2 / 1024l / 1024l);

			/* Local memory info. */
			status = clGetDeviceInfo(devices[j], CL_DEVICE_LOCAL_MEM_TYPE, sizeof(dlmt), &dlmt, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get type of local memory in device.", status);

			status = clGetDeviceInfo(devices[j], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(ulongaux), &ulongaux, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get size of local memory in device.", status);

			printf("\t           Local mem. size (type): %ld Kb (%s)\n", (unsigned long int) ulongaux / 1024l, (dlmt == CL_LOCAL ? "local" : "global"));

			/* Maximum work group size. */
			status = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(sizetaux), &sizetaux, NULL);
			gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get size of local memory in device.", status);

			printf("\t           Max. work-group size: %d\n", (int) sizetaux);

			/* Print extra info if any arg is given */
			if (argc > 1) {

				/* Maximum constant buffer size. */
				status = clGetDeviceInfo(devices[j], CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, sizeof(ulongaux), &ulongaux, NULL);
				gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get maximum constant buffer size.", status);
				
				printf("\t           Max. constant buffer size: %lu Kb\n", (unsigned long) (ulongaux / 1024));

				/* Device endianess.*/
				status = clGetDeviceInfo(devices[j], CL_DEVICE_ENDIAN_LITTLE, sizeof(boolaux), &boolaux, NULL);
				gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get device endianess.", status);

				printf("\t           Endianness: %s\n", boolaux ? "Little" : "Big");
				
				/* Preferred vector width.  */
				printf("\t           Pref. vec. width:");

				status = clGetDeviceInfo(devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, sizeof(uintaux), &uintaux, NULL);
				gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get preferred vector width for char.", status);

				printf(" Char=%d,", uintaux);

				status = clGetDeviceInfo(devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, sizeof(uintaux), &uintaux, NULL);
				gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get preferred vector width for short.", status);

				printf(" Short=%d,", uintaux);

				status = clGetDeviceInfo(devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(uintaux), &uintaux, NULL);
				gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get preferred vector width for int.", status);

				printf(" Int=%d,", uintaux);

				status = clGetDeviceInfo(devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, sizeof(uintaux), &uintaux, NULL);
				gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get preferred vector width for long.", status);

				printf(" Long=%d,", uintaux);

				status = clGetDeviceInfo(devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, sizeof(uintaux), &uintaux, NULL);
				gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get preferred vector width for float.", status);

				printf(" Float=%d,", uintaux);

				status = clGetDeviceInfo(devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(uintaux), &uintaux, NULL);
				gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get preferred vector width for double.", status);

				printf(" Double=%d,", uintaux);

				status = clGetDeviceInfo(devices[j], CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, sizeof(uintaux), &uintaux, NULL);
				gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get preferred vector width for half.", status);

				printf(" Half=%d.\n", uintaux);
				
				/* Acceptable command queue properties. */
				status = clGetDeviceInfo(devices[j], CL_DEVICE_QUEUE_PROPERTIES, sizeof(cqpaux), &cqpaux, NULL);
				gef_if_error_create_goto(err, CLU_UTILS_ERROR, CL_SUCCESS != status, status, error_handler, "OpenCL error %d: unable to get acceptable command queue properties.", status);
				
				printf("\t           Command queue properties:");
				if (cqpaux & CL_QUEUE_PROFILING_ENABLE) printf(" Prof. OK,"); else printf("Prof. KO,");
				if (cqpaux & CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE) printf(" Out-of-order OK\n"); else printf(" Out-of-order KO\n");

			}
		}
	}
	
	/* If we get here, no need for error checking, jump to cleanup. */
	g_assert (err == NULL);
	status = CL_SUCCESS;
	goto cleanup;
	
error_handler:
	/* If we got here there was an error, verify that it is so. */
	g_assert (err != NULL);
	fprintf(stderr, "%s", err->message);
	status = err->code;
	g_error_free(err);

cleanup:
		
	/* Free stuff! */
	if (platforms) free(platforms);

	/* Return status. */
	return status;

}
Пример #20
0
int
main(void)
{
    cl_int err;
    cl_platform_id platform = 0;
    cl_device_id device = 0;
    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
    cl_context ctx = 0;
    cl_command_queue queue = 0;
    cl_mem bufX, bufY;
    cl_event event = NULL;
    int ret = 0;
    int lenX = 1 + (N-1)*abs(incx);
    int lenY = 1 + (N-1)*abs(incy);

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs(1, &platform, NULL);
    if (err != CL_SUCCESS) {
        printf( "clGetPlatformIDs() failed with %d\n", err );
        return 1;
    }

    err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
    if (err != CL_SUCCESS) {
        printf( "clGetDeviceIDs() failed with %d\n", err );
        return 1;
    }

    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
    if (err != CL_SUCCESS) {
        printf( "clCreateContext() failed with %d\n", err );
        return 1;
    }

    queue = clCreateCommandQueue(ctx, device, 0, &err);
    if (err != CL_SUCCESS) {
        printf( "clCreateCommandQueue() failed with %d\n", err );
        clReleaseContext(ctx);
        return 1;
    }

    /* Setup clblas. */
    err = clblasSetup();
    if (err != CL_SUCCESS) {
        printf("clblasSetup() failed with %d\n", err);
        clReleaseCommandQueue(queue);
        clReleaseContext(ctx);
        return 1;
    }

    /* Prepare OpenCL memory objects and place vectors inside them. */
    bufX = clCreateBuffer(ctx, CL_MEM_READ_WRITE, (lenX*sizeof(cl_float)), NULL, &err);
    bufY = clCreateBuffer(ctx, CL_MEM_READ_WRITE, (lenY*sizeof(cl_float)), NULL, &err);

    err = clEnqueueWriteBuffer(queue, bufX, CL_TRUE, 0, (lenX*sizeof(cl_float)), X, 0, NULL, NULL);
    err = clEnqueueWriteBuffer(queue, bufY, CL_TRUE, 0, (lenY*sizeof(cl_float)), Y, 0, NULL, NULL);

    /* Call clblas function. */
    err = clblasSswap( N, bufX, 0, incx, bufY, 0, incy, 1, &queue, 0, NULL, &event);
    if (err != CL_SUCCESS) {
        printf("clblasSswap() failed with %d\n", err);
        ret = 1;
    }
    else {
        /* Wait for calculations to be finished. */
        err = clWaitForEvents(1, &event);

        /* Fetch results of calculations from GPU memory. */
        err = clEnqueueReadBuffer(queue, bufX, CL_TRUE, 0, (lenX*sizeof(cl_float)),
                                  X, 0, NULL, NULL);
        err = clEnqueueReadBuffer(queue, bufY, CL_TRUE, 0, (lenY*sizeof(cl_float)),
                                  Y, 0, NULL, NULL);

        /* At this point you will get the result of SSWAP placed in vector X. */
        printResult();
    }

    /* Release OpenCL memory objects. */
    clReleaseMemObject(bufY);
    clReleaseMemObject(bufX);

    /* Finalize work with clblas. */
    clblasTeardown();

    /* Release OpenCL working objects. */
    clReleaseCommandQueue(queue);
    clReleaseContext(ctx);

    return ret;
}
int main(int argc, char const *argv[])
{
        /* Get platform */
        cl_platform_id platform;
        cl_uint num_platforms;
        cl_int ret = clGetPlatformIDs(1, &platform, &num_platforms);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clGetPlatformIDs' failed\n");
                exit(1);
        }
        
        printf("Number of platforms: %d\n", num_platforms);
        printf("platform=%p\n", platform);
        
        /* Get platform name */
        char platform_name[100];
        ret = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clGetPlatformInfo' failed\n");
                exit(1);
        }
        
        printf("platform.name='%s'\n\n", platform_name);
        
        /* Get device */
        cl_device_id device;
        cl_uint num_devices;
        ret = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &num_devices);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clGetDeviceIDs' failed\n");
                exit(1);
        }
        
        printf("Number of devices: %d\n", num_devices);
        printf("device=%p\n", device);
        
        /* Get device name */
        char device_name[100];
        ret = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name),
        device_name, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clGetDeviceInfo' failed\n");
                exit(1);
        }
        
        printf("device.name='%s'\n", device_name);
        printf("\n");
        
        /* Create a Context Object */
        cl_context context;
        context = clCreateContext(NULL, 1, &device, NULL, NULL, &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clCreateContext' failed\n");
                exit(1);
        }
        
        printf("context=%p\n", context);
        
        /* Create a Command Queue Object*/
        cl_command_queue command_queue;
        command_queue = clCreateCommandQueue(context, device, 0, &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clCreateCommandQueue' failed\n");
                exit(1);
        }
        
        printf("command_queue=%p\n", command_queue);
        printf("\n");

        /* Program source */
        unsigned char *source_code;
        size_t source_length;

        /* Read program from 'add_sat_ushort2ushort2.cl' */
        source_code = read_buffer("add_sat_ushort2ushort2.cl", &source_length);

        /* Create a program */
        cl_program program;
        program = clCreateProgramWithSource(context, 1, (const char **)&source_code, &source_length, &ret);

        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clCreateProgramWithSource' failed\n");
                exit(1);
        }
        printf("program=%p\n", program);

        /* Build program */
        ret = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
        if (ret != CL_SUCCESS )
        {
                size_t size;
                char *log;

                /* Get log size */
                clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,0, NULL, &size);

                /* Allocate log and print */
                log = malloc(size);
                clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,size, log, NULL);
                printf("error: call to 'clBuildProgram' failed:\n%s\n", log);
                
                /* Free log and exit */
                free(log);
                exit(1);
        }

        printf("program built\n");
        printf("\n");
        
        /* Create a Kernel Object */
        cl_kernel kernel;
        kernel = clCreateKernel(program, "add_sat_ushort2ushort2", &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clCreateKernel' failed\n");
                exit(1);
        }
        
        /* Create and allocate host buffers */
        size_t num_elem = 10;
        
        /* Create and init host side src buffer 0 */
        cl_ushort2 *src_0_host_buffer;
        src_0_host_buffer = malloc(num_elem * sizeof(cl_ushort2));
        for (int i = 0; i < num_elem; i++)
                src_0_host_buffer[i] = (cl_ushort2){{2, 2}};
        
        /* Create and init device side src buffer 0 */
        cl_mem src_0_device_buffer;
        src_0_device_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, num_elem * sizeof(cl_ushort2), NULL, &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: could not create source buffer\n");
                exit(1);
        }        
        ret = clEnqueueWriteBuffer(command_queue, src_0_device_buffer, CL_TRUE, 0, num_elem * sizeof(cl_ushort2), src_0_host_buffer, 0, NULL, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clEnqueueWriteBuffer' failed\n");
                exit(1);
        }

        /* Create and init host side src buffer 1 */
        cl_ushort2 *src_1_host_buffer;
        src_1_host_buffer = malloc(num_elem * sizeof(cl_ushort2));
        for (int i = 0; i < num_elem; i++)
                src_1_host_buffer[i] = (cl_ushort2){{2, 2}};
        
        /* Create and init device side src buffer 1 */
        cl_mem src_1_device_buffer;
        src_1_device_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, num_elem * sizeof(cl_ushort2), NULL, &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: could not create source buffer\n");
                exit(1);
        }        
        ret = clEnqueueWriteBuffer(command_queue, src_1_device_buffer, CL_TRUE, 0, num_elem * sizeof(cl_ushort2), src_1_host_buffer, 0, NULL, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clEnqueueWriteBuffer' failed\n");
                exit(1);
        }

        /* Create host dst buffer */
        cl_ushort2 *dst_host_buffer;
        dst_host_buffer = malloc(num_elem * sizeof(cl_ushort2));
        memset((void *)dst_host_buffer, 1, num_elem * sizeof(cl_ushort2));

        /* Create device dst buffer */
        cl_mem dst_device_buffer;
        dst_device_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, num_elem *sizeof(cl_ushort2), NULL, &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: could not create dst buffer\n");
                exit(1);
        }
        
        /* Set kernel arguments */
        ret = CL_SUCCESS;
        ret |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_0_device_buffer);
        ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &src_1_device_buffer);
        ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst_device_buffer);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clSetKernelArg' failed\n");
                exit(1);
        }

        /* Launch the kernel */
        size_t global_work_size = num_elem;
        size_t local_work_size = num_elem;
        ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clEnqueueNDRangeKernel' failed\n");
                exit(1);
        }

        /* Wait for it to finish */
        clFinish(command_queue);

        /* Read results from GPU */
        ret = clEnqueueReadBuffer(command_queue, dst_device_buffer, CL_TRUE,0, num_elem * sizeof(cl_ushort2), dst_host_buffer, 0, NULL, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clEnqueueReadBuffer' failed\n");
                exit(1);
        }

        /* Dump dst buffer to file */
        char dump_file[100];
        sprintf((char *)&dump_file, "%s.result", argv[0]);
        write_buffer(dump_file, (const char *)dst_host_buffer, num_elem * sizeof(cl_ushort2));
        printf("Result dumped to %s\n", dump_file);
        /* Free host dst buffer */
        free(dst_host_buffer);

        /* Free device dst buffer */
        ret = clReleaseMemObject(dst_device_buffer);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseMemObject' failed\n");
                exit(1);
        }
        
        /* Free host side src buffer 0 */
        free(src_0_host_buffer);

        /* Free device side src buffer 0 */
        ret = clReleaseMemObject(src_0_device_buffer);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseMemObject' failed\n");
                exit(1);
        }

        /* Free host side src buffer 1 */
        free(src_1_host_buffer);

        /* Free device side src buffer 1 */
        ret = clReleaseMemObject(src_1_device_buffer);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseMemObject' failed\n");
                exit(1);
        }

        /* Release kernel */
        ret = clReleaseKernel(kernel);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseKernel' failed\n");
                exit(1);
        }

        /* Release program */
        ret = clReleaseProgram(program);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseProgram' failed\n");
                exit(1);
        }
        
        /* Release command queue */
        ret = clReleaseCommandQueue(command_queue);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseCommandQueue' failed\n");
                exit(1);
        }
        
        /* Release context */
        ret = clReleaseContext(context);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseContext' failed\n");
                exit(1);
        }
                
        return 0;
}
Пример #22
0
int main(int argc, char *argv[])
{
    // Declare variables - y', L
    // Load from file? Declare within host?
	Type y[K];
	Type L[K*K];
	Type R[K];
	Type m[K];
	Complex Xml[K];
    int check_result;
	
	cl_mem input_y;
	cl_mem input_L;
	cl_mem output_xml;

    // OpenCL-specific variables
    cl_device_id        device_id;
    cl_platform_id      platform_id;
    cl_context          context;
    cl_command_queue    commands;
    cl_program          program;

    cl_kernel SDkernel;
    cl_int dev_type;

    cl_int error;
    cl_event event;

    FILE *kernel;
    char *kernelSRC;
	
	size_t global[2];
	size_t local[2];

    if(argc > 1)
    {
        kernel = argv[1];
    }
    else
        printf("\nError - must specify arguments\n");

    //--------------------------------------------------------------------------------
    // Create a context, queue and device.
    //--------------------------------------------------------------------------------

    cl_uint numPlatforms;
    // Find number of platforms
    err = clGetPlatformIDs(0, NULL, &numPlatforms);
    if (err != CL_SUCCESS || numPlatforms <= 0)
    {
        printf("Error: Failed to find a platform!\n%s\n",err_code(err));
        return EXIT_FAILURE;
    }
    // Get all platforms
    cl_platform_id Platform[numPlatforms];
    err = clGetPlatformIDs(numPlatforms, Platform, NULL);
    if (err != CL_SUCCESS || numPlatforms <= 0)
    {
        printf("Error: Failed to get the platform!\n%s\n",err_code(err));
        return EXIT_FAILURE;
    }
    // Secure a device
    for (int i = 0; i < numPlatforms; i++)
    {
        err = clGetDeviceIDs(Platform[i], DEVICE, 1, &device_id, NULL);
        if (err == CL_SUCCESS)
            break;
    }
    if (device_id == NULL)
    {
        printf("Error: Failed to create a device group!\n%s\n",err_code(err));
        return EXIT_FAILURE;
    }

    // Create a compute context
    context = clCreateContext(0, 1, &device_id, NULL, NULL, &error);
    if (!context)
    {
        printf("Error: Failed to create a compute context!\n%s\n", err_code(error));
        return EXIT_FAILURE;
    }
    // Create a command queue
    commands = clCreateCommandQueue(context, device_id, 0, &error);
    if (!commands)
    {
        printf("Error: Failed to create a command commands!\n%s\n", err_code(error));
        return EXIT_FAILURE;
    }


	// Create buffers for each argument of kernel
	// Need to add for R^2 and m
    input_y = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(Type) * K, y, &err);
	input_L = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(Type) * K * K, L, &err);
	output_xml = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(Complex) * K, Xml, &err);
    if (err != CL_SUCCESS)
    {
        printf("Error: failed to create buffer\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }

    // Create the compute program from the source buffer
    program = clCreateProgramWithSource(context, 1, (const char **)&kernelSRC, NULL, &error);
    if (err != CL_SUCCESS)
    {
        printf("Error: could not create program\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }
    // Build the program
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];

        printf("Error: Failed to build program executable!\n%s\n", err_code(err));
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        printf("%s\n", buffer);
        return EXIT_FAILURE;
    }

    // Create the compute kernel from the program
    kernel = clCreateKernel(program, "SD", &err);
    if (!kernel || err != CL_SUCCESS)
    {
        printf("Error: Failed to create compute kernel!\n%s\n", err_code(err));
        return EXIT_FAILURE;
    }

	err = clEnqueueWriteBuffer(commands, input_y, CL_TRUE, 0, sizeof(Type)*K, y, 0, NULL, NULL);
	err = clEnqueueWriteBuffer(commands, input_L, CL_TRUE, 0, sizeof(Type)*K*K, L, 0, NULL, NULL);
	if(err != CL_SUCCESS)
	{
		printf("Error: could not write buffer\nError code %d\n", err);
		return EXIT_FAILURE;
	}
	
	err  = clSetKernelArg(SDkernel, 0, sizeof(cl_mem), &input_y);
	err |= clSetKernelArg(SDkernel, 1, sizeof(cl_mem), &input_L);
	err |= clSetKernelArg(SDkernel, 2, sizeof(cl_mem), &input_R);
	err |= clSetKernelArg(SDkernel, 3, sizeof(cl_mem), &output_m);
	err |= clSetKernelArg(SDkernel, 4, sizeof(cl_mem), &output_xml);
	if(err != CL_SUCCESS)
	{
		printf("Error: could not set kernel arguments\nError code %d\n", err);
		return EXIT_FAILURE;
	}
	
	global[0] = K;
	global[1] = K;
	local[0] = K;
	local[1] = 1;
	
	
	err = clEnqueueNDRangeKernel(commands,
								 kernel,
								 2,
								 NULL,
								 (size_t*)&global,
								 (size_t*)&local,
								 0,
								 NULL,
								 &event);
	if(err != CL_SUCCESS)
	{
		printf("Error: could not set ND range\nError code %d\n", err);
		return EXIT_FAILURE;
	}			

	clEnqueueReadBuffer(commands, output_m, CL_TRUE, 0, sizeof(Type)*K, m, 0, NULL, NULL);
	clEnqueueReadBuffer(commands, output_xml, CL_TRUE, 0, sizeof(Type)*K, m, 0, NULL, NULL);
	
	clReleaseMemObject(input_y);
	clReleaseMemObject(input_L);
	clReleaseMemObject(output_xml);
	clReleaseProgram(program);
	clReleaseKernel(SDkernel);
	clReleaseCommandQueue(commands);
	clReleaseContext(context);
	
    return EXIT_SUCCESS;
}
int SDKSample::validatePlatformAndDeviceOptions()
{
    cl_int status = CL_SUCCESS;
    cl_uint numPlatforms;
    cl_platform_id platform = NULL;
    status = clGetPlatformIDs(0, NULL, &numPlatforms);
    if(status != CL_SUCCESS)
    {
        std::cout<<"Error: clGetPlatformIDs failed. Error code : ";
        std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
        return SDK_FAILURE;
    }

    if (0 < numPlatforms) 
    {
        // Validate platformId
        if(platformId >= numPlatforms)
        {
            if(numPlatforms - 1 == 0)
                std::cout << "platformId should be 0" << std::endl;
            else
                std::cout << "platformId should be 0 to " << numPlatforms - 1 << std::endl;
            usage();
            return SDK_FAILURE;
        }

        // Get selected platform
        cl_platform_id* platforms = new cl_platform_id[numPlatforms];
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        if(status != CL_SUCCESS)
        {
            std::cout<<"Error: clGetPlatformIDs failed. Error code : ";
            std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
            return SDK_FAILURE;
        }

        // Print all platforms
        for (unsigned i = 0; i < numPlatforms; ++i) 
        {
            char pbuf[100];
            status = clGetPlatformInfo(platforms[i],
                                       CL_PLATFORM_VENDOR,
                                       sizeof(pbuf),
                                       pbuf,
                                       NULL);

            if(status != CL_SUCCESS)
            {
                std::cout<<"Error: clGetPlatformInfo failed. Error code : ";
                std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
                return SDK_FAILURE;
            }

            std::cout << "Platform " << i << " : " << pbuf << std::endl;
        }

        // Get AMD platform
        for (unsigned i = 0; i < numPlatforms; ++i) 
        {
            char pbuf[100];
            status = clGetPlatformInfo(platforms[i],
                                       CL_PLATFORM_VENDOR,
                                       sizeof(pbuf),
                                       pbuf,
                                       NULL);

            if(status != CL_SUCCESS)
            {
                std::cout<<"Error: clGetPlatformInfo failed. Error code : ";
                std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
                return SDK_FAILURE;
            }

            platform = platforms[i];
            if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) 
            {
                break;
            }
        }

        if(isPlatformEnabled())
            platform = platforms[platformId];


        // Check for AMD platform
        char pbuf[100];
        status = clGetPlatformInfo(platform,
                                   CL_PLATFORM_VENDOR,
                                   sizeof(pbuf),
                                   pbuf,
                                   NULL);

        if(status != CL_SUCCESS)
        {
            std::cout<<"Error: clGetPlatformInfo failed. Error code : ";
            std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
            return SDK_FAILURE;
        }
        if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) 
            amdPlatform = true; 


        cl_device_type dType = CL_DEVICE_TYPE_GPU;
        if(deviceType.compare("cpu") == 0)
            dType = CL_DEVICE_TYPE_CPU;
        if(deviceType.compare("gpu") == 0)
            dType = CL_DEVICE_TYPE_GPU;
        else
            dType = CL_DEVICE_TYPE_ALL;

        // Check for GPU
        if(dType == CL_DEVICE_TYPE_GPU)
        {
            cl_context_properties cps[3] = 
            {
                CL_CONTEXT_PLATFORM, 
                (cl_context_properties)platform, 
                0
            };

            cl_context context = clCreateContextFromType(cps,
                                                        dType,
                                                        NULL,
                                                        NULL,
                                                        &status);

            if(status == CL_DEVICE_NOT_FOUND)
            {
                dType = CL_DEVICE_TYPE_CPU;
                gpu = false;
            }

            clReleaseContext(context);
        }

        // Get device count
        cl_uint deviceCount = 0;
        status = clGetDeviceIDs(platform, dType, 0, NULL, &deviceCount);
        if(status != CL_SUCCESS)
        {
            std::cout<<"Error: clGetDeviceIDs failed. Error code : ";
            std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
            return SDK_FAILURE;
        }

        // Validate deviceId
        if(deviceId >= deviceCount)
        {
            if(deviceCount - 1 == 0)
                std::cout << "deviceId should be 0" << std::endl;
            else
                std::cout << "deviceId should be 0 to " << deviceCount - 1 << std::endl;
            usage();
            return SDK_FAILURE;
        }

        delete[] platforms;
    }
    return SDK_SUCCESS;
}
Пример #24
0
void mat_mul_opencl_1d(float *M_A, float *M_B, float *M_C,
                       size_t ROW_A, size_t COL_A, size_t COL_B) {
  cl_platform_id   *platform;
  cl_device_type   dev_type;
  cl_device_id     dev;
  cl_context       context;
  cl_command_queue cmd_queue;
  cl_program       program;
  cl_kernel        kernel;
  cl_mem           mem_A, mem_B, mem_C;
  cl_event         ev_kernel;
  cl_int           err;
  cl_uint          num_platforms;
  cl_uint          num_dev = 0;
  int i;

  // Platform
  err = clGetPlatformIDs(0, NULL, &num_platforms);
  CHECK_ERROR(err);
  if (num_platforms == 0) {
    fprintf(stderr, "[%s:%d] ERROR: No OpenCL platform\n", __FILE__,__LINE__);
    exit(EXIT_FAILURE);
  }
  printf("Number of platforms: %u\n", num_platforms);
  platform = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms);
  err = clGetPlatformIDs(num_platforms, platform, NULL);
  CHECK_ERROR(err);

  // Device
  dev_type = get_device_type();
  for (i = 0; i < num_platforms; i++) {
    err = clGetDeviceIDs(platform[i], dev_type, 1, &dev, &num_dev);
    if (err != CL_DEVICE_NOT_FOUND) CHECK_ERROR(err);
    if (num_dev == 1) break;
  }
  if (num_dev < 1) {
    fprintf(stderr, "[%s:%d] ERROR: No device\n", __FILE__, __LINE__);
    exit(EXIT_FAILURE);
  }
  print_device_name(dev);
  free(platform);

  // Context
  context = clCreateContext(NULL, 1, &dev, NULL, NULL, &err);
  CHECK_ERROR(err);

  // Command queue
  cmd_queue = clCreateCommandQueue(context, dev,
                                   CL_QUEUE_PROFILING_ENABLE,
                                   &err);
  CHECK_ERROR(err);

  // Create a program.
  char *source_code = get_source_code("./kernel_1d.cl");
  program = clCreateProgramWithSource(context,
                                      1, (const char **)&source_code,
                                      NULL, &err);
  free(source_code);
  CHECK_ERROR(err);

  // Build the program.
  char build_opts[200];
  sprintf(build_opts, "-DROW_A=%lu -DCOL_A=%lu -DCOL_B=%lu",
          ROW_A, COL_A, COL_B);
  err = clBuildProgram(program, 1, &dev, build_opts, NULL, NULL);
  if (err != CL_SUCCESS) {
    print_build_log(program, dev);
    CHECK_ERROR(err);
  }

  // Kernel
  kernel = clCreateKernel(program, "mat_mul", &err);
  CHECK_ERROR(err);

  // Buffers
  mem_A = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, 
                         sizeof(float) * ROW_A * COL_A,
                         M_A, &err);
  CHECK_ERROR(err);

  mem_B = clCreateBuffer(context, CL_MEM_READ_ONLY, 
                         sizeof(float) * COL_A * COL_B,
                         NULL, &err);
  CHECK_ERROR(err);
  err = clEnqueueWriteBuffer(cmd_queue,
                             mem_B,
                             CL_FALSE, 0,
                             sizeof(float) * COL_A * COL_B,
                             M_B,
                             0, NULL, NULL);
  CHECK_ERROR(err)

  mem_C = clCreateBuffer(context, CL_MEM_READ_WRITE, 
                         sizeof(float) * ROW_A * COL_B,
                         NULL, &err);
  CHECK_ERROR(err);

  // Set the arguments.
  err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_A);
  CHECK_ERROR(err);
  err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_B);
  CHECK_ERROR(err);
  err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem_C);
  CHECK_ERROR(err);

  // Enqueue the kernel.
  size_t lws[1] = {256};
  size_t gws[1];
  gws[0] = (size_t)ceil((double)ROW_A / lws[0]) * lws[0];
  err = clEnqueueNDRangeKernel(cmd_queue,
                               kernel,
                               1, NULL,
                               gws, lws,
                               0, NULL,
                               &ev_kernel);
  CHECK_ERROR(err);

  // Read the result.
  err = clEnqueueReadBuffer(cmd_queue,
                            mem_C,
                            CL_TRUE, 0,
                            sizeof(float) * ROW_A * COL_B, 
                            M_C,
                            0, NULL, NULL);
  CHECK_ERROR(err);

  // Read the profiling info.
  cl_ulong start_time, end_time;
  err = clGetEventProfilingInfo(ev_kernel, CL_PROFILING_COMMAND_START, 
                                sizeof(cl_ulong), &start_time, NULL);
  CHECK_ERROR(err);
  err = clGetEventProfilingInfo(ev_kernel, CL_PROFILING_COMMAND_END, 
                                sizeof(cl_ulong), &end_time, NULL);
  CHECK_ERROR(err);
  printf("Kernel time : %lf sec\n", (double)(end_time - start_time) / 10e9);

  // Release
  clReleaseEvent(ev_kernel);
  clReleaseMemObject(mem_A);
  clReleaseMemObject(mem_B);
  clReleaseMemObject(mem_C);
  clReleaseKernel(kernel);
  clReleaseProgram(program);
  clReleaseCommandQueue(cmd_queue);
  clReleaseContext(context);
}
Пример #25
0
int main( void )
{
    cl_int err;
    cl_platform_id platform = 0;
    cl_device_id device = 0;
    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
    cl_context ctx = 0;
    cl_command_queue queue = 0;
    cl_mem bufX;
    float *X;
    cl_event event = NULL;
    int ret = 0;
    size_t N = 16;
    char platform_name[128];
    char device_name[128];

    /* FFT library realted declarations */
    clfftPlanHandle planHandle;
    clfftDim dim = CLFFT_1D;
    size_t clLengths[1] = {N};

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs( 1, &platform, NULL );

    size_t ret_param_size = 0;
    err = clGetPlatformInfo(platform, CL_PLATFORM_NAME,
            sizeof(platform_name), platform_name,
            &ret_param_size);
    printf("Platform found: %s\n", platform_name);

    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL );

    err = clGetDeviceInfo(device, CL_DEVICE_NAME,
            sizeof(device_name), device_name,
            &ret_param_size);
    printf("Device found on the above platform: %s\n", device_name);

    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
    queue = clCreateCommandQueue( ctx, device, 0, &err );

    /* Setup clFFT. */
    clfftSetupData fftSetup;
    err = clfftInitSetupData(&fftSetup);
    err = clfftSetup(&fftSetup);

    /* Allocate host & initialize data. */
    /* Only allocation shown for simplicity. */
    X = (float *)malloc(N * 2 * sizeof(*X));

    /* print input array */
    printf("\nPerforming fft on an one dimensional array of size N = %ld\n", N);
    int print_iter = 0;
    while(print_iter<N) {
        float x = (float)print_iter;
        float y = (float)print_iter*3;
        X[2*print_iter  ] = x;
        X[2*print_iter+1] = y;
        printf("(%f, %f) ", x, y);
        print_iter++;
    }
    printf("\n\nfft result: \n");

    /* Prepare OpenCL memory objects and place data inside them. */
    bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, N * 2 * sizeof(*X), NULL, &err );

    err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0,
            N * 2 * sizeof( *X ), X, 0, NULL, NULL );

    /* Create a default plan for a complex FFT. */
    err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);

    /* Set plan parameters. */
    err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE);
    err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
    err = clfftSetResultLocation(planHandle, CLFFT_INPLACE);

    /* Bake the plan. */
    err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL);

    /* Execute the plan. */
    err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL);

    /* Wait for calculations to be finished. */
    err = clFinish(queue);

    /* Fetch results of calculations. */
    err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, N * 2 * sizeof( *X ), X, 0, NULL, NULL );

    /* print output array */
    print_iter = 0;
    while(print_iter<N) {
        printf("(%f, %f) ", X[2*print_iter], X[2*print_iter+1]);
        print_iter++;
    }
    printf("\n");

    /* Release OpenCL memory objects. */
    clReleaseMemObject( bufX );

    free(X);

    /* Release the plan. */
    err = clfftDestroyPlan( &planHandle );

    /* Release clFFT library. */
    clfftTeardown( );

    /* Release OpenCL working objects. */
    clReleaseCommandQueue( queue );
    clReleaseContext( ctx );

    return ret;
}
Пример #26
0
int main(int argc, char** argv) {


	cl_int error;
	cl_uint num_platforms;

	// Get the number of platforms
	error = clGetPlatformIDs(0, NULL, &num_platforms);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	// Get the list of platforms
	cl_platform_id* platforms = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms);
	error = clGetPlatformIDs(num_platforms, platforms, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	// Print the chosen platform (if there are multiple platforms, choose the first one)
	cl_platform_id platform = platforms[0];
	char pbuf[100];
	error = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	printf("Platform: %s\n", pbuf);

	//	Create a GPU context
	//	cl_context_properties context_properties[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) platform, 0};
	//   	context = clCreateContextFromType(context_properties, CL_DEVICE_TYPE_GPU, NULL, NULL, &error);
	//    	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	cl_int result;
	cl_device_id device_id = NULL;   
	cl_uint ret_num_devices;
	result = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
	if(result!=CL_SUCCESS){
		printf("Runtime error! unable to retrieve the device id of CL_DEVICE_TYPE_DEFAULT\n");
		exit(-1);
	}
	cl_device_id* __ipmacc_cldevs;
	__ipmacc_cldevs=(cl_device_id*)malloc(sizeof(cl_device_id)*1);
	__ipmacc_cldevs[0]=device_id;
	context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &result);
	if(result!=CL_SUCCESS){
		printf("Runtime error! Cannot open context on the device %d of CL_DEVICE_TYPE_DEFAULT\n",device_id);
		exit(-1);
	}
	// END OF CODE



	// Get and print the chosen device (if there are multiple devices, choose the first one)
	size_t devices_size;
	error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &devices_size);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	cl_device_id *devices = (cl_device_id *) malloc(devices_size);
	error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size, devices, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	device = devices[0];
	error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(pbuf), pbuf, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	printf("Device: %s\n", pbuf);

	// Create a command queue
	command_queue = clCreateCommandQueue(context, device, 0, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);



	int size;
	int grid_rows,grid_cols = 0;
	double *FilesavingTemp,*FilesavingPower; //,*MatrixOut; 
	char *tfile, *pfile, *ofile;

	int total_iterations = 60;
	int pyramid_height = 1; // number of iterations

	if (argc < 7)
		usage(argc, argv);
	if((grid_rows = atoi(argv[1]))<=0||
			(grid_cols = atoi(argv[1]))<=0||
			(pyramid_height = atoi(argv[2]))<=0||
			(total_iterations = atoi(argv[3]))<=0)
		usage(argc, argv);

	tfile=argv[4];
	pfile=argv[5];
	ofile=argv[6];

	size=grid_rows*grid_cols;

	// --------------- pyramid parameters --------------- 
	int borderCols = (pyramid_height)*EXPAND_RATE/2;
	int borderRows = (pyramid_height)*EXPAND_RATE/2;
	int smallBlockCol = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
	int smallBlockRow = BLOCK_SIZE-(pyramid_height)*EXPAND_RATE;
	int blockCols = grid_cols/smallBlockCol+((grid_cols%smallBlockCol==0)?0:1);
	int blockRows = grid_rows/smallBlockRow+((grid_rows%smallBlockRow==0)?0:1);

	FilesavingTemp = (double *) malloc(size*sizeof(double));
	FilesavingPower = (double *) malloc(size*sizeof(double));
	// MatrixOut = (double *) calloc (size, sizeof(double));

	if( !FilesavingPower || !FilesavingTemp) // || !MatrixOut)
		fatal("unable to allocate memory");

	// Read input data from disk
	readinput(FilesavingTemp, grid_rows, grid_cols, tfile);
	readinput(FilesavingPower, grid_rows, grid_cols, pfile);

	// Load kernel source from file
	const char *source = load_kernel_source("hotspot_kernel.cl");
	size_t sourceSize = strlen(source);

	// Compile the kernel
	cl_program program = clCreateProgramWithSource(context, 1, &source, &sourceSize, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	// Create an executable from the kernel
	error = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
	// Show compiler warnings/errors
	static char log[65536]; memset(log, 0, sizeof(log));
	clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(log)-1, log, NULL);
	if (strstr(log,"warning:") || strstr(log, "error:")) printf("<<<<\n%s\n>>>>\n", log);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	kernel = clCreateKernel(program, "hotspot", &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);


	long long start_time = get_time();

	// Create two temperature matrices and copy the temperature input data
	cl_mem MatrixTemp[2];
	// Create input memory buffers on device
	MatrixTemp[0] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR, sizeof(double) * size, FilesavingTemp, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);
	MatrixTemp[1] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(double) * size, NULL, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	// Copy the power input data
	cl_mem MatrixPower = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR, sizeof(double) * size, FilesavingPower, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	// Perform the computation
	int ret = compute_tran_temp(MatrixPower, MatrixTemp, grid_cols, grid_rows, total_iterations, pyramid_height,
			blockCols, blockRows, borderCols, borderRows, FilesavingTemp, FilesavingPower);

	// Copy final temperature data back
	cl_double *MatrixOut = (cl_double *) clEnqueueMapBuffer(command_queue, MatrixTemp[ret], CL_TRUE, CL_MAP_READ, 0, sizeof(double) * size, 0, NULL, NULL, &error);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	long long end_time = get_time();	
	printf("Total time: %.3f seconds\n", ((double) (end_time - start_time)) / (1000*1000));

	// Write final output to output file
	#ifdef DUMPOUT
	writeoutput(MatrixOut, grid_rows, grid_cols, ofile);
	#endif
	error = clEnqueueUnmapMemObject(command_queue, MatrixTemp[ret], (void *) MatrixOut, 0, NULL, NULL);
	if (error != CL_SUCCESS) fatal_CL(error, __LINE__);

	clReleaseMemObject(MatrixTemp[0]);
	clReleaseMemObject(MatrixTemp[1]);
	clReleaseMemObject(MatrixPower);

	return 0;
}
Пример #27
0
//------------------------------------------------------------------------------
// returns context associated with single device only,
// to make it support multiple devices, a list of
// <device type, device num> pairs is required
cl_context create_cl_context(const std::string& platformName,
                             const std::string& deviceTypeName,
                             int deviceNum) {
    cl_int status = 0;
    //1) get platfors and search for platform matching platformName
    cl_uint numPlatforms = 0;
    status = clGetPlatformIDs(0, 0, &numPlatforms);
    check_cl_error(status, "clGetPlatformIDs");
    if(numPlatforms < 1) {
        std::cout << "No OpenCL platforms found" << std::endl;
        exit(EXIT_SUCCESS);
    }
    typedef std::vector< cl_platform_id > PlatformIDs;
    PlatformIDs platformIDs(numPlatforms);
    status = clGetPlatformIDs(numPlatforms, &platformIDs[0], 0);
    check_cl_error(status, "clGetPlatformIDs");
    std::vector< char > buf(0x10000, char(0));
    cl_platform_id platformID;
    PlatformIDs::const_iterator pi = platformIDs.begin();
    for(; pi != platformIDs.end(); ++pi) {
        status = clGetPlatformInfo(*pi, CL_PLATFORM_NAME,
                                 buf.size(), &buf[0], 0);
        check_cl_error(status, "clGetPlatformInfo");
        if(platformName == &buf[0]) {
            platformID = *pi;
            break; 
        }
    } 
    if(pi == platformIDs.end()) {
        std::cerr << "ERROR - Couldn't find platform " 
                  << platformName << std::endl;
        exit(EXIT_FAILURE);
    }
    //2) get devices of deviceTypeName type and store their ids into
    //   an array then select device id at position deviceNum
    cl_device_type deviceType;
    if(deviceTypeName == "default") 
        deviceType = CL_DEVICE_TYPE_DEFAULT;
    else if(deviceTypeName == "cpu")
        deviceType = CL_DEVICE_TYPE_CPU;
    else if(deviceTypeName == "gpu")
        deviceType = CL_DEVICE_TYPE_GPU;
    else if(deviceTypeName == "acc")
        deviceType = CL_DEVICE_TYPE_ACCELERATOR; 
    else if(deviceTypeName == "all")
        deviceType = CL_DEVICE_TYPE_CPU;
    else {
        std::cerr << "ERROR - device type " << deviceTypeName << " unknown"
                  << std::endl;
        exit(EXIT_FAILURE);          
    }                      
    cl_uint numDevices = 0; 
    status = clGetDeviceIDs(platformID, deviceType, 0, 0, &numDevices);
    check_cl_error(status, "clGetDeviceIDs");
    if(numDevices < 1) {
        std::cerr << "ERROR - Cannot find device of type " 
                  << deviceTypeName << std::endl;
        exit(EXIT_FAILURE);          
    }
    typedef std::vector< cl_device_id > DeviceIDs;
    DeviceIDs deviceIDs(numDevices);
    status = clGetDeviceIDs(platformID, deviceType, numDevices,
                            &deviceIDs[0], 0);
    check_cl_error(status, "clGetDeviceIDs");
    if(deviceNum < 0 || deviceNum >= numDevices) {
        std::cerr << "ERROR - device number out of range: [0," 
                  << (numDevices - 1) << ']' << std::endl;
        exit(EXIT_FAILURE);
    }
    cl_device_id deviceID = deviceIDs[deviceNum]; 
    //3) create and return context
    cl_context_properties ctxProps[] = {
        CL_CONTEXT_PLATFORM,
        cl_context_properties(platformID),
        0
    };
    //only a single device supported
    cl_context ctx = clCreateContext(ctxProps, 1, &deviceID,
                                     &context_callback, 0, &status);
    check_cl_error(status, "clCreateContext");
    return ctx;
}
Пример #28
0
int ocl_t::init()
{
	std::cout << "Query available compute devices ...\n";

	cl_int err;
	cl_uint num;
	err = clGetPlatformIDs(0, 0, &num);
	if (err != CL_SUCCESS) {
		std::cerr << "Unable to get platforms\n";
		return 0;
	}

	std::vector<cl_platform_id> platforms(num);
	err = clGetPlatformIDs(num, &platforms[0], &num);
	if (err != CL_SUCCESS) {
		std::cerr << "Unable to get platform ID\n";
		return 0;
	}

	int device_counter = 0;
	for (size_t platform_id = 0; platform_id < num; platform_id++){
		size_t dev_c, info_c;
		clGetPlatformInfo(platforms[platform_id], CL_PLATFORM_NAME, 0, NULL, &info_c);
		std::string platname;
		platname.resize(info_c);
		clGetPlatformInfo(platforms[platform_id], CL_PLATFORM_NAME, info_c, &platname[0], 0);
		std::cout << "Platform :" << platname << "\n";

		cl_context_properties prop[] = { CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platforms[platform_id]), 0 };
		context = clCreateContextFromType(prop, CL_DEVICE_TYPE_ALL, NULL, NULL, NULL);
		if (context == 0) {
			std::cerr << "Can't create OpenCL context\n";
			return 0;
		}
		
		clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &dev_c);
		std::vector<cl_device_id> devices(dev_c / sizeof(cl_device_id));
		clGetContextInfo(context, CL_CONTEXT_DEVICES, dev_c, &devices[0], 0);

		for (auto i = devices.begin(); i != devices.end(); i++){
			clGetDeviceInfo(*i, CL_DEVICE_NAME, 0, NULL, &info_c);
			std::string devname;
			devname.resize(info_c);
			clGetDeviceInfo(*i, CL_DEVICE_NAME, info_c, &devname[0], 0);
			std::cout << "\tDevice " << device_counter++ << ": " << devname.c_str() << "\n";
			pdpair_t pd;
			pd.device_id = i - devices.begin();
			pd.platform_id = platform_id;
			ocl_device_list.push_back(pd);
		}
		clReleaseContext(context);
	}

	if (list_available_devices) return 0;

	cl_context_properties prop[] = { CL_CONTEXT_PLATFORM, reinterpret_cast<cl_context_properties>(platforms[ocl_device_list[opencl_device_id].platform_id]), 0 };
	context = clCreateContextFromType(prop, CL_DEVICE_TYPE_ALL, NULL, NULL, NULL);
	if (context == 0) {
		std::cerr << "Can't create OpenCL context\n";
		return 0;
	}

	size_t dev_c, info_c;
	clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &dev_c);
	std::vector<cl_device_id> devices(dev_c / sizeof(cl_device_id));
	clGetContextInfo(context, CL_CONTEXT_DEVICES, dev_c, &devices[0], 0);

	device_used = devices[ocl_device_list[opencl_device_id].device_id];
	clGetDeviceInfo(device_used, CL_DEVICE_NAME, 0, NULL, &info_c);
	std::string devname;
	devname.resize(info_c);
	clGetDeviceInfo(device_used, CL_DEVICE_NAME, info_c, &devname[0], 0);
	std::cout << "Execute on Device " << opencl_device_id << ": " << devname << std::endl;
	std::cout << "OK!\n";

	queue = clCreateCommandQueue(context, device_used, 0, 0);
	if (queue == 0) {
		std::cerr << "Can't create command queue\n";
		return 0;
	}

	cl_res = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(cl_float)* iterations, NULL, NULL);
	if (cl_res == 0) {
		std::cerr << "Can't create OpenCL buffer\n";
		return 0;
	}

	FILE* f = fopen("kernel.c", "rb");
	fseek(f, 0, SEEK_END);
	size_t tell = ftell(f);
	rewind(f);
	ocl_src_char = (char*)calloc(tell + 1, 1);
	fread(ocl_src_char, tell, 1, f);

	initialized = 1;
	return 0;
}
Пример #29
0
int main(int argc, char *argv[])
{
	// selected platform and device number
	cl_uint pn = 0, dn = 0;

	// OpenCL error
	cl_int error;

	// generic iterator
	cl_uint i;

	// major/minor version of the platform OpenCL version
	cl_uint ocl_major, ocl_minor;

	// set platform/device num from command line
	if (argc > 1)
		pn = atoi(argv[1]);
	if (argc > 2)
		dn = atoi(argv[2]);

	error = clGetPlatformIDs(0, NULL, &np);
	CHECK_ERROR("getting amount of platform IDs");
	printf("%u platforms found\n", np);
	if (pn >= np) {
		fprintf(stderr, "there is no platform #%u\n" , pn);
		exit(1);
	}
	// only allocate for IDs up to the intended one
	platform = calloc(pn+1,sizeof(*platform));
	// if allocation failed, next call will bomb. rely on this
	error = clGetPlatformIDs(pn+1, platform, NULL);
	CHECK_ERROR("getting platform IDs");

	// choose platform
	p = platform[pn];

	error = clGetPlatformInfo(p, CL_PLATFORM_NAME, BUFSZ, strbuf, NULL);
	CHECK_ERROR("getting platform name");
	printf("using platform %u: %s\n", pn, strbuf);

	error = clGetPlatformInfo(p, CL_PLATFORM_VERSION, BUFSZ, strbuf, NULL);
	CHECK_ERROR("getting platform version");
	// we need 1.2 at least
	i = sscanf(strbuf, "OpenCL %u.%u ", &ocl_major, &ocl_minor);
	if (i != 2) {
		fprintf(stderr, "%s:%u: unable to determine platform OpenCL version\n",
			__func__, __LINE__);
		exit(1);
	}
	if (ocl_major == 1 && ocl_minor < 2) {
		fprintf(stderr, "%s:%u: Platform version %s is not at least 1.2\n",
			__func__, __LINE__, strbuf);
		exit(1);
	}

	error = clGetDeviceIDs(p, CL_DEVICE_TYPE_ALL, 0, NULL, &nd);
	CHECK_ERROR("getting amount of device IDs");
	printf("%u devices found\n", nd);
	if (dn >= nd) {
		fprintf(stderr, "there is no device #%u\n", dn);
		exit(1);
	}
	// only allocate for IDs up to the intended one
	device = calloc(dn+1,sizeof(*device));
	// if allocation failed, next call will bomb. rely on this
	error = clGetDeviceIDs(p, CL_DEVICE_TYPE_ALL, dn+1, device, NULL);
	CHECK_ERROR("getting device IDs");

	// choose device
	d = device[dn];
	error = clGetDeviceInfo(d, CL_DEVICE_NAME, BUFSZ, strbuf, NULL);
	CHECK_ERROR("getting device name");
	printf("using device %u: %s\n", dn, strbuf);

	error = clGetDeviceInfo(d, CL_DEVICE_GLOBAL_MEM_SIZE,
			sizeof(gmem), &gmem, NULL);
	CHECK_ERROR("getting device global memory size");
	error = clGetDeviceInfo(d, CL_DEVICE_MAX_MEM_ALLOC_SIZE,
			sizeof(alloc_max), &alloc_max, NULL);
	CHECK_ERROR("getting device max memory allocation size");

	// create context
	ctx_prop[1] = (cl_context_properties)p;
	ctx = clCreateContext(ctx_prop, 1, &d, NULL, NULL, &error);
	CHECK_ERROR("creating context");

	// create queue
	q = clCreateCommandQueue(ctx, d, CL_QUEUE_PROFILING_ENABLE, &error);
	CHECK_ERROR("creating queue");

	// create program
	pg = clCreateProgramWithSource(ctx, sizeof(src)/sizeof(*src), src, NULL, &error);
	CHECK_ERROR("creating program");

	// build program
	error = clBuildProgram(pg, 1, &d, NULL, NULL, NULL);
	CHECK_ERROR("building program");

	// get kernel
	k = clCreateKernel(pg, "add", &error);
	CHECK_ERROR("creating kernel");

	error = clGetKernelWorkGroupInfo(k, d, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE,
			sizeof(wgm), &wgm, NULL);
	CHECK_ERROR("getting preferred workgroup size multiple");

	// number of elements on which kernel will be launched. it's ok if we don't
	// cover every byte of the buffers
	nels = alloc_max/sizeof(cl_float);

	gws = ROUND_MUL(nels, wgm);

	printf("will use %zu workitems grouped by %zu to process %u elements\n",
			gws, wgm, nels);

	// we will try and allocate at least one buffer more than needed to fill
	// the device memory, and no less than 3 anyway
	nbuf = gmem/alloc_max + 1;
	if (nbuf < 3)
		nbuf = 3;

#define MB (1024*1024.0)

	printf("will try allocating %u host buffers of %gMB each to overcommit %gMB\n",
			nbuf, alloc_max/MB, gmem/MB);

	hostbuf = calloc(nbuf, sizeof(cl_mem));

	if (!hostbuf) {
		fprintf(stderr, "could not prepare support for %u buffers\n", nbuf);
		exit(1);
	}

	// allocate ‘host’ buffers
	for (i = 0; i < nbuf; ++i) {
		hostbuf[i] = clCreateBuffer(ctx, CL_MEM_ALLOC_HOST_PTR | CL_MEM_READ_ONLY, alloc_max,
				NULL, &error);
		CHECK_ERROR("allocating host buffer");
		printf("host buffer %u allocated\n", i);
		error = clEnqueueMigrateMemObjects(q, 1, hostbuf + i,
				CL_MIGRATE_MEM_OBJECT_HOST | CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED,
				0, NULL, NULL);
		CHECK_ERROR("migrating buffer to host");
		printf("buffer %u migrated to host\n", i);
	}

	// allocate ‘device’ buffers
	for (i = 0; i < 2; ++i) {
		devbuf[i] = clCreateBuffer(ctx, CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS, alloc_max,
				NULL, &error);
		CHECK_ERROR("allocating devbuffer");
		printf("dev buffer %u allocated\n", i);
		if (i == 0) {
			float patt = 0;
			error = clEnqueueFillBuffer(q, devbuf[0], &patt, sizeof(patt),
					0, nels*sizeof(patt), 0, NULL, &mem_evt);
			CHECK_ERROR("enqueueing memset");
		}
	}
	error = clWaitForEvents(1, &mem_evt);
	CHECK_ERROR("waiting for buffer fill");
	clReleaseEvent(mem_evt); mem_evt = NULL;

	// use the buffers
	for (i = 0; i < nbuf; ++i) {
		printf("testing buffer %u\n", i);

		// for each buffer, we do a setup on CPU and then use it as second
		// argument for the kernel
		hbuf = clEnqueueMapBuffer(q, hostbuf[i], CL_TRUE, CL_MAP_WRITE_INVALIDATE_REGION,
				0, alloc_max, 0, NULL, NULL, &error);
		CHECK_ERROR("mapping buffer");
		for (e = 0; e < nels; ++e)
			hbuf[e] = i;
		error = clEnqueueUnmapMemObject(q, hostbuf[i], hbuf, 0, NULL, NULL);
		CHECK_ERROR("unmapping buffer");
		hbuf = NULL;

		// copy ‘host’ to ‘device’ buffer
		clEnqueueCopyBuffer(q, hostbuf[i], devbuf[1], 0, 0, alloc_max,
				0, NULL, NULL);
		// make sure all pending actions are completed
		error =	clFinish(q);
		CHECK_ERROR("settling down");

		clSetKernelArg(k, 0, sizeof(cl_mem), devbuf);
		clSetKernelArg(k, 1, sizeof(cl_mem), devbuf + 1);
		clSetKernelArg(k, 2, sizeof(nels), &nels);
		error = clEnqueueNDRangeKernel(q, k, 1, NULL, &gws, &wgm,
				0, NULL, &krn_evt);
		CHECK_ERROR("enqueueing kernel");

		error = clEnqueueCopyBuffer(q, devbuf[0], hostbuf[0],
				0, 0, alloc_max, 1, &krn_evt, &mem_evt);
		CHECK_ERROR("copying data to host");

		expected = i*(i+1)/2.0f;
		hbuf = clEnqueueMapBuffer(q, hostbuf[0], CL_TRUE, CL_MAP_READ,
				0, alloc_max, 1, &mem_evt, NULL, &error);
		CHECK_ERROR("mapping buffer 0");
		for (e = 0; e < nels; ++e)
			if (hbuf[e] != expected) {
				fprintf(stderr, "mismatch @ %u: %g instead of %g\n",
						e, hbuf[e], expected);
				exit(1);
			}
		error = clEnqueueUnmapMemObject(q, hostbuf[0], hbuf, 0, NULL, NULL);
		CHECK_ERROR("unmapping buffer 0");
		hbuf = NULL;
		clReleaseEvent(krn_evt);
		clReleaseEvent(mem_evt);
		krn_evt = mem_evt = NULL;
	}

	for (i = 1; i <= 2; ++i) {
		clReleaseMemObject(devbuf[2 - i]);
		printf("dev buffer %u freed\n", nbuf  - i);
	}
	for (i = 1; i <= nbuf; ++i) {
		clReleaseMemObject(hostbuf[nbuf - i]);
		printf("host buffer %u freed\n", nbuf  - i);
	}

	return 0;
}
Пример #30
0
int clDevicesNum(void) {
	cl_int status;
	char pbuff[256];
	cl_uint numDevices;
	cl_uint numPlatforms;
	int most_devices = -1;
	cl_platform_id *platforms;
	cl_platform_id platform = NULL;
	unsigned int i, mdplatform = 0;

	status = clGetPlatformIDs(0, NULL, &numPlatforms);
	/* If this fails, assume no GPUs. */
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: clGetPlatformsIDs failed (no OpenCL SDK installed?)", status);
		return -1;
	}

	if (numPlatforms == 0) {
		applog(LOG_ERR, "clGetPlatformsIDs returned no platforms (no OpenCL SDK installed?)");
		return -1;
	}

	platforms = (cl_platform_id *)alloca(numPlatforms*sizeof(cl_platform_id));
	status = clGetPlatformIDs(numPlatforms, platforms, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Getting Platform Ids. (clGetPlatformsIDs)", status);
		return -1;
	}

	for (i = 0; i < numPlatforms; i++) {
		if (opt_platform_id >= 0 && (int)i != opt_platform_id)
			continue;

		status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error %d: Getting Platform Info. (clGetPlatformInfo)", status);
			return -1;
		}
		platform = platforms[i];
		applog(LOG_INFO, "CL Platform %d vendor: %s", i, pbuff);
		status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(pbuff), pbuff, NULL);
		if (status == CL_SUCCESS)
			applog(LOG_INFO, "CL Platform %d name: %s", i, pbuff);
		status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(pbuff), pbuff, NULL);
		if (status == CL_SUCCESS)
			applog(LOG_INFO, "CL Platform %d version: %s", i, pbuff);
		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
		if (status != CL_SUCCESS) {
			applog(LOG_INFO, "Error %d: Getting Device IDs (num)", status);
			continue;
		}
		applog(LOG_INFO, "Platform %d devices: %d", i, numDevices);
		if ((int)numDevices > most_devices) {
			most_devices = numDevices;
			mdplatform = i;
		}
		if (numDevices) {
			unsigned int j;
			cl_device_id *devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));

			clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
			for (j = 0; j < numDevices; j++) {
				clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
				applog(LOG_INFO, "\t%i\t%s", j, pbuff);
			}
			free(devices);
		}
	}

	if (opt_platform_id < 0)
		opt_platform_id = mdplatform;;

	return most_devices;
}