Esempio n. 1
0
    /// Partitions the device into multiple sub-devices according to
    /// \p properties.
    ///
    /// \p properties is forwarded unchanged to \c clCreateSubDevices().
    /// Returns one \c device object per sub-device id reported by OpenCL,
    /// or an empty vector when the reported count is zero. Throws
    /// \c opencl_error if either \c clCreateSubDevices() call does not
    /// return \c CL_SUCCESS.
    ///
    /// \opencl_version_warning{1,2}
    std::vector<device>
    partition(const cl_device_partition_property *properties) const
    {
        // get sub-device count
        uint_ count = 0;
        int_ ret = clCreateSubDevices(m_id, properties, 0, 0, &count);
        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        // nothing to fetch; this also avoids evaluating &ids[0] on an
        // empty vector, which is undefined behaviour
        if(count == 0){
            return std::vector<device>();
        }

        // get sub-device ids
        std::vector<cl_device_id> ids(count);
        ret = clCreateSubDevices(m_id, properties, count, &ids[0], 0);
        if(ret != CL_SUCCESS){
            BOOST_THROW_EXCEPTION(opencl_error(ret));
        }

        // convert ids to device objects
        // NOTE(review): the 'false' argument presumably means "do not
        // retain the id again" -- confirm against the device constructor
        std::vector<device> devices(count);
        for(size_t i = 0; i < count; i++){
            devices[i] = device(ids[i], false);
        }

        return devices;
    }
Esempio n. 2
0
	// Partitions this device into sub-devices described by `properties`.
	//
	// properties: partition property list handed to clCreateSubDevices via
	//             properties.data().
	// error_map:  forwarded to error::handle<DeviceException> after each
	//             OpenCL call (its exact semantics are defined elsewhere).
	// Returns the sub-devices as a vector of Device.
	std::vector<Device> Device::partition(
		std::vector<cl_device_partition_property> const& properties,
		error::ErrorMap error_map
	) {
		auto error = cl_int{CL_INVALID_VALUE};
		auto count_elems = cl_uint{0};
		// First call only queries how many sub-devices the scheme produces.
		error = clCreateSubDevices(m_id, properties.data(), 0, nullptr, std::addressof(count_elems));
		error::handle<DeviceException>(error, error_map);
		// Pre-size the result; every slot starts as a Device built from m_id
		// and is then overwritten in place by the second call.
		// NOTE(review): the reinterpret_cast assumes Device is layout-compatible
		// with a single cl_device_id -- confirm against the Device definition.
		auto subdevices = std::vector<Device>(count_elems, m_id);
		error = clCreateSubDevices(
			m_id, properties.data(), count_elems, reinterpret_cast<cl_device_id*>(subdevices.data()), nullptr
		);
		error::handle<DeviceException>(error, error_map);
		// Return the local by name: wrapping it in std::move would disable
		// copy elision, and the move happens implicitly anyway.
		return subdevices;
	}
/*
 * JNI entry point for org.lwjgl.opencl.CL12.nclCreateSubDevices.
 *
 * Each jlong argument carries a native address (or handle) that is cast back
 * to the matching OpenCL type; function_pointer is the address of the
 * clCreateSubDevices implementation to invoke. The OpenCL status code is
 * returned to Java unchanged. env and clazz are not used.
 */
JNIEXPORT jint JNICALL Java_org_lwjgl_opencl_CL12_nclCreateSubDevices(JNIEnv *env, jclass clazz, jlong in_device, jlong properties, jint num_devices, jlong out_devices, jlong num_devices_ret, jlong function_pointer) {
	/* Callee supplied by the Java side as a raw address. */
	clCreateSubDevicesPROC create_sub_devices = (clCreateSubDevicesPROC)((intptr_t)function_pointer);

	/* Reinterpret the remaining 64-bit values as native pointers/handles. */
	cl_device_id parent_device = (cl_device_id)(intptr_t)in_device;
	const cl_device_partition_property *partition_props = (const cl_device_partition_property *)(intptr_t)properties;
	cl_device_id *devices_out = (cl_device_id *)(intptr_t)out_devices;
	cl_uint *device_count_out = (cl_uint *)(intptr_t)num_devices_ret;

	return create_sub_devices(parent_device, partition_props, num_devices, devices_out, device_count_out);
}
Esempio n. 4
0
/**
 * Partitions cdDevices[0] equally into sub-devices of one compute unit each
 * and creates an OpenCL context on the first resulting sub-device.
 *
 * @param platform      Not used by this function.
 * @param ciDeviceCount Only logged; not otherwise used.
 * @param cdDevices     Device list; only cdDevices[0] is partitioned.
 * @param cores         Not used by this function.
 * @return The context created for the first sub-device, or NULL when the
 *         partitioning step fails, reports no sub-devices, or the id buffer
 *         cannot be allocated.
 */
cl_context OclHost::partitionDevice(cl_platform_id platform,
                                    cl_uint ciDeviceCount, cl_device_id *cdDevices, cl_int cores) {
    cl_uint numSubDevices = 0;
    cl_int ciErrNum = CL_SUCCESS;

    Log.Message("%d", ciDeviceCount);

    // Zero-terminated property list: equal partitions of 1 compute unit.
    cl_device_partition_property partitionPrty[3];
    partitionPrty[0] = CL_DEVICE_PARTITION_EQUALLY;
    partitionPrty[1] = 1;
    partitionPrty[2] = 0;

    // First call only queries the number of sub-devices.
    ciErrNum = clCreateSubDevices(cdDevices[0], partitionPrty, 0, NULL,
                                  &numSubDevices);
    checkClError("Couldn't get number of sub-devices. Error: ", ciErrNum);
    if (ciErrNum != CL_SUCCESS) {
        // Only reached if checkClError returns on failure.
        return NULL;
    }
    Log.Message("%d", numSubDevices);

    // Without at least one sub-device, subDevices[0] below would be invalid.
    if (numSubDevices == 0) {
        Log.Error("No sub-devices available.");
        return NULL;
    }

    cl_device_id *subDevices = (cl_device_id*) (malloc(
                                   numSubDevices * sizeof(cl_device_id)));
    if (subDevices == NULL) {
        Log.Error("Couldn't allocate memory for sub-device ids.");
        return NULL;
    }

    ciErrNum = clCreateSubDevices(cdDevices[0], partitionPrty, numSubDevices,
                                  subDevices, NULL);
    checkClError("Couldn't create sub-devices. Error: ", ciErrNum);
    if (ciErrNum != CL_SUCCESS) {
        free(subDevices);
        return NULL;
    }

    // Create context for sub-devices (only the first sub-device is used).
    cl_context context = clCreateContext(0, 1, subDevices, NULL, NULL,
                                         &ciErrNum);
    checkClError("Couldn't create context. Error: ", ciErrNum);
    Log.Verbose("Dividing CPU into %d devices.", numSubDevices);

    // Only the id array is freed here; the sub-device handles themselves
    // are not released by this function.
    free(subDevices);
    return context;
}
Esempio n. 5
0
/*
 * Test driver for device partitioning via clCreateSubDevices.
 *
 * Steps, as visible in this function:
 *   1. Obtain a context, root device and queue from poclu_get_any_device.
 *   2. Require >= 2 compute units and > 1 possible sub-devices, and require
 *      that both CL_DEVICE_PARTITION_EQUALLY and
 *      CL_DEVICE_PARTITION_BY_COUNTS appear in the supported properties.
 *   3. Partition EQUALLY and BY_COUNTS, checking after each that the root
 *      device is unchanged and that every sub-device reports the expected
 *      compute units, parent device and partition type.
 *   4. Build contexts from the device list and run test_context on them.
 *
 * Returns 0 after printing "OK", or 1 when the device has fewer than two
 * compute units. CHECK_OPENCL_ERROR_IN / TEST_ASSERT are defined elsewhere;
 * presumably they bail out of main on failure -- confirm in their headers.
 */
int main(int argc, char **argv)
{
  cl_context ctx;
  cl_command_queue q;
  // root device, all devices
  // layout of alldevs: [0] root, [1..3] EQUALLY sub-devices (eqdev),
  // [4..5] BY_COUNTS sub-devices (countdev)
#define NUMDEVS 6
  cl_device_id rootdev, alldevs[NUMDEVS];
  // pointers to the sub devices of the partitions EQUALLY and BY_COUNTS
  // respectively
  cl_device_id
    *eqdev = alldevs + 1,
    *countdev = alldevs + 4;
  cl_uint max_cus, max_subs, split;
  cl_uint i, j;

  cl_int err = poclu_get_any_device(&ctx, &rootdev, &q);
  CHECK_OPENCL_ERROR_IN("poclu_get_any_device");
  TEST_ASSERT( ctx );
  TEST_ASSERT( rootdev );
  TEST_ASSERT( q );

  alldevs[0] = rootdev;

  err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS,
    sizeof(max_cus), &max_cus, NULL);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_MAX_COMPUTE_UNITS");
  if (max_cus < 2)
    {
      printf("This test requires a cl device with at least 2 compute units"
             " (a dual-core or better CPU)\n");
      // NOTE(review): ctx and q are not released on this early exit
      return 1;
    }

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_MAX_SUB_DEVICES,
    sizeof(max_subs), &max_subs, NULL);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_MAX_SUB_DEVICES");

  // test fails without possible sub-devices, e.g. with basic pocl device
  TEST_ASSERT(max_subs > 1);

  cl_device_partition_property *dev_pt;
  size_t dev_pt_size;

  // two-step query: first the size in bytes, then the property list itself
  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_PROPERTIES,
    0, NULL, &dev_pt_size);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_PROPERTIES size");

  dev_pt = malloc(dev_pt_size);
  TEST_ASSERT(dev_pt);
  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_PROPERTIES,
    dev_pt_size, dev_pt, NULL);
  CHECK_OPENCL_ERROR_IN("CL_DEVICE_PARTITION_PROPERTIES");

  j = dev_pt_size / sizeof (*dev_pt); // number of partition types

  // check that partition types EQUALLY and BY_COUNTS are supported
  int found = 0;
  for (i = 0; i < j; ++i)
    {
      if (dev_pt[i] == CL_DEVICE_PARTITION_EQUALLY
          || dev_pt[i] == CL_DEVICE_PARTITION_BY_COUNTS)
        ++found;
    }

  TEST_ASSERT(found == 2);

  // here we will store the partition types returned by the subdevices
  cl_device_partition_property *ptype = NULL;
  size_t ptype_size;
  cl_uint numdevs = 0;

  cl_device_id parent;
  cl_uint sub_cus;

  /* CL_DEVICE_PARTITION_EQUALLY */

  printf("Max CUs: %u\n", max_cus);

  /* if the device has 3 CUs, 3 subdevices will be created, otherwise 2. */
  if (max_cus == 3)
    split = 3;
  else
    split = 2;

  const cl_device_partition_property equal_splitter[] = {
    CL_DEVICE_PARTITION_EQUALLY, max_cus/split, 0 };

  // first call only counts the sub-devices, second call creates them
  err = clCreateSubDevices(rootdev, equal_splitter, 0, NULL, &numdevs);
  CHECK_OPENCL_ERROR_IN("count sub devices");
  TEST_ASSERT(numdevs == split);

  err = clCreateSubDevices(rootdev, equal_splitter, split, eqdev, NULL);
  CHECK_OPENCL_ERROR_IN("partition equally");
  // keep the unused third EQUALLY slot at a defined value
  if (split == 2)
     eqdev[2] = NULL;

  cl_uint refc;
  err = clGetDeviceInfo (eqdev[0], CL_DEVICE_REFERENCE_COUNT, sizeof (refc),
                         &refc, NULL);
  CHECK_OPENCL_ERROR_IN ("get refcount");
  TEST_ASSERT (refc == 1);

  /* First, check that the root device is untouched */

  err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS,
    sizeof(sub_cus), &sub_cus, NULL);
  CHECK_OPENCL_ERROR_IN("parenty CU");
  TEST_ASSERT(sub_cus == max_cus);

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARENT_DEVICE,
    sizeof(parent), &parent, NULL);
  CHECK_OPENCL_ERROR_IN("root parent device");
  TEST_ASSERT(parent == NULL);

  /* partition type may either be NULL or contain a 0 entry */
  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE,
    0, NULL, &ptype_size);
  CHECK_OPENCL_ERROR_IN("root partition type");

  if (ptype_size != 0) {
    /* abuse dev_pt which should be large enough */
    TEST_ASSERT(ptype_size == sizeof(cl_device_partition_property));
    TEST_ASSERT(ptype_size <= dev_pt_size);
    err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE,
      ptype_size, dev_pt, NULL);
    CHECK_OPENCL_ERROR_IN("root partition type #2");
    TEST_ASSERT(dev_pt[0] == 0);
  }

  /* now test the subdevices */
  for (i = 0; i < split; ++i) {
    err = clGetDeviceInfo(eqdev[i], CL_DEVICE_MAX_COMPUTE_UNITS,
      sizeof(sub_cus), &sub_cus, NULL);
    CHECK_OPENCL_ERROR_IN("sub CU");
    TEST_ASSERT(sub_cus == max_cus/split);

    err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARENT_DEVICE,
      sizeof(parent), &parent, NULL);
    CHECK_OPENCL_ERROR_IN("sub parent device");
    TEST_ASSERT(parent == rootdev);

    /* the sub-device must report exactly the property list used to
     * create it, including the terminating 0 */
    err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARTITION_TYPE,
      0, NULL, &ptype_size);
    CHECK_OPENCL_ERROR_IN("sub partition type");
    TEST_ASSERT(ptype_size == sizeof(equal_splitter));

    ptype = malloc(ptype_size);
    TEST_ASSERT(ptype);
    err = clGetDeviceInfo(eqdev[i], CL_DEVICE_PARTITION_TYPE,
      ptype_size, ptype, NULL);
    CHECK_OPENCL_ERROR_IN("sub partition type #2");

    TEST_ASSERT(memcmp(ptype, equal_splitter, ptype_size) == 0);

    /* free the partition type */
    free(ptype) ; ptype = NULL;
  }

  /* CL_DEVICE_PARTITION_BY_COUNTS */

  /* Note that the platform will only read this to the first 0,
   * which is actually CL_DEVICE_PARTITION_BY_COUNTS_LIST_END;
   * the test is structured with an additional final 0 intentionally,
   * to follow the Khronos doc example
   */
  const cl_device_partition_property count_splitter[] = {
    CL_DEVICE_PARTITION_BY_COUNTS, 1, max_cus - 1,
    CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 0 };

  err = clCreateSubDevices(rootdev, count_splitter, 0, NULL, &numdevs);
  CHECK_OPENCL_ERROR_IN("count sub devices");
  TEST_ASSERT(numdevs == 2);

  err = clCreateSubDevices(rootdev, count_splitter, 2, countdev, NULL);
  CHECK_OPENCL_ERROR_IN("partition by counts");

  /* First, check that the root device is untouched */

  err = clGetDeviceInfo(rootdev, CL_DEVICE_MAX_COMPUTE_UNITS,
    sizeof(sub_cus), &sub_cus, NULL);
  CHECK_OPENCL_ERROR_IN("parenty CU");
  TEST_ASSERT(sub_cus == max_cus);

  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARENT_DEVICE,
    sizeof(parent), &parent, NULL);
  CHECK_OPENCL_ERROR_IN("root parent device");
  TEST_ASSERT(parent == NULL);

  /* partition type may either be NULL or contain a 0 entry */
  err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE,
    0, NULL, &ptype_size);
  CHECK_OPENCL_ERROR_IN("root partition type");

  if (ptype_size != 0) {
    /* abuse dev_pt which should be large enough */
    TEST_ASSERT(ptype_size == sizeof(cl_device_partition_property));
    TEST_ASSERT(ptype_size <= dev_pt_size);
    err = clGetDeviceInfo(rootdev, CL_DEVICE_PARTITION_TYPE,
      ptype_size, dev_pt, NULL);
    CHECK_OPENCL_ERROR_IN("root partition type #2");
    TEST_ASSERT(dev_pt[0] == 0);
  }

  // devices might be returned in different order than the counts
  // in the count_splitter

  // found_cus[0] counts sub-devices matching the first requested count,
  // found_cus[1] those matching the second
  int found_cus[2] = {0, 0};

  /* now test the subdevices */
  for (i = 0; i < 2; ++i) {
    err = clGetDeviceInfo(countdev[i], CL_DEVICE_MAX_COMPUTE_UNITS,
      sizeof(sub_cus), &sub_cus, NULL);
    CHECK_OPENCL_ERROR_IN("sub CU");
    if (sub_cus == count_splitter[1])
        found_cus[0] += 1;
    else if (sub_cus == count_splitter[2])
        found_cus[1] += 1;

    err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARENT_DEVICE,
      sizeof(parent), &parent, NULL);
    CHECK_OPENCL_ERROR_IN("sub parent device");
    TEST_ASSERT(parent == rootdev);

    /* The partition type returned is up to the first 0,
     * which happens to be the CL_DEVICE_PARTITION_BY_COUNTS_LIST_END,
     * not the final terminating 0 in count_splitter, so it has one less
     * element. It should be otherwise equal */
    err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARTITION_TYPE,
      0, NULL, &ptype_size);
    CHECK_OPENCL_ERROR_IN("sub partition type");
    TEST_ASSERT(ptype_size == sizeof(count_splitter) - sizeof(*count_splitter));

    ptype = malloc(ptype_size);
    TEST_ASSERT(ptype);
    err = clGetDeviceInfo(countdev[i], CL_DEVICE_PARTITION_TYPE,
      ptype_size, ptype, NULL);
    CHECK_OPENCL_ERROR_IN("sub partition type #2");

    TEST_ASSERT(memcmp(ptype, count_splitter, ptype_size) == 0);

    /* free the partition type */
    free(ptype) ; ptype = NULL;
  }

  /* the previous loop finds 1+1 subdevices only on >dual core systems;
   * on dual cores, the count_splitter is [1, 1] and the above
   * "(sub_cus == count_splitter[x])" results in 2+0 subdevices found */
  if (max_cus > 2)
    TEST_ASSERT(found_cus[0] == 1 && found_cus[1] == 1);
  else
    TEST_ASSERT((found_cus[0] + found_cus[1]) == 2);

  /* So far, so good. Let's now try and use these devices,
   * by building a program for all of them and launching kernels on them.
   *
   * Note that there's a discrepancy in behavior between implementations:
   * some assume you can treat sub-devices as their parent device, and thus
   * e.g. using them through any context which includes their parent devices,
   * other fail miserably if you try this.
   *
   * For the time being we will test the stricter behavior, where
   * sub-devices should be added manually to a context.
   */

  err = clReleaseCommandQueue(q);
  CHECK_OPENCL_ERROR_IN("clReleaseCommandQueue");
  err = clReleaseContext(ctx);
  CHECK_OPENCL_ERROR_IN("clReleaseContext");

  /* if we split into 2 equal parts, third pointer is NULL. Let's copy the
   * previous device to it */
  if (split == 2)
    eqdev[2] = eqdev[1];

  /* context over the root device plus all sub-devices */
  ctx = clCreateContext(NULL, NUMDEVS, alldevs, NULL, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateContext");
  TEST_ASSERT( test_context(ctx, prog_src_all, 1, NUMDEVS, alldevs) == CL_SUCCESS );

  /* context over the sub-devices only (root device skipped).
   * NOTE(review): ctx is reassigned here without a visible
   * clReleaseContext for the previous one; presumably test_context
   * releases the context it is given -- confirm. */
  ctx = clCreateContext(NULL, NUMDEVS - 1, alldevs + 1, NULL, NULL, &err);
  CHECK_OPENCL_ERROR_IN("clCreateContext");
  TEST_ASSERT( test_context(ctx, prog_src_two, -1, NUMDEVS - 1, alldevs + 1)
    == CL_SUCCESS );

  /* Don't release the same device twice. clReleaseDevice(NULL) should return
   * an error but not crash. */
  if (split == 2)
    eqdev[2] = NULL;

  /* the loop also passes alldevs[0], i.e. the root device; return values
   * of clReleaseDevice are deliberately not checked here */
  for (i = 0; i < NUMDEVS; i++)
    clReleaseDevice (alldevs[i]);

  CHECK_CL_ERROR (clUnloadCompiler ());
  free (dev_pt);

  printf ("OK\n");

  return 0;
}
int
DeviceFission::setupCLPlatform()
{
    cl_int status = CL_SUCCESS;

    /*
     * Have a look at the available platforms and pick either
     * the AMD one if available or a reasonable default.
     */
    cl_platform_id platform = NULL;
    int retValue = sampleCommon->getPlatform(platform, platformId, isPlatformEnabled());
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::getPlatform(rootplatform) failed");

    // Display available devices.
    retValue = sampleCommon->displayDevices(platform, CL_DEVICE_TYPE_ALL);
    CHECK_ERROR(retValue, SDK_SUCCESS, "sampleCommon::displayDevices(rootplatform) failed");

    /*
     * If we could find our platform, use it. Otherwise use just available platform.
     */
    cl_context_properties cps[3] = 
    {
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)platform, 
        0
    };

    rContext = clCreateContextFromType(platform ? cps : NULL,
									   CL_DEVICE_TYPE_ALL,
									   NULL,
									   NULL,
									   &status);
    CHECK_OPENCL_ERROR( status, "clCreateContextFromType failed.");

    // getting devices on which to run the sample
    status = sampleCommon->getDevices(rContext, &Devices, 0, isDeviceIdEnabled());
    CHECK_ERROR(status, SDK_SUCCESS, "sampleCommon::getDevices() failed");

    // Set deviceListSize from clGetContextInfo
	status = clGetContextInfo(rContext, CL_CONTEXT_DEVICES, 0, 0, &deviceListSize);
	CHECK_ERROR(status, SDK_SUCCESS, "clGetContextInfo failed. (deviceListSize)");

	// Get GPU device and CPU devices by the deviceInfo.
	for (cl_uint i = 0 ; i < deviceListSize / sizeof(cl_device_id) ; i++)
	{
		retValue = deviceInfo.setDeviceInfo(Devices[i]);
		CHECK_ERROR(retValue, 0, "SDKDeviceInfo::setDeviceInfo() failed");
		if (deviceInfo.dType == CL_DEVICE_TYPE_GPU)
		{
			gpuAvailable = CL_TRUE;
			gpuDevice = Devices[i];
			groupSize = deviceInfo.maxWorkGroupSize;
		}
		else if (deviceInfo.dType == CL_DEVICE_TYPE_CPU)
		{
			cpuDevice = Devices[i];
		}
	}

	// Using CPU to replace GPU if unable to find GPU.
	if(gpuAvailable == CL_FALSE)
	{
		std::cout << "\nUnable to find GPU, disable cpu2gpu mode."<< std::endl;
		gpuDevice = cpuDevice;
		cpu2gpu = CL_FALSE;
	}

	// Get allocate memory for subDevices
    subDevices = (cl_device_id*)malloc(numSubDevices * sizeof(cl_device_id));
    CHECK_ALLOCATION(subDevices, "Failed to allocate memory. (subDevices)");

	// Get allocate memory for subKernel
	subKernel = (cl_kernel*)malloc(numSubDevices * sizeof(cl_kernel));
	CHECK_ALLOCATION(subKernel, "Failed to allocate memory. (subKernel)");

	// Get allocate memory for gpuKernel
	gpuKernel = (cl_kernel*)malloc(numSubDevices * sizeof(cl_kernel));
	CHECK_ALLOCATION(gpuKernel, "Failed to allocate memory. (gpuKernel)");

	// Get maxSubDevices from clGetDeviceInfo
	cl_uint maxSubDevices;
	status = clGetDeviceInfo(cpuDevice, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, sizeof(maxSubDevices), &maxSubDevices, NULL);
	CHECK_OPENCL_ERROR(status, "clGetDeviceInfo failed. (maxSubDevices)")

	if(maxSubDevices <= 1)
	{
		std::cout<<"Error: The CPU should have than one core to run this sample."<<std::endl;
		return SDK_FAILURE;
	}

	// Initialize required partition property
	cl_device_partition_property partitionPrty[5] =
	{
		CL_DEVICE_PARTITION_BY_COUNTS,  
		maxSubDevices / 2, maxSubDevices / 2,
		CL_DEVICE_PARTITION_BY_COUNTS_LIST_END, 
		0 };

	// Create sub-devices
	status = clCreateSubDevices(cpuDevice, partitionPrty, numSubDevices, subDevices, NULL);
	CHECK_OPENCL_ERROR( status, "clCreateSubDevices failed.");

    return SDK_SUCCESS;
}
Esempio n. 7
0
// Initializes the OpenCL state held in cv: platform, device selection,
// context, one profiling-enabled command queue per device, and the memset
// kernel.
//
// The target is chosen by the first of these environment variables that is
// set: HM_CPU0, HM_CPU0_SUB1, HM_GPU0, HM_GPU01, HM_GPU1. If none is set the
// function reports the error and terminates the process. Any other setup
// failure also prints a message and calls exit(1).
void initialize_ocl(cl_vars_t& cv)
{
  cl_uint num_platforms;
  cv.err = clGetPlatformIDs(1, &(cv.platform), &(num_platforms));
  if(cv.err != CL_SUCCESS)
  {
    std::cout << "Could not get platform ID" << std::endl;
    exit(1);
  }

  if(getenv("HM_CPU0"))
  {
    std::cout << "Running on CPU 0" << std::endl;
    cl_uint max_devices = 1;
    cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_CPU, max_devices, cv.device_ids, &(cv.num_devices));
    cv.num_devices = 1;
  }
  else if(getenv("HM_CPU0_SUB1"))
  {
    std::cout << "Running on Subdivided1 CPU 0" << std::endl;
    cl_uint max_devices = 1;
    cl_device_id dev0;
    cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_CPU, max_devices, &dev0, &(cv.num_devices));

    // Zero-initialized so that nothing indeterminate is printed or used
    // when partitioning fails or yields fewer than two sub-devices.
    cl_uint num_subdevices = 0;
    cl_device_id id4[4] = {};
    if(cv.err == CL_SUCCESS)
    {
      // Only partition when dev0 is valid; otherwise keep the lookup error
      // so the common check below reports it.
      cl_device_partition_property props[3];
      props[0] = CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
      props[1] = CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE;
      props[2] = 0;
      cv.err = clCreateSubDevices(dev0, props, 2, id4, &num_subdevices);
      std::cout << "num subdevices: " << num_subdevices << std::endl;
    }
    // The second sub-device is the one used.
    cv.device_ids[0] = id4[1];
    cv.num_devices = 1;
  }
  else if(getenv("HM_GPU0"))
  {
    std::cout << "Running on GPU 0" << std::endl;
    cl_uint max_devices = 1;
    cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_GPU, max_devices, cv.device_ids, &(cv.num_devices));
    cv.num_devices = 1;
  }
  else if(getenv("HM_GPU01"))
  {
    std::cout << "Running on GPU 0 and GPU 1" << std::endl;
    cl_uint max_devices = 2;
    cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_GPU, max_devices, cv.device_ids, &(cv.num_devices));
    // NOTE(review): num_devices is forced to 2 even if fewer GPUs were
    // reported; the HM_GPU1 branch asserts on this, this branch does not.
    cv.num_devices = 2;
  }
  else if(getenv("HM_GPU1"))
  {
    std::cout << "Running on GPU 1" << std::endl;
    cl_uint max_devices = 2;
    cv.err = clGetDeviceIDs(cv.platform, CL_DEVICE_TYPE_GPU, max_devices, cv.device_ids, &(cv.num_devices));
    assert(cv.num_devices > 1);
    cv.device_ids[0] = cv.device_ids[1];
    cv.num_devices = 1;
  }
  else
  {
    std::cout << "Error: Specify target either HM_CPU0, HM_GPU0, HM_GPU01, or HM_GPU1" << std::endl;
    // No device was selected: continuing would create a context from
    // device data this function never set.
    exit(1);
  }
  if(cv.err != CL_SUCCESS)
  {
    std::cout << "Could not get GPU device ID" << std::endl;
    exit(1);
  }

  cv.context = clCreateContext(0, cv.num_devices, cv.device_ids, NULL, NULL, &(cv.err));
  if(!cv.context)
  {
    std::cout << "Could not create context" << std::endl;
    exit(1);
  }

  // One command queue per selected device, with profiling enabled.
  for(size_t devId = 0 ; devId < cv.num_devices ; devId++)
  {
    cv.commands[devId] = clCreateCommandQueue(cv.context, cv.device_ids[devId], CL_QUEUE_PROFILING_ENABLE, &(cv.err));
    if(!cv.commands[devId])
    {
      std::cout << "Could not create command queue" << std::endl;
      exit(1);
    }
  }
  compile_ocl_program(cv.memset_program, cv.memset_kernel, cv, memset_kernel_str, "memset_kernel");

#ifdef VERBOSE_COMPILATION
  docs.opencl_ss << "CL fill vars success" << std::endl;

  // Device info
  for(size_t devId = 0 ; devId < cv.num_devices ; devId++)
  {
    docs.opencl_ss << "Device ID: " << devId << std::endl;

    char device_name[255];
    cv.err = clGetDeviceInfo(cv.device_ids[devId], CL_DEVICE_NAME, 255, device_name, NULL);
    docs.opencl_ss << "Device Name: " << device_name << std::endl;

    cl_ulong mem_size;
    cv.err = clGetDeviceInfo(cv.device_ids[devId], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(cl_ulong), &mem_size, NULL);
    docs.opencl_ss << "Global mem size: " << mem_size << std::endl;

    size_t max_work_item[3];
    cv.err = clGetDeviceInfo(cv.device_ids[devId], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(max_work_item), max_work_item, NULL);
    docs.opencl_ss << "Max work item sizes: " << max_work_item[0] << ", " << max_work_item[1] << ", " << max_work_item[2] << std::endl;
  }
#endif
}
Esempio n. 8
0
/**
 * Constructs a host object bound to one OpenCL device and creates a command
 * queue for it.
 *
 * The first instance (contextUserCount == 0) also builds the shared device
 * list and context: for GPUs, every device of the requested type; for CPUs,
 * the single CPU device partitioned equally into one-compute-unit
 * sub-devices (falling back to the whole device when partitioning fails).
 *
 * @param device_type OpenCL device type, stored in devType.
 * @param gpu_id      Index into the device list. For CPU hosts it is wrapped
 *                    modulo the device count; for GPU hosts an out-of-range
 *                    index terminates the process.
 * @param cpu_cores   Not used by this constructor.
 */
OclHost::OclHost(int const device_type, int gpu_id, int const cpu_cores) :
    devType(device_type), maxGlobalMem(0), maxLocalMem(0) {

    cl_int ciErrNum = CL_SUCCESS;
    Log.Verbose("Using device number %d", gpu_id);

    // NOTE(review): contextUserCount, devices and ciDeviceCount appear to be
    // shared between instances and are not synchronized here -- confirm
    // that construction never happens concurrently.
    if (contextUserCount == 0) {
        Log.Verbose("Creating ocl context.");
        cl_platform_id cpPlatform = getPlatform();

        //Get number of devices
        ciErrNum = clGetDeviceIDs(cpPlatform, devType, 0, NULL, &ciDeviceCount);
        checkClError("Couldn't get number of OpenCl devices. Error: ",
                     ciErrNum);

        if (isGPU()) {
            //Getting device ids
            devices = (cl_device_id *) malloc(
                          ciDeviceCount * sizeof(cl_device_id));
            if (devices == NULL) {
                Log.Error("Couldn't allocate memory for device ids.");
                exit(-1);
            }
            ciErrNum = clGetDeviceIDs(cpPlatform, devType, ciDeviceCount,
                                      devices, NULL);
            checkClError("Couldn't get OpenCl device ids. Error: ", ciErrNum);

            //Create context
            oclGpuContext = clCreateContext(0, ciDeviceCount, devices, NULL,
                                            NULL, &ciErrNum);
            checkClError("Couldn't create context. Error: ", ciErrNum);
            Log.Message("Context for GPU devices created.");

            Log.Message("%d GPU device(s) found: ", ciDeviceCount);
            for (int i = 0; static_cast<cl_uint>(i) < ciDeviceCount; ++i) {
                char device_string[1024];
                char driver_string[1024];
                clGetDeviceInfo(devices[i], CL_DEVICE_NAME,
                                sizeof(device_string), &device_string, NULL);
                clGetDeviceInfo(devices[i], CL_DRIVER_VERSION,
                                sizeof(driver_string), &driver_string, NULL);
                Log.Message("Device %d: %s (Driver: %s)", i, device_string, driver_string);
            }

        } else {
            if (ciDeviceCount > 1) {
                Log.Error("More than one CPU device found.");
                exit(-1);
            }

            cl_device_id device_id;
            ciErrNum = clGetDeviceIDs(cpPlatform, CL_DEVICE_TYPE_CPU, 1,
                                      &device_id, NULL);
            checkClError("Couldn't get CPU device id. Error: ", ciErrNum);

            Log.Message("%d CPU device found.", ciDeviceCount);
            char device_string[1024];
            char driver_string[1024];
            clGetDeviceInfo(device_id, CL_DEVICE_NAME, sizeof(device_string),
                            &device_string, NULL);
            clGetDeviceInfo(device_id, CL_DRIVER_VERSION, sizeof(driver_string),
                            &driver_string, NULL);
            Log.Message("Device %d: %s (Driver: %s)", 0, device_string, driver_string);

            cl_device_partition_property props[3];

            props[0] = CL_DEVICE_PARTITION_EQUALLY; // Equally
            props[1] = 1; // 1 compute unit per sub-device
            props[2] = 0; // list terminator

            // Fixed upper bound on the number of sub-devices requested.
            const cl_uint maxSubDevices = 256;
            devices = (cl_device_id *) malloc(maxSubDevices * sizeof(cl_device_id));
            if (devices == NULL) {
                Log.Error("Couldn't allocate memory for device ids.");
                exit(-1);
            }
            ciErrNum = clCreateSubDevices(device_id, props, maxSubDevices, devices,
                                          &ciDeviceCount);
            if (ciErrNum == CL_DEVICE_PARTITION_FAILED) {
                // Partitioning failed: use the whole CPU as the only device.
                ciDeviceCount = 1;
                devices[0] = device_id;
            } else {
                checkClError("Couldn't create sub-devices. Error: ", ciErrNum);
            }

            Log.Message("%d CPU cores available.", ciDeviceCount);

            //Create context
            oclGpuContext = clCreateContext(0, ciDeviceCount, devices, NULL,
                                            NULL, &ciErrNum);
            checkClError("Couldn't create context. Error: ", ciErrNum);

        }
    }
    contextUserCount += 1;

    // Guard the modulo and the array index below.
    if (ciDeviceCount == 0) {
        Log.Error("No OpenCl devices available.");
        exit(-1);
    }
    if (!isGPU()) {
        // Wrap the requested index onto the available sub-devices.
        gpu_id = gpu_id % ciDeviceCount;
    } else if (gpu_id < 0 || static_cast<cl_uint>(gpu_id) >= ciDeviceCount) {
        Log.Error("Requested GPU device number is out of range.");
        exit(-1);
    }
    oclDevice = devices[gpu_id];

    // create command queue
    oclCommandQueue = clCreateCommandQueue(oclGpuContext, oclDevice, 0,
                                           &ciErrNum);

    checkClError("Couldn't create command queue for device: ", ciErrNum);

}