示例#1
2
int main(int argc, char** argv)
{
  int err;                            // error code returned from api calls
  cl_platform_id platform_id;         // platform id
  cl_device_id device_id;             // compute device id 
  cl_context context;                 // compute context
  cl_command_queue commands;          // compute command queue
  cl_program program;                 // compute program
  cl_kernel kernel;                   // compute kernel

  size_t global[2];                   // global domain size for our calculation
  size_t local[2];                    // local domain size for our calculation

  char cl_platform_vendor[1001];
  char cl_platform_name[1001];
   

  cl_mem in_array;                     // device memory used for the input array
  //cl_mem synaptic_weights;             // device memory used for the input array
  cl_mem out_array;                    // device memory used for the output array
   
  if (argc != 2){
    printf("%s <inputfile>\n", argv[0]);
    return -1;
  }

	//float in_array[NO_NODES];
	//float out_array[NO_NODES];
	//float synaptic_weights[NO_NODES*NO_NODES];
	float in_array_tb[NO_NODES];
	float out_array_tb[NO_NODES];
	//float synaptic_weights_tb[NO_NODES*NO_NODES];
	float temp =0;
	int i = 0;
    	int j = 0;
	int index = 0;
	FILE* ifp;
	char* mode = "r";
  //
  // Connect to first platform
  //
  err = clGetPlatformIDs(1,&platform_id,NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to find an OpenCL platform!\n");
    printf("Test failed\n");
    return -1;
  }
  err = clGetPlatformInfo(platform_id,CL_PLATFORM_VENDOR,1000,(void *)cl_platform_vendor,NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: clGetPlatformInfo(CL_PLATFORM_VENDOR) failed!\n");
    printf("Test failed\n");
    return -1;
  }
  printf("CL_PLATFORM_VENDOR %s\n",cl_platform_vendor);
  err = clGetPlatformInfo(platform_id,CL_PLATFORM_NAME,1000,(void *)cl_platform_name,NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: clGetPlatformInfo(CL_PLATFORM_NAME) failed!\n");
    printf("Test failed\n");
    return -1;
  }
  printf("CL_PLATFORM_NAME %s\n",cl_platform_name);
 
  // Connect to a compute device
  //
  int fpga = 0;
#if defined (FPGA_DEVICE)
  fpga = 1;
#endif
  err = clGetDeviceIDs(platform_id, fpga ? CL_DEVICE_TYPE_ACCELERATOR : CL_DEVICE_TYPE_CPU,
                       1, &device_id, NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to create a device group!\n");
    printf("Test failed\n");
    return -1;
  }
  
  //
  // Create a compute context 
  //
  context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
  if (!context)
  {
    printf("Error: Failed to create a compute context!\n");
    printf("Test failed\n");
    return -1;
  }

  //relu_1(in_array,synaptic_weights,out_array);


  // Fill our data sets with pattern
  //
  //int i = 0;
  //for(i = 0; i < DATA_SIZE; i++) {
  //  a[i] = (int)i;
  //  b[i] = (int)i;
  //  results[i] = 0;
  //}
  //
  
  
  // Create a command commands
  commands = clCreateCommandQueue(context, device_id, 0, &err);
  if (!commands)
  {
    printf("Error: Failed to create a command commands!\n");
    printf("Error: code %i\n",err);
    printf("Test failed\n");
    return -1;
  }

  int status;

  // Create Program Objects
  //
  
  // Load binary from disk
  unsigned char *kernelbinary;
  char *xclbin=argv[1];
  printf("loading %s\n", xclbin);
  int n_i = load_file_to_memory(xclbin, (char **) &kernelbinary);
  if (n_i < 0) {
    printf("failed to load kernel from xclbin: %s\n", xclbin);
    printf("Test failed\n");
    return -1;
  }
  size_t n = n_i;
  // Create the compute program from offline
  program = clCreateProgramWithBinary(context, 1, &device_id, &n,
                                      (const unsigned char **) &kernelbinary, &status, &err);
  if ((!program) || (err!=CL_SUCCESS)) {
    printf("Error: Failed to create compute program from binary %d!\n", err);
    printf("Test failed\n");
    printf("err : %d %s\n",err,err);
  }

  // Build the program executable
  //
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  if (err != CL_SUCCESS)
  {
    size_t len;
    char buffer[2048];

    printf("Error: Failed to build program executable!\n");
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
    printf("%s\n", buffer);
    printf("Test failed\n");
    return -1;
  }

  // Create the compute kernel in the program we wish to run
  //
  kernel = clCreateKernel(program, "relu_1", &err);
  if (!kernel || err != CL_SUCCESS)
  {
    printf("Error: Failed to create compute kernel!\n");
    printf("Test failed\n");
    return -1;
  }

  // Create the input and output arrays in device memory for our calculation
  //
  in_array = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(float) * NO_NODES, NULL, NULL);
  //synaptic_weights = clCreateBuffer(context,  CL_MEM_READ_ONLY,  sizeof(float) * NO_NODES * NO_NODES, NULL, NULL);
  out_array = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(float) * NO_NODES, NULL, NULL);
  if (!in_array || /*!synaptic_weights ||*/ !out_array)
  {
    printf("Error: Failed to allocate device memory!\n");
    printf("Test failed\n");
    return -1;
  }    
    
	ifp = fopen("/home/agandhi92/sdaccel/relu_1/input.txt",mode);

	if(ifp == NULL)
	{
		printf("Input file not found \n");
  		return -1;
	}
	while (fscanf(ifp, "%f", &temp) != EOF && index < NO_NODES) {

		in_array_tb[index++] = temp;
	}
	index = 0;
	temp = 0;

	//ifp = fopen("/home/agandhi92/sdaccel/relu_1/weight.txt",mode);
	//if(ifp == NULL)
	//{
	//	printf("Weight file not found \n");
  	//	return -1;
	//}
	//while (fscanf(ifp, "%f", &temp) != EOF && index < (NO_NODES*NO_NODES)) {
	//	synaptic_weights_tb[index++] = temp;
	//}
   
  //
  // Write our data set into the input array in device memory 
  //
  err = clEnqueueWriteBuffer(commands, in_array, CL_TRUE, 0, sizeof(float) * NO_NODES, in_array_tb, 0, NULL, NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to write to source array a!\n");
    printf("Test failed\n");
    return -1;
  }

  // Write our data set into the input array in device memory 
  //
  //err = clEnqueueWriteBuffer(commands, synaptic_weights, CL_TRUE, 0, sizeof(float) *  NO_NODES *  NO_NODES, synaptic_weights_tb, 0, NULL, NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to write to source array b!\n");
    printf("Test failed\n");
    return -1;
  }
    
  // Set the arguments to our compute kernel
  //
  err = 0;
  err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &in_array);
  //err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &synaptic_weights);
  err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &out_array);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to set kernel arguments! %d\n", err);
    printf("Test failed\n");
    return -1;
  }

  // Execute the kernel over the entire range of our 1d input data set
  // using the maximum number of work group items for this device
  //

  err = clEnqueueTask(commands, kernel, 0, NULL, NULL);

  if (err)
  {
    printf("Error: Failed to execute kernel! %d\n", err);
    printf("Test failed\n");
    return -1;
  }

  // Read back the results from the device to verify the output
  //
  cl_event readevent;
  err = clEnqueueReadBuffer( commands, out_array, CL_TRUE, 0, sizeof(float) * NO_NODES, out_array_tb, 0, NULL, &readevent );  
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to read output array! %d\n", err);
    printf("Test failed\n");
    return -1;
  }

  clWaitForEvents(1, &readevent);
    
  //printf("A\n");
  //for (i=0;i<DATA_SIZE;i++) {
  //  printf("%x ",a[i]);
  //  if (((i+1) % 16) == 0)
  //    printf("\n");
  //}
  //printf("B\n");
  //for (i=0;i<DATA_SIZE;i++) {
  //  printf("%x ",b[i]);
  //  if (((i+1) % 16) == 0)
  //    printf("\n");
  //}
  //printf("res\n");
  //for (i=0;i<DATA_SIZE;i++) {
  //  printf("%x ",results[i]);
  //  if (((i+1) % 16) == 0)
  //    printf("\n");
  //}
    
  // Validate our results
  //
  //correct = 0;
  //for(i = 0; i < DATA_SIZE; i++)
  //{
  //  int row = i/MATRIX_RANK;
  //  int col = i%MATRIX_RANK;
  //  int running = 0;
  //  int index;
  //  for (index=0;index<MATRIX_RANK;index++) {
  //    int aIndex = row*MATRIX_RANK + index;
  //    int bIndex = col + index*MATRIX_RANK;
  //    running += a[aIndex] * b[bIndex];
  //  }
  //  sw_results[i] = running;
  //}
  //  
  //for (i = 0;i < DATA_SIZE; i++) 
  //  if(results[i] == sw_results[i])
  //    correct++;
  //printf("Software\n");
  //for (i=0;i<DATA_SIZE;i++) {
  //  //printf("%0.2f ",sw_results[i]);
  //  printf("%d ",sw_results[i]);
  //  if (((i+1) % 16) == 0)
  //    printf("\n");
  //}
  //  
  //  
  //// Print a brief summary detailing the results
  ////
  //printf("Computed '%d/%d' correct values!\n", correct, DATA_SIZE);
  //  
        
  // Shutdown and cleanup
	int temp_ = 0;


 for (j = 0; j < NO_NODES; j++)
 {
 	if (out_array_tb[j] >= 0) // || out_array_tb[j]== 0)
 	{
 		//printf("out_array[%d] = %f \n", j, out_array[j]);
 		temp_++;
 	}
 }


  clReleaseMemObject(in_array);
  //clReleaseMemObject(synaptic_weights);
  clReleaseMemObject(out_array);
  clReleaseProgram(program);
  clReleaseKernel(kernel);
  clReleaseCommandQueue(commands);
  clReleaseContext(context);

	if (temp_ == NO_NODES)
	{
		printf("*********************************************************** \n");
		printf("TEST PASSED !!!!!! The output matches the desired output. \n");
		printf("*********************************************************** \n");
		return EXIT_SUCCESS;
	}
	else
	{
		printf("**************************************************************** \n");
		printf("TEST Failed !!!!!! The output does not match the desired output. \n");
		printf("**************************************************************** \n");
		return -1;
	}

  //if(correct == DATA_SIZE){
  //  printf("Test passed!\n");
  //  return EXIT_SUCCESS;
  //}
  //else{
  //  printf("Test failed\n");
  //  return -1;
  //}
}
int
MemoryOptimizations::setupCL(void)
{
    cl_int status = 0;
    size_t deviceListSize;

    cl_device_type dType;
    
    if(deviceType.compare("cpu") == 0)
    {
        dType = CL_DEVICE_TYPE_CPU;
    }
    else //deviceType = "gpu" 
    {
        dType = CL_DEVICE_TYPE_GPU;
    }

    /*
     * Have a look at the available platforms and pick either
     * the AMD one if available or a reasonable default.
     */

    cl_uint numPlatforms;
    cl_platform_id platform = NULL;
    status = clGetPlatformIDs(0, NULL, &numPlatforms);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetPlatformIDs failed."))
    {
        return SDK_FAILURE;
    }
    if (0 < numPlatforms) 
    {
        cl_platform_id* platforms = new cl_platform_id[numPlatforms];
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        if(!sampleCommon->checkVal(status,
                                   CL_SUCCESS,
                                   "clGetPlatformIDs failed."))
        {
            return SDK_FAILURE;
        }
        for (unsigned i = 0; i < numPlatforms; ++i) 
        {
            char pbuf[100];
            status = clGetPlatformInfo(platforms[i],
                                       CL_PLATFORM_VENDOR,
                                       sizeof(pbuf),
                                       pbuf,
                                       NULL);

            if(!sampleCommon->checkVal(status,
                                       CL_SUCCESS,
                                       "clGetPlatformInfo failed."))
            {
                return SDK_FAILURE;
            }

            platform = platforms[i];
            if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) 
            {
                break;
            }
        }
        delete[] platforms;
    }

    if(NULL == platform)
    {
        sampleCommon->error("NULL platform found so Exiting Application.");
        return SDK_FAILURE;
    }

    // Display available devices.
    if(!sampleCommon->displayDevices(platform, dType))
    {
        sampleCommon->error("sampleCommon::displayDevices() failed");
        return SDK_FAILURE;
    }

    /*
     * If we could find our platform, use it. Otherwise use just available platform.
     */

    cl_context_properties cps[3] = 
    {
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)platform, 
        0
    };

    context = clCreateContextFromType(cps,
                                      dType,
                                      NULL,
                                      NULL,
                                      &status);
    if(!sampleCommon->checkVal(status, 
                               CL_SUCCESS,
                               "clCreateContextFromType failed."))
        return SDK_FAILURE;

    /* First, get the size of device list data */
    status = clGetContextInfo(context, 
                              CL_CONTEXT_DEVICES, 
                              0, 
                              NULL, 
                              &deviceListSize);
    if(!sampleCommon->checkVal(status, 
                               CL_SUCCESS,
                               "clGetContextInfo failed."))
        return SDK_FAILURE;

    int deviceCount = (int)(deviceListSize / sizeof(cl_device_id));
    if(!sampleCommon->validateDeviceId(deviceId, deviceCount))
    {
        sampleCommon->error("sampleCommon::validateDeviceId() failed");
        return SDK_FAILURE;
    }

    /* Now allocate memory for device list based on the size we got earlier */
    devices = (cl_device_id*)malloc(deviceListSize);
    if(devices == NULL) 
    {
        sampleCommon->error("Failed to allocate memory (devices).");
        return SDK_FAILURE;
    }

    /* Now, get the device list data */
    status = clGetContextInfo(context, 
                              CL_CONTEXT_DEVICES, 
                              deviceListSize, 
                              devices, 
                              NULL);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS, 
                               "clGetGetContextInfo failed."))
        return SDK_FAILURE;


    /* Get Device specific Information */
    /* Get device extensions */
    char deviceExtensions[2048];
    status = clGetDeviceInfo(devices[deviceId], 
                             CL_DEVICE_EXTENSIONS, 
                             sizeof(deviceExtensions), 
                             deviceExtensions, 
                             0);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS, 
                               "clGetDeviceInfo failed.(extensions)"))
        return SDK_FAILURE;

    if(!strstr(deviceExtensions, "cl_khr_global_int32_base_atomics"))
    {
        sampleCommon->error("Device does not support global_int32_base_atomics!");
        return SDK_EXPECTED_FAILURE;
    }

    status = clGetDeviceInfo(devices[deviceId],
                             CL_DEVICE_MAX_WORK_GROUP_SIZE,
                             sizeof(size_t),
                             (void *)&maxWorkGroupSize,
                             NULL);

    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS, 
                               "clGetDeviceInfo CL_DEVICE_MAX_WORK_GROUP_SIZE failed."))
        return SDK_FAILURE;


    status = clGetDeviceInfo(devices[deviceId],
                             CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                             sizeof(cl_uint),
                             (void *)&maxDimensions,
                             NULL);

    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS, 
                               "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed."))
        return SDK_FAILURE;


    maxWorkItemSizes = (size_t*)malloc(maxDimensions*sizeof(size_t));
    status = clGetDeviceInfo(devices[deviceId],
                             CL_DEVICE_MAX_WORK_ITEM_SIZES,
                             sizeof(size_t)*maxDimensions,
                             (void *)maxWorkItemSizes,
                             NULL);

    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS, 
                               "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed."))
        return SDK_FAILURE;




    {
        /* The block is to move the declaration of prop closer to its use */
        cl_command_queue_properties prop = 0;
        prop |= CL_QUEUE_PROFILING_ENABLE;

        commandQueue = clCreateCommandQueue(context, 
                                            devices[deviceId], 
                                            prop, 
                                            &status);
        if(!sampleCommon->checkVal(status,
                                   0,
                                   "clCreateCommandQueue failed."))
            return SDK_FAILURE;
    }

    /* Input buffer */
    inputBuffer = clCreateBuffer(context, 
                                 CL_MEM_READ_ONLY,
                                 sizeof(cl_float4) * length,
                                 0, 
                                 &status);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clCreateBuffer failed. (inputBuffer)"))
        return SDK_FAILURE;

    /* Write data to buffer */
    status = clEnqueueWriteBuffer(commandQueue,
                                  inputBuffer,
                                  1,
                                  0,
                                  sizeof(cl_float4) * length,
                                  input,
                                  0,
                                  0,
                                  0);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clEnqueueWriteBuffer failed. (inputBuffer)"))
        return SDK_FAILURE;


    outputBuffer = clCreateBuffer(context, 
                                  CL_MEM_WRITE_ONLY,
                                  sizeof(cl_float4) * length,
                                  0, 
                                  &status);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clCreateBuffer failed. (outputBuffer)"))
        return SDK_FAILURE;

    /* create a CL program using the kernel source */
    streamsdk::SDKFile kernelFile;
    std::string kernelPath = sampleCommon->getPath();

    if(isLoadBinaryEnabled())
    {
        kernelPath.append(loadBinary.c_str());
        if(!kernelFile.readBinaryFromFile(kernelPath.c_str()))
        {
            std::cout << "Failed to load kernel file : " << kernelPath << std::endl;
            return SDK_FAILURE;
        }

        const char * binary = kernelFile.source().c_str();
        size_t binarySize = kernelFile.source().size();
        program = clCreateProgramWithBinary(context,
                                            1,
                                            &devices[deviceId], 
                                            (const size_t *)&binarySize,
                                            (const unsigned char**)&binary,
                                            NULL,
                                            &status);
        if(!sampleCommon->checkVal(status,
                                   CL_SUCCESS,
                                   "clCreateProgramWithBinary failed."))
        {
            return SDK_FAILURE;
        }

    }
    else
    {
        kernelPath.append("MemoryOptimizations_Kernels.cl");
        if(!kernelFile.open(kernelPath.c_str()))
        {
            std::cout << "Failed to load kernel file: " << kernelPath << std::endl;
            return SDK_FAILURE;
        }
        const char * source = kernelFile.source().c_str();
        size_t sourceSize[] = {strlen(source)};
        program = clCreateProgramWithSource(context,
                                            1,
                                            &source,
                                            sourceSize,
                                            &status);
        if(!sampleCommon->checkVal(status,
                                   CL_SUCCESS,
                                   "clCreateProgramWithSource failed."))
            return SDK_FAILURE;
    }

    /* create a cl program executable for all the devices specified */
    status = clBuildProgram(program, 1, &devices[deviceId], NULL, NULL, NULL);
    if(status != CL_SUCCESS)
    {
        if(status == CL_BUILD_PROGRAM_FAILURE)
        {
            cl_int logStatus;
            char *buildLog = NULL;
            size_t buildLogSize = 0;
            logStatus = clGetProgramBuildInfo (program, 
                devices[deviceId], 
                CL_PROGRAM_BUILD_LOG, 
                buildLogSize, 
                buildLog, 
                &buildLogSize);
            if(!sampleCommon->checkVal(
                logStatus,
                CL_SUCCESS,
                "clGetProgramBuildInfo failed."))
                return SDK_FAILURE;

            buildLog = (char*)malloc(buildLogSize);
            if(buildLog == NULL)
            {
                sampleCommon->error("Failed to allocate host memory. (buildLog)");
                return SDK_FAILURE;
            }
            memset(buildLog, 0, buildLogSize);

            logStatus = clGetProgramBuildInfo (program, 
                devices[deviceId], 
                CL_PROGRAM_BUILD_LOG, 
                buildLogSize, 
                buildLog, 
                NULL);
            if(!sampleCommon->checkVal(
                logStatus,
                CL_SUCCESS,
                "clGetProgramBuildInfo failed."))
            {
                free(buildLog);
                return SDK_FAILURE;
            }

            std::cout << " \n\t\t\tBUILD LOG\n";
            std::cout << " ************************************************\n";
            std::cout << buildLog << std::endl;
            std::cout << " ************************************************\n";
            free(buildLog);
        }

        if(!sampleCommon->checkVal(
            status,
            CL_SUCCESS,
            "clBuildProgram failed."))
            return SDK_FAILURE;
    }

    /* Copy 1D Fast Path */
    kernel[0] = clCreateKernel(program, "copy1DFastPath", &status);
    if(!sampleCommon->checkVal(
            status,
            CL_SUCCESS,
            "clCreateKernel failed.(copy1DFastPath)"))
        return SDK_FAILURE;

    /* Copy 1D Complete Path */
    kernel[1] = clCreateKernel(program, "copy1DCompletePath", &status);
    if(!sampleCommon->checkVal(
            status,
            CL_SUCCESS,
            "clCreateKernel failed. (copy1DCompletePath)"))
        return SDK_FAILURE;

    /* Copy 2D float */
    kernel[2] = clCreateKernel(program, "copy2Dfloat", &status);
    if(!sampleCommon->checkVal(
            status,
            CL_SUCCESS,
            "clCreateKernel failed. (copy2Dfloat)"))
        return SDK_FAILURE;

    /* Copy 2D float4 */
    kernel[3] = clCreateKernel(program, "copy2Dfloat4", &status);
    if(!sampleCommon->checkVal(
            status,
            CL_SUCCESS,
            "clCreateKernel failed. (copy2Dfloat4)"))
        return SDK_FAILURE;

    /* Copy 1D float4 */
    kernel[4] = clCreateKernel(program, "copy1Dfloat4", &status);
    if(!sampleCommon->checkVal(
            status,
            CL_SUCCESS,
            "clCreateKernel failed. (copy1Dfloat4)"))
        return SDK_FAILURE;

    /* Copy No Coalesced */
    kernel[5] = clCreateKernel(program, "NoCoal", &status);
    if(!sampleCommon->checkVal(
            status,
            CL_SUCCESS,
            "clCreateKernel failed. (NoCoal)"))
        return SDK_FAILURE;

    /* Copy Split */
    kernel[6] = clCreateKernel(program, "Split", &status);
    if(!sampleCommon->checkVal(
            status,
            CL_SUCCESS,
            "clCreateKernel failed. (Split)"))
        return SDK_FAILURE;

    return SDK_SUCCESS;
}
示例#3
0
int main() {
    // Get platform information
    err = clGetPlatformIDs(0, NULL, &numOfPlatforms);
    if (err) Error("Fail to get the number of platforms");
    printf("The machine has %d platform(s) for OpenCL.\n", numOfPlatforms);

    platformIDs = new cl_platform_id [numOfPlatforms];
    err = clGetPlatformIDs(numOfPlatforms, platformIDs, NULL);
    if (err) Error("Fail to get the platform list");

    int cudaPlatformID = -1;

    for (int i = 0; i < numOfPlatforms; i++) {
        char platformName[50];
        err = clGetPlatformInfo(platformIDs[i], CL_PLATFORM_NAME, 50, platformName, NULL);
        if (err) Error("Fail to get the platform name");
        printf("Platform %d is %s\n", i + 1, platformName);
        if (!strcmp(platformName, "NVIDIA CUDA")) cudaPlatformID = i;
    }
    printf("\n");

    if (cudaPlatformID == -1) Error("Fail to find an NVIDIA CUDA platform");

    printf("Platform %d (NVIDIA CUDA) is chosen for use.\n", cudaPlatformID + 1);
    printf("\n");

    // Get device information
    err = clGetDeviceIDs(platformIDs[cudaPlatformID], CL_DEVICE_TYPE_GPU, 0, NULL, &numOfDevices);
    if (err) Error("Fail to get the number of devices");
    printf("CUDA platform has %d device(s).\n", numOfDevices);

    deviceIDs = new cl_device_id [numOfDevices];
    err = clGetDeviceIDs(platformIDs[cudaPlatformID], CL_DEVICE_TYPE_GPU, numOfDevices, deviceIDs, NULL);
    if (err) Error("Fail to get the device list");
    for (int i = 0; i < numOfDevices; i++) {
        char deviceName[50];
        err = clGetDeviceInfo(deviceIDs[i], CL_DEVICE_NAME, 50, deviceName, NULL);
        if (err) Error("Fail to get the device name");
        printf("Device %d is %s\n", i + 1, deviceName);
    }
    printf("\n");

    // Create a context
    context = clCreateContext(NULL, numOfDevices, deviceIDs, NULL, NULL, &err);
    if (err) Error("Fail to create a context");

    printf("Device 1 is chosen for use.\n");
    printf("\n");

    // Create a command queue for the first device
    commandQueue = clCreateCommandQueue(context, deviceIDs[0],
                                        CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_PROFILING_ENABLE, &err);
    if (err) Error("Fail to create a command queue");

    // create the program
    cl_program program = CreateProgram(exclusiveScanKernels, "exclusive scan");

    // create two kernels
    cl_kernel scanKernel = clCreateKernel(program, "Scan", &err);
    if (err) Error("Fail to create the kernel for scan");

    cl_kernel reverseUpdateKernel = clCreateKernel(program, "ReverseUpdate", &err);
    if (err) Error("Fail to create the kernel for reverse update");

    // Get the work group size
    size_t maxWorkGroupSize;
    err = clGetKernelWorkGroupInfo(scanKernel, deviceIDs[0], CL_KERNEL_WORK_GROUP_SIZE,
                                   sizeof(size_t), &maxWorkGroupSize, NULL);
    printf("maxWorkGroupSize = %d\n", maxWorkGroupSize);

    err = clGetKernelWorkGroupInfo(reverseUpdateKernel, deviceIDs[0], CL_KERNEL_WORK_GROUP_SIZE,
                                   sizeof(size_t), &maxWorkGroupSize, NULL);
    printf("maxWorkGroupSize = %d\n", maxWorkGroupSize);

    // Set work group size to 64

    int workGroupSize = 512;

    int length = 2048000;
    int *arr = new int [length];
    for (int i = 0; i < length; i++)
        arr[i] = rand() % 100;

    int *prefixSum = new int [length];
    prefixSum[0] = 0;

    int t0 = clock();

    for (int i = 1; i < length; i++)
        prefixSum[i] = prefixSum[i - 1] + arr[i - 1];

    int t1 = clock();

    printf("time1: %lf\n", (t1 - t0) * 1.0 / CLOCKS_PER_SEC);

    cl_mem d_arr = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(int) * length, NULL, &err);
    if (err) Error("Fail to create d_arr");

    err = clEnqueueWriteBuffer(commandQueue, d_arr, CL_TRUE, 0, sizeof(int) * length, arr, 0, NULL, NULL);
    if (err) Error("Fail to write d_arr");

    clSetKernelArg(scanKernel, 0, sizeof(cl_mem), &d_arr);
    cl_int d_length = length;
    clSetKernelArg(scanKernel, 1, sizeof(cl_int), &d_length);
    cl_int d_step = 1;
    clSetKernelArg(scanKernel, 2, sizeof(cl_int), &d_step);
    clSetKernelArg(scanKernel, 3, sizeof(cl_int) * (workGroupSize * 2 + workGroupSize * 2 / 16 + 1), NULL);

    int problemSize = length;
    int records[10];
    int num = 0;

    int t2 = clock();

    for (; problemSize > 1; problemSize = (problemSize - 1) / (workGroupSize * 2) + 1) {

        if (num) d_step *= workGroupSize * 2;

        printf("d_step = %d\n", d_step);

        records[num++] = problemSize;

        printf("problemSize = %d\n", problemSize);

        clSetKernelArg(scanKernel, 2, sizeof(cl_int), &d_step);

        size_t globalWorkSize = ((problemSize - 1) / (workGroupSize * 2) + 1) * workGroupSize;
        size_t localWorkSize = workGroupSize;

        err = clEnqueueNDRangeKernel(commandQueue, scanKernel, 1, NULL, &globalWorkSize, &localWorkSize,
                                     0, NULL, NULL);
        if (err) Error("Fail to enqueue scan");
        clFinish(commandQueue);
    }

    //CheckValues(length, d_arr);

    int zero = 0;
    clEnqueueWriteBuffer(commandQueue, d_arr, CL_TRUE, 0, sizeof(int), &zero, 0, NULL, NULL);

    printf("d_step = %d\n", d_step);

    //scanf("%*c");

    clSetKernelArg(reverseUpdateKernel, 0, sizeof(cl_mem), &d_arr);
    clSetKernelArg(reverseUpdateKernel, 1, sizeof(cl_int), &d_length);

    for (int i = num - 1; i >= 0; i--, d_step /= workGroupSize * 2) {
        printf("d_step = %d\n", d_step);

        clSetKernelArg(reverseUpdateKernel, 2, sizeof(cl_int), &d_step);
        size_t globalWorkSize = ((records[i] - 1) / (workGroupSize * 2) + 1) * workGroupSize;
        size_t localWorkSize = workGroupSize;

        printf("globalWorkSize = %d, localWorkSize = %d\n", globalWorkSize, localWorkSize);

        err = clEnqueueNDRangeKernel(commandQueue, reverseUpdateKernel, 1, NULL, &globalWorkSize, &localWorkSize,
                                     0, NULL, NULL);
        if (err) Error("Fail to enqueue scan");
        clFinish(commandQueue);
    }

    int t3 = clock();

    printf("time: %lf\n", (t3 - t2) * 1.0 / CLOCKS_PER_SEC);

    int *GPUResult = new int [length];
    memset(GPUResult, 0, sizeof(int) * length);
    err = clEnqueueReadBuffer(commandQueue, d_arr, CL_TRUE, 0, sizeof(int) * length, GPUResult, 0, NULL, NULL);
    printf("err = %d\n", err);
    if (err) Error("Fail to read d_arr");

    for (int i = 0; i < length; i++)
        if (GPUResult[i] != prefixSum[i]) printf("at i = %d, GPUResult[%d] = %d, prefixSum[%d] = %d\n", i, i, GPUResult[i], i, prefixSum[i]);

    system("pause");
    return 0;
}
示例#4
0
int device_check()
{

    
    cl_int err;
    cl_int i,j,cnt;
    
    cl_platform_id *platforms;
    cl_uint num_platforms;
    cl_platform_id platform;

    char* ext_data;
    size_t ext_size;
        
    cl_device_id *devs;
    size_t num_devs;
    cl_device_id device;
    
    /* Program data structures */
    cl_program program;
    FILE *program_handle;
    char *program_buffer[NUM_FILES];
    char *program_log;
    const char *file_name[] = {PROGRAM_FILE_1, PROGRAM_FILE_2};
    const char options[] = "-cl-finite-math-only -cl-no-signed-zeros";  
    size_t program_size[NUM_FILES];
    size_t log_size;
    
    /*kernel data*/
    cl_kernel *kernels;
    cl_uint num_kernels;
    

    /*枚举所有的平台,最多10个*/
    err = clGetPlatformIDs(10, NULL, &num_platforms);  /*参数1:要枚举的数量,参数2:返回结果的存放空间,参数3:返回结果的条数*/
    if(err < 0) {
        perror("Couldn't find any platforms");
        exit(1);
    }
    platforms=(cl_platform_id *)malloc( sizeof(cl_platform_id) * num_platforms );
    clGetPlatformIDs(num_platforms, platforms, NULL);

    /*现在num_platforms和platforms是平台的数量和数据指针*/
    /* Find infor of all platforms */
    for (i=0; i<num_platforms; i++)
    {
        /* Find size of extension data */
        /*clGetPlatformInfo*/
        /*
        参数1:平台
        参数2:所需信息的枚举
        参数3:返回值需要保存的长度
        参数4:返回值的存储空间
        参数5:所需数据的真实长度
        */

        
        platform = platforms[i]

        /*NAME*/
        err = clGetPlatformInfo(platform, CL_PLATFORM_NAME, 0, NULL, &ext_size);  
        if(err < 0) {
            perror("Couldn't read CL_PLATFORM_NAME data.");
        }
        ext_data = (char *)malloc(ext_size);
        clGetPlatformInfo(platform, CL_PLATFORM_NAME, ext_size, ext_data, NULL);
        printf("Platform %d name: %s\n", i, ext_data);
        free(ext_data);

        /*VRNDOR*/
        err = clGetPlatformInfo(platform, CL_PLATFORM_VENDER, 0, NULL, &ext_size);
        if(err < 0) {
            perror("Couldn't read CL_PLATFORM_VENDER data.");
        }
        ext_data = (char *)malloc(ext_size);
        clGetPlatformInfo(platform, CL_PLATFORM_VENDER, ext_size, ext_data, NULL);
        printf("Platform %d vender: %s\n", i, ext_data);
        free(ext_data)

        /*VERSION*/
        err = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &ext_size);
        if(err < 0) {
            perror("Couldn't read CL_PLATFORM_VERSION data.");
        }
        ext_data = (char *)malloc(ext_size);
        clGetPlatformInfo(platform, CL_PLATFORM_VERSION, ext_size, ext_data, NULL);
        printf("Platform %d support OpenCL version: %s\n", i, ext_data);
        free(ext_data)
        
        /*PROFILE*/
        err = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, 0, NULL, &ext_size);
        if(err < 0) {
            perror("Couldn't read CL_PLATFORM_PROFILE data.");
        }
        ext_data = (char *)malloc(ext_size);
        clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, ext_size, ext_data, NULL);
        printf("Platform %d support OpenCL profile: %s\n", i, ext_data);
        free(ext_data)
        
        /*EXTENSIONS*/
        err = clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, 0, NULL, &ext_size);
        if(err < 0) {
            perror("Couldn't read CL_PLATFORM_EXTENSIONS data.");
        }
        ext_data = (char *)malloc(ext_size);
        clGetPlatformInfo(platform, CL_PLATFORM_EXTENSIONS, ext_size, ext_data, NULL);
        printf("Platform %d support OpenCL extensions: %s\n", i, ext_data);
        free(ext_data)
        
        /*现在对这个platform进一步的提取信息*/
        /*获取Device信息*/
        /*clGetDeviceIDs*/
        /*
        参数1:平台句柄
        参数2:要获取设备的类型
        参数3:要获取的数量
        参数4:返回信息的数据指针
        参数5:返回信息的实际条数
        */
        for (cnt=0; cnt<2; cnt++)
        {
            if (0==cnt) err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devs);
            if (1==cnt) err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_CPU, 0, NULL, &num_devs);
            if(err == CL_DEVICE_NOT_FOUND)
            {
                if (0==cnt) printf("No GPU support OpenCL found.\n");
                if (1==cnt) printf("No CPU support OpenCL found.\n");
            }
            else if(err < 0)
            {
                if (0==cnt) printf("Couldn't access any GPU devices.\n");
                if (1==cnt) printf("Couldn't access any CPU devices.\n");
            }
            else
            {
                devs = (cl_device_id *)malloc( sizeof(cl_device_id) * num_devs );
                clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devs, devs, NULL);
                for (j=0; j<num_devs; j++)
                {
                    cl_device_id dev;
                    char dev_name_data[48];
                    cl_uint addr_data;
                    cl_ulong global_mem_size;
                    
                    /*name*/
                    err = clGetDeviceInfo(dev, CL_DEVICE_NAME, 48 * sizeof(char), dev_name_data, NULL);
                    if(err < 0) {
                        perror("Couldn't read dev name data");
                        exit(1);
                    }
                    printf("Dev %d: NAME: %s\n", j, name_data);
                                    
                    /*address size*/
                    err = clGetDeviceInfo(dev, CL_DEVICE_ADDRESS_BITS, sizeof(addr_data), &addr_data, NULL);
                    printf("Dev %d: ADDRESS_WIDTH: %u\n", j, addr_data);
                    
                    /*device extensions*/
                    ext_data=malloc(4096)
                    clGetDeviceInfo(dev, CL_DEVICE_EXTENSIONS, 4096 * sizeof(char), ext_data, NULL);
                    printf("Dev %d: EXTENSIONS: %s\n", j, ext_data);
                    free(ext_data)
                    
                    err = clGetDeviceInfo(dev, CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(global_mem_size), &global_mem_size, NULL);
                    printf("Dev %d: GLOBAL_MEM_SIZE: %u\n", j, global_mem_size);
                }
            }

        }
    }

    return 0;
}
示例#5
0
QT_BEGIN_NAMESPACE

/*!
    \class QCLPlatform
    \brief The QCLPlatform class represents an OpenCL platform definition.
    \since 4.7
    \ingroup opencl

    An OpenCL platform consists of the host CPU plus one or more
    devices, and manages memory resources and executable kernels.

    The platforms() function can be used to obtain the list of
    OpenCL platforms that are accessible to the host.  For each
    platform, QCLDevice::devices() can be used to enumerate the
    devices that are managed by the platform.

    QCLPlatform functions can be used to query information about
    the platform:

    \list
    \o profile() - describes the level of OpenCL support that
       is available; either \c{FULL_PROFILE} or \c{EMBEDDED_PROFILE}.
       The isFullProfile() and isEmbeddedProfile() convenience
       functions can be used to check for specific profile strings.
    \o version() - version of OpenCL supported by the platform;
       usually something like \c{OpenCL 1.0}.
    \o versionFlags() - flag bits indicating which versions of
       OpenCL are supported by this platform, in an easier to
       use form than the string from version().
    \o name() - name of the platform.
    \o vendor() - name of the vendor that created the platform.
    \o extensionSuffix() - the vendor extension suffix if the \c{cl_khr_icd}
       extension is supported; an empty string otherwise.
    \o extensions() - list of OpenCL extensions that are supported
       by the platform.  The hasExtension() function can be used
       to check for a specific extension.
    \endlist

    The \l{Querying OpenCL Device Capabilities}{clinfo} utility
    program can be used to dump all of the platforms that are
    supported by the system's OpenCL implementation.

    \sa QCLDevice
*/

/*!
    \fn QCLPlatform::QCLPlatform()

    Constructs a default OpenCL platform identifier.
*/

/*!
    \fn QCLPlatform::QCLPlatform(cl_platform_id id)

    Constructs an OpenCL platform identifier that corresponds to the
    native OpenCL value \a id.
*/

/*!
    \fn bool QCLPlatform::isNull() const

    Returns true if this OpenCL platform identifier is null.
*/

static QString qt_cl_platform_string(cl_platform_id id, cl_platform_info name)
{
    size_t size;
    if (!id || clGetPlatformInfo(id, name, 0, 0, &size) != CL_SUCCESS)
        return QString();
    QVarLengthArray<char> buf(size);
    clGetPlatformInfo(id, name, size, buf.data(), &size);
    return QString::fromLatin1(buf.data());
}
示例#6
0
文件: xcl.c 项目: shvo/Rodinia-FPGA
xcl_world xcl_world_single(cl_device_type device_type, char *target_vendor, char *target_device) {
	int err;
	xcl_world world;
	cl_uint num_platforms;

	err = clGetPlatformIDs(0, NULL, &num_platforms);
	if (err != CL_SUCCESS) {
		printf("Error: no platforms available or OpenCL install broken");
		printf("Test failed\n");
		exit(EXIT_FAILURE);
	}

	cl_platform_id *platform_ids = (cl_platform_id *) malloc(sizeof(cl_platform_id) * num_platforms);

	if (platform_ids == NULL) {
		printf("Error: Out of Memory\n");
		printf("Test failed\n");
		exit(EXIT_FAILURE);
	}

	err = clGetPlatformIDs(num_platforms, platform_ids, NULL);
	if (err != CL_SUCCESS) {
		printf("Error: Failed to find an OpenCL platform!\n");
		printf("Test failed\n");
		exit(EXIT_FAILURE);
	}

	int i;
        char cl_platform_vendor[1001];
        //find target vendor if target_vendor is specified
        if (target_vendor != NULL) {
                for(i = 0; i < num_platforms; i++) {
                        err = clGetPlatformInfo(platform_ids[i], CL_PLATFORM_VENDOR, 1000, (void *)cl_platform_vendor,NULL);
                        if (err != CL_SUCCESS) {
                                printf("Error: clGetPlatformInfo(CL_PLATFORM_VENDOR) failed!\n");
                                printf("Test failed\n");
                                exit(EXIT_FAILURE);
                        }
                        if ((target_vendor != NULL) && (strcmp(cl_platform_vendor, target_vendor) == 0)) {
                                printf("INFO: Selected platform %d from %s\n", i, cl_platform_vendor);
                                world.platform_id = platform_ids[i];
                                break;
                        }
                }
        } else {
                for(i = 0; i < num_platforms; i++) {
                        err = clGetDeviceIDs(platform_ids[i], device_type,
                                             1, &world.device_id, NULL);
                        if (err == CL_SUCCESS) {
                                world.platform_id = platform_ids[i];
                                break;
                        }
                }            
        }
	free(platform_ids);
	if (i == num_platforms) {
		printf("Error: Failed to find a platform\n");
		printf("Test failed\n");
		exit(EXIT_FAILURE);
	}

        if (target_device != NULL) {
                //find target device
                cl_device_id devices[16];  // compute device id 
                cl_uint num_devices;
                char cl_device_name[100];
                err = clGetDeviceIDs(world.platform_id, CL_DEVICE_TYPE_ACCELERATOR,
                                     16, devices, &num_devices);
                if (err != CL_SUCCESS) {
                        printf("Error: Failed to create a device group!\n");
                        printf("Test failed\n");
                        exit(EXIT_FAILURE);
                }

                //iterate all devices to select the target device. 
                for (i=0; i<num_devices; i++) {
                        err = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 100, cl_device_name, 0);
                        if (err != CL_SUCCESS) {
                                printf("Error: Failed to get device name for device %d!\n", i);
                                printf("Test failed\n");
                                exit(EXIT_FAILURE);
                        }
                        //printf("CL_DEVICE_NAME %s\n", cl_device_name);
                        if (strcmp(cl_device_name, target_device) == 0) {
                                world.device_id = devices[i];
                                printf("INFO: Selected %s as the target device\n", cl_device_name);
                                break;
                        }
                }

                if (i == num_devices) {
                        printf("Error: Failed to find target device %s\n", target_device);
                        printf("Test failed\n");
                        exit(EXIT_FAILURE);
                }
        }

	world.context = clCreateContext(0, 1, &world.device_id,
	                                NULL, NULL, &err);
	if (err != CL_SUCCESS) {
		printf("Error: Failed to create a compute context!\n");
		printf("Test failed\n");
		exit(EXIT_FAILURE);
	}

	world.command_queue = clCreateCommandQueue(world.context,
	                                           world.device_id,
	                                           CL_QUEUE_PROFILING_ENABLE,
	                                           &err);
	if (err != CL_SUCCESS) {
		printf("Error: Failed to create a command queue!\n");
		printf("Test failed\n");
		exit(EXIT_FAILURE);
	}

	return world;
}
/*
 * @brief sets up the OpenCL framework by detecting and initializing the available device
 * @param use_gpu flag denoting if the gpu is the desired platform
 */
static int initialize(int use_gpu) {
    cl_int result;
    size_t size;

    // create OpenCL context
    // you have to specify what platform you want to use
    // not uncommon for both NVIDIA and AMD to be installed
    cl_platform_id platform_id[2];

    cl_uint num_avail;
    cl_int err = clGetPlatformIDs(2, platform_id, &num_avail);
    if (err != CL_SUCCESS) {
        if (err == CL_INVALID_VALUE)printf("clGetPlatformIDs() returned invalid_value\n");
        printf("ERROR: clGetPlatformIDs(1,*,0) failed\n");
        return -1;
    }
    printf("number of available platforms:%d.\n",num_avail);
    char info[100];
    clGetPlatformInfo(platform_id[0], CL_PLATFORM_VENDOR, 100, info, NULL);
    printf("clGetPlatformInfo: %s\n", info);

    cl_context_properties ctxprop[] = {CL_CONTEXT_PLATFORM, (cl_context_properties) platform_id[0], 0};
    device_type = use_gpu ? CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU;
    context = clCreateContextFromType(ctxprop, device_type, NULL, NULL, &err);

    if (!context) {
        if (CL_INVALID_PLATFORM == err)
            printf("CL_INVALID_PLATFORM returned by clCreateContextFromType()\n");
        else if (CL_INVALID_VALUE == err)
            printf("CL_INVALID_VALUE returned by clCreateContextFromType()\n");
        else if (CL_INVALID_DEVICE_TYPE == err)
            printf("CL_INVALID_DEVICE_TYPE returned by clCreateContextFromType()\n");
        else if (CL_INVALID_OPERATION == err)
            printf("CL_INVALID_OPERATION returned by clCreateContextFromType()\n");
        else if (CL_DEVICE_NOT_AVAILABLE == err)
            printf("CL_DEVICE_NOT_AVAILABLE returned by clCreateContextFromType()\n");
        else if (CL_DEVICE_NOT_FOUND == err)
            printf("CL_DEVICE_NOT_FOUND returned by clCreateContextFromType()\n");
        else if (CL_OUT_OF_RESOURCES == err)
            printf("CL_OUT_OF_RESOURCES returned by clCreateContextFromType()\n");


        printf("ERROR: clCreateContextFromType(%s) failed\n", use_gpu ? "GPU" : "CPU");
        return -1;
    }

    // get the list of GPUs
    result = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size);
    num_devices = (int) (size / sizeof (cl_device_id));

    if (result != CL_SUCCESS || num_devices < 1) {
        printf("ERROR: clGetContextInfo() failed\n");
        return -1;
    }
    device_list = new cl_device_id[num_devices];
    if (!device_list) {
        printf("ERROR: new cl_device_id[] failed\n");
        return -1;
    }
    result = clGetContextInfo(context, CL_CONTEXT_DEVICES, size, device_list, NULL);
    if (result != CL_SUCCESS) {
        printf("ERROR: clGetContextInfo() failed\n");
        return -1;
    }
    size_t max_work_item_sizes[3];
    result = clGetDeviceInfo(device_list[0], CL_DEVICE_MAX_WORK_ITEM_SIZES,
                sizeof(max_work_item_sizes), (void*)max_work_item_sizes, NULL);
    if (result != CL_SUCCESS) {
        printf("ERROR: clGetDeviceInfo() failed\n");
        return -1;
    }
  if (max_work_item_sizes[0] < threads_per_block)
    threads_per_block = max_work_item_sizes[0];

   // create command queue for the first device
    cmd_queue = clCreateCommandQueue(context, device_list[0], 0, NULL);
    if (!cmd_queue) {
        printf("ERROR: clCreateCommandQueue() failed\n");
        return -1;
    }

    return 0;
}
示例#8
0
文件: ocl.c 项目: mprymek/OpenCL
static SEXP getPlatformInfo(cl_platform_id platform_id, cl_device_info di) {
    if ((last_ocl_error = clGetPlatformInfo(platform_id, di, sizeof(infobuf), &infobuf, NULL)) != CL_SUCCESS)
	ocl_err("clGetPlatformInfo");
    return Rf_mkString(infobuf);
}
示例#9
0
static int init_cladsyn(CSOUND *csound, CLADSYN *p){

  int asize, ipsize, fpsize, err;
  cl_device_id device_ids[32], device_id;             
  cl_context context;                
  cl_command_queue commands;          
  cl_program program;                
  cl_kernel kernel1, kernel2;                 
  cl_uint num = 0, nump =  0;
  cl_platform_id platforms[16];
    uint i;

  if(p->fsig->overlap > 1024)
     return csound->InitError(csound, "overlap is too large\n");



  err = clGetDeviceIDs(NULL, CL_DEVICE_TYPE_ALL, 32, device_ids, &num);
  if (err != CL_SUCCESS){
    clGetPlatformIDs(16, platforms, &nump);
    int devs = 0;
    for(i=0; i < nump && devs < 32; i++){
     char name[128];
     clGetPlatformInfo(platforms[i], CL_PLATFORM_NAME, 128, name, NULL);
     csound->Message(csound, "available platform[%d] %s\n",i, name);
     err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_ALL, 32-devs, &device_ids[devs], &num);
    if (err != CL_SUCCESS)
     csound->InitError(csound, "failed to find an OpenCL device! %s \n", cl_error_string(err));
    }
    devs += num;
  }

  
  for(i=0; i < num; i++){
  char name[128];
  cl_device_type type;
  clGetDeviceInfo(device_ids[i], CL_DEVICE_NAME, 128, name, NULL);
  clGetDeviceInfo(device_ids[i], CL_DEVICE_TYPE, sizeof(cl_device_type), &type, NULL);
  if(type & CL_DEVICE_TYPE_CPU)
  csound->Message(csound, "available CPU[device %d] %s\n",i, name);
  else  if(type & CL_DEVICE_TYPE_GPU)
  csound->Message(csound, "available GPU[device %d] %s\n",i, name);
  else  if(type & CL_DEVICE_TYPE_ACCELERATOR)
  csound->Message(csound, "available ACCELLERATOR[device %d] %s\n",i, name);
  else 
  csound->Message(csound, "available generic [device %d] %s\n",i, name);;
  }

  // SELECT THE GPU HERE
  if(*p->idev < num)
   device_id = device_ids[(int)*p->idev];
  else
   device_id = device_ids[num-1];

   context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
   if (!context)
     return csound->InitError(csound, "Failed to create a compute context! %s\n", 
                             cl_error_string(err));
  
    // Create a command commands
    //
    commands = clCreateCommandQueue(context, device_id, 0, &err);
    if (!commands)
       return csound->InitError(csound, "Failed to create a command commands! %s\n", 
                             cl_error_string(err));
    // Create the compute program from the source buffer
    //
    program = clCreateProgramWithSource(context, 1, (const char **) &code, NULL, &err);
    if (!program)
       return csound->InitError(csound, "Failed to create compute program! %s\n", 
                             cl_error_string(err));
  
    err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
    if (err != CL_SUCCESS)
    {
        size_t len;
        char buffer[2048];
        csound->Message(csound, "Failed to build program executable! %s\n", 
                             cl_error_string(err));
        clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
        return csound->InitError(csound, "%s\n", buffer);
    }

    kernel1 = clCreateKernel(program, "sample", &err);
    if (!kernel1 || err != CL_SUCCESS)
      return csound->InitError(csound, "Failed to create sample compute kernel! %s\n", 
                             cl_error_string(err));

   kernel2 = clCreateKernel(program, "update", &err);
    if (!kernel2 || err != CL_SUCCESS)
      return csound->InitError(csound,"Failed to create update compute kernel! %s\n", 
                             cl_error_string(err));
 
  char name[128];
  clGetDeviceInfo(device_id, CL_DEVICE_NAME, 128, name, NULL);
  csound->Message(csound, "using device: %s\n",name);

  p->bins = (p->fsig->N)/2;

  if(*p->inum > 0 && *p->inum < p->bins) p->bins = *p->inum;

  p->vsamps = p->fsig->overlap;
  p->threads = p->bins*p->vsamps;
  p->mthreads = (p->bins > p->vsamps ? p->bins : p->vsamps);

  asize =  p->vsamps*sizeof(cl_float);
  ipsize = (p->bins > p->vsamps ? p->bins : p->vsamps)*sizeof(cl_long);
  fpsize = p->fsig->N*sizeof(cl_float);

  p->out = clCreateBuffer(context,0, asize, NULL, NULL);
  p->frame =   clCreateBuffer(context, CL_MEM_READ_ONLY, fpsize, NULL, NULL);
  p->ph =  clCreateBuffer(context,0, ipsize, NULL, NULL);
  p->amps =  clCreateBuffer(context,0,(p->bins > p->vsamps ? p->bins : p->vsamps)*sizeof(cl_float), NULL, NULL);
 
  // memset needed?

  asize = p->vsamps*sizeof(float);
  if(p->out_.auxp == NULL ||
      p->out_.size < (unsigned long) asize)
    csound->AuxAlloc(csound, asize , &p->out_);

  csound->RegisterDeinitCallback(csound, p, destroy_cladsyn);
  p->count = 0;
  p->context = context;
  p->program = program;
  p->commands = commands;
  p->kernel1 = kernel1;
  p->kernel2 = kernel2;
 
  clGetKernelWorkGroupInfo(p->kernel1, 
       device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(p->wgs1), &p->wgs1, NULL);
  clGetKernelWorkGroupInfo(p->kernel2, 
       device_id, CL_KERNEL_WORK_GROUP_SIZE, sizeof(p->wgs1), &p->wgs2, NULL);
 
  p->sr = csound->GetSr(csound); 
  clSetKernelArg(p->kernel1, 0, sizeof(cl_mem), &p->out);
  clSetKernelArg(p->kernel1, 1, sizeof(cl_mem), &p->frame);
  clSetKernelArg(p->kernel1, 2, sizeof(cl_mem), &p->ph);
  clSetKernelArg(p->kernel1, 3, sizeof(cl_mem), &p->amps);
  clSetKernelArg(p->kernel1, 5, sizeof(cl_int), &p->bins);
  clSetKernelArg(p->kernel1, 6, sizeof(cl_int), &p->vsamps);
  clSetKernelArg(p->kernel1, 7, sizeof(cl_float), &p->sr);

  clSetKernelArg(p->kernel2, 0, sizeof(cl_mem), &p->out);
  clSetKernelArg(p->kernel2, 1, sizeof(cl_mem), &p->frame);
  clSetKernelArg(p->kernel2, 2, sizeof(cl_mem), &p->ph);
  clSetKernelArg(p->kernel2, 3, sizeof(cl_mem), &p->amps);
  clSetKernelArg(p->kernel2, 5, sizeof(cl_int), &p->bins);
  clSetKernelArg(p->kernel2, 6, sizeof(cl_int), &p->vsamps);
  clSetKernelArg(p->kernel2, 7, sizeof(cl_float),  &p->sr); 
  return OK;
}
int SDKSample::validatePlatfromAndDeviceOptions()
{
    cl_int status = CL_SUCCESS;
    cl_uint numPlatforms;
    cl_platform_id platform = NULL;
    status = clGetPlatformIDs(0, NULL, &numPlatforms);
    if(status != CL_SUCCESS)
    {
        std::cout<<"Error: clGetPlatformIDs failed. Error code : ";
        std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
        return 0;
    }

    if (0 < numPlatforms) 
    {
        // Validate platformId
        if(platformId >= numPlatforms)
        {
            if(numPlatforms - 1 == 0)
                std::cout << "platformId should be 0" << std::endl;
            else
                std::cout << "platformId should be 0 to " << numPlatforms - 1 << std::endl;
            usage();
            return 0;
        }

        // Get selected platform
        cl_platform_id* platforms = new cl_platform_id[numPlatforms];
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        if(status != CL_SUCCESS)
        {
            std::cout<<"Error: clGetPlatformIDs failed. Error code : ";
            std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
            return 0;
        }

        // Print all platforms
        for (unsigned i = 0; i < numPlatforms; ++i) 
        {
            char pbuf[100];
            status = clGetPlatformInfo(platforms[i],
                                       CL_PLATFORM_VENDOR,
                                       sizeof(pbuf),
                                       pbuf,
                                       NULL);

            if(status != CL_SUCCESS)
            {
                std::cout<<"Error: clGetPlatformInfo failed. Error code : ";
                std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
                return 0;
            }

            std::cout << "Platform " << i << " : " << pbuf << std::endl;
        }

        // Get AMD platform
        for (unsigned i = 0; i < numPlatforms; ++i) 
        {
            char pbuf[100];
            status = clGetPlatformInfo(platforms[i],
                                       CL_PLATFORM_VENDOR,
                                       sizeof(pbuf),
                                       pbuf,
                                       NULL);

            if(status != CL_SUCCESS)
            {
                std::cout<<"Error: clGetPlatformInfo failed. Error code : ";
                std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
                return 0;
            }

            platform = platforms[i];
            if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) 
            {
                break;
            }
        }

        if(isPlatformEnabled())
            platform = platforms[platformId];

        cl_device_type dType = CL_DEVICE_TYPE_GPU;
        if(deviceType.compare("cpu") == 0)
            dType = CL_DEVICE_TYPE_CPU;
        if(deviceType.compare("gpu") == 0)
            dType = CL_DEVICE_TYPE_GPU;
        else
            dType = CL_DEVICE_TYPE_ALL;

        // Check for GPU
        if(dType == CL_DEVICE_TYPE_GPU)
        {
            cl_context_properties cps[3] = 
            {
                CL_CONTEXT_PLATFORM, 
                (cl_context_properties)platform, 
                0
            };

            cl_context context = clCreateContextFromType(cps,
                                                        dType,
                                                        NULL,
                                                        NULL,
                                                        &status);

            if(status == CL_DEVICE_NOT_FOUND)
            {
                dType = CL_DEVICE_TYPE_CPU;
                gpu = false;
            }

            clReleaseContext(context);
        }

        // Get device count
        cl_uint deviceCount = 0;
        status = clGetDeviceIDs(platform, dType, 0, NULL, &deviceCount);
        if(status != CL_SUCCESS)
        {
            std::cout<<"Error: clGetDeviceIDs failed. Error code : ";
            std::cout << streamsdk::getOpenCLErrorCodeStr(status) << std::endl;
            return 0;
        }

        // Validate deviceId
        if(deviceId >= deviceCount)
        {
            if(deviceCount - 1 == 0)
                std::cout << "deviceId should be 0" << std::endl;
            else
                std::cout << "deviceId should be 0 to " << deviceCount - 1 << std::endl;
            usage();
            return 0;
        }

        delete[] platforms;
    }
    return 1;
}
示例#11
0
void opencl_init(void) {

	// get the platform

	cl_uint num_platforms;
	clError = clGetPlatformIDs(0, NULL, &num_platforms);
	checkErr(clError, "clGetPlatformIDs( 0, NULL, &num_platforms );");

	if (num_platforms <= 0) {
		std::cout << "No platform..." << std::endl;
		exit(1);
	}

	cl_platform_id* platforms = new cl_platform_id[num_platforms];
	clError = clGetPlatformIDs(num_platforms, platforms, NULL);
	checkErr(clError, "clGetPlatformIDs( num_platforms, &platforms, NULL );");
	if (num_platforms > 1) {
		char platformName[256];
		clError = clGetPlatformInfo(platforms[0], CL_PLATFORM_VENDOR,
				sizeof(platformName), platformName, NULL);
		std::cerr << "Multiple platforms found defaulting to: " << platformName
				<< std::endl;
	}
	platform_id = platforms[0];
	if (getenv("OPENCL_PLATEFORM"))
		platform_id = platforms[1];
	delete platforms;

	// Connect to a compute device
	//
	cl_uint device_count = 0;
	clError = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL,
			&device_count);
	checkErr(clError, "Failed to create a device group");
	cl_device_id* deviceIds = (cl_device_id*) malloc(
			sizeof(cl_device_id) * device_count);
	clError = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, device_count,
			deviceIds, NULL);
	if (device_count > 1) {
		char device_name[256];
		int compute_units;
		clError = clGetDeviceInfo(deviceIds[0], CL_DEVICE_NAME,
				sizeof(device_name), device_name, NULL);
		checkErr(clError, "clGetDeviceInfo failed");
		clError = clGetDeviceInfo(deviceIds[0], CL_DEVICE_MAX_COMPUTE_UNITS,
				sizeof(cl_uint), &compute_units, NULL);
		checkErr(clError, "clGetDeviceInfo failed");
		std::cerr << "Multiple devices found defaulting to: " << device_name;
		std::cerr << " with " << compute_units << " compute units" << std::endl;
	}
	device_id = deviceIds[0];
	delete deviceIds;
	// Create a compute context 
	//
	context = clCreateContext(0, 1, &device_id, NULL, NULL, &clError);
	checkErr(clError, "Failed to create a compute context!");

	// Create a command commands
	//
	commandQueue = clCreateCommandQueue(context, device_id, 0, &clError);
	checkErr(clError, "Failed to create a command commands!");

	// READ KERNEL FILENAME
	std::string filename = "NOTDEFINED.cl";
	char const* tmp_name = getenv("OPENCL_KERNEL");
	if (tmp_name) {
		filename = std::string(tmp_name);
	} else {
		filename = std::string(__FILE__);
		filename = filename.substr(0, filename.length() - 17);
		filename += "/kernels.cl";

	}

	// READ OPENCL_PARAMETERS
	std::string compile_parameters = "";
	char const* tmp_params = getenv("OPENCL_PARAMETERS");
	if (tmp_params) {
		compile_parameters = std::string(tmp_params);
	}

	std::ifstream kernelFile(filename.c_str(), std::ios::in);

	if (!kernelFile.is_open()) {
		std::cout << "Unable to open " << filename << ". " << __FILE__ << ":"
				<< __LINE__ << "Please set OPENCL_KERNEL" << std::endl;
		exit(1);
	}

	/*
	 * Read the kernel file into an output stream.
	 * Convert this into a char array for passing to OpenCL.
	 */
	std::ostringstream outputStringStream;
	outputStringStream << kernelFile.rdbuf();
	std::string srcStdStr = outputStringStream.str();
	const char* charSource = srcStdStr.c_str();

	kernelFile.close();
	// Create the compute program from the source buffer
	//
	program = clCreateProgramWithSource(context, 1, (const char **) &charSource,
			NULL, &clError);
	if (!program) {
		printf("Error: Failed to create compute program!\n");
		exit(1);
	}

	// Build the program executable
	//
	clError = clBuildProgram(program, 0, NULL, compile_parameters.c_str(), NULL,
			NULL);

	/* Get the size of the build log. */
	size_t logSize = 0;
	clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL,
			&logSize);

	if (clError != CL_SUCCESS) {
		if (logSize > 1) {
			char* log = new char[logSize];
			clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG,
					logSize, log, NULL);

			std::string stringChars(log, logSize);
			std::cerr << "Build log:\n " << stringChars << std::endl;

			delete[] log;
		}
		printf("Error: Failed to build program executable!\n");
		exit(1);
	}

	return;

}
示例#12
0
int main(int argc, char **argv)
{

	cl_int ret;


	/*
	 * Command line
	 */
	char *binary_path;
	if (argc != 2)
	{
		printf("syntax: %s <binary>\n", argv[0]);
		exit(1);
	}
	binary_path = argv[1];


	/*
	 * Platform
	 */

	/* Get platform */
	cl_platform_id platform;
	cl_uint num_platforms;
	ret = clGetPlatformIDs(1, &platform, &num_platforms);
	if (ret != CL_SUCCESS)
	{
		printf("error: second call to 'clGetPlatformIDs' failed\n");
		exit(1);
	}
	printf("Number of platforms: %d\n", num_platforms);

	/* Get platform name */
	char platform_name[100];
	ret = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clGetPlatformInfo' failed\n");
		exit(1);
	}
	printf("platform.name='%s'\n", platform_name);
	printf("\n");



	/*
	 * Device
	 */

	/* Get device */
	cl_device_id device;
	cl_uint num_devices;
	ret = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &num_devices);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clGetDeviceIDs' failed\n");
		exit(1);
	}
	printf("Number of devices: %d\n", num_devices);

	/* Get device name */
	char device_name[100];
	ret = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name, NULL);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clGetDeviceInfo' failed\n");
		exit(1);
	}
	printf("device.name='%s'\n", device_name);
	printf("\n");



	/*
	 * Context
	 */
	
	/* Create context */
	cl_context context;
	context = clCreateContext(NULL, 1, &device, NULL, NULL, &ret);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clCreateContext' failed\n");
		exit(1);
	}

	

	/*
	 * Command Queue
	 */
	
	/* Create command queue */
	cl_command_queue command_queue;
	command_queue = clCreateCommandQueue(context, device, 0, &ret);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clCreateCommandQueue' failed\n");
		exit(1);
	}
	printf("\n");



	/*
	 * Program
	 */
	
	/* Program binary */
	const unsigned char *binary;
	size_t binary_length;

	/* Read binary */
	binary = read_buffer(binary_path, &binary_length);
	if (!binary)
	{
		printf("error: %s: cannot open binary\n", binary_path);
		exit(1);
	}
	
	/* Create a program */
	cl_program program;
	program = clCreateProgramWithBinary(context, 1, &device, &binary_length,
			&binary, NULL, &ret);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clCreateProgramWithSource' failed\n");
		exit(1);
	}

	/* Build program */
	ret = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
	if (ret != CL_SUCCESS )
	{
		size_t size;
		char *log;

		/* Get log size */
		clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, 0, NULL, &size);

		/* Allocate log and print */
		log = malloc(size);
		clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, size, log, NULL);
		printf("error: call to 'clBuildProgram' failed:\n%s\n", log);

		/* Free log and exit */
		free(log);
		exit(1);
	}
	printf("program built\n");
	printf("\n");



	/*
	 * Kernel
	 */
	
	/* Create kernel */
	cl_kernel kernel;
	kernel = clCreateKernel(program, "vector_add", &ret);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clCreateKernel' failed\n");
		exit(1);
	}
	printf("\n");


	/*
	 * Buffers
	 */
	
	/* Create and allocate host buffers */
	size_t num_elem = 10;

	cl_int *src1_host_buffer;
	cl_int *src2_host_buffer;
	cl_int *dst_host_buffer;
	src1_host_buffer = malloc(num_elem * sizeof(cl_int));
	src2_host_buffer = malloc(num_elem * sizeof(cl_int));
	dst_host_buffer = malloc(num_elem * sizeof(cl_int));

	/* Initialize host source buffer */
	int i;
	for (i = 0; i < num_elem; i++)
	{
		src1_host_buffer[i] = i;
		src2_host_buffer[i] = 100;
	}
	
	/* Create device source buffers */
	cl_mem src1_device_buffer;
	cl_mem src2_device_buffer;
	src1_device_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, num_elem * sizeof(cl_int), NULL, NULL);
	src2_device_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, num_elem * sizeof(cl_int), NULL, NULL);
	if (!src1_device_buffer || !src2_device_buffer)
	{
		printf("error: could not create destination buffer\n");
		exit(1);
	}

	/* Create device destination buffer */
	cl_mem dst_device_buffer;
	dst_device_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, num_elem * sizeof(cl_int), NULL, &ret);
	if (ret != CL_SUCCESS)
	{
		printf("error: could not create destination buffer\n");
		exit(1);
	}

	/* Copy buffer */
	ret = clEnqueueWriteBuffer(command_queue, src1_device_buffer, CL_TRUE,
		0, num_elem * sizeof(cl_int), src1_host_buffer, 0, NULL, NULL);
	ret |= clEnqueueWriteBuffer(command_queue, src2_device_buffer, CL_TRUE,
		0, num_elem * sizeof(cl_int), src2_host_buffer, 0, NULL, NULL);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clEnqueueWriteBuffer' failed\n");
		exit(1);
	}


	/*
	 * Kernel arguments
	 */
	
	ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), &src1_device_buffer);
	ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &src2_device_buffer);
	ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &dst_device_buffer);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clSetKernelArg' failed\n");
		exit(1);
	}
	
	
	/*
	 * Launch Kernel
	 */
	
	size_t global_work_size = num_elem;
	size_t local_work_size = num_elem;

	/* Launch the kernel */
	ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL,
		&global_work_size, &local_work_size, 0, NULL, NULL);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clEnqueueNDRangeKernel' failed\n");
		exit(1);
	}

	/* Wait for it to finish */
	clFinish(command_queue);


	/*
	 * Result
	 */
	
	/* Receive buffer */
	ret = clEnqueueReadBuffer(command_queue, dst_device_buffer, CL_TRUE,
		0, num_elem * sizeof(cl_int), dst_host_buffer, 0, NULL, NULL);
	if (ret != CL_SUCCESS)
	{
		printf("error: call to 'clEnqueueReadBuffer' failed\n");
		exit(1);
	}

	/* Print result */
	for (i = 0; i < num_elem; i++)
		printf("dst_host_buffer[%d] = %d\n", i, dst_host_buffer[i]);
	printf("\n");

	return 0;
}
示例#13
0
// SETUP
int CLContext::setupCL()
{
	cl_int status = CL_SUCCESS;

	cl_device_type dType;
	int gpu = 1;

	if(gpu == 0)
		dType = CL_DEVICE_TYPE_CPU;
	else //deviceType = "gpu" 
		dType = CL_DEVICE_TYPE_GPU;

	/*
	* Have a look at the available platforms and pick either
	* the AMD one if available or a reasonable default.      <----- LOL check out the amd propaganda
	*/

	cl_uint numPlatforms;
	cl_platform_id platform = NULL;
	status = clGetPlatformIDs(0, NULL, &numPlatforms);
	if(!checkVal(status, CL_SUCCESS, "clGetPlatformIDs failed."))
		return CL_FAILURE;
	if (0 < numPlatforms) 
	{
		cl_platform_id* platforms = new cl_platform_id[numPlatforms];
		status = clGetPlatformIDs(numPlatforms, platforms, NULL);
		if(!checkVal(status, CL_SUCCESS, "clGetPlatformIDs failed."))
			return CL_FAILURE;
		for (unsigned i = 0; i < numPlatforms; ++i) 
		{
			char pbuf[100];
			status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuf), pbuf, NULL);

			if(!checkVal(status, CL_SUCCESS, "clGetPlatformInfo failed."))
				return CL_FAILURE;

			platform = platforms[i];
			if (!strcmp(pbuf, "Advanced Micro Devices, Inc.")) 
				break;
		}
		delete[] platforms;
	}

	/*
	* If we could find our platform, use it. Otherwise pass a NULL and get whatever the
	* implementation thinks we should be using.
	*/

	cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM,  (cl_context_properties)platform,  0 };
	/* Use NULL for backward compatibility */
	cl_context_properties* cprops = (NULL == platform) ? NULL : cps;

	context = clCreateContextFromType( cprops, dType, NULL, NULL, &status);
	if(!checkVal( status, CL_SUCCESS, "clCreateContextFromType failed."))
		return CL_FAILURE;

	size_t deviceListSize;

	/* First, get the size of device list data */
	status = clGetContextInfo( context,  CL_CONTEXT_DEVICES,  0,  NULL,  &deviceListSize);
	if(!checkVal( status,  CL_SUCCESS, "clGetContextInfo failed."))
		return CL_FAILURE;

	/* Now allocate memory for device list based on the size we got earlier */
	devices = (cl_device_id*)malloc(deviceListSize);
	if(devices==NULL)
	{
		cout << "Failed to allocate memory (devices)." << endl;
		return CL_FAILURE;
	}

	/* Now, get the device list data */
	status = clGetContextInfo( context,  CL_CONTEXT_DEVICES,  deviceListSize,  devices,  NULL);
	if(!checkVal( status, CL_SUCCESS,  "clGetContextInfo failed."))
		return CL_FAILURE;

	/* Create command queue */
	commandQueue = clCreateCommandQueue( context, devices[0], 0, &status);
	if(!checkVal( status, CL_SUCCESS, "clCreateCommandQueue failed."))
		return CL_FAILURE;

	/* Get Device specific Information */
	status = clGetDeviceInfo( devices[0], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void*)&maxWorkGroupSize, NULL);

	if(!checkVal( status, CL_SUCCESS,  "clGetDeviceInfo CL_DEVICE_MAX_WORK_GROUP_SIZE failed."))
		return CL_FAILURE;


	status = clGetDeviceInfo( devices[0], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), (void*)&maxDimensions, NULL);
	if(!checkVal( status, CL_SUCCESS,  "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed."))
		return CL_FAILURE;


	maxWorkItemSizes = (size_t *)malloc(maxDimensions * sizeof(unsigned int));

	status = clGetDeviceInfo( devices[0], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(size_t) * maxDimensions, (void*)maxWorkItemSizes, NULL);
	if(!checkVal( status, CL_SUCCESS,  "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed."))
		return CL_FAILURE;

	status = clGetDeviceInfo( devices[0], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(cl_ulong), (void *)&totalLocalMemory, NULL);
	if(!checkVal( status, CL_SUCCESS,  "clGetDeviceInfo CL_DEVICE_LOCAL_MEM_SIZE failed."))
		return CL_FAILURE;

	/*
	* Create and initialize memory objects
	*/

	/* create a CL program using the kernel source */
	string content;
	fileH.open( "critterding.cl", content ); 

	const char * source = content.c_str();
	size_t sourceSize[] = { strlen(source) };
	program = clCreateProgramWithSource( context, 1, &source, sourceSize, &status);
	if(!checkVal( status, CL_SUCCESS, "clCreateProgramWithSource failed."))
		return CL_FAILURE;

	/* create a cl program executable for all the devices specified */
	status = clBuildProgram( program, 1, &devices[0], NULL, NULL, NULL);
	if(status != CL_SUCCESS)
	{
		if(status == CL_BUILD_PROGRAM_FAILURE)
		{
			cl_int logStatus;
			char * buildLog = NULL;
			size_t buildLogSize = 0;
			logStatus = clGetProgramBuildInfo (program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, &buildLogSize);
			if(!checkVal( logStatus, CL_SUCCESS, "clGetProgramBuildInfo failed."))
				return CL_FAILURE;

			buildLog = (char*)malloc(buildLogSize);
			if(buildLog == NULL)
			{
				cout << "Failed to allocate host memory. (buildLog)" << endl;
				return CL_FAILURE;
			}
			memset(buildLog, 0, buildLogSize);

			logStatus = clGetProgramBuildInfo (program, devices[0], CL_PROGRAM_BUILD_LOG, buildLogSize, buildLog, NULL);
			if(!checkVal( logStatus, CL_SUCCESS, "clGetProgramBuildInfo failed."))
			{
				free(buildLog);
				return CL_FAILURE;
			}

			std::cout << " \n\t\t\tBUILD LOG\n";
			std::cout << " ************************************************\n";
			std::cout << buildLog << std::endl;
			std::cout << " ************************************************\n";
			free(buildLog);
		}

		if(!checkVal( status, CL_SUCCESS, "clBuildProgram failed."))
			return CL_FAILURE;
	}

	return CL_SUCCESS;
}
示例#14
0
	int XdevLComputeDeviceCL::init() {
		cl_int ret;

		cl_uint numPlatforms;
		ret = clGetPlatformIDs(1, &m_platformID, &numPlatforms);
		if(CL_SUCCESS != ret) {
			XDEVL_MODULEX_ERROR(XdevLComputeDeviceCL, "clGetPlatformIDs failed: " << clErrorAsString(ret) << std::endl);
			return -1;
		}

		std::vector<cl_platform_id> platformIDs(numPlatforms);

		ret = clGetPlatformIDs(1, platformIDs.data(), nullptr);

		for(auto platform : platformIDs) {
			XdevLPlatformInfo info;
			info.id = platform;

			std::size_t size;
			ret = clGetPlatformInfo(platform, CL_PLATFORM_PROFILE, sizeof(info.profile), (void*)info.profile, &size);
			ret = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(info.version), (void*)info.version, &size);
			ret = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(info.name), (void*)info.name, &size);
			ret = clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(info.vendor), (void*)info.vendor, &size);

			if(CL_SUCCESS != ret) {
				XDEVL_MODULEX_ERROR(XdevLComputeDeviceCL, "clGetPlatformInfo failed: " << clErrorAsString(ret) << std::endl);
				return -1;
			}

			std::cout << "Platform Name       : " << info.name << std::endl;
			std::cout << "Platform Profile    : " << info.profile << std::endl;
			std::cout << "Platform Version    : " << info.version << std::endl;
			std::cout << "Platform Vendor     : " << info.vendor << std::endl;


			m_platforms.push_back(std::move(info));
		}

		for(auto platform : m_platforms) {
			cl_uint numDevices;
			ret = clGetDeviceIDs(platform.id, CL_DEVICE_TYPE_ALL, 1, &m_deviceID, &numDevices);
			if(CL_SUCCESS != ret) {
				XDEVL_MODULEX_ERROR(XdevLComputeDeviceCL, "clGetDeviceIDs failed: " << clErrorAsString(ret) << std::endl);
				return -1;
			}
			std::cout << "OpenCL Number of devices: " << numDevices << ", ID: " << m_deviceID << std::endl;

			XdevLDeviceInfo device;
			device.id = m_deviceID;

			std::size_t size;
			ret = clGetDeviceInfo(m_deviceID, CL_DEVICE_NAME, sizeof(device.name), (void*)device.name, &size);
			ret = clGetDeviceInfo(m_deviceID, CL_DEVICE_VENDOR, sizeof(device.vendor), (void*)device.vendor, &size);
			ret = clGetDeviceInfo(m_deviceID, CL_DRIVER_VERSION, sizeof(device.version), (void*)device.version, &size);
			ret = clGetDeviceInfo(m_deviceID, CL_DEVICE_PROFILE, sizeof(device.profile), (void*)device.profile, &size);

			if(CL_SUCCESS != ret) {
				return -1;
			}

			std::cout << "Device Profile    : " << device.profile << std::endl;
			std::cout << "Device Name       : " << device.name << std::endl;
			std::cout << "Device Version    : " << device.version << std::endl;
			std::cout << "Device Vendor     : " << device.vendor << std::endl;


			m_devices.push_back(std::move(device));
		}
		return 0;
	}
示例#15
0
文件: gpuinfo.c 项目: git2u/gpuinfo
static void dump_platform(int index, cl_platform_id platform_id)
{
	static struct {
		cl_platform_info info;
		size_t		size;
		void	   *addr;
	} catalog[] = {
		PLATFORM_ATTR(CL_PLATFORM_PROFILE, profile),
		PLATFORM_ATTR(CL_PLATFORM_VERSION, version),
        PLATFORM_ATTR(CL_PLATFORM_NAME, name),
        PLATFORM_ATTR(CL_PLATFORM_VENDOR, vendor),
        PLATFORM_ATTR(CL_PLATFORM_EXTENSIONS, extensions),
	};
	cl_device_id	device_ids[256];
	cl_uint			device_num;
	cl_int			i, rc;

	for (i=0; i < lengthof(catalog); i++)
	{
		rc = clGetPlatformInfo(platform_id,
							   catalog[i].info,
							   catalog[i].size,
							   catalog[i].addr,
							   NULL);
		if (rc != CL_SUCCESS)
		{
			fprintf(stderr, "failed on clGetPlatformInfo (%s)\n",
					opencl_strerror(rc));
			exit(1);
		}
	}

	rc = clGetDeviceIDs(platform_id,
						CL_DEVICE_TYPE_ALL,
						lengthof(device_ids),
						device_ids,
						&device_num);
	if (rc != CL_SUCCESS)
	{
		fprintf(stderr, "failed on clGetDeviceIDs (%s)\n",
				opencl_strerror(rc));
		exit(1);
	}

	if (only_list)
		printf("Platform-%02d: %s / %s - %s\n", index + 1,
			   platform_info.vendor,
			   platform_info.name,
			   platform_info.version);
	else
	{
		printf("platform-index:      %d\n", index + 1);
		printf("platform-vendor:     %s\n", platform_info.vendor);
		printf("platform-name:       %s\n", platform_info.name);
		printf("platform-version:    %s\n", platform_info.version);
		printf("platform-profile:    %s\n", platform_info.profile);
		printf("platform-extensions: %s\n", platform_info.extensions);
	}

	for (i=0; i < device_num; i++)
	{
		if (only_device < 0 || i + 1 == only_device)
			dump_device(i, device_ids[i]);
	}
	putchar('\n');
}
示例#16
0
int main(int argc, const char** argv) {
  // start logs
  printf("clDeviceQuery Starting...\n\n");
  bool bPassed = true;
  std::string sProfileString = "clDeviceQuery, Platform Name = ";

  // Get OpenCL platform ID for NVIDIA if avaiable, otherwise default
  char cBuffer[1024];
  cl_platform_id clSelectedPlatformID = NULL;
  cl_platform_id* clPlatformIDs;

  cl_uint num_platforms;
  cl_int ciErrNum = clGetPlatformIDs(0, NULL, &num_platforms);
  if (ciErrNum != CL_SUCCESS) {
    printf(" Error %i in clGetPlatformIDs Call!\n\n", ciErrNum);
    bPassed = false;
  } else {
    if (num_platforms == 0) {
      printf("No OpenCL platform found!\n\n");
      bPassed = false;
    } else {
      // if there's one platform or more, make space for ID's
      if ((clPlatformIDs = (cl_platform_id*)malloc(num_platforms * sizeof(cl_platform_id))) == NULL) {
	printf("Failed to allocate memory for cl_platform ID's!\n\n");
	bPassed = false;
      }

      printf("%d OpenCL Platforms found\n\n", num_platforms);
      // get platform info for each platform
      ciErrNum = clGetPlatformIDs (num_platforms, clPlatformIDs, NULL);
      for(cl_uint i = 0; i < num_platforms; ++i) {
	ciErrNum = clGetPlatformInfo (clPlatformIDs[i], CL_PLATFORM_NAME, 1024, &cBuffer, NULL);
	if(ciErrNum == CL_SUCCESS) {
	  clSelectedPlatformID = clPlatformIDs[i];
	  // Get OpenCL platform name and version
	  ciErrNum = clGetPlatformInfo (clSelectedPlatformID, CL_PLATFORM_NAME, sizeof(cBuffer), cBuffer, NULL);
	  if (ciErrNum == CL_SUCCESS) {
	    printf(" CL_PLATFORM_NAME: \t%s\n", cBuffer);
	    sProfileString += cBuffer;
	  } else {
	    printf(" Error %i in clGetPlatformInfo Call !!!\n\n", ciErrNum);
    bPassed = false;
  }
  sProfileString += ", Platform Version = ";

  ciErrNum = clGetPlatformInfo (clSelectedPlatformID, CL_PLATFORM_VERSION, sizeof(cBuffer), cBuffer, NULL);
  if (ciErrNum == CL_SUCCESS) {
    printf(" CL_PLATFORM_VERSION: \t%s\n", cBuffer);
    sProfileString += cBuffer;
  } else {
    printf(" Error %i in clGetPlatformInfo Call !!!\n\n", ciErrNum);
    bPassed = false;
  }

  // Log OpenCL SDK Version # (for convenience:  not specific to OpenCL)
  sProfileString += ", NumDevs = ";

  // Get and log OpenCL device info
  cl_uint ciDeviceCount;
  cl_device_id *devices;
  printf("OpenCL Device Info:\n\n");
  ciErrNum = clGetDeviceIDs (clSelectedPlatformID, CL_DEVICE_TYPE_ALL, 0, NULL, &ciDeviceCount);

  // check for 0 devices found or errors...
  if (ciDeviceCount == 0) {
    printf(" No devices found supporting OpenCL (return code %i)\n\n", ciErrNum);
    bPassed = false;
    sProfileString += "0";
  } else if (ciErrNum != CL_SUCCESS) {
    printf(" Error %i in clGetDeviceIDs call !!!\n\n", ciErrNum);
    bPassed = false;
  } else {
    // Get and log the OpenCL device ID's
    ciErrNum = clGetPlatformInfo (clSelectedPlatformID, CL_PLATFORM_NAME, sizeof(cBuffer), cBuffer, NULL);
    printf(" %u devices found supporting OpenCL on: %s\n\n", ciDeviceCount, cBuffer);
    char cTemp[2];
    sprintf(cTemp, "%u", ciDeviceCount);
    sProfileString += cTemp;
    if ((devices = (cl_device_id*)malloc(sizeof(cl_device_id) * ciDeviceCount)) == NULL) {
      printf(" Failed to allocate memory for devices !!!\n\n");
      bPassed = false;
    }
    ciErrNum = clGetDeviceIDs (clSelectedPlatformID, CL_DEVICE_TYPE_ALL, ciDeviceCount, devices, &ciDeviceCount);
    if (ciErrNum == CL_SUCCESS) {
      for(unsigned int i = 0; i < ciDeviceCount; ++i )  {
        printf(" ----------------------------------\n");
clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(cBuffer), &cBuffer, NULL);
printf(" Device %s\n", cBuffer);
printf(" ---------------------------------\n");
clPrintDevInfo(devices[i]);
sProfileString += ", Device = ";
sProfileString += cBuffer;
      }
            } else {
      printf(" Error %i in clGetDeviceIDs call !!!\n\n", ciErrNum);
      bPassed = false;
    }
  }

  // masterlog info
  sProfileString += "\n";
  printf("%s", sProfileString.c_str());
}
free(clPlatformIDs);
      }
    }
  }

  // Log system info(for convenience:  not specific to OpenCL)
  printf( "\nSystem Info: \n\n");
  char timestr[255];
  time_t now = time(NULL);
  struct tm  *ts;

  ts = localtime(&now);

  strftime(timestr, 255, " %H:%M:%S, %m/%d/%Y",ts);

  // write time and date to logs
  printf(" Local Time/Date = %s\n", timestr);
  // write proc and OS info to logs
  // parse /proc/cpuinfo
  std::ifstream cpuinfo( "/proc/cpuinfo" ); // open the file in /proc
  std::string tmp;

  int cpu_num = 0;
  std::string cpu_name = "none";
  do {
    cpuinfo >> tmp;

    if( tmp == "processor" )
      cpu_num++;

    if( tmp == "name" ) {
      cpuinfo >> tmp; // skip :

      std::stringstream tmp_stream("");
      do {
	cpuinfo >> tmp;
	if (tmp != std::string("stepping")) {
	  tmp_stream << tmp.c_str() << " ";
	}

      }
      while (tmp != std::string("stepping"));

      cpu_name = tmp_stream.str();
    }
  }
  while ( (! cpuinfo.eof()) );

  // Linux version
  std::ifstream version( "/proc/version" );
  char versionstr[255];

  version.getline(versionstr, 255);

  printf(" CPU Name: %s\n # of CPU processors: %u\n %s\n\n\n",
	 cpu_name.c_str(),cpu_num,versionstr);

  // finish
  printf("TEST %s\n\n", bPassed ? "PASSED" : "FAILED !!!");
}
示例#17
0
	OpenCLDevice(DeviceInfo& info, Stats &stats, bool background_)
	  : Device(stats)
	{
		background = background_;
		cpPlatform = NULL;
		cxContext = NULL;
		cqCommandQueue = NULL;
		cpProgram = NULL;
		ckPathTraceKernel = NULL;
		ckFilmConvertKernel = NULL;
		null_mem = 0;
		device_initialized = false;

		/* setup platform */
		cl_uint num_platforms;

		ciErr = clGetPlatformIDs(0, NULL, &num_platforms);
		if(opencl_error(ciErr))
			return;

		if(num_platforms == 0) {
			opencl_error("OpenCL: no platforms found.");
			return;
		}

		ciErr = clGetPlatformIDs(1, &cpPlatform, NULL);
		if(opencl_error(ciErr))
			return;

		char name[256];
		clGetPlatformInfo(cpPlatform, CL_PLATFORM_NAME, sizeof(name), &name, NULL);
		platform_name = name;

		/* get devices */
		vector<cl_device_id> device_ids;
		cl_uint num_devices;

		if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), 0, NULL, &num_devices)))
			return;

		if(info.num > num_devices) {
			if(num_devices == 0)
				opencl_error("OpenCL: no devices found.");
			else
				opencl_error("OpenCL: specified device not found.");
			return;
		}

		device_ids.resize(num_devices);
		
		if(opencl_error(clGetDeviceIDs(cpPlatform, opencl_device_type(), num_devices, &device_ids[0], NULL)))
			return;

		cdDevice = device_ids[info.num];

		/* create context */
		cxContext = clCreateContext(0, 1, &cdDevice, NULL, NULL, &ciErr);
		if(opencl_error(ciErr))
			return;

		cqCommandQueue = clCreateCommandQueue(cxContext, cdDevice, 0, &ciErr);
		if(opencl_error(ciErr))
			return;

		null_mem = (device_ptr)clCreateBuffer(cxContext, CL_MEM_READ_ONLY, 1, NULL, &ciErr);
		device_initialized = true;
	}
示例#18
0
/*
 * This function read the OpenCL platdorm and device versions
 * (using clGetxxxInfo API) and stores it in the ocl structure.
 * Later it will enable us to support both OpenCL 1.2 and 2.0 platforms and devices
 * in the same program.
 */
int GetPlatformAndDeviceVersion (cl_platform_id platformId, ocl_args_d_t *ocl)
{
    cl_int err = CL_SUCCESS;

    // Read the platform's version string length (param_value is NULL).
    // The value returned in stringLength
    size_t stringLength = 0;
    err = clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, 0, NULL, &stringLength);
    if (CL_SUCCESS != err)
    {
        LogError("Error: clGetPlatformInfo() to get CL_PLATFORM_VERSION length returned '%s'.\n", TranslateOpenCLError(err));
        return err;
    }

    // Now, that we know the platform's version string length, we can allocate enough space before read it
    std::vector<char> platformVersion(stringLength);

    // Read the platform's version string
    // The read value returned in platformVersion
    err = clGetPlatformInfo(platformId, CL_PLATFORM_VERSION, stringLength, &platformVersion[0], NULL);
    if (CL_SUCCESS != err)
    {
        LogError("Error: clGetplatform_ids() to get CL_PLATFORM_VERSION returned %s.\n", TranslateOpenCLError(err));
        return err;
    }

    if (strstr(&platformVersion[0], "OpenCL 2.0") != NULL)
    {
        ocl->platformVersion = OPENCL_VERSION_2_0;
    }

    // Read the device's version string length (param_value is NULL).
    err = clGetDeviceInfo(ocl->device, CL_DEVICE_VERSION, 0, NULL, &stringLength);
    if (CL_SUCCESS != err)
    {
        LogError("Error: clGetDeviceInfo() to get CL_DEVICE_VERSION length returned '%s'.\n", TranslateOpenCLError(err));
        return err;
    }

    // Now, that we know the device's version string length, we can allocate enough space before read it
    std::vector<char> deviceVersion(stringLength);

    // Read the device's version string
    // The read value returned in deviceVersion
    err = clGetDeviceInfo(ocl->device, CL_DEVICE_VERSION, stringLength, &deviceVersion[0], NULL);
    if (CL_SUCCESS != err)
    {
        LogError("Error: clGetDeviceInfo() to get CL_DEVICE_VERSION returned %s.\n", TranslateOpenCLError(err));
        return err;
    }

    if (strstr(&deviceVersion[0], "OpenCL 2.0") != NULL)
    {
        ocl->deviceVersion = OPENCL_VERSION_2_0;
    }

    // Read the device's OpenCL C version string length (param_value is NULL).
    err = clGetDeviceInfo(ocl->device, CL_DEVICE_OPENCL_C_VERSION, 0, NULL, &stringLength);
    if (CL_SUCCESS != err)
    {
        LogError("Error: clGetDeviceInfo() to get CL_DEVICE_OPENCL_C_VERSION length returned '%s'.\n", TranslateOpenCLError(err));
        return err;
    }

    // Now, that we know the device's OpenCL C version string length, we can allocate enough space before read it
    std::vector<char> compilerVersion(stringLength);

    // Read the device's OpenCL C version string
    // The read value returned in compilerVersion
    err = clGetDeviceInfo(ocl->device, CL_DEVICE_OPENCL_C_VERSION, stringLength, &compilerVersion[0], NULL);
    if (CL_SUCCESS != err)
    {
        LogError("Error: clGetDeviceInfo() to get CL_DEVICE_OPENCL_C_VERSION returned %s.\n", TranslateOpenCLError(err));
        return err;
    }

    else if (strstr(&compilerVersion[0], "OpenCL C 2.0") != NULL)
    {
        ocl->compilerVersion = OPENCL_VERSION_2_0;
    }

    return err;
}
示例#19
0
int main(int argc, char *argv[]){

	if (MODE == 5){

		printf("---OpenCL Test Code---\n\n");


		cl_int errNum;
		cl_uint numPlatforms;
		cl_platform_id *platforms = NULL;
		cl_uint numDevices;
		cl_device_id *devices = NULL;

		//platform info fields
		char vendor[1024], name[1024], version[1024];

		//device info fields
		size_t MAX_WORK_GROUP_SIZE;
		cl_ulong GLOBAL_MEM_CACHE_SIZE, GLOBAL_MEM_SIZE, LOCAL_MEM_SIZE, GLOBAL_MEM_CACHELINE_SIZE;
		cl_uint MAX_COMPUTE_UNITS, MAX_WORK_ITEM_DIMENSIONS;
		size_t MAX_WORK_ITEM_SIZES[3];
		char DEVICE_NAME[1024], DEVICE_VENDOR[1024], DEVICE_VERSION[1024], DRIVER_VERSION[1024], EXTENSIONS[2048];
		cl_device_mem_cache_type GLOBAL_MEM_CACHE_TYPE;


		//printf("Getting number of OpenCL Platforms...\n");
		errNum = clGetPlatformIDs(0, NULL, &numPlatforms);
		if (errNum != CL_SUCCESS)
		{
			printf("Failed to get number of OpenCL platforms.\n");
			return 0;
		}
		else
		{

			//printf("found %d.\n", numPlatforms);
		}

		//printf("Allocating space for the platform info...\n");
		platforms = (cl_platform_id *)malloc(numPlatforms*sizeof(cl_platform_id));

		printf("---Platform Info---\n");
		errNum = clGetPlatformIDs(numPlatforms, platforms, NULL);
		if (errNum != CL_SUCCESS)
		{
			printf("Failed to get platform info.\n");
			return 0;
		}
		else
		{
			clGetPlatformInfo (platforms[0], CL_PLATFORM_VENDOR, sizeof(vendor), vendor, NULL);
			clGetPlatformInfo (platforms[0], CL_PLATFORM_NAME, sizeof(name), name, NULL);
			clGetPlatformInfo (platforms[0], CL_PLATFORM_VERSION, sizeof(version), version, NULL);

			//printf("Got platform info.\n");
			printf("Vendor: \t%s\n", vendor);
			printf("Name:   \t%s\n", name);
			printf("Version:\t%s\n", version);
		}

		//printf("Getting number of devices...\n");
		errNum = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);
		if (errNum != CL_SUCCESS)
		{
			printf("Failed to get number of devices.\n");
			return 0;
		}
		else
		{
	    	//printf("Found %d.\n", numDevices);
	    }

		//printf("Allocating space for device info...\n");
		devices = (cl_device_id*)malloc(numDevices * sizeof(cl_device_id));

		printf("\n---Device Info---");
		errNum = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL);
		if (errNum != CL_SUCCESS)
		{
			printf("Failed to get device info.\n");
			return 0;
		}
		else
		{

			int i, j = 0;
			for (i = 0; i < numDevices; i++ )
			{
				printf("\nDevice ID: %d\n", i+1);
				clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(DEVICE_NAME), DEVICE_NAME, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(DEVICE_VENDOR), DEVICE_VENDOR, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(DEVICE_VERSION), DEVICE_VERSION, NULL);
				clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(DRIVER_VERSION), DRIVER_VERSION, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_EXTENSIONS, sizeof(EXTENSIONS), EXTENSIONS, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(MAX_COMPUTE_UNITS), &MAX_COMPUTE_UNITS, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(GLOBAL_MEM_SIZE), &GLOBAL_MEM_SIZE, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_LOCAL_MEM_SIZE, sizeof(LOCAL_MEM_SIZE), &LOCAL_MEM_SIZE, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(MAX_WORK_ITEM_DIMENSIONS), &MAX_WORK_ITEM_DIMENSIONS, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, sizeof(MAX_WORK_ITEM_SIZES), MAX_WORK_ITEM_SIZES, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(MAX_WORK_GROUP_SIZE), &MAX_WORK_GROUP_SIZE, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(GLOBAL_MEM_CACHE_SIZE), &GLOBAL_MEM_CACHE_SIZE, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, sizeof(GLOBAL_MEM_CACHELINE_SIZE), &GLOBAL_MEM_CACHELINE_SIZE, NULL);
				clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof(GLOBAL_MEM_CACHE_TYPE), &GLOBAL_MEM_CACHE_TYPE, NULL);


				printf("Device Name:\t%s\n", DEVICE_NAME);
				printf("Device Vendor:\t%s\n", DEVICE_VENDOR);
				printf("Device Version:\t%s\n", DEVICE_VERSION);
				printf("Driver Version:\t%s\n", DRIVER_VERSION);
				printf("EXTENSIONS:\t%s\n", EXTENSIONS);
				printf("Number of CUs:\t%d\n", MAX_COMPUTE_UNITS);
				printf("GMem:\t\t%lld (Bytes)\n", (long long) GLOBAL_MEM_SIZE);
				printf("GMem $ Size:\t%lld (Bytes)\n", (long long) GLOBAL_MEM_CACHE_SIZE);
				printf("GMem $ Line:\t%lld (Bytes)\n", (long long) GLOBAL_MEM_CACHELINE_SIZE);
				if(GLOBAL_MEM_CACHE_TYPE == CL_NONE)
				{
					printf("GMem $ Type:\tCL_NONE\n");
				}
				else if(GLOBAL_MEM_CACHE_TYPE == CL_READ_ONLY_CACHE)
				{
					printf("GMem $ Type:\tCL_READ_ONLY_CACHE\n");
				}

				else if(GLOBAL_MEM_CACHE_TYPE == CL_READ_WRITE_CACHE)
				{
					printf("GMem $ Type:\tCL_READ_WRITE_CACHE\n");
				}
				printf("LMem:\t\t%lld (Bytes)\n", (long long) LOCAL_MEM_SIZE);
				printf("Work Group Size:%d (Max)\n", (int) MAX_WORK_GROUP_SIZE);
				printf("Work Item Dim:\t%d (Max)\n", MAX_WORK_ITEM_DIMENSIONS);
				printf("Work Item Size:\t");
				for(j = 0; j < MAX_WORK_ITEM_DIMENSIONS; j ++)
				{
						if (j != (MAX_WORK_ITEM_DIMENSIONS -1))
						printf("%d, ", (int) MAX_WORK_ITEM_SIZES[j]);

						if (j == (MAX_WORK_ITEM_DIMENSIONS -1))
						printf("%d ", (int) MAX_WORK_ITEM_SIZES[j]);
				}
				printf("(Max)\n");

			}

				//printf("Got device info.\n");
		}


	}

	else if (MODE == 4){
		cl_context context = 0;
	    cl_command_queue commandQueue = 0;
	    cl_program program = 0;
	    cl_device_id device = 0;

	    //Create an OpenCL context on first available platform
	    context = CreateContext();
	    if (context == NULL)
	    {
	        printf("Failed to create OpenCL context.\n");
	        return 1;
	    }

	    //Create a command-queue on the first device available on the created context
	    commandQueue = CreateCommandQueue(context, &device);
	    if (commandQueue == NULL)
	    {
	    	printf("Failed to create commandQueue.\n");
	    	Cleanup(context, commandQueue, program, NULL);
	    	return 1;
	    }

	    // Create OpenCL program and store the binary for future use.
	    printf("Attempting to create kernel binary from source.\n");
	    program = CreateProgram(context, device, KERNELPATHIN);
	    if (program == NULL)
	    {
	    	printf("Failed to create Program");
	    	Cleanup(context, commandQueue, program, NULL);
	    	return 1;
	    }

	    printf("Kernel is saved.\n");
	    if (SaveProgramBinary(program, device, KERNELPATHOUT) == false)
	    {
	        printf("Failed to write program binary.\n");
	        Cleanup(context, commandQueue, program, NULL);
	        return 1;
	     }

	    //printf("---Done---");

	    //return 1;

	}
	else if (MODE == 3){

		//todo free remaining objects not passed to cleanup

		//profiling
		int write_bytes = 0;
		int read_bytes = 0;
		/*unsigned long long start_cycles, stop_cycles;
		unsigned long long start_setup, stop_setup;
		unsigned long long start_write, stop_write;
		unsigned long long start_read, stop_read;
		unsigned long long start_finalize, stop_finalize;
		struct timespec start_time_t, stop_time_t;*/


		printf("Stream Mode\n\n");
		//clock_gettime(CLOCK_MONOTONIC, &start_time_t);
		//start_cycles = rdtsc();


		int i;
		time_t t;
		srand((unsigned) time(&t));

	    // Create the two input vectors
	    printf("\nHostside malloc(s)\n");
	    fflush(stdout);
	    int *A = (int*)malloc(sizeof(int)*(SIZE*SIZE));
	    int *B = (int*)malloc(sizeof(int)*(SIZE*SIZE));
	    int *C = (int*)malloc(sizeof(int)*(SIZE*SIZE));

	    //profile
	    //bytes += 3 * sizeof(int)*(SIZE*SIZE);

	    printf("\nHostside mat init\n");
	    fflush(stdout);
	    for(i = 0; i < (SIZE*SIZE); i++) {
	        A[i] = B[i] = rand() % 10 + 1;;
	    }


	    //print matrix
    	printf("Matrix A[%d][%d]:\n", SIZE, SIZE);
	   	for(i = 0; i < (SIZE*SIZE); i++)
	    {
	   		printf("%3d ", A[i]);
	   		if(((i + 1) % SIZE) == 0)
	   			printf("\n");
	    }

	    //print matrix
	   	printf("\nMatrix B[%d][%d]:\n", SIZE, SIZE);
	    for(i = 0; i < (SIZE*SIZE); i++)
	    {
	    	printf("%3d ", B[i]);
	        if(((i + 1) % SIZE) == 0)
	        	printf("\n");
	    }


	    //syscall(STATS_RESET);


	    //Get platform and device information
	    cl_context context = 0;
	    cl_command_queue commandQueue = 0;
	    cl_program program = 0;
	    cl_device_id device = 0;
	    cl_kernel kernel = 0;
	    cl_uint err = 0;
	    //char *filepath = NULL;

	    //Create the context
	    printf("\nCreateContext\n");
	    fflush(stdout);
	    context = CreateContext();
	    if (context == NULL)
	    {
	    	printf("Failed to create OpenCL context.\n");
	    	return 1;
	    }

	   /* printf("\nEnd CreateContext\n");
	    fflush(stdout);*/

	    //Create a command-queue on the first device available on the created context
	    printf("\nCreateCommandQueue\n");
	    fflush(stdout);
	    commandQueue = CreateCommandQueue(context, &device);
	    if (commandQueue == NULL)
	    {
	    	printf("Failed to create command queue.\n");
	    	Cleanup(context, commandQueue, program, NULL);
	    	return 1;
	    }

	    //create the program from the binary
	    //program = CreateProgramFromBinary(context, device, "/home/stardica/Desktop/Kernels/vector.cl.bin.GPU");
	    //strcat(KERNELPATHOUT, ".GPU")
	    printf("\nCreateProgramFromBinary\n");
	    fflush(stdout);
	    program = CreateProgramFromBinary(context, device, KERNEL);
	    if (program == NULL)
	    {
	    	printf("Failed to load kernel binary,\n");
	    	Cleanup(context, commandQueue, program, NULL);
	    	return 1;
	    }



	    // Create OpenCL kernel
	    printf("\nclCreateKernel\n");
	    fflush(stdout);
	    kernel = clCreateKernel(program, "Matrix", NULL);
	    if (kernel == NULL)
	    {
	    	printf("Failed to create kernel.\n");
	    	Cleanup(context, commandQueue, program, NULL);
	    	return 1;
	    }

	    cl_mem a_mem_obj = 0;
	    cl_mem b_mem_obj = 0;
	    cl_mem c_mem_obj = 0;

  	    //Create memory buffers on the device for each vector

	    printf("\nclCreateBuffer(s)\n");
	    fflush(stdout);
	    if(LOCALMEM == 1 && CACHEDMEM == 0)
		{
			//this creates uncached buffers in the GPU's local memory
			#if M2S_CGM_OCL_SIM
			{
				a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, (sizeof(int)*(SIZE*SIZE)), NULL, NULL, CL_FALSE);
				b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, (sizeof(int)*(SIZE*SIZE)), NULL, NULL, CL_FALSE);
				c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, (sizeof(int)*(SIZE*SIZE)), NULL, NULL, CL_FALSE);

			}
			#else
			{
				a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, (sizeof(int)*(SIZE*SIZE)), NULL, NULL);
				b_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY, (sizeof(int)*(SIZE*SIZE)), NULL, NULL);
				c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, (sizeof(int)*(SIZE*SIZE)), NULL, NULL);
			}
			#endif
		}

		if(SYSMEM == 1 && CACHEDMEM == 0)
		{
			//this creates uncached buffers in the system memory
			#if M2S_CGM_OCL_SIM
			{
				a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL, CL_FALSE);
				b_mem_obj = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL, CL_FALSE);
				c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL, CL_FALSE);
			}
			#else
			{
				a_mem_obj = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL);
				b_mem_obj = clCreateBuffer(context,CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL);
				c_mem_obj = clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL);
			}
			#endif
		}

		if(SYSMEM == 1 && CACHEDMEM == 1)
		{
			//this creates cached buffers in the system memory.
			#if M2S_CGM_OCL_SIM
			{
				a_mem_obj = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL, CL_FALSE);
				b_mem_obj = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL, CL_FALSE);
				c_mem_obj = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL, CL_FALSE);
			}
			#else
			{
				a_mem_obj = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL);
				b_mem_obj = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL);
				c_mem_obj = clCreateBuffer(context, CL_MEM_ALLOC_HOST_PTR, (sizeof(int)*(SIZE*SIZE)), NULL, NULL);
			}
			#endif
		}

	    if (a_mem_obj == NULL || b_mem_obj == NULL  || c_mem_obj == NULL)
	    {
	    	printf("Failed to create memory objects.\n");
	    	Cleanup(context, commandQueue, program, kernel);
	    	return 1;
	    }

	    //Copy the lists A and B to their respective memory buffers
	    printf("\nclEnqueueWriteBuffer(s)\n");
	    fflush(stdout);
	    write_bytes += 2 * sizeof(int)*(SIZE*SIZE);
	   // start_write = rdtsc();
	    clEnqueueWriteBuffer(commandQueue, a_mem_obj, CL_TRUE, 0, (sizeof(int)*(SIZE*SIZE)), A, 0, NULL, NULL);
	    clEnqueueWriteBuffer(commandQueue, b_mem_obj, CL_TRUE, 0, (sizeof(int)*(SIZE*SIZE)), B, 0, NULL, NULL);
	   // stop_write = rdtsc();


	    // Set the arguments of the kernel
	    int *size = (int *)SIZE;
	    printf("\nclSetKernelArg(s)\n");
	    fflush(stdout);
	    err = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&c_mem_obj);
	    err = clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&a_mem_obj);
	    err = clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&b_mem_obj);
	    err = clSetKernelArg(kernel, 3, sizeof(int), (void *)&size);
	    if (err != CL_SUCCESS)
	    {
	    	printf("Kernel args not set.\n");
	    	return 1;
	    }

	    // Execute the OpenCL kernel on the list
	    size_t GlobalWorkSize[2], LocalWorkSize[2];

	    //Rember that in OpenCL we need to express the globalWorkSize in
	    //terms of the total number of threads. The underlying OpenCL API
	    //will look at the globalWorkSize and divide by the localWorkSize
	    //to arrive at a 64 by 64 NDRange of 16 by 16 work groups.

	    GlobalWorkSize[0] = GWS_0;//SIZE*SIZE*SIZE; // Process the entire lists
	    GlobalWorkSize[1] = GWS_1;//SIZE*SIZE*SIZE; // Process the entire lists
	    LocalWorkSize[0] = LWS_0; //SIZE Divide work items into groups of 64
	    LocalWorkSize[1] = LWS_1; //SIZE Divide work items into groups of 64


	    //used null for local, lets OpenCL determine the best local size.
	    //err = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, GlobalWorkSize, LocalWorkSize, 0, NULL, NULL);
	    printf("\nclEnqueueNDRangeKernel\n");
	    fflush(stdout);
	    err = clEnqueueNDRangeKernel(commandQueue, kernel, 2, NULL, GlobalWorkSize, LocalWorkSize, 0, NULL, NULL);
	    if (err != CL_SUCCESS)
	    {
	    	printf("ND range not enqueued. Code: %d\n", err);
	    	return 1;
	    }


	    //Read the memory buffer C on the device to the local variable C
	    printf("\nclEnqueueReadBuffer\n");
	    fflush(stdout);
	    read_bytes += sizeof(int)*(SIZE*SIZE);
	    //start_read = rdtsc();
	    err = clEnqueueReadBuffer(commandQueue, c_mem_obj, CL_TRUE, 0, (sizeof(int)*(SIZE*SIZE)), C, 0, NULL, NULL);
	   // stop_read = rdtsc();
	    if (err != CL_SUCCESS)
	    {
	    	printf("Buffer not returned.\n");
	    	return 1;
	    }

	    //syscall(STATS_STOP);

	    //print matrix
	    printf("\nMatrix C[%d][%d] = A[%d][%d]*B[%d][%d]:\n", SIZE, SIZE, SIZE, SIZE, SIZE, SIZE);
	    for(i = 0; i < (SIZE*SIZE); i++)
	    {
	    	printf("%3d ", C[i]);
	        if(((i + 1) % SIZE) == 0)
	        printf("\n");
	    }

	    printf("\nHostside clean up\n");
	    fflush(stdout);
	    err = clFlush(commandQueue);
	    err = clFinish(commandQueue);
	    Cleanup(context, commandQueue, program, kernel);
	    err = clReleaseMemObject(a_mem_obj);
	    err = clReleaseMemObject(b_mem_obj);
	    err = clReleaseMemObject(c_mem_obj);
	    free(A);
	    free(B);
	    free(C);

	    //printf("---Done---");



	    /*stop_cycles = rdtsc();
	    clock_gettime(CLOCK_MONOTONIC, &stop_time_t);
	    printf("Total cycles = %llu\n", (stop_cycles - start_cycles));

	    long int time_s = stop_time_t.tv_nsec - start_time_t.tv_nsec;
	    printf("Approximate runtime (check) = %ld ms\n", (time_s/1000000));

	    printf("Bytes written %d\n", write_bytes);
	    printf("transfer cycles = %llu\n", (stop_write - start_write));
	    printf("start at = %llu\n", (start_write - start_cycles));

	    printf("Bytes read %d\n", read_bytes);
	    printf("transfer cycles = %llu\n", (stop_read - start_read));
	    printf("start at = %llu\n", (start_read - start_cycles));*/

	}
	else if (MODE == 2){

		printf("Multi Thread Mode\n");
		//cal this:
		//assignToThisCore(0);//assign to core 0,1,2,...

		unsigned long long a, b;
	    int i = 0;
	    int j = 0;
	    int k = 0;

		LoadMatrices();

		pthread_t tid[SIZE*SIZE];


		//printf("waiting\n");
		//start our threads
		a = rdtsc();
		syscall(BEGIN_PARALLEL_SECTION);

		for(i=0;i<SIZE;i++){
			for(j=0;j<SIZE;j++){
				struct RowColumnData *RCData = (struct RowColumnData *) malloc(sizeof(struct RowColumnData));
				RCData->RowNum = i;
				RCData->ColumnNum = j;
				//printf("Thread create %d Row %d Col %d\n", k, RCData->RowNum, RCData->ColumnNum);
				pthread_create(&tid[k], NULL, RowColumnMultiply, RCData);
				k++;
			}
		}

		//Join threads////////////////////////////
		for (i=0;i<NUM_THREADS;i++)
		{
			pthread_join(tid[i], NULL);
		}
		syscall(END_PARALLEL_SECTION);
		b = rdtsc();

		PrintMatrices();



		//printf("\nend clock Cycles: %llu\n", b);
		printf("\nDone. Number of clock Cycles: %llu\n", b-a);

	}
	else if (MODE == 1)
	{

		printf("Single Thread Mode\n\n");
		//unsigned long long a, b;
		//a = rdtsc();
		//time_t t;
		int i,j,k;

		//srand((unsigned) time(&t));

		LoadMatrices();

		//multiply mats/////////////////////////
		for (i=0;i<SIZE;i++){
			for(j=0;j<SIZE;j++){
				for(k=0;k<SIZE;k++){
					matC[i][j] = matC[i][j] + (matA[i][k] * matB[k][j]);
					}
			}
		}

		PrintMatrices();

		//b = rdtsc();
		//printf("\nDone. Number of clock Cycles: %llu\n", b-a);
	}
	else if (MODE == 0)
	{
		printf("---Misc Tests---\n\n");

		printf("size of long long is %d\n", (int) sizeof(long long));
		printf("size of long is %d\n", (int) sizeof(long));
		printf("size of int is %d\n", (int) sizeof(int));
		printf("size of short is %d\n", (int) sizeof(short));
		printf("size of char * %d\n", (int) sizeof(char *));
		printf("size of unsigned int (word) %d\n", (int) sizeof(unsigned int));

		char *string = "test string";
		printf("Here is the string 1: \"%s\"\n", string);

		//Using the struct
		//set string variable and point to print_me.
		object.string = strdup(string);
		object.print_me = (void (*)(void *)) print_me;

		//use of print_me
		object.print_me(object.string);

		//pointer fun
		struct Object *ptr = &object;
		printf("this is the value of the pointer to struct object: %p\n", ptr);
		object.next=&object;
		printf("this is the value of the pointer to struct object: %p\n", object.next);
		object_ptr = &object;
		object_ptr->next = &object;
		printf("this is the value of the pointer to struct object: %p\n", object_ptr->next);

		//Macro fun
		PRINT(ptr, ptr);
		PRINT(object.next, object.next);
		PRINT(object_ptr->next, object_ptr->next);

		int mmu_page_size = 1 << 12;

		printf("mmu_papge_size = %d\n", mmu_page_size);


		//setjmp and longjmp fun
		/*jmp_buf environment;
		int i;

		i = setjmp(environment);
		printf("\n\nsetjmp returned = %d\n", i);

		printf("Env 1:\n");

		int x = 0;
		for(x = 0; x < 6; x++)
		{
			printf("  %x\n", environment[x]);
		}


		if (i < 3)
		{
			longjmp(environment, 3);
		}

		printf("longjmp finished with i = %d\n", i);*/


	}
	else
	{

		printf("---Invalid Mode Set---\n\n");

	}

	printf("\n---Done---\n");
	return 1;
}
示例#20
0
int deflate259_opencl(unsigned char* input, unsigned in_len, unsigned char* tree,
  unsigned tree_len, unsigned char* output, unsigned* out_len)
{
#define SDACCEL_WRAPPER
#ifdef SDACCEL_WRAPPER
  int err;                            // error code returned from api calls

  cl_platform_id platform_id;         // platform id
  cl_device_id device_id;             // compute device id 
  cl_context context;                 // compute context
  cl_command_queue commands;          // compute command queue
  cl_program program;                 // compute program
  cl_kernel kernel;                   // compute kernel
   
  char cl_platform_vendor[1001];
  char cl_platform_name[1001];

  err = clGetPlatformIDs(1,&platform_id,NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to find an OpenCL platform!\n");
    printf("Test failed\n");
    return EXIT_FAILURE;
  }
  err = clGetPlatformInfo(platform_id,CL_PLATFORM_VENDOR,1000,(void *)cl_platform_vendor,NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: clGetPlatformInfo(CL_PLATFORM_VENDOR) failed!\n");
    printf("Test failed\n");
    return EXIT_FAILURE;
  }
  printf("CL_PLATFORM_VENDOR %s\n",cl_platform_vendor);
  err = clGetPlatformInfo(platform_id,CL_PLATFORM_NAME,1000,(void *)cl_platform_name,NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: clGetPlatformInfo(CL_PLATFORM_NAME) failed!\n");
    printf("Test failed\n");
    return EXIT_FAILURE;
  }
  printf("CL_PLATFORM_NAME %s\n",cl_platform_name);

  // Connect to a compute device
  //
  int fpga = 0;
#if defined (FPGA_DEVICE)
  fpga = 1;
#endif
  err = clGetDeviceIDs(platform_id, fpga ? CL_DEVICE_TYPE_ACCELERATOR : CL_DEVICE_TYPE_CPU,
                       1, &device_id, NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to create a device group!\n");
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  // Create a compute context 
  //
  context = clCreateContext(0, 1, &device_id, NULL, NULL, &err);
  if (!context)
  {
    printf("Error: Failed to create a compute context!\n");
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  // Create a command commands
  //
  commands = clCreateCommandQueue(context, device_id, 0, &err);
  if (!commands)
  {
    printf("Error: Failed to create a command commands!\n");
    printf("Error: code %i\n",err);
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  int status;

  // Create Program Objects
  //
  
  // Load binary from disk
  unsigned char *kernelbinary;
  char xclbin[]="deflate1.xclbin";
  printf("loading %s\n", xclbin);
  int n_i = load_file_to_memory(xclbin, (char **) &kernelbinary);
  if (n_i < 0) {
    printf("failed to load kernel from xclbin: %s\n", xclbin);
    printf("Test failed\n");
    return EXIT_FAILURE;
  }
  size_t n = n_i;
  // Create the compute program from offline
  program = clCreateProgramWithBinary(context, 1, &device_id, &n,
                                      (const unsigned char **) &kernelbinary, &status, &err);
  if ((!program) || (err!=CL_SUCCESS)) {
    printf("Error: Failed to create compute program from binary %d!\n", err);
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  // Build the program executable
  //
  err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);
  if (err != CL_SUCCESS)
  {
    size_t len;
    char buffer[2048];

    printf("Error: Failed to build program executable!\n");
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
    printf("%s\n", buffer);
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  // Create the compute kernel in the program we wish to run
  //
  kernel = clCreateKernel(program, "deflate259", &err);
  if (!kernel || err != CL_SUCCESS)
  {
    printf("Error: Failed to create compute kernel! %d\n", err);
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  // Create the input and output arrays in device memory for our calculation
//  void deflate259_opencl(unsigned char* input, unsigned in_len, unsigned char* tree,
//    unsigned tree_len, unsigned char* output, unsigned* out_len)
  cl_mem input_arg, in_len_arg, tree_arg, tree_len_arg, output_arg, out_len_arg;
  input_arg = clCreateBuffer(context,  CL_MEM_READ_ONLY, CHUNK, NULL, NULL);
  in_len_arg = clCreateBuffer(context,  CL_MEM_READ_ONLY, sizeof(unsigned), NULL, NULL);
  tree_arg = clCreateBuffer(context,  CL_MEM_READ_ONLY, 512, NULL, NULL);
  tree_len_arg = clCreateBuffer(context,  CL_MEM_READ_ONLY, sizeof(unsigned), NULL, NULL);
  output_arg = clCreateBuffer(context, CL_MEM_WRITE_ONLY, CHUNK*2, NULL, NULL);
  out_len_arg = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(unsigned), NULL, NULL);

  if (!input_arg || !in_len_arg || !tree_arg || !tree_len_arg || !output_arg || !out_len_arg)
  {
    printf("Error: Failed to allocate device memory!\n");
    printf("Test failed\n");
    return EXIT_FAILURE;
  }    

  err = clEnqueueWriteBuffer(commands, input_arg, CL_TRUE, 0, in_len, input, 0, NULL, NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to write to source array input!\n");
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  err = clEnqueueWriteBuffer(commands, in_len_arg, CL_TRUE, 0, sizeof(unsigned), &in_len, 0, NULL, NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to write to source array &in_len!\n");
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  err = clEnqueueWriteBuffer(commands, tree_arg, CL_TRUE, 0, 512, tree, 0, NULL, NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to write to source array tree!\n");
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  err = clEnqueueWriteBuffer(commands, tree_len_arg, CL_TRUE, 0, sizeof(unsigned), &tree_len, 0, NULL, NULL);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to write to source array &tree_len!\n");
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  // Set the arguments to our compute kernel
//void deflate259_opencl(unsigned char* input, unsigned in_len, unsigned char* tree,
//  unsigned tree_len, unsigned char* output, unsigned* out_len)
  err = 0;
  err  = clSetKernelArg(kernel, 0, sizeof(cl_mem), &input_arg);
  err |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &in_len_arg);
  err |= clSetKernelArg(kernel, 2, sizeof(cl_mem), &tree_arg);
  err |= clSetKernelArg(kernel, 3, sizeof(cl_mem), &tree_len_arg);
  err |= clSetKernelArg(kernel, 4, sizeof(cl_mem), &output_arg);
  err |= clSetKernelArg(kernel, 5, sizeof(cl_mem), &out_len_arg);
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to set kernel arguments! %d\n", err);
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  // Execute the kernel over the entire range of our 1d input data set
  // using the maximum number of work group items for this device
  //

#ifdef C_KERNEL
  err = clEnqueueTask(commands, kernel, 0, NULL, NULL);
#else
  size_t global[1];
  size_t local[1];
  global[0] = 1;
  local[0] = 1;
  err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, 
                               (size_t*)&global, (size_t*)&local, 0, NULL, NULL);
#endif
  if (err)
  {
    printf("Error: Failed to execute kernel! %d\n", err);
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  // Read back the results from the device to verify the output
  //
  cl_event readevent;
  unsigned out_len_b;
  err = clEnqueueReadBuffer( commands, out_len_arg, CL_TRUE, 0, sizeof(unsigned),
      &out_len_b, 0, NULL, &readevent );
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to read output length! %d\n", err);
    printf("Test failed\n");
    return EXIT_FAILURE;
  }

  clWaitForEvents(1, &readevent);
  *out_len = out_len_b;

  printf("Read final output length: %d\n", out_len_b);

  err = clEnqueueReadBuffer( commands, output_arg, CL_TRUE, 0, out_len_b, output, 0, NULL, &readevent );
  if (err != CL_SUCCESS)
  {
    printf("Error: Failed to read output data! %d\n", err);
    printf("Test failed\n");
    return EXIT_FAILURE;
  }
  clWaitForEvents(1, &readevent);
#endif
}
示例#21
0
int main(void) {
//time meassuring
  	struct timeval tvs;

//variables
	int 	Nx=1024;
	int		Ny=1024;
	int 	plotnum=0;
	int	  	Tmax=2;
	int 	plottime=0;
	int	  	plotgap=1;
	double	Lx=1.0;
	double 	Ly=1.0;
	double	dt=0.0;	
	double	A=0.0;
	double	B=0.0;
	double	Du=0.0;
	double	Dv=0.0;
//splitting coefficients
	double	a=0.5;	
	double 	b=0.5;
	double 	c=1.0;
//loop counters	
	int i=0;
	int j=0;
	int n=0;

	double*umax=NULL;
	double*vmax=NULL;
	parainit(&Nx,&Ny,&Tmax,&plotgap,&Lx,&Ly,&dt,&Du,&Dv,&A,&B);
	plottime=plotgap;
	vmax=(double*)malloc((Tmax/plotgap+1)*sizeof(double));
	umax=(double*)malloc((Tmax/plotgap+1)*sizeof(double));
//openCL variables
    cl_platform_id *platform_id = NULL;
    cl_kernel frequencies = NULL, initialdata = NULL, linearpart=NULL;
	cl_kernel nonlinearpart_a=NULL, nonlinearpart_b=NULL;
    cl_int ret;
    cl_uint num_platforms;
// Detect how many platforms there are.
	ret = clGetPlatformIDs(0, NULL, &num_platforms);
// Allocate enough space for the number of platforms.
	platform_id = (cl_platform_id*) malloc(num_platforms*sizeof(cl_platform_id));
// Store the platforms
	ret = clGetPlatformIDs(num_platforms, platform_id, NULL);
	printf("Found %d platform(s)!\n",num_platforms);
    cl_uint *num_devices;
	num_devices=(cl_uint*) malloc(num_platforms*sizeof(cl_uint));
    cl_device_id **device_id = NULL;
	device_id =(cl_device_id**) malloc(num_platforms*sizeof(cl_device_id*));
// Detect number of devices in the platforms
	for(i=0;i<num_platforms;i++){
		char buf[65536];
		size_t size;
		ret = clGetPlatformInfo(platform_id[i],CL_PLATFORM_VERSION,sizeof(buf),buf,&size);
		printf("%s\n",buf);
		ret = clGetDeviceIDs(platform_id[i],CL_DEVICE_TYPE_ALL,0,NULL,num_devices);
		printf("Found %d device(s) on platform %d!\n", num_devices[i],i);
		ret = clGetPlatformInfo(platform_id[i],CL_PLATFORM_NAME,sizeof(buf),buf,&size);
		printf("%s ",buf);
// Store numDevices from platform
		device_id[i]=(cl_device_id*) malloc(num_devices[i]*sizeof(device_id));
		ret = clGetDeviceIDs(platform_id[i],CL_DEVICE_TYPE_ALL,num_devices[i],device_id[i],NULL);
		for(j=0;j<num_devices[i];j++){
			ret = clGetDeviceInfo(device_id[i][j],CL_DEVICE_NAME,sizeof(buf),buf,&size);
			printf("%s (%d,%d)\n",buf,i,j);
		}
	}
//create context and command_queue
    cl_context context = NULL;
   	cl_command_queue command_queue = NULL;
//Which platform and device do i choose?
	int	chooseplatform=0;
	int	choosedevice=0;	  
	printf("Choose platform %d and device %d!\n",chooseplatform,choosedevice);
	context = clCreateContext( NULL, num_devices[chooseplatform], device_id[chooseplatform], NULL, NULL, &ret);
	if(ret!=CL_SUCCESS){printf("createContext ret:%d\n",ret); exit(1); }
	command_queue = clCreateCommandQueue(context, device_id[chooseplatform][choosedevice], 0, &ret);
	if(ret!=CL_SUCCESS){printf("createCommandQueue ret:%d\n",ret); exit(1); }

//OpenCL arrays
    cl_mem cl_u = NULL,cl_v = NULL;
   	cl_mem cl_uhat = NULL, cl_vhat = NULL;
    cl_mem cl_kx = NULL, cl_ky = NULL;

//FFT
	clfftPlanHandle planHandle;
    cl_mem tmpBuffer = NULL;
	fftinit(&planHandle,&context, &command_queue, &tmpBuffer, Nx, Ny);

//allocate gpu memory/
	cl_u=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx* Ny* sizeof(double), NULL, &ret);
	cl_v=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx* Ny* sizeof(double), NULL, &ret);
	cl_uhat=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx * Ny* sizeof(double), NULL, &ret);
	cl_vhat=clCreateBuffer(context, CL_MEM_READ_WRITE, 2*Nx * Ny* sizeof(double), NULL, &ret);
	cl_kx = clCreateBuffer(context, CL_MEM_READ_WRITE, Nx * sizeof(double), NULL, &ret);
	cl_ky = clCreateBuffer(context, CL_MEM_READ_WRITE, Ny * sizeof(double), NULL, &ret);

	printf("allocated space\n");
//load the kernels
	loadKernel(&frequencies,&context,&device_id[chooseplatform][choosedevice],"frequencies");
	loadKernel(&initialdata,&context,&device_id[chooseplatform][choosedevice],"initialdata"); 
	loadKernel(&linearpart,&context,&device_id[chooseplatform][choosedevice],"linearpart"); 
	loadKernel(&nonlinearpart_a,&context,&device_id[chooseplatform][choosedevice],"nonlinearpart_a"); 
	loadKernel(&nonlinearpart_b,&context,&device_id[chooseplatform][choosedevice],"nonlinearpart_b"); 

	size_t global_work_size[1] = {Nx*Ny};
	size_t global_work_size_X[1] = {Nx};
	size_t global_work_size_Y[1] = {Ny};
//frequencies
    ret = clSetKernelArg(frequencies, 0, sizeof(cl_mem),(void *)&cl_kx);
	ret = clSetKernelArg(frequencies, 1, sizeof(double),(void* )&Lx);
	ret = clSetKernelArg(frequencies, 2, sizeof(int),(void* )&Nx);
    ret = clEnqueueNDRangeKernel(command_queue, frequencies, 1, NULL, global_work_size_X, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);
    ret = clSetKernelArg(frequencies, 0, sizeof(cl_mem),(void *)&cl_ky);
	ret = clSetKernelArg(frequencies, 1, sizeof(double),(void* )&Ly);
	ret = clSetKernelArg(frequencies, 2, sizeof(int),(void* )&Ny);
    ret = clEnqueueNDRangeKernel(command_queue, frequencies, 1, NULL, global_work_size_Y, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);
//printCL(&cl_kx,&command_queue,Nx,1);
//printCL(&cl_ky,&command_queue,1,Ny);
//inintial data
    ret = clSetKernelArg(initialdata, 0, sizeof(cl_mem),(void *)&cl_u);
	ret = clSetKernelArg(initialdata, 1, sizeof(cl_mem),(void* )&cl_v);
	ret = clSetKernelArg(initialdata, 2, sizeof(int),(void* )&Nx);
	ret = clSetKernelArg(initialdata, 3, sizeof(int),(void* )&Ny);
	ret = clSetKernelArg(initialdata, 4, sizeof(double),(void* )&Lx);
	ret = clSetKernelArg(initialdata, 5, sizeof(double),(void* )&Ly);
    ret = clEnqueueNDRangeKernel(command_queue, initialdata, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);
//make output
    writedata_C(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
    writedata_C(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
    umax[plotnum]=writeimage(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
    vmax[plotnum]=writeimage(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
	printf("Got initial data, starting timestepping\n");
	mtime_s(&tvs);

	for(n=0;n<=Tmax;n++){
//nonlinearpart_a
    ret = clSetKernelArg(nonlinearpart_a, 0, sizeof(cl_mem),(void *)&cl_u);
	ret = clSetKernelArg(nonlinearpart_a, 1, sizeof(cl_mem),(void* )&cl_v);
	ret = clSetKernelArg(nonlinearpart_a, 2, sizeof(double),(void* )&A);
	ret = clSetKernelArg(nonlinearpart_a, 3, sizeof(double),(void* )&dt);
	ret = clSetKernelArg(nonlinearpart_a, 4, sizeof(double),(void* )&a);
    ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_a, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);	

//nonlinearpart_b
    ret = clSetKernelArg(nonlinearpart_b, 0, sizeof(cl_mem),(void *)&cl_u);
	ret = clSetKernelArg(nonlinearpart_b, 1, sizeof(cl_mem),(void* )&cl_v);
	ret = clSetKernelArg(nonlinearpart_b, 2, sizeof(double),(void* )&A);
	ret = clSetKernelArg(nonlinearpart_b, 3, sizeof(double),(void* )&dt);
	ret = clSetKernelArg(nonlinearpart_b, 4, sizeof(double),(void* )&b);
    ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_b, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);

//linear
	fft2dfor(&cl_u, &cl_uhat,&planHandle,&command_queue,&tmpBuffer);
	fft2dfor(&cl_v, &cl_vhat,&planHandle,&command_queue,&tmpBuffer);
//printf("A%f,B%f\n",A,B);
    ret = clSetKernelArg(linearpart, 0, sizeof(cl_mem),(void *)&cl_uhat);
    ret = clSetKernelArg(linearpart, 1, sizeof(cl_mem),(void *)&cl_vhat);
	ret = clSetKernelArg(linearpart, 2, sizeof(cl_mem),(void* )&cl_kx);
	ret = clSetKernelArg(linearpart, 3, sizeof(cl_mem),(void* )&cl_ky);
	ret = clSetKernelArg(linearpart, 4, sizeof(double),(void* )&Du);
	ret = clSetKernelArg(linearpart, 5, sizeof(double),(void* )&Dv);
	ret = clSetKernelArg(linearpart, 6, sizeof(double),(void* )&A);
	ret = clSetKernelArg(linearpart, 7, sizeof(double),(void* )&B);
	ret = clSetKernelArg(linearpart, 8, sizeof(double),(void* )&dt);
	ret = clSetKernelArg(linearpart, 9, sizeof(double),(void* )&c);
	ret = clSetKernelArg(linearpart, 10, sizeof(int),(void* )&Nx);
	ret = clSetKernelArg(linearpart, 11, sizeof(int),(void* )&Ny);
    ret = clEnqueueNDRangeKernel(command_queue, linearpart, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);

	fft2dback(&cl_u, &cl_uhat,&planHandle,&command_queue,&tmpBuffer);
  	fft2dback(&cl_v, &cl_vhat,&planHandle,&command_queue,&tmpBuffer);

//nonlinearpart_b
    ret = clSetKernelArg(nonlinearpart_b, 0, sizeof(cl_mem),(void *)&cl_u);
	ret = clSetKernelArg(nonlinearpart_b, 1, sizeof(cl_mem),(void* )&cl_v);
	ret = clSetKernelArg(nonlinearpart_b, 2, sizeof(double),(void* )&A);
	ret = clSetKernelArg(nonlinearpart_b, 3, sizeof(double),(void* )&dt);
	ret = clSetKernelArg(nonlinearpart_b, 4, sizeof(double),(void* )&b);
    ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_b, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);		
//nonlinearpart_a
    ret = clSetKernelArg(nonlinearpart_a, 0, sizeof(cl_mem),(void *)&cl_u);
	ret = clSetKernelArg(nonlinearpart_a, 1, sizeof(cl_mem),(void* )&cl_v);
	ret = clSetKernelArg(nonlinearpart_a, 2, sizeof(double),(void* )&A);
	ret = clSetKernelArg(nonlinearpart_a, 3, sizeof(double),(void* )&dt);
	ret = clSetKernelArg(nonlinearpart_a, 4, sizeof(double),(void* )&a);
    ret = clEnqueueNDRangeKernel(command_queue, nonlinearpart_a, 1, NULL, global_work_size, NULL, 0, NULL, NULL);
	ret = clFinish(command_queue);	
// done
	if(n==plottime){
		printf("time:%f, step:%d,%d,umax:%f,vmax:%f\n",n*dt,n,plotnum,umax[plotnum],vmax[plotnum]);
		plottime=plottime+plotgap;
		plotnum=plotnum+1;
   	 	writedata_C(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
    	writedata_C(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
        umax[plotnum]=writeimage(&cl_u, &command_queue,Nx,Ny,plotnum,"u");
        vmax[plotnum]=writeimage(&cl_v, &command_queue,Nx,Ny,plotnum,"v");
	}
}//end timestepping

	printf("Finished time stepping\n");
	mtime_e(&tvs,"Programm took:");
	writearray(umax,(Tmax/plotgap)+1,"u");
	writearray(vmax,(Tmax/plotgap)+1,"v");
	free(umax);
	free(vmax);	

	clReleaseMemObject(cl_u);
	clReleaseMemObject(cl_v);
	clReleaseMemObject(cl_uhat);
	clReleaseMemObject(cl_vhat);
	clReleaseMemObject(cl_kx);
	clReleaseMemObject(cl_ky);

    ret = clReleaseKernel(initialdata); 
    ret = clReleaseKernel(frequencies); 
    ret = clReleaseKernel(linearpart); 
    ret = clReleaseKernel(nonlinearpart_a);
    ret = clReleaseKernel(nonlinearpart_b);

	fftdestroy(&planHandle, &tmpBuffer);

	ret = clReleaseCommandQueue(command_queue);
    ret = clReleaseContext(context);

	for(i=0;i<num_platforms;i++){free(device_id[i]);}
	free(device_id);
	free(platform_id);
	free(num_devices);
	printf("Program execution complete\n");

	return 0;
}
示例#22
0
_clState *initCl(unsigned int gpu, char *name, size_t nameSize)
{
	_clState *clState = calloc(1, sizeof(_clState));
	bool patchbfi = false, prog_built = false;
	struct cgpu_info *cgpu = &gpus[gpu];
	cl_platform_id platform = NULL;
	char pbuff[256], vbuff[255];
	cl_platform_id* platforms;
	cl_uint preferred_vwidth;
	cl_device_id *devices;
	cl_uint numPlatforms;
	cl_uint numDevices;
	cl_int status;

	status = clGetPlatformIDs(0, NULL, &numPlatforms);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Getting Platforms. (clGetPlatformsIDs)", status);
		return NULL;
	}

	platforms = (cl_platform_id *)alloca(numPlatforms*sizeof(cl_platform_id));
	status = clGetPlatformIDs(numPlatforms, platforms, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Getting Platform Ids. (clGetPlatformsIDs)", status);
		return NULL;
	}

	if (opt_platform_id >= (int)numPlatforms) {
		applog(LOG_ERR, "Specified platform that does not exist");
		return NULL;
	}

	status = clGetPlatformInfo(platforms[opt_platform_id], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Getting Platform Info. (clGetPlatformInfo)", status);
		return NULL;
	}
	platform = platforms[opt_platform_id];

	if (platform == NULL) {
		perror("NULL platform found!\n");
		return NULL;
	}

	applog(LOG_INFO, "CL Platform vendor: %s", pbuff);
	status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(pbuff), pbuff, NULL);
	if (status == CL_SUCCESS)
		applog(LOG_INFO, "CL Platform name: %s", pbuff);
	status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(vbuff), vbuff, NULL);
	if (status == CL_SUCCESS)
		applog(LOG_INFO, "CL Platform version: %s", vbuff);

	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Getting Device IDs (num)", status);
		return NULL;
	}

	if (numDevices > 0 ) {
		devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));

		/* Now, get the device list data */

		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error %d: Getting Device IDs (list)", status);
			return NULL;
		}

		applog(LOG_INFO, "List of devices:");

		unsigned int i;
		for (i = 0; i < numDevices; i++) {
			status = clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
			if (status != CL_SUCCESS) {
				applog(LOG_ERR, "Error %d: Getting Device Info", status);
				return NULL;
			}

			applog(LOG_INFO, "\t%i\t%s", i, pbuff);
		}

		if (gpu < numDevices) {
			status = clGetDeviceInfo(devices[gpu], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
			if (status != CL_SUCCESS) {
				applog(LOG_ERR, "Error %d: Getting Device Info", status);
				return NULL;
			}

			applog(LOG_INFO, "Selected %i: %s", gpu, pbuff);
			strncpy(name, pbuff, nameSize);
		} else {
			applog(LOG_ERR, "Invalid GPU %i", gpu);
			return NULL;
		}

	} else return NULL;

	cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };

	clState->context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL, &status);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Creating Context. (clCreateContextFromType)", status);
		return NULL;
	}

	/////////////////////////////////////////////////////////////////
	// Create an OpenCL command queue
	/////////////////////////////////////////////////////////////////
	clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu],
						     CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &status);
	if (status != CL_SUCCESS) /* Try again without OOE enable */
		clState->commandQueue = clCreateCommandQueue(clState->context, devices[gpu], 0 , &status);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Creating Command Queue. (clCreateCommandQueue)", status);
		return NULL;
	}

	/* Check for BFI INT support. Hopefully people don't mix devices with
	 * and without it! */
	char * extensions = malloc(1024);
	const char * camo = "cl_amd_media_ops";
	char *find;

	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_EXTENSIONS, 1024, (void *)extensions, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_EXTENSIONS", status);
		return NULL;
	}
	find = strstr(extensions, camo);
	if (find)
		clState->hasBitAlign = true;
		
	/* Check for OpenCL >= 1.0 support, needed for global offset parameter usage. */
	char * devoclver = malloc(1024);
	const char * ocl10 = "OpenCL 1.0";
	const char * ocl11 = "OpenCL 1.1";

	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_VERSION, 1024, (void *)devoclver, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_VERSION", status);
		return NULL;
	}
	find = strstr(devoclver, ocl10);
	if (!find) {
		clState->hasOpenCL11plus = true;
		find = strstr(devoclver, ocl11);
		if (!find)
			clState->hasOpenCL12plus = true;
	}

	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT", status);
		return NULL;
	}
	applog(LOG_DEBUG, "Preferred vector width reported %d", preferred_vwidth);

	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size_t), (void *)&clState->max_work_size, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_WORK_GROUP_SIZE", status);
		return NULL;
	}
	applog(LOG_DEBUG, "Max work group size reported %d", (int)(clState->max_work_size));

	status = clGetDeviceInfo(devices[gpu], CL_DEVICE_MAX_MEM_ALLOC_SIZE , sizeof(cl_ulong), (void *)&cgpu->max_alloc, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Failed to clGetDeviceInfo when trying to get CL_DEVICE_MAX_MEM_ALLOC_SIZE", status);
		return NULL;
	}
	applog(LOG_DEBUG, "Max mem alloc size is %lu", (long unsigned int)(cgpu->max_alloc));

	/* Create binary filename based on parameters passed to opencl
	 * compiler to ensure we only load a binary that matches what would
	 * have otherwise created. The filename is:
	 * name + kernelname +/- g(offset) + v + vectors + w + work_size + l + sizeof(long) + .bin
	 * For scrypt the filename is:
	 * name + kernelname + g + lg + lookup_gap + tc + thread_concurrency + w + work_size + l + sizeof(long) + .bin
	 */
	char binaryfilename[255];
	char filename[255];
	char numbuf[16];

	if (cgpu->kernel == KL_NONE) {
		if (opt_scrypt) {
			applog(LOG_INFO, "Selecting scrypt kernel");
			clState->chosen_kernel = KL_SCRYPT;
		} else if (opt_blake256) {
			applog(LOG_INFO, "Selecting blake256 kernel");
			clState->chosen_kernel = KL_BLAKE256;
		} else if (!strstr(name, "Tahiti") &&
			/* Detect all 2.6 SDKs not with Tahiti and use diablo kernel */
			(strstr(vbuff, "844.4") ||  // Linux 64 bit ATI 2.6 SDK
			 strstr(vbuff, "851.4") ||  // Windows 64 bit ""
			 strstr(vbuff, "831.4") ||
			 strstr(vbuff, "898.1") ||  // 12.2 driver SDK 
			 strstr(vbuff, "923.1") ||  // 12.4
			 strstr(vbuff, "938.2") ||  // SDK 2.7
			 strstr(vbuff, "1113.2"))) {// SDK 2.8
				applog(LOG_INFO, "Selecting diablo kernel");
				clState->chosen_kernel = KL_DIABLO;
		/* Detect all 7970s, older ATI and NVIDIA and use poclbm */
		} else if (strstr(name, "Tahiti") || !clState->hasBitAlign) {
			applog(LOG_INFO, "Selecting poclbm kernel");
			clState->chosen_kernel = KL_POCLBM;
		/* Use phatk for the rest R5xxx R6xxx */
		} else {
			applog(LOG_INFO, "Selecting phatk kernel");
			clState->chosen_kernel = KL_PHATK;
		}
		cgpu->kernel = clState->chosen_kernel;
	} else {
		clState->chosen_kernel = cgpu->kernel;
		if (clState->chosen_kernel == KL_PHATK &&
		    (strstr(vbuff, "844.4") || strstr(vbuff, "851.4") ||
		     strstr(vbuff, "831.4") || strstr(vbuff, "898.1") ||
		     strstr(vbuff, "923.1") || strstr(vbuff, "938.2") ||
		     strstr(vbuff, "1113.2"))) {
			applog(LOG_WARNING, "WARNING: You have selected the phatk kernel.");
			applog(LOG_WARNING, "You are running SDK 2.6+ which performs poorly with this kernel.");
			applog(LOG_WARNING, "Downgrade your SDK and delete any .bin files before starting again.");
			applog(LOG_WARNING, "Or allow cgminer to automatically choose a more suitable kernel.");
		}
	}

	/* For some reason 2 vectors is still better even if the card says
	 * otherwise, and many cards lie about their max so use 256 as max
	 * unless explicitly set on the command line. Tahiti prefers 1 */
	if (strstr(name, "Tahiti"))
		preferred_vwidth = 1;
	else if (preferred_vwidth > 2)
		preferred_vwidth = 2;

	switch (clState->chosen_kernel) {
		case KL_POCLBM:
			strcpy(filename, POCLBM_KERNNAME".cl");
			strcpy(binaryfilename, POCLBM_KERNNAME);
			break;
		case KL_PHATK:
			strcpy(filename, PHATK_KERNNAME".cl");
			strcpy(binaryfilename, PHATK_KERNNAME);
			break;
		case KL_DIAKGCN:
			strcpy(filename, DIAKGCN_KERNNAME".cl");
			strcpy(binaryfilename, DIAKGCN_KERNNAME);
			break;
		case KL_SCRYPT:
			strcpy(filename, SCRYPT_KERNNAME".cl");
			strcpy(binaryfilename, SCRYPT_KERNNAME);
			/* Scrypt only supports vector 1 */
			cgpu->vwidth = 1;
			break;
		case KL_BLAKE256:
			strcpy(filename, BLAKE256_KERNNAME".cl");
			strcpy(binaryfilename, BLAKE256_KERNNAME);
			break;
		case KL_NONE: /* Shouldn't happen */
		case KL_DIABLO:
			strcpy(filename, DIABLO_KERNNAME".cl");
			strcpy(binaryfilename, DIABLO_KERNNAME);
			break;
	}

	if (cgpu->vwidth)
		clState->vwidth = cgpu->vwidth;
	else {
		clState->vwidth = preferred_vwidth;
		cgpu->vwidth = preferred_vwidth;
	}

	if (((clState->chosen_kernel == KL_POCLBM || clState->chosen_kernel == KL_DIABLO || clState->chosen_kernel == KL_DIAKGCN) &&
		clState->vwidth == 1 && clState->hasOpenCL11plus) || opt_scrypt || opt_blake256)
			clState->goffset = true;

	if (cgpu->work_size && cgpu->work_size <= clState->max_work_size)
		clState->wsize = cgpu->work_size;
	else if (opt_scrypt)
		clState->wsize = 256;
	else if (strstr(name, "Tahiti"))
		clState->wsize = 64;
	else
		clState->wsize = (clState->max_work_size <= 256 ? clState->max_work_size : 256) / clState->vwidth;
	cgpu->work_size = clState->wsize;

#ifdef USE_SCRYPT
	if (opt_scrypt) {
		if (!cgpu->opt_lg) {
			applog(LOG_DEBUG, "GPU %d: selecting lookup gap of 2", gpu);
			cgpu->lookup_gap = 2;
		} else
			cgpu->lookup_gap = cgpu->opt_lg;

		if (!cgpu->opt_tc) {
			unsigned int sixtyfours;

			sixtyfours =  cgpu->max_alloc / 131072 / 64 - 1;
			cgpu->thread_concurrency = sixtyfours * 64;
			if (cgpu->shaders && cgpu->thread_concurrency > cgpu->shaders) {
				cgpu->thread_concurrency -= cgpu->thread_concurrency % cgpu->shaders;
				if (cgpu->thread_concurrency > cgpu->shaders * 5)
					cgpu->thread_concurrency = cgpu->shaders * 5;
			}
			applog(LOG_DEBUG, "GPU %d: selecting thread concurrency of %d", gpu, (int)(cgpu->thread_concurrency));
		} else
			cgpu->thread_concurrency = cgpu->opt_tc;
	}
#endif

	FILE *binaryfile;
	size_t *binary_sizes;
	char **binaries;
	int pl;
	char *source = file_contents(filename, &pl);
	size_t sourceSize[] = {(size_t)pl};
	cl_uint slot, cpnd;

	slot = cpnd = 0;

	if (!source)
		return NULL;

	binary_sizes = calloc(sizeof(size_t) * MAX_GPUDEVICES * 4, 1);
	if (unlikely(!binary_sizes)) {
		applog(LOG_ERR, "Unable to calloc binary_sizes");
		return NULL;
	}
	binaries = calloc(sizeof(char *) * MAX_GPUDEVICES * 4, 1);
	if (unlikely(!binaries)) {
		applog(LOG_ERR, "Unable to calloc binaries");
		return NULL;
	}

	strcat(binaryfilename, name);
	if (clState->goffset)
		strcat(binaryfilename, "g");
	if (opt_scrypt) {
#ifdef USE_SCRYPT
		sprintf(numbuf, "lg%utc%u", cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency);
		strcat(binaryfilename, numbuf);
#endif
	} else {
		sprintf(numbuf, "v%d", clState->vwidth);
		strcat(binaryfilename, numbuf);
	}
	sprintf(numbuf, "w%d", (int)clState->wsize);
	strcat(binaryfilename, numbuf);
	sprintf(numbuf, "l%d", (int)sizeof(long));
	strcat(binaryfilename, numbuf);
	strcat(binaryfilename, ".bin");

	binaryfile = fopen(binaryfilename, "rb");
	if (!binaryfile) {
		applog(LOG_DEBUG, "No binary found, generating from source");
	} else {
		struct stat binary_stat;

		if (unlikely(stat(binaryfilename, &binary_stat))) {
			applog(LOG_DEBUG, "Unable to stat binary, generating from source");
			fclose(binaryfile);
			goto build;
		}
		if (!binary_stat.st_size)
			goto build;

		binary_sizes[slot] = binary_stat.st_size;
		binaries[slot] = (char *)calloc(binary_sizes[slot], 1);
		if (unlikely(!binaries[slot])) {
			applog(LOG_ERR, "Unable to calloc binaries");
			fclose(binaryfile);
			return NULL;
		}

		if (fread(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot]) {
			applog(LOG_ERR, "Unable to fread binaries");
			fclose(binaryfile);
			free(binaries[slot]);
			goto build;
		}

		clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[slot], (const unsigned char **)binaries, &status, NULL);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
			fclose(binaryfile);
			free(binaries[slot]);
			goto build;
		}

		fclose(binaryfile);
		applog(LOG_DEBUG, "Loaded binary image %s", binaryfilename);

		goto built;
	}

	/////////////////////////////////////////////////////////////////
	// Load CL file, build CL program object, create CL kernel object
	/////////////////////////////////////////////////////////////////

build:
	clState->program = clCreateProgramWithSource(clState->context, 1, (const char **)&source, sourceSize, &status);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithSource)", status);
		return NULL;
	}

	/* create a cl program executable for all the devices specified */
	char *CompilerOptions = calloc(1, 256);

#ifdef USE_SCRYPT
	if (opt_scrypt)
		sprintf(CompilerOptions, "-D LOOKUP_GAP=%d -D CONCURRENT_THREADS=%d -D WORKSIZE=%d",
			cgpu->lookup_gap, (unsigned int)cgpu->thread_concurrency, (int)clState->wsize);
	else
#endif
	{
		sprintf(CompilerOptions, "-D WORKSIZE=%d -D VECTORS%d -D WORKVEC=%d",
			(int)clState->wsize, clState->vwidth, (int)clState->wsize * clState->vwidth);
	}
	applog(LOG_DEBUG, "Setting worksize to %d", (int)(clState->wsize));
	if (clState->vwidth > 1)
		applog(LOG_DEBUG, "Patched source to suit %d vectors", clState->vwidth);

	if (clState->hasBitAlign) {
		strcat(CompilerOptions, " -D BITALIGN");
		applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN");
		if (!clState->hasOpenCL12plus &&
		    (strstr(name, "Cedar") ||
		     strstr(name, "Redwood") ||
		     strstr(name, "Juniper") ||
		     strstr(name, "Cypress" ) ||
		     strstr(name, "Hemlock" ) ||
		     strstr(name, "Caicos" ) ||
		     strstr(name, "Turks" ) ||
		     strstr(name, "Barts" ) ||
		     strstr(name, "Cayman" ) ||
		     strstr(name, "Antilles" ) ||
		     strstr(name, "Wrestler" ) ||
		     strstr(name, "Zacate" ) ||
		     strstr(name, "WinterPark" )))
			patchbfi = true;
	} else
		applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN");

	if (patchbfi) {
		strcat(CompilerOptions, " -D BFI_INT");
		applog(LOG_DEBUG, "BFI_INT patch requiring device found, patched source with BFI_INT");
	} else
		applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch");

	if (clState->goffset)
		strcat(CompilerOptions, " -D GOFFSET");

	if (!clState->hasOpenCL11plus)
		strcat(CompilerOptions, " -D OCL1");

	applog(LOG_DEBUG, "CompilerOptions: %s", CompilerOptions);
	status = clBuildProgram(clState->program, 1, &devices[gpu], CompilerOptions , NULL, NULL);
	free(CompilerOptions);

	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Building Program (clBuildProgram)", status);
		size_t logSize;
		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);

		char *log = malloc(logSize);
		status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
		applog(LOG_ERR, "%s", log);
		return NULL;
	}

	prog_built = true;

#ifdef __APPLE__
	/* OSX OpenCL breaks reading off binaries with >1 GPU so always build
	 * from source. */
	goto built;
#endif

	status = clGetProgramInfo(clState->program, CL_PROGRAM_NUM_DEVICES, sizeof(cl_uint), &cpnd, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_NUM_DEVICES. (clGetProgramInfo)", status);
		return NULL;
	}

	status = clGetProgramInfo(clState->program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t)*cpnd, binary_sizes, NULL);
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Getting program info CL_PROGRAM_BINARY_SIZES. (clGetProgramInfo)", status);
		return NULL;
	}

	/* The actual compiled binary ends up in a RANDOM slot! Grr, so we have
	 * to iterate over all the binary slots and find where the real program
	 * is. What the heck is this!? */
	for (slot = 0; slot < cpnd; slot++)
		if (binary_sizes[slot])
			break;

	/* copy over all of the generated binaries. */
	applog(LOG_DEBUG, "Binary size for gpu %d found in binary slot %d: %d", gpu, slot, (int)(binary_sizes[slot]));
	if (!binary_sizes[slot]) {
		applog(LOG_ERR, "OpenCL compiler generated a zero sized binary, FAIL!");
		return NULL;
	}
	binaries[slot] = calloc(sizeof(char) * binary_sizes[slot], 1);
	status = clGetProgramInfo(clState->program, CL_PROGRAM_BINARIES, sizeof(char *) * cpnd, binaries, NULL );
	if (unlikely(status != CL_SUCCESS)) {
		applog(LOG_ERR, "Error %d: Getting program info. CL_PROGRAM_BINARIES (clGetProgramInfo)", status);
		return NULL;
	}

	/* Patch the kernel if the hardware supports BFI_INT but it needs to
	 * be hacked in */
	if (patchbfi) {
		unsigned remaining = binary_sizes[slot];
		char *w = binaries[slot];
		unsigned int start, length;

		/* Find 2nd incidence of .text, and copy the program's
		* position and length at a fixed offset from that. Then go
		* back and find the 2nd incidence of \x7ELF (rewind by one
		* from ELF) and then patch the opcocdes */
		if (!advance(&w, &remaining, ".text"))
			goto build;
		w++; remaining--;
		if (!advance(&w, &remaining, ".text")) {
			/* 32 bit builds only one ELF */
			w--; remaining++;
		}
		memcpy(&start, w + 285, 4);
		memcpy(&length, w + 289, 4);
		w = binaries[slot]; remaining = binary_sizes[slot];
		if (!advance(&w, &remaining, "ELF"))
			goto build;
		w++; remaining--;
		if (!advance(&w, &remaining, "ELF")) {
			/* 32 bit builds only one ELF */
			w--; remaining++;
		}
		w--; remaining++;
		w += start; remaining -= start;
		applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching",
			w, remaining);
		patch_opcodes(w, length);

		status = clReleaseProgram(clState->program);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error %d: Releasing program. (clReleaseProgram)", status);
			return NULL;
		}

		clState->program = clCreateProgramWithBinary(clState->context, 1, &devices[gpu], &binary_sizes[slot], (const unsigned char **)&binaries[slot], &status, NULL);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error %d: Loading Binary into cl_program (clCreateProgramWithBinary)", status);
			return NULL;
		}

		/* Program needs to be rebuilt */
		prog_built = false;
	}

	free(source);

	/* Save the binary to be loaded next time */
	binaryfile = fopen(binaryfilename, "wb");
	if (!binaryfile) {
		/* Not a fatal problem, just means we build it again next time */
		applog(LOG_DEBUG, "Unable to create file %s", binaryfilename);
	} else {
		if (unlikely(fwrite(binaries[slot], 1, binary_sizes[slot], binaryfile) != binary_sizes[slot])) {
			applog(LOG_ERR, "Unable to fwrite to binaryfile");
			return NULL;
		}
		fclose(binaryfile);
	}
built:
	if (binaries[slot])
		free(binaries[slot]);
	free(binaries);
	free(binary_sizes);

	applog(LOG_INFO, "Initialising kernel %s with%s bitalign, %d vectors and worksize %d",
	       filename, clState->hasBitAlign ? "" : "out", clState->vwidth, (int)(clState->wsize));

	if (!prog_built) {
		/* create a cl program executable for all the devices specified */
		status = clBuildProgram(clState->program, 1, &devices[gpu], NULL, NULL, NULL);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error %d: Building Program (clBuildProgram)", status);
			size_t logSize;
			status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, 0, NULL, &logSize);

			char *log = malloc(logSize);
			status = clGetProgramBuildInfo(clState->program, devices[gpu], CL_PROGRAM_BUILD_LOG, logSize, log, NULL);
			applog(LOG_ERR, "%s", log);
			return NULL;
		}
	}

	/* get a kernel object handle for a kernel with the given name */
	clState->kernel = clCreateKernel(clState->program, "search", &status);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Creating Kernel from program. (clCreateKernel)", status);
		return NULL;
	}

#ifdef USE_SCRYPT
	if (opt_scrypt) {
		size_t ipt = (1024 / cgpu->lookup_gap + (1024 % cgpu->lookup_gap > 0));
		size_t bufsize = 128 * ipt * cgpu->thread_concurrency;

		/* Use the max alloc value which has been rounded to a power of
		 * 2 greater >= required amount earlier */
		if (bufsize > cgpu->max_alloc) {
			applog(LOG_WARNING, "Maximum buffer memory device %d supports says %lu",
						gpu, (long unsigned int)(cgpu->max_alloc));
			applog(LOG_WARNING, "Your scrypt settings come to %d", (int)bufsize);
		}
		applog(LOG_DEBUG, "Creating scrypt buffer sized %d", (int)bufsize);
		clState->padbufsize = bufsize;

		/* This buffer is weird and might work to some degree even if
		 * the create buffer call has apparently failed, so check if we
		 * get anything back before we call it a failure. */
		clState->padbuffer8 = NULL;
		clState->padbuffer8 = clCreateBuffer(clState->context, CL_MEM_READ_WRITE, bufsize, NULL, &status);
		if (status != CL_SUCCESS && !clState->padbuffer8) {
			applog(LOG_ERR, "Error %d: clCreateBuffer (padbuffer8), decrease TC or increase LG", status);
			return NULL;
		}

		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
			return NULL;
		}
		clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, SCRYPT_BUFFERSIZE, NULL, &status);
	} else
#endif
	if (opt_blake256) {
		clState->CLbuffer0 = clCreateBuffer(clState->context, CL_MEM_READ_ONLY, 128, NULL, &status);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error %d: clCreateBuffer (CLbuffer0)", status);
			return NULL;
		}
		clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, SCRYPT_BUFFERSIZE, NULL, &status);
	} else
		clState->outputBuffer = clCreateBuffer(clState->context, CL_MEM_WRITE_ONLY, BUFFERSIZE, NULL, &status);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: clCreateBuffer (outputBuffer)", status);
		return NULL;
	}

	return clState;
}
示例#23
0
文件: clutils.c 项目: ngaloppo/cf4ocl
/** 
 * @brief Create a new OpenCL zone, which will contain complete information for an OpenCL execution session on a specific device. 
 * 
 * @param deviceType Device type (OpenCL bitfield).
 * @param numQueues Number of command queues.
 * @param queueProperties Properties for the command queues.
 * @param devSel Pointer to function which will select device, if more than one is available.
 * @param dsExtraArg Extra argument for (*deviceSelector) function.
 * @param err Error structure, to be populated if an error occurs.
 * @return OpenCL zone or NULL if device wasn't properly initialized.
 */
CLUZone* clu_zone_new(cl_uint deviceType, cl_uint numQueues, cl_int queueProperties, clu_device_selector devSel, void* dsExtraArg, GError **err) {
	
	/* OpenCL status variable. */
	cl_int status;
	
	/* OpenCL zone to initialize and return */
	CLUZone* zone;
	
	/* Information about devices */
	CLUDeviceInfo devInfos[CLU_MAX_DEVICES_TOTAL];

	/* Number of devices. */
	cl_uint numDevices;

	/* Index of device information */
	cl_int deviceInfoIndex;

	/* Context properties, */
	cl_context_properties cps[3] = {CL_CONTEXT_PLATFORM, 0, 0};

	/* List of platform Ids. */
	cl_platform_id platfIds[CLU_MAX_PLATFORMS];

	/* Number of platforms. */
	cl_uint numPlatforms;

	/* Total number of devices. */
	unsigned int totalNumDevices;

	/* Device IDs for a given platform. */
	cl_device_id devIds[CLU_MAX_DEVICES_PER_PLATFORM];
	
	/* Initialize zone */
	zone = (CLUZone*) malloc(sizeof(CLUZone));
	gef_if_error_create_goto(
		*err, 
		CLU_UTILS_ERROR, 
		NULL == zone, 
		CLU_ERROR_NOALLOC, 
		error_handler, 
		"Unable to allocate memory for OpenCL zone"
	);
	zone->context = NULL;
	zone->queues = NULL;
	zone->program = NULL;
	zone->device_info.device_id = NULL;
	zone->device_info.platform_id = NULL;
	zone->device_info.device_name[0] = '\0';
	zone->device_info.device_vendor[0] = '\0';
	zone->device_info.platform_name[0] = '\0';
		
	/* Get number of platforms */
	status = clGetPlatformIDs(0, NULL, &numPlatforms);
	gef_if_error_create_goto(
		*err, 
		CLU_UTILS_ERROR, 
		CL_SUCCESS != status, 
		CLU_OCL_ERROR, 
		error_handler, 
		"clu_zone_new: get number of platforms (OpenCL error %d: %s).",
		status,
		clerror_get(status));

	/* Get existing platforms */
	status = clGetPlatformIDs(numPlatforms, platfIds, NULL);
	gef_if_error_create_goto(
		*err, 
		CLU_UTILS_ERROR, 
		CL_SUCCESS != status, 
		CLU_OCL_ERROR, 
		error_handler, 
		"clu_zone_new: get platform Ids (OpenCL error %d: %s).", 
		status,
		clerror_get(status));

	/* Cycle through platforms, get specified devices in existing platforms */
	totalNumDevices = 0;
	for(unsigned int i = 0; i < numPlatforms; i++) 	{
		/* Get specified devices for current platform */
		status = clGetDeviceIDs(
			platfIds[i], 
			deviceType, 
			CLU_MAX_DEVICES_PER_PLATFORM, 
			devIds, 
			&numDevices);
		if (status != CL_DEVICE_NOT_FOUND) {
			/* At least one device found, lets take note */
			gef_if_error_create_goto(
				*err, 
				CLU_UTILS_ERROR, 
				CL_SUCCESS != status, 
				CLU_OCL_ERROR, 
				error_handler, 
				"clu_zone_new: get device Ids (OpenCL error %d: %s).", 
				status,
				clerror_get(status));
			for (unsigned int j = 0; j < numDevices; j++) {
				/* Keep device and platform IDs. */
				devInfos[totalNumDevices].device_id = devIds[j];
				devInfos[totalNumDevices].platform_id = platfIds[i];
				/* Get device name. */
				status = clGetDeviceInfo(
					devIds[j], 
					CL_DEVICE_NAME, 
					sizeof(devInfos[totalNumDevices].device_name), 
					devInfos[totalNumDevices].device_name, 
					NULL);
				gef_if_error_create_goto(
					*err, 
					CLU_UTILS_ERROR, 
					CL_SUCCESS != status, 
					CLU_OCL_ERROR, 
					error_handler, 
					"clu_zone_new: get device name info (OpenCL error %d: %s).",
					status,
					clerror_get(status));
				/* Get device vendor. */
				status = clGetDeviceInfo(
					devIds[j], 
					CL_DEVICE_VENDOR, 
					sizeof(devInfos[totalNumDevices].device_vendor), 
					devInfos[totalNumDevices].device_vendor, 
					NULL);
				gef_if_error_create_goto(
					*err, 
					CLU_UTILS_ERROR, 
					CL_SUCCESS != status, 
					CLU_OCL_ERROR, 
					error_handler, 
					"clu_zone_new: get device vendor info (OpenCL error %d: %s).", 
					status,
					clerror_get(status));
				/* Get platform name. */
				status = clGetPlatformInfo(
					platfIds[i],
					CL_PLATFORM_VENDOR,
					sizeof(devInfos[totalNumDevices].platform_name),
					devInfos[totalNumDevices].platform_name,
					NULL);
				gef_if_error_create_goto(
					*err, 
					CLU_UTILS_ERROR, 
					CL_SUCCESS != status, 
					CLU_OCL_ERROR, 
					error_handler, 
					"clu_zone_new: get platform info (OpenCL error %d: %s).",
					status,
					clerror_get(status));
				/* Increment total number of found devices. */
				totalNumDevices++;
			}
		}
	}
	
	/* Check whether any devices of the specified type were found */
	if (totalNumDevices == 0) {
		/* No devices of the specified type where found, return with error. */
		gef_if_error_create_goto(
			*err, 
			CLU_UTILS_ERROR, 
			1, 
			CLU_ERROR_DEVICE_NOT_FOUND, 
			error_handler, 
			"clu_zone_new: device not found.");
	} else {
		/* Several compatible devices found, choose one with given selector function. */
		deviceInfoIndex = devSel(devInfos, totalNumDevices, dsExtraArg);
		/* Test return value of selector function (if it is out of range, 
		 * there is a programming error). */
		g_assert_cmpint(deviceInfoIndex, >=, -1);
		g_assert_cmpint(deviceInfoIndex, <, totalNumDevices);
		/* If selector function returned -1, then no device is selectable. */
		if (deviceInfoIndex == -1) {
			gef_if_error_create_goto(
				*err, 
				CLU_UTILS_ERROR, 
				1, 
				CLU_ERROR_DEVICE_NOT_FOUND, 
				error_handler, 
				"clu_zone_new: specified device not found.");
		}
	}

	/* Store info about the selected device and platform. */
	zone->device_type = deviceType;
	zone->device_info = devInfos[deviceInfoIndex];

	/* Determine number of compute units for that device */
	status = clGetDeviceInfo(
		zone->device_info.device_id, 
		CL_DEVICE_MAX_COMPUTE_UNITS, 
		sizeof(cl_uint), 
		&zone->cu, 
		NULL);
	gef_if_error_create_goto(
		*err, 
		CLU_UTILS_ERROR, 
		CL_SUCCESS != status, 
		CLU_OCL_ERROR, 
		error_handler, 
		"clu_zone_new: get target device info (OpenCL error %d: %s).", 
		status,
		clerror_get(status));
	
	/* Create a context on that device. */
	cps[1] = (cl_context_properties) devInfos[deviceInfoIndex].platform_id;
	zone->context = clCreateContext(cps, 1, &zone->device_info.device_id, NULL, NULL, &status);
	gef_if_error_create_goto(
		*err, 
		CLU_UTILS_ERROR, 
		CL_SUCCESS != status, 
		CLU_OCL_ERROR, 
		error_handler, 
		"clu_zone_new: creating context (OpenCL error %d: %s).", 
		status,
		clerror_get(status));
	
	/* Create the specified command queues on that device */
	zone->numQueues = numQueues;
	zone->queues = (cl_command_queue*) malloc(numQueues * sizeof(cl_command_queue));
	gef_if_error_create_goto(
		*err, 
		CLU_UTILS_ERROR, 
		NULL == zone->queues, 
		CLU_ERROR_NOALLOC, 
		error_handler, 
		"Unable to allocate memory to keep OpenCL command queues in Zone."
	);
	
	for (unsigned int i = 0; i < numQueues; i++) {
		zone->queues[i] = clCreateCommandQueue(
			zone->context, 
			zone->device_info.device_id, 
			queueProperties, 
			&status);
		gef_if_error_create_goto(
			*err, 
			CLU_UTILS_ERROR, 
			CL_SUCCESS != status, 
			CLU_OCL_ERROR, 
			error_handler, 
			"clu_zone_new: creating command queue (OpenCL error %d: %s).", 
			status,
			clerror_get(status));
	}

	/* If we got here, everything is OK. */
	g_assert (err == NULL || *err == NULL);
	goto finish;
	
error_handler:
	/* If we got here there was an error, verify that it is so. */
	g_assert (err == NULL || *err != NULL);
	/* Free OpenCL zone. */
	if (zone != NULL) {
		clu_zone_free(zone);
		zone = NULL;
	}

finish:	

	/* Return. */
	return zone;

}
示例#24
0
int clDevicesNum(void) {
	cl_int status;
	char pbuff[256];
	cl_uint numDevices;
	cl_uint numPlatforms;
	int most_devices = -1;
	cl_platform_id *platforms;
	cl_platform_id platform = NULL;
	unsigned int i, mdplatform = 0;

	status = clGetPlatformIDs(0, NULL, &numPlatforms);
	/* If this fails, assume no GPUs. */
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: clGetPlatformsIDs failed (no OpenCL SDK installed?)", status);
		return -1;
	}

	if (numPlatforms == 0) {
		applog(LOG_ERR, "clGetPlatformsIDs returned no platforms (no OpenCL SDK installed?)");
		return -1;
	}

	platforms = (cl_platform_id *)alloca(numPlatforms*sizeof(cl_platform_id));
	status = clGetPlatformIDs(numPlatforms, platforms, NULL);
	if (status != CL_SUCCESS) {
		applog(LOG_ERR, "Error %d: Getting Platform Ids. (clGetPlatformsIDs)", status);
		return -1;
	}

	for (i = 0; i < numPlatforms; i++) {
		if (opt_platform_id >= 0 && (int)i != opt_platform_id)
			continue;

		status = clGetPlatformInfo( platforms[i], CL_PLATFORM_VENDOR, sizeof(pbuff), pbuff, NULL);
		if (status != CL_SUCCESS) {
			applog(LOG_ERR, "Error %d: Getting Platform Info. (clGetPlatformInfo)", status);
			return -1;
		}
		platform = platforms[i];
		applog(LOG_INFO, "CL Platform %d vendor: %s", i, pbuff);
		status = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(pbuff), pbuff, NULL);
		if (status == CL_SUCCESS)
			applog(LOG_INFO, "CL Platform %d name: %s", i, pbuff);
		status = clGetPlatformInfo(platform, CL_PLATFORM_VERSION, sizeof(pbuff), pbuff, NULL);
		if (status == CL_SUCCESS)
			applog(LOG_INFO, "CL Platform %d version: %s", i, pbuff);
		status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &numDevices);
		if (status != CL_SUCCESS) {
			applog(LOG_INFO, "Error %d: Getting Device IDs (num)", status);
			continue;
		}
		applog(LOG_INFO, "Platform %d devices: %d", i, numDevices);
		if ((int)numDevices > most_devices) {
			most_devices = numDevices;
			mdplatform = i;
		}
		if (numDevices) {
			unsigned int j;
			cl_device_id *devices = (cl_device_id *)malloc(numDevices*sizeof(cl_device_id));

			clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, numDevices, devices, NULL);
			for (j = 0; j < numDevices; j++) {
				clGetDeviceInfo(devices[j], CL_DEVICE_NAME, sizeof(pbuff), pbuff, NULL);
				applog(LOG_INFO, "\t%i\t%s", j, pbuff);
			}
			free(devices);
		}
	}

	if (opt_platform_id < 0)
		opt_platform_id = mdplatform;;

	return most_devices;
}
示例#25
0
int main( void )
{
    cl_int err;
    cl_platform_id platform = 0;
    cl_device_id device = 0;
    cl_context_properties props[3] = { CL_CONTEXT_PLATFORM, 0, 0 };
    cl_context ctx = 0;
    cl_command_queue queue = 0;
    cl_mem bufX;
    float *X;
    cl_event event = NULL;
    int ret = 0;

    const size_t N0 = 4, N1 = 4, N2 = 4;
    char platform_name[128];
    char device_name[128];

    /* FFT library realted declarations */
    clfftPlanHandle planHandle;
    clfftDim dim = CLFFT_3D;
    size_t clLengths[3] = {N0, N1, N2};

    /* Setup OpenCL environment. */
    err = clGetPlatformIDs( 1, &platform, NULL );

    size_t ret_param_size = 0;
    err = clGetPlatformInfo(platform, CL_PLATFORM_NAME,
            sizeof(platform_name), platform_name,
            &ret_param_size);
    printf("Platform found: %s\n", platform_name);

    err = clGetDeviceIDs( platform, CL_DEVICE_TYPE_DEFAULT, 1, &device, NULL );

    err = clGetDeviceInfo(device, CL_DEVICE_NAME,
            sizeof(device_name), device_name,
            &ret_param_size);
    printf("Device found on the above platform: %s\n", device_name);

    props[1] = (cl_context_properties)platform;
    ctx = clCreateContext( props, 1, &device, NULL, NULL, &err );
    queue = clCreateCommandQueue( ctx, device, 0, &err );

    /* Setup clFFT. */
    clfftSetupData fftSetup;
    err = clfftInitSetupData(&fftSetup);
    err = clfftSetup(&fftSetup);

    /* Allocate host & initialize data. */
    /* Only allocation shown for simplicity. */
    size_t buffer_size  = N0 * N1 * N2 * 2 * sizeof(*X);
    X = (float *)malloc(buffer_size);

    /* print input array just using the
     * indices to fill the array with data */
    printf("\nPerforming fft on an three dimensional array of size N0 x N1 x N2 : %ld x %ld x %ld\n", N0, N1, N2);
    int i, j, k;
    i = j = k = 0;
    for (i=0; i<N0; ++i) {
        for (j=0; j<N1; ++j) {
            for (k=0; k<N2; ++k) {
                float x = 0.0f;
                float y = 0.0f;
                if (i==0 && j==0 && k==0) {
                    x = y = 0.5f;
                }
                unsigned idx = 2*(k+j*N1+i*N0*N1);
                X[idx] = x;
                X[idx+1] = y;
                printf("(%f, %f) ", X[idx], X[idx+1]);
            }
            printf("\n");
        }
        printf("\n");
    }

    /* Prepare OpenCL memory objects and place data inside them. */
    bufX = clCreateBuffer( ctx, CL_MEM_READ_WRITE, buffer_size, NULL, &err );

    err = clEnqueueWriteBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );

    /* Create a default plan for a complex FFT. */
    err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);

    /* Set plan parameters. */
    err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE);
    err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
    err = clfftSetResultLocation(planHandle, CLFFT_INPLACE);

    /* Bake the plan. */
    err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL);

    /* Execute the plan. */
    err = clfftEnqueueTransform(planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &bufX, NULL, NULL);

    /* Wait for calculations to be finished. */
    err = clFinish(queue);

    /* Fetch results of calculations. */
    err = clEnqueueReadBuffer( queue, bufX, CL_TRUE, 0, buffer_size, X, 0, NULL, NULL );

    /* print output array */
    printf("\n\nfft result: \n");
    i = j = k = 0;
    for (i=0; i<N0; ++i) {
        for (j=0; j<N1; ++j) {
            for (k=0; k<N2; ++k) {
                unsigned idx = 2*(k+j*N1+i*N0*N1);
                printf("(%f, %f) ", X[idx], X[idx+1]);
            }
            printf("\n");
        }
        printf("\n");
    }
    printf("\n");

    /* Release OpenCL memory objects. */
    clReleaseMemObject( bufX );

    free(X);

    /* Release the plan. */
    err = clfftDestroyPlan( &planHandle );

    /* Release clFFT library. */
    clfftTeardown( );

    /* Release OpenCL working objects. */
    clReleaseCommandQueue( queue );
    clReleaseContext( ctx );

    return ret;
}
示例#26
0
void BurstSort::parallelSort(std::ofstream& file){
	char* buffer = NULL;
	char* tmp;
	int* posArray = NULL;
	int entryLength = KEY_LENGTH + sizeof(char*);
	buffer = (char*) malloc(sizeof(char) * size * entryLength);
	posArray = (int*) malloc(sizeof(int) * (NODE_SIZE + 1));
	int pos = 0;
	posArray[0] = 0;
	for(int i = 0; i < NODE_SIZE; i++){
		for(int j = 0; j < nodes[i].used; j++){
			memcpy(buffer + pos * entryLength, nodes[i].entries[j], KEY_LENGTH * sizeof(char));
			memcpy(buffer + pos * entryLength + KEY_LENGTH, &nodes[i].entries[j], sizeof(char*));
			pos += sizeof(char);
		}
		posArray[i+1] = pos;
	}

	// OpenCL
	// Use this to check the output of each API call
    cl_int status;  
	cl_int numDevices = 1;
	
	// Connect to first platform
    cl_platform_id platform;
    status = clGetPlatformIDs(1, &platform, NULL);

	if (status != CL_SUCCESS) {
		printf("Error: Failed to find an OpenCL platform!\n");
		return -1;
	}
 
	char cBuffer[1024];
	clGetPlatformInfo(platform, CL_PLATFORM_VENDOR, sizeof(cBuffer), cBuffer, NULL);
	printf("CL_PLATFORM_VENDOR %s\n", cBuffer);

	clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(cBuffer), cBuffer, NULL);
	printf("CL_PLATFORM_NAME %s\n", cBuffer);

    cl_device_id device;
	status = clGetDeviceIDs(platform, CL_DEVICE_TYPE_ACCELERATOR, 1, &device, NULL);

	if (status != CL_SUCCESS) {
		printf("Error: Failed to create a device group!\n");
		return -1;
	}

	cl_long maxBufferSize = 0;
	status = clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_long), &maxBufferSize, NULL);
	printf("max buffer size: %lld\n", maxBufferSize);

    // Create a context and associate it with the devices
    cl_context context;
    context = clCreateContext(NULL, numDevices, &device, NULL, NULL, &status);
	

	if (status != CL_SUCCESS) {
		printf("Error in creating context, code %d\n", status);
		return -1;
	}
    // Create a command queue and associate it with the device 
    cl_command_queue cmdQueue;
    cmdQueue = clCreateCommandQueue(context, device, 0, &status);

	if (status != CL_SUCCESS) {
		printf("Error in creating command queue for a device, code %d\n", status);
		return -1;
	}

	// Load binary from disk
	unsigned char *kernelbinary;
	char *xclbin = "sort_xiaohui.xclbin";
	printf("loading %s\n", xclbin);
	int n_i = load_file_to_memory(xclbin, (char **) &kernelbinary);
	if (n_i < 0) {
		printf("ERROR: failed to load kernel from xclbin: %s\n", xclbin);
		return -1;
	}
	size_t n_bit = n_i;

	// Create the compute program from offline
	cl_program program = clCreateProgramWithBinary(context, 1, &device, &n_bit,
			(const unsigned char **) &kernelbinary, NULL, &status);
	if ((!program) || (status != CL_SUCCESS)) {
		printf("Error: Failed to create compute program from binary %d!\n", status);
		return -1;
	}

	// Build the program executable
	status = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

	if (status != CL_SUCCESS) {
		size_t len;
		char buffer[2048];

		printf("Error: Failed to build program executable!\n");
		clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
		printf("%s\n", buffer);
		return -1;
	}

	// Create the vector addition kernel
    cl_kernel kernel;
    kernel = clCreateKernel(program, "sort", &status);


	cl_mem clPosArray;
	cl_mem clBuffer;
	clBuffer = clCreateBuffer(context, CL_MEM_READ_WRITE, 
		sizeof(char) * size * entryLength, NULL, &status);

	clPosArray = clCreateBuffer(context, CL_MEM_READ_ONLY, 
		sizeof(int) * (NODE_SIZE + 1), NULL, &status);
	
	status = clEnqueueWriteBuffer(cmdQueue, clPosArray, CL_FALSE, 
		0, sizeof(int) * (NODE_SIZE + 1),posArray, 0, NULL, NULL);

	status = clEnqueueWriteBuffer(cmdQueue, clBuffer, CL_FALSE, 
		0, sizeof(char) * size * entryLength, buffer, 0, NULL, NULL);


    // Associate the input and output buffers with the kernel 
	status = clSetKernelArg(kernel, 0, sizeof(cl_mem), &clBuffer);

	status = clSetKernelArg(kernel, 1, sizeof(cl_mem), &clPosArray);

	int nodeSize = NODE_SIZE;
	status = clSetKernelArg(kernel, 2, sizeof(int), (void *)&nodeSize);

	status = clSetKernelArg(kernel, 3, sizeof(int), (void *)&entryLength);

	size_t globalWorkSize[1];   

	globalWorkSize[0] = NODE_SIZE;

    gettimeofday(&t1, NULL);
	// Execute the kernel for execution
    status = clEnqueueNDRangeKernel(cmdQueue, kernel, 1, NULL, globalWorkSize, NULL, 0, NULL, NULL);

	if (status != CL_SUCCESS) {
		printf("Error in clEnqueue, code %d\n", status);
		return -1;
	}


    // Read the device output buffer to the host output array
	clEnqueueReadBuffer(cmdQueue, clBuffer, CL_TRUE, 0, 
		sizeof(char) * size * entryLength, buffer, 0, NULL, NULL);

    // Free OpenCL resources
	clReleaseKernel(kernel);
	clReleaseProgram(program);
	clReleaseCommandQueue(cmdQueue);
	clReleaseMemObject(clBuffer);
	clReleaseMemObject(clPosArray);
	clReleaseContext(context);

    //print result
	for(int i = 0; i < size; i+= sizeof(char)){
		memcpy(&tmp,buffer + i * entryLength + KEY_LENGTH,sizeof(char*));
		file << tmp;
	}

    // Free host resources
	free(buffer);
	free(posArray);

	free(platforms);
	free(devices);

}
示例#27
0
int initializeCL(void) {
  cl_int status = 0;
  size_t deviceListSize;

  /*
   * Have a look at the available platforms and pick either
   * the AMD one if available or a reasonable default.
   */

  cl_uint numPlatforms;
  cl_platform_id platform = NULL;
  status = clGetPlatformIDs(0, NULL, &numPlatforms);
  if (status != CL_SUCCESS) {
    std::cout << "Error: Getting Platforms. (clGetPlatformsIDs)\n";
    return 1;
  }
  
  if(numPlatforms > 0) {
    cl_platform_id* platforms = new cl_platform_id[numPlatforms];
    status = clGetPlatformIDs(numPlatforms, platforms, NULL);
    if (status != CL_SUCCESS) {
      std::cout << "Error: Getting Platform Ids. (clGetPlatformsIDs)\n";
      return 1;
    }
    for (unsigned int i=0; i < numPlatforms; ++i) {
      char pbuff[100];
      status = clGetPlatformInfo(platforms[i], CL_PLATFORM_VENDOR,
          sizeof(pbuff), pbuff, NULL);
      if (status != CL_SUCCESS) {
        std::cout << "Error: Getting Platform Info.(clGetPlatformInfo)\n";
        return 1;
      }
      platform = platforms[i];
      if (!strcmp(pbuff, "NVIDIA Corporation")) {
         break;
      }
    }
    delete platforms;
  }

  if (NULL == platform) {
    std::cout << "NULL platform found so Exiting Application." << std::endl;
    return 1;
  }

  /*
   * If we could find our platform, use it. Otherwise use just available platform.
   */
  cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties)platform, 0 };

  /////////////////////////////////////////////////////////////////
  // Create an OpenCL context
  /////////////////////////////////////////////////////////////////
  context = clCreateContextFromType(cps, CL_DEVICE_TYPE_GPU, NULL, NULL,
      &status);
  if(status != CL_SUCCESS) {  
    std::cout << "Error: Creating Context. (clCreateContextFromType)\n";
    return 1; 
  }

  /* First, get the size of device list data */
  status = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
      &deviceListSize);
  if (status != CL_SUCCESS) {  
    std::cout << "Error: Getting Context Info \
        (device list size, clGetContextInfo)\n";
    return 1;
  }
示例#28
0
/**
* Selects CL platform/device capable of CL/GL interop.
*/
void cl_select(cl_platform_id* platform_id, cl_device_id* device_id) {
    cl_int err;
    int i;
    char* info;
    size_t infoSize;
    cl_uint platformCount;
    cl_platform_id *platforms;

    // get platform count
    err = clGetPlatformIDs(5, NULL, &platformCount);
    CHECK_ERR(err);

    // get all platforms
    platforms = (cl_platform_id*) malloc(sizeof(cl_platform_id) * platformCount);
    err = clGetPlatformIDs(platformCount, platforms, NULL);
    CHECK_ERR(err);

    // for each platform print all attributes
    for (i = 0; i < platformCount; i++) {

        printf("%d. Checking Platform \n", i+1);

        // get platform attribute value size
        err = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, 0, NULL, &infoSize);
        CHECK_ERR(err);
        info = (char*) malloc(infoSize);

        // get platform attribute value
        err = clGetPlatformInfo(platforms[i], CL_PLATFORM_EXTENSIONS, infoSize, info, NULL);
        CHECK_ERR(err);

        if(strstr(info, GL_SHARING_EXTENSION) != NULL) {
            cl_uint num_devices;
            cl_device_id* devices;

            // Get the number of GPU devices available to the platform
            err = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
            CHECK_ERR(err);

            // Create the device list
            devices = new cl_device_id [num_devices];
            err  = clGetDeviceIDs(platforms[i], CL_DEVICE_TYPE_GPU, num_devices, devices, NULL);
            CHECK_ERR(err);

            int d;
            for(d = 0; d < num_devices; d++) {

                // get device attribute value size
                size_t extensionSize;
                err = clGetDeviceInfo(devices[d], CL_DEVICE_EXTENSIONS, 0, NULL, &extensionSize );
                CHECK_ERR(err);

                if(extensionSize > 0) {
                    char* extensions = (char*)malloc(extensionSize);
                    err = clGetDeviceInfo(devices[d], CL_DEVICE_EXTENSIONS, extensionSize, extensions, &extensionSize);
                    CHECK_ERR(err);

                    if(strstr(info, GL_SHARING_EXTENSION) != NULL) {
                        printf("Found Compatible platform %d and device %d out of %d .\n", i, d, num_devices);
                        *platform_id = platforms[i];
                        *device_id = devices[d];

                        // TODO remove. currently a toggle for intel/nvidia platform
                        // break;
                    }

                    free(extensions);
                }

            }
        }

        free(info);
        printf("\n");

    }

    free(platforms);
}
int main(int argc, char const *argv[])
{
        /* Get platform */
        cl_platform_id platform;
        cl_uint num_platforms;
        cl_int ret = clGetPlatformIDs(1, &platform, &num_platforms);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clGetPlatformIDs' failed\n");
                exit(1);
        }
        
        printf("Number of platforms: %d\n", num_platforms);
        printf("platform=%p\n", platform);
        
        /* Get platform name */
        char platform_name[100];
        ret = clGetPlatformInfo(platform, CL_PLATFORM_NAME, sizeof(platform_name), platform_name, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clGetPlatformInfo' failed\n");
                exit(1);
        }
        
        printf("platform.name='%s'\n\n", platform_name);
        
        /* Get device */
        cl_device_id device;
        cl_uint num_devices;
        ret = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, &num_devices);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clGetDeviceIDs' failed\n");
                exit(1);
        }
        
        printf("Number of devices: %d\n", num_devices);
        printf("device=%p\n", device);
        
        /* Get device name */
        char device_name[100];
        ret = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name),
        device_name, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clGetDeviceInfo' failed\n");
                exit(1);
        }
        
        printf("device.name='%s'\n", device_name);
        printf("\n");
        
        /* Create a Context Object */
        cl_context context;
        context = clCreateContext(NULL, 1, &device, NULL, NULL, &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clCreateContext' failed\n");
                exit(1);
        }
        
        printf("context=%p\n", context);
        
        /* Create a Command Queue Object*/
        cl_command_queue command_queue;
        command_queue = clCreateCommandQueue(context, device, 0, &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clCreateCommandQueue' failed\n");
                exit(1);
        }
        
        printf("command_queue=%p\n", command_queue);
        printf("\n");

        /* Program source */
        unsigned char *source_code;
        size_t source_length;

        /* Read program from 'native_recip_float2.cl' */
        source_code = read_buffer("native_recip_float2.cl", &source_length);

        /* Create a program */
        cl_program program;
        program = clCreateProgramWithSource(context, 1, (const char **)&source_code, &source_length, &ret);

        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clCreateProgramWithSource' failed\n");
                exit(1);
        }
        printf("program=%p\n", program);

        /* Build program */
        ret = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
        if (ret != CL_SUCCESS )
        {
                size_t size;
                char *log;

                /* Get log size */
                clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,0, NULL, &size);

                /* Allocate log and print */
                log = malloc(size);
                clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG,size, log, NULL);
                printf("error: call to 'clBuildProgram' failed:\n%s\n", log);
                
                /* Free log and exit */
                free(log);
                exit(1);
        }

        printf("program built\n");
        printf("\n");
        
        /* Create a Kernel Object */
        cl_kernel kernel;
        kernel = clCreateKernel(program, "native_recip_float2", &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clCreateKernel' failed\n");
                exit(1);
        }
        
        /* Create and allocate host buffers */
        size_t num_elem = 10;
        
        /* Create and init host side src buffer 0 */
        cl_float2 *src_0_host_buffer;
        src_0_host_buffer = malloc(num_elem * sizeof(cl_float2));
        for (int i = 0; i < num_elem; i++)
                src_0_host_buffer[i] = (cl_float2){{2.0, 2.0}};
        
        /* Create and init device side src buffer 0 */
        cl_mem src_0_device_buffer;
        src_0_device_buffer = clCreateBuffer(context, CL_MEM_READ_ONLY, num_elem * sizeof(cl_float2), NULL, &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: could not create source buffer\n");
                exit(1);
        }        
        ret = clEnqueueWriteBuffer(command_queue, src_0_device_buffer, CL_TRUE, 0, num_elem * sizeof(cl_float2), src_0_host_buffer, 0, NULL, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clEnqueueWriteBuffer' failed\n");
                exit(1);
        }

        /* Create host dst buffer */
        cl_float2 *dst_host_buffer;
        dst_host_buffer = malloc(num_elem * sizeof(cl_float2));
        memset((void *)dst_host_buffer, 1, num_elem * sizeof(cl_float2));

        /* Create device dst buffer */
        cl_mem dst_device_buffer;
        dst_device_buffer = clCreateBuffer(context, CL_MEM_WRITE_ONLY, num_elem *sizeof(cl_float2), NULL, &ret);
        if (ret != CL_SUCCESS)
        {
                printf("error: could not create dst buffer\n");
                exit(1);
        }
        
        /* Set kernel arguments */
        ret = CL_SUCCESS;
        ret |= clSetKernelArg(kernel, 0, sizeof(cl_mem), &src_0_device_buffer);
        ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), &dst_device_buffer);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clSetKernelArg' failed\n");
                exit(1);
        }

        /* Launch the kernel */
        size_t global_work_size = num_elem;
        size_t local_work_size = num_elem;
        ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, &local_work_size, 0, NULL, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clEnqueueNDRangeKernel' failed\n");
                exit(1);
        }

        /* Wait for it to finish */
        clFinish(command_queue);

        /* Read results from GPU */
        ret = clEnqueueReadBuffer(command_queue, dst_device_buffer, CL_TRUE,0, num_elem * sizeof(cl_float2), dst_host_buffer, 0, NULL, NULL);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clEnqueueReadBuffer' failed\n");
                exit(1);
        }

        /* Dump dst buffer to file */
        char dump_file[100];
        sprintf((char *)&dump_file, "%s.result", argv[0]);
        write_buffer(dump_file, (const char *)dst_host_buffer, num_elem * sizeof(cl_float2));
        printf("Result dumped to %s\n", dump_file);
        /* Free host dst buffer */
        free(dst_host_buffer);

        /* Free device dst buffer */
        ret = clReleaseMemObject(dst_device_buffer);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseMemObject' failed\n");
                exit(1);
        }
        
        /* Free host side src buffer 0 */
        free(src_0_host_buffer);

        /* Free device side src buffer 0 */
        ret = clReleaseMemObject(src_0_device_buffer);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseMemObject' failed\n");
                exit(1);
        }

        /* Release kernel */
        ret = clReleaseKernel(kernel);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseKernel' failed\n");
                exit(1);
        }

        /* Release program */
        ret = clReleaseProgram(program);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseProgram' failed\n");
                exit(1);
        }
        
        /* Release command queue */
        ret = clReleaseCommandQueue(command_queue);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseCommandQueue' failed\n");
                exit(1);
        }
        
        /* Release context */
        ret = clReleaseContext(context);
        if (ret != CL_SUCCESS)
        {
                printf("error: call to 'clReleaseContext' failed\n");
                exit(1);
        }
                
        return 0;
}
int 
MemoryOptimizations::genBinaryImage()
{
    cl_int status = CL_SUCCESS;

    /*
     * Have a look at the available platforms and pick either
     * the AMD one if available or a reasonable default.
     */
    cl_uint numPlatforms;
    cl_platform_id platform = NULL;
    status = clGetPlatformIDs(0, NULL, &numPlatforms);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetPlatformIDs failed."))
    {
        return SDK_FAILURE;
    }
    if (0 < numPlatforms) 
    {
        cl_platform_id* platforms = new cl_platform_id[numPlatforms];
        status = clGetPlatformIDs(numPlatforms, platforms, NULL);
        if(!sampleCommon->checkVal(status,
                                   CL_SUCCESS,
                                   "clGetPlatformIDs failed."))
        {
            return SDK_FAILURE;
        }

        char platformName[100];
        for (unsigned i = 0; i < numPlatforms; ++i) 
        {
            status = clGetPlatformInfo(platforms[i],
                                       CL_PLATFORM_VENDOR,
                                       sizeof(platformName),
                                       platformName,
                                       NULL);

            if(!sampleCommon->checkVal(status,
                                       CL_SUCCESS,
                                       "clGetPlatformInfo failed."))
            {
                return SDK_FAILURE;
            }

            platform = platforms[i];
            if (!strcmp(platformName, "Advanced Micro Devices, Inc.")) 
            {
                break;
            }
        }
        std::cout << "Platform found : " << platformName << "\n";
        delete[] platforms;
    }

    if(NULL == platform)
    {
        sampleCommon->error("NULL platform found so Exiting Application.");
        return SDK_FAILURE;
    }

    /*
     * If we could find our platform, use it. Otherwise use just available platform.
     */
    cl_context_properties cps[5] = 
    {
        CL_CONTEXT_PLATFORM, 
        (cl_context_properties)platform, 
        CL_CONTEXT_OFFLINE_DEVICES_AMD,
        (cl_context_properties)1,
        0
    };

    context = clCreateContextFromType(cps,
                                      CL_DEVICE_TYPE_ALL,
                                      NULL,
                                      NULL,
                                      &status);

    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clCreateContextFromType failed."))
    {
        return SDK_FAILURE;
    }

    /* create a CL program using the kernel source */
    streamsdk::SDKFile kernelFile;
    std::string kernelPath = sampleCommon->getPath();
    kernelPath.append("MemoryOptimizations_Kernels.cl");
    if(!kernelFile.open(kernelPath.c_str()))
    {
        std::cout << "Failed to load kernel file : " << kernelPath << std::endl;
        return SDK_FAILURE;
    }
    const char * source = kernelFile.source().c_str();
    size_t sourceSize[] = {strlen(source)};
    program = clCreateProgramWithSource(context,
                                        1,
                                        &source,
                                        sourceSize,
                                        &status);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clCreateProgramWithSource failed."))
    {
        return SDK_FAILURE;
    }
    
    /* create a cl program executable for all the devices specified */
    status = clBuildProgram(program,
                            0,
                            NULL,
                            NULL,
                            NULL,
                            NULL);

    size_t numDevices;
    status = clGetProgramInfo(program, 
                           CL_PROGRAM_NUM_DEVICES,
                           sizeof(numDevices),
                           &numDevices,
                           NULL );
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetProgramInfo(CL_PROGRAM_NUM_DEVICES) failed."))
    {
        return SDK_FAILURE;
    }

    std::cout << "Number of devices found : " << numDevices << "\n\n";
    devices = (cl_device_id *)malloc( sizeof(cl_device_id) * numDevices );
    if(devices == NULL)
    {
        sampleCommon->error("Failed to allocate host memory.(devices)");
        return SDK_FAILURE;
    }
    /* grab the handles to all of the devices in the program. */
    status = clGetProgramInfo(program, 
                              CL_PROGRAM_DEVICES, 
                              sizeof(cl_device_id) * numDevices,
                              devices,
                              NULL );
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetProgramInfo(CL_PROGRAM_DEVICES) failed."))
    {
        return SDK_FAILURE;
    }


    /* figure out the sizes of each of the binaries. */
    size_t *binarySizes = (size_t*)malloc( sizeof(size_t) * numDevices );
    if(devices == NULL)
    {
        sampleCommon->error("Failed to allocate host memory.(binarySizes)");
        return SDK_FAILURE;
    }
    
    status = clGetProgramInfo(program, 
                              CL_PROGRAM_BINARY_SIZES,
                              sizeof(size_t) * numDevices, 
                              binarySizes, NULL);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetProgramInfo(CL_PROGRAM_BINARY_SIZES) failed."))
    {
        return SDK_FAILURE;
    }

    size_t i = 0;
    /* copy over all of the generated binaries. */
    char **binaries = (char **)malloc( sizeof(char *) * numDevices );
    if(binaries == NULL)
    {
        sampleCommon->error("Failed to allocate host memory.(binaries)");
        return SDK_FAILURE;
    }

    for(i = 0; i < numDevices; i++)
    {
        if(binarySizes[i] != 0)
        {
            binaries[i] = (char *)malloc( sizeof(char) * binarySizes[i]);
            if(binaries[i] == NULL)
            {
                sampleCommon->error("Failed to allocate host memory.(binaries[i])");
                return SDK_FAILURE;
            }
        }
        else
        {
            binaries[i] = NULL;
        }
    }
    status = clGetProgramInfo(program, 
                              CL_PROGRAM_BINARIES,
                              sizeof(char *) * numDevices, 
                              binaries, 
                              NULL);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clGetProgramInfo(CL_PROGRAM_BINARIES) failed."))
    {
        return SDK_FAILURE;
    }

    /* dump out each binary into its own separate file. */
    for(i = 0; i < numDevices; i++)
    {
        char fileName[100];
        sprintf(fileName, "%s.%d", dumpBinary.c_str(), (int)i);
        if(binarySizes[i] != 0)
        {
            char deviceName[1024];
            status = clGetDeviceInfo(devices[i], 
                                     CL_DEVICE_NAME, 
                                     sizeof(deviceName),
                                     deviceName, 
                                     NULL);
            if(!sampleCommon->checkVal(status,
                                       CL_SUCCESS,
                                       "clGetDeviceInfo(CL_DEVICE_NAME) failed."))
            {
                return SDK_FAILURE;
            }

            printf( "%s binary kernel: %s\n", deviceName, fileName);
            streamsdk::SDKFile BinaryFile;
            if(!BinaryFile.writeBinaryToFile(fileName, 
                                             binaries[i], 
                                             binarySizes[i]))
            {
                std::cout << "Failed to load kernel file : " << fileName << std::endl;
                return SDK_FAILURE;
            }
        }
        else
        {
            printf("Skipping %s since there is no binary data to write!\n",
                    fileName);
        }
    }

    // Release all resouces and memory
    for(i = 0; i < numDevices; i++)
    {
        if(binaries[i] != NULL)
        {
            free(binaries[i]);
            binaries[i] = NULL;
        }
    }

    if(binaries != NULL)
    {
        free(binaries);
        binaries = NULL;
    }

    if(binarySizes != NULL)
    {
        free(binarySizes);
        binarySizes = NULL;
    }

    if(devices != NULL)
    {
        free(devices);
        devices = NULL;
    }

    status = clReleaseProgram(program);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clReleaseProgram failed."))
    {
        return SDK_FAILURE;
    }

    status = clReleaseContext(context);
    if(!sampleCommon->checkVal(status,
                               CL_SUCCESS,
                               "clReleaseContext failed."))
    {
        return SDK_FAILURE;
    }

    return SDK_SUCCESS;
}