Esempio n. 1
0
static void
setup_two_queues (Data *data)
{
    cl_int errcode;

    data->compute_queue = clCreateCommandQueue (ocl_get_context (data->ocl),
                                                ocl_get_devices (data->ocl)[0],
                                                0, &errcode);
    OCL_CHECK_ERROR (errcode);

    data->write_queue = clCreateCommandQueue (ocl_get_context (data->ocl),
                                              ocl_get_devices (data->ocl)[0],
                                              0, &errcode);
    OCL_CHECK_ERROR (errcode);

    data->read_queue = data->write_queue;
}
Esempio n. 2
0
int
main (void)
{
    OclPlatform *ocl;
    cl_mem mem;
    cl_program program;
    cl_kernel kernel;
    cl_int errcode;
    cl_event event;
    size_t n_elements;

    cl_command_queue *cmd_queues;

    ocl = ocl_new_with_queues (0, CL_DEVICE_TYPE_ALL, 0);

    if (ocl == NULL)
        return 1;

    program = ocl_create_program_from_file (ocl, "test.cl", NULL, &errcode);
    OCL_CHECK_ERROR (errcode);

    cmd_queues = ocl_get_cmd_queues (ocl);
    kernel = clCreateKernel (program, "fill_ones", &errcode);
    OCL_CHECK_ERROR (errcode);

    n_elements = 1024 * 1024;
    mem = clCreateBuffer (ocl_get_context (ocl), CL_MEM_READ_WRITE,
                          n_elements * sizeof (float),
                          NULL, &errcode);

    OCL_CHECK_ERROR (clSetKernelArg (kernel, 0, sizeof (cl_mem), &mem));
    OCL_CHECK_ERROR (clEnqueueNDRangeKernel (cmd_queues[0], kernel,
                                             1, NULL, &n_elements, NULL,
                                             0, NULL, &event));
                                             
    OCL_CHECK_ERROR (clWaitForEvents (1, &event));

    /* 
     * If the event is not released, nvidia-smi will report that about 60 MB are
     * not freed, although we free all other resources including the memory
     * object itself.
     */
    /* OCL_CHECK_ERROR (clReleaseEvent (event)); */

    OCL_CHECK_ERROR (errcode);
    OCL_CHECK_ERROR (clReleaseMemObject (mem));
    OCL_CHECK_ERROR (clReleaseKernel (kernel));
    OCL_CHECK_ERROR (clReleaseProgram (program));

    ocl_free (ocl);

    fflush (stdin);
    printf ("Press Enter to exit ...\n");
    getchar ();

    return 0;
}
Esempio n. 3
0
u32 getOpenCLDeviceFreq(int device)
{
  ocl_context_t *cont = ocl_get_context(device);

  if (cont)
  {
    cl_uint clockrate;
    if (clGetDeviceInfo(cont->deviceID, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clockrate), &clockrate, NULL) == CL_SUCCESS)
      return clockrate;
  }

  return 0;
}
Esempio n. 4
0
static void
setup_ooo_queue (Data *data)
{
    cl_int errcode;

    data->compute_queue = clCreateCommandQueue (ocl_get_context (data->ocl),
                                                ocl_get_devices (data->ocl)[0],
                                                CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, 
                                                &errcode);
    OCL_CHECK_ERROR (errcode);
    data->write_queue = data->compute_queue;
    data->read_queue = data->compute_queue;
}
Esempio n. 5
0
static Data *
setup_data (OclPlatform *ocl,
            size_t n_elements)
{
    Data *data;
    cl_int errcode;

    data = g_malloc0 (sizeof (Data));

    data->ocl = ocl;

    data->compute_queue = NULL;
    data->write_queue = NULL;
    data->read_queue = NULL;

    data->program = ocl_create_program_from_file (ocl, "test.cl", NULL, &errcode);
    OCL_CHECK_ERROR (errcode);

    data->kernel = clCreateKernel (data->program, "run_sin", &errcode);
    OCL_CHECK_ERROR (errcode);

    data->n_elements = n_elements;
    data->size = n_elements * sizeof (float);
    data->input = g_malloc0 (data->size);
    data->output = g_malloc0 (data->size);

    data->in_mem = clCreateBuffer (ocl_get_context (ocl), CL_MEM_READ_ONLY,
                                   data->size, NULL, &errcode);
    OCL_CHECK_ERROR (errcode);

    data->out_mem = clCreateBuffer (ocl_get_context (ocl), CL_MEM_WRITE_ONLY,
                                    data->size, NULL, &errcode);
    OCL_CHECK_ERROR (errcode);

    return data;
}
Esempio n. 6
0
long getOpenCLRawProcessorID(int device, const char **cpuname)
{
  static cl_char device_name[256+130];
  strcpy((char*)device_name, "Unknown");

  if (cpuname)
    *cpuname = (const char*)device_name;

  ocl_context_t *cont = ocl_get_context(device);
  if (cont)
  {
    clGetDeviceInfo(cont->deviceID, CL_DEVICE_NAME, sizeof(device_name)-130, device_name, NULL);

    //retrieve card info, if available
    u32 off = strlen((const char*)device_name);
    device_name[off++]=' '; device_name[off++]='\0';
#ifdef CL_DEVICE_BOARD_NAME_AMD
    if (clGetDeviceInfo(cont->deviceID, CL_DEVICE_BOARD_NAME_AMD, sizeof(device_name)-off, &device_name[off], NULL) == CL_SUCCESS)
    {
      device_name[off-1]='(';
      u32 off2 = strlen((const char*)device_name);
      device_name[off2] = ')';
      device_name[off2+1] = '\0';
    }
#endif
// ??? Never used
/*
    cl_uint vendor_id=0;
    clGetDeviceInfo(cont->deviceID, CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL);

    cl_uint cunits=0;
    clGetDeviceInfo(cont->deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cunits), &cunits, NULL);
*/
    return GetDeviceID(/* vendor_id, device_name, cunits, */ cont);
  }

  return -1;
}
Esempio n. 7
0
void OpenCLPrintExtendedGpuInfo(int device)
{
  const char *data;
  cl_int status;

  ocl_context_t *cont = ocl_get_context(device);
  if (cont == NULL)
    return;

  if (cont->firstOnPlatform)
  {
    //Print platform info once
    LogRaw("\nPlatform info:\n");
    LogRaw("--------------\n");
    cl_char str[80];
    status = clGetPlatformInfo(cont->platformID, CL_PLATFORM_NAME, sizeof(str), (void *)str, NULL);
    if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Platform Name", str);

    status = clGetPlatformInfo(cont->platformID, CL_PLATFORM_VENDOR, sizeof(str), (void *)str, NULL);
    if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Platform Vendor", str);

    status = clGetPlatformInfo(cont->platformID, CL_PLATFORM_VERSION, sizeof(str), (void *)str, NULL);
    if (status == CL_SUCCESS)  LogRaw("%30s: %s\n", "Platform Version", str);

    cl_char *str2;
    size_t sz;
    status = clGetPlatformInfo(cont->platformID, CL_PLATFORM_EXTENSIONS, 0, NULL, &sz);
    if (sz)
    {
      str2 = (cl_char*)malloc(sz+1);
      if (str2)
      {
        status = clGetPlatformInfo(cont->platformID, CL_PLATFORM_EXTENSIONS, sz+1, (void *)str2, NULL);
        if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Platform extensions", str2);
        free(str2);
      }
    }
    /* Split platform and device info */
    LogRaw("\nDevice info:\n");
    LogRaw("--------------\n");
  }

  cl_char device_name[1024] = {0};
  cl_device_type type;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL);
  if (status == CL_SUCCESS)
  {
    if ( type & CL_DEVICE_TYPE_CPU )
      data = "CPU";
    else
    {
      if ( type & CL_DEVICE_TYPE_GPU )
        data = "GPU";
      else
        data = "UNKNOWN";
    }
    LogRaw("%30s: %s\n", "Type", data);
  }

  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_NAME, sizeof(device_name), device_name, NULL);
  if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Name",device_name);

  cl_uint clockrate;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clockrate), &clockrate, NULL);
  if (status == CL_SUCCESS) LogRaw("%30s: %u\n", "Max clockrate", clockrate);

  cl_uint cunits;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cunits), &cunits, NULL);
  if (status == CL_SUCCESS) LogRaw("%30s: %u\n", "Max compute units", cunits);

  cl_ulong gmemcache;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(gmemcache), &gmemcache, NULL);
  if (status == CL_SUCCESS) LogRaw("%30s: %" PRIu64 "\n", "Global memory cache size", gmemcache);

  cl_device_mem_cache_type ct;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof(ct), &ct, NULL);
  if (status == CL_SUCCESS)
  {
    switch(ct)
    {
      case CL_NONE:
        data = "NONE";
        break;
      case CL_READ_ONLY_CACHE:
        data = "Read Only";
        break;
      case CL_READ_WRITE_CACHE:
        data = "Read/Write";
        break;
      default:
        data = "Not sure";
    }
    LogRaw("%30s: %s\n", "Global memory cache type", data);
  }

  cl_bool um;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(um), &um, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %s\n", "Unified memory subsystem", (um ? "Yes" : "No"));

  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof(um), &um, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %s\n", "Image support", (um ? "Yes" : "No"));

  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(gmemcache), &gmemcache, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %" PRIu64 "\n", "Local memory size", gmemcache);

  size_t mwgs;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(mwgs), &mwgs, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %lu\n", "Max workgroup size", (unsigned long)mwgs);

  cl_uint nvw;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof(nvw), &nvw, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %u\n", "native vector width (int)", nvw);
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, sizeof(nvw), &nvw, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %u\n", "native vector width (float)", nvw);

  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof(device_name), device_name, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %s\n", "OpenCL C version",device_name);

  size_t ptres;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(ptres), &ptres, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %lu\n", "Device timer resolution (ns)", (unsigned long)ptres);

  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_VENDOR, sizeof(device_name), device_name, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %s\n", "Device vendor",device_name);

  cl_uint vendor_id;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: 0x%x\n", "Device vendor id",vendor_id);

  status = clGetDeviceInfo(cont->deviceID, CL_DRIVER_VERSION, sizeof(device_name), device_name, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %s\n", "Driver version",device_name);

  cl_uint devbits;
  status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_ADDRESS_BITS, sizeof(devbits), &devbits, NULL);
  if (status == CL_SUCCESS)
    LogRaw("%30s: %u%s\n", "Device address bits", devbits, (devbits == sizeof(size_t) * 8 ? "" : " - NOT MATCHED -"));

  //TODO: device extensions
}
Esempio n. 8
0
int main(int argc, char* argv[])
{



		const size_t SIZE_execution_bit = (input_length - 3*filter_length +1);
		const size_t SIZE_input_bit = sizeof(gint32)*(input_length+1);
		const size_t SIZE_settings_bit = sizeof(gint32)*4;

		size_t output_bit_on_counts;
		size_t* SIZE_execution_pointer = &SIZE_execution_bit;

		gint32* filtersettings = (gint32*) malloc(SIZE_settings_bit);
		gint32* input_vector = (gint32*) malloc(SIZE_input_bit);
		gint32* positions = (gint32*) malloc(SIZE_input_bit);

		filtersettings[0] = filter_length;
		filtersettings[1] = threshhold;
		filtersettings[2] = input_length;
		filtersettings[3] = 0;



		//GPU-Init
		ocl = ocl_new(CL_DEVICE_TYPE_GPU,1);
		context = ocl_get_context(ocl);
		queue = ocl_get_cmd_queues (ocl)[0];
		clFinish(queue);

		program = ocl_create_program_from_file(ocl, "edel_kernel_secondder.cl", NULL, &errcode);
		OCL_CHECK_ERROR(errcode);

		filter1 = clCreateKernel(program, "second_filter", &errcode);
		OCL_CHECK_ERROR(errcode);

		//GPU-Buffer which can be done before the Computation
		settings = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, SIZE_settings_bit, filtersettings, &errcode);
		OCL_CHECK_ERROR(errcode);

		input = clCreateBuffer(context, CL_MEM_READ_ONLY, SIZE_input_bit, NULL, &errcode);
		OCL_CHECK_ERROR(errcode);


		if(debugmode != 0)
		{
			srand((unsigned) time( NULL ));
			counter = rand_rects(expected,1,input_length,3*filter_length,3*filter_length,3*filter_length,peak_length,base+peak, input_vector, noise, base, 0,positions);
			if(harddebug != 0)
			{
				for(i = 0; i < input_length;i++)
				{
					if(input_length < 10000)
					{
						printf("input_vector[%i] = %d\n",i,input_vector[i]);
					}
					else
					{
						printf("input_vector[%i] = %d\t",i,input_vector[i]);
					}
				}
			}

			printf("\n counts = %d\n", counter);
			printf("%lu Bits needed for Output-Vector \n", output_bit_on_counts);

		}

		output_bit_on_counts = sizeof(gint32) * safetyfactor * 2*((counter + 2));

		clEnqueueWriteBuffer(queue, input, CL_TRUE, 0, SIZE_input_bit, input_vector, 0, NULL, NULL);

		gint32* energy_time = (gint32*)malloc(output_bit_on_counts);


		for(i = 0; i < safetyfactor * (2*counter+2); i++)
		{
			energy_time[i] = -9999;
		}


		output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, output_bit_on_counts, NULL , &errcode);
		OCL_CHECK_ERROR(errcode);


		OCL_CHECK_ERROR(clSetKernelArg(filter1, 0, sizeof(cl_mem), &input));
		OCL_CHECK_ERROR(clSetKernelArg(filter1, 1, sizeof(cl_mem), &output));
		OCL_CHECK_ERROR(clSetKernelArg(filter1, 2, sizeof(cl_mem), &settings));

		size_t local_item_size;
		size_t global_item_size = (size_t) (input_length - 3*filter_length +1);

		local_item_size = ocl_get_local_size(global_item_size, 2,1);

		             
                if(debugmode != 0)
                {
                        printf("local item size = %lu \n %lu", &local_item_size, local_item_size);
                        if(local_item_size != 0)
                        {
                              printf("This works because you divide %lu / %lu \n and this is %lu", global_item_size,local_item_size, global_item_size/local_item_size);
                        }
                        else
                        {
                              	FILE* attention;
				attention = fopen("filterlengthbad", "a+");
				if(attention == NULL)
				{
					printf("error in opening debug file \n");
					exit(1);
				}
				fprintf(attention, "The filterlength %d is not good for this filter, choose another filterlength ! \n", filter_length);
				fclose(attention);
				printf("There is no way to fit it evenly divided to workgroups, just let OpenCL do it \n");
                        }
                        if(harddebug != 0)
                        {
                                getchar();
                        }

                }


		if(local_item_size == 0)
		{
			OCL_CHECK_ERROR(clEnqueueNDRangeKernel(queue, filter1, 1, NULL, &global_item_size, NULL, 0, NULL, NULL));	
		}
		else
		{
			OCL_CHECK_ERROR(clEnqueueNDRangeKernel(queue, filter1, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL));
		}


		
		
		//local_item_size = NULL;	
		clEnqueueReadBuffer(queue, output, CL_TRUE, 0, output_bit_on_counts, energy_time, 0, NULL, NULL);
		clEnqueueReadBuffer(queue, settings, CL_TRUE, 0, SIZE_settings_bit, filtersettings, 0, NULL, NULL);


		//Writing back the data
		for(i = 0; i < filtersettings[3]; i++)
		{
			writing_back(filemode, filename, filename_e,filename_t, energy_time,i);
		}

		if(debugmode != 0)
		{
			printf("The Positions are:\n");
			for(i=0; i < counter; i++)
			{
				printf("%d\t", positions[i]);
				printf("note that this postion is the middle of the rect \n");
			}
		}
		//Safetychanges
		if(filtersettings[3] > counter)
		{
			safetyfactor = safetyfactor + 5*(filtersettings[3] - counter);
			if(safetyfactor <= 0)
			{
				safetyfactor = 10;
			}

			notexpect = filtersettings[3] - expected;
			if(safemode != 0 && notexpect >= notexpect_max)
			{
				printf("The Filter found to many peaks it. It expected %d. It found %d times more than expected.\n", expected, notexpect);
				printf("Safemode is on. Exit program \n");
				OCL_CHECK_ERROR(clReleaseMemObject(input));
				OCL_CHECK_ERROR(clReleaseMemObject(output));
				OCL_CHECK_ERROR(clReleaseMemObject(settings));
				OCL_CHECK_ERROR(clReleaseKernel(filter1));
				OCL_CHECK_ERROR(clReleaseProgram(program));

				ocl_free(ocl);

				free(input_vector);
				free(energy_time);
				free(positions);
				free(filtersettings);

			}
			else
			{
				printf("The Filter found to many peaks it. It expected %d. It found %d times more than expected \n", expected, notexpect);
			}
		}






		OCL_CHECK_ERROR(clReleaseMemObject(input));
		OCL_CHECK_ERROR(clReleaseMemObject(output));
		OCL_CHECK_ERROR(clReleaseMemObject(settings));
		OCL_CHECK_ERROR(clReleaseKernel(filter1));
		OCL_CHECK_ERROR(clReleaseProgram(program));

		ocl_free(ocl);
		free(input_vector);
		free(energy_time);
		free(positions);
		free(filtersettings);




}