static void setup_two_queues (Data *data) { cl_int errcode; data->compute_queue = clCreateCommandQueue (ocl_get_context (data->ocl), ocl_get_devices (data->ocl)[0], 0, &errcode); OCL_CHECK_ERROR (errcode); data->write_queue = clCreateCommandQueue (ocl_get_context (data->ocl), ocl_get_devices (data->ocl)[0], 0, &errcode); OCL_CHECK_ERROR (errcode); data->read_queue = data->write_queue; }
int main (void) { OclPlatform *ocl; cl_mem mem; cl_program program; cl_kernel kernel; cl_int errcode; cl_event event; size_t n_elements; cl_command_queue *cmd_queues; ocl = ocl_new_with_queues (0, CL_DEVICE_TYPE_ALL, 0); if (ocl == NULL) return 1; program = ocl_create_program_from_file (ocl, "test.cl", NULL, &errcode); OCL_CHECK_ERROR (errcode); cmd_queues = ocl_get_cmd_queues (ocl); kernel = clCreateKernel (program, "fill_ones", &errcode); OCL_CHECK_ERROR (errcode); n_elements = 1024 * 1024; mem = clCreateBuffer (ocl_get_context (ocl), CL_MEM_READ_WRITE, n_elements * sizeof (float), NULL, &errcode); OCL_CHECK_ERROR (clSetKernelArg (kernel, 0, sizeof (cl_mem), &mem)); OCL_CHECK_ERROR (clEnqueueNDRangeKernel (cmd_queues[0], kernel, 1, NULL, &n_elements, NULL, 0, NULL, &event)); OCL_CHECK_ERROR (clWaitForEvents (1, &event)); /* * If the event is not released, nvidia-smi will report that about 60 MB are * not freed, although we free all other resources including the memory * object itself. */ /* OCL_CHECK_ERROR (clReleaseEvent (event)); */ OCL_CHECK_ERROR (errcode); OCL_CHECK_ERROR (clReleaseMemObject (mem)); OCL_CHECK_ERROR (clReleaseKernel (kernel)); OCL_CHECK_ERROR (clReleaseProgram (program)); ocl_free (ocl); fflush (stdin); printf ("Press Enter to exit ...\n"); getchar (); return 0; }
u32 getOpenCLDeviceFreq(int device) { ocl_context_t *cont = ocl_get_context(device); if (cont) { cl_uint clockrate; if (clGetDeviceInfo(cont->deviceID, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clockrate), &clockrate, NULL) == CL_SUCCESS) return clockrate; } return 0; }
static void setup_ooo_queue (Data *data) { cl_int errcode; data->compute_queue = clCreateCommandQueue (ocl_get_context (data->ocl), ocl_get_devices (data->ocl)[0], CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, &errcode); OCL_CHECK_ERROR (errcode); data->write_queue = data->compute_queue; data->read_queue = data->compute_queue; }
static Data * setup_data (OclPlatform *ocl, size_t n_elements) { Data *data; cl_int errcode; data = g_malloc0 (sizeof (Data)); data->ocl = ocl; data->compute_queue = NULL; data->write_queue = NULL; data->read_queue = NULL; data->program = ocl_create_program_from_file (ocl, "test.cl", NULL, &errcode); OCL_CHECK_ERROR (errcode); data->kernel = clCreateKernel (data->program, "run_sin", &errcode); OCL_CHECK_ERROR (errcode); data->n_elements = n_elements; data->size = n_elements * sizeof (float); data->input = g_malloc0 (data->size); data->output = g_malloc0 (data->size); data->in_mem = clCreateBuffer (ocl_get_context (ocl), CL_MEM_READ_ONLY, data->size, NULL, &errcode); OCL_CHECK_ERROR (errcode); data->out_mem = clCreateBuffer (ocl_get_context (ocl), CL_MEM_WRITE_ONLY, data->size, NULL, &errcode); OCL_CHECK_ERROR (errcode); return data; }
long getOpenCLRawProcessorID(int device, const char **cpuname) { static cl_char device_name[256+130]; strcpy((char*)device_name, "Unknown"); if (cpuname) *cpuname = (const char*)device_name; ocl_context_t *cont = ocl_get_context(device); if (cont) { clGetDeviceInfo(cont->deviceID, CL_DEVICE_NAME, sizeof(device_name)-130, device_name, NULL); //retrieve card info, if available u32 off = strlen((const char*)device_name); device_name[off++]=' '; device_name[off++]='\0'; #ifdef CL_DEVICE_BOARD_NAME_AMD if (clGetDeviceInfo(cont->deviceID, CL_DEVICE_BOARD_NAME_AMD, sizeof(device_name)-off, &device_name[off], NULL) == CL_SUCCESS) { device_name[off-1]='('; u32 off2 = strlen((const char*)device_name); device_name[off2] = ')'; device_name[off2+1] = '\0'; } #endif // ??? Never used /* cl_uint vendor_id=0; clGetDeviceInfo(cont->deviceID, CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL); cl_uint cunits=0; clGetDeviceInfo(cont->deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cunits), &cunits, NULL); */ return GetDeviceID(/* vendor_id, device_name, cunits, */ cont); } return -1; }
void OpenCLPrintExtendedGpuInfo(int device) { const char *data; cl_int status; ocl_context_t *cont = ocl_get_context(device); if (cont == NULL) return; if (cont->firstOnPlatform) { //Print platform info once LogRaw("\nPlatform info:\n"); LogRaw("--------------\n"); cl_char str[80]; status = clGetPlatformInfo(cont->platformID, CL_PLATFORM_NAME, sizeof(str), (void *)str, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Platform Name", str); status = clGetPlatformInfo(cont->platformID, CL_PLATFORM_VENDOR, sizeof(str), (void *)str, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Platform Vendor", str); status = clGetPlatformInfo(cont->platformID, CL_PLATFORM_VERSION, sizeof(str), (void *)str, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Platform Version", str); cl_char *str2; size_t sz; status = clGetPlatformInfo(cont->platformID, CL_PLATFORM_EXTENSIONS, 0, NULL, &sz); if (sz) { str2 = (cl_char*)malloc(sz+1); if (str2) { status = clGetPlatformInfo(cont->platformID, CL_PLATFORM_EXTENSIONS, sz+1, (void *)str2, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Platform extensions", str2); free(str2); } } /* Split platform and device info */ LogRaw("\nDevice info:\n"); LogRaw("--------------\n"); } cl_char device_name[1024] = {0}; cl_device_type type; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_TYPE, sizeof(type), &type, NULL); if (status == CL_SUCCESS) { if ( type & CL_DEVICE_TYPE_CPU ) data = "CPU"; else { if ( type & CL_DEVICE_TYPE_GPU ) data = "GPU"; else data = "UNKNOWN"; } LogRaw("%30s: %s\n", "Type", data); } status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_NAME, sizeof(device_name), device_name, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Name",device_name); cl_uint clockrate; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(clockrate), &clockrate, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %u\n", "Max clockrate", clockrate); cl_uint cunits; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(cunits), &cunits, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %u\n", "Max compute units", cunits); cl_ulong gmemcache; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, sizeof(gmemcache), &gmemcache, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %" PRIu64 "\n", "Global memory cache size", gmemcache); cl_device_mem_cache_type ct; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, sizeof(ct), &ct, NULL); if (status == CL_SUCCESS) { switch(ct) { case CL_NONE: data = "NONE"; break; case CL_READ_ONLY_CACHE: data = "Read Only"; break; case CL_READ_WRITE_CACHE: data = "Read/Write"; break; default: data = "Not sure"; } LogRaw("%30s: %s\n", "Global memory cache type", data); } cl_bool um; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_HOST_UNIFIED_MEMORY, sizeof(um), &um, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Unified memory subsystem", (um ? "Yes" : "No")); status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_IMAGE_SUPPORT, sizeof(um), &um, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Image support", (um ? "Yes" : "No")); status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_LOCAL_MEM_SIZE, sizeof(gmemcache), &gmemcache, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %" PRIu64 "\n", "Local memory size", gmemcache); size_t mwgs; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(mwgs), &mwgs, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %lu\n", "Max workgroup size", (unsigned long)mwgs); cl_uint nvw; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, sizeof(nvw), &nvw, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %u\n", "native vector width (int)", nvw); status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, sizeof(nvw), &nvw, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %u\n", "native vector width (float)", nvw); status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_OPENCL_C_VERSION, sizeof(device_name), device_name, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "OpenCL C version",device_name); size_t ptres; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_PROFILING_TIMER_RESOLUTION, sizeof(ptres), &ptres, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %lu\n", "Device timer resolution (ns)", (unsigned long)ptres); status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_VENDOR, sizeof(device_name), device_name, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Device vendor",device_name); cl_uint vendor_id; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_VENDOR_ID, sizeof(vendor_id), &vendor_id, NULL); if (status == CL_SUCCESS) LogRaw("%30s: 0x%x\n", "Device vendor id",vendor_id); status = clGetDeviceInfo(cont->deviceID, CL_DRIVER_VERSION, sizeof(device_name), device_name, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %s\n", "Driver version",device_name); cl_uint devbits; status = clGetDeviceInfo(cont->deviceID, CL_DEVICE_ADDRESS_BITS, sizeof(devbits), &devbits, NULL); if (status == CL_SUCCESS) LogRaw("%30s: %u%s\n", "Device address bits", devbits, (devbits == sizeof(size_t) * 8 ? "" : " - NOT MATCHED -")); //TODO: device extensions }
int main(int argc, char* argv[]) { const size_t SIZE_execution_bit = (input_length - 3*filter_length +1); const size_t SIZE_input_bit = sizeof(gint32)*(input_length+1); const size_t SIZE_settings_bit = sizeof(gint32)*4; size_t output_bit_on_counts; size_t* SIZE_execution_pointer = &SIZE_execution_bit; gint32* filtersettings = (gint32*) malloc(SIZE_settings_bit); gint32* input_vector = (gint32*) malloc(SIZE_input_bit); gint32* positions = (gint32*) malloc(SIZE_input_bit); filtersettings[0] = filter_length; filtersettings[1] = threshhold; filtersettings[2] = input_length; filtersettings[3] = 0; //GPU-Init ocl = ocl_new(CL_DEVICE_TYPE_GPU,1); context = ocl_get_context(ocl); queue = ocl_get_cmd_queues (ocl)[0]; clFinish(queue); program = ocl_create_program_from_file(ocl, "edel_kernel_secondder.cl", NULL, &errcode); OCL_CHECK_ERROR(errcode); filter1 = clCreateKernel(program, "second_filter", &errcode); OCL_CHECK_ERROR(errcode); //GPU-Buffer which can be done before the Computation settings = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, SIZE_settings_bit, filtersettings, &errcode); OCL_CHECK_ERROR(errcode); input = clCreateBuffer(context, CL_MEM_READ_ONLY, SIZE_input_bit, NULL, &errcode); OCL_CHECK_ERROR(errcode); if(debugmode != 0) { srand((unsigned) time( NULL )); counter = rand_rects(expected,1,input_length,3*filter_length,3*filter_length,3*filter_length,peak_length,base+peak, input_vector, noise, base, 0,positions); if(harddebug != 0) { for(i = 0; i < input_length;i++) { if(input_length < 10000) { printf("input_vector[%i] = %d\n",i,input_vector[i]); } else { printf("input_vector[%i] = %d\t",i,input_vector[i]); } } } printf("\n counts = %d\n", counter); printf("%lu Bits needed for Output-Vector \n", output_bit_on_counts); } output_bit_on_counts = sizeof(gint32) * safetyfactor * 2*((counter + 2)); clEnqueueWriteBuffer(queue, input, CL_TRUE, 0, SIZE_input_bit, input_vector, 0, NULL, NULL); gint32* energy_time = (gint32*)malloc(output_bit_on_counts); for(i = 0; i < safetyfactor * (2*counter+2); i++) { energy_time[i] = -9999; } output = clCreateBuffer(context, CL_MEM_WRITE_ONLY, output_bit_on_counts, NULL , &errcode); OCL_CHECK_ERROR(errcode); OCL_CHECK_ERROR(clSetKernelArg(filter1, 0, sizeof(cl_mem), &input)); OCL_CHECK_ERROR(clSetKernelArg(filter1, 1, sizeof(cl_mem), &output)); OCL_CHECK_ERROR(clSetKernelArg(filter1, 2, sizeof(cl_mem), &settings)); size_t local_item_size; size_t global_item_size = (size_t) (input_length - 3*filter_length +1); local_item_size = ocl_get_local_size(global_item_size, 2,1); if(debugmode != 0) { printf("local item size = %lu \n %lu", &local_item_size, local_item_size); if(local_item_size != 0) { printf("This works because you divide %lu / %lu \n and this is %lu", global_item_size,local_item_size, global_item_size/local_item_size); } else { FILE* attention; attention = fopen("filterlengthbad", "a+"); if(attention == NULL) { printf("error in opening debug file \n"); exit(1); } fprintf(attention, "The filterlength %d is not good for this filter, choose another filterlength ! \n", filter_length); fclose(attention); printf("There is no way to fit it evenly divided to workgroups, just let OpenCL do it \n"); } if(harddebug != 0) { getchar(); } } if(local_item_size == 0) { OCL_CHECK_ERROR(clEnqueueNDRangeKernel(queue, filter1, 1, NULL, &global_item_size, NULL, 0, NULL, NULL)); } else { OCL_CHECK_ERROR(clEnqueueNDRangeKernel(queue, filter1, 1, NULL, &global_item_size, &local_item_size, 0, NULL, NULL)); } //local_item_size = NULL; clEnqueueReadBuffer(queue, output, CL_TRUE, 0, output_bit_on_counts, energy_time, 0, NULL, NULL); clEnqueueReadBuffer(queue, settings, CL_TRUE, 0, SIZE_settings_bit, filtersettings, 0, NULL, NULL); //Writing back the data for(i = 0; i < filtersettings[3]; i++) { writing_back(filemode, filename, filename_e,filename_t, energy_time,i); } if(debugmode != 0) { printf("The Positions are:\n"); for(i=0; i < counter; i++) { printf("%d\t", positions[i]); printf("note that this postion is the middle of the rect \n"); } } //Safetychanges if(filtersettings[3] > counter) { safetyfactor = safetyfactor + 5*(filtersettings[3] - counter); if(safetyfactor <= 0) { safetyfactor = 10; } notexpect = filtersettings[3] - expected; if(safemode != 0 && notexpect >= notexpect_max) { printf("The Filter found to many peaks it. It expected %d. It found %d times more than expected.\n", expected, notexpect); printf("Safemode is on. Exit program \n"); OCL_CHECK_ERROR(clReleaseMemObject(input)); OCL_CHECK_ERROR(clReleaseMemObject(output)); OCL_CHECK_ERROR(clReleaseMemObject(settings)); OCL_CHECK_ERROR(clReleaseKernel(filter1)); OCL_CHECK_ERROR(clReleaseProgram(program)); ocl_free(ocl); free(input_vector); free(energy_time); free(positions); free(filtersettings); } else { printf("The Filter found to many peaks it. It expected %d. It found %d times more than expected \n", expected, notexpect); } } OCL_CHECK_ERROR(clReleaseMemObject(input)); OCL_CHECK_ERROR(clReleaseMemObject(output)); OCL_CHECK_ERROR(clReleaseMemObject(settings)); OCL_CHECK_ERROR(clReleaseKernel(filter1)); OCL_CHECK_ERROR(clReleaseProgram(program)); ocl_free(ocl); free(input_vector); free(energy_time); free(positions); free(filtersettings); }