void print(const char *dev) { int fd = open(dev, O_RDONLY); if ( fd == -1 ) { fprintf(stderr, "Failed to open %s: %s\n", dev, strerror(errno)); exit(-1); } printf("\n%s\n", dev); print_driver_version(fd); print_device_info(fd); print_device_name(fd); print_device_port(fd); close(fd); }
int main(int argc, char* argv[]) { int err = EXIT_FAILURE; struct cmd_line cmd_line; setlocale(LC_ALL, ""); if (parse_cmd_line(argc, argv, &cmd_line) < 0) goto arg_check_failed; struct pci_access * pci = pci_alloc(); if (! pci) goto pci_alloc_failed; /* This access bypass the kernel and use a memory mapping * to PCI configuration registers */ pci->method = PCI_ACCESS_I386_TYPE1; struct pci_dev * dev = create_pci_dev(pci, cmd_line.slot); if (! dev) goto create_pci_dev_failed; print_device_name(pci, dev); unsigned long * timestamps = malloc(sizeof(*timestamps) * cmd_line.iteration_count); if (! timestamps) { fprintf(stderr, "Can't allocate timestamp storage (%s)\n", strerror(errno)); goto malloc_failed; } struct timestamp t; read_timestamp_counter(&t); perform_reads(dev, timestamps, cmd_line.iteration_count, cmd_line.wait_time_us); unsigned long test_duration_cycles = cycle_since_timestamp(&t); double cpu_mhz = get_cpu_mhz(); if (cpu_mhz < 0) goto get_cpu_mhz_failed; print_results(cpu_mhz, timestamps, cmd_line.iteration_count, test_duration_cycles, cmd_line.limit_ns); err = EXIT_SUCCESS; get_cpu_mhz_failed: free(timestamps); malloc_failed: pci_free_dev(dev); create_pci_dev_failed: pci_cleanup(pci); pci_alloc_failed: arg_check_failed: return err; }
void mat_mul_opencl_printf(float *M_A, float *M_B, float *M_C, size_t ROW_A, size_t COL_A, size_t COL_B) { cl_platform_id *platform; cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT; cl_device_id dev; cl_context context; cl_command_queue cmd_queue; cl_program program; cl_kernel kernel; cl_mem mem_A, mem_B, mem_C; cl_int err; cl_uint num_platforms; cl_uint num_dev = 0; int i; // Get the device type to use from the environmental variable. char *dtype = getenv("CL_DEV_TYPE"); if (dtype) { if (strcasecmp(dtype, "cpu") == 0) { dev_type = CL_DEVICE_TYPE_CPU; } else if (strcasecmp(dtype, "gpu") == 0) { dev_type = CL_DEVICE_TYPE_GPU; } } // Platform err = clGetPlatformIDs(0, NULL, &num_platforms); CHECK_ERROR(err); if (num_platforms == 0) { fprintf(stderr, "[%s:%d] ERROR: No OpenCL platform\n", __FILE__,__LINE__); exit(EXIT_FAILURE); } printf("Number of platforms: %u\n", num_platforms); platform = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms); err = clGetPlatformIDs(num_platforms, platform, NULL); CHECK_ERROR(err); // Device for (i = 0; i < num_platforms; i++) { err = clGetDeviceIDs(platform[i], dev_type, 1, &dev, &num_dev); if (err != CL_DEVICE_NOT_FOUND) CHECK_ERROR(err); if (num_dev == 1) break; } if (num_dev < 1) { fprintf(stderr, "[%s:%d] ERROR: No device\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } print_device_name(dev); // Context context = clCreateContext(NULL, 1, &dev, NULL, NULL, &err); CHECK_ERROR(err); // Command queue cmd_queue = clCreateCommandQueue(context, dev, 0, &err); CHECK_ERROR(err); // Create a program. size_t source_len; char *source_code = get_source_code("./kernel_printf.cl", &source_len); program = clCreateProgramWithSource(context, 1, (const char **)&source_code, &source_len, &err); free(source_code); CHECK_ERROR(err); // Build the program. char build_opts[200]; sprintf(build_opts, "-DROW_A=%lu -DCOL_A=%lu -DCOL_B=%lu", ROW_A, COL_A, COL_B); err = clBuildProgram(program, 1, &dev, build_opts, NULL, NULL); if (err != CL_SUCCESS) { print_build_log(program, dev); CHECK_ERROR(err); } // Kernel kernel = clCreateKernel(program, "mat_mul", &err); CHECK_ERROR(err); // Buffers mem_A = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * ROW_A * COL_A, M_A, &err); CHECK_ERROR(err); mem_B = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * COL_A * COL_B, M_B, &err); CHECK_ERROR(err); mem_C = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(float) * ROW_A * COL_B, M_C, &err); CHECK_ERROR(err); // Set the arguments. err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_A); CHECK_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_B); CHECK_ERROR(err); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem_C); CHECK_ERROR(err); // Enqueue the kernel. size_t lws[2] = {16, 16}; size_t gws[2]; gws[1] = (size_t)ceil((double)ROW_A / lws[1]) * lws[1]; gws[0] = (size_t)ceil((double)COL_B / lws[0]) * lws[0]; timer_start(1); err = clEnqueueNDRangeKernel(cmd_queue, kernel, 2, NULL, gws, lws, 0, NULL, NULL); CHECK_ERROR(err); // Read the result. err = clEnqueueReadBuffer(cmd_queue, mem_C, CL_TRUE, 0, sizeof(float) * ROW_A * COL_B, M_C, 0, NULL, NULL); CHECK_ERROR(err); timer_stop(1); printf("Kernel time : %f sec\n", timer_read(1)); // Release clReleaseMemObject(mem_A); clReleaseMemObject(mem_B); clReleaseMemObject(mem_C); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); free(platform); }
void mat_mul_opencl_binary(float *M_A, float *M_B, float *M_C, size_t ROW_A, size_t COL_A, size_t COL_B) { cl_platform_id *platform; cl_device_type dev_type; cl_device_id dev; cl_context context; cl_command_queue cmd_queue; cl_program program; cl_kernel kernel; cl_mem mem_A, mem_B, mem_C; cl_event ev_kernel, ev_bp; cl_int err; cl_uint num_platforms; cl_uint num_dev = 0; int i; // Platform err = clGetPlatformIDs(0, NULL, &num_platforms); CHECK_ERROR(err); if (num_platforms == 0) { fprintf(stderr, "[%s:%d] ERROR: No OpenCL platform\n", __FILE__,__LINE__); exit(EXIT_FAILURE); } printf("Number of platforms: %u\n", num_platforms); platform = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms); err = clGetPlatformIDs(num_platforms, platform, NULL); CHECK_ERROR(err); // Device dev_type = get_device_type(); for (i = 0; i < num_platforms; i++) { err = clGetDeviceIDs(platform[i], dev_type, 1, &dev, &num_dev); if (err != CL_DEVICE_NOT_FOUND) CHECK_ERROR(err); if (num_dev == 1) break; } if (num_dev < 1) { fprintf(stderr, "[%s:%d] ERROR: No device\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } print_device_name(dev); free(platform); // Context context = clCreateContext(NULL, 1, &dev, NULL, NULL, &err); CHECK_ERROR(err); // Create a program. char *source_code = get_source_code("./kernel_2d.cl"); program = clCreateProgramWithSource(context, 1, (const char **)&source_code, NULL, &err); free(source_code); CHECK_ERROR(err); // Callback data for clBuildProgram ev_bp = clCreateUserEvent(context, &err); CHECK_ERROR(err); bp_data_t bp_data; bp_data.dev = dev; bp_data.event = &ev_bp; // Build the program. char build_opts[200]; sprintf(build_opts, "-DROW_A=%lu -DCOL_A=%lu -DCOL_B=%lu", ROW_A, COL_A, COL_B); err = clBuildProgram(program, 1, &dev, build_opts, build_program_callback, &bp_data); CHECK_ERROR(err); // Command queue cmd_queue = clCreateCommandQueue(context, dev, CL_QUEUE_PROFILING_ENABLE, &err); CHECK_ERROR(err); // Buffers mem_A = clCreateBuffer(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(float) * ROW_A * COL_A, M_A, &err); CHECK_ERROR(err); mem_B = clCreateBuffer(context, CL_MEM_READ_ONLY, sizeof(float) * COL_A * COL_B, NULL, &err); CHECK_ERROR(err); err = clEnqueueWriteBuffer(cmd_queue, mem_B, CL_FALSE, 0, sizeof(float) * COL_A * COL_B, M_B, 0, NULL, NULL); CHECK_ERROR(err) mem_C = clCreateBuffer(context, CL_MEM_READ_WRITE, sizeof(float) * ROW_A * COL_B, NULL, &err); CHECK_ERROR(err); // Index space (gws) and work-group size (lws) size_t lws[2] = {16, 16}; size_t gws[2]; gws[1] = (size_t)ceil((double)ROW_A / lws[1]) * lws[1]; gws[0] = (size_t)ceil((double)COL_B / lws[0]) * lws[0]; // Wait for the kernel creation. clWaitForEvents(1, bp_data.event); // Kernel kernel = clCreateKernel(program, "mat_mul", &err); CHECK_ERROR(err); // Set the arguments. err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &mem_A); CHECK_ERROR(err); err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &mem_B); CHECK_ERROR(err); err = clSetKernelArg(kernel, 2, sizeof(cl_mem), &mem_C); CHECK_ERROR(err); // Enqueue the kernel. err = clEnqueueNDRangeKernel(cmd_queue, kernel, 2, NULL, gws, lws, 0, NULL, &ev_kernel); CHECK_ERROR(err); // Read the result. err = clEnqueueReadBuffer(cmd_queue, mem_C, CL_TRUE, 0, sizeof(float) * ROW_A * COL_B, M_C, 0, NULL, NULL); CHECK_ERROR(err); // Read the profiling info. cl_ulong start_time, end_time; err = clGetEventProfilingInfo(ev_kernel, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &start_time, NULL); CHECK_ERROR(err); err = clGetEventProfilingInfo(ev_kernel, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &end_time, NULL); CHECK_ERROR(err); printf("Kernel time : %lf sec\n", (double)(end_time - start_time) / 10e9); // Release clReleaseEvent(ev_kernel); clReleaseEvent(ev_bp); clReleaseMemObject(mem_A); clReleaseMemObject(mem_B); clReleaseMemObject(mem_C); clReleaseKernel(kernel); clReleaseProgram(program); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); }
void mandelbrot(int m, int n) { cl_platform_id *platform; cl_device_type dev_type = CL_DEVICE_TYPE_GPU; cl_device_id *devs = NULL; cl_context context; cl_command_queue *cmd_queues; cl_program program; cl_kernel *kernels; cl_mem *mem_R; cl_mem *mem_G; cl_mem *mem_B; cl_int err; cl_uint num_platforms; cl_uint num_devs = 0; cl_event *ev_kernels; int count_max = COUNT_MAX; int i, j, jhi, jlo; char *output_filename = "mandelbrot.ppm"; FILE *output_unit; double wtime; float x_max = 1.25; float x_min = - 2.25; // float x; // float x1; // float x2; float y_max = 1.75; float y_min = - 1.75; //float y; //float y1; //float y2; size_t size_color; size_color = sizeof(int) * m * n; int (*r)[n] = (int (*)[n])calloc(m * n, sizeof(int)); int (*g)[n] = (int (*)[n])calloc(m * n, sizeof(int)); int (*b)[n] = (int (*)[n])calloc(m * n, sizeof(int)); printf( " Sequential C version\n" ); printf( "\n" ); printf( " Create an ASCII PPM image of the Mandelbrot set.\n" ); printf( "\n" ); printf( " For each point C = X + i*Y\n" ); printf( " with X range [%g,%g]\n", x_min, x_max ); printf( " and Y range [%g,%g]\n", y_min, y_max ); printf( " carry out %d iterations of the map\n", count_max ); printf( " Z(n+1) = Z(n)^2 + C.\n" ); printf( " If the iterates stay bounded (norm less than 2)\n" ); printf( " then C is taken to be a member of the set.\n" ); printf( "\n" ); printf( " An ASCII PPM image of the set is created using\n" ); printf( " M = %d pixels in the X direction and\n", m ); printf( " N = %d pixels in the Y direction.\n", n ); timer_init(); timer_start(0); // Platform err = clGetPlatformIDs(0, NULL, &num_platforms); CHECK_ERROR(err); if (num_platforms == 0) { fprintf(stderr, "[%s:%d] ERROR: No OpenCL platform\n", __FILE__,__LINE__); exit(EXIT_FAILURE); } printf("Number of platforms: %u\n", num_platforms); platform = (cl_platform_id *)malloc(sizeof(cl_platform_id) * num_platforms); err = clGetPlatformIDs(num_platforms, platform, NULL); CHECK_ERROR(err); // Device for (i = 0; i < num_platforms; i++) { err = clGetDeviceIDs(platform[i], dev_type, 0, NULL, &num_devs); if (err != CL_DEVICE_NOT_FOUND) CHECK_ERROR(err); num_devs = 1; //** if (num_devs >= 1) { devs = (cl_device_id*)malloc(sizeof(cl_device_id) * num_devs); err = clGetDeviceIDs(platform[i], dev_type, num_devs, devs, NULL); break; } } if ( devs == NULL || num_devs < 1) { fprintf(stderr, "[%s:%d] ERROR: No device\n", __FILE__, __LINE__); exit(EXIT_FAILURE); } for( i = 0; i < num_devs; ++i ) { printf("dev[%d] : ", i); print_device_name(devs[i]); } // Context context = clCreateContext(NULL, num_devs, devs, NULL, NULL, &err); CHECK_ERROR(err); // Command queue cmd_queues = (cl_command_queue*)malloc(sizeof(cl_command_queue)*num_devs); for( i = 0; i < num_devs; ++i) { cmd_queues[i] = clCreateCommandQueue(context, devs[i], 0, &err); CHECK_ERROR(err); } // Create a program. size_t source_len; char *source_code = get_source_code("./mandelbrot_kernel.cl", &source_len); program = clCreateProgramWithSource(context, 1, (const char **)&source_code, &source_len, &err); free(source_code); CHECK_ERROR(err); // Build the program. char build_opts[200]; sprintf(build_opts, "-Dm=%d -Dn=%d -Dnum_devs=%d", m, n, num_devs); err = clBuildProgram(program, num_devs, devs, build_opts, NULL, NULL); if (err != CL_SUCCESS) { print_build_log(program, devs[0]); CHECK_ERROR(err); } // Kernel kernels = (cl_kernel*)malloc(sizeof(cl_kernel)*num_devs); for (i = 0; i < num_devs; i++) { kernels[i] = clCreateKernel(program, "mandelbrot_kernel", NULL); } // Buffers mem_R = (cl_mem*)malloc(sizeof(cl_mem)*num_devs); mem_G = (cl_mem*)malloc(sizeof(cl_mem)*num_devs); mem_B = (cl_mem*)malloc(sizeof(cl_mem)*num_devs); for(i = 0; i < num_devs; i++) { mem_R[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, size_color / num_devs, NULL, NULL); mem_G[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, size_color / num_devs, NULL, NULL); mem_B[i] = clCreateBuffer(context, CL_MEM_READ_WRITE, size_color / num_devs, NULL, NULL); } /* // Write to Buffers for(i = 0; i < num_devs; i++) { clEnqueueWriteBuffer(cmd_queues[i], mem_CHECK[i], CL_FALSE, 0, size_CHECK / num_devs, (CHECK + (N / num_devs) * i), 0, NULL, NULL); } */ // Set the arguments. for(i = 0; i < num_devs; i++) { // flag = i * (m * n / num_devs); clSetKernelArg(kernels[i], 0, sizeof(cl_mem), (void*) &mem_R[i]); clSetKernelArg(kernels[i], 1, sizeof(cl_mem), (void*) &mem_G[i]); clSetKernelArg(kernels[i], 2, sizeof(cl_mem), (void*) &mem_B[i]); clSetKernelArg(kernels[i], 3, sizeof(int), &count_max); clSetKernelArg(kernels[i], 4, sizeof(float), &x_max); clSetKernelArg(kernels[i], 5, sizeof(float), &x_min); clSetKernelArg(kernels[i], 6, sizeof(float), &y_max); clSetKernelArg(kernels[i], 7, sizeof(float), &y_min); } // Enqueue the kernel. size_t lws[1] = {256}; size_t gws[1] = { m * n /num_devs }; gws[0] = (size_t)ceil((double)m * n / lws[0]) * lws[0]; ev_kernels = (cl_event*)malloc(sizeof(cl_event)*num_devs); for(i = 0; i < num_devs; i++) { err = clEnqueueNDRangeKernel(cmd_queues[i], kernels[i], 1, NULL, gws, lws, 0, NULL, &ev_kernels[i]); CHECK_ERROR(err); } // Read the result. for(i = 0; i < num_devs; i++) { err = clEnqueueReadBuffer(cmd_queues[i], mem_R[i], CL_TRUE, 0, size_color / num_devs, r, 1, &ev_kernels[i], NULL); err = clEnqueueReadBuffer(cmd_queues[i], mem_G[i], CL_TRUE, 0, size_color / num_devs, g, 1, &ev_kernels[i], NULL); err = clEnqueueReadBuffer(cmd_queues[i], mem_B[i], CL_TRUE, 0, size_color / num_devs, b, 1, &ev_kernels[i], NULL); } // Release for( i = 0; i < num_devs; ++i ) { clFinish(cmd_queues[i]); clReleaseMemObject(mem_R[i]); clReleaseMemObject(mem_G[i]); clReleaseMemObject(mem_B[i]); clReleaseKernel(kernels[i]); clReleaseCommandQueue(cmd_queues[i]); clReleaseEvent(ev_kernels[i]); } clReleaseProgram(program); clReleaseContext(context); free(mem_R); free(mem_G); free(mem_B); free(cmd_queues); free(kernels); free(devs); free(ev_kernels); free(platform); timer_stop(0); wtime = timer_read(0); printf( "\n" ); printf( " Time = %lf seconds.\n", wtime ); // Write data to an ASCII PPM file. output_unit = fopen( output_filename, "wt" ); fprintf( output_unit, "P3\n" ); fprintf( output_unit, "%d %d\n", n, m ); fprintf( output_unit, "%d\n", 255 ); for ( i = 0; i < m; i++ ) { for ( jlo = 0; jlo < n; jlo = jlo + 4 ) { jhi = MIN( jlo + 4, n ); for ( j = jlo; j < jhi; j++ ) { fprintf( output_unit, " %d %d %d", r[i][j], g[i][j], b[i][j] ); } fprintf( output_unit, "\n" ); } } fclose( output_unit ); printf( "\n" ); printf( " Graphics data written to \"%s\".\n\n", output_filename ); // Terminate. free(r); free(g); free(b); }