acc_kernel_t acc_build_kernel(size_t region_id, size_t kernel_id) { acc_init_once(); acc_kernel_desc_t kernel = acc_kernel_desc_by_ID(region_id, kernel_id); assert(kernel != NULL); acc_kernel_t result = (acc_kernel_t)malloc(sizeof(struct acc_kernel_t_)); result->desc = kernel; result->param_ptrs = ( void **)malloc(kernel->num_params * sizeof( void *)); result->scalar_ptrs = ( void **)malloc(kernel->num_scalars * sizeof( void *)); result->data_ptrs = (d_void **)malloc(kernel->num_datas * sizeof(d_void *)); result->data_size = ( size_t *)malloc(kernel->num_datas * sizeof( size_t)); result->loops = (acc_loop_desc_t *)malloc(kernel->num_loops * sizeof(struct acc_loop_desc_t_ *)); unsigned i; for (i = 0; i < kernel->num_loops; i++) { result->loops[i] = (acc_loop_desc_t)malloc(sizeof(struct acc_loop_desc_t_)); result->loops[i]->lower = 0; result->loops[i]->upper = 0; result->loops[i]->stride = 0; result->loops[i]->nbr_it = 0; } return result; }
/**
 * Initialize one device, selected by device type and number.
 *
 * The (type, number) pair is translated with acc_get_device_idx() and the
 * resulting index is handed to acc_init__().
 *
 * \param dev device type
 * \param num device number, forwarded to acc_get_device_idx
 */
void acc_init_(acc_device_t dev, int num) {
  acc_init_once();

  const unsigned idx = acc_get_device_idx(dev, num);

  acc_init__(idx);
}
/**
 * Release the OpenCL resources held for one device and clear its slot in
 * acc_runtime.opencl_data->devices_data.
 *
 * Does nothing (besides acc_init_once()) when the slot is already NULL.
 * Failures of the OpenCL release calls are reported as warnings and do not
 * stop the teardown.
 *
 * \param dev device type (also used to look up the name printed in warnings)
 * \param num device number, forwarded to acc_get_device_idx
 */
void acc_shutdown_(acc_device_t dev, int num) {
#if PRINT_INFO
  printf("[warning] Shuting OpenACC down...\n");
#endif

  acc_init_once();

  unsigned device_idx = acc_get_device_idx(dev, num);

  acc_device_data_t device_data = acc_runtime.opencl_data->devices_data[device_idx];
  if (device_data == NULL)
    return;

  cl_int status;

  /* Release the objects created from the context first, then the context itself. */
  if (device_data->programs != NULL) {
    unsigned i;
    for (i = 0; i < compiler_data.num_regions; i++) {
      if (device_data->programs[i] == NULL)
        continue;

      status = clReleaseProgram(device_data->programs[i]);
      if (status != CL_SUCCESS)
        printf("[warning] clReleaseProgram : %s, %d for region %u return %d\n", acc_device_name[dev], num, i, status);
    }

    free(device_data->programs);
    device_data->programs = NULL;
  }

  /* acc_init__ creates a command queue for every device it sets up; it was previously never released. */
  status = clReleaseCommandQueue(device_data->command_queue);
  if (status != CL_SUCCESS)
    printf("[warning] clReleaseCommandQueue : %s, %d return %d\n", acc_device_name[dev], num, status);

  /* OpenCL status codes are negative cl_int values, hence %d rather than %u. */
  status = clReleaseContext(device_data->context);
  if (status != CL_SUCCESS)
    printf("[warning] clReleaseContext : %s, %d return %d\n", acc_device_name[dev], num, status);

  free(device_data);
  acc_runtime.opencl_data->devices_data[device_idx] = NULL;
}
/**
 * Shut down every device of the given type.
 *
 * Calls acc_shutdown_() once per device number in [0, acc_get_num_devices(dev)).
 * Asserts that at least one device of that type is reported.
 *
 * \param dev device type to shut down
 */
void acc_shutdown(acc_device_t dev) {
  acc_init_once();

  size_t device_count = acc_get_num_devices(dev);
  assert(device_count > 0);

  unsigned num = 0;
  while (num < device_count) {
    acc_shutdown_(dev, num);
    num++;
  }
}
void acc_init__(unsigned device_idx) { acc_init_once(); if (acc_runtime.opencl_data->devices_data[device_idx] == NULL) { cl_int status; cl_device_id * device = &(acc_runtime.opencl_data->devices[0][device_idx]); acc_device_data_t device_data = (acc_device_data_t)malloc(sizeof(struct acc_device_data_t_)); if (device_data == NULL) { perror("[fatal] malloc : device_data"); exit(-1); } acc_runtime.opencl_data->devices_data[device_idx] = device_data; device_data->context = clCreateContext(NULL, 1, device, &acc_ocl_ctx_error_callback, NULL, &status); if (status != CL_SUCCESS || device_data->context == NULL) { printf("[error] clCreateContext : %s, %u return %u : failed\n", "", 0/** \todo acc_device_name[dev], num*/, status); free(device_data); device_data = NULL; return; } device_data->programs = (cl_program *)malloc(compiler_data.num_regions * sizeof(cl_program)); if (device_data->programs == NULL) { perror("[fatal] malloc : device_data->programs"); exit(-1); } unsigned i; for (i = 0; i < compiler_data.num_regions; i++) device_data->programs[i] = NULL; device_data->command_queue = clCreateCommandQueue(device_data->context, *device, CL_QUEUE_PROFILING_ENABLE, &status); if (status != CL_SUCCESS || device_data->command_queue == NULL) { char * status_str; switch (status) { case CL_INVALID_CONTEXT: status_str = "CL_INVALID_CONTEXT"; break; case CL_INVALID_DEVICE: status_str = "CL_INVALID_DEVICE"; break; case CL_INVALID_VALUE: status_str = "CL_INVALID_VALUE"; break; case CL_INVALID_QUEUE_PROPERTIES: status_str = "CL_INVALID_QUEUE_PROPERTIES"; break; case CL_OUT_OF_RESOURCES: status_str = "CL_OUT_OF_RESOURCES"; break; case CL_OUT_OF_HOST_MEMORY: status_str = "CL_OUT_OF_HOST_MEMORY"; break; default: status_str = "CL_UNKNOWN_ERROR_CODE"; break; } printf("[fatal ] clCreateCommandQueue... return %s\n", status_str); exit(-1); /// \todo error code } } }
void acc_dbg_dump_runtime() { acc_init_once(); assert(0); /// \todo useless before updating printf("Device type : "); switch (acc_runtime.curr_device_type) { case acc_device_any: printf("ACC_DEVICE_ANY\n"); break; case acc_device_nvidia: printf("ACC_DEVICE_NVIDIA\n"); break; case acc_device_amd: printf("ACC_DEVICE_AMD\n"); break; case acc_device_intel: printf("ACC_DEVICE_INTEL\n"); break; default: printf("invalid !\n"); } printf("Device Number : %u\n", acc_runtime.curr_device_num); if (acc_runtime.opencl_data != NULL) { printf("OpenCL Data:\n"); unsigned i, j; for (i = 0; i < acc_runtime.opencl_data->num_platforms; i++) { char buf[100]; printf(" Platform %u: \n", i); clGetPlatformInfo(acc_runtime.opencl_data->platforms[i], CL_PLATFORM_VENDOR, sizeof(buf), buf, NULL); printf(" Vendor: %s\n", buf); clGetPlatformInfo(acc_runtime.opencl_data->platforms[i], CL_PLATFORM_NAME, sizeof(buf), buf, NULL); printf(" Name: %s\n", buf); printf(" Number Devices: %u\n", acc_runtime.opencl_data->num_devices[i]); for (j = 0; j < acc_runtime.opencl_data->num_devices[i]; j++) { printf(" Device %u: \n", j); clGetDeviceInfo((acc_runtime.opencl_data->devices)[i][j], CL_DEVICE_VENDOR, sizeof(buf), buf, NULL); printf(" Vendor: %s\n", buf); clGetDeviceInfo((acc_runtime.opencl_data->devices)[i][j], CL_DEVICE_NAME, sizeof(buf), buf, NULL); printf(" Name: %s\n", buf); } } } else printf("No OpenCL Data !\n"); }
acc_kernel_t acc_build_kernel(struct acc_kernel_desc_t_ * kernel) { acc_init_once(); acc_kernel_t result = (acc_kernel_t)malloc(sizeof(struct acc_kernel_t_)); result->desc = kernel; result->param_ptrs = ( void **)malloc(kernel->num_params * sizeof( void *)); result->scalar_ptrs = ( void **)malloc(kernel->num_scalars * sizeof( void *)); result->data_ptrs = (d_void **)malloc(kernel->num_datas * sizeof(d_void *)); result->data_size = ( size_t *)malloc(kernel->num_datas * sizeof( size_t)); result->private_ptrs = (d_void **)malloc(kernel->num_datas * sizeof(d_void *)); result->private_size = ( size_t *)malloc(kernel->num_datas * sizeof( size_t)); result->loops = malloc(kernel->num_loops * sizeof(struct acc_loop_t_)); unsigned i; for (i = 0; i < kernel->num_loops; i++) { result->loops[i].lower = 0; result->loops[i].upper = 0; result->loops[i].stride = 0; } return result; }
/**
 * Report how many devices of the given type the runtime has recorded.
 *
 * \param dev device type, used as an index into acc_runtime.devices
 * \return the 'num' field stored for that device type
 */
int acc_get_num_devices(acc_device_t dev) {
  acc_init_once();

  int device_count = acc_runtime.devices[dev].num;

  return device_count;
}
/**
 * Not implemented yet: trips an assertion when assertions are enabled.
 *
 * \param dev device type (currently unused)
 * \return -1 when the assertion is compiled out
 */
int acc_on_device(acc_device_t dev) {
  acc_init_once();

  assert(!"NIY"); /// \todo

  return -1;
}
/**
 * Return the device number currently selected in the runtime.
 *
 * \param dev device type; not consulted by this implementation
 * \return acc_runtime.curr_device_num
 */
int acc_get_device_num(acc_device_t dev) {
  acc_init_once();

  int current_num = acc_runtime.curr_device_num;

  return current_num;
}
/**
 * Select the current device: records both the device number and the device
 * type in the runtime state.
 *
 * \param num device number to make current
 * \param dev device type to make current
 */
void acc_set_device_num (int num, acc_device_t dev) {
  acc_init_once();

  acc_runtime.curr_device_num  = num;
  acc_runtime.curr_device_type = dev;
}
/**
 * Return the device type currently selected in the runtime.
 *
 * \return acc_runtime.curr_device_type
 */
acc_device_t acc_get_device_type() {
  acc_init_once();

  acc_device_t current_type = acc_runtime.curr_device_type;

  return current_type;
}
/**
 * Record the given device type as the current one; the current device number
 * is left untouched.
 *
 * \param dev device type to make current
 */
void acc_set_device_type(acc_device_t dev) {
  acc_init_once();

  acc_runtime.curr_device_type = dev;
}
int main(int argc, char ** argv) { if (argc != 16) { printf("Usage: %s mun_gang[3] num_worker[3] portions[3] version_by_devices[3] n m p\n", argv[0]); exit(-1); } size_t num_gang[3] = {atoi(argv[1]), atoi(argv[2]), atoi(argv[3])}; size_t num_worker[3] = {atoi(argv[4]), atoi(argv[5]), atoi(argv[6])}; portions[0] = atoi(argv[7]); portions[1] = atoi(argv[8]); portions[2] = atoi(argv[9]); version_by_devices[0] = atoi(argv[10]); version_by_devices[1] = atoi(argv[11]); version_by_devices[2] = atoi(argv[12]); size_t n = atoi(argv[13]); size_t m = atoi(argv[14]); size_t p = atoi(argv[15]); size_t vector_length[3] = {1,1,1}; // Initialize OpenACC (for profiling) acc_init_once(); // Set the experiment (Configure 'Runs' table in DB) char * experiment_desc = "gpu_kernel_id INT, cpu_kernel_id INT, mic_kernel_id INT, "\ "gpu_portion INT, cpu_portion INT, mic_portion INT, "\ "gpu_gang INT, gpu_worker INT, "\ "cpu_gang INT, cpu_worker INT, "\ "mic_gang INT, mic_worker INT, "\ "n INT, m INT, p INT , "\ "comp_time BIGINT , data_time BIGINT "; acc_profiling_set_experiment(experiment_desc); // Add current run in 'Runs' table char run_desc[1024]; sprintf(run_desc, " '%zd' , '%zd' , '%zd' , '%u' , '%u', '%u' , '%zd' , '%zd' , '%zd' , '%zd' , '%zd' , '%zd' , '%zd' , '%zd' , '%zd' , '0' , '0' ", version_by_devices[0], version_by_devices[1], version_by_devices[2], portions[0], portions[1], portions[2], num_gang[0], num_worker[0], num_gang[1], num_worker[1], num_gang[2], num_worker[2], n, m, p); acc_profiling_new_run(run_desc); int i, j; float ** a; float ** b; float ** c; init_data(n, m, p, &a, &b, &c); acc_timer_t data_timer = acc_timer_build(); acc_timer_t comp_timer = acc_timer_build(); kernel_509(n, m, p, a, b, c, num_gang, num_worker, vector_length, data_timer, comp_timer); acc_timer_delta(comp_timer); acc_timer_delta(data_timer); // Update 'Runs' table with host-side timers. 
char db_query[1024]; sprintf(db_query, "UPDATE Runs SET comp_time='%d', data_time='%d' where rowid='%d'", comp_timer->delta, data_timer->delta, acc_profiler->run_id); char * err_msg; int status = sqlite3_exec (acc_profiler->db_file, db_query, NULL, 0, &err_msg); assert (status == SQLITE_OK); free_data(a, b, c); acc_profiling_exit(); return 0; }