void print_platforms_devices() { // get number of platforms cl_uint plat_count; CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); // allocate memory, get list of platforms cl_platform_id *platforms = (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); CHECK_SYS_ERROR(!platforms, "allocating platform array"); CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); // iterate over platforms for (cl_uint i = 0; i < plat_count; ++i) { // get platform vendor name char buf[MAX_NAME_LEN]; CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, sizeof(buf), buf, NULL)); printf("platform %d: vendor '%s'\n", i, buf); // get number of devices in platform cl_uint dev_count; CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &dev_count)); cl_device_id *devices = (cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); CHECK_SYS_ERROR(!devices, "allocating device array"); // get list of devices in platform CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, dev_count, devices, NULL)); // iterate over devices for (cl_uint j = 0; j < dev_count; ++j) { char buf[MAX_NAME_LEN]; CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, sizeof(buf), buf, NULL)); printf(" device %d: '%s'\n", j, buf); } free(devices); } free(platforms); }
bool BinaryFile::Truncate(const string& filename) { DWORD error = GetLastError(); HANDLE hFile = CreateFile(filename.Data(), GENERIC_WRITE, 0, 0, TRUNCATE_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); error = GetLastError(); if (error == ERROR_FILE_NOT_FOUND || error == ERROR_SUCCESS) return true; SetLastError(error); CHECK_SYS_ERROR(L"Error can't truncate binary file " + filename); CloseHandle(hFile); CHECK_SYS_ERROR(L"Failed to truncate a file " + filename); return true; }
bool BinaryFile::Save(const string& filename, const Buffer& buffer) { DWORD error = GetLastError(); HANDLE hFile = CreateFile(filename.Data(), GENERIC_WRITE, 0, 0,CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); CHECK_SYS_ERROR(L"Error in binary file, open file for saving " + filename); DWORD read; WriteFile(hFile, (LPCVOID)buffer.StartPointer(), (DWORD)buffer.GetPosition(), &read, 0); CHECK_SYS_ERROR(L"Error in binary file, can't write data to file " + filename); if (read != buffer.GetPosition()) throw OSException(L"Error in binary file, written data is less than should be " + filename); CloseHandle(hFile); CHECK_SYS_ERROR(L"Saving binary file failed " + filename); return true; }
//TODO move this to the routine opencl.printAllPlatform() void print_platforms_devices() { cl_uint plat_count; CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); cl_platform_id *platforms = (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); CHECK_SYS_ERROR(!platforms, "allocating platform array"); CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); cl_uint i; for (i = 0; i < plat_count; ++i) { char buf[100]; CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, sizeof(buf), buf, NULL)); printf("plat %d: vendor '%s'\n", i, buf); cl_uint dev_count; CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &dev_count)); cl_device_id *devices = (cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); CHECK_SYS_ERROR(!devices, "allocating device array"); CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, dev_count, devices, NULL)); cl_uint j; for (j = 0; j < dev_count; ++j) { char buf[100]; CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, sizeof(buf), buf, NULL)); printf(" dev %d '%s'\n", j, buf); } free(devices); } free(platforms); }
bool BinaryFile::Load(const string& filename, Buffer& buffer) { DWORD error = GetLastError(); HANDLE hFile = CreateFile(filename.Data(), GENERIC_READ, FILE_SHARE_READ|FILE_SHARE_WRITE, 0, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); CHECK_SYS_ERROR(L"Error in binary file, can't load it " + filename); int size = GetFileSize(hFile, 0); buffer.SetSize(size); DWORD read; ReadFile(hFile, buffer.StartPointer(), size, &read, 0); CHECK_SYS_ERROR(L"Error in binary file, can't read data " + filename); if (read != size) throw OSException(L"Error in binary file, read data less than file contains, possible bad staff happenes " + filename); CloseHandle(hFile); CHECK_SYS_ERROR(L"Binary file load failed " + filename); return true; }
bool BinaryFile::Append(const string& filename, const Buffer& buffer) { DWORD error = GetLastError(); HANDLE hFile = CreateFile(filename.Data(), GENERIC_WRITE, 0, 0, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); CHECK_SYS_ERROR(L"Error in binary file, can't open file for appending it " + filename); DWORD offset = GetFileSize(hFile, 0); SetFilePointer(hFile, offset, 0, FILE_BEGIN); DWORD read; WriteFile(hFile, (LPCVOID)buffer.StartPointer(), (DWORD)buffer.GetPosition(), &read, 0); CHECK_SYS_ERROR(L"Error in binary file, can't write data to file " + filename); if (read != buffer.GetPosition()) throw OSException(L"Error in binary file, written data is less than should be in " + filename); CloseHandle(hFile); CHECK_SYS_ERROR(L"Failed to append a file " + filename); return true; }
// Build the OpenCL source text `knl` with compiler `options` in context `ctx`
// and return the kernel named `knl_name` from the resulting program.
//
// When the build fails, the build log for the program's device is printed to
// stderr and the process is aborted. Other OpenCL errors go through
// CHECK_CL_ERROR / CALL_CL_GUARDED, which are defined outside this block.
cl_kernel kernel_from_string(cl_context ctx,
    char const *knl, char const *knl_name, char const *options)
{
  cl_int status;

  // a program object may hold several kernels; it is created from one string
  const size_t source_length = strlen(knl);
  cl_program program = clCreateProgramWithSource(ctx, 1, &knl, &source_length, &status);
  CHECK_CL_ERROR(status, "clCreateProgramWithSource");

  status = clBuildProgram(program, 0, NULL, options, NULL, NULL);

  if (status == CL_SUCCESS)
  {
    CHECK_CL_ERROR(status, "clBuildProgram");
  }
  else
  {
    // the build failed: look up the device, fetch its build log, report it
    cl_device_id device;
    CALL_CL_GUARDED(clGetProgramInfo, (program, CL_PROGRAM_DEVICES,
          sizeof(device), &device, NULL));

    size_t log_bytes;
    CALL_CL_GUARDED(clGetProgramBuildInfo, (program, device, CL_PROGRAM_BUILD_LOG,
          0, NULL, &log_bytes));

    char *build_log = (char *) malloc(log_bytes);
    CHECK_SYS_ERROR(!build_log, "kernel_from_string: allocate log");

    char device_name[MAX_NAME_LEN];
    CALL_CL_GUARDED(clGetDeviceInfo, (device, CL_DEVICE_NAME,
          sizeof(device_name), device_name, NULL));

    CALL_CL_GUARDED(clGetProgramBuildInfo, (program, device, CL_PROGRAM_BUILD_LOG,
          log_bytes, build_log, NULL));

    fprintf(stderr, "*** build of '%s' on '%s' failed:\n%s\n*** (end of error)\n",
        knl_name, device_name, build_log);
    abort();
  }

  // extract the requested kernel, then drop our reference to the program
  cl_kernel result = clCreateKernel(program, knl_name, &status);
  CHECK_CL_ERROR(status, "clCreateKernel");

  CALL_CL_GUARDED(clReleaseProgram, (program));

  return result;
}
//TODO remove this at some point (deprecated) cl_kernel kernel_from_string(cl_context ctx, char const *knl, char const *knl_name, char const *options) { size_t sizes[] = { strlen(knl) }; cl_int status; cl_program program = clCreateProgramWithSource(ctx, 1, &knl, sizes, &status); CHECK_CL_ERROR(status, "clCreateProgramWithSource"); status = clBuildProgram(program, 0, NULL, options, NULL, NULL); if (status != CL_SUCCESS) { // build failed, get build log. cl_device_id dev; CALL_CL_GUARDED(clGetProgramInfo, (program, CL_PROGRAM_DEVICES, sizeof(dev), &dev, NULL)); size_t log_size; CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size)); char *log = malloc(log_size); CHECK_SYS_ERROR(!log, "kernel_from_string: allocate log"); char devname[100]; CALL_CL_GUARDED(clGetDeviceInfo, (dev, CL_DEVICE_NAME, sizeof(devname), devname, NULL)); CALL_CL_GUARDED(clGetProgramBuildInfo, (program, dev, CL_PROGRAM_BUILD_LOG, log_size, log, NULL)); THError("*** build of '%s' on '%s' failed:\n%s\n*** (end of error)\n", knl_name, devname, log); } else CHECK_CL_ERROR(status, "clBuildProgram"); cl_kernel kernel = clCreateKernel(program, knl_name, &status); CHECK_CL_ERROR(status, "clCreateKernel"); CALL_CL_GUARDED(clReleaseProgram, (program)); return kernel; }
// Read the entire file `filename` into a freshly malloc'ed, NUL-terminated
// buffer and return it; the caller owns the buffer and must free() it.
//
// Every failure is passed to CHECK_SYS_ERROR (defined outside this block)
// together with a message naming the step that failed.
char *read_file(const char *filename)
{
  // Binary mode: the byte count obtained from ftell() must equal what fread()
  // delivers. In text mode, platforms that translate line endings return
  // fewer characters and the size check below would fail.
  FILE *f = fopen(filename, "rb");
  CHECK_SYS_ERROR(!f, "read_file: opening file");

  // determine the file size by seeking to the end
  CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end");
  long end_pos = ftell(f);
  // ftell() reports failure as -1, which must not be used as a size
  CHECK_SYS_ERROR(end_pos < 0, "read_file: determining file size");
  CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0,
      "read_file: seeking to start");

  // non-negative from here on, so the unsigned conversion is safe
  size_t size = (size_t) end_pos;

  // one extra byte for the terminating NUL
  char *result = (char *) malloc(size+1);
  CHECK_SYS_ERROR(!result, "read_file: allocating file contents");
  CHECK_SYS_ERROR(fread(result, 1, size, f) < size,
      "read_file: reading file contents");

  CHECK_SYS_ERROR(fclose(f), "read_file: closing file");

  result[size] = '\0';
  return result;
}
// Slurp the file `filename` into a newly malloc'ed, NUL-terminated string.
// The caller takes ownership of the returned buffer and must free() it.
//
// Failures are reported through CHECK_SYS_ERROR (defined outside this block).
char *read_file(const char *filename)
{
  // Opened in binary mode so that the size measured with ftell() matches the
  // number of bytes fread() returns even where text mode translates line
  // endings.
  FILE *f = fopen(filename, "rb");
  CHECK_SYS_ERROR(!f, "read_file: opening file");

  // figure out file size
  CHECK_SYS_ERROR(fseek(f, 0, SEEK_END) < 0, "read_file: seeking to end");
  long file_len = ftell(f);
  // Without this check a failing ftell() (-1) would wrap to SIZE_MAX when
  // stored in a size_t, and size+1 below would wrap to a zero-byte request.
  CHECK_SYS_ERROR(file_len < 0, "read_file: determining file size");
  size_t size = (size_t) file_len;

  CHECK_SYS_ERROR(fseek(f, 0, SEEK_SET) != 0,
      "read_file: seeking to start");

  // allocate memory (plus room for the terminator), slurp in entire file
  char *result = (char *) malloc(size+1);
  CHECK_SYS_ERROR(!result, "read_file: allocating file contents");
  CHECK_SYS_ERROR(fread(result, 1, size, f) < size,
      "read_file: reading file contents");

  // close, terminate, return
  CHECK_SYS_ERROR(fclose(f), "read_file: closing file");
  result[size] = '\0';

  return result;
}
void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx, cl_context *ctx, cl_command_queue *queue, int enable_profiling) { cl_uint plat_count; CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); cl_platform_id *platforms = (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); CHECK_SYS_ERROR(!platforms, "allocating platform array"); CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); for (cl_uint i = 0; i < plat_count; ++i) { char buf[100]; CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, sizeof(buf), buf, NULL)); if (!plat_name || strstr(buf, plat_name)) { cl_uint dev_count; CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &dev_count)); cl_device_id *devices = (cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); CHECK_SYS_ERROR(!devices, "allocating device array"); CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, dev_count, devices, NULL)); for (cl_uint j = 0; j < dev_count; ++j) { char buf[100]; CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, sizeof(buf), buf, NULL)); if (!dev_name || strstr(buf, dev_name)) { if (idx == 0) { cl_platform_id plat = platforms[i]; cl_device_id dev = devices[j]; free(devices); free(platforms); cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 }; cl_int status; *ctx = clCreateContext( cps, 1, &dev, NULL, NULL, &status); CHECK_CL_ERROR(status, "clCreateContext"); cl_command_queue_properties qprops = 0; if (enable_profiling) qprops |= CL_QUEUE_PROFILING_ENABLE; *queue = clCreateCommandQueue(*ctx, dev, qprops, &status); CHECK_CL_ERROR(status, "clCreateCommandQueue"); return; } else --idx; } } free(devices); } } free(platforms); fputs("create_context_on: specified device not found.\n", stderr); abort(); }
// Decode `features` repeatedly, each time constraining the search with a
// prefix that is extended by one symbol of the reference `ref` per
// successful iteration.
//
// For every prefix a dedicated search and lattice are created, the utterance
// is decoded, and when a best hypothesis exists the lattice is written to the
// file named by formatting `lattice_tmpl` with the current prefix length.
// When no hypothesis is found, the same prefix is retried with the beam
// doubled.
//
// ref_type selects whether the prefix constrains the source (REF_SOURCE) or
// the target (REF_TARGET) side; for any other value no decoding is done.
// Returns the final prefix length.
int cat_decode_word(search_t *search, const features_t *features, lattice_t *lattice,
                    symbol_t *ref, reference_t ref_type, const char *lattice_tmpl) {
  TRACE(1, "Decoding CAT word...\n");

  int prefix_len = 0;
  const int ref_len = symlen(ref);
  float beam = search->decoder->beam_pruning;

  search_create_emission_cache(search);

  // Room for an optional grammar start symbol, up to ref_len symbols and the
  // VOCAB_NONE terminator. With ref_len + 1 elements an empty reference
  // combined with a start symbol wrote one element past the end.
  symbol_t *prefix = (symbol_t *)malloc((ref_len + 2) * sizeof(symbol_t));
  CHECK(prefix != NULL, "Couldn't allocate memory for the prefix\n");

  if (search->decoder->grammar->start != VOCAB_NONE) {
    prefix[prefix_len++] = search->decoder->grammar->start;
  }
  prefix[prefix_len] = VOCAB_NONE;

  if (ref_type == REF_SOURCE || ref_type == REF_TARGET) {
    for (; prefix_len < ref_len; prefix_len++) {
      {
        char *prefix_str = NULL;
        vocab_symbols_to_string(prefix,
            (ref_type == REF_SOURCE) ? search->decoder->vocab->in : search->decoder->vocab->out,
            &prefix_str);
        TRACE(1, "next prefix: %s\n", prefix_str);
        free(prefix_str);
      }

      search_t *prefix_search = NULL;
      if (ref_type == REF_SOURCE) {
        prefix_search = search_create_from_prefix(search, prefix, NULL);
      }
      else if (ref_type == REF_TARGET) {
        prefix_search = search_create_from_prefix(search, NULL, prefix);
      }
      else {
        REQUIRE(ref_type > REF_NONE && ref_type < REF_MAX, "Invalid reference type\n");
      }

      // carries a beam widened by an earlier failed iteration
      search->decoder->beam_pruning = beam;

      CHECK(prefix_search->decoder->grammar->list_initial->num_elements > 0,
            "Empty prefix grammar. Possible lack of coverture\n");

      fprintf(stderr, "n initials = %d, n_states = %d\n",
              prefix_search->decoder->grammar->list_initial->num_elements,
              prefix_search->decoder->grammar->num_states);
      grammar_write_dot(prefix_search->decoder->grammar, stderr);

      lattice_t *prefix_lattice = lattice_create(lattice->nbest, lattice->nnode, prefix_search->decoder);

      clock_t tim = clock();
      decode(prefix_search, features, prefix_lattice);
      clock_t tim2 = clock();

      // Convert to float BEFORE dividing by CLOCKS_PER_SEC; the previous
      // integer division truncated every sub-second decode to 0.
      TRACE(1, "iter %d tim %f\n", prefix_len,
            ((float) (tim2 - tim) / CLOCKS_PER_SEC / prefix_search->n_frames) / 0.01);

      // Calculate best hypothesis
      {
        symbol_t *best_ext_hyp = NULL;
        lattice_best_hyp(prefix_lattice, &best_ext_hyp);

        if (best_ext_hyp != NULL) {
          // write lattice
          {
            char path[MAX_LINE];
            // bounded formatting: the template comes from the caller
            snprintf(path, sizeof(path), lattice_tmpl, prefix_len);
            FILE *lattice_file = smart_fopen(path, "w");
            CHECK_SYS_ERROR(lattice_file != NULL, "Couldn't create word graph file '%s'\n", path);
            lattice_write(prefix_lattice, lattice_file, path);
            smart_fclose(lattice_file);
          }

          // add one word to the prefix
          prefix[prefix_len] = ref[prefix_len];
          prefix[prefix_len + 1] = VOCAB_NONE;

          {
            char *sentence_str = NULL;
            extended_vocab_symbols_to_string(best_ext_hyp, prefix_lattice->decoder->vocab, &sentence_str);
            TRACE(1, "%s\n", sentence_str);
            free(sentence_str);
          }

          free(best_ext_hyp);
        }
        else {
          TRACE(1, "Sentence not recognized. Increasing beam search\n");
          // undo the loop increment so the same prefix is retried,
          // this time with a beam twice as wide
          prefix_len--;
          beam *= 2;
        }
      }

      lattice_delete(prefix_lattice);
      search_delete(prefix_search);

      fflush(stdout);
    }
  }

  free(prefix);

  return prefix_len;
}
void create_context_on(const char *plat_name, const char*dev_name, cl_uint idx, cl_context *ctx, cl_command_queue *queue, int enable_profiling) { char dev_sel_buf[MAX_NAME_LEN]; char platform_sel_buf[MAX_NAME_LEN]; // get number of platforms cl_uint plat_count; CALL_CL_GUARDED(clGetPlatformIDs, (0, NULL, &plat_count)); // allocate memory, get list of platform handles cl_platform_id *platforms = (cl_platform_id *) malloc(plat_count*sizeof(cl_platform_id)); CHECK_SYS_ERROR(!platforms, "allocating platform array"); CALL_CL_GUARDED(clGetPlatformIDs, (plat_count, platforms, NULL)); // print menu, if requested #ifndef CL_HELPER_FORCE_INTERACTIVE if (plat_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer #endif { puts("Choose platform:"); for (cl_uint i = 0; i < plat_count; ++i) { char buf[MAX_NAME_LEN]; CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, sizeof(buf), buf, NULL)); printf("[%d] %s\n", i, buf); } printf("Enter choice: "); fflush(stdout); char *sel = read_a_line(); if (!sel) { fprintf(stderr, "error reading line from stdin"); abort(); } int sel_int = MIN(MAX(0, atoi(sel)), (int) plat_count-1); free(sel); CALL_CL_GUARDED(clGetPlatformInfo, (platforms[sel_int], CL_PLATFORM_VENDOR, sizeof(platform_sel_buf), platform_sel_buf, NULL)); plat_name = platform_sel_buf; } // iterate over platforms for (cl_uint i = 0; i < plat_count; ++i) { // get platform name char buf[MAX_NAME_LEN]; CALL_CL_GUARDED(clGetPlatformInfo, (platforms[i], CL_PLATFORM_VENDOR, sizeof(buf), buf, NULL)); // does it match? 
if (!plat_name || strstr(buf, plat_name)) { // get number of devices in platform cl_uint dev_count; CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, 0, NULL, &dev_count)); // allocate memory, get list of device handles in platform cl_device_id *devices = (cl_device_id *) malloc(dev_count*sizeof(cl_device_id)); CHECK_SYS_ERROR(!devices, "allocating device array"); CALL_CL_GUARDED(clGetDeviceIDs, (platforms[i], CL_DEVICE_TYPE_ALL, dev_count, devices, NULL)); // {{{ print device menu, if requested #ifndef CL_HELPER_FORCE_INTERACTIVE if (dev_name == CHOOSE_INTERACTIVELY) // yes, we want exactly that pointer #endif { puts("Choose device:"); for (cl_uint j = 0; j < dev_count; ++j) { char buf[MAX_NAME_LEN]; CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, sizeof(buf), buf, NULL)); printf("[%d] %s\n", j, buf); } printf("Enter choice: "); fflush(stdout); char *sel = read_a_line(); if (!sel) { fprintf(stderr, "error reading line from stdin"); abort(); } int int_sel = MIN(MAX(0, atoi(sel)), (int) dev_count-1); free(sel); CALL_CL_GUARDED(clGetDeviceInfo, (devices[int_sel], CL_DEVICE_NAME, sizeof(dev_sel_buf), dev_sel_buf, NULL)); dev_name = dev_sel_buf; } // }}} // iterate over devices for (cl_uint j = 0; j < dev_count; ++j) { // get device name char buf[MAX_NAME_LEN]; CALL_CL_GUARDED(clGetDeviceInfo, (devices[j], CL_DEVICE_NAME, sizeof(buf), buf, NULL)); // does it match? 
if (!dev_name || strstr(buf, dev_name)) { if (idx == 0) { cl_platform_id plat = platforms[i]; cl_device_id dev = devices[j]; free(devices); free(platforms); cl_int status; // create a context #if OPENCL_SHARE_WITH_OPENGL #if __APPLE__ // CGLContextObj kCGLContext = CGLGetCurrentContext(); // CGLShareGroupObj kCGLShareGroup = CGLGetShareGroup(kCGLContext); // cl_context_properties cps[] = { // CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)kCGLShareGroup, // CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 }; // CGLContextObj gl_context = CGLGetCurrentContext(); CGLShareGroupObj share_group = CGLGetShareGroup(gl_context); cl_context_properties properties[] = { CL_CONTEXT_PROPERTY_USE_CGL_SHAREGROUP_APPLE, (cl_context_properties)share_group, 0 }; *ctx = clCreateContext(properties, 0, 0, 0, 0, 0); clGetGLContextInfoAPPLE(*ctx, gl_context, CL_CGL_DEVICE_FOR_CURRENT_VIRTUAL_SCREEN_APPLE, sizeof(dev), &dev, NULL); #elif WIN32 cl_context_properties cps[] = { CL_GL_CONTEXT_KHR, (cl_context_properties) wglGetCurrentContext(), CL_WGL_HDC_KHR, (cl_context_properties) wglGetCurrentDC(), CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0}; //Probably won't work because &dev should correspond to glContext *ctx = clCreateContext(cps, 1, &dev, NULL, NULL, &status); CHECK_CL_ERROR(status, "clCreateContext"); #else // Linux cl_context_properties cps[] = { CL_GL_CONTEXT_KHR, ( cl_context_properties) glXGetCurrentContext(), CL_GLX_DISPLAY_KHR, (cl_context_properties) glXGetCurrentDisplay(), CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 }; //Probably won't work because &dev should correspond to glContext *ctx = clCreateContext(cps, 1, &dev, NULL, NULL, &status); CHECK_CL_ERROR(status, "clCreateContext"); #endif #else // create a context cl_context_properties cps[3] = { CL_CONTEXT_PLATFORM, (cl_context_properties) plat, 0 }; // create a command queue cl_command_queue_properties qprops = 0; if (enable_profiling) qprops |= CL_QUEUE_PROFILING_ENABLE; 
*queue = clCreateCommandQueue(*ctx, dev, qprops, &status); CHECK_CL_ERROR(status, "clCreateCommandQueue"); #endif // *ctx = clCreateContext( // cps, 1, &dev, NULL, NULL, &status); // CHECK_CL_ERROR(status, "clCreateContext"); // // create a command queue cl_command_queue_properties qprops = 0; if (enable_profiling) qprops |= CL_QUEUE_PROFILING_ENABLE; *queue = clCreateCommandQueue(*ctx, dev, qprops, &status); CHECK_CL_ERROR(status, "clCreateCommandQueue"); return; } else --idx; } } free(devices); } } free(platforms); fputs("create_context_on: specified device not found.\n", stderr); abort(); }
int main() { int enable_profiling = 0; #ifdef DO_TIMING enable_profiling = 1; #endif //print_platforms_devices(); cl_context ctx; cl_command_queue queue; create_context_on("NVIDIA", NULL, 0, &ctx, &queue, enable_profiling); // -------------------------------------------------------------------------- // load kernels // -------------------------------------------------------------------------- // read the cl file char buf[100]; sprintf(buf, "mg-kernel-ver%d.cl", VERSION); char *knl_text = read_file(buf); //get work group dimensions and gflop info. int wg_dims , wg_x, wg_y, wg_z, z_div, fetch_per_pt, flops_per_pt; if (sscanf(knl_text, "// workgroup: (%d,%d,%d) z_div:%d fetch_per_pt:%d flops_per_pt:%d", &wg_x, &wg_y, &wg_z, &z_div, &fetch_per_pt, &flops_per_pt) == 6) { wg_dims = 3; } else if (sscanf(knl_text, "// workgroup: (%d,%d) fetch_per_pt:%d flops_per_pt:%d", &wg_x, &wg_y, &fetch_per_pt, &flops_per_pt) == 4) { wg_dims = 2; wg_z = -1; z_div = -1; } else { perror("reading workgroup spec"); abort(); } #ifdef USE_DOUBLE char *compile_opt = "-DFTYPE=double"; #else char *compile_opt = "-DFTYPE=float"; #endif // creation of the kernel cl_kernel poisson_knl = kernel_from_string(ctx, knl_text, "fd_update", compile_opt); free(knl_text); // my compiler complains about this one. OJO!! // -------------------------------------------------------------------------- // set up grid // -------------------------------------------------------------------------- const unsigned points = POINTS; const ftype minus_bdry = -1, plus_bdry = 1; // We're dividing into (points-1) intervals. 
ftype dx = (plus_bdry-minus_bdry)/(points-1); // -------------------------------------------------------------------------- // allocate and initialize CPU memory // -------------------------------------------------------------------------- int use_alignment; unsigned dim_other = points; //if order 2 then 1 point extra on each side #ifdef USE_ALIGNMENT // adjusts dimension so that the next row starts in a number divisible by 16 unsigned dim_x = ((dim_other + 15) / 16) * 16; unsigned field_start = 0; use_alignment = 1; #else unsigned dim_x = dim_other; unsigned field_start = 0;// this one puts me right at the beginning use_alignment = 0; #endif // --------Allocate forcing uexact, r and u vectors ------------------------- const size_t field_size = 0+dim_x*dim_x*dim_x; // extra large to fit the 2^n constrain in GPU ftype *f = malloc(field_size*sizeof(ftype)); CHECK_SYS_ERROR(!f, "allocating f"); ftype *u = malloc (field_size*sizeof(ftype)); CHECK_SYS_ERROR(!u, "allocating u"); ftype *uexact = malloc (field_size*sizeof(ftype)); CHECK_SYS_ERROR(!uexact, "allocating uexact"); ftype *r = malloc(field_size * sizeof(ftype)); CHECK_SYS_ERROR(!r, "allocating residual r"); // -------------------------------------------------------------------------- // initialize // -------------------------------------------------------------------------- // zero out (necessary to initialize everything bec. 
I measure norms) for (size_t i = 0; i < field_size; ++i){ f[i] = 0; u[i] = 0; uexact[i] = 0; r[i] = 0; } // set up the forcing field init_f (points, f, dx, field_start, dim_x, dim_other, minus_bdry); // Initialize u with initial boundary conditions init_u ( points, u , minus_bdry, plus_bdry, dx, field_start, dim_x, dim_other); // Initialize the exact solution init_uexact(points, u, uexact, dx, field_size, field_start, dim_x, dim_other); // -------------------------------------------------------------------------- // Setup the v-cycles // -------------------------------------------------------------------------- unsigned n1, n2, n3, ncycles; n1 = 50; n2 = 60; n3 = 1; ncycles = 2; ftype *sweeps = malloc (ncycles*sizeof(ftype)); ftype *rnorm = malloc (ncycles*sizeof(ftype)); ftype *enorm = malloc (ncycles*sizeof(ftype)); ftype rtol = 1.0e-05; // Find the norm of the residual (choose your method) sweeps[0] =0; resid (r, f, u, dx, field_size, field_start, dim_x, dim_other); rnorm[0] = norm( r , field_size) * dx; U_error(u, uexact, r, field_size); enorm[0] = norm( r, field_size ) * dx; for(unsigned icycle = 1; icycle <= ncycles; icycle++){ mgv(f, u, dx, n1, n2, n3, field_size, points, use_alignment, dim_x, ctx, queue, poisson_knl, wg_dims , wg_x, wg_y, wg_z, z_div, fetch_per_pt, flops_per_pt); //update u through a v-cycle sweeps[icycle] = sweeps[icycle -1] + (4 * (n1 + n2)/3); resid (r, f, u, dx, field_size, field_start, dim_x, dim_other); rnorm[icycle] = norm( r, field_size ) * dx; U_error(u, uexact, r, field_size); enorm[icycle] = norm( r, field_size ) * dx; //cfacts = (rnorm(icycle)/rnorm(icycle - 1))^(1 / (n1 + n2)) not necessary //disp something here if I want to. 
//printf("norm of the cycle %f", enorm[icycle]); if(rnorm[icycle] <= rtol * rnorm[0]) break; } #ifdef DO_TIMING printf(" ftype:%d ver:%d align:%d pts:%d\tgflops:%.1f\tmcells:%.1f\tgbytes:%.1f [/sec]\tout_gflops:%.6f\n", (int) sizeof(ftype), VERSION, use_alignment, points, gflops_performed/seconds_taken, mcells_updated/seconds_taken, gbytes_accessed/seconds_taken, gflops_performed/tot_secs); #endif // -------------------------------------------------------------------------- // clean up // -------------------------------------------------------------------------- CALL_CL_GUARDED(clReleaseKernel, (poisson_knl)); CALL_CL_GUARDED(clReleaseCommandQueue, (queue)); CALL_CL_GUARDED(clReleaseContext, (ctx)); }