/**************************************************************************
 Function: reference_jacobi

 Main iteration loop of the reference Jacobi implementation (no OpenCL).
 Both arrays are initialised with INITIAL_GUESS and the boundary
 conditions, then reference_jacobi_kernel() is applied repeatedly, with
 the roles of the two arrays swapped before every sweep, until the value
 it returns is no longer greater than the tolerance or max_iter sweeps
 have been performed. At least one sweep is always executed.

 params:
    a          two arrays to compute the solution into, indexed by OLD/NEW;
               the pointers stored in a[] are swapped on every sweep
    max_iter   maximum number of iterations
    size       size of the array (forwarded to the helper routines)
    tolerance  iteration stops once the maximum difference is no longer
               greater than this value
    d          discretization size (forwarded to set_boundary_conditions
               and print_array)
**************************************************************************/
static void reference_jacobi(value_type *a[2], unsigned int max_iter, size_t size[DIMENSIONS], value_type tolerance, value_type d[DIMENSIONS])
{
    unsigned int iter = 0;
    value_type max_diff;
    /* This routine contains no timing instrumentation. The value is set to
       zero so that the final report prints a defined number instead of
       reading an uninitialised variable. */
    value_type timer = 0;

    /* init arrays by setting the initial value and the boundary conditions */
    set_initial_solution(a[OLD], size, INITIAL_GUESS);
    set_initial_solution(a[NEW], size, INITIAL_GUESS);
    set_boundary_conditions(a[OLD], size, d);
    set_boundary_conditions(a[NEW], size, d);

    /* print the initial solution guess */
    print_array("Init ", a[NEW], size, d);

    /* iterate until the maximum difference is no longer greater than the
       given tolerance or the number of iterations is too high */
    do {
        /* swap array pointers so the previous output becomes the input */
        SWAP_PTR(a[OLD], a[NEW]);

        /* iterate using a[OLD] as the input and a[NEW] as the output */
        max_diff = reference_jacobi_kernel(a[OLD], a[NEW], size);

        /* output status for the user every 100 sweeps; '\r' makes the
           next status overwrite the same line */
        if (0 == iter % 100) {
            printf("Iteration=%5u, max difference=%0.7f, target=%0.7f\r", iter, max_diff, tolerance);
            fflush(stdout);
        }

        /* increment counter */
        iter++;
    } while (max_diff > tolerance && max_iter > iter);

    /* output final iteration count and maximum difference value */
    printf("Iteration=%5u, max difference=%0.7f, execution time=%.3f seconds\n", iter, max_diff, timer);
}
void run_convergence_order_study (int argc, char** argv, const int conv_study_type) { const char* ctrl_name = argv[1]; struct Test_Info test_info = { .n_warn = 0, }; struct Integration_Test_Info* int_test_info = constructor_Integration_Test_Info(ctrl_name); if (argc == 4) int_test_info->conv_study_extension = argv[3]; const int* p_ref = int_test_info->p_ref, * ml_ref = int_test_info->ml; struct Simulation* sim = NULL; const char type_rc = 'r'; int ml_prev = ml_ref[0]-1, p_prev = p_ref[0]-1; bool ignore_static = false; int ml_max = ml_ref[1]; switch (conv_study_type) { case CONV_STUDY_SOLVE: break; // Do nothing case CONV_STUDY_SOLVE_NO_CHECK: // fallthrough case CONV_STUDY_RESTART: ignore_static = true; break; default: EXIT_ERROR("Unsupported: %d\n",conv_study_type); break; } for (int ml = ml_ref[0]; ml <= ml_max; ++ml) { for (int p = p_ref[0]; p <= p_ref[1]; ++p) { const int adapt_type = int_test_info->adapt_type; const char*const ctrl_name_curr = set_file_name_curr(adapt_type,p,ml,false,ctrl_name); structor_simulation(&sim,'c',adapt_type,p,ml,p_prev,ml_prev,ctrl_name_curr,type_rc,ignore_static); // d. ignore_static = false; switch (conv_study_type) { case CONV_STUDY_SOLVE: // fallthrough case CONV_STUDY_SOLVE_NO_CHECK: switch (get_set_method(NULL)) { case METHOD_DG: case METHOD_DPG: case METHOD_OPG: case METHOD_OPGC0: solve_for_solution(sim); break; case METHOD_L2_PROJ: set_initial_solution(sim); break; // do nothing. 
default: EXIT_ERROR("Unsupported: %d\n",get_set_method(NULL)); break; } break; case CONV_STUDY_RESTART: { assert(using_restart() == true); set_initial_solution(sim); set_to_zero_residual(sim); struct Test_Case*const test_case = (struct Test_Case*) sim->test_case_rc->tc; test_case->constructor_Error_CE = test_case->constructor_Error_CE_restart_test; break; } default: EXIT_ERROR("Unsupported: %d\n",conv_study_type); break; } if (p == ORDER_VIS_CONV_P && ml <= ORDER_VIS_CONV_ML_MAX) { output_visualization(sim,VIS_GEOM_EDGES); output_visualization(sim,VIS_SOLUTION); output_visualization(sim,VIS_GEOM_VOLUMES); output_visualization(sim,VIS_NORMALS); } output_error(sim); output_error_functionals(sim); if (DISPLAY_CONV) printf("\ntest_integration_convergence (ml, p, dof): %d %d %td\n\n\n",ml,p,compute_dof(sim)); if ((ml == ml_max) && (p == p_ref[1])) { output_restart(sim); set_convergence_order_discount(int_test_info); bool pass = true; switch (conv_study_type) { case CONV_STUDY_SOLVE: // fallthrough case CONV_STUDY_RESTART: check_convergence_orders(ERROR_STANDARD,&pass,&test_info,int_test_info,sim); check_convergence_orders(ERROR_FUNCTIONAL,&pass,&test_info,int_test_info,sim); break; case CONV_STUDY_SOLVE_NO_CHECK: break; // do nothing. default: EXIT_ERROR("Unsupported: %d\n",conv_study_type); break; } assert_condition(pass); structor_simulation(&sim,'d',ADAPT_0,p,ml,p_prev,ml_prev,NULL,type_rc,ignore_static); } p_prev = p; ml_prev = ml; }} destructor_Integration_Test_Info(int_test_info); } // Static functions ************************************************************************************************* // // Level 0 ********************************************************************************************************** // /// \brief Container for convergence order related data for each mesh level and polynomial order. struct Conv_Order_Data { const struct const_Multiarray_d* h, ///< The multiarray of mesh spacing. * l2_err, ///< The multiarray of L2 errors. 
* conv_orders; ///< The multiarray of convergence orders. const struct const_Multiarray_i* cases_run, ///< The multiarray of flags indicating which cases were run. * ex_ord; ///< The multiarray of expected orders. const char*const* var_names; ///< Names of the variables for which the error and convergence orders are provided. }; /** \brief Copy the files from the $BUILD/output/error/... subdirectory to the $BUILD/output/results/... subdirectory in * the case where a convergence study is being performed. */ static void copy_error_files_for_conv_study (const struct Integration_Test_Info*const int_test_info, ///< \ref Integration_Test_Info. const int error_type, ///< Defined for \ref compute_error_file_name. const struct Simulation*const sim ///< \ref Simulation. ); /** \brief Constructor for a \ref Conv_Order_Data container. * \return See brief. */ static struct Conv_Order_Data* constructor_Conv_Order_Data (const struct Integration_Test_Info*const int_test_info, ///< \ref Integration_Test_Info. const int error_type, ///< Current error type. const struct Simulation*const sim ///< \ref Simulation. ); /// \brief Destructor for a \ref Conv_Order_Data container. static void destructor_Conv_Order_Data (const struct Conv_Order_Data*const cod ///< Standard. ); /** \brief Compute the number of errors to be read for the convergence order test. * \return See brief. */ static int compute_n_err (const char* input_name ///< \ref fopen_sp_input_file :: name_part. ); /** \brief Return whether the convergence orders are in the expected range. * \return See brief. */ static bool attained_expected_conv_orders (const double discount, ///< Allowable discount from the expected conv. orders. const struct const_Multiarray_d*const conv_orders, ///< The container for the conv. order data. const struct const_Multiarray_i*const exp_orders, ///< The container of expected conv. order data. const struct Integration_Test_Info* int_test_info ///< \ref Integration_Test_Info. 
); /// \brief Output the combined results from all runs currently stored in the \ref Conv_Order_Data container. static void output_combined_results (const struct Conv_Order_Data*const cod, ///< \ref Conv_Order_Data. const struct Integration_Test_Info*const int_test_info, ///< \ref Integration_Test_Info. const int error_type, ///< Defined for \ref compute_error_file_name. const struct Simulation*const sim ///< \ref Simulation. ); static void set_convergence_order_discount (struct Integration_Test_Info*const int_test_info) { char line[STRLEN_MAX]; bool found_discount = false; FILE* input_file = fopen_input('t',NULL,NULL); // closed while (fgets(line,sizeof(line),input_file)) { if (strstr(line,"conv_order_discount")) { found_discount = true; read_skip_const_d(line,&int_test_info->conv_order_discount,1,false); } } fclose(input_file); if (!found_discount) const_cast_d(&int_test_info->conv_order_discount,0.0); }
/************************************************************************** Function: ocl_jacobi This routine contains the main iteration loop for the Jacobi iteration using OpenCL kernel. params: a two arrays to compute solution into max_iter maximum number of iterations size size of array for this MPI rank tolerance all differences should be les than this tolerance value mpi_ranks number of MPI ranks in each dimension rank_pos cartesian position of this rank origin origin for this rank d discretion size mpi_comm MPI communications structure local_workblock_size size of local workblock for OpenCL kernel device_type OpenCL device type full_copy boolean if full buffer copy is to be done **************************************************************************/ static void ocl_jacobi(value_type *a[2], unsigned int max_iter, size_t size[DIMENSIONS], value_type tolerance, value_type d[DIMENSIONS], size_t local_workblock_size[DIMENSIONS], cl_device_type device_type, unsigned int full_copy) { size_t array_size; unsigned int i, j, rc, iter = 0; size_t delta_buffer_size, delta_size[DIMENSIONS]; size_t tile_delta_size, tile_cache_size; value_type max_diff, timer; icl_device* device_id; icl_kernel* kernel; cl_int err; icl_buffer *a_buf[2], *delta_buf; value_type *delta; /* convenience for y stride in array */ cl_uint ystride = size[Y]+2*GHOST_CELL_WIDTH; /* init devices */ icl_init_devices(device_type); /* find OpenCL device */ device_id = icl_get_device(0); /* build the kernel and verify the kernel */ kernel = icl_create_kernel(device_id, "jacsolver_kernel.cl", "ocl_jacobi_local_copy", "", ICL_SOURCE); /* calculate size of kernel local memory - also used later for kernel params */ tile_delta_size = local_workblock_size[X] * local_workblock_size[Y]; tile_cache_size = (local_workblock_size[X]+2*GHOST_CELL_WIDTH) * (local_workblock_size[Y]+2*GHOST_CELL_WIDTH); /* verify the device has enough resources for this device */ /* I'm an optimist, we just hope for the best if 
((cluGetAvailableLocalMem(device_id, kernel) < tile_delta_size + tile_cache_size) || (! cluCheckLocalWorkgroupSize(device_id, kernel, DIMENSIONS, local_workblock_size))) { local_workblock_size[X] = 1; local_workblock_size[Y] = 1; } */ printf("Estimating solution using OpenCL Jacobi iteration with %d x %d workblock.\n", (int)local_workblock_size[X], (int)local_workblock_size[Y]); fflush(stdout); /* init arrays by setting the initial value and the boundary conditions */ set_initial_solution(a[OLD], size, INITIAL_GUESS); set_initial_solution(a[NEW], size, INITIAL_GUESS); set_boundary_conditions(a[OLD], size, d); set_boundary_conditions(a[NEW], size, d); /* print the initial solution guess */ print_array("Init ", a[NEW], size, d); /* allocate memory for differences */ delta_size[X] = size[X] / local_workblock_size[X]; delta_size[Y] = size[Y] / local_workblock_size[Y]; delta_buffer_size = delta_size[X] * delta_size[Y]; delta = (value_type *)malloc(sizeof(value_type) * delta_buffer_size); /* initialize deltas so that first execution of kernel with overlapping * reduction on the host will work correctly and not prematurely exit */ for (i=0; i<delta_size[X]; ++i) { for (j=0; j<delta_size[Y]; ++j) { delta[i * delta_size[Y] + j] = 1.0; } } /* create buffers for OpenCL device using host memory */ array_size = (size[X]+2*GHOST_CELL_WIDTH) * ystride; a_buf[OLD] = icl_create_buffer(device_id, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(value_type) * array_size); a_buf[NEW] = icl_create_buffer(device_id, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(value_type) * array_size); delta_buf = icl_create_buffer(device_id, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(value_type) * delta_buffer_size); /* copy over buffers to device */ icl_write_buffer(a_buf[OLD], CL_TRUE, sizeof(value_type) * array_size, a[OLD], NULL, NULL); icl_write_buffer(a_buf[NEW], CL_TRUE, sizeof(value_type) * array_size, a[NEW], NULL, NULL); /* set the kernel execution type - data parallel */ // 
cluSetKernelNDRange(clu, kernel, DIMENSIONS, NULL, size, local_workblock_size); /* iterate until maximum difference is less than the given tolerance or number of iterations is too high */ do { /* swap array pointers for next iteration */ SWAP_PTR(a[OLD], a[NEW]); SWAP_BUF(a_buf[OLD], a_buf[NEW]); icl_run_kernel(kernel, DIMENSIONS, size, local_workblock_size, NULL, NULL, 6, (size_t)0,(void *) a_buf[OLD], (size_t)0, (void *) a_buf[NEW], sizeof(value_type) * tile_delta_size, NULL, sizeof(value_type) * tile_cache_size, NULL, (size_t)0, (void *) delta_buf, sizeof(cl_uint), (void *) &ystride); /* while the kernel is running, calculate the reduction for the previous iteration */ max_diff = ocl_jacobi_reduce(delta, delta_size); /* enqueue a synchronous copy of the delta. This will not occur until the kernel * has finished. The deltas for each workgroup is a much smaller array to process */ icl_read_buffer(delta_buf, CL_TRUE, sizeof(value_type) * delta_buffer_size, delta, NULL, NULL); // clEnqueueReadBuffer(queue, a_buf[NEW], CL_TRUE, 0, sizeof(value_type) * array_size, a[NEW], 0, NULL, NULL)); /* output status for user, overwrite the same line */ if ((0 == iter % 100)) { printf("Iteration=%5d, max difference=%0.7f, target=%0.7f\r", iter, max_diff, tolerance); fflush(stdout); } /* increment the iteration counter */ iter++; } while (max_diff > tolerance && max_iter >= iter); /* do loop */ /* read back the final result */ icl_read_buffer(a_buf[NEW], CL_TRUE, sizeof(value_type) * array_size, a[NEW], NULL, NULL); /* output final iteration count and maximum difference value */ printf("Iteration=%5d, max difference=%0.7f, execution time=%.3f seconds\n", iter-1, max_diff, timer); fflush(stdout); /* finish usage of OpenCL device */ icl_release_buffers(3, a_buf[OLD], a_buf[NEW], delta_buf); icl_release_kernel(kernel); free(delta); }