コード例 #1
0
ファイル: ocl_jacsolver.c プロジェクト: 8l/insieme
/**************************************************************************
 Function: reference_jacobi

 This routine contains the main iteration loop for the Jacobi iteration
 reference implementation (no OpenCL).

 Both arrays are initialised with INITIAL_GUESS and the boundary
 conditions.  The loop then swaps the two array pointers, runs
 reference_jacobi_kernel from a[OLD] into a[NEW] and repeats while the
 returned maximum difference is greater than the tolerance and fewer than
 max_iter iterations have been run.  At least one iteration is always run.

 params:
    a           two arrays to compute solution into; the a[OLD] and a[NEW]
                pointers are swapped at the start of every iteration
    max_iter    maximum number of iterations
    size        size of the array in each dimension
    tolerance   all differences should be less than this tolerance value
    d           discretization size in each dimension

 note:
    No timing is performed in this routine, so the "execution time" field
    of the final status line always reports 0.
 **************************************************************************/
static void reference_jacobi(value_type *a[2],
                            unsigned int max_iter,
                            size_t size[DIMENSIONS],
                            value_type tolerance,
                            value_type d[DIMENSIONS]) {

    unsigned int iter = 0;
    value_type max_diff;
    /* nothing measures the run time here; give the printed value a defined
       content instead of reading an uninitialised variable */
    value_type timer = 0;

    /* init arrays by setting the initial value and the boundary conditions */
    set_initial_solution(a[OLD], size, INITIAL_GUESS);
    set_initial_solution(a[NEW], size, INITIAL_GUESS);
    set_boundary_conditions(a[OLD], size, d);
    set_boundary_conditions(a[NEW], size, d);

    /* print the initial solution guess */
    print_array("Init ", a[NEW], size, d);

    /*  iterate until maximum difference is less than the given tolerance
        or number of iterations is too high
     */
    do {
        /* swap array pointers for next iteration */
        SWAP_PTR(a[OLD], a[NEW]);

        /* iterate using a[OLD] as the input and a[NEW] as the output */
        max_diff = reference_jacobi_kernel(a[OLD], a[NEW], size);

        /* output status for user, overwrite the same line
           (iter is unsigned, hence %u) */
        if (0 == iter % 100) {
            printf("Iteration=%5u, max difference=%0.7f, target=%0.7f\r",
                iter, max_diff, tolerance);
            fflush(stdout);
        }

        /* increment counter */
        iter++;
    } while (max_diff > tolerance && max_iter > iter); /* do loop */

    /* output final iteration count and maximum difference value */
    printf("Iteration=%5u, max difference=%0.7f, execution time=%.3f seconds\n",
                    iter, max_diff, timer);

}
/** \brief Run a convergence order study over the range of mesh levels and polynomial orders stored in the
 *         \ref Integration_Test_Info constructed from `argv[1]`.
 *
 *  For each (mesh level, polynomial order) pair: `structor_simulation` is called in mode 'c', the solution is
 *  either solved for or set from the initial solution depending on `conv_study_type` and the method, and the
 *  errors are output. On the last pair the restart file is output, the convergence orders are checked (skipped
 *  for `CONV_STUDY_SOLVE_NO_CHECK`) and `structor_simulation` is called in mode 'd'.
 *
 *  \param argc            Number of entries in `argv`; when equal to 4, `argv[3]` is stored as the convergence
 *                         study extension.
 *  \param argv            Command line arguments; `argv[1]` is used as the control file name.
 *  \param conv_study_type One of `CONV_STUDY_SOLVE`, `CONV_STUDY_SOLVE_NO_CHECK` or `CONV_STUDY_RESTART`; any
 *                         other value results in `EXIT_ERROR`.
 */
void run_convergence_order_study (int argc, char** argv, const int conv_study_type)
{
	const char*const ctrl_name = argv[1];

	struct Test_Info test_info = { .n_warn = 0, };

	struct Integration_Test_Info*const int_test_info = constructor_Integration_Test_Info(ctrl_name);
	if (argc == 4)
		int_test_info->conv_study_extension = argv[3];

	const int*const p_ref  = int_test_info->p_ref;
	const int*const ml_ref = int_test_info->ml;
	const int ml_max       = ml_ref[1];
	const char type_rc     = 'r';

	struct Simulation* sim = NULL;

	// The "previous" values start one below the first mesh level/order of the study.
	int ml_prev = ml_ref[0]-1;
	int p_prev  = p_ref[0]-1;

	// `ignore_static` can only be true for the first call to structor_simulation (it is reset in the loop).
	bool ignore_static = false;
	if (conv_study_type == CONV_STUDY_SOLVE_NO_CHECK || conv_study_type == CONV_STUDY_RESTART) {
		ignore_static = true;
	} else if (conv_study_type != CONV_STUDY_SOLVE) {
		EXIT_ERROR("Unsupported: %d\n",conv_study_type);
	}

	for (int ml = ml_ref[0]; ml <= ml_max; ++ml) {
		for (int p = p_ref[0]; p <= p_ref[1]; ++p) {
			const int adapt_type = int_test_info->adapt_type;
			const char*const ctrl_name_curr = set_file_name_curr(adapt_type,p,ml,false,ctrl_name);
			structor_simulation(&sim,'c',adapt_type,p,ml,p_prev,ml_prev,ctrl_name_curr,type_rc,ignore_static);
			ignore_static = false;

			// Obtain the solution for the current case.
			if (conv_study_type == CONV_STUDY_SOLVE || conv_study_type == CONV_STUDY_SOLVE_NO_CHECK) {
				switch (get_set_method(NULL)) {
				case METHOD_DG:
				case METHOD_DPG:
				case METHOD_OPG:
				case METHOD_OPGC0:
					solve_for_solution(sim);
					break;
				case METHOD_L2_PROJ:
					// No solve is performed for this method; the initial solution is set instead.
					set_initial_solution(sim);
					break;
				default:
					EXIT_ERROR("Unsupported: %d\n",get_set_method(NULL));
					break;
				}
			} else if (conv_study_type == CONV_STUDY_RESTART) {
				assert(using_restart() == true);
				set_initial_solution(sim);
				set_to_zero_residual(sim);

				// Use the restart-test variant of the error constructor for the subsequent error output.
				struct Test_Case*const test_case = (struct Test_Case*) sim->test_case_rc->tc;
				test_case->constructor_Error_CE = test_case->constructor_Error_CE_restart_test;
			} else {
				EXIT_ERROR("Unsupported: %d\n",conv_study_type);
			}

			const bool output_vis = (p == ORDER_VIS_CONV_P && ml <= ORDER_VIS_CONV_ML_MAX);
			if (output_vis) {
				output_visualization(sim,VIS_GEOM_EDGES);
				output_visualization(sim,VIS_SOLUTION);
				output_visualization(sim,VIS_GEOM_VOLUMES);
				output_visualization(sim,VIS_NORMALS);
			}

			output_error(sim);
			output_error_functionals(sim);

			if (DISPLAY_CONV) {
				printf("\ntest_integration_convergence (ml, p, dof): %d %d %td\n\n\n",ml,p,compute_dof(sim));
			}

			const bool is_last_case = (ml == ml_max) && (p == p_ref[1]);
			if (is_last_case) {
				output_restart(sim);

				set_convergence_order_discount(int_test_info);

				bool pass = true;
				if (conv_study_type == CONV_STUDY_SOLVE || conv_study_type == CONV_STUDY_RESTART) {
					check_convergence_orders(ERROR_STANDARD,&pass,&test_info,int_test_info,sim);
					check_convergence_orders(ERROR_FUNCTIONAL,&pass,&test_info,int_test_info,sim);
				} else if (conv_study_type != CONV_STUDY_SOLVE_NO_CHECK) {
					EXIT_ERROR("Unsupported: %d\n",conv_study_type);
				}
				assert_condition(pass);

				structor_simulation(&sim,'d',ADAPT_0,p,ml,p_prev,ml_prev,NULL,type_rc,ignore_static);
			}

			p_prev  = p;
			ml_prev = ml;
		}
	}
	destructor_Integration_Test_Info(int_test_info);
}

// Static functions ************************************************************************************************* //
// Level 0 ********************************************************************************************************** //

/// \brief Convergence order data stored for each mesh level and polynomial order.
struct Conv_Order_Data {
	const struct const_Multiarray_d* h;           ///< Multiarray of mesh spacing.
	const struct const_Multiarray_d* l2_err;      ///< Multiarray of L2 errors.
	const struct const_Multiarray_d* conv_orders; ///< Multiarray of convergence orders.

	const struct const_Multiarray_i* cases_run; ///< Multiarray of flags indicating which cases were run.
	const struct const_Multiarray_i* ex_ord;    ///< Multiarray of expected orders.

	const char*const* var_names; ///< Names of the variables for which errors and convergence orders are provided.
};

/** \brief Copy the files from the $BUILD/output/error/... subdirectory to the $BUILD/output/results/... subdirectory in
 *  the case where a convergence study is being performed. */
static void copy_error_files_for_conv_study
	(const struct Integration_Test_Info*const int_test_info, ///< \ref Integration_Test_Info.
	 const int error_type,                                   ///< Defined for \ref compute_error_file_name.
	 const struct Simulation*const sim                       ///< \ref Simulation.
	);

/** \brief Constructor for a \ref Conv_Order_Data container.
 *  \return Pointer to the constructed \ref Conv_Order_Data container (presumably to be released with
 *          \ref destructor_Conv_Order_Data; confirm against the definition). */
static struct Conv_Order_Data* constructor_Conv_Order_Data
	(const struct Integration_Test_Info*const int_test_info, ///< \ref Integration_Test_Info.
	 const int error_type,                                   ///< Current error type.
	 const struct Simulation*const sim                       ///< \ref Simulation.
	);

/// \brief Destructor for a \ref Conv_Order_Data container.
static void destructor_Conv_Order_Data
	(const struct Conv_Order_Data*const cod ///< The \ref Conv_Order_Data container to be destructed.
	);

/** \brief Compute the number of errors to be read for the convergence order test.
 *  \return The number of errors to be read. */
static int compute_n_err
	(const char* input_name ///< \ref fopen_sp_input_file :: name_part.
	);

/** \brief Return whether the convergence orders are in the expected range.
 *  \return `true` if the convergence orders are in the expected range; `false` otherwise. */
static bool attained_expected_conv_orders
	(const double discount,                             ///< Allowable discount from the expected conv. orders.
	 const struct const_Multiarray_d*const conv_orders, ///< The container for the conv. order data.
	 const struct const_Multiarray_i*const exp_orders,  ///< The container of expected conv. order data.
	 const struct Integration_Test_Info* int_test_info  ///< \ref Integration_Test_Info.
	);

/// \brief Output the combined results from all runs currently stored in the \ref Conv_Order_Data container.
static void output_combined_results
	(const struct Conv_Order_Data*const cod,                 ///< \ref Conv_Order_Data.
	 const struct Integration_Test_Info*const int_test_info, ///< \ref Integration_Test_Info.
	 const int error_type,                                   ///< Defined for \ref compute_error_file_name.
	 const struct Simulation*const sim                       ///< \ref Simulation.
	);

/** Scan the input file opened with `fopen_input('t',NULL,NULL)` for lines containing "conv_order_discount" and
 *  read the value from each such line into `int_test_info->conv_order_discount` (a later matching line is read
 *  after, and into the same member as, an earlier one). If no line matches, the member is set to 0.0. */
static void set_convergence_order_discount (struct Integration_Test_Info*const int_test_info)
{
	FILE*const file = fopen_input('t',NULL,NULL); // closed

	bool discount_was_read = false;
	char buffer[STRLEN_MAX];
	while (fgets(buffer,sizeof(buffer),file) != NULL) {
		if (strstr(buffer,"conv_order_discount") == NULL)
			continue; // Not the line of interest.

		discount_was_read = true;
		read_skip_const_d(buffer,&int_test_info->conv_order_discount,1,false);
	}
	fclose(file);

	if (discount_was_read)
		return;

	// No discount was specified in the file: fall back to no discount.
	const_cast_d(&int_test_info->conv_order_discount,0.0);
}
コード例 #3
0
ファイル: ocl_jacsolver.c プロジェクト: 8l/insieme
/**************************************************************************
 Function: ocl_jacobi

  This routine contains the main iteration loop for the Jacobi iteration
  using OpenCL kernel.

  The maximum difference evaluated in each pass of the loop is reduced on
  the host from the per-workgroup differences read back in the PREVIOUS
  pass (the buffer is pre-filled with 1.0 for the first pass), so the
  convergence test lags the kernel by one iteration.  This is why the loop
  runs while max_iter >= iter and why the final status line prints iter-1.

 params:
    a                       two arrays to compute solution into; the
                            a[OLD] and a[NEW] pointers are swapped at the
                            start of every iteration and the final device
                            result is read back into a[NEW]
    max_iter                maximum number of iterations
    size                    size of the array in each dimension
    tolerance               all differences should be less than this tolerance value
    d                       discretization size in each dimension
    local_workblock_size    size of local workblock for OpenCL kernel
    device_type             OpenCL device type
    full_copy               boolean if full buffer copy is to be done
                            (not used by this routine)

 notes:
    No timing is performed in this routine, so the "execution time" field
    of the final status line always reports 0.
    If the host buffer for the differences cannot be allocated an error is
    written to stderr, the kernel is released and the routine returns
    without iterating.
 **************************************************************************/
static void ocl_jacobi(value_type *a[2],
                        unsigned int max_iter,
                        size_t size[DIMENSIONS],
                        value_type tolerance,
                        value_type d[DIMENSIONS],
                        size_t local_workblock_size[DIMENSIONS],
                        cl_device_type device_type,
                        unsigned int full_copy) {

    size_t array_size;
    size_t i;
    unsigned int iter = 0;
    size_t delta_buffer_size, delta_size[DIMENSIONS];
    size_t tile_delta_size, tile_cache_size;
    value_type max_diff;
    /* nothing measures the run time here; give the printed value a defined
       content instead of reading an uninitialised variable */
    value_type timer = 0;
    icl_device* device_id;
    icl_kernel* kernel;
    icl_buffer *a_buf[2], *delta_buf;
    value_type *delta;

    /* convenience for y stride in array */
    cl_uint ystride = size[Y]+2*GHOST_CELL_WIDTH;

    /* kept in the signature for the callers, but not needed here */
    (void)full_copy;

    /* init devices */
    icl_init_devices(device_type);

    /* find OpenCL device */
    device_id  = icl_get_device(0);

    /* build the kernel and verify the kernel */
    kernel = icl_create_kernel(device_id, "jacsolver_kernel.cl", "ocl_jacobi_local_copy", "", ICL_SOURCE);

    /* calculate size of kernel local memory  - also used later for kernel params */
    tile_delta_size = local_workblock_size[X] * local_workblock_size[Y];
    tile_cache_size = (local_workblock_size[X]+2*GHOST_CELL_WIDTH) * (local_workblock_size[Y]+2*GHOST_CELL_WIDTH);

    /* NOTE: the local memory and workgroup size limits of the device are
     * not verified against these sizes; the requested workblock is used as is
     */
    printf("Estimating solution using OpenCL Jacobi iteration with %d x %d workblock.\n", (int)local_workblock_size[X], (int)local_workblock_size[Y]);
    fflush(stdout);

    /* init arrays by setting the initial value and the boundary conditions */
    set_initial_solution(a[OLD], size, INITIAL_GUESS);
    set_initial_solution(a[NEW], size, INITIAL_GUESS);
    set_boundary_conditions(a[OLD], size, d);
    set_boundary_conditions(a[NEW], size, d);

    /* print the initial solution guess */
    print_array("Init ", a[NEW], size, d);

    /* allocate memory for differences (one value per workgroup) */
    delta_size[X] = size[X] / local_workblock_size[X];
    delta_size[Y] = size[Y] / local_workblock_size[Y];
    delta_buffer_size = delta_size[X] * delta_size[Y];
    delta = malloc(sizeof *delta * delta_buffer_size);
    if (delta == NULL) {
        fprintf(stderr, "ocl_jacobi: unable to allocate the host buffer for the differences\n");
        icl_release_kernel(kernel);
        return;
    }

    /* initialize deltas so that first execution of kernel with overlapping
     * reduction on the host will work correctly and not prematurely exit
     */
    for (i = 0; i < delta_buffer_size; ++i) {
        delta[i] = 1.0;
    }

    /* create buffers for OpenCL device using host memory */
    array_size = (size[X]+2*GHOST_CELL_WIDTH) * ystride;
    a_buf[OLD] = icl_create_buffer(device_id, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(value_type) * array_size);
    a_buf[NEW] = icl_create_buffer(device_id, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(value_type) * array_size);
    delta_buf = icl_create_buffer(device_id, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, sizeof(value_type) * delta_buffer_size);

    /* copy over buffers to device */
    icl_write_buffer(a_buf[OLD], CL_TRUE, sizeof(value_type) * array_size, a[OLD], NULL, NULL);
    icl_write_buffer(a_buf[NEW], CL_TRUE, sizeof(value_type) * array_size, a[NEW], NULL, NULL);

    /*  iterate until maximum difference is less than the given tolerance
        or number of iterations is too high */
    do {
        /* swap array pointers for next iteration */
        SWAP_PTR(a[OLD], a[NEW]);
        SWAP_BUF(a_buf[OLD], a_buf[NEW]);
        icl_run_kernel(kernel, DIMENSIONS, size, local_workblock_size, NULL, NULL, 6,
                    (size_t)0,(void *) a_buf[OLD],
                    (size_t)0, (void *) a_buf[NEW],
                    sizeof(value_type) * tile_delta_size, NULL,
                    sizeof(value_type) * tile_cache_size, NULL,
                    (size_t)0, (void *) delta_buf,
                    sizeof(cl_uint), (void *) &ystride);

        /* while the kernel is running, calculate the reduction for the previous iteration */
        max_diff = ocl_jacobi_reduce(delta, delta_size);

        /* enqueue a synchronous copy of the delta. This will not occur until the kernel
         * has finished. The deltas for each workgroup is a much smaller array to process
         */
        icl_read_buffer(delta_buf, CL_TRUE, sizeof(value_type) * delta_buffer_size, delta, NULL, NULL);

        /* output status for user, overwrite the same line
           (iter is unsigned, hence %u) */
        if (0 == iter % 100) {
            printf("Iteration=%5u, max difference=%0.7f, target=%0.7f\r",
                        iter, max_diff, tolerance);
            fflush(stdout);
        }

        /* increment the iteration counter */
        iter++;
    } while (max_diff > tolerance && max_iter >= iter); /* do loop */

    /* read back the final result */
    icl_read_buffer(a_buf[NEW], CL_TRUE, sizeof(value_type) * array_size, a[NEW], NULL, NULL);

    /* output final iteration count and maximum difference value;
       max_diff belongs to the iteration before the last kernel run, hence iter-1 */
    printf("Iteration=%5u, max difference=%0.7f, execution time=%.3f seconds\n", iter-1, max_diff, timer);
    fflush(stdout);

    /* finish usage of OpenCL device */
    icl_release_buffers(3, a_buf[OLD], a_buf[NEW], delta_buf);
    icl_release_kernel(kernel);
    free(delta);
}