Esempio n. 1
0
/*
 * Drives one complete solver run on the CPU code path.
 *
 * Steps, in order:
 *   1. parse the command line with get_input();
 *   2. read the mesh file with read_mesh();
 *   3. hand the mesh to init_cpu() and publish the problem sizes through
 *      the set_*() helpers;
 *   4. run the preval_*() precomputations and init_conditions();
 *   5. integrate in time with time_integrate_rk4();
 *   6. write the result with write_U() and release all arrays.
 *
 * The d_* arrays, `limiter` and `local_N` are not declared in this
 * function; they are file-scope names defined elsewhere.
 *
 * Parameters:
 *   argc, argv - command-line arguments, forwarded unchanged to get_input().
 *
 * Returns:
 *   0 on success; 1 if get_input() reports an error, the limiter is
 *   requested with n != 1, the mesh file cannot be opened, the mesh has no
 *   elements, or the scratch buffer for the radii cannot be allocated.
 */
int run_dgcuda(int argc, char *argv[]) {
    /* Zero-initialised so the empty-mesh check below is meaningful even if
     * read_mesh() leaves them untouched. */
    int local_num_elem = 0, local_num_sides = 0;
    int i, n, local_n_p, total_timesteps, local_n_quad, local_n_quad1d;
    int verbose, convergence, video, eval_error, benchmark;

    double endtime;
    double tol;
    double *min_radius;
    double min_r;

    /* Host-side mesh arrays filled in by read_mesh(). They start as NULL so
     * they can be passed to free() safely on the early error path. */
    double *V1x = NULL, *V1y = NULL, *V2x = NULL;
    double *V2y = NULL, *V3x = NULL, *V3y = NULL;
    double *sides_x1 = NULL, *sides_x2 = NULL;
    double *sides_y1 = NULL, *sides_y2 = NULL;

    double *r1_local, *r2_local, *w_local;

    double *s_r, *oned_w_local;

    int *left_elem = NULL, *right_elem = NULL;
    int *elem_s1 = NULL, *elem_s2 = NULL, *elem_s3 = NULL;
    int *left_side_number = NULL, *right_side_number = NULL;

    FILE *mesh_file;

    char *mesh_filename;

    clock_t start = 0, end;
    double elapsed;

    // get input; endtime == -1 means "no end time given"
    endtime = -1;
    if (get_input(argc, argv, &n, &total_timesteps, &endtime, 
                              &verbose, &video, &convergence, &tol, 
                              &benchmark, &eval_error, 
                              &mesh_filename)) {
        return 1;
    }

    // number of coefficients per element for a degree-n approximation
    local_n_p = (n + 1) * (n + 2) / 2;

    // sanity check on limiter: report failure the same way as every other
    // error path instead of terminating the process with a success status
    if (limiter && n != 1) {
        printf("Error: limiter only enabled for p = 1\n");
        return 1;
    }

    // open the mesh to get local_num_elem for allocations
    mesh_file = fopen(mesh_filename, "r");
    if (!mesh_file) {
        printf("\nERROR: mesh file not found.\n");
        return 1;
    }

    // read in the mesh and make all the mappings
    read_mesh(mesh_file, &local_num_sides, &local_num_elem,
                         &V1x, &V1y, &V2x, &V2y, &V3x, &V3y,
                         &left_side_number, &right_side_number,
                         &sides_x1, &sides_y1, 
                         &sides_x2, &sides_y2, 
                         &elem_s1, &elem_s2, &elem_s3,
                         &left_elem, &right_elem);

    // close the file
    fclose(mesh_file);

    // an empty mesh cannot be simulated; the min-radius scan below reads
    // element 0 unconditionally, so stop here and release what was read
    if (local_num_elem < 1) {
        printf("\nERROR: mesh contains no elements.\n");
        free(V1x);
        free(V1y);
        free(V2x);
        free(V2y);
        free(V3x);
        free(V3y);
        free(sides_x1);
        free(sides_x2);
        free(sides_y1);
        free(sides_y2);
        free(left_elem);
        free(right_elem);
        free(elem_s1);
        free(elem_s2);
        free(elem_s3);
        free(left_side_number);
        free(right_side_number);
        return 1;
    }

    // hand the mesh to the CPU back end
    // NOTE(review): presumably this is where the d_* arrays are allocated
    // and filled; confirm against init_cpu().
    init_cpu(local_num_elem, local_num_sides, local_n_p,
             V1x, V1y, V2x, V2y, V3x, V3y,
             left_side_number, right_side_number,
             sides_x1, sides_y1,
             sides_x2, sides_y2, 
             elem_s1, elem_s2, elem_s3,
             left_elem, right_elem,
             convergence, eval_error);

    // get the correct quadrature rules for this scheme
    set_quadrature(n, &r1_local, &r2_local, &w_local, 
                   &s_r, &oned_w_local, &local_n_quad, &local_n_quad1d);

    // set constant data
    // NOTE(review): local_N is not declared in this function; verify that
    // the file-scope value is the intended one.
    set_N(local_N);
    set_n_p(local_n_p);
    set_num_elem(local_num_elem);
    set_num_sides(local_num_sides);
    set_n_quad(local_n_quad);
    set_n_quad1d(local_n_quad1d);

    // find the min inscribed circle; d_J is used as the output buffer here
    // and is handed to preval_jacobian() further down
    preval_inscribed_circles(d_J, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y);
    min_radius = (double *) malloc(local_num_elem * sizeof(double));
    if (!min_radius) {
        // NOTE(review): the arrays set up above are not released on this
        // path; the caller is expected to treat a non-zero return as fatal.
        printf("\nERROR: out of memory while computing the minimum radius.\n");
        return 1;
    }
    memcpy(min_radius, d_J, local_num_elem * sizeof(double));
    min_r = min_radius[0];
    // start at 0 so that a degenerate first element is reported as well
    for (i = 0; i < local_num_elem; i++) {
        if (min_radius[i] < min_r) {
            min_r = min_radius[i];
        }
        // report problem: element with a zero radius
        if (min_radius[i] == 0) {
            printf("%i\n", i);
            printf("%.015lf, %.015lf, %.015lf, %.015lf, %.015lf, %.015lf\n", 
                                                     V1x[i], V1y[i],
                                                     V2x[i], V2y[i],
                                                     V3x[i], V3y[i]);
        }
    }
    free(min_radius);

    // pre computations
    preval_jacobian(d_J, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y); 

    preval_side_length(d_s_length, d_s_V1x, d_s_V1y, d_s_V2x, d_s_V2y);

    preval_normals(d_Nx, d_Ny, 
                   d_s_V1x, d_s_V1y, d_s_V2x, d_s_V2y,
                   d_V1x, d_V1y, 
                   d_V2x, d_V2y, 
                   d_V3x, d_V3y, 
                   d_left_side_number);

    preval_normals_direction(d_Nx, d_Ny, 
                             d_V1x, d_V1y, 
                             d_V2x, d_V2y, 
                             d_V3x, d_V3y, 
                             d_left_elem, d_left_side_number);

    preval_partials(d_V1x, d_V1y,
                    d_V2x, d_V2y,
                    d_V3x, d_V3y,
                    d_xr,  d_yr,
                    d_xs,  d_ys);

    // evaluate the basis functions at the quadrature points
    preval_basis(r1_local, r2_local, s_r, w_local, oned_w_local, local_n_quad, local_n_quad1d, local_n_p);

    // the mesh mappings and quadrature tables are not used past this point
    free(elem_s1);
    free(elem_s2);
    free(elem_s3);
    free(sides_x1);
    free(sides_x2);
    free(sides_y1);
    free(sides_y2);
    free(left_elem);
    free(right_elem);
    free(left_side_number);
    free(right_side_number);
    free(r1_local);
    free(r2_local);
    free(w_local);
    free(s_r);
    free(oned_w_local);

    // initial conditions
    init_conditions(d_c, d_J, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y);

    printf(" ? %i degree polynomial interpolation (local_n_p = %i)\n", n, local_n_p);
    printf(" ? %i precomputed basis points\n", local_n_quad * local_n_p);
    printf(" ? %i elements\n", local_num_elem);
    printf(" ? %i sides\n", local_num_sides);
    printf(" ? min radius = %.015lf\n", min_r);

    // report whichever stopping criterion is active (nothing is printed in
    // convergence mode)
    if (endtime == -1 && convergence != 1) {
        printf(" ? total_timesteps = %i\n", total_timesteps);
    } else if (endtime != -1 && convergence != 1) {
        printf(" ? endtime = %lf\n", endtime);
    }

    if (benchmark) {
        start = clock();
    }

    // the value returned by the integrator is not needed here
    time_integrate_rk4(local_num_elem, local_num_sides, 
                       n, local_n_p,
                       endtime, total_timesteps, min_r,
                       verbose, convergence, video, tol);

    if (benchmark) {
        end = clock();
        elapsed = ((double)(end - start)) / CLOCKS_PER_SEC;
        printf("Runtime: %lf seconds\n", elapsed);
    }

    // evaluate and write U to file
    write_U(local_num_elem, total_timesteps, total_timesteps);

    // free the file-scope solver arrays
    free(d_s_V1x);
    free(d_s_V2x);
    free(d_s_V1y);
    free(d_s_V2y);

    free(d_s_length);
    free(d_lambda);
    free(d_k1);
    free(d_k2);
    free(d_k3);
    free(d_k4);
    free(d_rhs_volume);
    free(d_rhs_surface_left);
    free(d_rhs_surface_right);
    free(d_elem_s1);
    free(d_elem_s2);
    free(d_elem_s3);
    free(d_xr);
    free(d_yr);
    free(d_xs);
    free(d_ys);

    free(d_left_side_number);
    free(d_right_side_number);

    free(d_Nx);
    free(d_Ny);

    free(d_right_elem);
    free(d_left_elem);
    free(d_c);
    free(d_J);

    free(d_Uv1);
    free(d_Uv2);
    free(d_Uv3);
    free(d_V1x);
    free(d_V1y);
    free(d_V2x);
    free(d_V2y);
    free(d_V3x);
    free(d_V3y);

    // free the vertex arrays read from the mesh (kept until now because the
    // degenerate-element report above reads them)
    free(V1x);
    free(V1y);
    free(V2x);
    free(V2y);
    free(V3x);
    free(V3y);

    return 0;
}
Esempio n. 2
0
/*********************************************************************
** Function:         Die()
** Description:      Constructor; forwards the requested side count to
**                   set_num_sides()
** Parameters:       int sides; number of sides for the new die
**                   (default of 6 is assumed to come from the
**                   declaration, which is not visible here -- confirm)
** Pre-Conditions:   Called during instantiation
** Post-Conditions:  set_num_sides(sides) has been applied to the new
**                   Die object
********************************************************************/
Die::Die(int sides)
{
	// All initialization is delegated to the setter.
	this->set_num_sides(sides);
}