int run_dgcuda(int argc, char *argv[]) { int local_num_elem, local_num_sides; int n_threads, n_blocks_elem, n_blocks_sides; int i, n, local_n_p, total_timesteps, local_n_quad, local_n_quad1d; int verbose, convergence, video, eval_error, benchmark; double endtime, t; double tol, total_error, max_error; double *min_radius; double min_r; double *V1x, *V1y, *V2x, *V2y, *V3x, *V3y; double *sides_x1, *sides_x2; double *sides_y1, *sides_y2; double *r1_local, *r2_local, *w_local; double *s_r, *oned_w_local; int *left_elem, *right_elem; int *elem_s1, *elem_s2, *elem_s3; int *left_side_number, *right_side_number; FILE *mesh_file, *out_file; char out_filename[100]; char *mesh_filename; double *Uv1, *Uv2, *Uv3; double *error; clock_t start, end; double elapsed; // get input endtime = -1; if (get_input(argc, argv, &n, &total_timesteps, &endtime, &verbose, &video, &convergence, &tol, &benchmark, &eval_error, &mesh_filename)) { return 1; } // set the order of the approximation & timestep local_n_p = (n + 1) * (n + 2) / 2; // sanity check on limiter if (limiter && n != 1) { printf("Error: limiter only enabled for p = 1\n"); exit(0); } // open the mesh to get local_num_elem for allocations mesh_file = fopen(mesh_filename, "r"); if (!mesh_file) { printf("\nERROR: mesh file not found.\n"); return 1; } // read in the mesh and make all the mappings read_mesh(mesh_file, &local_num_sides, &local_num_elem, &V1x, &V1y, &V2x, &V2y, &V3x, &V3y, &left_side_number, &right_side_number, &sides_x1, &sides_y1, &sides_x2, &sides_y2, &elem_s1, &elem_s2, &elem_s3, &left_elem, &right_elem); // close the file fclose(mesh_file); // initialize the gpu init_cpu(local_num_elem, local_num_sides, local_n_p, V1x, V1y, V2x, V2y, V3x, V3y, left_side_number, right_side_number, sides_x1, sides_y1, sides_x2, sides_y2, elem_s1, elem_s2, elem_s3, left_elem, right_elem, convergence, eval_error); // get the correct quadrature rules for this scheme set_quadrature(n, &r1_local, &r2_local, &w_local, &s_r, &oned_w_local, 
&local_n_quad, &local_n_quad1d); // set constant data set_N(local_N); set_n_p(local_n_p); set_num_elem(local_num_elem); set_num_sides(local_num_sides); set_n_quad(local_n_quad); set_n_quad1d(local_n_quad1d); // find the min inscribed circle preval_inscribed_circles(d_J, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y); min_radius = (double *) malloc(local_num_elem * sizeof(double)); memcpy(min_radius, d_J, local_num_elem * sizeof(double)); min_r = min_radius[0]; for (i = 1; i < local_num_elem; i++) { min_r = (min_radius[i] < min_r) ? min_radius[i] : min_r; // report problem if (min_radius[i] == 0) { printf("%i\n", i); printf("%.015lf, %.015lf, %.015lf, %.015lf, %.015lf, %.015lf\n", V1x[i], V1y[i], V2x[i], V2y[i], V3x[i], V3y[i]); } } free(min_radius); // pre computations preval_jacobian(d_J, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y); preval_side_length(d_s_length, d_s_V1x, d_s_V1y, d_s_V2x, d_s_V2y); preval_normals(d_Nx, d_Ny, d_s_V1x, d_s_V1y, d_s_V2x, d_s_V2y, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y, d_left_side_number); preval_normals_direction(d_Nx, d_Ny, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y, d_left_elem, d_left_side_number); preval_partials(d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y, d_xr, d_yr, d_xs, d_ys); // evaluate the basis functions at those points and store on GPU preval_basis(r1_local, r2_local, s_r, w_local, oned_w_local, local_n_quad, local_n_quad1d, local_n_p); // no longer need any of these CPU variables free(elem_s1); free(elem_s2); free(elem_s3); free(sides_x1); free(sides_x2); free(sides_y1); free(sides_y2); free(left_elem); free(right_elem); free(left_side_number); free(right_side_number); free(r1_local); free(r2_local); free(w_local); free(s_r); free(oned_w_local); // initial conditions init_conditions(d_c, d_J, d_V1x, d_V1y, d_V2x, d_V2y, d_V3x, d_V3y); printf(" ? %i degree polynomial interpolation (local_n_p = %i)\n", n, local_n_p); printf(" ? %i precomputed basis points\n", local_n_quad * local_n_p); printf(" ? 
%i elements\n", local_num_elem); printf(" ? %i sides\n", local_num_sides); printf(" ? min radius = %.015lf\n", min_r); if (endtime == -1 && convergence != 1) { printf(" ? total_timesteps = %i\n", total_timesteps); } else if (endtime != -1 && convergence != 1) { printf(" ? endtime = %lf\n", endtime); } if (benchmark) { start = clock(); } t = time_integrate_rk4(local_num_elem, local_num_sides, n, local_n_p, endtime, total_timesteps, min_r, verbose, convergence, video, tol); if (benchmark) { end = clock(); elapsed = ((double)(end - start)) / CLOCKS_PER_SEC; printf("Runtime: %lf seconds\n", elapsed); } // evaluate and write U to file write_U(local_num_elem, total_timesteps, total_timesteps); // free everything else free(d_s_V1x); free(d_s_V2x); free(d_s_V1y); free(d_s_V2y); free(d_s_length); free(d_lambda); free(d_k1); free(d_k2); free(d_k3); free(d_k4); free(d_rhs_volume); free(d_rhs_surface_left); free(d_rhs_surface_right); free(d_elem_s1); free(d_elem_s2); free(d_elem_s3); free(d_xr); free(d_yr); free(d_xs); free(d_ys); free(d_left_side_number); free(d_right_side_number); free(d_Nx); free(d_Ny); free(d_right_elem); free(d_left_elem); free(d_c); free(d_J); free(d_Uv1); free(d_Uv2); free(d_Uv3); free(d_V1x); free(d_V1y); free(d_V2x); free(d_V2y); free(d_V3x); free(d_V3y); // free CPU variables free(V1x); free(V1y); free(V2x); free(V2y); free(V3x); free(V3y); return 0; }
/*********************************************************************
** Function: Die::Die(int sides)
** Description: Constructs a Die by forwarding the requested number
**              of sides to set_num_sides().
** Parameters: int sides - number of sides for the new die. The
**             previous header states that it defaults to 6; that
**             default is not part of this definition (presumably it
**             is given on the declaration - confirm in the class
**             header).
** Pre-Conditions: None visible here beyond whatever set_num_sides()
**                 requires of its argument.
** Post-Conditions: set_num_sides(sides) has been called on the new
**                  object.
*********************************************************************/
Die::Die(int sides)
{
    this->set_num_sides(sides);
}