int main(int argc, char **argv) { // OP initialisation op_init(argc,argv,2); int *bnode, *cell; double *xm;//, *q; int nnode,ncell,nbnodes,niter; double rms = 1; // set constants and initialise flow field and residual op_printf("initialising flow field \n"); double gam = 1.4; gm1 = gam - 1.0; gm1i = 1.0/gm1; wtg1[0] = 0.5; wtg1[1] = 0.5; xi1[0] = 0.211324865405187; xi1[1] = 0.788675134594813; Ng1[0] = 0.788675134594813; Ng1[1] = 0.211324865405187; Ng1[2] = 0.211324865405187; Ng1[3] = 0.788675134594813; Ng1_xi[0] = -1; Ng1_xi[1] = -1; Ng1_xi[2] = 1; Ng1_xi[3] = 1; wtg2[0] = 0.25; wtg2[1] = 0.25; wtg2[2] = 0.25; wtg2[3] = 0.25; Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520; Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667; Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667; Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146; Ng2_xi[0] = -0.788675134594813; Ng2_xi[1] = 0.788675134594813; Ng2_xi[2] = -0.211324865405187;Ng2_xi[3] = 0.211324865405187; Ng2_xi[4] = -0.788675134594813; Ng2_xi[5] = 0.788675134594813; Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187; Ng2_xi[8] = -0.211324865405187; Ng2_xi[9] = 0.211324865405187; Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813; Ng2_xi[12] = -0.211324865405187; Ng2_xi[13] = 0.211324865405187; Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813; Ng2_xi[16] = -0.788675134594813; Ng2_xi[17] = -0.211324865405187; Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187; Ng2_xi[20] = -0.211324865405187; Ng2_xi[21] = -0.788675134594813; Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813; Ng2_xi[24] = -0.788675134594813; Ng2_xi[25] = -0.211324865405187; Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187; Ng2_xi[28] = -0.211324865405187; Ng2_xi[29] = -0.788675134594813; Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813; minf = 0.1; m2 = minf*minf; freq = 1; kappa = 1; nmode = 0; mfan = 1.0; char file[] = "FE_grid.h5"; // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set_hdf5(file, "nodes"); op_set bnodes = op_decl_set_hdf5(file, "bedges"); op_set cells = op_decl_set_hdf5(file, "cells"); op_map pbnodes = op_decl_map_hdf5(bnodes,nodes,1,file, "pbedge"); op_map pcell = op_decl_map_hdf5(cells, nodes,4,file, "pcell"); op_dat p_xm = op_decl_dat_hdf5(nodes ,2,"double", file, "p_x"); op_dat p_phim = op_decl_dat_hdf5(nodes, 1, "double", file, "p_phim"); op_dat p_resm = op_decl_dat_hdf5(nodes, 1, "double", file, "p_resm"); op_dat p_K = op_decl_dat_hdf5(cells, 16, "double:soa",file, "p_K"); op_dat p_V = op_decl_dat_hdf5(nodes, 1, "double", file, "p_V"); op_dat p_P = op_decl_dat_hdf5(nodes, 1, "double", file, "p_P"); op_dat p_U = op_decl_dat_hdf5(nodes, 1, "double", file, "p_U"); op_decl_const2("gam",1,"double",&gam ); op_decl_const2("gm1",1,"double",&gm1 ); op_decl_const2("gm1i",1,"double",&gm1i ); op_decl_const2("m2",1,"double",&m2 ); op_decl_const2("wtg1",2,"double",wtg1 ); op_decl_const2("xi1",2,"double",xi1 ); op_decl_const2("Ng1",4,"double",Ng1 ); op_decl_const2("Ng1_xi",4,"double",Ng1_xi ); op_decl_const2("wtg2",4,"double",wtg2 ); op_decl_const2("Ng2",16,"double",Ng2 ); op_decl_const2("Ng2_xi",32,"double",Ng2_xi ); op_decl_const2("minf",1,"double",&minf ); op_decl_const2("freq",1,"double",&freq ); op_decl_const2("kappa",1,"double",&kappa ); op_decl_const2("nmode",1,"double",&nmode ); op_decl_const2("mfan",1,"double",&mfan ); op_diagnostic_output(); op_partition("PTSCOTCH", "KWAY", cells, pcell, p_xm); op_printf("nodes: %d cells: %d bnodes: %d\n", nodes->size, cells->size, bnodes->size); nnode = op_get_size(nodes); ncell = op_get_size(cells); nbnodes = op_get_size(bnodes); double cpu_t1, cpu_t2, wall_t1, wall_t2; op_timers(&cpu_t1, &wall_t1); // main time-marching loop niter = 20; for(int iter=1; iter<=niter; iter++) { op_par_loop_res_calc("res_calc",cells, op_arg_dat(p_xm,-4,pcell,2,"double",OP_READ), op_arg_dat(p_phim,-4,pcell,1,"double",OP_READ), op_arg_dat(p_K,-1,OP_ID,16,"double:soa",OP_WRITE), op_arg_dat(p_resm,-4,pcell,1,"double",OP_INC)); op_par_loop_dirichlet("dirichlet",bnodes, op_arg_dat(p_resm,0,pbnodes,1,"double",OP_WRITE)); double c1 = 0; double c2 = 0; double c3 = 0; double alpha = 0; double beta = 0; //c1 = R'*R; op_par_loop_init_cg("init_cg",nodes, op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&c1,1,"double",OP_INC), op_arg_dat(p_U,-1,OP_ID,1,"double",OP_WRITE), op_arg_dat(p_V,-1,OP_ID,1,"double",OP_WRITE), op_arg_dat(p_P,-1,OP_ID,1,"double",OP_WRITE)); //set up stopping conditions double res0 = sqrt(c1); double res = res0; int iter = 0; int maxiter = 200; while (res > 0.1*res0 && iter < maxiter) { //V = Stiffness*P op_par_loop_spMV("spMV",cells, op_arg_dat(p_V,-4,pcell,1,"double",OP_INC), op_arg_dat(p_K,-1,OP_ID,16,"double:soa",OP_READ), op_arg_dat(p_P,-4,pcell,1,"double",OP_READ)); op_par_loop_dirichlet("dirichlet",bnodes, op_arg_dat(p_V,0,pbnodes,1,"double",OP_WRITE)); c2 = 0; //c2 = P'*V; op_par_loop_dotPV("dotPV",nodes, op_arg_dat(p_P,-1,OP_ID,1,"double",OP_READ), op_arg_dat(p_V,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&c2,1,"double",OP_INC)); alpha = c1/c2; //U = U + alpha*P; //resm = resm-alpha*V; op_par_loop_updateUR("updateUR",nodes, op_arg_dat(p_U,-1,OP_ID,1,"double",OP_INC), op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_INC), op_arg_dat(p_P,-1,OP_ID,1,"double",OP_READ), op_arg_dat(p_V,-1,OP_ID,1,"double",OP_RW), op_arg_gbl(&alpha,1,"double",OP_READ)); c3 = 0; //c3 = resm'*resm; op_par_loop_dotR("dotR",nodes, op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&c3,1,"double",OP_INC)); beta = c3/c1; //P = beta*P+resm; op_par_loop_updateP("updateP",nodes, op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_READ), op_arg_dat(p_P,-1,OP_ID,1,"double",OP_RW), op_arg_gbl(&beta,1,"double",OP_READ)); c1 = c3; res = sqrt(c1); iter++; } rms = 0; //phim = phim - Stiffness\Load; op_par_loop_update("update",nodes, op_arg_dat(p_phim,-1,OP_ID,1,"double",OP_RW), op_arg_dat(p_resm,-1,OP_ID,1,"double",OP_WRITE), op_arg_dat(p_U,-1,OP_ID,1,"double",OP_READ), op_arg_gbl(&rms,1,"double",OP_INC)); op_printf("rms = %10.5e iter: %d\n", sqrt(rms)/sqrt(nnode), iter); } op_timing_output(); op_timers(&cpu_t2, &wall_t2); op_printf("Max total runtime = %f\n",wall_t2-wall_t1); op_exit(); }
int main(int argc, char **argv) { // OP initialisation op_init(argc, argv, 2); // MPI for user I/O int my_rank; int comm_size; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &comm_size); // timer double cpu_t1, cpu_t2, wall_t1, wall_t2; int *bnode, *cell, *g_bnode, *g_cell; double *xm, *g_xm; ; int nnode, ncell, nbnodes, niter, g_nnode, g_ncell, g_nbnodes; double rms = 1; // read in grid op_printf("reading in grid \n"); FILE *fp; if ((fp = fopen("FE_grid.dat", "r")) == NULL) { op_printf("can't open file FE_grid.dat\n"); exit(-1); } if (fscanf(fp, "%d %d %d \n", &g_nnode, &g_ncell, &g_nbnodes) != 3) { op_printf("error reading from new_grid.dat\n"); exit(-1); } if (my_rank == MPI_ROOT) { g_cell = (int *)malloc(4 * g_ncell * sizeof(int)); g_bnode = (int *)malloc(g_nbnodes * sizeof(int)); g_xm = (double *)malloc(2 * g_nnode * sizeof(double)); for (int n = 0; n < g_nnode; n++) { if (fscanf(fp, "%lf %lf \n", &g_xm[2 * n], &g_xm[2 * n + 1]) != 2) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n = 0; n < g_ncell; n++) { if (fscanf(fp, "%d %d %d %d \n", &g_cell[4 * n], &g_cell[4 * n + 1], &g_cell[4 * n + 2], &g_cell[4 * n + 3]) != 4) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } for (int n = 0; n < g_nbnodes; n++) { if (fscanf(fp, "%d \n", &g_bnode[n]) != 1) { op_printf("error reading from new_grid.dat\n"); exit(-1); } } } fclose(fp); nnode = compute_local_size(g_nnode, comm_size, my_rank); ncell = compute_local_size(g_ncell, comm_size, my_rank); nbnodes = compute_local_size(g_nbnodes, comm_size, my_rank); cell = (int *)malloc(4 * ncell * sizeof(int)); bnode = (int *)malloc(nbnodes * sizeof(int)); xm = (double *)malloc(2 * nnode * sizeof(double)); scatter_int_array(g_cell, cell, comm_size, g_ncell, ncell, 4); scatter_int_array(g_bnode, bnode, comm_size, g_nbnodes, nbnodes, 1); scatter_double_array(g_xm, xm, comm_size, g_nnode, nnode, 2); if (my_rank == MPI_ROOT) { free(g_cell); free(g_xm); free(g_bnode); } // set constants and initialise flow field and residual op_printf("initialising flow field \n"); double gam = 1.4; gm1 = gam - 1.0; gm1i = 1.0 / gm1; wtg1[0] = 0.5; wtg1[1] = 0.5; xi1[0] = 0.211324865405187; xi1[1] = 0.788675134594813; Ng1[0] = 0.788675134594813; Ng1[1] = 0.211324865405187; Ng1[2] = 0.211324865405187; Ng1[3] = 0.788675134594813; Ng1_xi[0] = -1; Ng1_xi[1] = -1; Ng1_xi[2] = 1; Ng1_xi[3] = 1; wtg2[0] = 0.25; wtg2[1] = 0.25; wtg2[2] = 0.25; wtg2[3] = 0.25; Ng2[0] = 0.622008467928146; Ng2[1] = 0.166666666666667; Ng2[2] = 0.166666666666667; Ng2[3] = 0.044658198738520; Ng2[4] = 0.166666666666667; Ng2[5] = 0.622008467928146; Ng2[6] = 0.044658198738520; Ng2[7] = 0.166666666666667; Ng2[8] = 0.166666666666667; Ng2[9] = 0.044658198738520; Ng2[10] = 0.622008467928146; Ng2[11] = 0.166666666666667; Ng2[12] = 0.044658198738520; Ng2[13] = 0.166666666666667; Ng2[14] = 0.166666666666667; Ng2[15] = 0.622008467928146; Ng2_xi[0] = -0.788675134594813; Ng2_xi[1] = 0.788675134594813; Ng2_xi[2] = -0.211324865405187; Ng2_xi[3] = 0.211324865405187; Ng2_xi[4] = -0.788675134594813; Ng2_xi[5] = 0.788675134594813; Ng2_xi[6] = -0.211324865405187; Ng2_xi[7] = 0.211324865405187; Ng2_xi[8] = -0.211324865405187; Ng2_xi[9] = 0.211324865405187; Ng2_xi[10] = -0.788675134594813; Ng2_xi[11] = 0.788675134594813; Ng2_xi[12] = -0.211324865405187; Ng2_xi[13] = 0.211324865405187; Ng2_xi[14] = -0.788675134594813; Ng2_xi[15] = 0.788675134594813; Ng2_xi[16] = -0.788675134594813; Ng2_xi[17] = -0.211324865405187; Ng2_xi[18] = 0.788675134594813; Ng2_xi[19] = 0.211324865405187; Ng2_xi[20] = -0.211324865405187; Ng2_xi[21] = -0.788675134594813; Ng2_xi[22] = 0.211324865405187; Ng2_xi[23] = 0.788675134594813; Ng2_xi[24] = -0.788675134594813; Ng2_xi[25] = -0.211324865405187; Ng2_xi[26] = 0.788675134594813; Ng2_xi[27] = 0.211324865405187; Ng2_xi[28] = -0.211324865405187; Ng2_xi[29] = -0.788675134594813; Ng2_xi[30] = 0.211324865405187; Ng2_xi[31] = 0.788675134594813; minf = 0.1; m2 = minf * minf; freq = 1; kappa = 1; nmode = 0; mfan = 1.0; double *phim = (double *)malloc(nnode * sizeof(double)); memset(phim, 0, nnode * sizeof(double)); for (int i = 0; i < nnode; i++) { phim[i] = minf * xm[2 * i]; } double *K = (double *)malloc(4 * 4 * ncell * sizeof(double)); memset(K, 0, 4 * 4 * ncell * sizeof(double)); double *resm = (double *)malloc(nnode * sizeof(double)); memset(resm, 0, nnode * sizeof(double)); double *V = (double *)malloc(nnode * sizeof(double)); memset(V, 0, nnode * sizeof(double)); double *P = (double *)malloc(nnode * sizeof(double)); memset(P, 0, nnode * sizeof(double)); double *U = (double *)malloc(nnode * sizeof(double)); memset(U, 0, nnode * sizeof(double)); // declare sets, pointers, datasets and global constants op_set nodes = op_decl_set(nnode, "nodes"); op_set bnodes = op_decl_set(nbnodes, "bedges"); op_set cells = op_decl_set(ncell, "cells"); op_map pbnodes = op_decl_map(bnodes, nodes, 1, bnode, "pbedge"); op_map pcell = op_decl_map(cells, nodes, 4, cell, "pcell"); op_dat p_xm = op_decl_dat(nodes, 2, "double", xm, "p_x"); op_dat p_phim = op_decl_dat(nodes, 1, "double", phim, "p_phim"); op_dat p_resm = op_decl_dat(nodes, 1, "double", resm, "p_resm"); op_dat p_K = op_decl_dat(cells, 16, "double:soa", K, "p_K"); op_dat p_V = op_decl_dat(nodes, 1, "double", V, "p_V"); op_dat p_P = op_decl_dat(nodes, 1, "double", P, "p_P"); op_dat p_U = op_decl_dat(nodes, 1, "double", U, "p_U"); op_decl_const2("gam", 1, "double", &gam); op_decl_const2("gm1", 1, "double", &gm1); op_decl_const2("gm1i", 1, "double", &gm1i); op_decl_const2("m2", 1, "double", &m2); op_decl_const2("wtg1", 2, "double", wtg1); op_decl_const2("xi1", 2, "double", xi1); op_decl_const2("Ng1", 4, "double", Ng1); op_decl_const2("Ng1_xi", 4, "double", Ng1_xi); op_decl_const2("wtg2", 4, "double", wtg2); op_decl_const2("Ng2", 16, "double", Ng2); op_decl_const2("Ng2_xi", 32, "double", Ng2_xi); op_decl_const2("minf", 1, "double", &minf); op_decl_const2("freq", 1, "double", &freq); op_decl_const2("kappa", 1, "double", &kappa); op_decl_const2("nmode", 1, "double", &nmode); op_decl_const2("mfan", 1, "double", &mfan); op_diagnostic_output(); op_partition("PTSCOTCH", "KWAY", cells, pcell, NULL); // main time-marching loop niter = 20; // initialise timers for total execution wall time op_timers(&cpu_t1, &wall_t1); for (int iter = 1; iter <= niter; iter++) { op_par_loop_res_calc("res_calc", cells, op_arg_dat(p_xm, -4, pcell, 2, "double", OP_READ), op_arg_dat(p_phim, -4, pcell, 1, "double", OP_READ), op_arg_dat(p_K, -1, OP_ID, 16, "double:soa", OP_WRITE), op_arg_dat(p_resm, -4, pcell, 1, "double", OP_INC)); op_par_loop_dirichlet("dirichlet", bnodes, op_arg_dat(p_resm, 0, pbnodes, 1, "double", OP_WRITE)); double c1 = 0; double c2 = 0; double c3 = 0; double alpha = 0; double beta = 0; // c1 = R'*R; op_par_loop_init_cg("init_cg", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c1, 1, "double", OP_INC), op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_WRITE)); // set up stopping conditions double res0 = sqrt(c1); double res = res0; int inner_iter = 0; int maxiter = 200; while (res > 0.1 * res0 && inner_iter < maxiter) { // V = Stiffness*P op_par_loop_spMV("spMV", cells, op_arg_dat(p_V, -4, pcell, 1, "double", OP_INC), op_arg_dat(p_K, -1, OP_ID, 16, "double:soa", OP_READ), op_arg_dat(p_P, -4, pcell, 1, "double", OP_READ)); op_par_loop_dirichlet("dirichlet", bnodes, op_arg_dat(p_V, 0, pbnodes, 1, "double", OP_WRITE)); c2 = 0; // c2 = P'*V; op_par_loop_dotPV("dotPV", nodes, op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c2, 1, "double", OP_INC)); alpha = c1 / c2; // U = U + alpha*P; // resm = resm-alpha*V; op_par_loop_updateUR("updateUR", nodes, op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_INC), op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_INC), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_V, -1, OP_ID, 1, "double", OP_RW), op_arg_gbl(&alpha, 1, "double", OP_READ)); c3 = 0; // c3 = resm'*resm; op_par_loop_dotR("dotR", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&c3, 1, "double", OP_INC)); beta = c3 / c1; // P = beta*P+resm; op_par_loop_updateP("updateP", nodes, op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_READ), op_arg_dat(p_P, -1, OP_ID, 1, "double", OP_RW), op_arg_gbl(&beta, 1, "double", OP_READ)); c1 = c3; res = sqrt(c1); inner_iter++; } rms = 0; // phim = phim - Stiffness\Load; op_par_loop_update("update", nodes, op_arg_dat(p_phim, -1, OP_ID, 1, "double", OP_RW), op_arg_dat(p_resm, -1, OP_ID, 1, "double", OP_WRITE), op_arg_dat(p_U, -1, OP_ID, 1, "double", OP_READ), op_arg_gbl(&rms, 1, "double", OP_INC)); op_printf("rms = %10.5e iter: %d\n", sqrt(rms) / sqrt(g_nnode), inner_iter); } op_timers(&cpu_t2, &wall_t2); op_timing_output(); op_printf("Max total runtime = %f\n", wall_t2 - wall_t1); op_exit(); /*free(cell); free(bnode); free(xm); free(phim); free(K); free(resm); free(V); free(P); free(U);*/ }