int comp_resid(int nn, int nt, int nb, int **tri_conn, double * x, double *y, int *nbs, int ***bs, double **Q, double **R, double Q1, double Q2, double Q3, double Q4) { // // Function updates R[node][4] // Q1,Q2,Q3,Q4 == Q_infinity conditions // int i,k,t,b; int n0, n1, n2; double x0, x1, x2; double y0, y1, y2; double xc, yc, xm, ym; double fp1,fp2,fp3,fp4; double fm1,fm2,fm3,fm4; double nx = 0.0; double ny = 0.0; // loop over all triangles for (t = 0; t < nt; t++) { // loop over 3 nodes of triangle for (k = 0; k < 3; k++) { n0 = tri_conn[t][k]; n1 = tri_conn[t][(k+1)%3]; n2 = tri_conn[t][(k+2)%3]; x0 = x[n0]; x1 = x[n1]; x2 = x[n2]; y0 = y[n0]; y1 = y[n1]; y2 = y[n2]; xc = (x0 + x1 + x2) / 3.0; yc = (y0 + y1 + y2) / 3.0; xm = 0.5*(x0 + x1); ym = 0.5*(y0 + y1); nx = yc - ym; ny = xm - xc; // get F+ and F- fluxp(Q[n0][0], Q[n0][1], Q[n0][2], Q[n0][3], nx, ny, &fp1, &fp2, &fp3, &fp4); fluxm(Q[n1][0], Q[n1][1], Q[n1][2], Q[n1][3], nx, ny, &fm1, &fm2, &fm3, &fm4); // Resid[Q_left] R[n0][0] += (fp1 + fm1); R[n0][1] += (fp2 + fm2); R[n0][2] += (fp3 + fm3); R[n0][3] += (fp4 + fm4); // Resid[Q_right] R[n1][0] -= (fp1 + fm1); R[n1][1] -= (fp2 + fm2); R[n1][2] -= (fp3 + fm3); R[n1][3] -= (fp4 + fm4); } } //printf("\nafter going over all triangles\n"); //printf("R[node %3d][% 7.2f][% 7.2f][% 7.2f][% 7.2f]\n", 4, R[4][0], R[4][1], R[4][2], R[4][3]); // Loop over all boundaries for (b=0; b < nb; b++) { // Loop over boundary segments for (i=0; i < nbs[b]; i++) { //get node 0 1 n0 = bs[b][i][0]; n1 = bs[b][i][1]; x0 = x[n0]; x1 = x[n1]; y0 = y[n0]; y1 = y[n1]; // get midpt xm = 0.5*(x0 + x1); ym = 0.5*(y0 + y1); // get normal from midpt to node 0 nx = ym - y0; ny = x0 - xm; // fminus(Q_inf) for node 0 & 1 fluxm(Q1,Q2,Q3,Q4,nx, ny, &fm1, &fm2, &fm3, &fm4); // fplus(QL) for node 0 fluxp(0.75*Q[n0][0]+0.25*Q[n1][0], 0.75*Q[n0][1]+0.25*Q[n1][1], 0.75*Q[n0][2]+0.25*Q[n1][2], 0.75*Q[n0][3]+0.25*Q[n1][3], nx, ny, &fp1, &fp2, &fp3, &fp4); // Update Resid for node 0 R[n0][0] += (fp1 + fm1); R[n0][1] += (fp2 + fm2); R[n0][2] += (fp3 + fm3); R[n0][3] += (fp4 + fm4); // fplus(QL) for node 1 fluxp(0.75*Q[n1][0]+0.25*Q[n0][0], 0.75*Q[n1][1]+0.25*Q[n0][1], 0.75*Q[n1][2]+0.25*Q[n0][2], 0.75*Q[n1][3]+0.25*Q[n0][3], nx, ny, &fp1, &fp2, &fp3, &fp4); // Update Resid for node 1 R[n1][0] += (fp1 + fm1); R[n1][1] += (fp2 + fm2); R[n1][2] += (fp3 + fm3); R[n1][3] += (fp4 + fm4); } } //printf("\nafter looping over boundaries\n"); //printf("R[node %3d][% 7.2f][% 7.2f][% 7.2f][% 7.2f]\n", 4, R[4][0], R[4][1], R[4][2], R[4][3]); return 0; }
void run( const int N, const int M, const int L, const int hyper_threads, const int vector_lanes, const int nx, const int ny, const int nz, const int ichunk, const int nang, const int noct, const int ng, const int nmom, const int cmom, const vector<diag_c>& diag ) { typedef typename Kokkos::DefaultExecutionSpace device_t; typedef TeamPolicy<device_t> team_policy_t; typedef View<double*, device_t> view_1d_t; typedef View<double**, Kokkos::LayoutLeft, device_t> view_2d_t; typedef View<double***, Kokkos::LayoutLeft, device_t> view_3d_t; typedef View<double****, Kokkos::LayoutLeft, device_t> view_4d_t; typedef View<double*****, Kokkos::LayoutLeft, device_t> view_5d_t; typedef View<double******, Kokkos::LayoutLeft, device_t> view_6d_t; typedef View<double*******, Kokkos::LayoutLeft, device_t> view_7d_t; int id = 1; int ich = 1; int jlo = 0; int jhi = ny-1; int jst = 1; int jd = 2; int klo = 0; int khi = nz-1; int kst = 1; int kd = 2; double hi = c1; Kokkos::initialize(); Kokkos::DefaultExecutionSpace::print_configuration(cout); view_4d_t psii( "psii", nang, ny, nz, ng ); view_4d_t psij( "psij", nang, ichunk, nz, ng ); view_4d_t psik( "psik", nang, ichunk, ny, ng ); view_4d_t jb_in( "jb_in", nang, ichunk, ny, ng ); // jb_in(nang,ichunk,nz,ng) view_4d_t kb_in( "kb_in", nang, ichunk, ny, ng ); // kb_in(nang,ichunk,nz,ng) view_6d_t qim( "qim", nang, nx, ny, nz, noct, ng ); // qim(nang,nx,ny,nz,noct,ng) view_5d_t qtot( "qtot", cmom, nx, ny, nx, ng ); // qtot(cmom,nx,ny,nz,ng) view_2d_t ec( "ec", nang, cmom ); // ec(nang,cmom) view_1d_t mu( "mu", nang ); // mu(nang) view_1d_t w( "w", nang ); // w(nang) view_1d_t wmu( "wmu", nang ); // wmu(nang) view_1d_t weta( "weta", nang ); // weta(nang) view_1d_t wxi( "wxi", nang ); // wxi(nang) view_1d_t hj( "hj", nang ); // hj(nang) view_1d_t hk( "hk", nang ); // hk(nang) view_1d_t vdelt( "vdelt", ng ); // vdelt(ng) view_6d_t ptr_in( "ptr_in", nang, nx, ny, nz, noct, ng ); // ptr_in(nang,nx,ny,nz,noct,ng) view_6d_t ptr_out( "ptr_out", nang, nx, ny, nz, noct, ng ); // ptr_out(nang,nx,ny,nz,noct,ng) view_4d_t flux( "flux", nx, ny, nz, ng ); // flux(nx,ny,nz,ng) view_5d_t fluxm( "fluxm", cmom-1, nx, ny, nz, ng ); //fluxm(cmom-1,nx,ny,nz,ng) view_2d_t psi( "psi", nang, M ); view_2d_t pc( "pc", nang, M ); view_4d_t jb_out( "jb_out", nang, ichunk, nz, ng ); view_4d_t kb_out( "kb_out", nang, ichunk, ny, ng ); view_4d_t flkx( "flkx", nx+1, ny, nz, ng ); view_4d_t flky( "flky", nx, ny+1, nz, ng ); view_4d_t flkz( "flkz", nx, ny, nz+1, ng ); view_3d_t hv( "hv", nang, 4, M ); // hv(nang,4,M) view_3d_t fxhv( "fxhv", nang, 4, M ); // fxhv(nang,4,M) view_5d_t dinv( "dinv", nang, nx, ny, nz, ng ); // dinv(nang,nx,ny,nz,ng) view_2d_t den( "den", nang, M ); // den(nang,M) view_4d_t t_xs( "t_xs", nx, ny, nz, ng ); // t_xs(nx,ny,nz,ng) const team_policy_t policy( N, hyper_threads, vector_lanes ); for (int ii = 0; ii < n_test_iter; ii++) { Kokkos::Impl::Timer timer; for (int oct = 0; oct < noct; oct++) { parallel_for( policy, dim3_sweep2< team_policy_t, view_1d_t, view_2d_t, view_3d_t, view_4d_t, view_5d_t, view_6d_t, view_7d_t > ( M, L, ng, cmom, noct, nx, ny, nz, ichunk, diag, id, ich, oct, jlo, jhi, jst, jd, klo, khi, kst, kd, psii, psij, psik, jb_in, kb_in, qim, qtot, ec, mu, w, wmu, weta, wxi, hi, hj, hk, vdelt, ptr_in, ptr_out, flux, fluxm, psi, pc, jb_out, kb_out, flkx, flky, flkz, hv, fxhv, dinv, den, t_xs ) ); }// end noct std::cout << " ii " << ii << " elapsed time " << timer.seconds() << std::endl; } // end n_test_iter Kokkos::finalize(); }
void run_serial( int nx, int ny, int nz, int ndiag, const vector<diag_c>& diag, int ndimen, int id, int jd, int kd, int nang, int nmom, int noct, int ng, int ich, int ichunk, int jlo, int klo, int jhi, int khi, int jst, int kst, bool firsty, bool lasty, bool firstz, bool lastz, int nnested, int src_opt, int fixup, double hi, double vdelt ) { typedef Kokkos::Serial device_type; typedef Kokkos::View<double*, Kokkos::LayoutLeft, device_type> serial_view_t_1d; typedef Kokkos::View<double**, Kokkos::LayoutLeft, device_type> serial_view_t_2d; typedef Kokkos::View<double***, Kokkos::LayoutLeft, device_type> serial_view_t_3d; typedef Kokkos::View<double****, Kokkos::LayoutLeft, device_type> serial_view_t_4d; typedef Kokkos::View<double*****, Kokkos::LayoutLeft, device_type> serial_view_t_5d; typedef Kokkos::View<double******, Kokkos::LayoutLeft, device_type> serial_view_t_6d; typedef Kokkos::View<double*, Kokkos::LayoutStride> serial_view_t_1d_s; // cout << " a " << endl; int cmom = nmom * nmom; int d1 = nang; // TEST, (timedep == 1 => d1 = nang; d2 = nx; d3 = ny; d4 = nz ) int d2 = nx; // TEST int d3 = ny; // TEST int d4 = nz; // TEST // cout << " b " << endl; serial_view_t_1d hj( "hj", nang ); serial_view_t_1d hk( "hk", nang ); serial_view_t_1d mu( "mu", nang ); serial_view_t_1d w( "w", nang ); serial_view_t_6d qim( "qim", nang, nx, ny, nz, noct, ng ); serial_view_t_3d psii( "psii", nang, ny, nz ); serial_view_t_3d psij( "psij", nang, ichunk, nz ); serial_view_t_3d psik( "psik", nang, ichunk, ny ); serial_view_t_4d qtot( "qtot", cmom, nx, ny, nz ); serial_view_t_2d ec( "ec", nang, cmom ); serial_view_t_4d ptr_in( "ptr_in", d1, d2, d3, d4 ); serial_view_t_4d ptr_out( "ptr_out", d1, d2, d3, d4 ); serial_view_t_4d dinv( "dinv", nang, nx, ny, nz ); serial_view_t_3d flux( "flux", nx, ny, nz ); serial_view_t_4d fluxm( "fluxm", cmom-1, nx, ny, nz ); serial_view_t_3d jb_in( "jb_in", nang, ichunk, nz ); serial_view_t_3d jb_out( "jb_out", nang, ichunk, nz ); serial_view_t_3d kb_in( "kb_in", nang, ichunk, ny ); serial_view_t_3d kb_out( "kb_out", nang, ichunk, ny ); serial_view_t_1d wmu( "wmu", nang ); serial_view_t_1d weta( "weta", nang ); serial_view_t_1d wxi( "wxi", nang ); serial_view_t_3d flkx( "flkx", nx+1, ny, nz ); serial_view_t_3d flky( "flky", nx, ny+1, nz ); serial_view_t_3d flkz( "flkz", nx, ny, nz+1 ); serial_view_t_3d t_xs( "t_xs", nx, ny, nz ); // cout << " c " << endl; for (int ii = 0; ii < n_test_iter; ii++) { time(&timer_start); for (int oct = 0; oct < noct; oct++) { for (int g = 0; g < ng; g++) { dim3_sweep< device_type, serial_view_t_1d, serial_view_t_2d, serial_view_t_3d, serial_view_t_4d, serial_view_t_5d, serial_view_t_6d, serial_view_t_1d_s > ( ichunk, firsty, lasty, firstz, lastz, nnested, nx, hi, hj, hk, ndimen, ny, nz, ndiag, diag, cmom, nang, mu, w, noct, src_opt, ng, qim, fixup, ich, id, d1, d2, d3, d4, jd, kd, jlo, klo, oct, g, jhi, khi, jst, kst, psii, psij, psik, qtot, ec, vdelt, ptr_in, ptr_out, dinv, flux, fluxm, jb_in, jb_out, kb_in, kb_out, wmu, weta, wxi, flkx, flky, flkz, t_xs ); } } time(&timer_end); std::cout << " ii " << ii << " elapsed time " << difftime(timer_end, timer_start) << std::endl; } }