void ArrayMesh::set_mesh( int coords_per_vertex, unsigned long num_vertices, double* interleaved_vertex_coords, const int* vertex_fixed_flags, unsigned long num_elements, EntityTopology element_type, const unsigned long* element_connectivity_array, bool one_based_conn_indices, unsigned nodes_per_element, const int* vertex_slaved_flags ) { clear_mesh(); mDimension = coords_per_vertex; vertexCount = num_vertices; coordArray = interleaved_vertex_coords; fixedFlags = vertex_fixed_flags; slavedFlags = vertex_slaved_flags; elementCount = num_elements; connArray = element_connectivity_array; elementType = element_type; oneBasedArrays = one_based_conn_indices; if (oneBasedArrays) { coordArray -= mDimension; --fixedFlags; } if (nodes_per_element < 2) nodesPerElement = TopologyInfo::corners( element_type ); else nodesPerElement = nodes_per_element; vertexByteArray = new unsigned char[num_vertices + one_based_conn_indices]; assert(valid()); memset( vertexByteArray, 0, num_vertices + one_based_conn_indices ); }
void system::set_problem(const bool init) { if (myproc == 0) fprintf(stderr, " ********* Setting up Orszag-Tang vortex ************* \n"); const real b0 = 1.0/sqrt(4.0*M_PI); const real d0 = 25.0/(36.0*M_PI); const real v0 = 1.0; const real p0 = 5.0/(12*M_PI); gamma_gas = 5.0/3; courant_no = 0.8; if (!init) return; U_local.resize(local_n); dU_local.resize(local_n); Wrec_local.resize(local_n); const real adv = 0.0; double dt_min = HUGE; for (int i = 0; i < (int)local_n; i++) { const Particle &pi = ptcl_local[i]; real x = pi.pos.x; real y = pi.pos.y; real d, p, vx, vy, vz, bx, by, bz; vx = -v0 * sin(2.0*M_PI*y) + adv; vy = +v0 * sin(2.0*M_PI*x) + adv; vz = 0.0; bx = -b0*sin(2*M_PI*y); by = +b0*sin(4*M_PI*x); bz = 0.0; // bx = by = bz= 0; // bz = b0; d = d0; p = p0; real scal = 1; #ifdef __ADVECT_PULSE_TEST__ vx = 0; vy = 0; vz = 0; bx = by = bz = 0; p = 1.0; d = 1.0; // vx = 1; vy = 0; // if (x > 0.3 && x < 0.7) d = 2; vx = 0; vy = 1; if (y > 0.25 && y < 0.75) { d = 10; // p = 1; } #endif #if 0 bx = by = bz = 0; #endif Fluid m; m[Fluid::DENS] = d ; m[Fluid::ETHM] = p/(gamma_gas - 1.0); m[Fluid::VELX] = vx; m[Fluid::VELY] = vy; m[Fluid::VELZ] = vz; m[Fluid::BX ] = bx; m[Fluid::BY ] = by; m[Fluid::BZ ] = bz; m[Fluid::PSI ] = 0.0; m[Fluid::ENTR] = compute_entropy_from_ethm(m); for (int k = 0 ; k < Fluid::NSCALARS; k++) m.scal(k) = scal; Wrec_local[i] = Fluid_rec(m); U_local [i] = m.to_conservative(cell_local[i].Volume); dU_local[i] = 0.0; ptcl_local[i].volume = cell_local[i].Volume; const double L = std::pow(cell_local[i].Volume, 1.0/3); const double cs_est = std::sqrt((p*gamma_gas + (bx*bx+by*by+bz*bz))/d); const double v_est = std::sqrt(vx*vx + vy*vy + vz*vz); const double dt_est = 0.1 * courant_no * L/(cs_est + v_est); ptcl_local[i].tlast = 0.0; ptcl_local[i].rung = scheduler.get_rung(dt_est); dt_min = std::min(dt_min, dt_est); } double dt_min_glob; MPI_Allreduce(&dt_min, &dt_min_glob, 1, MPI_DOUBLE, MPI_MIN, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); if (myproc 
== 0) fprintf(stderr , " pvel ... \n"); get_active_ptcl(true); MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr , " pvel ... \n"); cell_list.swap(cell_local); ptcl_import.swap(ptcl_local); U_import.swap(U_local); site_active_list.swap(active_ptcl); compute_pvel(); compute_timesteps(true); cell_list.swap(cell_local); ptcl_import.swap(ptcl_local); U_import.swap(U_local); site_active_list.swap(active_ptcl); for (int i = 0; i < (int)local_n; i++) { ptcl_local[i].rung += 1; ptcl_local[i].orig_vel = ptcl_local[i].vel; } all_active = true; scheduler.flush_list(); for (int i = 0; i < (int)local_n; i++) { scheduler.push_particle(i, (int)ptcl_local[i].rung); ptcl_local[i].tend = 0.0 + scheduler.get_dt(ptcl_local[i].rung); } boundary_n = 0; clear_mesh(true); MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr, " proc= %d: complete problem setup \n", myproc); }
void system::set_geometry(const bool init) { const double dt_max = 1.0/64 ; //16; // 1.0/128; scheduler = Scheduler(dt_max); t_end = 2.5; n_restart = 1; dt_restart = dt_max; dt_dump = dt_max / 16; dt_dump = dt_max ; //* 4; di_log = 100; global_n = local_n = 0; int nx = 16; int ny = 16; int nz = 16; // nx = ny = nz = 32; // nx = ny = nz = 64; nx = ny = 32; nz = 32; // nx = ny = 32; nz = 16; nx = ny = 64; nz = 16; nx = ny = 128; nz = 16; // nx = ny = 256; nz = 16; // nx = ny = 256; nz = 256; // nx = ny = nz = 128; // eulerian = true; #if 0 #if 1 #define __ADVECT_PULSE_TEST__ nx = ny = 64; nz = 16; dt_dump = dt_max; dt_restart = 1e10; #endif // nx = ny = 128; #endif const double Lx = 1.0; const vec3 rmin(0.0); const vec3 rmax(Lx, (Lx/nx)*ny, (Lx/nx)*nz); global_domain = boundary(rmin, rmax); global_domain_size = global_domain.hsize() * 2.0; const vec3 Len3 = global_domain.hsize() * 2.0; pfloat<0>::set_scale(Len3.x); pfloat<1>::set_scale(Len3.y); pfloat<2>::set_scale(Len3.z); Distribute::int3 nt(1, 1, 1); switch(nproc) { case 1: break; case 2: nt.x = 2; nt.y = 1; nt.z = 1; break; case 4: nt.x = 2; nt.y = 2; nt.z = 1; break; case 8: nt.x = 4; nt.y = 2; nt.z = 1; break; case 16: nt.x = 4; nt.y = 4; nt.z = 1; break; case 32: nt.x = 8; nt.y = 4; nt.z = 1; break; case 64: nt.x = 8; nt.y = 8; nt.z = 1; break; case 128: nt.x = 8; nt.y = 8; nt.z = 2; break; default: assert(false); } const Distribute::int3 nt_glb(nt); const pBoundary pglobal_domain(pfloat3(0.0), pfloat3(Len3)); distribute_glb.set(nproc, nt, pglobal_domain); if (!init) return; if (myproc == 0) { ptcl_local.clear(); ptcl_local.reserve(128); const dvec3 dr = dvec3(Len3.x/nx, Len3.y/ny, Len3.z/nz); const real rmax = dr.abs() * 1.0; fprintf(stderr, "dr= %g %g %g \n", dr.x, dr.y, dr.z); fprintf(stderr, "rmin= %g %g %g \n", global_domain.get_rmin().x, global_domain.get_rmin().y, global_domain.get_rmin().z); fprintf(stderr, "rmax= %g %g %g \n", global_domain.get_rmax().x, global_domain.get_rmax().y, 
global_domain.get_rmax().z); for (int k = 0; k < nz; k++) { for (int j = 0; j < ny; j++) { for (int i = 0; i < nx; i++) { dvec3 pos = global_domain.get_rmin() + dvec3(i*dr.x, j*dr.y, k*dr.z) + 0.5*dr; const int ijk = (k*ny + j)*nx + i; #if 0 if (!eulerian) { const real f = 1.0e-6; pos += vec3(drand48()*dr.x*f, drand48()*dr.y*f, drand48()*dr.z*f); } #endif #if 1 pos = global_domain.get_rmin() + dvec3( drand48()*Len3.x, drand48()*Len3.y, drand48()*Len3.z); #else #define _UNIFORM_MESH_ #endif dvec3 vel(0.0, 0.0, 0.0); Particle p; p.set_pos(pos); p.vel = vel; p.orig_vel = p.vel; p.boundary = 0; p.idx = ijk; p.rmax = rmax; ptcl_local.push_back(p); } } } local_n = ptcl_local.size(); global_n = local_n; fprintf(stderr, " *** proc= %d : local_n= %llu global_n= %llu \n", myproc, local_n, global_n); } // myproc == 0 MPI_Bcast(&global_n, 1, MPI_INT, 0, MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr, " *** Distrubiting data \n"); all_active = true; for (int k = 0; k < 5; k++) distribute_data(false,false,false); #if 0 std::vector< std::pair<int, TREAL> > rmax_list; local_tree.root.get_rmax(rmax_list); assert((int)rmax_list.size() == local_n); for (int i = 0; i < local_n; i++) ptcl[rmax_list[i].first].rmax = rmax_list[i].second; #endif MPI_Barrier(MPI_COMM_WORLD); fprintf(stderr, " *** proc= %d : local_n= %llu global_n= %llu \n", myproc, local_n, global_n); fprintf(stderr, " proc= %d relax \n", myproc); #ifndef _UNIFORM_MESH_ relax_mesh(5); #endif fprintf(stderr, " ---- done --- \n"); { distribute_data(false, false, false); const double t10 = mytimer::get_wtime(); clear_mesh(false); int nattempt = build_mesh_global(); double dt10 = mytimer::get_wtime() - t10; double volume_loc = 0.0; { std::vector<TREAL> v(local_n); for (int i = 0; i < (int)local_n; i++) v[i] = cell_local[i].Volume; std::sort(v.begin(), v.end()); // sort volumes from low to high, to avoid roundoff errors for (int i = 0; i < (int)local_n; i++) volume_loc += v[i]; } double dt10max; 
MPI_Allreduce(&dt10, &dt10max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); double volume_glob = 0.0; int nattempt_max, nattempt_min; MPI_Allreduce(&volume_loc, &volume_glob, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&nattempt, &nattempt_max, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&nattempt, &nattempt_min, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); const double volume_exact = global_domain_size.x*global_domain_size.y*global_domain_size.z; if (myproc == 0) { fprintf(stderr, "first call build_mesh:[ %g sec :: %g cells/s/proc/thread ]\n", dt10max, global_n/nproc/dt10max); fprintf(stderr, " computed_volume= %g exact_volume= %g diff= %g [ %g ] nattempt= %d %d \n", volume_glob, volume_exact, volume_glob - volume_exact, (volume_glob - volume_exact)/volume_exact, nattempt_min, nattempt_max); } } extract_ngb_from_mesh(); #if 0 set_problem(true); iterate(); MPI_Barrier(MPI_COMM_WORLD); MPI_Finalize(); exit(-1); #endif }
// Destructor: hands all cleanup to clear_mesh().
ArrayMesh::~ArrayMesh()
{
  this->clear_mesh();
}
void system::read_binary(const char *filename, const int n_files) { #if 1 assert(n_files == 1); vec3 rmin, rmax; if (myproc == 0) { FILE *fin; if (!(fin = fopen(filename, "r"))) { std::cerr << "Cannot open file " << filename << std::endl; exit(-1); } std::cerr << "proc= " << myproc << " read snapshot: " << filename << std::endl; int ival; float fval; #define fload(x) { myfread(&fval, sizeof(float), 1, fin); x = fval; } #define iload(x) { myfread(&ival, sizeof(int), 1, fin); x = ival;} float ftmp; int itmp, np0, npx, npy, npz; iload(itmp); // 20*4 assert(itmp == 20*4); iload(itmp); // myid iload(np0); iload(npx); union { unsigned long long uint_long; unsigned int uint[2]; } data; iload(data.uint[0]); iload(data.uint[1]); scheduler.tsysU = data.uint_long; float courant_No; int nglob, nloc, ndim; iload(nglob); iload(nloc); iload(ndim); assert(ndim == 3); fload(t_global); fload(dt_global); iload(iteration); fload(courant_No); fload(gamma_gas); int periodic_on; iload(periodic_on); assert(periodic_on == -1); fload(rmin.x); fload(rmin.y); fload(rmin.z); fload(rmax.x); fload(rmax.y); fload(rmax.z); iload(itmp); // 20*4 assert(itmp == 20*4); ptcl_local.resize(nglob); U_local.resize(nglob); dU_local.resize(nglob); fprintf(stderr, "np =%d nglob= %d \n", np0, nglob); int pc = 0; for (int pr = 0; pr < np0; pr++) { fprintf(stderr, " p= %d out of %d; nloc= %d\n", pr, np0, nloc); for (int i = 0; i < nloc; i++) { Particle p; p.tend = t_global; p.rung = 0.0; p.new_dt = 0.0; p.local_id = i; Fluid W(0.0); iload(ival); assert(ival == 26*4); iload(ival); p.idx = ival; fload(p.pos.x); fload(p.pos.y); fload(p.pos.z); p.pos = periodic(p.pos); assert(rmin.x <= p.pos.x); assert(rmax.x >= p.pos.x); assert(rmin.y <= p.pos.y); assert(rmax.y >= p.pos.y); assert(rmin.z <= p.pos.z); assert(rmax.z >= p.pos.z); p.orig_pos = p.pos; p.pot = 0; fload(p.vel.x); fload(p.vel.y); fload(p.vel.z); p.orig_vel = p.vel; fload(W[Fluid::DENS]); fload(W[Fluid::ETHM]); fload(ftmp); // compute_pressure(m.dens, 
m.ethm)); fload(p.rmax); //dump( (sqr(m.B.x ) + sqr(m.B.y ) + sqr(m.B.z ))*0.5f); iload(p.boundary); // fload(ftmp); //dump(sqrt(sqr(m.vel.x) + sqr(m.vel.y) + sqr(m.vel.z))); fload(W[Fluid::VELX]); fload(W[Fluid::VELY]); fload(W[Fluid::VELZ]); fload(W[Fluid::BX]); fload(W[Fluid::BY]); fload(W[Fluid::BZ]); float h; fload(h); fload(p.volume); p.volume_new = p.volume; fload(W[Fluid::PSI]); fload(ftmp); //L*divB_i[i]); fload(W[Fluid::ENTR]); fload(ftmp); // Jx fload(ftmp); // Jy fload(ftmp); // Jz iload(ival); assert(ival == 26*4); p.tlast = t_global; ptcl_local[pc] = p; U_local [pc] = W; dU_local [pc] = 0.0; dU_local [pc] = 0.0; pc++; } fprintf(stderr, "p= %d np0= %d size= %d %d\n", pr, np0, (int)U_local.size(), (int)ptcl_local.size()); if (!(pr < np0-1)) break; iload(itmp); // 20*4 assert(itmp == 20*4); iload(itmp); // myid iload(np0); iload(npx); iload(npy); iload(npz); int nglob1; iload(nglob1); if (nglob != nglob1) { fprintf(stderr, "np; npx, npy, npz = %d; %d %d %d \n", np0, npx, npy, npz); fprintf(stderr, "nglob= %d nglob1= %d\n", nglob, nglob1); } assert(nglob == nglob1); iload(nloc); iload(ndim); fload(t_global); fload(dt_global); iload(iteration); fload(courant_No); fload(gamma_gas); iload(periodic_on); fload(rmin.x); fload(rmin.y); fload(rmin.z); fload(rmax.x); fload(rmax.y); fload(rmax.z); iload(itmp); // 20*4 } assert(pc == nglob); assert(nglob == (int)U_local.size()); fclose(fin); local_n = U_local.size(); } global_n = U_local.size(); local_n = global_n; MPI_Bcast(&global_n, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); MPI_Bcast(&iteration, 1, MPI_INT, 0, MPI_COMM_WORLD); double dt_glob = dt_global; double t_glob = t_global; MPI_Bcast(& t_glob, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(&dt_glob, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD); MPI_Bcast(&scheduler.tsysU, 1, MPI_UNSIGNED_LONG_LONG, 0, MPI_COMM_WORLD); dt_global = dt_glob; t_global = t_glob; scheduler.set_tsys(t_global); assert(t_global == scheduler.get_tsys()); // scheduler.tsysU = (unsigned long 
long)(t_global / scheduler.dt_tick); scheduler.min_rung = 0; dt_global = 0.0f; distribute_data(true, false, true); #if 1 fit_vec(ptcl_local); fit_vec(U_local); fit_vec(dU_local); fit_vec(Wrec_local); #endif all_active = true; MPI_Barrier(MPI_COMM_WORLD); for (int i = 0; i < (int)local_n; i++) { ptcl_local[i].tlast = t_global; // ptcl_local[i].volume = cell_local[i].Volume; Wrec_local[i] = Fluid_rec(U_local[i]); U_local[i] = U_local[i].to_conservative(ptcl_local[i].volume); dU_local[i] = 0.0; } MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr , " pvel ... \n"); get_active_ptcl(true); MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr , " pvel ... \n"); cell_list.swap(cell_local); ptcl_import.swap(ptcl_local); U_import.swap(U_local); site_active_list.swap(active_ptcl); compute_pvel(); compute_timesteps(true); cell_list.swap(cell_local); ptcl_import.swap(ptcl_local); U_import.swap(U_local); site_active_list.swap(active_ptcl); for (int i = 0; i < (int)local_n; i++) { ptcl_local[i].rung += 1; ptcl_local[i].tend = ptcl_local[i].tlast + scheduler.get_dt(ptcl_local[i].rung); ptcl_local[i].orig_vel = ptcl_local[i].vel; ptcl_local[i].unset_active(); } all_active = true; scheduler.flush_list(); boundary_n = 0; for (int i = 0; i < (int)local_n; i++) { scheduler.push_particle(i, (int)ptcl_local[i].rung); if (ptcl_local[i].is_boundary()) boundary_n++; } unsigned long long boundary_glb; MPI_Allreduce(&boundary_n, &boundary_glb, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr, "boundary_glb= %lld\n", boundary_glb); clear_mesh(true); MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr, " proc= %d: complete read_binary \n", myproc); #endif }
void system::distribute_data(const bool FLUID, const bool GRADS, const bool NGB) { distribute_data_flag = true; ptcl_local.resize(local_n); std::vector<vec3> ptcl_pos(local_n); // compute integer coordinates for each position // for (int i = 0; i < (int)local_n; i++) { ptcl_local[i].local_id = i; ptcl_local[i].orig_pos = periodic(ptcl_local[i].orig_pos); ptcl_pos[i] = ptcl_local[i].orig_pos; } // determine domain decomposition // std::vector<vec3> sample_pos; determine_sampling_freq(); collect_sample_data(sample_pos, ptcl_pos); DistributeNew<real, vec3, boundary> distribute(nproc, global_domain); if (myproc == 0) { // distribute_glb.determine_division(sample_pos); #if 1 distribute.determine_division(sample_pos, nproc*32); #else distribute.determine_division(sample_pos, nproc*8); #endif } myMPI::Bcast(distribute.tiles, 0, nproc); myMPI::Bcast(distribute.procs, 0, nproc); compute_proc_domain(distribute.tiles, distribute.procs); if (FLUID && GRADS) for (int i = 0; i < (int)local_n; i++) Wrec_local[i].pos.x = divBi[i]; int iloc = 0; std::vector<Particle> ptcl_send[NMAXPROC]; std::vector<Particle> ptcl_recv[NMAXPROC]; std::vector<ParticleFluidStruct> fluid_send[NMAXPROC]; std::vector<ParticleFluidStruct> fluid_recv[NMAXPROC]; std::vector<ParticleFluidStructLite> fluidlite_send[NMAXPROC]; std::vector<ParticleFluidStructLite> fluidlite_recv[NMAXPROC]; #if 0 std::vector<int> ngb_send[NMAXPROC]; std::vector<int> ngb_recv[NMAXPROC]; #endif std::vector<int> remote_tiles; int nremove = 0; for (int i = 0; i < (int)local_n; i++) { remote_tiles.clear(); proc_tree.root.walk_boundary(boundary(ptcl_pos[i]), remote_tiles, global_domain_size); assert(remote_tiles.size() > 0); const int proc = proc_procs[remote_tiles[0]]; assert(proc >= 0); assert(proc < nproc); if (proc == myproc && !ptcl_local[i].is_remove()) { std::swap(ptcl_local[i], ptcl_local[iloc]); std::swap(ptcl_pos [i], ptcl_pos [iloc]); if (FLUID) { std::swap( U_local[i], U_local[iloc]); std::swap( dU_local[i], 
dU_local[iloc]); if (GRADS) std::swap(Wrec_local[i], Wrec_local[iloc]); } iloc++; } else if (!ptcl_local[i].is_remove()) { if (FLUID && GRADS) fluid_send[proc].push_back(ParticleFluidStruct(ptcl_local[i], U_local[i], dU_local[i], Wrec_local[i])); else if (FLUID) fluidlite_send[proc].push_back(ParticleFluidStructLite(ptcl_local[i], U_local[i], dU_local[i])); else ptcl_send[proc].push_back(ptcl_local[i]); } else nremove++; } #if 0 if (FLUID && GRADS) myMPI::all2all(fluid_send, fluid_recv, myproc, nproc, mpi_debug_flag); else if (FLUID ) myMPI::all2all(fluidlite_send, fluidlite_recv, myproc, nproc, mpi_debug_flag); else myMPI::all2all(ptcl_send, ptcl_recv, myproc, nproc, mpi_debug_flag); #else { static int nsend[NMAXPROC], nrecv[NMAXPROC]; if (FLUID && GRADS) myMPI::all2all<true>(fluid_send, fluid_recv, myproc, nproc, 1, nsend, nrecv); else if (FLUID ) myMPI::all2all<true>(fluidlite_send, fluidlite_recv, myproc, nproc, 1, nsend, nrecv); else myMPI::all2all<true>(ptcl_send, ptcl_recv, myproc, nproc, 1, nsend, nrecv); } #endif int nrecv = 0; if (FLUID && GRADS) for (int p = 0; p < nproc; p++) nrecv += fluid_recv[p].size(); else if (FLUID) for (int p = 0; p < nproc; p++) nrecv += fluidlite_recv[p].size(); else for (int p = 0; p < nproc; p++) nrecv += ptcl_recv[p].size(); { const int nloc = iloc + nrecv; ptcl_local.resize(nloc); fit_vec(ptcl_local); U_local .resize(nloc); fit_vec(U_local); dU_local .resize(nloc); fit_vec(dU_local); Wrec_local.resize(nloc); fit_vec(Wrec_local); divBi .resize(nloc); fit_vec(divBi); Wextra_local.resize(nloc); fit_vec(Wextra_local); } for (int p = 0; p < nproc; p++) for (size_t q = 0; q < (FLUID ? (GRADS ? 
fluid_recv[p].size() : fluidlite_recv[p].size()) : ptcl_recv[p].size()); q++) { assert(p != myproc); if (FLUID && GRADS) { ptcl_local[iloc] = fluid_recv[p][q].p; U_local [iloc] = fluid_recv[p][q].U; dU_local [iloc] = fluid_recv[p][q].dU; Wrec_local[iloc] = fluid_recv[p][q].Wrec; } else if (FLUID) { ptcl_local[iloc] = fluidlite_recv[p][q].p; U_local [iloc] = fluidlite_recv[p][q].U; dU_local [iloc] = fluidlite_recv[p][q].dU; } else ptcl_local[iloc] = ptcl_recv[p][q]; assert(!ptcl_local[iloc].is_remove()); iloc++; } local_n = iloc; assert(iloc = (int)ptcl_local.size()); if (FLUID && GRADS) for (int i = 0; i < (int)local_n; i++) { divBi[i] = Wrec_local[i].pos.x; Wrec_local[i].pos.x = ptcl_local[i].pos.x; } unsigned long long nglob, nloc = local_n; unsigned long long nvirt_glob; virtual_n = nremove; MPI_Allreduce(&nloc, &nglob, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&virtual_n, &nvirt_glob, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); unsigned long long local_n_min, local_n_max, local_n_mean; MPI_Allreduce(&local_n, &local_n_min, 1, MPI_UNSIGNED_LONG_LONG, MPI_MIN, MPI_COMM_WORLD); MPI_Allreduce(&local_n, &local_n_max, 1, MPI_UNSIGNED_LONG_LONG, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&local_n, &local_n_mean, 1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD); if (myproc == 0) { fprintf(stderr, "local_n= [min: %llu max: %llu ; mean: %llu ] global_n= %llu nglob= %llu remove_n_glob= %llu \n", local_n_min, local_n_max, local_n_mean/nproc, global_n, nglob, nvirt_glob); } MPI_Barrier(MPI_COMM_WORLD); assert(nglob == global_n - nvirt_glob); global_n = nglob; virtual_n = 0; sort_local_data(); // build local tree // global_domain_size = global_domain.hsize() * 2.0; local_tree.clear(); local_tree.set_domain( boundary( global_domain.centre() - global_domain.hsize()*1.5, global_domain.centre() + global_domain.hsize()*1.5)); std::vector<Octree::Particle> tree_ptcl(local_n); for (int i = 0; i < (int)local_n; i++) { 
assert(!ptcl_local[i].is_remove()); tree_ptcl[i] = Octree::Particle(ptcl_local[i].orig_pos, i); } local_tree.insert(&tree_ptcl[0], local_n, 0, local_n); local_tree.get_leaves(); local_tree.root.inner_boundary(); if (NGB) { if (myproc == 0) fprintf(stderr, "---buidling mesh---\n"); const double t10 = mytimer::get_wtime(); clear_mesh(false); const double t15 = mytimer::get_wtime(); build_mesh_global(); double dt_mesh = mytimer::get_wtime() - t15; double volume_loc = 0.0; { std::vector<TREAL> v(local_n); for (int i = 0; i < (int)local_n; i++) { v[i] = cell_local[i].Volume; ptcl_local[i].volume_new = v[i]; ptcl_local[i].local_id = i; } std::sort(v.begin(), v.end()); // sort volumes from low to high, to avoid roundoff errors for (int i = 0; i < (int)local_n; i++) volume_loc += v[i]; } extract_ngb_from_mesh(); clear_mesh(true); double dt = mytimer::get_wtime() - t10; double volume_glob = 0.0; double dt_max = 0.0; double dt_mesh_max = 0.0; MPI_Allreduce(&volume_loc, &volume_glob, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&dt, &dt_max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&dt_mesh, &dt_mesh_max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); const double volume_exact = global_domain_size.x*global_domain_size.y*global_domain_size.z; if (myproc == 0) { fprintf(stderr, " distribute::build_mesh:[ %g (all %g ) sec :: %g cells/s/proc/thread ]\n", dt_mesh_max, dt_max, global_n/nproc/dt_mesh_max); fprintf(stderr, " computed_volume= %g exact_volume= %g diff= %g [ %g ] \n", volume_glob, volume_exact, volume_glob - volume_exact, (volume_glob - volume_exact)/volume_exact); } } }
void system::set_geometry(const bool init) { const double dt_max = 1.0/512; scheduler = Scheduler(dt_max); int np; float lx, ly, lz; FILE *fin = NULL; if (myproc == 0) { float wp; fin = fopen(fin_data, "r"); int ival; size_t nread; nread = fread(&ival, sizeof(int), 1, fin); assert(ival == 2*sizeof(int)); nread = fread(&np, sizeof(int), 1, fin); nread = fread(&wp, sizeof(float), 1, fin); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == 2*sizeof(int)); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == 3*sizeof(float)); nread = fread(&lx, sizeof(float), 1, fin); nread = fread(&ly, sizeof(float), 1, fin); nread = fread(&lz, sizeof(float), 1, fin); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == 3*sizeof(float)); fprintf(stderr, " np= %d wp= %g \n",np, wp); fprintf(stderr, " lx= %g ly= %g lz= %g \n", lx, ly, lz); } MPI_Bcast(&lx, 1, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(&ly, 1, MPI_FLOAT, 0, MPI_COMM_WORLD); MPI_Bcast(&lz, 1, MPI_FLOAT, 0, MPI_COMM_WORLD); t_end = 0.2; n_restart = 2; dt_restart = dt_max; dt_dump = 0.01; di_log = 100; global_n = local_n = 0; // eulerian = true; const vec3 rmin(0.0); const vec3 rmax(lx, ly, lz); global_domain = boundary(rmin, rmax); global_domain_size = global_domain.hsize() * 2.0; const vec3 Len3 = global_domain.hsize() * 2.0; pfloat<0>::set_scale(Len3.x); pfloat<1>::set_scale(Len3.y); pfloat<2>::set_scale(Len3.z); if (myproc == 0) { ptcl.resize(np); const int nx = (int)std::pow(np, 1.0/3.0); const dvec3 dr = dvec3(Len3.x/nx, Len3.y/nx, Len3.z/nx); const real rmax = dr.abs() * 1.0; fprintf(stderr, "dr= %g %g %g \n", dr.x, dr.y, dr.z); local_n = ptcl.size(); global_n = local_n; { std::vector<float> x(local_n), y(local_n), z(local_n); size_t nread; int ival; nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&x[0], sizeof(float), local_n, fin); assert((int)nread == local_n); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); 
nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&y[0], sizeof(float), local_n, fin); assert((int)nread == local_n); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&z[0], sizeof(float), local_n, fin); assert((int)nread == local_n); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); for (int i = 0; i < local_n; i++) { const dvec3 vel(0.0, 0.0, 0.0); ptcl[i] = Particle(x[i], y[i], z[i], vel.x, vel.y, vel.z, i); ptcl[i].rmax = rmax; ptcl[i].unset_derefine(); } } U.resize(local_n); const int var_list[7] = { Fluid::VELX, Fluid::VELY, Fluid::VELZ, Fluid::DENS, Fluid::BX, Fluid::BY, Fluid::BZ}; std::vector<float> data(local_n); for (int var = 0; var < 7; var++) { fprintf(stderr, " reading vat %d out of %d \n", var+1, 7); int ival; size_t nread; nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); nread = fread(&data[0], sizeof(float), local_n, fin); assert((int)nread == local_n); nread = fread(&ival, sizeof(int), 1, fin); assert(ival == local_n*(int)sizeof(float)); for (int i = 0; i < local_n; i++) U[i][var_list[var]] = data[i]; } for (int i = 0; i < local_n; i++) { assert(U[i][Fluid::DENS] > 0.0); U[i][Fluid::ETHM] = cs2 * U[i][Fluid::DENS]; } fclose(fin); fprintf(stderr, " *** proc= %d : local_n= %d global_n= %d \n", myproc, local_n, global_n); } // myproc == 0 MPI_Bcast(&global_n, 1, MPI_INT, 0, MPI_COMM_WORLD); fprintf(stderr, " proc= %d distrubite \n", myproc); MPI_Barrier(MPI_COMM_WORLD); Distribute::int3 nt(1, 1, 1); switch(nproc) { case 1: break; case 2: nt.x = 2; nt.y = 1; nt.z = 1; break; case 4: nt.x = 2; nt.y = 2; nt.z = 1; break; case 6: nt.x = 3; nt.y = 2; nt.z = 1; break; case 8: nt.x = 2; nt.y = 2; nt.z = 2; break; case 16: nt.x = 4; nt.y = 2; nt.z = 2; break; case 32: nt.x = 4; nt.y = 4; nt.z = 2; 
break; case 64: nt.x = 4; nt.y = 4; nt.z = 4; break; case 128: nt.x = 8; nt.y = 4; nt.z = 4; break; case 256: nt.x = 8; nt.y = 8; nt.z = 4; break; case 512: nt.x = 8; nt.y = 8; nt.z = 8; break; default: assert(false); } const Distribute::int3 nt_glb(nt); const pBoundary pglobal_domain(pfloat3(0.0), pfloat3(Len3)); distribute_glb.set(nproc, nt, pglobal_domain); for (int k = 0; k < 5; k++) distribute_data(true, false); const int nloc_reserve = (int)(2.0*global_n/nproc); fit_reserve_vec(ptcl, nloc_reserve); fit_reserve_vec(ptcl_ppos, nloc_reserve); fit_reserve_vec(U, nloc_reserve); fit_reserve_vec(dU, nloc_reserve); fit_reserve_vec(Wgrad, nloc_reserve); fit_reserve_vec(gradPsi, nloc_reserve); fit_reserve_vec(cells, nloc_reserve); MPI_Barrier(MPI_COMM_WORLD); fprintf(stderr, " *** proc= %d : local_n= %d global_n= %d \n", myproc, local_n, global_n); fprintf(stderr, " proc= %d building_mesh \n", myproc); MPI_Barrier(MPI_COMM_WORLD); const double t10 = mytimer::get_wtime(); clear_mesh(); int nattempt = build_mesh(true); double dt10 = mytimer::get_wtime() - t10; double volume_loc = 0.0; { std::vector<TREAL> v(local_n); for (int i = 0; i < local_n; i++) v[i] = cells[i].Volume; std::sort(v.begin(), v.end()); // sort volumes from low to high, to avoid roundoff errors for (int i = 0; i < local_n; i++) volume_loc += v[i]; } double dt10max; MPI_Allreduce(&dt10, &dt10max, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); double volume_glob = 0.0; int nattempt_max, nattempt_min; MPI_Allreduce(&volume_loc, &volume_glob, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); MPI_Allreduce(&nattempt, &nattempt_max, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); MPI_Allreduce(&nattempt, &nattempt_min, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD); const double volume_exact = global_domain_size.x*global_domain_size.y*global_domain_size.z; if (myproc == 0) { fprintf(stderr, "first call build_mesh:[ %g sec :: %g cells/s/proc/thread ]\n", dt10max, global_n/nproc/dt10max); fprintf(stderr, " computed_volume= %g exact_volume= %g 
diff= %g [ %g ] nattempt= %d %d \n", volume_glob, volume_exact, volume_glob - volume_exact, (volume_glob - volume_exact)/volume_exact, nattempt_min, nattempt_max); } exchange_ptcl(); }
void system::set_problem(const bool init) { if (myproc == 0) fprintf(stderr, " ********* Setting up MHD Turbulence ************* \n"); const int reserve_n = (int)(1.25*local_n); U.reserve(reserve_n); dU.reserve(reserve_n); Wgrad.reserve(reserve_n); U.resize(local_n); dU.resize(local_n); Wgrad.resize(local_n); gamma_gas = 1.0; courant_no = 0.4; for (int i = 0; i < local_n; i++) { assert(U[i][Fluid::DENS] > 0.0); U[i][Fluid::PSI ] = 0.0; for (int k = 0 ; k < Fluid::NSCALARS; k++) U[i].scal(k) = 1.0; dU[i] = Fluid(0.0); Wgrad[i] = 0.0; for (int k = 0; k < Fluid::NFLUID; k++) Wgrad[i].m[k] = U[i][k]; U[i] = U[i].to_conservative(cells[i].Volume); ptcl[i].Volume = cells[i].Volume; } entropy_scalar = -1; isoeos_flag = true; MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr , " pvel ... \n"); get_active_ptcl(true); MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr , " primitives ... \n"); exchange_primitive_and_wdot(); MPI_Barrier(MPI_COMM_WORLD); compute_pvel(); exchange_pvel(); MPI_Barrier(MPI_COMM_WORLD); if (myproc == 0) fprintf(stderr , " tgradients ... \n"); compute_tgradient(); if (myproc == 0) fprintf(stderr , " timestep... \n"); compute_timesteps(true); for (int i = 0; i < local_n; i++) ptcl[i].rung[0] += 3; all_active = true; scheduler.flush_list(); for (int i = 0; i < local_n; i++) scheduler.push_particle(i, (int)ptcl[i].rung[0]); MPI_Barrier(MPI_COMM_WORLD); if (!eulerian) clear_mesh(); if (myproc == 0) fprintf(stderr, " proc= %d: complete problem setup \n", myproc); MPI_Barrier(MPI_COMM_WORLD); }