//
// Number of points in this Box, returned as a double.
//
// Each length(d) is converted to double before the multiplication, so the
// product itself is formed in floating point (presumably so that very
// large boxes do not overflow an integer product -- confirm against
// numPts()).  D_TERM is assumed to keep one term per space dimension.
//
double
Box::d_numPts () const
{
    BL_ASSERT(ok());

    const double npts = D_TERM(double(length(0)), *double(length(1)), *double(length(2)));

    return npts;
}
void CellBilinear::interp (const FArrayBox& crse, int crse_comp, FArrayBox& fine, int fine_comp, int ncomp, const Box& fine_region, const IntVect & ratio, const Geometry& /*crse_geom*/, const Geometry& /*fine_geom*/, Array<BCRec>& /*bcr*/, int actual_comp, int actual_state) { BL_PROFILE("CellBilinear::interp()"); #if (BL_SPACEDIM == 3) BoxLib::Error("interp: not implemented"); #endif // // Set up to call FORTRAN. // const int* clo = crse.box().loVect(); const int* chi = crse.box().hiVect(); const int* flo = fine.loVect(); const int* fhi = fine.hiVect(); const int* lo = fine_region.loVect(); const int* hi = fine_region.hiVect(); int num_slope = D_TERM(2,*2,*2)-1; int len0 = crse.box().length(0); int slp_len = num_slope*len0; Array<Real> slope(slp_len); int strp_len = len0*ratio[0]; Array<Real> strip(strp_len); int strip_lo = ratio[0] * clo[0]; int strip_hi = ratio[0] * chi[0]; const Real* cdat = crse.dataPtr(crse_comp); Real* fdat = fine.dataPtr(fine_comp); const int* ratioV = ratio.getVect(); FORT_CBINTERP (cdat,ARLIM(clo),ARLIM(chi),ARLIM(clo),ARLIM(chi), fdat,ARLIM(flo),ARLIM(fhi),ARLIM(lo),ARLIM(hi), D_DECL(&ratioV[0],&ratioV[1],&ratioV[2]),&ncomp, slope.dataPtr(),&num_slope,strip.dataPtr(),&strip_lo,&strip_hi, &actual_comp,&actual_state); }
void NodeBilinear::interp (const FArrayBox& crse, int crse_comp, FArrayBox& fine, int fine_comp, int ncomp, const Box& fine_region, const IntVect& ratio, const Geometry& /*crse_geom */, const Geometry& /*fine_geom */, Array<BCRec>& /*bcr*/, int actual_comp, int actual_state) { BL_PROFILE("NodeBilinear::interp()"); // // Set up to call FORTRAN. // const int* clo = crse.box().loVect(); const int* chi = crse.box().hiVect(); const int* flo = fine.loVect(); const int* fhi = fine.hiVect(); const int* lo = fine_region.loVect(); const int* hi = fine_region.hiVect(); int num_slope = D_TERM(2,*2,*2)-1; int len0 = crse.box().length(0); int slp_len = num_slope*len0; Array<Real> strip(slp_len); const Real* cdat = crse.dataPtr(crse_comp); Real* fdat = fine.dataPtr(fine_comp); const int* ratioV = ratio.getVect(); FORT_NBINTERP (cdat,ARLIM(clo),ARLIM(chi),ARLIM(clo),ARLIM(chi), fdat,ARLIM(flo),ARLIM(fhi),ARLIM(lo),ARLIM(hi), D_DECL(&ratioV[0],&ratioV[1],&ratioV[2]),&ncomp, strip.dataPtr(),&num_slope,&actual_comp,&actual_state); }
int ParticleBase::CIC_Cells_Fracs (const ParticleBase& p, const Real* plo, const Real* dx_geom, const Real* dx_part, Array<Real>& fracs, Array<IntVect>& cells) { if (dx_geom == dx_part) { const int M = D_TERM(2,+2,+4); fracs.resize(M); cells.resize(M); ParticleBase::CIC_Cells_Fracs_Basic(p,plo,dx_geom,fracs.dataPtr(),cells.dataPtr()); return M; } // // The first element in fracs and cells is the lowest corner, the last is the highest. // const Real hilen[BL_SPACEDIM] = { D_DECL((p.m_pos[0]-plo[0]+dx_part[0]/2)/dx_geom[0], (p.m_pos[1]-plo[1]+dx_part[1]/2)/dx_geom[1], (p.m_pos[2]-plo[2]+dx_part[2]/2)/dx_geom[2]) }; const Real lolen[BL_SPACEDIM] = { D_DECL((p.m_pos[0]-plo[0]-dx_part[0]/2)/dx_geom[0], (p.m_pos[1]-plo[1]-dx_part[1]/2)/dx_geom[1], (p.m_pos[2]-plo[2]-dx_part[2]/2)/dx_geom[2]) }; const IntVect hicell(D_DECL(floor(hilen[0]), floor(hilen[1]), floor(hilen[2]))); const IntVect locell(D_DECL(floor(lolen[0]), floor(lolen[1]), floor(lolen[2]))); const Real cell_density = D_TERM(dx_geom[0]/dx_part[0],*dx_geom[1]/dx_part[1],*dx_geom[2]/dx_part[2]); const int M = D_TERM((hicell[0]-locell[0]+1),*(hicell[1]-locell[1]+1),*(hicell[2]-locell[2]+1)); fracs.resize(M); cells.resize(M); // // This portion might be slightly inefficient. Feel free to redo it if need be. 
// int i = 0; #if (BL_SPACEDIM == 1) for (int xi = locell[0]; xi <= hicell[0]; xi++) { cells[i][0] = xi; fracs[i] = (std::min(hilen[0]-xi,Real(1))-std::max(lolen[0]-xi,Real(0)))*cell_density; i++; } #elif (BL_SPACEDIM == 2) for (int yi = locell[1]; yi <= hicell[1]; yi++) { const Real yf = std::min(hilen[1]-yi,Real(1))-std::max(lolen[1]-yi,Real(0)); for (int xi = locell[0]; xi <= hicell[0]; xi ++) { cells[i][0] = xi; cells[i][1] = yi; fracs[i] = yf * (std::min(hilen[0]-xi,Real(1))-std::max(lolen[0]-xi,Real(0)))*cell_density; i++; } } #elif (BL_SPACEDIM == 3) for (int zi = locell[2]; zi <= hicell[2]; zi++) { const Real zf = std::min(hilen[2]-zi,Real(1))-std::max(lolen[2]-zi,Real(0)); for (int yi = locell[1]; yi <= hicell[1]; yi++) { const Real yf = std::min(hilen[1]-yi,Real(1))-std::max(lolen[1]-yi,Real(0)); for (int xi = locell[0]; xi <= hicell[0]; xi++) { cells[i][0] = xi; cells[i][1] = yi; cells[i][2] = zi; fracs[i] = zf * yf * (std::min(hilen[0]-xi,Real(1))-std::max(lolen[0]-xi,Real(0))) * cell_density; i++; } } } #endif return M; }
//
// Assemble phi, rhs, the exact solution and the error on every level into
// one plot hierarchy, accumulate error norms, and write an HDF5 file.
//
//   a_phi       solution; OVERWRITTEN with (phi - exact), zeroed where a
//               finer level covers it
//   a_rhs       right hand side
//   a_exact     exact solution
//   a_errNorm   out: [0] max over boxes of norm(region,0) of the error,
//               [1] sum over boxes of norm(region,1) scaled by dx^dim
//   a_fname     file name stem; a "<dim>d[.<id>].hdf5" suffix is appended
//   a_cdx       coarsest-level mesh spacing
//   a_petscop   used to copy a_x into a_phi when a_x is non-null
//   a_sub_id    optional id added to the file name when >= 0
//
// Returns a PETSc error code (0 on success).
//
PetscErrorCode
plotAll( Vector<LevelData<FArrayBox> *> &a_phi,
         Vector<LevelData<FArrayBox> *> &a_rhs,
         Vector<RefCountedPtr<LevelData<FArrayBox> > > &a_exact,
         Real a_errNorm[2],
         string a_fname,
         Real a_cdx,
         Vector<DisjointBoxLayout> &a_grids,
         Vector<int> &a_refratios,
         Vector<ProblemDomain> &a_domains,
         PetscCompGridPois &a_petscop,
         Vec a_x,
         int a_sub_id = -1 )
{
  CH_TIME("plotAll");

  int nLev = a_phi.size();
  PetscErrorCode ierr;
  Vector<LevelData<FArrayBox>* > plotData(nLev, NULL);

  // Pull the PETSc vector into the Chombo hierarchy first, if one was given.
  if ( a_x )
    {
      ierr = a_petscop.putPetscInChombo(a_x,a_phi); CHKERRQ(ierr);
    }

  // Four groups of COMP_POIS_DOF components per level: phi, rhs, exact, error.
  for (int lev = 0; lev < nLev; lev++)
    {
      plotData[lev] = new LevelData<FArrayBox>(a_grids[lev],4*COMP_POIS_DOF,IntVect::Unit);
    }

  a_errNorm[0] = a_errNorm[1] = 0;

  // Source interval for every copy, plus the four destination slots.
  const Interval srcInterval(0,COMP_POIS_DOF-1);
  const Interval rhsSlot(COMP_POIS_DOF,2*COMP_POIS_DOF-1);
  const Interval exactSlot(2*COMP_POIS_DOF,3*COMP_POIS_DOF-1);
  const Interval errSlot(3*COMP_POIS_DOF,4*COMP_POIS_DOF-1);

  Real dx = a_cdx;
  for (int lev = 0; lev < nLev; lev++)
    {
      const DisjointBoxLayout& dbl = a_grids[lev];
      LevelData<FArrayBox>& plotLev = *plotData[lev];

      a_phi[lev]->copyTo(srcInterval, plotLev, srcInterval);
      a_rhs[lev]->copyTo(srcInterval, plotLev, rhsSlot);
      a_exact[lev]->copyTo(srcInterval, plotLev, exactSlot);

      // From here on a_phi is reused as storage for the error phi - exact.
      for (DataIterator dit(dbl); dit.ok(); ++dit)
        {
          FArrayBox& exactFab = (*a_exact[lev])[dit];
          FArrayBox& errFab   = (*a_phi[lev])[dit];
          const Box& region   = exactFab.box();
          for (BoxIterator bit(region); bit.ok(); ++bit)
            {
              const IntVect iv = bit();
              for (int comp = 0; comp < COMP_POIS_DOF; comp++)
                {
                  errFab(iv,comp) -= exactFab(iv,comp);
                }
            }
        }

      // Zero the error wherever the next finer level covers this one.
      if (lev != nLev-1)
        {
          DisjointBoxLayout dblCoarsenedFine;
          Copier copier;
          coarsen(dblCoarsenedFine, a_grids[lev+1], a_refratios[lev]); // coarsens entire grid
          copier.define(dblCoarsenedFine, dbl, IntVect::Zero);
          LevelDataOps<FArrayBox> ops;
          ops.copyToZero(*a_phi[lev],copier);
        }

      // Store the error in the last plot slot.
      a_phi[lev]->copyTo(srcInterval, plotLev, errSlot);

      // Accumulate the error norms box by box.
      for (DataIterator dit(dbl); dit.ok(); ++dit)
        {
          const Box region = dbl[dit];
          FArrayBox& errFab = (*a_phi[lev])[dit];

          const Real boxMax = errFab.norm(region,0);
          if (boxMax > a_errNorm[0])
            {
              a_errNorm[0] = boxMax;
            }

          const Real boxSum = errFab.norm(region,1)*D_TERM(dx,*dx,*dx);
          a_errNorm[1] += boxSum;
        }

      // NOTE(review): the spacing is refined by s_refRatio, which is not a
      // parameter of this function, rather than by a_refratios[lev] --
      // confirm the two always agree.
      dx /= s_refRatio;
    }

#ifdef CH_MPI
  {
    // Reduce across ranks: max for the first norm, sum for the second.
    double error;
    MPI_Allreduce( &a_errNorm[0], &error, 1, MPI_DOUBLE, MPI_MAX, PETSC_COMM_WORLD );
    a_errNorm[0] = error;
    MPI_Allreduce( &a_errNorm[1], &error, 1, MPI_DOUBLE, MPI_SUM, PETSC_COMM_WORLD );
    a_errNorm[1] = error;
  }
#endif

  pout() << "\t\t plot |error|_inf=" << a_errNorm[0] << endl;

  // Write the plot file.  The braces bound the lifetime of the "plot" timer.
  {
    CH_TIME("plot");

    char suffix[30];
    if (a_sub_id >= 0)
      {
        sprintf(suffix, "%dd.%d.hdf5",SpaceDim,a_sub_id);
      }
    else
      {
        sprintf(suffix, "%dd.hdf5",SpaceDim);
      }
    a_fname += suffix;

    // Names are a three letter prefix followed by a component digit that
    // overwrites the trailing blank: "phi1", "phi2", ..., "err1", ...
    const char* const prefixes[4] = { "phi ", "rhs ", "exa ", "err " };
    Vector<string> varNames(4*COMP_POIS_DOF);
    int kk = 0;
    for (int grp = 0; grp < 4; ++grp)
      {
        for (int i = 0; i < COMP_POIS_DOF; ++i, kk++)
          {
            varNames[kk]    = prefixes[grp];
            varNames[kk][3] = '1' + i;
          }
      }

    Real bogusVal = 1.0;

    WriteAMRHierarchyHDF5(a_fname,
                          a_grids,
                          plotData,
                          varNames,
                          a_domains[0].domainBox(),
                          a_cdx,
                          bogusVal,
                          bogusVal,
                          a_refratios,
                          nLev);
  }

  // Release the per-level plot buffers allocated above.
  for (int lev = 0; lev < nLev; lev++)
    {
      delete plotData[lev];
    }

  PetscFunctionReturn(0);
}
//
// Look up, or build and insert, the cached fill-boundary communication
// pattern for `mf' and return an iterator to the cache entry.
//
// A hit bumps the entry's use count.  On a miss the cache is first trimmed
// by one entry if it has reached fb_cache_max_size, then a new entry is
// inserted and filled with:
//   m_LocTags            copies between two FABs owned by this process
//   m_SndTags/m_RcvTags  per-process lists of boxes to send / receive
//   m_SndVols/m_RcvVols  per-process point counts for those lists
//   m_threadsafe_loc/rcv flags set below
// All boxes stored in the tag lists are split by comm_tile_size.
//
FabArrayBase::FBCacheIter
FabArrayBase::TheFB (bool                cross,
                     const FabArrayBase& mf)
{
    BL_PROFILE("FabArray::TheFB");

    BL_ASSERT(mf.size() > 0);

    const FabArrayBase::SI si(mf.boxArray(), mf.DistributionMap(), mf.nGrow(), cross);
    //
    // Build the lookup key from the size, the first box, the ghost width,
    // the index type of the first box and the cross flag.  Different
    // patterns may share a key, hence the equality test below.
    //
    const IntVect& Typ   = mf.boxArray()[0].type();
    const int      Scale = D_TERM(Typ[0],+3*Typ[1],+5*Typ[2]) + 11;
    const int      Key   = mf.size() + mf.boxArray()[0].numPts() + mf.nGrow() + Scale + cross;

    std::pair<FBCacheIter,FBCacheIter> range = m_TheFBCache.equal_range(Key);

    for (FBCacheIter hit = range.first; hit != range.second; ++hit)
    {
        if (hit->second == si)
        {
            ++hit->second.m_nuse;
            m_FBC_stats.recordUse();
            return hit;
        }
    }

    if (m_TheFBCache.size() >= fb_cache_max_size)
    {
        //
        // Keep the cache bounded by dropping exactly one entry: the last
        // one in iteration order whose use count is <= 1 or, when every
        // entry has been reused, simply the last one in iteration order.
        //
        const FBCacheIter End    = m_TheFBCache.end();
        FBCacheIter       tail   = End;
        FBCacheIter       victim = End;

        for (FBCacheIter it = m_TheFBCache.begin(); it != End; ++it)
        {
            tail = it;

            if (it->second.m_nuse <= 1)
                victim = it;
        }

        if (victim == End)
            victim = tail;

        if (victim != End)
        {
            m_FBC_stats.recordErase(victim->second.m_nuse);
            m_TheFBCache.erase(victim);
        }
    }
    //
    // Nothing cached matched: insert a copy of si and fill it in.
    //
    FBCacheIter cache_it = m_TheFBCache.insert(FBCache::value_type(Key,si));
    SI&         fb       = cache_it->second;

    const int                  MyProc = ParallelDescriptor::MyProc();
    const BoxArray&            ba     = mf.boxArray();
    const DistributionMapping& dm     = mf.DistributionMap();
    const Array<int>&          imap   = mf.IndexMap();
    //
    // The containers are allocated only now, on the entry that actually
    // lives in the cache, so that the temporary SI built for every lookup
    // never has to construct and destroy them.
    //
    fb.m_LocTags = new CopyComTag::CopyComTagsContainer;
    fb.m_SndTags = new CopyComTag::MapOfCopyComTagContainers;
    fb.m_RcvTags = new CopyComTag::MapOfCopyComTagContainers;
    fb.m_SndVols = new std::map<int,int>;
    fb.m_RcvVols = new std::map<int,int>;
    fb.m_nuse    = 1;

    m_FBC_stats.recordBuild();
    m_FBC_stats.recordUse();

    if (imap.empty())
    {
        //
        // No FAB of mf is owned by this process: there is nothing to do.
        //
        return cache_it;
    }

    const int nlocal = imap.size();
    const int ng     = si.m_ngrow;

    std::vector< std::pair<int,Box> > isects;
    //
    // Sends: intersect every locally owned valid box with the other boxes
    // of the BoxArray (ng passed on to intersections()) and keep the
    // pieces destined for a different process.
    //
    CopyComTag::MapOfCopyComTagContainers send_tags; // untiled, filled first

    for (int i = 0; i < nlocal; ++i)
    {
        const int  ksnd = imap[i];
        const Box& vbx  = ba[ksnd];

        ba.intersections(vbx, isects, ng);

        const int nisects = isects.size();

        for (int j = 0; j < nisects; ++j)
        {
            const int  krcv      = isects[j].first;
            const Box& bx        = isects[j].second;
            const int  dst_owner = dm[krcv];
            //
            // Skip a box meeting itself; on-process copies are collected
            // in the receive loop below.
            //
            if (krcv != ksnd && dst_owner != MyProc)
            {
                send_tags[dst_owner].push_back(CopyComTag(bx, krcv, ksnd));
            }
        }
    }
    //
    // Receives and local copies.  When more than one OpenMP thread may be
    // used we also count how often each destination cell is written, to
    // decide whether the copies may be applied concurrently.
    //
    CopyComTag::MapOfCopyComTagContainers recv_tags; // untiled, filled first

    BaseFab<int> localtouch, remotetouch;
    bool         check_local  = false;
    bool         check_remote = false;
#ifdef _OPENMP
    if (omp_get_max_threads() > 1)
    {
        check_local  = true;
        check_remote = true;
    }
#endif

    if (ba.ixType().cellCentered())
    {
        //
        // Cell-centered data is flagged thread safe without counting.
        //
        fb.m_threadsafe_loc = true;
        fb.m_threadsafe_rcv = true;
        check_local         = false;
        check_remote        = false;
    }

    for (int i = 0; i < nlocal; ++i)
    {
        const int  krcv  = imap[i];
        const Box& bxrcv = BoxLib::grow(ba[krcv], ng);

        if (check_local)
        {
            localtouch.resize(bxrcv);
            localtouch.setVal(0);
        }

        if (check_remote)
        {
            remotetouch.resize(bxrcv);
            remotetouch.setVal(0);
        }

        ba.intersections(bxrcv, isects);

        const int nisects = isects.size();

        for (int j = 0; j < nisects; ++j)
        {
            const int  ksnd      = isects[j].first;
            const Box& bx        = isects[j].second;
            const int  src_owner = dm[ksnd];

            if (krcv == ksnd)
                continue; // a box never fills itself

            if (src_owner != MyProc)
            {
                recv_tags[src_owner].push_back(CopyComTag(bx, krcv, ksnd));

                if (check_remote)
                    remotetouch.plus(1, bx);
            }
            else
            {
                //
                // Both FABs are local: store the copy tile by tile.
                //
                const BoxList tiles(bx, FabArrayBase::comm_tile_size);

                for (BoxList::const_iterator t = tiles.begin(), tEnd = tiles.end();
                     t != tEnd;
                     ++t)
                {
                    fb.m_LocTags->push_back(CopyComTag(*t, krcv, ksnd));
                }

                if (check_local)
                    localtouch.plus(1, bx);
            }
        }
        //
        // Safe only while no cell has been touched more than once.  Once a
        // check fails it is switched off and the flag stays false.
        //
        if (check_local)
        {
            fb.m_threadsafe_loc = (localtouch.max() <= 1);
            check_local         = fb.m_threadsafe_loc;
        }

        if (check_remote)
        {
            fb.m_threadsafe_rcv = (remotetouch.max() <= 1);
            check_remote        = fb.m_threadsafe_rcv;
        }
    }

//    ba.clear_hash_bin();

    //
    // Turn the temporary per-process lists into the cached ones.
    // Pass 0 handles the sends, pass 1 the receives.
    //
    for (int ipass = 0; ipass < 2; ++ipass)
    {
        const bool sending = (ipass == 0);

        CopyComTag::MapOfCopyComTagContainers& Tags    = sending ? *fb.m_SndTags : *fb.m_RcvTags;
        CopyComTag::MapOfCopyComTagContainers& tmpTags = sending ? send_tags     : recv_tags;
        std::map<int,int>&                     Vols    = sending ? *fb.m_SndVols : *fb.m_RcvVols;

        for (CopyComTag::MapOfCopyComTagContainers::iterator it = tmpTags.begin(), End = tmpTags.end();
             it != End;
             ++it)
        {
            const int                key  = it->first;
            std::vector<CopyComTag>& cctv = it->second;
            //
            // Sort so that sender and receiver walk the tags in the same order.
            //
            std::sort(cctv.begin(), cctv.end());

            std::vector<CopyComTag> tiled;
            tiled.reserve(cctv.size());

            for (std::vector<CopyComTag>::const_iterator tag = cctv.begin(), tagEnd = cctv.end();
                 tag != tagEnd;
                 ++tag)
            {
                const Box& bx = tag->box;

                std::vector<Box> pieces;
                int              vol = 0;

                if (!si.m_cross)
                {
                    pieces.push_back(bx);
                    vol += bx.numPts();
                }
                else
                {
                    //
                    // Cross pattern: keep only the parts of bx lying in the
                    // ng-wide slabs directly below and above the destination
                    // box in each direction.
                    //
                    const Box& dstfabbx = ba[tag->fabIndex];

                    for (int dir = 0; dir < BL_SPACEDIM; dir++)
                    {
                        Box lo_slab = dstfabbx;
                        lo_slab.setSmall(dir, dstfabbx.smallEnd(dir) - ng);
                        lo_slab.setBig  (dir, dstfabbx.smallEnd(dir) - 1);
                        lo_slab &= bx;
                        if (lo_slab.ok())
                        {
                            pieces.push_back(lo_slab);
                            vol += lo_slab.numPts();
                        }

                        Box hi_slab = dstfabbx;
                        hi_slab.setSmall(dir, dstfabbx.bigEnd(dir) + 1);
                        hi_slab.setBig  (dir, dstfabbx.bigEnd(dir) + ng);
                        hi_slab &= bx;
                        if (hi_slab.ok())
                        {
                            pieces.push_back(hi_slab);
                            vol += hi_slab.numPts();
                        }
                    }
                }

                Vols[key] += vol;

                for (std::vector<Box>::const_iterator pc = pieces.begin(), pcEnd = pieces.end();
                     pc != pcEnd;
                     ++pc)
                {
                    const BoxList tiles(*pc, FabArrayBase::comm_tile_size);

                    for (BoxList::const_iterator t = tiles.begin(), tEnd = tiles.end();
                         t != tEnd;
                         ++t)
                    {
                        tiled.push_back(CopyComTag(*t, tag->fabIndex, tag->srcIndex));
                    }
                }
            }

            Tags[key].swap(tiled);
        }
    }

    return cache_it;
}
//
// Look up, or build and insert, the cached parallel-copy communication
// pattern described by `cpc' for copying `src' into `dst', and return an
// iterator to the cache entry.
//
// A hit bumps the entry's use count.  On a miss the cache is first trimmed
// by one entry if it has reached copy_cache_max_size, then a new entry is
// inserted and filled with:
//   m_LocTags            copies between two FABs owned by this process
//   m_SndTags/m_RcvTags  per-process lists of boxes to send / receive
//   m_SndVols/m_RcvVols  per-process point counts for those lists
//   m_threadsafe_loc/rcv set when the touch counts are checked
// All boxes stored in the tag lists are split by comm_tile_size.
//
FabArrayBase::CPCCacheIter
FabArrayBase::TheCPC (const CPC&          cpc,
                      const FabArrayBase& dst,
                      const FabArrayBase& src)
{
    BL_PROFILE("FabArrayBase::TheCPC()");

    BL_ASSERT(cpc.m_dstba.size() > 0 && cpc.m_srcba.size() > 0);
    //
    // The key is chosen to keep searches short: it separates copies of
    // equal length whose boxes have different index types, and it is built
    // from destination data first so that dst.copy(src) and src.copy(dst)
    // tend to get different keys.
    //
    CPCCache& TheCopyCache = FabArrayBase::m_TheCopyCache;

    const IntVect& Typ   = cpc.m_dstba[0].type();
    const int      Scale = D_TERM(Typ[0],+3*Typ[1],+5*Typ[2]) + 11;

    int Key = cpc.m_dstba.size() + cpc.m_srcba.size() + Scale;
    Key += cpc.m_dstba[0].numPts() + cpc.m_dstba[cpc.m_dstba.size()-1].numPts();
    Key += cpc.m_dstdm[0] + cpc.m_dstdm[cpc.m_dstdm.size()-1];

    std::pair<CPCCacheIter,CPCCacheIter> range = TheCopyCache.equal_range(Key);

    for (CPCCacheIter hit = range.first; hit != range.second; ++hit)
    {
        if (hit->second == cpc)
        {
            ++hit->second.m_nuse;
            m_CPC_stats.recordUse();
            return hit;
        }
    }

    if (TheCopyCache.size() >= copy_cache_max_size)
    {
        //
        // Keep the cache bounded by dropping exactly one entry: the last
        // one in iteration order whose use count is <= 1 or, when every
        // entry has been reused, simply the last one in iteration order.
        //
        const CPCCache::iterator End    = TheCopyCache.end();
        CPCCache::iterator       tail   = End;
        CPCCache::iterator       victim = End;

        for (CPCCache::iterator it = TheCopyCache.begin(); it != End; ++it)
        {
            tail = it;

            if (it->second.m_nuse <= 1)
                victim = it;
        }

        if (victim == End)
            victim = tail;

        if (victim != End)
        {
            m_CPC_stats.recordErase(victim->second.m_nuse);
            TheCopyCache.erase(victim);
        }
    }
    //
    // Nothing cached matched: insert a copy of cpc and fill it in.
    //
    CPCCacheIter cache_it = TheCopyCache.insert(CPCCache::value_type(Key,cpc));
    CPC&         entry    = cache_it->second;

    const int MyProc = ParallelDescriptor::MyProc();
    //
    // The containers are allocated only now, on the entry that actually
    // lives in the cache, so that the CPC objects built for every lookup
    // never have to construct and destroy them.
    //
    entry.m_LocTags = new CopyComTag::CopyComTagsContainer;
    entry.m_SndTags = new CopyComTag::MapOfCopyComTagContainers;
    entry.m_RcvTags = new CopyComTag::MapOfCopyComTagContainers;
    entry.m_SndVols = new std::map<int,int>;
    entry.m_RcvVols = new std::map<int,int>;
    entry.m_nuse    = 1;

    m_CPC_stats.recordBuild();
    m_CPC_stats.recordUse();

    if (dst.IndexMap().empty() && src.IndexMap().empty())
    {
        //
        // This process owns no FAB of either array: there is nothing to do.
        //
        return cache_it;
    }

    const BoxArray&            ba_src     = entry.m_srcba;
    const DistributionMapping& dm_src     = entry.m_srcdm;
    const Array<int>&          imap_src   = src.IndexMap();
    const int                  nlocal_src = imap_src.size();
    const int                  ng_src     = entry.m_srcng;

    const BoxArray&            ba_dst     = entry.m_dstba;
    const DistributionMapping& dm_dst     = entry.m_dstdm;
    const Array<int>&          imap_dst   = dst.IndexMap();
    const int                  nlocal_dst = imap_dst.size();
    const int                  ng_dst     = entry.m_dstng;

    std::vector< std::pair<int,Box> > isects;
    //
    // Sends: intersect every locally owned source box (grown by ng_src)
    // with the destination boxes (ng_dst passed on to intersections()) and
    // keep the pieces destined for a different process.
    //
    CopyComTag::MapOfCopyComTagContainers send_tags; // untiled, filled first

    for (int i = 0; i < nlocal_src; ++i)
    {
        const int  k_src  = imap_src[i];
        const Box& bx_src = BoxLib::grow(ba_src[k_src], ng_src);

        ba_dst.intersections(bx_src, isects, ng_dst);

        const int nisects = isects.size();

        for (int j = 0; j < nisects; ++j)
        {
            const int  k_dst     = isects[j].first;
            const Box& bx        = isects[j].second;
            const int  dst_owner = dm_dst[k_dst];
            //
            // On-process copies are collected in the receive loop below.
            //
            if (dst_owner != MyProc)
            {
                send_tags[dst_owner].push_back(CopyComTag(bx, k_dst, k_src));
            }
        }
    }
    //
    // Receives and local copies.  When more than one OpenMP thread may be
    // used we also count how often each destination cell is written, to
    // decide whether the copies may be applied concurrently.
    //
    CopyComTag::MapOfCopyComTagContainers recv_tags; // untiled, filled first

    BaseFab<int> localtouch, remotetouch;
    bool         check_local  = false;
    bool         check_remote = false;
#ifdef _OPENMP
    if (omp_get_max_threads() > 1)
    {
        check_local  = true;
        check_remote = true;
    }
#endif

    for (int i = 0; i < nlocal_dst; ++i)
    {
        const int  k_dst  = imap_dst[i];
        const Box& bx_dst = BoxLib::grow(ba_dst[k_dst], ng_dst);

        if (check_local)
        {
            localtouch.resize(bx_dst);
            localtouch.setVal(0);
        }

        if (check_remote)
        {
            remotetouch.resize(bx_dst);
            remotetouch.setVal(0);
        }

        ba_src.intersections(bx_dst, isects, ng_src);

        const int nisects = isects.size();

        for (int j = 0; j < nisects; ++j)
        {
            const int  k_src     = isects[j].first;
            const Box& bx        = isects[j].second;
            const int  src_owner = dm_src[k_src];

            if (src_owner != MyProc)
            {
                recv_tags[src_owner].push_back(CopyComTag(bx, k_dst, k_src));

                if (check_remote)
                    remotetouch.plus(1, bx);
            }
            else
            {
                //
                // Both FABs are local: store the copy tile by tile.
                //
                const BoxList tiles(bx, FabArrayBase::comm_tile_size);

                for (BoxList::const_iterator t = tiles.begin(), tEnd = tiles.end();
                     t != tEnd;
                     ++t)
                {
                    entry.m_LocTags->push_back(CopyComTag(*t, k_dst, k_src));
                }

                if (check_local)
                    localtouch.plus(1, bx);
            }
        }
        //
        // Safe only while no cell has been touched more than once.  Once a
        // check fails it is switched off and the flag stays false.
        //
        if (check_local)
        {
            entry.m_threadsafe_loc = (localtouch.max() <= 1);
            check_local            = entry.m_threadsafe_loc;
        }

        if (check_remote)
        {
            entry.m_threadsafe_rcv = (remotetouch.max() <= 1);
            check_remote           = entry.m_threadsafe_rcv;
        }
    }

//    ba_src.clear_hash_bin();
//    ba_dst.clear_hash_bin();

    //
    // Turn the temporary per-process lists into the cached ones.
    // Pass 0 handles the sends, pass 1 the receives.
    //
    for (int ipass = 0; ipass < 2; ++ipass)
    {
        const bool sending = (ipass == 0);

        CopyComTag::MapOfCopyComTagContainers& Tags    = sending ? *entry.m_SndTags : *entry.m_RcvTags;
        CopyComTag::MapOfCopyComTagContainers& tmpTags = sending ? send_tags        : recv_tags;
        std::map<int,int>&                     Vols    = sending ? *entry.m_SndVols : *entry.m_RcvVols;

        for (CopyComTag::MapOfCopyComTagContainers::iterator it = tmpTags.begin(), End = tmpTags.end();
             it != End;
             ++it)
        {
            const int                key  = it->first;
            std::vector<CopyComTag>& cctv = it->second;
            //
            // Sort so that sender and receiver walk the tags in the same order.
            //
            std::sort(cctv.begin(), cctv.end());

            std::vector<CopyComTag> tiled;
            tiled.reserve(cctv.size());

            for (std::vector<CopyComTag>::const_iterator tag = cctv.begin(), tagEnd = cctv.end();
                 tag != tagEnd;
                 ++tag)
            {
                const Box& bx = tag->box;

                Vols[key] += bx.numPts();

                const BoxList tiles(bx, FabArrayBase::comm_tile_size);

                for (BoxList::const_iterator t = tiles.begin(), tEnd = tiles.end();
                     t != tEnd;
                     ++t)
                {
                    tiled.push_back(CopyComTag(*t, tag->fabIndex, tag->srcIndex));
                }
            }

            Tags[key].swap(tiled);
        }
    }

    return cache_it;
}