//
// Equality test: two BoxLists are equal when they hold the same number
// of boxes and the boxes compare equal pairwise, in list order.
//
bool
BoxList::operator== (const BoxList& rhs) const
{
    if (size() != rhs.size())
        return false;

    BoxList::const_iterator       mine   = begin();
    BoxList::const_iterator       theirs = rhs.begin();
    const BoxList::const_iterator stop   = end();
    //
    // The sizes match, so walking both lists in lock step until our
    // own end is reached also stays inside rhs.
    //
    while (mine != stop)
    {
        if (!(*mine == *theirs))
            return false;

        ++mine;
        ++theirs;
    }

    return true;
}
bool BoxList::contains (const BoxList& bl) const { if (isEmpty() || bl.isEmpty()) return false; BL_ASSERT(ixType() == bl.ixType()); if (!minimalBox().contains(bl.minimalBox())) return false; BoxArray ba(*this); for (const_iterator bli = bl.begin(), End = bl.end(); bli != End; ++bli) if (!ba.contains(*bli)) return false; return true; }
//
// Find, or build and cache, the fill-boundary communication metadata
// for mf.
//
//   cross - stored in the cache record; when set, only the strips
//           adjacent to the low/high faces of the destination box
//           are kept for send/recv (see the second half below).
//   mf    - supplies the BoxArray, DistributionMapping, ghost width
//           and the list of locally owned FAB indices.
//
// Returns an iterator to the cache entry.  On a hit the entry's use
// count is bumped; on a miss a new entry is inserted and populated.
//
FabArrayBase::FBCacheIter
FabArrayBase::TheFB (bool                cross,
                     const FabArrayBase& mf)
{
    BL_PROFILE("FabArray::TheFB");

    BL_ASSERT(mf.size() > 0);

    const FabArrayBase::SI si(mf.boxArray(), mf.DistributionMap(), mf.nGrow(), cross);
    //
    // The key is a simple additive mix of a few layout properties, so
    // unrelated layouts can share a key; every candidate under the key
    // is therefore compared against si.
    //
    const IntVect& ixtype    = mf.boxArray()[0].type();
    const int      type_term = D_TERM(ixtype[0],+3*ixtype[1],+5*ixtype[2]) + 11;
    const int      hash_key  = mf.size() + mf.boxArray()[0].numPts() + mf.nGrow() + type_term + cross;

    const std::pair<FBCacheIter,FBCacheIter> candidates = m_TheFBCache.equal_range(hash_key);

    for (FBCacheIter hit = candidates.first; hit != candidates.second; ++hit)
    {
        if (!(hit->second == si))
            continue;

        ++hit->second.m_nuse;
        m_FBC_stats.recordUse();
        return hit;
    }

    if (m_TheFBCache.size() >= fb_cache_max_size)
    {
        //
        // Keep the cache bounded by evicting one entry.  Preference goes
        // to the last entry (in iteration order) whose use count is at
        // most one, i.e. it was never reused after being built.
        // Failing that, the very last entry is evicted.
        //
        const FBCacheIter none   = m_TheFBCache.end();
        FBCacheIter       last   = none;
        FBCacheIter       unused = none;

        for (FBCacheIter scan = m_TheFBCache.begin(); scan != none; ++scan)
        {
            if (scan->second.m_nuse <= 1)
                unused = scan;

            last = scan;
        }

        const FBCacheIter victim = (unused != none) ? unused : last;

        if (victim != none)
        {
            m_FBC_stats.recordErase(victim->second.m_nuse);
            m_TheFBCache.erase(victim);
        }
    }
    //
    // Cache miss: insert a fresh record and fill it in below.
    //
    FBCacheIter cache_it = m_TheFBCache.insert(FBCache::value_type(hash_key,si));
    SI&         entry    = cache_it->second;

    const int                  MyProc = ParallelDescriptor::MyProc();
    const BoxArray&            ba     = mf.boxArray();
    const DistributionMapping& dm     = mf.DistributionMap();
    const Array<int>&          imap   = mf.IndexMap();
    //
    // The containers live on the heap and are attached only to the
    // record stored in the cache, so the temporary SI objects built
    // for every lookup do not have to construct and destroy them.
    //
    entry.m_LocTags = new CopyComTag::CopyComTagsContainer;
    entry.m_SndTags = new CopyComTag::MapOfCopyComTagContainers;
    entry.m_RcvTags = new CopyComTag::MapOfCopyComTagContainers;
    entry.m_SndVols = new std::map<int,int>;
    entry.m_RcvVols = new std::map<int,int>;

    entry.m_nuse = 1;

    m_FBC_stats.recordBuild();
    m_FBC_stats.recordUse();

    if (imap.empty())
    {
        //
        // No locally owned FABs, hence nothing to send, receive or copy.
        //
        return cache_it;
    }

    const int nlocal = imap.size();
    const int ng     = si.m_ngrow;

    std::vector< std::pair<int,Box> > isects;
    //
    // Sends: for each local valid box, find the boxes whose grown
    // region overlaps it and that live on another process.
    //
    CopyComTag::MapOfCopyComTagContainers send_tags; // staging copy, finalized below

    for (int i = 0; i < nlocal; ++i)
    {
        const int  ksnd = imap[i];
        const Box& vbx  = ba[ksnd];

        ba.intersections(vbx, isects, ng);

        const int nisects = isects.size();

        for (int j = 0; j < nisects; ++j)
        {
            const int  krcv      = isects[j].first;
            const Box& bx        = isects[j].second;
            const int  dst_owner = dm[krcv];
            //
            // A box never sends to itself, and on-process copies are
            // collected in the receive loop below.
            //
            if (krcv != ksnd && dst_owner != MyProc)
            {
                send_tags[dst_owner].push_back(CopyComTag(bx, krcv, ksnd));
            }
        }
    }
    //
    // Receives and local copies: for each local box grown by ng, find
    // the valid boxes that overlap it.
    //
    CopyComTag::MapOfCopyComTagContainers recv_tags; // staging copy, finalized below

    BaseFab<int> localtouch, remotetouch;
    bool         check_local  = false;
    bool         check_remote = false;
#ifdef _OPENMP
    if (omp_get_max_threads() > 1)
    {
        check_local  = true;
        check_remote = true;
    }
#endif

    if (ba.ixType().cellCentered())
    {
        //
        // Cell-centered data is flagged thread safe without counting touches.
        //
        entry.m_threadsafe_loc = true;
        entry.m_threadsafe_rcv = true;
        check_local            = false;
        check_remote           = false;
    }

    for (int i = 0; i < nlocal; ++i)
    {
        const int krcv      = imap[i];
        const Box grown_rcv = BoxLib::grow(ba[krcv], ng);

        if (check_local)
        {
            localtouch.resize(grown_rcv);
            localtouch.setVal(0);
        }

        if (check_remote)
        {
            remotetouch.resize(grown_rcv);
            remotetouch.setVal(0);
        }

        ba.intersections(grown_rcv, isects);

        const int nisects = isects.size();

        for (int j = 0; j < nisects; ++j)
        {
            const int  ksnd      = isects[j].first;
            const Box& bx        = isects[j].second;
            const int  src_owner = dm[ksnd];

            if (krcv == ksnd)
                continue; // same box

            if (src_owner != MyProc)
            {
                //
                // Data arrives from another process.
                //
                recv_tags[src_owner].push_back(CopyComTag(bx, krcv, ksnd));

                if (check_remote)
                    remotetouch.plus(1, bx);
            }
            else
            {
                //
                // On-process copy: store one tag per tile of the overlap.
                //
                const BoxList tilelist(bx, FabArrayBase::comm_tile_size);

                BoxList::const_iterator       tile      = tilelist.begin();
                const BoxList::const_iterator tile_stop = tilelist.end();

                for ( ; tile != tile_stop; ++tile)
                {
                    entry.m_LocTags->push_back(CopyComTag(*tile, krcv, ksnd));
                }

                if (check_local)
                    localtouch.plus(1, bx);
            }
        }
        //
        // The copy is safe as long as no cell has been written more
        // than once.  Once a violation is seen the flag goes false and
        // checking stops for the remaining boxes.
        //
        if (check_local)
        {
            const bool safe        = localtouch.max() <= 1;
            entry.m_threadsafe_loc = safe;
            check_local            = safe;
        }

        if (check_remote)
        {
            const bool safe        = remotetouch.max() <= 1;
            entry.m_threadsafe_rcv = safe;
            check_remote           = safe;
        }
    }

    //    ba.clear_hash_bin();

    //
    // Finalize the staged tags into the cache record, first the sends
    // (pass 0) and then the receives (pass 1).
    //
    for (int ipass = 0; ipass < 2; ++ipass)
    {
        const bool sending = (ipass == 0);

        CopyComTag::MapOfCopyComTagContainers& final_tags  = sending ? *entry.m_SndTags : *entry.m_RcvTags;
        CopyComTag::MapOfCopyComTagContainers& staged_tags = sending ? send_tags        : recv_tags;
        std::map<int,int>&                     volumes     = sending ? *entry.m_SndVols : *entry.m_RcvVols;

        typedef CopyComTag::MapOfCopyComTagContainers::iterator StagedIter;

        const StagedIter staged_stop = staged_tags.end();

        for (StagedIter staged = staged_tags.begin(); staged != staged_stop; ++staged)
        {
            const int                proc = staged->first;
            std::vector<CopyComTag>& tags = staged->second;
            //
            // Sorting gives a fixed order, so that the sending and the
            // receiving process handle the tags in matching order.
            //
            std::sort(tags.begin(), tags.end());

            std::vector<CopyComTag> tiled_tags;
            tiled_tags.reserve(tags.size());

            const std::vector<CopyComTag>::const_iterator tag_stop = tags.end();

            for (std::vector<CopyComTag>::const_iterator tag = tags.begin(); tag != tag_stop; ++tag)
            {
                const Box& bx = tag->box;

                std::vector<Box> pieces;
                int              vol = 0;

                if (!si.m_cross)
                {
                    pieces.push_back(bx);
                    vol += bx.numPts();
                }
                else
                {
                    //
                    // Keep only the parts of bx that fall in the ng-wide
                    // strips just outside the low and high face of the
                    // destination box in each direction.
                    //
                    const Box& dstfabbx = ba[tag->fabIndex];

                    for (int dir = 0; dir < BL_SPACEDIM; dir++)
                    {
                        Box lo = dstfabbx;
                        lo.setSmall(dir, dstfabbx.smallEnd(dir) - ng);
                        lo.setBig  (dir, dstfabbx.smallEnd(dir) - 1);
                        lo &= bx;
                        if (lo.ok())
                        {
                            pieces.push_back(lo);
                            vol += lo.numPts();
                        }

                        Box hi = dstfabbx;
                        hi.setSmall(dir, dstfabbx.bigEnd(dir) + 1);
                        hi.setBig  (dir, dstfabbx.bigEnd(dir) + ng);
                        hi &= bx;
                        if (hi.ok())
                        {
                            pieces.push_back(hi);
                            vol += hi.numPts();
                        }
                    }
                }

                volumes[proc] += vol;
                //
                // Each piece is cut into tiles and one tag is emitted per tile.
                //
                const std::vector<Box>::const_iterator piece_stop = pieces.end();

                for (std::vector<Box>::const_iterator piece = pieces.begin(); piece != piece_stop; ++piece)
                {
                    const BoxList tilelist(*piece, FabArrayBase::comm_tile_size);

                    BoxList::const_iterator       tile      = tilelist.begin();
                    const BoxList::const_iterator tile_stop = tilelist.end();

                    for ( ; tile != tile_stop; ++tile)
                    {
                        tiled_tags.push_back(CopyComTag(*tile, tag->fabIndex, tag->srcIndex));
                    }
                }
            }

            final_tags[proc].swap(tiled_tags);
        }
    }

    return cache_it;
}
void AuxBoundaryData::initialize (const BoxArray& ba, int n_grow, int n_comp, const Geometry& geom) { BL_ASSERT(!m_initialized); const bool verbose = false; const int NProcs = ParallelDescriptor::NProcs(); const Real strt_time = ParallelDescriptor::second(); m_ngrow = n_grow; BoxList gcells = BoxLib::GetBndryCells(ba,n_grow); // // Remove any intersections with periodically shifted valid region. // if (geom.isAnyPeriodic()) { Box dmn = geom.Domain(); for (int d = 0; d < BL_SPACEDIM; d++) if (!geom.isPeriodic(d)) dmn.grow(d,n_grow); for (BoxList::iterator it = gcells.begin(); it != gcells.end(); ) { const Box& isect = *it & dmn; if (isect.ok()) { *it++ = isect; } else { gcells.remove(it++); } } } gcells.simplify(); if (gcells.size() < NProcs) { gcells.maxSize(BL_SPACEDIM == 3 ? 64 : 128); } BoxArray nba(gcells); gcells.clear(); if (nba.size() > 0) { m_fabs.define(nba, n_comp, 0, Fab_allocate); } else { m_empty = true; } if (verbose) { const int IOProc = ParallelDescriptor::IOProcessorNumber(); Real run_time = ParallelDescriptor::second() - strt_time; const int sz = nba.size(); #ifdef BL_LAZY Lazy::QueueReduction( [=] () mutable { #endif ParallelDescriptor::ReduceRealMax(run_time,IOProc); if (ParallelDescriptor::IOProcessor()) std::cout << "AuxBoundaryData::initialize() size = " << sz << ", time = " << run_time << '\n'; #ifdef BL_LAZY }); #endif } m_initialized = true; }
void MFGhostIter::Initialize () { int rit = 0; int nworkers = 1; #ifdef BL_USE_TEAM if (ParallelDescriptor::TeamSize() > 1) { rit = ParallelDescriptor::MyRankInTeam(); nworkers = ParallelDescriptor::TeamSize(); } #endif int tid = 0; int nthreads = 1; #ifdef _OPENMP nthreads = omp_get_num_threads(); if (nthreads > 1) tid = omp_get_thread_num(); #endif int npes = nworkers*nthreads; int pid = rit*nthreads+tid; BoxList alltiles; Array<int> allindex; Array<int> alllocalindex; for (int i=0; i < fabArray.IndexMap().size(); ++i) { int K = fabArray.IndexMap()[i]; const Box& vbx = fabArray.box(K); const Box& fbx = fabArray.fabbox(K); const BoxList& diff = BoxLib::boxDiff(fbx, vbx); for (BoxList::const_iterator bli = diff.begin(); bli != diff.end(); ++bli) { BoxList tiles(*bli, FabArrayBase::mfghostiter_tile_size); int nt = tiles.size(); for (int it=0; it<nt; ++it) { allindex.push_back(K); alllocalindex.push_back(i); } alltiles.catenate(tiles); } } int n_tot_tiles = alltiles.size(); int navg = n_tot_tiles / npes; int nleft = n_tot_tiles - navg*npes; int ntiles = navg; if (pid < nleft) ntiles++; // how many tiles should we skip? int nskip = pid*navg + std::min(pid,nleft); BoxList::const_iterator bli = alltiles.begin(); for (int i=0; i<nskip; ++i) ++bli; lta.indexMap.reserve(ntiles); lta.localIndexMap.reserve(ntiles); lta.tileArray.reserve(ntiles); for (int i=0; i<ntiles; ++i) { lta.indexMap.push_back(allindex[i+nskip]); lta.localIndexMap.push_back(alllocalindex[i+nskip]); lta.tileArray.push_back(*bli++); } currentIndex = beginIndex = 0; endIndex = lta.indexMap.size(); lta.nuse = 0; index_map = &(lta.indexMap); local_index_map = &(lta.localIndexMap); tile_array = &(lta.tileArray); }