Exemple #1
0
//
// Returns the point count of this Box as a double: length(0), multiplied by
// length(1) and length(2) for whichever of those terms D_TERM keeps.
// Each length is converted to double before multiplying.
//
double
Box::d_numPts () const
{
    BL_ASSERT(ok());

    const double npts = D_TERM(static_cast<double>(length(0)),
                               *static_cast<double>(length(1)),
                               *static_cast<double>(length(2)));

    return npts;
}
Exemple #2
0
void
CellBilinear::interp (const FArrayBox&  crse,
                      int               crse_comp,
                      FArrayBox&        fine,
                      int               fine_comp,
                      int               ncomp,
                      const Box&        fine_region,
                      const IntVect &   ratio,
                      const Geometry& /*crse_geom*/,
                      const Geometry& /*fine_geom*/,
                      Array<BCRec>&   /*bcr*/,
                      int               actual_comp,
                      int               actual_state)
{
    BL_PROFILE("CellBilinear::interp()");
#if (BL_SPACEDIM == 3)
    BoxLib::Error("interp: not implemented");
#endif
    //
    // Set up to call FORTRAN.
    //
    const int* clo = crse.box().loVect();
    const int* chi = crse.box().hiVect();
    const int* flo = fine.loVect();
    const int* fhi = fine.hiVect();
    const int* lo  = fine_region.loVect();
    const int* hi  = fine_region.hiVect();
    int num_slope  = D_TERM(2,*2,*2)-1;
    int len0       = crse.box().length(0);
    int slp_len    = num_slope*len0;

    Array<Real> slope(slp_len);

    int strp_len = len0*ratio[0];

    Array<Real> strip(strp_len);

    int strip_lo = ratio[0] * clo[0];
    int strip_hi = ratio[0] * chi[0];

    const Real* cdat  = crse.dataPtr(crse_comp);
    Real*       fdat  = fine.dataPtr(fine_comp);
    const int* ratioV = ratio.getVect();

    FORT_CBINTERP (cdat,ARLIM(clo),ARLIM(chi),ARLIM(clo),ARLIM(chi),
                   fdat,ARLIM(flo),ARLIM(fhi),ARLIM(lo),ARLIM(hi),
                   D_DECL(&ratioV[0],&ratioV[1],&ratioV[2]),&ncomp,
                   slope.dataPtr(),&num_slope,strip.dataPtr(),&strip_lo,&strip_hi,
                   &actual_comp,&actual_state);
}
Exemple #3
0
void
NodeBilinear::interp (const FArrayBox&  crse,
                      int               crse_comp,
                      FArrayBox&        fine,
                      int               fine_comp,
                      int               ncomp,
                      const Box&        fine_region,
                      const IntVect&    ratio,
                      const Geometry& /*crse_geom */,
                      const Geometry& /*fine_geom */,
                      Array<BCRec>&   /*bcr*/,
                      int               actual_comp,
                      int               actual_state)
{
    BL_PROFILE("NodeBilinear::interp()");
    //
    // Set up to call FORTRAN.
    //
    const int* clo = crse.box().loVect();
    const int* chi = crse.box().hiVect();
    const int* flo = fine.loVect();
    const int* fhi = fine.hiVect();
    const int* lo  = fine_region.loVect();
    const int* hi  = fine_region.hiVect();
    int num_slope  = D_TERM(2,*2,*2)-1;
    int len0       = crse.box().length(0);
    int slp_len    = num_slope*len0;

    Array<Real> strip(slp_len);

    const Real* cdat  = crse.dataPtr(crse_comp);
    Real*       fdat  = fine.dataPtr(fine_comp);
    const int* ratioV = ratio.getVect();

    FORT_NBINTERP (cdat,ARLIM(clo),ARLIM(chi),ARLIM(clo),ARLIM(chi),
                   fdat,ARLIM(flo),ARLIM(fhi),ARLIM(lo),ARLIM(hi),
                   D_DECL(&ratioV[0],&ratioV[1],&ratioV[2]),&ncomp,
                   strip.dataPtr(),&num_slope,&actual_comp,&actual_state);
}
Exemple #4
0
int
ParticleBase::CIC_Cells_Fracs (const ParticleBase& p,
                               const Real*         plo,
                               const Real*         dx_geom,
                               const Real*         dx_part,
                               Array<Real>&        fracs,
                               Array<IntVect>&     cells)
{
    if (dx_geom == dx_part)
    {
        const int M = D_TERM(2,+2,+4);
        fracs.resize(M);
        cells.resize(M);
        ParticleBase::CIC_Cells_Fracs_Basic(p,plo,dx_geom,fracs.dataPtr(),cells.dataPtr());
        return M;
    }
    //
    // The first element in fracs and cells is the lowest corner, the last is the highest.
    //
    const Real hilen[BL_SPACEDIM] = { D_DECL((p.m_pos[0]-plo[0]+dx_part[0]/2)/dx_geom[0],
                                             (p.m_pos[1]-plo[1]+dx_part[1]/2)/dx_geom[1],
                                             (p.m_pos[2]-plo[2]+dx_part[2]/2)/dx_geom[2]) };

    const Real lolen[BL_SPACEDIM] = { D_DECL((p.m_pos[0]-plo[0]-dx_part[0]/2)/dx_geom[0],
                                             (p.m_pos[1]-plo[1]-dx_part[1]/2)/dx_geom[1],
                                             (p.m_pos[2]-plo[2]-dx_part[2]/2)/dx_geom[2]) };

    const IntVect hicell(D_DECL(floor(hilen[0]), floor(hilen[1]), floor(hilen[2])));
    
    const IntVect locell(D_DECL(floor(lolen[0]), floor(lolen[1]), floor(lolen[2])));
    
    const Real cell_density = D_TERM(dx_geom[0]/dx_part[0],*dx_geom[1]/dx_part[1],*dx_geom[2]/dx_part[2]);
    
    const int M = D_TERM((hicell[0]-locell[0]+1),*(hicell[1]-locell[1]+1),*(hicell[2]-locell[2]+1));

    fracs.resize(M);
    cells.resize(M);
    //
    // This portion might be slightly inefficient. Feel free to redo it if need be.
    //
    int i = 0;
#if (BL_SPACEDIM == 1)
    for (int xi = locell[0]; xi <= hicell[0]; xi++)
    {
        cells[i][0] = xi;
        fracs[i] = (std::min(hilen[0]-xi,Real(1))-std::max(lolen[0]-xi,Real(0)))*cell_density;
        i++;
    }
#elif (BL_SPACEDIM == 2)
    for (int yi = locell[1]; yi <= hicell[1]; yi++)
    {
        const Real yf = std::min(hilen[1]-yi,Real(1))-std::max(lolen[1]-yi,Real(0));
        for (int xi = locell[0]; xi <= hicell[0]; xi ++)
        {
            cells[i][0] = xi;
            cells[i][1] = yi;
            fracs[i] = yf * (std::min(hilen[0]-xi,Real(1))-std::max(lolen[0]-xi,Real(0)))*cell_density;
            i++;
        }
    }
#elif (BL_SPACEDIM == 3)
    for (int zi = locell[2]; zi <= hicell[2]; zi++)
    {
        const Real zf = std::min(hilen[2]-zi,Real(1))-std::max(lolen[2]-zi,Real(0));
        for (int yi = locell[1]; yi <= hicell[1]; yi++)
        {
            const Real yf = std::min(hilen[1]-yi,Real(1))-std::max(lolen[1]-yi,Real(0));
            for (int xi = locell[0]; xi <= hicell[0]; xi++)
            {
                cells[i][0] = xi;
                cells[i][1] = yi;
                cells[i][2] = zi;
                fracs[i] = zf * yf * (std::min(hilen[0]-xi,Real(1))-std::max(lolen[0]-xi,Real(0))) * cell_density;
                i++;
            }
        }
    }
#endif

    return M;
}
// Builds a 4*COMP_POIS_DOF-component plot hierarchy (phi, rhs, exact, error),
// accumulates error norms into a_errNorm and writes the hierarchy with
// WriteAMRHierarchyHDF5.
//
//   a_errNorm[0]  largest per-box norm(region,0) of the error
//   a_errNorm[1]  sum over boxes of norm(region,1) scaled by dx per dimension
//                 (both are reduced across ranks when CH_MPI is defined)
//
// Side effects on the arguments: when a_x is non-null a_phi is first
// refreshed from it with putPetscInChombo; a_phi is then overwritten in place
// with (phi - exact) and passed through copyToZero on every level but the
// finest.  a_fname is a by-value copy; the dimension (and a_sub_id when
// a_sub_id >= 0) is appended to it to form the output file name.
//
// NOTE(review): the per-level dx is divided by s_refRatio, not by
// a_refratios[ilev] -- confirm the two always agree.
PetscErrorCode plotAll( Vector<LevelData<FArrayBox> *> &a_phi,
                        Vector<LevelData<FArrayBox> *> &a_rhs,
                        Vector<RefCountedPtr<LevelData<FArrayBox> > > &a_exact,
                        Real a_errNorm[2], string a_fname, Real a_cdx,
                        Vector<DisjointBoxLayout> &a_grids,
                        Vector<int> &a_refratios,
                        Vector<ProblemDomain> &a_domains,
                        PetscCompGridPois &a_petscop,
                        Vec a_x,
                        int a_sub_id = -1 )
{
  CH_TIME("plotAll");
  int numLevels = a_phi.size();
  PetscErrorCode ierr;
  Vector<LevelData<FArrayBox>* > plotData(numLevels, NULL);

  // Pull the PETSc solution into the Chombo hierarchy when one was supplied.
  if ( a_x )
    {
      ierr = a_petscop.putPetscInChombo(a_x,a_phi); CHKERRQ(ierr);
    }

  // One plot container per level, with a single ghost cell.
  for (int lev = 0; lev < numLevels; ++lev)
    {
      plotData[lev] = new LevelData<FArrayBox>(a_grids[lev],4*COMP_POIS_DOF,IntVect::Unit);
    }

  a_errNorm[0] = 0;
  a_errNorm[1] = 0;

  // Component slots inside the plot data; the source data always sits in the
  // first COMP_POIS_DOF components.
  const Interval srcComps  (0,               COMP_POIS_DOF-1);
  const Interval rhsComps  (COMP_POIS_DOF,   2*COMP_POIS_DOF-1);
  const Interval exactComps(2*COMP_POIS_DOF, 3*COMP_POIS_DOF-1);
  const Interval errComps  (3*COMP_POIS_DOF, 4*COMP_POIS_DOF-1);

  Real dx = a_cdx;
  for (int lev = 0; lev < numLevels; ++lev, dx /= s_refRatio)
    {
      const DisjointBoxLayout& levelGrids = a_grids[lev];
      LevelData<FArrayBox>&    levelPlot  = *plotData[lev];

      a_phi[lev]->copyTo(srcComps, levelPlot, srcComps);
      a_rhs[lev]->copyTo(srcComps, levelPlot, rhsComps);
      a_exact[lev]->copyTo(srcComps, levelPlot, exactComps);

      // Turn phi into the error, in place, over the exact solution's box.
      for (DataIterator dit(levelGrids); dit.ok(); ++dit)
        {
          FArrayBox& exactFab = (*a_exact[lev])[dit];
          FArrayBox& phiFab   = (*a_phi[lev])[dit];
          const Box  errBox   = exactFab.box();
          for (BoxIterator bit(errBox); bit.ok(); ++bit)
            {
              const IntVect iv = bit();
              for (int comp = 0; comp < COMP_POIS_DOF; ++comp)
                {
                  phiFab(iv,comp) -= exactFab(iv,comp);
                }
            }
        }

      // Zero the error under the next finer level (coarsened to this level).
      if (lev != numLevels-1)
        {
          DisjointBoxLayout coarsenedFine;
          coarsen(coarsenedFine, a_grids[lev+1], a_refratios[lev]);

          Copier zeroCopier;
          zeroCopier.define(coarsenedFine, levelGrids, IntVect::Zero);

          LevelDataOps<FArrayBox> ops;
          ops.copyToZero(*a_phi[lev],zeroCopier);
        }

      // Store the error in the last component slot.
      a_phi[lev]->copyTo(srcComps, levelPlot, errComps);

      // Accumulate the norms box by box.
      for (DataIterator dit(levelGrids); dit.ok(); ++dit)
        {
          Box        region = levelGrids[dit];
          FArrayBox& errFab = (*a_phi[lev])[dit];

          const Real maxNorm = errFab.norm(region,0);
          if (maxNorm > a_errNorm[0])
            {
              a_errNorm[0] = maxNorm;
            }

          const Real oneNorm = errFab.norm(region,1)*D_TERM(dx,*dx,*dx);
          a_errNorm[1] += oneNorm;
        }
    }

#ifdef CH_MPI
  // Combine the per-rank norms: max for [0], sum for [1].
  {
    double reduced;
    MPI_Allreduce( &a_errNorm[0], &reduced, 1, MPI_DOUBLE, MPI_MAX, PETSC_COMM_WORLD );
    a_errNorm[0] = reduced;
    MPI_Allreduce( &a_errNorm[1], &reduced, 1, MPI_DOUBLE, MPI_SUM, PETSC_COMM_WORLD );
    a_errNorm[1] = reduced;
  }
#endif

  pout() << "\t\t plot |error|_inf=" << a_errNorm[0] << endl;

  // Write the plot file; the braces scope the nested timer.
  {
    CH_TIME("plot");

    char suffix[30];
    if (a_sub_id >= 0)
      {
        sprintf(suffix, "%dd.%d.hdf5",SpaceDim,a_sub_id);
      }
    else
      {
        sprintf(suffix, "%dd.hdf5",SpaceDim);
      }
    a_fname += suffix;

    // Names are a three-letter prefix followed by a character counting up
    // from '1' within each group.
    const char* const prefixes[4] = { "phi ", "rhs ", "exa ", "err " };
    Vector<string> varNames(4*COMP_POIS_DOF);
    int slot = 0;
    for (int grp = 0; grp < 4; ++grp)
      {
        for (int comp = 0; comp < COMP_POIS_DOF; ++comp, ++slot)
          {
            varNames[slot]    = prefixes[grp];
            varNames[slot][3] = '1' + comp;
          }
      }

    Real bogusVal = 1.0;
    WriteAMRHierarchyHDF5(a_fname,
                          a_grids,
                          plotData,
                          varNames,
                          a_domains[0].domainBox(),
                          a_cdx,
                          bogusVal,
                          bogusVal,
                          a_refratios,
                          numLevels);
  }

  // Release the per-level plot containers allocated above.
  for (int lev = 0; lev < numLevels; ++lev)
    {
      delete plotData[lev];
    }

  PetscFunctionReturn(0);
}
Exemple #6
0
//
// Returns an iterator to the cached SI entry that describes the fill-boundary
// copy pattern for mf.  A matching entry in m_TheFBCache is reused (and its
// use count bumped); otherwise a new entry is inserted and its local, send
// and receive tag lists and per-process volumes are built here.
//
// cross  stored in the SI record; when set, send/receive boxes are cut down
//        to the slabs of width ng lying just outside each face of the
//        destination box (see the m_cross branch below).
// mf     supplies the BoxArray, DistributionMapping, ghost width and the
//        list of locally owned indices (IndexMap()).
//
FabArrayBase::FBCacheIter
FabArrayBase::TheFB (bool                cross,
                     const FabArrayBase& mf)
{
    BL_PROFILE("FabArray::TheFB");

    BL_ASSERT(mf.size() > 0);

    const FabArrayBase::SI si(mf.boxArray(), mf.DistributionMap(), mf.nGrow(), cross);

    //
    // The key only selects a bucket; entries sharing a key are told apart
    // by the full comparison against si in the loop below.
    //
    const IntVect& Typ   = mf.boxArray()[0].type();
    const int      Scale = D_TERM(Typ[0],+3*Typ[1],+5*Typ[2]) + 11;
    const int      Key   = mf.size() + mf.boxArray()[0].numPts() + mf.nGrow() + Scale + cross;

    std::pair<FBCacheIter,FBCacheIter> er_it = m_TheFBCache.equal_range(Key);

    for (FBCacheIter it = er_it.first; it != er_it.second; ++it)
    {
        if (it->second == si)
        {
	    ++it->second.m_nuse;
	    m_FBC_stats.recordUse();
            return it;
        }
    }

    if (m_TheFBCache.size() >= fb_cache_max_size)
    {
        //
        // Don't let the size of the cache get too big.
        // Walk the whole cache: erase the last entry, in iteration order,
        // whose use count is still <= 1 (i.e. it was never reused).
        // If every entry has been reused, erase the last entry instead.
        //
        FBCacheIter End      = m_TheFBCache.end();
        FBCacheIter last_it  = End;
        FBCacheIter erase_it = End;

        for (FBCacheIter it = m_TheFBCache.begin(); it != End; ++it)
        {
            last_it = it;

            if (it->second.m_nuse <= 1)
                erase_it = it;
        }

        if (erase_it != End)
        {
	    m_FBC_stats.recordErase(erase_it->second.m_nuse);
            m_TheFBCache.erase(erase_it);
        }
        else if (last_it != End)
        {
	    m_FBC_stats.recordErase(last_it->second.m_nuse);
	    m_TheFBCache.erase(last_it);
        }
    }
    //
    // Got to insert one & then build it.
    //
    FBCacheIter                cache_it = m_TheFBCache.insert(FBCache::value_type(Key,si));
    SI&                        TheFB    = cache_it->second;
    const int                  MyProc   = ParallelDescriptor::MyProc();
    const BoxArray&            ba       = mf.boxArray();
    const DistributionMapping& dm       = mf.DistributionMap();
    const Array<int>&          imap     = mf.IndexMap();
    //
    // Here's where we allocate memory for the cache innards.
    // We do this so we don't have to build objects of these types
    // each time we search the cache.  Otherwise we'd be constructing
    // and destroying said objects quite frequently.
    //
    TheFB.m_LocTags = new CopyComTag::CopyComTagsContainer;
    TheFB.m_SndTags = new CopyComTag::MapOfCopyComTagContainers;
    TheFB.m_RcvTags = new CopyComTag::MapOfCopyComTagContainers;
    TheFB.m_SndVols = new std::map<int,int>;
    TheFB.m_RcvVols = new std::map<int,int>;

    TheFB.m_nuse = 1;

    m_FBC_stats.recordBuild();
    m_FBC_stats.recordUse();

    if (imap.empty())
        //
        // We don't own any of the relevant FABs so can't possibly have any work to do.
        //
        return cache_it;

    const int nlocal = imap.size();
    const int ng = si.m_ngrow;
    std::vector< std::pair<int,Box> > isects;

    CopyComTag::MapOfCopyComTagContainers send_tags; // temp copy

    //
    // Send side: for every box we own, find the other boxes whose region
    // grown by ng overlaps it, and queue a tag for each one owned by
    // another process, keyed by that process.
    //
    for (int i = 0; i < nlocal; ++i)
    {
	const int ksnd = imap[i];
	const Box& vbx = ba[ksnd];

	ba.intersections(vbx, isects, ng);

	for (int j = 0, M = isects.size(); j < M; ++j)
	{
	    const int krcv      = isects[j].first;
	    const Box& bx       = isects[j].second;
	    const int dst_owner = dm[krcv];

	    if (krcv == ksnd) continue;  // same box

	    if (dst_owner == MyProc) continue;  // local copy will be dealt with later

	    send_tags[dst_owner].push_back(CopyComTag(bx, krcv, ksnd));
	}
    }

    CopyComTag::MapOfCopyComTagContainers recv_tags; // temp copy

    //
    // The touch counters are only used when more than one OpenMP thread is
    // available and the BoxArray is not cell-centered; for cell-centered
    // data both thread-safety flags are simply set to true.
    //
    BaseFab<int> localtouch, remotetouch;
    bool check_local = false, check_remote = false;
#ifdef _OPENMP
    if (omp_get_max_threads() > 1) {
        check_local = true;
        check_remote = true;
    }
#endif

    if (ba.ixType().cellCentered()) {
	TheFB.m_threadsafe_loc = true;
	TheFB.m_threadsafe_rcv = true;
        check_local = false;
        check_remote = false;
    }

    //
    // Receive side: for every box we own, grown by ng, find the boxes that
    // overlap it.  Overlaps with our own boxes become tiled local tags;
    // overlaps with remote boxes are queued per source process.
    //
    for (int i = 0; i < nlocal; ++i)
    {
	const int   krcv = imap[i];
	const Box& bxrcv = BoxLib::grow(ba[krcv], ng);

	if (check_local) {
	    localtouch.resize(bxrcv);
	    localtouch.setVal(0);
	}

	if (check_remote) {
	    remotetouch.resize(bxrcv);
	    remotetouch.setVal(0);
	}

	ba.intersections(bxrcv, isects);

	for (int j = 0, M = isects.size(); j < M; ++j)
	{
	    const int ksnd      = isects[j].first;
	    const Box& bx       = isects[j].second;
	    const int src_owner = dm[ksnd];

	    if (krcv == ksnd) continue;  // same box

	    if (src_owner == MyProc) { // local copy
		const BoxList tilelist(bx, FabArrayBase::comm_tile_size);
		for (BoxList::const_iterator
			 it_tile  = tilelist.begin(),
			 End_tile = tilelist.end();   it_tile != End_tile; ++it_tile)
		{
		    TheFB.m_LocTags->push_back(CopyComTag(*it_tile, krcv, ksnd));
		}
		if (check_local) {
		    localtouch.plus(1, bx);
		}
	    } else {
		recv_tags[src_owner].push_back(CopyComTag(bx, krcv, ksnd));
		if (check_remote) {
		    remotetouch.plus(1, bx);
		}
	    }
	}

	if (check_local) {  
	    // safe if a cell is touched no more than once 
	    // keep checking thread safety if it is safe so far;
	    // the first unsafe box clears the flag and stops further checks
            check_local = TheFB.m_threadsafe_loc = localtouch.max() <= 1;
        }

	if (check_remote) {
	    // same rule as above, for cells filled from remote boxes
            check_remote = TheFB.m_threadsafe_rcv = remotetouch.max() <= 1;
        }
    }

//    ba.clear_hash_bin();

    //
    // Turn the temporary per-process tag lists into the cached ones:
    // sort, optionally restrict to the cross slabs, total the volumes,
    // and split every box into tiles.
    //
    for (int ipass = 0; ipass < 2; ++ipass) // pass 0: send; pass 1: recv
    {
	CopyComTag::MapOfCopyComTagContainers & Tags
	    = (ipass == 0) ? *TheFB.m_SndTags : *TheFB.m_RcvTags;
	CopyComTag::MapOfCopyComTagContainers & tmpTags
	    = (ipass == 0) ?        send_tags :        recv_tags;
	std::map<int,int> & Vols
	    = (ipass == 0) ? *TheFB.m_SndVols : *TheFB.m_RcvVols;

	for (CopyComTag::MapOfCopyComTagContainers::iterator 
		 it  = tmpTags.begin(), 
		 End = tmpTags.end();   it != End; ++it)
	{
	    const int key = it->first;
	    std::vector<CopyComTag>& cctv = it->second;

	    // We need to fix the order so that the send and recv processes match.
	    std::sort(cctv.begin(), cctv.end());

	    std::vector<CopyComTag> new_cctv;
	    new_cctv.reserve(cctv.size());

	    for (std::vector<CopyComTag>::const_iterator 
		     it2  = cctv.begin(),
		     End2 = cctv.end();   it2 != End2; ++it2)
	    {
		const Box& bx = it2->box;

		std::vector<Box> boxes;
		int vol = 0;

		if (si.m_cross) {
		    // Keep only the parts of bx that fall in the slab of
		    // width ng just below or just above the destination box
		    // in each direction; the other directions stay within
		    // the destination box's own extent.
		    const Box& dstfabbx = ba[it2->fabIndex];
		    for (int dir = 0; dir < BL_SPACEDIM; dir++)
	            {
			Box lo = dstfabbx;
			lo.setSmall(dir, dstfabbx.smallEnd(dir) - ng);
			lo.setBig  (dir, dstfabbx.smallEnd(dir) - 1);
			lo &= bx;
			if (lo.ok()) {
			    boxes.push_back(lo);
			    vol += lo.numPts();
			}

			Box hi = dstfabbx;
			hi.setSmall(dir, dstfabbx.bigEnd(dir) + 1);
			hi.setBig  (dir, dstfabbx.bigEnd(dir) + ng);
			hi &= bx;
			if (hi.ok()) {
			    boxes.push_back(hi);
			    vol += hi.numPts();
			}
		    }
		} else {
		    boxes.push_back(bx);
		    vol += bx.numPts();
		}

		// Volume is counted before tiling, per partner process.
		Vols[key] += vol;

		for (std::vector<Box>::const_iterator 
			 it_bx  = boxes.begin(),
			 End_bx = boxes.end();    it_bx != End_bx; ++it_bx)
	        {
		    const BoxList tilelist(*it_bx, FabArrayBase::comm_tile_size);
		    for (BoxList::const_iterator 
			     it_tile  = tilelist.begin(), 
			     End_tile = tilelist.end();   it_tile != End_tile; ++it_tile)
                    {
			new_cctv.push_back(CopyComTag(*it_tile, it2->fabIndex, it2->srcIndex));
		    }
		}
	    }

	    Tags[key].swap(new_cctv);
	}
    }

    return cache_it;
}
Exemple #7
0
//
// Returns an iterator to the cached CPC entry that describes the parallel
// copy pattern given by cpc.  A matching entry in m_TheCopyCache is reused
// (and its use count bumped); otherwise a copy of cpc is inserted and its
// local, send and receive tag lists and per-process volumes are built here.
//
// cpc  the pattern to look up; supplies the source and destination
//      BoxArrays, DistributionMappings and ghost widths.
// dst  only its IndexMap() (locally owned destination indices) is read.
// src  only its IndexMap() (locally owned source indices) is read.
//
FabArrayBase::CPCCacheIter
FabArrayBase::TheCPC (const CPC&          cpc,
                      const FabArrayBase& dst,
                      const FabArrayBase& src)
{
    BL_PROFILE("FabArrayBase::TheCPC()");

    BL_ASSERT(cpc.m_dstba.size() > 0 && cpc.m_srcba.size() > 0);
    //
    // We want to choose our keys wisely to minimize search time.
    // We'd like to distinguish between copies of the same length
    // but with different edgeness of boxes.  We also want to
    // differentiate dst.copy(src) from src.copy(dst).
    //
    CPCCache&      TheCopyCache = FabArrayBase::m_TheCopyCache;
    const IntVect& Typ          = cpc.m_dstba[0].type();
    const int      Scale        = D_TERM(Typ[0],+3*Typ[1],+5*Typ[2]) + 11;

    int Key = cpc.m_dstba.size() + cpc.m_srcba.size() + Scale;
    Key    += cpc.m_dstba[0].numPts() + cpc.m_dstba[cpc.m_dstba.size()-1].numPts();
    Key    += cpc.m_dstdm[0] + cpc.m_dstdm[cpc.m_dstdm.size()-1];

    std::pair<CPCCacheIter,CPCCacheIter> er_it = TheCopyCache.equal_range(Key);

    // Entries sharing the key are told apart by the full comparison with cpc.
    for (CPCCacheIter it = er_it.first; it != er_it.second; ++it)
    {
        if (it->second == cpc)
        {
	    ++it->second.m_nuse;
	    m_CPC_stats.recordUse();
            return it;
        }
    }

    if (TheCopyCache.size() >= copy_cache_max_size)
    {
        //
        // Don't let the size of the cache get too big.
        // Walk the whole cache: erase the last entry, in iteration order,
        // whose use count is still <= 1 (i.e. it was never reused).
        // If every entry has been reused, erase the last entry instead.
        //
        CPCCache::iterator End      = TheCopyCache.end();
        CPCCache::iterator last_it  = End;
        CPCCache::iterator erase_it = End;

        for (CPCCache::iterator it = TheCopyCache.begin(); it != End; ++it)
        {
            last_it = it;

            if (it->second.m_nuse <= 1)
                erase_it = it;
        }

        if (erase_it != End)
        {
	    m_CPC_stats.recordErase(erase_it->second.m_nuse);
            TheCopyCache.erase(erase_it);
        }
        else if (last_it != End)
        {
	    m_CPC_stats.recordErase(last_it->second.m_nuse);
            TheCopyCache.erase(last_it);
        }
    }
    //
    // Got to insert one & then build it.
    //
    CPCCacheIter cache_it = TheCopyCache.insert(CPCCache::value_type(Key,cpc));
    CPC&         TheCPC   = cache_it->second;
    const int    MyProc   = ParallelDescriptor::MyProc();
    //
    // Here's where we allocate memory for the cache innards.
    // We do this so we don't have to build objects of these types
    // each time we search the cache.  Otherwise we'd be constructing
    // and destroying said objects quite frequently.
    //
    TheCPC.m_LocTags = new CopyComTag::CopyComTagsContainer;
    TheCPC.m_SndTags = new CopyComTag::MapOfCopyComTagContainers;
    TheCPC.m_RcvTags = new CopyComTag::MapOfCopyComTagContainers;
    TheCPC.m_SndVols = new std::map<int,int>;
    TheCPC.m_RcvVols = new std::map<int,int>;

    TheCPC.m_nuse = 1;

    m_CPC_stats.recordBuild();
    m_CPC_stats.recordUse();

    if (dst.IndexMap().empty() && src.IndexMap().empty())
        //
        // We don't own any of the relevant FABs so can't possibly have any work to do.
        //
        return cache_it;

    //
    // From here on the box arrays, mappings and ghost widths are read from
    // the cached copy; only the local index lists come from src and dst.
    //
    const BoxArray& ba_src = TheCPC.m_srcba;
    const DistributionMapping& dm_src = TheCPC.m_srcdm;
    const Array<int>& imap_src = src.IndexMap();
    const int nlocal_src = imap_src.size();
    const int ng_src = TheCPC.m_srcng;

    const BoxArray& ba_dst = TheCPC.m_dstba;
    const DistributionMapping& dm_dst = TheCPC.m_dstdm;
    const Array<int>& imap_dst = dst.IndexMap();
    const int nlocal_dst = dst.IndexMap().size();
    const int ng_dst = TheCPC.m_dstng;

    std::vector< std::pair<int,Box> > isects;

    CopyComTag::MapOfCopyComTagContainers send_tags; // temp copy

    //
    // Send side: for every source box we own, grown by ng_src, find the
    // destination boxes (grown by ng_dst) that overlap it, and queue a tag
    // for each one owned by another process, keyed by that process.
    //
    for (int i = 0; i < nlocal_src; ++i)
    {
	const int   k_src = imap_src[i];
	const Box& bx_src = BoxLib::grow(ba_src[k_src], ng_src);

	ba_dst.intersections(bx_src, isects, ng_dst);

	for (int j = 0, M = isects.size(); j < M; ++j)
        {
	    const int k_dst     = isects[j].first;
	    const Box& bx       = isects[j].second;
	    const int dst_owner = dm_dst[k_dst];

	    if (dst_owner == MyProc) continue; // local copy will be dealt with later
	    
	    send_tags[dst_owner].push_back(CopyComTag(bx, k_dst, k_src));
	}
    }

    CopyComTag::MapOfCopyComTagContainers recv_tags; // temp copy

    //
    // The touch counters are only used when more than one OpenMP thread is
    // available; otherwise the thread-safety flags are left untouched here.
    //
    BaseFab<int> localtouch, remotetouch;
    bool check_local = false, check_remote = false;
#ifdef _OPENMP
    if (omp_get_max_threads() > 1) {
        check_local = true;
        check_remote = true;
    }
#endif    

    //
    // Receive side: for every destination box we own, grown by ng_dst, find
    // the source boxes (grown by ng_src) that overlap it.  Overlaps with
    // sources we own become tiled local tags; the rest are queued per
    // source process.
    //
    for (int i = 0; i < nlocal_dst; ++i)
    {
	const int   k_dst = imap_dst[i];
	const Box& bx_dst = BoxLib::grow(ba_dst[k_dst], ng_dst);

	if (check_local) {
	    localtouch.resize(bx_dst);
	    localtouch.setVal(0);
	}

	if (check_remote) {
	    remotetouch.resize(bx_dst);
	    remotetouch.setVal(0);
	}

	ba_src.intersections(bx_dst, isects, ng_src);

	for (int j = 0, M = isects.size(); j < M; ++j)
        {
	    const int k_src     = isects[j].first;
	    const Box& bx       = isects[j].second;
	    const int src_owner = dm_src[k_src];

	    if (src_owner == MyProc) { // local copy
		const BoxList tilelist(bx, FabArrayBase::comm_tile_size);
		for (BoxList::const_iterator
			 it_tile  = tilelist.begin(),
			 End_tile = tilelist.end();   it_tile != End_tile; ++it_tile)
		{
		    TheCPC.m_LocTags->push_back(CopyComTag(*it_tile, k_dst, k_src));
		}
		if (check_local) {
		    localtouch.plus(1, bx);
		}
	    } else {
		recv_tags[src_owner].push_back(CopyComTag(bx, k_dst, k_src));
		if (check_remote) {
		    remotetouch.plus(1, bx);
		}
	    }
	}

	if (check_local) {  
	    // safe if a cell is touched no more than once 
	    // keep checking thread safety if it is safe so far;
	    // the first unsafe box clears the flag and stops further checks
            check_local = TheCPC.m_threadsafe_loc = localtouch.max() <= 1;
        }

	if (check_remote) {
	    // same rule as above, for cells filled from remote boxes
            check_remote = TheCPC.m_threadsafe_rcv = remotetouch.max() <= 1;
        }
    }

//    ba_src.clear_hash_bin();
//    ba_dst.clear_hash_bin();

    //
    // Turn the temporary per-process tag lists into the cached ones:
    // sort, total the volumes, and split every box into tiles.
    //
    for (int ipass = 0; ipass < 2; ++ipass) // pass 0: send; pass 1: recv
    {
	CopyComTag::MapOfCopyComTagContainers & Tags
	    = (ipass == 0) ? *TheCPC.m_SndTags : *TheCPC.m_RcvTags;
	CopyComTag::MapOfCopyComTagContainers & tmpTags
	    = (ipass == 0) ?         send_tags :         recv_tags;
	std::map<int,int> & Vols
	    = (ipass == 0) ? *TheCPC.m_SndVols : *TheCPC.m_RcvVols;

	for (CopyComTag::MapOfCopyComTagContainers::iterator 
		 it  = tmpTags.begin(), 
		 End = tmpTags.end();   it != End; ++it)
	{
	    const int key = it->first;
	    std::vector<CopyComTag>& cctv = it->second;

	    // We need to fix the order so that the send and recv processes match.
	    std::sort(cctv.begin(), cctv.end());

	    std::vector<CopyComTag> new_cctv;
	    new_cctv.reserve(cctv.size());

	    for (std::vector<CopyComTag>::const_iterator 
		     it2  = cctv.begin(),
		     End2 = cctv.end();   it2 != End2; ++it2)
	    {
		const Box& bx = it2->box;

		// Volume is counted before tiling, per partner process.
		Vols[key] += bx.numPts();

		const BoxList tilelist(bx, FabArrayBase::comm_tile_size);
		for (BoxList::const_iterator 
			 it_tile  = tilelist.begin(), 
			 End_tile = tilelist.end();    it_tile != End_tile; ++it_tile)
                {
		    new_cctv.push_back(CopyComTag(*it_tile, it2->fabIndex, it2->srcIndex));
		}
	    }

	    Tags[key].swap(new_cctv);
	}
    }    

    return cache_it;
}