void MPIDistributedDevice::commit() { if (!initialized) { int _ac = 1; const char *_av[] = {"ospray_mpi_distributed_device"}; auto *setComm = static_cast<MPI_Comm*>(getParam<void*>("worldCommunicator", nullptr)); shouldFinalizeMPI = mpicommon::init(&_ac, _av, setComm == nullptr); if (setComm) { MPI_CALL(Comm_dup(*setComm, &mpicommon::world.comm)); MPI_CALL(Comm_rank(mpicommon::world.comm, &mpicommon::world.rank)); MPI_CALL(Comm_size(mpicommon::world.comm, &mpicommon::world.size)); } auto &embreeDevice = api::ISPCDevice::embreeDevice; embreeDevice = rtcNewDevice(generateEmbreeDeviceCfg(*this).c_str()); rtcSetDeviceErrorFunction(embreeDevice, embreeErrorFunc, nullptr); RTCError erc = rtcGetDeviceError(embreeDevice); if (erc != RTC_ERROR_NONE) { // why did the error function not get called !? postStatusMsg() << "#osp:init: embree internal error number " << erc; assert(erc == RTC_ERROR_NONE); } initialized = true; } Device::commit(); masterRank = getParam<int>("masterRank", 0); TiledLoadBalancer::instance = make_unique<staticLoadBalancer::Distributed>(); }
void build_communicate_groups(COMMUNICATE *communicate,int cp_on) /*==========================================================================*/ /* Begin routine */ {/*begin routine */ /*==========================================================================*/ #include "../typ_defs/typ_mask.h" int myid = communicate->myid; int np_states = communicate->np_states; int np_beads = communicate->np_beads; int np = communicate->np; int np_forc = communicate->np_forc; int np_forc_src = communicate->np_forc_src; int np_forc_trg = communicate->np_forc_trg; int i,ii,loop,iii,ntemp,idiv,irem,icase,numcomm_bead; int *ranks; MPI_Comm world; MPI_Comm comm_forc; MPI_Group excl_group,temp; /*=======================================================================*/ /* 0) Mallocs and Dups */ numcomm_bead = np/np_beads; /* number of bead communicators */ if(numcomm_bead==0){numcomm_bead=1;} ranks = (int *) cmalloc(MAXPROCS*sizeof(int)); Comm_dup(communicate->world,&(world)); Barrier(world); /*=======================================================================*/ /* I) Get path_integral or Bead level communicators */ /* Bead communciators are OUTER communicators */ /* Split world into forc and bead pieces if cp is off */ /* Split world into state and bead pieces if cp is on */ /* Comm_beads_forc is a copy of Comm_beads */ if(cp_on==1){icase=1;} if(cp_on==0){icase=2;} switch(icase){ case 1: communicate->comm_beads = build_grp_comm_outer(np,np_beads,np_states,myid, &(communicate->myid_bead),ranks,world); break; case 2: communicate->comm_beads = build_grp_comm_outer(np,np_beads,np_forc,myid, &(communicate->myid_bead),ranks,world); break; }/*switch*/ Comm_dup(communicate->comm_beads,&(communicate->comm_beads_forc)); communicate->myid_bead_forc = communicate->myid_bead; /*=======================================================================*/ /* II) Get state and force level communicators */ /* State and Force comms are INNER communicators */ /* CP is off : np = np_forc*np_bead */ 
if((cp_on==1)&&(np_forc==1)) {icase=1;} if((cp_on==1)&&(np_forc==np_states)){icase=2;} if((cp_on==0)) {icase=3;} switch(icase){ /*======================================================================*/ /* CP IS ON : Np_forc = 1 */ case 1: /*------------------------------------------------------*/ /* i) The force level communicator is only proc 0 */ ranks[0] = myid; Comm_group(world,&excl_group); Group_incl(excl_group,np_forc,ranks,&temp); Comm_create(world,temp,&communicate->comm_forc); Comm_dup(communicate->comm_forc,&(communicate->comm_forc_source)); Comm_dup(communicate->comm_forc,&(communicate->comm_forc_target)); communicate->myid_forc = 0; communicate->myid_forc_source = 0; communicate->myid_forc_target = 0; Group_free(&excl_group); /*------------------------------------------------------*/ /* ii) The state level communicator is an INNER */ communicate->comm_states = build_grp_comm_inner(np,np_beads,np_states,myid, &(communicate->myid_state),ranks,world); /*------------------------------------------------*/ /* iii) The myid_bead_prime and myid_bead store */ /* the id of this proc in the comm_bead, if */ /* it is the FIRST bead level communicator. */ /* In subsequent comm_beads,myid_bead is out */ /* of range and myid_bead_prime=myid_state. 
*/ /* myid_bead_forc ALWAYS has carries */ /* the rank of this proc in the bead comm */ /* to which it is associated */ communicate->myid_bead_prime = communicate->myid_bead; if(communicate->myid_state!=0){ communicate->myid_bead = communicate->np_beads; }/*endif*/ break; /*======================================================================*/ case 2: /*------------------------------------------------------*/ /* i) The state level communicator is an INNER */ communicate->comm_states = build_grp_comm_inner(np,np_beads,np_states,myid, &(communicate->myid_state),ranks,world); /*------------------------------------------------------*/ /* ii) The forc level communicator is an INNER = state */ communicate->myid_forc = communicate->myid_state; Comm_dup(communicate->comm_states,&(communicate->comm_forc)); Comm_dup(communicate->comm_states,&(comm_forc)); /*------------------------------------------------*/ /* iii) Split the force level communicator */ /* Targets are OUTER and Sources are INNER */ communicate->comm_forc_target = build_grp_comm_outer(np_forc,np_forc_trg,np_forc_src, communicate->myid_forc,&(communicate->myid_forc_target), ranks,comm_forc); communicate->comm_forc_source = build_grp_comm_inner(np_forc,np_forc_trg,np_forc_src, communicate->myid_forc,&(communicate->myid_forc_source), ranks,comm_forc); /*------------------------------------------------*/ /* iii) The myid_bead_prime and myid_bead store */ /* the id of this proc in the comm_bead, if */ /* it is the FIRST bead level communicator. */ /* In subsequent comm_beads,myid_bead is out */ /* of range and myid_bead_prime=myid_state. 
*/ /* myid_bead_forc ALWAYS has carries */ /* the rank of this proc in the bead comm */ /* to which it is associated */ communicate->myid_bead_prime = communicate->myid_bead; if(communicate->myid_state!=0){ communicate->myid_bead = communicate->np_beads; }/*endif*/ break; /*======================================================================*/ case 3: /* CP is off : force level only */ /*------------------------------------------------*/ /* i) The state communicator is only proc 0 */ ranks[0] = myid; Comm_group(world,&excl_group); Group_incl(excl_group,np_states,ranks,&temp); Comm_create(world,temp,&communicate->comm_states); communicate->myid_state = 0; Group_free(&excl_group); /*------------------------------------------------*/ /* ii) The force level communicator is an INNER */ communicate->comm_forc = build_grp_comm_inner(np,np_beads,np_forc,myid, &(communicate->myid_forc),ranks,world); Comm_dup(communicate->comm_forc,&(comm_forc)); /*------------------------------------------------*/ /* iii) Split the force level communicator */ /* Targets are OUTER and Sources are INNER */ communicate->comm_forc_target = build_grp_comm_outer(np_forc,np_forc_trg,np_forc_src, communicate->myid_forc,&(communicate->myid_forc_target), ranks,comm_forc); communicate->comm_forc_source = build_grp_comm_inner(np_forc,np_forc_trg,np_forc_src, communicate->myid_forc,&(communicate->myid_forc_source), ranks,comm_forc); /*------------------------------------------------*/ /* iv) The myid_bead_prime and myid_bead store */ /* the id of this proc in the comm_bead, if */ /* it is the FIRST bead level communicator. */ /* In subsequent comm_beads,myid_bead is out */ /* of range and myid_bead_prime=myid_forc. 
*/ /* myid_bead_forc ALWAYS has carries */ /* the rank of this proc in the bead comm */ /* to which it is associated */ communicate->myid_bead_prime = communicate->myid_bead; if(communicate->myid_forc!=0){ communicate->myid_bead = communicate->np_beads; }/*endif*/ break; }/*end switch*/ /*------------------------------------------------------------------------*/ /* Free the memory */ cfree(&ranks[0]); /*------------------------------------------------------------------------*/ } /*end routine*/
/*==========================================================================*/ void build_cp_comm_pkg_dvr(CP *cp,MPI_Comm world) /*==========================================================================*/ {/* begin routine */ /*==========================================================================*/ /* Local variable declarations */ #include "../typ_defs/typ_mask.h" int irem,idiv,iii; int nstate_ncoef_proc_max,nstate_ncoef_proc_min; int num_coef,*num_coef_v,ncoef_proc,ncoef_proc_yz; /*==========================================================================*/ /* I) Up states */ /*------------------------------------*/ /* i) states per processor stuff */ idiv = cp->cpcoeffs_info.nstate_up/cp->communicate.np_states; irem = (cp->cpcoeffs_info.nstate_up % cp->communicate.np_states); cp->cpcoeffs_info.nstate_up_proc = idiv; if(cp->communicate.myid_state < irem) { cp->cpcoeffs_info.nstate_up_proc = idiv+1; }/*endif*/ if(cp->communicate.myid_state <= irem) { cp->cpcoeffs_info.istate_up_st = cp->communicate.myid_state*(idiv+1)+1; } else { cp->cpcoeffs_info.istate_up_st = irem*(idiv+1) + (cp->communicate.myid_state-irem)*idiv+1; }/*endif*/ cp->cpcoeffs_info.istate_up_end = cp->cpcoeffs_info.istate_up_st + cp->cpcoeffs_info.nstate_up_proc-1; /*------------------------------------*/ /* ii) coefs per processor stuff */ cp->cp_comm_state_pkg_dvr_up.num_proc = cp->communicate.np_states; cp->cp_comm_state_pkg_dvr_up.myid = cp->communicate.myid_state; cp->cp_comm_state_pkg_dvr_up.nstate = cp->cpcoeffs_info.nstate_up; cp->cp_comm_state_pkg_dvr_up.ncoef = cp->cpcoeffs_info.ncoef; cp->cp_comm_state_pkg_dvr_up.nstate_proc= cp->cpcoeffs_info.nstate_up_proc; cp->cp_comm_state_pkg_dvr_up.world = world; if(cp->communicate.np_states > 1){ Comm_dup(cp->communicate.comm_states,&(cp->cp_comm_state_pkg_dvr_up.comm)); } else { cp->cp_comm_state_pkg_dvr_up.comm = cp->communicate.comm_states; }/* endif */ irem = (cp->cp_comm_state_pkg_dvr_up.nstate % 
cp->cp_comm_state_pkg_dvr_up.num_proc); cp->cp_comm_state_pkg_dvr_up.nstate_proc_max = (irem > 0 ? idiv+1 : idiv); cp->cp_comm_state_pkg_dvr_up.nstate_max = (irem > 0 ? ((idiv+1)*cp->communicate.np_states) : (idiv*cp->communicate.np_states)) ; /* different from PW code*/ cp->cp_comm_state_pkg_dvr_up.nstate_proc_min = idiv; idiv = (cp->cpcoeffs_info.grid_ny)*(cp->cpcoeffs_info.grid_nz)/ (cp->cp_comm_state_pkg_dvr_up.num_proc); irem = (cp->cpcoeffs_info.grid_ny * cp->cpcoeffs_info.grid_nz) % cp->cp_comm_state_pkg_dvr_up.num_proc; ncoef_proc_yz = (cp->communicate.myid_state < irem ? idiv+1 : idiv); ncoef_proc = ncoef_proc_yz * (cp->cpcoeffs_info.grid_nx); cp->cpcoeffs_info.nstate_ncoef_proc_up = ncoef_proc; cp->cp_comm_state_pkg_dvr_up.nstate_ncoef_proc = cp->cpcoeffs_info.nstate_ncoef_proc_up; if(cp->communicate.np_states > 1){ Allreduce(&(cp->cpcoeffs_info.nstate_ncoef_proc_up), &nstate_ncoef_proc_max, 1,MPI_INT,MPI_MAX,0,world); /* Not defined anymore Allreduce(&(cp->cpcoeffs_info.nstate_ncoef_proc_up), &nstate_ncoef_proc_min, 1,MPI_INT,MPI_MIN,0,world); */ }else{ nstate_ncoef_proc_max = cp->cpcoeffs_info.nstate_ncoef_proc_up; /* nstate_ncoef_proc_min = cp->cpcoeffs_info.nstate_ncoef_proc_up; */ } cp->cpcoeffs_info.nstate_ncoef_proc_max_up = nstate_ncoef_proc_max; cp->cp_comm_state_pkg_dvr_up.nstate_ncoef_proc_max = nstate_ncoef_proc_max; /*cp->cp_comm_state_pkg_dvr_up.nstate_ncoef_proc_min = nstate_ncoef_proc_min; */ if(cp->communicate.np_states > 1){ num_coef = cp->cp_comm_state_pkg_dvr_up.nstate_ncoef_proc; num_coef_v = (int *) malloc((cp->communicate.np_states)*sizeof(int))-1; Allgather(&num_coef,1,MPI_INT,&num_coef_v[1],1,MPI_INT,0,world); cp->cpcoeffs_info.icoef_start_up = 1; for(iii=1; iii <= cp->communicate.myid; iii++){ cp->cpcoeffs_info.icoef_start_up += num_coef_v[iii]; } cp->cp_comm_state_pkg_dvr_up.icoef_start = cp->cpcoeffs_info.icoef_start_up; }else{ cp->cpcoeffs_info.icoef_start_up = 1; cp->cp_comm_state_pkg_dvr_up.icoef_start = 1; } 
/*==========================================================================*/ /* II) Down states */ /*------------------------------------*/ /* i) states per processor stuff */ idiv = cp->cpcoeffs_info.nstate_dn/cp->communicate.np_states; irem = (cp->cpcoeffs_info.nstate_dn % cp->communicate.np_states); cp->cpcoeffs_info.nstate_dn_proc = idiv; if(cp->communicate.myid_state < irem) { cp->cpcoeffs_info.nstate_dn_proc = idiv+1; }/*endif*/ if(cp->communicate.myid_state <= irem) { cp->cpcoeffs_info.istate_dn_st = cp->communicate.myid_state*(idiv+1)+1; } else { cp->cpcoeffs_info.istate_dn_st = irem*(idiv+1) + (cp->communicate.myid_state-irem)*idiv+1; }/*endif*/ cp->cpcoeffs_info.istate_dn_end = cp->cpcoeffs_info.istate_dn_st + cp->cpcoeffs_info.nstate_dn_proc-1; /*------------------------------------*/ /* ii) coefs per processor stuff */ cp->cp_comm_state_pkg_dvr_dn.num_proc = cp->communicate.np_states; cp->cp_comm_state_pkg_dvr_dn.myid = cp->communicate.myid_state; cp->cp_comm_state_pkg_dvr_dn.nstate = cp->cpcoeffs_info.nstate_dn; cp->cp_comm_state_pkg_dvr_dn.ncoef = cp->cpcoeffs_info.ncoef; cp->cp_comm_state_pkg_dvr_dn.nstate_proc= cp->cpcoeffs_info.nstate_dn_proc; cp->cp_comm_state_pkg_dvr_dn.world = world; if(cp->communicate.np_states > 1){ Comm_dup(cp->communicate.comm_states,&(cp->cp_comm_state_pkg_dvr_dn.comm)); } else { cp->cp_comm_state_pkg_dvr_dn.comm = cp->communicate.comm_states; }/* endif */ irem = (cp->cp_comm_state_pkg_dvr_dn.nstate % cp->cp_comm_state_pkg_dvr_dn.num_proc); cp->cp_comm_state_pkg_dvr_dn.nstate_proc_max = (irem > 0 ? idiv+1 : idiv); cp->cp_comm_state_pkg_dvr_dn.nstate_max = (irem > 0 ? 
((idiv+1)*cp->communicate.np_states) : (idiv*cp->communicate.np_states)) ; cp->cp_comm_state_pkg_dvr_dn.nstate_proc_min = idiv; cp->cpcoeffs_info.nstate_ncoef_proc_dn = ncoef_proc; cp->cp_comm_state_pkg_dvr_dn.nstate_ncoef_proc = cp->cpcoeffs_info.nstate_ncoef_proc_dn; if(cp->communicate.np_states > 1){ Allreduce(&(cp->cpcoeffs_info.nstate_ncoef_proc_dn), &nstate_ncoef_proc_max, 1,MPI_INT,MPI_MAX,0,world); /* Allreduce(&(cp->cpcoeffs_info.nstate_ncoef_proc_dn), &nstate_ncoef_proc_min, 1,MPI_INT,MPI_MIN,0,world); */ }else{ nstate_ncoef_proc_max = cp->cpcoeffs_info.nstate_ncoef_proc_dn; /* nstate_ncoef_proc_min = cp->cpcoeffs_info.nstate_ncoef_proc_dn; */ } cp->cpcoeffs_info.nstate_ncoef_proc_max_dn = nstate_ncoef_proc_max; cp->cp_comm_state_pkg_dvr_dn.nstate_ncoef_proc_max = cp->cpcoeffs_info.nstate_ncoef_proc_max_dn; /* cp->cp_comm_state_pkg_dvr_dn.nstate_ncoef_proc_min = nstate_ncoef_proc_min; */ if(cp->communicate.np_states > 1){ for(iii=1; iii <= cp->communicate.np_states; iii++){ num_coef_v[iii] = 0; } num_coef = cp->cp_comm_state_pkg_dvr_dn.nstate_ncoef_proc; Allgather(&num_coef,1,MPI_INT,&num_coef_v[1],1,MPI_INT,0,world); cp->cpcoeffs_info.icoef_start_dn = 1; for(iii=1; iii <= cp->communicate.myid; iii++){ cp->cpcoeffs_info.icoef_start_dn += num_coef_v[iii]; } cp->cp_comm_state_pkg_dvr_dn.icoef_start = cp->cpcoeffs_info.icoef_start_dn; }else{ cp->cpcoeffs_info.icoef_start_dn = 1; cp->cp_comm_state_pkg_dvr_dn.icoef_start = 1; } if(cp->communicate.np_states > 1){ free(&num_coef_v[1]); } /*==========================================================================*/ }/* end routine */
/*! Duplicate this group's communicator with MPI_Comm_dup (issued through the
    MPI_CALL macro) and return a new Group constructed from the duplicate. */
Group Group::dup() const
{
  MPI_Comm commCopy;
  MPI_CALL(Comm_dup(comm, &commCopy));
  return Group(commCopy);
}