/** \function pidtonid * finds nids for pids 1 to CmiNumPes and stores them in an array * correspondingly also creates an array for nids to pids */ void pidtonid(int numpes) { CmiLock(cray_lock); if (pid2nid != NULL) { CmiUnlock(cray_lock); return; /* did once already */ } getDimension(&maxNID,&maxX,&maxY,&maxZ); int numCores = CmiNumCores(); pid2nid = (int *)malloc(sizeof(int) * numpes); #if XT4_TOPOLOGY || XT5_TOPOLOGY || XE6_TOPOLOGY int i, nid, ret; CmiAssert(rca_coords == NULL); rca_coords = (rca_mesh_coord_t *)malloc(sizeof(rca_mesh_coord_t)*(maxNID+1)); for (i=0; i<maxNID; i++) { rca_coords[i].mesh_x = rca_coords[i].mesh_y = rca_coords[i].mesh_z = -1; } for (i=0; i<numpes; i++) { PMI_Get_nid(CmiGetNodeGlobal(CmiNodeOf(i),CmiMyPartition()), &nid); pid2nid[i] = nid; CmiAssert(nid < maxNID); ret = rca_get_meshcoord(nid, &rca_coords[nid]); CmiAssert(ret != -1); } #endif CmiUnlock(cray_lock); }
/** \function getMeshCoord
 *  wrapper function for rca_get_meshcoord
 *
 *  \param nid node id to look up
 *  \param x,y,z receive the mesh coordinates of nid; left untouched on failure
 *  \return 0: success, -1: failure (rca_get_meshcoord reported -1)
 */
int getMeshCoord(int nid, int *x, int *y, int *z) {
  rca_mesh_coord_t xyz;

  /* Do not copy out of xyz when the lookup failed: it was never filled in. */
  if (rca_get_meshcoord(nid, &xyz) == -1) {
    return -1;
  }

  *x = xyz.mesh_x;
  *y = xyz.mesh_y;
  *z = xyz.mesh_z;
  return 0;
}
/** \function getMeshCoord
 *  Wrapper around rca_get_meshcoord that prefers the rca_coords cache.
 *
 *  With CMK_HAS_RCALIB:
 *   - if rca_coords is set, the coordinates are copied from rca_coords[nid]
 *     and the result is -1 when the stored x equals -1, otherwise 0;
 *   - if rca_coords is NULL, rca_get_meshcoord is called directly; a result
 *     of -1 is returned as-is without touching x/y/z, any other result is
 *     returned after the coordinates have been copied out.
 *  Without CMK_HAS_RCALIB the function aborts via CmiAbort.
 *
 *  \param nid node id to look up (not range-checked here)
 *  \param x,y,z receive the mesh coordinates
 *  \return -1 on failure; on success 0 from the cache path, or the value
 *          returned by rca_get_meshcoord on the direct path
 */
int getMeshCoord(int nid, int *x, int *y, int *z) {
#if CMK_HAS_RCALIB
  if (rca_coords != NULL) {
    /* Cached path: serve the request from the precomputed table. */
    *x = rca_coords[nid].mesh_x;
    *y = rca_coords[nid].mesh_y;
    *z = rca_coords[nid].mesh_z;
    /* NOTE(review): presumably x == -1 marks a slot that was never filled in;
     * if mesh_x is an unsigned field this comparison may never be true -- confirm. */
    if (*x == -1) {
      return -1;
    }
    return 0;
  }

  /* No cache yet: ask the RCA library directly. */
  rca_mesh_coord_t coord;
  int status = rca_get_meshcoord(nid, &coord);
  if (status == -1) {
    return -1;
  }
  *x = coord.mesh_x;
  *y = coord.mesh_y;
  *z = coord.mesh_z;
  return status;
#else
  CmiAbort("rca_get_meshcoord does not exist");
  return -1;
#endif
}
int A1D_Initialize() { #ifdef DMAPPD_USES_MPI int mpi_initialized, mpi_provided; int mpi_status = MPI_SUCCESS; int namelen; char procname[MPI_MAX_PROCESSOR_NAME]; #endif #ifdef __CRAYXE int pmi_status = PMI_SUCCESS; int nodeid = -1; rca_mesh_coord_t rca_xyz; dmapp_return_t dmapp_status = DMAPP_RC_SUCCESS; dmapp_rma_attrs_ext_t dmapp_config_in, dmapp_config_out; dmapp_jobinfo_t dmapp_info; dmapp_pe_t dmapp_rank = -1; int dmapp_size = -1; #endif int sheapflag = 0; #ifdef DEBUG_FUNCTION_ENTER_EXIT fprintf(stderr,"entering A1D_Initialize() \n"); #endif #ifdef DMAPPD_USES_MPI /*************************************************** * * configure MPI * ***************************************************/ /* MPI has to be Initialized for this implementation to work */ MPI_Initialized(&mpi_initialized); assert(mpi_initialized==1); /* MPI has to tolerate threads because A1 supports them */ MPI_Query_thread(&mpi_provided); //assert(mpi_provided>MPI_THREAD_SINGLE); /* have to use our own communicator for collectives to be proper */ mpi_status = MPI_Comm_dup(MPI_COMM_WORLD,&A1D_COMM_WORLD); assert(mpi_status==0); /* get my MPI rank */ mpi_status = MPI_Comm_rank(A1D_COMM_WORLD,&mpi_rank); assert(mpi_status==0); /* get MPI world size */ mpi_status = MPI_Comm_size(A1D_COMM_WORLD,&mpi_size); assert(mpi_status==0); /* in a perfect world, this would provide topology information like BG */ MPI_Get_processor_name( procname, &namelen ); printf( "%d: MPI_Get_processor_name = %s\n" , mpi_rank, procname ); fflush( stdout ); /* barrier to make sure MPI is ready everywhere */ mpi_status = MPI_Barrier(A1D_COMM_WORLD); assert(mpi_status==0); #endif #ifdef __CRAYXE /*************************************************** * * query topology * ***************************************************/ PMI_Get_nid( mpi_rank, &nodeid ); assert(pmi_status==PMI_SUCCESS); rca_get_meshcoord((uint16_t)nodeid, &rca_xyz); printf("%d: rca_get_meshcoord returns (%2u,%2u,%2u)\n", mpi_rank, rca_xyz.mesh_x, rca_xyz.mesh_y, 
rca_xyz.mesh_z ); #endif #ifdef __CRAYXE /*************************************************** * * configure DMAPP * ***************************************************/ dmapp_config_in.max_outstanding_nb = DMAPP_DEF_OUTSTANDING_NB; /* 512 */ dmapp_config_in.offload_threshold = DMAPP_OFFLOAD_THRESHOLD; /* 4096 */ #ifdef DETERMINISTIC_ROUTING dmapp_config_in.put_relaxed_ordering = DMAPP_ROUTING_DETERMINISTIC; dmapp_config_in.get_relaxed_ordering = DMAPP_ROUTING_DETERMINISTIC; #else dmapp_config_in.put_relaxed_ordering = DMAPP_ROUTING_ADAPTIVE; dmapp_config_in.get_relaxed_ordering = DMAPP_ROUTING_ADAPTIVE; #endif dmapp_config_in.max_concurrency = 1; /* not thread-safe */ #ifdef FLUSH_IMPLEMENTED dmapp_config_in.PI_ordering = DMAPP_PI_ORDERING_RELAXED; #else dmapp_config_in.PI_ordering = DMAPP_PI_ORDERING_STRICT; #endif dmapp_status = dmapp_init_ext( &dmapp_config_in, &dmapp_config_out ); assert(dmapp_status==DMAPP_RC_SUCCESS); #ifndef FLUSH_IMPLEMENTED /* without strict PI ordering, we have to flush remote stores with a get packet to force global visibility */ assert( dmapp_config_out.PI_ordering == DMAPP_PI_ORDERING_STRICT); #endif dmapp_status = dmapp_get_jobinfo(&dmapp_info); assert(dmapp_status==DMAPP_RC_SUCCESS); dmapp_rank = dmapp_info.pe; dmapp_size = dmapp_info.npes; A1D_Sheap_desc = dmapp_info.sheap_seg; /* make sure PMI and DMAPP agree */ assert(mpi_rank==dmapp_rank); assert(mpi_size==dmapp_size); #endif /*************************************************** * * setup protocols * ***************************************************/ #ifdef FLUSH_IMPLEMENTED /* allocate Put list */ A1D_Put_flush_list = malloc( mpi_size * sizeof(int32_t) ); assert(A1D_Put_flush_list != NULL); #endif #ifdef __CRAYXE A1D_Acc_lock = dmapp_sheap_malloc( sizeof(int64_t) ); #endif A1D_Allreduce_issame64((size_t)A1D_Acc_lock, &sheapflag); assert(sheapflag==1); #ifdef DEBUG_FUNCTION_ENTER_EXIT fprintf(stderr,"exiting A1D_Initialize() \n"); #endif return(0); }
int main(void) { int rc; int rank, size; PMI_BOOL initialized; rc = PMI_Initialized(&initialized); if (rc!=PMI_SUCCESS) PMI_Abort(rc,"PMI_Initialized failed"); if (initialized!=PMI_TRUE) { int spawned; rc = PMI_Init(&spawned); if (rc!=PMI_SUCCESS) PMI_Abort(rc,"PMI_Init failed"); } rc = PMI_Get_rank(&rank); if (rc!=PMI_SUCCESS) PMI_Abort(rc,"PMI_Get_rank failed"); rc = PMI_Get_size(&size); if (rc!=PMI_SUCCESS) PMI_Abort(rc,"PMI_Get_size failed"); printf("rank %d of %d \n", rank, size); int rpn; /* rpn = ranks per node */ rc = PMI_Get_clique_size(&rpn); if (rc!=PMI_SUCCESS) PMI_Abort(rc,"PMI_Get_clique_size failed"); printf("rank %d clique size %d \n", rank, rpn); int * clique_ranks = malloc( rpn * sizeof(int) ); if (clique_ranks==NULL) PMI_Abort(rpn,"malloc failed"); rc = PMI_Get_clique_ranks(clique_ranks, rpn); if (rc!=PMI_SUCCESS) PMI_Abort(rc,"PMI_Get_clique_ranks failed"); for(int i = 0; i<rpn; i++) printf("rank %d clique[%d] = %d \n", rank, i, clique_ranks[i]); int nid; rc = PMI_Get_nid(rank, &nid); if (rc!=PMI_SUCCESS) PMI_Abort(rc,"PMI_Get_nid failed"); printf("rank %d PMI_Get_nid gives nid %d \n", rank, nid); #if OLD rca_mesh_coord_t xyz; rca_get_meshcoord( (uint16_t) nid, &xyz); printf("rank %d rca_get_meshcoord returns (%2u,%2u,%2u)\n", rank, xyz.mesh_x, xyz.mesh_y, xyz.mesh_z); #else // UNTESTED pmi_mesh_coord_t xyz; PMI_Get_meshcoord((uint16_t) nid, &xyz); printf("rank %d PMI_Get_meshcoord returns (%2u,%2u,%2u)\n", rank, xyz.mesh_x, xyz.mesh_y, xyz.mesh_z); #endif fflush(stdout); return 0; }