static int bgq_nodenum(void)
{
    int           hostnum;
    Personality_t personality;

    Kernel_GetPersonality(&personality, sizeof(personality));
    /* Each MPI rank has a unique coordinate in a 6-dimensional space
       (A,B,C,D,E,T), with dimensions A-E corresponding to different physical
       nodes, and T within each node. Each node has sixteen physical cores,
       each of which can have up to four hardware threads, so 0 <= T <= 63
       (but the maximum value of T depends on the configuration of ranks and
       OpenMP threads per node). However, T is irrelevant for computing a
       suitable return value for gmx_hostname_num(). */
    hostnum  = personality.Network_Config.Acoord;
    hostnum *= personality.Network_Config.Bnodes;
    hostnum += personality.Network_Config.Bcoord;
    hostnum *= personality.Network_Config.Cnodes;
    hostnum += personality.Network_Config.Ccoord;
    hostnum *= personality.Network_Config.Dnodes;
    hostnum += personality.Network_Config.Dcoord;
    hostnum *= personality.Network_Config.Enodes;
    hostnum += personality.Network_Config.Ecoord;

    if (debug)
    {
        std::fprintf(debug,
                     "Torus ID A: %d / %d B: %d / %d C: %d / %d D: %d / %d E: %d / %d\n"
                     "Node ID T: %d / %d core: %d / %d hardware thread: %d / %d\n",
                     personality.Network_Config.Acoord,
                     personality.Network_Config.Anodes,
                     personality.Network_Config.Bcoord,
                     personality.Network_Config.Bnodes,
                     personality.Network_Config.Ccoord,
                     personality.Network_Config.Cnodes,
                     personality.Network_Config.Dcoord,
                     personality.Network_Config.Dnodes,
                     personality.Network_Config.Ecoord,
                     personality.Network_Config.Enodes,
                     Kernel_ProcessorCoreID(), 16,
                     Kernel_ProcessorID(), 64,
                     Kernel_ProcessorThreadID(), 4);
    }
    return hostnum;
}
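/* The accumulation above is an ordinary row-major folding of the (A,B,C,D,E)
 * coordinate into a single node number, with T deliberately left out so that
 * all ranks on one node agree on the result. A minimal stand-alone sketch of
 * the same pattern, using hypothetical coord[]/size[] arrays in place of the
 * personality fields (values below are made up): */
#include <stdio.h>

static int fold_coords(const int coord[5], const int size[5])
{
    int id = 0;
    for (int d = 0; d < 5; d++)
    {
        id = id * size[d] + coord[d];   /* row-major accumulation, as in bgq_nodenum() */
    }
    return id;
}

int main(void)
{
    const int coord[5] = { 1, 0, 2, 3, 1 };   /* example coordinates */
    const int size[5]  = { 2, 2, 4, 4, 2 };   /* example torus extents */
    printf("node id = %d\n", fold_coords(coord, size));
    return 0;
}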
EPIK_TOPOL * EPIK_Pform_hw_topol()
{
    EPIK_TOPOL * myt;

    Kernel_GetPersonality(&mybgp, sizeof(_BGP_Personality_t));

    myt = EPIK_Cart_create("Blue Gene/P Hardware Topology", 4);
    EPIK_Cart_add_dim(myt, (elg_ui4)BGP_Personality_xSize(&mybgp),
                      BGP_Personality_isTorusX(&mybgp), "X");
    EPIK_Cart_add_dim(myt, (elg_ui4)BGP_Personality_ySize(&mybgp),
                      BGP_Personality_isTorusY(&mybgp), "Y");
    EPIK_Cart_add_dim(myt, (elg_ui4)BGP_Personality_zSize(&mybgp),
                      BGP_Personality_isTorusZ(&mybgp), "Z");
    EPIK_Cart_add_dim(myt, 4, ELG_FALSE, "Core"); /* BlueGene/P quad-core */

    return myt;
}
double core_timer_fetch_frequency(struct core_timer *timer)
{
    double frequency;
#ifdef __bgq__
    Personality_t personality;
#endif

    frequency = 1;

    /*
     * \see https://hpc-forge.cineca.it/files/ScuolaCalcoloParallelo_WebDAV/public/anno-2013/advanced-school/adv_mpi_handson.pdf
     * \see https://github.com/GeneAssembly/biosal/issues/429
     */
#ifdef __bgq__
    Kernel_GetPersonality(&personality, sizeof(Personality_t));
    frequency = personality.Kernel_Config.FreqMHz * 1000000.0;
#endif

    return frequency;
}
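/* The value returned above is the core clock in Hz (FreqMHz scaled by 10^6),
 * so it can be used directly to convert a measured cycle interval into
 * seconds. A minimal usage sketch with made-up cycle counts; on BG/Q the
 * counts would come from the hardware timebase and the frequency from
 * core_timer_fetch_frequency(): */
#include <stdint.h>
#include <stdio.h>

static double cycles_to_seconds(uint64_t begin, uint64_t end, double frequency_hz)
{
    /* elapsed cycles divided by cycles per second */
    return (double)(end - begin) / frequency_hz;
}

int main(void)
{
    double   frequency_hz = 1600000000.0;        /* 1600 MHz, the BG/Q A2 clock */
    uint64_t begin = 0, end = 3200000000ULL;     /* example cycle counts */
    printf("elapsed: %.3f s\n", cycles_to_seconds(begin, end, frequency_hz));
    return 0;
}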
//get the spi coord, grid size and torusicity
void get_spi_coord()
{
    //get the personality
    Personality_t pers;
    Kernel_GetPersonality(&pers, sizeof(pers));

    //copy the coords
    spi_rank_coord[0] = pers.Network_Config.Acoord;
    spi_rank_coord[1] = pers.Network_Config.Bcoord;
    spi_rank_coord[2] = pers.Network_Config.Ccoord;
    spi_rank_coord[3] = pers.Network_Config.Dcoord;
    spi_rank_coord[4] = pers.Network_Config.Ecoord;

    //get torusicity
    for(int idir = 0; idir < 5; idir++)
        spi_dir_is_torus[idir] = ND_GET_TORUS(idir, pers.Network_Config.NetFlags);

    //get size
    spi_dir_size[0] = pers.Network_Config.Anodes;
    spi_dir_size[1] = pers.Network_Config.Bnodes;
    spi_dir_size[2] = pers.Network_Config.Cnodes;
    spi_dir_size[3] = pers.Network_Config.Dnodes;
    spi_dir_size[4] = pers.Network_Config.Enodes;
}
BGQTorusManager::BGQTorusManager()
{
    order[0] = 5;
    order[1] = 4;
    order[2] = 3;
    order[3] = 2;
    order[4] = 1;
    order[5] = 0;

    int numPes = CmiNumPesGlobal();
    procsPerNode = Kernel_ProcessCount();
    thdsPerProc = CmiMyNodeSize();
    hw_NT = procsPerNode * thdsPerProc;

    Personality_t pers;
    Kernel_GetPersonality(&pers, sizeof(pers));
    hw_NA = pers.Network_Config.Anodes;
    hw_NB = pers.Network_Config.Bnodes;
    hw_NC = pers.Network_Config.Cnodes;
    hw_ND = pers.Network_Config.Dnodes;
    hw_NE = pers.Network_Config.Enodes;

    unsigned int isFile = 0;
    Kernel_GetMapping(10, mapping, &isFile);
    if (!isFile) {
        for (int i = 0; i < 6; i++) {
            if (mapping[i] != 'T') {
                order[5 - i] = mapping[i] - 'A';
            } else {
                order[5 - i] = 5;
            }
        }
    }
    //printf("Mapping %d %d %d %d %d %d\n", order[0], order[1], order[2], order[3], order[4], order[5]);

    rn_NA = hw_NA;
    rn_NB = hw_NB;
    rn_NC = hw_NC;
    rn_ND = hw_ND;
    rn_NE = hw_NE;

    int max_t = 0;
    if (rn_NA * rn_NB * rn_NC * rn_ND * rn_NE != numPes / hw_NT) {
        rn_NA = rn_NB = rn_NC = rn_ND = rn_NE = 0;
        int rn_NT = 0;
        int min_a, min_b, min_c, min_d, min_e, min_t;
        min_a = min_b = min_c = min_d = min_e = min_t = (~(-1));
        int tmp_t, tmp_a, tmp_b, tmp_c, tmp_d, tmp_e;
        uint64_t numentries;
        BG_CoordinateMapping_t *coord;

        int nranks = numPes / thdsPerProc;
        coord = (BG_CoordinateMapping_t *) malloc(sizeof(BG_CoordinateMapping_t) * nranks);
        Kernel_RanksToCoords(sizeof(BG_CoordinateMapping_t) * nranks, coord, &numentries);

        for (int c = 0; c < nranks; c++) {
            tmp_a = coord[c].a;
            tmp_b = coord[c].b;
            tmp_c = coord[c].c;
            tmp_d = coord[c].d;
            tmp_e = coord[c].e;
            tmp_t = coord[c].t;

            if (tmp_a > rn_NA) rn_NA = tmp_a;
            if (tmp_a < min_a) min_a = tmp_a;
            if (tmp_b > rn_NB) rn_NB = tmp_b;
            if (tmp_b < min_b) min_b = tmp_b;
            if (tmp_c > rn_NC) rn_NC = tmp_c;
            if (tmp_c < min_c) min_c = tmp_c;
            if (tmp_d > rn_ND) rn_ND = tmp_d;
            if (tmp_d < min_d) min_d = tmp_d;
            if (tmp_e > rn_NE) rn_NE = tmp_e;
            if (tmp_e < min_e) min_e = tmp_e;
            if (tmp_t > rn_NT) rn_NT = tmp_t;
            if (tmp_t < min_t) min_t = tmp_t;
        }

        rn_NA = rn_NA - min_a + 1;
        rn_NB = rn_NB - min_b + 1;
        rn_NC = rn_NC - min_c + 1;
        rn_ND = rn_ND - min_d + 1;
        rn_NE = rn_NE - min_e + 1;
        procsPerNode = rn_NT - min_t + 1;
        hw_NT = procsPerNode * thdsPerProc;
        free(coord);
    }

    dimA = rn_NA;
    dimB = rn_NB;
    dimC = rn_NC;
    dimD = rn_ND;
    dimE = rn_NE;
    dimA = dimA * hw_NT;  // assuming TABCDE

    dims[0] = rn_NA;
    dims[1] = rn_NB;
    dims[2] = rn_NC;
    dims[3] = rn_ND;
    dims[4] = rn_NE;
    dims[5] = hw_NT;

    torus[0] = ((rn_NA % 4) == 0) ? true : false;
    torus[1] = ((rn_NB % 4) == 0) ? true : false;
    torus[2] = ((rn_NC % 4) == 0) ? true : false;
    torus[3] = ((rn_ND % 4) == 0) ? true : false;
    torus[4] = true;

    populateLocalNodes();
}
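/* The Kernel_GetMapping() block above turns a six-character mapping string
 * into a permutation: letters 'A'..'E' become indices 0..4, 'T' becomes 5,
 * and the result is stored in reverse. A stand-alone sketch of that rule,
 * with a hard-coded example string in place of the kernel call (the string
 * value is illustrative only): */
#include <stdio.h>

int main(void)
{
    const char mapping[6] = { 'E', 'D', 'C', 'B', 'A', 'T' };  /* example mapping */
    int order[6];

    for (int i = 0; i < 6; i++) {
        if (mapping[i] != 'T')
            order[5 - i] = mapping[i] - 'A';   /* A..E -> 0..4 */
        else
            order[5 - i] = 5;                  /* T -> 5 */
    }

    for (int i = 0; i < 6; i++)
        printf("order[%d] = %d\n", i, order[i]);
    return 0;
}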
int main(int argc, char* argv[])
{
    size_t size = ( argc>1 ? atoi(argv[1]) : getpagesize() );
    printf("size = %ld \n", (long)size);

#if defined(POSIX_SHM)
    int fd = shm_open("./bar", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR );
    if (fd<0) printf("shm_open failed: %d \n", fd);
    else      printf("shm_open succeeded: %d \n", fd);
#elif defined(DEV_SHM)
    int fd = open("/dev/shm/foo", O_RDWR | O_CREAT, S_IRUSR | S_IWUSR );
    if (fd<0) printf("open failed: %d \n", fd);
    else      printf("open succeeded: %d \n", fd);
#else
    int fd = -1;
    printf("no file backing \n");
#endif

    if (fd>=0)
    {
        int rc = ftruncate(fd, size);
        if (rc==0) printf("ftruncate succeeded \n");
        else       printf("ftruncate failed \n");
    }

#if defined(__bgp__) && defined(BGP_SMP_SHM_BROKEN)
    void * ptr = NULL;
    _BGP_Personality_t pers;
    Kernel_GetPersonality(&pers, sizeof(pers));
    if( BGP_Personality_processConfig(&pers) == _BGP_PERS_PROCESSCONFIG_SMP )
    {
        printf("SMP mode => MAP_PRIVATE | MAP_ANONYMOUS \n");
        ptr = mmap( NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, fd, 0 );
    }
    else
        ptr = mmap( NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
#else
    void * ptr = mmap( NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0 );
#endif
    /* mmap reports failure with MAP_FAILED rather than NULL */
    if (ptr==MAP_FAILED) printf("mmap failed \n");
    else                 printf("mmap succeeded \n");

    printf("trying memset \n");
    memset(ptr, '\0', size);
    printf("memset succeeded \n");

    if (fd>=0)
    {
        int rc = ftruncate(fd, 0);
        if (rc==0) printf("ftruncate succeeded \n");
        else       printf("ftruncate failed \n");
    }

#if defined(POSIX_SHM)
    if (fd>=0)
    {
        int rc = shm_unlink("./bar");
        if (rc==0) printf("shm_unlink succeeded \n");
        else       printf("shm_unlink failed \n");
    }
#elif defined(DEV_SHM)
    if (fd>=0)
    {
        int rc = close(fd);
        if (rc==0) printf("close succeeded \n");
        else       printf("close failed \n");
    }
#endif

    if (ptr!=MAP_FAILED)
    {
        int rc = munmap(ptr, size);
        if (rc==0) printf("munmap succeeded \n");
        else       printf("munmap failed \n");
    }

    return 0;
}
int main(int argc, char **argv)
{
    BG_CoordinateMapping_t coord;
    BG_JobCoords_t job;
    Personality_t pers;

    Kernel_GetPersonality(&pers, sizeof(pers));
    myRank  = Kernel_GetRank();
    myCoord = Kernel_MyTcoord();
    Kernel_JobCoords(&job);
    // myCoreID     = Kernel_ProcessorCoreID();
    // myHWTID      = Kernel_ProcessorThreadID();
    // myPhysicalID = Kernel_PhysicalProcessorID();

    torus_t tcoords = { myA = pers.Network_Config.Acoord,
                        myB = pers.Network_Config.Bcoord,
                        myC = pers.Network_Config.Ccoord,
                        myD = pers.Network_Config.Dcoord,
                        myE = pers.Network_Config.Ecoord };

    torus_t tdims = { pers.Network_Config.Anodes,
                      pers.Network_Config.Bnodes,
                      pers.Network_Config.Cnodes,
                      pers.Network_Config.Dnodes,
                      pers.Network_Config.Enodes };

    numNodes = tdims.a * tdims.b * tdims.c * tdims.d * tdims.e;

    unsigned my_com_A = job.shape.a;
    unsigned my_com_B = job.shape.b;
    unsigned my_com_C = job.shape.c;
    unsigned my_com_D = job.shape.d;
    unsigned my_com_E = job.shape.e;

    unsigned my_com_Acoord = job.corner.a;
    unsigned my_com_Bcoord = job.corner.b;
    unsigned my_com_Ccoord = job.corner.c;
    unsigned my_com_Dcoord = job.corner.d;
    unsigned my_com_Ecoord = job.corner.e;

    if ( myRank == 763 )
    {
        printf("number of nodes:%d \n", numNodes);
        printf("number of processes per node:%d \n", Kernel_ProcessCount());
        printf("number of hardware threads per process:%d \n", Kernel_ProcessorCount());
        printf("MPI rank %d has 5D torus coordinates <%d,%d,%d,%d,%d> \n",
               myRank, myA, myB, myC, myD, myE);
        printf("job has 5D torus dimensions <%d,%d,%d,%d,%d> \n",
               tdims.a, tdims.b, tdims.c, tdims.d, tdims.e);
        printf("MPI rank %d has dimensions <%d,%d,%d,%d,%d> \n",
               myRank, my_com_A, my_com_B, my_com_C, my_com_D, my_com_E);
        printf("MPI rank %d has coordinates <%d,%d,%d,%d,%d> \n",
               myRank, my_com_Acoord, my_com_Bcoord, my_com_Ccoord, my_com_Dcoord, my_com_Ecoord);
    }

    // printf("rank %d has coordinates

    unsigned a, b, c, d, e;
    unsigned a_mult = tdims.b * tdims.c * tdims.d * tdims.e;
    unsigned b_mult = tdims.c * tdims.d * tdims.e;
    unsigned c_mult = tdims.d * tdims.e;
    unsigned d_mult = tdims.e;

    for ( a = 0; a < tdims.a; a++ )
      for ( b = 0; b < tdims.b; b++ )
        for ( c = 0; c < tdims.c; c++ )
          for ( d = 0; d < tdims.d; d++ )
            for ( e = 0; e < tdims.e; e++ )
            {
                unsigned rank = a * a_mult + b * b_mult + c * c_mult + d * d_mult + e;
                if ( a == tcoords.a && b == tcoords.b && c == tcoords.c &&
                     d == tcoords.d && e == tcoords.e )
                    myRank_test = rank;
            }

    // if (myRank == myRank_test) printf("MPI rank %d returns 1 \n", myRank);
}
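/* The quintuple loop above is an exhaustive search over the row-major
 * enumeration of the torus; the same rank can be computed directly from the
 * node's own coordinates. A minimal sketch with made-up coordinates and job
 * shape (the Horner form below expands to exactly the a_mult/b_mult/... sum
 * used in the loop): */
#include <stdio.h>

static unsigned torus_rank(unsigned a, unsigned b, unsigned c, unsigned d, unsigned e,
                           unsigned B, unsigned C, unsigned D, unsigned E)
{
    return (((a * B + b) * C + c) * D + d) * E + e;   /* row-major rank */
}

int main(void)
{
    unsigned a = 1, b = 0, c = 2, d = 3, e = 1;   /* example coordinates */
    unsigned B = 2, C = 4, D = 4, E = 2;          /* example extents; A does not enter */
    printf("rank = %u\n", torus_rank(a, b, c, d, e, B, C, D, E));
    return 0;
}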
static void init()
{
    int rc = 0;
    uint32_t fifoid = 0;
    uint32_t subgroup, group;
    int i;

    /* If we are the 1st process, set up the rget inj fifo */
    if ( Kernel_ProcessorID() == 0 )
    {
        /* Set up an rget injection fifo to be used by all processes on this node.
         * It is at a well-known location...subgroup 0, fifo 0.
         * - Allocate storage for an injection fifo
         * - Allocate and initialize that injection fifo.
         * - Activate that injection fifo.
         */
        rc = posix_memalign( (void**)&_ififoPtr, 64, _injFifoSize );
        assert ( rc == 0 );

        /* Set user fifo attribute. */
        Kernel_InjFifoAttributes_t injFifoAttrs[1];
        injFifoAttrs[0].RemoteGet = 1;
        injFifoAttrs[0].System    = 0;
        injFifoAttrs[0].Priority  = 0;

        subgroup = 0;
        rc = Kernel_AllocateInjFifos (subgroup, &_ififo_subgroup, 1, &fifoid, injFifoAttrs);
        assert ( rc == 0 );

        Kernel_MemoryRegion_t mregion;
        Kernel_CreateMemoryRegion ( &mregion, _ififoPtr, _injFifoSize );

        rc = Kernel_InjFifoInit( &_ififo_subgroup, fifoid, &mregion,
                                 (uint64_t)_ififoPtr - (uint64_t)mregion.BaseVa,
                                 _injFifoSize-1 );
        assert ( rc == 0 );

        rc = Kernel_InjFifoActivate ( &_ififo_subgroup, 1, &fifoid, KERNEL_INJ_FIFO_ACTIVATE );
        assert ( rc == 0 );

        /* Allocate a Base Address Table Entry for all processes on the node to use,
         * and set its value to zero.
         */
        uint32_t batId = 0;
        rc = Kernel_AllocateBaseAddressTable( 0, /* subgroup */
                                              &_batSubgroup,
                                              1,
                                              &batId,
                                              0 /* "User" access */ );
        assert ( rc == 0 );

        MUHWI_BaseAddress_t baseAddress;
        baseAddress = 0;
        rc = MUSPI_SetBaseAddress ( &_batSubgroup, batId, baseAddress );
        assert ( rc == 0 );
    }

    /* Set up a reception fifo to receive packets.
     * - Allocate storage for a reception fifo
     * - Use the subgroup equal to our HW thread ID.
     * - Allocate and initialize that reception fifo.
     * - Enable that reception fifo.
     */
    rc = posix_memalign( (void**)&_rfifoPtr, 32, _recFifoSize );
    assert ( rc == 0 );

    Kernel_RecFifoAttributes_t recFifoAttrs[1];
    recFifoAttrs[0].System = 0;

    subgroup = Kernel_ProcessorID();
    group    = Kernel_ProcessorCoreID();
    rc = Kernel_AllocateRecFifos (subgroup, &_rfifo_subgroup, 1, &fifoid, recFifoAttrs);
    assert ( rc == 0 );

    _rfifoShadowPtr = &_rfifo_subgroup._recfifos[fifoid];

    uint64_t recFifoEnableBits;
    Kernel_MemoryRegion_t mregion;
    Kernel_CreateMemoryRegion ( &mregion, _rfifoPtr, _recFifoSize );

    rc = Kernel_RecFifoInit( &_rfifo_subgroup, fifoid, &mregion,
                             (uint64_t)_rfifoPtr - (uint64_t)mregion.BaseVa,
                             _recFifoSize-1 );
    assert ( rc == 0 );

    recFifoEnableBits = ( 0x0000000000000001ULL <<
                          ( 15 - ( ( (Kernel_ProcessorThreadID())*BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP) + fifoid ) ) );
    rc = Kernel_RecFifoEnable ( group, recFifoEnableBits );
    assert ( rc == 0 );

    _globalRecFifoId = subgroup * BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP;

    /* Allocate NUM_BUFS send and recv buffers */
    for (i=0; i<NUM_BUFS; i++)
    {
        int size = (1<<i)*1024;
        rc = posix_memalign( (void**)&_sBuff[i], 8, size );
        assert ( rc == 0 );

        /* Init the buffer */
        int j;
        unsigned char value = i;
        unsigned char *bufPtr = _sBuff[i];
        for (j=0; j<size; j++)
        {
            *bufPtr = value++;
            bufPtr++;
        }

        Kernel_MemoryRegion_t mregion;
        Kernel_CreateMemoryRegion ( &mregion, _sBuff[i], size );
        _sBuffPA[i] = (uint64_t)_sBuff[i] - (uint64_t)mregion.BaseVa + (uint64_t)mregion.BasePa;

        rc = posix_memalign( (void**)&_rBuff[i], 8, size );
        assert ( rc == 0 );

        Kernel_CreateMemoryRegion ( &mregion, _rBuff[i], size );
        _rBuffPA[i] = (uint64_t)_rBuff[i] - (uint64_t)mregion.BaseVa + (uint64_t)mregion.BasePa;
    }

    /* Obtain our node coordinates */
    Personality_t personality;
    Kernel_GetPersonality(&personality, sizeof(personality));

    myCoords.Destination.A_Destination = personality.Network_Config.Acoord;
    myCoords.Destination.B_Destination = personality.Network_Config.Bcoord;
    myCoords.Destination.C_Destination = personality.Network_Config.Ccoord;
    myCoords.Destination.D_Destination = personality.Network_Config.Dcoord;
    myCoords.Destination.E_Destination = personality.Network_Config.Ecoord;

    /* Build the remote get descriptor model */
    {
        MUSPI_Pt2PtRemoteGetDescriptorInfo_t i;

        memset(&i, 0x00, sizeof(i));
        i.Base.Pre_Fetch_Only  = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO;
        i.Base.Payload_Address = 0; /* To be set by the agent */
        i.Base.Message_Length  = sizeof(MUHWI_Descriptor_t);
        i.Base.Torus_FIFO_Map  = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_PRIORITY;
        i.Base.Dest.Destination.Destination = myCoords.Destination.Destination;
        i.Pt2Pt.Hints_ABCD = 0;
        i.Pt2Pt.Misc1 = MUHWI_PACKET_USE_DETERMINISTIC_ROUTING;
        i.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_HIGH_PRIORITY;
        i.Pt2Pt.Skip  = 0;
        i.RemoteGet.Type = MUHWI_PACKET_TYPE_GET;
        i.RemoteGet.Rget_Inj_FIFO_Id = 0;

        rc = MUSPI_CreatePt2PtRemoteGetDescriptor( &_rgetDesc, &i );
        assert ( rc == 0 );
    }

    /* Build the data descriptor model */
    {
        MUSPI_Pt2PtDirectPutDescriptorInfo_t i;

        memset(&i, 0x00, sizeof(i));
        i.Base.Pre_Fetch_Only  = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO;
        i.Base.Payload_Address = 0; /* To be set at runtime */
        i.Base.Message_Length  = 0; /* To be set at runtime */
        i.Base.Torus_FIFO_Map  = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0;
        i.Base.Dest.Destination.Destination = myCoords.Destination.Destination;
        i.Pt2Pt.Hints_ABCD = 0;
        i.Pt2Pt.Misc1 = MUHWI_PACKET_USE_DETERMINISTIC_ROUTING;
        i.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC;
        i.Pt2Pt.Skip  = 0;
        i.DirectPut.Rec_Payload_Base_Address_Id = 0;
        i.DirectPut.Rec_Payload_Offset          = 0; /* To be set at runtime */
        i.DirectPut.Rec_Counter_Base_Address_Id = 0;
        i.DirectPut.Rec_Counter_Offset          = 0; /* Not used...agent uses its own */
        i.DirectPut.Pacing = MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED;

        rc = MUSPI_CreatePt2PtDirectPutDescriptor( &_dataDesc, &i );
        assert ( rc == 0 );
    }

    /* Build the completion descriptor model */
    {
        MUSPI_Pt2PtMemoryFIFODescriptorInfo_t i;

        memset(&i, 0x00, sizeof(i));
        i.Base.Pre_Fetch_Only  = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO;
        i.Base.Payload_Address = 0;
        i.Base.Message_Length  = 0;
        i.Base.Torus_FIFO_Map  = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0;
        i.Base.Dest.Destination.Destination = myCoords.Destination.Destination;
        i.Pt2Pt.Hints_ABCD = 0;
        i.Pt2Pt.Misc1 = MUHWI_PACKET_USE_DETERMINISTIC_ROUTING;
        i.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC;
        i.Pt2Pt.Skip  = 0;
        i.MemFIFO.Rec_FIFO_Id    = _globalRecFifoId;
        i.MemFIFO.Rec_Put_Offset = 0; /* Will contain the message number at runtime */

        rc = MUSPI_CreatePt2PtMemoryFIFODescriptor( &_completionDesc, &i );
        assert ( rc == 0 );
    }

    /* Initialize request data structures */
    memset(_requestStatus, 0x00, sizeof(_requestStatus));

    /* Wait to ensure this fifo has been allocated on process 0 before proceeding. */
    sleep(10);
}
int OSPU_Comm_split_node(MPI_Comm oldcomm, MPI_Comm * newcomm)
{
    int rc;

#if defined(__bgp__)

    _BGP_Personality_t personality;
    Kernel_GetPersonality( &personality, sizeof(personality) );

    /* SMP mode is trivial */
    if (personality.Kernel_Config.ProcessConfig == _BGP_PERS_PROCESSCONFIG_SMP )
    {
        *newcomm = MPI_COMM_SELF;
        return rc = MPI_SUCCESS;
    }
    else
    {
        int xrank = personality.Network_Config.Xcoord;
        int yrank = personality.Network_Config.Ycoord;
        int zrank = personality.Network_Config.Zcoord;

        int xsize = personality.Network_Config.Xnodes;
        int ysize = personality.Network_Config.Ynodes;
        int zsize = personality.Network_Config.Znodes;

        int color = xrank * ysize * zsize + yrank * zsize + zrank;

        rc = MPI_Comm_split(oldcomm, color, 0, newcomm);
        return rc;
    }

#elif defined(__bgq__)

    /* SMP mode is trivial */
    if ( 1 == Kernel_ProcessCount() )
    {
        *newcomm = MPI_COMM_SELF;
        return rc = MPI_SUCCESS;
    }
    else
    {
        Personality_t personality;
        Kernel_GetPersonality( &personality, sizeof(personality) );

        int arank = personality.Network_Config.Acoord;
        int brank = personality.Network_Config.Bcoord;
        int crank = personality.Network_Config.Ccoord;
        int drank = personality.Network_Config.Dcoord;
        int erank = personality.Network_Config.Ecoord;

        int asize = personality.Network_Config.Anodes;
        int bsize = personality.Network_Config.Bnodes;
        int csize = personality.Network_Config.Cnodes;
        int dsize = personality.Network_Config.Dnodes;
        int esize = personality.Network_Config.Enodes;

        int color = arank * bsize * csize * dsize * esize
                  + brank * csize * dsize * esize
                  + crank * dsize * esize
                  + drank * esize
                  + erank;

        rc = MPI_Comm_split(oldcomm, color, 0, newcomm);
        return rc;
    }

#endif

    return rc = MPI_SUCCESS;
}
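/* On any MPI-3 implementation the same node-local split can be obtained
 * without reading the personality at all, via MPI_Comm_split_type with
 * MPI_COMM_TYPE_SHARED. A minimal stand-alone sketch (not part of the
 * function above): */
#include <mpi.h>
#include <stdio.h>

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);

    /* Group all ranks that share a node (i.e. a shared-memory domain). */
    MPI_Comm nodecomm;
    MPI_Comm_split_type(MPI_COMM_WORLD, MPI_COMM_TYPE_SHARED, 0,
                        MPI_INFO_NULL, &nodecomm);

    int wrank, nrank;
    MPI_Comm_rank(MPI_COMM_WORLD, &wrank);
    MPI_Comm_rank(nodecomm, &nrank);
    printf("world rank %d is rank %d on its node\n", wrank, nrank);

    MPI_Comm_free(&nodecomm);
    MPI_Finalize();
    return 0;
}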
int GPTLget_memusage (int *size, int *rss, int *share, int *text, int *datastack)
{
#if defined (BGP) || defined(BGQ)

  long long alloc;
  struct mallinfo m;
#if defined (BGP)
  Personality pers;
#endif
#if defined (BGQ)
  uint64_t shared_mem_count;
#endif
  long long total;
  int node_config;

  /* memory available */
#if defined(BGP)
  Kernel_GetPersonality(&pers, sizeof(pers));
  total = BGP_Personality_DDRSizeMB(&pers);

  node_config = BGP_Personality_processConfig(&pers);
  if (node_config == _BGP_PERS_PROCESSCONFIG_VNM)
    total /= 4;
  else if (node_config == _BGP_PERS_PROCESSCONFIG_2x2)
    total /= 2;
  total *= 1024*1024;

  *size = total;
#endif

#if defined(BGQ)
  Kernel_GetMemorySize(KERNEL_MEMSIZE_SHARED, &shared_mem_count);
  shared_mem_count *= 1024*1024;
  *size = shared_mem_count;
#endif

  /* total memory used - heap only (not static memory) */
  m = mallinfo();
  alloc = m.hblkhd + m.uordblks;

  *rss = alloc;
  *share = -1;
  *text = -1;
  *datastack = -1;
  return 0;

#elif (defined HAVE_SLASHPROC)

  FILE *fd;                       /* file descriptor for fopen */
  int pid;                        /* process id */
  static char *head = "/proc/";   /* part of path */
  static char *tail = "/statm";   /* part of path */
  char file[19];                  /* full path to file in /proc */
  int dum;                        /* placeholder for unused return arguments */
  int ret;                        /* function return value */

  /*
  ** The file we want to open is /proc/<pid>/statm
  */
  pid = (int) getpid ();
  if (pid > 999999) {
    fprintf (stderr, "get_memusage: pid %d is too large\n", pid);
    return -1;
  }

  sprintf (file, "%s%d%s", head, pid, tail);
  if ((fd = fopen (file, "r")) == NULL) {   /* fopen signals failure with NULL */
    fprintf (stderr, "get_memusage: bad attempt to open %s\n", file);
    return -1;
  }

  /*
  ** Read the desired data from the /proc filesystem directly into the output
  ** arguments, close the file and return.
  */
  ret = fscanf (fd, "%d %d %d %d %d %d %d",
                size, rss, share, text, datastack, &dum, &dum);
  ret = fclose (fd);
  return 0;

#elif (defined __APPLE__)

  FILE *fd;
  char cmd[60];
  int pid = (int) getpid ();

  sprintf (cmd, "ps -o vsz -o rss -o tsiz -p %d | grep -v RSS", pid);
  fd = popen (cmd, "r");

  if (fd) {
    fscanf (fd, "%d %d %d", size, rss, text);
    *share = -1;
    *datastack = -1;
    (void) pclose (fd);
  }

  return 0;

#else

  struct rusage usage;   /* structure filled in by getrusage */

  if (getrusage (RUSAGE_SELF, &usage) < 0)
    return -1;

  *size = -1;
  *rss = usage.ru_maxrss;
  *share = -1;
  *text = -1;
  *datastack = -1;
#ifdef IRIX64
  *datastack = usage.ru_idrss + usage.ru_isrss;
#endif
  return 0;

#endif
}
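/* A hypothetical driver for the function above: it declares the same
 * prototype locally (a real build would get it from the GPTL header) and
 * prints the reported fields. Fields a platform cannot provide come back
 * as -1. */
#include <stdio.h>

int GPTLget_memusage (int *size, int *rss, int *share, int *text, int *datastack);

int main (void)
{
  int size, rss, share, text, datastack;

  if (GPTLget_memusage (&size, &rss, &share, &text, &datastack) == 0)
    printf ("size=%d rss=%d share=%d text=%d datastack=%d\n",
            size, rss, share, text, datastack);
  else
    printf ("GPTLget_memusage failed\n");
  return 0;
}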
void z_mpi_get_cart_topology(int *ndims, int *dims, int *torus, int *pos) /* z_proto, z_func z_mpi_get_cart_topology */
{
  int _ndims, _dims[MAX_CART_NDIMS], _torus[MAX_CART_NDIMS], _pos[MAX_CART_NDIMS];

#if defined(HAVE__BGP_PERSONALITY_T)

  /* BlueGene/P */
  _BGP_Personality_t personality;

  _ndims = 4;

  if (dims == NULL || torus == NULL || pos == NULL) goto exit_ndims_only;

  Kernel_GetPersonality(&personality, sizeof(personality));

  _dims[0] = personality.Network_Config.Xnodes;
  _dims[1] = personality.Network_Config.Ynodes;
  _dims[2] = personality.Network_Config.Znodes;

  _torus[0] = BGP_Personality_isTorusX(&personality);
  _torus[1] = BGP_Personality_isTorusY(&personality);
  _torus[2] = BGP_Personality_isTorusZ(&personality);

  _pos[0] = personality.Network_Config.Xcoord;
  _pos[1] = personality.Network_Config.Ycoord;
  _pos[2] = personality.Network_Config.Zcoord;

  switch (personality.Kernel_Config.ProcessConfig)
  {
    case _BGP_PERS_PROCESSCONFIG_SMP: _dims[3] = 1; break;
    case _BGP_PERS_PROCESSCONFIG_VNM: _dims[3] = 4; break;
    case _BGP_PERS_PROCESSCONFIG_2x2: _dims[3] = 2; break;
    default: _dims[3] = 1; break;
  }

  _torus[3] = (_torus[0] || _torus[1] || _torus[2]);

  _pos[3] = Kernel_PhysicalProcessorID();

#elif defined(HAVE_MPIX_HARDWARE_T)

  /* BlueGene/Q */
  int i;
  MPIX_Hardware_t hw;

  _ndims = MPIX_TORUS_MAX_DIMS + 1;

  if (dims == NULL || torus == NULL || pos == NULL) goto exit_ndims_only;

  MPIX_Hardware(&hw);

  _torus[MPIX_TORUS_MAX_DIMS] = 0;

  for (i = 0; i < MPIX_TORUS_MAX_DIMS; ++i)
  {
    _dims[i] = hw.Size[i];
    _torus[i] = hw.isTorus[i] ? 1 : 0;
    _pos[i] = hw.Coords[i];   /* store the coordinate of each torus dimension */
    if (_torus[i]) _torus[MPIX_TORUS_MAX_DIMS] = 1;
  }

  _dims[MPIX_TORUS_MAX_DIMS] = hw.ppn;
  _pos[MPIX_TORUS_MAX_DIMS] = hw.coreID;

#else

  /* MPI */
  MPI_Comm comm;
  int size, rank;

  _ndims = 3;

  if (dims == NULL || torus == NULL || pos == NULL) goto exit_ndims_only;

  comm = MPI_COMM_WORLD;

  MPI_Comm_size(comm, &size);
  MPI_Comm_rank(comm, &rank);

  _dims[0] = 0;
  _dims[1] = 0;
  _dims[2] = 0;
  MPI_Dims_create(size, 3, _dims);

  _pos[2] = (rank / (1)) % _dims[2];
  _pos[1] = (rank / (1 * _dims[2])) % _dims[1];
  _pos[0] = (rank / (1 * _dims[2] * _dims[1])) % _dims[0];

  _torus[0] = 0;
  _torus[1] = 0;
  _torus[2] = 0;

#endif

  if (*ndims <= 0) *ndims = _ndims;

  z_mpi_remap_cart_topology(_ndims, _dims, _torus, _pos, *ndims, dims, torus, pos);

exit_ndims_only:
  *ndims = _ndims;
}
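/* A hypothetical usage sketch of the query pattern supported above: passing
 * NULL arrays returns only the number of dimensions, a second call fills the
 * topology. The array size 16 is an assumption chosen to be comfortably
 * larger than any of the _ndims values used above; linking requires the
 * library that provides z_mpi_get_cart_topology() and its remap helper. */
#include <mpi.h>
#include <stdio.h>

void z_mpi_get_cart_topology(int *ndims, int *dims, int *torus, int *pos);

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  int ndims = 0;
  z_mpi_get_cart_topology(&ndims, NULL, NULL, NULL);   /* query ndims only */

  int dims[16], torus[16], pos[16];                    /* assumed large enough */
  z_mpi_get_cart_topology(&ndims, dims, torus, pos);   /* fetch the topology */

  for (int i = 0; i < ndims; i++)
    printf("dim %d: size=%d torus=%d pos=%d\n", i, dims[i], torus[i], pos[i]);

  MPI_Finalize();
  return 0;
}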