static int bgq_nodenum(void) { int hostnum; Personality_t personality; Kernel_GetPersonality(&personality, sizeof(personality)); /* Each MPI rank has a unique coordinate in a 6-dimensional space (A,B,C,D,E,T), with dimensions A-E corresponding to different physical nodes, and T within each node. Each node has sixteen physical cores, each of which can have up to four hardware threads, so 0 <= T <= 63 (but the maximum value of T depends on the confituration of ranks and OpenMP threads per node). However, T is irrelevant for computing a suitable return value for gmx_hostname_num(). */ hostnum = personality.Network_Config.Acoord; hostnum *= personality.Network_Config.Bnodes; hostnum += personality.Network_Config.Bcoord; hostnum *= personality.Network_Config.Cnodes; hostnum += personality.Network_Config.Ccoord; hostnum *= personality.Network_Config.Dnodes; hostnum += personality.Network_Config.Dcoord; hostnum *= personality.Network_Config.Enodes; hostnum += personality.Network_Config.Ecoord; if (debug) { std::fprintf(debug, "Torus ID A: %d / %d B: %d / %d C: %d / %d D: %d / %d E: %d / %d\n" "Node ID T: %d / %d core: %d / %d hardware thread: %d / %d\n", personality.Network_Config.Acoord, personality.Network_Config.Anodes, personality.Network_Config.Bcoord, personality.Network_Config.Bnodes, personality.Network_Config.Ccoord, personality.Network_Config.Cnodes, personality.Network_Config.Dcoord, personality.Network_Config.Dnodes, personality.Network_Config.Ecoord, personality.Network_Config.Enodes, Kernel_ProcessorCoreID(), 16, Kernel_ProcessorID(), 64, Kernel_ProcessorThreadID(), 4); } return hostnum; }
static void init() { int rc = 0; uint32_t fifoid=0; uint32_t subgroup, group; int i; /* If we are the 1st process, set up the rget inj fifo */ if ( Kernel_ProcessorID() == 0 ) { /* Set up an rget injection fifo to be used by all processes on this node. * It is at a well-known location...subgroup 0, fifo 0. * - Allocate storage for an injection fifo * - Allocate and initialize that injection fifo. * - Activate that injection fifo. */ rc = posix_memalign( (void**)&_ififoPtr, 64, _injFifoSize ); assert ( rc == 0 ); /* Set user fifo attribute. */ Kernel_InjFifoAttributes_t injFifoAttrs[1]; injFifoAttrs[0].RemoteGet = 1; injFifoAttrs[0].System = 0; injFifoAttrs[0].Priority = 0; subgroup = 0; rc = Kernel_AllocateInjFifos (subgroup, &_ififo_subgroup, 1, &fifoid, injFifoAttrs); assert ( rc == 0 ); Kernel_MemoryRegion_t mregion; Kernel_CreateMemoryRegion ( &mregion, _ififoPtr, _injFifoSize ); rc = Kernel_InjFifoInit( &_ififo_subgroup, fifoid, &mregion, (uint64_t)_ififoPtr - (uint64_t)mregion.BaseVa, _injFifoSize-1 ); assert ( rc == 0 ); rc = Kernel_InjFifoActivate ( &_ififo_subgroup, 1, &fifoid, KERNEL_INJ_FIFO_ACTIVATE ); assert ( rc == 0 ); /* Allocate a Base Address Table Entry for all processes on the node to use, * and set its value to zero. */ uint32_t batId = 0; rc = Kernel_AllocateBaseAddressTable( 0, /* subgroup */ &_batSubgroup, 1, &batId, 0 /* "User" access */); assert ( rc == 0 ); MUHWI_BaseAddress_t baseAddress; baseAddress = 0; rc = MUSPI_SetBaseAddress ( &_batSubgroup, batId, baseAddress ); assert ( rc == 0 ); } /* Set up a reception fifo to receive packets. * - Allocate storage for a reception fifo * - Use the subgroup equal to our HW thread ID. * - Allocate and initialize that reception fifo. * - Enable that reception fifo. */ rc = posix_memalign( (void**)&_rfifoPtr, 32, _recFifoSize ); assert ( rc == 0 ); Kernel_RecFifoAttributes_t recFifoAttrs[1]; recFifoAttrs[0].System = 0; subgroup = Kernel_ProcessorID(); group = Kernel_ProcessorCoreID(); rc = Kernel_AllocateRecFifos (subgroup, &_rfifo_subgroup, 1, &fifoid, recFifoAttrs); assert ( rc == 0 ); _rfifoShadowPtr = &_rfifo_subgroup._recfifos[fifoid]; uint64_t recFifoEnableBits; Kernel_MemoryRegion_t mregion; Kernel_CreateMemoryRegion ( &mregion, _rfifoPtr, _recFifoSize ); rc = Kernel_RecFifoInit( &_rfifo_subgroup, fifoid, &mregion, (uint64_t)_rfifoPtr - (uint64_t)mregion.BaseVa, _recFifoSize-1 ); assert ( rc == 0 ); recFifoEnableBits = ( 0x0000000000000001ULL << ( 15 - ( ( (Kernel_ProcessorThreadID())*BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP) + fifoid ) ) ); rc = Kernel_RecFifoEnable ( group, recFifoEnableBits ); assert ( rc == 0 ); _globalRecFifoId = subgroup * BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP; /* Allocate NUM_BUFS send and recv buffers */ for (i=0; i<NUM_BUFS; i++) { int size = (1<<i)*1024; rc = posix_memalign( (void**)&_sBuff[i], 8, size ); assert ( rc == 0 ); /* Init the buffer */ int j; unsigned char value=i; unsigned char *bufPtr=_sBuff[i]; for (j=0; j<size; j++) { *bufPtr = value++; bufPtr++; } Kernel_MemoryRegion_t mregion; Kernel_CreateMemoryRegion ( &mregion, _sBuff[i], size ); _sBuffPA[i] = (uint64_t)_sBuff[i] - (uint64_t)mregion.BaseVa + (uint64_t)mregion.BasePa; rc = posix_memalign( (void**)&_rBuff[i], 8, size ); assert ( rc == 0 ); Kernel_CreateMemoryRegion ( &mregion, _rBuff[i], size ); _rBuffPA[i] = (uint64_t)_rBuff[i] - (uint64_t)mregion.BaseVa + (uint64_t)mregion.BasePa; } /* Obtain our node coordinates */ Personality_t personality; Kernel_GetPersonality(&personality, sizeof(personality)); myCoords.Destination.A_Destination = personality.Network_Config.Acoord; myCoords.Destination.B_Destination = personality.Network_Config.Bcoord; myCoords.Destination.C_Destination = personality.Network_Config.Ccoord; myCoords.Destination.D_Destination = personality.Network_Config.Dcoord; myCoords.Destination.E_Destination = personality.Network_Config.Ecoord; /* Build the remote get descriptor model */ { MUSPI_Pt2PtRemoteGetDescriptorInfo_t i; memset(&i, 0x00, sizeof(i)); i.Base.Pre_Fetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; i.Base.Payload_Address = 0; /* To be set by the agent */ i.Base.Message_Length = sizeof(MUHWI_Descriptor_t); i.Base.Torus_FIFO_Map = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_PRIORITY; i.Base.Dest.Destination.Destination = myCoords.Destination.Destination; i.Pt2Pt.Hints_ABCD = 0; i.Pt2Pt.Misc1 = MUHWI_PACKET_USE_DETERMINISTIC_ROUTING; i.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_HIGH_PRIORITY; i.Pt2Pt.Skip = 0; i.RemoteGet.Type = MUHWI_PACKET_TYPE_GET; i.RemoteGet.Rget_Inj_FIFO_Id = 0; rc = MUSPI_CreatePt2PtRemoteGetDescriptor( &_rgetDesc, &i ); assert ( rc == 0 ); } /* Build the data descriptor model */ { MUSPI_Pt2PtDirectPutDescriptorInfo_t i; memset(&i, 0x00, sizeof(i)); i.Base.Pre_Fetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; i.Base.Payload_Address = 0; /* To be set at runtime */ i.Base.Message_Length = 0; /* To be set at runtime */ i.Base.Torus_FIFO_Map = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0; i.Base.Dest.Destination.Destination = myCoords.Destination.Destination; i.Pt2Pt.Hints_ABCD = 0; i.Pt2Pt.Misc1 = MUHWI_PACKET_USE_DETERMINISTIC_ROUTING; i.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; i.Pt2Pt.Skip = 0; i.DirectPut.Rec_Payload_Base_Address_Id = 0; i.DirectPut.Rec_Payload_Offset = 0; /* To be set at runtime */ i.DirectPut.Rec_Counter_Base_Address_Id = 0; i.DirectPut.Rec_Counter_Offset = 0; /* Not used...agent uses its own */ i.DirectPut.Pacing = MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; rc = MUSPI_CreatePt2PtDirectPutDescriptor( &_dataDesc, &i ); assert ( rc == 0 ); } /* Build the completion descriptor model */ { MUSPI_Pt2PtMemoryFIFODescriptorInfo_t i; memset(&i, 0x00, sizeof(i)); i.Base.Pre_Fetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; i.Base.Payload_Address = 0; i.Base.Message_Length = 0; i.Base.Torus_FIFO_Map = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0; i.Base.Dest.Destination.Destination = myCoords.Destination.Destination; i.Pt2Pt.Hints_ABCD = 0; i.Pt2Pt.Misc1 = MUHWI_PACKET_USE_DETERMINISTIC_ROUTING; i.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; i.Pt2Pt.Skip = 0; i.MemFIFO.Rec_FIFO_Id = _globalRecFifoId; i.MemFIFO.Rec_Put_Offset = 0; /* Will contain the message number at runtime */ rc = MUSPI_CreatePt2PtMemoryFIFODescriptor( &_completionDesc, &i ); assert ( rc == 0 ); } /* Initialize request data structures */ memset(_requestStatus,0x00,sizeof(_requestStatus)); /* Wait to ensure this fifo has been allocated on process 0 before proceeding. */ sleep(10); }