//initialize the spi communications void init_spi() { //check not to have initialized if(!spi_inited) { verbosity_lv1_master_printf("Starting spi\n"); //check that we do not have more than one process per node if(Kernel_ProcessCount()!=1) crash("only one process per node implemented"); //mark as initialized spi_inited=true; //get coordinates, size and rank in the 5D grid set_spi_geometry(); //check that all ranks are first neighbours in SPI grid check_all_lattice_neighbours_are_spi_first_neighbours(); //allocate bats spi_bat_id[0]=0; spi_bat_id[1]=1; if(Kernel_AllocateBaseAddressTable(0,&spi_bat_gr,2,spi_bat_id,0)) crash("allocating bat"); ////////////////////////////////// init the fifos /////////////////////////////////// //alloc space for the injection fifos uint32_t fifo_size=64*NSPI_FIFO; for(int ififo=0;ififo<NSPI_FIFO;ififo++) spi_fifo[ififo]=(uint64_t*)memalign(64,fifo_size); //set default attributes for inj fifo Kernel_InjFifoAttributes_t fifo_attrs[NSPI_FIFO]; memset(fifo_attrs,0,NSPI_FIFO*sizeof(Kernel_InjFifoAttributes_t)); //initialize them with default attributes uint32_t fifo_id[NSPI_FIFO]; for(int ififo=0;ififo<NSPI_FIFO;ififo++) fifo_id[ififo]=ififo; if(Kernel_AllocateInjFifos(0,&spi_fifo_sg_ptr,NSPI_FIFO,fifo_id,fifo_attrs)) crash("allocating inj fifos"); //init the MU MMIO for the fifos for(int ififo=0;ififo<NSPI_FIFO;ififo++) { //create the memory region Kernel_MemoryRegion_t mem_region; if(Kernel_CreateMemoryRegion(&mem_region,spi_fifo[NSPI_FIFO-1-ififo],fifo_size)) crash("creating memory region %d of bytes",ififo,fifo_size); //initialize the fifos if(Kernel_InjFifoInit(&spi_fifo_sg_ptr,fifo_id[ififo],&mem_region, (uint64_t)spi_fifo[NSPI_FIFO-1-ififo]-(uint64_t)mem_region.BaseVa,fifo_size-1)) crash("initializing fifo"); } //activate the fifos if(Kernel_InjFifoActivate(&spi_fifo_sg_ptr,NSPI_FIFO,fifo_id,KERNEL_INJ_FIFO_ACTIVATE)) crash("activating fifo"); //check alignment CRASH_IF_NOT_ALIGNED(recv_buf,64); CRASH_IF_NOT_ALIGNED(send_buf,64); 
//get physical address of receiving buffer Kernel_MemoryRegion_t mem_region; if(Kernel_CreateMemoryRegion(&mem_region,recv_buf,recv_buf_size)) crash("creating recv_buf memory region of %d bytes",recv_buf_size); //set the physical address if(MUSPI_SetBaseAddress(&spi_bat_gr,spi_bat_id[0],(uint64_t)recv_buf- (uint64_t)mem_region.BaseVa+(uint64_t)mem_region.BasePa)) crash("setting base address"); //set receive counter bat to MU style atomic PA addr of the receive counter if((uint64_t)(&spi_recv_counter)&0x7) crash("recv counter not 8 byte aligned"); if(Kernel_CreateMemoryRegion(&mem_region,(void*)&spi_recv_counter,sizeof(uint64_t))) crash("creating memory region of %d bytes",sizeof(uint64_t)); if(MUSPI_SetBaseAddress(&spi_bat_gr,spi_bat_id[1],MUSPI_GetAtomicAddress((uint64_t)&spi_recv_counter-(uint64_t)mem_region.BaseVa+(uint64_t)mem_region.BasePa,MUHWI_ATOMIC_OPCODE_STORE_ADD))) crash("setting base addr"); //reset number of byte to be received spi_recv_counter=0; //get the send buffer physical address if(Kernel_CreateMemoryRegion(&mem_region,send_buf,send_buf_size)) crash("creating memory region of %d bytes",send_buf_size); spi_send_buf_phys_addr=(uint64_t)send_buf-(uint64_t)mem_region.BaseVa+(uint64_t)mem_region.BasePa; //find hints for descriptors set_spi_hints(); #ifdef SPI_BARRIER //init the barrier if(MUSPI_GIBarrierInit(&spi_barrier,0)) crash("initializing the barrier"); #endif verbosity_lv2_master_printf("spi initialized\n"); } }
static void init() { int rc = 0; uint32_t fifoid=0; uint32_t subgroup, group; int i; /* If we are the 1st process, set up the rget inj fifo */ if ( Kernel_ProcessorID() == 0 ) { /* Set up an rget injection fifo to be used by all processes on this node. * It is at a well-known location...subgroup 0, fifo 0. * - Allocate storage for an injection fifo * - Allocate and initialize that injection fifo. * - Activate that injection fifo. */ rc = posix_memalign( (void**)&_ififoPtr, 64, _injFifoSize ); assert ( rc == 0 ); /* Set user fifo attribute. */ Kernel_InjFifoAttributes_t injFifoAttrs[1]; injFifoAttrs[0].RemoteGet = 1; injFifoAttrs[0].System = 0; injFifoAttrs[0].Priority = 0; subgroup = 0; rc = Kernel_AllocateInjFifos (subgroup, &_ififo_subgroup, 1, &fifoid, injFifoAttrs); assert ( rc == 0 ); Kernel_MemoryRegion_t mregion; Kernel_CreateMemoryRegion ( &mregion, _ififoPtr, _injFifoSize ); rc = Kernel_InjFifoInit( &_ififo_subgroup, fifoid, &mregion, (uint64_t)_ififoPtr - (uint64_t)mregion.BaseVa, _injFifoSize-1 ); assert ( rc == 0 ); rc = Kernel_InjFifoActivate ( &_ififo_subgroup, 1, &fifoid, KERNEL_INJ_FIFO_ACTIVATE ); assert ( rc == 0 ); /* Allocate a Base Address Table Entry for all processes on the node to use, * and set its value to zero. */ uint32_t batId = 0; rc = Kernel_AllocateBaseAddressTable( 0, /* subgroup */ &_batSubgroup, 1, &batId, 0 /* "User" access */); assert ( rc == 0 ); MUHWI_BaseAddress_t baseAddress; baseAddress = 0; rc = MUSPI_SetBaseAddress ( &_batSubgroup, batId, baseAddress ); assert ( rc == 0 ); } /* Set up a reception fifo to receive packets. * - Allocate storage for a reception fifo * - Use the subgroup equal to our HW thread ID. * - Allocate and initialize that reception fifo. * - Enable that reception fifo. 
*/ rc = posix_memalign( (void**)&_rfifoPtr, 32, _recFifoSize ); assert ( rc == 0 ); Kernel_RecFifoAttributes_t recFifoAttrs[1]; recFifoAttrs[0].System = 0; subgroup = Kernel_ProcessorID(); group = Kernel_ProcessorCoreID(); rc = Kernel_AllocateRecFifos (subgroup, &_rfifo_subgroup, 1, &fifoid, recFifoAttrs); assert ( rc == 0 ); _rfifoShadowPtr = &_rfifo_subgroup._recfifos[fifoid]; uint64_t recFifoEnableBits; Kernel_MemoryRegion_t mregion; Kernel_CreateMemoryRegion ( &mregion, _rfifoPtr, _recFifoSize ); rc = Kernel_RecFifoInit( &_rfifo_subgroup, fifoid, &mregion, (uint64_t)_rfifoPtr - (uint64_t)mregion.BaseVa, _recFifoSize-1 ); assert ( rc == 0 ); recFifoEnableBits = ( 0x0000000000000001ULL << ( 15 - ( ( (Kernel_ProcessorThreadID())*BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP) + fifoid ) ) ); rc = Kernel_RecFifoEnable ( group, recFifoEnableBits ); assert ( rc == 0 ); _globalRecFifoId = subgroup * BGQ_MU_NUM_REC_FIFOS_PER_SUBGROUP; /* Allocate NUM_BUFS send and recv buffers */ for (i=0; i<NUM_BUFS; i++) { int size = (1<<i)*1024; rc = posix_memalign( (void**)&_sBuff[i], 8, size ); assert ( rc == 0 ); /* Init the buffer */ int j; unsigned char value=i; unsigned char *bufPtr=_sBuff[i]; for (j=0; j<size; j++) { *bufPtr = value++; bufPtr++; } Kernel_MemoryRegion_t mregion; Kernel_CreateMemoryRegion ( &mregion, _sBuff[i], size ); _sBuffPA[i] = (uint64_t)_sBuff[i] - (uint64_t)mregion.BaseVa + (uint64_t)mregion.BasePa; rc = posix_memalign( (void**)&_rBuff[i], 8, size ); assert ( rc == 0 ); Kernel_CreateMemoryRegion ( &mregion, _rBuff[i], size ); _rBuffPA[i] = (uint64_t)_rBuff[i] - (uint64_t)mregion.BaseVa + (uint64_t)mregion.BasePa; } /* Obtain our node coordinates */ Personality_t personality; Kernel_GetPersonality(&personality, sizeof(personality)); myCoords.Destination.A_Destination = personality.Network_Config.Acoord; myCoords.Destination.B_Destination = personality.Network_Config.Bcoord; myCoords.Destination.C_Destination = personality.Network_Config.Ccoord; 
myCoords.Destination.D_Destination = personality.Network_Config.Dcoord; myCoords.Destination.E_Destination = personality.Network_Config.Ecoord; /* Build the remote get descriptor model */ { MUSPI_Pt2PtRemoteGetDescriptorInfo_t i; memset(&i, 0x00, sizeof(i)); i.Base.Pre_Fetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; i.Base.Payload_Address = 0; /* To be set by the agent */ i.Base.Message_Length = sizeof(MUHWI_Descriptor_t); i.Base.Torus_FIFO_Map = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_PRIORITY; i.Base.Dest.Destination.Destination = myCoords.Destination.Destination; i.Pt2Pt.Hints_ABCD = 0; i.Pt2Pt.Misc1 = MUHWI_PACKET_USE_DETERMINISTIC_ROUTING; i.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_HIGH_PRIORITY; i.Pt2Pt.Skip = 0; i.RemoteGet.Type = MUHWI_PACKET_TYPE_GET; i.RemoteGet.Rget_Inj_FIFO_Id = 0; rc = MUSPI_CreatePt2PtRemoteGetDescriptor( &_rgetDesc, &i ); assert ( rc == 0 ); } /* Build the data descriptor model */ { MUSPI_Pt2PtDirectPutDescriptorInfo_t i; memset(&i, 0x00, sizeof(i)); i.Base.Pre_Fetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; i.Base.Payload_Address = 0; /* To be set at runtime */ i.Base.Message_Length = 0; /* To be set at runtime */ i.Base.Torus_FIFO_Map = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0; i.Base.Dest.Destination.Destination = myCoords.Destination.Destination; i.Pt2Pt.Hints_ABCD = 0; i.Pt2Pt.Misc1 = MUHWI_PACKET_USE_DETERMINISTIC_ROUTING; i.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; i.Pt2Pt.Skip = 0; i.DirectPut.Rec_Payload_Base_Address_Id = 0; i.DirectPut.Rec_Payload_Offset = 0; /* To be set at runtime */ i.DirectPut.Rec_Counter_Base_Address_Id = 0; i.DirectPut.Rec_Counter_Offset = 0; /* Not used...agent uses its own */ i.DirectPut.Pacing = MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; rc = MUSPI_CreatePt2PtDirectPutDescriptor( &_dataDesc, &i ); assert ( rc == 0 ); } /* Build the completion descriptor model */ { MUSPI_Pt2PtMemoryFIFODescriptorInfo_t i; memset(&i, 0x00, sizeof(i)); i.Base.Pre_Fetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; 
i.Base.Payload_Address = 0; i.Base.Message_Length = 0; i.Base.Torus_FIFO_Map = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_LOCAL0; i.Base.Dest.Destination.Destination = myCoords.Destination.Destination; i.Pt2Pt.Hints_ABCD = 0; i.Pt2Pt.Misc1 = MUHWI_PACKET_USE_DETERMINISTIC_ROUTING; i.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; i.Pt2Pt.Skip = 0; i.MemFIFO.Rec_FIFO_Id = _globalRecFifoId; i.MemFIFO.Rec_Put_Offset = 0; /* Will contain the message number at runtime */ rc = MUSPI_CreatePt2PtMemoryFIFODescriptor( &_completionDesc, &i ); assert ( rc == 0 ); } /* Initialize request data structures */ memset(_requestStatus,0x00,sizeof(_requestStatus)); /* Wait to ensure this fifo has been allocated on process 0 before proceeding. */ sleep(10); }
int test_main ( void ) { if (PhysicalThreadIndex() > 0) // run a single core test. test_exit(0); int rc=0; printf("Torus Remote Get Atomic Test\n"); // Perform initialization of the network and mu Personality_t *pers; pers = fwext_getPersonality(); uint64_t p1 = pers->Kernel_Config.NodeConfig & PERS_ENABLE_Mambo; if (p1) is_mambo = 1; // ND and MU init is done in firmware, but we disable it in svchost and // call it directly here because it performs much better. // #if 0 fw_nd_set_verbose(0); // if 1, prints all dcr commands, don't use on cycle sim // on cycle sim, can have DcrMonitory trace DCR commands rc = fw_nd_reset_release(pers); if(rc) { TRACE(("fw_nd_reset_release failed with rc=%d\n",rc)); test_exit (rc); } fw_mu_set_verbose(0); // if 1, prints all dcr commands, don't use on cycle sim // on cycle sim, can have DcrMonitory trace DCR commands rc = fw_mu_reset_release(pers); if(rc) { printf("fw_mu_reset_release failed with rc=%d\n",rc); test_exit (rc); } // #endif // if 0 uint64_t max_value = ~0; fw_mu_set_sys_range(0, /* range_id */ 0, /* min_value */ max_value); fw_mu_set_usr_range(0, /* range_id */ 0, /* min_value */ max_value); /* fw_mu_set_imfifo_rget (1, 1); */ /* fw_mu_set_imfifo_system (1, 0); */ TRACE(("Network and MU Initialization is complete\n")); #else int main(int argc, char **argv) { int rc; #endif uint i = 0; // Destination for Remote Get packet MUHWI_Destination_t dest; MUSPI_SetUpDestination ( &dest, 0, 0, 0, 0, 0 ); MUSPI_InjFifoSubGroup_t fifo_subgroup; uint64 message_size_in_bytes_remote_get = MESSAGE_SIZE_REMOTE_GET; uint64 message_size_in_bytes_direct_put = MESSAGE_SIZE_DIRECT_PUT; TRACE(("main(): Injection Memory FIFO (0,0,0), Send Remote Get Message with Atomic Increment\n")); //#ifdef PRINT_DEBUG_MESSAGES printf("Start!\n"); //#endif // ------------------------------------------------------ // allocates area for message_sent_remote_get[] buffer (RemoteGet) // ------------------------------------------------------ uint64 
*message_sent_remote_get = (uint64 *)malloc(message_size_in_bytes_remote_get); uint64 *message_sent_direct_put = (uint64 *)malloc(message_size_in_bytes_direct_put); TRACE(("message_sent_remote_get (address) = %p\n", message_sent_remote_get)); TRACE(("message_size_in_bytes_remote_get = %lld\n", message_size_in_bytes_remote_get)); TRACE(("message_sent_direct_put (address) = %p\n", message_sent_direct_put)); TRACE(("message_size_in_bytes_direct_put = %lld\n", message_size_in_bytes_direct_put)); // Initializes the message_sent_remote_get[] buffer for (i=0; i<message_size_in_bytes_remote_get/8; i++) message_sent_remote_get[i] = 0x00ull; // 8-bytes Kernel_MemoryRegion_t mregionSentRemoteGet; rc = Kernel_CreateMemoryRegion ( &mregionSentRemoteGet, message_sent_remote_get, message_size_in_bytes_remote_get ); if ( rc != 0) { printf("Kernel_CreateMemoryRegion failed for message_sent_remote_get with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } // Initializes the message_sent[] buffer *message_sent_direct_put = (uint64)ATOMIC_COUNTER_INITIAL_VALUE; // 8-bytes uint64_t expected_counter_value = ATOMIC_COUNTER_INITIAL_VALUE + RECEIVE_BUFFER_INITIAL_VALUE; Kernel_MemoryRegion_t mregionSentDirectPut; rc = Kernel_CreateMemoryRegion ( &mregionSentDirectPut, message_sent_direct_put, message_size_in_bytes_direct_put ); if ( rc != 0) { printf("Kernel_CreateMemoryRegion failed for message_sent_direct_put with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } // Get an atomic address for the message_sent buffer. 
uint64_t message_sent_atomic_address = MUSPI_GetAtomicAddress ( (uint64_t)message_sent_direct_put - (uint64_t)mregionSentDirectPut.BaseVa + (uint64_t)mregionSentDirectPut.BasePa, MUHWI_ATOMIC_OPCODE_LOAD_INCREMENT ); TRACE(("message_sent_direct_put (atomic address) = 0x%llx\n", (long long unsigned int)message_sent_atomic_address)); ///////////////////////////////////////////////// typedef struct recvArea { volatile uint64 counter; unsigned char recvBuffer[MESSAGE_SIZE_DIRECT_PUT]; } recvArea_t; // Allocate space for the reception counter and the receive buffer recvArea_t *recvAreaPtr = (recvArea_t*)malloc ( sizeof(recvArea_t) ); if ( !recvAreaPtr ) { printf("Allocating recvArea failed\n"); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } volatile uint64 *counterAddress = (volatile uint64*)&(recvAreaPtr->counter); unsigned char *recvBufferAddress = (unsigned char *)&(recvAreaPtr->recvBuffer[0]); *((uint64*)recvBufferAddress) = RECEIVE_BUFFER_INITIAL_VALUE; // Get a memory region for the recvArea. Kernel_MemoryRegion_t recvAreaMemRegion; rc = Kernel_CreateMemoryRegion ( &recvAreaMemRegion, recvAreaPtr, sizeof(recvArea_t) ); if ( rc != 0) { printf("Kernel_CreateMemoryRegion failed for recvAreaMemRegion with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } // Calculate the offsets of the counter and receive buffer from the base address. 
uint64_t recvAreaBasePA = (uint64_t)recvAreaMemRegion.BasePa; uint64_t counterOffset = (uint64_t)counterAddress - (uint64_t)recvAreaMemRegion.BaseVa; uint64_t recvBufferOffset = (uint64_t)recvBufferAddress - (uint64_t)recvAreaMemRegion.BaseVa; TRACE(("counterAddress=%p, recvBufferAddress=%p, recvAreaBasePA=0x%llx, counterOffset=0x%llx, recvBufferOffset=0x%llx\n",counterAddress, recvBufferAddress, (long long unsigned int)recvAreaBasePA, (long long unsigned int)counterOffset, (long long unsigned int)recvBufferOffset)); ////////////////////////////////////////////////////////////// // Initialize base address table and atomic counter info ////////////////////////////////////////////////////////////// /* Set up the base address table */ uint32_t batids[1] = {0}; MUSPI_BaseAddressTableSubGroup_t bat; rc = Kernel_AllocateBaseAddressTable ( 0, &bat, 1, batids, 0 /* "User" use */ ); if (rc != 0) { printf("Kernel_AllocateBaseAddressTable failed with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } rc = MUSPI_SetBaseAddress ( &bat, 0, (uint64_t)recvAreaMemRegion.BasePa ); if (rc != 0) { printf("MUSPI_SetBaseAddress failed with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } TRACE(("Set BaseAddressTable entry slot 0 to 0x%llx\n", (long long unsigned int)recvAreaMemRegion.BasePa)); uint64_t muAtomicCounterOffset = MUSPI_GetAtomicOffsetFromBaseAddress ( &bat, 0, recvAreaBasePA + counterOffset, MUHWI_ATOMIC_OPCODE_STORE_ADD ); uint64_t muAtomicRecvBufferOffset = MUSPI_GetAtomicOffsetFromBaseAddress ( &bat, 0, recvAreaBasePA + recvBufferOffset, MUHWI_ATOMIC_OPCODE_STORE_ADD ); TRACE(("main(): recvCounterVa=%p, recvAreaBasePA=0x%llx, muAtomicCounterOffset=0x%llx, muAtomicRecvBufferOffset=0x%llx\n", &(recvAreaPtr->counter), (long long unsigned int)recvAreaBasePA, (long long unsigned int)muAtomicCounterOffset, (long long unsigned int)muAtomicRecvBufferOffset)); ////////////////////////////////////////////////////////////// // Create a 
DirectPut Descriptor and copy it into the // message payload ////////////////////////////////////////////////////////////// TRACE(("main(): Configures direct put descriptor\n")); MUSPI_Pt2PtDirectPutDescriptorInfo_t mu_iDirectPutDescriptorInfo; mu_iDirectPutDescriptorInfo.Base.Pre_Fetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; mu_iDirectPutDescriptorInfo.Base.Payload_Address = message_sent_atomic_address; mu_iDirectPutDescriptorInfo.Base.Message_Length = message_size_in_bytes_direct_put; mu_iDirectPutDescriptorInfo.Base.Torus_FIFO_Map = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP; mu_iDirectPutDescriptorInfo.Base.Dest = dest; mu_iDirectPutDescriptorInfo.Pt2Pt.Hints_ABCD = MUHWI_PACKET_HINT_AP; mu_iDirectPutDescriptorInfo.Pt2Pt.Misc1 = MUHWI_PACKET_HINT_E_NONE | MUHWI_PACKET_DO_NOT_ROUTE_TO_IO_NODE | MUHWI_PACKET_USE_DETERMINISTIC_ROUTING | MUHWI_PACKET_DO_NOT_DEPOSIT; mu_iDirectPutDescriptorInfo.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; mu_iDirectPutDescriptorInfo.Pt2Pt.Skip = 0; mu_iDirectPutDescriptorInfo.DirectPut.Rec_Payload_Base_Address_Id = 0; mu_iDirectPutDescriptorInfo.DirectPut.Rec_Payload_Offset = muAtomicRecvBufferOffset; mu_iDirectPutDescriptorInfo.DirectPut.Rec_Counter_Base_Address_Id = 0; mu_iDirectPutDescriptorInfo.DirectPut.Rec_Counter_Offset = muAtomicCounterOffset; mu_iDirectPutDescriptorInfo.DirectPut.Pacing = MUHWI_PACKET_DIRECT_PUT_IS_NOT_PACED; rc = MUSPI_CreatePt2PtDirectPutDescriptor( &mu_iDirectPutDescriptor, &mu_iDirectPutDescriptorInfo ); if (rc != 0) { printf("MUSPI_CreatePt2PtDirectPutDescriptor failed with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } //MUSPI_DescriptorDumpHex("Direct Put Descriptor", // &mu_iDirectPutDescriptor); // Copy Descriptor into RemoteGet message payload memcpy((char *)((void *)message_sent_remote_get), (char *)((void *)(&mu_iDirectPutDescriptor)), message_size_in_bytes_remote_get); ///////////////////////////////////////////////////////////// // RemoteGet message // Create a 
remote get descriptor ///////////////////////////////////////////////////////////// TRACE(("main(): Configures remote get descriptor\n")); MUSPI_Pt2PtRemoteGetDescriptorInfo_t mu_iRemoteGetDescriptorInfo; mu_iRemoteGetDescriptorInfo.Base.Pre_Fetch_Only = MUHWI_DESCRIPTOR_PRE_FETCH_ONLY_NO; mu_iRemoteGetDescriptorInfo.Base.Payload_Address = (uint64_t)message_sent_remote_get - (uint64_t)mregionSentRemoteGet.BaseVa + (uint64_t)mregionSentRemoteGet.BasePa; mu_iRemoteGetDescriptorInfo.Base.Message_Length = message_size_in_bytes_remote_get; mu_iRemoteGetDescriptorInfo.Base.Torus_FIFO_Map = MUHWI_DESCRIPTOR_TORUS_FIFO_MAP_AP; mu_iRemoteGetDescriptorInfo.Base.Dest = dest; mu_iRemoteGetDescriptorInfo.Pt2Pt.Hints_ABCD = MUHWI_PACKET_HINT_AP; mu_iRemoteGetDescriptorInfo.Pt2Pt.Misc1 = MUHWI_PACKET_HINT_E_NONE | MUHWI_PACKET_DO_NOT_ROUTE_TO_IO_NODE | MUHWI_PACKET_USE_DETERMINISTIC_ROUTING | MUHWI_PACKET_DO_NOT_DEPOSIT; mu_iRemoteGetDescriptorInfo.Pt2Pt.Misc2 = MUHWI_PACKET_VIRTUAL_CHANNEL_DETERMINISTIC; mu_iRemoteGetDescriptorInfo.Pt2Pt.Skip = 0; mu_iRemoteGetDescriptorInfo.RemoteGet.Type = MUHWI_PACKET_TYPE_GET; mu_iRemoteGetDescriptorInfo.RemoteGet.Rget_Inj_FIFO_Id = 1; // Fifo 1 is for remote get use // Prepares Injection Memory FIFO Descriptor (RemoteGet) rc = MUSPI_CreatePt2PtRemoteGetDescriptor( &mu_iRemoteGetDescriptor, &mu_iRemoteGetDescriptorInfo ); if (rc != 0) { printf("MUSPI_CreatePt2PtRemoteGetDescriptor failed with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } // MUSPI_DescriptorDumpHex("Remote Get Descriptor", // &mu_iRemoteGetDescriptor); ///////////////////////////////////////////////////////////////// // Configures Injection Memory FIFO Registers // - fifo 0 that the core injects descriptors into // - fifo 1 that the MU injects remote get payload into ///////////////////////////////////////////////////////////////// TRACE(("main(): Configures Injection Memory FIFO Registers\n")); void *injMemoryFifoPtr, *memoryForInjMemoryFifoPtr; rc = 
malloc_memalign ( &memoryForInjMemoryFifoPtr, &injMemoryFifoPtr, 64, INJ_MEMORY_FIFO_SIZE+1 ); if (rc) { printf("inj_memory_fifo malloc failed with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } void *rgetMemoryFifoPtr, *memoryForRgetMemoryFifoPtr; rc = malloc_memalign ( &memoryForRgetMemoryFifoPtr, &rgetMemoryFifoPtr, 64, INJ_MEMORY_FIFO_SIZE+1 ); if (rc) { printf("rget_memory_fifo malloc failed with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } uint32_t fifoid[2] = { 0, 1 }; Kernel_InjFifoAttributes_t injFifoAttrs[2]; injFifoAttrs[0].RemoteGet = 0; injFifoAttrs[0].System = 0; injFifoAttrs[1].RemoteGet = 1; injFifoAttrs[1].System = 0; rc = Kernel_AllocateInjFifos (0, &fifo_subgroup, 2, fifoid, injFifoAttrs); if ( rc != 0) { printf("Kernel_AllocateInjFifos failed with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } Kernel_MemoryRegion_t mregionInj; rc = Kernel_CreateMemoryRegion ( &mregionInj, injMemoryFifoPtr, INJ_MEMORY_FIFO_SIZE + 1 ); if ( rc != 0) { printf("Kernel_CreateMemoryRegion failed for injMemoryFifoPtr with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } Kernel_MemoryRegion_t mregionRget; rc = Kernel_CreateMemoryRegion ( &mregionRget, rgetMemoryFifoPtr, INJ_MEMORY_FIFO_SIZE + 1 ); if ( rc != 0) { printf("Kernel_CreateMemoryRegion failed for rgetMemoryFifoPtr with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } rc = Kernel_InjFifoInit (&fifo_subgroup, fifoid[0], &mregionInj, (uint64_t)injMemoryFifoPtr - (uint64_t)mregionInj.BaseVa, INJ_MEMORY_FIFO_SIZE); if (rc != 0) { printf("Kernel_InjFifoInit Inj failed with rc=%d, errno=%d\n",rc,errno); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } rc = Kernel_InjFifoInit (&fifo_subgroup, fifoid[1], &mregionRget, (uint64_t)rgetMemoryFifoPtr - (uint64_t)mregionRget.BaseVa, INJ_MEMORY_FIFO_SIZE); if (rc != 0) { printf("Kernel_InjFifoInit Rget failed with rc=%d, errno=%d\n",rc,errno); #ifdef __FWEXT__ 
test_exit(1); #else exit(1); #endif } rc = Kernel_InjFifoActivate (&fifo_subgroup, 2, fifoid, KERNEL_INJ_FIFO_ACTIVATE); if (rc != 0) { printf("Kernel_InjFifoActivate Inj failed with rc=%d, errno=%d\n",rc,errno); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } // --------------------------------------------- // Reception Side // --------------------------------------------- /* *data_counter_base_address = REC_PAYLOAD_BASE_ADDRESS; */ /* printf("data_counter_base_address = %p\n", data_counter_base_address); */ // Loop, sending the remote get, waiting for the reception counter to hit zero, // and verifying the received counter's value. // for (i=0; i<num_iterations; i++) /** disable loop **/ { // Let's initialize the Counter for corresponding Counter Id // Note: counter is initialized with the message size // updates counter with number of bytes sent *counterAddress = MESSAGE_SIZE_DIRECT_PUT; // ----------------------------------------------------------- // Processor Advances Tail pointer - Descriptor is 64-bytes // MU should Inject (RemoteGet) message into the Torus // ----------------------------------------------------------- // Let's Inject the (RemoteGet) Descriptor into the Injection Memory FIFO #if 1 printf("main(): Inject Descriptor into Injection Memory FIFO\n"); #endif rc = MUSPI_InjFifoInject (MUSPI_IdToInjFifo(fifoid[0], &fifo_subgroup), (void *)(&mu_iRemoteGetDescriptor) ); if (rc < 0) // Should have injected 1 descriptor { printf("MUSPI_InjFifoInject failed with rc=%d\n",rc); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } #ifndef __FWEXT__ printf("main(): Successful injection of remote get descriptor\n"); #endif // ////////////////////////////////////////////////// // Reception side, check counter value // ////////////////////////////////////////////////// uint64 volatile counter_value; // wait for the counter to reach ZERO while (1) { counter_value = *counterAddress; if (counter_value == 0) { // #if 1 printf("counter is now ZERO 
!!!!\n"); #endif break; } } _bgq_msync(); // Ensure data is available to all cores. // Let's print the Received Message contents //put_offset = (uint64)mu_pktHdrDirectPut.Put_Offset_LSB; #ifndef __FWEXT__ printf("recvBufferAddress = %p\n", recvBufferAddress); printf("---Prints Received Message contents\n"); Print_Message((unsigned char *)recvBufferAddress, message_size_in_bytes_direct_put); printf("---Where Received Message is being stored: recvBufferAddress = %p\n", recvBufferAddress); printf("---Checks Received Message contents(size = %lld)\n", message_size_in_bytes_direct_put); #endif uint64_t receivedCounterValue = *((uint64_t*)recvBufferAddress); if ( receivedCounterValue == expected_counter_value ) { printf("---Received Counter Value = %llu\n", (long long unsigned int)receivedCounterValue); } else { printf("ERROR: Received Counter Value = %llu, expected %llu\n", (long long unsigned int)receivedCounterValue, (long long unsigned int)expected_counter_value); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } if ( *message_sent_direct_put == ATOMIC_COUNTER_INITIAL_VALUE+1 ) { printf("---Sent Counter Value = %llu\n", (long long unsigned int)*message_sent_direct_put); } else { printf("ERROR: Sent Counter Value = %llu, expected %llu\n", (long long unsigned int)*message_sent_direct_put, (long long unsigned int)(ATOMIC_COUNTER_INITIAL_VALUE+1)); #ifdef __FWEXT__ test_exit(1); #else exit(1); #endif } } //printf("All counter values passed\n"); #ifdef __FWEXT__ if ( is_mambo == 0 ) // Termination checks don't work in mambo. ErrInt DCRs are not zero. { rc = fw_nd_term_check(pers); if (rc) { printf("ERROR: fw_nd_term_check failed with rc=%d\n",rc); test_exit(1); } rc = fw_mu_term_check(pers); if (rc) { printf("ERROR: fw_mu_term_check failed with rc=%d\n",rc); test_exit(1); } } #endif printf("Done!\n"); #ifdef __FWEXT__ test_exit (0); #endif return 0; }
/*
 * Allocate, initialize and activate a run of injection fifos.
 *
 * The fifos start at fifo startingFifoId of subgroup startingSubgroupId and
 * spill over into the following subgroups (starting at fifo 0 there) when
 * more than BGQ_MU_NUM_INJ_FIFOS_PER_SUBGROUP are needed.
 *
 * injFifoHandlePtr   - receives the opaque info object on success.
 * startingSubgroupId - subgroup of the first fifo.
 * startingFifoId     - id of the first fifo, relative to its subgroup.
 * numFifos           - number of fifos to set up.
 * fifoSize           - size in bytes of each fifo; each is 64-byte aligned.
 * injFifoAttrs       - attributes applied to every fifo; NULL selects
 *                      zero-filled defaults.
 *
 * Returns 0 on success, -1 if injFifoHandlePtr is NULL or a memory
 * allocation fails (everything allocated so far is released), or the
 * non-zero return code of the failing kernel call.
 *
 * NOTE(review): info->fifoPtr[], info->subgroup[] and
 * info->numFifosInSubgroup[] are indexed without a bounds check because
 * their capacities are not visible here -- confirm callers stay in range.
 */
int msg_InjFifoInit ( msg_InjFifoHandle_t *injFifoHandlePtr,
		      uint32_t startingSubgroupId,
		      uint32_t startingFifoId,
		      uint32_t numFifos,
		      size_t fifoSize,
		      Kernel_InjFifoAttributes_t *injFifoAttrs )
{
  uint32_t endingFifoId;       // Relative to a subgroup
  uint32_t numFifosInSubgroup;
  int      rc;
  uint32_t subgroupId = startingSubgroupId;
  uint32_t fifoIds[BGQ_MU_NUM_INJ_FIFOS_PER_SUBGROUP];
  Kernel_InjFifoAttributes_t attrs[BGQ_MU_NUM_INJ_FIFOS_PER_SUBGROUP];
  Kernel_InjFifoAttributes_t defaultAttrs;

  // Fail before touching any hardware if there is nowhere to store the result.
  if ( !injFifoHandlePtr ) return -1;

  memset ( &defaultAttrs, 0x00, sizeof(defaultAttrs) );
  if ( injFifoAttrs == NULL )
    {
      injFifoAttrs = &defaultAttrs;
    }

  // Malloc space for the info structure
  msg_InjFifoInfo_t *info;
  info = (msg_InjFifoInfo_t *) memalign(32, sizeof(msg_InjFifoInfo_t));
  if ( !info ) return -1;

  // Initialize the info structure
  info->startingSubgroupId = startingSubgroupId;
  info->startingFifoId     = startingFifoId;
  info->numFifos           = numFifos;
  info->numSubgroups       = 0;

  // Malloc space for the injection fifos.  They are 64-byte aligned.
  for (unsigned int i = 0; i < numFifos; i++)
    {
      info->fifoPtr[i] = (uint64_t*)memalign(64, fifoSize);
      if ( !info->fifoPtr[i] )
	{
	  // Nothing has been handed to the kernel yet, so release what we
	  // allocated instead of leaking it.
	  for (unsigned int j = 0; j < i; j++)
	    {
	      free ( info->fifoPtr[j] );
	    }
	  free ( info );
	  return -1;
	}
    }

  // Process one subgroup at a time.
  // - Allocate the fifos.
  // - Init the MU MMIO for the fifos.
  // - Activate the fifos.
  //
  // On a kernel failure below, the fifo memory and the info structure are
  // deliberately not freed: fifos set up in earlier iterations may already
  // be initialized or activated against that memory.
  // NOTE(review): confirm whether a teardown path should deallocate them.
  while ( numFifos > 0 )
    {
      info->numSubgroups++;

      // startingFifoId is the starting fifo number relative to the
      // subgroup we are working on.
      // Determine endingFifoId, the ending fifo number relative to
      // the subgroup we are working on.
      endingFifoId = startingFifoId + numFifos-1;
      if ( endingFifoId > (BGQ_MU_NUM_INJ_FIFOS_PER_SUBGROUP-1) )
	{
	  endingFifoId = BGQ_MU_NUM_INJ_FIFOS_PER_SUBGROUP-1;
	}
      numFifosInSubgroup = endingFifoId - startingFifoId + 1;
      info->numFifosInSubgroup[subgroupId] = numFifosInSubgroup;

      // Init structures for allocating the fifos...
      // - fifo Ids
      // - attributes
      for (unsigned int i = 0; i < numFifosInSubgroup; i++)
	{
	  fifoIds[i] = startingFifoId + i;
	  memcpy(&attrs[i], injFifoAttrs, sizeof(attrs[i]));
	}

      // Allocate the fifos
      rc = Kernel_AllocateInjFifos (subgroupId,
				    &info->subgroup[subgroupId],
				    numFifosInSubgroup,
				    fifoIds,
				    attrs);
      if ( rc )
	{
	  printf("msg_InjFifoInit: Kernel_AllocateInjFifos failed with rc=%d\n",rc);
	  return rc;
	}

      // Init the MU MMIO for the fifos.
      // Buffers are consumed from the tail of the not-yet-used part of
      // info->fifoPtr[]: fifoIds[i] is backed by fifoPtr[numFifos-i-1].
      for (unsigned int i = 0; i < numFifosInSubgroup; i++)
	{
	  uint64_t *fifoMemory = info->fifoPtr[numFifos-i-1];

	  Kernel_MemoryRegion_t memRegion;
	  rc = Kernel_CreateMemoryRegion ( &memRegion,
					   fifoMemory,
					   fifoSize );
	  if ( rc )
	    {
	      printf("msg_InjFifoInit: Kernel_CreateMemoryRegion failed with rc=%d\n",rc);
	      return rc;
	    }

	  // initialise the Fifos
	  rc = Kernel_InjFifoInit (&info->subgroup[subgroupId],
				   fifoIds[i],
				   &memRegion,
				   (uint64_t)fifoMemory - (uint64_t)memRegion.BaseVa,
				   fifoSize-1);
	  if ( rc )
	    {
	      printf("msg_InjFifoInit: Kernel_InjFifoInit failed with rc=%d\n",rc);
	      return rc;
	    }
	}

      // Activate the fifos.
      rc = Kernel_InjFifoActivate (&info->subgroup[subgroupId],
				   numFifosInSubgroup,
				   fifoIds,
				   KERNEL_INJ_FIFO_ACTIVATE);
      if ( rc )
	{
	  printf("msg_InjFifoInit: Kernel_InjFifoActivate failed with rc=%d\n",rc);
	  return rc;
	}

      startingFifoId = 0; // Next subgroup will start at fifo 0.
      subgroupId++;       // Next subgroup.
      numFifos -= numFifosInSubgroup;
    }

  injFifoHandlePtr->pOpaqueObject = (void *)info;
  return 0;
}