/*
 * Disable the progress thread with index 'id' on this domain and join it.
 *
 * Signals the thread by clearing its 'enabled' flag, spins until the thread
 * clears its own 'active' flag, then joins the pthread and tears down the
 * thread's l2atomic fifo pair.  The fi_bgq_msync() calls order the flag
 * reads/writes between this caller and the progress thread.
 *
 * Returns 0 (always; failures are caught by assert in debug builds).
 */
int fi_bgq_progress_disable (struct fi_bgq_domain *bgq_domain, const unsigned id) {
	/* 64 hw threads per node; at most (64/ppn - 1) progress threads */
	assert(id < (64/Kernel_ProcessCount()-1));
	assert(id < bgq_domain->progress.max_threads);

	/* read-ordering sync before sampling the 'enabled' flag */
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO);
	if (0 == bgq_domain->progress.thread[id].enabled) {
		/* a thread that was never enabled must not be active */
		assert(0 == bgq_domain->progress.thread[id].active);
		return 0;
	}

	/* tell the thread to stop, then publish the write */
	bgq_domain->progress.thread[id].enabled = 0;
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO);

	/* Wait until the progress thread is no longer active */
	while (bgq_domain->progress.thread[id].active) {
		fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO);
	}

	int rc __attribute__ ((unused));
	void *retval = NULL;
	rc = pthread_join(bgq_domain->progress.thread[id].pthread, &retval);
	assert(0 == rc);
	bgq_domain->progress.thread[id].pthread = 0;

	/* tear down the thread's l2atomic command fifo pair */
	l2atomic_fifo_disable(&bgq_domain->progress.thread[id].consumer,
		&bgq_domain->progress.thread[id].producer);

	--(bgq_domain->progress.num_threads_active);
	return 0;
}
/* Number of CPUs available to this measurement process. */
int vt_pform_num_cpus()
{
#ifdef BGP_GROUP_ON_NODEBOARD
  /* one node board groups 32 nodes; scale by processes per node */
  int per_node = Kernel_ProcessCount();
  return 32 * per_node;
#else
  /* each process is treated as a single CPU */
  return 1;
#endif
}
/*
 * Enable (start) the progress thread with index 'id' on this domain.
 *
 * Sets the 'enabled' flag, spawns the pthread running progress_fn, and
 * spins until the thread raises its 'active' flag.  The fi_bgq_msync()
 * calls order the flag accesses between this caller and the new thread.
 *
 * Returns 0 on success (or if already enabled), -1 if pthread_create fails.
 */
int fi_bgq_progress_enable (struct fi_bgq_domain *bgq_domain, const unsigned id) {
	/* 64 hw threads per node; at most (64/ppn - 1) progress threads */
	assert(id < (64/Kernel_ProcessCount()-1));
	assert(id < bgq_domain->progress.max_threads);

	/* read-ordering sync before sampling the 'enabled' flag */
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO);
	if (bgq_domain->progress.thread[id].enabled) {
		/* an enabled thread must already be active */
		assert(bgq_domain->progress.thread[id].active);
		return 0;
	}

	/* publish enabled=1/active=0 before the thread starts reading them */
	bgq_domain->progress.thread[id].enabled = 1;
	bgq_domain->progress.thread[id].active = 0;
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO);

	int rc = 0;
	rc = pthread_create(&bgq_domain->progress.thread[id].pthread, NULL, progress_fn, (void *)&bgq_domain->progress.thread[id]);
	if (rc) {
		/* Error starting this progress thread */
		bgq_domain->progress.thread[id].enabled = 0;
		bgq_domain->progress.thread[id].active = 0;
		return -1;
	}

	/* Wait until the progress thread is active */
	while (0 == bgq_domain->progress.thread[id].active) {
		fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO);
	}

	++(bgq_domain->progress.num_threads_active);
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO);
	return 0;
}
int fi_bgq_progress_init (struct fi_bgq_domain *bgq_domain, const uint64_t max_threads) { assert(max_threads < (64/Kernel_ProcessCount())); bgq_domain->progress.max_threads = max_threads; bgq_domain->progress.num_threads_active = 0; bgq_domain->progress.memptr = NULL; if (0 == max_threads) return 0; size_t i, j; const size_t bytes = sizeof(union fi_bgq_progress_data) * max_threads; const size_t alignment = 128; void * memptr = malloc(bytes+alignment); uint32_t cnk_rc = 0; cnk_rc = Kernel_L2AtomicsAllocate(memptr, bytes+alignment); assert(0==cnk_rc); if (cnk_rc != 0) { /* Error allocating l2atomic memory */ free(memptr); bgq_domain->progress.memptr = NULL; return -1; } union fi_bgq_progress_data *data = (union fi_bgq_progress_data *)(((uintptr_t)memptr + alignment) & (~(alignment-1))); const uint64_t npackets = sizeof(data[0].data) / sizeof(uint64_t); const size_t nep = sizeof(bgq_domain->progress.thread[0].tag_ep) / sizeof(struct fi_bgq_ep *); for (i=0; i<max_threads; ++i) { l2atomic_fifo_initialize(&bgq_domain->progress.thread[i].consumer, &bgq_domain->progress.thread[i].producer, &data[i].l2atomic, npackets); bgq_domain->progress.thread[i].tag_ep_count = 0; bgq_domain->progress.thread[i].msg_ep_count = 0; bgq_domain->progress.thread[i].all_ep_count = 0; bgq_domain->progress.thread[i].pthread = 0; bgq_domain->progress.thread[i].enabled = 0; bgq_domain->progress.thread[i].active = 0; fi_bgq_ref_inc(&bgq_domain->ref_cnt, "domain"); bgq_domain->progress.thread[i].bgq_domain = bgq_domain; for (j=0; j<nep; ++j) { bgq_domain->progress.thread[i].tag_ep[j] = NULL; bgq_domain->progress.thread[i].msg_ep[j] = NULL; bgq_domain->progress.thread[i].all_ep[j] = NULL; } } bgq_domain->progress.memptr = memptr; return 0; }
int PMI_Init (int *spawned) { if (!spawned) return PMI_ERR_INVALID_ARG; *spawned = PMI_FALSE; if (pmi_rank != INT_MAX) return PMI_FAIL; if (pmi_size != INT_MAX) return PMI_FAIL; Personality_t pers; int rc = 0; rc = Kernel_GetPersonality(&pers, sizeof(pers)); if (rc) return PMI_FAIL; /* calculate the maximum number of ranks from the torus dimensions */ Personality_Networks_t *net = &pers.Network_Config; uint32_t max_ranks = net->Anodes * net->Bnodes * net->Cnodes * net->Dnodes * net->Enodes * Kernel_ProcessCount(); uint64_t numentries = 0; BG_CoordinateMapping_t mapping[max_ranks]; rc = Kernel_RanksToCoords(sizeof(mapping), mapping, &numentries); bgq_node_list = (uint32_t *) malloc(sizeof(uint32_t) * max_ranks); uint32_t tcoord32bitmask = 0xFFFFFFC0; uint32_t origcoord; /* while populating the rank map also determine how many local ranks on my node - pmi_local_size */ BG_CoordinateMapping_t my_bgq_coords; my_bgq_coords.e = net->Ecoord; my_bgq_coords.reserved = mapping[0].reserved; my_bgq_coords.a = net->Acoord; my_bgq_coords.b = net->Bcoord; my_bgq_coords.c = net->Ccoord; my_bgq_coords.d = net->Dcoord; my_bgq_coords.t = 0; memcpy(&my_bgq_rank_node_id,&my_bgq_coords,sizeof(BG_CoordinateMapping_t)); pmi_local_size = 0; int i; for (i=0;i<numentries;i++) { memcpy(&origcoord, &(mapping[i]),sizeof(BG_CoordinateMapping_t)); bgq_node_list[i] = origcoord & tcoord32bitmask; if (my_bgq_rank_node_id == bgq_node_list[i]) pmi_local_size++; } pmi_size = (int) numentries; pmi_rank = (int) Kernel_GetRank(); // the largest possible key value is the number of nodes * the space for a node block def bgq_pmi_value_maxlen = net->Anodes * net->Bnodes * net->Cnodes * net->Dnodes * net->Enodes * BGQ_SINGLE_ENTRY_NODE_BLOCK_MAX_LEN; if (bgq_pmi_value_maxlen < BGQ_PMI_VALUE_MAXLEN) bgq_pmi_value_maxlen = BGQ_PMI_VALUE_MAXLEN; return PMI_SUCCESS; }
/* Unique numeric identifier of the SMP node this process runs on. */
long vt_pform_node_id()
{
#ifdef BGP_GROUP_ON_NODEBOARD
  _BGP_UniversalComponentIdentifier uci;
  uci.UCI = mybgp.Kernel_Config.UniversalComponentIdentifier;
  /* use upper part of UCI (upto NodeCard, ignore lower 14bits)
   * but only use the 13 bits (1FFF) that describe row,col,mp,nc */
  long node_bits = (long)((uci.UCI >> 14) & 0x1FFF);
  return node_bits;
#else
  /* linearize (pset number, rank within pset), then fold in the
   * per-node process slot */
  long pset_linear = BGP_Personality_psetNum(&mybgp) * BGP_Personality_psetSize(&mybgp)
                   + BGP_Personality_rankInPset(&mybgp);
  return pset_linear * Kernel_ProcessCount() + Kernel_PhysicalProcessorID();
#endif
}
/*
 * Discover the allocated BG/Q partition shape and the job's mapping
 * permutation, and derive the logical torus dimensions (dims[], torus[])
 * used by the topology manager.
 */
BGQTorusManager::BGQTorusManager() {
  // default permutation corresponds to a TABCDE mapping (T fastest)
  order[0] = 5;
  order[1] = 4;
  order[2] = 3;
  order[3] = 2;
  order[4] = 1;
  order[5] = 0;

  int numPes = CmiNumPesGlobal();
  procsPerNode = Kernel_ProcessCount();
  thdsPerProc = CmiMyNodeSize();
  hw_NT = procsPerNode * thdsPerProc;

  // physical partition dimensions from the node personality
  Personality_t pers;
  Kernel_GetPersonality(&pers, sizeof(pers));
  hw_NA = pers.Network_Config.Anodes;
  hw_NB = pers.Network_Config.Bnodes;
  hw_NC = pers.Network_Config.Cnodes;
  hw_ND = pers.Network_Config.Dnodes;
  hw_NE = pers.Network_Config.Enodes;

  // decode the mapping permutation (e.g. "ABCDET") unless a map file is used
  unsigned int isFile = 0;
  Kernel_GetMapping(10, mapping, &isFile);
  if (!isFile) {
    for (int i = 0; i < 6; i++) {
      if (mapping[i] != 'T') {
        order[5 - i] = mapping[i] - 'A';
      } else {
        order[5 - i] = 5;
      }
    }
  }

  rn_NA = hw_NA;
  rn_NB = hw_NB;
  rn_NC = hw_NC;
  rn_ND = hw_ND;
  rn_NE = hw_NE;

  // If the job does not cover the whole partition, derive the actual
  // extents from the rank-to-coordinate map.
  if (rn_NA * rn_NB * rn_NC * rn_ND * rn_NE != numPes / hw_NT) {
    rn_NA = rn_NB = rn_NC = rn_ND = rn_NE = 0;
    int rn_NT = 0;

    // BUGFIX: the minima were initialized with (~(-1)), which evaluates
    // to 0, so sub-block jobs with a non-zero corner computed inflated
    // extents.  Initialize with the largest int so true minima are found.
    const int COORD_MAX = 0x7fffffff;
    int min_a, min_b, min_c, min_d, min_e, min_t;
    min_a = min_b = min_c = min_d = min_e = min_t = COORD_MAX;

    int tmp_t, tmp_a, tmp_b, tmp_c, tmp_d, tmp_e;
    uint64_t numentries;
    int nranks = numPes / thdsPerProc;
    // NOTE(review): allocation assumed to succeed during startup -- confirm
    BG_CoordinateMapping_t *coord =
      (BG_CoordinateMapping_t *) malloc(sizeof(BG_CoordinateMapping_t) * nranks);
    Kernel_RanksToCoords(sizeof(BG_CoordinateMapping_t) * nranks, coord, &numentries);

    // track the bounding box of all rank coordinates
    for (int c = 0; c < nranks; c++) {
      tmp_a = coord[c].a;
      tmp_b = coord[c].b;
      tmp_c = coord[c].c;
      tmp_d = coord[c].d;
      tmp_e = coord[c].e;
      tmp_t = coord[c].t;

      if (tmp_a > rn_NA) rn_NA = tmp_a;
      if (tmp_a < min_a) min_a = tmp_a;
      if (tmp_b > rn_NB) rn_NB = tmp_b;
      if (tmp_b < min_b) min_b = tmp_b;
      if (tmp_c > rn_NC) rn_NC = tmp_c;
      if (tmp_c < min_c) min_c = tmp_c;
      if (tmp_d > rn_ND) rn_ND = tmp_d;
      if (tmp_d < min_d) min_d = tmp_d;
      if (tmp_e > rn_NE) rn_NE = tmp_e;
      if (tmp_e < min_e) min_e = tmp_e;
      if (tmp_t > rn_NT) rn_NT = tmp_t;
      if (tmp_t < min_t) min_t = tmp_t;
    }

    // extent = max - min + 1 in each dimension
    rn_NA = rn_NA - min_a + 1;
    rn_NB = rn_NB - min_b + 1;
    rn_NC = rn_NC - min_c + 1;
    rn_ND = rn_ND - min_d + 1;
    rn_NE = rn_NE - min_e + 1;
    procsPerNode = rn_NT - min_t + 1;
    hw_NT = procsPerNode * thdsPerProc;
    free(coord);
  }

  dimA = rn_NA;
  dimB = rn_NB;
  dimC = rn_NC;
  dimD = rn_ND;
  dimE = rn_NE;
  dimA = dimA * hw_NT;  // fold T into A, assuming TABCDE

  dims[0] = rn_NA;
  dims[1] = rn_NB;
  dims[2] = rn_NC;
  dims[3] = rn_ND;
  dims[4] = rn_NE;
  dims[5] = hw_NT;

  // a BG/Q dimension wraps (torus links) only when its size is a multiple
  // of 4; the E dimension always wraps
  torus[0] = ((rn_NA % 4) == 0);
  torus[1] = ((rn_NB % 4) == 0);
  torus[2] = ((rn_NC % 4) == 0);
  torus[3] = ((rn_ND % 4) == 0);
  torus[4] = true;

  populateLocalNodes();
}
//initialize the spi communications void init_spi() { //check not to have initialized if(!spi_inited) { verbosity_lv1_master_printf("Starting spi\n"); //check that we do not have more than one process per node if(Kernel_ProcessCount()!=1) crash("only one process per node implemented"); //mark as initialized spi_inited=true; //get coordinates, size and rank in the 5D grid set_spi_geometry(); //check that all ranks are first neighbours in SPI grid check_all_lattice_neighbours_are_spi_first_neighbours(); //allocate bats spi_bat_id[0]=0; spi_bat_id[1]=1; if(Kernel_AllocateBaseAddressTable(0,&spi_bat_gr,2,spi_bat_id,0)) crash("allocating bat"); ////////////////////////////////// init the fifos /////////////////////////////////// //alloc space for the injection fifos uint32_t fifo_size=64*NSPI_FIFO; for(int ififo=0;ififo<NSPI_FIFO;ififo++) spi_fifo[ififo]=(uint64_t*)memalign(64,fifo_size); //set default attributes for inj fifo Kernel_InjFifoAttributes_t fifo_attrs[NSPI_FIFO]; memset(fifo_attrs,0,NSPI_FIFO*sizeof(Kernel_InjFifoAttributes_t)); //initialize them with default attributes uint32_t fifo_id[NSPI_FIFO]; for(int ififo=0;ififo<NSPI_FIFO;ififo++) fifo_id[ififo]=ififo; if(Kernel_AllocateInjFifos(0,&spi_fifo_sg_ptr,NSPI_FIFO,fifo_id,fifo_attrs)) crash("allocating inj fifos"); //init the MU MMIO for the fifos for(int ififo=0;ififo<NSPI_FIFO;ififo++) { //create the memory region Kernel_MemoryRegion_t mem_region; if(Kernel_CreateMemoryRegion(&mem_region,spi_fifo[NSPI_FIFO-1-ififo],fifo_size)) crash("creating memory region %d of bytes",ififo,fifo_size); //initialize the fifos if(Kernel_InjFifoInit(&spi_fifo_sg_ptr,fifo_id[ififo],&mem_region, (uint64_t)spi_fifo[NSPI_FIFO-1-ififo]-(uint64_t)mem_region.BaseVa,fifo_size-1)) crash("initializing fifo"); } //activate the fifos if(Kernel_InjFifoActivate(&spi_fifo_sg_ptr,NSPI_FIFO,fifo_id,KERNEL_INJ_FIFO_ACTIVATE)) crash("activating fifo"); //check alignment CRASH_IF_NOT_ALIGNED(recv_buf,64); CRASH_IF_NOT_ALIGNED(send_buf,64); 
//get physical address of receiving buffer Kernel_MemoryRegion_t mem_region; if(Kernel_CreateMemoryRegion(&mem_region,recv_buf,recv_buf_size)) crash("creating recv_buf memory region of %d bytes",recv_buf_size); //set the physical address if(MUSPI_SetBaseAddress(&spi_bat_gr,spi_bat_id[0],(uint64_t)recv_buf- (uint64_t)mem_region.BaseVa+(uint64_t)mem_region.BasePa)) crash("setting base address"); //set receive counter bat to MU style atomic PA addr of the receive counter if((uint64_t)(&spi_recv_counter)&0x7) crash("recv counter not 8 byte aligned"); if(Kernel_CreateMemoryRegion(&mem_region,(void*)&spi_recv_counter,sizeof(uint64_t))) crash("creating memory region of %d bytes",sizeof(uint64_t)); if(MUSPI_SetBaseAddress(&spi_bat_gr,spi_bat_id[1],MUSPI_GetAtomicAddress((uint64_t)&spi_recv_counter-(uint64_t)mem_region.BaseVa+(uint64_t)mem_region.BasePa,MUHWI_ATOMIC_OPCODE_STORE_ADD))) crash("setting base addr"); //reset number of byte to be received spi_recv_counter=0; //get the send buffer physical address if(Kernel_CreateMemoryRegion(&mem_region,send_buf,send_buf_size)) crash("creating memory region of %d bytes",send_buf_size); spi_send_buf_phys_addr=(uint64_t)send_buf-(uint64_t)mem_region.BaseVa+(uint64_t)mem_region.BasePa; //find hints for descriptors set_spi_hints(); #ifdef SPI_BARRIER //init the barrier if(MUSPI_GIBarrierInit(&spi_barrier,0)) crash("initializing the barrier"); #endif verbosity_lv2_master_printf("spi initialized\n"); } }
/*
 * Build the provider's default fi_info list for the BGQ provider.
 *
 * Computes per-process tx/rx context limits from the node's MU fifo
 * counts, fills in the default tx/rx/ep/domain/fabric attributes on a
 * fresh fi_info, stores it in fi_bgq_global.info, and appends a duplicate
 * as the second list entry.
 *
 * Returns 0 on success, -FI_ENOMEM if the first allocation fails.
 */
int fi_bgq_set_default_info() {
	struct fi_info *fi, *prev_fi;
	uint32_t ppn = Kernel_ProcessCount();	/* processes per node */

	/*
	 * See: fi_bgq_stx_init() for the number of mu injection fifos
	 * allocated for each tx context. Each rx context uses one
	 * mu injection fifo and one mu reception fifo.
	 */
	const unsigned tx_ctx_cnt = (((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / 3) / ppn;

	/*
	 * The number of rx contexts on a node is the minimum of:
	 * 1. number of mu injection fifos on the node not used by tx contexts
	 * 2. total number mu reception fifos on the node
	 */
	const unsigned rx_ctx_cnt = MIN((((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) - (tx_ctx_cnt * ppn)), ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP)) / ppn;

	fi = fi_dupinfo(NULL);
	if (!fi) {
		errno = FI_ENOMEM;
		return -errno;
	}
	fi_bgq_global.info = fi;

	/* default transmit-context attributes */
	*fi->tx_attr = (struct fi_tx_attr) {
		.caps = FI_RMA | FI_ATOMIC | FI_TRANSMIT_COMPLETE,
		.mode = FI_ASYNC_IOV,
		.op_flags = FI_TRANSMIT_COMPLETE,
		.msg_order = FI_ORDER_SAS | FI_ORDER_WAW | FI_ORDER_RAW | FI_ORDER_RAR,
		.comp_order = FI_ORDER_NONE,
		.inject_size = FI_BGQ_INJECT_SIZE,
		.size = FI_BGQ_TX_SIZE,
		.iov_limit = SIZE_MAX,
		.rma_iov_limit = 0
	};

	/* default receive-context attributes */
	*fi->rx_attr = (struct fi_rx_attr) {
		.caps = FI_RMA | FI_ATOMIC | FI_NAMED_RX_CTX,
		.mode = FI_ASYNC_IOV,
		.op_flags = 0,
		.msg_order = 0,
		.comp_order = FI_ORDER_NONE,
		.total_buffered_recv = FI_BGQ_TOTAL_BUFFERED_RECV,
		.size = FI_BGQ_RX_SIZE,
		.iov_limit = SIZE_MAX
	};

	/* default endpoint attributes (reliable datagram) */
	*fi->ep_attr = (struct fi_ep_attr) {
		.type = FI_EP_RDM,
		.protocol = FI_BGQ_PROTOCOL,
		.protocol_version = FI_BGQ_PROTOCOL_VERSION,
		.max_msg_size = FI_BGQ_MAX_MSG_SIZE,
		.msg_prefix_size = FI_BGQ_MAX_PREFIX_SIZE,
		.max_order_raw_size = FI_BGQ_MAX_ORDER_RAW_SIZE,
		.max_order_war_size = FI_BGQ_MAX_ORDER_WAR_SIZE,
		.max_order_waw_size = FI_BGQ_MAX_ORDER_WAW_SIZE,
		.mem_tag_format = FI_BGQ_MEM_TAG_FORMAT,
		.tx_ctx_cnt = tx_ctx_cnt,
		.rx_ctx_cnt = rx_ctx_cnt,
	};

	/* default domain attributes */
	*fi->domain_attr = (struct fi_domain_attr) {
		.domain = NULL,
		.name = NULL, /* TODO: runtime query for name? */
		.threading = FI_THREAD_FID,
		.control_progress = FI_PROGRESS_MANUAL,
		.data_progress = FI_PROGRESS_AUTO, // + FI_PROGRESS_MANUAL ?
		.resource_mgmt = FI_RM_DISABLED,
		.av_type = FI_AV_MAP,
		.mr_mode = FI_MR_SCALABLE,
		.mr_key_size = 2,
		.cq_data_size = 0,
		.cq_cnt = 128 / ppn,
		.ep_cnt = SIZE_MAX,
		.tx_ctx_cnt = tx_ctx_cnt,
		.rx_ctx_cnt = rx_ctx_cnt,
		.max_ep_tx_ctx = ((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / ppn / 2,
		.max_ep_rx_ctx = ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP) / ppn,
		.max_ep_stx_ctx = ((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / ppn / 2,
		.max_ep_srx_ctx = 0
	};

	/* default fabric attributes (name strings are strdup'd; callers
	   free them via fi_freeinfo) */
	*fi->fabric_attr = (struct fi_fabric_attr) {
		.fabric = NULL,
		.name = strdup(FI_BGQ_FABRIC_NAME),
		.prov_name = strdup(FI_BGQ_PROVIDER_NAME),
		.prov_version = FI_BGQ_PROVIDER_VERSION
	};

	fi->caps = FI_RMA | FI_ATOMIC | FI_NAMED_RX_CTX | FI_TRANSMIT_COMPLETE;
	fi->mode = FI_ASYNC_IOV;
	fi->addr_format = FI_ADDR_BGQ;
	fi->src_addrlen = 24; // includes null
	fi->dest_addrlen = 24; // includes null

	/* append a duplicate as the second list entry
	 * NOTE(review): this fi_dupinfo result is not checked; on failure the
	 * list is silently one entry long -- confirm callers tolerate that */
	prev_fi = fi;
	fi = fi_dupinfo(prev_fi);
	prev_fi->next = fi;

	return 0;
}
/*
 * Diagnostic program: queries the BG/Q personality and job coordinates,
 * prints them for one hard-coded rank (763), then recomputes this rank's
 * id from its torus coordinates by brute-force enumeration in ABCDE
 * row-major order.
 */
int main(int argc, char **argv) {
	BG_CoordinateMapping_t coord;	/* NOTE(review): appears unused here */
	BG_JobCoords_t job;
	Personality_t pers;

	Kernel_GetPersonality(&pers, sizeof(pers));
	myRank=Kernel_GetRank();
	myCoord=Kernel_MyTcoord();
	Kernel_JobCoords(&job);
	// myCoreID=Kernel_ProcessorCoreID();
	// myHWTID=Kernel_ProcessorThreadID();
	// myPhysicalID=Kernel_PhysicalProcessorID();

	/* NOTE(review): this initializer ASSIGNS the globals myA..myE as a
	 * side effect while building tcoords -- unusual but legal C */
	torus_t tcoords = {
		myA=pers.Network_Config.Acoord,
		myB=pers.Network_Config.Bcoord,
		myC=pers.Network_Config.Ccoord,
		myD=pers.Network_Config.Dcoord,
		myE=pers.Network_Config.Ecoord
	};
	/* full partition dimensions */
	torus_t tdims = {
		pers.Network_Config.Anodes,
		pers.Network_Config.Bnodes,
		pers.Network_Config.Cnodes,
		pers.Network_Config.Dnodes,
		pers.Network_Config.Enodes
	};
	numNodes = tdims.a * tdims.b * tdims.c * tdims.d * tdims.e;

	/* shape and corner of this job's allocation */
	unsigned my_com_A=job.shape.a;
	unsigned my_com_B=job.shape.b;
	unsigned my_com_C=job.shape.c;
	unsigned my_com_D=job.shape.d;
	unsigned my_com_E=job.shape.e;
	unsigned my_com_Acoord=job.corner.a;
	unsigned my_com_Bcoord=job.corner.b;
	unsigned my_com_Ccoord=job.corner.c;
	unsigned my_com_Dcoord=job.corner.d;
	unsigned my_com_Ecoord=job.corner.e;

	/* dump everything from one arbitrary rank to avoid flooding stdout */
	if ( myRank == 763 ) {
		printf("number of nodes:%d \n", numNodes);
		printf("number of processes per node:%d \n",Kernel_ProcessCount());
		printf("number of hardware threads per process:%d \n",Kernel_ProcessorCount());
		printf("MPI rank %d has 5D torus coordinates <%d,%d,%d,%d,%d> \n", myRank, myA, myB, myC, myD, myE);
		printf("job has 5D torus dimensions <%d,%d,%d,%d,%d> \n", tdims.a, tdims.b, tdims.c, tdims.d, tdims.e);
		printf("MPI rank %d has dimensions <%d,%d,%d,%d,%d> \n", myRank, my_com_A,my_com_B, my_com_C, my_com_D, my_com_E);
		printf("MPI rank %d has coordinates <%d,%d,%d,%d,%d> \n",myRank, my_com_Acoord, my_com_Bcoord, my_com_Ccoord, my_com_Dcoord, my_com_Ecoord);
	}
	// printf("rank %d has coordinates

	unsigned a,b,c,d,e;
	/* strides for ABCDE row-major rank computation */
	unsigned a_mult = tdims.b * tdims.c * tdims.d * tdims.e;
	unsigned b_mult = tdims.c * tdims.d * tdims.e;
	unsigned c_mult = tdims.d * tdims.e;
	unsigned d_mult = tdims.e;

	/* brute-force: enumerate every coordinate and record the rank whose
	 * coordinates match ours */
	for ( a = 0; a < tdims.a; a++ )
	for ( b = 0; b < tdims.b; b++ )
	for ( c = 0; c < tdims.c; c++ )
	for ( d = 0; d < tdims.d; d++ )
	for ( e = 0; e < tdims.e; e++ ) {
		unsigned rank = a * a_mult + b * b_mult + c * c_mult + d * d_mult + e;
		if ( a == tcoords.a && b == tcoords.b && c == tcoords.c && d == tcoords.d && e == tcoords.e ) myRank_test=rank;
	}
	// if (myRank == myRank_test) printf("MPI rank %d returns 1 \n", myRank);
}
/*
 * Split 'oldcomm' into communicators of ranks that share a physical node.
 *
 * On BG/P and BG/Q the node identity is derived from the torus coordinates
 * in the personality; in SMP mode (one process per node) the result is
 * trivially MPI_COMM_SELF.  On other platforms the input is left untouched
 * and MPI_SUCCESS is returned.
 *
 * Returns an MPI error class from MPI_Comm_split, or MPI_SUCCESS.
 */
int OSPU_Comm_split_node(MPI_Comm oldcomm, MPI_Comm * newcomm)
{
    /* BUGFIX: rc was uninitialized on the fall-through path */
    int rc = MPI_SUCCESS;
#if defined(__bgp__)
    _BGP_Personality_t personality;
    Kernel_GetPersonality( &personality, sizeof(personality) );

    /* SMP mode is trivial */
    if (personality.Kernel_Config.ProcessConfig == _BGP_PERS_PROCESSCONFIG_SMP )
    {
        *newcomm = MPI_COMM_SELF;
        return MPI_SUCCESS;
    }
    else
    {
        int xrank = personality.Network_Config.Xcoord;
        int yrank = personality.Network_Config.Ycoord;
        int zrank = personality.Network_Config.Zcoord;
        int xsize = personality.Network_Config.Xnodes;
        int ysize = personality.Network_Config.Ynodes;
        int zsize = personality.Network_Config.Znodes;

        /* BUGFIX: 'color' was assigned without a visible declaration;
         * declared locally here (linearized node coordinate) */
        int color = xrank * ysize * zsize + yrank * zsize + zrank;
        rc = MPI_Comm_split(oldcomm, color, 0, newcomm);
        return rc;
    }
#elif defined(__bgq__)
    /* SMP mode is trivial */
    if ( 1 == Kernel_ProcessCount() )
    {
        *newcomm = MPI_COMM_SELF;
        return MPI_SUCCESS;
    }
    else
    {
        Personality_t personality;
        Kernel_GetPersonality( &personality, sizeof(personality) );

        int arank = personality.Network_Config.Acoord;
        int brank = personality.Network_Config.Bcoord;
        int crank = personality.Network_Config.Ccoord;
        int drank = personality.Network_Config.Dcoord;
        int erank = personality.Network_Config.Ecoord;
        int asize = personality.Network_Config.Anodes;
        int bsize = personality.Network_Config.Bnodes;
        int csize = personality.Network_Config.Cnodes;
        int dsize = personality.Network_Config.Dnodes;
        int esize = personality.Network_Config.Enodes;

        /* BUGFIX: 'color' declared locally (linearized 5D node coordinate) */
        int color = arank * bsize * csize * dsize * esize
                  + brank * csize * dsize * esize
                  + crank * dsize * esize
                  + drank * esize
                  + erank;
        rc = MPI_Comm_split(oldcomm, color, 0, newcomm);
        return rc;
    }
#endif
    return rc;
}