Пример #1
0
/*
 * Stop and join the progress thread in slot 'id' of this domain.
 *
 * Publishes enabled=0, spins until the thread clears its 'active' flag,
 * joins the pthread, and tears down the thread's l2atomic fifo pair.
 *
 * Returns 0 (also when the thread was already disabled).
 */
int fi_bgq_progress_disable (struct fi_bgq_domain *bgq_domain, const unsigned id) {

	assert(id < (64/Kernel_ProcessCount()-1));
	assert(id < bgq_domain->progress.max_threads);

	/* refresh our view of the thread flags before reading them */
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO);
	if (0 == bgq_domain->progress.thread[id].enabled) {
		/* already disabled - it must not be running either */
		assert(0 == bgq_domain->progress.thread[id].active);
		return 0;
	}

	/* ask the thread to stop, then publish the store */
	bgq_domain->progress.thread[id].enabled = 0;
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO);

	/* Wait until the progress thread is no longer active */
	while (bgq_domain->progress.thread[id].active) {
		fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO);
	}

	/* reap the pthread; rc is only consumed by the assert in debug builds */
	int rc __attribute__ ((unused));
	void *retval = NULL;
	rc = pthread_join(bgq_domain->progress.thread[id].pthread, &retval);
	assert(0 == rc);
	bgq_domain->progress.thread[id].pthread = 0;

	l2atomic_fifo_disable(&bgq_domain->progress.thread[id].consumer,
		&bgq_domain->progress.thread[id].producer);

	--(bgq_domain->progress.num_threads_active);

	return 0;
}
Пример #2
0
/* number of CPUs */
/* Number of CPUs visible to this platform layer. */
int vt_pform_num_cpus() {
#ifdef BGP_GROUP_ON_NODEBOARD
  /* 32 compute nodes per node board, each running Kernel_ProcessCount()
   * processes */
  return Kernel_ProcessCount() * 32;
#else
  return 1;
#endif
}
Пример #3
0
/*
 * Start the progress thread in slot 'id' of this domain.
 *
 * Publishes enabled=1, spawns a pthread running progress_fn with the
 * per-thread state as its argument, then spins until the new thread
 * raises its 'active' flag.
 *
 * Returns 0 on success (also when the thread was already enabled),
 * -1 when pthread_create fails.
 */
int fi_bgq_progress_enable (struct fi_bgq_domain *bgq_domain, const unsigned id) {

	assert(id < (64/Kernel_ProcessCount()-1));
	assert(id < bgq_domain->progress.max_threads);

	/* refresh our view of the thread flags before reading them */
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO);
	if (bgq_domain->progress.thread[id].enabled) {
		/* already enabled - it must be running too */
		assert(bgq_domain->progress.thread[id].active);
		return 0;
	}

	/* publish the new flags before the thread can observe them */
	bgq_domain->progress.thread[id].enabled = 1;
	bgq_domain->progress.thread[id].active = 0;
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO);

	int rc = 0;
	rc = pthread_create(&bgq_domain->progress.thread[id].pthread, NULL, progress_fn, (void *)&bgq_domain->progress.thread[id]);
	if (rc) {
		/* Error starting this progress thread */
		bgq_domain->progress.thread[id].enabled = 0;
		bgq_domain->progress.thread[id].active = 0;
		return -1;
	}

	/* Wait until the progress thread is active */
	while (0 == bgq_domain->progress.thread[id].active) {
		fi_bgq_msync(FI_BGQ_MSYNC_TYPE_RO);
	}

	++(bgq_domain->progress.num_threads_active);
	fi_bgq_msync(FI_BGQ_MSYNC_TYPE_WO);

	return 0;
}
Пример #4
0
/*
 * Allocate and initialize the progress-thread table for this domain.
 *
 * Carves an l2atomic-capable, 128-byte-aligned data region out of a
 * single malloc and wires one consumer/producer fifo pair per potential
 * thread slot; each slot also takes a reference on the owning domain.
 *
 * Returns 0 on success (also when max_threads is 0), -1 on allocation
 * failure.
 */
int fi_bgq_progress_init (struct fi_bgq_domain *bgq_domain, const uint64_t max_threads) {

	assert(max_threads < (64/Kernel_ProcessCount()));

	bgq_domain->progress.max_threads = max_threads;
	bgq_domain->progress.num_threads_active = 0;
	bgq_domain->progress.memptr = NULL;

	if (0 == max_threads) return 0;

	size_t i, j;

	const size_t bytes = sizeof(union fi_bgq_progress_data) * max_threads;
	const size_t alignment = 128;

	/* over-allocate so the data array can be aligned up to 'alignment' */
	void * memptr = malloc(bytes+alignment);
	if (NULL == memptr) {
		/* Error allocating backing memory (was previously unchecked) */
		return -1;
	}

	uint32_t cnk_rc = 0;
	cnk_rc = Kernel_L2AtomicsAllocate(memptr, bytes+alignment);
	assert(0==cnk_rc);
	if (cnk_rc != 0) {
		/* Error allocating l2atomic memory */
		free(memptr);
		bgq_domain->progress.memptr = NULL;
		return -1;
	}

	/* round the usable region up to the next 'alignment' boundary */
	union fi_bgq_progress_data *data = (union fi_bgq_progress_data *)(((uintptr_t)memptr + alignment) & (~(alignment-1)));
	const uint64_t npackets = sizeof(data[0].data) / sizeof(uint64_t);
	const size_t nep = sizeof(bgq_domain->progress.thread[0].tag_ep) / sizeof(struct fi_bgq_ep *);
	for (i=0; i<max_threads; ++i) {
		l2atomic_fifo_initialize(&bgq_domain->progress.thread[i].consumer,
			&bgq_domain->progress.thread[i].producer,
			&data[i].l2atomic, npackets);

		bgq_domain->progress.thread[i].tag_ep_count = 0;
		bgq_domain->progress.thread[i].msg_ep_count = 0;
		bgq_domain->progress.thread[i].all_ep_count = 0;
		bgq_domain->progress.thread[i].pthread = 0;
		bgq_domain->progress.thread[i].enabled = 0;
		bgq_domain->progress.thread[i].active = 0;

		/* each thread slot holds a reference on the owning domain */
		fi_bgq_ref_inc(&bgq_domain->ref_cnt, "domain");
		bgq_domain->progress.thread[i].bgq_domain = bgq_domain;

		for (j=0; j<nep; ++j) {
			bgq_domain->progress.thread[i].tag_ep[j] = NULL;
			bgq_domain->progress.thread[i].msg_ep[j] = NULL;
			bgq_domain->progress.thread[i].all_ep[j] = NULL;
		}
	}

	/* publish the raw (unaligned) pointer so teardown can free() it */
	bgq_domain->progress.memptr = memptr;

	return 0;
}
Пример #5
0
/*
 * Initialize the PMI library for this Blue Gene/Q job.
 *
 * Reads the torus shape from the kernel personality, builds the
 * rank -> node-id map (bgq_node_list), counts the ranks on the local
 * node (pmi_local_size), and records pmi_rank / pmi_size.
 *
 * Returns PMI_SUCCESS, PMI_ERR_INVALID_ARG when 'spawned' is NULL, or
 * PMI_FAIL when already initialized or a kernel query / allocation fails.
 */
int PMI_Init (int *spawned)
{
	if (!spawned) return PMI_ERR_INVALID_ARG;
	*spawned = PMI_FALSE;	/* this implementation never spawns */

	/* INT_MAX is the "not yet initialized" sentinel for both globals */
	if (pmi_rank != INT_MAX) return PMI_FAIL;
	if (pmi_size != INT_MAX) return PMI_FAIL;

	Personality_t pers;
	int rc = 0;
	rc = Kernel_GetPersonality(&pers, sizeof(pers));
	if (rc) return PMI_FAIL;

	/* calculate the maximum number of ranks from the torus dimensions */
	Personality_Networks_t *net = &pers.Network_Config;
	uint32_t max_ranks = net->Anodes * net->Bnodes * net->Cnodes *
		net->Dnodes * net->Enodes * Kernel_ProcessCount();
	uint64_t numentries = 0;

	/* NOTE(review): VLA of max_ranks entries lives on the stack -
	 * assumed to fit within CNK stack limits; confirm for large jobs */
	BG_CoordinateMapping_t mapping[max_ranks];
	rc = Kernel_RanksToCoords(sizeof(mapping), mapping, &numentries);
	if (rc) return PMI_FAIL;	/* return code was previously ignored */
	bgq_node_list = (uint32_t *) malloc(sizeof(uint32_t) * max_ranks);
	if (NULL == bgq_node_list) return PMI_FAIL;	/* was previously unchecked */
	uint32_t tcoord32bitmask = 0xFFFFFFC0;	/* clears the low t-coordinate bits */
	uint32_t origcoord;

	/* while populating the rank map also determine how many local ranks on
	   my node - pmi_local_size */
	BG_CoordinateMapping_t my_bgq_coords;
	my_bgq_coords.e = net->Ecoord;
	my_bgq_coords.reserved = mapping[0].reserved;
	my_bgq_coords.a = net->Acoord;
	my_bgq_coords.b = net->Bcoord;
	my_bgq_coords.c = net->Ccoord;
	my_bgq_coords.d = net->Dcoord;
	my_bgq_coords.t = 0;	/* t zeroed so the packed id identifies the node */

	memcpy(&my_bgq_rank_node_id,&my_bgq_coords,sizeof(BG_CoordinateMapping_t));
	pmi_local_size = 0;

	uint64_t i;	/* matches numentries; avoids signed/unsigned comparison */
	for (i=0;i<numentries;i++) {
		memcpy(&origcoord, &(mapping[i]),sizeof(BG_CoordinateMapping_t));
		bgq_node_list[i]  = origcoord & tcoord32bitmask;
		if (my_bgq_rank_node_id == bgq_node_list[i])
			pmi_local_size++;
	}
	pmi_size = (int) numentries;
	pmi_rank = (int) Kernel_GetRank();
	// the largest possible key value is the number of nodes * the space for a node block def
	bgq_pmi_value_maxlen = net->Anodes * net->Bnodes * net->Cnodes *
		net->Dnodes * net->Enodes * BGQ_SINGLE_ENTRY_NODE_BLOCK_MAX_LEN;
	if (bgq_pmi_value_maxlen < BGQ_PMI_VALUE_MAXLEN)
		bgq_pmi_value_maxlen = BGQ_PMI_VALUE_MAXLEN;
	return PMI_SUCCESS;
}
Пример #6
0
/* unique numeric SMP-node identifier */
long vt_pform_node_id() {
#ifdef BGP_GROUP_ON_NODEBOARD
  _BGP_UniversalComponentIdentifier uci;
  uci.UCI = mybgp.Kernel_Config.UniversalComponentIdentifier;
  /* use upper part of UCI (upto NodeCard, ignore lower 14bits)
   * but only use the 13 bits (1FFF) that describe row,col,mp,nc */
  return ((uci.UCI>>14)&0x1FFF);
#else
  return ( BGP_Personality_psetNum(&mybgp) *
           BGP_Personality_psetSize(&mybgp) +
           BGP_Personality_rankInPset(&mybgp)) * Kernel_ProcessCount()
           + Kernel_PhysicalProcessorID();
#endif
}
Пример #7
0
// Build the BG/Q torus description: read the hardware dimensions from
// the kernel personality, apply the user T-mapping permutation, and -
// when the job does not cover the full partition - shrink the logical
// dimensions to the bounding box of the ranks actually allocated.
BGQTorusManager::BGQTorusManager() {
  order[0] = 5;
  order[1] = 4;
  order[2] = 3;
  order[3] = 2;
  order[4] = 1;
  order[5] = 0;

  int numPes = CmiNumPesGlobal();
  procsPerNode = Kernel_ProcessCount();
  thdsPerProc = CmiMyNodeSize();
  hw_NT = procsPerNode*thdsPerProc;

  Personality_t pers;
  Kernel_GetPersonality(&pers, sizeof(pers));

  hw_NA = pers.Network_Config.Anodes;
  hw_NB = pers.Network_Config.Bnodes;
  hw_NC = pers.Network_Config.Cnodes;
  hw_ND = pers.Network_Config.Dnodes;
  hw_NE = pers.Network_Config.Enodes;

  unsigned int isFile = 0;
  Kernel_GetMapping(10, mapping, &isFile);
  if(!isFile) {
    for(int i = 0; i < 6 ; i++) {
      if(mapping[i] != 'T') {
        order[5 - i] = mapping[i] - 'A';
      } else {
        order[5 - i] = 5;
      }
    }
  }
  //printf("Mapping %d %d %d %d %d %d\n",order[0],order[1],order[2],order[3],order[4],order[5]);

  rn_NA = hw_NA;
  rn_NB = hw_NB;
  rn_NC = hw_NC;
  rn_ND = hw_ND;
  rn_NE = hw_NE;

  if(rn_NA * rn_NB * rn_NC * rn_ND * rn_NE != numPes/hw_NT) {
    /* job does not span the whole partition: derive the bounding box of
     * the coordinates actually used */
    rn_NA = rn_NB = rn_NC = rn_ND =rn_NE =0;
    int rn_NT=0;
    int min_a, min_b, min_c, min_d, min_e, min_t;
    /* Bug fix: the original initializer (~(-1)) evaluates to 0, which
     * disabled minimum tracking whenever the job corner is above the
     * origin; start the minimums at the largest int instead. */
    min_a = min_b = min_c = min_d = min_e = min_t = 0x7fffffff;
    int tmp_t, tmp_a, tmp_b, tmp_c, tmp_d, tmp_e;
    uint64_t numentries;
    BG_CoordinateMapping_t *coord;

    int nranks=numPes/thdsPerProc;
    coord = (BG_CoordinateMapping_t *) malloc(sizeof(BG_CoordinateMapping_t)*nranks);
    Kernel_RanksToCoords(sizeof(BG_CoordinateMapping_t)*nranks, coord, &numentries);

    for(int c = 0; c < nranks; c++) {
      tmp_a = coord[c].a;
      tmp_b = coord[c].b;
      tmp_c = coord[c].c;
      tmp_d = coord[c].d;
      tmp_e = coord[c].e;
      tmp_t = coord[c].t;

      if(tmp_a > rn_NA) rn_NA = tmp_a;
      if(tmp_a < min_a) min_a = tmp_a;
      if(tmp_b > rn_NB) rn_NB = tmp_b;
      if(tmp_b < min_b) min_b = tmp_b;
      if(tmp_c > rn_NC) rn_NC = tmp_c;
      if(tmp_c < min_c) min_c = tmp_c;
      if(tmp_d > rn_ND) rn_ND = tmp_d;
      if(tmp_d < min_d) min_d = tmp_d;
      if(tmp_e > rn_NE) rn_NE = tmp_e;
      if(tmp_e < min_e) min_e = tmp_e;
      if(tmp_t > rn_NT) rn_NT = tmp_t;
      if(tmp_t < min_t) min_t = tmp_t;
    }
    /* extents = (max - min + 1) along each axis */
    rn_NA = rn_NA - min_a + 1;
    rn_NB = rn_NB - min_b + 1;
    rn_NC = rn_NC - min_c + 1;
    rn_ND = rn_ND - min_d + 1;
    rn_NE = rn_NE - min_e + 1;
    procsPerNode = rn_NT - min_t + 1;
    hw_NT = procsPerNode * thdsPerProc;
    free(coord);
  }

  dimA = rn_NA;
  dimB = rn_NB;
  dimC = rn_NC;
  dimD = rn_ND;
  dimE = rn_NE;
  dimA = dimA * hw_NT;	// assuming TABCDE

  dims[0] = rn_NA;
  dims[1] = rn_NB;
  dims[2] = rn_NC;
  dims[3] = rn_ND;
  dims[4] = rn_NE;
  dims[5] = hw_NT;

  /* BG/Q wraps a dimension into a torus only when its length is a
   * multiple of 4; the E dimension (length 2) is always a torus */
  torus[0] = ((rn_NA % 4) == 0)? true:false;
  torus[1] = ((rn_NB % 4) == 0)? true:false;
  torus[2] = ((rn_NC % 4) == 0)? true:false;
  torus[3] = ((rn_ND % 4) == 0)? true:false;
  torus[4] = true;

  populateLocalNodes();
}
Пример #8
0
  //initialize the spi communications
  void init_spi()
  {
    //check not to have initialized
    if(!spi_inited)
      {
	verbosity_lv1_master_printf("Starting spi\n");
	
	//check that we do not have more than one process per node
	if(Kernel_ProcessCount()!=1) crash("only one process per node implemented");
	
	//mark as initialized
	spi_inited=true;
	
	//get coordinates, size and rank in the 5D grid
	set_spi_geometry();
	
	//check that all ranks are first neighbours in SPI grid
	check_all_lattice_neighbours_are_spi_first_neighbours();
	
	//allocate bats
	spi_bat_id[0]=0;
	spi_bat_id[1]=1;
	if(Kernel_AllocateBaseAddressTable(0,&spi_bat_gr,2,spi_bat_id,0)) crash("allocating bat");
	
	////////////////////////////////// init the fifos ///////////////////////////////////
	
	//alloc space for the injection fifos
	uint32_t fifo_size=64*NSPI_FIFO;
	for(int ififo=0;ififo<NSPI_FIFO;ififo++) spi_fifo[ififo]=(uint64_t*)memalign(64,fifo_size);
	
	//set default attributes for inj fifo
	Kernel_InjFifoAttributes_t fifo_attrs[NSPI_FIFO];
	memset(fifo_attrs,0,NSPI_FIFO*sizeof(Kernel_InjFifoAttributes_t));
	
	//initialize them with default attributes
	uint32_t fifo_id[NSPI_FIFO];
	for(int ififo=0;ififo<NSPI_FIFO;ififo++) fifo_id[ififo]=ififo;
	if(Kernel_AllocateInjFifos(0,&spi_fifo_sg_ptr,NSPI_FIFO,fifo_id,fifo_attrs)) crash("allocating inj fifos");
	
	//init the MU MMIO for the fifos
	for(int ififo=0;ififo<NSPI_FIFO;ififo++)
	  {
	    //create the memory region
	    Kernel_MemoryRegion_t mem_region;
	    if(Kernel_CreateMemoryRegion(&mem_region,spi_fifo[NSPI_FIFO-1-ififo],fifo_size))
	      crash("creating memory region %d of bytes",ififo,fifo_size);
	    
	    //initialize the fifos
	    if(Kernel_InjFifoInit(&spi_fifo_sg_ptr,fifo_id[ififo],&mem_region,
				  (uint64_t)spi_fifo[NSPI_FIFO-1-ififo]-(uint64_t)mem_region.BaseVa,fifo_size-1))
	      crash("initializing fifo");
	  }
	
	//activate the fifos
	if(Kernel_InjFifoActivate(&spi_fifo_sg_ptr,NSPI_FIFO,fifo_id,KERNEL_INJ_FIFO_ACTIVATE)) crash("activating fifo");
	
	//check alignment
	CRASH_IF_NOT_ALIGNED(recv_buf,64);
	CRASH_IF_NOT_ALIGNED(send_buf,64);
	
	//get physical address of receiving buffer
	Kernel_MemoryRegion_t mem_region;
	if(Kernel_CreateMemoryRegion(&mem_region,recv_buf,recv_buf_size))
	  crash("creating recv_buf memory region of %d bytes",recv_buf_size);
	
	//set the physical address
	if(MUSPI_SetBaseAddress(&spi_bat_gr,spi_bat_id[0],(uint64_t)recv_buf-
				(uint64_t)mem_region.BaseVa+(uint64_t)mem_region.BasePa))
	  crash("setting base address");
	
	//set receive counter bat to MU style atomic PA addr of the receive counter
	if((uint64_t)(&spi_recv_counter)&0x7) crash("recv counter not 8 byte aligned");
	if(Kernel_CreateMemoryRegion(&mem_region,(void*)&spi_recv_counter,sizeof(uint64_t)))
	  crash("creating memory region of %d bytes",sizeof(uint64_t));
	if(MUSPI_SetBaseAddress(&spi_bat_gr,spi_bat_id[1],MUSPI_GetAtomicAddress((uint64_t)&spi_recv_counter-(uint64_t)mem_region.BaseVa+(uint64_t)mem_region.BasePa,MUHWI_ATOMIC_OPCODE_STORE_ADD))) crash("setting base addr");
	
	//reset number of byte to be received
	spi_recv_counter=0;
	
	//get the send buffer physical address
	if(Kernel_CreateMemoryRegion(&mem_region,send_buf,send_buf_size))
	  crash("creating memory region of %d bytes",send_buf_size);
	spi_send_buf_phys_addr=(uint64_t)send_buf-(uint64_t)mem_region.BaseVa+(uint64_t)mem_region.BasePa;
	
	//find hints for descriptors
	set_spi_hints();
	
#ifdef SPI_BARRIER
	//init the barrier
	if(MUSPI_GIBarrierInit(&spi_barrier,0)) crash("initializing the barrier");
#endif
	verbosity_lv2_master_printf("spi initialized\n");
      }
  }
Пример #9
0
/*
 * Populate fi_bgq_global.info with the provider's default fi_info:
 * computes per-process tx/rx context counts from the node's MU fifo
 * budget, fills in tx/rx/ep/domain/fabric attributes, and appends a
 * duplicate entry to the list.
 *
 * Returns 0 on success, -FI_ENOMEM (via negative errno) when fi_dupinfo
 * fails.
 */
int fi_bgq_set_default_info()
{
	struct fi_info *fi, *prev_fi;
	uint32_t ppn = Kernel_ProcessCount();

	/*
	 * See: fi_bgq_stx_init() for the number of mu injection fifos
	 * allocated for each tx context. Each rx context uses one
	 * mu injection fifo and one mu reception fifo.
	 */
	const unsigned tx_ctx_cnt = (((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / 3) / ppn;

	/*
	 * The number of rx contexts on a node is the minimum of:
	 * 1. number of mu injection fifos on the node not used by tx contexts
	 * 2. total number mu reception fifos on the node
	 */
	const unsigned rx_ctx_cnt = MIN((((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) - (tx_ctx_cnt * ppn)), ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP)) / ppn;

	fi = fi_dupinfo(NULL);
	if (!fi) {
		errno = FI_ENOMEM;
		return -errno;
	}

	fi_bgq_global.info = fi;

	*fi->tx_attr = (struct fi_tx_attr) {
		.caps		= FI_RMA | FI_ATOMIC | FI_TRANSMIT_COMPLETE,
		.mode		= FI_ASYNC_IOV,
		.op_flags	= FI_TRANSMIT_COMPLETE,
		.msg_order	= FI_ORDER_SAS | FI_ORDER_WAW | FI_ORDER_RAW | FI_ORDER_RAR,
		.comp_order	= FI_ORDER_NONE,
		.inject_size	= FI_BGQ_INJECT_SIZE,
		.size		= FI_BGQ_TX_SIZE,
		.iov_limit	= SIZE_MAX,
		.rma_iov_limit  = 0
	};

	*fi->rx_attr = (struct fi_rx_attr) {
		.caps		= FI_RMA | FI_ATOMIC | FI_NAMED_RX_CTX,
		.mode		= FI_ASYNC_IOV,
		.op_flags	= 0,
		.msg_order	= 0,
		.comp_order	= FI_ORDER_NONE,
		.total_buffered_recv = FI_BGQ_TOTAL_BUFFERED_RECV,
		.size		= FI_BGQ_RX_SIZE,
		.iov_limit	= SIZE_MAX
	};

	*fi->ep_attr = (struct fi_ep_attr) {
		.type			= FI_EP_RDM,
		.protocol		= FI_BGQ_PROTOCOL,
		.protocol_version	= FI_BGQ_PROTOCOL_VERSION,
		.max_msg_size		= FI_BGQ_MAX_MSG_SIZE,
		.msg_prefix_size	= FI_BGQ_MAX_PREFIX_SIZE,
		.max_order_raw_size	= FI_BGQ_MAX_ORDER_RAW_SIZE,
		.max_order_war_size	= FI_BGQ_MAX_ORDER_WAR_SIZE,
		.max_order_waw_size	= FI_BGQ_MAX_ORDER_WAW_SIZE,
		.mem_tag_format		= FI_BGQ_MEM_TAG_FORMAT,
		.tx_ctx_cnt		= tx_ctx_cnt,
		.rx_ctx_cnt		= rx_ctx_cnt,
	};

	*fi->domain_attr = (struct fi_domain_attr) {
		.domain		= NULL,
		.name		= NULL, /* TODO: runtime query for name? */
		.threading	= FI_THREAD_FID,
		.control_progress = FI_PROGRESS_MANUAL,
		.data_progress	= FI_PROGRESS_AUTO, // + FI_PROGRESS_MANUAL ?
		.resource_mgmt	= FI_RM_DISABLED,
		.av_type	= FI_AV_MAP,
		.mr_mode	= FI_MR_SCALABLE,
		.mr_key_size	= 2,
		.cq_data_size	= 0,
		.cq_cnt		= 128 / ppn,
		.ep_cnt		= SIZE_MAX,
		.tx_ctx_cnt	= tx_ctx_cnt,
		.rx_ctx_cnt	= rx_ctx_cnt,

		.max_ep_tx_ctx	= ((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / ppn / 2,
		.max_ep_rx_ctx	= ((BGQ_MU_NUM_REC_FIFO_GROUPS-1) * BGQ_MU_NUM_REC_FIFOS_PER_GROUP) / ppn,
		.max_ep_stx_ctx	= ((BGQ_MU_NUM_INJ_FIFO_GROUPS-1) * BGQ_MU_NUM_INJ_FIFOS_PER_GROUP) / ppn / 2,
		.max_ep_srx_ctx	= 0
	};

	*fi->fabric_attr = (struct fi_fabric_attr) {
		.fabric		= NULL,
		.name		= strdup(FI_BGQ_FABRIC_NAME),
		.prov_name	= strdup(FI_BGQ_PROVIDER_NAME),
		.prov_version	= FI_BGQ_PROVIDER_VERSION
	};

	fi->caps		= FI_RMA | FI_ATOMIC |
					FI_NAMED_RX_CTX | FI_TRANSMIT_COMPLETE;
	fi->mode		= FI_ASYNC_IOV;
	fi->addr_format		= FI_ADDR_BGQ;
	fi->src_addrlen		= 24; // includes null
	fi->dest_addrlen	= 24; // includes null

	/* append a duplicate entry; the dup result was previously
	 * unchecked, which silently truncated the list on OOM */
	prev_fi = fi;
	fi = fi_dupinfo(prev_fi);
	if (!fi) {
		errno = FI_ENOMEM;
		return -errno;
	}
	prev_fi->next = fi;

	return 0;
}
Пример #10
0
/*
 * Demo/test driver: queries the BG/Q personality and job coordinates,
 * prints them from one chosen rank (763), then recomputes this node's
 * ABCDE-lexicographic rank into myRank_test.
 *
 * NOTE(review): relies on globals (myRank, myCoord, myA..myE, numNodes,
 * myRank_test) declared elsewhere in the file.
 */
int main(int argc, char **argv)
{
    BG_CoordinateMapping_t coord;	/* NOTE(review): unused in the visible code */
    BG_JobCoords_t job;
    Personality_t pers;
    Kernel_GetPersonality(&pers, sizeof(pers));
    myRank=Kernel_GetRank();
    myCoord=Kernel_MyTcoord();
    Kernel_JobCoords(&job);
//    myCoreID=Kernel_ProcessorCoreID();
//    myHWTID=Kernel_ProcessorThreadID();
//    myPhysicalID=Kernel_PhysicalProcessorID();
  /* NOTE(review): the entries below are assignment expressions - they
   * set the globals myA..myE as a side effect and use the assigned
   * values as the aggregate initializer elements */
  torus_t tcoords =
    {
       myA=pers.Network_Config.Acoord,
       myB=pers.Network_Config.Bcoord,
       myC=pers.Network_Config.Ccoord,
       myD=pers.Network_Config.Dcoord,
       myE=pers.Network_Config.Ecoord
     };

  torus_t tdims =
    {
      pers.Network_Config.Anodes,
      pers.Network_Config.Bnodes,
      pers.Network_Config.Cnodes,
      pers.Network_Config.Dnodes,
      pers.Network_Config.Enodes
    };

  numNodes = tdims.a * tdims.b * tdims.c * tdims.d * tdims.e;

  /* job shape (extent along each torus dimension) */
  unsigned my_com_A=job.shape.a;
  unsigned my_com_B=job.shape.b;
  unsigned my_com_C=job.shape.c;
  unsigned my_com_D=job.shape.d;
  unsigned my_com_E=job.shape.e;

  /* job corner (origin of the job within the partition) */
  unsigned my_com_Acoord=job.corner.a;
  unsigned my_com_Bcoord=job.corner.b;
  unsigned my_com_Ccoord=job.corner.c;
  unsigned my_com_Dcoord=job.corner.d;
  unsigned my_com_Ecoord=job.corner.e;

    /* print diagnostics from one arbitrary representative rank */
    if ( myRank == 763 )
      {
        printf("number of nodes:%d \n", numNodes);
        printf("number of processes per node:%d \n",Kernel_ProcessCount());
        printf("number of hardware threads per process:%d \n",Kernel_ProcessorCount());
        printf("MPI rank %d has 5D torus coordinates <%d,%d,%d,%d,%d> \n", myRank, myA, myB, myC, myD, myE);
        printf("job has 5D torus dimensions <%d,%d,%d,%d,%d> \n", tdims.a, tdims.b, tdims.c, tdims.d, tdims.e);
        printf("MPI rank %d has dimensions <%d,%d,%d,%d,%d> \n", myRank, my_com_A,my_com_B, my_com_C, my_com_D, my_com_E);
        printf("MPI rank %d has coordinates <%d,%d,%d,%d,%d> \n",myRank, my_com_Acoord, my_com_Bcoord, my_com_Ccoord, my_com_Dcoord, my_com_Ecoord); 
      }

//        printf("rank %d has coordinates 
  /* strides for the lexicographic ABCDE rank computation */
  unsigned a,b,c,d,e;
  unsigned a_mult = tdims.b * tdims.c * tdims.d * tdims.e;
  unsigned b_mult = tdims.c * tdims.d * tdims.e;
  unsigned c_mult = tdims.d * tdims.e;
  unsigned d_mult = tdims.e;

  /* brute-force scan: record the rank whose coordinates match ours;
   * the braceless 'if' below governs only the myRank_test assignment */
  for ( a = 0; a < tdims.a; a++ )
    for ( b = 0; b < tdims.b; b++ )
      for ( c = 0; c < tdims.c; c++ )
        for ( d = 0; d < tdims.d; d++ )
          for ( e = 0; e < tdims.e; e++ )
            {
               unsigned rank = a * a_mult + b * b_mult + c * c_mult + d * d_mult + e;

               if ( a == tcoords.a &&
                    b == tcoords.b &&
                    c == tcoords.c &&
                    d == tcoords.d &&
                    e == tcoords.e )

               myRank_test=rank;
            }

//         if (myRank == myRank_test) printf("MPI rank %d returns 1 \n", myRank);
  /* falls off the end: C99 main() implicitly returns 0 */
}
Пример #11
0
/*
 * Split 'oldcomm' into per-node communicators: all ranks residing on
 * the same physical node receive the same 'newcomm'.  In SMP mode
 * (one process per node) MPI_COMM_SELF is returned directly.
 *
 * Returns the MPI_Comm_split result, or MPI_SUCCESS for the trivial
 * paths.
 *
 * NOTE(review): 'color' is not declared in this function - assumed to
 * be a file-scope variable; verify, otherwise this does not compile.
 */
int OSPU_Comm_split_node(MPI_Comm oldcomm, MPI_Comm * newcomm)
{
    int rc;

#if defined(__bgp__)

    _BGP_Personality_t personality;

    Kernel_GetPersonality( &personality, sizeof(personality) );

    /* SMP mode is trivial */
    if (personality.Kernel_Config.ProcessConfig == _BGP_PERS_PROCESSCONFIG_SMP )
    {
        *newcomm = MPI_COMM_SELF;
        return rc = MPI_SUCCESS;
    }
    else
    {
        int xrank = personality.Network_Config.Xcoord;
        int yrank = personality.Network_Config.Ycoord;
        int zrank = personality.Network_Config.Zcoord;

        int xsize = personality.Network_Config.Xnodes;
        int ysize = personality.Network_Config.Ynodes;
        int zsize = personality.Network_Config.Znodes;

        /* lexicographic XYZ node index: unique per node, shared by all
         * ranks on that node */
        color = xrank * ysize * zsize
              + yrank * zsize
              + zrank;

        rc = MPI_Comm_split(oldcomm, color, 0, newcomm);
        return rc;
    }

#elif defined(__bgq__)

    /* SMP mode is trivial */
    if ( 1 == Kernel_ProcessCount() )
    {
        *newcomm = MPI_COMM_SELF;
        return rc = MPI_SUCCESS;
    }
    else
    {
        Personality_t personality;
        Kernel_GetPersonality( &personality, sizeof(personality) );

        int arank = personality.Network_Config.Acoord;
        int brank = personality.Network_Config.Bcoord;
        int crank = personality.Network_Config.Ccoord;
        int drank = personality.Network_Config.Dcoord;
        int erank = personality.Network_Config.Ecoord;

        int asize = personality.Network_Config.Anodes;
        int bsize = personality.Network_Config.Bnodes;
        int csize = personality.Network_Config.Cnodes;
        int dsize = personality.Network_Config.Dnodes;
        int esize = personality.Network_Config.Enodes;

        /* lexicographic ABCDE node index: unique per node, shared by
         * all ranks on that node */
        color = arank * bsize * csize * dsize * esize
              + brank * csize * dsize * esize
              + crank * dsize * esize
              + drank * esize
              + erank;

        rc = MPI_Comm_split(oldcomm, color, 0, newcomm);
        return rc;
    }

#endif

    /* reached only when neither __bgp__ nor __bgq__ is defined */
    return rc = MPI_SUCCESS;
}