/*
 * Translate a cpumask into a CPU id on a node: the result is the lowest
 * index in [0, OPAL_PAFFINITY_BITMASK_CPU_MAX) whose bit is set in the
 * mask, or -1 when no bit in that range is set.
 */
static int cpumask_to_id(opal_paffinity_base_cpu_set_t cpumask)
{
    int found = -1;
    int cpu = 0;

    /* stop at the first set bit; later bits are never examined */
    while (found < 0 && cpu < OPAL_PAFFINITY_BITMASK_CPU_MAX) {
        if (OPAL_PAFFINITY_CPU_ISSET(cpu, cpumask)) {
            found = cpu;
        }
        ++cpu;
    }

    return found;
}
/**
 * Append src to the NUL-terminated string stored in dst, never writing
 * past dst[len - 1].
 *
 * strncat()'s count argument is the maximum number of characters to copy
 * and does NOT include the terminating NUL it always writes, so the room
 * handed to it must be (len - strlen(dst) - 1).  When the buffer is
 * already full (or len is not positive) nothing is appended.
 *
 * @param dst  destination buffer; must hold a NUL-terminated string
 *             whenever len > 0
 * @param len  total size of dst in bytes
 * @param src  NUL-terminated text to append (silently truncated to fit)
 */
static void cset2mapstr_append(char *dst, int len, const char *src)
{
    size_t used;

    if (len <= 0) {
        return;
    }
    used = strlen(dst);
    if (used + 1 >= (size_t) len) {
        return;                 /* no room left for even one character */
    }
    strncat(dst, src, (size_t) len - used - 1);
}

/**
 * Make a prettyprint string for a cset in a map format.
 * Example: [B . . .]
 * Key:  [] - signifies socket boundary
 *       .  - signifies core a process not bound to
 *       B  - signifies core a process is bound to
 *
 * One bracketed group is emitted per socket; cores inside a group are
 * separated by a single space.  Output that does not fit into the buffer
 * is truncated; the buffer is always left NUL-terminated when len > 0.
 *
 * @param str   output buffer
 * @param len   size of str in bytes
 * @param cset  cpu set to render
 *
 * @return OPAL_SUCCESS, or the first non-success code returned by
 *         opal_paffinity_base_get_socket_info(),
 *         opal_paffinity_base_get_core_info() or
 *         opal_paffinity_base_get_map_to_processor_id().  On such an
 *         error str holds whatever had been rendered so far.
 */
int opal_paffinity_base_cset2mapstr(char *str, int len,
                                    opal_paffinity_base_cpu_set_t *cset)
{
    int ret, i, j, k, num_sockets, num_cores;
    int phys_socket, phys_core;

    if (len > 0) {
        str[0] = '\0';
    }

    /* Loop over the number of sockets in this machine */
    ret = opal_paffinity_base_get_socket_info(&num_sockets);
    if (OPAL_SUCCESS != ret) {
        return ret;
    }
    for (i = 0; i < num_sockets; ++i) {
        cset2mapstr_append(str, len, "[");

        /* Loop over the number of cores in this socket */
        ret = opal_paffinity_base_get_core_info(i, &num_cores);
        if (OPAL_SUCCESS != ret) {
            return ret;
        }
        phys_socket = opal_paffinity_base_get_physical_socket_id(i);

        for (j = 0; j < num_cores; j++) {
            if (0 < j) {
                /* add space after first core is printed */
                cset2mapstr_append(str, len, " ");
            }

            phys_core = opal_paffinity_base_get_physical_core_id(phys_socket, j);
            ret = opal_paffinity_base_get_map_to_processor_id(phys_socket,
                                                              phys_core, &k);
            if (OPAL_SUCCESS != ret) {
                return ret;
            }

            if (OPAL_PAFFINITY_CPU_ISSET(k, *cset)) {
                /* mark core as bound to process */
                cset2mapstr_append(str, len, "B");
            } else {
                /* mark core as no process bound to it */
                cset2mapstr_append(str, len, ".");
            }
        }
        cset2mapstr_append(str, len, "]");
    }

    return OPAL_SUCCESS;
}
/**
 * Append src to the NUL-terminated string stored in dst, never writing
 * past dst[len - 1].
 *
 * strncat()'s count argument does not include the terminating NUL it
 * always writes, so the room handed to it is (len - strlen(dst) - 1).
 * When the buffer is already full (or len is not positive) nothing is
 * appended.
 *
 * @param dst  destination buffer; must hold a NUL-terminated string
 *             whenever len > 0
 * @param len  total size of dst in bytes
 * @param src  NUL-terminated text to append (silently truncated to fit)
 */
static void cset2str_append(char *dst, int len, const char *src)
{
    size_t used;

    if (len <= 0) {
        return;
    }
    used = strlen(dst);
    if (used + 1 >= (size_t) len) {
        return;                 /* no room left for even one character */
    }
    strncat(dst, src, (size_t) len - used - 1);
}

/**
 * Make a prettyprint string for a cset.
 *
 * For every socket that has at least one core set in cset, a group of the
 * form "socket S[core A-B,C]" is emitted, where S and the core numbers are
 * the loop (logical) indices.  Runs of consecutive cores are collapsed to
 * "first-last"; a lone core is printed as a single number.  Groups are
 * separated by one space and the result has no trailing space.  If no core
 * is set the result is the empty string.  Output that does not fit is
 * truncated; the buffer is always NUL-terminated when len > 0.
 *
 * @param str   output buffer
 * @param len   size of str in bytes
 * @param cset  cpu set to render
 *
 * @return OPAL_SUCCESS, or the first non-success code returned by
 *         opal_paffinity_base_get_socket_info(),
 *         opal_paffinity_base_get_core_info() or
 *         opal_paffinity_base_get_map_to_processor_id().  On such an
 *         error str holds whatever had been rendered so far.
 */
int opal_paffinity_base_cset2str(char *str, int len,
                                 opal_paffinity_base_cpu_set_t *cset)
{
    int ret, i, j, k, num_sockets, num_cores, count;
    int range_first = 0, range_last;
    int phys_socket, phys_core;
    char tmp[BUFSIZ];
    size_t slen;

    if (len > 0) {
        str[0] = '\0';
    }

    /* Loop over the number of sockets in this machine */
    ret = opal_paffinity_base_get_socket_info(&num_sockets);
    if (OPAL_SUCCESS != ret) {
        return ret;
    }
    for (i = 0; i < num_sockets; ++i) {
        /* Loop over the number of cores in this socket */
        ret = opal_paffinity_base_get_core_info(i, &num_cores);
        if (OPAL_SUCCESS != ret) {
            return ret;
        }
        phys_socket = opal_paffinity_base_get_physical_socket_id(i);

        /* range_first/range_last track the run of consecutive bound cores
           currently being printed; count is the number of bound cores
           seen so far on this socket. */
        range_last = -5;
        count = 0;
        for (j = 0; j < num_cores; ++j) {
            phys_core = opal_paffinity_base_get_physical_core_id(phys_socket, j);
            ret = opal_paffinity_base_get_map_to_processor_id(phys_socket,
                                                              phys_core, &k);
            if (OPAL_SUCCESS != ret) {
                return ret;
            }

            /* Prettyprint the cores that we're actually bound to */
            if (!OPAL_PAFFINITY_CPU_ISSET(k, *cset)) {
                continue;
            }

            if (0 == count) {
                /* first bound core on this socket opens the group */
                snprintf(tmp, sizeof(tmp), "socket %d[core %d", i, j);
                cset2str_append(str, len, tmp);
                range_first = range_last = j;
            } else if (j - 1 == range_last) {
                /* still inside a consecutive run: just extend it */
                range_last = j;
            } else {
                /* gap: close the previous run (only if it spans more than
                   one core, to avoid output such as "0-0") and start a
                   new one at j */
                if (range_first != range_last) {
                    snprintf(tmp, sizeof(tmp), "-%d", range_last);
                    cset2str_append(str, len, tmp);
                }
                snprintf(tmp, sizeof(tmp), ",%d", j);
                cset2str_append(str, len, tmp);
                range_first = range_last = j;
            }
            ++count;
        }

        if (count > 0) {
            if (range_first != range_last) {
                snprintf(tmp, sizeof(tmp), "-%d", range_last);
                cset2str_append(str, len, tmp);
            }
            cset2str_append(str, len, "] ");
        }
    }

    /* If the last character is a space, remove it.  The string may be
       empty (no bound cores), in which case there is nothing to strip. */
    if (len > 0) {
        slen = strlen(str);
        if (slen > 0 && ' ' == str[slen - 1]) {
            str[slen - 1] = '\0';
        }
    }

    return OPAL_SUCCESS;
}
static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs, int n_procs_in, struct ompi_communicator_t *comm, char *key, void *output_data ) { /* local variables */ mca_sbgp_basesmsocket_module_t *module; /* opal_buffer_t* sbuffer = OBJ_NEW(opal_buffer_t); opal_buffer_t* rbuffer = OBJ_NEW(opal_buffer_t); */ opal_paffinity_base_cpu_set_t my_cpu_set; bool bound; int ret; int num_processors; int socket_tmp; int my_socket_index; int core_index=-1; int proc, cnt, local, n_local_peers, my_index, my_rank; ompi_proc_t* my_proc; int *local_ranks_in_comm=NULL; int *socket_info=NULL, my_socket_info; int i_cnt, lp_cnt, my_local_index, comm_size=ompi_comm_size(comm); /* initialize data */ output_data=NULL; my_rank=ompi_comm_rank(comm); my_proc=ompi_comm_peer_lookup(comm,my_rank); for( proc=0 ; proc < n_procs_in ; proc++) { if( procs[proc]==my_proc) { my_index=proc; } } /*create a new module*/ module=OBJ_NEW(mca_sbgp_basesmsocket_module_t); if (!module ) { return NULL; } module->super.group_size=0; module->super.group_comm = comm; module->super.group_list = NULL; module->super.group_net = OMPI_SBGP_SOCKET; /* ** get my process affinity information ** */ /* get the number of processors on this node */ ret=opal_paffinity_base_get_processor_info(&num_processors); /* get process affinity mask */ OPAL_PAFFINITY_CPU_ZERO(my_cpu_set); ret=opal_paffinity_base_get(&my_cpu_set); OPAL_PAFFINITY_PROCESS_IS_BOUND(my_cpu_set,&bound); /*debug process affinity*/ /* { ret=opal_paffinity_base_get_socket_info(&num_socket); fprintf(stderr,"Number of sockets %d\n",num_socket); fprintf(stderr,"Test if rank %d is bound %d\n", my_rank, bound); fprintf(stderr,"return from opal_paffinity_base_get: %d\n\n",ret); fprintf(stderr,"bitmask elements: "); unsigned int long jj; for(jj=0; jj < OPAL_PAFFINITY_BITMASK_NUM_ELEMENTS; jj++) fprintf(stderr," %d ",my_cpu_set.bitmask[jj]); fprintf(stderr,"\n"); fflush(stderr); } end debug process affinity*/ if( !bound ) { /* pa affinity 
not set, so socket index will be set to -1 */ my_socket_index=-1; /*debug print*/ /* */ fprintf(stderr,"[%d]FAILED to set basesmsocket group !!!\n",my_rank); fflush(stderr); /*end debug*/ goto NoLocalPeers; } else { my_socket_index=-1; /* loop over number of processors */ for ( proc=0 ; proc < num_processors ; proc++ ) { if (OPAL_PAFFINITY_CPU_ISSET(proc,my_cpu_set)) { ret=opal_paffinity_base_get_map_to_socket_core(proc,&socket_tmp,&core_index); if( my_socket_index != socket_tmp ) { my_socket_index=socket_tmp; break; } } } /* end of proc loop */ } /* Debug prints */ /* { fprintf(stderr,"Number of processors per node: %d\n",num_processors); fprintf(stderr,"I am rank %d and my socket index is %d\n and my core index is %d\n",my_rank,my_socket_index,core_index); fprintf(stderr,"n_proc_in = %d\n",n_procs_in); fprintf(stderr,"\n"); fflush(stderr); } end debug prints */ /*get my socket index*/ cnt=0; for( proc=0 ; proc < n_procs_in ; proc++) { local=OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags); if( local ) { cnt++; } } /*debug print */ /* fprintf(stderr,"Number of local processors %d\n",cnt); end debug print*/ /* if no other local procs found skip to end */ if( 1 >= cnt ) { goto NoLocalPeers; } #if 0 int *local_ranks_in_comm; int32_t *socket_info, *my_socket_info; int my_local_index; #endif /* allocate structure to hold the list of local ranks */ local_ranks_in_comm=(int *)malloc(sizeof(int)*cnt); if(NULL == local_ranks_in_comm ) { goto Error; } /* figure out which ranks from the input communicator - comm - will * particiapte in the local socket determination. */ n_local_peers=0; i_cnt=0; for( proc = 0; proc < n_procs_in; proc++) { local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags); if ( local ) { /* set the rank within the on-host ranks - this will be used for tha * allgather */ if( my_proc == procs[proc] ) { my_local_index=n_local_peers; } /* find the rank of the current proc in comm. 
We take advantage * of the fact that ranks in a group have the same relative * ordering as they do within the communicator. */ #if 1 /*for( lp_cnt=i_cnt; lp_cnt < comm_size ; lp_cnt++ ) {*/ for( lp_cnt=proc; lp_cnt < comm_size ; lp_cnt++ ) { if(procs[proc] == ompi_comm_peer_lookup(comm,lp_cnt) ) { local_ranks_in_comm[i_cnt]=lp_cnt; /* lp_cnt has alrady been checked */ i_cnt++; /* found the corresponding rank in comm, so don't need * to search any more */ break; } /*i_cnt++;*/ /*fprintf(stderr,"QQQ i_cnt %d \n",i_cnt);*/ } #endif n_local_peers++; } } /*fprintf(stderr,"YYY n_local_peers %d\n",n_local_peers);*/ socket_info=(int *)malloc(sizeof(int)*n_local_peers); /*fprintf(stderr,"XXX got socket info\n");*/ if(NULL == socket_info ) { goto Error; } my_socket_info=my_socket_index; /* Allgather data over the communicator */ ret=comm_allgather_pml(&my_socket_info, socket_info, 1, MPI_INT, my_local_index, n_local_peers, local_ranks_in_comm,comm); if (OMPI_SUCCESS != ret ) { fprintf(stderr," comm_allgather_pml returned error %d \n", ret); fflush(stderr); return NULL; } /*allocate memory to the group_list probably an overestimation of the necessary resources */ module->super.group_list=(int *)malloc(sizeof(int)*cnt); if(NULL == module->super.group_list) { goto Error; } /* figure out who is sharing the same socket */ cnt=0; for (proc = 0; proc < n_local_peers; proc++) { int rem_rank=local_ranks_in_comm[proc]; int rem_socket_index=socket_info[proc]; /*Populate the list*/ if (rem_socket_index == my_socket_index) { module->super.group_list[cnt]=rem_rank; cnt++; } } module->super.group_size=cnt; /*debug print*/ /* { int ii; fprintf(stderr,"Ranks per socket: %d\n",cnt); fprintf(stderr,"Socket %d owns ranks: ", my_socket_index); for (ii=0; ii < cnt; ii++) fprintf(stderr,"%d ",module->super.group_list[ii]); fprintf(stderr,"\n"); fflush(stderr); } { cpu_set_t set; unsigned int len = sizeof(set); int i; unsigned long mask = 0; CPU_ZERO(&set); if (sched_getaffinity(0, len, &set) < 0) 
{ perror("sched_getaffinity"); return -1; } for (i = 0; i < CPU_SETSIZE; i++) { int cpu = CPU_ISSET(i, &set); if (cpu) { mask |= 1<< i; } } opal_output(0,"%d: my affinity mask is: %08lx\n", my_local_index,mask); } end debug*/ /*Free resources*/ free(local_ranks_in_comm); free(socket_info); /*Return the module*/ return (mca_sbgp_base_module_t *) module; NoLocalPeers: /* nothing to store, so just free the module and return */ /*fprintf(stderr,"No local socket peers\n");*/ /*free(module);*/ if(socket_info) { free(socket_info); socket_info=NULL; } if(local_ranks_in_comm) { free(local_ranks_in_comm); } OBJ_RELEASE(module); return NULL; Error: /*clean up*/ if( NULL != module->super.group_list) { free(module->super.group_list); module->super.group_list=NULL; } if(socket_info) { free(socket_info); socket_info=NULL; } if(local_ranks_in_comm) { free(local_ranks_in_comm); } OBJ_RELEASE(module); return NULL; }