예제 #1
0
/* NTH: this is no longer used but may be used if we can determine the binding policy*/
static int mca_sbgp_map_to_logical_socket_id(int *socket)
{
    int ret = OMPI_SUCCESS;
    hwloc_obj_t obj;
    hwloc_obj_t first_pu_object;
    hwloc_bitmap_t good;
    int pu_os_index = -1, my_logical_socket_id = -1;
    int this_pus_logical_socket_id = -1;

    *socket = my_logical_socket_id;

    /* bozo check */
    if (NULL == opal_hwloc_topology) {
        return OPAL_ERR_NOT_INITIALIZED;
    }

    good = hwloc_bitmap_alloc();
    if (NULL == good) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* get this process' CPU binding */
    if( 0 !=  hwloc_get_cpubind(opal_hwloc_topology,good, 0)){
        /* report some error */
        BASESMSOCKET_VERBOSE(10, "The global variable opal_hwloc_topology appears not to have been initialized\n");
        hwloc_bitmap_free(good);
        return OMPI_ERROR;
    }

    /* find the first logical PU object in the hwloc tree */
    first_pu_object = hwloc_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU, 0);


    /* get the next bit in the bitmap (note: if pu_os_index == -1, then the 
     * first bit is returned 
     */
     /* traverse the hwloc tree */
     while( -1 != (pu_os_index = hwloc_bitmap_next(good, pu_os_index) ) ) {
         /* Traverse all PUs in the machine in logical order, in the simple case 
          * there should only be a single PU that this process is bound to, right?
          *
          */
          for( obj = first_pu_object; obj != NULL; obj = obj->next_cousin ) {/* WTF is a "next_cousin" ? */ 
              /* is this PU the same as the bit I pulled off the mask? */
              if( obj->os_index == (unsigned int) pu_os_index) {
                  /* Then I found it, break out of for loop */
                  break;
              }
          }

          if( NULL != obj) {
              /* if we found the PU, then go upward in the tree
               * looking for the enclosing socket 
               */
               while( (NULL != obj) && ( HWLOC_OBJ_SOCKET != obj->type) ){
                   obj = obj->parent;
               }

               if( NULL == obj ) {
                   /* then we couldn't find an enclosing socket, report this */
               } else {
                   /* We found the enclosing socket */
                   if( -1 == my_logical_socket_id ){
                       /* this is the first PU that I'm bound to */
                       this_pus_logical_socket_id = obj->logical_index;
                       my_logical_socket_id = this_pus_logical_socket_id;
                   } else {
                       /* this is not the first PU that I'm bound to. 
                        * Seems I'm bound to more than a single PU. Question
                        * is, am I bound to the same socket?? 
                        */
                       /* in order to get rid of the compiler warning, I had to cast 
                        * "this_pus_logical_socket_id", at a glance this seems ok, 
                        * but if subgrouping problems arise, maybe look here. I shall 
                        * tag this line with the "mark of the beast" for grepability
                        * 666
                        */  
                        if( (unsigned int) this_pus_logical_socket_id != obj->logical_index ){
                            /* 666 */
                            /* Then we're bound to more than one socket...fail */
                            this_pus_logical_socket_id = -1;
                            my_logical_socket_id = -1;
                            break;
                        }
                   }
               }

          }

          /* end while */
     }
     *socket = my_logical_socket_id;
     hwloc_bitmap_free(good);

     return ret;

}
static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs,
        int n_procs_in,
        struct ompi_communicator_t *comm,
        char *key,
        void *output_data
                                                                 )
{
    /* local variables */
    mca_sbgp_basesmsocket_module_t *module;
    int ret;
    int my_socket_index;
    int proc, cnt, local, n_local_peers, my_rank;
    ompi_proc_t* my_proc;
    int *local_ranks_in_comm=NULL;
    int *socket_info=NULL, my_socket_info;
    int  i_cnt, lp_cnt, my_local_index = -1, comm_size=ompi_comm_size(comm);

    /* initialize data */
    output_data=NULL;
    my_rank=ompi_comm_rank(comm);
    my_proc=ompi_comm_peer_lookup(comm,my_rank);

    /*create a new module*/
    module=OBJ_NEW(mca_sbgp_basesmsocket_module_t);
    if (!module ) {
        return NULL;
    }
    module->super.group_size=0;
    module->super.group_comm = comm;
    module->super.group_list = NULL;
    module->super.group_net = OMPI_SBGP_SOCKET;

    /* test to see if process is bound */
    if( OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) ) {

        /* pa affinity not set, so socket index will be set to -1 */
        my_socket_index=-1;
        /*debug print*/
        /* */
        BASESMSOCKET_VERBOSE(10, ("[%d] FAILED to set basesmsocket group, processes are not bound!!!\n",my_rank));
        /*end debug*/
        goto NoLocalPeers;
    } else {

        my_socket_index=-1;
        /* this should find my logical socket id which is the socket id we want
         * physical socket ids are not necessarily unique, logical ones, as defined
         * by the hwloc API are unique.
         */
        if( OMPI_SUCCESS != mca_sbgp_map_to_logical_socket_id(&my_socket_index)) {
            BASESMSOCKET_VERBOSE(10, ("[%d] FAILED to set basesmsocket group !!!\n",my_rank));

            goto NoLocalPeers;
        }
    }

    /* Debug prints */
    /*
       {
       fprintf(stderr,"Number of processors per node: %d\n",num_processors);
       fprintf(stderr,"I am rank %d and my socket index is %d\n and my core index is %d\n",my_rank,my_socket_index,core_index);
       fprintf(stderr,"n_proc_in = %d\n",n_procs_in);
       fprintf(stderr,"\n");
       fflush(stderr);
       }
       end debug prints */


    /*get my socket index*/
    cnt=0;
    for( proc=0 ; proc < n_procs_in ; proc++) {
        local=OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if( local ) {
            cnt++;
        }
    }
    /*debug print */
    /*
    fprintf(stderr,"Number of local processors %d\n",cnt);
    end debug print*/

    /* if no other local procs found skip to end */
    if( 1 >= cnt ) {
        goto NoLocalPeers;
    }



    /* allocate structure to hold the list of local ranks */
    local_ranks_in_comm=(int *)malloc(sizeof(int)*cnt);
    if(NULL == local_ranks_in_comm ) {
        goto Error;
    }
    /* figure out which ranks from the input communicator - comm - will
     * particiapte in the local socket determination.
     */

    n_local_peers=0;
    i_cnt=0;
    for( proc = 0; proc < n_procs_in; proc++) {
        local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if ( local ) {

            /* set the rank within the on-host ranks - this will be used for tha
             * allgather
             */
            if( my_proc == procs[proc] ) {
                my_local_index=n_local_peers;
            }
            /* find the rank of the current proc in comm.  We take advantage
             * of the fact that ranks in a group have the same relative
             * ordering as they do within the communicator.
             */
            for( lp_cnt=proc; lp_cnt < comm_size ; lp_cnt++ ) {
                if(procs[proc] == ompi_comm_peer_lookup(comm,lp_cnt) ) {
                    local_ranks_in_comm[i_cnt]=lp_cnt;
                    /* lp_cnt has alrady been checked */
                    i_cnt++;
                    /* found the corresponding rank in comm, so don't need
                     * to search any more */
                    break;
                }
                /*i_cnt++;*/
                /*fprintf(stderr,"QQQ i_cnt %d \n",i_cnt);*/
            }
            n_local_peers++;
        }
    }
    /*fprintf(stderr,"YYY n_local_peers %d\n",n_local_peers);*/
    socket_info=(int *)malloc(sizeof(int)*n_local_peers);
    /*fprintf(stderr,"XXX got socket info\n");*/
    if(NULL == socket_info ) {
        goto Error;
    }

    my_socket_info=my_socket_index;

    /* Allgather data over the communicator */
    ret=comm_allgather_pml(&my_socket_info, socket_info, 1,
                           MPI_INT, my_local_index, n_local_peers, local_ranks_in_comm,comm);
    if (OMPI_SUCCESS != ret ) {
        BASESMSOCKET_VERBOSE(10, ("comm_allgather_pml returned error %d\n",ret));
        return NULL;
    }


    /*allocate memory to the group_list probably an overestimation
      of the necessary resources */
    module->super.group_list=(int *)malloc(sizeof(int)*cnt);
    if(NULL == module->super.group_list) {
        goto Error;
    }

    /* figure out who is sharing the same socket */
    cnt=0;
    for (proc = 0; proc < n_local_peers; proc++) {
        int rem_rank=local_ranks_in_comm[proc];
        int rem_socket_index=socket_info[proc];

        /*Populate the list*/
        if (rem_socket_index == my_socket_index) {
            module->super.group_list[cnt]=rem_rank;
            cnt++;
        }
    }

    module->super.group_size=cnt;

#if 0
    /*debug print*/

    {
        int ii;
        fprintf(stderr,"Ranks per socket: %d\n",cnt);
        fprintf(stderr,"Socket %d owns ranks: ", my_socket_index);
        for (ii=0; ii < cnt; ii++)
            fprintf(stderr,"%d ",module->super.group_list[ii]);
        fprintf(stderr,"\n");
        fflush(stderr);
    }
#endif

    /* end debug*/


    /*Free resources*/
    free(local_ranks_in_comm);
    free(socket_info);

    /*Return the module*/
    return (mca_sbgp_base_module_t *) module;


NoLocalPeers:
    /* nothing to store, so just free the module and return */
    /*fprintf(stderr,"No local socket peers\n");*/
    /*free(module);*/
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm) {
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;

Error:
    /*clean up*/
    if( NULL != module->super.group_list) {
        free(module->super.group_list);
        module->super.group_list=NULL;
    }
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm) {
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;


}