/**
 * Allgather one fixed-size record per process over ompi_mpi_comm_world.
 *
 * The caller's own record is read from buf at byte offset
 * buf_size * ORTE_PROC_MY_NAME->vpid, copied into a temporary send buffer,
 * and then comm_allgather_pml() is invoked with buf as the receive buffer.
 *
 * @param buf       in/out buffer; presumably at least
 *                  orte_process_info.num_procs * buf_size bytes - confirm
 *                  against callers
 * @param buf_size  size in bytes of a single record
 *
 * @return the value returned by comm_allgather_pml(), or
 *         OSHMEM_ERR_OUT_OF_RESOURCE if a temporary allocation fails.
 */
OSHMEM_DECLSPEC int oshmem_shmem_exchange_allgather(void *buf,
                                          int buf_size)
{
    int rc = OSHMEM_SUCCESS;
    int i = 0;
    int *ranks_in_comm = NULL;
    void *buf_temp = NULL;

    /* identity mapping: group index i corresponds to rank i in comm world */
    ranks_in_comm = (int *) malloc(orte_process_info.num_procs * sizeof(int));
    if (NULL == ranks_in_comm) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto out;
    }

    for (i = 0; i < (int) orte_process_info.num_procs; ++i) {
        ranks_in_comm[i] = i;
    }

    /* the send data lives inside the receive buffer, so stage it in a
     * private copy before the exchange overwrites buf */
    buf_temp = malloc(buf_size);
    if (NULL == buf_temp) {
        rc = OSHMEM_ERR_OUT_OF_RESOURCE;
        goto out;
    }
    /* compute the offset in size_t so it cannot wrap in 32-bit arithmetic */
    memcpy(buf_temp,
           (char *) buf + (size_t) buf_size * ORTE_PROC_MY_NAME->vpid,
           buf_size);

    rc = comm_allgather_pml( buf_temp,
                             buf,
                             buf_size,
                             MPI_BYTE,
                             ORTE_PROC_MY_NAME->vpid,
                             orte_process_info.num_procs,
                             ranks_in_comm,
                             (ompi_communicator_t *) &ompi_mpi_comm_world);

out:
    /* free(NULL) is a no-op, so no guards are needed */
    free(ranks_in_comm);
    free(buf_temp);
    return rc;
}
Esempio n. 2
0
/**
 * Exchange shared-memory backing-file descriptions with the members of a
 * subgroup and map the files of the peers that are not mapped yet.
 *
 * The local file description (built from input) is allgathered with
 * comm_allgather_pml() over sm_bcol_module->super.sbgp_partner_module.
 * For every group index other than the caller's own, peer_list is searched
 * for an item with the same process name and file name; if none is found
 * (or map_all is true) the peer's file is opened, mapped with
 * bcol_basesmuma_smcm_reg_mmap() and a new item is appended to peer_list.
 *
 * @param sm_bcol_module  module whose sbgp_partner_module supplies my_index,
 *                        group_size, group_list and group_comm for the
 *                        allgather
 * @param module          subgroup module; group_size sizes the arrays and
 *                        group_list is used to look up peers in comm
 * @param peer_list       list of already mapped peers; new items are appended
 * @param back_files      out: receives a calloc'ed array of group_size item
 *                        pointers; a non-NULL previous *back_files is freed
 * @param comm            communicator used for ompi_comm_peer_lookup()
 * @param input           description of the local backing file
 * @param base_fname      not used by this function
 * @param map_all         when true, map a peer's file even if a matching
 *                        item was found in peer_list
 *
 * @return OMPI_SUCCESS on success; OMPI_ERR_BAD_PARAM if the file name is
 *         too long; OMPI_ERR_OUT_OF_RESOURCE on allocation failure;
 *         OMPI_ERROR if a peer file cannot be opened or mapped; otherwise
 *         the error code of comm_allgather_pml().  On a failure inside the
 *         mapping loop, items appended for earlier peers stay in peer_list
 *         and in *back_files.
 */
int bcol_basesmuma_smcm_allgather_connection(
                                             mca_bcol_basesmuma_module_t *sm_bcol_module,
                                             mca_sbgp_base_module_t *module,
                                             opal_list_t *peer_list,
                                             bcol_basesmuma_smcm_proc_item_t ***back_files,
                                             ompi_communicator_t *comm,
                                             bcol_basesmuma_smcm_file_t input,
                                             char *base_fname,
                                             bool map_all)
{

    /* define local variables */

    int rc, i, fd;
    ptrdiff_t mem_offset;
    ompi_proc_t *proc_temp, *my_id;
    bcol_basesmuma_smcm_proc_item_t *temp;
    bcol_basesmuma_smcm_proc_item_t *item_ptr;
    bcol_basesmuma_smcm_proc_item_t **backing_files;
    struct file_info_t local_file;
    struct file_info_t *all_files=NULL;

    /* sanity check: the name must fit (with its terminator) in the
     * fixed-size name field that is exchanged below */
    if (strlen(input.file_name) > SM_BACKING_FILE_NAME_MAX_LEN-1) {
        opal_output (ompi_bcol_base_framework.framework_output, "backing file name too long:  %s len :: %d",
                     input.file_name, (int) strlen(input.file_name));
        return OMPI_ERR_BAD_PARAM;
    }

    backing_files = (bcol_basesmuma_smcm_proc_item_t **)
        calloc(module->group_size, sizeof(bcol_basesmuma_smcm_proc_item_t *));
    if (!backing_files) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* FIXME *back_files might have been already allocated
     * so free it in order to avoid a memory leak */
    if (NULL != *back_files) {
        free (*back_files);
    }
    /* from here on the array is owned by the caller through *back_files */
    *back_files = backing_files;

    my_id = ompi_proc_local();

    /* Phase One:
       gather a list of processes that will participate in the allgather - I'm
       preparing this list from the sbgp-ing module that was passed into the function */

    /* fill in local file information */
    local_file.vpid  = ((orte_process_name_t*)&my_id->super.proc_name)->vpid;
    local_file.jobid = ((orte_process_name_t*)&my_id->super.proc_name)->jobid;
    local_file.file_size=input.size;
    local_file.size_ctl_structure=input.size_ctl_structure;
    local_file.data_seg_alignment=input.data_seg_alignment;

    /* length was validated by the sanity check above */
    strcpy (local_file.file_name, input.file_name);

    /* will exchange this data type as a string of characters -
     * this routine is first called before MPI_init() completes
     * and before error handling is setup, so can't use the
     * MPI data types to send this data */
    all_files = (struct file_info_t *) calloc(module->group_size,
                                              sizeof (struct file_info_t));
    if (!all_files) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* exchange data */
    rc = comm_allgather_pml(&local_file,all_files,sizeof(struct file_info_t), MPI_CHAR,
                            sm_bcol_module->super.sbgp_partner_module->my_index,
                            sm_bcol_module->super.sbgp_partner_module->group_size,
                            sm_bcol_module->super.sbgp_partner_module->group_list,
                            sm_bcol_module->super.sbgp_partner_module->group_comm);
    if( OMPI_SUCCESS != rc ) {
        opal_output (ompi_bcol_base_framework.framework_output, "failed in comm_allgather_pml.  Error code: %d", rc);
        goto Error;
    }

    /* Phase four:
       loop through the receive buffer, unpack the data received from remote peers */

    for (i = 0; i < module->group_size; i++) {
        struct file_info_t *rem_file = all_files + i;

        /* my own file is not mapped here; backing_files[my_index] stays NULL */
        if (sm_bcol_module->super.sbgp_partner_module->my_index == i) {
            continue;
        }

        proc_temp = ompi_comm_peer_lookup(comm,module->group_list[i]);

        OPAL_LIST_FOREACH(item_ptr, peer_list, bcol_basesmuma_smcm_proc_item_t) {
            /* if the vpid/jobid/filename combination already exists in the list,
               then do not map this peer's file --- because you already have */
            if (0 == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL,
                                                  OMPI_CAST_RTE_NAME(&proc_temp->super.proc_name),
                                                  &item_ptr->peer) &&
                0 == strcmp (item_ptr->sm_file.file_name, rem_file->file_name)) {
                ++item_ptr->refcnt;
                /* record file data */
                backing_files[i] = item_ptr;
                break;
            }
        }

        /* the file is already mapped (found in the list above), so reuse
         * that mapping.  This previously found mapping is ignored if map_all
         * was requested (NTH: not sure why exactly since we re-map an
         * already mapped file).
         * NOTE(review): in the map_all case the refcnt taken above on the
         * existing item is kept although backing_files[i] is overwritten
         * below - confirm this is intended. */
        if (!map_all && backing_files[i]) {
            continue;
        }

        temp = OBJ_NEW(bcol_basesmuma_smcm_proc_item_t);
        if (!temp) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            goto Error;
        }

        temp->peer.vpid = rem_file->vpid;
        temp->peer.jobid = rem_file->jobid;

        temp->sm_file.file_name = strdup (rem_file->file_name);
        if (!temp->sm_file.file_name) {
            rc = OMPI_ERR_OUT_OF_RESOURCE;
            OBJ_RELEASE(temp);
            goto Error;
        }

        temp->sm_file.size = (size_t) rem_file->file_size;
        temp->sm_file.mpool_size = (size_t) rem_file->file_size;
        temp->sm_file.size_ctl_structure = (size_t) rem_file->size_ctl_structure;
        temp->sm_file.data_seg_alignment = (size_t) rem_file->data_seg_alignment;
        temp->refcnt = 1;

        /* Phase Five:
           If map_all == true, then  we map every peer's file
           else we check to see if I have already mapped this
           vpid/jobid/filename combination and if I have, then
           I do not mmap this peer's file.
           *
           */
        fd = open(temp->sm_file.file_name, O_RDWR, 0600);
        if (0 > fd) {
            opal_output (ompi_bcol_base_framework.framework_output, "SMCM Allgather failed to open sm backing file %s. errno = %d",
                         temp->sm_file.file_name, errno);
            /* the item was never published (not in peer_list nor in
             * backing_files), so release it here like the other failure
             * paths do; otherwise it would be leaked */
            OBJ_RELEASE(temp);
            rc = OMPI_ERROR;
            goto Error;
        }

        /* map the file */
        temp->sm_mmap = bcol_basesmuma_smcm_reg_mmap (NULL, fd, temp->sm_file.size,
                                                      temp->sm_file.size_ctl_structure,
                                                      temp->sm_file.data_seg_alignment,
                                                      temp->sm_file.file_name);
        /* the descriptor is closed whether or not the mapping succeeded */
        close (fd);
        if (NULL == temp->sm_mmap) {
            opal_output (ompi_bcol_base_framework.framework_output, "mmapping failed to map remote peer's file");
            OBJ_RELEASE(temp);
            rc = OMPI_ERROR;
            goto Error;
        }

        /* compute memory offset */
        mem_offset = (ptrdiff_t) temp->sm_mmap->data_addr -
            (ptrdiff_t) temp->sm_mmap->map_seg;
        temp->sm_mmap->map_seg->seg_offset = mem_offset;
        temp->sm_mmap->map_seg->seg_size = temp->sm_file.size - mem_offset;
        /* more stuff to follow */

        /* append this peer's info, including shared memory map addr, onto the
           peer_list */

        /* record file data */
        backing_files[i] = (bcol_basesmuma_smcm_proc_item_t *) temp;

        opal_list_append(peer_list, (opal_list_item_t*) temp);
    }

    rc = OMPI_SUCCESS;

 Error:

    /* common exit: the gathered descriptions are only needed locally */
    free(all_files);

    return rc;
}
static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs,
        int n_procs_in,
        struct ompi_communicator_t *comm,
        char *key,
        void *output_data
                                                                 )
{
    /* local variables */
    mca_sbgp_basesmsocket_module_t *module;
    /*
    opal_buffer_t* sbuffer = OBJ_NEW(opal_buffer_t);
    opal_buffer_t* rbuffer = OBJ_NEW(opal_buffer_t);
    */
    opal_paffinity_base_cpu_set_t my_cpu_set;
    bool bound;
    int ret;
    int num_processors;
    int socket_tmp;
    int my_socket_index;
    int core_index=-1;
    int proc, cnt, local, n_local_peers, my_index, my_rank;
    ompi_proc_t* my_proc;
    int *local_ranks_in_comm=NULL;
    int *socket_info=NULL, my_socket_info;
    int  i_cnt, lp_cnt, my_local_index, comm_size=ompi_comm_size(comm);

    /* initialize data */
    output_data=NULL;
    my_rank=ompi_comm_rank(comm);
    my_proc=ompi_comm_peer_lookup(comm,my_rank);
    for( proc=0 ; proc < n_procs_in ; proc++) {
        if( procs[proc]==my_proc) {
            my_index=proc;
        }
    }

    /*create a new module*/
    module=OBJ_NEW(mca_sbgp_basesmsocket_module_t);
    if (!module ) {
        return NULL;
    }
    module->super.group_size=0;
    module->super.group_comm = comm;
    module->super.group_list = NULL;
    module->super.group_net = OMPI_SBGP_SOCKET;

    /*
        ** get my process affinity information
        ** */

    /* get the number of processors on this node */

    ret=opal_paffinity_base_get_processor_info(&num_processors);

    /* get process affinity mask */
    OPAL_PAFFINITY_CPU_ZERO(my_cpu_set);
    ret=opal_paffinity_base_get(&my_cpu_set);
    OPAL_PAFFINITY_PROCESS_IS_BOUND(my_cpu_set,&bound);

    /*debug process affinity*/
    /*
    {
        ret=opal_paffinity_base_get_socket_info(&num_socket);
        fprintf(stderr,"Number of sockets %d\n",num_socket);
        fprintf(stderr,"Test if rank %d is bound %d\n", my_rank, bound);
        fprintf(stderr,"return from opal_paffinity_base_get: %d\n\n",ret);
        fprintf(stderr,"bitmask elements: ");
        unsigned int long  jj;
        for(jj=0; jj < OPAL_PAFFINITY_BITMASK_NUM_ELEMENTS; jj++)
                 fprintf(stderr," %d ",my_cpu_set.bitmask[jj]);
        fprintf(stderr,"\n");
        fflush(stderr);
    }
    end debug process affinity*/

    if( !bound ) {

        /* pa affinity not set, so socket index will be set to -1 */
        my_socket_index=-1;
        /*debug print*/
        /* */
        fprintf(stderr,"[%d]FAILED to set basesmsocket group !!!\n",my_rank);
        fflush(stderr);
        /*end debug*/
        goto NoLocalPeers;
    } else {

        my_socket_index=-1;
        /* loop over number of processors */
        for ( proc=0 ; proc < num_processors ; proc++ ) {
            if (OPAL_PAFFINITY_CPU_ISSET(proc,my_cpu_set)) {
                ret=opal_paffinity_base_get_map_to_socket_core(proc,&socket_tmp,&core_index);
                if( my_socket_index != socket_tmp ) {
                    my_socket_index=socket_tmp;
                    break;
                }
            }
        } /* end of proc loop */
    }

    /* Debug prints */
    /*
    {
    fprintf(stderr,"Number of processors per node: %d\n",num_processors);
    fprintf(stderr,"I am rank %d and my socket index is %d\n and my core index is %d\n",my_rank,my_socket_index,core_index);
    fprintf(stderr,"n_proc_in = %d\n",n_procs_in);
    fprintf(stderr,"\n");
    fflush(stderr);
    }
    end debug prints */


    /*get my socket index*/
    cnt=0;
    for( proc=0 ; proc < n_procs_in ; proc++) {
        local=OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if( local ) {
            cnt++;
        }
    }
    /*debug print */
    /*
    fprintf(stderr,"Number of local processors %d\n",cnt);
    end debug print*/

    /* if no other local procs found skip to end */
    if( 1 >= cnt ) {
        goto NoLocalPeers;
    }


#if 0
    int *local_ranks_in_comm;
    int32_t *socket_info, *my_socket_info;
    int  my_local_index;
#endif
    /* allocate structure to hold the list of local ranks */
    local_ranks_in_comm=(int *)malloc(sizeof(int)*cnt);
    if(NULL == local_ranks_in_comm ) {
        goto Error;
    }
    /* figure out which ranks from the input communicator - comm - will
     * particiapte in the local socket determination.
     */

    n_local_peers=0;
    i_cnt=0;
    for( proc = 0; proc < n_procs_in; proc++) {
        local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if ( local ) {

            /* set the rank within the on-host ranks - this will be used for tha
             * allgather
             */
            if( my_proc == procs[proc] ) {
                my_local_index=n_local_peers;
            }
            /* find the rank of the current proc in comm.  We take advantage
             * of the fact that ranks in a group have the same relative
             * ordering as they do within the communicator.
             */
#if 1
            /*for( lp_cnt=i_cnt; lp_cnt < comm_size ; lp_cnt++ ) {*/
            for( lp_cnt=proc; lp_cnt < comm_size ; lp_cnt++ ) {
                if(procs[proc] == ompi_comm_peer_lookup(comm,lp_cnt) ) {
                    local_ranks_in_comm[i_cnt]=lp_cnt;
                    /* lp_cnt has alrady been checked */
                    i_cnt++;
                    /* found the corresponding rank in comm, so don't need
                     * to search any more */
                    break;
                }
                /*i_cnt++;*/
                /*fprintf(stderr,"QQQ i_cnt %d \n",i_cnt);*/
            }
#endif
            n_local_peers++;
        }
    }
    /*fprintf(stderr,"YYY n_local_peers %d\n",n_local_peers);*/
    socket_info=(int *)malloc(sizeof(int)*n_local_peers);
    /*fprintf(stderr,"XXX got socket info\n");*/
    if(NULL == socket_info ) {
        goto Error;
    }

    my_socket_info=my_socket_index;

    /* Allgather data over the communicator */
    ret=comm_allgather_pml(&my_socket_info, socket_info, 1,
                           MPI_INT, my_local_index, n_local_peers, local_ranks_in_comm,comm);
    if (OMPI_SUCCESS != ret ) {
        fprintf(stderr," comm_allgather_pml returned error %d \n", ret);
        fflush(stderr);
        return NULL;
    }


    /*allocate memory to the group_list probably an overestimation
      of the necessary resources */
    module->super.group_list=(int *)malloc(sizeof(int)*cnt);
    if(NULL == module->super.group_list) {
        goto Error;
    }

    /* figure out who is sharing the same socket */
    cnt=0;
    for (proc = 0; proc < n_local_peers; proc++) {
        int rem_rank=local_ranks_in_comm[proc];
        int rem_socket_index=socket_info[proc];

        /*Populate the list*/
        if (rem_socket_index == my_socket_index) {
            module->super.group_list[cnt]=rem_rank;
            cnt++;
        }
    }

    module->super.group_size=cnt;

    /*debug print*/
    /*
    {
        int ii;
        fprintf(stderr,"Ranks per socket: %d\n",cnt);
        fprintf(stderr,"Socket %d owns ranks: ", my_socket_index);
        for (ii=0; ii < cnt; ii++)
            fprintf(stderr,"%d ",module->super.group_list[ii]);
        fprintf(stderr,"\n");
        fflush(stderr);
    }

    {
        cpu_set_t set;
        unsigned int len = sizeof(set);
        int i;
        unsigned long mask = 0;
        CPU_ZERO(&set);
        if (sched_getaffinity(0, len, &set) < 0) {
            perror("sched_getaffinity");
            return -1;
        }
        for (i = 0; i < CPU_SETSIZE; i++) {
            int cpu = CPU_ISSET(i, &set);
            if (cpu) {
                mask |= 1<< i;
            }
        }
        opal_output(0,"%d: my affinity mask is: %08lx\n", my_local_index,mask);
    }


    end debug*/


    /*Free resources*/
    free(local_ranks_in_comm);
    free(socket_info);

    /*Return the module*/
    return (mca_sbgp_base_module_t *) module;


NoLocalPeers:
    /* nothing to store, so just free the module and return */
    /*fprintf(stderr,"No local socket peers\n");*/
    /*free(module);*/
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm) {
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;

Error:
    /*clean up*/
    if( NULL != module->super.group_list) {
        free(module->super.group_list);
        module->super.group_list=NULL;
    }
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm) {
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;


}
/**
 * Build a subgroup made of the on-node ranks that report the same logical
 * socket id as the calling process.
 *
 * The caller's socket id comes from mca_sbgp_map_to_logical_socket_id().
 * The processes in procs that are on the local node then allgather their
 * socket ids with comm_allgather_pml(), and the ranks whose id equals the
 * caller's are stored in the module's group_list.
 *
 * @param procs        array of n_procs_in process pointers
 * @param n_procs_in   number of entries in procs
 * @param comm         communicator the processes belong to
 * @param key          not used by this function
 * @param output_data  not used by this function (only the local copy of the
 *                     pointer is cleared)
 *
 * @return the new module cast to mca_sbgp_base_module_t *, or NULL when the
 *         binding policy is OPAL_BIND_TO_NONE, when the socket id cannot be
 *         determined, when at most one process in procs is on the local
 *         node, or when an allocation or the allgather fails.
 */
static mca_sbgp_base_module_t *mca_sbgp_basesmsocket_select_procs(struct ompi_proc_t ** procs,
        int n_procs_in,
        struct ompi_communicator_t *comm,
        char *key,
        void *output_data
                                                                 )
{
    /* local variables */
    mca_sbgp_basesmsocket_module_t *module;
    int ret;
    int my_socket_index;
    int proc, cnt, local, n_local_peers, my_rank;
    ompi_proc_t* my_proc;
    int *local_ranks_in_comm=NULL;
    int *socket_info=NULL, my_socket_info;
    int  i_cnt, lp_cnt, my_local_index = -1, comm_size=ompi_comm_size(comm);

    /* initialize data */
    output_data=NULL;
    my_rank=ompi_comm_rank(comm);
    my_proc=ompi_comm_peer_lookup(comm,my_rank);

    /*create a new module*/
    module=OBJ_NEW(mca_sbgp_basesmsocket_module_t);
    if (!module ) {
        return NULL;
    }
    module->super.group_size=0;
    module->super.group_comm = comm;
    module->super.group_list = NULL;
    module->super.group_net = OMPI_SBGP_SOCKET;

    /* socket index stays at -1 unless the lookup below succeeds */
    my_socket_index=-1;

    /* test to see if process is bound */
    if( OPAL_BIND_TO_NONE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy) ) {
        BASESMSOCKET_VERBOSE(10, ("[%d] FAILED to set basesmsocket group, processes are not bound!!!\n",my_rank));
        goto NoLocalPeers;
    }

    /* this should find my logical socket id which is the socket id we want
     * physical socket ids are not necessarily unique, logical ones, as defined
     * by the hwloc API are unique.
     */
    if( OMPI_SUCCESS != mca_sbgp_map_to_logical_socket_id(&my_socket_index)) {
        BASESMSOCKET_VERBOSE(10, ("[%d] FAILED to set basesmsocket group !!!\n",my_rank));

        goto NoLocalPeers;
    }

    /* count the processes that are on the local node */
    cnt=0;
    for( proc=0 ; proc < n_procs_in ; proc++) {
        local=OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if( local ) {
            cnt++;
        }
    }

    /* if no other local procs found skip to end */
    if( 1 >= cnt ) {
        goto NoLocalPeers;
    }

    /* allocate structure to hold the list of local ranks */
    local_ranks_in_comm=(int *)malloc(sizeof(int)*cnt);
    if(NULL == local_ranks_in_comm ) {
        goto Error;
    }
    /* figure out which ranks from the input communicator - comm - will
     * participate in the local socket determination.
     */

    n_local_peers=0;
    i_cnt=0;
    for( proc = 0; proc < n_procs_in; proc++) {
        local = OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags);
        if ( local ) {

            /* set the rank within the on-host ranks - this will be used for the
             * allgather
             */
            if( my_proc == procs[proc] ) {
                my_local_index=n_local_peers;
            }
            /* find the rank of the current proc in comm.  We take advantage
             * of the fact that ranks in a group have the same relative
             * ordering as they do within the communicator.
             */
            for( lp_cnt=proc; lp_cnt < comm_size ; lp_cnt++ ) {
                if(procs[proc] == ompi_comm_peer_lookup(comm,lp_cnt) ) {
                    local_ranks_in_comm[i_cnt]=lp_cnt;
                    i_cnt++;
                    /* found the corresponding rank in comm, so don't need
                     * to search any more */
                    break;
                }
            }
            n_local_peers++;
        }
    }

    socket_info=(int *)malloc(sizeof(int)*n_local_peers);
    if(NULL == socket_info ) {
        goto Error;
    }

    my_socket_info=my_socket_index;

    /* Allgather data over the communicator */
    ret=comm_allgather_pml(&my_socket_info, socket_info, 1,
                           MPI_INT, my_local_index, n_local_peers, local_ranks_in_comm,comm);
    if (OMPI_SUCCESS != ret ) {
        BASESMSOCKET_VERBOSE(10, ("comm_allgather_pml returned error %d\n",ret));
        /* release the module and both work arrays instead of leaking them */
        goto Error;
    }


    /*allocate memory to the group_list probably an overestimation
      of the necessary resources */
    module->super.group_list=(int *)malloc(sizeof(int)*cnt);
    if(NULL == module->super.group_list) {
        goto Error;
    }

    /* figure out who is sharing the same socket */
    cnt=0;
    for (proc = 0; proc < n_local_peers; proc++) {
        int rem_rank=local_ranks_in_comm[proc];
        int rem_socket_index=socket_info[proc];

        /*Populate the list*/
        if (rem_socket_index == my_socket_index) {
            module->super.group_list[cnt]=rem_rank;
            cnt++;
        }
    }

    module->super.group_size=cnt;

    /*Free resources*/
    free(local_ranks_in_comm);
    free(socket_info);

    /*Return the module*/
    return (mca_sbgp_base_module_t *) module;


NoLocalPeers:
    /* nothing to store, so just free the module and return */
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm) {
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;

Error:
    /*clean up*/
    if( NULL != module->super.group_list) {
        free(module->super.group_list);
        module->super.group_list=NULL;
    }
    if(socket_info) {
        free(socket_info);
        socket_info=NULL;
    }
    if(local_ranks_in_comm) {
        free(local_ranks_in_comm);
    }
    OBJ_RELEASE(module);
    return NULL;


}
Esempio n. 5
0
/**
 * New init function used for the new control scheme where the control
 * struct is placed at the top of each payload buffer.
 *
 * Maps the payload backing files of the shared-memory peers (via
 * bcol_basesmuma_smcm_allgather_connection()), exchanges the offset of the
 * payload block with comm_allgather_pml(), and fills
 * sm_bcol->colls_with_user_data.data_buffs with, for every peer and every
 * buffer, a pointer to the control header and a pointer to the payload
 * (header address + data_offset).  The caller's own control headers are then
 * reset, and the module's ml_mem bookkeeping is initialized.
 *
 * @param payload_block  ML-level descriptor of the payload memory block
 * @param data_offset    byte offset of the payload from the start of each
 *                       buffer; also stored as total_header_size
 * @param bcol_module    the basesmuma bcol module (cast internally)
 * @param reg_data       bcol_basesmuma_registration_data_t describing the
 *                       local backing file (cast internally)
 *
 * @return OMPI_SUCCESS on success; OMPI_ERR_OUT_OF_RESOURCE on allocation
 *         failure; OMPI_ERROR if init_nb_coll_buff_desc() fails; otherwise
 *         the error returned by the connection manager or the allgather.
 */
int bcol_basesmuma_bank_init_opti(struct mca_bcol_base_memory_block_desc_t *payload_block,
        uint32_t data_offset,
        mca_bcol_base_module_t *bcol_module,
        void *reg_data)
{
    /* assumption here is that the block has been registered with
     * sm bcol hence has been mapped by each process, need to be
     * sure that memory is mapped amongst sm peers
     */

    /* local variables */
    int ret = OMPI_SUCCESS, i, j;
    sm_buffer_mgmt *pload_mgmt;
    mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
    bcol_basesmuma_registration_data_t *sm_reg_data =
        (bcol_basesmuma_registration_data_t *) reg_data;
    mca_bcol_basesmuma_module_t *sm_bcol =
        (mca_bcol_basesmuma_module_t *) bcol_module;
    mca_bcol_base_memory_block_desc_t *ml_block = payload_block;
    size_t malloc_size;
    bcol_basesmuma_smcm_file_t input_file;
    int leading_dim,loop_limit,buf_id;
    unsigned char *base_ptr;
    mca_bcol_basesmuma_module_t *sm_bcol_module=
        (mca_bcol_basesmuma_module_t *)bcol_module;
    int my_idx, array_id;
    mca_bcol_basesmuma_header_t *ctl_ptr;
    /* NULL-initialized so the error exit can free it unconditionally */
    void **results_array = NULL, *mem_offset;

    mca_bcol_basesmuma_local_mlmem_desc_t *ml_mem = &sm_bcol_module->ml_mem;

    /* first, we get a pointer to the payload buffer management struct */
    pload_mgmt = &(sm_bcol->colls_with_user_data);

    /* go ahead and get the header size that is cached on the payload block
     */
    sm_bcol->total_header_size = data_offset;

    /* allocate memory for pointers to mine and my peers' payload buffers
     * difference here is that now we use our new data struct
     */
    malloc_size = ml_block->num_banks*ml_block->num_buffers_per_bank*
        pload_mgmt->size_of_group *sizeof(mca_bcol_basesmuma_payload_t);
    /* NOTE(review): data_buffs stays allocated on the error paths below;
     * presumably it is released together with the module - confirm. */
    pload_mgmt->data_buffs = (mca_bcol_basesmuma_payload_t *) malloc(malloc_size);
    if( !pload_mgmt->data_buffs) {
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit_ERROR;
    }

    /* allocate some memory to hold the offsets */
    results_array = (void **) malloc(pload_mgmt->size_of_group * sizeof (void *));
    if (NULL == results_array) {
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit_ERROR;
    }

    /* setup the input file for the shared memory connection manager */
    input_file.file_name = sm_reg_data->file_name;
    input_file.size = sm_reg_data->size;
    input_file.size_ctl_structure = 0;
    input_file.data_seg_alignment = BASESMUMA_CACHE_LINE_SIZE;
    input_file.mpool_size = sm_reg_data->size;

    /* call the connection manager and map my shared memory peers' file
     */
    ret = bcol_basesmuma_smcm_allgather_connection(
        sm_bcol,
        sm_bcol->super.sbgp_partner_module,
        &(cs->sm_connections_list),
        &(sm_bcol->payload_backing_files_info),
        sm_bcol->super.sbgp_partner_module->group_comm,
        input_file,cs->payload_base_fname,
        false);
    if( OMPI_SUCCESS != ret ) {
        goto exit_ERROR;
    }


    /* now we exchange offset info - don't assume symmetric virtual memory
     */

    mem_offset = (void *) ((uintptr_t) ml_block->block->base_addr -
                           (uintptr_t) cs->sm_payload_structs->data_addr);

    /* call into the exchange offsets function */
    ret=comm_allgather_pml(&mem_offset, results_array, sizeof (void *), MPI_BYTE,
                           sm_bcol_module->super.sbgp_partner_module->my_index,
                           sm_bcol_module->super.sbgp_partner_module->group_size,
                           sm_bcol_module->super.sbgp_partner_module->group_list,
                           sm_bcol_module->super.sbgp_partner_module->group_comm);
    if( OMPI_SUCCESS != ret ) {
        goto exit_ERROR;
    }

    /* convert memory offset to virtual address in current rank */
    leading_dim = pload_mgmt->size_of_group;
    loop_limit =  ml_block->num_banks*ml_block->num_buffers_per_bank;
    for (i=0;i< sm_bcol_module->super.sbgp_partner_module->group_size;i++) {

        /* get the base pointer */
        array_id=SM_ARRAY_INDEX(leading_dim,0,i);
        if( i == sm_bcol_module->super.sbgp_partner_module->my_index) {
            /* me */
            base_ptr=cs->sm_payload_structs->map_addr;
        } else {
            base_ptr=sm_bcol_module->payload_backing_files_info[i]->
                sm_mmap->map_addr;
        }

        /* first, set the pointer to the control struct: the peer's
         * exchanged offset applied to my mapping of that peer's file */
        pload_mgmt->data_buffs[array_id].ctl_struct=(mca_bcol_basesmuma_header_t *)
            (uintptr_t)(((uint64_t)(uintptr_t)results_array[array_id])+(uint64_t)(uintptr_t)base_ptr);
        /* second, calculate where to set the data pointer */
        pload_mgmt->data_buffs[array_id].payload=(void *)
            (uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct +
                        (uint64_t)(uintptr_t) data_offset);

        /* every following buffer sits size_buffer bytes after the previous one */
        for( buf_id = 1 ; buf_id < loop_limit ; buf_id++ ) {
            int array_id_m1=SM_ARRAY_INDEX(leading_dim,(buf_id-1),i);
            array_id=SM_ARRAY_INDEX(leading_dim,buf_id,i);
            /* now, play the same game as above
             *
             * first, set the control struct's position */
            pload_mgmt->data_buffs[array_id].ctl_struct=(mca_bcol_basesmuma_header_t *)
                (uintptr_t)(((uint64_t)(uintptr_t)(pload_mgmt->data_buffs[array_id_m1].ctl_struct) +
                             (uint64_t)(uintptr_t)ml_block->size_buffer));

            /* second, set the payload pointer */
            pload_mgmt->data_buffs[array_id].payload =(void *)
                (uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct +
                            (uint64_t)(uintptr_t) data_offset);
        }

    }

    /* done with the index array */
    free (results_array);
    results_array = NULL;

    /* initialize my control structures!! */
    my_idx = sm_bcol_module->super.sbgp_partner_module->my_index;
    leading_dim = sm_bcol_module->super.sbgp_partner_module->group_size;
    for( buf_id = 0; buf_id < loop_limit; buf_id++){
        array_id = SM_ARRAY_INDEX(leading_dim,buf_id,my_idx);
        ctl_ptr = pload_mgmt->data_buffs[array_id].ctl_struct;

        /* initialize the data structures */
        for( j = 0; j < SM_BCOLS_MAX; j++){
            for( i = 0; i < NUM_SIGNAL_FLAGS; i++){
                ctl_ptr->flags[i][j] = -1;
            }
        }
        ctl_ptr->sequence_number = -1;
        ctl_ptr->src = -1;
    }

    /* setup the data structures needed for releasing the payload
     * buffers back to the ml level
     */
    for( i=0 ; i < (int) ml_block->num_banks ; i++ ) {
        sm_bcol->colls_with_user_data.
            ctl_buffs_mgmt[i].nb_barrier_desc.ml_memory_block_descriptor=
            ml_block;
    }

    ml_mem->num_banks = ml_block->num_banks;
    ml_mem->bank_release_counter = calloc(ml_block->num_banks, sizeof(uint32_t));
    if (NULL == ml_mem->bank_release_counter) {
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit_ERROR;
    }
    ml_mem->num_buffers_per_bank = ml_block->num_buffers_per_bank;
    ml_mem->size_buffer = ml_block->size_buffer;
    /* pointer to ml level descriptor */
    ml_mem->ml_mem_desc = ml_block;

    if (OMPI_SUCCESS != init_nb_coll_buff_desc(&ml_mem->nb_coll_desc,
                                               ml_block->block->base_addr,
                                               ml_mem->num_banks,
                                               ml_mem->num_buffers_per_bank,
                                               ml_mem->size_buffer,
                                               data_offset,
                                               sm_bcol_module->super.sbgp_partner_module->group_size,
                                               sm_bcol_module->pow_k)) {

        BASESMUMA_VERBOSE(10, ("Failed to allocate memory descriptors for storing state of non-blocking collectives\n"));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

exit_ERROR:
    /* the offsets array is purely local scratch space; free(NULL) is a no-op */
    free (results_array);
    return ret;
}
Esempio n. 6
0
/*
 * Exchange memory offsets among all ranks of the sub-group (this is the
 * newer variant, built on the pml allgather).
 *
 * Every rank packs a record made of its index in the group (int) followed
 * by mem_offset (uint64_t) and contributes it to comm_allgather_pml().
 * For each gathered record, the peer's offset is stored, converted to a
 * pointer-sized value, in
 *     result_array[SM_ARRAY_INDEX(leading_dim, 0, peer_index)].
 * The records are unpacked with memcpy, so no alignment of the packed
 * fields is assumed.
 *
 * sm_bcol_module  module whose super.sbgp_partner_module supplies my_index,
 *                 group_size, group_list and group_comm for the exchange
 * result_array    output array, written at the indices described above
 * mem_offset      this rank's offset to publish to its peers
 * loop_limit      not used by this function
 * leading_dim     leading dimension passed to SM_ARRAY_INDEX
 *
 * Returns OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE when a
 * temporary buffer cannot be allocated, or the error returned by
 * comm_allgather_pml().  result_array is left untouched on failure.
 */
int base_bcol_basesmuma_exchange_offsets(
    mca_bcol_basesmuma_module_t *sm_bcol_module,
    void **result_array, uint64_t mem_offset, int loop_limit,
    int leading_dim)
{
    int ret = OMPI_SUCCESS, i;
    int count;
    int index_in_group;
    char *send_buff = NULL;
    char *recv_buff = NULL;
    uint64_t rem_mem_offset;

    /* one packed record: group index followed by the 64-bit offset */
    count = sizeof(uint64_t) + sizeof(int);

    /* allocate the send record and room for one record per group member;
     * the size is computed in size_t to avoid int overflow */
    send_buff = (char *) malloc(count);
    recv_buff = (char *) malloc((size_t) count *
                           sm_bcol_module->super.sbgp_partner_module->group_size);
    if( NULL == send_buff || NULL == recv_buff ) {
        /* the buffers are written right below - bail out instead of
         * dereferencing a NULL pointer */
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit_ERROR;
    }

    /* exchange the base pointer for the control structures - gather
     * everyone else's information.
     */

    /* pack my index in the group, then the offset of the allocated region */
    memcpy(send_buff, &(sm_bcol_module->super.sbgp_partner_module->my_index), sizeof(int));
    memcpy(send_buff + sizeof(int), &mem_offset, sizeof(uint64_t));

    /* get the offsets from all procs, so we can set up the control data
     * structures.
     */
    ret=comm_allgather_pml((void *) send_buff,(void *) recv_buff,count,
            MPI_BYTE,
            sm_bcol_module->super.sbgp_partner_module->my_index,
            sm_bcol_module->super.sbgp_partner_module->group_size,
            sm_bcol_module->super.sbgp_partner_module->group_list,
            sm_bcol_module->super.sbgp_partner_module->group_comm);
    if( OMPI_SUCCESS != ret ) {
        goto exit_ERROR;
    }

    /* get the control structure offsets within the shared memory
     *   region and populate the control structures - we do not assume
     *   any symmetry in memory layout of each process
     */

    /* loop over the procs in the group */
    for(i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++){
        int array_id;
        const char *record = recv_buff + (size_t) i * count;

        /* get this peer's index in the group */
        memcpy(&index_in_group, record, sizeof(int));

        /* get the offset */
        memcpy(&rem_mem_offset, record + sizeof(int), sizeof(uint64_t));

        array_id=SM_ARRAY_INDEX(leading_dim,0,index_in_group);
        result_array[array_id]=(void *)(uintptr_t)rem_mem_offset;
    }

exit_ERROR:
    /* clean up - free(NULL) is a no-op, so no guards are needed */
    free(send_buff);
    free(recv_buff);

    return ret;
}
Esempio n. 7
0
/*
 * Gather the control-region offset of every rank in the sub-group and turn
 * the gathered offsets into addresses valid in this process.
 *
 * Steps performed:
 *   1. compute this rank's offset as data_blk->data minus
 *      cs->sm_ctl_structs->data_addr and allgather it (as raw bytes) into
 *      ctl_mgmt->ctl_buffs;
 *   2. for every rank, add the base address of the matching mapping (the
 *      local one for this rank, the peer's backing file otherwise) to the
 *      gathered offset, then derive the address of each following buffer by
 *      advancing sizeof(mca_bcol_basesmuma_ctl_struct_t) per buffer;
 *   3. reset the fields of this rank's own control structures.
 *
 * Returns OMPI_SUCCESS, or the error reported by comm_allgather_pml(), in
 * which case ctl_buffs is not post-processed.
 */
static int base_bcol_basesmuma_exchange_ctl_params(
    mca_bcol_basesmuma_module_t *sm_bcol_module,
    mca_bcol_basesmuma_component_t *cs,
    sm_buffer_mgmt *ctl_mgmt, list_data_t *data_blk)
{
    int rc;
    int rank, slot;
    int my_rank, n_ranks;
    int n_slots, stride;
    void *my_offset;
    unsigned char *map_base;
    uintptr_t addr;
    mca_bcol_basesmuma_ctl_struct_t *ctl;

    my_rank = sm_bcol_module->super.sbgp_partner_module->my_index;
    n_ranks = sm_bcol_module->super.sbgp_partner_module->group_size;

    /* offset of the data block relative to the start of the mapped file */
    my_offset = (void *)((uintptr_t)data_blk->data -
                         (uintptr_t)cs->sm_ctl_structs->data_addr);

    /* total number of buffers handled per rank, and the array stride */
    n_slots = cs->basesmuma_num_mem_banks + ctl_mgmt->number_of_buffs;
    stride = ctl_mgmt->size_of_group;

    /* every rank publishes its offset; results land in ctl_buffs */
    rc = comm_allgather_pml(&my_offset, ctl_mgmt->ctl_buffs, sizeof(void *),
                            MPI_BYTE, my_rank, n_ranks,
                            sm_bcol_module->super.sbgp_partner_module->group_list,
                            sm_bcol_module->super.sbgp_partner_module->group_comm);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }

    /* translate the gathered offsets into addresses of this process */
    for (rank = 0; rank < n_ranks; rank++) {

        /* base of the mapping that holds this rank's control structures */
        if (rank == my_rank) {
            map_base = cs->sm_ctl_structs->map_addr;
        } else {
            map_base = sm_bcol_module->ctl_backing_files_info[rank]->sm_mmap->map_addr;
        }

        /* first buffer: gathered offset + mapping base */
        addr = (uintptr_t)ctl_mgmt->ctl_buffs[SM_ARRAY_INDEX(stride, 0, rank)] +
               (uintptr_t)map_base;
        ctl_mgmt->ctl_buffs[SM_ARRAY_INDEX(stride, 0, rank)] = (void *)addr;

        /* remaining buffers: one control structure further each time */
        for (slot = 1; slot < n_slots; slot++) {
            addr += sizeof(mca_bcol_basesmuma_ctl_struct_t);
            ctl_mgmt->ctl_buffs[SM_ARRAY_INDEX(stride, slot, rank)] = (void *)addr;
        }
    }

    /* reset my own control structures - per the original note (RLG) only
     * these fields are initialized here, more are missing */
    for (slot = 0; slot < n_slots; slot++) {
        ctl = (mca_bcol_basesmuma_ctl_struct_t *)
                ctl_mgmt->ctl_buffs[SM_ARRAY_INDEX(stride, slot, my_rank)];

        ctl->sequence_number = -1;
        ctl->flag = -1;
        ctl->index = 0;
        ctl->src_ptr = NULL;
    }

    return rc;
}