Ejemplo n.º 1
0
/*
 * Prepare control-region access for a basesmuma module.
 *
 * Two independent, lazily executed steps:
 *   1. If the module has no ctl_backing_files_info yet, describe the
 *      component's control file and hand it to
 *      bcol_basesmuma_smcm_allgather_connection().
 *   2. If the module has no shared_memory_scratch_space table yet, allocate
 *      one slot per group member and fill each slot with a base address plus
 *      cs->scratch_offset_from_base_ctl_file.
 *
 * Returns OMPI_SUCCESS, the error code reported by the connection exchange,
 * or OMPI_ERR_OUT_OF_RESOURCE when the table cannot be allocated.
 */
static int base_bcol_basesmuma_setup_ctl (mca_bcol_basesmuma_module_t *sm_bcol_module,
                                          mca_bcol_basesmuma_component_t *cs)
{
    const int self_rank = sm_bcol_module->super.sbgp_partner_module->my_index;
    bcol_basesmuma_smcm_file_t ctl_file;
    int rc;

    /* step 1: exchange backing-file information, unless already done */
    if (NULL == sm_bcol_module->ctl_backing_files_info) {
        ctl_file.file_name          = cs->sm_ctl_structs->map_path;
        ctl_file.size               = cs->sm_ctl_structs->map_size;
        ctl_file.size_ctl_structure = 0;
        ctl_file.data_seg_alignment = BASESMUMA_CACHE_LINE_SIZE;
        ctl_file.mpool_size         = cs->sm_ctl_structs->map_size;

        rc = bcol_basesmuma_smcm_allgather_connection(sm_bcol_module,
                                                      sm_bcol_module->super.sbgp_partner_module,
                                                      &(cs->sm_connections_list),
                                                      &(sm_bcol_module->ctl_backing_files_info),
                                                      sm_bcol_module->super.sbgp_partner_module->group_comm,
                                                      ctl_file, cs->clt_base_fname,
                                                      false);
        if (OMPI_SUCCESS != rc) {
            return rc;
        }
    }

    /* step 2: nothing left to do when the scratch table already exists */
    if (NULL != sm_bcol_module->shared_memory_scratch_space) {
        return OMPI_SUCCESS;
    }

    sm_bcol_module->shared_memory_scratch_space =
        calloc (sm_bcol_module->super.sbgp_partner_module->group_size, sizeof (void *));
    if (NULL == sm_bcol_module->shared_memory_scratch_space) {
        opal_output (ompi_bcol_base_framework.framework_output, "Cannot allocate memory for shared_memory_scratch_space.");
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    for (int peer = 0 ; peer < sm_bcol_module->super.sbgp_partner_module->group_size ; ++peer) {
        /* the local rank has no entry in ctl_backing_files_info; it is
         * filled in after the loop from the component's own mapping */
        if (peer != self_rank) {
            /* NOTE(review): the base used here is the sm_mmap pointer itself,
             * whereas the local slot below uses a map_addr field - confirm
             * this is the intended base address for peers. */
            sm_bcol_module->shared_memory_scratch_space[peer] =
                (void *)((intptr_t) sm_bcol_module->ctl_backing_files_info[peer]->sm_mmap +
                         cs->scratch_offset_from_base_ctl_file);
        }
    }

    /* local slot: offset from this process's own control-file mapping */
    sm_bcol_module->shared_memory_scratch_space[self_rank] =
        (void *)((intptr_t) cs->sm_ctl_structs->map_addr + cs->scratch_offset_from_base_ctl_file);

    return OMPI_SUCCESS;
}
Ejemplo n.º 2
0
/* New init function used for the control scheme where we put the control
 * struct at the top of the payload buffer.
 *
 * payload_block: descriptor of the payload memory block (banks, buffers per
 *                bank, buffer size, base address).
 * data_offset:   distance from the start of a buffer (its control struct) to
 *                the payload data; also cached as the module's
 *                total_header_size.
 * bcol_module:   the basesmuma module (cast to mca_bcol_basesmuma_module_t).
 * reg_data:      registration data (cast to
 *                bcol_basesmuma_registration_data_t) giving the backing file
 *                name and size.
 *
 * Returns OMPI_SUCCESS on success, OMPI_ERR_OUT_OF_RESOURCE when an
 * allocation fails, the error code of the connection / offset exchange when
 * one of those fails, or OMPI_ERROR when init_nb_coll_buff_desc() fails.
 *
 * The temporary offsets array is released on every exit path.
 */
int bcol_basesmuma_bank_init_opti(struct mca_bcol_base_memory_block_desc_t *payload_block,
        uint32_t data_offset,
        mca_bcol_base_module_t *bcol_module,
        void *reg_data)
{
    /* assumption here is that the block has been registered with
     * sm bcol hence has been mapped by each process, need to be
     * sure that memory is mapped amongst sm peers
     */

    /* local variables */
    int ret = OMPI_SUCCESS, i, j;
    mca_bcol_basesmuma_component_t *cs = &mca_bcol_basesmuma_component;
    bcol_basesmuma_registration_data_t *sm_reg_data =
        (bcol_basesmuma_registration_data_t *) reg_data;
    mca_bcol_basesmuma_module_t *sm_bcol_module =
        (mca_bcol_basesmuma_module_t *) bcol_module;
    mca_bcol_base_memory_block_desc_t *ml_block = payload_block;
    mca_bcol_basesmuma_local_mlmem_desc_t *ml_mem = &sm_bcol_module->ml_mem;
    sm_buffer_mgmt *pload_mgmt;
    size_t malloc_size;
    bcol_basesmuma_smcm_file_t input_file;
    int leading_dim, loop_limit, buf_id;
    int my_idx, array_id;
    unsigned char *base_ptr;
    mca_bcol_basesmuma_header_t *ctl_ptr;
    void **results_array = NULL, *mem_offset;

    /* first, we get a pointer to the payload buffer management struct */
    pload_mgmt = &(sm_bcol_module->colls_with_user_data);

    /* go ahead and get the header size that is cached on the payload block
     */
    sm_bcol_module->total_header_size = data_offset;

    /* allocate memory for pointers to mine and my peers' payload buffers
     * difference here is that now we use our new data struct.
     * The product is widened to size_t before multiplying so it cannot wrap
     * in 32-bit arithmetic.
     */
    malloc_size = (size_t) ml_block->num_banks * ml_block->num_buffers_per_bank *
        pload_mgmt->size_of_group * sizeof(mca_bcol_basesmuma_payload_t);
    pload_mgmt->data_buffs = (mca_bcol_basesmuma_payload_t *) malloc(malloc_size);
    if (NULL == pload_mgmt->data_buffs) {
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit_ERROR;
    }

    /* allocate some memory to hold the offsets (one per group member) */
    results_array = (void **) malloc(pload_mgmt->size_of_group * sizeof (void *));
    if (NULL == results_array) {
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit_ERROR;
    }

    /* setup the input file for the shared memory connection manager */
    input_file.file_name = sm_reg_data->file_name;
    input_file.size = sm_reg_data->size;
    input_file.size_ctl_structure = 0;
    input_file.data_seg_alignment = BASESMUMA_CACHE_LINE_SIZE;
    input_file.mpool_size = sm_reg_data->size;

    /* call the connection manager and map my shared memory peers' file
     */
    ret = bcol_basesmuma_smcm_allgather_connection(
        sm_bcol_module,
        sm_bcol_module->super.sbgp_partner_module,
        &(cs->sm_connections_list),
        &(sm_bcol_module->payload_backing_files_info),
        sm_bcol_module->super.sbgp_partner_module->group_comm,
        input_file,cs->payload_base_fname,
        false);
    if (OMPI_SUCCESS != ret) {
        goto exit_ERROR;
    }

    /* now we exchange offset info - don't assume symmetric virtual memory
     */
    mem_offset = (void *) ((uintptr_t) ml_block->block->base_addr -
                           (uintptr_t) cs->sm_payload_structs->data_addr);

    /* call into the exchange offsets function */
    ret = comm_allgather_pml(&mem_offset, results_array, sizeof (void *), MPI_BYTE,
                             sm_bcol_module->super.sbgp_partner_module->my_index,
                             sm_bcol_module->super.sbgp_partner_module->group_size,
                             sm_bcol_module->super.sbgp_partner_module->group_list,
                             sm_bcol_module->super.sbgp_partner_module->group_comm);
    if (OMPI_SUCCESS != ret) {
        goto exit_ERROR;
    }

    /* convert memory offset to virtual address in current rank */
    leading_dim = pload_mgmt->size_of_group;
    loop_limit =  ml_block->num_banks*ml_block->num_buffers_per_bank;
    for (i = 0; i < sm_bcol_module->super.sbgp_partner_module->group_size; i++) {

        /* get the base pointer: my own mapping for my rank, the peer's
         * mapped backing file otherwise */
        array_id = SM_ARRAY_INDEX(leading_dim,0,i);
        if (i == sm_bcol_module->super.sbgp_partner_module->my_index) {
            /* me */
            base_ptr = cs->sm_payload_structs->map_addr;
        } else {
            base_ptr = sm_bcol_module->payload_backing_files_info[i]->
                sm_mmap->map_addr;
        }

        /* first, set the pointer to the control struct: the offset this rank
         * published, rebased onto where its file is mapped locally */
        pload_mgmt->data_buffs[array_id].ctl_struct = (mca_bcol_basesmuma_header_t *)
            (uintptr_t)(((uint64_t)(uintptr_t) results_array[array_id]) +
                        (uint64_t)(uintptr_t) base_ptr);
        /* second, calculate where to set the data pointer */
        pload_mgmt->data_buffs[array_id].payload = (void *)
            (uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct +
                        (uint64_t)(uintptr_t) data_offset);

        /* every following buffer sits size_buffer bytes after the previous one */
        for (buf_id = 1; buf_id < loop_limit; buf_id++) {
            int array_id_m1 = SM_ARRAY_INDEX(leading_dim,(buf_id-1),i);
            array_id = SM_ARRAY_INDEX(leading_dim,buf_id,i);

            /* first, set the control struct's position */
            pload_mgmt->data_buffs[array_id].ctl_struct = (mca_bcol_basesmuma_header_t *)
                (uintptr_t)(((uint64_t)(uintptr_t)(pload_mgmt->data_buffs[array_id_m1].ctl_struct) +
                             (uint64_t)(uintptr_t) ml_block->size_buffer));

            /* second, set the payload pointer */
            pload_mgmt->data_buffs[array_id].payload = (void *)
                (uintptr_t)((uint64_t)(uintptr_t) pload_mgmt->data_buffs[array_id].ctl_struct +
                            (uint64_t)(uintptr_t) data_offset);
        }
    }

    /* done with the index array; clear the pointer so the shared exit path
     * below cannot free it twice */
    free (results_array);
    results_array = NULL;

    /* initialize my control structures!! */
    my_idx = sm_bcol_module->super.sbgp_partner_module->my_index;
    leading_dim = sm_bcol_module->super.sbgp_partner_module->group_size;
    for (buf_id = 0; buf_id < loop_limit; buf_id++) {
        array_id = SM_ARRAY_INDEX(leading_dim,buf_id,my_idx);
        ctl_ptr = pload_mgmt->data_buffs[array_id].ctl_struct;

        /* initialize the data structures */
        for (j = 0; j < SM_BCOLS_MAX; j++) {
            for (i = 0; i < NUM_SIGNAL_FLAGS; i++) {
                ctl_ptr->flags[i][j] = -1;
            }
        }
        ctl_ptr->sequence_number = -1;
        ctl_ptr->src = -1;
    }

    /* setup the data structures needed for releasing the payload
     * buffers back to the ml level
     */
    for (i = 0; i < (int) ml_block->num_banks; i++) {
        sm_bcol_module->colls_with_user_data.
            ctl_buffs_mgmt[i].nb_barrier_desc.ml_memory_block_descriptor =
            ml_block;
    }

    ml_mem->num_banks = ml_block->num_banks;
    ml_mem->bank_release_counter = calloc(ml_block->num_banks, sizeof(uint32_t));
    if (NULL == ml_mem->bank_release_counter) {
        ret = OMPI_ERR_OUT_OF_RESOURCE;
        goto exit_ERROR;
    }
    ml_mem->num_buffers_per_bank = ml_block->num_buffers_per_bank;
    ml_mem->size_buffer = ml_block->size_buffer;
    /* pointer to ml level descriptor */
    ml_mem->ml_mem_desc = ml_block;

    if (OMPI_SUCCESS != init_nb_coll_buff_desc(&ml_mem->nb_coll_desc,
                                               ml_block->block->base_addr,
                                               ml_mem->num_banks,
                                               ml_mem->num_buffers_per_bank,
                                               ml_mem->size_buffer,
                                               data_offset,
                                               sm_bcol_module->super.sbgp_partner_module->group_size,
                                               sm_bcol_module->pow_k)) {

        BASESMUMA_VERBOSE(10, ("Failed to allocate memory descriptors for storing state of non-blocking collectives\n"));
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

exit_ERROR:
    /* release the temporary offsets array (free(NULL) is a no-op).
     * pload_mgmt->data_buffs stays attached to the module as before;
     * NOTE(review): confirm module teardown releases it. */
    free (results_array);
    return ret;
}