Exemplo n.º 1
0
static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count, ompi_datatype_t *source_datatype,
                                     void *result_buffer, int result_count, ompi_datatype_t *result_datatype,
                                     ompi_osc_rdma_peer_t *peer, uint64_t target_address,
                                     mca_btl_base_registration_handle_t *target_handle, int target_count,
                                     ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_module_t *module,
                                     ompi_osc_rdma_request_t *request)
{
    int ret = OMPI_SUCCESS;

    do {
        if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
            (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
        }

        if (NULL != result_buffer) {
            /* get accumulate */

            ret = ompi_datatype_sndrcv ((void *) (intptr_t) target_address, target_count, target_datatype,
                                        result_buffer, result_count, result_datatype);

            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                break;
            }
        }

        if (&ompi_mpi_op_no_op.op != op) {
            if (&ompi_mpi_op_replace.op != op) {
                ret = ompi_osc_base_sndrcv_op (source_buffer, source_count, source_datatype, (void *) (intptr_t) target_address,
                                               target_count, target_datatype, op);
            } else {
                ret = ompi_datatype_sndrcv (source_buffer, source_count, source_datatype, (void *) (intptr_t) target_address,
                                            target_count, target_datatype);
            }
        }

        if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
            (void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
        }
    } while (0);

    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
                             "ompi_osc_rdma_gacc_self: failed performing accumulate operation. ret = %d", ret));
        return ret;
    }

    if (request) {
        /* NTH: is it ok to use an ompi error code here? */
        ompi_osc_rdma_request_complete (request, ret);
    }

    return ret;
}
Exemplo n.º 2
0
static inline int ompi_osc_rdma_cas_local (const void *source_buffer, const void *compare_buffer, void *result_buffer,
                                           ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
                                           uint64_t target_address, mca_btl_base_registration_handle_t *target_handle,
                                           ompi_osc_rdma_module_t *module)
{
    ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));

    memcpy (result_buffer, (void *) (uintptr_t) target_address, datatype->super.size);

    if (0 == memcmp (compare_buffer, result_buffer, datatype->super.size)) {
        memcpy ((void *) (uintptr_t) target_address, source_buffer, datatype->super.size);
    }

    ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));

    return OMPI_SUCCESS;
}
Exemplo n.º 3
0
int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    const int my_rank = ompi_comm_rank (module->comm);
    ompi_osc_rdma_peer_t *my_peer = ompi_osc_rdma_module_peer (module, my_rank);
    ompi_osc_rdma_region_t *region;
    osc_rdma_counter_t region_count;
    osc_rdma_counter_t region_id;
    void *bound;
    intptr_t page_size = getpagesize ();
    int region_index;
    int ret;

    if (module->flavor != MPI_WIN_FLAVOR_DYNAMIC) {
        return OMPI_ERR_RMA_FLAVOR;
    }

    if (0 == len) {
        /* shot-circuit 0-byte case */
        return OMPI_SUCCESS;
    }

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach: %s, %p, %lu", win->w_name, base, (unsigned long) len);

    OPAL_THREAD_LOCK(&module->lock);

    region_count = module->state->region_count & 0xffffffffL;
    region_id    = module->state->region_count >> 32;

    if (region_count == mca_osc_rdma_component.max_attach) {
        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERR_RMA_ATTACH;
    }

    /* it is wasteful to register less than a page. this may allow the remote side to access more
     * memory but the MPI standard covers this with calling the calling behavior erroneous */
    bound = (void *)OPAL_ALIGN((intptr_t) base + len, page_size, intptr_t);
    base = (void *)((intptr_t) base & ~(page_size - 1));
    len = (size_t)((intptr_t) bound - (intptr_t) base);

    /* see if a matching region already exists */
    region = ompi_osc_rdma_find_region_containing ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1, (intptr_t) base,
                                                   (intptr_t) bound, module->region_size, &region_index);
    if (NULL != region) {
        ++module->dynamic_handles[region_index].refcnt;
        OPAL_THREAD_UNLOCK(&module->lock);
        /* no need to invalidate remote caches */
        return OMPI_SUCCESS;
    }

    /* region is in flux */
    module->state->region_count = -1;
    opal_atomic_wmb ();

    ompi_osc_rdma_lock_acquire_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));

    /* do a binary seach for where the region should be inserted */
    if (region_count) {
        region = find_insertion_point ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1, (intptr_t) base,
                                       module->region_size, &region_index);

        if (region_index < region_count) {
            memmove ((void *) ((intptr_t) region + module->region_size), region, (region_count - region_index) * module->region_size);

            if (module->selected_btl->btl_register_mem) {
                memmove (module->dynamic_handles + region_index + 1, module->dynamic_handles + region_index,
                         (region_count - region_index) * sizeof (module->dynamic_handles[0]));
            }
        }
    } else {
        region_index = 0;
        region = (ompi_osc_rdma_region_t *) module->state->regions;
    }

    region->base = (intptr_t) base;
    region->len  = len;

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "attaching dynamic memory region {%p, %p} at index %d",
                     base, (void *)((intptr_t) base + len), region_index);

    if (module->selected_btl->btl_register_mem) {
        mca_btl_base_registration_handle_t *handle;

        ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, (void *) region->base, region->len, MCA_BTL_REG_FLAG_ACCESS_ANY,
                                      &handle);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            OPAL_THREAD_UNLOCK(&module->lock);
            return OMPI_ERR_RMA_ATTACH;
        }

        memcpy (region->btl_handle_data, handle, module->selected_btl->btl_registration_handle_size);
        module->dynamic_handles[region_index].btl_handle = handle;
    } else {
        module->dynamic_handles[region_index].btl_handle = NULL;
    }

    module->dynamic_handles[region_index].refcnt = 1;

#if OPAL_ENABLE_DEBUG
    for (int i = 0 ; i < region_count + 1 ; ++i) {
        region = (ompi_osc_rdma_region_t *) ((intptr_t) module->state->regions + i * module->region_size);

        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, " dynamic region %d: {%p, %lu}", i,
                         (void *) region->base, (unsigned long) region->len);
    }
#endif

    opal_atomic_mb ();
    /* the region state has changed */
    module->state->region_count = ((region_id + 1) << 32) | (region_count + 1);

    ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
    OPAL_THREAD_UNLOCK(&module->lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach complete");

    return OMPI_SUCCESS;
}
Exemplo n.º 4
0
int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    const int my_rank = ompi_comm_rank (module->comm);
    ompi_osc_rdma_peer_dynamic_t *my_peer = (ompi_osc_rdma_peer_dynamic_t *) ompi_osc_rdma_module_peer (module, my_rank);
    osc_rdma_counter_t region_count, region_id;
    ompi_osc_rdma_region_t *region;
    int region_index;

    if (module->flavor != MPI_WIN_FLAVOR_DYNAMIC) {
        return OMPI_ERR_WIN;
    }

    OPAL_THREAD_LOCK(&module->lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach: %s, %p", win->w_name, base);

    /* the upper 4 bytes of the region count are an instance counter */
    region_count = module->state->region_count & 0xffffffffL;
    region_id    = module->state->region_count >> 32;

    region = ompi_osc_rdma_find_region_containing ((ompi_osc_rdma_region_t *) module->state->regions, 0,
                                                   region_count - 1, (intptr_t) base, (intptr_t) base + 1,
                                                   module->region_size, &region_index);
    if (NULL == region) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "could not find dynamic memory region starting at %p", base);
        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERROR;
    }

    if (--module->dynamic_handles[region_index].refcnt > 0) {
        OPAL_THREAD_UNLOCK(&module->lock);
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach complete");
        return OMPI_SUCCESS;
    }

    /* lock the region so it can't change while a peer is reading it */
    ompi_osc_rdma_lock_acquire_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "detaching dynamic memory region {%p, %p} from index %d",
                     base, (void *)((intptr_t) base + region->len), region_index);

    if (module->selected_btl->btl_register_mem) {
        ompi_osc_rdma_deregister (module, module->dynamic_handles[region_index].btl_handle);

        if (region_index < region_count - 1) {
            memmove (module->dynamic_handles + region_index, module->dynamic_handles + region_index + 1,
                     (region_count - region_index - 1) * sizeof (void *));
        }

        memset (module->dynamic_handles + region_count - 1, 0, sizeof (module->dynamic_handles[0]));
    }

    if (region_index < region_count - 1) {
        memmove (region, (void *)((intptr_t) region + module->region_size),
                 (region_count - region_index - 1) * module->region_size);;
    }

    module->state->region_count = ((region_id + 1) << 32) | (region_count - 1);

    ompi_osc_rdma_lock_release_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));

    OPAL_THREAD_UNLOCK(&module->lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach complete");

    return OMPI_SUCCESS;
}
Exemplo n.º 5
0
static inline int cas_rdma (ompi_osc_rdma_sync_t *sync, const void *source_buffer, const void *compare_buffer, void *result_buffer,
                            ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address,
                            mca_btl_base_registration_handle_t *target_handle)
{
    ompi_osc_rdma_module_t *module = sync->module;
    const size_t btl_alignment_mask = ALIGNMENT_MASK(module->selected_btl->btl_get_alignment);
    unsigned long offset, aligned_len, len = datatype->super.size;
    ompi_osc_rdma_frag_t *frag = NULL;
    ompi_osc_rdma_request_t *request;
    char *ptr = NULL;
    int ret;

    OMPI_OSC_RDMA_REQUEST_ALLOC(module, peer, request);

    request->internal = true;
    request->type = OMPI_OSC_RDMA_TYPE_CSWAP;
    request->sync = sync;

    OPAL_THREAD_LOCK(&module->lock);
    /* to ensure order wait until the previous accumulate completes */
    while (ompi_osc_rdma_peer_is_accumulating (peer)) {
        OPAL_THREAD_UNLOCK(&module->lock);
        ompi_osc_rdma_progress (module);
        OPAL_THREAD_LOCK(&module->lock);
    }
    peer->flags |= OMPI_OSC_RDMA_PEER_ACCUMULATING;
    OPAL_THREAD_UNLOCK(&module->lock);

    offset = target_address & btl_alignment_mask;;
    aligned_len = (len + offset + btl_alignment_mask) & ~btl_alignment_mask;

    ret = ompi_osc_rdma_frag_alloc (module, aligned_len, &frag, &ptr);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING;

        OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
                             "Could not allocate an rdma fragment for get accumulate. Falling back on point-to-point"));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
        (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
    }

    /* set up the request */
    request->frag         = frag;
    request->origin_addr  = (void *) source_buffer;
    request->ctx          = (void *) target_handle;
    request->result_addr  = result_buffer;
    request->compare_addr = compare_buffer;
    request->result_dt    = datatype;
    request->offset       = (ptrdiff_t) offset;
    request->target_address = target_address;
    request->len          = len;

    OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output, "initiating btl get..."));

    ret = module->selected_btl->btl_get (module->selected_btl, peer->data_endpoint, ptr,
                                         target_address, frag->handle, target_handle,
                                         aligned_len, 0, MCA_BTL_NO_ORDER,
                                         ompi_osc_rdma_cas_get_complete, request, NULL);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        ompi_osc_rdma_frag_complete (frag);
        return ret;
    }

    ompi_osc_rdma_sync_rdma_inc (sync);

    return OMPI_SUCCESS;
}
Exemplo n.º 6
0
static inline int ompi_osc_rdma_gacc_contig (ompi_osc_rdma_sync_t *sync, const void *source, int source_count, ompi_datatype_t *source_datatype,
                                             void *result, int result_count, ompi_datatype_t *result_datatype,
                                             ompi_osc_rdma_peer_t *peer, uint64_t target_address,
                                             mca_btl_base_registration_handle_t *target_handle, int target_count,
                                             ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_request_t *request)
{
    ompi_osc_rdma_module_t *module = sync->module;
    const size_t btl_alignment_mask = ALIGNMENT_MASK(module->selected_btl->btl_get_alignment);
    unsigned long len = target_count * target_datatype->super.size;
    ompi_osc_rdma_frag_t *frag = NULL;
    unsigned long aligned_len, offset;
    char *ptr = NULL;
    int ret;

    offset = target_address & btl_alignment_mask;;
    aligned_len = (len + offset + btl_alignment_mask) & ~btl_alignment_mask;

    ret = ompi_osc_rdma_frag_alloc (module, aligned_len, &frag, &ptr);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
                             "Could not allocate an rdma fragment for get accumulate"));
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    OPAL_THREAD_LOCK(&module->lock);
    /* to ensure order wait until the previous accumulate completes */
    while (ompi_osc_rdma_peer_is_accumulating (peer)) {
        OPAL_THREAD_UNLOCK(&module->lock);
        ompi_osc_rdma_progress (module);
        OPAL_THREAD_LOCK(&module->lock);
    }

    peer->flags |= OMPI_OSC_RDMA_PEER_ACCUMULATING;
    OPAL_THREAD_UNLOCK(&module->lock);

    if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
        (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
    }

    /* set up the request */
    request->frag          = frag;
    request->origin_addr   = (void *) source;
    request->origin_dt     = source_datatype;
    request->origin_count  = source_count;
    request->ctx           = (void *) target_handle;
    request->result_addr   = result;
    request->result_count  = result_count;
    request->result_dt     = result_datatype;
    request->offset        = (ptrdiff_t) target_address & btl_alignment_mask;
    request->target_address = target_address;
    request->len           = len;
    request->op            = op;
    request->sync          = sync;

    ompi_osc_rdma_sync_rdma_inc (sync);

    if (&ompi_mpi_op_replace.op != op || result) {
        /* align the target address */
        target_address = target_address & ~btl_alignment_mask;

        OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output,
                             "initiating btl get local: {%p, %p}, remote: {0x%" PRIx64 ", %p}...",
                             ptr, (void *) frag->handle, target_address, (void *) target_handle));

        ret = module->selected_btl->btl_get (module->selected_btl, peer->data_endpoint, ptr,
                                             target_address, frag->handle, target_handle, aligned_len,
                                             0, MCA_BTL_NO_ORDER, ompi_osc_rdma_acc_get_complete,
                                             request, NULL);
    } else {
        /* copy the put accumulate data */
        memcpy (ptr, source, len);

        OPAL_OUTPUT_VERBOSE((60, ompi_osc_base_framework.framework_output,
                             "initiating btl put..."));

        ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, ptr,
                                             target_address, frag->handle, target_handle, len, 0,
                                             MCA_BTL_NO_ORDER, ompi_osc_rdma_acc_put_complete,
                                             request, NULL);
    }

    if (OPAL_UNLIKELY(OMPI_SUCCESS == ret)) {
        return OMPI_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((20, ompi_osc_base_framework.framework_output, "btl operation failed with ret = %d", ret));

    ompi_osc_rdma_cleanup_rdma (sync, frag, NULL, NULL);

    return ret;
}