Ejemplo n.º 1
0
static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count, ompi_datatype_t *source_datatype,
                                     void *result_buffer, int result_count, ompi_datatype_t *result_datatype,
                                     ompi_osc_rdma_peer_t *peer, uint64_t target_address,
                                     mca_btl_base_registration_handle_t *target_handle, int target_count,
                                     ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_module_t *module,
                                     ompi_osc_rdma_request_t *request)
{
    int ret = OMPI_SUCCESS;

    do {
        if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
            (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
        }

        if (NULL != result_buffer) {
            /* get accumulate */

            ret = ompi_datatype_sndrcv ((void *) (intptr_t) target_address, target_count, target_datatype,
                                        result_buffer, result_count, result_datatype);

            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                break;
            }
        }

        if (&ompi_mpi_op_no_op.op != op) {
            if (&ompi_mpi_op_replace.op != op) {
                ret = ompi_osc_base_sndrcv_op (source_buffer, source_count, source_datatype, (void *) (intptr_t) target_address,
                                               target_count, target_datatype, op);
            } else {
                ret = ompi_datatype_sndrcv (source_buffer, source_count, source_datatype, (void *) (intptr_t) target_address,
                                            target_count, target_datatype);
            }
        }

        if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
            (void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
        }
    } while (0);

    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output,
                             "ompi_osc_rdma_gacc_self: failed performing accumulate operation. ret = %d", ret));
        return ret;
    }

    if (request) {
        /* NTH: is it ok to use an ompi error code here? */
        ompi_osc_rdma_request_complete (request, ret);
    }

    return ret;
}
Ejemplo n.º 2
0
/**
 * ompi_osc_rdma_cas_get_complete:
 * Note: This function will not work as is in a heterogeneous environment.
 */
static void ompi_osc_rdma_cas_get_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                                            void *local_address, mca_btl_base_registration_handle_t *local_handle,
                                            void *context, void *data, int status)
{
    ompi_osc_rdma_request_t *request = (ompi_osc_rdma_request_t *) context;
    ompi_osc_rdma_sync_t *sync = request->sync;
    ompi_osc_rdma_module_t *module = sync->module;
    intptr_t source = (intptr_t) local_address + request->offset;
    ompi_osc_rdma_frag_t *frag = request->frag;
    ompi_osc_rdma_peer_t *peer = request->peer;
    int ret;

    if (OMPI_SUCCESS == status) {
        /* copy data to the user buffer (for gacc) */
        memcpy (request->result_addr, (void *) source, request->len);
        memcpy ((void *) source, request->origin_addr, request->len);

        if (0 == memcmp ((void *) source, request->compare_addr, request->len)) {
            /* the target and compare buffers match so write the source to the target */
            ret = module->selected_btl->btl_put (module->selected_btl, peer->data_endpoint, local_address,
                                                 request->target_address, local_handle,
                                                 (mca_btl_base_registration_handle_t *) request->ctx,
                                                 request->len, 0, MCA_BTL_NO_ORDER,
                                                 ompi_osc_rdma_acc_put_complete, request, NULL);
            if (OPAL_UNLIKELY(OPAL_SUCCESS != ret)) {
                OPAL_OUTPUT_VERBOSE((1, ompi_osc_base_framework.framework_output, "could not start put to complete accumulate "
                                     "operation. opal return code: %d", ret));
            }

            /* TODO -- we can do better. probably should queue up the next step and handle it in progress */
            assert (OPAL_SUCCESS == ret);
        } else {
            /* this is a no-op. nothing more to do except release the accumulate lock */
            ompi_osc_rdma_frag_complete (frag);

            if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
                (void) ompi_osc_rdma_lock_release_exclusive (module, request->peer,
                                                             offsetof (ompi_osc_rdma_state_t, accumulate_lock));
            }

            /* the request is now complete and the outstanding rdma operation is complete */
            ompi_osc_rdma_request_complete (request, status);

            ompi_osc_rdma_sync_rdma_dec (sync);
            peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING;
        }
    }
}
Ejemplo n.º 3
0
static inline int ompi_osc_rdma_unlock_atomic_internal (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer,
                                                        ompi_osc_rdma_sync_t *lock)
{
    if (MPI_LOCK_EXCLUSIVE == lock->sync.lock.type) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing exclusive lock on peer");
        ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, local_lock));
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "decrementing global exclusive lock");
        ompi_osc_rdma_lock_release_shared (module, module->leader, -1, offsetof (ompi_osc_rdma_state_t, global_lock));
        peer->flags &= ~OMPI_OSC_RDMA_PEER_EXCLUSIVE;
    } else {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "decrementing global shared lock");
        ompi_osc_rdma_lock_release_shared (module, peer, -1, offsetof (ompi_osc_rdma_state_t, local_lock));
    }

    return OMPI_SUCCESS;
}
Ejemplo n.º 4
0
static inline int ompi_osc_rdma_cas_local (const void *source_buffer, const void *compare_buffer, void *result_buffer,
                                           ompi_datatype_t *datatype, ompi_osc_rdma_peer_t *peer,
                                           uint64_t target_address, mca_btl_base_registration_handle_t *target_handle,
                                           ompi_osc_rdma_module_t *module)
{
    ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));

    memcpy (result_buffer, (void *) (uintptr_t) target_address, datatype->super.size);

    if (0 == memcmp (compare_buffer, result_buffer, datatype->super.size)) {
        memcpy ((void *) (uintptr_t) target_address, source_buffer, datatype->super.size);
    }

    ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));

    return OMPI_SUCCESS;
}
Ejemplo n.º 5
0
/* completion of an accumulate put */
static void ompi_osc_rdma_acc_put_complete (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                                            void *local_address, mca_btl_base_registration_handle_t *local_handle,
                                            void *context, void *data, int status)
{
    ompi_osc_rdma_request_t *request = (ompi_osc_rdma_request_t *) context;
    ompi_osc_rdma_sync_t *sync = request->sync;
    ompi_osc_rdma_peer_t *peer = request->peer;

    ompi_osc_rdma_frag_complete (request->frag);
    ompi_osc_rdma_request_complete (request, status);

    if (!ompi_osc_rdma_peer_is_exclusive (peer)) {
        (void) ompi_osc_rdma_lock_release_exclusive (sync->module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock));
    }

    ompi_osc_rdma_sync_rdma_dec (sync);
    peer->flags &= ~OMPI_OSC_RDMA_PEER_ACCUMULATING;
}
Ejemplo n.º 6
0
int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    const int my_rank = ompi_comm_rank (module->comm);
    ompi_osc_rdma_peer_t *my_peer = ompi_osc_rdma_module_peer (module, my_rank);
    ompi_osc_rdma_region_t *region;
    osc_rdma_counter_t region_count;
    osc_rdma_counter_t region_id;
    void *bound;
    intptr_t page_size = getpagesize ();
    int region_index;
    int ret;

    if (module->flavor != MPI_WIN_FLAVOR_DYNAMIC) {
        return OMPI_ERR_RMA_FLAVOR;
    }

    if (0 == len) {
        /* shot-circuit 0-byte case */
        return OMPI_SUCCESS;
    }

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach: %s, %p, %lu", win->w_name, base, (unsigned long) len);

    OPAL_THREAD_LOCK(&module->lock);

    region_count = module->state->region_count & 0xffffffffL;
    region_id    = module->state->region_count >> 32;

    if (region_count == mca_osc_rdma_component.max_attach) {
        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERR_RMA_ATTACH;
    }

    /* it is wasteful to register less than a page. this may allow the remote side to access more
     * memory but the MPI standard covers this with calling the calling behavior erroneous */
    bound = (void *)OPAL_ALIGN((intptr_t) base + len, page_size, intptr_t);
    base = (void *)((intptr_t) base & ~(page_size - 1));
    len = (size_t)((intptr_t) bound - (intptr_t) base);

    /* see if a matching region already exists */
    region = ompi_osc_rdma_find_region_containing ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1, (intptr_t) base,
                                                   (intptr_t) bound, module->region_size, &region_index);
    if (NULL != region) {
        ++module->dynamic_handles[region_index].refcnt;
        OPAL_THREAD_UNLOCK(&module->lock);
        /* no need to invalidate remote caches */
        return OMPI_SUCCESS;
    }

    /* region is in flux */
    module->state->region_count = -1;
    opal_atomic_wmb ();

    ompi_osc_rdma_lock_acquire_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));

    /* do a binary seach for where the region should be inserted */
    if (region_count) {
        region = find_insertion_point ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1, (intptr_t) base,
                                       module->region_size, &region_index);

        if (region_index < region_count) {
            memmove ((void *) ((intptr_t) region + module->region_size), region, (region_count - region_index) * module->region_size);

            if (module->selected_btl->btl_register_mem) {
                memmove (module->dynamic_handles + region_index + 1, module->dynamic_handles + region_index,
                         (region_count - region_index) * sizeof (module->dynamic_handles[0]));
            }
        }
    } else {
        region_index = 0;
        region = (ompi_osc_rdma_region_t *) module->state->regions;
    }

    region->base = (intptr_t) base;
    region->len  = len;

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "attaching dynamic memory region {%p, %p} at index %d",
                     base, (void *)((intptr_t) base + len), region_index);

    if (module->selected_btl->btl_register_mem) {
        mca_btl_base_registration_handle_t *handle;

        ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, (void *) region->base, region->len, MCA_BTL_REG_FLAG_ACCESS_ANY,
                                      &handle);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            OPAL_THREAD_UNLOCK(&module->lock);
            return OMPI_ERR_RMA_ATTACH;
        }

        memcpy (region->btl_handle_data, handle, module->selected_btl->btl_registration_handle_size);
        module->dynamic_handles[region_index].btl_handle = handle;
    } else {
        module->dynamic_handles[region_index].btl_handle = NULL;
    }

    module->dynamic_handles[region_index].refcnt = 1;

#if OPAL_ENABLE_DEBUG
    for (int i = 0 ; i < region_count + 1 ; ++i) {
        region = (ompi_osc_rdma_region_t *) ((intptr_t) module->state->regions + i * module->region_size);

        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, " dynamic region %d: {%p, %lu}", i,
                         (void *) region->base, (unsigned long) region->len);
    }
#endif

    opal_atomic_mb ();
    /* the region state has changed */
    module->state->region_count = ((region_id + 1) << 32) | (region_count + 1);

    ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
    OPAL_THREAD_UNLOCK(&module->lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach complete");

    return OMPI_SUCCESS;
}
Ejemplo n.º 7
0
int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    const int my_rank = ompi_comm_rank (module->comm);
    ompi_osc_rdma_peer_dynamic_t *my_peer = (ompi_osc_rdma_peer_dynamic_t *) ompi_osc_rdma_module_peer (module, my_rank);
    osc_rdma_counter_t region_count, region_id;
    ompi_osc_rdma_region_t *region;
    int region_index;

    if (module->flavor != MPI_WIN_FLAVOR_DYNAMIC) {
        return OMPI_ERR_WIN;
    }

    OPAL_THREAD_LOCK(&module->lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach: %s, %p", win->w_name, base);

    /* the upper 4 bytes of the region count are an instance counter */
    region_count = module->state->region_count & 0xffffffffL;
    region_id    = module->state->region_count >> 32;

    region = ompi_osc_rdma_find_region_containing ((ompi_osc_rdma_region_t *) module->state->regions, 0,
                                                   region_count - 1, (intptr_t) base, (intptr_t) base + 1,
                                                   module->region_size, &region_index);
    if (NULL == region) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "could not find dynamic memory region starting at %p", base);
        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERROR;
    }

    if (--module->dynamic_handles[region_index].refcnt > 0) {
        OPAL_THREAD_UNLOCK(&module->lock);
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach complete");
        return OMPI_SUCCESS;
    }

    /* lock the region so it can't change while a peer is reading it */
    ompi_osc_rdma_lock_acquire_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "detaching dynamic memory region {%p, %p} from index %d",
                     base, (void *)((intptr_t) base + region->len), region_index);

    if (module->selected_btl->btl_register_mem) {
        ompi_osc_rdma_deregister (module, module->dynamic_handles[region_index].btl_handle);

        if (region_index < region_count - 1) {
            memmove (module->dynamic_handles + region_index, module->dynamic_handles + region_index + 1,
                     (region_count - region_index - 1) * sizeof (void *));
        }

        memset (module->dynamic_handles + region_count - 1, 0, sizeof (module->dynamic_handles[0]));
    }

    if (region_index < region_count - 1) {
        memmove (region, (void *)((intptr_t) region + module->region_size),
                 (region_count - region_index - 1) * module->region_size);;
    }

    module->state->region_count = ((region_id + 1) << 32) | (region_count - 1);

    ompi_osc_rdma_lock_release_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));

    OPAL_THREAD_UNLOCK(&module->lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach complete");

    return OMPI_SUCCESS;
}