示例#1
0
static
mxm_mem_key_t *mca_spml_ikrit_get_mkey_slow(int pe, void *va, int ptl_id, void **rva)
{
    sshmem_mkey_t *mkey;

retry:
    mkey = mca_memheap_base_get_cached_mkey(pe, va, ptl_id, rva);
    if (NULL == mkey) {
        SPML_ERROR("pe=%d: %p is not address of shared variable", pe, va);
        oshmem_shmem_abort(-1);
        return NULL;
    }

    if (MXM_PTL_SHM == ptl_id) {
        if (mca_memheap_base_can_local_copy(mkey, va)) {
            return NULL;
        }

        /* if dst addr is on memheap and local copy is not allowed 
         * disable direct shm transport
         */ 
        if (memheap_is_va_in_segment(va, HEAP_SEG_INDEX)) {
            mca_spml_ikrit.mxm_peers[pe].ptl_id = MXM_PTL_RDMA;
        }
        /* going via mxm must always work */
        ptl_id = MXM_PTL_RDMA;
        goto retry;
    }

    return to_mxm_mkey(mkey);
}
示例#2
0
static int mca_spml_ikrit_get_helper(mxm_send_req_t *sreq,
                                     void *src_addr,
                                     size_t size,
                                     void *dst_addr,
                                     int src)
{
    /* shmem spec states that get() operations are blocking. So it is enough
     to have single mxm request. Also we count on mxm doing copy */
    void *rva;
    sshmem_mkey_t *r_mkey;
    int ptl_id;

    ptl_id = get_ptl_id(src);
    /* already tried to send via shm and failed. go via rdma */
    if (ptl_id == MXM_PTL_SHM)
        ptl_id = MXM_PTL_RDMA;

    /**
     * Get the address to the remote rkey.
     **/
    r_mkey = mca_memheap.memheap_get_cached_mkey(src,
                                                 src_addr,
                                                 ptl_id,
                                                 &rva);
    if (!r_mkey) {
        SPML_ERROR("pe=%d: %p is not address of shared variable",
                   src, src_addr);
        oshmem_shmem_abort(-1);
        return OSHMEM_ERROR;
    }

    SPML_VERBOSE(100,
                 "get: pe:%d ptl=%d src=%p -> dst: %p sz=%d. src_rva=%p, %s",
                 src, ptl_id, src_addr, dst_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));

    /* mxm does not really cares for get lkey */
    sreq->base.mq = mca_spml_ikrit.mxm_mq;
    sreq->base.conn = mca_spml_ikrit.mxm_peers[src]->mxm_conn;
    sreq->base.data_type = MXM_REQ_DATA_BUFFER;
    sreq->base.data.buffer.ptr = dst_addr;
    sreq->base.data.buffer.length = size;
#if MXM_API < MXM_VERSION(2,0)
    sreq->base.data.buffer.memh = NULL;
    sreq->op.mem.remote_memh = NULL;
#else
    sreq->op.mem.remote_mkey = to_mxm_mkey(r_mkey);
#endif
    sreq->opcode = MXM_REQ_OP_GET;
    sreq->op.mem.remote_vaddr = (intptr_t) rva;
    sreq->base.state = MXM_REQ_NEW;

    return OSHMEM_SUCCESS;
}
示例#3
0
/* simple buffered put implementation. NOT IN USE
 * Problems:
 * - slighly worse performance than impl based on non buffered put
 * - fence complexity is O(n_active_connections) instead of O(n_connections_with_outstanding_puts).
 *   Later is bounded by the network RTT & mxm ack timer.
 */
int mca_spml_ikrit_put_simple(void* dst_addr,
                              size_t size,
                              void* src_addr,
                              int dst)
{
    void *rva;
    mxm_send_req_t mxm_req;
    mxm_wait_t wait;
    int ptl_id;
    sshmem_mkey_t *r_mkey;
    static int count;

    ptl_id = get_ptl_id(dst);
    /* Get rkey of remote PE (dst proc) which must be on memheap  */
    r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, ptl_id, &rva);
    if (!r_mkey) {
        SPML_ERROR("pe=%d: %p is not address of shared variable",
                   dst, dst_addr);
        oshmem_shmem_abort(-1);
        return OSHMEM_ERROR;
    }

#if SPML_IKRIT_PUT_DEBUG == 1
    SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s",
            dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif
    if (ptl_id == MXM_PTL_SHM) {

        if (mca_memheap_base_can_local_copy(r_mkey, dst_addr)) {
            memcpy((void *) (unsigned long) rva, src_addr, size);
            /* call progress as often as we would have with regular put */
            if (++count % SPML_IKRIT_PACKETS_PER_SYNC == 0)
                mxm_progress(mca_spml_ikrit.mxm_context);
            return OSHMEM_SUCCESS;
        }
        /* segment not mapped - fallback to rmda */
        ptl_id = MXM_PTL_RDMA;
        r_mkey = mca_memheap_base_get_cached_mkey(dst,
                                                     //(unsigned long) dst_addr,
                                                     dst_addr,
                                                     ptl_id,
                                                     &rva);
        if (!r_mkey) {
            SPML_ERROR("pe=%d: %p is not address of shared variable",
                       dst, dst_addr);
            oshmem_shmem_abort(-1);
            return OSHMEM_ERROR;
        }
    }

#if SPML_IKRIT_PUT_DEBUG == 1
    SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s",
            dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif

    /* fill out request */
    mxm_req.base.mq = mca_spml_ikrit.mxm_mq;
#if MXM_API <  MXM_VERSION(2,0)
    mxm_req.base.flags = MXM_REQ_FLAG_BLOCKING;
#else
    mxm_req.flags = MXM_REQ_SEND_FLAG_BLOCKING;
#endif
    mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn;
    mxm_req.base.data_type = MXM_REQ_DATA_BUFFER;
    mxm_req.base.data.buffer.ptr = src_addr;
    mxm_req.base.data.buffer.length = size;
    mxm_req.base.completed_cb = 0;
    mxm_req.base.context = 0;
    mxm_req.opcode = MXM_REQ_OP_PUT;
    mxm_req.op.mem.remote_vaddr = (intptr_t) rva;
    mxm_req.base.state = MXM_REQ_NEW;
    mxm_req.base.error = MXM_OK;

#if MXM_API < MXM_VERSION(2, 0)
    mxm_req.base.data.buffer.memh = NULL;
    mxm_req.op.mem.remote_memh = NULL;
#else
    mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey);
#endif

    if (mca_spml_ikrit.mxm_peers[dst]->need_fence == 0) {
        opal_list_append(&mca_spml_ikrit.active_peers,
                         &mca_spml_ikrit.mxm_peers[dst]->super);
        mca_spml_ikrit.mxm_peers[dst]->need_fence = 1;
    }

    SPML_IKRIT_MXM_POST_SEND(mxm_req);

    wait.req = &mxm_req.base;
    wait.state = (mxm_req_state_t)(MXM_REQ_SENT | MXM_REQ_COMPLETED);
    wait.progress_cb = NULL;
    wait.progress_arg = NULL;
    mxm_wait(&wait);

    return OSHMEM_SUCCESS;
}
示例#4
0
/**
 * TODO: using put request as handle is not good.
 */
static inline int mca_spml_ikrit_put_internal(void* dst_addr,
                                              size_t size,
                                              void* src_addr,
                                              int dst,
                                              void **handle,
                                              int zcopy)
{
    void *rva;
    mca_spml_ikrit_put_request_t *put_req;
    int ptl_id;
    sshmem_mkey_t *r_mkey;
    static int count;
    int need_progress = 0;

    if (0 >= size) {
        return OSHMEM_SUCCESS;
    }

    ptl_id = get_ptl_id(dst);
    /* Get rkey of remote PE (dst proc) which must be on memheap  */
    r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, ptl_id, &rva);
    if (!r_mkey) {
        SPML_ERROR("pe=%d: %p is not address of shared variable",
                   dst, dst_addr);
        oshmem_shmem_abort(-1);
        return OSHMEM_ERROR;
    }

#if SPML_IKRIT_PUT_DEBUG == 1

    SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s",
            dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif
    if (ptl_id == MXM_PTL_SHM) {

        if (mca_memheap_base_can_local_copy(r_mkey, dst_addr)) {
            memcpy((void *) (unsigned long) rva, src_addr, size);
            /* call progress as often as we would have with regular put */
            if (++count % SPML_IKRIT_PACKETS_PER_SYNC == 0)
                mxm_progress(mca_spml_ikrit.mxm_context);
            return OSHMEM_SUCCESS;
        }
        /* segment not mapped - fallback to rmda */
        ptl_id = MXM_PTL_RDMA;
        r_mkey = mca_memheap_base_get_cached_mkey(dst, dst_addr, ptl_id, &rva);
        if (!r_mkey) {
            SPML_ERROR("pe=%d: %p is not address of shared variable",
                       dst, dst_addr);
            oshmem_shmem_abort(-1);
            return OSHMEM_ERROR;
        }
    }

#if SPML_IKRIT_PUT_DEBUG == 1
    SPML_VERBOSE(100, "put: pe:%d ptl=%d dst=%p <- src: %p sz=%d. dst_rva=%p, %s",
            dst, ptl_id, dst_addr, src_addr, (int)size, (void *)rva, mca_spml_base_mkey2str(r_mkey));
#endif

    put_req = alloc_put_req();
    if (NULL == put_req) {
        SPML_ERROR("out of put requests - aborting");
        oshmem_shmem_abort(-1);
        return OSHMEM_ERROR;
    }
    if (handle)
        *handle = put_req;

    /* fill out request */
    put_req->mxm_req.base.mq = mca_spml_ikrit.mxm_mq;
    /* request immediate responce if we are getting low on send buffers. We only get responce from remote on ack timeout.
     * Also request explicit ack once in a while  */
#if MXM_API < MXM_VERSION(2,0)
    put_req->mxm_req.opcode = MXM_REQ_OP_PUT;
    if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER ||
            (mca_spml_ikrit.mxm_peers[dst]->n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) {
        put_req->mxm_req.base.flags = MXM_REQ_FLAG_SEND_SYNC;
        need_progress = 1;
    } else  {
        put_req->mxm_req.base.flags = MXM_REQ_FLAG_SEND_LAZY|MXM_REQ_FLAG_SEND_SYNC;
    }
#else
    put_req->mxm_req.flags = 0;
    if (mca_spml_ikrit.free_list_max - mca_spml_ikrit.n_active_puts <= SPML_IKRIT_PUT_LOW_WATER ||
            (int)opal_list_get_size(&mca_spml_ikrit.active_peers) > mca_spml_ikrit.unsync_conn_max ||
            (mca_spml_ikrit.mxm_peers[dst]->n_active_puts + 1) % SPML_IKRIT_PACKETS_PER_SYNC == 0) {
        need_progress = 1;
        put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC;
    } else  {
        put_req->mxm_req.opcode = MXM_REQ_OP_PUT;
    }
    if (!zcopy) {
        if (size < mca_spml_ikrit.put_zcopy_threshold) {
            put_req->mxm_req.flags |= MXM_REQ_SEND_FLAG_BLOCKING;
        } else {
            put_req->mxm_req.opcode = MXM_REQ_OP_PUT_SYNC;
        }
    }
#endif

    put_req->mxm_req.base.conn = mca_spml_ikrit.mxm_peers[dst]->mxm_conn;
    put_req->mxm_req.base.data_type = MXM_REQ_DATA_BUFFER;
    put_req->mxm_req.base.data.buffer.ptr = src_addr;
    put_req->mxm_req.base.data.buffer.length = size;
    put_req->mxm_req.base.completed_cb = put_completion_cb;
    put_req->mxm_req.base.context = put_req;
    put_req->mxm_req.op.mem.remote_vaddr = (intptr_t) rva;
    put_req->mxm_req.base.state = MXM_REQ_NEW;
    put_req->pe = dst;

#if MXM_API < MXM_VERSION(2,0)
    put_req->mxm_req.base.data.buffer.memh = NULL;
    put_req->mxm_req.op.mem.remote_memh = NULL;
#else
    put_req->mxm_req.op.mem.remote_mkey = to_mxm_mkey(r_mkey);
#endif

    OPAL_THREAD_ADD32(&mca_spml_ikrit.n_active_puts, 1);
    if (mca_spml_ikrit.mxm_peers[dst]->need_fence == 0) {
        opal_list_append(&mca_spml_ikrit.active_peers,
                         &mca_spml_ikrit.mxm_peers[dst]->super);
        mca_spml_ikrit.mxm_peers[dst]->need_fence = 1;
    }

    mca_spml_ikrit.mxm_peers[dst]->n_active_puts++;

    SPML_IKRIT_MXM_POST_SEND(put_req->mxm_req);

    if (need_progress)
        mxm_progress(mca_spml_ikrit.mxm_context);

    return OSHMEM_SUCCESS;
}
示例#5
0
int mca_atomic_mxm_fadd(void *target,
                        void *prev,
                        const void *value,
                        size_t nlong,
                        int pe,
                        struct oshmem_op_t *op)
{
    unsigned my_pe;
    uint8_t nlong_order;
    void *remote_addr;
    int ptl_id;
    mxm_send_req_t sreq;
    mxm_error_t mxm_err;
    sshmem_mkey_t *r_mkey;
    static char dummy_buf[8];

    my_pe = oshmem_my_proc_id();
    ptl_id = -1;
    mxm_err = MXM_OK;

    if (!target || !value) {
        ATOMIC_ERROR("[#%d] target or value are not defined", my_pe);
        oshmem_shmem_abort(-1);
        return OSHMEM_ERR_BAD_PARAM;
    }

    if ((pe < 0) || (pe >= oshmem_num_procs())) {
        ATOMIC_ERROR("[#%d] PE=%d not valid", my_pe, pe);
        oshmem_shmem_abort(-1);
        return OSHMEM_ERR_BAD_PARAM;
    }

    switch (nlong) {
    case 1:
        nlong_order = 0;
        break;
    case 2:
        nlong_order = 1;
        break;
    case 4:
        nlong_order = 2;
        break;
    case 8:
        nlong_order = 3;
        break;
    default:
        ATOMIC_ERROR("[#%d] Type size must be 1/2/4 or 8 bytes.", my_pe);
        oshmem_shmem_abort(-1);
        return OSHMEM_ERR_BAD_PARAM;
    }

    ptl_id = oshmem_proc_group_all(pe)->transport_ids[0];
    if (MXM_PTL_SHM == ptl_id) {
        ptl_id = MXM_PTL_RDMA;
    }
    r_mkey = mca_memheap.memheap_get_cached_mkey(pe,
                                             target,
                                             ptl_id,
                                             &remote_addr);
    if (!r_mkey) {
        ATOMIC_ERROR("[#%d] %p is not address of symmetric variable",
                     my_pe, target);
        oshmem_shmem_abort(-1);
        return OSHMEM_ERR_BAD_PARAM;
    }

    /* mxm request init */
    sreq.base.state = MXM_REQ_NEW;
    sreq.base.mq = mca_spml_self->mxm_mq;
    sreq.base.conn = mca_spml_self->mxm_peers[pe]->mxm_hw_rdma_conn;
    sreq.base.completed_cb = NULL;
    sreq.base.data_type = MXM_REQ_DATA_BUFFER;

    sreq.op.atomic.remote_vaddr = (uintptr_t) remote_addr;
#if MXM_API < MXM_VERSION(2,0)
    sreq.op.atomic.remote_memh  = MXM_INVALID_MEM_HANDLE;
    memcpy(&sreq.op.atomic.value8, value, nlong);
#else
    sreq.op.atomic.remote_mkey = to_mxm_mkey(r_mkey);
    memcpy(&sreq.op.atomic.value, value, nlong);
#endif
    sreq.op.atomic.order = nlong_order;

    /* Do we need atomic 'add' or atomic 'fetch and add'? */
    if (NULL == prev) {
        sreq.base.data.buffer.ptr = dummy_buf;
        sreq.base.data.buffer.length = nlong;
        sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
#if MXM_API < MXM_VERSION(2,0)
        sreq.base.flags = MXM_REQ_FLAG_SEND_SYNC;
        sreq.opcode = MXM_REQ_OP_ATOMIC_ADD;
#else
        sreq.flags = 0;
        sreq.opcode = MXM_REQ_OP_ATOMIC_FADD;
#endif
    } else {
        sreq.base.data.buffer.ptr = prev;
        sreq.base.data.buffer.length = nlong;
        sreq.base.data.buffer.memh = MXM_INVALID_MEM_HANDLE;
#if MXM_API < MXM_VERSION(2,0)
        sreq.base.flags = 0;
#else
        sreq.flags = 0;
#endif

        sreq.opcode = MXM_REQ_OP_ATOMIC_FADD;
    }

    if (MXM_OK != (mxm_err = mxm_req_send(&sreq))) {
        ATOMIC_ERROR("[#%d] mxm_req_send failed, mxm_error = %d",
                     my_pe, mxm_err);
        oshmem_shmem_abort(-1);
        return OSHMEM_ERROR;
    }

    mxm_req_wait(&sreq.base);
    if (MXM_OK != sreq.base.error) {
        ATOMIC_ERROR("[#%d] mxm_req_wait got non MXM_OK error: %d",
                     my_pe, sreq.base.error);
        oshmem_shmem_abort(-1);
        return OSHMEM_ERROR;
    }

    return OSHMEM_SUCCESS;
}