/**
 * Post a remote atomic compare-and-swap through libfabric.
 *
 * @param btl            BTL module; used as an mca_btl_ofi_module_t.
 * @param endpoint       Target endpoint; its peer_addr is the fabric destination.
 * @param local_address  Buffer handed to fi_compare_atomic() as the result buffer.
 * @param remote_address Target address; rebased against remote_handle->base_addr
 *                       before being posted.
 * @param local_handle   Registration handle whose desc accompanies local_address.
 * @param remote_handle  Registration handle supplying base_addr and rkey.
 * @param compare        Value the remote location is compared against.
 * @param value          Value to swap in.
 * @param flags          MCA_BTL_ATOMIC_FLAG_32BIT selects FI_UINT32; otherwise
 *                       the operation is posted as FI_UINT64.
 * @param order          Not used by this implementation.
 * @param cbfunc         Completion callback stored in the completion descriptor.
 * @param cbcontext      Stored alongside cbfunc.
 * @param cbdata         Stored alongside cbfunc.
 *
 * @return OPAL_SUCCESS once the operation has been posted, or
 *         OPAL_ERR_OUT_OF_RESOURCE when libfabric reports -FI_EAGAIN.
 *         Any other libfabric error is logged and MCA_BTL_OFI_ABORT() is invoked.
 */
int mca_btl_ofi_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                        void *local_address, uint64_t remote_address,
                        mca_btl_base_registration_handle_t *local_handle,
                        mca_btl_base_registration_handle_t *remote_handle,
                        uint64_t compare, uint64_t value, int flags, int order,
                        mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    int rc;
    int fi_datatype = FI_UINT64;

    mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
    mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint;
    mca_btl_ofi_completion_t *comp = NULL;
    mca_btl_ofi_context_t *ofi_context;

    ofi_context = get_ofi_context(ofi_btl);

    if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
        fi_datatype = FI_UINT32;
    }

    comp = mca_btl_ofi_completion_alloc(btl, endpoint,
                                        ofi_context,
                                        local_address,
                                        local_handle,
                                        cbfunc, cbcontext, cbdata,
                                        MCA_BTL_OFI_TYPE_CSWAP);

    /* The operands are kept inside the completion descriptor so the buffers
     * handed to libfabric do not depend on caller-owned storage. */
    comp->operand = (uint64_t) value;
    comp->compare = (uint64_t) compare;

    /* The address is posted as an offset from the registered base. */
    remote_address = (remote_address - (uint64_t) remote_handle->base_addr);

    /* perform atomic */
    rc = fi_compare_atomic(ofi_context->tx_ctx,
                           (void*) &comp->operand, 1, NULL,        /* operand */
                           (void*) &comp->compare, NULL,           /* compare value */
                           local_address, local_handle->desc,      /* result buffer */
                           btl_endpoint->peer_addr,                /* destination */
                           remote_address, remote_handle->rkey,    /* remote buffer */
                           fi_datatype, FI_CSWAP, comp);

    if (rc == -FI_EAGAIN) {
        /* Nothing was posted, so no completion event will ever hand this
         * descriptor back. Return it to its free list here, otherwise every
         * retry by the caller leaks one completion. */
        opal_free_list_return(comp->my_list, (opal_free_list_item_t*) comp);
        return OPAL_ERR_OUT_OF_RESOURCE;
    } else if (rc < 0) {
        BTL_ERROR(("fi_compare_atomic failed with rc=%d (%s)", rc, fi_strerror(-rc)));
        MCA_BTL_OFI_ABORT();
    }

    MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl);

    return OPAL_SUCCESS;
}
/**
 * Post a remote atomic operation (no fetched result) through libfabric.
 *
 * @param btl            BTL module; used as an mca_btl_ofi_module_t.
 * @param endpoint       Target endpoint; its peer_addr is the fabric destination.
 * @param remote_address Target address; rebased against remote_handle->base_addr
 *                       before being posted.
 * @param remote_handle  Registration handle supplying base_addr and rkey.
 * @param op             BTL atomic operation, translated with to_fi_op().
 * @param operand        Operand of the atomic operation.
 * @param flags          MCA_BTL_ATOMIC_FLAG_32BIT selects FI_UINT32; otherwise
 *                       the operation is posted as FI_UINT64.
 * @param order          Not used by this implementation.
 * @param cbfunc         Completion callback stored in the completion descriptor.
 * @param cbcontext      Stored alongside cbfunc.
 * @param cbdata         Stored alongside cbfunc.
 *
 * @return OPAL_SUCCESS once the operation has been posted, or
 *         OPAL_ERR_OUT_OF_RESOURCE when libfabric reports -FI_EAGAIN.
 *         Any other libfabric error is logged and MCA_BTL_OFI_ABORT() is invoked.
 */
int mca_btl_ofi_aop (struct mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
                     uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
                     mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
                     mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata)
{
    int rc;
    int fi_datatype = FI_UINT64;
    int fi_op;

    mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
    mca_btl_ofi_endpoint_t *btl_endpoint = (mca_btl_ofi_endpoint_t*) endpoint;
    mca_btl_ofi_completion_t *comp = NULL;
    mca_btl_ofi_context_t *ofi_context;

    ofi_context = get_ofi_context(ofi_btl);

    if (flags & MCA_BTL_ATOMIC_FLAG_32BIT) {
        fi_datatype = FI_UINT32;
    }

    fi_op = to_fi_op(op);

    /* No local buffer or handle: this operation does not fetch a result. */
    comp = mca_btl_ofi_completion_alloc(btl, endpoint,
                                        ofi_context,
                                        NULL,
                                        NULL,
                                        cbfunc, cbcontext, cbdata,
                                        MCA_BTL_OFI_TYPE_AOP);

    /* The operand is kept inside the completion descriptor so the buffer
     * handed to libfabric does not depend on caller-owned storage. */
    comp->operand = (uint64_t) operand;

    /* The address is posted as an offset from the registered base. */
    remote_address = (remote_address - (uint64_t) remote_handle->base_addr);

    rc = fi_atomic(ofi_context->tx_ctx,
                   (void*) &comp->operand, 1, NULL,       /* operand */
                   btl_endpoint->peer_addr,               /* remote addr */
                   remote_address, remote_handle->rkey,   /* remote buffer */
                   fi_datatype, fi_op, comp);

    if (rc == -FI_EAGAIN) {
        /* Nothing was posted, so no completion event will ever hand this
         * descriptor back. Return it to its free list here, otherwise every
         * retry by the caller leaks one completion. */
        opal_free_list_return(comp->my_list, (opal_free_list_item_t*) comp);
        return OPAL_ERR_OUT_OF_RESOURCE;
    } else if (rc < 0) {
        BTL_ERROR(("fi_atomic failed with rc=%d (%s)", rc, fi_strerror(-rc)));
        MCA_BTL_OFI_ABORT();
    }

    MCA_BTL_OFI_NUM_RDMA_INC(ofi_btl);

    return OPAL_SUCCESS;
}
/**
 * Translate a BTL atomic operation into the matching libfabric op code.
 *
 * MCA_BTL_ATOMIC_ADD maps to FI_SUM and MCA_BTL_ATOMIC_SWAP maps to
 * FI_ATOMIC_WRITE. Any other value is logged as an error and
 * MCA_BTL_OFI_ABORT() is invoked.
 *
 * @param op  BTL atomic operation to translate.
 * @return    The libfabric op code for the two supported operations.
 */
static inline int to_fi_op(mca_btl_base_atomic_op_t op)
{
    if (MCA_BTL_ATOMIC_ADD == op) {
        return FI_SUM;
    }

    if (MCA_BTL_ATOMIC_SWAP == op) {
        return FI_ATOMIC_WRITE;
    }

    BTL_ERROR(("Unknown or unsupported atomic op."));
    MCA_BTL_OFI_ABORT();

    /* Only here to keep the compiler from warning about a missing return. */
    return OPAL_ERROR;
}
/**
 * Drain one batch of entries from a context's completion queue and dispatch
 * each of them according to its completion type.
 *
 * RDMA-style completions (GET/PUT/AOP/AFOP/CSWAP) invoke the stored user
 * callback, if any, and decrement the module's RDMA counter. RECV
 * completions are handed to mca_btl_ofi_recv_frag(). SEND completions
 * decrement the send counter and complete the fragment. After dispatch the
 * completion descriptor is returned to its free list.
 *
 * Error handling: -FI_EAVAIL reads and logs the CQ error entry and then
 * aborts; -FI_EINTR (when defined) is ignored; -FI_EAGAIN means nothing was
 * available; any other non-positive return is logged and aborts.
 *
 * @param context  OFI context whose completion queue (context->cq) is polled.
 * @return Number of CQ entries with a non-NULL op_context that were processed.
 */
int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context)
{
    int ret = 0;
    int events_read;
    int events = 0;
    struct fi_cq_entry cq_entry[MCA_BTL_OFI_DEFAULT_MAX_CQE];
    struct fi_cq_err_entry cqerr = {0};

    mca_btl_ofi_completion_context_t *c_ctx;
    mca_btl_ofi_base_completion_t *comp;
    mca_btl_ofi_rdma_completion_t *rdma_comp;
    mca_btl_ofi_frag_completion_t *frag_comp;

    /* Never ask the provider for more entries than the stack buffer holds:
     * num_cqe_read is a component-level setting, while cq_entry has a fixed
     * capacity, so an oversized setting would overrun the array.
     * NOTE(review): the value may already be clamped where it is
     * registered; this check keeps the function safe on its own. */
    size_t cq_capacity = sizeof(cq_entry) / sizeof(cq_entry[0]);
    size_t cq_wanted = (size_t) mca_btl_ofi_component.num_cqe_read;
    if (cq_wanted > cq_capacity) {
        cq_wanted = cq_capacity;
    }

    ret = fi_cq_read(context->cq, cq_entry, cq_wanted);

    if (0 < ret) {
        events_read = ret;
        for (int i = 0; i < events_read; i++) {
            if (NULL != cq_entry[i].op_context) {
                ++events;

                c_ctx = (mca_btl_ofi_completion_context_t*) cq_entry[i].op_context;

                /* We are casting to every type here just for simplicity. */
                comp = (mca_btl_ofi_base_completion_t*) c_ctx->comp;
                frag_comp = (mca_btl_ofi_frag_completion_t*) c_ctx->comp;
                rdma_comp = (mca_btl_ofi_rdma_completion_t*) c_ctx->comp;

                switch (comp->type) {
                case MCA_BTL_OFI_TYPE_GET:
                case MCA_BTL_OFI_TYPE_PUT:
                case MCA_BTL_OFI_TYPE_AOP:
                case MCA_BTL_OFI_TYPE_AFOP:
                case MCA_BTL_OFI_TYPE_CSWAP:
                    /* call the callback */
                    if (rdma_comp->cbfunc) {
                        rdma_comp->cbfunc (comp->btl, comp->endpoint,
                                           rdma_comp->local_address, rdma_comp->local_handle,
                                           rdma_comp->cbcontext, rdma_comp->cbdata, OPAL_SUCCESS);
                    }

                    MCA_BTL_OFI_NUM_RDMA_DEC((mca_btl_ofi_module_t*) comp->btl);
                    break;

                case MCA_BTL_OFI_TYPE_RECV:
                    mca_btl_ofi_recv_frag((mca_btl_ofi_module_t*) comp->btl,
                                          (mca_btl_ofi_endpoint_t*) comp->endpoint,
                                          context, frag_comp->frag);
                    break;

                case MCA_BTL_OFI_TYPE_SEND:
                    MCA_BTL_OFI_NUM_SEND_DEC((mca_btl_ofi_module_t*) comp->btl);
                    mca_btl_ofi_frag_complete(frag_comp->frag, OPAL_SUCCESS);
                    break;

                default:
                    /* catastrophic */
                    BTL_ERROR(("unknown completion type"));
                    MCA_BTL_OFI_ABORT();
                }

                /* return the completion handler */
                opal_free_list_return(comp->my_list, (opal_free_list_item_t*) comp);
            }
        }
    } else if (OPAL_UNLIKELY(ret == -FI_EAVAIL)) {
        ret = fi_cq_readerr(context->cq, &cqerr, 0);

        /* cq readerr failed!? */
        if (0 > ret) {
            BTL_ERROR(("%s:%d: Error returned from fi_cq_readerr: %s(%d)",
                       __FILE__, __LINE__, fi_strerror(-ret), ret));
        } else {
            BTL_ERROR(("fi_cq_readerr: (provider err_code = %d)\n",
                       cqerr.prov_errno));
        }
        MCA_BTL_OFI_ABORT();
    }
#ifdef FI_EINTR
    /* Sometimes the sockets provider complains about an interrupt. We do nothing. */
    else if (OPAL_UNLIKELY(ret == -FI_EINTR)) {

    }
#endif
    /* If the error is not FI_EAGAIN, report the error and abort. */
    else if (OPAL_UNLIKELY(ret != -FI_EAGAIN)) {
        BTL_ERROR(("fi_cq_read returned error %d:%s", ret, fi_strerror(-ret)));
        MCA_BTL_OFI_ABORT();
    }

    return events;
}