/*
 * Blocking tagged receive.
 *
 * Posts a non-blocking UCX tag receive for (src, tag, comm) into buf and then
 * spins on opal_progress() until the request is marked complete.
 *
 * buf, count, datatype  - receive buffer description
 * src, tag, comm        - matching criteria, folded into the UCX tag/mask
 * mpi_status            - filled from the request status unless it is
 *                         MPI_STATUS_IGNORE
 *
 * Returns OMPI_SUCCESS, or OMPI_ERROR if the receive could not be posted.
 */
int mca_pml_ucx_recv(void *buf, size_t count, ompi_datatype_t *datatype, int src,
                     int tag, struct ompi_communicator_t* comm,
                     ompi_status_public_t* mpi_status)
{
    ucp_tag_t recv_tag, recv_tag_mask;
    ompi_request_t *recv_req;

    PML_UCX_TRACE_RECV("%s", buf, count, datatype, src, tag, comm, "recv");

    PML_UCX_MAKE_RECV_TAG(recv_tag, recv_tag_mask, tag, src, comm);
    recv_req = (ompi_request_t*)ucp_tag_recv_nb(ompi_pml_ucx.ucp_worker, buf, count,
                                                mca_pml_ucx_get_datatype(datatype),
                                                recv_tag, recv_tag_mask,
                                                mca_pml_ucx_blocking_recv_completion);
    if (UCS_PTR_IS_ERR(recv_req)) {
        PML_UCX_ERROR("ucx recv failed: %s", ucs_status_string(UCS_PTR_STATUS(recv_req)));
        return OMPI_ERROR;
    }

    /* Give UCX one direct progress call before falling back to opal_progress() */
    ucp_worker_progress(ompi_pml_ucx.ucp_worker);
    for (;;) {
        if (REQUEST_COMPLETE(recv_req)) {
            break;
        }
        opal_progress();
    }

    if (MPI_STATUS_IGNORE != mpi_status) {
        *mpi_status = recv_req->req_status;
    }

    /* Put the request back into the pending state before handing it to UCX */
    recv_req->req_complete = REQUEST_PENDING;
    ucp_request_release(recv_req);
    return OMPI_SUCCESS;
}
static void ucp_perf_test_destroy_eps(ucx_perf_context_t* perf, unsigned group_size) { ucs_status_ptr_t *reqs; ucp_tag_recv_info_t info; ucs_status_t status; unsigned i; reqs = calloc(sizeof(*reqs), group_size); for (i = 0; i < group_size; ++i) { if (perf->ucp.peers[i].rkey != NULL) { ucp_rkey_destroy(perf->ucp.peers[i].rkey); } if (perf->ucp.peers[i].ep != NULL) { reqs[i] = ucp_disconnect_nb(perf->ucp.peers[i].ep); } } for (i = 0; i < group_size; ++i) { if (!UCS_PTR_IS_PTR(reqs[i])) { continue; } do { ucp_worker_progress(perf->ucp.worker); status = ucp_request_test(reqs[i], &info); } while (status == UCS_INPROGRESS); ucp_request_release(reqs[i]); } free(reqs); free(perf->ucp.peers); }
/*
 * UCX send-completion callback.
 *
 * request - the completed UCX request; it is used as a struct mlx_request
 * status  - completion status reported by UCX
 *
 * A cancelled request is only released. Otherwise the request's tagged
 * completion is written to the tail of the CQ ring under cq_lock; for a
 * failed send the entry is flagged with UTIL_FLAG_ERROR and an error entry is
 * appended to the CQ error list. In every non-cancelled case the request is
 * reset to MLX_FI_REQ_UNINITIALIZED, the lock is dropped and the request is
 * released - including when the error entry cannot be allocated.
 */
void mlx_send_callback(void *request, ucs_status_t status)
{
	struct util_cq *cq;
	struct mlx_request *mlx_req = request;
	struct fi_cq_tagged_entry *t_entry;
	struct util_cq_err_entry *err;

	cq = mlx_req->cq;

	if (status == UCS_ERR_CANCELED) {
		ucp_request_release(request);
		return;
	}

	fastlock_acquire(&cq->cq_lock);

	t_entry = cirque_tail(cq->cirq);
	*t_entry = (mlx_req->completion.tagged);
	cirque_commit(cq->cirq);

	if (status != UCS_OK) {
		t_entry->flags |= UTIL_FLAG_ERROR;

		err = calloc(1, sizeof(struct util_cq_err_entry));
		if (!err) {
			FI_WARN(&mlx_prov, FI_LOG_CQ,
				"out of memory, cannot report CQ error\n");
			/* Still unlock the CQ and release the request below */
			goto out;
		}

		err->err_entry = (mlx_req->completion.error);
		err->err_entry.prov_errno = (int)status;
		err->err_entry.err = MLX_TRANSLATE_ERRCODE(status);
		err->err_entry.olen = 0;
		slist_insert_tail(&err->list_entry, &cq->err_list);
	}

out:
	mlx_req->type = MLX_FI_REQ_UNINITIALIZED;

	fastlock_release(&cq->cq_lock);
	ucp_request_release(request);
}
static int connect_client() { ucp_tag_recv_info_t info_tag; ucp_tag_message_h msg_tag; ucs_status_t status; ucp_ep_params_t ep_params; struct msg *msg = 0; struct ucx_context *request = 0; size_t msg_len = 0; int ret = -1; int i; /* Send client UCX address to server */ ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = peer_addr; status = ucp_ep_create(ucp_worker, &ep_params, &rem_ep); if (status != UCS_OK) { abort(); } msg_len = sizeof(*msg) + local_addr_len; msg = calloc(1, msg_len); if (!msg) { abort(); } msg->data_len = local_addr_len; memcpy(msg->data, local_addr, local_addr_len); request = ucp_tag_send_nb(rem_ep, msg, msg_len, ucp_dt_make_contig(1), tag, send_handle); if (UCS_PTR_IS_ERR(request)) { fprintf(stderr, "unable to send UCX address message\n"); free(msg); abort(); } else if (UCS_PTR_STATUS(request) != UCS_OK) { fprintf(stderr, "UCX address message was scheduled for send\n"); wait(ucp_worker, request); request->completed = 0; /* Reset request state before recycling it */ ucp_request_release(request); } free (msg); ret = 0; err: return ret; }
int ompi_osc_ucx_compare_and_swap(const void *origin_addr, const void *compare_addr, void *result_addr, struct ompi_datatype_t *dt, int target, ptrdiff_t target_disp, struct ompi_win_t *win) { ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t *)win->w_osc_module; ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target); ucp_rkey_h rkey; size_t dt_bytes; ompi_osc_ucx_internal_request_t *req = NULL; int ret = OMPI_SUCCESS; ucs_status_t status; ret = check_sync_state(module, target, false); if (ret != OMPI_SUCCESS) { return ret; } ret = start_atomicity(module, ep, target); if (ret != OMPI_SUCCESS) { return ret; } if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { status = get_dynamic_win_info(remote_addr, module, ep, target); if (status != UCS_OK) { return OMPI_ERROR; } } rkey = (module->win_info_array[target]).rkey; ompi_datatype_type_size(dt, &dt_bytes); memcpy(result_addr, origin_addr, dt_bytes); req = ucp_atomic_fetch_nb(ep, UCP_ATOMIC_FETCH_OP_CSWAP, *(uint64_t *)compare_addr, result_addr, dt_bytes, remote_addr, rkey, req_completion); if (UCS_PTR_IS_PTR(req)) { ucp_request_release(req); } ret = incr_and_check_ops_num(module, target, ep); if (ret != OMPI_SUCCESS) { return ret; } return end_atomicity(module, ep, target); }
/*
 * Synchronously disconnect an endpoint.
 *
 * Starts a non-blocking disconnect and, if it returns a request, progresses
 * the endpoint's worker until the request is no longer in progress and then
 * releases it. A failed disconnect is only logged as a warning.
 */
void ucp_ep_destroy(ucp_ep_h ep)
{
    ucp_worker_h worker = ep->worker;
    ucs_status_ptr_t request; /* a status pointer itself, not a pointer to one */
    ucs_status_t status;

    request = ucp_disconnect_nb(ep);
    if (request == NULL) {
        /* Disconnect completed immediately */
        return;
    } else if (UCS_PTR_IS_ERR(request)) {
        ucs_warn("disconnect failed: %s",
                 ucs_status_string(UCS_PTR_STATUS(request)));
        return;
    } else {
        do {
            ucp_worker_progress(worker);
            status = ucp_request_test(request, NULL);
        } while (status == UCS_INPROGRESS);
        ucp_request_release(request);
    }
}
/*
 * Wait for the first *count_p requests in reqs to finish.
 *
 * Each request is polled with ucp_request_test() while opal_progress() is
 * driven; a request that finishes with a status other than UCS_OK is reported
 * through SPML_ERROR. Every request is released and its slot set to NULL, and
 * *count_p is reset to 0 before returning.
 */
static void mca_spml_ucx_waitall(void **reqs, size_t *count_p)
{
    ucs_status_t status;
    size_t i;

    /* *count_p is a size_t, so it has to be formatted with %zu rather than %d */
    SPML_VERBOSE(10, "waiting for %zu disconnect requests", *count_p);
    for (i = 0; i < *count_p; ++i) {
        do {
            opal_progress();
            status = ucp_request_test(reqs[i], NULL);
        } while (status == UCS_INPROGRESS);
        if (status != UCS_OK) {
            SPML_ERROR("disconnect request failed: %s",
                       ucs_status_string(status));
        }
        ucp_request_release(reqs[i]);
        reqs[i] = NULL;
    }

    *count_p = 0;
}
int ompi_osc_ucx_fetch_and_op(const void *origin_addr, void *result_addr, struct ompi_datatype_t *dt, int target, ptrdiff_t target_disp, struct ompi_op_t *op, struct ompi_win_t *win) { ompi_osc_ucx_module_t *module = (ompi_osc_ucx_module_t*) win->w_osc_module; int ret = OMPI_SUCCESS; ret = check_sync_state(module, target, false); if (ret != OMPI_SUCCESS) { return ret; } if (op == &ompi_mpi_op_no_op.op || op == &ompi_mpi_op_replace.op || op == &ompi_mpi_op_sum.op) { ucp_ep_h ep = OSC_UCX_GET_EP(module->comm, target); uint64_t remote_addr = (module->win_info_array[target]).addr + target_disp * OSC_UCX_GET_DISP(module, target); ucp_rkey_h rkey; uint64_t value = *(uint64_t *)origin_addr; ucp_atomic_fetch_op_t opcode; size_t dt_bytes; ompi_osc_ucx_internal_request_t *req = NULL; ucs_status_t status; ret = start_atomicity(module, ep, target); if (ret != OMPI_SUCCESS) { return ret; } if (module->flavor == MPI_WIN_FLAVOR_DYNAMIC) { status = get_dynamic_win_info(remote_addr, module, ep, target); if (status != UCS_OK) { return OMPI_ERROR; } } rkey = (module->win_info_array[target]).rkey; ompi_datatype_type_size(dt, &dt_bytes); if (op == &ompi_mpi_op_replace.op) { opcode = UCP_ATOMIC_FETCH_OP_SWAP; } else { opcode = UCP_ATOMIC_FETCH_OP_FADD; if (op == &ompi_mpi_op_no_op.op) { value = 0; } } req = ucp_atomic_fetch_nb(ep, opcode, value, result_addr, dt_bytes, remote_addr, rkey, req_completion); if (UCS_PTR_IS_PTR(req)) { ucp_request_release(req); } ret = incr_and_check_ops_num(module, target, ep); if (ret != OMPI_SUCCESS) { return ret; } return end_atomicity(module, ep, target); } else { return ompi_osc_ucx_get_accumulate(origin_addr, 1, dt, result_addr, 1, dt, target, target_disp, 1, dt, op, win); } }
/*
 * Server side of the example exchange.
 *
 * 1. Waits (according to ucp_test_mode) for a tagged message carrying the
 *    client's worker address and receives it.
 * 2. Stores a copy of that address in peer_addr / peer_addr_len.
 * 3. Creates an endpoint to the client and sends it test_str.
 *
 * Returns 0 on success and -1 on any failure.
 */
static int run_ucx_server(ucp_worker_h ucp_worker)
{
    ucp_tag_recv_info_t info_tag;
    ucp_tag_message_h msg_tag;
    ucs_status_t status;
    ucp_ep_h client_ep;
    struct msg *msg = 0;
    struct ucx_context *request = 0;
    size_t msg_len = 0;
    int ret = -1;

    /* Receive client UCX address */
    do {
        /* The blocking calls below wait on the worker so that the CPU can
         * idle instead of spinning in this loop */
        if (ucp_test_mode == TEST_MODE_WAIT) {
            status = ucp_worker_wait(ucp_worker);
            if (status != UCS_OK) {
                goto err;
            }
        } else if (ucp_test_mode == TEST_MODE_EVENTFD) {
            status = test_poll_wait(ucp_worker);
            if (status != UCS_OK) {
                goto err;
            }
        }

        /* Progressing before probe to update the state */
        ucp_worker_progress(ucp_worker);

        /* Probing incoming events in non-block mode */
        msg_tag = ucp_tag_probe_nb(ucp_worker, tag, tag_mask, 1, &info_tag);
    } while (msg_tag == NULL);

    msg = malloc(info_tag.length);
    if (!msg) {
        fprintf(stderr, "unable to allocate memory\n");
        goto err;
    }

    request = ucp_tag_msg_recv_nb(ucp_worker, msg, info_tag.length,
                                  ucp_dt_make_contig(1), msg_tag, recv_handle);
    if (UCS_PTR_IS_ERR(request)) {
        fprintf(stderr, "unable to receive UCX address message (%s)\n",
                ucs_status_string(UCS_PTR_STATUS(request)));
        free(msg);
        goto err;
    } else {
        wait(ucp_worker, request);
        request->completed = 0; /* Reset request state before recycling it */
        ucp_request_release(request);
        printf("UCX address message was received\n");
    }

    /* data_len comes from the peer: make sure the header and the advertised
     * payload really fit into the bytes that were received */
    if ((info_tag.length < sizeof(*msg)) ||
        (msg->data_len > info_tag.length - sizeof(*msg))) {
        fprintf(stderr, "malformed UCX address message\n");
        free(msg);
        goto err;
    }

    peer_addr = malloc(msg->data_len);
    if (!peer_addr) {
        fprintf(stderr, "unable to allocate memory for peer address\n");
        free(msg);
        goto err;
    }

    peer_addr_len = msg->data_len;
    memcpy(peer_addr, msg->data, peer_addr_len);

    free(msg);

    /* Send test string to client */
    status = ucp_ep_create(ucp_worker, peer_addr, &client_ep);
    if (status != UCS_OK) {
        goto err;
    }

    msg_len = sizeof(*msg) + strlen(test_str) + 1;
    msg = calloc(1, msg_len);
    if (!msg) {
        printf("unable to allocate memory\n");
        goto err_ep;
    }

    msg->data_len = msg_len - sizeof(*msg);
    snprintf((char *)msg->data, msg->data_len, "%s", test_str);

    request = ucp_tag_send_nb(client_ep, msg, msg_len,
                              ucp_dt_make_contig(1), tag, send_handle);
    if (UCS_PTR_IS_ERR(request)) {
        fprintf(stderr, "unable to send UCX data message\n");
        free(msg);
        goto err_ep;
    } else if (UCS_PTR_STATUS(request) != UCS_OK) {
        /* The send did not complete inline: wait for the returned request */
        printf("UCX data message was scheduled for send\n");
        wait(ucp_worker, request);
        request->completed = 0; /* Reset request state before recycling it */
        ucp_request_release(request);
    }

    ret = 0;
    free(msg);

err_ep:
    /* Reached on success as well: the endpoint is always torn down */
    ucp_ep_destroy(client_ep);

err:
    return ret;
}
/*
 * UCX receive-completion callback.
 *
 * request - the completed UCX request; it is used as a struct mlx_request
 * status  - completion status reported by UCX
 * info    - sender tag and length of the received message
 *
 * A cancelled request is only released. Otherwise the tag/length (and, on
 * failure, the error codes) are recorded in the request, and then:
 *
 *  - if the request is still MLX_FI_REQ_UNINITIALIZED it is only marked
 *    MLX_FI_REQ_UNEXPECTED / MLX_FI_REQ_UNEXPECTED_ERR; the CQ is not touched
 *    on this path;
 *  - otherwise the completion is written to the CQ ring under cq_lock (the
 *    same locking mlx_send_callback uses), an error entry is queued for a
 *    failed receive, and the request is reset and released.
 *
 * If the error entry cannot be allocated the completion is not committed to
 * the ring, but the lock is still dropped and the request still released.
 */
void mlx_recv_callback(void *request, ucs_status_t status,
		       ucp_tag_recv_info_t *info)
{
	struct util_cq *cq;
	struct mlx_request *mlx_req;

	mlx_req = (struct mlx_request*)request;
	if (status == UCS_ERR_CANCELED) {
		ucp_request_release(request);
		return;
	}

	cq = mlx_req->cq;

	mlx_req->completion.tagged.tag = info->sender_tag;
	mlx_req->completion.tagged.len = info->length;

	if (status != UCS_OK) {
		mlx_req->completion.error.prov_errno = (int)status;
		mlx_req->completion.error.err = MLX_TRANSLATE_ERRCODE(status);
	}

	if (mlx_req->type == MLX_FI_REQ_UNINITIALIZED) {
		/* NOTE(review): presumably the poster has not filled in the
		 * request (including ->cq) yet on this path, so neither the CQ
		 * nor its lock is touched here - confirm against the caller. */
		if (status != UCS_OK) {
			mlx_req->completion.error.olen = info->length;
			mlx_req->type = MLX_FI_REQ_UNEXPECTED_ERR;
		} else {
			mlx_req->type = MLX_FI_REQ_UNEXPECTED;
		}
	} else {
		struct fi_cq_tagged_entry *t_entry;

		if (status != UCS_OK) {
			mlx_req->completion.error.olen = info->length -
						mlx_req->completion.error.len;
		}

		fastlock_acquire(&cq->cq_lock);

		t_entry = cirque_tail(cq->cirq);
		*t_entry = (mlx_req->completion.tagged);

		if (status != UCS_OK) {
			struct util_cq_err_entry* err;

			t_entry->flags |= UTIL_FLAG_ERROR;

			err = calloc(1, sizeof(struct util_cq_err_entry));
			if (!err) {
				FI_WARN(&mlx_prov, FI_LOG_CQ,
					"out of memory, cannot report CQ error\n");
				/* Entry stays uncommitted; still clean up */
				goto out_release;
			}

			err->err_entry = (mlx_req->completion.error);
			slist_insert_tail(&err->list_entry, &cq->err_list);
		}

		if (cq->src) {
			cq->src[cirque_windex((struct mlx_comp_cirq*)(cq->cirq))] =
				FI_ADDR_NOTAVAIL;
		}

		if (cq->wait) {
			cq->wait->signal(cq->wait);
		}

		cirque_commit(cq->cirq);

out_release:
		mlx_req->type = MLX_FI_REQ_UNINITIALIZED;
		fastlock_release(&cq->cq_lock);
		ucp_request_release(request);
	}
}
/*
 * Send callback used in the selective-completion scenario: the request is
 * simply handed back to UCX and nothing else is done with it.
 */
void mlx_send_callback_no_compl(void *request, ucs_status_t status)
{
	(void)status; /* the completion status is not inspected */
	ucp_request_release(request);
}
/*
 * Create a temporary UCP context (and optionally a worker and an endpoint)
 * and print information about them to stdout.
 *
 * print_opts        - PRINT_* bit mask selecting what is printed
 *                     (PRINT_MEM_MAP, PRINT_UCP_CONTEXT, PRINT_UCP_WORKER,
 *                     PRINT_UCP_EP)
 * print_flags       - not used by this function
 * ctx_features      - feature mask passed to ucp_init()
 * base_ep_params    - template for the endpoint parameters; the remote
 *                     address field is filled in here
 * estimated_num_eps - passed to ucp_init()
 * dev_type_bitmap   - bits of UCT_DEVICE_TYPE_*; for every one of SELF, SHM
 *                     and NET that is NOT set, the matching *_DEVICES
 *                     configuration entry is set to an empty string
 * mem_size          - passed to ucp_mem_print_info() when PRINT_MEM_MAP is
 *                     requested and it is not NULL
 *
 * The endpoint is created to the worker's own address and disconnected again
 * before returning. All created objects are released on every path.
 */
void print_ucp_info(int print_opts, ucs_config_print_flags_t print_flags,
                    uint64_t ctx_features, const ucp_ep_params_t *base_ep_params,
                    size_t estimated_num_eps, unsigned dev_type_bitmap,
                    const char *mem_size)
{
    ucp_config_t *config;
    ucs_status_t status;
    ucs_status_ptr_t status_ptr;
    ucp_context_h context;
    ucp_worker_h worker;
    ucp_params_t params;
    ucp_worker_params_t worker_params;
    ucp_ep_params_t ep_params;
    ucp_address_t *address;
    size_t address_length;
    resource_usage_t usage;
    ucp_ep_h ep;

    status = ucp_config_read(NULL, NULL, &config);
    if (status != UCS_OK) {
        return;
    }

    memset(&params, 0, sizeof(params));
    params.field_mask        = UCP_PARAM_FIELD_FEATURES |
                               UCP_PARAM_FIELD_ESTIMATED_NUM_EPS;
    params.features          = ctx_features;
    params.estimated_num_eps = estimated_num_eps;

    get_resource_usage(&usage);

    /* Clear the device list of every device type that was not requested */
    if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_SELF))) {
        ucp_config_modify(config, "SELF_DEVICES", "");
    }
    if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_SHM))) {
        ucp_config_modify(config, "SHM_DEVICES", "");
    }
    if (!(dev_type_bitmap & UCS_BIT(UCT_DEVICE_TYPE_NET))) {
        ucp_config_modify(config, "NET_DEVICES", "");
    }

    status = ucp_init(&params, config, &context);
    if (status != UCS_OK) {
        printf("<Failed to create UCP context>\n");
        goto out_release_config;
    }

    if ((print_opts & PRINT_MEM_MAP) && (mem_size != NULL)) {
        ucp_mem_print_info(mem_size, context, stdout);
    }

    if (print_opts & PRINT_UCP_CONTEXT) {
        ucp_context_print_info(context, stdout);
        print_resource_usage(&usage, "UCP context");
    }

    /* Nothing below is needed unless worker or endpoint info was requested */
    if (!(print_opts & (PRINT_UCP_WORKER|PRINT_UCP_EP))) {
        goto out_cleanup_context;
    }

    worker_params.field_mask  = UCP_WORKER_PARAM_FIELD_THREAD_MODE;
    worker_params.thread_mode = UCS_THREAD_MODE_MULTI;

    get_resource_usage(&usage);

    status = ucp_worker_create(context, &worker_params, &worker);
    if (status != UCS_OK) {
        printf("<Failed to create UCP worker>\n");
        goto out_cleanup_context;
    }

    if (print_opts & PRINT_UCP_WORKER) {
        ucp_worker_print_info(worker, stdout);
        print_resource_usage(&usage, "UCP worker");
    }

    if (print_opts & PRINT_UCP_EP) {
        status = ucp_worker_get_address(worker, &address, &address_length);
        if (status != UCS_OK) {
            printf("<Failed to get UCP worker address>\n");
            goto out_destroy_worker;
        }

        ep_params             = *base_ep_params;
        ep_params.field_mask |= UCP_EP_PARAM_FIELD_REMOTE_ADDRESS;
        ep_params.address     = address;

        status = ucp_ep_create(worker, &ep_params, &ep);
        ucp_worker_release_address(worker, address);
        if (status != UCS_OK) {
            printf("<Failed to create UCP endpoint>\n");
            goto out_destroy_worker;
        }

        ucp_ep_print_info(ep, stdout);

        /* Disconnect and, if a request was returned, wait for it to finish */
        status_ptr = ucp_disconnect_nb(ep);
        if (UCS_PTR_IS_PTR(status_ptr)) {
            do {
                ucp_worker_progress(worker);
                status = ucp_request_test(status_ptr, NULL);
            } while (status == UCS_INPROGRESS);
            ucp_request_release(status_ptr);
        }
    }

out_destroy_worker:
    ucp_worker_destroy(worker);
out_cleanup_context:
    ucp_cleanup(context);
out_release_config:
    ucp_config_release(config);
}
int progress(int server, int msg_size) { int flag = 1; int recvd = 0, sent = 0; struct ucx_context *sreq = NULL, *rreq = NULL; struct epoll_event events[2]; int ret; if( server ){ sreq = launch_send(msg_size); if( !sreq ){ /* inline send */ sent = 1; } } progress_calls++; while (flag) { int i; ucs_status_t status; status = ucp_worker_arm(ucp_worker); if (status == UCS_ERR_BUSY) { /* some events are arrived already */ struct ucx_context *tmp; ucp_worker_progress(ucp_worker); progress_count++; tmp = launch_recv(); rreq = (rreq) ? rreq : tmp; ret = 0; goto progress; } else if ( UCS_OK != status ){ abort(); } ret = epoll_wait(epoll_fd_local, events, 2, -1); if ( 0 > ret ) { if (errno == EINTR) { continue; } else { abort(); } } ucp_worker_progress(ucp_worker); progress_count++; for(i=0; i<ret; i++){ if( events[i].data.fd == epoll_fd){ struct ucx_context *tmp; tmp = launch_recv(); rreq = (rreq) ? rreq : tmp; continue; } if( events[i].data.fd == signal_pipe[0]){ char buf; read(signal_pipe[0], &buf, sizeof(buf)); continue; } } progress: if( sreq ){ if( sreq->completed ){ if( !same_buf ){ free(sreq->buf); } sreq->completed = 0; ucp_request_release(sreq); sreq = NULL; sent = 1; } } if( rreq ){ if( rreq->completed ){ if( !same_buf ) { free(rreq->buf); } rreq->completed = 0; ucp_request_release(rreq); rreq = NULL; recvd = 1; if( !server ){ sreq = launch_send(msg_size); if( !sreq ){ /* inline send */ sent = 1; } } } } if( recvd && sent ){ flag = 0; } if( sreq || rreq ){ activate_progress(); } } }
static int connect_server() { ucp_tag_recv_info_t info_tag; ucp_tag_message_h msg_tag; ucs_status_t status; ucp_ep_params_t ep_params; struct msg *msg = 0; struct ucx_context *request = 0; /* Receive client UCX address */ do { /* Following blocked methods used to polling internal file descriptor * to make CPU idle and don't spin loop */ /* Progressing before probe to update the state */ ucp_worker_progress(ucp_worker); /* Probing incoming events in non-block mode */ msg_tag = ucp_tag_probe_nb(ucp_worker, tag, tag_mask, 1, &info_tag); } while (msg_tag == NULL); msg = malloc(info_tag.length); if (!msg) { fprintf(stderr, "unable to allocate memory\n"); abort(); } request = ucp_tag_msg_recv_nb(ucp_worker, msg, info_tag.length, ucp_dt_make_contig(1), msg_tag, recv_handle); if (UCS_PTR_IS_ERR(request)) { fprintf(stderr, "unable to receive UCX address message (%s)\n", ucs_status_string(UCS_PTR_STATUS(request))); free(msg); abort(); } else { wait(ucp_worker, request); request->completed = 0; ucp_request_release(request); printf("UCX address message was received\n"); } peer_addr = malloc(msg->data_len); if (!peer_addr) { fprintf(stderr, "unable to allocate memory for peer address\n"); free(msg); abort(); } peer_addr_len = msg->data_len; memcpy(peer_addr, msg->data, peer_addr_len); free(msg); /* Send test string to client */ ep_params.field_mask = UCP_EP_PARAM_FIELD_REMOTE_ADDRESS; ep_params.address = peer_addr; status = ucp_ep_create(ucp_worker, &ep_params, &rem_ep); if (status != UCS_OK) { abort(); } }