/*
 * MPI_Put implementation for the Portals4 one-sided component.
 *
 * Transfers origin_count elements of origin_dt from origin_addr into the
 * target process's window at displacement target_disp.  Only contiguous
 * origin and target datatypes are supported; anything else returns
 * OMPI_ERR_NOT_SUPPORTED.  On success, one outstanding operation has been
 * added to module->opcount (retired when the PtlPut completes).
 */
int ompi_osc_portals4_put(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
                          int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
                          struct ompi_datatype_t *target_dt, struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_handle_md_t md_h;
    void *md_base;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    /* byte offset into the target window: per-peer displacement unit
       times the caller-supplied displacement */
    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Put: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        /* count the operation before issuing it so completion processing
           never sees opcount underflow */
        (void)opal_atomic_add_64(&module->opcount, 1);

        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
        length *= origin_count;

        /* find the memory descriptor covering origin_addr; the Portals
           offset is relative to that MD's base address */
        ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);

        ret = PtlPut(md_h,
                     (ptl_size_t) ((char*) origin_addr - (char*) md_base),
                     length,
                     PTL_ACK_REQ,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     NULL,
                     0);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
/*
 * Complete an SMSG connection with a remote peer once its mailbox
 * attributes have arrived.  Initializes the GNI SMSG channel, wires the
 * completion-event data on both endpoint handles, marks the endpoint
 * connected, and flushes any sends queued while the connection was
 * being established.
 */
static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
    gni_return_t grc;
    int rc;

    BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d", ep->remote_attr.smsg_attr.msg_type, ep->remote_attr.smsg_attr.msg_buffer,
                 ep->remote_attr.smsg_attr.buff_size, ep->remote_attr.smsg_attr.mem_hndl.qword1,
                 ep->remote_attr.smsg_attr.mem_hndl.qword2, ep->remote_attr.smsg_attr.mbox_offset,
                 ep->remote_attr.smsg_attr.mbox_maxcredit, ep->remote_attr.smsg_attr.msg_maxsize));

    BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d", ep->mailbox->attr.smsg_attr.msg_type, ep->mailbox->attr.smsg_attr.msg_buffer,
                 ep->mailbox->attr.smsg_attr.buff_size, ep->mailbox->attr.smsg_attr.mem_hndl.qword1,
                 ep->mailbox->attr.smsg_attr.mem_hndl.qword2, ep->mailbox->attr.smsg_attr.mbox_offset,
                 ep->mailbox->attr.smsg_attr.mbox_maxcredit, ep->mailbox->attr.smsg_attr.msg_maxsize));

    /* bind local and remote mailbox attributes to the SMSG endpoint */
    grc = GNI_SmsgInit (ep->smsg_ep_handle, &ep->mailbox->attr.smsg_attr, &ep->remote_attr.smsg_attr);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
        BTL_ERROR(("error initializing SMSG protocol. rc = %d", grc));
        return opal_common_rc_ugni_to_opal (grc);
    }

    /* set the local event data to the local index and the remote event data to my
     * index on the remote peer. This makes lookup of endpoints on completion take
     * a single lookup in the endpoints array. we will not be able to change the
     * remote peer's index in the endpoint's array after this point. */
    GNI_EpSetEventData (ep->rdma_ep_handle, ep->index, ep->remote_attr.index);
    GNI_EpSetEventData (ep->smsg_ep_handle, ep->index, ep->remote_attr.index);

    ep->rmt_irq_mem_hndl = ep->remote_attr.rmt_irq_mem_hndl;
    ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;

    /* bookkeeping: one more connected peer (decremented on disconnect) */
    (void) opal_atomic_add_64 (&ep->btl->connected_peer_count, 1);

    /* send all pending messages */
    BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));

    rc = mca_btl_ugni_progress_send_wait_list (ep);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        /* could not drain everything now; park the endpoint on the BTL's
           wait list so progress will retry later */
        OPAL_THREAD_LOCK(&ep->btl->ep_wait_list_lock);
        if (false == ep->wait_listed) {
            opal_list_append (&ep->btl->ep_wait_list, &ep->super);
            ep->wait_listed = true;
        }
        OPAL_THREAD_UNLOCK(&ep->btl->ep_wait_list_lock);
    }

    return OPAL_SUCCESS;
}
static void *thread_main(void *arg) { int rank = (int) (unsigned long) arg; int i; /* thread tests */ for (i = 0; i < nreps; i++) { opal_atomic_add_32(&val32, 5); #if OPAL_HAVE_ATOMIC_MATH_64 opal_atomic_add_64(&val64, 5); #endif opal_atomic_add(&valint, 5); } return (void *) (unsigned long) (rank + 1000); }
/*
 * Tear down the connection state of an endpoint.  Optionally sends a
 * best-effort DISCONNECT control message first, then destroys the SMSG
 * and RDMA GNI endpoint handles, returns the SMSG mailbox to the free
 * list, and resets the endpoint to INIT.
 *
 * Fix: the connected-peer counter was being decremented by 11 (typo
 * "-11"); it must be decremented by exactly 1 to mirror the "+1" done
 * in mca_btl_ugni_ep_connect_finish, otherwise the count underflows.
 */
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect) {
    gni_return_t rc;

    if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
        /* nothing to do */
        return OPAL_SUCCESS;
    }

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
        /* best-effort notification; failure is only logged */
        OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
        rc = GNI_SmsgSendWTag (ep->smsg_ep_handle, NULL, 0, NULL, 0, -1,
                               MCA_BTL_UGNI_TAG_DISCONNECT);
        OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);
        if (GNI_RC_SUCCESS != rc) {
            BTL_VERBOSE(("btl/ugni could not send close message"));
        }

        /* we might want to wait for local completion here (do we even care),
           yes we do */
        /* TODO: FIX FIX FIX */
    }

    /* TODO: FIX GROSS */
    OPAL_THREAD_LOCK(&ep->common->dev->dev_lock);
    (void) opal_common_ugni_ep_destroy (&ep->smsg_ep_handle);
    (void) opal_common_ugni_ep_destroy (&ep->rdma_ep_handle);
    OPAL_THREAD_UNLOCK(&ep->common->dev->dev_lock);

    if (ep->mailbox) {
        opal_free_list_return (&ep->btl->smsg_mboxes,
                               ((opal_free_list_item_t *) ep->mailbox));
        ep->mailbox = NULL;
    }

    ep->state = MCA_BTL_UGNI_EP_STATE_INIT;

    /* undo the +1 performed when the connection finished */
    (void) opal_atomic_add_64 (&ep->btl->connected_peer_count, -1);

    return OPAL_SUCCESS;
}
/*
 * Test driver for the OPAL atomic operations.
 *
 * Usage: <prog> <nthreads>.  Exits 77 (standard "skip" code) when the
 * argument count is wrong.  First exercises every compare-and-set and
 * add variant single-threaded, then (when POSIX threads are available)
 * spawns nthreads workers that each perform nreps atomic adds of 5 on
 * the shared counters and verifies the final totals.
 */
int main(int argc, char *argv[]) {
#if OPAL_HAVE_POSIX_THREADS
    int tid;
    pthread_t *th;
#endif

    if (argc != 2) {
        printf("*** Incorrect number of arguments. Skipping test\n");
        return 77;
    }
    nthreads = atoi(argv[1]);

    /* first test single-threaded functionality */

    /* -- cmpset 32-bit tests -- */

    /* each triplet: success case (old value matches), then failure case
       (old value 420 does not match, target must be unchanged) */
    vol32 = 42, old32 = 42, new32 = 50;
    assert(opal_atomic_cmpset_32(&vol32, old32, new32) == 1);
    opal_atomic_rmb();
    assert(vol32 == new32);

    vol32 = 42, old32 = 420, new32 = 50;
    assert(opal_atomic_cmpset_32(&vol32, old32, new32) == 0);
    opal_atomic_rmb();
    assert(vol32 == 42);

    vol32 = 42, old32 = 42, new32 = 50;
    assert(opal_atomic_cmpset_acq_32(&vol32, old32, new32) == 1);
    assert(vol32 == new32);

    vol32 = 42, old32 = 420, new32 = 50;
    assert(opal_atomic_cmpset_acq_32(&vol32, old32, new32) == 0);
    assert(vol32 == 42);

    vol32 = 42, old32 = 42, new32 = 50;
    assert(opal_atomic_cmpset_rel_32(&vol32, old32, new32) == 1);
    opal_atomic_rmb();
    assert(vol32 == new32);

    vol32 = 42, old32 = 420, new32 = 50;
    assert(opal_atomic_cmpset_rel_32(&vol32, old32, new32) == 0);
    opal_atomic_rmb();
    assert(vol32 == 42);

    /* -- cmpset 64-bit tests -- */
#if OPAL_HAVE_ATOMIC_MATH_64
    vol64 = 42, old64 = 42, new64 = 50;
    assert(1 == opal_atomic_cmpset_64(&vol64, old64, new64));
    opal_atomic_rmb();
    assert(new64 == vol64);

    vol64 = 42, old64 = 420, new64 = 50;
    assert(opal_atomic_cmpset_64(&vol64, old64, new64) == 0);
    opal_atomic_rmb();
    assert(vol64 == 42);

    vol64 = 42, old64 = 42, new64 = 50;
    assert(opal_atomic_cmpset_acq_64(&vol64, old64, new64) == 1);
    assert(vol64 == new64);

    vol64 = 42, old64 = 420, new64 = 50;
    assert(opal_atomic_cmpset_acq_64(&vol64, old64, new64) == 0);
    assert(vol64 == 42);

    vol64 = 42, old64 = 42, new64 = 50;
    assert(opal_atomic_cmpset_rel_64(&vol64, old64, new64) == 1);
    opal_atomic_rmb();
    assert(vol64 == new64);

    vol64 = 42, old64 = 420, new64 = 50;
    assert(opal_atomic_cmpset_rel_64(&vol64, old64, new64) == 0);
    opal_atomic_rmb();
    assert(vol64 == 42);
#endif

    /* -- cmpset int tests -- */

    volint = 42, oldint = 42, newint = 50;
    assert(opal_atomic_cmpset(&volint, oldint, newint) == 1);
    opal_atomic_rmb();
    assert(volint == newint);

    volint = 42, oldint = 420, newint = 50;
    assert(opal_atomic_cmpset(&volint, oldint, newint) == 0);
    opal_atomic_rmb();
    assert(volint == 42);

    volint = 42, oldint = 42, newint = 50;
    assert(opal_atomic_cmpset_acq(&volint, oldint, newint) == 1);
    assert(volint == newint);

    volint = 42, oldint = 420, newint = 50;
    assert(opal_atomic_cmpset_acq(&volint, oldint, newint) == 0);
    assert(volint == 42);

    volint = 42, oldint = 42, newint = 50;
    assert(opal_atomic_cmpset_rel(&volint, oldint, newint) == 1);
    opal_atomic_rmb();
    assert(volint == newint);

    volint = 42, oldint = 420, newint = 50;
    assert(opal_atomic_cmpset_rel(&volint, oldint, newint) == 0);
    opal_atomic_rmb();
    assert(volint == 42);

    /* -- cmpset ptr tests -- */

    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    assert(opal_atomic_cmpset_ptr(&volptr, oldptr, newptr) == 1);
    opal_atomic_rmb();
    assert(volptr == newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    assert(opal_atomic_cmpset_ptr(&volptr, oldptr, newptr) == 0);
    opal_atomic_rmb();
    assert(volptr == (void *) 42);

    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    assert(opal_atomic_cmpset_acq_ptr(&volptr, oldptr, newptr) == 1);
    assert(volptr == newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    assert(opal_atomic_cmpset_acq_ptr(&volptr, oldptr, newptr) == 0);
    assert(volptr == (void *) 42);

    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    assert(opal_atomic_cmpset_rel_ptr(&volptr, oldptr, newptr) == 1);
    opal_atomic_rmb();
    assert(volptr == newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    assert(opal_atomic_cmpset_rel_ptr(&volptr, oldptr, newptr) == 0);
    opal_atomic_rmb();
    assert(volptr == (void *) 42);

    /* -- add_32 tests -- */

    val32 = 42;
    assert(opal_atomic_add_32(&val32, 5) == (42 + 5));
    opal_atomic_rmb();
    assert((42 + 5) == val32);

    /* -- add_64 tests -- */
#if OPAL_HAVE_ATOMIC_MATH_64
    val64 = 42;
    assert(opal_atomic_add_64(&val64, 5) == (42 + 5));
    opal_atomic_rmb();
    assert((42 + 5) == val64);
#endif

    /* -- add_int tests -- */

    valint = 42;
    opal_atomic_add(&valint, 5);
    opal_atomic_rmb();
    assert((42 + 5) == valint);

    /* threaded tests */

    /* reset counters; each of nthreads workers adds 5, nreps times */
    val32 = 0;
#if OPAL_HAVE_ATOMIC_MATH_64
    val64 = 0ul;
#endif
    valint = 0;

    /* -- create the thread set -- */
#if OPAL_HAVE_POSIX_THREADS
    th = (pthread_t *) malloc(nthreads * sizeof(pthread_t));
    if (!th) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    for (tid = 0; tid < nthreads; tid++) {
        if (pthread_create(&th[tid], NULL, thread_main,
                           (void *) (unsigned long) tid) != 0) {
            perror("pthread_create");
            exit(EXIT_FAILURE);
        }
    }

    /* -- wait for the thread set to finish -- */
    for (tid = 0; tid < nthreads; tid++) {
        void *thread_return;

        if (pthread_join(th[tid], &thread_return) != 0) {
            perror("pthread_join");
            exit(EXIT_FAILURE);
        }
    }
    free(th);

    opal_atomic_rmb();
    assert((5 * nthreads * nreps) == val32);
#if OPAL_HAVE_ATOMIC_MATH_64
    opal_atomic_rmb();
    assert((5 * nthreads * nreps) == val64);
#endif
    opal_atomic_rmb();
    assert((5 * nthreads * nreps) == valint);
#endif

    return 0;
}
/*
 * MPI_Compare_and_swap implementation for the Portals4 one-sided
 * component.  Issues a single PtlSwap with PTL_CSWAP: the target value
 * at window displacement target_disp is compared to *compare_addr,
 * conditionally replaced by *origin_addr, and the previous value lands
 * in *result_addr.  The element must fit in one fetch-atomic
 * (asserted against module->fetch_atomic_max).
 */
int ompi_osc_portals4_compare_and_swap(const void *origin_addr, const void *compare_addr,
                                       void *result_addr, struct ompi_datatype_t *dt,
                                       int target, OPAL_PTRDIFF_TYPE target_disp,
                                       struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_datatype_t ptl_dt;
    ptl_size_t result_md_offset, origin_md_offset;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %d, 0x%lx",
                         (unsigned long) origin_addr,
                         (unsigned long) compare_addr,
                         (unsigned long) result_addr,
                         dt->name, target, (int) target_disp,
                         (unsigned long) win));

    /* map the MPI datatype onto a Portals atomic datatype */
    ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
    if (OMPI_SUCCESS != ret) return ret;

    /* byte offset into the target window */
    offset = get_displacement(module, target) * target_disp;

    ret = ompi_datatype_type_size(dt, &length);
    if (OMPI_SUCCESS != ret) return ret;

    /* single-element operation must fit in one network atomic */
    assert(length <= module->fetch_atomic_max);

    /* the MD covers all of memory, so the raw addresses double as MD
       offsets — NOTE(review): this assumes a whole-address-space MD;
       confirm against module->md_h setup */
    result_md_offset = (ptl_size_t) result_addr;
    origin_md_offset = (ptl_size_t) origin_addr;

    (void)opal_atomic_add_64(&module->opcount, 1);

    ret = PtlSwap(module->md_h,
                  result_md_offset,
                  module->md_h,
                  origin_md_offset,
                  length,
                  peer,
                  module->pt_idx,
                  module->match_bits,
                  offset,
                  NULL,
                  0,
                  compare_addr,
                  PTL_CSWAP,
                  ptl_dt);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    return OMPI_SUCCESS;
}
/*
 * MPI_Get_accumulate implementation for the Portals4 one-sided
 * component.  Fetches the current target data into result_addr and
 * combines origin data into the target with op.  Three cases:
 *   - MPI_REPLACE: PtlSwap (fetch old value, store new),
 *   - MPI_NO_OP:   PtlGet only (pure fetch),
 *   - other ops:   PtlFetchAtomic.
 * Transfers larger than module->fetch_atomic_max are chunked; each
 * chunk bumps module->opcount.  Non-contiguous datatypes are rejected
 * unless the corresponding count is 0.
 */
int ompi_osc_portals4_get_accumulate(const void *origin_addr, int origin_count,
                                     struct ompi_datatype_t *origin_dt,
                                     void *result_addr, int result_count,
                                     struct ompi_datatype_t *result_dt,
                                     int target, MPI_Aint target_disp,
                                     int target_count, struct ompi_datatype_t *target_dt,
                                     struct ompi_op_t *op, struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, (unsigned long) result_addr,
                         result_count, result_dt->name,
                         target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    /* byte offset into the target window */
    offset = get_displacement(module, target) * target_disp;

    /* we don't support non-contiguous buffers.  but if the count is 0,
       we don't care if buffer is non-contiguous. */
    if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
        (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count)) ||
        (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count))) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Get_accumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        sent = 0;

        if (MPI_REPLACE == op) {
            /* replace: fetch old target bytes and overwrite with origin */
            ptl_size_t result_md_offset, origin_md_offset;

            ret = ompi_datatype_type_size(origin_dt, &length);
            if (OMPI_SUCCESS != ret) {
                return ret;
            }

            ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
            if (OMPI_SUCCESS != ret) {
                return ret;
            }

            length *= origin_count;

            result_md_offset = (ptl_size_t) result_addr;
            origin_md_offset = (ptl_size_t) origin_addr;

            /* chunk by the fetch-atomic limit */
            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);

                ret = PtlSwap(module->md_h,
                              result_md_offset + sent,
                              module->md_h,
                              origin_md_offset + sent,
                              msg_length,
                              peer,
                              module->pt_idx,
                              module->match_bits,
                              offset + sent,
                              NULL,
                              0,
                              NULL,
                              PTL_SWAP,
                              ptl_dt);
                sent += msg_length;
            } while (sent < length);
        } else if (MPI_NO_OP == op) {
            /* no-op: pure fetch of the target data into result_addr */
            ptl_size_t md_offset;

            ret = ompi_datatype_type_size(target_dt, &length);
            if (OMPI_SUCCESS != ret) {
                return ret;
            }

            length *= target_count;

            md_offset = (ptl_size_t) result_addr;

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);

                ret = PtlGet(module->md_h,
                             md_offset + sent,
                             msg_length,
                             peer,
                             module->pt_idx,
                             module->match_bits,
                             offset + sent,
                             NULL);
                sent += msg_length;
            } while (sent < length);
        } else {
            /* general reduction op: fetch-and-combine */
            ptl_size_t result_md_offset, origin_md_offset;

            ret = ompi_datatype_type_size(origin_dt, &length);
            if (OMPI_SUCCESS != ret) {
                return ret;
            }

            length *= origin_count;

            result_md_offset = (ptl_size_t) result_addr;
            origin_md_offset = (ptl_size_t) origin_addr;

            ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
            if (OMPI_SUCCESS != ret) return ret;

            ret = ompi_osc_portals4_get_op(op, &ptl_op);
            if (OMPI_SUCCESS != ret) return ret;

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);

                ret = PtlFetchAtomic(module->md_h,
                                     result_md_offset + sent,
                                     module->md_h,
                                     origin_md_offset + sent,
                                     msg_length,
                                     peer,
                                     module->pt_idx,
                                     module->match_bits,
                                     offset + sent,
                                     NULL,
                                     0,
                                     ptl_op,
                                     ptl_dt);
                sent += msg_length;
            } while (sent < length);
        }

        /* NOTE(review): only the last chunk's return code is checked
           here; earlier chunk failures are overwritten by later calls */
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
/*
 * MPI_Accumulate implementation for the Portals4 one-sided component.
 * Combines origin data into the target window with op: MPI_REPLACE is
 * a plain PtlPut; any other op becomes PtlAtomic.  Transfers larger
 * than module->atomic_max are chunked, with module->opcount bumped per
 * chunk.  Only contiguous datatypes are supported.
 */
int ompi_osc_portals4_accumulate(const void *origin_addr, int origin_count,
                                 struct ompi_datatype_t *origin_dt,
                                 int target, OPAL_PTRDIFF_TYPE target_disp,
                                 int target_count, struct ompi_datatype_t *target_dt,
                                 struct ompi_op_t *op, struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    /* byte offset into the target window */
    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Accumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        ptl_size_t md_offset;

        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
        length *= origin_count;
        sent = 0;

        md_offset = (ptl_size_t) origin_addr;

        /* chunk by the atomic payload limit */
        do {
            size_t msg_length = MIN(module->atomic_max, length - sent);

            (void)opal_atomic_add_64(&module->opcount, 1);

            if (MPI_REPLACE == op) {
                /* replace needs no read-modify-write; a put suffices */
                ret = PtlPut(module->md_h,
                             md_offset + sent,
                             msg_length,
                             PTL_ACK_REQ,
                             peer,
                             module->pt_idx,
                             module->match_bits,
                             offset + sent,
                             NULL,
                             0);
            } else {
                ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
                if (OMPI_SUCCESS != ret) return ret;

                ret = ompi_osc_portals4_get_op(op, &ptl_op);
                if (OMPI_SUCCESS != ret) return ret;

                ret = PtlAtomic(module->md_h,
                                md_offset + sent,
                                msg_length,
                                PTL_ACK_REQ,
                                peer,
                                module->pt_idx,
                                module->match_bits,
                                offset + sent,
                                NULL,
                                0,
                                ptl_op,
                                ptl_dt);
            }
            if (OMPI_SUCCESS != ret) {
                return ret;
            }
            sent += msg_length;
        } while (sent < length);
    }

    return OMPI_SUCCESS;
}
/*
 * MPI_Rget_accumulate implementation for the Portals4 one-sided
 * component.  Request-based variant of get_accumulate: allocates an
 * ompi_osc_portals4_request_t, returns it through ompi_req, and tracks
 * per-chunk completion via request->ops_expected.  Three cases:
 *   - MPI_REPLACE: PtlSwap (fetch old value, store new),
 *   - MPI_NO_OP:   PtlGet only (pure fetch),
 *   - other ops:   PtlFetchAtomic.
 * Transfers larger than module->fetch_atomic_max are chunked.
 *
 * Fix: in the generic-op branch the ompi_osc_portals4_get_dt /
 * ompi_osc_portals4_get_op failure paths returned without releasing
 * the already-allocated request, leaking it.  Every error path now
 * calls OMPI_OSC_PORTALS4_REQUEST_RETURN, matching the other branches.
 */
int ompi_osc_portals4_rget_accumulate(const void *origin_addr, int origin_count,
                                      struct ompi_datatype_t *origin_dt,
                                      void *result_addr, int result_count,
                                      struct ompi_datatype_t *result_dt,
                                      int target, MPI_Aint target_disp,
                                      int target_count, struct ompi_datatype_t *target_dt,
                                      struct ompi_op_t *op, struct ompi_win_t *win,
                                      struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_portals4_request_t *request;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, (unsigned long) result_addr,
                         result_count, result_dt->name,
                         target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    /* byte offset into the target window */
    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Rget_accumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        sent = 0;

        if (MPI_REPLACE == op) {
            /* replace: fetch old target bytes and overwrite with origin */
            ptl_size_t result_md_offset, origin_md_offset;

            ret = ompi_datatype_type_size(origin_dt, &length);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }

            ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }

            length *= origin_count;

            result_md_offset = (ptl_size_t) result_addr;
            origin_md_offset = (ptl_size_t) origin_addr;

            /* chunk by the fetch-atomic limit; results use the
               request-tracked MD so completion events carry request */
            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);
                request->ops_expected++;

                ret = PtlSwap(module->req_md_h,
                              result_md_offset + sent,
                              module->md_h,
                              origin_md_offset + sent,
                              msg_length,
                              peer,
                              module->pt_idx,
                              module->match_bits,
                              offset + sent,
                              request,
                              0,
                              NULL,
                              PTL_SWAP,
                              ptl_dt);
                sent += msg_length;
            } while (sent < length);
        } else if (MPI_NO_OP == op) {
            /* no-op: pure fetch of the target data into result_addr */
            ptl_size_t md_offset;

            ret = ompi_datatype_type_size(target_dt, &length);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }

            length *= target_count;

            md_offset = (ptl_size_t) result_addr;

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);
                request->ops_expected++;

                ret = PtlGet(module->req_md_h,
                             md_offset + sent,
                             msg_length,
                             peer,
                             module->pt_idx,
                             module->match_bits,
                             offset + sent,
                             request);
                sent += msg_length;
            } while (sent < length);
        } else {
            /* general reduction op: fetch-and-combine */
            ptl_size_t result_md_offset, origin_md_offset;

            ret = ompi_datatype_type_size(origin_dt, &length);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }

            length *= origin_count;

            result_md_offset = (ptl_size_t) result_addr;
            origin_md_offset = (ptl_size_t) origin_addr;

            ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
            if (OMPI_SUCCESS != ret) {
                /* fixed: release the request before bailing out */
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }

            ret = ompi_osc_portals4_get_op(op, &ptl_op);
            if (OMPI_SUCCESS != ret) {
                /* fixed: release the request before bailing out */
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);
                request->ops_expected++;

                ret = PtlFetchAtomic(module->req_md_h,
                                     result_md_offset + sent,
                                     module->md_h,
                                     origin_md_offset + sent,
                                     msg_length,
                                     peer,
                                     module->pt_idx,
                                     module->match_bits,
                                     offset + sent,
                                     request,
                                     0,
                                     ptl_op,
                                     ptl_dt);
                sent += msg_length;
            } while (sent < length);
        }

        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
/*
 * MPI_Rget implementation for the Portals4 one-sided component.
 * Request-based get: allocates a request, returns it via ompi_req, and
 * issues a single PtlGet on the request-tracked MD so the completion
 * event carries the request pointer.  Only contiguous datatypes are
 * supported.
 */
int ompi_osc_portals4_rget(void *origin_addr, int origin_count,
                           struct ompi_datatype_t *origin_dt,
                           int target, OPAL_PTRDIFF_TYPE target_disp,
                           int target_count, struct ompi_datatype_t *target_dt,
                           struct ompi_win_t *win, struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_portals4_request_t *request;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    /* byte offset into the target window */
    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Rget: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        (void)opal_atomic_add_64(&module->opcount, 1);
        /* single Portals operation backs this request */
        request->ops_expected = 1;

        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
        length *= origin_count;

        /* origin_addr used directly as MD offset — NOTE(review): assumes
           req_md_h spans the whole address space; confirm MD setup */
        ret = PtlGet(module->req_md_h,
                     (ptl_size_t) origin_addr,
                     length,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     request);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
/*
 * MPI_Fetch_and_op implementation for the Portals4 one-sided component.
 * Single-element fetch-and-combine at window displacement target_disp:
 *   - MPI_REPLACE: PtlSwap (fetch old value, store new),
 *   - MPI_NO_OP:   PtlGet only (pure fetch),
 *   - other ops:   PtlFetchAtomic.
 * Each address is resolved to its memory descriptor via
 * ompi_osc_portals4_get_md, with the Portals offset computed relative
 * to that MD's base.
 *
 * Fix: the size assertion used a strict '<' against
 * module->fetch_atomic_max, rejecting an element exactly equal to the
 * fetch-atomic limit; compare_and_swap in this same component asserts
 * '<=', and the limit is an inclusive maximum.  Changed to '<=' for
 * consistency.
 */
int ompi_osc_portals4_fetch_and_op(void *origin_addr, void *result_addr,
                                   struct ompi_datatype_t *dt, int target,
                                   OPAL_PTRDIFF_TYPE target_disp,
                                   struct ompi_op_t *op, struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr,
                         (unsigned long) result_addr,
                         dt->name, target, (int) target_disp,
                         op->o_name,
                         (unsigned long) win));

    /* map the MPI datatype onto a Portals atomic datatype */
    ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
    if (OMPI_SUCCESS != ret) return ret;

    /* byte offset into the target window */
    offset = get_displacement(module, target) * target_disp;

    ret = ompi_datatype_type_size(dt, &length);
    if (OMPI_SUCCESS != ret) return ret;

    /* single element must fit in one network fetch-atomic
       (inclusive limit, matching compare_and_swap) */
    assert(length <= module->fetch_atomic_max);

    (void)opal_atomic_add_64(&module->opcount, 1);

    if (MPI_REPLACE == op) {
        /* replace: fetch old value into result_addr, store origin */
        ptl_handle_md_t result_md_h, origin_md_h;
        void *result_md_base, *origin_md_base;
        ptl_size_t result_md_offset, origin_md_offset;

        ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
        result_md_offset = ((char*) result_addr - (char*) result_md_base);
        ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
        origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);

        ret = PtlSwap(result_md_h,
                      result_md_offset,
                      origin_md_h,
                      origin_md_offset,
                      length,
                      peer,
                      module->pt_idx,
                      module->match_bits,
                      offset,
                      NULL,
                      0,
                      NULL,
                      PTL_SWAP,
                      ptl_dt);
    } else if (MPI_NO_OP == op) {
        /* no-op: pure fetch of the target value into result_addr */
        ptl_handle_md_t md_h;
        void *md_base;
        ptl_size_t md_offset;

        ompi_osc_portals4_get_md(result_addr, module->md_h, &md_h, &md_base);
        md_offset = ((char*) result_addr - (char*) md_base);

        ret = PtlGet(md_h,
                     md_offset,
                     length,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     NULL);
    } else {
        /* general reduction op: fetch-and-combine */
        ptl_handle_md_t result_md_h, origin_md_h;
        void *result_md_base, *origin_md_base;
        ptl_size_t result_md_offset, origin_md_offset;

        ret = ompi_osc_portals4_get_op(op, &ptl_op);
        if (OMPI_SUCCESS != ret) return ret;

        ompi_osc_portals4_get_md(result_addr, module->md_h, &result_md_h, &result_md_base);
        result_md_offset = ((char*) result_addr - (char*) result_md_base);
        ompi_osc_portals4_get_md(origin_addr, module->md_h, &origin_md_h, &origin_md_base);
        origin_md_offset = ((char*) origin_addr - (char*) origin_md_base);

        ret = PtlFetchAtomic(result_md_h,
                             result_md_offset,
                             origin_md_h,
                             origin_md_offset,
                             length,
                             peer,
                             module->pt_idx,
                             module->match_bits,
                             offset,
                             NULL,
                             0,
                             ptl_op,
                             ptl_dt);
    }
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    return OMPI_SUCCESS;
}
/*
 * Enter flow-control recovery for the Portals4 MTL.
 *
 * Marks flow control active, advances the epoch counter, re-arms the
 * trigger/alarm and barrier for the new epoch, drains all pending
 * sends and the event queue, relinks the short receive blocks, sorts
 * pending sends back into sequence-number order, and finally enters
 * the recovery barrier by sending a zero-byte FANIN message to the
 * flow-control root.  Recovery completes asynchronously when the
 * fan-out event arrives, so OMPI_SUCCESS here only means the barrier
 * entry was posted.
 */
static int start_recover(void)
{
    int ret;
    int64_t epoch_counter;

    ompi_mtl_portals4.flowctl.flowctl_active = true;
    /* atomically advance the epoch so concurrent triggers agree on it */
    epoch_counter = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1);
    /* NOTE(review): "%ld" paired with int64_t is only correct where
       long is 64 bits; PRId64 would be portable — confirm */
    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                        "Entering flowctl_start_recover %ld",
                        epoch_counter);

    /* re-arm trigger/alarm for next time */
    ret = setup_alarm(epoch_counter);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d setup_alarm failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    /* setup barrier tree for getting us out of flow control */
    ret = setup_barrier(epoch_counter);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d setup_barrier failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    /* drain all pending sends */
    while (ompi_mtl_portals4.flowctl.send_slots !=
           ompi_mtl_portals4.flowctl.max_send_slots) {
        opal_progress();
    }

    /* drain event queue */
    while (0 != ompi_mtl_portals4_progress()) { ; }

    /* check short block active count */
    ret = ompi_mtl_portals4_recv_short_link(1);
    if (OMPI_SUCCESS != ret) {
        /* non-fatal: logged but recovery continues */
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: recv_short_link failed: %d",
                            __FILE__, __LINE__, ret);
    }

    /* reorder the pending sends by operation count */
    ret = opal_list_sort(&ompi_mtl_portals4.flowctl.pending_sends, seqnum_compare);
    if (OMPI_SUCCESS != ret) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d opal_list_sort failed: %d\n",
                            __FILE__, __LINE__, ret);
        return ret;
    }

    /* drain event queue again, just to make sure */
    while (0 != ompi_mtl_portals4_progress()) { ; }

    /* send barrier entry message */
    ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                 0,
                 0,
                 PTL_NO_ACK_REQ,
                 ompi_mtl_portals4.flowctl.me,
                 ompi_mtl_portals4.flowctl_idx,
                 MTL_PORTALS4_FLOWCTL_FANIN,
                 0,
                 NULL,
                 0);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        /* NOTE(review): on this path ret holds a PTL_* code, not an
           OMPI error code — verify callers tolerate that */
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPut failed: %d\n",
                            __FILE__, __LINE__, ret);
        goto error;
    }

    /* recovery complete when fan-out event arrives, async event, so
       we're done now */
    ret = OMPI_SUCCESS;

 error:
    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Exiting flowctl_start_recover %ld",
                         epoch_counter));
    return ret;
}