static inline int ompi_osc_pt2pt_acc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, const void *source, int source_count, ompi_datatype_t *source_datatype, OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) { void *target = (unsigned char*) module->baseptr + ((unsigned long) target_disp * module->disp_unit); int ret; /* if we are in active target mode wait until all post messages arrive */ ompi_osc_pt2pt_sync_wait_expected (pt2pt_sync); ompi_osc_pt2pt_accumulate_lock (module); if (&ompi_mpi_op_replace.op != op) { ret = ompi_osc_base_sndrcv_op (source, source_count, source_datatype, target, target_count, target_datatype, op); } else { ret = ompi_datatype_sndrcv ((void *)source, source_count, source_datatype, target, target_count, target_datatype); } ompi_osc_pt2pt_accumulate_unlock (module); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_acc_self: failed performing accumulate operation. ret = %d", ret)); return ret; } if (request) { ompi_osc_pt2pt_request_complete (request, MPI_SUCCESS); } return OMPI_SUCCESS; }
static inline int ompi_osc_rdma_gacc_self (void *source, int source_count, ompi_datatype_t *source_datatype, void *result, int result_count, ompi_datatype_t *result_datatype, OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_module_t *module, ompi_osc_rdma_request_t *request) { void *target = (unsigned char*) module->baseptr + ((unsigned long) target_disp * module->disp_unit); int ret; /* if we are in active target mode wait until all post messages arrive */ if (module->sc_group && !module->active_eager_send_active) { OPAL_THREAD_LOCK(&module->lock); while (0 != module->num_post_msgs) { opal_condition_wait(&module->cond, &module->lock); } OPAL_THREAD_UNLOCK(&module->lock); } if (!(module->passive_target_access_epoch || module->active_eager_send_active)) { return OMPI_ERR_RMA_SYNC; } ompi_osc_rdma_accumulate_lock (module); do { ret = ompi_datatype_sndrcv (target, target_count, target_datatype, result, result_count, result_datatype); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output, "ompi_osc_rdma_gacc_self: failed copying to the target buffer. ret = %d", ret)); break; } if (&ompi_mpi_op_no_op.op != op) { if (&ompi_mpi_op_replace.op != op) { ret = ompi_osc_base_sndrcv_op (source, source_count, source_datatype, target, target_count, target_datatype, op); } else { ret = ompi_datatype_sndrcv (source, source_count, source_datatype, target, target_count, target_datatype); } } if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output, "ompi_osc_rdma_gacc_self: failed performing accumulate operation. ret = %d", ret)); break; } } while (0); ompi_osc_rdma_accumulate_unlock (module); if (request) { /* NTH: is it ok to use an ompi error code here? */ ompi_osc_rdma_request_complete (request, ret); } return OMPI_SUCCESS; }
/**
 * Request-based get-accumulate on a shared-memory window.
 *
 * The target location is module->bases[target] advanced by target_disp
 * scaled with module->disp_units[target]. While holding
 * module->node_states[target].accumulate_lock, the target contents are
 * transferred into the result buffer; if that succeeds and op is not
 * &ompi_mpi_op_no_op.op, the origin data is then applied to the target
 * (ompi_datatype_sndrcv() for &ompi_mpi_op_replace.op,
 * ompi_osc_base_sndrcv_op() otherwise).
 *
 * @param origin_addr   origin buffer
 * @param origin_count  number of origin_dt elements in origin_addr
 * @param origin_dt     datatype describing the origin buffer
 * @param result_addr   buffer receiving the previous target contents
 * @param result_count  number of result_dt elements in result_addr
 * @param result_dt     datatype describing the result buffer
 * @param target        index into module->bases / disp_units / node_states
 * @param target_disp   displacement at the target, in target displacement units
 * @param target_count  number of target_dt elements at the target
 * @param target_dt     datatype describing the target
 * @param op            operation to apply
 * @param win           window; its w_osc_module is the sm module
 * @param ompi_req      [out] set to the request allocated for this call
 *
 * @return OMPI_ERR_OUT_OF_RESOURCE if no request was obtained; otherwise the
 *         status of the last transfer performed. The request is marked
 *         complete before returning regardless of that status.
 */
int ompi_osc_sm_rget_accumulate(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
                                void *result_addr, int result_count, struct ompi_datatype_t *result_dt,
                                int target, MPI_Aint target_disp, int target_count,
                                struct ompi_datatype_t *target_dt, struct ompi_op_t *op,
                                struct ompi_win_t *win, struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_sm_request_t *request;
    ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module;
    void *target_ptr;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget_accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count, origin_dt->name,
                         target, (int) target_disp, target_count, target_dt->name,
                         op->o_name, (unsigned long) win));

    OMPI_OSC_SM_REQUEST_ALLOC(win, request);
    if (NULL == request) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    *ompi_req = &request->super;

    target_ptr = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp;

    opal_atomic_lock(&module->node_states[target].accumulate_lock);

    /* fetch: target -> result */
    ret = ompi_datatype_sndrcv(target_ptr, target_count, target_dt,
                               result_addr, result_count, result_dt);

    /* update: origin -> target, only after a successful fetch and only when
     * an actual operation was requested */
    if (OMPI_SUCCESS == ret && op != &ompi_mpi_op_no_op.op) {
        if (op != &ompi_mpi_op_replace.op) {
            ret = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt,
                                          target_ptr, target_count, target_dt, op);
        } else {
            ret = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt,
                                       target_ptr, target_count, target_dt);
        }
    }

    opal_atomic_unlock(&module->node_states[target].accumulate_lock);

    OMPI_OSC_SM_REQUEST_COMPLETE(request);

    return ret;
}
static int ompi_osc_rdma_gacc_local (const void *source_buffer, int source_count, ompi_datatype_t *source_datatype, void *result_buffer, int result_count, ompi_datatype_t *result_datatype, ompi_osc_rdma_peer_t *peer, uint64_t target_address, mca_btl_base_registration_handle_t *target_handle, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_rdma_module_t *module, ompi_osc_rdma_request_t *request) { int ret = OMPI_SUCCESS; do { if (!ompi_osc_rdma_peer_is_exclusive (peer)) { (void) ompi_osc_rdma_lock_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); } if (NULL != result_buffer) { /* get accumulate */ ret = ompi_datatype_sndrcv ((void *) (intptr_t) target_address, target_count, target_datatype, result_buffer, result_count, result_datatype); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { break; } } if (&ompi_mpi_op_no_op.op != op) { if (&ompi_mpi_op_replace.op != op) { ret = ompi_osc_base_sndrcv_op (source_buffer, source_count, source_datatype, (void *) (intptr_t) target_address, target_count, target_datatype, op); } else { ret = ompi_datatype_sndrcv (source_buffer, source_count, source_datatype, (void *) (intptr_t) target_address, target_count, target_datatype); } } if (!ompi_osc_rdma_peer_is_exclusive (peer)) { (void) ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, accumulate_lock)); } } while (0); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OPAL_OUTPUT_VERBOSE((10, ompi_osc_base_framework.framework_output, "ompi_osc_rdma_gacc_self: failed performing accumulate operation. ret = %d", ret)); return ret; } if (request) { /* NTH: is it ok to use an ompi error code here? */ ompi_osc_rdma_request_complete (request, ret); } return ret; }
static inline int ompi_osc_pt2pt_gacc_self (ompi_osc_pt2pt_sync_t *pt2pt_sync, const void *source, int source_count, ompi_datatype_t *source_datatype, void *result, int result_count, ompi_datatype_t *result_datatype, OPAL_PTRDIFF_TYPE target_disp, int target_count, ompi_datatype_t *target_datatype, ompi_op_t *op, ompi_osc_pt2pt_module_t *module, ompi_osc_pt2pt_request_t *request) { void *target = (unsigned char*) module->baseptr + ((unsigned long) target_disp * module->disp_unit); int ret; OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_gacc_self: starting local " "get accumulate")); ompi_osc_pt2pt_accumulate_lock (module); do { ret = ompi_datatype_sndrcv (target, target_count, target_datatype, result, result_count, result_datatype); if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_gacc_self: failed copying to the target buffer. ret = %d", ret)); break; } if (&ompi_mpi_op_no_op.op != op) { if (&ompi_mpi_op_replace.op != op) { ret = ompi_osc_base_sndrcv_op (source, source_count, source_datatype, target, target_count, target_datatype, op); } else { ret = ompi_datatype_sndrcv ((void *)source, source_count, source_datatype, target, target_count, target_datatype); } } if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) { OPAL_OUTPUT_VERBOSE((5, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_gacc_self: failed performing accumulate operation. ret = %d", ret)); break; } } while (0); ompi_osc_pt2pt_accumulate_unlock (module); OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, ompi_osc_base_framework.framework_output, "ompi_osc_pt2pt_gacc_self: local get " "accumulate complete")); if (request) { /* NTH: is it ok to use an ompi error code here? */ ompi_osc_pt2pt_request_complete (request, ret); } return OMPI_SUCCESS; }
/**
 * Accumulate into a shared-memory window.
 *
 * The target location is module->bases[target] advanced by target_disp
 * scaled with module->disp_units[target]. The update is performed while
 * holding module->node_states[target].accumulate_lock.
 *
 * @param origin_addr   origin buffer
 * @param origin_count  number of origin_dt elements in origin_addr
 * @param origin_dt     datatype describing the origin buffer
 * @param target        index into module->bases / disp_units / node_states
 * @param target_disp   displacement at the target, in target displacement units
 * @param target_count  number of target_dt elements at the target
 * @param target_dt     datatype describing the target
 * @param op            operation; &ompi_mpi_op_replace.op uses
 *                      ompi_datatype_sndrcv(), anything else uses
 *                      ompi_osc_base_sndrcv_op()
 * @param win           window; its w_osc_module is the sm module
 *
 * @return the status returned by the helper that performed the update.
 */
int ompi_osc_sm_accumulate(void *origin_addr, int origin_count, struct ompi_datatype_t *origin_dt,
                           int target, OPAL_PTRDIFF_TYPE target_disp, int target_count,
                           struct ompi_datatype_t *target_dt, struct ompi_op_t *op,
                           struct ompi_win_t *win)
{
    ompi_osc_sm_module_t *module = (ompi_osc_sm_module_t*) win->w_osc_module;
    void *target_ptr;
    int rc;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count, origin_dt->name,
                         target, (int) target_disp, target_count, target_dt->name,
                         op->o_name, (unsigned long) win));

    target_ptr = ((char*) (module->bases[target])) + module->disp_units[target] * target_disp;

    opal_atomic_lock(&module->node_states[target].accumulate_lock);

    if (op != &ompi_mpi_op_replace.op) {
        rc = ompi_osc_base_sndrcv_op(origin_addr, origin_count, origin_dt,
                                     target_ptr, target_count, target_dt, op);
    } else {
        /* replace: plain datatype-to-datatype transfer into the window */
        rc = ompi_datatype_sndrcv(origin_addr, origin_count, origin_dt,
                                  target_ptr, target_count, target_dt);
    }

    opal_atomic_unlock(&module->node_states[target].accumulate_lock);

    return rc;
}