Example #1
0
int
ompi_osc_rdma_module_accumulate(void *origin_addr, int origin_count,
                                struct ompi_datatype_t *origin_dt,
                                int target, int target_disp, int target_count,
                                struct ompi_datatype_t *target_dt,
                                struct ompi_op_t *op, ompi_win_t *win)
{
    int ret;
    ompi_osc_rdma_sendreq_t *sendreq;
    ompi_osc_rdma_module_t *module = GET_MODULE(win);

    if ((OMPI_WIN_STARTED & ompi_win_get_mode(win)) &&
            (!module->m_sc_remote_active_ranks[target])) {
        return MPI_ERR_RMA_SYNC;
    }

    if (OMPI_WIN_FENCE & ompi_win_get_mode(win)) {
        /* well, we're definitely in an access epoch now */
        ompi_win_set_mode(win, OMPI_WIN_FENCE | OMPI_WIN_ACCESS_EPOCH |
                          OMPI_WIN_EXPOSE_EPOCH);
    }

    /* shortcut 0 count case */
    if (0 == origin_count || 0 == target_count) {
        return OMPI_SUCCESS;
    }

    /* create sendreq */
    ret = ompi_osc_rdma_sendreq_alloc_init(OMPI_OSC_RDMA_ACC,
                                           origin_addr,
                                           origin_count,
                                           origin_dt,
                                           target,
                                           target_disp,
                                           target_count,
                                           target_dt,
                                           module,
                                           &sendreq);
    MEMCHECKER(
        memchecker_convertor_call(&opal_memchecker_base_mem_noaccess,
                                  &sendreq->req_origin_convertor);
    );
int
ompi_osc_pt2pt_module_fence(int assert, ompi_win_t *win)
{
    unsigned int incoming_reqs;
    int ret = OMPI_SUCCESS, i;
    ompi_osc_pt2pt_module_t *module = P2P_MODULE(win);
    int num_outgoing = 0;

    if (0 != (assert & MPI_MODE_NOPRECEDE)) {
        /* check that the user didn't lie to us - since NOPRECEDED
           must be specified by all processes if it is specified by
           any process, if we see this it is safe to assume that there
           are no pending operations anywhere needed to close out this
           epoch.  No need to lock, since it's a lookup and any
           pending modification of the pending_sendreqs during this
           time is an erroneous program. */
        if (0 != opal_list_get_size(&(module->p2p_pending_sendreqs))) {
            return MPI_ERR_RMA_SYNC;
        }

    } else {
        opal_list_item_t *item;

        /* "atomically" copy all the data we're going to be modifying
           into the copy... */
        OPAL_THREAD_LOCK(&(module->p2p_lock));
        ompi_osc_pt2pt_flip_sendreqs(module);
        OPAL_THREAD_UNLOCK(&(module->p2p_lock));

        num_outgoing = opal_list_get_size(&(module->p2p_copy_pending_sendreqs));

        /* find out how much data everyone is going to send us. */
        ret = module->p2p_comm->
            c_coll.coll_reduce_scatter(module->p2p_copy_num_pending_sendreqs,
                                       &incoming_reqs,
                                       module->p2p_fence_coll_counts,
                                       MPI_UNSIGNED,
                                       MPI_SUM,
                                       module->p2p_comm,
                                       module->p2p_comm->c_coll.coll_reduce_scatter_module);

        if (OMPI_SUCCESS != ret) {
            /* put the stupid data back for the user.  This is not
               cheap, but the user lost his data if we don't. */
            OPAL_THREAD_LOCK(&(module->p2p_lock));
            opal_list_join(&module->p2p_pending_sendreqs,
                           opal_list_get_end(&module->p2p_pending_sendreqs),
                           &module->p2p_copy_pending_sendreqs);
            
            for (i = 0 ; i < ompi_comm_size(module->p2p_comm) ; ++i) {
                module->p2p_num_pending_sendreqs[i] +=
                    module->p2p_copy_num_pending_sendreqs[i];
            }

            OPAL_THREAD_UNLOCK(&(module->p2p_lock));
            return ret;
        }

        OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_output,
                             "fence: waiting on %d in and %d out",
                             module->p2p_num_pending_in,
                             module->p2p_num_pending_out));

        /* try to start all the requests.  We've copied everything we
           need out of pending_sendreqs, so don't need the lock
           here */
        while (NULL != 
               (item = opal_list_remove_first(&(module->p2p_copy_pending_sendreqs)))) {
            ompi_osc_pt2pt_sendreq_t *req = 
                (ompi_osc_pt2pt_sendreq_t*) item;

            ret = ompi_osc_pt2pt_sendreq_send(module, req);

            if (OMPI_ERR_TEMP_OUT_OF_RESOURCE == ret ) {
                opal_output_verbose(5, ompi_osc_base_output,
                                    "complete: failure in starting sendreq (%d).  Will try later.",
                                    ret);
                opal_list_append(&(module->p2p_copy_pending_sendreqs), item);
            } else if (OMPI_SUCCESS != ret) {
                return ret;
            } 
        }

        OPAL_THREAD_LOCK(&module->p2p_lock);
        /* possible we've already received a couple in messages, so
           add however many we're going to wait for */
        module->p2p_num_pending_in += incoming_reqs;
        module->p2p_num_pending_out += num_outgoing;

        /* now we know how many things we're waiting for - wait for them... */
        while (module->p2p_num_pending_in > 0 ||
               0 != module->p2p_num_pending_out) {
            opal_condition_wait(&module->p2p_cond, &module->p2p_lock);
        }
        OPAL_THREAD_UNLOCK(&module->p2p_lock);
    }

    /* all transfers are done - back to the real world we go */
    if (0 == (assert & MPI_MODE_NOSUCCEED)) {
        ompi_win_set_mode(win, OMPI_WIN_FENCE);
    } else {
        ompi_win_set_mode(win, 0);
    }

    return OMPI_SUCCESS;
}
/* dispatch for callback on message completion */
static int
component_fragment_cb(ompi_request_t *request)
{
    int ret;
    ompi_osc_pt2pt_buffer_t *buffer;
    ompi_osc_pt2pt_module_t *module;

    if (request->req_status._cancelled) {
        opal_output_verbose(5, ompi_osc_base_framework.framework_output,
                            "pt2pt request was canceled");
        return OMPI_ERR_NOT_AVAILABLE;
    }

    buffer = (ompi_osc_pt2pt_buffer_t*) request->req_complete_cb_data;
    module = (ompi_osc_pt2pt_module_t*) buffer->data;

    assert(request->req_status._ucount >= (int) sizeof(ompi_osc_pt2pt_base_header_t));

    /* handle message */
    switch (((ompi_osc_pt2pt_base_header_t*) buffer->payload)->hdr_type) {
    case OMPI_OSC_PT2PT_HDR_PUT:
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win, 
                                      OMPI_WIN_FENCE | 
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            ret = ompi_osc_pt2pt_sendreq_recv_put(module, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_ACC: 
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win, 
                                      OMPI_WIN_FENCE | 
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            /* receive into temporary buffer */
            ret = ompi_osc_pt2pt_sendreq_recv_accum(module, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_GET:
        {
            /* get our header and payload */
            ompi_osc_pt2pt_send_header_t *header =
                (ompi_osc_pt2pt_send_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);
            ompi_datatype_t *datatype;
            ompi_osc_pt2pt_replyreq_t *replyreq;
            ompi_proc_t *proc;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_SEND_HDR_NTOH(*header);
            }
#endif

            if (!ompi_win_exposure_epoch(module->p2p_win)) {
                if (OMPI_WIN_FENCE & ompi_win_get_mode(module->p2p_win)) {
                    ompi_win_set_mode(module->p2p_win, 
                                      OMPI_WIN_FENCE | 
                                      OMPI_WIN_ACCESS_EPOCH |
                                      OMPI_WIN_EXPOSE_EPOCH);
                }
            }

            /* create or get a pointer to our datatype */
            proc = ompi_comm_peer_lookup( module->p2p_comm, header->hdr_origin );
            datatype = ompi_osc_base_datatype_create(proc, &payload);

            if (NULL == datatype) {
                opal_output(ompi_osc_base_framework.framework_output,
                            "Error recreating datatype.  Aborting.");
                ompi_mpi_abort(module->p2p_comm, 1, false);
            }

            /* create replyreq sendreq */
            ret = ompi_osc_pt2pt_replyreq_alloc_init(module,
                                                  header->hdr_origin,
                                                  header->hdr_origin_sendreq,
                                                  header->hdr_target_disp,
                                                  header->hdr_target_count,
                                                  datatype,
                                                  &replyreq);

            /* send replyreq */
            ompi_osc_pt2pt_replyreq_send(module, replyreq);

            /* sendreq does the right retain, so we can release safely */
            OBJ_RELEASE(datatype);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_REPLY:
        {
            ompi_osc_pt2pt_reply_header_t *header = 
                (ompi_osc_pt2pt_reply_header_t*) buffer->payload;
            void *payload = (void*) (header + 1);
            ompi_osc_pt2pt_sendreq_t *sendreq;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_REPLY_HDR_NTOH(*header);
            }
#endif

            /* get original sendreq pointer */
            sendreq = (ompi_osc_pt2pt_sendreq_t*) header->hdr_origin_sendreq.pval;
            module = sendreq->req_module;

            /* receive data */
            ompi_osc_pt2pt_replyreq_recv(module, sendreq, header, payload);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_POST:
        {
            int32_t count;
            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_post_msgs -= 1);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);
            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_COMPLETE:
        {
            ompi_osc_pt2pt_control_header_t *header = 
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;
            int32_t count;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            /* we've heard from one more place, and have value reqs to
               process */
            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_complete_msgs -= 1);
            count += (module->p2p_num_pending_in += header->hdr_value[0]);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);

            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_LOCK_REQ:
        {
            ompi_osc_pt2pt_control_header_t *header = 
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;
            int32_t count;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            if (header->hdr_value[1] > 0) {
                ompi_osc_pt2pt_passive_lock(module, header->hdr_value[0], 
                                            header->hdr_value[1]);
            } else {
                OPAL_THREAD_LOCK(&module->p2p_lock);
                count = (module->p2p_lock_received_ack += 1);
                OPAL_THREAD_UNLOCK(&module->p2p_lock);

                if (count != 0) opal_condition_broadcast(&module->p2p_cond);
            }
        }
        break;

    case OMPI_OSC_PT2PT_HDR_UNLOCK_REQ:
        {
            ompi_osc_pt2pt_control_header_t *header = 
                (ompi_osc_pt2pt_control_header_t*) buffer->payload;

#if !defined(WORDS_BIGENDIAN) && OPAL_ENABLE_HETEROGENEOUS_SUPPORT
            if (header->hdr_base.hdr_flags & OMPI_OSC_PT2PT_HDR_FLAG_NBO) {
                OMPI_OSC_PT2PT_CONTROL_HDR_NTOH(*header);
            }
#endif

            ompi_osc_pt2pt_passive_unlock(module, header->hdr_value[0],
                                          header->hdr_value[1]);
        }
        break;

    case OMPI_OSC_PT2PT_HDR_UNLOCK_REPLY:
        {
            int32_t count;

            OPAL_THREAD_LOCK(&module->p2p_lock);
            count = (module->p2p_num_pending_out -= 1);
            OPAL_THREAD_UNLOCK(&module->p2p_lock);
            if (count == 0) opal_condition_broadcast(&module->p2p_cond);
        }
        break;

    default:
        opal_output_verbose(5, ompi_osc_base_framework.framework_output,
                            "received one-sided packet for with unknown type");
    }

    ompi_request_free(&request);
    ret = ompi_osc_pt2pt_component_irecv(buffer->payload,
                                         mca_osc_pt2pt_component.p2p_c_eager_size,
                                         MPI_BYTE,
                                         MPI_ANY_SOURCE,
                                         CONTROL_MSG_TAG,
                                         module->p2p_comm,
                                         &buffer->request,
                                         component_fragment_cb,
                                         buffer);

    return ret;
}