Esempio n. 1
0
/*
 * Raise the flow-control trigger once.
 *
 * If flow control is not already marked active, mark it active and
 * issue a zero-length, no-ack put to the flow-control root on the
 * flowctl portal table entry, using MTL_PORTALS4_FLOWCTL_TRIGGER as the
 * match bits.  When flow control is already active nothing is sent.
 *
 * Returns OMPI_SUCCESS, or the raw PtlPut() return code when the put
 * could not be issued (the active flag stays set in that case).
 */
int
ompi_mtl_portals4_flowctl_trigger(void)
{
    int rc;

    /* somebody already pulled the trigger; nothing more to do */
    if (ompi_mtl_portals4.flowctl.flowctl_active) {
        return OMPI_SUCCESS;
    }

    ompi_mtl_portals4.flowctl.flowctl_active = true;

    /* zero-byte notification to the root */
    rc = PtlPut(ompi_mtl_portals4.zero_md_h, 0, 0, PTL_NO_ACK_REQ,
                ompi_mtl_portals4.flowctl.root,
                ompi_mtl_portals4.flowctl_idx,
                MTL_PORTALS4_FLOWCTL_TRIGGER,
                0, NULL, 0);
    if (OPAL_UNLIKELY(PTL_OK != rc)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPut failed: %d\n",
                            __FILE__, __LINE__, rc);
        return rc;
    }

    return OMPI_SUCCESS;
}
Esempio n. 2
0
/*
** Ship the integer rpt to the peer identified by p->source_node.
**
** The value is staged in the file-scope send_int variable, a
** sizeof(int) put is issued on PTL_SEND_INT_INDEX, and the call then
** blocks in PtlCTWait() on send_int_ct_handle with total_int_sends as
** the threshold.  A non-zero failure count is reported on stderr but is
** not treated as fatal here.  total_int_sends is bumped on the way out.
*/
void
SendRepeat(ArgStruct *p, int rpt)
{
    ptl_ct_event_t counter;
    ptl_process_t peer;
    int pt_index = PTL_SEND_INT_INDEX;
    int rc;

    /* stage the payload before the put is issued */
    send_int = rpt;
    peer.rank = p->source_node;

    rc = PtlPut(send_int_md_handle, 0, sizeof(int), PTL_NO_ACK_REQ,
                peer, pt_index, 0, 0, NULL, 0);
    LIBTEST_CHECK(rc, "PtlPut in SendRepeat()");

    rc = PtlCTWait(send_int_ct_handle, total_int_sends, &counter);
    LIBTEST_CHECK(rc, "PtlCTWait in SendRepeat()");

    if (0 != counter.failure) {
        fprintf(stderr, "SendRepeat() PtlPut failed %d (%d succeeded)\n",
                (int)counter.failure, (int)counter.success);
    }

    total_int_sends++;
}  /* end of SendRepeat() */
Esempio n. 3
0
/*
** Transmit p->bufflen bytes from the start of md_handle to the peer
** identified by p->source_node.
**
** The put goes to PTL_XMIT_INDEX without an ack request; the call then
** blocks in PtlCTWait() on send_ct_handle with total_sends as the
** threshold.  A non-zero failure count is reported on stderr but is not
** treated as fatal here.  total_sends is bumped on the way out.
*/
void
SendData(ArgStruct *p)
{
    ptl_ct_event_t counter;
    ptl_process_t peer;
    int pt_index = PTL_XMIT_INDEX;
    int rc;

    peer.rank = p->source_node;

    rc = PtlPut(md_handle, 0, p->bufflen, PTL_NO_ACK_REQ,
                peer, pt_index, 0, 0, NULL, 0);
    LIBTEST_CHECK(rc, "PtlPut in SendData()");

    rc = PtlCTWait(send_ct_handle, total_sends, &counter);
    LIBTEST_CHECK(rc, "PtlCTWait in SendData()");

    if (0 != counter.failure) {
        fprintf(stderr, "SendData() PtlPut failed %d (%d succeeded)\n",
                (int)counter.failure, (int)counter.success);
    }

    total_sends++;
}  /* end of SendData() */
Esempio n. 4
0
/*
 * Send the first segment of a descriptor to a peer with PtlPut().
 *
 * btl_base   - BTL module; treated as a mca_btl_portals4_module_t
 * endpoint   - destination; its ptl_proc is the put target
 * descriptor - fragment to send; treated as a mca_btl_portals4_frag_t
 * tag        - BTL tag, stored in the fragment header, encoded into the
 *              match bits and passed as hdr_data
 *
 * The message is classified as long or short by comparing the segment
 * length against btl_eager_limit.  Before the put is issued a slot is
 * reserved in portals_outstanding_ops; while the limit is exceeded the
 * function spins on mca_btl_portals4_component_progress().
 *
 * Returns OPAL_SUCCESS when the put was issued, OPAL_ERROR when
 * PtlPut() failed.  On failure the reserved slot is given back.
 */
int mca_btl_portals4_send(struct mca_btl_base_module_t* btl_base,
                     struct mca_btl_base_endpoint_t* endpoint,
                     struct mca_btl_base_descriptor_t* descriptor,
                     mca_btl_base_tag_t tag)
{
    struct mca_btl_portals4_module_t* portals4_btl = (struct mca_btl_portals4_module_t*) btl_base;
    mca_btl_portals4_frag_t *frag = (mca_btl_portals4_frag_t*) descriptor;
    ptl_match_bits_t match_bits, msglen_type;
    ptl_size_t put_length;
    int ret;

    frag->endpoint = endpoint;
    frag->hdr.tag = tag;

    put_length = frag->segments[0].base.seg_len;
    if (put_length > portals4_btl->super.btl_eager_limit) {
        msglen_type = BTL_PORTALS4_LONG_MSG;
    } else {
        msglen_type = BTL_PORTALS4_SHORT_MSG;
    }

    BTL_PORTALS4_SET_SEND_BITS(match_bits, 0, 0, tag, msglen_type);

    /* reserve space in the event queue for rdma operations immediately;
       if the limit is exceeded, undo the reservation, make progress and
       try again */
    while (OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, 1) >
           portals4_btl->portals_max_outstanding_ops) {
        OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                             "Call to mca_btl_portals4_component_progress (4)\n"));
        mca_btl_portals4_component_progress();
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                         "mca_btl_portals4_send: Incrementing portals_outstanding_ops=%d\n",
        portals4_btl->portals_outstanding_ops));

    /* arguments are cast so they match the %ld / %lx conversions */
    OPAL_OUTPUT_VERBOSE((50, opal_btl_base_framework.framework_output,
                         "PtlPut frag=%p rank=%x pid=%x tag=%x len=%ld match_bits=%lx\n",
                         (void*)frag,  endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag,
                         (long) put_length, (unsigned long) match_bits));

    ret = PtlPut(portals4_btl->send_md_h,
                 (ptl_size_t) frag->segments[0].base.seg_addr.pval,
                 put_length, /* fragment length */
                 (mca_btl_portals4_component.portals_need_ack ? PTL_ACK_REQ : PTL_NO_ACK_REQ),
                 endpoint->ptl_proc,
                 portals4_btl->recv_idx,
                 match_bits,                     /* match bits */
                 0,                              /* remote offset - not used */
                 (void *) frag,                  /* user ptr */
                 tag);                           /* hdr_data: tag */
    if (ret != PTL_OK) {
        opal_output(opal_btl_base_framework.framework_output, "mca_btl_portals4_send: PtlPut failed with error %d", ret);
        /* The put was not issued, so hand back the slot reserved above;
           otherwise the outstanding-op count stays inflated.  (The
           release for successful puts is not in this function --
           presumably it happens when the completion event is
           progressed.) */
        OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
            "PtlPut frag=%p rank=%x pid=%x tag=%x addr=%p len=%ld match_bits=%lx",
            (void*)frag,  endpoint->ptl_proc.rank, endpoint->ptl_proc.phys.pid, tag,
            (void *)frag->segments[0].base.seg_addr.pval, (long) put_length, (unsigned long) match_bits));

    return OPAL_SUCCESS;
}
Esempio n. 5
0
/*
 * One-sided put of origin_count elements of origin_dt into the window
 * of rank `target`.
 *
 * Only datatypes with a contiguous memory layout are handled; anything
 * else is rejected with OMPI_ERR_NOT_SUPPORTED.  For the supported case
 * the module's opcount is incremented, the byte length is computed from
 * the origin datatype and count, and a single PtlPut() with an ack
 * request is issued at remote offset
 * get_displacement(module, target) * target_disp.
 *
 * Returns OMPI_SUCCESS, OMPI_ERR_NOT_SUPPORTED, the error from
 * ompi_datatype_type_size(), or the non-zero return of PtlPut().
 */
int
ompi_osc_portals4_put(void *origin_addr,
                      int origin_count,
                      struct ompi_datatype_t *origin_dt,
                      int target,
                      OPAL_PTRDIFF_TYPE target_disp,
                      int target_count,
                      struct ompi_datatype_t *target_dt,
                      struct ompi_win_t *win)
{
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    ptl_handle_md_t md_h;
    void *md_base;
    size_t nbytes;
    size_t remote_off;
    int rc;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "put: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    remote_off = get_displacement(module, target) * target_disp;

    /* both sides must describe contiguous memory */
    if (!(ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) &&
          ompi_datatype_is_contiguous_memory_layout(target_dt, target_count))) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Put: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    }

    (void)opal_atomic_add_64(&module->opcount, 1);

    rc = ompi_datatype_type_size(origin_dt, &nbytes);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }
    nbytes *= origin_count;

    /* the put is addressed relative to the base of the covering MD */
    ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);
    rc = PtlPut(md_h,
                (ptl_size_t) ((char*) origin_addr - (char*) md_base),
                nbytes,
                PTL_ACK_REQ,
                peer,
                module->pt_idx,
                module->match_bits,
                remote_off,
                NULL,
                0);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }

    return OMPI_SUCCESS;
}
Esempio n. 6
0
/*
 * Request-based accumulate of origin_count elements of origin_dt into
 * the window of rank `target`.
 *
 * A request is allocated and handed back through *ompi_req.  Only
 * datatypes with a contiguous memory layout are handled.  The data is
 * sent in chunks of at most module->atomic_max bytes; each chunk bumps
 * module->opcount and request->ops_expected and is issued either as a
 * PtlPut() (when op is MPI_REPLACE) or as a PtlAtomic() with the
 * Portals op/datatype derived from op and origin_dt.  A zero-length
 * transfer still issues one zero-length operation.
 *
 * Returns OMPI_SUCCESS, OMPI_ERR_TEMP_OUT_OF_RESOURCE when no request
 * could be allocated, OMPI_ERR_NOT_SUPPORTED for non-contiguous data,
 * or the error from the failing helper / Portals call.  On every error
 * after allocation the request is returned to its pool.
 */
int
ompi_osc_portals4_raccumulate(void *origin_addr,
                              int origin_count,
                              struct ompi_datatype_t *origin_dt,
                              int target,
                              OPAL_PTRDIFF_TYPE target_disp,
                              int target_count,
                              struct ompi_datatype_t *target_dt,
                              struct ompi_op_t *op,
                              struct ompi_win_t *win,
                              struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_portals4_request_t *request;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;
    ptl_handle_md_t md_h;
    ptl_size_t md_offset;
    void *md_base;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "raccumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Raccumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    }

    ret = ompi_datatype_type_size(origin_dt, &length);
    if (OMPI_SUCCESS != ret) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        return ret;
    }
    length *= origin_count;

    ompi_osc_portals4_get_md(origin_addr, module->req_md_h, &md_h, &md_base);
    md_offset = ((char*) origin_addr - (char*) md_base);

    /* The Portals op and datatype do not change between chunks, so
       resolve them once, before any operation is counted.  A failure
       here must give the request back instead of leaking it. */
    if (MPI_REPLACE != op) {
        ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }

        ret = ompi_osc_portals4_get_op(op, &ptl_op);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
    }

    sent = 0;
    do {
        size_t msg_length = MIN(module->atomic_max, length - sent);
        (void)opal_atomic_add_64(&module->opcount, 1);
        request->ops_expected++;

        if (MPI_REPLACE == op) {
            ret = PtlPut(md_h,
                         md_offset + sent,
                         msg_length,
                         PTL_ACK_REQ,
                         peer,
                         module->pt_idx,
                         module->match_bits,
                         offset + sent,
                         request,
                         0);
        } else {
            /* local offset is relative to the MD (md_offset), the
               remote offset is relative to the target window (offset) */
            ret = PtlAtomic(md_h,
                            md_offset + sent,
                            msg_length,
                            PTL_ACK_REQ,
                            peer,
                            module->pt_idx,
                            module->match_bits,
                            offset + sent,
                            request,
                            0,
                            ptl_op,
                            ptl_dt);
        }
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
        sent += msg_length;
    } while (sent < length);

    return OMPI_SUCCESS;
}
Esempio n. 7
0
int
ompi_mtl_portals_isend(struct mca_mtl_base_module_t* mtl,
                       struct ompi_communicator_t* comm,
                       int dest,
                       int tag,
                       struct ompi_convertor_t *convertor,
                       mca_pml_base_send_mode_t mode,
                       bool blocking,
                       mca_mtl_request_t *mtl_request)
{
    int ret;
    ptl_match_bits_t match_bits;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    ptl_handle_me_t me_h;
    ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, dest );
    mca_mtl_base_endpoint_t *endpoint = (mca_mtl_base_endpoint_t*) ompi_proc->proc_pml;
    ompi_mtl_portals_request_t *ptl_request = 
        (ompi_mtl_portals_request_t*) mtl_request;
    size_t buflen;

    assert(mtl == &ompi_mtl_portals.base);

    ret = ompi_mtl_datatype_pack(convertor, &md.start, &buflen,
                                 &(ptl_request->free_after));
    if (OMPI_SUCCESS != ret) return ret;
    md.length = buflen;

    ptl_request->event_callback = ompi_mtl_portals_send_progress;

    if ((MCA_PML_BASE_SEND_READY == mode)) {
        /* ready send (length doesn't matter) or short non-sync send.
           Eagerly send data and don't wait for completion */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid,
                          comm->c_my_rank,
                          tag, PTL_READY_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "ready send bits: 0x%016llx\n", 
                             match_bits));

        md.threshold = 1;
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h,
                        md,
                        PTL_UNLINK,
                        &(md_h));
        if (OMPI_SUCCESS != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ptl_request->event_callback = ompi_mtl_portals_send_progress_no_ack;

        ret = PtlPut(md_h,
                     PTL_NO_ACK_REQ,
                     endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID,
                     0,
                     match_bits,
                     0,
                     0);
        if (OMPI_SUCCESS != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

    } else if (md.length > ompi_mtl_portals.eager_limit) {
        /* it's a long message - same protocol for all send modes
           other than ready */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid,
                          comm->c_my_rank,
                          tag, PTL_LONG_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "long send bits: 0x%016llx (%d)\n", 
                             match_bits, dest));

        md.threshold = 2; /* send, {ack, get} */
        md.options = PTL_MD_OP_GET | PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMEAttach(ompi_mtl_portals.ptl_ni_h,
                          OMPI_MTL_PORTALS_READ_TABLE_ID,
                          endpoint->ptl_proc,
                          (ptl_match_bits_t)(uintptr_t) ptl_request,
                          0,
                          PTL_UNLINK,
                          PTL_INS_AFTER,
                          &me_h);
        if (OMPI_SUCCESS != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlMDAttach(me_h,
                          md,
                          PTL_UNLINK,
                          &(md_h));

        if (OMPI_SUCCESS != ret) {
            PtlMEUnlink(me_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlPut(md_h,
                     PTL_ACK_REQ,
                     endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID,
                     0,
                     match_bits,
                     0,
                     (ptl_hdr_data_t)(uintptr_t) ptl_request);
        if (OMPI_SUCCESS != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

    } else if (MCA_PML_BASE_SEND_SYNCHRONOUS == mode) {
        /* short synchronous message */
        PTL_SET_SEND_BITS(match_bits, comm->c_contextid,
                          comm->c_my_rank,
                          tag, PTL_SHORT_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "short ssend bits: 0x%016llx (%d)\n", 
                             match_bits, dest));

        md.threshold = 2; /* send, {ack, put} */
        md.options = PTL_MD_OP_PUT | PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMEAttach(ompi_mtl_portals.ptl_ni_h,
                          OMPI_MTL_PORTALS_ACK_TABLE_ID,
                          endpoint->ptl_proc,
                          (ptl_match_bits_t)(uintptr_t) ptl_request,
                          0,
                          PTL_UNLINK,
                          PTL_INS_AFTER,
                          &me_h);
        if (OMPI_SUCCESS != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlMDAttach(me_h,
                          md,
                          PTL_UNLINK,
                          &(md_h));

        if (OMPI_SUCCESS != ret) {
            PtlMEUnlink(me_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ret = PtlPut(md_h,
                     PTL_ACK_REQ,
                     endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID,
                     0,
                     match_bits,
                     0,
                     (ptl_hdr_data_t)(uintptr_t) ptl_request);
        if (OMPI_SUCCESS != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

    } else { 
        /* short send message */

        PTL_SET_SEND_BITS(match_bits, comm->c_contextid,
                          comm->c_my_rank,
                          tag, PTL_SHORT_MSG);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "short send bits: 0x%016llx\n", 
                             match_bits));

        md.threshold = 1;
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h,
                        md,
                        PTL_UNLINK,
                        &(md_h));
        if (OMPI_SUCCESS != ret) {
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }

        ptl_request->event_callback = ompi_mtl_portals_send_progress_no_ack;

        ret = PtlPut(md_h,
                     PTL_NO_ACK_REQ,
                     endpoint->ptl_proc,
                     OMPI_MTL_PORTALS_SEND_TABLE_ID,
                     0,
                     match_bits,
                     0,
                     0);
        if (OMPI_SUCCESS != ret) {
            PtlMDUnlink(md_h);
            if (ptl_request->free_after) free(md.start);
            return ompi_common_portals_error_ptl_to_ompi(ret);
        }
    }

    return OMPI_SUCCESS;
}
Esempio n. 8
0
/*
 * Called when a receive should be progressed.
 *
 * ev               - the Portals event being delivered
 * ptl_base_request - the request the event belongs to; treated as an
 *                    ompi_mtl_portals4_recv_request_t
 *
 * Event types handled:
 *   PTL_EVENT_PUT          - logged as the "expected" case
 *   PTL_EVENT_PUT_OVERFLOW - logged as the "unexpected" case
 *   PTL_EVENT_LINK         - ignored
 *
 * For both put events the MPI source/tag are decoded from the match
 * bits, the full message length from hdr_data, and a message longer
 * than delivery_len is flagged with MPI_ERR_TRUNCATE.  The request is
 * then either completed directly or a read_msg() is started for the
 * part of the message that has not been delivered yet.
 *
 * Returns OMPI_SUCCESS for every handled event, including the ones that
 * end in callback_error: those failures are reported through
 * req_status.MPI_ERROR and the completion callback, not the return
 * value.  Returns OMPI_ERROR only for an unhandled event type.
 */
static int
ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
                                ompi_mtl_portals4_base_request_t* ptl_base_request)
{
    int ret;
    ompi_mtl_portals4_recv_request_t* ptl_request =
        (ompi_mtl_portals4_recv_request_t*) ptl_base_request;
    size_t msg_length = 0;

    /* as soon as we've seen any event associated with a request, it's
       started */
    ptl_request->req_started = true;

    switch (ev->type) {
    case PTL_EVENT_PUT:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) got put event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            ret = PTL_FAIL;
            goto callback_error;
        }

        /* the stored match-entry handle is invalidated once the put has
           been seen */
        ptl_request->me_h = PTL_INVALID_HANDLE;

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG =
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        /* truncation is recorded in the status; processing continues */
        if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "truncate expected: %ld %ld",
                                msg_length, ptl_request->delivery_len);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

        /* debug trace only: fewer bytes were delivered than were sent */
        if (ev->mlength < msg_length)
             OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "Truncated message, some PtlGet are required (protocol = %d)",
                                 ompi_mtl_portals4.protocol));

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
        if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && msg_length > ev->mlength) {
            /* If it's not a short message and we're doing rndv and the message is not complete,  we
               only have the first part of the message.  Issue the get
               to pull the second part of the message. */
            /* the amount requested is capped at delivery_len and excludes
               the ev->mlength bytes already delivered */
            ret = read_msg((char*)ptl_request->delivery_ptr + ev->mlength,
                           ((msg_length > ptl_request->delivery_len) ? ptl_request->delivery_len : msg_length) - ev->mlength,
                           ev->initiator,
                           ev->hdr_data,
                           ev->mlength,
                           ptl_request);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }
        } else {
            /* If we're either using the eager protocol or were a
               short message, all data has been received, so complete
               the message. */
            ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                           ev->start,
                                           ev->mlength);
            /* an unpack failure is stored in the status, and the request
               is still completed through the normal path below */
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                    __FILE__, __LINE__, ret);
                ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
            }
            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                 "Recv %lu (0x%lx) completed, expected",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);
        }
        break;

    case PTL_EVENT_PUT_OVERFLOW:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) got put_overflow event",
                             ptl_request->opcount, ev->hdr_data));

        if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            ret = PTL_FAIL;
            goto callback_error;
        }

        ptl_request->me_h = PTL_INVALID_HANDLE;

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG =
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "truncate unexpected: %ld %ld %d",
                                msg_length, ptl_request->delivery_len,
                                MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        /* overflow case.  Short messages have the buffer stashed
           somewhere.  Long messages left in buffer at the source */
        if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) {
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
            if (ev->mlength > 0) {
                /* unpack straight from ev->start through a one-element
                   iovec */
                struct iovec iov;
                uint32_t iov_count = 1;
                size_t max_data;
                iov.iov_base = (char*) ev->start;
                iov.iov_len = ev->mlength;
                max_data = iov.iov_len;

                ret = opal_convertor_unpack(ptl_request->convertor,
                                            &iov, &iov_count,
                                            &max_data );
                /* buffer_ptr is released whether or not the unpack
                   succeeded */
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                if (OPAL_UNLIKELY(ret < 0)) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                        "%s:%d: opal_convertor_unpack failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }
            /* if it's a sync, send the ack */
            if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) {
                OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                     "Recv %lu (0x%lx) sending sync ack",
                                     ptl_request->opcount, ptl_request->hdr_data));
                /* zero-length put back to the initiator on read_idx, with
                   the received hdr_data as match bits */
                ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                             0,
                             0,
                             PTL_NO_ACK_REQ,
                             ev->initiator,
                             ompi_mtl_portals4.read_idx,
                             ev->hdr_data,
                             0,
                             NULL,
                             0);
                if (OPAL_UNLIKELY(PTL_OK != ret)) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                        "%s:%d: PtlPut failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                 "Recv %lu (0x%lx) completed, unexpected short (0x%lx)",
                                 ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);

        } else {

            /* For long messages in the overflow list, ev->mlength = 0 */
            ptl_request->super.super.ompi_req->req_status._ucount = 0;

            /* fetch the whole message from offset 0, capped at
               delivery_len */
            ret = read_msg((char*)ptl_request->delivery_ptr,
                           (msg_length > ptl_request->delivery_len) ? ptl_request->delivery_len : msg_length,
                           ev->initiator,
                           ev->hdr_data,
                           0,
                           ptl_request);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }
        }

        break;

    case PTL_EVENT_LINK:
        /* nothing to do */
        break;

    default:
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "Unhandled receive callback with event type %d",
                            ev->type);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

    /* Common failure exit: translate ret, store it in the status and
       complete the request.
       NOTE(review): ret reaches here from several sources -- PTL_FAIL,
       a PtlPut() return, a read_msg() return (checked against
       OMPI_SUCCESS) and a negative opal_convertor_unpack() return --
       while ompi_mtl_portals4_get_error() presumably expects a Portals
       code; confirm it handles the others sensibly. */
 callback_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR =
        ompi_mtl_portals4_get_error(ret);
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;
}
Esempio n. 9
0
int main(int   argc,
         char *argv[])
{
    ptl_handle_ni_t ni_handle;
    ptl_process_t   *procs;
    int             rank;
    ptl_pt_index_t  pt_index, signal_pt_index;
    HANDLE_T        signal_e_handle;
    HANDLE_T        signal_e2_handle;
    int             num_procs;
    ptl_handle_eq_t eq_handle;
    ptl_handle_ct_t ct_handle;
    ptl_handle_md_t md_handle;
    ptl_ni_limits_t limits_reqd, limits_actual;
    ENTRY_T         value_e;

    limits_reqd.max_entries = 1024;
    limits_reqd.max_unexpected_headers = ITERS*2;
    limits_reqd.max_mds = 1024;
    limits_reqd.max_eqs = 1024;
    limits_reqd.max_cts = 1024;
    limits_reqd.max_pt_index = 64;
    limits_reqd.max_iovecs = 1024;
    limits_reqd.max_list_size = 1024;
    limits_reqd.max_triggered_ops = 1024;
    limits_reqd.max_msg_size = 1048576;
    limits_reqd.max_atomic_size = 1048576;
    limits_reqd.max_fetch_atomic_size = 1048576;
    limits_reqd.max_waw_ordered_size = 1048576;
    limits_reqd.max_war_ordered_size = 1048576;
    limits_reqd.max_volatile_size = 1048576;
    limits_reqd.features = 0;

    CHECK_RETURNVAL(PtlInit());

    CHECK_RETURNVAL(libtest_init());

    rank = libtest_get_rank();
    num_procs = libtest_get_size();
    if (num_procs < 2) {
        fprintf(stderr, "test_flowctl_noeq requires at least two processes\n");
        return 77;
    }

    int iters;

    if (num_procs < ITERS)
        iters = ITERS*2+1;
    else
        iters = ITERS;

    CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL,
                              PTL_PID_ANY, &limits_reqd, &limits_actual, &ni_handle));
    procs = libtest_get_mapping(ni_handle);
    CHECK_RETURNVAL(PtlSetMap(ni_handle, num_procs, procs));


    if (0 == rank) {

        /* create data PT space */
        CHECK_RETURNVAL(PtlEQAlloc(ni_handle, (num_procs - 1) * iters + 64, &eq_handle));
        CHECK_RETURNVAL(PtlPTAlloc(ni_handle, PTL_PT_FLOWCTRL, eq_handle, 5,
                                   &pt_index));

        /* create signal ME */
        CHECK_RETURNVAL(PtlCTAlloc(ni_handle, &ct_handle));
        CHECK_RETURNVAL(PtlPTAlloc(ni_handle, 1, eq_handle, 6,
                                   &signal_pt_index));
        value_e.start = NULL;
        value_e.length = 0;
        value_e.ct_handle = ct_handle;
        value_e.uid = PTL_UID_ANY;
        value_e.options = OPTIONS | PTL_LE_EVENT_CT_COMM;
#if INTERFACE == 1
        value_e.match_id.rank = PTL_RANK_ANY;
        value_e.match_bits = 0;
        value_e.ignore_bits = 0;
#endif
        CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_OVERFLOW_LIST, NULL, &signal_e_handle));
    } else {
        ptl_md_t        md;

        /* 16 extra just in case... */
        CHECK_RETURNVAL(PtlEQAlloc(ni_handle, iters*2 + 16, &eq_handle));

        md.start = NULL;
        md.length = 0;
        md.options = 0;
        md.eq_handle = eq_handle;
        md.ct_handle = PTL_CT_NONE;

        CHECK_RETURNVAL(PtlMDBind(ni_handle, &md, &md_handle));
    }

    fprintf(stderr,"at barrier \n");
    libtest_barrier();

    if (0 == rank) {
        ptl_ct_event_t  ct;
        ptl_event_t ev;
        int ret, count = 0, saw_flowctl = 0;

        fprintf(stderr,"begin ctwait \n");
        /* wait for signal counts */
        CHECK_RETURNVAL(PtlCTWait(ct_handle, iters / 2 , &ct));
        if (ct.success != iters / 2 || ct.failure != 0) {
            return 1;
        }
        fprintf(stderr,"done CT wait \n");
        /* wait for event entries */
        while (1) {
            ret = PtlEQGet(eq_handle, &ev);
            if (PTL_OK == ret) {
                count++;
                fprintf(stderr, "found EQ value \n");
            } else if (ret == PTL_EQ_EMPTY) {
                continue;
            } else {
                fprintf(stderr, "0: Unexpected return code from EQGet: %d\n", ret);
                return 1;
            }

            if (ev.type == PTL_EVENT_PT_DISABLED) {
                saw_flowctl++;
                break;
            }
        }

        fprintf(stderr, "0: Saw %d flowctl\n", saw_flowctl);
        if (saw_flowctl == 0) {
            return 1;
        }
        /* Now clear out all of the unexpected messages so we can clean up everything */
        CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_PRIORITY_LIST, NULL, &signal_e2_handle));
        ret = PTL_OK;
        while (ret != PTL_EQ_EMPTY)
            ret = PtlEQGet(eq_handle, &ev);
    } else {
        ptl_process_t target;
        ptl_event_t ev;
        int ret, count = 0, fails = 0;
        int i;

        target.rank = 0;
        printf("beginning puts \n");
        for (i = 0 ; i < iters ; ++i) {
            CHECK_RETURNVAL(PtlPut(md_handle,
                                   0,
                                   0,
                                   PTL_ACK_REQ,
                                   target,
                                   5,
                                   0,
                                   0,
                                   NULL,
                                   0));
            usleep(100);
        }

        while (count < iters) {
            ret = PtlEQGet(eq_handle, &ev);
            if (PTL_EQ_EMPTY == ret) {
                continue;
            } else if (PTL_OK != ret) {
                fprintf(stderr, "%d: PtlEQGet returned %d\n", rank, ret);
                return 1;
            }

            if (ev.ni_fail_type == PTL_NI_OK) {
                if (ev.type == PTL_EVENT_SEND) {
                    continue;
                } else if (ev.type == PTL_EVENT_ACK) {
                    count++;
                } else {
                    fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type);
                }
            } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) {
                count++;
                fails++;
            } else if (ev.ni_fail_type == PTL_EQ_EMPTY) {
                continue;
            } else if (ev.ni_fail_type == PTL_EQ_DROPPED) {
                continue;
            } else {
                fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type);
                return 1;
            }
        }

        fprintf(stderr, "%d: Saw %d of %d ACKs as fails\n", rank, fails, count);
    }

    fprintf(stderr,"at final barrier \n");

    libtest_barrier();

    if (0 == rank) {

        CHECK_RETURNVAL(UNLINK(signal_e_handle));
        CHECK_RETURNVAL(UNLINK(signal_e2_handle));
        CHECK_RETURNVAL(PtlPTFree(ni_handle, signal_pt_index));
        CHECK_RETURNVAL(PtlCTFree(ct_handle));
        CHECK_RETURNVAL(PtlPTFree(ni_handle, pt_index));
        CHECK_RETURNVAL(PtlEQFree(eq_handle));
    } else {
        CHECK_RETURNVAL(PtlMDRelease(md_handle));
        CHECK_RETURNVAL(PtlEQFree(eq_handle));
    }

    fprintf(stderr,"final cleanup \n");
    CHECK_RETURNVAL(PtlNIFini(ni_handle));
    CHECK_RETURNVAL(libtest_fini());
    PtlFini();

    return 0;
}
Esempio n. 10
0
/**
 * Request-based one-sided put over Portals4.
 *
 * Allocates an OSC request, then issues a single PtlPut (with ACK requested)
 * of origin_count elements of origin_dt from origin_addr to the peer that
 * corresponds to rank `target` in the window.  The allocated request is
 * passed as the PtlPut user pointer.
 *
 * Only transfers where both the origin and the target datatype describe
 * contiguous memory are handled; anything else is rejected.
 *
 * @param origin_addr   local source buffer
 * @param origin_count  number of origin_dt elements to send
 * @param origin_dt     datatype of the origin buffer
 * @param target        target rank in the window
 * @param target_disp   displacement at the target, scaled by
 *                      get_displacement(module, target)
 * @param target_count  number of target_dt elements (used only for the
 *                      contiguity check)
 * @param target_dt     datatype at the target
 * @param win           window the operation belongs to
 * @param ompi_req      [out] set to the new request on success only
 *
 * @return OMPI_SUCCESS on success; OMPI_ERR_TEMP_OUT_OF_RESOURCE when no
 *         request could be allocated; OMPI_ERR_NOT_SUPPORTED for
 *         non-contiguous layouts; otherwise the error returned by
 *         ompi_datatype_type_size() or PtlPut().
 */
int
ompi_osc_portals4_rput(const void *origin_addr,
                       int origin_count,
                       struct ompi_datatype_t *origin_dt,
                       int target,
                       OPAL_PTRDIFF_TYPE target_disp,
                       int target_count,
                       struct ompi_datatype_t *target_dt,
                       struct ompi_win_t *win,
                       struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_portals4_request_t *request;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rput: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Rput: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    }

    /* Work out the transfer size before touching any shared counters so a
       failure here leaves the module state untouched. */
    ret = ompi_datatype_type_size(origin_dt, &length);
    if (OMPI_SUCCESS != ret) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        return ret;
    }
    length *= origin_count;

    /* Account for the operation before it is issued: its completion may be
       observed as soon as PtlPut returns. */
    (void)opal_atomic_add_64(&module->opcount, 1);
    request->ops_expected = 1;
    ret = PtlPut(module->req_md_h,
                 (ptl_size_t) origin_addr,
                 length,
                 PTL_ACK_REQ,
                 peer,
                 module->pt_idx,
                 module->match_bits,
                 offset,
                 request,
                 0);
    /* NOTE(review): PtlPut's status is compared against OMPI_SUCCESS, as in
       the original code; this assumes the Portals success code has the same
       value -- confirm. */
    if (OMPI_SUCCESS != ret) {
        /* The put was never started, so nothing will ever complete it:
           undo the accounting done above. */
        (void)opal_atomic_add_64(&module->opcount, -1);
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        return ret;
    }

    /* Publish the request only once it is backed by an issued operation, so
       the caller is never handed a request that was already returned. */
    *ompi_req = &request->super;

    return OMPI_SUCCESS;
}
Esempio n. 11
0
int main(int   argc,
         char *argv[])
{
    ptl_handle_ni_t ni_h;
    ptl_pt_index_t  pt_index;
    uint64_t       *buf;
    ENTRY_T         entry;
    HANDLE_T        entry_h;
    ptl_md_t        md;
    ptl_handle_md_t md_h;
    int             rank;
    int             num_procs;
    int             ret;
    ptl_process_t  *procs;
    ptl_handle_eq_t eq_h;
    ptl_event_t     ev;
    ptl_hdr_data_t rcvd = 0;
    ptl_hdr_data_t goal = 0;
    ptl_hdr_data_t hdr_data = 1;
    ptl_size_t offset = sizeof(uint64_t);
    uint32_t distance;
    int sends = 0;

    CHECK_RETURNVAL(PtlInit());
    CHECK_RETURNVAL(libtest_init());

    rank = libtest_get_rank();
    num_procs = libtest_get_size();

    /* This test only succeeds if we have more than one rank */
    if (num_procs < 2) return 77;

    CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL,
                              PTL_PID_ANY, NULL, NULL, &ni_h));

    procs = libtest_get_mapping(ni_h);
    CHECK_RETURNVAL(PtlSetMap(ni_h, num_procs, procs));

    CHECK_RETURNVAL(PtlEQAlloc(ni_h, 1024, &eq_h));
    CHECK_RETURNVAL(PtlPTAlloc(ni_h, 0, eq_h, 0, &pt_index));
    assert(pt_index == 0);
    
    buf = malloc(sizeof(uint64_t) * num_procs);
    assert(NULL != buf);

    md.start = buf;
    md.length = sizeof(uint64_t) * num_procs;
    md.options = PTL_MD_UNORDERED;
    md.eq_handle = eq_h;
    md.ct_handle = PTL_CT_NONE;
    CHECK_RETURNVAL(PtlMDBind(ni_h, &md, &md_h));

    entry.start = buf;
    entry.length = sizeof(uint64_t) * num_procs;
    entry.ct_handle = PTL_CT_NONE;
    entry.uid = PTL_UID_ANY;
    entry.options = OPTIONS;
#if MATCHING == 1
    entry.match_id.rank = PTL_RANK_ANY;
    entry.match_bits = 0;
    entry.ignore_bits = 0;
    entry.min_free = 0;
#endif
    CHECK_RETURNVAL(APPEND(ni_h, pt_index, &entry,
                           PTL_PRIORITY_LIST, NULL, &entry_h));

    /* ensure ME is linked before the barrier */
    CHECK_RETURNVAL(PtlEQWait(eq_h, &ev));
    assert( ev.type == PTL_EVENT_LINK );

    libtest_barrier();

    /* Bruck's Concatenation Algorithm */
    memcpy(buf, &rank, sizeof(uint64_t));
    for (distance = 1; distance < num_procs; distance *= 2) {
        ptl_size_t to_xfer;
        int peer;
        ptl_process_t proc;

        if (rank >= distance) {
            peer = rank - distance;
        } else {
            peer = rank + (num_procs - distance);
        }

        to_xfer = sizeof(uint64_t) * MIN(distance, num_procs - distance);
        proc.rank = peer;
        CHECK_RETURNVAL(PtlPut(md_h, 
                               0, 
                               to_xfer, 
                               PTL_NO_ACK_REQ, 
                               proc,
                               0,
                               0,
                               offset,
                               NULL,
                               hdr_data));
        sends += 1;

        /* wait for completion of the proper receive, and keep count
           of uncompleted sends.  "rcvd" is an accumulator to deal
           with out-of-order receives, which are IDed by the
           hdr_data */
        goal |= hdr_data;
        while ((rcvd & goal) != goal) {
            ret = PtlEQWait(eq_h, &ev);
            switch (ret) {
            case PTL_OK:
                if (ev.type == PTL_EVENT_SEND) {
                    sends -= 1;
                } else {
                    rcvd |= ev.hdr_data;
                    assert(ev.type == PTL_EVENT_PUT);
                    assert(ev.rlength == ev.mlength);
                    assert((ev.rlength == to_xfer) || (ev.hdr_data != hdr_data));
                }
                break;
            default:
                fprintf(stderr, "PtlEQWait failure: %d\n", ret);
                abort();
            }
        }
        
        hdr_data <<= 1;
        offset += to_xfer;
    }

    /* wait for any SEND_END events not yet seen */
    while (sends) {
        ret = PtlEQWait(eq_h, &ev);
        switch (ret) {
        case PTL_OK:
            assert( ev.type == PTL_EVENT_SEND );
            sends -= 1;
            break;
        default:
            fprintf(stderr, "PtlEQWait failure: %d\n", ret);
            abort();
        }
    }

    CHECK_RETURNVAL(UNLINK(entry_h));
    CHECK_RETURNVAL(PtlMDRelease(md_h));
    free(buf);

    libtest_barrier();

    /* cleanup */
    CHECK_RETURNVAL(PtlPTFree(ni_h, pt_index));
    CHECK_RETURNVAL(PtlEQFree(eq_h));
    CHECK_RETURNVAL(PtlNIFini(ni_h));
    CHECK_RETURNVAL(libtest_fini());
    PtlFini();

    return 0;
}
Esempio n. 12
0
/*
 * Start a long-message send.
 *
 * Two Portals operations are issued, in this order:
 *   1. a use-once, GET-only match entry covering [start, start + length) is
 *      appended to the priority list of the read portal index, keyed by the
 *      header data built for this request and restricted to ptl_proc
 *      (presumably so that the peer can fetch the payload);
 *   2. a PtlPut (ACK requested) toward the peer's receive portal index.  In
 *      the rendezvous protocol only eager_limit bytes are put; otherwise the
 *      whole message is.
 *
 * If the put cannot be issued the match entry appended in step 1 is
 * unlinked again.
 *
 * Returns OMPI_SUCCESS, or the result of ompi_mtl_portals4_get_error() for
 * the failing Portals call.
 */
static inline int
ompi_mtl_portals4_long_isend(void *start, size_t length, int contextid, int tag,
                             int localrank,
                             ptl_process_t ptl_proc,
                             ompi_mtl_portals4_isend_request_t *ptl_request)
{
    ptl_match_bits_t match_bits;
    ptl_hdr_data_t hdr_data;
    ptl_size_t bytes_to_put;
    ptl_me_t read_me;
    int rc;

    MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
                               MTL_PORTALS4_LONG_MSG);

    MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length, 0);

    /* Describe the send buffer as a one-shot, read-only target. */
    read_me.options = PTL_ME_OP_GET |
                      PTL_ME_USE_ONCE |
                      PTL_ME_EVENT_LINK_DISABLE |
                      PTL_ME_EVENT_UNLINK_DISABLE;
    read_me.start = start;
    read_me.length = length;
    read_me.min_free = 0;
    read_me.ct_handle = PTL_CT_NONE;
    read_me.uid = ompi_mtl_portals4.uid;
    read_me.match_id = ptl_proc;
    read_me.match_bits = hdr_data;
    read_me.ignore_bits = 0;

    rc = PtlMEAppend(ompi_mtl_portals4.ni_h,
                     ompi_mtl_portals4.read_idx,
                     &read_me,
                     PTL_PRIORITY_LIST,
                     ptl_request,
                     &ptl_request->me_h);
    if (OPAL_UNLIKELY(PTL_OK != rc)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMEAppend failed: %d",
                            __FILE__, __LINE__, rc);
        return ompi_mtl_portals4_get_error(rc);
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Send %lu long send with hdr_data 0x%lx (0x%lx)",
                         ptl_request->opcount, hdr_data, match_bits));

    /* Rendezvous sends only the eager portion up front. */
    if (rndv == ompi_mtl_portals4.protocol) {
        bytes_to_put = (ptl_size_t) ompi_mtl_portals4.eager_limit;
    } else {
        bytes_to_put = (ptl_size_t) length;
    }

    rc = PtlPut(ompi_mtl_portals4.send_md_h,
                (ptl_size_t) start,
                bytes_to_put,
                PTL_ACK_REQ,
                ptl_proc,
                ompi_mtl_portals4.recv_idx,
                match_bits,
                0,
                ptl_request,
                hdr_data);
    if (OPAL_UNLIKELY(PTL_OK != rc)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPut failed: %d",
                            __FILE__, __LINE__, rc);
        /* nobody will ever read from the entry appended above */
        PtlMEUnlink(ptl_request->me_h);
        return ompi_mtl_portals4_get_error(rc);
    }

    return OMPI_SUCCESS;
}
Esempio n. 13
0
/*
 * Start a short-message send.
 *
 * The whole payload is put (ACK requested) to the peer's receive portal
 * index.  For a synchronous send a zero-length, use-once, PUT-only match
 * entry keyed by the request's header data is first appended on the read
 * portal index; for every other mode the request's event_count is set to 1
 * and its me_h to PTL_INVALID_HANDLE.
 *
 * If the put cannot be issued, the match entry appended for a synchronous
 * send is unlinked again.
 *
 * Returns OMPI_SUCCESS, or the result of ompi_mtl_portals4_get_error() for
 * the failing Portals call.
 */
static inline int
ompi_mtl_portals4_short_isend(mca_pml_base_send_mode_t mode,
                              void *start, int length, int contextid, int tag,
                              int localrank,
                              ptl_process_t ptl_proc,
                              ompi_mtl_portals4_isend_request_t *ptl_request)
{
    const int sync_send = (MCA_PML_BASE_SEND_SYNCHRONOUS == mode);
    ptl_match_bits_t match_bits;
    ptl_hdr_data_t hdr_data;
    int rc;

    MTL_PORTALS4_SET_SEND_BITS(match_bits, contextid, localrank, tag,
                               MTL_PORTALS4_SHORT_MSG);

    MTL_PORTALS4_SET_HDR_DATA(hdr_data, ptl_request->opcount, length,
                              sync_send ? 1 : 0);

    if (!sync_send) {
        ptl_request->event_count = 1;
        ptl_request->me_h = PTL_INVALID_HANDLE;

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Send %lu short send with hdr_data 0x%lx (0x%lx)",
                             ptl_request->opcount, hdr_data, match_bits));
    } else {
        ptl_me_t sync_me;

        /* zero-length, one-shot entry that accepts a single put from the
           destination process */
        sync_me.options = PTL_ME_OP_PUT |
                          PTL_ME_USE_ONCE |
                          PTL_ME_EVENT_LINK_DISABLE |
                          PTL_ME_EVENT_UNLINK_DISABLE;
        sync_me.start = NULL;
        sync_me.length = 0;
        sync_me.min_free = 0;
        sync_me.ct_handle = PTL_CT_NONE;
        sync_me.uid = ompi_mtl_portals4.uid;
        sync_me.match_id = ptl_proc;
        sync_me.match_bits = hdr_data;
        sync_me.ignore_bits = 0;

        rc = PtlMEAppend(ompi_mtl_portals4.ni_h,
                         ompi_mtl_portals4.read_idx,
                         &sync_me,
                         PTL_PRIORITY_LIST,
                         ptl_request,
                         &ptl_request->me_h);
        if (OPAL_UNLIKELY(PTL_OK != rc)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PtlMEAppend failed: %d",
                                __FILE__, __LINE__, rc);
            return ompi_mtl_portals4_get_error(rc);
        }

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Send %lu short sync send with hdr_data 0x%lx (0x%lx)",
                             ptl_request->opcount, hdr_data, match_bits));
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Send %lu, start: %p",
                         ptl_request->opcount, start));

    rc = PtlPut(ompi_mtl_portals4.send_md_h,
                (ptl_size_t) start,
                length,
                PTL_ACK_REQ,
                ptl_proc,
                ompi_mtl_portals4.recv_idx,
                match_bits,
                0,
                ptl_request,
                hdr_data);
    if (OPAL_UNLIKELY(PTL_OK != rc)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPut failed: %d",
                            __FILE__, __LINE__, rc);
        /* only the synchronous path appended an entry that needs removing */
        if (sync_send) {
            PtlMEUnlink(ptl_request->me_h);
        }
        return ompi_mtl_portals4_get_error(rc);
    }

    return OMPI_SUCCESS;
}
Esempio n. 14
0
/*
 * Deliver the payload of a matched receive event into the user's buffer.
 *
 * Short messages: the data already sits in a short-message receive block at
 * ev.md.start + ev.offset.  It is unpacked through the convertor, the block
 * is marked full when less than md.max_size bytes remain after this message,
 * a zero-length put is sent back to the initiator when the message was
 * synchronous, the block part is returned, the request status is filled in
 * and the completion callback is invoked.
 *
 * Long messages: a receive buffer is obtained from the convertor, bound as a
 * memory descriptor and a PtlGet is issued toward the initiator's read table
 * entry.  Completion is left to ompi_mtl_portals_recv_progress, which is
 * installed as the request's event callback.
 *
 * In both paths MPI_ERROR is set to MPI_ERR_TRUNCATE when the sender's
 * requested length exceeds the receive buffer length.
 *
 * Any failure of a Portals or datatype call is reported and the process
 * aborts; the function therefore only ever returns OMPI_SUCCESS.
 */
static int
ompi_mtl_portals_get_data(ompi_mtl_portals_event_t *recv_event, 
                          struct ompi_convertor_t *convertor,
                          ompi_mtl_portals_request_t  *ptl_request)
{
    int ret;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    size_t buflen;
    
    if (PTL_IS_SHORT_MSG(recv_event->ev.match_bits)) {
        /* the buffer is sitting in the short message queue */

        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;

        ompi_mtl_portals_recv_short_block_t *block = 
            recv_event->ev.md.user_ptr;

        iov.iov_base = (((char*) recv_event->ev.md.start) + recv_event->ev.offset);
        iov.iov_len = recv_event->ev.mlength;
        max_data = iov.iov_len;

        /* see if this message filled the receive block */
        if (recv_event->ev.md.length - (recv_event->ev.offset + 
                                        recv_event->ev.mlength) <
            recv_event->ev.md.max_size) {
            block->full = true;
        }

        /* pull out the data */
        if (iov.iov_len > 0) {
            ompi_convertor_unpack(convertor, &iov, &iov_count,
                                  &max_data );
        }

        /* if synchronous, return an ack */
        if (PTL_IS_SYNC_MSG(recv_event->ev)) {
            /* zero-length descriptor: the ack carries no payload */
            md.length = 0;
            md.start = (((char*) recv_event->ev.md.start) + recv_event->ev.offset);
            md.threshold = 1; /* send */
            md.options = PTL_MD_EVENT_START_DISABLE;
            md.user_ptr = NULL;
            md.eq_handle = ompi_mtl_portals.ptl_eq_h;

            ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md,
                            PTL_UNLINK, &md_h);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr)," Error returned from PtlMDBind.  Error code - %d \n",ret);
                abort();
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                                 "acking recv: 0x%016llx\n", 
                                 recv_event->ev.match_bits));

            /* the sender's hdr_data is passed back to it in the ack;
               NOTE(review): per the Portals 3 PtlPut argument order this
               is the match-bits position -- confirm */
            ret = PtlPut(md_h,
                         PTL_NO_ACK_REQ,
                         recv_event->ev.initiator,
                         OMPI_MTL_PORTALS_ACK_TABLE_ID,
                         0,
                         recv_event->ev.hdr_data,
                         0,
                         0);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr)," Error returned from PtlPut.  Error code - %d \n",ret);
                abort();
            }
        }

        /* finished with our buffer space */
        ompi_mtl_portals_return_block_part(&ompi_mtl_portals, block);

        ompi_convertor_get_packed_size(convertor, &buflen);

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG = 
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR = 
            (recv_event->ev.rlength > buflen) ?
            MPI_ERR_TRUNCATE : MPI_SUCCESS;
        ptl_request->super.ompi_req->req_status._count = 
            recv_event->ev.mlength;

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "recv complete: 0x%016llx\n", 
                             recv_event->ev.match_bits));
        
        ptl_request->super.completion_callback(&ptl_request->super);

    } else {
        ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
                                         &ptl_request->free_after);
        if (OMPI_SUCCESS != ret) {
            opal_output(fileno(stderr)," Error returned from ompi_mtl_datatype_recv_buf.  Error code - %d \n",ret);
            abort();
        }
        /* never fetch more than the receive buffer can hold */
        md.length = (recv_event->ev.rlength > buflen) ? buflen : recv_event->ev.rlength;
        md.threshold = 2; /* send and get */
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        /* retain because it's unclear how many events we'll get here.
           Some implementations give just the REPLY, others give SEND
           and REPLY */
        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md,
                        PTL_RETAIN, &md_h);
        if (PTL_OK != ret) {
            /* report the call that actually failed */
            opal_output(fileno(stderr)," Error returned from PtlMDBind.  Error code - %d \n",ret);
            abort();
        }

        /* must be in place before the get is issued, since its events are
           handled through this callback */
        ptl_request->event_callback = ompi_mtl_portals_recv_progress;

        ret = PtlGet(md_h, 
                     recv_event->ev.initiator, 
                     OMPI_MTL_PORTALS_READ_TABLE_ID,
                     0, 
                     recv_event->ev.hdr_data,
                     0);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr)," Error returned from PtlGet.  Error code - %d \n",ret);
            abort();
        }

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG = 
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR = 
            (recv_event->ev.rlength > buflen) ?
            MPI_ERR_TRUNCATE : MPI_SUCCESS;
    }

    return OMPI_SUCCESS;
}
Esempio n. 15
0
/*
 * Begin flow-control recovery on this process.
 *
 * Steps, in order: mark flow control active and advance the epoch counter;
 * re-arm the alarm and the barrier for the new epoch; spin on
 * opal_progress() until every send slot has been handed back; drain the
 * event queue; relink short receive blocks (a failure here is only logged);
 * sort the pending sends with seqnum_compare; drain the event queue once
 * more; and finally put the zero-length fan-in message to
 * flowctl.me on the flow-control portal index.
 *
 * Returns OMPI_SUCCESS once the fan-in message has been issued.  Failures
 * of setup_alarm(), setup_barrier() or opal_list_sort() return that error
 * immediately; a PtlPut failure returns the Portals status code.
 */
static int
start_recover(void)
{
    int64_t epoch;
    int rc;

    ompi_mtl_portals4.flowctl.flowctl_active = true;
    epoch = opal_atomic_add_64(&ompi_mtl_portals4.flowctl.epoch_counter, 1);

    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                        "Entering flowctl_start_recover %ld",
                        epoch);

    /* re-arm trigger/alarm for next time */
    rc = setup_alarm(epoch);
    if (rc != OMPI_SUCCESS) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d setup_alarm failed: %d\n",
                            __FILE__, __LINE__, rc);
        return rc;
    }

    /* setup barrier tree for getting us out of flow control */
    rc = setup_barrier(epoch);
    if (rc != OMPI_SUCCESS) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d setup_barrier failed: %d\n",
                            __FILE__, __LINE__, rc);
        return rc;
    }

    /* drain all pending sends: wait until every slot is free again */
    while (ompi_mtl_portals4.flowctl.max_send_slots !=
           ompi_mtl_portals4.flowctl.send_slots) {
        opal_progress();
    }

    /* drain event queue */
    while (ompi_mtl_portals4_progress() != 0) {
        /* keep polling until nothing is left */
    }

    /* check short block active count; a failure is reported but does not
       stop the recovery */
    rc = ompi_mtl_portals4_recv_short_link(1);
    if (rc != OMPI_SUCCESS) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: recv_short_link failed: %d",
                            __FILE__, __LINE__, rc);
    }

    /* reorder the pending sends by operation count */
    rc = opal_list_sort(&ompi_mtl_portals4.flowctl.pending_sends, seqnum_compare);
    if (rc != OMPI_SUCCESS) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d opal_list_sort failed: %d\n",
                            __FILE__, __LINE__, rc);
        return rc;
    }

    /* drain event queue again, just to make sure */
    while (ompi_mtl_portals4_progress() != 0) {
        /* keep polling until nothing is left */
    }

    /* send barrier entry message */
    rc = PtlPut(ompi_mtl_portals4.zero_md_h,
                0,
                0,
                PTL_NO_ACK_REQ,
                ompi_mtl_portals4.flowctl.me,
                ompi_mtl_portals4.flowctl_idx,
                MTL_PORTALS4_FLOWCTL_FANIN,
                0,
                NULL,
                0);
    if (OPAL_UNLIKELY(PTL_OK != rc)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlPut failed: %d\n",
                            __FILE__, __LINE__, rc);
    } else {
        /* recovery complete when fan-out event arrives, async event, so
           we're done now */
        rc = OMPI_SUCCESS;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                         "Exiting flowctl_start_recover %ld",
                         epoch));

    return rc;
}
Esempio n. 16
0
File: rptl.c Progetto: zhanglt/mpich
/*
 * Drive the RPTL flow-control state machine one step.
 *
 * Pass 1 walks rptl_info.rptl_list.  An RPTL that is waiting for pause acks,
 * has collected one from every other rank (world_size - 1) and is not over
 * its ob_max_count limit gets its data portal re-enabled, is marked active
 * again, and an UNPAUSE control message is sent to every other rank.
 *
 * Pass 2 walks rptl_info.target_list.  A target that has received a pause
 * and has no data operation still in the ISSUED state is sent a PAUSE_ACK
 * and moves to PAUSE_ACKED.  For all other targets, queued control puts are
 * issued first, then (unless the target is DISABLED or PAUSE_ACKED) queued
 * data puts and gets, for as long as origin events are available and the
 * per-target threshold has not been exceeded.
 *
 * Returns the last Portals status code (PTL_OK when nothing failed).
 * Errors are routed through RPTLU_ERR_POP, which presumably jumps to
 * fn_fail on a non-OK status -- its definition is not visible here.
 */
static int poke_progress(void)
{
    int ret = PTL_OK;
    struct rptl_target *target;
    struct rptl_op *op;
    struct rptl *rptl;
    int i;
    int mpi_errno = MPI_SUCCESS;
    ptl_process_t id;
    ptl_pt_index_t data_pt, control_pt;
    MPIDI_STATE_DECL(MPID_STATE_POKE_PROGRESS);

    MPIDI_FUNC_ENTER(MPID_STATE_POKE_PROGRESS);

    /* make progress on local RPTLs */
    for (rptl = rptl_info.rptl_list; rptl; rptl = rptl->next) {
        /* if the local state is active, there's nothing to do */
        if (rptl->local_state == RPTL_LOCAL_STATE_ACTIVE)
            continue;

        /* if we are in a local AWAITING PAUSE ACKS state, see if we
         * can send out the unpause message */
        if (rptl->local_state == RPTL_LOCAL_STATE_AWAITING_PAUSE_ACKS &&
            rptl->pause_ack_counter == rptl_info.world_size - 1) {
            /* if we are over the max count limit, do not send an
             * unpause message yet */
            if (rptl->data.ob_curr_count > rptl->data.ob_max_count)
                continue;

            ret = PtlPTEnable(rptl->ni, rptl->data.pt);
            RPTLU_ERR_POP(ret, "Error returned while reenabling PT\n");

            rptl->local_state = RPTL_LOCAL_STATE_ACTIVE;

            /* tell every other rank that it may resume sending to us */
            for (i = 0; i < rptl_info.world_size; i++) {
                if (i == MPIDI_Process.my_pg_rank)
                    continue;
                mpi_errno = rptl_info.get_target_info(i, &id, rptl->data.pt, &data_pt, &control_pt);
                if (mpi_errno) {
                    ret = PTL_FAIL;
                    RPTLU_ERR_POP(ret, "Error getting target info\n");
                }

                /* make sure the user setup a control portal */
                assert(control_pt != PTL_PT_ANY);

                ret = rptl_put(rptl->md, 0, 0, PTL_NO_ACK_REQ, id, control_pt,
                               0, 0, NULL, RPTL_CONTROL_MSG_UNPAUSE, RPTL_PT_CONTROL);
                RPTLU_ERR_POP(ret, "Error sending unpause message\n");
            }
        }
    }

    /* make progress on targets */
    for (target = rptl_info.target_list; target; target = target->next) {
        if (target->state == RPTL_TARGET_STATE_RECEIVED_PAUSE) {
            /* look for a data op that is still in flight; op is non-NULL
             * after the loop only if one was found */
            for (op = target->data_op_list; op; op = op->next)
                if (op->state == RPTL_OP_STATE_ISSUED)
                    break;
            if (op)
                continue;

            /* send a pause ack message */
            assert(target->rptl);
            for (i = 0; i < rptl_info.world_size; i++) {
                if (i == MPIDI_Process.my_pg_rank)
                    continue;
                /* find the target that has this target id and get the
                 * control portal information for it */
                mpi_errno = rptl_info.get_target_info(i, &id, target->rptl->data.pt, &data_pt, &control_pt);
                if (mpi_errno) {
                    ret = PTL_FAIL;
                    RPTLU_ERR_POP(ret, "Error getting target info\n");
                }
                if (IDS_ARE_EQUAL(id, target->id))
                    break;
            }
            /* NOTE(review): nothing checks whether the loop above ended on
             * a match.  If no rank matched, id and control_pt still hold
             * the values from the last rank queried (or are unset when the
             * loop body never ran) -- confirm a match is guaranteed. */

            /* make sure the user setup a control portal */
            assert(control_pt != PTL_PT_ANY);

            target->state = RPTL_TARGET_STATE_PAUSE_ACKED;

            ret = rptl_put(target->rptl->md, 0, 0, PTL_NO_ACK_REQ, id, control_pt, 0,
                           0, NULL, RPTL_CONTROL_MSG_PAUSE_ACK, RPTL_PT_CONTROL);
            RPTLU_ERR_POP(ret, "Error sending pause ack message\n");

            continue;
        }

        /* issue out all the control messages first */
        for (op = target->control_op_list; op; op = op->next) {
            assert(op->op_type == RPTL_OP_PUT);

            /* skip all the issued ops */
            if (op->state == RPTL_OP_STATE_ISSUED)
                continue;

            /* we should not get any NACKs on the control portal */
            assert(op->state != RPTL_OP_STATE_NACKED);

            if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                /* too few origin events left.  we can't issue this op
                 * or any following op to this target in order to
                 * maintain ordering */
                break;
            }

            /* a put reserves two origin events */
            rptl_info.origin_events_left -= 2;
            /* NOTE(review): control puts are counted in issued_data_ops,
             * the same counter the data ops below use -- confirm this is
             * intended. */
            target->issued_data_ops++;

            /* force request for an ACK even if the user didn't ask
             * for it.  replace the user pointer with the OP id. */
            ret = PtlPut(op->u.put.md_handle, op->u.put.local_offset, op->u.put.length,
                         PTL_ACK_REQ, op->u.put.target_id, op->u.put.pt_index,
                         op->u.put.match_bits, op->u.put.remote_offset, op,
                         op->u.put.hdr_data);
            RPTLU_ERR_POP(ret, "Error issuing PUT\n");

            op->state = RPTL_OP_STATE_ISSUED;
        }

        /* no data traffic goes to a target in either of these states */
        if (target->state == RPTL_TARGET_STATE_DISABLED || target->state == RPTL_TARGET_STATE_PAUSE_ACKED)
            continue;

        /* then issue out all the data messages */
        for (op = target->data_op_list; op; op = op->next) {
            if (op->op_type == RPTL_OP_PUT) {
                /* skip all the issued ops */
                if (op->state == RPTL_OP_STATE_ISSUED)
                    continue;

                /* if an op has been nacked, don't issue anything else
                 * to this target */
                if (op->state == RPTL_OP_STATE_NACKED)
                    break;

                if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                    /* too few origin events left.  we can't issue
                     * this op or any following op to this target in
                     * order to maintain ordering */
                    break;
                }

                /* a put reserves two origin events */
                rptl_info.origin_events_left -= 2;
                target->issued_data_ops++;

                /* force request for an ACK even if the user didn't
                 * ask for it.  replace the user pointer with the OP
                 * id. */
                ret = PtlPut(op->u.put.md_handle, op->u.put.local_offset, op->u.put.length,
                             PTL_ACK_REQ, op->u.put.target_id, op->u.put.pt_index,
                             op->u.put.match_bits, op->u.put.remote_offset, op,
                             op->u.put.hdr_data);
                RPTLU_ERR_POP(ret, "Error issuing PUT\n");
            }
            else if (op->op_type == RPTL_OP_GET) {
                /* skip all the issued ops */
                if (op->state == RPTL_OP_STATE_ISSUED)
                    continue;

                /* if an op has been nacked, don't issue anything else
                 * to this target */
                if (op->state == RPTL_OP_STATE_NACKED)
                    break;

                if (rptl_info.origin_events_left < 1 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                    /* too few origin events left.  we can't issue
                     * this op or any following op to this target in
                     * order to maintain ordering */
                    break;
                }

                /* a get reserves a single origin event */
                rptl_info.origin_events_left--;
                target->issued_data_ops++;

                ret = PtlGet(op->u.get.md_handle, op->u.get.local_offset, op->u.get.length,
                             op->u.get.target_id, op->u.get.pt_index, op->u.get.match_bits,
                             op->u.get.remote_offset, op);
                RPTLU_ERR_POP(ret, "Error issuing GET\n");
            }

            /* reached for both puts and gets once the op has been handed
             * to Portals */
            op->state = RPTL_OP_STATE_ISSUED;
        }
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_POKE_PROGRESS);
    return ret;

  fn_fail:
    /* no extra cleanup on failure: leave through the common exit path */
    goto fn_exit;
}
Esempio n. 17
0
/*
 * First half ("top") of the linear scatter: fill in the request, set up the
 * buffers and Portals handles through the three setup_* helpers, and post
 * all Portals operations.
 *
 * Protocol as posted by this function:
 *   - every non-root rank PtlPut()s a zero-length RTR to the root using the
 *     sync match bits;
 *   - on the root, two PtlTriggeredCTInc() calls bump the scatter counter
 *     once when the sync counter reaches (size - 1) and once more when it
 *     reaches 2 * (size - 1), i.e. after all RTRs and after all Recv-ACKs;
 *   - the root's data puts are triggered when the scatter counter reaches 1;
 *   - every non-root rank posts a zero-length Recv-ACK to the root, triggered
 *     when its own scatter counter reaches 1 (the put from the root);
 *   - when request->u.scatter.is_sync is false a zero-length put to the
 *     finish portal table entry of this rank is triggered once the scatter
 *     counter reaches expected_ops; otherwise the function blocks in
 *     PtlCTWait() on the same threshold.
 *
 * request->u.scatter.is_sync is read but never written here; it is presumably
 * set by the caller before this function runs (TODO confirm).
 *
 * Returns OMPI_SUCCESS, the error returned by a failing setup_* helper, or
 * OMPI_ERROR when a Portals call fails.  On failure scatter_buf is released
 * and the pointer in the request is cleared.
 */
static int
ompi_coll_portals4_scatter_intra_linear_top(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
        void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
        int root,
        struct ompi_communicator_t *comm,
        ompi_coll_portals4_request_t *request,
        mca_coll_base_module_t *module)
{
    mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module;
    int ret, line;
    ptl_ct_event_t ct;

    ptl_ct_event_t sync_incr_event;

    int8_t i_am_root;

    int32_t expected_rtrs = 0;
    int32_t expected_puts = 0;
    int32_t expected_acks = 0;
    int32_t expected_ops  = 0;

    int32_t expected_chained_rtrs = 0;
    int32_t expected_chained_acks = 0;


    /* request->u.scatter.my_rank is not assigned yet at this point, so take
     * the rank straight from the communicator for the entry trace. */
    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:scatter_intra_linear_top enter rank %d", ompi_comm_rank(comm)));

    request->type                   = OMPI_COLL_PORTALS4_TYPE_SCATTER;
    request->u.scatter.scatter_buf  = NULL;
    request->u.scatter.scatter_mdh  = PTL_INVALID_HANDLE;
    request->u.scatter.scatter_cth  = PTL_INVALID_HANDLE;
    request->u.scatter.scatter_meh  = PTL_INVALID_HANDLE;
    request->u.scatter.sync_mdh     = PTL_INVALID_HANDLE;
    request->u.scatter.sync_cth     = PTL_INVALID_HANDLE;
    request->u.scatter.sync_meh     = PTL_INVALID_HANDLE;

    request->u.scatter.my_rank   = ompi_comm_rank(comm);
    request->u.scatter.size      = ompi_comm_size(comm);
    request->u.scatter.root_rank = root;
    request->u.scatter.sbuf      = sbuf;
    request->u.scatter.rbuf      = rbuf;

    request->u.scatter.pack_src_buf    = sbuf;
    request->u.scatter.pack_src_count  = scount;
    request->u.scatter.pack_src_dtype  = sdtype;
    ompi_datatype_get_extent(request->u.scatter.pack_src_dtype,
                             &request->u.scatter.pack_src_lb,
                             &request->u.scatter.pack_src_extent);
    ompi_datatype_get_true_extent(request->u.scatter.pack_src_dtype,
                                  &request->u.scatter.pack_src_true_lb,
                                  &request->u.scatter.pack_src_true_extent);

    /* Always start from a zero destination offset so that the trace below
     * never prints a stale value left in the request when the root runs
     * in place. */
    request->u.scatter.unpack_dst_offset = 0;

    if ((root == request->u.scatter.my_rank) && (rbuf == MPI_IN_PLACE)) {
        /* in-place root: nothing to unpack into a separate receive buffer */
        request->u.scatter.unpack_dst_buf   = NULL;
        request->u.scatter.unpack_dst_count = 0;
        request->u.scatter.unpack_dst_dtype = MPI_DATATYPE_NULL;
    } else {
        request->u.scatter.unpack_dst_buf   = rbuf;
        request->u.scatter.unpack_dst_count = rcount;
        request->u.scatter.unpack_dst_dtype = rdtype;
        ompi_datatype_get_extent(request->u.scatter.unpack_dst_dtype,
                                 &request->u.scatter.unpack_dst_lb,
                                 &request->u.scatter.unpack_dst_extent);
        ompi_datatype_get_true_extent(request->u.scatter.unpack_dst_dtype,
                                      &request->u.scatter.unpack_dst_true_lb,
                                      &request->u.scatter.unpack_dst_true_extent);
    }

    opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                        "%s:%d:rank(%d): request->u.scatter.unpack_dst_offset(%lu)",
                        __FILE__, __LINE__, request->u.scatter.my_rank,
                        request->u.scatter.unpack_dst_offset);

    /**********************************/
    /* Setup Common Parameters        */
    /**********************************/

    i_am_root = (request->u.scatter.my_rank == request->u.scatter.root_rank);

    request->u.scatter.coll_count = opal_atomic_add_size_t(&portals4_module->coll_count, 1);

    ret = setup_scatter_buffers_linear(comm, request, portals4_module);
    if (MPI_SUCCESS != ret) {
        line = __LINE__;
        goto err_hdlr;
    }

    ret = setup_scatter_handles(comm, request, portals4_module);
    if (MPI_SUCCESS != ret) {
        line = __LINE__;
        goto err_hdlr;
    }

    ret = setup_sync_handles(comm, request, portals4_module);
    if (MPI_SUCCESS != ret) {
        line = __LINE__;
        goto err_hdlr;
    }

    /**********************************/
    /* do the scatter                 */
    /**********************************/
    if (i_am_root) {
        /* operations on the sync counter */
        expected_rtrs = request->u.scatter.size - 1; /* expect RTRs from non-root ranks */
        expected_acks = request->u.scatter.size - 1; /* expect Recv-ACKs from non-root ranks */

        /* operations on the scatter counter */
        expected_puts         = 0;
        expected_chained_rtrs = 1;
        expected_chained_acks = 1;

        /* Chain the RTR and Recv-ACK to the Scatter CT: one increment once
         * all RTRs have arrived, a second once all Recv-ACKs have arrived. */
        sync_incr_event.success=1;
        sync_incr_event.failure=0;
        ret = PtlTriggeredCTInc(request->u.scatter.scatter_cth,
                                sync_incr_event,
                                request->u.scatter.sync_cth,
                                expected_rtrs);
        if (PTL_OK != ret) {
            ret = OMPI_ERROR;
            line = __LINE__;
            goto err_hdlr;
        }

        ret = PtlTriggeredCTInc(request->u.scatter.scatter_cth,
                                sync_incr_event,
                                request->u.scatter.sync_cth,
                                expected_rtrs + expected_acks);
        if (PTL_OK != ret) {
            ret = OMPI_ERROR;
            line = __LINE__;
            goto err_hdlr;
        }

        /* root, so put packed bytes to other ranks */
        for (int32_t i=0; i<request->u.scatter.size; i++) {
            /* do not put to my scatter_buf.  my data gets unpacked into my out buffer in linear_bottom(). */
            if (i == request->u.scatter.my_rank) {
                continue;
            }

            /* rank i's slice starts i * packed_size bytes into scatter_buf */
            ptl_size_t offset = request->u.scatter.packed_size * i;

            opal_output_verbose(30, ompi_coll_base_framework.framework_output,
                                "%s:%d:rank(%d): offset(%lu)=rank(%d) * packed_size(%ld)",
                                __FILE__, __LINE__, request->u.scatter.my_rank,
                                offset, i, request->u.scatter.packed_size);

            /* fires once the scatter counter reaches expected_chained_rtrs,
             * i.e. after the first chained increment above */
            ret = PtlTriggeredPut(request->u.scatter.scatter_mdh,
                                  (ptl_size_t)request->u.scatter.scatter_buf + offset,
                                  request->u.scatter.packed_size,
                                  PTL_NO_ACK_REQ,
                                  ompi_coll_portals4_get_peer(comm, i),
                                  mca_coll_portals4_component.pt_idx,
                                  request->u.scatter.scatter_match_bits,
                                  0,
                                  NULL,
                                  0,
                                  request->u.scatter.scatter_cth,
                                  expected_chained_rtrs);
            if (PTL_OK != ret) {
                ret = OMPI_ERROR;
                line = __LINE__;
                goto err_hdlr;
            }
        }
    } else {
        /* non-root, so do nothing */

        /* operations on the sync counter */
        expected_rtrs = 0;
        expected_acks = 0;

        /* operations on the scatter counter */
        expected_puts         = 1;  /* scatter put from root */
        expected_chained_rtrs = 0;
        expected_chained_acks = 0;
    }

    expected_ops = expected_chained_rtrs + expected_puts;

    /**********************************************/
    /* only non-root ranks are PUT to, so only    */
    /* non-root ranks must PUT a Recv-ACK to root */
    /**********************************************/
    if (!i_am_root) {
        ret = PtlTriggeredPut(request->u.scatter.sync_mdh,
                              0,
                              0,
                              PTL_NO_ACK_REQ,
                              ompi_coll_portals4_get_peer(comm, request->u.scatter.root_rank),
                              mca_coll_portals4_component.pt_idx,
                              request->u.scatter.sync_match_bits,
                              0,
                              NULL,
                              0,
                              request->u.scatter.scatter_cth,
                              expected_ops);
        if (PTL_OK != ret) {
            ret = OMPI_ERROR;
            line = __LINE__;
            goto err_hdlr;
        }
    }

    /* the completion threshold additionally covers the chained Recv-ACK
     * increment (1 on the root, 0 elsewhere) */
    expected_ops += expected_chained_acks;

    if (!request->u.scatter.is_sync) {
        /******************************************/
        /* put to finish pt when all ops complete */
        /******************************************/
        ret = PtlTriggeredPut(mca_coll_portals4_component.zero_md_h,
                              0,
                              0,
                              PTL_NO_ACK_REQ,
                              ompi_coll_portals4_get_peer(comm, request->u.scatter.my_rank),
                              mca_coll_portals4_component.finish_pt_idx,
                              0,
                              0,
                              NULL,
                              (uintptr_t) request,
                              request->u.scatter.scatter_cth,
                              expected_ops);
        if (PTL_OK != ret) {
            ret = OMPI_ERROR;
            line = __LINE__;
            goto err_hdlr;
        }
    }

    /**************************************/
    /* all non-root ranks put RTR to root */
    /**************************************/
    if (!i_am_root) {
        ret = PtlPut(request->u.scatter.sync_mdh,
                     0,
                     0,
                     PTL_NO_ACK_REQ,
                     ompi_coll_portals4_get_peer(comm, request->u.scatter.root_rank),
                     mca_coll_portals4_component.pt_idx,
                     request->u.scatter.sync_match_bits,
                     0,
                     NULL,
                     0);
        if (PTL_OK != ret) {
            ret = OMPI_ERROR;
            line = __LINE__;
            goto err_hdlr;
        }
    }

    if (request->u.scatter.is_sync) {
        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "calling CTWait(expected_ops=%d)\n", expected_ops);

        /********************************/
        /* Wait for all ops to complete */
        /********************************/
        ret = PtlCTWait(request->u.scatter.scatter_cth, expected_ops, &ct);
        if (PTL_OK != ret) {
            ret = OMPI_ERROR;
            line = __LINE__;
            goto err_hdlr;
        }

        opal_output_verbose(1, ompi_coll_base_framework.framework_output,
                            "completed CTWait(expected_ops=%d)\n", expected_ops);
    }

    OPAL_OUTPUT((ompi_coll_base_framework.framework_output,
                 "coll:portals4:scatter_intra_linear_top exit rank %d", request->u.scatter.my_rank));

    return OMPI_SUCCESS;

err_hdlr:
    /* free(NULL) is a no-op; clear the pointer so that no later cleanup of
     * this request can free the buffer a second time. */
    free(request->u.scatter.scatter_buf);
    request->u.scatter.scatter_buf = NULL;

    opal_output(ompi_coll_base_framework.framework_output,
                "%s:%4d:%4d\tError occurred ret=%d, rank %2d",
                __FILE__, __LINE__, line, ret, request->u.scatter.my_rank);

    return ret;
}
Esempio n. 18
0
/*
 * Accumulate origin data into a target window.
 *
 * Only contiguous origin and target layouts are handled; anything else is
 * rejected with OMPI_ERR_NOT_SUPPORTED.  The payload (type size of origin_dt
 * times origin_count bytes) is sent in fragments of at most
 * module->atomic_max bytes.  MPI_REPLACE is issued as PtlPut(), every other
 * op as PtlAtomic() with the Portals op/datatype obtained from
 * ompi_osc_portals4_get_op() / ompi_osc_portals4_get_dt().  Every fragment is
 * sent with PTL_ACK_REQ and module->opcount is incremented once per fragment
 * before it is issued.
 *
 * The datatype/op translation does not depend on the fragment, so it is done
 * once before the loop.  This also guarantees that a translation failure
 * returns before opcount has been incremented for an operation that is never
 * issued.
 *
 * Returns OMPI_SUCCESS, OMPI_ERR_NOT_SUPPORTED, or the first non-success
 * code returned by a helper or by the Portals call.
 */
int
ompi_osc_portals4_accumulate(const void *origin_addr,
                             int origin_count,
                             struct ompi_datatype_t *origin_dt,
                             int target,
                             OPAL_PTRDIFF_TYPE target_disp,
                             int target_count,
                             struct ompi_datatype_t *target_dt,
                             struct ompi_op_t *op,
                             struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_size_t md_offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "accumulate: 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    /* byte offset at the target: displacement unit times target_disp */
    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Accumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    }

    ret = ompi_datatype_type_size(origin_dt, &length);
    if (OMPI_SUCCESS != ret) {
        return ret;
    }
    length *= origin_count;

    /* Translate the MPI datatype and op once, up front.  ptl_dt / ptl_op are
     * only consumed by the PtlAtomic() branch below. */
    if (MPI_REPLACE != op) {
        ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }

        ret = ompi_osc_portals4_get_op(op, &ptl_op);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
    }

    /* the origin address itself is passed as the local offset of md_h */
    md_offset = (ptl_size_t) origin_addr;
    sent = 0;

    /* do/while: a zero-length request still issues exactly one operation */
    do {
        size_t msg_length = MIN(module->atomic_max, length - sent);
        (void)opal_atomic_add_64(&module->opcount, 1);

        if (MPI_REPLACE == op) {
            ret = PtlPut(module->md_h,
                         md_offset + sent,
                         msg_length,
                         PTL_ACK_REQ,
                         peer,
                         module->pt_idx,
                         module->match_bits,
                         offset + sent,
                         NULL,
                         0);
        } else {
            ret = PtlAtomic(module->md_h,
                            md_offset + sent,
                            msg_length,
                            PTL_ACK_REQ,
                            peer,
                            module->pt_idx,
                            module->match_bits,
                            offset + sent,
                            NULL,
                            0,
                            ptl_op,
                            ptl_dt);
        }
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
        sent += msg_length;
    } while (sent < length);

    return OMPI_SUCCESS;
}
Esempio n. 19
0
/*
 * Flow-control test driver.
 *
 * Rank 0 owns two portal table entries: a flow-controlled data entry
 * (requested index 5) whose event queue holds (num_procs - 1) * ITERS / 2
 * entries, i.e. fewer than the ITERS * (num_procs - 1) puts sent to it, and a
 * signal entry (requested index 6) counted through a counting event.  Every
 * other rank sends ITERS zero-length puts with ACK requests to entry 5,
 * records how many came back with PTL_NI_PT_DISABLED, signals rank 0 through
 * entry 6, and then keeps re-sending until it has collected one successful
 * ACK per recorded failure.
 *
 * Rank 0 waits for all signals, then drains its event queue, re-enabling the
 * data entry on every PTL_EVENT_PT_DISABLED, and fails if it never saw one.
 *
 * Exit status: 0 on success, 1 on a failed check, 77 when run with fewer
 * than two processes.
 */
int main(int   argc,
         char *argv[])
{
    ptl_handle_ni_t ni_handle;
    ptl_process_t   *procs;
    int             rank;
    ptl_pt_index_t  pt_index, signal_pt_index;
    HANDLE_T        value_e_handle, signal_e_handle;
    int             num_procs;
    ptl_handle_eq_t eq_handle;
    ptl_handle_ct_t ct_handle;
    ptl_handle_md_t md_handle;

    CHECK_RETURNVAL(PtlInit());

    CHECK_RETURNVAL(libtest_init());

    rank = libtest_get_rank();
    num_procs = libtest_get_size();
    if (num_procs < 2) {
        fprintf(stderr, "test_flowctl_noeq requires at least two processes\n");
        return 77;
    }

    CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL,
                              PTL_PID_ANY, NULL, NULL, &ni_handle));
    procs = libtest_get_mapping(ni_handle);
    CHECK_RETURNVAL(PtlSetMap(ni_handle, num_procs, procs));


    if (0 == rank) {
        ENTRY_T         value_e;

        /* create data ME */
        CHECK_RETURNVAL(PtlEQAlloc(ni_handle, (num_procs - 1) * ITERS / 2, &eq_handle));
        CHECK_RETURNVAL(PtlPTAlloc(ni_handle, PTL_PT_FLOWCTRL, eq_handle, 5,
                                   &pt_index));
        value_e.start = NULL;
        value_e.length = 0;
        value_e.ct_handle = PTL_CT_NONE;
        value_e.uid = PTL_UID_ANY;
        value_e.options = OPTIONS;
#if INTERFACE == 1
        value_e.match_id.rank = PTL_RANK_ANY;
        value_e.match_bits = 0;
        value_e.ignore_bits = 0;
#endif
        CHECK_RETURNVAL(APPEND(ni_handle, 5, &value_e, PTL_PRIORITY_LIST, NULL, &value_e_handle));

        /* create signal ME */
        CHECK_RETURNVAL(PtlCTAlloc(ni_handle, &ct_handle));
        CHECK_RETURNVAL(PtlPTAlloc(ni_handle, 0, PTL_EQ_NONE, 6,
                                   &signal_pt_index));
        value_e.start = NULL;
        value_e.length = 0;
        value_e.ct_handle = ct_handle;
        value_e.uid = PTL_UID_ANY;
        value_e.options = OPTIONS | PTL_LE_EVENT_SUCCESS_DISABLE | PTL_LE_EVENT_CT_COMM;
#if INTERFACE == 1
        value_e.match_id.rank = PTL_RANK_ANY;
        value_e.match_bits = 0;
        value_e.ignore_bits = 0;
#endif
        CHECK_RETURNVAL(APPEND(ni_handle, 6, &value_e, PTL_PRIORITY_LIST, NULL, &signal_e_handle));
    } else {
        ptl_md_t        md;

        /* 16 extra just in case... */
        CHECK_RETURNVAL(PtlEQAlloc(ni_handle, ITERS * 2 + 16, &eq_handle));

        md.start = NULL;
        md.length = 0;
        md.options = 0;
        md.eq_handle = eq_handle;
        md.ct_handle = PTL_CT_NONE;

        CHECK_RETURNVAL(PtlMDBind(ni_handle, &md, &md_handle));
    }

    libtest_barrier();

    if (0 == rank) {
        ptl_ct_event_t  ct;
        ptl_event_t ev;
        int ret, count = 0, saw_dropped = 0, saw_flowctl = 0;

        /* wait for signal counts */
        CHECK_RETURNVAL(PtlCTWait(ct_handle, num_procs - 1, &ct));
        if (ct.success != num_procs - 1 || ct.failure != 0) {
            return 1;
        }

        /* wait for event entries */
        while (count < ITERS * (num_procs - 1)) {
            ret = PtlEQWait(eq_handle, &ev);
            if (PTL_OK == ret) {
                ;
            } else if (PTL_EQ_DROPPED == ret) {
                /* events were lost: note it, re-enable the entry if this
                 * event reports it disabled, and stop draining */
                saw_dropped++;
                if (ev.type == PTL_EVENT_PT_DISABLED){
                    saw_flowctl++;
                    CHECK_RETURNVAL(PtlPTEnable(ni_handle, pt_index));
                }
                break;
            } else {
                fprintf(stderr, "0: Unexpected return code from EQWait: %d\n", ret);
                return 1;
            }

            if (ev.type == PTL_EVENT_PT_DISABLED) {
                CHECK_RETURNVAL(PtlPTEnable(ni_handle, pt_index));
                saw_flowctl++;
            } else {
                count++;
            }
        }

        fprintf(stderr, "0: Saw %d dropped, %d flowctl\n", saw_dropped, saw_flowctl);
        if (saw_flowctl == 0) {
            return 1;
        }
    } else {
        ptl_process_t target;
        ptl_event_t ev;
        int ret, count = 0, fails = 0;
        int i;
        int *fail_seen;

        /* one slot per iteration, used to detect a failure that is reported
         * twice for the same put */
        fail_seen = malloc(sizeof(int) * ITERS);
        if (NULL == fail_seen) {
             fprintf(stderr, "%d: malloc failed\n", rank);
             return 1;
        }
        memset(fail_seen, 0, sizeof(int) * ITERS);

        target.rank = 0;
        for (i = 0 ; i < ITERS ; ++i) {
            /* the iteration number travels in user_ptr so that the matching
             * event can be attributed to this put */
            CHECK_RETURNVAL(PtlPut(md_handle,
                                   0,
                                   0,
                                   PTL_ACK_REQ,
                                   target,
                                   5,
                                   0,
                                   0,
                                   (void*)(size_t)i,
                                   0));
            usleep(100);
        }

        /* collect exactly one ACK or one PT_DISABLED failure per put */
        while (count < ITERS) {
            ret = PtlEQGet(eq_handle, &ev);
            if (PTL_EQ_EMPTY == ret) {
                continue;
            } else if (PTL_OK != ret) {
                fprintf(stderr, "%d: PtlEQGet returned %d\n", rank, ret);
                return 1;
            }

            if (ev.ni_fail_type == PTL_NI_OK) {
                if (ev.type == PTL_EVENT_SEND) {
                    continue;
                } else if (ev.type == PTL_EVENT_ACK) {
                    count++;
                } else {
                    fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type);
                }
            } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) {
                int iter = (size_t) ev.user_ptr;
                /* never index fail_seen with a value that did not come from
                 * one of the ITERS puts above */
                if (iter < 0 || iter >= ITERS) {
                    fprintf(stderr, "%d: PT_DISABLED event carries out-of-range "
                            "iteration %d\n", rank, iter);
                    return 1;
                }
                if (fail_seen[iter]++ > 0) {
                    fprintf(stderr, "%d: Double report of PT_DISABLED for "
                            "iteration %d\n", rank, iter);
                    return 1;
                }
                count++;
                fails++;
            } else {
                fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type);
                return 1;
            }
        }

        /* the per-iteration bookkeeping is not needed past this point */
        free(fail_seen);
        fail_seen = NULL;

        fprintf(stderr, "%d: Saw %d of %d events as fails\n", rank, fails, count);

        /* tell rank 0 (signal entry, index 6) that this rank is done with
         * its first round */
        CHECK_RETURNVAL(PtlPut(md_handle,
                               0,
                               0,
                               PTL_NO_ACK_REQ,
                               target,
                               6,
                               0,
                               0,
                               NULL,
                               0));
        /* wait for the send event on the last put */
        CHECK_RETURNVAL(PtlEQWait(eq_handle, &ev));

        /* re-send until every recorded failure has been matched by a
         * successful ACK; a put that fails again is simply retried */
        while (fails > 0) {
            CHECK_RETURNVAL(PtlPut(md_handle,
                                   0,
                                   0,
                                   PTL_ACK_REQ,
                                   target,
                                   5,
                                   0,
                                   0,
                                   NULL,
                                   0));
            while (1) {
                ret = PtlEQWait(eq_handle, &ev);
                if (PTL_OK != ret) {
                    fprintf(stderr, "%d: PtlEQWait returned %d\n", rank, ret);
                    return 1;
                }

                if (ev.ni_fail_type == PTL_NI_OK) {
                    if (ev.type == PTL_EVENT_SEND) {
                        continue;
                    } else if (ev.type == PTL_EVENT_ACK) {
                        fails--;
                        break;
                    } else {
                        fprintf(stderr, "%d: Unexpected event type %d\n", rank, ev.type);
                    }
                } else if (ev.ni_fail_type == PTL_NI_PT_DISABLED) {
                    break;
                } else {
                    fprintf(stderr, "%d: Unexpected fail type: %d\n", rank, ev.ni_fail_type);
                    return 1;
                }
            }
        }
    }

    libtest_barrier();

    if (0 == rank) {
        CHECK_RETURNVAL(UNLINK(signal_e_handle));
        CHECK_RETURNVAL(PtlPTFree(ni_handle, signal_pt_index));
        CHECK_RETURNVAL(PtlCTFree(ct_handle));
        CHECK_RETURNVAL(UNLINK(value_e_handle));
        CHECK_RETURNVAL(PtlPTFree(ni_handle, pt_index));
        CHECK_RETURNVAL(PtlEQFree(eq_handle));
    } else {
        CHECK_RETURNVAL(PtlMDRelease(md_handle));
        CHECK_RETURNVAL(PtlEQFree(eq_handle));
    }

    CHECK_RETURNVAL(PtlNIFini(ni_handle));
    CHECK_RETURNVAL(libtest_fini());
    PtlFini();

    return 0;
}
Esempio n. 20
0
/*
 * Issue the active RDMA for a received request.
 *
 * 'type' must be TX_TYPE_PUT_RESPONSE or TX_TYPE_GET_RESPONSE.  A tx
 * descriptor is taken from the idle pool, its RDMA memory descriptor is
 * initialised from the (niov, iov, kiov, offset, nob) fragment description
 * and bound, and the tx is queued on the peer's active list.  For
 * TX_TYPE_GET_RESPONSE the data is then pushed with PtlPut(); otherwise it
 * is fetched with PtlGet().
 *
 * Returns -ENOMEM when no tx descriptor is available and -EIO when the
 * memory descriptor cannot be bound.  Once the tx carries 'lntmsg' the
 * function always returns 0: a failing PtlPut()/PtlGet() is reported by
 * closing the peer rather than through the return value.
 */
int
kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type,
                    unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
                    unsigned int offset, int nob)
{
        kptl_msg_t      *req_msg = rx->rx_msg;
        kptl_peer_t     *peer = rx->rx_peer;
        int              do_put = (type == TX_TYPE_GET_RESPONSE);
        kptl_tx_t       *tx;
        ptl_handle_md_t  md_handle;
        unsigned long    irq_flags;
        ptl_err_t        rc;

        LASSERT (type == TX_TYPE_PUT_RESPONSE ||
                 type == TX_TYPE_GET_RESPONSE);

        tx = kptllnd_get_idle_tx(type);
        if (tx == NULL) {
                CERROR ("Can't do %s rdma to %s: can't allocate descriptor\n",
                        type == TX_TYPE_PUT_RESPONSE ? "GET" : "PUT",
                        libcfs_id2str(peer->peer_id));
                return -ENOMEM;
        }

        kptllnd_set_tx_peer(tx, peer);
        kptllnd_init_rdma_md(tx, niov, iov, kiov, offset, nob);

        rc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md,
                       PTL_UNLINK, &md_handle);
        if (rc != PTL_OK) {
                CERROR("PtlMDBind(%s) failed: %s(%d)\n",
                       libcfs_id2str(peer->peer_id),
                       kptllnd_errtype2str(rc), rc);
                tx->tx_status = -EIO;
                kptllnd_tx_decref(tx);
                return -EIO;
        }

        cfs_spin_lock_irqsave(&peer->peer_lock, irq_flags);

        /* From the moment the tx carries the LNET message, tearing the tx
         * down calls lnet_finalize(), so only success may be returned
         * below. */
        tx->tx_lnet_msg = lntmsg;

        tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * CFS_HZ);
        tx->tx_rdma_mdh = md_handle;
        tx->tx_active = 1;

        /* queueing the tx hands my reference on it over to the peer */
        cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq);

        cfs_spin_unlock_irqrestore(&peer->peer_lock, irq_flags);

        tx->tx_tposted = jiffies;

        if (!do_put) {
                rc = PtlGet(md_handle,
                            rx->rx_initiator,
                            *kptllnd_tunables.kptl_portal,
                            0,                     /* acl cookie */
                            req_msg->ptlm_u.rdma.kptlrm_matchbits,
                            0);                    /* offset */
        } else {
                /* the header data tells the other side whether there was
                 * an LNET message behind this RDMA */
                rc = PtlPut(md_handle,
                            tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ,
                            rx->rx_initiator,
                            *kptllnd_tunables.kptl_portal,
                            0,                     /* acl cookie */
                            req_msg->ptlm_u.rdma.kptlrm_matchbits,
                            0,                     /* offset */
                            (lntmsg == NULL) ?     /* header data */
                            PTLLND_RDMA_FAIL :
                            PTLLND_RDMA_OK);
        }

        if (rc == PTL_OK)
                return 0;

        CERROR("Ptl%s failed: %s(%d)\n",
               do_put ? "Put" : "Get",
               kptllnd_errtype2str(rc), rc);

        /* Closing the peer completes everything queued on it, this RDMA
         * included, with failure. */
        kptllnd_peer_close(peer, -EIO);
        kptllnd_schedule_ptltrace_dump();

        return 0;
}
Esempio n. 21
0
/* called when a receive should be progressed */
static int
ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
                                ompi_mtl_portals4_base_request_t* ptl_base_request)
{
    int ret;
    ompi_mtl_portals4_recv_request_t* ptl_request =
        (ompi_mtl_portals4_recv_request_t*) ptl_base_request;
    size_t msg_length = 0;

    /* as soon as we've seen any event associated with a request, it's
       started */
    ptl_request->req_started = true;

    switch (ev->type) {
    case PTL_EVENT_PUT:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) got put event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            goto callback_error;
        }

        ptl_request->me_h = PTL_INVALID_HANDLE;

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG =
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "truncate expected: %ld %ld",
                                msg_length, ptl_request->delivery_len);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
            /* If it's not a short message and we're doing rndv, we
               only have the first part of the message.  Issue the get
               to pull the second part of the message. */
            ret = read_msg((char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit,
                           ((msg_length > ptl_request->delivery_len) ?
                            ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit,
                           ev->initiator,
                           ev->hdr_data,
                           ompi_mtl_portals4.eager_limit,
                           ptl_request);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }

        } else {
            /* If we're either using the eager protocol or were a
               short message, all data has been received, so complete
               the message. */
            ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                           ev->start,
                                           ev->mlength);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                    "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                    __FILE__, __LINE__, ret);
                ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
            }
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                 "Recv %lu (0x%lx) completed, expected",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);
        }
        break;

    case PTL_EVENT_REPLY:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) got reply event",
                             ptl_request->opcount, ptl_request->hdr_data));

        if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            PtlMDRelease(ptl_request->md_h);
            goto callback_error;
        }

        /* set the received length in the status, now that we know
           excatly how much data was sent. */
        ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
        if (ompi_mtl_portals4.protocol == rndv) {
            ptl_request->super.super.ompi_req->req_status._ucount +=
                ompi_mtl_portals4.eager_limit;
        }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
#endif

        /* make sure the data is in the right place.  Use _ucount for
           the total length because it will be set correctly for all
           three protocols. mlength is only correct for eager, and
           delivery_len is the length of the buffer, not the length of
           the send. */
        ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                       ptl_request->delivery_ptr,
                                       ptl_request->super.super.ompi_req->req_status._ucount);
        if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                __FILE__, __LINE__, ret);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
        }
        PtlMDRelease(ptl_request->md_h);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) completed, reply",
                             ptl_request->opcount, ptl_request->hdr_data));
        ptl_request->super.super.completion_callback(&ptl_request->super.super);
        break;

    case PTL_EVENT_PUT_OVERFLOW:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                             "Recv %lu (0x%lx) got put_overflow event",
                             ptl_request->opcount, ev->hdr_data));

        if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            goto callback_error;
        }

        ptl_request->me_h = PTL_INVALID_HANDLE;

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG =
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (OPAL_UNLIKELY(msg_length > ptl_request->delivery_len)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "truncate unexpected: %ld %ld %d",
                                msg_length, ptl_request->delivery_len,
                                MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        /* overflow case.  Short messages have the buffer stashed
           somewhere.  Long messages left in buffer at the source */
        if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) {
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
            if (ev->mlength > 0) {
                struct iovec iov;
                uint32_t iov_count = 1;
                size_t max_data;
                iov.iov_base = (char*) ev->start;
                iov.iov_len = ev->mlength;
                max_data = iov.iov_len;

                ret = opal_convertor_unpack(ptl_request->convertor,
                                            &iov, &iov_count,
                                            &max_data );
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                if (OPAL_UNLIKELY(ret < 0)) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                        "%s:%d: opal_convertor_unpack failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }
            /* if it's a sync, send the ack */
            if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) {
                OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                     "Recv %lu (0x%lx) sending sync ack",
                                     ptl_request->opcount, ptl_request->hdr_data));
                ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                             0,
                             0,
                             PTL_NO_ACK_REQ,
                             ev->initiator,
                             ompi_mtl_portals4.read_idx,
                             ev->hdr_data,
                             0,
                             NULL,
                             0);
                if (OPAL_UNLIKELY(PTL_OK != ret)) {
                    opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                        "%s:%d: PtlPut failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
                                 "Recv %lu (0x%lx) completed, unexpected short (0x%lx)",
                                 ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);

        } else {
            if (ev->mlength > 0) {
                /* if rndv or triggered, copy the eager part to the right place */
                memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength);
            }

            ret = read_msg((char*) ptl_request->delivery_ptr + ev->mlength,
                           ((msg_length > ptl_request->delivery_len) ?
                            ptl_request->delivery_len : msg_length) - ev->mlength,
                           ev->initiator,
                           ev->hdr_data,
                           ev->mlength,
                           ptl_request);
            if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }
        }

        break;

    case PTL_EVENT_LINK:
        break;

    default:
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "Unhandled receive callback with event type %d",
                            ev->type);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

 callback_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR =
        ompi_mtl_portals4_get_error(ret);
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;
}
Esempio n. 22
0
/*
 * "Hot potato" ring test.
 *
 * Every rank exposes the integer `potato` through a counting list entry
 * (the catcher) and binds the same integer as a send buffer (the launcher).
 * Rank 0 throws the first potato; afterwards each rank waits for its catcher
 * counter to advance, increments the potato and puts it to the next rank,
 * with the last rank wrapping around to rank 0.  Rank 0 reports the elapsed
 * wall-clock time once its LOOPS catches are done.
 *
 * Returns 0 on success, or 77 when run under make (MAKELEVEL set) with more
 * than two processes.
 */
int main(int   argc,
         char *argv[])
{
    /* interface, portal table slot and process identity */
    ptl_handle_ni_t ni_logical;
    ptl_pt_index_t  logical_pt_index;
    ptl_process_t   myself;
    ptl_process_t   nextrank;
    int             num_procs;

    /* the potato with its receive side (catcher) and send side (launcher) */
    int             potato = 0;
    ENTRY_T         potato_catcher;
    HANDLE_T        potato_catcher_handle;
    ptl_md_t        potato_launcher;
    ptl_handle_md_t potato_launcher_handle;

    /* timestamps; written and read by rank 0 only */
    struct timeval  start, stop;

    CHECK_RETURNVAL(PtlInit());
    CHECK_RETURNVAL(libtest_init());

    num_procs = libtest_get_size();

    /* bail out early when driven by make with more than two processes */
    if (NULL != getenv("MAKELEVEL") && num_procs > 2) {
        return 77;
    }

    /* bring up the logical interface and install the rank mapping */
    CHECK_RETURNVAL(PtlNIInit(PTL_IFACE_DEFAULT, NI_TYPE | PTL_NI_LOGICAL,
                              PTL_PID_ANY, NULL, NULL, &ni_logical));
    CHECK_RETURNVAL(PtlSetMap(ni_logical, num_procs,
                              libtest_get_mapping(ni_logical)));
    CHECK_RETURNVAL(PtlGetId(ni_logical, &myself));

    /* the test relies on getting portal table index 0 */
    CHECK_RETURNVAL(PtlPTAlloc(ni_logical, 0, PTL_EQ_NONE, PTL_PT_ANY,
                               &logical_pt_index));
    assert(logical_pt_index == 0);

    /* describe the catcher: incoming potatoes land directly in `potato` */
    potato_catcher.start   = &potato;
    potato_catcher.length  = sizeof(potato);
    potato_catcher.uid     = PTL_UID_ANY;
    potato_catcher.options = OPTIONS;
#if INTERFACE == 1
    /* matching interface only: accept any rank, match on bit 0 */
    potato_catcher.match_id.rank = PTL_RANK_ANY;
    potato_catcher.match_bits    = 1;
    potato_catcher.ignore_bits   = ~potato_catcher.match_bits;
#endif
    CHECK_RETURNVAL(PtlCTAlloc(ni_logical, &potato_catcher.ct_handle));
    CHECK_RETURNVAL(APPEND(ni_logical, logical_pt_index, &potato_catcher,
                           PTL_PRIORITY_LIST, NULL, &potato_catcher_handle));

    /* nobody may throw until every rank has its catcher in place */
    libtest_barrier();

    /* describe the launcher: sends and acks are counted, not queued */
    potato_launcher.start     = &potato;
    potato_launcher.length    = sizeof(potato);
    potato_launcher.options   = PTL_MD_EVENT_CT_ACK | PTL_MD_EVENT_CT_SEND;
    potato_launcher.eq_handle = PTL_EQ_NONE;
    CHECK_RETURNVAL(PtlCTAlloc(ni_logical, &potato_launcher.ct_handle));
    CHECK_RETURNVAL(PtlMDBind(ni_logical, &potato_launcher,
                              &potato_launcher_handle));

    /* my successor in the ring; the last rank hands back to rank 0 */
    nextrank.rank = myself.rank + 1;
    if (!(nextrank.rank <= num_procs - 1)) {
        nextrank.rank = 0;
    }

    /* rank 0 throws the very first potato and starts the clock */
    if (myself.rank == 0) {
        gettimeofday(&start, NULL);
        CHECK_RETURNVAL(PtlPut(potato_launcher_handle, 0, potato_launcher.length,
                               (LOOPS == 1) ? PTL_OC_ACK_REQ : PTL_NO_ACK_REQ,
                               nextrank, logical_pt_index, 1, 0,
                               NULL, 1));
    }

    {
        /* catch-and-toss loop: one catch per trip around the ring */
        ptl_ct_event_t ctc;
        size_t         waitfor = 1;

        while (waitfor <= LOOPS) {
            /* block until the waitfor-th potato has arrived */
            CHECK_RETURNVAL(PtlCTWait(potato_catcher.ct_handle, waitfor, &ctc));
            assert(ctc.failure == 0);
            assert(ctc.success == waitfor);

            ++potato;

            /* skip the final toss: its recipient may already have exited */
            if (potato < LOOPS * (num_procs)) {
                /* only the last toss of each rank requests an ack; rank 0's
                 * last toss happens one iteration earlier than the others' */
                if (myself.rank != 0) {
                    CHECK_RETURNVAL(PtlPut(potato_launcher_handle, 0,
                                           potato_launcher.length,
                                           (waitfor == LOOPS) ? PTL_OC_ACK_REQ : PTL_NO_ACK_REQ,
                                           nextrank, logical_pt_index, 3, 0, NULL, 2));
                } else {
                    CHECK_RETURNVAL(PtlPut(potato_launcher_handle, 0,
                                           potato_launcher.length,
                                           (waitfor == (LOOPS - 1)) ? PTL_OC_ACK_REQ : PTL_NO_ACK_REQ,
                                           nextrank, logical_pt_index, 3, 0, NULL, 2));
                }
            }

            ++waitfor;
        }

        /* do not tear anything down until the launcher counter shows the
         * last send has completed */
        CHECK_RETURNVAL(PtlCTWait(potato_launcher.ct_handle, LOOPS+1, &ctc));
        assert(ctc.failure == 0);
    }

    /* rank 0 stops the clock and reports */
    if (myself.rank == 0) {
        double stop_secs;
        double start_secs;
        double total;
        double per_loop;
        double per_toss;

        gettimeofday(&stop, NULL);
        stop_secs  = stop.tv_sec + stop.tv_usec * 1e-6;
        start_secs = start.tv_sec + start.tv_usec * 1e-6;
        total      = stop_secs - start_secs;
        printf("Total time: %g secs\n", total);

        per_loop = total / LOOPS;
        printf("Average time around the loop: %g microseconds\n",
               per_loop * 1e6);

        per_toss = per_loop / num_procs;
        printf("Average catch-to-toss latency: %g microseconds\n",
               per_toss * 1e6);
    }

    /* release the launcher and catcher with their counters */
    CHECK_RETURNVAL(PtlMDRelease(potato_launcher_handle));
    CHECK_RETURNVAL(PtlCTFree(potato_launcher.ct_handle));
    CHECK_RETURNVAL(UNLINK(potato_catcher_handle));
    CHECK_RETURNVAL(PtlCTFree(potato_catcher.ct_handle));

    /* then the portal table entry, the interface and the libraries */
    CHECK_RETURNVAL(PtlPTFree(ni_logical, logical_pt_index));
    CHECK_RETURNVAL(PtlNIFini(ni_logical));
    CHECK_RETURNVAL(libtest_fini());
    PtlFini();

    return 0;
}
Esempio n. 23
0
/*
 * Called when a receive should be progressed.
 *
 * ev                the Portals event to process; ev->type selects the path
 * ptl_base_request  the request the event belongs to; it is treated as an
 *                   ompi_mtl_portals4_recv_request_t
 *
 * Event types handled:
 *   PTL_EVENT_PUT           message matched the posted receive.  Fills in
 *                           source/tag; for a non-short message under the
 *                           rndv protocol a PtlGet fetches the part beyond
 *                           eager_limit, otherwise the data is unpacked and
 *                           the request completed.
 *   PTL_EVENT_REPLY         the PtlGet issued earlier finished; the data is
 *                           unpacked, the MD released, the request completed.
 *   PTL_EVENT_PUT_OVERFLOW  the message arrived before the receive was
 *                           posted.  Short messages are unpacked from
 *                           ev->start (a zero-length ack is put back for
 *                           sync sends); for long messages the part already
 *                           received is copied and the rest fetched with
 *                           PtlGet.
 *
 * Returns OMPI_SUCCESS for every handled event type; failures are reported
 * through req_status.MPI_ERROR followed by the completion callback.  Returns
 * OMPI_ERROR for an unhandled event type.
 */
int
ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
                                ompi_mtl_portals4_base_request_t* ptl_base_request)
{
    int ret;
    ompi_mtl_portals4_recv_request_t* ptl_request = 
        (ompi_mtl_portals4_recv_request_t*) ptl_base_request;
    size_t msg_length = 0;

    switch (ev->type) {
    case PTL_EVENT_PUT:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            /* ret has not been assigned yet, so it must not be translated */
            goto callback_ni_error;
        }

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG = 
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (msg_length > ptl_request->delivery_len) {
            opal_output_verbose(1, ompi_mtl_base_output, "truncate expected: %ld %ld", 
                                msg_length, ptl_request->delivery_len);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
            ptl_md_t md;

            /* fetch everything past the eager part, clipped to the buffer.
               NOTE(review): if delivery_len can be smaller than eager_limit
               this subtraction goes below zero - confirm against the send
               side / posting code. */
            md.start = (char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit;
            md.length = ((msg_length > ptl_request->delivery_len) ?
                         ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit;
            md.options = 0;
            md.eq_handle = ompi_mtl_portals4.eq_h;
            md.ct_handle = PTL_CT_NONE;

            ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                            &md,
                            &ptl_request->md_h);
            if (PTL_OK != ret) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlMDBind failed: %d",
                                    __FILE__, __LINE__, ret);
                goto callback_error;
            }

            /* completion continues in the PTL_EVENT_REPLY case */
            ret = PtlGet(ptl_request->md_h,
                         0,
                         md.length,
                         ev->initiator,
                         ompi_mtl_portals4.read_idx,
                         ev->hdr_data,
                         ompi_mtl_portals4.eager_limit,
                         ptl_request);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlGet failed: %d",
                                    __FILE__, __LINE__, ret);
                PtlMDRelease(ptl_request->md_h);
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }

        } else {
            /* make sure the data is in the right place */
            ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                           ev->start,
                                           ev->mlength);
            if (OMPI_SUCCESS != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                    __FILE__, __LINE__, ret);
                ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
            }
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, expected",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);
        }
        break;

    case PTL_EVENT_REPLY:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got reply event",
                             ptl_request->opcount, ptl_request->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            PtlMDRelease(ptl_request->md_h);
            /* ret has not been assigned yet, so it must not be translated */
            goto callback_ni_error;
        }
        /* set the received length: what the get delivered, plus the eager
           part that arrived with the original put under rndv */
        ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
        if (ompi_mtl_portals4.protocol == rndv) {
            ptl_request->super.super.ompi_req->req_status._ucount +=
                ompi_mtl_portals4.eager_limit;
        }

        /* make sure the data is in the right place.  Use _ucount for
           the total length because it will be set correctly for all
           three protocols. mlength is only correct for eager, and
           delivery_len is the length of the buffer, not the length of
           the send. */
        ret = ompi_mtl_datatype_unpack(ptl_request->convertor, 
                                       ptl_request->delivery_ptr, 
                                       ptl_request->super.super.ompi_req->req_status._ucount);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                __FILE__, __LINE__, ret);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
        }
        PtlMDRelease(ptl_request->md_h);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, reply",
                             ptl_request->opcount, ptl_request->hdr_data));
        ptl_request->super.super.completion_callback(&ptl_request->super.super);
        break;

    case PTL_EVENT_PUT_OVERFLOW:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put_overflow event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            /* ret has not been assigned yet, so it must not be translated */
            goto callback_ni_error;
        }

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG = 
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (msg_length > ptl_request->delivery_len) {
            opal_output_verbose(1, ompi_mtl_base_output, "truncate unexpected: %ld %ld %d", 
                                msg_length, ptl_request->delivery_len, MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        /* overflow case.  Short messages have the buffer stashed
           somewhere.  Long messages left in buffer at the source */
        if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) {
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
            if (ev->mlength > 0) {
                /* unpack straight from the stashed data at ev->start */
                struct iovec iov;
                uint32_t iov_count = 1;
                size_t max_data;
                iov.iov_base = (char*) ev->start;
                iov.iov_len = ev->mlength;
                max_data = iov.iov_len;

                ret = opal_convertor_unpack(ptl_request->convertor, 
                                            &iov, &iov_count,
                                            &max_data );
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                if (ret < 0) {
                    opal_output_verbose(1, ompi_mtl_base_output,
                                        "%s:%d: opal_convertor_unpack failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }
            /* if it's a sync, send the ack */
            if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) {
                OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) sending sync ack",
                                     ptl_request->opcount, ptl_request->hdr_data));
                ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                             0,
                             0,
                             PTL_NO_ACK_REQ,
                             ev->initiator,
                             ompi_mtl_portals4.read_idx,
                             ev->hdr_data,
                             0,
                             NULL,
                             0);
                if (PTL_OK != ret) {
                    opal_output_verbose(1, ompi_mtl_base_output,
                                        "%s:%d: PtlPut failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, unexpected short (0x%lx)",
                                 ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);

        } else {
            ptl_md_t md;

            if (ev->mlength > 0) {
                /* if rndv or triggered, copy the eager part to the right place.
                   NOTE(review): nothing here checks ev->mlength against
                   delivery_len before the copy - confirm the truncation
                   case cannot overrun delivery_ptr. */
                memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength);
            }

            /* fetch whatever was not delivered with the original put,
               clipped to the receive buffer.
               NOTE(review): goes below zero if delivery_len < ev->mlength
               - confirm. */
            md.start = (char*) ptl_request->delivery_ptr + ev->mlength;
            md.length = ((msg_length > ptl_request->delivery_len) ?
                         ptl_request->delivery_len : msg_length) - ev->mlength;
            md.options = 0;
            md.eq_handle = ompi_mtl_portals4.eq_h;
            md.ct_handle = PTL_CT_NONE;

            ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                            &md,
                            &ptl_request->md_h);
            if (PTL_OK != ret) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlMDBind failed: %d",
                                    __FILE__, __LINE__, ret);
                goto callback_error;
            }

            /* completion continues in the PTL_EVENT_REPLY case */
            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) getting long data",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ret = PtlGet(ptl_request->md_h,
                         0,
                         md.length,
                         ev->initiator,
                         ompi_mtl_portals4.read_idx,
                         ev->hdr_data,
                         ev->mlength,
                         ptl_request);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlGet failed: %d",
                                    __FILE__, __LINE__, ret);
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                PtlMDRelease(ptl_request->md_h);
                goto callback_error;
            }
        }

        break;

    default:
        opal_output_verbose(1, ompi_mtl_base_output,
                            "Unhandled receive callback with event type %d",
                            ev->type);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

    /* A call made above failed and left its return code in ret: translate
       it, record it in the status and complete the request. */
 callback_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR = 
        ompi_mtl_portals4_get_error(ret);
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;

    /* The event itself reported a failure (ni_fail_type).  No call has
       produced a return code at that point, so report a generic error
       instead of translating an unassigned ret. */
 callback_ni_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR = OMPI_ERROR;
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;
}