Ejemplo n.º 1
0
static int
read_msg(void *start, ptl_size_t length, ptl_process_t target,
         ptl_match_bits_t match_bits, ptl_size_t remote_offset,
         ompi_mtl_portals4_recv_request_t *request)
{
    int ret;

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_progress();
    }
#endif

    ret = PtlGet(ompi_mtl_portals4.send_md_h,
                 (ptl_size_t) start,
                 length,
                 target,
                 ompi_mtl_portals4.read_idx,
                 match_bits,
                 remote_offset,
                 request);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlGet failed: %d",
                            __FILE__, __LINE__, ret);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    return OMPI_SUCCESS;
}
Ejemplo n.º 2
0
int
ompi_osc_portals4_get(void *origin_addr,
                      int origin_count,
                      struct ompi_datatype_t *origin_dt,
                      int target,
                      OPAL_PTRDIFF_TYPE target_disp,
                      int target_count,
                      struct ompi_datatype_t *target_dt,
                      struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_handle_md_t md_h;
    void *md_base;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "get: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Get: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        (void)opal_atomic_add_64(&module->opcount, 1);
        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
        length *= origin_count;
        ompi_osc_portals4_get_md(origin_addr, module->md_h, &md_h, &md_base);
        ret = PtlGet(md_h,
                     (ptl_size_t) ((char*) origin_addr - (char*) md_base),
                     length,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     NULL);
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
Ejemplo n.º 3
0
static int
read_msg(void *start, ptl_size_t length, ptl_process_t target,
         ptl_match_bits_t match_bits, ptl_size_t remote_offset,
         ompi_mtl_portals4_recv_request_t *request)
{
    ptl_md_t md;
    int ret;

    /* FIX ME: This needs to be on the send eq... */
    md.start = start;
    md.length = length;
    md.options = 0;
    md.eq_handle = ompi_mtl_portals4.send_eq_h;
    md.ct_handle = PTL_CT_NONE;

    ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                    &md,
                    &request->md_h);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d",
                            __FILE__, __LINE__, ret);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_progress();
    }
#endif

    ret = PtlGet(request->md_h,
                 0,
                 md.length,
                 target,
                 ompi_mtl_portals4.read_idx,
                 match_bits,
                 remote_offset,
                 request);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PtlGet failed: %d",
                            __FILE__, __LINE__, ret);
        PtlMDRelease(request->md_h);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    return OMPI_SUCCESS;
}
Ejemplo n.º 4
0
static int
read_msg(void *start, ptl_size_t length, ptl_process_t target,
         ptl_match_bits_t match_bits, ptl_size_t remote_offset,
         ompi_mtl_portals4_recv_request_t *request)
{
    int ret, i;
    ptl_size_t rest = length, asked = 0;
    int32_t frag_count;

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    while (OPAL_UNLIKELY(OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, -1) < 0)) {
        OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
        ompi_mtl_portals4_progress();
    }
#endif

    frag_count = (length + ompi_mtl_portals4.max_msg_size_mtl - 1) / ompi_mtl_portals4.max_msg_size_mtl;
    ret = OPAL_THREAD_ADD32(&(request->pending_reply), frag_count);

    for (i = 0 ; i < frag_count ; i++) {
        opal_free_list_item_t *tmp;
        ompi_mtl_portals4_rndv_get_frag_t* frag;

        tmp = opal_free_list_get (&ompi_mtl_portals4.fl_rndv_get_frag);
        if (NULL == tmp) return OMPI_ERR_OUT_OF_RESOURCE;

        frag = (ompi_mtl_portals4_rndv_get_frag_t*) tmp;

        frag->request = request;
#if OPAL_ENABLE_DEBUG
        frag->frag_num = i;
#endif
        frag->frag_start = (char*)start + i * ompi_mtl_portals4.max_msg_size_mtl;
        frag->frag_length = (OPAL_UNLIKELY(rest > ompi_mtl_portals4.max_msg_size_mtl)) ? ompi_mtl_portals4.max_msg_size_mtl : rest;
        frag->frag_target = target;
        frag->frag_match_bits = match_bits;
        frag->frag_remote_offset = remote_offset + i * ompi_mtl_portals4.max_msg_size_mtl;

        frag->event_callback = ompi_mtl_portals4_rndv_get_frag_progress;
        frag->frag_abs_timeout_usec = 0;

        OPAL_OUTPUT_VERBOSE((90, ompi_mtl_base_framework.framework_output, "GET (fragment %d/%d, size %ld) send",
                             i + 1, frag_count, frag->frag_length));

        ret = PtlGet(ompi_mtl_portals4.send_md_h,
                     (ptl_size_t) frag->frag_start,
                     frag->frag_length,
                     frag->frag_target,
                     ompi_mtl_portals4.read_idx,
                     frag->frag_match_bits,
                     frag->frag_remote_offset,
                     frag);
        if (OPAL_UNLIKELY(PTL_OK != ret)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "%s:%d: PtlGet failed: %d",
                                __FILE__, __LINE__, ret);
            return OMPI_ERR_OUT_OF_RESOURCE;
        }
        rest -= frag->frag_length;
        asked += frag->frag_length;
    }

    return OMPI_SUCCESS;
}
Ejemplo n.º 5
0
static int
ompi_mtl_portals4_rndv_get_frag_progress(ptl_event_t *ev,
                                         ompi_mtl_portals4_rndv_get_frag_t* rndv_get_frag)
{
    int ret;
    ompi_mtl_portals4_recv_request_t* ptl_request =
        (ompi_mtl_portals4_recv_request_t*) rndv_get_frag->request;

    assert(PTL_EVENT_REPLY == ev->type);

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
        "Recv %lu (0x%lx) got reply event",
        ptl_request->opcount, ptl_request->hdr_data));


    if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_OK)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
                            __FILE__, __LINE__, ev->ni_fail_type);

        if (OPAL_UNLIKELY(ev->ni_fail_type != PTL_NI_DROPPED)) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "PTL_EVENT_REPLY with ni_fail_type: %u => cannot retry",
                                (uint32_t)ev->ni_fail_type);
            ret = PTL_FAIL;
            goto callback_error;
        }

        if (0 == rndv_get_frag->frag_abs_timeout_usec) {
            /* this is the first retry of the frag.  start the timer. */
            /* instead of recording the start time, record the end time
             * and avoid addition on each retry. */
            rndv_get_frag->frag_abs_timeout_usec = opal_timer_base_get_usec() + ompi_mtl_portals4.get_retransmit_timeout;
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "setting frag timeout at %lu",
                                rndv_get_frag->frag_abs_timeout_usec);
        } else if (opal_timer_base_get_usec() >= rndv_get_frag->frag_abs_timeout_usec) {
            opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                                "timeout retrying GET");
            ret = PTL_FAIL;
            goto callback_error;
        }

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
            "Rendezvous Get Failed: Reissuing frag #%u", rndv_get_frag->frag_num));

        ret = PtlGet(ompi_mtl_portals4.send_md_h,
                     (ptl_size_t) rndv_get_frag->frag_start,
                     rndv_get_frag->frag_length,
                     rndv_get_frag->frag_target,
                     ompi_mtl_portals4.read_idx,
                     rndv_get_frag->frag_match_bits,
                     rndv_get_frag->frag_remote_offset,
                     rndv_get_frag);
        if (OPAL_UNLIKELY(PTL_OK != ret)) {
            if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
            goto callback_error;
        }
        return OMPI_SUCCESS;
    }

    /* set the received length in the status, now that we know
           exactly how much data was sent. */
    ptl_request->super.super.ompi_req->req_status._ucount += ev->mlength;

    /* this frag is complete.  return to freelist. */
    opal_free_list_return (&ompi_mtl_portals4.fl_rndv_get_frag,
                           &rndv_get_frag->super);

    ret = OPAL_THREAD_ADD32(&(ptl_request->pending_reply), -1);
    if (ret > 0) {
        return OMPI_SUCCESS;
    }
    assert(ptl_request->pending_reply == 0);

#if OMPI_MTL_PORTALS4_FLOW_CONTROL
    OPAL_THREAD_ADD32(&ompi_mtl_portals4.flowctl.send_slots, 1);
#endif

    /* make sure the data is in the right place.  Use _ucount for
           the total length because it will be set correctly for all
           three protocols. mlength is only correct for eager, and
           delivery_len is the length of the buffer, not the length of
           the send. */
    ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                   ptl_request->delivery_ptr,
                                   ptl_request->super.super.ompi_req->req_status._ucount);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        opal_output_verbose(1, ompi_mtl_base_framework.framework_output,
                            "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                            __FILE__, __LINE__, ret);
        ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
    }

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_framework.framework_output,
        "Recv %lu (0x%lx) completed , reply (pending_reply: %d)",
        ptl_request->opcount, ptl_request->hdr_data, ptl_request->pending_reply));
    ptl_request->super.super.completion_callback(&ptl_request->super.super);

    return OMPI_SUCCESS;

 callback_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR =
        ompi_mtl_portals4_get_error(ret);
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;
}
Ejemplo n.º 6
0
int
kptllnd_active_rdma(kptl_rx_t *rx, lnet_msg_t *lntmsg, int type,
                    unsigned int niov, struct iovec *iov, lnet_kiov_t *kiov,
                    unsigned int offset, int nob)
{
        kptl_tx_t       *tx;
        ptl_err_t        ptlrc;
        kptl_msg_t      *rxmsg = rx->rx_msg;
        kptl_peer_t     *peer = rx->rx_peer;
        unsigned long    flags;
        ptl_handle_md_t  mdh;

        LASSERT (type == TX_TYPE_PUT_RESPONSE || 
                 type == TX_TYPE_GET_RESPONSE);

        tx = kptllnd_get_idle_tx(type);
        if (tx == NULL) {
                CERROR ("Can't do %s rdma to %s: can't allocate descriptor\n",
                        type == TX_TYPE_PUT_RESPONSE ? "GET" : "PUT",
                        libcfs_id2str(peer->peer_id));
                return -ENOMEM;
        }

        kptllnd_set_tx_peer(tx, peer);
        kptllnd_init_rdma_md(tx, niov, iov, kiov, offset, nob);

        ptlrc = PtlMDBind(kptllnd_data.kptl_nih, tx->tx_rdma_md, 
                          PTL_UNLINK, &mdh);
        if (ptlrc != PTL_OK) {
                CERROR("PtlMDBind(%s) failed: %s(%d)\n",
                       libcfs_id2str(peer->peer_id),
                       kptllnd_errtype2str(ptlrc), ptlrc);
                tx->tx_status = -EIO;
                kptllnd_tx_decref(tx);
                return -EIO;
        }

        cfs_spin_lock_irqsave(&peer->peer_lock, flags);

        tx->tx_lnet_msg = lntmsg;
        /* lnet_finalize() will be called when tx is torn down, so I must
         * return success from here on... */

        tx->tx_deadline = jiffies + (*kptllnd_tunables.kptl_timeout * CFS_HZ);
        tx->tx_rdma_mdh = mdh;
        tx->tx_active = 1;
        cfs_list_add_tail(&tx->tx_list, &peer->peer_activeq);

        /* peer has now got my ref on 'tx' */

        cfs_spin_unlock_irqrestore(&peer->peer_lock, flags);

        tx->tx_tposted = jiffies;

        if (type == TX_TYPE_GET_RESPONSE)
                ptlrc = PtlPut(mdh,
                               tx->tx_acked ? PTL_ACK_REQ : PTL_NOACK_REQ,
                               rx->rx_initiator,
                               *kptllnd_tunables.kptl_portal,
                               0,                     /* acl cookie */
                               rxmsg->ptlm_u.rdma.kptlrm_matchbits,
                               0,                     /* offset */
                               (lntmsg != NULL) ?     /* header data */
                               PTLLND_RDMA_OK :
                               PTLLND_RDMA_FAIL);
        else
                ptlrc = PtlGet(mdh,
                               rx->rx_initiator,
                               *kptllnd_tunables.kptl_portal,
                               0,                     /* acl cookie */
                               rxmsg->ptlm_u.rdma.kptlrm_matchbits,
                               0);                    /* offset */

        if (ptlrc != PTL_OK) {
                CERROR("Ptl%s failed: %s(%d)\n", 
                       (type == TX_TYPE_GET_RESPONSE) ? "Put" : "Get",
                       kptllnd_errtype2str(ptlrc), ptlrc);
                
                kptllnd_peer_close(peer, -EIO);
                /* Everything (including this RDMA) queued on the peer will
                 * be completed with failure */
                kptllnd_schedule_ptltrace_dump();
        }

        return 0;
}
Ejemplo n.º 7
0
int
ompi_osc_portals4_get_accumulate(const void *origin_addr,
                                 int origin_count,
                                 struct ompi_datatype_t *origin_dt,
                                 void *result_addr,
                                 int result_count,
                                 struct ompi_datatype_t *result_dt,
                                 int target,
                                 MPI_Aint target_disp,
                                 int target_count,
                                 struct ompi_datatype_t *target_dt,
                                 struct ompi_op_t *op,
                                 struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, (unsigned long) result_addr,
                         result_count, result_dt->name,
                         target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    offset = get_displacement(module, target) * target_disp;

    /* we don't support non-contiguous buffers.  but if the count is 0, we don't care if buffer is non-contiguous. */
    if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
        (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count)) ||
        (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count))) {
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Get_accumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        sent = 0;
        if (MPI_REPLACE == op) {
            ptl_size_t result_md_offset, origin_md_offset;

            ret = ompi_datatype_type_size(origin_dt, &length);
            if (OMPI_SUCCESS != ret) {
                return ret;
            }
            ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
            if (OMPI_SUCCESS != ret) {
                return ret;
            }
            length *= origin_count;

            result_md_offset = (ptl_size_t) result_addr;
            origin_md_offset = (ptl_size_t) origin_addr;

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);

                ret = PtlSwap(module->md_h,
                              result_md_offset + sent,
                              module->md_h,
                              origin_md_offset + sent,
                              msg_length,
                              peer,
                              module->pt_idx,
                              module->match_bits,
                              offset + sent,
                              NULL,
                              0,
                              NULL,
                              PTL_SWAP,
                              ptl_dt);
                sent += msg_length;
            } while (sent < length);
        } else if (MPI_NO_OP == op) {
            ptl_size_t md_offset;

            ret = ompi_datatype_type_size(target_dt, &length);
            if (OMPI_SUCCESS != ret) {
                return ret;
            }
            length *= target_count;

            md_offset = (ptl_size_t) result_addr;

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);

                ret = PtlGet(module->md_h,
                             md_offset + sent,
                             msg_length,
                             peer,
                             module->pt_idx,
                             module->match_bits,
                             offset + sent,
                             NULL);
                sent += msg_length;
            } while (sent < length);
        } else {
            ptl_size_t result_md_offset, origin_md_offset;

            ret = ompi_datatype_type_size(origin_dt, &length);
            if (OMPI_SUCCESS != ret) {
                return ret;
            }
            length *= origin_count;

            result_md_offset = (ptl_size_t) result_addr;
            origin_md_offset = (ptl_size_t) origin_addr;

            ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
            if (OMPI_SUCCESS != ret) return ret;

            ret = ompi_osc_portals4_get_op(op, &ptl_op);
            if (OMPI_SUCCESS != ret) return ret;


            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);

                ret = PtlFetchAtomic(module->md_h,
                                     result_md_offset + sent,
                                     module->md_h,
                                     origin_md_offset + sent,
                                     msg_length,
                                     peer,
                                     module->pt_idx,
                                     module->match_bits,
                                     offset + sent,
                                     NULL,
                                     0,
                                     ptl_op,
                                     ptl_dt);
                sent += msg_length;
            } while (sent < length);
        }
        if (OMPI_SUCCESS != ret) {
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
Ejemplo n.º 8
0
int
ompi_osc_portals4_rget_accumulate(const void *origin_addr,
                                  int origin_count,
                                  struct ompi_datatype_t *origin_dt,
                                  void *result_addr,
                                  int result_count,
                                  struct ompi_datatype_t *result_dt,
                                  int target,
                                  MPI_Aint target_disp,
                                  int target_count,
                                  struct ompi_datatype_t *target_dt,
                                  struct ompi_op_t *op,
                                  struct ompi_win_t *win,
                                  struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_portals4_request_t *request;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length, sent;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %d, %d, %s, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, (unsigned long) result_addr,
                         result_count, result_dt->name,
                         target, (int) target_disp,
                         target_count, target_dt->name,
                         op->o_name,
                         (unsigned long) win));

    OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Rget_accumulate: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        sent = 0;

        if (MPI_REPLACE == op) {
            ptl_size_t result_md_offset, origin_md_offset;

            ret = ompi_datatype_type_size(origin_dt, &length);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }
            ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }
            length *= origin_count;

            result_md_offset = (ptl_size_t) result_addr;
            origin_md_offset = (ptl_size_t) origin_addr;

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);
                request->ops_expected++;

                ret = PtlSwap(module->req_md_h,
                              result_md_offset + sent,
                              module->md_h,
                              origin_md_offset + sent,
                              msg_length,
                              peer,
                              module->pt_idx,
                              module->match_bits,
                              offset + sent,
                              request,
                              0,
                              NULL,
                              PTL_SWAP,
                              ptl_dt);
                sent += msg_length;
            } while (sent < length);
        } else if (MPI_NO_OP == op) {
            ptl_size_t md_offset;

            ret = ompi_datatype_type_size(target_dt, &length);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }
            length *= target_count;

            md_offset = (ptl_size_t) result_addr;

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);
                request->ops_expected++;

                ret = PtlGet(module->req_md_h,
                             md_offset + sent,
                             msg_length,
                             peer,
                             module->pt_idx,
                             module->match_bits,
                             offset + sent,
                             request);
                sent += msg_length;
            } while (sent < length);
        } else {
            ptl_size_t result_md_offset, origin_md_offset;

            ret = ompi_datatype_type_size(origin_dt, &length);
            if (OMPI_SUCCESS != ret) {
                OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
                return ret;
            }
            length *= origin_count;

            result_md_offset = (ptl_size_t) result_addr;
            origin_md_offset = (ptl_size_t) origin_addr;

            ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
            if (OMPI_SUCCESS != ret) return ret;

            ret = ompi_osc_portals4_get_op(op, &ptl_op);
            if (OMPI_SUCCESS != ret) return ret;

            do {
                size_t msg_length = MIN(module->fetch_atomic_max, length - sent);

                (void)opal_atomic_add_64(&module->opcount, 1);
                request->ops_expected++;

                ret = PtlFetchAtomic(module->req_md_h,
                                     result_md_offset + sent,
                                     module->md_h,
                                     origin_md_offset + sent,
                                     msg_length,
                                     peer,
                                     module->pt_idx,
                                     module->match_bits,
                                     offset + sent,
                                     request,
                                     0,
                                     ptl_op,
                                     ptl_dt);
                sent += msg_length;
            } while (sent < length);
        }
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
Ejemplo n.º 9
0
int
ompi_osc_portals4_rget(void *origin_addr,
                       int origin_count,
                       struct ompi_datatype_t *origin_dt,
                       int target,
                       OPAL_PTRDIFF_TYPE target_disp,
                       int target_count,
                       struct ompi_datatype_t *target_dt,
                       struct ompi_win_t *win,
                       struct ompi_request_t **ompi_req)
{
    int ret;
    ompi_osc_portals4_request_t *request;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "rget: 0x%lx, %d, %s, %d, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr, origin_count,
                         origin_dt->name, target, (int) target_disp,
                         target_count, target_dt->name,
                         (unsigned long) win));

    OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
    if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
    *ompi_req = &request->super;

    offset = get_displacement(module, target) * target_disp;

    if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count) ||
        !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
        OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
        opal_output(ompi_osc_base_framework.framework_output,
                    "MPI_Rget: transfer of non-contiguous memory is not currently supported.\n");
        return OMPI_ERR_NOT_SUPPORTED;
    } else {
        (void)opal_atomic_add_64(&module->opcount, 1);
        request->ops_expected = 1;
        ret = ompi_datatype_type_size(origin_dt, &length);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
        length *= origin_count;
        ret = PtlGet(module->req_md_h,
                     (ptl_size_t) origin_addr,
                     length,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     request);
        if (OMPI_SUCCESS != ret) {
            OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
            return ret;
        }
    }

    return OMPI_SUCCESS;
}
Ejemplo n.º 10
0
int
ompi_osc_portals4_fetch_and_op(const void *origin_addr,
                               void *result_addr,
                               struct ompi_datatype_t *dt,
                               int target,
                               OPAL_PTRDIFF_TYPE target_disp,
                               struct ompi_op_t *op,
                               struct ompi_win_t *win)
{
    int ret;
    ompi_osc_portals4_module_t *module =
        (ompi_osc_portals4_module_t*) win->w_osc_module;
    ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
    size_t length;
    size_t offset;
    ptl_op_t ptl_op;
    ptl_datatype_t ptl_dt;

    OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
                         "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %d, %s, 0x%lx",
                         (unsigned long) origin_addr,
                         (unsigned long) result_addr,
                         dt->name, target, (int) target_disp,
                         op->o_name,
                         (unsigned long) win));

    ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
    if (OMPI_SUCCESS != ret) return ret;

    offset = get_displacement(module, target) * target_disp;

    ret = ompi_datatype_type_size(dt, &length);
    if (OMPI_SUCCESS != ret) return ret;

    assert(length <= module->fetch_atomic_max);

    (void)opal_atomic_add_64(&module->opcount, 1);

    if (MPI_REPLACE == op) {
        ptl_size_t result_md_offset, origin_md_offset;

        result_md_offset = (ptl_size_t) result_addr;
        origin_md_offset = (ptl_size_t) origin_addr;

        ret = PtlSwap(module->md_h,
                      result_md_offset,
                      module->md_h,
                      origin_md_offset,
                      length,
                      peer,
                      module->pt_idx,
                      module->match_bits,
                      offset,
                      NULL,
                      0,
                      NULL,
                      PTL_SWAP,
                      ptl_dt);
    } else if (MPI_NO_OP == op) {
        ptl_size_t md_offset;

        md_offset = (ptl_size_t) result_addr;

        ret = PtlGet(module->md_h,
                     md_offset,
                     length,
                     peer,
                     module->pt_idx,
                     module->match_bits,
                     offset,
                     NULL);
    } else {
        ptl_size_t result_md_offset, origin_md_offset;

        ret = ompi_osc_portals4_get_op(op, &ptl_op);
        if (OMPI_SUCCESS != ret) return ret;

        result_md_offset = (ptl_size_t) result_addr;
        origin_md_offset = (ptl_size_t) origin_addr;

        ret = PtlFetchAtomic(module->md_h,
                             result_md_offset,
                             module->md_h,
                             origin_md_offset,
                             length,
                             peer,
                             module->pt_idx,
                             module->match_bits,
                             offset,
                             NULL,
                             0,
                             ptl_op,
                             ptl_dt);
    }
    if (OMPI_SUCCESS != ret) {
        return ret;
    }

    return OMPI_SUCCESS;
}
Ejemplo n.º 11
0
int
mca_btl_portals4_component_progress(void)
{
    mca_btl_portals4_module_t *portals4_btl;
    int num_progressed = 0;
    int ret, btl_ownership;
    mca_btl_portals4_frag_t *frag = NULL;
    mca_btl_base_tag_t tag;
    static ptl_event_t ev;
    unsigned int which;
    mca_btl_active_message_callback_t* reg;
    mca_btl_base_segment_t seg[2];
    mca_btl_base_descriptor_t btl_base_descriptor;

    while (true) {
        ret = PtlEQPoll(mca_btl_portals4_component.eqs_h, mca_btl_portals4_component.num_btls, 0, &ev, &which);

        if (PTL_OK == ret) {
            OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlEQPoll Event received: %d (fail=%d) on NI %d\n",
                ev.type, ev.ni_fail_type, which));
            num_progressed++;
            portals4_btl = mca_btl_portals4_component.btls[which];

            switch (ev.type) {

            case PTL_EVENT_SEND:   /* generated on source (origin) when put stops sending */

                frag = ev.user_ptr;
                if (NULL == frag) {
                    opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_SEND event with NULL user_ptr");
                    break;
                }
                btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

                if (!mca_btl_portals4_component.portals_need_ack) {
                    /* my part's done, in portals we trust! */
                    if( MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ){
                        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                            "PTL_EVENT_SEND: Direct call to des_cbfunc: %lx\n", (uint64_t)frag->base.des_cbfunc));
                        frag->base.des_cbfunc(&portals4_btl->super,
                                              frag->endpoint,
                                              &frag->base,
                                              OPAL_SUCCESS);
                    }
                    if (btl_ownership) {
                        mca_btl_portals4_free(&portals4_btl->super, &frag->base);
                    }
                    if (0 != frag->size) {
                        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
                        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                            "PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n",
                            portals4_btl->portals_outstanding_ops));
                    }
                }

                goto done;
                break;

            case PTL_EVENT_ACK:   /* Ack that a put as completed on other side. We just call the callback function */

                frag = ev.user_ptr;
                if (NULL == frag) {
                    opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_ACK event with NULL user_ptr");
                    break;
                }
                OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                    "PTL_EVENT_ACK received rlength=%ld mlength=%ld des_flags=%d\n", ev.rlength, ev.mlength, frag->base.des_flags));
                btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);

                /* other side received the message.  should have
                   received entire thing */
                /* let the PML know we're done */
                if (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ) {
                    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                        "PTL_EVENT_ACK: Call to des_cbfunc %lx\n", (uint64_t)frag->base.des_cbfunc));
                    frag->base.des_cbfunc(&portals4_btl->super,
                                          frag->endpoint,
                                          &frag->base,
                                          OPAL_SUCCESS);
                }
                if (btl_ownership) {
                    mca_btl_portals4_free(&portals4_btl->super, &frag->base);
                }

                if (0 != frag->size) {
                    OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
                    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                        "PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops));
                }

                goto done;
                break;

            case PTL_EVENT_PUT:   /* Generated on destination (target) when a put into memory ends */

                tag = (unsigned char) (ev.hdr_data);

                btl_base_descriptor.des_segments = seg;
                btl_base_descriptor.des_segment_count = 1;
                seg[0].seg_addr.pval = ev.start;
                seg[0].seg_len = ev.mlength;

                reg = mca_btl_base_active_message_trigger + tag;
                OPAL_OUTPUT_VERBOSE((50, opal_btl_base_framework.framework_output,
                    "PTL_EVENT_PUT: tag=%x base_descriptor=%p cbfunc: %lx\n", tag, (void*)&btl_base_descriptor, (uint64_t)reg->cbfunc));
                reg->cbfunc(&portals4_btl->super, tag, &btl_base_descriptor, reg->cbdata);

                goto done;
                break;

            case PTL_EVENT_PUT_OVERFLOW:
                /* */
                OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                    "PTL_EVENT_OVERFLOW received\n"));
                goto done;
                break;

            case PTL_EVENT_LINK:
                /* */
                frag = ev.user_ptr;
                if (NULL == frag) {
                    opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_LINK event with NULL user_ptr");
                    break;
                }
                goto done;
                break;

            case PTL_EVENT_AUTO_UNLINK:
                /* */
                /* The Priority List is used, so PTL_EVENT_AUTO_FREE will never be received. So, we have to reactivate the block here */
                mca_btl_portals4_activate_block(ev.user_ptr);
                goto done;
                break;

            case PTL_EVENT_AUTO_FREE:
                /* */
                goto done;
                break;

            case PTL_EVENT_GET:   /* Generated on source (target) when a get from memory ends */
                /* */
                OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                    "PTL_EVENT_GET received at target rlength=%ld mlength=%ld\n", ev.rlength, ev.mlength));
                goto done;
                break;

            case PTL_EVENT_REPLY:
                /* */
                frag = ev.user_ptr;

                if (PTL_NI_PERM_VIOLATION == ev.ni_fail_type) {
                        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "Warning : PTL_EVENT_REPLY with PTL_NI_PERM_VIOLATION received, try to re-issue a PtlGet");

                    /* The distant PtlMEAppend is not finished (distant PTL_EVENT_LINK not received) */
                    /* Re-issue the PtlGet (see btl_portals4_rdma.c) */
                    ret = PtlGet(frag->md_h,
                                 0,
                                 frag->length,
                                 frag->peer_proc,
                                 portals4_btl->recv_idx,
                                 frag->match_bits, /* match bits */
                                 0,
                                 frag);
                    if (OPAL_UNLIKELY(PTL_OK != ret)) {
                        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                                            "%s:%d: Re-issued PtlGet failed: %d",
                                            __FILE__, __LINE__, ret);
                        PtlMDRelease(frag->md_h);
                        frag->md_h = PTL_INVALID_HANDLE;
                        return OPAL_ERROR;
                    }

                    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                        "Re-issued PtlGet length=%ld recv_idx=%d rank=%x pid=%x nid=%x match_bits=%lx\n",
                        frag->length, portals4_btl->recv_idx,
                        frag->peer_proc.rank, frag->peer_proc.phys.pid, frag->peer_proc.phys.nid, frag->match_bits));
                }
                else {
                    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                        "PTL_EVENT_REPLY: Call to rdma_cbfunc=%lx\n", (uint64_t)frag->rdma_cb.func));
                    frag->rdma_cb.func(&portals4_btl->super,
                                 frag->endpoint,
                                 ev.start,
                                 frag->rdma_cb.local_handle,
                                 frag->rdma_cb.context,
                                 frag->rdma_cb.data,
                                 OPAL_SUCCESS);
                    PtlMDRelease(frag->md_h);
                    frag->md_h = PTL_INVALID_HANDLE;

                    OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag);
                    OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1); 
                    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
                        "PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
                    goto done;
                }
                break;

            default:
                /* */
                goto done;
                break;
            }
        } else if (PTL_EQ_EMPTY == ret) {
            /* there's nothing in the queue.  This is actually the
               common case, so the easiest way to make the compiler
               emit something that doesn't completely blow here is to
               just go back to a good old goto */
            goto done;
            break;

        } else if (PTL_EQ_DROPPED == ret) {
            opal_output(opal_btl_base_framework.framework_output,
                        "Flow control situation without recovery (EQ_DROPPED)");
            break;
        } else {
            opal_output(opal_btl_base_framework.framework_output,
                        "Error returned from PtlEQPoll: %d", ret);
            break;
        }
    }
 done:
    return num_progressed;
}
Ejemplo n.º 12
0
Archivo: rptl.c Proyecto: zhanglt/mpich
static int poke_progress(void)
{
    int ret = PTL_OK;
    struct rptl_target *target;
    struct rptl_op *op;
    struct rptl *rptl;
    int i;
    int mpi_errno = MPI_SUCCESS;
    ptl_process_t id;
    ptl_pt_index_t data_pt, control_pt;
    MPIDI_STATE_DECL(MPID_STATE_POKE_PROGRESS);

    MPIDI_FUNC_ENTER(MPID_STATE_POKE_PROGRESS);

    /* make progress on local RPTLs */
    for (rptl = rptl_info.rptl_list; rptl; rptl = rptl->next) {
        /* if the local state is active, there's nothing to do */
        if (rptl->local_state == RPTL_LOCAL_STATE_ACTIVE)
            continue;

        /* if we are in a local AWAITING PAUSE ACKS state, see if we
         * can send out the unpause message */
        if (rptl->local_state == RPTL_LOCAL_STATE_AWAITING_PAUSE_ACKS &&
            rptl->pause_ack_counter == rptl_info.world_size - 1) {
            /* if we are over the max count limit, do not send an
             * unpause message yet */
            if (rptl->data.ob_curr_count > rptl->data.ob_max_count)
                continue;

            ret = PtlPTEnable(rptl->ni, rptl->data.pt);
            RPTLU_ERR_POP(ret, "Error returned while reenabling PT\n");

            rptl->local_state = RPTL_LOCAL_STATE_ACTIVE;

            for (i = 0; i < rptl_info.world_size; i++) {
                if (i == MPIDI_Process.my_pg_rank)
                    continue;
                mpi_errno = rptl_info.get_target_info(i, &id, rptl->data.pt, &data_pt, &control_pt);
                if (mpi_errno) {
                    ret = PTL_FAIL;
                    RPTLU_ERR_POP(ret, "Error getting target info\n");
                }

                /* make sure the user setup a control portal */
                assert(control_pt != PTL_PT_ANY);

                ret = rptl_put(rptl->md, 0, 0, PTL_NO_ACK_REQ, id, control_pt,
                               0, 0, NULL, RPTL_CONTROL_MSG_UNPAUSE, RPTL_PT_CONTROL);
                RPTLU_ERR_POP(ret, "Error sending unpause message\n");
            }
        }
    }

    /* make progress on targets */
    for (target = rptl_info.target_list; target; target = target->next) {
        if (target->state == RPTL_TARGET_STATE_RECEIVED_PAUSE) {
            for (op = target->data_op_list; op; op = op->next)
                if (op->state == RPTL_OP_STATE_ISSUED)
                    break;
            if (op)
                continue;

            /* send a pause ack message */
            assert(target->rptl);
            for (i = 0; i < rptl_info.world_size; i++) {
                if (i == MPIDI_Process.my_pg_rank)
                    continue;
                /* find the target that has this target id and get the
                 * control portal information for it */
                mpi_errno = rptl_info.get_target_info(i, &id, target->rptl->data.pt, &data_pt, &control_pt);
                if (mpi_errno) {
                    ret = PTL_FAIL;
                    RPTLU_ERR_POP(ret, "Error getting target info\n");
                }
                if (IDS_ARE_EQUAL(id, target->id))
                    break;
            }

            /* make sure the user setup a control portal */
            assert(control_pt != PTL_PT_ANY);

            target->state = RPTL_TARGET_STATE_PAUSE_ACKED;

            ret = rptl_put(target->rptl->md, 0, 0, PTL_NO_ACK_REQ, id, control_pt, 0,
                           0, NULL, RPTL_CONTROL_MSG_PAUSE_ACK, RPTL_PT_CONTROL);
            RPTLU_ERR_POP(ret, "Error sending pause ack message\n");

            continue;
        }

        /* issue out all the control messages first */
        for (op = target->control_op_list; op; op = op->next) {
            assert(op->op_type == RPTL_OP_PUT);

            /* skip all the issued ops */
            if (op->state == RPTL_OP_STATE_ISSUED)
                continue;

            /* we should not get any NACKs on the control portal */
            assert(op->state != RPTL_OP_STATE_NACKED);

            if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                /* too few origin events left.  we can't issue this op
                 * or any following op to this target in order to
                 * maintain ordering */
                break;
            }

            rptl_info.origin_events_left -= 2;
            target->issued_data_ops++;

            /* force request for an ACK even if the user didn't ask
             * for it.  replace the user pointer with the OP id. */
            ret = PtlPut(op->u.put.md_handle, op->u.put.local_offset, op->u.put.length,
                         PTL_ACK_REQ, op->u.put.target_id, op->u.put.pt_index,
                         op->u.put.match_bits, op->u.put.remote_offset, op,
                         op->u.put.hdr_data);
            RPTLU_ERR_POP(ret, "Error issuing PUT\n");

            op->state = RPTL_OP_STATE_ISSUED;
        }

        if (target->state == RPTL_TARGET_STATE_DISABLED || target->state == RPTL_TARGET_STATE_PAUSE_ACKED)
            continue;

        /* then issue out all the data messages */
        for (op = target->data_op_list; op; op = op->next) {
            if (op->op_type == RPTL_OP_PUT) {
                /* skip all the issued ops */
                if (op->state == RPTL_OP_STATE_ISSUED)
                    continue;

                /* if an op has been nacked, don't issue anything else
                 * to this target */
                if (op->state == RPTL_OP_STATE_NACKED)
                    break;

                if (rptl_info.origin_events_left < 2 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                    /* too few origin events left.  we can't issue
                     * this op or any following op to this target in
                     * order to maintain ordering */
                    break;
                }

                rptl_info.origin_events_left -= 2;
                target->issued_data_ops++;

                /* force request for an ACK even if the user didn't
                 * ask for it.  replace the user pointer with the OP
                 * id. */
                ret = PtlPut(op->u.put.md_handle, op->u.put.local_offset, op->u.put.length,
                             PTL_ACK_REQ, op->u.put.target_id, op->u.put.pt_index,
                             op->u.put.match_bits, op->u.put.remote_offset, op,
                             op->u.put.hdr_data);
                RPTLU_ERR_POP(ret, "Error issuing PUT\n");
            }
            else if (op->op_type == RPTL_OP_GET) {
                /* skip all the issued ops */
                if (op->state == RPTL_OP_STATE_ISSUED)
                    continue;

                /* if an op has been nacked, don't issue anything else
                 * to this target */
                if (op->state == RPTL_OP_STATE_NACKED)
                    break;

                if (rptl_info.origin_events_left < 1 || target->issued_data_ops > PER_TARGET_THRESHOLD) {
                    /* too few origin events left.  we can't issue
                     * this op or any following op to this target in
                     * order to maintain ordering */
                    break;
                }

                rptl_info.origin_events_left--;
                target->issued_data_ops++;

                ret = PtlGet(op->u.get.md_handle, op->u.get.local_offset, op->u.get.length,
                             op->u.get.target_id, op->u.get.pt_index, op->u.get.match_bits,
                             op->u.get.remote_offset, op);
                RPTLU_ERR_POP(ret, "Error issuing GET\n");
            }

            op->state = RPTL_OP_STATE_ISSUED;
        }
    }

  fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_POKE_PROGRESS);
    return ret;

  fn_fail:
    goto fn_exit;
}
Ejemplo n.º 13
0
static int
ompi_mtl_portals_get_data(ompi_mtl_portals_event_t *recv_event, 
                          struct ompi_convertor_t *convertor,
                          ompi_mtl_portals_request_t  *ptl_request)
{
    int ret;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    size_t buflen;
    
    if (PTL_IS_SHORT_MSG(recv_event->ev.match_bits)) {
        /* the buffer is sitting in the short message queue */

        struct iovec iov;
        uint32_t iov_count = 1;
        size_t max_data;

        ompi_mtl_portals_recv_short_block_t *block = 
            recv_event->ev.md.user_ptr;

        iov.iov_base = (((char*) recv_event->ev.md.start) + recv_event->ev.offset);
        iov.iov_len = recv_event->ev.mlength;
        max_data = iov.iov_len;

        /* see if this message filled the receive block */
        if (recv_event->ev.md.length - (recv_event->ev.offset + 
                                        recv_event->ev.mlength) <
            recv_event->ev.md.max_size) {
            block->full = true;
        }

        /* pull out the data */
        if (iov.iov_len > 0) {
            ompi_convertor_unpack(convertor, &iov, &iov_count,
                                  &max_data );
        }

        /* if synchronous, return an ack */
        if (PTL_IS_SYNC_MSG(recv_event->ev)) {
            md.length = 0;
            md.start = (((char*) recv_event->ev.md.start) + recv_event->ev.offset);
            md.threshold = 1; /* send */
            md.options = PTL_MD_EVENT_START_DISABLE;
            md.user_ptr = NULL;
            md.eq_handle = ompi_mtl_portals.ptl_eq_h;

            ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md,
                            PTL_UNLINK, &md_h);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr)," Error returned from PtlMDBind.  Error code - %d \n",ret);
                abort();
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                                 "acking recv: 0x%016llx\n", 
                                 recv_event->ev.match_bits));

            ret = PtlPut(md_h,
                         PTL_NO_ACK_REQ,
                         recv_event->ev.initiator,
                         OMPI_MTL_PORTALS_ACK_TABLE_ID,
                         0,
                         recv_event->ev.hdr_data,
                         0,
                         0);
            if (PTL_OK != ret) {
                opal_output(fileno(stderr)," Error returned from PtlPut.  Error code - %d \n",ret);
                abort();
            }
        }

        /* finished with our buffer space */
        ompi_mtl_portals_return_block_part(&ompi_mtl_portals, block);

        ompi_convertor_get_packed_size(convertor, &buflen);

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG = 
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR = 
            (recv_event->ev.rlength > buflen) ?
            MPI_ERR_TRUNCATE : MPI_SUCCESS;
        ptl_request->super.ompi_req->req_status._count = 
            recv_event->ev.mlength;

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                             "recv complete: 0x%016llx\n", 
                             recv_event->ev.match_bits));
        
        ptl_request->super.completion_callback(&ptl_request->super);

    } else {
        ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
                                         &ptl_request->free_after);
        if (OMPI_SUCCESS != ret) {
            opal_output(fileno(stderr)," Error returned from ompi_mtl_datatype_recv_buf.  Error code - %d \n",ret);
            abort();
        }
        md.length = (recv_event->ev.rlength > buflen) ? buflen : recv_event->ev.rlength;
        md.threshold = 2; /* send and get */
        md.options = PTL_MD_EVENT_START_DISABLE;
        md.user_ptr = ptl_request;
        md.eq_handle = ompi_mtl_portals.ptl_eq_h;

        /* retain because it's unclear how many events we'll get here.
           Some implementations give just the REPLY, others give SEND
           and REPLY */
        ret = PtlMDBind(ompi_mtl_portals.ptl_ni_h, md,
                        PTL_RETAIN, &md_h);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr)," Error returned from ompi_mtl_datatype_recv_buf.  Error code - %d \n",ret);
            abort();
        }

        ptl_request->event_callback = ompi_mtl_portals_recv_progress;

        ret = PtlGet(md_h, 
                     recv_event->ev.initiator, 
                     OMPI_MTL_PORTALS_READ_TABLE_ID,
                     0, 
                     recv_event->ev.hdr_data,
                     0);
        if (PTL_OK != ret) {
            opal_output(fileno(stderr)," Error returned from PtlGet.  Error code - %d \n",ret);
            abort();
        }

        ptl_request->super.ompi_req->req_status.MPI_SOURCE =
            PTL_GET_SOURCE(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_TAG = 
            PTL_GET_TAG(recv_event->ev.match_bits);
        ptl_request->super.ompi_req->req_status.MPI_ERROR = 
            (recv_event->ev.rlength > buflen) ?
            MPI_ERR_TRUNCATE : MPI_SUCCESS;
    }

    return OMPI_SUCCESS;
}
Ejemplo n.º 14
0
int
mca_btl_portals4_get(struct mca_btl_base_module_t* btl_base,
                    struct mca_btl_base_endpoint_t* btl_peer,
                    void *local_address,
                    uint64_t remote_address,
                    struct mca_btl_base_registration_handle_t *local_handle,
                    struct mca_btl_base_registration_handle_t *remote_handle,
                    size_t size,
                    int flags,
                    int order,
                    mca_btl_base_rdma_completion_fn_t cbfunc,
                    void *cbcontext,
                    void *cbdata)
{
    mca_btl_portals4_module_t *portals4_btl = (mca_btl_portals4_module_t *) btl_base;
    mca_btl_portals4_frag_t   *frag         = NULL;
    ptl_md_t md;
    int ret;

    /* reserve space in the event queue for rdma operations immediately */
    while (OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, 1) >
           portals4_btl->portals_max_outstanding_ops) {
        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
        OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "Call to mca_btl_portals4_component_progress (1)\n"));
        mca_btl_portals4_component_progress();
    }

    OPAL_BTL_PORTALS4_FRAG_ALLOC_USER(portals4_btl, frag);
    if (NULL == frag){
        OPAL_THREAD_ADD32(&portals4_btl->portals_outstanding_ops, -1);
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
        "mca_btl_portals4_get: Incrementing portals_outstanding_ops=%d frag=%p",
        portals4_btl->portals_outstanding_ops, (void *)frag));

    frag->rdma_cb.func         = cbfunc;
    frag->rdma_cb.context      = cbcontext;
    frag->rdma_cb.data         = cbdata;
    frag->rdma_cb.local_handle = local_handle;

    frag->endpoint = btl_peer;
    frag->hdr.tag = MCA_BTL_TAG_MAX;

    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlMDBind failed: %d",
                            __FILE__, __LINE__, ret);
        return OPAL_ERROR;
    }

    frag->match_bits = remote_handle->key;
    frag->addr = local_address;
    frag->length = size;
    frag->peer_proc = btl_peer->ptl_proc;

    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
        md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));

    ret = PtlGet(portals4_btl->send_md_h,
                 (ptl_size_t) local_address,
                 size,
                 btl_peer->ptl_proc,
                 portals4_btl->recv_idx,
                 frag->match_bits, /* match bits */
                 0,
                 frag);
    if (OPAL_UNLIKELY(PTL_OK != ret)) {
        opal_output_verbose(1, opal_btl_base_framework.framework_output,
                            "%s:%d: PtlGet failed: %d",
                            __FILE__, __LINE__, ret);
        return OPAL_ERROR;
    }
    OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "SUCCESS: PtlGet start=%p length=%ld nid=%x pid=%x match_bits=%lx\n",
        md.start, md.length, btl_peer->ptl_proc.phys.nid, btl_peer->ptl_proc.phys.pid, frag->match_bits));

    return OPAL_SUCCESS;
}
Ejemplo n.º 15
0
/* called when a receive should be progressed */
int
ompi_mtl_portals4_recv_progress(ptl_event_t *ev,
                                ompi_mtl_portals4_base_request_t* ptl_base_request)
{
    int ret;
    ompi_mtl_portals4_recv_request_t* ptl_request = 
        (ompi_mtl_portals4_recv_request_t*) ptl_base_request;
    size_t msg_length = 0;

    switch (ev->type) {
    case PTL_EVENT_PUT:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_PUT with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            goto callback_error;
        }

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG = 
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (msg_length > ptl_request->delivery_len) {
            opal_output_verbose(1, ompi_mtl_base_output, "truncate expected: %ld %ld", 
                                msg_length, ptl_request->delivery_len);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        if (!MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits) && ompi_mtl_portals4.protocol == rndv) {
            ptl_md_t md;

            md.start = (char*) ptl_request->delivery_ptr + ompi_mtl_portals4.eager_limit;
            md.length = ((msg_length > ptl_request->delivery_len) ?
                         ptl_request->delivery_len : msg_length) - ompi_mtl_portals4.eager_limit;
            md.options = 0;
            md.eq_handle = ompi_mtl_portals4.eq_h;
            md.ct_handle = PTL_CT_NONE;

            ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                            &md,
                            &ptl_request->md_h);
            if (PTL_OK != ret) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlMDBind failed: %d",
                                    __FILE__, __LINE__, ret);
                goto callback_error;
            }

            ret = PtlGet(ptl_request->md_h,
                         0,
                         md.length,
                         ev->initiator,
                         ompi_mtl_portals4.read_idx,
                         ev->hdr_data,
                         ompi_mtl_portals4.eager_limit,
                         ptl_request);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlGet failed: %d",
                                    __FILE__, __LINE__, ret);
                PtlMDRelease(ptl_request->md_h);
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                goto callback_error;
            }

        } else {
            /* make sure the data is in the right place */
            ret = ompi_mtl_datatype_unpack(ptl_request->convertor,
                                           ev->start,
                                           ev->mlength);
            if (OMPI_SUCCESS != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                    __FILE__, __LINE__, ret);
                ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
            }
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, expected",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);
        }
        break;

    case PTL_EVENT_REPLY:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got reply event",
                             ptl_request->opcount, ptl_request->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_REPLY with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            PtlMDRelease(ptl_request->md_h);
            goto callback_error;
        }
        /* set the status - most of this filled in right after issuing
           the PtlGet */
        ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
        if (ompi_mtl_portals4.protocol == rndv) {
            ptl_request->super.super.ompi_req->req_status._ucount +=
                ompi_mtl_portals4.eager_limit;
        }

        /* make sure the data is in the right place.  Use _ucount for
           the total length because it will be set correctly for all
           three protocols. mlength is only correct for eager, and
           delivery_len is the length of the buffer, not the length of
           the send. */
        ret = ompi_mtl_datatype_unpack(ptl_request->convertor, 
                                       ptl_request->delivery_ptr, 
                                       ptl_request->super.super.ompi_req->req_status._ucount);
        if (OMPI_SUCCESS != ret) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: ompi_mtl_datatype_unpack failed: %d",
                                __FILE__, __LINE__, ret);
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = ret;
        }
        PtlMDRelease(ptl_request->md_h);

        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, reply",
                             ptl_request->opcount, ptl_request->hdr_data));
        ptl_request->super.super.completion_callback(&ptl_request->super.super);
        break;

    case PTL_EVENT_PUT_OVERFLOW:
        OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) got put_overflow event",
                             ptl_request->opcount, ev->hdr_data));

        if (ev->ni_fail_type != PTL_NI_OK) {
            opal_output_verbose(1, ompi_mtl_base_output,
                                "%s:%d: PTL_EVENT_PUT_OVERFLOW with ni_fail_type: %d",
                                __FILE__, __LINE__, ev->ni_fail_type);
            goto callback_error;
        }

        msg_length = MTL_PORTALS4_GET_LENGTH(ev->hdr_data);
        ptl_request->super.super.ompi_req->req_status.MPI_SOURCE =
            MTL_PORTALS4_GET_SOURCE(ev->match_bits);
        ptl_request->super.super.ompi_req->req_status.MPI_TAG = 
            MTL_PORTALS4_GET_TAG(ev->match_bits);
        if (msg_length > ptl_request->delivery_len) {
            opal_output_verbose(1, ompi_mtl_base_output, "truncate unexpected: %ld %ld %d", 
                                msg_length, ptl_request->delivery_len, MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits));
            ptl_request->super.super.ompi_req->req_status.MPI_ERROR = MPI_ERR_TRUNCATE;
        }

#if OPAL_ENABLE_DEBUG
        ptl_request->hdr_data = ev->hdr_data;
#endif

        /* overflow case.  Short messages have the buffer stashed
           somewhere.  Long messages left in buffer at the source */
        if (MTL_PORTALS4_IS_SHORT_MSG(ev->match_bits)) {
            ptl_request->super.super.ompi_req->req_status._ucount = ev->mlength;
            if (ev->mlength > 0) {
                struct iovec iov;
                uint32_t iov_count = 1;
                size_t max_data;
                iov.iov_base = (char*) ev->start;
                iov.iov_len = ev->mlength;
                max_data = iov.iov_len;

                ret = opal_convertor_unpack(ptl_request->convertor, 
                                            &iov, &iov_count,
                                            &max_data );
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                if (ret < 0) {
                    opal_output_verbose(1, ompi_mtl_base_output,
                                        "%s:%d: opal_convertor_unpack failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }
            /* if it's a sync, send the ack */
            if (MTL_PORTALS4_IS_SYNC_MSG(ev->hdr_data)) {
                OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) sending sync ack",
                                     ptl_request->opcount, ptl_request->hdr_data));
                ret = PtlPut(ompi_mtl_portals4.zero_md_h,
                             0,
                             0,
                             PTL_NO_ACK_REQ,
                             ev->initiator,
                             ompi_mtl_portals4.read_idx,
                             ev->hdr_data,
                             0,
                             NULL,
                             0);
                if (PTL_OK != ret) {
                    opal_output_verbose(1, ompi_mtl_base_output,
                                        "%s:%d: PtlPut failed: %d",
                                        __FILE__, __LINE__, ret);
                    goto callback_error;
                }
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) completed, unexpected short (0x%lx)",
                                 ptl_request->opcount, ptl_request->hdr_data, (long) ev->start));
            ptl_request->super.super.completion_callback(&ptl_request->super.super);

        } else {
            ptl_md_t md;

            if (ev->mlength > 0) {
                /* if rndv or triggered, copy the eager part to the right place */
                memcpy(ptl_request->delivery_ptr, ev->start, ev->mlength);
            }

            md.start = (char*) ptl_request->delivery_ptr + ev->mlength;
            md.length = ((msg_length > ptl_request->delivery_len) ?
                         ptl_request->delivery_len : msg_length) - ev->mlength;
            md.options = 0;
            md.eq_handle = ompi_mtl_portals4.eq_h;
            md.ct_handle = PTL_CT_NONE;

            ret = PtlMDBind(ompi_mtl_portals4.ni_h,
                            &md,
                            &ptl_request->md_h);
            if (PTL_OK != ret) {
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlMDBind failed: %d",
                                    __FILE__, __LINE__, ret);
                goto callback_error;
            }

            OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output, "Recv %d (0x%lx) getting long data",
                                 ptl_request->opcount, ptl_request->hdr_data));
            ret = PtlGet(ptl_request->md_h,
                         0,
                         md.length,
                         ev->initiator,
                         ompi_mtl_portals4.read_idx,
                         ev->hdr_data,
                         ev->mlength,
                         ptl_request);
            if (PTL_OK != ret) {
                opal_output_verbose(1, ompi_mtl_base_output,
                                    "%s:%d: PtlGet failed: %d",
                                    __FILE__, __LINE__, ret);
                if (NULL != ptl_request->buffer_ptr) free(ptl_request->buffer_ptr);
                PtlMDRelease(ptl_request->md_h);
                goto callback_error;
            }
        }

        break;

    default:
        opal_output_verbose(1, ompi_mtl_base_output,
                            "Unhandled receive callback with event type %d",
                            ev->type);
        return OMPI_ERROR;
    }

    return OMPI_SUCCESS;

 callback_error:
    ptl_request->super.super.ompi_req->req_status.MPI_ERROR = 
        ompi_mtl_portals4_get_error(ret);
    ptl_request->super.super.completion_callback(&ptl_request->super.super);
    return OMPI_SUCCESS;
}