Example #1
0
static inline ucs_status_t uct_mm_iface_process_recv(uct_mm_iface_t *iface,
                                                     uct_mm_fifo_element_t* elem)
{
    ucs_status_t status;
    void         *data;

    if (ucs_likely(elem->flags & UCT_MM_FIFO_ELEM_FLAG_INLINE)) {
        /* read short (inline) messages from the FIFO elements */
        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV, elem->am_id,
                           elem + 1, elem->length, "RX: AM_SHORT");
        status = uct_mm_iface_invoke_am(iface, elem->am_id, elem + 1,
                                        elem->length, 0);
    } else {
        /* read bcopy messages from the receive descriptors */
        VALGRIND_MAKE_MEM_DEFINED(elem->desc_chunk_base_addr + elem->desc_offset,
                                  elem->length);

        data = elem->desc_chunk_base_addr + elem->desc_offset;

        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV, elem->am_id,
                           data, elem->length, "RX: AM_BCOPY");

        status = uct_mm_iface_invoke_am(iface, elem->am_id, data, elem->length,
                                        UCT_CB_FLAG_DESC);
        if (status != UCS_OK) {
            /* assign a new receive descriptor to this FIFO element.*/
            uct_mm_assign_desc_to_fifo_elem(iface, elem, 0);
        }
    }
    return status;
}
Example #2
0
ssize_t uct_tcp_ep_am_bcopy(uct_ep_h uct_ep, uint8_t am_id,
                            uct_pack_callback_t pack_cb, void *arg,
                            unsigned flags)
{
    uct_tcp_ep_t *ep = ucs_derived_of(uct_ep, uct_tcp_ep_t);
    uct_tcp_iface_t *iface = ucs_derived_of(uct_ep->iface, uct_tcp_iface_t);
    uct_tcp_am_hdr_t *hdr;
    size_t packed_length;

    if (!uct_tcp_ep_can_send(ep)) {
        return UCS_ERR_NO_RESOURCE;
    }

    hdr         = ep->buf;
    hdr->am_id  = am_id;
    hdr->length = packed_length = pack_cb(hdr + 1, arg);
    ep->length  = sizeof(*hdr) + packed_length;

    UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, hdr->length);
    uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_SEND, hdr->am_id,
                       hdr + 1, hdr->length, "SEND fd %d", ep->fd);
    iface->outstanding += ep->length;

    uct_tcp_ep_send(ep);
    if (ep->length > 0) {
        uct_tcp_ep_mod_events(ep, EPOLLOUT, 0);
    }
    return packed_length;
}
Example #3
0
ssize_t uct_ugni_smsg_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                  uct_pack_callback_t pack_cb,
                                  void *arg)
{
    uct_ugni_smsg_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_smsg_iface_t);
    uct_ugni_smsg_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t);
    ssize_t packed;
    uct_ugni_smsg_desc_t *desc;
    ucs_status_t rc;
    void *smsg_data;
    uct_ugni_smsg_header_t *smsg_header;

    UCT_CHECK_AM_ID(id);

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                             desc, return UCS_ERR_NO_RESOURCE);

    ucs_trace_data("AM_BCOPY [%p] am_id: %d buf=%p",
                   iface, id, arg );

    smsg_header = (uct_ugni_smsg_header_t *)(desc+1);
    smsg_data = (void*)(smsg_header+1);

    packed = pack_cb(smsg_data, arg);

    smsg_header->length = packed;

    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND,
                       id, smsg_data, packed, "TX: AM_BCOPY");

    rc = uct_ugni_smsg_ep_am_common_send(ep, iface, id, sizeof(uct_ugni_smsg_header_t),
                                         smsg_header, packed, smsg_data, desc);

    return (UCS_OK == rc) ? packed : rc;
}
Example #4
0
ucs_status_t uct_ugni_smsg_ep_am_short(uct_ep_h tl_ep, uint8_t id, uint64_t header,
                                       const void *payload, unsigned length)
{

    uct_ugni_smsg_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_smsg_iface_t);
    uct_ugni_smsg_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t);
    uct_ugni_smsg_header_t *smsg_header;
    uint64_t *header_data;
    uct_ugni_smsg_desc_t *desc;

    UCT_CHECK_AM_ID(id);
    UCT_CHECK_LENGTH(length, iface->config.smsg_seg_size -
                     (sizeof(smsg_header) + sizeof(header)), "am_short");

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                             desc, return UCS_ERR_NO_RESOURCE);

    ucs_trace_data("AM_SHORT [%p] am_id: %d buf=%p length=%u",
                   iface, id, payload, length);

    smsg_header = (uct_ugni_smsg_header_t *)(desc+1);
    smsg_header->length = length + sizeof(header);

    header_data = (uint64_t*)(smsg_header+1);
    *header_data = header;
    memcpy((void*)(header_data+1), payload, length);

    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND,
                       id, header_data, length, "TX: AM_SHORT");

    return  uct_ugni_smsg_ep_am_common_send(ep, iface, id, sizeof(uct_ugni_smsg_header_t),
                                            smsg_header, smsg_header->length, (void*)header_data, desc);
}
Example #5
0
static void process_mbox(uct_ugni_smsg_iface_t *iface, uct_ugni_smsg_ep_t *ep){
    ucs_status_t status;
    uint8_t tag;
    void *data_ptr;
    gni_return_t ugni_rc;
    uct_ugni_smsg_header_t *header;
    void *user_data;

    pthread_mutex_lock(&uct_ugni_global_lock);

    while(1){
        tag = GNI_SMSG_ANY_TAG;
        ugni_rc = GNI_SmsgGetNextWTag(ep->super.ep, (void **)&data_ptr, &tag);

        /* Yes, GNI_RC_NOT_DONE means that you're done with the smsg mailbox */
        if(GNI_RC_NOT_DONE == ugni_rc){
            pthread_mutex_unlock(&uct_ugni_global_lock);
            return;
        }

        if(GNI_RC_SUCCESS != ugni_rc){
            ucs_error("Unhandled smsg error: %s %d", gni_err_str[ugni_rc], ugni_rc);
            pthread_mutex_unlock(&uct_ugni_global_lock);
            return;
        }

        if(NULL == data_ptr){
            ucs_error("Empty data pointer in smsg.");
            pthread_mutex_unlock(&uct_ugni_global_lock);
            return;
        }

        header = (uct_ugni_smsg_header_t *)data_ptr;
        user_data = (void *)(header + 1);
        void *user_desc = iface->user_desc+1;

        uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV,
                           tag, user_data, header->length, "RX: AM");

        pthread_mutex_unlock(&uct_ugni_global_lock);
        status = uct_iface_invoke_am(&iface->super.super, tag, user_data,
                                     header->length, user_desc);
        pthread_mutex_lock(&uct_ugni_global_lock);

        if(status != UCS_OK){
            uct_recv_desc_iface(user_desc) = &iface->super.super.super;
            UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                                     iface->user_desc, iface->user_desc = NULL);
        }

        ugni_rc = GNI_SmsgRelease(ep->super.ep);
        if(GNI_RC_SUCCESS != ugni_rc){
            ucs_error("Unhandled smsg error in GNI_SmsgRelease: %s %d", gni_err_str[ugni_rc], ugni_rc);
            pthread_mutex_unlock(&uct_ugni_global_lock);
            return;
        }
    }
}
Example #6
0
unsigned uct_tcp_ep_progress_rx(uct_tcp_ep_t *ep)
{
    uct_tcp_iface_t *iface = ucs_derived_of(ep->super.super.iface,
                                            uct_tcp_iface_t);
    uct_tcp_am_hdr_t *hdr;
    ucs_status_t status;
    size_t recv_length;
    ssize_t remainder;

    ucs_trace_func("ep=%p", ep);

    /* Receive next chunk of data */
    recv_length = iface->config.buf_size - ep->length;
    status = uct_tcp_recv(ep->fd, ep->buf + ep->length, &recv_length);
    if (status != UCS_OK) {
        if (status == UCS_ERR_CANCELED) {
            ucs_debug("tcp_ep %p: remote disconnected", ep);
            uct_tcp_ep_mod_events(ep, 0, EPOLLIN);
            uct_tcp_ep_destroy(&ep->super.super);
        }
        return 0;
    }

    ep->length += recv_length;
    ucs_trace_data("tcp_ep %p: recvd %zu bytes", ep, recv_length);

    /* Parse received active messages */
    while ((remainder = ep->length - ep->offset) >= sizeof(*hdr)) {
        hdr = ep->buf + ep->offset;
        if (remainder < sizeof(*hdr) + hdr->length) {
            break;
        }

        /* Full message was received */
        ep->offset += sizeof(*hdr) + hdr->length;

        if (hdr->am_id >= UCT_AM_ID_MAX) {
            ucs_error("invalid am id: %d", hdr->am_id);
            continue;
        }

        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_RECV, hdr->am_id,
                           hdr + 1, hdr->length, "RECV fd %d", ep->fd);
        uct_iface_invoke_am(&iface->super, hdr->am_id, hdr + 1,
                            hdr->length, 0);
    }

    /* Move the remaining data to the beginning of the buffer
     * TODO avoid extra copy on partial receive
     */
    ucs_assert(remainder >= 0);
    memmove(ep->buf, ep->buf + ep->offset, remainder);
    ep->offset = 0;
    ep->length = remainder;

    return recv_length > 0;
}
Example #7
0
static ucs_status_t processs_datagram(uct_ugni_udt_iface_t *iface, uct_ugni_udt_desc_t *desc)
{
    ucs_status_t status;
    uct_ugni_udt_header_t *header;
    void *payload;

    header = uct_ugni_udt_get_rheader(desc, iface);
    payload = uct_ugni_udt_get_rpayload(desc, iface);
    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV,
                       header->am_id, payload, header->length, "RX: AM");
    status = uct_iface_invoke_am(&iface->super.super, header->am_id, payload,
                                 header->length, UCT_CB_FLAG_DESC);
    return status;
}
Example #8
0
ssize_t uct_ugni_smsg_ep_am_bcopy(uct_ep_h tl_ep, uint8_t id,
                                  uct_pack_callback_t pack_cb,
                                  void *arg, unsigned flags)
{
    uct_ugni_smsg_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_ugni_smsg_iface_t);
    uct_ugni_smsg_ep_t *ep = ucs_derived_of(tl_ep, uct_ugni_smsg_ep_t);
    ssize_t packed;
    uct_ugni_smsg_desc_t *desc;
    ucs_status_t rc;
    void *smsg_data;
    uct_ugni_smsg_header_t *smsg_header;

    UCT_CHECK_AM_ID(id);

    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                             desc, return UCS_ERR_NO_RESOURCE);

    ucs_trace_data("AM_BCOPY [%p] am_id: %d send request %p",
                   iface, id, arg);

    smsg_header = (uct_ugni_smsg_header_t *)(desc+1);
    smsg_data = (void*)(smsg_header+1);

    packed = pack_cb(smsg_data, arg);

    smsg_header->length = packed;

    UCT_CHECK_LENGTH(packed, 0, iface->config.smsg_seg_size -
                     0, "am_bcopy");    

    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND,
                       id, smsg_data, packed, "TX: AM_BCOPY");

    rc = uct_ugni_smsg_ep_am_common_send(ep, iface, id, sizeof(uct_ugni_smsg_header_t),
                                         smsg_header, packed, smsg_data, desc);

    UCT_TL_EP_STAT_OP_IF_SUCCESS(rc, ucs_derived_of(ep, uct_base_ep_t), AM, BCOPY, packed);

    return (UCS_OK == rc) ? packed : rc;
}
Example #9
0
File: mm_ep.c Project: brminich/ucx
/* A common mm active message sending function.
 * The first parameter indicates the origin of the call.
 * is_short = 1 - perform AM short sending
 * is_short = 0 - perform AM bcopy sending
 */
static UCS_F_ALWAYS_INLINE ssize_t
uct_mm_ep_am_common_send(const unsigned is_short, uct_mm_ep_t *ep, uct_mm_iface_t *iface,
                         uint8_t am_id, size_t length, uint64_t header,
                         const void *payload, uct_pack_callback_t pack_cb, void *arg)
{
    uct_mm_fifo_element_t *elem;
    ucs_status_t status;
    void *base_address;
    uint64_t head;

    UCT_CHECK_AM_ID(am_id);

    head = ep->fifo_ctl->head;
    /* check if there is room in the remote process's receive FIFO to write */
    if (!UCT_MM_EP_IS_ABLE_TO_SEND(head, ep->cached_tail, iface->config.fifo_size)) {
        if (!ucs_arbiter_group_is_empty(&ep->arb_group)) {
            /* pending isn't empty. don't send now to prevent out-of-order sending */
            UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1);
            return UCS_ERR_NO_RESOURCE;
        } else {
            /* pending is empty */
            /* update the local copy of the tail to its actual value on the remote peer */
            uct_mm_ep_update_cached_tail(ep);
            if (!UCT_MM_EP_IS_ABLE_TO_SEND(head, ep->cached_tail, iface->config.fifo_size)) {
                UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1);
                return UCS_ERR_NO_RESOURCE;
            }
        }
    }

    status = uct_mm_ep_get_remote_elem(ep, head, &elem);
    if (status != UCS_OK) {
        ucs_trace_poll("couldn't get an available FIFO element");
        UCS_STATS_UPDATE_COUNTER(ep->super.stats, UCT_EP_STAT_NO_RES, 1);
        return status;
    }

    if (is_short) {
        /* AM_SHORT */
        /* write to the remote FIFO */
        *(uint64_t*) (elem + 1) = header;
        memcpy((void*) (elem + 1) + sizeof(header), payload, length);

        elem->flags |= UCT_MM_FIFO_ELEM_FLAG_INLINE;
        elem->length = length + sizeof(header);

        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_SEND, am_id,
                           elem + 1, length + sizeof(header), "TX: AM_SHORT");
        UCT_TL_EP_STAT_OP(&ep->super, AM, SHORT, sizeof(header) + length);
    } else {
        /* AM_BCOPY */
        /* write to the remote descriptor */
        /* get the base_address: local ptr to remote memory chunk after attaching to it */
        base_address = uct_mm_ep_attach_remote_seg(ep, iface, elem);
        length = pack_cb(base_address + elem->desc_offset, arg);

        elem->flags &= ~UCT_MM_FIFO_ELEM_FLAG_INLINE;
        elem->length = length;

        uct_iface_trace_am(&iface->super, UCT_AM_TRACE_TYPE_SEND, am_id,
                           base_address + elem->desc_offset, length, "TX: AM_BCOPY");

        UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, length);
    }

    elem->am_id = am_id;

    /* memory barrier - make sure that the memory is flushed before setting the
     * 'writing is complete' flag which the reader checks */
    ucs_memory_cpu_store_fence();

    /* change the owner bit to indicate that the writing is complete.
     * the owner bit flips after every FIFO wraparound */
    if (head & iface->config.fifo_size) {
        elem->flags |= UCT_MM_FIFO_ELEM_FLAG_OWNER;
    } else {
        elem->flags &= ~UCT_MM_FIFO_ELEM_FLAG_OWNER;
    }

    if (is_short) {
        return UCS_OK;
    } else {
        return length;
    }
}
Example #10
0
static void uct_ugni_udt_progress(void *arg)
{
    uint32_t rem_addr,
             rem_id;
    uint64_t id;
    void *payload;
    void *user_desc;

    ucs_status_t status;

    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *header;
    uct_ugni_udt_iface_t * iface = (uct_ugni_udt_iface_t *)arg;
    uct_ugni_udt_ep_t *ep;

    gni_ep_handle_t ugni_ep;
    gni_post_state_t post_state;
    gni_return_t ugni_rc;

    pthread_mutex_lock(&uct_ugni_global_lock);
    ugni_rc = GNI_PostDataProbeById(iface->super.nic_handle, &id);
    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        if (GNI_RC_NO_MATCH != ugni_rc) {
            ucs_error("GNI_PostDataProbeById , Error status: %s %d",
                      gni_err_str[ugni_rc], ugni_rc);
        }
        goto exit;
    }

    if (UCT_UGNI_UDT_ANY == id) {
        /* New incomming message */
        ep = NULL;
        ugni_ep = iface->ep_any;
        desc = iface->desc_any;
    } else {
        /* Ack message */
        ep = ucs_derived_of(uct_ugni_iface_lookup_ep(&iface->super, id),
                            uct_ugni_udt_ep_t);
        if (ucs_unlikely(NULL == ep)) {
            ucs_error("Can not lookup ep with id %"PRIx64,id);
            goto exit;
        }
        ugni_ep = ep->super.ep;
        desc = ep->posted_desc;
    }

    ugni_rc = GNI_EpPostDataWaitById(ugni_ep, id, -1, &post_state, &rem_addr, &rem_id);
    if (ucs_unlikely(GNI_RC_SUCCESS != ugni_rc)) {
        ucs_error("GNI_EpPostDataWaitById, Error status: %s %d",
                  gni_err_str[ugni_rc], ugni_rc);
        goto exit;
    }

    header = uct_ugni_udt_get_rheader(desc, iface);
    payload = uct_ugni_udt_get_rpayload(desc, iface);
    user_desc = uct_ugni_udt_get_user_desc(desc, iface);

    if (UCT_UGNI_UDT_ANY == id) {
        /* New incomming message */
        ucs_assert_always(header->type == UCT_UGNI_UDT_PAYLOAD);
        uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_RECV,
                           header->am_id, payload, header->length, "RX: AM");
        status = uct_iface_invoke_am(&iface->super.super, header->am_id, payload,
                                     header->length, user_desc);
        if (UCS_OK != status) {
            uct_ugni_udt_desc_t *new_desc;
            /* set iface for a later release call */
            uct_recv_desc_iface(user_desc) = &iface->super.super.super;
            /* Allocate a new element */
            UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                                     new_desc, goto exit);
            /* set the new desc */
            iface->desc_any = new_desc;
        }
Example #11
0
static UCS_F_ALWAYS_INLINE ssize_t
uct_ugni_udt_ep_am_common_send(const unsigned is_short, uct_ugni_udt_ep_t *ep, uct_ugni_udt_iface_t *iface,
                               uint8_t am_id, unsigned length, uint64_t header,
                               const void *payload, uct_pack_callback_t pack_cb, void *arg)
{
    gni_return_t ugni_rc;
    uint16_t msg_length;
    uct_ugni_udt_desc_t *desc;
    uct_ugni_udt_header_t *sheader,
                          *rheader;
    ssize_t packed_length;

    UCT_CHECK_AM_ID(am_id);
    if (ucs_unlikely(NULL != ep->posted_desc)) {
        UCT_TL_IFACE_STAT_TX_NO_DESC(&iface->super.super);
        return UCS_ERR_NO_RESOURCE;
    }
    UCT_TL_IFACE_GET_TX_DESC(&iface->super.super, &iface->free_desc,
                             desc, return UCS_ERR_NO_RESOURCE);

    rheader = uct_ugni_udt_get_rheader(desc, iface);
    rheader->type = UCT_UGNI_UDT_EMPTY;

    sheader = uct_ugni_udt_get_sheader(desc, iface);

    if (is_short) {
        uint64_t *hdr = (uint64_t *)uct_ugni_udt_get_spayload(desc, iface);
        *hdr = header;
        memcpy((void*)(hdr + 1), payload, length);
        sheader->length = length + sizeof(header);
        msg_length = sheader->length + sizeof(*sheader);
        UCT_TL_EP_STAT_OP(ucs_derived_of(ep, uct_base_ep_t), AM, SHORT, sizeof(header) + length);
    } else {
        packed_length = pack_cb((void *)uct_ugni_udt_get_spayload(desc, iface),
                                arg);
        sheader->length = packed_length;
        msg_length = sheader->length + sizeof(*sheader);
        UCT_TL_EP_STAT_OP(ucs_derived_of(ep, uct_base_ep_t), AM, BCOPY, packed_length);
    }

    uct_iface_trace_am(&iface->super.super, UCT_AM_TRACE_TYPE_SEND, am_id,
                       uct_ugni_udt_get_spayload(desc, iface), length,
                       is_short ? "TX: AM_SHORT" : "TX: AM_BCOPY");

    sheader->am_id = am_id;
    sheader->type = UCT_UGNI_UDT_PAYLOAD;

    ucs_assert_always(sheader->length <= GNI_DATAGRAM_MAXSIZE);

    pthread_mutex_lock(&uct_ugni_global_lock);
    ugni_rc = GNI_EpPostDataWId(ep->super.ep,
                                sheader, msg_length,
                                rheader, (uint16_t)iface->config.udt_seg_size,
                                ep->super.hash_key);
    pthread_mutex_unlock(&uct_ugni_global_lock);
    UCT_UGNI_UDT_CHECK_RC(ugni_rc);

    ep->posted_desc = desc;
    ++ep->super.outstanding;
    ++iface->super.outstanding;
    return is_short ? UCS_OK : packed_length;
}