int ADD_SUFFIX(MPID_nem_ofi_iprobe_impl)(struct MPIDI_VC *vc,
                             int source,
                             int tag,
                             MPIR_Comm * comm,
                             int context_offset,
                             int *flag, MPI_Status * status, MPIR_Request ** rreq_ptr)
{
    int ret, mpi_errno = MPI_SUCCESS;
    fi_addr_t remote_proc = 0;
    uint64_t match_bits, mask_bits;
    size_t len;
    MPIR_Request rreq_s, *rreq;

    BEGIN_FUNC(FCNAME);
    if (rreq_ptr) {
        MPIDI_CH3I_NM_OFI_RC(MPID_nem_ofi_create_req(&rreq, 1));
        rreq->kind = MPIR_REQUEST_KIND__RECV;

        *rreq_ptr = rreq;
        rreq->comm = comm;
        rreq->dev.match.parts.rank = source;
        rreq->dev.match.parts.tag = tag;
        rreq->dev.match.parts.context_id = comm->context_id;
        MPIR_Comm_add_ref(comm);
    }
    else {
        rreq = &rreq_s;
        rreq->dev.OnDataAvail = NULL;
    }

    REQ_OFI(rreq)->pack_buffer    = NULL;
    REQ_OFI(rreq)->event_callback = ADD_SUFFIX(peek_callback);
    REQ_OFI(rreq)->match_state    = PEEK_INIT;
    OFI_ADDR_INIT(source, vc, remote_proc);
#if API_SET == API_SET_1
    match_bits = init_recvtag(&mask_bits, comm->context_id + context_offset, source, tag);
#elif API_SET == API_SET_2
    match_bits = init_recvtag_2(&mask_bits, comm->context_id + context_offset, tag);
#endif

    /* ------------------------------------------------------------------------- */
    /* fi_recvmsg with FI_PEEK:                                                  */
    /* Initiate a search for a match in the hardware or software queue.          */
    /* The search can complete immediately with -ENOMSG.                         */
    /* I successful, libfabric will enqueue a context entry into the completion  */
    /* queue to make the search nonblocking.  This code will poll until the      */
    /* entry is enqueued.                                                        */
    /* ------------------------------------------------------------------------- */
    msg_tagged_t msg;
    uint64_t     msgflags = FI_PEEK;
    msg.msg_iov   = NULL;
    msg.desc      = NULL;
    msg.iov_count = 0;
    msg.addr      = remote_proc;
    msg.tag       = match_bits;
    msg.ignore    = mask_bits;
    msg.context   = (void *) &(REQ_OFI(rreq)->ofi_context);
    msg.data      = 0;
    if(*flag == CLAIM_PEEK)
      msgflags|=FI_CLAIM;
    ret = fi_trecvmsg(gl_data.endpoint,&msg,msgflags);
    if(ret == -ENOMSG) {
      if (rreq_ptr) {
        MPIR_Request_free(rreq);
        *rreq_ptr = NULL;
        *flag = 0;
      }
      MPID_nem_ofi_poll(MPID_NONBLOCKING_POLL);
      goto fn_exit;
    }
    MPIR_ERR_CHKANDJUMP4((ret < 0), mpi_errno, MPI_ERR_OTHER,
                         "**ofi_peek", "**ofi_peek %s %d %s %s",
                         __SHORT_FILE__, __LINE__, FCNAME, fi_strerror(-ret));

    while (PEEK_INIT == REQ_OFI(rreq)->match_state)
        MPID_nem_ofi_poll(MPID_BLOCKING_POLL);

    if (PEEK_NOT_FOUND == REQ_OFI(rreq)->match_state) {
        if (rreq_ptr) {
            MPIR_Request_free(rreq);
            *rreq_ptr = NULL;
            *flag = 0;
        }
        MPID_nem_ofi_poll(MPID_NONBLOCKING_POLL);
        goto fn_exit;
    }

    if (status != MPI_STATUS_IGNORE)
        *status = rreq->status;

    MPIR_Request_add_ref(rreq);
    *flag = 1;
    END_FUNC_RC(FCNAME);
}
static inline
int ADD_SUFFIX(MPID_nem_ofi_recv_callback)(cq_tagged_entry_t * wc, MPID_Request * rreq)
{
    int err0, err1, src, mpi_errno = MPI_SUCCESS;
    uint64_t ssend_bits;
    MPIDI_msg_sz_t sz;
    MPIDI_VC_t *vc;
    MPID_Request *sync_req;
    BEGIN_FUNC(FCNAME);
    /* ---------------------------------------------------- */
    /* Populate the MPI Status and unpack noncontig buffer  */
    /* ---------------------------------------------------- */
    rreq->status.MPI_ERROR = MPI_SUCCESS;
#if API_SET == API_SET_1
    rreq->status.MPI_SOURCE = get_source(wc->tag);
#elif API_SET == API_SET_2
    rreq->status.MPI_SOURCE = wc->data;
#endif
    src = rreq->status.MPI_SOURCE;
    rreq->status.MPI_TAG = get_tag(wc->tag);

    REQ_OFI(rreq)->req_started = 1;
    MPIR_STATUS_SET_COUNT(rreq->status, wc->len);

    if (REQ_OFI(rreq)->pack_buffer) {
        MPIDI_CH3U_Buffer_copy(REQ_OFI(rreq)->pack_buffer,
                               MPIR_STATUS_GET_COUNT(rreq->status),
                               MPI_BYTE, &err0, rreq->dev.user_buf,
                               rreq->dev.user_count, rreq->dev.datatype, &sz, &err1);
        MPIR_STATUS_SET_COUNT(rreq->status, sz);
        MPIU_Free(REQ_OFI(rreq)->pack_buffer);
        if (err0 || err1) {
            rreq->status.MPI_ERROR = MPI_ERR_TYPE;
        }
    }

    if ((wc->tag & MPID_PROTOCOL_MASK) == MPID_SYNC_SEND) {
        /* ---------------------------------------------------- */
        /* Ack the sync send and wait for the send request      */
        /* completion(when callback executed.  A protocol bit   */
        /* MPID_SYNC_SEND_ACK is set in the tag bits to provide */
        /* separation of MPI messages and protocol messages     */
        /* ---------------------------------------------------- */
        vc = REQ_OFI(rreq)->vc;
        if (!vc) {      /* MPI_ANY_SOURCE -- Post message from status, complete the VC */
            vc = rreq->comm->dev.vcrt->vcr_table[src];
            MPIU_Assert(vc);
        }
#if API_SET == API_SET_1
        ssend_bits = init_sendtag(rreq->dev.match.parts.context_id,
                                  rreq->comm->rank, rreq->status.MPI_TAG, MPID_SYNC_SEND_ACK);
#elif API_SET == API_SET_2
        ssend_bits = init_sendtag_2(rreq->dev.match.parts.context_id,
                                    rreq->status.MPI_TAG, MPID_SYNC_SEND_ACK);
#endif
        MPID_nem_ofi_create_req(&sync_req, 1);
        sync_req->dev.OnDataAvail = NULL;
        sync_req->dev.next = NULL;
        REQ_OFI(sync_req)->event_callback = MPID_nem_ofi_sync_recv_callback;
        REQ_OFI(sync_req)->parent = rreq;
#if API_SET == API_SET_1
        FI_RC_RETRY(fi_tsend(gl_data.endpoint,
#elif API_SET == API_SET_2
        FI_RC_RETRY(fi_tsenddata(gl_data.endpoint,
#endif
                         NULL,
                         0,
                         gl_data.mr,
#if API_SET == API_SET_2
                         rreq->comm->rank,
#endif
                         VC_OFI(vc)->direct_addr,
                         ssend_bits, &(REQ_OFI(sync_req)->ofi_context)), tsend);
    }