Example #1
void MPID_nem_dbg_print_all_sendq(FILE *stream)
{
    int i;
    MPIDI_PG_t *pg;
    MPIDI_VC_t *vc;
    MPIDI_PG_iterator iter;

    fprintf(stream, "========================================\n");
    fprintf(stream, "MPI_COMM_WORLD  ctx=%#x rank=%d\n", MPIR_Process.comm_world->context_id, MPIR_Process.comm_world->rank);
    fprintf(stream, "MPI_COMM_SELF   ctx=%#x\n", MPIR_Process.comm_self->context_id);
    if (MPIR_Process.comm_parent) {
        fprintf(stream, "MPI_COMM_PARENT ctx=%#x recvctx=%#x\n",
                MPIR_Process.comm_self->context_id,
                MPIR_Process.comm_parent->recvcontext_id);
    }
    else {
        fprintf(stream, "MPI_COMM_PARENT (NULL)\n");
    }

    MPIDI_PG_Get_iterator(&iter);
    while (MPIDI_PG_Has_next(&iter)) {
        MPIDI_PG_Get_next(&iter, &pg);
        fprintf(stream, "PG ptr=%p size=%d id=%s refcount=%d\n", pg, pg->size, (const char*)pg->id, MPIR_Object_get_ref(pg));
        for (i = 0; i < MPIDI_PG_Get_size(pg); ++i) {
            MPIDI_PG_Get_vc(pg, i, &vc);
            MPID_nem_dbg_print_vc_sendq(stream, vc);
        }
    }

    fprintf(stream, "========================================\n");
}
Example #2
static int terminate_failed_VCs(MPID_Group *new_failed_group)
{
#if defined(FINEGRAIN_MPI)
    return (MPI_SUCCESS); /* FG: Temporary bypass. Uses MPI_Group */
#endif

    int mpi_errno = MPI_SUCCESS;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_TERMINATE_FAILED_VCS);

    MPIDI_FUNC_ENTER(MPID_STATE_TERMINATE_FAILED_VCS);

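    /* new_failed_group->lrank_to_lpid[] translates each group rank to an
       LPID, which is then used as the index into my_pg's VC table. */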
    for (i = 0; i < new_failed_group->size; ++i) {
        MPIDI_VC_t *vc;
        /* terminate the VC */
        /* FIXME: This won't work for dynamic procs */
        MPIDI_PG_Get_vc(MPIDI_Process.my_pg, new_failed_group->lrank_to_lpid[i].lpid, &vc);
        mpi_errno = MPIDI_CH3_Connection_terminate(vc);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }
    
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_TERMINATE_FAILED_VCS);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
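
The fn_exit/fn_fail layout above is the error-handling convention used throughout these examples: on failure, MPIR_ERR_POP records the error and jumps to fn_fail, which performs failure-only cleanup and then reuses the common exit path at fn_exit, so every return goes through a single point. A stripped-down sketch of the shape (ERR_POP is a simplified stand-in for the real MPICH macro, and step_one is a hypothetical helper):

/* Simplified stand-in for MPICH's MPIR_ERR_POP: the real macro also
 * pushes diagnostic info onto the error stack before jumping. */
#define ERR_POP(err_) do { goto fn_fail; } while (0)

static int step_one(void) { return 0; }   /* hypothetical helper */

static int do_work(void)
{
    int mpi_errno = 0;   /* MPI_SUCCESS */

    mpi_errno = step_one();
    if (mpi_errno) ERR_POP(mpi_errno);

    /* ... further steps, each checked the same way ... */

 fn_exit:
    /* cleanup shared by success and failure paths */
    return mpi_errno;
 fn_fail:
    /* failure-only cleanup, then take the common exit path */
    goto fn_exit;
}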
Example #3
static int ckpt_restart(void)
{
    int mpi_errno = MPI_SUCCESS;
    char *publish_bc_orig = NULL;
    char *bc_val          = NULL;
    int val_max_sz;
    int i;
    MPIDI_STATE_DECL(MPID_STATE_CKPT_RESTART);

    MPIDI_FUNC_ENTER(MPID_STATE_CKPT_RESTART);

    /* First, clean up.  We didn't shut anything down before the
       checkpoint, so we need to go close and free any resources */
    mpi_errno = MPID_nem_tcp_ckpt_cleanup();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    mpi_errno = MPID_nem_tcp_send_finalize();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    mpi_errno = MPID_nem_tcp_sm_finalize();
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);

    /* Initialize the new business card */
    mpi_errno = MPIDI_CH3I_BCInit(&bc_val, &val_max_sz);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    publish_bc_orig = bc_val;
    
    /* Now we can restart */
    mpi_errno = MPID_nem_tcp_init(MPIDI_Process.my_pg, MPIDI_Process.my_pg_rank, &bc_val, &val_max_sz);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    
    /* publish business card */
    mpi_errno = MPIDI_PG_SetConnInfo(MPIDI_Process.my_pg_rank, (const char *)publish_bc_orig);
    if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    MPIU_Free(publish_bc_orig);

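    /* Reconnect each remote VC through the channel's restart hook; local
       (shared-memory) VCs survive the checkpoint and need no restart. */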
    for (i = 0; i < MPIDI_Process.my_pg->size; ++i) {
        MPIDI_VC_t *vc;
        MPIDI_CH3I_VC *vc_ch;
        if (i == MPIDI_Process.my_pg_rank)
            continue;
        MPIDI_PG_Get_vc(MPIDI_Process.my_pg, i, &vc);
        vc_ch = &vc->ch;
        if (!vc_ch->is_local) {
            mpi_errno = vc_ch->ckpt_restart_vc(vc);
            if (mpi_errno) MPIR_ERR_POP(mpi_errno);
        }
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_CKPT_RESTART);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
Example #4
int MPIDI_nem_ckpt_finish(void)
{
    int mpi_errno = MPI_SUCCESS;
    int i;
    int ret;
    MPIDI_STATE_DECL(MPID_STATE_MPIDI_NEM_CKPT_FINISH);

    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_NEM_CKPT_FINISH);

    /* Since we're checkpointing the shared memory region (i.e., the
       channels between local procs), we don't have to flush those
       channels, just make sure no one is sending or receiving during
       the checkpoint */
    mpi_errno = MPID_nem_barrier();
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    do {
        ret = sem_post(&ckpt_sem);
    } while (ret == -1 && errno == EINTR);
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_post", "**sem_post %s", MPIU_Strerror(errno));

    do {
        ret = sem_wait(&cont_sem);
    } while (ret == -1 && errno == EINTR);
    MPIU_ERR_CHKANDJUMP1(ret, mpi_errno, MPI_ERR_OTHER, "**sem_wait", "**sem_wait %s", MPIU_Strerror(errno));

    mpi_errno = MPID_nem_barrier();
    if (mpi_errno) MPIU_ERR_POP(mpi_errno);

    if (ckpt_result == CKPT_CONTINUE) {
        for (i = 0; i < MPIDI_Process.my_pg->size; ++i) {
            MPIDI_VC_t *vc;
            MPIDI_CH3I_VC *vc_ch;
            /* We didn't send a marker to ourselves. */
            if (i == MPIDI_Process.my_pg_rank)
                continue;

            MPIDI_PG_Get_vc(MPIDI_Process.my_pg, i, &vc);
            vc_ch = &vc->ch;
            if (!vc_ch->is_local) {
                mpi_errno = vc_ch->ckpt_continue_vc(vc);
                if (mpi_errno) MPIU_ERR_POP(mpi_errno);
            }
        }
    }
    
    checkpointing = FALSE;
    
fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_NEM_CKPT_FINISH);
    return mpi_errno;
fn_fail:
    goto fn_exit;
}
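
The sem_post/sem_wait pair above is one half of a handshake: a separate checkpointing thread must block on ckpt_sem until the MPI thread is quiesced, take the checkpoint, set ckpt_result, and post cont_sem to release it. That thread is not shown on this page; below is a minimal sketch of the counterpart the handshake implies, where take_checkpoint() is a hypothetical stand-in for the actual checkpoint request (e.g., a BLCR request).

/* Minimal sketch of the checkpoint-thread side of the handshake in
 * MPIDI_nem_ckpt_finish().  ckpt_sem, cont_sem, and ckpt_result mirror the
 * names used above; take_checkpoint() is a hypothetical stand-in. */
#include <errno.h>
#include <semaphore.h>

extern sem_t ckpt_sem;     /* posted by the MPI thread once it is quiesced */
extern sem_t cont_sem;     /* posted here to let the MPI thread continue */
extern int   ckpt_result;  /* read by the MPI thread after the handshake */

static int take_checkpoint(void) { return 0; }   /* hypothetical */

static void checkpoint_thread_step(void)
{
    int ret;

    /* Block until the MPI thread has passed the barrier and stopped
     * touching the shared-memory channels. */
    do {
        ret = sem_wait(&ckpt_sem);
    } while (ret == -1 && errno == EINTR);

    ckpt_result = take_checkpoint();

    /* Wake the MPI thread blocked in sem_wait(&cont_sem). */
    sem_post(&cont_sem);
}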
Example #5
static int get_target_info(int rank, ptl_process_t *id, ptl_pt_index_t local_data_pt, ptl_pt_index_t *target_data_pt,
                           ptl_pt_index_t *target_control_pt)
{
    int mpi_errno = MPI_SUCCESS;
    struct MPIDI_VC *vc;
    MPID_nem_ptl_vc_area *vc_ptl;
    MPIR_FUNC_VERBOSE_STATE_DECL(MPID_STATE_MPID_NEM_GET_TARGET_INFO);

    MPIR_FUNC_VERBOSE_ENTER(MPID_STATE_MPID_NEM_GET_TARGET_INFO);

    MPIDI_PG_Get_vc(MPIDI_Process.my_pg, rank, &vc);
    vc_ptl = VC_PTL(vc);
    if (!vc_ptl->id_initialized) {
        mpi_errno = MPID_nem_ptl_init_id(vc);
        if (mpi_errno) MPIR_ERR_POP(mpi_errno);
    }

    *id = vc_ptl->id;

    MPIR_Assert(local_data_pt == MPIDI_nem_ptl_pt || local_data_pt == MPIDI_nem_ptl_get_pt ||
                local_data_pt == MPIDI_nem_ptl_control_pt);

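    /* Map the local portal-table entry onto the peer's matching data and
       control entries. */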
    if (local_data_pt == MPIDI_nem_ptl_pt) {
        *target_data_pt = vc_ptl->pt;
        *target_control_pt = vc_ptl->ptr;
    }
    else if (local_data_pt == MPIDI_nem_ptl_get_pt) {
        *target_data_pt = vc_ptl->ptg;
        *target_control_pt = PTL_PT_ANY;
    }
    else if (local_data_pt == MPIDI_nem_ptl_control_pt) {
        *target_data_pt = vc_ptl->ptc;
        *target_control_pt = vc_ptl->ptrc;
    }

 fn_exit:
    MPIR_FUNC_VERBOSE_EXIT(MPID_STATE_MPID_NEM_GET_TARGET_INFO);
    return mpi_errno;

 fn_fail:
    goto fn_exit;
}
Example #6
File: ch3_init.c  Project: hpc/mvapich2-cce
void rdma_process_hostid(MPIDI_PG_t * pg, int *host_ids, int my_rank, int pg_size)
{
    int i;
    int my_host_id;
    MPIDI_VC_t* vc = NULL;

    pg->ch.local_process_id = 0;
    pg->ch.num_local_processes = 0;

    my_host_id = host_ids[my_rank];
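    /* Processes sharing this host id get consecutive SMP local ranks;
       processes on other hosts are marked -1. */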
    for (i = 0; i < pg_size; ++i) {
        MPIDI_PG_Get_vc(pg, i, &vc);
        if (host_ids[i] == my_host_id) {
           vc->smp.local_rank = pg->ch.num_local_processes++;
           if (i == my_rank) {
               pg->ch.local_process_id = vc->smp.local_rank;
           }
        } else {
           vc->smp.local_rank = -1;
        }
    }
}
Example #7
static int nonempty_intersection(MPID_Comm *comm, MPID_Group *group, int *flag)
{
    int mpi_errno = MPI_SUCCESS;
    int i_g, i_c;
    MPIDI_VC_t *vc_g, *vc_c;
    MPIDI_STATE_DECL(MPID_STATE_NONEMPTY_INTERSECTION);

    MPIDI_FUNC_ENTER(MPID_STATE_NONEMPTY_INTERSECTION);

    /* handle common case fast */
    if (comm == MPIR_Process.comm_world || comm == MPIR_Process.icomm_world) {
        *flag = TRUE;
        MPIU_DBG_MSG(CH3_OTHER, VERBOSE, "comm is comm_world or icomm_world");
        goto fn_exit;
    }
    *flag = FALSE;
    
    /* FIXME: This algorithm assumes that the number of processes in group is
       very small (like 1).  So doing a linear search for them in comm is better
       than sorting the procs in comm and group then doing a binary search */

    for (i_g = 0; i_g < group->size; ++i_g) {
        /* FIXME: This won't work for dynamic procs */
        MPIDI_PG_Get_vc(MPIDI_Process.my_pg, group->lrank_to_lpid[i_g].lpid, &vc_g);
        for (i_c = 0; i_c < comm->remote_size; ++i_c) {
            MPIDI_Comm_get_vc(comm, i_c, &vc_c);
            if (vc_g == vc_c) {
                *flag = TRUE;
                goto fn_exit;
            }
        }
    }
    
 fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_NONEMPTY_INTERSECTION);
    return mpi_errno;
 fn_fail:
    goto fn_exit;
}
Example #8
File: ch3_init.c  Project: hpc/mvapich2-cce
int MPIDI_CH3_Init(int has_parent, MPIDI_PG_t * pg, int pg_rank)
{
    int mpi_errno = MPI_SUCCESS;
    int pg_size, threshold, dpm = 0, p;
    char *dpm_str, *value, *conn_info = NULL;
    MPIDI_VC_t *vc;

    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3_INIT);
    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3_INIT);
 
    if (MPIDI_CH3_Pkt_size_index[MPIDI_CH3_PKT_CLOSE] != sizeof (MPIDI_CH3_Pkt_close_t))
    {
        MPIU_ERR_SETFATALANDJUMP1(
            mpi_errno,
            MPI_ERR_OTHER,
            "**fail",
            "**fail %s", 
            "Failed sanity check! Packet size table mismatch");
    }
    
    pg_size = MPIDI_PG_Get_size(pg);

    /* Determine which connection management method to use */
    threshold = MPIDI_CH3I_CM_DEFAULT_ON_DEMAND_THRESHOLD;

    /* Check for an MV2_ON_DEMAND_THRESHOLD override */
    value = getenv("MV2_ON_DEMAND_THRESHOLD");
    if (value)
    {
        threshold = atoi(value);
    }

    dpm_str = getenv("MV2_SUPPORT_DPM");
    if (dpm_str) {
        dpm = !!atoi(dpm_str);
    }
    MPIDI_CH3I_Process.has_dpm = dpm;
    if(MPIDI_CH3I_Process.has_dpm) {
        setenv("MV2_ENABLE_AFFINITY", "0", 1);
    }

#ifdef _ENABLE_XRC_
    value = getenv ("MV2_USE_XRC");
    if (value) {
        USE_XRC = atoi(value);
        if (USE_XRC) {
            /* Enable on-demand */
            threshold = 1;
        }
    }
#endif /* _ENABLE_XRC_ */
#ifdef _ENABLE_UD_
    if ((value = getenv("MV2_HYBRID_ENABLE_THRESHOLD")) != NULL) {
        rdma_hybrid_enable_threshold = atoi(value);
    }
    if ((value = getenv("MV2_USE_UD_HYBRID")) != NULL) {
        rdma_enable_hybrid = atoi(value);
    }
    if (pg_size < rdma_hybrid_enable_threshold) {
        rdma_enable_hybrid = 0;
    }
#endif

    if (pg_size > threshold || dpm 
#ifdef _ENABLE_XRC_
            || USE_XRC
#endif /* _ENABLE_XRC_ */
#ifdef _ENABLE_UD_
            || rdma_enable_hybrid
#endif
            )
    {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_ON_DEMAND;
        MPIDI_CH3I_Process.num_conn = 0;
    }
    else
    {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_BASIC_ALL2ALL;
    }

#if defined(RDMA_CM)
    if (((value = getenv("MV2_USE_RDMA_CM")) != NULL
        || (value = getenv("MV2_USE_IWARP_MODE")) != NULL)
        && atoi(value) && ! dpm) {
        MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_RDMA_CM;
    } else {
        rdma_cm_get_hca_type(&MPIDI_CH3I_RDMA_Process);
    }
#endif /* defined(RDMA_CM) */

    MPIDI_PG_GetConnKVSname(&pg->ch.kvs_name);

#if defined(CKPT)
#if defined(RDMA_CM)
    if (MPIDI_CH3I_Process.cm_type == MPIDI_CH3I_CM_RDMA_CM)
    {
        MPIU_Error_printf("Error: Checkpointing does not work with RDMA CM.\n"
            "Please configure and compile MVAPICH2 with checkpointing disabled "
            "or without support for RDMA CM.\n");
        MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**fail");
    }
#endif /* defined(RDMA_CM) */

    /* Always use CM_ON_DEMAND for Checkpoint/Restart and Migration */
    MPIDI_CH3I_Process.cm_type = MPIDI_CH3I_CM_ON_DEMAND;

#endif /* defined(CKPT) */
#ifdef _ENABLE_UD_
    if (rdma_enable_hybrid) {
        MPIU_Assert(MPIDI_CH3I_Process.cm_type == MPIDI_CH3I_CM_ON_DEMAND);
    }
#endif

    /* save my vc_ptr for easy access */
    MPIDI_PG_Get_vc(pg, pg_rank, &MPIDI_CH3I_Process.vc);

    /* Initialize Progress Engine */
    if ((mpi_errno = MPIDI_CH3I_Progress_init()))
    {
        MPIU_ERR_POP(mpi_errno);
    }

    /* Check for SMP only */
    MPIDI_CH3I_set_smp_only();

    if (!SMP_ONLY) 
    {
        switch (MPIDI_CH3I_Process.cm_type)
        {
            /* allocate rdma memory and set up the queues */
            case MPIDI_CH3I_CM_ON_DEMAND:
#if defined(RDMA_CM)
            case MPIDI_CH3I_CM_RDMA_CM:
#endif /* defined(RDMA_CM) */
                mpi_errno = MPIDI_CH3I_CM_Init(pg, pg_rank, &conn_info);
                if (mpi_errno != MPI_SUCCESS)
                {
                    MPIU_ERR_POP(mpi_errno);
                }
                break;
            default:
                /* call old init to set up all connections */
                if ((mpi_errno = MPIDI_CH3I_RDMA_init(pg, pg_rank)) != MPI_SUCCESS)
                {
                    MPIU_ERR_POP(mpi_errno);
                }

                /* All VCs should be connected */
                for (p = 0; p < pg_size; ++p)
                {
                    MPIDI_PG_Get_vc(pg, p, &vc);
                    vc->ch.state = MPIDI_CH3I_VC_STATE_IDLE;
                }
                break;
        }
    }
#if defined(CKPT)
#if defined(DISABLE_PTMALLOC)
    MPIU_Error_printf("Error: Checkpointing does not work without registration "
        "caching enabled.\nPlease configure and compile MVAPICH2 without checkpointing "
        " or enable registration caching.\n");
    MPIU_ERR_SETFATALANDJUMP(mpi_errno, MPI_ERR_OTHER, "**fail");
#endif /* defined(DISABLE_PTMALLOC) */

    if ((mpi_errno = MPIDI_CH3I_CR_Init(pg, pg_rank, pg_size)))
    {
        MPIU_ERR_POP(mpi_errno);
    }
#endif /* defined(CKPT) */

    /* set connection info for dynamic process management */
    if (conn_info && dpm) {
        mpi_errno = MPIDI_PG_SetConnInfo(pg_rank, (const char *)conn_info);
        if (mpi_errno != MPI_SUCCESS)
        {
            MPIU_ERR_POP(mpi_errno);
        }
    }
    MPIU_Free(conn_info);

    /* Initialize the smp channel */
    if ((mpi_errno = MPIDI_CH3I_SMP_init(pg)))
    {
        MPIU_ERR_POP(mpi_errno);
    }

    if (SMP_INIT)
    {
        for (p = 0; p < pg_size; ++p)
        {
            MPIDI_PG_Get_vc(pg, p, &vc);
            /* Mark the SMP VC as Idle */
            if (vc->smp.local_nodes >= 0)
            {
                vc->ch.state = MPIDI_CH3I_VC_STATE_IDLE;
                if (SMP_ONLY) {
                    MPIDI_CH3I_SMP_Init_VC(vc);
                }
#ifdef _ENABLE_XRC_
                VC_XST_SET (vc, XF_SMP_VC);
#endif
            }
        }
    } else {
        extern int enable_shmem_collectives;
        enable_shmem_collectives = SMP_INIT;
    }

    /* Set the eager max msg size now that we know SMP and RDMA are initialized.
     * The max message size is also set during VC initialization, but the state
     * of SMP is unknown at that time.
     */
    for (p = 0; p < pg_size; ++p)
    {
        MPIDI_PG_Get_vc(pg, p, &vc);
        vc->eager_max_msg_sz = MPIDI_CH3_EAGER_MAX_MSG_SIZE(vc);
    }
    
fn_exit:
    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_INIT);
    return mpi_errno;

fn_fail:
    goto fn_exit;
}
Example #9
int MPIDI_CH3I_MRAILI_Cq_poll(vbuf **vbuf_handle, 
        MPIDI_VC_t * vc_req, int receiving, int is_blocking)
{
    int ne, ret;
    MPIDI_VC_t *vc = NULL;
    struct ibv_wc wc;
    vbuf *v;
    int i = 0;
    int cq_choice = 0;
    int num_cqs = 0;
    int needed;
    int is_send_completion;
    int type = T_CHANNEL_NO_ARRIVE;
    static unsigned long nspin = 0;
    struct ibv_cq *ev_cq; 
    struct ibv_cq *chosen_cq; 
    void *ev_ctx;
    MPIDI_CH3I_MRAILI_Pkt_comm_header *p;

    int myrank;
    MPIDI_STATE_DECL(MPID_GEN2_MRAILI_CQ_POLL);
    MPIDI_FUNC_ENTER(MPID_GEN2_MRAILI_CQ_POLL);
    /* PMI_Get_rank returns an error code and writes the rank through the
     * pointer argument; assigning the return value would clobber the rank. */
    PMI_Get_rank(&myrank);

    *vbuf_handle = NULL;
    needed = 0;

    if (!receiving && !vc_req) {
        type = MPIDI_CH3I_MRAILI_Test_pkt(vbuf_handle);
        if (type == T_CHANNEL_EXACT_ARRIVE 
                || type == T_CHANNEL_CONTROL_MSG_ARRIVE)
            goto fn_exit;
    }

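    /* Chelsio iWARP adapters on larger clusters split send and receive
       completions across separate CQs; otherwise a single CQ per HCA
       handles both. */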
    if (rdma_iwarp_use_multiple_cq &&
        MV2_IS_CHELSIO_IWARP_CARD(MPIDI_CH3I_RDMA_Process.hca_type) &&
        (MPIDI_CH3I_RDMA_Process.cluster_size != VERY_SMALL_CLUSTER)) {
        num_cqs = 2;
    } else {
        num_cqs = 1;
    }

    for (; i < rdma_num_hcas; ++i) {
        for (cq_choice = 0; cq_choice < num_cqs; ++cq_choice) {
            if (1 == num_cqs) {
                chosen_cq = MPIDI_CH3I_RDMA_Process.cq_hndl[i];
            } else {
                if (0 == cq_choice) {
                    chosen_cq = MPIDI_CH3I_RDMA_Process.send_cq_hndl[i];
                } else {
                    chosen_cq = MPIDI_CH3I_RDMA_Process.recv_cq_hndl[i];
                }
            }
            ne = ibv_poll_cq(chosen_cq, 1, &wc);
            if (ne < 0) {
                ibv_error_abort(IBV_RETURN_ERR, "Fail to poll cq\n");
            } else if (ne) {
                v = (vbuf *) ((uintptr_t) wc.wr_id);
                vc = (MPIDI_VC_t *) (v->vc);
                cq_poll_completion = 1;

                if (wc.status != IBV_WC_SUCCESS) {
                    if (wc.opcode == IBV_WC_SEND ||
                        wc.opcode == IBV_WC_RDMA_WRITE) {
                        fprintf(stderr, "[%d->%d] send desc error, wc_opcode=%d\n",
                                myrank, vc->pg_rank, wc.opcode);
                    } else {
                        fprintf(stderr, "[%d<-%d] recv desc error, wc_opcode=%d\n",
                                myrank, vc->pg_rank, wc.opcode);
                    }
                    fprintf(stderr, "[%d->%d] wc.status=%d, wc.wr_id=%p, wc.opcode=%d, vbuf->phead->type=%d = %s\n",
                            myrank, vc->pg_rank, wc.status, v,
                            wc.opcode, ((MPIDI_CH3I_MRAILI_Pkt_comm_header *) v->pheader)->type,
                            MPIDI_CH3_Pkt_type_to_string[((MPIDI_CH3I_MRAILI_Pkt_comm_header *) v->pheader)->type]);

                    ibv_va_error_abort(IBV_STATUS_ERR,
                            "[] Got completion with error %d, "
                            "vendor code=0x%x, dest rank=%d\n",
                            wc.status,
                            wc.vendor_err,
                            ((MPIDI_VC_t *) v->vc)->pg_rank);
                }

                is_send_completion = (wc.opcode == IBV_WC_SEND
                    || wc.opcode == IBV_WC_RDMA_WRITE
                    || wc.opcode == IBV_WC_RDMA_READ);

                if (2 == num_cqs) {
                    if (0 == cq_choice) {
                        if (MPIDI_CH3I_RDMA_Process.global_used_send_cq) {
                            MPIDI_CH3I_RDMA_Process.global_used_send_cq--;
                        } else {
                            DEBUG_PRINT("[%d] Possibly received a duplicate "
                                        "send completion event\n",
                                        MPIDI_Process.my_pg_rank);
                        }
                    }
                } else {
                    if (is_send_completion &&
                        (MPIDI_CH3I_RDMA_Process.global_used_send_cq > 0)) {
                        MPIDI_CH3I_RDMA_Process.global_used_send_cq--;
                    } else {
                        DEBUG_PRINT("[%d] Possibly received a duplicate "
                                    "send completion event\n",
                                    MPIDI_Process.my_pg_rank);
                    }
                }
 
                if (!is_send_completion && (MPIDI_CH3I_RDMA_Process.has_srq
                                            || v->transport == IB_TRANSPORT_UD)) {
                    SET_PKT_LEN_HEADER(v, wc);
                    SET_PKT_HEADER_OFFSET(v);
                    p = v->pheader;
#ifdef _ENABLE_UD_
                    MPIDI_PG_Get_vc(MPIDI_Process.my_pg, p->src.rank, &vc);
#else
                    vc = (MPIDI_VC_t *) p->src.vc_addr;
#endif
                    v->vc = vc;
                    v->rail = p->rail;
                }

                /* get the VC and increase its wqe */
                if (is_send_completion) {
#ifdef _ENABLE_UD_
                    if (rdma_enable_hybrid) {
                        if (v->transport == IB_TRANSPORT_RC ||
                            (v->pheader && IS_CNTL_MSG(v->pheader))) {
                            MRAILI_Process_send(v);
                        }
                        if (v->transport == IB_TRANSPORT_UD) {
                            mv2_ud_update_send_credits(v);
                        }
                        if (v->transport == IB_TRANSPORT_UD &&
                            v->flags & UD_VBUF_SEND_INPROGRESS) {
                            v->flags &= ~(UD_VBUF_SEND_INPROGRESS);
                            if (v->flags & UD_VBUF_FREE_PENIDING) {
                                v->flags &= ~(UD_VBUF_FREE_PENIDING);
                                MRAILI_Release_vbuf(v);
                            }
                        }
                    }
                    else
#endif
                    {
                        MRAILI_Process_send(v);
                    }
                    type = T_CHANNEL_NO_ARRIVE;
                    *vbuf_handle = NULL;
                } else if ((NULL == vc_req || vc_req == vc) && 0 == receiving) {
                    /* In this case, we should return the vbuf anyway
                     * if it is the next expected one */
                    int seqnum = GetSeqNumVbuf(v);
                    *vbuf_handle = v;
                    SET_PKT_LEN_HEADER(v, wc);
                    SET_PKT_HEADER_OFFSET(v);
                    v->seqnum = seqnum;
                    p = v->pheader;
                    PRINT_DEBUG(DEBUG_UD_verbose > 1,
                                "Received from rank:%d seqnum:%d ack:%d size:%d type:%d transport:%d\n",
                                vc->pg_rank, v->seqnum, p->acknum, v->content_size, p->type, v->transport);
#ifdef _ENABLE_UD_
                    if (v->transport == IB_TRANSPORT_UD)
                    {
                        mv2_ud_ctx_t *ud_ctx = 
                            MPIDI_CH3I_RDMA_Process.ud_rails[i];
                        --ud_ctx->num_recvs_posted;
                        if(ud_ctx->num_recvs_posted < ud_ctx->credit_preserve) {
                            ud_ctx->num_recvs_posted += mv2_post_ud_recv_buffers(
                                    (rdma_default_max_ud_recv_wqe - ud_ctx->num_recvs_posted), ud_ctx);
                        }
                    }
                    else
#endif 
                    if (MPIDI_CH3I_RDMA_Process.has_srq) {
                        pthread_spin_lock(&MPIDI_CH3I_RDMA_Process.srq_post_spin_lock);

                        if (v->padding == NORMAL_VBUF_FLAG) {
                            /* Can only be from SRQ path */
                            --MPIDI_CH3I_RDMA_Process.posted_bufs[i];
                        }

                        if (MPIDI_CH3I_RDMA_Process.posted_bufs[i] <= rdma_credit_preserve) {
                            /* Need to post more to the SRQ */
                            MPIDI_CH3I_RDMA_Process.posted_bufs[i] +=
                                viadev_post_srq_buffers(viadev_srq_fill_size -
                                                        MPIDI_CH3I_RDMA_Process.posted_bufs[i], i);
                        }

                        pthread_spin_unlock(&MPIDI_CH3I_RDMA_Process.srq_post_spin_lock);

                        /* Check if we need to release the SRQ limit thread */
                        if (MPIDI_CH3I_RDMA_Process.srq_zero_post_counter[i] >= 1) {
                            pthread_mutex_lock(&MPIDI_CH3I_RDMA_Process.srq_post_mutex_lock[i]);
                            MPIDI_CH3I_RDMA_Process.srq_zero_post_counter[i] = 0;
                            pthread_cond_signal(&MPIDI_CH3I_RDMA_Process.srq_post_cond[i]);
                            pthread_mutex_unlock(&MPIDI_CH3I_RDMA_Process.srq_post_mutex_lock[i]);
                        }
                    }
                    else
                    {
                        --vc->mrail.srp.credits[v->rail].preposts;

                        needed = rdma_prepost_depth + rdma_prepost_noop_extra
                                 + MIN(rdma_prepost_rendezvous_extra,
                                       vc->mrail.srp.credits[v->rail].rendezvous_packets_expected);
                    }
#ifdef _ENABLE_UD_
                    if (rdma_enable_hybrid){
                        if (IS_CNTL_MSG(p)){
                            type = T_CHANNEL_CONTROL_MSG_ARRIVE;
                        } else {
                            type = T_CHANNEL_HYBRID_MSG_ARRIVE;
                        }
                    }
                    else
#endif
                    {
                        if (seqnum == PKT_NO_SEQ_NUM){
                            type = T_CHANNEL_CONTROL_MSG_ARRIVE;
                        } else if (seqnum == vc->mrail.seqnum_next_torecv) {
                            vc->mrail.seqnum_next_toack = vc->mrail.seqnum_next_torecv;
                            ++vc->mrail.seqnum_next_torecv;
                            type = T_CHANNEL_EXACT_ARRIVE;
                            DEBUG_PRINT("[channel manager] get one with exact seqnum\n");
                        } else {
                            type = T_CHANNEL_OUT_OF_ORDER_ARRIVE;
                            VQUEUE_ENQUEUE(&vc->mrail.cmanager, 
                                    INDEX_GLOBAL(&vc->mrail.cmanager, v->rail),
                                    v);
                            DEBUG_PRINT("get recv %d (%d)\n", seqnum, vc->mrail.seqnum_next_torecv);
                        }
                    }
                    if (!MPIDI_CH3I_RDMA_Process.has_srq && v->transport != IB_TRANSPORT_UD) {

                        if (PKT_IS_NOOP(v)) {
                            PREPOST_VBUF_RECV(vc, v->rail);
                            /* noops don't count for credits */
                            --vc->mrail.srp.credits[v->rail].local_credit;
                        }
                        else if ((vc->mrail.srp.credits[v->rail].preposts < rdma_rq_size) &&
                                 (vc->mrail.srp.credits[v->rail].preposts +
                                  rdma_prepost_threshold < needed))
                        {
                            do {
                                PREPOST_VBUF_RECV(vc, v->rail);
                            } while (vc->mrail.srp.credits[v->rail].preposts < rdma_rq_size &&
                                     vc->mrail.srp.credits[v->rail].preposts < needed);
                        }

                        MRAILI_Send_noop_if_needed(vc, v->rail);
                    }

                    if (type == T_CHANNEL_CONTROL_MSG_ARRIVE ||
                        type == T_CHANNEL_EXACT_ARRIVE ||
                        type == T_CHANNEL_HYBRID_MSG_ARRIVE ||
                        type == T_CHANNEL_OUT_OF_ORDER_ARRIVE) {
                        goto fn_exit;
                    }
                } else {
                    /* Commenting out the assert - possible coding error
                     * MPIU_Assert(0);
                     */
                    /* Since this is not the packet we want, we have to
                     * enqueue it */
                    type = T_CHANNEL_OUT_OF_ORDER_ARRIVE;
                    *vbuf_handle = NULL;
                    v->content_size = wc.byte_len;
                    VQUEUE_ENQUEUE(&vc->mrail.cmanager,
                                   INDEX_GLOBAL(&vc->mrail.cmanager, v->rail),
                                   v);
                    if (v->transport != IB_TRANSPORT_UD) {
                        if (MPIDI_CH3I_RDMA_Process.has_srq) {
                            pthread_spin_lock(&MPIDI_CH3I_RDMA_Process.srq_post_spin_lock);

                            if(v->padding == NORMAL_VBUF_FLAG ) {
                                /* Can only be from SRQ path */
                                --MPIDI_CH3I_RDMA_Process.posted_bufs[i];
                            }

                            if(MPIDI_CH3I_RDMA_Process.posted_bufs[i] <= rdma_credit_preserve) {
                                /* Need to post more to the SRQ */
                                MPIDI_CH3I_RDMA_Process.posted_bufs[i] +=
                                    viadev_post_srq_buffers(viadev_srq_fill_size - 
                                            MPIDI_CH3I_RDMA_Process.posted_bufs[i], i);

                            }

                            pthread_spin_unlock(&MPIDI_CH3I_RDMA_Process.
                                    srq_post_spin_lock);
                        } else {
                            --vc->mrail.srp.credits[v->rail].preposts;

                            needed = rdma_prepost_depth + rdma_prepost_noop_extra
                                + MIN(rdma_prepost_rendezvous_extra,
                                        vc->mrail.srp.credits[v->rail].
                                        rendezvous_packets_expected);

                            if (PKT_IS_NOOP(v)) {
                                PREPOST_VBUF_RECV(vc, v->rail);
                                --vc->mrail.srp.credits[v->rail].local_credit;
                            }
                            else if ((vc->mrail.srp.credits[v->rail].preposts 
                                        < rdma_rq_size) &&
                                    (vc->mrail.srp.credits[v->rail].preposts + 
                                     rdma_prepost_threshold < needed)) {
                                do {
                                    PREPOST_VBUF_RECV(vc, v->rail);
                                } while (vc->mrail.srp.credits[v->rail].preposts 
                                        < rdma_rq_size && 
                                        vc->mrail.srp.credits[v->rail].preposts 
                                        < needed);
                            }
                            MRAILI_Send_noop_if_needed(vc, v->rail);
                        }
                    }
                }
            } else {
                *vbuf_handle = NULL;
                type = T_CHANNEL_NO_ARRIVE;
                ++nspin;

                /* Blocking mode progress */
                if (rdma_use_blocking && is_blocking &&
                    nspin >= rdma_blocking_spin_count_threshold) {
                    /* Okay ... spun long enough, now time to go to sleep! */

#if (MPICH_THREAD_LEVEL == MPI_THREAD_MULTIPLE)
                    MPIU_THREAD_CHECK_BEGIN
                    MPID_Thread_mutex_unlock(&MPIR_ThreadInfo.global_mutex);
                    MPIU_THREAD_CHECK_END
#endif
                    do {
                        ret = ibv_get_cq_event(
                                MPIDI_CH3I_RDMA_Process.comp_channel[i],
                                &ev_cq, &ev_ctx);
                        if (ret && errno != EINTR) {
                            ibv_va_error_abort(IBV_RETURN_ERR,
                                    "Failed to get cq event: %d\n", ret);
                        }
                    } while (ret && errno == EINTR);
#if (MPICH_THREAD_LEVEL == MPI_THREAD_MULTIPLE)
                    MPIU_THREAD_CHECK_BEGIN
                    MPID_Thread_mutex_lock(&MPIR_ThreadInfo.global_mutex);
                    MPIU_THREAD_CHECK_END
#endif

                    if (num_cqs == 1) {
                        if (ev_cq != MPIDI_CH3I_RDMA_Process.cq_hndl[i]) {
                            ibv_error_abort(IBV_STATUS_ERR,
                                            "Event in unknown CQ\n");
                        }

                        ibv_ack_cq_events(MPIDI_CH3I_RDMA_Process.cq_hndl[i], 1);

                        if (ibv_req_notify_cq(MPIDI_CH3I_RDMA_Process.cq_hndl[i], 0)) {
                            ibv_error_abort(IBV_RETURN_ERR,
                                    "Couldn't request for CQ notification\n");
                        }
                    } else {
                        if (ev_cq == MPIDI_CH3I_RDMA_Process.send_cq_hndl[i]) {
                            ibv_ack_cq_events(MPIDI_CH3I_RDMA_Process.send_cq_hndl[i], 1);

                            if (ibv_req_notify_cq(MPIDI_CH3I_RDMA_Process.send_cq_hndl[i], 0)) {
                                ibv_error_abort(IBV_RETURN_ERR,
                                        "Couldn't request for CQ notification\n");
                            }
                        } else if (ev_cq == MPIDI_CH3I_RDMA_Process.recv_cq_hndl[i]) {
                            ibv_ack_cq_events(MPIDI_CH3I_RDMA_Process.recv_cq_hndl[i], 1);

                            if (ibv_req_notify_cq(MPIDI_CH3I_RDMA_Process.recv_cq_hndl[i], 0)) {
                                ibv_error_abort(IBV_RETURN_ERR,
                                        "Couldn't request for CQ notification\n");
                            }
                        } else {
                            ibv_error_abort(IBV_STATUS_ERR,
                                            "Event in unknown CQ\n");
                        }
                    }
                    nspin = 0;
                }
            }
        }
    }

fn_exit:
    MPIDI_FUNC_EXIT(MPID_GEN2_MRAILI_CQ_POLL);
    return type;
}