/*
 * Free-list constructor callback for wv BTL fragments.
 *
 * item: the free-list element being initialized (a fragment).
 * ctx:  a mca_btl_wv_frag_init_data_t carrying the QP ordinal ("order")
 *       and the owning free list.
 *
 * Receive fragments additionally get their scatter/gather length sized to
 * the QP's payload plus every BTL header that may precede the payload.
 */
void mca_btl_wv_frag_init(ompi_free_list_item_t* item, void* ctx)
{
    mca_btl_wv_frag_init_data_t *cfg = (mca_btl_wv_frag_init_data_t *) ctx;
    mca_btl_wv_frag_t *frag = to_base_frag(item);

    switch (frag->type) {
    case MCA_BTL_WV_FRAG_RECV:
        to_recv_frag(frag)->qp_idx = cfg->order;
        /* A posted receive must be able to hold the QP-sized payload plus
         * the base, coalesced and control headers in front of it. */
        to_com_frag(frag)->sg_entry.Length =
            mca_btl_wv_component.qp_infos[cfg->order].size +
            sizeof(mca_btl_wv_header_t) +
            sizeof(mca_btl_wv_header_coalesced_t) +
            sizeof(mca_btl_wv_control_header_t);
        break;
    case MCA_BTL_WV_FRAG_SEND:
        to_send_frag(frag)->qp_idx = cfg->order;
        break;
    default:
        /* other fragment types need no per-QP setup */
        break;
    }

    frag->list = cfg->list;
}
/* Setup eager RDMA buffers and notify the remote endpoint.
 *
 * Allocates one registered RDMA region holding eager_rdma_num fragment
 * slots plus a malloc'd array of recv-fragment headers, wires each header
 * to its slot, publishes the region on the endpoint, and tells the peer
 * about it via mca_btl_openib_endpoint_send_eager_rdma().  On any failure
 * all resources are released (goto-based cleanup) and the endpoint is
 * left as if the call never happened, so a later attempt can retry. */
void mca_btl_openib_endpoint_connect_eager_rdma(
        mca_btl_openib_endpoint_t* endpoint)
{
    mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
    char *buf;
    mca_btl_openib_recv_frag_t *headers_buf;
    int i;
    uint32_t flag = MCA_MPOOL_FLAGS_CACHE_BYPASS;

    /* Set local rdma pointer to 1 temporarily so other threads will not try
     * to enter the function */
    if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL,
                (void*)1))
        return;

    /* Fragment descriptors live in ordinary heap memory; only the data
     * buffers themselves need to come from the registered mpool region. */
    headers_buf = (mca_btl_openib_recv_frag_t*)
        malloc(sizeof(mca_btl_openib_recv_frag_t) *
            mca_btl_openib_component.eager_rdma_num);

    if(NULL == headers_buf)
        goto unlock_rdma_local;

#if HAVE_DECL_IBV_ACCESS_SO
    /* Solaris implements the Relaxed Ordering feature defined in the PCI
       Specification.  With this in mind any memory region which relies on
       a buffer being written in a specific order, for example the eager
       rdma connections created in this routinue, must set a strong order
       flag when registering the memory for rdma operations.  The following
       flag will be interpreted and the appropriate steps will be taken
       when the memory is registered in openib_reg_mr(). */
    flag |= MCA_MPOOL_FLAGS_SO_MEM;
#endif

    /* One contiguous registered region for all eager-RDMA fragment slots. */
    buf = (char *) openib_btl->super.btl_mpool->mpool_alloc(openib_btl->super.btl_mpool,
            openib_btl->eager_rdma_frag_size *
            mca_btl_openib_component.eager_rdma_num,
            mca_btl_openib_component.buffer_alignment,
            flag,
            (mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg);

    if(!buf)
        goto free_headers_buf;

    /* Offset the base pointer so that header + eager payload + footer of
     * each slot sit at the tail of its eager_rdma_frag_size stride
     * (NOTE(review): presumably the leading bytes are alignment padding —
     * confirm against eager_rdma_frag_size's definition). */
    buf = buf + openib_btl->eager_rdma_frag_size -
        sizeof(mca_btl_openib_footer_t) - openib_btl->super.btl_eager_limit -
        sizeof(mca_btl_openib_header_t);

    /* Construct each recv fragment and point it at its slot in the
     * registered region. */
    for(i = 0; i < mca_btl_openib_component.eager_rdma_num; i++) {
        opal_free_list_item_t *item;
        mca_btl_openib_recv_frag_t * frag;
        mca_btl_openib_frag_init_data_t init_data;

        item = (opal_free_list_item_t*)&headers_buf[i];
        /* registration/ptr must be set BEFORE OBJ_CONSTRUCT so the frag
         * constructor can pick them up */
        item->registration = (mca_mpool_base_registration_t *)endpoint->eager_rdma_local.reg;
        item->ptr = buf + i * openib_btl->eager_rdma_frag_size;
        OBJ_CONSTRUCT(item, mca_btl_openib_recv_frag_t);

        init_data.order = mca_btl_openib_component.credits_qp;
        init_data.list = NULL;  /* not owned by any free list */

        mca_btl_openib_frag_init(item, &init_data);
        frag = to_recv_frag(item);
        to_base_frag(frag)->type = MCA_BTL_OPENIB_FRAG_EAGER_RDMA;
        to_com_frag(frag)->endpoint = endpoint;
        /* footer lives immediately after the eager-limit-sized payload */
        frag->ftr = (mca_btl_openib_footer_t*)
            ((char*)to_base_frag(frag)->segment.seg_addr.pval +
             mca_btl_openib_component.eager_limit);

        MCA_BTL_OPENIB_RDMA_MAKE_REMOTE(frag->ftr);
    }

    endpoint->eager_rdma_local.frags = headers_buf;

    /* Credit window: a quarter of the slot count, but never zero. */
    endpoint->eager_rdma_local.rd_win =
        mca_btl_openib_component.eager_rdma_num >> 2;
    endpoint->eager_rdma_local.rd_win =
        endpoint->eager_rdma_local.rd_win?endpoint->eager_rdma_local.rd_win:1;

    /* set local rdma pointer to real value */
    (void)opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
            (void*)1, buf);

    if(mca_btl_openib_endpoint_send_eager_rdma(endpoint) == OPAL_SUCCESS) {
        mca_btl_openib_device_t *device = endpoint->endpoint_btl->device;
        mca_btl_openib_endpoint_t **p;
        /* device's eager_rdma_buffers array keeps its own reference */
        OBJ_RETAIN(endpoint);
        assert(((opal_object_t*)endpoint)->obj_reference_count == 2);
        /* claim the next free slot; retry under concurrent publishers */
        do {
            p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count];
        } while(!opal_atomic_cmpset_ptr(p, NULL, endpoint));

        OPAL_THREAD_ADD32(&openib_btl->eager_rdma_channels, 1);
        /* from this point progress function starts to poll new buffer */
        OPAL_THREAD_ADD32(&device->eager_rdma_buffers_count, 1);
        return;
    }

    /* peer notification failed — release the registered region */
    openib_btl->super.btl_mpool->mpool_free(openib_btl->super.btl_mpool,
            buf, (mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg);
free_headers_buf:
    free(headers_buf);
unlock_rdma_local:
    /* set local rdma pointer back to zero. Will retry later */
    (void)opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
            endpoint->eager_rdma_local.base.pval, NULL);
    endpoint->eager_rdma_local.frags = NULL;
}
/* Setup eager RDMA buffers and notify the remote endpoint.
 *
 * wv-BTL twin of the openib routine: allocates one registered RDMA region
 * with eager_rdma_num fragment slots plus a heap array of recv-fragment
 * headers, wires each header to its slot, publishes the region on the
 * endpoint, and notifies the peer via mca_btl_wv_endpoint_send_eager_rdma().
 * On failure everything is unwound (goto cleanup) so a later call may
 * retry. */
void mca_btl_wv_endpoint_connect_eager_rdma(
        mca_btl_wv_endpoint_t* endpoint)
{
    mca_btl_wv_module_t* wv_btl = endpoint->endpoint_btl;
    char *buf;
    mca_btl_wv_recv_frag_t *headers_buf;
    int i;

    /* Set local rdma pointer to 1 temporarily so other threads will not try
     * to enter the function */
    if(!opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval, NULL,
                (void*)1))
        return;

    /* Fragment descriptors live on the ordinary heap; only the data
     * buffers need registered (mpool) memory. */
    headers_buf = (mca_btl_wv_recv_frag_t*)
        malloc(sizeof(mca_btl_wv_recv_frag_t) *
            mca_btl_wv_component.eager_rdma_num);

    if(NULL == headers_buf)
        goto unlock_rdma_local;

    /* One contiguous registered region for all eager-RDMA fragment slots. */
    buf = (char *) wv_btl->super.btl_mpool->mpool_alloc(wv_btl->super.btl_mpool,
            wv_btl->eager_rdma_frag_size *
            mca_btl_wv_component.eager_rdma_num,
            mca_btl_wv_component.buffer_alignment,
            MCA_MPOOL_FLAGS_CACHE_BYPASS,
            (mca_mpool_base_registration_t**)&endpoint->eager_rdma_local.reg);

    if(!buf)
        goto free_headers_buf;

    /* Offset the base pointer so that header + eager payload + footer of
     * each slot sit at the tail of its eager_rdma_frag_size stride
     * (NOTE(review): presumably the leading bytes are alignment padding —
     * confirm against eager_rdma_frag_size's definition). */
    buf = buf + wv_btl->eager_rdma_frag_size -
        sizeof(mca_btl_wv_footer_t) - wv_btl->super.btl_eager_limit -
        sizeof(mca_btl_wv_header_t);

    /* Construct each recv fragment and point it at its slot in the
     * registered region. */
    for(i = 0; i < mca_btl_wv_component.eager_rdma_num; i++) {
        ompi_free_list_item_t *item;
        mca_btl_wv_recv_frag_t * frag;
        mca_btl_wv_frag_init_data_t init_data;

        item = (ompi_free_list_item_t*)&headers_buf[i];
        /* registration/ptr must be set BEFORE OBJ_CONSTRUCT so the frag
         * constructor can pick them up */
        item->registration = (mca_mpool_base_registration_t *)endpoint->eager_rdma_local.reg;
        item->ptr = buf + i * wv_btl->eager_rdma_frag_size;
        OBJ_CONSTRUCT(item, mca_btl_wv_recv_frag_t);

        init_data.order = mca_btl_wv_component.credits_qp;
        init_data.list = NULL;  /* not owned by any free list */

        mca_btl_wv_frag_init(item, &init_data);
        frag = to_recv_frag(item);
        to_base_frag(frag)->type = MCA_BTL_WV_FRAG_EAGER_RDMA;
        to_com_frag(frag)->endpoint = endpoint;
        /* footer lives immediately after the eager-limit-sized payload */
        frag->ftr = (mca_btl_wv_footer_t*)
            ((char*)to_base_frag(frag)->segment.base.seg_addr.pval +
             mca_btl_wv_component.eager_limit);

        MCA_BTL_WV_RDMA_MAKE_REMOTE(frag->ftr);
    }

    endpoint->eager_rdma_local.frags = headers_buf;

    /* Credit window: a quarter of the slot count, but never zero. */
    endpoint->eager_rdma_local.rd_win =
        mca_btl_wv_component.eager_rdma_num >> 2;
    endpoint->eager_rdma_local.rd_win =
        endpoint->eager_rdma_local.rd_win?endpoint->eager_rdma_local.rd_win:1;

    /* set local rdma pointer to real value */
    opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
            (void*)1, buf);

    if(mca_btl_wv_endpoint_send_eager_rdma(endpoint) == OMPI_SUCCESS) {
        mca_btl_wv_device_t *device = endpoint->endpoint_btl->device;
        mca_btl_wv_endpoint_t **p;
        /* device's eager_rdma_buffers array keeps its own reference */
        OBJ_RETAIN(endpoint);
        assert(((opal_object_t*)endpoint)->obj_reference_count == 2);
        /* claim the next free slot; retry under concurrent publishers */
        do {
            p = &device->eager_rdma_buffers[device->eager_rdma_buffers_count];
        } while(!opal_atomic_cmpset_ptr(p, NULL, endpoint));

        OPAL_THREAD_ADD32(&wv_btl->eager_rdma_channels, 1);
        /* from this point progress function starts to poll new buffer */
        OPAL_THREAD_ADD32(&device->eager_rdma_buffers_count, 1);
        return;
    }

    /* peer notification failed — release the registered region */
    wv_btl->super.btl_mpool->mpool_free(wv_btl->super.btl_mpool,
            buf, (mca_mpool_base_registration_t*)endpoint->eager_rdma_local.reg);
free_headers_buf:
    free(headers_buf);
unlock_rdma_local:
    /* set local rdma pointer back to zero. Will retry later */
    opal_atomic_cmpset_ptr(&endpoint->eager_rdma_local.base.pval,
            endpoint->eager_rdma_local.base.pval, NULL);
    endpoint->eager_rdma_local.frags = NULL;
}