/**
 * Begin connecting an endpoint to its remote peer.
 *
 * Runs the rdma connect step, creates the SMSG endpoint handle on the
 * module's smsg local cq, obtains a mailbox, marks the endpoint as
 * CONNECTING and clears the remote attribute storage.
 *
 * @param ep (IN) endpoint to start connecting
 *
 * @returns OPAL_SUCCESS on success, otherwise the error code of the first
 *          step that failed (the endpoint state is not changed then).
 */
static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep)
{
    int status = mca_btl_ugni_ep_connect_rdma (ep);

    if (OPAL_UNLIKELY(OPAL_SUCCESS != status)) {
        return status;
    }

    BTL_VERBOSE(("initiaiting connection to remote peer with address: %u id: %u proc: %p",
                 ep->common->ep_rem_addr, ep->common->ep_rem_id, (void *)ep->peer_proc));

    /* a dedicated endpoint handle is used for SMSG so that local smsg
     * completions are kept apart from local fma completions */
    status = opal_common_ugni_ep_create (ep->common, ep->btl->smsg_local_cq,
                                         &ep->smsg_ep_handle);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != status)) {
        return status;
    }

    /* the mailbox carries the local half of the connection data */
    status = mca_btl_ugni_ep_smsg_get_mbox (ep);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != status)) {
        return status;
    }

    ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTING;
    memset (&ep->remote_attr, 0, sizeof (ep->remote_attr));

    BTL_VERBOSE(("btl/ugni connection to remote peer initiated"));

    return OPAL_SUCCESS;
}
mca_btl_base_descriptor_t * mca_btl_scif_alloc(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, uint8_t order, size_t size, uint32_t flags) { mca_btl_scif_base_frag_t *frag = NULL; BTL_VERBOSE(("allocating fragment of size: %u", (unsigned int)size)); if (size <= mca_btl_scif_module.super.btl_eager_limit) { (void) MCA_BTL_SCIF_FRAG_ALLOC_EAGER(endpoint, frag); } if (OPAL_UNLIKELY(NULL == frag)) { return NULL; } BTL_VERBOSE(("btl/scif_module allocated frag of size: %u, flags: %x. frag = %p", (unsigned int)size, flags, (void *) frag)); frag->base.des_flags = flags; frag->base.order = order; frag->base.des_segments = frag->segments; frag->base.des_segment_count = 1; frag->segments[0].seg_len = size; return &frag->base; }
/**
 * Allocate and register the receive buffer of an endpoint.
 *
 * A page-aligned buffer of mca_btl_scif_component.segment_size bytes is
 * allocated, zeroed and registered with the endpoint's scif descriptor for
 * read/write access. The first two 32-bit words of the buffer are used as
 * the start/end indices and are both initialised to 64.
 *
 * @param ep (IN) endpoint whose recv_buffer is set up
 *
 * @returns OPAL_SUCCESS on success,
 *          OPAL_ERR_OUT_OF_RESOURCE if the allocation failed,
 *          OPAL_ERROR if the scif registration failed (the buffer is freed).
 */
static inline int mca_btl_scif_ep_get_buffer (mca_btl_base_endpoint_t *ep)
{
    int rc;

    /* posix_memalign() returns 0 on success and a positive error number on
     * failure (it does not return -1 or set errno), so any non-zero value
     * must be treated as an allocation failure. */
    rc = posix_memalign ((void **) &ep->recv_buffer.buffer, opal_getpagesize(),
                         mca_btl_scif_component.segment_size);
    if (0 != rc) {
        /* the pointer value is unspecified after a failed call */
        ep->recv_buffer.buffer = NULL;
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    memset (ep->recv_buffer.buffer, 0, mca_btl_scif_component.segment_size);

    ep->recv_buffer.scif_offset = scif_register (ep->scif_epd, ep->recv_buffer.buffer,
                                                 mca_btl_scif_component.segment_size, 0,
                                                 SCIF_PROT_READ | SCIF_PROT_WRITE, 0);
    if (SCIF_REGISTER_FAILED == ep->recv_buffer.scif_offset) {
        BTL_VERBOSE(("failed to register a scif buffer of size %d. errno = %d",
                     mca_btl_scif_component.segment_size, errno));
        free (ep->recv_buffer.buffer);
        ep->recv_buffer.buffer = NULL;
        return OPAL_ERROR;
    }

    /* start/end indices live in the first two words of the buffer */
    ep->recv_buffer.startp = (uint32_t *) ep->recv_buffer.buffer;
    ep->recv_buffer.endp   = ep->recv_buffer.startp + 1;

    ep->recv_buffer.startp[0] = ep->recv_buffer.endp[0] = 64;

    BTL_VERBOSE(("allocated buffer of size %d bytes. with scif registration %lu",
                 mca_btl_scif_component.segment_size,
                 (unsigned long) ep->recv_buffer.scif_offset));

    return OPAL_SUCCESS;
}
static void *mca_btl_scif_connect_accept (void *arg) { struct scif_pollepd pollepd = {.epd = mca_btl_scif_module.scif_fd, .events = SCIF_POLLIN, .revents = 0}; int rc; BTL_VERBOSE(("btl/scif: listening for new connections")); /* listen for connections */ while (1) { pollepd.revents = 0; rc = scif_poll (&pollepd, 1, -1); if (1 == rc) { if (SCIF_POLLIN != pollepd.revents) { break; } rc = mca_btl_scif_ep_connect_start_passive (); if (OMPI_SUCCESS != rc) { BTL_VERBOSE(("btl/scif: error accepting scif connection")); continue; } } else { break; } } BTL_VERBOSE(("btl/scif: stopped listening for new connections")); return NULL; } int mca_btl_scif_del_procs (struct mca_btl_base_module_t *btl, size_t nprocs, struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t **peers) { /* do nothing for now */ return OMPI_SUCCESS; } static int scif_dereg_mem (void *reg_data, mca_mpool_base_registration_t *reg) { mca_btl_scif_reg_t *scif_reg = (mca_btl_scif_reg_t *)reg; size_t size = (size_t)((uintptr_t) reg->bound - (uintptr_t) reg->base); int i; /* register the fragment with all connected endpoints */ for (i = 0 ; i < (int) mca_btl_scif_module.endpoint_count ; ++i) { if ((off_t)-1 != scif_reg->registrations[i] && MCA_BTL_SCIF_EP_STATE_CONNECTED == mca_btl_scif_module.endpoints[i].state) { (void) scif_unregister(mca_btl_scif_module.endpoints[i].scif_epd, scif_reg->registrations[i], size); } } free (scif_reg->registrations); return OMPI_SUCCESS; }
/**
 * Send the eager RDMA connect control message to the remote endpoint.
 *
 * The message carries the rkey and start address of the local eager RDMA
 * region. If the endpoint is flagged nbo the header is converted with
 * BTL_OPENIB_EAGER_RDMA_CONTROL_HEADER_HTON before sending.
 *
 * @param endpoint (IN) endpoint to notify
 *
 * @returns OPAL_SUCCESS if the send succeeded or reported
 *          OPAL_ERR_RESOURCE_BUSY, -1 if no control fragment could be
 *          allocated, otherwise the send error (the fragment is returned).
 */
static int mca_btl_openib_endpoint_send_eager_rdma(
    mca_btl_base_endpoint_t* endpoint)
{
    mca_btl_openib_module_t* openib_btl = endpoint->endpoint_btl;
    mca_btl_openib_eager_rdma_header_t *rdma_hdr;
    mca_btl_openib_send_control_frag_t* frag;
    int rc;

    frag = alloc_control_frag(openib_btl);
    if (NULL == frag) {
        return -1;
    }

    /* descriptor setup: completion callback, priority and ordering */
    to_base_frag(frag)->base.des_cbfunc =
        mca_btl_openib_endpoint_eager_rdma_connect_cb;
    to_base_frag(frag)->base.des_cbdata = NULL;
    to_base_frag(frag)->base.des_flags |=
        MCA_BTL_DES_FLAGS_PRIORITY|MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
    to_base_frag(frag)->base.order = mca_btl_openib_component.credits_qp;
    to_base_frag(frag)->segment.seg_len =
        sizeof(mca_btl_openib_eager_rdma_header_t);
    to_com_frag(frag)->endpoint = endpoint;

    frag->hdr->tag = MCA_BTL_TAG_IB;

    /* fill in the control header that lives in the fragment's segment */
    rdma_hdr = (mca_btl_openib_eager_rdma_header_t*)
        to_base_frag(frag)->segment.seg_addr.pval;
    rdma_hdr->control.type = MCA_BTL_OPENIB_CONTROL_RDMA;
    rdma_hdr->rkey = endpoint->eager_rdma_local.reg->mr->rkey;
    rdma_hdr->rdma_start.lval =
        opal_ptr_ptol(endpoint->eager_rdma_local.base.pval);

    BTL_VERBOSE(("sending rkey %" PRIu32 ", rdma_start.lval %" PRIx64 ", pval %p, ival %" PRIu32 " type %d and sizeof(rdma_hdr) %d\n",
                 rdma_hdr->rkey,
                 rdma_hdr->rdma_start.lval,
                 rdma_hdr->rdma_start.pval,
                 rdma_hdr->rdma_start.ival,
                 rdma_hdr->control.type,
                 (int) sizeof(mca_btl_openib_eager_rdma_header_t)
                 ));

    if (endpoint->nbo) {
        BTL_OPENIB_EAGER_RDMA_CONTROL_HEADER_HTON((*rdma_hdr));

        BTL_VERBOSE(("after HTON: sending rkey %" PRIu32 ", rdma_start.lval %" PRIx64 ", pval %p, ival %" PRIu32 "\n",
                     rdma_hdr->rkey,
                     rdma_hdr->rdma_start.lval,
                     rdma_hdr->rdma_start.pval,
                     rdma_hdr->rdma_start.ival
                     ));
    }

    rc = mca_btl_openib_endpoint_send(endpoint, frag);
    if (OPAL_SUCCESS != rc && OPAL_ERR_RESOURCE_BUSY != rc) {
        /* hard failure: hand the fragment back and report */
        MCA_BTL_IB_FRAG_RETURN(frag);
        BTL_ERROR(("Error sending RDMA buffer: %s", strerror(errno)));
        return rc;
    }

    return OPAL_SUCCESS;
}
/**
 * Complete the connection of an endpoint once the remote attributes have
 * been received.
 *
 * Initialises the SMSG protocol from the local mailbox and remote
 * attributes, records the event data, marks the endpoint CONNECTED, and
 * tries to send everything queued on the endpoint's wait list. If that
 * does not fully succeed the endpoint is put on the module's wait list.
 * The remote attribute buffer is freed before returning.
 *
 * @param ep (IN) endpoint to finish connecting
 *
 * @returns OPAL_SUCCESS, or the translated GNI error if GNI_SmsgInit failed
 */
static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
    gni_return_t grc;
    int rc;

    BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d",
                 ep->remote_attr->smsg_attr.msg_type,
                 ep->remote_attr->smsg_attr.msg_buffer,
                 ep->remote_attr->smsg_attr.buff_size,
                 ep->remote_attr->smsg_attr.mem_hndl.qword1,
                 ep->remote_attr->smsg_attr.mem_hndl.qword2,
                 ep->remote_attr->smsg_attr.mbox_offset,
                 ep->remote_attr->smsg_attr.mbox_maxcredit,
                 ep->remote_attr->smsg_attr.msg_maxsize));

    BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
                 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
                 "msg_maxsize = %d",
                 ep->mailbox->attr.smsg_attr.msg_type,
                 ep->mailbox->attr.smsg_attr.msg_buffer,
                 ep->mailbox->attr.smsg_attr.buff_size,
                 ep->mailbox->attr.smsg_attr.mem_hndl.qword1,
                 ep->mailbox->attr.smsg_attr.mem_hndl.qword2,
                 ep->mailbox->attr.smsg_attr.mbox_offset,
                 ep->mailbox->attr.smsg_attr.mbox_maxcredit,
                 ep->mailbox->attr.smsg_attr.msg_maxsize));

    grc = GNI_SmsgInit (ep->smsg_ep_handle.gni_handle,
                        &ep->mailbox->attr.smsg_attr,
                        &ep->remote_attr->smsg_attr);
    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
        BTL_ERROR(("error initializing SMSG protocol. rc = %d", grc));

        return mca_btl_rc_ugni_to_opal (grc);
    }

    /* Local event data is our own index; remote event data is the index the
     * peer uses for us. Completions can then be mapped to an endpoint with
     * one array lookup. The peer's index for this endpoint is fixed from
     * here on. */
    GNI_EpSetEventData (ep->smsg_ep_handle.gni_handle, ep->index,
                        ep->remote_attr->index);

    ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl;
    ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
    (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle.device->smsg_connections, 1);

    /* flush whatever was queued while the connection was being set up */
    BTL_VERBOSE(("endpoint connected. posting %u sends",
                 (unsigned int) opal_list_get_size (&ep->frag_wait_list)));

    rc = mca_btl_ugni_progress_send_wait_list (ep);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        /* not everything went out: make sure the endpoint is on the
         * module-wide wait list exactly once */
        OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
        if (!ep->wait_listed) {
            opal_list_append (&ugni_module->ep_wait_list, &ep->super);
            ep->wait_listed = true;
        }
        OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
    }

    /* the remote attributes are no longer needed */
    free (ep->remote_attr);
    ep->remote_attr = NULL;

    return OPAL_SUCCESS;
}
/**
 * Actively connect to a remote scif peer.
 *
 * With the endpoint lock held: if the endpoint is still in the INIT state a
 * new scif descriptor is opened, connected to the peer's port, our process
 * name is sent, and the connection is finished. On any failure the
 * descriptor is closed and the endpoint goes back to INIT.
 *
 * @param ep (IN) endpoint to connect
 *
 * @returns OPAL_SUCCESS if connected (or if the endpoint had already left
 *          the INIT state), OPAL_ERR_RESOURCE_BUSY if scif_connect failed,
 *          OPAL_ERROR for open/send failures, or the result of
 *          mca_btl_scif_ep_connect_finish().
 */
static inline int mca_btl_scif_ep_connect_start_active (mca_btl_base_endpoint_t *ep)
{
    int rc = OPAL_SUCCESS;

    BTL_VERBOSE(("initiaiting connection to remote peer %d with port: %u on local scif node: %u",
                 ep->peer_proc->proc_name.vpid, ep->port_id.port, ep->port_id.node));

    opal_mutex_lock (&ep->lock);

    if (MCA_BTL_SCIF_EP_STATE_INIT != ep->state) {
        /* someone else (the accept thread) already took care of this one */
        goto unlock;
    }

    ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTING;

    ep->scif_epd = scif_open ();
    if (OPAL_UNLIKELY(SCIF_OPEN_FAILED == ep->scif_epd)) {
        BTL_VERBOSE(("error creating new scif endpoint"));
        rc = OPAL_ERROR;
        goto reset;
    }

    rc = scif_connect (ep->scif_epd, &ep->port_id);
    if (OPAL_UNLIKELY(-1 == rc)) {
        /* the peer may simply be busy handling other connections; the
         * caller is expected to retry later */
        BTL_VERBOSE(("error connecting to scif peer. %d", errno));
        rc = OPAL_ERR_RESOURCE_BUSY;
        goto reset;
    }

    /* identify ourselves to the peer */
    rc = scif_send (ep->scif_epd, &OPAL_PROC_MY_NAME, sizeof (OPAL_PROC_MY_NAME), SCIF_SEND_BLOCK);
    if (OPAL_UNLIKELY(-1 == rc)) {
        BTL_VERBOSE(("error in scif_send"));
        rc = OPAL_ERROR;
        goto reset;
    }

    rc = mca_btl_scif_ep_connect_finish (ep, false);
    if (OPAL_SUCCESS == rc) {
        goto unlock;
    }

reset:
    /* roll back so that a later attempt starts from scratch */
    scif_close (ep->scif_epd);
    ep->scif_epd = -1;
    ep->state = MCA_BTL_SCIF_EP_STATE_INIT;

unlock:
    opal_mutex_unlock (&ep->lock);

    return rc;
}
/**
 * Connect function. Start initiation of connections to a remote peer.
 *
 * What is done depends on the status of the endpoint's shared IB address,
 * which is examined under the address lock:
 *  - CLOSED:     create the send QP and send ENDPOINT_XOOB_CONNECT_REQUEST.
 *  - CONNECTING: another endpoint is already connecting to this address;
 *                queue this endpoint on the address' pending list.
 *  - CONNECTED:  the send QP exists; send ENDPOINT_XOOB_CONNECT_XRC_REQUEST
 *                to ask for the receive side.
 *
 * @param cpc      (IN) connect module (not referenced here)
 * @param endpoint (IN) endpoint to connect
 *
 * @returns OMPI_SUCCESS, or the error returned by xoob_send_qp_create() /
 *          xoob_send_connect_data().
 */
static int xoob_module_start_connect(ompi_btl_openib_connect_base_module_t *cpc,
                                     mca_btl_base_endpoint_t *endpoint)
{
    int rc = OMPI_SUCCESS;

    OPAL_THREAD_LOCK(&endpoint->ib_addr->addr_lock);
    switch (endpoint->ib_addr->status) {
        case MCA_BTL_IB_ADDR_CLOSED:
            /* note: the space after "lid %d" keeps the lid from running
             * into the following word in the log output */
            BTL_VERBOSE(("The IB addr: sid %" PRIx64 " lid %d "
                         "in MCA_BTL_IB_ADDR_CLOSED status,"
                         " sending ENDPOINT_XOOB_CONNECT_REQUEST\n",
                         endpoint->ib_addr->subnet_id,endpoint->ib_addr->lid));
            if (OMPI_SUCCESS != (rc = xoob_send_qp_create(endpoint))) {
                break;
            }

            /* Send connection info over to remote endpoint */
            endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
            endpoint->ib_addr->status = MCA_BTL_IB_ADDR_CONNECTING;
            if (OMPI_SUCCESS !=
                    (rc = xoob_send_connect_data(endpoint, ENDPOINT_XOOB_CONNECT_REQUEST))) {
                BTL_ERROR(("Error sending connect request, error code %d", rc));
            }
            break;
        case MCA_BTL_IB_ADDR_CONNECTING:
            BTL_VERBOSE(("The IB addr: sid %" PRIx64 " lid %d "
                         "in MCA_BTL_IB_ADDR_CONNECTING status,"
                         " Subscribing to this address\n",
                         endpoint->ib_addr->subnet_id,endpoint->ib_addr->lid));
            /* somebody is already connecting to this machine; wait for it */
            opal_list_append(&endpoint->ib_addr->pending_ep, &(endpoint->super));
            endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
            break;
        case MCA_BTL_IB_ADDR_CONNECTED:
            /* the send qp exists, only the receive side is missing:
             * send a request for the SRQ numbers */
            BTL_VERBOSE(("The IB addr: sid %" PRIx64 " lid %d "
                         "in MCA_BTL_IB_ADDR_CONNECTED status,"
                         " sending ENDPOINT_XOOB_CONNECT_XRC_REQUEST\n",
                         endpoint->ib_addr->subnet_id,endpoint->ib_addr->lid));
            endpoint->endpoint_state = MCA_BTL_IB_CONNECTING;
            if (OMPI_SUCCESS !=
                    (rc = xoob_send_connect_data(endpoint, ENDPOINT_XOOB_CONNECT_XRC_REQUEST))) {
                BTL_ERROR(("error sending xrc connect request, error code %d", rc));
            }
            break;
        default :
            /* NOTE(review): rc is still OMPI_SUCCESS here, so an invalid
             * status is only logged and not reported to the caller --
             * confirm whether an error code should be returned instead. */
            BTL_ERROR(("Invalid endpoint status %d", endpoint->ib_addr->status));
    }
    OPAL_THREAD_UNLOCK(&endpoint->ib_addr->addr_lock);
    return rc;
}
/**
 * Progress completed RDMA/FMA post descriptors on a completion queue.
 *
 * Up to MCA_BTL_UGNI_COMPLETIONS_PER_LOOP completed descriptors are fetched
 * in one call. Each one is either completed successfully, reposted (if the
 * error is recoverable and the retry limit has not been reached), or
 * completed with OPAL_ERROR.
 *
 * Every fetched descriptor is handled before returning: the descriptors
 * have already been removed from the completion queue, so bailing out at
 * the first failed one would leave the remaining ones without a completion.
 *
 * @param ugni_module (IN) BTL module
 * @param device      (IN) device the completion queue belongs to
 * @param cq          (IN) completion queue to poll
 *
 * @returns the value of mca_btl_ugni_cq_get_completed_desc() if it is <= 0,
 *          OPAL_ERROR if at least one descriptor was given up on,
 *          otherwise the number of descriptors completed successfully.
 */
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device,
                                              mca_btl_ugni_cq_t *cq)
{
    mca_btl_ugni_post_descriptor_t *post_desc[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
    gni_cq_entry_t event_data[MCA_BTL_UGNI_COMPLETIONS_PER_LOOP];
    int status = OPAL_SUCCESS;
    int completed = 0;
    int rc;

    rc = mca_btl_ugni_cq_get_completed_desc (device, cq, event_data, post_desc, MCA_BTL_UGNI_COMPLETIONS_PER_LOOP);
    if (0 >= rc) {
        return rc;
    }

    BTL_VERBOSE(("got %d completed rdma descriptors", rc));

    for (int i = 0 ; i < rc ; ++i) {
        BTL_VERBOSE(("post descriptor %p complete. GNI_CQ_STATUS_OK(): %d", (void *) post_desc[i],
                     GNI_CQ_STATUS_OK(event_data[i])));

        if (OPAL_UNLIKELY(!GNI_CQ_STATUS_OK(event_data[i]))) {
            uint32_t recoverable = 1;

            (void) GNI_CqErrorRecoverable (event_data[i], &recoverable);

            if (OPAL_UNLIKELY(++post_desc[i]->tries >= mca_btl_ugni_component.rdma_max_retries ||
                              !recoverable)) {
                char char_buffer[1024];
                GNI_CqErrorStr (event_data[i], char_buffer, sizeof (char_buffer));

                /* give up */
                BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) post_desc[i],
                           recoverable, char_buffer));
#if OPAL_ENABLE_DEBUG
                btl_ugni_dump_post_desc (post_desc[i]);
#endif
                mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_ERROR);

                /* remember the failure but keep draining the batch */
                status = OPAL_ERROR;
            } else {
                mca_btl_ugni_repost (ugni_module, post_desc[i]);
            }

            continue;
        }

        mca_btl_ugni_post_desc_complete (ugni_module, post_desc[i], OPAL_SUCCESS);
        ++completed;
    }

    /* should be resources to progress the pending post list */
    (void) mca_btl_ugni_post_pending (ugni_module, device);

    return (OPAL_SUCCESS == status) ? completed : status;
}
/** * Initiate a get operation. * * @param btl (IN) BTL module * @param endpoint (IN) BTL addressing information * @param descriptor (IN) Description of the data to be transferred */ int mca_btl_ugni_get (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *des) { mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) des; mca_btl_ugni_segment_t *src_seg = (mca_btl_ugni_segment_t *) des->des_remote; mca_btl_ugni_segment_t *dst_seg = (mca_btl_ugni_segment_t *) des->des_local; size_t size = src_seg->base.seg_len - src_seg->extra_byte_count; bool check; BTL_VERBOSE(("Using RDMA/FMA Get")); /* cause endpoint to bind if it isn't already (bind is sufficient for rdma) */ (void) mca_btl_ugni_check_endpoint_state(endpoint); /* Check if the get is aligned/sized on a multiple of 4 */ check = !!((des->des_remote->seg_addr.lval | des->des_local->seg_addr.lval | size) & 3); if (OPAL_UNLIKELY(check || size > mca_btl_ugni_component.ugni_get_limit)) { /* switch to put */ return OPAL_ERR_NOT_AVAILABLE; } if (src_seg->extra_byte_count) { memmove ((char *) dst_seg->base.seg_addr.pval + size, src_seg->extra_bytes, src_seg->extra_byte_count); src_seg->base.seg_len = size; dst_seg->base.seg_len = size; } des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; return mca_btl_ugni_post (frag, true, dst_seg, src_seg); }
/**
 * Advance the connection state machine of an endpoint by one step.
 *
 * @param ep (IN) endpoint to progress
 *
 * @returns OPAL_SUCCESS if the endpoint is already connected,
 *          OPAL_ERR_RESOURCE_BUSY if the connection datagram was posted and
 *          the remote attributes are still outstanding, an error code if a
 *          step failed, otherwise the result of
 *          mca_btl_ugni_ep_connect_finish().
 */
int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep)
{
    int rc;

    BTL_VERBOSE(("progressing connection for endpoint %p with state %d", (void *)ep, ep->state));

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        return OPAL_SUCCESS;
    }

    /* not yet past the RDMA state: the connection has to be started first */
    if (MCA_BTL_UGNI_EP_STATE_RDMA >= ep->state) {
        rc = mca_btl_ugni_ep_connect_start (ep);
        if (OPAL_SUCCESS != rc) {
            return rc;
        }
    }

    /* remote attributes already present: finish right away */
    if (GNI_SMSG_TYPE_INVALID != ep->remote_attr.smsg_attr.msg_type) {
        return mca_btl_ugni_ep_connect_finish (ep);
    }

    /* exchange connection information with the peer using a datagram; a
     * successful post still means "not connected yet" for the caller */
    rc = mca_btl_ugni_directed_ep_post (ep);

    return (OPAL_SUCCESS == rc) ? OPAL_ERR_RESOURCE_BUSY : rc;
}
static int mca_btl_ud_modex_send(void) { int rc; size_t i; size_t size; mca_btl_ud_addr_t* addrs = NULL; size = mca_btl_ofud_component.num_btls * sizeof(mca_btl_ud_addr_t); if(size != 0) { addrs = (mca_btl_ud_addr_t*)malloc(size); if(NULL == addrs) { return OMPI_ERR_OUT_OF_RESOURCE; } for(i = 0; i < mca_btl_ofud_component.num_btls; i++) { mca_btl_ud_module_t* btl = &mca_btl_ofud_component.ud_btls[i]; addrs[i] = btl->addr; BTL_VERBOSE((0, "modex_send QP num %x, LID = %x", addrs[i].qp_num, addrs[i].lid)); } } rc = ompi_modex_send( &mca_btl_ofud_component.super.btl_version, addrs, size); if(NULL != addrs) { free(addrs); } return rc; }
/**
 * Disconnect an endpoint and return it to the INIT state.
 *
 * @param ep              (IN) endpoint to disconnect
 * @param send_disconnect (IN) if true and the endpoint is connected, a
 *                             disconnect message is sent to the peer first
 *
 * @returns always OMPI_SUCCESS (failures are logged or ignored)
 */
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect)
{
    gni_return_t grc;

    if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
        /* never connected, nothing to undo */
        return OMPI_SUCCESS;
    }

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
        grc = GNI_SmsgSendWTag (ep->smsg_ep_handle, NULL, 0, NULL, 0, -1,
                                MCA_BTL_UGNI_TAG_DISCONNECT);
        if (GNI_RC_SUCCESS != grc) {
            BTL_VERBOSE(("btl/ugni could not send close message"));
        }

        /* local completion of the close message is not waited for */
    }

    /* tear down both endpoint handles; results are ignored on purpose */
    (void) ompi_common_ugni_ep_destroy (&ep->smsg_ep_handle);
    (void) ompi_common_ugni_ep_destroy (&ep->rdma_ep_handle);

    /* give the mailbox back to the module's pool */
    OMPI_FREE_LIST_RETURN(&ep->btl->smsg_mboxes,
                          ((ompi_free_list_item_t *) ep->mailbox));
    ep->mailbox = NULL;

    ep->state = MCA_BTL_UGNI_EP_STATE_INIT;

    return OMPI_SUCCESS;
}
/**
 * Prepare the dst buffer
 *
 * Builds a receive-side descriptor that points directly at the user buffer
 * described by the convertor (base + lower bound + bytes already
 * converted).
 *
 * If a memory registration is passed in it is used as is. Otherwise the
 * region is registered through the BTL's mpool and the new registration is
 * remembered in the fragment.
 *
 * @param btl          (IN)  BTL module
 * @param endpoint     (IN)  BTL peer addressing (not referenced here)
 * @param registration (IN)  existing registration covering the buffer, or NULL
 * @param convertor    (IN)  convertor describing the destination buffer
 * @param reserve      (IN)  not referenced here
 * @param size         (IN)  number of bytes to describe
 *
 * @returns the prepared descriptor, or NULL if no fragment could be
 *          allocated or the registration failed.
 */
mca_btl_base_descriptor_t* mca_btl_openib_prepare_dst(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    mca_mpool_base_registration_t* registration,
    struct ompi_convertor_t* convertor,
    size_t reserve,
    size_t* size)
{
    mca_btl_openib_module_t *openib_btl;
    mca_btl_openib_frag_t *frag;
    mca_btl_openib_reg_t *openib_reg;
    int rc;
    ptrdiff_t lb;

    openib_btl = (mca_btl_openib_module_t*)btl;

    MCA_BTL_IB_FRAG_ALLOC_RECV_FRAG(btl, frag, rc);
    if(NULL == frag) {
        return NULL;
    }

    ompi_ddt_type_lb(convertor->pDesc, &lb);
    frag->segment.seg_addr.pval = convertor->pBaseBuf + lb +
        convertor->bConverted;

    if(NULL == registration){
        /* we didn't get a memory registration passed in, so we have to
         * register the region ourselves */
        rc = btl->btl_mpool->mpool_register(btl->btl_mpool,
                frag->segment.seg_addr.pval, *size, 0, &registration);
        if(OMPI_SUCCESS != rc || NULL == registration) {
            MCA_BTL_IB_FRAG_RETURN(openib_btl, frag);
            return NULL;
        }
        /* keep track of the registration we did */
        frag->registration = (mca_btl_openib_reg_t*)registration;
    }
    openib_reg = (mca_btl_openib_reg_t*)registration;

    /* scatter/gather entry and segment both describe the user buffer */
    frag->sg_entry.length = *size;
    frag->sg_entry.lkey = openib_reg->mr->lkey;
    frag->sg_entry.addr = (unsigned long) frag->segment.seg_addr.pval;

    frag->segment.seg_len = *size;
    frag->segment.seg_key.key32[0] = openib_reg->mr->rkey;

    frag->base.des_dst = &frag->segment;
    frag->base.des_dst_cnt = 1;
    frag->base.des_src = NULL;
    frag->base.des_src_cnt = 0;
    frag->base.des_flags = 0;

    /* the casts make the variadic arguments match the conversion
     * specifiers regardless of the exact width of the fields */
    BTL_VERBOSE(("frag->sg_entry.lkey = %lu .addr = %llu "
                 "frag->segment.seg_key.key32[0] = %lu",
                 (unsigned long) frag->sg_entry.lkey,
                 (unsigned long long) frag->sg_entry.addr,
                 (unsigned long) frag->segment.seg_key.key32[0]));

    return &frag->base;
}
/* * Look for an existing TCP process instance based on the globally unique * process identifier. */ mca_btl_tcp_proc_t* mca_btl_tcp_proc_lookup(const opal_process_name_t *name) { mca_btl_tcp_proc_t* proc = NULL; OPAL_THREAD_LOCK(&mca_btl_tcp_component.tcp_lock); opal_proc_table_get_value(&mca_btl_tcp_component.tcp_procs, *name, (void**)&proc); OPAL_THREAD_UNLOCK(&mca_btl_tcp_component.tcp_lock); if (OPAL_UNLIKELY(NULL == proc)) { mca_btl_base_endpoint_t *endpoint; opal_proc_t *opal_proc; BTL_VERBOSE(("adding tcp proc for unknown peer {.jobid = 0x%x, .vpid = 0x%x}", name->jobid, name->vpid)); opal_proc = opal_proc_for_name (*name); if (NULL == opal_proc) { return NULL; } /* try adding this proc to each btl until */ for( uint32_t i = 0; i < mca_btl_tcp_component.tcp_num_btls; ++i ) { endpoint = NULL; (void) mca_btl_tcp_add_procs (&mca_btl_tcp_component.tcp_btls[i]->super, 1, &opal_proc, &endpoint, NULL); if (NULL != endpoint && NULL == proc) { /* get the proc and continue on (could probably just break here) */ proc = endpoint->endpoint_proc; } } } return proc; }
/**
 * Fetch the peer's modex entry and store its address and id in the endpoint.
 *
 * @param ep (IN) endpoint; must be non-NULL and have a peer proc
 *
 * @returns OPAL_SUCCESS, or the modex receive error
 */
static int mca_btl_ugni_endpoint_get_modex (mca_btl_base_endpoint_t *ep)
{
    mca_btl_ugni_modex_t *modex;
    size_t msg_size;
    int rc;

    assert (NULL != ep && NULL != ep->peer_proc);

    /* look up what the peer published for this component */
    OPAL_MODEX_RECV(rc, &mca_btl_ugni_component.super.btl_version,
                    &ep->peer_proc->proc_name, (void **)&modex, &msg_size);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        BTL_ERROR(("error receiving modex"));
        return rc;
    }

    ep->ep_rem_id   = modex->id;
    ep->ep_rem_addr = modex->addr;

    BTL_VERBOSE(("received modex for ep %p. addr: %d, id: %d", (void*)ep, ep->ep_rem_addr, ep->ep_rem_id));

    /* the received buffer is released here */
    free (modex);

    return OPAL_SUCCESS;
}
/**
 * Retry posts that are queued on the device's pending list.
 *
 * At most as many descriptors as were pending on entry are retried. The
 * first descriptor that cannot be reposted is put back at the head of the
 * list and processing stops.
 *
 * @param ugni_module (IN) BTL module
 * @param device      (IN) device whose pending list is processed
 *
 * @returns 0 if nothing was pending on entry, 1 otherwise
 */
static inline int mca_btl_ugni_post_pending (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device)
{
    int pending_post_count = opal_list_get_size (&device->pending_post);
    mca_btl_ugni_post_descriptor_t *post_desc;

    /* common case: nothing is waiting for resources */
    if (OPAL_LIKELY(0 == pending_post_count)) {
        return 0;
    }

    BTL_VERBOSE(("progressing %d pending FMA/RDMA operations", pending_post_count));

    for (int remaining = pending_post_count ; remaining > 0 ; --remaining) {
        /* the list is only touched with the device lock held */
        mca_btl_ugni_device_lock (device);
        post_desc = (mca_btl_ugni_post_descriptor_t *) opal_list_remove_first (&device->pending_post);
        mca_btl_ugni_device_unlock (device);

        if (NULL == post_desc) {
            break;
        }

        if (OPAL_SUCCESS != mca_btl_ugni_repost (ugni_module, post_desc)) {
            /* still no resources: put it back in front and stop */
            mca_btl_ugni_device_lock (device);
            opal_list_prepend (&device->pending_post, (opal_list_item_t *) post_desc);
            mca_btl_ugni_device_unlock (device);
            break;
        }
    }

    return 1;
}
/*
 * Reply to a `start - connect' message.
 *
 * Creates the local QPs, records the remote side's information, connects
 * the QPs, moves the endpoint to the CONNECT_ACK state and sends an
 * ENDPOINT_CONNECT_RESPONSE. Returns OMPI_SUCCESS or the error code of the
 * first step that failed.
 */
static int reply_start_connect(mca_btl_openib_endpoint_t *endpoint,
                               mca_btl_openib_rem_info_t *rem_info)
{
    int rc;

    BTL_VERBOSE(("Initialized QPs, LID = %d",
                 ((mca_btl_openib_module_t*)endpoint->endpoint_btl)->lid));

    /* local QPs plus their receive resources */
    rc = qp_create_all(endpoint);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }

    /* remember what the peer told us about itself */
    set_remote_info(endpoint, rem_info);

    /* wire the local QPs to the peer's QPs */
    rc = qp_connect_all(endpoint);
    if (OMPI_SUCCESS != rc) {
        return rc;
    }

    /* answer the peer with our own connection information */
    endpoint->endpoint_state = MCA_BTL_IB_CONNECT_ACK;
    rc = send_connect_data(endpoint, ENDPOINT_CONNECT_RESPONSE);
    if (OMPI_SUCCESS != rc) {
        BTL_ERROR(("error in endpoint send connect request error code is %d",
                   rc));
    }

    return rc;
}
/**
 * Recover from an overrun of the remote SMSG completion queue.
 *
 * Since it is not known which endpoint lost a completion, the remote cq is
 * drained and the mailbox of every connected endpoint is processed.
 *
 * @param btl (IN) BTL module
 *
 * @returns the sum of the non-negative results of mca_btl_ugni_smsg_process()
 */
static inline int mca_btl_ugni_handle_remote_smsg_overrun (mca_btl_ugni_module_t *btl)
{
    gni_cq_entry_t event_data;
    unsigned int ep_index;
    int count = 0;
    int rc;

    BTL_VERBOSE(("btl/ugni_component detected SMSG CQ overrun. "
                 "processing message backlog..."));

    /* drain the remote cq until it reports that nothing is left */
    for (;;) {
        rc = GNI_CqGetEvent (btl->smsg_remote_cq, &event_data);
        if (GNI_RC_NOT_DONE == rc) {
            break;
        }
    }

    for (ep_index = 0 ; ep_index < btl->endpoint_count ; ++ep_index) {
        mca_btl_base_endpoint_t *ep = btl->endpoints[ep_index];

        if (NULL != ep && MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
            /* empty this endpoint's mailbox */
            rc = mca_btl_ugni_smsg_process (ep);
            if (OPAL_LIKELY(rc >= 0)) {
                count += rc;
            }
        }
    }

    return count;
}
/**
 * Load an alternative path (APM) for an XRC receive QP.
 *
 * The current QP attributes are queried; then either the path bits are
 * advanced (apm_lmc) or migration to the next port is attempted
 * (apm_ports), and the QP is modified with the resulting attributes.
 *
 * @param qp_num (IN) number of the XRC receive QP
 * @param ep     (IN) endpoint the QP belongs to; must not be NULL
 */
void mca_btl_openib_load_apm_xrc_rcv(uint32_t qp_num, mca_btl_openib_endpoint_t *ep)
{
    struct ibv_qp_init_attr qp_init_attr;
    struct ibv_qp_attr attr;
    enum ibv_qp_attr_mask mask = 0;
    struct mca_btl_openib_module_t *btl;

    BTL_VERBOSE(("APM XRC: Loading alternative path"));
    assert (NULL != ep);
    btl = ep->endpoint_btl;

    if (ibv_query_xrc_rcv_qp(btl->device->xrc_domain, qp_num, &attr, mask, &qp_init_attr)) {
        BTL_ERROR(("Failed to ibv_query_qp, qp num: %d", qp_num));
        /* attr was not filled in; everything below reads it, so stop here
         * rather than act on uninitialized attributes */
        return;
    }

    if (mca_btl_openib_component.apm_lmc &&
            attr.ah_attr.src_path_bits - btl->src_path_bits < mca_btl_openib_component.apm_lmc) {
        apm_update_attr(&attr, &mask);
    } else {
        if (mca_btl_openib_component.apm_ports) {
            /* Try to migrate to next port */
            if (OPAL_SUCCESS != apm_update_port(ep, &attr, &mask)) {
                return;
            }
        } else {
            BTL_ERROR(("Failed to load alternative path, all %d were used",
                       attr.ah_attr.src_path_bits - btl->src_path_bits));
        }
    }

    /* Maybe the qp already was modified by other process - ignoring error */
    ibv_modify_xrc_rcv_qp(btl->device->xrc_domain, qp_num, &attr, mask);
}
/**
 * Find the endpoint of a peer for a specific subnet/lid/message type.
 *
 * The component's proc list is searched (under the component lock) for the
 * proc whose name equals process_name. Its endpoints are then matched on
 * the subnet id and on a lid: for response messages the lid of the
 * endpoint's IB address is compared, for request messages the lid of the
 * endpoint's BTL module.
 *
 * @param process_name (IN) name of the peer process
 * @param subnet_id    (IN) subnet id to match
 * @param lid          (IN) lid to match
 * @param message_type (IN) XOOB message type, selects which lid is compared
 *
 * @returns the matching endpoint, or NULL if the proc is unknown or none of
 *          its endpoints match (an error is logged in both cases).
 */
static mca_btl_openib_endpoint_t* xoob_find_endpoint(ompi_process_name_t* process_name,
        uint64_t subnet_id, uint16_t lid, uint8_t message_type)
{
    size_t i;
    mca_btl_openib_proc_t *ib_proc;
    mca_btl_openib_endpoint_t *ib_endpoint = NULL;
    bool found = false;

    BTL_VERBOSE(("Searching for ep and proc with follow parameters:"
                 "jobid %d, vpid %d, "
                 "sid %" PRIx64 ", lid %d",
                 process_name->jobid, process_name->vpid,
                 subnet_id, lid));

    /* find ibproc */
    OPAL_THREAD_LOCK(&mca_btl_openib_component.ib_lock);
    for (ib_proc = (mca_btl_openib_proc_t*)
            opal_list_get_first(&mca_btl_openib_component.ib_procs);
            ib_proc != (mca_btl_openib_proc_t*)
            opal_list_get_end(&mca_btl_openib_component.ib_procs);
            ib_proc = (mca_btl_openib_proc_t*)opal_list_get_next(ib_proc)) {
        if (OPAL_EQUAL == ompi_rte_compare_name_fields(OMPI_RTE_CMP_ALL,
                    &ib_proc->proc_ompi->proc_name, process_name)) {
            found = true;
            break;
        }
    }

    /* we found our ib_proc, lets find endpoint now */
    if (found) {
        for (i = 0; i < ib_proc->proc_endpoint_count; i++) {
            /* ib_endpoint is only assigned on a match, so it stays NULL
             * when the loop runs to completion without finding one */
            mca_btl_openib_endpoint_t *candidate = ib_proc->proc_endpoints[i];
            bool match;

            /* we need to check different lid for different message type */
            if (ENDPOINT_XOOB_CONNECT_RESPONSE == message_type ||
                    ENDPOINT_XOOB_CONNECT_XRC_RESPONSE == message_type) {
                /* response message */
                match = candidate->subnet_id == subnet_id &&
                        candidate->ib_addr->lid == lid;
            } else {
                /* request message */
                match = candidate->subnet_id == subnet_id &&
                        candidate->endpoint_btl->lid == lid;
            }

            if (match) {
                ib_endpoint = candidate;
                break; /* Found one */
            }
        }
    }

    if (NULL == ib_endpoint) {
        BTL_ERROR(("can't find suitable endpoint for this peer\n"));
    }

    OPAL_THREAD_UNLOCK(&mca_btl_openib_component.ib_lock);
    return ib_endpoint;
}
int mca_btl_scif_module_init (void) { int rc; /* create an endpoint to listen for connections */ mca_btl_scif_module.scif_fd = scif_open (); if (-1 == mca_btl_scif_module.scif_fd) { BTL_VERBOSE(("scif_open failed. errno = %d", errno)); return OPAL_ERROR; } /* bind the endpoint to a port */ mca_btl_scif_module.port_id.port = scif_bind (mca_btl_scif_module.scif_fd, 0); if (-1 == mca_btl_scif_module.port_id.port) { BTL_VERBOSE(("scif_bind failed. errno = %d", errno)); scif_close (mca_btl_scif_module.scif_fd); mca_btl_scif_module.scif_fd = -1; return OPAL_ERROR; } /* determine this processes node id */ rc = scif_get_nodeIDs (NULL, 0, &mca_btl_scif_module.port_id.node); if (-1 == rc) { BTL_VERBOSE(("btl/scif error getting node id of this node")); return OPAL_ERROR; } /* Listen for connections */ /* TODO - base the maximum backlog off something */ rc = scif_listen (mca_btl_scif_module.scif_fd, 64); if (-1 == rc) { BTL_VERBOSE(("scif_listen failed. errno = %d", errno)); scif_close (mca_btl_scif_module.scif_fd); mca_btl_scif_module.scif_fd = -1; return OPAL_ERROR; } BTL_VERBOSE(("btl/scif: listening @ port %u on node %u\n", mca_btl_scif_module.port_id.port, mca_btl_scif_module.port_id.node)); OBJ_CONSTRUCT(&mca_btl_scif_module.dma_frags, opal_free_list_t); OBJ_CONSTRUCT(&mca_btl_scif_module.eager_frags, opal_free_list_t); return OPAL_SUCCESS; }
/**
 * Progress one event from the remote SMSG completion queue.
 *
 * The instance id carried by the event is used as an index into the
 * module's endpoint array; the mailbox of that endpoint is then processed.
 * A cq overrun is handed to mca_btl_ugni_handle_remote_smsg_overrun().
 *
 * @param btl (IN) BTL module
 *
 * @returns 0 if there was no event, or if the event refers to an endpoint
 *          that does not exist or is not connected; the result of the
 *          overrun handler or of mca_btl_ugni_smsg_process() otherwise;
 *          a translated error code for unhandled cq errors (non-debug
 *          builds only, debug builds assert).
 */
int mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_module_t *btl)
{
    mca_btl_base_endpoint_t *ep;
    gni_cq_entry_t event_data;
    gni_return_t grc;
    uint64_t inst_id;

    grc = mca_btl_ugni_gni_cq_get_event (btl->devices, btl->smsg_remote_cq, &event_data);
    if (GNI_RC_NOT_DONE == grc) {
        return 0;
    }

    if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc || !GNI_CQ_STATUS_OK(event_data) ||
                      GNI_CQ_OVERRUN(event_data))) {
        if (GNI_RC_ERROR_RESOURCE == grc ||
            (GNI_RC_SUCCESS == grc && GNI_CQ_OVERRUN(event_data))) {
            /* recover from smsg cq overrun */
            return mca_btl_ugni_handle_remote_smsg_overrun (btl);
        }

        BTL_ERROR(("unhandled error in GNI_CqGetEvent"));

        /* unhandled error: crash */
        assert (0);
        return mca_btl_rc_ugni_to_opal (grc);
    }

    BTL_VERBOSE(("REMOTE CQ: Got event 0x%" PRIx64 ". msg id = %" PRIu64 ". ok = %d, type = %" PRIu64,
                 (uint64_t) event_data, GNI_CQ_GET_INST_ID(event_data), GNI_CQ_STATUS_OK(event_data),
                 GNI_CQ_GET_TYPE(event_data)));

    inst_id = GNI_CQ_GET_INST_ID(event_data);

    ep = (mca_btl_base_endpoint_t *) opal_pointer_array_get_item (&btl->endpoints, inst_id);

    /* the lookup yields no endpoint when the slot is empty; do not
     * dereference in that case
     * NOTE(review): presumably possible if the endpoint was removed while an
     * event was still in flight -- confirm against the removal path. */
    if (OPAL_UNLIKELY(NULL == ep)) {
        BTL_VERBOSE(("event occurred on an unknown endpoint! inst id = %" PRIu64, inst_id));
        return 0;
    }

    if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_CONNECTED != ep->state)) {
        /* due to the nature of datagrams we may get a smsg completion before
           we get mailbox info from the peer */
        BTL_VERBOSE(("event occurred on an unconnected endpoint! ep state = %d", ep->state));
        return 0;
    }

    return mca_btl_ugni_smsg_process (ep);
}
/** * Initiate a get operation. * * @param btl (IN) BTL module * @param endpoint (IN) BTL addressing information * @param descriptor (IN) Description of the data to be transferred */ int mca_btl_scif_get (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *des) { mca_btl_scif_segment_t *src = (mca_btl_scif_segment_t *) des->des_src; mca_btl_scif_segment_t *dst = (mca_btl_scif_segment_t *) des->des_dst; size_t len = lmin (src->base.seg_len, dst->base.seg_len); int rc, mark, flags = 0; off_t roffset, loffset; size_t to_get; #if defined(SCIF_TIMING) struct timespec ts; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); mca_btl_scif_component.get_count++; #endif BTL_VERBOSE(("Using DMA Get for frag %p from offset %lu", (void *) des, (unsigned long) src->scif_offset)); roffset = src->scif_offset + (off_t)(src->orig_ptr - src->base.seg_addr.lval); loffset = dst->scif_offset + (off_t)(dst->orig_ptr - dst->base.seg_addr.lval); if (mca_btl_scif_component.rma_use_cpu) { flags = SCIF_RMA_USECPU; } if (mca_btl_scif_component.rma_sync) { flags |= SCIF_RMA_SYNC; } /* start the read */ rc = scif_readfrom (endpoint->scif_epd, loffset, len, roffset, flags); if (OPAL_UNLIKELY(-1 == rc)) { return OMPI_ERROR; } /* always call the callback function */ des->des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK; if (!(flags & SCIF_RMA_SYNC)) { /* according to the scif documentation is is better to use a fence rather * than using the SCIF_RMA_SYNC flag with scif_readfrom */ scif_fence_mark (endpoint->scif_epd, SCIF_FENCE_INIT_SELF, &mark); scif_fence_wait (endpoint->scif_epd, mark); } #if defined(SCIF_TIMING) SCIF_UPDATE_TIMER(mca_btl_scif_component.get_time, mca_btl_scif_component.get_time_max, ts); #endif /* since we completed the fence the RMA operation is complete */ mca_btl_scif_frag_complete ((mca_btl_scif_base_frag_t *) des, OMPI_SUCCESS); return OMPI_SUCCESS; }
/**
 * Disconnect an endpoint and return it to the INIT state.
 *
 * Outstanding local SMSG operations on the endpoint's device are progressed
 * first. If requested and the endpoint is connected, a disconnect message
 * is sent and progressed as well, and the device's connection count is
 * decremented. Finally the SMSG endpoint handle is cleaned up and the
 * mailbox, if any, is returned to the module's free list.
 *
 * @param ep              (IN) endpoint to disconnect
 * @param send_disconnect (IN) whether to notify the peer
 *
 * @returns always OPAL_SUCCESS
 */
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
    mca_btl_ugni_device_t *device;

    if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
        /* nothing to do */
        return OPAL_SUCCESS;
    }

    device = ep->smsg_ep_handle.device;

    /* let every outstanding send finish (or fail) before tearing down */
    while (device->dev_smsg_local_cq.active_operations) {
        if (OPAL_SUCCESS != mca_btl_ugni_progress_local_smsg (ugni_module, device)) {
            break;
        }
    }

    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
        if (OPAL_UNLIKELY(OPAL_SUCCESS != mca_btl_ugni_ep_send_disconnect (ep))) {
            BTL_VERBOSE(("could not send disconnect message to peer"));
        }

        /* progress at least once, then until the disconnect has gone out */
        do {
            if (OPAL_SUCCESS != mca_btl_ugni_progress_local_smsg (ugni_module, device)) {
                break;
            }
        } while (device->dev_smsg_local_cq.active_operations);

        (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle.device->smsg_connections, -1);
    }

    /* NTH: the cleanup may work without the device lock; it is taken to be
     * on the safe side. */
    mca_btl_ugni_device_lock (device);
    (void) mca_btl_ugni_ep_handle_cleanup (&ep->smsg_ep_handle);
    mca_btl_ugni_device_unlock (device);

    if (ep->mailbox) {
        opal_free_list_return (&ugni_module->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
        ep->mailbox = NULL;
    }

    ep->state = MCA_BTL_UGNI_EP_STATE_INIT;

    return OPAL_SUCCESS;
}
static void mca_btl_ugni_callback_eager_get (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, struct mca_btl_base_descriptor_t *desc, int rc) { mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl; mca_btl_ugni_base_frag_t *frag = (mca_btl_ugni_base_frag_t *) desc; uint32_t len = frag->hdr.eager.send.lag & 0x00ffffff; uint8_t tag = frag->hdr.eager.send.lag >> 24; size_t payload_len = frag->hdr.eager.src_seg.base.seg_len; size_t hdr_len = len - payload_len; mca_btl_active_message_callback_t *reg; mca_btl_base_segment_t segs[2]; mca_btl_ugni_base_frag_t tmp; BTL_VERBOSE(("eager get for rem_ctx %p complete", frag->hdr.eager.ctx)); tmp.base.des_local = segs; if (hdr_len) { tmp.base.des_local_count = 2; segs[0].seg_addr.pval = frag->hdr.eager_ex.pml_header; segs[0].seg_len = hdr_len; segs[1].seg_addr.pval = frag->segments[0].base.seg_addr.pval; segs[1].seg_len = payload_len; } else { tmp.base.des_local_count = 1; segs[0].seg_addr.pval = frag->segments[0].base.seg_addr.pval; segs[0].seg_len = payload_len; } reg = mca_btl_base_active_message_trigger + tag; reg->cbfunc(&frag->endpoint->btl->super, tag, &(tmp.base), reg->cbdata); frag->hdr.rdma.ctx = frag->hdr.eager.ctx; /* once complete use this fragment for a pending eager get if any exist */ frag->base.des_cbfunc = mca_btl_ugni_callback_eager_get_progress_pending; /* tell the remote peer the operation is complete */ rc = opal_mca_btl_ugni_smsg_send (frag, &frag->hdr.rdma, sizeof (frag->hdr.rdma), NULL, 0, MCA_BTL_UGNI_TAG_RDMA_COMPLETE); if (OPAL_UNLIKELY(0 > rc)) { /* queue fragment */ if (false == endpoint->wait_listed) { OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock); opal_list_append (&ugni_module->ep_wait_list, &endpoint->super); OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock); endpoint->wait_listed = true; } OPAL_THREAD_LOCK(&endpoint->lock); opal_list_append (&endpoint->frag_wait_list, (opal_list_item_t *) frag); OPAL_THREAD_UNLOCK(&endpoint->lock); } }
/**
 * Poll the module's local RDMA completion queue for a single event.
 *
 * @param ugni_module  (IN) module whose rdma_local_cq is polled
 *
 * @returns 1 when a fragment completed and its callback was invoked
 * @returns 0 when no event was available or a failed transaction was reposted
 * @returns an OMPI error code when the event could not be handled or the
 *          fragment exhausted its retries / hit an unrecoverable error
 */
static inline int mca_btl_ugni_progress_rdma (mca_btl_ugni_module_t *ugni_module)
{
    ompi_common_ugni_post_desc_t *post;
    mca_btl_ugni_base_frag_t *frag;
    gni_cq_entry_t event_data = 0;
    uint32_t recoverable = 1;
    gni_return_t grc;
    int no_event, failed;

    grc = GNI_CqGetEvent (ugni_module->rdma_local_cq, &event_data);
    if (GNI_RC_NOT_DONE == grc) {
        /* nothing has completed */
        return 0;
    }

    no_event = (GNI_RC_SUCCESS != grc) && !event_data;
    if (OPAL_UNLIKELY(no_event || GNI_CQ_OVERRUN(event_data))) {
        /* TODO -- need to handle overrun -- how do we do this without an event?
           will the event eventually come back? Ask Cray */
        BTL_ERROR(("unhandled post error! ugni rc = %d", grc));
        assert (0);
        return ompi_common_rc_ugni_to_ompi (grc);
    }

    /* map the event back to the post descriptor; a transaction error is
     * tolerated here and handled by the retry logic below */
    grc = GNI_GetCompleted (ugni_module->rdma_local_cq, event_data,
                            (gni_post_descriptor_t **) &post);
    if (OPAL_UNLIKELY(!(GNI_RC_SUCCESS == grc || GNI_RC_TRANSACTION_ERROR == grc))) {
        BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[grc]));
        return ompi_common_rc_ugni_to_ompi (grc);
    }

    frag = MCA_BTL_UGNI_DESC_TO_FRAG(post);

    failed = (GNI_RC_SUCCESS != grc) || !GNI_CQ_STATUS_OK(event_data);
    if (OPAL_UNLIKELY(failed)) {
        (void) GNI_CqErrorRecoverable (event_data, &recoverable);

        /* every failure counts as one try, whether or not it is recoverable */
        frag->post_desc.tries++;

        if (OPAL_UNLIKELY(frag->post_desc.tries >= mca_btl_ugni_component.rdma_max_retries ||
                          !recoverable)) {
            /* give up */
            BTL_ERROR(("giving up on frag %p", (void *) frag));
            frag->cbfunc (frag, OMPI_ERROR);

            return OMPI_ERROR;
        }

        /* repost transaction */
        mca_btl_ugni_repost (frag, OMPI_SUCCESS);

        return 0;
    }

    BTL_VERBOSE(("RDMA/FMA complete for frag %p", (void *) frag));

    frag->cbfunc (frag, ompi_common_rc_ugni_to_ompi (grc));

    return 1;
}
/**
 * Check for a completed connection datagram and progress the matching
 * endpoint's connection.
 *
 * @param ugni_module  (IN) module whose device is probed for datagrams
 *
 * @returns 0 when no datagram has completed
 * @returns the value of mca_btl_ugni_smsg_process() when the endpoint is
 *          connected after progressing, otherwise 0
 * @returns an OMPI error code when waiting on the datagram fails
 */
static inline int mca_btl_ugni_progress_datagram (mca_btl_ugni_module_t *ugni_module)
{
    uint32_t remote_addr, remote_id;
    mca_btl_base_endpoint_t *ep;
    gni_post_state_t post_state;
    gni_ep_handle_t handle;
    uint64_t datagram_id;
    gni_return_t grc;
    int is_wildcard;
    int count = 0;

    /* check for datagram completion */
    grc = GNI_PostDataProbeById (ugni_module->device->dev_handle, &datagram_id);
    if (OPAL_LIKELY(GNI_RC_SUCCESS != grc)) {
        return 0;
    }

    /* the datagram was posted either on the wildcard endpoint or on a
     * specific endpoint whose index is the low 32 bits of the id */
    is_wildcard = (MCA_BTL_UGNI_CONNECT_WILDCARD_ID ==
                   (datagram_id & MCA_BTL_UGNI_DATAGRAM_MASK));

    if (!is_wildcard) {
        uint32_t ep_index = (uint32_t)(datagram_id & 0xffffffffull);

        handle = ugni_module->endpoints[ep_index]->smsg_ep_handle;
    } else {
        handle = ugni_module->wildcard_ep;
    }

    /* wait for the incoming datagram to complete (in case it isn't) */
    grc = GNI_EpPostDataWaitById (handle, datagram_id, -1, &post_state,
                                  &remote_addr, &remote_id);
    if (GNI_RC_SUCCESS != grc) {
        BTL_ERROR(("GNI_EpPostDataWaitById failed with rc = %d", grc));
        return ompi_common_rc_ugni_to_ompi (grc);
    }

    BTL_VERBOSE(("got a datagram completion: id = %" PRIx64 ", state = %d, "
                 "peer = %d", datagram_id, post_state, remote_id));

    /* the endpoint is looked up by the peer id reported with the datagram */
    ep = ugni_module->endpoints[remote_id];

    /* NTH: TODO -- error handling */
    (void) mca_btl_ugni_ep_connect_progress (ep);

    /* process messages waiting in the endpoint's smsg mailbox */
    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        count = mca_btl_ugni_smsg_process (ep);
    }

    /* repost the wildcard datagram */
    if (is_wildcard) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return count;
}
/**
 * Fill in the alternate-path fields of a QP attribute structure so that the
 * alternate path uses the BTL's APM port.
 *
 * @param ep    (IN)     endpoint whose proc port table and BTL are consulted
 * @param attr  (IN/OUT) QP attributes; the primary path fields are read and
 *                       the alt_* / path_mig_state fields are written
 * @param mask  (OUT)    set to IBV_QP_ALT_PATH|IBV_QP_PATH_MIG_STATE on success
 *
 * @returns OPAL_SUCCESS when the alternate path was loaded
 * @returns OPAL_ERROR when the current port already is the APM port or no
 *          alternate LID was found for the remote port
 */
static int apm_update_port(mca_btl_openib_endpoint_t *ep,
                           struct ibv_qp_attr *attr, enum ibv_qp_attr_mask *mask)
{
    uint16_t alt_lid = 0;
    size_t idx = 0;

    if (ep->endpoint_btl->apm_port == attr->port_num) {
        /* all ports were used */
        BTL_ERROR(("APM: already all ports were used port_num %d apm_port %d",
                   attr->port_num, ep->endpoint_btl->apm_port));
        return OPAL_ERROR;
    }

    /* look for the alternate lid on the remote side; the whole table is
     * scanned, so the last matching port determines the result */
    while (idx < ep->endpoint_proc->proc_port_count) {
        if (ep->endpoint_proc->proc_ports[idx].pm_port_info.lid ==
            attr->ah_attr.dlid - mca_btl_openib_component.apm_lmc) {
            alt_lid = ep->endpoint_proc->proc_ports[idx].pm_port_info.apm_lid;
        }
        idx++;
    }

    if (0 == alt_lid) {
        /* APM was disabled on one of site ? */
        BTL_VERBOSE(("APM: Was disabled ? dlid %d %d %d",
                     attr->ah_attr.dlid, attr->ah_attr.src_path_bits,
                     ep->endpoint_btl->src_path_bits));
        return OPAL_ERROR;
    }

    /* the alternate path goes through the BTL's APM port to the alternate lid */
    attr->alt_port_num = ep->endpoint_btl->apm_port;
    attr->alt_ah_attr.dlid = alt_lid;
    attr->alt_ah_attr.src_path_bits = ep->endpoint_btl->src_path_bits;

    /* we guess that the LMC is the same on all ports, so the remaining
     * alternate-path settings are copied from the primary path */
    attr->alt_ah_attr.sl = attr->ah_attr.sl;
    attr->alt_ah_attr.static_rate = attr->ah_attr.static_rate;
    attr->alt_timeout = attr->timeout;
    attr->alt_pkey_index = attr->pkey_index;

    attr->path_mig_state = IBV_MIG_REARM;
    *mask = IBV_QP_ALT_PATH|IBV_QP_PATH_MIG_STATE;

    BTL_VERBOSE(("New APM port loaded: alt_src_port:%d, dlid: %d, src_bits: %d:%d, old_dlid %d",
                 attr->alt_port_num, attr->alt_ah_attr.dlid,
                 attr->ah_attr.src_path_bits, attr->alt_ah_attr.src_path_bits,
                 attr->ah_attr.dlid));

    return OPAL_SUCCESS;
}
/*
 * Store the remote connection info on an endpoint.
 *
 * The whole mca_btl_mvapi_rem_info_t structure is copied into the
 * endpoint's rem_info field.
 *
 * @param endpoint  (IN/OUT) endpoint whose rem_info is overwritten
 * @param rem_info  (IN)     remote connection info to copy from
 *
 * @returns ORTE_SUCCESS always
 */
static int mca_btl_mvapi_endpoint_set_remote_info(mca_btl_base_endpoint_t* endpoint,
                                                  mca_btl_mvapi_rem_info_t* rem_info)
{
    mca_btl_mvapi_endpoint_t *mvapi_endpoint = (mca_btl_mvapi_endpoint_t *) endpoint;

    memcpy(&mvapi_endpoint->rem_info, rem_info, sizeof(*rem_info));

    BTL_VERBOSE(("Setting High Priority QP num = %d, Low Priority QP num %d, LID = %d",
                 endpoint->rem_info.rem_qp_num_hp,
                 endpoint->rem_info.rem_qp_num_lp,
                 endpoint->rem_info.rem_lid));

    return ORTE_SUCCESS;
}