int MPI_Status_c2f(MPI_Status *c_status, MPI_Fint *f_status) { int i, *c_ints; MEMCHECKER( if(c_status != MPI_STATUSES_IGNORE) { /* * Before checking the complete status, we need to reset the definedness * of the MPI_ERROR-field (single-completion calls wait/test). */ opal_memchecker_base_mem_defined(&c_status->MPI_ERROR, sizeof(int)); memchecker_status(c_status); } );
int MPI_Status_set_elements_x(MPI_Status *status, MPI_Datatype datatype, MPI_Count count) { int rc = MPI_SUCCESS; size_t size; MEMCHECKER( if(status != MPI_STATUSES_IGNORE) { /* * Before checking the complete status, we need to reset the definedness * of the MPI_ERROR-field (single-completion calls wait/test). */ opal_memchecker_base_mem_defined(&status->MPI_ERROR, sizeof(int)); memchecker_status (status); memchecker_datatype(datatype); } );
/*
 * Take one get-request object off the SPML base get-request free list.
 *
 * The whole request is first flagged as undefined for the memory checker,
 * then only the embedded base request is flagged as defined again, and its
 * "free called" / "complete" flags are cleared before it is handed out.
 *
 * Returns the request obtained from opal_free_list_wait(), cast to the
 * ikrit get-request type.
 */
static inline mca_spml_ikrit_get_request_t *alloc_get_req(void)
{
    opal_free_list_item_t *free_item = opal_free_list_wait(&mca_spml_base_get_requests);
    mca_spml_ikrit_get_request_t *get_req = (mca_spml_ikrit_get_request_t *) free_item;

    /* Recycled storage: treat everything as undefined ... */
    opal_memchecker_base_mem_undefined(get_req, sizeof(*get_req));
    /* ... except the base request, whose fields are used right away. */
    opal_memchecker_base_mem_defined(&get_req->req_get.req_base,
                                     sizeof(get_req->req_get.req_base));

    /* Reset the per-use state flags. */
    get_req->req_get.req_base.req_free_called = false;
    get_req->req_get.req_base.req_oshmem.req_complete = false;

    return get_req;
}
static inline void inmsg_mark_complete(ompi_osc_pt2pt_module_t *module) { int32_t count; bool need_unlock = false; OPAL_THREAD_LOCK(&module->p2p_lock); count = (module->p2p_num_pending_in -= 1); if ((0 != module->p2p_lock_status) && (opal_list_get_size(&module->p2p_unlocks_pending) != 0)) { need_unlock = true; } OPAL_THREAD_UNLOCK(&module->p2p_lock); MEMCHECKER( /* Here we need restore the initial states of memory. */ opal_memchecker_base_mem_defined( module->p2p_win->w_baseptr, module->p2p_win->w_size); );
int MPI_Get_count(const MPI_Status *status, MPI_Datatype datatype, int *count) { size_t size = 0, internal_count; int rc = MPI_SUCCESS; OPAL_CR_NOOP_PROGRESS(); MEMCHECKER( if (status != MPI_STATUSES_IGNORE) { /* * Before checking the complete status, we need to reset the definedness * of the MPI_ERROR-field (single-completion calls wait/test). */ opal_memchecker_base_mem_defined((void*)&status->MPI_ERROR, sizeof(int)); memchecker_status(status); memchecker_datatype(datatype); } );
/* look up the remote pointer in the peer rcache and attach if * necessary */ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr, size_t size, int flags, void **local_ptr) { struct mca_rcache_base_module_t *rcache = ep->rcache; mca_mpool_base_registration_t *regs[10], *reg = NULL; xpmem_addr_t xpmem_addr; uintptr_t base, bound; int rc, i; /* protect rcache access */ OPAL_THREAD_LOCK(&ep->lock); /* use btl/self for self communication */ assert (ep->peer_smp_rank != MCA_BTL_VADER_LOCAL_RANK); base = (uintptr_t) down_align_addr(rem_ptr, mca_btl_vader_component.log_attach_align); bound = (uintptr_t) up_align_addr((void *)((uintptr_t) rem_ptr + size - 1), mca_btl_vader_component.log_attach_align) + 1; if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) { bound = VADER_MAX_ADDRESS; } /* several segments may match the base pointer */ rc = rcache->rcache_find_all (rcache, (void *) base, bound - base, regs, 10); for (i = 0 ; i < rc ; ++i) { if (bound <= (uintptr_t)regs[i]->bound && base >= (uintptr_t)regs[i]->base) { opal_atomic_add (®s[i]->ref_count, 1); reg = regs[i]; goto reg_found; } if (regs[i]->flags & MCA_MPOOL_FLAGS_PERSIST) { continue; } /* remove this pointer from the rcache and decrement its reference count (so it is detached later) */ rc = rcache->rcache_delete (rcache, regs[i]); if (OPAL_UNLIKELY(0 != rc)) { /* someone beat us to it? */ break; } /* start the new segment from the lower of the two bases */ base = (uintptr_t) regs[i]->base < base ? 
(uintptr_t) regs[i]->base : base; opal_atomic_add (®s[i]->ref_count, -1); if (OPAL_LIKELY(0 == regs[i]->ref_count)) { /* this pointer is not in use */ (void) xpmem_detach (regs[i]->alloc_base); OBJ_RELEASE(regs[i]); } break; } reg = OBJ_NEW(mca_mpool_base_registration_t); if (OPAL_LIKELY(NULL != reg)) { /* stick around for awhile */ reg->ref_count = 2; reg->base = (unsigned char *) base; reg->bound = (unsigned char *) bound; reg->flags = flags; #if defined(HAVE_SN_XPMEM_H) xpmem_addr.id = ep->apid; #else xpmem_addr.apid = ep->apid; #endif xpmem_addr.offset = base; reg->alloc_base = xpmem_attach (xpmem_addr, bound - base, NULL); if (OPAL_UNLIKELY((void *)-1 == reg->alloc_base)) { OPAL_THREAD_UNLOCK(&ep->lock); OBJ_RELEASE(reg); return NULL; } opal_memchecker_base_mem_defined (reg->alloc_base, bound - base); rcache->rcache_insert (rcache, reg, 0); } reg_found: opal_atomic_wmb (); *local_ptr = (void *) ((uintptr_t) reg->alloc_base + (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base)); OPAL_THREAD_UNLOCK(&ep->lock); return reg; }
/*
 * We have received a segment, take action based on the
 * packet type in the BTL header.
 *
 * module           - usnic module the segment arrived on; its receive
 *                    statistics counters are updated here
 * seg              - the received segment
 * repost_recv_head - head of a linked list of receive work requests; this
 *                    segment's receive descriptor is always pushed onto it
 *                    before returning
 *
 * Three payload types are handled: FRAG (delivered to the registered
 * active-message callback, or copied to put_addr for a PUT), CHUNK (copied
 * into a per-endpoint reassembly slot and delivered once all bytes have
 * arrived), and ACK.  Anything else is counted as unknown and dropped.
 */
void ompi_btl_usnic_recv(ompi_btl_usnic_module_t *module,
                         ompi_btl_usnic_recv_segment_t *seg,
                         struct ibv_recv_wr **repost_recv_head)
{
    ompi_btl_usnic_segment_t *bseg;
    mca_btl_active_message_callback_t* reg;
    ompi_btl_usnic_endpoint_t *endpoint;
    ompi_btl_usnic_btl_chunk_header_t *chunk_hdr;
    uint32_t window_index;
#if MSGDEBUG1
    char src_mac[32];
    char dest_mac[32];
#endif

    bseg = &seg->rs_base;

    ++module->num_total_recvs;

    /* Valgrind help */
    opal_memchecker_base_mem_defined((void*)(seg->rs_recv_desc.sg_list[0].addr),
                                     seg->rs_recv_desc.sg_list[0].length);

#if MSGDEBUG1
    memset(src_mac, 0, sizeof(src_mac));
    memset(dest_mac, 0, sizeof(dest_mac));
    ompi_btl_usnic_sprintf_gid_mac(src_mac,
            &seg->rs_protocol_header->grh.sgid);
    ompi_btl_usnic_sprintf_gid_mac(dest_mac,
            &seg->rs_protocol_header->grh.dgid);

#if MSGDEBUG
    opal_output(0, "Got message from MAC %s", src_mac);
    opal_output(0, "Looking for sender: 0x%016lx",
        bseg->us_btl_header->sender);
#endif
#endif

    /* Find out who sent this segment */
    endpoint = lookup_sender(module, bseg);
    seg->rs_endpoint = endpoint;
    if (FAKE_RECV_FRAG_DROP || OPAL_UNLIKELY(NULL == endpoint)) {
        /* No idea who this was from, so drop it */
#if MSGDEBUG1
        opal_output(0, "=== Unknown sender; dropped: from MAC %s to MAC %s, seq %" UDSEQ,
                    src_mac,
                    dest_mac,
                    bseg->us_btl_header->seq);
#endif
        ++module->num_unk_recvs;
        goto repost_no_endpoint;
    }

    /***********************************************************************/
    /* Segment is an incoming frag */
    if (OMPI_BTL_USNIC_PAYLOAD_TYPE_FRAG == bseg->us_btl_header->payload_type) {

        /* Is incoming sequence # ok? */
        if (!ompi_btl_usnic_check_rx_seq(endpoint, seg, &window_index)) {
            goto repost;
        }

#if MSGDEBUG1
        opal_output(0, "<-- Received FRAG ep %p, seq %" UDSEQ ", len=%d\n",
                    (void*) endpoint,
                    seg->rs_base.us_btl_header->seq,
                    seg->rs_base.us_btl_header->payload_len);
#if 0

        opal_output(0, "<-- Received FRAG ep %p, seq %" UDSEQ " from %s to %s: GOOD! (rel seq %d, lowest seq %" UDSEQ ", highest seq: %" UDSEQ ", rwstart %d) seg %p, module %p\n",
                    (void*) endpoint,
                    seg->rs_base.us_btl_header->seq,
                    src_mac, dest_mac,
                    window_index,
                    endpoint->endpoint_next_contig_seq_to_recv,
                    endpoint->endpoint_highest_seq_rcvd,
                    endpoint->endpoint_rfstart,
                    (void*) seg, (void*) module);
        if (seg->rs_base.us_btl_header->put_addr != NULL) {
            opal_output(0, "  put_addr = %p\n",
                    seg->rs_base.us_btl_header->put_addr);
        }
#endif
#endif

        /*
         * update window before callback because callback might
         * generate a send, and we'd like to piggy-back ACK if possible
         */
        ompi_btl_usnic_update_window(endpoint, window_index);

        /* Stats */
        ++module->num_frag_recvs;

        /* If this is not a PUT, Pass this segment up to the PML.
         * Be sure to get the payload length from the BTL header because
         * the L2 layer may artificially inflate (or otherwise change)
         * the frame length to meet minimum sizes, add protocol information,
         * etc.
         */
        if (seg->rs_base.us_btl_header->put_addr == NULL) {
            reg = mca_btl_base_active_message_trigger +
                bseg->us_payload.pml_header->tag;
            seg->rs_segment.seg_len = bseg->us_btl_header->payload_len;
            reg->cbfunc(&module->super, bseg->us_payload.pml_header->tag,
                        &seg->rs_desc, reg->cbdata);

        /*
         * If this is a PUT, need to copy it to user buffer
         */
        } else {
#if MSGDEBUG1
            /* chunk_hdr is not set on the FRAG path; print the destination
             * taken from the BTL header, which is what memcpy() below uses */
            opal_output(0, "Copy %d PUT bytes to %p\n",
                seg->rs_base.us_btl_header->payload_len,
                seg->rs_base.us_btl_header->put_addr);
#endif
            memcpy(seg->rs_base.us_btl_header->put_addr,
                    seg->rs_base.us_payload.raw,
                    seg->rs_base.us_btl_header->payload_len);
        }

        goto repost;
    }

    /***********************************************************************/
    /* Segment is an incoming chunk */
    if (OMPI_BTL_USNIC_PAYLOAD_TYPE_CHUNK == bseg->us_btl_header->payload_type) {
        int frag_index;
        ompi_btl_usnic_rx_frag_info_t *fip;

        /* Is incoming sequence # ok? */
        if (!ompi_btl_usnic_check_rx_seq(endpoint, seg, &window_index)) {
            goto repost;
        }

#if MSGDEBUG1
        opal_output(0, "<-- Received CHUNK fid %d ep %p, seq %" UDSEQ " from %s to %s: GOOD! (rel seq %d, lowest seq %" UDSEQ ", highest seq: %" UDSEQ ", rwstart %d) seg %p, module %p\n",
                    seg->rs_base.us_btl_chunk_header->ch_frag_id,
                    (void*) endpoint,
                    seg->rs_base.us_btl_chunk_header->ch_hdr.seq,
                    src_mac, dest_mac,
                    window_index,
                    endpoint->endpoint_next_contig_seq_to_recv,
                    endpoint->endpoint_highest_seq_rcvd,
                    endpoint->endpoint_rfstart,
                    (void*) seg, (void*) module);
#endif

        /* start a new fragment if not one in progress
         * alloc memory, etc.  when last byte arrives, dealloc the
         * frag_id and pass data to PML
         */
        chunk_hdr = seg->rs_base.us_btl_chunk_header;
        frag_index = chunk_hdr->ch_frag_id % MAX_ACTIVE_FRAGS;
        fip = &(endpoint->endpoint_rx_frag_info[frag_index]);

        /* frag_id == 0 means this slot is empty, grab it! */
        if (0 == fip->rfi_frag_id) {
            fip->rfi_frag_id = chunk_hdr->ch_frag_id;
            fip->rfi_frag_size = chunk_hdr->ch_frag_size;
            if (chunk_hdr->ch_hdr.put_addr == NULL) {
                int pool;

                fip->rfi_data = NULL;

                /* See which data pool this should come from,
                 * or if it should be malloc()ed
                 */
                pool = fls(chunk_hdr->ch_frag_size-1);
                if (pool >= module->first_pool &&
                        pool <= module->last_pool) {
                    ompi_free_list_item_t* item;
                    OMPI_FREE_LIST_GET_MT(&module->module_recv_buffers[pool],
                                          item);
                    if (OPAL_LIKELY(NULL != item)) {
                        fip->rfi_data = (char *)item;
                        fip->rfi_data_pool = pool;
                    }
                }
                /* no pool buffer available: fall back to the heap;
                 * rfi_data_pool == 0 marks the buffer as malloc()ed */
                if (fip->rfi_data == NULL) {
                    fip->rfi_data = malloc(chunk_hdr->ch_frag_size);
                    fip->rfi_data_pool = 0;
                }
                if (fip->rfi_data == NULL) {
                    abort();
                }
#if MSGDEBUG2
opal_output(0, "Start large recv to %p, size=%d\n",
        fip->rfi_data, chunk_hdr->ch_frag_size);
#endif
            } else {
#if MSGDEBUG2
opal_output(0, "Start PUT to %p\n", chunk_hdr->ch_hdr.put_addr);
#endif
                fip->rfi_data = chunk_hdr->ch_hdr.put_addr;
            }
            fip->rfi_bytes_left = chunk_hdr->ch_frag_size;
            fip->rfi_frag_id = chunk_hdr->ch_frag_id;

        /* frag_id is not 0 - it must match, drop if not */
        } else if (fip->rfi_frag_id != chunk_hdr->ch_frag_id) {
            ++module->num_badfrag_recvs;
            goto repost;
        }
#if MSGDEBUG1
        opal_output(0, "put_addr=%p, copy_addr=%p, off=%d\n",
                chunk_hdr->ch_hdr.put_addr,
                fip->rfi_data+chunk_hdr->ch_frag_offset,
                chunk_hdr->ch_frag_offset);
#endif

        /* Stats */
        ++module->num_chunk_recvs;

        /* validate offset and len to be within fragment */
        assert(chunk_hdr->ch_frag_offset + chunk_hdr->ch_hdr.payload_len <=
                fip->rfi_frag_size);
        assert(fip->rfi_frag_size == chunk_hdr->ch_frag_size);

        /* copy the data into place */
        memcpy(fip->rfi_data + chunk_hdr->ch_frag_offset, (char *)(chunk_hdr+1),
                chunk_hdr->ch_hdr.payload_len);

        /* update sliding window */
        ompi_btl_usnic_update_window(endpoint, window_index);

        fip->rfi_bytes_left -= chunk_hdr->ch_hdr.payload_len;
        if (0 == fip->rfi_bytes_left) {
            mca_btl_base_header_t *pml_header;
            mca_btl_base_descriptor_t desc;
            mca_btl_base_segment_t segment;

            /* Get access to PML header in assembled fragment so we
             * can pull out the tag
             */
            pml_header = (mca_btl_base_header_t *)(fip->rfi_data);
            segment.seg_addr.pval = pml_header;
            segment.seg_len = fip->rfi_frag_size;
            desc.des_dst = &segment;
            desc.des_dst_cnt = 1;

            /* only up to PML if this was not a put */
            if (chunk_hdr->ch_hdr.put_addr == NULL) {

                /* Pass this segment up to the PML */
#if MSGDEBUG2
                opal_output(0, "  large FRAG complete, pass up %p, %d bytes, tag=%d\n",
                        desc.des_dst->seg_addr.pval, desc.des_dst->seg_len,
                        pml_header->tag);
#endif
                reg = mca_btl_base_active_message_trigger + pml_header->tag;

                /* mca_pml_ob1_recv_frag_callback_frag() */
                reg->cbfunc(&module->super, pml_header->tag,
                        &desc, reg->cbdata);

                /* free temp buffer for non-put */
                if (0 == fip->rfi_data_pool) {
                    free(fip->rfi_data);
                } else {
                    OMPI_FREE_LIST_RETURN_MT(
                            &module->module_recv_buffers[fip->rfi_data_pool],
                            (ompi_free_list_item_t *)fip->rfi_data);
                }

#if MSGDEBUG2
            } else {
                opal_output(0, "PUT complete, suppressing callback\n");
#endif
            }

            /* release the fragment ID */
            fip->rfi_frag_id = 0;

            /* force immediate ACK */
            endpoint->endpoint_acktime = 0;
        }
        goto repost;
    }

    /***********************************************************************/
    /* Frag is an incoming ACK */
    else if (OPAL_LIKELY(OMPI_BTL_USNIC_PAYLOAD_TYPE_ACK ==
                         bseg->us_btl_header->payload_type)) {
        ompi_btl_usnic_seq_t ack_seq;

        /* sequence being ACKed */
        ack_seq = bseg->us_btl_header->ack_seq;

        /* Stats */
        ++module->num_ack_recvs;

#if MSGDEBUG1
        opal_output(0, "    Received ACK for sequence number %" UDSEQ " from %s to %s\n",
                    bseg->us_btl_header->ack_seq, src_mac, dest_mac);
#endif
        ompi_btl_usnic_handle_ack(endpoint, ack_seq);

        goto repost;
    }

    /***********************************************************************/
    /* Have no idea what the frag is; drop it */
    else {
        ++module->num_unk_recvs;
        opal_output(0, "==========================unknown 2");
        goto repost;
    }

    /***********************************************************************/
 repost:

    /* if endpoint exiting, and all ACKs received, release the endpoint */
    if (endpoint->endpoint_exiting && ENDPOINT_DRAINED(endpoint)) {
        OBJ_RELEASE(endpoint);
    }
 repost_no_endpoint:
    ++module->num_recv_reposts;

    /* Add recv to linked list for reposting */
    seg->rs_recv_desc.next = *repost_recv_head;
    *repost_recv_head = &seg->rs_recv_desc;
}
/*
 * Finish connecting to a scif peer.  Must be called with the endpoint lock held.
 *
 * Allocates the local buffer, exchanges buffer offsets with the peer over
 * the scif endpoint, maps the peer's segment, and initializes the circular
 * send/receive buffer indices.
 *
 * ep      - endpoint being connected
 * passive - true: receive the peer's offset first, then send ours;
 *           false: send ours first, then receive the peer's.  The two sides
 *           use opposite orders so the blocking calls pair up.
 *
 * Returns OPAL_SUCCESS and sets ep->state to MCA_BTL_SCIF_EP_STATE_CONNECTED
 * on success; returns the error from mca_btl_scif_ep_get_buffer(), or
 * OPAL_ERROR if the offset exchange or the mapping fails (the local buffer
 * is released via mca_btl_scif_ep_free_buffer() in those two cases).
 */
static int mca_btl_scif_ep_connect_finish (mca_btl_base_endpoint_t *ep, bool passive) {
    int rc;

    rc = mca_btl_scif_ep_get_buffer (ep);
    if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
        BTL_VERBOSE(("error allocating buffer for scif peer"));
        return rc;
    }

    if (passive) {
        rc = scif_recv (ep->scif_epd, &ep->send_buffer.scif_offset,
                        sizeof (ep->send_buffer.scif_offset), SCIF_RECV_BLOCK);
        if (OPAL_LIKELY(-1 != rc)) {
            rc = scif_send (ep->scif_epd, &ep->recv_buffer.scif_offset,
                            sizeof (ep->recv_buffer.scif_offset), SCIF_SEND_BLOCK);
        }
    } else {
        rc = scif_send (ep->scif_epd, &ep->recv_buffer.scif_offset,
                        sizeof (ep->recv_buffer.scif_offset), SCIF_SEND_BLOCK);
        if (OPAL_LIKELY(-1 != rc)) {
            rc = scif_recv (ep->scif_epd, &ep->send_buffer.scif_offset,
                            sizeof (ep->send_buffer.scif_offset), SCIF_RECV_BLOCK);
        }
    }

    if (OPAL_UNLIKELY(-1 == rc)) {
        BTL_VERBOSE(("error exchanging connection data with peer %d",
                     ep->peer_proc->proc_name.vpid));
        mca_btl_scif_ep_free_buffer (ep);
        return OPAL_ERROR;
    }

    BTL_VERBOSE(("remote peer %d has scif offset %lu", ep->peer_proc->proc_name.vpid,
                 (unsigned long) ep->send_buffer.scif_offset));

    ep->send_buffer.buffer = scif_mmap (0, mca_btl_scif_component.segment_size,
                                        SCIF_PROT_READ | SCIF_PROT_WRITE,
                                        0, ep->scif_epd, ep->send_buffer.scif_offset);
    /* scif_mmap() reports failure with (void *) -1 (SCIF_MMAP_FAILED in
     * scif.h), like mmap(), not with NULL; the NULL test is kept as an
     * extra guard. */
    if (OPAL_UNLIKELY((void *) -1 == ep->send_buffer.buffer ||
                      NULL == ep->send_buffer.buffer)) {
        BTL_VERBOSE(("error in scif_mmap"));
        /* do not leave the failure sentinel behind as if it were a mapping */
        ep->send_buffer.buffer = NULL;
        mca_btl_scif_ep_free_buffer (ep);
        return OPAL_ERROR;
    }

    opal_memchecker_base_mem_defined (ep->send_buffer.buffer, mca_btl_scif_component.segment_size);

    BTL_VERBOSE(("remote peer %d buffer mapped to local pointer %p", ep->peer_proc->proc_name.vpid,
                 ep->send_buffer.buffer));

    /* setup the circular send buffers */
    ep->send_buffer.start = ep->send_buffer.end = 64;

    /* the first two 32-bit words of the mapped segment hold the start/end indices */
    ep->send_buffer.startp = (uint32_t *) ep->send_buffer.buffer;
    ep->send_buffer.endp = ep->send_buffer.startp + 1;

    ep->recv_buffer.start = 64;

    /* connection complete */
    ep->state = MCA_BTL_SCIF_EP_STATE_CONNECTED;

    BTL_VERBOSE(("btl/scif connection to remote peer %d established", ep->peer_proc->proc_name.vpid));

    return OPAL_SUCCESS;
}
/* look up the remote pointer in the peer rcache and attach if * necessary */ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr, size_t size, int flags, void **local_ptr) { mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align; mca_rcache_base_registration_t *reg = NULL; vader_check_reg_ctx_t check_ctx = {.ep = ep, .reg = ®, .vma_module = vma_module}; xpmem_addr_t xpmem_addr; uintptr_t base, bound; int rc; base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t); bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1; if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) { bound = VADER_MAX_ADDRESS; } check_ctx.base = base; check_ctx.bound = bound; /* several segments may match the base pointer */ rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, vader_check_reg, &check_ctx); if (2 == rc) { /* start the new segment from the lower of the two bases */ base = (uintptr_t) reg->base < base ? 
(uintptr_t) reg->base : base; if (OPAL_LIKELY(0 == opal_atomic_add_32 (®->ref_count, -1))) { /* this pointer is not in use */ (void) xpmem_detach (reg->rcache_context); OBJ_RELEASE(reg); } reg = NULL; } if (NULL == reg) { reg = OBJ_NEW(mca_rcache_base_registration_t); if (OPAL_LIKELY(NULL != reg)) { /* stick around for awhile */ reg->ref_count = 2; reg->base = (unsigned char *) base; reg->bound = (unsigned char *) bound; reg->flags = flags; reg->alloc_base = (void *) (intptr_t) ep->peer_smp_rank; #if defined(HAVE_SN_XPMEM_H) xpmem_addr.id = ep->segment_data.xpmem.apid; #else xpmem_addr.apid = ep->segment_data.xpmem.apid; #endif xpmem_addr.offset = base; reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL); if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) { OBJ_RELEASE(reg); return NULL; } opal_memchecker_base_mem_defined (reg->rcache_context, bound - base); mca_rcache_base_vma_insert (vma_module, reg, 0); } } opal_atomic_wmb (); *local_ptr = (void *) ((uintptr_t) reg->rcache_context + (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base)); return reg; }