static int progress_callback(void) { int ret, count = 0; ptl_event_t ev; ompi_osc_portals4_request_t *req; int32_t ops; while (true) { ret = PtlEQGet(mca_osc_portals4_component.matching_eq_h, &ev); if (PTL_OK == ret) { goto process; } else if (PTL_EQ_DROPPED == ret) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlEQGet reported dropped event", __FILE__, __LINE__); goto process; } else if (PTL_EQ_EMPTY == ret) { return 0; } else { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: PtlEQGet failed: %d\n", __FILE__, __LINE__, ret); return 0; } process: if (ev.ni_fail_type != PTL_OK) { opal_output_verbose(1, ompi_osc_base_framework.framework_output, "%s:%d: event failure: %d %d", __FILE__, __LINE__, ev.type, ev.ni_fail_type); return 0; } count++; if (NULL != ev.user_ptr) { /* be sure that we receive the PTL_EVENT_LINK */ if (ev.type == PTL_EVENT_LINK) { *(int *)ev.user_ptr = *(int *)ev.user_ptr + 1; opal_condition_broadcast(&mca_osc_portals4_component.cond); continue; } req = (ompi_osc_portals4_request_t*) ev.user_ptr; opal_atomic_add_size_t(&req->super.req_status._ucount, ev.mlength); ops = opal_atomic_add_32(&req->ops_committed, 1); if (ops == req->ops_expected) { OPAL_THREAD_LOCK(&ompi_request_lock); ompi_request_complete(&req->super, true); OPAL_THREAD_UNLOCK(&ompi_request_lock); } } } return count; }
/*
 * Atomically add delta to the 32-bit word that lives offset bytes past the
 * start of target's lock structure in module->node_states. The result of
 * the addition is discarded.
 */
static inline void lk_add32(ompi_osc_sm_module_t *module, int target, size_t offset, uint32_t delta)
{
    char *lock_start = (char*) &module->node_states[target].lock;
    int32_t *word = (int32_t*) (lock_start + offset);

    opal_atomic_add_32(word, delta);
}
/*
 * Atomically add delta to the 32-bit word that lives offset bytes past the
 * start of target's lock structure and return the value the word held
 * before the addition.
 */
static inline uint32_t lk_fetch_add32(ompi_osc_sm_module_t *module, int target, size_t offset, uint32_t delta)
{
    char *lock_start = (char*) &module->node_states[target].lock;
    int32_t *word = (int32_t*) (lock_start + offset);

    /* opal_atomic_add_32 hands back the post-add value, so taking delta
     * back off recovers what was stored before the update */
    return opal_atomic_add_32(word, delta) - delta;
}
/**
 * Drop one reference on an xpmem registration and tear it down when unused.
 *
 * The reference count is decremented atomically. When it reaches zero and
 * the registration is not flagged MCA_RCACHE_FLAGS_PERSIST, the registration
 * is removed from the vader VMA module, the attached range is marked
 * no-access for the memchecker, the xpmem attachment is detached and the
 * registration object is released.
 *
 * @param reg  registration being returned
 * @param ep   endpoint the registration belongs to (not used by this
 *             implementation)
 */
void vader_return_registration (mca_rcache_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
{
    mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module;
    int32_t ref_count;

    ref_count = opal_atomic_add_32 (&reg->ref_count, -1);
    if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_RCACHE_FLAGS_PERSIST))) {
        /* take the registration out of the VMA tree before detaching it */
        mca_rcache_base_vma_delete (vma_module, reg);

        opal_memchecker_base_mem_noaccess (reg->rcache_context, (uintptr_t)(reg->bound - reg->base));
        (void)xpmem_detach (reg->rcache_context);
        OBJ_RELEASE (reg);
    }
}
/*
 * Record one more user of the event library.
 *
 * Atomically bumps num_event_users, reports the new count on the debug
 * output stream, and then arranges for event progress to run on the next
 * pass: with timers the last-run timestamp is moved back by one delta,
 * otherwise the tick counter is reset to zero.
 */
void opal_progress_event_users_increment(void)
{
    int32_t users_now = opal_atomic_add_32(&num_event_users, 1);

    OPAL_OUTPUT((debug_output, "progress: event_users_increment setting count to %d", users_now));

#if !OPAL_PROGRESS_USE_TIMERS
    /* resetting the tick counter is always harmless */
    event_progress_counter = 0;
#else
    /* push the timestamp back so the next round is already past the delta */
    event_progress_last_time -= event_progress_delta;
#endif
}
int opal_progress_event_increment() { int32_t val; val = opal_atomic_add_32(&event_num_mpi_users, 1); #if OPAL_PROGRESS_USE_TIMERS /* force an update next round (we'll be past the delta) */ event_progress_last_time -= event_progress_delta; #else /* always reset the tick rate - can't hurt */ event_progress_counter = 0; #endif return OPAL_SUCCESS; }
/*
 * Per-registration callback used while searching the grdma cache.
 *
 * ctx points at a mca_rcache_base_find_args_t describing the wanted range
 * and access flags. Returns 1 (and stores the registration in args->reg,
 * with its reference count bumped) when grdma_reg can satisfy the request;
 * returns 0 otherwise.
 *
 * A registration that covers the range but lacks some requested access
 * flags is retired: the requested flags are widened to include the ones it
 * had, and the registration is either deregistered immediately (no
 * references) or marked invalid so it goes away when it is deregistered.
 */
static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_reg, void *ctx)
{
    mca_rcache_base_find_args_t *find_args = (mca_rcache_base_find_args_t *) ctx;
    mca_rcache_grdma_module_t *module = find_args->rcache_grdma;

    /* ignore registrations that are invalid or owned by another rcache */
    if (grdma_reg->flags & MCA_RCACHE_FLAGS_INVALID) {
        return 0;
    }
    if (&module->super != grdma_reg->rcache) {
        return 0;
    }

    /* the registration must fully contain the requested range */
    if (grdma_reg->base > find_args->base) {
        return 0;
    }
    if (grdma_reg->bound < find_args->bound) {
        return 0;
    }

    if (OPAL_UNLIKELY((find_args->access_flags & grdma_reg->access_flags) != find_args->access_flags)) {
        /* insufficient access: remember the union of flags for the caller */
        find_args->access_flags |= grdma_reg->access_flags;

        if (0 == grdma_reg->ref_count) {
            /* unused: pull it off the LRU (if it was cacheable) and drop it */
            if (registration_is_cacheable(grdma_reg)) {
                opal_list_remove_item (&module->cache->lru_list, (opal_list_item_t *) grdma_reg);
            }

            dereg_mem (grdma_reg);
        } else {
            /* still referenced: hide it from lookups now ... */
            if (!(grdma_reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
                mca_rcache_base_vma_delete (module->cache->vma_module, grdma_reg);
            }

            /* ... and have it disappear when it is deregistered */
            grdma_reg->flags |= MCA_RCACHE_FLAGS_INVALID | MCA_RCACHE_FLAGS_CACHE_BYPASS;
        }

        /* this registration cannot be used */
        return 0;
    }

    /* cache hit: the requested segment lies inside this registration */
    if (0 == grdma_reg->ref_count) {
        /* an unreferenced registration found here is sitting on the LRU
         * list (original note: leave pinned must be set for that) */
        opal_list_remove_item(&module->cache->lru_list, (opal_list_item_t *) grdma_reg);
    }

    module->stat_cache_hit++;

    int32_t refs_now = opal_atomic_add_32 (&grdma_reg->ref_count, 1);
    OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
                         "returning existing registration %p. references %d", (void *) grdma_reg, refs_now));
    (void)refs_now;

    find_args->reg = grdma_reg;
    return 1;
}
static void *thread_main(void *arg) { int rank = (int) (unsigned long) arg; int i; /* thread tests */ for (i = 0; i < nreps; i++) { opal_atomic_add_32(&val32, 5); #if OPAL_HAVE_ATOMIC_MATH_64 opal_atomic_add_64(&val64, 5); #endif opal_atomic_add(&valint, 5); } return (void *) (unsigned long) (rank + 1000); }
/**
 * Drop one reference on an xpmem registration and tear it down when unused.
 *
 * The reference count is decremented atomically. When it reaches zero and
 * the registration is not flagged MCA_MPOOL_FLAGS_PERSIST, the registration
 * is deleted from the endpoint's rcache (under the endpoint lock), the
 * attached range is marked no-access for the memchecker, the xpmem
 * attachment is detached and the registration object is released.
 *
 * @param reg  registration being returned
 * @param ep   endpoint that owns the rcache holding the registration
 */
void vader_return_registration (mca_mpool_base_registration_t *reg, struct mca_btl_base_endpoint_t *ep)
{
    struct mca_rcache_base_module_t *rcache = ep->rcache;
    int32_t ref_count;

    ref_count = opal_atomic_add_32 (&reg->ref_count, -1);
    if (OPAL_UNLIKELY(0 == ref_count && !(reg->flags & MCA_MPOOL_FLAGS_PERSIST))) {
        /* protect rcache access */
        OPAL_THREAD_LOCK(&ep->lock);
        rcache->rcache_delete (rcache, reg);
        OPAL_THREAD_UNLOCK(&ep->lock);

        opal_memchecker_base_mem_noaccess (reg->alloc_base, (uintptr_t)(reg->bound - reg->base));
        (void)xpmem_detach (reg->alloc_base);
        OBJ_RELEASE (reg);
    }
}
/**
 * Add a set of processes to the shared-memory BTL.
 *
 * Walks procs[], creates an endpoint for every peer that is in the caller's
 * job, on the local node and of the same architecture, and marks it in the
 * reachability bitmap. If at least one such peer exists, the BTL is
 * initialized on first use, this process's FIFOs are set up, the local
 * processes synchronize through the seg_inited counter in the shared
 * segment, and the local addresses of the peers' FIFOs are computed.
 *
 * @param btl           the sm BTL module
 * @param nprocs        number of entries in procs and peers
 * @param procs         processes to examine
 * @param peers         output: endpoint per process (NULL when the process
 *                      is in another job or not on the local node)
 * @param reachability  output: bit set for every process reachable via sm
 *
 * @return OMPI_SUCCESS, OMPI_ERR_OUT_OF_RESOURCE when the local proc cannot
 *         be obtained, OMPI_ERROR when an endpoint cannot be created or the
 *         local proc is not in procs, or the error code of a failing helper.
 */
int mca_btl_sm_add_procs( struct mca_btl_base_module_t* btl, size_t nprocs, struct ompi_proc_t **procs, struct mca_btl_base_endpoint_t **peers, opal_bitmap_t* reachability) {
    int return_code = OMPI_SUCCESS;
    int32_t n_local_procs = 0, proc, j, my_smp_rank = -1;
    ompi_proc_t* my_proc; /* pointer to caller's proc structure */
    mca_btl_sm_t *sm_btl;
    bool have_connected_peer = false;
    char **bases;
    /* for easy access to the mpool_sm_module */
    mca_mpool_sm_module_t *sm_mpool_modp = NULL;

    /* initialization */
    sm_btl = (mca_btl_sm_t *)btl;

    /* get pointer to my proc structure */
    if(NULL == (my_proc = ompi_proc_local()))
        return OMPI_ERR_OUT_OF_RESOURCE;

    /* Get unique host identifier for each process in the list,
     * and identify procs that are on this host. Add procs on this
     * host to shared memory reachability list. Also, get number
     * of local procs in the procs list. */
    for (proc = 0; proc < (int32_t)nprocs; proc++) {
        /* check to see if this proc can be reached via shmem (i.e.,
           if they're on my local host and in my job) */
        if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid || !OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags)) {
            peers[proc] = NULL;
            continue;
        }

        /* check to see if this is me; my smp rank is my position among
         * the local procs seen so far */
        /* NOTE(review): peers[proc] is not written on this path or on the
         * architecture-mismatch path below, while the later loop tests
         * peers[proc] against NULL -- presumably the caller zeroes peers
         * beforehand; confirm. */
        if(my_proc == procs[proc]) {
            my_smp_rank = mca_btl_sm_component.my_smp_rank = n_local_procs++;
            continue;
        }

        /* sm doesn't support heterogeneous yet... */
        if (procs[proc]->proc_arch != my_proc->proc_arch) {
            continue;
        }

        /* we have someone to talk to */
        have_connected_peer = true;

        /* the endpoint is created with the peer's smp rank, i.e. the
         * current local-proc count */
        if(!(peers[proc] = create_sm_endpoint(n_local_procs, procs[proc]))) {
            return_code = OMPI_ERROR;
            goto CLEANUP;
        }
        n_local_procs++;

        /* add this proc to shared memory accessibility list */
        return_code = opal_bitmap_set_bit(reachability, proc);
        if(OMPI_SUCCESS != return_code)
            goto CLEANUP;
    }

    /* jump out if there's not someone we can talk to */
    if (!have_connected_peer)
        goto CLEANUP;

    /* make sure that my_smp_rank has been defined */
    if (-1 == my_smp_rank) {
        return_code = OMPI_ERROR;
        goto CLEANUP;
    }

    /* one-time BTL setup, performed on the first call that finds a peer */
    if (!sm_btl->btl_inited) {
        return_code = sm_btl_first_time_init(sm_btl, my_smp_rank, mca_btl_sm_component.sm_max_procs);
        if (return_code != OMPI_SUCCESS) {
            goto CLEANUP;
        }
    }

    /* set local proc's smp rank in the peers structure for
     * rapid access and calculate reachability */
    for(proc = 0; proc < (int32_t)nprocs; proc++) {
        if(NULL == peers[proc])
            continue;
        mca_btl_sm_component.sm_peers[peers[proc]->peer_smp_rank] = peers[proc];
        peers[proc]->my_smp_rank = my_smp_rank;
    }

    bases = mca_btl_sm_component.shm_bases;
    sm_mpool_modp = (mca_mpool_sm_module_t *)mca_btl_sm_component.sm_mpool;

    /* initialize own FIFOs */
    /*
     * The receiver initializes all its FIFOs. All components will
     * be allocated near the receiver. Nothing will be local to
     * "the sender" since there will be many senders.
     */
    for(j = mca_btl_sm_component.num_smp_procs; j < mca_btl_sm_component.num_smp_procs + FIFO_MAP_NUM(n_local_procs); j++) {
        return_code = sm_fifo_init( mca_btl_sm_component.fifo_size, mca_btl_sm_component.sm_mpool, &mca_btl_sm_component.fifo[my_smp_rank][j], mca_btl_sm_component.fifo_lazy_free);
        if(return_code != OMPI_SUCCESS)
            goto CLEANUP;
    }

    /* write barrier issued between FIFO setup and the seg_inited update */
    opal_atomic_wmb();

    /* Sync with other local procs. Force the FIFO initialization to always
     * happen before the readers access it.
     */
    opal_atomic_add_32(&mca_btl_sm_component.sm_seg->module_seg->seg_inited, 1);
    /* spin, driving progress, until every local proc has checked in */
    while( n_local_procs > mca_btl_sm_component.sm_seg->module_seg->seg_inited) {
        opal_progress();
        opal_atomic_rmb();
    }

    /* it is now safe to unlink the shared memory segment. only one process
     * needs to do this, so just let smp rank zero take care of it. */
    if (0 == my_smp_rank) {
        if (OMPI_SUCCESS != mca_common_sm_module_unlink(mca_btl_sm_component.sm_seg)) {
            /* it is "okay" if this fails at this point. we have gone this far,
             * so just warn about the failure and continue. this is probably
             * only triggered by a programming error. */
            opal_output(0, "WARNING: common_sm_module_unlink failed.\n");
        }
        /* SKG - another abstraction violation here, but I don't want to add
         * extra code in the sm mpool for further synchronization. */

        /* at this point, all processes have attached to the mpool segment. so
         * it is safe to unlink it here. */
        if (OMPI_SUCCESS != mca_common_sm_module_unlink(sm_mpool_modp->sm_common_module)) {
            opal_output(0, "WARNING: common_sm_module_unlink failed.\n");
        }
        if (-1 == unlink(mca_btl_sm_component.sm_mpool_rndv_file_name)) {
            opal_output(0, "WARNING: %s unlink failed.\n", mca_btl_sm_component.sm_mpool_rndv_file_name);
        }
        if (-1 == unlink(mca_btl_sm_component.sm_rndv_file_name)) {
            opal_output(0, "WARNING: %s unlink failed.\n", mca_btl_sm_component.sm_rndv_file_name);
        }
    }

    /* free up some space used by the name buffers */
    /* NOTE(review): the pointers are not reset after being freed, so a
     * second pass through this code would unlink/free dangling pointers --
     * verify this function cannot reach here twice. */
    free(mca_btl_sm_component.sm_mpool_ctl_file_name);
    free(mca_btl_sm_component.sm_mpool_rndv_file_name);
    free(mca_btl_sm_component.sm_ctl_file_name);
    free(mca_btl_sm_component.sm_rndv_file_name);

    /* coordinate with other processes */
    for(j = mca_btl_sm_component.num_smp_procs; j < mca_btl_sm_component.num_smp_procs + n_local_procs; j++) {
        ptrdiff_t diff;

        /* spin until this element is allocated */
        /* doesn't really wait for that process... FIFO might be allocated, but not initialized */
        opal_atomic_rmb();
        while(NULL == mca_btl_sm_component.shm_fifo[j]) {
            opal_progress();
            opal_atomic_rmb();
        }

        /* Calculate the difference as (my_base - their_base) */
        diff = ADDR2OFFSET(bases[my_smp_rank], bases[j]);

        /* store local address of remote fifos */
        mca_btl_sm_component.fifo[j] = (sm_fifo_t*)OFFSET2ADDR(diff, mca_btl_sm_component.shm_fifo[j]);

        /* cache local copy of peer memory node number */
        mca_btl_sm_component.mem_nodes[j] = mca_btl_sm_component.shm_mem_nodes[j];
    }

    /* update the local smp process count */
    mca_btl_sm_component.num_smp_procs += n_local_procs;

    /* make sure we have enough eager fragments for each process */
    return_code = ompi_free_list_resize_mt(&mca_btl_sm_component.sm_frags_eager, mca_btl_sm_component.num_smp_procs * 2);
    if (OMPI_SUCCESS != return_code)
        goto CLEANUP;

    /* single exit point: nothing is released here, the code is just returned */
CLEANUP:
    return return_code;
}
/*
 * Test driver for the opal atomic primitives.
 *
 * Usage: <program> <nthreads>
 *
 * First exercises compare-and-set (plain, acquire and release flavours for
 * 32-bit, 64-bit, int and pointer operands) and atomic add single-threaded,
 * checking both the return value and the resulting memory contents. Then,
 * when POSIX threads are available, starts nthreads threads running
 * thread_main and checks that every shared counter ends up at
 * 5 * nthreads * nreps.
 *
 * Returns 0 on success and 77 when the argument count is wrong (the message
 * calls this a skipped test; presumably 77 is the harness's skip status --
 * confirm). Failures are reported through assert() or exit(EXIT_FAILURE).
 *
 * NOTE(review): the atomic operations under test are evaluated inside
 * assert(); if this file is built with NDEBUG they are not executed at all.
 */
int main(int argc, char *argv[]) {
#if OPAL_HAVE_POSIX_THREADS
    int tid;
    pthread_t *th;
#endif

    if (argc != 2) {
        printf("*** Incorrect number of arguments. Skipping test\n");
        return 77;
    }
    /* number of worker threads for the threaded phase */
    nthreads = atoi(argv[1]);

    /* first test single-threaded functionality */

    /* each case below comes in two halves: a cmpset whose expected value
     * matches (returns 1, memory becomes the new value) and one whose
     * expected value does not match (returns 0, memory keeps 42) */

    /* -- cmpset 32-bit tests -- */
    vol32 = 42, old32 = 42, new32 = 50;
    assert(opal_atomic_cmpset_32(&vol32, old32, new32) == 1);
    opal_atomic_rmb();
    assert(vol32 == new32);

    vol32 = 42, old32 = 420, new32 = 50;
    assert(opal_atomic_cmpset_32(&vol32, old32, new32) == 0);
    opal_atomic_rmb();
    assert(vol32 == 42);

    /* acquire variants are checked without an explicit read barrier */
    vol32 = 42, old32 = 42, new32 = 50;
    assert(opal_atomic_cmpset_acq_32(&vol32, old32, new32) == 1);
    assert(vol32 == new32);

    vol32 = 42, old32 = 420, new32 = 50;
    assert(opal_atomic_cmpset_acq_32(&vol32, old32, new32) == 0);
    assert(vol32 == 42);

    vol32 = 42, old32 = 42, new32 = 50;
    assert(opal_atomic_cmpset_rel_32(&vol32, old32, new32) == 1);
    opal_atomic_rmb();
    assert(vol32 == new32);

    vol32 = 42, old32 = 420, new32 = 50;
    assert(opal_atomic_cmpset_rel_32(&vol32, old32, new32) == 0);
    opal_atomic_rmb();
    assert(vol32 == 42);

    /* -- cmpset 64-bit tests -- */
#if OPAL_HAVE_ATOMIC_MATH_64
    vol64 = 42, old64 = 42, new64 = 50;
    assert(1 == opal_atomic_cmpset_64(&vol64, old64, new64));
    opal_atomic_rmb();
    assert(new64 == vol64);

    vol64 = 42, old64 = 420, new64 = 50;
    assert(opal_atomic_cmpset_64(&vol64, old64, new64) == 0);
    opal_atomic_rmb();
    assert(vol64 == 42);

    vol64 = 42, old64 = 42, new64 = 50;
    assert(opal_atomic_cmpset_acq_64(&vol64, old64, new64) == 1);
    assert(vol64 == new64);

    vol64 = 42, old64 = 420, new64 = 50;
    assert(opal_atomic_cmpset_acq_64(&vol64, old64, new64) == 0);
    assert(vol64 == 42);

    vol64 = 42, old64 = 42, new64 = 50;
    assert(opal_atomic_cmpset_rel_64(&vol64, old64, new64) == 1);
    opal_atomic_rmb();
    assert(vol64 == new64);

    vol64 = 42, old64 = 420, new64 = 50;
    assert(opal_atomic_cmpset_rel_64(&vol64, old64, new64) == 0);
    opal_atomic_rmb();
    assert(vol64 == 42);
#endif

    /* -- cmpset int tests -- */
    volint = 42, oldint = 42, newint = 50;
    assert(opal_atomic_cmpset(&volint, oldint, newint) == 1);
    opal_atomic_rmb();
    assert(volint ==newint);

    volint = 42, oldint = 420, newint = 50;
    assert(opal_atomic_cmpset(&volint, oldint, newint) == 0);
    opal_atomic_rmb();
    assert(volint == 42);

    volint = 42, oldint = 42, newint = 50;
    assert(opal_atomic_cmpset_acq(&volint, oldint, newint) == 1);
    assert(volint == newint);

    volint = 42, oldint = 420, newint = 50;
    assert(opal_atomic_cmpset_acq(&volint, oldint, newint) == 0);
    assert(volint == 42);

    volint = 42, oldint = 42, newint = 50;
    assert(opal_atomic_cmpset_rel(&volint, oldint, newint) == 1);
    opal_atomic_rmb();
    assert(volint == newint);

    volint = 42, oldint = 420, newint = 50;
    assert(opal_atomic_cmpset_rel(&volint, oldint, newint) == 0);
    opal_atomic_rmb();
    assert(volint == 42);

    /* -- cmpset ptr tests -- */
    /* small integers cast to pointers serve as distinct pointer values;
     * they are compared, never dereferenced */
    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    assert(opal_atomic_cmpset_ptr(&volptr, oldptr, newptr) == 1);
    opal_atomic_rmb();
    assert(volptr == newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    assert(opal_atomic_cmpset_ptr(&volptr, oldptr, newptr) == 0);
    opal_atomic_rmb();
    assert(volptr == (void *) 42);

    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    assert(opal_atomic_cmpset_acq_ptr(&volptr, oldptr, newptr) == 1);
    assert(volptr == newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    assert(opal_atomic_cmpset_acq_ptr(&volptr, oldptr, newptr) == 0);
    assert(volptr == (void *) 42);

    volptr = (void *) 42, oldptr = (void *) 42, newptr = (void *) 50;
    assert(opal_atomic_cmpset_rel_ptr(&volptr, oldptr, newptr) == 1);
    opal_atomic_rmb();
    assert(volptr == newptr);

    volptr = (void *) 42, oldptr = (void *) 420, newptr = (void *) 50;
    assert(opal_atomic_cmpset_rel_ptr(&volptr, oldptr, newptr) == 0);
    opal_atomic_rmb();
    assert(volptr == (void *) 42);

    /* -- add_32 tests -- */
    /* the add must both return and store the post-add value */
    val32 = 42;
    assert(opal_atomic_add_32(&val32, 5) == (42 + 5));
    opal_atomic_rmb();
    assert((42 + 5) == val32);

    /* -- add_64 tests -- */
#if OPAL_HAVE_ATOMIC_MATH_64
    val64 = 42;
    assert(opal_atomic_add_64(&val64, 5) == (42 + 5));
    opal_atomic_rmb();
    assert((42 + 5) == val64);
#endif

    /* -- add_int tests -- */
    /* only the stored value is checked here, not the return value */
    valint = 42;
    opal_atomic_add(&valint, 5);
    opal_atomic_rmb();
    assert((42 + 5) == valint);

    /* threaded tests */

    /* reset the shared counters the worker threads will add to */
    val32 = 0;
#if OPAL_HAVE_ATOMIC_MATH_64
    val64 = 0ul;
#endif
    valint = 0;

    /* -- create the thread set -- */
#if OPAL_HAVE_POSIX_THREADS
    th = (pthread_t *) malloc(nthreads * sizeof(pthread_t));
    if (!th) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    /* the thread index is passed to thread_main encoded in the pointer */
    for (tid = 0; tid < nthreads; tid++) {
        if (pthread_create(&th[tid], NULL, thread_main, (void *) (unsigned long) tid) != 0) {
            perror("pthread_create");
            exit(EXIT_FAILURE);
        }
    }

    /* -- wait for the thread set to finish -- */
    for (tid = 0; tid < nthreads; tid++) {
        /* the value returned by the thread is collected but not inspected */
        void *thread_return;

        if (pthread_join(th[tid], &thread_return) != 0) {
            perror("pthread_join");
            exit(EXIT_FAILURE);
        }
    }
    free(th);

    /* every thread added 5 to each counter nreps times */
    opal_atomic_rmb();
    assert((5 * nthreads * nreps) == val32);
#if OPAL_HAVE_ATOMIC_MATH_64
    opal_atomic_rmb();
    assert((5 * nthreads * nreps) == val64);
#endif
    opal_atomic_rmb();
    assert((5 * nthreads * nreps) == valint);
#endif

    return 0;
}
void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep) { mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) ctx; mca_mpool_hugepage_hugepage_t *huge_page = hugepage_module->huge_page; size_t size = *sizep; void *base = NULL; char *path = NULL; int flags = MAP_PRIVATE; int fd = -1; int rc; size = OPAL_ALIGN(size, huge_page->page_size, size_t); if (huge_page->path) { int32_t count; count = opal_atomic_add_32 (&huge_page->count, 1); rc = asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path, getpid (), count); if (0 > rc) { return NULL; } fd = open (path, O_RDWR | O_CREAT, 0600); if (-1 == fd) { free (path); return NULL; } if (0 != ftruncate (fd, size)) { close (fd); unlink (path); free (path); return NULL; } } else { #if defined(MAP_ANONYMOUS) flags |= MAP_ANONYMOUS; #elif defined(MAP_ANON) /* older versions of OS X do not define MAP_ANONYMOUS (10.9.x and older) */ flags |= MAP_ANON; #endif } base = mmap (NULL, size, PROT_READ | PROT_WRITE, flags | huge_page->mmap_flags, fd, 0); close (fd); if (path) { unlink (path); free (path); } if (MAP_FAILED == base) { opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_verbose, "could not allocate huge page(s). falling back on standard pages"); /* fall back on regular pages */ base = mmap (NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0); } if (MAP_FAILED == base) { return NULL; } opal_mutex_lock (&hugepage_module->lock); opal_rb_tree_insert (&hugepage_module->allocation_tree, base, (void *) (intptr_t) size); opal_atomic_add (&mca_mpool_hugepage_component.bytes_allocated, (int64_t) size); opal_mutex_unlock (&hugepage_module->lock); OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose, "allocated segment %p of size %lu bytes", base, size)); *sizep = size; return base; }
/*
 * Handle one completed datagram on the given device, if there is one.
 *
 * When mca_btl_ugni_get_datagram reports a completion (return value 1) the
 * matching endpoint is looked up (via the remote proc name for completions
 * on the wildcard endpoint), its connection state machine is advanced, any
 * messages waiting in a connected endpoint's mailbox are processed, and the
 * wildcard datagram is reposted if that is what completed.
 *
 * Returns whatever mca_btl_ugni_get_datagram returned when that is not 1;
 * otherwise the value returned by mca_btl_ugni_smsg_process, or 0 if the
 * endpoint is not connected.
 */
int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device)
{
    mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_component.modules;
    mca_btl_base_endpoint_t *ep;
    gni_ep_handle_t handle;
    int processed = 0;

    const int rc = mca_btl_ugni_get_datagram (ugni_module, device, &handle, &ep);
    if (1 != rc) {
        return rc;
    }

    BTL_VERBOSE(("remote datagram completion on handle %p", handle));

    /* did the completion arrive on the wildcard endpoint? */
    const bool via_wildcard = (handle == ugni_module->wildcard_ep);

    if (via_wildcard) {
        /* wildcard completions carry the sender's name: map it to an endpoint */
        struct opal_proc_t *remote_proc = opal_proc_for_name (ugni_module->wc_remote_attr.proc_name);

        BTL_VERBOSE(("received connection attempt on wildcard endpoint from proc: %s",
                     OPAL_NAME_PRINT(ugni_module->wc_remote_attr.proc_name)));

        ep = mca_btl_ugni_get_ep (&ugni_module->super, remote_proc);
        if (OPAL_UNLIKELY(NULL == ep)) {
            /* there is no way to recover from this error so just abort() */
            BTL_ERROR(("could not find/allocate a btl endpoint for peer %s",
                       OPAL_NAME_PRINT(ugni_module->wc_remote_attr.proc_name)));
            abort ();
            return OPAL_ERR_NOT_FOUND;
        }
    }

    /* should not have gotten a NULL endpoint */
    assert (NULL != ep);

    BTL_VERBOSE(("got a datagram completion: ep = %p. wc = %d", (void *) ep, via_wildcard));

    /* NTH: TODO -- error handling */
    opal_mutex_lock (&ep->lock);

    if (!via_wildcard) {
        /* directed post complete */
        BTL_VERBOSE(("directed datagram complete for endpoint %p", (void *) ep));

        ep->dg_posted = false;
        (void) opal_atomic_add_32 (&ugni_module->active_datagrams, -1);
    }

    (void) mca_btl_ugni_ep_connect_progress (ep);

    opal_mutex_unlock (&ep->lock);

    /* process messages waiting in the endpoint's smsg mailbox */
    if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
        processed = mca_btl_ugni_smsg_process (ep);
    }

    /* repost the wildcard datagram */
    if (via_wildcard) {
        mca_btl_ugni_wildcard_ep_post (ugni_module);
    }

    return processed;
}
/* look up the remote pointer in the peer rcache and attach if * necessary */ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr, size_t size, int flags, void **local_ptr) { mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align; mca_rcache_base_registration_t *reg = NULL; vader_check_reg_ctx_t check_ctx = {.ep = ep, .reg = ®, .vma_module = vma_module}; xpmem_addr_t xpmem_addr; uintptr_t base, bound; int rc; base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t); bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1; if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) { bound = VADER_MAX_ADDRESS; } check_ctx.base = base; check_ctx.bound = bound; /* several segments may match the base pointer */ rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, vader_check_reg, &check_ctx); if (2 == rc) { /* start the new segment from the lower of the two bases */ base = (uintptr_t) reg->base < base ? 
(uintptr_t) reg->base : base; if (OPAL_LIKELY(0 == opal_atomic_add_32 (®->ref_count, -1))) { /* this pointer is not in use */ (void) xpmem_detach (reg->rcache_context); OBJ_RELEASE(reg); } reg = NULL; } if (NULL == reg) { reg = OBJ_NEW(mca_rcache_base_registration_t); if (OPAL_LIKELY(NULL != reg)) { /* stick around for awhile */ reg->ref_count = 2; reg->base = (unsigned char *) base; reg->bound = (unsigned char *) bound; reg->flags = flags; reg->alloc_base = (void *) (intptr_t) ep->peer_smp_rank; #if defined(HAVE_SN_XPMEM_H) xpmem_addr.id = ep->segment_data.xpmem.apid; #else xpmem_addr.apid = ep->segment_data.xpmem.apid; #endif xpmem_addr.offset = base; reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL); if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) { OBJ_RELEASE(reg); return NULL; } opal_memchecker_base_mem_defined (reg->rcache_context, bound - base); mca_rcache_base_vma_insert (vma_module, reg, 0); } } opal_atomic_wmb (); *local_ptr = (void *) ((uintptr_t) reg->rcache_context + (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base)); return reg; }