/**
 * Test if we have finished with the coll fragment descriptor,
 * and free all resources if so.
 **/
int mca_bcol_iboffload_free_tasks_frags_resources(
        mca_bcol_iboffload_collfrag_t *collfrag,
        ompi_free_list_t *frags_free)
{
    int rc;

    mca_bcol_iboffload_task_t *task = collfrag->tasks_to_release;
    mca_bcol_iboffload_component_t *cm = &mca_bcol_iboffload_component;

    /* Support for multiple frags we will add later
     * n_outstanding_frags = coll_req->n_frags_sent - coll_req->n_frag_net_complete; */

    while (NULL != task) {
        /* Return the frag (if its reference count is zero) */
        rc = release_frags_on_task(task, frags_free);
        if (OMPI_SUCCESS != rc) {
            return OMPI_ERROR;
        }

        /* Return the task: if the pointer is NULL we assume the task
           is a member of the common task list (tasks_free) */
        if (NULL == task->task_list) {
            OMPI_FREE_LIST_RETURN(&cm->tasks_free,
                                  (ompi_free_list_item_t *) task);
        } else {
            OMPI_FREE_LIST_RETURN(task->task_list,
                                  (ompi_free_list_item_t *) task);
        }

        task = task->next_task;
    }

    return OMPI_SUCCESS;
}
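/*
 * Illustrative sketch (not OMPI code): release_frags_on_task() above only
 * returns a fragment to its free list once the fragment's reference count
 * drops to zero.  The convention, with hypothetical frag_t/frag_release
 * names, looks like this:
 */
typedef struct {
    int ref_count;
} frag_t;

/* returns 1 if the fragment was freed, 0 if it is still referenced */
static int frag_release(frag_t *frag)
{
    if (--frag->ref_count > 0) {
        return 0;   /* another holder remains; nothing to free yet */
    }
    /* the count hit zero: return the fragment to its free list here */
    return 1;
}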
int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
                              mca_mpool_base_registration_t *reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    int rc = OMPI_SUCCESS;
    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    reg->ref_count--;
    if(reg->ref_count > 0) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OMPI_SUCCESS;
    }
    if(mca_mpool_rdma_component.leave_pinned &&
       !(reg->flags & (MCA_MPOOL_FLAGS_CACHE_BYPASS|MCA_MPOOL_FLAGS_PERSIST))) {
        /* if leave_pinned is set don't deregister memory, but put it
         * on MRU list for future use */
        opal_list_prepend(&mpool_rdma->mru_list, (opal_list_item_t*)reg);
    } else {
        rc = dereg_mem(mpool, reg);
        if(OMPI_SUCCESS == rc) {
            if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
                mpool->rcache->rcache_delete(mpool->rcache, reg);
            OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                                  (ompi_free_list_item_t*)reg);
        }
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    return rc;
}
int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect) {
    gni_return_t rc;

    do {
        if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
            /* nothing to do */
            break;
        }

        if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
            rc = GNI_SmsgSendWTag (ep->smsg_ep_handle, NULL, 0, NULL, 0, -1,
                                   MCA_BTL_UGNI_TAG_DISCONNECT);
            if (GNI_RC_SUCCESS != rc) {
                BTL_VERBOSE(("btl/ugni could not send close message"));
            }

            /* we might want to wait for local completion here (do we even care) */
        }

        (void) ompi_common_ugni_ep_destroy (&ep->smsg_ep_handle);
        (void) ompi_common_ugni_ep_destroy (&ep->rdma_ep_handle);

        OMPI_FREE_LIST_RETURN(&ep->btl->smsg_mboxes,
                              ((ompi_free_list_item_t *) ep->mailbox));
        ep->mailbox = NULL;

        ep->state = MCA_BTL_UGNI_EP_STATE_INIT;
    } while (0);

    return OMPI_SUCCESS;
}
/*
 * Return all the requests in the per-file freelist to the global list
 */
void mca_io_base_request_return(ompi_file_t *file)
{
    ompi_free_list_item_t *next;

    OPAL_THREAD_LOCK(&file->f_io_requests_lock);
    while (NULL != (next = (ompi_free_list_item_t*)
                    opal_list_remove_first(&file->f_io_requests))) {
        OMPI_FREE_LIST_RETURN(&mca_io_base_requests, next);
    }
    OPAL_THREAD_UNLOCK(&file->f_io_requests_lock);
}
int ompi_mtl_mxm_imrecv(struct mca_mtl_base_module_t* mtl,
                        struct opal_convertor_t *convertor,
                        struct ompi_message_t **message,
                        struct mca_mtl_request_t *mtl_request)
{
#if MXM_API >= MXM_VERSION(1,5)
    int ret;
    mxm_error_t err;
    mxm_recv_req_t *mxm_recv_req;
    mca_mtl_mxm_request_t *mtl_mxm_request;

    ompi_mtl_mxm_message_t *msgp =
        (ompi_mtl_mxm_message_t *) (*message)->req_ptr;

    mtl_mxm_request = (mca_mtl_mxm_request_t*) mtl_request;
    mxm_recv_req = &mtl_mxm_request->mxm.recv;

    /* prepare a receive request embedded in the MTL request */
    ret = ompi_mtl_mxm_recv_init(mtl_mxm_request, convertor, mxm_recv_req);
    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        return ret;
    }

    mxm_recv_req->tag       = msgp->tag;
    mxm_recv_req->tag_mask  = msgp->tag_mask;
    mxm_recv_req->base.mq   = msgp->mq;
    mxm_recv_req->base.conn = msgp->conn;

    err = mxm_message_recv(mxm_recv_req, msgp->mxm_msg);
    if (OPAL_UNLIKELY(MXM_OK != err)) {
        orte_show_help("help-mtl-mxm.txt", "error posting message receive", true,
                       mxm_error_string(err), mtl_mxm_request->buf,
                       mtl_mxm_request->length);
        return OMPI_ERROR;
    }

    OMPI_FREE_LIST_RETURN(&mca_mtl_mxm_component.mxm_messages,
                          (ompi_free_list_item_t *) msgp);

    ompi_message_return(*message);
    (*message) = MPI_MESSAGE_NULL;

    return OMPI_SUCCESS;
#else
    return OMPI_ERR_NOT_IMPLEMENTED;
#endif
}
int mca_mpool_rdma_release_memory(struct mca_mpool_base_module_t *mpool,
                                  void *base, size_t size)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *reg;
    ompi_pointer_array_t regs;
    int reg_cnt, i, err = 0;

    OBJ_CONSTRUCT(&regs, ompi_pointer_array_t);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, base, size, &regs);

    for(i = 0; i < reg_cnt; i++) {
        reg = (mca_mpool_base_registration_t*)
            ompi_pointer_array_get_item(&regs, i);

        if(0 == reg->ref_count) {
            if(dereg_mem(mpool, reg) != OMPI_SUCCESS) {
                err++;
                continue;
            }
        } else {
            /* remove the registration from the cache and wait for the
             * ref_count to go to zero before unregistering the memory.
             * Note that our registered memory statistics can go wrong at
             * this point, but that is better than potential memory
             * corruption. We return an error to the caller in this case. */
            reg->flags |= MCA_MPOOL_FLAGS_CACHE_BYPASS;
            err++; /* tell the caller that something went wrong */
        }
        mpool->rcache->rcache_delete(mpool->rcache, reg);
        if(0 == reg->ref_count) {
            opal_list_remove_item(&mpool_rdma->mru_list,
                                  (opal_list_item_t*)reg);
            OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                                  (ompi_free_list_item_t*)reg);
        }
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    ompi_pointer_array_remove_all(&regs);

    return err ? OMPI_ERROR : OMPI_SUCCESS;
}
static inline int dereg_mem(mca_mpool_base_registration_t *reg)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t *) reg->mpool;
    int rc;

    if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
        reg->mpool->rcache->rcache_delete(reg->mpool->rcache, reg);

    /* Drop the rcache lock before deregistering the memory */
    OPAL_THREAD_UNLOCK(&reg->mpool->rcache->lock);
    rc = mpool_grdma->resources.deregister_mem(mpool_grdma->resources.reg_data,
                                               reg);
    OPAL_THREAD_LOCK(&reg->mpool->rcache->lock);

    if (OPAL_LIKELY(OMPI_SUCCESS == rc)) {
        OMPI_FREE_LIST_RETURN(&mpool_grdma->reg_list,
                              (ompi_free_list_item_t *) reg);
    }

    return rc;
}
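/*
 * Illustrative sketch (not OMPI code): dereg_mem() above drops the rcache
 * lock around the driver deregistration call, because that call may be slow
 * or may allocate memory.  A minimal self-contained version of the same
 * pattern, using pthreads and hypothetical slow_deregister/registration_t
 * names:
 */
#include <pthread.h>

typedef struct { int handle; } registration_t;

static pthread_mutex_t cache_lock = PTHREAD_MUTEX_INITIALIZER;

/* hypothetical slow driver call; must not run under cache_lock */
static int slow_deregister(registration_t *r) { (void)r; return 0; }

/* caller holds cache_lock on entry and on return, mirroring dereg_mem() */
static int dereg_pattern(registration_t *r)
{
    int rc;

    /* drop the lock so other threads can use the cache during the call */
    pthread_mutex_unlock(&cache_lock);
    rc = slow_deregister(r);
    pthread_mutex_lock(&cache_lock);

    return rc;
}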
int mca_mpool_rdma_deregister(struct mca_mpool_base_module_t *mpool,
                              mca_mpool_base_registration_t *reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    int rc = OMPI_SUCCESS;
    assert(reg->ref_count > 0);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    reg->ref_count--;
    if(reg->ref_count > 0) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return OMPI_SUCCESS;
    }

    if(mca_mpool_rdma_component.leave_pinned && registration_is_cachebale(reg)) {
        /* if leave_pinned is set don't deregister memory, but put it
         * on MRU list for future use */
        opal_list_prepend(&mpool_rdma->mru_list, (opal_list_item_t*)reg);
    } else {
        /* Remove from rcache first */
        if(!(reg->flags & MCA_MPOOL_FLAGS_CACHE_BYPASS))
            mpool->rcache->rcache_delete(mpool->rcache, reg);

        /* Drop the rcache lock before deregistering the memory */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        rc = dereg_mem(mpool, reg);
        OPAL_THREAD_LOCK(&mpool->rcache->lock);

        if(OMPI_SUCCESS == rc) {
            OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                                  (ompi_free_list_item_t*)reg);
        }
    }
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);

    return rc;
}
/* This function must be called with the rcache lock held and with a
 * non-empty gc_list (both call sites check the list before calling) */
static void do_unregistration_gc(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *reg;

    do {
        /* Remove the registration from the garbage collection list
           before deregistering it */
        reg = (mca_mpool_base_registration_t *)
            opal_list_remove_first(&mpool_rdma->gc_list);
        mpool->rcache->rcache_delete(mpool->rcache, reg);

        /* Drop the rcache lock before calling dereg_mem, as there may be
           memory allocations */
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        dereg_mem(mpool, reg);
        OPAL_THREAD_LOCK(&mpool->rcache->lock);

        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                              (ompi_free_list_item_t*)reg);
    } while(!opal_list_is_empty(&mpool_rdma->gc_list));
}
static int register_cache_bypass(mca_mpool_base_module_t *mpool,
        void *addr, size_t size, uint32_t flags,
        mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *rdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr( (void*) ((char*) addr + size - 1),
                           mca_mpool_base_page_size_log);

    OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        /* note: callers invoke this function without holding the rcache
         * lock, so no unlock belongs on this error path */
        return rc;
    }
    rdma_reg = (mca_mpool_base_registration_t*)item;

    rdma_reg->mpool = mpool;
    rdma_reg->base = base;
    rdma_reg->bound = bound;
    rdma_reg->flags = flags;

    rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
            base, bound - base + 1, rdma_reg);

    if(rc != OMPI_SUCCESS) {
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    *reg = rdma_reg;
    (*reg)->ref_count++;
    return OMPI_SUCCESS;
}
void test2(void)
{
    ompi_free_list_t key_list;
    ompi_free_list_item_t * new_value;
    ompi_rb_tree_t tree;
    int rc, i, size;
    void * result, * lookup;
    void * mem[NUM_ALLOCATIONS];
    ompi_free_list_item_t * key_array[NUM_ALLOCATIONS];
    struct timeval start, end;

    OBJ_CONSTRUCT(&key_list, ompi_free_list_t);
    ompi_free_list_init_new(&key_list, sizeof(ompi_test_rb_value_t),
                            CACHE_LINE_SIZE,
                            OBJ_CLASS(ompi_test_rb_value_t),
                            0, CACHE_LINE_SIZE,
                            0, -1, 128, NULL);

    OBJ_CONSTRUCT(&tree, ompi_rb_tree_t);
    rc = ompi_rb_tree_init(&tree, mem_node_compare);
    if(!test_verify_int(OMPI_SUCCESS, rc)) {
        test_failure("failed to properly initialize the tree");
    }

    size = 1;
    for(i = 0; i < NUM_ALLOCATIONS; i++) {
        mem[i] = malloc(size);
        if(NULL == mem[i]) {
            test_failure("system out of memory");
            return;
        }
        OMPI_FREE_LIST_GET(&key_list, new_value, rc);
        if(OMPI_SUCCESS != rc) {
            test_failure("failed to get memory from free list");
        }
        key_array[i] = new_value;
        ((ompi_test_rb_value_t *) new_value)->key.bottom = mem[i];
        ((ompi_test_rb_value_t *) new_value)->key.top =
            (void *) ((size_t) mem[i] + size - 1);
        ((ompi_test_rb_value_t *) new_value)->registered_mpools[0] = (void *) i;
        rc = ompi_rb_tree_insert(&tree,
                &((ompi_test_rb_value_t *)new_value)->key, new_value);
        if(OMPI_SUCCESS != rc) {
            test_failure("failed to properly insert a new node");
        }
        size += 1;
    }

    gettimeofday(&start, NULL);
    for(i = 0; i < NUM_ALLOCATIONS; i++) {
        lookup = (void *) ((size_t) mem[i] + i);
        result = ompi_rb_tree_find(&tree, &lookup);
        if(NULL == result) {
            test_failure("lookup returned null!");
        } else if(i != ((int) ((ompi_test_rb_value_t *) result)->registered_mpools[0])) {
            test_failure("lookup returned wrong node!");
        }
        result = ompi_rb_tree_find(&tree, &lookup);
        if(NULL == result) {
            test_failure("lookup returned null!");
        } else if(i != ((int) ((ompi_test_rb_value_t *) result)->registered_mpools[0])) {
            test_failure("lookup returned wrong node!");
        }
    }
    gettimeofday(&end, NULL);

#if 0
    i = (end.tv_sec - start.tv_sec) * 1000000 + (end.tv_usec - start.tv_usec);
    printf("In a %d node tree, %d lookups took %f microseconds each\n",
           NUM_ALLOCATIONS, NUM_ALLOCATIONS * 2,
           (float) i / (float) (NUM_ALLOCATIONS * 2));
#endif

    for(i = 0; i < NUM_ALLOCATIONS; i++) {
        if(NULL != mem[i]) {
            free(mem[i]);
        }
        OMPI_FREE_LIST_RETURN(&(key_list), key_array[i]);
    }
    OBJ_DESTRUCT(&tree);
    OBJ_DESTRUCT(&key_list);
}
/*
 * Return a module-specific IO MPI_Request
 */
int mca_io_base_request_alloc(ompi_file_t *file,
                              mca_io_base_request_t **req)
{
    int err;
    mca_io_base_module_request_once_init_fn_t func;
    ompi_free_list_item_t *item;

    /* See if we've got a request on the module's freelist (which is
       cached on the file, since there's only one module per
       MPI_File).  Use a quick-but-not-entirely-accurate (but good
       enough) check as a slight optimization to potentially avoid
       locking and unlocking. */

    if (opal_list_get_size(&file->f_io_requests) > 0) {
        OPAL_THREAD_LOCK(&file->f_io_requests_lock);
        if (opal_list_get_size(&file->f_io_requests) > 0) {
            *req = (mca_io_base_request_t*)
                opal_list_remove_first(&file->f_io_requests);
            (*req)->free_called = false;
        } else {
            *req = NULL;
        }
        OPAL_THREAD_UNLOCK(&file->f_io_requests_lock);
    } else {
        *req = NULL;
    }

    /* Nope, we didn't have one on the file freelist, so let's get one
       off the global freelist */

    if (NULL == *req) {
        OMPI_FREE_LIST_GET(&mca_io_base_requests, item, err);
        *req = (mca_io_base_request_t*) item;

        /* Call the per-use init function, if it exists */

        switch (file->f_io_version) {
        case MCA_IO_BASE_V_2_0_0:

            /* These can be set once for this request since this
               request will always be used with the same module (and
               therefore, the same MPI_File).  Note that
               (*req)->req_ompi.rq_type is already set by the
               constructor. */

            (*req)->req_file = file;
            (*req)->req_ver = file->f_io_version;
            (*req)->free_called = false;
            (*req)->super.req_free =
                file->f_io_selected_module.v2_0_0.io_module_request_free;
            (*req)->super.req_cancel =
                file->f_io_selected_module.v2_0_0.io_module_request_cancel;

            /* Call the module's once-per-process init, if it exists */

            func = file->f_io_selected_module.v2_0_0.io_module_request_once_init;
            if (NULL != func) {
                if (OMPI_SUCCESS !=
                    (err = func(&file->f_io_selected_module, *req))) {
                    OMPI_FREE_LIST_RETURN(&mca_io_base_requests, item);
                    return err;
                }
            }

            break;

        default:
            OMPI_FREE_LIST_RETURN(&mca_io_base_requests, item);
            return OMPI_ERR_NOT_IMPLEMENTED;
            break;
        }
    }

    /* Initialize the request */

    OMPI_REQUEST_INIT(&((*req)->super), false);
    (*req)->super.req_mpi_object.file = file;

    /*
     * Copied from ompi/mca/pml/base/pml_base_recvreq.h:
     * always set the req_status.MPI_TAG to ANY_TAG before starting the
     * request. This field is used if cancelled to find out if the request
     * has been matched or not.
     */
    (*req)->super.req_status.MPI_TAG = MPI_ANY_TAG;
    (*req)->super.req_status.MPI_ERROR = OMPI_SUCCESS;
    (*req)->super.req_status._count = 0;
    (*req)->super.req_status._cancelled = 0;

    /* All done */

    return OMPI_SUCCESS;
}
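/*
 * Illustrative sketch (not OMPI code): the size test before taking the lock
 * in mca_io_base_request_alloc() is a double-checked optimization -- the
 * unlocked read may be stale, so the test is repeated under the lock before
 * anything is removed.  A minimal version with a hypothetical locked_list_t:
 */
#include <pthread.h>
#include <stddef.h>

typedef struct node { struct node *next; } node_t;
typedef struct {
    node_t *head;
    size_t  size;
    pthread_mutex_t lock;
} locked_list_t;

/* the unlocked read is only a hint; re-check under the lock before popping */
static node_t *try_pop(locked_list_t *l)
{
    node_t *n = NULL;

    if (l->size > 0) {                 /* quick, possibly stale check */
        pthread_mutex_lock(&l->lock);
        if (l->size > 0) {             /* authoritative check */
            n = l->head;
            l->head = n->next;
            l->size--;
        }
        pthread_mutex_unlock(&l->lock);
    }
    return n;
}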
/*
 * register memory
 */
int mca_mpool_grdma_register(mca_mpool_base_module_t *mpool, void *addr,
                             size_t size, uint32_t flags,
                             mca_mpool_base_registration_t **reg)
{
    mca_mpool_grdma_module_t *mpool_grdma = (mca_mpool_grdma_module_t*)mpool;
    mca_mpool_base_registration_t *grdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    bool bypass_cache = !!(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS);
    int rc;

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    /* if cache bypass is requested don't use the cache */
    base = (unsigned char *) down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = (unsigned char *) up_align_addr((void*)((char*) addr + size - 1),
                                            mca_mpool_base_page_size_log);

    if (!opal_list_is_empty (&mpool_grdma->pool->gc_list))
        do_unregistration_gc(mpool);

    /* look through existing regs if no persistent registration was requested.
     * Persistent registrations are always registered and placed in the cache */
    if(!(flags & MCA_MPOOL_FLAGS_PERSIST) && !bypass_cache) {
        /* check to see if the memory is registered */
        mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
        if(*reg != NULL &&
           (mca_mpool_grdma_component.leave_pinned ||
            ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
            ((*reg)->base == base && (*reg)->bound == bound))) {
            if(0 == (*reg)->ref_count &&
               mca_mpool_grdma_component.leave_pinned) {
                opal_list_remove_item(&mpool_grdma->pool->lru_list,
                                      (opal_list_item_t*)(*reg));
            }
            mpool_grdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return OMPI_SUCCESS;
        }

        mpool_grdma->stat_cache_miss++;
        *reg = NULL; /* in case the previous find found something */

        /* If no suitable registration is in the cache and leave_pinned isn't
         * set, don't use the cache.
         * This is an optimisation for the case where no limit is set. If a
         * limit is set we have to put the registration into the cache to
         * determine when we hit the memory registration limit.
         * NOTE: the cache is still used for persistent registrations, so the
         * previous find can still find something */
        if(!mca_mpool_grdma_component.leave_pinned) {
            bypass_cache = true;
        }
    }

    OMPI_FREE_LIST_GET(&mpool_grdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return rc;
    }
    grdma_reg = (mca_mpool_base_registration_t*)item;

    grdma_reg->mpool = mpool;
    grdma_reg->base = base;
    grdma_reg->bound = bound;
    grdma_reg->flags = flags;

    if (false == bypass_cache) {
        rc = mpool->rcache->rcache_insert(mpool->rcache, grdma_reg, 0);

        if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            OMPI_FREE_LIST_RETURN(&mpool_grdma->reg_list, item);
            return rc;
        }
    }

    while (OMPI_ERR_OUT_OF_RESOURCE ==
           (rc = mpool_grdma->resources.register_mem(mpool_grdma->resources.reg_data,
                                                     base, bound - base + 1,
                                                     grdma_reg))) {
        /* try to remove one unused reg and retry */
        if (!mca_mpool_grdma_evict (mpool)) {
            break;
        }
    }

    if (OPAL_UNLIKELY(rc != OMPI_SUCCESS)) {
        if (false == bypass_cache) {
            mpool->rcache->rcache_delete(mpool->rcache, grdma_reg);
        }
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_grdma->reg_list, item);
        return rc;
    }

    *reg = grdma_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);

    return OMPI_SUCCESS;
}
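/*
 * Illustrative sketch (not OMPI code): mca_mpool_grdma_register() above
 * retries the driver registration while it reports resource exhaustion,
 * evicting one unused cached registration per attempt.  A condensed version
 * of that retry loop, with hypothetical try_register/evict_one helpers:
 */
#include <stdbool.h>
#include <stddef.h>

#define ERR_OUT_OF_RESOURCE (-2)
#define SUCCESS 0

/* hypothetical stand-ins for the driver call and the cache eviction */
static int  try_register(void *base, size_t len) { (void)base; (void)len; return SUCCESS; }
static bool evict_one(void) { return false; }   /* true if something was evicted */

static int register_with_eviction(void *base, size_t len)
{
    int rc;

    /* evict one unused registration and retry until the driver succeeds
     * or there is nothing left to evict */
    while (ERR_OUT_OF_RESOURCE == (rc = try_register(base, len))) {
        if (!evict_one()) {
            break;
        }
    }
    return rc;
}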
int ompi_mtl_portals_irecv(struct mca_mtl_base_module_t* mtl,
                           struct ompi_communicator_t *comm,
                           int src,
                           int tag,
                           struct ompi_convertor_t *convertor,
                           mca_mtl_request_t *mtl_request)
{
    ptl_match_bits_t match_bits, ignore_bits;
    ptl_md_t md;
    ptl_handle_md_t md_h;
    ptl_handle_me_t me_h;
    int ret;
    ptl_process_id_t remote_proc;
    mca_mtl_base_endpoint_t *endpoint = NULL;
    ompi_mtl_portals_request_t *ptl_request =
        (ompi_mtl_portals_request_t*) mtl_request;
    ompi_mtl_portals_event_t *recv_event = NULL;
    size_t buflen;

    ptl_request->convertor = convertor;

    if (MPI_ANY_SOURCE == src) {
        remote_proc.nid = PTL_NID_ANY;
        remote_proc.pid = PTL_PID_ANY;
    } else {
        ompi_proc_t* ompi_proc = ompi_comm_peer_lookup( comm, src );
        endpoint = (mca_mtl_base_endpoint_t*) ompi_proc->proc_pml;
        remote_proc = endpoint->ptl_proc;
    }

    PTL_SET_RECV_BITS(match_bits, ignore_bits, comm->c_contextid, src, tag);

    OPAL_OUTPUT_VERBOSE((50, ompi_mtl_base_output,
                         "recv bits: 0x%016llx 0x%016llx\n",
                         match_bits, ignore_bits));

    /* first, check the queue of processed unexpected messages */
    recv_event = ompi_mtl_portals_search_unex_q(match_bits, ignore_bits);
    if (NULL != recv_event) {
        /* found it */
        ompi_mtl_portals_get_data(recv_event, convertor, ptl_request);
        OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl,
                              (ompi_free_list_item_t*)recv_event);
        goto cleanup;
    } else {
restart_search:
        /* check unexpected events */
        recv_event = ompi_mtl_portals_search_unex_events(match_bits, ignore_bits);
        if (NULL != recv_event) {
            /* found it */
            ompi_mtl_portals_get_data(recv_event, convertor, ptl_request);
            OMPI_FREE_LIST_RETURN(&ompi_mtl_portals.event_fl,
                                  (ompi_free_list_item_t*)recv_event);
            goto cleanup;
        }
    }

    /* didn't find it, now post the receive */
    ret = ompi_mtl_datatype_recv_buf(convertor, &md.start, &buflen,
                                     &ptl_request->free_after);
    md.length = buflen;

    /* create ME entry */
    ret = PtlMEInsert(ompi_mtl_portals.ptl_match_ins_me_h,
                      remote_proc, match_bits, ignore_bits,
                      PTL_UNLINK, PTL_INS_BEFORE, &me_h);
    if (PTL_OK != ret) {
        return ompi_common_portals_error_ptl_to_ompi(ret);
    }

    /* associate a memory descriptor with the Match list Entry */
    md.threshold = 0;
    md.options = PTL_MD_OP_PUT | PTL_MD_TRUNCATE | PTL_MD_EVENT_START_DISABLE;
    md.user_ptr = ptl_request;
    md.eq_handle = ompi_mtl_portals.ptl_eq_h;

    ret = PtlMDAttach(me_h, md, PTL_UNLINK, &md_h);
    if (PTL_OK != ret) {
        return ompi_common_portals_error_ptl_to_ompi(ret);
    }

    /* now try to make active */
    md.threshold = 1;

    /* enable the memory descriptor, if the ptl_unexpected_recv_eq_h
     * queue is empty */
    ret = PtlMDUpdate(md_h, NULL, &md,
                      ompi_mtl_portals.ptl_unexpected_recv_eq_h);
    if (ret == PTL_MD_NO_UPDATE) {
        /* a message has arrived since we searched - look again */
        PtlMDUnlink(md_h);
        if (ptl_request->free_after) {
            free(md.start);
        }
        goto restart_search;
    } else if (PTL_OK != ret) {
        return ompi_common_portals_error_ptl_to_ompi(ret);
    }

    ptl_request->event_callback = ompi_mtl_portals_recv_progress;

 cleanup:
    return OMPI_SUCCESS;
}
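/*
 * Illustrative sketch (not OMPI or Portals code): the PTL_MD_NO_UPDATE
 * handling above closes a race -- a matching message can arrive between the
 * unexpected-queue search and the posting of the receive, so on failure the
 * post is undone and the search restarts.  The generic shape of that loop,
 * with hypothetical search_unexpected/post_receive helpers:
 */
static int search_unexpected(void) { return 0; }  /* 1 if a match was consumed */
static int post_receive(void)      { return 1; }  /* 0 if a msg arrived meanwhile */

/* post a receive without losing a message that arrives during the search */
static void post_or_consume(void)
{
    for (;;) {
        /* 1. look through already-arrived unexpected messages */
        if (search_unexpected()) {
            return;             /* matched an unexpected message */
        }
        /* 2. try to post; if a message slipped in after the search,
         *    the post is undone and we must search again */
        if (post_receive()) {
            return;             /* posted; completion arrives asynchronously */
        }
    }
}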
/*
 * put an item back into the free list
 */
void mca_mpool_base_tree_item_put(mca_mpool_base_tree_item_t* item)
{
    OMPI_FREE_LIST_RETURN(&mca_mpool_base_tree_item_free_list,
                          &(item->super));
}
/*
 * register memory
 */
int mca_mpool_rdma_register(mca_mpool_base_module_t *mpool, void *addr,
                            size_t size, uint32_t flags,
                            mca_mpool_base_registration_t **reg)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *rdma_reg;
    ompi_free_list_item_t *item;
    unsigned char *base, *bound;
    int rc;

    /* if cache bypass is requested don't use the cache */
    if(flags & MCA_MPOOL_FLAGS_CACHE_BYPASS) {
        return register_cache_bypass(mpool, addr, size, flags, reg);
    }

    base = down_align_addr(addr, mca_mpool_base_page_size_log);
    bound = up_align_addr((void*)((char*) addr + size - 1),
                          mca_mpool_base_page_size_log);

    OPAL_THREAD_LOCK(&mpool->rcache->lock);

    /* look through existing regs if no persistent registration was requested.
     * Persistent registrations are always registered and placed in the cache */
    if(!(flags & MCA_MPOOL_FLAGS_PERSIST)) {
        /* check to see if the memory is registered */
        mpool->rcache->rcache_find(mpool->rcache, addr, size, reg);
        if(*reg != NULL &&
           (mca_mpool_rdma_component.leave_pinned ||
            ((*reg)->flags & MCA_MPOOL_FLAGS_PERSIST) ||
            ((*reg)->base == base && (*reg)->bound == bound))) {
            if(0 == (*reg)->ref_count &&
               mca_mpool_rdma_component.leave_pinned) {
                opal_list_remove_item(&mpool_rdma->mru_list,
                                      (opal_list_item_t*)(*reg));
            }
            mpool_rdma->stat_cache_hit++;
            (*reg)->ref_count++;
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return MPI_SUCCESS;
        }

        mpool_rdma->stat_cache_miss++;
        *reg = NULL; /* in case the previous find found something */

        /* If no suitable registration is in the cache, leave_pinned isn't
         * set, and the size of the registration cache is unlimited, don't
         * use the cache.
         * This is an optimisation for the case where no limit is set. If a
         * limit is set we have to put the registration into the cache to
         * determine when we hit the memory registration limit.
         * NOTE: the cache is still used for persistent registrations, so the
         * previous find can still find something */
        if(!mca_mpool_rdma_component.leave_pinned &&
           mca_mpool_rdma_component.rcache_size_limit == 0) {
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            return register_cache_bypass(mpool, addr, size, flags, reg);
        }
    }

    OMPI_FREE_LIST_GET(&mpool_rdma->reg_list, item, rc);
    if(OMPI_SUCCESS != rc) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        return rc;
    }
    rdma_reg = (mca_mpool_base_registration_t*)item;

    rdma_reg->mpool = mpool;
    rdma_reg->base = base;
    rdma_reg->bound = bound;
    rdma_reg->flags = flags;

    while((rc = mpool->rcache->rcache_insert(mpool->rcache, rdma_reg,
                    mca_mpool_rdma_component.rcache_size_limit)) ==
            OMPI_ERR_TEMP_OUT_OF_RESOURCE) {
        mca_mpool_base_registration_t *old_reg;
        /* try to remove one unused reg and retry */
        old_reg = (mca_mpool_base_registration_t*)
            opal_list_get_last(&mpool_rdma->mru_list);
        if(opal_list_get_end(&mpool_rdma->mru_list) !=
                (opal_list_item_t*)old_reg) {
            rc = dereg_mem(mpool, old_reg);
            if(MPI_SUCCESS == rc) {
                mpool->rcache->rcache_delete(mpool->rcache, old_reg);
                opal_list_remove_item(&mpool_rdma->mru_list,
                                      (opal_list_item_t*)old_reg);
                OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                                      (ompi_free_list_item_t*)old_reg);
                mpool_rdma->stat_evicted++;
            } else
                break;
        } else
            break;
    }

    if(rc != OMPI_SUCCESS) {
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    rc = mpool_rdma->resources.register_mem(mpool_rdma->resources.reg_data,
            base, bound - base + 1, rdma_reg);

    if(rc != OMPI_SUCCESS) {
        mpool->rcache->rcache_delete(mpool->rcache, rdma_reg);
        OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
        OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list, item);
        return rc;
    }

    *reg = rdma_reg;
    (*reg)->ref_count++;
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
    return OMPI_SUCCESS;
}
void mca_mpool_rdma_finalize(struct mca_mpool_base_module_t *mpool)
{
    mca_mpool_rdma_module_t *mpool_rdma = (mca_mpool_rdma_module_t*)mpool;
    mca_mpool_base_registration_t *reg;
    mca_mpool_base_registration_t *regs[RDMA_MPOOL_NREGS];
    int reg_cnt, i;
    int rc;

    /* Statistics */
    if(true == mca_mpool_rdma_component.print_stats) {
        opal_output(0, "%s rdma: stats "
                "(hit/miss/found/not found/evicted): %d/%d/%d/%d/%d\n",
                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                mpool_rdma->stat_cache_hit, mpool_rdma->stat_cache_miss,
                mpool_rdma->stat_cache_found, mpool_rdma->stat_cache_notfound,
                mpool_rdma->stat_evicted);
    }

    OPAL_THREAD_LOCK(&mpool->rcache->lock);
    if(!opal_list_is_empty(&mpool_rdma->gc_list))
        do_unregistration_gc(mpool);

    do {
        reg_cnt = mpool->rcache->rcache_find_all(mpool->rcache, 0, (size_t)-1,
                                                 regs, RDMA_MPOOL_NREGS);

        for(i = 0; i < reg_cnt; i++) {
            reg = regs[i];

            if(reg->ref_count) {
                reg->ref_count = 0; /* otherwise dereg will fail on the assert */
            } else if (mca_mpool_rdma_component.leave_pinned) {
                opal_list_remove_item(&mpool_rdma->mru_list,
                                      (opal_list_item_t*)reg);
            }

            /* Remove from rcache first */
            mpool->rcache->rcache_delete(mpool->rcache, reg);

            /* Drop the lock before deregistering the memory */
            OPAL_THREAD_UNLOCK(&mpool->rcache->lock);
            rc = dereg_mem(mpool, reg);
            OPAL_THREAD_LOCK(&mpool->rcache->lock);

            if(rc != OMPI_SUCCESS) {
                /* We potentially lose track of this registration;
                 * do we have to put it back? */
                continue;
            }

            OMPI_FREE_LIST_RETURN(&mpool_rdma->reg_list,
                                  (ompi_free_list_item_t*)reg);
        }
    } while(reg_cnt == RDMA_MPOOL_NREGS);

    OBJ_DESTRUCT(&mpool_rdma->mru_list);
    OBJ_DESTRUCT(&mpool_rdma->gc_list);
    OBJ_DESTRUCT(&mpool_rdma->reg_list);
    OPAL_THREAD_UNLOCK(&mpool->rcache->lock);

    /* Cleanup any vmas that we have deferred deletion on */
    mpool->rcache->rcache_clean(mpool->rcache);
}
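/*
 * Illustrative sketch (not OMPI code): the finalize loop above drains the
 * cache in batches of RDMA_MPOOL_NREGS, repeating while a full batch was
 * returned (a full batch means there may be more entries).  The idiom, with
 * a hypothetical find_up_to query:
 */
#define BATCH 8

/* hypothetical query: fills out[] with up to max entries, returns the count */
static int find_up_to(void **out, int max) { (void)out; (void)max; return 0; }

static void drain_all(void)
{
    void *batch[BATCH];
    int n;

    do {
        n = find_up_to(batch, BATCH);
        for (int i = 0; i < n; i++) {
            /* release batch[i] here */
        }
        /* a partial batch means the cache is now empty */
    } while (BATCH == n);
}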