/*
 * Detach a previously attached xpmem mapping and release its segment.
 *
 * @param mm_desc  Remote segment descriptor: cookie carries the apid,
 *                 address/length describe the attached region.
 *
 * @return UCS_OK on success, UCS_ERR_IO_ERROR if detach or release fails.
 */
static ucs_status_t uct_xpmem_detach(uct_mm_remote_seg_t *mm_desc)
{
    xpmem_apid_t apid = mm_desc->cookie;
    void *aligned_address;

    /* xpmem_detach() must be given the page-aligned attach address */
    aligned_address = ucs_align_down_pow2_ptr(mm_desc->address, ucs_get_page_size());

    ucs_trace("xpmem detaching address %p", aligned_address);
    if (xpmem_detach(aligned_address) < 0) {
        ucs_error("Failed to xpmem_detach: %m");
        return UCS_ERR_IO_ERROR;
    }

    /* The mapping is gone - mark it unreadable for valgrind */
    VALGRIND_MAKE_MEM_UNDEFINED(mm_desc->address, mm_desc->length);

    ucs_trace("xpmem releasing segment apid 0x%llx", apid);
    if (xpmem_release(apid) < 0) {
        ucs_error("Failed to release xpmem segment apid 0x%llx", apid);
        return UCS_ERR_IO_ERROR;
    }

    return UCS_OK;
}
/*
 * Register the page-aligned range covering [address, address+size) with
 * xpmem and return the resulting segment id through *mmid_p.
 *
 * @return UCS_OK on success, UCS_ERR_IO_ERROR if xpmem_make() fails.
 */
static ucs_status_t uct_xmpem_reg(void *address, size_t size, uct_mm_id_t *mmid_p)
{
    size_t page_size = ucs_get_page_size();
    xpmem_segid_t segid;
    void *start, *end;

    /* Round the range out to full pages, as required for sharing */
    start = ucs_align_down_pow2_ptr(address, page_size);
    end   = ucs_align_up_pow2_ptr(address + size, page_size);
    ucs_assert_always(start <= end);

    segid = xpmem_make(start, end - start, XPMEM_PERMIT_MODE, (void*)0666);
    VALGRIND_MAKE_MEM_DEFINED(&segid, sizeof(segid));
    if (segid < 0) {
        ucs_error("Failed to register %p..%p with xpmem: %m", start, end);
        return UCS_ERR_IO_ERROR;
    }

    ucs_trace("xpmem registered %p..%p segment 0x%llx", start, end, segid);
    *mmid_p = segid;
    return UCS_OK;
}
/*
 * Allocate a memory-pool chunk with anonymous mmap. A header recording the
 * mapped size is placed at the start of the mapping; the caller-visible
 * chunk begins right after it.
 *
 * @param mp       Memory pool (used for the mapping name only).
 * @param size_p   In: requested payload size. Out: usable payload size.
 * @param chunk_p  Out: pointer to the payload area (past the header).
 *
 * @return UCS_OK, or UCS_ERR_NO_MEMORY if the mapping fails.
 */
ucs_status_t ucs_mpool_chunk_mmap(ucs_mpool_t *mp, size_t *size_p, void **chunk_p)
{
    ucs_mmap_mpool_chunk_hdr_t *hdr;
    size_t total_size;

    /* Reserve room for the header and round up to whole pages */
    total_size = ucs_align_up(*size_p + sizeof(*hdr), ucs_get_page_size());

    hdr = ucs_mmap(NULL, total_size, PROT_READ|PROT_WRITE,
                   MAP_PRIVATE|MAP_ANONYMOUS, -1, 0, ucs_mpool_name(mp));
    if (hdr == MAP_FAILED) {
        return UCS_ERR_NO_MEMORY;
    }

    hdr->size = total_size;       /* remembered for unmap */
    *size_p   = total_size - sizeof(*hdr);
    *chunk_p  = hdr + 1;
    return UCS_OK;
}
/*
 * Attach a remote xpmem segment into the local address space.
 *
 * Acquires the segment (xpmem_get), then maps the page-aligned range which
 * covers [remote_address, remote_address+length). The returned local address
 * is offset into the mapping so it corresponds to remote_address exactly.
 *
 * @param mmid            Remote segment id.
 * @param length          Number of bytes to expose.
 * @param remote_address  Address of the data in the remote process.
 * @param local_address   Out: local pointer corresponding to remote_address.
 * @param cookie          Out: apid, needed later by uct_xpmem_detach().
 * @param path            Unused.
 *
 * @return UCS_OK, or UCS_ERR_IO_ERROR on get/attach failure (the apid is
 *         released if the attach step fails).
 */
static ucs_status_t uct_xpmem_attach(uct_mm_id_t mmid, size_t length,
                                     void *remote_address, void **local_address,
                                     uint64_t *cookie, const char *path)
{
    struct xpmem_addr addr;
    ucs_status_t status;
    ptrdiff_t offset;
    void *mapped;

    addr.offset = 0;
    addr.apid   = xpmem_get(mmid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL);
    VALGRIND_MAKE_MEM_DEFINED(&addr.apid, sizeof(addr.apid));
    if (addr.apid < 0) {
        ucs_error("Failed to acquire xpmem segment 0x%"PRIx64": %m", mmid);
        status = UCS_ERR_IO_ERROR;
        goto err_xget;
    }

    ucs_trace("xpmem acquired segment 0x%"PRIx64" apid 0x%llx remote_address %p",
              mmid, addr.apid, remote_address);

    /* Attach from the page boundary below remote_address; remember how far
     * into the first page the data actually starts. */
    offset = ((uintptr_t)remote_address) % ucs_get_page_size();
    mapped = xpmem_attach(addr, length + offset, NULL);
    VALGRIND_MAKE_MEM_DEFINED(&mapped, sizeof(mapped));
    if (mapped == MAP_FAILED) {
        ucs_error("Failed to attach xpmem segment 0x%"PRIx64" apid 0x%llx "
                  "with length %zu: %m", mmid, addr.apid, length);
        status = UCS_ERR_IO_ERROR;
        goto err_xattach;
    }

    VALGRIND_MAKE_MEM_DEFINED(mapped + offset, length);

    *local_address = mapped + offset;
    *cookie        = addr.apid;

    ucs_trace("xpmem attached segment 0x%"PRIx64" apid 0x%llx %p..%p at %p (+%zd)",
              mmid, addr.apid, remote_address, remote_address + length,
              mapped, offset);
    return UCS_OK;

err_xattach:
    xpmem_release(addr.apid);
err_xget:
    return status;
}
/*
 * Initialize a uGNI SMSG interface: compute the per-mailbox buffer size,
 * create the descriptor and mailbox memory pools, grab the pre-allocated TX
 * descriptor, activate the interface and set the retransmit limit.
 *
 * Bug fixes versus the previous version:
 *  - GNI_SmsgBufferSizeNeeded() result is now checked BEFORE bytes_per_mbox
 *    is consumed (it is undefined when the call fails).
 *  - Cleanup labels now unwind in exact reverse order of acquisition. The
 *    old chain put an uninitialized self->user_desc on the mbox-pool failure
 *    path, cleaned up the never-created mbox pool, and leaked the desc pool
 *    and TX descriptor when interface activation failed.
 */
static UCS_CLASS_INIT_FUNC(uct_ugni_smsg_iface_t, uct_md_h md, uct_worker_h worker,
                           const uct_iface_params_t *params,
                           const uct_iface_config_t *tl_config)
{
    uct_ugni_iface_config_t *config = ucs_derived_of(tl_config, uct_ugni_iface_config_t);
    ucs_status_t status;
    gni_return_t ugni_rc;
    unsigned int bytes_per_mbox;
    gni_smsg_attr_t smsg_attr;

    pthread_mutex_lock(&uct_ugni_global_lock);

    UCS_CLASS_CALL_SUPER_INIT(uct_ugni_iface_t, md, worker, params,
                              &uct_ugni_smsg_iface_ops,
                              &config->super UCS_STATS_ARG(NULL));

    /* Setting initial configuration */
    self->config.smsg_seg_size       = 2048;
    self->config.rx_headroom         = params->rx_headroom;
    self->config.smsg_max_retransmit = 16;
    self->config.smsg_max_credit     = 8;
    self->smsg_id                    = 0;

    smsg_attr.msg_type       = GNI_SMSG_TYPE_MBOX_AUTO_RETRANSMIT;
    smsg_attr.mbox_maxcredit = self->config.smsg_max_credit;
    smsg_attr.msg_maxsize    = self->config.smsg_seg_size;

    ugni_rc = GNI_SmsgBufferSizeNeeded(&smsg_attr, &bytes_per_mbox);
    if (ugni_rc != GNI_RC_SUCCESS) {
        /* bytes_per_mbox is undefined on failure - check before using it */
        ucs_error("Smsg buffer size calculation failed");
        status = UCS_ERR_INVALID_PARAM;
        goto exit;
    }
    self->bytes_per_mbox = ucs_align_up_pow2(bytes_per_mbox, ucs_get_page_size());

    status = ucs_mpool_init(&self->free_desc,
                            0,
                            self->config.smsg_seg_size + sizeof(uct_ugni_smsg_desc_t),
                            0,
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_smsg_desc_mpool_ops,
                            "UGNI-SMSG-DESC");
    if (UCS_OK != status) {
        ucs_error("Desc Mpool creation failed");
        goto exit;
    }

    status = ucs_mpool_init(&self->free_mbox,
                            0,
                            self->bytes_per_mbox + sizeof(uct_ugni_smsg_mbox_t),
                            sizeof(uct_ugni_smsg_mbox_t),
                            UCS_SYS_CACHE_LINE_SIZE,      /* alignment */
                            128,                          /* grow */
                            config->mpool.max_bufs,       /* max buffers */
                            &uct_ugni_smsg_mbox_mpool_ops,
                            "UGNI-SMSG-MBOX");
    if (UCS_OK != status) {
        ucs_error("Mbox Mpool creation failed");
        goto clean_desc;   /* only the desc pool exists at this point */
    }

    UCT_TL_IFACE_GET_TX_DESC(&self->super.super, &self->free_desc,
                             self->user_desc, self->user_desc = NULL);

    status = ugni_smsg_activate_iface(self);
    if (UCS_OK != status) {
        ucs_error("Failed to activate the interface");
        goto clean_tx_desc;
    }

    ugni_rc = GNI_SmsgSetMaxRetrans(self->super.nic_handle,
                                    self->config.smsg_max_retransmit);
    if (ugni_rc != GNI_RC_SUCCESS) {
        ucs_error("Smsg setting max retransmit count failed.");
        status = UCS_ERR_INVALID_PARAM;
        goto clean_iface;
    }

    /* TBD: eventually the uct_ugni_progress has to be moved to
     * udt layer so each ugni layer will have own progress */
    uct_worker_progress_register(worker, uct_ugni_smsg_progress, self);
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return UCS_OK;

    /* Unwind in reverse order of acquisition */
clean_iface:
    ugni_smsg_deactivate_iface(self);
clean_tx_desc:
    ucs_mpool_put(self->user_desc);
    ucs_mpool_cleanup(&self->free_mbox, 1);
clean_desc:
    ucs_mpool_cleanup(&self->free_desc, 1);
exit:
    ucs_error("Failed to activate interface");
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return status;
}
/*
 * Allocate at least min_length bytes of memory, trying each of the given
 * allocation methods in order until one succeeds.
 *
 * @param min_length   Minimal number of bytes to allocate (must be > 0).
 * @param methods      Ordered array of allocation methods to try.
 * @param num_methods  Number of entries in methods (must be > 0).
 * @param mds          Memory domains to use for UCT_ALLOC_METHOD_MD.
 * @param num_mds      Number of entries in mds.
 * @param alloc_name   Name for allocation tracking.
 * @param mem          Out: filled with address/length/method/md/memh.
 *
 * @return UCS_OK on success; UCS_ERR_INVALID_PARAM on bad arguments or an
 *         unknown method; an MD error if an allocating MD fails; otherwise
 *         UCS_ERR_NO_MEMORY if every method was exhausted.
 */
ucs_status_t uct_mem_alloc(size_t min_length, uct_alloc_method_t *methods,
                           unsigned num_methods, uct_md_h *mds, unsigned num_mds,
                           const char *alloc_name, uct_allocated_memory_t *mem)
{
    uct_alloc_method_t *method;
    uct_md_attr_t md_attr;
    ucs_status_t status;
    size_t alloc_length;
    unsigned md_index;
    uct_mem_h memh;
    uct_md_h md;
    void *address;
    int shmid;

    /* Argument sanity checks */
    if (min_length == 0) {
        ucs_error("Allocation length cannot be 0");
        return UCS_ERR_INVALID_PARAM;
    }
    if (num_methods == 0) {
        ucs_error("No allocation methods provided");
        return UCS_ERR_INVALID_PARAM;
    }

    for (method = methods; method < methods + num_methods; ++method) {
        ucs_debug("trying allocation method %s", uct_alloc_method_names[*method]);

        switch (*method) {
        case UCT_ALLOC_METHOD_MD:
            /* Try every memory domain which advertises allocation support */
            for (md_index = 0; md_index < num_mds; ++md_index) {
                md     = mds[md_index];
                status = uct_md_query(md, &md_attr);
                if (status != UCS_OK) {
                    ucs_error("Failed to query MD");
                    return status;
                }

                if (!(md_attr.cap.flags & UCT_MD_FLAG_ALLOC)) {
                    continue;       /* this MD cannot allocate */
                }

                /* An MD which advertised UCT_MD_FLAG_ALLOC is expected to
                 * succeed; its failure is fatal rather than a fall-back. */
                alloc_length = min_length;
                status       = uct_md_mem_alloc(md, &alloc_length, &address,
                                                alloc_name, &memh);
                if (status != UCS_OK) {
                    ucs_error("failed to allocate %zu bytes using md %s: %s",
                              alloc_length, md->component->name,
                              ucs_status_string(status));
                    return status;
                }

                ucs_assert(memh != UCT_INVALID_MEM_HANDLE);
                mem->md   = md;
                mem->memh = memh;
                goto allocated;
            }
            break;

        case UCT_ALLOC_METHOD_HEAP:
            /* Cache-line-aligned allocation from the libc heap */
            alloc_length = min_length;
            address      = ucs_memalign(UCS_SYS_CACHE_LINE_SIZE,
                                        alloc_length UCS_MEMTRACK_VAL);
            if (address != NULL) {
                goto allocated_without_md;
            }
            ucs_debug("failed to allocate %zu bytes from the heap", alloc_length);
            break;

        case UCT_ALLOC_METHOD_MMAP:
            /* Anonymous mapping straight from the operating system */
            alloc_length = ucs_align_up_pow2(min_length, ucs_get_page_size());
            address      = ucs_mmap(NULL, alloc_length, PROT_READ|PROT_WRITE,
                                    MAP_PRIVATE|MAP_ANON, -1, 0 UCS_MEMTRACK_VAL);
            if (address != MAP_FAILED) {
                goto allocated_without_md;
            }
            ucs_debug("failed to mmap %zu bytes: %m", alloc_length);
            break;

        case UCT_ALLOC_METHOD_HUGE:
            /* SysV segment backed by huge pages */
            alloc_length = min_length;
            status       = ucs_sysv_alloc(&alloc_length, &address, SHM_HUGETLB,
                                          &shmid UCS_MEMTRACK_VAL);
            if (status == UCS_OK) {
                goto allocated_without_md;
            }
            ucs_debug("failed to allocate %zu bytes from hugetlb: %s",
                      min_length, ucs_status_string(status));
            break;

        default:
            ucs_error("Invalid allocation method %d", *method);
            return UCS_ERR_INVALID_PARAM;
        }
    }

    ucs_debug("Could not allocate memory with any of the provided methods");
    return UCS_ERR_NO_MEMORY;

allocated_without_md:
    mem->md   = NULL;
    mem->memh = UCT_INVALID_MEM_HANDLE;
allocated:
    ucs_debug("allocated %zu bytes at %p using %s", alloc_length, address,
              (mem->md == NULL) ? uct_alloc_method_names[*method]
                                : mem->md->component->name);
    mem->address = address;
    mem->length  = alloc_length;
    mem->method  = *method;
    return UCS_OK;
}
/*
 * Allocate at least min_length bytes of memory, trying each of the given
 * allocation methods in order until one succeeds.
 *
 * @param addr         Requested address hint; mandatory (and page-aligned)
 *                     when UCT_MD_MEM_FLAG_FIXED is set in flags.
 * @param min_length   Minimal number of bytes to allocate (must be > 0).
 * @param flags        UCT_MD_MEM_FLAG_xx allocation flags.
 * @param methods      Ordered array of allocation methods to try.
 * @param num_methods  Number of entries in methods (must be > 0).
 * @param mds          Memory domains to use for UCT_ALLOC_METHOD_MD.
 * @param num_mds      Number of entries in mds.
 * @param alloc_name   Name for allocation tracking.
 * @param mem          Out: filled with address/length/method/md/memh.
 *
 * @return UCS_OK on success; UCS_ERR_INVALID_PARAM on bad arguments or an
 *         unknown method; an MD error if an allocating MD fails; otherwise
 *         UCS_ERR_NO_MEMORY if every method was exhausted.
 *
 * Fix: madvise() returns a plain int (0 or -1 with errno), not a
 * ucs_status_t; its result is now kept in a dedicated int instead of being
 * stored into 'status' and compared against UCS_OK.
 */
ucs_status_t uct_mem_alloc(void *addr, size_t min_length, unsigned flags,
                           uct_alloc_method_t *methods, unsigned num_methods,
                           uct_md_h *mds, unsigned num_mds,
                           const char *alloc_name, uct_allocated_memory_t *mem)
{
    uct_alloc_method_t *method;
    uct_md_attr_t md_attr;
    ucs_status_t status;
    size_t alloc_length;
    unsigned md_index;
    uct_mem_h memh;
    uct_md_h md;
    void *address;
    int shmid;
    unsigned map_flags;
    int ret;

    if (min_length == 0) {
        ucs_error("Allocation length cannot be 0");
        return UCS_ERR_INVALID_PARAM;
    }

    if (num_methods == 0) {
        ucs_error("No allocation methods provided");
        return UCS_ERR_INVALID_PARAM;
    }

    /* A fixed allocation needs a valid, page-aligned target address */
    if ((flags & UCT_MD_MEM_FLAG_FIXED) &&
        (!addr || ((uintptr_t)addr % ucs_get_page_size()))) {
        ucs_debug("UCT_MD_MEM_FLAG_FIXED requires valid page size aligned address");
        return UCS_ERR_INVALID_PARAM;
    }

    for (method = methods; method < methods + num_methods; ++method) {
        ucs_debug("trying allocation method %s", uct_alloc_method_names[*method]);

        switch (*method) {
        case UCT_ALLOC_METHOD_MD:
            /* Allocate with one of the specified memory domains */
            for (md_index = 0; md_index < num_mds; ++md_index) {
                md     = mds[md_index];
                status = uct_md_query(md, &md_attr);
                if (status != UCS_OK) {
                    ucs_error("Failed to query MD");
                    return status;
                }

                /* Check if MD supports allocation */
                if (!(md_attr.cap.flags & UCT_MD_FLAG_ALLOC)) {
                    continue;
                }

                /* Check if MD supports allocation with fixed address
                 * if it's requested */
                if ((flags & UCT_MD_MEM_FLAG_FIXED) &&
                    !(md_attr.cap.flags & UCT_MD_FLAG_FIXED)) {
                    continue;
                }

                /* Allocate memory using the MD.
                 * If the allocation fails, it's considered an error and we
                 * don't fall-back, because this MD already exposed support
                 * for memory allocation. */
                alloc_length = min_length;
                address      = addr;
                status       = uct_md_mem_alloc(md, &alloc_length, &address,
                                                flags, alloc_name, &memh);
                if (status != UCS_OK) {
                    ucs_error("failed to allocate %zu bytes using md %s: %s",
                              alloc_length, md->component->name,
                              ucs_status_string(status));
                    return status;
                }

                ucs_assert(memh != UCT_MEM_HANDLE_NULL);
                mem->md   = md;
                mem->memh = memh;
                goto allocated;
            }
            break;

        case UCT_ALLOC_METHOD_THP:
#ifdef MADV_HUGEPAGE
            if (!ucs_is_thp_enabled()) {
                break;
            }

            /* Fixed option is not supported for thp allocation */
            if (flags & UCT_MD_MEM_FLAG_FIXED) {
                break;
            }

            /* Huge-page-aligned allocation, advised into THP */
            alloc_length = ucs_align_up(min_length, ucs_get_huge_page_size());
            address      = ucs_memalign(ucs_get_huge_page_size(),
                                        alloc_length UCS_MEMTRACK_VAL);
            if (address != NULL) {
                /* madvise() returns int, not ucs_status_t */
                ret = madvise(address, alloc_length, MADV_HUGEPAGE);
                if (ret != 0) {
                    ucs_error("madvise failure status (%d) address(%p) len(%zu):"
                              " %m", ret, address, alloc_length);
                    ucs_free(address);
                    break;
                } else {
                    goto allocated_without_md;
                }
            }
            ucs_debug("failed to allocate by thp %zu bytes: %m", alloc_length);
#endif
            break;

        case UCT_ALLOC_METHOD_HEAP:
            /* Allocate aligned memory using libc allocator */

            /* Fixed option is not supported for heap allocation */
            if (flags & UCT_MD_MEM_FLAG_FIXED) {
                break;
            }

            alloc_length = min_length;
            address      = ucs_memalign(UCS_SYS_CACHE_LINE_SIZE,
                                        alloc_length UCS_MEMTRACK_VAL);
            if (address != NULL) {
                goto allocated_without_md;
            }
            ucs_debug("failed to allocate %zu bytes from the heap", alloc_length);
            break;

        case UCT_ALLOC_METHOD_MMAP:
            map_flags = uct_mem_get_mmap_flags(flags);

            /* Request memory from operating system using mmap() */
            alloc_length = ucs_align_up_pow2(min_length, ucs_get_page_size());
            address      = ucs_mmap(addr, alloc_length, PROT_READ | PROT_WRITE,
                                    map_flags, -1, 0 UCS_MEMTRACK_VAL);
            if (address != MAP_FAILED) {
                goto allocated_without_md;
            }
            ucs_debug("failed to mmap %zu bytes: %m", alloc_length);
            break;

        case UCT_ALLOC_METHOD_HUGE:
            /* Allocate huge pages */
            alloc_length = min_length;
            address      = (flags & UCT_MD_MEM_FLAG_FIXED) ? addr : NULL;
            status       = ucs_sysv_alloc(&alloc_length, &address, SHM_HUGETLB,
                                          &shmid UCS_MEMTRACK_VAL);
            if (status == UCS_OK) {
                goto allocated_without_md;
            }
            ucs_debug("failed to allocate %zu bytes from hugetlb: %s",
                      min_length, ucs_status_string(status));
            break;

        default:
            ucs_error("Invalid allocation method %d", *method);
            return UCS_ERR_INVALID_PARAM;
        }
    }

    ucs_debug("Could not allocate memory with any of the provided methods");
    return UCS_ERR_NO_MEMORY;

allocated_without_md:
    mem->md   = NULL;
    mem->memh = UCT_MEM_HANDLE_NULL;
allocated:
    ucs_debug("allocated %zu bytes at %p using %s", alloc_length, address,
              (mem->md == NULL) ? uct_alloc_method_names[*method]
                                : mem->md->component->name);
    mem->address = address;
    mem->length  = alloc_length;
    mem->method  = *method;
    return UCS_OK;
}