ucs_status_t ucs_mpool_hugetlb_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p)
{
    ucs_hugetlb_mpool_chunk_hdr_t *chunk;
    void *ptr;
    ucs_status_t status;
    size_t real_size;
    int shmid;

    /* First, try hugetlb */
    real_size = *size_p;
    status = ucs_sysv_alloc(&real_size, (void**)&ptr, SHM_HUGETLB, &shmid
                            UCS_MEMTRACK_NAME(ucs_mpool_name(mp)));
    if (status == UCS_OK) {
        chunk          = ptr;
        chunk->hugetlb = 1;
        goto out_ok;
    }

    /* Fallback to glibc */
    real_size = *size_p;
    chunk     = ucs_malloc(real_size, ucs_mpool_name(mp));
    if (chunk != NULL) {
        chunk->hugetlb = 0;
        goto out_ok;
    }

    return UCS_ERR_NO_MEMORY;

out_ok:
    *size_p  = real_size - sizeof(*chunk);
    *chunk_p = chunk + 1;
    return UCS_OK;
}

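/*
 * A minimal sketch of the matching release path: the header written at
 * allocation time records which allocator produced the chunk, so the free
 * routine can dispatch on it. Assumes ucs_sysv_free() is the counterpart of
 * ucs_sysv_alloc(); this is an illustration, not taken from the code above.
 */
void ucs_mpool_hugetlb_free(ucs_mpool_t *mp, void *chunk)
{
    ucs_hugetlb_mpool_chunk_hdr_t *hdr;

    /* step back from the user chunk to the header written in the malloc above */
    hdr = (ucs_hugetlb_mpool_chunk_hdr_t*)chunk - 1;
    if (hdr->hugetlb) {
        ucs_sysv_free(hdr);   /* hugetlb path: release the SysV segment */
    } else {
        ucs_free(hdr);        /* glibc fallback path */
    }
}
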
static ucs_status_t uct_gdr_copy_mem_reg(uct_md_h uct_md, void *address, size_t length,
                                         unsigned flags, uct_mem_h *memh_p)
{
    uct_gdr_copy_mem_t *mem_hndl = NULL;
    size_t reg_size;
    void *ptr;
    ucs_status_t status;

    mem_hndl = ucs_malloc(sizeof(uct_gdr_copy_mem_t), "gdr_copy handle");
    if (NULL == mem_hndl) {
        ucs_error("failed to allocate memory for gdr_copy_mem_t");
        return UCS_ERR_NO_MEMORY;
    }

    reg_size = (length + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
    ptr      = (void *)((uintptr_t)address & GPU_PAGE_MASK);

    status = uct_gdr_copy_mem_reg_internal(uct_md, ptr, reg_size, 0, mem_hndl);
    if (status != UCS_OK) {
        ucs_free(mem_hndl);
        return status;
    }

    *memh_p = mem_hndl;
    return UCS_OK;
}

ucs_status_t ucp_config_read(const char *env_prefix, const char *filename,
                             ucp_config_t **config_p)
{
    ucp_config_t *config;
    ucs_status_t status;

    config = ucs_malloc(sizeof(*config), "ucp config");
    if (config == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    status = ucs_config_parser_fill_opts(config, ucp_config_table, env_prefix,
                                         NULL, 0);
    if (status != UCS_OK) {
        goto err_free;
    }

    *config_p = config;
    return UCS_OK;

err_free:
    ucs_free(config);
err:
    return status;
}

static ucs_status_t uct_ugni_rkey_unpack(uct_md_component_t *mdc, const void *rkey_buffer,
                                         uct_rkey_t *rkey_p, void **handle_p)
{
    const uint64_t *ptr = rkey_buffer;
    gni_mem_handle_t *mem_hndl = NULL;
    uint64_t magic = 0;

    ucs_debug("Unpacking [%"PRIx64" %"PRIx64" %"PRIx64"]", ptr[0], ptr[1], ptr[2]);

    magic = ptr[0];
    if (magic != UCT_UGNI_RKEY_MAGIC) {
        ucs_error("Failed to identify key. Expected %llx but received %"PRIx64,
                  UCT_UGNI_RKEY_MAGIC, magic);
        return UCS_ERR_UNSUPPORTED;
    }

    mem_hndl = ucs_malloc(sizeof(gni_mem_handle_t), "gni_mem_handle_t");
    if (NULL == mem_hndl) {
        ucs_error("Failed to allocate memory for gni_mem_handle_t");
        return UCS_ERR_NO_MEMORY;
    }

    mem_hndl->qword1 = ptr[1];
    mem_hndl->qword2 = ptr[2];

    *rkey_p   = (uintptr_t)mem_hndl;
    *handle_p = NULL;
    return UCS_OK;
}

static UCS_CLASS_INIT_FUNC(uct_tcp_ep_t, uct_tcp_iface_t *iface, int fd,
                           const struct sockaddr_in *dest_addr)
{
    ucs_status_t status;

    UCS_CLASS_CALL_SUPER_INIT(uct_base_ep_t, &iface->super)

    self->buf = ucs_malloc(iface->config.buf_size, "tcp_buf");
    if (self->buf == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    self->events = 0;
    self->offset = 0;
    self->length = 0;
    ucs_queue_head_init(&self->pending_q);

    if (fd == -1) {
        status = ucs_tcpip_socket_create(&self->fd);
        if (status != UCS_OK) {
            goto err;
        }

        /* TODO use non-blocking connect */
        status = uct_tcp_socket_connect(self->fd, dest_addr);
        if (status != UCS_OK) {
            goto err_close;
        }
    } else {
        self->fd = fd;
    }

    status = ucs_sys_fcntl_modfl(self->fd, O_NONBLOCK, 0);
    if (status != UCS_OK) {
        goto err_close;
    }

    status = uct_tcp_iface_set_sockopt(iface, self->fd);
    if (status != UCS_OK) {
        goto err_close;
    }

    uct_tcp_ep_epoll_ctl(self, EPOLL_CTL_ADD);

    UCS_ASYNC_BLOCK(iface->super.worker->async);
    ucs_list_add_tail(&iface->ep_list, &self->list);
    UCS_ASYNC_UNBLOCK(iface->super.worker->async);

    ucs_debug("tcp_ep %p: created on iface %p, fd %d", self, iface, self->fd);
    return UCS_OK;

err_close:
    close(self->fd);
err:
    ucs_free(self->buf); /* buf was previously leaked on the error paths */
    return status;
}

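/*
 * A sketch of the matching cleanup function, releasing the resources acquired
 * by the init above in reverse order. The EPOLL_CTL_DEL call mirrors the
 * EPOLL_CTL_ADD during init; this illustrates the expected teardown and is not
 * the definitive implementation.
 */
static UCS_CLASS_CLEANUP_FUNC(uct_tcp_ep_t)
{
    uct_tcp_iface_t *iface = ucs_derived_of(self->super.super.iface,
                                            uct_tcp_iface_t);

    UCS_ASYNC_BLOCK(iface->super.worker->async);
    ucs_list_del(&self->list);          /* undo ucs_list_add_tail() */
    UCS_ASYNC_UNBLOCK(iface->super.worker->async);

    uct_tcp_ep_epoll_ctl(self, EPOLL_CTL_DEL); /* undo EPOLL_CTL_ADD */
    close(self->fd);
    ucs_free(self->buf);
}
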
/* Should be called with lock held */
static void ucs_module_loader_add_dl_dir()
{
    char *dlpath_dup = NULL;
    size_t max_length;
    Dl_info dl_info;
    char *p, *path;
    int ret;

    (void)dlerror();
    ret = dladdr((void*)&ucs_module_loader_state, &dl_info);
    if (ret == 0) {
        ucs_error("dladdr failed: %s", dlerror());
        return;
    }

    ucs_module_debug("ucs library path: %s", dl_info.dli_fname);

    /* copy extension */
    dlpath_dup = ucs_strdup(dl_info.dli_fname, UCS_MODULE_PATH_MEMTRACK_NAME);
    if (dlpath_dup == NULL) {
        return;
    }

    p = basename(dlpath_dup);
    p = strchr(p, '.');
    if (p != NULL) {
        strncpy(ucs_module_loader_state.module_ext, p,
                sizeof(ucs_module_loader_state.module_ext) - 1);
    }
    ucs_free(dlpath_dup);

    /* copy directory component */
    dlpath_dup = ucs_strdup(dl_info.dli_fname, UCS_MODULE_PATH_MEMTRACK_NAME);
    if (dlpath_dup == NULL) {
        return;
    }

    /* construct module directory path */
    max_length = strlen(dlpath_dup) +        /* directory */
                 1 +                         /* '/' */
                 strlen(UCX_MODULE_SUBDIR) + /* sub-directory */
                 1;                          /* '\0' */
    path = ucs_malloc(max_length, UCS_MODULE_PATH_MEMTRACK_NAME);
    if (path == NULL) {
        goto out;
    }

    snprintf(path, max_length, "%s/%s", dirname(dlpath_dup), UCX_MODULE_SUBDIR);
    ucs_module_loader_state.srch_path[ucs_module_loader_state.srchpath_cnt++] = path;

out:
    ucs_free(dlpath_dup);
}

ucs_status_t ucp_mem_map(ucp_context_h context, void **address_p, size_t length,
                         unsigned flags, ucp_mem_h *memh_p)
{
    ucs_status_t status;
    ucp_mem_h memh;

    if (length == 0) {
        ucs_debug("mapping zero length buffer, return dummy memh");
        *memh_p = &ucp_mem_dummy_handle;
        return UCS_OK;
    }

    /* Allocate the memory handle */
    ucs_assert(context->num_pds > 0);
    memh = ucs_malloc(sizeof(*memh) + context->num_pds * sizeof(memh->uct[0]),
                      "ucp_memh");
    if (memh == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    if (*address_p == NULL) {
        status = ucp_mem_alloc(context, length, "user allocation", memh);
        if (status != UCS_OK) {
            goto err_free_memh;
        }
        *address_p = memh->address;
    } else {
        ucs_debug("registering user memory at %p length %zu", *address_p, length);
        memh->address      = *address_p;
        memh->length       = length;
        memh->alloc_method = UCT_ALLOC_METHOD_LAST;
        memh->alloc_pd     = NULL;
        status = ucp_memh_reg_pds(context, memh, UCT_INVALID_MEM_HANDLE);
        if (status != UCS_OK) {
            goto err_free_memh;
        }
    }

    ucs_debug("%s buffer %p length %zu memh %p pd_map 0x%"PRIx64,
              (memh->alloc_method == UCT_ALLOC_METHOD_LAST) ? "mapped" : "allocated",
              memh->address, memh->length, memh, memh->pd_map);
    *memh_p = memh;
    return UCS_OK;

err_free_memh:
    ucs_free(memh);
err:
    return status;
}

static ucs_status_t uct_cuda_mem_reg(uct_pd_h pd, void *address, size_t length,
                                     uct_mem_h *memh_p)
{
    ucs_status_t rc;
    uct_mem_h *mem_hndl = NULL;

    mem_hndl = ucs_malloc(sizeof(void *), "cuda handle for test passing");
    if (NULL == mem_hndl) {
        /* error message previously referred to gni_mem_handle_t (copy-paste) */
        ucs_error("Failed to allocate memory for cuda memory handle");
        rc = UCS_ERR_NO_MEMORY;
        goto mem_err;
    }

    *memh_p = mem_hndl;
    return UCS_OK;

mem_err:
    return rc;
}

ucs_status_t ucp_rkey_pack(ucp_context_h context, ucp_mem_h memh,
                           void **rkey_buffer_p, size_t *size_p)
{
    unsigned pd_index, uct_memh_index;
    void *rkey_buffer, *p;
    size_t size, pd_size;

    ucs_trace("packing rkeys for buffer %p memh %p pd_map 0x%"PRIx64,
              memh->address, memh, memh->pd_map);

    size = sizeof(uint64_t);
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        size += sizeof(uint8_t);
        pd_size = context->pd_attrs[pd_index].rkey_packed_size;
        ucs_assert_always(pd_size < UINT8_MAX);
        size += pd_size;
    }

    rkey_buffer = ucs_malloc(size, "ucp_rkey_buffer");
    if (rkey_buffer == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    p = rkey_buffer;

    /* Write the PD map */
    *(uint64_t*)p = memh->pd_map;
    p += sizeof(uint64_t);

    /* Write both size and rkey_buffer for each UCT rkey */
    uct_memh_index = 0;
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        if (!(memh->pd_map & UCS_BIT(pd_index))) {
            continue;
        }

        pd_size = context->pd_attrs[pd_index].rkey_packed_size;
        *((uint8_t*)p++) = pd_size;
        uct_pd_mkey_pack(context->pds[pd_index], memh->uct[uct_memh_index], p);
        ++uct_memh_index;
        p += pd_size;
    }

    *rkey_buffer_p = rkey_buffer;
    *size_p        = size;
    return UCS_OK;
}

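/*
 * Layout of the packed rkey buffer produced above, as implied by the packing
 * loop (an illustration, not an additional API):
 *
 *  +-------------+---------+------------------+---------+-----
 *  | pd_map      | size[0] | uct rkey[0]      | size[1] | ...
 *  | (uint64_t)  | (uint8) | (size[0] bytes)  | (uint8) |
 *  +-------------+---------+------------------+---------+-----
 *
 * Only PDs whose bit is set in pd_map contribute a (size, rkey) pair, in
 * increasing pd_index order; the unpacking side walks pd_map the same way.
 */
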
static ucs_status_t uct_posix_test_mem(size_t length, int shm_fd)
{
    int *buf;
    int chunk_size = 256 * 1024;
    ucs_status_t status = UCS_OK;
    size_t size_to_write, remaining;
    ssize_t single_write;

    buf = ucs_malloc(chunk_size, "write buffer");
    if (buf == NULL) {
        ucs_error("Failed to allocate memory for testing space for backing file.");
        status = UCS_ERR_NO_MEMORY;
        goto out;
    }

    memset(buf, 0, chunk_size);
    if (lseek(shm_fd, 0, SEEK_SET) < 0) {
        ucs_error("lseek failed. %m");
        status = UCS_ERR_IO_ERROR;
        goto out_free_buf;
    }

    remaining = length;
    while (remaining > 0) {
        size_to_write = ucs_min(remaining, chunk_size);
        single_write  = write(shm_fd, buf, size_to_write);

        if (single_write < 0) {
            ucs_error("Failed to write %zu bytes. %m", size_to_write);
            status = UCS_ERR_IO_ERROR;
            goto out_free_buf;
        }

        if (single_write != size_to_write) {
            ucs_error("Not enough memory to write total of %zu bytes. "
                      "Please check that /dev/shm or the directory you specified has "
                      "more available memory.", length);
            status = UCS_ERR_NO_MEMORY;
            goto out_free_buf;
        }

        remaining -= size_to_write;
    }

out_free_buf:
    ucs_free(buf);
out:
    return status;
}

static ucs_status_t ucp_ep_send_wireup_am(ucp_ep_h ep, uct_ep_h uct_ep,
                                          uint8_t am_id, ucp_rsc_index_t dst_rsc_index)
{
    ucp_rsc_index_t rsc_index    = ep->uct.rsc_index;
    ucp_worker_h worker          = ep->worker;
    ucp_context_h context        = worker->context;
    uct_iface_attr_t *iface_attr = &worker->iface_attrs[rsc_index];
    ucp_wireup_msg_t *msg;
    ucs_status_t status;
    size_t msg_len;

    msg_len = sizeof(*msg) + iface_attr->ep_addr_len;

    /* TODO use custom pack callback to avoid this allocation and memcopy */
    msg = ucs_malloc(msg_len, "conn_req");
    if (msg == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    msg->src_uuid      = worker->uuid;
    msg->src_pd_index  = context->tl_rscs[rsc_index].pd_index;
    msg->src_rsc_index = rsc_index;
    msg->dst_rsc_index = dst_rsc_index;

    status = uct_ep_get_address(ep->uct.next_ep, (struct sockaddr *)(msg + 1));
    if (status != UCS_OK) {
        goto err_free;
    }

    status = uct_ep_am_bcopy(uct_ep, am_id, (uct_pack_callback_t)memcpy, msg, msg_len);
    if (status != UCS_OK) {
        ucs_debug("failed to send conn msg: %s%s", ucs_status_string(status),
                  (status == UCS_ERR_NO_RESOURCE) ? ", will retry" : "");
        goto err_free;
    }

    ucp_wireup_log(worker, am_id, msg, 1);
    ucs_free(msg);
    return UCS_OK;

err_free:
    ucs_free(msg);
err:
    return status;
}

ucs_status_t ucp_address_pack(ucp_worker_h worker, ucp_ep_h ep, uint64_t tl_bitmap,
                              unsigned *order, size_t *size_p, void **buffer_p)
{
    ucp_address_packed_device_t *devices;
    ucp_rsc_index_t num_devices;
    ucs_status_t status;
    void *buffer;
    size_t size;

    /* Collect all devices we want to pack */
    status = ucp_address_gather_devices(worker, tl_bitmap, ep != NULL,
                                        &devices, &num_devices);
    if (status != UCS_OK) {
        goto out;
    }

    /* Calculate packed size */
    size = ucp_address_packed_size(worker, devices, num_devices);

    /* Allocate address */
    buffer = ucs_malloc(size, "ucp_address");
    if (buffer == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto out_free_devices;
    }

    memset(buffer, 0, size);

    /* Pack the address */
    status = ucp_address_do_pack(worker, ep, buffer, size, tl_bitmap, order,
                                 devices, num_devices);
    if (status != UCS_OK) {
        ucs_free(buffer);
        goto out_free_devices;
    }

    VALGRIND_CHECK_MEM_IS_DEFINED(buffer, size);

    *size_p   = size;
    *buffer_p = buffer;
    status    = UCS_OK;

out_free_devices:
    ucs_free(devices);
out:
    return status;
}

static ucs_status_t uct_ugni_mem_reg(uct_md_h md, void *address, size_t length,
                                     uct_mem_h *memh_p)
{
    ucs_status_t status;
    gni_return_t ugni_rc;
    uct_ugni_md_t *ugni_md = ucs_derived_of(md, uct_ugni_md_t);
    gni_mem_handle_t *mem_hndl = NULL;

    pthread_mutex_lock(&uct_ugni_global_lock);

    if (0 == length) {
        ucs_error("Unexpected length %zu", length);
        /* the original early return here left the mutex locked */
        status = UCS_ERR_INVALID_PARAM;
        goto mem_err;
    }

    mem_hndl = ucs_malloc(sizeof(gni_mem_handle_t), "gni_mem_handle_t");
    if (NULL == mem_hndl) {
        ucs_error("Failed to allocate memory for gni_mem_handle_t");
        status = UCS_ERR_NO_MEMORY;
        goto mem_err;
    }

    ugni_rc = GNI_MemRegister(ugni_md->nic_handle, (uint64_t)address,
                              length, NULL,
                              GNI_MEM_READWRITE | GNI_MEM_RELAXED_PI_ORDERING,
                              -1, mem_hndl);
    if (GNI_RC_SUCCESS != ugni_rc) {
        ucs_error("GNI_MemRegister failed (addr %p, size %zu), Error status: %s %d",
                  address, length, gni_err_str[ugni_rc], ugni_rc);
        status = UCS_ERR_IO_ERROR;
        goto mem_err;
    }

    ucs_debug("Memory registration address %p, len %lu, keys [%"PRIx64" %"PRIx64"]",
              address, length, mem_hndl->qword1, mem_hndl->qword2);
    *memh_p = mem_hndl;
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return UCS_OK;

mem_err:
    ucs_free(mem_hndl); /* was free(); pair with ucs_malloc(). Safe when NULL. */
    pthread_mutex_unlock(&uct_ugni_global_lock);
    return status;
}

static ucs_status_t uct_knem_rkey_unpack(uct_md_component_t *mdc, const void *rkey_buffer,
                                         uct_rkey_t *rkey_p, void **handle_p)
{
    uct_knem_key_t *packed = (uct_knem_key_t *)rkey_buffer;
    uct_knem_key_t *key;

    key = ucs_malloc(sizeof(uct_knem_key_t), "uct_knem_key_t");
    if (NULL == key) {
        ucs_error("Failed to allocate memory for uct_knem_key_t");
        return UCS_ERR_NO_MEMORY;
    }

    key->cookie  = packed->cookie;
    key->address = packed->address;
    *handle_p    = NULL;
    *rkey_p      = (uintptr_t)key;

    ucs_trace("unpacked rkey: key %p cookie 0x%"PRIx64" address %"PRIxPTR,
              key, key->cookie, key->address);
    return UCS_OK;
}

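/*
 * Since the unpacked rkey is simply a heap copy of the packed key, the
 * matching release hook only needs to free it. A sketch, assuming the
 * rkey_release callback referenced by the component definition further
 * below takes this signature:
 */
static ucs_status_t uct_knem_rkey_release(uct_md_component_t *mdc, uct_rkey_t rkey,
                                          void *handle)
{
    ucs_assert(NULL == handle);
    ucs_free((void *)rkey); /* the uct_knem_key_t allocated in unpack */
    return UCS_OK;
}
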
static void uct_cm_iface_handle_sidr_req(uct_cm_iface_t *iface,
                                         struct ib_cm_event *event)
{
    uct_cm_hdr_t *hdr = event->private_data;
    struct ib_cm_sidr_rep_param rep;
    ucs_status_t status;
    void *cm_desc, *desc;
    int ret;

    VALGRIND_MAKE_MEM_DEFINED(hdr, sizeof(*hdr)); /* was sizeof(hdr): pointer size */
    VALGRIND_MAKE_MEM_DEFINED(hdr + 1, hdr->length);

    uct_cm_iface_trace_data(iface, UCT_AM_TRACE_TYPE_RECV, hdr, "RX: SIDR_REQ");

    /* Allocate temporary buffer to serve as receive descriptor */
    cm_desc = ucs_malloc(iface->super.config.rx_payload_offset + hdr->length,
                         "cm_recv_desc");
    if (cm_desc == NULL) {
        ucs_error("failed to allocate cm receive descriptor");
        return;
    }

    /* Send reply */
    ucs_trace_data("TX: SIDR_REP [id %p{%u}]", event->cm_id, event->cm_id->handle);
    memset(&rep, 0, sizeof rep);
    rep.status = IB_SIDR_SUCCESS;
    ret = ib_cm_send_sidr_rep(event->cm_id, &rep);
    if (ret) {
        ucs_error("ib_cm_send_sidr_rep() failed: %m");
    }

    /* Call active message handler */
    desc = cm_desc + iface->super.config.rx_headroom_offset;
    uct_recv_desc_iface(desc) = &iface->super.super.super;
    status = uct_iface_invoke_am(&iface->super.super, hdr->am_id, hdr + 1,
                                 hdr->length, desc);
    if (status == UCS_OK) {
        /* release the descriptor unless the AM handler kept it */
        ucs_free(cm_desc);
    }
}

static ucs_status_t uct_knem_mem_reg(uct_md_h md, void *address, size_t length,
                                     uct_mem_h *memh_p)
{
    int rc;
    struct knem_cmd_create_region create;
    struct knem_cmd_param_iovec knem_iov[1];
    uct_knem_md_t *knem_md = (uct_knem_md_t *)md;
    int knem_fd = knem_md->knem_fd;
    uct_knem_key_t *key;

    ucs_assert_always(knem_fd > -1);

    key = ucs_malloc(sizeof(uct_knem_key_t), "uct_knem_key_t");
    if (NULL == key) {
        ucs_error("Failed to allocate memory for uct_knem_key_t");
        return UCS_ERR_NO_MEMORY;
    }

    knem_iov[0].base = (uintptr_t)address;
    knem_iov[0].len  = length;

    memset(&create, 0, sizeof(struct knem_cmd_create_region));
    create.iovec_array = (uintptr_t)&knem_iov[0];
    create.iovec_nr    = 1;
    create.flags       = 0;
    create.protection  = PROT_READ | PROT_WRITE;

    rc = ioctl(knem_fd, KNEM_CMD_CREATE_REGION, &create);
    if (rc < 0) {
        ucs_error("KNEM create region failed: %m");
        ucs_free(key);
        return UCS_ERR_IO_ERROR;
    }

    ucs_assert_always(create.cookie != 0);
    key->cookie  = create.cookie;
    key->address = (uintptr_t)address;

    *memh_p = key;
    return UCS_OK;
}

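/*
 * A sketch of the matching deregistration, assuming KNEM_CMD_DESTROY_REGION
 * takes the cookie returned by KNEM_CMD_CREATE_REGION (per the KNEM ioctl
 * interface); an illustration, not the definitive implementation.
 */
static ucs_status_t uct_knem_mem_dereg(uct_md_h md, uct_mem_h memh)
{
    uct_knem_key_t *key     = (uct_knem_key_t *)memh;
    uct_knem_md_t  *knem_md = (uct_knem_md_t *)md;
    int rc;

    rc = ioctl(knem_md->knem_fd, KNEM_CMD_DESTROY_REGION, &key->cookie);
    if (rc < 0) {
        ucs_error("KNEM destroy region failed: %m");
    }

    ucs_free(key); /* release the key allocated in uct_knem_mem_reg() */
    return UCS_OK;
}
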
static ucs_status_t uct_gdr_copy_rkey_unpack(uct_md_component_t *mdc,
                                             const void *rkey_buffer,
                                             uct_rkey_t *rkey_p, void **handle_p)
{
    uct_gdr_copy_key_t *packed = (uct_gdr_copy_key_t *)rkey_buffer;
    uct_gdr_copy_key_t *key;

    key = ucs_malloc(sizeof(uct_gdr_copy_key_t), "uct_gdr_copy_key_t");
    if (NULL == key) {
        ucs_error("failed to allocate memory for uct_gdr_copy_key_t");
        return UCS_ERR_NO_MEMORY;
    }

    key->vaddr   = packed->vaddr;
    key->bar_ptr = packed->bar_ptr;
    *handle_p    = NULL;
    *rkey_p      = (uintptr_t)key;

    return UCS_OK;
}

void *uct_mm_ep_attach_remote_seg(uct_mm_ep_t *ep, uct_mm_iface_t *iface,
                                  uct_mm_fifo_element_t *elem)
{
    uct_mm_remote_seg_t *remote_seg, search;
    ucs_status_t status;

    /* take the mmid of the chunk that the desc belongs to (the desc that the
     * fifo_elem is 'assigned' to), and check if the ep has already attached
     * to it. */
    search.mmid = elem->desc_mmid;
    remote_seg = sglib_hashed_uct_mm_remote_seg_t_find_member(ep->remote_segments_hash,
                                                              &search);
    if (remote_seg == NULL) {
        /* not in the hash. attach to the memory the mmid refers to. the attach
         * call will return the base address of the mmid's chunk - save this
         * base address in a hash table (which maps mmid to base address). */
        remote_seg = ucs_malloc(sizeof(*remote_seg), "mm_desc");
        if (remote_seg == NULL) {
            ucs_fatal("Failed to allocate memory for a remote segment identifier. %m");
        }

        status = uct_mm_md_mapper_ops(iface->super.md)->attach(elem->desc_mmid,
                                                               elem->desc_mpool_size,
                                                               elem->desc_chunk_base_addr,
                                                               &remote_seg->address,
                                                               &remote_seg->cookie,
                                                               iface->path);
        if (status != UCS_OK) {
            ucs_fatal("Failed to attach to remote mmid %zu: %s", elem->desc_mmid,
                      ucs_status_string(status));
        }

        remote_seg->mmid   = elem->desc_mmid;
        remote_seg->length = elem->desc_mpool_size;

        /* put the base address into the ep's hash table */
        sglib_hashed_uct_mm_remote_seg_t_add(ep->remote_segments_hash, remote_seg);
    }

    return remote_seg->address;
}

static ucs_status_t uct_knem_md_open(const char *md_name,
                                     const uct_md_config_t *md_config, uct_md_h *md_p)
{
    uct_knem_md_t *knem_md;

    static uct_md_ops_t md_ops = {
        .close     = uct_knem_md_close,
        .query     = uct_knem_md_query,
        .mem_alloc = (void*)ucs_empty_function_return_success,
        .mem_free  = (void*)ucs_empty_function_return_success,
        .mkey_pack = uct_knem_rkey_pack,
        .mem_reg   = uct_knem_mem_reg,
        .mem_dereg = uct_knem_mem_dereg
    };

    knem_md = ucs_malloc(sizeof(uct_knem_md_t), "uct_knem_md_t");
    if (NULL == knem_md) {
        ucs_error("Failed to allocate memory for uct_knem_md_t");
        return UCS_ERR_NO_MEMORY;
    }

    knem_md->super.ops       = &md_ops;
    knem_md->super.component = &uct_knem_md_component;

    knem_md->knem_fd = open("/dev/knem", O_RDWR);
    if (knem_md->knem_fd < 0) {
        ucs_error("Could not open the KNEM device file at /dev/knem: %m.");
        ucs_free(knem_md); /* was free(); pair with ucs_malloc() */
        return UCS_ERR_IO_ERROR;
    }

    *md_p = (uct_md_h)knem_md;
    return UCS_OK;
}

UCT_MD_COMPONENT_DEFINE(uct_knem_md_component, "knem",
                        uct_knem_query_md_resources, uct_knem_md_open, 0,
                        uct_knem_rkey_unpack, uct_knem_rkey_release, "KNEM_",
                        uct_md_config_table, uct_md_config_t)

static ucs_status_t ucp_ep_wireup_send(ucp_ep_h ep, uct_ep_h uct_ep, uint8_t am_id,
                                       ucp_rsc_index_t dst_rsc_index)
{
    ucp_ep_wireup_op_t *wireup_op;
    ucs_status_t status;

    status = ucp_ep_send_wireup_am(ep, uct_ep, am_id, dst_rsc_index);
    if (status != UCS_ERR_NO_RESOURCE) {
        return status;
    }

    wireup_op = ucs_malloc(sizeof(*wireup_op), "wireup_op");
    if (wireup_op == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    wireup_op->super.progress = ucp_ep_wireup_op_progress;
    wireup_op->am_id          = am_id;
    wireup_op->dest_rsc_index = dst_rsc_index;
    ucp_ep_add_pending_op(ep, uct_ep, &wireup_op->super);
    return UCS_OK;
}

ucs_status_t ucs_mpool_init(ucs_mpool_t *mp, size_t priv_size,
                            size_t elem_size, size_t align_offset, size_t alignment,
                            unsigned elems_per_chunk, unsigned max_elems,
                            ucs_mpool_ops_t *ops, const char *name)
{
    /* Check input values */
    if ((elem_size == 0) || (align_offset > elem_size) ||
        (alignment == 0) || !ucs_is_pow2(alignment) ||
        (elems_per_chunk == 0) || (max_elems < elems_per_chunk))
    {
        ucs_error("Invalid memory pool parameter(s)");
        return UCS_ERR_INVALID_PARAM;
    }

    mp->data = ucs_malloc(sizeof(*mp->data) + priv_size, "mpool_data");
    if (mp->data == NULL) {
        ucs_error("Failed to allocate memory pool slow-path area");
        return UCS_ERR_NO_MEMORY;
    }

    mp->freelist           = NULL;
    mp->data->elem_size    = sizeof(ucs_mpool_elem_t) + elem_size;
    mp->data->alignment    = alignment;
    mp->data->align_offset = sizeof(ucs_mpool_elem_t) + align_offset;
    mp->data->quota        = max_elems;
    mp->data->tail         = NULL;
    mp->data->chunk_size   = sizeof(ucs_mpool_chunk_t) + alignment +
                             elems_per_chunk * ucs_mpool_elem_total_size(mp->data);
    mp->data->chunks       = NULL;
    mp->data->ops          = ops;
    mp->data->name         = strdup(name);

    VALGRIND_CREATE_MEMPOOL(mp, 0, 0);

    ucs_debug("mpool %s: align %u, maxelems %u, elemsize %u",
              ucs_mpool_name(mp), mp->data->alignment, max_elems,
              mp->data->elem_size);
    return UCS_OK;
}

ucs_status_t ucs_mpool_chunk_malloc(ucs_mpool_t *mp, size_t *size_p, void **chunk_p)
{
    *chunk_p = ucs_malloc(*size_p, ucs_mpool_name(mp));
    return (*chunk_p == NULL) ? UCS_ERR_NO_MEMORY : UCS_OK;
}

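/*
 * The matching release callback is symmetric: the chunk came straight from
 * ucs_malloc(), so it only needs ucs_free(). A sketch, assuming this is the
 * chunk_release counterpart registered alongside the allocator above in
 * ucs_mpool_ops_t:
 */
void ucs_mpool_chunk_free(ucs_mpool_t *mp, void *chunk)
{
    ucs_free(chunk);
}
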
static ucs_status_t uct_ib_iface_init_lmc(uct_ib_iface_t *iface,
                                          uct_ib_iface_config_t *config)
{
    unsigned i, j, num_path_bits;
    unsigned first, last;
    uint8_t lmc;
    int step;

    if (config->lid_path_bits.count == 0) {
        ucs_error("List of path bits must not be empty");
        return UCS_ERR_INVALID_PARAM;
    }

    /* count the number of lid_path_bits */
    num_path_bits = 0;
    for (i = 0; i < config->lid_path_bits.count; i++) {
        num_path_bits += 1 + abs(config->lid_path_bits.ranges[i].first -
                                 config->lid_path_bits.ranges[i].last);
    }

    iface->path_bits = ucs_malloc(num_path_bits * sizeof(*iface->path_bits),
                                  "ib_path_bits");
    if (iface->path_bits == NULL) {
        return UCS_ERR_NO_MEMORY;
    }

    lmc = uct_ib_iface_port_attr(iface)->lmc;

    /* go over the list of values (ranges) for the lid_path_bits and set them */
    iface->path_bits_count = 0;
    for (i = 0; i < config->lid_path_bits.count; ++i) {
        first = config->lid_path_bits.ranges[i].first;
        last  = config->lid_path_bits.ranges[i].last;

        /* range of values or one value */
        if (first < last) {
            step = 1;
        } else {
            step = -1;
        }

        /* fill the value/s */
        for (j = first; j != (last + step); j += step) {
            if (j >= UCS_BIT(lmc)) {
                ucs_debug("Not using value %d for path_bits - must be < 2^lmc (lmc=%d)",
                          j, lmc);
                if (step == 1) {
                    break;
                } else {
                    continue;
                }
            }

            ucs_assert(iface->path_bits_count <= num_path_bits);
            iface->path_bits[iface->path_bits_count] = j;
            iface->path_bits_count++;
        }
    }

    return UCS_OK;
}

ssize_t uct_cm_ep_am_bcopy(uct_ep_h tl_ep, uint8_t am_id,
                           uct_pack_callback_t pack_cb, void *arg)
{
    uct_cm_iface_t *iface = ucs_derived_of(tl_ep->iface, uct_cm_iface_t);
    uct_cm_ep_t *ep       = ucs_derived_of(tl_ep, uct_cm_ep_t);
    struct ib_cm_sidr_req_param req;
    struct ibv_sa_path_rec path;
    struct ib_cm_id *id;
    ucs_status_t status;
    uct_cm_hdr_t *hdr;
    size_t payload_len;
    size_t total_len;
    int ret;

    UCT_CHECK_AM_ID(am_id);

    uct_cm_enter(iface);

    if (iface->num_outstanding >= iface->config.max_outstanding) {
        status = UCS_ERR_NO_RESOURCE;
        goto err;
    }

    /* Allocate temporary contiguous buffer */
    hdr = ucs_malloc(IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE, "cm_send_buf");
    if (hdr == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    payload_len = pack_cb(hdr + 1, arg);
    hdr->am_id  = am_id;
    hdr->length = payload_len;
    total_len   = sizeof(*hdr) + payload_len;

    status = uct_cm_ep_fill_path_rec(ep, &path);
    if (status != UCS_OK) {
        goto err_free;
    }

    /* Fill SIDR request */
    memset(&req, 0, sizeof req);
    req.path             = &path;
    req.service_id       = ep->dest_addr.id;
    req.timeout_ms       = iface->config.timeout_ms;
    req.private_data     = hdr;
    req.private_data_len = total_len;
    req.max_cm_retries   = iface->config.retry_count;

    /* Create temporary ID for this message. Will be released when getting REP. */
    ret = ib_cm_create_id(iface->cmdev, &id, NULL);
    if (ret) {
        ucs_error("ib_cm_create_id() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_free;
    }

    uct_cm_dump_path(&path);

    ret = ib_cm_send_sidr_req(id, &req);
    if (ret) {
        ucs_error("ib_cm_send_sidr_req() failed: %m");
        status = UCS_ERR_IO_ERROR;
        goto err_destroy_id;
    }

    iface->outstanding[iface->num_outstanding++] = id;

    UCT_TL_EP_STAT_OP(&ep->super, AM, BCOPY, payload_len);
    uct_cm_leave(iface);

    uct_cm_iface_trace_data(iface, UCT_AM_TRACE_TYPE_SEND, hdr,
                            "TX: SIDR_REQ [dlid %d svc 0x%"PRIx64"]",
                            ntohs(path.dlid), req.service_id);
    ucs_free(hdr);
    return payload_len;

err_destroy_id:
    ib_cm_destroy_id(id);
err_free:
    ucs_free(hdr);
err:
    uct_cm_leave(iface);
    return status;
}

ucs_status_t ucp_rkey_pack(ucp_context_h context, ucp_mem_h memh,
                           void **rkey_buffer_p, size_t *size_p)
{
    unsigned md_index, uct_memh_index;
    void *rkey_buffer, *p;
    size_t size, md_size;
    ucs_status_t status;
    char UCS_V_UNUSED buf[128];

    /* always acquire context lock */
    UCP_THREAD_CS_ENTER(&context->mt_lock);

    ucs_trace("packing rkeys for buffer %p memh %p md_map 0x%x",
              memh->address, memh, memh->md_map);

    if (memh->length == 0) {
        /* dummy memh, return dummy key */
        *rkey_buffer_p = &ucp_mem_dummy_buffer;
        *size_p        = sizeof(ucp_mem_dummy_buffer);
        status         = UCS_OK;
        goto out;
    }

    size = sizeof(ucp_md_map_t);
    for (md_index = 0; md_index < context->num_mds; ++md_index) {
        size += sizeof(uint8_t);
        md_size = context->md_attrs[md_index].rkey_packed_size;
        ucs_assert_always(md_size < UINT8_MAX);
        size += md_size;
    }

    rkey_buffer = ucs_malloc(size, "ucp_rkey_buffer");
    if (rkey_buffer == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto out;
    }

    p = rkey_buffer;

    /* Write the MD map */
    *(ucp_md_map_t*)p = memh->md_map;
    p += sizeof(ucp_md_map_t);

    /* Write both size and rkey_buffer for each UCT rkey */
    uct_memh_index = 0;
    for (md_index = 0; md_index < context->num_mds; ++md_index) {
        if (!(memh->md_map & UCS_BIT(md_index))) {
            continue;
        }

        md_size = context->md_attrs[md_index].rkey_packed_size;
        *((uint8_t*)p++) = md_size;
        uct_md_mkey_pack(context->mds[md_index], memh->uct[uct_memh_index], p);

        ucs_trace("rkey[%d]=%s for md[%d]=%s", uct_memh_index,
                  ucs_log_dump_hex(p, md_size, buf, sizeof(buf)), md_index,
                  context->md_rscs[md_index].md_name);

        ++uct_memh_index;
        p += md_size;
    }

    if (uct_memh_index == 0) {
        status = UCS_ERR_UNSUPPORTED;
        goto err_destroy;
    }

    *rkey_buffer_p = rkey_buffer;
    *size_p        = size;
    status         = UCS_OK;
    goto out;

err_destroy:
    ucs_free(rkey_buffer);
out:
    UCP_THREAD_CS_EXIT(&context->mt_lock);
    return status;
}

ucs_status_t ucp_rkey_pack(ucp_context_h context, ucp_mem_h memh,
                           void **rkey_buffer_p, size_t *size_p)
{
    unsigned pd_index, uct_memh_index;
    void *rkey_buffer, *p;
    size_t size, pd_size;
    ucs_status_t status;
    char UCS_V_UNUSED buf[128];

    ucs_trace("packing rkeys for buffer %p memh %p pd_map 0x%x",
              memh->address, memh, memh->pd_map);

    if (memh->length == 0) {
        /* dummy memh, return dummy key */
        *rkey_buffer_p = &ucp_mem_dummy_buffer;
        *size_p        = sizeof(ucp_mem_dummy_buffer);
        return UCS_OK;
    }

    size = sizeof(ucp_pd_map_t);
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        size += sizeof(uint8_t);
        pd_size = context->pd_attrs[pd_index].rkey_packed_size;
        ucs_assert_always(pd_size < UINT8_MAX);
        size += pd_size;
    }

    rkey_buffer = ucs_malloc(size, "ucp_rkey_buffer");
    if (rkey_buffer == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    p = rkey_buffer;

    /* Write the PD map */
    *(ucp_pd_map_t*)p = memh->pd_map;
    p += sizeof(ucp_pd_map_t);

    /* Write both size and rkey_buffer for each UCT rkey */
    uct_memh_index = 0;
    for (pd_index = 0; pd_index < context->num_pds; ++pd_index) {
        if (!(memh->pd_map & UCS_BIT(pd_index))) {
            continue;
        }

        pd_size = context->pd_attrs[pd_index].rkey_packed_size;
        *((uint8_t*)p++) = pd_size;
        uct_pd_mkey_pack(context->pds[pd_index], memh->uct[uct_memh_index], p);

        ucs_trace("rkey[%d]=%s for pd[%d]=%s", uct_memh_index,
                  ucs_log_dump_hex(p, pd_size, buf, sizeof(buf)), pd_index,
                  context->pd_rscs[pd_index].pd_name);

        ++uct_memh_index;
        p += pd_size;
    }

    if (uct_memh_index == 0) {
        status = UCS_ERR_UNSUPPORTED;
        goto err_destroy;
    }

    *rkey_buffer_p = rkey_buffer;
    *size_p        = size;
    return UCS_OK;

err_destroy:
    ucs_free(rkey_buffer);
err:
    return status;
}

ucs_status_t ucp_ep_rkey_unpack(ucp_ep_h ep, void *rkey_buffer, ucp_rkey_h *rkey_p)
{
    unsigned remote_pd_index, remote_pd_gap;
    unsigned rkey_index;
    unsigned pd_count;
    ucs_status_t status;
    ucp_rkey_h rkey;
    uint8_t pd_size;
    ucp_pd_map_t pd_map;
    void *p;

    /* Count the number of remote PDs in the rkey buffer */
    p = rkey_buffer;

    /* Read remote PD map */
    pd_map = *(ucp_pd_map_t*)p;

    ucs_trace("unpacking rkey with pd_map 0x%x", pd_map);

    if (pd_map == 0) {
        /* Dummy key, return OK */
        *rkey_p = &ucp_mem_dummy_rkey;
        return UCS_OK;
    }

    pd_count = ucs_count_one_bits(pd_map);
    p       += sizeof(ucp_pd_map_t);

    /* Allocate rkey handle which holds UCT rkeys for all remote PDs.
     * We keep all of them to handle a future transport switch. */
    rkey = ucs_malloc(sizeof(*rkey) + (sizeof(rkey->uct[0]) * pd_count), "ucp_rkey");
    if (rkey == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto err;
    }

    rkey->pd_map    = 0;
    remote_pd_index = 0; /* Index of remote PD */
    rkey_index      = 0; /* Index of the rkey in the array */

    /* Unpack rkey of each UCT PD */
    while (pd_map > 0) {
        pd_size = *((uint8_t*)p++);

        /* Use bit operations to iterate through the indices of the remote PDs
         * as provided in the pd_map. pd_map always holds a bitmap of PD indices
         * that remain to be used. Every time we find the "gap" until the next
         * valid PD index using ffs operation. If some rkeys cannot be unpacked,
         * we remove them from the local map. */
        remote_pd_gap    = ucs_ffs64(pd_map); /* Find the offset for next PD index */
        remote_pd_index += remote_pd_gap;     /* Calculate next index of remote PD */
        pd_map         >>= remote_pd_gap;     /* Remove the gap from the map */
        ucs_assert(pd_map & 1);

        /* Unpack only reachable rkeys */
        if (UCS_BIT(remote_pd_index) & ucp_ep_config(ep)->key.reachable_pd_map) {
            ucs_assert(rkey_index < pd_count);
            status = uct_rkey_unpack(p, &rkey->uct[rkey_index]);
            if (status != UCS_OK) {
                ucs_error("Failed to unpack remote key from remote pd[%d]: %s",
                          remote_pd_index, ucs_status_string(status));
                goto err_destroy;
            }

            ucs_trace("rkey[%d] for remote pd %d is 0x%lx", rkey_index,
                      remote_pd_index, rkey->uct[rkey_index].rkey);
            rkey->pd_map |= UCS_BIT(remote_pd_index);
            ++rkey_index;
        }

        ++remote_pd_index;
        pd_map >>= 1;
        p += pd_size;
    }

    if (rkey->pd_map == 0) {
        ucs_debug("The unpacked rkey from the destination is unreachable");
        status = UCS_ERR_UNREACHABLE;
        goto err_destroy;
    }

    *rkey_p = rkey;
    return UCS_OK;

err_destroy:
    ucp_rkey_destroy(rkey);
err:
    return status;
}

void *ucs_class_malloc(ucs_class_t *cls)
{
    return ucs_malloc(cls->size, cls->name);
}

static ucs_status_t uct_gdr_copy_md_open(const char *md_name,
                                         const uct_md_config_t *uct_md_config,
                                         uct_md_h *md_p)
{
    const uct_gdr_copy_md_config_t *md_config =
        ucs_derived_of(uct_md_config, uct_gdr_copy_md_config_t);
    ucs_status_t status;
    uct_gdr_copy_md_t *md;
    ucs_rcache_params_t rcache_params;

    md = ucs_malloc(sizeof(uct_gdr_copy_md_t), "uct_gdr_copy_md_t");
    if (NULL == md) {
        ucs_error("failed to allocate memory for uct_gdr_copy_md_t");
        return UCS_ERR_NO_MEMORY;
    }

    md->super.ops       = &md_ops;
    md->super.component = &uct_gdr_copy_md_component;
    md->rcache          = NULL;
    md->reg_cost        = md_config->uc_reg_cost;

    md->gdrcpy_ctx = gdr_open();
    if (md->gdrcpy_ctx == NULL) {
        ucs_error("failed to open gdr copy");
        status = UCS_ERR_IO_ERROR;
        goto err_free_md;
    }

    if (md_config->enable_rcache != UCS_NO) {
        rcache_params.region_struct_size = sizeof(uct_gdr_copy_rcache_region_t);
        rcache_params.alignment          = md_config->rcache.alignment;
        rcache_params.max_alignment      = UCT_GDR_COPY_MD_RCACHE_DEFAULT_ALIGN;
        rcache_params.ucm_event_priority = md_config->rcache.event_prio;
        rcache_params.context            = md;
        rcache_params.ops                = &uct_gdr_copy_rcache_ops;
        status = ucs_rcache_create(&rcache_params, "gdr_copy", NULL, &md->rcache);
        if (status == UCS_OK) {
            md->super.ops         = &md_rcache_ops;
            md->reg_cost.overhead = 0;
            md->reg_cost.growth   = 0;
        } else {
            ucs_assert(md->rcache == NULL);
            if (md_config->enable_rcache == UCS_YES) {
                status = UCS_ERR_IO_ERROR;
                goto err_close_gdr;
            } else {
                ucs_debug("could not create registration cache: %s",
                          ucs_status_string(status));
            }
        }
    }

    *md_p  = (uct_md_h)md;
    status = UCS_OK;

out:
    return status;

err_close_gdr:
    gdr_close(md->gdrcpy_ctx);
err_free_md:
    ucs_free(md);
    goto out;
}

static UCS_F_NOINLINE ucs_status_t
ucp_wireup_add_memaccess_lanes(ucp_ep_h ep, unsigned address_count,
                               const ucp_address_entry_t *address_list,
                               ucp_wireup_lane_desc_t *lane_descs,
                               ucp_lane_index_t *num_lanes_p,
                               const ucp_wireup_criteria_t *criteria,
                               uint32_t usage)
{
    ucp_wireup_criteria_t mem_criteria = *criteria;
    ucp_address_entry_t *address_list_copy;
    ucp_rsc_index_t rsc_index, dst_md_index;
    size_t address_list_size;
    double score, reg_score;
    uint64_t remote_md_map;
    unsigned addr_index;
    ucs_status_t status;
    char title[64];

    remote_md_map = -1;

    /* Create a copy of the address list */
    address_list_size = sizeof(*address_list_copy) * address_count;
    address_list_copy = ucs_malloc(address_list_size, "rma address list");
    if (address_list_copy == NULL) {
        status = UCS_ERR_NO_MEMORY;
        goto out;
    }

    memcpy(address_list_copy, address_list, address_list_size);

    /* Select best transport which can reach registered memory */
    snprintf(title, sizeof(title), criteria->title, "registered");
    mem_criteria.title           = title;
    mem_criteria.remote_md_flags = UCT_MD_FLAG_REG;
    status = ucp_wireup_select_transport(ep, address_list_copy, address_count,
                                         &mem_criteria, remote_md_map, 1,
                                         &rsc_index, &addr_index, &score);
    if (status != UCS_OK) {
        goto out_free_address_list;
    }

    dst_md_index = address_list_copy[addr_index].md_index;
    reg_score    = score;

    /* Add to the list of lanes and remove all occurrences of the remote md
     * from the address list, to avoid selecting the same remote md again. */
    ucp_wireup_add_lane_desc(lane_descs, num_lanes_p, rsc_index, addr_index,
                             dst_md_index, score, usage);
    remote_md_map &= ~UCS_BIT(dst_md_index);

    /* Select additional transports which can access allocated memory, but only
     * if their scores are better. We need this because a remote memory block
     * can potentially be allocated using one of them, and we might get better
     * performance than with the transports which support only registered
     * remote memory. */
    snprintf(title, sizeof(title), criteria->title, "allocated");
    mem_criteria.title           = title;
    mem_criteria.remote_md_flags = UCT_MD_FLAG_ALLOC;

    while (address_count > 0) {
        status = ucp_wireup_select_transport(ep, address_list_copy, address_count,
                                             &mem_criteria, remote_md_map, 0,
                                             &rsc_index, &addr_index, &score);
        if ((status != UCS_OK) || (score <= reg_score)) {
            break;
        }

        /* Add lane description and remove all occurrences of the remote md */
        dst_md_index = address_list_copy[addr_index].md_index;
        ucp_wireup_add_lane_desc(lane_descs, num_lanes_p, rsc_index, addr_index,
                                 dst_md_index, score, usage);
        remote_md_map &= ~UCS_BIT(dst_md_index);
    }

    status = UCS_OK;

out_free_address_list:
    ucs_free(address_list_copy);
out:
    return status;
}