/**
 * test_fork - test if forks are handled properly
 * Description:
 *	Called by xpmem_master, but do nothing. xpmem_proc1 does the fork.
 *	This process attaches the second page of proc1's segment, verifies
 *	its contents, signals proc1 via the COW lock byte, then adds 1 to
 *	every element so proc1 can observe the writes after its COW.
 * Return Values:
 *	Success: 0
 *	Failure: -2
 */
int test_fork(test_args *xpmem_args)
{
	xpmem_segid_t segid;
	xpmem_apid_t apid;
	struct xpmem_addr addr;
	int i, ret = 0, *data;

	/* proc1 published its segid as hex text in the shared buffer */
	segid = strtol(xpmem_args->share, NULL, 16);

	apid = xpmem_get(segid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL);
	if (apid == -1) {
		/* BUG FIX: previously unchecked; a failed get would have fed
		 * a bogus apid straight into xpmem_attach */
		perror("xpmem_get");
		return -2;
	}

	addr.apid = apid;
	addr.offset = PAGE_SIZE;	/* attach the segment's second page */
	data = (int *)xpmem_attach(addr, PAGE_SIZE, NULL);
	if (data == (void *)-1) {
		perror("xpmem_attach");
		xpmem_release(apid);	/* BUG FIX: don't leak the apid */
		return -2;
	}

	printf("xpmem_proc2: mypid = %d\n", getpid());
	printf("xpmem_proc2: segid = %llx\n", segid);
	printf("xpmem_proc2: attached at %p\n", data);

	/* touch every int so the pages are faulted in and pinned */
	printf("xpmem_proc2: reading to pin pages\n");
	for (i = 0; i < PAGE_INT_SIZE; i++) {
		if (*(data + i) != PAGE_INT_SIZE + i) {
			printf("xpmem_proc2: ***mismatch at %d: expected %lu "
			       "got %d\n", i, PAGE_INT_SIZE + i, *(data + i));
			ret = -2;
		}
	}

	/* Now wait for xpmem_proc1 to invoke COW */
	printf("xpmem_proc2: waiting for COW...\n\n");
	/* NOTE(review): this loop sets the lock byte and exits after at most
	 * one pass, so it signals proc1 rather than spinning; kept as-is to
	 * preserve the original handshake behavior */
	while (xpmem_args->share[COW_LOCK_INDEX] == 0) {
		xpmem_args->share[COW_LOCK_INDEX] = 1;
	}
	sleep(1);

	printf("xpmem_proc2: adding 1 to all elems\n\n");
	for (i = 0; i < PAGE_INT_SIZE; i++)
		*(data + i) += 1;

	xpmem_detach(data);
	xpmem_release(apid);

	return ret;
}
/*
 * Map a remote xpmem segment identified by mmid into the local address
 * space.  On success, *local_address points at the byte corresponding to
 * remote_address and *cookie holds the apid needed for later detach and
 * release.  Returns UCS_OK or UCS_ERR_IO_ERROR.
 */
static ucs_status_t uct_xpmem_attach(uct_mm_id_t mmid, size_t length,
                                     void *remote_address, void **local_address,
                                     uint64_t *cookie, const char *path)
{
    struct xpmem_addr xaddr;
    ptrdiff_t page_off;
    void *mapped;

    xaddr.offset = 0;
    xaddr.apid   = xpmem_get(mmid, XPMEM_RDWR, XPMEM_PERMIT_MODE, NULL);
    VALGRIND_MAKE_MEM_DEFINED(&xaddr.apid, sizeof(xaddr.apid));
    if (xaddr.apid < 0) {
        ucs_error("Failed to acquire xpmem segment 0x%"PRIx64": %m", mmid);
        return UCS_ERR_IO_ERROR;
    }

    ucs_trace("xpmem acquired segment 0x%"PRIx64" apid 0x%llx remote_address %p",
              mmid, xaddr.apid, remote_address);

    /* xpmem maps whole pages; remember the sub-page offset so it can be
     * folded back into the returned pointer */
    page_off = ((uintptr_t)remote_address) % ucs_get_page_size();

    mapped = xpmem_attach(xaddr, length + page_off, NULL);
    VALGRIND_MAKE_MEM_DEFINED(&mapped, sizeof(mapped));
    if (mapped == MAP_FAILED) {
        ucs_error("Failed to attach xpmem segment 0x%"PRIx64" apid 0x%llx "
                  "with length %zu: %m", mmid, xaddr.apid, length);
        xpmem_release(xaddr.apid);
        return UCS_ERR_IO_ERROR;
    }

    VALGRIND_MAKE_MEM_DEFINED(mapped + page_off, length);

    *local_address = mapped + page_off;
    *cookie        = xaddr.apid;

    ucs_trace("xpmem attached segment 0x%"PRIx64" apid 0x%llx %p..%p at %p (+%zd)",
              mmid, xaddr.apid, remote_address, remote_address + length,
              mapped, page_off);

    return UCS_OK;
}
/* look up the remote pointer in the peer rcache and attach if * necessary */ mca_mpool_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr, size_t size, int flags, void **local_ptr) { struct mca_rcache_base_module_t *rcache = ep->rcache; mca_mpool_base_registration_t *regs[10], *reg = NULL; xpmem_addr_t xpmem_addr; uintptr_t base, bound; int rc, i; /* protect rcache access */ OPAL_THREAD_LOCK(&ep->lock); /* use btl/self for self communication */ assert (ep->peer_smp_rank != MCA_BTL_VADER_LOCAL_RANK); base = (uintptr_t) down_align_addr(rem_ptr, mca_btl_vader_component.log_attach_align); bound = (uintptr_t) up_align_addr((void *)((uintptr_t) rem_ptr + size - 1), mca_btl_vader_component.log_attach_align) + 1; if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) { bound = VADER_MAX_ADDRESS; } /* several segments may match the base pointer */ rc = rcache->rcache_find_all (rcache, (void *) base, bound - base, regs, 10); for (i = 0 ; i < rc ; ++i) { if (bound <= (uintptr_t)regs[i]->bound && base >= (uintptr_t)regs[i]->base) { opal_atomic_add (®s[i]->ref_count, 1); reg = regs[i]; goto reg_found; } if (regs[i]->flags & MCA_MPOOL_FLAGS_PERSIST) { continue; } /* remove this pointer from the rcache and decrement its reference count (so it is detached later) */ rc = rcache->rcache_delete (rcache, regs[i]); if (OPAL_UNLIKELY(0 != rc)) { /* someone beat us to it? */ break; } /* start the new segment from the lower of the two bases */ base = (uintptr_t) regs[i]->base < base ? 
(uintptr_t) regs[i]->base : base; opal_atomic_add (®s[i]->ref_count, -1); if (OPAL_LIKELY(0 == regs[i]->ref_count)) { /* this pointer is not in use */ (void) xpmem_detach (regs[i]->alloc_base); OBJ_RELEASE(regs[i]); } break; } reg = OBJ_NEW(mca_mpool_base_registration_t); if (OPAL_LIKELY(NULL != reg)) { /* stick around for awhile */ reg->ref_count = 2; reg->base = (unsigned char *) base; reg->bound = (unsigned char *) bound; reg->flags = flags; #if defined(HAVE_SN_XPMEM_H) xpmem_addr.id = ep->apid; #else xpmem_addr.apid = ep->apid; #endif xpmem_addr.offset = base; reg->alloc_base = xpmem_attach (xpmem_addr, bound - base, NULL); if (OPAL_UNLIKELY((void *)-1 == reg->alloc_base)) { OPAL_THREAD_UNLOCK(&ep->lock); OBJ_RELEASE(reg); return NULL; } opal_memchecker_base_mem_defined (reg->alloc_base, bound - base); rcache->rcache_insert (rcache, reg, 0); } reg_found: opal_atomic_wmb (); *local_ptr = (void *) ((uintptr_t) reg->alloc_base + (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base)); OPAL_THREAD_UNLOCK(&ep->lock); return reg; }
int shmem_transport_xpmem_startup(void) { int ret, i, peer_num, num_on_node = 0; struct share_info_t info; struct xpmem_addr addr; for (i = 0 ; i < shmem_internal_num_pes; ++i) { if (-1 != SHMEM_GET_RANK_SAME_NODE(i)) { num_on_node++; } } /* allocate space for local peers */ shmem_transport_xpmem_peers = calloc(num_on_node, sizeof(struct shmem_transport_xpmem_peer_info_t)); if (NULL == shmem_transport_xpmem_peers) return 1; /* get local peer info and map into our address space ... */ for (i = 0 ; i < shmem_internal_num_pes; ++i) { peer_num = SHMEM_GET_RANK_SAME_NODE(i); if (-1 == peer_num) continue; if (shmem_internal_my_pe == i) { shmem_transport_xpmem_peers[peer_num].data_ptr = shmem_internal_data_base; shmem_transport_xpmem_peers[peer_num].heap_ptr = shmem_internal_heap_base; } else { ret = shmem_runtime_get(i, "xpmem-segids", &info, sizeof(struct share_info_t)); if (0 != ret) { fprintf(stderr, "[%03d] ERROR: runtime_get failed: %d\n", shmem_internal_my_pe, ret); return 1; } shmem_transport_xpmem_peers[peer_num].data_apid = xpmem_get(info.data_seg, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void*)0666); if (shmem_transport_xpmem_peers[peer_num].data_apid < 0) { fprintf(stderr, "[%03d] ERROR: could not get data apid: %s\n", shmem_internal_my_pe, strerror(errno)); return 1; } addr.apid = shmem_transport_xpmem_peers[peer_num].data_apid; addr.offset = 0; shmem_transport_xpmem_peers[peer_num].data_attach_ptr = xpmem_attach(addr, info.data_len, NULL); if ((size_t) shmem_transport_xpmem_peers[peer_num].data_ptr == XPMEM_MAXADDR_SIZE) { fprintf(stderr, "[%03d] ERROR: could not get data segment: %s\n", shmem_internal_my_pe, strerror(errno)); return 1; } shmem_transport_xpmem_peers[peer_num].data_ptr = (char*) shmem_transport_xpmem_peers[peer_num].data_attach_ptr + info.data_off; shmem_transport_xpmem_peers[peer_num].heap_apid = xpmem_get(info.heap_seg, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void*)0666); if (shmem_transport_xpmem_peers[peer_num].heap_apid < 0) { fprintf(stderr, "[%03d] 
ERROR: could not get heap apid: %s\n", shmem_internal_my_pe, strerror(errno)); return 1; } addr.apid = shmem_transport_xpmem_peers[peer_num].heap_apid; addr.offset = 0; shmem_transport_xpmem_peers[peer_num].heap_attach_ptr = xpmem_attach(addr, info.heap_len, NULL); if ((size_t) shmem_transport_xpmem_peers[peer_num].heap_ptr == XPMEM_MAXADDR_SIZE) { fprintf(stderr, "[%03d] ERROR: could not get data segment: %s\n", shmem_internal_my_pe, strerror(errno)); return 1; } shmem_transport_xpmem_peers[peer_num].heap_ptr = (char*) shmem_transport_xpmem_peers[peer_num].heap_attach_ptr + info.heap_off; } } return 0; }
/* look up the remote pointer in the peer rcache and attach if * necessary */ mca_rcache_base_registration_t *vader_get_registation (struct mca_btl_base_endpoint_t *ep, void *rem_ptr, size_t size, int flags, void **local_ptr) { mca_rcache_base_vma_module_t *vma_module = mca_btl_vader_component.vma_module; uint64_t attach_align = 1 << mca_btl_vader_component.log_attach_align; mca_rcache_base_registration_t *reg = NULL; vader_check_reg_ctx_t check_ctx = {.ep = ep, .reg = ®, .vma_module = vma_module}; xpmem_addr_t xpmem_addr; uintptr_t base, bound; int rc; base = OPAL_DOWN_ALIGN((uintptr_t) rem_ptr, attach_align, uintptr_t); bound = OPAL_ALIGN((uintptr_t) rem_ptr + size - 1, attach_align, uintptr_t) + 1; if (OPAL_UNLIKELY(bound > VADER_MAX_ADDRESS)) { bound = VADER_MAX_ADDRESS; } check_ctx.base = base; check_ctx.bound = bound; /* several segments may match the base pointer */ rc = mca_rcache_base_vma_iterate (vma_module, (void *) base, bound - base, vader_check_reg, &check_ctx); if (2 == rc) { /* start the new segment from the lower of the two bases */ base = (uintptr_t) reg->base < base ? 
(uintptr_t) reg->base : base; if (OPAL_LIKELY(0 == opal_atomic_add_32 (®->ref_count, -1))) { /* this pointer is not in use */ (void) xpmem_detach (reg->rcache_context); OBJ_RELEASE(reg); } reg = NULL; } if (NULL == reg) { reg = OBJ_NEW(mca_rcache_base_registration_t); if (OPAL_LIKELY(NULL != reg)) { /* stick around for awhile */ reg->ref_count = 2; reg->base = (unsigned char *) base; reg->bound = (unsigned char *) bound; reg->flags = flags; reg->alloc_base = (void *) (intptr_t) ep->peer_smp_rank; #if defined(HAVE_SN_XPMEM_H) xpmem_addr.id = ep->segment_data.xpmem.apid; #else xpmem_addr.apid = ep->segment_data.xpmem.apid; #endif xpmem_addr.offset = base; reg->rcache_context = xpmem_attach (xpmem_addr, bound - base, NULL); if (OPAL_UNLIKELY((void *)-1 == reg->rcache_context)) { OBJ_RELEASE(reg); return NULL; } opal_memchecker_base_mem_defined (reg->rcache_context, bound - base); mca_rcache_base_vma_insert (vma_module, reg, 0); } } opal_atomic_wmb (); *local_ptr = (void *) ((uintptr_t) reg->rcache_context + (ptrdiff_t)((uintptr_t) rem_ptr - (uintptr_t) reg->base)); return reg; }
/*
 * User ioctl to the XPMEM driver. Only 64-bit user applications are
 * supported.
 *
 * Dispatches the XPMEM_CMD_* ioctls.  Each command copies its argument
 * struct in from user space, performs the operation, and (where the
 * command produces a handle) writes the result back into the caller's
 * struct.  Returns 0 or a negative errno; -ENOIOCTLCMD for unknown cmds.
 */
static long xpmem_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	long ret;

	switch (cmd) {
	case XPMEM_CMD_VERSION: {
		return XPMEM_CURRENT_VERSION;
	}
	case XPMEM_CMD_MAKE: {
		struct xpmem_cmd_make make_info;
		xpmem_segid_t segid;

		if (copy_from_user(&make_info, (void __user *)arg,
				   sizeof(struct xpmem_cmd_make)))
			return -EFAULT;

		ret = xpmem_make(make_info.vaddr, make_info.size,
				 make_info.permit_type,
				 (void *)make_info.permit_value, &segid);
		if (ret != 0)
			return ret;

		/* hand the new segid back to the caller; undo the make if
		 * the write-back faults so no orphan segment remains */
		if (put_user(segid,
			     &((struct xpmem_cmd_make __user *)arg)->segid)) {
			(void)xpmem_remove(segid);
			return -EFAULT;
		}
		return 0;
	}
	case XPMEM_CMD_REMOVE: {
		struct xpmem_cmd_remove remove_info;

		if (copy_from_user(&remove_info, (void __user *)arg,
				   sizeof(struct xpmem_cmd_remove)))
			return -EFAULT;

		return xpmem_remove(remove_info.segid);
	}
	case XPMEM_CMD_GET: {
		struct xpmem_cmd_get get_info;
		xpmem_apid_t apid;

		if (copy_from_user(&get_info, (void __user *)arg,
				   sizeof(struct xpmem_cmd_get)))
			return -EFAULT;

		ret = xpmem_get(get_info.segid, get_info.flags,
				get_info.permit_type,
				(void *)get_info.permit_value, &apid);
		if (ret != 0)
			return ret;

		/* hand the new apid back; release it if write-back faults */
		if (put_user(apid,
			     &((struct xpmem_cmd_get __user *)arg)->apid)) {
			(void)xpmem_release(apid);
			return -EFAULT;
		}
		return 0;
	}
	case XPMEM_CMD_RELEASE: {
		struct xpmem_cmd_release release_info;

		if (copy_from_user(&release_info, (void __user *)arg,
				   sizeof(struct xpmem_cmd_release)))
			return -EFAULT;

		return xpmem_release(release_info.apid);
	}
	case XPMEM_CMD_ATTACH: {
		struct xpmem_cmd_attach attach_info;
		u64 at_vaddr;

		if (copy_from_user(&attach_info, (void __user *)arg,
				   sizeof(struct xpmem_cmd_attach)))
			return -EFAULT;

		ret = xpmem_attach(file, attach_info.apid, attach_info.offset,
				   attach_info.size, attach_info.vaddr,
				   attach_info.fd, attach_info.flags,
				   &at_vaddr);
		if (ret != 0)
			return ret;

		/* report the attach address; detach if write-back faults so
		 * the caller never has a mapping it cannot locate */
		if (put_user(at_vaddr,
			     &((struct xpmem_cmd_attach __user *)arg)->vaddr)) {
			(void)xpmem_detach(at_vaddr);
			return -EFAULT;
		}
		return 0;
	}
	case XPMEM_CMD_DETACH: {
		struct xpmem_cmd_detach detach_info;

		if (copy_from_user(&detach_info, (void __user *)arg,
				   sizeof(struct xpmem_cmd_detach)))
			return -EFAULT;

		return xpmem_detach(detach_info.vaddr);
	}
	case XPMEM_CMD_FORK_BEGIN: {
		return xpmem_fork_begin();
	}
	case XPMEM_CMD_FORK_END: {
		return xpmem_fork_end();
	}
	default:
		break;
	}
	return -ENOIOCTLCMD;
}