/*
 * This test is run with an overridden MEMKIND_HBW_NODES environment variable
 * and tries to perform an allocation from DRAM using hbw_malloc() with the
 * default HBW_POLICY_PREFERRED policy.
 */
int main()
{
    struct bitmask *expected_nodemask = NULL;
    struct bitmask *returned_nodemask = NULL;
    void *ptr = NULL;
    int ret = 0;
    int status = 0;

    ptr = hbw_malloc(KB);
    if (ptr == NULL) {
        printf("Error: allocation failed\n");
        goto exit;
    }

    expected_nodemask = numa_allocate_nodemask();
    status = memkind_hbw_all_get_mbind_nodemask(NULL, expected_nodemask->maskp,
                                                expected_nodemask->size);
    if (status != MEMKIND_ERROR_ENVIRON) {
        printf("Error: wrong return value from memkind_hbw_all_get_mbind_nodemask()\n");
        printf("Expected: %d\n", MEMKIND_ERROR_ENVIRON);
        printf("Actual: %d\n", status);
        goto exit;
    }

    returned_nodemask = numa_allocate_nodemask();
    status = get_mempolicy(NULL, returned_nodemask->maskp, returned_nodemask->size,
                           ptr, MPOL_F_ADDR);
    if (status) {
        printf("Error: get_mempolicy() returned %d\n", status);
        goto exit;
    }

    ret = numa_bitmask_equal(returned_nodemask, expected_nodemask);
    if (!ret) {
        printf("Error: Memkind hbw and allocated pointer nodemasks are not equal\n");
    }

exit:
    if (expected_nodemask) {
        numa_free_nodemask(expected_nodemask);
    }
    if (returned_nodemask) {
        numa_free_nodemask(returned_nodemask);
    }
    if (ptr) {
        hbw_free(ptr);
    }

    return ret;
}
void bind2node_id(int node_id)
{
    struct bitmask *bmp = numa_allocate_nodemask();
    numa_bitmask_setbit(bmp, node_id);
    numa_bind(bmp);
    numa_free_nodemask(bmp);
}
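A hedged usage sketch for the helper above (the worker function, its argument, and WORK_BUF_SIZE are illustrative names, not taken from the original code): a worker thread binds itself to one node before allocating its working set, since numa_bind() constrains both CPU placement and memory allocation.

/* Illustrative only: worker() and WORK_BUF_SIZE are assumed names. */
static void *worker(void *arg)
{
    int node_id = *(int *)arg;

    bind2node_id(node_id);                        /* run and allocate on node_id */
    char *buf = numa_alloc_local(WORK_BUF_SIZE);  /* pages expected from node_id */
    /* ... process buf ... */
    numa_free(buf, WORK_BUF_SIZE);
    return NULL;
}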
void ConfigureTableThread() {
  int32_t node_id = GlobalContext::get_numa_index();

  struct bitmask *mask = numa_allocate_nodemask();
  mask = numa_bitmask_setbit(mask, node_id);

  // Set the NUMA memory binding policy to "preferred" rather than strict.
  numa_set_bind_policy(0);
  numa_set_membind(mask);
  numa_free_nodemask(mask);
}
void ConfigureTableThread() {
  int32_t idx = ThreadContext::get_id() - GlobalContext::get_head_table_thread_id();
  int32_t node_id = idx % num_mem_nodes_;
  CHECK_EQ(numa_run_on_node(node_id), 0);

  struct bitmask *mask = numa_allocate_nodemask();
  mask = numa_bitmask_setbit(mask, node_id);

  // Set the NUMA memory binding policy to "preferred" rather than strict.
  numa_set_bind_policy(0);
  numa_set_membind(mask);
  numa_free_nodemask(mask);
}
void open_mmap() {
    int ret = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
    assert(ret == 0);

    data = (T *)mmap(NULL, sizeof(T) * length, PROT_READ | PROT_WRITE,
                     MAP_SHARED, fd, 0);
    assert(data != MAP_FAILED);  /* validate the mapping before touching it */

    struct bitmask *bmask = numa_allocate_nodemask();
    numa_bitmask_setall(bmask);
    numa_tonodemask_memory(data, sizeof(T) * length, bmask);
    numa_free_nodemask(bmask);

    is_open = true;
}
static void verify_mempolicy(unsigned int node, int mode)
{
    struct bitmask *bm = numa_allocate_nodemask();
    unsigned int i;

    numa_bitmask_setbit(bm, node);

    TEST(set_mempolicy(mode, bm->maskp, bm->size + 1));
    numa_free_nodemask(bm);

    if (TST_RET) {
        tst_res(TFAIL | TTERRNO, "set_mempolicy(%s) node %u",
                tst_numa_mode_name(mode), node);
        return;
    }

    tst_res(TPASS, "set_mempolicy(%s) node %u",
            tst_numa_mode_name(mode), node);

    const char *prefix = "child: ";

    if (SAFE_FORK()) {
        prefix = "parent: ";
        tst_reap_children();
    }

    tst_nodemap_reset_counters(nodes);
    alloc_fault_count(nodes, NULL, PAGES_ALLOCATED * page_size);
    tst_nodemap_print_counters(nodes);

    for (i = 0; i < nodes->cnt; i++) {
        if (nodes->map[i] == node) {
            if (nodes->counters[i] == PAGES_ALLOCATED) {
                tst_res(TPASS, "%sNode %u allocated %u",
                        prefix, node, PAGES_ALLOCATED);
            } else {
                tst_res(TFAIL, "%sNode %u allocated %u, expected %u",
                        prefix, node, nodes->counters[i], PAGES_ALLOCATED);
            }
            continue;
        }

        if (nodes->counters[i]) {
            tst_res(TFAIL, "%sNode %u allocated %u, expected 0",
                    prefix, i, nodes->counters[i]);
        }
    }
}
void* StorageManager::allocateSlots(const std::size_t num_slots,
                                    const int numa_node) {
#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
#elif defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
#endif

  makeRoomForBlockOrBlob(num_slots);
  void *slots = nullptr;

#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB) || defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               kLargePageMmapFlags,
               -1, 0);

  // Fall back to regular mmap() if large page allocation failed. Even on
  // systems with large page support, large page allocation may fail if the
  // user running the executable is not a member of hugetlb_shm_group on
  // Linux, or if all the reserved hugepages are already in use.
  if (slots == MAP_FAILED) {
    slots = mmap(nullptr,
                 num_slots * kSlotSizeBytes,
                 PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS,
                 -1, 0);
  }
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#elif defined(QUICKSTEP_HAVE_MMAP_PLAIN)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               MAP_PRIVATE | MAP_ANONYMOUS,
               -1, 0);
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#else
  slots = malloc_with_alignment(num_slots * kSlotSizeBytes, kCacheLineBytes);
  if (slots != nullptr) {
    memset(slots, 0x0, num_slots * kSlotSizeBytes);
  }
#endif

  if (slots == nullptr) {
    throw OutOfMemory();
  }

#if defined(QUICKSTEP_HAVE_LIBNUMA)
  if (numa_node != -1) {
    DEBUG_ASSERT(numa_node < numa_num_configured_nodes());
    struct bitmask *numa_node_bitmask = numa_allocate_nodemask();
    // numa_node can be 0 through n-1, where n is the number of NUMA nodes.
    numa_bitmask_setbit(numa_node_bitmask, numa_node);
    long mbind_status = mbind(slots,  // NOLINT(runtime/int)
                              num_slots * kSlotSizeBytes,
                              MPOL_PREFERRED,
                              numa_node_bitmask->maskp,
                              numa_node_bitmask->size,
                              0);
    numa_free_nodemask(numa_node_bitmask);
    if (mbind_status == -1) {
      LOG(WARNING) << "mbind() failed with errno " << errno << " ("
                   << std::strerror(errno) << ")";
    }
  }
#endif  // QUICKSTEP_HAVE_LIBNUMA

  total_memory_usage_ += num_slots;
  return slots;
}
/*
 * Function: _get_numa_nodes
 * Description:
 *	Returns a count and an array of the NUMA nodes that the application
 *	is running on.
 *
 * IN char *path -- The path to the directory containing the files with
 *	information about NUMA nodes.
 *
 * OUT *cnt -- The number of NUMA nodes in the array.
 * OUT **numa_array -- An integer array containing the NUMA nodes.
 *	This array must be xfreed by the caller.
 *
 * RETURN
 *	0 on success and -1 on failure.
 */
static int _get_numa_nodes(char *path, int *cnt, int32_t **numa_array)
{
	struct bitmask *bm;
	int i, index, rc = 0;
	int lsz;
	size_t sz = 0;
	char buffer[PATH_MAX];
	FILE *f = NULL;
	char *lin = NULL;

	rc = snprintf(buffer, sizeof(buffer), "%s/%s", path, "mems");
	if (rc < 0) {
		CRAY_ERR("snprintf failed. Return code: %d", rc);
	}

	f = fopen(buffer, "r");
	if (f == NULL) {
		CRAY_ERR("Failed to open file %s: %m", buffer);
		return -1;
	}

	lsz = getline(&lin, &sz, f);
	if (lsz > 0) {
		if (lin[strlen(lin) - 1] == '\n') {
			lin[strlen(lin) - 1] = '\0';
		}
		bm = numa_parse_nodestring(lin);
		if (bm == NULL) {
			CRAY_ERR("Error numa_parse_nodestring:"
				 " Invalid node string: %s", lin);
			free(lin);
			return SLURM_ERROR;
		}
	} else {
		CRAY_ERR("Reading %s failed", buffer);
		return SLURM_ERROR;
	}
	free(lin);

	*cnt = numa_bitmask_weight(bm);
	if (*cnt == 0) {
		CRAY_ERR("No NUMA Nodes found");
		return -1;
	}

	if (debug_flags & DEBUG_FLAG_TASK) {
		info("Bitmask %#lx size: %lu sizeof(*(bm->maskp)): %zd"
		     " weight: %u",
		     *(bm->maskp), bm->size, sizeof(*(bm->maskp)), *cnt);
	}

	*numa_array = xmalloc(*cnt * sizeof(int32_t));

	index = 0;
	for (i = 0; i < bm->size; i++) {
		if (*(bm->maskp) & ((long unsigned) 1 << i)) {
			if (debug_flags & DEBUG_FLAG_TASK) {
				info("(%s: %d: %s) NUMA Node %d is present",
				     THIS_FILE, __LINE__, __FUNCTION__, i);
			}
			(*numa_array)[index++] = i;
		}
	}

	numa_free_nodemask(bm);

	return 0;
}
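A hedged caller sketch for _get_numa_nodes() above (the cpuset path is illustrative; info(), xfree(), and the xmalloc-based ownership contract are the Slurm helpers the function already assumes). It mainly shows that the returned array must be xfreed by the caller.

	int32_t *numa_nodes = NULL;
	int node_cnt = 0;

	/* "/dev/cpuset/slurm" is an example path to a cpuset directory
	 * that contains a "mems" file. */
	if (_get_numa_nodes("/dev/cpuset/slurm", &node_cnt, &numa_nodes) == 0) {
		for (int i = 0; i < node_cnt; i++)
			info("NUMA node %d is available", numa_nodes[i]);
		xfree(numa_nodes);
	}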
static void *
s_numa_alloc(size_t sz, int cpu) {
  void *ret = NULL;

  if (likely(sz > 0)) {
    if (likely(cpu >= 0)) {
      if (likely(s_numa_nodes != NULL && s_n_cpus > 0)) {
        unsigned int node = s_numa_nodes[cpu];
        unsigned int allocd_node = UINT_MAX;
        struct bitmask *bmp;
        int r;

        bmp = numa_allocate_nodemask();
        numa_bitmask_setbit(bmp, node);

        errno = 0;
        r = (int)set_mempolicy(MPOL_BIND, bmp->maskp, bmp->size + 1);
        if (likely(r == 0)) {
          errno = 0;
          ret = numa_alloc_onnode(sz, (int)node);
          if (likely(ret != NULL)) {
            lagopus_result_t rl;

            /*
             * We need this "first touch" even using the
             * numa_alloc_onnode().
             */
            (void)memset(ret, 0, sz);

            errno = 0;
            r = (int)get_mempolicy((int *)&allocd_node, NULL, 0, ret,
                                   MPOL_F_NODE|MPOL_F_ADDR);
            if (likely(r == 0)) {
              if (unlikely(node != allocd_node)) {
                /*
                 * The memory is not allocated on the node, but it is
                 * still usable. Just return it.
                 */
                lagopus_msg_warning("can't allocate " PFSZ(u) " bytes memory "
                                    "for CPU %d (NUMA node %d).\n",
                                    sz, cpu, node);
              }
            } else {
              lagopus_perror(LAGOPUS_RESULT_POSIX_API_ERROR);
              lagopus_msg_error("get_mempolicy() returned %d.\n", r);
            }

            rl = s_add_addr(ret, sz);
            if (unlikely(rl != LAGOPUS_RESULT_OK)) {
              lagopus_perror(rl);
              lagopus_msg_error("can't register the allocated address.\n");
              numa_free(ret, sz);
              ret = NULL;
            }
          }
        } else {    /* r == 0 */
          lagopus_perror(LAGOPUS_RESULT_POSIX_API_ERROR);
          lagopus_msg_error("set_mempolicy() returned %d.\n", r);
        }

        numa_free_nodemask(bmp);
        set_mempolicy(MPOL_DEFAULT, NULL, 0);

      } else {    /* s_numa_nodes != NULL && s_n_cpus > 0 */
        /*
         * Not initialized or initialization failure.
         */
        lagopus_msg_warning("The NUMA related information is not initialized. "
                            "Use malloc(3) instead.\n");
        ret = malloc(sz);
      }
    } else {    /* cpu >= 0 */
      /*
       * Use pure malloc(3).
       */
      ret = malloc(sz);
    }
  }

  return ret;
}
/**
 * @brief Do memory binding.
 *
 * This handles the binding types map_mem, mask_mem and rank.
 * The types local (default) and none are handled directly by the daemon.
 *
 * When using libnuma with API v1, this is a noop, just giving a warning.
 *
 * @param step Step structure
 *
 * @param task Task structure
 *
 * @return No return value.
 */
void doMemBind(Step_t *step, PStask_t *task)
{
# ifndef HAVE_NUMA_ALLOCATE_NODEMASK
    mlog("%s: psslurm does not support memory binding types map_mem, mask_mem"
         " and rank with libnuma v1\n", __func__);
    fprintf(stderr, "Memory binding type not supported with used libnuma"
            " version");
    return;
# else
    const char delimiters[] = ",";
    uint32_t lTID;
    char *next, *saveptr, *ents, *myent, *endptr;
    char **entarray;
    unsigned int numents;
    uint16_t mynode;

    struct bitmask *nodemask = NULL;

    if (!(step->memBindType & MEM_BIND_MAP)
        && !(step->memBindType & MEM_BIND_MASK)
        && !(step->memBindType & MEM_BIND_RANK)) {
        /* things are handled elsewhere */
        return;
    }

    if (!PSIDnodes_bindMem(PSC_getMyID()) || getenv("__PSI_NO_MEMBIND")) {
        // info messages already printed in doClamps()
        return;
    }

    if (numa_available() == -1) {
        fprintf(stderr, "NUMA not available:");
        return;
    }

    nodemask = numa_allocate_nodemask();
    if (!nodemask) {
        fprintf(stderr, "Allocation of nodemask failed:");
        return;
    }

    lTID = getLocalRankID(task->rank, step, step->localNodeId);

    if (step->memBindType & MEM_BIND_RANK) {
        if (lTID > (unsigned int)numa_max_node()) {
            mlog("%s: memory binding to ranks not possible for rank %d."
                 " (local rank %d > #numa_nodes %d)\n", __func__,
                 task->rank, lTID, numa_max_node());
            fprintf(stderr, "Memory binding to ranks not possible for rank %d,"
                    " local rank %u larger than max numa node %d.",
                    task->rank, lTID, numa_max_node());
            if (nodemask) numa_free_nodemask(nodemask);
            return;
        }
        if (numa_bitmask_isbitset(numa_get_mems_allowed(), lTID)) {
            numa_bitmask_setbit(nodemask, lTID);
        } else {
            mlog("%s: setting bit %d in memory mask not allowed in rank"
                 " %d\n", __func__, lTID, task->rank);
            fprintf(stderr, "Not allowed to set bit %u in memory mask"
                    " of rank %d\n", lTID, task->rank);
        }
        numa_set_membind(nodemask);
        if (nodemask) numa_free_nodemask(nodemask);
        return;
    }

    ents = ustrdup(step->memBind);
    entarray = umalloc(step->tasksToLaunch[step->localNodeId] * sizeof(char *));
    numents = 0;
    myent = NULL;
    entarray[0] = NULL;

    next = strtok_r(ents, delimiters, &saveptr);
    while (next && (numents < step->tasksToLaunch[step->localNodeId])) {
        entarray[numents++] = next;
        if (numents == lTID + 1) {
            myent = next;
            break;
        }
        next = strtok_r(NULL, delimiters, &saveptr);
    }

    if (!myent && numents) {
        myent = entarray[lTID % numents];
    }

    if (!myent) {
        numa_set_membind(numa_all_nodes_ptr);
        if (step->memBindType & MEM_BIND_MASK) {
            mlog("%s: invalid mem mask string '%s'\n", __func__, ents);
        } else if (step->memBindType & MEM_BIND_MAP) {
            mlog("%s: invalid mem map string '%s'\n", __func__, ents);
        }
        goto cleanup;
    }

    if (step->memBindType & MEM_BIND_MAP) {
        if (strncmp(myent, "0x", 2) == 0) {
            mynode = strtoul(myent + 2, &endptr, 16);
        } else {
            mynode = strtoul(myent, &endptr, 10);
        }

        if (*endptr == '\0' && mynode <= numa_max_node()) {
            if (numa_bitmask_isbitset(numa_get_mems_allowed(), mynode)) {
                numa_bitmask_setbit(nodemask, mynode);
            } else {
                mlog("%s: setting bit %d in memory mask not allowed in rank"
                     " %d\n", __func__, mynode, task->rank);
                fprintf(stderr, "Not allowed to set bit %d in memory mask"
                        " of rank %d\n", mynode, task->rank);
            }
        } else {
            mlog("%s: invalid memory map entry '%s' (%d) for rank %d\n",
                 __func__, myent, mynode, task->rank);
            fprintf(stderr, "Invalid memory map entry '%s' for rank %d\n",
                    myent, task->rank);
            numa_set_membind(numa_all_nodes_ptr);
            goto cleanup;
        }
        mdbg(PSSLURM_LOG_PART, "%s: (bind_map) node %i local task %i"
             " memstr '%s'\n", __func__, step->localNodeId, lTID, myent);
    } else if (step->memBindType & MEM_BIND_MASK) {
        parseNUMAmask(nodemask, myent, task->rank);
    }

    numa_set_membind(nodemask);

cleanup:
    ufree(ents);
    ufree(entarray);
    if (nodemask) numa_free_nodemask(nodemask);

# endif

    return;
}