/* This test is run with the MEMKIND_HBW_NODES environment variable
 * overridden and tries to perform an allocation from DRAM with hbw_malloc()
 * under the default HBW_POLICY_PREFERRED policy.
 */
int main()
{
    struct bitmask *expected_nodemask = NULL;
    struct bitmask *returned_nodemask = NULL;
    void *ptr = NULL;
    int ret = -1;
    int status = 0;

    ptr = hbw_malloc(KB);
    if (ptr == NULL) {
        printf("Error: allocation failed\n");
        goto exit;
    }

    expected_nodemask = numa_allocate_nodemask();
    status = memkind_hbw_all_get_mbind_nodemask(NULL, expected_nodemask->maskp,
                                                expected_nodemask->size);
    if (status != MEMKIND_ERROR_ENVIRON) {
        printf("Error: wrong return value from memkind_hbw_all_get_mbind_nodemask()\n");
        printf("Expected: %d\n", MEMKIND_ERROR_ENVIRON);
        printf("Actual: %d\n", status);
        goto exit;
    }

    returned_nodemask = numa_allocate_nodemask();
    status = get_mempolicy(NULL, returned_nodemask->maskp, returned_nodemask->size,
                           ptr, MPOL_F_ADDR);
    if (status) {
        printf("Error: get_mempolicy() returned %d\n", status);
        goto exit;
    }

    /* numa_bitmask_equal() returns 1 when the masks match; map that to a
     * conventional exit status (0 on success, nonzero on failure). */
    ret = numa_bitmask_equal(returned_nodemask, expected_nodemask) ? 0 : -1;
    if (ret) {
        printf("Error: Memkind hbw and allocated pointer nodemasks are not equal\n");
    }

exit:
    if (expected_nodemask) {
        numa_free_nodemask(expected_nodemask);
    }
    if (returned_nodemask) {
        numa_free_nodemask(returned_nodemask);
    }
    if (ptr) {
        hbw_free(ptr);
    }

    return ret;
}
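For context, a minimal sketch (not part of the test) of how the override could be established in-process, assuming setenv() runs before the first memkind call initializes the library; the node list "0" is an arbitrary choice:

#include <hbwmalloc.h>
#include <stdlib.h>

/* Hedged sketch: treat node 0 as the only "high bandwidth" node by setting
 * MEMKIND_HBW_NODES before the first hbw_malloc() call, since memkind reads
 * the variable during initialization. The node number is an assumption. */
int run_with_override(void)
{
    setenv("MEMKIND_HBW_NODES", "0", 1);
    void *p = hbw_malloc(1024);
    if (p == NULL)
        return 1;
    hbw_free(p);
    return 0;
}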
Example #2
void bind2node_id(int node_id)
{
	struct bitmask *bmp = numa_allocate_nodemask();
	numa_bitmask_setbit(bmp, node_id);
	numa_bind(bmp);
	numa_free_nodemask(bmp);
}
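A minimal usage sketch (everything other than bind2node_id() is an assumption): numa_bind() restricts both the CPU affinity and the memory allocation of the calling task, so pages touched afterwards are faulted in on the chosen node:

#include <numa.h>
#include <stdlib.h>
#include <string.h>

int bound_alloc_demo(void)
{
    if (numa_available() == -1)
        return -1;               /* no NUMA support on this system */
    bind2node_id(0);             /* CPU and memory now restricted to node 0 */
    char *buf = malloc(1 << 20);
    if (buf)
        memset(buf, 0, 1 << 20); /* first touch places pages on node 0 */
    free(buf);
    return 0;
}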
Example #3
  void ConfigureTableThread() {
    int32_t node_id = GlobalContext::get_numa_index();

    struct bitmask *mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node_id);

    // set the NUMA memory binding policy to preferred
    numa_set_bind_policy(0);
    numa_set_membind(mask);
    numa_free_nodemask(mask);
  }
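Note that numa_set_bind_policy() is documented to affect libnuma's own allocators (numa_alloc_onnode() and friends), while numa_set_membind() installs a strict binding regardless. If a genuinely "preferred" policy is the goal, numa_set_preferred() expresses it directly; a minimal sketch of that alternative:

#include <numa.h>

/* Sketch: prefer node_id but let the kernel fall back to other nodes under
 * memory pressure, instead of the strict mask set by numa_set_membind(). */
void configure_table_thread_preferred(int node_id)
{
    numa_set_preferred(node_id);
}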
Example #4
  void ConfigureTableThread() {
    int32_t idx = ThreadContext::get_id() - GlobalContext::get_head_table_thread_id();
    int32_t node_id = idx % num_mem_nodes_;
    CHECK_EQ(numa_run_on_node(node_id), 0);

    struct bitmask *mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node_id);

    // set the NUMA memory binding policy to preferred
    numa_set_bind_policy(0);
    numa_set_membind(mask);
    numa_free_nodemask(mask);
  }
Example #5
	void open_mmap() {
		int ret = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
		assert(ret == 0);
		data = (T *)mmap(NULL, sizeof(T) * length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		// Check the mapping before handing the address to libnuma.
		assert(data != MAP_FAILED);

		// Allow the mapping's pages to be placed on any node.
		struct bitmask *bmask = numa_allocate_nodemask();
		numa_bitmask_setall(bmask);
		numa_tonodemask_memory(data, sizeof(T) * length, bmask);
		numa_free_nodemask(bmask);

		is_open = true;
	}
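To pin the mapping to one specific node instead of spreading it across all nodes, numa_tonode_memory() is the single-node counterpart of the call above; a short hedged sketch (the validation is an assumption):

#include <numa.h>
#include <stddef.h>

/* Bind an already-mapped region to a single NUMA node. */
static void map_to_single_node(void *addr, size_t len, int node)
{
    if (numa_available() != -1 && node <= numa_max_node())
        numa_tonode_memory(addr, len, node);
}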
Example #6
static void verify_mempolicy(unsigned int node, int mode)
{
	struct bitmask *bm = numa_allocate_nodemask();
	unsigned int i;

	numa_bitmask_setbit(bm, node);

	TEST(set_mempolicy(mode, bm->maskp, bm->size+1));

	/* Free the nodemask before the early return so it is not leaked on
	 * the failure path. */
	numa_free_nodemask(bm);

	if (TST_RET) {
		tst_res(TFAIL | TTERRNO,
		        "set_mempolicy(%s) node %u",
		        tst_numa_mode_name(mode), node);
		return;
	}

	tst_res(TPASS, "set_mempolicy(%s) node %u",
	        tst_numa_mode_name(mode), node);

	const char *prefix = "child: ";

	if (SAFE_FORK()) {
		prefix = "parent: ";
		tst_reap_children();
	}

	tst_nodemap_reset_counters(nodes);
	alloc_fault_count(nodes, NULL, PAGES_ALLOCATED * page_size);
	tst_nodemap_print_counters(nodes);

	for (i = 0; i < nodes->cnt; i++) {
		if (nodes->map[i] == node) {
			if (nodes->counters[i] == PAGES_ALLOCATED) {
				tst_res(TPASS, "%sNode %u allocated %u",
				        prefix, node, PAGES_ALLOCATED);
			} else {
				tst_res(TFAIL, "%sNode %u allocated %u, expected %u",
				        prefix, node, nodes->counters[i],
				        PAGES_ALLOCATED);
			}
			continue;
		}

		if (nodes->counters[i]) {
			tst_res(TFAIL, "%sNode %u allocated %u, expected 0",
			        prefix, nodes->map[i], nodes->counters[i]);
		}
	}
}
Example #7
void* StorageManager::allocateSlots(const std::size_t num_slots,
                                    const int numa_node) {
#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
#elif defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
#endif

  makeRoomForBlockOrBlob(num_slots);
  void *slots = nullptr;

#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB) || defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               kLargePageMmapFlags,
               -1, 0);

  // Fallback to regular mmap() if large page allocation failed. Even on
  // systems with large page support, large page allocation may fail if the
  // user running the executable is not a member of hugetlb_shm_group on Linux,
  // or if all the reserved hugepages are already in use.
  if (slots == MAP_FAILED) {
    slots = mmap(nullptr,
                 num_slots * kSlotSizeBytes,
                 PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS,
                 -1, 0);
  }
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#elif defined(QUICKSTEP_HAVE_MMAP_PLAIN)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               MAP_PRIVATE | MAP_ANONYMOUS,
               -1, 0);
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#else
  slots = malloc_with_alignment(num_slots * kSlotSizeBytes,
                                kCacheLineBytes);
  if (slots != nullptr) {
    memset(slots, 0x0, num_slots * kSlotSizeBytes);
  }
#endif

  if (slots == nullptr) {
    throw OutOfMemory();
  }

#if defined(QUICKSTEP_HAVE_LIBNUMA)
  if (numa_node != -1) {
    DEBUG_ASSERT(numa_node < numa_num_configured_nodes());
    struct bitmask *numa_node_bitmask = numa_allocate_nodemask();
    // numa_node can be 0 through n-1, where n is the num of NUMA nodes.
    numa_bitmask_setbit(numa_node_bitmask, numa_node);
    long mbind_status = mbind(slots,  // NOLINT(runtime/int)
                              num_slots * kSlotSizeBytes,
                              MPOL_PREFERRED,
                              numa_node_bitmask->maskp,
                              numa_node_bitmask->size,
                              0);
    numa_free_nodemask(numa_node_bitmask);
    if (mbind_status == -1) {
      LOG(WARNING) << "mbind() failed with errno " << errno << " ("
                   << std::strerror(errno) << ")";
    }
  }
#endif  // QUICKSTEP_HAVE_LIBNUMA

  total_memory_usage_ += num_slots;
  return slots;
}
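The huge-page-then-fallback pattern above is reusable on its own; a minimal Linux-only C sketch (error handling beyond the fallback is an assumption, and the length should be a multiple of the huge page size for the first mapping to succeed):

#include <sys/mman.h>
#include <stddef.h>

/* Try a hugetlb-backed anonymous mapping first; if the huge page pool is
 * exhausted or the caller lacks permission, retry with regular pages.
 * Returns NULL on failure. */
static void *alloc_slots_sketch(size_t bytes)
{
    void *p = mmap(NULL, bytes, PROT_READ | PROT_WRITE,
                   MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
    if (p == MAP_FAILED)
        p = mmap(NULL, bytes, PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    return (p == MAP_FAILED) ? NULL : p;
}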
Example #8
/*
 * Function: _get_numa_nodes
 * Description:
 *  Determines the NUMA nodes that the application is running on, returning
 *  both a count of those nodes and an array of their IDs.
 *
 *  IN char* path -- The path to the directory containing the files containing
 *                   information about NUMA nodes.
 *
 *  OUT *cnt -- The number of NUMA nodes in the array
 *  OUT **numa_array -- An integer array containing the NUMA nodes.
 *                      This array must be xfreed by the caller.
 *
 * RETURN
 *  0 on success and -1 on failure.
 */
static int _get_numa_nodes(char *path, int *cnt, int32_t **numa_array) {
	struct bitmask *bm;
	int i, index, rc = 0;
	int lsz;
	size_t sz = 0;
	char buffer[PATH_MAX];
	FILE *f = NULL;
	char *lin = NULL;

	rc = snprintf(buffer, sizeof(buffer), "%s/%s", path, "mems");
	if (rc < 0) {
		CRAY_ERR("snprintf failed. Return code: %d", rc);
	}

	f = fopen(buffer, "r");
	if (f == NULL ) {
		CRAY_ERR("Failed to open file %s: %m", buffer);
		return -1;
	}

	lsz = getline(&lin, &sz, f);
	fclose(f);
	if (lsz > 0) {
		if (lin[strlen(lin) - 1] == '\n') {
			lin[strlen(lin) - 1] = '\0';
		}
		bm = numa_parse_nodestring(lin);
		if (bm == NULL) {
			CRAY_ERR("Error numa_parse_nodestring:"
				 " Invalid node string: %s", lin);
			free(lin);
			return SLURM_ERROR;
		}
	} else {
		CRAY_ERR("Reading %s failed", buffer);
		free(lin);
		return SLURM_ERROR;
	}
	free(lin);

	*cnt = numa_bitmask_weight(bm);
	if (*cnt == 0) {
		CRAY_ERR("No NUMA Nodes found");
		numa_free_nodemask(bm);
		return -1;
	}

	if (debug_flags & DEBUG_FLAG_TASK) {
		info("Bitmask %#lx size: %lu sizeof(*(bm->maskp)): %zd"
		     " weight: %u",
		     *(bm->maskp), bm->size, sizeof(*(bm->maskp)), *cnt);
	}

	*numa_array = xmalloc(*cnt * sizeof(int32_t));

	index = 0;
	for (i = 0; i < bm->size; i++) {
		if (numa_bitmask_isbitset(bm, i)) {
			if (debug_flags & DEBUG_FLAG_TASK) {
				info("(%s: %d: %s) NUMA Node %d is present",
				     THIS_FILE,	__LINE__, __FUNCTION__, i);
			}
			(*numa_array)[index++] = i;
		}
	}

	numa_free_nodemask(bm);

	return 0;
}
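A hypothetical call site for _get_numa_nodes(); the cpuset path is invented for illustration, and xfree() follows the Slurm convention noted in the header comment:

	int32_t *numa_nodes = NULL;
	int node_cnt = 0;

	/* "/sys/fs/cgroup/cpuset/slurm" is a made-up example path. */
	if (_get_numa_nodes("/sys/fs/cgroup/cpuset/slurm", &node_cnt,
			    &numa_nodes) == 0) {
		int i;
		for (i = 0; i < node_cnt; i++)
			info("NUMA node present: %d", numa_nodes[i]);
		xfree(numa_nodes);
	}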
Example #9
static void *
s_numa_alloc(size_t sz, int cpu) {
  void *ret = NULL;

  if (likely(sz > 0)) {
    if (likely(cpu >= 0)) {
      if (likely(s_numa_nodes != NULL && s_n_cpus > 0)) {
        unsigned int node = s_numa_nodes[cpu];
        unsigned int allocd_node = UINT_MAX;
        struct bitmask *bmp;
        int r;
  
        bmp = numa_allocate_nodemask();
        numa_bitmask_setbit(bmp, node);

        errno = 0;
        r = (int)set_mempolicy(MPOL_BIND, bmp->maskp, bmp->size + 1);
        if (likely(r == 0)) {
          errno = 0;
          ret = numa_alloc_onnode(sz, (int)node);
          if (likely(ret != NULL)) {
            lagopus_result_t rl;

            /*
             * We need this "first touch" even using the
             * numa_alloc_onnode().
             */
            (void)memset(ret, 0, sz);

            errno = 0;
            r = (int)get_mempolicy((int *)&allocd_node, NULL, 0, ret,
                                   MPOL_F_NODE|MPOL_F_ADDR);
            if (likely(r == 0)) {
              if (unlikely(node != allocd_node)) {
                /*
                 * The memory is not allocated on the node, but it is
                 * still usable. Just return it.
                 */
                lagopus_msg_warning("can't allocate " PFSZ(u) " bytes memory "
                                    "for CPU %d (NUMA node %d).\n",
                                    sz, cpu, node);
              }
            } else {
              lagopus_perror(LAGOPUS_RESULT_POSIX_API_ERROR);
              lagopus_msg_error("get_mempolicy() returned %d.\n", r);
            }

            rl = s_add_addr(ret, sz);
            if (unlikely(rl != LAGOPUS_RESULT_OK)) {
              lagopus_perror(rl);
              lagopus_msg_error("can't register the allocated address.\n");
              numa_free(ret, sz);
              ret = NULL;
            }
          }

        } else {	/* r == 0 */
          lagopus_perror(LAGOPUS_RESULT_POSIX_API_ERROR);
          lagopus_msg_error("set_mempolicy() returned %d.\n", r);
        }

        numa_free_nodemask(bmp);
        set_mempolicy(MPOL_DEFAULT, NULL, 0);

      } else {	/* s_numa_nodes != NULL && s_n_cpus > 0 */
        /*
         * Not initialized or initialization failure.
         */
        lagopus_msg_warning("The NUMA related information is not initialized. "
                            "Use malloc(3) instead.\n");
        ret = malloc(sz);
      }

    } else {	/* cpu >= 0 */
      /*
       * Use pure malloc(3).
       */
      ret = malloc(sz);
    }

  }

  return ret;
}
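The matching free path is not shown; a hypothetical counterpart, assuming a lookup helper that returns the size registered via s_add_addr() (s_find_addr() and s_delete_addr() are invented names, not the actual lagopus API):

static void
s_numa_free_sketch(void *p) {
  if (likely(p != NULL)) {
    size_t sz = s_find_addr(p);   /* assumed: size registered by s_add_addr() */
    if (sz > 0) {
      (void)s_delete_addr(p);     /* assumed: unregister the address */
      numa_free(p, sz);
    } else {
      free(p);                    /* region fell back to malloc(3) */
    }
  }
}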
Example #10
/**
 * @brief Do memory binding.
 *
 * This is handling the binding types map_mem, mask_mem and rank.
 * The types local (default) and none are handled directly by the daemon.
 *
 * When using libnuma with API v1, this is a no-op that only emits a warning.
 *
 * @param step  Step structure
 * @param task  Task structure
 *
 * @return No return value.
 */
void doMemBind(Step_t *step, PStask_t *task)
{

# ifndef HAVE_NUMA_ALLOCATE_NODEMASK
    mlog("%s: psslurm does not support memory binding types map_mem, mask_mem"
	    " and rank with libnuma v1\n", __func__);
    fprintf(stderr, "Memory binding type not supported with used libnuma"
	   " version");
    return;
# else

    const char delimiters[] = ",";
    uint32_t lTID;
    char *next, *saveptr, *ents, *myent, *endptr;
    char **entarray;
    unsigned int numents;
    uint16_t mynode;

    struct bitmask *nodemask = NULL;

    if (!(step->memBindType & MEM_BIND_MAP)
	    && !(step->memBindType & MEM_BIND_MASK)
	    && !(step->memBindType & MEM_BIND_RANK)) {
	/* things are handled elsewhere */
	return;
    }

    if (!PSIDnodes_bindMem(PSC_getMyID()) || getenv("__PSI_NO_MEMBIND")) {
	    // info messages already printed in doClamps()
	return;
    }

    if (numa_available() == -1) {
	fprintf(stderr, "NUMA not available:");
	return;
    }

    nodemask = numa_allocate_nodemask();
    if (!nodemask) {
	fprintf(stderr, "Allocation of nodemask failed:");
	return;
    }

    lTID = getLocalRankID(task->rank, step, step->localNodeId);

    if (step->memBindType & MEM_BIND_RANK) {
	if (lTID > (unsigned int)numa_max_node()) {
	    mlog("%s: memory binding to ranks not possible for rank %d."
		    " (local rank %d > #numa_nodes %d)\n", __func__,
		    task->rank, lTID, numa_max_node());
	    fprintf(stderr, "Memory binding to ranks not possible for rank %d,"
		    " local rank %u larger than max numa node %d.",
		    task->rank, lTID, numa_max_node());
	    if (nodemask) numa_free_nodemask(nodemask);
	    return;
	}
	if (numa_bitmask_isbitset(numa_get_mems_allowed(), lTID)) {
	    numa_bitmask_setbit(nodemask, lTID);
	} else {
	    mlog("%s: setting bit %d in memory mask not allowed in rank"
		    " %d\n", __func__, lTID, task->rank);
	    fprintf(stderr, "Not allowed to set bit %u in memory mask"
		    " of rank %d\n", lTID, task->rank);
	}
	numa_set_membind(nodemask);
	if (nodemask) numa_free_nodemask(nodemask);
	return;
    }

    ents = ustrdup(step->memBind);
    entarray = umalloc(step->tasksToLaunch[step->localNodeId] * sizeof(char*));
    numents = 0;
    myent = NULL;
    entarray[0] = NULL;

    next = strtok_r(ents, delimiters, &saveptr);
    while (next && (numents < step->tasksToLaunch[step->localNodeId])) {
	entarray[numents++] = next;
	if (numents == lTID+1) {
	    myent = next;
	    break;
	}
	next = strtok_r(NULL, delimiters, &saveptr);
    }

    if (!myent && numents) {
	myent = entarray[lTID % numents];
    }

    if (!myent) {
	numa_set_membind(numa_all_nodes_ptr);
	if (step->memBindType & MEM_BIND_MASK) {
	    mlog("%s: invalid mem mask string '%s'\n", __func__, ents);
	}
	else if (step->memBindType & MEM_BIND_MAP) {
	    mlog("%s: invalid mem map string '%s'\n", __func__, ents);
	}
	goto cleanup;
    }

    if (step->memBindType & MEM_BIND_MAP) {

	if (strncmp(myent, "0x", 2) == 0) {
	    mynode = strtoul (myent+2, &endptr, 16);
	} else {
	    mynode = strtoul (myent, &endptr, 10);
	}

	if (*endptr == '\0' && mynode <= numa_max_node()) {
	    if (numa_bitmask_isbitset(numa_get_mems_allowed(), mynode)) {
		numa_bitmask_setbit(nodemask, mynode);
	    } else {
		mlog("%s: setting bit %d in memory mask not allowed in rank"
			" %d\n", __func__, mynode, task->rank);
		fprintf(stderr, "Not allowed to set bit %d in memory mask"
			" of rank %d\n", mynode, task->rank);
	    }
	} else {
	    mlog("%s: invalid memory map entry '%s' (%d) for rank %d\n",
		    __func__, myent, mynode, task->rank);
	    fprintf(stderr, "Invalid memory map entry '%s' for rank %d\n",
		    myent, task->rank);
	    numa_set_membind(numa_all_nodes_ptr);
	    goto cleanup;
	}
	mdbg(PSSLURM_LOG_PART, "%s: (bind_map) node %i local task %i"
	     " memstr '%s'\n", __func__, step->localNodeId, lTID, myent);

    } else if (step->memBindType & MEM_BIND_MASK) {
	parseNUMAmask(nodemask, myent, task->rank);
    }

    numa_set_membind(nodemask);

    cleanup:

    ufree(ents);
    ufree(entarray);
    if (nodemask) numa_free_nodemask(nodemask);
# endif

    return;
}
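parseNUMAmask() used above is defined elsewhere in psslurm; a hypothetical sketch of such a parser built from documented libnuma calls (illustrative only, not the psslurm implementation):

static void parseNUMAmask_sketch(struct bitmask *nodemask, char *maskStr,
				 int rank)
{
    /* Accept masks like "0x3" or "3": bit i selects NUMA node i. */
    char *endptr;
    unsigned long mask = strtoul(maskStr, &endptr, 0); /* base 0 handles 0x */
    unsigned int bit = 0;

    if (*endptr != '\0') {
	fprintf(stderr, "Invalid memory mask '%s' for rank %d\n",
		maskStr, rank);
	return;
    }
    while (mask) {
	if ((mask & 1UL) && bit <= (unsigned int)numa_max_node()
		&& numa_bitmask_isbitset(numa_get_mems_allowed(), bit)) {
	    numa_bitmask_setbit(nodemask, bit);
	}
	mask >>= 1;
	bit++;
    }
}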