/* This test is run with an overridden MEMKIND_HBW_NODES environment variable
 * and attempts an allocation that falls back to DRAM via hbw_malloc() under
 * the default HBW_POLICY_PREFERRED policy.
 */
int main()
{
    struct bitmask *expected_nodemask = NULL;
    struct bitmask *returned_nodemask = NULL;
    void *ptr = NULL;
    int ret = 0;
    int status = 0;

    ptr = hbw_malloc(KB);
    if (ptr == NULL) {
        printf("Error: allocation failed\n");
        goto exit;
    }

    expected_nodemask = numa_allocate_nodemask();
    status = memkind_hbw_all_get_mbind_nodemask(NULL, expected_nodemask->maskp,
                                                expected_nodemask->size);
    if (status != MEMKIND_ERROR_ENVIRON) {
        printf("Error: wrong return value from memkind_hbw_all_get_mbind_nodemask()\n");
        printf("Expected: %d\n", MEMKIND_ERROR_ENVIRON);
        printf("Actual: %d\n", status);
        goto exit;
    }

    returned_nodemask = numa_allocate_nodemask();
    status = get_mempolicy(NULL, returned_nodemask->maskp, returned_nodemask->size,
                           ptr, MPOL_F_ADDR);
    if (status) {
        printf("Error: get_mempolicy() returned %d\n", status);
        goto exit;
    }

    ret = numa_bitmask_equal(returned_nodemask, expected_nodemask);
    if (!ret) {
        printf("Error: Memkind hbw and allocated pointer nodemasks are not equal\n");
    }

exit:
    if (expected_nodemask) {
        numa_free_nodemask(expected_nodemask);
    }
    if (returned_nodemask) {
        numa_free_nodemask(returned_nodemask);
    }
    if (ptr) {
        hbw_free(ptr);
    }

    return ret ? 0 : 1; /* numa_bitmask_equal() returns 1 when the masks match */
}
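For context, a minimal sketch of the fallback behavior this test depends on: under the default HBW_POLICY_PREFERRED policy, hbw_malloc() succeeds even when no usable high-bandwidth node exists and silently falls back to DRAM (HBW_POLICY_BIND would fail instead). Error handling is abbreviated; only memkind's public hbwmalloc.h API is used.

#include <stdio.h>
#include <hbwmalloc.h>

int main(void)
{
    /* hbw_check_available() returns 0 when HBW memory was detected;
     * with the PREFERRED policy the allocation works either way. */
    if (hbw_check_available() != 0)
        printf("No HBW nodes detected; falling back to DRAM\n");

    void *p = hbw_malloc(1024);  /* HBW_POLICY_PREFERRED is the default */
    if (p == NULL)
        return 1;
    hbw_free(p);
    return 0;
}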
Example 2
void* mmap_1g(void* addr /* = nullptr */, int node /* = -1 */) {
#ifdef __linux__
  if (s_num1GPages >= kMaxNum1GPages) return nullptr;
  if (get_huge1g_info(node).free_hugepages <= 0) return nullptr;
  if (node >= 0 && !numa_node_allowed(node)) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_1g_impl(addr);
  if (ret != nullptr) {
    s_1GPages[s_num1GPages++] = ret;
  }
#ifdef HAVE_NUMA
  if (memMask) {
    assert(interleaveMask);
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else
  return nullptr;
#endif
}
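The save/bind/restore dance around mmap_1g_impl() is a reusable pattern; here is a minimal sketch of it extracted into a helper pair (the helper names are mine, not HHVM's, and the interleave mask, which the original also restores, is omitted for brevity):

#include <numa.h>

/* Temporarily restrict page allocation to 'node'; returns the previous
 * membind mask so the caller can restore it with pop_membind(). */
static struct bitmask *push_membind(int node)
{
    struct bitmask *saved = numa_get_membind();
    struct bitmask *mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
    return saved;
}

static void pop_membind(struct bitmask *saved)
{
    numa_set_membind(saved);
    numa_bitmask_free(saved);
}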
Example 3
void bind2node_id(int node_id)
{
	struct bitmask *bmp = numa_allocate_nodemask();
	numa_bitmask_setbit(bmp, node_id);
	numa_bind(bmp);
	numa_free_nodemask(bmp);
}
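Note that numa_bind() restricts both where the task runs and where it allocates memory. A sketch of the memory-only counterpart, using the same libnuma calls:

#include <numa.h>

/* Bind only memory allocation (not CPU placement) to one node. */
void membind2node_id(int node_id)
{
	struct bitmask *bmp = numa_allocate_nodemask();
	numa_bitmask_setbit(bmp, node_id);
	numa_set_membind(bmp);
	numa_free_nodemask(bmp);
}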
Example 4
/**
 * \brief returns the mask of nodes from which memory can currently be allocated.
 *
 * \return bitmap of nodes from which memory can be allocated
 */
struct bitmap *numa_get_membind(void)
{
    assert(numa_alloc_bind_mask);
    struct bitmap *im = numa_allocate_nodemask();
    if (im == NULL) {
        return NULL;
    }
    bitmap_copy(im, numa_alloc_bind_mask);
    return im;
}
Example 5
/** \brief   returns the current interleave mask
 *
 * \returns bitmask representing the current interleave state
 *
 * returns the current interleave mask if the task's memory allocation policy is
 * page interleaved. Otherwise, this function returns an empty mask.
 */
struct bitmap *numa_get_interleave_mask(void)
{
    assert(numa_alloc_interleave_mask);
    struct bitmap *im = numa_allocate_nodemask();
    if (im == NULL) {
        return NULL;
    }
    bitmap_copy(im, numa_alloc_interleave_mask);
    return im;
}
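Both getters hand back a freshly allocated copy that the caller owns and must free. Against stock libnuma, where the equivalent functions return struct bitmask *, usage would look like this sketch:

#include <stdio.h>
#include <numa.h>

int main(void)
{
    if (numa_available() == -1)
        return 1;

    struct bitmask *mb = numa_get_membind();
    int n;
    for (n = 0; n <= numa_max_node(); n++)
        if (numa_bitmask_isbitset(mb, n))
            printf("may allocate on node %d\n", n);
    numa_bitmask_free(mb);
    return 0;
}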
Example 6
  void ConfigureTableThread() {
    int32_t node_id = GlobalContext::get_numa_index();

    struct bitmask *mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node_id);

    // prefer, rather than strictly bind to, the chosen NUMA node
    numa_set_bind_policy(0);
    numa_set_membind(mask);
    numa_free_nodemask(mask);
  }
Example 7
/**
 * mem_alloc_pages_onnode - allocates pages on a given NUMA node
 * @nr: the number of pages
 * @size: the page size (4KB, 2MB, or 1GB)
 * @node: the NUMA node to allocate the pages from
 * @numa_policy: how strictly to honor @node
 *
 * Returns a pointer (virtual address) to a page, or NULL on failure.
 */
void *mem_alloc_pages_onnode(int nr, int size, int node, int numa_policy)
{
	void *vaddr;
	struct bitmask *mask = numa_allocate_nodemask();

	numa_bitmask_setbit(mask, node);
	vaddr = mem_alloc_pages(nr, size, mask, numa_policy);
	numa_bitmask_free(mask);

	return vaddr;
}
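The numa_policy argument presumably maps to an mbind() mode such as MPOL_BIND or MPOL_PREFERRED; mem_alloc_pages() is not shown here. A sketch of the distinction using mbind() directly: MPOL_BIND makes allocations fail when they cannot be satisfied from the given node, while MPOL_PREFERRED falls back to other nodes.

#include <numa.h>
#include <numaif.h>
#include <sys/mman.h>

static void *alloc_on_node(size_t len, int node, int mode)
{
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return NULL;

	struct bitmask *mask = numa_allocate_nodemask();
	numa_bitmask_setbit(mask, node);
	/* mode is MPOL_BIND (strict) or MPOL_PREFERRED (best effort) */
	if (mbind(p, len, mode, mask->maskp, mask->size + 1, 0) != 0) {
		munmap(p, len);
		p = NULL;
	}
	numa_bitmask_free(mask);
	return p;
}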
Example 8
void *__mem_alloc_pages_onnode(void *base, int nr, int size, int node)
{
	void *vaddr;
	struct bitmask *mask = numa_allocate_nodemask();

	numa_bitmask_setbit(mask, node);
	vaddr = __mem_alloc_pages(base, nr, size, mask, MPOL_BIND);
	numa_bitmask_free(mask);

	return vaddr;
}
Example 9
  void ConfigureTableThread() {
    int32_t idx = ThreadContext::get_id() - GlobalContext::get_head_table_thread_id();
    int32_t node_id = idx % num_mem_nodes_;
    CHECK_EQ(numa_run_on_node(node_id), 0);

    struct bitmask *mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node_id);

    // prefer, rather than strictly bind to, the chosen NUMA node
    numa_set_bind_policy(0);
    numa_set_membind(mask);
    numa_free_nodemask(mask);
  }
Example 10
	void open_mmap() {
		int ret = posix_fadvise(fd, 0, 0, POSIX_FADV_SEQUENTIAL);
		assert(ret == 0);
		data = (T *)mmap(NULL, sizeof(T) * length, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
		assert(data != MAP_FAILED); // check the mapping before handing it to libnuma

		struct bitmask *bmask = numa_allocate_nodemask();
		numa_bitmask_setall(bmask);
		numa_tonodemask_memory(data, sizeof(T) * length, bmask);
		numa_free_nodemask(bmask);

		is_open = true;
	}
Example 11
static void verify_mempolicy(unsigned int node, int mode)
{
	struct bitmask *bm = numa_allocate_nodemask();
	unsigned int i;

	numa_bitmask_setbit(bm, node);

	TEST(set_mempolicy(mode, bm->maskp, bm->size+1));

	if (TST_RET) {
		tst_res(TFAIL | TTERRNO,
		        "set_mempolicy(%s) node %u",
		        tst_numa_mode_name(mode), node);
		numa_free_nodemask(bm);
		return;
	}

	tst_res(TPASS, "set_mempolicy(%s) node %u",
	        tst_numa_mode_name(mode), node);

	numa_free_nodemask(bm);

	const char *prefix = "child: ";

	if (SAFE_FORK()) {
		prefix = "parent: ";
		tst_reap_children();
	}

	tst_nodemap_reset_counters(nodes);
	alloc_fault_count(nodes, NULL, PAGES_ALLOCATED * page_size);
	tst_nodemap_print_counters(nodes);

	for (i = 0; i < nodes->cnt; i++) {
		if (nodes->map[i] == node) {
			if (nodes->counters[i] == PAGES_ALLOCATED) {
				tst_res(TPASS, "%sNode %u allocated %u",
				        prefix, node, PAGES_ALLOCATED);
			} else {
				tst_res(TFAIL, "%sNode %u allocated %u, expected %u",
				        prefix, node, nodes->counters[i],
				        PAGES_ALLOCATED);
			}
			continue;
		}

		if (nodes->counters[i]) {
			tst_res(TFAIL, "%sNode %u allocated %u, expected 0",
			        prefix, i, nodes->counters[i]);
		}
	}
}
Example 12
static void regular_nodes_init(void)
{
    int i, nodes_num = numa_num_configured_nodes();
    struct bitmask *node_cpus = numa_allocate_cpumask();

    regular_nodes_mask = numa_allocate_nodemask();

    for (i = 0; i < nodes_num; i++) {
        numa_node_to_cpus(i, node_cpus);    /* query node i, not a fixed node */
        if (numa_bitmask_weight(node_cpus))
            numa_bitmask_setbit(regular_nodes_mask, i);
    }
    numa_bitmask_free(node_cpus);
}
Example 13
///This function tries to fill bandwidth array based on knowledge about known CPU models
static int fill_bandwidth_values_heuristically(int* bandwidth, int bandwidth_len)
{
    int ret = MEMKIND_ERROR_UNAVAILABLE; // Default error returned if the heuristic approach fails
    int i, nodes_num, memory_only_nodes_num = 0;
    struct bitmask *memory_only_nodes, *node_cpus;

    if (is_cpu_xeon_phi_x200() == 0) {
        log_info("Known CPU model detected: Intel(R) Xeon Phi(TM) x200.");
        nodes_num = numa_num_configured_nodes();

        // Check if the number of NUMA nodes meets expectations for
        // supported configurations of Intel Xeon Phi x200
        if (nodes_num != 2 && nodes_num != 4 && nodes_num != 8) {
            return ret;
        }

        memory_only_nodes = numa_allocate_nodemask();
        node_cpus = numa_allocate_cpumask();

        for (i = 0; i < nodes_num; i++) {
            numa_node_to_cpus(i, node_cpus);
            if (numa_bitmask_weight(node_cpus) == 0) {
                memory_only_nodes_num++;
                numa_bitmask_setbit(memory_only_nodes, i);
            }
        }

        // If the number of memory-only nodes equals the number of memory+CPU
        // nodes, set ret to 0 (success) and fill the bandwidth table
        if (memory_only_nodes_num == (nodes_num - memory_only_nodes_num)) {

            ret = 0;
            assign_arbitrary_bandwidth_values(bandwidth, bandwidth_len, memory_only_nodes);
        }

        numa_bitmask_free(memory_only_nodes);
        numa_bitmask_free(node_cpus);
    }

    return ret;
}
Example 14
int main(void)
{
	void* ptr;
	struct bitmask *nmask;
	int err;

	nmask = numa_allocate_nodemask();
	numa_bitmask_setbit(nmask, 0);

	ptr = shmem_open();

	err = mbind(ptr, 4096 * 3, MPOL_INTERLEAVE,
		    nmask->maskp, nmask->size, 0);
	if (err < 0)
		perror("mbind1"), exit(1);

	err = mbind((char *)ptr + 4096, 4096, MPOL_BIND,
		    nmask->maskp, nmask->size, 0);
	if (err < 0)
		perror("mbind2"), exit(1);

	numa_free_nodemask(nmask);

	return 0;
}
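A sketch of how the two policies installed above could be checked per page with get_mempolicy() and MPOL_F_ADDR (error handling abbreviated):

#include <stdio.h>
#include <numaif.h>

/* Print the policy governing the page that contains 'addr'. */
static void print_policy_at(void *addr)
{
	int mode = -1;

	if (get_mempolicy(&mode, NULL, 0, addr, MPOL_F_ADDR) < 0) {
		perror("get_mempolicy");
		return;
	}
	printf("%p: %s\n", addr,
	       mode == MPOL_INTERLEAVE ? "interleave" :
	       mode == MPOL_BIND       ? "bind" : "other");
}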
Example 15
void* mmap_2m(void* addr, int prot, int node /* = -1 */,
              bool map_shared /* = false */, bool map_fixed /* = false */) {
#ifdef __linux__
  if (get_huge2m_info(node).free_hugepages <= 0) return nullptr;
#ifdef HAVE_NUMA
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  if (node >= 0 && numa_num_nodes > 1) {
    assert(numa_node_set != 0);
    if ((numa_node_set & (1u << node)) == 0) {
      // Numa policy forbids allocation on the node.
      return nullptr;
    }
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    bitmask* mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, node);
    numa_set_membind(mask);
    numa_bitmask_free(mask);
  }
#endif
  void* ret = mmap_2m_impl(addr, prot, map_shared, map_fixed);
  s_num2MPages += !!ret;
#ifdef HAVE_NUMA
  if (memMask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(memMask);
    numa_bitmask_free(interleaveMask);
  }
#endif
  return ret;
#else  // not linux
  return nullptr;
#endif
}
Example 16
File: vma02.c Project: kraj/ltp
int main(int argc, char **argv)
{
	FILE *fp;
	void *addr, *start, *end, *lastend;
	int node, err, lc;
	char buf[BUFSIZ];
	struct bitmask *nmask = numa_allocate_nodemask();

	pagesize = getpagesize();
	tst_parse_opts(argc, argv, options, usage);

	if (opt_node) {
		node = SAFE_STRTOL(NULL, opt_nodestr, 1, LONG_MAX);
	} else {
		err = get_allowed_nodes(NH_MEMS, 1, &node);
		if (err == -3)
			tst_brkm(TCONF, NULL, "requires at least one node.");
		else if (err < 0)
			tst_brkm(TBROK | TERRNO, NULL, "get_allowed_nodes");
	}
	numa_bitmask_setbit(nmask, node);

	for (lc = 0; TEST_LOOPING(lc); lc++) {
		tst_count = 0;
		addr = mmap(NULL, pagesize * 3, PROT_WRITE,
			    MAP_ANON | MAP_PRIVATE, 0, 0);
		if (addr == MAP_FAILED)
			tst_brkm(TBROK | TERRNO, NULL, "mmap");

		tst_resm(TINFO, "pid = %d addr = %p", getpid(), addr);
		/* make page populate */
		memset(addr, 0, pagesize * 3);

		/* first mbind */
		err = mbind(addr + pagesize, pagesize, MPOL_BIND, nmask->maskp,
			    nmask->size, MPOL_MF_MOVE_ALL);
		if (err != 0) {
			if (errno != ENOSYS)
				tst_brkm(TBROK | TERRNO, NULL, "mbind1");
			else
				tst_brkm(TCONF, NULL,
					 "mbind syscall not implemented on this system.");
		}

		/* second mbind */
		err = mbind(addr, pagesize * 3, MPOL_DEFAULT, NULL, 0, 0);
		if (err != 0)
			tst_brkm(TBROK | TERRNO, NULL, "mbind2");

		/* /proc/self/maps in the form of
		   "00400000-00406000 r-xp 00000000". */
		fp = fopen("/proc/self/maps", "r");
		if (fp == NULL)
			tst_brkm(TBROK | TERRNO, NULL, "fopen");

		while (fgets(buf, BUFSIZ, fp) != NULL) {
			if (sscanf(buf, "%p-%p ", &start, &end) != 2)
				continue;

			if (start == addr) {
				tst_resm(TINFO, "start = %p, end = %p",
					 start, end);
				if (end == addr + pagesize * 3) {
					tst_resm(TPASS, "only 1 VMA.");
					break;
				}

				lastend = end;
				while (fgets(buf, BUFSIZ, fp) != NULL) {
					/* No more VMAs, break */
					if (sscanf(buf, "%p-%p ", &start,
						   &end) != 2)
						break;
					tst_resm(TINFO, "start = %p, end = %p",
						 start, end);

					/* more VMAs found */
					if (start == lastend)
						lastend = end;
					if (end == addr + pagesize * 3) {
						tst_resm(TFAIL,
							 ">1 unmerged VMAs.");
						break;
					}
				}
				if (end != addr + pagesize * 3)
					tst_resm(TFAIL, "no matched VMAs.");
				break;
			}
		}
		fclose(fp);
		if (munmap(addr, pagesize * 3) == -1)
			tst_brkm(TWARN | TERRNO, NULL, "munmap");
	}
	tst_exit();
}
Example 17
int main (int argc, char** argv) {                                     
	int  ret, c;
	int i, repeat = 5;
	int cpu = 2;
	static int errortype = 1;
	static int verbose = 1;
	static int disableHuge = 0;
	static int madvisePoison = 0;
	static int poll_exit=0;
	static long length;
 	struct bitmask *nodes, *gnodes;
	int gpolicy;
	unsigned long error_opt;

	void *vaddrmin = (void *)-1UL, *vaddrmax = NULL;

	static size_t pdcount = 0;
	unsigned long mattr, addrend, pages, count, nodeid, paddr = 0;
	unsigned long addr_start = 0, nodeid_start = -1, mattr_start = -1;
	unsigned int pagesize = getpagesize();
	char pte_str[20];

	struct dlook_get_map_info req;
	static page_desc_t *pdbegin = NULL;
	page_desc_t *pd, *pdend;

	length = memsize("100k");
	nodes  = numa_allocate_nodemask();
	gnodes = numa_allocate_nodemask();
	progname = argv[0];


	while (1)
	{
		static struct option long_options[] =
		{
		  {"verbose",       no_argument,       &verbose, 1},
		  {"delay",         no_argument,       &delay, 1},
		  {"disableHuge",   no_argument,       &disableHuge, 1},
		  {"poll",          no_argument,       &poll_exit, 1},
		  {"madvisePoison", no_argument,       &madvisePoison, 1},
		  {"manual",        no_argument,       &manual, 1},
		  {"cpu",           required_argument, 0, 'c'},
		  {"errortype",     required_argument, 0, 'e'},
		  {"help",          no_argument,       0, 'h'},
		  {"length",        required_argument, 0, 'l'}
		};
		/* getopt_long stores the option index here. */
		int option_index = 0;

		c = getopt_long (argc, argv, "hc:e:l:",
			       long_options, &option_index);

		/* Detect the end of the options. */
		if (c == -1)
			break;

		switch (c)
		{
			case 'c':
                          cpu = atoi(optarg);
			  break;
			case 'e':
                          errortype = atoi(optarg);
			  break;
			case 'h':
			  help();
			case 'l':
			  /* Not exposed */
			  printf ("option -l with value `%s'\n", optarg);
			  length = memsize("optarg");
			  break;
			case '?':
			  /* getopt_long already printed an error message. */
			  exit(-1);
		}
	}

	cpu_process_setaffinity(getpid(), cpu);

	error_opt = get_etype(errortype);

	buf = mmap(NULL, length, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);

        if (mbind((void *)buf, length, MPOL_DEFAULT, nodes->maskp, nodes->size, 0) < 0) {
                perror("mbind");
        }
	/* Disable Hugepages */
	if (disableHuge)
		madvise((void *)buf, length, MADV_NOHUGEPAGE);

	if (madvisePoison)
		madvise((void *)buf, length,MADV_HWPOISON );

    	gpolicy = -1;
        if (get_mempolicy(&gpolicy, gnodes->maskp, gnodes->size, (void *)buf, MPOL_F_ADDR) < 0)
                perror("get_mempolicy");
        if (!numa_bitmask_equal(gnodes, nodes)) {
                printf("nodes differ %lx, %lx!\n", gnodes->maskp[0], nodes->maskp[0]);
        }

	strcpy(pte_str, "");
        addrend = ((unsigned long)buf)+length;        
        pages = (addrend-((unsigned long)buf))/pagesize;

        if (pages > pdcount) {
                pdbegin = realloc(pdbegin, sizeof(page_desc_t)*pages);
                pdcount = pages;
        }

        req.pid = getpid();
        req.start_vaddr = (unsigned long)buf;
        req.end_vaddr = addrend;
        req.pd = pdbegin;

	sigaction(SIGBUS, &recover_act, NULL);

	/*Fault in Pages */
	if(!poll_exit)
		hog((void *)buf, length);

	/* Get mmap phys_addrs */
	if ((fd = open(UVMCE_DEVICE, O_RDWR)) < 0) {                 
		printf("Failed to open: %s\n", UVMCE_DEVICE);  
		exit (1);                                     
	}                                               
	    
	if (ioctl(fd, UVMCE_DLOOK, &req ) < 0){        
		printf("Failed to INJECT_UCE\n");
		exit(1);                                      
	}                                               


	process_map(pd,pdbegin, pdend, pages, buf, addrend, pagesize, mattr,
		    nodeid, paddr, pte_str, nodeid_start, mattr_start, addr_start);

	printf("\n\tstart_vaddr\t 0x%016lx length\t 0x%x\n\tend_vaddr\t 0x%016lx pages\t %ld\n", 
		 buf , length, addrend, pages);


	uv_inject(pd,pdbegin, pdend, pages, (unsigned long)buf, addrend, pagesize, mattr,
		    nodeid, paddr, pte_str, nodeid_start, 
		    mattr_start, addr_start, error_opt);

	
	if (delay){
		printf("Enter char to consume bad memory..");
		getchar();
	}

	if (error_opt !=  UVMCE_PATROL_SCRUB_UCE){
		consume_it((void *)buf, length);
	}
out:
	close(fd);                                      
	return 0;                                       
}
Example 18
/**
 * @brief Do memory binding.
 *
 * This handles the binding types map_mem, mask_mem and rank.
 * The types local (default) and none are handled directly by the daemon.
 *
 * When using libnuma with API v1, this is a no-op that only prints a warning.
 *
 * @param step  Step structure
 * @param task  Task structure
 *
 * @return No return value.
 */
void doMemBind(Step_t *step, PStask_t *task)
{

# ifndef HAVE_NUMA_ALLOCATE_NODEMASK
    mlog("%s: psslurm does not support memory binding types map_mem, mask_mem"
	    " and rank with libnuma v1\n", __func__);
    fprintf(stderr, "Memory binding type not supported with used libnuma"
	   " version");
    return;
# else

    const char delimiters[] = ",";
    uint32_t lTID;
    char *next, *saveptr, *ents, *myent, *endptr;
    char **entarray;
    unsigned int numents;
    uint16_t mynode;

    struct bitmask *nodemask = NULL;

    if (!(step->memBindType & MEM_BIND_MAP)
	    && !(step->memBindType & MEM_BIND_MASK)
	    && !(step->memBindType & MEM_BIND_RANK)) {
	/* things are handled elsewhere */
	return;
    }

    if (!PSIDnodes_bindMem(PSC_getMyID()) || getenv("__PSI_NO_MEMBIND")) {
	    // info messages already printed in doClamps()
	return;
    }

    if (numa_available()==-1) {
	fprintf(stderr, "NUMA not available:");
	return;
    }

    nodemask = numa_allocate_nodemask();
    if (!nodemask) {
	fprintf(stderr, "Allocation of nodemask failed:");
	return;
    }

    lTID = getLocalRankID(task->rank, step, step->localNodeId);

    if (step->memBindType & MEM_BIND_RANK) {
	if (lTID > (unsigned int)numa_max_node()) {
	    mlog("%s: memory binding to ranks not possible for rank %d."
		    " (local rank %d > #numa_nodes %d)\n", __func__,
		    task->rank, lTID, numa_max_node());
	    fprintf(stderr, "Memory binding to ranks not possible for rank %d,"
		    " local rank %u larger than max numa node %d.",
		    task->rank, lTID, numa_max_node());
	    if (nodemask) numa_free_nodemask(nodemask);
	    return;
	}
	if (numa_bitmask_isbitset(numa_get_mems_allowed(), lTID)) {
	    numa_bitmask_setbit(nodemask, lTID);
	} else {
	    mlog("%s: setting bit %d in memory mask not allowed in rank"
		    " %d\n", __func__, lTID, task->rank);
	    fprintf(stderr, "Not allowed to set bit %u in memory mask"
		    " of rank %d\n", lTID, task->rank);
	}
	numa_set_membind(nodemask);
	if (nodemask) numa_free_nodemask(nodemask);
	return;
    }

    ents = ustrdup(step->memBind);
    entarray = umalloc(step->tasksToLaunch[step->localNodeId] * sizeof(char*));
    numents = 0;
    myent = NULL;
    entarray[0] = NULL;

    next = strtok_r(ents, delimiters, &saveptr);
    while (next && (numents < step->tasksToLaunch[step->localNodeId])) {
	entarray[numents++] = next;
	if (numents == lTID+1) {
	    myent = next;
	    break;
	}
	next = strtok_r(NULL, delimiters, &saveptr);
    }

    if (!myent && numents) {
	myent = entarray[lTID % numents];
    }

    if (!myent) {
	numa_set_membind(numa_all_nodes_ptr);
	if (step->memBindType & MEM_BIND_MASK) {
	    mlog("%s: invalid mem mask string '%s'\n", __func__, ents);
	}
	else if (step->memBindType & MEM_BIND_MAP) {
	    mlog("%s: invalid mem map string '%s'\n", __func__, ents);
	}
	goto cleanup;
    }

    if (step->memBindType & MEM_BIND_MAP) {

	if (strncmp(myent, "0x", 2) == 0) {
	    mynode = strtoul (myent+2, &endptr, 16);
	} else {
	    mynode = strtoul (myent, &endptr, 10);
	}

	if (*endptr == '\0' && mynode <= numa_max_node()) {
	    if (numa_bitmask_isbitset(numa_get_mems_allowed(), mynode)) {
		numa_bitmask_setbit(nodemask, mynode);
	    } else {
		mlog("%s: setting bit %d in memory mask not allowed in rank"
			" %d\n", __func__, mynode, task->rank);
		fprintf(stderr, "Not allowed to set bit %d in memory mask"
			" of rank %d\n", mynode, task->rank);
	    }
	} else {
	    mlog("%s: invalid memory map entry '%s' (%d) for rank %d\n",
		    __func__, myent, mynode, task->rank);
	    fprintf(stderr, "Invalid memory map entry '%s' for rank %d\n",
		    myent, task->rank);
	    numa_set_membind(numa_all_nodes_ptr);
	    goto cleanup;
	}
	mdbg(PSSLURM_LOG_PART, "%s: (bind_map) node %i local task %i"
	     " memstr '%s'\n", __func__, step->localNodeId, lTID, myent);

    } else if (step->memBindType & MEM_BIND_MASK) {
	parseNUMAmask(nodemask, myent, task->rank);
    }

    numa_set_membind(nodemask);

    cleanup:

    ufree(ents);
    ufree(entarray);
    if (nodemask) numa_free_nodemask(nodemask);
# endif

    return;
}
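parseNUMAmask() is not shown in this excerpt; the following is only a hypothetical sketch of what such a helper could do for the MEM_BIND_MASK branch, based on how the mask string is used above:

#include <stdlib.h>
#include <numa.h>

/* Hypothetical: set a bit in 'nodemask' for every bit set in the hex
 * mask string 'maskStr' (e.g. "0x3" selects nodes 0 and 1). */
static void parse_numa_mask(struct bitmask *nodemask, const char *maskStr)
{
	unsigned long mask = strtoul(maskStr, NULL, 16);
	int node;

	for (node = 0; node <= numa_max_node() && node < 64; node++) {
		if (mask & (1UL << node))
			numa_bitmask_setbit(nodemask, node);
	}
}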
Example 19
size_t remap_interleaved_2m_pages(void* addr, size_t pages, int prot,
                                  bool shared /* = false */) {
#ifdef __linux__
  assert(reinterpret_cast<uintptr_t>(addr) % size2m == 0);
  assert(addr != nullptr);

  if (pages == 0) return 0;

#ifdef HAVE_NUMA
  const int maxNode = numa_max_node();
  bitmask* memMask = nullptr;
  bitmask* interleaveMask = nullptr;
  bitmask* mask = nullptr;
  if (maxNode > 0) {
    memMask = numa_get_membind();
    interleaveMask = numa_get_interleave_mask();
    mask = numa_allocate_nodemask();
  }
#else
  constexpr int maxNode = 0;
#endif
  int node = -1;
  int failed = 0;                       // consecutive failure count
  int mapped_count = 0;
  do {
#ifdef HAVE_NUMA
    if (maxNode > 0) {
      if (++node > maxNode) node = 0;
      if (!numa_node_allowed(node)) {
        // Numa policy forbids allocation on node
        if (++failed > maxNode) break;
        continue;
      }
      numa_bitmask_setbit(mask, node);
      numa_set_membind(mask);
      numa_bitmask_clearbit(mask, node);
    }
#endif
    // Fail early if we don't have huge pages reserved.
    if (get_huge2m_info(node).free_hugepages > 0 &&
        mmap_2m_impl(addr, prot, shared, true /* MAP_FIXED */)) {
      addr = (char*)addr + size2m;
      ++mapped_count;
      failed = 0;
      continue;
    }
    // We failed on node, give up if we have failed on all nodes
    if (++failed > maxNode) break;
  } while (mapped_count < pages);

#ifdef HAVE_NUMA
  if (mask) {
    numa_set_membind(memMask);
    numa_set_interleave_mask(interleaveMask);
    numa_bitmask_free(mask);
    numa_bitmask_free(interleaveMask);
    numa_bitmask_free(memMask);
  }
#endif
  return mapped_count;
#else  // not linux
  return 0;
#endif
}
Example 20
void* StorageManager::allocateSlots(const std::size_t num_slots,
                                    const int numa_node) {
#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
#elif defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
#endif

  makeRoomForBlockOrBlob(num_slots);
  void *slots = nullptr;

#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB) || defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               kLargePageMmapFlags,
               -1, 0);

  // Fallback to regular mmap() if large page allocation failed. Even on
  // systems with large page support, large page allocation may fail if the
  // user running the executable is not a member of hugetlb_shm_group on Linux,
  // or if all the reserved hugepages are already in use.
  if (slots == MAP_FAILED) {
    slots = mmap(nullptr,
                 num_slots * kSlotSizeBytes,
                 PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS,
                 -1, 0);
  }
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#elif defined(QUICKSTEP_HAVE_MMAP_PLAIN)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               MAP_PRIVATE | MAP_ANONYMOUS,
               -1, 0);
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#else
  slots = malloc_with_alignment(num_slots * kSlotSizeBytes,
                                kCacheLineBytes);
  if (slots != nullptr) {
    memset(slots, 0x0, num_slots * kSlotSizeBytes);
  }
#endif

  if (slots == nullptr) {
    throw OutOfMemory();
  }

#if defined(QUICKSTEP_HAVE_LIBNUMA)
  if (numa_node != -1) {
    DEBUG_ASSERT(numa_node < numa_num_configured_nodes());
    struct bitmask *numa_node_bitmask = numa_allocate_nodemask();
    // numa_node can be 0 through n-1, where n is the num of NUMA nodes.
    numa_bitmask_setbit(numa_node_bitmask, numa_node);
    long mbind_status = mbind(slots,  // NOLINT(runtime/int)
                              num_slots * kSlotSizeBytes,
                              MPOL_PREFERRED,
                              numa_node_bitmask->maskp,
                              numa_node_bitmask->size,
                              0);
    numa_free_nodemask(numa_node_bitmask);
    if (mbind_status == -1) {
      LOG(WARNING) << "mbind() failed with errno " << errno << " ("
                   << std::strerror(errno) << ")";
    }
  }
#endif  // QUICKSTEP_HAVE_LIBNUMA

  total_memory_usage_ += num_slots;
  return slots;
}
Example 21
unique_bitmask_ptr make_nodemask_ptr()
{
    return unique_bitmask_ptr(numa_allocate_nodemask(), numa_free_nodemask);
}
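unique_bitmask_ptr is presumably an alias along the lines of std::unique_ptr<struct bitmask, decltype(&numa_free_nodemask)>, so the nodemask is released automatically when the owning pointer goes out of scope.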
Example 22
static void *
s_numa_alloc(size_t sz, int cpu) {
  void *ret = NULL;

  if (likely(sz > 0)) {
    if (likely(cpu >= 0)) {
      if (likely(s_numa_nodes != NULL && s_n_cpus > 0)) {
        unsigned int node = s_numa_nodes[cpu];
        unsigned int allocd_node = UINT_MAX;
        struct bitmask *bmp;
        int r;
  
        bmp = numa_allocate_nodemask();
        numa_bitmask_setbit(bmp, node);

        errno = 0;
        r = (int)set_mempolicy(MPOL_BIND, bmp->maskp, bmp->size + 1);
        if (likely(r == 0)) {
          errno = 0;
          ret = numa_alloc_onnode(sz, (int)node);
          if (likely(ret != NULL)) {
            lagopus_result_t rl;

            /*
             * We need this "first touch" even using the
             * numa_alloc_onnode().
             */
            (void)memset(ret, 0, sz);

            errno = 0;
            r = (int)get_mempolicy((int *)&allocd_node, NULL, 0, ret,
                                   MPOL_F_NODE|MPOL_F_ADDR);
            if (likely(r == 0)) {
              if (unlikely(node != allocd_node)) {
                /*
                 * The memory is not allocated on the node, but it is
                 * still usable. Just return it.
                 */
                lagopus_msg_warning("allocated " PFSZ(u) " bytes for CPU %d, "
                                    "but not on NUMA node %d.\n",
                                    sz, cpu, node);
              }
            } else {
              lagopus_perror(LAGOPUS_RESULT_POSIX_API_ERROR);
              lagopus_msg_error("get_mempolicy() returned %d.\n", r);
            }

            rl = s_add_addr(ret, sz);
            if (unlikely(rl != LAGOPUS_RESULT_OK)) {
              lagopus_perror(rl);
              lagopus_msg_error("can't register the allocated address.\n");
              numa_free(ret, sz);
              ret = NULL;
            }
          }

        } else {	/* r == 0 */
          lagopus_perror(LAGOPUS_RESULT_POSIX_API_ERROR);
          lagopus_msg_error("set_mempolicy() returned %d.\n", r);
        }

        numa_free_nodemask(bmp);
        set_mempolicy(MPOL_DEFAULT, NULL, 0);

      } else {	/* s_numa_nodes != NULL && s_n_cpus > 0 */
        /*
         * Not initialized or initialization failure.
         */
        lagopus_msg_warning("The NUMA related information is not initialized. "
                            "Use malloc(3) instead.\n");
        ret = malloc(sz);
      }

    } else {	/* cpu >= 0 */
      /*
       * Use pure malloc(3).
       */
      ret = malloc(sz);
    }

  }

  return ret;
}