Example 1
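A GoogleTest helper that walks every page of an allocation and asserts, via get_mempolicy() with MPOL_F_ADDR, that each page carries the expected NUMA policy and node mask.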
    void check_numa_nodes(unique_bitmask_ptr &expected_bitmask, int policy,
                          void *ptr, size_t size)
    {

        const size_t page_size = sysconf(_SC_PAGESIZE);
        size_t pages_number = get_num_of_pages(size, page_size);
        std::vector<void *> address = get_address_of_pages(ptr, pages_number,
                                                           page_size);
        unique_bitmask_ptr returned_bitmask = make_nodemask_ptr();
        int status = -1;

        for (size_t page_num = 0; page_num < address.size(); page_num++) {
            ASSERT_EQ(0, get_mempolicy(&status, returned_bitmask->maskp,
                                       returned_bitmask->size, address[page_num], MPOL_F_ADDR));
            ASSERT_EQ(policy, status);
            switch(policy) {
                case MPOL_INTERLEAVE:
                    EXPECT_TRUE(numa_bitmask_equal(expected_bitmask.get(), returned_bitmask.get()));
                    break;
                case MPOL_DEFAULT:
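                    // MPOL_DEFAULT carries no node mask, so there is nothing further to verify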
                    break;
                case MPOL_BIND:
                case MPOL_PREFERRED:
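                    // every node present in the returned mask must be among the expected nodes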
                    for(int i=0; i < numa_num_possible_nodes(); i++) {
                        if(numa_bitmask_isbitset(returned_bitmask.get(), i)) {
                            EXPECT_TRUE(numa_bitmask_isbitset(expected_bitmask.get(), i));
                        }
                    }
                    break;
                default:
                    assert(!"Unknown policy\n");
            }
        }
    }
Example 2
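A small debug helper that logs the index of every bit set in a libnuma bitmask.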
void print_bitmask(struct bitmask* bitmask) {
    int i;
    for (i=0; i<numa_num_possible_nodes(); i++) {
        if (numa_bitmask_isbitset(bitmask, i)) {
            DBG_LOG(INFO, "bit %d\n", i);
        }
    }
}
Example 3
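A diagnostic routine in the style of numactl --show: it prints the process's current memory policy, preferred node, interleave mask, and CPU/memory bindings.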
void show(void)
{
	int prefnode;
	struct bitmask *membind, *interleave, *cpubind;
	unsigned long cur;
	int policy;
	int numa_num_nodes = numa_num_possible_nodes();
	
	if (numa_available() < 0) {
		show_physcpubind();
		printf("No NUMA support available on this system.\n");
		exit(1);
	}

	cpubind = numa_get_run_node_mask();

	prefnode = numa_preferred();
	interleave = numa_get_interleave_mask();
	membind = numa_get_membind();
	cur = numa_get_interleave_node();

	policy = 0;
	if (get_mempolicy(&policy, NULL, 0, 0, 0) < 0)
		perror("get_mempolicy");

	printf("policy: %s\n", policy_name(policy));
		
	printf("preferred node: ");
	switch (policy) {
	case MPOL_PREFERRED:
		if (prefnode != -1) {
			printf("%ld\n", prefnode);
			break;
		}
		/*FALL THROUGH*/
	case MPOL_DEFAULT:
		printf("current\n");
		break;
	case MPOL_INTERLEAVE:
		printf("%ld (interleave next)\n",cur);
		break;
	case MPOL_BIND:
		printf("%d\n", find_first_bit(&membind, numa_num_nodes));
		break;
	}
	if (policy == MPOL_INTERLEAVE) {
		printmask("interleavemask", interleave);
		printf("interleavenode: %ld\n", cur);
	}
	show_physcpubind();
	printmask("cpubind", cpubind);  // for compatibility
	printmask("nodebind", cpubind);
	printmask("membind", membind);
}
Example 4
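A helper that returns the index of the next bit set in a CPU mask at or after cpu_id, or -1 if no further bit is set.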
int next_cpu(struct bitmask* bitmask, int cpu_id)
{
    int i;
    // bitmask is opaque, but numa_bitmask_nbytes() reports its capacity;
    // this is a CPU mask, so bound the scan by the mask size rather than
    // by the node count
    int nbits = numa_bitmask_nbytes(bitmask) * 8;

    for (i = cpu_id; i < nbits; i++) {
        if (numa_bitmask_isbitset(bitmask, i)) {
            return i;
        }
    }
    return -1;
}
Example 5
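A helper that counts how many bits are set in a CPU mask, i.e. how many CPUs it contains.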
int num_cpus(struct bitmask* bitmask)
{
    int i, n;
    // bitmask is opaque, but numa_bitmask_nbytes() reports its capacity;
    // numa_bitmask_weight() would also count the set bits directly
    int nbits = numa_bitmask_nbytes(bitmask) * 8;

    for (i = 0, n = 0; i < nbits; i++) {
        if (numa_bitmask_isbitset(bitmask, i)) {
            n++;
        }
    }
    return n;
}
Example 6
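A test program that, for each NUMA node, maps anonymous memory, binds a page to that node with MPOL_PREFERRED, touches it, and verifies the placement via get_mempolicy(); the includes and the err() helper are filled in here so the snippet compiles standalone.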
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/mman.h>
#include <numa.h>
#include <numaif.h>

/* minimal error helper; assumed here since the original test harness is not shown */
#define err(x) (perror(x), exit(1))

int main(void)
{
	int max = numa_max_node();
	int maxmask = numa_num_possible_nodes();
	struct bitmask *nodes, *mask;
	int pagesize = getpagesize();
	int i;
	int pol;
	int node;
	int err = 0;
	nodes = numa_bitmask_alloc(maxmask);
	mask = numa_bitmask_alloc(maxmask);

	for (i = max; i >= 0; --i) {
		char *mem = mmap(NULL, pagesize*(max+1), PROT_READ|PROT_WRITE,
					MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
		char *adr = mem;

		if (mem == MAP_FAILED)
			err("mmap");

		printf("%d offset %lx\n", i, (long)(adr - mem));

		numa_bitmask_clearall(nodes);
		numa_bitmask_clearall(mask);
		numa_bitmask_setbit(nodes, i);

		if (mbind(adr,  pagesize, MPOL_PREFERRED, nodes->maskp,
							nodes->size, 0) < 0)
			err("mbind");

		++*adr;

		if (get_mempolicy(&pol, mask->maskp, mask->size, adr, MPOL_F_ADDR) < 0)
			err("get_mempolicy");

		assert(pol == MPOL_PREFERRED);
		assert(numa_bitmask_isbitset(mask, i));

		node = 0x123;

		if (get_mempolicy(&node, NULL, 0, adr, MPOL_F_ADDR|MPOL_F_NODE) < 0)
			err("get_mempolicy2");

		printf("got node %d expected %d\n", node, i);

		if (node != i)
			err = 1;
	}
	return err;
}
Example 7
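The initializer of a simple hbwmalloc-style allocator: it inspects the NUMA topology, assumes the high-bandwidth memory is the last of at most two nodes, reserves a slab there with numa_alloc_onnode(), and builds a dlmalloc mspace on top of it.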
void myhbwmalloc_init(void)
{
    /* set to NULL before trying to initialize.  if we return before
     * successful creation of the mspace, then it will still be NULL,
     * and we can use that in subsequent library calls to determine
     * that the library failed to initialize. */
    myhbwmalloc_mspace = NULL;

    /* verbose printout? */
    myhbwmalloc_verbose = 0;
    {
        char * env_char = getenv("HBWMALLOC_VERBOSE");
        if (env_char != NULL) {
            myhbwmalloc_verbose = 1;
            printf("hbwmalloc: HBWMALLOC_VERBOSE set\n");
        }
    }

    /* fail hard or soft? */
    myhbwmalloc_hardfail = 1;
    {
        char * env_char = getenv("HBWMALLOC_SOFTFAIL");
        if (env_char != NULL) {
            myhbwmalloc_hardfail = 0;
            printf("hbwmalloc: HBWMALLOC_SOFTFAIL set\n");
        }
    }

    /* set the atexit handler that will destroy the mspace and free the numa allocation */
    atexit(myhbwmalloc_final);

    /* detect and configure use of NUMA memory nodes */
    {
        int max_possible_node        = numa_max_possible_node();
        int num_possible_nodes       = numa_num_possible_nodes();
        int max_numa_nodes           = numa_max_node();
        int num_configured_nodes     = numa_num_configured_nodes();
        int num_configured_cpus      = numa_num_configured_cpus();
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_max_possible_node()    = %d\n", max_possible_node);
            printf("hbwmalloc: numa_num_possible_nodes()   = %d\n", num_possible_nodes);
            printf("hbwmalloc: numa_max_node()             = %d\n", max_numa_nodes);
            printf("hbwmalloc: numa_num_configured_nodes() = %d\n", num_configured_nodes);
            printf("hbwmalloc: numa_num_configured_cpus()  = %d\n", num_configured_cpus);
        }
        /* FIXME this is a hack.  assumes HBW is only numa node 1. */
        if (num_configured_nodes <= 2) {
            myhbwmalloc_numa_node = num_configured_nodes-1;
        } else {
            fprintf(stderr,"hbwmalloc: we support only 2 numa nodes, not %d\n", num_configured_nodes);
        }

        if (myhbwmalloc_verbose) {
            for (int i=0; i<num_configured_nodes; i++) {
                unsigned max_numa_cpus = numa_num_configured_cpus();
                struct bitmask * mask = numa_bitmask_alloc( max_numa_cpus );
                int rc = numa_node_to_cpus(i, mask);
                if (rc != 0) {
                    fprintf(stderr, "hbwmalloc: numa_node_to_cpus failed\n");
                } else {
                    printf("hbwmalloc: numa node %d cpu mask:", i);
                    for (unsigned j=0; j<max_numa_cpus; j++) {
                        int bit = numa_bitmask_isbitset(mask,j);
                        printf(" %d", bit);
                    }
                    printf("\n");
                }
                numa_bitmask_free(mask);
            }
            fflush(stdout);
        }
    }

#if 0 /* unused */
    /* see if the user specifies a slab size */
    size_t slab_size_requested = 0;
    {
        char * env_char = getenv("HBWMALLOC_BYTES");
        if (env_char!=NULL) {
            long units = 1L;
            if      ( NULL != strstr(env_char,"G") ) units = 1000000000L;
            else if ( NULL != strstr(env_char,"M") ) units = 1000000L;
            else if ( NULL != strstr(env_char,"K") ) units = 1000L;
            else                                     units = 1L;

            int num_count = strspn(env_char, "0123456789");
            memset( &env_char[num_count], ' ', strlen(env_char)-num_count);
            slab_size_requested = units * atol(env_char);
        }
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: requested slab_size_requested = %zu\n", slab_size_requested);
        }
    }
#endif

    /* see what libnuma says is available */
    size_t myhbwmalloc_slab_size;
    {
        int node = myhbwmalloc_numa_node;
        long long freemem;
        long long maxmem = numa_node_size64(node, &freemem);
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_node_size64 says maxmem=%lld freemem=%lld for numa node %d\n",
                    maxmem, freemem, node);
        }
        myhbwmalloc_slab_size = freemem;
    }

    /* assume threads, disable if MPI knows otherwise, then allow user to override. */
    int multithreaded = 1;
#ifdef HAVE_MPI
    int nprocs = 1;  /* safe default if MPI has not been initialized */
    {
        int is_init, is_final;
        MPI_Initialized(&is_init);
        MPI_Finalized(&is_final);
        if (is_init && !is_final) {
            MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
        }

        /* give equal portion to every MPI process */
        myhbwmalloc_slab_size /= nprocs;

        /* if the user initializes MPI with MPI_Init or
         * MPI_Init_thread(MPI_THREAD_SINGLE), they assert there
         * are no threads at all, which means we can skip the
         * malloc mspace lock.
         *
         * if the user lies to MPI, they deserve any bad thing
         * that comes of it. */
        int provided;
        MPI_Query_thread(&provided);
        if (provided==MPI_THREAD_SINGLE) {
            multithreaded = 0;
        } else {
            multithreaded = 1;
        }

        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: MPI processes = %d (threaded = %d)\n", nprocs, multithreaded);
            printf("hbwmalloc: myhbwmalloc_slab_size = %d\n", myhbwmalloc_slab_size);
        }
    }
#endif

    /* user can assert that hbwmalloc and friends need not be thread-safe */
    {
        char * env_char = getenv("HBWMALLOC_LOCKLESS");
        if (env_char != NULL) {
            multithreaded = 0;
            if (myhbwmalloc_verbose) {
                printf("hbwmalloc: user has disabled locking in mspaces by setting HBWMALLOC_LOCKLESS\n");
            }
        }
    }

    myhbwmalloc_slab = numa_alloc_onnode( myhbwmalloc_slab_size, myhbwmalloc_numa_node);
    if (myhbwmalloc_slab==NULL) {
        fprintf(stderr, "hbwmalloc: numa_alloc_onnode returned NULL for size = %zu\n", myhbwmalloc_slab_size);
        return;
    } else {
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_alloc_onnode succeeded for size %zu\n", myhbwmalloc_slab_size);
        }

        /* part (less than 128*sizeof(size_t) bytes) of this space is used for bookkeeping,
         * so the capacity must be at least this large */
        if (myhbwmalloc_slab_size < 128*sizeof(size_t)) {
            fprintf(stderr, "hbwmalloc: not enough space for mspace bookkeeping\n");
            return;
        }

        /* see above regarding if the user lies to MPI. */
        int locked = multithreaded;
        myhbwmalloc_mspace = create_mspace_with_base( myhbwmalloc_slab, myhbwmalloc_slab_size, locked);
        if (myhbwmalloc_mspace == NULL) {
            fprintf(stderr, "hbwmalloc: create_mspace_with_base returned NULL\n");
            return;
        } else if (myhbwmalloc_verbose) {
            printf("hbwmalloc: create_mspace_with_base succeeded for size %zu\n", myhbwmalloc_slab_size);
        }
    }
}
Example 8
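A topology-setup routine that parses a configured list of physical NUMA node IDs, filters them against numa_get_mems_allowed(), and fuses pairs of mutually closest nodes (by numa_distance()) into virtual DRAM/NVRAM node pairs.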
int init_virtual_topology(config_t* cfg, cpu_model_t* cpu_model, virtual_topology_t** virtual_topologyp)
{
    char* mc_pci_file = NULL;
    char* str;
    char* saveptr;
    char* token;
    int* physical_node_ids;
    physical_node_t** physical_nodes = NULL;
    int num_physical_nodes;
    int n, v, i, j, sibling_idx;
    int node_id;
    physical_node_t* node_i, *node_j, *sibling_node;
    int ret;
    int min_distance;
    int distance;
    int hyperthreading;
    struct bitmask* mem_nodes;
    virtual_topology_t* virtual_topology;

    __cconfig_lookup_string(cfg, "topology.physical_nodes", &str);

    // parse the physical nodes string
    physical_node_ids = calloc(numa_num_possible_nodes(), sizeof(*physical_node_ids));
    num_physical_nodes = 0;
    while ((token = strtok_r(str, ",", &saveptr)) != NULL) {
        str = NULL;
        if (num_physical_nodes >= numa_num_possible_nodes()) {
            // we're being asked to run on more nodes than are available
            free(physical_node_ids);
            ret = E_ERROR;
            goto done;
        }
        physical_node_ids[num_physical_nodes++] = atoi(token);
    }
    physical_nodes = calloc(num_physical_nodes, sizeof(*physical_nodes));

    // select those nodes we can run on (e.g. not constrained by any numactl)
    mem_nodes = numa_get_mems_allowed();
    for (i=0, n=0; i<num_physical_nodes; i++) {
        node_id = physical_node_ids[i];
        if (numa_bitmask_isbitset(mem_nodes, node_id)) {
            physical_nodes[n] = malloc(sizeof(**physical_nodes));
            physical_nodes[n]->node_id = node_id;
            // TODO: what if we want to avoid using only a single hardware contexts of a hyperthreaded core?
            physical_nodes[n]->cpu_bitmask = numa_allocate_cpumask();
            numa_node_to_cpus(node_id, physical_nodes[n]->cpu_bitmask);
            __cconfig_lookup_bool(cfg, "topology.hyperthreading", &hyperthreading);
            if (hyperthreading) {
                physical_nodes[n]->num_cpus = num_cpus(physical_nodes[n]->cpu_bitmask);
            } else {
                DBG_LOG(INFO, "Not using hyperthreading.\n");
                // disable the upper half of the processors in the bitmask
                physical_nodes[n]->num_cpus = num_cpus(physical_nodes[n]->cpu_bitmask) / 2;
                int fc = first_cpu(physical_nodes[n]->cpu_bitmask);
                for (j=fc+system_num_cpus()/2; j<fc+system_num_cpus()/2+physical_nodes[n]->num_cpus; j++) {
                    if (numa_bitmask_isbitset(physical_nodes[n]->cpu_bitmask, j)) {
                        numa_bitmask_clearbit(physical_nodes[n]->cpu_bitmask, j);
                    }
                }
            }
            n++;
        }
    }
    free(physical_node_ids);
    num_physical_nodes = n;

    // if the pci bus topology of each physical node is not provided then discover it
    if (__cconfig_lookup_string(cfg, "topology.mc_pci", &mc_pci_file) == CONFIG_FALSE ||
        load_mc_pci_topology(mc_pci_file, physical_nodes, num_physical_nodes) != E_SUCCESS)
    {
        discover_mc_pci_topology(cpu_model, physical_nodes, num_physical_nodes);
        save_mc_pci_topology(mc_pci_file, physical_nodes, num_physical_nodes);
    }

    // form virtual nodes by grouping physical nodes that are close to each other
    virtual_topology = malloc(sizeof(*virtual_topology));
    virtual_topology->num_virtual_nodes = num_physical_nodes / 2 + num_physical_nodes % 2;
    virtual_topology->virtual_nodes = calloc(virtual_topology->num_virtual_nodes, 
                                             sizeof(*(virtual_topology->virtual_nodes)));
    
    for (i=0, v=0; i<num_physical_nodes; i++) {
        min_distance = INT_MAX;
        sibling_node = NULL;
        sibling_idx = -1;
        if ((node_i = physical_nodes[i]) == NULL) {
            continue;
        }
        for (j=i+1; j<num_physical_nodes; j++) {
            if ((node_j = physical_nodes[j]) == NULL) {
                continue;
            }
            distance = numa_distance(node_i->node_id, node_j->node_id);
            if (distance < min_distance) {
                min_distance = distance; // remember the closest node seen so far
                sibling_node = node_j;
                sibling_idx = j;
            }
        }
        if (sibling_node) {
            physical_nodes[i] = physical_nodes[sibling_idx] = NULL;
            virtual_node_t* virtual_node = &virtual_topology->virtual_nodes[v];
            virtual_node->dram_node = node_i;
            virtual_node->nvram_node = sibling_node;
            virtual_node->node_id = v;
            virtual_node->cpu_model = cpu_model;
            DBG_LOG(INFO, "Fusing physical nodes %d %d into virtual node %d\n", 
                    node_i->node_id, sibling_node->node_id, virtual_node->node_id);
            v++;
        }
    }

    // any physical node that is not paired with another physical node is
    // formed into a virtual node on its own
    if (2*v < num_physical_nodes) {
        for (i=0; i<num_physical_nodes; i++) {
            if ((node_i = physical_nodes[i]) == NULL) {
                continue; // already fused into a virtual node above
            }
            virtual_node_t* virtual_node = &virtual_topology->virtual_nodes[v];
            virtual_node->dram_node = virtual_node->nvram_node = node_i;
            virtual_node->node_id = v;
            virtual_node->cpu_model = cpu_model; // keep fields consistent with the paired case

            DBG_LOG(WARNING, "Forming physical node %d into virtual node %d without a sibling node.\n",
                    node_i->node_id, virtual_node->node_id);
            v++;
        }
    }

    *virtual_topologyp = virtual_topology;
    ret = E_SUCCESS;

done:
    free(physical_nodes);
    return ret;
}