/* return the numa node for the given cpuid */
extern uint16_t slurm_get_numa_node(uint16_t cpuid)
{
    uint16_t maxcpus = 0, nnid = 0;
    int size, retry, max_node;
    unsigned long *cpu_mask;

    maxcpus = conf->sockets * conf->cores * conf->threads;
    if (cpuid >= maxcpus)
        return 0;

    if (numa_array) {
        return numa_array[cpuid];
    }

    /* need to load the numa_array */
    max_node = numa_max_node();

    /* The required size of the mask buffer for numa_node_to_cpus()
     * is goofed up. The third argument is supposed to be the size
     * of the mask, which is an array of unsigned longs. The *unit*
     * of the third argument is unclear - should it be in bytes or
     * in unsigned longs??? Since I don't know, I'm using this retry
     * loop to try and determine an acceptable size. If anyone can
     * fix this interaction, please do!! */
    size = 8;
    cpu_mask = xmalloc(sizeof(unsigned long) * size);
    retry = 0;
    while (retry++ < 8 && numa_node_to_cpus(nnid, cpu_mask, size) < 0) {
        size *= 2;
        xrealloc(cpu_mask, sizeof(unsigned long) * size);
    }
    if (retry >= 8) {
        xfree(cpu_mask);
        error("NUMA problem with numa_node_to_cpus arguments");
        return 0;
    }

    numa_array = xmalloc(sizeof(uint16_t) * maxcpus);
    _add_numa_mask_to_array(cpu_mask, size, maxcpus, nnid);

    while (nnid++ < max_node) {
        if (numa_node_to_cpus(nnid, cpu_mask, size) < 0) {
            error("NUMA problem - numa_node_to_cpus 2nd call fail");
            xfree(cpu_mask);
            xfree(numa_array);
            numa_array = NULL;
            return 0;
        }
        _add_numa_mask_to_array(cpu_mask, size, maxcpus, nnid);
    }
    xfree(cpu_mask);

    return numa_array[cpuid];
}
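The retry loop above works around the ambiguity of the third argument in the libnuma v1 signature of numa_node_to_cpus() (buffer length in bytes vs. unsigned longs). The v2 interface sidesteps the question by taking a struct bitmask that libnuma sizes itself; a minimal sketch of that variant follows (a standalone illustration, not part of the Slurm code above):

#include <numa.h>
#include <stdio.h>

/* Minimal sketch using the libnuma v2 bitmask API: list the CPUs of one
 * node without having to guess the length unit of a raw mask buffer. */
static void list_node_cpus(int node)
{
    struct bitmask *cpus;

    if (numa_available() < 0)           /* no NUMA support on this system */
        return;

    cpus = numa_allocate_cpumask();     /* sized for all configured CPUs */
    if (numa_node_to_cpus(node, cpus) == 0) {
        for (unsigned int i = 0; i < cpus->size; i++)
            if (numa_bitmask_isbitset(cpus, i))
                printf("node %d: cpu %u\n", node, i);
    }
    numa_free_cpumask(cpus);
}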
int main(void)
{
    int i, k, w, ncpus;
    struct bitmask *cpus;
    int maxnode = numa_num_configured_nodes() - 1;

    if (numa_available() < 0) {
        printf("no numa\n");
        exit(1);
    }

    cpus = numa_allocate_cpumask();
    ncpus = cpus->size;

    for (i = 0; i <= maxnode; i++) {
        if (numa_node_to_cpus(i, cpus) < 0) {
            printf("node %d failed to convert\n", i);
        }
        printf("%d: ", i);
        w = 0;
        for (k = 0; k < ncpus; k++)
            if (numa_bitmask_isbitset(cpus, k))
                printf(" %s%d", w++ > 0 ? "," : "", k);
        putchar('\n');
    }
    return 0;
}
/*---------------------------------------------------------------------------*/
static int numa_node_to_cpusmask(int node, uint64_t *cpusmask, int *nr)
{
    struct bitmask *mask;
    uint64_t bmask = 0;
    int retval = -1;
    unsigned int i;

    mask = numa_allocate_cpumask();
    retval = numa_node_to_cpus(node, mask);
    if (retval < 0)
        goto cleanup;

    *nr = 0;
    for (i = 0; i < mask->size && i < 64; i++) {
        if (numa_bitmask_isbitset(mask, i)) {
            cpusmask_set_bit(i, &bmask);
            (*nr)++;
        }
    }

    retval = 0;
cleanup:
    *cpusmask = bmask;

    numa_free_cpumask(mask);
    return retval;
}
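A short, hypothetical caller for the helper above (assuming it is visible in the same translation unit):

#include <inttypes.h>
#include <stdio.h>

/* Hypothetical caller: report the CPU count and the 64-bit CPU mask of
 * NUMA node 0 as computed by numa_node_to_cpusmask() above. */
static void report_node0_cpusmask(void)
{
    uint64_t mask = 0;
    int ncpus = 0;

    if (numa_node_to_cpusmask(0, &mask, &ncpus) == 0)
        printf("node 0: %d cpus, mask 0x%016" PRIx64 "\n", ncpus, mask);
}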
int pthread_create(pthread_t *thread, const pthread_attr_t *attr,
                   void *(*start_routine) (void *), void *arg)
{
    int core;
    int ret;
    cpu_set_t mask;
    CPU_ZERO(&mask);

    ret = old_pthread_create(thread, attr, start_routine, arg);
    if (!get_shm()->active)
        return ret;

    core = get_next_core();
    if (!get_shm()->per_node) {
        CPU_SET(core, &mask);
    } else {
        int i, node = numa_node_of_cpu(core);
        struct bitmask *bmp = numa_allocate_cpumask();
        numa_node_to_cpus(node, bmp);
        for (i = 0; i < numa_num_configured_cpus(); i++) {
            if (numa_bitmask_isbitset(bmp, i))
                CPU_SET(i, &mask);
        }
        numa_free_cpumask(bmp);
    }
    old_pthread_setaffinity_np(*thread, sizeof(mask), &mask);

    VERBOSE("-> Set affinity to %d\n", core);

    return ret;
}
int
virNumaGetNodeCPUs(int node, virBitmapPtr *cpus)
{
    unsigned long *mask = NULL;
    unsigned long *allonesmask = NULL;
    virBitmapPtr cpumap = NULL;
    int ncpus = 0;
    int max_n_cpus = virNumaGetMaxCPUs();
    int mask_n_bytes = max_n_cpus / 8;
    size_t i;
    int ret = -1;

    *cpus = NULL;

    if (VIR_ALLOC_N(mask, mask_n_bytes / sizeof(*mask)) < 0)
        goto cleanup;

    if (VIR_ALLOC_N(allonesmask, mask_n_bytes / sizeof(*mask)) < 0)
        goto cleanup;

    memset(allonesmask, 0xff, mask_n_bytes);

    /* The first time this returns -1, ENOENT if node doesn't exist... */
    if (numa_node_to_cpus(node, mask, mask_n_bytes) < 0) {
        VIR_WARN("NUMA topology for cell %d is not available, ignoring", node);
        ret = -2;
        goto cleanup;
    }

    /* second, third... times it returns an all-1's mask */
    if (memcmp(mask, allonesmask, mask_n_bytes) == 0) {
        VIR_DEBUG("NUMA topology for cell %d is invalid, ignoring", node);
        ret = -2;
        goto cleanup;
    }

    if (!(cpumap = virBitmapNew(max_n_cpus)))
        goto cleanup;

    for (i = 0; i < max_n_cpus; i++) {
        if (MASK_CPU_ISSET(mask, i)) {
            ignore_value(virBitmapSetBit(cpumap, i));
            ncpus++;
        }
    }

    *cpus = cpumap;
    cpumap = NULL;
    ret = ncpus;

 cleanup:
    VIR_FREE(mask);
    VIR_FREE(allonesmask);
    virBitmapFree(cpumap);
    return ret;
}
JNIEXPORT void JNICALL Java_xerial_jnuma_NumaNative_nodeToCpus
  (JNIEnv *env, jobject obj, jint node, jlongArray array)
{
    unsigned long *buf = (unsigned long *)
        (*env)->GetPrimitiveArrayCritical(env, (jarray) array, 0);
    int len = (int) (*env)->GetArrayLength(env, array);
    int ret = numa_node_to_cpus((int) node, buf, len * 8);
    (*env)->ReleasePrimitiveArrayCritical(env, (jarray) array, buf, 0);
    if (ret != 0)
        throwException(env, obj, errno);
}
/**
 * \brief get an array of cores with a certain placement
 */
static coreid_t* placement(uint32_t n, bool do_fill)
{
    coreid_t* result = malloc(sizeof(coreid_t)*n);
    uint32_t numa_nodes = numa_max_node()+1;
    uint32_t num_cores = numa_num_configured_cpus();
    struct bitmask* nodes[numa_nodes];

    for (int i = 0; i < numa_nodes; i++) {
        nodes[i] = numa_allocate_cpumask();
        numa_node_to_cpus(i, nodes[i]);
    }

    int num_taken = 0;
    if (numa_available() == 0) {
        if (do_fill) {
            for (int i = 0; i < numa_nodes; i++) {
                for (int j = 0; j < num_cores; j++) {
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        result[num_taken] = j;
                        num_taken++;
                    }
                    if (num_taken == n) {
                        return result;
                    }
                }
            }
        } else {
            uint8_t ith_of_node = 0;
            // go through numa nodes
            for (int i = 0; i < numa_nodes; i++) {
                // go through cores and see if part of numa node
                for (int j = 0; j < num_cores; j++) {
                    // take the ith core of the node
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        int index = i+ith_of_node*numa_nodes;
                        if (index < n) {
                            result[i+ith_of_node*numa_nodes] = j;
                            num_taken++;
                            ith_of_node++;
                        }
                    }
                    if (num_taken == n) {
                        return result;
                    }
                }
                ith_of_node = 0;
            }
        }
    } else {
        printf("Libnuma not available \n");
        return NULL;
    }
    return NULL;
}
static void regular_nodes_init(void)
{
    int i, nodes_num = numa_num_configured_nodes();
    struct bitmask *node_cpus = numa_allocate_cpumask();

    regular_nodes_mask = numa_allocate_nodemask();

    for (i = 0; i < nodes_num; i++) {
        numa_node_to_cpus(i, node_cpus);
        if (numa_bitmask_weight(node_cpus))
            numa_bitmask_setbit(regular_nodes_mask, i);
    }
    numa_bitmask_free(node_cpus);
}
void print_node_cpus(int node)
{
    int i, err;
    struct bitmask *cpus;

    cpus = numa_allocate_cpumask();
    err = numa_node_to_cpus(node, cpus);
    if (err >= 0) {
        for (i = 0; i < cpus->size; i++)
            if (numa_bitmask_isbitset(cpus, i))
                printf(" %d", i);
    }
    putchar('\n');
}
/// This function tries to fill the bandwidth array based on knowledge about known CPU models
static int fill_bandwidth_values_heuristically(int* bandwidth, int bandwidth_len)
{
    int ret = MEMKIND_ERROR_UNAVAILABLE; // Default error returned if heuristic approach fails
    int i, nodes_num, memory_only_nodes_num = 0;
    struct bitmask *memory_only_nodes, *node_cpus;

    if (is_cpu_xeon_phi_x200() == 0) {
        log_info("Known CPU model detected: Intel(R) Xeon Phi(TM) x200.");
        nodes_num = numa_num_configured_nodes();

        // Check if number of numa-nodes meets expectations for
        // supported configurations of Intel Xeon Phi x200
        if (nodes_num != 2 && nodes_num != 4 && nodes_num != 8) {
            return ret;
        }

        memory_only_nodes = numa_allocate_nodemask();
        node_cpus = numa_allocate_cpumask();

        for (i = 0; i < nodes_num; i++) {
            numa_node_to_cpus(i, node_cpus);
            if (numa_bitmask_weight(node_cpus) == 0) {
                memory_only_nodes_num++;
                numa_bitmask_setbit(memory_only_nodes, i);
            }
        }

        // Check if the number of memory-only nodes equals the number of memory+cpu nodes.
        // If so, change ret to 0 (success) and fill the bandwidth table.
        if (memory_only_nodes_num == (nodes_num - memory_only_nodes_num)) {
            ret = 0;
            assign_arbitrary_bandwidth_values(bandwidth, bandwidth_len, memory_only_nodes);
        }

        numa_bitmask_free(memory_only_nodes);
        numa_bitmask_free(node_cpus);
    }

    return ret;
}
static int _get_cpu_masks(int num_numa_nodes, int32_t *numa_array,
                          cpu_set_t **cpuMasks)
{
    struct bitmask **remaining_numa_node_cpus = NULL, *collective;
    unsigned long **numa_node_cpus = NULL;
    int i, j, at_least_one_cpu = 0, rc = 0;
    cpu_set_t *cpusetptr;
    char *bitmask_str = NULL;

    if (numa_available()) {
        CRAY_ERR("Libnuma not available");
        return -1;
    }

    /*
     * numa_node_cpus: The CPUs available to the NUMA node.
     * numa_all_cpus_ptr: all CPUs on which the calling task may execute.
     * remaining_numa_node_cpus: Bitwise-AND of the above two to get all of
     *                           the CPUs that the task can run on in this
     *                           NUMA node.
     * collective: Collects all of the CPUs as a precaution.
     */
    remaining_numa_node_cpus = xmalloc(num_numa_nodes *
                                       sizeof(struct bitmask *));
    collective = numa_allocate_cpumask();
    numa_node_cpus = xmalloc(num_numa_nodes * sizeof(unsigned long *));
    for (i = 0; i < num_numa_nodes; i++) {
        remaining_numa_node_cpus[i] = numa_allocate_cpumask();
        numa_node_cpus[i] = xmalloc(sizeof(unsigned long) *
                                    NUM_INTS_TO_HOLD_ALL_CPUS);
        rc = numa_node_to_cpus(numa_array[i], numa_node_cpus[i],
                               NUM_INTS_TO_HOLD_ALL_CPUS);
        if (rc) {
            CRAY_ERR("numa_node_to_cpus failed: Return code %d", rc);
        }
        for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
            (remaining_numa_node_cpus[i]->maskp[j]) =
                (numa_node_cpus[i][j]) & (numa_all_cpus_ptr->maskp[j]);
            collective->maskp[j] |= (remaining_numa_node_cpus[i]->maskp[j]);
        }
    }

    /*
     * Ensure that we have not masked off all of the CPUs.
     * If we have, just re-enable them all. Better to clear them all than
     * none of them.
     */
    for (j = 0; j < collective->size; j++) {
        if (numa_bitmask_isbitset(collective, j)) {
            at_least_one_cpu = 1;
        }
    }
    if (!at_least_one_cpu) {
        for (i = 0; i < num_numa_nodes; i++) {
            for (j = 0;
                 j < (remaining_numa_node_cpus[i]->size /
                      (sizeof(unsigned long) * 8));
                 j++) {
                (remaining_numa_node_cpus[i]->maskp[j]) =
                    (numa_all_cpus_ptr->maskp[j]);
            }
        }
    }

    if (debug_flags & DEBUG_FLAG_TASK) {
        bitmask_str = NULL;
        for (i = 0; i < num_numa_nodes; i++) {
            for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
                xstrfmtcat(bitmask_str, "%6lx ", numa_node_cpus[i][j]);
            }
        }
        info("%sBitmask: Allowed CPUs for NUMA Node", bitmask_str);
        xfree(bitmask_str);
        bitmask_str = NULL;

        for (i = 0; i < num_numa_nodes; i++) {
            for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
                xstrfmtcat(bitmask_str, "%6lx ", numa_all_cpus_ptr->maskp[j]);
            }
        }
        info("%sBitmask: Allowed CPUs for cpuset", bitmask_str);
        xfree(bitmask_str);
        bitmask_str = NULL;

        for (i = 0; i < num_numa_nodes; i++) {
            for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
                xstrfmtcat(bitmask_str, "%6lx ",
                           remaining_numa_node_cpus[i]->maskp[j]);
            }
        }
        info("%sBitmask: Allowed CPUs between cpuset and NUMA Node",
             bitmask_str);
        xfree(bitmask_str);
    }

    // Convert bitmasks to cpu_set_t types
    cpusetptr = xmalloc(num_numa_nodes * sizeof(cpu_set_t));

    for (i = 0; i < num_numa_nodes; i++) {
        CPU_ZERO(&cpusetptr[i]);
        for (j = 0; j < remaining_numa_node_cpus[i]->size; j++) {
            if (numa_bitmask_isbitset(remaining_numa_node_cpus[i], j)) {
                CPU_SET(j, &cpusetptr[i]);
            }
        }
        if (debug_flags & DEBUG_FLAG_TASK) {
            info("CPU_COUNT() of set: %d", CPU_COUNT(&cpusetptr[i]));
        }
    }

    *cpuMasks = cpusetptr;

    // Freeing Everything
    numa_free_cpumask(collective);
    for (i = 0; i < num_numa_nodes; i++) {
        xfree(numa_node_cpus[i]);
        numa_free_cpumask(remaining_numa_node_cpus[i]);
    }
    xfree(numa_node_cpus);
    xfree(remaining_numa_node_cpus);

    return 0;
}
int nodeCapsInitNUMA(virCapsPtr caps)
{
    int n;
    unsigned long *mask = NULL;
    unsigned long *allonesmask = NULL;
    int *cpus = NULL;
    int ret = -1;
    int max_n_cpus = NUMA_MAX_N_CPUS;

    if (numa_available() < 0)
        return 0;

    int mask_n_bytes = max_n_cpus / 8;

    if (VIR_ALLOC_N(mask, mask_n_bytes / sizeof *mask) < 0)
        goto cleanup;

    if (VIR_ALLOC_N(allonesmask, mask_n_bytes / sizeof *mask) < 0)
        goto cleanup;

    memset(allonesmask, 0xff, mask_n_bytes);

    for (n = 0; n <= numa_max_node(); n++) {
        int i;
        int ncpus;
        /* The first time this returns -1, ENOENT if node doesn't exist... */
        if (numa_node_to_cpus(n, mask, mask_n_bytes) < 0) {
            VIR_WARN("NUMA topology for cell %d of %d not available, ignoring",
                     n, numa_max_node()+1);
            continue;
        }
        /* second, third... times it returns an all-1's mask */
        if (memcmp(mask, allonesmask, mask_n_bytes) == 0) {
            VIR_DEBUG("NUMA topology for cell %d of %d is all ones, ignoring",
                      n, numa_max_node()+1);
            continue;
        }

        for (ncpus = 0, i = 0; i < max_n_cpus; i++)
            if (MASK_CPU_ISSET(mask, i))
                ncpus++;

        if (VIR_ALLOC_N(cpus, ncpus) < 0)
            goto cleanup;

        for (ncpus = 0, i = 0; i < max_n_cpus; i++)
            if (MASK_CPU_ISSET(mask, i))
                cpus[ncpus++] = i;

        if (virCapabilitiesAddHostNUMACell(caps, n, ncpus, cpus) < 0)
            goto cleanup;

        VIR_FREE(cpus);
    }

    ret = 0;

 cleanup:
    VIR_FREE(cpus);
    VIR_FREE(mask);
    VIR_FREE(allonesmask);
    return ret;
}
int init_virtual_topology(config_t* cfg, cpu_model_t* cpu_model, virtual_topology_t** virtual_topologyp)
{
    char* mc_pci_file;
    char* str;
    char* saveptr;
    char* token = "NULL";
    int* physical_node_ids;
    physical_node_t** physical_nodes;
    int num_physical_nodes;
    int n, v, i, j, sibling_idx, node_i_idx;
    int node_id;
    physical_node_t* node_i, *node_j, *sibling_node;
    int ret;
    int min_distance;
    int hyperthreading;
    struct bitmask* mem_nodes;
    virtual_topology_t* virtual_topology;

    __cconfig_lookup_string(cfg, "topology.physical_nodes", &str);

    // parse the physical nodes string
    physical_node_ids = calloc(numa_num_possible_nodes(), sizeof(*physical_node_ids));
    num_physical_nodes = 0;
    while (token = strtok_r(str, ",", &saveptr)) {
        physical_node_ids[num_physical_nodes] = atoi(token);
        str = NULL;
        if (++num_physical_nodes > numa_num_possible_nodes()) {
            // we're being asked to run on more nodes than available
            free(physical_node_ids);
            ret = E_ERROR;
            goto done;
        }
    }
    physical_nodes = calloc(num_physical_nodes, sizeof(*physical_nodes));

    // select those nodes we can run on (e.g. not constrained by any numactl)
    mem_nodes = numa_get_mems_allowed();
    for (i = 0, n = 0; i < num_physical_nodes; i++) {
        node_id = physical_node_ids[i];
        if (numa_bitmask_isbitset(mem_nodes, node_id)) {
            physical_nodes[n] = malloc(sizeof(**physical_nodes));
            physical_nodes[n]->node_id = node_id;
            // TODO: what if we want to avoid using only a single hardware context of a hyperthreaded core?
            physical_nodes[n]->cpu_bitmask = numa_allocate_cpumask();
            numa_node_to_cpus(node_id, physical_nodes[n]->cpu_bitmask);
            __cconfig_lookup_bool(cfg, "topology.hyperthreading", &hyperthreading);
            if (hyperthreading) {
                physical_nodes[n]->num_cpus = num_cpus(physical_nodes[n]->cpu_bitmask);
            } else {
                DBG_LOG(INFO, "Not using hyperthreading.\n");
                // disable the upper half of the processors in the bitmask
                physical_nodes[n]->num_cpus = num_cpus(physical_nodes[n]->cpu_bitmask) / 2;
                int fc = first_cpu(physical_nodes[n]->cpu_bitmask);
                for (j = fc + system_num_cpus()/2;
                     j < fc + system_num_cpus()/2 + physical_nodes[n]->num_cpus; j++) {
                    if (numa_bitmask_isbitset(physical_nodes[n]->cpu_bitmask, j)) {
                        numa_bitmask_clearbit(physical_nodes[n]->cpu_bitmask, j);
                    }
                }
            }
            n++;
        }
    }
    free(physical_node_ids);
    num_physical_nodes = n;

    // if pci bus topology of each physical node is not provided then discover it
    if (__cconfig_lookup_string(cfg, "topology.mc_pci", &mc_pci_file) == CONFIG_FALSE ||
        (__cconfig_lookup_string(cfg, "topology.mc_pci", &mc_pci_file) == CONFIG_TRUE &&
         load_mc_pci_topology(mc_pci_file, physical_nodes, num_physical_nodes) != E_SUCCESS))
    {
        discover_mc_pci_topology(cpu_model, physical_nodes, num_physical_nodes);
        save_mc_pci_topology(mc_pci_file, physical_nodes, num_physical_nodes);
    }

    // form virtual nodes by grouping physical nodes that are close to each other
    virtual_topology = malloc(sizeof(*virtual_topology));
    virtual_topology->num_virtual_nodes = num_physical_nodes / 2 + num_physical_nodes % 2;
    virtual_topology->virtual_nodes = calloc(virtual_topology->num_virtual_nodes,
                                             sizeof(*(virtual_topology->virtual_nodes)));

    for (i = 0, v = 0; i < num_physical_nodes; i++) {
        min_distance = INT_MAX;
        sibling_node = NULL;
        sibling_idx = -1;
        if ((node_i = physical_nodes[i]) == NULL) {
            continue;
        }
        for (j = i+1; j < num_physical_nodes; j++) {
            if ((node_j = physical_nodes[j]) == NULL) {
                continue;
            }
            if (numa_distance(node_i->node_id, node_j->node_id) < min_distance) {
                min_distance = numa_distance(node_i->node_id, node_j->node_id);
                sibling_node = node_j;
                sibling_idx = j;
            }
        }
        if (sibling_node) {
            physical_nodes[i] = physical_nodes[sibling_idx] = NULL;
            virtual_node_t* virtual_node = &virtual_topology->virtual_nodes[v];
            virtual_node->dram_node = node_i;
            virtual_node->nvram_node = sibling_node;
            virtual_node->node_id = v;
            virtual_node->cpu_model = cpu_model;
            DBG_LOG(INFO, "Fusing physical nodes %d %d into virtual node %d\n",
                    node_i->node_id, sibling_node->node_id, virtual_node->node_id);
            v++;
        }
    }

    // any physical node that is not paired with another physical node is
    // formed into a virtual node on its own
    if (2*v < num_physical_nodes) {
        for (i = 0; i < num_physical_nodes; i++) {
            if ((node_i = physical_nodes[i]) == NULL) {
                continue;
            }
            virtual_node_t* virtual_node = &virtual_topology->virtual_nodes[v];
            virtual_node->dram_node = virtual_node->nvram_node = node_i;
            virtual_node->node_id = v;
            DBG_LOG(WARNING, "Forming physical node %d into virtual node %d without a sibling node.\n",
                    node_i->node_id, virtual_node->node_id);
            v++;
        }
    }

    *virtual_topologyp = virtual_topology;
    ret = E_SUCCESS;

done:
    free(physical_nodes);
    return ret;
}
char * build_default_affinity_string (int shuffle)
{
    int nr_nodes = numa_num_configured_nodes();
    int nr_cores = numa_num_configured_cpus();

    char * str;
    int str_size = 512;
    int str_written = 0;

    int i;
    struct bitmask ** bm = (struct bitmask**) malloc(sizeof(struct bitmask*) * nr_nodes);

    for (i = 0; i < nr_nodes; i++) {
        bm[i] = numa_allocate_cpumask();
        numa_node_to_cpus(i, bm[i]);
    }

    str = (char*) malloc(str_size * sizeof(char));
    assert(str);

    if (!shuffle) {
        for (i = 0; i < nr_nodes; i++) {
            int j;
            for (j = 0; j < nr_cores; j++) {
                if (numa_bitmask_isbitset(bm[i], j)) {
                    add_core_to_str(&str, &str_size, &str_written, j);
                }
            }
        }
    } else {
        int next_node = 0;
        for (i = 0; i < nr_cores; i++) {
            int idx = (i / nr_nodes) + 1;
            int found = 0;
            int j = 0;
            do {
                if (numa_bitmask_isbitset(bm[next_node], j)) {
                    found++;
                }
                if (found == idx) {
                    add_core_to_str(&str, &str_size, &str_written, j);
                    break;
                }
                j = (j + 1) % nr_cores;
            } while (found != idx);
            next_node = (next_node + 1) % nr_nodes;
        }
    }

    if (str_written) {
        str[str_written - 1] = 0;
    }

    return str;
}
int main(int argc, const char **argv)
{
    int num_cpus = numa_num_task_cpus();
    printf("num cpus: %d\n", num_cpus);

    printf("numa available: %d\n", numa_available());
    numa_set_localalloc();

    struct bitmask *bm = numa_bitmask_alloc(num_cpus);
    for (int i = 0; i <= numa_max_node(); ++i) {
        numa_node_to_cpus(i, bm);
        printf("numa node %d ", i);
        print_bitmask(bm);
        printf(" - %g GiB\n", numa_node_size(i, 0) / (1024.*1024*1024.));
    }
    numa_bitmask_free(bm);

    puts("");

    char *x;
    const size_t cache_line_size = 64;
    const size_t array_size = 100*1000*1000;
    size_t ntrips = 2;

#pragma omp parallel
    {
        assert(omp_get_num_threads() == num_cpus);
        int tid = omp_get_thread_num();

        pin_to_core(tid);
        if (tid == 0)
            x = (char *) numa_alloc_local(array_size);

        // {{{ single access
#pragma omp barrier
        for (size_t i = 0; i < num_cpus; ++i) {
            if (tid == i) {
                double t = measure_access(x, array_size, ntrips);
                printf("sequential core %d -> core 0 : BW %g MB/s\n",
                       i, array_size*ntrips*cache_line_size / t / 1e6);
            }
#pragma omp barrier
        }
        // }}}

        // {{{ everybody contends for one
        {
            if (tid == 0) puts("");

#pragma omp barrier
            double t = measure_access(x, array_size, ntrips);
#pragma omp barrier

            for (size_t i = 0; i < num_cpus; ++i) {
                if (tid == i)
                    printf("all-contention core %d -> core 0 : BW %g MB/s\n",
                           tid, array_size*ntrips*cache_line_size / t / 1e6);
#pragma omp barrier
            }
        }
        // }}}

        // {{{ zero and someone else contending
        if (tid == 0) puts("");

#pragma omp barrier
        for (size_t i = 1; i < num_cpus; ++i) {
            double t;
            if (tid == i || tid == 0)
                t = measure_access(x, array_size, ntrips);

#pragma omp barrier
            if (tid == 0) {
                printf("two-contention core %d -> core 0 : BW %g MB/s\n",
                       tid, array_size*ntrips*cache_line_size / t / 1e6);
            }
#pragma omp barrier
            if (tid == i) {
                printf("two-contention core %d -> core 0 : BW %g MB/s\n\n",
                       tid, array_size*ntrips*cache_line_size / t / 1e6);
            }
#pragma omp barrier
        }
    }
    numa_free(x, array_size);

    return 0;
}
void myhbwmalloc_init(void)
{
    /* set to NULL before trying to initialize. if we return before
     * successful creation of the mspace, then it will still be NULL,
     * and we can use that in subsequent library calls to determine
     * that the library failed to initialize. */
    myhbwmalloc_mspace = NULL;

    /* verbose printout? */
    myhbwmalloc_verbose = 0;
    {
        char * env_char = getenv("HBWMALLOC_VERBOSE");
        if (env_char != NULL) {
            myhbwmalloc_verbose = 1;
            printf("hbwmalloc: HBWMALLOC_VERBOSE set\n");
        }
    }

    /* fail hard or soft? */
    myhbwmalloc_hardfail = 1;
    {
        char * env_char = getenv("HBWMALLOC_SOFTFAIL");
        if (env_char != NULL) {
            myhbwmalloc_hardfail = 0;
            printf("hbwmalloc: HBWMALLOC_SOFTFAIL set\n");
        }
    }

    /* set the atexit handler that will destroy the mspace and free the numa allocation */
    atexit(myhbwmalloc_final);

    /* detect and configure use of NUMA memory nodes */
    {
        int max_possible_node    = numa_max_possible_node();
        int num_possible_nodes   = numa_num_possible_nodes();
        int max_numa_nodes       = numa_max_node();
        int num_configured_nodes = numa_num_configured_nodes();
        int num_configured_cpus  = numa_num_configured_cpus();
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_max_possible_node()    = %d\n", max_possible_node);
            printf("hbwmalloc: numa_num_possible_nodes()   = %d\n", num_possible_nodes);
            printf("hbwmalloc: numa_max_node()             = %d\n", max_numa_nodes);
            printf("hbwmalloc: numa_num_configured_nodes() = %d\n", num_configured_nodes);
            printf("hbwmalloc: numa_num_configured_cpus()  = %d\n", num_configured_cpus);
        }
        /* FIXME this is a hack. assumes HBW is only numa node 1. */
        if (num_configured_nodes <= 2) {
            myhbwmalloc_numa_node = num_configured_nodes - 1;
        } else {
            fprintf(stderr, "hbwmalloc: we support only 2 numa nodes, not %d\n",
                    num_configured_nodes);
        }

        if (myhbwmalloc_verbose) {
            for (int i = 0; i < num_configured_nodes; i++) {
                unsigned max_numa_cpus = numa_num_configured_cpus();
                struct bitmask * mask = numa_bitmask_alloc(max_numa_cpus);
                int rc = numa_node_to_cpus(i, mask);
                if (rc != 0) {
                    fprintf(stderr, "hbwmalloc: numa_node_to_cpus failed\n");
                } else {
                    printf("hbwmalloc: numa node %d cpu mask:", i);
                    for (unsigned j = 0; j < max_numa_cpus; j++) {
                        int bit = numa_bitmask_isbitset(mask, j);
                        printf(" %d", bit);
                    }
                    printf("\n");
                }
                numa_bitmask_free(mask);
            }
            fflush(stdout);
        }
    }

#if 0 /* unused */
    /* see if the user specifies a slab size */
    size_t slab_size_requested = 0;
    {
        char * env_char = getenv("HBWMALLOC_BYTES");
        if (env_char != NULL) {
            long units = 1L;
            if      ( NULL != strstr(env_char,"G") ) units = 1000000000L;
            else if ( NULL != strstr(env_char,"M") ) units = 1000000L;
            else if ( NULL != strstr(env_char,"K") ) units = 1000L;
            else                                     units = 1L;

            int num_count = strspn(env_char, "0123456789");
            memset(&env_char[num_count], ' ', strlen(env_char) - num_count);
            slab_size_requested = units * atol(env_char);
        }
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: requested slab_size_requested = %zu\n", slab_size_requested);
        }
    }
#endif

    /* see what libnuma says is available */
    size_t myhbwmalloc_slab_size;
    {
        int node = myhbwmalloc_numa_node;
        long long freemem;
        long long maxmem = numa_node_size64(node, &freemem);
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_node_size64 says maxmem=%lld freemem=%lld for numa node %d\n",
                   maxmem, freemem, node);
        }
        myhbwmalloc_slab_size = freemem;
    }

    /* assume threads, disable if MPI knows otherwise, then allow user to override. */
    int multithreaded = 1;
#ifdef HAVE_MPI
    int nprocs;
    {
        int is_init, is_final;
        MPI_Initialized(&is_init);
        MPI_Finalized(&is_final);
        if (is_init && !is_final) {
            MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
        }

        /* give equal portion to every MPI process */
        myhbwmalloc_slab_size /= nprocs;

        /* if the user initializes MPI with MPI_Init or
         * MPI_Init_thread(MPI_THREAD_SINGLE), they assert there
         * are no threads at all, which means we can skip the
         * malloc mspace lock.
         *
         * if the user lies to MPI, they deserve any bad thing
         * that comes of it. */
        int provided;
        MPI_Query_thread(&provided);
        if (provided == MPI_THREAD_SINGLE) {
            multithreaded = 0;
        } else {
            multithreaded = 1;
        }

        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: MPI processes = %d (threaded = %d)\n", nprocs, multithreaded);
            printf("hbwmalloc: myhbwmalloc_slab_size = %zu\n", myhbwmalloc_slab_size);
        }
    }
#endif

    /* user can assert that hbwmalloc and friends need not be thread-safe */
    {
        char * env_char = getenv("HBWMALLOC_LOCKLESS");
        if (env_char != NULL) {
            multithreaded = 0;
            if (myhbwmalloc_verbose) {
                printf("hbwmalloc: user has disabled locking in mspaces by setting HBWMALLOC_LOCKLESS\n");
            }
        }
    }

    myhbwmalloc_slab = numa_alloc_onnode(myhbwmalloc_slab_size, myhbwmalloc_numa_node);
    if (myhbwmalloc_slab == NULL) {
        fprintf(stderr, "hbwmalloc: numa_alloc_onnode returned NULL for size = %zu\n",
                myhbwmalloc_slab_size);
        return;
    } else {
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_alloc_onnode succeeded for size %zu\n",
                   myhbwmalloc_slab_size);
        }

        /* part (less than 128*sizeof(size_t) bytes) of this space is used for bookkeeping,
         * so the capacity must be at least this large */
        if (myhbwmalloc_slab_size < 128*sizeof(size_t)) {
            fprintf(stderr, "hbwmalloc: not enough space for mspace bookkeeping\n");
            return;
        }

        /* see above regarding if the user lies to MPI. */
        int locked = multithreaded;

        myhbwmalloc_mspace = create_mspace_with_base(myhbwmalloc_slab, myhbwmalloc_slab_size, locked);
        if (myhbwmalloc_mspace == NULL) {
            fprintf(stderr, "hbwmalloc: create_mspace_with_base returned NULL\n");
            return;
        } else if (myhbwmalloc_verbose) {
            printf("hbwmalloc: create_mspace_with_base succeeded for size %zu\n",
                   myhbwmalloc_slab_size);
        }
    }
}
static uint32_t* placement(uint32_t n, bool do_fill, bool hyper)
{
    uint32_t* result = (uint32_t*) malloc(sizeof(uint32_t)*n);
    uint32_t numa_nodes = numa_max_node()+1;
    uint32_t num_cores = 0;
    if (hyper) {
        num_cores = numa_num_configured_cpus()/2;
    } else {
        num_cores = numa_num_configured_cpus();
    }

    struct bitmask* nodes[numa_nodes];

    for (int i = 0; i < numa_nodes; i++) {
        nodes[i] = numa_allocate_cpumask();
        numa_node_to_cpus(i, nodes[i]);
    }

    int num_taken = 0;
    if (numa_available() == 0) {
        if (do_fill) {
            for (int i = 0; i < numa_nodes; i++) {
                for (int j = 0; j < num_cores; j++) {
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        result[num_taken] = j;
                        num_taken++;
                    }
                    if (num_taken == n) {
                        return result;
                    }
                }
            }
        } else {
            int cores_per_node = n/numa_nodes;
            int rest = n - (cores_per_node*numa_nodes);
            int taken_per_node = 0;

            for (int i = 0; i < numa_nodes; i++) {
                for (int j = 0; j < num_cores; j++) {
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        if (taken_per_node == cores_per_node) {
                            if (rest > 0) {
                                result[num_taken] = j;
                                num_taken++;
                                rest--;
                                if (num_taken == n) {
                                    return result;
                                }
                            }
                            break;
                        }
                        result[num_taken] = j;
                        num_taken++;
                        taken_per_node++;
                        if (num_taken == n) {
                            return result;
                        }
                    }
                }
                taken_per_node = 0;
            }
        }
    } else {
        printf("Libnuma not available \n");
        return NULL;
    }
    return NULL;
}
/**
 * @brief Returns an array of cores of size req_cores chosen
 * round-robin from NUMA nodes in batches of req_step.
 *
 * @param req_step The step width - how many cores should be picked
 * from each NUMA node in each iteration. Use a negative value
 * for a "fill"-strategy, where NUMA nodes are completely filled
 * before moving on to the next one.
 */
void placement(size_t req_cores, size_t req_step, coreid_t *cores)
{
    // For convenience, allows to lookup 2*n for n in 0..n/2
    if (req_step == 0)
        req_step = 1;

    size_t max_node = numa_max_node();
    size_t num_cores = numa_num_configured_cpus();
    size_t cores_per_node = num_cores/(max_node+1);

    printf("req_cores: %zu\n", req_cores);
    printf("req_step: %zu\n", req_step);
    printf("cores / NUMA node: %zu\n", cores_per_node);
    printf("max_node: %zu\n", max_node);

    size_t num_selected = 0;
    size_t curr_numa_idx = 0;

    // How many cores to choose from each NUMA node
    size_t choose_per_node[max_node+1];
    memset(choose_per_node, 0, sizeof(size_t)*(max_node+1));

    // Step 1:
    // Figure out how many cores to choose from each node
    while (num_selected < req_cores) {
        // How many cores should be chosen in this step?
        // At most req_step, bounded by what is still needed and by how
        // many cores the current node can still contribute.
        size_t num_choose = min(min(req_step, req_cores-num_selected),
                                cores_per_node-choose_per_node[curr_numa_idx]);

        // Increment counter indicating how many to choose from this node
        choose_per_node[curr_numa_idx] += num_choose;
        num_selected += num_choose;

        // Move on to the next NUMA node
        curr_numa_idx = (curr_numa_idx + 1) % (max_node+1);
    }

    // Step 2:
    // Get the cores from each NUMA node
    //
    // hyperthreads? -> should have higher core IDs, and hence picked in
    // the end.
    struct bitmask *mask = numa_allocate_cpumask();
    size_t idx = 0;
    for (size_t i = 0; i <= max_node; i++) {
        dbg_printf("node %2zu choosing %2zu\n", i, choose_per_node[i]);

        // Determine which cores are on node i
        numa_node_to_cpus(i, mask);

        size_t chosen = 0;
        for (coreid_t p = 0; p < num_cores && chosen < choose_per_node[i]; p++) {
            // Is processor p on node i?
            if (numa_bitmask_isbitset(mask, p)) {
                cores[idx++] = p;
                chosen++;
                dbg_printf("Choosing %" PRIuCOREID " on node %zu\n", p, i);
            }
        }
    }

    assert(idx == req_cores);
}
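A brief usage sketch (illustrative only; it assumes the coreid_t type and the placement() function shown above, and the core count of 8 is arbitrary):

#include <stdio.h>

/* Illustrative caller: pick 8 cores, 2 per NUMA node per round-robin pass,
 * then print which core each slot was assigned. */
static void show_placement(void)
{
    coreid_t cores[8];

    placement(8, 2, cores);
    for (size_t i = 0; i < 8; i++)
        printf("slot %zu -> core %u\n", i, (unsigned) cores[i]);
}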