Example #1
#include <numa.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int i, k, w, ncpus;
	struct bitmask *cpus;
	int maxnode = numa_num_configured_nodes()-1;

	if (numa_available() < 0)  {
		printf("no numa\n");
		exit(1);
	}
	cpus = numa_allocate_cpumask();
	ncpus = cpus->size;

	for (i = 0; i <= maxnode; i++) {
		if (numa_node_to_cpus(i, cpus) < 0) {
			printf("node %d failed to convert\n", i);
			continue;
		}
		printf("%d: ", i);
		w = 0;
		for (k = 0; k < ncpus; k++)
			if (numa_bitmask_isbitset(cpus, k)) {
				printf("%s%d", w > 0 ? ", " : " ", k);
				w++;
			}
		putchar('\n');
	}
	numa_free_cpumask(cpus);
	return 0;
}
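This is a complete program; it links against libnuma, e.g. (assuming the file is saved as node_cpus.c):

cc -Wall node_cpus.c -o node_cpus -lnuma

It prints one line per configured node listing the CPUs that belong to it.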
Example #2
/* Pin the calling thread to CPU `id`. */
void backend_set_numa(unsigned id)
{
    struct bitmask *bm = numa_allocate_cpumask();
    numa_bitmask_setbit(bm, id);
    numa_sched_setaffinity(0, bm);    /* pid 0 = the calling thread */
    numa_free_cpumask(bm);
}
Example #3
CPU_Set_t *
CPU_GetPossible(void)
{
        CPU_Set_t *cs = numa_allocate_cpumask();
        copy_bitmask_to_bitmask(numa_all_cpus_ptr, cs);
        return cs;
}
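CPU_Set_t is not a libnuma type. Since numa_allocate_cpumask() returns a struct bitmask *, the surrounding project presumably aliases it along these lines (an assumption, not the project's actual header):

typedef struct bitmask CPU_Set_t;    /* hypothetical alias implied by the usage above */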
Example #4
int pthread_create(pthread_t *thread, const pthread_attr_t *attr, void *(*start_routine) (void *), void *arg) {
    int core;
    int ret;
    cpu_set_t mask;
    CPU_ZERO(&mask);

    ret = old_pthread_create(thread, attr, start_routine, arg);

    if(!get_shm()->active)
        return ret;

    core = get_next_core();

    if(!get_shm()->per_node) {
        CPU_SET(core, &mask);
    } else {
        int i, node = numa_node_of_cpu(core);
        struct bitmask * bmp = numa_allocate_cpumask();
        numa_node_to_cpus(node, bmp);
        for(i = 0; i < numa_num_configured_cpus(); i++) {
            if(numa_bitmask_isbitset(bmp, i))
                CPU_SET(i, &mask);
        }
        numa_free_cpumask(bmp);
    }
    old_pthread_setaffinity_np(*thread, sizeof(mask), &mask);

    VERBOSE("-> Set affinity to %d\n", core);

    return ret;
}
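old_pthread_create and old_pthread_setaffinity_np are not shown; in an LD_PRELOAD interposer like this they are typically resolved from the real pthread library with dlsym(RTLD_NEXT, ...). A sketch under that assumption (names hypothetical):

#define _GNU_SOURCE
#include <dlfcn.h>
#include <pthread.h>

static int (*old_pthread_create)(pthread_t *, const pthread_attr_t *,
                                 void *(*)(void *), void *);
static int (*old_pthread_setaffinity_np)(pthread_t, size_t, const cpu_set_t *);

/* Resolve the real symbols once at library load time. */
__attribute__((constructor))
static void resolve_real_pthread_symbols(void)
{
    old_pthread_create = dlsym(RTLD_NEXT, "pthread_create");
    old_pthread_setaffinity_np = dlsym(RTLD_NEXT, "pthread_setaffinity_np");
}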
Example #5
/*---------------------------------------------------------------------------*/
static int numa_node_to_cpusmask(int node, uint64_t *cpusmask, int *nr)
{
	struct bitmask *mask;
	uint64_t	bmask = 0;
	int		retval = -1;
	unsigned int	i;

	mask = numa_allocate_cpumask();
	retval = numa_node_to_cpus(node, mask);
	if (retval < 0)
		goto cleanup;

	*nr = 0;
	for (i = 0; i < mask->size && i < 64; i++) {
		if (numa_bitmask_isbitset(mask, i)) {
			cpusmask_set_bit(i, &bmask);
			(*nr)++;
		}
	}

	retval = 0;
cleanup:
	*cpusmask = bmask;

	numa_free_cpumask(mask);
	return retval;
}
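cpusmask_set_bit() is project-local rather than a libnuma call; from its use above, a minimal sketch would be:

#include <stdint.h>

/* Hypothetical helper: set bit `nr` in a 64-bit CPU mask. */
static inline void cpusmask_set_bit(unsigned int nr, uint64_t *mask)
{
	*mask |= (UINT64_C(1) << nr);
}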
Example #6
int bind_cpu(int cpu) {
	struct bitmask *nodemask = numa_parse_nodestring("0");
	struct bitmask *cpumask = numa_allocate_cpumask();
	int ret;

	numa_bind(nodemask);
	numa_bitmask_clearall(cpumask);
	numa_bitmask_setbit(cpumask, cpu - 1);	/* caller passes 1-based CPU ids */
	ret = numa_sched_setaffinity(getpid(), cpumask);
	numa_bitmask_free(nodemask);
	numa_bitmask_free(cpumask);
	return ret;
}
Example #7
CPU_Set_t *
CPU_GetAffinity(void)
{
        struct bitmask *cs = numa_allocate_cpumask();
        int res = numa_sched_getaffinity(0, cs);
        if (res < 0)
                epanic("numa_sched_getaffinity");
        return cs;
}
Example #8
void
CPU_Bind(int cpu)
{
        CPU_Set_t *cs = numa_allocate_cpumask();
        numa_bitmask_setbit(cs, cpu);
        int res = numa_sched_setaffinity(0, cs);
        if (res < 0)
                epanic("bindToCPU(%d)", cpu);
        CPU_FreeSet(cs);
}
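epanic() in examples #7 and #8 is project-local; a minimal sketch consistent with how it is called (printf-style format, then program exit) might be:

#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical error helper: print the message plus errno, then abort. */
static void epanic(const char *fmt, ...)
{
        va_list ap;

        va_start(ap, fmt);
        vfprintf(stderr, fmt, ap);
        va_end(ap);
        fprintf(stderr, ": %s\n", strerror(errno));
        exit(EXIT_FAILURE);
}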
Example #9
/**
 * \brief get an array of cores with a certain placement
 */
static coreid_t* placement(uint32_t n, bool do_fill)
{
    coreid_t* result = malloc(sizeof(coreid_t)*n);
    uint32_t numa_nodes = numa_max_node()+1;
    uint32_t num_cores = numa_num_configured_cpus();
    struct bitmask* nodes[numa_nodes];

    for (int i = 0; i < numa_nodes; i++) {
        nodes[i] = numa_allocate_cpumask();
        numa_node_to_cpus(i, nodes[i]);
    }

    int num_taken = 0;
    if (numa_available() == 0) {
        if (do_fill) {
            for (int i = 0; i < numa_nodes; i++) {
                for (int j = 0; j < num_cores; j++) {
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        result[num_taken] = j;
                        num_taken++;
                    }  
 
                    if (num_taken == n) {
                        return result;                       
                    }
                }
           }
        } else {
            uint8_t ith_of_node = 0;
            // go through numa nodes
            for (int i = 0; i < numa_nodes; i++) {
                // go through cores and see if part of numa node
                for (int j = 0; j < num_cores; j++) {
                    // take the ith core of the node
                    if (numa_bitmask_isbitset(nodes[i], j)){
                        int index = i+ith_of_node*numa_nodes;
                        if (index < n) {
                            result[i+ith_of_node*numa_nodes] = j;
                            num_taken++;
                            ith_of_node++;
                        }
                    }
                    if (num_taken == n) {
                        return result;
                    }
                }
                ith_of_node = 0;
            }
        }
    } else {
        printf("libnuma not available\n");
        free(result);
        return NULL;
    }
    free(result);   /* fewer than n cores were found */
    return NULL;
}
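A hypothetical call site illustrating the two strategies (the concrete core ids depend on the machine's topology):

coreid_t *fill   = placement(8, true);   /* first 8 CPUs, node 0 filled before node 1 */
coreid_t *spread = placement(8, false);  /* 8 CPUs spread round-robin across the nodes */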
Example #10
static void regular_nodes_init(void)
{
    int i, nodes_num = numa_num_configured_nodes();
    struct bitmask *node_cpus = numa_allocate_cpumask();

    regular_nodes_mask = numa_allocate_nodemask();

    for (i = 0; i < nodes_num; i++) {
        numa_node_to_cpus(i, node_cpus);  /* was numa_node_to_cpus(node, ...), which always queried node 0 */
        if (numa_bitmask_weight(node_cpus))
            numa_bitmask_setbit(regular_nodes_mask, i);
    }
    numa_bitmask_free(node_cpus);
}
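regular_nodes_mask is a file-scope variable defined elsewhere in the file; presumably something along these lines:

static struct bitmask *regular_nodes_mask;  /* nodes that have at least one CPU attached */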
Example #11
void print_node_cpus(int node)
{
	int i, err;
	struct bitmask *cpus;

	cpus = numa_allocate_cpumask();
	err = numa_node_to_cpus(node, cpus);
	if (err >= 0) {
		for (i = 0; i < cpus->size; i++)
			if (numa_bitmask_isbitset(cpus, i))
				printf(" %d", i);
	}
	putchar('\n');
	numa_bitmask_free(cpus);
}
Example #12
/// This function tries to fill the bandwidth array based on knowledge about known CPU models
static int fill_bandwidth_values_heuristically(int* bandwidth, int bandwidth_len)
{
    int ret = MEMKIND_ERROR_UNAVAILABLE; // Default error returned if the heuristic approach fails
    int i, nodes_num, memory_only_nodes_num = 0;
    struct bitmask *memory_only_nodes, *node_cpus;

    if (is_cpu_xeon_phi_x200() == 0) {
        log_info("Known CPU model detected: Intel(R) Xeon Phi(TM) x200.");
        nodes_num = numa_num_configured_nodes();

        // Check if number of numa-nodes meets expectations for
        // supported configurations of Intel Xeon Phi x200
        if (nodes_num != 2 && nodes_num != 4 && nodes_num != 8) {
            return ret;
        }

        memory_only_nodes = numa_allocate_nodemask();
        node_cpus = numa_allocate_cpumask();

        for(i=0; i<nodes_num; i++) {
            numa_node_to_cpus(i, node_cpus);
            if(numa_bitmask_weight(node_cpus) == 0) {
                memory_only_nodes_num++;
                numa_bitmask_setbit(memory_only_nodes, i);
            }
        }

        // Check whether the number of memory-only nodes equals the number of memory+cpu nodes.
        // If so, set ret to 0 (success) and fill the bandwidth table.
        if (memory_only_nodes_num == (nodes_num - memory_only_nodes_num)) {

            ret = 0;
            assign_arbitrary_bandwidth_values(bandwidth, bandwidth_len, memory_only_nodes);
        }

        numa_bitmask_free(memory_only_nodes);
        numa_bitmask_free(node_cpus);
    }

    return ret;
}
Example #13
/**
 * @brief Returns an array of cores of size req_cores chosen
 *     round-robin from NUMA nodes in batches of req_step.
 *
 * @param req_step The step width - how many cores should be picked
 *     from each NUMA node in each iteration. Use a negative value
 *     for a "fill" strategy, where NUMA nodes are completely filled
 *     before moving on to the next one.
 */
void placement(size_t req_cores, size_t req_step, coreid_t *cores)
{
    // Treat a step width of 0 as 1 so the selection loop below always makes progress
    if (req_step==0)
        req_step=1;

    size_t max_node = numa_max_node();
    size_t num_cores = numa_num_configured_cpus();
    size_t cores_per_node = num_cores/(max_node+1);

    printf("req_cores: %zu\n", req_cores);
    printf("req_step: %zu\n", req_step);
    printf("cores / NUMA node: %zu\n", cores_per_node);
    printf("max_node: %zu\n", max_node);

    size_t num_selected = 0;
    size_t curr_numa_idx = 0;

    // How many nodes to choose from each NUMA node
    size_t choose_per_node[max_node+1];
    memset(choose_per_node, 0, sizeof(size_t)*(max_node+1));

    // Step 1:
    // Figure out how many cores to choose from each node

    while (num_selected<req_cores) {

        // How many cores should be chosen from this node in this step?
        // At most req_step, bounded by how many are still needed and how
        // many this node can still contribute
        size_t num_choose = min(min(req_step, req_cores-num_selected),
                                cores_per_node-choose_per_node[curr_numa_idx]);

        // Increment counter indicating how many to choose from this node
        choose_per_node[curr_numa_idx] += num_choose;
        num_selected += num_choose;

        // Move on to the next NUMA node
        curr_numa_idx = (curr_numa_idx + 1) % (max_node+1);
    }

    // Step 2:
    // Get the cores from each NUMA node
    //
    // hyperthreads? -> should have higher core IDs, and hence picked in
    // the end.

    struct bitmask *mask = numa_allocate_cpumask();

    size_t idx = 0;

    for (size_t i=0; i<=max_node; i++) {

        dbg_printf("node %2zu choosing %2zu\n", i, choose_per_node[i]);

        // Determine which cores are on node i
        numa_node_to_cpus(i, mask);

        size_t choosen = 0;

        for (coreid_t p=0; p<num_cores && choosen<choose_per_node[i]; p++) {

            // Is processor p on node i
            if (numa_bitmask_isbitset(mask, p)) {

                cores[idx++] = p;
                choosen++;

                dbg_printf("Choosing %" PRIuCOREID " on node %zu\n", p, i);
            }
        }
    }

    assert (idx == req_cores);

}
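min(), dbg_printf() and PRIuCOREID are project-local. Judging from the size_t operands above, min() is presumably equivalent to this sketch:

static inline size_t min(size_t a, size_t b)
{
    return a < b ? a : b;
}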
Example #14
static int _get_cpu_masks(int num_numa_nodes, int32_t *numa_array,
			  cpu_set_t **cpuMasks) {

	struct bitmask **remaining_numa_node_cpus = NULL, *collective;
	unsigned long **numa_node_cpus = NULL;
	int i, j, at_least_one_cpu = 0, rc = 0;
	cpu_set_t *cpusetptr;
	char *bitmask_str = NULL;

	if (numa_available()) {
		CRAY_ERR("Libnuma not available");
		return -1;
	}

	/*
	 * numa_node_cpus: The CPUs available to the NUMA node.
	 * numa_all_cpus_ptr: all CPUs on which the calling task may execute.
	 * remaining_numa_node_cpus: Bitwise-AND of the above two to get all of
	 *                           the CPUs that the task can run on in this
	 *                           NUMA node.
	 * collective: Collects all of the CPUs as a precaution.
	 */
	remaining_numa_node_cpus = xmalloc(num_numa_nodes *
					   sizeof(struct bitmask *));
	collective = numa_allocate_cpumask();
	numa_node_cpus = xmalloc(num_numa_nodes * sizeof(unsigned long*));
	for (i = 0; i < num_numa_nodes; i++) {
		remaining_numa_node_cpus[i] = numa_allocate_cpumask();
		numa_node_cpus[i] = xmalloc(sizeof(unsigned long) *
					    NUM_INTS_TO_HOLD_ALL_CPUS);
		rc = numa_node_to_cpus(numa_array[i], numa_node_cpus[i],
				       NUM_INTS_TO_HOLD_ALL_CPUS);
		if (rc) {
			CRAY_ERR("numa_node_to_cpus failed: Return code %d",
				 rc);
		}
		for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
			(remaining_numa_node_cpus[i]->maskp[j]) =
				(numa_node_cpus[i][j]) &
				(numa_all_cpus_ptr->maskp[j]);
			collective->maskp[j] |=
				(remaining_numa_node_cpus[i]->maskp[j]);
		}
	}

	/*
	 * Ensure that we have not masked off all of the CPUs.
	 * If we have, just re-enable them all.  Better to clear them all than
	 * none of them.
	 */
	for (j = 0; j < collective->size; j++) {
		if (numa_bitmask_isbitset(collective, j)) {
			at_least_one_cpu = 1;
		}
	}

	if (!at_least_one_cpu) {
		for (i = 0; i < num_numa_nodes; i++) {
			for (j = 0; j <
				     (remaining_numa_node_cpus[i]->size /
				      (sizeof(unsigned long) * 8));
			     j++) {
				(remaining_numa_node_cpus[i]->maskp[j]) =
					(numa_all_cpus_ptr->maskp[j]);
			}
		}
	}

	if (debug_flags & DEBUG_FLAG_TASK) {
		bitmask_str = NULL;
		for (i = 0; i < num_numa_nodes; i++) {
			for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
				xstrfmtcat(bitmask_str, "%6lx ",
					   numa_node_cpus[i][j]);
			}
		}
		info("%sBitmask: Allowed CPUs for NUMA Node", bitmask_str);
		xfree(bitmask_str);
		bitmask_str = NULL;

		for (i = 0; i < num_numa_nodes; i++) {
			for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
				xstrfmtcat(bitmask_str, "%6lx ",
					  numa_all_cpus_ptr->maskp[j]);
			}
		}
		info("%sBitmask: Allowed CPUs for cpuset", bitmask_str);
		xfree(bitmask_str);
		bitmask_str = NULL;

		for (i = 0; i < num_numa_nodes; i++) {
			for (j = 0; j < NUM_INTS_TO_HOLD_ALL_CPUS; j++) {
				xstrfmtcat(bitmask_str, "%6lx ",
					   remaining_numa_node_cpus[i]->
					   maskp[j]);
			}
		}
		info("%sBitmask: Allowed CPUs between cpuset and NUMA Node",
		     bitmask_str);
		xfree(bitmask_str);
	}


	// Convert bitmasks to cpu_set_t types
	cpusetptr = xmalloc(num_numa_nodes * sizeof(cpu_set_t));

	for (i = 0; i < num_numa_nodes; i++) {
		CPU_ZERO(&cpusetptr[i]);
		for (j = 0; j < remaining_numa_node_cpus[i]->size; j++) {
			if (numa_bitmask_isbitset(remaining_numa_node_cpus[i],
						  j)) {
				CPU_SET(j, &cpusetptr[i]);
			}
		}
		if (debug_flags & DEBUG_FLAG_TASK) {
			info("CPU_COUNT() of set: %d",
			     CPU_COUNT(&cpusetptr[i]));
		}
	}

	*cpuMasks = cpusetptr;

	// Freeing Everything
	numa_free_cpumask(collective);
	for (i = 0; i < num_numa_nodes; i++) {
		xfree(numa_node_cpus[i]);
		numa_free_cpumask(remaining_numa_node_cpus[i]);
	}
	xfree(numa_node_cpus);
	xfree(remaining_numa_node_cpus);

	return 0;
}
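NUM_INTS_TO_HOLD_ALL_CPUS, xmalloc()/xfree(), xstrfmtcat() and CRAY_ERR() come from the surrounding project. Given how the constant sizes arrays of unsigned long mask words, a hypothetical definition consistent with its use here would be (MAX_CPUS is likewise an assumption):

#define NUM_INTS_TO_HOLD_ALL_CPUS  (MAX_CPUS / (sizeof(unsigned long) * 8))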
Example #15
static uint32_t* placement(uint32_t n, bool do_fill, bool hyper)
{
    uint32_t* result = (uint32_t*) malloc(sizeof(uint32_t)*n);
    uint32_t numa_nodes = numa_max_node()+1;
    uint32_t num_cores = 0;
    if (hyper) {
        num_cores = numa_num_configured_cpus()/2;
    } else {
        num_cores = numa_num_configured_cpus();
    }
    struct bitmask* nodes[numa_nodes];

    for (int i = 0; i < numa_nodes; i++) {
        nodes[i] = numa_allocate_cpumask();
        numa_node_to_cpus(i, nodes[i]);
    }

    int num_taken = 0;
    if (numa_available() == 0) {
        if (do_fill) {
            for (int i = 0; i < numa_nodes; i++) {
                for (int j = 0; j < num_cores; j++) {
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        result[num_taken] = j;
                        num_taken++;
                    }

                    if (num_taken == n) {
                        return result;
                    }
                }
           }
        } else {
            int cores_per_node = n/numa_nodes;
            int rest = n - (cores_per_node*numa_nodes);
            int taken_per_node = 0;

            for (int i = 0; i < numa_nodes; i++) {
                for (int j = 0; j < num_cores; j++) {
                    if (numa_bitmask_isbitset(nodes[i], j)) {
                        if (taken_per_node == cores_per_node) {
                            if (rest > 0) {
                                result[num_taken] = j;
                                num_taken++;
                                rest--;
                                if (num_taken == n) {
                                    return result;
                                }
                            }
                            break;
                        }
                        result[num_taken] = j;
                        num_taken++;
                        taken_per_node++;

                        if (num_taken == n) {
                            return result;
                        }
                    }
                }
                taken_per_node = 0;
            }
        }
    } else {
        printf("libnuma not available\n");
        free(result);
        return NULL;
    }
    free(result);   /* fewer than n cores were found */
    return NULL;
}
Example #16
int init_virtual_topology(config_t* cfg, cpu_model_t* cpu_model, virtual_topology_t** virtual_topologyp)
{
    char* mc_pci_file;
    char* str;
    char* saveptr;
    char* token = "NULL";
    int* physical_node_ids;
    physical_node_t** physical_nodes;
    int num_physical_nodes;
    int n, v, i, j, sibling_idx, node_i_idx;
    int node_id;
    physical_node_t* node_i, *node_j, *sibling_node;
    int ret;
    int min_distance;
    int hyperthreading;
    struct bitmask* mem_nodes;
    virtual_topology_t* virtual_topology;

    __cconfig_lookup_string(cfg, "topology.physical_nodes", &str);

    // parse the physical nodes string
    physical_node_ids = calloc(numa_num_possible_nodes(), sizeof(*physical_node_ids));
    num_physical_nodes = 0;
    while ((token = strtok_r(str, ",", &saveptr)) != NULL) {
        physical_node_ids[num_physical_nodes] = atoi(token);
        str = NULL;
        if (++num_physical_nodes > numa_num_possible_nodes()) {
            // we're being asked to run on more nodes than are available
            free(physical_node_ids);
            ret = E_ERROR;
            goto done;
        }
    }
    physical_nodes = calloc(num_physical_nodes, sizeof(*physical_nodes));

    // select those nodes we can run on (e.g. not constrained by any numactl)
    mem_nodes = numa_get_mems_allowed();
    for (i=0, n=0; i<num_physical_nodes; i++) {
        node_id = physical_node_ids[i];
        if (numa_bitmask_isbitset(mem_nodes, node_id)) {
            physical_nodes[n] = malloc(sizeof(**physical_nodes));
            physical_nodes[n]->node_id = node_id;
            // TODO: what if we want to avoid using only a single hardware context of a hyperthreaded core?
            physical_nodes[n]->cpu_bitmask = numa_allocate_cpumask();
            numa_node_to_cpus(node_id, physical_nodes[n]->cpu_bitmask);
            __cconfig_lookup_bool(cfg, "topology.hyperthreading", &hyperthreading);
            if (hyperthreading) {
                physical_nodes[n]->num_cpus = num_cpus(physical_nodes[n]->cpu_bitmask);
            } else {
                DBG_LOG(INFO, "Not using hyperthreading.\n");
                // disable the upper half of the processors in the bitmask
                physical_nodes[n]->num_cpus = num_cpus(physical_nodes[n]->cpu_bitmask) / 2;
                int fc = first_cpu(physical_nodes[n]->cpu_bitmask);
                for (j=fc+system_num_cpus()/2; j<fc+system_num_cpus()/2+physical_nodes[n]->num_cpus; j++) {
                    if (numa_bitmask_isbitset(physical_nodes[n]->cpu_bitmask, j)) {
                        numa_bitmask_clearbit(physical_nodes[n]->cpu_bitmask, j);
                    }
                }
            }
            n++;
        }
    }
    free(physical_node_ids);
    num_physical_nodes = n;

    // if pci bus topology of each physical node is not provided then discover it 
    if (__cconfig_lookup_string(cfg, "topology.mc_pci", &mc_pci_file) == CONFIG_FALSE || 
        (__cconfig_lookup_string(cfg, "topology.mc_pci", &mc_pci_file) == CONFIG_TRUE &&
         load_mc_pci_topology(mc_pci_file, physical_nodes, num_physical_nodes) != E_SUCCESS)) 
    {
        discover_mc_pci_topology(cpu_model, physical_nodes, num_physical_nodes);
        save_mc_pci_topology(mc_pci_file, physical_nodes, num_physical_nodes);
    }

    // form virtual nodes by grouping physical nodes that are close to each other
    virtual_topology = malloc(sizeof(*virtual_topology));
    virtual_topology->num_virtual_nodes = num_physical_nodes / 2 + num_physical_nodes % 2;
    virtual_topology->virtual_nodes = calloc(virtual_topology->num_virtual_nodes, 
                                             sizeof(*(virtual_topology->virtual_nodes)));
    
    for (i=0, v=0; i<num_physical_nodes; i++) {
        min_distance = INT_MAX;
        sibling_node = NULL;
        sibling_idx = -1;
        if ((node_i = physical_nodes[i]) == NULL) {
            continue;
        }
        for (j=i+1; j<num_physical_nodes; j++) {
            if ((node_j = physical_nodes[j]) == NULL) {
                continue;
            }
            int distance = numa_distance(node_i->node_id, node_j->node_id);
            if (distance < min_distance) {
                min_distance = distance;
                sibling_node = node_j;
                sibling_idx = j;
            }
        }
        if (sibling_node) {
            physical_nodes[i] = physical_nodes[sibling_idx] = NULL;
            virtual_node_t* virtual_node = &virtual_topology->virtual_nodes[v];
            virtual_node->dram_node = node_i;
            virtual_node->nvram_node = sibling_node;
            virtual_node->node_id = v;
            virtual_node->cpu_model = cpu_model;
            DBG_LOG(INFO, "Fusing physical nodes %d %d into virtual node %d\n", 
                    node_i->node_id, sibling_node->node_id, virtual_node->node_id);
            v++;
        }
    }

    // any physical node that is not paired with another physical node is 
    // formed into a virtual node on its own
    if (2*v < num_physical_nodes) {
        for (i=0; i<num_physical_nodes; i++) {
            if ((node_i = physical_nodes[i]) == NULL) {
                continue;   // already fused into a virtual node above
            }
            virtual_node_t* virtual_node = &virtual_topology->virtual_nodes[v];
            virtual_node->dram_node = virtual_node->nvram_node = node_i;
            virtual_node->node_id = v;
            v++;

            DBG_LOG(WARNING, "Forming physical node %d into virtual node %d without a sibling node.\n",
                    node_i->node_id, virtual_node->node_id);
        }
    }

    *virtual_topologyp = virtual_topology;
    ret = E_SUCCESS;

done:
    free(physical_nodes);
    return ret;
}
Example #17
    unique_bitmask_ptr make_cpumask_ptr()
    {
        // pair the cpumask allocator with its cpumask deleter (was numa_free_nodemask)
        return unique_bitmask_ptr(numa_allocate_cpumask(), numa_free_cpumask);
    }
Example #18
char * build_default_affinity_string (int shuffle) {
   int nr_nodes = numa_num_configured_nodes();
   int nr_cores = numa_num_configured_cpus();

   char * str;
   int str_size = 512;
   int str_written = 0;

   int i;

   struct bitmask ** bm = (struct bitmask**) malloc(sizeof(struct bitmask*) * nr_nodes);

   for (i = 0; i < nr_nodes; i++) {
      bm[i] = numa_allocate_cpumask();
      numa_node_to_cpus(i, bm[i]);
   }

   str = (char*) malloc(str_size * sizeof(char));
   assert(str);

   if(!shuffle) {
      for(i = 0; i < nr_nodes; i++) {
         int j;
         for(j = 0; j < nr_cores; j++) {
            if (numa_bitmask_isbitset(bm[i], j)) {
               add_core_to_str(&str, &str_size, &str_written, j);
            }
         }
      }
   }
   else {
      int next_node = 0;

      for(i = 0; i < nr_cores; i++) {
         int idx = (i / nr_nodes) + 1;
         int found = 0;
         int j = 0;

         do {
            if (numa_bitmask_isbitset(bm[next_node], j)) {
               found++;
            }

            if(found == idx){
               add_core_to_str(&str, &str_size, &str_written, j);
               break;
            }

            j = (j + 1) % nr_cores;
         } while (found != idx);

         next_node = (next_node + 1) % nr_nodes;
      }
   }

   for (i = 0; i < nr_nodes; i++)
      numa_bitmask_free(bm[i]);
   free(bm);

   if(str_written) {
      str[str_written - 1] = 0; /* strip the trailing separator */
   }

   return str;
}
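A hypothetical call site; the returned string is heap-allocated and must be freed by the caller:

char *affinity = build_default_affinity_string(0);
printf("%s\n", affinity);  /* presumably a separator-joined core list, e.g. "0,1,2,3" */
free(affinity);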