Example #1
static void memkind_hbw_closest_numanode_init(void)
{
    struct memkind_hbw_closest_numanode_t *g =
                &memkind_hbw_closest_numanode_g;
    int *bandwidth = NULL;
    int num_unique = 0;
    int high_bandwidth = 0;
    int node;
    struct bandwidth_nodes_t *bandwidth_nodes = NULL;
    char *hbw_nodes_env;
    struct bitmask *hbw_nodes_bm;

    g->num_cpu = numa_num_configured_cpus();
    g->closest_numanode = (int *)je_malloc(sizeof(int) * g->num_cpu);
    bandwidth = (int *)je_malloc(sizeof(int) * NUMA_NUM_NODES);
    if (!(g->closest_numanode && bandwidth)) {
        g->init_err = MEMKIND_ERROR_MALLOC;
    }
    if (!g->init_err) {
        /* MEMKIND_HBW_NODES lets the user list the high-bandwidth
         * nodes explicitly, bypassing bandwidth detection */
        hbw_nodes_env = getenv("MEMKIND_HBW_NODES");
        if (hbw_nodes_env) {
            hbw_nodes_bm = numa_parse_nodestring(hbw_nodes_env);
            if (!hbw_nodes_bm) {
                g->init_err = MEMKIND_ERROR_ENVIRON;
            }
            else {
                /* mark the listed nodes as high bandwidth (2) and
                 * everything else as normal (1) */
                for (node = 0; node < NUMA_NUM_NODES; ++node) {
                    if (numa_bitmask_isbitset(hbw_nodes_bm, node)) {
                        bandwidth[node] = 2;
                    }
                    else {
                        bandwidth[node] = 1;
                    }
                }
                numa_bitmask_free(hbw_nodes_bm);
            }
        }
        else {
            g->init_err = parse_node_bandwidth(NUMA_NUM_NODES, bandwidth,
                                               MEMKIND_BANDWIDTH_PATH);
        }
    }
    if (!g->init_err) {
        g->init_err = create_bandwidth_nodes(NUMA_NUM_NODES, bandwidth,
                                             &num_unique, &bandwidth_nodes);
    }
    if (!g->init_err) {
        /* a single bandwidth class means there are no distinct
         * high-bandwidth nodes on this system */
        if (num_unique == 1) {
            g->init_err = MEMKIND_ERROR_UNAVAILABLE;
        }
    }
    if (!g->init_err) {
        /* bandwidth_nodes is sorted by bandwidth, so the last entry
         * holds the highest value */
        high_bandwidth = bandwidth_nodes[num_unique-1].bandwidth;
        g->init_err = set_closest_numanode(num_unique, bandwidth_nodes,
                                           high_bandwidth, g->num_cpu,
                                           g->closest_numanode);
    }
    if (bandwidth_nodes) {
        je_free(bandwidth_nodes);
    }
    if (bandwidth) {
        je_free(bandwidth);
    }
    if (g->init_err) {
        if (g->closest_numanode) {
            je_free(g->closest_numanode);
            g->closest_numanode = NULL;
        }
    }
}
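
In memkind, an initializer like this is typically driven through pthread_once() so that concurrent callers run it exactly once. A minimal sketch of that pattern, assuming it sits in the same file as the code above; the accessor name and once-control variable are illustrative, not memkind's real API:

#include <pthread.h>

static pthread_once_t closest_numanode_once = PTHREAD_ONCE_INIT;

/* Illustrative accessor: runs the init on first use, then serves
 * lookups from the table it built. */
int memkind_hbw_get_closest_numanode(int cpu_id, int *node)
{
    struct memkind_hbw_closest_numanode_t *g =
                &memkind_hbw_closest_numanode_g;

    pthread_once(&closest_numanode_once, memkind_hbw_closest_numanode_init);
    if (g->init_err) {
        return g->init_err;
    }
    *node = g->closest_numanode[cpu_id];
    return 0;
}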
Example #2
void myhbwmalloc_init(void)
{
    /* set to NULL before trying to initialize.  if we return before
     * successful creation of the mspace, then it will still be NULL,
     * and we can use that in subsequent library calls to determine
     * that the library failed to initialize. */
    myhbwmalloc_mspace = NULL;

    /* verbose printout? */
    myhbwmalloc_verbose = 0;
    {
        char * env_char = getenv("HBWMALLOC_VERBOSE");
        if (env_char != NULL) {
            myhbwmalloc_verbose = 1;
            printf("hbwmalloc: HBWMALLOC_VERBOSE set\n");
        }
    }

    /* fail hard or soft? */
    myhbwmalloc_hardfail = 1;
    {
        char * env_char = getenv("HBWMALLOC_SOFTFAIL");
        if (env_char != NULL) {
            myhbwmalloc_hardfail = 0;
            printf("hbwmalloc: HBWMALLOC_SOFTFAIL set\n");
        }
    }

    /* set the atexit handler that will destroy the mspace and free the numa allocation */
    atexit(myhbwmalloc_final);

    /* detect and configure use of NUMA memory nodes */
    {
        int max_possible_node        = numa_max_possible_node();
        int num_possible_nodes       = numa_num_possible_nodes();
        int max_numa_nodes           = numa_max_node();
        int num_configured_nodes     = numa_num_configured_nodes();
        int num_configured_cpus      = numa_num_configured_cpus();
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_max_possible_node()    = %d\n", max_possible_node);
            printf("hbwmalloc: numa_num_possible_nodes()   = %d\n", num_possible_nodes);
            printf("hbwmalloc: numa_max_node()             = %d\n", max_numa_nodes);
            printf("hbwmalloc: numa_num_configured_nodes() = %d\n", num_configured_nodes);
            printf("hbwmalloc: numa_num_configured_cpus()  = %d\n", num_configured_cpus);
        }
        /* FIXME this is a hack.  assumes HBW is only numa node 1. */
        if (num_configured_nodes <= 2) {
            myhbwmalloc_numa_node = num_configured_nodes-1;
        } else {
            fprintf(stderr, "hbwmalloc: we support only 2 numa nodes, not %d\n", num_configured_nodes);
            /* leave myhbwmalloc_mspace NULL so later calls can detect the failure */
            return;
        }

        if (myhbwmalloc_verbose) {
            for (int i=0; i<num_configured_nodes; i++) {
                unsigned max_numa_cpus = numa_num_configured_cpus();
                struct bitmask * mask = numa_bitmask_alloc( max_numa_cpus );
                int rc = numa_node_to_cpus(i, mask);
                if (rc != 0) {
                    fprintf(stderr, "hbwmalloc: numa_node_to_cpus failed\n");
                } else {
                    printf("hbwmalloc: numa node %d cpu mask:", i);
                    for (unsigned j=0; j<max_numa_cpus; j++) {
                        int bit = numa_bitmask_isbitset(mask,j);
                        printf(" %d", bit);
                    }
                    printf("\n");
                }
                numa_bitmask_free(mask);
            }
            fflush(stdout);
        }
    }

#if 0 /* unused */
    /* see if the user specifies a slab size */
    size_t slab_size_requested = 0;
    {
        char * env_char = getenv("HBWMALLOC_BYTES");
        if (env_char!=NULL) {
            long units = 1L;
            if      ( NULL != strstr(env_char,"G") ) units = 1000000000L;
            else if ( NULL != strstr(env_char,"M") ) units = 1000000L;
            else if ( NULL != strstr(env_char,"K") ) units = 1000L;
            else                                     units = 1L;

            int num_count = strspn(env_char, "0123456789");
            memset( &env_char[num_count], ' ', strlen(env_char)-num_count);
            slab_size_requested = units * atol(env_char);
        }
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: requested slab_size_requested = %zu\n", slab_size_requested);
        }
    }
#endif

    /* see what libnuma says is available */
    size_t myhbwmalloc_slab_size;
    {
        int node = myhbwmalloc_numa_node;
        long long freemem;
        long long maxmem = numa_node_size64(node, &freemem);
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_node_size64 says maxmem=%lld freemem=%lld for numa node %d\n",
                    maxmem, freemem, node);
        }
        myhbwmalloc_slab_size = freemem;
    }

    /* assume threads, disable if MPI knows otherwise, then allow user to override. */
    int multithreaded = 1;
#ifdef HAVE_MPI
    int nprocs = 1;   /* stays 1 if MPI is not (or no longer) active */
    {
        int is_init, is_final;
        MPI_Initialized(&is_init);
        MPI_Finalized(&is_final);
        if (is_init && !is_final) {
            MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
        }

        /* give equal portion to every MPI process */
        myhbwmalloc_slab_size /= nprocs;

        /* if the user initializes MPI with MPI_Init or
         * MPI_Init_thread(MPI_THREAD_SINGLE), they assert there
         * are no threads at all, which means we can skip the
         * malloc mspace lock.
         *
         * if the user lies to MPI, they deserve any bad thing
         * that comes of it. */
        int provided;
        MPI_Query_thread(&provided);
        if (provided==MPI_THREAD_SINGLE) {
            multithreaded = 0;
        } else {
            multithreaded = 1;
        }

        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: MPI processes = %d (threaded = %d)\n", nprocs, multithreaded);
            printf("hbwmalloc: myhbwmalloc_slab_size = %d\n", myhbwmalloc_slab_size);
        }
    }
#endif

    /* user can assert that hbwmalloc and friends need not be thread-safe */
    {
        char * env_char = getenv("HBWMALLOC_LOCKLESS");
        if (env_char != NULL) {
            multithreaded = 0;
            if (myhbwmalloc_verbose) {
                printf("hbwmalloc: user has disabled locking in mspaces by setting HBWMALLOC_LOCKLESS\n");
            }
        }
    }

    myhbwmalloc_slab = numa_alloc_onnode( myhbwmalloc_slab_size, myhbwmalloc_numa_node);
    if (myhbwmalloc_slab==NULL) {
        fprintf(stderr, "hbwmalloc: numa_alloc_onnode returned NULL for size = %zu\n", myhbwmalloc_slab_size);
        return;
    } else {
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_alloc_onnode succeeded for size %zu\n", myhbwmalloc_slab_size);
        }

        /* part (less than 128*sizeof(size_t) bytes) of this space is used for bookkeeping,
         * so the capacity must be at least this large */
        if (myhbwmalloc_slab_size < 128*sizeof(size_t)) {
            fprintf(stderr, "hbwmalloc: not enough space for mspace bookkeeping\n");
            return;
        }

        /* see above regarding if the user lies to MPI. */
        int locked = multithreaded;
        myhbwmalloc_mspace = create_mspace_with_base( myhbwmalloc_slab, myhbwmalloc_slab_size, locked);
        if (myhbwmalloc_mspace == NULL) {
            fprintf(stderr, "hbwmalloc: create_mspace_with_base returned NULL\n");
            return;
        } else if (myhbwmalloc_verbose) {
            printf("hbwmalloc: create_mspace_with_base succeeded for size %zu\n", myhbwmalloc_slab_size);
        }
    }
}
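
Once the mspace exists, allocations are served from it with dlmalloc's mspace API. A sketch of a front-end, assuming it lives in the same file as the init above; the wrapper name and the fallback-to-malloc policy are illustrative, not necessarily the library's real behavior:

void *myhbwmalloc(size_t size)
{
    if (myhbwmalloc_mspace != NULL) {
        /* carve the allocation out of the high-bandwidth slab */
        return mspace_malloc(myhbwmalloc_mspace, size);
    }
    if (myhbwmalloc_hardfail) {
        fprintf(stderr, "hbwmalloc: mspace was not initialized\n");
        abort();
    }
    /* soft fail: fall back to ordinary DDR memory */
    return malloc(size);
}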
Example #3
static uint32_t* placement(uint32_t n, bool do_fill, bool hyper)
{
    /* numa_available() must succeed before any other libnuma call */
    if (numa_available() < 0) {
        fprintf(stderr, "libnuma not available\n");
        return NULL;
    }

    uint32_t* result = (uint32_t*) malloc(sizeof(uint32_t)*n);
    if (result == NULL) {
        return NULL;
    }
    uint32_t numa_nodes = numa_max_node()+1;
    /* with hyperthreading, only the lower half of the CPU IDs map to
     * distinct physical cores */
    uint32_t num_cores = hyper ? numa_num_configured_cpus()/2
                               : numa_num_configured_cpus();
    struct bitmask* nodes[numa_nodes];

    for (uint32_t i = 0; i < numa_nodes; i++) {
        nodes[i] = numa_allocate_cpumask();
        numa_node_to_cpus(i, nodes[i]);
    }

    uint32_t num_taken = 0;
    if (do_fill) {
        /* fill strategy: exhaust each node before moving to the next */
        for (uint32_t i = 0; i < numa_nodes && num_taken < n; i++) {
            for (uint32_t j = 0; j < num_cores && num_taken < n; j++) {
                if (numa_bitmask_isbitset(nodes[i], j)) {
                    result[num_taken] = j;
                    num_taken++;
                }
            }
        }
    } else {
        /* spread strategy: an equal share from every node, plus one
         * extra from the first `rest` nodes */
        uint32_t cores_per_node = n/numa_nodes;
        uint32_t rest = n - (cores_per_node*numa_nodes);

        for (uint32_t i = 0; i < numa_nodes && num_taken < n; i++) {
            uint32_t taken_per_node = 0;
            for (uint32_t j = 0; j < num_cores && num_taken < n; j++) {
                if (!numa_bitmask_isbitset(nodes[i], j)) {
                    continue;
                }
                if (taken_per_node == cores_per_node) {
                    if (rest > 0) {
                        result[num_taken] = j;
                        num_taken++;
                        rest--;
                    }
                    break;
                }
                result[num_taken] = j;
                num_taken++;
                taken_per_node++;
            }
        }
    }

    /* release the per-node cpumasks on every path */
    for (uint32_t i = 0; i < numa_nodes; i++) {
        numa_bitmask_free(nodes[i]);
    }

    if (num_taken < n) {
        /* not enough cores found */
        free(result);
        return NULL;
    }
    return result;
}
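
A minimal caller for the placement() helper above, requesting four CPU IDs with the fill strategy. Purely illustrative; since placement() is static, this assumes the same file, which already needs <numa.h>, <stdint.h>, <stdbool.h>, <stdio.h> and <stdlib.h>:

int main(void)
{
    uint32_t n = 4;
    uint32_t *cpus = placement(n, true, false);  /* do_fill=true, hyper=false */
    if (cpus == NULL) {
        return 1;
    }
    for (uint32_t i = 0; i < n; i++) {
        printf("slot %u -> cpu %u\n", i, cpus[i]);
    }
    free(cpus);
    return 0;
}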
Example #4
  EvenNumaObj() {
    num_cpus_ = numa_num_configured_cpus();
    num_mem_nodes_ = numa_num_configured_nodes();
    LOG(INFO) << "num_cpus = " << num_cpus_
              << " num_mem_nodes = " << num_mem_nodes_;
  }
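
A sketch of what such counts are typically used for: sizing per-node bookkeeping. This standalone C version pairs the two counters with numa_node_of_cpu(); all names are illustrative:

#include <numa.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    if (numa_available() < 0) {
        return 1;
    }
    int num_cpus = numa_num_configured_cpus();
    int num_nodes = numa_num_configured_nodes();
    int *counts = calloc(num_nodes, sizeof(int));
    if (counts == NULL) {
        return 1;
    }
    for (int cpu = 0; cpu < num_cpus; cpu++) {
        int node = numa_node_of_cpu(cpu);   /* -1 if the cpu is unknown */
        if (node >= 0 && node < num_nodes) {
            counts[node]++;
        }
    }
    for (int n = 0; n < num_nodes; n++) {
        printf("node %d: %d cpus\n", n, counts[n]);
    }
    free(counts);
    return 0;
}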
Example #5
char * build_default_affinity_string (int shuffle) {
   int nr_nodes = numa_num_configured_nodes();
   int nr_cores = numa_num_configured_cpus();

   char * str;
   int str_size = 512;
   int str_written = 0;

   int i;

   struct bitmask ** bm = (struct bitmask**) malloc(sizeof(struct bitmask*) * nr_nodes);
   assert(bm);

   for (i = 0; i < nr_nodes; i++) {
      bm[i] = numa_allocate_cpumask();
      numa_node_to_cpus(i, bm[i]);
   }

   str = (char*) malloc(str_size * sizeof(char));
   assert(str);

   if(!shuffle) {
      for(i = 0; i < nr_nodes; i++) {
         int j;
         for(j = 0; j < nr_cores; j++) {
            if (numa_bitmask_isbitset(bm[i], j)) {
               add_core_to_str(&str, &str_size, &str_written, j);
            }
         }
      }
   }
   else {
      /* round-robin: entry i takes the ((i / nr_nodes) + 1)-th CPU of
       * node (i % nr_nodes).  This assumes every node has at least
       * nr_cores / nr_nodes CPUs; otherwise the scan below never
       * reaches its target bit and does not terminate. */
      int next_node = 0;

      for(i = 0; i < nr_cores; i++) {
         int idx = (i / nr_nodes) + 1;
         int found = 0;
         int j = 0;

         do {
            if (numa_bitmask_isbitset(bm[next_node], j)) {
               found++;
            }

            if(found == idx){
               add_core_to_str(&str, &str_size, &str_written, j);
               break;
            }

            j = (j + 1) % nr_cores;
         } while (found != idx);

         next_node = (next_node + 1) % nr_nodes;
      }
   }

   for (i = 0; i < nr_nodes; i++) {
      numa_bitmask_free(bm[i]);
   }
   free(bm);

   /* overwrite the trailing separator with the terminator */
   if(str_written) {
      str[str_written - 1] = 0;
   }

   return str;
}
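
An illustrative caller for build_default_affinity_string(), assumed to sit in the same file as the function above. It takes add_core_to_str() to append each core ID followed by a separator, which is why the last character is overwritten with a terminator:

int main(void)
{
    if (numa_available() < 0) {
        return 1;
    }
    char *affinity = build_default_affinity_string(0);   /* no shuffle */
    if (affinity != NULL) {
        printf("affinity: %s\n", affinity);
        free(affinity);
    }
    return 0;
}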
Example #6
static int memkind_store(void *memptr, void **mmapptr, struct memkind **kind,
                         size_t *req_size, size_t *size, int mode)
{
    static int table_len = 0;
    static int is_init = 0;
    static memkind_table_node_t *table = NULL;
    static pthread_mutex_t init_mutex = PTHREAD_MUTEX_INITIALIZER;
    int err = 0;
    int hash, i;
    memkind_list_node_t *storeptr, *lastptr;

    if (!is_init && *mmapptr == NULL) {
        return -1;
    }

    if (!is_init) {
        pthread_mutex_lock(&init_mutex);
        /* double-checked: another thread may have finished the
         * initialization while we waited on the mutex */
        if (!is_init) {
            /* one hash bucket per configured CPU */
            table_len = numa_num_configured_cpus();
            table = jemk_malloc(sizeof(memkind_table_node_t) * table_len);
            if (table == NULL) {
                err = MEMKIND_ERROR_MALLOC;
            }
            else {
                for (i = 0; i < table_len; ++i) {
                    pthread_mutex_init(&(table[i].mutex), NULL);
                    table[i].list = NULL;
                }
                is_init = 1;
            }
        }
        pthread_mutex_unlock(&init_mutex);
    }
    if (is_init) {
        hash = ptr_hash(memptr, table_len);
        pthread_mutex_lock(&(table[hash].mutex));
        if (mode == GBTLB_STORE_REMOVE || mode == GBTLB_STORE_QUERY) {
            /*
               memkind_store() call is a query:
               GBTLB_STORE_REMOVE -> if found, remove the entry and
               return its address and size;
               GBTLB_STORE_QUERY  -> if found, return the entry.
            */
            storeptr = table[hash].list;
            lastptr = NULL;
            while (storeptr && storeptr->ptr != memptr) {
                lastptr = storeptr;
                storeptr = storeptr->next;
            }
            if (storeptr == NULL) {
                err = MEMKIND_ERROR_RUNTIME;
            }
            if (!err) {
                *mmapptr = storeptr->mmapptr;
                *size = storeptr->size;
                *req_size = storeptr->requested_size;
                *kind = storeptr->kind;
            }
            if (!err && mode == GBTLB_STORE_REMOVE) {
                if (lastptr) {
                    lastptr->next = storeptr->next;
                }
                else {
                    table[hash].list = storeptr->next;
                }
                jemk_free(storeptr);
            }
        }
        else { /* memkind_store() call is a store */
            memkind_list_node_t *newptr =
                (memkind_list_node_t*)jemk_malloc(sizeof(memkind_list_node_t));
            if (newptr == NULL) {
                err = MEMKIND_ERROR_MALLOC;
            }
            else {
                /* prepend the new entry to the bucket's list */
                newptr->ptr = memptr;
                newptr->mmapptr = *mmapptr;
                newptr->size = *size;
                newptr->requested_size = *req_size;
                newptr->kind = *kind;
                newptr->next = table[hash].list;
                table[hash].list = newptr;
            }
        }
        pthread_mutex_unlock(&(table[hash].mutex));
    }
    else {
        err = MEMKIND_ERROR_MALLOC;
    }
    return err;
}
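
An illustrative round trip through memkind_store(), assumed to live alongside the code above. Per the dispatch logic, any mode other than GBTLB_STORE_REMOVE / GBTLB_STORE_QUERY is treated as a store, so the GBTLB_STORE_INSERT constant below is hypothetical:

static int track_mapping(void *usable, void *raw, struct memkind *kind,
                         size_t req_size, size_t size)
{
    /* store: records the raw mapping, sizes and kind under the
     * user-visible pointer */
    return memkind_store(usable, &raw, &kind, &req_size, &size,
                         GBTLB_STORE_INSERT);
}

static int forget_mapping(void *usable, void **raw, struct memkind **kind,
                          size_t *req_size, size_t *size)
{
    /* remove: looks the entry up, fills the out-parameters, unlinks
     * and frees the table node */
    return memkind_store(usable, raw, kind, req_size, size,
                         GBTLB_STORE_REMOVE);
}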
Example #7
/**
 * @brief Returns an array of req_cores cores chosen round-robin
 *     from NUMA nodes in batches of req_step.
 *
 * @param req_step The step width - how many cores should be picked
 *     from each NUMA node in each iteration.  Pass a negative value
 *     (which wraps to a huge size_t) for a "fill" strategy, where
 *     each NUMA node is completely filled before moving on to the
 *     next one.
 */
void placement(size_t req_cores, size_t req_step, coreid_t *cores)
{
    // A step of 0 would never make progress; treat it as 1
    if (req_step==0)
        req_step=1;

    size_t max_node = numa_max_node();
    size_t num_cores = numa_num_configured_cpus();
    size_t cores_per_node = num_cores/(max_node+1);

    printf("req_cores: %zu\n", req_cores);
    printf("req_step: %zu\n", req_step);
    printf("cores / NUMA node: %zu\n", cores_per_node);
    printf("max_node: %zu\n", max_node);

    size_t num_selected = 0;
    size_t curr_numa_idx = 0;

    // How many cores to choose from each NUMA node
    size_t choose_per_node[max_node+1];
    memset(choose_per_node, 0, sizeof(size_t)*(max_node+1));

    // Step 1:
    // Figure out how many cores to choose from each node

    while (num_selected<req_cores) {

        // How many cores to take from this node in this step:
        // at most req_step, bounded by what is still needed and by
        // the node's remaining capacity.  Note: if req_cores exceeds
        // the total number of cores, this loop never terminates.
        size_t num_choose = min(min(req_step, req_cores-num_selected),
                                cores_per_node-choose_per_node[curr_numa_idx]);

        // Increment counter indicating how many to choose from this node
        choose_per_node[curr_numa_idx] += num_choose;
        num_selected += num_choose;

        // Move on to the next NUMA node
        curr_numa_idx = (curr_numa_idx + 1) % (max_node+1);
    }

    // Step 2:
    // Get the cores from each NUMA node.
    //
    // Hyperthreads typically have higher CPU IDs, so they are
    // picked last.

    struct bitmask *mask = numa_allocate_cpumask();

    size_t idx = 0;

    for (size_t i=0; i<=max_node; i++) {

        dbg_printf("node %2zu choosing %2zu\n", i, choose_per_node[i]);

        // Determine which cores are on node i
        numa_node_to_cpus(i, mask);

        size_t chosen = 0;

        for (coreid_t p=0; p<num_cores && chosen<choose_per_node[i]; p++) {

            // Is processor p on node i?
            if (numa_bitmask_isbitset(mask, p)) {

                cores[idx++] = p;
                chosen++;

                dbg_printf("Choosing %" PRIuCOREID " on node %zu\n", p, i);
            }
        }
    }

    numa_free_cpumask(mask);

    assert (idx == req_cores);
}
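
An illustrative caller for this placement() variant, assumed to sit in the same project, where coreid_t and PRIuCOREID are defined; the cast below avoids depending on their exact definition:

int main(void)
{
    size_t req_cores = 8;
    coreid_t *cores = malloc(sizeof(coreid_t) * req_cores);
    if (cores == NULL) {
        return 1;
    }
    placement(req_cores, 2, cores);   /* two cores per node per round */
    for (size_t i = 0; i < req_cores; i++) {
        printf("slot %zu -> core %u\n", i, (unsigned) cores[i]);
    }
    free(cores);
    return 0;
}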