TEST(StorageManagerTest, DifferentNUMANodeBlobTestWithEviction) {
  EvictionPolicy *eviction_policy = LRUKEvictionPolicyFactory::ConstructLRUKEvictionPolicy(
          2, std::chrono::seconds(100));
  EvictionPolicy::Status status;
  static constexpr std::size_t kNumSlots = 10;
  const block_id_domain block_domain = 1000;
  // Cap the buffer pool at 2000 slots (roughly 4 GB assuming the default 2 MB slot size).
  const size_t max_memory_usage = 2000;
  std::unique_ptr<StorageManager> storage_manager;
  storage_manager.reset(
      new StorageManager("temp_storage", block_domain, max_memory_usage, eviction_policy));

  const std::size_t num_numa_nodes = numa_num_configured_nodes();

  block_id blob_id;
  MutableBlobReference blob_obj;
  char* blob_memory;
  BlobReference new_blob_obj;
  const char* new_blob_memory;
  std::size_t new_numa_node = 0;

  for (std::size_t numa_node = 0; numa_node < num_numa_nodes; ++numa_node) {
    blob_id = storage_manager->createBlob(kNumSlots, numa_node);
    blob_obj =
        storage_manager->getBlobMutable(blob_id, numa_node);
    blob_memory =
        static_cast<char*>(blob_obj->getMemoryMutable());

    // Write some contents into the memory.
    for (std::size_t i = 0; i < kNumSlots * kSlotSizeBytes; ++i) {
      blob_memory[i] = static_cast<char>(i);
    }

    // Dereference the blob.
    blob_obj.release();

    // Choose a blob for eviction.
    status = storage_manager->eviction_policy_->chooseBlockToEvict(&blob_id);
    ASSERT_EQ(EvictionPolicy::Status::kOk, status);
    // Save the blob to disk.
    storage_manager->saveBlockOrBlob(blob_id, true);
    // Evict the blob from the buffer pool.
    storage_manager->evictBlockOrBlob(blob_id);
    // Inform the eviction policy that this blob has been evicted.
    storage_manager->eviction_policy_->blockEvicted(blob_id);

    new_numa_node = (numa_node + 1) % num_numa_nodes;

    new_blob_obj =
        storage_manager->getBlob(blob_id, new_numa_node);
    new_blob_memory =
        static_cast<const char*>(new_blob_obj->getMemory());

    // Read the contents of the blob using a different NUMA node hint and
    // verify that they match what we originally wrote.
    for (std::size_t i = 0; i < kNumSlots * kSlotSizeBytes; ++i) {
      EXPECT_EQ(static_cast<char>(i), new_blob_memory[i]);
    }
  }
}
				/**
				 * @brief construct global 'cohort' lock.
				 *
				 * This lock performs handovers in three levels: First within
				 * the same NUMA node, then within the same ArgoDSM node, and
				 * finally over ArgoDSM nodes.
				 */
				cohort_lock() :
					has_global_lock(false),
					numanodes(1), // sane default
					numahandover(0),
					nodelockowner(NO_OWNER),
					tas_flag(argo::conew_<bool>(false)),
					global_lock(new argo::globallock::global_tas_lock(tas_flag)),
					node_lock(new argo::locallock::ticket_lock())
				{
					int num_cpus = sysconf(_SC_NPROCESSORS_CONF); // sane default
					numa_mapping.resize(num_cpus, 0);
					#ifdef ARGO_USE_LIBNUMA
					/* use libnuma only if it is actually available */
					if(numa_available() != -1) {
						numanodes = numa_num_configured_nodes();
						/* Initialize the NUMA map */
						for (int i = 0; i < num_cpus; ++i) {
							numa_mapping[i] = numa_node_of_cpu(i);
						}
					}
					#endif
					/* initialize hierarchy components */
					handovers = new int[numanodes]();
					local_lock = new argo::locallock::mcs_lock[numanodes];
				}
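
A minimal standalone sketch of the CPU-to-NUMA-node map this constructor builds, assuming libnuma is present (compile with -lnuma); the lock classes themselves are omitted:

#include <numa.h>      // numa_available, numa_num_configured_nodes, numa_node_of_cpu
#include <sched.h>     // sched_getcpu (GNU extension, available by default under g++)
#include <unistd.h>    // sysconf
#include <cstdio>
#include <vector>

int main() {
    int numanodes = 1;  // sane default, as in the constructor above
    const int num_cpus = static_cast<int>(sysconf(_SC_NPROCESSORS_CONF));
    std::vector<int> numa_mapping(num_cpus, 0);

    if (numa_available() != -1) {
        numanodes = numa_num_configured_nodes();
        for (int cpu = 0; cpu < num_cpus; ++cpu) {
            numa_mapping[cpu] = numa_node_of_cpu(cpu);  // CPU id -> NUMA node id
        }
    }

    // At lock() time a cohort lock would consult this map for the calling CPU.
    const int cpu = sched_getcpu();
    if (cpu >= 0 && cpu < num_cpus) {
        std::printf("%d NUMA node(s); CPU %d sits on node %d\n",
                    numanodes, cpu, numa_mapping[cpu]);
    }
    return 0;
}
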
Example #3
int main(void)
{
	int i, k, w, ncpus;
	struct bitmask *cpus;
	int maxnode = numa_num_configured_nodes()-1;

	if (numa_available() < 0)  {
		printf("no numa\n");
		exit(1);
	}
	cpus = numa_allocate_cpumask();
	ncpus = cpus->size;

	for (i = 0; i <= maxnode; i++) {
		if (numa_node_to_cpus(i, cpus) < 0) {
			printf("node %d failed to convert\n", i);
		}
		printf("%d: ", i);
		w = 0;
		for (k = 0; k < ncpus; k++) {
			if (numa_bitmask_isbitset(cpus, k)) {
				printf(" %s%d", w > 0 ? "," : "", k);
				w++;  /* so subsequent CPUs are comma-separated */
			}
		}
		putchar('\n');
	}
	return 0;
}
Example #4
/**
 * \brief sets the memory allocation mask.
 *
 * \param nodemask  bitmap representing the nodes
 *
 * The task will only allocate memory from the nodes set in nodemask.
 *
 * An empty mask, or a mask containing nodes that are not allowed, results in an error.
 */
errval_t numa_set_membind(struct bitmap *nodemask)
{
    assert(numa_alloc_bind_mask);
    assert(numa_alloc_interleave_mask);

    if (!nodemask) {
        return NUMA_ERR_BITMAP_PARSE;
    }

    if (bitmap_get_nbits(nodemask) < NUMA_MAX_NUMNODES) {
        NUMA_WARNING("supplied interleave mask (%p) has to less bits!", nodemask);
        return NUMA_ERR_BITMAP_RANGE;
    }

    /* copy new membind mask and clear out invalid bits */
    bitmap_copy(numa_alloc_bind_mask, nodemask);
    bitmap_clear_range(numa_alloc_bind_mask, numa_num_configured_nodes(),
                       bitmap_get_nbits(numa_alloc_bind_mask));

    if (bitmap_get_weight(numa_alloc_bind_mask) == 0) {
        /* cannot bind to no node, restore with all nodes pointer*/
        bitmap_copy(numa_alloc_bind_mask, numa_all_nodes_ptr);
        return NUMA_ERR_NUMA_MEMBIND;
    }

    /* disable interleaving mode */
    bitmap_clear_all(numa_alloc_interleave_mask);

    return SYS_ERR_OK;
}
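
For comparison with this Barrelfish implementation: on Linux, the libnuma call of the same name takes a struct bitmask. A minimal sketch that binds all further allocations of the task to node 0 (node choice is illustration only; link with -lnuma):

#include <numa.h>
#include <cstdio>

int main() {
    if (numa_available() == -1) {
        std::fprintf(stderr, "no NUMA support\n");
        return 1;
    }
    struct bitmask *nodes = numa_allocate_nodemask();
    numa_bitmask_setbit(nodes, 0);     // node 0 only, chosen for illustration
    numa_set_membind(nodes);           // all new allocations now come from node 0
    numa_free_nodemask(nodes);

    void *buf = numa_alloc(1 << 20);   // 1 MiB allocated under the membind policy
    std::printf("allocated %p under the membind policy\n", buf);
    numa_free(buf, 1 << 20);
    return 0;
}
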
 /**
  * @brief Add a mapping between a block and the NUMA node on which the block
  *        is placed.
  *
  * @param block The block_id of the block for which the NUMA node mapping is
  *              added.
  * @param numa_node The numa node id on which the block is placed.
  **/
 void addBlockToNUMANodeMap(const block_id block, const int numa_node) {
   // Make sure that the block doesn't have a mapping already.
   // A block will be mapped to only one NUMA node.
   // A NUMA node will be associated with a block only once.
   DCHECK(block_to_numa_node_map_.find(block) == block_to_numa_node_map_.end());
   DCHECK_GT(numa_num_configured_nodes(), numa_node)
       << "NUMA node above the valid value.";
   block_to_numa_node_map_[block] = numa_node;
 }
 /**
  * @brief Constructor.
  *
  * @param num_partitions The number of partitions of the Catalog Relation.
  *                       This would be the same as the number of partitions
  *                       in the Partition Scheme of the relation.
  **/
 explicit NUMAPlacementScheme(const std::size_t num_partitions)
     : num_numa_nodes_(numa_num_configured_nodes()),
       num_partitions_(num_partitions) {
   // Assign each partition to exactly one NUMA node.
   // Partitions are assigned in a round robin way to NUMA nodes.
   for (std::size_t part_id = 0;
        part_id < num_partitions_;
        ++part_id) {
     partition_to_numa_node_map_[part_id] = part_id % num_numa_nodes_;
   }
 }
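
A worked sketch of the round-robin assignment above, using a hypothetical relation of 5 partitions on a 2-node machine (partitions 0..4 land on nodes 0, 1, 0, 1, 0); the map type and sizes are assumptions for illustration:

#include <cstddef>
#include <cstdio>
#include <map>

int main() {
    const std::size_t num_numa_nodes = 2;  // hypothetical machine
    const std::size_t num_partitions = 5;  // hypothetical Partition Scheme

    std::map<std::size_t, std::size_t> partition_to_numa_node_map;
    for (std::size_t part_id = 0; part_id < num_partitions; ++part_id) {
        partition_to_numa_node_map[part_id] = part_id % num_numa_nodes;  // round robin
    }
    for (const auto &entry : partition_to_numa_node_map) {
        std::printf("partition %zu -> NUMA node %zu\n", entry.first, entry.second);
    }
    return 0;
}
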
Example #7
    void check_all_numa_nodes(int policy, void *ptr, size_t size)
    {
        if (policy != MPOL_INTERLEAVE && policy != MPOL_DEFAULT) return;

        unique_bitmask_ptr expected_bitmask = make_nodemask_ptr();

        for(int i=0; i < numa_num_configured_nodes(); i++) {
            numa_bitmask_setbit(expected_bitmask.get(), i);
        }

        check_numa_nodes(expected_bitmask, policy, ptr, size);
    }
Example #8
static void regular_nodes_init(void)
{
    int i, nodes_num = numa_num_configured_nodes();
    struct bitmask *node_cpus = numa_allocate_cpumask();

    regular_nodes_mask = numa_allocate_nodemask();

    for (i = 0; i < nodes_num; i++) {
        /* a node is "regular" only if it has CPUs attached */
        numa_node_to_cpus(i, node_cpus);
        if (numa_bitmask_weight(node_cpus))
            numa_bitmask_setbit(regular_nodes_mask, i);
    }
    numa_bitmask_free(node_cpus);
}
Example #9
void numa_init(void)
{
	int max, i;

	if (sizes_set)
		return;

	set_sizes();
	/* numa_all_nodes should represent existing nodes on this system */
	max = numa_num_configured_nodes();
	for (i = 0; i < max; i++)
		nodemask_set_compat((nodemask_t *)&numa_all_nodes, i);
	memset(&numa_no_nodes, 0, sizeof(numa_no_nodes));
}
TEST(StorageManagerTest, DifferentNUMANodeBlobTest) {
  std::unique_ptr<StorageManager> storage_manager;
  static constexpr std::size_t kNumSlots = 10;
  storage_manager.reset(new StorageManager("temp_storage"));
  const std::size_t num_numa_nodes = numa_num_configured_nodes();

  block_id blob_id;
  MutableBlobReference blob_obj;
  char* blob_memory;
  BlobReference new_blob_obj;
  const char* new_blob_memory;
  std::size_t new_numa_node = 0;

  for (std::size_t numa_node = 0; numa_node < num_numa_nodes; ++numa_node) {
    blob_id = storage_manager->createBlob(kNumSlots, numa_node);
    blob_obj =
        storage_manager->getBlobMutable(blob_id, numa_node);
    blob_memory =
        static_cast<char*>(blob_obj->getMemoryMutable());

    // Write some contents into the memory.
    for (std::size_t i = 0; i < kNumSlots * kSlotSizeBytes; ++i) {
      blob_memory[i] = static_cast<char>(i);
    }

    // Dereference the blob.
    blob_obj.release();

    new_numa_node = (numa_node + 1) % num_numa_nodes;

    new_blob_obj =
        storage_manager->getBlob(blob_id, new_numa_node);
    new_blob_memory =
        static_cast<const char*>(new_blob_obj->getMemory());
    // Read the contents of the blob using a different NUMA node hint and
    // verify that they match what we originally wrote.
    for (std::size_t i = 0; i < kNumSlots * kSlotSizeBytes; ++i) {
      EXPECT_EQ(static_cast<char>(i), new_blob_memory[i]);
    }
  }
}
/// This function tries to fill the bandwidth array based on knowledge about known CPU models
static int fill_bandwidth_values_heuristically(int* bandwidth, int bandwidth_len)
{
    int ret = MEMKIND_ERROR_UNAVAILABLE; // Default error returned if the heuristic approach fails
    int i, nodes_num, memory_only_nodes_num = 0;
    struct bitmask *memory_only_nodes, *node_cpus;

    if (is_cpu_xeon_phi_x200() == 0) {
        log_info("Known CPU model detected: Intel(R) Xeon Phi(TM) x200.");
        nodes_num = numa_num_configured_nodes();

        // Check if number of numa-nodes meets expectations for
        // supported configurations of Intel Xeon Phi x200
        if (nodes_num != 2 && nodes_num != 4 && nodes_num != 8) {
            return ret;
        }

        memory_only_nodes = numa_allocate_nodemask();
        node_cpus = numa_allocate_cpumask();

        for(i=0; i<nodes_num; i++) {
            numa_node_to_cpus(i, node_cpus);
            if(numa_bitmask_weight(node_cpus) == 0) {
                memory_only_nodes_num++;
                numa_bitmask_setbit(memory_only_nodes, i);
            }
        }

        // Check if the number of memory-only nodes equals the number of memory+cpu nodes.
        // If so, set ret to 0 (success) and fill the bandwidth table.
        if ( memory_only_nodes_num == (nodes_num - memory_only_nodes_num) ) {

            ret = 0;
            assign_arbitrary_bandwidth_values(bandwidth, bandwidth_len, memory_only_nodes);
        }

        numa_bitmask_free(memory_only_nodes);
        numa_bitmask_free(node_cpus);
    }

    return ret;
}
static void assign_arbitrary_bandwidth_values(int* bandwidth, int bandwidth_len, struct bitmask* hbw_nodes)
{
    int i, nodes_num = numa_num_configured_nodes();

    // Assigning arbitrary bandwidth values for nodes:
    // 2 - high BW node (if bit set in hbw_nodes nodemask),
    // 1 - low  BW node,
    // 0 - node not present
    for (i=0; i<NUMA_NUM_NODES; i++) {
        if (i >= nodes_num) {
            bandwidth[i] = 0;
        }
        else if (numa_bitmask_isbitset(hbw_nodes, i)) {
            bandwidth[i] = 2;
        }
        else {
            bandwidth[i] = 1;
        }
    }

}
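
A standalone sketch of the same classification for a hypothetical 4-node Xeon Phi x200 layout in which nodes 2 and 3 are memory-only; the expected result is bandwidth classes 1, 1, 2, 2, and 0 for absent nodes. The constants and node layout here are assumptions for illustration:

#include <cstdio>

int main() {
    const int kMaxNodes = 8;   // stand-in for NUMA_NUM_NODES, kept small for the printout
    const int nodes_num = 4;   // hypothetical: 2 cpu+memory nodes and 2 memory-only nodes
    const bool is_hbw[kMaxNodes] = {false, false, true, true};  // nodes 2 and 3 are HBW

    int bandwidth[kMaxNodes];
    for (int i = 0; i < kMaxNodes; ++i) {
        if (i >= nodes_num) {
            bandwidth[i] = 0;          // node not present
        } else if (is_hbw[i]) {
            bandwidth[i] = 2;          // high-bandwidth node
        } else {
            bandwidth[i] = 1;          // regular DDR node
        }
    }
    for (int i = 0; i < kMaxNodes; ++i) {
        std::printf("node %d: bandwidth class %d\n", i, bandwidth[i]);
    }
    return 0;
}
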
Example #13
/**
 * \brief sets the memory interleave mask for the current task to nodemask
 *
 * \param nodemask bitmask representing the nodes
 *
 * All new memory allocations are page interleaved over all nodes in the interleave
 * mask. Interleaving can be turned off again by passing an empty mask.
 *
 * This bitmask is considered to be a hint. Fallback to other nodes may be possible
 */
void numa_set_interleave_mask(struct bitmap *nodemask)
{
    assert(numa_alloc_interleave_mask);

    if (!nodemask) {
        bitmap_clear_all(numa_alloc_interleave_mask);
        return;
    }

    if (bitmap_get_nbits(nodemask) < NUMA_MAX_NUMNODES) {
        NUMA_WARNING("supplied interleave mask (%p) has to less bits!", nodemask);
        return;
    }
    bitmap_copy(numa_alloc_interleave_mask, nodemask);

    /* clear out the invalid nodes */
    bitmap_clear_range(numa_alloc_interleave_mask, numa_num_configured_nodes(),
                       bitmap_get_nbits(numa_alloc_interleave_mask));

    /* clear the bind mask as we are using interleaving mode now */
    bitmap_clear_all(numa_alloc_bind_mask);
}
TEST(StorageManagerTest, NUMAAwareBlobTest) {
  std::unique_ptr<StorageManager> storage_manager;
  static constexpr std::size_t kNumSlots = 10;
  storage_manager.reset(new StorageManager("temp_storage"));
  const std::size_t num_numa_nodes = numa_num_configured_nodes();

  block_id blob_id;
  MutableBlobReference blob_obj;
  char* blob_memory;
  BlobReference new_blob_obj;
  const char* new_blob_memory;

  for (std::size_t numa_node = 0; numa_node < num_numa_nodes; ++numa_node) {
    blob_id = storage_manager->createBlob(kNumSlots, numa_node);
    blob_obj =
        storage_manager->getBlobMutable(blob_id, numa_node);
    blob_memory =
        static_cast<char*>(blob_obj->getMemoryMutable());

    // Write some contents into the memory.
    for (std::size_t i = 0; i < kNumSlots * kSlotSizeBytes; ++i) {
      blob_memory[i] = static_cast<char>(i);
    }

    // Dereference the blob.
    blob_obj.release();

    new_blob_obj =
        storage_manager->getBlob(blob_id, numa_node);
    new_blob_memory =
        static_cast<const char*>(new_blob_obj->getMemory());
    // Read the contents of the blob on the same NUMA node on which the blob was
    // created and verify that they match what we wrote into the blob.
    for (std::size_t i = 0; i < kNumSlots * kSlotSizeBytes; ++i) {
      EXPECT_EQ(static_cast<char>(i), new_blob_memory[i]);
    }
  }
}
Example #15
  EvenNumaObj() {
    num_cpus_ = numa_num_configured_cpus();
    num_mem_nodes_ = numa_num_configured_nodes();
    LOG(INFO) << "num_cpus = " << num_cpus_
              << " num_mem_nodes = " << num_mem_nodes_;
 }
Example #16
void myhbwmalloc_init(void)
{
    /* set to NULL before trying to initialize.  if we return before
     * successful creation of the mspace, then it will still be NULL,
     * and we can use that in subsequent library calls to determine
     * that the library failed to initialize. */
    myhbwmalloc_mspace = NULL;

    /* verbose printout? */
    myhbwmalloc_verbose = 0;
    {
        char * env_char = getenv("HBWMALLOC_VERBOSE");
        if (env_char != NULL) {
            myhbwmalloc_verbose = 1;
            printf("hbwmalloc: HBWMALLOC_VERBOSE set\n");
        }
    }

    /* fail hard or soft? */
    myhbwmalloc_hardfail = 1;
    {
        char * env_char = getenv("HBWMALLOC_SOFTFAIL");
        if (env_char != NULL) {
            myhbwmalloc_hardfail = 0;
            printf("hbwmalloc: HBWMALLOC_SOFTFAIL set\n");
        }
    }

    /* set the atexit handler that will destroy the mspace and free the numa allocation */
    atexit(myhbwmalloc_final);

    /* detect and configure use of NUMA memory nodes */
    {
        int max_possible_node        = numa_max_possible_node();
        int num_possible_nodes       = numa_num_possible_nodes();
        int max_numa_nodes           = numa_max_node();
        int num_configured_nodes     = numa_num_configured_nodes();
        int num_configured_cpus      = numa_num_configured_cpus();
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_max_possible_node()    = %d\n", max_possible_node);
            printf("hbwmalloc: numa_num_possible_nodes()   = %d\n", num_possible_nodes);
            printf("hbwmalloc: numa_max_node()             = %d\n", max_numa_nodes);
            printf("hbwmalloc: numa_num_configured_nodes() = %d\n", num_configured_nodes);
            printf("hbwmalloc: numa_num_configured_cpus()  = %d\n", num_configured_cpus);
        }
        /* FIXME this is a hack.  assumes HBW is only numa node 1. */
        if (num_configured_nodes <= 2) {
            myhbwmalloc_numa_node = num_configured_nodes-1;
        } else {
            fprintf(stderr,"hbwmalloc: we support only 2 numa nodes, not %d\n", num_configured_nodes);
        }

        if (myhbwmalloc_verbose) {
            for (int i=0; i<num_configured_nodes; i++) {
                unsigned max_numa_cpus = numa_num_configured_cpus();
                struct bitmask * mask = numa_bitmask_alloc( max_numa_cpus );
                int rc = numa_node_to_cpus(i, mask);
                if (rc != 0) {
                    fprintf(stderr, "hbwmalloc: numa_node_to_cpus failed\n");
                } else {
                    printf("hbwmalloc: numa node %d cpu mask:", i);
                    for (unsigned j=0; j<max_numa_cpus; j++) {
                        int bit = numa_bitmask_isbitset(mask,j);
                        printf(" %d", bit);
                    }
                    printf("\n");
                }
                numa_bitmask_free(mask);
            }
            fflush(stdout);
        }
    }

#if 0 /* unused */
    /* see if the user specifies a slab size */
    size_t slab_size_requested = 0;
    {
        char * env_char = getenv("HBWMALLOC_BYTES");
        if (env_char!=NULL) {
            long units = 1L;
            if      ( NULL != strstr(env_char,"G") ) units = 1000000000L;
            else if ( NULL != strstr(env_char,"M") ) units = 1000000L;
            else if ( NULL != strstr(env_char,"K") ) units = 1000L;
            else                                     units = 1L;

            int num_count = strspn(env_char, "0123456789");
            memset( &env_char[num_count], ' ', strlen(env_char)-num_count);
            slab_size_requested = units * atol(env_char);
        }
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: requested slab_size_requested = %zu\n", slab_size_requested);
        }
    }
#endif

    /* see what libnuma says is available */
    size_t myhbwmalloc_slab_size;
    {
        int node = myhbwmalloc_numa_node;
        long long freemem;
        long long maxmem = numa_node_size64(node, &freemem);
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_node_size64 says maxmem=%lld freemem=%lld for numa node %d\n",
                    maxmem, freemem, node);
        }
        myhbwmalloc_slab_size = freemem;
    }

    /* assume threads, disable if MPI knows otherwise, then allow user to override. */
    int multithreaded = 1;
#ifdef HAVE_MPI
    int nprocs = 1;  /* default to 1 so the slab size is unchanged if MPI is not active */
    {
        int is_init, is_final;
        MPI_Initialized(&is_init);
        MPI_Finalized(&is_final);
        if (is_init && !is_final) {
            MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
        }

        /* give equal portion to every MPI process */
        myhbwmalloc_slab_size /= nprocs;

        /* if the user initializes MPI with MPI_Init or
         * MPI_Init_thread(MPI_THREAD_SINGLE), they assert there
         * are no threads at all, which means we can skip the
         * malloc mspace lock.
         *
         * if the user lies to MPI, they deserve any bad thing
         * that comes of it. */
        int provided;
        MPI_Query_thread(&provided);
        if (provided==MPI_THREAD_SINGLE) {
            multithreaded = 0;
        } else {
            multithreaded = 1;
        }

        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: MPI processes = %d (threaded = %d)\n", nprocs, multithreaded);
            printf("hbwmalloc: myhbwmalloc_slab_size = %d\n", myhbwmalloc_slab_size);
        }
    }
#endif

    /* user can assert that hbwmalloc and friends need not be thread-safe */
    {
        char * env_char = getenv("HBWMALLOC_LOCKLESS");
        if (env_char != NULL) {
            multithreaded = 0;
            if (myhbwmalloc_verbose) {
                printf("hbwmalloc: user has disabled locking in mspaces by setting HBWMALLOC_LOCKLESS\n");
            }
        }
    }

    myhbwmalloc_slab = numa_alloc_onnode( myhbwmalloc_slab_size, myhbwmalloc_numa_node);
    if (myhbwmalloc_slab==NULL) {
        fprintf(stderr, "hbwmalloc: numa_alloc_onnode returned NULL for size = %zu\n", myhbwmalloc_slab_size);
        return;
    } else {
        if (myhbwmalloc_verbose) {
            printf("hbwmalloc: numa_alloc_onnode succeeded for size %zu\n", myhbwmalloc_slab_size);
        }

        /* part (less than 128*sizeof(size_t) bytes) of this space is used for bookkeeping,
         * so the capacity must be at least this large */
        if (myhbwmalloc_slab_size < 128*sizeof(size_t)) {
            fprintf(stderr, "hbwmalloc: not enough space for mspace bookkeeping\n");
            return;
        }

        /* see above regarding if the user lies to MPI. */
        int locked = multithreaded;
        myhbwmalloc_mspace = create_mspace_with_base( myhbwmalloc_slab, myhbwmalloc_slab_size, locked);
        if (myhbwmalloc_mspace == NULL) {
            fprintf(stderr, "hbwmalloc: create_mspace_with_base returned NULL\n");
            return;
        } else if (myhbwmalloc_verbose) {
            printf("hbwmalloc: create_mspace_with_base succeeded for size %zu\n", myhbwmalloc_slab_size);
        }
    }
}
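
The slab setup above reduces to two libnuma calls: query how much memory a node has free with numa_node_size64(), then carve a slab out of that node with numa_alloc_onnode(). A minimal standalone sketch (node 0 and the 1 MiB slab are illustration-only choices; link with -lnuma):

#include <numa.h>
#include <cstddef>
#include <cstdio>

int main() {
    if (numa_available() == -1) {
        std::fprintf(stderr, "no NUMA support\n");
        return 1;
    }
    const int node = 0;                  // illustration only; myhbwmalloc picks the HBW node
    long long freemem = 0;
    long long maxmem = numa_node_size64(node, &freemem);
    std::printf("node %d: size=%lld free=%lld\n", node, maxmem, freemem);

    const size_t bytes = 1 << 20;        // 1 MiB instead of the whole free memory
    void *slab = numa_alloc_onnode(bytes, node);
    if (slab == NULL) {
        std::fprintf(stderr, "numa_alloc_onnode failed\n");
        return 1;
    }
    numa_free(slab, bytes);
    return 0;
}
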
Example #17
void force_move_pages(const void* data_, const size_t n, const size_t selem,
		      const enum numa_distrib_type distrib, const size_t distrib_parameter) {
  const char* data = (const char*)data_;
  const size_t elem_per_page = ASSUMED_PAGE_SIZE/selem;
  const size_t np = n / elem_per_page;
  int status[np];
  int nodes[np];
  const char* pages[np];
  size_t i;
  long res;

#ifndef __MIC__
  const int nmn = numa_num_configured_nodes();

  // fprintf(stderr, "%s:%d elem_per_page = %zd, nmn = %d ; np = %zd\n", __PRETTY_FUNCTION__, __LINE__, elem_per_page, nmn, np);
  
  for (i = 0 ; i < np ; i++) {
    pages[i] = data + i * ASSUMED_PAGE_SIZE;
    switch (distrib) {
    case HYDRO_NUMA_NONE:
      nodes[i] = -1;
      break;
    case HYDRO_NUMA_INTERLEAVED:
      nodes[i] = i % nmn;
      break;
    case HYDRO_NUMA_ONE_BLOCK_PER_NODE: {
      const size_t ppernode = np / nmn;
      size_t nnode = i / ppernode;
      if (nnode > (nmn-1))
	nnode = nmn - 1;
      nodes[i] = nnode;
    } break;
    case HYDRO_NUMA_SIZED_BLOCK_RR: {
      const size_t numb = i / (distrib_parameter/elem_per_page);
      size_t nnode = numb % nmn;
      nodes[i] = nnode;
    } break;
    }
  }
  
  if (HYDRO_NUMA_NONE != distrib) {
    res = move_pages(0, np, (void**)pages, nodes, status, MPOL_MF_MOVE);
  } else {
    res = move_pages(0, np, (void**)pages, NULL , status, MPOL_MF_MOVE);
  }
  
  if (res != 0) {
    fprintf(stderr, "%s:%d: move_pages -> errno = %d\n", __PRETTY_FUNCTION__, __LINE__, errno);
  } else {
    int last_node = status[0];
    const char* last;
    const char* cur = data;
    // fprintf(stderr, "%s:%d: move_pages for %p of %zd elements (%zd bytes)\n", __PRETTY_FUNCTION__, __LINE__, data, n, n * selem);
    // fprintf(stderr, "\t%d: %p ... ", last_node, cur );
    last = cur;
    for (i = 1 ; i < np ; i++) {
      if (status[i] != last_node) {
        cur += ASSUMED_PAGE_SIZE;
        // fprintf(stderr, "%p (%llu)\n", cur, (unsigned long long)cur - (unsigned long long)last);
        last_node = status[i];
        // fprintf(stderr, "\t%d: %p ... ", last_node, cur);
        last = cur;
      } else {
        cur += ASSUMED_PAGE_SIZE;
      }
    }
    // fprintf(stderr, "%p (%llu)\n", cur, (unsigned long long)cur - (unsigned long long)last);
  }
#endif
}
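
force_move_pages() above is built on the move_pages() system call; passing NULL for the nodes argument moves nothing and instead reports the node each page currently resides on. A minimal standalone query sketch (the 4 KiB page size and 4-page buffer are assumptions for illustration; link with -lnuma):

#include <numaif.h>    // move_pages
#include <cstdio>
#include <cstdlib>
#include <cstring>

int main() {
    const size_t kPageSize = 4096;       // assumption for the sketch; see ASSUMED_PAGE_SIZE above
    const size_t kNumPages = 4;

    char *data = NULL;
    if (posix_memalign(reinterpret_cast<void**>(&data), kPageSize, kNumPages * kPageSize) != 0) {
        std::perror("posix_memalign");
        return 1;
    }
    std::memset(data, 0, kNumPages * kPageSize);  // touch the pages so they are actually mapped

    void *pages[kNumPages];
    int status[kNumPages];
    for (size_t i = 0; i < kNumPages; ++i) {
        pages[i] = data + i * kPageSize;
    }
    // nodes == NULL: do not move anything, just fill status[] with each page's current node.
    long res = move_pages(0, kNumPages, pages, NULL, status, 0);
    if (res != 0) {
        std::perror("move_pages");
    } else {
        for (size_t i = 0; i < kNumPages; ++i) {
            std::printf("page %zu is on node %d\n", i, status[i]);
        }
    }
    std::free(data);
    return 0;
}
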
Example #18
char * build_default_affinity_string (int shuffle) {
   int nr_nodes = numa_num_configured_nodes();
   int nr_cores = numa_num_configured_cpus();

   char * str;
   int str_size = 512;
   int str_written = 0;

   int i;

   struct bitmask ** bm = (struct bitmask**) malloc(sizeof(struct bitmask*) * nr_nodes);

   for (i = 0; i < nr_nodes; i++) {
      bm[i] = numa_allocate_cpumask();
      numa_node_to_cpus(i, bm[i]);
   }

   str = (char*) malloc(str_size * sizeof(char));
   assert(str);

   if(!shuffle) {
      for(i = 0; i < nr_nodes; i++) {
         int j;
         for(j = 0; j < nr_cores; j++) {
            if (numa_bitmask_isbitset(bm[i], j)) {
               add_core_to_str(&str, &str_size, &str_written, j);
            }
         }
      }
   }
   else {
      int next_node = 0;

      for(i = 0; i < nr_cores; i++) {
         int idx = (i / nr_nodes) + 1;
         int found = 0;
         int j = 0;

         do {
            if (numa_bitmask_isbitset(bm[next_node], j)) {
               found++;
            }

            if(found == idx){
               add_core_to_str(&str, &str_size, &str_written, j);
               break;
            }

            j = (j + 1) % nr_cores;
         } while (found != idx);

         next_node = (next_node + 1) % nr_nodes;
      }
   }

   if(str_written) {
      str[str_written - 1] = 0;
   }

   return str;
}
Example #19
/**
 * \brief allocates size bytes of memory, page-interleaved across the nodes
 *        specified in the nodemask.
 *
 * \param size     size of the memory region in bytes
 * \param nodemask subset of nodes to consider for allocation
 * \param pagesize  preferred page size to be used
 *
 * \returns pointer to the mapped memory region
 *
 * should only be used for large areas consisting of multiple pages.
 * The memory must be freed with numa_free(). On errors NULL is returned.
 */
void *numa_alloc_interleaved_subset(size_t size, size_t pagesize,
                                    struct bitmap *nodemask)
{
    errval_t err;

    /* clear out invalid bits */
    bitmap_clear_range(nodemask, numa_num_configured_nodes(),
                       bitmap_get_nbits(nodemask));

    /* get the number of nodes */
    nodeid_t nodes = bitmap_get_weight(nodemask);
    if (nodes == 0) {
        return NULL;
    }

    NUMA_DEBUG_ALLOC("allocating interleaved using %" PRIuNODEID " nodes\n", nodes);

    assert(nodes <= numa_num_configured_nodes());

    vregion_flags_t flags;
    validate_page_size(&pagesize, &flags);
    size_t stride = pagesize;

    size_t node_size = size / nodes;
    node_size = (node_size + pagesize - 1) & ~(pagesize - 1);

    /* update total size as this may change due to rounding of node sizes*/
    size = nodes * node_size;

    /*
     * XXX: we may want to keep track of numa alloced frames
     */

    struct memobj_numa *memobj = calloc(1, sizeof(struct memobj_numa));
    err = memobj_create_numa(memobj, size, 0, numa_num_configured_nodes(), stride);
    if (err_is_fail(err)) {
        return NULL;
    }

    bitmap_bit_t node = bitmap_get_first(nodemask);
    nodeid_t node_idx=0;
    while(node != BITMAP_BIT_NONE) {
        struct capref frame;
        err = numa_frame_alloc_on_node(&frame, node_size, (nodeid_t)node, NULL);
        if (err_is_fail(err)) {
            DEBUG_ERR(err, "numa_frame_alloc_on_node");
            goto out_err;
        }
        memobj->m.f.fill(&memobj->m, node_idx, frame, 0);
        ++node_idx;
        node = bitmap_get_next(nodemask, node);
    }

    struct vregion *vreg = calloc(1, sizeof(struct vregion));
    if (vreg == NULL) {
        goto out_err;
    }
    err = vregion_map_aligned(vreg, get_current_vspace(), &memobj->m, 0, size,
                        flags, pagesize);
    if (err_is_fail(err)) {
        DEBUG_ERR(err, "vregion_map_aligned");
        goto out_err;
    }

    err = memobj->m.f.pagefault(&memobj->m, vreg, 0, 0);
    if (err_is_fail(err)) {
        vregion_destroy(vreg);
        free(vreg);
        DEBUG_ERR(err, "memobj.m.f.pagefault");
        goto out_err;
    }

    // XXX - Is this right?
    return (void *)(uintptr_t)vregion_get_base_addr(vreg);

    out_err:
    for (nodeid_t i = 0; i < node_idx; ++i) {
        struct capref frame;
        /* release the frames that were already filled into the memobj */
        memobj->m.f.unfill(&memobj->m, i, &frame, NULL);
        cap_delete(frame);
    }
    return NULL;

}
void* StorageManager::allocateSlots(const std::size_t num_slots,
                                    const int numa_node) {
#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
#elif defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  static constexpr int kLargePageMmapFlags
      = MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER;
#endif

  makeRoomForBlockOrBlob(num_slots);
  void *slots = nullptr;

#if defined(QUICKSTEP_HAVE_MMAP_LINUX_HUGETLB) || defined(QUICKSTEP_HAVE_MMAP_BSD_SUPERPAGE)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               kLargePageMmapFlags,
               -1, 0);

  // Fallback to regular mmap() if large page allocation failed. Even on
  // systems with large page support, large page allocation may fail if the
  // user running the executable is not a member of hugetlb_shm_group on Linux,
  // or if all the reserved hugepages are already in use.
  if (slots == MAP_FAILED) {
    slots = mmap(nullptr,
                 num_slots * kSlotSizeBytes,
                 PROT_READ | PROT_WRITE,
                 MAP_PRIVATE | MAP_ANONYMOUS,
                 -1, 0);
  }
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#elif defined(QUICKSTEP_HAVE_MMAP_PLAIN)
  slots = mmap(nullptr,
               num_slots * kSlotSizeBytes,
               PROT_READ | PROT_WRITE,
               MAP_PRIVATE | MAP_ANONYMOUS,
               -1, 0);
  if (slots == MAP_FAILED) {
    slots = nullptr;
  }
#else
  slots = malloc_with_alignment(num_slots * kSlotSizeBytes,
                                kCacheLineBytes);
  if (slots != nullptr) {
    memset(slots, 0x0, num_slots * kSlotSizeBytes);
  }
#endif

  if (slots == nullptr) {
    throw OutOfMemory();
  }

#if defined(QUICKSTEP_HAVE_LIBNUMA)
  if (numa_node != -1) {
    DEBUG_ASSERT(numa_node < numa_num_configured_nodes());
    struct bitmask *numa_node_bitmask = numa_allocate_nodemask();
    // numa_node can be 0 through n-1, where n is the num of NUMA nodes.
    numa_bitmask_setbit(numa_node_bitmask, numa_node);
    long mbind_status = mbind(slots,  // NOLINT(runtime/int)
                              num_slots * kSlotSizeBytes,
                              MPOL_PREFERRED,
                              numa_node_bitmask->maskp,
                              numa_node_bitmask->size,
                              0);
    numa_free_nodemask(numa_node_bitmask);
    if (mbind_status == -1) {
      LOG(WARNING) << "mbind() failed with errno " << errno << " ("
                   << std::strerror(errno) << ")";
    }
  }
#endif  // QUICKSTEP_HAVE_LIBNUMA

  total_memory_usage_ += num_slots;
  return slots;
}
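
The NUMA hint in allocateSlots() boils down to an mbind() with MPOL_PREFERRED on the freshly mapped region. A minimal standalone sketch of that pattern (node 0 and the 4 MiB size are illustration-only choices; link with -lnuma):

#include <sys/mman.h>
#include <numa.h>
#include <numaif.h>    // mbind, MPOL_PREFERRED
#include <cerrno>
#include <cstddef>
#include <cstdio>
#include <cstring>

int main() {
    const size_t kBytes = 4 << 20;   // 4 MiB region for illustration
    void *region = mmap(nullptr, kBytes, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (region == MAP_FAILED) {
        std::perror("mmap");
        return 1;
    }

    const int numa_node = 0;         // illustration; allocateSlots() receives this as a parameter
    struct bitmask *mask = numa_allocate_nodemask();
    numa_bitmask_setbit(mask, numa_node);
    // Prefer (but do not require) pages from numa_node, matching the MPOL_PREFERRED hint above.
    long rc = mbind(region, kBytes, MPOL_PREFERRED, mask->maskp, mask->size, 0);
    numa_free_nodemask(mask);
    if (rc == -1) {
        std::fprintf(stderr, "mbind() failed with errno %d (%s)\n", errno, std::strerror(errno));
    }

    std::memset(region, 0, kBytes);  // first touch allocates the physical pages
    munmap(region, kBytes);
    return 0;
}
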
Example #21
	void p_setup()
	{
#if SWEET_THREADING || SWEET_REXI_THREAD_PARALLEL_SUM
		if (omp_in_parallel())
		{
			std::cerr << "ERROR: NUMAMemManager may not be initialized within parallel region!" << std::endl;
			std::cerr << "       Call NUMAMemManager::setup() at program start" << std::endl;
			exit(1);
		}
#endif

		if (setup_done)
			return;

		const char* env_verbosity = getenv("NUMA_BLOCK_ALLOC_VERBOSITY");
		if (env_verbosity == nullptr)
			verbosity = 0;
		else
			verbosity = atoi(env_verbosity);


#if  NUMA_BLOCK_ALLOCATOR_TYPE == 0

		if (verbosity > 0)
			std::cout << "NUMA block alloc: Using default system's allocator" << std::endl;

		num_alloc_domains = 1;
		getThreadLocalDomainIdRef() = 0;

#elif  NUMA_BLOCK_ALLOCATOR_TYPE == 1

		if (verbosity > 0)
			std::cout << "NUMA block alloc: Using NUMA node granularity" << std::endl;

		/*
		 * NUMA granularity
		 */
		num_alloc_domains = numa_num_configured_nodes();
		if (verbosity > 0)
			std::cout << "num_alloc_domains: " << num_alloc_domains << std::endl;

		// set NUMA id in case that master thread has a different id than the first thread
		int cpuid = sched_getcpu();
		getThreadLocalDomainIdRef() = numa_node_of_cpu(cpuid);


#if SWEET_THREADING || SWEET_REXI_THREAD_PARALLEL_SUM
#pragma omp parallel
		{
			int cpuid = sched_getcpu();
			getThreadLocalDomainIdRef() = numa_node_of_cpu(cpuid);
		}
#else
		getThreadLocalDomainIdRef() = 0;
#endif

#elif NUMA_BLOCK_ALLOCATOR_TYPE == 2

		if (verbosity > 0)
			std::cout << "NUMA block alloc: Using allocator based on thread granularity" << std::endl;

		/*
		 * Thread granularity, use this also per default
		 */
#if SWEET_THREADING || SWEET_REXI_THREAD_PARALLEL_SUM
		num_alloc_domains = omp_get_max_threads();
#else
		num_alloc_domains = 1;
#endif

		if (verbosity > 0)
			std::cout << "num_alloc_domains: " << num_alloc_domains << std::endl;

		// set NUMA id in case that master thread has a different id than the first thread
#if SWEET_THREADING || SWEET_REXI_THREAD_PARALLEL_SUM
		getThreadLocalDomainIdRef() = omp_get_thread_num();

#pragma omp parallel
		getThreadLocalDomainIdRef() = omp_get_thread_num();

#else
		getThreadLocalDomainIdRef() = 0;
#endif


#elif  NUMA_BLOCK_ALLOCATOR_TYPE == 3


		if (verbosity > 0)
			std::cout << "NUMA block alloc: Using non-numa single memory block chain" << std::endl;

		num_alloc_domains = 1;
		getThreadLocalDomainIdRef() = 0;

#else

#	error "Invalid NUMA_BLOCK_ALLOCATOR_TYPE"

#endif

#if SWEET_THREADING || SWEET_REXI_THREAD_PARALLEL_SUM
		if (verbosity > 0)
		{
			#pragma omp parallel
			{
				#pragma omp critical
				{
					std::cout << "	thread id " << omp_get_thread_num() << " is assigned to memory allocator domain " << getThreadLocalDomainIdRef() << std::endl;
				}
			}
		}
#endif

		domain_block_groups.resize(num_alloc_domains);

#if 0
		// TODO: care about first-touch policy
		for (auto& n : domain_block_groups)
		{
			std::size_t S = num_alloc_domains*10;

			// preallocate S different size of blocks which should be sufficient
			n.block_groups.reserve(S);
		}
#endif

		setup_done = true;
	}