Exemplo n.º 1
0
/*
 * Class:     xerial_jnuma_NumaNative
 * Method:    allocateLocal
 * Signature: (J)J
 *
 * Requests `capacity` bytes from numa_alloc_local() and hands the raw
 * address back to Java as a jlong.
 *
 * When the allocation fails, throwException() is invoked with code 11 and
 * 0 is returned.
 * NOTE(review): the meaning of code 11 is defined by throwException(),
 * which is not visible here - confirm against its definition.
 */
JNIEXPORT jlong JNICALL Java_xerial_jnuma_NumaNative_allocateLocal
    (JNIEnv *env, jobject obj, jlong capacity) {
  void *addr = numa_alloc_local((size_t) capacity);

  /* Failure path first: report it to the Java side and return a null address. */
  if (addr == NULL) {
    throwException(env, obj, 11);
    return 0L;
  }

  return (jlong) addr;
}
Exemplo n.º 2
0
/*
 * Allocates `capacity` bytes with numa_alloc_local() and wraps them in a
 * direct ByteBuffer of the same capacity.
 *
 * Returns the new buffer object. On failure NULL is returned with a Java
 * exception pending:
 *   - allocation failure raises java.lang.OutOfMemoryError;
 *   - if NewDirectByteBuffer() cannot create the wrapper, the memory is
 *     released again and whatever exception the JVM raised (if any) is left
 *     in place.
 */
JNIEXPORT jobject JNICALL Java_xerial_jnuma_NumaNative_allocLocal
(JNIEnv *env, jobject jobj, jint capacity)
{
    void* mem = numa_alloc_local((size_t) capacity);
    jobject buffer;

    if(mem == NULL) {
        /* Never wrap a NULL address in a ByteBuffer: any access from Java
         * would touch address 0. Raise an error instead. */
        jclass oom = (*env)->FindClass(env, "java/lang/OutOfMemoryError");
        if(oom != NULL) {
            (*env)->ThrowNew(env, oom, "failed to allocate local memory");
        }
        /* If FindClass failed, its own exception is already pending. */
        return NULL;
    }

    buffer = (*env)->NewDirectByteBuffer(env, mem, (jlong) capacity);
    if(buffer == NULL) {
        /* Nobody on the Java side owns the memory, so give it back here. */
        numa_free(mem, (size_t) capacity);
    }
    return buffer;
}
Exemplo n.º 3
0
/*
 * nrealloc() - allocates a new buffer
 *
 * The buffer currently in use (buf_start) is appended to other_buffers so it
 * stays reachable, then a fresh buffer of buf_size bytes is requested from
 * numa_alloc_local() and both buf_start and buf_cur are pointed at it.
 *
 * If the bookkeeping array cannot be grown the process is aborted; the
 * previous code would have dereferenced a NULL pointer at that point.
 * NOTE(review): the result of numa_alloc_local() is stored unchecked, as
 * before - callers presumably need to cope with a NULL buf_start.
 */
void numa_allocator::nrealloc(void) {
	// an empty list must go together with a zero count, otherwise the
	// freshly allocated array would contain uninitialised slots
	assert(other_buffers != NULL || num_buffers == 0);

	// realloc(NULL, n) acts like malloc(n), so the very first call and all
	// later ones share one path; the existing entries are carried over
	void** grown = (void**)realloc(other_buffers, (num_buffers + 1) * sizeof(void*));
	if(grown == NULL) {
		// other_buffers is still valid here, but there is no way to report
		// the failure from a void function - stop deterministically
		abort();
	}

	// remember the buffer that is being retired
	grown[num_buffers] = buf_start;
	other_buffers = grown;
	num_buffers++;

	// allocate new buffer & update pointers
	buf_cur = buf_start = numa_alloc_local(buf_size);
}
Exemplo n.º 4
0
/*
 * Constructor - stores the requested buffer size, resets the bookkeeping
 * members and obtains the first buffer from numa_alloc_local().
 *
 * ssize: size in bytes of each buffer requested from numa_alloc_local().
 */
numa_allocator::numa_allocator(unsigned ssize)
	: buf_size(ssize),
	  empty(false),
	  num_buffers(0),
	  buf_old(NULL),
	  other_buffers(NULL),
	  last_alloc_half(false),
	  cache_size(CACHE_LINE_SIZE)
{
	// the first buffer starts out untouched: start and cursor coincide
	buf_start = numa_alloc_local(buf_size);
	buf_cur = buf_start;
}
Exemplo n.º 5
0
/*
 * Benchmark driver.
 *
 * Prints the NUMA topology (CPUs and size of every node), then starts one
 * OpenMP thread per CPU, pins each thread with pin_to_core() and lets
 * thread 0 allocate one array with numa_alloc_local(). Three measurement
 * rounds follow, each reporting the result of measure_access() as MB/s:
 *   1. "sequential"     - the threads access the array one after another;
 *   2. "all-contention" - all threads access it at the same time;
 *   3. "two-contention" - thread 0 and one other thread at the same time.
 *
 * NOTE(review): the bandwidth formula multiplies by cache_line_size, which
 * presumably mirrors the access stride inside measure_access() - confirm.
 *
 * Returns 0 on success, 1 if NUMA is unavailable or the array could not be
 * allocated.
 */
int main(int argc, const char **argv)
{
  // libnuma leaves every other numa_* call undefined when this reports -1,
  // so it has to be checked before anything else is used
  int numa_status = numa_available();
  if (numa_status < 0)
  {
    fprintf(stderr, "numa available: %d (NUMA is not usable on this system)\n",
        numa_status);
    return 1;
  }

  int num_cpus = numa_num_task_cpus();
  printf("num cpus: %d\n", num_cpus);

  printf("numa available: %d\n", numa_status);
  numa_set_localalloc();

  struct bitmask *bm = numa_bitmask_alloc(num_cpus);
  for (int i=0; i<=numa_max_node(); ++i)
  {
    numa_node_to_cpus(i, bm);
    printf("numa node %d ", i);
    print_bitmask(bm);
    printf(" - %g GiB\n", numa_node_size(i, 0) / (1024.*1024*1024.));
  }
  numa_bitmask_free(bm);

  puts("");

  char *x = NULL;
  const size_t cache_line_size = 64;
  const size_t array_size = 100*1000*1000;
  size_t ntrips = 2;

#pragma omp parallel
  {
    assert(omp_get_num_threads() == num_cpus);
    int tid = omp_get_thread_num();

    pin_to_core(tid);
    // allocated by thread 0 only, after it has been pinned
    if(tid == 0)
      x = (char *) numa_alloc_local(array_size);

    // after this barrier every thread sees the same value of x, so either
    // all of them enter the measurements (and their barriers) or none does
#pragma omp barrier
    if (x != NULL)
    {
      // {{{ single access
      for (int i = 0; i<num_cpus; ++i)
      {
        if (tid == i)
        {
          double t = measure_access(x, array_size, ntrips);
          printf("sequential core %d -> core 0 : BW %g MB/s\n",
              i, array_size*ntrips*cache_line_size / t / 1e6);
        }
#pragma omp barrier
      }
      // }}}

      // {{{ everybody contends for one

      {
        if (tid == 0) puts("");

#pragma omp barrier
        double t = measure_access(x, array_size, ntrips);
#pragma omp barrier
        // print one thread at a time so the lines come out in core order
        for (int i = 0; i<num_cpus; ++i)
        {
          if (tid == i)
            printf("all-contention core %d -> core 0 : BW %g MB/s\n",
                tid, array_size*ntrips*cache_line_size / t / 1e6);
#pragma omp barrier
        }
      }

      // }}}

      // {{{ zero and someone else contending

      if (tid == 0) puts("");

#pragma omp barrier
      for (int i = 1; i<num_cpus; ++i)
      {
        // only read by the two threads that measured in this round
        double t = 0.0;
        if (tid == i || tid == 0)
          t = measure_access(x, array_size, ntrips);

#pragma omp barrier
        if (tid == 0)
        {
          printf("two-contention core %d -> core 0 : BW %g MB/s\n",
              tid, array_size*ntrips*cache_line_size / t / 1e6);
        }
#pragma omp barrier
        if (tid == i)
        {
          printf("two-contention core %d -> core 0 : BW %g MB/s\n\n",
              tid, array_size*ntrips*cache_line_size / t / 1e6);
        }
#pragma omp barrier
      }
      // }}}
    }
  }

  if (x == NULL)
  {
    fprintf(stderr, "failed to allocate %zu bytes of node-local memory\n",
        array_size);
    return 1;
  }
  numa_free(x, array_size);

  return 0;
}
Exemplo n.º 6
0
int csr_to_blk(struct csr_mat_t *csr, struct blk_mat_t *blk)
{
    blk->rows = csr->rows;
    blk->cols = csr->cols;
    blk->non_zeros = csr->non_zeros;

    int blk_row = (csr->rows + BLOCK_SIZE - 1) / BLOCK_SIZE;
    int blk_col = (csr->cols + BLOCK_SIZE - 1) / BLOCK_SIZE;

    printf("Notify: blk_row = %d, blk_col = %d.\n", blk_row, blk_col);

    int blk_num = blk_row * blk_col;

    blk->types = (blk_type_t*)numa_alloc_local(blk_num * sizeof(blk_type_t));
    blk->row_id = (DWORD*)numa_alloc_local((blk_num + 1) * sizeof(DWORD));
    blk->col_idx = (WORD*)numa_alloc_local(blk->non_zeros * sizeof(DWORD));
    blk->vals = (FLOAT*)numa_alloc_local(blk->non_zeros * sizeof(FLOAT));
    blk->row_id[0] = 0;

    int *blk_cnt = (int*)calloc(blk_num, sizeof(int));
    int idx;
    int i, j;
    int x, y;
    for (i = 0; i < csr->rows; i++)
    {
        x = i / BLOCK_SIZE;
        for (j = csr->row_ptr[i]; j < csr->row_ptr[i + 1]; j++)
        {
            y = csr->col_idx[j] / BLOCK_SIZE;
            idx = x * blk_col + y;
            blk_cnt[idx]++;
        }
    }

    int block_size;
    for (i = 0; i < blk_row; i++)
    {
        for (j = 0; j < blk_col; j++)
        {
            // get the real block size
            block_size = blk->rows - i * BLOCK_SIZE;
            if (block_size > BLOCK_SIZE)
            {
                block_size = BLOCK_SIZE;
            }

            idx = i * blk_col + j;

            if (blk_cnt[idx] >= block_size * CSR_THRESHOLD)
            {
                blk->row_id[idx + 1] = blk->row_id[idx] + block_size;
                blk->types[idx]= BLK_CSR;
            }
            else
            {
                blk->row_id[idx + 1] = blk->row_id[idx] + blk_cnt[idx];
                blk->types[idx]= BLK_COO;
            }
        }
    }
    blk->row_info = (WORD*)numa_alloc_local(blk->row_id[blk_num] * sizeof(WORD));
    memset(blk->row_info, 0, blk->row_id[blk_num] * sizeof(WORD));

    int *blk_idx = (int*)calloc(blk_num, sizeof(int));
    INT64 *blk_pos = (INT64*)malloc(blk_num * sizeof(INT64));
    blk_pos[0] = 0;
    for (i = 1; i < blk_num; i++)
    {
        blk_pos[i] = blk_pos[i - 1] + blk_cnt[i - 1];
    }

    WORD *cur_row_info;
    WORD *cur_col_idx;
    FLOAT *cur_vals;
    for (i = 0; i < csr->rows; i++)
    {
        x = i / BLOCK_SIZE;
        for (j = csr->row_ptr[i]; j < csr->row_ptr[i + 1]; j++)
        {
            y = csr->col_idx[j] / BLOCK_SIZE;
            idx = x * blk_col + y;

            cur_row_info = blk->row_info + blk->row_id[idx];
            cur_col_idx = blk->col_idx + blk_pos[idx];
            cur_vals = blk->vals + blk_pos[idx];

            if (blk->types[idx] == BLK_COO)
            {
                cur_row_info[blk_idx[idx]] = i - x * BLOCK_SIZE;
            }
            else if (blk->types[idx] == BLK_CSR)
            {
                cur_row_info[i - x * BLOCK_SIZE]++;
            }
            cur_col_idx[blk_idx[idx]] = csr->col_idx[j] - y * BLOCK_SIZE;
            cur_vals[blk_idx[idx]] = csr->vals[j];
            blk_idx[idx]++;
        }
    }

    free(blk_cnt);
    free(blk_idx);
    free(blk_pos);

    return 0;
}