Example #1
0
/*
 * Load a CSR matrix from a binary file into *mat.
 *
 * The file is read in this order: rows (int), cols (int), non_zeros (INT64),
 * then rows + 1 row pointers (DWORD), non_zeros column indices (int) and
 * non_zeros values (FLOAT). The three arrays are allocated with numa_alloc().
 *
 * Returns 0 on success. Returns -1 if the file cannot be opened, the header
 * or any array is short, a dimension is negative, or an allocation fails.
 * The file is closed on every path. On failure, array pointers that were not
 * allocated are NULL; buffers that were already allocated stay attached to
 * *mat so the caller can release them.
 */
int read_csr_mat(const char *file_name, struct csr_mat_t *mat)
{
    int status = -1;
    size_t ptr_count;
    size_t nz_count;

    FILE *fp = fopen(file_name, "rb");
    if (fp == NULL)
    {
        return -1;
    }

    /* Give the caller a well-defined state if we bail out early. */
    mat->row_ptr = NULL;
    mat->col_idx = NULL;
    mat->vals = NULL;

    if (fread(&mat->rows, sizeof(int), 1, fp) != 1 ||
        fread(&mat->cols, sizeof(int), 1, fp) != 1 ||
        fread(&mat->non_zeros, sizeof(INT64), 1, fp) != 1)
    {
        goto done;
    }

    /* A corrupt header must not turn into a huge or wrapped allocation size. */
    if (mat->rows < 0 || mat->cols < 0 || mat->non_zeros < 0)
    {
        goto done;
    }

    ptr_count = (size_t)mat->rows + 1;
    nz_count = (size_t)mat->non_zeros;

    mat->row_ptr = (DWORD*)numa_alloc(ptr_count * sizeof(DWORD));
    mat->col_idx = (int*)numa_alloc(nz_count * sizeof(int));
    mat->vals    = (FLOAT*)numa_alloc(nz_count * sizeof(FLOAT));

    /* A zero-byte request may legitimately yield NULL, so only treat NULL as
     * a failure when elements were actually requested. */
    if (mat->row_ptr == NULL ||
        (nz_count > 0 && (mat->col_idx == NULL || mat->vals == NULL)))
    {
        goto done;
    }

    if (fread(mat->row_ptr, sizeof(DWORD), ptr_count, fp) != ptr_count ||
        fread(mat->col_idx, sizeof(int), nz_count, fp) != nz_count ||
        fread(mat->vals, sizeof(FLOAT), nz_count, fp) != nz_count)
    {
        goto done;
    }

    printf("Row x Column: %d x %d\n", mat->rows, mat->cols);
    /* Cast so the format specifier matches regardless of how INT64 is defined. */
    printf("Non-zero elements number: %lld\n", (long long)mat->non_zeros);

    status = 0;

done:
    fclose(fp);
    return status;
}
Example #2
0
/*
 * Build csr_re as a row-permuted copy of csr.
 *
 * reorder_map is filled with the identity permutation and handed, together
 * with the per-row lengths, to row_sort(). Afterwards output row i is copied
 * from input row reorder_map[i] using row_len[i] as its length.
 * NOTE(review): this relies on row_sort() permuting row_len and reorder_map
 * together so that row_len[i] is the length of row reorder_map[i] - confirm.
 *
 * reorder_map must have room for csr->rows entries. The arrays of csr_re are
 * allocated here with numa_alloc().
 *
 * Returns 0 on success, -1 if an allocation fails. On failure csr_re may
 * hold partially allocated arrays.
 */
int csr_reorder(struct csr_mat_t *csr, struct csr_mat_t *csr_re, int *reorder_map)
{
    int i;
    size_t idx = 0; /* running element offset; wider than int so large matrices do not overflow it */

    int *row_len = (int*)malloc(csr->rows * sizeof(int));
    /* malloc(0) may return NULL, which is not an error for an empty matrix. */
    if (row_len == NULL && csr->rows > 0)
    {
        return -1;
    }

    for (i = 0; i < csr->rows; i++)
    {
        reorder_map[i] = i;
        row_len[i] = csr->row_ptr[i + 1] - csr->row_ptr[i];
    }
    row_sort(row_len, reorder_map, csr->rows);

    csr_re->rows = csr->rows;
    csr_re->cols = csr->cols;
    csr_re->non_zeros = csr->non_zeros;

    csr_re->row_ptr = (DWORD*)numa_alloc((csr_re->rows + 1) * sizeof(DWORD));
    csr_re->col_idx = (int*)numa_alloc(csr_re->non_zeros * sizeof(int));
    csr_re->vals = (FLOAT*)numa_alloc(csr_re->non_zeros * sizeof(FLOAT));

    if (csr_re->row_ptr == NULL ||
        (csr_re->non_zeros > 0 && (csr_re->col_idx == NULL || csr_re->vals == NULL)))
    {
        free(row_len);
        return -1;
    }

    csr_re->row_ptr[0] = 0;
    for (i = 0; i < csr_re->rows; i++)
    {
        /* Start of the source row that lands at position i. */
        size_t src = csr->row_ptr[reorder_map[i]];
        size_t len = (size_t)row_len[i];

        memcpy(csr_re->col_idx + idx, csr->col_idx + src, len * sizeof(int));
        memcpy(csr_re->vals + idx, csr->vals + src, len * sizeof(FLOAT));
        idx += len;
        csr_re->row_ptr[i + 1] = idx;
    }

    free(row_len);

    return 0;
}
Example #3
0
/*
 * Requests capacity bytes from numa_alloc and returns the resulting address
 * to Java as a jlong. A failed allocation is reported on stdout only; the
 * (NULL) address is still returned, so the Java side sees 0.
 */
JNIEXPORT jlong JNICALL Java_xerial_jnuma_NumaNative_allocMemory
(JNIEnv *env, jobject obj, jlong capacity) {
    size_t byte_count = (size_t) capacity;
    void* region = numa_alloc(byte_count);

    if (!region) {
        printf("failed to allocate local memory\n");
    }

    return (jlong) region;
}
Example #4
0
/*
 * Class:     xerial_jnuma_NumaNative
 * Method:    alloc
 * Signature: (I)Ljava/nio/ByteBuffer;
 *
 * Allocates capacity bytes with numa_alloc and wraps them in a direct
 * ByteBuffer. If capacity is negative or the allocation fails, the failure
 * message is printed and NULL (Java null) is returned instead of wrapping a
 * NULL address, which NewDirectByteBuffer does not accept.
 */
JNIEXPORT jobject JNICALL Java_xerial_jnuma_NumaNative_alloc
(JNIEnv *env, jobject obj, jint capacity) {
    void* mem = NULL;

    /* A negative jint would turn into an enormous size_t; reject it up front. */
    if (capacity >= 0) {
        mem = numa_alloc((size_t) capacity);
    }

    if (mem == NULL) {
        printf("failed to allocate local memory\n");
        return NULL;
    }

    return (*env)->NewDirectByteBuffer(env, mem, (jlong) capacity);
}
Example #5
0
/*
 * Class:     xerial_jnuma_NumaNative
 * Method:    allocate
 * Signature: (J)J
 *
 * Asks numa_alloc for capacity bytes and returns the address as a jlong.
 * When numa_alloc yields NULL, throwException is invoked with code 11 and
 * 0 is returned.
 */
JNIEXPORT jlong JNICALL Java_xerial_jnuma_NumaNative_allocate
    (JNIEnv *env, jobject obj, jlong capacity) {
  void* region = numa_alloc((size_t) capacity);

  if(region == NULL) {
    /* Failure path: signal the Java side, then hand back a zero address. */
    throwException(env, obj, 11);
    return 0L;
  }

  return (jlong) region;
}
Example #6
0
int csr_transpose(struct csr_mat_t *csr, struct csr_mat_t *csr_t)
{
    csr_t->cols = csr->rows;
    csr_t->rows = csr->cols;
    csr_t->non_zeros = csr->non_zeros;

    csr_t->row_ptr = (DWORD*)numa_alloc((csr_t->rows + 1) * sizeof(DWORD));
    csr_t->col_idx = (int*)numa_alloc(csr_t->non_zeros * sizeof(int));
    csr_t->vals = (FLOAT*)numa_alloc(csr_t->non_zeros * sizeof(FLOAT));
    memset(csr_t->row_ptr, 0, (csr_t->rows + 1) * sizeof(DWORD));

    int i, j;
    for (i = 0; i < csr->rows; i++)
    {
        for (j = csr->row_ptr[i]; j < csr->row_ptr[i + 1]; j++)
        {
            csr_t->row_ptr[csr->col_idx[j] + 1]++;
        }
    }
    for (i = 1; i <= csr_t->rows; i++)
    {
        csr_t->row_ptr[i] += csr_t->row_ptr[i - 1];
    }

    int *row_start = (int*)malloc(csr_t->rows * sizeof(int));
    memcpy(row_start, csr_t->row_ptr, csr_t->rows * sizeof(int));

    for (i = 0; i < csr->rows; i++)
    {
        for (j = csr->row_ptr[i]; j < csr->row_ptr[i + 1]; j++)
        {
            int row = row_start[csr->col_idx[j]];
            csr_t->col_idx[row] = i;
            csr_t->vals[row] = csr->vals[j];
            row_start[csr->col_idx[j]]++;
        }
    }

    free(row_start);
    return 0;
}
Example #7
0
/*
 * Split csr into `count` sub-matrices, stored in csr_cont, choosing the cut
 * points so that each part holds roughly non_zeros / count entries.
 *
 *   dir == SPLIT_HORIZON  : the parts are runs of consecutive rows.
 *   dir == SPLIT_VERTICAL : the parts are runs of consecutive columns; column
 *                           indices in each part are rebased to start at 0.
 *
 * csr_cont->split_idx receives count + 1 boundaries (row or column indices)
 * and csr_cont->csrs receives the count sub-matrices. Everything is allocated
 * with numa_alloc(); none of the allocation results are checked.
 *
 * For any other value of dir only dir, count and the two container arrays are
 * set up; the sub-matrices are left unfilled.
 *
 * Always returns 0.
 */
int split_csr_lb_nz(struct csr_mat_t *csr, struct csr_cont_t *csr_cont, int count, split_dir_t dir)
{
    int i, j;

    csr_cont->dir = dir;
    csr_cont->count = count;
    csr_cont->split_idx = (int*)numa_alloc((count + 1) * sizeof(int));
    csr_cont->csrs = (struct csr_mat_t*)numa_alloc(count * sizeof(struct csr_mat_t));

    if (dir == SPLIT_HORIZON)
    {
        struct csr_mat_t *csrs = csr_cont->csrs;
        int *split_idx = csr_cont->split_idx;
        split_idx[0] = 0;

        /* Target number of entries per part (integer division). */
        /* NOTE(review): avg_ele / split_val are int; if non_zeros is wider
         * than int the targets may truncate or overflow - confirm. */
        int avg_ele = csr->non_zeros / count, split_val;
        for (i = 1, j = 1; i < count; i++)
        {
            split_val = i * avg_ele;
            /* Advance to the first row boundary at or past the target. */
            /* NOTE(review): j is not bounded against csr->rows here. */
            while (csr->row_ptr[j] < split_val)
            {
                j++;
            }
            /* Step back one row if the previous boundary is closer to the target. */
            if (csr->row_ptr[j] - split_val > split_val - csr->row_ptr[j - 1])
            {
                j--;
            }
            split_idx[i] = j;
        }
        /* i == count here: the last boundary is the end of the matrix. */
        split_idx[i] = csr->rows;

        /* Offset into csr->col_idx / csr->vals of the part being copied. */
        int item_idx = 0;
        for (i = 0; i < count; i++)
        {
            csrs[i].rows = split_idx[i + 1] - split_idx[i];
            printf("csrs[%d].rows = %d\n", i, csrs[i].rows);
            csrs[i].cols = csr->cols;
            csrs[i].non_zeros = csr->row_ptr[split_idx[i + 1]] - csr->row_ptr[split_idx[i]];

            csrs[i].row_ptr = (DWORD*)numa_alloc((csrs[i].rows + 1) * sizeof(DWORD));
            csrs[i].col_idx = (int*)numa_alloc(csrs[i].non_zeros * sizeof(int));
            csrs[i].vals = (FLOAT*)numa_alloc(csrs[i].non_zeros * sizeof(FLOAT));

            memcpy(csrs[i].row_ptr, csr->row_ptr + split_idx[i], (csrs[i].rows + 1) * sizeof(DWORD));
            memcpy(csrs[i].col_idx, csr->col_idx + item_idx, csrs[i].non_zeros * sizeof(int));
            memcpy(csrs[i].vals, csr->vals + item_idx, csrs[i].non_zeros * sizeof(FLOAT));

            /* Rebase the copied row pointers so the part starts at offset 0. */
            for (j = 0; j <= csrs[i].rows; j++)
            {
                csrs[i].row_ptr[j] -= csr->row_ptr[split_idx[i]];
            }
            item_idx += csrs[i].non_zeros;
        }
    }
    else if (dir == SPLIT_VERTICAL)
    {
        struct csr_mat_t *csrs = csr_cont->csrs;
        int *split_idx = csr_cont->split_idx;
        split_idx[0] = 0;

        /* col_cnt[c + 1] = number of entries in column c (zero-initialised by calloc). */
        /* NOTE(review): the calloc result is not checked. */
        INT64 *col_cnt = (INT64*)calloc((csr->cols + 1), sizeof(INT64));
        for (i = 0; i < csr->rows; i++)
        {
            for (j = csr->row_ptr[i]; j < csr->row_ptr[i + 1]; j++)
            {
                col_cnt[csr->col_idx[j] + 1]++;
            }
        }

        /* NOTE(review): avg_ele, split_val and cur_col are int while col_cnt
         * holds INT64 values - possible truncation for large inputs. */
        int avg_ele = csr->non_zeros / count, split_val;
        /* Running total of the column counts consumed so far. */
        int cur_col = 0;
        for (i = 1, j = 1; i < count; i++)
        {
            split_val = i * avg_ele;
            /* Accumulate column counts until the running total reaches the target. */
            do 
            {
                cur_col += col_cnt[j++];
            }
            while (cur_col < split_val);
            /* NOTE(review): j was already post-incremented above, so col_cnt[j]
             * is the entry AFTER the last one added to cur_col. If the intent
             * is to undo the last addition when the previous boundary is
             * closer, this looks like it reads the wrong element - confirm.
             * j is also not bounded against csr->cols. */
            if (cur_col - split_val > split_val - (cur_col - col_cnt[j]))
            {
                cur_col -= col_cnt[j--];
            }
            split_idx[i] = j;
        }
        /* i == count here: the last boundary is the end of the matrix. */
        split_idx[i] = csr->cols;

        /* Turn the per-column counts into a prefix sum, so that
         * col_cnt[b] - col_cnt[a] is the number of entries in columns [a, b). */
        for (i = 0; i < csr->cols; i++)
        {
            col_cnt[i + 1] += col_cnt[i];
        }

        /* Size and allocate every part; row pointers start out zeroed. */
        for (i = 0; i < count; i++)
        {
            csrs[i].rows = csr->rows;
            csrs[i].cols = split_idx[i + 1] - split_idx[i];
            csrs[i].non_zeros = col_cnt[split_idx[i + 1]] - col_cnt[split_idx[i]];
            csrs[i].row_ptr = (DWORD*)numa_alloc((csrs[i].rows + 1) * sizeof(DWORD));
            csrs[i].col_idx = (int*)numa_alloc(csrs[i].non_zeros * sizeof(int));
            csrs[i].vals = (FLOAT*)numa_alloc(csrs[i].non_zeros * sizeof(FLOAT));
            memset(csrs[i].row_ptr, 0, (csrs[i].rows + 1) * sizeof(DWORD));
        }

        int col, k;
        for (i = 0; i < csr->rows; i++)
        {
            /* row_ptr[i + 1] doubles as the write cursor for row i of each
             * part; it starts at the end of that part's previous row. */
            for (j = 0; j < count; j++)
            {
                csrs[j].row_ptr[i + 1] = csrs[j].row_ptr[i];
            }
            for (j = csr->row_ptr[i]; j < csr->row_ptr[i + 1]; j++)
            {
                col = csr->col_idx[j];
                /* Linear search for the part whose column range contains col. */
                /* NOTE(review): if col >= split_idx[count], k ends up equal to
                 * count and the accesses below index past csrs. */
                for (k = 0; k < count; k++)
                {
                    if (col < split_idx[k + 1])
                    {
                        break;
                    }
                }
                /* Append the entry to part k with its column index rebased. */
                csrs[k].col_idx[csrs[k].row_ptr[i + 1]] = csr->col_idx[j] - split_idx[k];
                csrs[k].vals[csrs[k].row_ptr[i + 1]] = csr->vals[j];
                csrs[k].row_ptr[i + 1]++;
            }
        }
        
        free(col_cnt);
    }

    return 0;
}
Example #8
0
/* Forwards the request unchanged to numa_alloc and returns whatever it yields. */
void INTERNAL *qt_affinity_alloc(size_t bytes)
{                                      /*{{{ */
    void *block = numa_alloc(bytes);

    return block;
}                                      /*}}} */
Example #9
0
File: main.c Project: pigirons/spmv
/*
 * Benchmark driver.
 *
 * Usage: <program> csr_matrix_file
 *
 * Pipeline:
 *   1. query the OpenMP thread count and call thread_bind() from each thread;
 *   2. read the CSR matrix, transpose it, reorder the transposed rows,
 *      transpose back, split horizontally into one part per thread and
 *      convert the parts to the blk container format;
 *   3. run spmv_blks() once as a warm-up, then LOOP_TIME times under a
 *      CLOCK_MONOTONIC_RAW timer, and print the average time and GFLOPS.
 *
 * Exits with status 1 on a usage error, an unreadable matrix file or a
 * failed allocation; returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
    if (argc != 2)
    {
        fprintf(stderr, "usage: %s csr_matrix_file\n", argv[0]);
        exit(1);
    }

    int i;

    struct timespec start, end;

    int num_threads = 1;
#pragma omp parallel
    {
#pragma omp master
        {
            num_threads = omp_get_num_threads();
        }
    }
    printf("Thread number: %d.\n", num_threads);

#pragma omp parallel for
    for (i = 0; i < num_threads; i++)
    {
        int cpu = omp_get_thread_num();
        thread_bind(cpu);
    }

    struct csr_mat_t csr, csr_re, csr_t, csr_t_t;

    struct csr_cont_t csr_h;
    struct blk_cont_t blk_h;

    /* Without this check a missing or truncated file leaves csr uninitialised. */
    if (read_csr_mat(argv[1], &csr) != 0)
    {
        fprintf(stderr, "error: cannot read csr matrix file %s\n", argv[1]);
        exit(1);
    }
    int rows = csr.rows;
    int cols = csr.cols;
    INT64 non_zeros = csr.non_zeros;

    csr_transpose(&csr, &csr_t);
    release_csr_mat(&csr);

    /* One entry per row of the transposed matrix, i.e. per input column. */
    int *reorder_map = (int*)malloc(cols * sizeof(int));
    if (reorder_map == NULL && cols > 0)
    {
        fprintf(stderr, "error: out of memory\n");
        exit(1);
    }
    csr_reorder(&csr_t, &csr_re, reorder_map);
    /* The permutation itself is not used after the reorder. */
    free(reorder_map);
    release_csr_mat(&csr_t);
    csr_transpose(&csr_re, &csr_t_t);
    release_csr_mat(&csr_re);
    split_csr_lb_nz(&csr_t_t, &csr_h, num_threads, SPLIT_HORIZON);
    release_csr_mat(&csr_t_t);
    csr_cont_to_blk_cont(&csr_h, &blk_h);
    release_csr_cont(&csr_h);

    printf("Notify: finished the preprocessing.\n");

    FLOAT *x = (FLOAT*)numa_alloc(cols * sizeof(FLOAT));
    FLOAT *y = (FLOAT*)numa_alloc(rows * sizeof(FLOAT));
    if ((x == NULL && cols > 0) || (y == NULL && rows > 0))
    {
        fprintf(stderr, "error: out of memory\n");
        exit(1);
    }

    for (i = 0; i < cols; i++)
    {
        x[i] = 1.0;
    }

    // warm up
    spmv_blks(&blk_h, x, y, NULL);

    printf("Notify: begin csr spmv.\n");
    clock_gettime(CLOCK_MONOTONIC_RAW, &start);
    for (i = 0; i < LOOP_TIME; i++)
    {
        spmv_blks(&blk_h, x, y, NULL);
    }
    clock_gettime(CLOCK_MONOTONIC_RAW, &end);
    double time = get_sec(&start, &end) / LOOP_TIME;
    /* Two floating-point operations (multiply + add) per stored entry. */
    double gflops = 2.0 * non_zeros / time * 1e-9;
    printf("Notify: blk spmv time = %lfs, perf = %lf GFLOPS.\n", time, gflops);
    // result_file(y, rows);

    return 0;
}
Example #10
0
	/**
	 * Return a block of i_size bytes.
	 *
	 * Behaviour depends on NUMA_BLOCK_ALLOCATOR_TYPE:
	 *  - 1 or 2: reuse a cached block of the same size if one is available,
	 *            otherwise allocate with numa_alloc(). For type 1 the cache
	 *            lookup runs inside an OpenMP critical section when threading
	 *            is enabled; for type 2 no critical section is emitted.
	 *  - 3:      same cache lookup (inside a critical section when threading
	 *            is enabled), falling back to a 4096-byte aligned
	 *            posix_memalign().
	 *  - other:  always a fresh 4096-byte aligned posix_memalign().
	 *
	 * Freshly allocated blocks are passed through first_touch_init().
	 * If a fresh allocation fails, an error is written to std::cerr and the
	 * process terminates; this now applies to the numa_alloc() path as well
	 * as to the posix_memalign() paths.
	 */
	static
	inline
	T *alloc(
			std::size_t i_size		///< size of block
	)
	{
		T *data = nullptr;

#if NUMA_BLOCK_ALLOCATOR_TYPE == 1 || NUMA_BLOCK_ALLOCATOR_TYPE == 2

#	if NUMA_BLOCK_ALLOCATOR_TYPE == 1
		// dummy call here to initialize this class as part of the singleton out of critical region
		getSingletonRef();

#	if SWEET_THREADING || SWEET_REXI_THREAD_PARALLEL_SUM
#		pragma omp critical
#	endif

#	endif
		{
			// take the most recently cached block of this size, if any
			std::vector<void*>& block_list = getBlocksSameSize(i_size);

			if (block_list.size() > 0)
			{
				data = (T*)block_list.back();
				block_list.pop_back();
			}
		}

		if (data != nullptr)
			return data;

		// nothing cached: allocate a fresh block and fail loudly if that is
		// not possible, consistent with the posix_memalign branches below
		void *raw = numa_alloc(i_size);
		if (raw == nullptr)
		{
			std::cerr << "Unable to allocate memory" << std::endl;
			assert(false);
			exit(-1);
		}

		return (T*)first_touch_init(raw, i_size);

#elif NUMA_BLOCK_ALLOCATOR_TYPE == 3

#if SWEET_THREADING || SWEET_REXI_THREAD_PARALLEL_SUM
#	pragma omp critical
#endif
		{
			// take the most recently cached block of this size, if any
			std::vector<void*>& block_list = getBlocksSameSize(i_size);

			if (block_list.size() > 0)
			{
				data = (T*)block_list.back();
				block_list.pop_back();
			}
		}

		if (data != nullptr)
			return data;

		int retval = posix_memalign((void**)&data, 4096, i_size);
		if (retval != 0)
		{
			std::cerr << "Unable to allocate memory" << std::endl;
			assert(false);
			exit(-1);
		}

		first_touch_init(data, i_size);
		return data;

#else

		// allocate a new element to the list of blocks given in block_list

		// posix_memalign is thread safe
		// http://www.qnx.com/developers/docs/6.3.0SP3/neutrino/lib_ref/p/posix_memalign.html
		int retval = posix_memalign((void**)&data, 4096, i_size);
		if (retval != 0)
		{
			std::cerr << "Unable to allocate memory" << std::endl;
			assert(false);
			exit(-1);
		}

		first_touch_init(data, i_size);
		return data;

#endif
	}