END_TEST START_TEST(test_gemmBatch_3d_F) { GpuArray A; GpuArray B; GpuArray C; size_t dims[3] = {2, 3, 3}; float data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; const float res[] = {42, 78, 78, 60, 114, 114, 51, 69, 96, 66, 39, 111, 54, 54, 90, 78, 78, 132}; ga_assert_ok(GpuArray_empty(&A, ctx, GA_FLOAT, 3, dims, GA_F_ORDER)); ga_assert_ok(GpuArray_empty(&B, ctx, GA_FLOAT, 3, dims, GA_F_ORDER)); ga_assert_ok(GpuArray_empty(&C, ctx, GA_FLOAT, 3, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&A, data, sizeof(data))); ga_assert_ok(GpuArray_write(&B, data, sizeof(data))); ga_assert_ok(GpuArray_rgemmBatch_3d(cb_no_trans, cb_no_trans, 1, &A, &B, 0, &C, 0)); ga_assert_ok(GpuArray_read(data, sizeof(data), &C)); ck_assert_fbuf_eq(data, res, sizeof(res)/sizeof(float)); }
END_TEST START_TEST(test_basic_simple) { GpuArray a; GpuArray b; GpuArray c; GpuElemwise *ge; static const uint32_t data1[3] = {1, 2, 3}; static const uint32_t data2[3] = {4, 5, 6}; uint32_t data3[3] = {0}; size_t dims[2]; gpuelemwise_arg args[3] = {{0}}; void *rargs[3]; dims[0] = 1; dims[1] = 3; ga_assert_ok(GpuArray_empty(&a, ctx, GA_UINT, 2, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1))); ga_assert_ok(GpuArray_empty(&b, ctx, GA_UINT, 2, dims, GA_F_ORDER)); ga_assert_ok(GpuArray_write(&b, data2, sizeof(data2))); ga_assert_ok(GpuArray_empty(&c, ctx, GA_UINT, 2, dims, GA_C_ORDER)); args[0].name = "a"; args[0].typecode = GA_UINT; args[0].flags = GE_READ; args[1].name = "b"; args[1].typecode = GA_UINT; args[1].flags = GE_READ; args[2].name = "c"; args[2].typecode = GA_UINT; args[2].flags = GE_WRITE; ge = GpuElemwise_new(ctx, "", "c = a + b", 3, args, 2, 0); ck_assert_ptr_ne(ge, NULL); rargs[0] = &a; rargs[1] = &b; rargs[2] = &c; ga_assert_ok(GpuElemwise_call(ge, rargs, GE_NOCOLLAPSE)); ga_assert_ok(GpuArray_read(data3, sizeof(data3), &c)); ck_assert_int_eq(data3[0], 5); ck_assert_int_eq(data3[1], 7); ck_assert_int_eq(data3[2], 9); }
int GpuArray_fdump(FILE *fd, const GpuArray *a) { char *buf, *p; size_t s = GpuArray_ITEMSIZE(a); size_t k; unsigned int i; int err; for (i = 0; i < a->nd; i++) s *= a->dimensions[i]; buf = malloc(s); if (buf == NULL) return GA_MEMORY_ERROR; err = GpuArray_read(buf, s, a); if (err != GA_NO_ERROR) { free(buf); return err; } p = buf; k = 0; while (s) { fprintf(fd, "[%" SPREFIX "u] = ", k); switch (a->typecode) { case GA_UINT: fprintf(fd, "%u", *(unsigned int *)p); break; case GA_LONG: fprintf(fd, "%lld", (long long)*(int64_t *)p); break; case GA_FLOAT: fprintf(fd, "%f", *(float *)p); break; case GA_SSIZE: fprintf(fd, "%" SPREFIX "d", *(ssize_t *)p); break; default: free(buf); fprintf(fd, "<unsupported data type %d>\n", a->typecode); return GA_UNSUPPORTED_ERROR; } s -= gpuarray_get_elsize(a->typecode); p += gpuarray_get_elsize(a->typecode); k++; fprintf(fd, "\n"); } free(buf); return GA_NO_ERROR; }
END_TEST START_TEST(test_gemmBatch_3d_S) { GpuArray A; GpuArray B; GpuArray C; ssize_t t; size_t dims[3] = {2, 3, 3}; float data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; const float res[] = {14, 32, 50, 50, 122, 194, 32, 77, 122, 26, 62, 98, 17, 53, 89, 44, 107, 170}; ga_assert_ok(GpuArray_empty(&A, ctx, GA_FLOAT, 3, dims, GA_F_ORDER)); ga_assert_ok(GpuArray_empty(&B, ctx, GA_FLOAT, 3, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_empty(&C, ctx, GA_FLOAT, 3, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&A, data, sizeof(data))); ga_assert_ok(GpuArray_write(&B, data, sizeof(data))); A.strides[0] = 8; A.strides[1] = 24; A.strides[2] = 4; GpuArray_fix_flags(&A); t = B.strides[1]; B.strides[1] = B.strides[2]; B.strides[2] = t; GpuArray_fix_flags(&B); ga_assert_ok(GpuArray_rgemmBatch_3d(cb_no_trans, cb_no_trans, 1, &A, &B, 0, &C, 1)); ga_assert_ok(GpuArray_read(data, sizeof(data), &C)); ck_assert_fbuf_eq(data, res, sizeof(res)/sizeof(float)); }
END_TEST START_TEST(test_basic_neg_strides) { GpuArray a; GpuArray b; GpuArray c; GpuElemwise *ge; static const uint32_t data1[6] = {1, 2, 3, 4, 5, 6}; static const uint32_t data2[6] = {7, 8, 9, 10, 11, 12}; uint32_t data3[6] = {0}; size_t dims[1]; gpuelemwise_arg args[3] = {{0}}; void *rargs[3]; ssize_t starts[1]; ssize_t stops[1]; ssize_t steps[1]; dims[0] = 6; ga_assert_ok(GpuArray_empty(&a, ctx, GA_UINT, 1, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1))); ga_assert_ok(GpuArray_empty(&b, ctx, GA_UINT, 1, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&b, data2, sizeof(data2))); starts[0] = 5; stops[0] = -1; steps[0] = -1; ga_assert_ok(GpuArray_index_inplace(&b, starts, stops, steps)); ga_assert_ok(GpuArray_empty(&c, ctx, GA_UINT, 1, dims, GA_C_ORDER)); args[0].name = "a"; args[0].typecode = GA_UINT; args[0].flags = GE_READ; args[1].name = "b"; args[1].typecode = GA_UINT; args[1].flags = GE_READ; args[2].name = "c"; args[2].typecode = GA_UINT; args[2].flags = GE_WRITE; ge = GpuElemwise_new(ctx, "", "c = a + b", 3, args, 1, 0); ck_assert_ptr_ne(ge, NULL); rargs[0] = &a; rargs[1] = &b; rargs[2] = &c; ga_assert_ok(GpuElemwise_call(ge, rargs, 0)); ga_assert_ok(GpuArray_read(data3, sizeof(data3), &c)); ck_assert_int_eq(data3[0], 13); ck_assert_int_eq(data3[1], 13); ck_assert_int_eq(data3[2], 13); ck_assert_int_eq(data3[3], 13); ck_assert_int_eq(data3[4], 13); ck_assert_int_eq(data3[5], 13); }
END_TEST START_TEST(test_basic_remove1) { GpuArray a; GpuArray b; GpuArray c; GpuElemwise *ge; static const uint32_t data1[6] = {1, 2, 3, 4, 5, 6}; static const uint32_t data2[6] = {7, 8, 9, 10, 11, 12}; uint32_t data3[6] = {0}; size_t dims[4]; gpuelemwise_arg args[3] = {{0}}; void *rargs[3]; dims[0] = 1; dims[1] = 3; dims[2] = 2; dims[3] = 1; ga_assert_ok(GpuArray_empty(&a, ctx, GA_UINT, 4, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1))); ga_assert_ok(GpuArray_empty(&b, ctx, GA_UINT, 4, dims, GA_F_ORDER)); ga_assert_ok(GpuArray_write(&b, data2, sizeof(data2))); ga_assert_ok(GpuArray_empty(&c, ctx, GA_UINT, 4, dims, GA_C_ORDER)); args[0].name = "a"; args[0].typecode = GA_UINT; args[0].flags = GE_READ; args[1].name = "b"; args[1].typecode = GA_UINT; args[1].flags = GE_READ; args[2].name = "c"; args[2].typecode = GA_UINT; args[2].flags = GE_WRITE; ge = GpuElemwise_new(ctx, "", "c = a + b", 3, args, 0, 0); ck_assert_ptr_ne(ge, NULL); rargs[0] = &a; rargs[1] = &b; rargs[2] = &c; ga_assert_ok(GpuElemwise_call(ge, rargs, 0)); ga_assert_ok(GpuArray_read(data3, sizeof(data3), &c)); ck_assert_int_eq(data3[0], 8); ck_assert_int_eq(data3[1], 12); ck_assert_int_eq(data3[2], 11); ck_assert_int_eq(data3[3], 15); ck_assert_int_eq(data3[4], 14); ck_assert_int_eq(data3[5], 18); }