END_TEST START_TEST(test_gemmBatch_3d_F) { GpuArray A; GpuArray B; GpuArray C; size_t dims[3] = {2, 3, 3}; float data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; const float res[] = {42, 78, 78, 60, 114, 114, 51, 69, 96, 66, 39, 111, 54, 54, 90, 78, 78, 132}; ga_assert_ok(GpuArray_empty(&A, ctx, GA_FLOAT, 3, dims, GA_F_ORDER)); ga_assert_ok(GpuArray_empty(&B, ctx, GA_FLOAT, 3, dims, GA_F_ORDER)); ga_assert_ok(GpuArray_empty(&C, ctx, GA_FLOAT, 3, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&A, data, sizeof(data))); ga_assert_ok(GpuArray_write(&B, data, sizeof(data))); ga_assert_ok(GpuArray_rgemmBatch_3d(cb_no_trans, cb_no_trans, 1, &A, &B, 0, &C, 0)); ga_assert_ok(GpuArray_read(data, sizeof(data), &C)); ck_assert_fbuf_eq(data, res, sizeof(res)/sizeof(float)); }
END_TEST START_TEST(test_take1_offset) { const uint32_t data[4] = {0, 1, 2, 3}; const size_t data_dims[1] = {4}; const size_t out_dims[1] = {2}; const uint32_t idx[4] = {20, 3, 3, 2}; GpuArray v; GpuArray i; GpuArray r; ga_assert_ok(GpuArray_empty(&v, ctx, GA_UINT, 1, data_dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&v, data, sizeof(data))); ga_assert_ok(GpuArray_empty(&i, ctx, GA_UINT, 1, data_dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&i, idx, sizeof(idx))); ga_assert_ok(GpuArray_empty(&r, ctx, GA_UINT, 1, out_dims, GA_C_ORDER)); /* Fake subtensor for offset */ i.offset = 8; i.dimensions[0] = 2; ga_assert_ok(GpuArray_take1(&r, &v, &i, 1)); /* The actual results are not important, this is just to check that we don't trigger the out of bounds check */ }
END_TEST START_TEST(test_basic_simple) { GpuArray a; GpuArray b; GpuArray c; GpuElemwise *ge; static const uint32_t data1[3] = {1, 2, 3}; static const uint32_t data2[3] = {4, 5, 6}; uint32_t data3[3] = {0}; size_t dims[2]; gpuelemwise_arg args[3] = {{0}}; void *rargs[3]; dims[0] = 1; dims[1] = 3; ga_assert_ok(GpuArray_empty(&a, ctx, GA_UINT, 2, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1))); ga_assert_ok(GpuArray_empty(&b, ctx, GA_UINT, 2, dims, GA_F_ORDER)); ga_assert_ok(GpuArray_write(&b, data2, sizeof(data2))); ga_assert_ok(GpuArray_empty(&c, ctx, GA_UINT, 2, dims, GA_C_ORDER)); args[0].name = "a"; args[0].typecode = GA_UINT; args[0].flags = GE_READ; args[1].name = "b"; args[1].typecode = GA_UINT; args[1].flags = GE_READ; args[2].name = "c"; args[2].typecode = GA_UINT; args[2].flags = GE_WRITE; ge = GpuElemwise_new(ctx, "", "c = a + b", 3, args, 2, 0); ck_assert_ptr_ne(ge, NULL); rargs[0] = &a; rargs[1] = &b; rargs[2] = &c; ga_assert_ok(GpuElemwise_call(ge, rargs, GE_NOCOLLAPSE)); ga_assert_ok(GpuArray_read(data3, sizeof(data3), &c)); ck_assert_int_eq(data3[0], 5); ck_assert_int_eq(data3[1], 7); ck_assert_int_eq(data3[2], 9); }
END_TEST START_TEST(test_gemmBatch_3d_S) { GpuArray A; GpuArray B; GpuArray C; ssize_t t; size_t dims[3] = {2, 3, 3}; float data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3, 4, 5, 6, 7, 8, 9}; const float res[] = {14, 32, 50, 50, 122, 194, 32, 77, 122, 26, 62, 98, 17, 53, 89, 44, 107, 170}; ga_assert_ok(GpuArray_empty(&A, ctx, GA_FLOAT, 3, dims, GA_F_ORDER)); ga_assert_ok(GpuArray_empty(&B, ctx, GA_FLOAT, 3, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_empty(&C, ctx, GA_FLOAT, 3, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&A, data, sizeof(data))); ga_assert_ok(GpuArray_write(&B, data, sizeof(data))); A.strides[0] = 8; A.strides[1] = 24; A.strides[2] = 4; GpuArray_fix_flags(&A); t = B.strides[1]; B.strides[1] = B.strides[2]; B.strides[2] = t; GpuArray_fix_flags(&B); ga_assert_ok(GpuArray_rgemmBatch_3d(cb_no_trans, cb_no_trans, 1, &A, &B, 0, &C, 1)); ga_assert_ok(GpuArray_read(data, sizeof(data), &C)); ck_assert_fbuf_eq(data, res, sizeof(res)/sizeof(float)); }
END_TEST START_TEST(test_basic_neg_strides) { GpuArray a; GpuArray b; GpuArray c; GpuElemwise *ge; static const uint32_t data1[6] = {1, 2, 3, 4, 5, 6}; static const uint32_t data2[6] = {7, 8, 9, 10, 11, 12}; uint32_t data3[6] = {0}; size_t dims[1]; gpuelemwise_arg args[3] = {{0}}; void *rargs[3]; ssize_t starts[1]; ssize_t stops[1]; ssize_t steps[1]; dims[0] = 6; ga_assert_ok(GpuArray_empty(&a, ctx, GA_UINT, 1, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1))); ga_assert_ok(GpuArray_empty(&b, ctx, GA_UINT, 1, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&b, data2, sizeof(data2))); starts[0] = 5; stops[0] = -1; steps[0] = -1; ga_assert_ok(GpuArray_index_inplace(&b, starts, stops, steps)); ga_assert_ok(GpuArray_empty(&c, ctx, GA_UINT, 1, dims, GA_C_ORDER)); args[0].name = "a"; args[0].typecode = GA_UINT; args[0].flags = GE_READ; args[1].name = "b"; args[1].typecode = GA_UINT; args[1].flags = GE_READ; args[2].name = "c"; args[2].typecode = GA_UINT; args[2].flags = GE_WRITE; ge = GpuElemwise_new(ctx, "", "c = a + b", 3, args, 1, 0); ck_assert_ptr_ne(ge, NULL); rargs[0] = &a; rargs[1] = &b; rargs[2] = &c; ga_assert_ok(GpuElemwise_call(ge, rargs, 0)); ga_assert_ok(GpuArray_read(data3, sizeof(data3), &c)); ck_assert_int_eq(data3[0], 13); ck_assert_int_eq(data3[1], 13); ck_assert_int_eq(data3[2], 13); ck_assert_int_eq(data3[3], 13); ck_assert_int_eq(data3[4], 13); ck_assert_int_eq(data3[5], 13); }
END_TEST START_TEST(test_basic_remove1) { GpuArray a; GpuArray b; GpuArray c; GpuElemwise *ge; static const uint32_t data1[6] = {1, 2, 3, 4, 5, 6}; static const uint32_t data2[6] = {7, 8, 9, 10, 11, 12}; uint32_t data3[6] = {0}; size_t dims[4]; gpuelemwise_arg args[3] = {{0}}; void *rargs[3]; dims[0] = 1; dims[1] = 3; dims[2] = 2; dims[3] = 1; ga_assert_ok(GpuArray_empty(&a, ctx, GA_UINT, 4, dims, GA_C_ORDER)); ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1))); ga_assert_ok(GpuArray_empty(&b, ctx, GA_UINT, 4, dims, GA_F_ORDER)); ga_assert_ok(GpuArray_write(&b, data2, sizeof(data2))); ga_assert_ok(GpuArray_empty(&c, ctx, GA_UINT, 4, dims, GA_C_ORDER)); args[0].name = "a"; args[0].typecode = GA_UINT; args[0].flags = GE_READ; args[1].name = "b"; args[1].typecode = GA_UINT; args[1].flags = GE_READ; args[2].name = "c"; args[2].typecode = GA_UINT; args[2].flags = GE_WRITE; ge = GpuElemwise_new(ctx, "", "c = a + b", 3, args, 0, 0); ck_assert_ptr_ne(ge, NULL); rargs[0] = &a; rargs[1] = &b; rargs[2] = &c; ga_assert_ok(GpuElemwise_call(ge, rargs, 0)); ga_assert_ok(GpuArray_read(data3, sizeof(data3), &c)); ck_assert_int_eq(data3[0], 8); ck_assert_int_eq(data3[1], 12); ck_assert_int_eq(data3[2], 11); ck_assert_int_eq(data3[3], 15); ck_assert_int_eq(data3[4], 14); ck_assert_int_eq(data3[5], 18); }