예제 #1
0
END_TEST

START_TEST(test_gemmBatch_3d_F) {
  GpuArray A;
  GpuArray B;
  GpuArray C;

  size_t dims[3] = {2, 3, 3};
  float data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9,
                  1, 2, 3, 4, 5, 6, 7, 8, 9};
  const float res[] = {42, 78, 78, 60, 114, 114, 51, 69, 96,
                       66, 39, 111, 54, 54, 90, 78, 78, 132};

  ga_assert_ok(GpuArray_empty(&A, ctx, GA_FLOAT, 3, dims, GA_F_ORDER));
  ga_assert_ok(GpuArray_empty(&B, ctx, GA_FLOAT, 3, dims, GA_F_ORDER));
  ga_assert_ok(GpuArray_empty(&C, ctx, GA_FLOAT, 3, dims, GA_C_ORDER));

  ga_assert_ok(GpuArray_write(&A, data, sizeof(data)));
  ga_assert_ok(GpuArray_write(&B, data, sizeof(data)));

  ga_assert_ok(GpuArray_rgemmBatch_3d(cb_no_trans, cb_no_trans, 1, &A, &B, 0, &C, 0));

  ga_assert_ok(GpuArray_read(data, sizeof(data), &C));

  ck_assert_fbuf_eq(data, res, sizeof(res)/sizeof(float));
}
예제 #2
0
END_TEST

START_TEST(test_take1_offset) {
  const uint32_t data[4] = {0, 1, 2, 3};
  const size_t data_dims[1] = {4};
  const size_t out_dims[1] = {2};
  const uint32_t idx[4] = {20, 3, 3, 2};
  GpuArray v;
  GpuArray i;
  GpuArray r;

  ga_assert_ok(GpuArray_empty(&v, ctx, GA_UINT, 1, data_dims, GA_C_ORDER));
  ga_assert_ok(GpuArray_write(&v, data, sizeof(data)));

  ga_assert_ok(GpuArray_empty(&i, ctx, GA_UINT, 1, data_dims, GA_C_ORDER));
  ga_assert_ok(GpuArray_write(&i, idx, sizeof(idx)));

  ga_assert_ok(GpuArray_empty(&r, ctx, GA_UINT, 1, out_dims, GA_C_ORDER));

  /* Fake subtensor for offset */
  i.offset = 8;
  i.dimensions[0] = 2;

  ga_assert_ok(GpuArray_take1(&r, &v, &i, 1));
  /* The actual results are not important, this is just to check that
     we don't trigger the out of bounds check */
}
예제 #3
0
END_TEST


START_TEST(test_basic_simple)
{
  GpuArray a;
  GpuArray b;
  GpuArray c;

  GpuElemwise *ge;

  static const uint32_t data1[3] = {1, 2, 3};
  static const uint32_t data2[3] = {4, 5, 6};
  uint32_t data3[3] = {0};

  size_t dims[2];

  gpuelemwise_arg args[3] = {{0}};
  void *rargs[3];

  dims[0] = 1;
  dims[1] = 3;

  ga_assert_ok(GpuArray_empty(&a, ctx, GA_UINT, 2, dims, GA_C_ORDER));
  ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1)));

  ga_assert_ok(GpuArray_empty(&b, ctx, GA_UINT, 2, dims, GA_F_ORDER));
  ga_assert_ok(GpuArray_write(&b, data2, sizeof(data2)));

  ga_assert_ok(GpuArray_empty(&c, ctx, GA_UINT, 2, dims, GA_C_ORDER));

  args[0].name = "a";
  args[0].typecode = GA_UINT;
  args[0].flags = GE_READ;

  args[1].name = "b";
  args[1].typecode = GA_UINT;
  args[1].flags = GE_READ;

  args[2].name = "c";
  args[2].typecode = GA_UINT;
  args[2].flags = GE_WRITE;

  ge = GpuElemwise_new(ctx, "", "c = a + b", 3, args, 2, 0);

  ck_assert_ptr_ne(ge, NULL);

  rargs[0] = &a;
  rargs[1] = &b;
  rargs[2] = &c;

  ga_assert_ok(GpuElemwise_call(ge, rargs, GE_NOCOLLAPSE));

  ga_assert_ok(GpuArray_read(data3, sizeof(data3), &c));

  ck_assert_int_eq(data3[0], 5);
  ck_assert_int_eq(data3[1], 7);
  ck_assert_int_eq(data3[2], 9);
}
예제 #4
0
END_TEST

START_TEST(test_gemmBatch_3d_S) {
  GpuArray A;
  GpuArray B;
  GpuArray C;
  ssize_t t;

  size_t dims[3] = {2, 3, 3};
  float data[] = {1, 2, 3, 4, 5, 6, 7, 8, 9,
                  1, 2, 3, 4, 5, 6, 7, 8, 9};
  const float res[] = {14, 32, 50, 50, 122, 194, 32, 77, 122,
                       26, 62, 98, 17, 53, 89, 44, 107, 170};

  ga_assert_ok(GpuArray_empty(&A, ctx, GA_FLOAT, 3, dims, GA_F_ORDER));
  ga_assert_ok(GpuArray_empty(&B, ctx, GA_FLOAT, 3, dims, GA_C_ORDER));
  ga_assert_ok(GpuArray_empty(&C, ctx, GA_FLOAT, 3, dims, GA_C_ORDER));

  ga_assert_ok(GpuArray_write(&A, data, sizeof(data)));
  ga_assert_ok(GpuArray_write(&B, data, sizeof(data)));

  A.strides[0] = 8;
  A.strides[1] = 24;
  A.strides[2] = 4;
  GpuArray_fix_flags(&A);

  t = B.strides[1];
  B.strides[1] = B.strides[2];
  B.strides[2] = t;
  GpuArray_fix_flags(&B);

  ga_assert_ok(GpuArray_rgemmBatch_3d(cb_no_trans, cb_no_trans, 1, &A, &B, 0, &C, 1));

  ga_assert_ok(GpuArray_read(data, sizeof(data), &C));

  ck_assert_fbuf_eq(data, res, sizeof(res)/sizeof(float));
}
예제 #5
0
END_TEST

START_TEST(test_basic_neg_strides)
{
  GpuArray a;
  GpuArray b;
  GpuArray c;

  GpuElemwise *ge;

  static const uint32_t data1[6] = {1, 2, 3, 4, 5, 6};
  static const uint32_t data2[6] = {7, 8, 9, 10, 11, 12};
  uint32_t data3[6] = {0};

  size_t dims[1];

  gpuelemwise_arg args[3] = {{0}};
  void *rargs[3];

  ssize_t starts[1];
  ssize_t stops[1];
  ssize_t steps[1];

  dims[0] = 6;

  ga_assert_ok(GpuArray_empty(&a, ctx, GA_UINT, 1, dims, GA_C_ORDER));
  ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1)));

  ga_assert_ok(GpuArray_empty(&b, ctx, GA_UINT, 1, dims, GA_C_ORDER));
  ga_assert_ok(GpuArray_write(&b, data2, sizeof(data2)));

  starts[0] = 5;
  stops[0] = -1;
  steps[0] = -1;

  ga_assert_ok(GpuArray_index_inplace(&b, starts, stops, steps));

  ga_assert_ok(GpuArray_empty(&c, ctx, GA_UINT, 1, dims, GA_C_ORDER));

  args[0].name = "a";
  args[0].typecode = GA_UINT;
  args[0].flags = GE_READ;

  args[1].name = "b";
  args[1].typecode = GA_UINT;
  args[1].flags = GE_READ;

  args[2].name = "c";
  args[2].typecode = GA_UINT;
  args[2].flags = GE_WRITE;

  ge = GpuElemwise_new(ctx, "", "c = a + b", 3, args, 1, 0);

  ck_assert_ptr_ne(ge, NULL);

  rargs[0] = &a;
  rargs[1] = &b;
  rargs[2] = &c;

  ga_assert_ok(GpuElemwise_call(ge, rargs, 0));

  ga_assert_ok(GpuArray_read(data3, sizeof(data3), &c));

  ck_assert_int_eq(data3[0], 13);
  ck_assert_int_eq(data3[1], 13);
  ck_assert_int_eq(data3[2], 13);
  ck_assert_int_eq(data3[3], 13);
  ck_assert_int_eq(data3[4], 13);
  ck_assert_int_eq(data3[5], 13);
}
예제 #6
0
END_TEST


START_TEST(test_basic_remove1)
{
  GpuArray a;
  GpuArray b;
  GpuArray c;

  GpuElemwise *ge;

  static const uint32_t data1[6] = {1, 2, 3, 4, 5, 6};
  static const uint32_t data2[6] = {7, 8, 9, 10, 11, 12};
  uint32_t data3[6] = {0};

  size_t dims[4];

  gpuelemwise_arg args[3] = {{0}};
  void *rargs[3];

  dims[0] = 1;
  dims[1] = 3;
  dims[2] = 2;
  dims[3] = 1;

  ga_assert_ok(GpuArray_empty(&a, ctx, GA_UINT, 4, dims, GA_C_ORDER));
  ga_assert_ok(GpuArray_write(&a, data1, sizeof(data1)));

  ga_assert_ok(GpuArray_empty(&b, ctx, GA_UINT, 4, dims, GA_F_ORDER));
  ga_assert_ok(GpuArray_write(&b, data2, sizeof(data2)));

  ga_assert_ok(GpuArray_empty(&c, ctx, GA_UINT, 4, dims, GA_C_ORDER));

  args[0].name = "a";
  args[0].typecode = GA_UINT;
  args[0].flags = GE_READ;

  args[1].name = "b";
  args[1].typecode = GA_UINT;
  args[1].flags = GE_READ;

  args[2].name = "c";
  args[2].typecode = GA_UINT;
  args[2].flags = GE_WRITE;

  ge = GpuElemwise_new(ctx, "", "c = a + b", 3, args, 0, 0);

  ck_assert_ptr_ne(ge, NULL);

  rargs[0] = &a;
  rargs[1] = &b;
  rargs[2] = &c;

  ga_assert_ok(GpuElemwise_call(ge, rargs, 0));

  ga_assert_ok(GpuArray_read(data3, sizeof(data3), &c));

  ck_assert_int_eq(data3[0], 8);
  ck_assert_int_eq(data3[1], 12);
  ck_assert_int_eq(data3[2], 11);
  ck_assert_int_eq(data3[3], 15);
  ck_assert_int_eq(data3[4], 14);
  ck_assert_int_eq(data3[5], 18);
}