/// Checks whole-tensor argmax()/argmin() evaluated through the SYCL device.
///
/// A 2x2x2 tensor is filled with random values scaled by 100, then element
/// (0,0,0) is forced to -1000 and element (1,1,1) to +1000. The rank-0
/// results copied back from the device are expected to be the flat indices
/// 0 (argmin) and 2*2*2 - 1 (argmax).
static void test_sycl_simple_argmax(const Eigen::SyclDevice &sycl_device) {
  const Eigen::array<DenseIndex, 3> shape{{2, 2, 2}};

  // Host-side input and the two scalar (rank-0) result holders.
  Tensor<DataType, 3, Layout, DenseIndex> host_in(shape);
  Tensor<DenseIndex, 0, Layout, DenseIndex> host_argmax;
  Tensor<DenseIndex, 0, Layout, DenseIndex> host_argmin;

  host_in.setRandom();
  host_in *= host_in.constant(100.0);
  // Plant the extremes at the first and last coordinates.
  host_in(0, 0, 0) = -1000.0;
  host_in(1, 1, 1) = 1000.0;

  const std::size_t input_bytes = host_in.size() * sizeof(DataType);
  const std::size_t result_bytes = host_argmax.size() * sizeof(DenseIndex);

  // Device buffers: one input, one per reduction result.
  DataType *dev_in = static_cast<DataType *>(sycl_device.allocate(input_bytes));
  DenseIndex *dev_argmax = static_cast<DenseIndex *>(sycl_device.allocate(result_bytes));
  DenseIndex *dev_argmin = static_cast<DenseIndex *>(sycl_device.allocate(result_bytes));

  Eigen::TensorMap<Eigen::Tensor<DataType, 3, Layout, DenseIndex> > in_map(dev_in, shape);
  Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout, DenseIndex> > argmax_map(dev_argmax);
  Eigen::TensorMap<Eigen::Tensor<DenseIndex, 0, Layout, DenseIndex> > argmin_map(dev_argmin);

  sycl_device.memcpyHostToDevice(dev_in, host_in.data(), input_bytes);

  argmax_map.device(sycl_device) = in_map.argmax();
  argmin_map.device(sycl_device) = in_map.argmin();

  sycl_device.memcpyDeviceToHost(host_argmax.data(), dev_argmax, result_bytes);
  sycl_device.memcpyDeviceToHost(host_argmin.data(), dev_argmin, result_bytes);

  VERIFY_IS_EQUAL(host_argmax(), 2*2*2 - 1);
  VERIFY_IS_EQUAL(host_argmin(), 0);

  sycl_device.deallocate(dev_in);
  sycl_device.deallocate(dev_argmax);
  sycl_device.deallocate(dev_argmin);
}
/// Checks Tensor::pad() evaluated through the SYCL device.
///
/// A random 2x3x5x7 tensor is padded by (2,1) along dimension 1 and (3,4)
/// along dimension 2, giving a 2x6x12x7 result. After copying the result back
/// to the host, every element inside the original window must equal the
/// corresponding source element and every element outside it must be zero.
static void test_simple_padding(const Eigen::SyclDevice& sycl_device) {
  // Source extents.
  IndexType srcDim0 = 2;
  IndexType srcDim1 = 3;
  IndexType srcDim2 = 5;
  IndexType srcDim3 = 7;
  array<IndexType, 4> srcRange = {{srcDim0, srcDim1, srcDim2, srcDim3}};

  Tensor<DataType, 4, DataLayout, IndexType> source(srcRange);
  source.setRandom();

  // (before, after) padding amounts per dimension.
  array<std::pair<IndexType, IndexType>, 4> padSpec;
  padSpec[0] = std::make_pair(0, 0);
  padSpec[1] = std::make_pair(2, 1);
  padSpec[2] = std::make_pair(3, 4);
  padSpec[3] = std::make_pair(0, 0);

  // Extents of the padded result.
  IndexType dstDim0 = 2;
  IndexType dstDim1 = 6;
  IndexType dstDim2 = 12;
  IndexType dstDim3 = 7;
  array<IndexType, 4> dstRange = {{dstDim0, dstDim1, dstDim2, dstDim3}};

  Tensor<DataType, 4, DataLayout, IndexType> padded(dstRange);

  const size_t srcBytes = source.size() * sizeof(DataType);
  const size_t dstBytes = padded.size() * sizeof(DataType);

  DataType* devSrc = static_cast<DataType*>(sycl_device.allocate(srcBytes));
  DataType* devDst = static_cast<DataType*>(sycl_device.allocate(dstBytes));
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> srcMap(devSrc, srcRange);
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> dstMap(devDst, dstRange);

  VERIFY_IS_EQUAL(padded.dimension(0), 2+0);
  VERIFY_IS_EQUAL(padded.dimension(1), 3+3);
  VERIFY_IS_EQUAL(padded.dimension(2), 5+7);
  VERIFY_IS_EQUAL(padded.dimension(3), 7+0);

  sycl_device.memcpyHostToDevice(devSrc, source.data(), srcBytes);
  dstMap.device(sycl_device) = srcMap.pad(padSpec);
  sycl_device.memcpyDeviceToHost(padded.data(), devDst, dstBytes);

  for (IndexType a = 0; a < dstDim0; ++a) {
    for (IndexType b = 0; b < dstDim1; ++b) {
      for (IndexType c = 0; c < dstDim2; ++c) {
        for (IndexType d = 0; d < dstDim3; ++d) {
          // The source occupies rows [2,5) of dimension 1 and [3,8) of dimension 2.
          const bool insideSource = (b >= 2 && b < 5 && c >= 3 && c < 8);
          if (insideSource) {
            VERIFY_IS_EQUAL(padded(a,b,c,d), source(a,b-2,c-3,d));
          } else {
            VERIFY_IS_EQUAL(padded(a,b,c,d), 0.0f);
          }
        }
      }
    }
  }

  sycl_device.deallocate(devSrc);
  sycl_device.deallocate(devDst);
}
/// Checks a pad() followed by reshape() expression evaluated through the SYCL
/// device.
///
/// A random 2x3x5x7 tensor is padded by (2,1) along dimension 1 and (3,4)
/// along dimension 2 (logical shape 2x6x12x7) and reshaped to 12x84. The
/// result is copied back to the host and every logical (i,j,k,l) element is
/// compared against the source value inside the original window and against
/// zero outside it.
static void test_padded_expr(const Eigen::SyclDevice& sycl_device) {
  IndexType sizeDim1 = 2;
  IndexType sizeDim2 = 3;
  IndexType sizeDim3 = 5;
  IndexType sizeDim4 = 7;
  array<IndexType, 4> tensorRange = {{sizeDim1, sizeDim2, sizeDim3, sizeDim4}};

  Tensor<DataType, 4, DataLayout, IndexType> tensor(tensorRange);
  tensor.setRandom();

  // (before, after) padding amounts per dimension.
  array<std::pair<IndexType, IndexType>, 4> paddings;
  paddings[0] = std::make_pair(0, 0);
  paddings[1] = std::make_pair(2, 1);
  paddings[2] = std::make_pair(3, 4);
  paddings[3] = std::make_pair(0, 0);

  // 2*6 = 12 and 12*7 = 84: the padded tensor's first two and last two
  // dimensions are folded together.
  Eigen::DSizes<IndexType, 2> reshape_dims;
  reshape_dims[0] = 12;
  reshape_dims[1] = 84;

  Tensor<DataType, 2, DataLayout, IndexType> result(reshape_dims);

  const size_t tensor_bytes = tensor.size() * sizeof(DataType);
  const size_t result_bytes = result.size() * sizeof(DataType);

  DataType* gpu_data1 = static_cast<DataType*>(sycl_device.allocate(tensor_bytes));
  DataType* gpu_data2 = static_cast<DataType*>(sycl_device.allocate(result_bytes));
  TensorMap<Tensor<DataType, 4, DataLayout, IndexType>> gpu1(gpu_data1, tensorRange);
  TensorMap<Tensor<DataType, 2, DataLayout, IndexType>> gpu2(gpu_data2, reshape_dims);

  sycl_device.memcpyHostToDevice(gpu_data1, tensor.data(), tensor_bytes);
  gpu2.device(sycl_device) = gpu1.pad(paddings).reshape(reshape_dims);
  sycl_device.memcpyDeviceToHost(result.data(), gpu_data2, result_bytes);

  for (IndexType i = 0; i < 2; ++i) {
    for (IndexType j = 0; j < 6; ++j) {
      for (IndexType k = 0; k < 12; ++k) {
        for (IndexType l = 0; l < 7; ++l) {
          // Map the 4-D padded coordinate onto the 2-D reshaped result; which
          // index varies fastest depends on the storage layout.
          // Held as DataType (not float) so the comparison below is not
          // performed on a narrowed copy of the element.
          const DataType result_value = DataLayout == ColMajor ?
              result(i+2*j,k+12*l) : result(j+6*i,l+7*k);
          if (j >= 2 && j < 5 && k >= 3 && k < 8) {
            VERIFY_IS_EQUAL(result_value, tensor(i,j-2,k-3,l));
          } else {
            VERIFY_IS_EQUAL(result_value, 0.0f);
          }
        }
      }
    }
  }

  sycl_device.deallocate(gpu_data1);
  sycl_device.deallocate(gpu_data2);
}
/// Checks Tensor::slice() evaluated through the SYCL device.
///
/// Two slices are taken from a random 2x3x5x7x11 tensor: a single element at
/// offset (1,2,3,4,5), and a 1x1x2x2x3 window starting at (1,1,3,4,5). Each
/// slice is computed on the device, copied back, and compared element by
/// element with the host tensor.
static void test_simple_slice(const Eigen::SyclDevice &sycl_device) {
  int dim0 = 2;
  int dim1 = 3;
  int dim2 = 5;
  int dim3 = 7;
  int dim4 = 11;
  array<int, 5> fullRange = {{dim0, dim1, dim2, dim3, dim4}};

  Tensor<DataType, 5, DataLayout> source(fullRange);
  source.setRandom();

  // --- Slice 1: a single element -----------------------------------------
  array<int, 5> unitRange = {{1, 1, 1, 1, 1}};
  Tensor<DataType, 5, DataLayout> unitSlice(unitRange);

  const size_t sourceBytes = source.size() * sizeof(DataType);
  const size_t unitBytes = unitSlice.size() * sizeof(DataType);

  DataType* devSource = static_cast<DataType*>(sycl_device.allocate(sourceBytes));
  DataType* devUnit = static_cast<DataType*>(sycl_device.allocate(unitBytes));
  TensorMap<Tensor<DataType, 5, DataLayout>> sourceMap(devSource, fullRange);
  TensorMap<Tensor<DataType, 5, DataLayout>> unitMap(devUnit, unitRange);

  Eigen::DSizes<ptrdiff_t, 5> unitOffsets(1,2,3,4,5);
  Eigen::DSizes<ptrdiff_t, 5> unitExtents(1,1,1,1,1);

  sycl_device.memcpyHostToDevice(devSource, source.data(), sourceBytes);
  unitMap.device(sycl_device) = sourceMap.slice(unitOffsets, unitExtents);
  sycl_device.memcpyDeviceToHost(unitSlice.data(), devUnit, unitBytes);

  VERIFY_IS_EQUAL(unitSlice(0,0,0,0,0), source(1,2,3,4,5));

  // --- Slice 2: a 1x1x2x2x3 window ---------------------------------------
  array<int, 5> windowRange = {{1,1,2,2,3}};
  Tensor<DataType, 5, DataLayout> windowSlice(windowRange);

  const size_t windowBytes = windowSlice.size() * sizeof(DataType);

  DataType* devWindow = static_cast<DataType*>(sycl_device.allocate(windowBytes));
  TensorMap<Tensor<DataType, 5, DataLayout>> windowMap(devWindow, windowRange);

  Eigen::DSizes<ptrdiff_t, 5> windowOffsets(1,1,3,4,5);
  Eigen::DSizes<ptrdiff_t, 5> windowExtents(1,1,2,2,3);

  // The source is already resident on the device from the first slice.
  windowMap.device(sycl_device) = sourceMap.slice(windowOffsets, windowExtents);
  sycl_device.memcpyDeviceToHost(windowSlice.data(), devWindow, windowBytes);

  for (int a = 0; a < 2; ++a) {
    for (int b = 0; b < 2; ++b) {
      for (int c = 0; c < 3; ++c) {
        VERIFY_IS_EQUAL(windowSlice(0,0,a,b,c), source(1,1,3+a,4+b,5+c));
      }
    }
  }

  sycl_device.deallocate(devSource);
  sycl_device.deallocate(devUnit);
  sycl_device.deallocate(devWindow);
}
/// Checks Tensor::broadcast() evaluated through the SYCL device.
///
/// A 2x3x5x7 float tensor holding its own linear indices is broadcast by
/// factors (2,3,1,4) into a 4x9x5x28 tensor on the device. After copying the
/// result back, out(i,j,k,l) must match input(i%2, j%3, k%5, l%7).
static void test_broadcast_sycl(const Eigen::SyclDevice &sycl_device) {
  // BROADCAST test:
  array<int, 4> srcRange = {{2, 3, 5, 7}};
  array<int, 4> factors = {{2, 3, 1, 4}};

  // Output extents are the element-wise product of srcRange and factors.
  array<int, 4> dstRange;
  for (size_t d = 0; d < dstRange.size(); ++d) {
    dstRange[d] = srcRange[d] * factors[d];
  }

  Tensor<float, 4> input(srcRange);
  Tensor<float, 4> out(dstRange);

  for (size_t d = 0; d < srcRange.size(); ++d) {
    VERIFY_IS_EQUAL(out.dimension(d), dstRange[d]);
  }

  // Fill the input with its own linear index so each element is distinct.
  for (int n = 0; n < input.size(); ++n) {
    input(n) = static_cast<float>(n);
  }

  float *devIn = static_cast<float*>(
      sycl_device.allocate(input.dimensions().TotalSize()*sizeof(float)));
  float *devOut = static_cast<float*>(
      sycl_device.allocate(out.dimensions().TotalSize()*sizeof(float)));

  TensorMap<Tensor<float, 4>> inMap(devIn, srcRange);
  TensorMap<Tensor<float, 4>> outMap(devOut, dstRange);

  sycl_device.memcpyHostToDevice(devIn, input.data(),
                                 (input.dimensions().TotalSize())*sizeof(float));
  outMap.device(sycl_device) = inMap.broadcast(factors);
  sycl_device.memcpyDeviceToHost(out.data(), devOut,
                                 (out.dimensions().TotalSize())*sizeof(float));

  for (int a = 0; a < 4; ++a) {
    for (int b = 0; b < 9; ++b) {
      for (int c = 0; c < 5; ++c) {
        for (int d = 0; d < 28; ++d) {
          VERIFY_IS_APPROX(input(a%2,b%3,c%5,d%7), out(a,b,c,d));
        }
      }
    }
  }
  printf("Broadcast Test Passed\n");

  sycl_device.deallocate(devIn);
  sycl_device.deallocate(devOut);
}
/// Checks that a forced evaluation (`.eval()`) inside a larger expression
/// works through the SYCL device.
///
/// Computes out = (in1 + in2).eval() * in2 on the device for two 100x20x20
/// tensors filled with random values offset by 10, copies the result back,
/// and compares it element-wise with the same expression computed on the host.
void test_forced_eval_sycl(const Eigen::SyclDevice &sycl_device) {
  int sizeDim1 = 100;
  int sizeDim2 = 20;
  int sizeDim3 = 20;
  Eigen::array<int, 3> tensorRange = {{sizeDim1, sizeDim2, sizeDim3}};

  Eigen::Tensor<DataType, 3, DataLayout> in1(tensorRange);
  Eigen::Tensor<DataType, 3, DataLayout> in2(tensorRange);
  Eigen::Tensor<DataType, 3, DataLayout> out(tensorRange);

  // Each transfer is sized from the tensor it actually moves, so the copies
  // stay correct even if the operands are ever given different shapes.
  const size_t in1_bytes = in1.dimensions().TotalSize() * sizeof(DataType);
  const size_t in2_bytes = in2.dimensions().TotalSize() * sizeof(DataType);
  const size_t out_bytes = out.dimensions().TotalSize() * sizeof(DataType);

  DataType *gpu_in1_data = static_cast<DataType*>(sycl_device.allocate(in1_bytes));
  DataType *gpu_in2_data = static_cast<DataType*>(sycl_device.allocate(in2_bytes));
  DataType *gpu_out_data = static_cast<DataType*>(sycl_device.allocate(out_bytes));

  in1 = in1.random() + in1.constant(10.0f);
  in2 = in2.random() + in2.constant(10.0f);

  // Device-side views over the allocated buffers.
  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout>> gpu_in1(gpu_in1_data, tensorRange);
  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout>> gpu_in2(gpu_in2_data, tensorRange);
  Eigen::TensorMap<Eigen::Tensor<DataType, 3, DataLayout>> gpu_out(gpu_out_data, tensorRange);

  sycl_device.memcpyHostToDevice(gpu_in1_data, in1.data(), in1_bytes);
  sycl_device.memcpyHostToDevice(gpu_in2_data, in2.data(), in2_bytes);

  /// c=(a+b)*b
  gpu_out.device(sycl_device) = (gpu_in1 + gpu_in2).eval() * gpu_in2;
  sycl_device.memcpyDeviceToHost(out.data(), gpu_out_data, out_bytes);

  for (int i = 0; i < sizeDim1; ++i) {
    for (int j = 0; j < sizeDim2; ++j) {
      for (int k = 0; k < sizeDim3; ++k) {
        VERIFY_IS_APPROX(out(i, j, k),
                         (in1(i, j, k) + in2(i, j, k)) * in2(i, j, k));
      }
    }
  }
  printf("(a+b)*b Test Passed\n");

  sycl_device.deallocate(gpu_in1_data);
  sycl_device.deallocate(gpu_in2_data);
  sycl_device.deallocate(gpu_out_data);
}
/// Checks extract_volume_patches() through the SYCL device when the patch is
/// as large as the whole volume.
///
/// A random col-major tensor of shape (depth, z, y, x, batch) = (4,2,3,5,7)
/// is uploaded, and a row-major copy with reversed dimensions is produced on
/// the device via swap_layout(). Volume patches of size (2,3,5) are extracted
/// from both, copied back, and every patch element is compared with the
/// source element at the offset coordinate, or with zero when that coordinate
/// falls outside the volume.
static void test_entire_volume_patch_sycl(const Eigen::SyclDevice& sycl_device) {
  const int depth = 4;
  const int patch_z = 2;
  const int patch_y = 3;
  const int patch_x = 5;
  const int batch = 7;

  array<IndexType, 5> tensorColMajorRange = {{depth, patch_z, patch_y, patch_x, batch}};
  array<IndexType, 5> tensorRowMajorRange = {{batch, patch_x, patch_y, patch_z, depth}};
  Tensor<DataType, 5, DataLayout,IndexType> tensor_col_major(tensorColMajorRange);
  Tensor<DataType, 5, RowMajor,IndexType> tensor_row_major(tensorRowMajorRange);
  tensor_col_major.setRandom();

  DataType* gpu_data_col_major = static_cast<DataType*>(
      sycl_device.allocate(tensor_col_major.size()*sizeof(DataType)));
  DataType* gpu_data_row_major = static_cast<DataType*>(
      sycl_device.allocate(tensor_row_major.size()*sizeof(DataType)));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> gpu_col_major(gpu_data_col_major, tensorColMajorRange);
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> gpu_row_major(gpu_data_row_major, tensorRowMajorRange);

  sycl_device.memcpyHostToDevice(gpu_data_col_major, tensor_col_major.data(),
                                 (tensor_col_major.size())*sizeof(DataType));
  gpu_row_major.device(sycl_device) = gpu_col_major.swap_layout();
  // Size the readback from the tensor being filled rather than from its
  // col-major counterpart.
  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), gpu_data_row_major,
                                 (tensor_row_major.size())*sizeof(DataType));

  // single volume patch: ColMajor
  array<IndexType, 6> patchColMajorTensorRange =
      {{depth, patch_z, patch_y, patch_x, patch_z*patch_y*patch_x, batch}};
  Tensor<DataType, 6, DataLayout,IndexType> entire_volume_patch_col_major(patchColMajorTensorRange);
  size_t patchTensorBuffSize = entire_volume_patch_col_major.size()*sizeof(DataType);
  DataType* gpu_data_entire_volume_patch_col_major =
      static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 6, DataLayout,IndexType>> gpu_entire_volume_patch_col_major(
      gpu_data_entire_volume_patch_col_major, patchColMajorTensorRange);
  gpu_entire_volume_patch_col_major.device(sycl_device) =
      gpu_col_major.extract_volume_patches(patch_z, patch_y, patch_x);
  sycl_device.memcpyDeviceToHost(entire_volume_patch_col_major.data(),
                                 gpu_data_entire_volume_patch_col_major, patchTensorBuffSize);

  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(0), depth);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(1), patch_z);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(2), patch_y);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(3), patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(4), patch_z * patch_y * patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_col_major.dimension(5), batch);

  // single volume patch: RowMajor (dimensions in reverse order)
  array<IndexType, 6> patchRowMajorTensorRange =
      {{batch, patch_z*patch_y*patch_x, patch_x, patch_y, patch_z, depth}};
  Tensor<DataType, 6, RowMajor,IndexType> entire_volume_patch_row_major(patchRowMajorTensorRange);
  patchTensorBuffSize = entire_volume_patch_row_major.size()*sizeof(DataType);
  DataType* gpu_data_entire_volume_patch_row_major =
      static_cast<DataType*>(sycl_device.allocate(patchTensorBuffSize));
  TensorMap<Tensor<DataType, 6, RowMajor,IndexType>> gpu_entire_volume_patch_row_major(
      gpu_data_entire_volume_patch_row_major, patchRowMajorTensorRange);
  gpu_entire_volume_patch_row_major.device(sycl_device) =
      gpu_row_major.extract_volume_patches(patch_z, patch_y, patch_x);
  sycl_device.memcpyDeviceToHost(entire_volume_patch_row_major.data(),
                                 gpu_data_entire_volume_patch_row_major, patchTensorBuffSize);

  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(0), batch);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(1), patch_z * patch_y * patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(2), patch_x);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(3), patch_y);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(4), patch_z);
  VERIFY_IS_EQUAL(entire_volume_patch_row_major.dimension(5), depth);

  // Offsets used below to translate a (patch position, in-patch position)
  // pair into a coordinate of the source volume.
  const int dz = patch_z - 1;
  const int dy = patch_y - 1;
  const int dx = patch_x - 1;

  const int forward_pad_z = dz - dz / 2;
  const int forward_pad_y = dy - dy / 2;
  const int forward_pad_x = dx - dx / 2;

  for (int pz = 0; pz < patch_z; pz++) {
    for (int py = 0; py < patch_y; py++) {
      for (int px = 0; px < patch_x; px++) {
        // Linear index of the patch located at (pz, py, px).
        const int patchId = pz + patch_z * (py + px * patch_y);
        for (int z = 0; z < patch_z; z++) {
          for (int y = 0; y < patch_y; y++) {
            for (int x = 0; x < patch_x; x++) {
              for (int b = 0; b < batch; b++) {
                for (int d = 0; d < depth; d++) {
                  // Held as DataType (not float) so the expected values are
                  // not narrowed before the equality check. They stay zero
                  // when the effective coordinate lies outside the volume.
                  DataType expected = 0.0f;
                  DataType expected_row_major = 0.0f;
                  const int eff_z = z - forward_pad_z + pz;
                  const int eff_y = y - forward_pad_y + py;
                  const int eff_x = x - forward_pad_x + px;
                  if (eff_z >= 0 && eff_y >= 0 && eff_x >= 0 &&
                      eff_z < patch_z && eff_y < patch_y && eff_x < patch_x) {
                    expected = tensor_col_major(d, eff_z, eff_y, eff_x, b);
                    expected_row_major = tensor_row_major(b, eff_x, eff_y, eff_z, d);
                  }
                  VERIFY_IS_EQUAL(entire_volume_patch_col_major(d, z, y, x, patchId, b), expected);
                  VERIFY_IS_EQUAL(entire_volume_patch_row_major(b, patchId, x, y, z, d), expected_row_major);
                }
              }
            }
          }
        }
      }
    }
  }

  sycl_device.deallocate(gpu_data_col_major);
  sycl_device.deallocate(gpu_data_row_major);
  sycl_device.deallocate(gpu_data_entire_volume_patch_col_major);
  sycl_device.deallocate(gpu_data_entire_volume_patch_row_major);
}
/// Checks extract_volume_patches(1, 1, 1) through the SYCL device.
///
/// A random col-major 4x2x3x5x7 tensor is uploaded and a row-major copy with
/// reversed dimensions is produced on the device via swap_layout(). 1x1x1
/// volume patches are extracted from both layouts and copied back. The test
/// then checks the patch tensor dimensions and that the raw buffers of the
/// sources and of the patch tensors agree element by element.
static void test_single_voxel_patch_sycl(const Eigen::SyclDevice& sycl_device) {
  IndexType extent0 = 4;
  IndexType extent1 = 2;
  IndexType extent2 = 3;
  IndexType extent3 = 5;
  IndexType extent4 = 7;

  // The row-major shape is the col-major shape reversed.
  array<IndexType, 5> colRange = {{extent0, extent1, extent2, extent3, extent4}};
  array<IndexType, 5> rowRange = {{extent4, extent3, extent2, extent1, extent0}};

  Tensor<DataType, 5, DataLayout,IndexType> tensor_col_major(colRange);
  Tensor<DataType, 5, RowMajor,IndexType> tensor_row_major(rowRange);
  tensor_col_major.setRandom();

  const size_t colBytes = tensor_col_major.size()*sizeof(DataType);
  const size_t rowBytes = tensor_row_major.size()*sizeof(DataType);

  DataType* devCol = static_cast<DataType*>(sycl_device.allocate(colBytes));
  DataType* devRow = static_cast<DataType*>(sycl_device.allocate(rowBytes));
  TensorMap<Tensor<DataType, 5, ColMajor, IndexType>> colMap(devCol, colRange);
  TensorMap<Tensor<DataType, 5, RowMajor, IndexType>> rowMap(devRow, rowRange);

  sycl_device.memcpyHostToDevice(devCol, tensor_col_major.data(), colBytes);
  rowMap.device(sycl_device) = colMap.swap_layout();

  // single volume patch: ColMajor
  array<IndexType, 6> colPatchRange = {{extent0, 1, 1, 1, extent1*extent2*extent3, extent4}};
  Tensor<DataType, 6, DataLayout,IndexType> single_voxel_patch_col_major(colPatchRange);
  size_t patchBytes = single_voxel_patch_col_major.size()*sizeof(DataType);
  DataType* devColPatch = static_cast<DataType*>(sycl_device.allocate(patchBytes));
  TensorMap<Tensor<DataType, 6, DataLayout,IndexType>> colPatchMap(devColPatch, colPatchRange);
  colPatchMap.device(sycl_device) = colMap.extract_volume_patches(1, 1, 1);
  sycl_device.memcpyDeviceToHost(single_voxel_patch_col_major.data(), devColPatch, patchBytes);

  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(0), 4);
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(1), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(2), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(3), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(4), 2 * 3 * 5);
  VERIFY_IS_EQUAL(single_voxel_patch_col_major.dimension(5), 7);

  // single volume patch: RowMajor
  array<IndexType, 6> rowPatchRange = {{extent4, extent1*extent2*extent3, 1, 1, 1, extent0}};
  Tensor<DataType, 6, RowMajor,IndexType> single_voxel_patch_row_major(rowPatchRange);
  patchBytes = single_voxel_patch_row_major.size()*sizeof(DataType);
  DataType* devRowPatch = static_cast<DataType*>(sycl_device.allocate(patchBytes));
  TensorMap<Tensor<DataType, 6, RowMajor,IndexType>> rowPatchMap(devRowPatch, rowPatchRange);
  rowPatchMap.device(sycl_device) = rowMap.extract_volume_patches(1, 1, 1);
  sycl_device.memcpyDeviceToHost(single_voxel_patch_row_major.data(), devRowPatch, patchBytes);

  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(0), 7);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(1), 2 * 3 * 5);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(2), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(3), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(4), 1);
  VERIFY_IS_EQUAL(single_voxel_patch_row_major.dimension(5), 4);

  // Fetch the layout-swapped source; the byte count is taken from the
  // col-major tensor's element count.
  sycl_device.memcpyDeviceToHost(tensor_row_major.data(), devRow, colBytes);

  // Compare the raw buffers element by element.
  for (IndexType n = 0; n < tensor_col_major.size(); ++n) {
    VERIFY_IS_EQUAL(tensor_col_major.data()[n], single_voxel_patch_col_major.data()[n]);
    VERIFY_IS_EQUAL(tensor_row_major.data()[n], single_voxel_patch_row_major.data()[n]);
    VERIFY_IS_EQUAL(tensor_col_major.data()[n], tensor_row_major.data()[n]);
  }

  sycl_device.deallocate(devCol);
  sycl_device.deallocate(devRow);
  sycl_device.deallocate(devColPatch);
  sycl_device.deallocate(devRowPatch);
}
/// Runs a sequence of element-wise float expressions on 100x100x100 tensors
/// through the SYCL device and compares each result with the same expression
/// computed on the host.
///
/// Stages, in order: a = 1.2f; out = a * 1.2f; out = a * b; out = a + b;
/// out = a * a; out = a * 3.14f + b * 2.7f; out = (a > 0.5f) ? b : c.
/// `a` is produced on the device, while `b` and `c` are random host tensors
/// uploaded just before their first use.
void test_sycl_cpu(const Eigen::SyclDevice &sycl_device) {
  int extentX = 100;
  int extentY = 100;
  int extentZ = 100;
  array<int, 3> shape = {{extentX, extentY, extentZ}};

  Tensor<float, 3> in1(shape);
  Tensor<float, 3> in2(shape);
  Tensor<float, 3> in3(shape);
  Tensor<float, 3> out(shape);

  in2 = in2.random();
  in3 = in3.random();

  const size_t in1Bytes = in1.dimensions().TotalSize()*sizeof(float);
  const size_t in2Bytes = in2.dimensions().TotalSize()*sizeof(float);
  const size_t in3Bytes = in3.dimensions().TotalSize()*sizeof(float);
  const size_t outBytes = out.dimensions().TotalSize()*sizeof(float);

  float *devA = static_cast<float*>(sycl_device.allocate(in1Bytes));
  float *devB = static_cast<float*>(sycl_device.allocate(in2Bytes));
  float *devC = static_cast<float*>(sycl_device.allocate(in3Bytes));
  float *devOut = static_cast<float*>(sycl_device.allocate(outBytes));

  TensorMap<Tensor<float, 3>> mapA(devA, shape);
  TensorMap<Tensor<float, 3>> mapB(devB, shape);
  TensorMap<Tensor<float, 3>> mapC(devC, shape);
  TensorMap<Tensor<float, 3>> mapOut(devOut, shape);

  /// a=1.2f
  mapA.device(sycl_device) = mapA.constant(1.2f);
  sycl_device.memcpyDeviceToHost(in1.data(), devA, in1Bytes);
  for (int x = 0; x < extentX; ++x) {
    for (int y = 0; y < extentY; ++y) {
      for (int z = 0; z < extentZ; ++z) {
        VERIFY_IS_APPROX(in1(x,y,z), 1.2f);
      }
    }
  }
  printf("a=1.2f Test passed\n");

  /// a=b*1.2f
  mapOut.device(sycl_device) = mapA * 1.2f;
  sycl_device.memcpyDeviceToHost(out.data(), devOut, outBytes);
  for (int x = 0; x < extentX; ++x) {
    for (int y = 0; y < extentY; ++y) {
      for (int z = 0; z < extentZ; ++z) {
        VERIFY_IS_APPROX(out(x,y,z), in1(x,y,z) * 1.2f);
      }
    }
  }
  printf("a=b*1.2f Test Passed\n");

  /// c=a*b
  sycl_device.memcpyHostToDevice(devB, in2.data(), in2Bytes);
  mapOut.device(sycl_device) = mapA * mapB;
  sycl_device.memcpyDeviceToHost(out.data(), devOut, outBytes);
  for (int x = 0; x < extentX; ++x) {
    for (int y = 0; y < extentY; ++y) {
      for (int z = 0; z < extentZ; ++z) {
        VERIFY_IS_APPROX(out(x,y,z), in1(x,y,z) * in2(x,y,z));
      }
    }
  }
  printf("c=a*b Test Passed\n");

  /// c=a+b
  mapOut.device(sycl_device) = mapA + mapB;
  sycl_device.memcpyDeviceToHost(out.data(), devOut, outBytes);
  for (int x = 0; x < extentX; ++x) {
    for (int y = 0; y < extentY; ++y) {
      for (int z = 0; z < extentZ; ++z) {
        VERIFY_IS_APPROX(out(x,y,z), in1(x,y,z) + in2(x,y,z));
      }
    }
  }
  printf("c=a+b Test Passed\n");

  /// c=a*a
  mapOut.device(sycl_device) = mapA * mapA;
  sycl_device.memcpyDeviceToHost(out.data(), devOut, outBytes);
  for (int x = 0; x < extentX; ++x) {
    for (int y = 0; y < extentY; ++y) {
      for (int z = 0; z < extentZ; ++z) {
        VERIFY_IS_APPROX(out(x,y,z), in1(x,y,z) * in1(x,y,z));
      }
    }
  }
  printf("c= a*a Test Passed\n");

  //a*3.14f + b*2.7f
  mapOut.device(sycl_device) = mapA * mapA.constant(3.14f) + mapB * mapB.constant(2.7f);
  sycl_device.memcpyDeviceToHost(out.data(), devOut, outBytes);
  for (int x = 0; x < extentX; ++x) {
    for (int y = 0; y < extentY; ++y) {
      for (int z = 0; z < extentZ; ++z) {
        VERIFY_IS_APPROX(out(x,y,z), in1(x,y,z) * 3.14f + in2(x,y,z) * 2.7f);
      }
    }
  }
  printf("a*3.14f + b*2.7f Test Passed\n");

  ///d= (a>0.5? b:c)
  sycl_device.memcpyHostToDevice(devC, in3.data(), in3Bytes);
  mapOut.device(sycl_device) = (mapA > mapA.constant(0.5f)).select(mapB, mapC);
  sycl_device.memcpyDeviceToHost(out.data(), devOut, outBytes);
  for (int x = 0; x < extentX; ++x) {
    for (int y = 0; y < extentY; ++y) {
      for (int z = 0; z < extentZ; ++z) {
        VERIFY_IS_APPROX(out(x, y, z), (in1(x, y, z) > 0.5f) ? in2(x, y, z) : in3(x, y, z));
      }
    }
  }
  printf("d= (a>0.5? b:c) Test Passed\n");

  sycl_device.deallocate(devA);
  sycl_device.deallocate(devB);
  sycl_device.deallocate(devC);
  sycl_device.deallocate(devOut);
}
/// Checks argmax(dim) along each dimension of a 9x3x5x7 tensor through the
/// SYCL device.
///
/// For every dimension two patterns are tested: first the maximum (10.0) is
/// placed at index 0 of the reduced dimension with -1.0 elsewhere, then the
/// maximum (20.0) is placed at the last index. Each time, every element of
/// the rank-3 result copied back from the device must equal that index.
static void test_sycl_argmax_dim(const Eigen::SyclDevice &sycl_device) {
  DenseIndex extent0 = 9;
  DenseIndex extent1 = 3;
  DenseIndex extent2 = 5;
  DenseIndex extent3 = 7;
  Tensor<DataType, 4, DataLayout, DenseIndex> tensor(extent0, extent1, extent2, extent3);

  std::vector<DenseIndex> extents;
  extents.push_back(extent0);
  extents.push_back(extent1);
  extents.push_back(extent2);
  extents.push_back(extent3);

  for (DenseIndex dim = 0; dim < 4; ++dim) {
    // Result shape: the input shape with dimension `dim` removed.
    array<DenseIndex, 3> reducedShape;
    for (DenseIndex d = 0; d < 3; ++d) {
      reducedShape[d] = (d < dim) ? extents[d] : extents[d+1];
    }

    Tensor<DenseIndex, 3, DataLayout, DenseIndex> tensor_arg(reducedShape);

    array<DenseIndex, 4> coord;
    for (DenseIndex a = 0; a < extent0; ++a) {
      for (DenseIndex b = 0; b < extent1; ++b) {
        for (DenseIndex c = 0; c < extent2; ++c) {
          for (DenseIndex e = 0; e < extent3; ++e) {
            coord[0] = a; coord[1] = b; coord[2] = c; coord[3] = e;
            // suppose dim == 1, then for all i, k, l, set tensor(i, 0, k, l) = 10.0
            tensor(coord) = (coord[dim] == 0) ? 10.0 : -1.0;
          }
        }
      }
    }

    std::size_t inputBytes = tensor.size() * sizeof(DataType);
    std::size_t resultBytes = tensor_arg.size() * sizeof(DenseIndex);

    // Device buffers are allocated and released once per reduced dimension.
    DataType *devIn = static_cast<DataType *>(sycl_device.allocate(inputBytes));
    DenseIndex *devOut = static_cast<DenseIndex *>(sycl_device.allocate(resultBytes));

    Eigen::TensorMap<Eigen::Tensor<DataType, 4, DataLayout, DenseIndex> > inMap(
        devIn, Eigen::array<DenseIndex, 4>{{extent0, extent1, extent2, extent3}});
    Eigen::TensorMap<Eigen::Tensor<DenseIndex, 3, DataLayout, DenseIndex> > outMap(
        devOut, reducedShape);

    sycl_device.memcpyHostToDevice(devIn, tensor.data(), inputBytes);
    outMap.device(sycl_device) = inMap.argmax(dim);
    sycl_device.memcpyDeviceToHost(tensor_arg.data(), devOut, resultBytes);

    VERIFY_IS_EQUAL(static_cast<size_t>(tensor_arg.size()),
                    size_t(extent0*extent1*extent2*extent3 / tensor.dimension(dim)));

    for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
      // Expect max to be in the first index of the reduced dimension
      VERIFY_IS_EQUAL(tensor_arg.data()[n], 0);
    }

    sycl_device.synchronize();

    for (DenseIndex a = 0; a < extent0; ++a) {
      for (DenseIndex b = 0; b < extent1; ++b) {
        for (DenseIndex c = 0; c < extent2; ++c) {
          for (DenseIndex e = 0; e < extent3; ++e) {
            coord[0] = a; coord[1] = b; coord[2] = c; coord[3] = e;
            // suppose dim == 1, then for all i, k, l, set tensor(i, 2, k, l) = 20.0
            tensor(coord) = (coord[dim] == tensor.dimension(dim) - 1) ? 20.0 : -1.0;
          }
        }
      }
    }

    sycl_device.memcpyHostToDevice(devIn, tensor.data(), inputBytes);
    outMap.device(sycl_device) = inMap.argmax(dim);
    sycl_device.memcpyDeviceToHost(tensor_arg.data(), devOut, resultBytes);

    for (DenseIndex n = 0; n < tensor_arg.size(); ++n) {
      // Expect max to be in the last index of the reduced dimension
      VERIFY_IS_EQUAL(tensor_arg.data()[n], tensor.dimension(dim) - 1);
    }

    sycl_device.deallocate(devIn);
    sycl_device.deallocate(devOut);
  }
}