/// Compares af::mean(data, weights) over every element with a weighted mean
/// accumulated on the host from the same randomly generated inputs.
///
/// @param dims shape used for both the data array and the weights array
void weightedMeanAllTest(af::dim4 dims)
{
    typedef typename meanOutType<T>::type outType;

    // Bail out as soon as one of the involved types fails the noDoubleTests check.
    if (noDoubleTests<T>() || noDoubleTests<outType>() || noDoubleTests<wtsType>()) {
        return;
    }

    using af::array;
    using af::mean;

    // Seed from the wall clock, so inputs differ from run to run.
    std::srand(std::time(0));

    vector<T>       data(dims.elements());
    vector<wtsType> wts(dims.elements());
    std::generate(data.begin(), data.end(), random<T>);
    std::generate(wts.begin(),  wts.end(),  random<wtsType>);

    // Host-side reference: sum(data * weight) / sum(weight).
    outType weightedTotal = outType(0);
    wtsType weightTotal   = wtsType(0);
    for (size_t idx = 0; idx < data.size(); ++idx) {
        weightedTotal = weightedTotal + data[idx] * wts[idx];
        weightTotal   = weightTotal + wts[idx];
    }
    outType gold = weightedTotal / weightTotal;

    array a(dims, &(data.front()));
    array w(dims, &(wts.front()));
    outType output = mean<outType>(a, w);

    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-2);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-2);
}
//Strong Exception Guarantee af_err af_copy_array(af_array *out, const af_array in) { ArrayInfo info = getInfo(in); const unsigned ndims = info.ndims(); const af::dim4 dims = info.dims(); const af_dtype type = info.getType(); af_err ret = AF_ERR_ARG; ret = af_create_handle(out, ndims, dims.get(), type); if(ret != AF_SUCCESS) { return ret; } try { switch(type) { case f32: copyArray<float >(out, in); break; case c32: copyArray<cfloat >(out, in); break; case f64: copyArray<double >(out, in); break; case c64: copyArray<cdouble >(out, in); break; case b8: copyArray<char >(out, in); break; case s32: copyArray<int >(out, in); break; case u32: copyArray<unsigned>(out, in); break; case u8: copyArray<uchar >(out, in); break; default: ret = AF_ERR_NOT_SUPPORTED; break; } } CATCHALL return ret; }
/// Resizes the first two dimensions of `in` to odim0 x odim1; dimensions 2 and
/// 3 are carried over from the input.
///
/// @param in     input array
/// @param odim0  requested size of dimension 0
/// @param odim1  requested size of dimension 1
/// @param method AF_INTERP_NEAREST or AF_INTERP_BILINEAR; for any other value
///               no kernel is launched and the freshly created output array is
///               returned as is
/// @throws std::runtime_error when the input or the output has zero elements
Array<T> resize(const Array<T> &in, const dim_type odim0, const dim_type odim1,
                const af_interp_type method)
{
    // double / cdouble need the active device to report double support.
    const bool usesDouble = std::is_same<T, double>::value ||
                            std::is_same<T, cdouble>::value;
    if (usesDouble && !isDoubleSupported(getActiveDeviceId())) {
        OPENCL_NOT_SUPPORTED();
    }

    const af::dim4 iDims = in.dims();
    af::dim4 oDims(odim0, odim1, iDims[2], iDims[3]);

    if (iDims.elements() == 0 || oDims.elements() == 0) {
        throw std::runtime_error("Elements is 0");
    }

    Array<T> out = createEmptyArray<T>(oDims);

    if (method == AF_INTERP_NEAREST) {
        kernel::resize<T, AF_INTERP_NEAREST>(out, in);
    } else if (method == AF_INTERP_BILINEAR) {
        kernel::resize<T, AF_INTERP_BILINEAR>(out, in);
    }

    return out;
}
// Split a MxNx3 image into 3 separate channel matrices. // Produce 3 channels if needed static af_err channel_split(const af_array rgb, const af::dim4 &dims, af_array *outr, af_array *outg, af_array *outb, af_array *outa) { try { af_seq idx[4][3] = {{af_span, af_span, {0, 0, 1}}, {af_span, af_span, {1, 1, 1}}, {af_span, af_span, {2, 2, 1}}, {af_span, af_span, {3, 3, 1}} }; if (dims[2] == 4) { AF_CHECK(af_index(outr, rgb, dims.ndims(), idx[0])); AF_CHECK(af_index(outg, rgb, dims.ndims(), idx[1])); AF_CHECK(af_index(outb, rgb, dims.ndims(), idx[2])); AF_CHECK(af_index(outa, rgb, dims.ndims(), idx[3])); } else if (dims[2] == 3) { AF_CHECK(af_index(outr, rgb, dims.ndims(), idx[0])); AF_CHECK(af_index(outg, rgb, dims.ndims(), idx[1])); AF_CHECK(af_index(outb, rgb, dims.ndims(), idx[2])); } else { AF_CHECK(af_index(outr, rgb, dims.ndims(), idx[0])); } } CATCHALL; return AF_SUCCESS; }
/// Runs kernel::diff along dimension `dim`. The output extent along `dim` is
/// the input extent minus (isDiff2 + 1).
///
/// @param in  input array
/// @param dim dimension to difference along; only 0..3 launch a kernel
/// @throws std::runtime_error when the input or the output has zero elements
static Array<T> diff(const Array<T> &in, const int dim)
{
    const af::dim4 inDims = in.dims();

    af::dim4 outDims = inDims;
    outDims[dim] -= (isDiff2 + 1);

    if (inDims.elements() == 0 || outDims.elements() == 0) {
        throw std::runtime_error("Elements are 0");
    }

    Array<T> out = createEmptyArray<T>(outDims);

    // The dimension is a template argument of the kernel, so dispatch manually.
    if (dim == 0) {
        kernel::diff<T, 0, isDiff2>(out, in, in.ndims());
    } else if (dim == 1) {
        kernel::diff<T, 1, isDiff2>(out, in, in.ndims());
    } else if (dim == 2) {
        kernel::diff<T, 2, isDiff2>(out, in, in.ndims());
    } else if (dim == 3) {
        kernel::diff<T, 3, isDiff2>(out, in, in.ndims());
    }

    return out;
}
/// Expects af_randn to succeed for element type T with the given dimensions,
/// then destroys the array if one was produced.
///
/// @param dims shape requested from af_randn
void randnTest(af::dim4 &dims)
{
    if (noDoubleTests<T>()) {
        return;
    }

    const af_dtype requested = static_cast<af_dtype>(af::dtype_traits<T>::af_type);

    af_array outArray = 0;
    ASSERT_EQ(AF_SUCCESS, af_randn(&outArray, dims.ndims(), dims.get(), requested));

    if (outArray != 0) {
        af_destroy_array(outArray);
    }
}
/// unsigned char variant: expects af_randn to report AF_ERR_NOT_SUPPORTED for
/// this element type. Any array that was produced anyway is destroyed.
///
/// @param dims shape requested from af_randn
void randnTest<unsigned char>(af::dim4 &dims)
{
    if (noDoubleTests<unsigned char>()) {
        return;
    }

    const af_dtype requested = static_cast<af_dtype>(af::dtype_traits<unsigned char>::af_type);

    af_array outArray = 0;
    ASSERT_EQ(AF_ERR_NOT_SUPPORTED, af_randn(&outArray, dims.ndims(), dims.get(), requested));

    if (outArray != 0) {
        af_destroy_array(outArray);
    }
}
/// Builds an array of the given dimensions from existing data.
///
/// @param dims      shape of the array
/// @param in_data   source buffer
/// @param is_device when true, `in_data` itself becomes the managed buffer
///                  (paired with memFree<T>); when false, a new buffer is
///                  obtained from memAlloc<T> and `in_data` is copied into it
///                  with a host-to-device cudaMemcpy
Array<T>::Array(af::dim4 dims, const T * const in_data, bool is_device) :
    info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims),
         static_cast<af_dtype>(dtype_traits<T>::af_type)),
    data((is_device ? const_cast<T *>(in_data) : memAlloc<T>(dims.elements())), memFree<T>),
    data_dims(dims),
    node(),
    offset(0),
    ready(true),
    owner(true)
{
#if __cplusplus > 199711L
    static_assert(std::is_standard_layout<Array<T>>::value, "Array<T> must be a standard layout type");
    static_assert(offsetof(Array<T>, info) == 0, "Array<T>::info must be the first member variable of Array<T>");
#endif

    // The adopted buffer needs no transfer.
    if (is_device) {
        return;
    }

    CUDA_CHECK(cudaMemcpy(data.get(), in_data, dims.elements() * sizeof(T), cudaMemcpyHostToDevice));
}
/// Tiles `in` by `tileDims`, dispatching tile<T> on the array's dtype, and
/// stores the result in `*out`.
///
/// @param out      receives the result handle
/// @param in       input array
/// @param tileDims tiling factors passed through to tile<T>
/// @return the result of af_retain_array when `in` has zero dimensions;
///         AF_SUCCESS after a successful tile. Failed DIM_ASSERT / TYPE_ERROR
///         checks are presumably reported through CATCHALL.
af_err af_tile(af_array *out, const af_array in, const af::dim4 &tileDims)
{
    try {
        ArrayInfo inInfo      = getInfo(in);
        af_dtype  elementType = inInfo.getType();

        // A zero-dimensional input is handed back via retain instead of tiling.
        if (inInfo.ndims() == 0) {
            return af_retain_array(out, in);
        }

        DIM_ASSERT(1, inInfo.dims().elements() > 0);
        DIM_ASSERT(2, tileDims.elements() > 0);

        af_array tiled;
        switch (elementType) {
        case f32: tiled = tile<float  >(in, tileDims); break;
        case c32: tiled = tile<cfloat >(in, tileDims); break;
        case f64: tiled = tile<double >(in, tileDims); break;
        case c64: tiled = tile<cdouble>(in, tileDims); break;
        case b8:  tiled = tile<char   >(in, tileDims); break;
        case s32: tiled = tile<int    >(in, tileDims); break;
        case u32: tiled = tile<uint   >(in, tileDims); break;
        case s64: tiled = tile<intl   >(in, tileDims); break;
        case u64: tiled = tile<uintl  >(in, tileDims); break;
        case s16: tiled = tile<short  >(in, tileDims); break;
        case u16: tiled = tile<ushort >(in, tileDims); break;
        case u8:  tiled = tile<uchar  >(in, tileDims); break;
        default:  TYPE_ERROR(1, elementType);
        }

        std::swap(*out, tiled);
    }
    CATCHALL;

    return AF_SUCCESS;
}
/// Scales `in` by building a 3x2 scaling matrix and passing it to af_transform
/// (called with its last argument set to true).
///
/// How the matrix factors and output size are chosen:
///  - either output dimension is 0: both scales must be non-zero; the factors
///    are 1/scale and the output size is derived from the input size;
///  - otherwise, either scale is 0: the factors are input size / output size;
///  - otherwise: the factors are 1/scale and the given output size is used.
///
/// @param out    receives the transformed array
/// @param in     input array
/// @param scale0 scale along dimension 0 (0 = derive from odim0)
/// @param scale1 scale along dimension 1 (0 = derive from odim1)
/// @param odim0  output size along dimension 0 (0 = derive from scale0)
/// @param odim1  output size along dimension 1 (0 = derive from scale1)
/// @param method interpolation method forwarded to af_transform
/// @return AF_SUCCESS, or whatever CATCHALL yields on failure
af_err af_scale(af_array *out, const af_array in, const float scale0, const float scale1,
                const dim_t odim0, const dim_t odim1, const af_interp_type method)
{
    try {
        ArrayInfo i_info = getInfo(in);
        af::dim4 idims = i_info.dims();

        dim_t _odim0 = odim0, _odim1 = odim1;
        float sx, sy;

        if (_odim0 == 0 || _odim1 == 0) {
            DIM_ASSERT(2, scale0 != 0);
            DIM_ASSERT(3, scale1 != 0);
            sx = 1.f / scale0, sy = 1.f / scale1;
            _odim0 = idims[0] / sx;
            _odim1 = idims[1] / sy;
        } else if (scale0 == 0 || scale1 == 0) {
            DIM_ASSERT(4, odim0 != 0);
            DIM_ASSERT(5, odim1 != 0);
            sx = idims[0] / (float)_odim0;
            sy = idims[1] / (float)_odim1;
        } else {
            sx = 1.f / scale0, sy = 1.f / scale1;
        }

        // Per-call storage: a function-local static here would be written by
        // every caller and race when the function is entered concurrently.
        float trans_mat[6] = {1, 0, 0,
                              0, 1, 0};
        trans_mat[0] = sx;
        trans_mat[4] = sy;

        const af::dim4 tdims(3, 2, 1, 1);

        af_array t = 0;
        AF_CHECK(af_create_array(&t, trans_mat, tdims.ndims(), tdims.get(), f32));

        // Release the temporary matrix before checking the transform status so
        // it is not leaked when af_transform fails (assumes AF_CHECK leaves the
        // try block on error).
        const af_err transformStatus = af_transform(out, in, t, _odim0, _odim1, method, true);
        const af_err releaseStatus   = af_release_array(t);
        AF_CHECK(transformStatus);
        AF_CHECK(releaseStatus);
    }
    CATCHALL;

    return AF_SUCCESS;
}
/// Host-side tiling: the output dimensions are the input dimensions multiplied
/// element-wise by `tileDims`, and every output element is read from the input
/// at the output coordinate taken modulo the input extent.
///
/// @param in       input array
/// @param tileDims per-dimension repeat counts
/// @throws std::runtime_error when the input or the output has zero elements
Array<T> tile(const Array<T> &in, const af::dim4 &tileDims)
{
    const af::dim4 iDims = in.dims();
    af::dim4 oDims = iDims;
    oDims *= tileDims;

    if (iDims.elements() == 0 || oDims.elements() == 0) {
        throw std::runtime_error("Elements are 0");
    }

    Array<T> out = createEmptyArray<T>(oDims);

    T       *dst = out.get();
    const T *src = in.get();

    const af::dim4 srcStrides = in.strides();
    const af::dim4 dstStrides = out.strides();

    // Walk the output from the outermost dimension inwards, accumulating the
    // source and destination offsets one dimension at a time. Dimension 0 is
    // addressed without a stride multiplier on either side.
    for (dim_t o3 = 0; o3 < oDims[3]; ++o3) {
        const dim_t srcOff3 = (o3 % iDims[3]) * srcStrides[3];
        const dim_t dstOff3 = o3 * dstStrides[3];

        for (dim_t o2 = 0; o2 < oDims[2]; ++o2) {
            const dim_t srcOff2 = srcOff3 + (o2 % iDims[2]) * srcStrides[2];
            const dim_t dstOff2 = dstOff3 + o2 * dstStrides[2];

            for (dim_t o1 = 0; o1 < oDims[1]; ++o1) {
                const dim_t srcOff1 = srcOff2 + (o1 % iDims[1]) * srcStrides[1];
                const dim_t dstOff1 = dstOff2 + o1 * dstStrides[1];

                for (dim_t o0 = 0; o0 < oDims[0]; ++o0) {
                    dst[dstOff1 + o0] = src[srcOff1 + (o0 % iDims[0])];
                }
            }
        }
    }

    return out;
}
/// Builds an array of the given dimensions from existing data.
///
/// @param dims        shape of the array
/// @param in_data     source buffer
/// @param is_device   false: `in_data` is copied host-to-device into a new
///                    buffer; true: see `copy_device`
/// @param copy_device only consulted when `is_device` is true: false adopts
///                    `in_data` as the managed buffer, true copies it
///                    device-to-device into a new buffer
///
/// Copies are issued on the active stream, which is synchronized right after.
Array<T>::Array(af::dim4 dims, const T * const in_data, bool is_device, bool copy_device) :
    info(getActiveDeviceId(), dims, 0, calcStrides(dims),
         static_cast<af_dtype>(dtype_traits<T>::af_type)),
    data(((is_device && !copy_device) ? const_cast<T*>(in_data)
                                      : memAlloc<T>(dims.elements()).release()),
         memFree<T>),
    data_dims(dims),
    node(bufferNodePtr<T>()),
    ready(true),
    owner(true)
{
#if __cplusplus > 199711L
    static_assert(std::is_standard_layout<Array<T>>::value, "Array<T> must be a standard layout type");
    static_assert(offsetof(Array<T>, info) == 0, "Array<T>::info must be the first member variable of Array<T>");
#endif

    // A transfer is needed unless the device buffer was adopted as is.
    const bool mustCopy = !is_device || copy_device;
    if (mustCopy) {
        CUDA_CHECK(cudaMemcpyAsync(data.get(), in_data, dims.elements() * sizeof(T),
                                   is_device ? cudaMemcpyDeviceToDevice : cudaMemcpyHostToDevice,
                                   cuda::getActiveStream()));
        CUDA_CHECK(cudaStreamSynchronize(cuda::getActiveStream()));
    }
}
/// Translates `in` by building a 3x2 matrix {1, 0, trans0, 0, 1, trans1} and
/// passing it to af_transform (called with its last argument set to true).
///
/// @param out    receives the transformed array
/// @param in     input array
/// @param trans0 translation stored in matrix element [2]
/// @param trans1 translation stored in matrix element [5]
/// @param odim0  output size along dimension 0, forwarded to af_transform
/// @param odim1  output size along dimension 1, forwarded to af_transform
/// @param method interpolation method forwarded to af_transform
/// @return AF_SUCCESS, or whatever CATCHALL yields on failure
af_err af_translate(af_array *out, const af_array in, const float trans0, const float trans1,
                    const dim_t odim0, const dim_t odim1, const af_interp_type method)
{
    try {
        // Per-call storage: a function-local static here would be written by
        // every caller and race when the function is entered concurrently.
        float trans_mat[6] = {1, 0, 0,
                              0, 1, 0};
        trans_mat[2] = trans0;
        trans_mat[5] = trans1;

        const af::dim4 tdims(3, 2, 1, 1);

        af_array t = 0;
        AF_CHECK(af_create_array(&t, trans_mat, tdims.ndims(), tdims.get(), f32));

        // Release the temporary matrix before checking the transform status so
        // it is not leaked when af_transform fails (assumes AF_CHECK leaves the
        // try block on error).
        const af_err transformStatus = af_transform(out, in, t, odim0, odim1, method, true);
        const af_err releaseStatus   = af_release_array(t);
        AF_CHECK(transformStatus);
        AF_CHECK(releaseStatus);
    }
    CATCHALL;

    return AF_SUCCESS;
}
/// Fills an array of shape `dims` with `const_value` and checks that
/// af::mean over all elements matches the mean accumulated on the host.
///
/// @param const_value value every element is set to
/// @param dims        shape of the test array
void testCPPMean(T const_value, af::dim4 dims)
{
    typedef typename meanOutType<T>::type outType;
    if (noDoubleTests<T>()) {
        return;
    }

    using af::array;
    using af::mean;

    vector<T> values(dims.elements(), const_value);

    // Host-side reference: sum of all elements divided by the element count.
    outType gold = outType(0);
    for (typename vector<T>::const_iterator it = values.begin(); it != values.end(); ++it) {
        gold += *it;
    }
    gold /= dims.elements();

    array a(dims, &(values.front()));
    outType output = mean<outType>(a);

    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-3);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-3);
}
/// Skews `in` by building a 3x2 matrix from tan(skew0) / tan(skew1) and passing
/// it to af_transform (called with its last argument set to true).
///
/// @param out     receives the transformed array
/// @param in      input array
/// @param skew0   skew angle; tan(skew0) is used as `tx`
/// @param skew1   skew angle; tan(skew1) is used as `ty`
/// @param odim0   output size along dimension 0, forwarded to af_transform
/// @param odim1   output size along dimension 1, forwarded to af_transform
/// @param method  interpolation method forwarded to af_transform
/// @param inverse when true the matrix is adjusted as described in the body
/// @return AF_SUCCESS, or whatever CATCHALL yields on failure
af_err af_skew(af_array *out, const af_array in, const float skew0, const float skew1,
               const dim_t odim0, const dim_t odim1, const af_interp_type method,
               const bool inverse)
{
    try {
        float tx = std::tan(skew0);
        float ty = std::tan(skew1);

        // The matrix must start from identity on every call. As a
        // function-local static, the diagonal written by the inverse branch
        // below ([0] and [4] = d) survived into later calls and corrupted
        // subsequent non-inverse skews; it was also shared between threads.
        float trans_mat[6] = {1, 0, 0,
                              0, 1, 0};
        trans_mat[1] = ty;
        trans_mat[3] = tx;

        if (inverse) {
            if (tx == 0 || ty == 0) {
                trans_mat[1] = tx;
                trans_mat[3] = ty;
            } else {
                // Shortcut for calc_transform_inverse(trans_mat).
                float d = 1.0f / (1.0f - tx * ty);
                trans_mat[0] = d;
                trans_mat[1] = ty * d;
                trans_mat[3] = tx * d;
                trans_mat[4] = d;
            }
        }

        const af::dim4 tdims(3, 2, 1, 1);

        af_array t = 0;
        AF_CHECK(af_create_array(&t, trans_mat, tdims.ndims(), tdims.get(), f32));

        // Release the temporary matrix before checking the transform status so
        // it is not leaked when af_transform fails (assumes AF_CHECK leaves the
        // try block on error).
        const af_err transformStatus = af_transform(out, in, t, odim0, odim1, method, true);
        const af_err releaseStatus   = af_release_array(t);
        AF_CHECK(transformStatus);
        AF_CHECK(releaseStatus);
    }
    CATCHALL;

    return AF_SUCCESS;
}
/// Checks af::var in two scenarios, each with the boolean flag set to false
/// and to true:
///  1. an array of shape `dims` filled with `const_value`, expected variance 0;
///  2. the five values 0..4, expected 2.5 with the flag false and 2 with it true.
///
/// @param const_value value every element of the first array is set to
/// @param dims        shape of the first array
void testCPPVar(T const_value, af::dim4 dims)
{
    typedef typename varOutType<T>::type outType;
    if (noDoubleTests<T>() || noDoubleTests<outType>()) {
        return;
    }

    using af::array;
    using af::var;

    // --- Scenario 1: constant input -------------------------------------
    vector<T> constants(dims.elements(), const_value);
    outType gold = outType(0);

    array a(dims, &(constants.front()));

    outType output = var<outType>(a, false);
    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-3);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-3);

    output = var<outType>(a, true);
    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-3);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-3);

    // --- Scenario 2: the ramp 0, 1, 2, 3, 4 -------------------------------
    outType ramp[] = { outType(0), outType(1), outType(2), outType(3), outType(4) };
    array b(5, ramp);

    gold   = outType(2.5);
    output = var<outType>(b, false);
    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-3);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-3);

    gold   = outType(2);
    output = var<outType>(b, true);
    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-3);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-3);
}
/// Returns dim_size.ndims() as a size_t.
size_t ndims() const
{
    const size_t dimensionCount = dim_size.ndims();
    return dimensionCount;
}
/// Returns dim_size.elements() as a size_t.
size_t elements() const
{
    const size_t elementCount = dim_size.elements();
    return elementCount;
}
/// Builds an array of the given dimensions without copying any data in.
/// When `dims` has zero elements no buffer is requested from memAlloc and the
/// managed pointer is null; otherwise a buffer of dims.elements() items is
/// obtained from memAlloc<T> and paired with memFree<T>.
///
/// @param dims shape of the array
Array<T>::Array(af::dim4 dims) :
    info(getActiveDeviceId(), dims, 0, calcStrides(dims),
         static_cast<af_dtype>(dtype_traits<T>::af_type)),
    data((dims.elements() == 0 ? nullptr : memAlloc<T>(dims.elements()).release()),
         memFree<T>),
    data_dims(dims),
    node(bufferNodePtr<T>()),
    ready(true),
    owner(true)
{
}
/// Builds an array of the given dimensions without copying any data in: a
/// buffer of dims.elements() items is obtained from memAlloc<T> and paired
/// with memFree<T>; the offset starts at 0.
///
/// @param dims shape of the array
Array<T>::Array(af::dim4 dims) :
    info(getActiveDeviceId(), dims, af::dim4(0,0,0,0), calcStrides(dims),
         static_cast<af_dtype>(dtype_traits<T>::af_type)),
    data(memAlloc<T>(dims.elements()), memFree<T>),
    data_dims(dims),
    node(),
    offset(0),
    ready(true),
    owner(true)
{
}