// Compares af::mean<outType>(a, w) over a whole array with a weighted mean
// accumulated on the host.
//
// dims: extent of the generated value and weight arrays.
//
// T and wtsType are declared outside this view. The test is skipped when
// noDoubleTests reports true for T, the output type, or the weight type.
// Real and imaginary parts are each compared with a tolerance of 1.0e-2.
void weightedMeanAllTest(af::dim4 dims) {
    typedef typename meanOutType<T>::type outType;

    if (noDoubleTests<T>() || noDoubleTests<outType>() || noDoubleTests<wtsType>()) return;

    using af::array;
    using af::mean;

    std::srand(std::time(0));

    // Host-side inputs: one value and one weight per element.
    vector<T> data(dims.elements());
    vector<wtsType> wts(dims.elements());
    std::generate(data.begin(), data.end(), random<T>);
    std::generate(wts.begin(), wts.end(), random<wtsType>);

    // Reference result: sum(data * wts) / sum(wts).
    outType wtdSum = outType(0);
    wtsType wtsSum = wtsType(0);
    const int count = static_cast<int>(data.size());
    for (int idx = 0; idx < count; ++idx) {
        wtdSum = wtdSum + data[idx] * wts[idx];
        wtsSum = wtsSum + wts[idx];
    }
    outType gold = wtdSum / wtsSum;

    array a(dims, &data[0]);
    array w(dims, &wts[0]);
    outType output = mean<outType>(a, w);

    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-2);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-2);
}
// Builds an Array of extent `dims` from caller-supplied data.
//
// dims:      extent of the array; also stored as data_dims.
// in_data:   source pointer.
// is_device: when true, `data` is initialised with in_data itself (constness
//            cast away) and nothing is copied. When false, `data` is
//            initialised from memAlloc<T>(dims.elements()) and in_data is
//            copied into it with cudaMemcpy (cudaMemcpyHostToDevice).
//
// In both cases memFree<T> is passed alongside the pointer when initialising
// `data`.
Array<T>::Array(af::dim4 dims, const T * const in_data, bool is_device)
    : info(getActiveDeviceId(), dims, af::dim4(0, 0, 0, 0), calcStrides(dims),
           static_cast<af_dtype>(dtype_traits<T>::af_type)),
      data(is_device ? const_cast<T *>(in_data) : memAlloc<T>(dims.elements()),
           memFree<T>),
      data_dims(dims),
      node(),
      offset(0),
      ready(true),
      owner(true)
{
#if __cplusplus > 199711L
    static_assert(std::is_standard_layout<Array<T>>::value, "Array<T> must be a standard layout type");
    static_assert(offsetof(Array<T>, info) == 0, "Array<T>::info must be the first member variable of Array<T>");
#endif

    // The caller's pointer is used directly; there is nothing to copy.
    if (is_device) { return; }

    CUDA_CHECK(cudaMemcpy(data.get(), in_data, dims.elements() * sizeof(T), cudaMemcpyHostToDevice));
}
// Computes a difference of `in` along dimension `dim` by dispatching to
// kernel::diff<T, dim, isDiff2>.
//
// in:  input array.
// dim: dimension to difference along; must be in [0, 3].
//
// Returns a new array whose extent along `dim` is smaller than the input's
// by (isDiff2 + 1). isDiff2 is declared outside this view (presumably a
// template parameter selecting the second-order difference -- confirm).
//
// Throws std::runtime_error when `dim` is out of range, when the input is
// shorter along `dim` than the amount being removed, or when the input or
// output would contain zero elements.
static Array<T> diff(const Array<T> &in, const int dim) {
    // Reject a bad dimension before it is used to index a dim4; previously
    // an out-of-range value also fell through the switch and returned an
    // array that no kernel had written to.
    if (dim < 0 || dim > 3) {
        throw std::runtime_error("Invalid dimension for diff");
    }

    const af::dim4 iDims = in.dims();
    if (iDims.elements() == 0) {
        throw std::runtime_error("Elements are 0");
    }

    // Subtracting more than the input holds would leave a negative extent,
    // which the zero-element check below does not catch.
    const int shrink = isDiff2 + 1;
    if (iDims[dim] < shrink) {
        throw std::runtime_error("Input is too small along the diff dimension");
    }

    af::dim4 oDims = iDims;
    oDims[dim] -= shrink;
    if (oDims.elements() == 0) {
        throw std::runtime_error("Elements are 0");
    }

    Array<T> out = createEmptyArray<T>(oDims);

    // The kernel takes the dimension as a compile-time argument.
    switch (dim) {
        case (0): kernel::diff<T, 0, isDiff2>(out, in, in.ndims()); break;
        case (1): kernel::diff<T, 1, isDiff2>(out, in, in.ndims()); break;
        case (2): kernel::diff<T, 2, isDiff2>(out, in, in.ndims()); break;
        case (3): kernel::diff<T, 3, isDiff2>(out, in, in.ndims()); break;
    }

    return out;
}
// C API entry point: tiles `in` by `tileDims` and stores the result handle
// in *out.
//
// out:      receives the resulting array handle.
// in:       input array handle.
// tileDims: tiling factors handed to the typed tile<T>() helper.
//
// When the input reports zero dimensions the call returns the result of
// af_retain_array(out, in). Otherwise it asserts that both the input and
// tileDims have a positive element count, dispatches on the input's dtype,
// and returns AF_SUCCESS. Unhandled dtypes go through TYPE_ERROR; whatever
// is thrown inside the try block is handled by the CATCHALL macro (defined
// elsewhere).
af_err af_tile(af_array *out, const af_array in, const af::dim4 &tileDims)
{
    try {
        ArrayInfo info = getInfo(in);

        if (info.ndims() == 0) {
            return af_retain_array(out, in);
        }

        DIM_ASSERT(1, info.dims().elements() > 0);
        DIM_ASSERT(2, tileDims.elements() > 0);

        af_dtype type = info.getType();
        af_array output;

        switch (type) {
            case f32:
                output = tile<float  >(in, tileDims);
                break;
            case c32:
                output = tile<cfloat >(in, tileDims);
                break;
            case f64:
                output = tile<double >(in, tileDims);
                break;
            case c64:
                output = tile<cdouble>(in, tileDims);
                break;
            case b8:
                output = tile<char   >(in, tileDims);
                break;
            case s32:
                output = tile<int    >(in, tileDims);
                break;
            case u32:
                output = tile<uint   >(in, tileDims);
                break;
            case s64:
                output = tile<intl   >(in, tileDims);
                break;
            case u64:
                output = tile<uintl  >(in, tileDims);
                break;
            case s16:
                output = tile<short  >(in, tileDims);
                break;
            case u16:
                output = tile<ushort >(in, tileDims);
                break;
            case u8:
                output = tile<uchar  >(in, tileDims);
                break;
            default:
                TYPE_ERROR(1, type);
        }

        std::swap(*out, output);
    }
    CATCHALL;

    return AF_SUCCESS;
}
// Resizes the first two dimensions of `in` to odim0 x odim1, keeping the
// extents of dimensions 2 and 3.
//
// in:     input array.
// odim0:  output extent along dimension 0.
// odim1:  output extent along dimension 1.
// method: interpolation method. AF_INTERP_NEAREST and AF_INTERP_BILINEAR
//         dispatch to kernel::resize; for any other value no kernel is run
//         and the freshly created output array is returned as is.
//
// Invokes OPENCL_NOT_SUPPORTED() when T is double or cdouble and the active
// device does not report double support. Throws std::runtime_error when the
// input or output would contain zero elements.
Array<T> resize(const Array<T> &in, const dim_type odim0, const dim_type odim1,
                const af_interp_type method)
{
    const bool needsDouble =
        std::is_same<T, double>::value || std::is_same<T, cdouble>::value;
    if (needsDouble && !isDoubleSupported(getActiveDeviceId())) {
        OPENCL_NOT_SUPPORTED();
    }

    const af::dim4 iDims = in.dims();
    af::dim4 oDims(odim0, odim1, iDims[2], iDims[3]);

    if (iDims.elements() == 0 || oDims.elements() == 0) {
        throw std::runtime_error("Elements is 0");
    }

    Array<T> out = createEmptyArray<T>(oDims);

    // The interpolation method is a compile-time argument of the kernel.
    if (method == AF_INTERP_NEAREST) {
        kernel::resize<T, AF_INTERP_NEAREST>(out, in);
    } else if (method == AF_INTERP_BILINEAR) {
        kernel::resize<T, AF_INTERP_BILINEAR>(out, in);
    }

    return out;
}
// Builds an Array of extent `dims` from caller-supplied data.
//
// dims:        extent of the array; also stored as data_dims.
// in_data:     source pointer.
// is_device:   false -> a buffer is obtained from memAlloc<T> and in_data is
//              copied into it with cudaMemcpyHostToDevice.
// copy_device: only consulted when is_device is true. true -> a buffer is
//              obtained from memAlloc<T> and in_data is copied into it with
//              cudaMemcpyDeviceToDevice. false -> `data` is initialised with
//              in_data itself and nothing is copied.
//
// Copies are issued with cudaMemcpyAsync on cuda::getActiveStream() and
// followed by cudaStreamSynchronize on that stream.
Array<T>::Array(af::dim4 dims, const T * const in_data, bool is_device, bool copy_device)
    : info(getActiveDeviceId(), dims, 0, calcStrides(dims),
           static_cast<af_dtype>(dtype_traits<T>::af_type)),
      data((is_device && !copy_device) ? const_cast<T*>(in_data)
                                       : memAlloc<T>(dims.elements()).release(),
           memFree<T>),
      data_dims(dims),
      node(bufferNodePtr<T>()),
      ready(true),
      owner(true)
{
#if __cplusplus > 199711L
    static_assert(std::is_standard_layout<Array<T>>::value, "Array<T> must be a standard layout type");
    static_assert(offsetof(Array<T>, info) == 0, "Array<T>::info must be the first member variable of Array<T>");
#endif

    if (is_device) {
        if (copy_device) {
            CUDA_CHECK(cudaMemcpyAsync(data.get(), in_data, dims.elements() * sizeof(T), cudaMemcpyDeviceToDevice, cuda::getActiveStream()));
            CUDA_CHECK(cudaStreamSynchronize(cuda::getActiveStream()));
        }
    } else {
        CUDA_CHECK(cudaMemcpyAsync(data.get(), in_data, dims.elements() * sizeof(T), cudaMemcpyHostToDevice, cuda::getActiveStream()));
        CUDA_CHECK(cudaStreamSynchronize(cuda::getActiveStream()));
    }
}
// Checks af::mean<outType>(a) on an array filled with one repeated value.
//
// const_value: value every element is set to.
// dims:        extent of the array.
//
// The reference is the host-side sum of all elements divided by
// dims.elements(). The test is skipped when noDoubleTests<T>() is true.
// Real and imaginary parts are each compared with a tolerance of 1.0e-3.
void testCPPMean(T const_value, af::dim4 dims) {
    typedef typename meanOutType<T>::type outType;

    if (noDoubleTests<T>()) return;

    using af::array;
    using af::mean;

    vector<T> hundred(dims.elements(), const_value);

    // Host reference: accumulate, then divide by the element count.
    outType gold = outType(0);
    for (typename vector<T>::const_iterator it = hundred.begin(); it != hundred.end(); ++it) {
        gold += *it;
    }
    gold /= dims.elements();

    array a(dims, &hundred[0]);
    outType output = mean<outType>(a);

    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-3);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-3);
}
// Produces an array in which `in` is repeated tileDims[k] times along each
// dimension k.
//
// in:       input array.
// tileDims: repetition factors; the output extent is in.dims() multiplied
//           by tileDims via dim4's operator*=.
//
// Each output coordinate is mapped back to the input with a modulo on every
// dimension. The innermost index is added to the offset without a stride
// factor, for both input and output.
//
// Throws std::runtime_error when the input or output would contain zero
// elements.
Array<T> tile(const Array<T> &in, const af::dim4 &tileDims)
{
    const af::dim4 iDims = in.dims();
    af::dim4 oDims = iDims;
    oDims *= tileDims;

    if (iDims.elements() == 0 || oDims.elements() == 0) {
        throw std::runtime_error("Elements are 0");
    }

    Array<T> out = createEmptyArray<T>(oDims);

    T *dst = out.get();
    const T *src = in.get();

    const af::dim4 srcStrides = in.strides();
    const af::dim4 dstStrides = out.strides();

    for (dim_t o3 = 0; o3 < oDims[3]; o3++) {
        const dim_t srcOff3 = (o3 % iDims[3]) * srcStrides[3];
        const dim_t dstOff3 = o3 * dstStrides[3];

        for (dim_t o2 = 0; o2 < oDims[2]; o2++) {
            const dim_t srcOff2 = srcOff3 + (o2 % iDims[2]) * srcStrides[2];
            const dim_t dstOff2 = dstOff3 + o2 * dstStrides[2];

            for (dim_t o1 = 0; o1 < oDims[1]; o1++) {
                const dim_t srcOff1 = srcOff2 + (o1 % iDims[1]) * srcStrides[1];
                const dim_t dstOff1 = dstOff2 + o1 * dstStrides[1];

                // Row base pointers; the innermost index is applied directly.
                const T *srcRow = src + srcOff1;
                T *dstRow = dst + dstOff1;

                for (dim_t o0 = 0; o0 < oDims[0]; o0++) {
                    dstRow[o0] = srcRow[o0 % iDims[0]];
                }
            }
        }
    }

    return out;
}
// Checks af::var<outType> on two inputs, calling it with both false and
// true as the second argument.
//
// const_value: value used to fill the constant array.
// dims:        extent of the constant array.
//
// Part 1: an array of identical values, expected result 0 for both calls.
// Part 2: the five values 0..4, expected result 2.5 when the second argument
//         is false and 2 when it is true.
//
// The test is skipped when noDoubleTests reports true for T or for the
// output type. All comparisons use a tolerance of 1.0e-3.
void testCPPVar(T const_value, af::dim4 dims) {
    typedef typename varOutType<T>::type outType;

    if (noDoubleTests<T>() || noDoubleTests<outType>()) return;

    using af::array;
    using af::var;

    // Part 1: constant input.
    vector<T> hundred(dims.elements(), const_value);
    outType gold = outType(0);
    array a(dims, &hundred[0]);

    outType output = var<outType>(a, false);
    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-3);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-3);

    output = var<outType>(a, true);
    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-3);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-3);

    // Part 2: the sequence 0, 1, 2, 3, 4.
    outType samples[] = { outType(0), outType(1), outType(2), outType(3), outType(4) };
    array b(5, samples);

    gold = outType(2.5);
    output = var<outType>(b, false);
    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-3);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-3);

    gold = outType(2);
    output = var<outType>(b, true);
    ASSERT_NEAR(::real(output), ::real(gold), 1.0e-3);
    ASSERT_NEAR(::imag(output), ::imag(gold), 1.0e-3);
}
// Returns dim_size.elements() converted to size_t.
size_t elements() const
{
    return static_cast<size_t>(dim_size.elements());
}
// Builds an Array of extent `dims` without copying any data into it.
//
// dims: extent of the array; also stored as data_dims.
//
// When dims.elements() is zero, `data` is initialised with nullptr and
// memAlloc is not called. Otherwise `data` is initialised with the pointer
// released from memAlloc<T>(dims.elements()). memFree<T> is passed alongside
// the pointer in both cases.
Array<T>::Array(af::dim4 dims)
    : info(getActiveDeviceId(), dims, 0, calcStrides(dims),
           static_cast<af_dtype>(dtype_traits<T>::af_type)),
      data(dims.elements() == 0 ? nullptr
                                : memAlloc<T>(dims.elements()).release(),
           memFree<T>),
      data_dims(dims),
      node(bufferNodePtr<T>()),
      ready(true),
      owner(true)
{
}
// Builds an Array of extent `dims` without copying any data into it.
//
// dims: extent of the array; also stored as data_dims.
//
// `data` is initialised with the result of memAlloc<T>(dims.elements())
// together with memFree<T>; the offset member starts at 0.
Array<T>::Array(af::dim4 dims)
    : info(getActiveDeviceId(), dims, af::dim4(0, 0, 0, 0), calcStrides(dims),
           static_cast<af_dtype>(dtype_traits<T>::af_type)),
      data(memAlloc<T>(dims.elements()), memFree<T>),
      data_dims(dims),
      node(),
      offset(0),
      ready(true),
      owner(true)
{
}