array constant(cfloat val, const dim4 &dims)
{
    af_array res;
    AF_THROW(af_constant_complex(&res, real(val), imag(val),
                                 dims.ndims(), dims.get(), c32));
    return array(res);
}
Array<T> convolve2(Array<T> const& signal, Array<accT> const& c_filter, Array<accT> const& r_filter)
{
    const dim4 cfDims = c_filter.dims();
    const dim4 rfDims = r_filter.dims();

    const dim_t cfLen = cfDims.elements();
    const dim_t rfLen = rfDims.elements();

    const dim4 sDims = signal.dims();
    dim4 tDims = sDims;
    dim4 oDims = sDims;

    if (expand) {
        tDims[0] += cfLen - 1;
        oDims[0] += cfLen - 1;
        oDims[1] += rfLen - 1;
    }

    Array<T> temp = createEmptyArray<T>(tDims);
    Array<T> out  = createEmptyArray<T>(oDims);

    kernel::convolve2<T, accT, 0, expand>(temp, signal, c_filter);
    kernel::convolve2<T, accT, 1, expand>(out, temp, r_filter);

    return out;
}
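// Usage sketch (illustrative, not part of the original sources; the helper name
// is made up): the column-then-row passes above are what the public separable
// af::convolve(col_filter, row_filter, signal) overload is expected to reach.
// Assumes only the public <arrayfire.h> API.
static af::array separable_blur_example(const af::array &img)
{
    af::array col_f = af::gaussianKernel(5, 1);  // 5x1 column filter
    af::array row_f = af::gaussianKernel(1, 5);  // 1x5 row filter
    return af::convolve(col_f, row_f, img);      // column pass, then row pass
}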
Array<T>::Array(dim4 dims, const T * const in_data) :
    ArrayInfo(getActiveDeviceId(), dims, dim4(0, 0, 0, 0), calcStrides(dims),
              (af_dtype)dtype_traits<T>::af_type),
    data(memAlloc<T>(dims.elements()), memFree<T>), data_dims(dims),
    node(), ready(true), offset(0), owner(true)
{
    std::copy(in_data, in_data + dims.elements(), data.get());
}
static void assign(af_array &out, const unsigned &ndims, const af_seq *index, const af_array &in)
{
    ArrayInfo iInfo = getInfo(in);
    ArrayInfo oInfo = getInfo(out);
    af_dtype iType  = iInfo.getType();

    dim4 const outDs = oInfo.dims();
    dim4 const iDims = iInfo.dims();

    ARG_ASSERT(0, (outDs.ndims() >= iDims.ndims()));
    ARG_ASSERT(1, (outDs.ndims() >= (int)ndims));

    AF_CHECK(af_eval(out));

    vector<af_seq> index_(index, index + ndims);

    dim4 const oStrides = af::toStride(index_, outDs);
    dim4 oDims    = af::toDims(index_, outDs);
    dim4 oOffsets = af::toOffset(index_, outDs);

    Array<T> *dst = createRefArray<T>(getArray<T>(out), oDims, oOffsets, oStrides);

    for (int i = 0; i < 4; i++) {
        if (oDims[i] != iDims[i])
            AF_ERROR("Size mismatch between input and output", AF_ERR_SIZE);
    }

    bool noCaseExecuted = true;
    if (isComplex) {
        noCaseExecuted = false;
        switch (iType) {
            case c64: copy<cdouble, T>(*dst, getArray<cdouble>(in), scalar<T>(0), 1.0); break;
            case c32: copy<cfloat , T>(*dst, getArray<cfloat >(in), scalar<T>(0), 1.0); break;
            default : noCaseExecuted = true; break;
        }
    }

    static const T ZERO = scalar<T>(0);
    if (noCaseExecuted) {
        noCaseExecuted = false;
        switch (iType) {
            case f64: copy<double, T>(*dst, getArray<double>(in), ZERO, 1.0); break;
            case f32: copy<float , T>(*dst, getArray<float >(in), ZERO, 1.0); break;
            case s32: copy<int   , T>(*dst, getArray<int   >(in), ZERO, 1.0); break;
            case u32: copy<uint  , T>(*dst, getArray<uint  >(in), ZERO, 1.0); break;
            case u8 : copy<uchar , T>(*dst, getArray<uchar >(in), ZERO, 1.0); break;
            case b8 : copy<char  , T>(*dst, getArray<char  >(in), ZERO, 1.0); break;
            default : noCaseExecuted = true; break;
        }
    }

    if (noCaseExecuted) TYPE_ERROR(1, iType);

    delete dst;
}
AFAPI array constant(cdouble val, const dim4 &dims, const af::dtype type)
{
    if (type != c32 && type != c64) {
        return constant(real(val), dims, type);
    }

    af_array res;
    AF_THROW(af_constant_complex(&res, real(val), imag(val),
                                 dims.ndims(), dims.get(), type));
    return array(res);
}
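// Usage sketch (illustrative, hypothetical helper; assumes <arrayfire.h>): the
// complex overloads above fill an array with a single complex value, while
// non-complex target types fall back to the real part only.
static void complex_constant_example()
{
    af::array z32 = af::constant(af::cfloat(1.0f, -2.0f), af::dim4(3, 3));      // 3x3 of (1 - 2i), c32
    af::array z64 = af::constant(af::cdouble(0.5, 0.25), af::dim4(2, 2), c64);  // 2x2 of (0.5 + 0.25i)
    af::array re  = af::constant(af::cdouble(0.5, 0.25), af::dim4(2, 2), f32);  // imaginary part dropped
}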
Array<T>::Array(dim4 dims, const T * const in_data, bool is_device, bool copy_device) :
    info(getActiveDeviceId(), dims, 0, calcStrides(dims),
         (af_dtype)dtype_traits<T>::af_type),
    data((is_device && !copy_device) ? (T*)in_data
                                     : memAlloc<T>(dims.elements()).release(),
         memFree<T>),
    data_dims(dims), node(bufferNodePtr<T>()), ready(true), owner(true)
{
    static_assert(is_standard_layout<Array<T>>::value,
                  "Array<T> must be a standard layout type");
    static_assert(offsetof(Array<T>, info) == 0,
                  "Array<T>::info must be the first member variable of Array<T>");
    if (!is_device || copy_device) {
        // Ensure the memory being written to isn't used anywhere else.
        getQueue().sync();
        copy(in_data, in_data + dims.elements(), data.get());
    }
}
static void assign(Array<Tout> &out, const unsigned &ndims, const af_seq *index, const Array<Tin> &in_)
{
    dim4 const outDs = out.dims();
    dim4 const iDims = in_.dims();

    DIM_ASSERT(0, (outDs.ndims() >= iDims.ndims()));
    DIM_ASSERT(0, (outDs.ndims() >= (dim_t)ndims));

    out.eval();

    vector<af_seq> index_(index, index + ndims);

    dim4 oDims = toDims(index_, outDs);

    bool is_vector = true;
    for (int i = 0; is_vector && i < (int)oDims.ndims() - 1; i++) {
        is_vector &= oDims[i] == 1;
    }

    is_vector &= in_.isVector() || in_.isScalar();

    for (dim_t i = ndims; i < (int)in_.ndims(); i++) { oDims[i] = 1; }

    if (is_vector) {
        if (oDims.elements() != (dim_t)in_.elements() && in_.elements() != 1) {
            AF_ERROR("Size mismatch between input and output", AF_ERR_SIZE);
        }

        // If both out and in are vectors of equal elements, reshape in to out dims
        Array<Tin> in = in_.elements() == 1 ? tile(in_, oDims)
                                            : modDims(in_, oDims);
        Array<Tout> dst = createSubArray<Tout>(out, index_, false);

        copyArray<Tin, Tout>(dst, in);
    } else {
        for (int i = 0; i < 4; i++) {
            if (oDims[i] != iDims[i]) {
                AF_ERROR("Size mismatch between input and output", AF_ERR_SIZE);
            }
        }
        Array<Tout> dst = createSubArray<Tout>(out, index_, false);
        copyArray<Tin, Tout>(dst, in_);
    }
}
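// Usage sketch (illustrative, hypothetical helper; public API only): the
// is_vector special case above is what lets a 1D right-hand side fill a row or
// column slice whose shape differs only by singleton dimensions, and what lets
// a scalar broadcast into any slice.
static void subarray_assign_example()
{
    af::array A = af::constant(0, 4, 4);
    af::array v = af::iota(af::dim4(4));   // 4-element column vector: 0,1,2,3
    A(2, af::span) = v;                    // 1x4 destination, 4x1 source: reshaped, not rejected
    A(af::seq(0, 1), 0) = 7;               // scalar broadcast into a 2x1 slice
}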
Array<T>::Array(dim4 dims) :
    info(getActiveDeviceId(), dims, 0, calcStrides(dims),
         (af_dtype)dtype_traits<T>::af_type),
    data(memAlloc<T>(dims.elements()).release(), memFree<T>),
    data_dims(dims), node(bufferNodePtr<T>()), ready(true), owner(true)
{}
dim4 calcStrides(const dim4 &parentDim)
{
    dim4 out(1, 1, 1, 1);
    dim_t *out_dims = out.get();
    const dim_t *parent_dims = parentDim.get();

    for (dim_t i = 1; i < 4; i++) {
        out_dims[i] = out_dims[i - 1] * parent_dims[i - 1];
    }

    return out;
}
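// Worked example (illustrative, not part of the original file): for
// column-major dims (3, 4, 5, 2) the loop above yields strides (1, 3, 12, 60),
// i.e. each stride is the product of all lower-dimension sizes, so element
// (i, j, k, l) lives at linear offset i + 3*j + 12*k + 60*l.
//
//     dim4 s = calcStrides(dim4(3, 4, 5, 2));
//     // s == (1, 3, 12, 60)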
Array<in_t> lookup(const Array<in_t> &input, const Array<idx_t> &indices, const unsigned dim)
{
    const dim4 iDims = input.dims();

    dim4 oDims(1);
    for (int d = 0; d < 4; ++d)
        oDims[d] = (d == int(dim) ? indices.elements() : iDims[d]);

    Array<in_t> out = createEmptyArray<in_t>(oDims);

    dim_t nDims = iDims.ndims();

    switch (dim) {
        case 0: kernel::lookup<in_t, idx_t, 0>(out, input, indices, nDims); break;
        case 1: kernel::lookup<in_t, idx_t, 1>(out, input, indices, nDims); break;
        case 2: kernel::lookup<in_t, idx_t, 2>(out, input, indices, nDims); break;
        case 3: kernel::lookup<in_t, idx_t, 3>(out, input, indices, nDims); break;
    }

    return out;
}
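// Usage sketch (illustrative, hypothetical helper; public API only): af::lookup
// gathers slices along one dimension using an index array, and the output keeps
// every other dimension of the input, as the oDims computation above shows.
static void lookup_example()
{
    af::array A = af::randu(5, 3);
    unsigned hidx[] = {4u, 0u};
    af::array idx(2, hidx);                    // u32 index array on the device
    af::array rows = af::lookup(A, idx, 0);    // 2x3 result: rows 4 and 0 of A
}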
AF_BATCH_KIND identifyBatchKind(const dim4 &sDims, const dim4 &fDims)
{
    dim_t sn = sDims.ndims();
    dim_t fn = fDims.ndims();

    if (sn == baseDim && fn == baseDim)
        return AF_BATCH_NONE;
    else if (sn == baseDim && (fn > baseDim && fn <= 4))
        return AF_BATCH_RHS;
    else if ((sn > baseDim && sn <= 4) && fn == baseDim)
        return AF_BATCH_LHS;
    else if ((sn > baseDim && sn <= 4) && (fn > baseDim && fn <= 4)) {
        bool doesDimensionsMatch = true;
        bool isInterleaved = true;
        for (dim_t i = baseDim; i < 4; i++) {
            doesDimensionsMatch &= (sDims[i] == fDims[i]);
            isInterleaved &= (sDims[i] == 1 || fDims[i] == 1 || sDims[i] == fDims[i]);
        }
        if (doesDimensionsMatch) return AF_BATCH_SAME;
        return (isInterleaved ? AF_BATCH_DIFF : AF_BATCH_UNSUPPORTED);
    } else
        return AF_BATCH_UNSUPPORTED;
}
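// Worked example (illustrative): with baseDim == 2, a 640x480x8 signal against a
// 5x5 filter gives AF_BATCH_LHS (many signals, one filter); 640x480x8 against
// 5x5x8 gives AF_BATCH_SAME; 640x480x8x1 against 5x5x1x3 gives AF_BATCH_DIFF
// (batches on different dimensions, each singleton on the other side); and a
// mismatched, non-singleton batch dimension such as 640x480x8 against 5x5x3
// falls through to AF_BATCH_UNSUPPORTED.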
ConvolveBatchKind identifyBatchKind(const dim4 &sDims, const dim4 &fDims)
{
    dim_t sn = sDims.ndims();
    dim_t fn = fDims.ndims();

    if (sn == baseDim && fn == baseDim)
        return ONE2ONE;
    else if (sn == baseDim && (fn > baseDim && fn <= 4))
        return ONE2MANY;
    else if ((sn > baseDim && sn <= 4) && fn == baseDim)
        return MANY2ONE;
    else if ((sn > baseDim && sn <= 4) && (fn > baseDim && fn <= 4)) {
        bool doesDimensionsMatch = true;
        for (dim_t i = baseDim; i < 4; i++) {
            if (sDims[i] != fDims[i]) {
                doesDimensionsMatch = false;
                break;
            }
        }
        return (doesDimensionsMatch ? MANY2MANY : CONVOLVE_UNSUPPORTED_BATCH_MODE);
    } else
        return CONVOLVE_UNSUPPORTED_BATCH_MODE;
}
array constant(T val, const dim4 &dims, const af::dtype type)
{
    af_array res;
    if (type != s64 && type != u64) {
        AF_THROW(af_constant(&res, (double)val, dims.ndims(), dims.get(), type));
    } else if (type == s64) {
        AF_THROW(af_constant_long (&res, ( intl)val, dims.ndims(), dims.get()));
    } else {
        AF_THROW(af_constant_ulong(&res, (uintl)val, dims.ndims(), dims.get()));
    }
    return array(res);
}
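// Usage sketch (illustrative, hypothetical helper; assumes <arrayfire.h>): most
// types are routed through af_constant with a double payload, while 64-bit
// integers take the af_constant_long/af_constant_ulong path above, which avoids
// rounding the value through a double.
static void typed_constant_example()
{
    af::array ones_f32 = af::constant(1, af::dim4(3, 3));                 // defaults to f32
    af::array ones_u8  = af::constant(1, af::dim4(3, 3), u8);
    af::array big_s64  = af::constant(9007199254740993LL, af::dim4(2), s64);  // 2^53 + 1 kept exactly
}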
array identity(const dim4 &dims, const af::dtype type)
{
    af_array res;
    AF_THROW(af_identity(&res, dims.ndims(), dims.get(), type));
    return array(res);
}
void fast_pyramid(std::vector<unsigned>& feat_pyr, std::vector<float*>& d_x_pyr,
                  std::vector<float*>& d_y_pyr, std::vector<unsigned>& lvl_best,
                  std::vector<float>& lvl_scl, std::vector<CParam<T> >& img_pyr,
                  CParam<T> in, const float fast_thr, const unsigned max_feat,
                  const float scl_fctr, const unsigned levels, const unsigned patch_size)
{
    unsigned min_side = std::min(in.dims[0], in.dims[1]);
    unsigned max_levels = 0;
    float scl_sum = 0.f;

    for (unsigned i = 0; i < levels; i++) {
        min_side /= scl_fctr;

        // Minimum image side for a descriptor to be computed
        if (min_side < patch_size || max_levels == levels) break;

        max_levels++;
        scl_sum += 1.f / (float)std::pow(scl_fctr, (float)i);
    }

    // Compute number of features to keep for each level
    lvl_best.resize(max_levels);
    lvl_scl.resize(max_levels);
    unsigned feat_sum = 0;
    for (unsigned i = 0; i < max_levels - 1; i++) {
        float scl = (float)std::pow(scl_fctr, (float)i);
        lvl_scl[i] = scl;

        lvl_best[i] = ceil((max_feat / scl_sum) / lvl_scl[i]);
        feat_sum += lvl_best[i];
    }
    lvl_scl[max_levels - 1] = (float)std::pow(scl_fctr, (float)max_levels - 1);
    lvl_best[max_levels - 1] = max_feat - feat_sum;

    // Hold multi-scale image pyramids
    static const dim4 dims0;
    static const CParam<T> emptyCParam(NULL, dims0.get(), dims0.get());

    // Need to do this as CParam does not have a default constructor
    // and resize needs a default constructor or default value prior to C++11
    img_pyr.resize(max_levels, emptyCParam);

    // Create multi-scale image pyramid
    for (unsigned i = 0; i < max_levels; i++) {
        if (i == 0) {
            // First level is used in its original size
            img_pyr[i].ptr = in.ptr;
            for (int k = 0; k < 4; k++) {
                img_pyr[i].dims[k]    = in.dims[k];
                img_pyr[i].strides[k] = in.strides[k];
            }
        } else {
            // Resize previous level image to current level dimensions
            Param<T> lvl_img;
            lvl_img.dims[0] = round(in.dims[0] / lvl_scl[i]);
            lvl_img.dims[1] = round(in.dims[1] / lvl_scl[i]);
            lvl_img.strides[0] = 1;
            lvl_img.strides[1] = lvl_img.dims[0] * lvl_img.strides[0];

            for (int k = 2; k < 4; k++) {
                lvl_img.dims[k]    = 1;
                lvl_img.strides[k] = lvl_img.dims[k - 1] * lvl_img.strides[k - 1];
            }

            int lvl_elem = lvl_img.strides[3] * lvl_img.dims[3];
            lvl_img.ptr = memAlloc<T>(lvl_elem);

            resize<T, AF_INTERP_BILINEAR>(lvl_img, img_pyr[i - 1]);

            img_pyr[i].ptr = lvl_img.ptr;
            for (int k = 0; k < 4; k++) {
                img_pyr[i].dims[k]    = lvl_img.dims[k];
                img_pyr[i].strides[k] = lvl_img.strides[k];
            }
        }
    }

    feat_pyr.resize(max_levels);
    d_x_pyr.resize(max_levels);
    d_y_pyr.resize(max_levels);

    for (unsigned i = 0; i < max_levels; i++) {
        unsigned lvl_feat = 0;
        float* d_x_feat = NULL;
        float* d_y_feat = NULL;
        float* d_score_feat = NULL;

        // Round feature size to nearest odd integer
        float size = 2.f * floor(patch_size / 2.f) + 1.f;

        // Avoid keeping features that are too wide and might not fit the image,
        // sqrt(2.f) is the radius when angle is 45 degrees and represents
        // widest case possible
        unsigned edge = ceil(size * sqrt(2.f) / 2.f);

        // Detects FAST features
        fast(&lvl_feat, &d_x_feat, &d_y_feat, &d_score_feat, img_pyr[i],
             fast_thr, 9, 1, 0.15f, edge);

        // FAST score is not used
        memFree(d_score_feat);

        if (lvl_feat == 0) {
            feat_pyr[i] = 0;
            d_x_pyr[i] = NULL;
            d_y_pyr[i] = NULL;
        } else {
            feat_pyr[i] = lvl_feat;
            d_x_pyr[i] = d_x_feat;
            d_y_pyr[i] = d_y_feat;
        }
    }
}
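// Worked example of the per-level feature budget (illustrative, assuming the
// pyramid loop accepts three levels): with scl_fctr = 2 and max_feat = 400,
// scl_sum = 1 + 1/2 + 1/4 = 1.75, so lvl_best[0] = ceil(400/1.75 / 1) = 229,
// lvl_best[1] = ceil(400/1.75 / 2) = 115, and the last level takes the
// remainder, lvl_best[2] = 400 - 344 = 56.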
array randn(const dim4 &dims, const dtype ty, randomEngine &r)
{
    af_array out;
    AF_THROW(af_random_normal(&out, dims.ndims(), dims.get(), ty, r.get()));
    return array(out);
}
array constant(double val, const dim4 &dims, af_dtype type)
{
    af_array res;
    AF_THROW(af_constant(&res, val, dims.ndims(), dims.get(), type));
    return array(res);
}
array iota(const dim4 &dims, const unsigned rep, af_dtype ty)
{
    af_array out;
    AF_THROW(af_iota(&out, dims.ndims(), dims.get(), rep, ty));
    return array(out);
}
// Assign values to an array
array::array_proxy& af::array::array_proxy::operator=(const array &other)
{
    unsigned nd = numDims(impl->parent_->get());
    const dim4 this_dims  = getDims(impl->parent_->get());
    const dim4 other_dims = other.dims();
    int dim = gforDim(impl->indices_);
    af_array other_arr = other.get();

    bool batch_assign = false;
    bool is_reordered = false;
    if (dim >= 0) {
        //FIXME: Figure out a faster, cleaner way to do this
        dim4 out_dims = seqToDims(impl->indices_, this_dims, false);

        batch_assign = true;
        for (int i = 0; i < AF_MAX_DIMS; i++) {
            if (this->impl->indices_[i].isBatch) batch_assign &= (other_dims[i] == 1);
            else                                 batch_assign &= (other_dims[i] == out_dims[i]);
        }

        if (batch_assign) {
            af_array out;
            AF_THROW(af_tile(&out, other_arr,
                             out_dims[0] / other_dims[0],
                             out_dims[1] / other_dims[1],
                             out_dims[2] / other_dims[2],
                             out_dims[3] / other_dims[3]));
            other_arr = out;
        } else if (out_dims != other_dims) {
            // HACK: This is a quick check to see if other has been reordered inside gfor
            // TODO: Figure out if this breaks and implement a cleaner method
            other_arr = gforReorder(other_arr, dim);
            is_reordered = true;
        }
    }

    af_array par_arr = 0;

    if (impl->is_linear_) {
        AF_THROW(af_flat(&par_arr, impl->parent_->get()));
        nd = 1;
    } else {
        par_arr = impl->parent_->get();
    }

    af_array tmp = 0;
    AF_THROW(af_assign_gen(&tmp, par_arr, nd, impl->indices_, other_arr));

    af_array res = 0;
    if (impl->is_linear_) {
        AF_THROW(af_moddims(&res, tmp, this_dims.ndims(), this_dims.get()));
        AF_THROW(af_release_array(par_arr));
        AF_THROW(af_release_array(tmp));
    } else {
        res = tmp;
    }

    impl->parent_->set(res);

    if (dim >= 0 && (is_reordered || batch_assign)) {
        if (other_arr) AF_THROW(af_release_array(other_arr));
    }
    return *this;
}
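// Usage sketch (illustrative, hypothetical helper; public API only): the
// batch_assign branch above is taken when a gfor body assigns a right-hand side
// whose size is 1 along the batch dimension, in which case the value is tiled
// to the full batch extent before af_assign_gen runs.
static void gfor_assign_example()
{
    af::array A = af::constant(0, 10, 4);
    gfor (af::seq i, 4) {
        A(af::span, i) = af::randu(10);   // RHS is 10x1: tiled across all 4 batch columns
    }
}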
array::array(const array& input, const dim4& dims) : arr(0)
{
    AF_THROW(af_moddims(&arr, input.get(), AF_MAX_DIMS, dims.get()));
}
af_err af_approx1_uniform(af_array *yo, const af_array yi,
                          const af_array xo, const int xdim,
                          const double xi_beg, const double xi_step,
                          const af_interp_type method, const float offGrid)
{
    try {
        const ArrayInfo& yi_info = getInfo(yi);
        const ArrayInfo& xo_info = getInfo(xo);

        const dim4 yi_dims = yi_info.dims();
        const dim4 xo_dims = xo_info.dims();

        ARG_ASSERT(1, yi_info.isFloating());                      // Only floating and complex types
        ARG_ASSERT(2, xo_info.isRealFloating());                  // Only floating types
        ARG_ASSERT(1, yi_info.isSingle() == xo_info.isSingle());  // Must have same precision
        ARG_ASSERT(1, yi_info.isDouble() == xo_info.isDouble());  // Must have same precision
        ARG_ASSERT(3, xdim >= 0 && xdim < 4);

        // POS should either be (x, 1, 1, 1) or (1, yi_dims[1], yi_dims[2], yi_dims[3])
        if (xo_dims[xdim] != xo_dims.elements()) {
            for (int i = 0; i < 4; i++) {
                if (xdim != i) DIM_ASSERT(2, xo_dims[i] == yi_dims[i]);
            }
        }

        ARG_ASSERT(5, xi_step != 0);
        ARG_ASSERT(6, (method == AF_INTERP_CUBIC ||
                       method == AF_INTERP_CUBIC_SPLINE ||
                       method == AF_INTERP_LINEAR ||
                       method == AF_INTERP_LINEAR_COSINE ||
                       method == AF_INTERP_LOWER ||
                       method == AF_INTERP_NEAREST));

        if (yi_dims.ndims() == 0 || xo_dims.ndims() == 0) {
            *yo = createHandle(dim4(0, 0, 0, 0), yi_info.getType());
            return AF_SUCCESS;
        }

        dim4 yo_dims = yi_dims;
        yo_dims[xdim] = xo_dims[xdim];
        if (*yo == 0) { *yo = createHandle(yo_dims, yi_info.getType()); }

        DIM_ASSERT(1, getInfo(*yo).dims() == yo_dims);

        switch (yi_info.getType()) {
            case f32: approx1<float  , float >(yo, yi, xo, xdim, xi_beg, xi_step, method, offGrid); break;
            case f64: approx1<double , double>(yo, yi, xo, xdim, xi_beg, xi_step, method, offGrid); break;
            case c32: approx1<cfloat , float >(yo, yi, xo, xdim, xi_beg, xi_step, method, offGrid); break;
            case c64: approx1<cdouble, double>(yo, yi, xo, xdim, xi_beg, xi_step, method, offGrid); break;
            default:  TYPE_ERROR(1, yi_info.getType());
        }
    }
    CATCHALL;

    return AF_SUCCESS;
}
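// Usage sketch (illustrative, hypothetical helper; public API only): interpolate
// a 1D signal at fractional positions. Because the signal values below equal
// their grid positions, linear interpolation returns the query positions.
static void approx1_example()
{
    af::array yi = af::seq(0, 9);            // values 0..9 sampled at positions 0..9
    float hxo[] = {0.5f, 2.25f, 7.5f};
    af::array xo(3, hxo);
    af::array yo = af::approx1(yi, xo);      // AF_INTERP_LINEAR by default: {0.5, 2.25, 7.5}
}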
    int num = (int)dims.elements();

    vector<float> hb(num);
    vector<float> hc(num);
    b.host(&hb[0]);
    c.host(&hc[0]);

    for (int i = 0; i < num; i++) {
        EXPECT_NEAR(hc[i], hb[i], 1e-7) << "at " << i;
    }
}

TEST(Select, 4D)
{
    dim4 dims(2, 3, 4, 2);
    array cond = randu(dims) > 0.5;
    array a = randu(dims);
    array b = select(cond, a - a * 0.9, a);
    array c = a - a * cond * 0.9;

    int num = (int)dims.elements();

    vector<float> hb(num);
    vector<float> hc(num);
    b.host(&hb[0]);
    c.host(&hc[0]);

    for (int i = 0; i < num; i++) {
        EXPECT_NEAR(hc[i], hb[i], 1e-7) << "at " << i;
    }
}
array randn(const dim4 &dims, const af::dtype type)
{
    af_array res;
    AF_THROW(af_randn(&res, dims.ndims(), dims.get(), type));
    return array(res);
}
array range(const dim4 &dims, const int seq_dim, const af::dtype ty)
{
    af_array out;
    AF_THROW(af_range(&out, dims.ndims(), dims.get(), seq_dim, ty));
    return array(out);
}
array moddims(const array& in, const dim4& dims)
{
    return af::moddims(in, dims.ndims(), dims.get());
}
array iota(const dim4 &dims, const dim4 &tile_dims, const af::dtype ty)
{
    af_array out;
    AF_THROW(af_iota(&out, dims.ndims(), dims.get(),
                     tile_dims.ndims(), tile_dims.get(), ty));
    return array(out);
}
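// Usage sketch (illustrative, hypothetical helper; assumes <arrayfire.h>): iota
// fills 0..elements-1 in column-major order over dims and then tiles the result
// by tile_dims.
static void iota_example()
{
    af::array a = af::iota(af::dim4(3), af::dim4(1, 2));  // 3x2: each column is 0,1,2
    af::array b = af::iota(af::dim4(2, 2));               // 2x2, column-major: [0 2; 1 3]
}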