void sparseArithTesterMul(const int m, const int n, int factor, const double eps) { deviceGC(); if (noDoubleTests<T>()) return; #if 1 array A = cpu_randu<T>(dim4(m, n)); array B = cpu_randu<T>(dim4(m, n)); #else array A = randu(m, n, (dtype)dtype_traits<T>::af_type); array B = randu(m, n, (dtype)dtype_traits<T>::af_type); #endif A = makeSparse<T>(A, factor); array RA = sparse(A, AF_STORAGE_CSR); array OA = sparse(A, AF_STORAGE_COO); // Forward { // Arith Op array resR = arith_op<af_mul_t>()(RA, B); array resO = arith_op<af_mul_t>()(OA, B); // We will test this by converting the COO to CSR and CSR to COO and // comparing them. In essense, we are comparing the resR and resO // TODO: Make a better comparison using dense // Check resR against conR array conR = sparseConvertTo(resR, AF_STORAGE_CSR); sparseCompare<T>(resR, conR, eps); // Check resO against conO array conO = sparseConvertTo(resR, AF_STORAGE_COO); sparseCompare<T>(resO, conO, eps); } // Reverse { // Arith Op array resR = arith_op<af_mul_t>()(B, RA); array resO = arith_op<af_mul_t>()(B, OA); // We will test this by converting the COO to CSR and CSR to COO and // comparing them. In essense, we are comparing the resR and resO // TODO: Make a better comparison using dense // Check resR against conR array conR = sparseConvertTo(resR, AF_STORAGE_CSR); sparseCompare<T>(resR, conR, eps); // Check resO against conO array conO = sparseConvertTo(resR, AF_STORAGE_COO); sparseCompare<T>(resO, conO, eps); } }
// Runs where() on inputs whose long axis has 65535 * 32 + 2 elements, once
// with that axis as dimension 1 and once as dimension 3. In both layouts the
// even-valued entries are expected at linear indices 0, 2, 4, ...
TEST(Where, MaxDim) {
    const size_t largeDim = 65535 * 32 + 2;

    // Expected indices, shared by both layouts.
    array gold = 2 * range(largeDim / 2);

    const dim4 shapes[2] = {dim4(1, largeDim), dim4(1, 1, 1, largeDim)};
    const int seqDims[2] = {1, 3};

    for (int layout = 0; layout < 2; ++layout) {
        array input  = range(shapes[layout], seqDims[layout]);
        array output = where(input % 2 == 0);
        ASSERT_ARRAYS_EQ(gold.as(u32), output);
    }
}
// Enqueues kernel::assign<T> to write rhs into out at the locations described
// by the four per-dimension indexers in idxrs.
//
// out   : destination array (evaluated before the kernel is enqueued)
// idxrs : four indexers; each is either a sequence or an index array
// rhs   : source array (evaluated before the kernel is enqueued)
void assign(Array<T>& out, const af_index_t idxrs[], const Array<T>& rhs) {
    out.eval();
    rhs.eval();

    vector<bool> seqFlags(4);
    // Dimensions that are not sequences keep the af_span default.
    vector<af_seq> seqList(4, af_span);
    // Dimensions that are sequences keep the empty placeholder array.
    vector< Array<uint> > indexArrays(4, createEmptyArray<uint>(dim4()));

    for (dim_t d = 0; d < 4; ++d) {
        seqFlags[d] = idxrs[d].isSeq;
        if (seqFlags[d]) {
            seqList[d] = idxrs[d].idx.seq;
        } else {
            // Index arrays are cast to uint and evaluated up front.
            indexArrays[d] = castArray<uint>(idxrs[d].idx.arr);
            indexArrays[d].eval();
        }
    }

    vector<CParam<uint>> indexParams(indexArrays.begin(), indexArrays.end());

    getQueue().enqueue(kernel::assign<T>, out, out.getDataDims(), rhs,
                       move(seqFlags), move(seqList), move(indexParams));
}
// Builds one nbins-bin histogram per 2D slice of `in`.
//
// in     : input array; each slice holds dims[0] * dims[1] values
// nbins  : number of bins per histogram
// minval : lower end of the binned range
// maxval : upper end of the binned range
// Returns an array of dims (nbins, 1, in.dims()[2], in.dims()[3]).
//
// Values that fall outside [minval, maxval) are clamped into the first or the
// last bin.
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval) {
    const float binWidth = (maxval - minval) / static_cast<float>(nbins);

    const dim4 srcDims    = in.dims();
    const dim4 srcStrides = in.strides();
    const dim4 dstDims    = dim4(nbins, 1, srcDims[2], srcDims[3]);

    Array<outType> out    = createValueArray<outType>(dstDims, outType(0));
    const dim4 dstStrides = out.strides();
    const dim_t perSlice  = srcDims[0] * srcDims[1];

    outType *dstBase      = out.get();
    const inType *srcBase = in.get();

    const int lastBin = static_cast<int>(nbins - 1);

    // Slices along dims 2 and 3 are visited in order; consecutive slices are
    // stepped by the dim-2 stride of the respective array.
    const dim_t sliceCount = dstDims[3] * dstDims[2];
    for (dim_t s = 0; s < sliceCount; ++s) {
        const inType *src = srcBase + s * srcStrides[2];
        outType *dst      = dstBase + s * dstStrides[2];

        for (dim_t e = 0; e < perSlice; ++e) {
            int bin = static_cast<int>((src[e] - minval) / binWidth);
            bin     = std::min(std::max(bin, 0), lastBin);
            dst[bin]++;
        }
    }

    return out;
}
// Runs accum() on arrays with one axis of 65535 * 32 + 1 elements. Only the
// first 10000 entries along the marked axis are set to 1; everything else is
// zero. Each case is checked against an explicitly built expected array.
TEST(Accum, MaxDim) {
    const size_t largeDim = 65535 * 32 + 1;
    const seq head(0, 9999);

    // accum along dim 0, long axis is dim 1
    {
        array input = constant(0, 2, largeDim, 2, 2);
        input(span, head, span, span) = 1;

        array gold_first = constant(0, 2, largeDim, 2, 2);
        gold_first(span, head, span, span) = range(2, 10000, 2, 2) + 1;

        array output_first = accum(input, 0);
        ASSERT_ARRAYS_EQ(gold_first, output_first);
    }

    // accum along dim 0, long axis is dim 3
    {
        array input = constant(0, 2, 2, 2, largeDim);
        input(span, span, span, head) = 1;

        array gold_first = constant(0, 2, 2, 2, largeDim);
        gold_first(span, span, span, head) = range(2, 2, 2, 10000) + 1;

        array output_first = accum(input, 0);
        ASSERT_ARRAYS_EQ(gold_first, output_first);
    }

    // accum along dim 1, long axis is dim 1: the running sum stays at 10000
    // once the block of ones has been passed
    {
        array input = constant(0, 2, largeDim, 2, 2);
        input(span, head, span, span) = 1;

        array gold_dim = constant(10000, 2, largeDim, 2, 2);
        gold_dim(span, head, span, span) = range(dim4(2, 10000, 2, 2), 1) + 1;

        array output_dim = accum(input, 1);
        ASSERT_ARRAYS_EQ(gold_dim, output_dim);
    }

    // accum along dim 1, long axis is dim 3
    {
        array input = constant(0, 2, 2, 2, largeDim);
        input(span, span, span, head) = 1;

        array gold_dim = constant(0, 2, 2, 2, largeDim);
        gold_dim(span, span, span, head) = range(dim4(2, 2, 2, 10000), 1) + 1;

        array output_dim = accum(input, 1);
        ASSERT_ARRAYS_EQ(gold_dim, output_dim);
    }
}
// Estimates the rank of `in` from the R factor of its QR decomposition.
//
// in  : handle of the input array
// tol : threshold; entries of R are compared against it with greater-than
// Returns the number of non-zero entries left after OR-reducing the
// comparison result along dimension 1.
static inline uint rank(const af_array in, double tol) {
    Array<T> input = getArray<T>(in);

    Array<T> rFactor = createEmptyArray<T>(dim4());
    {
        // Q and the third qr() output are only needed for the call itself;
        // the scope releases them immediately afterwards.
        Array<T> qFactor = createEmptyArray<T>(dim4());
        Array<T> tau     = createEmptyArray<T>(dim4());
        qr(qFactor, rFactor, tau, input);
    }

    Array<T> threshold     = createValueArray<T>(rFactor.dims(), scalar<T>(tol));
    Array<char> aboveTol   = logicOp<T, af_gt_t>(rFactor, threshold, threshold.dims());
    Array<char> anyAlongD1 = reduce<af_or_t, char, char>(aboveTol, 1);

    return reduce_all<af_notzero_t, char, uint>(anyAlongD1);
}
// Allocates a zero-initialised output of dims
// (nbins, 1, in.dims()[2], in.dims()[3]) and delegates the binning to
// kernel::histogram.
//
// in     : input array
// nbins  : number of bins per histogram
// minval : lower end of the binned range (forwarded to the kernel)
// maxval : upper end of the binned range (forwarded to the kernel)
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval) {
    const dim4 srcDims = in.dims();

    dim4 dstDims = dim4(nbins, 1, srcDims[2], srcDims[3]);
    Array<outType> result = createValueArray<outType>(dstDims, outType(0));

    kernel::histogram<inType, outType, isLinear>(result, in, nbins, minval, maxval);

    return result;
}
// Reorders the input array, normalises it per type and copies it into a
// forge::Image obtained from the ForgeManager.
//
// in : handle of the image array
// Returns the forge image that received the data.
static forge::Image* convert_and_copy_image(const af_array in) {
    const Array<T> source = getArray<T>(in);
    dim4 srcDims          = source.dims();

    // Arrays with more than one entry along dim 2 get dims 0 and 2 swapped;
    // otherwise dims 0 and 1 are swapped.
    dim4 order(1, 0, 2, 3);
    if (srcDims[2] > 1) { order = dim4(2, 1, 0, 3); }

    Array<T> reordered = reorder(source, order);

    ForgeManager& manager = ForgeManager::getInstance();

    // The srcDims[2] * 100 is a hack to convert to forge::ChannelFormat
    // TODO Write a proper conversion function
    const forge::ChannelFormat format =
        static_cast<forge::ChannelFormat>(srcDims[2] * 100);
    forge::Image* image =
        manager.getImage(srcDims[1], srcDims[0], format, getGLType<T>());

    copy_image<T>(normalizePerType<T>(reordered), image);

    return image;
}
void sparseArithTesterDiv(const int m, const int n, int factor, const double eps) { deviceGC(); if (noDoubleTests<T>()) return; #if 1 array A = cpu_randu<T>(dim4(m, n)); array B = cpu_randu<T>(dim4(m, n)); #else array A = randu(m, n, (dtype)dtype_traits<T>::af_type); array B = randu(m, n, (dtype)dtype_traits<T>::af_type); #endif A = makeSparse<T>(A, factor); array RA = sparse(A, AF_STORAGE_CSR); array OA = sparse(A, AF_STORAGE_COO); // Arith Op array resR = arith_op<af_div_t>()(RA, B); array resO = arith_op<af_div_t>()(OA, B); // Assert division by sparse is not allowed af_array out_temp = 0; ASSERT_EQ(AF_ERR_NOT_SUPPORTED, af_div(&out_temp, B.get(), RA.get(), false)); ASSERT_EQ(AF_ERR_NOT_SUPPORTED, af_div(&out_temp, B.get(), OA.get(), false)); if(out_temp != 0) af_release_array(out_temp); // We will test this by converting the COO to CSR and CSR to COO and // comparing them. In essense, we are comparing the resR and resO // TODO: Make a better comparison using dense // Check resR against conR array conR = sparseConvertTo(resR, AF_STORAGE_CSR); sparseCompare<T>(resR, conR, eps); // Check resO against conO array conO = sparseConvertTo(resR, AF_STORAGE_COO); sparseCompare<T>(resO, conO, eps); }
// Returns a newly created array holding `in` with its first two dimensions
// swapped; the work is done by kernel::transpose.
//
// For double and cdouble element types, OPENCL_NOT_SUPPORTED() is invoked when
// the active device reports no double support.
Array<T> * transpose(const Array<T> &in) {
    const bool usesDouble =
        std::is_same<T, double>::value || std::is_same<T, cdouble>::value;

    // The device is only queried when the element type needs doubles.
    if (usesDouble && !isDoubleSupported(getActiveDeviceId())) {
        OPENCL_NOT_SUPPORTED();
    }

    const dim4 srcDims = in.dims();
    dim4 swapped = dim4(srcDims[1], srcDims[0], srcDims[2], srcDims[3]);

    Array<T>* result = createEmptyArray<T>(swapped);
    kernel::transpose<T>(*result, in);

    return result;
}
// Runs the backend susan<T> detector on `in` and packs the results into an
// af_features handle.
//
// x, y and score come from the detector. Orientation is filled with 0 and
// size with 1 for every feature; both are empty arrays when no feature was
// found.
static af_features susan(af_array const &in, const unsigned radius, const float diff_thr, const float geom_thr, const float feature_ratio, const unsigned edge) {
    Array<float> xs     = createEmptyArray<float>(dim4());
    Array<float> ys     = createEmptyArray<float>(dim4());
    Array<float> scores = createEmptyArray<float>(dim4());

    af_features_t feat;
    feat.n = susan<T>(xs, ys, scores, getArray<T>(in), radius, diff_thr,
                      geom_thr, feature_ratio, edge);

    feat.x     = getHandle(xs);
    feat.y     = getHandle(ys);
    feat.score = getHandle(scores);

    feat.orientation =
        getHandle(feat.n > 0 ? createValueArray<float>(feat.n, 0.0)
                             : createEmptyArray<float>(dim4()));
    feat.size =
        getHandle(feat.n > 0 ? createValueArray<float>(feat.n, 1.0)
                             : createEmptyArray<float>(dim4()));

    return getFeaturesHandle(feat);
}
void sparseArithTester(const int m, const int n, int factor, const double eps) { deviceGC(); if (noDoubleTests<T>()) return; #if 1 array A = cpu_randu<T>(dim4(m, n)); array B = cpu_randu<T>(dim4(m, n)); #else array A = randu(m, n, (dtype)dtype_traits<T>::af_type); array B = randu(m, n, (dtype)dtype_traits<T>::af_type); #endif A = makeSparse<T>(A, factor); array RA = sparse(A, AF_STORAGE_CSR); array OA = sparse(A, AF_STORAGE_COO); // Arith Op array resR = arith_op<op>()(RA, B); array resO = arith_op<op>()(OA, B); array resD = arith_op<op>()( A, B); array revR = arith_op<op>()(B, RA); array revO = arith_op<op>()(B, OA); array revD = arith_op<op>()(B, A); ASSERT_NEAR(0, sum<double>(abs(real(resR - resD))) / (m * n), eps); ASSERT_NEAR(0, sum<double>(abs(imag(resR - resD))) / (m * n), eps); ASSERT_NEAR(0, sum<double>(abs(real(resO - resD))) / (m * n), eps); ASSERT_NEAR(0, sum<double>(abs(imag(resO - resD))) / (m * n), eps); ASSERT_NEAR(0, sum<double>(abs(real(revR - revD))) / (m * n), eps); ASSERT_NEAR(0, sum<double>(abs(imag(revR - revD))) / (m * n), eps); ASSERT_NEAR(0, sum<double>(abs(real(revO - revD))) / (m * n), eps); ASSERT_NEAR(0, sum<double>(abs(imag(revO - revD))) / (m * n), eps); }
// Runs the backend sift<T, convAccT> detector on `in` and returns its results
// through the two output handles.
//
// feat_       : receives the features handle (x, y, score, orientation, size)
// descriptors : receives the handle of the descriptor array
// All remaining parameters are forwarded unchanged to the backend call.
static void sift(af_features& feat_, af_array& descriptors, const af_array& in, const unsigned n_layers, const float contrast_thr, const float edge_thr, const float init_sigma, const bool double_input, const float img_scale, const float feature_ratio, const bool compute_GLOH) {
    // Placeholders that the backend call fills in.
    Array<float> xs           = createEmptyArray<float>(dim4());
    Array<float> ys           = createEmptyArray<float>(dim4());
    Array<float> scores       = createEmptyArray<float>(dim4());
    Array<float> orientations = createEmptyArray<float>(dim4());
    Array<float> sizes        = createEmptyArray<float>(dim4());
    Array<float> descs        = createEmptyArray<float>(dim4());

    af_features_t feat;
    feat.n = sift<T, convAccT>(xs, ys, scores, orientations, sizes, descs,
                               getArray<T>(in), n_layers, contrast_thr,
                               edge_thr, init_sigma, double_input, img_scale,
                               feature_ratio, compute_GLOH);

    feat.x           = getHandle(xs);
    feat.y           = getHandle(ys);
    feat.score       = getHandle(scores);
    feat.orientation = getHandle(orientations);
    feat.size        = getHandle(sizes);

    feat_       = getFeaturesHandle(feat);
    descriptors = getHandle<float>(descs);
}
// Computes the determinant of `a` from an in-place LU factorisation of a
// copy: the product of the diagonal of the factorised matrix, negated when
// the pivot vector indicates an odd number of out-of-place entries.
//
// a : handle of the input matrix; its first dimension is taken as the size
// Returns 1 for an input whose first dimension is 0.
T det(const af_array a) {
    const Array<T> A = getArray<T>(a);

    const int num = A.dims()[0];

    if (num == 0) {
        T res = scalar<T>(1.0);
        return res;
    }

    std::vector<T> hD(num);
    std::vector<int> hP(num);

    // Scoped so that the copy, the pivots and the extracted diagonal are
    // released as soon as their contents have been copied to the host.
    {
        Array<T> A_copy = copyArray<T>(A);

        Array<int> pivot = lu_inplace(A_copy, false);
        copyData(&hP[0], pivot);

        Array<T> D = diagExtract(A_copy, 0);
        copyData(&hD[0], D);
    }

    bool is_neg = false;
    T res       = scalar<T>(1);
    for (int i = 0; i < num; i++) {
        res = res * hD[i];
        // Pivot entries are compared against the 1-based position; every
        // mismatch flips the sign of the result.
        is_neg ^= (hP[i] != (i + 1));
    }

    if (is_neg) res = res * scalar<T>(-1);

    return res;
}
// Runs the backend fast<T> detector on `in` and returns the results as an
// af_features value.
//
// x, y and score come from the detector. Orientation is filled with 0 and
// size with 1, one entry per detected feature.
static af_features fast(af_array const &in, const float thr, const unsigned arc_length, const bool non_max, const float feature_ratio, const unsigned edge) {
    Array<float> xs     = createEmptyArray<float>(dim4());
    Array<float> ys     = createEmptyArray<float>(dim4());
    Array<float> scores = createEmptyArray<float>(dim4());

    af_features feat;
    feat.n = fast<T>(xs, ys, scores, getArray<T>(in), thr, arc_length,
                     non_max, feature_ratio, edge);

    Array<float> orientations = createValueArray<float>(feat.n, 0.0);
    Array<float> sizes        = createValueArray<float>(feat.n, 1.0);

    feat.x           = getHandle(xs);
    feat.y           = getHandle(ys);
    feat.score       = getHandle(scores);
    feat.orientation = getHandle(orientations);
    feat.size        = getHandle(sizes);

    return feat;
}
// Computes the output dimensions of a binary operation.
//
// ldims, rdims : dimensions of the left and right operands
// batchMode    : when false the dimensions must be equal and are returned
//                as-is; when true each dimension must either match or be 1
//                on one side, and the larger extent is used
dim4 getOutDims(const dim4 &ldims, const dim4 &rdims, bool batchMode) {
    if (batchMode) {
        dim_t odims[] = {1, 1, 1, 1};
        for (int i = 0; i < 4; i++) {
            DIM_ASSERT(1, ldims[i] == rdims[i] || ldims[i] == 1 || rdims[i] == 1);
            odims[i] = (ldims[i] < rdims[i]) ? rdims[i] : ldims[i];
        }
        return dim4(4, odims);
    }

    DIM_ASSERT(1, ldims == rdims);
    return ldims;
}
// Runs af_where on the first data set of a test file and compares the result
// with the first expected vector from the same file.
//
// pTestFile : path of the test file read by readTests
// isSubRef  : when true, the data is first loaded into a temporary array and
//             the actual input is obtained by indexing it with seqv
// seqv      : sequences used for that indexing
void whereTest(string pTestFile, bool isSubRef = false, const vector<af_seq> seqv = vector<af_seq>()) {
    SUPPORTED_TYPE_CHECK(T);

    vector<dim4> numDims;
    vector<vector<int> > data;
    vector<vector<int> > tests;
    readTests<int, int, int>(pTestFile, numDims, data, tests);

    dim4 dims = numDims[0];
    vector<T> in(data[0].begin(), data[0].end());

    af_array inArray   = 0;
    af_array outArray  = 0;
    af_array tempArray = 0;

    // Get input array
    if (!isSubRef) {
        ASSERT_SUCCESS(af_create_array(&inArray, &in.front(), dims.ndims(), dims.get(), (af_dtype)dtype_traits<T>::af_type));
    } else {
        ASSERT_SUCCESS(af_create_array(&tempArray, &in.front(), dims.ndims(), dims.get(), (af_dtype)dtype_traits<T>::af_type));
        ASSERT_SUCCESS(af_index(&inArray, tempArray, seqv.size(), &seqv.front()));
    }

    // Expected indices
    vector<uint> currGoldBar(tests[0].begin(), tests[0].end());

    // Run where and compare
    ASSERT_SUCCESS(af_where(&outArray, inArray));
    ASSERT_VEC_ARRAY_EQ(currGoldBar, dim4(tests[0].size()), outArray);

    // Release whichever handles were created, in the order in, out, temp.
    af_array handles[3] = {inArray, outArray, tempArray};
    for (int h = 0; h < 3; ++h) {
        if (handles[h] != 0) af_release_array(handles[h]);
    }
}
//////////////////////////////////// CPP ///////////////////////////////// // TYPED_TEST(Where, CPP) { SUPPORTED_TYPE_CHECK(TypeParam); vector<dim4> numDims; vector<vector<int> > data; vector<vector<int> > tests; readTests<int, int, int>(string(TEST_DIR "/where/where.test"), numDims, data, tests); dim4 dims = numDims[0]; vector<float> in(data[0].begin(), data[0].end()); array input(dims, &in.front(), afHost); array output = where(input); // Compare result vector<uint> currGoldBar(tests[0].begin(), tests[0].end()); ASSERT_VEC_ARRAY_EQ(currGoldBar, dim4(tests[0].size()), output); }
// Builds one nbins-bin histogram per 2D slice of `in`.
//
// in     : input array; each slice holds dims[0] * dims[1] values
// nbins  : number of bins per histogram
// minval : lower end of the binned range
// maxval : upper end of the binned range
// Returns a newly created array of dims
// (nbins, 1, in.dims()[2], in.dims()[3]).
//
// Values that fall outside [minval, maxval) are clamped into the first or the
// last bin. Slices along both dimension 2 and dimension 3 are processed, so
// every histogram of the output is zeroed and filled.
Array<outType> * histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval) {
    const dim4 inDims    = in.dims();
    const dim4 inStrides = in.strides();
    dim4 outDims         = dim4(nbins, 1, inDims[2], inDims[3]);

    Array<outType>* out = createEmptyArray<outType>(outDims);

    // get data pointers for input and output Arrays
    outType *outData     = out->get();
    const inType* inData = in.get();

    dim_type numElements = inDims[0] * inDims[1];
    dim_type sliceCount  = inDims[2] * inDims[3];

    // set all bin elements of every slice to zero; the output is written as
    // nbins consecutive entries per slice
    dim_type totalBins = sliceCount * (dim_type)nbins;
    for (dim_type i = 0; i < totalBins; i++) outData[i] = 0;

    float step = (maxval - minval) / (float)nbins;

    for (dim_type b3 = 0; b3 < inDims[3]; b3++) {
        for (dim_type b2 = 0; b2 < inDims[2]; b2++) {
            // Start of the current input slice
            const inType *slice = inData + b3 * inStrides[3] + b2 * inStrides[2];

            for (dim_type i = 0; i < numElements; i++) {
                int bin = (int)((slice[i] - minval) / step);
                bin     = std::max(bin, 0);
                bin     = std::min(bin, (int)(nbins - 1));
                outData[bin]++;
            }

            outData += nbins;
        }
    }

    return out;
}
// Returns `in` with its first two dimensions swapped.
//
// in        : input array
// conjugate : selects the transpose_<T, true> instantiation instead of
//             transpose_<T, false>
Array<T> transpose(const Array<T> &in, const bool conjugate) {
    const dim4 srcDims = in.dims();

    // Output has the first two dimensions swapped
    dim4 swapped = dim4(srcDims[1], srcDims[0], srcDims[2], srcDims[3]);
    Array<T> result = createEmptyArray<T>(swapped);

    T* dst       = result.get();
    const T* src = in.get();

    if (!conjugate) {
        transpose_<T, false>(dst, src, result.dims(), in.dims(), result.strides(), in.strides());
        return result;
    }

    transpose_<T, true>(dst, src, result.dims(), in.dims(), result.strides(), in.strides());
    return result;
}
// Returns the positions of all non-zero elements of `in`.
//
// Elements are visited in dimension order (dim 0 fastest) and numbered
// consecutively from 0; the number of every element that differs from zero
// is appended to the result. The result has one dimension whose length is
// the number of such elements.
Array<uint> where(const Array<T> &in) {
    const dim4 extent = in.dims();
    const dim4 pitch  = in.strides();

    static const T zero = scalar<T>(0);

    const T *src = in.get();
    // Sized for the worst case in which every element is non-zero.
    uint *hits = memAlloc<uint>(in.elements());

    dim_t found    = 0;
    dim_t position = 0;

    for (dim_t w = 0; w < extent[3]; w++) {
        uint offw = w * pitch[3];

        for (dim_t z = 0; z < extent[2]; z++) {
            uint offz = offw + z * pitch[2];

            for (dim_t y = 0; y < extent[1]; y++) {
                uint offy = y * pitch[1] + offz;

                for (dim_t x = 0; x < extent[0]; x++, position++) {
                    T val = src[offy + x];
                    if (val != zero) { hits[found++] = position; }
                }
            }
        }
    }

    Array<uint> out = createHostDataArray(dim4(found), hits);
    memFree<uint>(hits);
    return out;
}
double LPQNorm(const Array<T> &A, double p, double q) { Array<T> A_p_norm = createEmptyArray<T>(dim4()); if (p == 1) { A_p_norm = reduce<af_add_t, T, T>(A, 0); } else { Array<T> P = createValueArray<T>(A.dims(), scalar<T>(p)); Array<T> invP = createValueArray<T>(A.dims(), scalar<T>(1.0/p)); Array<T> A_p = arithOp<T, af_pow_t>(A, P, A.dims()); Array<T> A_p_sum = reduce<af_add_t, T, T>(A_p, 0); A_p_norm = arithOp<T, af_pow_t>(A_p_sum, invP, invP.dims()); } if (q == 1) { return reduce_all<af_add_t, T, T>(A_p_norm); } Array<T> Q = createValueArray<T>(A_p_norm.dims(), scalar<T>(q)); Array<T> A_p_norm_q = arithOp<T, af_pow_t>(A_p_norm, Q, Q.dims()); return std::pow(reduce_all<af_add_t, T, T>(A_p_norm_q), 1.0/q); }
// Allocates a zero-initialised output of dims
// (nbins, 1, in.dims()[2], in.dims()[3]) and delegates the binning to
// kernel::histogram.
//
// in     : input array
// nbins  : number of bins per histogram; must not exceed kernel::MAX_BINS
// minval : lower end of the binned range
// maxval : upper end of the binned range
Array<outType> histogram(const Array<inType> &in, const unsigned &nbins, const double &minval, const double &maxval) {
    ARG_ASSERT(1, (nbins<=kernel::MAX_BINS));

    const dim4 dims = in.dims();
    dim4 outDims    = dim4(nbins, 1, dims[2], dims[3]);
    Array<outType> out = createValueArray<outType>(outDims, outType(0));

    // create an array to hold min and max values for
    // batch operation handling, this will reduce
    // number of concurrent reads to one single memory location
    dim_t mmNElems = dims[2] * dims[3];

    // Each cfloat carries one (min, max) pair.
    cfloat init;
    init.s[0] = minval;
    init.s[1] = maxval;
    vector<cfloat> h_minmax(mmNElems, init);

    // One cfloat per batch: the device array must have exactly as many
    // elements as the host vector it is created from.
    dim4 minmax_dims(mmNElems);
    Array<cfloat> minmax = createHostDataArray<cfloat>(minmax_dims, h_minmax.data());

    kernel::histogram<inType, outType>(out, in, minmax, nbins);

    return out;
}
// This tests batching of different forms // tf0 rotates by 90 clockwise // tf1 rotates by 90 counter clockwise // This test simply makes sure the batching is working correctly TEST(TransformBatching, CPP) { vector<dim4> vDims; vector<vector<float> > in; vector<vector<float> > gold; readTests<float, float, int>(string(TEST_DIR"/transform/transform_batching.test"), vDims, in, gold); array img0 (vDims[0], &(in[0].front())); array img1 (vDims[1], &(in[1].front())); array ip_tile (vDims[2], &(in[2].front())); array ip_quad (vDims[3], &(in[3].front())); array ip_mult (vDims[4], &(in[4].front())); array ip_tile3 (vDims[5], &(in[5].front())); array ip_quad3 (vDims[6], &(in[6].front())); array tf0 (vDims[7 + 0], &(in[7 + 0].front())); array tf1 (vDims[7 + 1], &(in[7 + 1].front())); array tf_tile (vDims[7 + 2], &(in[7 + 2].front())); array tf_quad (vDims[7 + 3], &(in[7 + 3].front())); array tf_mult (vDims[7 + 4], &(in[7 + 4].front())); array tf_mult3 (vDims[7 + 5], &(in[7 + 5].front())); array tf_mult3x(vDims[7 + 6], &(in[7 + 6].front())); const int X = img0.dims(0); const int Y = img0.dims(1); ASSERT_EQ(gold.size(), 21u); vector<array> out(gold.size()); out[0 ] = transform(img0 , tf0 , Y, X, AF_INTERP_NEAREST); // 1,1 x 1,1 out[1 ] = transform(img0 , tf1 , Y, X, AF_INTERP_NEAREST); // 1,1 x 1,1 out[2 ] = transform(img1 , tf0 , Y, X, AF_INTERP_NEAREST); // 1,1 x 1,1 out[3 ] = transform(img1 , tf1 , Y, X, AF_INTERP_NEAREST); // 1,1 x 1,1 out[4 ] = transform(img0 , tf_tile , Y, X, AF_INTERP_NEAREST); // 1,1 x N,1 out[5 ] = transform(img0 , tf_mult , Y, X, AF_INTERP_NEAREST); // 1,1 x N,N out[6 ] = transform(img0 , tf_quad , Y, X, AF_INTERP_NEAREST); // 1,1 x 1,N out[7 ] = transform(ip_tile , tf0 , Y, X, AF_INTERP_NEAREST); // N,1 x 1,1 out[8 ] = transform(ip_tile , tf_tile , Y, X, AF_INTERP_NEAREST); // N,1 x N,1 out[9 ] = transform(ip_tile , tf_mult , Y, X, AF_INTERP_NEAREST); // N,N x N,N out[10] = transform(ip_tile , tf_quad , Y, X, AF_INTERP_NEAREST); // N,1 x 1,N out[11] 
= transform(ip_quad , tf0 , Y, X, AF_INTERP_NEAREST); // 1,N x 1,1 out[12] = transform(ip_quad , tf_quad , Y, X, AF_INTERP_NEAREST); // 1,N x 1,N out[13] = transform(ip_quad , tf_mult , Y, X, AF_INTERP_NEAREST); // 1,N x N,N out[14] = transform(ip_quad , tf_tile , Y, X, AF_INTERP_NEAREST); // 1,N x N,1 out[15] = transform(ip_mult , tf0 , Y, X, AF_INTERP_NEAREST); // N,N x 1,1 out[16] = transform(ip_mult , tf_tile , Y, X, AF_INTERP_NEAREST); // N,N x N,1 out[17] = transform(ip_mult , tf_mult , Y, X, AF_INTERP_NEAREST); // N,N x N,N out[18] = transform(ip_mult , tf_quad , Y, X, AF_INTERP_NEAREST); // N,N x 1,N out[19] = transform(ip_tile3, tf_mult3 , Y, X, AF_INTERP_NEAREST); // N,1 x N,N out[20] = transform(ip_quad3, tf_mult3x, Y, X, AF_INTERP_NEAREST); // 1,N x N,N array x_(dim4(35, 40, 1, 1), &(gold[1].front())); for(int i = 0; i < (int)gold.size(); i++) { // Get result vector<float> outData(out[i].elements()); out[i].host((void*)&outData.front()); for(int iter = 0; iter < (int)gold[i].size(); iter++) { ASSERT_EQ(gold[i][iter], outData[iter]) << "at: " << iter << endl << "for " << i << "-th operation"<< endl; } } }
// Allocates a new Array<T> constructed from default dimensions and returns
// the raw pointer to it.
Array<T> *initArray() {
    Array<T> *created = new Array<T>(dim4());
    return created;
}
// Prepares a forge surface plot for the given x, y and z data and returns
// the chart it belongs to.
//
// window : window whose chart is looked up
// xVals, yVals, zVals : handles of the coordinate arrays
// props  : grid cell; its row/col select the chart when both are > -1,
//          otherwise the chart at (0, 0) is used
//
// Unless the chart has a manual axes override, its axes limits are grown to
// cover the data (or set outright when all current limits are zero).
forge::Chart* setup_surface(const forge::Window* const window, const af_array xVals, const af_array yVals, const af_array zVals, const af_cell* const props) {
    Array<T> xIn = getArray<T>(xVals);
    Array<T> yIn = getArray<T>(yVals);
    Array<T> zIn = getArray<T>(zVals);

    const ArrayInfo& xInfo = getInfo(xVals);
    const af::dim4 xDims   = xInfo.dims();
    const af::dim4 yDims   = getInfo(yVals).dims();
    const af::dim4 zDims   = getInfo(zVals).dims();

    if (xInfo.isVector()) {
        // Make xIn a column vector and repeat it along the second dimension.
        // NOTE(review): the repeat counts below come from dims[0] of the
        // original y and x inputs — confirm callers always pass them with
        // their length in dimension 0.
        xIn = modDims(xIn, xIn.elements());
        dim4 xRepeat(1, yDims[0], 1, 1);
        xIn = tile(xIn, xRepeat);

        // Make yIn a row vector and repeat it along the first dimension.
        yIn = modDims(yIn, af::dim4(1, yIn.elements()));
        dim4 yRepeat(xDims[0], 1, 1, 1);
        yIn = tile(yIn, yRepeat);
    }

    // Flatten xIn, yIn and zIn into row vectors
    dim4 flat = dim4(1, zIn.elements());
    xIn = modDims(xIn, flat);
    yIn = modDims(yIn, flat);
    zIn = modDims(zIn, flat);

    // Now join along first dimension, skip reorder
    std::vector<Array<T> > inputs{xIn, yIn, zIn};
    Array<T> Z = join(0, inputs);

    ForgeManager& fgMngr = ForgeManager::getInstance();

    // Get the chart for the current grid position (if any)
    const bool hasCell = props->col > -1 && props->row > -1;
    forge::Chart* chart =
        hasCell ? fgMngr.getChart(window, props->row, props->col, FG_CHART_3D)
                : fgMngr.getChart(window, 0, 0, FG_CHART_3D);

    forge::Surface* surface = fgMngr.getSurface(chart, zDims[0], zDims[1], getGLType<T>());

    surface->setColor(0.0, 1.0, 0.0, 1.0);

    // If chart axes limits do not have a manual override
    // then compute and set axes limits
    if (!fgMngr.getChartAxesOverride(chart)) {
        float cmin[3], cmax[3];
        T dmin[3], dmax[3];
        chart->getAxesLimits(&cmin[0], &cmax[0], &cmin[1], &cmax[1], &cmin[2], &cmax[2]);

        // Data range per axis, in the order x, y, z
        Array<T>* axisData[3] = {&xIn, &yIn, &zIn};
        for (int axis = 0; axis < 3; ++axis) {
            dmin[axis] = reduce_all<af_min_t, T, T>(*axisData[axis]);
            dmax[axis] = reduce_all<af_max_t, T, T>(*axisData[axis]);
        }

        // All-zero limits mean that no limits were set before.
        bool noPreviousLimits = true;
        for (int axis = 0; axis < 3; ++axis) {
            noPreviousLimits =
                noPreviousLimits && cmin[axis] == 0 && cmax[axis] == 0;
        }

        // Without previous limits every bound is set unconditionally;
        // otherwise a bound only moves when the data exceeds it.
        for (int axis = 0; axis < 3; ++axis) {
            if (noPreviousLimits || cmin[axis] > dmin[axis]) {
                cmin[axis] = step_round(dmin[axis], false);
            }
            if (noPreviousLimits || cmax[axis] < dmax[axis]) {
                cmax[axis] = step_round(dmax[axis], true);
            }
        }

        chart->setAxesLimits(cmin[0], cmax[0], cmin[1], cmax[1], cmin[2], cmax[2]);
    }

    copy_surface<T>(Z, surface);

    return chart;
}
// C API entry point for 1D interpolation on a uniformly spaced grid.
//
// Validates the arguments, creates the output handle when *yo is 0, and
// dispatches to approx1<> according to the element type of yi.
//
// yo       : in/out handle of the result; when *yo is non-zero on entry it is
//            kept and its dimensions must equal the computed output dims
// yi       : input values; must be a floating (real or complex) type
// xo       : query positions; must be a real floating type of the same
//            precision as yi
// xdim     : dimension along which to interpolate, in [0, 4)
// xi_beg   : forwarded to approx1 (start of the input grid, per its name)
// xi_step  : forwarded to approx1; must be non-zero
// method   : interpolation method; must be one of the values listed below
// offGrid  : forwarded to approx1
//
// Returns AF_SUCCESS on the normal path. Failures are routed through
// CATCHALL (presumably translating exceptions into af_err codes — confirm
// against the macro definition).
af_err af_approx1_uniform(af_array *yo, const af_array yi, const af_array xo, const int xdim, const double xi_beg, const double xi_step, const af_interp_type method, const float offGrid) {
    try {
        const ArrayInfo& yi_info = getInfo(yi);
        const ArrayInfo& xo_info = getInfo(xo);

        const dim4 yi_dims = yi_info.dims();
        const dim4 xo_dims = xo_info.dims();

        ARG_ASSERT(1, yi_info.isFloating());                        // Only floating and complex types
        ARG_ASSERT(2, xo_info.isRealFloating()) ;                   // Only floating types
        ARG_ASSERT(1, yi_info.isSingle() == xo_info.isSingle());    // Must have same precision
        ARG_ASSERT(1, yi_info.isDouble() == xo_info.isDouble());    // Must have same precision
        ARG_ASSERT(3, xdim >= 0 && xdim < 4);

        // xo is either a vector along xdim (all of its elements lie in that
        // dimension) or it must match yi in every dimension other than xdim.
        if (xo_dims[xdim] != xo_dims.elements()) {
            for (int i = 0; i < 4; i++) {
                if (xdim != i) DIM_ASSERT(2, xo_dims[i] == yi_dims[i]);
            }
        }

        ARG_ASSERT(5, xi_step != 0);
        ARG_ASSERT(6, (method == AF_INTERP_CUBIC || method == AF_INTERP_CUBIC_SPLINE || method == AF_INTERP_LINEAR || method == AF_INTERP_LINEAR_COSINE || method == AF_INTERP_LOWER || method == AF_INTERP_NEAREST));

        // Empty input or empty positions: return an empty array of yi's type.
        if (yi_dims.ndims() == 0 || xo_dims.ndims() == 0) {
            *yo = createHandle(dim4(0,0,0,0), yi_info.getType());
            return AF_SUCCESS;
        }

        // The output has the shape of yi, except along xdim where it takes
        // the extent of xo.
        dim4 yo_dims = yi_dims;
        yo_dims[xdim] = xo_dims[xdim];

        // Allocate the output only when the caller did not supply one.
        if (*yo == 0) {
            *yo = createHandle(yo_dims, yi_info.getType());
        }

        DIM_ASSERT(1, getInfo(*yo).dims() == yo_dims);

        // Second template argument is the real type used for the positions.
        switch(yi_info.getType()) {
            case f32: approx1<float  , float >(yo, yi, xo, xdim, xi_beg, xi_step, method, offGrid); break;
            case f64: approx1<double , double>(yo, yi, xo, xdim, xi_beg, xi_step, method, offGrid); break;
            case c32: approx1<cfloat , float >(yo, yi, xo, xdim, xi_beg, xi_step, method, offGrid); break;
            case c64: approx1<cdouble, double>(yo, yi, xo, xdim, xi_beg, xi_step, method, offGrid); break;
            default:  TYPE_ERROR(1, yi_info.getType());
        }
    }
    CATCHALL;

    return AF_SUCCESS;
}
unsigned harris(Array<float> &x_out, Array<float> &y_out, Array<float> &resp_out, const Array<T> &in, const unsigned max_corners, const float min_response, const float sigma, const unsigned filter_len, const float k_thr) { dim4 idims = in.dims(); // Window filter convAccT* h_filter = memAlloc<convAccT>(filter_len); // Decide between rectangular or circular filter if (sigma < 0.5f) { for (unsigned i = 0; i < filter_len; i++) h_filter[i] = (T)1.f / (filter_len); } else { gaussian1D<convAccT>(h_filter, (int)filter_len, sigma); } Array<convAccT> filter = createDeviceDataArray<convAccT>(dim4(filter_len), (const void*)h_filter); unsigned border_len = filter_len / 2 + 1; Array<T> ix = createEmptyArray<T>(idims); Array<T> iy = createEmptyArray<T>(idims); // Compute first order derivatives gradient<T>(iy, ix, in); Array<T> ixx = createEmptyArray<T>(idims); Array<T> ixy = createEmptyArray<T>(idims); Array<T> iyy = createEmptyArray<T>(idims); // Compute second-order derivatives second_order_deriv<T>(ixx.get(), ixy.get(), iyy.get(), in.elements(), ix.get(), iy.get()); // Convolve second-order derivatives with proper window filter ixx = convolve2<T, convAccT, false>(ixx, filter, filter); ixy = convolve2<T, convAccT, false>(ixy, filter, filter); iyy = convolve2<T, convAccT, false>(iyy, filter, filter); const unsigned corner_lim = in.elements() * 0.2f; float* x_corners = memAlloc<float>(corner_lim); float* y_corners = memAlloc<float>(corner_lim); float* resp_corners = memAlloc<float>(corner_lim); T* resp = memAlloc<T>(in.elements()); // Calculate Harris responses for all pixels harris_responses<T>(resp, idims[0], idims[1], ixx.get(), ixy.get(), iyy.get(), k_thr, border_len); const unsigned min_r = (max_corners > 0) ? 
0.f : min_response; unsigned corners_found = 0; // Performs non-maximal suppression non_maximal<T>(x_corners, y_corners, resp_corners, &corners_found, idims[0], idims[1], resp, min_r, border_len, corner_lim); memFree(resp); const unsigned corners_out = (max_corners > 0) ? min(corners_found, max_corners) : min(corners_found, corner_lim); if (corners_out == 0) return 0; if (max_corners > 0 && corners_found > corners_out) { Array<float> harris_responses = createDeviceDataArray<float>(dim4(corners_found), (void*)resp_corners); Array<float> harris_sorted = createEmptyArray<float>(dim4(corners_found)); Array<unsigned> harris_idx = createEmptyArray<unsigned>(dim4(corners_found)); // Sort Harris responses sort_index<float, false>(harris_sorted, harris_idx, harris_responses, 0); x_out = createEmptyArray<float>(dim4(corners_out)); y_out = createEmptyArray<float>(dim4(corners_out)); resp_out = createEmptyArray<float>(dim4(corners_out)); // Keep only the corners with higher Harris responses keep_corners(x_out.get(), y_out.get(), resp_out.get(), x_corners, y_corners, harris_sorted.get(), harris_idx.get(), corners_out); memFree(x_corners); memFree(y_corners); } else if (max_corners == 0 && corners_found < corner_lim) { x_out = createEmptyArray<float>(dim4(corners_out)); y_out = createEmptyArray<float>(dim4(corners_out)); resp_out = createEmptyArray<float>(dim4(corners_out)); memcpy(x_out.get(), x_corners, corners_out * sizeof(float)); memcpy(y_out.get(), y_corners, corners_out * sizeof(float)); memcpy(resp_out.get(), resp_corners, corners_out * sizeof(float)); memFree(x_corners); memFree(y_corners); memFree(resp_corners); } else { x_out = createDeviceDataArray<float>(dim4(corners_out), (void*)x_corners); y_out = createDeviceDataArray<float>(dim4(corners_out), (void*)y_corners); resp_out = createDeviceDataArray<float>(dim4(corners_out), (void*)resp_corners); } return corners_out; }
// Runs selectScalarTest with its second template argument set to true on a
// 1000 x 1000 array.
TYPED_TEST(Select, LeftScalar) {
    const dim4 dims(1000, 1000);
    selectScalarTest<TypeParam, true>(dims);
}
// Runs selectTest on a 1024 x 1024 array.
TYPED_TEST(Select, Simple) {
    const dim4 dims(1024, 1024);
    selectTest<TypeParam>(dims);
}