/// Dump the part of `view` held by the local processor to std::cout.
///
/// A "show" header line is always printed first.  If this processor owns a
/// subblock of the view, one line is printed per local element giving its
/// local (row,col) index, the matching global (row,col) index and the value.
/// Otherwise a single "no local subblock" line is printed.  Every line is
/// prefixed with "[<local processor>] ".
///
/// @param view  Matrix view to print.
void show(vsip::const_Matrix<T, BlockT> view)
{
  using vsip::index_type;
  using vsip::no_subblock;

  std::cout << "[" << vsip::local_processor() << "] " << "show\n";

  // Nothing stored locally: report that and stop.
  if (subblock(view) == no_subblock)
  {
    std::cout << "[" << vsip::local_processor() << "] "
              << "show: no local subblock\n";
    return;
  }

  vsip::length_type const local_rows = view.local().size(0);
  vsip::length_type const local_cols = view.local().size(1);

  for (index_type row = 0; row < local_rows; ++row)
  {
    for (index_type col = 0; col < local_cols; ++col)
    {
      // Translate the local position into the global index space.
      index_type global_row = global_from_local_index(view, 0, row);
      index_type global_col = global_from_local_index(view, 1, col);

      std::cout << "[" << vsip::local_processor() << "] "
                << row << "," << col
                << " g:" << global_row << "," << global_col
                << " = " << view.local().get(row, col)
                << std::endl;
    }
  }
}
void check(vsip::const_Matrix<T, BlockT> view, int k, int rshift=0, int cshift=0) { using vsip::no_subblock; using vsip::index_type; typedef T value_type; #if VERBOSE std::cout << "check(k=" << k << ", rshift=" << rshift << ", cshift=" << cshift << "):" << std::endl; #endif if (subblock(view) != no_subblock) { for (index_type lr=0; lr<view.local().size(0); ++lr) for (index_type lc=0; lc<view.local().size(1); ++lc) { index_type gr = global_from_local_index(view, 0, lr); index_type gc = global_from_local_index(view, 1, lc); #if VERBOSE std::cout << " - " << lr << ", " << lc << " g:" << gr << ", " << gc << " = " << view.local().get(lr, lc) << " exp: " << value<T>(gr+rshift, gc + cshift, k) << std::endl; #endif #if DO_ASSERT test_assert(view.local().get(lr, lc) == value<T>(gr+rshift, gc+cshift, k)); #endif } } }
/// Apply the 3x3 projection `P` to the point (u, v) and normalise.
///
/// Computes the homogeneous product P * [u v 1]^T and divides the first two
/// components by the third:
///
///   w = P(2,0)*u + P(2,1)*v + P(2,2)
///   x = (P(0,0)*u + P(0,1)*v + P(0,2)) / w
///   y = (P(1,0)*u + P(1,1)*v + P(1,2)) / w
///
/// No check is made for w == 0.
///
/// @param P  Projection coefficients; elements (0..2, 0..2) are read.
/// @param u  First input coordinate.
/// @param v  Second input coordinate.
/// @param x  [out] First projected coordinate.
/// @param y  [out] Second projected coordinate.
void apply_proj(
  vsip::const_Matrix<CoeffT, Block1> P,
  T                                  u,
  T                                  v,
  T&                                 x,
  T&                                 y)
{
  // Row 2 yields the homogeneous divisor.
  CoeffT const p20 = P.get(2, 0);
  CoeffT const p21 = P.get(2, 1);
  CoeffT const p22 = P.get(2, 2);
  T const denom = u * p20 + v * p21 + p22;

  // Row 0 -> x.
  CoeffT const p00 = P.get(0, 0);
  CoeffT const p01 = P.get(0, 1);
  CoeffT const p02 = P.get(0, 2);
  x = (u * p00 + v * p01 + p02) / denom;

  // Row 1 -> y.
  CoeffT const p10 = P.get(1, 0);
  CoeffT const p11 = P.get(1, 1);
  CoeffT const p12 = P.get(1, 2);
  y = (u * p10 + v * p11 + p12) / denom;
}
/// Apply the 3x3 projection `P` to the point (u, v) without normalising.
///
/// Returns all three components of the homogeneous product P * [u v 1]^T;
/// unlike a normalised projection, x and y are NOT divided by w here.
///
///   x = P(0,0)*u + P(0,1)*v + P(0,2)
///   y = P(1,0)*u + P(1,1)*v + P(1,2)
///   w = P(2,0)*u + P(2,1)*v + P(2,2)
///
/// @param P  Projection coefficients; elements (0..2, 0..2) are read.
/// @param u  First input coordinate.
/// @param v  Second input coordinate.
/// @param x  [out] First homogeneous component.
/// @param y  [out] Second homogeneous component.
/// @param w  [out] Third homogeneous component.
void apply_proj_w(
  vsip::const_Matrix<CoeffT, Block1> P,
  T                                  u,
  T                                  v,
  T&                                 x,
  T&                                 y,
  T&                                 w)
{
  // Row 0 -> x.
  CoeffT const p00 = P.get(0, 0);
  CoeffT const p01 = P.get(0, 1);
  CoeffT const p02 = P.get(0, 2);
  x = u * p00 + v * p01 + p02;

  // Row 1 -> y.
  CoeffT const p10 = P.get(1, 0);
  CoeffT const p11 = P.get(1, 1);
  CoeffT const p12 = P.get(1, 2);
  y = u * p10 + v * p11 + p12;

  // Row 2 -> w.
  CoeffT const p20 = P.get(2, 0);
  CoeffT const p21 = P.get(2, 1);
  CoeffT const p22 = P.get(2, 2);
  w = u * p20 + v * p21 + p22;
}
float prod_check( vsip::const_Matrix<T, Block1> a, vsip::const_Matrix<T, Block2> b, vsip::Matrix<T, Block3> c) { using vsip::index_type; typedef typename ovxx::scalar_of<T>::type scalar_type; assert(a.size(0) == c.size(0)); assert(b.size(1) == c.size(1)); assert(a.size(1) == b.size(0)); float err = 0.f; for (index_type i=0; i<c.size(0); ++i) for (index_type j=0; j<c.size(1); ++j) { T tmp = T(); scalar_type guage = scalar_type(); for (index_type k=0; k<a.size(1); ++k) { tmp += a.get(i, k) * b.get(k, j); guage += vsip::mag(a.get(i, k)) * vsip::mag(b.get(k, j)); } float err_ij = vsip::mag(tmp - c(i, j)) / test::precision<scalar_type>::eps; if (guage > scalar_type()) err_ij = err_ij/guage; err = std::max(err, err_ij); } return err; }
/// Reference implementation of c = a^H * b.
///
/// Each output element is
///
///   c(i, j) = sum_k conj(a(k, i)) * b(k, j)
///
/// where conj is `Test_traits<T>::conj`.  Sizes are verified with assert():
/// a is K x M, b is K x N and c is M x N.
///
/// @param a  Left operand, used conjugate-transposed.
/// @param b  Right operand.
/// @param c  [out] Result matrix; every element is overwritten.
void prodh(
  vsip::const_Matrix<T, Block1> a,
  vsip::const_Matrix<T, Block2> b,
  vsip::Matrix<T, Block3>       c)
{
  using vsip::index_type;

  assert(a.size(1) == c.size(0));
  assert(b.size(1) == c.size(1));
  assert(a.size(0) == b.size(0));

  vsip::length_type const out_rows = c.size(0);
  vsip::length_type const out_cols = c.size(1);
  vsip::length_type const inner    = a.size(0);

  for (index_type row = 0; row < out_rows; ++row)
  {
    for (index_type col = 0; col < out_cols; ++col)
    {
      // Dot product of column `row` of a (conjugated) with column `col` of b.
      T acc = T();
      for (index_type n = 0; n < inner; ++n)
      {
        acc += Test_traits<T>::conj(a.get(n, row)) * b.get(n, col);
      }
      c(row, col) = acc;
    }
  }
}
void pwarp_block( vsip::const_Matrix<CoeffT, Block1> P, vsip::const_Matrix<T, Block2> in, vsip::Matrix<T, Block3> out) { using vsip::length_type; using vsip::index_type; using vsip::Domain; using vsip_csl::img::impl::apply_proj; using std::min; using std::max; length_type rows = out.size(0); length_type cols = out.size(1); length_type row_chunk_size = 128; length_type col_chunk_size = 128; length_type row_quantum = 1; length_type col_quantum = 128/sizeof(T); for (index_type r=0; r<rows; r += row_chunk_size) { length_type my_r_size = std::min(row_chunk_size, rows - r); for (index_type c=0; c<cols; c += col_chunk_size) { length_type my_c_size = std::min(col_chunk_size, cols-c); CoeffT u00, v00; CoeffT u01, v01; CoeffT u10, v10; CoeffT u11, v11; apply_proj<CoeffT>(P, c+0*my_c_size, r+0*my_r_size, u00, v00); apply_proj<CoeffT>(P, c+0*my_c_size, r+1*my_r_size, u01, v01); apply_proj<CoeffT>(P, c+1*my_c_size, r+0*my_r_size, u10, v10); apply_proj<CoeffT>(P, c+1*my_c_size, r+1*my_r_size, u11, v11); CoeffT min_u = max(CoeffT(0), min(min(u00, u01), min(u10, u11))); CoeffT min_v = max(CoeffT(0), min(min(v00, v01), min(v10, v11))); CoeffT max_u = min(CoeffT(in.size(1)-1), max(max(u00, u01),max(u10, u11))); CoeffT max_v = min(CoeffT(in.size(0)-1), max(max(v00, v01),max(v10, v11))); index_type in_r0 = quantize_floor((index_type)floorf(min_v), row_quantum); index_type in_c0 = quantize_floor((index_type)floorf(min_u), col_quantum); index_type in_r1 = quantize_ceil((index_type)ceilf(max_v), row_quantum,in.size(0)-1); index_type in_c1 = quantize_ceil((index_type)ceilf(max_u), col_quantum,in.size(1)-1); Domain<2> in_dom(Domain<1>(in_r0, 1, in_r1 - in_r0 + 1), Domain<1>(in_c0, 1, in_c1 - in_c0 + 1)); length_type out_r0 = r; length_type out_c0 = c; Domain<2> out_dom(Domain<1>(out_r0, 1, my_r_size), Domain<1>(out_c0, 1, my_c_size)); pwarp_offset(P, in(in_dom), in_r0, in_c0, out(out_dom), out_r0, out_c0); } } }
void corr( vsip::bias_type bias, vsip::support_region_type sup, vsip::const_Matrix<T, Block1> ref, vsip::const_Matrix<T, Block2> in, vsip::Matrix<T, Block3> out) { using vsip::index_type; using vsip::length_type; using vsip::stride_type; using vsip::Matrix; using vsip::Domain; using vsip::unbiased; typedef typename vsip::impl::Scalar_of<T>::type scalar_type; length_type Mr = ref.size(0); length_type Mc = ref.size(1); length_type Nr = in.size(0); length_type Nc = in.size(1); length_type Pr = out.size(0); length_type Pc = out.size(1); length_type expected_Pr = corr_output_size(sup, Mr, Nr); length_type expected_Pc = corr_output_size(sup, Mc, Nc); stride_type shift_r = expected_shift(sup, Mr); stride_type shift_c = expected_shift(sup, Mc); assert(expected_Pr == Pr); assert(expected_Pc == Pc); Matrix<T> sub(Mr, Mc); Domain<1> sub_dom_r; Domain<1> sub_dom_c; Domain<1> in_dom_r; Domain<1> in_dom_c; // compute correlation for (index_type r=0; r<Pr; ++r) { stride_type pos_r = static_cast<stride_type>(r) + shift_r; for (index_type c=0; c<Pc; ++c) { stride_type pos_c = static_cast<stride_type>(c) + shift_c; scalar_type scale = scalar_type(1); if (pos_r < 0) { sub_dom_r = Domain<1>(-pos_r, 1, Mr + pos_r); in_dom_r = Domain<1>(0, 1, Mr+pos_r); scale *= scalar_type(Mr + pos_r); } else if (pos_r + Mr > Nr) { sub_dom_r = Domain<1>(0, 1, Nr-pos_r); in_dom_r = Domain<1>(pos_r, 1, Nr-pos_r); scale *= scalar_type(Nr - pos_r); } else { sub_dom_r = Domain<1>(0, 1, Mr); in_dom_r = Domain<1>(pos_r, 1, Mr); scale *= scalar_type(Mr); } if (pos_c < 0) { sub_dom_c = Domain<1>(-pos_c, 1, Mc + pos_c); in_dom_c = Domain<1>(0, 1, Mc+pos_c); scale *= scalar_type(Mc + pos_c); } else if (pos_c + Mc > Nc) { sub_dom_c = Domain<1>(0, 1, Nc-pos_c); in_dom_c = Domain<1>(pos_c, 1, Nc-pos_c); scale *= scalar_type(Nc - pos_c); } else { sub_dom_c = Domain<1>(0, 1, Mc); in_dom_c = Domain<1>(pos_c, 1, Mc); scale *= scalar_type(Mc); } sub = T(); sub(Domain<2>(sub_dom_r, sub_dom_c)) = in(Domain<2>(in_dom_r, 
in_dom_c)); T val = sumval(ref * impl_conj(sub)); if (bias == unbiased) val /= scale; out(r, c) = val; } } }