bool solve(const_Matrix<T, Block0> b, Matrix<T, Block1> x) { typedef typename Block_layout<Block0>::order_type order_type; typedef typename Block_layout<Block0>::complex_type complex_type; typedef Layout<2, order_type, Stride_unit_dense, complex_type> data_LP; typedef Strided<2, T, data_LP, Local_map> block_type; assert(b.size(0) == length_); assert(b.size(0) == x.size(0) && b.size(1) == x.size(1)); Matrix<T, block_type> b_int(b.size(0), b.size(1)); assign_local(b_int, b); if (tr == mat_conj || (tr == mat_trans && Is_complex<T>::value) || (tr == mat_herm && !Is_complex<T>::value)) VSIP_IMPL_THROW(unimplemented( "LU solver (CVSIP backend) does not implement this transformation")); { Ext_data<block_type> b_ext(b_int.block()); cvsip::View<2,T,true> cvsip_b_int(b_ext.data(),0,b_ext.stride(0),b_ext.size(0), b_ext.stride(1),b_ext.size(1)); cvsip_b_int.block().admit(true); traits::lu_solve(lu_, tr, cvsip_b_int.ptr()); cvsip_b_int.block().release(true); } assign_local(x, b_int); return true; }
static void add( const_Matrix<TR, BlockR> res, const_Matrix<T1, Block1> op1, const_Matrix<T2, Block2> op2) { vsip::dda::Data<BlockR, vsip::dda::out> raw_res(res.block()); vsip::dda::Data<Block1, vsip::dda::in> raw1(op1.block()); vsip::dda::Data<Block2, vsip::dda::in> raw2(op2.block()); // int cost = raw_res.cost + raw1.cost + raw2.cost; // cout << "Tag_plain " << cost << endl; float *pR = raw_res.ptr(); float const *p1 = raw1.ptr(); float const *p2 = raw2.ptr(); for (index_type c=0; c<res.size(1); ++c) { for (index_type r=0; r<res.size(0); ++r) { pR[r*raw_res.stride(0) + c*raw_res.stride(1)] = p1[r*raw1.stride(0) + c*raw1.stride(1)] + p2[r*raw2.stride(0) + c*raw2.stride(1)]; } } }
static void add( const_Matrix<TR, BlockR> res, const_Matrix<T1, Block1> op1, const_Matrix<T2, Block2> op2) { typedef typename BlockR::layout_type layout_type; // Check that no memory is required. // test_assert((dda::Data<BlockR, layout_type>::CT_Mem_not_req)); // test_assert((dda::Data<Block1, layout_type>::CT_Mem_not_req)); // test_assert((dda::Data<Block2, layout_type>::CT_Mem_not_req)); vsip::dda::Data<BlockR, vsip::dda::out, layout_type> raw_res(res.block()); vsip::dda::Data<Block1, vsip::dda::in, layout_type> raw1(op1.block()); vsip::dda::Data<Block2, vsip::dda::in, layout_type> raw2(op2.block()); // int cost = raw_res.cost + raw1.cost + raw2.cost; // cout << "Tag_contig " << cost << endl; float* pR = raw_res.ptr(); float* p1 = raw1.ptr(); float* p2 = raw2.ptr(); for (index_type i=0; i<res.size(); ++i) { *pR = *p1 + *p2; ++pR; ++p1; ++p2; } }
void generic_prodj( const_Matrix<T0, Block0> a, const_Matrix<T1, Block1> b, Matrix<T2, Block2> r) { assert(r.size(0) == a.size(0)); assert(r.size(1) == b.size(1)); assert(a.size(1) == b.size(0)); #ifdef VSIP_IMPL_REF_IMPL impl::generic_prod(a, conj(b), r); #else vsip_csl::dispatch<vsip_csl::dispatcher::op::prod_mm_conj, void, Block2&, Block0 const&, Block1 const&> (r.block(), a.block(), b.block()); #endif }
void interpolate( const_Matrix<IT, Block1> indices, // n x m Tensor<T, Block2> window, // n x m x I const_Matrix<complex<T>, Block3> in, // n x m Matrix<complex<T>, Block4> out, // nx x m length_type depth, length_type padded_depth) { // All blocks must have the same dimension ordering typedef typename Block_layout<Block1>::order_type order1_type; typedef typename Block_layout<Block2>::order_type order2_type; typedef typename Block_layout<Block3>::order_type order3_type; typedef typename Block_layout<Block4>::order_type order4_type; assert(order1_type::impl_dim0 == order2_type::impl_dim0); assert(order1_type::impl_dim0 == order3_type::impl_dim0); assert(order1_type::impl_dim0 == order4_type::impl_dim0); assert(order1_type::impl_dim1 == order2_type::impl_dim1); assert(order1_type::impl_dim1 == order3_type::impl_dim1); assert(order1_type::impl_dim1 == order4_type::impl_dim1); Device_memory<Block1> dev_indices(indices.block(), impl::SYNC_IN); Device_memory<Block2> dev_window(window.block(), impl::SYNC_IN); Device_memory<Block3> dev_in(in.block(), impl::SYNC_IN); Device_memory<Block4> dev_out(out.block(), impl::SYNC_OUT); size_t rows_in = in.size(0); size_t rows_out = out.size(0); size_t cols = in.size(1); assert(cols == out.size(1)); interpolate( dev_indices.data(), dev_window.data(), reinterpret_cast<cuComplex const*>(dev_in.data()), reinterpret_cast<cuComplex*>(dev_out.data()), depth, padded_depth, rows_in, rows_out, cols); }
void generic_prod( const_Matrix<T0, Block0> a, const_Vector<T1, Block1> b, Vector<T2, Block2> r) { using namespace vsip_csl::dispatcher; assert(r.size() == a.size(0)); assert(a.size(1) == b.size()); #ifdef VSIP_IMPL_REF_IMPL Evaluator<op::prod_mv, be::cvsip, void(Block2&, Block0 const&, Block1 const&)>::exec (r.block(), a.block(), b.block()); #else vsip_csl::dispatch<op::prod_mv, void, Block2&, Block0 const&, Block1 const&> (r.block(), a.block(), b.block()); #endif }
void matrix_add_1( const_Matrix<TR, BlockR> res, const_Matrix<T1, Block1> op1, const_Matrix<T2, Block2> op2) { vsip::dda::Data<BlockR, vsip::dda::out> raw_res(res.block()); float *p_raw = raw_res.ptr(); stride_type row_str_raw = raw_res.stride(0); stride_type col_str_raw = raw_res.stride(1); vsip::dda::Data<Block1, vsip::dda::in> raw1(op1.block()); float const *p1 = raw1.ptr(); stride_type row_str1 = raw1.stride(0); stride_type col_str1 = raw1.stride(1); vsip::dda::Data<Block2, vsip::dda::in> raw2(op2.block()); float const *p2 = raw2.ptr(); stride_type row_str2 = raw2.stride(0); stride_type col_str2 = raw2.stride(1); for (index_type r=0; r<res.size(0); ++r) { float* row_raw = p_raw; float const *row_1 = p1; float const *row_2 = p2; for (index_type c=0; c<res.size(1); ++c) { *row_raw = *row_1 + *row_2; row_1 += col_str1; row_2 += col_str2; row_raw += col_str_raw; } p_raw += row_str_raw; p1 += row_str1; p2 += row_str2; } }
typename vsip::impl::scalar_of<T>::type
norm_1(const_Matrix<T, Block> m)
{
  // Returns the largest column sum of element magnitudes of `m`.
  // Column 0 is read unconditionally, so `m` needs at least one column.
  typedef typename vsip::impl::scalar_of<T>::type scalar_type;

  scalar_type largest = sumval(mag(m.col(0)));

  for (index_type col = 1; col < m.size(1); ++col)
  {
    scalar_type const column_sum = sumval(mag(m.col(col)));
    if (largest < column_sum)
      largest = column_sum;
  }

  return largest;
}
double error_db(const_Matrix<T1, Block1> v1, const_Matrix<T2, Block2> v2)
{
  // Applies the row-wise error_db() overload to each pair of corresponding
  // rows and returns the largest result.  The result never drops below the
  // initial value of -250.
  double worst = -250;

  for (unsigned row = 0; row < v1.size(0); ++row)
  {
    double const row_error = error_db(v1.row(row), v2.row(row));
    worst = (row_error > worst) ? row_error : worst;
  }

  return worst;
}
inline bool equal(const_Matrix<T1, B1> v, const_Matrix<T2, B2> w)
{
  // Two matrices are equal when their shapes match and every pair of
  // corresponding elements satisfies the element-level equal().
  bool const same_shape =
    v.size(0) == w.size(0) && v.size(1) == w.size(1);
  if (!same_shape)
    return false;

  for (length_type row = 0; row < v.size(0); ++row)
  {
    for (length_type col = 0; col < v.size(1); ++col)
    {
      // Stop at the first mismatch.
      if (!equal(v.get(row, col), w.get(row, col)))
        return false;
    }
  }

  return true;
}
void matrix_add_2( const_Matrix<TR, BlockR> res, const_Matrix<T1, Block1> op1, const_Matrix<T2, Block2> op2) { vsip::dda::Data<BlockR, vsip::dda::out> raw_res(res.block()); vsip::dda::Data<Block1, vsip::dda::in> raw1(op1.block()); vsip::dda::Data<Block2, vsip::dda::in> raw2(op2.block()); float *pR = raw_res.ptr(); float const *p1 = raw1.ptr(); float const *p2 = raw2.ptr(); for (index_type r=0; r<res.size(0); ++r) { for (index_type c=0; c<res.size(1); ++c) { pR[r*raw_res.stride(0) + c*raw_res.stride(1)] = p1[r*raw1.stride(0) + c*raw1.stride(1)] + p2[r*raw2.stride(0) + c*raw2.stride(1)]; } } }
void interpolate( const_Matrix<IT, Block1> indices, // n x m Tensor<T, Block2> window, // n x m x I const_Matrix<complex<T>, Block3> in, // n x m Matrix<complex<T>, Block4> out, // nx x m length_type depth) { length_type n = indices.size(0); length_type m = indices.size(1); length_type nx = out.size(0); length_type I = depth; // window.size(2) may include padding assert(n == in.size(0)); assert(m == in.size(1)); assert(m == out.size(1)); assert(window.size(0) == n); assert(window.size(1) == m); out = complex<T>(0); for (index_type j = 0; j < m; ++j) { for (index_type i = 0; i < n; ++i) { index_type ikxrows = indices.get(i, j); index_type i_shift = (i + n/2) % n; for (index_type h = 0; h < I; ++h) { out.put(ikxrows + h, j, out.get(ikxrows + h, j) + (in.get(i_shift, j) * window.get(i, j, h))); } } out.col(j)(Domain<1>(j%2, 2, nx/2)) *= T(-1); } }