static void add( const_Matrix<TR, BlockR> res, const_Matrix<T1, Block1> op1, const_Matrix<T2, Block2> op2) { vsip::dda::Data<BlockR, vsip::dda::out> raw_res(res.block()); vsip::dda::Data<Block1, vsip::dda::in> raw1(op1.block()); vsip::dda::Data<Block2, vsip::dda::in> raw2(op2.block()); // int cost = raw_res.cost + raw1.cost + raw2.cost; // cout << "Tag_plain " << cost << endl; float *pR = raw_res.ptr(); float const *p1 = raw1.ptr(); float const *p2 = raw2.ptr(); for (index_type c=0; c<res.size(1); ++c) { for (index_type r=0; r<res.size(0); ++r) { pR[r*raw_res.stride(0) + c*raw_res.stride(1)] = p1[r*raw1.stride(0) + c*raw1.stride(1)] + p2[r*raw2.stride(0) + c*raw2.stride(1)]; } } }
static void add( const_Matrix<TR, BlockR> res, const_Matrix<T1, Block1> op1, const_Matrix<T2, Block2> op2) { typedef typename BlockR::layout_type layout_type; // Check that no memory is required. // test_assert((dda::Data<BlockR, layout_type>::CT_Mem_not_req)); // test_assert((dda::Data<Block1, layout_type>::CT_Mem_not_req)); // test_assert((dda::Data<Block2, layout_type>::CT_Mem_not_req)); vsip::dda::Data<BlockR, vsip::dda::out, layout_type> raw_res(res.block()); vsip::dda::Data<Block1, vsip::dda::in, layout_type> raw1(op1.block()); vsip::dda::Data<Block2, vsip::dda::in, layout_type> raw2(op2.block()); // int cost = raw_res.cost + raw1.cost + raw2.cost; // cout << "Tag_contig " << cost << endl; float* pR = raw_res.ptr(); float* p1 = raw1.ptr(); float* p2 = raw2.ptr(); for (index_type i=0; i<res.size(); ++i) { *pR = *p1 + *p2; ++pR; ++p1; ++p2; } }
void generic_prodj( const_Matrix<T0, Block0> a, const_Matrix<T1, Block1> b, Matrix<T2, Block2> r) { assert(r.size(0) == a.size(0)); assert(r.size(1) == b.size(1)); assert(a.size(1) == b.size(0)); #ifdef VSIP_IMPL_REF_IMPL impl::generic_prod(a, conj(b), r); #else vsip_csl::dispatch<vsip_csl::dispatcher::op::prod_mm_conj, void, Block2&, Block0 const&, Block1 const&> (r.block(), a.block(), b.block()); #endif }
void interpolate( const_Matrix<IT, Block1> indices, // n x m Tensor<T, Block2> window, // n x m x I const_Matrix<complex<T>, Block3> in, // n x m Matrix<complex<T>, Block4> out, // nx x m length_type depth, length_type padded_depth) { // All blocks must have the same dimension ordering typedef typename Block_layout<Block1>::order_type order1_type; typedef typename Block_layout<Block2>::order_type order2_type; typedef typename Block_layout<Block3>::order_type order3_type; typedef typename Block_layout<Block4>::order_type order4_type; assert(order1_type::impl_dim0 == order2_type::impl_dim0); assert(order1_type::impl_dim0 == order3_type::impl_dim0); assert(order1_type::impl_dim0 == order4_type::impl_dim0); assert(order1_type::impl_dim1 == order2_type::impl_dim1); assert(order1_type::impl_dim1 == order3_type::impl_dim1); assert(order1_type::impl_dim1 == order4_type::impl_dim1); Device_memory<Block1> dev_indices(indices.block(), impl::SYNC_IN); Device_memory<Block2> dev_window(window.block(), impl::SYNC_IN); Device_memory<Block3> dev_in(in.block(), impl::SYNC_IN); Device_memory<Block4> dev_out(out.block(), impl::SYNC_OUT); size_t rows_in = in.size(0); size_t rows_out = out.size(0); size_t cols = in.size(1); assert(cols == out.size(1)); interpolate( dev_indices.data(), dev_window.data(), reinterpret_cast<cuComplex const*>(dev_in.data()), reinterpret_cast<cuComplex*>(dev_out.data()), depth, padded_depth, rows_in, rows_out, cols); }
void generic_prod( const_Vector<T0, Block0> a, const_Matrix<T1, Block1> b, Vector<T2, Block2> r) { using namespace vsip_csl::dispatcher; assert(r.size() == b.size(1)); assert(a.size() == b.size(0)); #ifdef VSIP_IMPL_REF_IMPL Evaluator<op::prod_vm, dispatcher::be::cvsip, void(Block2&, Block0 const&, Block1 const&)>::exec (r.block(), a.block(), b.block()); #else vsip_csl::dispatch<op::prod_vm, void, Block2&, Block0 const&, Block1 const&> (r.block(), a.block(), b.block()); #endif }
void matrix_add_1( const_Matrix<TR, BlockR> res, const_Matrix<T1, Block1> op1, const_Matrix<T2, Block2> op2) { vsip::dda::Data<BlockR, vsip::dda::out> raw_res(res.block()); float *p_raw = raw_res.ptr(); stride_type row_str_raw = raw_res.stride(0); stride_type col_str_raw = raw_res.stride(1); vsip::dda::Data<Block1, vsip::dda::in> raw1(op1.block()); float const *p1 = raw1.ptr(); stride_type row_str1 = raw1.stride(0); stride_type col_str1 = raw1.stride(1); vsip::dda::Data<Block2, vsip::dda::in> raw2(op2.block()); float const *p2 = raw2.ptr(); stride_type row_str2 = raw2.stride(0); stride_type col_str2 = raw2.stride(1); for (index_type r=0; r<res.size(0); ++r) { float* row_raw = p_raw; float const *row_1 = p1; float const *row_2 = p2; for (index_type c=0; c<res.size(1); ++c) { *row_raw = *row_1 + *row_2; row_1 += col_str1; row_2 += col_str2; row_raw += col_str_raw; } p_raw += row_str_raw; p1 += row_str1; p2 += row_str2; } }
void matrix_add_2( const_Matrix<TR, BlockR> res, const_Matrix<T1, Block1> op1, const_Matrix<T2, Block2> op2) { vsip::dda::Data<BlockR, vsip::dda::out> raw_res(res.block()); vsip::dda::Data<Block1, vsip::dda::in> raw1(op1.block()); vsip::dda::Data<Block2, vsip::dda::in> raw2(op2.block()); float *pR = raw_res.ptr(); float const *p1 = raw1.ptr(); float const *p2 = raw2.ptr(); for (index_type r=0; r<res.size(0); ++r) { for (index_type c=0; c<res.size(1); ++c) { pR[r*raw_res.stride(0) + c*raw_res.stride(1)] = p1[r*raw1.stride(0) + c*raw1.stride(1)] + p2[r*raw2.stride(0) + c*raw2.stride(1)]; } } }
// Run an out-of-place 2-D transform from `in` to `out` on the given backend
// and apply the scale factor afterwards if the backend did not.
//
//   backend : object providing out_of_place() and supports_scale()
//   in      : input matrix
//   out     : output matrix
void
out_of_place(BE *backend,
             const_Matrix<InT, Block0> in,
             Matrix<OutT, Block1>      out)
{
  {
    // The direct-data-access objects live only inside this scope, so they
    // are gone before `out` is modified through the view below.
    dda::Data<Block0, dda::in>  src(in.block());
    dda::Data<Block1, dda::out> dst(out.block());

    backend->out_of_place(
      src.ptr(), src.stride(0), src.stride(1),
      dst.ptr(), dst.stride(0), dst.stride(1),
      select_fft_size<InT, OutT>(src.size(0), dst.size(0)),
      select_fft_size<InT, OutT>(src.size(1), dst.size(1)));
  }

  // Nothing left to do when the backend has already applied the scale...
  if (backend->supports_scale())
    return;

  // ...or when the scale factor is (almost) one.
  if (almost_equal(scale_, scalar_type(1.)))
    return;

  out *= scale_;
}
// Run a by-reference 2-D transform from `in` to `out` on the given backend
// and apply the scale factor afterwards if the backend did not.
//
//   backend : object providing by_reference() and supports_scale()
//   in      : input matrix (accessed with SYNC_IN)
//   out     : output matrix (accessed with SYNC_OUT)
void
by_reference(BE *backend,
             const_Matrix<InT, Block0> in,
             Matrix<OutT, Block1>      out)
{
  {
    // The external-data accessors are confined to this scope, so they are
    // gone before `out` is modified through the view below.
    Ext_data<Block0> src(in.block(),  SYNC_IN);
    Ext_data<Block1> dst(out.block(), SYNC_OUT);

    backend->by_reference(
      src.data(), src.stride(0), src.stride(1),
      dst.data(), dst.stride(0), dst.stride(1),
      select_fft_size<InT, OutT>(src.size(0), dst.size(0)),
      select_fft_size<InT, OutT>(src.size(1), dst.size(1)));
  }

  // Scaling is needed only if the backend has not done it and the factor
  // differs noticeably from one.
  bool const needs_scaling =
    !backend->supports_scale() && !almost_equal(scale_, scalar_type(1.));

  if (needs_scaling)
  {
    out *= scale_;
  }
}