Example #1
0
  // Element-wise matrix addition, res = op1 + op2, done on raw pointers
  // obtained through direct data access.  Each element is located with the
  // per-dimension strides reported by its own Data object, so the three
  // operands do not need to share a memory layout.
  //
  // NOTE(review): the raw pointers are declared as float regardless of
  // TR/T1/T2 -- presumably this is only instantiated for float views; confirm.
  static void add(
    const_Matrix<TR, BlockR> res,
    const_Matrix<T1, Block1> op1,
    const_Matrix<T2, Block2> op2)
  {
    vsip::dda::Data<BlockR, vsip::dda::out> raw_res(res.block());
    vsip::dda::Data<Block1, vsip::dda::in> raw1(op1.block());
    vsip::dda::Data<Block2, vsip::dda::in> raw2(op2.block());

    // int cost = raw_res.cost + raw1.cost + raw2.cost;
    // cout << "Tag_plain " << cost << endl;

    float       *dst  = raw_res.ptr();
    float const *lhs  = raw1.ptr();
    float const *rhs  = raw2.ptr();

    // Strides are fixed for the lifetime of the Data objects; read them once.
    stride_type const dst_s0 = raw_res.stride(0);
    stride_type const dst_s1 = raw_res.stride(1);
    stride_type const lhs_s0 = raw1.stride(0);
    stride_type const lhs_s1 = raw1.stride(1);
    stride_type const rhs_s0 = raw2.stride(0);
    stride_type const rhs_s1 = raw2.stride(1);

    // Columns form the outer loop, rows the inner loop.
    for (index_type col = 0; col < res.size(1); ++col)
      for (index_type row = 0; row < res.size(0); ++row)
	dst[row*dst_s0 + col*dst_s1] =
	  lhs[row*lhs_s0 + col*lhs_s1] + rhs[row*rhs_s0 + col*rhs_s1];
  }
Example #2
0
  // Element-wise matrix addition, res = op1 + op2, done on raw pointers
  // obtained through direct data access.
  //
  // All three blocks are accessed with BlockR's layout_type, and the data is
  // then walked as one linear run of res.size() elements with unit pointer
  // increments (no strides are consulted).
  //
  // The input pointers are pointer-to-const: raw1 and raw2 are opened with
  // dda::in and are only ever read here.
  //
  // NOTE(review): the raw pointers are declared as float regardless of
  // TR/T1/T2 -- presumably this is only instantiated for float views; confirm.
  static void add(
    const_Matrix<TR, BlockR> res,
    const_Matrix<T1, Block1> op1,
    const_Matrix<T2, Block2> op2)
  {
    typedef typename BlockR::layout_type layout_type;

    // Check that no memory is required.
    // test_assert((dda::Data<BlockR, layout_type>::CT_Mem_not_req));
    // test_assert((dda::Data<Block1, layout_type>::CT_Mem_not_req));
    // test_assert((dda::Data<Block2, layout_type>::CT_Mem_not_req));

    vsip::dda::Data<BlockR, vsip::dda::out, layout_type> raw_res(res.block());
    vsip::dda::Data<Block1, vsip::dda::in, layout_type> raw1(op1.block());
    vsip::dda::Data<Block2, vsip::dda::in, layout_type> raw2(op2.block());

    // int cost = raw_res.cost + raw1.cost + raw2.cost;
    // cout << "Tag_contig " << cost << endl;

    float       *pR = raw_res.ptr();
    float const *p1 = raw1.ptr();
    float const *p2 = raw2.ptr();

    // Linear sweep over every element of the result.
    for (index_type i=0; i<res.size(); ++i)
    {
      *pR = *p1 + *p2;
      ++pR;
      ++p1;
      ++p2;
    }
  }
// Matrix product of a with the conjugate of b, stored into r.
//
// Shape requirements (checked with assert):
//   r.size(0) == a.size(0), r.size(1) == b.size(1), a.size(1) == b.size(0).
//
// With VSIP_IMPL_REF_IMPL defined the work is forwarded to
// impl::generic_prod() on conj(b); otherwise it is handed to the
// prod_mm_conj dispatcher operation on the underlying blocks.
void
generic_prodj(
  const_Matrix<T0, Block0> a,
  const_Matrix<T1, Block1> b,
  Matrix<T2, Block2>       r)
{
  assert(r.size(0) == a.size(0));
  assert(r.size(1) == b.size(1));
  assert(a.size(1) == b.size(0));

#ifdef VSIP_IMPL_REF_IMPL
  impl::generic_prod(a, conj(b), r);
#else
  // Name the blocks once; the dispatcher works on blocks, not views.
  Block2       &result_block = r.block();
  Block0 const &lhs_block    = a.block();
  Block1 const &rhs_block    = b.block();

  vsip_csl::dispatch<vsip_csl::dispatcher::op::prod_mm_conj, void,
    Block2&, Block0 const&, Block1 const&>
    (result_block, lhs_block, rhs_block);
#endif
}
Example #4
0
// View-level front end for the pointer-based interpolate() overload.
//
// Wraps each view's block in a Device_memory object (inputs with SYNC_IN,
// the output with SYNC_OUT) and forwards the resulting data pointers,
// together with the row/column counts taken from `in` and `out`, to the
// overload that works on raw pointers.
//
// Preconditions (checked with assert):
//   - all four blocks agree on impl_dim0 and impl_dim1 of their order_type;
//   - `in` and `out` have the same number of columns.
void
interpolate(
  const_Matrix<IT, Block1>	   indices,  // n x m
  Tensor<T, Block2>                window,   // n x m x I
  const_Matrix<complex<T>, Block3> in,       // n x m
  Matrix<complex<T>, Block4>       out,      // nx x m
  length_type                      depth,
  length_type                      padded_depth)
{
  // Dimension ordering of each operand's block.
  typedef typename Block_layout<Block1>::order_type indices_order;
  typedef typename Block_layout<Block2>::order_type window_order;
  typedef typename Block_layout<Block3>::order_type in_order;
  typedef typename Block_layout<Block4>::order_type out_order;

  // Every block has to use the same ordering as `indices`.
  assert(indices_order::impl_dim0 == window_order::impl_dim0);
  assert(indices_order::impl_dim0 == in_order::impl_dim0);
  assert(indices_order::impl_dim0 == out_order::impl_dim0);
  assert(indices_order::impl_dim1 == window_order::impl_dim1);
  assert(indices_order::impl_dim1 == in_order::impl_dim1);
  assert(indices_order::impl_dim1 == out_order::impl_dim1);

  Device_memory<Block1> indices_mem(indices.block(), impl::SYNC_IN);
  Device_memory<Block2> window_mem(window.block(), impl::SYNC_IN);
  Device_memory<Block3> in_mem(in.block(), impl::SYNC_IN);
  Device_memory<Block4> out_mem(out.block(), impl::SYNC_OUT);

  size_t n_rows_in = in.size(0);
  size_t n_rows_out = out.size(0);
  size_t n_cols = in.size(1);
  assert(n_cols == out.size(1));

  // The complex data pointers are reinterpreted as cuComplex for the
  // pointer-based overload.
  cuComplex const *in_ptr = reinterpret_cast<cuComplex const*>(in_mem.data());
  cuComplex *out_ptr = reinterpret_cast<cuComplex*>(out_mem.data());

  interpolate(
    indices_mem.data(),
    window_mem.data(),
    in_ptr,
    out_ptr,
    depth,
    padded_depth,
    n_rows_in,
    n_rows_out,
    n_cols);
}
// Vector-matrix product: r = a * b.
//
// Shape requirements (checked with assert):
//   r.size() == b.size(1) and a.size() == b.size(0).
//
// With VSIP_IMPL_REF_IMPL defined the cvsip backend evaluator for prod_vm is
// invoked directly; otherwise the prod_vm operation goes through the
// dispatcher.  Both paths operate on the views' underlying blocks.
void
generic_prod(
  const_Vector<T0, Block0> a,
  const_Matrix<T1, Block1> b,
  Vector<T2, Block2>       r)
{
  using namespace vsip_csl::dispatcher;

  assert(r.size() == b.size(1));
  assert(a.size() == b.size(0));

  Block2       &result_block = r.block();
  Block0 const &vector_block = a.block();
  Block1 const &matrix_block = b.block();

#ifdef VSIP_IMPL_REF_IMPL
  typedef Evaluator<op::prod_vm, dispatcher::be::cvsip,
    void(Block2&, Block0 const&, Block1 const&)> evaluator_type;

  evaluator_type::exec(result_block, vector_block, matrix_block);
#else
  vsip_csl::dispatch<op::prod_vm, void,
    Block2&, Block0 const&, Block1 const&>
    (result_block, vector_block, matrix_block);
#endif
}
Example #6
0
// Element-wise matrix addition, res = op1 + op2, using direct data access
// and pointer walking.
//
// Three "row start" pointers are advanced by the dimension-0 stride after
// each row; inside a row, three cursors are advanced by the dimension-1
// stride after each element.  Every operand uses its own strides.
//
// NOTE(review): the raw pointers are declared as float regardless of
// TR/T1/T2 -- presumably this is only instantiated for float views; confirm.
void
matrix_add_1(
  const_Matrix<TR, BlockR> res,
  const_Matrix<T1, Block1> op1,
  const_Matrix<T2, Block2> op2)
{
  vsip::dda::Data<BlockR, vsip::dda::out> raw_res(res.block());
  float *dst_row = raw_res.ptr();
  stride_type const dst_row_step = raw_res.stride(0);
  stride_type const dst_col_step = raw_res.stride(1);

  vsip::dda::Data<Block1, vsip::dda::in> raw1(op1.block());
  float const *lhs_row = raw1.ptr();
  stride_type const lhs_row_step = raw1.stride(0);
  stride_type const lhs_col_step = raw1.stride(1);

  vsip::dda::Data<Block2, vsip::dda::in> raw2(op2.block());
  float const *rhs_row = raw2.ptr();
  stride_type const rhs_row_step = raw2.stride(0);
  stride_type const rhs_col_step = raw2.stride(1);

  for (index_type r = 0; r < res.size(0);
       ++r,
       dst_row += dst_row_step,
       lhs_row += lhs_row_step,
       rhs_row += rhs_row_step)
  {
    // Cursors start at the beginning of the current row.
    float       *dst = dst_row;
    float const *lhs = lhs_row;
    float const *rhs = rhs_row;

    for (index_type c = 0; c < res.size(1);
	 ++c,
	 lhs += lhs_col_step,
	 rhs += rhs_col_step,
	 dst += dst_col_step)
    {
      *dst = *lhs + *rhs;
    }
  }
}
Example #7
0
// Element-wise matrix addition, res = op1 + op2, using direct data access
// and explicit index arithmetic.
//
// Element (row, col) of each operand is addressed as
//   base[row * stride(0) + col * stride(1)]
// with that operand's own strides.  Rows form the outer loop.
//
// NOTE(review): the raw pointers are declared as float regardless of
// TR/T1/T2 -- presumably this is only instantiated for float views; confirm.
void
matrix_add_2(
  const_Matrix<TR, BlockR> res,
  const_Matrix<T1, Block1> op1,
  const_Matrix<T2, Block2> op2)
{
  vsip::dda::Data<BlockR, vsip::dda::out> raw_res(res.block());
  vsip::dda::Data<Block1, vsip::dda::in> raw1(op1.block());
  vsip::dda::Data<Block2, vsip::dda::in> raw2(op2.block());

  float       *out = raw_res.ptr();
  float const *in1 = raw1.ptr();
  float const *in2 = raw2.ptr();

  // Read each stride once instead of on every element.
  stride_type const out_rs = raw_res.stride(0);
  stride_type const out_cs = raw_res.stride(1);
  stride_type const in1_rs = raw1.stride(0);
  stride_type const in1_cs = raw1.stride(1);
  stride_type const in2_rs = raw2.stride(0);
  stride_type const in2_cs = raw2.stride(1);

  for (index_type row = 0; row < res.size(0); ++row)
    for (index_type col = 0; col < res.size(1); ++col)
      out[row*out_rs + col*out_cs] =
	in1[row*in1_rs + col*in1_cs] + in2[row*in2_rs + col*in2_cs];
}
Example #8
0
  // Runs the backend's out-of-place operation from `in` into `out`.
  //
  // The backend receives each operand's data pointer and its two strides,
  // plus one extent per dimension computed by select_fft_size from the input
  // and output sizes.  Afterwards `out` is multiplied by scale_, unless the
  // backend reports that it scales itself or scale_ is almost equal to one.
  void out_of_place(BE *backend,
		    const_Matrix<InT, Block0> in,
		    Matrix<OutT, Block1> out)
  {
    {
      // Direct data access is confined to this scope, so both Data objects
      // are destroyed before `out` is used as a view again below.
      dda::Data<Block0, dda::in> src(in.block());
      dda::Data<Block1, dda::out> dst(out.block());

      backend->out_of_place(
	src.ptr(), src.stride(0), src.stride(1),
	dst.ptr(), dst.stride(0), dst.stride(1),
	select_fft_size<InT, OutT>(src.size(0), dst.size(0)),
	select_fft_size<InT, OutT>(src.size(1), dst.size(1)));
    }

    // Nothing left to do when the backend already applied the scale ...
    if (backend->supports_scale())
      return;
    // ... or when scaling would be a no-op.
    if (almost_equal(scale_, scalar_type(1.)))
      return;

    out *= scale_;
  }
  // Runs the backend's by-reference operation from `in` into `out`.
  //
  // The backend receives each operand's data pointer and its two strides,
  // plus one extent per dimension computed by select_fft_size from the input
  // and output sizes.  Afterwards `out` is multiplied by scale_, unless the
  // backend reports that it scales itself or scale_ is almost equal to one.
  void by_reference(BE *backend,
		    const_Matrix<InT, Block0> in,
		    Matrix<OutT, Block1> out)
  {
    {
      // Raw access is confined to this scope, so both Ext_data objects are
      // destroyed before `out` is used as a view again below.
      Ext_data<Block0> src(in.block(),  SYNC_IN);
      Ext_data<Block1> dst(out.block(), SYNC_OUT);

      backend->by_reference(
		src.data(), src.stride(0), src.stride(1),
		dst.data(), dst.stride(0), dst.stride(1),
		select_fft_size<InT, OutT>(src.size(0), dst.size(0)),
		select_fft_size<InT, OutT>(src.size(1), dst.size(1)));
    }

    // Apply the scale here only if the backend did not and it is not ~1.
    if (!(backend->supports_scale() || almost_equal(scale_, scalar_type(1.))))
    {
      out *= scale_;
    }
  }