示例#1
0
文件: lu.hpp 项目: GWTWFN/vsipl
  /// Solve a linear system using the stored LU factorization (CVSIP backend).
  ///
  /// `b` holds the right-hand sides and `x` receives the result; both must
  /// have the same shape and `b` must have `length_` rows.  The
  /// transformation `tr` comes from the enclosing scope (not visible here).
  ///
  /// Raises `unimplemented` (through VSIP_IMPL_THROW) for `mat_conj`, for
  /// `mat_trans` on complex `T`, and for `mat_herm` on non-complex `T`.
  /// Always returns true when it returns.
  bool solve(const_Matrix<T, Block0> b, Matrix<T, Block1> x)
  {
    typedef typename Block_layout<Block0>::order_type order_type;
    typedef typename Block_layout<Block0>::complex_type complex_type;
    typedef Layout<2, order_type, Stride_unit_dense, complex_type> data_LP;
    typedef Strided<2, T, data_LP, Local_map> block_type;

    assert(b.size(0) == length_);
    assert(b.size(0) == x.size(0) && b.size(1) == x.size(1));

    // Reject unsupported transformations before allocating and filling the
    // temporary below, so the failure path does no wasted work.
    if (tr == mat_conj ||
        (tr == mat_trans && Is_complex<T>::value) ||
        (tr == mat_herm && !Is_complex<T>::value))
    {
      VSIP_IMPL_THROW(unimplemented(
        "LU solver (CVSIP backend) does not implement this transformation"));
    }

    // Work on a local, unit-stride dense copy of the right-hand side.
    Matrix<T, block_type> b_int(b.size(0), b.size(1));
    assign_local(b_int, b);

    {
      // Scope limits the lifetime of the direct-data accessor and the CVSIP
      // view wrapped around the temporary's storage.
      Ext_data<block_type> b_ext(b_int.block());

      cvsip::View<2,T,true>
        cvsip_b_int(b_ext.data(),0,b_ext.stride(0),b_ext.size(0),
                    b_ext.stride(1),b_ext.size(1));

      // NOTE(review): lu_solve presumably overwrites the view in place with
      // the solution -- confirm against traits::lu_solve.
      cvsip_b_int.block().admit(true);
      traits::lu_solve(lu_, tr, cvsip_b_int.ptr());
      cvsip_b_int.block().release(true);
    }

    // Copy the contents of the temporary into the caller's output view.
    assign_local(x, b_int);
    return true;
  }
示例#2
0
  /// Element-wise sum: res(r, c) = op1(r, c) + op2(r, c).
  ///
  /// Each block is opened through a direct-data accessor with its default
  /// layout, and every element is located from that accessor's own strides
  /// (stride(0) scales the row index, stride(1) the column index).
  static void add(
    const_Matrix<TR, BlockR> res,
    const_Matrix<T1, Block1> op1,
    const_Matrix<T2, Block2> op2)
  {
    // The result is opened for output, the operands for input.
    vsip::dda::Data<BlockR, vsip::dda::out> dst_data(res.block());
    vsip::dda::Data<Block1, vsip::dda::in> lhs_data(op1.block());
    vsip::dda::Data<Block2, vsip::dda::in> rhs_data(op2.block());

    // int cost = dst_data.cost + lhs_data.cost + rhs_data.cost;
    // cout << "Tag_plain " << cost << endl;

    float *dst_base = dst_data.ptr();
    float const *lhs_base = lhs_data.ptr();
    float const *rhs_base = rhs_data.ptr();

    // Column index is the outer loop, row index the inner loop.
    for (index_type col = 0; col < res.size(1); ++col)
    {
      for (index_type row = 0; row < res.size(0); ++row)
      {
        float *dst =
          dst_base + (row*dst_data.stride(0) + col*dst_data.stride(1));
        float const *lhs =
          lhs_base + (row*lhs_data.stride(0) + col*lhs_data.stride(1));
        float const *rhs =
          rhs_base + (row*rhs_data.stride(0) + col*rhs_data.stride(1));

        *dst = *lhs + *rhs;
      }
    }
  }
示例#3
0
  /// Element-wise sum res = op1 + op2, walking all three blocks linearly.
  ///
  /// All three accessors are requested with BlockR's layout_type, and the
  /// raw pointers are advanced one element at a time for res.size()
  /// elements.
  /// NOTE(review): this is only correct when that layout is dense with unit
  /// stride -- confirm at the call site.
  static void add(
    const_Matrix<TR, BlockR> res,
    const_Matrix<T1, Block1> op1,
    const_Matrix<T2, Block2> op2)
  {
    typedef typename BlockR::layout_type layout_type;

    // Check that no memory is required.
    // test_assert((dda::Data<BlockR, layout_type>::CT_Mem_not_req));
    // test_assert((dda::Data<Block1, layout_type>::CT_Mem_not_req));
    // test_assert((dda::Data<Block2, layout_type>::CT_Mem_not_req));

    vsip::dda::Data<BlockR, vsip::dda::out, layout_type> raw_res(res.block());
    vsip::dda::Data<Block1, vsip::dda::in, layout_type> raw1(op1.block());
    vsip::dda::Data<Block2, vsip::dda::in, layout_type> raw2(op2.block());

    // int cost = raw_res.cost + raw1.cost + raw2.cost;
    // cout << "Tag_contig " << cost << endl;

    // The operands are opened read-only (dda::in), so they are held through
    // pointers-to-const; only the result pointer is writable.
    float*       pR = raw_res.ptr();
    float const* p1 = raw1.ptr();
    float const* p2 = raw2.ptr();

    for (index_type i=0; i<res.size(); ++i)
    {
      *pR = *p1 + *p2;
      ++pR;
      ++p1;
      ++p2;
    }
  }
/// Matrix product with the second operand conjugated; the result is stored
/// in `r`.
///
/// Shape requirements (checked with assert): `r` has as many rows as `a`
/// and as many columns as `b`, and `a`'s column count equals `b`'s row
/// count.
void
generic_prodj(
  const_Matrix<T0, Block0> a,
  const_Matrix<T1, Block1> b,
  Matrix<T2, Block2>       r)
{
  assert(r.size(0) == a.size(0));
  assert(r.size(1) == b.size(1));
  assert(a.size(1) == b.size(0));

#ifndef VSIP_IMPL_REF_IMPL
  // Regular build: pass the three blocks to the dispatcher for the
  // prod_mm_conj operation.
  vsip_csl::dispatch<vsip_csl::dispatcher::op::prod_mm_conj, void,
    Block2&, Block0 const&, Block1 const&>
    (r.block(), a.block(), b.block());
#else
  // Reference-implementation build: conjugate `b` as a view expression and
  // reuse the plain generic product.
  impl::generic_prod(a, conj(b), r);
#endif
}
示例#5
0
/// View-level wrapper for the device interpolation routine.
///
/// Wraps each view's block in a Device_memory object and forwards the
/// resulting raw pointers, together with the depth values and the matrix
/// dimensions, to the interpolate() call that takes pointers.
/// NOTE(review): SYNC_IN / SYNC_OUT presumably select host-to-device and
/// device-to-host copies respectively -- confirm against Device_memory.
void
interpolate(
  const_Matrix<IT, Block1>         indices,  // n x m
  Tensor<T, Block2>                window,   // n x m x I
  const_Matrix<complex<T>, Block3> in,       // n x m
  Matrix<complex<T>, Block4>       out,      // nx x m
  length_type                      depth,
  length_type                      padded_depth)
{
  // Every block has to share one dimension ordering; compare the first two
  // dimension slots of each against those of the index block.
  typedef typename Block_layout<Block1>::order_type indices_order;
  typedef typename Block_layout<Block2>::order_type window_order;
  typedef typename Block_layout<Block3>::order_type in_order;
  typedef typename Block_layout<Block4>::order_type out_order;
  assert(indices_order::impl_dim0 == window_order::impl_dim0);
  assert(indices_order::impl_dim0 == in_order::impl_dim0);
  assert(indices_order::impl_dim0 == out_order::impl_dim0);
  assert(indices_order::impl_dim1 == window_order::impl_dim1);
  assert(indices_order::impl_dim1 == in_order::impl_dim1);
  assert(indices_order::impl_dim1 == out_order::impl_dim1);

  // The three inputs and the single output, in declaration order.
  Device_memory<Block1> indices_mem(indices.block(), impl::SYNC_IN);
  Device_memory<Block2> window_mem(window.block(), impl::SYNC_IN);
  Device_memory<Block3> in_mem(in.block(), impl::SYNC_IN);
  Device_memory<Block4> out_mem(out.block(), impl::SYNC_OUT);

  size_t input_rows = in.size(0);
  size_t output_rows = out.size(0);
  size_t column_count = in.size(1);
  assert(column_count == out.size(1));

  // The complex buffers are reinterpreted as cuComplex for the callee.
  interpolate(
    indices_mem.data(),
    window_mem.data(),
    reinterpret_cast<cuComplex const*>(in_mem.data()),
    reinterpret_cast<cuComplex*>(out_mem.data()),
    depth,
    padded_depth,
    input_rows,
    output_rows,
    column_count);
}
/// Matrix-vector product of `a` and `b`; the result is stored in `r`.
///
/// Shape requirements (checked with assert): `r` has one element per row
/// of `a`, and `b` has one element per column of `a`.
void
generic_prod(
  const_Matrix<T0, Block0> a,
  const_Vector<T1, Block1> b,
  Vector<T2, Block2>       r)
{
  using namespace vsip_csl::dispatcher;

  assert(r.size() == a.size(0));
  assert(a.size(1) == b.size());

  // Name the blocks once; both build variants take the same three arguments.
  Block2&       result_block = r.block();
  Block0 const& matrix_block = a.block();
  Block1 const& vector_block = b.block();

#ifndef VSIP_IMPL_REF_IMPL
  // Regular build: let the dispatcher handle the prod_mv operation.
  vsip_csl::dispatch<op::prod_mv, void,
    Block2&, Block0 const&, Block1 const&>
    (result_block, matrix_block, vector_block);
#else
  // Reference-implementation build: call the CVSIP evaluator directly.
  Evaluator<op::prod_mv, be::cvsip,
    void(Block2&, Block0 const&, Block1 const&)>::exec
    (result_block, matrix_block, vector_block);
#endif
}
示例#7
0
/// Element-wise sum res = op1 + op2 using direct data access and pointer
/// stepping: one pointer per block marks the start of the current row and
/// a second one walks along that row, each advanced by the block's strides.
void
matrix_add_1(
  const_Matrix<TR, BlockR> res,
  const_Matrix<T1, Block1> op1,
  const_Matrix<T2, Block2> op2)
{
  // The result is opened for output, the operands for input.
  vsip::dda::Data<BlockR, vsip::dda::out> dst_data(res.block());
  vsip::dda::Data<Block1, vsip::dda::in> lhs_data(op1.block());
  vsip::dda::Data<Block2, vsip::dda::in> rhs_data(op2.block());

  // Start-of-row cursors.
  float *dst_row = dst_data.ptr();
  float const *lhs_row = lhs_data.ptr();
  float const *rhs_row = rhs_data.ptr();

  // stride(0) moves to the next row, stride(1) to the next column.
  stride_type dst_row_step = dst_data.stride(0);
  stride_type dst_col_step = dst_data.stride(1);
  stride_type lhs_row_step = lhs_data.stride(0);
  stride_type lhs_col_step = lhs_data.stride(1);
  stride_type rhs_row_step = rhs_data.stride(0);
  stride_type rhs_col_step = rhs_data.stride(1);

  for (index_type row = 0; row < res.size(0); ++row)
  {
    float *dst = dst_row;
    float const *lhs = lhs_row;
    float const *rhs = rhs_row;

    for (index_type col = 0; col < res.size(1);
         ++col, lhs += lhs_col_step, rhs += rhs_col_step, dst += dst_col_step)
    {
      *dst = *lhs + *rhs;
    }

    dst_row += dst_row_step;
    lhs_row += lhs_row_step;
    rhs_row += rhs_row_step;
  }
}
示例#8
0
文件: svd.cpp 项目: bambang/vsipl
/// Largest column sum of element magnitudes of `m` (the matrix 1-norm).
///
/// Column 0 is read unconditionally, so `m` must have at least one column.
typename vsip::impl::scalar_of<T>::type
norm_1(const_Matrix<T, Block> m)
{
  typedef typename vsip::impl::scalar_of<T>::type scalar_type;

  // Seed with the first column, then keep the larger value column by column.
  scalar_type largest = sumval(mag(m.col(0)));

  for (index_type c = 1; c < m.size(1); ++c)
  {
    scalar_type const column_sum = sumval(mag(m.col(c)));
    if (largest < column_sum)
      largest = column_sum;
  }

  return largest;
}
示例#9
0
/// Worst-case (largest) row-wise error_db value between two matrices.
///
/// Each row of `v1` is compared with the same row of `v2` through the
/// error_db overload that accepts row views; the maximum of those results
/// is returned.  The result never drops below -250, the starting value.
double
error_db(const_Matrix<T1, Block1> v1,
         const_Matrix<T2, Block2> v2)
{
  double worst = -250;

  for (unsigned row = 0; row < v1.size(0); ++row)
  {
    double const row_db = error_db(v1.row(row), v2.row(row));
    if (row_db > worst)
    {
      worst = row_db;
    }
  }

  return worst;
}
示例#10
0
/// True when `v` and `w` have the same shape and every pair of
/// corresponding elements satisfies the element-level equal().
inline bool equal(const_Matrix<T1, B1> v, const_Matrix<T2, B2> w)
{
  // Matrices of different shape are never equal.
  bool const same_shape = v.size(0) == w.size(0) && v.size(1) == w.size(1);
  if (!same_shape)
  {
    return false;
  }

  // Stop at the first mismatching element.
  for (length_type row = 0; row < v.size(0); ++row)
  {
    for (length_type col = 0; col < v.size(1); ++col)
    {
      if (!equal(v.get(row, col), w.get(row, col)))
      {
        return false;
      }
    }
  }

  return true;
}
示例#11
0
/// Element-wise sum res = op1 + op2 using direct data access and explicit
/// offset arithmetic: each element's position is computed from the row and
/// column indices and the owning accessor's strides.
void
matrix_add_2(
  const_Matrix<TR, BlockR> res,
  const_Matrix<T1, Block1> op1,
  const_Matrix<T2, Block2> op2)
{
  // The result is opened for output, the operands for input.
  vsip::dda::Data<BlockR, vsip::dda::out> dst_data(res.block());
  vsip::dda::Data<Block1, vsip::dda::in> lhs_data(op1.block());
  vsip::dda::Data<Block2, vsip::dda::in> rhs_data(op2.block());

  float *dst_base = dst_data.ptr();
  float const *lhs_base = lhs_data.ptr();
  float const *rhs_base = rhs_data.ptr();

  // Row index is the outer loop, column index the inner loop.
  for (index_type row = 0; row < res.size(0); ++row)
  {
    for (index_type col = 0; col < res.size(1); ++col)
    {
      float *dst =
        dst_base + (row*dst_data.stride(0) + col*dst_data.stride(1));
      float const *lhs =
        lhs_base + (row*lhs_data.stride(0) + col*lhs_data.stride(1));
      float const *rhs =
        rhs_base + (row*rhs_data.stride(0) + col*rhs_data.stride(1));

      *dst = *lhs + *rhs;
    }
  }
}
示例#12
0
/// Host-side interpolation: scatter-accumulate windowed input samples into
/// `out`.
///
/// `out` is first zeroed.  Then, for every input position (i, j), the sample
/// taken from row (i + n/2) % n of column j of `in` is multiplied by each of
/// the `depth` window taps window(i, j, h) and added to
/// out(indices(i, j) + h, j).  After a column is complete, nx/2 of its
/// elements -- every second one, starting at index j % 2 -- are negated.
///
/// Preconditions (checked with assert): `indices`, `in` and the first two
/// dimensions of `window` are n x m; `out` has m columns; `depth` does not
/// exceed the window's third dimension; every written row lies inside `out`.
void
interpolate(
  const_Matrix<IT, Block1>         indices,  // n x m
  Tensor<T, Block2>                window,   // n x m x I
  const_Matrix<complex<T>, Block3> in,       // n x m
  Matrix<complex<T>, Block4>       out,      // nx x m
  length_type                      depth)
{
  length_type n = indices.size(0);
  length_type m = indices.size(1);
  length_type nx = out.size(0);
  length_type I = depth; // window.size(2) may include padding
  assert(n == in.size(0));
  assert(m == in.size(1));
  assert(m == out.size(1));
  assert(window.size(0) == n);
  assert(window.size(1) == m);
  // Only the first I taps are read, so they must exist.
  assert(I <= window.size(2));

  out = complex<T>(0);

  for (index_type j = 0; j < m; ++j)
  {
    for (index_type i = 0; i < n; ++i)
    {
      index_type ikxrows = indices.get(i, j);
      index_type i_shift = (i + n/2) % n;

      // Rows ikxrows .. ikxrows + I - 1 are written below.
      assert(ikxrows + I <= nx);

      // The input sample does not depend on the tap index; read it once.
      complex<T> const sample = in.get(i_shift, j);

      for (index_type h = 0; h < I; ++h)
      {
        out.put(ikxrows + h, j, out.get(ikxrows + h, j) +
          (sample * window.get(i, j, h)));
      }
    }
    // Negate every second element of this column, starting at index j % 2.
    out.col(j)(Domain<1>(j%2, 2, nx/2)) *= T(-1);
  }
}