コード例 #1
0
ファイル: multi-dgemm-type.cpp プロジェクト: JWilhelm/cp2k
/**
 * Enqueues one batch of work into m_stream: uploads the operands of the batch,
 * invokes the host data's process() function through the prepared signature,
 * and downloads the result buffer.
 *
 * @param index Position of the first batch item inside m_host_data->idata().
 * @param size  Number of items in the batch; nothing is enqueued when zero.
 * @return LIBXSTREAM_ERROR_NONE when the end of the function is reached.
 *         NOTE(review): the failure behavior of LIBXSTREAM_CHECK_CONDITION and
 *         LIBXSTREAM_CHECK_CALL_ASSERT is defined elsewhere - confirm.
 */
int multi_dgemm_type::operator()(size_t index, size_t size)
{
  LIBXSTREAM_CHECK_CONDITION(ready() && (index + size) <= m_host_data->size());

  if (0 < size) {
    if (0 == demux()) {
      // This manual synchronization prevents multiple threads from queuing work into the *same* stream (at the same time).
      // This is only needed if the stream was created without demux support in order to rely on manual synchronization.
      LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_stream_lock(m_stream));
    }
    // idata()[index] and idata()[index+size] delimit the element range [i0, i1) that is transferred for a, b, and c.
    const size_t i0 = m_host_data->idata()[index], i1 = m_host_data->idata()[index+size];
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->adata() + i0, m_adata, sizeof(double) * (i1 - i0), m_stream));
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->bdata() + i0, m_bdata, sizeof(double) * (i1 - i0), m_stream));
    // transferring cdata is part of the benchmark; since it is all zeros we could do better with libxstream_memset_zero
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->cdata() + i0, m_cdata, sizeof(double) * (i1 - i0), m_stream));
    // Only the first `size` index entries of the batch are uploaded.
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->idata() + index, m_idata, sizeof(size_t) * size, m_stream));
#if defined(LIBXSTREAM_DEBUG)
    size_t n = 0;
    LIBXSTREAM_ASSERT(LIBXSTREAM_ERROR_NONE == libxstream_fn_nargs(m_signature, &n) && 6 == n);
#endif
    // Distance between the last index entry of the batch and the end of the batch
    // (presumably the element count of the last item - confirm against process()).
    const size_t nn = i1 - m_host_data->idata()[index+size-1];
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(m_signature, 0, &size, libxstream_map_to_type(size), 0, 0));
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(m_signature, 1,   &nn, libxstream_map_to_type(nn  ), 0, 0));
#if defined(LIBXSTREAM_DEBUG)
    // `n` only exists in debug builds; keep this check under the same guard as its declaration
    // so the code does not depend on LIBXSTREAM_ASSERT discarding its argument.
    LIBXSTREAM_ASSERT(LIBXSTREAM_ERROR_NONE == libxstream_get_arity(m_signature, &n) && 6 == n);
#endif
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_call(m_host_data->process(), m_signature, m_stream, LIBXSTREAM_CALL_DEFAULT));
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_d2h(m_cdata, m_host_data->cdata() + i0, sizeof(double) * (i1 - i0), m_stream));
    if (0 == demux()) {
      // Release the stream only after the complete batch (upload, call, download) has been queued.
      LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_stream_unlock(m_stream));
    }
  }

  return LIBXSTREAM_ERROR_NONE;
}
コード例 #2
0
ファイル: multi-dgemm-type.cpp プロジェクト: JWilhelm/cp2k
/**
 * (Re-)initializes this object: calls deinit(), creates the stream, allocates the
 * device buffers, and creates a six-argument signature with arguments 2..5 bound
 * to the device buffers. Arguments 0 and 1 are not bound here.
 *
 * @param name      Name passed to libxstream_stream_create.
 * @param host_data Host-side data; its address is stored in m_host_data, so the
 *                  object must outlive this instance's use of it.
 * @param device    Device used for the stream and for all allocations.
 * @param demux     Demux setting forwarded to libxstream_stream_create.
 * @param max_batch Maximum number of items per batch; determines the buffer sizes.
 * @return LIBXSTREAM_ERROR_NONE when all steps were executed.
 *         NOTE(review): LIBXSTREAM_CHECK_CALL is defined elsewhere; presumably it
 *         hands a failing status back to the caller - confirm.
 */
int multi_dgemm_type::init(const char* name, host_data_type& host_data, int device, int demux, size_t max_batch)
{
  LIBXSTREAM_CHECK_CALL(deinit());
  // Element count of each of the a/b/c device buffers.
  const size_t max_msize = max_batch * host_data.max_matrix_size();
  m_host_data = &host_data;
  m_max_batch = max_batch;

  LIBXSTREAM_CHECK_CALL(libxstream_stream_create(&m_stream, device, demux, 0, name));
  LIBXSTREAM_CHECK_CALL(libxstream_mem_allocate(device, reinterpret_cast<void**>(&m_adata), sizeof(double) * max_msize, 0));
  LIBXSTREAM_CHECK_CALL(libxstream_mem_allocate(device, reinterpret_cast<void**>(&m_bdata), sizeof(double) * max_msize, 0));
  LIBXSTREAM_CHECK_CALL(libxstream_mem_allocate(device, reinterpret_cast<void**>(&m_cdata), sizeof(double) * max_msize, 0));
  LIBXSTREAM_CHECK_CALL(libxstream_mem_allocate(device, reinterpret_cast<void**>(&m_idata), sizeof(size_t) * max_batch, 0));

  LIBXSTREAM_CHECK_CALL(libxstream_fn_create_signature(&m_signature, 6));
  // The index buffer holds max_batch entries (see its allocation above), so its
  // declared extent must be max_batch rather than max_msize.
  LIBXSTREAM_CHECK_CALL(libxstream_fn_input (m_signature, 2, m_idata, libxstream_map_to_type(m_idata), 1, &max_batch));
  LIBXSTREAM_CHECK_CALL(libxstream_fn_input (m_signature, 3, m_adata, libxstream_map_to_type(m_adata), 1, &max_msize));
  LIBXSTREAM_CHECK_CALL(libxstream_fn_input (m_signature, 4, m_bdata, libxstream_map_to_type(m_bdata), 1, &max_msize));
  LIBXSTREAM_CHECK_CALL(libxstream_fn_output(m_signature, 5, m_cdata, libxstream_map_to_type(m_cdata), 1, &max_msize));

  return LIBXSTREAM_ERROR_NONE;
}
コード例 #3
0
ファイル: multi-dgemm-type.cpp プロジェクト: 01org/pyMIC
/**
 * Enqueues one batch into m_stream: host-to-device copies of the a/b/c operand
 * ranges and of the batch's index entries, a call of m_host_data->process()
 * with a freshly built six-argument signature, and a device-to-host copy of c.
 *
 * @param index Position of the first batch item inside m_host_data->idata().
 * @param size  Number of items in the batch; an empty batch enqueues nothing.
 * @return LIBXSTREAM_ERROR_NONE when the end of the function is reached.
 */
int multi_dgemm_type::operator()(size_t index, size_t size)
{
  LIBXSTREAM_CHECK_CONDITION(ready() && (index + size) <= m_host_data->size());

  // Nothing to enqueue for an empty batch.
  if (0 == size) {
    return LIBXSTREAM_ERROR_NONE;
  }

  // Element range [begin, end) covered by this batch, and its size in bytes.
  const size_t begin = m_host_data->idata()[index];
  const size_t end = m_host_data->idata()[index+size];
  const size_t nbytes = sizeof(double) * (end - begin);

  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->adata() + begin, m_adata, nbytes, m_stream));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->bdata() + begin, m_bdata, nbytes, m_stream));
  // The upload of cdata is deliberately part of the benchmark; as it only
  // contains zeros, libxstream_memset_zero would be the cheaper alternative.
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->cdata() + begin, m_cdata, nbytes, m_stream));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->idata() + index, m_idata, sizeof(size_t) * size, m_stream));

  libxstream_argument* signature = 0;
  const size_t max_msize = m_max_batch * m_host_data->max_matrix_size();
  // Distance from the last index entry of the batch to the end of the batch.
  const size_t nn = end - m_host_data->idata()[index+size-1];

  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_signature(&signature));
  // Arguments 0 and 1 are passed by value (no dimensions).
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(signature, 0, &size, libxstream_map_to_type(size), 0, 0));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(signature, 1, &nn, libxstream_map_to_type(nn), 0, 0));
  // Arguments 2..5 are the device buffers, each declared as 1-dimensional.
  // NOTE(review): m_idata is declared with the same extent (max_msize) as the
  // matrix buffers - verify this against the allocation of m_idata.
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(signature, 2, m_idata, libxstream_map_to_type(m_idata), 1, &max_msize));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(signature, 3, m_adata, libxstream_map_to_type(m_adata), 1, &max_msize));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(signature, 4, m_bdata, libxstream_map_to_type(m_bdata), 1, &max_msize));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_output(signature, 5, m_cdata, libxstream_map_to_type(m_cdata), 1, &max_msize));

  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_call(m_host_data->process(), signature, m_stream, LIBXSTREAM_CALL_DEFAULT));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_d2h(m_cdata, m_host_data->cdata() + begin, nbytes, m_stream));

  return LIBXSTREAM_ERROR_NONE;
}