示例#1
0
// Sets up the signature entry arguments[arg]: records kind, dims, type, and shape, and
// finally stores the value via libxstream_set_value (skipped for kind_invalid).
//
// Preconditions checked by LIBXSTREAM_CHECK_CONDITION:
// - type is less than LIBXSTREAM_TYPE_INVALID, or it equals LIBXSTREAM_TYPE_INVALID while
//   kind is kind_invalid or kind_inout,
// - dims does not exceed LIBXSTREAM_MAX_NDIMS,
// - a shape given along with 0 == dims requires a "weak candidate" type, i.e.,
//   LIBXSTREAM_TYPE_VOID or a type whose size libxstream_get_typesize reports as one.
//
// Returns the result of libxstream_set_value, or LIBXSTREAM_ERROR_NONE for kind_invalid.
// NOTE(review): what the check macros do on failure is not visible here; presumably they
// leave the function with an error code -- confirm against the macro definitions.
int libxstream_construct(libxstream_argument arguments[], size_t arg, libxstream_argument::kind_type kind, const void* value, libxstream_type type, size_t dims, const size_t shape[])
{
  size_t element_size = 0;
  const bool weak_candidate = LIBXSTREAM_TYPE_VOID == type
    || (LIBXSTREAM_ERROR_NONE == libxstream_get_typesize(type, &element_size) && 1 == element_size);

  // The shape part reads: with 0 == dims, a given shape is only acceptable for a weak candidate.
  LIBXSTREAM_CHECK_CONDITION(
    (LIBXSTREAM_TYPE_INVALID > type
      || (LIBXSTREAM_TYPE_INVALID == type && (libxstream_argument::kind_invalid == kind || libxstream_argument::kind_inout == kind)))
    && (0 < dims || 0 == shape || weak_candidate)
    && (LIBXSTREAM_MAX_NDIMS) >= dims);

  // Index LIBXSTREAM_MAX_NARGS itself is permitted by this assertion.
  LIBXSTREAM_ASSERT((LIBXSTREAM_MAX_NARGS) >= arg);
  libxstream_argument& entry = arguments[arg];

#if defined(LIBXSTREAM_DEBUG)
  memset(entry.data.self, 0, sizeof(libxstream_argument)); // avoid false pos. with mem. analysis
#endif
#if defined(LIBXSTREAM_PRINT)
  // Indexed by kind for the warnings below.
  static const char *const context[] = { "", "input", "output", "inout" };
#endif
  entry.kind = kind;
  entry.dims = dims;

  const bool has_shape = 0 != shape;

  if (has_shape && 0 == dims && weak_candidate) {
    // Shape without dimensionality on a weak candidate: the entry becomes untyped, and
    // shape[0] must not exceed the size of the entry's data union before it is recorded.
    entry.type = LIBXSTREAM_TYPE_VOID;
    LIBXSTREAM_CHECK_CONDITION(sizeof(libxstream_argument::data_union) >= *shape);
    entry.shape[0] = shape[0];
  }
  else {
#if defined(LIBXSTREAM_PRINT)
    if (has_shape) {
      if (0 == dims) { // reaching this point with 0 == dims implies !weak_candidate
        LIBXSTREAM_PRINT_WARN("libxstream_fn_%s: signature=0x%llx arg=%lu is strong-typed (ignored shape)!",
          context[kind], reinterpret_cast<unsigned long long>(arguments), static_cast<unsigned long>(arg));
      }
    }
    else if (0 < dims) {
      LIBXSTREAM_PRINT_WARN("libxstream_fn_%s: signature=0x%llx arg=%lu is weak-typed (no shape information)!",
        context[kind], reinterpret_cast<unsigned long long>(arguments), static_cast<unsigned long>(arg));
    }
#endif
    entry.type = type;
  }

  if (has_shape) {
    // Take over the extents of all given dimensions (no-op for 0 == dims).
#if defined(__INTEL_COMPILER)
#   pragma loop_count min(0), max(LIBXSTREAM_MAX_NDIMS), avg(2)
#endif
    for (size_t d = 0; d < dims; ++d) entry.shape[d] = shape[d];
  }
  else {
    // No shape information: the extents of all dimensions are zeroed.
    std::fill_n(entry.shape, dims, 0);
  }

  if (libxstream_argument::kind_invalid == kind) {
    return LIBXSTREAM_ERROR_NONE; // no value is assigned to an invalid entry
  }
  return libxstream_set_value(entry, value);
}
示例#2
0
// Enqueues one batch into m_stream: copies the a/b/c elements and the index entries that
// belong to the "size" items starting at "index" (libxstream_memcpy_h2d), binds the two
// per-batch scalars to the prepared signature, calls the process function, and copies the
// c elements of the batch back (libxstream_memcpy_d2h).
//
// The request is validated up-front by LIBXSTREAM_CHECK_CONDITION; nothing is enqueued for
// an empty batch. LIBXSTREAM_ERROR_NONE is returned when the end of the function is reached.
int multi_dgemm_type::operator()(size_t index, size_t size)
{
  LIBXSTREAM_CHECK_CONDITION(ready() && (index + size) <= m_host_data->size());

  if (0 == size) {
    return LIBXSTREAM_ERROR_NONE;
  }

  if (0 == demux()) {
    // This manual synchronization prevents multiple threads from queuing work into the *same* stream (at the same time).
    // This is only needed if the stream was created without demux support in order to rely on manual synchronization.
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_stream_lock(m_stream));
  }

  // Element range [first, last) of the batch as given by the index data.
  const size_t first = m_host_data->idata()[index];
  const size_t last = m_host_data->idata()[index+size];
  const size_t nbytes = sizeof(double) * (last - first);

  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->adata() + first, m_adata, nbytes, m_stream));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->bdata() + first, m_bdata, nbytes, m_stream));
  // transferring cdata is part of the benchmark; since it is all zeros we could do better with libxstream_memset_zero
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->cdata() + first, m_cdata, nbytes, m_stream));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->idata() + index, m_idata, sizeof(size_t) * size, m_stream));

#if defined(LIBXSTREAM_DEBUG)
  size_t count = 0;
  LIBXSTREAM_ASSERT(LIBXSTREAM_ERROR_NONE == libxstream_fn_nargs(m_signature, &count) && 6 == count);
#endif

  // Distance between the last two index entries of the batch; it is passed separately
  // since only "size" index entries were copied above.
  const size_t tail = last - m_host_data->idata()[index+size-1];
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(m_signature, 0, &size, libxstream_map_to_type(size), 0, 0));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(m_signature, 1, &tail, libxstream_map_to_type(tail), 0, 0));
  // NOTE(review): "count" is only declared under LIBXSTREAM_DEBUG; this presumably relies on
  // LIBXSTREAM_ASSERT discarding its argument in other builds -- confirm.
  LIBXSTREAM_ASSERT(LIBXSTREAM_ERROR_NONE == libxstream_get_arity(m_signature, &count) && 6 == count);

  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_call(m_host_data->process(), m_signature, m_stream, LIBXSTREAM_CALL_DEFAULT));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_d2h(m_cdata, m_host_data->cdata() + first, nbytes, m_stream));

  if (0 == demux()) {
    // Counterpart of the manual lock taken above.
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_stream_unlock(m_stream));
  }

  return LIBXSTREAM_ERROR_NONE;
}
示例#3
0
// Enqueues one batch into m_stream: copies the a/b/c elements and the index entries that
// belong to the "size" items starting at "index" (libxstream_memcpy_h2d), builds a
// six-argument signature, calls the process function, and copies the c elements of the
// batch back (libxstream_memcpy_d2h).
//
// The request is validated up-front by LIBXSTREAM_CHECK_CONDITION; nothing is enqueued for
// an empty batch. LIBXSTREAM_ERROR_NONE is returned when the end of the function is reached.
int multi_dgemm_type::operator()(size_t index, size_t size)
{
  LIBXSTREAM_CHECK_CONDITION(ready() && (index + size) <= m_host_data->size());

  if (0 == size) {
    return LIBXSTREAM_ERROR_NONE;
  }

  // Element range [first, last) of the batch as given by the index data.
  const size_t first = m_host_data->idata()[index];
  const size_t last = m_host_data->idata()[index+size];
  const size_t nbytes = sizeof(double) * (last - first);

  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->adata() + first, m_adata, nbytes, m_stream));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->bdata() + first, m_bdata, nbytes, m_stream));
  // transferring cdata is part of the benchmark; since it is all zeros we could do better with libxstream_memset_zero
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->cdata() + first, m_cdata, nbytes, m_stream));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->idata() + index, m_idata, sizeof(size_t) * size, m_stream));

  libxstream_argument* signature = 0;
  // One-dimensional extent announced for each of the four buffers below.
  const size_t max_msize = m_max_batch * m_host_data->max_matrix_size();
  // Distance between the last two index entries of the batch; it is passed separately
  // since only "size" index entries were copied above.
  const size_t tail = last - m_host_data->idata()[index+size-1];
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_signature(&signature));

  // Arguments 0 and 1 are passed with dims=0 and without a shape; 2..5 are buffers of one dimension.
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(signature, 0, &size, libxstream_map_to_type(size), 0, 0));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(signature, 1, &tail, libxstream_map_to_type(tail), 0, 0));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(signature, 2, m_idata, libxstream_map_to_type(m_idata), 1, &max_msize));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(signature, 3, m_adata, libxstream_map_to_type(m_adata), 1, &max_msize));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(signature, 4, m_bdata, libxstream_map_to_type(m_bdata), 1, &max_msize));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_output(signature, 5, m_cdata, libxstream_map_to_type(m_cdata), 1, &max_msize));

  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_call(m_host_data->process(), signature, m_stream, LIBXSTREAM_CALL_DEFAULT));
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_d2h(m_cdata, m_host_data->cdata() + first, nbytes, m_stream));

  return LIBXSTREAM_ERROR_NONE;
}
示例#4
0
// Resets a signature: the entries [0, nargs) become kind_inout placeholders of type
// LIBXSTREAM_TYPE_INVALID (no value, no dimensions, no shape), and the entry at index
// nargs is constructed as kind_invalid (presumably marking the end of the signature).
//
// A null signature is only accepted together with 0 == nargs and is a no-op; nargs must
// not exceed LIBXSTREAM_MAX_NARGS. Note that the entry at index nargs is written as well.
// Returns LIBXSTREAM_ERROR_NONE when the end of the function is reached.
int libxstream_construct(libxstream_argument* signature, size_t nargs)
{
  LIBXSTREAM_CHECK_CONDITION((0 != signature || 0 == nargs) && (LIBXSTREAM_MAX_NARGS) >= nargs);

  if (0 == signature) {
    return LIBXSTREAM_ERROR_NONE; // nothing to construct
  }

#if defined(__INTEL_COMPILER)
# pragma loop_count min(0), max(LIBXSTREAM_MAX_NARGS), avg(LIBXSTREAM_MAX_NARGS/2)
#endif
  for (size_t entry = 0; entry < nargs; ++entry) {
    LIBXSTREAM_CHECK_CALL(libxstream_construct(signature, entry, libxstream_argument::kind_inout, 0, LIBXSTREAM_TYPE_INVALID, 0, 0));
  }

  // Entry following the regular arguments.
  LIBXSTREAM_CHECK_CALL(libxstream_construct(signature, nargs, libxstream_argument::kind_invalid, 0, LIBXSTREAM_TYPE_INVALID, 0, 0));

  return LIBXSTREAM_ERROR_NONE;
}