// Issues one batch of work into this instance's stream (m_stream).
//
// index: position of the first batch item within the host-side index array (idata).
// size:  number of consecutive items to process; zero turns the call into a no-op.
//
// Per call, the following is issued on m_stream: host-to-device copies of the
// a/b/c element ranges [i0, i1) and of 'size' index entries, one call of the
// host data's process() function using m_signature (arguments 0 and 1 are
// updated here), and a device-to-host copy of the c range.
//
// Returns LIBXSTREAM_ERROR_NONE when the end of the function is reached; how the
// LIBXSTREAM_CHECK_* macros react to a failure is defined outside this block.
int multi_dgemm_type::operator()(size_t index, size_t size)
{
  LIBXSTREAM_CHECK_CONDITION(ready() && (index + size) <= m_host_data->size());

  if (0 < size) {
    // Query the demux setting only once, so the lock and the unlock decision below always agree.
    const bool manual_sync = 0 == demux();

    if (manual_sync) {
      // This manual synchronization prevents multiple threads from queuing work into the *same* stream (at the same time).
      // This is only needed if the stream was created without demux support in order to rely on manual synchronization.
      LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_stream_lock(m_stream));
    }

    // Element offsets delimiting the batch: [i0, i1).
    const size_t i0 = m_host_data->idata()[index], i1 = m_host_data->idata()[index+size];
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->adata() + i0, m_adata, sizeof(double) * (i1 - i0), m_stream));
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->bdata() + i0, m_bdata, sizeof(double) * (i1 - i0), m_stream));
    // transferring cdata is part of the benchmark; since it is all zeros we could do better with libxstream_memset_zero
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->cdata() + i0, m_cdata, sizeof(double) * (i1 - i0), m_stream));
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_h2d(m_host_data->idata() + index, m_idata, sizeof(size_t) * size, m_stream));

#if defined(LIBXSTREAM_DEBUG)
    size_t n = 0;
    LIBXSTREAM_ASSERT(LIBXSTREAM_ERROR_NONE == libxstream_fn_nargs(m_signature, &n) && 6 == n);
#endif
    // Extent of the last item of the batch; only 'size' index entries are copied above,
    // hence the end of the last item is passed as a separate argument.
    const size_t nn = i1 - m_host_data->idata()[index+size-1];
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(m_signature, 0, &size, libxstream_map_to_type(size), 0, 0));
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_input(m_signature, 1, &nn, libxstream_map_to_type(nn  ), 0, 0));
#if defined(LIBXSTREAM_DEBUG)
    // 'n' is only declared in debug builds, therefore this re-check shares the same guard.
    LIBXSTREAM_ASSERT(LIBXSTREAM_ERROR_NONE == libxstream_get_arity(m_signature, &n) && 6 == n);
#endif
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_fn_call(m_host_data->process(), m_signature, m_stream, LIBXSTREAM_CALL_DEFAULT));
    LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_memcpy_d2h(m_cdata, m_host_data->cdata() + i0, sizeof(double) * (i1 - i0), m_stream));

    if (manual_sync) {
      LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_stream_unlock(m_stream));
    }
  }

  return LIBXSTREAM_ERROR_NONE;
}
// Reports the demux setting of m_stream as delivered by libxstream_stream_demux.
// A failing query is handled by LIBXSTREAM_CHECK_CALL_THROW.
int multi_dgemm_type::demux() const
{
  LIBXSTREAM_ASSERT(ready());

  int mode = 0; // remains zero unless the query stores a different value
  LIBXSTREAM_CHECK_CALL_THROW(libxstream_stream_demux(m_stream, &mode));

  return mode;
}
// Stores 'data' into the given argument.
//
// arg:  argument to update; its dims, kind, type and shape[0] select the mode.
// data: for by-pointer arguments the pointer itself is stored; for by-value
//       arguments the pointee is copied bytewise (a null pointer stores zeros).
//
// By-pointer applies to arguments with dims != 0 or with the output bit set in
// their kind; everything else is stored by-value.
// Returns LIBXSTREAM_ERROR_NONE on success; a failing type-size lookup is
// handled by LIBXSTREAM_CHECK_CALL.
LIBXSTREAM_TARGET(mic) int libxstream_set_value(libxstream_argument& arg, const void* data)
{
  const bool by_pointer = 0 != arg.dims || 0 != (libxstream_argument::kind_output & arg.kind);

  if (by_pointer) {
    // the pointer is written at the very beginning of the argument object
    *reinterpret_cast<const void**>(&arg) = data;
    LIBXSTREAM_ASSERT(libxstream_get_value(arg).const_pointer == data);
  }
  else { // by-value: copy from the given pointer
    // dims is always zero in this branch, hence shape[0] supplies the byte count
    // (it is only relevant for void-typed arguments; otherwise it is replaced below).
    size_t typesize = *arg.shape;
    if (LIBXSTREAM_TYPE_VOID != arg.type) {
      LIBXSTREAM_CHECK_CALL(libxstream_get_typesize(arg.type, &typesize));
    }

    if (data) {
      const char *const src = static_cast<const char*>(data);
      for (size_t i = 0; i < typesize; ++i) {
        arg.data.self[i] = src[i];
      }
    }
    else {
      for (size_t i = 0; i < typesize; ++i) {
        arg.data.self[i] = 0;
      }
    }

    // allows to promote smaller types to pointer-size
    for (size_t i = typesize; i < sizeof(void*); ++i) {
      arg.data.self[i] = 0;
    }
  }

  return LIBXSTREAM_ERROR_NONE;
}
// Hands out the event of this instance; the event is created on the first call
// and the same pointer is returned by subsequent calls.
libxstream_event* multi_dgemm_type::event()
{
  if (0 != m_event) {
    return m_event; // already created by an earlier call
  }

  libxstream_event_create(&m_event);
  LIBXSTREAM_ASSERT(0 != m_event);

  return m_event;
}
// Determines the largest difference between two consecutive entries of the
// index array, i.e. max(m_idata[i+1] - m_idata[i]) for i in [0, m_size).
// The scan starts from an offset of zero, which matches m_idata[0] as asserted.
// Returns zero if there are no items.
size_t multi_dgemm_type::host_data_type::max_matrix_size() const
{
  LIBXSTREAM_ASSERT(0 == m_size || 0 == m_idata[0]);

  size_t largest = 0;
  size_t previous = 0;

  for (size_t item = 0; item < m_size; ++item) {
    const size_t current = m_idata[item+1];
    const size_t extent = current - previous;
    if (largest < extent) {
      largest = extent;
    }
    previous = current;
  }

  return largest;
}
int libxstream_construct(libxstream_argument arguments[], size_t arg, libxstream_argument::kind_type kind, const void* value, libxstream_type type, size_t dims, const size_t shape[]) { size_t typesize = 0; const bool weak_candidate = LIBXSTREAM_TYPE_VOID == type || (LIBXSTREAM_ERROR_NONE == libxstream_get_typesize(type, &typesize) && 1 == typesize); LIBXSTREAM_CHECK_CONDITION((((libxstream_argument::kind_invalid == kind || libxstream_argument::kind_inout == kind) && LIBXSTREAM_TYPE_INVALID == type) || LIBXSTREAM_TYPE_INVALID > type) && ((0 == dims && 0 == shape) || (0 == dims && 0 != shape && weak_candidate) || (0 < dims)) && (LIBXSTREAM_MAX_NDIMS) >= dims); LIBXSTREAM_ASSERT((LIBXSTREAM_MAX_NARGS) >= arg); libxstream_argument& argument = arguments[arg]; #if defined(LIBXSTREAM_DEBUG) memset(argument.data.self, 0, sizeof(libxstream_argument)); // avoid false pos. with mem. analysis #endif #if defined(LIBXSTREAM_PRINT) static const char *const context[] = { "", "input", "output", "inout" }; #endif argument.kind = kind; argument.dims = dims; if (shape) { if (0 < dims || !weak_candidate) { #if defined(LIBXSTREAM_PRINT) if (0 == dims && !weak_candidate) { LIBXSTREAM_PRINT_WARN("libxstream_fn_%s: signature=0x%llx arg=%lu is strong-typed (ignored shape)!", context[kind], reinterpret_cast<unsigned long long>(arguments), static_cast<unsigned long>(arg)); } #endif argument.type = type; } else { // 0 == dims && weak_candidate argument.type = LIBXSTREAM_TYPE_VOID; LIBXSTREAM_CHECK_CONDITION(sizeof(libxstream_argument::data_union) >= *shape); argument.shape[0] = shape[0]; } #if defined(__INTEL_COMPILER) # pragma loop_count min(0), max(LIBXSTREAM_MAX_NDIMS), avg(2) #endif for (size_t i = 0; i < dims; ++i) argument.shape[i] = shape[i]; } else { #if defined(LIBXSTREAM_PRINT) if (0 < dims && 0 == shape) { LIBXSTREAM_PRINT_WARN("libxstream_fn_%s: signature=0x%llx arg=%lu is weak-typed (no shape information)!", context[kind], reinterpret_cast<unsigned long long>(arguments), 
static_cast<unsigned long>(arg)); } #endif std::fill_n(argument.shape, dims, 0); argument.type = type; } return libxstream_argument::kind_invalid != kind ? libxstream_set_value(argument, value) : LIBXSTREAM_ERROR_NONE; }
// Computes a byte count from the maximum batch size and the largest matrix of
// the host data: each counted element contributes three doubles plus one size_t.
size_t multi_dgemm_type::bytes() const
{
  LIBXSTREAM_ASSERT(ready());

  const size_t bytes_per_element = 3 * sizeof(double) + sizeof(size_t);
  const size_t element_count = m_max_batch * m_host_data->max_matrix_size();

  return element_count * bytes_per_element;
}
// Tells whether host data is attached to this instance.
// With host data attached, the signature, the stream and all four data members
// (a, b, c, index) are expected to be set as well (asserted).
bool multi_dgemm_type::ready() const
{
  LIBXSTREAM_ASSERT(0 == m_host_data
    || (m_signature && m_stream && m_adata && m_bdata && m_cdata && m_idata));

  const bool has_host_data = 0 != m_host_data;
  return has_host_data;
}
// Tells whether a process function is set for the host data.
// With a process function set, all four data members (a, b, c, index) are
// expected to be set as well (asserted).
bool multi_dgemm_type::host_data_type::ready() const
{
  LIBXSTREAM_ASSERT(0 == m_process
    || (m_adata && m_bdata && m_cdata && m_idata));

  const bool has_process = 0 != m_process;
  return has_process;
}
// Generic conversion of an arbitrary value into a stream pointer: the value is
// asserted to compare equal to zero and the result is always a null pointer.
// The parameter's address is handed to libxstream_use_sink (presumably to keep
// it "used" when the assertion compiles away -- confirm against its definition).
template<typename T>
libxstream_stream* cast_to_stream(T stream)
{
  libxstream_use_sink(&stream);
  LIBXSTREAM_ASSERT(0 == stream);

  libxstream_stream *const no_stream = static_cast<libxstream_stream*>(0);
  return no_stream;
}