示例#1
0
文件: entropy.c 项目: 01org/pyMIC
/**
 * Builds a histogram over the buffer `data` (function compiled for the
 * `mic` target via LIBXSTREAM_TARGET).
 *
 * The extent of `data` is not part of the signature; it is looked up with
 * libxstream_get_shape() for argument 0 of the current context and then
 * forwarded, together with both pointers, to the routine whose name is
 * formed by LIBXSTREAM_CONCATENATE(histogram, HISTOGRAM).
 *
 * @param data      Input buffer (argument 0 of the call).
 * @param histogram Output passed through to the selected histogram routine.
 *                  NOTE(review): required capacity is not visible here -
 *                  confirm against the selected routine.
 */
LIBXSTREAM_TARGET(mic) void makehist(const char* data, size_t* histogram)
{
  /* Zero-initialize so that a failed shape query which does not abort (e.g.
   * if the check macro is relaxed in non-debug builds - TODO confirm) yields
   * an empty input instead of an indeterminate length. */
  size_t size = 0;
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_get_shape(0/*current context*/, 0/*data*/, &size));
  LIBXSTREAM_CONCATENATE(histogram,HISTOGRAM)(data, size, histogram);
}
示例#2
0
}


/**
 * In-place transpose of a dense matrix of doubles via MKL.
 *
 * Only active when both LIBMICSMM_USE_MKLTRANS and __MKL are defined;
 * otherwise the function body is empty and the matrix is left untouched.
 *
 * @param m      Number of rows of the input matrix.
 * @param n      Number of columns of the input matrix.
 * @param matrix Matrix storage, overwritten with the result.
 */
LIBXSTREAM_TARGET(mic) void mkl_imatcopy(size_t m, size_t n, double* matrix)
{
#if defined(LIBMICSMM_USE_MKLTRANS) && defined(__MKL)
  const char ordering = 'R';  // row-major storage
  const char operation = 'T'; // transpose
  const double alpha = 1.0;   // no scaling of the elements
  // Leading dimensions: n before the transpose, m afterwards.
  const size_t lda = n;
  const size_t ldb = m;
  MKL_Dimatcopy(ordering, operation, m, n, alpha, matrix, lda, ldb);
#endif
}


template<typename T, typename U>
LIBXSTREAM_TARGET(mic) void kernel(const U *LIBXSTREAM_RESTRICT stack, LIBXSTREAM_INVAL(U) m, LIBXSTREAM_INVAL(U) n, T *LIBXSTREAM_RESTRICT matrix)
{
  size_t stacksize = 0;
  LIBXSTREAM_CHECK_CALL_ASSERT(libxstream_get_shape(0/*current context*/, 0/*stack*/, &stacksize));
  LIBXSTREAM_PRINT_INFO("libsmm_acc_transpose (mic): stacksize=%%lu m=%i n=%i", static_cast<unsigned long>(stacksize), LIBXSTREAM_GETVAL(m), LIBXSTREAM_GETVAL(n));
#if defined(LIBXSTREAM_DEBUG) && defined(_OPENMP)
  const double start = omp_get_wtime();
#endif

#if defined(_OPENMP)
# pragma omp parallel for
#endif
  for (U s = 0; s < stacksize; ++s) {
    T *const mat = matrix + stack[s];

#if defined(LIBMICSMM_USE_MKLTRANS) && defined(__MKL)
    mkl_imatcopy(static_cast<size_t>(LIBXSTREAM_GETVAL(m)), static_cast<size_t>(LIBXSTREAM_GETVAL(n)), mat);
#else
    LIBXSTREAM_ALIGNED(T tmp[LIBMICSMM_MAX_MATRIX_SIZE], LIBXSTREAM_MAX_SIMD);