Exemplo n.º 1
0
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void LIBXSMM_FSYMBOL(__wrap_dgemm)(
  const char* transa, const char* transb,
  const libxsmm_blasint* m, const libxsmm_blasint* n, const libxsmm_blasint* k,
  const double* alpha, const double* a, const libxsmm_blasint* lda,
  const double* b, const libxsmm_blasint* ldb,
  const double* beta, double* c, const libxsmm_blasint* ldc)
{
  int flags = LIBXSMM_FLAGS;
  flags = (0 != transa
      ? (('N' == *transa || 'n' == *transa) ? (flags & ~LIBXSMM_GEMM_FLAG_TRANS_A)
                                            : (flags |  LIBXSMM_GEMM_FLAG_TRANS_A))
      : flags);
  flags = (0 != transb
      ? (('N' == *transb || 'n' == *transb) ? (flags & ~LIBXSMM_GEMM_FLAG_TRANS_B)
                                            : (flags |  LIBXSMM_GEMM_FLAG_TRANS_B))
      : flags);
  assert(m && n && k && a && b && c);
#if !defined(NDEBUG) /* library code is expected to be mute */
  if (0 == LIBXSMM_FSYMBOL(__real_dgemm)) {
    fprintf(stderr, "LIBXSMM: application is required to link against LAPACK/BLAS!\n");
  }
  else
#endif
  {
    LIBXSMM_XGEMM(double, libxsmm_blasint, LIBXSMM_FSYMBOL(__real_dgemm), flags, *m, *n, *k,
      0 != alpha ? *alpha : ((double)LIBXSMM_ALPHA),
      a, *(lda ? lda : LIBXSMM_LD(m, k)), b, *(ldb ? ldb : LIBXSMM_LD(k, n)),
      0 != beta ? *beta : ((double)LIBXSMM_BETA),
      c, *(ldc ? ldc : LIBXSMM_LD(m, n)));
  }
}
Exemplo n.º 2
0
LIBXSMM_GEMM_SYMBOL_VISIBILITY void LIBXSMM_FSYMBOL(dgemm)(LIBXSMM_GEMM_CONST char* transa, LIBXSMM_GEMM_CONST char* transb,
  LIBXSMM_GEMM_CONST libxsmm_blasint* m, LIBXSMM_GEMM_CONST libxsmm_blasint* n, LIBXSMM_GEMM_CONST libxsmm_blasint* k,
  LIBXSMM_GEMM_CONST double* alpha, LIBXSMM_GEMM_CONST double* a, LIBXSMM_GEMM_CONST libxsmm_blasint* lda,
  LIBXSMM_GEMM_CONST double* b, LIBXSMM_GEMM_CONST libxsmm_blasint* ldb,
  LIBXSMM_GEMM_CONST double* beta, double* c, LIBXSMM_GEMM_CONST libxsmm_blasint* ldc)
{
  LIBXSMM_FSYMBOL(__wrap_dgemm)(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc);
}
Exemplo n.º 3
0
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE LIBXSMM_ATTRIBUTE(weak) void LIBXSMM_FSYMBOL(dgemm)(
  const char* transa, const char* transb,
  const libxsmm_blasint* m, const libxsmm_blasint* n, const libxsmm_blasint* k,
  const double* alpha, const double* a, const libxsmm_blasint* lda,
  const double* b, const libxsmm_blasint* ldb,
  const double* beta, double* c, const libxsmm_blasint* ldc)
{
  typedef void (*function_type)(
    const char*, const char*,
    const libxsmm_blasint*, const libxsmm_blasint*, const libxsmm_blasint*,
    const double*, const double*, const libxsmm_blasint*,
    const double*, const libxsmm_blasint*,
    const double*, double*, const libxsmm_blasint*);
  static LIBXSMM_RETARGETABLE union {
    function_type fn;
    void* pv;
  } original = { 0 };
  int flags = LIBXSMM_FLAGS;
  flags = (0 != transa
      ? (('N' == *transa || 'n' == *transa) ? (flags & ~LIBXSMM_GEMM_FLAG_TRANS_A)
                                            : (flags |  LIBXSMM_GEMM_FLAG_TRANS_A))
      : flags);
  flags = (0 != transb
      ? (('N' == *transb || 'n' == *transb) ? (flags & ~LIBXSMM_GEMM_FLAG_TRANS_B)
                                            : (flags |  LIBXSMM_GEMM_FLAG_TRANS_B))
      : flags);
  if (0 == original.pv) {
    original.pv = dlsym(RTLD_NEXT, LIBXSMM_STRINGIFY(LIBXSMM_FSYMBOL(dgemm)));
  }
  assert(m && n && k && a && b && c);
  LIBXSMM_XGEMM(double, libxsmm_blasint, original.fn, flags, *m, *n, *k,
    0 != alpha ? *alpha : ((double)LIBXSMM_ALPHA),
    a, *(lda ? lda : LIBXSMM_LD(m, k)), b, *(ldb ? ldb : LIBXSMM_LD(k, n)),
    0 != beta ? *beta : ((double)LIBXSMM_BETA),
    c, *(ldc ? ldc : LIBXSMM_LD(m, n)));
}
Exemplo n.º 4
0
LIBXSMM_API_DEFINITION void libxsmm_gemm_configure(int archid, int prefetch)
{
  int config = 0;
  LIBXSMM_UNUSED(prefetch);
  internal_gemm_prefetch = LIBXSMM_PREFETCH_AL2_AHEAD;
  internal_gemm_nt = 2;
  internal_gemm = 2;
  {
    /* behaviour of libxsmm_omp_?gemm routines or LD_PRELOAD ?GEMM routines
     * 0: sequential below-threshold routine (no OpenMP); may fall-back to BLAS,
     * 1: OpenMP-parallelized but without internal parallel region,
     * 2: OpenMP-parallelized with internal parallel region" )
     */
    const char *const env = getenv("LIBXSMM_GEMM");
    if (0 != env && 0 != *env) {
      internal_gemm = atoi(env);
    }
  }
#if defined(LIBXSMM_EXT_GEMM_TASKS)
  { /* consider user input about using (OpenMP-)tasks; this code must be here
    * because maybe only this translation unit is compiled with OpenMP support
    */
    const char *const env_tasks = getenv("LIBXSMM_TASKS");
    if (0 != env_tasks && 0 != *env_tasks) {
      internal_gemm_tasks = atoi(env_tasks);
    }
  }
#endif
#if defined(__MIC__)
  LIBXSMM_UNUSED(archid);
#else
  if (LIBXSMM_X86_AVX512_MIC == archid)
#endif
  {
    internal_gemm_nt = 4;
    config = 1;
  }
  { /* attempt to setup tile sizes from the environment (LIBXSMM_M, LIBXSMM_N, and LIBXSMM_K) */
    const int tile_configs[/*configs*/][2/*DP/SP*/][3/*TILE_M,TILE_N,TILE_K*/] = {
      { { 72, 32, 16 }, { 72, 32, 16 } }, /*generic*/
      { { 72, 32, 16 }, { 72, 32, 16 } }  /*knl*/
    };
    const char* env[3];
    env[0] = getenv("LIBXSMM_M"); env[1] = getenv("LIBXSMM_N"); env[2] = getenv("LIBXSMM_K");
    internal_gemm_tile[0/*DP*/][0/*M*/] = (env[0] ? atoi(env[0]) : 0);
    internal_gemm_tile[0/*DP*/][1/*N*/] = (env[1] ? atoi(env[1]) : 0);
    internal_gemm_tile[0/*DP*/][2/*K*/] = (env[2] ? atoi(env[2]) : 0);
    /* environment-defined tile sizes applies for DP and SP */
    internal_gemm_tile[1/*SP*/][0/*M*/] = internal_gemm_tile[0/*DP*/][0];
    internal_gemm_tile[1/*SP*/][1/*N*/] = internal_gemm_tile[0/*DP*/][1];
    internal_gemm_tile[1/*SP*/][2/*K*/] = internal_gemm_tile[0/*DP*/][2];
    /* load predefined configuration if tile size is not setup by the environment */
    if (0 >= internal_gemm_tile[0/*DP*/][0/*M*/]) internal_gemm_tile[0][0] = tile_configs[config][0][0];
    if (0 >= internal_gemm_tile[0/*DP*/][1/*N*/]) internal_gemm_tile[0][1] = tile_configs[config][0][1];
    if (0 >= internal_gemm_tile[0/*DP*/][2/*K*/]) internal_gemm_tile[0][2] = tile_configs[config][0][2];
    if (0 >= internal_gemm_tile[1/*SP*/][0/*M*/]) internal_gemm_tile[1][0] = tile_configs[config][1][0];
    if (0 >= internal_gemm_tile[1/*SP*/][1/*N*/]) internal_gemm_tile[1][1] = tile_configs[config][1][1];
    if (0 >= internal_gemm_tile[1/*SP*/][2/*K*/]) internal_gemm_tile[1][2] = tile_configs[config][1][2];
  }
#if defined(__STATIC) && defined(LIBXSMM_BUILD) && !defined(__CYGWIN__) && \
  !(defined(__APPLE__) && defined(__MACH__) /*&& defined(__clang__)*/)
  if (0 == libxsmm_original_sgemm) {
    libxsmm_original_sgemm = LIBXSMM_FSYMBOL(__real_sgemm);
  }
#endif
#if !defined(__BLAS) || (0 != __BLAS)
  if (0 == libxsmm_original_sgemm) {
    libxsmm_original_sgemm = LIBXSMM_FSYMBOL(sgemm);
  }
#endif
#if defined(LIBXSMM_RTLD_NEXT)
  if (0 == libxsmm_original_sgemm) {
    union { const void* pv; libxsmm_sgemm_function pf; } gemm = { NULL };
    gemm.pv = dlsym(RTLD_NEXT, LIBXSMM_STRINGIFY(LIBXSMM_FSYMBOL(sgemm)));
    libxsmm_original_sgemm = gemm.pf;
  }
#endif
#if defined(__STATIC) && defined(LIBXSMM_BUILD) && !defined(__CYGWIN__) && \
  !(defined(__APPLE__) && defined(__MACH__) /*&& defined(__clang__)*/)
  if (0 == libxsmm_original_dgemm) {
    libxsmm_original_dgemm = LIBXSMM_FSYMBOL(__real_dgemm);
  }
#endif
#if !defined(__BLAS) || (0 != __BLAS)
  if (0 == libxsmm_original_dgemm) {
    libxsmm_original_dgemm = LIBXSMM_FSYMBOL(dgemm);
  }
#endif
#if defined(LIBXSMM_RTLD_NEXT)
  if (0 == libxsmm_original_dgemm) {
    union { const void* pv; libxsmm_dgemm_function pf; } gemm = { NULL };
    gemm.pv = dlsym(RTLD_NEXT, LIBXSMM_STRINGIFY(LIBXSMM_FSYMBOL(dgemm)));
    libxsmm_original_dgemm = gemm.pf;
  }
#endif
}