LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE void LIBXSMM_FSYMBOL(__wrap_dgemm)( const char* transa, const char* transb, const libxsmm_blasint* m, const libxsmm_blasint* n, const libxsmm_blasint* k, const double* alpha, const double* a, const libxsmm_blasint* lda, const double* b, const libxsmm_blasint* ldb, const double* beta, double* c, const libxsmm_blasint* ldc) { int flags = LIBXSMM_FLAGS; flags = (0 != transa ? (('N' == *transa || 'n' == *transa) ? (flags & ~LIBXSMM_GEMM_FLAG_TRANS_A) : (flags | LIBXSMM_GEMM_FLAG_TRANS_A)) : flags); flags = (0 != transb ? (('N' == *transb || 'n' == *transb) ? (flags & ~LIBXSMM_GEMM_FLAG_TRANS_B) : (flags | LIBXSMM_GEMM_FLAG_TRANS_B)) : flags); assert(m && n && k && a && b && c); #if !defined(NDEBUG) /* library code is expected to be mute */ if (0 == LIBXSMM_FSYMBOL(__real_dgemm)) { fprintf(stderr, "LIBXSMM: application is required to link against LAPACK/BLAS!\n"); } else #endif { LIBXSMM_XGEMM(double, libxsmm_blasint, LIBXSMM_FSYMBOL(__real_dgemm), flags, *m, *n, *k, 0 != alpha ? *alpha : ((double)LIBXSMM_ALPHA), a, *(lda ? lda : LIBXSMM_LD(m, k)), b, *(ldb ? ldb : LIBXSMM_LD(k, n)), 0 != beta ? *beta : ((double)LIBXSMM_BETA), c, *(ldc ? ldc : LIBXSMM_LD(m, n))); } }
/**
 * DGEMM entry point with the Fortran-style signature; every argument is
 * passed through unchanged to the __wrap_dgemm implementation.
 */
LIBXSMM_GEMM_SYMBOL_VISIBILITY void LIBXSMM_FSYMBOL(dgemm)(
  LIBXSMM_GEMM_CONST char* transa, LIBXSMM_GEMM_CONST char* transb,
  LIBXSMM_GEMM_CONST libxsmm_blasint* m,
  LIBXSMM_GEMM_CONST libxsmm_blasint* n,
  LIBXSMM_GEMM_CONST libxsmm_blasint* k,
  LIBXSMM_GEMM_CONST double* alpha,
  LIBXSMM_GEMM_CONST double* a, LIBXSMM_GEMM_CONST libxsmm_blasint* lda,
  LIBXSMM_GEMM_CONST double* b, LIBXSMM_GEMM_CONST libxsmm_blasint* ldb,
  LIBXSMM_GEMM_CONST double* beta,
  double* c, LIBXSMM_GEMM_CONST libxsmm_blasint* ldc)
{
  /* pure forwarder: no argument is inspected or modified here */
  LIBXSMM_FSYMBOL(__wrap_dgemm)(
    transa, transb,
    m, n, k,
    alpha, a, lda,
    b, ldb,
    beta, c, ldc);
}
/**
 * Weak DGEMM definition with the Fortran-style signature (all arguments by
 * pointer). The next definition of the same symbol in library search order
 * is looked up once via dlsym(RTLD_NEXT, ...) and cached in a function-local
 * static; it is handed to LIBXSMM_XGEMM as the original routine.
 *
 * - transa/transb: 'N' or 'n' clears the respective TRANS flag, any other
 *   character sets it; a null pointer keeps the default from LIBXSMM_FLAGS.
 * - alpha/beta: a null pointer selects LIBXSMM_ALPHA/LIBXSMM_BETA.
 * - lda/ldb/ldc: a null pointer selects the extent chosen by LIBXSMM_LD.
 * - m, n, k, a, b, and c must not be null (asserted).
 *
 * In debug builds, a failed lookup (no further dgemm symbol available) is
 * reported on stderr and the call is skipped instead of passing a null
 * function pointer on; this mirrors the check done by __wrap_dgemm.
 * NOTE(review): the cached pointer is not protected against concurrent
 * first calls -- confirm whether callers may race here.
 */
LIBXSMM_EXTERN_C LIBXSMM_RETARGETABLE LIBXSMM_ATTRIBUTE(weak) void LIBXSMM_FSYMBOL(dgemm)(
  const char* transa, const char* transb,
  const libxsmm_blasint* m, const libxsmm_blasint* n, const libxsmm_blasint* k,
  const double* alpha, const double* a, const libxsmm_blasint* lda,
  const double* b, const libxsmm_blasint* ldb,
  const double* beta, double* c, const libxsmm_blasint* ldc)
{
  typedef void (*function_type)(
    const char*, const char*,
    const libxsmm_blasint*, const libxsmm_blasint*, const libxsmm_blasint*,
    const double*, const double*, const libxsmm_blasint*,
    const double*, const libxsmm_blasint*,
    const double*, double*, const libxsmm_blasint*);
  /* union converts the object pointer returned by dlsym into a function pointer */
  static LIBXSMM_RETARGETABLE union { function_type fn; void* pv; } original = { 0 };
  int flags = LIBXSMM_FLAGS;

  flags = (0 != transa
    ? (('N' == *transa || 'n' == *transa)
      ? (flags & ~LIBXSMM_GEMM_FLAG_TRANS_A)
      : (flags | LIBXSMM_GEMM_FLAG_TRANS_A))
    : flags);
  flags = (0 != transb
    ? (('N' == *transb || 'n' == *transb)
      ? (flags & ~LIBXSMM_GEMM_FLAG_TRANS_B)
      : (flags | LIBXSMM_GEMM_FLAG_TRANS_B))
    : flags);

  if (0 == original.pv) { /* resolve lazily; retried on each call until found */
    original.pv = dlsym(RTLD_NEXT, LIBXSMM_STRINGIFY(LIBXSMM_FSYMBOL(dgemm)));
  }

  assert(m && n && k && a && b && c);

#if !defined(NDEBUG) /* library code is expected to be mute */
  if (0 == original.pv) { /* dlsym found no further definition of the symbol */
    fprintf(stderr, "LIBXSMM: application is required to link against LAPACK/BLAS!\n");
  }
  else
#endif
  {
    LIBXSMM_XGEMM(double, libxsmm_blasint, original.fn, flags,
      *m, *n, *k,
      0 != alpha ? *alpha : ((double)LIBXSMM_ALPHA),
      a, *(lda ? lda : LIBXSMM_LD(m, k)),
      b, *(ldb ? ldb : LIBXSMM_LD(k, n)),
      0 != beta ? *beta : ((double)LIBXSMM_BETA),
      c, *(ldc ? ldc : LIBXSMM_LD(m, n)));
  }
}
LIBXSMM_API_DEFINITION void libxsmm_gemm_configure(int archid, int prefetch) { int config = 0; LIBXSMM_UNUSED(prefetch); internal_gemm_prefetch = LIBXSMM_PREFETCH_AL2_AHEAD; internal_gemm_nt = 2; internal_gemm = 2; { /* behaviour of libxsmm_omp_?gemm routines or LD_PRELOAD ?GEMM routines * 0: sequential below-threshold routine (no OpenMP); may fall-back to BLAS, * 1: OpenMP-parallelized but without internal parallel region, * 2: OpenMP-parallelized with internal parallel region" ) */ const char *const env = getenv("LIBXSMM_GEMM"); if (0 != env && 0 != *env) { internal_gemm = atoi(env); } } #if defined(LIBXSMM_EXT_GEMM_TASKS) { /* consider user input about using (OpenMP-)tasks; this code must be here * because maybe only this translation unit is compiled with OpenMP support */ const char *const env_tasks = getenv("LIBXSMM_TASKS"); if (0 != env_tasks && 0 != *env_tasks) { internal_gemm_tasks = atoi(env_tasks); } } #endif #if defined(__MIC__) LIBXSMM_UNUSED(archid); #else if (LIBXSMM_X86_AVX512_MIC == archid) #endif { internal_gemm_nt = 4; config = 1; } { /* attempt to setup tile sizes from the environment (LIBXSMM_M, LIBXSMM_N, and LIBXSMM_K) */ const int tile_configs[/*configs*/][2/*DP/SP*/][3/*TILE_M,TILE_N,TILE_K*/] = { { { 72, 32, 16 }, { 72, 32, 16 } }, /*generic*/ { { 72, 32, 16 }, { 72, 32, 16 } } /*knl*/ }; const char* env[3]; env[0] = getenv("LIBXSMM_M"); env[1] = getenv("LIBXSMM_N"); env[2] = getenv("LIBXSMM_K"); internal_gemm_tile[0/*DP*/][0/*M*/] = (env[0] ? atoi(env[0]) : 0); internal_gemm_tile[0/*DP*/][1/*N*/] = (env[1] ? atoi(env[1]) : 0); internal_gemm_tile[0/*DP*/][2/*K*/] = (env[2] ? 
atoi(env[2]) : 0); /* environment-defined tile sizes applies for DP and SP */ internal_gemm_tile[1/*SP*/][0/*M*/] = internal_gemm_tile[0/*DP*/][0]; internal_gemm_tile[1/*SP*/][1/*N*/] = internal_gemm_tile[0/*DP*/][1]; internal_gemm_tile[1/*SP*/][2/*K*/] = internal_gemm_tile[0/*DP*/][2]; /* load predefined configuration if tile size is not setup by the environment */ if (0 >= internal_gemm_tile[0/*DP*/][0/*M*/]) internal_gemm_tile[0][0] = tile_configs[config][0][0]; if (0 >= internal_gemm_tile[0/*DP*/][1/*N*/]) internal_gemm_tile[0][1] = tile_configs[config][0][1]; if (0 >= internal_gemm_tile[0/*DP*/][2/*K*/]) internal_gemm_tile[0][2] = tile_configs[config][0][2]; if (0 >= internal_gemm_tile[1/*SP*/][0/*M*/]) internal_gemm_tile[1][0] = tile_configs[config][1][0]; if (0 >= internal_gemm_tile[1/*SP*/][1/*N*/]) internal_gemm_tile[1][1] = tile_configs[config][1][1]; if (0 >= internal_gemm_tile[1/*SP*/][2/*K*/]) internal_gemm_tile[1][2] = tile_configs[config][1][2]; } #if defined(__STATIC) && defined(LIBXSMM_BUILD) && !defined(__CYGWIN__) && \ !(defined(__APPLE__) && defined(__MACH__) /*&& defined(__clang__)*/) if (0 == libxsmm_original_sgemm) { libxsmm_original_sgemm = LIBXSMM_FSYMBOL(__real_sgemm); } #endif #if !defined(__BLAS) || (0 != __BLAS) if (0 == libxsmm_original_sgemm) { libxsmm_original_sgemm = LIBXSMM_FSYMBOL(sgemm); } #endif #if defined(LIBXSMM_RTLD_NEXT) if (0 == libxsmm_original_sgemm) { union { const void* pv; libxsmm_sgemm_function pf; } gemm = { NULL }; gemm.pv = dlsym(RTLD_NEXT, LIBXSMM_STRINGIFY(LIBXSMM_FSYMBOL(sgemm))); libxsmm_original_sgemm = gemm.pf; } #endif #if defined(__STATIC) && defined(LIBXSMM_BUILD) && !defined(__CYGWIN__) && \ !(defined(__APPLE__) && defined(__MACH__) /*&& defined(__clang__)*/) if (0 == libxsmm_original_dgemm) { libxsmm_original_dgemm = LIBXSMM_FSYMBOL(__real_dgemm); } #endif #if !defined(__BLAS) || (0 != __BLAS) if (0 == libxsmm_original_dgemm) { libxsmm_original_dgemm = LIBXSMM_FSYMBOL(dgemm); } #endif #if 
defined(LIBXSMM_RTLD_NEXT) if (0 == libxsmm_original_dgemm) { union { const void* pv; libxsmm_dgemm_function pf; } gemm = { NULL }; gemm.pv = dlsym(RTLD_NEXT, LIBXSMM_STRINGIFY(LIBXSMM_FSYMBOL(dgemm))); libxsmm_original_dgemm = gemm.pf; } #endif }