void run_jit_float( const float* i_a, const float* i_b, float* o_c, const int i_M, const int i_N, const int i_K, const libxsmm_prefetch_type i_prefetch, const char* i_arch ) { /* define function pointer */ libxsmm_smmfunction l_test_jit; double l_jittime = 0.0, l_runtime = 0.0; float l_alpha = 1.0f; float l_beta = 1.0f; unsigned long long l_start; unsigned int l_t; if ( l_beta != 0.0f && l_beta != 1.0f ) { fprintf(stderr, "JIT float: beta needs to be 0.0 or 1.0!\n"); exit(-1); } if ( l_alpha != 1.0f ) { fprintf(stderr, "JIT float: alpha needs to be 1.0!\n"); exit(-1); } l_start = libxsmm_timer_tick(); l_test_jit = libxsmm_smmdispatch(i_M, i_N, i_K, &i_M, &i_K, &i_M, &l_alpha, &l_beta, NULL, &i_prefetch ); l_jittime = libxsmm_timer_duration(l_start, libxsmm_timer_tick()); printf("function pointer address: %llx\n", (size_t)l_test_jit); l_start = libxsmm_timer_tick(); if ( i_prefetch == LIBXSMM_PREFETCH_NONE ) { for ( l_t = 0; l_t < g_jit_code_reps; l_t++ ) { l_test_jit(i_a, i_b, o_c); } } else { for ( l_t = 0; l_t < g_jit_code_reps; l_t++ ) { l_test_jit(i_a, i_b, o_c, i_a, i_b, o_c); } } l_runtime = libxsmm_timer_duration(l_start, libxsmm_timer_tick()); printf("%fs for creating jit\n", l_jittime); printf("%fs for executing jit\n", l_runtime); printf("%f GFLOPS for jit\n", ((double)((double)g_jit_code_reps * (double)i_M * (double)i_N * (double)i_K) * 2.0) / (l_runtime * 1.0e9)); }
void run_jit_float( const float* i_a, const float* i_b, float* o_c, const libxsmm_xgemm_descriptor* i_xgemm_desc, const char* i_arch ) { struct timeval l_start, l_end; /* define function pointer */ typedef void (*jitfun)(const float* a, const float* b, float* c); typedef void (*jitfun_pf)(const float* a, const float* b, float* c, const float* a_pf, const float* b_pf, const float* c_pf); jitfun l_test_jit; jitfun_pf l_test_jit_pf; double l_jittime = 0.0; gettimeofday(&l_start, NULL); /* allocate buffer for code */ unsigned char* l_gen_code = (unsigned char*) malloc( 32768 * sizeof(unsigned char) ); libxsmm_generated_code l_generated_code; l_generated_code.generated_code = (void*)l_gen_code; l_generated_code.buffer_size = 32768; l_generated_code.code_size = 0; l_generated_code.code_type = 2; l_generated_code.last_error = 0; /* generate kernel */ libxsmm_generator_dense_kernel( &l_generated_code, i_xgemm_desc, i_arch ); /* handle evetl. errors */ if ( l_generated_code.last_error != 0 ) { fprintf(stderr, "%s\n", libxsmm_strerror( l_generated_code.last_error ) ); exit(-1); } /* create executable buffer */ int l_code_pages = (((l_generated_code.code_size-1)*sizeof(unsigned char))/LIBXSMM_BUILD_PAGESIZE)+1; int l_code_page_size = LIBXSMM_BUILD_PAGESIZE*l_code_pages; int l_fd = open("/dev/zero", O_RDWR); void* p = mmap(0, l_code_page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, l_fd, 0); close(l_fd); /* explicitly disable THP for this memory region, kernel 2.6.38 or higher! madvise(p, l_code_page_size, MADV_NOHUGEPAGE); */ if (p == MAP_FAILED) { fprintf(stderr, "LIBXSMM: something bad happend in mmap, couldn't allocate code buffer!\n"); exit(-1); } unsigned char* l_code = (unsigned char*)p; memset( l_code, 0, l_code_page_size ); memcpy( l_code, l_gen_code, l_generated_code.code_size ); int error = mprotect( (void*)l_code, l_code_page_size, PROT_EXEC | PROT_READ ); if (error == -1) { int errsv = errno; if (errsv == EINVAL) { fprintf(stderr, "mprotect failed: addr is not a valid pointer, or not a multiple of the system page size!\n"); } else if (errsv == ENOMEM) { fprintf(stderr, "mprotect failed: Internal kernel structures could not be allocated!\n"); } else if (errsv == EACCES) { fprintf(stderr, "mprotect failed: The memory cannot be given the specified access!\n"); } else { fprintf(stderr, "mprotect failed: Unknown Error!\n"); } exit(-1); } /* set function pointer and jitted code */ if ( i_xgemm_desc->prefetch == LIBXSMM_PREFETCH_NONE ) { l_test_jit = (jitfun)l_code; } else { l_test_jit_pf = (jitfun_pf)l_code; } gettimeofday(&l_end, NULL); l_jittime = sec(l_start, l_end); printf("size of generated code: %i\n", l_generated_code.code_size ); /* write buffer for manual decode as binary to a file */ char l_objdump_name[128]; sprintf( l_objdump_name, "kernel_%i_%i_%i.bin", i_xgemm_desc->m, i_xgemm_desc->n, i_xgemm_desc->k ); FILE *l_byte_code = fopen( l_objdump_name, "wb"); if ( l_byte_code != NULL ){ fwrite( (const void*)l_gen_code, 1, l_generated_code.code_size, l_byte_code); fclose( l_byte_code ); } else { /* error */ } unsigned int l_t; double l_runtime = 0.0; gettimeofday(&l_start, NULL); if ( i_xgemm_desc->prefetch == LIBXSMM_PREFETCH_NONE ) { for ( l_t = 0; l_t < REPS; l_t++ ) { l_test_jit(i_a, i_b, o_c); } } else { for ( l_t = 0; l_t < REPS; l_t++ ) { l_test_jit_pf(i_a, i_b, o_c, i_a, i_b, o_c); } } gettimeofday(&l_end, NULL); l_runtime = sec(l_start, l_end); printf("%fs for creating jit\n", l_jittime); printf("%fs for executing jit\n", l_runtime); printf("%f GFLOPS for jit\n", ((double)((double)REPS * (double)i_xgemm_desc->m * (double)i_xgemm_desc->n * (double)i_xgemm_desc->k) * 2.0) / (l_runtime * 1.0e9)); free(l_gen_code); }