Пример #1
0
/**
 * Dispatches a single-precision matrix-multiplication kernel through
 * libxsmm_smmdispatch, runs it g_jit_code_reps times and prints the time
 * spent on dispatching, the time spent on executing and the resulting GFLOPS.
 *
 * The kernel is requested with leading dimensions lda = i_M, ldb = i_K,
 * ldc = i_M, alpha = 1 and beta = 1.
 *
 * @param i_a        first input operand handed to the kernel.
 * @param i_b        second input operand handed to the kernel.
 * @param o_c        output operand handed to the kernel.
 * @param i_M        M dimension passed to the dispatcher.
 * @param i_N        N dimension passed to the dispatcher.
 * @param i_K        K dimension passed to the dispatcher.
 * @param i_prefetch prefetch strategy; any value other than
 *                   LIBXSMM_PREFETCH_NONE selects the six-argument call.
 * @param i_arch     not used by this function.
 *
 * Terminates the process with exit(-1) if no kernel could be dispatched.
 */
void run_jit_float( const float*               i_a,
                    const float*               i_b,
                    float*                     o_c,
                    const int                   i_M,
                    const int                   i_N,
                    const int                   i_K,
                    const libxsmm_prefetch_type i_prefetch,
                    const char*                 i_arch ) {

  /* define function pointer */
  libxsmm_smmfunction l_test_jit;
  double l_jittime = 0.0, l_runtime = 0.0;
  float l_alpha = 1.0f;
  float l_beta = 1.0f;
  unsigned long long l_start;
  unsigned int l_t;

  /* the architecture string is not needed when going through the dispatcher */
  (void)i_arch;

  if ( l_beta != 0.0f && l_beta != 1.0f ) {
    fprintf(stderr, "JIT float: beta needs to be 0.0 or 1.0!\n");
    exit(-1);
  }
  if ( l_alpha != 1.0f ) {
    fprintf(stderr, "JIT float: alpha needs to be 1.0!\n");
    exit(-1);
  }

  /* time the dispatch; the ninth argument is left NULL
     (presumably "use default flags" - confirm against the libxsmm API) */
  l_start = libxsmm_timer_tick();
  l_test_jit = libxsmm_smmdispatch(i_M, i_N, i_K, &i_M, &i_K, &i_M, &l_alpha, &l_beta, NULL, &i_prefetch );
  l_jittime = libxsmm_timer_duration(l_start, libxsmm_timer_tick());
  /* %llx requires an unsigned long long argument; size_t may be narrower */
  printf("function pointer address: %llx\n", (unsigned long long)(size_t)l_test_jit);

  /* without a kernel the loops below would call through a null pointer */
  if ( l_test_jit == NULL ) {
    fprintf(stderr, "JIT float: no kernel could be dispatched!\n");
    exit(-1);
  }

  l_start = libxsmm_timer_tick();

  if ( i_prefetch == LIBXSMM_PREFETCH_NONE ) {
    for ( l_t = 0; l_t < g_jit_code_reps; l_t++ ) {
      l_test_jit(i_a, i_b, o_c);
    }
  } else {
    /* the prefetch arguments simply point at the operands themselves */
    for ( l_t = 0; l_t < g_jit_code_reps; l_t++ ) {
      l_test_jit(i_a, i_b, o_c, i_a, i_b, o_c);
    }
  }

  l_runtime = libxsmm_timer_duration(l_start, libxsmm_timer_tick());

  printf("%fs for creating jit\n", l_jittime);
  printf("%fs for executing jit\n", l_runtime);
  /* 2*M*N*K floating point operations per kernel invocation */
  printf("%f GFLOPS for jit\n", ((double)((double)g_jit_code_reps * (double)i_M * (double)i_N * (double)i_K) * 2.0) / (l_runtime * 1.0e9));
}
Пример #2
0
/**
 * Generates a single-precision kernel with libxsmm_generator_dense_kernel,
 * copies it into an executable memory mapping, runs it REPS times and prints
 * the code size, the generation time, the execution time and the GFLOPS.
 * The raw generated bytes are additionally written to
 * "kernel_<m>_<n>_<k>.bin" for manual decoding (best effort).
 *
 * @param i_a          first input operand handed to the kernel.
 * @param i_b          second input operand handed to the kernel.
 * @param o_c          output operand handed to the kernel.
 * @param i_xgemm_desc descriptor forwarded to the generator; its m, n, k and
 *                     prefetch members are also read here.
 * @param i_arch       architecture string forwarded to the generator.
 *
 * Terminates the process with exit(-1) if the code buffer cannot be
 * allocated, the generator reports an error or produces no code, or the
 * executable mapping cannot be set up.
 */
void run_jit_float( const float*                    i_a,
                    const float*                    i_b,
                    float*                          o_c,
                    const libxsmm_xgemm_descriptor* i_xgemm_desc,
                    const char*                     i_arch ) {
  struct timeval l_start, l_end;

  /* capacity of the scratch buffer the generator writes into */
  enum { l_gen_code_capacity = 32768 };

  /* define function pointer */
  typedef void (*jitfun)(const float* a, const float* b, float* c);
  typedef void (*jitfun_pf)(const float* a, const float* b, float* c, const float* a_pf, const float* b_pf, const float* c_pf);
  jitfun l_test_jit = NULL;
  jitfun_pf l_test_jit_pf = NULL;

  double l_jittime = 0.0;
  gettimeofday(&l_start, NULL);

  /* allocate buffer for code */
  unsigned char* l_gen_code = (unsigned char*) malloc( l_gen_code_capacity * sizeof(unsigned char) );
  if ( l_gen_code == NULL ) {
    fprintf(stderr, "LIBXSMM: couldn't allocate buffer for generated code!\n");
    exit(-1);
  }
  libxsmm_generated_code l_generated_code;
  l_generated_code.generated_code = (void*)l_gen_code;
  l_generated_code.buffer_size = l_gen_code_capacity;
  l_generated_code.code_size = 0;
  /* NOTE(review): 2 presumably selects raw binary output - confirm against the generator API */
  l_generated_code.code_type = 2;
  l_generated_code.last_error = 0;

  /* generate kernel */
  libxsmm_generator_dense_kernel( &l_generated_code,
                                  i_xgemm_desc,
                                  i_arch );

  /* handle eventual errors */
  if ( l_generated_code.last_error != 0 ) {
    fprintf(stderr, "%s\n", libxsmm_strerror( l_generated_code.last_error ) );
    exit(-1);
  }
  /* the page computation below evaluates code_size-1 and needs at least one byte */
  if ( l_generated_code.code_size == 0 ) {
    fprintf(stderr, "LIBXSMM: generator produced no code!\n");
    exit(-1);
  }

  /* create executable buffer: round the code size up to whole pages */
  int l_code_pages = (((l_generated_code.code_size-1)*sizeof(unsigned char))/LIBXSMM_BUILD_PAGESIZE)+1;
  int l_code_page_size = LIBXSMM_BUILD_PAGESIZE*l_code_pages;
  int l_fd = open("/dev/zero", O_RDWR);
  if ( l_fd < 0 ) {
    fprintf(stderr, "LIBXSMM: couldn't open /dev/zero for the code buffer!\n");
    exit(-1);
  }
  void* p = mmap(0, l_code_page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE, l_fd, 0);
  /* the descriptor is only needed to establish the mapping */
  close(l_fd);
  /* explicitly disable THP for this memory region, kernel 2.6.38 or higher!
  madvise(p, l_code_page_size, MADV_NOHUGEPAGE); */
  if (p == MAP_FAILED) {
    fprintf(stderr, "LIBXSMM: something bad happend in mmap, couldn't allocate code buffer!\n");
    exit(-1);
  }
  unsigned char* l_code = (unsigned char*)p;
  memset( l_code, 0, l_code_page_size );
  memcpy( l_code, l_gen_code, l_generated_code.code_size );
  /* drop write permission and make the copied code executable */
  int error = mprotect( (void*)l_code, l_code_page_size, PROT_EXEC | PROT_READ );
  if (error == -1) {
    int errsv = errno;
    if (errsv == EINVAL) {
      fprintf(stderr, "mprotect failed: addr is not a valid pointer, or not a multiple of the system page size!\n");
    } else if (errsv == ENOMEM) {
      fprintf(stderr, "mprotect failed: Internal kernel structures could not be allocated!\n");
    } else if (errsv == EACCES) {
      fprintf(stderr, "mprotect failed: The memory cannot be given the specified access!\n");
    } else {
      fprintf(stderr, "mprotect failed: Unknown Error!\n");
    }
    exit(-1);
  }

  /* set function pointer and jitted code */
  if ( i_xgemm_desc->prefetch == LIBXSMM_PREFETCH_NONE ) {
    l_test_jit = (jitfun)l_code;
  } else {
    l_test_jit_pf = (jitfun_pf)l_code;
  }

  gettimeofday(&l_end, NULL);
  l_jittime = sec(l_start, l_end);

  printf("size of generated code: %i\n", l_generated_code.code_size );

  /* write buffer for manual decode as binary to a file */
  char l_objdump_name[128];
  snprintf( l_objdump_name, sizeof(l_objdump_name), "kernel_%i_%i_%i.bin", i_xgemm_desc->m, i_xgemm_desc->n, i_xgemm_desc->k );
  FILE *l_byte_code = fopen( l_objdump_name, "wb");

  /* the dump is a debugging aid only: report problems but keep benchmarking */
  if ( l_byte_code != NULL ){
    if ( fwrite( (const void*)l_gen_code, 1, l_generated_code.code_size, l_byte_code) != l_generated_code.code_size ) {
      fprintf(stderr, "LIBXSMM: warning, couldn't write complete kernel to %s!\n", l_objdump_name);
    }
    fclose( l_byte_code );
  } else {
    fprintf(stderr, "LIBXSMM: warning, couldn't open %s for writing!\n", l_objdump_name);
  }

  unsigned int l_t;
  double l_runtime = 0.0;

  gettimeofday(&l_start, NULL);

  if ( i_xgemm_desc->prefetch == LIBXSMM_PREFETCH_NONE ) {
    for ( l_t = 0; l_t < REPS; l_t++ ) {
      l_test_jit(i_a, i_b, o_c);
    }
  } else {
    /* the prefetch arguments simply point at the operands themselves */
    for ( l_t = 0; l_t < REPS; l_t++ ) {
      l_test_jit_pf(i_a, i_b, o_c, i_a, i_b, o_c);
    }
  }

  gettimeofday(&l_end, NULL);
  l_runtime = sec(l_start, l_end);

  printf("%fs for creating jit\n", l_jittime);
  printf("%fs for executing jit\n", l_runtime);
  /* 2*m*n*k floating point operations per kernel invocation */
  printf("%f GFLOPS for jit\n", ((double)((double)REPS * (double)i_xgemm_desc->m * (double)i_xgemm_desc->n * (double)i_xgemm_desc->k) * 2.0) / (l_runtime * 1.0e9));

  /* release the executable mapping; it is not referenced past this point */
  if ( munmap( p, l_code_page_size ) != 0 ) {
    fprintf(stderr, "LIBXSMM: warning, couldn't unmap code buffer!\n");
  }
  free(l_gen_code);
}