Exemplo n.º 1
0
// Records the end-of-interval readings: the platform timer value goes into
// m_t1 and the ippGetCpuClocks() counter into m_c1.
void CTimer::Stop(void)
{
#ifdef _WIN32
  QueryPerformanceCounter(&m_t1);
#endif

#ifdef linux
  // Counterpart of the clock() sample that CTimer::Start() stores in m_t0;
  // without it m_t1 is never written on this platform.
  // NOTE(review): assumes m_t1 has the same type as m_t0 - confirm in the class.
  m_t1 = clock();
#endif

  m_c1 = ippGetCpuClocks();

  return;
} // CTimer::Stop()
Exemplo n.º 2
0
// Captures the start-of-interval readings: a platform-specific timer value
// into m_t0 (QueryPerformanceCounter on _WIN32, clock() on linux) and the
// ippGetCpuClocks() counter into m_c0.
void CTimer::Start()
{
#if defined(_WIN32)
  QueryPerformanceCounter(&m_t0);
#endif

#if defined(linux)
  m_t0 = clock();
#endif

  // Sampled last, after the platform timer has been read.
  m_c0 = ippGetCpuClocks();
} // CTimer::Start()
Exemplo n.º 3
0
/* Stores the current ippGetCpuClocks() reading in m->p_end.
 * m is dereferenced without a check, so it must not be NULL. */
static void measure_end(MeasureIt *m)
{
   m->p_end = ippGetCpuClocks();
}
Exemplo n.º 4
0
/*
 * srt_uniform_I8
 *
 * Sorts the nR values of X into Y in two passes. Each value is first placed
 * in bin ((unsigned long long) X[i]) >> shift, then the NUM_THREADS bins are
 * sorted independently with qsort_asc_I8 / qsort_dsc_I8 inside a cilkfor.
 *
 *   X     : input values; only read here
 *   nR    : number of values; fewer than 16*1048576 is rejected
 *   Y     : output; indices 0 .. nR-1 are written
 *   shift : right shift mapping a value to its bin, 0 <= shift < 64;
 *           every value must map to a bin below NUM_THREADS
 *   mode  : "asc" or "dsc"
 *
 * Returns 0 on success. The failure paths visible here (bad argument, a
 * value whose bin is out of range) set status to -1 through go_BYE.
 * NOTE(review): return_if_malloc_failed is assumed to jump to BYE with a
 * negative status - confirm against its definition.
 * NOTE(review): values are binned by their unsigned bit pattern, so negative
 * inputs map to the highest bins - confirm callers pass non-negative data.
 */
// START FUNC DECL
int
srt_uniform_I8(
	       long long *X, /* input */
	       long long nR,
	       long long *Y, /* output */
	       int shift,
	       char *mode /* "asc" or "dsc" */
	       )
// STOP FUNC DECL
{
  int status = 0;
  long long **t_cnt = NULL, *cnt = NULL, *bin_offset = NULL; int nT = 0; 
  // Clock samples, one per phase; only the disabled debug print reads them
  long long t[16];

  t[0] = ippGetCpuClocks();

  // A shift count of 64 or more is undefined for a 64-bit value
  if ( ( shift < 0 ) || ( shift >= 64 ) ) { go_BYE(-1); }
  if ( X == NULL ) { go_BYE(-1); }
  if ( Y == NULL ) { go_BYE(-1); }
  if ( mode == NULL ) { go_BYE(-1); }
  if ( nR < 16*1048576 ) { 
    fprintf(stderr, "Too few elements to sort \n"); go_BYE(-1); 
  }
  if ( ( strcmp(mode, "asc") != 0 ) && ( strcmp(mode, "dsc") != 0 ) ) {
    go_BYE(-1); 
  }
  // mode has been validated above, so "not asc" means "dsc" from here on
  const int is_asc = ( strcmp(mode, "asc") == 0 );

  nT = NUM_THREADS;
  if ( nT <= 0 ) { go_BYE(-1); } // guards the division just below
  long long block_size = nR / nT;

  cnt = malloc(nT * sizeof(long long));
  return_if_malloc_failed(cnt);
  assign_const_I8(cnt, nT, 0); 

  bin_offset = malloc(nT * sizeof(long long));
  return_if_malloc_failed(bin_offset);

  t_cnt = malloc(nT * sizeof(long long *));
  return_if_malloc_failed(t_cnt);
  // Clear every slot first so that the cleanup at BYE never frees an
  // uninitialized pointer when one of the allocations below fails
  for ( int i = 0; i < nT; i++ ) { t_cnt[i] = NULL; }
  for ( int i = 0; i < nT; i++ ) { 
    t_cnt[i] = malloc(nT * sizeof(long long));
    return_if_malloc_failed(t_cnt[i]);
    assign_const_I8(t_cnt[i], nT, 0); 
  }
  t[1] = ippGetCpuClocks();

  // Figure out how many elements in each bin. Each iteration owns one
  // contiguous block of X and its own row of t_cnt.
  cilkfor ( int tid = 0; tid < nT; tid++ ) { 
    long long lb = tid * block_size;
    long long ub = lb  + block_size;
    if ( tid == (nT-1) ) { ub = nR; } // last block absorbs the remainder
    long long *l_cnt = t_cnt[tid];
    for ( long long i = lb; i < ub; i++ ) { 
      // Keep the bin unsigned: narrowing to int could wrap to a negative
      // value that slips past the range check and indexes out of bounds
      unsigned long long bin = ((unsigned long long) X[i]) >> shift;
      // Out-of-range values are simply not counted; the tally check
      // against nR below turns that into an error without any shared write
      if ( bin >= (unsigned long long) nT ) { continue; }
      l_cnt[bin]++;
    }
  }
  t[2] = ippGetCpuClocks();
  // Sum up over local counts 
  for ( int i = 0; i < nT; i++ ) { 
    long long tempsum = 0;
    for ( int j = 0; j < nT; j++ ) { 
      tempsum += t_cnt[j][i];
    }
    cnt[i] = tempsum;
  }
  // Total number of elements that landed in a valid bin
  long long chk_nX = 0;
  for ( int i = 0; i < nT; i++ ) { 
    chk_nX += cnt[i];
  }
  // Starting offset of each bin in Y. Bins hold increasing value ranges, so
  // they are laid out low-to-high for "asc" and high-to-low for "dsc";
  // otherwise "dsc" output would be descending only within each bin.
  if ( is_asc ) {
    bin_offset[0] = 0;
    for ( int i = 1; i < nT; i++ ) {
      bin_offset[i] = bin_offset[i-1] + cnt[i-1];
    }
  }
  else {
    bin_offset[nT-1] = 0;
    for ( int i = nT-2; i >= 0; i-- ) {
      bin_offset[i] = bin_offset[i+1] + cnt[i+1];
    }
  }
  /*
  for ( int i = 1; i < nT; i++ ) {
    fprintf(stderr, "%d --> %lld \n", i, cnt[i]);
  }
  */
  t[3] = ippGetCpuClocks();

  // Fails exactly when some value mapped to a bin >= nT
  if ( chk_nX != nR ) { go_BYE(-1); }
  // Now let us get elements to their correct bins. The check above
  // guarantees every bin is in range, so none is repeated here.
  // The index is long long because nR may exceed INT_MAX.
  for ( long long i = 0; i < nR; i++ ) { 
    unsigned long long bin = ((unsigned long long) X[i]) >> shift;
    Y[bin_offset[bin]] = X[i];
    bin_offset[bin]++; 
  }
  t[4] = ippGetCpuClocks();
  // Each offset has advanced by exactly cnt[i]; rewind to the bin starts
  for ( int i = 0; i < nT; i++ ) {
    bin_offset[i] -= cnt[i];
  }

  // ------------------------------------------------------------
  // Now we can sort individual bins in parallel
  cilkfor ( int tid = 0; tid < nT; tid++ ) { 
    if ( cnt[tid] == 0 ) { continue; }
    long long *lY = Y + bin_offset[tid];
    if ( is_asc ) {
      qsort_asc_I8(lY, cnt[tid], sizeof(long long), NULL);
    }
    else {
      qsort_dsc_I8(lY, cnt[tid], sizeof(long long), NULL);
    }
  }
  t[5] = ippGetCpuClocks();
  /*
  for ( int i = 1; i <= 5; i++ ) { 
    fprintf(stderr, "%d --> %lld \n", i, t[i] - t[i-1]);
  }
  */
  // ------------------------------------------------------------
 BYE:
  if ( t_cnt != NULL ) { 
    for ( int i = 0; i < nT; i++ ) { 
      free_if_non_null(t_cnt[i]);
    }
    free_if_non_null(t_cnt);
  }
  free_if_non_null(cnt);
  free_if_non_null(bin_offset);
  return(status);
}
Exemplo n.º 5
0
/* Stores the current ippGetCpuClocks() reading in m->p_start.
 * m is dereferenced without a check, so it must not be NULL. */
static void measure_start(MeasureIt *m)
{
   m->p_start = ippGetCpuClocks();
}