예제 #1
0
/*
 * Allocates an array of NUM_KEYS_PER_PE keys and fills it with values drawn
 * from pcg32_boundedrand_r(&rng, MAX_KEY_VAL). That generator excludes its
 * bound, so keys lie in the half-open range [0, MAX_KEY_VAL).
 *
 * The generator state is obtained from seed_my_rank().
 * NOTE(review): an earlier version of this comment said the seed combines the
 * time and the rank number -- confirm against seed_my_rank().
 *
 * Allocation and key generation are both measured by TIMER_INPUT.
 *
 * Returns: the malloc'd key array. This function never frees it.
 * Aborts the process if the allocation fails.
 */
static KEY_TYPE * make_input(void)
{
  timer_start(&timers[TIMER_INPUT]);

  KEY_TYPE * restrict const my_keys = malloc(NUM_KEYS_PER_PE * sizeof(KEY_TYPE));
  if(my_keys == NULL){
    // Fail loudly here instead of faulting on the first store below.
    fprintf(stderr, "make_input: unable to allocate the key array\n");
    abort();
  }

  pcg32_random_t rng = seed_my_rank();

  for(uint64_t i = 0; i < NUM_KEYS_PER_PE; ++i) {
    my_keys[i] = pcg32_boundedrand_r(&rng, MAX_KEY_VAL);
  }

  timer_stop(&timers[TIMER_INPUT]);

#ifdef DEBUG
  wait_my_turn();
  char msg[1024];
  const int my_rank = shmem_my_pe();
  // Build the line with bounded writes so a large PRINT_MAX cannot overrun msg.
  int len = snprintf(msg, sizeof msg, "Rank %d: Initial Keys: ", my_rank);
  if(len < 0){ msg[0] = '\0'; len = 0; }
  for(uint64_t i = 0; i < NUM_KEYS_PER_PE && i < PRINT_MAX; ++i){
    if((size_t)len >= sizeof msg - 1){ break; }  // buffer full: output is truncated
    const int n = snprintf(msg + len, sizeof msg - (size_t)len, "%d ", my_keys[i]);
    if(n < 0){ msg[len] = '\0'; break; }
    len += n;
  }
  printf("%s\n", msg);
  fflush(stdout);
  my_turn_complete();
#endif
  return my_keys;
}
예제 #2
0
파일: isx.c 프로젝트: habanero-rice/hclib
/*
 * Counts the occurrences of each key in my bucket.
 * Key indices into the count array are the key's value minus my bucket's
 * minimum key value (my_rank * BUCKET_WIDTH) to allow indexing from 0.
 *
 * my_bucket_keys: All keys in my bucket unsorted [my_rank * BUCKET_WIDTH, (my_rank+1)*BUCKET_WIDTH)
 *                 The first my_bucket_size entries are read.
 *
 * The counting loop is measured by TIMER_SORT; the allocation is not.
 *
 * Returns: a heap-allocated array of BUCKET_WIDTH counters. This function
 * never frees it. Aborts (via assert) if allocation fails or a key lies
 * outside this rank's range.
 */
static int * count_local_keys(KEY_TYPE const * const my_bucket_keys)
{
  // calloc hands back zeroed counters, replacing the malloc + memset pair.
  int * const my_local_key_counts = calloc(BUCKET_WIDTH, sizeof(int));
  assert(my_local_key_counts);

  timer_start(&timers[TIMER_SORT]);

  const int my_rank = shmem_my_pe();
  const int my_min_key = my_rank * BUCKET_WIDTH;

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  // Count the occurrences of each key in my bucket
  for(long long int i = 0; i < my_bucket_size; ++i){
    const unsigned int key_index = my_bucket_keys[i] - my_min_key;

    assert(my_bucket_keys[i] >= my_min_key);
    assert(key_index < BUCKET_WIDTH);

    my_local_key_counts[key_index]++;
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  if (my_rank == 0) {
    // my_bucket_size is signed and 64-bit wide elsewhere in this function
    // (printed with %lld, compared against a long long), so %u was a mismatch.
    printf("Counting local took %llu ns, my_bucket_size = %lld, BUCKET_WIDTH = "
           "%llu\n", end - start, (long long)my_bucket_size,
           (unsigned long long)BUCKET_WIDTH);
  }
#endif

  timer_stop(&timers[TIMER_SORT]);

#ifdef DEBUG
  wait_my_turn();
  char msg[4096];
  // Build the line with bounded writes so a large PRINT_MAX cannot overrun msg.
  int len = snprintf(msg, sizeof msg, "Rank %d: Bucket Size %lld | Local Key Counts:",
                     my_rank, (long long)my_bucket_size);
  if(len < 0){ msg[0] = '\0'; len = 0; }
  for(uint64_t i = 0; i < BUCKET_WIDTH && i < PRINT_MAX; ++i){
    if((size_t)len >= sizeof msg - 1){ break; }  // buffer full: output is truncated
    const int n = snprintf(msg + len, sizeof msg - (size_t)len, "%d ", my_local_key_counts[i]);
    if(n < 0){ msg[len] = '\0'; break; }
    len += n;
  }
  printf("%s\n", msg);
  fflush(stdout);
  my_turn_complete();
#endif

  return my_local_key_counts;
}
예제 #3
0
파일: isx.c 프로젝트: habanero-rice/hclib
/*
 * Places local keys into their corresponding local bucket.
 * The contents of each bucket are not sorted.
 *
 * my_keys:              NUM_KEYS_PER_PE input keys.
 * local_bucket_offsets: next free slot for each bucket (bucket = key / BUCKET_WIDTH).
 *                       MODIFIED: each entry is advanced once per key placed
 *                       in that bucket, so on return it no longer holds the
 *                       buckets' starting positions.
 *
 * The placement loop is measured by TIMER_BUCKETIZE; the allocation is not.
 *
 * Returns: a malloc'd array of NUM_KEYS_PER_PE keys grouped by bucket. This
 * function never frees it. Aborts (via assert) on allocation failure, a
 * negative offset, or a destination index outside the output array.
 */
static KEY_TYPE * bucketize_local_keys(KEY_TYPE const * const my_keys,
                                              int * const local_bucket_offsets)
{
  KEY_TYPE * const my_local_bucketed_keys = malloc(NUM_KEYS_PER_PE * sizeof(KEY_TYPE));
  assert(my_local_bucketed_keys);

  timer_start(&timers[TIMER_BUCKETIZE]);

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  for(uint64_t i = 0; i < NUM_KEYS_PER_PE; ++i){
    const KEY_TYPE key = my_keys[i];
    const uint32_t bucket_index = key / BUCKET_WIDTH;
    assert(local_bucket_offsets[bucket_index] >= 0);
    // Claim the bucket's next free slot and advance its cursor.
    const uint32_t index = local_bucket_offsets[bucket_index]++;
    assert(index < NUM_KEYS_PER_PE);
    my_local_bucketed_keys[index] = key;
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  if (shmem_my_pe() == 0) {
    printf("Bucketizing took %llu ns\n", end - start);
  }
#endif

  timer_stop(&timers[TIMER_BUCKETIZE]);

#ifdef DEBUG
  wait_my_turn();
  char msg[1024];
  const int my_rank = shmem_my_pe();
  // Build the line with bounded writes so a large PRINT_MAX cannot overrun msg.
  int len = snprintf(msg, sizeof msg, "Rank %d: local bucketed keys: ", my_rank);
  if(len < 0){ msg[0] = '\0'; len = 0; }
  for(uint64_t i = 0; i < NUM_KEYS_PER_PE && i < PRINT_MAX; ++i){
    if((size_t)len >= sizeof msg - 1){ break; }  // buffer full: output is truncated
    const int n = snprintf(msg + len, sizeof msg - (size_t)len, "%d ", my_local_bucketed_keys[i]);
    if(n < 0){ msg[len] = '\0'; break; }
    len += n;
  }
  printf("%s\n", msg);
  fflush(stdout);
  my_turn_complete();
#endif
  return my_local_bucketed_keys;
}
예제 #4
0
파일: isx.c 프로젝트: habanero-rice/hclib
/*
 * Computes the size of each bucket by iterating all keys and incrementing
 * their corresponding bucket's size (bucket = key / BUCKET_WIDTH).
 *
 * my_keys: NUM_KEYS_PER_PE input keys.
 *
 * The counters are prepared with init_array() before counting.
 * NOTE(review): presumably init_array() zeroes the array -- confirm.
 * Both the initialisation and the counting loop are measured by TIMER_BCOUNT.
 *
 * Returns: a malloc'd array of NUM_BUCKETS counters. This function never
 * frees it. Aborts (via assert) on allocation failure or if a key maps to a
 * bucket index outside [0, NUM_BUCKETS).
 */
static int * count_local_bucket_sizes(KEY_TYPE const * const my_keys)
{
  int * const local_bucket_sizes = malloc(NUM_BUCKETS * sizeof(int));
  assert(local_bucket_sizes);

  timer_start(&timers[TIMER_BCOUNT]);

  init_array(local_bucket_sizes, NUM_BUCKETS);

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  for(uint64_t i = 0; i < NUM_KEYS_PER_PE; ++i){
    const uint32_t bucket_index = my_keys[i]/BUCKET_WIDTH;
    // Guard the write below: an out-of-range key must not corrupt the heap.
    assert(bucket_index < NUM_BUCKETS);
    local_bucket_sizes[bucket_index]++;
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  if (shmem_my_pe() == 0) {
    printf("Counting local bucket sizes took %llu ns\n", end - start);
  }
#endif

  timer_stop(&timers[TIMER_BCOUNT]);

#ifdef DEBUG
  wait_my_turn();
  char msg[1024];
  const int my_rank = shmem_my_pe();
  // Build the line with bounded writes so a large PRINT_MAX cannot overrun msg.
  int len = snprintf(msg, sizeof msg, "Rank %d: local bucket sizes: ", my_rank);
  if(len < 0){ msg[0] = '\0'; len = 0; }
  for(uint64_t i = 0; i < NUM_BUCKETS && i < PRINT_MAX; ++i){
    if((size_t)len >= sizeof msg - 1){ break; }  // buffer full: output is truncated
    const int n = snprintf(msg + len, sizeof msg - (size_t)len, "%d ", local_bucket_sizes[i]);
    if(n < 0){ msg[len] = '\0'; break; }
    len += n;
  }
  printf("%s\n", msg);
  fflush(stdout);
  my_turn_complete();
#endif

  return local_bucket_sizes;
}
예제 #5
0
파일: isx.c 프로젝트: habanero-rice/hclib
/*
 * Allocates an array of NUM_KEYS_PER_PE keys and fills it with values drawn
 * from pcg32_boundedrand_r(&rng, MAX_KEY_VAL). That generator excludes its
 * bound, so keys lie in the half-open range [0, MAX_KEY_VAL).
 *
 * The generator state is obtained from seed_my_rank().
 * NOTE(review): an earlier version of this comment said the seed combines the
 * time and the rank number -- confirm against seed_my_rank().
 *
 * Allocation and key generation are both measured by TIMER_INPUT.
 *
 * Returns: the malloc'd key array. This function never frees it.
 * Aborts (via assert) if the allocation fails.
 */
static KEY_TYPE * make_input(void)
{
  timer_start(&timers[TIMER_INPUT]);

  KEY_TYPE * const my_keys = malloc(NUM_KEYS_PER_PE * sizeof(KEY_TYPE));
  assert(my_keys);

  pcg32_random_t rng = seed_my_rank();

#ifdef ISX_PROFILING
  unsigned long long start = current_time_ns();
#endif

  for(uint64_t i = 0; i < NUM_KEYS_PER_PE; ++i) {
    my_keys[i] = pcg32_boundedrand_r(&rng, MAX_KEY_VAL);
  }

#ifdef ISX_PROFILING
  unsigned long long end = current_time_ns();
  if (shmem_my_pe() == 0) {
    printf("Making input took %llu ns\n", end - start);
  }
#endif

  timer_stop(&timers[TIMER_INPUT]);

#ifdef DEBUG
  wait_my_turn();
  char msg[1024];
  const int my_rank = shmem_my_pe();
  // Build the line with bounded writes so a large PRINT_MAX cannot overrun msg.
  int len = snprintf(msg, sizeof msg, "Rank %d: Initial Keys: ", my_rank);
  if(len < 0){ msg[0] = '\0'; len = 0; }
  for(uint64_t i = 0; i < NUM_KEYS_PER_PE && i < PRINT_MAX; ++i){
    if((size_t)len >= sizeof msg - 1){ break; }  // buffer full: output is truncated
    const int n = snprintf(msg + len, sizeof msg - (size_t)len, "%d ", my_keys[i]);
    if(n < 0){ msg[len] = '\0'; break; }
    len += n;
  }
  printf("%s\n", msg);
  fflush(stdout);
  my_turn_complete();
#endif
  return my_keys;
}
예제 #6
0
파일: isx.c 프로젝트: habanero-rice/hclib
/*
 * Computes the prefix scan of the bucket sizes to determine the starting locations
 * of each bucket in the local bucketed array: offset[0] = 0 and
 * offset[i] = offset[i-1] + size[i-1].
 * Stores a copy of the bucket offsets for use in exchanging keys because the
 * original bucket_offsets array is modified in the bucketize function.
 *
 * local_bucket_sizes: NUM_BUCKETS bucket sizes (read only).
 * send_offsets:       out-parameter; *send_offsets is set to a newly malloc'd
 *                     array of NUM_BUCKETS ints holding the same offsets.
 *
 * The second allocation and the scan are measured by TIMER_BOFFSET.
 *
 * Returns: a malloc'd array of NUM_BUCKETS offsets. Neither array is freed
 * here. Aborts (via assert) if either allocation fails.
 */
static int * compute_local_bucket_offsets(int const * const local_bucket_sizes,
                                                 int ** send_offsets)
{
  int * const local_bucket_offsets = malloc(NUM_BUCKETS * sizeof(int));
  assert(local_bucket_offsets);

  timer_start(&timers[TIMER_BOFFSET]);

  (*send_offsets) = malloc(NUM_BUCKETS * sizeof(int));
  assert(*send_offsets);

  local_bucket_offsets[0] = 0;
  (*send_offsets)[0] = 0;
  for(uint64_t i = 1; i < NUM_BUCKETS; i++){
    // A bucket starts where the previous one ends.
    const int offset = local_bucket_offsets[i-1] + local_bucket_sizes[i-1];
    local_bucket_offsets[i] = offset;
    (*send_offsets)[i] = offset;
  }
  timer_stop(&timers[TIMER_BOFFSET]);

#ifdef DEBUG
  wait_my_turn();
  char msg[1024];
  const int my_rank = shmem_my_pe();
  // Build the line with bounded writes so a large PRINT_MAX cannot overrun msg.
  int len = snprintf(msg, sizeof msg, "Rank %d: local bucket offsets: ", my_rank);
  if(len < 0){ msg[0] = '\0'; len = 0; }
  for(uint64_t i = 0; i < NUM_BUCKETS && i < PRINT_MAX; ++i){
    if((size_t)len >= sizeof msg - 1){ break; }  // buffer full: output is truncated
    const int n = snprintf(msg + len, sizeof msg - (size_t)len, "%d ", local_bucket_offsets[i]);
    if(n < 0){ msg[len] = '\0'; break; }
    len += n;
  }
  printf("%s\n", msg);
  fflush(stdout);
  my_turn_complete();
#endif
  return local_bucket_offsets;
}
예제 #7
0
파일: isx.c 프로젝트: habanero-rice/hclib
/*
 * Each PE sends the contents of its local buckets to the PE that owns that bucket.
 *
 * send_offsets:           start of each destination's keys within my_local_bucketed_keys.
 * local_bucket_sizes:     number of keys destined for each PE.
 * my_local_bucketed_keys: this PE's keys, grouped by destination bucket.
 *
 * For every destination (including this PE) a slice of the destination's
 * my_bucket_keys buffer is claimed by a fetch-and-add of the send size on the
 * destination's receive_offset; the keys are then written at the returned
 * offset -- with memcpy for this PE, with shmem_int_put for remote PEs.
 * The order in which remote PEs are visited depends on PERMUTE / INCAST.
 *
 * The exchange is measured by TIMER_ATA_KEYS, and the number of keys sent to
 * remote PEs is recorded with timer_count().
 *
 * Returns: the file-scope my_bucket_keys buffer (not a fresh allocation).
 * Aborts (via assert) if a write would exceed KEY_BUFFER_SIZE or a read would
 * exceed NUM_KEYS_PER_PE.
 */
static KEY_TYPE * exchange_keys(int const * const send_offsets,
                                       int const * const local_bucket_sizes,
                                       KEY_TYPE const * const my_local_bucketed_keys)
{
  timer_start(&timers[TIMER_ATA_KEYS]);

  const int my_rank = shmem_my_pe();
  unsigned int total_keys_sent = 0;

  // Keys destined for local key buffer can be written with memcpy
  const long long int write_offset_into_self = shmem_longlong_fadd(
          &receive_offset, (long long int)local_bucket_sizes[my_rank], my_rank);
  assert((unsigned long long)write_offset_into_self +
          (unsigned long long)local_bucket_sizes[my_rank] <= KEY_BUFFER_SIZE);
  memcpy(&my_bucket_keys[write_offset_into_self], 
         &my_local_bucketed_keys[send_offsets[my_rank]], 
         local_bucket_sizes[my_rank]*sizeof(KEY_TYPE));


  for(uint64_t i = 0; i < NUM_PES; ++i){

#ifdef PERMUTE
    const int target_pe = permute_array[i];
#elif INCAST
    const int target_pe = i;
#else
    const int target_pe = (my_rank + i) % NUM_PES;
#endif

    // Local keys already written with memcpy
    if(target_pe == my_rank){ continue; }

    const int read_offset_from_self = send_offsets[target_pe];
    const int my_send_size = local_bucket_sizes[target_pe];

    // Claim my_send_size slots in the target's receive buffer.
    const long long int write_offset_into_target = shmem_longlong_fadd(
            &receive_offset, (long long int)my_send_size, target_pe);

#ifdef DEBUG
    printf("Rank: %d Target: %d Offset into target: %lld Offset into myself: %d Send Size: %d\n",
        my_rank, target_pe, write_offset_into_target, read_offset_from_self, my_send_size);
#endif

    assert((unsigned long long)write_offset_into_target +
            (unsigned long long)my_send_size <= KEY_BUFFER_SIZE);
    assert((unsigned long long)read_offset_from_self +
            (unsigned long long)my_send_size <= NUM_KEYS_PER_PE);
    shmem_int_put(&(my_bucket_keys[write_offset_into_target]), 
                  &(my_local_bucketed_keys[read_offset_from_self]), 
                  my_send_size, 
                  target_pe);

    total_keys_sent += my_send_size;
  }

#ifdef BARRIER_ATA
  SHMEM_BARRIER_AT_EXCHANGE;
#endif

  timer_stop(&timers[TIMER_ATA_KEYS]);
  timer_count(&timers[TIMER_ATA_KEYS], total_keys_sent);

#ifdef DEBUG
  wait_my_turn();
  char msg[1024];
  // Read receive_offset once so the header and the loop bound agree even if
  // other PEs are still adding to it.
  const long long int keys_received = receive_offset;
  // Build the line with bounded writes so a large PRINT_MAX cannot overrun msg.
  int len = snprintf(msg, sizeof msg,
                     "Rank %d: Bucket Size %lld | Total Keys Sent: %u | Keys after exchange:", 
                     my_rank, keys_received, total_keys_sent);
  if(len < 0){ msg[0] = '\0'; len = 0; }
  for(long long int i = 0; i < keys_received && i < PRINT_MAX; ++i){
    if((size_t)len >= sizeof msg - 1){ break; }  // buffer full: output is truncated
    const int n = snprintf(msg + len, sizeof msg - (size_t)len, "%d ", my_bucket_keys[i]);
    if(n < 0){ msg[len] = '\0'; break; }
    len += n;
  }
  printf("%s\n", msg);
  fflush(stdout);
  my_turn_complete();
#endif

  return my_bucket_keys;
}