Exemplo n.º 1
0
  /*
   * Benchmark: average wall-clock time of one shmem_collect32() call.
   *
   * Each PE fills a symmetric source buffer of N_ELEMENTS ints, then all
   * npes PEs (PE_start 0, logPE_stride 0) run 10000 collects into a target
   * buffer of N_ELEMENTS * npes ints.  PE 0 accumulates the elapsed time of
   * every call and prints the per-call average in microseconds.
   *
   * N_ELEMENTS, pSyncA and pSyncB are defined outside this function.
   */
  int
main(void)
{
  int i;
  int *target;
  int *source;
  int me, npes;
  struct timeval start, end;
  long time_taken;

  start_pes(0);
  me = _my_pe();
  npes = _num_pes();

  source = (int *) shmalloc( N_ELEMENTS * sizeof(*source) );

  time_taken = 0;

  /* Make every element identifiable by index and by contributing PE. */
  for (i = 0; i < N_ELEMENTS; i += 1) {
    source[i] = (i + 1)*10 + me;
  }
  target = (int *) shmalloc( N_ELEMENTS * sizeof(*target)*npes );
  for (i = 0; i < N_ELEMENTS * npes ; i += 1) {
    target[i] = -90;
  }
  for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE ; i += 1) {
    pSyncA[i] = _SHMEM_SYNC_VALUE;
    pSyncB[i] = _SHMEM_SYNC_VALUE;
  }
  /* All PEs must have initialised their pSync arrays before the first use. */
  shmem_barrier_all();

  for(i=0;i<10000;i++){
    gettimeofday(&start, NULL);

    /* alternate between 2 pSync arrays to synchronize
     * consequent collectives of even and odd iterations */
    if(i % 2)
      shmem_collect32(target, source, N_ELEMENTS, 0, 0, npes, pSyncA);
    else
      shmem_collect32(target, source, N_ELEMENTS, 0, 0, npes, pSyncB);

    gettimeofday(&end, NULL);

    if(me==0){
      /* Subtract the fields first: the difference is small, whereas the
       * absolute epoch time in microseconds does not fit a 32-bit long. */
      time_taken += (long)(end.tv_sec - start.tv_sec) * 1000000L
                  + (long)(end.tv_usec - start.tv_usec);
    }

  }
  if(me == 0)
    printf("Time required to collect %d bytes of data, with %d PEs is %ld microseconds\n",(4*N_ELEMENTS * npes),npes,time_taken/10000);

  shmem_barrier_all();

  shfree(target);
  shfree(source);
  return 0;
}
Exemplo n.º 2
0
/*
 * Gathers the per-iteration 'count' values of every PE into one array.
 *
 * When timer->count_iter is not greater than zero nothing is gathered and
 * NULL is returned.  Otherwise the local timer->count values are staged in
 * a shmem_malloc'ed buffer, collected across NUM_PES PEs, and the function
 * returns a shmem_malloc'ed array of NUM_PES * timer->num_iters entries.
 * The staging buffer is released here; the returned array is not.
 */
static unsigned int * gather_rank_counts(_timer_t * const timer)
{
  /* Nothing to aggregate: bail out before any allocation. */
  if(!(timer->count_iter > 0)){
    return NULL;
  }

  const unsigned int total_entries = NUM_PES * timer->num_iters;

  /* Stage the local counts in a shmem_malloc'ed buffer for the collect. */
  unsigned int * local_counts = shmem_malloc(timer->num_iters * sizeof(unsigned int));
  assert(local_counts);
  memcpy(local_counts, timer->count, timer->num_iters*sizeof(unsigned int));

  unsigned int * gathered = shmem_malloc( total_entries * sizeof(unsigned int) );
  assert(gathered);

  /* The collect is bracketed by barriers on both sides. */
  shmem_barrier_all();
  shmem_collect32(gathered, local_counts, timer->num_iters, 0, 0, NUM_PES, pSync);
  shmem_barrier_all();

  shmem_free(local_counts);

  return gathered;
}
Exemplo n.º 3
0
void
FORTRANIFY (shmem_collect4) (void *target, const void *source, int *nelems,
                             int *PE_start, int *logPE_stride, int *PE_size,
                             int *pSync)
{
    shmem_collect32 (target, source, *nelems,
                     *PE_start, *logPE_stride, *PE_size, (long *) pSync);
}
Exemplo n.º 4
0
/*
 * Times `loops` consecutive shmem_collect32() calls across all PEs.
 *
 * target   - ignored on entry: it is overwritten with a freshly allocated
 *            symmetric buffer of elements * npes ints, freed before return.
 * src      - source buffer passed to every collect; each call contributes
 *            `elements` 32-bit items from it.
 * elements - number of items contributed per PE per call.
 * me       - calling PE's number; only PE 0 prints.
 * npes     - number of PEs taking part (PE_start 0, logPE_stride 0).
 * loops    - number of collect calls to time.
 *
 * Output is produced only when the external flag Verbose is non-zero.
 */
void
collect(int *target, int *src, int elements, int me, int npes, int loops)
{
    int i;
    double start_time, elapsed_time;
    long total_bytes = loops * elements * sizeof(*src);
    long *ps, *pSync, *pSync1;

    /* A single allocation holds two pSync arrays back to back, so that
     * consecutive collects can alternate between them. */
    pSync = (long*) shmem_malloc( 2 * sizeof(long) * _SHMEM_COLLECT_SYNC_SIZE );
    pSync1 = &pSync[_SHMEM_COLLECT_SYNC_SIZE];
    for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) {
        pSync[i] = pSync1[i] = _SHMEM_SYNC_VALUE;
    }
    target = (int *) shmem_malloc( elements * sizeof(*target) * npes );

    if (me==0 && Verbose) {
        /* elements*sizeof(*src) has type size_t, hence %zu. */
        fprintf(stdout,"%s: %d loops of collect32(%zu bytes) over %d PEs: ",
                __func__,loops,(elements*sizeof(*src)),npes);
        fflush(stdout);
    }

    /* Every PE must have initialised its pSync arrays before the first use. */
    shmem_barrier_all();

    start_time = shmemx_wtime();
    for(i = 0; i < loops; i++) {
        ps = (i & 1) ? pSync1 : pSync;
        shmem_collect32( target, src, elements, 0, 0, npes, ps );
    }
    elapsed_time = shmemx_wtime() - start_time;

    if (me==0 && Verbose) {
        printf("%7.3f secs\n", elapsed_time);
        printf("  %7.5f usecs / collect32(), %ld Kbytes @ %7.4f MB/sec\n\n",
                (elapsed_time/((double)loops*npes))*1000000.0,
                (total_bytes/1024),
                ((double)total_bytes/(1024.0*1024.0)) / elapsed_time );
    }
    shmem_barrier_all();
    shmem_free(target);
    shmem_free( pSync );
    shmem_barrier_all();
}
/*
 * Minimal shmem_collect32() demonstration.
 *
 * Every PE contributes the two values me*2 and me*2+1 from the external
 * `source` array; after the collect each PE prints its own number followed
 * by all npes*2 entries of its target buffer on one line.
 * `source` and `pSync` are defined outside this function.
 */
int main(void)
{
   int idx, me, npes;
   int gathered;
   int *target;

   start_pes(0);
   me = _my_pe();
   npes = _num_pes();

   /* Two consecutive values per PE. */
   source[0] = me * 2;
   source[1] = source[0] + 1;

   target = (int *)shmalloc(sizeof(int) * npes * 2);

   idx = 0;
   while (idx < _SHMEM_COLLECT_SYNC_SIZE) {
      pSync[idx] = _SHMEM_SYNC_VALUE;
      idx++;
   }
   shmem_barrier_all(); /* Wait for all PEs to initialize pSync */

   shmem_collect32(target, source, 2, 0, 0, npes, pSync);

   /* First entry carries the PE prefix, the rest are comma separated. */
   gathered = npes * 2;
   printf("%d: %d", me, target[0]);
   for (idx = 1; idx < gathered; idx++) {
      printf(", %d", target[idx]);
   }
   printf("\n");
   return 0;
}
Exemplo n.º 6
0
/*
 * Test case: shmem_collect32() where every PE contributes a different
 * number of elements.
 *
 * PE p contributes p + 1 elements holding 0..p.  After the collect, the
 * check expects target to hold one run per PE, in ascending PE order:
 * 0 | 0 1 | 0 1 2 | ...
 *
 * node, argc and argv are unused.
 *
 * Returns TC_PASS when every run matches, TC_FAIL on any mismatch, and
 * TC_SETUP_FAIL when run on a single PE or when a symmetric allocation
 * fails.
 */
int osh_coll_tc9(const TE_NODE *node, int argc, const char *argv[])
{
  /* General initialisations			*/

  int rc = TC_PASS;

  int ii, jj, numprocs, count, offset, nlong;
  int32_t *source, *target;
  long *pSync;

  UNREFERENCED_PARAMETER(node);
  UNREFERENCED_PARAMETER(argc);
  UNREFERENCED_PARAMETER(argv);

  numprocs = _num_pes();

  nlong = _my_pe() + 1;

  if (numprocs == 1)
  {
    log_debug(OSH_TC, "Using more than 1 CPU makes the tests of this program more interesting\n");
    return TC_SETUP_FAIL;
  }

  /* Total number of collected elements: 1 + 2 + ... + numprocs. */
  count = 0;
  for (ii = 0; ii < numprocs; ii++)
    count += ii + 1;

  /* Allocate everything first, in a fixed order, then validate before any
   * buffer is written to. */
  pSync = shmalloc(sizeof(*pSync) * _SHMEM_COLLECT_SYNC_SIZE);
  target = shmalloc(sizeof(*target) * count);
  source = shmalloc(sizeof(*source) * numprocs);

  if (!pSync || !target || !source)
  {
    if (source)
      shfree(source);
    if (target)
      shfree(target);
    if (pSync)
      shfree(pSync);
    return TC_SETUP_FAIL;
  }

  for (ii = 0; ii < _SHMEM_COLLECT_SYNC_SIZE; ii++)
    pSync[ii] = _SHMEM_SYNC_VALUE;

  for (ii = 0; ii < count; ii++)
    target[ii] = 0;

  for (ii = 0; ii < nlong; ii++)
    source[ii] = ii;

  shmem_barrier_all();		/* Wait for all CPUs to initialize pSync */

  /* Collect function				*/

  shmem_collect32( target, source, nlong, 0, 0,
                 numprocs, pSync );

  /* Verify run by run.  The start of the next run is tracked with a running
   * offset, so nothing is read past the end of any table on the last pass. */
  offset = 0;
  for (ii = 0; ii < numprocs; ii++)
  {
    for (jj = 0; jj <= ii; jj++)
      if (target[offset + jj] != jj)
        rc = TC_FAIL;
    offset += ii + 1;
  }

  /* Finalizes					*/
  shfree(source);
  shfree(target);
  shfree(pSync);

  return rc;
}
Exemplo n.º 7
0
/*
 * Repeatedly applies FUNC_VALUE to a symmetric variable and checks, via
 * shmem_collect32(), which values the PEs got back.
 *
 * In each of __cycle_count rounds every PE calls
 * FUNC_VALUE(shmem_addr, my_value, peer_proc) with my_value set to its own
 * PE number and peer_proc 0, and the results are collected into check_arr.
 * The round fails (TC_FAIL) when more than one of the values 0..num_proc-1
 * is absent from the collected results.
 * NOTE(review): FUNC_VALUE is defined elsewhere; its exact semantics are
 * not visible here.
 *
 * Returns TC_PASS, TC_FAIL, or TC_SETUP_FAIL when a symmetric allocation
 * fails.
 */
static int test_item3(void)
{
    int rc = TC_PASS;
    TYPE_VALUE* shmem_addr = NULL;
    TYPE_VALUE my_value = 0;
    TYPE_VALUE* check_arr = NULL;
    int num_proc = 0;
    int my_proc = 0;
    int peer_proc = 0;
    int i = 0;
    int j = 0;
    int k = 0;
    int flag = 0;
    int missed_values = 0;
    static long* pSync = NULL;

    num_proc = _num_pes();
    my_proc = _my_pe();

    shmem_addr = shmalloc(sizeof(*shmem_addr));
    check_arr = shmalloc(sizeof(*check_arr) * num_proc);
    pSync = shmalloc(sizeof(*pSync) * _SHMEM_COLLECT_SYNC_SIZE);

    if (shmem_addr && pSync && check_arr)
    {
        static TYPE_VALUE value = 0;

        /* Initialise pSync only once the allocation is known to be valid. */
        for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i++) {
            pSync[i] = _SHMEM_SYNC_VALUE;
        }

        /* Store my value */
        my_value = (TYPE_VALUE)my_proc;
        *shmem_addr = DEFAULT_VALUE;

        shmem_barrier_all();
        for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++)
        {
            missed_values = 0;
            my_value = (TYPE_VALUE)my_proc;

            value = FUNC_VALUE(shmem_addr, my_value, peer_proc);

            shmem_barrier_all();
            /* Element count is sizeof(value) rounded up to 32-bit words. */
            shmem_collect32(check_arr, &value, (sizeof(value) + 3 ) / 4, 0, 0, num_proc, pSync);
            shmem_barrier_all();

            /* For every PE number j, look for it among the collected values. */
            for (j = 0; j < num_proc ; j++)
            {
                flag = 0;
                for (k = 0; k < num_proc; k++)
                {
                    if (sys_fcompare(check_arr[k], j))
                    {
                        flag = 1;
                        break;
                    }
                }
                if (flag == 0)
                {
                    missed_values++;
                }
                if (missed_values > 1)
                {
                    rc = TC_FAIL;
                    break;
                }
            }
        }
        shmem_barrier_all();

        log_debug(OSH_TC, "my(#%d:%lld) missed_values expected = 1 vs missed_values = %d\n",
                           my_proc, (INT64_TYPE)my_value, missed_values);
    }
    else
    {
        rc = TC_SETUP_FAIL;
    }

    if (shmem_addr)
    {
        shfree(shmem_addr);
    }
    if (pSync)
    {
        shfree(pSync);
    }
    /* check_arr is allocated above as well and must be released too. */
    if (check_arr)
    {
        shfree(check_arr);
    }

    return rc;
}