Esempio n. 1
0
File: pingpong.c Progetto: caomw/SOS
/*
 * Ping-pong put test.
 *
 * Each loop iteration:
 *   1. PE 0 puts nWords longs of VAL into Target on every other PE.
 *   2. The other PEs ("workers") verify Target, then overwrite it with
 *      their own PE number.
 *   3. Each worker puts its Target back into its own nWords-sized chunk of
 *      work[] on PE 0.
 *   4. PE 0 verifies that every word of every worker chunk holds that
 *      worker's PE number.
 *
 * Command line (see the -h text): {nWords} {loop-count}, plus
 *   -s  workers wait on every word of Target instead of only the last one
 *   -v  increase verbosity
 *   -M  output modulo used by the _DEBUG progress messages
 *
 * Returns the number of verification failures seen by this PE, 1 on a bad
 * argument, and 0 when started with a single PE.  Allocation failures end
 * the job through shmem_global_exit(1).
 *
 * NOTE(review): pSync0..pSync4, work, Target, src, Slow, Verbose,
 * output_mod, VAL, DFLT_NWORDS, DFLT_LOOPS, atoi_scaled() and the
 * Rfprintf/Rprintf/RDprintf macros are defined outside this block.
 */
int
main(int argc, char* argv[])
{
	int c, j, loops, k, l;
	int my_pe, nProcs, nWorkers;
	int  nWords=1;
	int  failures=0;
	char *prog_name;
	long *wp,work_sz;

	/* each of the five barriers in the main loop has its own pSync array */
	for(j=0; j < SHMEM_BARRIER_SYNC_SIZE; j++) {
		pSync0[j] = pSync1[j] = pSync2[j] = pSync3[j] =
			pSync4[j] = SHMEM_SYNC_VALUE;
	}

	shmem_init();
	my_pe = shmem_my_pe();
	nProcs = shmem_n_pes();
	nWorkers = nProcs - 1;

	if (nProcs == 1) {
		Rfprintf(stderr,
			"ERR - Requires > 1 PEs\n");
		shmem_finalize();
		return 0;
	}

	/* report (but do not abort on) PEs that are not accessible */
	for(j=0; j < nProcs; j++)
		if ( shmem_pe_accessible(j) != 1 ) {
			fprintf(stderr,
				"ERR - pe %d not accessible from pe %d\n",
				j, my_pe);
		}

	/* program name without its directory part, for messages */
	prog_name = strrchr(argv[0],'/');
	if ( prog_name )
		prog_name++;
	else
		prog_name = argv[0];

	while((c=getopt(argc,argv,"hvM:s")) != -1) {
		switch(c) {
		  case 's':
			Slow++;
			break;
		  case 'v':
			Verbose++;
			break;
		  case 'M':
			output_mod = atoi(optarg);
			if (output_mod <= 0) {
				Rfprintf(stderr, "ERR - output modulo arg out of "
						"bounds '%d'?\n", output_mod);
				shmem_finalize();
				return 1;
			}
			Rfprintf(stderr,"%s: output modulo %d\n",
					prog_name,output_mod);
			break;
		  case 'h':
			Rfprintf(stderr,
				"usage: %s {nWords-2-put(%d)K/M} {Loop-count(%d)K/M}\n",
				prog_name, DFLT_NWORDS, DFLT_LOOPS);
			shmem_finalize();
			return 1;
		  default:
			shmem_finalize();
			return 1;
		}
	}

	/* first positional argument: number of longs per put */
	if (optind == argc)
		nWords = DFLT_NWORDS;
	else {
		nWords = atoi_scaled(argv[optind++]);
		if (nWords <= 0) {
			Rfprintf(stderr, "ERR - Bad nWords arg '%d'?\n", nWords);
			shmem_finalize();
			return 1;
		}
	}

	/* second positional argument: loop count */
	if (optind == argc)
		loops = DFLT_LOOPS;
	else {
		loops = atoi_scaled(argv[optind++]);
		if (loops <= 0 || loops > 1000000) {
			Rfprintf(stderr,
				"ERR - loops arg out of bounds '%d'?\n", loops);
			shmem_finalize();
			return 1;
		}
	}

	/* results buffer: one nWords-sized chunk per PE */
	work_sz = (nProcs*nWords) * sizeof(long);
	work = shmem_malloc( work_sz );
	if ( !work ) {
		fprintf(stderr,"[%d] ERR - work = shmem_malloc(%ld) ?\n",my_pe,work_sz);
		shmem_global_exit(1);
	}

	/* one allocation holds both Target (first half) and src (second half) */
	Target = shmem_malloc( 2 * nWords * sizeof(long) );
	if ( !Target ) {
		/* report the size that was actually requested */
		fprintf(stderr,"[%d] ERR - Target = shmem_malloc(%ld) ?\n",
				my_pe, (long)(2 * nWords * sizeof(long)));
		shmem_global_exit(1);
	}
	src = &Target[nWords];

#if _DEBUG
	Rprintf("%s: %d loops of %d longs per put\n",prog_name,loops,nWords);
#endif

	for(j=0; j < nWords; j++)
		src[j] = VAL;

	for(j=0; j < loops; j++) {

#if _DEBUG
		if ( Verbose && (j==0 || (j % output_mod) == 0) )
			fprintf(stderr,"[%d] +(%d)\n", my_pe,j);
#endif
		shmem_barrier(0, 0, nProcs, pSync0);
		if ( my_pe == 0 ) {
			int p;
			for(p=1; p < nProcs; p++)
				shmem_long_put(Target, src, nWords, p);
		}
		else {
			if (Slow) {
				/* wait for each put to complete */
				for(k=0; k < nWords; k++)
					shmem_wait(&Target[k],my_pe);
			} else {
				/* wait for last word to be written */
				shmem_wait(&Target[nWords-1],my_pe);
			}
		}
#if _DEBUG
		if ( Verbose && (j==0 || (j % output_mod) == 0) )
			fprintf(stderr,"[%d] -(%d)\n", shmem_my_pe(),j);
#endif
		shmem_barrier(0, 0, nProcs, pSync1);

		RDprintf("Workers[1 ... %d] verify Target data put by proc0\n",
			nWorkers);

		/* workers verify put data is expected */
		if ( my_pe != 0 ) {
			for(k=0; k < nWords; k++) {
				if (Target[k] != VAL) {
					fprintf(stderr, "[%d] Target[%d] %#lx "
							"!= %#x?\n",
							my_pe,k,Target[k],VAL);
					failures++;
				}
				assert(Target[k] == VAL);
				/* mark the word with our PE number for the return trip */
				Target[k] = my_pe;
			}
		}
		else	/* clear results buffer, workers will put here */
			memset(work, 0, work_sz);

		shmem_barrier(0, 0, nProcs, pSync2);

		RDprintf("Workers[1 ... %d] put Target data to PE0 work "
			"vector\n",nWorkers);

		if ( my_pe != 0 ) {
			/* push nWords of val my_pe back to PE zero */
			shmem_long_put(&work[my_pe * nWords], Target, nWords, 0);
		}
		else {
			/* wait for procs 1 ... nProcs to complete put()s */
			for(l=1; l < nProcs; l++) {
				wp = &work[ l*nWords ]; // procs nWords chunk
#if 1
				/* wait for last long to be written from each PE */
				shmem_wait(&wp[nWords-1],0);
#else
				for(k=0; k < nWords; k++)
					shmem_wait(&wp[k],0);
#endif
			}
		}

		shmem_barrier(0, 0, nProcs, pSync3);

		if ( my_pe == 0 ) {
			RDprintf("Loop(%d) PE0 verifing work data.\n",j);
			for(l=1; l < nProcs; l++) {
				wp = &work[ l*nWords ]; // procs nWords chunk
				/*
				 * Check every word of the chunk; the report shows the
				 * word that actually mismatched (wp[k]).
				 */
				for(k=0; k < nWords; k++) {
					if (wp[k] != l) {
						fprintf(stderr,
						"[0] PE(%d)_work[%d] %ld "
							"!= %d?\n",
							l,k,wp[k],l);
						failures++;
					}
					assert(wp[k] == l);
				}
				/* stop checking further PEs once something failed */
				if (failures)
					break;
			}
		}
		shmem_barrier(0, 0, nProcs, pSync4);
#if _DEBUG
		if (loops > 1) {
			Rfprintf(stderr,".");
			RDprintf("Loop(%d) Pass.\n",j);
		}
#endif
	}

	shmem_free( work );
	shmem_free( Target );

#if _DEBUG
	Rfprintf(stderr,"\n");fflush(stderr);
	shmem_barrier_all();
	RDprintf("%d(%d) Exit(%d)\n", my_pe, nProcs, failures);
#endif

	shmem_finalize();

	return failures;
}
Esempio n. 2
0
/*
 * shmem_int_put() bandwidth test.
 *
 * Every PE puts `elements` ints to PE (me+1) % npes `loops` times and
 * accumulates the time spent in the put calls.  The per-PE times are
 * collected on PE 0 in two ways (a put into total_time[] and a
 * sum-to-all reduction); PE 0 compares the two averages and, when
 * Verbose is set, prints the resulting rate.
 *
 * Options: -v verbose, -e elements, -l loops, -s barrier after every put,
 *          -t print a progress marker every 200 loops, -h usage.
 *
 * Returns 0 on success (and for -h), 1 on a bad option.  Allocation or
 * data-verification failures end the job through shmem_global_exit(1).
 *
 * NOTE(review): elements, Verbose, Sync, Track, time_taken, sum_time,
 * pWrk, pSync, usage() and atoi_scaled() are defined outside this block.
 */
int
main(int argc, char **argv)
{
    int loops=DFLT_LOOPS;
    char *pgm;
    int *Target;
    int *Source;
    int i, me, npes;
    int target_PE;
    long bytes;
    double start_time, *total_time;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* program name without its directory part, for messages */
    if ((pgm=strrchr(argv[0],'/')))
        pgm++;
    else
        pgm = argv[0];

    while ((i = getopt (argc, argv, "hve:l:st")) != EOF) {
        switch (i)
        {
            case 'v':
                Verbose++;
                break;
            case 'e':
                if ((elements = atoi_scaled(optarg)) <= 0) {
                    fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                    shmem_finalize();
                    return 1;
                }
                break;
            case 'l':
                if ((loops = atoi_scaled(optarg)) <= 0) {
                    fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                    shmem_finalize();
                    return 1;
                }
                break;
            case 's':
                Sync++;
                break;
            case 't':
                Track++;
                break;
            case 'h':
                if (me == 0)
                    usage(pgm);
                /* finalize here too, like every other exit path */
                shmem_finalize();
                return 0;
            default:
                if (me == 0) {
                    fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                    usage(pgm);
                }
                shmem_finalize();
                return 1;
        }
    }

    for(i=0; i < SHMEM_REDUCE_SYNC_SIZE; i++)
        pSync[i] = SHMEM_SYNC_VALUE;

    target_PE = (me+1) % npes;

    /* one slot per PE; only PE 0's copy is read back below */
    total_time = (double *) shmem_malloc( npes * sizeof(double) );
    if (!total_time) {
        fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n",
                (long)(npes * sizeof(double)));
        shmem_global_exit(1);
    }
    for(i=0; i < npes; i++)
        total_time[i] = -1.0;

    Source = (int *) shmem_malloc( elements * sizeof(*Source) );
    if (!Source) {
        fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n",
                (long)(elements * sizeof(*Source)));
        shmem_free(total_time);
        shmem_global_exit(1);
    }

    Target = (int *) shmem_malloc( elements * sizeof(*Target) );
    if (!Target) {
        fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n",
                (long)(elements * sizeof(*Target)));
        shmem_free(Source);
        shmem_free(total_time);
        shmem_global_exit(1);
    }

    /* Source holds 1..elements; Target starts with a value the put must replace */
    for (i = 0; i < elements; i++) {
        Target[i] = -90;
        Source[i] = i + 1;
    }

    bytes = loops * sizeof(int) * elements;

    if (Verbose && me==0) {
        fprintf(stderr,
                "%s: INFO - %d loops, put %d (int) elements to PE+1 Max put ??\n",
                pgm, loops, elements);
    }
    shmem_barrier_all();

    for(i=0; i < loops; i++) {

        start_time = shmemx_wtime();

        shmem_int_put(Target, Source, elements, target_PE);

        time_taken += (shmemx_wtime() - start_time);

        if (me==0) {
            if ( Track && i > 0 && ((i % 200) == 0))
                fprintf(stderr,".%d",i);
        }
        if (Sync)
            shmem_barrier_all();
    }

    // collect time per node.
    shmem_double_put( &total_time[me], &time_taken, 1, 0 );
    shmem_double_sum_to_all(&sum_time, &time_taken, 1, 0, 0, npes, pWrk, pSync);

    shmem_barrier_all();

    /* after the barrier every Target must hold the neighbour's Source data */
    for (i = 0; i < elements; i++) {
        if (Target[i] != i + 1) {
            printf("%d: Error Target[%d] = %d, expected %d\n",
                   me, i, Target[i], i + 1);
            shmem_global_exit(1);
        }
    }

    if ( Track && me == 0 ) fprintf(stderr,"\n");

    if(Verbose && me == 0) {
        double rate, comp_time;

        if (Verbose > 1)
            fprintf(stdout,"Individule PE times: (seconds)\n");
        for(i=0,comp_time=0.0; i < npes; i++) {
            comp_time += total_time[i];
            if (Verbose > 1)
                fprintf(stdout,"  PE[%d] %8.6f\n",i,total_time[i]);
        }

        /* averages from the reduction and from the per-PE puts */
        sum_time /= (double)npes;
        comp_time /= (double)npes;
        if (sum_time != comp_time)
            printf("%s: computed_time %7.5f != sum_to_all_time %7.5f)\n",
                   pgm, comp_time, sum_time );

        rate = ((double)bytes/(1024.0*1024.0)) / comp_time;
        printf("%s: shmem_int_put() %7.4f MB/sec (bytes %ld secs %7.4f)\n",
               pgm, rate, bytes, sum_time);
    }

    shmem_free(total_time);
    shmem_free(Target);
    shmem_free(Source);

    shmem_finalize();

    return 0;
}
Esempio n. 3
0
/*
 * shmem_fcollect64() test.
 *
 * For each of `loops` iterations every PE fills src[] with nWords
 * consecutive longs starting at mpe*nWords, fcollects them into dst[] and
 * checks that dst[] holds 0 .. (nWords*num_pes)-1.  nWords grows by nIncr
 * after every iteration.
 *
 * Command line (see the -h text): {-l loopcnt} {numLongs} {loopIncr},
 * plus -v/-V for verbosity.
 *
 * Returns 0 when all iterations pass (and when shmalloc fails, as in the
 * original code), 1 on a bad argument or a data mismatch.
 *
 * NOTE(review): src, dst, pSync, Verbose, DFLT_*, atoi_scaled() and the
 * Rfprintf/Rprintf macros are defined outside this block.
 */
int
main(int argc, char* argv[])
{
	int c, j, cloop, loops = DFLT_LOOPS;
	int mpe, num_pes;
	int nWords=1;
	int nIncr=1;
	int failures=0;
	char *pgm;

	start_pes(0);
	mpe = _my_pe();
	num_pes = _num_pes();

	if (num_pes == 1) {
		Rfprintf(stderr,
			"ERR - Requires > 1 PEs\n");
		return 1;
	}
	/* program name without its directory part, for messages */
	pgm = strrchr(argv[0],'/');
	if ( pgm )
		pgm++;
	else
		pgm = argv[0];

	while((c=getopt(argc,argv,"hqVvl:")) != -1) {
		switch(c) {
		  case 'V':
		  case 'v':
			Verbose++;
			break;
		  case 'l':
			loops = atoi(optarg);
			break;
		  case 'h':
			Rfprintf(stderr,
				"usage: %s {-l loopcnt(%d)} {numLongs(%d)} {loopIncr(%d)}\n",
					pgm,DFLT_LOOPS,DFLT_NWORDS,DFLT_INCR);
			return 1;
		  default:
			return 1;
		}
	}

	/* first positional argument: number of longs contributed per PE */
	if (optind == argc)
		nWords = DFLT_NWORDS;
	else {
		nWords = atoi_scaled(argv[optind++]);
		if (nWords <= 0) {
			Rfprintf(stderr, "ERR - Bad nBytes arg?\n");
			return 1;
		}
	}

	/*
	 * Second positional argument: per-loop increment of nWords.  It is
	 * stored in nIncr (not loops) so the range check below tests the value
	 * the user supplied and -l is left untouched.
	 */
	if (optind == argc)
		nIncr = DFLT_INCR;
	else {
		nIncr = atoi(argv[optind++]);
		if (nIncr <= 0 ) {
			Rfprintf(stderr, "ERR - incLongs arg out of bounds '%d'?\n", nIncr);
			return 1;
		}
	}

	if ( nWords % 8 ) { // integral multiple of longs
		Rprintf("%s: nWords(%d) not a multiple of %ld?\n",
			pgm,nWords,sizeof(long));
		return 1;
	}

	for (c = 0; c < _SHMEM_COLLECT_SYNC_SIZE;c++)
		pSync[c] = _SHMEM_SYNC_VALUE;

	if (Verbose && mpe == 0)
		fprintf(stderr,"loops(%d) nWords(%d) incr-per-loop(%d)\n",
			loops,nWords,nIncr);

	for(cloop=1; cloop <= loops; cloop++) {

		c = (sizeof(long)*nWords) * (num_pes + 1); // src + dst allocation.
		//nWords /= sizeof(long); // convert input of bytes --> longs.

		src = (long*)shmalloc(c);
		if ( !src ) {
			Rprintf("[%d] %s: shmalloc(%d) failed?\n", mpe, pgm,c);
			return 0;
		}
		/* dst (nWords*num_pes longs) follows src (nWords longs) */
		dst = &src[nWords];

		for(j=0; j < nWords; j++)
			src[j] = (long) (j + mpe*nWords);

		shmem_barrier_all();

		shmem_fcollect64(dst,src,nWords,0,0,num_pes,pSync);

		// Expect dst to be consecuative integers 0 ... (nLongs*num_pes)-1
		for(j=0; j < (nWords*num_pes); j++) {
			if ( dst[j] != (long) j ) {
				fprintf(stderr,
					"[%d] dst[%d] %ld != expected %d\n",mpe,j,dst[j],j);
				return 1;
			}
		}
		shmem_barrier_all();

		if (Verbose && mpe == 0 && loops > 1) {
			fprintf(stderr,".");
		}

		/*
		 * Release this iteration's buffer before the next shmalloc();
		 * freeing only once after the loop leaked every buffer but the
		 * last one.
		 */
		shfree( (void*)src );

		nWords += nIncr;
	}

	if (Verbose && mpe == 0) {
		fprintf(stderr,"\n");fflush(stderr);
	}
	shmem_barrier_all();
	if (Verbose)
		printf("%d(%d) Exit(%d)\n", mpe, num_pes, failures);

	return failures;
}
Esempio n. 4
0
/*
 * shmem_broadcast32() timing test.
 *
 * PE 0 is the broadcast root.  The broadcast of `elements` ints is
 * repeated `loops` times, cycling through ps_cnt pSync regions of
 * _SHMEM_BCAST_SYNC_SIZE longs each so consecutive broadcasts do not
 * share a pSync.  With Verbose set, PE 0 prints the elapsed time and rate.
 *
 * Options: -v verbose, -e elements, -l loops, -p number of pSync regions,
 *          -s barrier after each broadcast (included in the timing),
 *          -h usage.
 *
 * Returns 0 on success (and for -h), 1 on a bad option or a failed
 * allocation.
 *
 * NOTE(review): pSync, Verbose, Serialize, usage(), atoi_scaled(),
 * N_ELEMENTS and DFLT_LOOPS are defined outside this block.
 */
int
main(int argc, char **argv)
{
    int i,ps,ps_cnt=2;
    int *target;
    int *source;
    int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS;
    char *pgm;
    double start_time, time_taken;

    start_pes(0);
    me = _my_pe();
    npes = _num_pes();

    /* program name without its directory part, for messages */
    if ((pgm=strrchr(argv[0],'/')))
        pgm++;
    else
        pgm = argv[0];

    while ((i = getopt (argc, argv, "hve:l:p:s")) != EOF) {
        switch (i)
        {
          case 'v':
              Verbose++;
              break;
          case 'e':
              if ((elements = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                  return 1;
              }
              break;
          case 'l':
              if ((loops = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                  return 1;
              }
              break;
          case 'p':
              if ((ps_cnt = atoi_scaled(optarg)) <= 0) {
                  /* report the rejected pSync count, not the loop count */
                  fprintf(stderr,"ERR: Bad pSync[] elements %d\n",ps_cnt);
                  return 1;
              }
              break;
          case 's':
              Serialize++;
              break;
          case 'h':
              if (me == 0)
                  usage(pgm);
              return 0;
          default:
              if (me == 0) {
                  fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                  usage(pgm);
              }
              return 1;
        }
    }

    /* ps_cnt regions of _SHMEM_BCAST_SYNC_SIZE longs each */
    ps_cnt *= _SHMEM_BCAST_SYNC_SIZE;
    pSync = shmalloc( ps_cnt * sizeof(long) );
    if (!pSync) {
        fprintf(stderr,"ERR: bad pSync shmalloc(%ld)\n",
                (long)(ps_cnt * sizeof(long)));
        return 1;
    }

    for (i = 0; i < ps_cnt; i++)
        pSync[i] = _SHMEM_SYNC_VALUE;

    source = (int *) shmalloc( elements * sizeof(*source) );
    if (!source) {
        fprintf(stderr,"ERR: bad source shmalloc(%ld)\n",
                (long)(elements * sizeof(*source)));
        return 1;
    }

    target = (int *) shmalloc( elements * sizeof(*target) );
    if (!target) {
        fprintf(stderr,"ERR: bad target shmalloc(%ld)\n",
                (long)(elements * sizeof(*target)));
        return 1;
    }

    for (i = 0; i < elements; i += 1) {
        source[i] = i + 1;
        target[i] = -90;
    }

    if (me==0 && Verbose)
        fprintf(stderr,"ps_cnt %d loops %d nElems %d\n",
                        ps_cnt,loops,elements);

    shmem_barrier_all();

    for(time_taken = 0.0, ps = i = 0; i < loops; i++) {

        start_time = shmem_wtime();

        shmem_broadcast32(target, source, elements, 0, 0, 0, npes, &pSync[ps]);

        if (Serialize) shmem_barrier_all();

        time_taken += (shmem_wtime() - start_time);

        /* advance to the next pSync region, wrapping at the end */
        if (ps_cnt > 1 ) {
            ps += _SHMEM_BCAST_SYNC_SIZE;
            if ( ps >= ps_cnt ) ps = 0;
        }
    }

    if(me == 0 && Verbose) {
        /*
         * Total payload in MB, computed in floating point so small runs do
         * not truncate to 0 and large runs do not overflow int.
         */
        double mbytes = ((double)elements * (double)loops
                            * (double)sizeof(*source)) / (1024.0*1024.0);

        printf("%d loops of Broadcast32(%ld bytes) over %d PEs: %7.3f secs\n",
            loops, (long)(elements*sizeof(*source)), npes, time_taken);
        printf("  %7.5f secs per broadcast() @ %7.4f MB/sec\n",
               (time_taken/(double)loops), (mbytes / time_taken) );
    }

    if (Verbose > 1)  fprintf(stderr,"[%d] pre B1\n",me);

    shmem_barrier_all();

    if (Verbose > 1)  fprintf(stderr,"[%d] post B1\n",me);

    shfree(pSync);
    shfree(target);
    shfree(source);

    return 0;
}
Esempio n. 5
0
/*
 * shmem_int_get() bandwidth test.
 *
 * Every PE gets `elements` ints from PE (me+1) % npes `loops` times and
 * accumulates the time spent in the get calls.  Each PE then puts its
 * time into total_time[me] on PE 0, which (when Verbose is set) prints
 * the average time and rate.
 *
 * Options: -v verbose, -e elements, -l loops, -s barrier after every get,
 *          -t print a progress marker every 200 loops, -h usage.
 *
 * Returns 0 on success (and for -h), 1 on a bad option.  Allocation or
 * data-verification failures end the job through shmem_global_exit(1).
 *
 * NOTE(review): elements, total_time, Verbose, Sync, Track, usage() and
 * atoi_scaled() are defined outside this block.
 */
int
main(int argc, char **argv)
{
    int loops=DFLT_LOOPS;
    char *pgm;
    int *Target;
    int *Source;
    int i, me, npes;
    int target_pe;
    long bytes;
    double time_taken=0.0, start_time;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* program name without its directory part, for messages */
    if ((pgm=strrchr(argv[0],'/')))
        pgm++;
    else
        pgm = argv[0];

    while ((i = getopt (argc, argv, "hve:l:st")) != EOF) {
        switch (i)
        {
            case 'v':
                Verbose++;
                break;
            case 'e':
                if ((elements = atoi_scaled(optarg)) <= 0) {
                    fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                    shmem_finalize();
                    return 1;
                }
                break;
            case 'l':
                if ((loops = atoi_scaled(optarg)) <= 0) {
                    fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                    shmem_finalize();
                    return 1;
                }
                break;
            case 's':
                Sync++;
                break;
            case 't':
                Track++;
                break;
            case 'h':
                if (me == 0)
                    usage(pgm);
                /* finalize here too, like every other exit path */
                shmem_finalize();
                return 0;
            default:
                if (me == 0) {
                    fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                    usage(pgm);
                }
                shmem_finalize();
                return 1;
        }
    }

    target_pe = (me+1) % npes;

    /* one slot per PE; only PE 0's copy is read back below */
    total_time = (double *) shmem_malloc( npes * sizeof(double) );
    if (!total_time) {
        fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n",
                (long)(npes * sizeof(double)));
        shmem_global_exit(1);
    }

    Source = (int *) shmem_malloc( elements * sizeof(*Source) );
    if (!Source) {
        fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n",
                (long)(elements * sizeof(*Source)));
        shmem_free(total_time);
        shmem_global_exit(1);
    }

    Target = (int *) shmem_malloc( elements * sizeof(*Target) );
    if (!Target) {
        fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n",
                (long)(elements * sizeof(*Target)));
        shmem_free(Source);
        shmem_free(total_time);
        shmem_global_exit(1);
    }

    /* Source holds 1..elements; Target starts with a value the get must replace */
    for (i = 0; i < elements; i++) {
        Target[i] = -90;
        Source[i] = i + 1;
    }

    bytes = loops * sizeof(int) * elements;

    if (Verbose && me==0)
        fprintf(stderr, "%s: INFO - %d loops, get %d (int) elements from PE+1\n",
                pgm, loops, elements);

    shmem_barrier_all();

    for(i=0; i < loops; i++) {

        start_time = shmemx_wtime();

        shmem_int_get( Target, Source, elements, target_pe );

        time_taken += shmemx_wtime() - start_time;

        if (me==0) {
            if ( Track && i > 0 && ((i % 200) == 0))
                fprintf(stderr,".%d",i);
        }
        if (Sync)
            shmem_barrier_all();
    }

    // collect time per node elapsed time.
    shmem_double_put( &total_time[me], &time_taken, 1, 0 );

    shmem_barrier_all();

    /* Target must now hold the neighbour's Source data */
    for (i = 0; i < elements; i++) {
        if (Target[i] != i + 1) {
            printf("%d: Error Target[%d] = %d, expected %d\n",
                   me, i, Target[i], i + 1);
            shmem_global_exit(1);
        }
    }

    if ( Track && me == 0 )
        fprintf(stderr,"\n");

    if (Verbose && me == 0) {
        double rate,secs;

        // average time
        for(i=0,secs=0.0; i < npes; i++)
            secs += total_time[i];
        secs /= (double)npes;
        rate = ((double)bytes/(1024.0*1024.0)) / secs;
        printf("%s: ave %5.3f MB/sec (bytes %ld secs %5.3f)\n",
               pgm, rate, bytes, secs);
    }

    shmem_free(total_time);
    shmem_free(Target);
    shmem_free(Source);

    shmem_finalize();

    return 0;
}
Esempio n. 6
0
/*
 * Driver for a set of put/get/collective tests.
 *
 * The lower-case switches -a -b -c -m -n each enable one test group; when
 * none of them is given, all groups run.  Every group is called with the
 * same source/target buffers of `elements` ints and the same loop count.
 *
 * Other options: -v verbose, -q quiet (Verbose = 0), -e elements,
 *                -l loops, -h usage.
 *
 * Returns 0 on success (and for -h), 1 on a bad option or a failed
 * allocation.
 *
 * NOTE(review): All2, Bcast, Collect, Many, Neighbor, Verbose, usage(),
 * atoi_scaled(), N_ELEMENTS, DFLT_LOOPS and the test routines called
 * below are defined outside this block.
 */
int
main(int argc, char **argv)
{
    int i;
    int *target;
    int *source;
    int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS;
    char *pgm;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    /* program name without its directory part, for messages */
    if ((pgm=strrchr(argv[0],'/')))
        pgm++;
    else
        pgm = argv[0];

    /* lower-case switch enable only a specific test; otherwise run all tests */
    while ((i = getopt (argc, argv, "hvqe:l:abcmn")) != EOF) {
        switch (i)
        {
          case 'a':
              All2++;
              break;
          case 'b':
              Bcast++;
              break;
          case 'c':
              Collect++;
              break;
          case 'm':
              Many++;
              break;
          case 'n':
              Neighbor++;
              break;
          case 'q':
              Verbose=0;
              break;
          case 'v':
              Verbose++;
              break;
          case 'e':
              if ((elements = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad elements count %d\n",elements);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 'l':
              if ((loops = atoi_scaled(optarg)) <= 0) {
                  fprintf(stderr,"ERR: Bad loop count %d\n",loops);
                  shmem_finalize();
                  return 1;
              }
              break;
          case 'h':
              if (me == 0)
                  usage(pgm);
              shmem_finalize();
              return 0;
          default:
              if (me == 0) {
                  fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i);
                  usage(pgm);
              }
              shmem_finalize();
              return 1;
        }
    }

    /* no test selected explicitly: run them all */
    if (All2==0 && Bcast==0 && Collect==0 && Many==0 && Neighbor==0)
        All2 = Bcast = Collect = Many = Neighbor = 1;

    source = (int *) shmem_malloc( elements * sizeof(*source) );
    target = (int *) shmem_malloc( elements * sizeof(*target) );

    /* do not touch the buffers if either allocation failed */
    if (!source || !target) {
        fprintf(stderr,"ERR: bad source/target shmem_malloc(%ld)\n",
                (long)(elements * sizeof(*source)));
        shmem_finalize();
        return 1;
    }

    for (i = 0; i < elements; i += 1) {
        source[i] = i + 1;
        target[i] = -90;
    }

    shmem_barrier_all();

    if (Neighbor) {
        neighbor_put( target, source, elements, me, npes, loops );
        neighbor_get( target, source, elements, me, npes, loops );
    }

    if (All2) {
        all2all_put( target, source, elements, me, npes, loops );
        all2all_get( target, source, elements, me, npes, loops );
    }

    if (Many) {
        one2many_put( target, source, elements, me, npes, loops );
        many2one_get( target, source, elements, me, npes, loops );
    }

    if (Bcast) bcast( target, source, elements, me, npes, loops );

    /* the collect tests are handed NULL instead of the target buffer */
    if (Collect) {
        collect( NULL, source, elements, me, npes, loops );
        fcollect( NULL, source, elements, me, npes, loops );
    }

    shmem_barrier_all();

    shmem_free(target);
    shmem_free(source);

    shmem_finalize();

    return 0;
}