int main(void) { int i, me, npes; int errors = 0; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); for (i = 0; i < NELEM; i++) { src[i] = me; dst_max[i] = -1; dst_min[i] = -1; } for (i = 0; i < SHMEM_REDUCE_SYNC_SIZE; i++) { max_psync[i] = SHMEM_SYNC_VALUE; max_psync[i] = SHMEM_SYNC_VALUE; } if (me == 0) printf("Shrinking active set test\n"); shmem_barrier_all(); /* A total of npes tests are performed, where the active set in each test * includes PEs i..npes-1 */ for (i = 0; i <= me; i++) { int j; if (me == i) printf(" + PE_start=%d, logPE_stride=0, PE_size=%d\n", i, npes-i); shmem_long_max_to_all(dst_max, src, NELEM, i, 0, npes-i, max_pwrk, max_psync); /* Validate reduced data */ for (j = 0; j < NELEM; j++) { long expected = npes-1; if (dst_max[j] != expected) { printf("%d: Max expected dst_max[%d] = %ld, got dst_max[%d] = %ld, iteration %d\n", me, j, expected, j, dst_max[j], i); errors++; } } shmem_long_min_to_all(dst_min, src, NELEM, i, 0, npes-i, min_pwrk, min_psync); /* Validate reduced data */ for (j = 0; j < NELEM; j++) { long expected = i; if (dst_min[j] != expected) { printf("%d: Min expected dst_min[%d] = %ld, got dst_min[%d] = %ld, iteration %d\n", me, j, expected, j, dst_min[j], i); errors++; } } } shmem_finalize(); return errors != 0; }
int main(int argc, char **argv) { int i,j,iter; int my_pe,n_pes; int *flag,*one; size_t max_elements,max_elements_bytes; size_t elements[16] = {1,2,4,8,12,16,24,32,64,128,256,512,1024,2048,4096,8192}; int num_elements = 16; short *srce_short,*targ_short; int *srce_int,*targ_int; long *srce_long,*targ_long; float *srce_float,*targ_float; double *srce_double,*targ_double; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); flag = shmem_malloc((size_t) sizeof(int)); one = shmem_malloc((size_t) sizeof(int)); *one = 1; /* fail if trying to use odd number of processors */ if ( (n_pes % 2) != 0 ){ fprintf(stderr, "FAIL - test requires even number of PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_both_put_nb_size(%s)\n", argv[0]); /* alloc arrays */ max_elements = (size_t) (MAX_SIZE / sizeof(int)); max_elements_bytes = (size_t) (sizeof(int)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_int_put_nb max_elements = %d\n",max_elements); srce_int = shmem_malloc(max_elements_bytes); targ_int = shmem_malloc(max_elements_bytes); if((srce_int == NULL) || (targ_int == NULL)) shmalloc_error(); max_elements = (size_t) (MAX_SIZE / sizeof(short)); max_elements_bytes = (size_t) (sizeof(short)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_short_put max_elements = %d\n",max_elements); srce_short = shmem_malloc(max_elements_bytes); targ_short = shmem_malloc(max_elements_bytes); if((srce_short == NULL) || (targ_short == NULL)) shmalloc_error(); max_elements = (size_t) (MAX_SIZE / sizeof(long)); max_elements_bytes = (size_t) (sizeof(long)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_long_put_nb max_elements = %d\n",max_elements); srce_long = shmem_malloc(max_elements_bytes); targ_long = shmem_malloc(max_elements_bytes); if((srce_long == NULL) || (targ_long == NULL)) shmalloc_error(); max_elements = (size_t) (MAX_SIZE / sizeof(float)); max_elements_bytes = (size_t) (sizeof(float)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_float_put_nb max_elements 
= %d\n",max_elements); srce_float = shmem_malloc(max_elements_bytes); targ_float = shmem_malloc(max_elements_bytes); if((srce_float == NULL) || (targ_float == NULL)) shmalloc_error(); max_elements = (size_t) (MAX_SIZE / sizeof(double)); max_elements_bytes = (size_t) (sizeof(double)*max_elements); if(my_pe == 0) fprintf(stderr,"shmem_double_put_nb max_elements = %d\n",max_elements); srce_double = shmem_malloc(max_elements_bytes); targ_double = shmem_malloc(max_elements_bytes); if((srce_double == NULL) || (targ_double == NULL)) shmalloc_error(); if(my_pe == 0) fprintf(stderr,"Actual value used for max_elements = %d\n",max_elements); /* try the different sizes MAX_ITER times */ for (iter = 0; iter < MAX_ITER; iter++) { for (i = 0; i < num_elements; i++) { *flag = 0; if (elements[i] <= max_elements) { if ( (my_pe % 2) == 0 ) for(j = 0; j < elements[i]; j++) { srce_short[j] = (short)(my_pe+j); srce_int[j] = (int)(iter*10000+elements[i]*100+my_pe+j); srce_long[j] = (long)(iter*10000+elements[i]*100+my_pe+j); srce_float[j] = (float)(iter*10000+elements[i]*100+my_pe+j); srce_double[j] = (double)(iter*10000+elements[i]*100+my_pe+j); } else for(j = 0; j < elements[i]; j++) { targ_short[j] = (short)(my_pe+j); targ_int[j] = (int)(iter*10000+elements[i]*100+my_pe+j); targ_long[j] = (long)(iter*10000+elements[i]*100+my_pe+j); targ_float[j] = (float)(iter*10000+elements[i]*100+my_pe+j); targ_double[j] = (double)(iter*10000+elements[i]*100+my_pe+j); } shmem_barrier_all(); if ( (my_pe % 2) == 0 ) { #ifndef OPENSHMEM shmemx_int_put_nb(targ_int,srce_int,elements[i],my_pe+1,NULL); shmemx_long_put_nb(targ_long,srce_long,elements[i],my_pe+1,NULL); shmemx_float_put_nb(targ_float,srce_float,elements[i],my_pe+1,NULL); shmemx_double_put_nb(targ_double,srce_double,elements[i],my_pe+1,NULL); #else shmem_int_put_nbi(targ_int,srce_int,elements[i],my_pe+1); shmem_long_put_nbi(targ_long,srce_long,elements[i],my_pe+1); shmem_float_put_nbi(targ_float,srce_float,elements[i],my_pe+1); 
shmem_double_put_nbi(targ_double,srce_double,elements[i],my_pe+1); #endif /* this one is blocking */ shmem_short_put(targ_short,srce_short,elements[i],my_pe+1); shmem_quiet(); shmem_int_put(flag,one,(size_t)1,my_pe+1); } else { shmem_int_wait(flag,0); for(j = 0; j < elements[i]; j++) { if ( targ_short[j] != (short)(my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] iter=%d i=%d targ_short[%d]=%d not equal %d\n", my_pe,iter,i,j,targ_short[j],my_pe+j-1); if ( targ_int[j] != (int)(iter*10000+elements[i]*100+my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] iter=%d i=%d targ_int[%d]=%d not equal %d\n", my_pe,iter,i,j,targ_int[j],iter*10000+elements[i]*100+my_pe+j-1); if ( targ_long[j] != (long)(iter*10000+elements[i]*100+my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] iter=%d i=%d targ_long[%d]=%d not equal %d\n", my_pe,iter,i,j,targ_long[j],iter*10000+elements[i]*100+my_pe+j-1); if ( targ_float[j] != (float)(iter*10000+elements[i]*100+my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] iter=%d i=%d targ_long[%d]=%f not equal %d\n", my_pe,iter,i,j,targ_float[j],iter*10000+elements[i]*100+my_pe+j-1); if ( targ_double[j] != (double)(iter*10000+elements[i]*100+my_pe+j-1) ) fprintf(stderr, "FAIL: PE [%d] iter=%d i=%d targ_double[%d]=%f not equal %d\n", my_pe,iter,i,j,targ_double[j],iter*10000+elements[i]*100+my_pe+j-1); } } } } } shmem_free(srce_short); shmem_free(targ_short); shmem_free(srce_int); shmem_free(targ_int); shmem_free(srce_long); shmem_free(targ_long); shmem_free(srce_float); shmem_free(targ_float); shmem_free(srce_double); shmem_free(targ_double); #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
int main(int argc, char** argv) { #ifndef SEQUENTIAL TM2C_INIT; #else SEQ_INIT; #endif struct option long_options[] = { // These options don't set a flag {"help", no_argument, NULL, 'h'}, {"verbose", no_argument, NULL, 'v'}, {"duration", required_argument, NULL, 'd'}, {"initial-size", required_argument, NULL, 'i'}, {"range", required_argument, NULL, 'r'}, {"update-rate", required_argument, NULL, 'u'}, {"elasticity", required_argument, NULL, 'x'}, {"effective", required_argument, NULL, 'f'}, {NULL, 0, NULL, 0} }; intset_t* set; int i, c, size; val_t last = 0; val_t val = 0; thread_data_t* data; double duration = DEFAULT_DURATION; int initial = DEFAULT_INITIAL; int nb_app_cores = NUM_APP_NODES; #if defined(SEQUENTIAL) nb_app_cores = 1; #endif long range = DEFAULT_RANGE; int update = DEFAULT_UPDATE; int unit_tx = DEFAULT_ELASTICITY; int alternate = DEFAULT_ALTERNATE; int effective = DEFAULT_EFFECTIVE; int verbose = DEFAULT_VERBOSE; unsigned int seed = 0; while (1) { i = 0; c = getopt_long(argc, argv, "hAf:d:i:r:u:x:v", long_options, &i); if (c == -1) break; if (c == 0 && long_options[i].flag == 0) c = long_options[i].val; switch (c) { case 0: /* Flag is automatically set */ break; case 'h': ONCE { printf("intset -- STM stress test " "(linked list)\n" "\n" "Usage:\n" " intset [options...]\n" "\n" "Options:\n" " -h, --help\n" " Print this message\n" " -A, --alternate (default="XSTR(DEFAULT_ALTERNATE)")\n" " Consecutive insert/remove target the same value\n" " -f, --effective <int>\n" " update txs must effectively write (0=trial, 1=effective, default=" XSTR(DEFAULT_EFFECTIVE) ")\n" " -d, --duration secs<double>\n" " Test duration in milliseconds (0=infinite, default=" XSTR(DEFAULT_DURATION) ")\n" " -i, --initial-size <int>\n" " Number of elements to insert before test (default=" XSTR(DEFAULT_INITIAL) ")\n" " -r, --range <int>\n" " Range of integer values inserted in set (default=" XSTR(DEFAULT_RANGE) ")\n" " -u, --update-rate <int>\n" " Percentage of update transactions 
(default=" XSTR(DEFAULT_UPDATE) ")\n" " -v , --verbose\n" " Print detailed stats" ); } goto end; case 'A': alternate = 1; break; case 'f': effective = atoi(optarg); break; case 'd': duration = atof(optarg); break; case 'i': initial = atoi(optarg); break; case 'r': range = atol(optarg); break; case 'u': update = atoi(optarg); break; case 'x': unit_tx = atoi(optarg); break; case 'v': verbose = 1; break; case '?': ONCE { printf("Use -h or --help for help\n"); } default: goto end; } } if (seed == 0) { srand_core(); seed = rand_range((NODE_ID() + 17) * 123); srand(seed); } else srand(seed); assert(duration >= 0); assert(initial >= 0); assert(nb_app_cores > 0); assert(range > 0 && range >= initial); assert(update >= 0 && update <= 100); ONCE { printf("Bench type : linked list\n"); #ifdef SEQUENTIAL printf(" sequential\n"); #elif defined(EARLY_RELEASE ) printf(" using early-release\n"); #elif defined(READ_VALIDATION) printf(" using read-validation\n"); #endif #ifdef LOCKS printf(" with locks\n"); #endif printf("Duration : %f\n", duration); printf("Initial size : %d\n", initial); printf("Nb cores : %d\n", nb_app_cores); printf("Value range : %ld\n", range); printf("Update rate : %d\n", update); printf("Elasticity : %d\n", unit_tx); printf("Alternate : %d\n", alternate); printf("Effective : %d\n", effective); FLUSH; } if ((data = (thread_data_t*) malloc(sizeof (thread_data_t))) == NULL) { perror("malloc"); exit(1); } set = set_new(); BARRIER; ONCE { /* Populate set */ /* printf("Adding %d entries to set\n", initial); */ i = 0; while (i < initial) { val = rand_range(range); if (set_add(set, val, 0)) { last = val; i++; } } size = set_size(set); /* set_print(set); */ printf("Set size : %d\n", size); assert(size == initial); FLUSH } shmem_init(10 * 1024 * (NODE_ID()-1) * sizeof (node_t) + ((initial + 2) * sizeof (node_t))); /* Access set from all threads */ data->first = last; data->range = range; data->update = update; data->unit_tx = unit_tx; data->alternate = alternate; 
data->effective = effective; data->nb_add = 0; data->nb_added = 0; data->nb_remove = 0; data->nb_removed = 0; data->nb_contains = 0; data->nb_found = 0; data->set = set; data->seed = seed; BARRIER; /* Start */ test(data, duration); if (verbose) { APP_EXEC_ORDER { printf("-- Core %d\n", NODE_ID()); printf(" #add : %lu\n", data->nb_add); printf(" #added : %lu\n", data->nb_added); printf(" #remove : %lu\n", data->nb_remove); printf(" #removed : %lu\n", data->nb_removed); printf(" #contains : %lu\n", data->nb_contains); printf(" #found : %lu\n", data->nb_found); printf("---------------------------------------------------"); FLUSH; } APP_EXEC_ORDER_END; } /* Delete set */ BARRIER; ONCE { int size_after = set_size(set); /* set_print(set); */ printf("Set size (af): %u\n", size_after); } BARRIER; #ifdef SEQUENTIAL int total_ops = data->nb_add + data->nb_contains + data->nb_remove; printf("#Ops : %d\n", total_ops); printf("#Ops/s : %d\n", (int) (total_ops / duration__)); printf("#Latency : %f\n", duration__ / total_ops); FLUSH; #endif //set_delete(set); /* Cleanup STM */ free(data); BARRIER; end: #ifndef SEQUENTIAL TM_END; #endif EXIT(0); }
/* Print the result line for one get routine.  `mismatch` is nonzero when at
 * least one received element differed from the expected value. */
static void report_get_result(const char *name, int mismatch)
{
    if (mismatch == 0)
        printf("Test %s: Passed\n", name);
    else
        printf("Test %s: Failed\n", name);
}

/*
 * Get-routine test with symmetric-heap sources.
 *
 * Every PE fills its symmetric source buffers with its own PE number and
 * fetches from PE (me + 1) % npes.  Only PE 0 validates and reports, and it
 * expects the value 1 everywhere because its neighbour is PE 1.
 * Needs more than one PE; otherwise the test is skipped.  Always returns 0.
 */
int main(void)
{
    int i;
    int nextpe;
    int me, npes;
    int bad1, bad2, bad3, bad4, bad5, bad6, bad7, bad8;

    short dest1[N];
    int dest2[N];
    long dest3[N];
    long double dest4[N];
    long long dest5[N];
    double dest6[N];
    float dest7[N];
    /* Was an unchecked, never-freed malloc(N); a local array like the other
     * destinations needs neither a check nor a free. */
    char dest8[N];
    short dest9;
    int dest10;
    long dest11;
    double dest12;
    float dest13;

    short *src1;
    int *src2;
    long *src3;
    long double *src4;
    long long *src5;
    double *src6;
    float *src7;
    char *src8;
    short *src9;
    int *src10;
    long *src11;
    double *src12;
    float *src13;

    shmem_init();
    me = shmem_my_pe();
    npes = shmem_n_pes();

    if (npes > 1) {
        bad1 = 0;
        bad2 = 0;
        bad3 = 0;
        bad4 = 0;
        bad5 = 0;
        bad6 = 0;
        bad7 = 0;
        bad8 = 0;

        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        dest9 = -9;
        dest10 = -9;
        dest11 = -9;
        dest12 = -9;
        dest13 = -9;

        src1 = (short *) shmem_malloc(N * sizeof(*src1));
        src2 = (int *) shmem_malloc(N * sizeof(*src2));
        src3 = (long *) shmem_malloc(N * sizeof(*src3));
        src4 = (long double *) shmem_malloc(N * sizeof(*src4));
        src5 = (long long *) shmem_malloc(N * sizeof(*src5));
        src6 = (double *) shmem_malloc(N * sizeof(*src6));
        src7 = (float *) shmem_malloc(N * sizeof(*src7));
        /* src8 is written for N elements below and read for N bytes by
         * shmem_getmem, so it needs N elements (was a fixed 4). */
        src8 = (char *) shmem_malloc(N * sizeof(*src8));
        src9 = (short *) shmem_malloc(sizeof(*src9));
        src10 = (int *) shmem_malloc(sizeof(*src10));
        src11 = (long *) shmem_malloc(sizeof(*src11));
        src12 = (double *) shmem_malloc(sizeof(*src12));
        src13 = (float *) shmem_malloc(sizeof(*src13));

        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        *src9 = (short) me;
        *src10 = me;
        *src11 = (long) me;
        *src12 = (double) me;
        *src13 = (float) me;

        nextpe = (me + 1) % npes;

        /* Testing shmem_short_get, shmem_int_get, shmem_long_get,
           shmem_longdouble_get, shmem_longlong_get, shmem_double_get,
           shmem_float_get, shmem_getmem */
        shmem_barrier_all();

        shmem_short_get(dest1, src1, N, nextpe);
        shmem_int_get(dest2, src2, N, nextpe);
        shmem_long_get(dest3, src3, N, nextpe);
        shmem_longdouble_get(dest4, src4, N, nextpe);
        shmem_longlong_get(dest5, src5, N, nextpe);
        shmem_double_get(dest6, src6, N, nextpe);
        shmem_float_get(dest7, src7, N, nextpe);
        shmem_getmem(dest8, src8, N * sizeof(char), nextpe);

        shmem_barrier_all();

        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (1)) { bad1 = 1; }
                if (dest2[i] != (1)) { bad2 = 1; }
                if (dest3[i] != (1)) { bad3 = 1; }
                if (dest4[i] != (1)) { bad4 = 1; }
                if (dest5[i] != (1)) { bad5 = 1; }
                if (dest6[i] != (1)) { bad6 = 1; }
                if (dest7[i] != (1)) { bad7 = 1; }
                if (dest8[i] != (1)) { bad8 = 1; }
            }
            report_get_result("shmem_short_get", bad1);
            report_get_result("shmem_int_get", bad2);
            report_get_result("shmem_long_get", bad3);
            report_get_result("shmem_longdouble_get", bad4);
            report_get_result("shmem_longlong_get", bad5);
            report_get_result("shmem_double_get", bad6);
            report_get_result("shmem_float_get", bad7);
            report_get_result("shmem_getmem", bad8);
        }

        shmem_barrier_all();

        /* Testing shmem_get32, shmem_get64, shmem_get128 */
        if (sizeof(int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            bad2 = 0;
            bad3 = 0;
            bad4 = 0;

            shmem_barrier_all();

            shmem_get32(dest2, src2, N, nextpe);
            shmem_get64(dest3, src3, N, nextpe);
            shmem_get128(dest4, src4, N, nextpe);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (1)) { bad2 = 1; }
                    if (dest3[i] != (1)) { bad3 = 1; }
                    if (dest4[i] != (1)) { bad4 = 1; }
                }
                report_get_result("shmem_get32", bad2);
                report_get_result("shmem_get64", bad3);
                report_get_result("shmem_get128", bad4);
            }
        } else if (sizeof(int) == 8) {
            /* NOTE(review): this branch pairs get32/get64/get128 with the
             * short/int/long buffers; whether those widths match on such a
             * platform is not checked here - confirm before relying on it. */
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            bad1 = 0;
            bad2 = 0;
            bad3 = 0;

            shmem_barrier_all();

            shmem_get32(dest1, src1, N, nextpe);
            shmem_get64(dest2, src2, N, nextpe);
            shmem_get128(dest3, src3, N, nextpe);

            shmem_barrier_all();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (1)) { bad1 = 1; }
                    if (dest2[i] != (1)) { bad2 = 1; }
                    if (dest3[i] != (1)) { bad3 = 1; }
                }
                report_get_result("shmem_get32", bad1);
                report_get_result("shmem_get64", bad2);
                report_get_result("shmem_get128", bad3);
            }
        }

        /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g,
           shmem_short_g */
        shmem_barrier_all();

        dest9 = shmem_short_g(src9, nextpe);
        dest10 = shmem_int_g(src10, nextpe);
        dest11 = shmem_long_g(src11, nextpe);
        dest12 = shmem_double_g(src12, nextpe);
        dest13 = shmem_float_g(src13, nextpe);

        shmem_barrier_all();

        if (me == 0) {
            report_get_result("shmem_short_g", dest9 != 1);
            report_get_result("shmem_int_g", dest10 != 1);
            report_get_result("shmem_long_g", dest11 != 1);
            report_get_result("shmem_double_g", dest12 != 1);
            report_get_result("shmem_float_g", dest13 != 1);
        }

        shmem_barrier_all();

        shmem_free(src1);
        shmem_free(src2);
        shmem_free(src3);
        shmem_free(src4);
        shmem_free(src5);
        shmem_free(src6);
        shmem_free(src7);
        shmem_free(src8);
        /* The single-element sources were previously never released. */
        shmem_free(src9);
        shmem_free(src10);
        shmem_free(src11);
        shmem_free(src12);
        shmem_free(src13);
    } else {
        printf("Number of PEs must be > 1 to test shmem get, test skipped\n");
    }

    shmem_finalize();

    return 0;
}
int main(int argc, char **argv) { int i,j; long modj,oldj,oldxmodj,oldxa; int my_pe,n_pes; size_t max_elements,max_elements_bytes; static long *x,*xa; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); #ifdef HAVE_SET_CACHE_INV shmem_set_cache_inv(); #endif /* fail if trying to use only one processor */ if ( n_pes <= 1 ){ fprintf(stderr, "FAIL - test requires at least two PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_long_swap(%s) n_pes=%d\n", argv[0],n_pes); /* shmalloc x & xa on all pes (only use the ones on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); x = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) x[i] = 0; max_elements_bytes = (size_t) (sizeof(long) * n_pes * ITER); xa = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes*ITER; i++) xa[i] = 0; count = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (my_pe != 0) { oldj = shmem_long_finc(&count, 0); /* get index oldj from PE 0 */ modj = (oldj % (n_pes-1)); /* PE 0 is just the counter/checker */ /* record PE value in x[modj] */ oldxmodj = shmem_long_swap(&x[modj], my_pe, 0); /* printf("PE=%d,oldj=%ld,modj=%ld,oldxmodj=%ld\n",my_pe,oldj,modj,oldxmodj); */ /* record PE value in xa[oldj] -- tells PE involved for each count */ oldxa = shmem_long_swap(&xa[oldj], my_pe, 0); /* printf("PE=%d,i=%d,oldj=%ld,oldxa=%ld\n",my_pe,i,oldj,oldxa); */ if (oldxa != 0) fprintf(stderr, "FAIL PE %d of %d: i=%d, oldxa = %ld expected = 0\n", my_pe, n_pes, i, oldxa); } } shmem_barrier_all(); if (my_pe == 0) { /* check last x[j] array PEs vs saved ans in xa[i] */ i = (ITER-1)*(n_pes-1); for(j=1 ; j<n_pes; j++) { printf("j=%d,x[%d]=%ld,xa[%d]=%ld\n",j,j-1,x[j-1],i,xa[i]); if (x[j-1] != xa[i]) fprintf(stderr, "FAIL PE %d of %d: x[%d] = %ld expected = %ld\n", my_pe, n_pes, j-1, x[j-1], ITER); i++; } } shmem_barrier_all(); #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
int main(int argc, char **argv) { int me, nproc; int c, all_ops = 1; int T = 0, S = 0, P = 0; const int DEFAULT_ITR = 7; int iterations = DEFAULT_ITR; shmem_init(); me = shmem_my_pe(); nproc = shmem_n_pes(); memset(target, -1, NUM_WRITE * sizeof(int)); memset(source, -1, NUM_READ * sizeof(int)); memset(sync_pes, -1, NUM_SYNC * sizeof(int)); shmem_barrier_all(); if (nproc != 2) { if (me == 0) { fprintf(stderr, "This is a micro test and is only " "intended to run on exactly two processes you" " are using %d\n", nproc); } shmem_finalize(); return 0; } while ((c = getopt(argc, argv, "i:vdpgaAscfFh")) != -1) { switch (c) { case 'i': iterations = atoi(optarg); assert(iterations > 0); all_ops += 2; break; case 'v': verbose = 1; all_ops++; break; case 'd': debug = 1; break; case 'p': putfence(me, iterations, T++); break; case 'g': gettest(me, iterations, T++, S++, P++); break; case 'a': atomic_add(me, iterations, T++); break; case 'A': atomic_inc(me, iterations, T++); break; case 's': swaptest(me, iterations, T++, S++, P++); break; case 'c': cswaptest(me, iterations, T++, S++, P++); break; case 'f': fetchatomic_add(me, iterations, T++, S++); break; case 'F': fetchatomic_inc(me, iterations, T++, S++); break; case 'h': default: if (me == 0) { fprintf(stderr, "input options:\n 1) single" " argument option will run all tests by default" "and additionally request: -v (verbose) | " "-i <number of interations>\n"); fprintf(stderr, " 2) two argument options " "choose any combination of the following " "to run individual tests: -i <iterations>, -v" ", -d, -p, -g, -a, -A, -s, -c, -f, -F, -h\n"); } shmem_finalize(); return 1; } } if (argc == all_ops || argc == 1) { putfence(me, iterations, T++); gettest(me, iterations, T++, S++, P++); atomic_add(me, iterations, T++); atomic_inc(me, iterations, T++); swaptest(me, iterations, T++, S++, P++); cswaptest(me, iterations, T++, S++, P++); fetchatomic_add(me, iterations, T++, S++); fetchatomic_inc(me, iterations, T++, S++); } if (verbose) { 
if (me == 1) printf("PE 1: PASS: %8d iterations\n", iterations); else printf("PE 0 Successful exit\n"); } shmem_finalize(); return 0; }
int main (void) { int i; int *target; int *source; int me, npes; struct timeval start, end; long time_taken, start_time, end_time; shmem_init (); me = shmem_my_pe (); npes = shmem_n_pes (); source = (int *) shmem_malloc (N_ELEMENTS * sizeof (*source)); time_taken = 0; for (i = 0; i < N_ELEMENTS; i += 1) { source[i] = (i + 1) * 10 + me; } target = (int *) shmem_malloc (N_ELEMENTS * sizeof (*target) * npes); for (i = 0; i < N_ELEMENTS * npes; i += 1) { target[i] = -90; } for (i = 0; i < _SHMEM_COLLECT_SYNC_SIZE; i += 1) { pSyncA[i] = _SHMEM_SYNC_VALUE; pSyncB[i] = _SHMEM_SYNC_VALUE; } shmem_barrier_all (); for (i = 0; i < 10000; i++) { gettimeofday (&start, NULL); start_time = (start.tv_sec * 1000000.0) + start.tv_usec; /* alternate between 2 pSync arrays to synchronize consequent collectives of even and odd iterations */ if (i % 2) { shmem_fcollect32 (target, source, N_ELEMENTS, 0, 0, npes, pSyncA); } else { shmem_fcollect32 (target, source, N_ELEMENTS, 0, 0, npes, pSyncB); } gettimeofday (&end, NULL); end_time = (end.tv_sec * 1000000.0) + end.tv_usec; if (me == 0) { time_taken = time_taken + (end_time - start_time); } } if (me == 0) { printf ("Time required to collect %d bytes of data, with %d PEs is %ld microseconds\n", (4 * N_ELEMENTS * npes), npes, time_taken / 10000); } shmem_barrier_all (); shmem_free (target); shmem_free (source); shmem_finalize (); return 0; }
int main (int argc, char **argv) { int i; int nextpe; int me, npes; int success1, success2, success3, success4, success5, success6, success7, success8; short dest1[N]; int dest2[N]; long dest3[N]; long double dest4[N]; long long dest5[N]; double dest6[N]; float dest7[N]; char *dest8; short dest9; int dest10; long dest11; double dest12; float dest13; int fail_count = 0; shmem_init (); me = shmem_my_pe (); npes = shmem_n_pes (); if (npes > 1) { success1 = 0; success2 = 0; success3 = 0; success4 = 0; success5 = 0; success6 = 0; success7 = 0; success8 = 0; dest8 = (char *) malloc (N * sizeof (char)); for (i = 0; i < N; i += 1) { dest1[i] = -9; dest2[i] = -9; dest3[i] = -9; dest4[i] = -9; dest5[i] = -9; dest6[i] = -9; dest7[i] = -9.0; dest8[i] = -9; } dest9 = -9; dest10 = -9; dest11 = -9; dest12 = -9; dest13 = -9; for (i = 0; i < N; i += 1) { src1[i] = (short) me; src2[i] = me; src3[i] = (long) me; src4[i] = (long double) me; src5[i] = (long long) me; src6[i] = (double) me; src7[i] = (float) me; src8[i] = (char) me; } src9 = (short) me; src10 = me; src11 = (long) me; src12 = (double) me; src13 = (float) me; nextpe = (me + 1) % npes; /* Testing shmem_short_get, shmem_short_get, shmem_int_get, shmem_long_get, shmem_longdouble_get, shmem_longlong_get, shmem_double_get, shmem_float_get, shmem_getmem */ shmem_barrier_all (); shmem_short_get (dest1, src1, N, nextpe); shmem_int_get (dest2, src2, N, nextpe); shmem_long_get (dest3, src3, N, nextpe); shmem_longdouble_get (dest4, src4, N, nextpe); shmem_longlong_get (dest5, src5, N, nextpe); shmem_double_get (dest6, src6, N, nextpe); shmem_float_get (dest7, src7, N, nextpe); shmem_getmem (dest8, src8, N * sizeof (char), nextpe); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N; i += 1) { if (dest1[i] != (1)) { success1 = 1; } if (dest2[i] != (1)) { success2 = 1; } if (dest3[i] != (1)) { success3 = 1; } if (dest4[i] != (1)) { success4 = 1; } if (dest5[i] != (1)) { success5 = 1; } if (dest6[i] != (1)) { success6 = 1; } if 
(dest7[i] != (1)) { success7 = 1; } if (dest8[i] != (1)) { success8 = 1; } } if (success1 == 0) printf ("Test shmem_short_get: Passed\n"); else { printf ("Test shmem_short_get: Failed\n"); fail_count++; } if (success2 == 0) printf ("Test shmem_int_get: Passed\n"); else { printf ("Test shmem_int_get: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_long_get: Passed\n"); else { printf ("Test shmem_long_get: Failed\n"); fail_count++; } if (success4 == 0) printf ("Test shmem_longdouble_get: Passed\n"); else { printf ("Test shmem_longdouble_get: Failed\n"); fail_count++; } if (success5 == 0) printf ("Test shmem_longlong_get: Passed\n"); else { printf ("Test shmem_longlong_get: Failed\n"); fail_count++; } if (success6 == 0) printf ("Test shmem_double_get: Passed\n"); else { printf ("Test shmem_double_get: Failed\n"); fail_count++; } if (success7 == 0) printf ("Test shmem_float_get: Passed\n"); else { printf ("Test shmem_float_get: Failed\n"); fail_count++; } if (success8 == 0) printf ("Test shmem_getmem: Passed\n"); else { printf ("Test shmem_getmem: Failed\n"); fail_count++; } } shmem_barrier_all (); /* Testing shmem_get32, shmem_get64, shmem_get128 */ if (sizeof (int) == 4) { for (i = 0; i < N; i += 1) { dest2[i] = -9; dest3[i] = -9; dest4[i] = -9; } success2 = 0; success3 = 0; success4 = 0; shmem_barrier_all (); shmem_get32 (dest2, src2, N, nextpe); shmem_get64 (dest3, src3, N, nextpe); shmem_get128 (dest4, src4, N, nextpe); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N; i += 1) { if (dest2[i] != (1)) { success2 = 1; } if (dest3[i] != (1)) { success3 = 1; } if (dest4[i] != (1)) { success4 = 1; } } if (success2 == 0) printf ("Test shmem_get32: Passed\n"); else { printf ("Test shmem_get32: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_get64: Passed\n"); else { printf ("Test shmem_get64: Failed\n"); fail_count++; } if (success4 == 0) printf ("Test shmem_get128: Passed\n"); else { printf ("Test shmem_get128: Failed\n"); 
fail_count++; } } } else if (sizeof (int) == 8) { for (i = 0; i < N; i += 1) { dest1[i] = -9; dest2[i] = -9; dest3[i] = -9; } success1 = 0; success2 = 0; success3 = 0; shmem_barrier_all (); shmem_get32 (dest1, src1, N, nextpe); shmem_get64 (dest2, src2, N, nextpe); shmem_get128 (dest3, src3, N, nextpe); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N; i += 1) { if (dest1[i] != (1)) { success1 = 1; } if (dest2[i] != (1)) { success2 = 1; } if (dest3[i] != (1)) { success3 = 1; } } if (success1 == 0) printf ("Test shmem_get32: Passed\n"); else { printf ("Test shmem_get32: Failed\n"); fail_count++; } if (success2 == 0) printf ("Test shmem_get64: Passed\n"); else { printf ("Test shmem_get64: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_get128: Passed\n"); else { printf ("Test shmem_get128: Failed\n"); fail_count++; } } } /* Testing shmem_iget32, shmem_iget64, shmem_iget128 */ shmem_barrier_all (); if (sizeof (int) == 4) { for (i = 0; i < N; i += 1) { dest2[i] = -9; dest3[i] = -9; dest4[i] = -9; } success2 = 0; success3 = 0; success4 = 0; shmem_barrier_all (); shmem_iget32 (dest2, src2, 1, 2, N / 2, npes - 1); shmem_iget64 (dest3, src3, 1, 2, N / 2, npes - 1); shmem_iget128 (dest4, src4, 1, 2, N / 2, npes - 1); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N / 2; i += 1) { if (dest2[i] != (npes - 1)) { success2 = 1; } if (dest3[i] != (npes - 1)) { success3 = 1; } if (dest4[i] != (npes - 1)) { success4 = 1; } } if (success2 == 0) printf ("Test shmem_iget32: Passed\n"); else { printf ("Test shmem_iget32: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_iget64: Passed\n"); else { printf ("Test shmem_iget64: Failed\n"); fail_count++; } if (success4 == 0) printf ("Test shmem_iget128: Passed\n"); else { printf ("Test shmem_iget128: Failed\n"); fail_count++; } } } else if (sizeof (int) == 8) { for (i = 0; i < N; i += 1) { dest1[i] = -9; dest2[i] = -9; dest3[i] = -9; } success1 = 0; success2 = 0; success3 = 0; 
shmem_barrier_all (); shmem_iget32 (dest1, src1, 1, 2, N / 2, npes - 1); shmem_iget64 (dest2, src2, 1, 2, N / 2, npes - 1); shmem_iget128 (dest3, src3, 1, 2, N / 2, npes - 1); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N / 2; i += 1) { if (dest1[i] != (npes - 1)) { success1 = 1; } if (dest2[i] != (npes - 1)) { success2 = 1; } if (dest3[i] != (npes - 1)) { success3 = 1; } } if (success1 == 0) printf ("Test shmem_iget32: Passed\n"); else { printf ("Test shmem_iget32: Failed\n"); fail_count++; } if (success2 == 0) printf ("Test shmem_iget64: Passed\n"); else { printf ("Test shmem_iget64: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_iget128: Passed\n"); else { printf ("Test shmem_iget128: Failed\n"); fail_count++; } } } /* Testing shmem_short_iget, shmem_int_iget, shmem_long_iget, shmem_double_iget, shmem_float_iget */ for (i = 0; i < N; i += 1) { dest1[i] = -9; dest2[i] = -9; dest3[i] = -9; dest6[i] = -9; dest7[i] = -9; } success1 = 0; success2 = 0; success3 = 0; success6 = 0; success7 = 0; shmem_barrier_all (); shmem_short_iget (dest1, src1, 1, 2, N / 2, npes - 1); shmem_int_iget (dest2, src2, 1, 2, N / 2, npes - 1); shmem_long_iget (dest3, src3, 1, 2, N / 2, npes - 1); shmem_double_iget (dest6, src6, 1, 2, N / 2, npes - 1); shmem_float_iget (dest7, src7, 1, 2, N / 2, npes - 1); shmem_barrier_all (); if (me == 0) { for (i = 0; i < N / 2; i += 1) { if (dest1[i] != (npes - 1)) { success1 = 1; } if (dest2[i] != (npes - 1)) { success2 = 1; } if (dest3[i] != (npes - 1)) { success3 = 1; } if (dest6[i] != (npes - 1)) { success6 = 1; } if (dest7[i] != (npes - 1)) { success7 = 1; } } if (success1 == 0) printf ("Test shmem_short_iget: Passed\n"); else { printf ("Test shmem_short_iget: Failed\n"); fail_count++; } if (success2 == 0) printf ("Test shmem_int_iget: Passed\n"); else { printf ("Test shmem_int_iget: Failed\n"); fail_count++; } if (success3 == 0) printf ("Test shmem_long_iget: Passed\n"); else { printf ("Test shmem_long_iget: Failed\n"); 
fail_count++; } if (success6 == 0) printf ("Test shmem_double_iget: Passed\n"); else { printf ("Test shmem_double_iget: Failed\n"); fail_count++; } if (success7 == 0) printf ("Test shmem_float_iget: Passed\n"); else { printf ("Test shmem_float_iget: Failed\n"); fail_count++; } } /* Testing shmem_double_g, shmem_float_g, shmem_int_g, shmem_long_g, shmem_short_g */ shmem_barrier_all (); dest9 = shmem_short_g (&src9, nextpe); dest10 = shmem_int_g (&src10, nextpe); dest11 = shmem_long_g (&src11, nextpe); dest12 = shmem_double_g (&src12, nextpe); dest13 = shmem_float_g (&src13, nextpe); shmem_barrier_all (); if (me == 0) { if (dest9 == 1) printf ("Test shmem_short_g: Passed\n"); else { printf ("Test shmem_short_g: Failed\n"); fail_count++; } if (dest10 == 1) printf ("Test shmem_int_g: Passed\n"); else { printf ("Test shmem_int_g: Failed\n"); fail_count++; } if (dest11 == 1) printf ("Test shmem_long_g: Passed\n"); else { printf ("Test shmem_long_g: Failed\n"); fail_count++; } if (dest12 == 1) printf ("Test shmem_double_g: Passed\n"); else { printf ("Test shmem_double_g: Failed\n"); fail_count++; } if (dest13 == 1) printf ("Test shmem_float_g: Passed\n"); else { printf ("Test shmem_float_g: Failed\n"); fail_count++; } } shmem_barrier_all (); if (me == 0) { if (fail_count == 0) printf("All Tests Passed\n"); else printf("%d Tests Failed\n", fail_count); } } else { printf ("Number of PEs must be > 1 to test shmem get, test skipped\n"); } shmem_finalize (); return 0; }
int main(int argc, char **argv) { int i,ps,ps_cnt=2; int *target; int *source; int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS; char *pgm; double start_time, time_taken; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:p:s")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 'p': if ((ps_cnt = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad pSync[] elements %d\n",loops); shmem_finalize(); return 1; } break; case 's': Serialize++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } ps_cnt *= _SHMEM_BCAST_SYNC_SIZE; pSync = shmem_malloc( ps_cnt * sizeof(long) ); for (i = 0; i < ps_cnt; i++) pSync[i] = _SHMEM_SYNC_VALUE; source = (int *) shmem_malloc( elements * sizeof(*source) ); target = (int *) shmem_malloc( elements * sizeof(*target) ); for (i = 0; i < elements; i += 1) { source[i] = i + 1; target[i] = -90; } if (me==0 && Verbose) fprintf(stderr,"ps_cnt %d loops %d nElems %d\n", ps_cnt,loops,elements); shmem_barrier_all(); for(time_taken = 0.0, ps = i = 0; i < loops; i++) { start_time = shmemx_wtime(); shmem_broadcast32(target, source, elements, 0, 0, 0, npes, &pSync[ps]); if (Serialize) shmem_barrier_all(); time_taken += (shmemx_wtime() - start_time); if (ps_cnt > 1 ) { ps += _SHMEM_BCAST_SYNC_SIZE; if ( ps >= ps_cnt ) ps = 0; } } if(me == 0 && Verbose) { printf("%d loops of Broadcast32(%ld bytes) over %d PEs: %7.3f secs\n", loops, (elements*sizeof(*source)), npes, time_taken); elements = (elements * loops * sizeof(*source)) / (1024*1024); 
printf(" %7.5f secs per broadcast() @ %7.4f MB/sec\n", (time_taken/(double)loops), ((double)elements / time_taken) ); } if (Verbose > 1) fprintf(stderr,"[%d] pre B1\n",me); shmem_barrier_all(); if (Verbose > 1) fprintf(stderr,"[%d] post B1\n",me); shmem_free(pSync); shmem_free(target); shmem_free(source); shmem_finalize(); return 0; }
int main(int argc, char **argv) { int i,j; short modjs, oldjs, oldxmodjs, valuejs; int modji, oldji, oldxmodji, valueji; long modjl, oldjl, oldxmodjl, valuejl; long long modjll,oldjll,oldxmodjll, valuejll; int my_pe,n_pes; size_t max_elements,max_elements_bytes; static short *xs; static int *xi; static long *xl; static long long *xll; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); #ifdef HAVE_SET_CACHE_INV shmem_set_cache_inv(); #endif /* fail if trying to use only one processor */ if ( n_pes <= 1 ){ fprintf(stderr, "FAIL - test requires at least two PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_fadd(%s) n_pes=%d\n", argv[0],n_pes); /* test shmem_short_fadd */ #ifdef HAVE_SHORT /* shmalloc xs on all pes (only use the one on PE 0) */ max_elements_bytes = (size_t) (sizeof(short) * n_pes); xs = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xs[i] = 0; count_short = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (my_pe != 0) { oldjs = shmem_short_finc(&count_short, 0); /* get index oldjs from PE 0 */ modjs = (oldjs % (n_pes-1)); /* PE 0 is just the counter/checker */ /* add 10 to value in xs[modjs] */ valuejs = (short) 10; oldxmodjs = shmem_short_fadd(&xs[modjs], valuejs, 0); /* printf("PE=%d,oldjs=%d,modjs=%d,oldxmodjs=%d,valuejs=%d\n", my_pe,oldjs,modjs,oldxmodjs,valuejs); */ } } shmem_barrier_all(); if (my_pe == 0) { /* check xs[j] array on PE 0 */ for(j=1 ; j<n_pes; j++) { if (xs[j-1] != 10*ITER) fprintf(stderr, "FAIL PE %d of %d: xs[%d] = %d expected = %d\n", my_pe, n_pes, j-1, xs[j-1], ITER); } } shmem_free(xs); #endif /* test shmem_int_fadd */ /* shmalloc xi on all pes (only use the one on PE 0) */ max_elements_bytes = (size_t) (sizeof(int) * n_pes); xi = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xi[i] = 0; count_int = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (my_pe != 0) { oldji = shmem_int_finc(&count_int, 0); /* get index oldji from PE 0 */ modji = (oldji % (n_pes-1)); /* PE 0 is just the 
counter/checker */ /* add 10 to value in xi[modji] */ valueji = (int) 10; oldxmodji = shmem_int_fadd(&xi[modji], valueji, 0); /* printf("PE=%d,oldji=%d,modji=%d,oldxmodji=%d,valueji=%d\n", my_pe,oldji,modji,oldxmodji,valueji); */ } } shmem_barrier_all(); if (my_pe == 0) { /* check xi[j] array on PE 0 */ for(j=1 ; j<n_pes; j++) { if (xi[j-1] != 10*ITER) fprintf(stderr, "FAIL PE %d of %d: xi[%d] = %d expected = %d\n", my_pe, n_pes, j-1, xi[j-1], ITER); } } shmem_free(xi); /* test shmem_long_fadd */ /* shmalloc xl on all pes (only use the one on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); xl = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xl[i] = 0; count_long = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (my_pe != 0) { oldjl = shmem_long_finc(&count_long, 0); /* get index oldjl from PE 0 */ modjl = (oldjl % (n_pes-1)); /* PE 0 is just the counter/checker */ /* add 10 to value in xl[modjl] */ valuejl = (long) 10; oldxmodjl = shmem_long_fadd(&xl[modjl], valuejl, 0); /* printf("PE=%d,oldjl=%ld,modjl=%ld,oldxmodjl=%ld,valuejl=%ld\n", my_pe,oldjl,modjl,oldxmodjl,valuejl); */ } } shmem_barrier_all(); if (my_pe == 0) { /* check xl[j] array on PE 0 */ for(j=1 ; j<n_pes; j++) { if (xl[j-1] != 10*ITER) fprintf(stderr, "FAIL PE %d of %d: xl[%d] = %ld expected = %ld\n", my_pe, n_pes, j-1, xl[j-1], ITER); } } shmem_free(xl); /* test shmem_longlong_fadd */ #ifdef HAVE_LONG_LONG /* shmalloc xll on all pes (only use the one on PE 0) */ max_elements_bytes = (size_t) (sizeof(long long) * n_pes); xll = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xll[i] = 0; count_longlong = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (my_pe != 0) { oldjll = shmem_longlong_finc(&count_longlong, 0); /* get index oldjll from PE 0 */ modjll = (oldjll % (n_pes-1)); /* PE 0 is just the counter/checker */ /* add 10 to value in xll[modjll] */ valuejll = (long long) 10; oldxmodjll = shmem_longlong_fadd(&xll[modjll], valuejll, 0); /* 
printf("PE=%d,oldjll=%ld,modjll=%ld,oldxmodjll=%ld,valuejll=%ld\n", my_pe,oldjll,modjll,oldxmodjll,valuejll); */ } } shmem_barrier_all(); if (my_pe == 0) { /* check xll[j] array on PE 0 */ for(j=1 ; j<n_pes; j++) { if (xll[j-1] != 10*ITER) fprintf(stderr, "FAIL PE %d of %d: xll[%d] = %ld expected = %ld\n", my_pe, n_pes, j-1, xll[j-1], ITER); } } shmem_free(xll); #endif #ifdef SHMEM_C_GENERIC_32 /* test shmem_fadd (GENERIC 32) */ /* shmalloc xi on all pes (only use the one on PE 0) */ max_elements_bytes = (size_t) (sizeof(int) * n_pes); xi = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xi[i] = 0; count_int = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (my_pe != 0) { oldji = shmem_finc(&count_int, 0); /* get index oldji from PE 0 */ modji = (oldji % (n_pes-1)); /* PE 0 is just the counter/checker */ /* add 10 to value in xi[modji] */ valueji = (int) 10; oldxmodji = shmem_fadd(&xi[modji], valueji, 0); /* printf("PE=%d,oldji=%d,modji=%d,oldxmodji=%d,valueji=%d\n", my_pe,oldji,modji,oldxmodji,valueji); */ } } shmem_barrier_all(); if (my_pe == 0) { /* check xi[j] array on PE 0 */ for(j=1 ; j<n_pes; j++) { if (xi[j-1] != 10*ITER) fprintf(stderr, "FAIL pe %d of %d: xi[%d] = %d expected = %d\n", my_pe, n_pes, j-1, xi[j-1], ITER); } } shmem_free(xi); #else /* test shmem_fadd (GENERIC 64) */ /* shmalloc xl on all pes (only use the one on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); xl = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) xl[i] = 0; count_long = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (my_pe != 0) { #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) oldjl = shmem_finc(&count_long, 0); /* get index oldjl from PE 0 */ #else oldjl = shmem_long_finc(&count_long, 0); /* get index oldjl from PE 0 */ #endif modjl = (oldjl % (n_pes-1)); /* PE 0 is just the counter/checker */ /* add 10 to value in xl[modjl] */ valuejl = (long) 10; #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) 
oldxmodjl = shmem_fadd(&xl[modjl], valuejl, 0); #else oldxmodjl = shmem_long_fadd(&xl[modjl], valuejl, 0); #endif /* printf("PE=%d,oldjl=%ld,modjl=%ld,oldxmodjl=%ld,valuejl=%ld\n", my_pe,oldjl,modjl,oldxmodjl,valuejl); */ } } shmem_barrier_all(); if (my_pe == 0) { /* check xl[j] array on PE 0 */ for(j=1 ; j<n_pes; j++) { if (xl[j-1] != 10*ITER) fprintf(stderr, "FAIL pe %d of %d: xl[%d] = %ld expected = %ld\n", my_pe, n_pes, j-1, xl[j-1], ITER); } } shmem_free(xl); #endif shmem_barrier_all(); #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
int main( int argc, char *argv[]) { int rc=0, my_pe, npes, neighbor; int loops=LOOPS; int j; size_t data_sz=sizeof(long) * 3; double start_time; long *data, lval=0; if (argc > 1) loops = atoi(argv[1]); shmem_init(); my_pe = shmem_my_pe(); npes = shmem_n_pes(); data = shmem_malloc(data_sz); if (!data) { fprintf(stderr,"[%d] shmem_malloc(%ld) failure? %d\n", my_pe,data_sz,errno); shmem_global_exit(1); } memset((void*)data,0,data_sz); shmem_barrier_all(); neighbor = (my_pe + 1) % npes; start_time = shmemx_wtime(); for(j=0,elapsed=0.0; j < loops; j++) { start_time = shmemx_wtime(); lval = shmem_long_finc( (void*)&data[1], neighbor ); elapsed += shmemx_wtime() - start_time; if (lval != (long) j) { fprintf(stderr,"[%d] Test: FAIL previous val %ld != %d Exit.\n", my_pe, lval, j); shmem_global_exit(1); } } shmem_barrier_all(); rc = 0; if (data[1] != (long)loops) { fprintf(stderr,"[%d] finc neighbot: FAIL data[1](%p) %ld != %d Exit.\n", my_pe, (void*)&data[1], data[1], loops); rc--; } /* check if adjancent memory locations distrubed */ assert(data[0] == 0); assert(data[2] == 0); if (my_pe == 0 ) { if (rc == 0 && Verbose) fprintf(stderr,"[%d] finc neighbor: PASSED.\n",my_pe); fprintf(stderr,"[%d] %d loops of shmem_long_finc() in %6.4f secs\n" " %2.6f usecs per shmem_long_finc()\n", my_pe,loops,elapsed,((elapsed*100000.0)/(double)loops)); } shmem_free(data); shmem_finalize(); return rc; }
int main(int argc, char* argv[]) { DataType source[10] = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; static DataType target[10]; static DataType pong=666; DataType *t2=NULL; int me, num_pes, pe, Verbose=0; if (argc > 1 && (strcmp(argv[1],"-v") == 0)) { Verbose++; } shmem_init(); me = shmem_my_pe(); num_pes = shmem_n_pes(); if (num_pes == 1) { printf("%s: Requires number of PEs > 1\n", argv[0]); shmem_finalize(); return 0; } t2 = shmem_malloc(10*sizeof(DataType)); if (!t2) { if (me==0) printf("shmem_malloc() failed?\n"); shmem_global_exit(1); } t2[9] = target[9] = 0xFF; shmem_barrier_all(); if (me == 0) { memset(target, 0, sizeof(target)); for(pe=1; pe < num_pes; pe++) SHM_PUT(target, target, 10, pe); for(pe=1; pe < num_pes; pe++) /* put 10 elements into target on PE 1 */ SHM_PUT(target, source, 10, pe); SHM_WAITU( &pong, SHMEM_CMP_GT, 666 ); Vprintf("PE[%d] pong now "PF"\n",me,pong); for(pe=1; pe < num_pes; pe++) /* put 1 element into t2 on PE 1 */ SHM_PUTP(&t2[9], 0xDD, pe); } else { /* wait for 10th element write of 'target' */ SHM_WAITU( &target[9], SHMEM_CMP_NE, 0xFF ); Vprintf("PE[%d] target[9] was 255 now "PF", success.\n",me,target[9]); SHM_WAITU( &target[9], SHMEM_CMP_EQ, 10 ); Vprintf("PE[%d] expected target[9] == 10 now "PF"\n",me,target[9]); if (me == 1) { if (Verbose) { DataType tmp = SHM_GETP( &pong, 0); printf("PE[%d] @ PE[0] pong == "PF", setting to 999\n",me,tmp); } SHM_PUTP( &pong, 999, 0); } SHM_WAITU( &t2[9], SHMEM_CMP_NE, 0xFF ); } //shmem_barrier_all(); /* sync sender and receiver */ if (me != 0) { if (memcmp(source, target, sizeof(DataType) * 10) != 0) { int i; fprintf(stderr,"[%d] Src & Target mismatch?\n",me); for (i = 0 ; i < 10 ; ++i) { printf(PF","PF" ", source[i], target[i]); } printf("\n"); shmem_global_exit(1); } } shmem_free(t2); if (Verbose) fprintf(stderr,"[%d] exit\n",shmem_my_pe()); shmem_finalize(); return 0; }
int main(int argc, char *argv[]) { int i; shmem_init(); rank = shmem_my_pe(); world_size = shmem_n_pes(); /* root handles arguments and bcasts answers */ if (0 == rank) { int ch; while (start_err != 1 && (ch = getopt(argc, argv, "p:i:m:s:c:n:oh")) != -1) { switch (ch) { case 'p': npeers = atoi(optarg); break; case 'i': niters = atoi(optarg); break; case 'm': nmsgs = atoi(optarg); break; case 's': nbytes = atoi(optarg); break; case 'c': cache_size = atoi(optarg) / sizeof(int); break; case 'n': ppn = atoi(optarg); break; case 'o': machine_output = 1; break; case 'h': case '?': default: start_err = 1; usage(); } } /* sanity check */ if (start_err != 1) { #if 0 if (world_size < 3) { fprintf(stderr, "Error: At least three processes are required\n"); start_err = 1; } else #endif if (world_size <= npeers) { fprintf(stderr, "Error: job size (%d) <= number of peers (%d)\n", world_size, npeers); start_err = 77; } else if (ppn < 1) { fprintf(stderr, "Error: must specify process per node (-n #)\n"); start_err = 77; } else if (world_size / ppn <= npeers) { fprintf(stderr, "Error: node count <= number of peers\n"); start_err = 77; } } } shmem_barrier_all(); /* broadcast results */ printf("%d: psync: 0x%lu\n", rank, (unsigned long) bcast_pSync); shmem_broadcast32(&start_err, &start_err, 1, 0, 0, 0, world_size, bcast_pSync); if (0 != start_err) { exit(start_err); } shmem_barrier_all(); shmem_broadcast32(&npeers, &npeers, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); shmem_broadcast32(&niters, &niters, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); shmem_broadcast32(&nmsgs, &nmsgs, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); shmem_broadcast32(&nbytes, &nbytes, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); shmem_broadcast32(&cache_size, &cache_size, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); shmem_broadcast32(&ppn, &ppn, 1, 0, 0, 0, world_size, bcast_pSync); shmem_barrier_all(); if (0 == rank) { if 
(!machine_output) { printf("job size: %d\n", world_size); printf("npeers: %d\n", npeers); printf("niters: %d\n", niters); printf("nmsgs: %d\n", nmsgs); printf("nbytes: %d\n", nbytes); printf("cache size: %d\n", cache_size * (int)sizeof(int)); printf("ppn: %d\n", ppn); } else { printf("%d %d %d %d %d %d %d ", world_size, npeers, niters, nmsgs, nbytes, cache_size * (int)sizeof(int), ppn); } } /* allocate buffers */ send_peers = malloc(sizeof(int) * npeers); if (NULL == send_peers) abort_app("malloc"); recv_peers = malloc(sizeof(int) * npeers); if (NULL == recv_peers) abort_app("malloc"); cache_buf = malloc(sizeof(int) * cache_size); if (NULL == cache_buf) abort_app("malloc"); send_buf = malloc(npeers * nmsgs * nbytes); if (NULL == send_buf) abort_app("malloc"); memset(send_buf, 1, npeers * nmsgs * nbytes); recv_buf = shmem_malloc(npeers * nmsgs * nbytes); if (NULL == recv_buf) abort_app("malloc"); memset(recv_buf, 0, npeers * nmsgs * nbytes); /* calculate peers */ for (i = 0 ; i < npeers ; ++i) { if (i < npeers / 2) { send_peers[i] = (rank + world_size + ((i - npeers / 2) * ppn)) % world_size; } else { send_peers[i] = (rank + world_size + ((i - npeers / 2 + 1) * ppn)) % world_size; } } if (npeers % 2 == 0) { /* even */ for (i = 0 ; i < npeers ; ++i) { if (i < npeers / 2) { recv_peers[i] = (rank + world_size + ((i - npeers / 2) *ppn)) % world_size; } else { recv_peers[i] = (rank + world_size + ((i - npeers / 2 + 1) * ppn)) % world_size; } } } else { /* odd */ for (i = 0 ; i < npeers ; ++i) { if (i < npeers / 2 + 1) { recv_peers[i] = (rank + world_size + ((i - npeers / 2 - 1) * ppn)) % world_size; } else { recv_peers[i] = (rank + world_size + ((i - npeers / 2) * ppn)) % world_size; } } } /* BWB: FIX ME: trash the free lists / malloc here */ /* sync, although tests will do this on their own (in theory) */ shmem_barrier_all(); /* run tests */ test_one_way(); test_same_direction(); test_prepost(); test_allstart(); if (rank == 0 && machine_output) printf("\n"); /* done */ 
shmem_finalize(); return 0; }
int main(int argc, char* argv[]) { int i, Verbose=0; int mpe, num_pes, loops=10, cloop; char *pgm; long *dst, *src; int nBytes = START_BCAST_SIZE; int nLongs=0; shmem_init(); mpe = shmem_my_pe(); num_pes = shmem_n_pes(); if (num_pes == 1) { printf("%s: Requires number of PEs > 1\n", argv[0]); shmem_finalize(); return 0; } if (sizeof(long) != 8) { printf("Test assumes 64-bit long (%zd)\n", sizeof(long)); shmem_global_exit(1); return 0; } if ((pgm=strrchr(argv[0],'/'))) { pgm++; } else { pgm = argv[0]; } if (argc > 1) { if (strncmp(argv[1],"-v",3) == 0) { Verbose=1; } else if (strncmp(argv[1],"-h",3) == 0) { fprintf(stderr,"usage: %s {-v(verbose)|h(help)}\n",pgm); shmem_finalize(); exit(1); } } for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1) { pSync[i] = SHMEM_SYNC_VALUE; } if ( mpe == 0 && Verbose ) { fprintf(stderr,"%d loops\n",loops); } for(cloop=1; cloop <= loops; cloop++) { nLongs = nBytes / sizeof(long); dst = (long *)shmem_malloc(nBytes*2); if ( !dst ) { fprintf(stderr,"[%d] shmem_malloc(%d) failed %s\n", mpe,nBytes,strerror(errno)); return 0; } memset( (void*)dst, 0, nBytes ); src = &dst[nLongs]; for (i = 1; i < nLongs; i++) { src[i] = i+1; } shmem_barrier_all(); shmem_broadcast64(dst, src, nLongs, 1, 0, 0, num_pes, pSync); for(i=0; i < nLongs; i++) { /* the root node shouldn't have the result into dst (cf specification).*/ if (1 != mpe && dst[i] != src[i]) { fprintf(stderr,"[%d] dst[%d] %ld != expected %ld\n", mpe, i, dst[i],src[i]); shmem_global_exit(1); } else if (1 == mpe && dst[i] != 0) { fprintf(stderr,"[%d] dst[%d] %ld != expected 0\n", mpe, i, dst[i]); shmem_global_exit(1); } } shmem_barrier_all(); shmem_free (dst); if (Verbose && mpe ==0) fprintf(stderr,"loop %2d Bcast %d, Done.\n",cloop,nBytes); nBytes += BCAST_INCR; } shmem_finalize(); return 0; }
/*
 * Configure the shared-memory IPC backend from `config`.
 *
 * Required config keys: `key` (System V shm key) and `role`.  Optional keys:
 * `item_size`, `size` (number of items), `force_async`, `force_sync`,
 * `buffer` and `return_result`; the defaults are NITEMS_DEFAULT,
 * ITEM_SIZE_DEFAULT, 0, 0, HK(buffer) and 1 respectively.
 *
 * The segment is laid out as a header, then `nitems` block descriptors,
 * then `nitems * item_size` bytes of data.  When the role is ROLE_SERVER the
 * header is filled in, shmem_init() is run on it and the listener thread is
 * started.
 *
 * Returns 0 on success, or the value produced by error() on failure.
 */
ssize_t ipc_shmem_init (ipc_t *ipc, config_t *config){ // {{{
    ssize_t                ret;
    int                    shmid;
    uint32_t               shmkey;
    size_t                 shmsize;
    size_t                 nitems         = NITEMS_DEFAULT;
    size_t                 item_size      = ITEM_SIZE_DEFAULT;
    uintmax_t              f_async        = 0;
    uintmax_t              f_sync         = 0;
    char                  *role_str       = NULL;
    ipc_shmem_userdata    *userdata       = (ipc_shmem_userdata *)ipc->userdata;

    userdata->buffer        = HK(buffer);
    userdata->return_result = 1;

    hash_data_get(ret, TYPE_UINT32T, shmkey, config, HK(key));
    if(ret != 0)
        return error("no key supplied");

    hash_data_convert(ret, TYPE_STRINGT, role_str, config, HK(role));
    if(ret != 0)
        return error("no role supplied");

    hash_data_get(ret, TYPE_SIZET,    item_size,               config, HK(item_size));
    hash_data_get(ret, TYPE_SIZET,    nitems,                  config, HK(size));
    hash_data_get(ret, TYPE_UINTT,    f_async,                 config, HK(force_async));
    hash_data_get(ret, TYPE_UINTT,    f_sync,                  config, HK(force_sync));
    hash_data_get(ret, TYPE_HASHKEYT, userdata->buffer,        config, HK(buffer));
    hash_data_get(ret, TYPE_UINTT,    userdata->return_result, config, HK(return_result));

    /* Release the role string before any return so it cannot leak. */
    userdata->role = ipc_string_to_role(role_str);
    free(role_str);
    if(userdata->role == ROLE_INVALID)
        return error("invalid role supplied");

    /* Reject contradictory flags before any segment is created or attached. */
    if( (f_async != 0 && f_sync != 0) )
        return error("force_async with force_sync");

    shmsize = nitems * sizeof(ipc_shmem_block) + nitems * item_size + sizeof(ipc_shmem_header);

    if( (shmid = shmget(shmkey, shmsize, IPC_CREAT | 0666)) < 0)
        return error("shmget failed");

    if( (userdata->shmaddr = shmat(shmid, NULL, 0)) == (void *)-1)
        return error("shmat failed");

    /* Byte offsets are taken through char *; arithmetic on void * is not standard C. */
    userdata->shmblocks = (ipc_shmem_block *)((char *)userdata->shmaddr   + sizeof(ipc_shmem_header));
    userdata->shmdata   = (void *)           ((char *)userdata->shmblocks + nitems * sizeof(ipc_shmem_block));
    userdata->inited    = 1;

    userdata->forced_state = FORCE_NONE;
    if(f_async != 0) userdata->forced_state = FORCE_ASYNC;
    if(f_sync  != 0) userdata->forced_state = FORCE_SYNC;

    if(userdata->role == ROLE_SERVER){
        userdata->shmaddr->item_size = item_size;
        userdata->shmaddr->nitems    = nitems;

        if(shmem_init(userdata) != 0)
            return error("shmem_init failed");

        // start threads
        if(pthread_create(&userdata->server_thr, NULL, &ipc_shmem_listen, ipc) != 0)
            return error("pthread_create failed");
    }
    return 0;
} // }}}
int main(int argc, char **argv) { int i,j; long modj,oldj,oldxmodj,newcount; int my_pe,n_pes; size_t max_elements_bytes; static long *x; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); #ifdef HAVE_SET_CACHE_INV shmem_set_cache_inv(); #endif /* fail if trying to use only one processor */ if ( n_pes <= 1 ){ fprintf(stderr, "FAIL - test requires at least two PEs\n"); exit(1); } if(my_pe == 0) fprintf(stderr, "shmem_lock_set_clear(%s) n_pes=%d\n", argv[0],n_pes); /* shmalloc x on all pes (only use the one on PE 0) */ max_elements_bytes = (size_t) (sizeof(long) * n_pes); x = shmem_malloc( max_elements_bytes ); for(i=0; i<n_pes; i++) x[i] = 0; count = 0; shmem_barrier_all(); for(i=0; i<ITER; i++) { if (my_pe != 0) { /* emulate oldj = shmem_long_finc(&count, 0); */ shmem_set_lock(&lock); shmem_long_get(&oldj,&count,1,0); /* get oldj from PE 0's count */ newcount = oldj+1; shmem_long_put(&count,&newcount,1,0); /* update count on PE 0 */ shmem_quiet; /* insure that write completes */ shmem_clear_lock(&lock); /* end of emulation */ modj = (oldj % (n_pes-1)); /* PE 0 is just the counter/checker */ /* increment value in x[modj] */ oldxmodj = shmem_long_finc(&x[modj], 0); /* printf("PE=%d,oldj=%ld,modj=%ld,oldxmodj=%ld\n",my_pe,oldj,modj,oldxmodj); */ } } shmem_barrier_all(); if (my_pe == 0) { /* check x[j] array on PE 0 */ for(j=1 ; j<n_pes; j++) { if (x[j-1] != (long) ITER) fprintf(stderr, "FAIL PE %d of %d: x[%d] = %ld expected = %ld\n", my_pe, n_pes, j-1, x[j-1], (long) ITER); } } shmem_barrier_all(); #ifdef NEEDS_FINALIZE shmem_finalize(); #endif return 0; }
int main(int argc, char **argv) { int i; int *target; int *source; int me, npes, elements=N_ELEMENTS, loops=DFLT_LOOPS; char *pgm; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; /* lower-case switch enable only a specific test; otherwise run all tests */ while ((i = getopt (argc, argv, "hvqe:l:abcmn")) != EOF) { switch (i) { case 'a': All2++; break; case 'b': Bcast++; break; case 'c': Collect++; break; case 'm': Many++; break; case 'n': Neighbor++; break; case 'q': Verbose=0; break; case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 'h': if (me == 0) usage(pgm); shmem_finalize(); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } if (All2==0 && Bcast==0 && Collect==0 && Many==0 && Neighbor==0) All2 = Bcast = Collect = Many = Neighbor = 1; source = (int *) shmem_malloc( elements * sizeof(*source) ); target = (int *) shmem_malloc( elements * sizeof(*target) ); for (i = 0; i < elements; i += 1) { source[i] = i + 1; target[i] = -90; } shmem_barrier_all(); if (Neighbor) { neighbor_put( target, source, elements, me, npes, loops ); neighbor_get( target, source, elements, me, npes, loops ); } if (All2) { all2all_put( target, source, elements, me, npes, loops ); all2all_get( target, source, elements, me, npes, loops ); } if (Many) { one2many_put( target, source, elements, me, npes, loops ); many2one_get( target, source, elements, me, npes, loops ); } if (Bcast) bcast( target, source, elements, me, npes, loops ); if (Collect) { collect( NULL, source, elements, me, npes, loops ); fcollect( NULL, source, elements, me, npes, loops ); } shmem_barrier_all(); 
shmem_free(target); shmem_free(source); shmem_finalize(); return 0; }
/*
 * Create and initialise an MDL context.
 *
 * Initialises the SHMEM pSync array, starts SHMEM and MPI, allocates the
 * service table, service buffers, transfer buffer, cache table and the
 * per-thread cache reply buffers, and scans argv for "-sz" (ignored, with a
 * warning) and "+d" (enable a diagnostic file).
 *
 * pmdl     - receives the new context (set to NULL until setup completes)
 * argv     - NULL-terminated argument vector of the program
 * fcnChild - entry point run on every rank other than 0 when more than one
 *            rank exists; such ranks call mdlFinish() and exit(0) afterwards
 *            and never return from this function
 *
 * Returns the number of threads (the MPI communicator size).  Allocation
 * failures are caught with assert().
 */
int mdlInitialize(MDL *pmdl,char **argv,void (*fcnChild)(MDL))
{
    MDL mdl;
    int i,bDiag,bThreads;
    char *p,ach[256],achDiag[256];
    int argc;

    /* SHMEM */
    for (i=0;i<_SHMEM_COLLECT_SYNC_SIZE;++i) {
        pSync[i]=_SHMEM_SYNC_VALUE;
    }

    /* Init Shmem */
    shmem_init();

    *pmdl = NULL;
    mdl = malloc(sizeof(struct mdlContext));
    assert(mdl != NULL);
    /*
    ** Set default "maximums" for structures. These are NOT hard
    ** maximums, as the structures will be realloc'd when these
    ** values are exceeded.
    */
    mdl->nMaxServices = MDL_DEFAULT_SERVICES;
    mdl->nMaxSrvBytes = MDL_DEFAULT_BYTES;
    mdl->nMaxCacheIds = MDL_DEFAULT_CACHEIDS;
    /*
    ** Now allocate the initial service slots.
    */
    mdl->psrv = malloc(mdl->nMaxServices*sizeof(SERVICE));
    assert(mdl->psrv != NULL);
    /*
    ** Initialize the new service slots.
    */
    for (i=0;i<mdl->nMaxServices;++i) {
        mdl->psrv[i].p1 = NULL;
        mdl->psrv[i].nInBytes = 0;
        mdl->psrv[i].nOutBytes = 0;
        mdl->psrv[i].fcnService = NULL;
    }
    /*
    ** Provide a 'null' service for sid = 0, so that stopping the
    ** service handler is well defined!
    */
    mdl->psrv[0].p1 = NULL;
    mdl->psrv[0].nInBytes = 0;
    mdl->psrv[0].nOutBytes = 0;
    mdl->psrv[0].fcnService = _srvNull;
    /*
    ** Allocate service buffers.
    */
    mdl->pszIn = malloc(mdl->nMaxSrvBytes+sizeof(SRVHEAD));
    assert(mdl->pszIn != NULL);
    mdl->pszOut = malloc(mdl->nMaxSrvBytes+sizeof(SRVHEAD));
    assert(mdl->pszOut != NULL);
    mdl->pszBuf = malloc(mdl->nMaxSrvBytes+sizeof(SRVHEAD));
    assert(mdl->pszBuf != NULL);
    /*
    ** Allocate swapping transfer buffer. This buffer remains fixed.
    */
    mdl->pszTrans = malloc(MDL_TRANS_SIZE);
    assert(mdl->pszTrans != NULL);
    /*
    ** Allocate initial cache spaces.
    */
    mdl->cache = malloc(mdl->nMaxCacheIds*sizeof(CACHE));
    assert(mdl->cache != NULL);
    /*
    ** Initialize caching spaces.
    */
    for (i=0;i<mdl->nMaxCacheIds;++i) {
        mdl->cache[i].iType = MDL_NOCACHE;
    }

    /* Count the arguments; only a NULL-terminated argv is passed in. */
    for (argc = 0; argv[argc]; argc++);

    MPI_Init(&argc, &argv);

    /*
    ** Do some low level argument parsing for number of threads, and
    ** diagnostic flag!
    */
    bDiag = 0;
    bThreads = 0;
    i = 1;
    while (argv[i]) {
        if (!strcmp(argv[i],"-sz") && !bThreads) {
            ++i;
            /*
            ** "-sz" was the last argument: stop here rather than
            ** passing NULL to strcmp() and stepping past the terminator.
            */
            if (!argv[i]) break;
            bThreads = 1;
        }
        if (!strcmp(argv[i],"+d") && !bDiag) {
            p = getenv("MDL_DIAGNOSTIC");
            if (!p) p = getenv("HOME");
            /* Bounded copy: the environment value may exceed the buffer. */
            if (!p) snprintf(ach,sizeof(ach),"/tmp");
            else snprintf(ach,sizeof(ach),"%s",p);
            bDiag = 1;
        }
        ++i;
    }
    if (bThreads) {
        fprintf(stderr,"Warning: -sz parameter ignored, using as many\n");
        fprintf(stderr,"         processors as specified in environment.\n");
        fflush(stderr);
    }

    MPI_Comm_size(MPI_COMM_WORLD, &mdl->nThreads);
    MPI_Comm_rank(MPI_COMM_WORLD, &mdl->idSelf);

    /*
    ** Allocate caching buffers, with initial data size of 0.
    ** We need one reply buffer for each thread, to avoid deadlock situations.
    */
    mdl->iMaxDataSize = 0;
    mdl->iCaBufSize = sizeof(CAHEAD);
    mdl->pszRcv = malloc(mdl->iCaBufSize);
    assert(mdl->pszRcv != NULL);
    mdl->ppszRpl = malloc(mdl->nThreads*sizeof(char *));
    assert(mdl->ppszRpl != NULL);
    mdl->pmidRpl = malloc(mdl->nThreads*sizeof(int));
    assert(mdl->pmidRpl != NULL);
    for (i=0;i<mdl->nThreads;++i)
        mdl->pmidRpl[i] = -1;
    mdl->pReqRpl = malloc(mdl->nThreads*sizeof(MPI_Request));
    assert(mdl->pReqRpl != NULL);
    for (i=0;i<mdl->nThreads;++i) {
        mdl->ppszRpl[i] = malloc(mdl->iCaBufSize);
        assert(mdl->ppszRpl[i] != NULL);
    }
    mdl->pszFlsh = malloc(mdl->iCaBufSize);
    assert(mdl->pszFlsh != NULL);
    mdl->bDiag = bDiag;
    *pmdl = mdl;
    if (mdl->bDiag) {
        /* Diagnostic file: <dir>/<program basename>.<rank> */
        char *tmp = strrchr(argv[0],'/');
        if (!tmp) tmp = argv[0];
        else ++tmp;
        snprintf(achDiag,sizeof(achDiag),"%s/%s.%d",ach,tmp,mdl->idSelf);
        mdl->fpDiag = fopen(achDiag,"w");
        assert(mdl->fpDiag != NULL);
    }
    if (mdl->nThreads > 1 && mdl->idSelf) {
        /*
        ** Child thread.
        */
        (*fcnChild)(mdl);
        mdlFinish(mdl);
        exit(0);
    }
    return(mdl->nThreads);
}
int main(const int argc, char ** argv) { shmem_init(); my_bucket_keys = (KEY_TYPE *)shmem_malloc(KEY_BUFFER_SIZE * sizeof(KEY_TYPE)); assert(my_bucket_keys); // fprintf(stderr, "PE %d allocating %llu bytes at %p\n", shmem_my_pe(), // KEY_BUFFER_SIZE * sizeof(KEY_TYPE), my_bucket_keys); #ifdef EXTRA_STATS _timer_t total_time; if(shmem_my_pe() == 0) { printf("\n-----\nmkdir timedrun fake\n\n"); timer_start(&total_time); } #endif init_shmem_sync_array(pSync); char * log_file = parse_params(argc, argv); int err = bucket_sort(); log_times(log_file); #ifdef EXTRA_STATS if(shmem_my_pe() == 0) { just_timer_stop(&total_time); double tTime = ( total_time.stop.tv_sec - total_time.start.tv_sec ) + ( total_time.stop.tv_nsec - total_time.start.tv_nsec )/1E9; avg_time *= 1000; avg_time_all2all *= 1000; printf("\n============================ MMTk Statistics Totals ============================\n"); if(NUM_ITERATIONS == 1) { //TODO: fix time calculation below for more number of iterations printf("time.mu\tt.ATA_KEYS\tt.MAKE_INPUT\tt.COUNT_BUCKET_SIZES\tt.BUCKETIZE\tt.COMPUTE_OFFSETS\tt.LOCAL_SORT\tBARRIER_AT_START\tBARRIER_AT_EXCHANGE\tBARRIER_AT_END\tnWorkers\tnPEs\n"); double TIMES[TIMER_NTIMERS]; memset(TIMES, 0x00, sizeof(double) * TIMER_NTIMERS); for(int i=0; i<NUM_PES; i++) { for(int t = 0; t < TIMER_NTIMERS; ++t){ if(timers[t].all_times != NULL){ TIMES[t] += timers[t].all_times[i]; } } } for(int t = 0; t < TIMER_NTIMERS; ++t){ printf("%.3f\t", (TIMES[t]/NUM_PES)*1000); } printf("1\t%d\n",NUM_PES); printf("Total time: %.3f\n",(TIMES[0]/NUM_PES)*1000); } else { printf("time.mu\ttimeAll2All\tnWorkers\tnPEs\n"); printf("%.3f\t%.3f\t1\t%d\n",avg_time,avg_time_all2all,NUM_PES); printf("Total time: %.3f\n",avg_time); } printf("------------------------------ End MMTk Statistics -----------------------------\n"); printf("===== TEST PASSED in %.3f msec =====\n",(tTime*1000)); } #endif shmem_finalize(); return err; }
int main() { int start,stride,rmlast,rstride,np_aset,inset,lpe; int my_pe,n_pes; int i,fail,n_err,asfail,nasfail; char Case[40]; static int sSource_int[NREDUCE]; static int sTarget_int[NREDUCE]; static int spWrk_int[PWRKELEM]; static long spSync[_SHMEM_REDUCE_SYNC_SIZE]; shmem_init(); my_pe = shmem_my_pe(); n_pes = shmem_n_pes(); lpe=my_pe; dpSync=shmem_malloc(_SHMEM_REDUCE_SYNC_SIZE*sizeof(long)); for(i=0;i<_SHMEM_REDUCE_SYNC_SIZE;i++) { gpSync[i]=_SHMEM_SYNC_VALUE; dpSync[i]=_SHMEM_SYNC_VALUE; spSync[i]=_SHMEM_SYNC_VALUE; } dSource_int=shmem_malloc(NREDUCE*sizeof(int)); dTarget_int=shmem_malloc(NREDUCE*sizeof(int)); dpWrk_int=shmem_malloc((NREDUCE/2+1 > _SHMEM_REDUCE_MIN_WRKDATA_SIZE ? NREDUCE/2+1 : _SHMEM_REDUCE_MIN_WRKDATA_SIZE)*sizeof(int)); for(start=0;start<=MAXSTART;start++) { rstride=1; for(stride=0;stride<=MAXSTRIDE;stride++) { for(rmlast=0;rmlast<=MAXRMLAST;rmlast++) { np_aset=(n_pes+rstride-1-start)/rstride-rmlast; /* number of processes in the active set */ if(np_aset > 0) /* if active set is not empty */ { if(my_pe==0) printf("\nActive set triplet: PE_start=%d,logPE_stride=%d,PE_size=%d \n",start,stride,np_aset); if((my_pe>=start) && ((my_pe-start)%rstride==0) && ((my_pe-start)/rstride<np_aset)) inset=1; else inset=0; /* Initialize Source and Target arrays */ for(i=0;i<NREDUCE;i++) { sSource_int[i]=SINIT; sTarget_int[i]=TINIT; gSource_int[i]=SINIT; gTarget_int[i]=TINIT; dSource_int[i]=SINIT; dTarget_int[i]=TINIT; } shmem_barrier_all(); /* CASE: static arrays, source is different from target */ sprintf(Case,"static, source!=target"); if(inset) asfail=or_int(sSource_int,sTarget_int,start,stride,np_aset,rstride,0,dpWrk_int,gpSync,Case); else { /* check that values of source and target have not been changed */ nasfail+=check_sval_notchanged(sSource_int,Case); nasfail+=check_tval_notchanged(sTarget_int,Case); } /* CASE: global arrays, source is different from target */ sprintf(Case,"global, source!=target"); if(inset) 
asfail=or_int(gSource_int,gTarget_int,start,stride,np_aset,rstride,0,spWrk_int,dpSync,Case); else { /* check that values of source and target have not been changed */ nasfail+=check_sval_notchanged(gSource_int,Case); nasfail+=check_tval_notchanged(gTarget_int,Case); } /* CASE: symmetric heap arrays, source is different from target */ sprintf(Case,"sym heap, source!=target"); if(inset) asfail=or_int(dSource_int,dTarget_int,start,stride,np_aset,rstride,0,gpWrk_int,spSync,Case); else { /* check that values of source and target have not been changed */ nasfail+=check_sval_notchanged(dSource_int,Case); nasfail+=check_tval_notchanged(dTarget_int,Case); } /* Reinitialize Source arrays for new tests */ for(i=0;i<NREDUCE;i++) { sSource_int[i]=SINIT; gSource_int[i]=SINIT; dSource_int[i]=SINIT; } shmem_barrier_all(); /* CASE: static arrays, source and target are the same array */ sprintf(Case,"static, source==target"); if(inset) asfail=or_int(sSource_int,sSource_int,start,stride,np_aset,rstride,1,gpWrk_int,dpSync,Case); else /* check that values of source have not been changed */ nasfail+=check_sval_notchanged(sSource_int,Case); /* CASE: global arrays, source and target are the same array */ sprintf(Case,"global, source==target"); if(inset) asfail=or_int(gSource_int,gSource_int,start,stride,np_aset,rstride,1,dpWrk_int,spSync,Case); else /* check that values of source have not been changed */ nasfail+=check_sval_notchanged(gSource_int,Case); /* CASE: symmetric heap arrays, source and target are the same array */ sprintf(Case,"sym heap, source==target"); if(inset) asfail=or_int(dSource_int,dSource_int,start,stride,np_aset,rstride,1,spWrk_int,gpSync,Case); else /* check that values of source have not been changed */ nasfail+=check_sval_notchanged(dSource_int,Case); } /* end of if active set is not empty */ } /* end of for loop on rmlast */ rstride*=2; } /* end of for loop on stride */ } /* end of for loop on start */ shmem_barrier_all(); #ifdef NEEDS_FINALIZE shmem_finalize(); 
#endif return(0); }
int main(int argc, char **argv) { int loops=DFLT_LOOPS; char *pgm; int *Target; int *Source; int i, me, npes; int target_pe; long bytes; double time_taken=0.0, start_time; shmem_init(); me = shmem_my_pe(); npes = shmem_n_pes(); if ((pgm=strrchr(argv[0],'/'))) pgm++; else pgm = argv[0]; while ((i = getopt (argc, argv, "hve:l:st")) != EOF) { switch (i) { case 'v': Verbose++; break; case 'e': if ((elements = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad elements count %d\n",elements); shmem_finalize(); return 1; } break; case 'l': if ((loops = atoi_scaled(optarg)) <= 0) { fprintf(stderr,"ERR: Bad loop count %d\n",loops); shmem_finalize(); return 1; } break; case 's': Sync++; break; case 't': Track++; break; case 'h': if (me == 0) usage(pgm); return 0; default: if (me == 0) { fprintf(stderr,"%s: unknown switch '-%c'?\n",pgm,i); usage(pgm); } shmem_finalize(); return 1; } } target_pe = (me+1) % npes; total_time = (double *) shmem_malloc( npes * sizeof(double) ); if (!total_time) { fprintf(stderr,"ERR: bad total_time shmem_malloc(%ld)\n", (elements * sizeof(double))); shmem_global_exit(1); } Source = (int *) shmem_malloc( elements * sizeof(*Source) ); if (!Source) { fprintf(stderr,"ERR: bad Source shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(total_time); shmem_global_exit(1); } Target = (int *) shmem_malloc( elements * sizeof(*Target) ); if (!Target) { fprintf(stderr,"ERR: bad Target shmem_malloc(%ld)\n", (elements * sizeof(*Target))); shmem_free(Source); shmem_free(total_time); shmem_global_exit(1); } for (i = 0; i < elements; i++) { Target[i] = -90; Source[i] = i + 1; } bytes = loops * sizeof(int) * elements; if (Verbose && me==0) fprintf(stderr, "%s: INFO - %d loops, get %d (int) elements from PE+1\n", pgm, loops, elements); shmem_barrier_all(); for(i=0; i < loops; i++) { start_time = shmemx_wtime(); shmem_int_get( Target, Source, elements, target_pe ); time_taken += shmemx_wtime() - start_time; if (me==0) { if ( Track && i > 0 && ((i % 200) 
== 0)) fprintf(stderr,".%d",i); } if (Sync) shmem_barrier_all(); } // collect time per node elapsed time. shmem_double_put( &total_time[me], &time_taken, 1, 0 ); shmem_barrier_all(); for (i = 0; i < elements; i++) { if (Target[i] != i + 1) { printf("%d: Error Target[%d] = %d, expected %d\n", me, i, Target[i], i + 1); shmem_global_exit(1); } } if ( Track && me == 0 ) fprintf(stderr,"\n"); if (Verbose && me == 0) { double rate,secs; // average time for(i=0,secs=0.0; i < npes; i++) secs += total_time[i]; secs /= (double)npes; rate = ((double)bytes/(1024.0*1024.0)) / secs; printf("%s: ave %5.3f MB/sec (bytes %ld secs %5.3f)\n", pgm, rate, bytes, secs); } shmem_free(total_time); shmem_free(Target); shmem_free(Source); shmem_finalize(); return 0; }
int main(int argc, char **argv) { int me, nProcs, c, l; int nWords, loops, incWords; int Verbose = 0, power2 = 0, modulo = 5; DataType *dp; pgm = strrchr(argv[0],'/'); if ( pgm ) pgm++; else pgm = argv[0]; shmem_init(); me = shmem_my_pe(); nProcs = shmem_n_pes(); while ((c = getopt (argc, argv, "hpv")) != -1) switch (c) { case 'p': power2++; break; case 'v': Verbose++; break; case 'h': default: usage(); break; } if (optind == argc) nWords = DFLT_NWORDS; else if ((nWords = getSize (argv[optind++])) <= 0) usage (); if (optind == argc) loops = DFLT_LOOPS; else if ((loops = getSize (argv[optind++])) < 0) usage (); if (optind == argc) incWords = DFLT_INCR; else if ((incWords = getSize (argv[optind++])) < 0) usage (); if (power2) { nWords = 1; modulo = 1; loops = 21; } if (Verbose && me == 0) { if (power2) { printf("%s: nWords(1) << 1 per loop.\n", pgm); } else printf("%s: nWords(%d) loops(%d) nWords-incr-per-loop(%d)\n", pgm, nWords, loops, incWords); } for(l=0; l < loops; l++) { result_sz = (nProcs-1) * (nWords * sizeof(DataType)); result = (DataType *)shmem_malloc(result_sz); if (! 
result) { perror ("Failed result memory allocation"); shmem_finalize(); exit (1); } for(dp=result; dp < &result[(result_sz/sizeof(DataType))];) *dp++ = 1; target_sz = nWords * sizeof(DataType); if (!(target = (DataType *)shmem_malloc(target_sz))) { perror ("Failed target memory allocation"); shmem_finalize(); exit (1); } for(dp=target; dp < &target[(target_sz / sizeof(DataType))];) *dp++ = 2; source_sz = 2 * nWords * sizeof(DataType); if (!(source = (DataType *)shmem_malloc(source_sz))) { perror ("Failed source memory allocation"); shmem_finalize(); exit (1); } for(dp=source; dp < &source[(source_sz / sizeof(DataType))];) *dp++ = 3; #if 0 printf("[%d] source %p target %p result %p\n", me, (void*)source,(void*)target,(void*)result); shmem_barrier_all(); #endif shmem_barrier_all(); /* sync sender and receiver */ for(dp=source; dp < &source[(source_sz / sizeof(DataType))]; dp++) if (*dp != 3 ) { printf("source not consistent @ 3?\n"); break; } shmem_free(source); for(dp=target; dp < &target[(target_sz / sizeof(DataType))]; dp++) if (*dp != 2 ) { printf("target not consistent @ 2?\n"); break; } shmem_free(target); for(dp=result; dp < &result[(result_sz / sizeof(DataType))]; dp++) if (*dp != 1 ) { printf("result not consistent @ 1?\n"); break; } shmem_free(result); if (loops > 1) { if (Verbose && me == 0) { if (l == 0 || (l % modulo == 0)) printf("End loop %3d nWords(%d)\n",(l+1),nWords); } if (power2) nWords <<= 1; else nWords += incWords; // watch for double inc. } } shmem_finalize(); return 0; }
/* Print the pass/fail line for one routine and count the failure, if any. */
static void report_put_result (const char *name, int failed, int *fail_count)
{
    if (failed == 0) {
        printf ("Test %s: Passed\n", name);
    }
    else {
        printf ("Test %s: Failed\n", name);
        (*fail_count)++;
    }
}

/*
 * Functional test of the OpenSHMEM put family.
 *
 * Every PE fills its source buffers with its own PE number and puts them to
 * PE (me + 1) % npes.  PE 0 therefore expects to receive npes - 1 and prints
 * one pass/fail line per routine, followed by a summary.  The groups tested
 * are: typed puts and putmem, sized puts (32/64/128), sized strided puts,
 * typed strided puts and single-element puts.
 *
 * Requires more than one PE; otherwise the test is skipped.  Returns 0 in
 * all cases.
 */
int main (int argc, char **argv)
{
    int i;
    int nextpe;
    int me, npes;
    /* successN == 0 means "no mismatch seen" for the matching destN. */
    int success1, success2, success3, success4, success5, success6, success7,
        success8;
    int fail_count = 0;
    /* Strided puts read every second source element, so only N / 2 elements
       fit inside the N-element source arrays. */
    const int n_strided = N / 2;

    short src1[N];
    int src2[N];
    long src3[N];
    long double src4[N];
    long long src5[N];
    double src6[N];
    float src7[N];
    char *src8;
    short src9;
    int src10;
    long src11;
    double src12;
    float src13;

    short *dest1;
    int *dest2;
    long *dest3;
    long double *dest4;
    long long *dest5;
    double *dest6;
    float *dest7;
    char *dest8;
    short *dest9;
    int *dest10;
    long *dest11;
    double *dest12;
    float *dest13;

    shmem_init ();
    me = shmem_my_pe ();
    npes = shmem_n_pes ();

    if (npes > 1) {
        success1 = 0;
        success2 = 0;
        success3 = 0;
        success4 = 0;
        success5 = 0;
        success6 = 0;
        success7 = 0;
        success8 = 0;

        src8 = (char *) malloc (N * sizeof (char));
        if (src8 == NULL) {
            fprintf (stderr, "%d: malloc of src8 failed\n", me);
            shmem_global_exit (1);
        }

        for (i = 0; i < N; i += 1) {
            src1[i] = (short) me;
            src2[i] = me;
            src3[i] = (long) me;
            src4[i] = (long double) me;
            src5[i] = (long long) me;
            src6[i] = (double) me;
            src7[i] = (float) me;
            src8[i] = (char) me;
        }
        src9 = (short) me;
        src10 = me;
        src11 = (long) me;
        src12 = (double) me;
        src13 = (float) me;

        dest1 = (short *) shmem_malloc (N * sizeof (*dest1));
        dest2 = (int *) shmem_malloc (N * sizeof (*dest2));
        dest3 = (long *) shmem_malloc (N * sizeof (*dest3));
        dest4 = (long double *) shmem_malloc (N * sizeof (*dest4));
        dest5 = (long long *) shmem_malloc (N * sizeof (*dest5));
        dest6 = (double *) shmem_malloc (N * sizeof (*dest6));
        dest7 = (float *) shmem_malloc (N * sizeof (*dest7));
        /* dest8 receives N bytes (see the init loop and shmem_putmem below),
           so it must hold N elements, not 4. */
        dest8 = (char *) shmem_malloc (N * sizeof (*dest8));
        dest9 = (short *) shmem_malloc (sizeof (*dest9));
        dest10 = (int *) shmem_malloc (sizeof (*dest10));
        dest11 = (long *) shmem_malloc (sizeof (*dest11));
        dest12 = (double *) shmem_malloc (sizeof (*dest12));
        dest13 = (float *) shmem_malloc (sizeof (*dest13));

        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest4[i] = -9;
            dest5[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9.0;
            dest8[i] = -9;
        }
        *dest9 = -9;
        *dest10 = -9;
        *dest11 = -9;
        *dest12 = -9;
        *dest13 = -9.0;

        nextpe = (me + 1) % npes;

        /* Testing shmem_short_put, shmem_int_put, shmem_long_put,
           shmem_longdouble_put, shmem_longlong_put, shmem_double_put,
           shmem_float_put, shmem_putmem */
        shmem_barrier_all ();

        shmem_short_put (dest1, src1, N, nextpe);
        shmem_int_put (dest2, src2, N, nextpe);
        shmem_long_put (dest3, src3, N, nextpe);
        shmem_longdouble_put (dest4, src4, N, nextpe);
        shmem_longlong_put (dest5, src5, N, nextpe);
        shmem_double_put (dest6, src6, N, nextpe);
        shmem_float_put (dest7, src7, N, nextpe);
        shmem_putmem (dest8, src8, N * sizeof (char), nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < N; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    success1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    success2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    success3 = 1;
                }
                if (dest4[i] != (npes - 1)) {
                    success4 = 1;
                }
                if (dest5[i] != (npes - 1)) {
                    success5 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    success6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    success7 = 1;
                }
                if (dest8[i] != (npes - 1)) {
                    success8 = 1;
                }
            }

            report_put_result ("shmem_short_put", success1, &fail_count);
            report_put_result ("shmem_int_put", success2, &fail_count);
            report_put_result ("shmem_long_put", success3, &fail_count);
            report_put_result ("shmem_longdouble_put", success4, &fail_count);
            report_put_result ("shmem_longlong_put", success5, &fail_count);
            report_put_result ("shmem_double_put", success6, &fail_count);
            report_put_result ("shmem_float_put", success7, &fail_count);
            report_put_result ("shmem_putmem", success8, &fail_count);
        }
        shmem_barrier_all ();

        /* Testing shmem_put32, shmem_put64, shmem_put128 */
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            shmem_put32 (dest2, src2, N, nextpe);
            shmem_put64 (dest3, src3, N, nextpe);
            shmem_put128 (dest4, src4, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        success4 = 1;
                    }
                }
                report_put_result ("shmem_put32", success2, &fail_count);
                report_put_result ("shmem_put64", success3, &fail_count);
                report_put_result ("shmem_put128", success4, &fail_count);
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_put32 (dest1, src1, N, nextpe);
            shmem_put64 (dest2, src2, N, nextpe);
            shmem_put128 (dest3, src3, N, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < N; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                }
                report_put_result ("shmem_put32", success1, &fail_count);
                report_put_result ("shmem_put64", success2, &fail_count);
                report_put_result ("shmem_put128", success3, &fail_count);
            }
        }

        /* Testing shmem_iput32, shmem_iput64, shmem_iput128 */
        shmem_barrier_all ();
        if (sizeof (int) == 4) {
            for (i = 0; i < N; i += 1) {
                dest2[i] = -9;
                dest3[i] = -9;
                dest4[i] = -9;
            }
            success2 = 0;
            success3 = 0;
            success4 = 0;

            shmem_barrier_all ();

            /* Destination stride 1, source stride 2: n_strided elements keep
               the source reads inside the N-element arrays. */
            shmem_iput32 (dest2, src2, 1, 2, n_strided, nextpe);
            shmem_iput64 (dest3, src3, 1, 2, n_strided, nextpe);
            shmem_iput128 (dest4, src4, 1, 2, n_strided, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < n_strided; i += 1) {
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                    if (dest4[i] != (npes - 1)) {
                        success4 = 1;
                    }
                }
                report_put_result ("shmem_iput32", success2, &fail_count);
                report_put_result ("shmem_iput64", success3, &fail_count);
                report_put_result ("shmem_iput128", success4, &fail_count);
            }
        }
        else if (sizeof (int) == 8) {
            for (i = 0; i < N; i += 1) {
                dest1[i] = -9;
                dest2[i] = -9;
                dest3[i] = -9;
            }
            success1 = 0;
            success2 = 0;
            success3 = 0;

            shmem_barrier_all ();

            shmem_iput32 (dest1, src1, 1, 2, n_strided, nextpe);
            shmem_iput64 (dest2, src2, 1, 2, n_strided, nextpe);
            shmem_iput128 (dest3, src3, 1, 2, n_strided, nextpe);

            shmem_barrier_all ();

            if (me == 0) {
                for (i = 0; i < n_strided; i += 1) {
                    if (dest1[i] != (npes - 1)) {
                        success1 = 1;
                    }
                    if (dest2[i] != (npes - 1)) {
                        success2 = 1;
                    }
                    if (dest3[i] != (npes - 1)) {
                        success3 = 1;
                    }
                }
                report_put_result ("shmem_iput32", success1, &fail_count);
                report_put_result ("shmem_iput64", success2, &fail_count);
                report_put_result ("shmem_iput128", success3, &fail_count);
            }
        }

        /* Testing shmem_short_iput, shmem_int_iput, shmem_long_iput,
           shmem_double_iput, shmem_float_iput */
        for (i = 0; i < N; i += 1) {
            dest1[i] = -9;
            dest2[i] = -9;
            dest3[i] = -9;
            dest6[i] = -9;
            dest7[i] = -9;
        }
        success1 = 0;
        success2 = 0;
        success3 = 0;
        success6 = 0;
        success7 = 0;

        shmem_barrier_all ();

        shmem_short_iput (dest1, src1, 1, 2, n_strided, nextpe);
        shmem_int_iput (dest2, src2, 1, 2, n_strided, nextpe);
        shmem_long_iput (dest3, src3, 1, 2, n_strided, nextpe);
        shmem_double_iput (dest6, src6, 1, 2, n_strided, nextpe);
        shmem_float_iput (dest7, src7, 1, 2, n_strided, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            for (i = 0; i < n_strided; i += 1) {
                if (dest1[i] != (npes - 1)) {
                    success1 = 1;
                }
                if (dest2[i] != (npes - 1)) {
                    success2 = 1;
                }
                if (dest3[i] != (npes - 1)) {
                    success3 = 1;
                }
                if (dest6[i] != (npes - 1)) {
                    success6 = 1;
                }
                if (dest7[i] != (npes - 1)) {
                    success7 = 1;
                }
            }
            report_put_result ("shmem_short_iput", success1, &fail_count);
            report_put_result ("shmem_int_iput", success2, &fail_count);
            report_put_result ("shmem_long_iput", success3, &fail_count);
            report_put_result ("shmem_double_iput", success6, &fail_count);
            report_put_result ("shmem_float_iput", success7, &fail_count);
        }

        /* Testing shmem_double_p, shmem_float_p, shmem_int_p, shmem_long_p,
           shmem_short_p */
        shmem_barrier_all ();

        shmem_short_p (dest9, src9, nextpe);
        shmem_int_p (dest10, src10, nextpe);
        shmem_long_p (dest11, src11, nextpe);
        shmem_double_p (dest12, src12, nextpe);
        shmem_float_p (dest13, src13, nextpe);

        shmem_barrier_all ();

        if (me == 0) {
            report_put_result ("shmem_short_p", *dest9 != (npes - 1),
                               &fail_count);
            report_put_result ("shmem_int_p", *dest10 != (npes - 1),
                               &fail_count);
            report_put_result ("shmem_long_p", *dest11 != (npes - 1),
                               &fail_count);
            report_put_result ("shmem_double_p", *dest12 != (npes - 1),
                               &fail_count);
            report_put_result ("shmem_float_p", *dest13 != (npes - 1),
                               &fail_count);
        }

        shmem_barrier_all ();

        if (me == 0) {
            if (fail_count == 0)
                printf("All Tests Passed\n");
            else
                printf("%d Tests Failed\n", fail_count);
        }

        shmem_free (dest1);
        shmem_free (dest2);
        shmem_free (dest3);
        shmem_free (dest4);
        shmem_free (dest5);
        shmem_free (dest6);
        shmem_free (dest7);
        shmem_free (dest8);
        shmem_free (dest9);
        shmem_free (dest10);
        shmem_free (dest11);
        shmem_free (dest12);
        shmem_free (dest13);

        /* src8 is the only private heap buffer; release it too. */
        free (src8);
    }
    else {
        printf ("Number of PEs must be > 1 to test shmem put, test skipped\n");
    }

    shmem_finalize ();

    return 0;
}
int main(int argc, char* argv[]) { int c, j, cloop, loops = DFLT_LOOPS; int mpe, num_pes; int nWords=1; int nIncr=1; int failures=0; char *pgm; shmem_init(); mpe = shmem_my_pe(); num_pes = shmem_n_pes(); if (num_pes == 1) { Rfprintf(stderr, "ERR - Requires > 1 PEs\n"); shmem_finalize(); return 0; } pgm = strrchr(argv[0],'/'); if ( pgm ) pgm++; else pgm = argv[0]; while((c=getopt(argc,argv,"hqVvl:")) != -1) { switch(c) { case 'V': case 'v': Verbose++; break; case 'l': loops = atoi(optarg); break; case 'h': Rfprintf(stderr, "usage: %s {-l loopcnt(%d)} {numLongs(%d)} {loopIncr(%d)}\n", pgm,DFLT_LOOPS,DFLT_NWORDS,DFLT_INCR); shmem_finalize(); return 1; default: shmem_finalize(); return 1; } } if (optind == argc) nWords = DFLT_NWORDS; else { nWords = atoi_scaled(argv[optind++]); if (nWords <= 0) { Rfprintf(stderr, "ERR - Bad nBytes arg?\n"); shmem_finalize(); return 1; } } if (optind == argc) nIncr = DFLT_INCR; else { loops = atoi(argv[optind++]); if (nIncr <= 0 ) { Rfprintf(stderr, "ERR - incLongs arg out of bounds '%d'?\n", nIncr); shmem_finalize(); return 1; } } if ( nWords % 8 ) { // integral multiple of longs Rprintf("%s: nWords(%d) not a multiple of %ld?\n", pgm,nWords,sizeof(long)); shmem_finalize(); return 1; } for (c = 0; c < SHMEM_COLLECT_SYNC_SIZE;c++) pSync[c] = SHMEM_SYNC_VALUE; if (Verbose && mpe == 0) fprintf(stderr,"loops(%d) nWords(%d) incr-per-loop(%d)\n", loops,nWords,nIncr); for(cloop=1; cloop <= loops; cloop++) { c = (sizeof(long)*nWords) * (num_pes + 1); // src + dst allocation. //nWords /= sizeof(long); // convert input of bytes --> longs. src = (long*)shmem_malloc(c); if ( !src ) { Rprintf("[%d] %s: shmem_malloc(%d) failed?\n", mpe, pgm,c); shmem_global_exit(1); } dst = &src[nWords]; for(j=0; j < nWords; j++) src[j] = (long) (j + mpe*nWords); shmem_barrier_all(); shmem_fcollect64(dst,src,nWords,0,0,num_pes,pSync); // Expect dst to be consecuative integers 0 ... 
(nLongs*num_pes)-1 for(j=0; j < (nWords*num_pes); j++) { if ( dst[j] != (long) j ) { fprintf(stderr, "[%d] dst[%d] %ld != expected %d\n",mpe,j,dst[j],j); shmem_global_exit(1); } } shmem_barrier_all(); if (Verbose && mpe == 0 && loops > 1) { fprintf(stderr,"."); } nWords += nIncr; } if (Verbose && mpe == 0) { fprintf(stderr,"\n");fflush(stderr); } shmem_free( (void*)src ); shmem_barrier_all(); if (Verbose) printf("%d(%d) Exit(%d)\n", mpe, num_pes, failures); shmem_finalize(); return failures; }