glio_arena_t * _glio_arena_create( glio_group_t *gg, /* group of processes to share access */ size_t asize) /* arena size (bytes per process) */ { char *fname, *fncpy; int fd; int groupsz; int namelen; int myrank; size_t arena_size; glio_arena_t *arp; shmem_group_t shg; void *aret; MPI_Comm comm; MPI_Status mpistatus; groupsz = gg->groupsz; myrank = gg->myrank; arp = malloc(sizeof(*arp)); if (arp == NULL) { fprintf(stderr,"%s:\n\ _glio_arena_create(a) could not allocate a memory object of size %lld bytes\n", GLOBERRMSG, (long long)sizeof(*arp)); abort(); } bzero(arp, sizeof(*arp)); arp->grp = *gg; /* copy it */ gg = &arp->grp; /* point to the new copy */ /* * Process with rank 0 finds a unique new file name to use as * a memory mapped file. */ if (myrank == 0) { fname = NULL; do { if (fname != NULL) free(fname); fname = tempnam(NULL, "glio_arena"); assert(fname != NULL); fd = open(fname, O_CREAT | O_EXCL | O_RDWR, 0700); } while (fd == -1 && errno == EEXIST); } /* * Trivial groups of size 1 can be handled trivially. */ if (groupsz == 1) goto past_file_name_send; _glio_barrier(arp); /* * Initialization */ switch (gg->grtype) { case GR_SHMEM: if ( _shmem_group_inquire != NULL) { _shmem_group_inquire(arp->grp.u.shmem.group, &shg); } else { /* Special case for pre-release versions of MPT 1.2 */ static int *world_plist; static int *world_racom; /* if pre-release version of MPT 1.2 is used, then all */ /* PEs are in the group */ assert (groupsz == _num_pes()); if (world_plist == NULL) { register short ipe; world_plist = malloc(_num_pes() * sizeof(int)); if (world_plist == NULL) { fprintf(stderr,"%s:\n\ _glio_arena_create(b) could not allocate a memory object of size %lld bytes\n", GLOBERRMSG, (long long)(_num_pes() * sizeof(int))); abort(); } world_racom = shmalloc(SHMEM_GROUP_COM_SIZE * sizeof(int)); assert(world_racom != NULL); bzero(world_racom, 10*sizeof(int)); for (ipe = 0; ipe < _num_pes(); ipe++) world_plist[ipe] = ipe; } shg.groupsz = _num_pes(); shg.myrank = _my_pe(); shg.plist = world_plist; shg.racom = world_racom; } break; case GR_MPI: comm = arp->grp.u.MPI.comm; break; default: break; } /* * Process 0 now must communicate the file name to all other * processes in the group. */ switch (gg->grtype) { case GR_SHMEM: if (myrank == 0) { void *vp; fncpy = _sma_global_heap_alloc(strlen(fname)+1); assert(fncpy != NULL); strcpy(fncpy, fname); vp = fncpy; /* racom[1] gets string length */ shg.racom[1] = strlen(fname); /* racom[2] and racom[3] get the pointer */ /* to the string. */ memcpy(&shg.racom[2], &vp, sizeof(vp)); } _glio_barrier(arp); /* * Other processes now get the file name. */ if (myrank != 0) { void *vp; namelen = _shmem_int_g( &shg.racom[1], shg.plist[0]); assert(namelen > 0); /* get pointer to the string */ _shmem_getmem(&vp, &shg.racom[2], sizeof(vp), shg.plist[0]); fname = malloc(namelen + 1); if (fname == NULL) { fprintf(stderr,"%s:\n\ _glio_arena_create(c) could not allocate a memory object of size %lld bytes\n", GLOBERRMSG, (long long)(namelen + 1)); abort(); } /* copy the string */ _shmem_getmem(fname, vp, namelen, shg.plist[0]); fname[namelen] = '\0'; } _glio_barrier(arp); if (myrank == 0) { _sma_global_heap_free(fncpy); } break; case GR_MPI: if (myrank == 0) { register int rank; namelen = strlen(fname); for (rank = 1; rank < groupsz; rank++) { ckMPIerr( MPI_Send(&namelen, 1, MPI_INT, rank, 1, comm) ); } for (rank = 1; rank < groupsz; rank++) { ckMPIerr( MPI_Send(fname, namelen, MPI_CHAR, rank, 2, comm) ); } } else { ckMPIerr( MPI_Recv(&namelen, 1, MPI_INT, 0, 1, comm, &mpistatus) ); fname = malloc(namelen + 1); if (fname == NULL) { fprintf(stderr,"%s:\n\ _glio_arena_create(d) could not allocate a memory object of size %lld bytes\n", GLOBERRMSG, (long long)(namelen + 1)); abort(); } ckMPIerr( MPI_Recv(fname, namelen, MPI_CHAR, 0, 2, comm, &mpistatus) ); fname[namelen] = '\0'; } break; default: assert(0); } _glio_barrier(arp); /* * Non-rank-0 processes now open the file. */ if (myrank != 0) { fd = open(fname, O_RDWR, 0700); if (fd == -1) { fprintf(stderr, "%s:\n\ Global I/O failed to open mapped file. Errno is %d\n", GLOBERRMSG, errno); abort(); } } _glio_barrier(arp); past_file_name_send: /* * All processes have opened the file, so rank 0 may now unlink it. */ if (myrank == 0) { unlink(fname); } _glio_barrier(arp); /* * After the barrier process 0 may initialize the mapped * file and unlink it because we know that all processes in the * group have now opened this file. */ arena_size = groupsz * CEILING(asize, 1024); if (myrank == 0) { ssize_t wret; wret = pwrite(fd, " ", 1, arena_size - 1); assert(wret != -1L); } _glio_barrier(arp); /* * A barrier assures us that the file has been initialized * to the right size. Now map the file into every process' * address space. */ aret = mmap64(NULL, arena_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); if (aret == MAP_FAILED) { fprintf(stderr,"%s:\n\ Cannot map internal file %s\n\ for shared memory arena. Error = %d\n", GLOBERRMSG, fname, errno); abort(); }
int main (int argc, char **argv) { /* arrays used to contain each PE's rows - specify cols, no need to spec rows */ float **U_Curr; float **U_Next; /* helper variables */ /* available iterator */ int i, j, k, m, n; int per_proc, remainder, my_start_row, my_end_row, my_num_rows; int verbose = 0; int show_time = 0; double time; double t, tv[2]; /*OpenSHMEM initilization*/ start_pes (0); p = _num_pes (); my_rank = _my_pe (); if (p > 8) { fprintf(stderr, "Ignoring test when run with more than 8 pes\n"); return 77; } /* argument processing done by everyone */ int c, errflg; extern char *optarg; extern int optind, optopt; while ((c = getopt (argc, argv, "e:h:m:tw:v")) != -1) { switch (c) { case 'e': EPSILON = atof (optarg); break; case 'h': HEIGHT = atoi (optarg); break; case 'm': /* selects the numerical methods */ switch (atoi (optarg)) { case 1: /* jacobi */ meth = 1; break; case 2: /* gauss-seidel */ meth = 2; break; case 3: /* sor */ meth = 3; break; } break; case 't': show_time++; /* overridden by -v (verbose) */ break; case 'w': WIDTH = atoi (optarg); break; case 'v': verbose++; break; /* handle bad arguments */ case ':': /* -h or -w without operand */ if (ROOT == my_rank) fprintf (stderr, "Option -%c requires an operand\n", optopt); errflg++; break; case '?': if (ROOT == my_rank) fprintf (stderr, "Unrecognized option: -%c\n", optopt); errflg++; break; } } if (ROOT == my_rank && argc < 2) { printf ("Using defaults: -h 20 -w 20 -m 2\n"); } // if (0 < errflg) // exit(EXIT_FAILURE); /* wait for user to input runtime params */ for (i = 0; i < _SHMEM_REDUCE_SYNC_SIZE; i += 1) pSync[i] = _SHMEM_SYNC_VALUE; shmem_barrier_all (); /* broadcast method to use */ shmem_broadcast32 (&meth, &meth, 1, 0, 0, 0, p, pSync); switch (meth) { case 1: method = &jacobi; break; case 2: method = &gauss_seidel; break; case 3: method = &sor; break; } /* let each processor decide what rows(s) it owns */ my_start_row = get_start (my_rank); my_end_row = get_end (my_rank); my_num_rows = get_num_rows (my_rank); if (0 < verbose) printf ("proc %d contains (%d) rows %d to %d\n", my_rank, my_num_rows, my_start_row, my_end_row); fflush (stdout); /* allocate 2d array */ U_Curr = (float **) malloc (sizeof (float *) * my_num_rows); U_Curr[0] = (float *) malloc (sizeof (float) * my_num_rows * (int) floor (WIDTH / H)); for (i = 1; i < my_num_rows; i++) { U_Curr[i] = U_Curr[i - 1] + (int) floor (WIDTH / H); } /* allocate 2d array */ U_Next = (float **) malloc (sizeof (float *) * my_num_rows); U_Next[0] = (float *) malloc (sizeof (float) * my_num_rows * (int) floor (WIDTH / H)); for (i = 1; i < my_num_rows; i++) { U_Next[i] = U_Next[i - 1] + (int) floor (WIDTH / H); } /* initialize global grid */ init_domain (U_Curr, my_rank); init_domain (U_Next, my_rank); /* iterate for solution */ if (my_rank == ROOT) { tv[0] = gettime (); } k = 1; while (1) { method (U_Curr, U_Next); local_convergence_sqd = get_convergence_sqd (U_Curr, U_Next, my_rank); shmem_barrier_all (); shmem_float_sum_to_all (&convergence_sqd, &local_convergence_sqd, 1, 0, 0, p, pWrk, pSync); if (my_rank == ROOT) { convergence = sqrt (convergence_sqd); if (verbose == 1) { printf ("L2 = %f\n", convergence); } } /* broadcast method to use */ shmem_barrier_all (); shmem_broadcast32 (&convergence, &convergence, 1, 0, 0, 0, p, pSync); if (convergence <= EPSILON) { break; } /* copy U_Next to U_Curr */ for (j = my_start_row; j <= my_end_row; j++) { for (i = 0; i < (int) floor (WIDTH / H); i++) { U_Curr[j - my_start_row][i] = U_Next[j - my_start_row][i]; } } k++; //MPI_Barrier(MPI_COMM_WORLD); shmem_barrier_all (); } /* say something at the end */ if (my_rank == ROOT) { //time = MPI_Wtime() - time; tv[1] = gettime (); t = dt (&tv[1], &tv[0]); printf ("Estimated time to convergence in %d iterations using %d processors on a %dx%d grid is %f seconds\n", k, p, (int) floor (WIDTH / H), (int) floor (HEIGHT / H), t / 1000000.0); } //MPI_Finalize(); exit (EXIT_SUCCESS); return 0; }
int main () { int quantum = -1, checktick (); int BytesPerWord; int k; ssize_t j, i; STREAM_TYPE scalar; /* --- SETUP --- determine precision and check timing --- */ printf (HLINE); printf ("STREAM version $Revision: 5.10 $\n"); printf (HLINE); BytesPerWord = sizeof (STREAM_TYPE); printf ("This system uses %d bytes per array element.\n", BytesPerWord); /* SHMEM initialize */ start_pes (0); _world_size = _num_pes (); _world_rank = _my_pe (); STREAM_TYPE *a = (STREAM_TYPE *) shmalloc ((STREAM_ARRAY_SIZE + OFFSET) * sizeof (STREAM_TYPE)); STREAM_TYPE *b = (STREAM_TYPE *) shmalloc ((STREAM_ARRAY_SIZE + OFFSET) * sizeof (STREAM_TYPE)); STREAM_TYPE *c = (STREAM_TYPE *) shmalloc ((STREAM_ARRAY_SIZE + OFFSET) * sizeof (STREAM_TYPE)); /* wait for user to input runtime params */ for (int j = 0; j < _SHMEM_BARRIER_SYNC_SIZE; j++) { pSync0[j] = pSync1[j] = pSync2[j] = _SHMEM_SYNC_VALUE; } int size = _world_size; if (!(size == 0) && !(size & (size - 1))) ; else { printf ("Program only works for a PE size of power-of-2\n"); exit (-1); } if (_world_rank == 0) { printf (HLINE); #ifdef N printf ("***** WARNING: ******\n"); printf (" It appears that you set the preprocessor variable N when compiling this code.\n"); printf (" This version of the code uses the preprocesor variable STREAM_ARRAY_SIZE to control the array size\n"); printf (" Reverting to default value of STREAM_ARRAY_SIZE=%llu\n", (unsigned long long) STREAM_ARRAY_SIZE); printf ("***** WARNING: ******\n"); #endif printf ("Array size = %llu (elements), Offset = %d (elements)\n", (unsigned long long) STREAM_ARRAY_SIZE, OFFSET); printf ("Memory per array = %.1f MiB (= %.1f GiB).\n", BytesPerWord * ((double) STREAM_ARRAY_SIZE / 1024.0 / 1024.0), BytesPerWord * ((double) STREAM_ARRAY_SIZE / 1024.0 / 1024.0 / 1024.0)); printf ("Total memory required = %.1f MiB (= %.1f GiB).\n", (3.0 * BytesPerWord) * ((double) STREAM_ARRAY_SIZE / 1024.0 / 1024.), (3.0 * BytesPerWord) * ((double) STREAM_ARRAY_SIZE / 1024.0 / 1024. / 1024.)); printf ("Each kernel will be executed %d times.\n", NTIMES); printf (" The *best* time for each kernel (excluding the first iteration)\n"); printf (" will be used to compute the reported bandwidth.\n"); printf ("Number of SHMEM PEs requested = %i\n", _world_size); } int blocksize = 10000; assert (STREAM_ARRAY_SIZE % blocksize == 0); // do something really minor /* Get initial value for system clock. */ for (j = 0; j < STREAM_ARRAY_SIZE; j++) { a[j] = 1.0; b[j] = 2.0; c[j] = 0.0; } printf (HLINE); if (_world_rank == 0) { if ((quantum = checktick ()) >= 1) printf ("Your clock granularity/precision appears to be " "%d microseconds.\n", quantum); else { printf ("Your clock granularity appears to be " "less than one microsecond.\n"); quantum = 1; } } shmem_barrier_all (); // assign fixed iterations per PE // since we know default STREAM array size // we are hardcoding this, but if the value // changes, then this blocking factor must // also change // basically, each PE works on this block // size at a time time_start = mysecond (); /* Initialize */ next_p = shmem_int_fadd (&gcounter, 1, ROOT); for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize) { if (next_p == count_p) { for (i = j; i < (j + blocksize); i++) { a[i] = 2.0E0 * a[i]; } next_p = shmem_int_fadd (&gcounter, 1, ROOT); } count_p++; } time_end = mysecond (); clock_time_PE = time_end - time_start; shmem_double_sum_to_all (&total_clock_time, &clock_time_PE, 1, 0, 0, _world_size, pWrk0, pSync0); if (_world_rank == 0) { printf ("Each test below will take on the order" " of %d microseconds.\n", (int) (total_clock_time * 1.0E6)); printf (" (= %d clock ticks)\n", (int) ((1.0E6 * total_clock_time) / quantum)); printf ("Increase the size of the arrays if this shows that\n"); printf ("you are not getting at least 20 clock ticks per test.\n"); printf (HLINE); printf ("WARNING -- The above is only a rough guideline.\n"); printf ("For best results, please be sure you know the\n"); printf ("precision of your system timer.\n"); printf (HLINE); } /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ // reduction required, as each PE only fills a,b,c partially scalar = 3.0; for (k = 0; k < NTIMES; k++) { for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize) { if (next_p == count_p) { for (i = j; i < (j + blocksize); i++) { a[i] = 1.0; b[i] = 2.0; c[i] = 0.0; a[i] = 2.0E0 * a[i]; } next_p = shmem_int_fadd (&gcounter, 1, ROOT); } count_p++; //shmem_double_max_to_all (a + j, a + j, blocksize, 0, // 0, _world_size, pWrk1, pSync1); shmem_barrier_all (); flat_tree (a + j, a + j, blocksize); } shmem_barrier_all (); time_start = mysecond (); for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize) { if (next_p == count_p) { for (i = j; i < (j + blocksize); i++) { c[i] = a[i]; } next_p = shmem_int_fadd (&gcounter, 1, ROOT); } count_p++; //shmem_double_max_to_all (c + j, c + j, blocksize, 0, // 0, _world_size, pWrk1, pSync1); shmem_barrier_all (); flat_tree (c + j, c + j, blocksize); } shmem_barrier_all (); time_end = mysecond () - time_start; shmem_double_max_to_all (×[0][k], &time_end, 1, 0, 0, _world_size, pWrk0, pSync0); time_start = mysecond (); for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize) { if (next_p == count_p) { for (i = j; i < (j + blocksize); i++) { b[i] = scalar * c[i]; } next_p = shmem_int_fadd (&gcounter, 1, ROOT); } count_p++; //shmem_double_max_to_all (b + j, b + j, blocksize, 0, // 0, _world_size, pWrk1, pSync1); shmem_barrier_all (); flat_tree (b + j, b + j, blocksize); } shmem_barrier_all (); time_end = mysecond () - time_start; shmem_double_sum_to_all (×[1][k], &time_end, 1, 0, 0, _world_size, pWrk0, pSync0); time_start = mysecond (); for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize) { if (next_p == count_p) { for (i = j; i < (j + blocksize); i++) { c[i] = a[i] + b[i]; } next_p = shmem_int_fadd (&gcounter, 1, ROOT); } count_p++; //shmem_double_max_to_all (c + j, c + j, blocksize, 0, // 0, _world_size, pWrk1, pSync1); shmem_barrier_all (); flat_tree (c + j, c + j, blocksize); } shmem_barrier_all (); time_end = mysecond () - time_start; shmem_double_sum_to_all (×[2][k], &time_end, 1, 0, 0, _world_size, pWrk0, pSync0); time_start = mysecond (); for (j = 0; j < STREAM_ARRAY_SIZE; j += blocksize) { if (next_p == count_p) { for (i = j; i < (j + blocksize); i++) { a[i] = b[i] + scalar * c[i]; } next_p = shmem_int_fadd (&gcounter, 1, ROOT); } count_p++; //shmem_double_max_to_all (a + j, a + j, blocksize, 0, // 0, _world_size, pWrk1, pSync1); shmem_barrier_all (); flat_tree (a + j, a + j, blocksize); } shmem_barrier_all (); time_end = mysecond () - time_start; shmem_double_sum_to_all (×[3][k], &time_end, 1, 0, 0, _world_size, pWrk0, pSync0); } shmem_barrier_all (); /* --- SUMMARY --- */ for (k = 1; k < NTIMES; k++) /* note -- skip first iteration */ { for (j = 0; j < 4; j++) { avgtime[j] = avgtime[j] + times[j][k]; mintime[j] = MIN (mintime[j], times[j][k]); maxtime[j] = MAX (maxtime[j], times[j][k]); } } if (_world_rank == 0) { printf ("Function Best Rate MB/s Avg time Min time Max time\n"); for (j = 0; j < 4; j++) { avgtime[j] = avgtime[j] / (double) (NTIMES - 1); printf ("%s%12.1f %11.6f %11.6f %11.6f\n", label[j], 1.0E-06 * bytes[j] / mintime[j], avgtime[j], mintime[j], maxtime[j]); } printf (HLINE); } /* --- Check Results --- */ if (_world_rank == 0) { checkSTREAMresults (a, b, c); printf (HLINE); } shfree (a); shfree (b); shfree (c); return 0; }
static int test_item9(void) { int rc = TC_PASS; static TYPE_VALUE target_addr[MAX_BUFFER_SIZE * 2]; static TYPE_VALUE source_addr[MAX_BUFFER_SIZE * 2]; TYPE_VALUE source_value = 0; TYPE_VALUE expect_value = 0; int num_proc = 0; int my_proc = 0; long* pSyncMult = NULL; TYPE_VALUE* pWrkMult = NULL; int pSyncNum = 2; int pWrkNum = 2; num_proc = _num_pes(); my_proc = _my_pe(); pSyncMult = shmalloc(sizeof(*pSyncMult) * pSyncNum * _SHMEM_REDUCE_SYNC_SIZE); if (pSyncMult) { TYPE_VALUE value = DEFAULT_VALUE; int i = 0; int j = 0; long cur_buf_size = 0; for ( j = 0; j < pSyncNum * _SHMEM_REDUCE_SYNC_SIZE; j++ ) { pSyncMult[j] = _SHMEM_SYNC_VALUE; } /* Give some time to all PE for setting their values */ shmem_barrier_all(); pWrkMult = shmalloc(sizeof(*pWrkMult) * pWrkNum * sys_max(MAX_BUFFER_SIZE, _SHMEM_REDUCE_MIN_WRKDATA_SIZE)); if (pWrkMult) { value = DEFAULT_VALUE; source_value = (TYPE_VALUE)(my_proc + 1); fill_buffer((void *)source_addr, MAX_BUFFER_SIZE * 2, (void *)&source_value, sizeof(source_value)); fill_buffer((void *)target_addr, MAX_BUFFER_SIZE * 2, (void *)&value, sizeof(value)); shmem_barrier_all(); for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++) { cur_buf_size = sys_max(1, (i + 1) * MAX_BUFFER_SIZE / __cycle_count); /* Set initial target value */ value = DEFAULT_VALUE; /* Set my value */ source_value = (TYPE_VALUE)(my_proc + 1); /* Define expected value */ expect_value = 0; if (my_proc % 2) expect_value = DEFAULT_VALUE; else { int k = num_proc; while (k) { if (k % 2) expect_value |= k; k--; } } int in_active_set = check_within_active_set(0, 1, ((num_proc / 2) + (num_proc % 2)), my_proc, num_proc); if ( in_active_set ) { /* Put value to peer */ FUNC_VALUE(target_addr + (i % 2) * MAX_BUFFER_SIZE, source_addr + (i % 2) * MAX_BUFFER_SIZE, cur_buf_size, 0, 1, ((num_proc / 2) + (num_proc % 2)), pWrkMult + (i % pWrkNum) * sys_max(MAX_BUFFER_SIZE, _SHMEM_REDUCE_MIN_WRKDATA_SIZE), pSyncMult + (i % pSyncNum) * _SHMEM_REDUCE_SYNC_SIZE); rc = (!compare_buffer_with_const(target_addr + (i % 2) * MAX_BUFFER_SIZE, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL); log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld buffer size = %lld\n", my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size); if (rc) { TYPE_VALUE* check_addr = target_addr + (i % 2) * MAX_BUFFER_SIZE; int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value)); int show_index = (odd_index > 1 ? odd_index - 2 : 0); int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1); log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1); log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size); show_buffer(check_addr + show_index, show_size); } fill_buffer((void *)(source_addr + (i % 2) * MAX_BUFFER_SIZE), cur_buf_size, (void *)&source_value, sizeof(source_value)); fill_buffer((void *)(target_addr + (i % 2) * MAX_BUFFER_SIZE ), cur_buf_size, (void *)&value, sizeof(value)); } } shfree(pWrkMult); } else { rc = TC_SETUP_FAIL; } shfree(pSyncMult); } else { rc = TC_SETUP_FAIL; } return rc; }
static int test_item1(void) { int rc = TC_PASS; TYPE_VALUE* shmem_addr = NULL; TYPE_VALUE* local_addr = NULL; TYPE_VALUE my_value = 0; TYPE_VALUE peer_value = 0; TYPE_VALUE* expect_value = NULL; int num_proc = 0; int my_proc = 0; int peer_proc = 0; int tst, sst; int max_stride = MAX_ARRAY_SIZE/2-1; int *wait_variable = NULL; wait_variable = shmalloc(sizeof(int)); num_proc = _num_pes(); my_proc = _my_pe(); shmem_addr = shmalloc(sizeof(*shmem_addr)*MAX_ARRAY_SIZE); local_addr = malloc(sizeof(*local_addr)*MAX_ARRAY_SIZE); expect_value = malloc(sizeof(*expect_value)*MAX_ARRAY_SIZE); if (shmem_addr) { INT64_TYPE i = 0; INT64_TYPE j = 0; int num_to_get; my_value = 0; size_t odd_pos = 0; for (i = 0; (i < COUNT_VALUE) && (rc == TC_PASS); i++) { tst = (i < max_stride) ? i+1 : max_stride; sst = tst; num_to_get = MAX_ARRAY_SIZE/tst; /* Set my value */ my_value = (TYPE_VALUE)(my_proc + 1); memset(shmem_addr,0,MAX_ARRAY_SIZE*SIZE_VALUE); memset(expect_value,0,MAX_ARRAY_SIZE*SIZE_VALUE); for (j = 0; j < MAX_ARRAY_SIZE; j++) local_addr[j] = my_value; /* Define peer and it value */ peer_proc = (my_proc + 1) % num_proc; peer_value = (TYPE_VALUE)((my_proc == 0) ? num_proc : my_proc); /* Define expected value */ for (j=0; j<num_to_get; j++) expect_value[j*tst] = peer_value; /* Wait is set instead of barrier to give some time to all PE for setting their values */ shmem_barrier_all(); /* Get value from peer */ FUNC_VALUE(shmem_addr,local_addr,tst,sst,num_to_get,peer_proc); wait_for_completion(wait_variable,peer_proc,&rc); if (rc == TC_PASS) { rc = (compare_longdouble_buffers(shmem_addr, expect_value, MAX_ARRAY_SIZE, &odd_pos) ? TC_PASS : TC_FAIL); } log_debug(OSH_TC, "my(#%d:%lld) peer(#%d:%lld) expected = %lld vs got = %lld, odd = %i\n", my_proc, (INT64_TYPE)my_value, peer_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value[odd_pos], (INT64_TYPE)shmem_addr[odd_pos],odd_pos); /* Wait is set instead of barrier to give some time to all PE for setting their values */ shmem_barrier_all(); } } else { rc = TC_SETUP_FAIL; } if (local_addr) { free(local_addr); } if (expect_value) { free(expect_value); } if (shmem_addr) { shfree(shmem_addr); } if (wait_variable) { shfree(wait_variable); } return rc; }
static int test_item3(void) { int rc = TC_PASS; TYPE_VALUE* shmem_addr = NULL; TYPE_VALUE my_value = 0; TYPE_VALUE peer_value = 0; TYPE_VALUE expect_value = 0; int num_proc = 0; int my_proc = 0; int peer_proc = 0; num_proc = _num_pes(); my_proc = _my_pe(); shmem_addr = shmalloc(sizeof(*shmem_addr)); if (shmem_addr) { TYPE_VALUE value = -1; INT64_TYPE i = 0; /* Set my value */ my_value = (-1); *shmem_addr = my_value; for (i = 0; i < COUNT_VALUE; i++) { /* Define peer and it value */ peer_proc = (my_proc + 1) % num_proc; peer_value = (peer_proc % 2 ? 1 : -1) * (i * STEP_VALUE); /* Define expected value */ expect_value = (my_proc % 2 ? 1 : -1) * (i * STEP_VALUE); /* This guarantees that PE set initial value before peer change one */ shmem_barrier_all(); /* Write value to peer */ FUNC_VALUE(shmem_addr, peer_value, peer_proc); /* Get value put by peer: * These routines start the remote transfer and may return before the data * is delivered to the remote PE */ wait_for_put_completion(peer_proc,10 /* wait for 10 secs */); value = *shmem_addr; rc = (sys_fcompare(expect_value, value) ? TC_PASS : TC_FAIL); log_debug(OSH_TC, "my(#%d:%Lf) peer(#%d:%Lf) expected = %Lf vs got = %Lf\n", my_proc, (long double)my_value, peer_proc, (long double)peer_value, (long double)expect_value, (long double)value); } } else { rc = TC_SETUP_FAIL; } if (shmem_addr) { shfree(shmem_addr); } return rc; }
static int test_item7(void) { int rc = TC_PASS; static TYPE_VALUE shmem_addr[MAX_BUFFER_SIZE * 2]; static TYPE_VALUE send_addr[MAX_BUFFER_SIZE * 2]; TYPE_VALUE my_value = 0; TYPE_VALUE peer_value = 0; TYPE_VALUE expect_value = 0; int num_proc = 0; int my_proc = 0; int root_proc = 0; long* pSyncMult = NULL; int pSyncNum = 2; num_proc = _num_pes(); my_proc = _my_pe(); pSyncMult = shmalloc(sizeof(*pSyncMult) * pSyncNum * _SHMEM_COLLECT_SYNC_SIZE); if (!pSyncMult) { rc = TC_SETUP_FAIL; } if (rc == TC_PASS) { int i = 0; int j = 0; for ( j = 0; j < pSyncNum * _SHMEM_COLLECT_SYNC_SIZE; j++ ) { pSyncMult[j] = _SHMEM_SYNC_VALUE; } /* Give some time to all PE for setting their values */ shmem_barrier_all(); /* Set root */ root_proc = 0; my_value = DEFAULT_VALUE; peer_value = MAX_VALUE; expect_value = (my_proc == root_proc ? DEFAULT_VALUE : peer_value); fill_buffer((void *)send_addr, MAX_BUFFER_SIZE * 2, (void *)&peer_value, sizeof(peer_value)); fill_buffer((void *)shmem_addr, MAX_BUFFER_SIZE * 2, (void *)&my_value, sizeof(my_value)); shmem_barrier_all(); for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++) { /* Put value to peer */ FUNC_VALUE(shmem_addr + (i % 2) * MAX_BUFFER_SIZE, send_addr + (i % 2) * MAX_BUFFER_SIZE, MAX_BUFFER_SIZE, root_proc, 0, 0, num_proc, pSyncMult + (i % pSyncNum) * _SHMEM_COLLECT_SYNC_SIZE); rc = (!compare_buffer_with_const(shmem_addr + (i % 2) * MAX_BUFFER_SIZE, MAX_BUFFER_SIZE, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL); log_debug(OSH_TC, "my#%d root(#%d:%lld) expected = %lld actual = %lld buffer size = %lld\n", my_proc, root_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)(*shmem_addr), (INT64_TYPE)MAX_BUFFER_SIZE); if (rc) { TYPE_VALUE* check_addr = shmem_addr + (i % 2) * MAX_BUFFER_SIZE; int odd_index = compare_buffer_with_const(check_addr, MAX_BUFFER_SIZE, &expect_value, sizeof(expect_value)); int show_index = (odd_index > 1 ? odd_index - 2 : 0); int show_size = sizeof(*check_addr) * sys_min(3, MAX_BUFFER_SIZE - show_index); log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1); log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size); show_buffer(check_addr + show_index, show_size); } fill_buffer((void *)(send_addr + (i % 2) * MAX_BUFFER_SIZE), MAX_BUFFER_SIZE, (void *)&peer_value, sizeof(peer_value)); fill_buffer((void *)(shmem_addr + (i % 2) * MAX_BUFFER_SIZE ), MAX_BUFFER_SIZE, (void *)&my_value, sizeof(my_value)); } } if (pSyncMult) { shfree(pSyncMult); } return rc; }
int main(int argc, char **argv) { int me, npes; struct timeval now; long t_start, t_end; start_pes(0); me = _my_pe(); npes = _num_pes(); if (npes < 4) { if (me==0) fprintf(stderr,"ERR: test requires 4 or more PEs\n"); return 1; } shmem_barrier_all(); gettimeofday(&now, NULL); t_start = (now.tv_sec * 1000000.0) + now.tv_usec; switch (me) { case 0: while (pe_escape) { double pi, pi2, pi3; int j; for (j=1; j <= 5000; j++) { pi = (22.0 / 7.0) + (double) j; pi2 = pi * (double) j; pi3 = (pi2 * pi) / 1.2; } mb(); } gettimeofday(&now, NULL); t_end = ((now.tv_sec * 1000000.0) + now.tv_usec) - t_start; break; case 1: shmem_int_inc(&A, 0); gettimeofday(&now, NULL); t_end = ((now.tv_sec * 1000000.0) + now.tv_usec) - t_start; break; case 2: while (1 != shmem_int_g(&A, 0)) { ; } shmem_int_inc(&A, 0); gettimeofday(&now, NULL); t_end = ((now.tv_sec * 1000000.0) + now.tv_usec) - t_start; break; case 3: while (2 != shmem_int_g(&A, 0)) { ; } shmem_int_p((int*) &pe_escape, 0, 0); // release PE0. if (npes > 4) { int i; for(i=4; i < npes; i++) shmem_int_p((int*)&pe_escape, 0, i); // release PE0. } gettimeofday(&now, NULL); t_end = ((now.tv_sec * 1000000.0) + now.tv_usec) - t_start; break; default: /* spin until released, A will never == 99, generate PE-0 traffic */ while (99 != shmem_int_g(&A, 0) && pe_escape) { mb(); } gettimeofday(&now, NULL); t_end = ((now.tv_sec * 1000000.0) + now.tv_usec) - t_start; break; } if (me < 4) fprintf(stderr,"[%d] elapsed usecs %ld A %d\n",me,t_end,A); shmem_barrier_all(); return 0; }
static int test_item4(void) { int rc = TC_PASS; TYPE_VALUE* shmem_addr = NULL; TYPE_VALUE* send_addr = NULL; TYPE_VALUE my_value = 0; TYPE_VALUE peer_value = 0; TYPE_VALUE expect_value = 0; int num_proc = 0; int my_proc = 0; int root_proc = 0; num_proc = _num_pes(); my_proc = _my_pe(); shmem_addr = shmalloc(sizeof(*shmem_addr)); send_addr = shmalloc(sizeof(*send_addr)); if (shmem_addr && send_addr) { TYPE_VALUE value = DEFAULT_VALUE; int j = 0; /* Set my value */ my_value = DEFAULT_VALUE; *shmem_addr = my_value; /* Define peer and it value */ peer_value = BASE_VALUE; *send_addr = peer_value; /* Set root */ root_proc = 0; /* Define expected value */ expect_value = (((my_proc % 2) == 0) && (my_proc != 0) ? BASE_VALUE : DEFAULT_VALUE); /* This guarantees that PE set initial value before peer change one */ for ( j = 0; j < _SHMEM_COLLECT_SYNC_SIZE; j++ ) { pSync[j] = _SHMEM_SYNC_VALUE; } shmem_barrier_all(); /* Put value to peer */ if ((my_proc % 2) == 0) { FUNC_VALUE(shmem_addr, send_addr, 1, root_proc, 0, 1, ((num_proc / 2) + (num_proc % 2)), pSync); } /* Get value put by peer: * These routines start the remote transfer and may return before the data * is delivered to the remote PE */ shmem_barrier_all(); { int wait = WAIT_COUNT; while (wait--) { value = *shmem_addr; if (expect_value == value) break; sleep(1); } } rc = (expect_value == value ? TC_PASS : TC_FAIL); log_debug(OSH_TC, "my#%d root(#%d:%lld) expected = %lld actual = %lld\n", my_proc, root_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)value); } else { rc = TC_SETUP_FAIL; } if (send_addr) { shfree(send_addr); } if (shmem_addr) { shfree(shmem_addr); } return rc; }
static int test_item6(void) { int rc = TC_PASS; static TYPE_VALUE shmem_addr[MAX_BUFFER_SIZE]; static TYPE_VALUE send_addr[MAX_BUFFER_SIZE]; TYPE_VALUE my_value = 0; TYPE_VALUE peer_value = 0; TYPE_VALUE expect_value = 0; int num_proc = 0; int my_proc = 0; int root_proc = 0; num_proc = _num_pes(); my_proc = _my_pe(); { TYPE_VALUE value = DEFAULT_VALUE; int i = 0; int j = 0; long cur_buf_size = 0; for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++) { cur_buf_size = sys_max(1, (i + 1) * MAX_BUFFER_SIZE / __cycle_count); /* Set my value */ my_value = DEFAULT_VALUE; fill_buffer((void *)shmem_addr, cur_buf_size, (void *)&my_value, sizeof(my_value)); /* Give some time to all PE for setting their values */ shmem_barrier_all(); /* Define peer and it value */ peer_value = (i * (MAX_VALUE / __cycle_count)); fill_buffer((void *)send_addr, cur_buf_size, (void *)&peer_value, sizeof(peer_value)); /* Set root */ root_proc = 0; /* Define expected value */ expect_value = (((my_proc % 2) == 0) && (my_proc != root_proc) ? peer_value : DEFAULT_VALUE); /* This guarantees that PE set initial value before peer change one */ for ( j = 0; j < _SHMEM_COLLECT_SYNC_SIZE; j++ ) { pSync[j] = _SHMEM_SYNC_VALUE; } shmem_barrier_all(); /* Put value to peer */ if ((my_proc % 2) == 0) { FUNC_VALUE(shmem_addr, send_addr, cur_buf_size, root_proc, 0, 1, ((num_proc / 2) + (num_proc % 2)), pSync); } /* Get value put by peer: * These routines start the remote transfer and may return before the data * is delivered to the remote PE */ shmem_barrier_all(); { int wait = WAIT_COUNT; while (wait--) { value = *shmem_addr; if (expect_value == value) break; sleep(1); } } rc = (!compare_buffer_with_const(shmem_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL); log_debug(OSH_TC, "my#%d root(#%d:%lld) expected = %lld actual = %lld buffer size = %lld\n", my_proc, root_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size); if (rc) { TYPE_VALUE* check_addr = shmem_addr; int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value)); int show_index = (odd_index > 1 ? odd_index - 2 : 0); int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - show_index); log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1); log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size); show_buffer(check_addr + show_index, show_size); } } } return rc; }
static int test_item5(void) { int rc = TC_PASS; static TYPE_VALUE shmem_addr[MAX_BUFFER_SIZE]; static TYPE_VALUE recv_addr[MAX_BUFFER_SIZE]; TYPE_VALUE my_value = 0; TYPE_VALUE peer_value = 0; TYPE_VALUE expect_value = 0; int num_proc = 0; int my_proc = 0; int peer_proc = 0; num_proc = _num_pes(); my_proc = _my_pe(); { INT64_TYPE i = 0; long cur_buf_size = 0; my_value = 0; for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++) { /* Set my value */ my_value = (my_proc % 2 ? 1 : -1) * (i * (MAX_VALUE / __cycle_count)); cur_buf_size = sys_max(1, (i + 1) * MAX_BUFFER_SIZE / __cycle_count); fill_buffer((void *)shmem_addr, cur_buf_size, (void *)&my_value, sizeof(my_value)); /* Give some time to all PE for setting their values */ shmem_barrier_all(); /* Define peer and it value */ peer_proc = (my_proc + 1) % num_proc; peer_value = (peer_proc % 2 ? 1 : -1) * (i * (MAX_VALUE / __cycle_count)); /* Define expected value */ expect_value = peer_value; /* Get value from peer */ FUNC_VALUE(recv_addr, shmem_addr, cur_buf_size, peer_proc); rc = (!compare_buffer_with_const(recv_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL); log_debug(OSH_TC, "my(#%d:%lld) peer(#%d:%lld) expected = %lld buffer size = %lld\n", my_proc, (INT64_TYPE)my_value, peer_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)cur_buf_size); if (rc) { TYPE_VALUE* check_addr = recv_addr; int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value)); int show_index = (odd_index > 1 ? odd_index - 2 : 0); int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1); log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1); log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size); show_buffer(check_addr + show_index, show_size); } shmem_barrier_all(); } } return rc; }
static int test_item3(void) { int rc = TC_PASS; TYPE_VALUE* shmem_addr = NULL; TYPE_VALUE* recv_addr = NULL; TYPE_VALUE my_value = 0; TYPE_VALUE peer_value = 0; TYPE_VALUE expect_value = 0; int my_proc = 0; int peer_proc = 0; my_proc = _my_pe(); shmem_addr = (TYPE_VALUE*)shmalloc(sizeof(*shmem_addr) * __max_buffer_size); recv_addr = (TYPE_VALUE*)sys_malloc(sizeof(*recv_addr) * __max_buffer_size); if (shmem_addr && recv_addr) { INT64_TYPE i = 0; long cur_buf_size = 0; my_value = 0; for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++) { /* Set my value */ my_value = (my_proc % 2 ? 1 : -1) * (i * (MAX_VALUE / __cycle_count)); cur_buf_size = sys_max(1, (i + 1) * __max_buffer_size / __cycle_count); fill_buffer((void *)shmem_addr, cur_buf_size, (void *)&my_value, sizeof(my_value)); /* Give some time to all PE for setting their values */ shmem_barrier_all(); /* Define peer and it value */ peer_proc = my_proc; peer_value = my_value; /* Define expected value */ expect_value = peer_value; /* Get value from peer */ FUNC_VALUE(recv_addr, shmem_addr, cur_buf_size, peer_proc); rc = (!compare_buffer_with_const(recv_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL); log_debug(OSH_TC, "my(#%d:%Lf) peer(#%d:%Lf) expected = %Lf buffer size = %lld\n", my_proc, (long double)my_value, peer_proc, (long double)peer_value, (long double)expect_value, (INT64_TYPE)cur_buf_size); if (rc) { TYPE_VALUE* check_addr = recv_addr; int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value)); int show_index = (odd_index > 1 ? odd_index - 2 : 0); int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1); log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1); log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size); show_buffer(check_addr + show_index, show_size); } shmem_barrier_all(); } } else { rc = TC_SETUP_FAIL; } if (recv_addr) { sys_free(recv_addr); } if (shmem_addr) { shfree(shmem_addr); } return rc; }
/* Performance test for shmem_XX_get (latency and bandwidth) */ #include <stdio.h> #include <stdlib.h> #include <time.h> #include <sys/time.h> #include <shmem.h> long double time_taken; long pSync[_SHMEM_REDUCE_SYNC_SIZE]; long double pWrk[_SHMEM_REDUCE_MIN_WRKDATA_SIZE]; //#define N_ELEMENTS 25600/*Data size chosen to be able to capture time required*/ int main(void) { int i,j,k; int *target; int *source; int me, npes; int nxtpe; struct timeval start, end; long double start_time,end_time; int N_ELEMENTS = (4194304*2)/sizeof(int); start_pes(0); me = _my_pe(); npes = _num_pes(); for (i = 0; i < SHMEM_BCAST_SYNC_SIZE; i += 1) { pSync[i] = _SHMEM_SYNC_VALUE; } nxtpe = (me+1)%npes; source = (int *) shmalloc( N_ELEMENTS * sizeof(*source) ); target = (int *) shmalloc( N_ELEMENTS * sizeof(*target) ); if(me == 0) printf("Get Performance test results:\nSize (Bytes)\t\tTime (Microseconds)\t\tBandwidth (Bytes/Second)\n"); for (i = 0; i < N_ELEMENTS; i += 1) { source[i] = i + 1; target[i] = -90; } shmem_barrier_all(); /*For int put we take average of all the times realized by a pair of PEs, thus * reducing effects of physical location of PEs*/ for (i=1;i<=N_ELEMENTS;i=i*2) { time_taken = 0; for(j=0;j<10000;j++){ gettimeofday(&start, NULL); start_time = (start.tv_sec * 1000000.0) + start.tv_usec; shmem_int_get(target, source, i,nxtpe); gettimeofday(&end, NULL); end_time = (end.tv_sec * 1000000.0) + end.tv_usec; time_taken = time_taken + (end_time - start_time); } shmem_longdouble_sum_to_all(&time_taken, &time_taken,1, 0, 0, npes, pWrk, pSync); if(me == 0){ time_taken = time_taken/(npes*10000); /*Average time across all PEs for one put*/ if (i*sizeof(i) < 1048576) printf("%ld \t\t\t\t %ld\t\t\t\t %ld\n",i*sizeof(i),time_taken,(i*sizeof(i))/(time_taken*1000000.0)); else printf("%ld \t\t\t %ld\t\t\t\t %ld\n",i*sizeof(i),time_taken,(i*sizeof(i))/(time_taken*1000000.0)); } } shmem_barrier_all(); shfree(target); shfree(source); return 0; }
static int test_item4(void) { int rc = TC_PASS; TYPE_VALUE* target_addr = NULL; TYPE_VALUE* source_addr = NULL; TYPE_VALUE source_value = 0; TYPE_VALUE expect_value = 0; int num_proc = 0; int my_proc = 0; num_proc = _num_pes(); my_proc = _my_pe(); pWrk = shmalloc(sizeof(*pWrk) * sys_max(1/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE)); if (pWrk) { source_addr = shmalloc(sizeof(*source_addr)); target_addr = source_addr; } if (target_addr && source_addr) { TYPE_VALUE value = DEFAULT_VALUE; int j = 0; /* Set my value */ source_value = ( my_proc < OVERFLOW_FACTORIAL_LIMIT ? (TYPE_VALUE)(my_proc + 1) : 1); *source_addr = source_value; /* Define expected value */ expect_value = 1; { int k = ( num_proc <= OVERFLOW_FACTORIAL_LIMIT ? num_proc : OVERFLOW_FACTORIAL_LIMIT); while (k) expect_value *= k--; } /* This guarantees that PE set initial value before peer change one */ for ( j = 0; j < _SHMEM_REDUCE_SYNC_SIZE; j++ ) { pSync[j] = _SHMEM_SYNC_VALUE; } shmem_barrier_all(); /* Put value to peer */ FUNC_VALUE(target_addr, source_addr, 1, 0, 0, num_proc, pWrk, pSync); /* Get value put by peer: * These routines start the remote transfer and may return before the data * is delivered to the remote PE */ shmem_barrier_all(); { int total_wait = 0; while (*target_addr == DEFAULT_VALUE && total_wait < 1000 * WAIT_COUNT) { total_wait++; usleep(1); } value = *target_addr; } rc = (expect_value == value ? TC_PASS : TC_FAIL); log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld\n", my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value); } else { rc = TC_SETUP_FAIL; } if (source_addr) { shfree(source_addr); } if (pWrk) { shfree(pWrk); pWrk = NULL; } return rc; }
int main (int argc, char *argv[]) { int myid, numprocs, i; double h, sum, x; struct timeval startwtime, endwtime; start_pes (0); numprocs = _num_pes (); myid = _my_pe (); if (myid == 0) { if (argc > 1) n = atoi (argv[1]); /* # rectangles on command line */ else n = 10000; /* default # of rectangles */ gettimeofday (&startwtime, NULL); } /* initialize sync array */ for (i = 0; i < _SHMEM_BCAST_SYNC_SIZE; i += 1) pSync[i] = _SHMEM_SYNC_VALUE; shmem_barrier_all (); /* send "n" out to everyone */ shmem_broadcast32 (&n, &n, 1, 0, 0, 0, numprocs, pSync); /* do partial computation */ h = 1.0 / (double) n; sum = 0.0; /* A slightly better approach starts from large i and works back */ for (i = myid + 1; i <= n; i += numprocs) { x = h * ((double) i - 0.5); sum += f (x); } mypi = h * sum; /* wait for everyone to finish */ shmem_barrier_all (); /* add up partial pi computations into PI */ shmem_double_sum_to_all (&pi, &mypi, 1, 0, 0, numprocs, pWrk, pSync); /* "master" PE summarizes */ if (myid == 0) { double elapsed; gettimeofday (&endwtime, NULL); elapsed = (endwtime.tv_sec - startwtime.tv_sec) * 1000.0; /* sec to ms */ elapsed += (endwtime.tv_usec - startwtime.tv_usec) / 1000.0; /* us to ms */ printf ("pi is approximately %.16f, Error is %.16f\n", pi, fabs (pi - PI25DT)); printf ("run time = %f ms\n", elapsed); fflush (stdout); } return 0; }
static int test_item7(void) { int rc = TC_PASS; TYPE_VALUE* target_addr = NULL; TYPE_VALUE* source_addr = NULL; TYPE_VALUE source_value = 0; TYPE_VALUE expect_value = 0; int num_proc = 0; int my_proc = 0; num_proc = _num_pes(); my_proc = _my_pe(); target_addr = (TYPE_VALUE*)shmalloc(sizeof(*target_addr) * __max_buffer_size); source_addr = (TYPE_VALUE*)shmalloc(sizeof(*source_addr) * __max_buffer_size); if (target_addr && source_addr) { TYPE_VALUE value = DEFAULT_VALUE; int i = 0; int j = 0; long cur_buf_size = 0; for (i = 0; (i < __cycle_count) && (rc == TC_PASS); i++) { cur_buf_size = sys_max(1, (i + 1) * __max_buffer_size / __cycle_count); pWrk = shmalloc(sizeof(*pWrk) * sys_max(cur_buf_size/2 + 1, _SHMEM_REDUCE_MIN_WRKDATA_SIZE)); if (pWrk) { /* Set initial target value */ value = DEFAULT_VALUE; fill_buffer((void *)target_addr, cur_buf_size, (void *)&value, sizeof(value)); /* Give some time to all PE for setting their values */ shmem_barrier_all(); /* Set my value */ source_value = ( my_proc < OVERFLOW_FACTORIAL_LIMIT ? (TYPE_VALUE)(my_proc + 1) : 1); fill_buffer((void *)source_addr, cur_buf_size, (void *)&source_value, sizeof(source_value)); /* Define expected value */ expect_value = 1; if (my_proc % 2) expect_value = DEFAULT_VALUE; else { int k = ( num_proc <= OVERFLOW_FACTORIAL_LIMIT ? num_proc : OVERFLOW_FACTORIAL_LIMIT); while (k) { if (k % 2) expect_value *= k; k--; } } /* This guarantees that PE set initial value before peer change one */ for ( j = 0; j < _SHMEM_REDUCE_SYNC_SIZE; j++ ) { pSync[j] = _SHMEM_SYNC_VALUE; } shmem_barrier_all(); /* Put value to peer */ FUNC_VALUE(target_addr, source_addr, cur_buf_size, 0, 1, ((num_proc / 2) + (num_proc % 2)), pWrk, pSync); /* Get value put by peer: * These routines start the remote transfer and may return before the data * is delivered to the remote PE */ shmem_barrier_all(); { int wait = WAIT_COUNT; while (wait--) { value = *target_addr; if (expect_value == value) break; sleep(1); } } rc = (!compare_buffer_with_const(target_addr, cur_buf_size, &expect_value, sizeof(expect_value)) ? TC_PASS : TC_FAIL); log_debug(OSH_TC, "my#%d source = %lld expected = %lld actual = %lld buffer size = %lld\n", my_proc, (INT64_TYPE)source_value, (INT64_TYPE)expect_value, (INT64_TYPE)value, (INT64_TYPE)cur_buf_size); if (rc) { TYPE_VALUE* check_addr = target_addr; int odd_index = compare_buffer_with_const(check_addr, cur_buf_size, &expect_value, sizeof(expect_value)); int show_index = (odd_index > 1 ? odd_index - 2 : 0); int show_size = sizeof(*check_addr) * sys_min(3, cur_buf_size - odd_index - 1); log_debug(OSH_TC, "index of incorrect value: 0x%08X (%d)\n", odd_index - 1, odd_index - 1); log_debug(OSH_TC, "buffer interval: 0x%08X - 0x%08X\n", show_index, show_index + show_size); show_buffer(check_addr + show_index, show_size); } shfree(pWrk); } else { rc = TC_SETUP_FAIL; } } } else { rc = TC_SETUP_FAIL; } if (source_addr) { shfree(source_addr); } if (target_addr) { shfree(target_addr); } return rc; }
static int test_item3(void) { int rc = TC_PASS; TYPE_VALUE* shmem_addr = NULL; TYPE_VALUE my_value = 0; TYPE_VALUE peer_value = 0; TYPE_VALUE expect_value = 0; int num_proc = 0; int my_proc = 0; int peer_proc = 0; num_proc = _num_pes(); my_proc = _my_pe(); shmem_addr = shmalloc(sizeof(*shmem_addr)); if (shmem_addr) { TYPE_VALUE value = -1; INT64_TYPE i = 0; my_value = 0; for (i = 0; i < COUNT_VALUE; i++) { /* Set my value */ my_value = (my_proc % 2 ? 1 : -1) * (i * STEP_VALUE); *shmem_addr = my_value; /* Wait is set instead of barrier to give some time to all PE for setting their values */ shmem_barrier_all(); /* Define peer and it value */ peer_proc = (my_proc + 1) % num_proc; peer_value = (peer_proc % 2 ? 1 : -1) * (i * STEP_VALUE); /* Define expected value */ expect_value = peer_value; /* Get value from peer */ value = FUNC_VALUE(shmem_addr, peer_proc); rc = (expect_value == value ? TC_PASS : TC_FAIL); log_debug(OSH_TC, "my(#%d:%lld) peer(#%d:%lld) expected = %lld vs got = %lld\n", my_proc, (INT64_TYPE)my_value, peer_proc, (INT64_TYPE)peer_value, (INT64_TYPE)expect_value, (INT64_TYPE)value); /* Wait is set instead of barrier to give some time to all PE for setting their values */ shmem_barrier_all(); } } else { rc = TC_SETUP_FAIL; } if (shmem_addr) { shfree(shmem_addr); } return rc; }