int main() { upcio_file_t *fd; upc_off_t ret_size; uint32_t size, nmemb; char *buffer; int i, err; int flag; void *dummy; upc_flag_t sync_mode = 0; char fname[] = "/mnt/plfs/upcio.test"; if(!MYTHREAD) printf("upcio test: test fread_local_async with %d Threads\n", THREADS); nmemb = 1; size = 10; buffer = (char *)malloc(sizeof(char)*size*nmemb); fd=upc_all_fopen( fname, UPC_INDIVIDUAL_FP|UPC_WRONLY, 0666); upc_barrier; if(fd==NULL) { printf("TH%2d: File open Error\n",MYTHREAD); upc_global_exit(-1); } upc_barrier; upc_all_fseek(fd, 10*MYTHREAD, UPC_SEEK_SET); /* Initialize the buffer, then write */ for(i=0; i<size; i++) buffer[i]= MYTHREAD + 48; upc_all_fwrite_local_async(fd, (void *)buffer, size, nmemb, &ret_size, sync_mode); err = upc_all_fwait_async(fd); if( err == -1 ) printf("TH%2d: fwait Error\n",MYTHREAD); else printf("TH%2d: fwait returns %d\n",MYTHREAD, err); if(upc_all_fclose(fd)!=0) { printf("TH%2d: File close Error\n",MYTHREAD); upc_global_exit(-1); } if(!MYTHREAD) printf("upcio test: Done with fread_local_async testing\n"); free((void *)buffer); return 0; }
/*
 * Terminate the program with exit status `err`.
 *
 * When compiled as UPC (__UPC__ defined) the whole job is torn down with
 * upc_global_exit(); every other build (OpenMP, SHMEM or plain C) simply
 * calls exit().
 */
void impl_abort(int err)
{
#if defined(__UPC__)
    upc_global_exit(err);
#else
    exit(err);
#endif
}
/*
 * Report a fatal error on stderr and abort the whole UPC job.
 *
 * fmt and the following arguments are interpreted printf-style.  The
 * formatted message is built in a 1024-byte buffer (longer messages are
 * truncated by vsnprintf), printed with a "FATAL ERROR" prefix, and the
 * program is then terminated through upc_global_exit(EXIT_FAILURE).
 */
void die(char *fmt, ...)
{
    char message[1024];
    va_list args;

    va_start(args, fmt);
    vsnprintf(message, sizeof message, fmt, args);
    va_end(args);

    fprintf(stderr, "FATAL ERROR %s\n", message);
    upc_global_exit(EXIT_FAILURE);
}
upcio_file_t *uopen(char *fname, int ronly) { upcio_file_t *fd = NULL; int flags = 0; if (ronly) flags = UPC_INDIVIDUAL_FP|UPC_RDONLY; else flags = UPC_INDIVIDUAL_FP|UPC_WRONLY|UPC_CREATE; fd = upc_all_fopen( fname, flags, 0666); upc_barrier; if(fd==NULL) { printf("TH%2d: File open Error\n",MYTHREAD); upc_global_exit(-1); } return fd; }
int main() { upcio_file_t *fd; char *buffer; upc_off_t ret_size; uint32_t size, i; int err; struct upc_io_local_memvec memvec[2]; struct upc_io_filevec filevec[2]; int flag; void *dummy; upc_flag_t sync_mode = 0; char fname[] = "/mnt/plfs/upcio.test"; if(!MYTHREAD) printf("upcio test: test fwrite_list_local_async with %d Threads\n", THREADS); size = 10; buffer = (char *)malloc(sizeof(char)*size); memvec[0].baseaddr = &buffer[0]; memvec[0].len = 4; memvec[1].baseaddr = &buffer[6]; memvec[1].len = 3; filevec[0].offset = 4*MYTHREAD; filevec[0].len = 3; filevec[1].offset = 8+4*MYTHREAD; filevec[1].len = 4; for(i=0; i<size; i++) buffer[i] = 'z'; fd=upc_all_fopen( fname, UPC_INDIVIDUAL_FP|UPC_WRONLY, 0666); upc_barrier; if(fd==NULL) { printf("TH%2d: File open Error\n",MYTHREAD); upc_global_exit(-1); } upc_barrier; upc_all_fwrite_list_local_async(fd, 2, (struct upc_io_local_memvec const *)&memvec, 2, (struct upc_io_filevec const *)&filevec, &ret_size, sync_mode); dummy = NULL; if( upc_all_fcntl(fd, UPC_ASYNC_OUTSTANDING, dummy) ) printf("TH%2d has an outstanding ASYNC OP\n",MYTHREAD); else printf("TH%2d does not has outstanding ASYNC OPs\n",MYTHREAD); upc_barrier; err = upc_all_ftest_async(fd, &flag); if( err == -1 ) printf("TH%2d: ftest Error\n",MYTHREAD); else { if( flag ) { printf("TH%2d: Async op done\n",MYTHREAD); printf("TH%2d: Async return %d\n",MYTHREAD, err); } else printf("TH%2d: Async pending\n",MYTHREAD); } err = upc_all_fwait_async(fd); if( err == -1 ) printf("TH%2d: fwait Error\n",MYTHREAD); else printf("TH%2d: fwait returns %d\n",MYTHREAD, err); upc_barrier; if( upc_all_fcntl(fd, UPC_ASYNC_OUTSTANDING, dummy) ) printf("TH%2d has an outstanding ASYNC OP\n",MYTHREAD); else printf("TH%2d does not has outstanding ASYNC OPs\n",MYTHREAD); upc_barrier; if(upc_all_fclose(fd)!=0) { printf("TH%2d: File close Error\n",MYTHREAD); upc_global_exit(-1); } if(!MYTHREAD) printf("upcio test: Done with fwrite_list_local_async testing\n"); free((void *)buffer); return 0; }
int main(int argc, char **argv) { upcio_file_t *fd; upc_off_t ret_size, size; uint32_t nmemb; upc_flag_t sync = 0; char *buffer; uint32_t i; int tid = 0; char fname[] = "/mnt/plfs/upcio.test"; if(!MYTHREAD) printf("upcio test: test fread_local with %d Threads\n", THREADS); if (argc < 2) { printf("usage: ./test_read_trans_local transaction_id\n"); exit(1); } tid = atoi(argv[1]); nmemb = 1; size = 10*(MYTHREAD+1); buffer = (char *)malloc(sizeof(char)*(size+1)*nmemb); buffer[size] = '\0'; fd=upc_all_fopen_trans( fname, UPC_INDIVIDUAL_FP|UPC_RDONLY|UPC_CREATE, 0666, tid); upc_barrier; if(fd==NULL) { printf("TH%2d: File open Error\n",MYTHREAD); upc_global_exit(-1); } ret_size = upc_all_fread_local(fd, (void *)buffer, size, nmemb, sync); if( ret_size == -1 ) printf("upcio test: fread_local error on TH%2d\n",MYTHREAD); else { for(i=0; i<THREADS; i++) { if(MYTHREAD==i) printf("upcio test: read \"%s\" on TH%2d\n",buffer,MYTHREAD); upc_barrier; } } if(upc_all_fclose(fd)!=0) { printf("TH%2d: File close Error\n",MYTHREAD); upc_global_exit(-1); } if(!MYTHREAD) printf("upcio test: Done with fread_local testing\n"); free((void *)buffer); return 0; }
int main(int argc, char ** argv) { int N; int tile_size=32; /* default tile size for tiling of local transpose */ int num_iterations;/* number of times to do the transpose */ int tiling; /* boolean: true if tiling is used */ double total_bytes; /* combined size of matrices */ double start_time, /* timing parameters */ end_time, avgtime; /********************************************************************* ** read and test input parameters *********************************************************************/ if(argc != 3 && argc != 4){ if(MYTHREAD == 0) printf("Usage: %s <# iterations> <matrix order> [tile size]\n", *argv); upc_global_exit(EXIT_FAILURE); } num_iterations = atoi(*++argv); if(num_iterations < 1){ if(MYTHREAD == 0) printf("ERROR: iterations must be >= 1 : %d \n", num_iterations); upc_global_exit(EXIT_FAILURE); } N = atoi(*++argv); if(N < 0){ if(MYTHREAD == 0) printf("ERROR: Matrix Order must be greater than 0 : %d \n", N); upc_global_exit(EXIT_FAILURE); } if (argc == 4) tile_size = atoi(*++argv); /*a non-positive tile size means no tiling of the local transpose */ tiling = (tile_size > 0) && (tile_size < N); if(!tiling) tile_size = N; int sizex = N / THREADS; if(N % THREADS != 0) { if(MYTHREAD == 0) printf("N %% THREADS != 0\n"); upc_global_exit(EXIT_FAILURE); } int sizey = N; if(MYTHREAD == 0) { printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("UPC matrix transpose: B = A^T\n"); printf("Number of threads = %d\n", THREADS); printf("Matrix order = %d\n", N); printf("Number of iterations = %d\n", num_iterations); if (tiling) printf("Tile size = %d\n", tile_size); else printf("Untiled\n"); } /********************************************************************* ** Allocate memory for input and output matrices *********************************************************************/ total_bytes = 2.0 * sizeof(double) * N * N; int myoffsetx = MYTHREAD * sizex; int myoffsety = 0; upc_barrier; debug("Allocating arrays (%d, %d), offset (%d, 
%d)", sizex, sizey, myoffsetx, myoffsety); local_shared_block_ptrs in_array = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety); local_shared_block_ptrs out_array = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety); local_shared_block_ptrs buf_array = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety); in_arrays[MYTHREAD] = in_array; out_arrays[MYTHREAD] = out_array; buf_arrays[MYTHREAD] = buf_array; double **in_array_private = shared_2d_array_to_private(in_array, sizex, sizey, myoffsetx, myoffsety); double **out_array_private = shared_2d_array_to_private(out_array, sizex, sizey, myoffsetx, myoffsety); double **buf_array_private = shared_2d_array_to_private(buf_array, sizex, sizey, myoffsetx, myoffsety); upc_barrier; /********************************************************************* ** Initialize the matrices *********************************************************************/ for(int y=myoffsety; y<myoffsety + sizey; y++){ for(int x=myoffsetx; x<myoffsetx + sizex; x++){ in_array_private[y][x] = (double) (x+N*y); out_array[y][x] = -1.0; } } upc_barrier; for(int y=myoffsety; y<myoffsety + sizey; y++){ for(int x=myoffsetx; x<myoffsetx + sizex; x++){ if(in_array_private[y][x] !=(double) (x+N*y)) die("x=%d y=%d in_array=%f != %f", x, y, in_array[y][x], (x+N*y)); if(out_array_private[y][x] != -1.0) die("out_array_private error"); } } /********************************************************************* ** Transpose *********************************************************************/ int transfer_size = sizex * sizex * sizeof(double); if(MYTHREAD == 0) debug("transfer size = %d", transfer_size); for(int iter=0; iter<=num_iterations; iter++){ /* start timer after a warmup iteration */ if(iter == 1){ upc_barrier; start_time = wtime(); } for(int i=0; i<THREADS; i++){ int local_blk_id = (MYTHREAD + i) % THREADS; int remote_blk_id = MYTHREAD; int remote_thread = local_blk_id; upc_memget(&buf_array_private[local_blk_id * sizex][myoffsetx], 
&in_arrays[remote_thread][remote_blk_id * sizex][remote_thread * sizex], transfer_size); #define OUT_ARRAY(x,y) out_array_private[local_blk_id * sizex + x][myoffsetx + y] #define BUF_ARRAY(x,y) buf_array_private[local_blk_id * sizex + x][myoffsetx + y] if(!tiling){ for(int x=0; x<sizex; x++){ for(int y=0; y<sizex; y++){ OUT_ARRAY(x,y) = BUF_ARRAY(y,x); } } } else{ for(int x=0; x<sizex; x+=tile_size){ for(int y=0; y<sizex; y+=tile_size){ for(int bx=x; bx<MIN(sizex, x+tile_size); bx++){ for(int by=y; by<MIN(sizex, y+tile_size); by++){ OUT_ARRAY(bx,by) = BUF_ARRAY(by,bx); } } } } } } upc_barrier; } upc_barrier; end_time = wtime(); /********************************************************************* ** Analyze and output results. *********************************************************************/ for(int y=myoffsety; y<myoffsety + sizey; y++){ for(int x=myoffsetx; x<myoffsetx + sizex; x++){ if(in_array_private[y][x] != (double)(x+ N*y)) die("Error in input: x=%d y=%d", x, y); if(out_array_private[y][x] != (double)(y + N*x)) die("x=%d y=%d in_array=%f != %f %d %d", x, y, out_array[y][x], (double)(y + N*x), (int)(out_array[y][x]) % N, (int)(out_array[y][x]) / N); } } if(MYTHREAD == 0){ printf("Solution validates\n"); double transfer_size = 2 * N * N * sizeof(double); avgtime = (end_time - start_time) / num_iterations; double rate = transfer_size / avgtime * 1.0E-06; printf("Rate (MB/s): %lf Avg time (s): %lf\n",rate, avgtime); } }
int main(int argc, char ** argv) { long m, n; /* grid dimensions */ int i, j, iter; /* dummies */ int iterations; /* number of times to run the pipeline algorithm */ double pipeline_time, /* timing parameters */ avgtime, max_time; double epsilon = 1.e-8; /* error tolerance */ double corner_val; /* verification value at top right corner of grid */ double *vector;/* array holding grid values */ long total_length; /* total required length to store grid values */ /******************************************************************************* ** process and test input parameters ********************************************************************************/ if(MYTHREAD == THREADS-1){ printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("UPC pipeline execution on 2D grid\n"); } if (argc != 4){ if(MYTHREAD == THREADS-1){ printf("Usage: %s <# iterations> <first array dimension> ", *argv); printf("<second array dimension>\n"); } upc_global_exit(EXIT_FAILURE); } iterations = atoi(*++argv); if (iterations < 1){ if(MYTHREAD == THREADS-1) printf("ERROR: iterations must be >= 1 : %d \n",iterations); upc_global_exit(EXIT_FAILURE); } m = atol(*++argv); n = atol(*++argv); if (m < 1 || n < 1){ if(MYTHREAD == THREADS-1) printf("ERROR: grid dimensions must be positive: %d, %d \n", m, n); upc_global_exit(EXIT_FAILURE); } if(MYTHREAD == THREADS-1){ printf("Number of threads = %d\n", THREADS); printf("Grid sizes = %ld, %ld\n", m, n); printf("Number of iterations = %d\n", iterations); #if USE_BUPC_EXT printf("Using Berkeley UPC extensions\n"); #endif } /********************************************************************* ** Allocate memory for input and output matrices *********************************************************************/ #if USE_BUPC_EXT bupc_sem_t *myflag = bupc_sem_alloc(BUPC_SEM_INTEGER | BUPC_SEM_MPRODUCER); upc_barrier; allflags[MYTHREAD] = myflag; upc_barrier; bupc_sem_t *mypeer = allflags[(MYTHREAD+1) % THREADS]; #endif long segment_size = m / 
THREADS; int leftover = m % THREADS; int myoffsetx, sizex; if(MYTHREAD < leftover){ myoffsetx = (segment_size + 1) * MYTHREAD; sizex = segment_size + 1; }else{ myoffsetx = (segment_size + 1) * leftover + segment_size * (MYTHREAD - leftover); sizex = segment_size; } #if USE_BUPC_EXT if(MYTHREAD != 0){ myoffsetx -= 1; sizex += 1; } #endif int sizey = n; int myoffsety = 0; upc_barrier; debug("Allocating arrays (%d, %d), offset (%d, %d)", sizex, sizey, myoffsetx, myoffsety); local_shared_block_ptrs in_array = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety); in_arrays[MYTHREAD] = in_array; double **in_array_private = shared_2d_array_to_private(in_array, sizex, sizey, myoffsetx, myoffsety); if(MYTHREAD == 0) current_max_line[MYTHREAD] = sizey; else current_max_line[MYTHREAD] = 0; upc_barrier; /********************************************************************* ** Initialize the matrices *********************************************************************/ /* clear the array */ for (j=0; j<n; j++) for (i=myoffsetx; i<myoffsetx + sizex; i++) ARRAY(i, j) = 0.0; /* set boundary values (bottom and left side of grid */ if(MYTHREAD == 0) for (j=0; j<n; j++) ARRAY(0, j) = (double) j; for (i=myoffsetx; i<myoffsetx + sizex; i++) ARRAY(i, 0) = (double) i; upc_barrier; for (iter = 0; iter<=iterations; iter++){ /* start timer after a warmup iteration */ if (iter == 1) pipeline_time = wtime(); if(MYTHREAD == 0) debug("start it %d, %f", iter, ARRAY(0, 0)); if(MYTHREAD != THREADS - 1) // Send the element in line 0 in_arrays[MYTHREAD + 1][0][myoffsetx + sizex -1] = ARRAY(myoffsetx + sizex - 1, 0); for (j=1; j<n; j++) { #if USE_BUPC_EXT if(MYTHREAD > 0){ bupc_sem_wait(myflag); } for (i=myoffsetx+1; i<myoffsetx + sizex; i++) ARRAY(i, j) = ARRAY(i-1, j) + ARRAY(i, j-1) - ARRAY(i-1, j-1); if(MYTHREAD != THREADS - 1){ in_arrays[MYTHREAD + 1][j][myoffsetx + sizex -1] = ARRAY(myoffsetx + sizex - 1, j); bupc_sem_post(mypeer); } #else while(j > current_max_line[MYTHREAD]) // Normally 
not necessary: bupc_poll(); ; if(MYTHREAD > 0) ARRAY(myoffsetx, j) = in_arrays[MYTHREAD - 1][j][myoffsetx-1] + ARRAY(myoffsetx, j-1) - in_arrays[MYTHREAD-1][j-1][myoffsetx-1]; for (i=myoffsetx+1; i<myoffsetx + sizex; i++) ARRAY(i, j) = ARRAY(i-1, j) + ARRAY(i, j-1) - ARRAY(i-1, j-1); if(MYTHREAD < THREADS - 1) current_max_line[MYTHREAD+1] = j; #endif } /* copy top right corner value to bottom left corner to create dependency; we need a barrier to make sure the latest value is used. This also guarantees that the flags for the next iteration (if any) are not getting clobbered */ if(MYTHREAD == 0) current_max_line[MYTHREAD] = sizey; else current_max_line[MYTHREAD] = 0; if(MYTHREAD == THREADS - 1){ in_arrays[0][0][0] = -ARRAY(m-1, n-1); } upc_barrier; } pipeline_time = wtime() - pipeline_time; times[MYTHREAD] = pipeline_time; upc_barrier; // Compute max_time if(MYTHREAD == THREADS - 1){ max_time = times[MYTHREAD]; for(i=1; i<THREADS; i++){ if(max_time < times[i]) max_time = times[i]; } } /******************************************************************************* ** Analyze and output results. ********************************************************************************/ /* verify correctness, using top right value; */ if( MYTHREAD == THREADS - 1){ corner_val = (double)((iterations+1)*(n+m-2)); if (fabs(ARRAY(m-1,n-1)-corner_val)/corner_val > epsilon) { printf("ERROR: checksum %lf does not match verification value %lf\n", ARRAY(m-1, n-1), corner_val); exit(EXIT_FAILURE); } #if VERBOSE printf("checksum %lf verification value %lf\n", ARRAY(m-1, n-1), corner_val); printf("Solution validates; verification value = %lf\n", corner_val); #else printf("Solution validates\n"); #endif avgtime = max_time/iterations; printf("Rate (MFlops/s): %lf Avg time (s): %lf\n", 1.0E-06 * 2 * ((double)(m-1)*(double)(n-1))/avgtime, avgtime); exit(EXIT_SUCCESS); } }
int main(int argc, char **argv) { int i, j, ntimes, err, flag, strl; double stim, read_tim, write_tim; double min_read_tim, min_write_tim, read_bw, write_bw; upcio_file_t *fh; upc_flag_t sync = 0; char *filename; shared int *buf; shared char *gfilename; shared int *len; ntimes=1; /* process 0 takes the file name as a command-line argument and broadcasts it to other processes */ len = (shared int *) upc_all_alloc(1, sizeof(int)); upc_barrier; if (!MYTHREAD) { i = 1; while ((i < argc) && strcmp("-fname", *argv)) { i++; argv++; } if (i >= argc) { fprintf(stderr, "\n*# Usage: perf -fname filename\n\n"); upc_global_exit(-1); } argv++; strl = strlen(*argv); upc_memput(len, &strl, sizeof(int)); } upc_barrier; upc_memget(&strl, len, sizeof(int)); upc_barrier; gfilename = (shared char *) upc_all_alloc(1,sizeof(char)*(strl)); if (!MYTHREAD) { upc_memput(gfilename, *argv, strl); fprintf(stderr, "Access size per process = %d bytes, ntimes = %d\n", SIZE, ntimes); } upc_barrier; filename = (char *) malloc(sizeof(char)*(strl+1)); upc_memget(filename, gfilename, strl); filename[strl] = '\0'; /* allocate the shared buf on each thread this is for shared w/r with INDIVIDUAL FP */ buf = (shared int *) upc_global_alloc(1,SIZE); upc_barrier; min_read_tim=0.0; min_write_tim=0.0; upc_barrier; fh = uopen( filename, 0); for (j=0; j<ntimes; j++) { upc_barrier; stim = UPC_Wtime(); upc_all_fseek(fh, MYTHREAD*SIZE + SIZE*THREADS*j, UPC_SEEK_SET); err = upc_all_fwrite_shared(fh, buf, BLOCK, SIZE, sizeof(unsigned char), sync); if( err == -1 ) { fprintf(stderr, "TH%2d: Error in write\n", MYTHREAD); break; } write_tim = UPC_Wtime() - stim; min_write_tim += write_tim; } upc_all_fclose(fh); upc_all_fsync(fh); min_write_tim /= ntimes; upc_barrier; fh = uopen( filename, 1); for (j=0; j<ntimes; j++) { upc_barrier; stim = UPC_Wtime(); upc_all_fseek(fh, MYTHREAD*SIZE + SIZE*THREADS*j, UPC_SEEK_SET); err = upc_all_fread_shared(fh, buf, BLOCK, SIZE, sizeof(unsigned char), sync); if( err == -1 ) { 
fprintf(stderr, "TH%2d: Error in read\n", MYTHREAD); break; } read_tim = UPC_Wtime() - stim; min_read_tim += read_tim; } upc_all_fclose(fh); min_read_tim /= ntimes; upc_barrier; if (!MYTHREAD) { read_bw = (SIZE*THREADS*ntimes)/(min_read_tim*1024.0*1024.0); write_bw = (SIZE*THREADS*ntimes)/(min_write_tim*1024.0*1024.0); printf("TH: %d - Write bandwidth with a prior file sync = %f Mbytes/sec\n", MYTHREAD, write_bw); printf("TH: %d - Read bandwidth with a prior file sync = %f Mbytes/sec\n", MYTHREAD, read_bw); } upc_barrier; /* only thread 0 clean up the single shared buf */ if(!MYTHREAD) { upc_free(buf); upc_free(gfilename); upc_free(len); } free(filename); return 0; }