static inline void init_comp_semaphores() { // allocate completion semaphore comp[MYTHREAD] = bupc_sem_alloc(0); completion = malloc(THREADS * sizeof(bupc_sem_t*)); // wait for all threads to have allocated their semaphore upc_barrier; // copy all semaphore pointers to local memory int i; for (i = 0; i < THREADS; i++) { completion[i] = comp[i]; } }
int main(int argc, char ** argv) { long m, n; /* grid dimensions */ int i, j, iter; /* dummies */ int iterations; /* number of times to run the pipeline algorithm */ double pipeline_time, /* timing parameters */ avgtime, max_time; double epsilon = 1.e-8; /* error tolerance */ double corner_val; /* verification value at top right corner of grid */ double *vector;/* array holding grid values */ long total_length; /* total required length to store grid values */ /******************************************************************************* ** process and test input parameters ********************************************************************************/ if(MYTHREAD == THREADS-1){ printf("Parallel Research Kernels version %s\n", PRKVERSION); printf("UPC pipeline execution on 2D grid\n"); } if (argc != 4){ if(MYTHREAD == THREADS-1){ printf("Usage: %s <# iterations> <first array dimension> ", *argv); printf("<second array dimension>\n"); } upc_global_exit(EXIT_FAILURE); } iterations = atoi(*++argv); if (iterations < 1){ if(MYTHREAD == THREADS-1) printf("ERROR: iterations must be >= 1 : %d \n",iterations); upc_global_exit(EXIT_FAILURE); } m = atol(*++argv); n = atol(*++argv); if (m < 1 || n < 1){ if(MYTHREAD == THREADS-1) printf("ERROR: grid dimensions must be positive: %d, %d \n", m, n); upc_global_exit(EXIT_FAILURE); } if(MYTHREAD == THREADS-1){ printf("Number of threads = %d\n", THREADS); printf("Grid sizes = %ld, %ld\n", m, n); printf("Number of iterations = %d\n", iterations); #if USE_BUPC_EXT printf("Using Berkeley UPC extensions\n"); #endif } /********************************************************************* ** Allocate memory for input and output matrices *********************************************************************/ #if USE_BUPC_EXT bupc_sem_t *myflag = bupc_sem_alloc(BUPC_SEM_INTEGER | BUPC_SEM_MPRODUCER); upc_barrier; allflags[MYTHREAD] = myflag; upc_barrier; bupc_sem_t *mypeer = allflags[(MYTHREAD+1) % THREADS]; #endif long segment_size = m / THREADS; int leftover = m % THREADS; int myoffsetx, sizex; if(MYTHREAD < leftover){ myoffsetx = (segment_size + 1) * MYTHREAD; sizex = segment_size + 1; }else{ myoffsetx = (segment_size + 1) * leftover + segment_size * (MYTHREAD - leftover); sizex = segment_size; } #if USE_BUPC_EXT if(MYTHREAD != 0){ myoffsetx -= 1; sizex += 1; } #endif int sizey = n; int myoffsety = 0; upc_barrier; debug("Allocating arrays (%d, %d), offset (%d, %d)", sizex, sizey, myoffsetx, myoffsety); local_shared_block_ptrs in_array = shared_2d_array_alloc(sizex, sizey, myoffsetx, myoffsety); in_arrays[MYTHREAD] = in_array; double **in_array_private = shared_2d_array_to_private(in_array, sizex, sizey, myoffsetx, myoffsety); if(MYTHREAD == 0) current_max_line[MYTHREAD] = sizey; else current_max_line[MYTHREAD] = 0; upc_barrier; /********************************************************************* ** Initialize the matrices *********************************************************************/ /* clear the array */ for (j=0; j<n; j++) for (i=myoffsetx; i<myoffsetx + sizex; i++) ARRAY(i, j) = 0.0; /* set boundary values (bottom and left side of grid */ if(MYTHREAD == 0) for (j=0; j<n; j++) ARRAY(0, j) = (double) j; for (i=myoffsetx; i<myoffsetx + sizex; i++) ARRAY(i, 0) = (double) i; upc_barrier; for (iter = 0; iter<=iterations; iter++){ /* start timer after a warmup iteration */ if (iter == 1) pipeline_time = wtime(); if(MYTHREAD == 0) debug("start it %d, %f", iter, ARRAY(0, 0)); if(MYTHREAD != THREADS - 1) // Send the element in line 0 in_arrays[MYTHREAD + 1][0][myoffsetx + sizex -1] = ARRAY(myoffsetx + sizex - 1, 0); for (j=1; j<n; j++) { #if USE_BUPC_EXT if(MYTHREAD > 0){ bupc_sem_wait(myflag); } for (i=myoffsetx+1; i<myoffsetx + sizex; i++) ARRAY(i, j) = ARRAY(i-1, j) + ARRAY(i, j-1) - ARRAY(i-1, j-1); if(MYTHREAD != THREADS - 1){ in_arrays[MYTHREAD + 1][j][myoffsetx + sizex -1] = ARRAY(myoffsetx + sizex - 1, j); bupc_sem_post(mypeer); } #else while(j > current_max_line[MYTHREAD]) // Normally not necessary: bupc_poll(); ; if(MYTHREAD > 0) ARRAY(myoffsetx, j) = in_arrays[MYTHREAD - 1][j][myoffsetx-1] + ARRAY(myoffsetx, j-1) - in_arrays[MYTHREAD-1][j-1][myoffsetx-1]; for (i=myoffsetx+1; i<myoffsetx + sizex; i++) ARRAY(i, j) = ARRAY(i-1, j) + ARRAY(i, j-1) - ARRAY(i-1, j-1); if(MYTHREAD < THREADS - 1) current_max_line[MYTHREAD+1] = j; #endif } /* copy top right corner value to bottom left corner to create dependency; we need a barrier to make sure the latest value is used. This also guarantees that the flags for the next iteration (if any) are not getting clobbered */ if(MYTHREAD == 0) current_max_line[MYTHREAD] = sizey; else current_max_line[MYTHREAD] = 0; if(MYTHREAD == THREADS - 1){ in_arrays[0][0][0] = -ARRAY(m-1, n-1); } upc_barrier; } pipeline_time = wtime() - pipeline_time; times[MYTHREAD] = pipeline_time; upc_barrier; // Compute max_time if(MYTHREAD == THREADS - 1){ max_time = times[MYTHREAD]; for(i=1; i<THREADS; i++){ if(max_time < times[i]) max_time = times[i]; } } /******************************************************************************* ** Analyze and output results. ********************************************************************************/ /* verify correctness, using top right value; */ if( MYTHREAD == THREADS - 1){ corner_val = (double)((iterations+1)*(n+m-2)); if (fabs(ARRAY(m-1,n-1)-corner_val)/corner_val > epsilon) { printf("ERROR: checksum %lf does not match verification value %lf\n", ARRAY(m-1, n-1), corner_val); exit(EXIT_FAILURE); } #if VERBOSE printf("checksum %lf verification value %lf\n", ARRAY(m-1, n-1), corner_val); printf("Solution validates; verification value = %lf\n", corner_val); #else printf("Solution validates\n"); #endif avgtime = max_time/iterations; printf("Rate (MFlops/s): %lf Avg time (s): %lf\n", 1.0E-06 * 2 * ((double)(m-1)*(double)(n-1))/avgtime, avgtime); exit(EXIT_SUCCESS); } }