/* 32-1. */
/*
 * Exchange the single-precision half-spinor boundary buffers with the
 * neighbouring PEs using one-sided SHMEM puts.
 *
 * Layout as used by the calls below (all offsets are in elements of
 * HalfSpinor32):
 *   - the local send regions start at HalfSpinor32 + 4*VOLUME;
 *   - the receive regions start RAND/2 elements further on;
 *   - inside both, the t, x, y and z regions follow each other with sizes
 *     LX*LY*LZ, T*LY*LZ, T*LX*LZ and T*LX*LY respectively.
 * For every direction the first half of the send region is put into the
 * second half of the "up" neighbour's receive region, and the second half
 * of the send region into the first half of the "down" neighbour's receive
 * region. Each put moves 6 floats per site of the full slice.
 * NOTE(review): that count equals 12 floats per element of a half region,
 * which presumably is the size of one HalfSpinor32 element -- confirm
 * against its declaration.
 *
 * Which directions are exchanged is selected at compile time through the
 * PARALLELXT / PARALLELXYT / PARALLELXYZT macros; without MPI defined the
 * function does nothing.
 *
 * The closing KOJAK/POMP instrumentation pragma is placed before the
 * return statement so that the call generated for it is actually reached.
 */
void xchange_halffield32() {

#ifdef _KOJAK_INST
#pragma pomp inst begin(xchangehalf32)
#endif

#  ifdef MPI

  /* All PEs synchronise before any remote buffer is written. */
  shmem_barrier_all();

  /* t-direction: always exchanged when MPI is enabled */
  shmem_float_put((float*)(HalfSpinor32 + 4*VOLUME + RAND/2 + LX*LY*LZ/2),
                  (float*)(HalfSpinor32 + 4*VOLUME),
                  (LX*LY*LZ*6), g_nb_t_up);

  shmem_float_put((float*)(HalfSpinor32 + 4*VOLUME + RAND/2),
                  (float*)(HalfSpinor32 + 4*VOLUME + LX*LY*LZ/2),
                  (LX*LY*LZ*6), g_nb_t_dn);

#    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)

  /* x-direction */
  shmem_float_put((float*)(HalfSpinor32 + 4*VOLUME + RAND/2 + LX*LY*LZ + T*LY*LZ/2),
                  (float*)(HalfSpinor32 + 4*VOLUME + LX*LY*LZ),
                  (T*LY*LZ*6), g_nb_x_up);

  shmem_float_put((float*)(HalfSpinor32 + 4*VOLUME + RAND/2 + LX*LY*LZ),
                  (float*)(HalfSpinor32 + 4*VOLUME + LX*LY*LZ + T*LY*LZ/2),
                  (T*LY*LZ*6), g_nb_x_dn);

#    endif

#    if (defined PARALLELXYT || defined PARALLELXYZT)

  /* y-direction */
  shmem_float_put((float*)(HalfSpinor32 + 4*VOLUME + RAND/2 + LX*LY*LZ + T*LY*LZ + T*LX*LZ/2),
                  (float*)(HalfSpinor32 + 4*VOLUME + LX*LY*LZ + T*LY*LZ),
                  (T*LX*LZ*6), g_nb_y_up);

  shmem_float_put((float*)(HalfSpinor32 + 4*VOLUME + RAND/2 + LX*LY*LZ + T*LY*LZ),
                  (float*)(HalfSpinor32 + 4*VOLUME + LX*LY*LZ + T*LY*LZ + T*LX*LZ/2),
                  (T*LX*LZ*6), g_nb_y_dn);

#    endif

#    if (defined PARALLELXYZT)

  /* z-direction */
  shmem_float_put((float*)(HalfSpinor32 + 4*VOLUME + RAND/2 + LX*LY*LZ + T*LY*LZ + T*LX*LZ + T*LX*LY/2),
                  (float*)(HalfSpinor32 + 4*VOLUME + LX*LY*LZ + T*LY*LZ + T*LX*LZ),
                  (T*LX*LY*6), g_nb_z_up);

  shmem_float_put((float*)(HalfSpinor32 + 4*VOLUME + RAND/2 + LX*LY*LZ + T*LY*LZ + T*LX*LZ),
                  (float*)(HalfSpinor32 + 4*VOLUME + LX*LY*LZ + T*LY*LZ + T*LX*LZ + T*LX*LY/2),
                  (T*LX*LY*6), g_nb_z_dn);

#    endif

  /* Wait until every PE's puts have been delivered before returning. */
  shmem_barrier_all();

#  endif /* MPI */

#ifdef _KOJAK_INST
#pragma pomp inst end(xchangehalf32)
#endif
  return;
}
int main(int argc, char *argv[]) { int n = 100; // int n1=101; start_pes(0); int nn = (n-1) / _num_pes(); int n_local0 = 1 + _my_pe() * nn; int n_local1 = 1 + (_my_pe()+1) * nn; // allocate only local part + ghost zone of the arrays x,y float *x, *y; x = (float*) malloc((n_local1 - n_local0 + 2)*sizeof(float)); y = (float*) malloc((n_local1 - n_local0 + 2)*sizeof(float)); // forgot shmalloc shmem_barrier_all(); //... // fill x, y // fill ghost zone if (_my_pe() > 0) shmem_float_get(x,y,n1,1); // extra code shmem_float_put(y,x, 1, _my_pe()-1); shmem_barrier_all(); // do computation float e = 0; int i; for (i=n_local0; i<n_local1; ++i) { x[i] += ( y[i+1] + y[i-1] )*.5; e += y[i] * y[i]; } static float work[_SHMEM_REDUCE_SYNC_SIZE]; static long sync[_SHMEM_REDUCE_SYNC_SIZE]; static float el, es; el = e; shmem_float_sum_to_all(&es, &el, 1, 0, 0, _num_pes(), work, sync); e = es; // ... // output x, e x += (n_local0 - 1); y += (n_local0 - 1); shfree(x); shfree(y); return 0; }
/* Print the verdict line of one test; `failed` is non-zero on a mismatch. */
static void
report_put_result (const char *test_name, int failed)
{
  printf ("Test %s: %s\n", test_name, failed ? "Failed" : "Passed");
}

/*
 * Functional test of the OpenSHMEM put family.
 *
 * Every PE fills its source buffers with its own PE number and puts them
 * into the symmetric destination buffers of PE (me + 1) % npes. PE 0 then
 * checks that its destinations hold npes - 1 (the number of the PE that
 * wrote to it) and prints one "Passed"/"Failed" line per routine.
 *
 * Covered: typed block puts and shmem_putmem, shmem_put32/64/128,
 * shmem_iput32/64/128, typed strided puts, and the single-element
 * shmem_<type>_p calls. The test is skipped when run on a single PE.
 *
 * All strided puts use destination stride 1 and source stride 2. They
 * transfer N / 2 elements so that the last source element read,
 * src[2 * (N / 2 - 1)], stays inside the N-element source arrays; the
 * verification loops check exactly those N / 2 destination elements.
 *
 * NOTE(review): the sized puts pick the buffers from sizeof (int) alone and
 * assume long is twice as wide as int and long double / long four times as
 * wide -- confirm this holds on every target platform.
 */
int
main (int argc, char **argv)
{
  int i;
  int nextpe;
  int me, npes;
  int nstrided = N / 2;         /* element count used by every iput test */

  /* failK becomes 1 as soon as one element of destK has a wrong value. */
  int fail1, fail2, fail3, fail4, fail5, fail6, fail7, fail8;

  short src1[N];
  int src2[N];
  long src3[N];
  long double src4[N];
  long long src5[N];
  double src6[N];
  float src7[N];
  char *src8;
  short src9;
  int src10;
  long src11;
  double src12;
  float src13;

  short *dest1;
  int *dest2;
  long *dest3;
  long double *dest4;
  long long *dest5;
  double *dest6;
  float *dest7;
  char *dest8;
  short *dest9;
  int *dest10;
  long *dest11;
  double *dest12;
  float *dest13;

  shmem_init ();
  me = shmem_my_pe ();
  npes = shmem_n_pes ();

  if (npes > 1) {

    fail1 = 0;
    fail2 = 0;
    fail3 = 0;
    fail4 = 0;
    fail5 = 0;
    fail6 = 0;
    fail7 = 0;
    fail8 = 0;

    src8 = (char *) malloc (N * sizeof (*src8));

    dest1 = (short *) shmem_malloc (N * sizeof (*dest1));
    dest2 = (int *) shmem_malloc (N * sizeof (*dest2));
    dest3 = (long *) shmem_malloc (N * sizeof (*dest3));
    dest4 = (long double *) shmem_malloc (N * sizeof (*dest4));
    dest5 = (long long *) shmem_malloc (N * sizeof (*dest5));
    dest6 = (double *) shmem_malloc (N * sizeof (*dest6));
    dest7 = (float *) shmem_malloc (N * sizeof (*dest7));
    /* N bytes: dest8 is initialised and written with N elements below. */
    dest8 = (char *) shmem_malloc (N * sizeof (*dest8));
    dest9 = (short *) shmem_malloc (sizeof (*dest9));
    dest10 = (int *) shmem_malloc (sizeof (*dest10));
    dest11 = (long *) shmem_malloc (sizeof (*dest11));
    dest12 = (double *) shmem_malloc (sizeof (*dest12));
    dest13 = (float *) shmem_malloc (sizeof (*dest13));

    if (src8 == NULL || dest1 == NULL || dest2 == NULL || dest3 == NULL
        || dest4 == NULL || dest5 == NULL || dest6 == NULL || dest7 == NULL
        || dest8 == NULL || dest9 == NULL || dest10 == NULL
        || dest11 == NULL || dest12 == NULL || dest13 == NULL) {
      fprintf (stderr, "PE %d: memory allocation failed, aborting put test\n",
               me);
      /* Terminates all PEs so that nobody is left waiting in a barrier. */
      shmem_global_exit (1);
    }

    for (i = 0; i < N; i += 1) {
      src1[i] = (short) me;
      src2[i] = me;
      src3[i] = (long) me;
      src4[i] = (long double) me;
      src5[i] = (long long) me;
      src6[i] = (double) me;
      src7[i] = (float) me;
      src8[i] = (char) me;
    }
    src9 = (short) me;
    src10 = me;
    src11 = (long) me;
    src12 = (double) me;
    src13 = (float) me;

    /* -9 is never a valid PE number, so stale data is detectable. */
    for (i = 0; i < N; i += 1) {
      dest1[i] = -9;
      dest2[i] = -9;
      dest3[i] = -9;
      dest4[i] = -9;
      dest5[i] = -9;
      dest6[i] = -9;
      dest7[i] = -9.0;
      dest8[i] = -9;
    }
    *dest9 = -9;
    *dest10 = -9;
    *dest11 = -9;
    *dest12 = -9;
    *dest13 = -9.0;

    nextpe = (me + 1) % npes;

    /* Testing shmem_short_put, shmem_int_put, shmem_long_put,
       shmem_longdouble_put, shmem_longlong_put, shmem_double_put,
       shmem_float_put, shmem_putmem */
    shmem_barrier_all ();

    shmem_short_put (dest1, src1, N, nextpe);
    shmem_int_put (dest2, src2, N, nextpe);
    shmem_long_put (dest3, src3, N, nextpe);
    shmem_longdouble_put (dest4, src4, N, nextpe);
    shmem_longlong_put (dest5, src5, N, nextpe);
    shmem_double_put (dest6, src6, N, nextpe);
    shmem_float_put (dest7, src7, N, nextpe);
    shmem_putmem (dest8, src8, N * sizeof (char), nextpe);

    shmem_barrier_all ();

    if (me == 0) {
      for (i = 0; i < N; i += 1) {
        if (dest1[i] != (npes - 1)) {
          fail1 = 1;
        }
        if (dest2[i] != (npes - 1)) {
          fail2 = 1;
        }
        if (dest3[i] != (npes - 1)) {
          fail3 = 1;
        }
        if (dest4[i] != (npes - 1)) {
          fail4 = 1;
        }
        if (dest5[i] != (npes - 1)) {
          fail5 = 1;
        }
        if (dest6[i] != (npes - 1)) {
          fail6 = 1;
        }
        if (dest7[i] != (npes - 1)) {
          fail7 = 1;
        }
        if (dest8[i] != (npes - 1)) {
          fail8 = 1;
        }
      }
      report_put_result ("shmem_short_put", fail1);
      report_put_result ("shmem_int_put", fail2);
      report_put_result ("shmem_long_put", fail3);
      report_put_result ("shmem_longdouble_put", fail4);
      report_put_result ("shmem_longlong_put", fail5);
      report_put_result ("shmem_double_put", fail6);
      report_put_result ("shmem_float_put", fail7);
      report_put_result ("shmem_putmem", fail8);
    }

    shmem_barrier_all ();

    /* Testing shmem_put32, shmem_put64, shmem_put128 */
    if (sizeof (int) == 4) {
      for (i = 0; i < N; i += 1) {
        dest2[i] = -9;
        dest3[i] = -9;
        dest4[i] = -9;
      }
      fail2 = 0;
      fail3 = 0;
      fail4 = 0;

      shmem_barrier_all ();

      shmem_put32 (dest2, src2, N, nextpe);
      shmem_put64 (dest3, src3, N, nextpe);
      shmem_put128 (dest4, src4, N, nextpe);

      shmem_barrier_all ();

      if (me == 0) {
        for (i = 0; i < N; i += 1) {
          if (dest2[i] != (npes - 1)) {
            fail2 = 1;
          }
          if (dest3[i] != (npes - 1)) {
            fail3 = 1;
          }
          if (dest4[i] != (npes - 1)) {
            fail4 = 1;
          }
        }
        report_put_result ("shmem_put32", fail2);
        report_put_result ("shmem_put64", fail3);
        report_put_result ("shmem_put128", fail4);
      }
    }
    else if (sizeof (int) == 8) {
      for (i = 0; i < N; i += 1) {
        dest1[i] = -9;
        dest2[i] = -9;
        dest3[i] = -9;
      }
      fail1 = 0;
      fail2 = 0;
      fail3 = 0;

      shmem_barrier_all ();

      shmem_put32 (dest1, src1, N, nextpe);
      shmem_put64 (dest2, src2, N, nextpe);
      shmem_put128 (dest3, src3, N, nextpe);

      shmem_barrier_all ();

      if (me == 0) {
        for (i = 0; i < N; i += 1) {
          if (dest1[i] != (npes - 1)) {
            fail1 = 1;
          }
          if (dest2[i] != (npes - 1)) {
            fail2 = 1;
          }
          if (dest3[i] != (npes - 1)) {
            fail3 = 1;
          }
        }
        report_put_result ("shmem_put32", fail1);
        report_put_result ("shmem_put64", fail2);
        report_put_result ("shmem_put128", fail3);
      }
    }

    /* Testing shmem_iput32, shmem_iput64, shmem_iput128 */
    shmem_barrier_all ();

    if (sizeof (int) == 4) {
      for (i = 0; i < N; i += 1) {
        dest2[i] = -9;
        dest3[i] = -9;
        dest4[i] = -9;
      }
      fail2 = 0;
      fail3 = 0;
      fail4 = 0;

      shmem_barrier_all ();

      shmem_iput32 (dest2, src2, 1, 2, nstrided, nextpe);
      shmem_iput64 (dest3, src3, 1, 2, nstrided, nextpe);
      shmem_iput128 (dest4, src4, 1, 2, nstrided, nextpe);

      shmem_barrier_all ();

      if (me == 0) {
        for (i = 0; i < N / 2; i += 1) {
          if (dest2[i] != (npes - 1)) {
            fail2 = 1;
          }
          if (dest3[i] != (npes - 1)) {
            fail3 = 1;
          }
          if (dest4[i] != (npes - 1)) {
            fail4 = 1;
          }
        }
        report_put_result ("shmem_iput32", fail2);
        report_put_result ("shmem_iput64", fail3);
        report_put_result ("shmem_iput128", fail4);
      }
    }
    else if (sizeof (int) == 8) {
      for (i = 0; i < N; i += 1) {
        dest1[i] = -9;
        dest2[i] = -9;
        dest3[i] = -9;
      }
      fail1 = 0;
      fail2 = 0;
      fail3 = 0;

      shmem_barrier_all ();

      shmem_iput32 (dest1, src1, 1, 2, nstrided, nextpe);
      shmem_iput64 (dest2, src2, 1, 2, nstrided, nextpe);
      shmem_iput128 (dest3, src3, 1, 2, nstrided, nextpe);

      shmem_barrier_all ();

      if (me == 0) {
        for (i = 0; i < N / 2; i += 1) {
          if (dest1[i] != (npes - 1)) {
            fail1 = 1;
          }
          if (dest2[i] != (npes - 1)) {
            fail2 = 1;
          }
          if (dest3[i] != (npes - 1)) {
            fail3 = 1;
          }
        }
        report_put_result ("shmem_iput32", fail1);
        report_put_result ("shmem_iput64", fail2);
        report_put_result ("shmem_iput128", fail3);
      }
    }

    /* Testing shmem_short_iput, shmem_int_iput, shmem_long_iput,
       shmem_double_iput, shmem_float_iput */
    for (i = 0; i < N; i += 1) {
      dest1[i] = -9;
      dest2[i] = -9;
      dest3[i] = -9;
      dest6[i] = -9;
      dest7[i] = -9;
    }
    fail1 = 0;
    fail2 = 0;
    fail3 = 0;
    fail6 = 0;
    fail7 = 0;

    shmem_barrier_all ();

    shmem_short_iput (dest1, src1, 1, 2, nstrided, nextpe);
    shmem_int_iput (dest2, src2, 1, 2, nstrided, nextpe);
    shmem_long_iput (dest3, src3, 1, 2, nstrided, nextpe);
    shmem_double_iput (dest6, src6, 1, 2, nstrided, nextpe);
    shmem_float_iput (dest7, src7, 1, 2, nstrided, nextpe);

    shmem_barrier_all ();

    if (me == 0) {
      for (i = 0; i < N / 2; i += 1) {
        if (dest1[i] != (npes - 1)) {
          fail1 = 1;
        }
        if (dest2[i] != (npes - 1)) {
          fail2 = 1;
        }
        if (dest3[i] != (npes - 1)) {
          fail3 = 1;
        }
        if (dest6[i] != (npes - 1)) {
          fail6 = 1;
        }
        if (dest7[i] != (npes - 1)) {
          fail7 = 1;
        }
      }
      report_put_result ("shmem_short_iput", fail1);
      report_put_result ("shmem_int_iput", fail2);
      report_put_result ("shmem_long_iput", fail3);
      report_put_result ("shmem_double_iput", fail6);
      report_put_result ("shmem_float_iput", fail7);
    }

    /* Testing shmem_double_p, shmem_float_p, shmem_int_p, shmem_long_p,
       shmem_short_p */
    shmem_barrier_all ();

    shmem_short_p (dest9, src9, nextpe);
    shmem_int_p (dest10, src10, nextpe);
    shmem_long_p (dest11, src11, nextpe);
    shmem_double_p (dest12, src12, nextpe);
    shmem_float_p (dest13, src13, nextpe);

    shmem_barrier_all ();

    if (me == 0) {
      report_put_result ("shmem_short_p", *dest9 != (npes - 1));
      report_put_result ("shmem_int_p", *dest10 != (npes - 1));
      report_put_result ("shmem_long_p", *dest11 != (npes - 1));
      report_put_result ("shmem_double_p", *dest12 != (npes - 1));
      report_put_result ("shmem_float_p", *dest13 != (npes - 1));
    }

    shmem_barrier_all ();

    shmem_free (dest1);
    shmem_free (dest2);
    shmem_free (dest3);
    shmem_free (dest4);
    shmem_free (dest5);
    shmem_free (dest6);
    shmem_free (dest7);
    shmem_free (dest8);
    shmem_free (dest9);
    shmem_free (dest10);
    shmem_free (dest11);
    shmem_free (dest12);
    shmem_free (dest13);

    /* src8 is ordinary heap memory and is released with free(). */
    free (src8);
  }
  else {
    printf ("Number of PEs must be > 1 to test shmem put, test skipped\n");
  }

  shmem_finalize ();

  return 0;
}
void sor (float **current_ptr, float **next_ptr) { int i, j, my_start, my_end, my_num_rows; float *U_Curr_Above = (float *) shmalloc ((sizeof (float)) * ((int) floor (WIDTH / H))); /* 1d array holding values from bottom row of PE above */ float *U_Curr_Below = (float *) shmalloc ((sizeof (float)) * ((int) floor (WIDTH / H))); /* 1d array holding values from top row of PE below */ float *U_Send_Buffer = (float *) shmalloc ((sizeof (float)) * ((int) floor (WIDTH / H))); /* 1d array holding values that are currently being sent */ //float U_Curr_Above[(int)floor(WIDTH/H)]; /* 1d array holding values from bottom row of PE above */ //float U_Curr_Below[(int)floor(WIDTH/H)]; /* 1d array holding values from top row of PE below */ //float U_Send_Buffer[(int)floor(WIDTH/H)]; /* 1d array holding values that are currently being sent */ float W = 1.5; //MPI_Request request; //MPI_Status status; //MPI_Comm_size(MPI_COMM_WORLD,&p); //MPI_Comm_rank(MPI_COMM_WORLD,&my_rank); my_start = get_start (my_rank); my_end = get_end (my_rank); my_num_rows = get_num_rows (my_rank); /* * Communicating ghost rows - only bother if p > 1 */ if (p > 1) { /* send/receive bottom rows */ if (my_rank < (p - 1)) { /* populate send buffer with bottow row */ for (i = 0; i < (int) floor (WIDTH / H); i++) { U_Send_Buffer[i] = current_ptr[my_num_rows - 1][i]; } /* non blocking send */ //MPI_Isend(U_Send_Buffer,(int)floor(WIDTH/H),MPI_FLOAT,my_rank+1,0,MPI_COMM_WORLD,&request); shmem_float_put (U_Curr_Above, U_Send_Buffer, (int) floor (WIDTH / H), my_rank + 1); } //if (my_rank > ROOT) { /* blocking receive */ //MPI_Recv(U_Curr_Above,(int)floor(WIDTH/H),MPI_FLOAT,my_rank-1,0,MPI_COMM_WORLD,&status); //} //MPI_Barrier(MPI_COMM_WORLD); shmem_barrier_all (); /* send/receive top rows */ if (my_rank > ROOT) { /* populate send buffer with top row */ for (i = 0; i < (int) floor (WIDTH / H); i++) { U_Send_Buffer[i] = current_ptr[0][i]; } /* non blocking send */ 
//MPI_Isend(U_Send_Buffer,(int)floor(WIDTH/H),MPI_FLOAT,my_rank-1,0,MPI_COMM_WORLD,&request); shmem_float_put (U_Curr_Below, U_Send_Buffer, (int) floor (WIDTH / H), my_rank - 1); } //if (my_rank < (p-1)) { /* blocking receive */ //MPI_Recv(U_Curr_Below,(int)floor(WIDTH/H),MPI_FLOAT,my_rank+1,0,MPI_COMM_WORLD,&status); //} //MPI_Barrier(MPI_COMM_WORLD); shmem_barrier_all (); } /* solve next reds (i+j odd) */ for (j = my_start; j <= my_end; j++) { for (i = 0; i < (int) floor (WIDTH / H); i++) { if ((i + j) % 2 != 0) { next_ptr[j - my_start][i] = get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, i, j) + (W / 4) * (get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, i - 1, j) + get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, i + 1, j) + get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, i, j - 1) + get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, i, j + 1) - 4 * (get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, i, j)) - (pow (H, 2) * f (i, j))); enforce_bc_par (next_ptr, my_rank, i, j); } } } /* solve next blacks (i+j) even .... using next reds */ for (j = my_start; j <= my_end; j++) { for (i = 0; i < (int) floor (WIDTH / H); i++) { if ((i + j) % 2 == 0) { next_ptr[j - my_start][i] = get_val_par (U_Curr_Above, current_ptr, U_Curr_Below, my_rank, i, j) + (W / 4) * (get_val_par (U_Curr_Above, next_ptr, U_Curr_Below, my_rank, i - 1, j) + get_val_par (U_Curr_Above, next_ptr, U_Curr_Below, my_rank, i + 1, j) + get_val_par (U_Curr_Above, next_ptr, U_Curr_Below, my_rank, i, j - 1) + get_val_par (U_Curr_Above, next_ptr, U_Curr_Below, my_rank, i, j + 1) - 4 * (get_val_par (U_Curr_Above, next_ptr, U_Curr_Below, my_rank, i, j)) - (pow (H, 2) * f (i, j))); enforce_bc_par (next_ptr, my_rank, i, j); } } } shfree(U_Send_Buffer); shfree(U_Curr_Below); shfree(U_Curr_Above); }