int app_main(int argc, char **argv) { uint32_t bufsize = 1000; // Allocate target temp buffer. extern void *stencil_cp_alloc(size_t); uint8_t *unew = (uint8_t *)stencil_cp_alloc(bufsize * sizeof(uint8_t)); printf("unit count is %d\n", __htc_get_unit_count()); int i; int k; #pragma omp target #pragma omp teams distribute parallel for num_threads(17) firstprivate(k) for (i = 0; i < bufsize; i++) { k = (int)omp_get_team_num(); printf("team %d thread %d i is %d\n", k, (int)omp_get_thread_num(), i); unew[i] = (omp_get_team_num()+1) * omp_get_thread_num(); } int sum = 0; for (i = 0; i < bufsize; i++) { // printf("i = %d val = %d\n", i, unew[i]); sum += unew[i]; } printf("sum is %d %s\n", sum, (sum == 7976) ? "PASSED" : "FAILED"); return 0; }
void foo(uint8_t *a) { #pragma omp target teams num_teams(8) { a[omp_get_team_num()] = omp_get_team_num(); } printf("end of function foo\n"); }
int app_main(int argc, char **argv) { uint32_t bufsize = 1000; uint32_t bufsize2 = 1000; // Allocate target temp buffer. extern void *stencil_cp_alloc(size_t); uint8_t *unew = (uint8_t *)stencil_cp_alloc(bufsize * sizeof(uint8_t)); printf("unit count is %d\n", __htc_get_unit_count()); int i; int k; #pragma omp target #pragma omp teams distribute parallel for num_threads(17) firstprivate(k) schedule(static,33) num_teams(5) for (i = 0; i < bufsize; i++) { k = (int)omp_get_team_num() + bufsize - bufsize2; // printf("first target team %d thread %d i is %d\n", k, // (int)omp_get_thread_num(), i); unew[i] = (omp_get_team_num()+1) * omp_get_thread_num() + k - k; } #pragma omp target #pragma omp teams distribute parallel for num_threads(7) firstprivate(k) schedule(static,33) num_teams(8) for (i = 0; i < bufsize; i++) { k = (int)omp_get_team_num(); // printf("second target team %d thread %d i is %d\n", k, // (int)omp_get_thread_num(), i); unew[i] += (omp_get_team_num()+1) * omp_get_thread_num(); } int sum = 0; for (i = 0; i < bufsize; i++) { sum += unew[i]; } printf("sum is %d %s\n", sum, (sum == 13977) ? "PASSED" : "FAILED"); foo(unew); for (i = 0; i < 8; i++) { sum += unew[i]; } printf("sum is %d %s\n", sum, (sum == (13977+28)) ? "PASSED" : "FAILED"); return 0; }
KOKKOS_INLINE_FUNCTION ValueType team_reduce( const ValueType & value , const JoinOp & op_in ) const { #pragma omp barrier typedef ValueType value_type; const JoinLambdaAdapter<value_type,JoinOp> op(op_in); // Make sure there is enough scratch space: typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE , value_type , void >::type type ; const int n_values = TEAM_REDUCE_SIZE/sizeof(value_type); type * team_scratch = (type*) ((char*)m_glb_scratch + TEAM_REDUCE_SIZE*omp_get_team_num()); for(int i = m_team_rank; i < n_values; i+= m_team_size) { team_scratch[i] = value_type(); } #pragma omp barrier for(int k=0; k<m_team_size; k+=n_values) { if((k <= m_team_rank) && (k+n_values > m_team_rank)) team_scratch[m_team_rank%n_values]+=value; #pragma omp barrier } for(int d = 1; d<n_values;d*=2) { if((m_team_rank+d<n_values) && (m_team_rank%(2*d)==0)) { team_scratch[m_team_rank] += team_scratch[m_team_rank+d]; } #pragma omp barrier } return team_scratch[0]; }
int bar (int x, int y, int z) { int a, b[64], i; a = 8; for (i = 0; i < 64; i++) b[i] = i; foo (x, y, z, &a, b); if (x == 0) { if (a != 8 + 64 * 32) return 1; for (i = 0; i < 64; i++) if (b[i] != i + 31 * 32 / 2) return 1; } else if (x == 1) { int c = omp_get_num_teams (); int d = omp_get_team_num (); int e = d; int f = 0; for (i = 0; i < 64; i++) if (i == e) { if (b[i] != i + 31 * 32 / 2) return 1; f++; e = e + c; } else if (b[i] != i) return 1; if (a < 8 || a > 8 + f * 32) return 1; } else if (x == 2) { if (a != 8 + 32) return 1; for (i = 0; i < 64; i++) if (b[i] != i + (i == y ? 31 * 32 / 2 : 0)) return 1; } else if (x == 3) { if (a != 8 + 1) return 1; for (i = 0; i < 64; i++) if (b[i] != i + (i == y ? z : 0)) return 1; } return 0; }
void mergesortstat(I b, I e, I bb) { if(!mergesortstat_debug(-1)) return; char buf[128]; sprintf(buf,"Start-End %08d-%08d Team %02d Thread/Max %02d/%02d Level %02d Parent %02d\n", (int)(b-bb),(int)(e-bb), omp_get_team_num(), omp_get_thread_num(), omp_get_num_threads(), omp_get_level(), omp_get_ancestor_thread_num (omp_get_level()) ); #pragma omp critical (io) { std::cout << buf << std::flush; } }
int32_t omp_get_team_num_ (void) { return omp_get_team_num (); }
int main(void) { check_offloading(); double A[N], B[N], C[N], D[N], E[N]; int fail = 0; int expected = 1; int success = 0; int chunkSize; double p = 2.0, q = 4.0; int nte, tl, blockSize; INIT(); // ************************** // Series 1: no dist_schedule // ************************** // // Test: #iterations == #teams // printf("iterations = teams\n"); #define CLAUSES num_teams(992) CODE() #undef CLAUSES printf("iterations > teams\n"); #define CLAUSES num_teams(256) CODE() #undef CLAUSES printf("iterations < teams\n"); #define CLAUSES num_teams(1024) CODE() #undef CLAUSES printf("num_teams(512) dist_schedule(static,1)\n"); #define CLAUSES num_teams(512) dist_schedule(static, 1) CODE() #undef CLAUSES printf("num_teams(512) dist_schedule(static,512)\n"); #define CLAUSES num_teams(512) dist_schedule(static, 512) CODE() #undef CLAUSES printf("num_teams(512) dist_schedule(static, chunkSize)\n"); chunkSize = N / 10; #define CLAUSES num_teams(512) dist_schedule(static, chunkSize) CODE() #undef CLAUSES printf("num_teams(1024) dist_schedule(static, chunkSize)\n"); chunkSize = N / 10; #define CLAUSES num_teams(1024) dist_schedule(static, chunkSize) CODE() #undef CLAUSES printf("num_teams(1024) dist_schedule(static, 1)\n"); #define CLAUSES num_teams(1024) dist_schedule(static, 1) CODE() #undef CLAUSES printf("num_teams(3) dist_schedule(static, 1)\n"); #define CLAUSES num_teams(3) dist_schedule(static, 1) CODE() #undef CLAUSES printf("num_teams(3) dist_schedule(static, 3)\n"); #define CLAUSES num_teams(3) dist_schedule(static, 3) CODE() #undef CLAUSES printf("num_teams(10) dist_schedule(static, 99)\n"); #define CLAUSES num_teams(10) dist_schedule(static, 99) CODE() #undef CLAUSES printf("num_teams(256) dist_schedule(static, 992)\n"); #define CLAUSES num_teams(256) dist_schedule(static, 992) CODE() #undef CLAUSES #if 0 printf("num_teams(256) private(p,q)\n"); #define CLAUSES num_teams(256) private(p,q) CODE_PRIV() #undef CLAUSES #endif // // Test: firstprivate // #if 0 printf("num_teams(64) firstprivate(p, q)\n"); ZERO(A); ZERO(B); p = 2.0, q = 4.0; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target // implicit firstprivate for p and q, their initial values being 2 and 4 for each target invocation #pragma omp teams distribute num_teams(64) firstprivate(p, q) for(int i = 0 ; i < 128 ; i++) { // 2 iterations for each team p += 3.0; // p and q are firstprivate to the team, and as such incremented twice (2 iterations per team) q += 7.0; A[i] += p; B[i] += q; } } for(int i = 0 ; i < 128 ; i++) { if (i % 2 == 0) { if (A[i] != (2.0+3.0)*TRIALS) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]); fail = 1; } if (B[i] != (4.0+7.0)*TRIALS) { printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0)*TRIALS, B[i]); fail = 1; } } else { if (A[i] != (2.0+3.0*2)*TRIALS) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0*2)*TRIALS, A[i]); fail = 1; } if (B[i] != (4.0+7.0*2)*TRIALS) { printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0*2)*TRIALS, B[i]); fail = 1; } } } if(fail) printf("Failed\n"); else printf("Succeeded\n"); #endif // // Test: lastprivate // printf("num_teams(10) lastprivate(lastpriv)\n"); success = 0; int lastpriv = -1; #pragma omp target map(tofrom:lastpriv) #pragma omp teams distribute num_teams(10) lastprivate(lastpriv) for(int i = 0 ; i < omp_get_num_teams() ; i++) lastpriv = omp_get_team_num(); if(lastpriv != 9) { printf("lastpriv value is %d and should have been %d\n", lastpriv, 9); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // *************************** // // Series 4: with parallel for // // *************************** // // Test: simple blocking loop // printf("num_teams(nte) thread_limit(tl) with parallel for innermost\n"); success = 0; ZERO(A); ZERO(B); nte = 32; tl = 64; blockSize = tl; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams distribute num_teams(nte) thread_limit(tl) for(int j = 0 ; j < 256 ; j += blockSize) { #pragma omp parallel for for(int i = j ; i < j+blockSize; i++) { A[i] += B[i] + C[i]; } } } for(int i = 0 ; i < 256 ; i++) { if (A[i] != TRIALS) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]); fail = 1; } } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: blocking loop where upper bound is not a multiple of tl*nte // printf("num_teams(nte) thread_limit(tl) with parallel for innermost\n"); success = 0; ZERO(A); ZERO(B); nte = 32; tl = 64; blockSize = tl; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams distribute num_teams(nte) thread_limit(tl) for(int j = 0 ; j < 510 ; j += blockSize) { int ub = (j+blockSize < 510) ? (j+blockSize) : 512; #pragma omp parallel for for(int i = j ; i < ub; i++) { A[i] += B[i] + C[i]; } } } for(int i = 0 ; i < 256 ; i++) { if (A[i] != TRIALS) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]); fail = 1; } } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // ************************** // Series 5: collapse // ************************** // // Test: 2 loops // printf("num_teams(512) collapse(2)\n"); success = 0; double * S = malloc(N*N*sizeof(double)); double * T = malloc(N*N*sizeof(double)); double * U = malloc(N*N*sizeof(double)); for (int i = 0 ; i < N ; i++) for (int j = 0 ; j < N ; j++) { S[i*N+j] = 0.0; T[i*N+j] = 1.0; U[i*N+j] = 2.0; } for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target map(tofrom:S[:N*N]), map(to:T[:N*N],U[:N*N]) #pragma omp teams distribute num_teams(512) collapse(2) for (int i = 0 ; i < N ; i++) for (int j = 0 ; j < N ; j++) S[i*N+j] += T[i*N+j] + U[i*N+j]; // += 3 at each t } for (int i = 0 ; i < N ; i++) for (int j = 0 ; j < N ; j++) if (S[i*N+j] != TRIALS*3.0) { printf("Error at (%d,%d), h = %lf, d = %lf\n", i, j, (double) TRIALS*3.0, S[i*N+j]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: 3 loops // printf("num_teams(512) collapse(3)\n"); success = 0; int M = N/8; double * V = malloc(M*M*M*sizeof(double)); double * Z = malloc(M*M*M*sizeof(double)); for (int i = 0 ; i < M ; i++) for (int j = 0 ; j < M ; j++) for (int k = 0 ; k < M ; k++) { V[i*M*M+j*M+k] = 2.0; Z[i*M*M+j*M+k] = 3.0; } for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target map(tofrom:V[:M*M*M]), map(to:Z[:M*M*M]) #pragma omp teams distribute num_teams(512) collapse(3) for (int i = 0 ; i < M ; i++) for (int j = 0 ; j < M ; j++) for (int k = 0 ; k < M ; k++) V[i*M*M+j*M+k] += Z[i*M*M+j*M+k]; // += 3 at each t } for (int i = 0 ; i < M ; i++) for (int j = 0 ; j < M ; j++) for (int k = 0 ; k < M ; k++) if (V[i*M*M+j*M+k] != 2.0+TRIALS*3.0) { printf("Error at (%d,%d), h = %lf, d = %lf\n", i, j, (double) TRIALS*3.0, V[i*M*M+j*M+k]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); return 0; }
int main(void) { check_offloading(); double A[N], B[N], C[N], D[N], E[N]; int fail = 0; INIT(); // ************************** // Series 1: no dist_schedule // ************************** // // Test: #iterations == #teams // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(512) #pragma omp distribute simd for (int i = 0 ; i < 512 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 512 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations > #teams // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(256) #pragma omp distribute simd for (int i = 0 ; i < 500 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 500 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations < #teams // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(256) #pragma omp distribute simd for (int i = 0 ; i < 123 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 123 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // **************************** // Series 2: with dist_schedule // **************************** // // Test: #iterations == #teams, dist_schedule(1) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(512) #pragma omp distribute simd dist_schedule(static,1) for (int i = 0 ; i < 512 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 512 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations == #teams, dist_schedule(#iterations) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(512) #pragma omp distribute simd dist_schedule(static,512) for (int i = 0 ; i < 512 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 512 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations == #teams, dist_schedule(#iterations/10), variable chunk size // ZERO(A); int ten = 10; int chunkSize = 512/ten; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(512) #pragma omp distribute simd dist_schedule(static,chunkSize) for (int i = 0 ; i < 512 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 512 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations > #teams, dist_schedule(1) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(256) #pragma omp distribute simd dist_schedule(static,1) for (int i = 0 ; i < 500 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 500 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations > #teams, dist_schedule(#iterations) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(256) #pragma omp distribute simd dist_schedule(static,500) for (int i = 0 ; i < 500 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 500 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations > #teams, dist_schedule(#iterations/10), variable chunk size // ZERO(A); ten = 10; chunkSize = 500/ten; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(256) #pragma omp distribute simd dist_schedule(static,chunkSize) for (int i = 0 ; i < 500 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 500 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations < #teams, dist_schedule(1) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(256) #pragma omp distribute simd dist_schedule(static,1) for (int i = 0 ; i < 123 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 123 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations < #teams, dist_schedule(#iterations) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(256) #pragma omp distribute simd dist_schedule(static,123) for (int i = 0 ; i < 123 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 123 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations < #teams, dist_schedule(#iterations) // ZERO(A); ten = 10; chunkSize = 123/ten; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(256) #pragma omp distribute simd dist_schedule(static,chunkSize) for (int i = 0 ; i < 123 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 123 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // **************************** // Series 3: with ds attributes // **************************** // // Test: private // ZERO(A); ZERO(B); double p = 2.0, q = 4.0; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(256) { #pragma omp distribute simd private(p,q) for(int i = 0 ; i < N ; i++) { p = 2; q = 3; A[i] += p; B[i] += q; } } } for(int i = 0 ; i < N ; i++) { if (A[i] != TRIALS*2) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) TRIALS*2, A[i]); fail = 1; } if (B[i] != TRIALS*3) { printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) TRIALS*3, B[i]); fail = 1; } } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: firstprivate // ZERO(A); ZERO(B); p = 2.0, q = 4.0; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target // implicit firstprivate for p and q, their initial values being 2 and 4 for each target invocation #pragma omp teams num_teams(64) { #pragma omp distribute simd firstprivate(p,q) for(int i = 0 ; i < 128 ; i++) { // 2 iterations for each team p += 3.0; // p and q are firstprivate to the team, and as such incremented twice (2 iterations per team) q += 7.0; A[i] += p; B[i] += q; } } } for(int i = 0 ; i < 128 ; i++) { if (i % 2 == 0) { if (A[i] != (2.0+3.0)*TRIALS) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]); fail = 1; } if (B[i] != (4.0+7.0)*TRIALS) { printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0)*TRIALS, B[i]); fail = 1; } } else { if (A[i] != (2.0+3.0*2)*TRIALS) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0*2)*TRIALS, A[i]); fail = 1; } if (B[i] != (4.0+7.0*2)*TRIALS) { printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0*2)*TRIALS, B[i]); fail = 1; } } } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: lastprivate // int lastpriv = -1; #pragma omp target map(tofrom:lastpriv) #pragma omp teams num_teams(10) #pragma omp distribute simd lastprivate(lastpriv) for(int i = 0 ; i < omp_get_num_teams() ; i++) lastpriv = omp_get_team_num(); if(lastpriv != 9) { printf("lastpriv value is %d and should have been %d\n", lastpriv, 9); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // ************************** // Series 4: collapse // ************************** // // Test: 2 loops // double * S = malloc(N*N*sizeof(double)); double * T = malloc(N*N*sizeof(double)); double * U = malloc(N*N*sizeof(double)); for (int i = 0 ; i < N ; i++) for (int j = 0 ; j < N ; j++) { S[i*N+j] = 0.0; T[i*N+j] = 1.0; U[i*N+j] = 2.0; } for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target map(tofrom:S[:N*N]), map(to:T[:N*N],U[:N*N]) #pragma omp teams num_teams(512) #pragma omp distribute simd collapse(2) for (int i = 0 ; i < N ; i++) for (int j = 0 ; j < N ; j++) S[i*N+j] += T[i*N+j] + U[i*N+j]; // += 3 at each t } for (int i = 0 ; i < N ; i++) for (int j = 0 ; j < N ; j++) if (S[i*N+j] != TRIALS*3.0) { printf("Error at (%d,%d), h = %lf, d = %lf\n", i, j, (double) TRIALS*3.0, S[i*N+j]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: 3 loops // int M = N/8; double * V = malloc(M*M*M*sizeof(double)); double * Z = malloc(M*M*M*sizeof(double)); for (int i = 0 ; i < M ; i++) for (int j = 0 ; j < M ; j++) for (int k = 0 ; k < M ; k++) { V[i*M*M+j*M+k] = 2.0; Z[i*M*M+j*M+k] = 3.0; } for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target map(tofrom:V[:M*M*M]), map(to:Z[:M*M*M]) #pragma omp teams num_teams(512) #pragma omp distribute simd collapse(3) for (int i = 0 ; i < M ; i++) for (int j = 0 ; j < M ; j++) for (int k = 0 ; k < M ; k++) V[i*M*M+j*M+k] += Z[i*M*M+j*M+k]; // += 3 at each t } for (int i = 0 ; i < M ; i++) for (int j = 0 ; j < M ; j++) for (int k = 0 ; k < M ; k++) if (V[i*M*M+j*M+k] != 2.0+TRIALS*3.0) { printf("Error at (%d,%d), h = %lf, d = %lf\n", i, j, (double) TRIALS*3.0, V[i*M*M+j*M+k]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); return 0; }
int main(void) { check_offloading(); double A[N], B[N], C[N], D[N], E[N]; int fail = 0; INIT(); // ************************** // Series 1: no dist_schedule // ************************** // // Test: #iterations == #teams // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute num_teams(512) for (int i = 0 ; i < 512 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 512 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations > #teams // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute num_teams(256) for (int i = 0 ; i < 500 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 500 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations < #teams // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute num_teams(256) for (int i = 0 ; i < 123 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 123 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // **************************** // Series 2: with dist_schedule // **************************** // // Test: #iterations == #teams, dist_schedule(1) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute dist_schedule(static,1) num_teams(512) for (int i = 0 ; i < 512 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 512 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations == #teams, dist_schedule(#iterations) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute dist_schedule(static,512) num_teams(512) for (int i = 0 ; i < 512 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 512 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations == #teams, dist_schedule(#iterations/10), variable chunk size // ZERO(A); int ten = 10; int chunkSize = 512/ten; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute dist_schedule(static,chunkSize) num_teams(512) for (int i = 0 ; i < 512 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 512 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations > #teams, dist_schedule(1) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute dist_schedule(static,1) num_teams(256) for (int i = 0 ; i < 500 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 500 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations > #teams, dist_schedule(#iterations) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute dist_schedule(static,500) num_teams(256) for (int i = 0 ; i < 500 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 500 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations > #teams, dist_schedule(#iterations/10), variable chunk size // ZERO(A); ten = 10; chunkSize = 500/ten; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute dist_schedule(static,chunkSize) num_teams(256) for (int i = 0 ; i < 500 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 500 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations < #teams, dist_schedule(1) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute dist_schedule(static,1) num_teams(256) for (int i = 0 ; i < 123 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 123 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations < #teams, dist_schedule(#iterations) // ZERO(A); for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute dist_schedule(static,123) num_teams(256) for (int i = 0 ; i < 123 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 123 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: #iterations < #teams, dist_schedule(#iterations) // ZERO(A); ten = 10; chunkSize = 123/ten; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute dist_schedule(static,chunkSize) num_teams(256) for (int i = 0 ; i < 123 ; i++) { A[i] += C[i]; // += 1 per position } } for (int i = 0 ; i < 123 ; i++) if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // **************************** // Series 3: with ds attributes // **************************** // DS currently failing in the compiler with asserts (bug #T158) #if 0 // // Test: private // ZERO(A); ZERO(B); double p = 2.0, q = 4.0; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute private(p,q) num_teams(256) for(int i = 0 ; i < N ; i++) { p = 2; q = 3; A[i] += p; B[i] += q; } } for(int i = 0 ; i < N ; i++) { if (A[i] != TRIALS*2) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) TRIALS*2, A[i]); fail = 1; } if (B[i] != TRIALS*3) { printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) TRIALS*3, B[i]); fail = 1; } } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: firstprivate // ZERO(A); ZERO(B); p = 2.0, q = 4.0; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute firstprivate(p,q) num_teams(64) for(int i = 0 ; i < 128 ; i++) { // 2 iterations for each team p += 3.0; // p and q are firstprivate to the team, and as such incremented twice (2 iterations per team) q += 7.0; A[i] += p; B[i] += q; } } for(int i = 0 ; i < 128 ; i++) { if (i % 2 == 0) { if (A[i] != (2.0+3.0)*TRIALS) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]); fail = 1; } if (B[i] != (4.0+7.0)*TRIALS) { printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0)*TRIALS, B[i]); fail = 1; } } else { if (A[i] != (2.0+3.0*2)*TRIALS) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0*2)*TRIALS, A[i]); fail = 1; } if (B[i] != (4.0+7.0*2)*TRIALS) { printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0*2)*TRIALS, B[i]); fail = 1; } } } if(fail) printf("Failed\n"); else printf("Succeeded\n"); //#endif // // Test: lastprivate // int lastpriv = -1; // map(tofrom:lastpriv) #pragma omp target teams distribute lastprivate(lastpriv) num_teams(10) for(int i = 0 ; i < omp_get_num_teams() ; i++) lastpriv = omp_get_team_num(); if(lastpriv != 9) { printf("lastpriv value is %d and should have been %d\n", lastpriv, 9); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // *************************** // Series 4: with parallel for // *************************** // // Test: simple blocking loop // ZERO(A); ZERO(B); int nte = 32; int tl = 64; int blockSize = tl; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute num_teams(nte) thread_limit(tl) for(int j = 0 ; j < 256 ; j += blockSize) { #pragma omp parallel for for(int i = j ; i < j+blockSize; i++) { A[i] += B[i] + C[i]; } } } for(int i = 0 ; i < 256 ; i++) { if (A[i] != TRIALS) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]); fail = 1; } } if(fail) printf("Failed\n"); else printf("Succeeded\n"); #endif // // Test: blocking loop where upper bound is not a multiple of tl*nte // ZERO(A); ZERO(B); int nte = 32; int tl = 64; int blockSize = tl; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute num_teams(nte) thread_limit(tl) for(int j = 0 ; j < 510 ; j += blockSize) { int ub = (j+blockSize < 510) ? (j+blockSize) : 512; #pragma omp parallel for for(int i = j ; i < ub; i++) { A[i] += B[i] + C[i]; } } } for(int i = 0 ; i < 256 ; i++) { if (A[i] != TRIALS) { printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]); fail = 1; } } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // ************************** // Series 5: collapse // ************************** // // Test: 2 loops // double * S = malloc(N*N*sizeof(double)); double * T = malloc(N*N*sizeof(double)); double * U = malloc(N*N*sizeof(double)); for (int i = 0 ; i < N ; i++) for (int j = 0 ; j < N ; j++) { S[i*N+j] = 0.0; T[i*N+j] = 1.0; U[i*N+j] = 2.0; } for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute collapse(2) map(tofrom:S[:N*N]), map(to:T[:N*N],U[:N*N]) num_teams(512) for (int i = 0 ; i < N ; i++) for (int j = 0 ; j < N ; j++) S[i*N+j] += T[i*N+j] + U[i*N+j]; // += 3 at each t } for (int i = 0 ; i < N ; i++) for (int j = 0 ; j < N ; j++) if (S[i*N+j] != TRIALS*3.0) { printf("Error at (%d,%d), h = %lf, d = %lf\n", i, j, (double) TRIALS*3.0, S[i*N+j]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: 3 loops // int M = N/8; double * V = malloc(M*M*M*sizeof(double)); double * Z = malloc(M*M*M*sizeof(double)); for (int i = 0 ; i < M ; i++) for (int j = 0 ; j < M ; j++) for (int k = 0 ; k < M ; k++) { V[i*M*M+j*M+k] = 2.0; Z[i*M*M+j*M+k] = 3.0; } for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target teams distribute collapse(3) map(tofrom:V[:M*M*M]), map(to:Z[:M*M*M]) num_teams(512) for (int i = 0 ; i < M ; i++) for (int j = 0 ; j < M ; j++) for (int k = 0 ; k < M ; k++) V[i*M*M+j*M+k] += Z[i*M*M+j*M+k]; // += 3 at each t } for (int i = 0 ; i < M ; i++) for (int j = 0 ; j < M ; j++) for (int k = 0 ; k < M ; k++) if (V[i*M*M+j*M+k] != 2.0+TRIALS*3.0) { printf("Error at (%d,%d), h = %lf, d = %lf\n", i, j, (double) TRIALS*3.0, V[i*M*M+j*M+k]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); return 0; }
int main(void) { check_offloading(); double A[N], B[N], C[N], D[N], E[N]; int fail = 0; INIT(); // // Test: num_teams and omp_get_team_num() // ZERO(A); int num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 512; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(num_teams) { A[omp_get_team_num()] += omp_get_team_num(); } } for (int i = 0 ; i < num_teams ; i++) if (A[i] != i*TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) i*TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: thread_limit and omp_get_thread_num() // ZERO(A); fail = 0; int num_threads = omp_is_initial_device() ? HOST_MAX_TEAMS : 256; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(1) thread_limit(num_threads) #pragma omp parallel { int tid = omp_get_thread_num(); A[tid] += (double) tid; } } for (int i = 0 ; i < num_threads ; i++) if (A[i] != i*TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) i*TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); // // Test: if statement in teams region // ZERO(A); fail = 0; num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 512; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(num_teams) { if (omp_get_team_num() % 2 == 0) { int teid = omp_get_team_num(); A[teid] += (double) 1; } else { int teid = omp_get_team_num(); A[teid] += (double) 2; } } } for (int i = 0 ; i < num_teams ; i++) { if (i % 2 == 0) { if (A[i] != TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]); fail = 1; } } else if (A[i] != 2*TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) 2*TRIALS, A[i]); fail = 1; } } if(fail) printf("Failed\n"); else printf("Succeeded\n"); /* // */ /* // Test: num_teams and thread_limit by simulating a distribute pragma */ /* // */ /* ZERO(A); */ /* fail = 0; */ /* for (int t = 0 ; t < TRIALS ; t++) { */ /* #pragma omp target */ /* #pragma omp teams num_teams(2) thread_limit(496) */ /* { */ /* if (omp_get_team_num() == 0) { */ /* #pragma omp parallel */ /* { */ /* A[omp_get_team_num()*496+omp_get_thread_num()] += omp_get_thread_num(); */ /* if(omp_get_thread_num() == 498) printf("teid = %d, tid = %d, accessing %d\n", omp_get_team_num(), omp_get_thread_num(), omp_get_team_num()*496+omp_get_thread_num()); */ /* } */ /* } else { */ /* #pragma omp parallel */ /* { */ /* if(omp_get_thread_num() == 0) */ /* printf("teid = %d, tid = %d: A= %lf\n", omp_get_team_num(), omp_get_thread_num(), A[omp_get_team_num()*496+omp_get_thread_num()]); */ /* A[omp_get_team_num()*496+omp_get_thread_num()] -= omp_get_thread_num(); */ /* if(omp_get_thread_num() == 0) */ /* printf("teid = %d, tid = %d: A= %lf\n", omp_get_team_num(), omp_get_thread_num(), A[omp_get_team_num()*496+omp_get_thread_num()]); */ /* } */ /* } */ /* } */ /* } */ /* for (int i = 0 ; i < 992 ; i++) { */ /* if (i < 496) { */ /* if (A[i] != i*TRIALS) { */ /* printf("Error at %d, h = %lf, d = %lf\n", i, (double) i*TRIALS, A[i]); */ /* fail = 1; */ /* } */ /* } else if(i >= 496) */ /* if (A[i] != -((i-496)*TRIALS)) { */ /* printf("Error at %d, h = %lf, d = %lf\n", i, (double) -((i-496)*TRIALS), A[i]); */ /* fail = 1; */ /* } */ /* } */ /* if(fail) printf("Failed\n"); */ /* else printf("Succeeded\n"); */ // // Test: private // ZERO(A); fail = 0; int a = 10; num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 256; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target #pragma omp teams num_teams(num_teams) private(a) { a = omp_get_team_num(); A[omp_get_team_num()] += a; } } for (int i = 0 ; i < num_teams ; i++) if (A[i] != i*TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) i*TRIALS, A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); ZERO(A); fail = 0; a = 10; num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 256; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target firstprivate(a) #pragma omp teams num_teams(num_teams) firstprivate(a) { a += omp_get_team_num(); A[omp_get_team_num()] += a; } } for (int i = 0 ; i < num_teams ; i++) if (A[i] != 10+i*TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) (10+i*TRIALS), A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); ZERO(A); fail = 0; a = 10; num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 256; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target // a is implicitly captured as a firsptivate #pragma omp teams num_teams(num_teams) firstprivate(a) { a += omp_get_team_num(); A[omp_get_team_num()] += a; } } for (int i = 0 ; i < num_teams ; i++) if (A[i] != 10+i*TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) (10+i*TRIALS), A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); ZERO(A); fail = 0; a = 10; num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 256; for (int t = 0 ; t < TRIALS ; t++) { #pragma omp target firstprivate(a) #pragma omp teams num_teams(num_teams) private(a) { a = omp_get_team_num(); A[omp_get_team_num()] += a; } } for (int i = 0 ; i < num_teams ; i++) if (A[i] != i*TRIALS) { printf("Error at %d, h = %lf, d = %lf\n", i, (double) (i*TRIALS), A[i]); fail = 1; } if(fail) printf("Failed\n"); else printf("Succeeded\n"); return 0; }