示例#1
0
/*
 * Offload check: every iteration of a `teams distribute parallel for` loop
 * stores (team number + 1) * thread number into one byte of a buffer, then
 * the buffer is summed after the region and compared with a reference value.
 *
 * argc, argv: unused.
 * Returns 0 after printing the PASSED/FAILED line, or 1 if the buffer could
 * not be allocated.
 */
int app_main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    uint32_t bufsize = 1000;

    // Allocate target temp buffer.
    extern void *stencil_cp_alloc(size_t);
    uint8_t *unew = (uint8_t *)stencil_cp_alloc(bufsize * sizeof(uint8_t));
    if (unew == NULL) {
        // Do not hand a null pointer to the offloaded loop below.
        printf("stencil_cp_alloc failed\n");
        return 1;
    }

    printf("unit count is %d\n",  __htc_get_unit_count());

    int i;
    // k appears in firstprivate(k): it must hold a defined value here,
    // otherwise each private copy is initialised from an indeterminate object.
    int k = 0;

#pragma omp target
#pragma omp teams distribute parallel for num_threads(17) firstprivate(k)
    for (i = 0; i < bufsize; i++) {
        k = (int)omp_get_team_num();
        printf("team %d thread %d i is %d\n", k,
                       (int)omp_get_thread_num(), i);
        // The value is narrowed to uint8_t on store.
        unew[i] = (omp_get_team_num()+1) * omp_get_thread_num();
    }

    int sum = 0;

    for (i = 0; i < bufsize; i++) {
        //  printf("i = %d  val = %d\n", i, unew[i]);
        sum += unew[i];
    }

    // 7976 is the reference sum this test compares against.
    printf("sum is %d  %s\n", sum, (sum == 7976) ? "PASSED" : "FAILED");

    return 0;
}
示例#2
0
/*
 * Runs a target teams region requesting 8 teams; each team writes its own
 * team number into a[] at the index equal to that team number.  After the
 * region a completion message is printed.
 *
 * a: destination buffer, indexed by team number.
 */
void foo(uint8_t *a) {
#pragma omp target teams num_teams(8)
    {
        const int team = omp_get_team_num();
        a[team] = team;
    }

    printf("end of function foo\n");
}
示例#3
0
/*
 * Offload check with two consecutive target regions that write to the same
 * buffer, followed by a call to foo().
 *
 * The first region stores (team + 1) * thread into every element, the second
 * region adds (team + 1) * thread on top.  The buffer is summed and compared
 * with a reference value, then foo(unew) is called and the first 8 elements
 * are added to the running sum for a second comparison.
 *
 * argc, argv: unused.
 * Returns 0 after printing both PASSED/FAILED lines, or 1 if the buffer could
 * not be allocated.
 */
int app_main(int argc, char **argv) {
    (void)argc;
    (void)argv;

    uint32_t bufsize = 1000;
    uint32_t bufsize2 = 1000;

    // Allocate target temp buffer.
    extern void *stencil_cp_alloc(size_t);
    uint8_t *unew = (uint8_t *)stencil_cp_alloc(bufsize * sizeof(uint8_t));
    if (unew == NULL) {
        // Do not hand a null pointer to the offloaded loops below.
        printf("stencil_cp_alloc failed\n");
        return 1;
    }

    printf("unit count is %d\n",  __htc_get_unit_count());

    int i;
    // k appears in firstprivate(k) on both regions: it must hold a defined
    // value here, otherwise each private copy is initialised from an
    // indeterminate object.
    int k = 0;

#pragma omp target
#pragma omp teams distribute parallel for num_threads(17) firstprivate(k) schedule(static,33) num_teams(5)
    for (i = 0; i < bufsize; i++) {
        // bufsize - bufsize2 is zero with the values above; the expression
        // makes the region reference both outer variables.
        k = (int)omp_get_team_num() + bufsize - bufsize2;
        //        printf("first target team %d thread %d i is %d\n", k,
        //                       (int)omp_get_thread_num(), i);
        // "+ k - k" contributes nothing to the stored value.
        unew[i] = (omp_get_team_num()+1) * omp_get_thread_num() + k - k;
    }

#pragma omp target
#pragma omp teams distribute parallel for num_threads(7) firstprivate(k) schedule(static,33) num_teams(8)
    for (i = 0; i < bufsize; i++) {
        k = (int)omp_get_team_num();
        //        printf("second target team %d thread %d i is %d\n", k,
        //                       (int)omp_get_thread_num(), i);
        unew[i] += (omp_get_team_num()+1) * omp_get_thread_num();
    }

    int sum = 0;

    for (i = 0; i < bufsize; i++) {
        sum += unew[i];
    }

    // 13977 is the reference sum for the two regions combined.
    printf("sum is %d  %s\n", sum, (sum == 13977) ? "PASSED" : "FAILED");

    foo(unew);

    // sum is deliberately not reset: the first 8 elements, as rewritten by
    // foo(), are added on top of the previous total.
    for (i = 0; i < 8; i++) {
        sum += unew[i];
    }

    printf("sum is %d  %s\n", sum, (sum == (13977+28)) ? "PASSED" : "FAILED");

    return 0;
}
  // Combines `value` across the threads of the calling team, using a
  // per-team slice of m_glb_scratch as working storage, and returns the
  // content of slot 0 of that slice.
  //
  // value : the calling thread's contribution.
  // op_in : join functor. It is wrapped in a JoinLambdaAdapter below, but
  //         NOTE(review): `op` is never used afterwards -- every combination
  //         step in this body is done with operator+= on value_type.
  //         Confirm whether op.join() was intended.
  //
  // The body contains barriers inside loops whose trip counts depend only on
  // m_team_size and n_values, not on the calling thread's rank.
  KOKKOS_INLINE_FUNCTION ValueType
    team_reduce( const ValueType & value
               , const JoinOp & op_in ) const {

      #pragma omp barrier

      typedef ValueType value_type;
      const JoinLambdaAdapter<value_type,JoinOp> op(op_in);

      // Make sure there is enough scratch space: `type` is value_type only
      // when sizeof(value_type) < TEAM_REDUCE_SIZE, otherwise void
      // (presumably so that an oversized value_type fails to compile --
      // TODO confirm against the definition of if_c).
      typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE
                           , value_type , void >::type type ;

      // Number of value_type slots that fit in one team's slice.
      const int n_values = TEAM_REDUCE_SIZE/sizeof(value_type);
      // This team's slice starts TEAM_REDUCE_SIZE * team-number bytes into
      // m_glb_scratch.
      type * team_scratch = (type*) ((char*)m_glb_scratch + TEAM_REDUCE_SIZE*omp_get_team_num());
      // Each thread value-initialises the slots rank, rank + team_size, ...
      for(int i = m_team_rank; i < n_values; i+= m_team_size) {
        team_scratch[i] = value_type();
      }

      #pragma omp barrier

      // Accumulate contributions in windows of n_values ranks: in the round
      // starting at k, only ranks in [k, k + n_values) add their value into
      // slot (rank % n_values). A barrier ends every round.
      for(int k=0; k<m_team_size; k+=n_values) {
        if((k <= m_team_rank) && (k+n_values > m_team_rank))
          team_scratch[m_team_rank%n_values]+=value;
        #pragma omp barrier
      }

      // Pairwise combine over the slots with the distance d doubling each
      // step: a rank that is a multiple of 2*d adds slot (rank + d) into
      // slot rank, provided rank + d is still a valid slot index.
      for(int d = 1; d<n_values;d*=2) {
        if((m_team_rank+d<n_values) && (m_team_rank%(2*d)==0)) {
          team_scratch[m_team_rank] += team_scratch[m_team_rank+d];
        }
        #pragma omp barrier
      }
      // Every thread returns slot 0, read after the final barrier.
      return team_scratch[0];
    }
示例#5
0
文件: target-35.c 项目: 0day-ci/gcc
/* Prepare a = 8 and b[i] = i, hand both to foo (x, y, z, &a, b) and then
   check the values foo left behind against what mode X requires.

   X == 0: a must be 8 + 64 * 32 and every b[i] must be i + 31 * 32 / 2.
   X == 1: starting at omp_get_team_num () and stepping by
           omp_get_num_teams (), the selected b[i] must be
           i + 31 * 32 / 2 and all others must still be i; a must lie in
           [8, 8 + 32 * number of selected elements].
   X == 2: a must be 8 + 32 and only b[y] carries the 31 * 32 / 2 offset.
   X == 3: a must be 8 + 1 and only b[y] carries an offset, equal to z.
   Any other X: nothing is checked.

   Returns 1 on the first mismatch, 0 otherwise.  */
int
bar (int x, int y, int z)
{
  const int offset = 31 * 32 / 2;
  int a = 8;
  int b[64];
  int i;

  for (i = 0; i < 64; i++)
    b[i] = i;

  foo (x, y, z, &a, b);

  switch (x)
    {
    case 0:
      if (a != 8 + 64 * 32)
        return 1;
      for (i = 0; i < 64; i++)
        {
          if (b[i] != i + offset)
            return 1;
        }
      break;

    case 1:
      {
        const int stride = omp_get_num_teams ();
        int next = omp_get_team_num ();
        int hits = 0;

        for (i = 0; i < 64; i++)
          {
            const int selected = (i == next);
            const int want = selected ? i + offset : i;

            if (b[i] != want)
              return 1;
            if (selected)
              {
                hits++;
                next += stride;
              }
          }
        /* a may hold anything between the initial value and one full
           increment of 32 per selected element.  */
        if (!(a >= 8 && a <= 8 + hits * 32))
          return 1;
      }
      break;

    case 2:
      if (a != 8 + 32)
        return 1;
      for (i = 0; i < 64; i++)
        {
          const int extra = (i == y) ? offset : 0;

          if (b[i] != i + extra)
            return 1;
        }
      break;

    case 3:
      if (a != 8 + 1)
        return 1;
      for (i = 0; i < 64; i++)
        {
          const int extra = (i == y) ? z : 0;

          if (b[i] != i + extra)
            return 1;
        }
      break;

    default:
      break;
    }

  return 0;
}
示例#6
0
// Prints one diagnostic line for the calling thread: the offsets of b and e
// relative to bb, followed by the OpenMP team number, thread number, number
// of threads, nesting level and the thread number of the enclosing (parent)
// level.
//
// Does nothing unless mergesortstat_debug(-1) returns a true value.
// The line is written to std::cout inside the named critical section `io`.
void mergesortstat(I b, I e, I bb)
{
	if(!mergesortstat_debug(-1))
		return;
	char buf[128];
	// Query the level once so the "Level" and "Parent" fields are consistent.
	const int level = omp_get_level();
	// The parent is the ancestor one level up. Asking for the ancestor at the
	// current level would just repeat omp_get_thread_num(). At level 0 there
	// is no parent and the routine reports -1.
	const int parent = omp_get_ancestor_thread_num(level - 1);
	// Bounded formatting: never write past the end of buf.
	snprintf(buf, sizeof buf,
		"Start-End %08d-%08d Team %02d Thread/Max %02d/%02d Level %02d Parent %02d\n",
		(int)(b-bb),(int)(e-bb),
		omp_get_team_num(),
		omp_get_thread_num(),
		omp_get_num_threads(),
		level,
		parent
		);
	#pragma omp critical (io)
	{
		std::cout << buf << std::flush;
	}
}
示例#7
0
/* Trailing-underscore entry point that forwards to omp_get_team_num ()
   and hands the result back as an int32_t.  */
int32_t
omp_get_team_num_ (void)
{
  const int32_t team = omp_get_team_num ();

  return team;
}
示例#8
0
文件: test.c 项目: clang-ykt/openmp
/*
 * Test driver for `teams distribute` inside `target`.
 *
 * Series 1 expands the CODE() macro once per CLAUSES definition. The manual
 * tests that follow cover lastprivate, a nested `parallel for`, and
 * collapse(2) / collapse(3). Each manual test prints "Succeeded" or "Failed".
 * `fail` is sticky: once set it is never cleared, so later tests also report
 * "Failed".
 *
 * Returns 0 regardless of test results, or 1 if a host allocation fails.
 */
int main(void) {
  check_offloading();

  double A[N], B[N], C[N], D[N], E[N];
  int fail = 0;
  // NOTE(review): expected, success and chunkSize are not read by the code
  // visible here; presumably the CODE()/INIT() macros use them -- confirm
  // before removing.
  int expected = 1;
  int success = 0;
  int chunkSize;
  double p = 2.0, q = 4.0;
  int nte, tl, blockSize;

  INIT();

  // **************************
  // Series 1: no dist_schedule
  // **************************

  //
  // Test: #iterations == #teams
  //
  printf("iterations = teams\n");
  #define CLAUSES num_teams(992)
  CODE()
  #undef CLAUSES

  printf("iterations > teams\n");
  #define CLAUSES num_teams(256)
  CODE()
  #undef CLAUSES

  printf("iterations < teams\n");
  #define CLAUSES num_teams(1024)
  CODE()
  #undef CLAUSES

  printf("num_teams(512) dist_schedule(static,1)\n");
  #define CLAUSES num_teams(512) dist_schedule(static, 1)
  CODE()
  #undef CLAUSES

  printf("num_teams(512) dist_schedule(static,512)\n");
  #define CLAUSES num_teams(512) dist_schedule(static, 512)
  CODE()
  #undef CLAUSES

  printf("num_teams(512) dist_schedule(static, chunkSize)\n");
  chunkSize = N / 10;
  #define CLAUSES num_teams(512) dist_schedule(static, chunkSize)
  CODE()
  #undef CLAUSES

  printf("num_teams(1024) dist_schedule(static, chunkSize)\n");
  chunkSize = N / 10;
  #define CLAUSES num_teams(1024) dist_schedule(static, chunkSize)
  CODE()
  #undef CLAUSES

  printf("num_teams(1024) dist_schedule(static, 1)\n");
  #define CLAUSES num_teams(1024) dist_schedule(static, 1)
  CODE()
  #undef CLAUSES

  printf("num_teams(3) dist_schedule(static, 1)\n");
  #define CLAUSES num_teams(3) dist_schedule(static, 1)
  CODE()
  #undef CLAUSES

  printf("num_teams(3) dist_schedule(static, 3)\n");
  #define CLAUSES num_teams(3) dist_schedule(static, 3)
  CODE()
  #undef CLAUSES

  printf("num_teams(10) dist_schedule(static, 99)\n");
  #define CLAUSES num_teams(10) dist_schedule(static, 99)
  CODE()
  #undef CLAUSES

  printf("num_teams(256) dist_schedule(static, 992)\n");
  #define CLAUSES num_teams(256) dist_schedule(static, 992)
  CODE()
  #undef CLAUSES

#if 0
  printf("num_teams(256) private(p,q)\n");
  #define CLAUSES num_teams(256) private(p,q)
  CODE_PRIV()
  #undef CLAUSES
#endif

  //
  // Test: firstprivate
  //

#if 0
  printf("num_teams(64) firstprivate(p, q)\n");
  ZERO(A); ZERO(B);
  p = 2.0, q = 4.0;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target // implicit firstprivate for p and q, their initial values being 2 and 4 for each target invocation
    #pragma omp teams distribute num_teams(64) firstprivate(p, q)
    for(int i = 0 ; i < 128 ; i++) { // 2 iterations for each team
      p += 3.0;  // p and q are firstprivate to the team, and as such incremented twice (2 iterations per team)
      q += 7.0;
      A[i] += p;
      B[i] += q;
    }
  }
  for(int i = 0 ; i < 128 ; i++) {
    if (i % 2 == 0) {
      if (A[i] != (2.0+3.0)*TRIALS) {
        printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]);
        fail = 1;
      }
      if (B[i] != (4.0+7.0)*TRIALS) {
        printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0)*TRIALS, B[i]);
        fail = 1;
      }
    } else {
      if (A[i] != (2.0+3.0*2)*TRIALS) {
        printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0*2)*TRIALS, A[i]);
        fail = 1;
      }
      if (B[i] != (4.0+7.0*2)*TRIALS) {
        printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0*2)*TRIALS, B[i]);
        fail = 1;
      }
    }
  }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");
#endif

  //
  // Test: lastprivate
  //

  printf("num_teams(10) lastprivate(lastpriv)\n");
  success = 0;
  int lastpriv = -1;
  #pragma omp target map(tofrom:lastpriv)
  #pragma omp teams distribute num_teams(10) lastprivate(lastpriv)
  for(int i = 0 ; i < omp_get_num_teams() ; i++)
    lastpriv = omp_get_team_num();

  // With 10 teams requested, the check expects the value written by team 9.
  if(lastpriv != 9) {
    printf("lastpriv value is %d and should have been %d\n", lastpriv, 9);
    fail = 1;
  }

  if(fail) printf("Failed\n");
  else printf("Succeeded\n");


  // // ***************************
  // // Series 4: with parallel for
  // // ***************************

  //
  // Test: simple blocking loop
  //
  printf("num_teams(nte) thread_limit(tl) with parallel for innermost\n");
  success = 0;
  ZERO(A); ZERO(B);
  nte = 32;
  tl = 64;
  blockSize = tl;

  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams distribute num_teams(nte) thread_limit(tl)
    for(int j = 0 ; j < 256 ; j += blockSize) {
      #pragma omp parallel for
      for(int i = j ; i < j+blockSize; i++) {
        A[i] += B[i] + C[i];
      }
    }
  }
  for(int i = 0 ; i < 256 ; i++) {
    if (A[i] != TRIALS) {
      // Report the value the check actually compares against.
      printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  }

  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: blocking loop where upper bound is not a multiple of tl*nte
  //

  printf("num_teams(nte) thread_limit(tl) with parallel for innermost\n");
  success = 0;
  ZERO(A); ZERO(B);
  nte = 32;
  tl = 64;
  blockSize = tl;

  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams distribute num_teams(nte) thread_limit(tl)
    for(int j = 0 ; j < 510 ; j += blockSize) {
      // NOTE(review): the last block is clamped to 512 while the outer bound
      // is 510, and only the first 256 elements are verified below.
      // Confirm that both are intended.
      int ub = (j+blockSize < 510) ? (j+blockSize) : 512;
      #pragma omp parallel for
      for(int i = j ; i < ub; i++) {
        A[i] += B[i] + C[i];
      }
    }
  }
  for(int i = 0 ; i < 256 ; i++) {
    if (A[i] != TRIALS) {
      // Report the value the check actually compares against.
      printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  }

  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  // **************************
  // Series 5: collapse
  // **************************

  //
  // Test: 2 loops
  //

  printf("num_teams(512) collapse(2)\n");
  success = 0;
  double * S = malloc(N*N*sizeof(double));
  double * T = malloc(N*N*sizeof(double));
  double * U = malloc(N*N*sizeof(double));
  if (S == NULL || T == NULL || U == NULL) {
    printf("Error: malloc failed\n");
    free(S); free(T); free(U);
    return 1;
  }
  for (int i = 0 ; i < N ; i++)
    for (int j = 0 ; j < N ; j++)
    {
      S[i*N+j] = 0.0;
      T[i*N+j] = 1.0;
      U[i*N+j] = 2.0;
    }

  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target map(tofrom:S[:N*N]), map(to:T[:N*N],U[:N*N])
    #pragma omp teams distribute num_teams(512) collapse(2)
    for (int i = 0 ; i < N ; i++)
      for (int j = 0 ; j < N ; j++)
        S[i*N+j] += T[i*N+j] + U[i*N+j];  // += 3 at each t
  }
  for (int i = 0 ; i < N ; i++)
    for (int j = 0 ; j < N ; j++)
      if (S[i*N+j] != TRIALS*3.0) {
        printf("Error at (%d,%d), h = %lf, d = %lf\n", i, j, (double) TRIALS*3.0, S[i*N+j]);
        fail = 1;
      }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  // The 2-loop buffers are not used past this point.
  free(S);
  free(T);
  free(U);

  //
  // Test: 3 loops
  //

  printf("num_teams(512) collapse(3)\n");
  success = 0;
  int M = N/8;
  double * V = malloc(M*M*M*sizeof(double));
  double * Z = malloc(M*M*M*sizeof(double));
  if (V == NULL || Z == NULL) {
    printf("Error: malloc failed\n");
    free(V); free(Z);
    return 1;
  }
  for (int i = 0 ; i < M ; i++)
    for (int j = 0 ; j < M ; j++)
      for (int k = 0 ; k < M ; k++)
      {
        V[i*M*M+j*M+k] = 2.0;
        Z[i*M*M+j*M+k] = 3.0;
      }

  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target map(tofrom:V[:M*M*M]), map(to:Z[:M*M*M])
    #pragma omp teams distribute num_teams(512) collapse(3)
    for (int i = 0 ; i < M ; i++)
      for (int j = 0 ; j < M ; j++)
        for (int k = 0 ; k < M ; k++)
          V[i*M*M+j*M+k] += Z[i*M*M+j*M+k];  // += 3 at each t
  }
  for (int i = 0 ; i < M ; i++)
    for (int j = 0 ; j < M ; j++)
      for (int k = 0 ; k < M ; k++)
        if (V[i*M*M+j*M+k] != 2.0+TRIALS*3.0) {
          // Print all three indices and the value the check compares against.
          printf("Error at (%d,%d,%d), h = %lf, d = %lf\n", i, j, k, (double) (2.0+TRIALS*3.0), V[i*M*M+j*M+k]);
          fail = 1;
        }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  free(V);
  free(Z);

  return 0;
}
示例#9
0
文件: test.c 项目: clang-ykt/openmp
/*
 * Test driver for `distribute simd` nested in `target` / `teams`.
 *
 * Series 1: no dist_schedule; Series 2: dist_schedule(static, chunk);
 * Series 3: private / firstprivate / lastprivate; Series 4: collapse(2) and
 * collapse(3). Each test prints "Succeeded" or "Failed". `fail` is sticky:
 * once set it is never cleared, so later tests also report "Failed".
 *
 * Returns 0 regardless of test results, or 1 if a host allocation fails.
 */
int main(void) {
  check_offloading();

  // NOTE(review): D and E are not read by the code visible here; presumably
  // INIT() touches them -- confirm before removing.
  double A[N], B[N], C[N], D[N], E[N];
  int fail = 0;

  INIT();

  // **************************
  // Series 1: no dist_schedule
  // **************************

  //
  // Test: #iterations == #teams
  //
  ZERO(A);
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(512)
    #pragma omp distribute simd
    for (int i = 0 ; i < 512 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 512 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: #iterations > #teams
  //
  ZERO(A);
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(256)
    #pragma omp distribute simd
    for (int i = 0 ; i < 500 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 500 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: #iterations < #teams
  //
  ZERO(A);
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(256)
    #pragma omp distribute simd
    for (int i = 0 ; i < 123 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 123 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  // ****************************
  // Series 2: with dist_schedule
  // ****************************

  //
  // Test: #iterations == #teams, dist_schedule(1)
  //
  ZERO(A);
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(512)
    #pragma omp distribute simd dist_schedule(static,1)
    for (int i = 0 ; i < 512 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 512 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: #iterations == #teams, dist_schedule(#iterations)
  //
  ZERO(A);
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(512)
    #pragma omp distribute simd dist_schedule(static,512)
    for (int i = 0 ; i < 512 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 512 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: #iterations == #teams, dist_schedule(#iterations/10), variable chunk size
  //
  ZERO(A);
  int ten = 10;
  int chunkSize = 512/ten;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(512)
    #pragma omp distribute simd dist_schedule(static,chunkSize)
    for (int i = 0 ; i < 512 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 512 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: #iterations > #teams, dist_schedule(1)
  //
  ZERO(A);
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(256)
    #pragma omp distribute simd dist_schedule(static,1)
    for (int i = 0 ; i < 500 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 500 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: #iterations > #teams, dist_schedule(#iterations)
  //
  ZERO(A);
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(256)
    #pragma omp distribute simd dist_schedule(static,500)
    for (int i = 0 ; i < 500 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 500 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: #iterations > #teams, dist_schedule(#iterations/10), variable chunk size
  //
  ZERO(A);
  ten = 10;
  chunkSize = 500/ten;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(256)
    #pragma omp distribute simd dist_schedule(static,chunkSize)
    for (int i = 0 ; i < 500 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 500 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: #iterations < #teams, dist_schedule(1)
  //
  ZERO(A);
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(256)
    #pragma omp distribute simd dist_schedule(static,1)
    for (int i = 0 ; i < 123 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 123 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: #iterations < #teams, dist_schedule(#iterations)
  //
  ZERO(A);
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(256)
    #pragma omp distribute simd dist_schedule(static,123)
    for (int i = 0 ; i < 123 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 123 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: #iterations < #teams, dist_schedule(#iterations/10), variable chunk size
  //
  ZERO(A);
  ten = 10;
  chunkSize = 123/ten;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(256)
    #pragma omp distribute simd dist_schedule(static,chunkSize)
    for (int i = 0 ; i < 123 ; i++)
    {
      A[i] += C[i]; // += 1 per position
    }
  }
  for (int i = 0 ; i < 123 ; i++)
    if (A[i] != TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
      fail = 1;
    }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  // ****************************
  // Series 3: with ds attributes
  // ****************************

  //
  // Test: private
  //
  ZERO(A); ZERO(B);
  double p = 2.0, q = 4.0;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(256)
    {
      #pragma omp distribute simd private(p,q)
      for(int i = 0 ; i < N ; i++) {
        p = 2;
        q = 3;
        A[i] += p;
        B[i] += q;
      }
    }
  }
  for(int i = 0 ; i < N ; i++) {
    if (A[i] != TRIALS*2) {
      printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) TRIALS*2, A[i]);
      fail = 1;
    }
    if (B[i] != TRIALS*3) {
      printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) TRIALS*3, B[i]);
      fail = 1;
    }
  }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: firstprivate
  //
  ZERO(A); ZERO(B);
  p = 2.0, q = 4.0;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target // implicit firstprivate for p and q, their initial values being 2 and 4 for each target invocation
    #pragma omp teams num_teams(64)
    {
      #pragma omp distribute simd firstprivate(p,q)
      for(int i = 0 ; i < 128 ; i++) { // 2 iterations for each team
        p += 3.0;  // p and q are firstprivate to the team, and as such incremented twice (2 iterations per team)
        q += 7.0;
        A[i] += p;
        B[i] += q;
      }
    }
  }
  for(int i = 0 ; i < 128 ; i++) {
    if (i % 2 == 0) {
      if (A[i] != (2.0+3.0)*TRIALS) {
        printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]);
        fail = 1;
      }
      if (B[i] != (4.0+7.0)*TRIALS) {
        printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0)*TRIALS, B[i]);
        fail = 1;
      }
    } else {
      if (A[i] != (2.0+3.0*2)*TRIALS) {
        printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0*2)*TRIALS, A[i]);
        fail = 1;
      }
      if (B[i] != (4.0+7.0*2)*TRIALS) {
        printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0*2)*TRIALS, B[i]);
        fail = 1;
      }
    }
  }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  //
  // Test: lastprivate
  //

  int lastpriv = -1;
  #pragma omp target map(tofrom:lastpriv)
  #pragma omp teams num_teams(10)
  #pragma omp distribute simd lastprivate(lastpriv)
  for(int i = 0 ; i < omp_get_num_teams() ; i++)
    lastpriv = omp_get_team_num();

  // With 10 teams requested, the check expects the value written by team 9.
  if(lastpriv != 9) {
    printf("lastpriv value is %d and should have been %d\n", lastpriv, 9);
    fail = 1;
  }

  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  // **************************
  // Series 4: collapse
  // **************************

  //
  // Test: 2 loops
  //
  double * S = malloc(N*N*sizeof(double));
  double * T = malloc(N*N*sizeof(double));
  double * U = malloc(N*N*sizeof(double));
  if (S == NULL || T == NULL || U == NULL) {
    printf("Error: malloc failed\n");
    free(S); free(T); free(U);
    return 1;
  }
  for (int i = 0 ; i < N ; i++)
    for (int j = 0 ; j < N ; j++)
    {
      S[i*N+j] = 0.0;
      T[i*N+j] = 1.0;
      U[i*N+j] = 2.0;
    }

  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target map(tofrom:S[:N*N]), map(to:T[:N*N],U[:N*N])
    #pragma omp teams num_teams(512)
    #pragma omp distribute simd collapse(2)
    for (int i = 0 ; i < N ; i++)
      for (int j = 0 ; j < N ; j++)
        S[i*N+j] += T[i*N+j] + U[i*N+j];  // += 3 at each t
  }
  for (int i = 0 ; i < N ; i++)
    for (int j = 0 ; j < N ; j++)
      if (S[i*N+j] != TRIALS*3.0) {
        printf("Error at (%d,%d), h = %lf, d = %lf\n", i, j, (double) TRIALS*3.0, S[i*N+j]);
        fail = 1;
      }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  // The 2-loop buffers are not used past this point.
  free(S);
  free(T);
  free(U);

  //
  // Test: 3 loops
  //
  int M = N/8;
  double * V = malloc(M*M*M*sizeof(double));
  double * Z = malloc(M*M*M*sizeof(double));
  if (V == NULL || Z == NULL) {
    printf("Error: malloc failed\n");
    free(V); free(Z);
    return 1;
  }
  for (int i = 0 ; i < M ; i++)
    for (int j = 0 ; j < M ; j++)
      for (int k = 0 ; k < M ; k++)
      {
        V[i*M*M+j*M+k] = 2.0;
        Z[i*M*M+j*M+k] = 3.0;
      }

  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target map(tofrom:V[:M*M*M]), map(to:Z[:M*M*M])
    #pragma omp teams num_teams(512)
    #pragma omp distribute simd collapse(3)
    for (int i = 0 ; i < M ; i++)
      for (int j = 0 ; j < M ; j++)
        for (int k = 0 ; k < M ; k++)
          V[i*M*M+j*M+k] += Z[i*M*M+j*M+k];  // += 3 at each t
  }
  for (int i = 0 ; i < M ; i++)
    for (int j = 0 ; j < M ; j++)
      for (int k = 0 ; k < M ; k++)
        if (V[i*M*M+j*M+k] != 2.0+TRIALS*3.0) {
          // Print all three indices and the value the check compares against.
          printf("Error at (%d,%d,%d), h = %lf, d = %lf\n", i, j, k, (double) (2.0+TRIALS*3.0), V[i*M*M+j*M+k]);
          fail = 1;
        }
  if(fail) printf("Failed\n");
  else printf("Succeeded\n");

  free(V);
  free(Z);

  return 0;
}
示例#10
0
文件: test.c 项目: clang-ykt/openmp
int main(void) {
    check_offloading();

    double A[N], B[N], C[N], D[N], E[N];
    int fail = 0;

    INIT();

    // **************************
    // Series 1: no dist_schedule
    // **************************

    //
    // Test: #iterations == #teams
    //
    ZERO(A);
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute num_teams(512)
        for (int i = 0 ; i < 512 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 512 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");


    //
    // Test: #iterations > #teams
    //
    ZERO(A);
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute num_teams(256)
        for (int i = 0 ; i < 500 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 500 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: #iterations < #teams
    //
    ZERO(A);
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute num_teams(256)
        for (int i = 0 ; i < 123 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 123 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    // ****************************
    // Series 2: with dist_schedule
    // ****************************

    //
    // Test: #iterations == #teams, dist_schedule(1)
    //
    ZERO(A);
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute dist_schedule(static,1) num_teams(512)
        for (int i = 0 ; i < 512 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 512 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: #iterations == #teams, dist_schedule(#iterations)
    //
    ZERO(A);
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute dist_schedule(static,512) num_teams(512)
        for (int i = 0 ; i < 512 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 512 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: #iterations == #teams, dist_schedule(#iterations/10), variable chunk size
    //
    ZERO(A);
    int ten = 10;
    int chunkSize = 512/ten;
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute dist_schedule(static,chunkSize) num_teams(512)
        for (int i = 0 ; i < 512 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 512 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: #iterations > #teams, dist_schedule(1)
    //
    ZERO(A);
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute dist_schedule(static,1) num_teams(256)
        for (int i = 0 ; i < 500 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 500 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: #iterations > #teams, dist_schedule(#iterations)
    //
    ZERO(A);
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute dist_schedule(static,500) num_teams(256)
        for (int i = 0 ; i < 500 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 500 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: #iterations > #teams, dist_schedule(#iterations/10), variable chunk size
    //
    ZERO(A);
    ten = 10;
    chunkSize = 500/ten;
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute dist_schedule(static,chunkSize) num_teams(256)
        for (int i = 0 ; i < 500 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 500 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: #iterations < #teams, dist_schedule(1)
    //
    ZERO(A);
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute dist_schedule(static,1) num_teams(256)
        for (int i = 0 ; i < 123 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 123 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: #iterations < #teams, dist_schedule(#iterations)
    //
    ZERO(A);
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute dist_schedule(static,123) num_teams(256)
        for (int i = 0 ; i < 123 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 123 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: #iterations < #teams, dist_schedule(#iterations)
    //
    ZERO(A);
    ten = 10;
    chunkSize = 123/ten;
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute dist_schedule(static,chunkSize) num_teams(256)
        for (int i = 0 ; i < 123 ; i++)
        {
            A[i] += C[i]; // += 1 per position
        }
    }
    for (int i = 0 ; i < 123 ; i++)
        if (A[i] != TRIALS) {
            printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
            fail = 1;
        }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    // ****************************
    // Series 3: with ds attributes
    // ****************************
    // DS currently failing in the compiler with asserts (bug #T158)
#if 0
    //
    // Test: private
    //
    ZERO(A);
    ZERO(B);
    double p = 2.0, q = 4.0;
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute private(p,q) num_teams(256)
        for(int i = 0 ; i < N ; i++) {
            p = 2;
            q = 3;
            A[i] += p;
            B[i] += q;
        }
    }
    for(int i = 0 ; i < N ; i++) {
        if (A[i] != TRIALS*2) {
            printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) TRIALS*2, A[i]);
            fail = 1;
        }
        if (B[i] != TRIALS*3) {
            printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) TRIALS*3, B[i]);
            fail = 1;
        }
    }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: firstprivate
    //
    ZERO(A);
    ZERO(B);
    p = 2.0, q = 4.0;
    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute firstprivate(p,q) num_teams(64)
        for(int i = 0 ; i < 128 ; i++) { // 2 iterations for each team
            p += 3.0;  // p and q are firstprivate to the team, and as such incremented twice (2 iterations per team)
            q += 7.0;
            A[i] += p;
            B[i] += q;
        }
    }
    for(int i = 0 ; i < 128 ; i++) {
        if (i % 2 == 0) {
            if (A[i] != (2.0+3.0)*TRIALS) {
                printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]);
                fail = 1;
            }
            if (B[i] != (4.0+7.0)*TRIALS) {
                printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0)*TRIALS, B[i]);
                fail = 1;
            }
        } else {
            if (A[i] != (2.0+3.0*2)*TRIALS) {
                printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0*2)*TRIALS, A[i]);
                fail = 1;
            }
            if (B[i] != (4.0+7.0*2)*TRIALS) {
                printf("Error at B[%d], h = %lf, d = %lf\n", i, (double) (4.0+7.0*2)*TRIALS, B[i]);
                fail = 1;
            }
        }
    }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");
//#endif

    //
    // Test: lastprivate
    //

    int lastpriv = -1;
    // map(tofrom:lastpriv)
    #pragma omp target teams distribute lastprivate(lastpriv) num_teams(10)
    for(int i = 0 ; i < omp_get_num_teams() ; i++)
        lastpriv = omp_get_team_num();

    if(lastpriv != 9) {
        printf("lastpriv value is %d and should have been %d\n", lastpriv, 9);
        fail = 1;
    }

    if(fail) printf("Failed\n");
    else printf("Succeeded\n");


    // ***************************
    // Series 4: with parallel for
    // ***************************

    //
    // Test: simple blocking loop
    //
    ZERO(A);
    ZERO(B);
    int nte = 32;
    int tl = 64;
    int blockSize = tl;

    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute num_teams(nte) thread_limit(tl)
        for(int j = 0 ; j < 256 ; j += blockSize) {
            #pragma omp parallel for
            for(int i = j ; i < j+blockSize; i++) {
                A[i] += B[i] + C[i];
            }
        }
    }
    for(int i = 0 ; i < 256 ; i++) {
        if (A[i] != TRIALS) {
            printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]);
            fail = 1;
        }
    }

    if(fail) printf("Failed\n");
    else printf("Succeeded\n");
#endif

    //
    // Test: blocking loop where upper bound is not a multiple of tl*nte
    //
    ZERO(A);
    ZERO(B);
    int nte = 32;
    int tl = 64;
    int blockSize = tl;

    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute num_teams(nte) thread_limit(tl)
        for(int j = 0 ; j < 510 ; j += blockSize) {
            int ub = (j+blockSize < 510) ? (j+blockSize) : 512;
            #pragma omp parallel for
            for(int i = j ; i < ub; i++) {
                A[i] += B[i] + C[i];
            }
        }
    }
    for(int i = 0 ; i < 256 ; i++) {
        if (A[i] != TRIALS) {
            printf("Error at A[%d], h = %lf, d = %lf\n", i, (double) (2.0+3.0)*TRIALS, A[i]);
            fail = 1;
        }
    }

    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    // **************************
    // Series 5: collapse
    // **************************

    //
    // Test: 2 loops
    //
    double * S = malloc(N*N*sizeof(double));
    double * T = malloc(N*N*sizeof(double));
    double * U = malloc(N*N*sizeof(double));
    for (int i = 0 ; i < N ; i++)
        for (int j = 0 ; j < N ; j++)
        {
            S[i*N+j] = 0.0;
            T[i*N+j] = 1.0;
            U[i*N+j] = 2.0;
        }

    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute collapse(2) map(tofrom:S[:N*N]), map(to:T[:N*N],U[:N*N]) num_teams(512)
        for (int i = 0 ; i < N ; i++)
            for (int j = 0 ; j < N ; j++)
                S[i*N+j] += T[i*N+j] + U[i*N+j];  // += 3 at each t
    }
    for (int i = 0 ; i < N ; i++)
        for (int j = 0 ; j < N ; j++)
            if (S[i*N+j] != TRIALS*3.0) {
                printf("Error at (%d,%d), h = %lf, d = %lf\n", i, j, (double) TRIALS*3.0, S[i*N+j]);
                fail = 1;
            }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    //
    // Test: 3 loops
    //
    int M = N/8;
    double * V = malloc(M*M*M*sizeof(double));
    double * Z = malloc(M*M*M*sizeof(double));
    for (int i = 0 ; i < M ; i++)
        for (int j = 0 ; j < M ; j++)
            for (int k = 0 ; k < M ; k++)
            {
                V[i*M*M+j*M+k] = 2.0;
                Z[i*M*M+j*M+k] = 3.0;
            }

    for (int t = 0 ; t < TRIALS ; t++) {
        #pragma omp target teams distribute collapse(3) map(tofrom:V[:M*M*M]), map(to:Z[:M*M*M]) num_teams(512)
        for (int i = 0 ; i < M ; i++)
            for (int j = 0 ; j < M ; j++)
                for (int k = 0 ; k < M ; k++)
                    V[i*M*M+j*M+k] += Z[i*M*M+j*M+k];  // += 3 at each t
    }
    for (int i = 0 ; i < M ; i++)
        for (int j = 0 ; j < M ; j++)
            for (int k = 0 ; k < M ; k++)
                if (V[i*M*M+j*M+k] != 2.0+TRIALS*3.0) {
                    printf("Error at (%d,%d), h = %lf, d = %lf\n", i, j, (double) TRIALS*3.0, V[i*M*M+j*M+k]);
                    fail = 1;
                }
    if(fail) printf("Failed\n");
    else printf("Succeeded\n");

    return 0;
}
示例#11
0
文件: test.c 项目: clang-ykt/openmp
/*
 * Test driver for the `teams` construct nested inside `target`.
 *
 * Each test repeats a target/teams region TRIALS times, accumulating into the
 * host array A, and then verifies A on the host.  Every test prints exactly
 * one line, "Succeeded" or "Failed"; on a mismatch it also prints one
 * "Error at ..." line per wrong element (h = expected value, d = value found).
 *
 * N, TRIALS, HOST_MAX_TEAMS, INIT(), ZERO() and check_offloading() are defined
 * elsewhere in the file (presumably ZERO(X) clears array X and INIT() fills
 * the arrays -- confirm against their definitions).
 *
 * Returns 0 unconditionally; the pass/fail status is reported on stdout only.
 */
int main(void) {
  check_offloading();

  double A[N], B[N], C[N], D[N], E[N];
  int fail = 0;

  INIT();

  //
  // Test: num_teams and omp_get_team_num()
  //
  // Every team adds its own team number to its own slot once per trial.
  ZERO(A);
  int num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 512;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(num_teams)
    {
      A[omp_get_team_num()] += omp_get_team_num();
    }
  }
  for (int i = 0 ; i < num_teams ; i++) {
    if (A[i] != i*TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) i*TRIALS, A[i]);
      fail = 1;
    }
  }
  if (fail) {
    printf("Failed\n");
  } else {
    printf("Succeeded\n");
  }

  //
  // Test: thread_limit and omp_get_thread_num()
  //
  // A single team; every thread of the parallel region adds its thread id to
  // its own slot once per trial.
  ZERO(A);
  fail = 0;
  int num_threads = omp_is_initial_device() ? HOST_MAX_TEAMS : 256;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(1) thread_limit(num_threads)
    #pragma omp parallel
    {
      int tid = omp_get_thread_num();
      A[tid] += (double) tid;
    }
  }
  for (int i = 0 ; i < num_threads ; i++) {
    if (A[i] != i*TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) i*TRIALS, A[i]);
      fail = 1;
    }
  }
  if (fail) {
    printf("Failed\n");
  } else {
    printf("Succeeded\n");
  }


  //
  // Test: if statement in teams region
  //
  // Even-numbered teams add 1 per trial, odd-numbered teams add 2.
  ZERO(A);
  fail = 0;
  num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 512;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(num_teams)
    {
      if (omp_get_team_num() % 2 == 0) {
        int teid = omp_get_team_num();
        A[teid] += (double) 1;
      }
      else {
        int teid = omp_get_team_num();
        A[teid] += (double) 2;
      }
    }
  }
  for (int i = 0 ; i < num_teams ; i++) {
    if (i % 2 == 0) {
      if (A[i] != TRIALS) {
        printf("Error at %d, h = %lf, d = %lf\n", i, (double) TRIALS, A[i]);
        fail = 1;
      }
    } else {
      if (A[i] != 2*TRIALS) {
        printf("Error at %d, h = %lf, d = %lf\n", i, (double) 2*TRIALS, A[i]);
        fail = 1;
      }
    }
  }
  if (fail) {
    printf("Failed\n");
  } else {
    printf("Succeeded\n");
  }

  // The following test is disabled (kept commented out, as in the original).
  /* // */
  /* // Test: num_teams and thread_limit by simulating a distribute pragma */
  /* // */
  /* ZERO(A); */
  /* fail = 0; */
  /* for (int t = 0 ; t < TRIALS ; t++) { */
  /*   #pragma omp target */
  /*   #pragma omp teams num_teams(2) thread_limit(496) */
  /*   { */
  /*     if (omp_get_team_num() == 0) { */
  /*       #pragma omp parallel */
  /*       { */
  /*         A[omp_get_team_num()*496+omp_get_thread_num()] += omp_get_thread_num(); */
  /*         if(omp_get_thread_num() == 498) printf("teid = %d, tid = %d, accessing %d\n", omp_get_team_num(), omp_get_thread_num(), omp_get_team_num()*496+omp_get_thread_num()); */
  /*       } */
  /*     } else { */
  /*       #pragma omp parallel */
  /*       { */
  /*         if(omp_get_thread_num() == 0) */
  /*           printf("teid = %d, tid = %d: A= %lf\n", omp_get_team_num(), omp_get_thread_num(), A[omp_get_team_num()*496+omp_get_thread_num()]); */
  /*         A[omp_get_team_num()*496+omp_get_thread_num()] -= omp_get_thread_num(); */
  /*         if(omp_get_thread_num() == 0) */
  /*           printf("teid = %d, tid = %d: A= %lf\n", omp_get_team_num(), omp_get_thread_num(), A[omp_get_team_num()*496+omp_get_thread_num()]); */
  /*       } */
  /*     } */
  /*   } */
  /* } */
  /* for (int i = 0 ; i < 992 ; i++) { */
  /*   if (i < 496) { */
  /*     if (A[i] != i*TRIALS) { */
  /*       printf("Error at %d, h = %lf, d = %lf\n", i, (double) i*TRIALS, A[i]); */
  /*       fail = 1; */
  /*     } */
  /*   } else if(i >= 496) */
  /*     if (A[i] != -((i-496)*TRIALS)) { */
  /*       printf("Error at %d, h = %lf, d = %lf\n", i, (double) -((i-496)*TRIALS), A[i]); */
  /*       fail = 1; */
  /*     } */
  /* } */
  /* if(fail) printf("Failed\n"); */
  /* else printf("Succeeded\n"); */

  //
  // Test: private
  //
  // `a` is private to each team and assigned before use, so the host value
  // (10) must not leak into the result.
  ZERO(A);
  fail = 0;
  int a = 10;
  num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 256;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(num_teams) private(a)
    {
      a = omp_get_team_num();
      A[omp_get_team_num()] += a;
    }
  }

  for (int i = 0 ; i < num_teams ; i++) {
    if (A[i] != i*TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) i*TRIALS, A[i]);
      fail = 1;
    }
  }
  if (fail) {
    printf("Failed\n");
  } else {
    printf("Succeeded\n");
  }

  //
  // Test: firstprivate on both target and teams
  //
  // Each trial starts from a fresh copy of a == 10, so every trial adds
  // (10 + team number) to the team's slot: expected total is (10+i)*TRIALS.
  ZERO(A);
  fail = 0;
  a = 10;
  num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 256;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target firstprivate(a)
    #pragma omp teams num_teams(num_teams) firstprivate(a)
    {
      a += omp_get_team_num();
      A[omp_get_team_num()] += a;
    }
  }

  for (int i = 0 ; i < num_teams ; i++) {
    if (A[i] != (10+i)*TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) ((10+i)*TRIALS), A[i]);
      fail = 1;
    }
  }
  if (fail) {
    printf("Failed\n");
  } else {
    printf("Succeeded\n");
  }

  //
  // Test: firstprivate on teams only
  //
  // Same expectation as above; here `a` is implicitly captured as
  // firstprivate by the target construct.
  ZERO(A);
  fail = 0;
  a = 10;
  num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 256;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target
    #pragma omp teams num_teams(num_teams) firstprivate(a)
    {
      a += omp_get_team_num();
      A[omp_get_team_num()] += a;
    }
  }

  for (int i = 0 ; i < num_teams ; i++) {
    if (A[i] != (10+i)*TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) ((10+i)*TRIALS), A[i]);
      fail = 1;
    }
  }
  if (fail) {
    printf("Failed\n");
  } else {
    printf("Succeeded\n");
  }

  //
  // Test: firstprivate on target, private on teams
  //
  // The teams-level private copy is assigned before use, so the result only
  // depends on the team number.
  ZERO(A);
  fail = 0;
  a = 10;
  num_teams = omp_is_initial_device() ? HOST_MAX_TEAMS : 256;
  for (int t = 0 ; t < TRIALS ; t++) {
    #pragma omp target firstprivate(a)
    #pragma omp teams num_teams(num_teams) private(a)
    {
      a = omp_get_team_num();
      A[omp_get_team_num()] += a;
    }
  }

  for (int i = 0 ; i < num_teams ; i++) {
    if (A[i] != i*TRIALS) {
      printf("Error at %d, h = %lf, d = %lf\n", i, (double) (i*TRIALS), A[i]);
      fail = 1;
    }
  }
  if (fail) {
    printf("Failed\n");
  } else {
    printf("Succeeded\n");
  }

  return 0;
}