C++ (Cpp) acc_copyinの例

コード例 #1

0

ファイルを表示

ファイル: lib-23.c プロジェクト: abumaryam/gcc

/* Map two separately allocated host buffers with acc_copyin, then request
   a copy-out that starts at the first buffer but covers twice the length
   that was mapped for it.  */
int
main (int argc, char **argv)
{
  const int N = 256;
  unsigned char *h1, *h2;
  int k;

  /* First host buffer: N bytes, each set to 0xab.  */
  h1 = (unsigned char *) malloc (N);

  k = 0;
  while (k < N)
    h1[k++] = 0xab;

  (void) acc_copyin (h1, N);

  /* Second host buffer: N bytes, each set to 0xde.  */
  h2 = (unsigned char *) malloc (N);

  k = 0;
  while (k < N)
    h2[k++] = 0xde;

  (void) acc_copyin (h2, N);

  /* NOTE(review): only N bytes were mapped at h1, yet N + N bytes are
     copied out here.  Presumably this is a deliberate negative test --
     confirm before "fixing" the size.  */
  acc_copyout (h1, N + N);

  free (h1);
  free (h2);

  return 0;
}

コード例 #2

0

ファイルを表示

ファイル: lib-18.c プロジェクト: 0day-ci/gcc

/* Copy a host buffer in, release the returned device pointer with
   acc_free, and then call acc_copyout on the same host range.  */
int
main (int argc, char **argv)
{
  const int N = 256;
  unsigned char *h = (unsigned char *) malloc (N);
  void *d;
  int k;

  /* Fill the host buffer with 0, 1, ..., N - 1.  */
  for (k = 0; k != N; ++k)
    h[k] = k;

  d = acc_copyin (h, N);
  acc_free (d);

  /* Marker written to stderr immediately before the copy-out.
     NOTE(review): the device memory has already been freed at this point,
     so the copy-out below looks like a deliberate error-path test --
     confirm.  */
  fprintf (stderr, "CheCKpOInT\n");
  acc_copyout (h, N);

  free (h);

  return 0;
}

コード例 #3

0

ファイルを表示

ファイル: lib-88.c プロジェクト: 0day-ci/gcc

/* Thread entry point operating on the file-level buffer x of N elements.
   It requires that the thread has no current CUDA context and that x is
   present, copies x out over a zeroed host buffer, checks that element
   k then holds k, rewrites it to N - k - 1, and finally copies x back in,
   storing the result in d_x.  ARG is not used.  Always returns NULL.  */
static void *
test (void *arg)
{
  int k;

  /* Abort if this thread already has a current CUDA context.  */
  if (acc_get_current_cuda_context () != NULL)
    abort ();

  /* The full range x[0 .. N) has to be reported as present.  */
  if (acc_is_present (x, N) != 1)
    abort ();

  /* Clear the host copy first so that the values checked below can only
     have come from the copy-out.  */
  memset (x, 0, N);
  acc_copyout (x, N);

  k = 0;
  while (k < N)
    {
      if (x[k] != k)
        abort ();

      /* Replace the verified value with its mirror image.  */
      x[k] = N - k - 1;
      k++;
    }

  d_x = acc_copyin (x, N);

  return NULL;
}

コード例 #4

0

ファイルを表示

ファイル: lib-47.c プロジェクト: earonesty/gcc

/* Copy a host buffer in, zero the host copy, call acc_update_self and then
   verify that the host buffer holds the original 0 .. N - 1 pattern before
   deleting the mapping.  */
int
main (int argc, char **argv)
{
  const int N = 256;
  unsigned char *h;
  void *d;
  int k;

  h = (unsigned char *) malloc (N);

  /* Host pattern: element k holds k.  */
  for (k = 0; k < N; ++k)
    h[k] = k;

  d = acc_copyin (h, N);
  if (d == 0)
    abort ();

  /* Wipe the host copy so that a successful update is observable.  */
  memset (h, 0, N);

  /* NOTE(review): the host address passed here is 0, not h.  Presumably
     this is a deliberate invalid-argument test -- confirm.  */
  acc_update_self (0, N);

  for (k = 0; k < N; ++k)
    if (h[k] != k)
      abort ();

  acc_delete (h, N);

  free (h);

  return 0;
}

コード例 #5

0

ファイルを表示

ファイル: data-already-3.c プロジェクト: abumaryam/gcc

/* Open a data region with present_or_copy (i) and, as the body of that
   region, pass the same variable to the acc_copyin runtime routine.
   NOTE(review): mapping i twice looks like the purpose of the test (see
   the file name) -- confirm.  */
int
main (int argc, char *argv[])
{
  int i;

  /* The braces only make the extent of the data region explicit; its body
     is still the single acc_copyin call.  */
#pragma acc data present_or_copy (i)
  {
    acc_copyin (&i, sizeof (i));
  }

  return 0;
}

コード例 #6

0

ファイルを表示

ファイル: kernels-alias-8.c プロジェクト: 0day-ci/gcc

/* Call acc_copyin and then run a kernels region that names both the
   returned pointer P (deviceptr) and the array section A[0:N] (pcopy).
   NOTE(review): the file name suggests this exercises compiler alias
   analysis, so the exact statement shapes may matter -- confirm before
   restructuring anything here.  */
void
foo (int *a, size_t n)
{
  /* NOTE(review): the address passed is &a, i.e. the address of the
     pointer parameter itself, not the memory A points to -- confirm that
     this is intended.  */
  int *p = (int *)acc_copyin (&a, n);

#pragma acc kernels deviceptr (p) pcopy(a[0:n])
  {
    /* Assign 0 to the pointer A itself, then store 1 through P.  */
    a = 0;
    *p = 1;
  }
}

コード例 #7

0

ファイルを表示

ファイル: lib-13.c プロジェクト: SuperLu/gcc

/* Abort unless acc_is_present (ADDR, LEN) returns exactly WANT.  */
static void
expect_presence (void *addr, int len, int want)
{
  if (acc_is_present (addr, len) != want)
    abort ();
}

/* Map N bytes with acc_copyin and probe acc_is_present with a series of
   ranges around that mapping, then free the device memory and check that
   the start of the buffer is no longer reported as present.  */
int
main (int argc, char **argv)
{
  const int N = 256;
  unsigned char *h;
  void *d;
  int k;

  h = (unsigned char *) malloc (N);

  for (k = 0; k < N; ++k)
    h[k] = k;

  d = acc_copyin (h, N);

  /* The first byte of the mapping must be present.  */
  expect_presence (h, 1, 1);

  /* Each of the following queries is required to return 0: ranges that
     run past the end, start before the beginning, start at the end, use
     a null address, or have zero length.  */
  expect_presence (h, N + 1, 0);
  expect_presence (h + 1, N, 0);
  expect_presence (h - 1, N, 0);
  expect_presence (h - 1, N - 1, 0);
  expect_presence (h + N, 0, 0);
  expect_presence (h + N, N, 0);
  expect_presence (0, N, 0);
  expect_presence (h, 0, 0);

  acc_free (d);

  /* After acc_free the start of the buffer must no longer be present.  */
  expect_presence (h, 1, 0);

  free (h);

  return 0;
}

コード例 #8

0

ファイルを表示

ファイル: kernels-alias-7.c プロジェクト: ChenBoTang/gcc

/* Pass the local array A to acc_copyin and then run a kernels region that
   names both the returned pointer P (deviceptr) and A itself (pcopy).
   NOTE(review): the file name suggests this exercises compiler alias
   analysis, so the exact statement shapes may matter -- confirm before
   restructuring anything here.  */
void
foo (void)
{
  int a[N];
  /* P receives whatever acc_copyin returns for the whole of A.  */
  int *p = (int *)acc_copyin (&a[0], sizeof (a));

#pragma acc kernels deviceptr (p) pcopy(a)
  {
    /* One store through the array name, one through P.  */
    a[0] = 0;
    *p = 1;
  }
}

コード例 #9

0

ファイルを表示

ファイル: lib-16.c プロジェクト: abumaryam/gcc

/* Pass the same host range to acc_copyin twice in a row, with no copy-out
   or delete in between, and then free the host buffer.  */
int
main (int argc, char **argv)
{
  const int N = 256;
  unsigned char *h = (unsigned char *) malloc (N);
  int k = 0;

  /* Host pattern: element k holds k.  */
  while (k < N)
    {
      h[k] = k;
      ++k;
    }

  (void) acc_copyin (h, N);

  /* NOTE(review): second copy-in of the identical range; presumably the
     repeated mapping is the point of this test -- confirm.  */
  (void) acc_copyin (h, N);

  free (h);

  return 0;
}

コード例 #10

0

ファイルを表示

ファイル: GPUCopy.cpp プロジェクト: mcprice30/MCGPU

/**
 * Cache the host array pointers of a SimBox and, for parallel OpenACC
 * builds, create device copies of those arrays.
 *
 * The h_* members are always pointed at the SimBox's arrays. Nothing more
 * happens when `parallel` is false or when _OPENACC is not defined.
 * Otherwise each two-dimensional array gets a table of row pointers
 * allocated with acc_malloc, whose entries are set to the addresses that
 * acc_copyin returns for the individual rows; the two one-dimensional
 * arrays are passed to acc_copyin directly.
 *
 * @param sb  Simulation box supplying the host arrays and their extents.
 */
void GPUCopy::copyIn(SimBox *sb) {
    // Host-side views are recorded regardless of execution mode.
    h_moleculeData = sb->moleculeData;
    h_atomData = sb->atomData;
    h_atomCoordinates = sb->atomCoordinates;
    h_rollBackCoordinates = sb->rollBackCoordinates;
    h_size = sb->size;
    h_primaryIndexes = sb->primaryIndexes;

    // Serial runs work on the host arrays only.
    if (!parallel)
        return;

#ifdef _OPENACC
    // Molecule data: MOL_DATA_SIZE rows of numMolecules ints each.
    d_moleculeData = (int**)acc_malloc(MOL_DATA_SIZE * sizeof(int *));
    assert(d_moleculeData != NULL);
    for (int r = 0; r < MOL_DATA_SIZE; r++) {
        int *hostRow = sb->moleculeData[r];
        int *devRow = (int *)acc_copyin(hostRow, sb->numMolecules * sizeof(int));
        assert(devRow != NULL);
        // The store into the row table is issued from an acc parallel
        // region because the table is named as a deviceptr.
#pragma acc parallel deviceptr(d_moleculeData)
        d_moleculeData[r] = devRow;
    }

    // Atom data: ATOM_DATA_SIZE rows of numAtoms Reals each.
    d_atomData = (Real**)acc_malloc(ATOM_DATA_SIZE * sizeof(Real *));
    assert(d_atomData != NULL);
    for (int r = 0; r < ATOM_DATA_SIZE; r++) {
        Real *hostRow = sb->atomData[r];
        Real *devRow = (Real *)acc_copyin(hostRow, sb->numAtoms * sizeof(Real));
        assert(devRow != NULL);
#pragma acc parallel deviceptr(d_atomData)
        d_atomData[r] = devRow;
    }

    // Atom coordinates: NUM_DIMENSIONS rows of numAtoms Reals each.
    d_atomCoordinates = (Real**)acc_malloc(NUM_DIMENSIONS * sizeof(Real *));
    assert(d_atomCoordinates != NULL);
    for (int r = 0; r < NUM_DIMENSIONS; r++) {
        Real *hostRow = sb->atomCoordinates[r];
        Real *devRow = (Real *)acc_copyin(hostRow, sb->numAtoms * sizeof(Real));
        assert(devRow != NULL);
#pragma acc parallel deviceptr(d_atomCoordinates)
        d_atomCoordinates[r] = devRow;
    }

    // Roll-back coordinates: NUM_DIMENSIONS rows of largestMol Reals each.
    d_rollBackCoordinates = (Real**)acc_malloc(NUM_DIMENSIONS * sizeof(Real *));
    assert(d_rollBackCoordinates != NULL);
    for (int r = 0; r < NUM_DIMENSIONS; r++) {
        Real *hostRow = sb->rollBackCoordinates[r];
        Real *devRow = (Real *)acc_copyin(hostRow, sb->largestMol * sizeof(Real));
        assert(devRow != NULL);
#pragma acc parallel deviceptr(d_rollBackCoordinates)
        d_rollBackCoordinates[r] = devRow;
    }

    // Flat arrays are copied in with a single call each; unlike the rows
    // above, their results are not asserted.
    d_primaryIndexes = (int *)acc_copyin(sb->primaryIndexes, sb->numPIdxes * sizeof(int));

    d_size = (Real *)acc_copyin(sb->size, NUM_DIMENSIONS * sizeof(Real));
#endif
}

コード例 #11

0

ファイルを表示

ファイル: lib-43.c プロジェクト: SuperLu/gcc

/* Copy a host buffer in, overwrite the host copy with 0xab, call
   acc_update_device, copy the data back out and verify that every host
   byte is 0xab.  */
int
main (int argc, char **argv)
{
  const int N = 256;
  unsigned char *h;
  void *d;
  int k;

  h = (unsigned char *) malloc (N);

  /* Initial host pattern: element k holds k.  */
  for (k = 0; k < N; ++k)
    h[k] = k;

  d = acc_copyin (h, N);
  if (d == 0)
    abort ();

  /* New host contents that the update is meant to push to the device.  */
  for (k = 0; k < N; ++k)
    h[k] = 0xab;

  /* Marker written to stderr immediately before the update call.
     NOTE(review): the host address passed to acc_update_device is 0, not
     h.  Presumably this is a deliberate invalid-argument test -- confirm.  */
  fprintf (stderr, "CheCKpOInT\n");
  acc_update_device (0, N);

  acc_copyout (h, N);

  for (k = 0; k < N; ++k)
    if (h[k] != 0xab)
      abort ();

  free (h);

  return 0;
}

コード例 #12

0

ファイルを表示

ファイル: context-4.c プロジェクト: 0day-ci/gcc

/* Compute y = alpha * x + y twice -- once in an OpenACC parallel region
   (result in h_Y2) and once with cublasSaxpy on buffers mapped through
   acc_copyin (result read back into h_Y1) -- and require the two results
   to agree to a relative error below 1e-6.  context_check (pctx) is called
   around the cuBLAS calls, and after acc_shutdown no CUDA context may be
   current.  Any failure is reported on stderr and exits with
   EXIT_FAILURE.  */
int
main (int argc, char **argv)
{
    cublasStatus_t s;
    cublasHandle_t h;
    CUcontext pctx;
    CUresult r;
    int i;
    const int N = 256;
    float *h_X, *h_Y1, *h_Y2;
    float *d_X,*d_Y;
    float alpha = 2.0f;
    float error_norm;
    float ref_norm;

    /* Test 4 - OpenACC creates, cuBLAS shares.  */

    acc_set_device_num (0, acc_device_nvidia);

    r = cuCtxGetCurrent (&pctx);
    if (r != CUDA_SUCCESS)
    {
        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
        exit (EXIT_FAILURE);
    }

    h_X = (float *) malloc (N * sizeof (float));
    if (h_X == 0)
    {
        fprintf (stderr, "malloc failed: for h_X\n");
        exit (EXIT_FAILURE);
    }

    h_Y1 = (float *) malloc (N * sizeof (float));
    if (h_Y1 == 0)
    {
        fprintf (stderr, "malloc failed: for h_Y1\n");
        exit (EXIT_FAILURE);
    }

    h_Y2 = (float *) malloc (N * sizeof (float));
    if (h_Y2 == 0)
    {
        fprintf (stderr, "malloc failed: for h_Y2\n");
        exit (EXIT_FAILURE);
    }

    /* h_Y1 and h_Y2 start out identical so that both saxpy variants work
       on the same input.  */
    for (i = 0; i < N; i++)
    {
        h_X[i] = rand () / (float) RAND_MAX;
        h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
    }

    /* Reference result: saxpy in an OpenACC parallel region, into h_Y2.  */
#pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copy (alpha)
    {
        int i;

        for (i = 0; i < N; i++)
        {
            h_Y2[i] = alpha * h_X[i] + h_Y2[i];
        }
    }

    /* Re-read the current context; this is the one handed to
       context_check below.  */
    r = cuCtxGetCurrent (&pctx);
    if (r != CUDA_SUCCESS)
    {
        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
        exit (EXIT_FAILURE);
    }

    d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
    if (d_X == NULL)
    {
        /* Name the buffer that actually failed (this message previously
           said h_Y1, duplicating the one below).  */
        fprintf (stderr, "copyin error h_X\n");
        exit (EXIT_FAILURE);
    }

    d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
    if (d_Y == NULL)
    {
        fprintf (stderr, "copyin error h_Y1\n");
        exit (EXIT_FAILURE);
    }

    s = cublasCreate (&h);
    if (s != CUBLAS_STATUS_SUCCESS)
    {
        fprintf (stderr, "cublasCreate failed: %d\n", s);
        exit (EXIT_FAILURE);
    }

    context_check (pctx);

    /* Second result: saxpy through cuBLAS on the mapped device buffers.  */
    s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
    if (s != CUBLAS_STATUS_SUCCESS)
    {
        fprintf (stderr, "cublasSaxpy failed: %d\n", s);
        exit (EXIT_FAILURE);
    }

    context_check (pctx);

    acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));

    context_check (pctx);

    /* Accumulate squared differences and squared reference values; the
       norms are taken below.  */
    error_norm = 0;
    ref_norm = 0;

    for (i = 0; i < N; ++i)
    {
        float diff;

        diff = h_Y1[i] - h_Y2[i];
        error_norm += diff * diff;
        ref_norm += h_Y2[i] * h_Y2[i];
    }

    error_norm = (float) sqrt ((double) error_norm);
    ref_norm = (float) sqrt ((double) ref_norm);

    /* Fail on a near-zero reference norm or on a relative error of 1e-6
       or more.  */
    if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
    {
        fprintf (stderr, "math error\n");
        exit (EXIT_FAILURE);
    }

    free (h_X);
    free (h_Y1);
    free (h_Y2);

    acc_free (d_X);
    acc_free (d_Y);

    context_check (pctx);

    s = cublasDestroy (h);
    if (s != CUBLAS_STATUS_SUCCESS)
    {
        fprintf (stderr, "cublasDestroy failed: %d\n", s);
        exit (EXIT_FAILURE);
    }

    context_check (pctx);

    acc_shutdown (acc_device_nvidia);

    r = cuCtxGetCurrent (&pctx);
    if (r != CUDA_SUCCESS)
    {
        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
        exit (EXIT_FAILURE);
    }

    /* After shutdown there must be no current context left.  */
    if (pctx)
    {
        fprintf (stderr, "Unexpected context\n");
        exit (EXIT_FAILURE);
    }

    return EXIT_SUCCESS;
}

コード例 #13

0

ファイルを表示

ファイル: enter_exit-lib.c プロジェクト: SuperLu/gcc

/* Exercise the four pairings of "enter data"/"exit data" pragmas with the
   acc_copyin/acc_copyout library routines on a single int.  Each test maps
   the int, writes a value that is unique to that test from a parallel
   region, unmaps it with a copy-out, and asserts that the host sees the
   value just written.  */
int
main ()
{
  int *p = (int *)malloc (sizeof (int));

  /* Everything below dereferences or maps p.  */
  assert (p != NULL);

  /* Test 1: pragma input, library output.  */
  
#pragma acc enter data copyin (p[0:1])

#pragma acc parallel present (p[0:1]) num_gangs (1)
  {
    p[0] = 1;
  }

  acc_copyout (p, sizeof (int));

  assert (p[0] == 1);
  
  /* Test 2: library input, pragma output.  */

  acc_copyin (p, sizeof (int));

#pragma acc parallel present (p[0:1]) num_gangs (1)
  {
    p[0] = 2;
  }

#pragma acc exit data copyout (p[0:1])
  
  assert (p[0] == 2);

  /* Test 3: library input, library output.  */

  acc_copyin (p, sizeof (int));

#pragma acc parallel present (p[0:1]) num_gangs (1)
  {
    p[0] = 3;
  }

  acc_copyout (p, sizeof (int));
  
  assert (p[0] == 3);

  /* Test 4: pragma input, pragma output.  */

#pragma acc enter data copyin (p[0:1])
  
  /* Write 4 rather than 3: the host already holds 3 from Test 3, so
     re-using 3 would let the assertion pass even if the copy-out never
     happened.  */
#pragma acc parallel present (p[0:1]) num_gangs (1)
  {
    p[0] = 4;
  }

#pragma acc exit data copyout (p[0:1])
  
  assert (p[0] == 4);
  
  free (p);

  return 0;
}

コード例 #14

0

ファイルを表示

ファイル: lib-88.c プロジェクト: 0day-ci/gcc

/* Map the file-level buffer x (N bytes holding 0 .. N - 1) on the NVIDIA
   device, run `test' in nthreads worker threads, and afterwards verify
   that a copy-out yields N - i - 1 in element i and that x is no longer
   present.  Returns 0 immediately when no NVIDIA device is available;
   aborts on any failed check.

   The pthread_* functions return an error number and do not set errno, so
   failures are reported with strerror on the returned code.  A failed
   pthread_attr_init or pthread_create aborts, because continuing would use
   an uninitialised attribute object or thread id.  */
int
main (int argc, char **argv)
{
  const int nthreads = 1;
  int i;
  int rc;
  pthread_attr_t attr;
  pthread_t *tid;

  if (acc_get_num_devices (acc_device_nvidia) == 0)
    return 0;

  acc_init (acc_device_nvidia);

  x = (unsigned char *) malloc (N);
  if (x == NULL)
    abort ();

  for (i = 0; i < N; i++)
    {
      x[i] = i;
    }

  d_x = acc_copyin (x, N);

  if (acc_is_present (x, N) != 1)
    abort ();

  rc = pthread_attr_init (&attr);
  if (rc != 0)
    {
      fprintf (stderr, "pthread_attr_init failed: %s\n", strerror (rc));
      abort ();
    }

  tid = (pthread_t *) malloc (nthreads * sizeof (pthread_t));
  if (tid == NULL)
    abort ();

  for (i = 0; i < nthreads; i++)
    {
      rc = pthread_create (&tid[i], &attr, &test,
                           (void *) (unsigned long) (i));
      if (rc != 0)
        {
          fprintf (stderr, "pthread_create failed: %s\n", strerror (rc));
          abort ();
        }
    }

  rc = pthread_attr_destroy (&attr);
  if (rc != 0)
    fprintf (stderr, "pthread_attr_destroy failed: %s\n", strerror (rc));

  for (i = 0; i < nthreads; i++)
    {
      void *res;

      rc = pthread_join (tid[i], &res);
      if (rc != 0)
        fprintf (stderr, "pthread join failed: %s\n", strerror (rc));
    }

  free (tid);

  /* The worker copies x back in before it returns, so it must be present
     again here.  */
  if (acc_is_present (x, N) != 1)
    abort ();

  /* Clear the host copy so that the values checked below can only have
     come from the copy-out.  */
  memset (x, 0, N);

  acc_copyout (x, N);

  for (i = 0; i < N; i++)
    {
      if (x[i] != N - i - 1)
        abort ();
    }

  if (acc_is_present (x, N) != 0)
    abort ();

  free (x);

  acc_shutdown (acc_device_nvidia);

  return 0;
}