Ejemplo n.º 1
0
static void exec_gaussblur_app(DATA *image_out, DATA *image_in, ARGUMENTS *settings) {
    size_t size_in_bytes = settings->size*settings->size*(PIXEL_CHANNELS*sizeof(DATA));

    DATA *image_in_gpu	= (DATA *)acc_malloc(size_in_bytes);
    DATA *image_out_gpu	= (DATA *)acc_malloc(size_in_bytes);

    uint32_t e;
    uint32_t i;

    for (e=0; e<settings->energy_loops; ++e) {
        acc_memcpy_to_device(image_in_gpu , image_in , size_in_bytes);
        acc_memcpy_to_device(image_out_gpu, image_out, size_in_bytes);

        for (i=0; i<settings->checkpoints; ++i) {
            gaussblur_calc_gpu_compute((uint16_t *)image_in_gpu , (uint16_t *)image_out_gpu, settings->size);
            gaussblur_calc_gpu_compute((uint16_t *)image_out_gpu, (uint16_t *)image_in_gpu , settings->size);
        }

        acc_memcpy_from_device(image_in , image_in_gpu , size_in_bytes);
        acc_memcpy_from_device(image_out, image_out_gpu, size_in_bytes);
    }

    acc_free(image_in_gpu);
    acc_free(image_out_gpu);

    image_in_gpu	= NULL;
    image_out_gpu	= NULL;
}
Ejemplo n.º 2
0
int
main (int argc, char **argv)
{
    void *d;
    acc_device_t devtype = acc_device_host;

#if ACC_DEVICE_TYPE_nvidia
    devtype = acc_device_nvidia;

    if (acc_get_num_devices (acc_device_nvidia) == 0)
        return 0;
#endif

    acc_init (devtype);

    d = acc_malloc (0);
    if (d != NULL)
        abort ();

    acc_free (0);

    acc_shutdown (devtype);

    acc_set_device_type (devtype);

    d = acc_malloc (0);
    if (d != NULL)
        abort ();

    acc_shutdown (devtype);

    acc_init (devtype);

    d = acc_malloc (1024);
    if (d == NULL)
        abort ();

    acc_free (d);

    acc_shutdown (devtype);

    acc_set_device_type (devtype);

    d = acc_malloc (1024);
    if (d == NULL)
        abort ();

    acc_free (d);

    acc_shutdown (devtype);

    return 0;
}
Ejemplo n.º 3
0
Archivo: lib-18.c Proyecto: 0day-ci/gcc
int
main (int argc, char **argv)
{
  const int N = 256;
  int i;
  unsigned char *h;
  void *d;

  h = (unsigned char *) malloc (N);

  for (i = 0; i < N; i++)
    {
      h[i] = i;
    }

  d = acc_copyin (h, N);

  acc_free (d);

  fprintf (stderr, "CheCKpOInT\n");
  acc_copyout (h, N);

  free (h);

  return 0;
}
Ejemplo n.º 4
0
void
free_buffer (void * buffer, enum accel_type type)
{
    switch (type) {
        case none:
            free(buffer);
            break;
        case managed:
        case cuda:
#ifdef _ENABLE_CUDA_
            cudaFree(buffer);
#endif
            break;
        case openacc:
#ifdef _ENABLE_OPENACC_
            acc_free(buffer);
#endif
            break;
    }
    
    /* Free dummy compute related resources */
    if (is_alloc) {
        if (options.target == cpu) {
            free_host_arrays();
        } 
#ifdef _ENABLE_CUDA_KERNEL_ 
        else if (options.target == gpu || options.target == both) {
            free_host_arrays();
            free_device_arrays();
        }
#endif
    }

    is_alloc = 0;
}
Ejemplo n.º 5
0
void
free_buffer (void * buffer, enum accel_type type)
{
    switch (type) {
        case none:
            free(buffer);
            break;
        case managed:
        case cuda:
#ifdef _ENABLE_CUDA_
            cudaFree(buffer);
#endif
            break;
        case openacc:
#ifdef _ENABLE_OPENACC_
            acc_free(buffer);
#endif
            break;
    }
    
    /* Free dummy compute related resources */
    if (cpu == options.target || both == options.target) {
        free_host_arrays();
    }

    if (gpu == options.target || both == options.target) {
#ifdef _ENABLE_CUDA_KERNEL_
        free_device_arrays();
#endif /* #ifdef _ENABLE_CUDA_KERNEL_ */
    }
}
Ejemplo n.º 6
0
Archivo: lib-13.c Proyecto: SuperLu/gcc
int
main (int argc, char **argv)
{
  const int N = 256;
  int i;
  unsigned char *h;
  void *d;

  h = (unsigned char *) malloc (N);

  for (i = 0; i < N; i++)
    {
      h[i] = i;
    }

  d = acc_copyin (h, N);

  if (acc_is_present (h, 1) != 1)
    abort ();

  if (acc_is_present (h, N + 1) != 0)
    abort ();

  if (acc_is_present (h + 1, N) != 0)
    abort ();

  if (acc_is_present (h - 1, N) != 0)
    abort ();

  if (acc_is_present (h - 1, N - 1) != 0)
    abort ();

  if (acc_is_present (h + N, 0) != 0)
    abort ();

  if (acc_is_present (h + N, N) != 0)
    abort ();

  if (acc_is_present (0, N) != 0)
    abort ();
   
  if (acc_is_present (h, 0) != 0)
    abort ();

  acc_free (d);

  if (acc_is_present (h, 1) != 0)
    abort ();

  free (h);

  return 0;
}
Ejemplo n.º 7
0
int
main (int argc, char **argv)
{
    const int N = 256;
    int i;
    unsigned char *h;
    void *d;

    acc_init (acc_device_nvidia);

    h = (unsigned char *) malloc (N);

    for (i = 0; i < N; i++)
    {
        h[i] = i;
    }

    d = acc_malloc (N);

    acc_memcpy_to_device (d, h, N);

    memset (&h[0], 0, N);

    acc_memcpy_to_device (d, h, N << 1);

    acc_memcpy_from_device (h, d, N);

    for (i = 0; i < N; i++)
    {
        if (h[i] != i)
            abort ();
    }

    acc_free (d);

    free (h);

    acc_shutdown (acc_device_nvidia);

    return 0;
}
Ejemplo n.º 8
0
int search_GPU(int s)
{
	int i;
    	int find = -1;
	
	a = (float *) malloc(sizeof(float) * SIZE);
	c = (float)s-5;
	
	init(s);
	
	double start, finish, elapsed;
        start = (double) clock() / (CLOCKS_PER_SEC*1000);
	#pragma acc data copyin(a[0:s],c) copy(find)
        {
                #pragma acc kernels
                {
                        #pragma acc loop independent
                        {
				for (i = 0; i < s; ++i)
				{
        				if(a[i] == c)
        				{
            					find = i;
	    					i=s;
        				}
				}
			}
		}
	}
	
	acc_free(acc_deviceptr(a));

	finish = (double) clock() / (CLOCKS_PER_SEC*1000);
    elapsed = finish - start;
    fprintf(out,"%.6lf,",elapsed);
    	
	//print_result(s,find);
    free(a);
	
	return find;
}
Ejemplo n.º 9
0
int
free_device_buffer (void * buf)
{
    switch (options.accel) {
#ifdef _ENABLE_CUDA_
        case cuda:
            cudaFree(buf);
            break;
#endif
#ifdef _ENABLE_OPENACC_
        case openacc:
            acc_free(buf);
            break;
#endif
        default:
            /* unknown device */
            return 1;
    }

    return 0;
}
Ejemplo n.º 10
0
Archivo: lib-54.c Proyecto: SuperLu/gcc
int
main (int argc, char **argv)
{
  const int N = 256;
  unsigned char *h;
  void *d;

  h = (unsigned char *) malloc (N);

  d = acc_malloc (N);

  fprintf (stderr, "CheCKpOInT\n");
  acc_map_data (h, d, 0);

  acc_unmap_data (h);

  acc_free (d);

  free (h);

  return 0;
}
Ejemplo n.º 11
0
Archivo: lib-63.c Proyecto: 0day-ci/gcc
int
main (int argc, char **argv)
{
  const int N = 256;
  int i;
  unsigned char *h;
  void *d;

  h = (unsigned char *) malloc (N);

  for (i = 0; i < N; i++)
    {
      h[i] = i;
    }

  d = acc_malloc (N);

  fprintf (stderr, "CheCKpOInT\n");
  acc_memcpy_to_device (0, h, N);

  memset (&h[0], 0, N);

  acc_memcpy_from_device (h, d, N);

  for (i = 0; i < N; i++)
    {
      if (h[i] != i)
	abort ();
    }

  acc_free (d);

  free (h);

  return 0;
}
Ejemplo n.º 12
0
Archivo: lib-50.c Proyecto: 0day-ci/gcc
int
main (int argc, char **argv)
{
  const int N = 256;
  unsigned char *h;
  void *d;

  h = (unsigned char *) malloc (N);

  d = acc_malloc (N);

  acc_map_data (h, d, N);

  if (acc_is_present (h, N) != 1)
    abort ();

  acc_unmap_data (h);

  acc_free (d);

  free (h);

  return 0;
}
Ejemplo n.º 13
0
int
main (int argc, char **argv)
{
  CUdevice dev;
  CUfunction delay;
  CUmodule module;
  CUresult r;
  CUstream stream;
  unsigned long *a, *d_a, dticks;
  int nbytes;
  float atime, dtime;
  void *kargs[2];
  int clkrate;
  int devnum, nprocs;

  acc_init (acc_device_nvidia);

  devnum = acc_get_device_num (acc_device_nvidia);

  r = cuDeviceGet (&dev, devnum);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
      abort ();
    }

  r =
    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
			  dev);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      abort ();
    }

  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      abort ();
    }

  r = cuModuleLoad (&module, "subr.ptx");
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
      abort ();
    }

  r = cuModuleGetFunction (&delay, module, "delay");
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
      abort ();
    }

  nbytes = nprocs * sizeof (unsigned long);

  dtime = 200.0;

  dticks = (unsigned long) (dtime * clkrate);

  a = (unsigned long *) malloc (nbytes);
  d_a = (unsigned long *) acc_malloc (nbytes);

  acc_map_data (a, d_a, nbytes);

  kargs[0] = (void *) &d_a;
  kargs[1] = (void *) &dticks;

  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
  if (r != CUDA_SUCCESS)
	{
	  fprintf (stderr, "cuStreamCreate failed: %d\n", r);
	  abort ();
	}

  acc_set_cuda_stream (0, stream);

  init_timers (1);

  start_timer (0);

  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
      abort ();
    }

  acc_wait (1);

  atime = stop_timer (0);

  if (atime < dtime)
    {
      fprintf (stderr, "actual time < delay time\n");
      abort ();
    }

  start_timer (0);

  acc_wait (1);

  atime = stop_timer (0);

  if (0.010 < atime)
    {
      fprintf (stderr, "actual time < delay time\n");
      abort ();
    }

  acc_unmap_data (a);

  fini_timers ();

  free (a);
  acc_free (d_a);

  acc_shutdown (acc_device_nvidia);

  return 0;
}
Ejemplo n.º 14
0
Archivo: lib-72.c Proyecto: 0day-ci/gcc
int
main (int argc, char **argv)
{
  CUdevice dev;
  CUfunction delay;
  CUmodule module;
  CUresult r;
  CUstream stream;
  unsigned long *a, *d_a, dticks;
  int nbytes;
  float dtime;
  void *kargs[2];
  int clkrate;
  int devnum, nprocs;

  acc_init (acc_device_nvidia);

  devnum = acc_get_device_num (acc_device_nvidia);

  r = cuDeviceGet (&dev, devnum);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuDeviceGet failed: %d\n", r);
      abort ();
    }

  r =
    cuDeviceGetAttribute (&nprocs, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT,
			  dev);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      abort ();
    }

  r = cuDeviceGetAttribute (&clkrate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, dev);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuDeviceGetAttribute failed: %d\n", r);
      abort ();
    }

  r = cuModuleLoad (&module, "subr.ptx");
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuModuleLoad failed: %d\n", r);
      abort ();
    }

  r = cuModuleGetFunction (&delay, module, "delay");
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuModuleGetFunction failed: %d\n", r);
      abort ();
    }

  nbytes = nprocs * sizeof (unsigned long);

  dtime = 200.0;

  dticks = (unsigned long) (dtime * clkrate);

  a = (unsigned long *) malloc (nbytes);
  d_a = (unsigned long *) acc_malloc (nbytes);

  acc_map_data (a, d_a, nbytes);

  kargs[0] = (void *) &d_a;
  kargs[1] = (void *) &dticks;

  r = cuStreamCreate (&stream, CU_STREAM_DEFAULT);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuStreamCreate failed: %d\n", r);
      abort ();
    }

  if (!acc_set_cuda_stream (0, stream))
    abort ();
    
  r = cuLaunchKernel (delay, 1, 1, 1, 1, 1, 1, 0, stream, kargs, 0);
  if (r != CUDA_SUCCESS)
    {
      fprintf (stderr, "cuLaunchKernel failed: %d\n", r);
      abort ();
    }

  if (acc_async_test_all () != 0)
    {
      fprintf (stderr, "asynchronous operation not running\n");
      abort ();
    }

  sleep ((int) (dtime / 1000.f) + 1);

  if (acc_async_test_all () != 1)
    {
      fprintf (stderr, "found asynchronous operation still running\n");
      abort ();
    }

  acc_unmap_data (a);

  free (a);
  acc_free (d_a);

  acc_shutdown (acc_device_nvidia);

  exit (0);
}
Ejemplo n.º 15
0
int
main (int argc, char **argv)
{
    cublasStatus_t s;
    cublasHandle_t h;
    CUcontext pctx;
    CUresult r;
    int i;
    const int N = 256;
    float *h_X, *h_Y1, *h_Y2;
    float *d_X,*d_Y;
    float alpha = 2.0f;
    float error_norm;
    float ref_norm;

    /* Test 4 - OpenACC creates, cuBLAS shares.  */

    acc_set_device_num (0, acc_device_nvidia);

    r = cuCtxGetCurrent (&pctx);
    if (r != CUDA_SUCCESS)
    {
        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
        exit (EXIT_FAILURE);
    }

    h_X = (float *) malloc (N * sizeof (float));
    if (h_X == 0)
    {
        fprintf (stderr, "malloc failed: for h_X\n");
        exit (EXIT_FAILURE);
    }

    h_Y1 = (float *) malloc (N * sizeof (float));
    if (h_Y1 == 0)
    {
        fprintf (stderr, "malloc failed: for h_Y1\n");
        exit (EXIT_FAILURE);
    }

    h_Y2 = (float *) malloc (N * sizeof (float));
    if (h_Y2 == 0)
    {
        fprintf (stderr, "malloc failed: for h_Y2\n");
        exit (EXIT_FAILURE);
    }

    for (i = 0; i < N; i++)
    {
        h_X[i] = rand () / (float) RAND_MAX;
        h_Y2[i] = h_Y1[i] = rand () / (float) RAND_MAX;
    }

#pragma acc parallel copyin (h_X[0:N]), copy (h_Y2[0:N]) copy (alpha)
    {
        int i;

        for (i = 0; i < N; i++)
        {
            h_Y2[i] = alpha * h_X[i] + h_Y2[i];
        }
    }

    r = cuCtxGetCurrent (&pctx);
    if (r != CUDA_SUCCESS)
    {
        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
        exit (EXIT_FAILURE);
    }

    d_X = (float *) acc_copyin (&h_X[0], N * sizeof (float));
    if (d_X == NULL)
    {
        fprintf (stderr, "copyin error h_Y1\n");
        exit (EXIT_FAILURE);
    }

    d_Y = (float *) acc_copyin (&h_Y1[0], N * sizeof (float));
    if (d_Y == NULL)
    {
        fprintf (stderr, "copyin error h_Y1\n");
        exit (EXIT_FAILURE);
    }

    s = cublasCreate (&h);
    if (s != CUBLAS_STATUS_SUCCESS)
    {
        fprintf (stderr, "cublasCreate failed: %d\n", s);
        exit (EXIT_FAILURE);
    }

    context_check (pctx);

    s = cublasSaxpy (h, N, &alpha, d_X, 1, d_Y, 1);
    if (s != CUBLAS_STATUS_SUCCESS)
    {
        fprintf (stderr, "cublasSaxpy failed: %d\n", s);
        exit (EXIT_FAILURE);
    }

    context_check (pctx);

    acc_memcpy_from_device (&h_Y1[0], d_Y, N * sizeof (float));

    context_check (pctx);

    error_norm = 0;
    ref_norm = 0;

    for (i = 0; i < N; ++i)
    {
        float diff;

        diff = h_Y1[i] - h_Y2[i];
        error_norm += diff * diff;
        ref_norm += h_Y2[i] * h_Y2[i];
    }

    error_norm = (float) sqrt ((double) error_norm);
    ref_norm = (float) sqrt ((double) ref_norm);

    if ((fabs (ref_norm) < 1e-7) || ((error_norm / ref_norm) >= 1e-6f))
    {
        fprintf (stderr, "math error\n");
        exit (EXIT_FAILURE);
    }

    free (h_X);
    free (h_Y1);
    free (h_Y2);

    acc_free (d_X);
    acc_free (d_Y);

    context_check (pctx);

    s = cublasDestroy (h);
    if (s != CUBLAS_STATUS_SUCCESS)
    {
        fprintf (stderr, "cublasDestroy failed: %d\n", s);
        exit (EXIT_FAILURE);
    }

    context_check (pctx);

    acc_shutdown (acc_device_nvidia);

    r = cuCtxGetCurrent (&pctx);
    if (r != CUDA_SUCCESS)
    {
        fprintf (stderr, "cuCtxGetCurrent failed: %d\n", r);
        exit (EXIT_FAILURE);
    }

    if (pctx)
    {
        fprintf (stderr, "Unexpected context\n");
        exit (EXIT_FAILURE);
    }

    return EXIT_SUCCESS;
}