예제 #1
0
/*
 * Release a buffer with the routine that matches its accelerator type,
 * then release the dummy-compute arrays if they are flagged as allocated.
 *
 * buffer: the buffer to release.
 * type:   selects the release call: free() for none; cudaFree() for
 *         managed and cuda (only when built with _ENABLE_CUDA_);
 *         acc_free() for openacc (only when built with _ENABLE_OPENACC_).
 *         Without the matching build flag the buffer is left untouched.
 *
 * is_alloc is reset to 0 on every call.
 */
void
free_buffer (void * buffer, enum accel_type type)
{
    if (none == type) {
        free(buffer);
    }
#ifdef _ENABLE_CUDA_
    else if (managed == type || cuda == type) {
        cudaFree(buffer);
    }
#endif
#ifdef _ENABLE_OPENACC_
    else if (openacc == type) {
        acc_free(buffer);
    }
#endif

    /* Dummy-compute resources are only released while is_alloc is set. */
    if (is_alloc) {
        if (cpu == options.target) {
            free_host_arrays();
        }
#ifdef _ENABLE_CUDA_KERNEL_
        else if (gpu == options.target || both == options.target) {
            /* Host arrays first, then their device counterparts. */
            free_host_arrays();
            free_device_arrays();
        }
#endif
    }

    is_alloc = 0;
}
예제 #2
0
/*
 * Release a buffer with the routine that matches its accelerator type,
 * then release the dummy-compute arrays selected by options.target.
 *
 * buffer: the buffer to release.
 * type:   selects the release call: free() for none; cudaFree() for
 *         managed and cuda (only when built with _ENABLE_CUDA_);
 *         acc_free() for openacc (only when built with _ENABLE_OPENACC_).
 *         Without the matching build flag the buffer is left untouched.
 */
void
free_buffer (void * buffer, enum accel_type type)
{
    switch (type) {
#ifdef _ENABLE_OPENACC_
        case openacc:
            acc_free(buffer);
            break;
#endif
#ifdef _ENABLE_CUDA_
        case managed:
        case cuda:
            cudaFree(buffer);
            break;
#endif
        case none:
            free(buffer);
            break;
        default:
            /* Accelerator support not compiled in: nothing to release. */
            break;
    }

    /* Host-side dummy-compute arrays exist for the cpu and both targets. */
    if (both == options.target || cpu == options.target) {
        free_host_arrays();
    }

#ifdef _ENABLE_CUDA_KERNEL_
    /* Device-side dummy-compute arrays exist for the gpu and both targets. */
    if (both == options.target || gpu == options.target) {
        free_device_arrays();
    }
#endif /* #ifdef _ENABLE_CUDA_KERNEL_ */
}
예제 #3
0
파일: cuda.c 프로젝트: abhishek111226/sach
/* Print the code that cleans up the device once the transformed code has
 * run, in particular the code that frees the memory allocated on the
 * device.
 *
 * The printer is passed, in order, through
 * unbind_device_textures_surfaces(), free_cuda_array() and
 * free_device_arrays(); the printer returned by the last call is the
 * result.
 */
static __isl_give isl_printer *clear_device(__isl_take isl_printer *p,
	struct gpu_prog *prog)
{
	isl_printer *unbound = unbind_device_textures_surfaces(p, prog);
	isl_printer *arrays_released = free_cuda_array(unbound, prog);

	return free_device_arrays(arrays_released, prog);
}
예제 #4
0
/*
 * (Re)allocate the device arrays d_x and d_y used by the dummy compute,
 * n floats each, fill them with a fixed byte pattern and set is_alloc.
 *
 * Arrays from a previous call are released first through
 * free_device_arrays(). Failures are reported on stderr. A pointer whose
 * allocation failed is reset to NULL so that it is never memset and so
 * that a later cudaFree() on it is a harmless no-op.
 *
 * n: number of float elements in each array.
 */
void
allocate_device_arrays(int n)
{
    cudaError_t cuerr = cudaSuccess;
    const size_t bytes = (size_t)n * sizeof(float);

    /* First free the old arrays */
    free_device_arrays();

    /* Allocate Device Arrays for Dummy Compute */
    cuerr = cudaMalloc((void**)&d_x, bytes);
    if (cuerr != cudaSuccess) {
        fprintf(stderr, "Failed to allocate device array d_x: %s\n",
                cudaGetErrorString(cuerr));
        d_x = NULL;
    }

    cuerr = cudaMalloc((void**)&d_y, bytes);
    if (cuerr != cudaSuccess) {
        fprintf(stderr, "Failed to allocate device array d_y: %s\n",
                cudaGetErrorString(cuerr));
        d_y = NULL;
    }

    /*
     * cudaMemset() takes a byte value and a byte count, so the whole
     * allocation (not just its first n bytes) is covered here. The fill
     * is byte-wise: the elements end up with every byte equal to 1 (d_x)
     * or 2 (d_y), which is a defined bit pattern but NOT the float values
     * 1.0f / 2.0f.
     */
    if (d_x != NULL) {
        cuerr = cudaMemset(d_x, 1, bytes);
        if (cuerr != cudaSuccess) {
            fprintf(stderr, "Failed to initialize device array d_x: %s\n",
                    cudaGetErrorString(cuerr));
        }
    }

    if (d_y != NULL) {
        cuerr = cudaMemset(d_y, 2, bytes);
        if (cuerr != cudaSuccess) {
            fprintf(stderr, "Failed to initialize device array d_y: %s\n",
                    cudaGetErrorString(cuerr));
        }
    }

    is_alloc = 1;
}
예제 #5
0
/*
 * Allocate and initialise the host arrays used by the dummy compute and,
 * when built with _ENABLE_CUDA_KERNEL_ and options.target is gpu or both,
 * calibrate the device array size against target_time.
 *
 * Host side: a becomes a DIM x DIM matrix filled with 2.0f; x and y
 * become DIM-element vectors filled with 1.0f. If any host allocation
 * fails, a message is printed and the process exits with EXIT_FAILURE
 * instead of dereferencing a NULL pointer.
 *
 * Device side: starting from options.device_array_size, the kernel is
 * run and timed with MPI_Wtime(). While one run is shorter than
 * target_time, the array size grows by 32 elements and the device arrays
 * are reallocated. The final size is written back to
 * options.device_array_size. Calibration also stops if the kernel run
 * reports a CUDA error, since otherwise the loop could never reach
 * target_time.
 *
 * target_time: minimum duration of one kernel run, compared directly
 *              against a difference of MPI_Wtime() values.
 */
void 
init_arrays(double target_time) 
{
    int i = 0, j = 0;

    if (DEBUG) fprintf(stderr, "called init_arrays with target_time = %f \n", (target_time * 1e6));

    a = (float **)malloc(DIM * sizeof(float *));
    x = (float *)malloc(DIM * sizeof(float));
    y = (float *)malloc(DIM * sizeof(float));

    if (NULL == a || NULL == x || NULL == y) {
        fprintf(stderr, "init_arrays: failed to allocate host arrays\n");
        exit(EXIT_FAILURE);
    }

    for (i = 0; i < DIM; i++) {
        a[i] = (float *)malloc(DIM * sizeof(float));
        if (NULL == a[i]) {
            fprintf(stderr, "init_arrays: failed to allocate host matrix row %d\n", i);
            exit(EXIT_FAILURE);
        }
    }

    for (i = 0; i < DIM; i++) {
        x[i] = y[i] = 1.0f;
        for (j = 0; j < DIM; j++) {
            a[i][j] = 2.0f;
        }
    }

#ifdef _ENABLE_CUDA_KERNEL_
    if (options.target == gpu || options.target == both) {
        /* Setting size of arrays for Dummy Compute */
        int N = options.device_array_size;
        double t1 = 0.0, t2 = 0.0;
        cudaError_t cuerr = cudaSuccess;

        /* Device Arrays for Dummy Compute */
        allocate_device_arrays(N);

        while (1) {
            /* The timed region covers stream setup, kernel run and teardown. */
            t1 = MPI_Wtime();

            cudaStreamCreate(&stream);
            call_kernel(A, d_x, d_y, N, &stream);

            cuerr = cudaDeviceSynchronize();
            cudaStreamDestroy(stream);

            t2 = MPI_Wtime();

            if (cuerr != cudaSuccess) {
                /* A failed run returns almost immediately, so continuing
                 * would keep growing N without ever reaching target_time. */
                fprintf(stderr, "init_arrays: dummy kernel failed (%s); stopping calibration at N = %d\n",
                        cudaGetErrorString(cuerr), N);
                break;
            }

            if ((t2 - t1) >= target_time) {
                break;
            }

            N += 32;

            /* First free the old arrays */
            free_device_arrays();

            /* Now allocate arrays of size N */
            allocate_device_arrays(N);
        }

        /* we reach here with desired N so save it and pass it to options */
        options.device_array_size = N;
        if (DEBUG) fprintf(stderr, "correct N = %d\n", N);
    }
#endif

}