Esempio n. 1
0
int main() {
    /* Matriz que armazena os pixels da imagem */
    int img[MAX_TAM][MAX_TAM]; 
    /* Atributos da imagem */
    int W, H, MaxV;
    /* Identificador do filtro a ser utilizado */
    int F;
    /* Parametro a ser passado para o cisalhamento */
    double Px, Py;

    /* Le a imagem */
    if (ler_pgm(img, &W, &H, &MaxV, &F, &Px, &Py) != 1) {
        printf("Error: nao foi possivel ler a imagem\n");
        return 0;
    }
 
    /* Seleciona o filtro */
    switch (F) {
        case 1:
            negativo(img, W, H, MaxV);
            break;
        case 2:
            rotacao180(img, W, H);
            break;
        case 3:
            detectaBorda(img, W, H, MaxV);
            break;
        case 4:
            cisalhamento(img, W, H, Px, Py);
            break;
    }

    /* Escreve a imagem */
    escrever_pgm(img, W, H, MaxV);

    return 0;
}
Esempio n. 2
0
int main(int argc, char *argv[])
{
    int i, j, m, n;
	double			tempo_kernel;
    double			tempo_total;
    char			*output_filename;
	float			*image_amplitudes;
	float           (*x)[2];  
	float           (*X)[2]; 
	
	pgm_t			ipgm, opgm;   
	image_file_t	*image_filename;
    timer_reset();
    timer_start();

    if (argc < 2)
    {
        printf("**Erro: parametros de entrada invalidos");
        exit(EXIT_FAILURE);
    }

    image_filename = (image_file_t *) malloc(sizeof(image_file_t));
    split_image_filename(image_filename, argv[1]);
    output_filename = (char *) malloc(40*sizeof(char));
    sprintf(output_filename, "%d.%d.%s.%s.%s", image_filename->res, image_filename->num, ENV_TYPE, APP_TYPE, EXTENSAO);

    if( ler_pgm(&ipgm, argv[1]) == -1)
        exit(EXIT_FAILURE);

	n = ipgm.width;
	m = (int)(log((double)n)/log(2.0));
	x = malloc(2 * n * n * sizeof(float));
	X = malloc(2 * n * n * sizeof(float));
		
	opgm.width = n;
    opgm.height = n;
    
	for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            x[i*n + j][0] = (float) ipgm.buf[i*n + j];
			x[i*n + j][1] = (float) 0;
        }
    }

	/* Check that n = 2^m for some integer m >= 1. */
	if (n >= 2) {
		i = n;
		while(i==2*(i/2)) i = i/2;  /* While i is even, factor out a 2. */
	}  /* For n >=2, we now have N = 2^n iff i = 1. */
	if (n < 2 || i != 1) {
		  printf(" %d deve ser um inteiro tal que n = 2^m , para m >= 1", n);
		  exit(EXIT_FAILURE);
	}
	
	timer_stop();
    tempo_total = get_elapsed_time();

    //====== Performance Test - start =======================================
    timer_reset();
    timer_start();
	j = 0;
	j = n*n;
	
    //fft direta
	fft(j, x, X);
	// filtro passa baixa
	lowpass_filter(X, n);
	//fft inversa
	for(i=0; i<j; i++) x[i][0] = x[i][1] = 0;
	ifft(j, x, X);
   
	timer_stop();
    tempo_kernel = get_elapsed_time();

    tempo_total += tempo_kernel;
    //====== Performance Test - end ============================================

    save_log_cpu(image_filename, tempo_kernel, tempo_total, LOG_NAME);
	
	image_amplitudes = (float*)malloc(n*n*sizeof(float));
	 for (i=0; i < n; i++) {
        for (j=0; j < n; j++) {
            image_amplitudes[i*n + j] = (float) (AMP(x[i*n + j][0], x[i*n + j][1]));
        }
    }
	
	normalizar_pgm(&opgm, image_amplitudes);
    escrever_pgm(&opgm, output_filename);
    free(x);
	free(X);
	destruir_pgm(&ipgm);
    destruir_pgm(&opgm);
    free(image_filename);
    free(output_filename);
    free(image_amplitudes); 
   _CrtDumpMemoryLeaks();
    return 0;
}
Esempio n. 3
0
int main(int argc, char *argv[])
{
    /* Variaveis obrigatorias do openCL pdccpk*/
    cl_platform_id          platform_ids[2];
    cl_device_id            device_id;
    cl_context              context;
    cl_command_queue        commands;
    cl_program              program;
    cl_kernel               kernel_sobel;

    cl_int                  ret_code;
    cl_uint                 ret_num_devices;
    cl_uint                 ret_num_platforms;
    //
    cl_event                kernel_event;
    cl_ulong                kernel_start_time   = (cl_ulong) 0;
    cl_ulong                kernel_end_time     = (cl_ulong) 0;
    cl_ulong                kernel_run_time     = (cl_ulong) 0;

    cl_event                write_host_dev_event;
    cl_ulong                write_host_dev_start_time   = (cl_ulong) 0;
    cl_ulong                write_host_dev_end_time     = (cl_ulong) 0;
    cl_ulong                write_host_dev_run_time     = (cl_ulong) 0;

    cl_event                read_dev_host_event;
    cl_ulong                read_dev_host_start_time    = (cl_ulong) 0;
    cl_ulong                read_dev_host_end_time      = (cl_ulong) 0;
    cl_ulong                read_dev_host_run_time      = (cl_ulong) 0;

    unsigned __int64        image_tam;
    const unsigned __int64  MEGA_BYTES   =  1048576; // 1024*1024
    double                  image_tam_MB;
    double                  tempo_total;

    /* objetos que serao armazenados na memoria da GPU */
    cl_mem                  image_in_mem, image_out_mem;
    /* objetos que serao armazenados na memoria local (host) */
    unsigned char           *image_in_host, *image_out_host;
    unsigned  int           image_width, image_height;
    size_t                  image_size;
    /*IMPORTANTE: dimensionamento dos compute units para exec do kernel*/
    size_t                  work_global[C_NUM_DIMENSOES];
    size_t                  work_local[C_NUM_DIMENSOES];
    /*Setup dos nomes de arquivos*/
    const char              *kernel_filename = C_NOME_ARQ_KERNEL;
    pgm_t                   ipgm, opgm;
    /* Codigo fonte do kernel dever ser aberto como uma cadeia de caracteres*/
    image_file_t* image_filename;
    char* output_filename;
    FILE                    *fp;
    size_t                  source_size;
    char                    *source_str;

    /* Timer count start */
    timer_reset();
    timer_start();

    if (argc < 2) {
        printf("**Erro: A imagem de entrada é necessaria.\n");
        exit(EXIT_FAILURE);
    }

    //===================================================================================================
    image_filename = (image_file_t *) malloc(sizeof(image_file_t));
    split_image_filename(image_filename, argv[1]);
    output_filename = (char *) malloc(40*sizeof(char));
    sprintf(output_filename, "%d.%d.%s.%s.%s", image_filename->res, image_filename->num, ENV_TYPE, APP_TYPE, EXTENSAO);
    //===================================================================================================

    fp = fopen(kernel_filename, "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(1);
    }

    source_str  = (char *) malloc(MAX_SOURCE_SIZE);
    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose(fp);

    //===================================================================================================
    // Abrindo imagem do arquivo para objeto de memoria local
    if( ler_pgm(&ipgm, argv[1]) == -1)
        exit(EXIT_FAILURE);

    image_in_host = ipgm.buf;
    image_width  = ipgm.width;
    image_height = ipgm.height;

    image_size =  (int) (image_width * image_height) * sizeof(unsigned char);

    image_tam = image_size;

    /* Alocando memoria para a imagem de saida apos o processamento*/
    image_out_host = (unsigned char *) malloc(image_size);
    //===================================================================================================

    /* Recebe um vetor de platform_id e retorna sucesso
     * se encontrar plataformas OpenCL no sistema, inseridos
     * essas plataformas no vetor com no maximo MAX_PLATFORM_ID
     * entradas, caso contrario retorna codigo de erro.
     * CL_CHECK é um macro para retornar o titulo do erro
     * a partir de uma funcao que retorne um codigo de erro
     ***************************************************/

    CL_CHECK(clGetPlatformIDs(MAX_PLATFORM_ID, platform_ids, &ret_num_platforms));

    if (ret_num_platforms == 0) {
        fprintf(stderr, "[Erro] Não existem plataformas OpenCL\n");
        exit(2);
    }
    //===================================================================================================

    /* Recebe uma platform_id e retorna sucesso
     * se obter um device do tipo GPU dessa plataforma OpenCL
     * caso contrario retorna codigo de erro.
     ***************************************************/

    CL_CHECK(clGetDeviceIDs(platform_ids[1], CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices));
    //print_platform_info(&platform_ids[0]);
    //system("pause");
    //exit(0);
    //===================================================================================================

    /* Retorna sucesso se consegui criar um contexto para
     * o device id escolhido, caso contrario retorna codigo de erro.
     ***************************************************/
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret_code);
    //===================================================================================================

    commands = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret_code);
    //===================================================================================================

    program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, NULL);
    //===================================================================================================

    ret_code = clBuildProgram(program, 0, NULL, NULL, NULL, NULL);

    if (ret_code != CL_SUCCESS) {
        char build_str[4096];
        fprintf(stderr, "[ERRO] clBuildProgram '%s' (code: %d)\n",
                error_cl_str(ret_code), ret_code );
        clGetProgramBuildInfo( program, device_id,
                               CL_PROGRAM_BUILD_LOG, sizeof(build_str), build_str, NULL);
        fprintf(stderr, "[ERRO] log: '%s'\n", build_str);
        system("pause");
        exit(4);
    }
    //===================================================================================================

    kernel_sobel = clCreateKernel(program, "sobel_kernel", NULL);
    image_in_mem  = clCreateBuffer(context, CL_MEM_READ_ONLY, image_size, NULL, NULL);
    image_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, image_size , NULL, NULL);
    //===================================================================================================

    CL_CHECK(clEnqueueWriteBuffer(commands, image_in_mem, CL_TRUE, 0, image_size, image_in_host, 0, NULL, &write_host_dev_event));
    CL_CHECK(clSetKernelArg(kernel_sobel, 0, sizeof(cl_mem), &image_in_mem));
    CL_CHECK(clSetKernelArg(kernel_sobel, 1, sizeof(cl_mem), &image_out_mem));
    //===================================================================================================

    work_global[0] = image_width;
    work_global[1] = image_height;
    work_local[0] = MAX_WORK_GROUP_ITEM_SIZE_DIM_1;
    work_local[1] = MAX_WORK_GROUP_ITEM_SIZE_DIM_2;
    //===================================================================================================

    CL_CHECK(clEnqueueNDRangeKernel(commands, kernel_sobel, 2, NULL, work_global, work_local, 0, NULL,  &kernel_event) );
    // CL_CHECK(clFinish(commands));
    // CL_CHECK( clWaitForEvents(1 , &kernel_event) );
    //===================================================================================================

    CL_CHECK(clEnqueueReadBuffer(commands, image_out_mem, CL_TRUE, 0, image_size, image_out_host, 0, NULL, &read_dev_host_event));

    //== Total time elapsed =============================================================================
    timer_stop();
    tempo_total = get_elapsed_time();
    //===================================================================================================
    //====== Get time of Profile Info ===================================================================
    // kernel sobel time
    CL_CHECK(clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL));
    CL_CHECK(clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL));
    // Write data time
    CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &write_host_dev_start_time, NULL));
    CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &write_host_dev_end_time, NULL));
    // Read data time
    CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &read_dev_host_start_time, NULL));
    CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &read_dev_host_end_time, NULL));
    //===================================================================================================

    write_host_dev_run_time = write_host_dev_end_time - write_host_dev_start_time;
    read_dev_host_run_time =  read_dev_host_end_time -  read_dev_host_start_time;
    kernel_run_time = kernel_end_time - kernel_start_time;

    image_tam_MB = (double) (((double) image_tam)/(double) MEGA_BYTES);

    //===================================================================================================
    save_log_gpu(image_filename, kernel_run_time, (double) (image_tam_MB/( (double) read_dev_host_run_time/(double) NANOSECONDS)),
        (double) (image_tam_MB/ ((double) write_host_dev_run_time/ (double) NANOSECONDS)), tempo_total, LOG_NAME);
    //===================================================================================================

    opgm.width  = image_width;
    opgm.height = image_height;
    opgm.buf    = image_out_host;

    escrever_pgm(&opgm, output_filename);

    //===================================================================================================
    CL_CHECK(clReleaseMemObject(image_in_mem));
	CL_CHECK(clReleaseEvent(kernel_event));
    CL_CHECK(clReleaseEvent(read_dev_host_event));
    CL_CHECK(clReleaseEvent(write_host_dev_event));
    CL_CHECK(clReleaseMemObject(image_out_mem));
    CL_CHECK(clReleaseProgram(program));
    CL_CHECK(clReleaseKernel(kernel_sobel));
    CL_CHECK(clReleaseCommandQueue(commands));
    CL_CHECK(clReleaseContext(context));
    destruir_pgm(&ipgm);
    destruir_pgm(&opgm);
    free(source_str);
    free(image_filename);
    free(output_filename);

    //_CrtDumpMemoryLeaks();

    return 0;
}
Esempio n. 4
0
int main(int argc, char *argv[])
{
    //FILE *fp;

    cl_platform_id      platform_id[2];
    cl_uint             ret_num_devices;
    cl_uint             ret_num_platforms;
    cl_int              ret_code;

    cl_mem              image_in_mem = NULL;
    cl_mem              image_out_mem = NULL;
    cl_mem              twiddle_factors_mem = NULL;
    cl_float2           *image_in_host;
    cl_float2           *twiddle_factors_host;

    cl_kernel           kernel_twiddle_factors;
    cl_kernel           kernel_matriz_transpose;
    cl_kernel           kernel_lowpass_filter;

    pgm_t ipgm;
    pgm_t opgm;

    image_file_t        *image_filename;
    char                *output_filename;
    FILE                *fp;
    const char          *kernel_filename = C_NOME_ARQ_KERNEL;
    size_t              source_size;
    char                *source_str;
    cl_int              i, j,n ,m;
    cl_int              raio = 0;
    size_t              global_wg[2];
    size_t              local_wg[2];
    float               *image_amplitudes;
    size_t              log_size;
    char                *log_file;

    cl_event            kernels_events_out_fft[4];

    cl_ulong            kernel_runtime              = (cl_ulong) 0;
    cl_ulong            kernel_start_time           = (cl_ulong) 0;
    cl_ulong            kernel_end_time             = (cl_ulong) 0;

    cl_event            write_host_dev_event;
    cl_ulong            write_host_dev_start_time   = (cl_ulong) 0;
    cl_ulong            write_host_dev_end_time     = (cl_ulong) 0;
    cl_ulong            write_host_dev_run_time     = (cl_ulong) 0;

    cl_event            read_dev_host_event;
    cl_ulong            read_dev_host_start_time    = (cl_ulong) 0;
    cl_ulong            read_dev_host_end_time      = (cl_ulong) 0;
    cl_ulong            read_dev_host_run_time      = (cl_ulong) 0;

    unsigned __int64    image_tam;
    unsigned __int64    MEGA_BYTES   =  1048576; // 1024*1024
    double              image_tam_MB;
    double              tempo_total;

    struct event_in_fft_t *fft_events;


   //=== Timer count start ==============================================================================
    timer_reset();
    timer_start();
    //===================================================================================================

    if (argc < 2) {
        printf("**Erro: O arquivo de entrada eh necessario.\n");
        exit(EXIT_FAILURE);
    }

    image_filename = (image_file_t *) malloc(sizeof(image_file_t));
    split_image_filename(image_filename, argv[1]);
    output_filename = (char *) malloc(40*sizeof(char));
    sprintf(output_filename, "%d.%d.%s.%s.%s", image_filename->res, image_filename->num, ENV_TYPE, APP_TYPE, EXTENSAO);

    fp = fopen(kernel_filename, "r");
    if (!fp) {
        fprintf(stderr, "Failed to load kernel.\n");
        exit(EXIT_FAILURE);
    }

    source_str = (char *)malloc(MAX_SOURCE_SIZE);
    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
    fclose( fp );

    //===================================================================================================
     /* Abrindo imagem do arquivo para objeto de memoria local*/
    if( ler_pgm(&ipgm, argv[1]) == -1)
        exit(EXIT_FAILURE);

    n = ipgm.width;
    raio = n/8;
    m = (cl_int)(log((double)n)/log(2.0));

    image_in_host = (cl_float2 *)malloc((n*n)*sizeof(cl_float2));
    twiddle_factors_host = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2));

    for (i = 0; i < n; i++) {
        for (j = 0; j < n; j++) {
            image_in_host[n*i + j].s[0] = (float)ipgm.buf[n*i + j];
            image_in_host[n*i + j].s[1] = (float)0;
        }
    }

    fft_events = (struct event_in_fft_t *)malloc(MAX_CALL_FFT*sizeof(struct event_in_fft_t));

    kernel_butter_events = (cl_event *)malloc(MAX_CALL_FFT*m*sizeof(cl_event));

    //===================================================================================================
    CL_CHECK(clGetPlatformIDs(MAX_PLATFORM_ID, platform_id, &ret_num_platforms));

    if (ret_num_platforms == 0 ) {
        fprintf(stderr,"[Erro] Não existem plataformas OpenCL\n");
        exit(2);
    }

    //===================================================================================================

    CL_CHECK(clGetDeviceIDs( platform_id[0], CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices));
    //print_platform_info(&platform_id[1]);

    //===================================================================================================
    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret_code);
    //===================================================================================================

    cmd_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret_code);
    //===================================================================================================

    image_in_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret_code);
    image_out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret_code);
    twiddle_factors_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, (n/2)*sizeof(cl_float2), NULL, &ret_code);
    //===================================================================================================

    /* Transfer data to memory buffer */
    CL_CHECK(clEnqueueWriteBuffer(cmd_queue, image_in_mem, CL_TRUE, 0, n*n*sizeof(cl_float2), image_in_host, 0, NULL, &write_host_dev_event));

    image_tam = n*n*sizeof(cl_float2);

    //===================================================================================================
    program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret_code);
    //===================================================================================================
    ret_code = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
    //===================================================================================================
    if (ret_code != CL_SUCCESS) {
    // Determine the size of the log
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size);
    //===================================================================================================

    // Allocate memory for the log
    log_file = (char *) malloc(log_size);

    // Get the log
    clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log_file, NULL);
    printf("%s\n", log_file);
    system("pause");
    exit(0);
}
    kernel_twiddle_factors = clCreateKernel(program, "twiddle_factors", &ret_code);
    kernel_matriz_transpose = clCreateKernel(program, "matrix_trasponse", &ret_code);
    kernel_lowpass_filter  = clCreateKernel(program, "lowpass_filter", &ret_code);

    /* Processa os fatores Wn*/
    //===================================================================================================
    CL_CHECK(clSetKernelArg(kernel_twiddle_factors, 0, sizeof(cl_mem), (void *)&twiddle_factors_mem));
    CL_CHECK(clSetKernelArg(kernel_twiddle_factors, 1, sizeof(cl_int), (void *)&n));
    config_workgroup_size(global_wg, local_wg, n/2, 1);
    CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_twiddle_factors, 1, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[0]));

    //===================================================================================================
    /* Executa a FFT em N/2 */
    fft_main(image_out_mem, image_in_mem, twiddle_factors_mem, m, direta, &fft_events[0]);

    //===================================================================================================
    /* Realiza a transposta da Matriz (imagem) */
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 0, sizeof(cl_mem), (void *)&image_in_mem));
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 1, sizeof(cl_mem), (void *)&image_out_mem));
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 2, sizeof(cl_int), (void *)&n));
    config_workgroup_size(global_wg, local_wg, n, n);
    CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_matriz_transpose, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[1]));

    //===================================================================================================
    /* Executa a FFT N/2 */
    fft_main(image_out_mem, image_in_mem, twiddle_factors_mem, m, direta, &fft_events[1]);

    //===================================================================================================
    /* Processa o filtro passa baixa */
    CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 0, sizeof(cl_mem), (void *)&image_out_mem));
    CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 1, sizeof(cl_int), (void *)&n));
    CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 2, sizeof(cl_int), (void *)&raio));
    config_workgroup_size(global_wg, local_wg, n, n);
    CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_lowpass_filter, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[2]));

    //===================================================================================================
    /* Obtem a FFT inversa*/
    fft_main(image_in_mem, image_out_mem, twiddle_factors_mem, m, inversa, &fft_events[2]);
    //===================================================================================================

    /* Realiza a transposta da Matriz (imagem) */
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 0, sizeof(cl_mem), (void *)&image_out_mem));
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 1, sizeof(cl_mem), (void *)&image_in_mem));
    CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 2, sizeof(cl_int), (void *)&n));
    config_workgroup_size(global_wg, local_wg, n, n);
    CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_matriz_transpose, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[3]));

    //===================================================================================================
    fft_main(image_in_mem, image_out_mem, twiddle_factors_mem, m, inversa, &fft_events[3]);
    //===================================================================================================

    CL_CHECK(clEnqueueReadBuffer(cmd_queue, image_in_mem, CL_TRUE, 0, n*n*sizeof(cl_float2), image_in_host, 0, NULL, &read_dev_host_event));
    //===================================================================================================

    //== Total time elapsed ============================================================================
    timer_stop();
    tempo_total = get_elapsed_time();
    //==================================================================================================

    //====== Get time of Profile Info ==================================================================
    // Write data time
    CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &write_host_dev_start_time, NULL));
    CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &write_host_dev_end_time, NULL));
    // Read data time
    CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &read_dev_host_start_time, NULL));
    CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &read_dev_host_end_time, NULL));

    for (i = 0; i < MAX_CALL_FFT; i++) {

        kernel_start_time = (cl_long) 0;
        kernel_end_time = (cl_long) 0;
        CL_CHECK(clGetEventProfilingInfo(kernels_events_out_fft[i], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL));
        CL_CHECK(clGetEventProfilingInfo(kernels_events_out_fft[i], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL));
        kernel_runtime += (kernel_end_time - kernel_start_time);

        kernel_start_time = (cl_long) 0;
        kernel_end_time = (cl_long) 0;
        CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_bitsrev, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL));
        CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_bitsrev, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL));
        kernel_runtime += (kernel_end_time - kernel_start_time);

        kernel_start_time = (cl_long) 0;
        kernel_end_time = (cl_long) 0;

        if (fft_events[i].kernel_normalize != NULL) {
            CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_normalize, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL));
            CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_normalize, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL));
            kernel_runtime += (kernel_end_time - kernel_start_time);
        }
   }

    for (j=0; j < MAX_CALL_FFT*m; j++){
        kernel_start_time = (cl_long) 0;
        kernel_end_time = (cl_long) 0;

        CL_CHECK(clGetEventProfilingInfo(kernel_butter_events[j], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL));
        CL_CHECK(clGetEventProfilingInfo(kernel_butter_events[j], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL));
        kernel_runtime += (kernel_end_time - kernel_start_time);
    }

    write_host_dev_run_time = write_host_dev_end_time - write_host_dev_start_time;
    read_dev_host_run_time =  read_dev_host_end_time -  read_dev_host_start_time;

/* save_log_debug(write_host_dev_run_time,fp);
    save_log_debug(read_dev_host_run_time,fp);
    close_log_debug(fp); */

    image_tam_MB = (double) (((double) image_tam)/(double) MEGA_BYTES);

    //==================================================================================================
    save_log_gpu(image_filename, kernel_runtime, (double) (image_tam_MB/( (double) read_dev_host_run_time/(double) NANOSECONDS)),
    (double) (image_tam_MB/ ((double) write_host_dev_run_time/ (double) NANOSECONDS)), tempo_total, LOG_NAME);

    //===================================================================================================
    image_amplitudes = (float*)malloc(n*n*sizeof(float));
    for (i=0; i < n; i++) {
        for (j=0; j < n; j++) {
            image_amplitudes[n*j + i] = (float) (AMP(((float*)image_in_host)[(2*n*j)+2*i], ((float*)image_in_host)[(2*n*j)+2*i+1]));
        }
    }

    //clFlush(cmd_queue);
    //clFinish(cmd_queue);
    opgm.width = n;
    opgm.height = n;

    normalizar_pgm(&opgm, image_amplitudes);
    escrever_pgm(&opgm, output_filename);

    //===================================================================================================
	clFinish(cmd_queue);
    clReleaseKernel(kernel_twiddle_factors);
    clReleaseKernel(kernel_matriz_transpose);
    clReleaseKernel(kernel_lowpass_filter);
    clReleaseProgram(program);
    clReleaseMemObject(image_in_mem);
    clReleaseMemObject(image_out_mem);
    clReleaseMemObject(twiddle_factors_mem);
    clReleaseCommandQueue(cmd_queue);
    clReleaseContext(context);
	clReleaseEvent(read_dev_host_event);
	clReleaseEvent(write_host_dev_event);
	clReleaseEvent(kernels_events_out_fft[0]);
	clReleaseEvent(kernels_events_out_fft[1]);
	clReleaseEvent(kernels_events_out_fft[2]);
	clReleaseEvent(kernels_events_out_fft[3]);
    destruir_pgm(&ipgm);
    destruir_pgm(&opgm);
    free(image_amplitudes);
    free(source_str);
    free(image_in_host);
    free(image_filename);
    free(twiddle_factors_host);
    free(output_filename);
    free(fft_events);
    free(kernel_butter_events);

    //_CrtDumpMemoryLeaks();

    return 0;
}