/*
 * Program entry point.
 *
 * Reads an image plus the filter selection through ler_pgm(), applies the
 * selected filter to the pixel matrix in place and writes the image back out
 * through escrever_pgm(). A filter identifier outside 1..4 leaves the image
 * untouched. The function always returns 0, including when the image could
 * not be read.
 */
int main() {
    /* Pixel matrix of the image */
    int img[MAX_TAM][MAX_TAM];
    /* Image attributes (presumably width, height and maximum gray value) */
    int W, H, MaxV;
    /* Identifier of the filter to apply */
    int F;
    /* Parameters forwarded to the shear filter */
    double Px, Py;
    int leitura;

    /* Read the image; ler_pgm() reports success with the value 1 */
    leitura = ler_pgm(img, &W, &H, &MaxV, &F, &Px, &Py);
    if (leitura != 1) {
        printf("Error: nao foi possivel ler a imagem\n");
        return 0;
    }

    /* Dispatch on the filter identifier */
    if (F == 1) {
        negativo(img, W, H, MaxV);
    } else if (F == 2) {
        rotacao180(img, W, H);
    } else if (F == 3) {
        detectaBorda(img, W, H, MaxV);
    } else if (F == 4) {
        cisalhamento(img, W, H, Px, Py);
    }

    /* Write the (possibly filtered) image */
    escrever_pgm(img, W, H, MaxV);

    return 0;
}
int main(int argc, char *argv[]) { int i, j, m, n; double tempo_kernel; double tempo_total; char *output_filename; float *image_amplitudes; float (*x)[2]; float (*X)[2]; pgm_t ipgm, opgm; image_file_t *image_filename; timer_reset(); timer_start(); if (argc < 2) { printf("**Erro: parametros de entrada invalidos"); exit(EXIT_FAILURE); } image_filename = (image_file_t *) malloc(sizeof(image_file_t)); split_image_filename(image_filename, argv[1]); output_filename = (char *) malloc(40*sizeof(char)); sprintf(output_filename, "%d.%d.%s.%s.%s", image_filename->res, image_filename->num, ENV_TYPE, APP_TYPE, EXTENSAO); if( ler_pgm(&ipgm, argv[1]) == -1) exit(EXIT_FAILURE); n = ipgm.width; m = (int)(log((double)n)/log(2.0)); x = malloc(2 * n * n * sizeof(float)); X = malloc(2 * n * n * sizeof(float)); opgm.width = n; opgm.height = n; for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { x[i*n + j][0] = (float) ipgm.buf[i*n + j]; x[i*n + j][1] = (float) 0; } } /* Check that n = 2^m for some integer m >= 1. */ if (n >= 2) { i = n; while(i==2*(i/2)) i = i/2; /* While i is even, factor out a 2. */ } /* For n >=2, we now have N = 2^n iff i = 1. 
*/ if (n < 2 || i != 1) { printf(" %d deve ser um inteiro tal que n = 2^m , para m >= 1", n); exit(EXIT_FAILURE); } timer_stop(); tempo_total = get_elapsed_time(); //====== Performance Test - start ======================================= timer_reset(); timer_start(); j = 0; j = n*n; //fft direta fft(j, x, X); // filtro passa baixa lowpass_filter(X, n); //fft inversa for(i=0; i<j; i++) x[i][0] = x[i][1] = 0; ifft(j, x, X); timer_stop(); tempo_kernel = get_elapsed_time(); tempo_total += tempo_kernel; //====== Performance Test - end ============================================ save_log_cpu(image_filename, tempo_kernel, tempo_total, LOG_NAME); image_amplitudes = (float*)malloc(n*n*sizeof(float)); for (i=0; i < n; i++) { for (j=0; j < n; j++) { image_amplitudes[i*n + j] = (float) (AMP(x[i*n + j][0], x[i*n + j][1])); } } normalizar_pgm(&opgm, image_amplitudes); escrever_pgm(&opgm, output_filename); free(x); free(X); destruir_pgm(&ipgm); destruir_pgm(&opgm); free(image_filename); free(output_filename); free(image_amplitudes); _CrtDumpMemoryLeaks(); return 0; }
int main(int argc, char *argv[]) { /* Variaveis obrigatorias do openCL pdccpk*/ cl_platform_id platform_ids[2]; cl_device_id device_id; cl_context context; cl_command_queue commands; cl_program program; cl_kernel kernel_sobel; cl_int ret_code; cl_uint ret_num_devices; cl_uint ret_num_platforms; // cl_event kernel_event; cl_ulong kernel_start_time = (cl_ulong) 0; cl_ulong kernel_end_time = (cl_ulong) 0; cl_ulong kernel_run_time = (cl_ulong) 0; cl_event write_host_dev_event; cl_ulong write_host_dev_start_time = (cl_ulong) 0; cl_ulong write_host_dev_end_time = (cl_ulong) 0; cl_ulong write_host_dev_run_time = (cl_ulong) 0; cl_event read_dev_host_event; cl_ulong read_dev_host_start_time = (cl_ulong) 0; cl_ulong read_dev_host_end_time = (cl_ulong) 0; cl_ulong read_dev_host_run_time = (cl_ulong) 0; unsigned __int64 image_tam; const unsigned __int64 MEGA_BYTES = 1048576; // 1024*1024 double image_tam_MB; double tempo_total; /* objetos que serao armazenados na memoria da GPU */ cl_mem image_in_mem, image_out_mem; /* objetos que serao armazenados na memoria local (host) */ unsigned char *image_in_host, *image_out_host; unsigned int image_width, image_height; size_t image_size; /*IMPORTANTE: dimensionamento dos compute units para exec do kernel*/ size_t work_global[C_NUM_DIMENSOES]; size_t work_local[C_NUM_DIMENSOES]; /*Setup dos nomes de arquivos*/ const char *kernel_filename = C_NOME_ARQ_KERNEL; pgm_t ipgm, opgm; /* Codigo fonte do kernel dever ser aberto como uma cadeia de caracteres*/ image_file_t* image_filename; char* output_filename; FILE *fp; size_t source_size; char *source_str; /* Timer count start */ timer_reset(); timer_start(); if (argc < 2) { printf("**Erro: A imagem de entrada é necessaria.\n"); exit(EXIT_FAILURE); } //=================================================================================================== image_filename = (image_file_t *) malloc(sizeof(image_file_t)); split_image_filename(image_filename, argv[1]); output_filename = (char *) 
malloc(40*sizeof(char)); sprintf(output_filename, "%d.%d.%s.%s.%s", image_filename->res, image_filename->num, ENV_TYPE, APP_TYPE, EXTENSAO); //=================================================================================================== fp = fopen(kernel_filename, "r"); if (!fp) { fprintf(stderr, "Failed to load kernel.\n"); exit(1); } source_str = (char *) malloc(MAX_SOURCE_SIZE); source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp); fclose(fp); //=================================================================================================== // Abrindo imagem do arquivo para objeto de memoria local if( ler_pgm(&ipgm, argv[1]) == -1) exit(EXIT_FAILURE); image_in_host = ipgm.buf; image_width = ipgm.width; image_height = ipgm.height; image_size = (int) (image_width * image_height) * sizeof(unsigned char); image_tam = image_size; /* Alocando memoria para a imagem de saida apos o processamento*/ image_out_host = (unsigned char *) malloc(image_size); //=================================================================================================== /* Recebe um vetor de platform_id e retorna sucesso * se encontrar plataformas OpenCL no sistema, inseridos * essas plataformas no vetor com no maximo MAX_PLATFORM_ID * entradas, caso contrario retorna codigo de erro. * CL_CHECK é um macro para retornar o titulo do erro * a partir de uma funcao que retorne um codigo de erro ***************************************************/ CL_CHECK(clGetPlatformIDs(MAX_PLATFORM_ID, platform_ids, &ret_num_platforms)); if (ret_num_platforms == 0) { fprintf(stderr, "[Erro] Não existem plataformas OpenCL\n"); exit(2); } //=================================================================================================== /* Recebe uma platform_id e retorna sucesso * se obter um device do tipo GPU dessa plataforma OpenCL * caso contrario retorna codigo de erro. 
***************************************************/ CL_CHECK(clGetDeviceIDs(platform_ids[1], CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices)); //print_platform_info(&platform_ids[0]); //system("pause"); //exit(0); //=================================================================================================== /* Retorna sucesso se consegui criar um contexto para * o device id escolhido, caso contrario retorna codigo de erro. ***************************************************/ context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret_code); //=================================================================================================== commands = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret_code); //=================================================================================================== program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, NULL); //=================================================================================================== ret_code = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if (ret_code != CL_SUCCESS) { char build_str[4096]; fprintf(stderr, "[ERRO] clBuildProgram '%s' (code: %d)\n", error_cl_str(ret_code), ret_code ); clGetProgramBuildInfo( program, device_id, CL_PROGRAM_BUILD_LOG, sizeof(build_str), build_str, NULL); fprintf(stderr, "[ERRO] log: '%s'\n", build_str); system("pause"); exit(4); } //=================================================================================================== kernel_sobel = clCreateKernel(program, "sobel_kernel", NULL); image_in_mem = clCreateBuffer(context, CL_MEM_READ_ONLY, image_size, NULL, NULL); image_out_mem = clCreateBuffer(context, CL_MEM_WRITE_ONLY, image_size , NULL, NULL); //=================================================================================================== CL_CHECK(clEnqueueWriteBuffer(commands, image_in_mem, CL_TRUE, 0, image_size, image_in_host, 0, NULL, 
&write_host_dev_event)); CL_CHECK(clSetKernelArg(kernel_sobel, 0, sizeof(cl_mem), &image_in_mem)); CL_CHECK(clSetKernelArg(kernel_sobel, 1, sizeof(cl_mem), &image_out_mem)); //=================================================================================================== work_global[0] = image_width; work_global[1] = image_height; work_local[0] = MAX_WORK_GROUP_ITEM_SIZE_DIM_1; work_local[1] = MAX_WORK_GROUP_ITEM_SIZE_DIM_2; //=================================================================================================== CL_CHECK(clEnqueueNDRangeKernel(commands, kernel_sobel, 2, NULL, work_global, work_local, 0, NULL, &kernel_event) ); // CL_CHECK(clFinish(commands)); // CL_CHECK( clWaitForEvents(1 , &kernel_event) ); //=================================================================================================== CL_CHECK(clEnqueueReadBuffer(commands, image_out_mem, CL_TRUE, 0, image_size, image_out_host, 0, NULL, &read_dev_host_event)); //== Total time elapsed ============================================================================= timer_stop(); tempo_total = get_elapsed_time(); //=================================================================================================== //====== Get time of Profile Info =================================================================== // kernel sobel time CL_CHECK(clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(kernel_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL)); // Write data time CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &write_host_dev_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &write_host_dev_end_time, NULL)); // Read data time CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), 
&read_dev_host_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &read_dev_host_end_time, NULL)); //=================================================================================================== write_host_dev_run_time = write_host_dev_end_time - write_host_dev_start_time; read_dev_host_run_time = read_dev_host_end_time - read_dev_host_start_time; kernel_run_time = kernel_end_time - kernel_start_time; image_tam_MB = (double) (((double) image_tam)/(double) MEGA_BYTES); //=================================================================================================== save_log_gpu(image_filename, kernel_run_time, (double) (image_tam_MB/( (double) read_dev_host_run_time/(double) NANOSECONDS)), (double) (image_tam_MB/ ((double) write_host_dev_run_time/ (double) NANOSECONDS)), tempo_total, LOG_NAME); //=================================================================================================== opgm.width = image_width; opgm.height = image_height; opgm.buf = image_out_host; escrever_pgm(&opgm, output_filename); //=================================================================================================== CL_CHECK(clReleaseMemObject(image_in_mem)); CL_CHECK(clReleaseEvent(kernel_event)); CL_CHECK(clReleaseEvent(read_dev_host_event)); CL_CHECK(clReleaseEvent(write_host_dev_event)); CL_CHECK(clReleaseMemObject(image_out_mem)); CL_CHECK(clReleaseProgram(program)); CL_CHECK(clReleaseKernel(kernel_sobel)); CL_CHECK(clReleaseCommandQueue(commands)); CL_CHECK(clReleaseContext(context)); destruir_pgm(&ipgm); destruir_pgm(&opgm); free(source_str); free(image_filename); free(output_filename); //_CrtDumpMemoryLeaks(); return 0; }
int main(int argc, char *argv[]) { //FILE *fp; cl_platform_id platform_id[2]; cl_uint ret_num_devices; cl_uint ret_num_platforms; cl_int ret_code; cl_mem image_in_mem = NULL; cl_mem image_out_mem = NULL; cl_mem twiddle_factors_mem = NULL; cl_float2 *image_in_host; cl_float2 *twiddle_factors_host; cl_kernel kernel_twiddle_factors; cl_kernel kernel_matriz_transpose; cl_kernel kernel_lowpass_filter; pgm_t ipgm; pgm_t opgm; image_file_t *image_filename; char *output_filename; FILE *fp; const char *kernel_filename = C_NOME_ARQ_KERNEL; size_t source_size; char *source_str; cl_int i, j,n ,m; cl_int raio = 0; size_t global_wg[2]; size_t local_wg[2]; float *image_amplitudes; size_t log_size; char *log_file; cl_event kernels_events_out_fft[4]; cl_ulong kernel_runtime = (cl_ulong) 0; cl_ulong kernel_start_time = (cl_ulong) 0; cl_ulong kernel_end_time = (cl_ulong) 0; cl_event write_host_dev_event; cl_ulong write_host_dev_start_time = (cl_ulong) 0; cl_ulong write_host_dev_end_time = (cl_ulong) 0; cl_ulong write_host_dev_run_time = (cl_ulong) 0; cl_event read_dev_host_event; cl_ulong read_dev_host_start_time = (cl_ulong) 0; cl_ulong read_dev_host_end_time = (cl_ulong) 0; cl_ulong read_dev_host_run_time = (cl_ulong) 0; unsigned __int64 image_tam; unsigned __int64 MEGA_BYTES = 1048576; // 1024*1024 double image_tam_MB; double tempo_total; struct event_in_fft_t *fft_events; //=== Timer count start ============================================================================== timer_reset(); timer_start(); //=================================================================================================== if (argc < 2) { printf("**Erro: O arquivo de entrada eh necessario.\n"); exit(EXIT_FAILURE); } image_filename = (image_file_t *) malloc(sizeof(image_file_t)); split_image_filename(image_filename, argv[1]); output_filename = (char *) malloc(40*sizeof(char)); sprintf(output_filename, "%d.%d.%s.%s.%s", image_filename->res, image_filename->num, ENV_TYPE, APP_TYPE, EXTENSAO); fp = 
fopen(kernel_filename, "r"); if (!fp) { fprintf(stderr, "Failed to load kernel.\n"); exit(EXIT_FAILURE); } source_str = (char *)malloc(MAX_SOURCE_SIZE); source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp); fclose( fp ); //=================================================================================================== /* Abrindo imagem do arquivo para objeto de memoria local*/ if( ler_pgm(&ipgm, argv[1]) == -1) exit(EXIT_FAILURE); n = ipgm.width; raio = n/8; m = (cl_int)(log((double)n)/log(2.0)); image_in_host = (cl_float2 *)malloc((n*n)*sizeof(cl_float2)); twiddle_factors_host = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2)); for (i = 0; i < n; i++) { for (j = 0; j < n; j++) { image_in_host[n*i + j].s[0] = (float)ipgm.buf[n*i + j]; image_in_host[n*i + j].s[1] = (float)0; } } fft_events = (struct event_in_fft_t *)malloc(MAX_CALL_FFT*sizeof(struct event_in_fft_t)); kernel_butter_events = (cl_event *)malloc(MAX_CALL_FFT*m*sizeof(cl_event)); //=================================================================================================== CL_CHECK(clGetPlatformIDs(MAX_PLATFORM_ID, platform_id, &ret_num_platforms)); if (ret_num_platforms == 0 ) { fprintf(stderr,"[Erro] Não existem plataformas OpenCL\n"); exit(2); } //=================================================================================================== CL_CHECK(clGetDeviceIDs( platform_id[0], CL_DEVICE_TYPE_GPU, 1, &device_id, &ret_num_devices)); //print_platform_info(&platform_id[1]); //=================================================================================================== context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret_code); //=================================================================================================== cmd_queue = clCreateCommandQueue(context, device_id, CL_QUEUE_PROFILING_ENABLE, &ret_code); //=================================================================================================== image_in_mem = clCreateBuffer(context, 
CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret_code); image_out_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret_code); twiddle_factors_mem = clCreateBuffer(context, CL_MEM_READ_WRITE, (n/2)*sizeof(cl_float2), NULL, &ret_code); //=================================================================================================== /* Transfer data to memory buffer */ CL_CHECK(clEnqueueWriteBuffer(cmd_queue, image_in_mem, CL_TRUE, 0, n*n*sizeof(cl_float2), image_in_host, 0, NULL, &write_host_dev_event)); image_tam = n*n*sizeof(cl_float2); //=================================================================================================== program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret_code); //=================================================================================================== ret_code = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL); //=================================================================================================== if (ret_code != CL_SUCCESS) { // Determine the size of the log clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, 0, NULL, &log_size); //=================================================================================================== // Allocate memory for the log log_file = (char *) malloc(log_size); // Get the log clGetProgramBuildInfo(program, device_id, CL_PROGRAM_BUILD_LOG, log_size, log_file, NULL); printf("%s\n", log_file); system("pause"); exit(0); } kernel_twiddle_factors = clCreateKernel(program, "twiddle_factors", &ret_code); kernel_matriz_transpose = clCreateKernel(program, "matrix_trasponse", &ret_code); kernel_lowpass_filter = clCreateKernel(program, "lowpass_filter", &ret_code); /* Processa os fatores Wn*/ //=================================================================================================== CL_CHECK(clSetKernelArg(kernel_twiddle_factors, 0, sizeof(cl_mem), (void 
*)&twiddle_factors_mem)); CL_CHECK(clSetKernelArg(kernel_twiddle_factors, 1, sizeof(cl_int), (void *)&n)); config_workgroup_size(global_wg, local_wg, n/2, 1); CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_twiddle_factors, 1, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[0])); //=================================================================================================== /* Executa a FFT em N/2 */ fft_main(image_out_mem, image_in_mem, twiddle_factors_mem, m, direta, &fft_events[0]); //=================================================================================================== /* Realiza a transposta da Matriz (imagem) */ CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 0, sizeof(cl_mem), (void *)&image_in_mem)); CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 1, sizeof(cl_mem), (void *)&image_out_mem)); CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 2, sizeof(cl_int), (void *)&n)); config_workgroup_size(global_wg, local_wg, n, n); CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_matriz_transpose, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[1])); //=================================================================================================== /* Executa a FFT N/2 */ fft_main(image_out_mem, image_in_mem, twiddle_factors_mem, m, direta, &fft_events[1]); //=================================================================================================== /* Processa o filtro passa baixa */ CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 0, sizeof(cl_mem), (void *)&image_out_mem)); CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 1, sizeof(cl_int), (void *)&n)); CL_CHECK(clSetKernelArg(kernel_lowpass_filter, 2, sizeof(cl_int), (void *)&raio)); config_workgroup_size(global_wg, local_wg, n, n); CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_lowpass_filter, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[2])); 
//=================================================================================================== /* Obtem a FFT inversa*/ fft_main(image_in_mem, image_out_mem, twiddle_factors_mem, m, inversa, &fft_events[2]); //=================================================================================================== /* Realiza a transposta da Matriz (imagem) */ CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 0, sizeof(cl_mem), (void *)&image_out_mem)); CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 1, sizeof(cl_mem), (void *)&image_in_mem)); CL_CHECK(clSetKernelArg(kernel_matriz_transpose, 2, sizeof(cl_int), (void *)&n)); config_workgroup_size(global_wg, local_wg, n, n); CL_CHECK(clEnqueueNDRangeKernel(cmd_queue, kernel_matriz_transpose, 2, NULL, global_wg, local_wg, 0, NULL, &kernels_events_out_fft[3])); //=================================================================================================== fft_main(image_in_mem, image_out_mem, twiddle_factors_mem, m, inversa, &fft_events[3]); //=================================================================================================== CL_CHECK(clEnqueueReadBuffer(cmd_queue, image_in_mem, CL_TRUE, 0, n*n*sizeof(cl_float2), image_in_host, 0, NULL, &read_dev_host_event)); //=================================================================================================== //== Total time elapsed ============================================================================ timer_stop(); tempo_total = get_elapsed_time(); //================================================================================================== //====== Get time of Profile Info ================================================================== // Write data time CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &write_host_dev_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(write_host_dev_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &write_host_dev_end_time, NULL)); // Read 
data time CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &read_dev_host_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(read_dev_host_event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &read_dev_host_end_time, NULL)); for (i = 0; i < MAX_CALL_FFT; i++) { kernel_start_time = (cl_long) 0; kernel_end_time = (cl_long) 0; CL_CHECK(clGetEventProfilingInfo(kernels_events_out_fft[i], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(kernels_events_out_fft[i], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL)); kernel_runtime += (kernel_end_time - kernel_start_time); kernel_start_time = (cl_long) 0; kernel_end_time = (cl_long) 0; CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_bitsrev, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_bitsrev, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL)); kernel_runtime += (kernel_end_time - kernel_start_time); kernel_start_time = (cl_long) 0; kernel_end_time = (cl_long) 0; if (fft_events[i].kernel_normalize != NULL) { CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_normalize, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(fft_events[i].kernel_normalize, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL)); kernel_runtime += (kernel_end_time - kernel_start_time); } } for (j=0; j < MAX_CALL_FFT*m; j++){ kernel_start_time = (cl_long) 0; kernel_end_time = (cl_long) 0; CL_CHECK(clGetEventProfilingInfo(kernel_butter_events[j], CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &kernel_start_time, NULL)); CL_CHECK(clGetEventProfilingInfo(kernel_butter_events[j], CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &kernel_end_time, NULL)); kernel_runtime += (kernel_end_time - kernel_start_time); } write_host_dev_run_time = 
write_host_dev_end_time - write_host_dev_start_time; read_dev_host_run_time = read_dev_host_end_time - read_dev_host_start_time; /* save_log_debug(write_host_dev_run_time,fp); save_log_debug(read_dev_host_run_time,fp); close_log_debug(fp); */ image_tam_MB = (double) (((double) image_tam)/(double) MEGA_BYTES); //================================================================================================== save_log_gpu(image_filename, kernel_runtime, (double) (image_tam_MB/( (double) read_dev_host_run_time/(double) NANOSECONDS)), (double) (image_tam_MB/ ((double) write_host_dev_run_time/ (double) NANOSECONDS)), tempo_total, LOG_NAME); //=================================================================================================== image_amplitudes = (float*)malloc(n*n*sizeof(float)); for (i=0; i < n; i++) { for (j=0; j < n; j++) { image_amplitudes[n*j + i] = (float) (AMP(((float*)image_in_host)[(2*n*j)+2*i], ((float*)image_in_host)[(2*n*j)+2*i+1])); } } //clFlush(cmd_queue); //clFinish(cmd_queue); opgm.width = n; opgm.height = n; normalizar_pgm(&opgm, image_amplitudes); escrever_pgm(&opgm, output_filename); //=================================================================================================== clFinish(cmd_queue); clReleaseKernel(kernel_twiddle_factors); clReleaseKernel(kernel_matriz_transpose); clReleaseKernel(kernel_lowpass_filter); clReleaseProgram(program); clReleaseMemObject(image_in_mem); clReleaseMemObject(image_out_mem); clReleaseMemObject(twiddle_factors_mem); clReleaseCommandQueue(cmd_queue); clReleaseContext(context); clReleaseEvent(read_dev_host_event); clReleaseEvent(write_host_dev_event); clReleaseEvent(kernels_events_out_fft[0]); clReleaseEvent(kernels_events_out_fft[1]); clReleaseEvent(kernels_events_out_fft[2]); clReleaseEvent(kernels_events_out_fft[3]); destruir_pgm(&ipgm); destruir_pgm(&opgm); free(image_amplitudes); free(source_str); free(image_in_host); free(image_filename); free(twiddle_factors_host); free(output_filename); 
free(fft_events); free(kernel_butter_events); //_CrtDumpMemoryLeaks(); return 0; }