void atari_ntsc_init( atari_ntsc_t* ntsc, atari_ntsc_setup_t const* setup, atari_ntsc_in_t const* palette ) { int entry; init_t impl; if ( !setup ) setup = &atari_ntsc_composite; init( &impl, setup ); // Palette stores R/G/B data for 'atari_ntsc_palette_size' entries for ( entry = 0; entry < atari_ntsc_palette_size; entry++ ) { float r = impl.to_float [*palette++]; float g = impl.to_float [*palette++]; float b = impl.to_float [*palette++]; float y, i, q = RGB_TO_YIQ( r, g, b, y, i ); // Generate kernel int ir, ig, ib = YIQ_TO_RGB( y, i, q, impl.to_rgb, int, ir, ig ); atari_ntsc_rgb_t rgb = PACK_RGB( ir, ig, ib ); if ( ntsc ) { atari_ntsc_rgb_t* kernel = ntsc->table [entry]; gen_kernel( &impl, y, i, q, kernel ); correct_errors( rgb, kernel ); } } }
void atari_ntsc_init( atari_ntsc_t* ntsc, atari_ntsc_setup_t const* setup ) { /* Atari change: no alternating burst phases - remove merge_fields variable. */ int entry; init_t impl; /* Atari change: NES palette generation and reading removed. Atari palette generation is located in colours_ntsc.c, and colours are read from setup->yiq_palette. */ if ( !setup ) setup = &atari_ntsc_composite; init( &impl, setup ); /* Atari change: no alternating burst phases - remove code for merge_fields. */ for ( entry = 0; entry < atari_ntsc_palette_size; entry++ ) { /* Atari change: Instead of palette generation, load colours from setup->yiq_palette. */ double y; double i; double q; { double *yiq_ptr = setup->yiq_palette + 3 * entry; y = *yiq_ptr++; i = *yiq_ptr++; q = *yiq_ptr++; } i *= rgb_unit; q *= rgb_unit; y *= rgb_unit; y += rgb_offset; /* Generate kernel */ { int r, g, b = YIQ_TO_RGB( y, i, q, impl.to_rgb, int, r, g ); /* blue tends to overflow, so clamp it */ atari_ntsc_rgb_t rgb = PACK_RGB( r, g, (b < 0x3E0 ? b: 0x3E0) ); if ( setup->palette_out ) RGB_PALETTE_OUT( rgb, &setup->palette_out [entry * 3] ); if ( ntsc ) { atari_ntsc_rgb_t* kernel = ntsc->table [entry]; gen_kernel( &impl, y, i, q, kernel ); /* Atari change: no alternating burst phases - remove code for merge_fields. */ correct_errors( rgb, kernel ); } } } }
int run_kernel_benchmark(cl_device_id did, cl_context context, cl_command_queue commands, int n_args, int n_lines, double *duration, double *delta, double *compile_time) { int i; int err; char build_log[4096] = {0}; T *tmp_args = NULL; cl_mem* mem_args = NULL; double durations[10]; size_t len; cl_ulong t1; cl_ulong t2; cl_ulong t3; cl_ulong t4; size_t global = 1; size_t local = global; cl_program program; cl_kernel kernel; cl_event event; //printf("lines: %i, args %i\n", n_args, n_lines); program = gen_kernel(n_args, n_lines, context); if(!program) return -1; unsigned long start_time = current_msecs(); err = clBuildProgram(program, 0, NULL, NULL, NULL, NULL); if(err != CL_SUCCESS){ fprintf(stderr, "clBuildProgram() failed!\n"); fprintf(stderr, "err: %i\n", err); clGetProgramBuildInfo(program, did, CL_PROGRAM_BUILD_LOG, sizeof(build_log), build_log, &len); puts(build_log); return -1; } /* size_t pz = 0; clGetProgramInfo(program, CL_PROGRAM_BINARY_SIZES, sizeof(size_t), &pz, NULL); printf("*Progsize: %i\n", pz); */ kernel = clCreateKernel(program, "main_func", &err); if(!kernel || err != CL_SUCCESS){ fprintf(stderr, "clCreateKernel() failed!\n"); fprintf(stderr, "err: %i\n", err); return -1; } *compile_time = elapsed_msecs(start_time)*1.0; err = 0; tmp_args = (T *)malloc(n_args * sizeof(T)); mem_args = (cl_mem*)malloc(n_args * sizeof(cl_mem)); for(i = 0; i < n_args; i++){ tmp_args[i] = (float)(1 + (int) (100.0 * (rand() / (RAND_MAX + 1.0)))); mem_args[i] = clCreateBuffer(context, CL_MEM_READ_WRITE | CL_MEM_COPY_HOST_PTR, sizeof(T), tmp_args+i, NULL); err |= clSetKernelArg(kernel, i, sizeof(cl_mem), mem_args+i); } if(err != CL_SUCCESS){ fprintf(stderr, "clSetKernelArg() failed!\n"); fprintf(stderr, "err: %i\n", err); return -1; } //warm up call err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, NULL); if(err != CL_SUCCESS){ fprintf(stderr, "err: %i\n", err); fprintf(stderr, "clEnqueueNDRangeKernel() failed!\n"); fprintf(stderr, "err: %i\n", err); return -1; } clFinish(commands); for(i = 0; i < 10; i++){ err = clEnqueueNDRangeKernel(commands, kernel, 1, NULL, &global, &local, 0, NULL, &event); if(err != CL_SUCCESS){ fprintf(stderr, "err: %i\n", err); fprintf(stderr, "clEnqueueNDRangeKernel() failed!\n"); fprintf(stderr, "err: %i\n", err); return -1; } cl_int errcode = clFinish(commands); errcode |= clWaitForEvents(1, &event); if(errcode != CL_SUCCESS) printf("Error waiting for kernel completion: %s\n", oclErrorString(errcode)); clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_QUEUED, sizeof(cl_ulong), &t1, NULL); clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_SUBMIT, sizeof(cl_ulong), &t2, NULL); clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &t3, NULL); clGetEventProfilingInfo(event, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &t4, NULL); durations[i] = (t3 - t1) * 1e-6; } /* printf("submit: %lu\n", (unsigned long)t2 - (unsigned long)t1); printf("start: %lu\n", (unsigned long)t3 - (unsigned long)t1); printf("end: %lu\n", (unsigned long)t4 - (unsigned long)t1); */ qsort(durations, 10, sizeof(double), dblcmp); *duration = durations[4]; *delta = durations[0] - durations[9]; clReleaseEvent(event); clReleaseKernel(kernel); clReleaseProgram(program); free(tmp_args); return 0; }