int main(int argc, char *argv[]) { int ret = 0; if (argc < 2) { fprintf(stderr, "Usage: transalign_killer [--cldev=x.y] <input file>\n"); fprintf(stderr, " --cldev=x.y: x specifies the platform index, y the device index.\n"); return 1; } long seq_length; char *sequence = load_text(argv[argc - 1], &seq_length); if (!sequence) return 1; seq_length--; // Cut final 0 byte // FIXME: All the following code relies on seq_length being a multiple of BASE. long round_seq_length = round_up_to_power_of_two(seq_length, BASE_EXP); long res_length = 0; for (long len = round_seq_length / BASE; len; len /= BASE) res_length += len; // Use some random index to be searched for here unsigned letter_index = seq_length / 2; // Select an OpenCL device cl_device_id dev = select_device(argc - 1, argv); if (!dev) return 1; // Initialize the OpenCL st...ack cl_context ctx = clCreateContext(NULL, 1, &dev, NULL, NULL, NULL); cl_command_queue queue = clCreateCommandQueue(ctx, dev, 0, NULL); // Load the OpenCL kernesl char *prog_src = load_text("trans.cl", NULL); if (!prog_src) return 1; cl_program prog = clCreateProgramWithSource(ctx, 1, (const char **)&prog_src, NULL, NULL); free(prog_src); // Build them clBuildProgram(prog, 0, NULL, NULL, NULL, NULL); cl_kernel k_iadd = clCreateKernel(prog, "k_iadd", NULL); // initial addition cl_kernel k_cadd = clCreateKernel(prog, "k_cadd", NULL); // consecutive addition assert(k_iadd); assert(k_cadd); // Create the result buffer unsigned *result = malloc(res_length * sizeof(unsigned)); cl_mem result_gpu = clCreateBuffer(ctx, CL_MEM_READ_WRITE | HOST_PTR_POLICY, res_length * sizeof(unsigned), result, NULL); clock_start(); /*** START OF ROCKET SCIENCE LEVEL RUNTIME-TIME INTENSIVE STUFF ***/ // Bandwidth intensive stuff goes here // Copy the sequence to the video memory (or, generally speaking, the OpenCL device) cl_mem seq_gpu = clCreateBuffer(ctx, CL_MEM_READ_WRITE | HOST_PTR_POLICY, seq_length * sizeof(char), sequence, NULL); long bw1_time = clock_delta(); 
// GPU intensive stuff goes here /** * First, transform every - and \0 into a 0 and every other character into a * 1. Then, add consecutive fields (BASE fields) together and store them at * the beginning of the result buffer. */ clSetKernelArg(k_iadd, 0, sizeof(result_gpu), &result_gpu); clSetKernelArg(k_iadd, 1, sizeof(seq_gpu), &seq_gpu); clSetKernelArg(k_iadd, 2, sizeof(unsigned), &(unsigned){seq_length});
/**
 * Busy-wait until at least `micros` ticks have elapsed.
 *
 * Samples clock_micros() once on entry, then spins, re-sampling
 * clock_micros() on every iteration, until clock_delta(now, start) is no
 * longer less than `micros`. The CPU is not yielded while waiting.
 *
 * NOTE(review): the unit is presumably microseconds, judging by the names
 * clock_micros/micros -- confirm against the clock_micros() definition.
 *
 * @param micros  Minimum delay, in the unit reported by clock_micros().
 */
void clock_usdelay(unsigned int micros)
{
    uint32_t begin = clock_micros();

    for (;;) {
        /* Elapsed time is left to clock_delta() rather than a raw subtraction. */
        if (!(clock_delta(clock_micros(), begin) < micros)) {
            return;
        }
    }
}