示例#1
0
int main(int argc, char *argv[])
{
    int ret = 0;


    if (argc < 2)
    {
        fprintf(stderr, "Usage: transalign_killer [--cldev=x.y] <input file>\n");
        fprintf(stderr, "  --cldev=x.y: x specifies the platform index, y the device index.\n");
        return 1;
    }


    long seq_length;
    char *sequence = load_text(argv[argc - 1], &seq_length);
    if (!sequence)
        return 1;

    seq_length--; // Cut final 0 byte

    // FIXME: All the following code relies on seq_length being a multiple of BASE.

    long round_seq_length = round_up_to_power_of_two(seq_length, BASE_EXP);

    long res_length = 0;
    for (long len = round_seq_length / BASE; len; len /= BASE)
        res_length += len;


    // Use some random index to be searched for here
    unsigned letter_index = seq_length / 2;


    // Select an OpenCL device
    cl_device_id dev = select_device(argc - 1, argv);
    if (!dev)
        return 1;

    // Initialize the OpenCL stack
    cl_context ctx = clCreateContext(NULL, 1, &dev, NULL, NULL, NULL);
    cl_command_queue queue = clCreateCommandQueue(ctx, dev, 0, NULL);

    // Load the OpenCL kernels
    char *prog_src = load_text("trans.cl", NULL);
    if (!prog_src)
        return 1;
    cl_program prog = clCreateProgramWithSource(ctx, 1, (const char **)&prog_src, NULL, NULL);
    free(prog_src);

    // Build them
    clBuildProgram(prog, 0, NULL, NULL, NULL, NULL);
    cl_kernel k_iadd = clCreateKernel(prog, "k_iadd", NULL); // initial addition
    cl_kernel k_cadd = clCreateKernel(prog, "k_cadd", NULL); // consecutive addition
    assert(k_iadd);
    assert(k_cadd);


    // Create the result buffer
    unsigned *result = malloc(res_length * sizeof(unsigned));
    cl_mem result_gpu = clCreateBuffer(ctx, CL_MEM_READ_WRITE | HOST_PTR_POLICY, res_length * sizeof(unsigned), result, NULL);


    clock_start();

    /*** START OF ROCKET SCIENCE LEVEL RUNTIME-TIME INTENSIVE STUFF ***/

    // Bandwidth intensive stuff goes here

    // Copy the sequence to the video memory (or, generally speaking, the OpenCL device)
    cl_mem seq_gpu = clCreateBuffer(ctx, CL_MEM_READ_WRITE | HOST_PTR_POLICY, seq_length * sizeof(char), sequence, NULL);

    long bw1_time = clock_delta();


    // GPU intensive stuff goes here

    /**
     * First, transform every - and \0 into a 0 and every other character into a
     * 1. Then, add consecutive fields (BASE fields) together and store them at
     * the beginning of the result buffer.
     */
    clSetKernelArg(k_iadd, 0, sizeof(result_gpu), &result_gpu);
    clSetKernelArg(k_iadd, 1, sizeof(seq_gpu), &seq_gpu);
    clSetKernelArg(k_iadd, 2, sizeof(unsigned), &(unsigned){seq_length});
示例#2
0
/*
 * Busy-wait delay.
 *
 * Samples clock_micros() once as a reference point, then spins, re-sampling
 * clock_micros() on every iteration, until clock_delta(now, reference) is no
 * longer less than `micros`. The calling thread does not yield while waiting.
 *
 * NOTE(review): presumably clock_micros() returns a microsecond tick count and
 * clock_delta() the elapsed ticks between two samples -- confirm against their
 * definitions, which are not part of this block.
 */
void clock_usdelay(unsigned int micros)
{
  const uint32_t reference = clock_micros();

  for (;;) {
    /* Stop as soon as the measured delta is no longer below the request. */
    if (!(clock_delta(clock_micros(), reference) < micros)) {
      return;
    }
  }
}