Exemplo n.º 1
0
Arquivo: ct.c Projeto: decimalbell/ct
/*
 * Run every benchmark in the array starting at `b`.
 *
 * The array is terminated by a sentinel entry whose `f` member is
 * null/zero; that entry itself is not passed to runbench().
 */
static void
runallbench(Benchmark *b)
{
    while (b->f) {
        runbench(b);
        b++;
    }
}
Exemplo n.º 2
0
/*
 * Run the benchmark kernel set on a single OpenCL device and print the
 * results as CSV rows on stdout.
 *
 * Parameters:
 *   dev_id          - device on which the context, queue and buffer are created.
 *   c               - host array receiving `size` doubles read back from the
 *                     device buffer once all runs have completed.
 *   size            - number of double elements in the device buffer.
 *   block_strided   - when true, kernels are built with -DBLOCK_STRIDED and
 *                     the stride is reported as "Workgroup" (else "NDRange").
 *   host_allocated  - when true, the buffer is created with
 *                     CL_MEM_ALLOC_HOST_PTR.
 *   workgroupsize   - passed to the kernels as -Dblockdim and to the runners.
 *   elements_per_wi - passed to the kernels as -DELEMENTS_PER_THREAD.
 *   fusion_degree   - passed to the kernels as -DFUSION_DEGREE.
 *
 * Every OpenCL status code goes through OCL_SAFE_CALL (defined elsewhere;
 * presumably it reports and aborts on failure - confirm against its
 * definition). All OpenCL objects created here are released before return.
 */
extern "C" void mixbenchGPU(cl_device_id dev_id, double *c, long size, bool block_strided, bool host_allocated, size_t workgroupsize, unsigned int elements_per_wi, unsigned int fusion_degree) {
    const char *benchtype;
    if(block_strided)
        benchtype = "Workgroup";
    else
        benchtype = "NDRange";
    printf("Workitem stride:        %s\n", benchtype);
    const char *buffer_allocation = host_allocated ? "Host allocated" : "Device allocated";
    printf("Buffer allocation:      %s\n", buffer_allocation);

    // Set context properties
    cl_platform_id p_id;
    OCL_SAFE_CALL( clGetDeviceInfo(dev_id, CL_DEVICE_PLATFORM, sizeof(p_id), &p_id, NULL) );
    // Query the extension string length first, then fetch the string itself
    size_t length;
    OCL_SAFE_CALL( clGetDeviceInfo(dev_id, CL_DEVICE_EXTENSIONS, 0, NULL, &length) );
    char *extensions = (char*)alloca(length);
    OCL_SAFE_CALL( clGetDeviceInfo(dev_id, CL_DEVICE_EXTENSIONS, length, extensions, NULL) );
    // Double precision kernels are only built when the device advertises fp64
    bool enable_dp = strstr(extensions, "cl_khr_fp64") != NULL;

    cl_context_properties ctxProps[] = { CL_CONTEXT_PLATFORM, (cl_context_properties)p_id, 0 };

    // Named `err` rather than `errno`: the latter is a macro in <errno.h>
    cl_int err;
    // Create context
    cl_context context = clCreateContext(ctxProps, 1, &dev_id, NULL, NULL, &err);
    OCL_SAFE_CALL(err);

    const size_t buffer_bytes = size*sizeof(double);

    cl_mem_flags buf_flags = CL_MEM_READ_WRITE;
    if( host_allocated )
        buf_flags |= CL_MEM_ALLOC_HOST_PTR;
    cl_mem c_buffer = clCreateBuffer(context, buf_flags, buffer_bytes, NULL, &err);
    OCL_SAFE_CALL(err);

    // Create command queue
    cl_command_queue cmd_queue = clCreateCommandQueue(context, dev_id, CL_QUEUE_PROFILING_ENABLE, &err);
    OCL_SAFE_CALL(err);

    // Set data on device memory: zero the WHOLE mapped range. Writing `size`
    // 32-bit integers would clear only half of a buffer of `size` doubles.
    void *mapped_data = clEnqueueMapBuffer(cmd_queue, c_buffer, CL_TRUE, CL_MAP_WRITE, 0, buffer_bytes, 0, NULL, NULL, &err);
    OCL_SAFE_CALL(err);
    memset(mapped_data, 0, buffer_bytes);
    OCL_SAFE_CALL( clEnqueueUnmapMemObject(cmd_queue, c_buffer, mapped_data, 0, NULL, NULL) );

    // Load source, create program and all kernels
    printf("Loading kernel source file...\n");
    const char c_param_format_str[] = "-cl-std=CL1.1 -cl-mad-enable -Dclass_T=%s -Dblockdim=" SIZE_T_FORMAT " -DCOMPUTE_ITERATIONS=%d -DELEMENTS_PER_THREAD=%d -DFUSION_DEGREE=%d %s %s";
    const char *c_empty = "";
    const char *c_striding = block_strided ? "-DBLOCK_STRIDED" : c_empty;
    const char *c_enable_dp = "-DENABLE_DP";
    char c_build_params[256];
    const char *c_kernel_source = {ReadFile("mix_kernels_ro.cl")};
    printf("Precompilation of kernels... ");
    // snprintf bounds every write to the fixed-size option buffer
    snprintf(c_build_params, sizeof(c_build_params), c_param_format_str, "short", workgroupsize, 0, 1, 1, c_striding, c_empty);

    cl_kernel kernel_warmup = BuildKernel(context, dev_id, c_kernel_source, c_build_params);

    show_progress_init(compute_iterations_len);
    // One kernel per (data type, compute-iteration count) combination
    cl_kernel kernels[kdt_double+1][compute_iterations_len];
    for(int i=0; i<compute_iterations_len; i++) {
        show_progress_step(0, '\\');
        snprintf(c_build_params, sizeof(c_build_params), c_param_format_str, "float", workgroupsize, compute_iterations[i], elements_per_wi, fusion_degree, c_striding, c_empty);
        kernels[kdt_float][i] = BuildKernel(context, dev_id, c_kernel_source, c_build_params);

        show_progress_step(0, '|');
        snprintf(c_build_params, sizeof(c_build_params), c_param_format_str, "int", workgroupsize, compute_iterations[i], elements_per_wi, fusion_degree, c_striding, c_empty);
        kernels[kdt_int][i] = BuildKernel(context, dev_id, c_kernel_source, c_build_params);

        if( enable_dp ) {
            show_progress_step(0, '/');
            snprintf(c_build_params, sizeof(c_build_params), c_param_format_str, "double", workgroupsize, compute_iterations[i], elements_per_wi, fusion_degree, c_striding, c_enable_dp);
            kernels[kdt_double][i] = BuildKernel(context, dev_id, c_kernel_source, c_build_params);
        } else
            kernels[kdt_double][i] = 0;  // no fp64 support: slot left empty
        show_progress_step(1, '>');
    }
    show_progress_done();
    // The source text is freed here, so ReadFile() is assumed to return
    // heap memory owned by the caller - confirm against its definition.
    free((char*)c_kernel_source);

    runbench_warmup(cmd_queue, kernel_warmup, c_buffer, size, workgroupsize);

    // Synchronize in order to wait for memory operations to finish
    OCL_SAFE_CALL( clFinish(cmd_queue) );

    printf("---------------------------------------------------------- CSV data ----------------------------------------------------------\n");
    printf("Experiment ID, Single Precision ops,,,,              Double precision ops,,,,              Integer operations,,, \n");
    printf("Compute iters, Flops/byte, ex.time,  GFLOPS, GB/sec, Flops/byte, ex.time,  GFLOPS, GB/sec, Iops/byte, ex.time,   GIOPS, GB/sec\n");

    for(int i=0; i<compute_iterations_len; i++)
        runbench(compute_iterations, i, cmd_queue, kernels, c_buffer, size, workgroupsize, elements_per_wi, fusion_degree);

    printf("------------------------------------------------------------------------------------------------------------------------------\n");

    // Copy results back to host memory (blocking read)
    OCL_SAFE_CALL( clEnqueueReadBuffer(cmd_queue, c_buffer, CL_TRUE, 0, buffer_bytes, c, 0, NULL, NULL) );

    // Release kernels and program
    ReleaseKernelNProgram(kernel_warmup);
    for(int i=0; i<compute_iterations_len; i++) {
        ReleaseKernelNProgram(kernels[kdt_float][i]);
        ReleaseKernelNProgram(kernels[kdt_int][i]);
        if( enable_dp )
            ReleaseKernelNProgram(kernels[kdt_double][i]);
    }

    // Release buffer
    OCL_SAFE_CALL( clReleaseMemObject(c_buffer) );

    // Release the queue and context created above so repeated calls do not
    // leak OpenCL objects
    OCL_SAFE_CALL( clReleaseCommandQueue(cmd_queue) );
    OCL_SAFE_CALL( clReleaseContext(context) );
}