コード例 #1
0
// Entry point of the example. The listing stops right after the compute step,
// so the end of the function is not shown here.
//
// Fills `data` with RANGE consecutive integers starting at START and runs the
// totient computation over it, writing into `out`:
//   - first argument starts with "seq": euler_totient() is called on the host;
//   - first argument starts with "cpu": the OpenCL kernel runs on a CPU device;
//   - otherwise: the OpenCL kernel runs on a GPU device, falling back to a CPU
//     device when no GPU queue can be created.
//
// Returns 1 early when a host buffer, a dispatch queue or a device buffer
// cannot be obtained.
int main(int argc, const char * argv[]) {
    static mach_timebase_info_data_t sTimebaseInfo;
    int seq = 0;
    int cpu = 0;

    // Host-side input and output arrays; both are verified before first use.
    int* data = (int*) malloc(sizeof(cl_int)*RANGE);
    int* out = (int*) malloc(sizeof(cl_int)*RANGE);
    if (data == NULL || out == NULL) {
        fprintf(stderr, "Failed to allocate host buffers\n");
        free(data);
        free(out);
        return 1;
    }
    for (int i = 0; i < RANGE; i ++) {
        data[i] = START + i;
    }

    printf("Hello, World!\n");
    if (argc > 1){
        // Only the first three characters are compared, so any argument that
        // merely starts with "seq" or "cpu" selects that mode.
        if (strncmp(argv[1], "seq", 3) == 0) {
            seq = 1;
        } else if (strncmp(argv[1], "cpu", 3) == 0) {
            cpu = 1;
        }
    }
    uint64_t start = mach_absolute_time();
    if (seq) {
        euler_totient(data, out);
    } else {
        dispatch_queue_t queue =
            gcl_create_dispatch_queue(cpu ? CL_DEVICE_TYPE_CPU : CL_DEVICE_TYPE_GPU, NULL);
        if (queue == NULL && !cpu) {
            // No usable GPU device: retry on the CPU instead of handing a NULL
            // queue to dispatch_sync().
            fprintf(stderr, "No GPU dispatch queue available, falling back to CPU\n");
            queue = gcl_create_dispatch_queue(CL_DEVICE_TYPE_CPU, NULL);
        }
        if (queue == NULL) {
            fprintf(stderr, "Failed to create an OpenCL dispatch queue\n");
            free(data);
            free(out);
            return 1;
        }

        // Device buffers: mem_in starts as a copy of `data`, mem_out receives
        // the kernel's results.
        void *mem_in = gcl_malloc(sizeof(cl_int)*RANGE, data, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
        void *mem_out = gcl_malloc(sizeof(cl_int)*RANGE, NULL, CL_MEM_WRITE_ONLY);
        if (mem_in == NULL || mem_out == NULL) {
            fprintf(stderr, "Failed to allocate OpenCL device buffers\n");
            if (mem_in != NULL) {
                gcl_free(mem_in);
            }
            if (mem_out != NULL) {
                gcl_free(mem_out);
            }
            dispatch_release(queue);
            free(data);
            free(out);
            return 1;
        }

        dispatch_sync(queue, ^{
            // Use the work-group size reported for this kernel as the local size.
            size_t wgs;
            gcl_get_kernel_block_workgroup_info(euler_totient_kernel,
                                                CL_KERNEL_WORK_GROUP_SIZE,
                                                sizeof(wgs), &wgs, NULL);

            cl_ndrange range = {
                1,              // number of dimensions in use
                {0, 0, 0},      // global offset per dimension
                {RANGE, 0, 0},  // global size: one work item per element
                {wgs, 0, 0}     // local (work-group) size
            };

            euler_totient_kernel(&range,(cl_int*)mem_in, (cl_int*)mem_out);

            // Copy the results back to the host; sized with cl_int to match
            // how every buffer above was allocated.
            gcl_memcpy(out, mem_out, sizeof(cl_int) * RANGE);
        });

        // These handles are local to this scope, so this is the only place
        // where they can be released.
        gcl_free(mem_in);
        gcl_free(mem_out);
        dispatch_release(queue);
    }
コード例 #2
0
ファイル: main.cpp プロジェクト: danbolt/OpenCL-Experiments
// Entry point of the sample; this listing ends inside the function, right after
// the kernel dispatch. Visible steps: run init(), pause, obtain an OpenCL
// dispatch queue (GPU first, CPU if that returns NULL), print the device name,
// fill test_in with the values 0..NUM_VALUES-1, allocate device buffers and run
// square_kernel over them, copying the result into test_out.
// Returns 1 when init() does not report true.
int main(int argc, const char * argv[])
{
    int i;
    char deviceName[128];
    
    if (init() != true)
    {
        return 1;
    }
    
    // Pause before the OpenCL setup (2000 is presumably milliseconds; the
    // reason for the wait is not visible here -- confirm).
    SDL_Delay(2000);
    
    // Try to get the dispatch queue for the GPU first.
    dispatch_queue_t queue = gcl_create_dispatch_queue(CL_DEVICE_TYPE_GPU, NULL);
    
    // If the system has no OpenCL GPU, use the CPU instead.
    // NOTE(review): queue is not re-checked after this fallback; if the CPU
    // request can also yield NULL, the calls below receive a NULL queue -- confirm.
    if (queue == NULL)
    {
        queue = gcl_create_dispatch_queue(CL_DEVICE_TYPE_CPU, NULL);
    }
    
    // Print the name of the device we're using. It's cute!
    // The variable is called `gpu`, but after the fallback above it may refer
    // to the CPU device.
    // NOTE(review): the result of clGetDeviceInfo is ignored, so deviceName is
    // printed even if the query did not fill it in.
    cl_device_id gpu = gcl_get_device_id_with_dispatch_queue(queue);
    clGetDeviceInfo(gpu, CL_DEVICE_NAME, 128, deviceName, NULL);
    fprintf(stdout, "Created a dispatch queue using the %s\n", deviceName);
    
    // Hard-code simple test data: test_in[i] == i for every index.
    // NOTE(review): the malloc result is used without a NULL check.
    float* test_in = (float*)malloc(sizeof(cl_float) * NUM_VALUES);
    for (i = 0; i < NUM_VALUES; i++)
    {
        test_in[i] = (cl_float)i;
    }
    
    // Host memory that receives the output once the OpenCL computation is done.
    // NOTE(review): the malloc result is not checked here either.
    float* test_out = (float*)malloc(sizeof(cl_float) * NUM_VALUES);
    
    // Allocate matching buffers through gcl_malloc for the OpenCL side.
    // CL_MEM_COPY_HOST_PTR copies the values of test_in into mem_in.
    void* mem_in = gcl_malloc(sizeof(cl_float) * NUM_VALUES, test_in, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR);
    void* mem_out = gcl_malloc(sizeof(cl_float) * NUM_VALUES, NULL, CL_MEM_WRITE_ONLY);
    
    // Dispatch the kernel; dispatch_sync runs the block on `queue` and returns
    // only after the block has finished.
    dispatch_sync(queue, ^{
        
        // Receives the value queried for CL_KERNEL_WORK_GROUP_SIZE below.
        size_t wgs;
        
        // Query the work-group size for square_kernel, then describe the range:
        // 1 dimension, offset 0, global size NUM_VALUES, local size wgs.
        gcl_get_kernel_block_workgroup_info(square_kernel, CL_KERNEL_WORK_GROUP_SIZE, sizeof(wgs), &wgs, NULL);
        cl_ndrange range = {
            1,
            {0, 0, 0},
            {NUM_VALUES, 0, 0},
            {wgs, 0, 0}
        };
        
        // Call the kernel with the input and output device buffers.
        square_kernel(&range, (cl_float*)mem_in, (cl_float*)mem_out);
        
        // Copy the kernel output from mem_out into the host array test_out.
        gcl_memcpy(test_out, mem_out, sizeof(cl_float) * NUM_VALUES);
    });