/**
 * Run a kernel command on the basic device.
 *
 * Builds the argument array that the work-group function expects, calls the
 * work-group function once for every work-group (x varies fastest, then y,
 * then z), and finally releases everything that was allocated for the launch.
 *
 * Each entry of the argument array is a pointer to the argument's value.  For
 * local buffers, NULL buffer pointers, images and samplers a small heap slot
 * holding the actual pointer is allocated here and freed again before
 * returning.
 *
 * @param data Device-private state; must not be NULL.  It is treated as a
 *             struct data and its current_kernel field is updated.
 * @param cmd  Command node providing the kernel, the argument values, the
 *             pocl_context and the work-group function to call.
 */
void
pocl_basic_run (void *data, _cl_command_node* cmd)
{
  struct data *d;
  struct pocl_argument *al;
  size_t x, y, z;
  unsigned i;
  cl_kernel kernel = cmd->command.run.kernel;
  struct pocl_context *pc = &cmd->command.run.pc;

  assert (data != NULL);
  d = (struct data *) data;

  d->current_kernel = kernel;

  /* One slot per explicit kernel argument followed by one per automatic
     local buffer. */
  void **arguments
    = malloc (sizeof (void *) * (kernel->num_args + kernel->num_locals));

  /* Process the kernel arguments. Convert the opaque buffer
     pointers to real device pointers, allocate dynamic local
     memory buffers, etc. */
  for (i = 0; i < kernel->num_args; ++i)
    {
      al = &(cmd->command.run.arguments[i]);
      if (kernel->arg_info[i].is_local)
        {
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i])
            = pocl_memalign_alloc (MAX_EXTENDED_ALIGNMENT, al->size);
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_POINTER)
        {
          /* It's legal to pass a NULL pointer to clSetKernelArg. In
             that case we must pass the same NULL forward to the kernel.
             Otherwise, the user must have created a buffer with per device
             pointers stored in the cl_mem. */
          if (al->value == NULL)
            {
              arguments[i] = malloc (sizeof (void *));
              *(void **)arguments[i] = NULL;
            }
          else
            {
              /* Not heap-allocated: points straight into the cl_mem's
                 per-device pointer table. */
              arguments[i]
                = &((*(cl_mem *) (al->value))
                      ->device_ptrs[cmd->device->dev_id].mem_ptr);
            }
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_IMAGE)
        {
          dev_image_t di;
          fill_dev_image_t (&di, al, cmd->device);

          void *devptr = pocl_memalign_alloc (MAX_EXTENDED_ALIGNMENT,
                                              sizeof (dev_image_t));
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = devptr;
          pocl_basic_write (data, &di, devptr, 0, sizeof (dev_image_t));
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_SAMPLER)
        {
          dev_sampler_t ds;
          fill_dev_sampler_t (&ds, al);

          void *devptr = pocl_memalign_alloc (MAX_EXTENDED_ALIGNMENT,
                                              sizeof (dev_sampler_t));
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = devptr;
          pocl_basic_write (data, &ds, devptr, 0, sizeof (dev_sampler_t));
        }
      else
        {
          /* Plain by-value argument: hand over the stored value directly. */
          arguments[i] = al->value;
        }
    }

  /* Automatic local buffers follow the explicit arguments. */
  for (i = kernel->num_args;
       i < kernel->num_args + kernel->num_locals;
       ++i)
    {
      al = &(cmd->command.run.arguments[i]);
      arguments[i] = malloc (sizeof (void *));
      *(void **)(arguments[i])
        = pocl_memalign_alloc (MAX_EXTENDED_ALIGNMENT, al->size);
    }

  for (z = 0; z < pc->num_groups[2]; ++z)
    {
      for (y = 0; y < pc->num_groups[1]; ++y)
        {
          for (x = 0; x < pc->num_groups[0]; ++x)
            {
              pc->group_id[0] = x;
              pc->group_id[1] = y;
              pc->group_id[2] = z;

              cmd->command.run.wg (arguments, pc);
            }
        }
    }

  /* Release the per-launch allocations.  The branch order mirrors the setup
     loop above so that every slot is released exactly the way it was
     created. */
  for (i = 0; i < kernel->num_args; ++i)
    {
      al = &(cmd->command.run.arguments[i]);
      if (kernel->arg_info[i].is_local)
        {
          POCL_MEM_FREE(*(void **)(arguments[i]));
          POCL_MEM_FREE(arguments[i]);
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_IMAGE
               || kernel->arg_info[i].type == POCL_ARG_TYPE_SAMPLER)
        {
          POCL_MEM_FREE(*(void **)(arguments[i]));
          POCL_MEM_FREE(arguments[i]);
        }
      else if (kernel->arg_info[i].type == POCL_ARG_TYPE_POINTER
               && al->value == NULL)
        {
          /* Only the NULL-argument case allocated a slot.  Decide by the
             argument itself rather than by the pointed-to value: for a real
             buffer, arguments[i] points into the cl_mem and must never be
             freed, even if the device pointer stored there is NULL. */
          POCL_MEM_FREE(arguments[i]);
        }
    }
  for (i = kernel->num_args;
       i < kernel->num_args + kernel->num_locals;
       ++i)
    {
      POCL_MEM_FREE(*(void **)(arguments[i]));
      POCL_MEM_FREE(arguments[i]);
    }
  free (arguments);
}
/**
 * Run a kernel command on the basic device.
 *
 * Generates and loads the kernel module from the command's temporary
 * directory, looks up the "_<kernel>_workgroup" entry point, builds the
 * argument array it expects, calls it once per work-group (x varies fastest,
 * then y, then z) and releases everything allocated for the launch.
 *
 * Failures to load the module, to find the entry point or to identify the
 * device inside the kernel's context print a message and abort().
 *
 * @param data Device-private state; must not be NULL.  It is treated as a
 *             struct data; its current_dlhandle and current_kernel fields
 *             are updated.
 * @param cmd  Command node providing the kernel, the argument values, the
 *             temporary directory and the pocl_context.
 */
void
pocl_basic_run (void *data, _cl_command_node* cmd)
{
  struct data *d;
  const char *module_fn;
  char workgroup_string[WORKGROUP_STRING_LENGTH];
  unsigned device = 0;
  int device_found = 0;
  struct pocl_argument *al;
  size_t x, y, z;
  unsigned i;
  pocl_workgroup w;
  char* tmpdir = cmd->command.run.tmp_dir;
  cl_kernel kernel = cmd->command.run.kernel;
  struct pocl_context *pc = &cmd->command.run.pc;

  assert (data != NULL);
  d = (struct data *) data;

  module_fn = llvm_codegen (tmpdir);
  d->current_dlhandle = lt_dlopen (module_fn);
  if (d->current_dlhandle == NULL)
    {
      printf ("pocl error: lt_dlopen(\"%s\") failed with '%s'.\n", module_fn, lt_dlerror());
      printf ("note: missing symbols in the kernel binary might be reported as 'file not found' errors.\n");
      abort();
    }

  d->current_kernel = kernel;

  /* Find which device number within the context correspond
     to current device.  */
  for (i = 0; i < kernel->context->num_devices; ++i)
    {
      if (kernel->context->devices[i]->data == data)
        {
          device = i;
          device_found = 1;
          break;
        }
    }
  if (!device_found)
    {
      /* Without a match the index would be indeterminate and every
         device_ptrs[] lookup below would be undefined. */
      printf ("pocl error: could not find the current device in the context of kernel '%s'.\n",
              kernel->function_name);
      abort();
    }

  snprintf (workgroup_string, WORKGROUP_STRING_LENGTH,
            "_%s_workgroup", kernel->function_name);

  w = (pocl_workgroup) lt_dlsym (d->current_dlhandle, workgroup_string);
  if (w == NULL)
    {
      printf("pocl error: could not load the work-group function '%s' in module '%s'.\n", workgroup_string, module_fn);
      abort();
    }
  /* The module file name is only needed for the messages above. */
  free ((void*) module_fn);

  /* One slot per explicit kernel argument followed by one per automatic
     local buffer. */
  void *arguments[kernel->num_args + kernel->num_locals];

  /* Process the kernel arguments. Convert the opaque buffer
     pointers to real device pointers, allocate dynamic local
     memory buffers, etc. */
  for (i = 0; i < kernel->num_args; ++i)
    {
      al = &(cmd->command.run.arguments[i]);
      if (kernel->arg_is_local[i])
        {
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = pocl_basic_malloc (data, 0, al->size, NULL);
        }
      else if (kernel->arg_is_pointer[i])
        {
          /* It's legal to pass a NULL pointer to clSetKernelArg. In
             that case we must pass the same NULL forward to the kernel.
             Otherwise, the user must have created a buffer with per device
             pointers stored in the cl_mem. */
          if (al->value == NULL)
            {
              arguments[i] = malloc (sizeof (void *));
              *(void **)arguments[i] = NULL;
            }
          else
            {
              /* Not heap-allocated: points straight into the cl_mem's
                 per-device pointer table. */
              arguments[i] = &((*(cl_mem *) (al->value))->device_ptrs[device]);
            }
        }
      else if (kernel->arg_is_image[i])
        {
          dev_image2d_t di;
          cl_mem mem = *(cl_mem*)al->value;

          /* Describe the image for the device side. */
          di.data = mem->device_ptrs[device];
          di.width = mem->image_width;
          di.height = mem->image_height;
          di.rowpitch = mem->image_row_pitch;
          di.order = mem->image_channel_order;
          di.data_type = mem->image_channel_data_type;

          void *devptr = pocl_basic_malloc (data, 0, sizeof (dev_image2d_t), NULL);
          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = devptr;
          pocl_basic_write (data, &di, devptr, sizeof (dev_image2d_t));
        }
      else if (kernel->arg_is_sampler[i])
        {
          /* NOTE(review): nothing here fills the sampler from the argument
             value; zero-initialize so that at least no indeterminate bytes
             are copied to the device.  Confirm the intended contents. */
          dev_sampler_t ds = {0};

          arguments[i] = malloc (sizeof (void *));
          *(void **)(arguments[i]) = pocl_basic_malloc (data, 0, sizeof (dev_sampler_t), NULL);
          pocl_basic_write (data, &ds, *(void**)arguments[i], sizeof (dev_sampler_t));
        }
      else
        {
          /* Plain by-value argument: hand over the stored value directly. */
          arguments[i] = al->value;
        }
    }

  /* Automatic local buffers follow the explicit arguments. */
  for (i = kernel->num_args;
       i < kernel->num_args + kernel->num_locals;
       ++i)
    {
      al = &(cmd->command.run.arguments[i]);
      arguments[i] = malloc (sizeof (void *));
      *(void **)(arguments[i]) = pocl_basic_malloc (data, 0, al->size, NULL);
    }

  for (z = 0; z < pc->num_groups[2]; ++z)
    {
      for (y = 0; y < pc->num_groups[1]; ++y)
        {
          for (x = 0; x < pc->num_groups[0]; ++x)
            {
              pc->group_id[0] = x;
              pc->group_id[1] = y;
              pc->group_id[2] = z;

              w (arguments, pc);
            }
        }
    }

  /* Release the per-launch allocations.  The branch order mirrors the setup
     loop above so that every slot is released exactly the way it was
     created: device-side storage through pocl_basic_free, the slot itself
     through free. */
  for (i = 0; i < kernel->num_args; ++i)
    {
      al = &(cmd->command.run.arguments[i]);
      if (kernel->arg_is_local[i])
        {
          pocl_basic_free (data, 0, *(void **)(arguments[i]));
          free (arguments[i]);
        }
      else if (kernel->arg_is_pointer[i])
        {
          /* Only the NULL-argument case allocated a slot; for a real buffer
             arguments[i] points into the cl_mem and must not be freed. */
          if (al->value == NULL)
            free (arguments[i]);
        }
      else if (kernel->arg_is_image[i] || kernel->arg_is_sampler[i])
        {
          pocl_basic_free (data, 0, *(void **)(arguments[i]));
          free (arguments[i]);
        }
    }
  for (i = kernel->num_args;
       i < kernel->num_args + kernel->num_locals;
       ++i)
    {
      pocl_basic_free (data, 0, *(void **)(arguments[i]));
      free (arguments[i]);
    }
}