/*! \brief Commits user data back to a distribution and releases the access claim.
 * If \a ptr is the framework's own buffer, no copy is performed; otherwise the
 * user data is copied into the distribution's backing memory under its lock.
 * Always drops one external reference on success (the pair to the Access call).
 * \return VX_SUCCESS on commit, VX_FAILURE for an invalid object or failed allocation.
 */
VX_API_ENTRY vx_status VX_API_CALL vxCommitDistribution(vx_distribution distribution, const void *ptr)
{
    vx_status status = VX_FAILURE;

    /* a commit is only legal on a valid distribution whose backing memory exists */
    if ((vxIsValidSpecificReference(&distribution->base, VX_TYPE_DISTRIBUTION) != vx_true_e) ||
        (vxAllocateMemory(distribution->base.context, &distribution->memory) != vx_true_e))
    {
        VX_PRINT(VX_ZONE_ERROR, "Not a valid object!\n");
        return status;
    }

    if (ptr != NULL)
    {
        vxSemWait(&distribution->base.lock);
        /* committing the internally mapped pointer needs no copy */
        if (ptr != distribution->memory.ptrs[0])
        {
            vx_size size = vxComputeMemorySize(&distribution->memory, 0);
            memcpy(distribution->memory.ptrs[0], ptr, size);
            VX_PRINT(VX_ZONE_INFO, "Copied distribution from %p to %p for "VX_FMT_SIZE" bytes\n", ptr, distribution->memory.ptrs[0], size);
        }
        vxSemPost(&distribution->base.lock);
        vxWroteToReference(&distribution->base);
    }
    /* release the external claim taken by the matching access call */
    vxDecrementReference(&distribution->base, VX_EXTERNAL);
    return VX_SUCCESS;
}
/*! \brief Releases the mapping table entry \a map_id, freeing any buffer it owns.
 * The table entry is cleared and marked unused; callers pass ids previously
 * returned by vxMemoryMap(). Thread-safe via the context's memory_maps_lock.
 */
VX_INT_API void vxMemoryUnmap(vx_context context, vx_uint32 map_id)
{
    /* take exclusive ownership of the mapping table */
    if (vxSemWait(&context->memory_maps_lock) != vx_true_e)
    {
        VX_PRINT(VX_ZONE_ERROR, "vxSemWait() failed!\n");
        return;
    }

    if (context->memory_maps[map_id].used == vx_true_e)
    {
        if (context->memory_maps[map_id].ptr != NULL)
        {
            /* this entry owns a heap buffer allocated at map time; release it
             * and wipe the whole record */
            free(context->memory_maps[map_id].ptr);
            memset(&context->memory_maps[map_id], 0, sizeof(vx_memory_map_t));
        }
        VX_PRINT(VX_ZONE_CONTEXT, "Removed memory mapping[%u]\n", map_id);
    }
    /* mark the slot free even if it was already unused */
    context->memory_maps[map_id].used = vx_false_e;

    /* we're done, unlock the table */
    vxSemPost(&context->memory_maps_lock);
} /* vxMemoryUnmap() */
/*! \brief Gives the caller access to a distribution's data.
 * Two modes, selected by the incoming value of *ptr:
 *  - *ptr == NULL: "map" — *ptr is set to the framework's internal buffer.
 *  - *ptr != NULL: "copy" — the distribution data is copied into the caller's buffer.
 * On success one external reference is taken; the caller must balance it with
 * vxCommitDistribution().
 * \note NOTE(review): \a usage is accepted but never consulted here — the data is
 * exposed/copied regardless of read/write intent; verify against the spec'd
 * access semantics before relying on VX_WRITE_ONLY skipping the copy.
 * \return VX_SUCCESS on access, VX_FAILURE for an invalid object or failed allocation.
 */
VX_API_ENTRY vx_status VX_API_CALL vxAccessDistribution(vx_distribution distribution, void **ptr, vx_enum usage)
{
    vx_status status = VX_FAILURE;
    if ((vxIsValidSpecificReference(&distribution->base, VX_TYPE_DISTRIBUTION) == vx_true_e) &&
        (vxAllocateMemory(distribution->base.context, &distribution->memory) == vx_true_e))
    {
        if (ptr != NULL)
        {
            vxSemWait(&distribution->base.lock);
            {
                vx_size size = vxComputeMemorySize(&distribution->memory, 0);
                vxPrintMemory(&distribution->memory);
                if (*ptr == NULL)
                {
                    /* map mode: hand back the internal pointer */
                    *ptr = distribution->memory.ptrs[0];
                }
                else
                {
                    /* copy mode (redundant `else if (*ptr != NULL)` simplified
                     * to plain `else` — the condition was always true here) */
                    memcpy(*ptr, distribution->memory.ptrs[0], size);
                }
            }
            vxSemPost(&distribution->base.lock);
            vxReadFromReference(&distribution->base);
        }
        /* hold an external claim until the matching commit */
        vxIncrementReference(&distribution->base, VX_EXTERNAL);
        status = VX_SUCCESS;
    }
    else
    {
        VX_PRINT(VX_ZONE_ERROR, "Not a valid object!\n");
    }
    return status;
}
/*! \brief Checks whether \a map_id refers to a live mapping of \a ref.
 * \return vx_true_e only when the id is in range, the slot is in use, and the
 * slot's reference matches \a ref; vx_false_e otherwise.
 */
VX_INT_API vx_bool vxFindMemoryMap(
    vx_context context,
    vx_reference ref,
    vx_map_id map_id)
{
    vx_bool worked = vx_false_e;
    vx_uint32 id = (vx_uint32)map_id;

    /* check index range */
    if (id < dimof(context->memory_maps))
    {
        /* lock the table for exclusive access */
        if (vx_true_e == vxSemWait(&context->memory_maps_lock))
        {
            if ((context->memory_maps[id].used == vx_true_e) &&
                (context->memory_maps[id].ref == ref))
            {
                worked = vx_true_e;
            }
            /* unlock the table.
             * BUG FIX: previously `worked = vxSemPost(...)` overwrote the
             * lookup result, so every in-range query returned the semaphore
             * post status rather than whether the mapping was found. */
            vxSemPost(&context->memory_maps_lock);
        }
    }

    return worked;
} /* vxFindMemoryMap() */
/*! \brief Records one read access on \a ref by bumping its read counter.
 * No-op when \a ref is NULL. The counter is updated under the reference's lock.
 */
void vxReadFromReference(vx_reference_t *ref)
{
    if (ref == NULL)
        return;
    vxSemWait(&ref->lock);
    ref->read_count++;
    vxSemPost(&ref->lock);
}
/*! \brief Takes one internal (framework-owned) reference on \a ref.
 * No-op when \a ref is NULL. The count is updated under the reference's lock.
 */
void vxIncrementIntReference(vx_reference_t *ref)
{
    if (ref == NULL)
        return;
    vxSemWait(&ref->lock);
    ref->internal_count++;
    VX_PRINT(VX_ZONE_REFERENCE, "Incremented Internal Reference Count to %u on "VX_FMT_REF"\n", ref->internal_count, ref);
    vxSemPost(&ref->lock);
}
/*! \brief Returns the sum of the external and internal reference counts on \a ref.
 * Returns 0 for a NULL reference. The two counters are read under the lock so
 * the sum is a consistent snapshot.
 */
vx_uint32 vxTotalReferenceCount(vx_reference_t *ref)
{
    vx_uint32 total = 0;
    if (ref != NULL)
    {
        vxSemWait(&ref->lock);
        total = ref->internal_count + ref->external_count;
        vxSemPost(&ref->lock);
    }
    return total;
}
/*! \brief Records one write access on \a ref by bumping its write counter.
 * If the reference has been extracted (shared outside a graph), any graphs
 * depending on it are marked contaminated. No-op when \a ref is NULL.
 */
void vxWroteToReference(vx_reference_t *ref)
{
    if (ref == NULL)
        return;
    vxSemWait(&ref->lock);
    ref->write_count++;
    /* a write to an extracted reference invalidates graphs that consume it */
    if (ref->extracted == vx_true_e)
    {
        vxContaminateGraphs(ref);
    }
    vxSemPost(&ref->lock);
}
/*! \brief Releases one internal reference on \a ref.
 * \return vx_true_e when the count was actually decremented; vx_false_e when
 * \a ref is NULL or the internal count was already zero (logged as a warning).
 */
vx_bool vxDecrementIntReference(vx_reference_t *ref)
{
    vx_bool decremented = vx_false_e;
    if (ref == NULL)
        return decremented;

    vxSemWait(&ref->lock);
    if (ref->internal_count != 0)
    {
        ref->internal_count--;
        VX_PRINT(VX_ZONE_REFERENCE, "Decremented Internal Reference Count to %u on "VX_FMT_REF"\n", ref->internal_count, ref);
        decremented = vx_true_e;
    }
    else
    {
        /* an underflow here means release calls are unbalanced somewhere */
        VX_PRINT(VX_ZONE_WARNING, "#### INTERNAL REF COUNT IS ALREADY ZERO!!! "VX_FMT_REF" type:%08x #####\n", ref, ref->type);
    }
    vxSemPost(&ref->lock);
    return decremented;
}
/*! \brief Copies a convolution's coefficients out to the caller's buffer.
 * When \a array is NULL only the access bookkeeping is performed (the caller
 * just claims the object). On success one external reference is taken, which
 * the matching commit call must release.
 * \note NOTE(review): size is computed as strides[0][1] * dims[0][1] — assumes
 * dimension index 1 spans the full coefficient block; verify against the
 * vx_memory_t layout used for convolutions.
 * \return VX_SUCCESS on access, VX_ERROR_INVALID_REFERENCE otherwise.
 */
vx_status vxAccessConvolutionCoefficients(vx_convolution conv, vx_int16 *array)
{
    vx_convolution_t *convolution = (vx_convolution_t *)conv;
    vx_status status = VX_ERROR_INVALID_REFERENCE;
    if ((vxIsValidSpecificReference(&convolution->base.base, VX_TYPE_CONVOLUTION) == vx_true_e) &&
        (vxAllocateMemory(convolution->base.base.context, &convolution->base.memory) == vx_true_e))
    {
        vxSemWait(&convolution->base.base.lock);
        if (array)
        {
            vx_size size = convolution->base.memory.strides[0][1] * convolution->base.memory.dims[0][1];
            memcpy(array, convolution->base.memory.ptrs[0], size);
        }
        vxSemPost(&convolution->base.base.lock);
        vxReadFromReference(&convolution->base.base);
        /* FIX: pass the reference type, consistent with every other
         * vxIncrementReference call site in this file (was missing the
         * VX_EXTERNAL argument). */
        vxIncrementReference(&convolution->base.base, VX_EXTERNAL);
        status = VX_SUCCESS;
    }
    return status;
}
/*! \brief Stores a new value into a scalar from the caller's pointer.
 * \a ptr is interpreted according to the scalar's own data_type; the value is
 * written under the scalar's lock.
 * \return VX_SUCCESS, VX_ERROR_INVALID_REFERENCE for a bad scalar,
 * VX_ERROR_INVALID_PARAMETERS for a NULL pointer, or
 * VX_ERROR_NOT_SUPPORTED for an unhandled data type.
 */
VX_API_ENTRY vx_status VX_API_CALL vxCommitScalarValue(vx_scalar scalar, void *ptr)
{
    vx_status status = VX_SUCCESS;
    if (vxIsValidSpecificReference(&scalar->base, VX_TYPE_SCALAR) == vx_false_e)
        return VX_ERROR_INVALID_REFERENCE;
    if (ptr == NULL)
        return VX_ERROR_INVALID_PARAMETERS;
    vxSemWait(&scalar->base.lock);
    switch (scalar->data_type)
    {
        case VX_TYPE_CHAR:     scalar->data.chr = *(vx_char *)ptr;     break;
        case VX_TYPE_INT8:     scalar->data.s08 = *(vx_int8 *)ptr;     break;
        case VX_TYPE_UINT8:    scalar->data.u08 = *(vx_uint8 *)ptr;    break;
        case VX_TYPE_INT16:    scalar->data.s16 = *(vx_int16 *)ptr;    break;
        case VX_TYPE_UINT16:   scalar->data.u16 = *(vx_uint16 *)ptr;   break;
        case VX_TYPE_INT32:    scalar->data.s32 = *(vx_int32 *)ptr;    break;
        case VX_TYPE_UINT32:   scalar->data.u32 = *(vx_uint32 *)ptr;   break;
        case VX_TYPE_INT64:    scalar->data.s64 = *(vx_int64 *)ptr;    break;
        case VX_TYPE_UINT64:   scalar->data.u64 = *(vx_uint64 *)ptr;   break;
#if OVX_SUPPORT_HALF_FLOAT
        case VX_TYPE_FLOAT16:  scalar->data.f16 = *(vx_float16 *)ptr;  break;
#endif
        case VX_TYPE_FLOAT32:  scalar->data.f32 = *(vx_float32 *)ptr;  break;
        case VX_TYPE_FLOAT64:  scalar->data.f64 = *(vx_float64 *)ptr;  break;
        case VX_TYPE_DF_IMAGE: scalar->data.fcc = *(vx_df_image *)ptr; break;
        case VX_TYPE_ENUM:     scalar->data.enm = *(vx_enum *)ptr;     break;
        case VX_TYPE_SIZE:     scalar->data.size = *(vx_size *)ptr;    break;
        case VX_TYPE_BOOL:     scalar->data.boolean = *(vx_bool *)ptr; break;
        default:
            VX_PRINT(VX_ZONE_ERROR, "some case is not covered in %s\n", __FUNCTION__);
            status = VX_ERROR_NOT_SUPPORTED;
            break;
    }
    /* FIX: only trace the value and mark the reference as written when a value
     * was actually stored; previously an unsupported type still triggered
     * vxWroteToReference() (and could contaminate graphs) despite no change. */
    if (status == VX_SUCCESS)
        vxPrintScalarValue(scalar);
    vxSemPost(&scalar->base.lock);
    if (status == VX_SUCCESS)
        vxWroteToReference(&scalar->base);
    return status;
}
/*! \brief Releases one external holder of the context; on the last release,
 * tears the whole context down: worker threads, references, modules, targets,
 * accessors, memory maps, and finally the context memory itself.
 * \param [in,out] c Pointer to the context handle; cleared to 0 on entry.
 * \return VX_SUCCESS, or VX_ERROR_INVALID_REFERENCE for an invalid context.
 * \note Teardown order matters: references must be collected before targets
 * are unloaded because some references hold target kernels internally.
 */
VX_API_ENTRY vx_status VX_API_CALL vxReleaseContext(vx_context *c)
{
    vx_status status = VX_SUCCESS;
    vx_context context = (c?*c:0);
    vx_uint32 r,m,a;
    vx_uint32 t;
    if (c) *c = 0;   /* caller's handle is invalidated regardless of outcome */
    vxSemWait(&context_lock);
    if (vxIsValidContext(context) == vx_true_e)
    {
        /* only the final external release performs destruction */
        if (vxDecrementReference(&context->base, VX_EXTERNAL) == 0)
        {
            /* stop the worker pool and the asynchronous graph thread first */
            vxDestroyThreadpool(&context->workers);
            context->proc.running = vx_false_e;
            vxPopQueue(&context->proc.input);   /* wake the graph thread so it can exit */
            vxJoinThread(context->proc.thread, NULL);
            vxDeinitQueue(&context->proc.output);
            vxDeinitQueue(&context->proc.input);

            /* Deregister any log callbacks if there is any registered */
            vxRegisterLogCallback(context, NULL, vx_false_e);

            /*! \internal Garbage collect all references:
             * 1. warn about references the user failed to release;
             * 2. close internally-opened error references;
             * 3. force-close remaining external references (their internal
             *    dependents follow via normal refcounting);
             * 4. do all of this BEFORE unloading targets, since some
             *    references hold internal references to target kernels. */
            for (r = 0; r < VX_INT_MAX_REF; r++)
            {
                vx_reference_t *ref = context->reftable[r];
                /* warnings only fire when users leaked external references */
                if (ref && ref->external_count > 0)
                {
                    VX_PRINT(VX_ZONE_WARNING,"Stale reference "VX_FMT_REF" of type %08x at external count %u, internal count %u\n",
                             ref, ref->type, ref->external_count, ref->internal_count);
                }
                /* error objects were opened internally at creation time */
                if(ref && ref->type == VX_TYPE_ERROR)
                {
                    vxReleaseReferenceInt(&ref, ref->type, VX_INTERNAL, NULL);
                }
                /* drain extra external holds, then release the last one */
                while (ref && ref->external_count > 1)
                {
                    vxDecrementReference(ref, VX_EXTERNAL);
                }
                if (ref && ref->external_count > 0)
                {
                    vxReleaseReferenceInt(&ref, ref->type, VX_EXTERNAL, NULL);
                }
            }

            /* unload user modules */
            for (m = 0; m < context->num_modules; m++)
            {
                if (context->modules[m].handle)
                {
                    vxUnloadModule(context->modules[m].handle);
                    memset(context->modules[m].name, 0, sizeof(context->modules[m].name));
                    context->modules[m].handle = VX_MODULE_INIT;
                }
            }

            /* de-initialize and unload each target */
            for (t = 0u; t < context->num_targets; t++)
            {
                if (context->targets[t].enabled == vx_true_e)
                {
                    context->targets[t].funcs.deinit(&context->targets[t]);
                    vxUnloadTarget(context, t, vx_true_e);
                    context->targets[t].enabled = vx_false_e;
                }
            }

            /* remove all outstanding accessors */
            for (a = 0; a < dimof(context->accessors); ++a)
                if (context->accessors[a].used)
                    vxRemoveAccessor(context, a);

            /* check for (and forcibly release) outstanding mappings */
            for (a = 0; a < dimof(context->memory_maps); ++a)
            {
                if (context->memory_maps[a].used)
                {
                    VX_PRINT(VX_ZONE_ERROR, "Memory map %d not unmapped\n", a);
                    vxMemoryUnmap(context, a);
                }
            }
            vxDestroySem(&context->memory_maps_lock);

            /* by now every external and internal reference should be gone */
            for (r = 0; r < VX_INT_MAX_REF; r++)
            {
                if(context->reftable[r])
                    VX_PRINT(VX_ZONE_ERROR,"Reference %d not removed\n", r);
            }
#ifdef EXPERIMENTAL_USE_HEXAGON
            remote_handle_close(tmp_ph);
#endif
            /*! \internal wipe away the context memory first.
             * Normally the semaphore is destroyed as part of releasing a
             * reference, but the context must do it by hand. */
            vxDestroySem(&((vx_reference )context)->lock);
            memset(context, 0, sizeof(vx_context_t));
            free((void *)context);
            vxDestroySem(&global_lock);
            vxSemPost(&context_lock);
            vxDestroySem(&context_lock);
            single_context = NULL;
            return status;   /* context_lock is already destroyed; skip the tail post */
        }
        else
        {
            VX_PRINT(VX_ZONE_WARNING, "Context still has %u holders\n", vxTotalReferenceCount(&context->base));
        }
    }
    else
    {
        status = VX_ERROR_INVALID_REFERENCE;
    }
    vxSemPost(&context_lock);
    return status;
}
/*! \brief Creates (or re-acquires) the singleton OpenVX context.
 * The first call builds the context: debug zones, worker threadpool, constant
 * error objects, target modules, priority table, and the asynchronous graph
 * thread. Subsequent calls return the same context with one more external
 * reference. Returns NULL/0 on allocation failure or when no targets load.
 * \note The #endif below closes a conditional signature (ICD variant) that
 * begins above this chunk.
 */
VX_API_ENTRY vx_context VX_API_CALL vxCreateContext()
#endif
{
    vx_context context = NULL;
    /* first-ever call: create the process-wide locks before taking them */
    if (single_context == NULL)
    {
        vxCreateSem(&context_lock, 1);
        vxCreateSem(&global_lock, 1);
    }
    vxSemWait(&context_lock);
    if (single_context == NULL)
    {
        /* read the environment variables for debugging flags */
        vx_set_debug_zone_from_env();
        context = VX_CALLOC(vx_context_t); /* \todo get from allocator? */
        if (context)
        {
            vx_uint32 p = 0u, p2 = 0u, t = 0u;
            context->p_global_lock = &global_lock;
            context->imm_border.mode = VX_BORDER_UNDEFINED;
            context->imm_border_policy = VX_BORDER_POLICY_DEFAULT_TO_UNDEFINED;
            context->next_dynamic_user_kernel_id = 0;
            context->next_dynamic_user_library_id = 1;
            vxInitReference(&context->base, NULL, VX_TYPE_CONTEXT, NULL);
#if !DISABLE_ICD_COMPATIBILITY
            context->base.platform = platform;
#endif
            vxIncrementReference(&context->base, VX_EXTERNAL);
            /* very deep work queues: one slot per possible reference */
            context->workers = vxCreateThreadpool(VX_INT_HOST_CORES, VX_INT_MAX_REF,
                                                  sizeof(vx_work_t), vxWorkerNode, context);
            vxCreateConstErrors(context);
#ifdef EXPERIMENTAL_USE_HEXAGON
            remote_handle_open((const char *)OPENVX_HEXAGON_NAME, &tmp_ph);
#endif
            /* load all targets */
            for (t = 0u; t < dimof(targetModules); t++)
            {
                if (vxLoadTarget(context, targetModules[t]) == VX_SUCCESS)
                {
                    context->num_targets++;
                }
            }
            if (context->num_targets == 0)
            {
                /* a context without targets cannot execute anything; bail out */
                VX_PRINT(VX_ZONE_ERROR, "No targets loaded!\n");
                free(context);
                vxSemPost(&context_lock);
                return 0;
            }
            /* initialize all targets */
            for (t = 0u; t < context->num_targets; t++)
            {
                if (context->targets[t].module.handle)
                {
                    /* call the target's init function */
                    if (context->targets[t].funcs.init(&context->targets[t]) != VX_SUCCESS)
                    {
                        VX_PRINT(VX_ZONE_WARNING, "Target %s failed to initialize!\n", context->targets[t].name);
                        /* unload this module */
                        vxUnloadTarget(context, t, vx_true_e);
                        break;
                    }
                    else
                    {
                        context->targets[t].enabled = vx_true_e;
                    }
                }
            }
            /* assign the targets by priority into the dispatch list */
            p2 = 0u;
            for (p = 0u; p < VX_TARGET_PRIORITY_MAX; p++)
            {
                for (t = 0u; t < context->num_targets; t++)
                {
                    vx_target_t * target = &context->targets[t];
                    if (p == target->priority)
                    {
                        context->priority_targets[p2] = t;
                        p2++;
                    }
                }
            }
            /* print out the priority list */
            for (t = 0u; t < context->num_targets; t++)
            {
                vx_target_t *target = &context->targets[context->priority_targets[t]];
                if (target->enabled == vx_true_e)
                {
                    VX_PRINT(VX_ZONE_TARGET, "target[%u]: %s\n", target->priority, target->name);
                }
            }
            /* create the internal thread which processes graphs for asynchronous mode */
            vxInitQueue(&context->proc.input);
            vxInitQueue(&context->proc.output);
            context->proc.running = vx_true_e;
            context->proc.thread = vxCreateThread(vxWorkerGraph, &context->proc);
            single_context = context;
            context->imm_target_enum = VX_TARGET_ANY;
            memset(context->imm_target_string, 0, sizeof(context->imm_target_string));
            /* memory maps table lock */
            vxCreateSem(&context->memory_maps_lock, 1);
        }
    }
    else
    {
        /* singleton already exists: hand it out with one more external hold */
        context = single_context;
        vxIncrementReference(&context->base, VX_EXTERNAL);
    }
    vxSemPost(&context_lock);
    return (vx_context)context;
}
/*! \brief Claims a free slot in the context's mapping table for \a ref.
 * When \a size is non-zero a staging buffer is allocated and returned through
 * \a ptr; the slot records usage/mem_type/flags plus per-type extra data
 * (image rect/plane, or array/LUT start-end range).
 * \return vx_true_e when a slot was claimed; vx_false_e when the table is full,
 * the staging allocation failed, or the table lock could not be taken.
 */
VX_INT_API vx_bool vxMemoryMap(
    vx_context context,
    vx_reference ref,
    vx_size size,
    vx_enum usage,
    vx_enum mem_type,
    vx_uint32 flags,
    void* extra_data,
    void** ptr,
    vx_map_id* map_id)
{
    vx_uint32 id;
    vx_uint8* buf = 0;
    vx_bool worked = vx_false_e;

    /* lock the table for modification */
    if (vx_true_e == vxSemWait(&context->memory_maps_lock))
    {
        for (id = 0u; id < dimof(context->memory_maps); id++)
        {
            if (context->memory_maps[id].used == vx_false_e)
            {
                VX_PRINT(VX_ZONE_CONTEXT, "Found free memory map slot[%u]\n", id);

                /* allocate mapped buffer if requested (by providing size != 0) */
                if (size != 0)
                {
                    buf = malloc(size);
                    if (buf == NULL)
                    {
                        vxSemPost(&context->memory_maps_lock);
                        return vx_false_e;
                    }
                }

                context->memory_maps[id].used     = vx_true_e;
                context->memory_maps[id].ref      = ref;
                context->memory_maps[id].ptr      = buf;
                context->memory_maps[id].usage    = usage;
                context->memory_maps[id].mem_type = mem_type;
                context->memory_maps[id].flags    = flags;

                /* FIX: guard against a NULL extra_data before dereferencing it
                 * (previously an image/array map with no extra data crashed);
                 * also removed a shadowed inner redeclaration of `extra`. */
                vx_memory_map_extra* extra = (vx_memory_map_extra*)extra_data;
                if (extra != NULL)
                {
                    if (VX_TYPE_IMAGE == ref->type)
                    {
                        context->memory_maps[id].extra.image_data.plane_index = extra->image_data.plane_index;
                        context->memory_maps[id].extra.image_data.rect        = extra->image_data.rect;
                    }
                    else if (VX_TYPE_ARRAY == ref->type || VX_TYPE_LUT == ref->type)
                    {
                        context->memory_maps[id].extra.array_data.start = extra->array_data.start;
                        context->memory_maps[id].extra.array_data.end   = extra->array_data.end;
                    }
                }

                *ptr = buf;
                *map_id = (vx_map_id)id;
                worked = vx_true_e;
                break;
            }
        }
        /* we're done, unlock the table.
         * BUG FIX: previously `worked = vxSemPost(...)` clobbered the result,
         * so a completely full table still reported success. */
        vxSemPost(&context->memory_maps_lock);
    }
    else
        worked = vx_false_e;

    return worked;
} /* vxMemoryMap() */
/*! \brief Calls an OpenCL kernel from an OpenVX graph node.
 * Steps:
 * \arg find the target and the cl kernel description for this node
 * \arg for each input/bidirectional object parameter, enqueue a write
 * \arg set every kernel argument (object metadata + cl_mem, or scalar value)
 * \arg enqueue the NDRange kernel, gated on the write events
 * \arg for each output/bidirectional object parameter, enqueue a read gated on
 *      the kernel event, then wait on all read events
 * \note This implementation uses the external API where possible but casts to
 * internal representations when needed; it is not an optimal OpenCL invocation.
 */
vx_status vxclCallOpenCLKernel(vx_node node, const vx_reference *parameters, vx_uint32 num)
{
    vx_status status = VX_FAILURE;
    vx_context context = node->base.context;
    vx_target target = (vx_target_t *)&node->base.context->targets[node->affinity];
    vx_cl_kernel_description_t *vxclk = vxclFindKernel(node->kernel->enumeration);
    vx_uint32 pidx, pln, didx, plidx, argidx;
    cl_int err = 0;
    size_t off_dim[3] = {0,0,0};
    size_t work_dim[3];
    /* size_t local_dim[3]; -- local work size left to the OpenCL runtime */
    cl_event writeEvents[VX_INT_MAX_PARAMS];
    cl_event readEvents[VX_INT_MAX_PARAMS];
    cl_int we = 0, re = 0;

    vxSemWait(&target->base.lock);

    /* platform and device selection are hard-wired to the first entries */
    plidx = 0;
    didx = 0;

    /* for each input/bidirectional data object, enqueue its upload and set the
     * kernel arguments; scalars/thresholds are passed by value instead */
    for (argidx = 0, pidx = 0; pidx < num; pidx++)
    {
        vx_reference ref = node->parameters[pidx];
        vx_enum dir = node->kernel->signature.directions[pidx];
        vx_enum type = node->kernel->signature.types[pidx];
        vx_memory_t *memory = NULL;

        switch (type)
        {
            case VX_TYPE_ARRAY:        memory = &((vx_array)ref)->memory; break;
            case VX_TYPE_CONVOLUTION:  memory = &((vx_convolution)ref)->base.memory; break;
            case VX_TYPE_DISTRIBUTION: memory = &((vx_distribution)ref)->memory; break;
            case VX_TYPE_IMAGE:        memory = &((vx_image)ref)->memory; break;
            case VX_TYPE_LUT:          memory = &((vx_lut_t*)ref)->memory; break;
            case VX_TYPE_MATRIX:       memory = &((vx_matrix)ref)->memory; break;
            /* VX_TYPE_PYRAMID: not handled here */
            case VX_TYPE_REMAP:        memory = &((vx_remap)ref)->memory; break;
            /* VX_TYPE_SCALAR / VX_TYPE_THRESHOLD are handled in the else branch below */
        }
        if (memory)
        {
            for (pln = 0; pln < memory->nptrs; pln++)
            {
                if (memory->cl_type == CL_MEM_OBJECT_BUFFER)
                {
                    if (type == VX_TYPE_IMAGE)
                    {
                        /* set the work dimensions from the plane size */
                        work_dim[0] = memory->dims[pln][VX_DIM_X];
                        work_dim[1] = memory->dims[pln][VX_DIM_Y];
                        /* width, height, stride_x, stride_y */
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_int32), &memory->dims[pln][VX_DIM_X]);
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_int32), &memory->dims[pln][VX_DIM_Y]);
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_int32), &memory->strides[pln][VX_DIM_X]);
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_int32), &memory->strides[pln][VX_DIM_Y]);
                        VX_PRINT(VX_ZONE_INFO, "Setting vx_image as Buffer with 4 parameters\n");
                    }
                    else if (type == VX_TYPE_ARRAY || type == VX_TYPE_LUT)
                    {
                        vx_array arr = (vx_array)ref;
                        /* sizeof item, active count, capacity, stride */
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&arr->item_size);
                        /* NOTE(review): num_items is passed as input even for
                         * output arrays — confirm the kernels expect this */
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&arr->num_items);
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&arr->capacity);
                        /* NOTE(review): strides is indexed with one subscript
                         * here ([VX_DIM_X]) while images use two ([pln][dim]) —
                         * verify this matches the vx_memory_t layout */
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_int32), &arr->memory.strides[VX_DIM_X]);
                        VX_PRINT(VX_ZONE_INFO, "Setting vx_buffer as Buffer with 4 parameters\n");
                    }
                    else if (type == VX_TYPE_MATRIX)
                    {
                        vx_matrix mat = (vx_matrix)ref;
                        /* columns, rows */
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&mat->columns);
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&mat->rows);
                        VX_PRINT(VX_ZONE_INFO, "Setting vx_matrix as Buffer with 2 parameters\n");
                    }
                    else if (type == VX_TYPE_DISTRIBUTION)
                    {
                        vx_distribution dist = (vx_distribution)ref;
                        /* num, range, offset, winsize */
                        vx_uint32 range = dist->memory.dims[0][VX_DIM_X] * dist->window_x;
                        /* NOTE(review): dims is indexed with one subscript here
                         * ([VX_DIM_X]) but with two ([0][VX_DIM_X]) just above —
                         * verify against the vx_memory_t layout */
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&dist->memory.dims[VX_DIM_X]);
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&range);
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&dist->offset_x);
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&dist->window_x);
                    }
                    else if (type == VX_TYPE_CONVOLUTION)
                    {
                        vx_convolution conv = (vx_convolution)ref;
                        /* columns, rows, scale */
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&conv->base.columns);
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&conv->base.rows);
                        err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint32), (vx_uint32 *)&conv->scale);
                    }
                    /* the data buffer itself is always the last per-plane argument */
                    err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(cl_mem), &memory->hdls[pln]);
                    CL_ERROR_MSG(err, "clSetKernelArg");
                    if (dir == VX_INPUT || dir == VX_BIDIRECTIONAL)
                    {
                        err = clEnqueueWriteBuffer(context->queues[plidx][didx],
                                                   memory->hdls[pln],
                                                   CL_TRUE, 0,
                                                   vxComputeMemorySize(memory, pln),
                                                   memory->ptrs[pln],
                                                   0, NULL, &ref->event);
                    }
                }
                else if (memory->cl_type == CL_MEM_OBJECT_IMAGE2D)
                {
                    vx_rectangle_t rect = {0};
                    vx_image image = (vx_image)ref;
                    vxGetValidRegionImage(image, &rect);
                    size_t origin[3] = {rect.start_x, rect.start_y, 0};
                    size_t region[3] = {rect.end_x-rect.start_x, rect.end_y-rect.start_y, 1};
                    /* set the work dimensions from the valid region */
                    work_dim[0] = rect.end_x-rect.start_x;
                    work_dim[1] = rect.end_y-rect.start_y;
                    VX_PRINT(VX_ZONE_INFO, "Setting vx_image as image2d_t wd={%zu,%zu} arg:%u\n",work_dim[0], work_dim[1], argidx);
                    err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(cl_mem), &memory->hdls[pln]);
                    CL_ERROR_MSG(err, "clSetKernelArg");
                    if (err != CL_SUCCESS)
                    {
                        VX_PRINT(VX_ZONE_ERROR, "Error Calling Kernel %s, parameter %u\n", node->kernel->name, pidx);
                    }
                    if (dir == VX_INPUT || dir == VX_BIDIRECTIONAL)
                    {
                        err = clEnqueueWriteImage(context->queues[plidx][didx],
                                                  memory->hdls[pln],
                                                  CL_TRUE,
                                                  origin, region,
                                                  memory->strides[pln][VX_DIM_Y],
                                                  0,
                                                  memory->ptrs[pln],
                                                  0, NULL, &ref->event);
                        CL_ERROR_MSG(err, "clEnqueueWriteImage");
                    }
                }
            }
        }
        else
        {
            /* non-memory parameters are passed by value */
            if (type == VX_TYPE_SCALAR)
            {
                vx_value_t value; /* largest platform atomic */
                vx_size size = 0ul;
                vx_scalar sc = (vx_scalar)ref;
                vx_enum stype = VX_TYPE_INVALID;
                vxReadScalarValue(sc, &value);
                vxQueryScalar(sc, VX_SCALAR_ATTRIBUTE_TYPE, &stype, sizeof(stype));
                size = vxSizeOfType(stype);
                err = clSetKernelArg(vxclk->kernels[plidx], argidx++, size, &value);
            }
            else if (type == VX_TYPE_THRESHOLD)
            {
                vx_enum ttype = 0;
                vx_threshold th = (vx_threshold)ref;
                vxQueryThreshold(th, VX_THRESHOLD_ATTRIBUTE_TYPE, &ttype, sizeof(ttype));
                if (ttype == VX_THRESHOLD_TYPE_BINARY)
                {
                    err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint8), &th->value);
                }
                else if (ttype == VX_THRESHOLD_TYPE_RANGE)
                {
                    err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint8), &th->lower);
                    err = clSetKernelArg(vxclk->kernels[plidx], argidx++, sizeof(vx_uint8), &th->upper);
                }
            }
        }
    }

    /* gather the write events so the kernel waits on all uploads */
    we = 0;
    for (pidx = 0; pidx < num; pidx++)
    {
        vx_reference ref = node->parameters[pidx];
        vx_enum dir = node->kernel->signature.directions[pidx];
        if (dir == VX_INPUT || dir == VX_BIDIRECTIONAL)
        {
            memcpy(&writeEvents[we++],&ref->event, sizeof(cl_event));
        }
    }

    /* local_dim[0] = 1; local_dim[1] = 1; -- NULL lets the runtime choose */
    err = clEnqueueNDRangeKernel(context->queues[plidx][didx],
                                 vxclk->kernels[plidx],
                                 2,
                                 off_dim,
                                 work_dim,
                                 NULL,
                                 we, writeEvents, &node->base.event);
    CL_ERROR_MSG(err, "clEnqueueNDRangeKernel");

    /* enqueue a read on all output data, gated on the kernel event */
    for (pidx = 0; pidx < num; pidx++)
    {
        vx_reference ref = node->parameters[pidx];
        vx_enum dir = node->kernel->signature.directions[pidx];
        vx_enum type = node->kernel->signature.types[pidx];
        if (dir == VX_OUTPUT || dir == VX_BIDIRECTIONAL)
        {
            vx_memory_t *memory = NULL;
            switch (type)
            {
                case VX_TYPE_ARRAY:        memory = &((vx_array)ref)->memory; break;
                case VX_TYPE_CONVOLUTION:  memory = &((vx_convolution)ref)->base.memory; break;
                case VX_TYPE_DISTRIBUTION: memory = &((vx_distribution)ref)->memory; break;
                case VX_TYPE_IMAGE:        memory = &((vx_image)ref)->memory; break;
                case VX_TYPE_LUT:          memory = &((vx_lut_t*)ref)->memory; break;
                case VX_TYPE_MATRIX:       memory = &((vx_matrix)ref)->memory; break;
                /* VX_TYPE_PYRAMID: not handled here */
                case VX_TYPE_REMAP:        memory = &((vx_remap)ref)->memory; break;
                /* VX_TYPE_SCALAR / VX_TYPE_THRESHOLD: nothing to read back */
            }
            if (memory)
            {
                for (pln = 0; pln < memory->nptrs; pln++)
                {
                    if (memory->cl_type == CL_MEM_OBJECT_BUFFER)
                    {
                        err = clEnqueueReadBuffer(context->queues[plidx][didx],
                                                  memory->hdls[pln],
                                                  CL_TRUE, 0,
                                                  vxComputeMemorySize(memory, pln),
                                                  memory->ptrs[pln],
                                                  1, &node->base.event, &ref->event);
                        CL_ERROR_MSG(err, "clEnqueueReadBuffer");
                    }
                    else if (memory->cl_type == CL_MEM_OBJECT_IMAGE2D)
                    {
                        vx_rectangle_t rect = {0};
                        vx_image image = (vx_image)ref;
                        vxGetValidRegionImage(image, &rect);
                        size_t origin[3] = {rect.start_x, rect.start_y, 0};
                        size_t region[3] = {rect.end_x-rect.start_x, rect.end_y-rect.start_y, 1};
                        /* set the work dimensions (for the trace print below) */
                        work_dim[0] = rect.end_x-rect.start_x;
                        work_dim[1] = rect.end_y-rect.start_y;
                        err = clEnqueueReadImage(context->queues[plidx][didx],
                                                 memory->hdls[pln],
                                                 CL_TRUE,
                                                 origin, region,
                                                 memory->strides[pln][VX_DIM_Y],
                                                 0,
                                                 memory->ptrs[pln],
                                                 1, &node->base.event, &ref->event);
                        CL_ERROR_MSG(err, "clEnqueueReadImage");
                        VX_PRINT(VX_ZONE_INFO, "Reading Image wd={%zu,%zu}\n", work_dim[0], work_dim[1]);
                    }
                }
            }
        }
    }

    /* gather the read events and wait for all downloads to complete */
    re = 0;
    for (pidx = 0; pidx < num; pidx++)
    {
        vx_reference ref = node->parameters[pidx];
        vx_enum dir = node->kernel->signature.directions[pidx];
        if (dir == VX_OUTPUT || dir == VX_BIDIRECTIONAL)
        {
            memcpy(&readEvents[re++],&ref->event, sizeof(cl_event));
        }
    }
    err = clFlush(context->queues[plidx][didx]);
    CL_ERROR_MSG(err, "Flush");
    VX_PRINT(VX_ZONE_TARGET, "Waiting for read events!\n");
    clWaitForEvents(re, readEvents);

    /* NOTE(review): only the last recorded err is checked; earlier failures
     * may be masked by a later success */
    if (err == CL_SUCCESS)
        status = VX_SUCCESS;
    /* exit: */
    VX_PRINT(VX_ZONE_API, "%s exiting %d\n", __FUNCTION__, status);
    vxSemPost(&target->base.lock);
    return status;
}
vx_status vxAccessArrayRangeInt(vx_array arr, vx_size start, vx_size end, vx_size *pStride, void **ptr, vx_enum usage) { vx_status status = VX_FAILURE; /* bad parameters */ if ((usage < VX_READ_ONLY) || (VX_READ_AND_WRITE < usage) || (ptr == NULL) || (start >= end) || (end > arr->num_items)) { return VX_ERROR_INVALID_PARAMETERS; } /* determine if virtual before checking for memory */ if (arr->base.is_virtual == vx_true_e) { if (arr->base.is_accessible == vx_false_e) { /* User tried to access a "virtual" array. */ VX_PRINT(VX_ZONE_ERROR, "Can not access a virtual array\n"); return VX_ERROR_OPTIMIZED_AWAY; } /* framework trying to access a virtual image, this is ok. */ } /* verify has not run or will not run yet. this allows this API to "touch" * the array to create it. */ if (vxAllocateArray(arr) == vx_false_e) { return VX_ERROR_NO_MEMORY; } /* POSSIBILITIES: * 1.) !*ptr && RO == COPY-ON-READ (make ptr=alloc) * 2.) !*ptr && WO == MAP * 3.) !*ptr && RW == MAP * 4.) *ptr && RO||RW == COPY (UNLESS MAP) */ /* MAP mode */ if (*ptr == NULL) { if ((usage == VX_WRITE_ONLY) || (usage == VX_READ_AND_WRITE)) { /*-- MAP --*/ status = VX_ERROR_NO_RESOURCES; /* lock the memory */ if(vxSemWait(&arr->memory.locks[0]) == vx_true_e) { vx_size offset = start * arr->item_size; *ptr = &arr->memory.ptrs[0][offset]; if (usage != VX_WRITE_ONLY) { vxReadFromReference(&arr->base); } vxIncrementReference(&arr->base, VX_EXTERNAL); status = VX_SUCCESS; } } else { /*-- COPY-ON-READ --*/ vx_size size = ((end - start) * arr->item_size); vx_uint32 a = 0u; vx_size *stride_save = calloc(1, sizeof(vx_size)); *stride_save = arr->item_size; if (vxAddAccessor(arr->base.context, size, usage, *ptr, &arr->base, &a, stride_save) == vx_true_e) { vx_size offset; *ptr = arr->base.context->accessors[a].ptr; offset = start * arr->item_size; memcpy(*ptr, &arr->memory.ptrs[0][offset], size); vxReadFromReference(&arr->base); vxIncrementReference(&arr->base, VX_EXTERNAL); status = VX_SUCCESS; } else { status = 
VX_ERROR_NO_MEMORY; vxAddLogEntry((vx_reference)arr, status, "Failed to allocate memory for COPY-ON-READ! Size="VX_FMT_SIZE"\n", size); } } if ((status == VX_SUCCESS) && (pStride != NULL)) { *pStride = arr->item_size; } } /* COPY mode */ else { vx_size size = ((end - start) * arr->item_size); vx_uint32 a = 0u; vx_size *stride_save = calloc(1, sizeof(vx_size)); if (pStride == NULL) { *stride_save = arr->item_size; pStride = stride_save; } else { *stride_save = *pStride; } if (vxAddAccessor(arr->base.context, size, usage, *ptr, &arr->base, &a, stride_save) == vx_true_e) { *ptr = arr->base.context->accessors[a].ptr; status = VX_SUCCESS; if ((usage == VX_WRITE_ONLY) || (usage == VX_READ_AND_WRITE)) { if (vxSemWait(&arr->memory.locks[0]) == vx_false_e) { status = VX_ERROR_NO_RESOURCES; } } if (status == VX_SUCCESS) { if (usage != VX_WRITE_ONLY) { int i; vx_uint8 *pSrc, *pDest; for (i = start, pDest = *ptr, pSrc = &arr->memory.ptrs[0][start * arr->item_size]; i < end; i++, pDest += *pStride, pSrc += arr->item_size) { memcpy(pDest, pSrc, arr->item_size); } vxReadFromReference(&arr->base); } vxIncrementReference(&arr->base, VX_EXTERNAL); } } else { status = VX_ERROR_NO_MEMORY; vxAddLogEntry((vx_reference)arr, status, "Failed to allocate memory for COPY-ON-READ! Size="VX_FMT_SIZE"\n", size); } } return status; }