static void
skc_raster_builder_cohort_grid_pfn_execute(skc_grid_t const grid)
{
  //
  // ALLOCATED RESOURCES
  //
  // path_ids        i
  // raster_ids      i
  // transforms      i
  // clips           i
  // fill_cmds       i
  // cq              -
  // cohort atomics  -
  // cmds            -
  // keys            -
  // meta            -
  //

  // allocate the cohort
  struct skc_raster_cohort * const cohort = skc_grid_get_data(grid);

  // get impl
  struct skc_raster_builder_impl * const impl    = cohort->impl;
  struct skc_runtime             * const runtime = impl->runtime;

  // acquire in-order cq
  cohort->cq = skc_runtime_acquire_cq_in_order(runtime);

  // alloc the snapshot -- could be zero-sized
  skc_extent_phw1g_tdrNs_snap_alloc(runtime,
                                    &impl->fill_cmds,
                                    &cohort->fill_cmds,
                                    cohort->cq,NULL);

  // flush the cq to get the fill running
  // cl(Flush(cohort->cq));

  // create split atomics
  skc_extent_thr_tdrw_alloc(runtime,&cohort->atomics,sizeof(struct skc_raster_cohort_atomic));

  // zero the atomics
  skc_extent_thr_tdrw_zero(&cohort->atomics,cohort->cq,NULL);

  // get config
  struct skc_config const * const config = runtime->config;

  // acquire device-side extents
  skc_extent_tdrw_alloc(runtime,
                        &cohort->cmds,
                        sizeof(union skc_cmd_rasterize) * config->raster_cohort.expand.cmds);

  //
  // FILLS EXPAND
  //
  // need result of cmd counts before launching RASTERIZE grids
  //
  // - OpenCL 1.2: copy atomic counters back to host and launch RASTERIZE grids from host
  // - OpenCL 2.x: have a kernel size and launch RASTERIZE grids from device
  // - or launch a device-wide grid that feeds itself but that's unsatisfying
  //

  // how many commands? could be zero
  skc_uint const work_size = skc_extent_ring_snap_count(cohort->fill_cmds.snap);

  if (work_size > 0)
    {
      cl(SetKernelArg(impl->kernels.fills_expand,0,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));
      cl(SetKernelArg(impl->kernels.fills_expand,1,SKC_CL_ARG(cohort->atomics.drw)));
      cl(SetKernelArg(impl->kernels.fills_expand,2,SKC_CL_ARG(runtime->handle_pool.map.drw)));
      cl(SetKernelArg(impl->kernels.fills_expand,3,SKC_CL_ARG(cohort->fill_cmds.drN)));
      cl(SetKernelArg(impl->kernels.fills_expand,4,SKC_CL_ARG(cohort->cmds.drw)));

      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_FILLS_EXPAND,
                                cohort->cq,
                                impl->kernels.fills_expand,
                                work_size,
                                0,NULL,NULL);
    }

  //
  // copyback number of rasterization commands
  //
  cl_event complete;

  skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);

  cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_fills_expand_cb,grid));
  cl(ReleaseEvent(complete));

  // flush command queue
  cl(Flush(cohort->cq));

  //
  // ALLOCATED RESOURCES
  //
  // path_ids        i
  // raster_ids      i
  // transforms      i
  // clips           i
  // fill_cmds       s
  // cq              a
  // cohort atomics  a
  // cmds            a
  // keys            -
  // meta            -
  //
}
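//
// NOTE: a minimal sketch -- an assumption, not the actual skc headers -- of
// the two helpers leaned on throughout these functions: cl(...) is presumed
// to wrap an OpenCL call with error checking, and SKC_CL_ARG(x) is presumed
// to expand to the (size,pointer) pair that clSetKernelArg() expects:
//
//   #define SKC_CL_ARG(arg)  sizeof(arg),&arg
//
//   #define cl(...)          assert_cl(cl##__VA_ARGS__,__FILE__,__LINE__)
//
//   static cl_int assert_cl(cl_int err, char const * file, int line)
//   {
//     if (err != CL_SUCCESS) {
//       fprintf(stderr,"%s(%d): OpenCL error %d\n",file,line,err);
//       exit(err);
//     }
//     return err;
//   }
//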
static void
skc_raster_cohort_rasterize(skc_grid_t const grid)
{
  //
  // ALLOCATED RESOURCES
  //
  // path_ids        i
  // raster_ids      i
  // transforms      i
  // clips           i
  // fill_cmds       s
  // cq              a
  // cohort atomics  a
  // cmds            a
  // cmds_quad       a
  // cmds_cubic      a
  // keys            -
  // meta            -
  //

  // use the backpointers
  struct skc_raster_cohort       * const cohort  = skc_grid_get_data(grid);
  struct skc_raster_builder_impl * const impl    = cohort->impl;
  struct skc_runtime             * const runtime = impl->runtime;

  //
  // RELEASED RESOURCES
  //
  // fill_cmds snap
  //

  // release the fill_cmds extent and snap since they're only used by the expand stage
  skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->fill_cmds);

  //
  // NEW ALLOCATED RESOURCES
  //
  // transforms snap
  // clips snap
  // ttrk keys
  //
  skc_extent_phw1g_tdrNs_snap_alloc(runtime,
                                    &impl->transforms,
                                    &cohort->transforms,
                                    cohort->cq,NULL);

  skc_extent_phw1g_tdrNs_snap_alloc(runtime,
                                    &impl->clips,
                                    &cohort->clips,
                                    cohort->cq,NULL);

  // acquire device-side extent
  skc_extent_tdrw_alloc(runtime,
                        &cohort->keys,
                        sizeof(union skc_ttrk) * runtime->config->raster_cohort.rasterize.keys);

  // skc_extent_thrw_tdrw_alloc(runtime,
  //                            &cohort->keys,
  //                            sizeof(union skc_ttrk) * runtime->config->raster_cohort.rasterize.keys);

  //
  // acquire out-of-order command queue
  //
  // and launch up to 3 kernels
  //
  // for each kernel:
  //
  //   set runtime "global" kernel args:
  //
  //   - block pool atomics
  //   - block pool extent
  //
  //   set cohort "local" kernel args:
  //
  //   - atomics
  //   - cmds
  //
  // enqueue barrier
  // enqueue copy back of atomics on the command queue
  // set callback on copy back event
  // release command queue
  //
  struct skc_raster_cohort_atomic const * const atomics = cohort->atomics.hr;

  if (atomics->cmds > 0)
    {
      cl(SetKernelArg(impl->kernels.rasterize_all,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,1,SKC_CL_ARG(runtime->block_pool.blocks.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,2,SKC_CL_ARG(runtime->block_pool.ids.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,3,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
      cl(SetKernelArg(impl->kernels.rasterize_all,4,SKC_CL_ARG(cohort->atomics.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,5,SKC_CL_ARG(cohort->keys.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,6,SKC_CL_ARG(cohort->transforms.drN)));
      cl(SetKernelArg(impl->kernels.rasterize_all,7,SKC_CL_ARG(cohort->clips.drN)));
      cl(SetKernelArg(impl->kernels.rasterize_all,8,SKC_CL_ARG(cohort->cmds.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,9,SKC_CL_ARG(atomics->cmds)));

      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_RASTERIZE_ALL,
                                cohort->cq,
                                impl->kernels.rasterize_all,
                                atomics->cmds,
                                0,NULL,NULL);
    }

  //
  // copyback number of TTRK keys
  //
  cl_event complete;

  skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);

  cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_rasterize_cb,grid));
  cl(ReleaseEvent(complete));

  // flush command queue
  cl(Flush(cohort->cq));

  //
  // ALLOCATED RESOURCES
  //
  // path_ids        i
  // raster_ids      i
  // transforms      a
  // clips           a
  // fill_cmds       -
  // cq              a
  // cohort atomics  a
  // cmds            a
  // keys            a
  // meta            -
  //
}
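//
// For reference, a hypothetical sketch of the callback shape required by
// clSetEventCallback() above. The real skc_raster_cohort_rasterize_cb is not
// shown in this excerpt; it is assumed to match this prototype and to defer
// the follow-on stage back to the grid scheduler rather than running it on
// the OpenCL driver's callback thread:
//
//   static void CL_CALLBACK skc_raster_cohort_rasterize_cb(cl_event event, cl_int status, void * data)
//   {
//     skc_grid_t const grid = data;   // the grid handle passed as user data above
//
//     if (status == CL_COMPLETE)
//       skc_grid_force(grid);         // assumed scheduler hook -- not defined in this excerpt
//   }
//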
/* void setKernelArg (in long aIndex, in nsIVariant aValue, [optional] in long aType); */
NS_IMETHODIMP WebCLKernel::SetKernelArg(PRInt32 aIndex, nsIVariant *aValue, PRInt32 aType, JSContext *cx)
{
  D_METHOD_START;
  NS_ENSURE_ARG_POINTER (aValue);
  NS_ENSURE_ARG_POINTER (cx);
  nsresult rv = NS_OK;

  if (aType == types::UNKNOWN)
  {
    PRUint16 variantType = 0;
    rv = aValue->GetDataType (&variantType);

    // If the type is unknown or the user chose not to give an explicit type,
    // we try to guess it based on the type of the variant.
    switch (variantType)
    {
      case nsIDataType::VTYPE_INT8:
        return SetKernelArg (aIndex, aValue, types::CHAR, cx);
      case nsIDataType::VTYPE_INT16:
        return SetKernelArg (aIndex, aValue, types::SHORT, cx);
      case nsIDataType::VTYPE_INT32:
        return SetKernelArg (aIndex, aValue, types::INT, cx);
      case nsIDataType::VTYPE_INT64:
        return SetKernelArg (aIndex, aValue, types::LONG, cx);
      case nsIDataType::VTYPE_UINT8:
        return SetKernelArg (aIndex, aValue, types::UCHAR, cx);
      case nsIDataType::VTYPE_UINT16:
        return SetKernelArg (aIndex, aValue, types::USHORT, cx);
      case nsIDataType::VTYPE_UINT32:
        return SetKernelArg (aIndex, aValue, types::UINT, cx);
      case nsIDataType::VTYPE_UINT64:
        return SetKernelArg (aIndex, aValue, types::ULONG, cx);
      case nsIDataType::VTYPE_FLOAT:
        return SetKernelArg (aIndex, aValue, types::FLOAT, cx);
      case nsIDataType::VTYPE_DOUBLE:
        return SetKernelArg (aIndex, aValue, types::DOUBLE, cx);
      case nsIDataType::VTYPE_BOOL:
        return SetKernelArg (aIndex, aValue, types::BOOL, cx);
      case nsIDataType::VTYPE_CHAR:
      case nsIDataType::VTYPE_WCHAR:
        return SetKernelArg (aIndex, aValue, types::CHAR, cx);
      case nsIDataType::VTYPE_CHAR_STR:
      case nsIDataType::VTYPE_WCHAR_STR:
      case nsIDataType::VTYPE_UTF8STRING:
      case nsIDataType::VTYPE_ASTRING:
      case nsIDataType::VTYPE_CSTRING:
        return SetKernelArg (aIndex, aValue, types::STRING, cx);
      case nsIDataType::VTYPE_INTERFACE:
      case nsIDataType::VTYPE_INTERFACE_IS:
      {
        // Try conversions to supported WebCL interfaces.
        nsCOMPtr<IWebCLMemoryObject> memObj;
        rv = aValue->GetAsISupports (getter_AddRefs(memObj));
        if (NS_SUCCEEDED (rv))
          return SetKernelArg (aIndex, aValue, types::MEMORY_OBJECT, cx);

        nsCOMPtr<IWebCLSampler> samplerObj;
        rv = aValue->GetAsISupports (getter_AddRefs(samplerObj));
        if (NS_SUCCEEDED (rv))
          return SetKernelArg (aIndex, aValue, types::SAMPLER, cx);

        // None found -- fall through intentionally to the error handling below.
      }
      case nsIDataType::VTYPE_ARRAY:
      case nsIDataType::VTYPE_EMPTY_ARRAY:
        D_LOG (LOG_LEVEL_ERROR, "Array support not implemented.");
        WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Array support not implemented.");
        return WEBCL_XPCOM_ERROR; //NS_ERROR_INVALID_ARG;
      case nsIDataType::VTYPE_EMPTY:
      case nsIDataType::VTYPE_VOID:
      case nsIDataType::VTYPE_ID:
      default:
        D_LOG (LOG_LEVEL_ERROR, "Unable to guess type from variant (type %u) and no type given by the user.", variantType);
        WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Unable to guess type from variant (type %u) and no type given by the user.", variantType);
        return WEBCL_XPCOM_ERROR; //NS_ERROR_INVALID_ARG;
    }
  }

  size_t sze = 0;
  void* value = 0;

  switch (aType)
  {
    case types::BYTE:
    case types::CHAR:
      value = (void*)malloc (sze = sizeof (cl_char));
      if (value) rv = variantToCLChar (aValue, (cl_char*)value);
      break;
    case types::UCHAR:
      value = (void*)malloc (sze = sizeof (cl_uchar));
      if (value) rv = variantToCLUChar (aValue, (cl_uchar*)value);
      break;
    case types::SHORT:
      value = (void*)malloc (sze = sizeof (cl_short));
      if (value) rv = variantToCLShort (aValue, (cl_short*)value);
      break;
    case types::USHORT:
      value = (void*)malloc (sze = sizeof (cl_ushort));
      if (value) rv = variantToCLUShort (aValue, (cl_ushort*)value);
      break;
    case types::BUILD_STATUS:
    case types::INT:
      value = (void*)malloc (sze = sizeof (cl_int));
      if (value) rv = variantToCLInt (aValue, (cl_int*)value);
      break;
    case types::ADRESSING_MODE:
    case types::CHANNEL_ORDER:
    case types::CHANNEL_TYPE:
    case types::COMMAND_TYPE:
    case types::DEVICE_LOCAL_MEM_TYPE:
    case types::DEVICE_MEM_CACHE_TYPE:
    case types::FILTER_MODE:
    case types::GL_OBJECT_TYPE:
    case types::MEM_OBJECT_TYPE:
    case types::UINT:
      value = (void*)malloc (sze = sizeof (cl_uint));
      if (value) rv = variantToCLUInt (aValue, (cl_uint*)value);
      break;
    case types::LONG:
      value = (void*)malloc (sze = sizeof (cl_long));
      if (value) rv = variantToCLLong (aValue, (cl_long*)value);
      break;
    case types::COMMAND_QUEUE_PROPERTIES:  // bitfield
    case types::DEVICE_EXEC_CAPABILITIES:  // bitfield
    case types::DEVICE_FP_CONFIG:          // bitfield
    case types::DEVICE_TYPE:               // bitfield
    case types::MAP_FLAGS:                 // bitfield
    case types::MEM_FLAGS:                 // bitfield
    case types::ULONG:
      value = (void*)malloc (sze = sizeof (cl_ulong));
      if (value) rv = variantToCLULong (aValue, (cl_ulong*)value);
      break;
    case types::BOOL:
      value = (void*)malloc (sze = sizeof (cl_bool));
      if (value) rv = variantToCLBool (aValue, (cl_bool*)value);
      break;
    case types::SIZE_T:
      value = (void*)malloc (sze = sizeof (size_t));
      if (value) rv = variantToCLSizeT (aValue, (size_t*)value);
      break;
    case types::HALF:
      value = (void*)malloc (sze = sizeof (cl_half));
      if (value) rv = variantToCLHalf (aValue, (cl_half*)value);
      break;
    case types::FLOAT:
      value = (void*)malloc (sze = sizeof (cl_float));
      if (value) rv = variantToCLFloat (aValue, (cl_float*)value);
      break;
    case types::DOUBLE:
      value = (void*)malloc (sze = sizeof (cl_double));
      if (value) rv = variantToCLDouble (aValue, (cl_double*)value);
      break;
    case types::STRING:
    {
      nsCString str;
      rv = aValue->GetAsACString (str);
      if (NS_SUCCEEDED (rv))
      {
        cl_int err = mWrapper->setKernelArg (mInternal, aIndex, str.Length () + 1,
                                             (void*)str.get ());
        ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
        ENSURE_CL_OP_SUCCESS (err);
        // The string argument has been set; don't fall into the generic
        // scalar path below, which would misreport an allocation failure.
        return NS_OK;
      }
      break;
    }
    case types::BYTE_V:
    case types::CHAR_V:
    case types::UCHAR_V:
    case types::SHORT_V:
    case types::USHORT_V:
    case types::CONTEXT_PROPERTIES:
    case types::INT_V:
    case types::UINT_V:
    case types::LONG_V:
    case types::ULONG_V:
    case types::BOOL_V:
    case types::SIZE_T_V:
    case types::HALF_V:
    case types::FLOAT_V:
    case types::DOUBLE_V:
    case types::STRING_V:
      D_LOG (LOG_LEVEL_ERROR, "Array types are not supported.");
      WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Array types are not supported.");
      return WEBCL_XPCOM_ERROR; //NS_ERROR_NOT_IMPLEMENTED;
    case types::MEMORY_OBJECT:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLMemoryObject> memObject = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_mem wrapped = memObject->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_mem), (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }
    case types::SAMPLER:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLSampler> sampler = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_sampler wrapped = sampler->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_sampler), (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }
    case types::PLATFORM:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLPlatform> platform = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_platform_id wrapped = platform->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_platform_id), (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }
    case types::DEVICE:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLDevice> device = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_device_id wrapped = device->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_device_id), (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }
    case types::CONTEXT:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLContext> context = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_context wrapped = context->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_context), (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }
    case types::COMMAND_QUEUE:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLCommandQueue> cmdQueue = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_command_queue wrapped = cmdQueue->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_command_queue), (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }
    case types::PROGRAM:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLProgram> program = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_program wrapped = program->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_program), (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }
    case types::KERNEL:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLKernel> kernel = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_kernel wrapped = kernel->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_kernel), (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }
    case types::EVENT:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLEvent> event = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_event wrapped = event->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_event), (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }
    default:
      D_LOG (LOG_LEVEL_ERROR, "Unsupported type %d at argument index %u", aType, aIndex);
      WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Unsupported type %d at argument index %u.", aType, aIndex);
      //rv = NS_ERROR_INVALID_ARG;
      return WEBCL_XPCOM_ERROR;
  }

  if (NS_SUCCEEDED (rv))
  {
    if (value)
    {
      cl_int err = mWrapper->setKernelArg (mInternal, aIndex, sze, value);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (err);
    }
    else
    {
      D_LOG (LOG_LEVEL_ERROR, "Memory allocation failed for kernel argument at index %d.", aIndex);
      WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Memory allocation failed for kernel argument at index %d.", aIndex);
      rv = WEBCL_XPCOM_ERROR; //NS_ERROR_OUT_OF_MEMORY;
    }
  }
  else
  {
    D_LOG (LOG_LEVEL_ERROR, "Failed to convert kernel argument at index %d.", aIndex);
    WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Failed to convert kernel argument at index %d.", aIndex);
    rv = WEBCL_XPCOM_ERROR;
  }

  if (value)
    free (value);

  return rv;
}
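/*
 * The scalar cases above all reduce to the same three steps; a condensed
 * C-level sketch (hypothetical helper, not part of the WebCL bindings):
 * reserve storage of the CL type's size, convert the variant's payload into
 * it, and pass the (size, pointer) pair to clSetKernelArg().
 *
 *   static cl_int set_scalar_arg_int (cl_kernel kernel, cl_uint index, int hostValue)
 *   {
 *     cl_int value = (cl_int) hostValue;   // conversion step, i.e. variantToCLInt above
 *     return clSetKernelArg (kernel, index, sizeof (value), &value);
 *   }
 */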
static void
skc_raster_cohort_sort_prefix(skc_grid_t const grid)
{
  //
  // ALLOCATED RESOURCES
  //
  // path_ids        i
  // raster_ids      i
  // transforms      a
  // clips           a
  // fill_cmds       -
  // cq              a
  // cohort atomics  a
  // cmds            a
  // keys            a
  // meta            -
  //

  // use the backpointers
  struct skc_raster_cohort       * const cohort  = skc_grid_get_data(grid);
  struct skc_raster_builder_impl * const impl    = cohort->impl;
  struct skc_runtime             * const runtime = impl->runtime;

  // release transforms
  skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->transforms);

  // release clips
  skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->clips);

  // release expanded cmds
  skc_extent_tdrw_free(runtime,&cohort->cmds);

  // alloc the snapshot -- could be zero-sized
  skc_extent_phrwg_tdrNs_snap_alloc(runtime,
                                    &impl->raster_ids,
                                    &cohort->raster_ids,
                                    cohort->cq,NULL);

  // will never be zero
  skc_uint const rasters = skc_extent_ring_snap_count(cohort->raster_ids.snap);

  // acquire fixed-size device-side extent
  skc_extent_tdrw_alloc(runtime,
                        &cohort->metas,
                        sizeof(struct skc_raster_cohort_meta));

  // skc_extent_thrw_tdrw_alloc(runtime,
  //                            &cohort->metas,
  //                            sizeof(struct skc_raster_cohort_meta));

  // zero the metas
  skc_extent_tdrw_zero(&cohort->metas,cohort->cq,NULL);

  // get the read-only host copy of the device atomics
  struct skc_raster_cohort_atomic const * const atomics = cohort->atomics.hr;

  //
  // SORT
  //
  if (atomics->keys > 0)
    {
#ifndef NDEBUG
      fprintf(stderr,"raster cohort sort: %u\n",atomics->keys);
#endif

      //
      //
      //
      uint32_t keys_padded_in, keys_padded_out;

      hs_cl_pad(runtime->hs,atomics->keys,&keys_padded_in,&keys_padded_out);

      hs_cl_sort(runtime->hs,
                 cohort->cq,
                 0,NULL,NULL,
                 cohort->keys.drw,
                 NULL,
                 atomics->keys,
                 keys_padded_in,
                 keys_padded_out,
                 false);

      cl(SetKernelArg(impl->kernels.segment,0,SKC_CL_ARG(cohort->keys.drw)));
      cl(SetKernelArg(impl->kernels.segment,1,SKC_CL_ARG(cohort->metas.drw)));

#ifndef NDEBUG
      fprintf(stderr,"post-sort\n");
#endif

      // find start of each tile
      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_SEGMENT_TTRK,
                                cohort->cq,
                                impl->kernels.segment,
                                atomics->keys,
                                0,NULL,NULL);

#ifndef NDEBUG
      fprintf(stderr,"post-segment\n");
#endif

      //
      // DELETE ALL THIS WHEN READY
      //
#if 0
      //
      //
      //
      cl(Finish(cohort->cq));

      // map keys to host
      union skc_ttrk * const keys = skc_extent_thrw_tdrw_map(&cohort->keys,
                                                             cohort->cq,
                                                             NULL);
      // map meta to host
      struct skc_raster_cohort_meta * const metas = skc_extent_thrw_tdrw_map(&cohort->metas,
                                                                             cohort->cq,
                                                                             NULL);
      // block until done
      cl(Finish(cohort->cq));

      // sort keys
      qsort(keys,atomics->keys,sizeof(*keys),cmp64);

      // mask to determine if rk id is a new block
      skc_uint const subblock_mask = runtime->config->block.subblocks - 1;

      //
      // some counters
      //
      union skc_raster_cohort_meta_in meta_in = {
        .blocks = 0,
        .offset = 0,
        .pk     = 0,
        .rk     = 0
      };

      // get first key
      union skc_ttrk curr = keys[0];

      skc_uint ii=0, jj=0;

      // for all TTRK keys
      while (true)
        {
          // increment ttrk count
          meta_in.rk += 1;

          // was this a new block?
          if ((curr.u32v2.lo & subblock_mask) == 0)
            meta_in.blocks += 1;

          // break if we're out of keys
          if (++ii >= atomics->keys)
            break;

          // otherwise, process next key
          union skc_ttrk const next = keys[ii];

          // if new cohort then save curr meta and init next meta
          if (next.cohort != curr.cohort)
            {
              fprintf(stderr,"[ %u, %u, %u, %u ]\n",
                      meta_in.blocks,
                      meta_in.offset,
                      meta_in.pk,
                      meta_in.rk);

              // store back to buffer
              metas->inout[curr.cohort].in = meta_in;

              // update meta_in
              meta_in.blocks = 0;
              meta_in.offset = ii;
              meta_in.pk     = 0;
              meta_in.rk     = 0;
            }
          // otherwise, if same y but new x then increment TTPK count
          else if ((next.y == curr.y) && (next.x != curr.x))
            {
              meta_in.pk += 1;

#if 0
              fprintf(stderr,"%3u : %3u : ( %3u, %3u ) -> ( %3u )\n",
                      jj++,curr.cohort,curr.y,curr.x,next.x);
#endif
            }

#if 0
          fprintf(stderr,"( %3u, %3u )\n",next.y,next.x);
#endif

          curr = next;
        }

      fprintf(stderr,"[ %u, %u, %u, %u ]\n",
              meta_in.blocks,
              meta_in.offset,
              meta_in.pk,
              meta_in.rk);

      // store back to buffer
      metas->inout[curr.cohort].in = meta_in;

      // unmap
      skc_extent_thrw_tdrw_unmap(&cohort->keys,
                                 keys,
                                 cohort->cq,
                                 NULL);

      // unmap
      skc_extent_thrw_tdrw_unmap(&cohort->metas,
                                 metas,
                                 cohort->cq,
                                 NULL);
#endif
    }

#ifndef NDEBUG
  fprintf(stderr,"rasters_alloc: %u\n",rasters);
#endif

  //
  // RASTER ALLOC/INIT
  //
  cl(SetKernelArg(impl->kernels.rasters_alloc,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,1,SKC_CL_ARG(runtime->block_pool.ids.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,2,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,3,SKC_CL_ARG(runtime->handle_pool.map.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,4,SKC_CL_ARG(cohort->metas.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,5,SKC_CL_ARG(cohort->raster_ids.drN)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,6,SKC_CL_ARG(rasters)));

  skc_device_enqueue_kernel(runtime->device,
                            SKC_DEVICE_KERNEL_ID_RASTERS_ALLOC,
                            cohort->cq,
                            impl->kernels.rasters_alloc,
                            rasters,
                            0,NULL,NULL);

#ifndef NDEBUG
  fprintf(stderr,"post-alloc\n");
#endif

  //
  // PREFIX
  //
  cl(SetKernelArg(impl->kernels.prefix,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
  cl(SetKernelArg(impl->kernels.prefix,1,SKC_CL_ARG(runtime->block_pool.ids.drw)));
  cl(SetKernelArg(impl->kernels.prefix,2,SKC_CL_ARG(runtime->block_pool.blocks.drw)));
  cl(SetKernelArg(impl->kernels.prefix,3,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
  cl(SetKernelArg(impl->kernels.prefix,4,SKC_CL_ARG(cohort->keys.drw)));
  cl(SetKernelArg(impl->kernels.prefix,5,SKC_CL_ARG(runtime->handle_pool.map.drw)));
  cl(SetKernelArg(impl->kernels.prefix,6,SKC_CL_ARG(cohort->metas.drw)));
  cl(SetKernelArg(impl->kernels.prefix,7,SKC_CL_ARG(rasters)));

  cl_event complete;

  skc_device_enqueue_kernel(runtime->device,
                            SKC_DEVICE_KERNEL_ID_PREFIX,
                            cohort->cq,
                            impl->kernels.prefix,
                            rasters,
                            0,NULL,
                            &complete);

  cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_prefix_cb,grid));
  cl(ReleaseEvent(complete));

#ifndef NDEBUG
  fprintf(stderr,"post-prefix\n");
#endif

  // flush command queue
  cl(Flush(cohort->cq));

  //
  // ALLOCATED RESOURCES
  //
  // path_ids        a
  // raster_ids      a
  // transforms      -
  // clips           -
  // fill_cmds       -
  // cq              a
  // cohort atomics  a
  // cmds            -
  // keys            a
  // meta            a
  //
}
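//
// The disabled host-side verification path above (#if 0) sorts the mapped
// TTRK keys with qsort() and a 64-bit comparator named cmp64. cmp64 itself
// is defined elsewhere; a plausible sketch, assuming union skc_ttrk also
// exposes a 64-bit view named u64, would be:
//
//   static int cmp64(void const * l, void const * r)
//   {
//     skc_ulong const a = ((union skc_ttrk const *)l)->u64;
//     skc_ulong const b = ((union skc_ttrk const *)r)->u64;
//
//     return (a < b) ? -1 : (a > b) ? +1 : 0;
//   }
//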
static void
skc_surface_grid_pfn_execute(skc_grid_t const grid)
{
  struct skc_surface_render * const render  = skc_grid_get_data(grid);
  struct skc_surface_impl   * const impl    = render->impl;
  struct skc_runtime        * const runtime = impl->runtime;

  // get the composition args
  struct skc_composition_impl * const ci      = render->composition->impl;
  struct skc_place_atomics    * const atomics = ci->atomics.hr;

  if (atomics->offsets > 0)
    {
      // acquire the rbo/tex
      if (render->fb->type != SKC_FRAMEBUFFER_CL_IMAGE2D)
        cl(EnqueueAcquireGLObjects(impl->cq,1,&render->fb->mem,0,NULL,NULL));

      // get the styling args
      struct skc_styling_impl * const si = render->styling->impl;

      cl(SetKernelArg(impl->kernels.render,0,SKC_CL_ARG(si->layers.drN)));
      cl(SetKernelArg(impl->kernels.render,1,SKC_CL_ARG(si->groups.drN)));
      cl(SetKernelArg(impl->kernels.render,2,SKC_CL_ARG(si->extras.drN)));

      cl(SetKernelArg(impl->kernels.render,3,SKC_CL_ARG(ci->keys.drw)));
      cl(SetKernelArg(impl->kernels.render,4,SKC_CL_ARG(atomics->keys)));
      cl(SetKernelArg(impl->kernels.render,5,SKC_CL_ARG(ci->offsets.drw)));
      cl(SetKernelArg(impl->kernels.render,6,SKC_CL_ARG(atomics->offsets)));

      // block pool
      cl(SetKernelArg(impl->kernels.render,7,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));

      // surface
      cl(SetKernelArg(impl->kernels.render,8,SKC_CL_ARG(render->fb->mem)));

#if 1
      // tile clip
      cl(SetKernelArg(impl->kernels.render,9,sizeof(skc_uint4),render->clip));
#else
      // surface pitch (height)
      skc_uint const surface_pitch = SKC_SURFACE_HEIGHT;
      cl(SetKernelArg(impl->kernels.render,9,SKC_CL_ARG(surface_pitch)));

      // tile clip
      cl(SetKernelArg(impl->kernels.render,10,sizeof(skc_uint4),render->clip));
#endif

      // launch render kernel
      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_RENDER,
                                impl->cq,
                                impl->kernels.render,
                                atomics->offsets,
                                0,NULL,NULL);

      cl_event complete;

      // give the rbo back
      if (render->fb->type != SKC_FRAMEBUFFER_CL_IMAGE2D)
        {
          cl(EnqueueReleaseGLObjects(impl->cq,1,&render->fb->mem,0,NULL,&complete));

          //
          // blit the rbo to fbo0
          //
          render->fb->post_render(render->fb->interop);

          //
          // clear the rbo -- FIXME -- we shouldn't have to do this here
          //
          float const rgba[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
          uint32_t    rect[4] = { 0 };

          skc_interop_get_size(render->fb->interop,rect+2,rect+3);

          skc_surface_debug_clear(impl,render->fb,rgba,rect);
        }
      else
        {
          // FIXME -- the CL_IMAGE2D path produced no completion event; enqueue a
          // marker so the callback below never observes an uninitialized cl_event
          cl(EnqueueMarkerWithWaitList(impl->cq,0,NULL,&complete));
        }

      // notify anyone listening...
      cl(SetEventCallback(complete,CL_COMPLETE,skc_surface_render_cb,render));
      cl(ReleaseEvent(complete));

      // flush it
      cl(Flush(impl->cq));
    }
  else
    {
      skc_surface_render_complete(render);
    }
}
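//
// For reference, the GL-interop bracketing used above follows the standard
// CL/GL sharing pattern -- acquire, render, release -- sketched here in
// condensed form with hypothetical names (cq, render_kernel, global, mem, done):
//
//   cl(EnqueueAcquireGLObjects(cq,1,&mem,0,NULL,NULL));                         // CL may now write the GL object
//   cl(EnqueueNDRangeKernel(cq,render_kernel,1,NULL,&global,NULL,0,NULL,NULL)); // render into it
//   cl(EnqueueReleaseGLObjects(cq,1,&mem,0,NULL,&done));                        // GL may consume it once 'done' fires
//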