static
void
skc_raster_builder_cohort_grid_pfn_execute(skc_grid_t const grid)
{
  //
  // ALLOCATED RESOURCES
  //
  // path_ids          i
  // raster_ids        i
  // transforms        i
  // clips             i
  // fill_cmds         i
  // cq                -
  // cohort atomics    -
  // cmds              -
  // keys              -
  // meta              -
  //

  // allocate the cohort
  struct skc_raster_cohort       * const cohort  = skc_grid_get_data(grid);

  // get impl
  struct skc_raster_builder_impl * const impl    = cohort->impl;
  struct skc_runtime             * const runtime = impl->runtime;

  // acquire in-order cq
  cohort->cq = skc_runtime_acquire_cq_in_order(runtime);

  // alloc the snapshot -- could be zero-sized
  skc_extent_phw1g_tdrNs_snap_alloc(runtime,
                                    &impl->fill_cmds,
                                    &cohort->fill_cmds,
                                    cohort->cq,NULL);

  // flush the cq to get the fill running -- currently disabled since the
  // queue is flushed below once all kernels are enqueued
  // cl(Flush(cohort->cq));

  // create split atomics
  skc_extent_thr_tdrw_alloc(runtime,&cohort->atomics,sizeof(struct skc_raster_cohort_atomic));
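  // (the atomics struct carries the per-cohort `cmds` and `keys` counters
  //  that the expand and rasterize kernels bump and the host reads back)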

  // zero the atomics
  skc_extent_thr_tdrw_zero(&cohort->atomics,cohort->cq,NULL);

  // get config
  struct skc_config const * const config = runtime->config;

  // acquire device-side extents
  skc_extent_tdrw_alloc(runtime,
                        &cohort->cmds,
                        sizeof(union skc_cmd_rasterize) * config->raster_cohort.expand.cmds);

  //
  // FILLS EXPAND
  //
  // the cmd counts are needed before the RASTERIZE grids can be launched
  //
  // - OpenCL 1.2: copy the atomic counters back to the host and launch the RASTERIZE grids from the host
  // - OpenCL 2.x: size and launch the RASTERIZE grids from the device via device-side enqueue
  // - or launch a device-wide grid that feeds itself... but that's unsatisfying
  //
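  // for reference, a minimal sketch of the OpenCL 2.x option above --
  // a hypothetical device-side launcher, not part of this build:
  //
#if 0
  // OpenCL C 2.0: a single work-item reads the command count produced by
  // FILLS_EXPAND and enqueues the rasterization work itself -- no host
  // round-trip required
  kernel void
  skc_kernel_rasterize_launcher(queue_t                                        q,
                                global struct skc_raster_cohort_atomic const * atomics,
                                global union  skc_cmd_rasterize        const * cmds)
  {
    ndrange_t const ndr = ndrange_1D(atomics->cmds);

    enqueue_kernel(q,CLK_ENQUEUE_FLAGS_WAIT_KERNEL,ndr,
                   ^{ /* rasterize cmds[get_global_id(0)] */ });
  }
#endif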

  // how many commands?  could be zero
  skc_uint const work_size = skc_extent_ring_snap_count(cohort->fill_cmds.snap);

  if (work_size > 0)
    {
      cl(SetKernelArg(impl->kernels.fills_expand,0,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));
      cl(SetKernelArg(impl->kernels.fills_expand,1,SKC_CL_ARG(cohort->atomics.drw)));
      cl(SetKernelArg(impl->kernels.fills_expand,2,SKC_CL_ARG(runtime->handle_pool.map.drw)));
      cl(SetKernelArg(impl->kernels.fills_expand,3,SKC_CL_ARG(cohort->fill_cmds.drN)));
      cl(SetKernelArg(impl->kernels.fills_expand,4,SKC_CL_ARG(cohort->cmds.drw)));

      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_FILLS_EXPAND,
                                cohort->cq,
                                impl->kernels.fills_expand,
                                work_size,
                                0,NULL,NULL);
    }

  //
  // copyback number of rasterization commands
  //
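  // (the read is enqueued on the in-order cq, so it is ordered after the
  //  FILLS_EXPAND kernel; once the copy completes, the event callback is
  //  expected to advance this grid to the rasterize stage)
  //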
  cl_event complete;

  skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);

  cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_fills_expand_cb,grid));
  cl(ReleaseEvent(complete));

  // flush command queue
  cl(Flush(cohort->cq));

  //
  // ALLOCATED RESOURCES
  //
  // path_ids          i
  // raster_ids        i
  // transforms        i
  // clips             i
  // fill_cmds         s
  // cq                a
  // cohort atomics    a
  // cmds              a
  // keys              -
  // meta              -
  //
}
static
void
skc_raster_cohort_rasterize(skc_grid_t const grid)
{
  //
  // ALLOCATED RESOURCES
  //
  // path_ids          i
  // raster_ids        i
  // transforms        i
  // clips             i
  // fill_cmds         s
  // cq                a
  // cohort atomics    a
  // cmds              a
  // cmds_quad         a
  // cmds_cubic        a
  // keys              -
  // meta              -
  //

  // use the backpointers
  struct skc_raster_cohort       * const cohort  = skc_grid_get_data(grid);
  struct skc_raster_builder_impl * const impl    = cohort->impl;
  struct skc_runtime             * const runtime = impl->runtime;

  //
  // RELEASED RESOURCES
  //
  // fill_cmds  snap
  //

  // release the fill_cmds snap (and its extent) since it's only used by the expand stage
  skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->fill_cmds);

  //
  // NEW ALLOCATED RESOURCES
  //
  // transforms snap
  // clips snap
  // ttrk keys
  //
  skc_extent_phw1g_tdrNs_snap_alloc(runtime,
                                    &impl->transforms,
                                    &cohort->transforms,
                                    cohort->cq,NULL);

  skc_extent_phw1g_tdrNs_snap_alloc(runtime,
                                    &impl->clips,
                                    &cohort->clips,
                                    cohort->cq,NULL);

  // acquire device-side extent
  skc_extent_tdrw_alloc(runtime,
                        &cohort->keys,
                        sizeof(union skc_ttrk) * runtime->config->raster_cohort.rasterize.keys);

  // skc_extent_thrw_tdrw_alloc(runtime,
  //                            &cohort->keys,
  //                            sizeof(union skc_ttrk) * runtime->config->raster_cohort.rasterize.keys);

  //
  // the plan: acquire an out-of-order command queue and launch up to 3
  // rasterization kernels -- note the current implementation instead
  // launches a single RASTERIZE_ALL kernel on the cohort's in-order cq
  //
  // for each kernel:
  //
  //   set runtime "global" kernel args:
  //
  //   - block pool atomics
  //   - block pool extent
  //
  //   set cohort "local" kernel args:
  //
  //   - atomics
  //   - cmds
  //
  // enqueue barrier
  // enqueue copy back of atomics on the command queue
  // set callback on copy back event
  // release command queue
  //
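  // a minimal sketch of the out-of-order plan above (hypothetical names
  // are flagged below):
  //
#if 0
  // NOTE: skc_runtime_acquire_cq_out_of_order() is a hypothetical analog
  // of skc_runtime_acquire_cq_in_order()
  cl_command_queue cq_ooo = skc_runtime_acquire_cq_out_of_order(runtime);

  // ... SetKernelArg + skc_device_enqueue_kernel for each of the (up to 3)
  //     rasterize kernels on cq_ooo ...

  // a barrier orders the atomics copyback after all rasterize kernels
  cl(EnqueueBarrierWithWaitList(cq_ooo,0,NULL,NULL));

  cl_event complete;

  skc_extent_thr_tdrw_read(&cohort->atomics,cq_ooo,&complete);

  cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_rasterize_cb,grid));
  cl(ReleaseEvent(complete));
#endif
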
  struct skc_raster_cohort_atomic const * const atomics = cohort->atomics.hr;

  if (atomics->cmds > 0)
    {
      cl(SetKernelArg(impl->kernels.rasterize_all,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,1,SKC_CL_ARG(runtime->block_pool.blocks.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,2,SKC_CL_ARG(runtime->block_pool.ids.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,3,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));

      cl(SetKernelArg(impl->kernels.rasterize_all,4,SKC_CL_ARG(cohort->atomics.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,5,SKC_CL_ARG(cohort->keys.drw)));

      cl(SetKernelArg(impl->kernels.rasterize_all,6,SKC_CL_ARG(cohort->transforms.drN)));
      cl(SetKernelArg(impl->kernels.rasterize_all,7,SKC_CL_ARG(cohort->clips.drN)));
      cl(SetKernelArg(impl->kernels.rasterize_all,8,SKC_CL_ARG(cohort->cmds.drw)));
      cl(SetKernelArg(impl->kernels.rasterize_all,9,SKC_CL_ARG(atomics->cmds)));

      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_RASTERIZE_ALL,
                                cohort->cq,
                                impl->kernels.rasterize_all,
                                atomics->cmds,
                                0,NULL,NULL);
    }

  //
  // copyback number of TTRK keys
  //
  cl_event complete;

  skc_extent_thr_tdrw_read(&cohort->atomics,cohort->cq,&complete);

  cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_rasterize_cb,grid));
  cl(ReleaseEvent(complete));

  // flush command queue
  cl(Flush(cohort->cq));

  //
  // ALLOCATED RESOURCES
  //
  // path_ids          i
  // raster_ids        i
  // transforms        a
  // clips             a
  // fill_cmds         -
  // cq                a
  // cohort atomics    a
  // cmds              a
  // keys              a
  // meta              -
}
Example #3
/* void setKernelArg (in long aIndex, in nsIVariant aValue, [optional] in long aType); */
NS_IMETHODIMP WebCLKernel::SetKernelArg(PRInt32 aIndex, nsIVariant *aValue, PRInt32 aType, JSContext *cx)
{
  D_METHOD_START;
  NS_ENSURE_ARG_POINTER (aValue);
  NS_ENSURE_ARG_POINTER (cx);
  nsresult rv = NS_OK;

  if (aType == types::UNKNOWN)
  {
    PRUint16 variantType = 0;
    rv = aValue->GetDataType (&variantType);
    // If the type is unknown, or the user chose not to give an explicit
    // type, try to guess it from the variant's own type.
    switch (variantType)
    {
      case nsIDataType::VTYPE_INT8: return SetKernelArg (aIndex, aValue, types::CHAR, cx);
      case nsIDataType::VTYPE_INT16: return SetKernelArg (aIndex, aValue, types::SHORT, cx);
      case nsIDataType::VTYPE_INT32: return SetKernelArg (aIndex, aValue, types::INT, cx);
      case nsIDataType::VTYPE_INT64: return SetKernelArg (aIndex, aValue, types::LONG, cx);
      case nsIDataType::VTYPE_UINT8: return SetKernelArg (aIndex, aValue, types::UCHAR, cx);
      case nsIDataType::VTYPE_UINT16: return SetKernelArg (aIndex, aValue, types::USHORT, cx);
      case nsIDataType::VTYPE_UINT32: return SetKernelArg (aIndex, aValue, types::UINT, cx);
      case nsIDataType::VTYPE_UINT64: return SetKernelArg (aIndex, aValue, types::ULONG, cx);
      case nsIDataType::VTYPE_FLOAT: return SetKernelArg (aIndex, aValue, types::FLOAT, cx);
      case nsIDataType::VTYPE_DOUBLE: return SetKernelArg (aIndex, aValue, types::DOUBLE, cx);
      case nsIDataType::VTYPE_BOOL: return SetKernelArg (aIndex, aValue, types::BOOL, cx);

      case nsIDataType::VTYPE_CHAR:
      case nsIDataType::VTYPE_WCHAR: return SetKernelArg (aIndex, aValue, types::CHAR, cx);

      case nsIDataType::VTYPE_CHAR_STR:
      case nsIDataType::VTYPE_WCHAR_STR:
      case nsIDataType::VTYPE_UTF8STRING:
      case nsIDataType::VTYPE_ASTRING:
      case nsIDataType::VTYPE_CSTRING: return SetKernelArg (aIndex, aValue, types::STRING, cx);

      case nsIDataType::VTYPE_INTERFACE:
      case nsIDataType::VTYPE_INTERFACE_IS:
      {
        // Try conversions to supported WebCL interfaces.
        nsCOMPtr<nsISupports> isu;
        rv = aValue->GetAsISupports (getter_AddRefs(isu));
        if (NS_SUCCEEDED (rv))
        {
          nsCOMPtr<IWebCLMemoryObject> memObj = do_QueryInterface (isu, &rv);
          if (NS_SUCCEEDED (rv)) return SetKernelArg (aIndex, aValue, types::MEMORY_OBJECT, cx);

          nsCOMPtr<IWebCLSampler> samplerObj = do_QueryInterface (isu, &rv);
          if (NS_SUCCEEDED (rv)) return SetKernelArg (aIndex, aValue, types::SAMPLER, cx);
        }
        // None matched -- intentionally fall through.
      }

      case nsIDataType::VTYPE_ARRAY:
      case nsIDataType::VTYPE_EMPTY_ARRAY:
        D_LOG (LOG_LEVEL_ERROR, "Array support not implemented.");
        WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Array support not implemented.");
        return WEBCL_XPCOM_ERROR; //NS_ERROR_INVALID_ARG;

      case nsIDataType::VTYPE_EMPTY:
      case nsIDataType::VTYPE_VOID:
      case nsIDataType::VTYPE_ID:
      default:
        D_LOG (LOG_LEVEL_ERROR,
               "Unable to guess type from variant (type %u) and no type given by the user.",
               variantType);
        WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Unable to guess type from variant (type %u) and no type given by the user.",
                             variantType);
        return WEBCL_XPCOM_ERROR; //NS_ERROR_INVALID_ARG;
    }
  }
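
  // Scalar types are marshalled below: the variant is converted into a
  // temporary buffer of the matching cl_* type, handed to setKernelArg
  // after the switch, and freed at the end of this function.  Object
  // types call setKernelArg directly inside their case and return early.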

  size_t sze = 0;
  void* value = 0;

  switch (aType)
  {
    case types::BYTE:
    case types::CHAR:
      value = (void*)malloc (sze = sizeof (cl_char));
      if (value)
        rv = variantToCLChar (aValue, (cl_char*)value);
      break;

    case types::UCHAR:
      value = (void*)malloc (sze = sizeof (cl_uchar));
      if (value)
        rv = variantToCLUChar (aValue, (cl_uchar*)value);
      break;

    case types::SHORT:
      value = (void*)malloc (sze = sizeof (cl_short));
      if (value)
        rv = variantToCLShort (aValue, (cl_short*)value);
      break;

    case types::USHORT:
      value = (void*)malloc (sze = sizeof (cl_ushort));
      if (value)
        rv = variantToCLUShort (aValue, (cl_ushort*)value);
      break;

    case types::BUILD_STATUS:
    case types::INT:
      value = (void*)malloc (sze = sizeof (cl_int));
      if (value)
        rv = variantToCLInt (aValue, (cl_int*)value);
      break;

    case types::ADRESSING_MODE:
    case types::CHANNEL_ORDER:
    case types::CHANNEL_TYPE:
    case types::COMMAND_TYPE:
    case types::DEVICE_LOCAL_MEM_TYPE:
    case types::DEVICE_MEM_CACHE_TYPE:
    case types::FILTER_MODE:
    case types::GL_OBJECT_TYPE:
    case types::MEM_OBJECT_TYPE:
    case types::UINT:
      value = (void*)malloc (sze = sizeof (cl_uint));
      if (value)
        rv = variantToCLUInt (aValue, (cl_uint*)value);
      break;

    case types::LONG:
      value = (void*)malloc (sze = sizeof (cl_long));
      if (value)
        rv = variantToCLLong (aValue, (cl_long*)value);
      break;

    case types::COMMAND_QUEUE_PROPERTIES: // bitfield
    case types::DEVICE_EXEC_CAPABILITIES: // bitfield
    case types::DEVICE_FP_CONFIG: // bitfield
    case types::DEVICE_TYPE: // bitfield
    case types::MAP_FLAGS: // bitfield
    case types::MEM_FLAGS: // bitfield
    case types::ULONG:
      value = (void*)malloc (sze = sizeof (cl_ulong));
      if (value)
        rv = variantToCLULong (aValue, (cl_ulong*)value);
      break;

    case types::BOOL:
      value = (void*)malloc (sze = sizeof (cl_bool));
      if (value)
        rv = variantToCLBool (aValue, (cl_bool*)value);
      break;

    case types::SIZE_T:
      value = (void*)malloc (sze = sizeof (size_t));
      if (value)
        rv = variantToCLSizeT (aValue, (size_t*)value);
      break;

    case types::HALF:
      value = (void*)malloc (sze = sizeof (cl_half));
      if (value)
        rv = variantToCLHalf (aValue, (cl_half*)value);
      break;

    case types::FLOAT:
      value = (void*)malloc (sze = sizeof (cl_float));
      if (value)
        rv = variantToCLFloat (aValue, (cl_float*)value);
      break;

    case types::DOUBLE:
      value = (void*)malloc (sze = sizeof (cl_double));
      if (value)
        rv = variantToCLDouble (aValue, (cl_double*)value);
      break;

    case types::STRING:
    {
      nsCString str;
      rv = aValue->GetAsACString (str);
      if (NS_SUCCEEDED (rv))
      {
        cl_int err = mWrapper->setKernelArg (mInternal, aIndex, str.Length () + 1,
                                             (void*)str.get ());
        ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
        ENSURE_CL_OP_SUCCESS (err);
      }
      break;
    }

    case types::BYTE_V:
    case types::CHAR_V:
    case types::UCHAR_V:
    case types::SHORT_V:
    case types::USHORT_V:
    case types::CONTEXT_PROPERTIES:
    case types::INT_V:
    case types::UINT_V:
    case types::LONG_V:
    case types::ULONG_V:
    case types::BOOL_V:
    case types::SIZE_T_V:
    case types::HALF_V:
    case types::FLOAT_V:
    case types::DOUBLE_V:
    case types::STRING_V:
      D_LOG (LOG_LEVEL_ERROR, "Array types are not supported.");
      WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Array types are not supported.");
      return WEBCL_XPCOM_ERROR; //NS_ERROR_NOT_IMPLEMENTED;

    case types::MEMORY_OBJECT:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLMemoryObject> memObject = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_mem wrapped = memObject->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_mem),
                                             (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }

    case types::SAMPLER:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLSampler> sampler = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_sampler wrapped = sampler->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_sampler),
                                             (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }

    case types::PLATFORM:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLPlatform> platform = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_platform_id wrapped = platform->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_platform_id),
                                             (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }

    case types::DEVICE:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLDevice> device = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_device_id wrapped = device->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_device_id),
                                             (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }

    case types::CONTEXT:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLContext> context = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_context wrapped = context->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_context),
                                             (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }

    case types::COMMAND_QUEUE:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLCommandQueue> cmdQueue = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_command_queue wrapped = cmdQueue->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_command_queue),
                                             (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }

    case types::PROGRAM:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLProgram> program = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_program wrapped = program->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_program),
                                             (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }

    case types::KERNEL:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLKernel> kernel = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_kernel wrapped = kernel->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_kernel),
                                             (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }

    case types::EVENT:
    {
      nsCOMPtr<nsISupports> isu;
      rv = aValue->GetAsISupports (getter_AddRefs(isu));
      if (NS_FAILED (rv)) break;
      nsCOMPtr<WebCLEvent> event = do_QueryInterface (isu, &rv);
      if (NS_FAILED (rv)) break;
      cl_event wrapped = event->getInternal ();
      cl_int wrErr = mWrapper->setKernelArg (mInternal, aIndex, sizeof(cl_event),
                                             (void*)&wrapped);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (wrErr);
      return NS_OK;
    }

    default:
      D_LOG (LOG_LEVEL_ERROR, "Unsupported type %d at argument index %u", aType, aIndex);
      WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Unsupported type %d at argument index %u.", aType, aIndex);
      //rv = NS_ERROR_INVALID_ARG;
      return WEBCL_XPCOM_ERROR;
  }

  if (NS_SUCCEEDED (rv))
  {
    if (value)
    {
      cl_int err = mWrapper->setKernelArg (mInternal, aIndex, sze, value);
      ENSURE_LIB_WRAPPER_SUCCESS (mWrapper);
      ENSURE_CL_OP_SUCCESS (err);

    }
    else
    {
      D_LOG (LOG_LEVEL_ERROR, "Memory allocation failed for kernel argument at index %d.", aIndex);
      WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Memory allocation failed for kernel argument at index %d.", aIndex);
      rv = WEBCL_XPCOM_ERROR; //NS_ERROR_OUT_OF_MEMORY;
    }
  }
  else
  {
    D_LOG (LOG_LEVEL_ERROR, "Failed to convert kernel argument at index %d.", aIndex);
    WebCL_reportJSError (cx, "WebCLKernel::setKernelArg: Failed to convert kernel argument at index %d.", aIndex);
    rv = WEBCL_XPCOM_ERROR;
  }

  if (value)
    free (value);

  return rv;
}
static
void
skc_raster_cohort_sort_prefix(skc_grid_t const grid)
{
  //
  // ALLOCATED RESOURCES
  //
  // path_ids          i
  // raster_ids        i
  // transforms        a
  // clips             a
  // fill_cmds         -
  // cq                a
  // cohort atomics    a
  // cmds              a
  // keys              a
  // meta              -
  //

  // use the backpointers
  struct skc_raster_cohort       * const cohort  = skc_grid_get_data(grid);
  struct skc_raster_builder_impl * const impl    = cohort->impl;
  struct skc_runtime             * const runtime = impl->runtime;

  // release transforms
  skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->transforms);

  // release clips
  skc_extent_phw1g_tdrNs_snap_free(runtime,&cohort->clips);

  // release expanded cmds
  skc_extent_tdrw_free(runtime,&cohort->cmds);

  // alloc the snapshot -- could be zero-sized
  skc_extent_phrwg_tdrNs_snap_alloc(runtime,
                                    &impl->raster_ids,
                                    &cohort->raster_ids,
                                    cohort->cq,NULL);

  // will never be zero
  skc_uint const rasters = skc_extent_ring_snap_count(cohort->raster_ids.snap);

  // acquire fixed-size device-side extent
  skc_extent_tdrw_alloc(runtime,
                        &cohort->metas,
                        sizeof(struct skc_raster_cohort_meta));

  // skc_extent_thrw_tdrw_alloc(runtime,
  //                            &cohort->metas,
  //                            sizeof(struct skc_raster_cohort_meta));

  // zero the metas
  skc_extent_tdrw_zero(&cohort->metas,cohort->cq,NULL);

  // get the read-only host copy of the device atomics
  struct skc_raster_cohort_atomic const * const atomics = cohort->atomics.hr;

  //
  // SORT
  //
  if (atomics->keys > 0)
    {
#ifndef NDEBUG
      fprintf(stderr,"raster cohort sort: %u\n",atomics->keys);
#endif

      //
      // pad the key count to the sizes required by the sorter
      //
      uint32_t keys_padded_in, keys_padded_out;

      hs_cl_pad(runtime->hs,atomics->keys,&keys_padded_in,&keys_padded_out);

      hs_cl_sort(runtime->hs,
                 cohort->cq,
                 0,NULL,NULL,
                 cohort->keys.drw,
                 NULL,
                 atomics->keys,
                 keys_padded_in,
                 keys_padded_out,
                 false);

      cl(SetKernelArg(impl->kernels.segment,0,SKC_CL_ARG(cohort->keys.drw)));
      cl(SetKernelArg(impl->kernels.segment,1,SKC_CL_ARG(cohort->metas.drw)));

#ifndef NDEBUG
      fprintf(stderr,"post-sort\n");
#endif

      // find start of each tile
      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_SEGMENT_TTRK,
                                cohort->cq,
                                impl->kernels.segment,
                                atomics->keys,
                                0,NULL,NULL);

#ifndef NDEBUG
      fprintf(stderr,"post-segment\n");
#endif

      //
      // HOST-SIDE REFERENCE IMPLEMENTATION -- DELETE ALL THIS WHEN READY
      //

#if 0
      //
      //
      //
      cl(Finish(cohort->cq));

      // map keys to host
      union skc_ttrk * const keys = skc_extent_thrw_tdrw_map(&cohort->keys,
                                                             cohort->cq,
                                                             NULL);
      // map meta to host
      struct skc_raster_cohort_meta * const metas = skc_extent_thrw_tdrw_map(&cohort->metas,
                                                                             cohort->cq,
                                                                             NULL);
      // block until done
      cl(Finish(cohort->cq));

      // sort keys
      qsort(keys,atomics->keys,sizeof(*keys),cmp64);

      // mask to determine if rk id is a new block
      skc_uint const subblock_mask = runtime->config->block.subblocks - 1;
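      // (block ids are handed out in subblock units, so an id whose low
      //  bits are all zero marks the first subblock of a fresh block --
      //  e.g. with 8 subblocks per block, ids 0, 8, 16, ... open new blocks)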

      //
      // some counters
      //
      union skc_raster_cohort_meta_in meta_in = {
        .blocks = 0,
        .offset = 0,
        .pk     = 0,
        .rk     = 0
      };

      // get first key
      union skc_ttrk curr = keys[0];

      skc_uint ii=0, jj=0;

      // for all TTRK keys
      while (true)
        {
          // increment ttrk count
          meta_in.rk += 1;

          // was this a new block?
          if ((curr.u32v2.lo & subblock_mask) == 0)
            meta_in.blocks += 1;

          // break if we're out of keys
          if (++ii >= atomics->keys)
            break;

          // otherwise, process next key
          union skc_ttrk const next = keys[ii];

          // if new cohort then save curr meta and init next meta
          if (next.cohort != curr.cohort)
            {
              fprintf(stderr,"[ %u, %u, %u, %u ]\n",
                      meta_in.blocks,
                      meta_in.offset,
                      meta_in.pk,
                      meta_in.rk);

              // store back to buffer
              metas->inout[curr.cohort].in = meta_in;

              // update meta_in
              meta_in.blocks = 0;
              meta_in.offset = ii;
              meta_in.pk     = 0;
              meta_in.rk     = 0;
            }
          // otherwise, if same y but new x then increment TTPK count
          else if ((next.y == curr.y) && (next.x != curr.x))
            {
              meta_in.pk += 1;

#if 0
              fprintf(stderr,"%3u : %3u : ( %3u, %3u ) -> ( %3u )\n",
                      jj++,curr.cohort,curr.y,curr.x,next.x);
#endif
            }

#if 0
          fprintf(stderr,"( %3u, %3u )\n",next.y,next.x);
#endif

          curr = next;
        }

      fprintf(stderr,"[ %u, %u, %u, %u ]\n",
              meta_in.blocks,
              meta_in.offset,
              meta_in.pk,
              meta_in.rk);

      // store back to buffer
      metas->inout[curr.cohort].in = meta_in;


      // unmap
      skc_extent_thrw_tdrw_unmap(&cohort->keys,
                                 keys,
                                 cohort->cq,
                                 NULL);

      // unmap
      skc_extent_thrw_tdrw_unmap(&cohort->metas,
                                 metas,
                                 cohort->cq,
                                 NULL);
#endif
    }

#ifndef NDEBUG
  fprintf(stderr,"rasters_alloc: %u\n",rasters);
#endif

  //
  // RASTER ALLOC/INIT
  //
  cl(SetKernelArg(impl->kernels.rasters_alloc,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,1,SKC_CL_ARG(runtime->block_pool.ids.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,2,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,3,SKC_CL_ARG(runtime->handle_pool.map.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,4,SKC_CL_ARG(cohort->metas.drw)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,5,SKC_CL_ARG(cohort->raster_ids.drN)));
  cl(SetKernelArg(impl->kernels.rasters_alloc,6,SKC_CL_ARG(rasters)));

  skc_device_enqueue_kernel(runtime->device,
                            SKC_DEVICE_KERNEL_ID_RASTERS_ALLOC,
                            cohort->cq,
                            impl->kernels.rasters_alloc,
                            rasters,
                            0,NULL,NULL);

#ifndef NDEBUG
  fprintf(stderr,"post-alloc\n");
#endif

  //
  // PREFIX
  //
  cl(SetKernelArg(impl->kernels.prefix,0,SKC_CL_ARG(runtime->block_pool.atomics.drw)));
  cl(SetKernelArg(impl->kernels.prefix,1,SKC_CL_ARG(runtime->block_pool.ids.drw)));
  cl(SetKernelArg(impl->kernels.prefix,2,SKC_CL_ARG(runtime->block_pool.blocks.drw)));
  cl(SetKernelArg(impl->kernels.prefix,3,SKC_CL_ARG(runtime->block_pool.size->ring_mask)));

  cl(SetKernelArg(impl->kernels.prefix,4,SKC_CL_ARG(cohort->keys.drw)));
  cl(SetKernelArg(impl->kernels.prefix,5,SKC_CL_ARG(runtime->handle_pool.map.drw)));

  cl(SetKernelArg(impl->kernels.prefix,6,SKC_CL_ARG(cohort->metas.drw)));
  cl(SetKernelArg(impl->kernels.prefix,7,SKC_CL_ARG(rasters)));

  cl_event complete;

  skc_device_enqueue_kernel(runtime->device,
                            SKC_DEVICE_KERNEL_ID_PREFIX,
                            cohort->cq,
                            impl->kernels.prefix,
                            rasters,
                            0,NULL,
                            &complete);

  cl(SetEventCallback(complete,CL_COMPLETE,skc_raster_cohort_prefix_cb,grid));
  cl(ReleaseEvent(complete));

#ifndef NDEBUG
  fprintf(stderr,"post-prefix\n");
#endif

  // flush command queue
  cl(Flush(cohort->cq));

  //
  // ALLOCATED RESOURCES
  //
  // path_ids          a
  // raster_ids        a
  // transforms        -
  // clips             -
  // fill_cmds         -
  // cq                a
  // cohort atomics    a
  // cmds              -
  // keys              a
  // meta              a
  //
}
Example #5
static
void
skc_surface_grid_pfn_execute(skc_grid_t const grid)
{
  struct skc_surface_render   * const render  = skc_grid_get_data(grid);
  struct skc_surface_impl     * const impl    = render->impl;
  struct skc_runtime          * const runtime = impl->runtime;

  // get the composition args
  struct skc_composition_impl * const ci      = render->composition->impl;
  struct skc_place_atomics    * const atomics = ci->atomics.hr;

  if (atomics->offsets > 0)
    {
      // acquire the rbo/tex
      if (render->fb->type != SKC_FRAMEBUFFER_CL_IMAGE2D)
        cl(EnqueueAcquireGLObjects(impl->cq,1,&render->fb->mem,0,NULL,NULL));
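
      // (GL-shared memory objects must be acquired by CL before kernels
      //  touch them and released afterward -- see the matching
      //  EnqueueReleaseGLObjects below)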

      // get the styling args
      struct skc_styling_impl * const si = render->styling->impl;

      cl(SetKernelArg(impl->kernels.render,0,SKC_CL_ARG(si->layers.drN)));
      cl(SetKernelArg(impl->kernels.render,1,SKC_CL_ARG(si->groups.drN)));
      cl(SetKernelArg(impl->kernels.render,2,SKC_CL_ARG(si->extras.drN)));

      cl(SetKernelArg(impl->kernels.render,3,SKC_CL_ARG(ci->keys.drw)));
      cl(SetKernelArg(impl->kernels.render,4,SKC_CL_ARG(atomics->keys)));
      cl(SetKernelArg(impl->kernels.render,5,SKC_CL_ARG(ci->offsets.drw)));
      cl(SetKernelArg(impl->kernels.render,6,SKC_CL_ARG(atomics->offsets)));

      // block pool
      cl(SetKernelArg(impl->kernels.render,7,SKC_CL_ARG(impl->runtime->block_pool.blocks.drw)));

      // surface
      cl(SetKernelArg(impl->kernels.render,8,SKC_CL_ARG(render->fb->mem)));

#if 1
      // tile clip
      cl(SetKernelArg(impl->kernels.render,9,sizeof(skc_uint4),render->clip));
#else
      // surface pitch (height)
      skc_uint const surface_pitch = SKC_SURFACE_HEIGHT;
      cl(SetKernelArg(impl->kernels.render,9,SKC_CL_ARG(surface_pitch)));
      // tile clip
      cl(SetKernelArg(impl->kernels.render,10,sizeof(skc_uint4),render->clip));
#endif

      // launch render kernel
      skc_device_enqueue_kernel(runtime->device,
                                SKC_DEVICE_KERNEL_ID_RENDER,
                                impl->cq,
                                impl->kernels.render,
                                atomics->offsets,
                                0,NULL,NULL);


      cl_event complete;

      // give the rbo back
      if (render->fb->type != SKC_FRAMEBUFFER_CL_IMAGE2D)
        {
          cl(EnqueueReleaseGLObjects(impl->cq,1,&render->fb->mem,0,NULL,&complete));

          //
          // blit the rbo to fbo0
          //
          render->fb->post_render(render->fb->interop);

          //
          // clear the rbo -- FIXME -- we shouldn't have to do this here
          //
          float    const rgba[4] = { 1.0f, 1.0f, 1.0f, 1.0f };
          uint32_t       rect[4] = { 0 };

          skc_interop_get_size(render->fb->interop,rect+2,rect+3);

          skc_surface_debug_clear(impl,render->fb,rgba,rect);
        }
      else
        {
          // no GL release is needed for a CL image -- grab a completion
          // event from a marker so the callback below always has a valid
          // event to hook
          cl(EnqueueMarkerWithWaitList(impl->cq,0,NULL,&complete));
        }

      // notify anyone listening...
      cl(SetEventCallback(complete,CL_COMPLETE,skc_surface_render_cb,render));
      cl(ReleaseEvent(complete));

      // flush it
      cl(Flush(impl->cq));
    }
  else
    {
      skc_surface_render_complete(render);
    }
}