/**
 * Prepares every transform kernel used by this post-processor.
 *
 * All seven kernels are requested unconditionally (a failure on one does
 * not stop the others from being prepared); the result only reports
 * whether every handle ended up non-NULL.
 *
 * @return true when all kernel handles are non-NULL, false otherwise.
 */
bool OclPostProcessTransform::prepareKernels()
{
    // Plain flips.
    m_kernelFlipH = prepareKernel("transform_flip_h");
    m_kernelFlipV = prepareKernel("transform_flip_v");

    // Plain rotations.
    m_kernelRot180 = prepareKernel("transform_rot_180");
    m_kernelRot90 = prepareKernel("transform_rot_90");
    m_kernelRot270 = prepareKernel("transform_rot_270");

    // Flip combined with a 90 degree rotation.
    m_kernelFlipHRot90 = prepareKernel("transform_flip_h_rot_90");
    m_kernelFlipVRot90 = prepareKernel("transform_flip_v_rot_90");

    const bool flipsReady = (m_kernelFlipH != NULL) && (m_kernelFlipV != NULL);
    const bool rotationsReady = (m_kernelRot180 != NULL)
        && (m_kernelRot90 != NULL)
        && (m_kernelRot270 != NULL);
    const bool flipRotationsReady = (m_kernelFlipHRot90 != NULL)
        && (m_kernelFlipVRot90 != NULL);

    return flipsReady && rotationsReady && flipRotationsReady;
}
// Example #2
// 0
void Network::step()
{
   if (!built) build();

   if (built) {
      LayerMap::iterator it;
      Layer* l;
      //update the external layers
      for (it = layers.begin(); it != layers.end(); it++) {         
         l = it->second;
         if (l->isExternal()) {
            //update layer and add outputs to native outputs array
            ExternalLayer* el = (ExternalLayer*) l;
            el->update();
            UnitMap::iterator uit;
            UnitId unitId;
            for (uit = el->units.begin(); uit != el->units.end(); uit++) {
               output[unitId] = el->getOutput(unitId);
            }
         }
      }

      //run the kernel
      cl_int err;
      size_t workSize = (size_t) numNonExUnits;
      prepareKernel();
      err = clEnqueueNDRangeKernel(clDevice.commandQueue, *clKernel, 1, NULL, &workSize, NULL, 0, NULL, NULL);
	   err = clFinish(clDevice.commandQueue);
      if (err != CL_SUCCESS) {
         printf("Failed to execute kernel!\n");
      }

      //copy the new output to the previous output
      clEnqueueCopyBuffer(clDevice.commandQueue, clNewOutput, clPrevOutput, 0, 0, sizeof(float)*units.size(), 0, NULL, NULL);
      err = clFinish(clDevice.commandQueue);
   }
}