/**
 * Looks up every transform kernel by name through prepareKernel() and caches
 * the results in the corresponding m_kernel* members.
 *
 * All seven lookups are always attempted, even if an earlier one fails, so
 * every member is (re)assigned on each call.
 *
 * @return true only when none of the cached kernel handles is NULL.
 */
bool OclPostProcessTransform::prepareKernels()
{
    // Plain flips and the 180-degree rotation.
    m_kernelFlipH  = prepareKernel("transform_flip_h");
    m_kernelFlipV  = prepareKernel("transform_flip_v");
    m_kernelRot180 = prepareKernel("transform_rot_180");

    // Quarter-turn rotations.
    m_kernelRot90  = prepareKernel("transform_rot_90");
    m_kernelRot270 = prepareKernel("transform_rot_270");

    // Flip combined with a 90-degree rotation.
    m_kernelFlipHRot90 = prepareKernel("transform_flip_h_rot_90");
    m_kernelFlipVRot90 = prepareKernel("transform_flip_v_rot_90");

    // Any single missing kernel makes the whole preparation fail.
    if (m_kernelFlipH == NULL || m_kernelFlipV == NULL || m_kernelRot180 == NULL) {
        return false;
    }
    if (m_kernelRot90 == NULL || m_kernelRot270 == NULL) {
        return false;
    }
    if (m_kernelFlipHRot90 == NULL || m_kernelFlipVRot90 == NULL) {
        return false;
    }
    return true;
}
void Network::step() { if (!built) build(); if (built) { LayerMap::iterator it; Layer* l; //update the external layers for (it = layers.begin(); it != layers.end(); it++) { l = it->second; if (l->isExternal()) { //update layer and add outputs to native outputs array ExternalLayer* el = (ExternalLayer*) l; el->update(); UnitMap::iterator uit; UnitId unitId; for (uit = el->units.begin(); uit != el->units.end(); uit++) { output[unitId] = el->getOutput(unitId); } } } //run the kernel cl_int err; size_t workSize = (size_t) numNonExUnits; prepareKernel(); err = clEnqueueNDRangeKernel(clDevice.commandQueue, *clKernel, 1, NULL, &workSize, NULL, 0, NULL, NULL); err = clFinish(clDevice.commandQueue); if (err != CL_SUCCESS) { printf("Failed to execute kernel!\n"); } //copy the new output to the previous output clEnqueueCopyBuffer(clDevice.commandQueue, clNewOutput, clPrevOutput, 0, 0, sizeof(float)*units.size(), 0, NULL, NULL); err = clFinish(clDevice.commandQueue); } }