void PredictorSwarm::activate(sys::ComputeSystem &cs, const cl::Image2D &targets, const std::vector<cl::Image2D> &visibleStates, const std::vector<cl::Image2D> &visibleStatesPrev, float activeRatio, int inhibitionRadius, float noise, std::mt19937 &rng) { // Start by clearing summation buffer { cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f }; cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 }; cl::array<cl::size_type, 3> hiddenRegion = { _hiddenSize.x, _hiddenSize.y, 1 }; //cs.getQueue().enqueueCopyImage(_hiddenBiases[_back], _hiddenSummationTemp[_back], zeroOrigin, zeroOrigin, hiddenRegion); cs.getQueue().enqueueFillImage(_hiddenSummationTemp[_back], cl_float4{ 0.0f, 0.0f, 0.0f, 0.0f }, zeroOrigin, hiddenRegion); } for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; int argIndex = 0; _activateKernel.setArg(argIndex++, visibleStates[vli]); _activateKernel.setArg(argIndex++, _hiddenSummationTemp[_back]); _activateKernel.setArg(argIndex++, _hiddenSummationTemp[_front]); _activateKernel.setArg(argIndex++, vl._weights[_back]); _activateKernel.setArg(argIndex++, vld._size); _activateKernel.setArg(argIndex++, vl._hiddenToVisible); _activateKernel.setArg(argIndex++, vld._radius); cs.getQueue().enqueueNDRangeKernel(_activateKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); // Swap buffers std::swap(_hiddenSummationTemp[_front], _hiddenSummationTemp[_back]); } { std::uniform_int_distribution<int> seedDist(0, 999); cl_uint2 seed = { seedDist(rng), seedDist(rng) }; int argIndex = 0; _solveHiddenKernel.setArg(argIndex++, _hiddenSummationTemp[_back]); _solveHiddenKernel.setArg(argIndex++, _hiddenStates[_front]); _solveHiddenKernel.setArg(argIndex++, _hiddenActivations[_front]); _solveHiddenKernel.setArg(argIndex++, _hiddenSize); _solveHiddenKernel.setArg(argIndex++, inhibitionRadius); _solveHiddenKernel.setArg(argIndex++, activeRatio); 
cs.getQueue().enqueueNDRangeKernel(_solveHiddenKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } // Swap hidden state buffers std::swap(_hiddenStates[_front], _hiddenStates[_back]); std::swap(_hiddenActivations[_front], _hiddenActivations[_back]); }
void Predictor::activate(sys::ComputeSystem &cs, const std::vector<cl::Image2D> &visibleStates, NonlinearityType nonlinearityType, bool bufferSwap) { // Start by clearing summation buffer { cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f }; cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 }; cl::array<cl::size_type, 3> hiddenRegion = { _hiddenSize.x, _hiddenSize.y, 1 }; cs.getQueue().enqueueFillImage(_hiddenSummationTemp[_back], zeroColor, zeroOrigin, hiddenRegion); } for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; int argIndex = 0; _activateKernel.setArg(argIndex++, visibleStates[vli]); _activateKernel.setArg(argIndex++, _hiddenSummationTemp[_back]); _activateKernel.setArg(argIndex++, _hiddenSummationTemp[_front]); _activateKernel.setArg(argIndex++, vl._weights[_back]); _activateKernel.setArg(argIndex++, vld._size); _activateKernel.setArg(argIndex++, vl._hiddenToVisible); _activateKernel.setArg(argIndex++, vld._radius); cs.getQueue().enqueueNDRangeKernel(_activateKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); // Swap buffers std::swap(_hiddenSummationTemp[_front], _hiddenSummationTemp[_back]); } if (nonlinearityType == _binary) { int argIndex = 0; _solveHiddenBinaryKernel.setArg(argIndex++, _hiddenSummationTemp[_back]); _solveHiddenBinaryKernel.setArg(argIndex++, _hiddenStates[_front]); cs.getQueue().enqueueNDRangeKernel(_solveHiddenBinaryKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } else if (nonlinearityType == _tanH) { int argIndex = 0; _solveHiddenTanHKernel.setArg(argIndex++, _hiddenSummationTemp[_back]); _solveHiddenTanHKernel.setArg(argIndex++, _hiddenStates[_front]); cs.getQueue().enqueueNDRangeKernel(_solveHiddenTanHKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } else cs.getQueue().enqueueCopyImage(_hiddenSummationTemp[_back], _hiddenStates[_front], { 0, 0, 0 }, { 0, 0, 0 }, { 
static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }); // Swap hidden state buffers std::swap(_hiddenStates[_front], _hiddenStates[_back]); }
// Text serialization of the predictor. Currently disabled: abort() is the
// first statement, so none of the code below it is reached.
//
// The code after the abort() writes, in order: the hidden size, the contents
// of _hiddenStates[_back], the number of visible layers, and for each visible
// layer its descriptor, the contents of _weights[_back] and its
// hidden/visible mapping values.
//
// cs  Compute system whose queue is used for blocking image reads.
// os  Destination stream.
void Predictor::writeToStream(sys::ComputeSystem &cs, std::ostream &os) const {
    abort(); // Not yet working

    const int hiddenWidth = _hiddenSize.x;
    const int hiddenHeight = _hiddenSize.y;

    os << hiddenWidth << " " << hiddenHeight << std::endl;

    {
        std::vector<cl_float> stateValues(hiddenWidth * hiddenHeight);

        // Blocking read (CL_TRUE) so the data is valid before it is streamed
        cs.getQueue().enqueueReadImage(_hiddenStates[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(hiddenWidth), static_cast<cl::size_type>(hiddenHeight), 1 }, 0, 0, stateValues.data());

        for (cl_float value : stateValues)
            os << value << " ";

        os << std::endl;
    }

    // Layer information
    os << _visibleLayers.size() << std::endl;

    for (int layerIndex = 0; layerIndex < _visibleLayers.size(); layerIndex++) {
        const VisibleLayer &layer = _visibleLayers[layerIndex];
        const VisibleLayerDesc &desc = _visibleLayerDescs[layerIndex];

        // Desc
        os << desc._size.x << " " << desc._size.y << " " << desc._radius << std::endl;

        // Layer: one (2 * radius + 1)^2 weight patch per hidden unit
        const int patchDiam = desc._radius * 2 + 1;
        const int weightsPerUnit = patchDiam * patchDiam;
        const int weightCount = hiddenWidth * hiddenHeight * weightsPerUnit;

        {
            std::vector<cl_float> weightValues(weightCount);

            cs.getQueue().enqueueReadImage(layer._weights[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(hiddenWidth), static_cast<cl::size_type>(hiddenHeight), static_cast<cl::size_type>(weightsPerUnit) }, 0, 0, weightValues.data());

            for (cl_float value : weightValues)
                os << value << " ";
        }

        os << std::endl;

        os << layer._hiddenToVisible.x << " " << layer._hiddenToVisible.y << " "
           << layer._visibleToHidden.x << " " << layer._visibleToHidden.y << " "
           << layer._reverseRadii.x << " " << layer._reverseRadii.y << std::endl;
    }
}
void Predictor::learn(sys::ComputeSystem &cs, float tdError, const cl::Image2D &targets, std::vector<cl::Image2D> &visibleStatesPrev, float weightAlpha, float weightLambda) { // Learn weights for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; int argIndex = 0; _learnWeightsTracesKernel.setArg(argIndex++, visibleStatesPrev[vli]); _learnWeightsTracesKernel.setArg(argIndex++, targets); _learnWeightsTracesKernel.setArg(argIndex++, _hiddenStates[_front]); _learnWeightsTracesKernel.setArg(argIndex++, vl._weights[_back]); _learnWeightsTracesKernel.setArg(argIndex++, vl._weights[_front]); _learnWeightsTracesKernel.setArg(argIndex++, vld._size); _learnWeightsTracesKernel.setArg(argIndex++, vl._hiddenToVisible); _learnWeightsTracesKernel.setArg(argIndex++, vld._radius); _learnWeightsTracesKernel.setArg(argIndex++, weightAlpha); _learnWeightsTracesKernel.setArg(argIndex++, weightLambda); _learnWeightsTracesKernel.setArg(argIndex++, tdError); cs.getQueue().enqueueNDRangeKernel(_learnWeightsTracesKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); std::swap(vl._weights[_front], vl._weights[_back]); } }
void PredictorSwarm::learn(sys::ComputeSystem &cs, float reward, float gamma, const cl::Image2D &targets, std::vector<cl::Image2D> &visibleStatesPrev, cl_float2 weightAlpha, cl_float2 weightLambda, cl_float biasAlpha, cl_float activeRatio, float noise) { // Learn weights for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; int argIndex = 0; _learnWeightsTracesInhibitedKernel.setArg(argIndex++, visibleStatesPrev[vli]); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, targets); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, _hiddenStates[_back]); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, _hiddenActivations[_front]); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, _hiddenStates[_front]); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, vl._weights[_back]); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, vl._weights[_front]); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, vl._qTraces[_back]); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, vl._qTraces[_front]); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, vld._size); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, vl._hiddenToVisible); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, vld._radius); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, weightAlpha); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, weightLambda); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, reward); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, gamma); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, activeRatio); _learnWeightsTracesInhibitedKernel.setArg(argIndex++, noise); cs.getQueue().enqueueNDRangeKernel(_learnWeightsTracesInhibitedKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); std::swap(vl._weights[_front], vl._weights[_back]); std::swap(vl._qTraces[_front], vl._qTraces[_back]); } }
// Advances the network by one step.
//
// First the input spike kernel is enqueued over the first layer's
// feed-forward dimensions. Then eActivate() is called on every layer from
// first to last, each layer receiving the previous layer's
// _eLayer._statesPrev (the first receives _inputSpikesPrev). Finally
// iActivate() is called on every layer from last to first, each layer
// receiving the _iLayer._statesPrev of the layer after it (the last receives
// zeroImage).
//
// cs                   Compute system that provides the command queue.
// inputFrequencyImage  First argument of the input spike kernel.
// zeroImage            Input handed to the last layer's iActivate().
// eta                  Forwarded to eActivate()/iActivate().
// shDecay              Forwarded to the spike kernel and to both activate calls.
// saDecay              Forwarded to eActivate()/iActivate().
void HEInet::update(sys::ComputeSystem &cs, const cl::Image2D &inputFrequencyImage, const cl::Image2D &zeroImage, float eta, float shDecay, float saDecay) {
    // Update input spikes
    auto &spikeKernel = _kernels->_updateInputSpikesKernel;

    int slot = 0;

    spikeKernel.setArg(slot++, inputFrequencyImage);
    spikeKernel.setArg(slot++, _inputSpikeTimersPrev);
    spikeKernel.setArg(slot++, _inputSpikesHistoryPrev);
    spikeKernel.setArg(slot++, _inputSpikeTimers);
    spikeKernel.setArg(slot++, _inputSpikes);
    spikeKernel.setArg(slot++, _inputSpikesHistory);
    spikeKernel.setArg(slot++, shDecay);

    cs.getQueue().enqueueNDRangeKernel(spikeKernel, cl::NullRange, cl::NDRange(_eiLayers.front().getConfig()._eFeedForwardWidth, _eiLayers.front().getConfig()._eFeedForwardHeight));

    // Feed forward
    const cl::Image2D* pInput = &_inputSpikesPrev;

    for (auto &layer : _eiLayers) {
        layer.eActivate(cs, *pInput, eta, shDecay, saDecay);

        pInput = &layer._eLayer._statesPrev;
    }

    // Feed back
    pInput = &zeroImage;

    for (auto it = _eiLayers.rbegin(); it != _eiLayers.rend(); ++it) {
        it->iActivate(cs, *pInput, eta, shDecay, saDecay);

        pInput = &it->_iLayer._statesPrev;
    }
}
void HEInet::learnPrediction(sys::ComputeSystem &cs, const cl::Image2D &inputImage, float alpha) { cl_float2 eFeedForwardDimsToEDims = { static_cast<float>(_eiLayers.front().getConfig()._eWidth + 1) / static_cast<float>(_eiLayers.front().getConfig()._eFeedForwardWidth + 1), static_cast<float>(_eiLayers.front().getConfig()._eHeight + 1) / static_cast<float>(_eiLayers.front().getConfig()._eFeedForwardHeight + 1) }; cl_float2 eFeedForwardDimsToIDims = { static_cast<float>(_eiLayers.front().getConfig()._iWidth + 1) / static_cast<float>(_eiLayers.front().getConfig()._eFeedForwardWidth + 1), static_cast<float>(_eiLayers.front().getConfig()._iHeight + 1) / static_cast<float>(_eiLayers.front().getConfig()._eFeedForwardHeight + 1) }; cl_int2 eDims = { _eiLayers.front().getConfig()._eWidth, _eiLayers.front().getConfig()._eHeight }; cl_int2 iDims = { _eiLayers.front().getConfig()._iWidth, _eiLayers.front().getConfig()._iHeight }; int index = 0; _kernels->_predictionLearnKernel.setArg(index++, _eSpikeSumsIterPrev); _kernels->_predictionLearnKernel.setArg(index++, _iSpikeSumsIterPrev); _kernels->_predictionLearnKernel.setArg(index++, inputImage); _kernels->_predictionLearnKernel.setArg(index++, _predictionPrev); _kernels->_predictionLearnKernel.setArg(index++, _predictionFromEWeights._weightsPrev); _kernels->_predictionLearnKernel.setArg(index++, _predictionFromIWeights._weightsPrev); _kernels->_predictionLearnKernel.setArg(index++, _predictionFromEWeights._weights); _kernels->_predictionLearnKernel.setArg(index++, _predictionFromIWeights._weights); _kernels->_predictionLearnKernel.setArg(index++, eFeedForwardDimsToEDims); _kernels->_predictionLearnKernel.setArg(index++, eFeedForwardDimsToIDims); _kernels->_predictionLearnKernel.setArg(index++, eDims); _kernels->_predictionLearnKernel.setArg(index++, iDims); _kernels->_predictionLearnKernel.setArg(index++, _predictionRadiusFromE); _kernels->_predictionLearnKernel.setArg(index++, _predictionRadiusFromI); 
_kernels->_predictionLearnKernel.setArg(index++, alpha); cs.getQueue().enqueueNDRangeKernel(_kernels->_predictionLearnKernel, cl::NullRange, cl::NDRange(_eiLayers.front().getConfig()._eFeedForwardWidth, _eiLayers.front().getConfig()._eFeedForwardHeight)); }
// Enqueues the spike summation kernel twice for the first layer: once for
// its E layer states (into _eSpikeSums, over the E dimensions) and once for
// its I layer states (into _iSpikeSums, over the I dimensions).
//
// cs      Compute system that provides the command queue.
// scalar  Forwarded to the kernel as its last argument in both launches.
void HEInet::sumSpikes(sys::ComputeSystem &cs, float scalar) {
    auto &kernel = _kernels->_sumSpikesKernel;

    // Excitatory pass
    {
        int slot = 0;

        kernel.setArg(slot++, _eiLayers.front()._eLayer._states);
        kernel.setArg(slot++, _eSpikeSumsPrev);
        kernel.setArg(slot++, _eSpikeSums);
        kernel.setArg(slot++, scalar);

        cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(_eiLayers.front().getConfig()._eWidth, _eiLayers.front().getConfig()._eHeight));
    }

    // Inhibitory pass
    {
        int slot = 0;

        kernel.setArg(slot++, _eiLayers.front()._iLayer._states);
        kernel.setArg(slot++, _iSpikeSumsPrev);
        kernel.setArg(slot++, _iSpikeSums);
        kernel.setArg(slot++, scalar);

        cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(_eiLayers.front().getConfig()._iWidth, _eiLayers.front().getConfig()._iHeight));
    }
}
// Zero-fills _scHiddenStatesPrev of every layer over that layer's hidden
// size.
//
// cs  Compute system that provides the command queue.
void AgentSwarm::clearMemory(sys::ComputeSystem &cs) {
    const cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f };
    const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };

    const int numLayers = static_cast<int>(_layers.size());

    for (int l = 0; l < numLayers; l++) {
        // Explicit casts: int -> size_type inside braces is a narrowing conversion.
        const cl::array<cl::size_type, 3> layerRegion = { static_cast<cl::size_type>(_layerDescs[l]._hiddenSize.x), static_cast<cl::size_type>(_layerDescs[l]._hiddenSize.y), 1 };

        cs.getQueue().enqueueFillImage(_layers[l]._scHiddenStatesPrev, zeroColor, zeroOrigin, layerRegion);
    }
}
// Fills _inputSpikeTimersPrev with a single color over the first layer's
// feed-forward dimensions.
//
// cs     Compute system that provides the command queue.
// color  Fill value.
void HEInet::setInputPhase(sys::ComputeSystem &cs, cl_uint4 color) {
    cl::size_t<3> fillOrigin;
    fillOrigin[0] = 0;
    fillOrigin[1] = 0;
    fillOrigin[2] = 0;

    cl::size_t<3> fillRegion;
    fillRegion[0] = _eiLayers.front().getConfig()._eFeedForwardWidth;
    fillRegion[1] = _eiLayers.front().getConfig()._eFeedForwardHeight;
    fillRegion[2] = 1;

    cs.getQueue().enqueueFillImage(_inputSpikeTimersPrev, color, fillOrigin, fillRegion);
}
// Copies inputPhaseImage into _inputSpikeTimersPrev over the first layer's
// feed-forward dimensions, starting at the origin of both images.
//
// cs               Compute system that provides the command queue.
// inputPhaseImage  Source image of the copy.
void HEInet::setInputPhase(sys::ComputeSystem &cs, const cl::Image2D &inputPhaseImage) {
    cl::size_t<3> copyOrigin;
    copyOrigin[0] = 0;
    copyOrigin[1] = 0;
    copyOrigin[2] = 0;

    cl::size_t<3> copyRegion;
    copyRegion[0] = _eiLayers.front().getConfig()._eFeedForwardWidth;
    copyRegion[1] = _eiLayers.front().getConfig()._eFeedForwardHeight;
    copyRegion[2] = 1;

    // The same origin is used for source and destination
    cs.getQueue().enqueueCopyImage(inputPhaseImage, _inputSpikeTimersPrev, copyOrigin, copyOrigin, copyRegion);
}
// Zero-fills the four spike sum images: _eSpikeSums and _eSpikeSumsPrev over
// the first layer's E dimensions, _iSpikeSums and _iSpikeSumsPrev over its I
// dimensions.
//
// cs  Compute system that provides the command queue.
void HEInet::spikeSumBegin(sys::ComputeSystem &cs) {
    cl_float4 clearColor = { 0.0f, 0.0f, 0.0f, 0.0f };

    cl::size_t<3> origin;
    origin[0] = 0;
    origin[1] = 0;
    origin[2] = 0;

    cl::size_t<3> eRegion;
    eRegion[0] = _eiLayers.front().getConfig()._eWidth;
    eRegion[1] = _eiLayers.front().getConfig()._eHeight;
    eRegion[2] = 1;

    cl::size_t<3> iRegion;
    iRegion[0] = _eiLayers.front().getConfig()._iWidth;
    iRegion[1] = _eiLayers.front().getConfig()._iHeight;
    iRegion[2] = 1;

    // Excitatory sums
    cs.getQueue().enqueueFillImage(_eSpikeSums, clearColor, origin, eRegion);
    cs.getQueue().enqueueFillImage(_eSpikeSumsPrev, clearColor, origin, eRegion);

    // Inhibitory sums
    cs.getQueue().enqueueFillImage(_iSpikeSums, clearColor, origin, iRegion);
    cs.getQueue().enqueueFillImage(_iSpikeSumsPrev, clearColor, origin, iRegion);
}
// Enqueues randomUniform3DKernel over a size.x * size.y * size.z range with
// the image, a two-component seed drawn from rng, and the given range as its
// arguments.
//
// image3D                Image passed to the kernel as argument 0.
// cs                     Compute system that provides the command queue.
// randomUniform3DKernel  Kernel to configure and enqueue.
// size                   Global work size of the launch.
// range                  Passed to the kernel as argument 2.
// rng                    Source of the two seed values (two draws per call).
void neo::randomUniform(cl::Image3D &image3D, sys::ComputeSystem &cs, cl::Kernel &randomUniform3DKernel, cl_int3 size, cl_float2 range, std::mt19937 &rng) {
    int argIndex = 0;

    std::uniform_int_distribution<int> seedDist;

    // Explicit casts: int -> cl_uint inside braces is a narrowing conversion.
    // Braced initializers are evaluated left to right, so the draw order is
    // the same as before.
    const cl_uint2 seed = { static_cast<cl_uint>(seedDist(rng)), static_cast<cl_uint>(seedDist(rng)) };

    randomUniform3DKernel.setArg(argIndex++, image3D);
    randomUniform3DKernel.setArg(argIndex++, seed);
    randomUniform3DKernel.setArg(argIndex++, range);

    cs.getQueue().enqueueNDRangeKernel(randomUniform3DKernel, cl::NullRange, cl::NDRange(size.x, size.y, size.z));
}
// Zero-fills the three feed-back hidden state images of every layer
// (_hiddenStatesFeedBackPrevPrev, _hiddenStatesFeedBackPrev,
// _hiddenStatesFeedBack) over that layer's width and height.
//
// cs  Compute system that provides the command queue.
void HTFE::clearMemory(sys::ComputeSystem &cs) {
    // ------------------------------------------------------------------------------
    // -------------------------------- Clear Memory --------------------------------
    // ------------------------------------------------------------------------------

    cl_uint4 zeroFill = { 0, 0, 0, 0 };

    // The fill always starts at the image origin
    cl::size_t<3> fillOrigin;
    fillOrigin[0] = 0;
    fillOrigin[1] = 0;
    fillOrigin[2] = 0;

    for (int layerIndex = 0; layerIndex < _layers.size(); ++layerIndex) {
        cl::size_t<3> fillRegion;
        fillRegion[0] = _layerDescs[layerIndex]._width;
        fillRegion[1] = _layerDescs[layerIndex]._height;
        fillRegion[2] = 1;

        cs.getQueue().enqueueFillImage(_layers[layerIndex]._hiddenStatesFeedBackPrevPrev, zeroFill, fillOrigin, fillRegion);
        cs.getQueue().enqueueFillImage(_layers[layerIndex]._hiddenStatesFeedBackPrev, zeroFill, fillOrigin, fillRegion);
        cs.getQueue().enqueueFillImage(_layers[layerIndex]._hiddenStatesFeedBack, zeroFill, fillOrigin, fillRegion);
    }
}
// Enqueues _forwardKernel for one visible layer, launched over that layer's
// size, with hiddenStates and visibleStates as its first two arguments.
//
// cs                 Compute system that provides the command queue.
// hiddenStates       Kernel argument 0.
// visibleLayerIndex  Index into _visibleLayers / _visibleLayerDescs; it is
//                    not range-checked here.
// visibleStates      Kernel argument 1.
void ComparisonSparseCoder::reconstruct(sys::ComputeSystem &cs, const cl::Image2D &hiddenStates, int visibleLayerIndex, cl::Image2D &visibleStates) {
    VisibleLayer &layer = _visibleLayers[visibleLayerIndex];
    VisibleLayerDesc &desc = _visibleLayerDescs[visibleLayerIndex];

    int slot = 0;

    // Images
    _forwardKernel.setArg(slot++, hiddenStates);
    _forwardKernel.setArg(slot++, visibleStates);
    _forwardKernel.setArg(slot++, layer._weights[_back]);

    // Geometry
    _forwardKernel.setArg(slot++, desc._size);
    _forwardKernel.setArg(slot++, _hiddenSize);
    _forwardKernel.setArg(slot++, layer._visibleToHidden);
    _forwardKernel.setArg(slot++, layer._hiddenToVisible);
    _forwardKernel.setArg(slot++, desc._radius);
    _forwardKernel.setArg(slot++, layer._reverseRadii);

    // One work item per visible unit
    const cl::NDRange visibleRange(desc._size.x, desc._size.y);

    cs.getQueue().enqueueNDRangeKernel(_forwardKernel, cl::NullRange, visibleRange);
}
void AgentSwarm::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program, cl_int2 inputSize, cl_int2 actionSize, cl_int firstLayerPredictorRadius, const std::vector<LayerDesc> &layerDescs, cl_float2 initWeightRange, std::mt19937 &rng) { _layerDescs = layerDescs; _layers.resize(_layerDescs.size()); cl_int2 prevLayerSize = inputSize; for (int l = 0; l < _layers.size(); l++) { std::vector<ComparisonSparseCoder::VisibleLayerDesc> scDescs(2); scDescs[0]._size = prevLayerSize; scDescs[0]._radius = _layerDescs[l]._feedForwardRadius; scDescs[0]._ignoreMiddle = false; scDescs[0]._weightAlpha = _layerDescs[l]._scWeightAlpha; scDescs[0]._weightLambda = _layerDescs[l]._scWeightLambda; scDescs[0]._useTraces = false; scDescs[1]._size = _layerDescs[l]._hiddenSize; scDescs[1]._radius = _layerDescs[l]._recurrentRadius; scDescs[1]._ignoreMiddle = true; scDescs[1]._weightAlpha = _layerDescs[l]._scWeightRecurrentAlpha; scDescs[1]._weightLambda = _layerDescs[l]._scWeightLambda; scDescs[1]._useTraces = false; _layers[l]._sc.createRandom(cs, program, scDescs, _layerDescs[l]._hiddenSize, _layerDescs[l]._lateralRadius, initWeightRange, rng); _layers[l]._modulatedFeedForwardInput = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), prevLayerSize.x, prevLayerSize.y); _layers[l]._modulatedRecurrentInput = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y); std::vector<Predictor::VisibleLayerDesc> predDescs; if (l < _layers.size() - 1) { predDescs.resize(2); predDescs[0]._size = _layerDescs[l]._hiddenSize; predDescs[0]._radius = _layerDescs[l]._predictiveRadius; predDescs[1]._size = _layerDescs[l + 1]._hiddenSize; predDescs[1]._radius = _layerDescs[l]._feedBackRadius; } else { predDescs.resize(1); predDescs[0]._size = _layerDescs[l]._hiddenSize; predDescs[0]._radius = _layerDescs[l]._predictiveRadius; } _layers[l]._pred.createRandom(cs, program, predDescs, 
prevLayerSize, initWeightRange, false, rng); std::vector<Swarm::VisibleLayerDesc> swarmDescs; if (l == 0) { swarmDescs.resize(3); swarmDescs[0]._size = inputSize; swarmDescs[0]._qRadius = _layerDescs[l]._qRadiusHiddenFeedForwardAttention; swarmDescs[0]._startRadius = _layerDescs[l]._startRadiusHiddenFeedForwardAttention; swarmDescs[1]._size = _layerDescs[l]._hiddenSize; swarmDescs[1]._qRadius = _layerDescs[l]._qRadiusHiddenRecurrentAttention; swarmDescs[1]._startRadius = _layerDescs[l]._startRadiusHiddenRecurrentAttention; swarmDescs[2]._size = actionSize; swarmDescs[2]._qRadius = _layerDescs[l]._qRadiusHiddenAction; swarmDescs[2]._startRadius = _layerDescs[l]._startRadiusHiddenAction; } else { swarmDescs.resize(3); swarmDescs[0]._size = _layerDescs[l - 1]._hiddenSize; swarmDescs[0]._qRadius = _layerDescs[l]._qRadiusHiddenFeedForwardAttention; swarmDescs[0]._startRadius = _layerDescs[l]._startRadiusHiddenFeedForwardAttention; swarmDescs[1]._size = _layerDescs[l]._hiddenSize; swarmDescs[1]._qRadius = _layerDescs[l]._qRadiusHiddenRecurrentAttention; swarmDescs[1]._startRadius = _layerDescs[l]._startRadiusHiddenRecurrentAttention; swarmDescs[2]._size = _layerDescs[l - 1]._hiddenSize; swarmDescs[2]._qRadius = _layerDescs[l]._qRadiusHiddenAction; swarmDescs[2]._startRadius = _layerDescs[l]._startRadiusHiddenAction; } _layers[l]._swarm.createRandom(cs, program, swarmDescs, _layerDescs[l]._qSize, _layerDescs[l]._hiddenSize, _layerDescs[l]._qRadius, initWeightRange, rng); // Create baselines _layers[l]._baseLines = createDoubleBuffer2D(cs, _layerDescs[l]._hiddenSize, CL_R, CL_FLOAT); _layers[l]._reward = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y); _layers[l]._scHiddenStatesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y); cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f }; 
cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 }; if (l != 0) { cl::array<cl::size_type, 3> actionRegion = { _layers[l]._swarm.getVisibleLayerDesc(2)._size.x, _layers[l]._swarm.getVisibleLayerDesc(2)._size.y, 1 }; _layers[l]._inhibitedAction = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), swarmDescs[1]._size.x, swarmDescs[1]._size.y); cs.getQueue().enqueueFillImage(_layers[l]._inhibitedAction, zeroColor, zeroOrigin, actionRegion); } cl::array<cl::size_type, 3> layerRegion = { _layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y, 1 }; cs.getQueue().enqueueFillImage(_layers[l]._baseLines[_back], zeroColor, zeroOrigin, layerRegion); cs.getQueue().enqueueFillImage(_layers[l]._reward, zeroColor, zeroOrigin, layerRegion); cs.getQueue().enqueueFillImage(_layers[l]._scHiddenStatesPrev, zeroColor, zeroOrigin, layerRegion); prevLayerSize = _layerDescs[l]._hiddenSize; } { cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f }; cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 }; cl::array<cl::size_type, 3> layerRegion = { _layerDescs.back()._hiddenSize.x, _layerDescs.back()._hiddenSize.y, 1 }; _lastLayerAction = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs.back()._hiddenSize.x, _layerDescs.back()._hiddenSize.y); cs.getQueue().enqueueFillImage(_lastLayerAction, zeroColor, zeroOrigin, layerRegion); } _baseLineUpdateKernel = cl::Kernel(program.getProgram(), "phBaseLineUpdate"); _baseLineUpdateSumErrorKernel = cl::Kernel(program.getProgram(), "phBaseLineUpdateSumError"); _inhibitKernel = cl::Kernel(program.getProgram(), "phInhibit"); _modulateKernel = cl::Kernel(program.getProgram(), "phModulate"); }
// Enqueues the weight update kernels for every layer, from first to last.
//
// Per layer, two kernels are launched:
//   - a hidden weight update over the layer's width * height; the last layer
//     uses _layerHiddenWeightUpdateLastKernel, every other layer uses
//     _layerHiddenWeightUpdateKernel, which additionally receives the next
//     layer's feed-back state, the feed-back weights and the next layer's
//     size;
//   - _layerVisibleWeightUpdateKernel over the size of the layer's input.
// The input of layer 0 is _inputImage / _inputImagePrev; the input of every
// other layer is the previous layer's _hiddenStatesFeedForward /
// _hiddenStatesFeedForwardPrev.
//
// cs  Compute system that provides the command queue.
void HTFE::learn(sys::ComputeSystem &cs) {
    // ------------------------------------------------------------------------------
    // ---------------------- Weight Update and Predictions ------------------------
    // ------------------------------------------------------------------------------

    cl::Image2D* pPrevLayer = &_inputImage;
    int prevWidth = _inputWidth;
    int prevHeight = _inputHeight;

    cl::Image2D* pPrevLayerFeedForwardPrev = &_inputImagePrev;

    const int numLayers = static_cast<int>(_layers.size());

    for (int l = 0; l < numLayers; l++) {
        const bool isLastLayer = (l == numLayers - 1);

        Int2 layerSize;
        layerSize._x = _layerDescs[l]._width;
        layerSize._y = _layerDescs[l]._height;

        Int2 layerSizeMinusOne;
        layerSizeMinusOne._x = _layerDescs[l]._width - 1;
        layerSizeMinusOne._y = _layerDescs[l]._height - 1;

        Float2 layerSizeMinusOneInv;
        layerSizeMinusOneInv._x = 1.0f / (_layerDescs[l]._width - 1);
        layerSizeMinusOneInv._y = 1.0f / (_layerDescs[l]._height - 1);

        Int2 inputSize;
        inputSize._x = prevWidth;
        inputSize._y = prevHeight;

        Int2 inputSizeMinusOne;
        inputSizeMinusOne._x = prevWidth - 1;
        inputSizeMinusOne._y = prevHeight - 1;

        Float2 inputSizeMinusOneInv;
        inputSizeMinusOneInv._x = 1.0f / (prevWidth - 1);
        inputSizeMinusOneInv._y = 1.0f / (prevHeight - 1);

        // ------------------------------- Weight Updates -------------------------------

        Float4 alphas;
        alphas._x = _layerDescs[l]._feedForwardAlpha;
        alphas._y = _layerDescs[l]._lateralAlpha;
        alphas._z = _layerDescs[l]._feedBackAlpha;
        alphas._w = _layerDescs[l]._hiddenBiasAlpha;

        int index = 0;

        if (isLastLayer) {
            // No layer above: this kernel takes no feed-back weights or next-layer size
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._visibleReconstructionPrev);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, *pPrevLayer);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, *pPrevLayerFeedForwardPrev);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._hiddenFeedBackActivationsPrev);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrev);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrevPrev);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._reconstructionWeightsPrev);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._feedForwardWeightsPrev);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._lateralWeightsPrev);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._hiddenBiasesPrev);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._feedForwardWeights);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._lateralWeights);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layers[l]._hiddenBiases);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, layerSize);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, layerSizeMinusOne);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, layerSizeMinusOneInv);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, inputSize);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, inputSizeMinusOne);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, inputSizeMinusOneInv);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layerDescs[l]._receptiveFieldRadius);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layerDescs[l]._lateralConnectionRadius);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layerDescs[l]._reconstructionRadius);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layerDescs[l]._sparsity);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, alphas);
            _layerHiddenWeightUpdateLastKernel.setArg(index++, _layerDescs[l]._weightDecay);

            cs.getQueue().enqueueNDRangeKernel(_layerHiddenWeightUpdateLastKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._width, _layerDescs[l]._height));
        }
        else {
            // Size of the layer above; only this kernel consumes it
            Int2 nextSize;
            nextSize._x = _layerDescs[l + 1]._width;
            nextSize._y = _layerDescs[l + 1]._height;

            Int2 nextSizeMinusOne;
            nextSizeMinusOne._x = _layerDescs[l + 1]._width - 1;
            nextSizeMinusOne._y = _layerDescs[l + 1]._height - 1;

            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._visibleReconstructionPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, *pPrevLayer);
            _layerHiddenWeightUpdateKernel.setArg(index++, *pPrevLayerFeedForwardPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._hiddenFeedBackActivationsPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrevPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l + 1]._hiddenStatesFeedBackPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._reconstructionWeightsPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._feedForwardWeightsPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._lateralWeightsPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._hiddenBiasesPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._feedBackWeightsPrev);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._feedForwardWeights);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._lateralWeights);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._hiddenBiases);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layers[l]._feedBackWeights);
            _layerHiddenWeightUpdateKernel.setArg(index++, layerSize);
            _layerHiddenWeightUpdateKernel.setArg(index++, layerSizeMinusOne);
            _layerHiddenWeightUpdateKernel.setArg(index++, layerSizeMinusOneInv);
            _layerHiddenWeightUpdateKernel.setArg(index++, inputSize);
            _layerHiddenWeightUpdateKernel.setArg(index++, inputSizeMinusOne);
            _layerHiddenWeightUpdateKernel.setArg(index++, inputSizeMinusOneInv);
            _layerHiddenWeightUpdateKernel.setArg(index++, nextSize);
            _layerHiddenWeightUpdateKernel.setArg(index++, nextSizeMinusOne);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layerDescs[l]._receptiveFieldRadius);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layerDescs[l]._lateralConnectionRadius);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layerDescs[l]._feedBackConnectionRadius);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layerDescs[l]._reconstructionRadius);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layerDescs[l]._sparsity);
            _layerHiddenWeightUpdateKernel.setArg(index++, alphas);
            _layerHiddenWeightUpdateKernel.setArg(index++, _layerDescs[l]._weightDecay);

            cs.getQueue().enqueueNDRangeKernel(_layerHiddenWeightUpdateKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._width, _layerDescs[l]._height));
        }

        index = 0;

        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._visibleReconstructionPrev);
        _layerVisibleWeightUpdateKernel.setArg(index++, *pPrevLayer);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrev);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._reconstructionWeightsPrev);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._visibleBiasesPrev);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._reconstructionWeights);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._visibleBiases);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layerDescs[l]._reconstructionRadius);
        _layerVisibleWeightUpdateKernel.setArg(index++, inputSizeMinusOne);
        _layerVisibleWeightUpdateKernel.setArg(index++, inputSizeMinusOneInv);
        _layerVisibleWeightUpdateKernel.setArg(index++, layerSize);
        _layerVisibleWeightUpdateKernel.setArg(index++, layerSizeMinusOne);
        _layerVisibleWeightUpdateKernel.setArg(index++, layerSizeMinusOneInv);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layerDescs[l]._reconstructionAlpha);

        // One work item per unit of this layer's input
        cs.getQueue().enqueueNDRangeKernel(_layerVisibleWeightUpdateKernel, cl::NullRange, cl::NDRange(prevWidth, prevHeight));

        // This layer becomes the input of the next one
        pPrevLayer = &_layers[l]._hiddenStatesFeedForward; // Or _hiddenStatesFeedBack ?
        prevWidth = _layerDescs[l]._width;
        prevHeight = _layerDescs[l]._height;

        pPrevLayerFeedForwardPrev = &_layers[l]._hiddenStatesFeedForwardPrev;
    }
}
// Runs one activation step of the hierarchy.
//
// The host-side _input buffer is uploaded to _inputImage. The layers are then visited
// bottom-up (feed-forward activation followed by inhibition) and afterwards top-down
// (feed-back activation, inhibition, and reconstruction of each layer's visible input).
// Finally the reconstruction held by the first layer is read back into _prediction.
//
// cs - compute system whose queue receives every transfer and kernel launch.
//
// The upload and the read-back are enqueued as blocking transfers (CL_TRUE).
// If the network has no layers, only the upload is performed and _prediction is left untouched.
void HTFE::activate(sys::ComputeSystem &cs) {
    // Upload the current input to the device.
    {
        cl::size_t<3> origin;
        origin[0] = 0;
        origin[1] = 0;
        origin[2] = 0;

        cl::size_t<3> region;
        region[0] = _inputWidth;
        region[1] = _inputHeight;
        region[2] = 1;

        cs.getQueue().enqueueWriteImage(_inputImage, CL_TRUE, origin, region, 0, 0, _input.data());
    }

    // Nothing to activate without layers; this also keeps _layers.front() below well defined.
    if (_layers.empty())
        return;

    const int numLayers = static_cast<int>(_layers.size());

    // Builders for the small size structures that are passed to the kernels.
    auto makeInt2 = [](int x, int y) -> Int2 {
        Int2 value;
        value._x = x;
        value._y = y;
        return value;
    };

    auto makeInverseOfSizeMinusOne = [](int width, int height) -> Float2 {
        Float2 value;
        value._x = 1.0f / (width - 1);
        value._y = 1.0f / (height - 1);
        return value;
    };

    // Sparsity times the area of the inhibition window, rounded; passed to the inhibition kernel.
    auto localActivityOf = [this](int l) -> float {
        const auto &desc = _layerDescs[l];
        return std::round(desc._sparsity * std::pow(2 * desc._inhibitionRadius + 1, 2));
    };

    // ------------------------------------------------------------------------------
    // ---------------------------------- Up pass -----------------------------------
    // ------------------------------------------------------------------------------

    cl::Image2D* pPrevLayer = &_inputImage;
    int prevWidth = _inputWidth;
    int prevHeight = _inputHeight;

    for (int l = 0; l < numLayers; l++) {
        const int layerWidth = _layerDescs[l]._width;
        const int layerHeight = _layerDescs[l]._height;

        const float localActivity = localActivityOf(l);

        const Int2 layerSize = makeInt2(layerWidth, layerHeight);
        const Float2 layerSizeMinusOneInv = makeInverseOfSizeMinusOne(layerWidth, layerHeight);
        const Int2 inputSize = makeInt2(prevWidth, prevHeight);
        const Int2 inputSizeMinusOne = makeInt2(prevWidth - 1, prevHeight - 1);

        // Feed-forward activation from the layer below (or the input image for l == 0).
        int index = 0;

        _layerHiddenFeedForwardActivateKernel.setArg(index++, *pPrevLayer);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrev);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layers[l]._feedForwardWeightsPrev);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layers[l]._lateralWeightsPrev);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layers[l]._hiddenBiasesPrev);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layers[l]._hiddenFeedForwardActivations);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, layerSize);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, layerSizeMinusOneInv);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, inputSize);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, inputSizeMinusOne);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layerDescs[l]._receptiveFieldRadius);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layerDescs[l]._lateralConnectionRadius);

        cs.getQueue().enqueueNDRangeKernel(_layerHiddenFeedForwardActivateKernel, cl::NullRange, cl::NDRange(layerWidth, layerHeight));

        // Inhibition turns the feed-forward activations into feed-forward states.
        index = 0;

        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenFeedForwardActivations);
        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenStatesFeedForwardPrev);
        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenStatesFeedForward);
        _layerHiddenInhibitKernel.setArg(index++, layerSize);
        _layerHiddenInhibitKernel.setArg(index++, _layerDescs[l]._inhibitionRadius);
        _layerHiddenInhibitKernel.setArg(index++, localActivity);

        cs.getQueue().enqueueNDRangeKernel(_layerHiddenInhibitKernel, cl::NullRange, cl::NDRange(layerWidth, layerHeight));

        // This layer's feed-forward states are the input of the next layer up.
        pPrevLayer = &_layers[l]._hiddenStatesFeedForward;
        prevWidth = layerWidth;
        prevHeight = layerHeight;
    }

    // ------------------------------------------------------------------------------
    // --------------------------------- Down pass ----------------------------------
    // ------------------------------------------------------------------------------

    for (int l = numLayers - 1; l >= 0; l--) {
        const bool isTopLayer = (l == numLayers - 1);

        // Size of what this layer reconstructs: the layer below, or the input for l == 0.
        if (l > 0) {
            prevWidth = _layerDescs[l - 1]._width;
            prevHeight = _layerDescs[l - 1]._height;
        }
        else {
            prevWidth = _inputWidth;
            prevHeight = _inputHeight;
        }

        const int layerWidth = _layerDescs[l]._width;
        const int layerHeight = _layerDescs[l]._height;

        const float localActivity = localActivityOf(l);

        const Int2 layerSize = makeInt2(layerWidth, layerHeight);
        const Int2 layerSizeMinusOne = makeInt2(layerWidth - 1, layerHeight - 1);
        const Float2 layerSizeMinusOneInv = makeInverseOfSizeMinusOne(layerWidth, layerHeight);
        const Int2 inputSizeMinusOne = makeInt2(prevWidth - 1, prevHeight - 1);
        const Float2 inputSizeMinusOneInv = makeInverseOfSizeMinusOne(prevWidth, prevHeight);

        int index = 0;

        if (isTopLayer) {
            // The top layer has no layer above it: its feed-back activations are a copy of
            // its feed-forward activations.
            cl::size_t<3> origin;
            origin[0] = 0;
            origin[1] = 0;
            origin[2] = 0;

            cl::size_t<3> region;
            region[0] = layerWidth;
            region[1] = layerHeight;
            region[2] = 1;

            cs.getQueue().enqueueCopyImage(_layers[l]._hiddenFeedForwardActivations, _layers[l]._hiddenFeedBackActivations, origin, origin, region);
        }
        else {
            // Combine this layer's feed-forward activations with the feed-back activations
            // of the layer above.
            const Int2 nextSize = makeInt2(_layerDescs[l + 1]._width, _layerDescs[l + 1]._height);
            const Int2 nextSizeMinusOne = makeInt2(_layerDescs[l + 1]._width - 1, _layerDescs[l + 1]._height - 1);

            _layerHiddenFeedBackActivateKernel.setArg(index++, _layers[l]._hiddenFeedForwardActivations);
            _layerHiddenFeedBackActivateKernel.setArg(index++, _layers[l + 1]._hiddenFeedBackActivations);
            _layerHiddenFeedBackActivateKernel.setArg(index++, _layers[l]._feedBackWeightsPrev);
            _layerHiddenFeedBackActivateKernel.setArg(index++, _layers[l]._hiddenFeedBackActivations);
            _layerHiddenFeedBackActivateKernel.setArg(index++, layerSize);
            _layerHiddenFeedBackActivateKernel.setArg(index++, layerSizeMinusOneInv);
            _layerHiddenFeedBackActivateKernel.setArg(index++, nextSize);
            _layerHiddenFeedBackActivateKernel.setArg(index++, nextSizeMinusOne);
            _layerHiddenFeedBackActivateKernel.setArg(index++, _layerDescs[l]._feedBackConnectionRadius);

            cs.getQueue().enqueueNDRangeKernel(_layerHiddenFeedBackActivateKernel, cl::NullRange, cl::NDRange(layerWidth, layerHeight));
        }

        // Inhibition turns the feed-back activations into feed-back states.
        index = 0;

        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenFeedBackActivations);
        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrev);
        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenStatesFeedBack);
        _layerHiddenInhibitKernel.setArg(index++, layerSize);
        _layerHiddenInhibitKernel.setArg(index++, _layerDescs[l]._inhibitionRadius);
        _layerHiddenInhibitKernel.setArg(index++, localActivity);

        cs.getQueue().enqueueNDRangeKernel(_layerHiddenInhibitKernel, cl::NullRange, cl::NDRange(layerWidth, layerHeight));

        // Prediction: reconstruct the visible input of this layer from its feed-back states.
        index = 0;

        _layerVisibleReconstructKernel.setArg(index++, _layers[l]._hiddenStatesFeedBack);
        _layerVisibleReconstructKernel.setArg(index++, _layers[l]._reconstructionWeightsPrev);
        _layerVisibleReconstructKernel.setArg(index++, _layers[l]._visibleBiasesPrev);
        _layerVisibleReconstructKernel.setArg(index++, _layers[l]._visibleReconstruction);
        _layerVisibleReconstructKernel.setArg(index++, _layerDescs[l]._reconstructionRadius);
        _layerVisibleReconstructKernel.setArg(index++, inputSizeMinusOne);
        _layerVisibleReconstructKernel.setArg(index++, inputSizeMinusOneInv);
        _layerVisibleReconstructKernel.setArg(index++, layerSize);
        _layerVisibleReconstructKernel.setArg(index++, layerSizeMinusOne);
        _layerVisibleReconstructKernel.setArg(index++, layerSizeMinusOneInv);

        // One work item per visible (reconstructed) element.
        cs.getQueue().enqueueNDRangeKernel(_layerVisibleReconstructKernel, cl::NullRange, cl::NDRange(prevWidth, prevHeight));
    }

    // Read the first layer's reconstruction back to the host as the prediction.
    {
        cl::size_t<3> origin;
        origin[0] = 0;
        origin[1] = 0;
        origin[2] = 0;

        cl::size_t<3> region;
        region[0] = _inputWidth;
        region[1] = _inputHeight;
        region[2] = 1;

        cs.getQueue().enqueueReadImage(_layers.front()._visibleReconstruction, CL_TRUE, origin, region, 0, 0, _prediction.data());
    }
}
void HTFE::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program, int inputWidth, int inputHeight, const std::vector<LayerDesc> &layerDescs, float minInitWeight, float maxInitWeight) { std::mt19937 generator(time(nullptr)); std::uniform_int_distribution<int> seedDist(0, 99999); _inputWidth = inputWidth; _inputHeight = inputHeight; _layerDescs = layerDescs; _layers.resize(_layerDescs.size()); cl::Kernel initializeLayerHiddenKernel = cl::Kernel(program.getProgram(), "initializeLayerHidden"); cl::Kernel initializeLayerVisibleKernel = cl::Kernel(program.getProgram(), "initializeLayerVisible"); _input.clear(); _input.resize(_inputWidth * _inputHeight, 0.0f); _prediction.clear(); _prediction.resize(_inputWidth * _inputHeight, 0.0f); _inputImage = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _inputWidth, _inputHeight); _inputImagePrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _inputWidth, _inputHeight); { cl_uint4 clear = { 0, 0, 0, 0 }; cl::size_t<3> origin; origin[0] = 0; origin[1] = 0; origin[2] = 0; cl::size_t<3> region; region[0] = _inputWidth; region[1] = _inputHeight; region[2] = 1; cs.getQueue().enqueueFillImage(_inputImage, clear, origin, region); cs.getQueue().enqueueFillImage(_inputImagePrev, clear, origin, region); } int prevWidth = _inputWidth; int prevHeight = _inputHeight; for (int l = 0; l < _layers.size(); l++) { int numFeedForwardWeights = std::pow(_layerDescs[l]._receptiveFieldRadius * 2 + 1, 2); int numReconstructionWeights = std::pow(_layerDescs[l]._reconstructionRadius * 2 + 1, 2); int numLateralWeights = std::pow(_layerDescs[l]._lateralConnectionRadius * 2 + 1, 2); int numFeedBackWeights = std::pow(_layerDescs[l]._feedBackConnectionRadius * 2 + 1, 2); _layers[l]._hiddenFeedForwardActivations = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height); _layers[l]._hiddenFeedBackActivations = 
cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height); _layers[l]._hiddenFeedBackActivationsPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height); _layers[l]._hiddenStatesFeedForward = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height); _layers[l]._hiddenStatesFeedForwardPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height); _layers[l]._hiddenStatesFeedBack = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height); _layers[l]._hiddenStatesFeedBackPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height); _layers[l]._hiddenStatesFeedBackPrevPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height); _layers[l]._feedForwardWeights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height, numFeedForwardWeights); _layers[l]._feedForwardWeightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height, numFeedForwardWeights); _layers[l]._reconstructionWeights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), prevWidth, prevHeight, numReconstructionWeights); _layers[l]._reconstructionWeightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), prevWidth, prevHeight, numReconstructionWeights); _layers[l]._visibleBiases = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), prevWidth, prevHeight); 
_layers[l]._visibleBiasesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), prevWidth, prevHeight); _layers[l]._hiddenBiases = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height); _layers[l]._hiddenBiasesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height); _layers[l]._lateralWeights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height, numLateralWeights); _layers[l]._lateralWeightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height, numLateralWeights); _layers[l]._feedBackWeights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height, numFeedBackWeights); _layers[l]._feedBackWeightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._width, _layerDescs[l]._height, numFeedBackWeights); _layers[l]._visibleReconstruction = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), prevWidth, prevHeight); _layers[l]._visibleReconstructionPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), prevWidth, prevHeight); // Initialize Uint2 initSeedHidden; initSeedHidden._x = seedDist(generator); initSeedHidden._y = seedDist(generator); int index = 0; initializeLayerHiddenKernel.setArg(index++, _layers[l]._hiddenFeedForwardActivations); initializeLayerHiddenKernel.setArg(index++, _layers[l]._hiddenFeedBackActivations); initializeLayerHiddenKernel.setArg(index++, _layers[l]._hiddenStatesFeedForward); initializeLayerHiddenKernel.setArg(index++, _layers[l]._feedForwardWeights); initializeLayerHiddenKernel.setArg(index++, _layers[l]._hiddenBiases); 
initializeLayerHiddenKernel.setArg(index++, _layers[l]._lateralWeights); initializeLayerHiddenKernel.setArg(index++, _layers[l]._feedBackWeights); initializeLayerHiddenKernel.setArg(index++, numFeedForwardWeights); initializeLayerHiddenKernel.setArg(index++, numLateralWeights); initializeLayerHiddenKernel.setArg(index++, numFeedBackWeights); initializeLayerHiddenKernel.setArg(index++, initSeedHidden); initializeLayerHiddenKernel.setArg(index++, _layerDescs[l]._sparsity); initializeLayerHiddenKernel.setArg(index++, _layerDescs[l]._lateralScalar); initializeLayerHiddenKernel.setArg(index++, _layerDescs[l]._feedBackScalar); initializeLayerHiddenKernel.setArg(index++, minInitWeight); initializeLayerHiddenKernel.setArg(index++, maxInitWeight); cs.getQueue().enqueueNDRangeKernel(initializeLayerHiddenKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._width, _layerDescs[l]._height)); Uint2 initSeedVisible; initSeedVisible._x = seedDist(generator); initSeedVisible._y = seedDist(generator); index = 0; initializeLayerVisibleKernel.setArg(index++, _layers[l]._visibleBiases); initializeLayerVisibleKernel.setArg(index++, _layers[l]._visibleReconstruction); initializeLayerVisibleKernel.setArg(index++, _layers[l]._reconstructionWeights); initializeLayerVisibleKernel.setArg(index++, numReconstructionWeights); initializeLayerVisibleKernel.setArg(index++, initSeedVisible); initializeLayerVisibleKernel.setArg(index++, minInitWeight); initializeLayerVisibleKernel.setArg(index++, maxInitWeight); cs.getQueue().enqueueNDRangeKernel(initializeLayerVisibleKernel, cl::NullRange, cl::NDRange(prevWidth, prevHeight)); { cl::size_t<3> origin; origin[0] = 0; origin[1] = 0; origin[2] = 0; cl::size_t<3> region; region[0] = _layerDescs[l]._width; region[1] = _layerDescs[l]._height; region[2] = 1; cs.getQueue().enqueueCopyImage(_layers[l]._hiddenFeedBackActivations, _layers[l]._hiddenFeedBackActivationsPrev, origin, origin, region); } { cl::size_t<3> origin; origin[0] = 0; origin[1] = 0; origin[2] = 
0; cl::size_t<3> region; region[0] = prevWidth; region[1] = prevHeight; region[2] = 1; cs.getQueue().enqueueCopyImage(_layers[l]._visibleReconstruction, _layers[l]._visibleReconstructionPrev, origin, origin, region); } { cl::size_t<3> origin; origin[0] = 0; origin[1] = 0; origin[2] = 0; cl::size_t<3> region; region[0] = _layerDescs[l]._width; region[1] = _layerDescs[l]._height; region[2] = 1; cs.getQueue().enqueueCopyImage(_layers[l]._hiddenStatesFeedForward, _layers[l]._hiddenStatesFeedForwardPrev, origin, origin, region); cs.getQueue().enqueueCopyImage(_layers[l]._hiddenStatesFeedForward, _layers[l]._hiddenStatesFeedBack, origin, origin, region); cs.getQueue().enqueueCopyImage(_layers[l]._hiddenStatesFeedForward, _layers[l]._hiddenStatesFeedBackPrev, origin, origin, region); cs.getQueue().enqueueCopyImage(_layers[l]._hiddenStatesFeedForward, _layers[l]._hiddenStatesFeedBackPrevPrev, origin, origin, region); } { cl::size_t<3> origin; origin[0] = 0; origin[1] = 0; origin[2] = 0; cl::size_t<3> region; region[0] = _layerDescs[l]._width; region[1] = _layerDescs[l]._height; region[2] = numFeedForwardWeights; cs.getQueue().enqueueCopyImage(_layers[l]._feedForwardWeights, _layers[l]._feedForwardWeightsPrev, origin, origin, region); } { cl::size_t<3> origin; origin[0] = 0; origin[1] = 0; origin[2] = 0; cl::size_t<3> region; region[0] = prevWidth; region[1] = prevHeight; region[2] = 1; cs.getQueue().enqueueCopyImage(_layers[l]._visibleBiases, _layers[l]._visibleBiasesPrev, origin, origin, region); } { cl::size_t<3> origin; origin[0] = 0; origin[1] = 0; origin[2] = 0; cl::size_t<3> region; region[0] = _layerDescs[l]._width; region[1] = _layerDescs[l]._height; region[2] = 1; cs.getQueue().enqueueCopyImage(_layers[l]._hiddenBiases, _layers[l]._hiddenBiasesPrev, origin, origin, region); } { cl::size_t<3> origin; origin[0] = 0; origin[1] = 0; origin[2] = 0; cl::size_t<3> region; region[0] = _layerDescs[l]._width; region[1] = _layerDescs[l]._height; region[2] = 
numLateralWeights; cs.getQueue().enqueueCopyImage(_layers[l]._lateralWeights, _layers[l]._lateralWeightsPrev, origin, origin, region); } { cl::size_t<3> origin; origin[0] = 0; origin[1] = 0; origin[2] = 0; cl::size_t<3> region; region[0] = _layerDescs[l]._width; region[1] = _layerDescs[l]._height; region[2] = numFeedBackWeights; cs.getQueue().enqueueCopyImage(_layers[l]._feedBackWeights, _layers[l]._feedBackWeightsPrev, origin, origin, region); } { cl::size_t<3> origin; origin[0] = 0; origin[1] = 0; origin[2] = 0; cl::size_t<3> region; region[0] = prevWidth; region[1] = prevHeight; region[2] = numReconstructionWeights; cs.getQueue().enqueueCopyImage(_layers[l]._reconstructionWeights, _layers[l]._reconstructionWeightsPrev, origin, origin, region); } prevWidth = _layerDescs[l]._width; prevHeight = _layerDescs[l]._height; } _layerHiddenFeedForwardActivateKernel = cl::Kernel(program.getProgram(), "layerHiddenFeedForwardActivate"); _layerHiddenFeedBackActivateKernel = cl::Kernel(program.getProgram(), "layerHiddenFeedBackActivate"); _layerHiddenInhibitKernel = cl::Kernel(program.getProgram(), "layerHiddenInhibit"); _layerVisibleReconstructKernel = cl::Kernel(program.getProgram(), "layerVisibleReconstruct"); _layerHiddenWeightUpdateKernel = cl::Kernel(program.getProgram(), "layerHiddenWeightUpdate"); _layerHiddenWeightUpdateLastKernel = cl::Kernel(program.getProgram(), "layerHiddenWeightUpdateLast"); _layerVisibleWeightUpdateKernel = cl::Kernel(program.getProgram(), "layerVisibleWeightUpdate"); }
void HEInet::createRandom(const std::vector<EIlayer::Configuration> &eilConfigs, int predictionRadiusFromE, int predictionRadiusFromI, float minInitEWeight, float maxInitEWeight, float minInitIWeight, float maxInitIWeight, float initEThreshold, float initIThreshold, float sparsityE, float sparsityI, sys::ComputeSystem &cs, const std::shared_ptr<EIlayer::Kernels> &eilKernels, const std::shared_ptr<Kernels> &heiKernels, std::mt19937 &generator) { _kernels = heiKernels; _predictionRadiusFromE = predictionRadiusFromE; _predictionRadiusFromI = predictionRadiusFromI; _eiLayers.resize(eilConfigs.size()); // Initialize all layers for (int li = 0; li < _eiLayers.size(); li++) { _eiLayers[li].createRandom(eilConfigs[li], minInitEWeight, maxInitEWeight, minInitIWeight, maxInitIWeight, initEThreshold, initIThreshold, sparsityE, sparsityI, cs, eilKernels, generator); } int predictionFromESize = std::pow(_predictionRadiusFromE * 2 + 1, 2); int predictionFromISize = std::pow(_predictionRadiusFromI * 2 + 1, 2); _prediction = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight); _predictionPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight); _inputSpikes = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight); _inputSpikesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight); _inputSpikesHistory = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight); _inputSpikesHistoryPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, 
CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight); _inputSpikeTimers = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight); _inputSpikeTimersPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight); _eSpikeSums = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eWidth, eilConfigs.front()._eHeight); _eSpikeSumsPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eWidth, eilConfigs.front()._eHeight); _iSpikeSums = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._iWidth, eilConfigs.front()._iHeight); _iSpikeSumsPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._iWidth, eilConfigs.front()._iHeight); _eSpikeSumsIterPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eWidth, eilConfigs.front()._eHeight); _iSpikeSumsIterPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._iWidth, eilConfigs.front()._iHeight); cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f }; cl::size_t<3> zeroCoord; zeroCoord[0] = zeroCoord[1] = zeroCoord[2] = 0; cl::size_t<3> eFeedForwardDimsCoord; eFeedForwardDimsCoord[0] = eilConfigs.front()._eFeedForwardWidth; eFeedForwardDimsCoord[1] = eilConfigs.front()._eFeedForwardHeight; eFeedForwardDimsCoord[2] = 1; cl::size_t<3> ePredictionWeightsDims; ePredictionWeightsDims[0] = eilConfigs.front()._eFeedForwardWidth; ePredictionWeightsDims[1] = eilConfigs.front()._eFeedForwardHeight; ePredictionWeightsDims[2] = predictionFromESize; cl::size_t<3> iPredictionWeightsDims; 
iPredictionWeightsDims[0] = eilConfigs.front()._eFeedForwardWidth; iPredictionWeightsDims[1] = eilConfigs.front()._eFeedForwardHeight; iPredictionWeightsDims[2] = predictionFromISize; cl::size_t<3> eDims; eDims[0] = eilConfigs.front()._eWidth; eDims[1] = eilConfigs.front()._eHeight; eDims[2] = 1; cl::size_t<3> iDims; iDims[0] = eilConfigs.front()._iWidth; iDims[1] = eilConfigs.front()._iHeight; iDims[2] = 1; cs.getQueue().enqueueFillImage(_prediction, zeroColor, zeroCoord, eFeedForwardDimsCoord); cs.getQueue().enqueueFillImage(_predictionPrev, zeroColor, zeroCoord, eFeedForwardDimsCoord); cs.getQueue().enqueueFillImage(_inputSpikes, zeroColor, zeroCoord, eFeedForwardDimsCoord); cs.getQueue().enqueueFillImage(_inputSpikesPrev, zeroColor, zeroCoord, eFeedForwardDimsCoord); cs.getQueue().enqueueFillImage(_inputSpikesHistory, zeroColor, zeroCoord, eFeedForwardDimsCoord); cs.getQueue().enqueueFillImage(_inputSpikesHistoryPrev, zeroColor, zeroCoord, eFeedForwardDimsCoord); cs.getQueue().enqueueFillImage(_inputSpikeTimers, zeroColor, zeroCoord, eFeedForwardDimsCoord); cs.getQueue().enqueueFillImage(_inputSpikeTimersPrev, zeroColor, zeroCoord, eFeedForwardDimsCoord); cs.getQueue().enqueueFillImage(_eSpikeSums, zeroColor, zeroCoord, eDims); cs.getQueue().enqueueFillImage(_eSpikeSumsPrev, zeroColor, zeroCoord, eDims); cs.getQueue().enqueueFillImage(_iSpikeSums, zeroColor, zeroCoord, iDims); cs.getQueue().enqueueFillImage(_iSpikeSumsPrev, zeroColor, zeroCoord, iDims); cs.getQueue().enqueueFillImage(_eSpikeSumsIterPrev, zeroColor, zeroCoord, eDims); cs.getQueue().enqueueFillImage(_iSpikeSumsIterPrev, zeroColor, zeroCoord, iDims); _predictionFromEWeights._weights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight, predictionFromESize); _predictionFromEWeights._weightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), 
eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight, predictionFromESize); _predictionFromIWeights._weights = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight, predictionFromISize); _predictionFromIWeights._weightsPrev = cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight, predictionFromISize); std::uniform_int_distribution<int> seedDist(0, 10000); cl_uint2 seed = { seedDist(generator), seedDist(generator) }; int index = 0; _kernels->_predictionInitializeKernel.setArg(index++, _predictionFromEWeights._weightsPrev); _kernels->_predictionInitializeKernel.setArg(index++, _predictionFromIWeights._weightsPrev); _kernels->_predictionInitializeKernel.setArg(index++, predictionFromESize); _kernels->_predictionInitializeKernel.setArg(index++, predictionFromISize); _kernels->_predictionInitializeKernel.setArg(index++, minInitEWeight); _kernels->_predictionInitializeKernel.setArg(index++, maxInitEWeight); _kernels->_predictionInitializeKernel.setArg(index++, seed); cs.getQueue().enqueueNDRangeKernel(_kernels->_predictionInitializeKernel, cl::NullRange, cl::NDRange(eilConfigs.front()._eFeedForwardWidth, eilConfigs.front()._eFeedForwardHeight)); cs.getQueue().enqueueCopyImage(_predictionFromEWeights._weightsPrev, _predictionFromEWeights._weights, zeroCoord, zeroCoord, ePredictionWeightsDims); cs.getQueue().enqueueCopyImage(_predictionFromIWeights._weightsPrev, _predictionFromIWeights._weights, zeroCoord, zeroCoord, iPredictionWeightsDims); }
void ComparisonSparseCoder::activate(sys::ComputeSystem &cs, const std::vector<cl::Image2D> &visibleStates, float activeRatio, bool bufferSwap) { // Start by clearing summation buffer to biases { cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 }; cl::array<cl::size_type, 3> hiddenRegion = { _hiddenSize.x, _hiddenSize.y, 1 }; cs.getQueue().enqueueCopyImage(_hiddenBiases[_back], _hiddenActivationSummationTemp[_back], zeroOrigin, zeroOrigin, hiddenRegion); } for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; if (!vld._isPredictiveCoding) { if (vld._ignoreMiddle) { int argIndex = 0; _activateIgnoreMiddleKernel.setArg(argIndex++, visibleStates[vli]); _activateIgnoreMiddleKernel.setArg(argIndex++, _hiddenActivationSummationTemp[_back]); _activateIgnoreMiddleKernel.setArg(argIndex++, _hiddenActivationSummationTemp[_front]); _activateIgnoreMiddleKernel.setArg(argIndex++, vl._weights[_back]); _activateIgnoreMiddleKernel.setArg(argIndex++, vld._size); _activateIgnoreMiddleKernel.setArg(argIndex++, vl._hiddenToVisible); _activateIgnoreMiddleKernel.setArg(argIndex++, vld._radius); cs.getQueue().enqueueNDRangeKernel(_activateIgnoreMiddleKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } else { int argIndex = 0; _activateKernel.setArg(argIndex++, visibleStates[vli]); _activateKernel.setArg(argIndex++, _hiddenActivationSummationTemp[_back]); _activateKernel.setArg(argIndex++, _hiddenActivationSummationTemp[_front]); _activateKernel.setArg(argIndex++, vl._weights[_back]); _activateKernel.setArg(argIndex++, vld._size); _activateKernel.setArg(argIndex++, vl._hiddenToVisible); _activateKernel.setArg(argIndex++, vld._radius); cs.getQueue().enqueueNDRangeKernel(_activateKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } // Swap buffers std::swap(_hiddenActivationSummationTemp[_front], _hiddenActivationSummationTemp[_back]); } } // Start by clearing 
summation buffer to biases { cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 }; cl::array<cl::size_type, 3> hiddenRegion = { _hiddenSize.x, _hiddenSize.y, 1 }; cs.getQueue().enqueueFillImage(_hiddenPredictionSummationTemp[_back], cl_float4{ 0.0f, 0.0f, 0.0f, 0.0f }, zeroOrigin, hiddenRegion); } for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; if (vld._isPredictiveCoding) { if (vld._ignoreMiddle) { int argIndex = 0; _activateIgnoreMiddleKernel.setArg(argIndex++, visibleStates[vli]); _activateIgnoreMiddleKernel.setArg(argIndex++, _hiddenPredictionSummationTemp[_back]); _activateIgnoreMiddleKernel.setArg(argIndex++, _hiddenPredictionSummationTemp[_front]); _activateIgnoreMiddleKernel.setArg(argIndex++, vl._weights[_back]); _activateIgnoreMiddleKernel.setArg(argIndex++, vld._size); _activateIgnoreMiddleKernel.setArg(argIndex++, vl._hiddenToVisible); _activateIgnoreMiddleKernel.setArg(argIndex++, vld._radius); cs.getQueue().enqueueNDRangeKernel(_activateIgnoreMiddleKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } else { int argIndex = 0; _activateKernel.setArg(argIndex++, visibleStates[vli]); _activateKernel.setArg(argIndex++, _hiddenPredictionSummationTemp[_back]); _activateKernel.setArg(argIndex++, _hiddenPredictionSummationTemp[_front]); _activateKernel.setArg(argIndex++, vl._weights[_back]); _activateKernel.setArg(argIndex++, vld._size); _activateKernel.setArg(argIndex++, vl._hiddenToVisible); _activateKernel.setArg(argIndex++, vld._radius); cs.getQueue().enqueueNDRangeKernel(_activateKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } // Swap buffers std::swap(_hiddenPredictionSummationTemp[_front], _hiddenPredictionSummationTemp[_back]); } } // Back now contains the sums. 
Solve sparse codes from this { int argIndex = 0; _solveHiddenKernel.setArg(argIndex++, _hiddenActivationSummationTemp[_back]); _solveHiddenKernel.setArg(argIndex++, _hiddenPredictionSummationTemp[_back]); _solveHiddenKernel.setArg(argIndex++, _hiddenStates[_front]); _solveHiddenKernel.setArg(argIndex++, _hiddenSize); _solveHiddenKernel.setArg(argIndex++, _lateralRadius); _solveHiddenKernel.setArg(argIndex++, activeRatio); cs.getQueue().enqueueNDRangeKernel(_solveHiddenKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } // Swap hidden state buffers //if (bufferSwap) std::swap(_hiddenStates[_front], _hiddenStates[_back]); }
void ComparisonSparseCoder::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program, const std::vector<VisibleLayerDesc> &visibleLayerDescs, cl_int2 hiddenSize, cl_int lateralRadius, cl_float2 initWeightRange, std::mt19937 &rng) { _visibleLayerDescs = visibleLayerDescs; _lateralRadius = lateralRadius; _hiddenSize = hiddenSize; cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f }; cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 }; cl::array<cl::size_type, 3> hiddenRegion = { _hiddenSize.x, _hiddenSize.y, 1 }; _visibleLayers.resize(_visibleLayerDescs.size()); cl::Kernel randomUniform2DKernel = cl::Kernel(program.getProgram(), "randomUniform2D"); cl::Kernel randomUniform3DKernel = cl::Kernel(program.getProgram(), "randomUniform3D"); // Create layers for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; const cl_channel_order weightChannels = vld._useTraces ? CL_RG : CL_R; vl._hiddenToVisible = cl_float2{ static_cast<float>(vld._size.x) / static_cast<float>(_hiddenSize.x), static_cast<float>(vld._size.y) / static_cast<float>(_hiddenSize.y) }; vl._visibleToHidden = cl_float2{ static_cast<float>(_hiddenSize.x) / static_cast<float>(vld._size.x), static_cast<float>(_hiddenSize.y) / static_cast<float>(vld._size.y) }; vl._reverseRadii = { static_cast<int>(std::ceil(vl._visibleToHidden.x * (vld._radius + 0.5f))), static_cast<int>(std::ceil(vl._visibleToHidden.y * (vld._radius + 0.5f))) }; // Create images { int weightDiam = vld._radius * 2 + 1; int numWeights = weightDiam * weightDiam; cl_int3 weightsSize = cl_int3{ _hiddenSize.x, _hiddenSize.y, numWeights }; vl._weights = createDoubleBuffer3D(cs, weightsSize, weightChannels, CL_FLOAT); randomUniform(vl._weights[_back], cs, randomUniform3DKernel, weightsSize, initWeightRange, rng); } } // Hidden state data _hiddenStates = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT); _hiddenBiases = createDoubleBuffer2D(cs, 
_hiddenSize, CL_R, CL_FLOAT); //randomUniform(_hiddenBiases[_back], cs, randomUniform2DKernel, _hiddenSize, initWeightRange, rng); cs.getQueue().enqueueFillImage(_hiddenBiases[_back], zeroColor, zeroOrigin, hiddenRegion); _hiddenActivationSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT); _hiddenPredictionSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT); cs.getQueue().enqueueFillImage(_hiddenStates[_back], zeroColor, zeroOrigin, hiddenRegion); // Create kernels _activateKernel = cl::Kernel(program.getProgram(), "cscActivate"); _activateIgnoreMiddleKernel = cl::Kernel(program.getProgram(), "cscActivateIgnoreMiddle"); _solveHiddenKernel = cl::Kernel(program.getProgram(), "cscSolveHidden"); _learnHiddenBiasesKernel = cl::Kernel(program.getProgram(), "cscLearnHiddenBiases"); _learnHiddenWeightsActivationKernel = cl::Kernel(program.getProgram(), "cscLearnHiddenWeightsActivation"); _learnHiddenWeightsTracesActivationKernel = cl::Kernel(program.getProgram(), "cscLearnHiddenWeightsTracesActivation"); _learnHiddenWeightsPredictionKernel = cl::Kernel(program.getProgram(), "cscLearnHiddenWeightsPrediction"); _learnHiddenWeightsTracesPredictionKernel = cl::Kernel(program.getProgram(), "cscLearnHiddenWeightsTracesPrediction"); _forwardKernel = cl::Kernel(program.getProgram(), "cscForward"); }
void ComparisonSparseCoder::readFromStream(sys::ComputeSystem &cs, sys::ComputeProgram &program, std::istream &is) { abort(); // Fix me is >> _hiddenSize.x >> _hiddenSize.y >> _lateralRadius; _hiddenStates = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT); _hiddenBiases = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT); _hiddenActivationSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT); //_hiddenReconstructionSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT); { std::vector<cl_float> hiddenStates(_hiddenSize.x * _hiddenSize.y); for (int si = 0; si < hiddenStates.size(); si++) is >> hiddenStates[si]; cs.getQueue().enqueueWriteImage(_hiddenStates[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }, 0, 0, hiddenStates.data()); } { std::vector<cl_float> hiddenBiases(_hiddenSize.x * _hiddenSize.y); for (int bi = 0; bi < hiddenBiases.size(); bi++) is >> hiddenBiases[bi]; cs.getQueue().enqueueWriteImage(_hiddenBiases[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }, 0, 0, hiddenBiases.data()); } // Layer information int numLayers; is >> numLayers; _visibleLayerDescs.resize(numLayers); _visibleLayers.resize(numLayers); for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; // Desc is >> vld._size.x >> vld._size.y >> vld._radius >> vld._weightAlpha >> vld._weightLambda >> vld._ignoreMiddle >> vld._useTraces; // Layer //vl._reconstructionError = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), vld._size.x, vld._size.y); int weightDiam = vld._radius * 2 + 1; int numWeights = weightDiam * weightDiam; cl_int3 weightsSize = cl_int3{ _hiddenSize.x, _hiddenSize.y, numWeights }; int totalNumWeights = weightsSize.x * weightsSize.y * weightsSize.z; if (vld._useTraces) { 
//vl._weights = createDoubleBuffer3D(cs, weightsSize, CL_RG, CL_FLOAT); std::vector<cl_float2> weights(totalNumWeights); for (int wi = 0; wi < weights.size(); wi++) is >> weights[wi].x >> weights[wi].y; //cs.getQueue().enqueueWriteImage(vl._weights[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(weightsSize.x), static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) }, 0, 0, weights.data()); } else {
void ComparisonSparseCoder::writeToStream(sys::ComputeSystem &cs, std::ostream &os) const { abort(); // Fix me os << _hiddenSize.x << " " << _hiddenSize.y << " " << _lateralRadius << std::endl; { std::vector<cl_float> hiddenStates(_hiddenSize.x * _hiddenSize.y); cs.getQueue().enqueueReadImage(_hiddenStates[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }, 0, 0, hiddenStates.data()); for (int si = 0; si < hiddenStates.size(); si++) os << hiddenStates[si] << " "; os << std::endl; } { std::vector<cl_float> hiddenBiases(_hiddenSize.x * _hiddenSize.y); cs.getQueue().enqueueReadImage(_hiddenBiases[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }, 0, 0, hiddenBiases.data()); for (int bi = 0; bi < hiddenBiases.size(); bi++) os << hiddenBiases[bi] << " "; os << std::endl; } // Layer information os << _visibleLayers.size() << std::endl; for (int vli = 0; vli < _visibleLayers.size(); vli++) { const VisibleLayer &vl = _visibleLayers[vli]; const VisibleLayerDesc &vld = _visibleLayerDescs[vli]; // Desc os << vld._size.x << " " << vld._size.y << " " << vld._radius << " " << vld._weightAlpha << " " << vld._weightLambda << " " << vld._ignoreMiddle << " " << vld._useTraces << std::endl; // Layer int weightDiam = vld._radius * 2 + 1; int numWeights = weightDiam * weightDiam; cl_int3 weightsSize = cl_int3{ _hiddenSize.x, _hiddenSize.y, numWeights }; int totalNumWeights = weightsSize.x * weightsSize.y * weightsSize.z; if (vld._useTraces) { std::vector<cl_float2> weights(totalNumWeights); //cs.getQueue().enqueueReadImage(vl._weights[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(weightsSize.x), static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) }, 0, 0, weights.data()); for (int wi = 0; wi < weights.size(); wi++) os << weights[wi].x << " " << weights[wi].y << " "; } else { std::vector<cl_float> 
weights(totalNumWeights); //cs.getQueue().enqueueReadImage(vl._weights[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(weightsSize.x), static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) }, 0, 0, weights.data()); for (int wi = 0; wi < weights.size(); wi++) os << weights[wi] << " "; } os << std::endl; os << vl._hiddenToVisible.x << " " << vl._hiddenToVisible.y << " " << vl._visibleToHidden.x << " " << vl._visibleToHidden.y << " " << vl._reverseRadii.x << " " << vl._reverseRadii.y << std::endl; } }
void ComparisonSparseCoder::learn(sys::ComputeSystem &cs, const cl::Image2D &rewards, std::vector<cl::Image2D> &visibleStates, float boostAlpha, float activeRatio) { // Learn biases { int argIndex = 0; _learnHiddenBiasesKernel.setArg(argIndex++, _hiddenBiases[_back]); _learnHiddenBiasesKernel.setArg(argIndex++, _hiddenBiases[_front]); _learnHiddenBiasesKernel.setArg(argIndex++, _hiddenStates[_back]); _learnHiddenBiasesKernel.setArg(argIndex++, boostAlpha); _learnHiddenBiasesKernel.setArg(argIndex++, activeRatio); cs.getQueue().enqueueNDRangeKernel(_learnHiddenBiasesKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); std::swap(_hiddenBiases[_front], _hiddenBiases[_back]); } // Learn weights for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; if (!vld._isPredictiveCoding) { if (vld._useTraces) { int argIndex = 0; _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, rewards); _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, visibleStates[vli]); _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, _hiddenStates[_back]); _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, _hiddenActivationSummationTemp[_back]); _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, vl._weights[_back]); _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, vl._weights[_front]); _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, vld._size); _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, vl._hiddenToVisible); _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, vld._radius); _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, vld._weightAlpha); _learnHiddenWeightsTracesActivationKernel.setArg(argIndex++, vld._weightLambda); cs.getQueue().enqueueNDRangeKernel(_learnHiddenWeightsTracesActivationKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } else { int argIndex = 0; 
_learnHiddenWeightsActivationKernel.setArg(argIndex++, visibleStates[vli]); _learnHiddenWeightsActivationKernel.setArg(argIndex++, _hiddenStates[_back]); _learnHiddenWeightsActivationKernel.setArg(argIndex++, _hiddenActivationSummationTemp[_back]); _learnHiddenWeightsActivationKernel.setArg(argIndex++, vl._weights[_back]); _learnHiddenWeightsActivationKernel.setArg(argIndex++, vl._weights[_front]); _learnHiddenWeightsActivationKernel.setArg(argIndex++, vld._size); _learnHiddenWeightsActivationKernel.setArg(argIndex++, vl._hiddenToVisible); _learnHiddenWeightsActivationKernel.setArg(argIndex++, vld._radius); _learnHiddenWeightsActivationKernel.setArg(argIndex++, vld._weightAlpha); cs.getQueue().enqueueNDRangeKernel(_learnHiddenWeightsActivationKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } std::swap(vl._weights[_front], vl._weights[_back]); } } for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; if (vld._isPredictiveCoding) { if (vld._useTraces) { int argIndex = 0; _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, rewards); _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, visibleStates[vli]); _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, _hiddenStates[_back]); _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, _hiddenPredictionSummationTemp[_back]); _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, vl._weights[_back]); _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, vl._weights[_front]); _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, vld._size); _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, vl._hiddenToVisible); _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, vld._radius); _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, vld._weightAlpha); _learnHiddenWeightsTracesPredictionKernel.setArg(argIndex++, vld._weightLambda); 
cs.getQueue().enqueueNDRangeKernel(_learnHiddenWeightsTracesPredictionKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } else { int argIndex = 0; _learnHiddenWeightsPredictionKernel.setArg(argIndex++, visibleStates[vli]); _learnHiddenWeightsPredictionKernel.setArg(argIndex++, _hiddenStates[_back]); _learnHiddenWeightsPredictionKernel.setArg(argIndex++, _hiddenPredictionSummationTemp[_back]); _learnHiddenWeightsPredictionKernel.setArg(argIndex++, vl._weights[_back]); _learnHiddenWeightsPredictionKernel.setArg(argIndex++, vl._weights[_front]); _learnHiddenWeightsPredictionKernel.setArg(argIndex++, vld._size); _learnHiddenWeightsPredictionKernel.setArg(argIndex++, vl._hiddenToVisible); _learnHiddenWeightsPredictionKernel.setArg(argIndex++, vld._radius); _learnHiddenWeightsPredictionKernel.setArg(argIndex++, vld._weightAlpha); cs.getQueue().enqueueNDRangeKernel(_learnHiddenWeightsPredictionKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y)); } std::swap(vl._weights[_front], vl._weights[_back]); } } }
void AgentER::simStep(sys::ComputeSystem &cs, const cl::Image2D &input, const cl::Image2D &actionTaken, float reward, std::mt19937 &rng, bool learn, bool whiten) { // Keep previous best action for later std::vector<float> prevBestAction(_actionSize.x * _actionSize.y); std::vector<float> prevTakenAction(_actionSize.x * _actionSize.y); cs.getQueue().enqueueReadImage(getAction(), CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_actionSize.x), static_cast<cl::size_type>(_actionSize.y), 1 }, 0, 0, prevBestAction.data()); cs.getQueue().enqueueReadImage(actionTaken, CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_actionSize.x), static_cast<cl::size_type>(_actionSize.y), 1 }, 0, 0, prevTakenAction.data()); // Place previous Q into Q buffer { int argIndex = 0; _setQKernel.setArg(argIndex++, _qTransform); _setQKernel.setArg(argIndex++, _qInput); _setQKernel.setArg(argIndex++, _prevQ); cs.getQueue().enqueueNDRangeKernel(_setQKernel, cl::NullRange, cl::NDRange(_qSize.x, _qSize.y)); } // Whiten input if (whiten) _inputWhitener.filter(cs, input, _whiteningKernelRadius, _whiteningIntensity); _actionWhitener.filter(cs, actionTaken, _whiteningKernelRadius, _whiteningIntensity); _qWhitener.filter(cs, _qInput, _whiteningKernelRadius, _whiteningIntensity); // Feed forward for (int l = 0; l < _layers.size(); l++) { { std::vector<cl::Image2D> visibleStates; if (l == 0) { visibleStates.resize(3); visibleStates[0] = whiten ? 
_inputWhitener.getResult() : input; visibleStates[1] = _actionWhitener.getResult(); visibleStates[2] = _qWhitener.getResult(); } else { visibleStates.resize(2); visibleStates[0] = _layers[l - 1]._sc.getHiddenStates()[_back]; visibleStates[1] = _layers[l]._sc.getHiddenStates()[_back]; } _layers[l]._sc.activate(cs, visibleStates, _layerDescs[l]._scActiveRatio); } } for (int l = _layers.size() - 1; l >= 0; l--) { std::vector<cl::Image2D> visibleStates; if (l < _layers.size() - 1) { visibleStates.resize(2); visibleStates[0] = _layers[l]._sc.getHiddenStates()[_back]; visibleStates[1] = _layers[l + 1]._pred.getHiddenStates()[_back]; } else { visibleStates.resize(1); visibleStates[0] = _layers[l]._sc.getHiddenStates()[_back]; } //_layers[l]._pred.activate(cs, visibleStates, l != 0); } // Q predictor { std::vector<cl::Image2D> visibleStates; if (0 < _layers.size() - 1) { visibleStates.resize(2); visibleStates[0] = _layers[0]._sc.getHiddenStates()[_back]; visibleStates[1] = _layers[0 + 1]._pred.getHiddenStates()[_back]; } else { visibleStates.resize(1); visibleStates[0] = _layers[0]._sc.getHiddenStates()[_back]; } //_qPred.activate(cs, visibleStates, false); } // Recover Q std::vector<float> qValues(_qSize.x * _qSize.y); cs.getQueue().enqueueReadImage(_qPred.getHiddenStates()[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_qSize.x), static_cast<cl::size_type>(_qSize.y), 1 }, 0, 0, qValues.data()); // Average all Q values float q = 0.0f; for (int i = 0; i < qValues.size(); i++) q += qValues[i]; q /= qValues.size(); // Bellman equation float tdError = reward + _qGamma * q - _prevValue; float newQ = _prevValue + _qAlpha * tdError; // Update older samples float g = _qGamma; for (std::list<ReplayFrame>::iterator it = _frames.begin(); it != _frames.end(); it++) { it->_q += g * tdError; g *= _qGamma; } // Add replay sample ReplayFrame frame; frame._q = frame._originalQ = newQ; frame._layerStateBitIndices.resize(_layers.size()); 
frame._layerPredBitIndices.resize(_layers.size()); for (int l = 0; l < _layers.size(); l++) { std::vector<float> state(_layerDescs[l]._size.x * _layerDescs[l]._size.y); cs.getQueue().enqueueReadImage(_layers[l]._sc.getHiddenStates()[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_layerDescs[l]._size.x), static_cast<cl::size_type>(_layerDescs[l]._size.y), 1 }, 0, 0, state.data()); std::vector<float> pred; if (l == 0) { pred.resize(_actionSize.x * _actionSize.y); cs.getQueue().enqueueReadImage(_layers[l]._sc.getHiddenStates()[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_actionSize.x), static_cast<cl::size_type>(_actionSize.y), 1 }, 0, 0, state.data()); } else { pred.resize(_layerDescs[l - 1]._size.x * _layerDescs[l - 1]._size.y); cs.getQueue().enqueueReadImage(_layers[l]._sc.getHiddenStates()[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_layerDescs[l - 1]._size.x), static_cast<cl::size_type>(_layerDescs[l - 1]._size.y), 1 }, 0, 0, pred.data()); } for (int i = 0; i < state.size(); i++) if (state[i] > 0.0f) frame._layerStateBitIndices[l].push_back(i); for (int i = 0; i < pred.size(); i++) if (pred[i] > 0.0f) frame._layerPredBitIndices[l].push_back(i); } // Add last action taken and last "thought best" action frame._prevExploratoryAction = prevTakenAction; frame._prevBestAction = prevBestAction; for (int i = 0; i < prevBestAction.size(); i++) frame._prevBestAction[i] = std::min(1.0f, std::max(-1.0f, prevBestAction[i])); _frames.push_front(frame); while (_frames.size() > _maxReplayFrames) _frames.pop_back(); if (learn && _frames.size() > 1) { // Convert list to vector std::vector<ReplayFrame*> pFrames(_frames.size()); int index = 0; for (std::list<ReplayFrame>::iterator it = _frames.begin(); it != _frames.end(); it++) pFrames[index++] = &(*it); std::uniform_int_distribution<int> replayDist(0, _frames.size() - 2); for (int iter = 0; iter < _replayIterations; iter++) { int randIndex = replayDist(rng); ReplayFrame* pFrame = 
pFrames[randIndex]; ReplayFrame* pFramePrev = pFrames[randIndex + 1]; // Load data cl_int2 prevLayerSize = _actionSize; for (int l = 0; l < _layers.size(); l++) { std::vector<float> state(_layerDescs[l]._size.x * _layerDescs[l]._size.y, 0.0f); std::vector<float> statePrev(_layerDescs[l]._size.x * _layerDescs[l]._size.y, 0.0f); std::vector<float> pred(prevLayerSize.x * prevLayerSize.y, 0.0f); std::vector<float> predPrev(prevLayerSize.x * prevLayerSize.y, 0.0f); for (int i = 0; i < pFrame->_layerStateBitIndices[l].size(); i++) state[pFrame->_layerStateBitIndices[l][i]] = 1.0f; for (int i = 0; i < pFramePrev->_layerStateBitIndices[l].size(); i++) statePrev[pFramePrev->_layerStateBitIndices[l][i]] = 1.0f; for (int i = 0; i < pFrame->_layerPredBitIndices[l].size(); i++) pred[pFrame->_layerPredBitIndices[l][i]] = 1.0f; for (int i = 0; i < pFramePrev->_layerPredBitIndices[l].size(); i++) predPrev[pFramePrev->_layerPredBitIndices[l][i]] = 1.0f; cs.getQueue().enqueueWriteImage(_layers[l]._scStatesTemp[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_layerDescs[l]._size.x), static_cast<cl::size_type>(_layerDescs[l]._size.y), 1 }, 0, 0, state.data()); cs.getQueue().enqueueWriteImage(_layers[l]._scStatesTemp[_front], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_layerDescs[l]._size.x), static_cast<cl::size_type>(_layerDescs[l]._size.y), 1 }, 0, 0, statePrev.data()); cs.getQueue().enqueueWriteImage(_layers[l]._predStatesTemp[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(prevLayerSize.x), static_cast<cl::size_type>(prevLayerSize.y), 1 }, 0, 0, pred.data()); cs.getQueue().enqueueWriteImage(_layers[l]._predStatesTemp[_front], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(prevLayerSize.x), static_cast<cl::size_type>(prevLayerSize.y), 1 }, 0, 0, predPrev.data()); prevLayerSize = _layerDescs[l]._size; } cs.getQueue().enqueueFillImage(_qTarget, cl_float4{ pFrame->_q, pFrame->_q, pFrame->_q, pFrame->_q }, { 0, 0, 0 }, { 
static_cast<cl::size_type>(_qSize.x), static_cast<cl::size_type>(_qSize.y), 1 }); // Choose better action to learn cs.getQueue().enqueueWriteImage(_actionTarget, CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_actionSize.x), static_cast<cl::size_type>(_actionSize.y), 1 }, 0, 0, (pFrame->_q > pFrame->_originalQ ? pFrame->_prevExploratoryAction.data() : pFrame->_prevBestAction.data())); for (int l = 0; l < _layers.size(); l++) { std::vector<cl::Image2D> visibleStates; if (l != 0) { visibleStates.resize(2); visibleStates[0] = _layers[l - 1]._sc.getHiddenStates()[_back]; visibleStates[1] = _layers[l]._sc.getHiddenStates()[_back]; _layers[l]._sc.activate(cs, visibleStates, _layerDescs[l]._scActiveRatio, false); _layers[l]._sc.learn(cs, visibleStates, _layerDescs[l]._scBoostAlpha, _layerDescs[l]._scActiveRatio); } std::vector<cl::Image2D> visibleStatesPrev; if (l < _layers.size() - 1) { visibleStatesPrev.resize(2); visibleStatesPrev[0] = _layers[l]._scStatesTemp[_front]; visibleStatesPrev[1] = _layers[l + 1]._predStatesTemp[_front]; } else { visibleStatesPrev.resize(1); visibleStatesPrev[0] = _layers[l]._scStatesTemp[_front]; } //_layers[l]._pred.activate(cs, visibleStatesPrev, l != 0, false); if (l == 0) _layers[l]._pred.learnCurrent(cs, _actionTarget, visibleStatesPrev, _layerDescs[l]._predWeightAlpha); else _layers[l]._pred.learnCurrent(cs, _layers[l - 1]._sc.getHiddenStates()[_back], visibleStatesPrev, _layerDescs[l]._predWeightAlpha); } // Q Pred { std::vector<cl::Image2D> visibleStatesPrev; if (0 < _layers.size() - 1) { visibleStatesPrev.resize(2); visibleStatesPrev[0] = _layers[0]._sc.getHiddenStates()[_front]; visibleStatesPrev[1] = _layers[0 + 1]._pred.getHiddenStates()[_front]; } else { visibleStatesPrev.resize(1); visibleStatesPrev[0] = _layers[0]._sc.getHiddenStates()[_front]; } //_qPred.activate(cs, visibleStatesPrev, false, false); _qPred.learnCurrent(cs, _qTarget, visibleStatesPrev, _qWeightAlpha); } } } std::cout << "Q: " << newQ << std::endl; _prevQ 
= newQ; _prevTDError = tdError; _prevValue = q; }
void PredictorSwarm::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program, const std::vector<VisibleLayerDesc> &visibleLayerDescs, cl_int2 hiddenSize, cl_float2 initWeightRange, std::mt19937 &rng) { _visibleLayerDescs = visibleLayerDescs; _hiddenSize = hiddenSize; _visibleLayers.resize(_visibleLayerDescs.size()); cl::Kernel randomUniform2DKernel = cl::Kernel(program.getProgram(), "randomUniform2D"); cl::Kernel randomUniform3DKernel = cl::Kernel(program.getProgram(), "randomUniform3D"); cl::Kernel randomUniform3DXZKernel = cl::Kernel(program.getProgram(), "randomUniform3DXZ"); cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f }; cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 }; cl::array<cl::size_type, 3> hiddenRegion = { _hiddenSize.x, _hiddenSize.y, 1 }; // Create layers for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; vl._hiddenToVisible = cl_float2{ static_cast<float>(vld._size.x) / static_cast<float>(_hiddenSize.x), static_cast<float>(vld._size.y) / static_cast<float>(_hiddenSize.y) }; vl._visibleToHidden = cl_float2{ static_cast<float>(_hiddenSize.x) / static_cast<float>(vld._size.x), static_cast<float>(_hiddenSize.y) / static_cast<float>(vld._size.y) }; vl._reverseRadii = cl_int2{ static_cast<int>(std::ceil(vl._visibleToHidden.x * (vld._radius + 0.5f))), static_cast<int>(std::ceil(vl._visibleToHidden.y * (vld._radius + 0.5f))) }; int weightDiam = vld._radius * 2 + 1; int numWeights = weightDiam * weightDiam; cl_int3 weightsSize = { _hiddenSize.x, _hiddenSize.y, numWeights }; vl._weights = createDoubleBuffer3D(cs, weightsSize, CL_RGBA, CL_FLOAT); randomUniformXZ(vl._weights[_back], cs, randomUniform3DXZKernel, weightsSize, initWeightRange, rng); vl._qTraces = createDoubleBuffer3D(cs, weightsSize, CL_R, CL_FLOAT); cs.getQueue().enqueueFillImage(vl._qTraces[_back], zeroColor, zeroOrigin, { static_cast<cl::size_type>(weightsSize.x), 
static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) }); } // Hidden state data _hiddenStates = createDoubleBuffer2D(cs, _hiddenSize, CL_RG, CL_FLOAT); _hiddenActivations = createDoubleBuffer2D(cs, _hiddenSize, CL_RG, CL_FLOAT); _hiddenSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_RG, CL_FLOAT); cs.getQueue().enqueueFillImage(_hiddenStates[_back], zeroColor, zeroOrigin, hiddenRegion); cs.getQueue().enqueueFillImage(_hiddenActivations[_back], zeroColor, zeroOrigin, hiddenRegion); // Create kernels _activateKernel = cl::Kernel(program.getProgram(), "predActivateSwarm"); _solveHiddenKernel = cl::Kernel(program.getProgram(), "predSolveHiddenSwarm"); _solveHiddenNoInhibitionKernel = cl::Kernel(program.getProgram(), "predSolveHiddenNoInhibitionSwarm"); _learnWeightsTracesInhibitedKernel = cl::Kernel(program.getProgram(), "predLearnWeightsTracesSwarm"); _reconstructionErrorKernel = cl::Kernel(program.getProgram(), "predReconstructionErrorSwarm"); }
void AgentER::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program, cl_int2 inputSize, cl_int2 actionSize, cl_int2 qSize, const std::vector<LayerDesc> &layerDescs, cl_float2 initWeightRange, std::mt19937 &rng) { _inputSize = inputSize; _actionSize = actionSize; _qSize = qSize; _layerDescs = layerDescs; _layers.resize(_layerDescs.size()); cl::Kernel randomUniform2DXYKernel = cl::Kernel(program.getProgram(), "randomUniform2DXY"); cl_int2 prevLayerSize = inputSize; for (int l = 0; l < _layers.size(); l++) { std::vector<ComparisonSparseCoder::VisibleLayerDesc> scDescs; if (l == 0) { scDescs.resize(3); scDescs[0]._size = prevLayerSize; scDescs[0]._radius = _layerDescs[l]._feedForwardRadius; scDescs[0]._ignoreMiddle = false; scDescs[0]._weightAlpha = _layerDescs[l]._scWeightAlpha; scDescs[0]._useTraces = false; scDescs[1]._size = _actionSize; scDescs[1]._radius = _layerDescs[l]._feedForwardRadius; scDescs[1]._ignoreMiddle = false; scDescs[1]._weightAlpha = _layerDescs[l]._scWeightAlpha; scDescs[1]._useTraces = false; scDescs[2]._size = _qSize; scDescs[2]._radius = _layerDescs[l]._feedForwardRadius; scDescs[2]._ignoreMiddle = false; scDescs[2]._weightAlpha = _layerDescs[l]._scWeightAlpha; scDescs[2]._useTraces = false; } else { scDescs.resize(2); scDescs[0]._size = prevLayerSize; scDescs[0]._radius = _layerDescs[l]._feedForwardRadius; scDescs[0]._ignoreMiddle = false; scDescs[0]._weightAlpha = _layerDescs[l]._scWeightAlpha; scDescs[0]._useTraces = false; scDescs[1]._size = _layerDescs[l]._size; scDescs[1]._radius = _layerDescs[l]._recurrentRadius; scDescs[1]._ignoreMiddle = true; scDescs[1]._weightAlpha = _layerDescs[l]._scWeightRecurrentAlpha; scDescs[1]._useTraces = false; } _layers[l]._sc.createRandom(cs, program, scDescs, _layerDescs[l]._size, _layerDescs[l]._lateralRadius, initWeightRange, rng); std::vector<Predictor::VisibleLayerDesc> predDescs; if (l < _layers.size() - 1) { predDescs.resize(2); predDescs[0]._size = _layerDescs[l]._size; 
predDescs[0]._radius = _layerDescs[l]._predictiveRadius; predDescs[1]._size = _layerDescs[l + 1]._size; predDescs[1]._radius = _layerDescs[l]._feedBackRadius; } else { predDescs.resize(1); predDescs[0]._size = _layerDescs[l]._size; predDescs[0]._radius = _layerDescs[l]._predictiveRadius; } if (l == 0) _layers[l]._pred.createRandom(cs, program, predDescs, _actionSize, initWeightRange, true, rng); else _layers[l]._pred.createRandom(cs, program, predDescs, _layerDescs[l - 1]._size, initWeightRange, true, rng); // Create baselines _layers[l]._predReward = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._size.x, _layerDescs[l]._size.y); _layers[l]._propagatedPredReward = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _layerDescs[l]._size.x, _layerDescs[l]._size.y); cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f }; cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 }; cl::array<cl::size_type, 3> layerRegion = { _layerDescs[l]._size.x, _layerDescs[l]._size.y, 1 }; cs.getQueue().enqueueFillImage(_layers[l]._predReward, zeroColor, zeroOrigin, layerRegion); cs.getQueue().enqueueFillImage(_layers[l]._propagatedPredReward, zeroColor, zeroOrigin, layerRegion); _layers[l]._scStatesTemp = createDoubleBuffer2D(cs, _layerDescs[l]._size, CL_R, CL_FLOAT); _layers[l]._predStatesTemp = createDoubleBuffer2D(cs, prevLayerSize, CL_R, CL_FLOAT); prevLayerSize = _layerDescs[l]._size; } _qInput = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _qSize.x, _qSize.y); _qTarget = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _qSize.x, _qSize.y); _actionTarget = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _actionSize.x, _actionSize.y); _qTransform = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), _qSize.x, _qSize.y); // Q Predictor { std::vector<Predictor::VisibleLayerDesc> 
predDescs; if (0 < _layers.size() - 1) { predDescs.resize(2); predDescs[0]._size = _layerDescs[0]._size; predDescs[0]._radius = _layerDescs[0]._predictiveRadius; predDescs[1]._size = _layerDescs[0 + 1]._size; predDescs[1]._radius = _layerDescs[0]._feedBackRadius; } else { predDescs.resize(1); predDescs[0]._size = _layerDescs[0]._size; predDescs[0]._radius = _layerDescs[0]._predictiveRadius; } _qPred.createRandom(cs, program, predDescs, _qSize, initWeightRange, true, rng); } // Random Q transform randomUniformXY(_qTransform, cs, randomUniform2DXYKernel, _qSize, { -1.0f, 1.0f }, rng); _inputWhitener.create(cs, program, _inputSize, CL_R, CL_FLOAT); _actionWhitener.create(cs, program, _actionSize, CL_R, CL_FLOAT); _qWhitener.create(cs, program, _qSize, CL_R, CL_FLOAT); _predictionRewardKernel = cl::Kernel(program.getProgram(), "phPredictionReward"); _predictionRewardPropagationKernel = cl::Kernel(program.getProgram(), "phPredictionRewardPropagation"); _setQKernel = cl::Kernel(program.getProgram(), "phSetQ"); }
void Predictor::readFromStream(sys::ComputeSystem &cs, sys::ComputeProgram &program, std::istream &is) { abort(); // Not yet working is >> _hiddenSize.x >> _hiddenSize.y; _hiddenStates = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT); _hiddenSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT); { std::vector<cl_float> hiddenStates(_hiddenSize.x * _hiddenSize.y); for (int si = 0; si < hiddenStates.size(); si++) is >> hiddenStates[si]; cs.getQueue().enqueueWriteImage(_hiddenStates[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }, 0, 0, hiddenStates.data()); } // Layer information int numLayers; is >> numLayers; _visibleLayerDescs.resize(numLayers); _visibleLayers.resize(numLayers); for (int vli = 0; vli < _visibleLayers.size(); vli++) { VisibleLayer &vl = _visibleLayers[vli]; VisibleLayerDesc &vld = _visibleLayerDescs[vli]; // Desc is >> vld._size.x >> vld._size.y >> vld._radius; // Layer int weightDiam = vld._radius * 2 + 1; int numWeights = weightDiam * weightDiam; cl_int3 weightsSize = cl_int3{ _hiddenSize.x, _hiddenSize.y, numWeights }; int totalNumWeights = weightsSize.x * weightsSize.y * weightsSize.z; { vl._weights = createDoubleBuffer3D(cs, weightsSize, CL_R, CL_FLOAT); std::vector<cl_float> weights(totalNumWeights); for (int wi = 0; wi < weights.size(); wi++) is >> weights[wi]; cs.getQueue().enqueueWriteImage(vl._weights[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(weightsSize.x), static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) }, 0, 0, weights.data()); } is >> vl._hiddenToVisible.x >> vl._hiddenToVisible.y >> vl._visibleToHidden.x >> vl._visibleToHidden.y >> vl._reverseRadii.x >> vl._reverseRadii.y; } // Create kernels _activateKernel = cl::Kernel(program.getProgram(), "predActivate"); //_solveHiddenKernel = cl::Kernel(program.getProgram(), "predSolveHidden"); _learnWeightsKernel = 
cl::Kernel(program.getProgram(), "predLearnWeights"); }
void AgentSwarm::simStep(sys::ComputeSystem &cs, float reward, const cl::Image2D &input, std::mt19937 &rng) { // Feed forward cl_int2 prevLayerSize = _layers.front()._sc.getVisibleLayerDesc(0)._size; cl::Image2D prevLayerState = input; for (int l = 0; l < _layers.size(); l++) { { std::vector<cl::Image2D> visibleStates(2); // Modulate { int argIndex = 0; _modulateKernel.setArg(argIndex++, prevLayerState); _modulateKernel.setArg(argIndex++, _layers[l]._swarm.getVisibleLayer(0)._actionsExploratory); _modulateKernel.setArg(argIndex++, _layers[l]._modulatedFeedForwardInput); _modulateKernel.setArg(argIndex++, _layerDescs[l]._minAttention); cs.getQueue().enqueueNDRangeKernel(_modulateKernel, cl::NullRange, cl::NDRange(prevLayerSize.x, prevLayerSize.y)); } // Modulate { int argIndex = 0; _modulateKernel.setArg(argIndex++, _layers[l]._scHiddenStatesPrev); _modulateKernel.setArg(argIndex++, _layers[l]._swarm.getVisibleLayer(1)._actionsExploratory); _modulateKernel.setArg(argIndex++, _layers[l]._modulatedRecurrentInput); _modulateKernel.setArg(argIndex++, _layerDescs[l]._minAttention); cs.getQueue().enqueueNDRangeKernel(_modulateKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y)); } visibleStates[0] = _layers[l]._modulatedFeedForwardInput; visibleStates[1] = _layers[l]._modulatedRecurrentInput; //_layers[l]._sc.activate(cs, visibleStates, _layerDescs[l]._scActiveRatio); _layers[l]._sc.learn(cs, _layers[l]._reward, visibleStates, _layerDescs[l]._scBoostAlpha, _layerDescs[l]._scActiveRatio); } // Get reward /*if (l == 0) { int argIndex = 0; _baseLineUpdateKernel.setArg(argIndex++, _layers[l]._pred.getVisibleLayer(0)._errors); _baseLineUpdateKernel.setArg(argIndex++, _layers[l]._sc.getHiddenStates()[_back]); _baseLineUpdateKernel.setArg(argIndex++, _layers[l]._baseLines[_back]); _baseLineUpdateKernel.setArg(argIndex++, _layers[l]._baseLines[_front]); _baseLineUpdateKernel.setArg(argIndex++, _layers[l]._reward); 
_baseLineUpdateKernel.setArg(argIndex++, _layerDescs[l]._baseLineDecay); _baseLineUpdateKernel.setArg(argIndex++, _layerDescs[l]._baseLineSensitivity); cs.getQueue().enqueueNDRangeKernel(_baseLineUpdateKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y)); } else { int argIndex = 0; _baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l - 1]._pred.getVisibleLayer(1)._errors); _baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l]._pred.getVisibleLayer(0)._errors); _baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l]._sc.getHiddenStates()[_back]); _baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l]._baseLines[_back]); _baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l]._baseLines[_front]); _baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l]._reward); _baseLineUpdateSumErrorKernel.setArg(argIndex++, _layerDescs[l]._baseLineDecay); _baseLineUpdateSumErrorKernel.setArg(argIndex++, _layerDescs[l]._baseLineSensitivity); cs.getQueue().enqueueNDRangeKernel(_baseLineUpdateSumErrorKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y)); }*/ prevLayerState = _layers[l]._sc.getHiddenStates()[_back]; prevLayerSize = _layerDescs[l]._hiddenSize; } for (int l = _layers.size() - 1; l >= 0; l--) { std::vector<cl::Image2D> visibleStates; if (l < _layers.size() - 1) { visibleStates.resize(2); visibleStates[0] = _layers[l]._sc.getHiddenStates()[_back]; visibleStates[1] = _layers[l + 1]._pred.getHiddenStates()[_back]; } else { visibleStates.resize(1); visibleStates[0] = _layers[l]._sc.getHiddenStates()[_back]; } //_layers[l]._pred.activate(cs, visibleStates); //if (l == 0) // _layers[l]._pred.propagateError(cs, input); //else // _layers[l]._pred.propagateError(cs, _layers[l - 1]._sc.getHiddenStates()[_back]); } for (int l = _layers.size() - 1; l >= 0; l--) { std::vector<cl::Image2D> visibleStatesPrev; if (l < _layers.size() - 1) { visibleStatesPrev.resize(2); 
visibleStatesPrev[0] = _layers[l]._scHiddenStatesPrev; visibleStatesPrev[1] = _layers[l + 1]._pred.getHiddenStates()[_front]; } else { visibleStatesPrev.resize(1); visibleStatesPrev[0] = _layers[l]._scHiddenStatesPrev; } if (l == 0) _layers[l]._pred.learn(cs, input, visibleStatesPrev, _layerDescs[l]._predWeightAlpha); else _layers[l]._pred.learn(cs, _layers[l - 1]._sc.getHiddenStates()[_back], visibleStatesPrev, _layerDescs[l]._predWeightAlpha); } // Swarm for (int l = _layers.size() - 1; l >= 0; l--) { std::vector<cl::Image2D> visibleStatesPrev; if (l < _layers.size() - 1) { _layers[l]._swarm.simStep(cs, reward, _layers[l]._sc.getHiddenStates()[_back], _layers[l + 1]._inhibitedAction, _layerDescs[l]._swarmExpPert, _layerDescs[l]._swarmExpBreak, _layerDescs[l]._swarmAnnealingIterations, _layerDescs[l]._swarmActionDeriveAlpha, _layerDescs[l]._swarmQHiddenAlpha, _layerDescs[l]._swarmQAlpha, _layerDescs[l]._swarmPredAlpha, _layerDescs[l]._swarmLambda, _layerDescs[l]._swarmGamma, rng); } else { _layers[l]._swarm.simStep(cs, reward, _layers[l]._sc.getHiddenStates()[_back], _lastLayerAction, _layerDescs[l]._swarmExpPert, _layerDescs[l]._swarmExpBreak, _layerDescs[l]._swarmAnnealingIterations, _layerDescs[l]._swarmActionDeriveAlpha, _layerDescs[l]._swarmQHiddenAlpha, _layerDescs[l]._swarmQAlpha, _layerDescs[l]._swarmPredAlpha, _layerDescs[l]._swarmLambda, _layerDescs[l]._swarmGamma, rng); } // If not first layer, inhibit the action if (l != 0) { int argIndex = 0; _inhibitKernel.setArg(argIndex++, _layers[l]._swarm.getVisibleLayer(2)._actionsExploratory); _inhibitKernel.setArg(argIndex++, _layers[l]._inhibitedAction); _inhibitKernel.setArg(argIndex++, _layerDescs[l - 1]._hiddenSize); _inhibitKernel.setArg(argIndex++, _layerDescs[l - 1]._lateralRadius); _inhibitKernel.setArg(argIndex++, _layerDescs[l - 1]._scActiveRatio); cs.getQueue().enqueueNDRangeKernel(_inhibitKernel, cl::NullRange, cl::NDRange(_layerDescs[l - 1]._hiddenSize.x, _layerDescs[l - 1]._hiddenSize.y)); } } // 
Buffer updates for (int l = 0; l < _layers.size(); l++) { cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 }; cl::array<cl::size_type, 3> layerRegion = { _layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y, 1 }; cs.getQueue().enqueueCopyImage(_layers[l]._sc.getHiddenStates()[_back], _layers[l]._scHiddenStatesPrev, zeroOrigin, zeroOrigin, layerRegion); std::swap(_layers[l]._baseLines[_front], _layers[l]._baseLines[_back]); } }