Example #1
0
/// Allocates a front/back pair of 2D images of the requested size.
///
/// Both images are created read-write on the context of `cs` with a single
/// float channel (CL_R / CL_FLOAT). No fill is enqueued here, so the image
/// contents are whatever the allocation provides.
///
/// @param cs    Compute system whose context owns the new images.
/// @param size  Image width (`x`) and height (`y`).
/// @return      The double buffer with its `_front` and `_back` slots populated.
DoubleBuffer2D neo::createDoubleBuffer2D(sys::ComputeSystem &cs, cl_int2 size) {
	// Both halves share the same channel layout.
	const cl::ImageFormat format(CL_R, CL_FLOAT);

	DoubleBuffer2D buffers;

	buffers[_front] = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, format, size.x, size.y);
	buffers[_back] = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, format, size.x, size.y);

	return buffers;
}
Example #2
0
/// Computes new hidden states from the given visible states.
///
/// Everything is enqueued on the command queue of `cs`, in this order:
///  1. The `_back` summation buffer is filled with zeros.
///  2. For every visible layer the activate kernel is launched with the `_back`
///     summation buffer, the `_front` summation buffer and the layer's `_back`
///     weights; afterwards the two summation buffers are swapped, so the latest
///     result is always found at `_back`.
///  3. The summation is turned into hidden states (written to
///     `_hiddenStates[_front]`) by the binary kernel, the tanh kernel, or a plain
///     image copy, depending on `nonlinearityType`.
///  4. The hidden state buffers are swapped.
///
/// @param cs                Compute system providing the command queue.
/// @param visibleStates     One image per visible layer, indexed like `_visibleLayers`.
/// @param nonlinearityType  `_binary`, `_tanH`, or anything else for a straight copy.
/// @param bufferSwap        NOTE(review): never read; the final swap is unconditional.
///                          Confirm whether it was meant to gate that swap.
void Predictor::activate(sys::ComputeSystem &cs, const std::vector<cl::Image2D> &visibleStates, NonlinearityType nonlinearityType, bool bufferSwap) {
	// Kept for interface compatibility only (see the note above).
	(void)bufferSwap;

	// The hidden size components are cast explicitly, exactly as the copy branch
	// below always did: brace-initialising an unsigned cl::size_type from a signed
	// value is a narrowing conversion otherwise.
	const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };
	const cl::array<cl::size_type, 3> hiddenRegion = { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 };

	// One work item per hidden unit for every kernel launched here.
	const cl::NDRange hiddenRange(_hiddenSize.x, _hiddenSize.y);

	// Start by clearing summation buffer
	{
		const cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f };

		cs.getQueue().enqueueFillImage(_hiddenSummationTemp[_back], zeroColor, zeroOrigin, hiddenRegion);
	}

	for (std::size_t vli = 0; vli < _visibleLayers.size(); ++vli) {
		VisibleLayer &vl = _visibleLayers[vli];
		VisibleLayerDesc &vld = _visibleLayerDescs[vli];

		int argIndex = 0;

		_activateKernel.setArg(argIndex++, visibleStates[vli]);
		_activateKernel.setArg(argIndex++, _hiddenSummationTemp[_back]);
		_activateKernel.setArg(argIndex++, _hiddenSummationTemp[_front]);
		_activateKernel.setArg(argIndex++, vl._weights[_back]);
		_activateKernel.setArg(argIndex++, vld._size);
		_activateKernel.setArg(argIndex++, vl._hiddenToVisible);
		_activateKernel.setArg(argIndex++, vld._radius);

		cs.getQueue().enqueueNDRangeKernel(_activateKernel, cl::NullRange, hiddenRange);

		// Swap buffers so the summation just bound at _front is at _back for the next stage
		std::swap(_hiddenSummationTemp[_front], _hiddenSummationTemp[_back]);
	}

	if (nonlinearityType == _binary) {
		int argIndex = 0;

		_solveHiddenBinaryKernel.setArg(argIndex++, _hiddenSummationTemp[_back]);
		_solveHiddenBinaryKernel.setArg(argIndex++, _hiddenStates[_front]);

		cs.getQueue().enqueueNDRangeKernel(_solveHiddenBinaryKernel, cl::NullRange, hiddenRange);
	}
	else if (nonlinearityType == _tanH) {
		int argIndex = 0;

		_solveHiddenTanHKernel.setArg(argIndex++, _hiddenSummationTemp[_back]);
		_solveHiddenTanHKernel.setArg(argIndex++, _hiddenStates[_front]);

		cs.getQueue().enqueueNDRangeKernel(_solveHiddenTanHKernel, cl::NullRange, hiddenRange);
	}
	else {
		// No nonlinearity kernel: the summation is copied through unchanged.
		cs.getQueue().enqueueCopyImage(_hiddenSummationTemp[_back], _hiddenStates[_front], zeroOrigin, zeroOrigin, hiddenRegion);
	}

	// Swap hidden state buffers
	std::swap(_hiddenStates[_front], _hiddenStates[_back]);
}
Example #3
0
/// Computes new hidden states and activations from the given visible states.
///
/// Everything is enqueued on the command queue of `cs`, in this order:
///  1. The `_back` summation buffer is filled with zeros.
///  2. For every visible layer the activate kernel is launched with the `_back`
///     summation buffer, the `_front` summation buffer and the layer's `_back`
///     weights; afterwards the two summation buffers are swapped.
///  3. The solve kernel is launched with the `_back` summation buffer,
///     `_hiddenStates[_front]`, `_hiddenActivations[_front]`, the hidden size,
///     `inhibitionRadius` and `activeRatio`.
///  4. The hidden state and hidden activation buffers are swapped.
///
/// @param cs                 Compute system providing the command queue.
/// @param targets            Not read by this function.
/// @param visibleStates      One image per visible layer, indexed like `_visibleLayers`.
/// @param visibleStatesPrev  Not read by this function.
/// @param activeRatio        Forwarded to the solve kernel.
/// @param inhibitionRadius   Forwarded to the solve kernel.
/// @param noise              Not read by this function.
/// @param rng                Generator; exactly two values are drawn from it per call.
void PredictorSwarm::activate(sys::ComputeSystem &cs, const cl::Image2D &targets, const std::vector<cl::Image2D> &visibleStates, const std::vector<cl::Image2D> &visibleStatesPrev, float activeRatio, int inhibitionRadius, float noise, std::mt19937 &rng) {
	// One work item per hidden unit for every kernel launched here.
	const cl::NDRange hiddenRange(_hiddenSize.x, _hiddenSize.y);

	// Start by clearing summation buffer
	{
		const cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f };

		// Explicit casts: brace-initialising an unsigned cl::size_type from the
		// hidden size components would otherwise be a narrowing conversion.
		const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };
		const cl::array<cl::size_type, 3> hiddenRegion = { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 };

		//cs.getQueue().enqueueCopyImage(_hiddenBiases[_back], _hiddenSummationTemp[_back], zeroOrigin, zeroOrigin, hiddenRegion);
		cs.getQueue().enqueueFillImage(_hiddenSummationTemp[_back], zeroColor, zeroOrigin, hiddenRegion);
	}

	for (std::size_t vli = 0; vli < _visibleLayers.size(); ++vli) {
		VisibleLayer &vl = _visibleLayers[vli];
		VisibleLayerDesc &vld = _visibleLayerDescs[vli];

		int argIndex = 0;

		_activateKernel.setArg(argIndex++, visibleStates[vli]);
		_activateKernel.setArg(argIndex++, _hiddenSummationTemp[_back]);
		_activateKernel.setArg(argIndex++, _hiddenSummationTemp[_front]);
		_activateKernel.setArg(argIndex++, vl._weights[_back]);
		_activateKernel.setArg(argIndex++, vld._size);
		_activateKernel.setArg(argIndex++, vl._hiddenToVisible);
		_activateKernel.setArg(argIndex++, vld._radius);

		cs.getQueue().enqueueNDRangeKernel(_activateKernel, cl::NullRange, hiddenRange);

		// Swap buffers so the summation just bound at _front is at _back for the next stage
		std::swap(_hiddenSummationTemp[_front], _hiddenSummationTemp[_back]);
	}

	{
		std::uniform_int_distribution<int> seedDist(0, 999);

		// The seed is drawn but not handed to the solve kernel. The two draws are
		// kept so the caller's generator advances exactly as before; the casts
		// avoid an int -> cl_uint narrowing inside the braces.
		// NOTE(review): confirm whether the kernel was meant to receive this seed.
		const cl_uint2 seed = { static_cast<cl_uint>(seedDist(rng)), static_cast<cl_uint>(seedDist(rng)) };
		(void)seed;

		int argIndex = 0;

		_solveHiddenKernel.setArg(argIndex++, _hiddenSummationTemp[_back]);
		_solveHiddenKernel.setArg(argIndex++, _hiddenStates[_front]);
		_solveHiddenKernel.setArg(argIndex++, _hiddenActivations[_front]);
		_solveHiddenKernel.setArg(argIndex++, _hiddenSize);
		_solveHiddenKernel.setArg(argIndex++, inhibitionRadius);
		_solveHiddenKernel.setArg(argIndex++, activeRatio);

		cs.getQueue().enqueueNDRangeKernel(_solveHiddenKernel, cl::NullRange, hiddenRange);
	}

	// Swap hidden state buffers
	std::swap(_hiddenStates[_front], _hiddenStates[_back]);
	std::swap(_hiddenActivations[_front], _hiddenActivations[_back]);
}
Example #4
0
/// Intended to serialise the predictor as whitespace-separated text.
///
/// The function currently calls `abort()` as its first statement, so nothing
/// below that call is ever executed. The remaining code shows the intended
/// layout: hidden size, hidden states, the number of visible layers, and then
/// per layer its descriptor, its weights and its coordinate mappings.
///
/// @param cs  Compute system whose queue would be used for blocking image reads.
/// @param os  Destination stream.
void Predictor::writeToStream(sys::ComputeSystem &cs, std::ostream &os) const {
	abort(); // Not yet working

	const cl::array<cl::size_type, 3> readOrigin = { 0, 0, 0 };

	os << _hiddenSize.x << " " << _hiddenSize.y << std::endl;

	// Hidden states
	{
		const cl::array<cl::size_type, 3> hiddenRegion = { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 };

		std::vector<cl_float> hostStates(_hiddenSize.x * _hiddenSize.y);

		cs.getQueue().enqueueReadImage(_hiddenStates[_back], CL_TRUE, readOrigin, hiddenRegion, 0, 0, hostStates.data());

		for (cl_float state : hostStates)
			os << state << " ";

		os << std::endl;
	}

	// Layer information
	os << _visibleLayers.size() << std::endl;

	for (std::size_t layerIndex = 0; layerIndex < _visibleLayers.size(); ++layerIndex) {
		const VisibleLayer &layer = _visibleLayers[layerIndex];
		const VisibleLayerDesc &desc = _visibleLayerDescs[layerIndex];

		// Desc
		os << desc._size.x << " " << desc._size.y << " " << desc._radius << std::endl;

		// Layer: (2 * radius + 1)^2 weights for every hidden unit
		int weightDiam = desc._radius * 2 + 1;
		int numWeights = weightDiam * weightDiam;

		cl_int3 weightsSize = cl_int3{ _hiddenSize.x, _hiddenSize.y, numWeights };

		int totalNumWeights = weightsSize.x * weightsSize.y * weightsSize.z;

		{
			const cl::array<cl::size_type, 3> weightsRegion = { static_cast<cl::size_type>(weightsSize.x), static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) };

			std::vector<cl_float> hostWeights(totalNumWeights);

			cs.getQueue().enqueueReadImage(layer._weights[_back], CL_TRUE, readOrigin, weightsRegion, 0, 0, hostWeights.data());

			for (cl_float weight : hostWeights)
				os << weight << " ";
		}

		os << std::endl;

		os << layer._hiddenToVisible.x << " " << layer._hiddenToVisible.y << " "
			<< layer._visibleToHidden.x << " " << layer._visibleToHidden.y << " "
			<< layer._reverseRadii.x << " " << layer._reverseRadii.y << std::endl;
	}
}
Example #5
0
/// Runs one weight-learning step for every visible layer.
///
/// For each layer the weight/trace learning kernel is launched over the hidden
/// size with the layer's `_back` weights and `_front` weights bound, after
/// which the layer's two weight buffers are swapped.
///
/// @param cs                 Compute system providing the command queue.
/// @param tdError            Forwarded to the kernel as its last argument.
/// @param targets            Target image, bound to every launch.
/// @param visibleStatesPrev  One image per visible layer, indexed like `_visibleLayers`.
/// @param weightAlpha        Forwarded to the kernel.
/// @param weightLambda       Forwarded to the kernel.
void Predictor::learn(sys::ComputeSystem &cs, float tdError, const cl::Image2D &targets, std::vector<cl::Image2D> &visibleStatesPrev, float weightAlpha, float weightLambda) {
	auto &kernel = _learnWeightsTracesKernel;

	// Learn weights
	for (std::size_t layerIndex = 0; layerIndex < _visibleLayers.size(); ++layerIndex) {
		VisibleLayer &layer = _visibleLayers[layerIndex];
		VisibleLayerDesc &desc = _visibleLayerDescs[layerIndex];

		int slot = 0;

		// Images
		kernel.setArg(slot++, visibleStatesPrev[layerIndex]);
		kernel.setArg(slot++, targets);
		kernel.setArg(slot++, _hiddenStates[_front]);
		kernel.setArg(slot++, layer._weights[_back]);
		kernel.setArg(slot++, layer._weights[_front]);

		// Geometry
		kernel.setArg(slot++, desc._size);
		kernel.setArg(slot++, layer._hiddenToVisible);
		kernel.setArg(slot++, desc._radius);

		// Learning parameters
		kernel.setArg(slot++, weightAlpha);
		kernel.setArg(slot++, weightLambda);
		kernel.setArg(slot++, tdError);

		cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y));

		// The buffer bound at _front above moves to _back for the next call.
		std::swap(layer._weights[_front], layer._weights[_back]);
	}
}
Example #6
0
/// Launches the prediction learning kernel over the feed-forward input area.
///
/// All dimensions are taken from the configuration of the first E/I layer.
/// The two scale factors are `(layer size + 1) / (feed-forward size + 1)` per
/// axis, computed in float, once for the E dimensions and once for the I
/// dimensions.
///
/// @param cs          Compute system providing the command queue.
/// @param inputImage  Bound as the kernel's third argument.
/// @param alpha       Forwarded to the kernel as its last argument.
void HEInet::learnPrediction(sys::ComputeSystem &cs, const cl::Image2D &inputImage, float alpha) {
	const auto &config = _eiLayers.front().getConfig();

	// Shared divisors of the scale factors below.
	const float feedForwardWidthPlusOne = static_cast<float>(config._eFeedForwardWidth + 1);
	const float feedForwardHeightPlusOne = static_cast<float>(config._eFeedForwardHeight + 1);

	cl_float2 eFeedForwardDimsToEDims = {
		static_cast<float>(config._eWidth + 1) / feedForwardWidthPlusOne,
		static_cast<float>(config._eHeight + 1) / feedForwardHeightPlusOne
	};

	cl_float2 eFeedForwardDimsToIDims = {
		static_cast<float>(config._iWidth + 1) / feedForwardWidthPlusOne,
		static_cast<float>(config._iHeight + 1) / feedForwardHeightPlusOne
	};

	cl_int2 eDims = { config._eWidth, config._eHeight };
	cl_int2 iDims = { config._iWidth, config._iHeight };

	auto &kernel = _kernels->_predictionLearnKernel;

	int slot = 0;

	// Images and weights
	kernel.setArg(slot++, _eSpikeSumsIterPrev);
	kernel.setArg(slot++, _iSpikeSumsIterPrev);
	kernel.setArg(slot++, inputImage);
	kernel.setArg(slot++, _predictionPrev);
	kernel.setArg(slot++, _predictionFromEWeights._weightsPrev);
	kernel.setArg(slot++, _predictionFromIWeights._weightsPrev);
	kernel.setArg(slot++, _predictionFromEWeights._weights);
	kernel.setArg(slot++, _predictionFromIWeights._weights);

	// Scalars
	kernel.setArg(slot++, eFeedForwardDimsToEDims);
	kernel.setArg(slot++, eFeedForwardDimsToIDims);
	kernel.setArg(slot++, eDims);
	kernel.setArg(slot++, iDims);
	kernel.setArg(slot++, _predictionRadiusFromE);
	kernel.setArg(slot++, _predictionRadiusFromI);
	kernel.setArg(slot++, alpha);

	cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(config._eFeedForwardWidth, config._eFeedForwardHeight));
}
Example #7
0
/// Advances the network by one step.
///
/// First the input spike kernel is launched over the feed-forward input area.
/// Then every layer's `eActivate` is called from first to last, each fed with
/// the previous layer's `_eLayer._statesPrev` (the first layer is fed with
/// `_inputSpikesPrev`). Finally every layer's `iActivate` is called from last
/// to first, each fed with the following layer's `_iLayer._statesPrev` (the
/// last layer is fed with `zeroImage`).
///
/// @param cs                   Compute system providing the command queue.
/// @param inputFrequencyImage  Bound as the first argument of the input spike kernel.
/// @param zeroImage            Input handed to the last layer in the feed-back pass.
/// @param eta                  Forwarded to every `eActivate` / `iActivate` call.
/// @param shDecay              Forwarded to the input spike kernel and to every layer call.
/// @param saDecay              Forwarded to every `eActivate` / `iActivate` call.
void HEInet::update(sys::ComputeSystem &cs, const cl::Image2D &inputFrequencyImage, const cl::Image2D &zeroImage, float eta, float shDecay, float saDecay) {
	// Update input spikes
	{
		const auto &config = _eiLayers.front().getConfig();
		auto &kernel = _kernels->_updateInputSpikesKernel;

		int slot = 0;

		kernel.setArg(slot++, inputFrequencyImage);
		kernel.setArg(slot++, _inputSpikeTimersPrev);
		kernel.setArg(slot++, _inputSpikesHistoryPrev);
		kernel.setArg(slot++, _inputSpikeTimers);
		kernel.setArg(slot++, _inputSpikes);
		kernel.setArg(slot++, _inputSpikesHistory);
		kernel.setArg(slot++, shDecay);

		cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(config._eFeedForwardWidth, config._eFeedForwardHeight));
	}

	// Feed forward: first layer to last
	const cl::Image2D* pLayerInput = &_inputSpikesPrev;

	for (auto &layer : _eiLayers) {
		layer.eActivate(cs, *pLayerInput, eta, shDecay, saDecay);

		pLayerInput = &layer._eLayer._statesPrev;
	}

	// Feed back: last layer to first
	pLayerInput = &zeroImage;

	for (auto it = _eiLayers.rbegin(); it != _eiLayers.rend(); ++it) {
		it->iActivate(cs, *pLayerInput, eta, shDecay, saDecay);

		pLayerInput = &it->_iLayer._statesPrev;
	}
}
Example #8
0
/// Runs one weight-learning step for every visible layer.
///
/// For each layer the inhibited weight/trace learning kernel is launched over
/// the hidden size, after which the layer's weight buffers and Q-trace buffers
/// are each swapped.
///
/// @param cs                 Compute system providing the command queue.
/// @param reward             Forwarded to the kernel.
/// @param gamma              Forwarded to the kernel.
/// @param targets            Target image, bound to every launch.
/// @param visibleStatesPrev  One image per visible layer, indexed like `_visibleLayers`.
/// @param weightAlpha        Forwarded to the kernel.
/// @param weightLambda       Forwarded to the kernel.
/// @param biasAlpha          Not read by this function.
/// @param activeRatio        Forwarded to the kernel.
/// @param noise              Forwarded to the kernel as its last argument.
void PredictorSwarm::learn(sys::ComputeSystem &cs, float reward, float gamma, const cl::Image2D &targets, std::vector<cl::Image2D> &visibleStatesPrev, cl_float2 weightAlpha, cl_float2 weightLambda, cl_float biasAlpha, cl_float activeRatio, float noise) {
	auto &kernel = _learnWeightsTracesInhibitedKernel;

	// Learn weights
	for (std::size_t layerIndex = 0; layerIndex < _visibleLayers.size(); ++layerIndex) {
		VisibleLayer &layer = _visibleLayers[layerIndex];
		VisibleLayerDesc &desc = _visibleLayerDescs[layerIndex];

		int slot = 0;

		// States and activations
		kernel.setArg(slot++, visibleStatesPrev[layerIndex]);
		kernel.setArg(slot++, targets);
		kernel.setArg(slot++, _hiddenStates[_back]);
		kernel.setArg(slot++, _hiddenActivations[_front]);
		kernel.setArg(slot++, _hiddenStates[_front]);

		// Weights and traces, _back before _front
		kernel.setArg(slot++, layer._weights[_back]);
		kernel.setArg(slot++, layer._weights[_front]);
		kernel.setArg(slot++, layer._qTraces[_back]);
		kernel.setArg(slot++, layer._qTraces[_front]);

		// Geometry
		kernel.setArg(slot++, desc._size);
		kernel.setArg(slot++, layer._hiddenToVisible);
		kernel.setArg(slot++, desc._radius);

		// Learning parameters
		kernel.setArg(slot++, weightAlpha);
		kernel.setArg(slot++, weightLambda);
		kernel.setArg(slot++, reward);
		kernel.setArg(slot++, gamma);
		kernel.setArg(slot++, activeRatio);
		kernel.setArg(slot++, noise);

		cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y));

		// The buffers bound at _front above move to _back for the next call.
		std::swap(layer._weights[_front], layer._weights[_back]);
		std::swap(layer._qTraces[_front], layer._qTraces[_back]);
	}
}
Example #9
0
/// Launches the spike summation kernel twice, using the first E/I layer.
///
/// The first launch binds the E layer states with `_eSpikeSumsPrev` and
/// `_eSpikeSums` and runs over the E dimensions; the second binds the I layer
/// states with `_iSpikeSumsPrev` and `_iSpikeSums` and runs over the I
/// dimensions. The same kernel object is re-bound between the two launches.
///
/// @param cs      Compute system providing the command queue.
/// @param scalar  Forwarded to both launches as the last kernel argument.
void HEInet::sumSpikes(sys::ComputeSystem &cs, float scalar) {
	auto &kernel = _kernels->_sumSpikesKernel;
	auto &firstLayer = _eiLayers.front();
	const auto &config = firstLayer.getConfig();

	// E layer
	{
		int slot = 0;

		kernel.setArg(slot++, firstLayer._eLayer._states);
		kernel.setArg(slot++, _eSpikeSumsPrev);
		kernel.setArg(slot++, _eSpikeSums);
		kernel.setArg(slot++, scalar);

		cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(config._eWidth, config._eHeight));
	}

	// I layer
	{
		int slot = 0;

		kernel.setArg(slot++, firstLayer._iLayer._states);
		kernel.setArg(slot++, _iSpikeSumsPrev);
		kernel.setArg(slot++, _iSpikeSums);
		kernel.setArg(slot++, scalar);

		cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(config._iWidth, config._iHeight));
	}
}
Example #10
0
/// Zero-fills the previous sparse-coder hidden states of every layer.
///
/// One fill per layer is enqueued on the queue of `cs`, covering the layer's
/// full hidden size as given by the matching entry of `_layerDescs`.
///
/// @param cs  Compute system providing the command queue.
void AgentSwarm::clearMemory(sys::ComputeSystem &cs) {
    const cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f };
    const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };

    for (std::size_t l = 0; l < _layers.size(); ++l) {
        // Explicit casts: brace-initialising an unsigned cl::size_type from the
        // hidden size components would otherwise be a narrowing conversion.
        const cl::array<cl::size_type, 3> layerRegion = {
            static_cast<cl::size_type>(_layerDescs[l]._hiddenSize.x),
            static_cast<cl::size_type>(_layerDescs[l]._hiddenSize.y),
            1
        };

        cs.getQueue().enqueueFillImage(_layers[l]._scHiddenStatesPrev, zeroColor, zeroOrigin, layerRegion);
    }
}
Example #11
0
/// Fills `_inputSpikeTimersPrev` with a single colour.
///
/// The filled region starts at the origin and spans the feed-forward width and
/// height taken from the configuration of the first E/I layer.
///
/// @param cs     Compute system providing the command queue.
/// @param color  Fill value passed to the image fill.
void HEInet::setInputPhase(sys::ComputeSystem &cs, cl_uint4 color) {
	const auto &config = _eiLayers.front().getConfig();

	cl::size_t<3> origin;
	origin[0] = 0;
	origin[1] = 0;
	origin[2] = 0;

	cl::size_t<3> extent;
	extent[0] = config._eFeedForwardWidth;
	extent[1] = config._eFeedForwardHeight;
	extent[2] = 1;

	cs.getQueue().enqueueFillImage(_inputSpikeTimersPrev, color, origin, extent);
}
Example #12
0
/// Copies an image into `_inputSpikeTimersPrev`.
///
/// The copied region starts at the origin of both images and spans the
/// feed-forward width and height taken from the configuration of the first
/// E/I layer.
///
/// @param cs               Compute system providing the command queue.
/// @param inputPhaseImage  Source image of the copy.
void HEInet::setInputPhase(sys::ComputeSystem &cs, const cl::Image2D &inputPhaseImage) {
	const auto &config = _eiLayers.front().getConfig();

	cl::size_t<3> origin;
	origin[0] = 0;
	origin[1] = 0;
	origin[2] = 0;

	cl::size_t<3> extent;
	extent[0] = config._eFeedForwardWidth;
	extent[1] = config._eFeedForwardHeight;
	extent[2] = 1;

	// Same origin for source and destination.
	cs.getQueue().enqueueCopyImage(inputPhaseImage, _inputSpikeTimersPrev, origin, origin, extent);
}
Example #13
0
/// Zero-fills the four spike sum images.
///
/// `_eSpikeSums` and `_eSpikeSumsPrev` are filled over the E dimensions,
/// `_iSpikeSums` and `_iSpikeSumsPrev` over the I dimensions; all dimensions
/// come from the configuration of the first E/I layer.
///
/// @param cs  Compute system providing the command queue.
void HEInet::spikeSumBegin(sys::ComputeSystem &cs) {
	const auto &config = _eiLayers.front().getConfig();

	cl_float4 zero = { 0.0f, 0.0f, 0.0f, 0.0f };

	cl::size_t<3> origin;
	origin[0] = 0;
	origin[1] = 0;
	origin[2] = 0;

	cl::size_t<3> eExtent;
	eExtent[0] = config._eWidth;
	eExtent[1] = config._eHeight;
	eExtent[2] = 1;

	cl::size_t<3> iExtent;
	iExtent[0] = config._iWidth;
	iExtent[1] = config._iHeight;
	iExtent[2] = 1;

	// Excitatory sums
	cs.getQueue().enqueueFillImage(_eSpikeSums, zero, origin, eExtent);
	cs.getQueue().enqueueFillImage(_eSpikeSumsPrev, zero, origin, eExtent);

	// Inhibitory sums
	cs.getQueue().enqueueFillImage(_iSpikeSums, zero, origin, iExtent);
	cs.getQueue().enqueueFillImage(_iSpikeSumsPrev, zero, origin, iExtent);
}
Example #14
0
/// Launches the given 3D random-initialisation kernel on an image.
///
/// Two values are drawn from `rng` with a default-constructed
/// `std::uniform_int_distribution<int>` and passed to the kernel as a two
/// component unsigned seed, together with the image and `range`.
///
/// @param image3D                Image bound as the first kernel argument.
/// @param cs                     Compute system providing the command queue.
/// @param randomUniform3DKernel  Kernel to bind and launch.
/// @param size                   Global work size of the launch (x, y, z).
/// @param range                  Forwarded to the kernel as its third argument.
/// @param rng                    Generator; exactly two values are drawn from it.
void neo::randomUniform(cl::Image3D &image3D, sys::ComputeSystem &cs, cl::Kernel &randomUniform3DKernel, cl_int3 size, cl_float2 range, std::mt19937 &rng) {
	int argIndex = 0;

	std::uniform_int_distribution<int> seedDist;

	// The distribution yields int; cast explicitly because brace-initialising
	// the unsigned seed components from int is a narrowing conversion.
	// Braced initialisers are evaluated left to right, so the draw order is fixed.
	cl_uint2 seed = { static_cast<cl_uint>(seedDist(rng)), static_cast<cl_uint>(seedDist(rng)) };

	randomUniform3DKernel.setArg(argIndex++, image3D);
	randomUniform3DKernel.setArg(argIndex++, seed);
	randomUniform3DKernel.setArg(argIndex++, range);

	cs.getQueue().enqueueNDRangeKernel(randomUniform3DKernel, cl::NullRange, cl::NDRange(size.x, size.y, size.z));
}
Example #15
0
/// Fills the three feed-back hidden state images of every layer with zeros.
///
/// For each layer, `_hiddenStatesFeedBackPrevPrev`, `_hiddenStatesFeedBackPrev`
/// and `_hiddenStatesFeedBack` are filled, in that order, over the layer's
/// full width and height.
///
/// @param cs  Compute system providing the command queue.
void HTFE::clearMemory(sys::ComputeSystem &cs) {
    cl_uint4 zero = { 0, 0, 0, 0 };

    // Every fill starts at the image origin.
    cl::size_t<3> origin;
    origin[0] = origin[1] = origin[2] = 0;

    for (int layerIndex = 0; layerIndex < _layers.size(); layerIndex++) {
        auto &layer = _layers[layerIndex];

        cl::size_t<3> extent;
        extent[0] = _layerDescs[layerIndex]._width;
        extent[1] = _layerDescs[layerIndex]._height;
        extent[2] = 1;

        cs.getQueue().enqueueFillImage(layer._hiddenStatesFeedBackPrevPrev, zero, origin, extent);
        cs.getQueue().enqueueFillImage(layer._hiddenStatesFeedBackPrev, zero, origin, extent);
        cs.getQueue().enqueueFillImage(layer._hiddenStatesFeedBack, zero, origin, extent);
    }
}
/// Launches the forward kernel for one visible layer.
///
/// The kernel is bound with the hidden states, the output image, the layer's
/// `_back` weights and the layer geometry, then launched with one work item
/// per element of the visible layer's size.
///
/// @param cs                 Compute system providing the command queue.
/// @param hiddenStates       Bound as the first kernel argument.
/// @param visibleLayerIndex  Index into `_visibleLayers` / `_visibleLayerDescs`.
/// @param visibleStates      Bound as the second kernel argument.
void ComparisonSparseCoder::reconstruct(sys::ComputeSystem &cs, const cl::Image2D &hiddenStates, int visibleLayerIndex, cl::Image2D &visibleStates) {
	VisibleLayer &layer = _visibleLayers[visibleLayerIndex];
	VisibleLayerDesc &desc = _visibleLayerDescs[visibleLayerIndex];

	auto &kernel = _forwardKernel;

	int slot = 0;

	// Images
	kernel.setArg(slot++, hiddenStates);
	kernel.setArg(slot++, visibleStates);
	kernel.setArg(slot++, layer._weights[_back]);

	// Geometry
	kernel.setArg(slot++, desc._size);
	kernel.setArg(slot++, _hiddenSize);
	kernel.setArg(slot++, layer._visibleToHidden);
	kernel.setArg(slot++, layer._hiddenToVisible);
	kernel.setArg(slot++, desc._radius);
	kernel.setArg(slot++, layer._reverseRadii);

	cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(desc._size.x, desc._size.y));
}
Example #17
0
/// Runs one full activation step of the hierarchy.
///
/// Sequence, all on the command queue of `cs`:
///  1. Blocking upload of `_input` into `_inputImage`.
///  2. Upward pass (layer 0 to last): feed-forward activation kernel followed
///     by the inhibition kernel, each layer fed by the one below it.
///  3. Downward pass (last layer to 0): feed-back activation (an image copy
///     for the top layer, a kernel otherwise), inhibition, then the visible
///     reconstruction kernel.
///  4. Blocking download of the first layer's `_visibleReconstruction` into
///     `_prediction`.
///
/// NOTE(review): step 4 calls `_layers.front()`, so at least one layer is
/// required.
///
/// @param cs  Compute system providing the command queue.
void HTFE::activate(sys::ComputeSystem &cs) {
    // Upload the host-side input (blocking write: CL_TRUE).
    {
        cl::size_t<3> origin;
        origin[0] = 0;
        origin[1] = 0;
        origin[2] = 0;

        cl::size_t<3> region;
        region[0] = _inputWidth;
        region[1] = _inputHeight;
        region[2] = 1;

        cs.getQueue().enqueueWriteImage(_inputImage, CL_TRUE, origin, region, 0, 0, _input.data());
    }

    // NOTE(review): seedDist is never used in this function.
    std::uniform_int_distribution<int> seedDist(0, 99999);

    // ------------------------------------------------------------------------------
    // ------------------------------------ Go up -----------------------------------
    // ------------------------------------------------------------------------------

    // Input of the layer currently being processed; starts at the input image.
    cl::Image2D* pPrevLayer = &_inputImage;
    int prevWidth = _inputWidth;
    int prevHeight = _inputHeight;

    for (int l = 0; l < _layers.size(); l++) {
        // Sparsity times the area of the (2 * inhibitionRadius + 1)^2 neighbourhood, rounded.
        float localActivity = std::round(_layerDescs[l]._sparsity * std::pow(2 * _layerDescs[l]._inhibitionRadius + 1, 2));

        Int2 layerSize;
        layerSize._x = _layerDescs[l]._width;
        layerSize._y = _layerDescs[l]._height;

        // NOTE(review): layerSizeMinusOne is not passed to any kernel in the upward pass.
        Int2 layerSizeMinusOne;
        layerSizeMinusOne._x = _layerDescs[l]._width - 1;
        layerSizeMinusOne._y = _layerDescs[l]._height - 1;

        // NOTE(review): a width or height of 1 makes the divisor zero here.
        Float2 layerSizeMinusOneInv;
        layerSizeMinusOneInv._x = 1.0f / (_layerDescs[l]._width - 1);
        layerSizeMinusOneInv._y = 1.0f / (_layerDescs[l]._height - 1);

        Int2 inputSize;
        inputSize._x = prevWidth;
        inputSize._y = prevHeight;

        Int2 inputSizeMinusOne;
        inputSizeMinusOne._x = prevWidth - 1;
        inputSizeMinusOne._y = prevHeight - 1;

        // NOTE(review): inputSizeMinusOneInv is not passed to any kernel in the upward pass.
        Float2 inputSizeMinusOneInv;
        inputSizeMinusOneInv._x = 1.0f / (prevWidth - 1);
        inputSizeMinusOneInv._y = 1.0f / (prevHeight - 1);

        // -------------------------------- Activate --------------------------------

        int index = 0;

        _layerHiddenFeedForwardActivateKernel.setArg(index++, *pPrevLayer);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrev);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layers[l]._feedForwardWeightsPrev);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layers[l]._lateralWeightsPrev);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layers[l]._hiddenBiasesPrev);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layers[l]._hiddenFeedForwardActivations);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, layerSize);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, layerSizeMinusOneInv);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, inputSize);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, inputSizeMinusOne);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layerDescs[l]._receptiveFieldRadius);
        _layerHiddenFeedForwardActivateKernel.setArg(index++, _layerDescs[l]._lateralConnectionRadius);

        // One work item per hidden unit of this layer.
        cs.getQueue().enqueueNDRangeKernel(_layerHiddenFeedForwardActivateKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._width, _layerDescs[l]._height));

        // ---------------------------------- Inhibit ---------------------------------

        index = 0;

        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenFeedForwardActivations);
        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenStatesFeedForwardPrev);
        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenStatesFeedForward);
        _layerHiddenInhibitKernel.setArg(index++, layerSize);
        _layerHiddenInhibitKernel.setArg(index++, _layerDescs[l]._inhibitionRadius);
        _layerHiddenInhibitKernel.setArg(index++, localActivity);

        cs.getQueue().enqueueNDRangeKernel(_layerHiddenInhibitKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._width, _layerDescs[l]._height));

        // This layer's feed-forward states become the next layer's input.
        pPrevLayer = &_layers[l]._hiddenStatesFeedForward;
        prevWidth = _layerDescs[l]._width;
        prevHeight = _layerDescs[l]._height;
    }

    // ------------------------------------------------------------------------------
    // -------------------------------- Go back down --------------------------------
    // ------------------------------------------------------------------------------

    for (int l = _layers.size() - 1; l >= 0; l--) {
        // Select the layer below (or the input image for layer 0). Only
        // prevWidth/prevHeight are read afterwards; pPrevLayer is not
        // dereferenced in this loop.
        if (l > 0) {
            pPrevLayer = &_layers[l - 1]._hiddenStatesFeedForward;
            prevWidth = _layerDescs[l - 1]._width;
            prevHeight = _layerDescs[l - 1]._height;
        }
        else {
            pPrevLayer = &_inputImage;
            prevWidth = _inputWidth;
            prevHeight = _inputHeight;
        }

        // Same formula as in the upward pass.
        float localActivity = std::round(_layerDescs[l]._sparsity * std::pow(2 * _layerDescs[l]._inhibitionRadius + 1, 2));

        Int2 layerSize;
        layerSize._x = _layerDescs[l]._width;
        layerSize._y = _layerDescs[l]._height;

        Int2 layerSizeMinusOne;
        layerSizeMinusOne._x = _layerDescs[l]._width - 1;
        layerSizeMinusOne._y = _layerDescs[l]._height - 1;

        // NOTE(review): a width or height of 1 makes the divisor zero here.
        Float2 layerSizeMinusOneInv;
        layerSizeMinusOneInv._x = 1.0f / (_layerDescs[l]._width - 1);
        layerSizeMinusOneInv._y = 1.0f / (_layerDescs[l]._height - 1);

        // NOTE(review): inputSize is not passed to any kernel in the downward pass.
        Int2 inputSize;
        inputSize._x = prevWidth;
        inputSize._y = prevHeight;

        Int2 inputSizeMinusOne;
        inputSizeMinusOne._x = prevWidth - 1;
        inputSizeMinusOne._y = prevHeight - 1;

        Float2 inputSizeMinusOneInv;
        inputSizeMinusOneInv._x = 1.0f / (prevWidth - 1);
        inputSizeMinusOneInv._y = 1.0f / (prevHeight - 1);

        // Size of the layer above; the top layer uses a 1x1 placeholder.
        Int2 nextSize;
        Int2 nextSizeMinusOne;

        if (l == _layers.size() - 1) {
            nextSize._x = nextSize._y = 1;
            nextSizeMinusOne._x = nextSizeMinusOne._y = 0;
        }
        else {
            nextSize._x = _layerDescs[l + 1]._width;
            nextSize._y = _layerDescs[l + 1]._height;
            nextSizeMinusOne._x = _layerDescs[l + 1]._width - 1;
            nextSizeMinusOne._y = _layerDescs[l + 1]._height - 1;
        }

        // -------------------------------- Activate --------------------------------

        int index = 0;

        if (l == _layers.size() - 1) {
            // Top layer: there is no layer above, so the feed-forward
            // activations are copied into the feed-back activations.
            cl::size_t<3> origin;
            origin[0] = 0;
            origin[1] = 0;
            origin[2] = 0;

            cl::size_t<3> region;
            region[0] = _layerDescs[l]._width;
            region[1] = _layerDescs[l]._height;
            region[2] = 1;

            cs.getQueue().enqueueCopyImage(_layers[l]._hiddenFeedForwardActivations, _layers[l]._hiddenFeedBackActivations, origin, origin, region);
        }
        else {
            // Other layers: the kernel is bound with this layer's feed-forward
            // activations and the feed-back activations of the layer above.
            _layerHiddenFeedBackActivateKernel.setArg(index++, _layers[l]._hiddenFeedForwardActivations);
            _layerHiddenFeedBackActivateKernel.setArg(index++, _layers[l + 1]._hiddenFeedBackActivations);
            _layerHiddenFeedBackActivateKernel.setArg(index++, _layers[l]._feedBackWeightsPrev);
            _layerHiddenFeedBackActivateKernel.setArg(index++, _layers[l]._hiddenFeedBackActivations);
            _layerHiddenFeedBackActivateKernel.setArg(index++, layerSize);
            _layerHiddenFeedBackActivateKernel.setArg(index++, layerSizeMinusOneInv);
            _layerHiddenFeedBackActivateKernel.setArg(index++, nextSize);
            _layerHiddenFeedBackActivateKernel.setArg(index++, nextSizeMinusOne);
            _layerHiddenFeedBackActivateKernel.setArg(index++, _layerDescs[l]._feedBackConnectionRadius);

            cs.getQueue().enqueueNDRangeKernel(_layerHiddenFeedBackActivateKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._width, _layerDescs[l]._height));
        }

        // ---------------------------------- Inhibit ---------------------------------

        index = 0;

        // Same inhibition kernel as in the upward pass, bound to the feed-back images.
        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenFeedBackActivations);
        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrev);
        _layerHiddenInhibitKernel.setArg(index++, _layers[l]._hiddenStatesFeedBack);
        _layerHiddenInhibitKernel.setArg(index++, layerSize);
        _layerHiddenInhibitKernel.setArg(index++, _layerDescs[l]._inhibitionRadius);
        _layerHiddenInhibitKernel.setArg(index++, localActivity);

        cs.getQueue().enqueueNDRangeKernel(_layerHiddenInhibitKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._width, _layerDescs[l]._height));

        // --------------------- Make Predictions (Reconstruction) ---------------------

        index = 0;

        _layerVisibleReconstructKernel.setArg(index++, _layers[l]._hiddenStatesFeedBack);
        _layerVisibleReconstructKernel.setArg(index++, _layers[l]._reconstructionWeightsPrev);
        _layerVisibleReconstructKernel.setArg(index++, _layers[l]._visibleBiasesPrev);
        _layerVisibleReconstructKernel.setArg(index++, _layers[l]._visibleReconstruction);
        _layerVisibleReconstructKernel.setArg(index++, _layerDescs[l]._reconstructionRadius);
        _layerVisibleReconstructKernel.setArg(index++, inputSizeMinusOne);
        _layerVisibleReconstructKernel.setArg(index++, inputSizeMinusOneInv);
        _layerVisibleReconstructKernel.setArg(index++, layerSize);
        _layerVisibleReconstructKernel.setArg(index++, layerSizeMinusOne);
        _layerVisibleReconstructKernel.setArg(index++, layerSizeMinusOneInv);

        // Launched over the size of the layer below, not over this layer's size.
        cs.getQueue().enqueueNDRangeKernel(_layerVisibleReconstructKernel, cl::NullRange, cl::NDRange(prevWidth, prevHeight));
    }

    // Download the first layer's reconstruction as the prediction (blocking read: CL_TRUE).
    {
        cl::size_t<3> origin;
        origin[0] = 0;
        origin[1] = 0;
        origin[2] = 0;

        cl::size_t<3> region;
        region[0] = _inputWidth;
        region[1] = _inputHeight;
        region[2] = 1;

        cs.getQueue().enqueueReadImage(_layers.front()._visibleReconstruction, CL_TRUE, origin, region, 0, 0, _prediction.data());
    }
}
Example #18
0
// Builds the hierarchy with freshly initialised parameters.
//
// Stores the input dimensions and layer descriptions, allocates the host-side
// input/prediction buffers and every device image of every layer, runs the
// "initializeLayerHidden" and "initializeLayerVisible" kernels on each layer,
// copies the initialised images into their "Prev" counterparts and finally
// creates the kernels used by the rest of the class.
//
// cs             - compute system supplying the OpenCL context and queue
// program        - program from which all kernels are created
// inputWidth     - width of the input image
// inputHeight    - height of the input image
// layerDescs     - one description per layer; layer 0 reads the input, every
//                  further layer reads the layer before it
// minInitWeight  - forwarded unchanged to both initialisation kernels
// maxInitWeight  - forwarded unchanged to both initialisation kernels
void HTFE::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program, int inputWidth, int inputHeight, const std::vector<LayerDesc> &layerDescs, float minInitWeight, float maxInitWeight) {
    // Time-seeded generator; it only produces the seeds handed to the kernels
    std::mt19937 generator(time(nullptr));

    std::uniform_int_distribution<int> seedDist(0, 99999);

    _inputWidth = inputWidth;
    _inputHeight = inputHeight;

    _layerDescs = layerDescs;

    _layers.resize(_layerDescs.size());

    cl::Kernel initializeLayerHiddenKernel = cl::Kernel(program.getProgram(), "initializeLayerHidden");
    cl::Kernel initializeLayerVisibleKernel = cl::Kernel(program.getProgram(), "initializeLayerVisible");

    _input.clear();
    _input.resize(_inputWidth * _inputHeight, 0.0f);

    _prediction.clear();
    _prediction.resize(_inputWidth * _inputHeight, 0.0f);

    // The two image formats used throughout this function
    const cl::ImageFormat scalarFormat(CL_R, CL_FLOAT);
    const cl::ImageFormat pairFormat(CL_RG, CL_FLOAT);

    // Packs three components into the triple type the queue calls expect
    auto makeExtent = [](int x, int y, int z) -> cl::size_t<3> {
        cl::size_t<3> extent;
        extent[0] = x;
        extent[1] = y;
        extent[2] = z;
        return extent;
    };

    const cl::size_t<3> origin = makeExtent(0, 0, 0);

    auto makeImage2D = [&cs](const cl::ImageFormat &format, int width, int height) {
        return cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, format, width, height);
    };

    auto makeImage3D = [&cs](const cl::ImageFormat &format, int width, int height, int depth) {
        return cl::Image3D(cs.getContext(), CL_MEM_READ_WRITE, format, width, height, depth);
    };

    // Enqueues a whole-image copy of the given extent, starting at the origin
    auto copyImage = [&](const cl::Image &source, const cl::Image &destination, int width, int height, int depth) {
        cs.getQueue().enqueueCopyImage(source, destination, origin, origin, makeExtent(width, height, depth));
    };

    // Number of cells in a square window of the given radius. Exact integer
    // arithmetic: squaring through std::pow and truncating the double result
    // back to int can come out one short if pow is not exact.
    // NOTE(review): assumes the radius fields of LayerDesc are integers - confirm.
    auto windowArea = [](int radius) -> int {
        const int diameter = radius * 2 + 1;
        return diameter * diameter;
    };

    _inputImage = makeImage2D(scalarFormat, _inputWidth, _inputHeight);
    _inputImagePrev = makeImage2D(scalarFormat, _inputWidth, _inputHeight);

    {
        // The images are CL_FLOAT, so the fill colour is given as floats
        const cl_float4 clear = { 0.0f, 0.0f, 0.0f, 0.0f };

        const cl::size_t<3> region = makeExtent(_inputWidth, _inputHeight, 1);

        cs.getQueue().enqueueFillImage(_inputImage, clear, origin, region);
        cs.getQueue().enqueueFillImage(_inputImagePrev, clear, origin, region);
    }

    // Size of whatever feeds the current layer: the input for layer 0,
    // the previous layer afterwards
    int prevWidth = _inputWidth;
    int prevHeight = _inputHeight;

    const int numLayers = static_cast<int>(_layers.size());

    for (int l = 0; l < numLayers; l++) {
        auto &layer = _layers[l];
        const auto &desc = _layerDescs[l];

        const int width = desc._width;
        const int height = desc._height;

        const int numFeedForwardWeights = windowArea(desc._receptiveFieldRadius);
        const int numReconstructionWeights = windowArea(desc._reconstructionRadius);
        const int numLateralWeights = windowArea(desc._lateralConnectionRadius);
        const int numFeedBackWeights = windowArea(desc._feedBackConnectionRadius);

        // ------------------------------ Allocate ------------------------------

        // Images sized like the layer itself
        layer._hiddenFeedForwardActivations = makeImage2D(pairFormat, width, height);

        layer._hiddenFeedBackActivations = makeImage2D(pairFormat, width, height);
        layer._hiddenFeedBackActivationsPrev = makeImage2D(pairFormat, width, height);

        layer._hiddenStatesFeedForward = makeImage2D(scalarFormat, width, height);
        layer._hiddenStatesFeedForwardPrev = makeImage2D(scalarFormat, width, height);

        layer._hiddenStatesFeedBack = makeImage2D(scalarFormat, width, height);
        layer._hiddenStatesFeedBackPrev = makeImage2D(scalarFormat, width, height);
        layer._hiddenStatesFeedBackPrevPrev = makeImage2D(scalarFormat, width, height);

        layer._feedForwardWeights = makeImage3D(scalarFormat, width, height, numFeedForwardWeights);
        layer._feedForwardWeightsPrev = makeImage3D(scalarFormat, width, height, numFeedForwardWeights);

        // Images sized like the layer's input (the "visible" side)
        layer._reconstructionWeights = makeImage3D(scalarFormat, prevWidth, prevHeight, numReconstructionWeights);
        layer._reconstructionWeightsPrev = makeImage3D(scalarFormat, prevWidth, prevHeight, numReconstructionWeights);

        layer._visibleBiases = makeImage2D(scalarFormat, prevWidth, prevHeight);
        layer._visibleBiasesPrev = makeImage2D(scalarFormat, prevWidth, prevHeight);

        layer._hiddenBiases = makeImage2D(scalarFormat, width, height);
        layer._hiddenBiasesPrev = makeImage2D(scalarFormat, width, height);

        layer._lateralWeights = makeImage3D(scalarFormat, width, height, numLateralWeights);
        layer._lateralWeightsPrev = makeImage3D(scalarFormat, width, height, numLateralWeights);

        layer._feedBackWeights = makeImage3D(scalarFormat, width, height, numFeedBackWeights);
        layer._feedBackWeightsPrev = makeImage3D(scalarFormat, width, height, numFeedBackWeights);

        layer._visibleReconstruction = makeImage2D(scalarFormat, prevWidth, prevHeight);
        layer._visibleReconstructionPrev = makeImage2D(scalarFormat, prevWidth, prevHeight);

        // ------------------------------ Initialize ------------------------------

        Uint2 initSeedHidden;
        initSeedHidden._x = seedDist(generator);
        initSeedHidden._y = seedDist(generator);

        int index = 0;

        initializeLayerHiddenKernel.setArg(index++, layer._hiddenFeedForwardActivations);
        initializeLayerHiddenKernel.setArg(index++, layer._hiddenFeedBackActivations);
        initializeLayerHiddenKernel.setArg(index++, layer._hiddenStatesFeedForward);
        initializeLayerHiddenKernel.setArg(index++, layer._feedForwardWeights);
        initializeLayerHiddenKernel.setArg(index++, layer._hiddenBiases);
        initializeLayerHiddenKernel.setArg(index++, layer._lateralWeights);
        initializeLayerHiddenKernel.setArg(index++, layer._feedBackWeights);
        initializeLayerHiddenKernel.setArg(index++, numFeedForwardWeights);
        initializeLayerHiddenKernel.setArg(index++, numLateralWeights);
        initializeLayerHiddenKernel.setArg(index++, numFeedBackWeights);
        initializeLayerHiddenKernel.setArg(index++, initSeedHidden);
        initializeLayerHiddenKernel.setArg(index++, desc._sparsity);
        initializeLayerHiddenKernel.setArg(index++, desc._lateralScalar);
        initializeLayerHiddenKernel.setArg(index++, desc._feedBackScalar);
        initializeLayerHiddenKernel.setArg(index++, minInitWeight);
        initializeLayerHiddenKernel.setArg(index++, maxInitWeight);

        cs.getQueue().enqueueNDRangeKernel(initializeLayerHiddenKernel, cl::NullRange, cl::NDRange(width, height));

        Uint2 initSeedVisible;
        initSeedVisible._x = seedDist(generator);
        initSeedVisible._y = seedDist(generator);

        index = 0;

        initializeLayerVisibleKernel.setArg(index++, layer._visibleBiases);
        initializeLayerVisibleKernel.setArg(index++, layer._visibleReconstruction);
        initializeLayerVisibleKernel.setArg(index++, layer._reconstructionWeights);
        initializeLayerVisibleKernel.setArg(index++, numReconstructionWeights);
        initializeLayerVisibleKernel.setArg(index++, initSeedVisible);
        initializeLayerVisibleKernel.setArg(index++, minInitWeight);
        initializeLayerVisibleKernel.setArg(index++, maxInitWeight);

        cs.getQueue().enqueueNDRangeKernel(initializeLayerVisibleKernel, cl::NullRange, cl::NDRange(prevWidth, prevHeight));

        // ------------------- Seed the remaining images by copy -------------------

        copyImage(layer._hiddenFeedBackActivations, layer._hiddenFeedBackActivationsPrev, width, height, 1);

        copyImage(layer._visibleReconstruction, layer._visibleReconstructionPrev, prevWidth, prevHeight, 1);

        // The initialised feed-forward states seed every other hidden state image
        copyImage(layer._hiddenStatesFeedForward, layer._hiddenStatesFeedForwardPrev, width, height, 1);
        copyImage(layer._hiddenStatesFeedForward, layer._hiddenStatesFeedBack, width, height, 1);
        copyImage(layer._hiddenStatesFeedForward, layer._hiddenStatesFeedBackPrev, width, height, 1);
        copyImage(layer._hiddenStatesFeedForward, layer._hiddenStatesFeedBackPrevPrev, width, height, 1);

        copyImage(layer._feedForwardWeights, layer._feedForwardWeightsPrev, width, height, numFeedForwardWeights);

        copyImage(layer._visibleBiases, layer._visibleBiasesPrev, prevWidth, prevHeight, 1);

        copyImage(layer._hiddenBiases, layer._hiddenBiasesPrev, width, height, 1);

        copyImage(layer._lateralWeights, layer._lateralWeightsPrev, width, height, numLateralWeights);

        copyImage(layer._feedBackWeights, layer._feedBackWeightsPrev, width, height, numFeedBackWeights);

        copyImage(layer._reconstructionWeights, layer._reconstructionWeightsPrev, prevWidth, prevHeight, numReconstructionWeights);

        // This layer is the input of the next one
        prevWidth = width;
        prevHeight = height;
    }

    _layerHiddenFeedForwardActivateKernel = cl::Kernel(program.getProgram(), "layerHiddenFeedForwardActivate");
    _layerHiddenFeedBackActivateKernel = cl::Kernel(program.getProgram(), "layerHiddenFeedBackActivate");
    _layerHiddenInhibitKernel = cl::Kernel(program.getProgram(), "layerHiddenInhibit");
    _layerVisibleReconstructKernel = cl::Kernel(program.getProgram(), "layerVisibleReconstruct");
    _layerHiddenWeightUpdateKernel = cl::Kernel(program.getProgram(), "layerHiddenWeightUpdate");
    _layerHiddenWeightUpdateLastKernel = cl::Kernel(program.getProgram(), "layerHiddenWeightUpdateLast");
    _layerVisibleWeightUpdateKernel = cl::Kernel(program.getProgram(), "layerVisibleWeightUpdate");
}
Example #19
0
// Runs one simulation step of the agent.
//
// In order: reads back the previously chosen and previously taken actions,
// pushes the previous Q value through the set-Q kernel, whitens the inputs,
// activates the sparse coders bottom-up, averages the Q predictor's hidden
// states into a scalar Q, computes the TD error, back-propagates that error
// (discounted) into the stored replay frames, records a new replay frame and,
// when learning is enabled and more than one frame exists, replays random
// frame pairs to train the sparse coders, the predictors and the Q predictor.
//
// cs          - compute system supplying the OpenCL queue
// input       - current observation image
// actionTaken - action that was actually executed
// reward      - reward received for this step
// rng         - generator used to pick replay frames
// learn       - enables the replay/learning phase
// whiten      - when true the observation is whitened before use
void AgentER::simStep(sys::ComputeSystem &cs, const cl::Image2D &input, const cl::Image2D &actionTaken, float reward, std::mt19937 &rng, bool learn, bool whiten) {
	const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };

	// Turns a 2D size into the single-slice region the queue calls expect
	auto regionOf = [](const cl_int2 &size) -> cl::array<cl::size_type, 3> {
		return { static_cast<cl::size_type>(size.x), static_cast<cl::size_type>(size.y), 1 };
	};

	const cl::array<cl::size_type, 3> actionRegion = regionOf(_actionSize);
	const cl::array<cl::size_type, 3> qRegion = regionOf(_qSize);

	const int numLayers = static_cast<int>(_layers.size());

	// Keep previous best action for later
	std::vector<float> prevBestAction(_actionSize.x * _actionSize.y);
	std::vector<float> prevTakenAction(_actionSize.x * _actionSize.y);

	cs.getQueue().enqueueReadImage(getAction(), CL_TRUE, zeroOrigin, actionRegion, 0, 0, prevBestAction.data());
	cs.getQueue().enqueueReadImage(actionTaken, CL_TRUE, zeroOrigin, actionRegion, 0, 0, prevTakenAction.data());

	// Place previous Q into Q buffer
	{
		int argIndex = 0;

		_setQKernel.setArg(argIndex++, _qTransform);
		_setQKernel.setArg(argIndex++, _qInput);
		_setQKernel.setArg(argIndex++, _prevQ);

		cs.getQueue().enqueueNDRangeKernel(_setQKernel, cl::NullRange, cl::NDRange(_qSize.x, _qSize.y));
	}

	// Whiten input (optional), action and Q
	if (whiten)
		_inputWhitener.filter(cs, input, _whiteningKernelRadius, _whiteningIntensity);

	_actionWhitener.filter(cs, actionTaken, _whiteningKernelRadius, _whiteningIntensity);

	_qWhitener.filter(cs, _qInput, _whiteningKernelRadius, _whiteningIntensity);

	// Feed forward
	for (int l = 0; l < numLayers; l++) {
		std::vector<cl::Image2D> visibleStates;

		if (l == 0) {
			// Bottom layer sees observation, action and Q
			visibleStates.resize(3);

			visibleStates[0] = whiten ? _inputWhitener.getResult() : input;
			visibleStates[1] = _actionWhitener.getResult();
			visibleStates[2] = _qWhitener.getResult();
		}
		else {
			// Higher layers see the layer below plus their own (recurrent) states
			visibleStates.resize(2);

			visibleStates[0] = _layers[l - 1]._sc.getHiddenStates()[_back];
			visibleStates[1] = _layers[l]._sc.getHiddenStates()[_back];
		}

		_layers[l]._sc.activate(cs, visibleStates, _layerDescs[l]._scActiveRatio);
	}

	// Top-down prediction pass. The activation call is disabled, so this loop
	// currently only assembles the inputs it would use.
	for (int l = numLayers - 1; l >= 0; l--) {
		std::vector<cl::Image2D> visibleStates;

		if (l < numLayers - 1) {
			visibleStates.resize(2);

			visibleStates[0] = _layers[l]._sc.getHiddenStates()[_back];
			visibleStates[1] = _layers[l + 1]._pred.getHiddenStates()[_back];
		}
		else {
			visibleStates.resize(1);

			visibleStates[0] = _layers[l]._sc.getHiddenStates()[_back];
		}

		//_layers[l]._pred.activate(cs, visibleStates, l != 0);
	}

	// Q predictor (activation likewise disabled)
	{
		std::vector<cl::Image2D> visibleStates;

		if (0 < numLayers - 1) {
			visibleStates.resize(2);

			visibleStates[0] = _layers[0]._sc.getHiddenStates()[_back];
			visibleStates[1] = _layers[0 + 1]._pred.getHiddenStates()[_back];
		}
		else {
			visibleStates.resize(1);

			visibleStates[0] = _layers[0]._sc.getHiddenStates()[_back];
		}

		//_qPred.activate(cs, visibleStates, false);
	}

	// Recover Q
	std::vector<float> qValues(_qSize.x * _qSize.y);

	cs.getQueue().enqueueReadImage(_qPred.getHiddenStates()[_back], CL_TRUE, zeroOrigin, qRegion, 0, 0, qValues.data());

	// Average all Q values
	float q = 0.0f;

	for (float value : qValues)
		q += value;

	q /= qValues.size();

	// Bellman equation
	float tdError = reward + _qGamma * q - _prevValue;

	float newQ = _prevValue + _qAlpha * tdError;

	// Update older samples: the further back a frame is, the more the error is discounted
	float g = _qGamma;

	for (ReplayFrame &older : _frames) {
		older._q += g * tdError;

		g *= _qGamma;
	}

	// Add replay sample
	ReplayFrame frame;

	frame._q = frame._originalQ = newQ;

	frame._layerStateBitIndices.resize(_layers.size());
	frame._layerPredBitIndices.resize(_layers.size());

	for (int l = 0; l < numLayers; l++) {
		std::vector<float> state(_layerDescs[l]._size.x * _layerDescs[l]._size.y);

		cs.getQueue().enqueueReadImage(_layers[l]._sc.getHiddenStates()[_back], CL_TRUE, zeroOrigin, regionOf(_layerDescs[l]._size), 0, 0, state.data());

		// The prediction of a layer has the size of whatever lies below it:
		// the action for layer 0, the previous layer otherwise
		const cl_int2 predSize = l == 0 ? _actionSize : _layerDescs[l - 1]._size;

		std::vector<float> pred(predSize.x * predSize.y);

		// The layer-0 branch used to read into state.data(), which clobbered the
		// state read above (and could overrun it) while leaving pred all zero.
		// NOTE(review): the pixels are still read from the sparse coder's hidden
		// states, exactly as before; confirm whether the predictor's hidden
		// states (_pred) were intended as the source.
		cs.getQueue().enqueueReadImage(_layers[l]._sc.getHiddenStates()[_back], CL_TRUE, zeroOrigin, regionOf(predSize), 0, 0, pred.data());

		// Store only the indices of the active (positive) cells
		for (int i = 0; i < state.size(); i++)
			if (state[i] > 0.0f)
				frame._layerStateBitIndices[l].push_back(i);

		for (int i = 0; i < pred.size(); i++)
			if (pred[i] > 0.0f)
				frame._layerPredBitIndices[l].push_back(i);
	}

	// Add last action taken and last "thought best" action (the latter clamped to [-1, 1])
	frame._prevExploratoryAction = prevTakenAction;
	frame._prevBestAction = prevBestAction;

	for (int i = 0; i < prevBestAction.size(); i++)
		frame._prevBestAction[i] = std::min(1.0f, std::max(-1.0f, prevBestAction[i]));

	_frames.push_front(frame);

	while (_frames.size() > _maxReplayFrames)
		_frames.pop_back();

	if (learn && _frames.size() > 1) {
		// Convert list to vector for random access
		std::vector<ReplayFrame*> pFrames(_frames.size());

		int index = 0;

		for (ReplayFrame &stored : _frames)
			pFrames[index++] = &stored;

		// Upper bound leaves room for the "previous" frame at randIndex + 1
		std::uniform_int_distribution<int> replayDist(0, _frames.size() - 2);

		for (int iter = 0; iter < _replayIterations; iter++) {
			int randIndex = replayDist(rng);

			ReplayFrame* pFrame = pFrames[randIndex];
			ReplayFrame* pFramePrev = pFrames[randIndex + 1];

			// Load data: expand the stored bit indices back into dense images
			cl_int2 prevLayerSize = _actionSize;

			for (int l = 0; l < numLayers; l++) {
				const cl_int2 layerSize = _layerDescs[l]._size;

				std::vector<float> state(layerSize.x * layerSize.y, 0.0f);
				std::vector<float> statePrev(layerSize.x * layerSize.y, 0.0f);
				std::vector<float> pred(prevLayerSize.x * prevLayerSize.y, 0.0f);
				std::vector<float> predPrev(prevLayerSize.x * prevLayerSize.y, 0.0f);

				for (int i = 0; i < pFrame->_layerStateBitIndices[l].size(); i++)
					state[pFrame->_layerStateBitIndices[l][i]] = 1.0f;

				for (int i = 0; i < pFramePrev->_layerStateBitIndices[l].size(); i++)
					statePrev[pFramePrev->_layerStateBitIndices[l][i]] = 1.0f;

				for (int i = 0; i < pFrame->_layerPredBitIndices[l].size(); i++)
					pred[pFrame->_layerPredBitIndices[l][i]] = 1.0f;

				for (int i = 0; i < pFramePrev->_layerPredBitIndices[l].size(); i++)
					predPrev[pFramePrev->_layerPredBitIndices[l][i]] = 1.0f;

				// _back receives the sampled frame, _front the frame before it
				cs.getQueue().enqueueWriteImage(_layers[l]._scStatesTemp[_back], CL_TRUE, zeroOrigin, regionOf(layerSize), 0, 0, state.data());
				cs.getQueue().enqueueWriteImage(_layers[l]._scStatesTemp[_front], CL_TRUE, zeroOrigin, regionOf(layerSize), 0, 0, statePrev.data());

				cs.getQueue().enqueueWriteImage(_layers[l]._predStatesTemp[_back], CL_TRUE, zeroOrigin, regionOf(prevLayerSize), 0, 0, pred.data());
				cs.getQueue().enqueueWriteImage(_layers[l]._predStatesTemp[_front], CL_TRUE, zeroOrigin, regionOf(prevLayerSize), 0, 0, predPrev.data());

				prevLayerSize = layerSize;
			}

			cs.getQueue().enqueueFillImage(_qTarget, cl_float4{ pFrame->_q, pFrame->_q, pFrame->_q, pFrame->_q }, zeroOrigin, qRegion);

			// Choose better action to learn: the exploratory one if Q rose since
			// the frame was recorded, the "best" one otherwise
			cs.getQueue().enqueueWriteImage(_actionTarget, CL_TRUE, zeroOrigin, actionRegion, 0, 0,
				(pFrame->_q > pFrame->_originalQ ? pFrame->_prevExploratoryAction.data() : pFrame->_prevBestAction.data()));

			for (int l = 0; l < numLayers; l++) {
				std::vector<cl::Image2D> visibleStates;

				if (l != 0) {
					visibleStates.resize(2);

					visibleStates[0] = _layers[l - 1]._sc.getHiddenStates()[_back];
					visibleStates[1] = _layers[l]._sc.getHiddenStates()[_back];

					_layers[l]._sc.activate(cs, visibleStates, _layerDescs[l]._scActiveRatio, false);

					_layers[l]._sc.learn(cs, visibleStates, _layerDescs[l]._scBoostAlpha, _layerDescs[l]._scActiveRatio);
				}

				std::vector<cl::Image2D> visibleStatesPrev;

				if (l < numLayers - 1) {
					visibleStatesPrev.resize(2);

					visibleStatesPrev[0] = _layers[l]._scStatesTemp[_front];
					visibleStatesPrev[1] = _layers[l + 1]._predStatesTemp[_front];
				}
				else {
					visibleStatesPrev.resize(1);

					visibleStatesPrev[0] = _layers[l]._scStatesTemp[_front];
				}

				//_layers[l]._pred.activate(cs, visibleStatesPrev, l != 0, false);

				// Layer 0 learns towards the action target, higher layers towards the layer below
				if (l == 0)
					_layers[l]._pred.learnCurrent(cs, _actionTarget, visibleStatesPrev, _layerDescs[l]._predWeightAlpha);
				else
					_layers[l]._pred.learnCurrent(cs, _layers[l - 1]._sc.getHiddenStates()[_back], visibleStatesPrev, _layerDescs[l]._predWeightAlpha);
			}

			// Q Pred
			{
				std::vector<cl::Image2D> visibleStatesPrev;

				if (0 < numLayers - 1) {
					visibleStatesPrev.resize(2);

					visibleStatesPrev[0] = _layers[0]._sc.getHiddenStates()[_front];
					visibleStatesPrev[1] = _layers[0 + 1]._pred.getHiddenStates()[_front];
				}
				else {
					visibleStatesPrev.resize(1);

					visibleStatesPrev[0] = _layers[0]._sc.getHiddenStates()[_front];
				}

				//_qPred.activate(cs, visibleStatesPrev, false, false);

				_qPred.learnCurrent(cs, _qTarget, visibleStatesPrev, _qWeightAlpha);
			}
		}
	}

	std::cout << "Q: " << newQ << std::endl;

	// Remember this step's values for the next call
	_prevQ = newQ;
	_prevTDError = tdError;
	_prevValue = q;
}
// Computes new hidden states from the given visible states.
//
// Two independent sums are accumulated over the visible layers: one over the
// layers whose descriptor has _isPredictiveCoding unset (starting from the
// hidden biases) and one over the layers that have it set (starting from
// zero). Both sums are then handed to the solve kernel, which writes the new
// hidden states; the hidden state buffers are swapped afterwards.
//
// cs            - compute system supplying the OpenCL queue
// visibleStates - one image per visible layer, indexed like _visibleLayers
// activeRatio   - forwarded unchanged to the solve kernel
// bufferSwap    - currently ignored: the conditional swap is commented out and
//                 the hidden state buffers are always swapped
void ComparisonSparseCoder::activate(sys::ComputeSystem &cs, const std::vector<cl::Image2D> &visibleStates, float activeRatio, bool bufferSwap) {
	(void)bufferSwap; // See note above

	const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };
	// Explicit casts: brace-initialising from cl_int would be a narrowing conversion
	const cl::array<cl::size_type, 3> hiddenRegion = { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 };

	const cl::NDRange hiddenRange(_hiddenSize.x, _hiddenSize.y);

	const int numVisibleLayers = static_cast<int>(_visibleLayers.size());

	// Start by setting the activation summation buffer to the biases
	cs.getQueue().enqueueCopyImage(_hiddenBiases[_back], _hiddenActivationSummationTemp[_back], zeroOrigin, zeroOrigin, hiddenRegion);

	for (int vli = 0; vli < numVisibleLayers; vli++) {
		VisibleLayer &vl = _visibleLayers[vli];
		VisibleLayerDesc &vld = _visibleLayerDescs[vli];

		if (vld._isPredictiveCoding)
			continue;

		// Both kernels take the same argument list
		cl::Kernel &kernel = vld._ignoreMiddle ? _activateIgnoreMiddleKernel : _activateKernel;

		int argIndex = 0;

		kernel.setArg(argIndex++, visibleStates[vli]);
		kernel.setArg(argIndex++, _hiddenActivationSummationTemp[_back]);
		kernel.setArg(argIndex++, _hiddenActivationSummationTemp[_front]);
		kernel.setArg(argIndex++, vl._weights[_back]);
		kernel.setArg(argIndex++, vld._size);
		kernel.setArg(argIndex++, vl._hiddenToVisible);
		kernel.setArg(argIndex++, vld._radius);

		cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, hiddenRange);

		// Swap buffers so the running sum is in _back again
		std::swap(_hiddenActivationSummationTemp[_front], _hiddenActivationSummationTemp[_back]);
	}

	// The prediction summation buffer starts from zero (no biases)
	cs.getQueue().enqueueFillImage(_hiddenPredictionSummationTemp[_back], cl_float4{ 0.0f, 0.0f, 0.0f, 0.0f }, zeroOrigin, hiddenRegion);

	for (int vli = 0; vli < numVisibleLayers; vli++) {
		VisibleLayer &vl = _visibleLayers[vli];
		VisibleLayerDesc &vld = _visibleLayerDescs[vli];

		if (!vld._isPredictiveCoding)
			continue;

		cl::Kernel &kernel = vld._ignoreMiddle ? _activateIgnoreMiddleKernel : _activateKernel;

		int argIndex = 0;

		kernel.setArg(argIndex++, visibleStates[vli]);
		kernel.setArg(argIndex++, _hiddenPredictionSummationTemp[_back]);
		kernel.setArg(argIndex++, _hiddenPredictionSummationTemp[_front]);
		kernel.setArg(argIndex++, vl._weights[_back]);
		kernel.setArg(argIndex++, vld._size);
		kernel.setArg(argIndex++, vl._hiddenToVisible);
		kernel.setArg(argIndex++, vld._radius);

		cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, hiddenRange);

		// Swap buffers so the running sum is in _back again
		std::swap(_hiddenPredictionSummationTemp[_front], _hiddenPredictionSummationTemp[_back]);
	}

	// Back now contains the sums. Solve sparse codes from this
	{
		int argIndex = 0;

		_solveHiddenKernel.setArg(argIndex++, _hiddenActivationSummationTemp[_back]);
		_solveHiddenKernel.setArg(argIndex++, _hiddenPredictionSummationTemp[_back]);
		_solveHiddenKernel.setArg(argIndex++, _hiddenStates[_front]);
		_solveHiddenKernel.setArg(argIndex++, _hiddenSize);
		_solveHiddenKernel.setArg(argIndex++, _lateralRadius);
		_solveHiddenKernel.setArg(argIndex++, activeRatio);

		cs.getQueue().enqueueNDRangeKernel(_solveHiddenKernel, cl::NullRange, hiddenRange);
	}

	// Swap hidden state buffers
	//if (bufferSwap)
	std::swap(_hiddenStates[_front], _hiddenStates[_back]);
}
// Creates the sparse coder with randomly initialised weights.
//
// Stores the configuration, creates one weight volume per visible layer and
// fills its _back buffer through randomUniform, allocates the hidden state,
// bias and summation buffers (zero-filling the _back buffers of the hidden
// biases and hidden states) and creates the kernels used by this class.
//
// cs                - compute system supplying the OpenCL context and queue
// program           - program from which all kernels are created
// visibleLayerDescs - one descriptor per visible layer
// hiddenSize        - width/height of the hidden layer
// lateralRadius     - stored and later passed to the solve kernel
// initWeightRange   - range handed to randomUniform for the initial weights
// rng               - generator handed to randomUniform
void ComparisonSparseCoder::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program,
	const std::vector<VisibleLayerDesc> &visibleLayerDescs,
	cl_int2 hiddenSize, cl_int lateralRadius, cl_float2 initWeightRange,
	std::mt19937 &rng)
{
	_visibleLayerDescs = visibleLayerDescs;

	_lateralRadius = lateralRadius;

	_hiddenSize = hiddenSize;

	const cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f };

	const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };
	// Explicit casts: brace-initialising from cl_int would be a narrowing conversion
	const cl::array<cl::size_type, 3> hiddenRegion = { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 };

	_visibleLayers.resize(_visibleLayerDescs.size());

	// The 2D kernel is only needed by the (currently disabled) random bias
	// initialisation further down; it is still created as before.
	cl::Kernel randomUniform2DKernel = cl::Kernel(program.getProgram(), "randomUniform2D");
	cl::Kernel randomUniform3DKernel = cl::Kernel(program.getProgram(), "randomUniform3D");

	const float hiddenWidth = static_cast<float>(_hiddenSize.x);
	const float hiddenHeight = static_cast<float>(_hiddenSize.y);

	const int numVisibleLayers = static_cast<int>(_visibleLayers.size());

	// Create layers
	for (int vli = 0; vli < numVisibleLayers; vli++) {
		VisibleLayer &vl = _visibleLayers[vli];
		VisibleLayerDesc &vld = _visibleLayerDescs[vli];

		// Two channels per weight when traces are used, one otherwise
		const cl_channel_order weightChannels = vld._useTraces ? CL_RG : CL_R;

		const float visibleWidth = static_cast<float>(vld._size.x);
		const float visibleHeight = static_cast<float>(vld._size.y);

		// Scale factors between hidden and visible coordinates, in both directions
		vl._hiddenToVisible = cl_float2{ visibleWidth / hiddenWidth, visibleHeight / hiddenHeight };

		vl._visibleToHidden = cl_float2{ hiddenWidth / visibleWidth, hiddenHeight / visibleHeight };

		// Radius rescaled by the visible-to-hidden factors, rounded up
		vl._reverseRadii = { static_cast<int>(std::ceil(vl._visibleToHidden.x * (vld._radius + 0.5f))), static_cast<int>(std::ceil(vl._visibleToHidden.y * (vld._radius + 0.5f))) };

		// Create images: one weight per cell of the (2 * radius + 1)^2 window
		const int weightDiam = vld._radius * 2 + 1;

		const int numWeights = weightDiam * weightDiam;

		cl_int3 weightsSize = cl_int3{ _hiddenSize.x, _hiddenSize.y, numWeights };

		vl._weights = createDoubleBuffer3D(cs, weightsSize, weightChannels, CL_FLOAT);

		randomUniform(vl._weights[_back], cs, randomUniform3DKernel, weightsSize, initWeightRange, rng);
	}

	// Hidden state data
	_hiddenStates = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT);

	_hiddenBiases = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT);

	// Biases start at zero; the random initialisation is disabled
	//randomUniform(_hiddenBiases[_back], cs, randomUniform2DKernel, _hiddenSize, initWeightRange, rng);
	cs.getQueue().enqueueFillImage(_hiddenBiases[_back], zeroColor, zeroOrigin, hiddenRegion);

	_hiddenActivationSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT);
	_hiddenPredictionSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT);

	cs.getQueue().enqueueFillImage(_hiddenStates[_back], zeroColor, zeroOrigin, hiddenRegion);

	// Create kernels
	_activateKernel = cl::Kernel(program.getProgram(), "cscActivate");
	_activateIgnoreMiddleKernel = cl::Kernel(program.getProgram(), "cscActivateIgnoreMiddle");
	_solveHiddenKernel = cl::Kernel(program.getProgram(), "cscSolveHidden");
	_learnHiddenBiasesKernel = cl::Kernel(program.getProgram(), "cscLearnHiddenBiases");
	_learnHiddenWeightsActivationKernel = cl::Kernel(program.getProgram(), "cscLearnHiddenWeightsActivation");
	_learnHiddenWeightsTracesActivationKernel = cl::Kernel(program.getProgram(), "cscLearnHiddenWeightsTracesActivation");
	_learnHiddenWeightsPredictionKernel = cl::Kernel(program.getProgram(), "cscLearnHiddenWeightsPrediction");
	_learnHiddenWeightsTracesPredictionKernel = cl::Kernel(program.getProgram(), "cscLearnHiddenWeightsTracesPrediction");
	_forwardKernel = cl::Kernel(program.getProgram(), "cscForward");
}
Example #22
0
// Builds a randomly initialized hierarchy. For every entry of layerDescs this creates a
// sparse coder, a predictor and a swarm together with the per-layer helper images, then
// creates the last-layer action image and the kernels used by this class.
//
// cs                        - compute system providing the OpenCL context and queue
// program                   - program the kernels are looked up in
// inputSize                 - size of the input below the first layer
// actionSize                - size of the action layer seen by the first layer's swarm
// firstLayerPredictorRadius - not referenced by this function
// layerDescs                - per-layer settings; must contain at least one entry, because
//                             the last entry is read unconditionally after the loop
// initWeightRange           - forwarded to the createRandom calls of the sub-components
// rng                       - random generator forwarded to the sub-components
void AgentSwarm::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program,
                              cl_int2 inputSize, cl_int2 actionSize, cl_int firstLayerPredictorRadius, const std::vector<LayerDesc> &layerDescs,
                              cl_float2 initWeightRange,
                              std::mt19937 &rng)
{
    _layerDescs = layerDescs;
    _layers.resize(_layerDescs.size());

    // Signed layer count so the index comparisons below never mix signed and unsigned
    const int numLayers = static_cast<int>(_layers.size());

    const cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f };
    const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };

    cl_int2 prevLayerSize = inputSize;

    for (int l = 0; l < numLayers; l++) {
        const LayerDesc &desc = _layerDescs[l];

        // Sparse coder: feed-forward input [0] and recurrent input [1]
        std::vector<ComparisonSparseCoder::VisibleLayerDesc> scDescs(2);

        scDescs[0]._size = prevLayerSize;
        scDescs[0]._radius = desc._feedForwardRadius;
        scDescs[0]._ignoreMiddle = false;
        scDescs[0]._weightAlpha = desc._scWeightAlpha;
        scDescs[0]._weightLambda = desc._scWeightLambda;
        scDescs[0]._useTraces = false;

        scDescs[1]._size = desc._hiddenSize;
        scDescs[1]._radius = desc._recurrentRadius;
        scDescs[1]._ignoreMiddle = true;
        scDescs[1]._weightAlpha = desc._scWeightRecurrentAlpha;
        scDescs[1]._weightLambda = desc._scWeightLambda;
        scDescs[1]._useTraces = false;

        _layers[l]._sc.createRandom(cs, program, scDescs, desc._hiddenSize, desc._lateralRadius, initWeightRange, rng);

        _layers[l]._modulatedFeedForwardInput = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), prevLayerSize.x, prevLayerSize.y);

        _layers[l]._modulatedRecurrentInput = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), desc._hiddenSize.x, desc._hiddenSize.y);

        // Predictor: own hidden state [0] and, when a layer above exists, its hidden state [1]
        std::vector<Predictor::VisibleLayerDesc> predDescs;

        if (l + 1 < numLayers) {
            predDescs.resize(2);

            predDescs[0]._size = desc._hiddenSize;
            predDescs[0]._radius = desc._predictiveRadius;

            predDescs[1]._size = _layerDescs[l + 1]._hiddenSize;
            predDescs[1]._radius = desc._feedBackRadius;
        }
        else {
            predDescs.resize(1);

            predDescs[0]._size = desc._hiddenSize;
            predDescs[0]._radius = desc._predictiveRadius;
        }

        _layers[l]._pred.createRandom(cs, program, predDescs, prevLayerSize, initWeightRange, false, rng);

        // Swarm: feed-forward attention [0], recurrent attention [1], action [2]
        std::vector<Swarm::VisibleLayerDesc> swarmDescs(3);

        if (l == 0) {
            swarmDescs[0]._size = inputSize;
            swarmDescs[2]._size = actionSize;
        }
        else {
            swarmDescs[0]._size = _layerDescs[l - 1]._hiddenSize;
            swarmDescs[2]._size = _layerDescs[l - 1]._hiddenSize;
        }

        swarmDescs[0]._qRadius = desc._qRadiusHiddenFeedForwardAttention;
        swarmDescs[0]._startRadius = desc._startRadiusHiddenFeedForwardAttention;

        swarmDescs[1]._size = desc._hiddenSize;
        swarmDescs[1]._qRadius = desc._qRadiusHiddenRecurrentAttention;
        swarmDescs[1]._startRadius = desc._startRadiusHiddenRecurrentAttention;

        swarmDescs[2]._qRadius = desc._qRadiusHiddenAction;
        swarmDescs[2]._startRadius = desc._startRadiusHiddenAction;

        _layers[l]._swarm.createRandom(cs, program, swarmDescs, desc._qSize, desc._hiddenSize, desc._qRadius, initWeightRange, rng);

        // Create baselines
        _layers[l]._baseLines = createDoubleBuffer2D(cs, desc._hiddenSize, CL_R, CL_FLOAT);

        _layers[l]._reward = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), desc._hiddenSize.x, desc._hiddenSize.y);

        _layers[l]._scHiddenStatesPrev = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), desc._hiddenSize.x, desc._hiddenSize.y);

        if (l != 0) {
            // Size the image from the same descriptor that defines the fill region, so the
            // zero-fill covers the whole image and can never exceed its bounds.
            const cl_int2 inhibitedSize = _layers[l]._swarm.getVisibleLayerDesc(2)._size;

            const cl::array<cl::size_type, 3> actionRegion = { static_cast<cl::size_type>(inhibitedSize.x), static_cast<cl::size_type>(inhibitedSize.y), 1 };

            _layers[l]._inhibitedAction = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), inhibitedSize.x, inhibitedSize.y);

            cs.getQueue().enqueueFillImage(_layers[l]._inhibitedAction, zeroColor, zeroOrigin, actionRegion);
        }

        // Explicit casts: cl_int -> cl::size_type is a narrowing conversion inside braces
        const cl::array<cl::size_type, 3> layerRegion = { static_cast<cl::size_type>(desc._hiddenSize.x), static_cast<cl::size_type>(desc._hiddenSize.y), 1 };

        cs.getQueue().enqueueFillImage(_layers[l]._baseLines[_back], zeroColor, zeroOrigin, layerRegion);
        cs.getQueue().enqueueFillImage(_layers[l]._reward, zeroColor, zeroOrigin, layerRegion);
        cs.getQueue().enqueueFillImage(_layers[l]._scHiddenStatesPrev, zeroColor, zeroOrigin, layerRegion);

        prevLayerSize = desc._hiddenSize;
    }

    // Action image for the top-most layer, zero-initialized
    {
        const cl_int2 topSize = _layerDescs.back()._hiddenSize;

        const cl::array<cl::size_type, 3> layerRegion = { static_cast<cl::size_type>(topSize.x), static_cast<cl::size_type>(topSize.y), 1 };

        _lastLayerAction = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), topSize.x, topSize.y);

        cs.getQueue().enqueueFillImage(_lastLayerAction, zeroColor, zeroOrigin, layerRegion);
    }

    // Create kernels
    _baseLineUpdateKernel = cl::Kernel(program.getProgram(), "phBaseLineUpdate");
    _baseLineUpdateSumErrorKernel = cl::Kernel(program.getProgram(), "phBaseLineUpdateSumError");
    _inhibitKernel = cl::Kernel(program.getProgram(), "phInhibit");
    _modulateKernel = cl::Kernel(program.getProgram(), "phModulate");
}
// Writes this sparse coder to a text stream.
// NOTE: serialization is currently disabled - the function aborts on entry. The statements
// after the abort() show the intended layout: a size header, the hidden states, the hidden
// biases and then one record per visible layer. The weight image readback is still
// commented out, so the weight values emitted below are value-initialized placeholders.
void ComparisonSparseCoder::writeToStream(sys::ComputeSystem &cs, std::ostream &os) const {
	abort(); // Fix me

	// Header
	os << _hiddenSize.x << " " << _hiddenSize.y << " " << _lateralRadius << std::endl;

	// Hidden states, read back from the back buffer with a blocking read
	{
		std::vector<cl_float> states(_hiddenSize.x * _hiddenSize.y);

		cs.getQueue().enqueueReadImage(_hiddenStates[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }, 0, 0, states.data());

		for (const cl_float value : states)
			os << value << " ";

		os << std::endl;
	}

	// Hidden biases, same procedure
	{
		std::vector<cl_float> biases(_hiddenSize.x * _hiddenSize.y);

		cs.getQueue().enqueueReadImage(_hiddenBiases[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }, 0, 0, biases.data());

		for (const cl_float value : biases)
			os << value << " ";

		os << std::endl;
	}

	// Number of visible layers, followed by one record per layer
	os << _visibleLayers.size() << std::endl;

	for (int layerIndex = 0; layerIndex < _visibleLayers.size(); layerIndex++) {
		const VisibleLayerDesc &layerDesc = _visibleLayerDescs[layerIndex];
		const VisibleLayer &layer = _visibleLayers[layerIndex];

		// Descriptor fields
		os << layerDesc._size.x << " " << layerDesc._size.y << " " << layerDesc._radius << " " << layerDesc._weightAlpha << " " << layerDesc._weightLambda << " " << layerDesc._ignoreMiddle << " " << layerDesc._useTraces << std::endl;

		// One weight per hidden unit and per cell of the (2 * radius + 1)^2 window
		const int diameter = layerDesc._radius * 2 + 1;
		const int weightsPerUnit = diameter * diameter;

		const cl_int3 weightsSize = cl_int3{ _hiddenSize.x, _hiddenSize.y, weightsPerUnit };

		const int weightCount = weightsSize.x * weightsSize.y * weightsSize.z;

		if (!layerDesc._useTraces) {
			std::vector<cl_float> weights(weightCount);

			//cs.getQueue().enqueueReadImage(layer._weights[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(weightsSize.x), static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) }, 0, 0, weights.data());

			for (const cl_float weight : weights)
				os << weight << " ";
		}
		else {
			// With traces every entry has two components
			std::vector<cl_float2> weights(weightCount);

			//cs.getQueue().enqueueReadImage(layer._weights[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(weightsSize.x), static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) }, 0, 0, weights.data());

			for (const cl_float2 &weight : weights)
				os << weight.x << " " << weight.y << " ";
		}

		os << std::endl;

		// Derived coordinate mappings
		os << layer._hiddenToVisible.x << " " << layer._hiddenToVisible.y << " " << layer._visibleToHidden.x << " " << layer._visibleToHidden.y << " " << layer._reverseRadii.x << " " << layer._reverseRadii.y << std::endl;
	}
}
// Runs one learning step: first the hidden biases, then the weights of every visible layer.
// Weight learning is done in two passes over the visible layers - all layers that are not
// flagged _isPredictiveCoding first, then the flagged ones. Each processed layer has its
// weight double buffer swapped after its kernel is enqueued.
//
// rewards       - passed only to the trace variants of the weight kernels
// visibleStates - one image per visible layer, indexed like _visibleLayers
// boostAlpha    - passed to the bias learning kernel
// activeRatio   - passed to the bias learning kernel
void ComparisonSparseCoder::learn(sys::ComputeSystem &cs, const cl::Image2D &rewards, std::vector<cl::Image2D> &visibleStates, float boostAlpha, float activeRatio) {
	// Learn biases
	{
		cl::Kernel &kernel = _learnHiddenBiasesKernel;

		int arg = 0;

		kernel.setArg(arg++, _hiddenBiases[_back]);
		kernel.setArg(arg++, _hiddenBiases[_front]);
		kernel.setArg(arg++, _hiddenStates[_back]);
		kernel.setArg(arg++, boostAlpha);
		kernel.setArg(arg++, activeRatio);

		cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y));

		std::swap(_hiddenBiases[_front], _hiddenBiases[_back]);
	}

	// Learn weights: pass 0 handles the activation layers, pass 1 the predictive coding layers
	for (int pass = 0; pass < 2; pass++) {
		const bool predictivePass = (pass == 1);

		// Both passes use the same argument layout; only the kernels and the summation buffer differ
		cl::Kernel &tracesKernel = predictivePass ? _learnHiddenWeightsTracesPredictionKernel : _learnHiddenWeightsTracesActivationKernel;
		cl::Kernel &plainKernel = predictivePass ? _learnHiddenWeightsPredictionKernel : _learnHiddenWeightsActivationKernel;

		const auto &summation = predictivePass ? _hiddenPredictionSummationTemp[_back] : _hiddenActivationSummationTemp[_back];

		for (int layerIndex = 0; layerIndex < _visibleLayers.size(); layerIndex++) {
			VisibleLayer &layer = _visibleLayers[layerIndex];
			VisibleLayerDesc &layerDesc = _visibleLayerDescs[layerIndex];

			const bool layerIsPredictive = layerDesc._isPredictiveCoding ? true : false;

			// Layer belongs to the other pass
			if (layerIsPredictive != predictivePass)
				continue;

			int arg = 0;

			if (layerDesc._useTraces) {
				tracesKernel.setArg(arg++, rewards);
				tracesKernel.setArg(arg++, visibleStates[layerIndex]);
				tracesKernel.setArg(arg++, _hiddenStates[_back]);
				tracesKernel.setArg(arg++, summation);
				tracesKernel.setArg(arg++, layer._weights[_back]);
				tracesKernel.setArg(arg++, layer._weights[_front]);
				tracesKernel.setArg(arg++, layerDesc._size);
				tracesKernel.setArg(arg++, layer._hiddenToVisible);
				tracesKernel.setArg(arg++, layerDesc._radius);
				tracesKernel.setArg(arg++, layerDesc._weightAlpha);
				tracesKernel.setArg(arg++, layerDesc._weightLambda);

				cs.getQueue().enqueueNDRangeKernel(tracesKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y));
			}
			else {
				plainKernel.setArg(arg++, visibleStates[layerIndex]);
				plainKernel.setArg(arg++, _hiddenStates[_back]);
				plainKernel.setArg(arg++, summation);
				plainKernel.setArg(arg++, layer._weights[_back]);
				plainKernel.setArg(arg++, layer._weights[_front]);
				plainKernel.setArg(arg++, layerDesc._size);
				plainKernel.setArg(arg++, layer._hiddenToVisible);
				plainKernel.setArg(arg++, layerDesc._radius);
				plainKernel.setArg(arg++, layerDesc._weightAlpha);

				cs.getQueue().enqueueNDRangeKernel(plainKernel, cl::NullRange, cl::NDRange(_hiddenSize.x, _hiddenSize.y));
			}

			// The kernel wrote the front buffer; make it the current one
			std::swap(layer._weights[_front], layer._weights[_back]);
		}
	}
}
Example #25
0
// Builds a randomly initialized agent: one sparse coder and one predictor per entry of
// layerDescs, the per-layer reward and temporary images, the Q images, the Q predictor,
// a random Q transform, the three whiteners and the kernels used by this class.
//
// cs              - compute system providing the OpenCL context and queue
// program         - program the kernels are looked up in
// inputSize       - size of the input layer
// actionSize      - size of the action layer
// qSize           - size of the Q layer
// layerDescs      - per-layer settings; must contain at least one entry, because the
//                   Q predictor reads entry 0 unconditionally
// initWeightRange - forwarded to the createRandom calls of the sub-components
// rng             - random generator forwarded to the sub-components
void AgentER::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program,
	cl_int2 inputSize, cl_int2 actionSize, cl_int2 qSize,
	const std::vector<LayerDesc> &layerDescs,
	cl_float2 initWeightRange,
	std::mt19937 &rng)
{
	_inputSize = inputSize;
	_actionSize = actionSize;
	_qSize = qSize;

	_layerDescs = layerDescs;
	_layers.resize(_layerDescs.size());

	// Signed layer count so the index comparisons below never rely on unsigned wrap-around
	const int numLayers = static_cast<int>(_layers.size());

	cl::Kernel randomUniform2DXYKernel = cl::Kernel(program.getProgram(), "randomUniform2DXY");

	const cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f };
	const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };

	cl_int2 prevLayerSize = inputSize;

	for (int l = 0; l < numLayers; l++) {
		const LayerDesc &desc = _layerDescs[l];

		std::vector<ComparisonSparseCoder::VisibleLayerDesc> scDescs;

		if (l == 0) {
			// The first layer sees the input, the action and the Q layer, all with the
			// same feed-forward settings.
			const cl_int2 firstLayerSizes[3] = { prevLayerSize, _actionSize, _qSize };

			scDescs.resize(3);

			for (int i = 0; i < 3; i++) {
				scDescs[i]._size = firstLayerSizes[i];
				scDescs[i]._radius = desc._feedForwardRadius;
				scDescs[i]._ignoreMiddle = false;
				scDescs[i]._weightAlpha = desc._scWeightAlpha;
				scDescs[i]._useTraces = false;
			}
		}
		else {
			// Higher layers see the layer below [0] and their own state recurrently [1]
			scDescs.resize(2);

			scDescs[0]._size = prevLayerSize;
			scDescs[0]._radius = desc._feedForwardRadius;
			scDescs[0]._ignoreMiddle = false;
			scDescs[0]._weightAlpha = desc._scWeightAlpha;
			scDescs[0]._useTraces = false;

			scDescs[1]._size = desc._size;
			scDescs[1]._radius = desc._recurrentRadius;
			scDescs[1]._ignoreMiddle = true;
			scDescs[1]._weightAlpha = desc._scWeightRecurrentAlpha;
			scDescs[1]._useTraces = false;
		}

		_layers[l]._sc.createRandom(cs, program, scDescs, desc._size, desc._lateralRadius, initWeightRange, rng);

		// Predictor: own state [0] and, when a layer above exists, its state [1]
		std::vector<Predictor::VisibleLayerDesc> predDescs;

		if (l + 1 < numLayers) {
			predDescs.resize(2);

			predDescs[0]._size = desc._size;
			predDescs[0]._radius = desc._predictiveRadius;

			predDescs[1]._size = _layerDescs[l + 1]._size;
			predDescs[1]._radius = desc._feedBackRadius;
		}
		else {
			predDescs.resize(1);

			predDescs[0]._size = desc._size;
			predDescs[0]._radius = desc._predictiveRadius;
		}

		// The first predictor outputs at action size, the others at the size of the layer below
		if (l == 0)
			_layers[l]._pred.createRandom(cs, program, predDescs, _actionSize, initWeightRange, true, rng);
		else
			_layers[l]._pred.createRandom(cs, program, predDescs, _layerDescs[l - 1]._size, initWeightRange, true, rng);

		// Create baselines
		_layers[l]._predReward = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), desc._size.x, desc._size.y);
		_layers[l]._propagatedPredReward = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), desc._size.x, desc._size.y);

		// Explicit casts: cl_int -> cl::size_type is a narrowing conversion inside braces
		const cl::array<cl::size_type, 3> layerRegion = { static_cast<cl::size_type>(desc._size.x), static_cast<cl::size_type>(desc._size.y), 1 };

		cs.getQueue().enqueueFillImage(_layers[l]._predReward, zeroColor, zeroOrigin, layerRegion);
		cs.getQueue().enqueueFillImage(_layers[l]._propagatedPredReward, zeroColor, zeroOrigin, layerRegion);

		_layers[l]._scStatesTemp = createDoubleBuffer2D(cs, desc._size, CL_R, CL_FLOAT);
		_layers[l]._predStatesTemp = createDoubleBuffer2D(cs, prevLayerSize, CL_R, CL_FLOAT);

		prevLayerSize = desc._size;
	}

	_qInput = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _qSize.x, _qSize.y);

	_qTarget = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _qSize.x, _qSize.y);

	_actionTarget = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), _actionSize.x, _actionSize.y);

	_qTransform = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_RG, CL_FLOAT), _qSize.x, _qSize.y);

	// Q Predictor: reads the first layer and, when present, the second layer
	{
		std::vector<Predictor::VisibleLayerDesc> predDescs;

		if (numLayers > 1) {
			predDescs.resize(2);

			predDescs[0]._size = _layerDescs[0]._size;
			predDescs[0]._radius = _layerDescs[0]._predictiveRadius;

			predDescs[1]._size = _layerDescs[1]._size;
			predDescs[1]._radius = _layerDescs[0]._feedBackRadius;
		}
		else {
			predDescs.resize(1);

			predDescs[0]._size = _layerDescs[0]._size;
			predDescs[0]._radius = _layerDescs[0]._predictiveRadius;
		}

		_qPred.createRandom(cs, program, predDescs, _qSize, initWeightRange, true, rng);
	}

	// Random Q transform
	randomUniformXY(_qTransform, cs, randomUniform2DXYKernel, _qSize, { -1.0f, 1.0f }, rng);

	_inputWhitener.create(cs, program, _inputSize, CL_R, CL_FLOAT);
	_actionWhitener.create(cs, program, _actionSize, CL_R, CL_FLOAT);
	_qWhitener.create(cs, program, _qSize, CL_R, CL_FLOAT);

	// Create kernels
	_predictionRewardKernel = cl::Kernel(program.getProgram(), "phPredictionReward");
	_predictionRewardPropagationKernel = cl::Kernel(program.getProgram(), "phPredictionRewardPropagation");
	_setQKernel = cl::Kernel(program.getProgram(), "phSetQ");
}
Example #26
0
// Creates a randomly initialized swarm predictor: per visible layer the coordinate
// mappings, a randomized weight double buffer and a zeroed trace double buffer; then the
// hidden state buffers and the kernels used by this class.
//
// cs                - compute system providing the OpenCL context and queue
// program           - program the kernels are looked up in
// visibleLayerDescs - one descriptor per visible layer (copied into this object)
// hiddenSize        - size of the hidden layer
// initWeightRange   - forwarded to the weight randomization helper
// rng               - random generator forwarded to the weight randomization helper
void PredictorSwarm::createRandom(sys::ComputeSystem &cs, sys::ComputeProgram &program,
	const std::vector<VisibleLayerDesc> &visibleLayerDescs, cl_int2 hiddenSize, cl_float2 initWeightRange,
	std::mt19937 &rng)
{
	_visibleLayerDescs = visibleLayerDescs;

	_hiddenSize = hiddenSize;

	_visibleLayers.resize(_visibleLayerDescs.size());

	// Only the XZ randomization kernel is needed by this function
	cl::Kernel randomUniform3DXZKernel = cl::Kernel(program.getProgram(), "randomUniform3DXZ");

	const cl_float4 zeroColor = { 0.0f, 0.0f, 0.0f, 0.0f };

	const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };
	// Explicit casts: cl_int -> cl::size_type is a narrowing conversion inside braces
	const cl::array<cl::size_type, 3> hiddenRegion = { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 };

	// Create layers
	const int numVisibleLayers = static_cast<int>(_visibleLayers.size());

	for (int vli = 0; vli < numVisibleLayers; vli++) {
		VisibleLayer &vl = _visibleLayers[vli];
		const VisibleLayerDesc &vld = _visibleLayerDescs[vli];

		// Scale factors between hidden and visible coordinates
		vl._hiddenToVisible = cl_float2{ static_cast<float>(vld._size.x) / static_cast<float>(_hiddenSize.x),
			static_cast<float>(vld._size.y) / static_cast<float>(_hiddenSize.y)
		};

		vl._visibleToHidden = cl_float2{ static_cast<float>(_hiddenSize.x) / static_cast<float>(vld._size.x),
			static_cast<float>(_hiddenSize.y) / static_cast<float>(vld._size.y)
		};

		// Radius scaled into hidden coordinates, rounded up
		vl._reverseRadii = cl_int2{ static_cast<int>(std::ceil(vl._visibleToHidden.x * (vld._radius + 0.5f))), static_cast<int>(std::ceil(vl._visibleToHidden.y * (vld._radius + 0.5f))) };

		// One weight slice per cell of the (2 * radius + 1)^2 window
		const int weightDiam = vld._radius * 2 + 1;

		const int numWeights = weightDiam * weightDiam;

		const cl_int3 weightsSize = { _hiddenSize.x, _hiddenSize.y, numWeights };

		const cl::array<cl::size_type, 3> weightsRegion = { static_cast<cl::size_type>(weightsSize.x), static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) };

		vl._weights = createDoubleBuffer3D(cs, weightsSize, CL_RGBA, CL_FLOAT);

		randomUniformXZ(vl._weights[_back], cs, randomUniform3DXZKernel, weightsSize, initWeightRange, rng);

		vl._qTraces = createDoubleBuffer3D(cs, weightsSize, CL_R, CL_FLOAT);

		cs.getQueue().enqueueFillImage(vl._qTraces[_back], zeroColor, zeroOrigin, weightsRegion);
	}

	// Hidden state data
	_hiddenStates = createDoubleBuffer2D(cs, _hiddenSize, CL_RG, CL_FLOAT);

	_hiddenActivations = createDoubleBuffer2D(cs, _hiddenSize, CL_RG, CL_FLOAT);

	_hiddenSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_RG, CL_FLOAT);

	cs.getQueue().enqueueFillImage(_hiddenStates[_back], zeroColor, zeroOrigin, hiddenRegion);
	cs.getQueue().enqueueFillImage(_hiddenActivations[_back], zeroColor, zeroOrigin, hiddenRegion);

	// Create kernels
	_activateKernel = cl::Kernel(program.getProgram(), "predActivateSwarm");
	_solveHiddenKernel = cl::Kernel(program.getProgram(), "predSolveHiddenSwarm");
	_solveHiddenNoInhibitionKernel = cl::Kernel(program.getProgram(), "predSolveHiddenNoInhibitionSwarm");
	_learnWeightsTracesInhibitedKernel = cl::Kernel(program.getProgram(), "predLearnWeightsTracesSwarm");
	_reconstructionErrorKernel = cl::Kernel(program.getProgram(), "predReconstructionErrorSwarm");
}
Example #27
0
// Restores this predictor from a text stream.
// NOTE: deserialization is currently disabled - the function aborts on entry. The statements
// after the abort() show the intended layout: hidden size, hidden states, the number of
// visible layers and then, per layer, the descriptor, the weights and the coordinate mappings.
void Predictor::readFromStream(sys::ComputeSystem &cs, sys::ComputeProgram &program, std::istream &is) {
	abort(); // Not yet working

	is >> _hiddenSize.x >> _hiddenSize.y;

	// Allocate the hidden buffers for the size just read
	_hiddenStates = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT);

	_hiddenSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT);

	// Hidden states, uploaded to the back buffer with a blocking write
	{
		std::vector<cl_float> states(_hiddenSize.x * _hiddenSize.y);

		for (cl_float &value : states)
			is >> value;

		cs.getQueue().enqueueWriteImage(_hiddenStates[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }, 0, 0, states.data());
	}

	// Layer information
	int layerCount;

	is >> layerCount;

	_visibleLayerDescs.resize(layerCount);
	_visibleLayers.resize(layerCount);

	for (int layerIndex = 0; layerIndex < _visibleLayers.size(); layerIndex++) {
		VisibleLayerDesc &layerDesc = _visibleLayerDescs[layerIndex];
		VisibleLayer &layer = _visibleLayers[layerIndex];

		// Descriptor fields
		is >> layerDesc._size.x >> layerDesc._size.y >> layerDesc._radius;

		// One weight per hidden unit and per cell of the (2 * radius + 1)^2 window
		const int diameter = layerDesc._radius * 2 + 1;
		const int weightsPerUnit = diameter * diameter;

		const cl_int3 weightsSize = cl_int3{ _hiddenSize.x, _hiddenSize.y, weightsPerUnit };

		const int weightCount = weightsSize.x * weightsSize.y * weightsSize.z;

		// Weights, uploaded to the back buffer with a blocking write
		{
			layer._weights = createDoubleBuffer3D(cs, weightsSize, CL_R, CL_FLOAT);

			std::vector<cl_float> weights(weightCount);

			for (cl_float &weight : weights)
				is >> weight;

			cs.getQueue().enqueueWriteImage(layer._weights[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(weightsSize.x), static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) }, 0, 0, weights.data());
		}

		// Derived coordinate mappings
		is >> layer._hiddenToVisible.x >> layer._hiddenToVisible.y >> layer._visibleToHidden.x >> layer._visibleToHidden.y >> layer._reverseRadii.x >> layer._reverseRadii.y;
	}

	// Create kernels
	_activateKernel = cl::Kernel(program.getProgram(), "predActivate");
	//_solveHiddenKernel = cl::Kernel(program.getProgram(), "predSolveHidden");
	_learnWeightsKernel = cl::Kernel(program.getProgram(), "predLearnWeights");
}
Example #28
0
// Enqueues the weight update kernels for every layer, bottom to top.
// Per layer this runs one hidden weight update (a dedicated kernel for the top-most layer,
// which has no layer above it to pass in) over the layer's own grid, followed by one
// visible weight update over the grid of the layer's input. The input of layer l is the
// input image for l == 0 and otherwise the feed-forward hidden states of layer l - 1.
//
// cs - compute system whose queue receives the kernels
void HTFE::learn(sys::ComputeSystem &cs) {
    // ------------------------------------------------------------------------------
    // ---------------------- Weight Update and Predictions  ------------------------
    // ------------------------------------------------------------------------------

    // Signed layer count so the index comparisons below never mix signed and unsigned
    const int numLayers = static_cast<int>(_layers.size());

    cl::Image2D* pPrevLayer = &_inputImage;
    int prevWidth = _inputWidth;
    int prevHeight = _inputHeight;

    cl::Image2D* pPrevLayerFeedForwardPrev = &_inputImagePrev;

    for (int l = 0; l < numLayers; l++) {
        const bool isLastLayer = (l == numLayers - 1);

        // Geometry of this layer
        Int2 layerSize;
        layerSize._x = _layerDescs[l]._width;
        layerSize._y = _layerDescs[l]._height;

        Int2 layerSizeMinusOne;
        layerSizeMinusOne._x = _layerDescs[l]._width - 1;
        layerSizeMinusOne._y = _layerDescs[l]._height - 1;

        Float2 layerSizeMinusOneInv;
        layerSizeMinusOneInv._x = 1.0f / (_layerDescs[l]._width - 1);
        layerSizeMinusOneInv._y = 1.0f / (_layerDescs[l]._height - 1);

        // Geometry of this layer's input
        Int2 inputSize;
        inputSize._x = prevWidth;
        inputSize._y = prevHeight;

        Int2 inputSizeMinusOne;
        inputSizeMinusOne._x = prevWidth - 1;
        inputSizeMinusOne._y = prevHeight - 1;

        Float2 inputSizeMinusOneInv;
        inputSizeMinusOneInv._x = 1.0f / (prevWidth - 1);
        inputSizeMinusOneInv._y = 1.0f / (prevHeight - 1);

        // ------------------------------- Weight Updates -------------------------------

        Float4 alphas;
        alphas._x = _layerDescs[l]._feedForwardAlpha;
        alphas._y = _layerDescs[l]._lateralAlpha;
        alphas._z = _layerDescs[l]._feedBackAlpha;
        alphas._w = _layerDescs[l]._hiddenBiasAlpha;

        int index = 0;

        if (isLastLayer) {
            // Top-most layer: no feed-back weights and no next-layer arguments
            cl::Kernel &kernel = _layerHiddenWeightUpdateLastKernel;

            kernel.setArg(index++, _layers[l]._visibleReconstructionPrev);
            kernel.setArg(index++, *pPrevLayer);
            kernel.setArg(index++, *pPrevLayerFeedForwardPrev);
            kernel.setArg(index++, _layers[l]._hiddenFeedBackActivationsPrev);
            kernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrev);
            kernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrevPrev);
            kernel.setArg(index++, _layers[l]._reconstructionWeightsPrev);
            kernel.setArg(index++, _layers[l]._feedForwardWeightsPrev);
            kernel.setArg(index++, _layers[l]._lateralWeightsPrev);
            kernel.setArg(index++, _layers[l]._hiddenBiasesPrev);
            kernel.setArg(index++, _layers[l]._feedForwardWeights);
            kernel.setArg(index++, _layers[l]._lateralWeights);
            kernel.setArg(index++, _layers[l]._hiddenBiases);
            kernel.setArg(index++, layerSize);
            kernel.setArg(index++, layerSizeMinusOne);
            kernel.setArg(index++, layerSizeMinusOneInv);
            kernel.setArg(index++, inputSize);
            kernel.setArg(index++, inputSizeMinusOne);
            kernel.setArg(index++, inputSizeMinusOneInv);
            kernel.setArg(index++, _layerDescs[l]._receptiveFieldRadius);
            kernel.setArg(index++, _layerDescs[l]._lateralConnectionRadius);
            kernel.setArg(index++, _layerDescs[l]._reconstructionRadius);
            kernel.setArg(index++, _layerDescs[l]._sparsity);
            kernel.setArg(index++, alphas);
            kernel.setArg(index++, _layerDescs[l]._weightDecay);

            cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(_layerDescs[l]._width, _layerDescs[l]._height));
        }
        else {
            // Geometry of the layer above; only this kernel takes it
            Int2 nextSize;
            nextSize._x = _layerDescs[l + 1]._width;
            nextSize._y = _layerDescs[l + 1]._height;

            Int2 nextSizeMinusOne;
            nextSizeMinusOne._x = _layerDescs[l + 1]._width - 1;
            nextSizeMinusOne._y = _layerDescs[l + 1]._height - 1;

            cl::Kernel &kernel = _layerHiddenWeightUpdateKernel;

            kernel.setArg(index++, _layers[l]._visibleReconstructionPrev);
            kernel.setArg(index++, *pPrevLayer);
            kernel.setArg(index++, *pPrevLayerFeedForwardPrev);
            kernel.setArg(index++, _layers[l]._hiddenFeedBackActivationsPrev);
            kernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrev);
            kernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrevPrev);
            kernel.setArg(index++, _layers[l + 1]._hiddenStatesFeedBackPrev);
            kernel.setArg(index++, _layers[l]._reconstructionWeightsPrev);
            kernel.setArg(index++, _layers[l]._feedForwardWeightsPrev);
            kernel.setArg(index++, _layers[l]._lateralWeightsPrev);
            kernel.setArg(index++, _layers[l]._hiddenBiasesPrev);
            kernel.setArg(index++, _layers[l]._feedBackWeightsPrev);
            kernel.setArg(index++, _layers[l]._feedForwardWeights);
            kernel.setArg(index++, _layers[l]._lateralWeights);
            kernel.setArg(index++, _layers[l]._hiddenBiases);
            kernel.setArg(index++, _layers[l]._feedBackWeights);
            kernel.setArg(index++, layerSize);
            kernel.setArg(index++, layerSizeMinusOne);
            kernel.setArg(index++, layerSizeMinusOneInv);
            kernel.setArg(index++, inputSize);
            kernel.setArg(index++, inputSizeMinusOne);
            kernel.setArg(index++, inputSizeMinusOneInv);
            kernel.setArg(index++, nextSize);
            kernel.setArg(index++, nextSizeMinusOne);
            kernel.setArg(index++, _layerDescs[l]._receptiveFieldRadius);
            kernel.setArg(index++, _layerDescs[l]._lateralConnectionRadius);
            kernel.setArg(index++, _layerDescs[l]._feedBackConnectionRadius);
            kernel.setArg(index++, _layerDescs[l]._reconstructionRadius);
            kernel.setArg(index++, _layerDescs[l]._sparsity);
            kernel.setArg(index++, alphas);
            kernel.setArg(index++, _layerDescs[l]._weightDecay);

            cs.getQueue().enqueueNDRangeKernel(kernel, cl::NullRange, cl::NDRange(_layerDescs[l]._width, _layerDescs[l]._height));
        }

        // Visible (reconstruction) weight update, one work item per input cell
        index = 0;

        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._visibleReconstructionPrev);
        _layerVisibleWeightUpdateKernel.setArg(index++, *pPrevLayer);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._hiddenStatesFeedBackPrev);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._reconstructionWeightsPrev);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._visibleBiasesPrev);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._reconstructionWeights);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layers[l]._visibleBiases);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layerDescs[l]._reconstructionRadius);
        _layerVisibleWeightUpdateKernel.setArg(index++, inputSizeMinusOne);
        _layerVisibleWeightUpdateKernel.setArg(index++, inputSizeMinusOneInv);
        _layerVisibleWeightUpdateKernel.setArg(index++, layerSize);
        _layerVisibleWeightUpdateKernel.setArg(index++, layerSizeMinusOne);
        _layerVisibleWeightUpdateKernel.setArg(index++, layerSizeMinusOneInv);
        _layerVisibleWeightUpdateKernel.setArg(index++, _layerDescs[l]._reconstructionAlpha);

        cs.getQueue().enqueueNDRangeKernel(_layerVisibleWeightUpdateKernel, cl::NullRange, cl::NDRange(prevWidth, prevHeight));

        // This layer becomes the input of the next one
        pPrevLayer = &_layers[l]._hiddenStatesFeedForward; // Or _hiddenStatesFeedBack ?
        prevWidth = _layerDescs[l]._width;
        prevHeight = _layerDescs[l]._height;

        pPrevLayerFeedForwardPrev = &_layers[l]._hiddenStatesFeedForwardPrev;
    }
}
/// Deserialization routine for the sparse coder: reads sizes, hidden states, hidden biases and
/// per-visible-layer descriptors/weights from a whitespace-separated text stream using operator>>.
///
/// NOTE: the very first statement is abort() (tagged "Fix me"), so as written none of the
/// statements after it are reached; the routine is effectively disabled until that is resolved.
///
/// @param cs       compute system handed to createDoubleBuffer2D and whose queue performs the image uploads
/// @param program  not referenced by any of the statements below
/// @param is       stream the serialized values are extracted from
void ComparisonSparseCoder::readFromStream(sys::ComputeSystem &cs, sys::ComputeProgram &program, std::istream &is) {
	abort(); // Fix me: deliberately stops here; everything below is currently dead code
	// Header: hidden layer dimensions followed by the lateral radius
	is >> _hiddenSize.x >> _hiddenSize.y >> _lateralRadius;

	// Allocate the hidden-sized double buffers (single-channel float images)
	_hiddenStates = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT);

	_hiddenBiases = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT);

	_hiddenActivationSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT);
	//_hiddenReconstructionSummationTemp = createDoubleBuffer2D(cs, _hiddenSize, CL_R, CL_FLOAT);

	// Hidden states: one float per hidden unit, staged on the host and then uploaded
	{
		std::vector<cl_float> hiddenStates(_hiddenSize.x * _hiddenSize.y);

		for (int si = 0; si < hiddenStates.size(); si++)
			is >> hiddenStates[si];

		// Blocking write (CL_TRUE), so the host vector may be destroyed when this scope ends
		cs.getQueue().enqueueWriteImage(_hiddenStates[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }, 0, 0, hiddenStates.data());

	}

	// Hidden biases: same layout and upload pattern as the hidden states above
	{
		std::vector<cl_float> hiddenBiases(_hiddenSize.x * _hiddenSize.y);

		for (int bi = 0; bi < hiddenBiases.size(); bi++)
			is >> hiddenBiases[bi];

		// Blocking write (CL_TRUE) into the back buffer of the bias double buffer
		cs.getQueue().enqueueWriteImage(_hiddenBiases[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(_hiddenSize.x), static_cast<cl::size_type>(_hiddenSize.y), 1 }, 0, 0, hiddenBiases.data());
	}

	// Layer information: the visible layer count precedes the per-layer records
	int numLayers;

	is >> numLayers;

	// Descriptors and layers are kept as parallel arrays indexed by vli
	_visibleLayerDescs.resize(numLayers);
	_visibleLayers.resize(numLayers);

	for (int vli = 0; vli < _visibleLayers.size(); vli++) {
		VisibleLayer &vl = _visibleLayers[vli];
		VisibleLayerDesc &vld = _visibleLayerDescs[vli];

		// Desc: fields are extracted in exactly this order
		is >> vld._size.x >> vld._size.y >> vld._radius >> vld._weightAlpha >> vld._weightLambda >> vld._ignoreMiddle >> vld._useTraces;

		// Layer
		//vl._reconstructionError = cl::Image2D(cs.getContext(), CL_MEM_READ_WRITE, cl::ImageFormat(CL_R, CL_FLOAT), vld._size.x, vld._size.y);

		// Each hidden unit has a (2 * radius + 1)^2 window of weights
		int weightDiam = vld._radius * 2 + 1;

		int numWeights = weightDiam * weightDiam;

		// Weight volume extents: hidden width x hidden height x weights per hidden unit
		cl_int3 weightsSize = cl_int3{ _hiddenSize.x, _hiddenSize.y, numWeights };

		int totalNumWeights = weightsSize.x * weightsSize.y * weightsSize.z;

		if (vld._useTraces) {
			//vl._weights = createDoubleBuffer3D(cs, weightsSize, CL_RG, CL_FLOAT);

			// With traces enabled each weight entry is stored as two floats (x, y)
			std::vector<cl_float2> weights(totalNumWeights);

			for (int wi = 0; wi < weights.size(); wi++)
				is >> weights[wi].x >> weights[wi].y;

			// The buffer creation and upload are commented out, so the values read above are
			// consumed from the stream but never transferred to vl._weights
			//cs.getQueue().enqueueWriteImage(vl._weights[_back], CL_TRUE, { 0, 0, 0 }, { static_cast<cl::size_type>(weightsSize.x), static_cast<cl::size_type>(weightsSize.y), static_cast<cl::size_type>(weightsSize.z) }, 0, 0, weights.data());
		}
		else {
Example #30
0
/// Advances the whole agent hierarchy by one step.
///
/// Work is enqueued on cs.getQueue() in this order:
///   1. Bottom-up pass: for every layer, _modulateKernel is run once on the feed-forward input and
///      once on the previous sparse-coder hidden states, and the two outputs are handed to the
///      layer's sparse coder via learn().
///   2. Top-down pass: every layer's predictor learn() is called with the layer below (or `input`
///      for layer 0) as target and the previous-step states as visible inputs.
///   3. Top-down pass: every layer's swarm simStep() is called; for all layers except the first,
///      _inhibitKernel then writes the layer's _inhibitedAction.
///   4. Buffer updates: the current sparse-coder hidden states are copied into _scHiddenStatesPrev
///      and the baseline double buffers are swapped.
///
/// Does nothing when there are no layers.
///
/// @param cs      compute system whose queue receives all kernels and copies
/// @param reward  reward value forwarded unchanged to every layer's swarm
/// @param input   image used as the first layer's feed-forward input and as layer 0's predictor target
/// @param rng     random number generator forwarded to the swarms
void AgentSwarm::simStep(sys::ComputeSystem &cs, float reward, const cl::Image2D &input, std::mt19937 &rng) {
    // _layers.front() below is undefined on an empty container
    if (_layers.empty())
        return;

    // Signed layer count so the descending loops and "l < numLayers - 1" tests stay in int arithmetic
    const int numLayers = static_cast<int>(_layers.size());

    // Feed forward
    cl_int2 prevLayerSize = _layers.front()._sc.getVisibleLayerDesc(0)._size;
    cl::Image2D prevLayerState = input;

    for (int l = 0; l < numLayers; l++) {
        {
            std::vector<cl::Image2D> visibleStates(2);

            // Modulate the feed-forward input (launched over the size of the layer below)
            {
                int argIndex = 0;

                _modulateKernel.setArg(argIndex++, prevLayerState);
                _modulateKernel.setArg(argIndex++, _layers[l]._swarm.getVisibleLayer(0)._actionsExploratory);
                _modulateKernel.setArg(argIndex++, _layers[l]._modulatedFeedForwardInput);
                _modulateKernel.setArg(argIndex++, _layerDescs[l]._minAttention);

                cs.getQueue().enqueueNDRangeKernel(_modulateKernel, cl::NullRange, cl::NDRange(prevLayerSize.x, prevLayerSize.y));
            }

            // Modulate the recurrent input (launched over this layer's hidden size)
            {
                int argIndex = 0;

                _modulateKernel.setArg(argIndex++, _layers[l]._scHiddenStatesPrev);
                _modulateKernel.setArg(argIndex++, _layers[l]._swarm.getVisibleLayer(1)._actionsExploratory);
                _modulateKernel.setArg(argIndex++, _layers[l]._modulatedRecurrentInput);
                _modulateKernel.setArg(argIndex++, _layerDescs[l]._minAttention);

                cs.getQueue().enqueueNDRangeKernel(_modulateKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y));
            }

            visibleStates[0] = _layers[l]._modulatedFeedForwardInput;
            visibleStates[1] = _layers[l]._modulatedRecurrentInput;

            //_layers[l]._sc.activate(cs, visibleStates, _layerDescs[l]._scActiveRatio);

            _layers[l]._sc.learn(cs, _layers[l]._reward, visibleStates, _layerDescs[l]._scBoostAlpha, _layerDescs[l]._scActiveRatio);
        }

        // Get reward (disabled; note that the baseline buffers are still swapped at the end of the step)
        /*if (l == 0) {
        	int argIndex = 0;

        	_baseLineUpdateKernel.setArg(argIndex++, _layers[l]._pred.getVisibleLayer(0)._errors);
        	_baseLineUpdateKernel.setArg(argIndex++, _layers[l]._sc.getHiddenStates()[_back]);
        	_baseLineUpdateKernel.setArg(argIndex++, _layers[l]._baseLines[_back]);
        	_baseLineUpdateKernel.setArg(argIndex++, _layers[l]._baseLines[_front]);
        	_baseLineUpdateKernel.setArg(argIndex++, _layers[l]._reward);
        	_baseLineUpdateKernel.setArg(argIndex++, _layerDescs[l]._baseLineDecay);
        	_baseLineUpdateKernel.setArg(argIndex++, _layerDescs[l]._baseLineSensitivity);

        	cs.getQueue().enqueueNDRangeKernel(_baseLineUpdateKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y));
        }
        else {
        	int argIndex = 0;

        	_baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l - 1]._pred.getVisibleLayer(1)._errors);
        	_baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l]._pred.getVisibleLayer(0)._errors);
        	_baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l]._sc.getHiddenStates()[_back]);
        	_baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l]._baseLines[_back]);
        	_baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l]._baseLines[_front]);
        	_baseLineUpdateSumErrorKernel.setArg(argIndex++, _layers[l]._reward);
        	_baseLineUpdateSumErrorKernel.setArg(argIndex++, _layerDescs[l]._baseLineDecay);
        	_baseLineUpdateSumErrorKernel.setArg(argIndex++, _layerDescs[l]._baseLineSensitivity);

        	cs.getQueue().enqueueNDRangeKernel(_baseLineUpdateSumErrorKernel, cl::NullRange, cl::NDRange(_layerDescs[l]._hiddenSize.x, _layerDescs[l]._hiddenSize.y));
        }*/

        prevLayerState = _layers[l]._sc.getHiddenStates()[_back];
        prevLayerSize = _layerDescs[l]._hiddenSize;
    }

    // Predictor activation / error propagation pass (disabled).
    // The loop that used to sit here only assembled a visibleStates vector that nothing consumed,
    // so it has been dropped; the calls it was meant to feed are kept for reference:
    //   visibleStates = { _layers[l]._sc.getHiddenStates()[_back] [, _layers[l + 1]._pred.getHiddenStates()[_back]] }
    //   _layers[l]._pred.activate(cs, visibleStates);
    //   if (l == 0)
    //       _layers[l]._pred.propagateError(cs, input);
    //   else
    //       _layers[l]._pred.propagateError(cs, _layers[l - 1]._sc.getHiddenStates()[_back]);

    // Predictor learning, top layer first
    for (int l = numLayers - 1; l >= 0; l--) {
        std::vector<cl::Image2D> visibleStatesPrev;

        if (l < numLayers - 1) {
            // Non-top layers additionally see the front hidden-state buffer of the predictor above
            visibleStatesPrev.resize(2);

            visibleStatesPrev[0] = _layers[l]._scHiddenStatesPrev;
            visibleStatesPrev[1] = _layers[l + 1]._pred.getHiddenStates()[_front];
        }
        else {
            visibleStatesPrev.resize(1);

            visibleStatesPrev[0] = _layers[l]._scHiddenStatesPrev;
        }

        // The learning target is the layer below, or the external input for the first layer
        if (l == 0)
            _layers[l]._pred.learn(cs, input, visibleStatesPrev, _layerDescs[l]._predWeightAlpha);
        else
            _layers[l]._pred.learn(cs, _layers[l - 1]._sc.getHiddenStates()[_back], visibleStatesPrev, _layerDescs[l]._predWeightAlpha);
    }

    // Swarm, top layer first so that each layer's _inhibitedAction is written before the layer below reads it
    for (int l = numLayers - 1; l >= 0; l--) {
        if (l < numLayers - 1) {
            _layers[l]._swarm.simStep(cs, reward, _layers[l]._sc.getHiddenStates()[_back], _layers[l + 1]._inhibitedAction,
                                      _layerDescs[l]._swarmExpPert, _layerDescs[l]._swarmExpBreak,
                                      _layerDescs[l]._swarmAnnealingIterations, _layerDescs[l]._swarmActionDeriveAlpha,
                                      _layerDescs[l]._swarmQHiddenAlpha, _layerDescs[l]._swarmQAlpha, _layerDescs[l]._swarmPredAlpha,
                                      _layerDescs[l]._swarmLambda, _layerDescs[l]._swarmGamma, rng);
        }
        else {
            // The top layer has no layer above it and uses _lastLayerAction instead
            _layers[l]._swarm.simStep(cs, reward, _layers[l]._sc.getHiddenStates()[_back], _lastLayerAction,
                                      _layerDescs[l]._swarmExpPert, _layerDescs[l]._swarmExpBreak,
                                      _layerDescs[l]._swarmAnnealingIterations, _layerDescs[l]._swarmActionDeriveAlpha,
                                      _layerDescs[l]._swarmQHiddenAlpha, _layerDescs[l]._swarmQAlpha, _layerDescs[l]._swarmPredAlpha,
                                      _layerDescs[l]._swarmLambda, _layerDescs[l]._swarmGamma, rng);
        }

        // If not first layer, inhibit the action (kernel runs over the hidden size of the layer below)
        if (l != 0) {
            int argIndex = 0;

            _inhibitKernel.setArg(argIndex++, _layers[l]._swarm.getVisibleLayer(2)._actionsExploratory);
            _inhibitKernel.setArg(argIndex++, _layers[l]._inhibitedAction);
            _inhibitKernel.setArg(argIndex++, _layerDescs[l - 1]._hiddenSize);
            _inhibitKernel.setArg(argIndex++, _layerDescs[l - 1]._lateralRadius);
            _inhibitKernel.setArg(argIndex++, _layerDescs[l - 1]._scActiveRatio);

            cs.getQueue().enqueueNDRangeKernel(_inhibitKernel, cl::NullRange, cl::NDRange(_layerDescs[l - 1]._hiddenSize.x, _layerDescs[l - 1]._hiddenSize.y));
        }
    }

    // Buffer updates
    for (int l = 0; l < numLayers; l++) {
        const cl::array<cl::size_type, 3> zeroOrigin = { 0, 0, 0 };
        // Explicit casts: brace-initializing size_type from the signed size fields is a narrowing conversion
        const cl::array<cl::size_type, 3> layerRegion = {
            static_cast<cl::size_type>(_layerDescs[l]._hiddenSize.x),
            static_cast<cl::size_type>(_layerDescs[l]._hiddenSize.y),
            1
        };

        // Keep this step's sparse-coder output around as next step's "previous" state
        cs.getQueue().enqueueCopyImage(_layers[l]._sc.getHiddenStates()[_back], _layers[l]._scHiddenStatesPrev, zeroOrigin, zeroOrigin, layerRegion);

        std::swap(_layers[l]._baseLines[_front], _layers[l]._baseLines[_back]);
    }
}