sparse_fully_connected_1x1_layer_tester_cuda::sparse_fully_connected_1x1_layer_tester_cuda()
			: output_data_desc(0)
			, bias_desc(0)
		{
			cudnn_safe_call(cudnnCreateTensorDescriptor(&output_data_desc));
			cudnn_safe_call(cudnnCreateTensorDescriptor(&bias_desc));
		}
Code example #2
		void softmax_layer_tester_cuda::enqueue_forward_propagation(
			cudaStream_t stream_id,
			cuda_linear_buffer_device::ptr output_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom,
			const std::vector<cuda_linear_buffer_device::const_ptr>& input_buffers,
			const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data,
			cuda_linear_buffer_device::ptr temporary_working_fixed_buffer,
			cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer,
			unsigned int entry_count)
		{
			cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));

			cudnn_util::set_tensor_descriptor(
				input_data_desc,
				output_configuration_specific,
				entry_count);

			float alpha = 1.0F;
			float beta = 0.0F;
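			// Channel-mode softmax normalizes across feature maps independently at each spatial location.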
			cudnn_safe_call(cudnnSoftmaxForward(
				cuda_config->get_cudnn_handle(),
				CUDNN_SOFTMAX_ACCURATE,
				CUDNN_SOFTMAX_MODE_CHANNEL,
				&alpha,
				input_data_desc,
				*input_buffers[0],
				&beta,
				input_data_desc,
				*output_buffer));
		}
Code example #3
		convolution_1x1_layer_tester_cuda::convolution_1x1_layer_tester_cuda()
			: output_data_desc(0)
			, bias_desc(0)
		{
			cudnn_safe_call(cudnnCreateTensorDescriptor(&output_data_desc));
			cudnn_safe_call(cudnnCreateTensorDescriptor(&bias_desc));
		}
Code example #4
		void softmax_layer_tester_cuda::enqueue_test(
			cudaStream_t stream_id,
			const std::vector<const_cuda_linear_buffer_device_smart_ptr>& schema_data,
			const std::vector<const_cuda_linear_buffer_device_smart_ptr>& data,
			const std::vector<const_cuda_linear_buffer_device_smart_ptr>& data_custom,
			cuda_linear_buffer_device_smart_ptr input_buffer,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& additional_buffers,
			unsigned int entry_count)
		{
			cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));

			cudnn_safe_call(cudnnSetTensor4dDescriptor(
				input_data_desc,
				CUDNN_TENSOR_NCHW,
				CUDNN_DATA_FLOAT,
				entry_count,
				input_configuration_specific.feature_map_count,
				(input_configuration_specific.dimension_sizes.size() > 1) ? input_configuration_specific.dimension_sizes[1] : 1,
				input_configuration_specific.dimension_sizes[0]));

			float alpha = 1.0F;
			float beta = 0.0F;
			cudnn_safe_call(cudnnSoftmaxForward(
				cuda_config->get_cudnn_handle(),
				CUDNN_SOFTMAX_ACCURATE,
				CUDNN_SOFTMAX_MODE_CHANNEL,
				&alpha,
				input_data_desc,
				*input_buffer,
				&beta,
				input_data_desc,
				*additional_buffers[0]));
		}
Code example #5
		activation_layer_cudnn_updater_cuda::activation_layer_cudnn_updater_cuda(cudnnActivationMode_t af)
			: input_data_desc(0)
			, activation_desc(0)
		{
			cudnn_safe_call(cudnnCreateTensorDescriptor(&input_data_desc));
			cudnn_safe_call(cudnnCreateActivationDescriptor(&activation_desc));

			cudnn_safe_call(cudnnSetActivationDescriptor(activation_desc, af, CUDNN_NOT_PROPAGATE_NAN, 0.0F));
		}
Code example #6
		void sparse_fully_connected_1x1_layer_tester_cuda::enqueue_forward_propagation(
			cudaStream_t stream_id,
			cuda_linear_buffer_device::ptr output_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom,
			const std::vector<cuda_linear_buffer_device::const_ptr>& input_buffers,
			const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data,
			cuda_linear_buffer_device::ptr temporary_working_fixed_buffer,
			cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer,
			unsigned int entry_count)
		{
			{
				cusparse_safe_call(cusparseSetStream(cuda_config->get_cusparse_handle(), stream_id));
				float alpha = 1.0F;
				float beta = 0.0F;
				cusparseMatDescr_t mat_descr;
				cusparse_safe_call(cusparseCreateMatDescr(&mat_descr));
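				// CSR weight matrix: non-zero values in data[0], row offsets in data_custom[1],
				// column indices in data_custom[0]; it multiplies the dense input, one column per entry.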
				cusparse_safe_call(cusparseScsrmm(
					cuda_config->get_cusparse_handle(),
					CUSPARSE_OPERATION_NON_TRANSPOSE,
					output_elem_count_per_entry,
					entry_count,
					input_elem_count_per_entry_list[0],
					feature_map_connection_count,
					&alpha,
					mat_descr,
					*data[0],
					*data_custom[1],
					*data_custom[0],
					*input_buffers[0],
					input_elem_count_per_entry_list[0],
					&beta,
					*output_buffer,
					output_elem_count_per_entry));
			}

			// Add bias
			{
				cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));
				cudnn_util::set_tensor_descriptor(
					output_data_desc,
					output_configuration_specific,
					entry_count);
				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnAddTensor(
					cuda_config->get_cudnn_handle(),
					&alpha,
					bias_desc,
					*data[1],
					&beta,
					output_data_desc,
					*output_buffer));
			}
		}
Code example #7
		void fully_connected_layer_updater_cuda::enqueue_backward_weights_propagation(
			cudaStream_t stream_id,
			const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device::ptr>& gradient,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom,
			const std::vector<cuda_linear_buffer_device::const_ptr>& input_neurons_buffers,
			cuda_linear_buffer_device::const_ptr output_errors_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data,
			cuda_linear_buffer_device::ptr temporary_working_fixed_buffer,
			cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer,
			cuda_linear_buffer_device::const_ptr temporary_fixed_buffer,
			cuda_linear_buffer_device::const_ptr temporary_per_entry_buffer,
			unsigned int entry_count)
		{
			// Update weights
			{
				cublas_safe_call(cublasSetStream(cuda_config->get_cublas_handle(), stream_id));
				float alpha = 1.0F;
				float beta = 1.0F;
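				// Accumulate into the existing gradient (beta = 1): gradient[0] += input_neurons * output_errors^T.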
				cublas_safe_call(cublasSgemm(
					cuda_config->get_cublas_handle(),
					CUBLAS_OP_N,
					CUBLAS_OP_T,
					input_elem_count_per_entry_list[0],
					output_elem_count_per_entry,
					entry_count,
					&alpha,
					*input_neurons_buffers[0],
					input_elem_count_per_entry_list[0],
					*output_errors_buffer,
					output_elem_count_per_entry,
					&beta,
					*gradient[0],
					input_elem_count_per_entry_list[0]));
			}

			// Update biases
			if (bias)
			{
				cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));
				cudnn_util::set_tensor_descriptor(
					output_data_desc,
					output_configuration_specific,
					entry_count);
				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnConvolutionBackwardBias(
					cuda_config->get_cudnn_handle(),
					&alpha,
					output_data_desc,
					*output_errors_buffer,
					&beta,
					bias_desc,
					*gradient[1]));
			}
		}
Code example #8
		sparse_1x1_layer_tester_cuda::sparse_1x1_layer_tester_cuda()
			: input_strided_data_desc(0)
			, input_converted_NHWC_data_desc(0)
			, input_converted_CNHW_data_desc(0)
			, output_data_desc(0)
			, bias_desc(0)
		{
			cudnn_safe_call(cudnnCreateTensorDescriptor(&input_strided_data_desc));
			cudnn_safe_call(cudnnCreateTensorDescriptor(&input_converted_NHWC_data_desc));
			cudnn_safe_call(cudnnCreateTensorDescriptor(&input_converted_CNHW_data_desc));
			cudnn_safe_call(cudnnCreateTensorDescriptor(&output_data_desc));
			cudnn_safe_call(cudnnCreateTensorDescriptor(&bias_desc));
		}
Code example #9
		void fully_connected_layer_updater_cuda::enqueue_forward_propagation(
			cudaStream_t stream_id,
			cuda_linear_buffer_device::ptr output_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom,
			const std::vector<cuda_linear_buffer_device::const_ptr>& input_buffers,
			const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data,
			cuda_linear_buffer_device::ptr temporary_working_fixed_buffer,
			cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer,
			cuda_linear_buffer_device::ptr temporary_fixed_buffer,
			cuda_linear_buffer_device::ptr temporary_per_entry_buffer,
			unsigned int entry_count)
		{
			{
				cublas_safe_call(cublasSetStream(cuda_config->get_cublas_handle(), stream_id));
				float alpha = 1.0F;
				float beta = 0.0F;
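				// Column-major GEMM: output (output_elem_count x entry_count) = weights^T * input, one column per entry.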
				cublas_safe_call(cublasSgemm(
					cuda_config->get_cublas_handle(),
					CUBLAS_OP_T,
					CUBLAS_OP_N,
					output_elem_count_per_entry,
					entry_count,
					input_elem_count_per_entry_list[0],
					&alpha,
					*data[0],
					input_elem_count_per_entry_list[0],
					*input_buffers[0],
					input_elem_count_per_entry_list[0],
					&beta,
					*output_buffer,
					output_elem_count_per_entry));
			}

			if (bias)
			{
				cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));
				cudnn_util::set_tensor_descriptor(
					output_data_desc,
					output_configuration_specific,
					entry_count);
				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnAddTensor(
					cuda_config->get_cudnn_handle(),
					&alpha,
					bias_desc,
					*data[1],
					&beta,
					output_data_desc,
					*output_buffer));
			}
		}
Code example #10
		void convolution_layer_updater_cuda::enqueue_backprop(
			cudaStream_t stream_id,
			const std::vector<const_cuda_linear_buffer_device_smart_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& data,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& data_custom,
			const_cuda_linear_buffer_device_smart_ptr output_neurons_buffer,
			const_cuda_linear_buffer_device_smart_ptr input_neurons_buffer,
			cuda_linear_buffer_device_smart_ptr output_errors_buffer,
			cuda_linear_buffer_device_smart_ptr input_errors_buffer,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& additional_buffers,
			std::vector<cuda_memobject_smart_ptr>& dynamic_memobjects,
			unsigned int entry_count,
			bool force_deterministic)
		{
			if (!backprop_required)
				throw neural_network_exception("convolution_layer_updater_cuda is not configured to do backprop but requested to");

			cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));

			cudnn_safe_call(cudnnSetTensor4dDescriptor(
				input_data_desc,
				CUDNN_TENSOR_NCHW,
				CUDNN_DATA_FLOAT,
				entry_count,
				input_configuration_specific.feature_map_count,
				(input_configuration_specific.dimension_sizes.size() > 1) ? input_configuration_specific.dimension_sizes[1] : 1,
				input_configuration_specific.dimension_sizes[0]));
			cudnn_safe_call(cudnnSetTensor4dDescriptor(
				output_data_desc,
				CUDNN_TENSOR_NCHW,
				CUDNN_DATA_FLOAT,
				entry_count,
				output_configuration_specific.feature_map_count,
				(output_configuration_specific.dimension_sizes.size() > 1) ? output_configuration_specific.dimension_sizes[1] : 1,
				output_configuration_specific.dimension_sizes[0]));

			{
				float alpha = 1.0F;
				float beta = 0.0F;
				cudnn_safe_call(cudnnConvolutionBackwardData(
					cuda_config->get_cudnn_handle(),
					&alpha,
					weights_desc,
					*data[0],
					output_data_desc,
					*output_errors_buffer,
					convolution_desc,
					&beta,
					input_data_desc,
					*input_errors_buffer));
			}
		}
Code example #11
		convolution_layer_updater_cuda::convolution_layer_updater_cuda()
			: input_data_desc(0)
			, output_data_desc(0)
			, weights_desc(0)
			, convolution_desc(0)
			, bias_desc(0)
		{
			cudnn_safe_call(cudnnCreateTensorDescriptor(&input_data_desc));
			cudnn_safe_call(cudnnCreateTensorDescriptor(&output_data_desc));
			cudnn_safe_call(cudnnCreateFilterDescriptor(&weights_desc));
			cudnn_safe_call(cudnnCreateConvolutionDescriptor(&convolution_desc));
			cudnn_safe_call(cudnnCreateTensorDescriptor(&bias_desc));
		}
Code example #12
		void activation_layer_cudnn_updater_cuda::enqueue_backward_data_propagation(
			cudaStream_t stream_id,
			unsigned int input_index,
			cuda_linear_buffer_device::ptr input_errors_buffer,
			cuda_linear_buffer_device::const_ptr output_errors_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom,
			const std::vector<cuda_linear_buffer_device::const_ptr>& input_neurons_buffers,
			cuda_linear_buffer_device::const_ptr output_neurons_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data,
			cuda_linear_buffer_device::ptr temporary_working_fixed_buffer,
			cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer,
			cuda_linear_buffer_device::const_ptr temporary_fixed_buffer,
			cuda_linear_buffer_device::const_ptr temporary_per_entry_buffer,
			bool add_update_to_destination,
			unsigned int entry_count)
		{
			cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));

			cudnn_util::set_tensor_descriptor(
				input_data_desc,
				output_configuration_specific,
				entry_count);

			float alpha = 1.0F;
			float beta = add_update_to_destination ? 1.0F : 0.0F;
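			// beta = 1 accumulates into the existing input errors, beta = 0 overwrites them.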
			cudnn_safe_call(cudnnActivationBackward_v4(
				cuda_config->get_cudnn_handle(),
				activation_desc,
				&alpha,
				input_data_desc,
				*output_neurons_buffer,
				input_data_desc,
				*output_errors_buffer,
				input_data_desc,
				*input_neurons_buffers[0],
				&beta,
				input_data_desc,
				*input_errors_buffer));
		}
Code example #13
		void convolution_layer_updater_cuda::updater_configured()
		{
			nnforge_shared_ptr<const convolution_layer> layer_derived = nnforge_dynamic_pointer_cast<const convolution_layer>(layer_schema);

			window_sizes = layer_derived->window_sizes;

			zero_padding = layer_derived->left_zero_padding;
			for(int i = 0; i < window_sizes.size(); ++i)
			{
				if (zero_padding[i] != layer_derived->right_zero_padding[i])
					throw neural_network_exception("cuDNN is not able to run convolution when left and right padding sizes don't match");
			}

			cudnn_safe_call(cudnnSetFilter4dDescriptor(
				weights_desc,
				CUDNN_DATA_FLOAT,
				output_configuration_specific.feature_map_count,
				input_configuration_specific.feature_map_count,
				(window_sizes.size() > 1) ? window_sizes[1] : 1,
				window_sizes[0]));

			cudnn_safe_call(cudnnSetTensor4dDescriptor(
				bias_desc,
				CUDNN_TENSOR_NCHW,
				CUDNN_DATA_FLOAT,
				1,
				output_configuration_specific.feature_map_count,
				1,
				1));

			cudnn_safe_call(cudnnSetConvolution2dDescriptor(
				convolution_desc,
				(zero_padding.size() > 1) ? zero_padding[1] : 0,
				zero_padding[0],
				1,
				1,
				1,
				1,
				CUDNN_CROSS_CORRELATION));
		}
Code example #14
		void convolution_1x1_layer_tester_cuda::tester_configured()
		{
			nnforge_shared_ptr<const convolution_layer> layer_derived = nnforge_dynamic_pointer_cast<const convolution_layer>(layer_schema);

			cudnn_safe_call(cudnnSetTensor4dDescriptor(
				bias_desc,
				CUDNN_TENSOR_NCHW,
				CUDNN_DATA_FLOAT,
				1,
				output_configuration_specific.feature_map_count,
				1,
				1));
		}
Code example #15
File: cudnn_util.cpp Project: anshumang/nnForge
		void cudnn_util::set_pooling_descriptor(
			cudnnPoolingDescriptor_t pooling_desc,
			cudnnPoolingMode_t pooling_mode,
			const std::vector<unsigned int>& subsampling_sizes)
		{
			std::vector<int> padding(subsampling_sizes.size(), 0);
			std::vector<int> dimensions(subsampling_sizes.rbegin(), subsampling_sizes.rend());
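			// Window sizes are reversed into cuDNN's slowest-to-fastest dimension order; the same array
			// is passed as the stride, giving non-overlapping pooling with zero padding.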
			cudnn_safe_call(cudnnSetPoolingNdDescriptor(
				pooling_desc,
				pooling_mode,
				static_cast<int>(subsampling_sizes.size()),
				&dimensions[0],
				&padding[0],
				&dimensions[0]));
		}
Code example #16
File: cudnn_util.cpp Project: anshumang/nnForge
		void cudnn_util::set_convolution_descriptor(
			cudnnConvolutionDescriptor_t convolution_desc,
			const std::vector<unsigned int>& zero_padding)
		{
			std::vector<int> conv_padding(zero_padding.rbegin(), zero_padding.rend());
			std::vector<int> filter_stride(zero_padding.size(), 1);
			std::vector<int> upscale(zero_padding.size(), 1);
			cudnn_safe_call(cudnnSetConvolutionNdDescriptor_v3(
				convolution_desc,
				static_cast<int>(zero_padding.size()),
				&conv_padding[0],
				&filter_stride[0],
				&upscale[0],
				CUDNN_CROSS_CORRELATION,
				CUDNN_DATA_FLOAT));
		}
Code example #17
File: cudnn_util.cpp Project: anshumang/nnForge
		void cudnn_util::set_filter_descriptor(
			cudnnFilterDescriptor_t filter_desc,
			unsigned int output_feature_map_count,
			unsigned int input_feature_map_count,
			const std::vector<unsigned int>& windows_sizes)
		{
			std::vector<int> filter_dimensions(windows_sizes.size() + 2);
			filter_dimensions[0] = output_feature_map_count;
			filter_dimensions[1] = input_feature_map_count;
			for(int i = 0; i < windows_sizes.size(); ++i)
				filter_dimensions[i + 2] = windows_sizes[windows_sizes.size() - 1 - i];

			cudnn_safe_call(cudnnSetFilterNdDescriptor(
				filter_desc,
				CUDNN_DATA_FLOAT,
				static_cast<int>(filter_dimensions.size()),
				&filter_dimensions[0]));
		}
Code example #18
File: cudnn_util.cpp Project: anshumang/nnForge
		void cudnn_util::set_tensor_descriptor(
			cudnnTensorDescriptor_t tensor_desc,
			const layer_configuration_specific& config,
			unsigned int entry_count)
		{
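			// Dimensions are ordered entry count, feature map count, then spatial sizes from
			// slowest- to fastest-varying; strides are computed for a fully packed layout.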
			std::vector<int> tensor_dimensions(config.dimension_sizes.size() + 2);
			tensor_dimensions[0] = entry_count;
			tensor_dimensions[1] = config.feature_map_count;
			for(int i = 0; i < config.dimension_sizes.size(); ++i)
				tensor_dimensions[i + 2] = config.dimension_sizes[config.dimension_sizes.size() - 1 - i];

			std::vector<int> tensor_strides(tensor_dimensions.size());
			tensor_strides.back() = 1;
			for(int i = static_cast<int>(tensor_strides.size()) - 2; i >= 0; --i)
				tensor_strides[i] = tensor_strides[i + 1] * tensor_dimensions[i + 1];

			cudnn_safe_call(cudnnSetTensorNdDescriptor(
				tensor_desc,
				CUDNN_DATA_FLOAT,
				static_cast<int>(tensor_dimensions.size()),
				&tensor_dimensions[0],
				&tensor_strides[0]));
		}
Code example #19
		void convolution_layer_updater_cuda::enqueue_backward_data_propagation(
			cudaStream_t stream_id,
			unsigned int input_index,
			cuda_linear_buffer_device::ptr input_errors_buffer,
			cuda_linear_buffer_device::const_ptr output_errors_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom,
			const std::vector<cuda_linear_buffer_device::const_ptr>& input_neurons_buffers,
			cuda_linear_buffer_device::const_ptr output_neurons_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data,
			cuda_linear_buffer_device::ptr temporary_working_fixed_buffer,
			cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer,
			cuda_linear_buffer_device::const_ptr temporary_fixed_buffer,
			cuda_linear_buffer_device::const_ptr temporary_per_entry_buffer,
			bool add_update_to_destination,
			unsigned int entry_count)
		{
			cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));

			cudnn_util::set_tensor_descriptor(
				input_data_desc,
				input_configuration_specific_list[0],
				entry_count);
			cudnn_util::set_tensor_descriptor(
				output_data_desc,
				output_configuration_specific,
				entry_count);

			{
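				// Use the shared fixed working buffer, when available, as the cuDNN workspace.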
				void * workspace = 0;
				size_t workspace_size = 0;
				if (temporary_working_fixed_buffer)
				{
					workspace = *temporary_working_fixed_buffer;
					workspace_size = temporary_working_fixed_buffer->get_size();
				}

				cudnnConvolutionBwdDataAlgo_t algo = cuda_config->cudnn_find_convolution_backward_data_algo(
					input_data_desc,
					weights_desc,
					convolution_desc,
					output_data_desc,
					*output_errors_buffer,
					*data[0],
					*temporary_working_per_entry_buffer,
					workspace,
					workspace_size);

				float alpha = 1.0F;
				float beta = (add_update_to_destination ? 1.0F : 0.0F);
				cudnn_safe_call(cudnnConvolutionBackwardData(
					cuda_config->get_cudnn_handle(),
					&alpha,
					weights_desc,
					*data[0],
					output_data_desc,
					*output_errors_buffer,
					convolution_desc,
					algo,
					workspace,
					workspace_size,
					&beta,
					input_data_desc,
					*input_errors_buffer));
			}
		}
Code example #20
		softmax_layer_tester_cuda::softmax_layer_tester_cuda()
			: input_data_desc(0)
		{
			cudnn_safe_call(cudnnCreateTensorDescriptor(&input_data_desc));
		}
Code example #21
		void convolution_layer_updater_cuda::enqueue_test(
			unsigned int offset_input_entry_id,
			cudaStream_t stream_id,
			const std::vector<const_cuda_linear_buffer_device_smart_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& data,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& data_custom,
			const_cuda_linear_buffer_device_smart_ptr input_neurons_buffer,
			cuda_linear_buffer_device_smart_ptr output_neurons_buffer,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& additional_buffers,
			std::vector<cuda_memobject_smart_ptr>& dynamic_memobjects,
			unsigned int entry_count,
			bool force_deterministic)
		{
			cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));

			cudnn_safe_call(cudnnSetTensor4dDescriptor(
				input_data_desc,
				CUDNN_TENSOR_NCHW,
				CUDNN_DATA_FLOAT,
				entry_count,
				input_configuration_specific.feature_map_count,
				(input_configuration_specific.dimension_sizes.size() > 1) ? input_configuration_specific.dimension_sizes[1] : 1,
				input_configuration_specific.dimension_sizes[0]));
			cudnn_safe_call(cudnnSetTensor4dDescriptor(
				output_data_desc,
				CUDNN_TENSOR_NCHW,
				CUDNN_DATA_FLOAT,
				entry_count,
				output_configuration_specific.feature_map_count,
				(output_configuration_specific.dimension_sizes.size() > 1) ? output_configuration_specific.dimension_sizes[1] : 1,
				output_configuration_specific.dimension_sizes[0]));

			{
				cudnnConvolutionFwdAlgo_t algo;
				cudnn_safe_call(cudnnGetConvolutionForwardAlgorithm(
					cuda_config->get_cudnn_handle(),
					input_data_desc,
					weights_desc,
					convolution_desc,
					output_data_desc,
					CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
					additional_buffers[0]->get_size(),
					&algo));

				float alpha = 1.0F;
				float beta = 0.0F;
				cudnn_safe_call(cudnnConvolutionForward(
					cuda_config->get_cudnn_handle(),
					&alpha,
					input_data_desc,
					(const float *)(*input_neurons_buffer) + input_elem_count_per_entry * offset_input_entry_id,
					weights_desc,
					*data[0],
					convolution_desc,
					algo,
					*additional_buffers[0],
					additional_buffers[0]->get_size(),
					&beta,
					output_data_desc,
					*output_neurons_buffer));
			}

			{
				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnAddTensor(
					cuda_config->get_cudnn_handle(),
					CUDNN_ADD_SAME_C,
					&alpha,
					bias_desc,
					*data[1],
					&beta,
					output_data_desc,
					*output_neurons_buffer));
			}
		}
Code example #22
		void convolution_layer_tester_cuda::enqueue_forward_propagation(
			cudaStream_t stream_id,
			cuda_linear_buffer_device::ptr output_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom,
			const std::vector<cuda_linear_buffer_device::const_ptr>& input_buffers,
			const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data,
			cuda_linear_buffer_device::ptr temporary_working_fixed_buffer,
			cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer,
			unsigned int entry_count)
		{
			cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));

			cudnn_util::set_tensor_descriptor(
				input_data_desc,
				input_configuration_specific_list[0],
				entry_count);
			cudnn_util::set_tensor_descriptor(
				output_data_desc,
				output_configuration_specific,
				entry_count);

			{
				void * workspace = 0;
				size_t workspace_size = 0;
				if (temporary_working_fixed_buffer)
				{
					workspace = *temporary_working_fixed_buffer;
					workspace_size = temporary_working_fixed_buffer->get_size();
				}

				cudnnConvolutionFwdAlgo_t algo = cuda_config->cudnn_find_convolution_forward_algo(
					input_data_desc,
					weights_desc,
					convolution_desc,
					output_data_desc,
					*input_buffers[0],
					*data[0],
					*output_buffer,
					workspace,
					workspace_size);

				float alpha = 1.0F;
				float beta = 0.0F;
				cudnn_safe_call(cudnnConvolutionForward(
					cuda_config->get_cudnn_handle(),
					&alpha,
					input_data_desc,
					*input_buffers[0],
					weights_desc,
					*data[0],
					convolution_desc,
					algo,
					workspace,
					workspace_size,
					&beta,
					output_data_desc,
					*output_buffer));
			}

			if (bias)
			{
				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnAddTensor(
					cuda_config->get_cudnn_handle(),
					&alpha,
					bias_desc,
					*data[1],
					&beta,
					output_data_desc,
					*output_buffer));
			}
		}
Code example #23
		void sparse_1x1_layer_tester_cuda::enqueue_forward_propagation(
			cudaStream_t stream_id,
			cuda_linear_buffer_device::ptr output_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom,
			const std::vector<cuda_linear_buffer_device::const_ptr>& input_buffers,
			const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data,
			cuda_linear_buffer_device::ptr temporary_working_fixed_buffer,
			cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer,
			unsigned int entry_count)
		{
			// Convert input data strided NCHW to packed CNHW format
			if (unit_stride)
			{
				cuda_util::transpose23(
					*cuda_config,
					*input_buffers[0],
					*temporary_working_per_entry_buffer,
					input_elem_count_per_feature_map_list[0],
					input_configuration_specific_list[0].feature_map_count,
					entry_count,
					stream_id);
			}
			else
			{
				std::vector<unsigned int> input_converted_CNHW_strides = input_converted_CNHW_strides_base;
				input_converted_CNHW_strides[input_converted_CNHW_strides.size() - 2] = input_converted_CNHW_strides[input_converted_CNHW_strides.size() - 1] * entry_count;
				cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));
				cudnn_util::set_tensor_descriptor(
					input_strided_data_desc,
					input_strided_config,
					entry_count,
					input_strides);
				cudnn_util::set_tensor_descriptor(
					input_converted_CNHW_data_desc,
					input_strided_config,
					entry_count,
					input_converted_CNHW_strides);
				float alpha = 1.0F;
				float beta = 0.0F;
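				// With alpha = 1 and beta = 0, cudnnAddTensor acts as a layout-converting copy
				// from the strided input into the packed CNHW working buffer.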
				cudnn_safe_call(cudnnAddTensor(
					cuda_config->get_cudnn_handle(),
					&alpha,
					input_strided_data_desc,
					*input_buffers[0],
					&beta,
					input_converted_CNHW_data_desc,
					*temporary_working_per_entry_buffer));
			}

			{
				cusparse_safe_call(cusparseSetStream(cuda_config->get_cusparse_handle(), stream_id));
				float alpha = 1.0F;
				float beta = 0.0F;
				cusparseMatDescr_t mat_descr;
				cusparse_safe_call(cusparseCreateMatDescr(&mat_descr));
				cusparse_safe_call(cusparseScsrmm2(
					cuda_config->get_cusparse_handle(),
					CUSPARSE_OPERATION_NON_TRANSPOSE,
					CUSPARSE_OPERATION_TRANSPOSE,
					output_configuration_specific.feature_map_count,
					entry_count * output_elem_count_per_feature_map,
					input_strided_config.feature_map_count,
					feature_map_connection_count,
					&alpha,
					mat_descr,
					*data[0],
					*data_custom[1],
					*data_custom[0],
					*temporary_working_per_entry_buffer,
					entry_count * output_elem_count_per_feature_map,
					&beta,
					((float *)*temporary_working_per_entry_buffer) + input_converted_elem_count_per_entry_aligned * entry_count,
					output_configuration_specific.feature_map_count));
			}

			// Convert output from NHWC to NCHW
			{
				cuda_util::transpose(
					*cuda_config,
					((float *)*temporary_working_per_entry_buffer) + input_converted_elem_count_per_entry_aligned * entry_count,
					*output_buffer,
					output_configuration_specific.feature_map_count,
					output_elem_count_per_feature_map,
					entry_count,
					stream_id);
			}

			// Add bias
			if (bias)
			{
				cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));
				cudnn_util::set_tensor_descriptor(
					output_data_desc,
					output_configuration_specific,
					entry_count);
				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnAddTensor(
					cuda_config->get_cudnn_handle(),
					&alpha,
					bias_desc,
					*data[1],
					&beta,
					output_data_desc,
					*output_buffer));
			}
		}
Code example #24
		void convolution_layer_updater_cuda::enqueue_update_weights(
			unsigned int offset_input_entry_id,
			cudaStream_t stream_id,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& gradient,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& data_custom,
			const std::vector<const_cuda_linear_buffer_device_smart_ptr>& schema_data,
			cuda_linear_buffer_device_smart_ptr output_errors_buffer,
			const_cuda_linear_buffer_device_smart_ptr input_neurons_buffer,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& additional_buffers,
			std::vector<cuda_memobject_smart_ptr>& dynamic_memobjects,
			unsigned int entry_count,
			bool force_deterministic)
		{
			cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));

			cudnn_safe_call(cudnnSetTensor4dDescriptor(
				input_data_desc,
				CUDNN_TENSOR_NCHW,
				CUDNN_DATA_FLOAT,
				entry_count,
				input_configuration_specific.feature_map_count,
				(input_configuration_specific.dimension_sizes.size() > 1) ? input_configuration_specific.dimension_sizes[1] : 1,
				input_configuration_specific.dimension_sizes[0]));
			cudnn_safe_call(cudnnSetTensor4dDescriptor(
				output_data_desc,
				CUDNN_TENSOR_NCHW,
				CUDNN_DATA_FLOAT,
				entry_count,
				output_configuration_specific.feature_map_count,
				(output_configuration_specific.dimension_sizes.size() > 1) ? output_configuration_specific.dimension_sizes[1] : 1,
				output_configuration_specific.dimension_sizes[0]));

			{
				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnConvolutionBackwardFilter(
					cuda_config->get_cudnn_handle(),
					&alpha,
					input_data_desc,
					(const float *)(*input_neurons_buffer) + input_elem_count_per_entry * offset_input_entry_id,
					output_data_desc,
					*output_errors_buffer,
					convolution_desc,
					&beta,
					weights_desc,
					*gradient[0]));
			}

			{
				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnConvolutionBackwardBias(
					cuda_config->get_cudnn_handle(),
					&alpha,
					output_data_desc,
					*output_errors_buffer,
					&beta,
					bias_desc,
					*gradient[1]));
			}
		}
Code example #25
		void convolution_1x1_layer_tester_cuda::enqueue_test(
			cudaStream_t stream_id,
			const std::vector<const_cuda_linear_buffer_device_smart_ptr>& schema_data,
			const std::vector<const_cuda_linear_buffer_device_smart_ptr>& data,
			const std::vector<const_cuda_linear_buffer_device_smart_ptr>& data_custom,
			cuda_linear_buffer_device_smart_ptr input_buffer,
			const std::vector<cuda_linear_buffer_device_smart_ptr>& additional_buffers,
			unsigned int entry_count)
		{
			{
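				// A 1x1 convolution is computed as a GEMM over feature maps: transpose the input,
				// multiply by the weight matrix, then transpose the result back.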
				cuda_util::transpose(
					*cuda_config,
					*input_buffer,
					*additional_buffers[1],
					input_elem_count_per_feature_map,
					input_configuration_specific.feature_map_count,
					entry_count,
					stream_id);

				cublas_safe_call(cublasSetStream(cuda_config->get_cublas_handle(), stream_id));
				float alpha = 1.0F;
				float beta = 0.0F;
				cublas_safe_call(cublasSgemm(
					cuda_config->get_cublas_handle(),
					CUBLAS_OP_T,
					CUBLAS_OP_N,
					output_configuration_specific.feature_map_count,
					entry_count * input_elem_count_per_feature_map,
					input_configuration_specific.feature_map_count,
					&alpha,
					*data[0],
					input_configuration_specific.feature_map_count,
					*additional_buffers[1],
					input_configuration_specific.feature_map_count,
					&beta,
					*additional_buffers[2],
					output_configuration_specific.feature_map_count));

				cuda_util::transpose(
					*cuda_config,
					*additional_buffers[2],
					*additional_buffers[0],
					output_configuration_specific.feature_map_count,
					output_elem_count_per_feature_map,
					entry_count,
					stream_id);
			}

			// Add bias
			{
				cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));
				cudnn_safe_call(cudnnSetTensor4dDescriptor(
					output_data_desc,
					CUDNN_TENSOR_NCHW,
					CUDNN_DATA_FLOAT,
					entry_count,
					output_configuration_specific.feature_map_count,
					1,
					output_elem_count_per_feature_map));

				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnAddTensor(
					cuda_config->get_cudnn_handle(),
					CUDNN_ADD_SAME_C,
					&alpha,
					bias_desc,
					*data[1],
					&beta,
					output_data_desc,
					*additional_buffers[0]));
			}
		}
Code example #26
		void convolution_layer_updater_cuda::enqueue_backward_weights_propagation(
			cudaStream_t stream_id,
			const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data,
			const std::vector<cuda_linear_buffer_device::ptr>& gradient,
			const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom,
			const std::vector<cuda_linear_buffer_device::const_ptr>& input_neurons_buffers,
			cuda_linear_buffer_device::const_ptr output_errors_buffer,
			const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data,
			cuda_linear_buffer_device::ptr temporary_working_fixed_buffer,
			cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer,
			cuda_linear_buffer_device::const_ptr temporary_fixed_buffer,
			cuda_linear_buffer_device::const_ptr temporary_per_entry_buffer,
			unsigned int entry_count)
		{
			cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id));

			cudnn_util::set_tensor_descriptor(
				input_data_desc,
				input_configuration_specific_list[0],
				entry_count);
			cudnn_util::set_tensor_descriptor(
				output_data_desc,
				output_configuration_specific,
				entry_count);

			{
				void * workspace = 0;
				size_t workspace_size = 0;
				if (temporary_working_fixed_buffer)
				{
					workspace = *temporary_working_fixed_buffer;
					workspace_size = temporary_working_fixed_buffer->get_size();
				}

				cudnnConvolutionBwdFilterAlgo_t algo = cuda_config->cudnn_find_convolution_backward_weights_algo(
					input_data_desc,
					weights_desc,
					convolution_desc,
					output_data_desc,
					*input_neurons_buffers[0],
					*output_errors_buffer,
					(unsigned char *)workspace,
					(unsigned char *)workspace + update_weights_find_algo_working_buffer_size,
					workspace_size - update_weights_find_algo_working_buffer_size);

				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnConvolutionBackwardFilter(
					cuda_config->get_cudnn_handle(),
					&alpha,
					input_data_desc,
					*input_neurons_buffers[0],
					output_data_desc,
					*output_errors_buffer,
					convolution_desc,
					algo,
					workspace,
					workspace_size,
					&beta,
					weights_desc,
					*gradient[0]));
			}

			if (bias)
			{
				float alpha = 1.0F;
				float beta = 1.0F;
				cudnn_safe_call(cudnnConvolutionBackwardBias(
					cuda_config->get_cudnn_handle(),
					&alpha,
					output_data_desc,
					*output_errors_buffer,
					&beta,
					bias_desc,
					*gradient[1]));
			}
		}