void fully_connected_layer_updater_cuda::enqueue_backward_weights_propagation( cudaStream_t stream_id, const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data, const std::vector<cuda_linear_buffer_device::ptr>& gradient, const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom, const std::vector<cuda_linear_buffer_device::const_ptr>& input_neurons_buffers, cuda_linear_buffer_device::const_ptr output_errors_buffer, const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data, cuda_linear_buffer_device::ptr temporary_working_fixed_buffer, cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer, cuda_linear_buffer_device::const_ptr temporary_fixed_buffer, cuda_linear_buffer_device::const_ptr temporary_per_entry_buffer, unsigned int entry_count) { // Update weights { cublas_safe_call(cublasSetStream(cuda_config->get_cublas_handle(), stream_id)); float alpha = 1.0F; float beta = 1.0F; cublas_safe_call(cublasSgemm( cuda_config->get_cublas_handle(), CUBLAS_OP_N, CUBLAS_OP_T, input_elem_count_per_entry_list[0], output_elem_count_per_entry, entry_count, &alpha, *input_neurons_buffers[0], input_elem_count_per_entry_list[0], *output_errors_buffer, output_elem_count_per_entry, &beta, *gradient[0], input_elem_count_per_entry_list[0])); } // Update biases if (bias) { cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id)); cudnn_util::set_tensor_descriptor( output_data_desc, output_configuration_specific, entry_count); float alpha = 1.0F; float beta = 1.0F; cudnn_safe_call(cudnnConvolutionBackwardBias( cuda_config->get_cudnn_handle(), &alpha, output_data_desc, *output_errors_buffer, &beta, bias_desc, *gradient[1])); } }
void convolution_layer_updater_cuda::enqueue_update_weights( unsigned int offset_input_entry_id, cudaStream_t stream_id, const std::vector<cuda_linear_buffer_device_smart_ptr>& gradient, const std::vector<cuda_linear_buffer_device_smart_ptr>& data_custom, const std::vector<const_cuda_linear_buffer_device_smart_ptr>& schema_data, cuda_linear_buffer_device_smart_ptr output_errors_buffer, const_cuda_linear_buffer_device_smart_ptr input_neurons_buffer, const std::vector<cuda_linear_buffer_device_smart_ptr>& additional_buffers, std::vector<cuda_memobject_smart_ptr>& dynamic_memobjects, unsigned int entry_count, bool force_deterministic) { cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id)); cudnn_safe_call(cudnnSetTensor4dDescriptor( input_data_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, entry_count, input_configuration_specific.feature_map_count, (input_configuration_specific.dimension_sizes.size() > 1) ? input_configuration_specific.dimension_sizes[1] : 1, input_configuration_specific.dimension_sizes[0])); cudnn_safe_call(cudnnSetTensor4dDescriptor( output_data_desc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, entry_count, output_configuration_specific.feature_map_count, (output_configuration_specific.dimension_sizes.size() > 1) ? output_configuration_specific.dimension_sizes[1] : 1, output_configuration_specific.dimension_sizes[0])); { float alpha = 1.0F; float beta = 1.0F; cudnn_safe_call(cudnnConvolutionBackwardFilter( cuda_config->get_cudnn_handle(), &alpha, input_data_desc, (const float *)(*input_neurons_buffer) + input_elem_count_per_entry * offset_input_entry_id, output_data_desc, *output_errors_buffer, convolution_desc, &beta, weights_desc, *gradient[0])); } { float alpha = 1.0F; float beta = 1.0F; cudnn_safe_call(cudnnConvolutionBackwardBias( cuda_config->get_cudnn_handle(), &alpha, output_data_desc, *output_errors_buffer, &beta, bias_desc, *gradient[1])); } }
void convolution_layer_updater_cuda::enqueue_backward_weights_propagation( cudaStream_t stream_id, const std::vector<cuda_linear_buffer_device::const_ptr>& schema_data, const std::vector<cuda_linear_buffer_device::ptr>& gradient, const std::vector<cuda_linear_buffer_device::const_ptr>& data_custom, const std::vector<cuda_linear_buffer_device::const_ptr>& input_neurons_buffers, cuda_linear_buffer_device::const_ptr output_errors_buffer, const std::vector<cuda_linear_buffer_device::const_ptr>& persistent_working_data, cuda_linear_buffer_device::ptr temporary_working_fixed_buffer, cuda_linear_buffer_device::ptr temporary_working_per_entry_buffer, cuda_linear_buffer_device::const_ptr temporary_fixed_buffer, cuda_linear_buffer_device::const_ptr temporary_per_entry_buffer, unsigned int entry_count) { cudnn_safe_call(cudnnSetStream(cuda_config->get_cudnn_handle(), stream_id)); cudnn_util::set_tensor_descriptor( input_data_desc, input_configuration_specific_list[0], entry_count); cudnn_util::set_tensor_descriptor( output_data_desc, output_configuration_specific, entry_count); { void * workspace = 0; size_t workspace_size = 0; if (temporary_working_fixed_buffer) { workspace = *temporary_working_fixed_buffer; workspace_size = temporary_working_fixed_buffer->get_size(); } cudnnConvolutionBwdFilterAlgo_t algo = cuda_config->cudnn_find_convolution_backward_weights_algo( input_data_desc, weights_desc, convolution_desc, output_data_desc, *input_neurons_buffers[0], *output_errors_buffer, (unsigned char *)workspace, (unsigned char *)workspace + update_weights_find_algo_working_buffer_size, workspace_size - update_weights_find_algo_working_buffer_size); float alpha = 1.0F; float beta = 1.0F; cudnn_safe_call(cudnnConvolutionBackwardFilter( cuda_config->get_cudnn_handle(), &alpha, input_data_desc, *input_neurons_buffers[0], output_data_desc, *output_errors_buffer, convolution_desc, algo, workspace, workspace_size, &beta, weights_desc, *gradient[0])); } if (bias) { float alpha = 1.0F; float beta = 1.0F; cudnn_safe_call(cudnnConvolutionBackwardBias( cuda_config->get_cudnn_handle(), &alpha, output_data_desc, *output_errors_buffer, &beta, bias_desc, *gradient[1])); } }