void CNeuralLinearLayer::compute_activations(SGVector<float64_t> parameters,
	CDynamicObjectArray* layers)
{
	float64_t* biases = parameters.vector;

#ifdef HAVE_EIGEN3
	typedef Eigen::Map<Eigen::MatrixXd> EMappedMatrix;
	typedef Eigen::Map<Eigen::VectorXd> EMappedVector;

	EMappedMatrix A(m_activations.matrix, m_num_neurons, m_batch_size);
	EMappedVector B(biases, m_num_neurons);

	A.colwise() = B;
#else
	for (int32_t i=0; i<m_num_neurons; i++)
	{
		for (int32_t j=0; j<m_batch_size; j++)
		{
			m_activations[i+j*m_num_neurons] = biases[i];
		}
	}
#endif

	int32_t weights_index_offset = m_num_neurons;
	for (int32_t l=0; l<m_input_indices.vlen; l++)
	{
		CNeuralLayer* layer =
			(CNeuralLayer*)layers->element(m_input_indices[l]);

		float64_t* weights = parameters.vector + weights_index_offset;
		weights_index_offset += m_num_neurons*layer->get_num_neurons();

#ifdef HAVE_EIGEN3
		EMappedMatrix W(weights, m_num_neurons, layer->get_num_neurons());
		EMappedMatrix X(layer->get_activations().matrix,
			layer->get_num_neurons(), m_batch_size);

		A += W*X;
#else
		// activations += weights*previous_layer_activations
		for (int32_t i=0; i<m_num_neurons; i++)
		{
			for (int32_t j=0; j<m_batch_size; j++)
			{
				float64_t sum = 0;
				for (int32_t k=0; k<layer->get_num_neurons(); k++)
				{
					sum += weights[i+k*m_num_neurons]*
						layer->get_activations()(k,j);
				}
				m_activations[i+j*m_num_neurons] += sum;
			}
		}
#endif
		SG_UNREF(layer);
	}
}
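// Illustrative sketch (not part of the class): both branches above depend on
// SGMatrix storing its elements column-major, which matches Eigen's default
// layout, so mapping m_activations.matrix with Eigen::Map aliases the same
// memory the fallback loops index via [i + j*num_rows]. The parameter vector
// is laid out as [biases, weights per input layer], hence the initial
// weights_index_offset of m_num_neurons. A minimal standalone check of the
// layout assumption, using only Eigen and the standard library:
#include <Eigen/Dense>
#include <cassert>

static void column_major_layout_check()
{
	const int rows = 3, cols = 2;
	double buffer[rows*cols] = {0};

	// write through an Eigen map, exactly as the HAVE_EIGEN3 branch does
	Eigen::Map<Eigen::MatrixXd> A(buffer, rows, cols);
	Eigen::VectorXd b(rows);
	b << 1.0, 2.0, 3.0;
	A.colwise() = b; // broadcast the bias vector into every column

	// read back with the raw indexing used by the fallback branch
	for (int i=0; i<rows; i++)
		for (int j=0; j<cols; j++)
			assert(buffer[i + j*rows] == b[i]);
}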
void CConvolutionalFeatureMap::compute_activations(
	SGVector<float64_t> parameters,
	CDynamicObjectArray* layers,
	SGVector<int32_t> input_indices,
	SGMatrix<float64_t> activations)
{
	int32_t batch_size = activations.num_cols;

	// initialize the map's portion of the activations matrix with the bias,
	// which is the first entry in the map's parameter vector
	float64_t bias = parameters[0];
	for (int32_t i=0; i<m_output_num_neurons; i++)
	{
		for (int32_t j=0; j<batch_size; j++)
		{
			activations(i+m_row_offset,j) = bias;
		}
	}

	// accumulate the convolution of each input map with its own filter
	int32_t weights_index_offset = 1;
	for (int32_t l=0; l<input_indices.vlen; l++)
	{
		CNeuralLayer* layer =
			(CNeuralLayer*)layers->element(input_indices[l]);

		int32_t num_maps = layer->get_num_neurons()/m_input_num_neurons;

		for (int32_t m=0; m<num_maps; m++)
		{
			SGMatrix<float64_t> weights_matrix(
				parameters.vector+weights_index_offset,
				m_filter_height, m_filter_width, false);
			weights_index_offset += m_filter_height*m_filter_width;

			convolve(layer->get_activations(), weights_matrix, activations,
				false, false, m*m_input_num_neurons, m_row_offset);
		}

		SG_UNREF(layer);
	}

	// apply the map's nonlinearity, if any
	if (m_activation_function==CMAF_LOGISTIC)
	{
		for (int32_t i=0; i<m_output_num_neurons; i++)
			for (int32_t j=0; j<batch_size; j++)
				activations(i+m_row_offset,j) =
					1.0/(1.0+CMath::exp(-1.0*activations(i+m_row_offset,j)));
	}
	else if (m_activation_function==CMAF_RECTIFIED_LINEAR)
	{
		for (int32_t i=0; i<m_output_num_neurons; i++)
			for (int32_t j=0; j<batch_size; j++)
				activations(i+m_row_offset,j) =
					CMath::max<float64_t>(0, activations(i+m_row_offset,j));
	}
}
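// Illustrative sketch (not the class's convolve()): the helper's exact
// treatment of strides, kernel flipping, and the row-offset arguments is
// defined elsewhere in the class, but conceptually each call above
// accumulates a same-size 2D convolution of one input map with one filter
// into this map's region of the activations matrix. A simplified,
// zero-padded, stride-1 model of that accumulation; the function name and
// signature here are hypothetical:
#include <shogun/lib/SGMatrix.h>
using namespace shogun;

static void naive_convolve_accumulate(
	SGMatrix<float64_t> image,    // one input map, height x width
	SGMatrix<float64_t> weights,  // filter, odd dimensions assumed
	SGMatrix<float64_t> output)   // same size as image, accumulated into
{
	int32_t rx = weights.num_cols/2; // horizontal filter radius
	int32_t ry = weights.num_rows/2; // vertical filter radius

	for (int32_t x=0; x<image.num_cols; x++)
	{
		for (int32_t y=0; y<image.num_rows; y++)
		{
			float64_t sum = 0;
			for (int32_t dx=-rx; dx<=rx; dx++)
			{
				for (int32_t dy=-ry; dy<=ry; dy++)
				{
					int32_t x1 = x+dx, y1 = y+dy;
					// treat out-of-bounds pixels as zero padding
					if (x1>=0 && y1>=0 &&
						x1<image.num_cols && y1<image.num_rows)
						sum += weights(ry+dy, rx+dx)*image(y1, x1);
				}
			}
			// accumulate rather than overwrite, so successive input maps
			// add up, as successive convolve() calls do above
			output(y,x) += sum;
		}
	}
}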
void CNeuralLinearLayer::compute_gradients(
	SGVector<float64_t> parameters,
	SGMatrix<float64_t> targets,
	CDynamicObjectArray* layers,
	SGVector<float64_t> parameter_gradients)
{
	compute_local_gradients(targets);

	// compute bias gradients
	float64_t* bias_gradients = parameter_gradients.vector;
#ifdef HAVE_EIGEN3
	typedef Eigen::Map<Eigen::MatrixXd> EMappedMatrix;
	typedef Eigen::Map<Eigen::VectorXd> EMappedVector;

	EMappedVector BG(bias_gradients, m_num_neurons);
	EMappedMatrix LG(m_local_gradients.matrix, m_num_neurons, m_batch_size);

	BG = LG.rowwise().sum();
#else
	for (int32_t i=0; i<m_num_neurons; i++)
	{
		float64_t sum = 0;
		for (int32_t j=0; j<m_batch_size; j++)
		{
			sum += m_local_gradients[i+j*m_num_neurons];
		}
		bias_gradients[i] = sum;
	}
#endif

	// apply dropout to the local gradients
	if (dropout_prop>0.0)
	{
		int32_t len = m_num_neurons*m_batch_size;
		for (int32_t i=0; i<len; i++)
			m_local_gradients[i] *= m_dropout_mask[i];
	}

	int32_t weights_index_offset = m_num_neurons;
	for (int32_t l=0; l<m_input_indices.vlen; l++)
	{
		CNeuralLayer* layer =
			(CNeuralLayer*)layers->element(m_input_indices[l]);

		float64_t* weights = parameters.vector + weights_index_offset;
		float64_t* weight_gradients =
			parameter_gradients.vector + weights_index_offset;
		weights_index_offset += m_num_neurons*layer->get_num_neurons();

#ifdef HAVE_EIGEN3
		EMappedMatrix X(layer->get_activations().matrix,
			layer->get_num_neurons(), m_batch_size);
		EMappedMatrix W(weights, m_num_neurons, layer->get_num_neurons());
		EMappedMatrix WG(weight_gradients,
			m_num_neurons, layer->get_num_neurons());
		EMappedMatrix IG(layer->get_activation_gradients().matrix,
			layer->get_num_neurons(), m_batch_size);

		// compute weight gradients
		WG = LG*X.transpose();

		// compute input gradients
		if (!layer->is_input())
			IG += W.transpose()*LG;
#else
		// weight_gradients = local_gradients*previous_layer_activations.T
		for (int32_t i=0; i<m_num_neurons; i++)
		{
			for (int32_t j=0; j<layer->get_num_neurons(); j++)
			{
				float64_t sum = 0;
				for (int32_t k=0; k<m_batch_size; k++)
				{
					sum += m_local_gradients(i,k)*
						layer->get_activations()(j,k);
				}
				weight_gradients[i+j*m_num_neurons] = sum;
			}
		}

		if (!layer->is_input())
		{
			// input_gradients += weights.T*local_gradients
			for (int32_t i=0; i<layer->get_num_neurons(); i++)
			{
				for (int32_t j=0; j<m_batch_size; j++)
				{
					float64_t sum = 0;
					for (int32_t k=0; k<m_num_neurons; k++)
					{
						sum += weights[k+i*m_num_neurons]*
							m_local_gradients[k+j*m_num_neurons];
					}
					layer->get_activation_gradients()(i,j) += sum;
				}
			}
		}
#endif
		SG_UNREF(layer);
	}

	if (contraction_coefficient != 0)
	{
		compute_contraction_term_gradients(parameters, parameter_gradients);
	}
}
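// Illustrative sketch: a standard way to validate an analytic backprop
// implementation like compute_gradients() above is a central-difference
// check of each parameter against the layer's error function. This
// standalone helper shows the technique in isolation; the function name,
// signature, and tolerance handling are hypothetical, not part of the class:
#include <cmath>
#include <functional>
#include <vector>

// Returns the maximum absolute difference between analytic gradients and
// central finite differences of the error E(params).
static double max_gradient_error(
	std::function<double(const std::vector<double>&)> error,
	const std::vector<double>& analytic_gradients,
	std::vector<double> params,
	double epsilon = 1e-6)
{
	double max_err = 0;
	for (size_t k=0; k<params.size(); k++)
	{
		double saved = params[k];
		params[k] = saved + epsilon;
		double e_plus = error(params);
		params[k] = saved - epsilon;
		double e_minus = error(params);
		params[k] = saved; // restore before moving to the next parameter

		double numeric = (e_plus - e_minus)/(2*epsilon);
		double err = std::fabs(numeric - analytic_gradients[k]);
		if (err > max_err) max_err = err;
	}
	return max_err;
}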
void CConvolutionalFeatureMap::compute_gradients(
	SGVector<float64_t> parameters,
	SGMatrix<float64_t> activations,
	SGMatrix<float64_t> activation_gradients,
	CDynamicObjectArray* layers,
	SGVector<int32_t> input_indices,
	SGVector<float64_t> parameter_gradients)
{
	int32_t batch_size = activation_gradients.num_cols;

	if (m_activation_function==CMAF_LOGISTIC)
	{
		// chain rule for the logistic function: multiply the incoming
		// gradient by a*(1-a), where a is the stored activation
		for (int32_t i=0; i<m_output_num_neurons; i++)
		{
			for (int32_t j=0; j<batch_size; j++)
			{
				activation_gradients(i+m_row_offset,j) *=
					activations(i+m_row_offset,j)*
					(1.0-activations(i+m_row_offset,j));
			}
		}
	}
	else if (m_activation_function==CMAF_RECTIFIED_LINEAR)
	{
		// the gradient of max(0,z) is zero wherever the unit is inactive
		for (int32_t i=0; i<m_output_num_neurons; i++)
			for (int32_t j=0; j<batch_size; j++)
				if (activations(i+m_row_offset,j)==0)
					activation_gradients(i+m_row_offset,j) = 0;
	}

	// the bias gradient is the sum of the map's local gradients
	float64_t bias_gradient = 0;
	for (int32_t i=0; i<m_output_num_neurons; i++)
		for (int32_t j=0; j<batch_size; j++)
			bias_gradient += activation_gradients(i+m_row_offset,j);
	parameter_gradients[0] = bias_gradient;

	int32_t weights_index_offset = 1;
	for (int32_t l=0; l<input_indices.vlen; l++)
	{
		CNeuralLayer* layer =
			(CNeuralLayer*)layers->element(input_indices[l]);

		int32_t num_maps = layer->get_num_neurons()/m_input_num_neurons;

		for (int32_t m=0; m<num_maps; m++)
		{
			SGMatrix<float64_t> W(parameters.vector+weights_index_offset,
				m_filter_height, m_filter_width, false);
			SGMatrix<float64_t> WG(
				parameter_gradients.vector+weights_index_offset,
				m_filter_height, m_filter_width, false);
			weights_index_offset += m_filter_height*m_filter_width;

			compute_weight_gradients(layer->get_activations(),
				activation_gradients, WG, m*m_input_num_neurons,
				m_row_offset);

			// backpropagate to the input maps through the convolution,
			// using the flipped filter (flip=true)
			if (!layer->is_input())
				convolve(activation_gradients, W,
					layer->get_activation_gradients(), true, false,
					m_row_offset, m*m_input_num_neurons);
		}

		SG_UNREF(layer);
	}
}
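// Illustrative sketch: the CMAF_LOGISTIC branch above applies the chain rule
// for a = 1/(1+exp(-z)), whose derivative da/dz = a*(1-a) is expressed in
// terms of the stored activation a, which is why the code reads from
// activations rather than activation_gradients. A standalone numeric check
// of that identity (names here are illustrative only):
#include <cassert>
#include <cmath>

static void logistic_derivative_check()
{
	double z = 0.37, eps = 1e-6;
	auto sigma = [](double v) { return 1.0/(1.0+std::exp(-v)); };

	double a = sigma(z);
	double analytic = a*(1.0-a); // the a*(1-a) factor used in backprop
	double numeric = (sigma(z+eps)-sigma(z-eps))/(2*eps);
	assert(std::fabs(analytic-numeric) < 1e-8);
}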