NeuralNetwork::NeuralNetwork(std::vector<int> sizes)
    : nlayers_(sizes.size()), sizes_(sizes) {
  random_gen_ = boost::mt11213b(time(0));
  // Gaussian (normal) distribution generator
  boost::normal_distribution<> nd(0.0, 1.0);
  boost::variate_generator<boost::mt11213b&, boost::normal_distribution<>>
      var_gen(random_gen_, nd);

  // Initialize the bias vectors
  for (size_t i = 1; i < sizes_.size(); ++i) {
    // Every layer except the input layer has one bias vector; its entries
    // correspond to that layer's neurons.
    Vector biases(sizes_[i]);
    for (int j = 0; j < sizes_[i]; ++j) {
      biases(j) = var_gen();
    }
    biases_.push_back(biases);
  }

  // Initialize the weight matrices
  for (size_t i = 0; i < sizes_.size() - 1; ++i) {
    // Every pair of adjacent layers shares one weight matrix; if layer i and
    // layer i + 1 have m and n neurons respectively, the matrix between them
    // has dimensions n x m.
    Matrix weights(sizes_[i + 1], sizes_[i]);
    for (int row = 0; row < sizes_[i + 1]; ++row) {
      for (int col = 0; col < sizes_[i]; ++col) {
        weights(row, col) = var_gen();
      }
    }
    weights_.push_back(weights);
  }

  if (biases_.size() != weights_.size()) {
    std::cout << "Biases and weights size not equal!" << std::endl;
  }
}
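// Usage sketch (an assumption, not part of the original source): constructing
// a 784-30-10 network with the constructor above. The layer sizes and the
// function name example_construct_network are purely illustrative.
// Requires #include <vector>.
void example_construct_network() {
  std::vector<int> sizes = {784, 30, 10};
  NeuralNetwork net(sizes);
  // After construction the network holds 2 bias vectors and 2 weight
  // matrices, one per pair of adjacent layers.
}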
void Layer<TT,T>::PrepareForTraining() {
  // Lazily (re)allocate the first- and second-order gradient buffers so they
  // match the current weight, bias, and output tensor shapes.
  if (!de_dw_.HaveSameSize(weights())) de_dw_ = Tensor<T>(weights().dims());
  if (!de_db_.HaveSameSize(biases())) de_db_ = Tensor<T>(biases().dims());
  if (!d2e_dw2_.HaveSameSize(weights())) d2e_dw2_ = Tensor<T>(weights().dims());
  if (!d2e_db2_.HaveSameSize(biases())) d2e_db2_ = Tensor<T>(biases().dims());
  if (!d2e_dx2_.HaveSameSize(out())) d2e_dx2_ = out();
  if (!de_dx_.HaveSameSize(out())) de_dx_ = out();
}
std::unique_ptr<TorchStage> SpatialConvolution::loadFromFile(
    std::ifstream& file) {
  int32_t filt_width, filt_height, n_input_features, n_output_features, padding;
  file.read((char*)(&filt_width), sizeof(filt_width));
  file.read((char*)(&filt_height), sizeof(filt_height));
  file.read((char*)(&n_input_features), sizeof(n_input_features));
  file.read((char*)(&n_output_features), sizeof(n_output_features));
  file.read((char*)(&padding), sizeof(padding));

#if defined(DEBUG) || defined(_DEBUG)
  std::cout << "\t\t(fout,fin,kh,kw,pad)=(" << n_output_features << ","
            << n_input_features << "," << filt_height << "," << filt_width
            << "," << padding << ")" << std::endl;
#endif

  std::unique_ptr<SpatialConvolution> ret(new SpatialConvolution(
      n_input_features, n_output_features, filt_height, filt_width, padding));

  int32_t filt_dim = filt_width * filt_height;
  std::unique_ptr<float[]> weights(
      new float[n_output_features * n_input_features * filt_dim]);
  for (int32_t i = 0; i < n_output_features * n_input_features; i++) {
    float* bank = &weights[i * filt_dim];
    file.read((char*)(bank), sizeof(bank[0]) * filt_dim);
  }
  ret->setWeights(weights.get());

  std::unique_ptr<float[]> biases(new float[n_output_features]);
  file.read((char*)(biases.get()), sizeof(biases[0]) * n_output_features);
  ret->setBiases(biases.get());

  return std::unique_ptr<TorchStage>(std::move(ret));
}
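// Usage sketch (an assumption, not from the original source): loadFromFile
// reads from an already-open binary stream positioned at the serialized layer
// header, so a caller might look like the function below. It assumes
// loadFromFile is declared static in the header (as the call syntax suggests);
// the function name and file path parameter are hypothetical.
// Requires #include <fstream>, <memory>, <string>.
std::unique_ptr<TorchStage> example_load_convolution(const std::string& path) {
  std::ifstream file(path, std::ios::in | std::ios::binary);
  if (!file.is_open()) {
    return nullptr;  // caller decides how to handle a missing model file
  }
  return SpatialConvolution::loadFromFile(file);
}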
// Print some information which is useful for debugging
void debug(const Eigen::VectorXd& parameters) {
  std::cout << "There are " << count_weights_layers()
            << " layers of weights" << std::endl << std::endl;
  for (unsigned int i = 0; i < count_weights_layers(); i++) {
    std::cout << "W[" << i << "] = " << weights(parameters, i)
              << std::endl << std::endl;
  }
  for (unsigned int i = 0; i < count_weights_layers(); i++) {
    std::cout << "b[" << i << "] = " << biases(parameters, i)
              << std::endl << std::endl;
  }
}
void potential_and_gradient(const Eigen::VectorXd& parameters,
                            const Eigen::VectorXd& hyperparameters, View& view,
                            double& potential, Eigen::VectorXd& gradient) {
  // Loop over layers to calculate the weights part of the potential and the
  // non-data part of the gradient (the Gaussian prior contribution).
  potential = 0;
  for (size_t layer_idx = 0; layer_idx < count_weights_layers(); layer_idx++) {
    // Per-layer hyperparameter variant, kept for reference:
    // potential -= 0.5 * (hyperparameters[layer_idx * 2] * weights(parameters, layer_idx).squaredNorm()
    //                     + hyperparameters[layer_idx * 2 + 1] * biases(parameters, layer_idx).squaredNorm());
    potential -= 0.5 * (hyperparameters[0] * weights(parameters, layer_idx).squaredNorm()
                        + hyperparameters[1] * biases(parameters, layer_idx).squaredNorm());

    // TODO: Debugging here!
    // Per-layer hyperparameter variant, kept for reference:
    // weights(gradient, layer_idx) = (weights(parameters, layer_idx).array() * -hyperparameters[layer_idx * 2]).matrix();
    // biases(gradient, layer_idx) = (biases(parameters, layer_idx).array() * -hyperparameters[layer_idx * 2 + 1]).matrix();
    weights(gradient, layer_idx) = (weights(parameters, layer_idx).array() * -hyperparameters[0]).matrix();
    biases(gradient, layer_idx) = (biases(parameters, layer_idx).array() * -hyperparameters[1]).matrix();
  }

  // Calculate the output (data) part of the potential and gradient
  for (size_t data_idx = 0; data_idx < view.size(); data_idx++) {
    // Get the class label for this observation
    size_t class_idx = get_nonzero_idx(view.second(data_idx));

    // Calculate the log-output for this sample, and the gradient of the
    // log-output with respect to the parameters.
    log_gradient_and_output(class_idx, view.first(data_idx), parameters,
                            outputs(), temp_gradient_);
    gradient = gradient + temp_gradient_;

    // NOTE: Does it matter here when -E[theta] = -INF?
    // outputs() already holds log-probabilities, so no further log() is taken
    // (previously: potential += log(outputs()[class_idx]);).
    potential += outputs()[class_idx];
  }

  // DEBUG: Check that all entries are finite and not NaN
  /*if (!std::isfinite(potential)) {
    if (std::isnan(potential)) std::cout << "NaN: Potential" << std::endl;
    else if (std::isinf(potential)) std::cout << "INF: Potential" << std::endl;
  }
  for (size_t idx = 0; idx < static_cast<size_t>(gradient.size()); idx++) {
    if (!std::isfinite(gradient[idx])) {
      if (std::isnan(gradient[idx])) std::cout << "NaN: Gradient[" << idx << "]" << std::endl;
      else if (std::isinf(gradient[idx])) std::cout << "INF: Gradient[" << idx << "]" << std::endl;
    }
  }*/
}
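// Summary of the quantity computed above (a reading of the code, stated as an
// assumption about intent): with Gaussian priors on weights and biases, the
// routine accumulates the log-posterior
//
//   potential = -0.5 * sum_l ( h0 * ||W_l||^2 + h1 * ||b_l||^2 )
//               + sum_n log p(y_n | x_n, theta)
//
// where h0 = hyperparameters[0], h1 = hyperparameters[1], W_l and b_l are the
// layer-l weights and biases, and log p(y_n | x_n, theta) is the log-output
// outputs()[class_idx] returned by log_gradient_and_output. The gradient
// vector is filled with the matching partial derivatives d potential / d theta:
// the prior term contributes -h0 * W_l and -h1 * b_l, and each data point
// contributes temp_gradient_.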