Example #1
 NeuralNetwork::NeuralNetwork(std::vector<int> sizes) : nlayers_(sizes.size()), sizes_(sizes) {
     // Seed the Mersenne Twister engine with the current time
     random_gen_ = boost::mt11213b(time(0));
     
     // Gaussian distribution generator
     boost::normal_distribution<> nd(0.0, 1.0);
     boost::variate_generator<boost::mt11213b&, boost::normal_distribution<>> var_gen(random_gen_, nd);
     
     // Initialize the bias vectors
     for (size_t i = 1; i < sizes_.size(); ++i) {
         // Each layer after the input layer has a bias vector; each element corresponds to one neuron in that layer
         Vector biases(sizes_[i]);
         for (int j = 0; j < sizes_[i]; ++j) {
             biases(j) = var_gen();
         }
         biases_.push_back(biases);
     }
     
     // Initialize the weight matrices
     for (size_t i = 0; i < sizes_.size() - 1; ++i) {
         // There is a weight matrix between each pair of adjacent layers: if layer i and layer i + 1
         // have m and n neurons respectively, the weight matrix between them has dimensions n x m
         Matrix weights(sizes_[i + 1], sizes_[i]);
         for (int row = 0; row < sizes_[i + 1]; ++row) {
             for (int col = 0; col < sizes_[i]; ++col) {
                 weights(row, col) = var_gen();
             }
         }
         weights_.push_back(weights);
     }
     
     if (biases_.size() != weights_.size()) {
         std::cout << "Biases and weights size not equal!";
     }
 }
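A minimal construction sketch for the class above (not part of the original source); the layer sizes are illustrative and the class declaration is assumed to be visible.

#include <vector>

int main() {
    // Three layers: 784 inputs, 30 hidden neurons, 10 outputs.
    // The constructor above then builds 2 bias vectors (lengths 30 and 10)
    // and 2 weight matrices (30x784 and 10x30), all filled with N(0, 1) samples.
    NeuralNetwork net(std::vector<int>{784, 30, 10});
    return 0;
}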
Example #2
// Allocate (or reallocate) the gradient and curvature buffers so their shapes
// match the current weights, biases and layer output.
void Layer<TT,T>::PrepareForTraining()
{
	if(!de_dw_.HaveSameSize(weights()))
		de_dw_ = Tensor<T>(weights().dims());
	if(!de_db_.HaveSameSize(biases()))
		de_db_ = Tensor<T>(biases().dims());
	if(!d2e_dw2_.HaveSameSize(weights()))
		d2e_dw2_ = Tensor<T>(weights().dims());
	if(!d2e_db2_.HaveSameSize(biases()))
		d2e_db2_ = Tensor<T>(biases().dims());
	if (!d2e_dx2_.HaveSameSize(out()))	
		d2e_dx2_ = out();
	if (!de_dx_.HaveSameSize(out()))	
		de_dx_ = out();
}
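A brief usage sketch (assumed calling code, not from the original project); "layer" and "num_epochs" are hypothetical names. The point of the size checks above is that repeated calls are cheap once the buffers already match the weight, bias and output shapes.

// Hypothetical training setup: allocate the gradient and curvature buffers once,
// then reuse them across epochs; re-calling is a no-op while shapes are unchanged.
layer.PrepareForTraining();
for (int epoch = 0; epoch < num_epochs; ++epoch) {
	// ... forward/backward passes accumulate into de_dw_, de_db_, d2e_dw2_, d2e_db2_ ...
}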
Example #3
std::unique_ptr<TorchStage> SpatialConvolution::loadFromFile(
    std::ifstream& file) {
  int32_t filt_width, filt_height, n_input_features, n_output_features, padding;
  file.read((char*)(&filt_width), sizeof(filt_width));
  file.read((char*)(&filt_height), sizeof(filt_height));
  file.read((char*)(&n_input_features), sizeof(n_input_features));
  file.read((char*)(&n_output_features), sizeof(n_output_features));
  file.read((char*)(&padding), sizeof(padding));

#if defined(DEBUG) || defined(_DEBUG)
  std::cout << "\t\t(fout,fin,kh,kw,pad)=(" << n_output_features << ","
            << n_input_features << "," << filt_height << "," << filt_width
            << "," << padding << ")" << std::endl;
#endif

  std::unique_ptr<SpatialConvolution> ret(new SpatialConvolution(
      n_input_features, n_output_features, filt_height, filt_width, padding));

  int32_t filt_dim = filt_width * filt_height;
  std::unique_ptr<float[]> weights(
      new float[n_output_features * n_input_features * filt_dim]);
  for (int32_t i = 0; i < n_output_features * n_input_features; i++) {
    float* bank = &weights[i * filt_dim];
    file.read((char*)(bank), sizeof(bank[0]) * filt_dim);
  }
  ret->setWeights(weights.get());

  std::unique_ptr<float[]> biases(new float[n_output_features]);
  file.read((char*)(biases.get()), sizeof(biases[0]) * n_output_features);
  ret->setBiases(biases.get());

  return std::unique_ptr<TorchStage>(std::move(ret));
}
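A hedged caller sketch for the loader above (not part of the original source); the wrapper name and path handling are assumptions, and loadFromFile is assumed to be a static member, as the call below implies. The stream must be opened in binary mode so the raw int32/float reads see the bytes unmodified.

#include <fstream>
#include <memory>

std::unique_ptr<TorchStage> loadConvStage(const char* path) {
  std::ifstream file(path, std::ios::in | std::ios::binary);
  if (!file.is_open()) {
    return nullptr;  // caller decides how to report the failure
  }
  return SpatialConvolution::loadFromFile(file);
}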
Example #4
	// Print some information which is useful for debugging
	void debug(const Eigen::VectorXd& parameters)
	{
		std::cout << "There are " << count_weights_layers() << " layers of weights" << std::endl << std::endl;
		for (unsigned int i = 0; i < count_weights_layers(); i++) {
			std::cout << "W[" << i << "] = " << weights(parameters, i) << std::endl << std::endl;
		}
		for (unsigned int i = 0; i < count_weights_layers(); i++) {
			std::cout << "b[" << i << "] = " << biases(parameters, i) << std::endl << std::endl;
		}
	}
	void potential_and_gradient(const Eigen::VectorXd& parameters, const Eigen::VectorXd& hyperparameters, View& view, double& potential, Eigen::VectorXd& gradient)
	{
		// Loop over layers to calculate the weights part of the potential and the non-data part of the gradient.
		// A single pair of hyperparameters is shared across all layers here; a per-layer variant would index
		// hyperparameters[layer_idx * 2] and hyperparameters[layer_idx * 2 + 1] instead.
		potential = 0;
		for (size_t layer_idx = 0; layer_idx < count_weights_layers(); layer_idx++) {
			potential -= 0.5 * (hyperparameters[0] * weights(parameters, layer_idx).squaredNorm() + hyperparameters[1] * biases(parameters, layer_idx).squaredNorm());
			weights(gradient, layer_idx) = (weights(parameters, layer_idx).array() * -hyperparameters[0]).matrix();
			biases(gradient, layer_idx) = (biases(parameters, layer_idx).array() * -hyperparameters[1]).matrix();
		}

		// Calculate output part of potential and gradient
		for (size_t data_idx = 0; data_idx < view.size(); data_idx++) {
			// Get the class label for this observation
			size_t class_idx = get_nonzero_idx(view.second(data_idx));

			// Calculate the output for this sample and the gradient of that output with respect to the parameters
			log_gradient_and_output(class_idx, view.first(data_idx), parameters, outputs(), temp_gradient_);

			gradient = gradient + temp_gradient_;

			// NOTE: Does it matter here when -E[theta] = -INF?
			//potential += log(outputs()[class_idx]);
			potential += outputs()[class_idx];
		}

		// DEBUG: Check that all entries are finite and not NaN
		/*if (!std::isfinite(potential)) {
			if (std::isnan(potential))
				std::cout << "NaN: Potential" << std::endl;
			else if (std::isinf(potential))
				std::cout << "INF: Potential" << std::endl;
		}
		for (size_t idx = 0; idx < static_cast<size_t>(gradient.size()); idx++) {
			if (!std::isfinite(gradient[idx])) {
				if (std::isnan(gradient[idx]))
					std::cout << "NaN: Gradient[" << idx << "]" << std::endl;
				else if (std::isinf(gradient[idx]))
					std::cout << "NaN: Gradient[" << idx << "]" << std::endl;
			}
		}*/
	}
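For reference, the quantity accumulated above is a log posterior up to an additive constant: the layer loop contributes the log of an isotropic Gaussian prior with precisions hyperparameters[0] (weights) and hyperparameters[1] (biases), and each data term adds the per-sample output (apparently a log-probability, given the name log_gradient_and_output) for the true class. A hedged call sketch follows; "model", "view" and "num_parameters" are assumed names, not part of the original class.

// Hypothetical call site: as written above, only hyperparameters[0] and
// hyperparameters[1] are read, so a 2-element vector is enough.
Eigen::VectorXd parameters = Eigen::VectorXd::Random(num_parameters);
Eigen::VectorXd hyperparameters(2);
hyperparameters << 1.0, 1.0;   // weight and bias precisions
Eigen::VectorXd gradient = Eigen::VectorXd::Zero(num_parameters);
double potential = 0.0;
model.potential_and_gradient(parameters, hyperparameters, view, potential, gradient);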