Beispiel #1
0
Matrix<double> MeanSquaredError::calculate_terms_Jacobian(void) const {
// Control sentence

#ifndef NDEBUG

  check();

#endif

  // Neural network stuff

  const MultilayerPerceptron* multilayer_perceptron_pointer =
      neural_network_pointer->get_multilayer_perceptron_pointer();

  const unsigned inputs_number =
      multilayer_perceptron_pointer->get_inputs_number();
  const unsigned outputs_number =
      multilayer_perceptron_pointer->get_outputs_number();

  const unsigned layers_number =
      multilayer_perceptron_pointer->get_layers_number();

  const unsigned neural_parameters_number =
      multilayer_perceptron_pointer->count_parameters_number();

  Vector<Vector<Vector<double> > > first_order_forward_propagation(2);

  Vector<double> particular_solution;
  Vector<double> homogeneous_solution;

  const bool has_conditions_layer =
      neural_network_pointer->has_conditions_layer();

  const ConditionsLayer* conditions_layer_pointer =
      has_conditions_layer
          ? neural_network_pointer->get_conditions_layer_pointer()
          : NULL;

  // Data set stuff

  const Instances& instances = data_set_pointer->get_instances();

  const unsigned training_instances_number =
      instances.count_training_instances_number();

  Vector<double> inputs(inputs_number);
  Vector<double> targets(outputs_number);

  // Objective functional

  Vector<double> term(outputs_number);
  double term_norm;

  Vector<double> output_objective_gradient(outputs_number);

  Vector<Vector<double> > layers_delta(layers_number);
  Vector<double> point_gradient(neural_parameters_number);

  Matrix<double> terms_Jacobian(training_instances_number,
                                neural_parameters_number);

  // Main loop

  for (unsigned i = 0; i < training_instances_number; i++) {
    inputs = data_set_pointer->get_training_input_instance(i);

    targets = data_set_pointer->get_training_target_instance(i);

    first_order_forward_propagation = multilayer_perceptron_pointer
        ->calculate_first_order_forward_propagation(inputs);

    const Vector<Vector<double> >& layers_activation =
        first_order_forward_propagation[0];
    const Vector<Vector<double> >& layers_activation_derivative =
        first_order_forward_propagation[1];

    if (!has_conditions_layer) {
      const Vector<double>& outputs =
          first_order_forward_propagation[0][layers_number - 1];

      term = (outputs - targets);
      term_norm = term.calculate_norm();

      if (term_norm == 0.0) {
        output_objective_gradient.initialize(0.0);
      } else {
        output_objective_gradient = term / term_norm;
      }

      layers_delta = calculate_layers_delta(layers_activation_derivative,
                                            output_objective_gradient);
    } else {
      particular_solution =
          conditions_layer_pointer->calculate_particular_solution(inputs);
      homogeneous_solution =
          conditions_layer_pointer->calculate_homogeneous_solution(inputs);

      term = (particular_solution +
              homogeneous_solution * layers_activation[layers_number - 1] -
              targets) /
             sqrt((double)training_instances_number);
      term_norm = term.calculate_norm();

      if (term_norm == 0.0) {
        output_objective_gradient.initialize(0.0);
      } else {
        output_objective_gradient = term / term_norm;
      }

      layers_delta = calculate_layers_delta(layers_activation_derivative,
                                            homogeneous_solution,
                                            output_objective_gradient);
    }

    point_gradient =
        calculate_point_gradient(inputs, layers_activation, layers_delta);

    terms_Jacobian.set_row(i, point_gradient);
  }

  return (terms_Jacobian / sqrt((double)training_instances_number));
}
Beispiel #2
0
Vector<double> MeanSquaredError::calculate_gradient(void) const {
// Control sentence (if debug)

#ifndef NDEBUG

  check();

#endif

  // Multilayer percepron stuff

  const MultilayerPerceptron* multilayer_perceptron_pointer =
      neural_network_pointer->get_multilayer_perceptron_pointer();

  const unsigned inputs_number =
      multilayer_perceptron_pointer->get_inputs_number();
  const unsigned outputs_number =
      multilayer_perceptron_pointer->get_outputs_number();

  const unsigned layers_number =
      multilayer_perceptron_pointer->get_layers_number();

  const unsigned parameters_number =
      multilayer_perceptron_pointer->count_parameters_number();

  Vector<Vector<Vector<double> > > first_order_forward_propagation(2);

  const bool has_conditions_layer =
      neural_network_pointer->has_conditions_layer();

  const ConditionsLayer* conditions_layer_pointer =
      has_conditions_layer
          ? neural_network_pointer->get_conditions_layer_pointer()
          : NULL;

  Vector<double> particular_solution;
  Vector<double> homogeneous_solution;

  // Data set stuff

  const Instances& instances = data_set_pointer->get_instances();

  const unsigned training_instances_number =
      instances.count_training_instances_number();

  Vector<double> inputs(inputs_number);
  Vector<double> targets(outputs_number);

  // Mean squared error stuff

  Vector<Vector<double> > layers_delta;

  Vector<double> output_objective_gradient(outputs_number);

  Vector<double> point_gradient(parameters_number, 0.0);

  Vector<double> objective_gradient(parameters_number, 0.0);

  // Main loop

  for (unsigned i = 0; i < training_instances_number; i++) {
    inputs = data_set_pointer->get_training_input_instance(i);

    targets = data_set_pointer->get_training_target_instance(i);

    first_order_forward_propagation = multilayer_perceptron_pointer
        ->calculate_first_order_forward_propagation(inputs);

    const Vector<Vector<double> >& layers_activation =
        first_order_forward_propagation[0];
    const Vector<Vector<double> >& layers_activation_derivative =
        first_order_forward_propagation[1];

    if (!has_conditions_layer) {
      output_objective_gradient =
          (layers_activation[layers_number - 1] - targets) *
          (2.0 / (double)training_instances_number);

      layers_delta = calculate_layers_delta(layers_activation_derivative,
                                            output_objective_gradient);
    } else {
      particular_solution =
          conditions_layer_pointer->calculate_particular_solution(inputs);
      homogeneous_solution =
          conditions_layer_pointer->calculate_homogeneous_solution(inputs);

      output_objective_gradient =
          (particular_solution +
           homogeneous_solution * layers_activation[layers_number - 1] -
           targets) *
          (2.0 / (double)training_instances_number);

      layers_delta = calculate_layers_delta(layers_activation_derivative,
                                            homogeneous_solution,
                                            output_objective_gradient);
    }

    point_gradient =
        calculate_point_gradient(inputs, layers_activation, layers_delta);

    objective_gradient += point_gradient;
  }

  return (objective_gradient);
}
Beispiel #3
0
Vector<double> CrossEntropyError::calculate_gradient(void) const
{
   #ifdef __OPENNN_DEBUG__ 

   check();

   #endif

   // Neural network stuff

   const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();

   const size_t inputs_number = multilayer_perceptron_pointer->get_inputs_number();
   const size_t outputs_number = multilayer_perceptron_pointer->get_outputs_number();

   const size_t layers_number = multilayer_perceptron_pointer->get_layers_number();

   const size_t neural_parameters_number = multilayer_perceptron_pointer->count_parameters_number();

   const bool has_conditions_layer = neural_network_pointer->has_conditions_layer();

   const ConditionsLayer* conditions_layer_pointer = has_conditions_layer ? neural_network_pointer->get_conditions_layer_pointer() : NULL;

   #ifdef __OPENNN_DEBUG__ 

   std::ostringstream buffer;

   const Matrix<double> target_data = data_set_pointer->arrange_target_data();

   if(target_data < 0.0)
   {
      buffer << "OpenNN Exception: CrossEntropyError class.\n"
             << "Vector<double> calculate_gradient(void) const method.\n"
             << "Target data must be equal or greater than zero.\n";

      throw std::logic_error(buffer.str());
   }

   if(target_data > 1.0)
   {
      buffer << "OpenNN Exception: CrossEntropyError class.\n"
             << "Vector<double> calculate_gradient(void) const method.\n"
             << "Target data must be less or equal or than one.\n";

      throw std::logic_error(buffer.str());
   }

   #endif

   // Neural network stuff    

   Vector< Vector< Vector<double> > > first_order_forward_propagation(2); 

   Vector<double> particular_solution;
   Vector<double> homogeneous_solution;

   // Data set stuff

   const Instances& instances = data_set_pointer->get_instances();

   const size_t training_instances_number = instances.count_training_instances_number();

   const Vector<size_t> training_indices = instances.arrange_training_indices();

   size_t training_index;

   const Variables& variables = data_set_pointer->get_variables();

   const Vector<size_t> inputs_indices = variables.arrange_inputs_indices();
   const Vector<size_t> targets_indices = variables.arrange_targets_indices();

   Vector<double> inputs(inputs_number);
   Vector<double> targets(outputs_number);

   // Cross-entropy error stuff

   Vector<double> output_gradient(outputs_number);

   Vector< Vector<double> > layers_delta; 

   Vector<double> point_gradient(neural_parameters_number, 0.0);

   Vector<double> gradient(neural_parameters_number, 0.0);

   for(size_t i = 0; i < training_instances_number; i++)
   {
       training_index = training_indices[i];

      inputs = data_set_pointer->get_instance(training_index, inputs_indices);

      targets = data_set_pointer->get_instance(training_index, targets_indices);

      first_order_forward_propagation = multilayer_perceptron_pointer->calculate_first_order_forward_propagation(inputs);

      const Vector< Vector<double> >& layers_activation = first_order_forward_propagation[0];
      const Vector< Vector<double> >& layers_activation_derivative = first_order_forward_propagation[1];

      if(!has_conditions_layer)
      {
         const Vector<double>& outputs = layers_activation[layers_number-1];

         for(size_t j = 0; j < outputs_number; j++)
         {
             if(outputs[j] == 0.0)
             {
                 output_gradient[j] = -targets[j]/1.e-6 + (1.0 - targets[j])/(1.0 - 1.e-6);
             }
             else if(outputs[j] == 1.0)
             {
                 output_gradient[j] = -targets[j]/0.999999 + (1.0 - targets[j])/(1.0 - 0.999999);
             }
             else
             {
                output_gradient[j] = -targets[j]/outputs[j] + (1.0 - targets[j])/(1.0 - outputs[j]);
             }
         }

         layers_delta = calculate_layers_delta(layers_activation_derivative, output_gradient);
      }
      else
      {
         particular_solution = conditions_layer_pointer->calculate_particular_solution(inputs);
         homogeneous_solution = conditions_layer_pointer->calculate_homogeneous_solution(inputs);

         const Vector<double>& outputs = particular_solution + homogeneous_solution*layers_activation[layers_number-1];

         for(size_t j = 0; j < outputs_number; j++)
         {
             if(outputs[j] == 0.0)
             {
                 output_gradient[j] = -targets[j]/1.e-6 + (1.0 - targets[j])/(1.0 - 1.e-6);
             }
             else if(outputs[j] == 1.0)
             {
                 output_gradient[j] = -targets[j]/0.999999 + (1.0 - targets[j])/(1.0 - 0.999999);
             }
             else
             {
                output_gradient[j] = -targets[j]/outputs[j] + (1.0 - targets[j])/(1.0 - outputs[j]);
             }
         }

		 layers_delta = calculate_layers_delta(layers_activation_derivative, homogeneous_solution, output_gradient);
      }

      point_gradient = calculate_point_gradient(inputs, layers_activation, layers_delta);

      gradient += point_gradient;
   }

   return(gradient);
}
Beispiel #4
0
Vector<double> MinkowskiError::calculate_gradient(void) const
{
   // Control sentence (if debug)

   #ifdef __OPENNN_DEBUG__ 

   check();

   #endif

   // Neural network stuff

   const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();

   // Neural network stuff

   const bool has_conditions_layer = neural_network_pointer->has_conditions_layer();

   const ConditionsLayer* conditions_layer_pointer = has_conditions_layer ? neural_network_pointer->get_conditions_layer_pointer() : NULL;

   const size_t inputs_number = multilayer_perceptron_pointer->get_inputs_number();
   const size_t outputs_number = multilayer_perceptron_pointer->get_outputs_number();

   const size_t layers_number = multilayer_perceptron_pointer->get_layers_number();

   const size_t neural_parameters_number = multilayer_perceptron_pointer->count_parameters_number();

   Vector< Vector< Vector<double> > > first_order_forward_propagation(2);

   Vector<double> particular_solution;
   Vector<double> homogeneous_solution;

   // Data set stuff

   const Instances& instances = data_set_pointer->get_instances();

   const size_t training_instances_number = instances.count_training_instances_number();

   const Vector<size_t> training_indices = instances.arrange_training_indices();

   size_t training_index;

   const Variables& variables = data_set_pointer->get_variables();

   const Vector<size_t> inputs_indices = variables.arrange_inputs_indices();
   const Vector<size_t> targets_indices = variables.arrange_targets_indices();

   Vector<double> inputs(inputs_number);
   Vector<double> targets(outputs_number);

   // Minkowski error stuff

   Vector<double> output_gradient(outputs_number);

   Vector< Matrix<double> > layers_combination_parameters_Jacobian;

   Vector< Vector<double> > layers_inputs(layers_number);
   Vector< Vector<double> > layers_delta;

   Vector<double> point_gradient(neural_parameters_number, 0.0);

   Vector<double> gradient(neural_parameters_number, 0.0);

   int i = 0;

   #pragma omp parallel for private(i, training_index, inputs, targets, first_order_forward_propagation, layers_inputs, layers_combination_parameters_Jacobian, \
    output_gradient, layers_delta, particular_solution, homogeneous_solution, point_gradient)

   for(i = 0; i < (int)training_instances_number; i++)
   {
       training_index = training_indices[i];

       // Data set

      inputs = data_set_pointer->get_instance(training_index, inputs_indices);

      targets = data_set_pointer->get_instance(training_index, targets_indices);

      // Neural network

      first_order_forward_propagation = multilayer_perceptron_pointer->calculate_first_order_forward_propagation(inputs);

      const Vector< Vector<double> >& layers_activation = first_order_forward_propagation[0];
      const Vector< Vector<double> >& layers_activation_derivative = first_order_forward_propagation[1];

      layers_inputs = multilayer_perceptron_pointer->arrange_layers_input(inputs, layers_activation);

      layers_combination_parameters_Jacobian = multilayer_perceptron_pointer->calculate_layers_combination_parameters_Jacobian(layers_inputs);

      // Performance functional

      if(!has_conditions_layer)
      {
         output_gradient = (layers_activation[layers_number-1]-targets).calculate_p_norm_gradient(Minkowski_parameter);

         layers_delta = calculate_layers_delta(layers_activation_derivative, output_gradient);
      }
      else
      {
         particular_solution = conditions_layer_pointer->calculate_particular_solution(inputs);
         homogeneous_solution = conditions_layer_pointer->calculate_homogeneous_solution(inputs);

         output_gradient = (particular_solution+homogeneous_solution*layers_activation[layers_number-1] - targets).calculate_pow(Minkowski_parameter-1.0)*Minkowski_parameter;

         layers_delta = calculate_layers_delta(layers_activation_derivative, homogeneous_solution, output_gradient);
      }

      point_gradient = calculate_point_gradient(layers_combination_parameters_Jacobian, layers_delta);

      #pragma omp critical

      gradient += point_gradient;
   }

   return(gradient);
}
Vector<double> RootMeanSquaredError::calculate_gradient(const double& total_training_instances_number, const double& loss) const
{
    // Control sentence

       #ifdef __OPENNN_DEBUG__

       check();

       #endif

       // Neural network stuff

       const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();

       const size_t inputs_number = multilayer_perceptron_pointer->get_inputs_number();
       const size_t outputs_number = multilayer_perceptron_pointer->get_outputs_number();

       const size_t layers_number = multilayer_perceptron_pointer->get_layers_number();

       const size_t parameters_number = multilayer_perceptron_pointer->count_parameters_number();

       // Data set stuff

       Vector< Vector< Vector<double> > > first_order_forward_propagation(2);

       const bool has_conditions_layer = neural_network_pointer->has_conditions_layer();

       const ConditionsLayer* conditions_layer_pointer = has_conditions_layer ? neural_network_pointer->get_conditions_layer_pointer() : NULL;

       Vector<double> particular_solution;
       Vector<double> homogeneous_solution;

       // Data set stuff

       const Instances& instances = data_set_pointer->get_instances();

       const size_t training_instances_number = instances.count_training_instances_number();

       const Vector<size_t> training_indices = instances.arrange_training_indices();

       size_t training_index;

       const Variables& variables = data_set_pointer->get_variables();

       const Vector<size_t> inputs_indices = variables.arrange_inputs_indices();
       const Vector<size_t> targets_indices = variables.arrange_targets_indices();

       const MissingValues& missing_values = data_set_pointer->get_missing_values();

       Vector<double> inputs(inputs_number);
       Vector<double> targets(outputs_number);

       // Loss index stuff

       Vector< Vector<double> > layers_delta;

       Vector<double> output_gradient(outputs_number);

       Vector<double> point_gradient(parameters_number, 0.0);

       // Main loop

       Vector<double> gradient(parameters_number, 0.0);

       int i = 0;

       #pragma omp parallel for private(i, training_index, inputs, targets, first_order_forward_propagation, output_gradient, \
        layers_delta, particular_solution, homogeneous_solution, point_gradient)

       for(i = 0; i < (int)training_instances_number; i++)
       {
           training_index = training_indices[i];

           if(missing_values.has_missing_values(training_index))
           {
               continue;
           }

          inputs = data_set_pointer->get_instance(training_index, inputs_indices);

          targets = data_set_pointer->get_instance(training_index, targets_indices);

          first_order_forward_propagation = multilayer_perceptron_pointer->calculate_first_order_forward_propagation(inputs);

          const Vector< Vector<double> >& layers_activation = first_order_forward_propagation[0];
          const Vector< Vector<double> >& layers_activation_derivative = first_order_forward_propagation[1];

          if(!has_conditions_layer)
          {
             output_gradient = (layers_activation[layers_number-1]-targets)/(total_training_instances_number*loss);

             layers_delta = calculate_layers_delta(layers_activation_derivative, output_gradient);
          }
          else
          {
             particular_solution = conditions_layer_pointer->calculate_particular_solution(inputs);
             homogeneous_solution = conditions_layer_pointer->calculate_homogeneous_solution(inputs);

             output_gradient = (particular_solution+homogeneous_solution*layers_activation[layers_number-1] - targets)/(total_training_instances_number*loss);

             layers_delta = calculate_layers_delta(layers_activation_derivative, homogeneous_solution, output_gradient);
          }

          point_gradient = calculate_point_gradient(inputs, layers_activation, layers_delta);

          #pragma omp critical

          gradient += point_gradient;
       }

       return(gradient);
}
Matrix<double> NormalizedSquaredError::calculate_terms_Jacobian(void) const
{
   // Control sentence

   #ifdef __OPENNN_DEBUG__ 

   check();

   #endif

   // Neural network stuff

   const MultilayerPerceptron* multilayer_perceptron_pointer = neural_network_pointer->get_multilayer_perceptron_pointer();

   const size_t inputs_number = multilayer_perceptron_pointer->get_inputs_number();
   const size_t outputs_number = multilayer_perceptron_pointer->get_outputs_number();
   const size_t layers_number = multilayer_perceptron_pointer->get_layers_number();

   const size_t parameters_number = multilayer_perceptron_pointer->count_parameters_number();

   Vector< Vector< Vector<double> > > first_order_forward_propagation(2);

   Vector< Matrix<double> > layers_combination_parameters_Jacobian; 

   Vector< Vector<double> > layers_inputs(layers_number); 

   Vector<double> particular_solution;
   Vector<double> homogeneous_solution;

   const bool has_conditions_layer = neural_network_pointer->has_conditions_layer();

   const ConditionsLayer* conditions_layer_pointer = has_conditions_layer ? neural_network_pointer->get_conditions_layer_pointer() : NULL;

   // Data set stuff

   const Instances& instances = data_set_pointer->get_instances();

   const size_t training_instances_number = instances.count_training_instances_number();

   const Vector<size_t> training_indices = instances.arrange_training_indices();

   size_t training_index;

   const Variables& variables = data_set_pointer->get_variables();

   const Vector<size_t> inputs_indices = variables.arrange_inputs_indices();
   const Vector<size_t> targets_indices = variables.arrange_targets_indices();

   const Vector<double> training_target_data_mean = data_set_pointer->calculate_training_target_data_mean();

   Vector<double> inputs(inputs_number);
   Vector<double> targets(outputs_number);

   // Normalized squared error

   Vector<double> term(outputs_number);
   double term_norm;

   Vector<double> output_gradient(outputs_number);

   Vector< Vector<double> > layers_delta(layers_number);
   Vector<double> point_gradient(parameters_number);

   Matrix<double> terms_Jacobian(training_instances_number, parameters_number);

   double normalization_coefficient = 0.0;

   // Main loop

   int i = 0;

   #pragma omp parallel for private(i, training_index, inputs, targets, first_order_forward_propagation, layers_inputs, \
    layers_combination_parameters_Jacobian, term, term_norm, output_gradient, layers_delta, particular_solution, homogeneous_solution, point_gradient)

   for(i = 0; i < (int)training_instances_number; i++)
   {
       training_index = training_indices[i];

       // Data set

      inputs = data_set_pointer->get_instance(training_index, inputs_indices);

      targets = data_set_pointer->get_instance(training_index, targets_indices);

	  // Neural network

      first_order_forward_propagation = multilayer_perceptron_pointer->calculate_first_order_forward_propagation(inputs);

      const Vector< Vector<double> >& layers_activation = first_order_forward_propagation[0];
      const Vector< Vector<double> >& layers_activation_derivative = first_order_forward_propagation[1];

      layers_inputs = multilayer_perceptron_pointer->arrange_layers_input(inputs, layers_activation);

	  layers_combination_parameters_Jacobian = multilayer_perceptron_pointer->calculate_layers_combination_parameters_Jacobian(layers_inputs);
	  
	  // Performance functional

      if(!has_conditions_layer) // No conditions
      {
         const Vector<double>& outputs = layers_activation[layers_number-1]; 

         term = outputs-targets;
         term_norm = term.calculate_norm();

         if(term_norm == 0.0)
   	     {
            output_gradient.initialize(0.0);
	     }
         else
	     {
            output_gradient = term/term_norm;
	     }

         layers_delta = calculate_layers_delta(layers_activation_derivative, output_gradient);
      }
      else // Conditions
      {        
         particular_solution = conditions_layer_pointer->calculate_particular_solution(inputs);
         homogeneous_solution = conditions_layer_pointer->calculate_homogeneous_solution(inputs);

         const Vector<double>& output_layer_activation = layers_activation[layers_number-1]; 

         term = (particular_solution+homogeneous_solution*output_layer_activation - targets);              
         term_norm = term.calculate_norm();

         if(term_norm == 0.0)
   	     {
            output_gradient.initialize(0.0);
	     }
	     else
	     {
            output_gradient = term/term_norm;
	     }

         layers_delta = calculate_layers_delta(layers_activation_derivative, homogeneous_solution, output_gradient);
	  }

	  normalization_coefficient += targets.calculate_sum_squared_error(training_target_data_mean);

      point_gradient = calculate_point_gradient(layers_combination_parameters_Jacobian, layers_delta);

      terms_Jacobian.set_row(i, point_gradient);

  }

   if(normalization_coefficient < 1.0e-99)
   {
      std::ostringstream buffer;

      buffer << "OpenNN Exception: NormalizedSquaredError class.\n"
             << "Matrix<double> calculate_terms_Jacobian(void) const method.\n"
             << "Normalization coefficient is zero.\n"
             << "Unuse constant target variables or choose another error functional. ";

      throw std::logic_error(buffer.str());
   }

   return(terms_Jacobian/sqrt(normalization_coefficient));
}