Example #1
void apply_momentum_correction(rbm_t *rbm, delta_w_t *dw) {
  for(int i=0;i<rbm[0].n_outputs;i++) {
    rbm[0].bias_outputs[i]+= rbm[0].learning_rate*dw[0].delta_output_bias[i]/(double)dw[0].batch_size; 
    rbm[0].output_momentum[i]+= rbm[0].learning_rate*dw[0].delta_output_bias[i]/(double)dw[0].batch_size; 
    for(int j=0;j<rbm[0].n_inputs;j++) {
      double step= get_matrix_value(dw[0].delta_w, i, j); // delta_w_i_j
      double previous_w_i_j= get_matrix_value(rbm[0].io_weights, i, j);

      // If using L2 penalty (a.k.a "weight decay"), apply that here.
      if(rbm[0].use_l2_penalty) 
        step-= rbm[0].weight_cost*previous_w_i_j; // Do I apply this to the momentum term as well, or just the correction?!
      step*= rbm[0].learning_rate/(double)dw[0].batch_size; // For the momentum method ... do I still scale by the batch size?!

      // Update weights (eq. 7.10, 2nd half): \theta_t = \theta_t' - \epsilon_{t-1} \nabla f(\theta_{t-1} + \mu_{t-1} v_{t-1}).
      // \theta_t' was applied before taking the step.
      set_matrix_value(rbm[0].io_weights, i, j, previous_w_i_j+step);

      // Update velocities (eq. 7.11, 2nd half): v_t = v_t' - \epsilon_{t-1} \nabla f(\theta_{t-1} + \mu_{t-1} v_{t-1}).
      // v_t' was applied before taking the step.
      double previous_momentum_i_j= get_matrix_value(rbm[0].momentum, i, j);
      set_matrix_value(rbm[0].momentum, i, j, previous_momentum_i_j+step);

      if(i==0 && rbm[0].update_input_bias) { // Update input biases only once per inner pass (i==0), and only if enabled.
        rbm[0].bias_inputs[j]+= rbm[0].learning_rate*dw[0].delta_input_bias[j]/(double)dw[0].batch_size;
        rbm[0].input_momentum[j]+= rbm[0].learning_rate*dw[0].delta_input_bias[j]/(double)dw[0].batch_size;
      }
    }
  }
}
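For reference, the equations the comments cite (eq. 7.10 and 7.11) appear to describe a two-stage Nesterov-style momentum update: the first halves are applied by initial_momentum_step (Example #3) and the second halves by apply_momentum_correction above. A reconstruction from the comment fragments, with primed quantities denoting the intermediate values set before the gradient step; note the code adds the step because delta_w points uphill for the RBM log-likelihood:

% First halves: partial update using the decayed momentum (initial_momentum_step).
\theta_t' = \theta_{t-1} + \mu_{t-1} v_{t-1}     % eq. 7.10, 1st half
v_t'      = \mu_{t-1} v_{t-1}                    % eq. 7.11, 1st half

% Second halves: gradient step taken at the lookahead point (apply_momentum_correction).
\theta_t = \theta_t' - \epsilon_{t-1} \nabla f(\theta_{t-1} + \mu_{t-1} v_{t-1})   % eq. 7.10, 2nd half
v_t      = v_t'      - \epsilon_{t-1} \nabla f(\theta_{t-1} + \mu_{t-1} v_{t-1})   % eq. 7.11, 2nd half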
Example #2
void apply_momentum_correction(rbm_t *rbm, delta_w_t *dw) {
  double alpha= (rbm->learning_rate/(double)dw->batch_size);

  for(int i=0;i<rbm->n_outputs;i++) {
    rbm->bias_outputs[i]+= alpha*dw->delta_output_bias[i]; 
    rbm->output_momentum[i]+= alpha*dw->delta_output_bias[i]; 
    for(int j=0;j<rbm->n_inputs;j++) {
      double step= alpha*get_matrix_value(dw->delta_w, i, j); // alpha * delta_w_i_j
      double previous_w_i_j= get_matrix_value(rbm->io_weights, i, j);

      { // NOTE: This block runs before the L2 penalty is applied, so the penalty does not reach the momentum term; moving it below the penalty application would include it.
        // Update velocities (eq. 7.11, 2nd half): v_t = v_t' - \epsilon_{t-1} \nabla f(\theta_{t-1} + \mu_{t-1} v_{t-1}).
        // v_t' was applied before taking the step.
        double previous_momentum_i_j= get_matrix_value(rbm->momentum, i, j);
        set_matrix_value(rbm->momentum, i, j, previous_momentum_i_j+step);
      }

      // If using L2 penalty (a.k.a "weight decay"), apply that here.
      if(rbm->use_l2_penalty) // rbm->learning_rate * (delta_w / batch_size - weight_cost * w_i_j) [same as: http://www.cs.toronto.edu/~hinton/code/rbm.m]
        step-= rbm->weight_cost*previous_w_i_j*rbm->learning_rate; // Do I apply this to the momentum term as well, or just the correction?!

      // Update weights (eq. 7.10, 2nd half): \theta_t = \theta_t' - \epsilon_{t-1} \nabla f(\theta_{t-1} + \mu_{t-1} v_{t-1}).
      // \theta_t' was applied before taking the step.
      set_matrix_value(rbm->io_weights, i, j, previous_w_i_j+step);

      if(i==0 && rbm->update_input_bias) { // Update input biases only once per inner pass (i==0), and only if enabled.
        rbm->bias_inputs[j]+= alpha*dw->delta_input_bias[j];
        rbm->input_momentum[j]+= alpha*dw->delta_input_bias[j];
      }
    }
  }
}
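Reading the update off the code above: with \alpha = \epsilon / B (learning rate over batch size) and \lambda = weight_cost, the step applied to w_{ij} when the L2 penalty is enabled is

\text{step}_{ij} = \alpha\,\Delta w_{ij} - \epsilon\lambda w_{ij} = \epsilon\left(\frac{\Delta w_{ij}}{B} - \lambda w_{ij}\right),

which is the weight-decay form in Hinton's rbm.m that the comment cites, with the learning rate distributed over both terms.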
Example #3
void initial_momentum_step(rbm_t *rbm) {
  for(int i=0;i<rbm[0].n_outputs;i++) {
    rbm[0].output_momentum[i]*= rbm[0].momentum_decay;
    rbm[0].bias_outputs[i]+= rbm[0].output_momentum[i];
    for(int j=0;j<rbm[0].n_inputs;j++) {
      // Computes (eq. 7.11, 1st half): v_t = \mu_{t-1} v_{t-1}.
      double momentum_i_j= rbm[0].momentum_decay*get_matrix_value(rbm[0].momentum, i, j); 
      set_matrix_value(rbm[0].momentum, i, j, momentum_i_j); // Updates momentum matrix.

      // Computes (eq. 7.10, 1st half): \theta_t = \theta_{t-1} + \mu_{t-1} v_{t-1}.
      set_matrix_value(rbm[0].io_weights, i, j, get_matrix_value(rbm[0].io_weights, i, j)+momentum_i_j);

      if(i==0 && rbm[0].update_input_bias) {
        rbm[0].input_momentum[j]*= rbm[0].momentum_decay;
        rbm[0].bias_inputs[j]+= rbm[0].input_momentum[j];
      }
    }
  }
}
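Taken together, initial_momentum_step and apply_momentum_correction look like the two halves of a single per-mini-batch update: the decayed momentum is applied first, the gradient is then estimated at that lookahead point, and the correction finishes the step. A minimal sketch of how they might be sequenced; compute_batch_delta_w and free_delta_w are hypothetical stand-ins for whatever routines this codebase uses to build and release a delta_w_t:

/* Sketch only -- not from the source.  Assumes the rbm_t/delta_w_t types used above. */
void momentum_training_step(rbm_t *rbm, double *minibatch) {
  // 1) Apply decayed momentum to weights and biases (eqs. 7.10/7.11, 1st halves).
  initial_momentum_step(rbm);

  // 2) Estimate the gradient (e.g. CD-1) at the lookahead parameters.
  delta_w_t *dw = compute_batch_delta_w(rbm, minibatch);  // hypothetical helper

  // 3) Take the gradient step and fold it into the velocities (2nd halves).
  apply_momentum_correction(rbm, dw);

  free_delta_w(dw);                                        // hypothetical helper
}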
Example #4
/*
 * Add matrices io_weights and delta_w.  The result will be in io_weights.
 *
 * Also includes learning-rate/batch-size scaling, optional L2 weight decay, and the output/input bias updates.
 */
void apply_delta_w(rbm_t *rbm, delta_w_t *dw) {
  for(int i=0;i<rbm[0].n_outputs;i++) {
    rbm[0].bias_outputs[i] += rbm[0].learning_rate*dw[0].delta_output_bias[i]/(double)dw[0].batch_size; 
    for(int j=0;j<rbm[0].n_inputs;j++) {
      double previous_w_i_j= get_matrix_value(rbm[0].io_weights, i, j);
      double delta_w_i_j= get_matrix_value(dw[0].delta_w, i, j);

      // If using L2 penalty (a.k.a "weight decay"), apply that here.
      if(rbm[0].use_l2_penalty) 
        delta_w_i_j-= rbm[0].weight_cost*previous_w_i_j; // Is this the right sign!?

      double new_w_i_j= previous_w_i_j+rbm[0].learning_rate*delta_w_i_j/(double)dw[0].batch_size;

      set_matrix_value(rbm[0].io_weights, i, j, new_w_i_j);

      if(i==0 && rbm[0].update_input_bias) // Update input biases only once per inner pass (i==0), and only if enabled.
        rbm[0].bias_inputs[j] += rbm[0].learning_rate*dw[0].delta_input_bias[j]/(double)dw[0].batch_size;
    }
  }
}
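One difference between this version and Example #5 below: here the decay is subtracted from delta_w_i_j before the learning-rate/batch-size scaling, so the effective update (with \epsilon = learning_rate, \lambda = weight_cost, B = batch_size) is

w_{ij} \leftarrow w_{ij} + \frac{\epsilon}{B}\left(\Delta w_{ij} - \lambda w_{ij}\right),

i.e. the decay term is also divided by the batch size, whereas the next version scales it by \epsilon alone, matching rbm.m.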
Example #5
/*
 * Add matrices io_weights and delta_w.  The result will be in io_weights.
 *
 * Also includes learning-rate/batch-size scaling, optional L2 weight decay, and the output/input bias updates.
 */
void apply_delta_w(rbm_t *rbm, delta_w_t *dw) {
  double alpha= (rbm->learning_rate/(double)dw->batch_size);

  for(int i=0;i<rbm->n_outputs;i++) {
    rbm->bias_outputs[i] += alpha*dw->delta_output_bias[i]; 
    for(int j=0;j<rbm->n_inputs;j++) {
      double previous_w_i_j= get_matrix_value(rbm->io_weights, i, j);
      double delta_w_i_j= alpha*get_matrix_value(dw->delta_w, i, j); // learning_rate * delta_w_i_j / batch_size

      // If using L2 penalty (a.k.a "weight decay"), apply that here.
      if(rbm->use_l2_penalty) 
        delta_w_i_j-= rbm->weight_cost*previous_w_i_j*rbm->learning_rate; // from: http://www.cs.toronto.edu/~hinton/code/rbm.m, distributing learning rate.

      double new_w_i_j= previous_w_i_j+delta_w_i_j;

      set_matrix_value(rbm->io_weights, i, j, new_w_i_j);

      if(i==0 && rbm->update_input_bias) // Update input biases only once per inner pass (i==0), and only if enabled.
        rbm->bias_inputs[j] += alpha*dw->delta_input_bias[j];
    }
  }
}
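All of the examples above rely on get_matrix_value and set_matrix_value (and on the rbm_t / delta_w_t structs) defined elsewhere in the codebase. A minimal sketch of what the matrix accessors could look like, assuming a dense row-major layout with n_outputs rows and n_inputs columns; the real matrix type in this codebase may differ:

/* Sketch only -- assumed matrix type, not from the source. */
typedef struct {
  int rows;      /* e.g. n_outputs */
  int cols;      /* e.g. n_inputs  */
  double *data;  /* rows*cols values, row-major */
} matrix_t;

static inline double get_matrix_value(matrix_t *m, int i, int j) {
  return m->data[i*m->cols + j];   /* value at row i, column j */
}

static inline void set_matrix_value(matrix_t *m, int i, int j, double value) {
  m->data[i*m->cols + j] = value;  /* overwrite row i, column j */
}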