// Example n. 1
  // Test that the correct update is computed for a regularized least squares
  // problem:
  //
  //            E = (1/(2n)) || X w - y ||^2 + (lambda / 2) || w ||^2
  //   \nabla_w E = (1/n) (X^T X w - X^T y) + lambda * w
  //
  // X \in R^{n x (d+1)} (each example is a row, (d+1)th element is always 1)
  // w \in R^{(d+1) x 1} ((d+1)th element is the bias)
  // y \in R^{n x 1}
  // lambda is weight_decay
  //
  // TestLeastSquaresUpdate works "inductively", assuming that the solver
  // correctly updates the net K (= iter_to_check) times, then given the history
  // from the Kth update, we compute the (K+1)th update and check that it
  // matches the solver's (K+1)th update.
  void TestLeastSquaresUpdate(const Dtype learning_rate = 1.0,
      const Dtype weight_decay = 0.0, const Dtype momentum = 0.0,
      const int iter_to_check = 0) {
    // Initialize the solver and run K (= iter_to_check) solver iterations.
    RunLeastSquaresSolver(learning_rate, weight_decay, momentum, iter_to_check);

    // Compute the (K+1)th update using the analytic least squares gradient.
    vector<shared_ptr<Blob<Dtype> > > updated_params;
    ComputeLeastSquaresUpdate(learning_rate, weight_decay, momentum,
                              &updated_params);

    // Reinitialize the solver and run K+1 solver iterations.
    RunLeastSquaresSolver(learning_rate, weight_decay, momentum,
                          iter_to_check + 1);

    // Check that the solver's solution matches ours.
    CheckLeastSquaresUpdate(updated_params);
  }
  // Verify that snapshot/restore is lossless: running 2N iterations straight
  // through must give bit-identical params and history to running N
  // iterations, snapshotting, restoring, and running the remaining N.
  void TestSnapshot(const Dtype learning_rate = 1.0,
      const Dtype weight_decay = 0.0, const Dtype momentum = 0.0,
      const int num_iters = 1) {
    // First pass: run 2 * num_iters iterations straight through, no snapshot.
    const int total_num_iters = num_iters * 2;
    const int kIterSize = 1;
    bool snapshot = false;
    RunLeastSquaresSolver(learning_rate, weight_decay, momentum,
        total_num_iters, kIterSize, snapshot);

    // Remember the learnable params (both data and diff) of the full run.
    const bool kReshape = true;
    const bool kCopyData = false;   // CopyFrom's copy_diff flag: false => data
    const bool kCopyDiff = true;    //                            true  => diff
    const vector<Blob<Dtype>*>& orig_params =
        solver_->net()->learnable_params();
    vector<shared_ptr<Blob<Dtype> > > param_copies(orig_params.size());
    for (int i = 0; i < orig_params.size(); ++i) {
      param_copies[i].reset(new Blob<Dtype>());
      param_copies[i]->CopyFrom(*orig_params[i], kCopyData, kReshape);
      param_copies[i]->CopyFrom(*orig_params[i], kCopyDiff, kReshape);
    }

    // Remember the solver history of the full run as well.
    const vector<shared_ptr<Blob<Dtype> > >& orig_history = solver_->history();
    vector<shared_ptr<Blob<Dtype> > > history_copies(orig_history.size());
    for (int i = 0; i < orig_history.size(); ++i) {
      history_copies[i].reset(new Blob<Dtype>());
      history_copies[i]->CopyFrom(*orig_history[i], kCopyData, kReshape);
      history_copies[i]->CopyFrom(*orig_history[i], kCopyDiff, kReshape);
    }

    // Second pass: run only num_iters iterations and snapshot the state.
    snapshot = true;
    string snapshot_name = RunLeastSquaresSolver(learning_rate, weight_decay,
        momentum, num_iters, kIterSize, snapshot);

    // Third pass: restore from the snapshot and finish the remaining
    // num_iters iterations.
    snapshot = false;
    RunLeastSquaresSolver(learning_rate, weight_decay, momentum,
        total_num_iters, kIterSize, snapshot, snapshot_name.c_str());

    // The resumed run must reproduce the uninterrupted run exactly:
    // first the learnable params...
    const vector<Blob<Dtype>*>& params = solver_->net()->learnable_params();
    for (int i = 0; i < params.size(); ++i) {
      for (int j = 0; j < params[i]->count(); ++j) {
        EXPECT_EQ(param_copies[i]->cpu_data()[j], params[i]->cpu_data()[j])
            << "param " << i << " data differed at dim " << j;
        EXPECT_EQ(param_copies[i]->cpu_diff()[j], params[i]->cpu_diff()[j])
            << "param " << i << " diff differed at dim " << j;
      }
    }

    // ...then the solver history.
    const vector<shared_ptr<Blob<Dtype> > >& history = solver_->history();
    for (int i = 0; i < history.size(); ++i) {
      for (int j = 0; j < history[i]->count(); ++j) {
        EXPECT_EQ(history_copies[i]->cpu_data()[j], history[i]->cpu_data()[j])
            << "history blob " << i << " data differed at dim " << j;
        EXPECT_EQ(history_copies[i]->cpu_diff()[j], history[i]->cpu_diff()[j])
            << "history blob " << i << " diff differed at dim " << j;
      }
    }
  }