// Test that the correct update is computed for a regularized least squares
// problem:
//
//            E = (1/(2n)) || X w - y ||^2 + (lambda / 2) || w ||^2
//   \nabla_w E = (1/n) (X^T X w - X^T y) + lambda * w
//
//   X \in R^{n x (d+1)} (each example is a row, (d+1)th element is always 1)
//   w \in R^{(d+1) x 1} ((d+1)th element is the bias)
//   y \in R^{n x 1}
//   lambda is weight_decay
//
// TestLeastSquaresUpdate works "inductively", assuming that the solver
// correctly updates the net K (= iter_to_check) times, then given the history
// from the Kth update, we compute the (K+1)th update and check that it
// matches the solver's (K+1)th update.
void TestLeastSquaresUpdate(const Dtype learning_rate = 1.0,
    const Dtype weight_decay = 0.0, const Dtype momentum = 0.0,
    const int iter_to_check = 0) {
  // Initialize the solver and run K (= iter_to_check) solver iterations.
  RunLeastSquaresSolver(learning_rate, weight_decay, momentum, iter_to_check);

  // Compute the (K+1)th update using the analytic least squares gradient.
  vector<shared_ptr<Blob<Dtype> > > updated_params;
  ComputeLeastSquaresUpdate(
      learning_rate, weight_decay, momentum, &updated_params);

  // Reinitialize the solver and run K+1 solver iterations.
  RunLeastSquaresSolver(learning_rate, weight_decay, momentum,
      iter_to_check + 1);

  // Check that the solver's solution matches ours.
  CheckLeastSquaresUpdate(updated_params);
}
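// For reference, below is a minimal standalone sketch of the analytic
// (K+1)th update that ComputeLeastSquaresUpdate is expected to produce in
// the plain SGD-with-momentum case, written directly from the gradient
// above. The function name, the raw vector<vector<Dtype> > layout for X,
// and the in-place update style are illustrative assumptions, not the
// harness's actual API.
template <typename Dtype>
void LeastSquaresSgdUpdateSketch(const vector<vector<Dtype> >& X,
    const vector<Dtype>& y, const Dtype learning_rate,
    const Dtype weight_decay, const Dtype momentum,
    vector<Dtype>* w,          // d+1 weights, updated in place
    vector<Dtype>* history) {  // previous update v, updated in place
  const int n = X.size();
  const int D = w->size();  // D = d + 1; the last element is the bias.
  // Precompute the residuals r_i = X_i . w - y_i.
  vector<Dtype> residual(n);
  for (int i = 0; i < n; ++i) {
    residual[i] = -y[i];
    for (int k = 0; k < D; ++k) { residual[i] += X[i][k] * (*w)[k]; }
  }
  for (int j = 0; j < D; ++j) {
    // grad_j = (1/n) sum_i X_ij r_i + lambda * w_j.
    Dtype grad = 0;
    for (int i = 0; i < n; ++i) { grad += X[i][j] * residual[i]; }
    grad = grad / n + weight_decay * (*w)[j];
    // SGD with momentum: v <- momentum * v + lr * grad; w <- w - v.
    (*history)[j] = momentum * (*history)[j] + learning_rate * grad;
    (*w)[j] -= (*history)[j];
  }
}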
void TestSnapshot(const Dtype learning_rate = 1.0,
    const Dtype weight_decay = 0.0, const Dtype momentum = 0.0,
    const int num_iters = 1) {
  // Run the solver for num_iters * 2 iterations.
  const int total_num_iters = num_iters * 2;
  bool snapshot = false;
  const int kIterSize = 1;
  RunLeastSquaresSolver(learning_rate, weight_decay, momentum,
      total_num_iters, kIterSize, snapshot);

  // Save the resulting param values.
  vector<shared_ptr<Blob<Dtype> > > param_copies;
  const vector<Blob<Dtype>*>& orig_params =
      solver_->net()->learnable_params();
  param_copies.resize(orig_params.size());
  for (int i = 0; i < orig_params.size(); ++i) {
    param_copies[i].reset(new Blob<Dtype>());
    const bool kReshape = true;
    for (int copy_diff = false; copy_diff <= true; ++copy_diff) {
      param_copies[i]->CopyFrom(*orig_params[i], copy_diff, kReshape);
    }
  }

  // Save the solver history.
  vector<shared_ptr<Blob<Dtype> > > history_copies;
  const vector<shared_ptr<Blob<Dtype> > >& orig_history = solver_->history();
  history_copies.resize(orig_history.size());
  for (int i = 0; i < orig_history.size(); ++i) {
    history_copies[i].reset(new Blob<Dtype>());
    const bool kReshape = true;
    for (int copy_diff = false; copy_diff <= true; ++copy_diff) {
      history_copies[i]->CopyFrom(*orig_history[i], copy_diff, kReshape);
    }
  }

  // Run the solver for num_iters iterations and snapshot.
  snapshot = true;
  string snapshot_name = RunLeastSquaresSolver(learning_rate, weight_decay,
      momentum, num_iters, kIterSize, snapshot);

  // Reinitialize the solver and run for num_iters more iterations.
  snapshot = false;
  RunLeastSquaresSolver(learning_rate, weight_decay, momentum,
      total_num_iters, kIterSize, snapshot, snapshot_name.c_str());

  // Check that params now match.
  const vector<Blob<Dtype>*>& params = solver_->net()->learnable_params();
  for (int i = 0; i < params.size(); ++i) {
    for (int j = 0; j < params[i]->count(); ++j) {
      EXPECT_EQ(param_copies[i]->cpu_data()[j], params[i]->cpu_data()[j])
          << "param " << i << " data differed at dim " << j;
      EXPECT_EQ(param_copies[i]->cpu_diff()[j], params[i]->cpu_diff()[j])
          << "param " << i << " diff differed at dim " << j;
    }
  }

  // Check that history now matches.
  const vector<shared_ptr<Blob<Dtype> > >& history = solver_->history();
  for (int i = 0; i < history.size(); ++i) {
    for (int j = 0; j < history[i]->count(); ++j) {
      EXPECT_EQ(history_copies[i]->cpu_data()[j], history[i]->cpu_data()[j])
          << "history blob " << i << " data differed at dim " << j;
      EXPECT_EQ(history_copies[i]->cpu_diff()[j], history[i]->cpu_diff()[j])
          << "history blob " << i << " diff differed at dim " << j;
    }
  }
}
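// For context, helpers like TestSnapshot are typically driven from typed
// test cases that sweep the snapshot iteration. A sketch of such a driver
// follows; the SGDSolverTest fixture name and the hyperparameter values are
// illustrative assumptions, not part of this file.
TYPED_TEST(SGDSolverTest, TestSnapshotSweepSketch) {
  typedef typename TypeParam::Dtype Dtype;
  const Dtype kLearningRate = 0.01;
  const Dtype kWeightDecay = 0.1;
  const Dtype kMomentum = 0.9;
  const int kNumIters = 4;
  // Snapshot after 1, 2, ..., kNumIters iterations; each call checks that
  // resuming from the snapshot reproduces an uninterrupted run exactly.
  for (int i = 1; i <= kNumIters; ++i) {
    this->TestSnapshot(kLearningRate, kWeightDecay, kMomentum, i);
  }
}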