void Solver<Dtype>::Step(int iters) {
  // Runs `iters` training iterations: forward/backward, loss smoothing over a
  // sliding window of average_loss() samples, periodic testing, progress
  // display, and snapshotting.
  vector<Blob<Dtype>*> bottom_vec;
  const int start_iter = iter_;
  const int stop_iter = iter_ + iters;
  int average_loss = this->param_.average_loss();
  vector<Dtype> losses;  // circular buffer of the most recent losses
  Dtype smoothed_loss = 0;

  for (; iter_ < stop_iter; ++iter_) {
    // Broadcast the new iteration number to any registered listeners.
    Messenger::SendMessage("SOLVER_ITER_CHANGED", &iter_);

    // Periodic test pass (skipped at iteration 0 unless test_initialization).
    if (param_.test_interval() && iter_ % param_.test_interval() == 0
        && (iter_ > 0 || param_.test_initialization())) {
      TestAll();
    }

    const bool display = param_.display() && iter_ % param_.display() == 0;
    net_->set_debug_info(display && param_.debug_info());

    Dtype loss = net_->ForwardBackward(bottom_vec);

    // Keep a running mean of the last `average_loss` losses.  While the
    // window is still filling, fold the new sample into the mean
    // incrementally; once full, replace the oldest sample in place.
    if (losses.size() < average_loss) {
      losses.push_back(loss);
      int window = losses.size();
      smoothed_loss = (smoothed_loss * (window - 1) + loss) / window;
    } else {
      int oldest = (iter_ - start_iter) % average_loss;
      smoothed_loss += (loss - losses[oldest]) / average_loss;
      losses[oldest] = loss;
    }

    if (display) {
      LOG(INFO) << "Iteration " << iter_ << ", loss = " << smoothed_loss;
      // Report every element of every output blob, annotated with its loss
      // weight when that weight is non-zero.
      const vector<Blob<Dtype>*>& outputs = net_->output_blobs();
      int score_index = 0;
      for (int j = 0; j < outputs.size(); ++j) {
        const Dtype* out_data = outputs[j]->cpu_data();
        const string& output_name =
            net_->blob_names()[net_->output_blob_indices()[j]];
        const Dtype loss_weight =
            net_->blob_loss_weights()[net_->output_blob_indices()[j]];
        for (int k = 0; k < outputs[j]->count(); ++k) {
          ostringstream loss_msg_stream;
          if (loss_weight) {
            loss_msg_stream << " (* " << loss_weight
                            << " = " << loss_weight * out_data[k] << " loss)";
          }
          LOG(INFO) << " Train net output #" << score_index++
                    << ": " << output_name << " = " << out_data[k]
                    << loss_msg_stream.str();
        }
      }
    }

    ComputeUpdateValue();
    net_->Update();

    // Save a snapshot if needed.
    if (param_.snapshot() && (iter_ + 1) % param_.snapshot() == 0) {
      Snapshot();
    }
  }
}
void Solver<Dtype>::Solve(const char* resume_file) {
  // Full optimization driver: optionally restores solver state from
  // `resume_file` (may be NULL), then runs ForwardBackward + parameter
  // updates until param_.max_iter(), with periodic testing and snapshotting.
  Caffe::set_mode(Caffe::Brew(param_.solver_mode()));
  // Consistency fix: compare against the GPU enumerator explicitly (matching
  // the FeedbackSolver::Solve in this file) instead of relying on the GPU
  // mode value being non-zero.  Behavior is unchanged (CPU = 0, GPU = 1).
  if (param_.solver_mode() == SolverParameter_SolverMode_GPU
      && param_.has_device_id()) {
    Caffe::SetDevice(param_.device_id());
  }
  Caffe::set_phase(Caffe::TRAIN);
  LOG(INFO) << "Solving " << net_->name();
  PreSolve();
  iter_ = 0;
  if (resume_file) {
    LOG(INFO) << "Restoring previous solver status from " << resume_file;
    Restore(resume_file);
  }
  // For a network that is trained by the solver, no bottom or top vecs
  // should be given, and we will just provide dummy vecs.
  vector<Blob<Dtype>*> bottom_vec;
  while (iter_++ < param_.max_iter()) {
    Dtype loss = net_->ForwardBackward(bottom_vec);
    ComputeUpdateValue();
    net_->Update();
    // Notify the installed info monitors every iteration; they also take
    // care of displaying the loss (the old inline display code was dead,
    // commented-out, and has been removed).
    for (int i = 0; i < info_.size(); ++i) {
      info_[i].get()->Iter(loss, iter_);
    }
    if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
      // We need to set phase to test before running.
      Caffe::set_phase(Caffe::TEST);
      Test();
      Caffe::set_phase(Caffe::TRAIN);
    }
    // Check if we need to do snapshot
    if (param_.snapshot() && iter_ % param_.snapshot() == 0) {
      Snapshot();
    }
  }
  // After the optimization is done, always do a snapshot.  iter_ overshoots
  // by one on loop exit (post-increment), so pull it back first.
  iter_--;
  Snapshot();
  LOG(INFO) << "Optimization Done.";
}
void FeedbackSolver<Dtype>::Solve(const char* resume_file) {
  // Trains the feedback net for param_.max_iter() iterations, optionally
  // restoring solver state from `resume_file` first.
  Caffe::set_mode(Caffe::Brew(param_.solver_mode()));
  if (param_.solver_mode() == SolverParameter_SolverMode_GPU
      && param_.has_device_id()) {
    Caffe::SetDevice(param_.device_id());
  }
  Caffe::set_phase(Caffe::TRAIN);
  LOG(INFO) << "Solving " << net_->name();
  PreSolve();
  iter_ = 0;
  // BUG FIX: the previous code assigned `resume_file = NULL;` immediately
  // before this check, making Restore() unreachable and silently discarding
  // the caller's resume file.  Honor the parameter as the signature promises.
  if (resume_file) {
    LOG(INFO) << "Restoring previous solver status from " << resume_file;
    Restore(resume_file);
  }
  // Run a test pass before doing any training to avoid waiting a potentially
  // very long time (param_.test_interval() training iterations) to report that
  // there's not enough memory to run the test net and crash, etc.; and to
  // gauge the effect of the first training iterations.
  if (param_.test_interval()) {
    Test();
  }
  // For a network that is trained by the solver, no bottom or top vecs
  // should be given, and we will just provide dummy vecs.
  vector<Blob<Dtype>*> bottom_vec;
  while (iter_++ < param_.max_iter()) {
    Dtype loss = net_->FeedbackForwardBackward(bottom_vec, param_.top_k());
    ComputeUpdateValue();
    net_->Update();
    if (param_.display() && iter_ % param_.display() == 0) {
      LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
    }
    if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
      Test();
    }
    // Check if we need to do snapshot
    if (param_.snapshot() && iter_ % param_.snapshot() == 0) {
      Snapshot();
    }
  }
  // After the optimization is done, always do a snapshot.  iter_ overshoots
  // by one on loop exit (post-increment), so pull it back first.
  iter_--;
  Snapshot();
  LOG(INFO) << "Optimization Done.";
}
void SGDSolver<Dtype>::ApplyUpdate() {
  // Applies one optimizer step: optionally logs the learning rate, clips
  // gradients, then normalizes / regularizes / computes the SGD update for
  // each learnable parameter before committing the update to the net.
  const Dtype rate = GetLearningRate();
  const bool log_rate =
      this->param_.display() && this->iter_ % this->param_.display() == 0;
  if (log_rate) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  ClipGradients();
  for (int i = 0; i < this->net_->learnable_params().size(); ++i) {
    Normalize(i);
    Regularize(i);
    ComputeUpdateValue(i, rate);
  }
  this->net_->Update();
  // NOTE(review): unlike the other ApplyUpdate variant in this file, this one
  // does not increment this->iter_ -- confirm the caller advances the counter.
}
void SGDSolver<Dtype>::ApplyUpdate() {
  // One optimizer step: log the learning rate if a display is due, clip
  // gradients, run normalize/regularize/compute-update over every learnable
  // parameter, apply the update, and advance the iteration counter.
  const Dtype rate = GetLearningRate();
  const bool show_lr =
      this->param_.display() && this->iter_ % this->param_.display() == 0;
  if (show_lr) {
    LOG_IF(INFO, Caffe::root_solver())
        << "Iteration " << this->iter_ << ", lr = " << rate;
  }
  ClipGradients();
  for (int i = 0; i < this->net_->learnable_params().size(); ++i) {
    Normalize(i);
    Regularize(i);
    ComputeUpdateValue(i, rate);
  }
  this->net_->Update();
  // iter_ counts completed weight updates, so it is bumped only after
  // Update() has actually applied the new values.
  ++this->iter_;
}
void Solver<Dtype>::Solve(Net<Dtype>* net) {
  // Trains the given net from scratch for param_.max_iter() iterations,
  // snapshotting and (optionally) reporting the loss along the way.
  net_ = net;
  LOG(INFO) << "Solving " << net_->name();
  PreSolve();
  iter_ = 0;
  // The solver-owned net takes no external bottom/top blobs; a dummy vector
  // is passed to ForwardBackward instead.
  vector<Blob<Dtype>*> dummy_bottom;
  while (iter_++ < param_.max_iter()) {
    const Dtype loss = net_->ForwardBackward(dummy_bottom);
    ComputeUpdateValue();
    net_->Update();
    // Snapshot first, then report the loss if display is enabled.
    if (param_.snapshot() > 0 && iter_ % param_.snapshot() == 0) {
      Snapshot(false);
    }
    if (param_.display()) {
      LOG(ERROR) << "Iteration " << iter_ << ", loss = " << loss;
    }
  }
  LOG(INFO) << "Optimization Done.";
}
void Solver<Dtype>::Solve(const char* resume_file) {
  // Main training entry point.  Optionally restores state from `resume_file`,
  // then trains until param_.max_iter(), interleaving snapshots, test passes
  // and progress display; finishes with an optional snapshot and a final
  // display/test pass.
  Caffe::set_phase(Caffe::TRAIN);
  LOG(INFO) << "Solving " << net_->name();
  PreSolve();
  iter_ = 0;
  if (resume_file) {
    LOG(INFO) << "Restoring previous solver status from " << resume_file;
    Restore(resume_file);
  }
  // Remember the initial iter_ value; will be non-zero if we loaded from a
  // resume_file above.
  const int start_iter = iter_;
  // For a network that is trained by the solver, no bottom or top vecs
  // should be given, and we will just provide dummy vecs.
  vector<Blob<Dtype>*> bottom_vec;
  for (; iter_ < param_.max_iter(); ++iter_) {
    // Snapshot before the iteration, but never on the first one we run
    // (iter_ > start_iter excludes the resume point itself).
    if (param_.snapshot() && iter_ > start_iter
        && iter_ % param_.snapshot() == 0) {
      Snapshot();
    }
    // Test pass (skipped at iteration 0 unless test_initialization is set).
    if (param_.test_interval() && iter_ % param_.test_interval() == 0
        && (iter_ > 0 || param_.test_initialization())) {
      TestAll();
    }
    const bool display = param_.display() && iter_ % param_.display() == 0;
    net_->set_debug_info(display && param_.debug_info());
    net_->set_sample_print(display && param_.debug_info()
        && param_.sample_print());
    Dtype loss = net_->ForwardBackward(bottom_vec);
    if (display) {
      LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
      // Report every element of every output blob with its loss weight.
      const vector<Blob<Dtype>*>& outputs = net_->output_blobs();
      int score_index = 0;
      for (int j = 0; j < outputs.size(); ++j) {
        const Dtype* out_data = outputs[j]->cpu_data();
        const int blob_index = net_->output_blob_indices()[j];
        const string& output_name = net_->blob_names()[blob_index];
        const Dtype loss_weight = net_->blob_loss_weights()[blob_index];
        for (int k = 0; k < outputs[j]->count(); ++k) {
          ostringstream loss_msg_stream;
          if (loss_weight) {
            loss_msg_stream << " (* " << loss_weight
                            << " = " << loss_weight * out_data[k] << " loss)";
          }
          LOG(INFO) << " Train net output #" << score_index++
                    << ": " << output_name << " = " << out_data[k]
                    << loss_msg_stream.str();
        }
      }
    }
    ComputeUpdateValue();
    net_->Update();
  }
  // Always save a snapshot after optimization, unless overridden by setting
  // snapshot_after_train := false.
  if (param_.snapshot_after_train()) {
    Snapshot();
  }
  // After the optimization is done, run an additional train and test pass to
  // display the train and test loss/outputs if appropriate (based on the
  // display and test_interval settings, respectively).  Only a forward pass
  // is run for the train net here: the weights have already been updated
  // max_iter times, so this final pass exists purely to report the loss.
  if (param_.display() && iter_ % param_.display() == 0) {
    Dtype loss;
    net_->Forward(bottom_vec, &loss);
    LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss;
  }
  if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
    TestAll();
  }
  LOG(INFO) << "Optimization Done.";
}
void Solver<Dtype>::Solve(const char* resume_file) {
  // Training loop with throughput instrumentation: accumulates per-display-
  // window counts of images, triplets and positive triplets, and reports
  // their per-iteration averages plus the wall-clock time for the window.
  Caffe::set_mode(Caffe::Brew(param_.solver_mode()));
  if (param_.solver_mode() && param_.has_device_id()) {
    Caffe::SetDevice(param_.device_id());
  }
  Caffe::set_phase(Caffe::TRAIN);
  LOG(INFO) << "Solving " << net_->name();
  PreSolve();
  iter_ = 0;
  if (resume_file) {
    LOG(INFO) << "Restoring previous solver status from " << resume_file;
    Restore(resume_file);
  }
  // For a network that is trained by the solver, no bottom or top vecs
  // should be given, and we will just provide dummy vecs.
  vector<Blob<Dtype>*> bottom_vec;
  timeval start_t, finish_t, tmp_t;
  gettimeofday(&start_t, NULL);
  gettimeofday(&tmp_t, NULL);
  // Counters accumulated since the last display, then reset to zero.
  int pic_counts = 0;
  int pos_triplets = 0;
  int triplets_count = 0;
  while (iter_++ < param_.max_iter()) {
    Dtype loss = net_->ForwardBackward(bottom_vec);
    ComputeUpdateValue();
    net_->Update();
    pic_counts += Caffe::mutable_name2id().size();
    pos_triplets += Caffe::mutable_pos_triplets();
    triplets_count += Caffe::mutable_triplets().size();
    if (param_.display() && iter_ % param_.display() == 0) {
      gettimeofday(&finish_t, NULL);
      // Elapsed microseconds since the previous display.
      long int time_cost = (finish_t.tv_sec - tmp_t.tv_sec) * 1000000
          + (finish_t.tv_usec - tmp_t.tv_usec);
      LOG(INFO) << "Iteration " << iter_ << ", loss = " << loss
          << ", image counts: " << (pic_counts * 1.0 / param_.display())
          << ", triplets count: " << (triplets_count * 1.0 / param_.display())
          << ", positive triplet: " << (pos_triplets * 1.0 / param_.display())
          << ", cost time = " << (time_cost / 1000.0) << "ms";
      gettimeofday(&tmp_t, NULL);
      pic_counts = 0;
      pos_triplets = 0;
      triplets_count = 0;
    }
    if (param_.test_interval() && iter_ % param_.test_interval() == 0) {
      // We need to set phase to test before running.
      Caffe::set_phase(Caffe::TEST);
      Test();
      Caffe::set_phase(Caffe::TRAIN);
    }
    // Check if we need to do snapshot
    if (param_.snapshot() && iter_ % param_.snapshot() == 0) {
      Snapshot();
    }
  }
  // After the optimization is done, always do a snapshot.  iter_ overshoots
  // by one on loop exit (post-increment), so pull it back first.
  iter_--;
  Snapshot();
  LOG(INFO) << "Optimization Done.";
}