Teuchos::RCP<Anasazi::SolverManager<Scalar,MV,OP> >
build_eigsolver(const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
                Teuchos::ParameterList& test_params,
                Teuchos::RCP<Anasazi::Eigenproblem<Scalar,MV,OP> > problem)
{
  Teuchos::RCP<Anasazi::SolverManager<Scalar,MV,OP> > solver;

  Teuchos::ParameterList aparams;
  if (test_params.isSublist("Anasazi")) {
    aparams = test_params.sublist("Anasazi");
  }

  std::string solver_type("not specified");
  Ifpack2::getParameter(test_params, "eigen_solver_type", solver_type);
  if (solver_type == "BlockKrylovSchur") {
    // if (comm->getRank() == 0) std::cout << aparams << std::endl;
    solver = Teuchos::rcp(
        new Anasazi::BlockKrylovSchurSolMgr<Scalar,MV,OP>(problem, aparams));
  }
  else if (solver_type == "not specified") {
    throw std::runtime_error("Error in build_eigsolver: eigen_solver_type not specified.");
  }
  else {
    std::ostringstream os;
    os << "Error in build_eigsolver: eigen_solver_type (" << solver_type
       << ") not recognized.";
    os << "\nIfpack2's test-driver recognizes this eigensolver: BlockKrylovSchur.";
    throw std::runtime_error(os.str());
  }
  return solver;
}
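// Usage sketch (an assumption, not part of the original driver): how
// build_eigsolver might be wired up. BasicEigenproblem, setNEV, and
// setProblem are standard Anasazi calls; A, initVec, and the request for
// four eigenpairs are hypothetical placeholders.
void example_eigsolve(const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
                      Teuchos::ParameterList& test_params,
                      const Teuchos::RCP<const OP>& A,
                      const Teuchos::RCP<MV>& initVec)
{
  Teuchos::RCP<Anasazi::BasicEigenproblem<Scalar,MV,OP> > problem =
      Teuchos::rcp(new Anasazi::BasicEigenproblem<Scalar,MV,OP>(A, initVec));
  problem->setNEV(4);                      // ask for four eigenpairs
  TEUCHOS_ASSERT(problem->setProblem());   // finalize the problem definition
  Teuchos::RCP<Anasazi::SolverManager<Scalar,MV,OP> > solver =
      build_eigsolver(comm, test_params, problem);
  const Anasazi::ReturnType ret = solver->solve();
  if (ret != Anasazi::Converged && comm->getRank() == 0) {
    std::cout << "Eigensolver did not converge." << std::endl;
  }
}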
virtual void InitSolverFromProtoString(const string& proto) {
  SolverParameter param;
  CHECK(google::protobuf::TextFormat::ParseFromString(proto, &param));
  // Set the solver_mode according to the current Caffe::mode.
  switch (Caffe::mode()) {
    case Caffe::CPU:
      param.set_solver_mode(SolverParameter_SolverMode_CPU);
      break;
    case Caffe::GPU:
      param.set_solver_mode(SolverParameter_SolverMode_GPU);
      break;
    default:
      LOG(FATAL) << "Unknown Caffe mode: " << Caffe::mode();
  }
  InitSolver(param);
  delta_ = (solver_type() == SolverParameter_SolverType_ADAGRAD ||
            solver_type() == SolverParameter_SolverType_RMSPROP) ?
           param.delta() : 0;
}
virtual void InitSolverFromProtoString(const string& proto) {
  SolverParameter param;
  CHECK(google::protobuf::TextFormat::ParseFromString(proto, &param));
  // Disable saving a final snapshot so the tests don't pollute the user's
  // working directory with useless snapshots.
  param.set_snapshot_after_train(false);
  // Set the solver_mode according to the current Caffe::mode.
  switch (Caffe::mode()) {
    case Caffe::CPU:
      param.set_solver_mode(SolverParameter_SolverMode_CPU);
      break;
    case Caffe::GPU:
      param.set_solver_mode(SolverParameter_SolverMode_GPU);
      break;
    default:
      LOG(FATAL) << "Unknown Caffe mode: " << Caffe::mode();
  }
  InitSolver(param);
  delta_ = (solver_type() == SolverParameter_SolverType_ADAGRAD ||
            solver_type() == SolverParameter_SolverType_RMSPROP) ?
           param.delta() : 0;
}
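// Illustrative call (an assumption; the field values are hypothetical, but
// every field below is a standard SolverParameter entry in protobuf text
// format):
//
//   InitSolverFromProtoString(
//       "train_net: 'examples/least_squares_train.prototxt' "
//       "base_lr: 0.01 "
//       "momentum: 0.9 "
//       "weight_decay: 0.0005 "
//       "lr_policy: 'fixed' ");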
void CheckLeastSquaresUpdate(
    const vector<shared_ptr<Blob<Dtype> > >& updated_params) {
  const int D = channels_ * height_ * width_;

  const Blob<Dtype>& updated_weights = *updated_params[0];
  const Blob<Dtype>& updated_bias = *updated_params[1];

  Net<Dtype>& net = *this->solver_->net();
  ASSERT_TRUE(net.has_layer("innerprod"));
  const vector<shared_ptr<Blob<Dtype> > >& param_blobs =
      net.layer_by_name("innerprod")->blobs();
  ASSERT_EQ(2, param_blobs.size());
  const Blob<Dtype>& solver_updated_weights = *param_blobs[0];
  ASSERT_EQ(D, solver_updated_weights.count());
  const double kPrecision = 1e-2;
  const double kMinPrecision = 1e-7;
  for (int i = 0; i < D; ++i) {
    const Dtype expected_updated_weight = updated_weights.cpu_data()[i];
    const Dtype solver_updated_weight = solver_updated_weights.cpu_data()[i];
    const Dtype error_margin = std::max(kMinPrecision, kPrecision *
        std::min(fabs(expected_updated_weight), fabs(solver_updated_weight)));
    EXPECT_NEAR(expected_updated_weight, solver_updated_weight, error_margin);
  }
  const Blob<Dtype>& solver_updated_bias_blob = *param_blobs[1];
  ASSERT_EQ(1, solver_updated_bias_blob.count());
  const Dtype expected_updated_bias = updated_bias.cpu_data()[0];
  const Dtype solver_updated_bias = solver_updated_bias_blob.cpu_data()[0];
  const Dtype error_margin = std::max(kMinPrecision, kPrecision *
      std::min(fabs(expected_updated_bias), fabs(solver_updated_bias)));
  EXPECT_NEAR(expected_updated_bias, solver_updated_bias, error_margin);

  // Check the solver's history -- should contain the previous update value.
  if (solver_type() == SolverParameter_SolverType_SGD) {
    const vector<shared_ptr<Blob<Dtype> > >& history = solver_->history();
    ASSERT_EQ(2, history.size());
    for (int i = 0; i < D; ++i) {
      const Dtype expected_history = updated_weights.cpu_diff()[i];
      const Dtype solver_history = history[0]->cpu_data()[i];
      const Dtype error_margin_hist = std::max(kMinPrecision, kPrecision *
          std::min(fabs(expected_history), fabs(solver_history)));
      EXPECT_NEAR(expected_history, solver_history, error_margin_hist);
    }
    const Dtype expected_history = updated_bias.cpu_diff()[0];
    const Dtype solver_history = history[1]->cpu_data()[0];
    const Dtype error_margin_hist = std::max(kMinPrecision, kPrecision *
        std::min(fabs(expected_history), fabs(solver_history)));
    EXPECT_NEAR(expected_history, solver_history, error_margin_hist);
  }
}
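// Note: the margin above amounts to a ~1% relative-error check with an
// absolute floor, i.e. the test asserts
//   |expected - actual| <= max(1e-7, 0.01 * min(|expected|, |actual|)).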
Teuchos::RCP<Belos::SolverManager<Scalar,MV,OP> >
build_solver(Teuchos::ParameterList& test_params,
             Teuchos::RCP<Belos::LinearProblem<Scalar,MV,OP> > problem)
{
  Teuchos::RCP<Belos::SolverManager<Scalar,MV,OP> > solver;

  Teuchos::ParameterList bparams;
  if (test_params.isSublist("Belos")) {
    bparams = test_params.sublist("Belos");
  }
  Teuchos::RCP<Teuchos::ParameterList> rcpparams = Teuchos::rcpFromRef(bparams);

  std::string solver_type("not specified");
  Ifpack2::getParameter(test_params, "solver_type", solver_type);
  if (solver_type == "PseudoBlockCG") {
    solver = Teuchos::rcp(new Belos::PseudoBlockCGSolMgr<Scalar,MV,OP>(problem, rcpparams));
  }
  else if (solver_type == "BlockCG") {
    solver = Teuchos::rcp(new Belos::BlockCGSolMgr<Scalar,MV,OP>(problem, rcpparams));
  }
// PseudoBlockGmres does not work right now with QD
#ifndef USING_QD
  else if (solver_type == "PseudoBlockGmres") {
    solver = Teuchos::rcp(new Belos::PseudoBlockGmresSolMgr<Scalar,MV,OP>(problem, rcpparams));
  }
#endif
  else if (solver_type == "BlockGmres") {
    solver = Teuchos::rcp(new Belos::BlockGmresSolMgr<Scalar,MV,OP>(problem, rcpparams));
  }
  else if (solver_type == "TFQMR") {
    solver = Teuchos::rcp(new Belos::TFQMRSolMgr<Scalar,MV,OP>(problem, rcpparams));
  }
  else if (solver_type == "not specified") {
    throw std::runtime_error("Error in build_solver: solver_type not specified.");
  }
  else {
    std::ostringstream os;
    os << "Error in build_solver: solver_type (" << solver_type << ") not recognized.";
    os << "\nIfpack2's test-driver recognizes these solvers: PseudoBlockCG, BlockCG, PseudoBlockGmres, BlockGmres, TFQMR.";
    throw std::runtime_error(os.str());
  }
  return solver;
}
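// Usage sketch (an assumption, not code from the test driver): set up the
// linear problem A*X = B and hand it to the builder. A, X, and B are
// hypothetical names; LinearProblem and setProblem are standard Belos calls.
void example_linsolve(Teuchos::ParameterList& test_params,
                      const Teuchos::RCP<const OP>& A,
                      const Teuchos::RCP<MV>& X,
                      const Teuchos::RCP<const MV>& B)
{
  Teuchos::RCP<Belos::LinearProblem<Scalar,MV,OP> > problem =
      Teuchos::rcp(new Belos::LinearProblem<Scalar,MV,OP>(A, X, B));
  TEUCHOS_ASSERT(problem->setProblem());  // finalize before handing to a solver
  Teuchos::RCP<Belos::SolverManager<Scalar,MV,OP> > solver =
      build_solver(test_params, problem);
  const Belos::ReturnType ret = solver->solve();
  if (ret != Belos::Converged) {
    std::cout << "Belos solver did not converge." << std::endl;
  }
}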
Teuchos::RCP<Belos::SolverManager<Scalar,MV,OP> >
build_solver(const Teuchos::RCP<const Teuchos::Comm<int> >& comm,
             Teuchos::ParameterList& test_params,
             Teuchos::RCP<Belos::LinearProblem<Scalar,MV,OP> > problem)
{
  Teuchos::RCP<Belos::SolverManager<Scalar,MV,OP> > solver;

  Teuchos::ParameterList bparams;
  if (test_params.isSublist("Belos")) {
    bparams = test_params.sublist("Belos");
  }
  Teuchos::RCP<Teuchos::ParameterList> rcpparams = Teuchos::rcpFromRef(bparams);

  std::string solver_type("not specified");
  Ifpack2::getParameter(test_params, "solver_type", solver_type);
  if (solver_type == "BlockGmres") {
    // if (comm->getRank() == 0) std::cout << *rcpparams << std::endl;
    solver = Teuchos::rcp(new Belos::BlockGmresSolMgr<Scalar,MV,OP>(problem, rcpparams));
  }
  // else if (solver_type == "PseudoBlockGmres") {
  //   solver = Teuchos::rcp(new Belos::PseudoBlockGmresSolMgr<Scalar,MV,OP>(problem,rcpparams));
  // }
  // else if (solver_type == "PseudoBlockCG") {
  //   solver = Teuchos::rcp(new Belos::PseudoBlockCGSolMgr<Scalar,MV,OP>(problem,rcpparams));
  // }
  // else if (solver_type == "TFQMR") {
  //   solver = Teuchos::rcp(new Belos::TFQMRSolMgr<Scalar,MV,OP>(problem,rcpparams));
  // }
  else if (solver_type == "not specified") {
    throw std::runtime_error("Error in build_solver: solver_type not specified.");
  }
  else {
    std::ostringstream os;
    os << "Error in build_solver: solver_type (" << solver_type << ") not recognized.";
    os << "\nIfpack2's test-driver currently recognizes only BlockGmres here; "
          "PseudoBlockGmres, PseudoBlockCG, and TFQMR are commented out above.";
    throw std::runtime_error(os.str());
  }
  return solver;
}
// Compute an update value given the current state of the train net,
// using the analytical formula for the least squares gradient.
// updated_params will store the updated weight and bias results,
// using the blobs' diffs to hold the update values themselves.
void ComputeLeastSquaresUpdate(const Dtype learning_rate,
    const Dtype weight_decay, const Dtype momentum,
    vector<shared_ptr<Blob<Dtype> > >* updated_params) {
  const int N = num_;
  const int D = channels_ * height_ * width_;

  // Run a forward pass, and manually compute the update values from the
  // result.
  Net<Dtype>& net = *this->solver_->net();
  vector<Blob<Dtype>*> empty_bottom_vec;
  net.Forward(empty_bottom_vec);
  ASSERT_TRUE(net.has_blob("data"));
  const Blob<Dtype>& data = *net.blob_by_name("data");
  ASSERT_TRUE(net.has_blob("targets"));
  const Blob<Dtype>& targets = *net.blob_by_name("targets");
  ASSERT_TRUE(net.has_layer("innerprod"));
  const vector<shared_ptr<Blob<Dtype> > >& param_blobs =
      net.layer_by_name("innerprod")->blobs();
  const int num_param_blobs = 2;
  ASSERT_EQ(num_param_blobs, param_blobs.size());
  const Blob<Dtype>& weights = *param_blobs[0];
  const Blob<Dtype>& bias = *param_blobs[1];
  ASSERT_EQ(D * N, data.count());
  ASSERT_EQ(N, targets.count());
  ASSERT_EQ(D, weights.count());
  ASSERT_EQ(1, bias.count());

  updated_params->clear();
  updated_params->resize(num_param_blobs);
  for (int i = 0; i < num_param_blobs; ++i) {
    (*updated_params)[i].reset(new Blob<Dtype>());
  }
  Blob<Dtype>& updated_weights = *(*updated_params)[0];
  updated_weights.ReshapeLike(weights);
  Blob<Dtype>& updated_bias = *(*updated_params)[1];
  updated_bias.ReshapeLike(bias);

  // Loop over the D weights plus one extra index (i == D) for the bias.
  for (int i = 0; i <= D; ++i) {
    // Compute the derivative with respect to the ith weight (i.e., the ith
    // element of the gradient).
    Dtype grad = 0;
    for (int j = 0; j <= D; ++j) {
      // Compute element (i, j) of X^T * X.
      Dtype element = 0;
      for (int k = 0; k < N; ++k) {
        // (i, k) in X^T (== (k, i) in X) times (k, j) in X.
        const Dtype element_i = (i == D) ? 1 : data.cpu_data()[k * D + i];
        const Dtype element_j = (j == D) ? 1 : data.cpu_data()[k * D + j];
        element += element_i * element_j;
      }
      if (j == D) {
        grad += element * bias.cpu_data()[0];
      } else {
        grad += element * weights.cpu_data()[j];
      }
    }
    for (int k = 0; k < N; ++k) {
      const Dtype element_i = (i == D) ? 1 : data.cpu_data()[k * D + i];
      grad -= element_i * targets.cpu_data()[k];
    }
    // Scale the gradient over the N samples.
    grad /= N;
    // Add the weight decay to the gradient.
    grad += weight_decay *
        ((i == D) ? bias.cpu_data()[0] : weights.cpu_data()[i]);
    // Finally, compute update.
    const vector<shared_ptr<Blob<Dtype> > >& history = solver_->history();
    ASSERT_EQ(2, history.size());  // 1 blob for weights, 1 for bias
    Dtype update_value = learning_rate * grad;
    const Dtype history_value = (i == D) ?
        history[1]->cpu_data()[0] : history[0]->cpu_data()[i];
    const Dtype temp = momentum * history_value;
    switch (solver_type()) {
      case SolverParameter_SolverType_SGD:
        update_value += temp;
        break;
      case SolverParameter_SolverType_NESTEROV:
        update_value += temp;
        // step back then over-step
        update_value = (1 + momentum) * update_value - temp;
        break;
      case SolverParameter_SolverType_ADAGRAD:
        update_value /= std::sqrt(history_value + grad * grad) + delta_;
        break;
      case SolverParameter_SolverType_RMSPROP: {
        const Dtype rms_decay = 0.95;
        update_value /= std::sqrt(rms_decay * history_value
            + grad * grad * (1 - rms_decay)) + delta_;
        }
        break;
      default:
        LOG(FATAL) << "Unknown solver type: " << solver_type();
    }
    if (i == D) {
      updated_bias.mutable_cpu_diff()[0] = update_value;
      updated_bias.mutable_cpu_data()[0] = bias.cpu_data()[0] - update_value;
    } else {
      updated_weights.mutable_cpu_diff()[i] = update_value;
      updated_weights.mutable_cpu_data()[i] =
          weights.cpu_data()[i] - update_value;
    }
  }
}
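// For reference, the update rules the switch above implements, with
// g = grad, h = history_value, lr = learning_rate (read directly off the
// code; nothing here is assumed beyond it):
//   SGD:      update = lr*g + momentum*h
//   Nesterov: v = lr*g + momentum*h;  update = (1 + momentum)*v - momentum*h
//   AdaGrad:  update = lr*g / (sqrt(h + g*g) + delta_)
//   RMSProp:  update = lr*g / (sqrt(0.95*h + 0.05*g*g) + delta_)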