void ANN::train(int32_t opt_type) { label_decode(); srand( (unsigned)time(NULL)); _in_hid_w.resize(_feature_dim, std::vector<float>(_hid_lev_cnt, 0.0)); _hid_out_w.resize(_hid_lev_cnt, std::vector<float>(_out_lev_cnt, 0.0)); _hid_loss_val.resize(_hid_lev_cnt, 0.0); _out_loss_val.resize(_out_lev_cnt, 0.0); for (int32_t j = 0; j < _hid_lev_cnt; ++j ){ for (int32_t i = 0; i < _feature_dim; ++i) { _in_hid_w[i][j] = (rand() % 100) / 10000.0; } for (int32_t k = 0; k < _out_lev_cnt; ++k) { _hid_out_w[j][k] = (rand() % 100) / 10000.0; } } _hid_lev_output.resize(_hid_lev_cnt, 0.0); _out_lev_output.resize(_out_lev_cnt, 0.0); float last_loss_val = 0.0; for (int32_t iter = 0; iter < _max_iter_cnt; ++iter) { // TODO shuffle the samples stochastic_gradient_descent(); float loss = calc_loss_val(); std::cout << "Iter " << iter + 1 << " : " << loss << std::endl; for (int32_t i = 0; i < _feature_dim; ++i) { for (int32_t j = 0; j < _hid_lev_cnt; ++j ){ std::cout << _in_hid_w[i][j] << " "; } std::cout << std::endl; } if (fabs(last_loss_val - loss) < EPS) { break; } last_loss_val = loss; } }
// Smoke test for stochastic_gradient_descent(): minimises three independent
// one-dimensional parabolas f_i(x) = x[i]^2 starting from {1, 2, 3}.  Each
// coordinate of the returned minimiser should be driven close to zero.
// Prints the result to stdout for manual inspection; no return value.
void test_stochastic_gradient_descent() {
    const size_t n_functions = 3;
    std::vector<std::function<double(const std::vector<double>&)>> functions(n_functions);
    for (size_t i = 0; i < n_functions; ++i) {
        // Capture only i (by value) so each lambda keeps its own coordinate
        // index; the blanket [=] copied every local in scope for no benefit.
        functions[i] = [i](const std::vector<double>& x) -> double {
            // x*x instead of std::pow(x, 2): pow is a general transcendental
            // and is far slower for small integer exponents.
            return x[i] * x[i];
        };
    }

    const std::vector<double> initial_x {1.0, 2.0, 3.0};
    const double step_size {1.0};
    const double tolerance {1e-10};
    const int max_iterations {10000};
    // delta: presumably the finite-difference step for the numeric gradient —
    // TODO confirm against stochastic_gradient_descent().
    const double delta {0.001};

    std::cout << "Testing stochastic gradient descent..." << std::endl;
    std::vector<double> result {stochastic_gradient_descent(
        functions, initial_x, step_size, tolerance, max_iterations, delta)};

    std::cout << "These values have to be close to zero: ";
    for (const auto& v : result) std::cout << v << " ";
    std::cout << std::endl << "Test complete" << std::endl;
}