// Computes the arithmetic mean of every block of Hessian values, layer by layer.
//
// hessian - per-layer Hessian data; each layer is iterated as a sequence of
//           float blocks. It is only read here.
// history - not used by this function.
//
// Returns one inner vector per NON-EMPTY layer (empty layers are skipped, so the
// outer index is not the layer index), holding one average per block of that layer.
std::vector<std::vector<float> > network_trainer_sdlm::get_average_hessian_list(
	network_data_smart_ptr hessian,
	const std::vector<testing_result_smart_ptr>& history) const
{
	std::vector<std::vector<float> > res;

	for(network_data::iterator it = hessian->begin(); it != hessian->end(); ++it)
	{
		if ((*it)->empty())
			continue;

		std::vector<float> hs_list;
		hs_list.reserve((*it)->size());
		for(layer_data::iterator it2 = (*it)->begin(); it2 != (*it)->end(); ++it2)
		{
			// NOTE: a zero-length block would evaluate 0.0F / 0 here; blocks are
			// presumably never empty inside a non-empty layer - confirm with callers.
			float sum = std::accumulate(it2->begin(), it2->end(), 0.0F);
			float new_hessian_per_block = sum / it2->size();
			hs_list.push_back(new_hessian_per_block);
		}
		res.push_back(hs_list);
	}

	return res;
}
std::string network_trainer_sdlm::convert_hessian_to_training_vector( network_data_smart_ptr hessian, const std::vector<std::vector<float> >& average_hessian_list, const std::vector<testing_result_smart_ptr>& history) const { float min_hessian = std::numeric_limits<float>::max(); float max_hessian = std::numeric_limits<float>::min(); for(std::vector<std::vector<float> >::const_iterator it = average_hessian_list.begin(); it != average_hessian_list.end(); ++it) { const std::vector<float>& avl = *it; std::vector<float>::const_iterator it_max = std::max_element(avl.begin(), avl.end()); if (it_max != avl.end()) max_hessian = std::max(max_hessian, *it_max); std::vector<float>::const_iterator it_min = std::min_element(avl.begin(), avl.end()); if (it_min != avl.end()) min_hessian = std::min(min_hessian, *it_min); } float max_mu_current = std::min(max_mu, max_hessian * 0.5F); float mu = min_hessian * 0.5F * powf(mu_increase_factor, static_cast<float>(history.size())); mu = std::min(mu, max_mu_current); float eta = mu * speed * get_tail_decay_factor(static_cast<unsigned int>(history.size())); std::vector<std::vector<float> > avg_lr_lists; for(network_data::iterator it = hessian->begin(); it != hessian->end(); it++) { if ((*it)->size() > 0) { std::vector<float> avg_lr_list; for(layer_data::iterator it2 = (*it)->begin(); it2 != (*it)->end(); it2++) { hessian_transform ht(mu, eta); std::transform(it2->begin(), it2->end(), it2->begin(), ht); float sum = std::accumulate(it2->begin(), it2->end(), 0.0F); float new_vg_lr = sum / it2->size(); avg_lr_list.push_back(new_vg_lr); } avg_lr_lists.push_back(avg_lr_list); } } std::string average_lr_str; for(std::vector<std::vector<float> >::const_iterator it = avg_lr_lists.begin(); it != avg_lr_lists.end(); it++) { if (it != avg_lr_lists.begin()) average_lr_str += ", "; for(std::vector<float>::const_iterator it2 = it->begin(); it2 != it->end(); it2++) { if (it2 != it->begin()) average_lr_str += " "; average_lr_str += 
(boost::format("%|1$.1e|") % *it2).str(); } } return (boost::format("Eta = %|1$.2e|, Mu = %|2$.2e|, LR (%|3$s|)") % eta % mu % average_lr_str).str(); }
// Transforms the Hessian data in place, using a separate mu for every block, and
// returns a human-readable summary of the result.
//
// hessian              - per-layer data; every element of every block in every
//                        non-empty layer is overwritten with hessian_transform(mu, eta)
//                        applied to it.
// average_hessian_list - supplies mu: entry [k][j] is used for block j of the k-th
//                        NON-EMPTY layer. It is walked in lock-step with hessian and
//                        no bounds are checked, so it must have that exact shape.
// history              - only its size is used, as the argument of get_tail_decay_factor.
//
// Returns "LR (...)" listing the mean of each transformed block: blocks of one
// layer separated by spaces, layers by ", ".
std::string network_trainer_sdlm::convert_hessian_to_training_vector_per_layer_mu(
	network_data_smart_ptr hessian,
	const std::vector<std::vector<float> >& average_hessian_list,
	const std::vector<testing_result_smart_ptr>& history) const
{
	const unsigned int history_length = static_cast<unsigned int>(history.size());

	std::vector<std::vector<float> > layer_rate_means;
	std::vector<std::vector<float> >::const_iterator layer_avg_it = average_hessian_list.begin();
	for(network_data::iterator layer_it = hessian->begin(); layer_it != hessian->end(); layer_it++)
	{
		// Layers without data have no entry in average_hessian_list; skip them
		// without advancing layer_avg_it.
		if (!((*layer_it)->size() > 0))
			continue;

		std::vector<float> block_rate_means;
		std::vector<float>::const_iterator block_avg_it = layer_avg_it->begin();
		layer_data::iterator block_it = (*layer_it)->begin();
		while (block_it != (*layer_it)->end())
		{
			// The block's own average Hessian serves as its mu.
			float mu = *block_avg_it;
			float eta = mu * speed * get_tail_decay_factor(history_length);

			// Overwrite the block in place, then record the mean of the new values.
			hessian_transform block_transform(mu, eta);
			std::transform(block_it->begin(), block_it->end(), block_it->begin(), block_transform);

			float total = std::accumulate(block_it->begin(), block_it->end(), 0.0F);
			block_rate_means.push_back(total / block_it->size());

			block_it++;
			block_avg_it++;
		}
		layer_rate_means.push_back(block_rate_means);
		++layer_avg_it;
	}

	std::string summary;
	bool first_layer = true;
	for(std::vector<std::vector<float> >::const_iterator means_it = layer_rate_means.begin(); means_it != layer_rate_means.end(); means_it++)
	{
		if (!first_layer)
			summary += ", ";
		first_layer = false;

		bool first_block = true;
		for(std::vector<float>::const_iterator value_it = means_it->begin(); value_it != means_it->end(); value_it++)
		{
			if (!first_block)
				summary += " ";
			first_block = false;

			boost::format value_format("%|1$.1e|");
			value_format % *value_it;
			summary += value_format.str();
		}
	}

	boost::format result_format("LR (%|1$s|)");
	result_format % summary;
	return result_format.str();
}
// Writes every non-empty block of the given data to its own text file, one value
// per line.
//
// hessian         - per-layer data to dump; it is only read.
// filename_prefix - leading part of each file name. Files are named
//                   "<prefix>_<LL>_<BB>.txt", where LL is the layer's position in
//                   hessian and BB the block's position within that layer, both
//                   zero-padded to at least two digits.
//
// The stream state is never checked, so a file that cannot be opened is silently
// skipped.
void network_trainer_sdlm::dump_lists(
	network_data_smart_ptr hessian,
	const char * filename_prefix) const
{
	for(network_data::const_iterator layer_it = hessian->begin(); layer_it != hessian->end(); layer_it++)
	{
		for(layer_data::const_iterator block_it = (*layer_it)->begin(); block_it != (*layer_it)->end(); block_it++)
		{
			// Nothing to write for a block without values; no file is created.
			if (block_it->empty())
				continue;

			boost::format name_format("%1%_%|2$02d|_%|3$02d|.txt");
			name_format % filename_prefix % (layer_it - hessian->begin()) % (block_it - (*layer_it)->begin());
			std::string filename = name_format.str();

			std::ofstream out(filename.c_str());
			std::vector<float>::const_iterator value_it = block_it->begin();
			while (value_it != block_it->end())
			{
				out << *value_it << std::endl;
				++value_it;
			}
		}
	}
}