/* Pick one snippet from the pool with probability proportional to its
 * score under (scorer, state).
 *
 * snippets       candidate pool
 * scorer, state  forwarded to each snippet's score()
 * weight_factor  currently unused (TODO)
 *
 * Returns a default-constructed snippet_t when the pool is empty (or,
 * defensively, if the selection pass falls through).
 *
 * Scores are computed once and cached; the original version re-evaluated
 * score() in the second pass, doing twice the work and — for a stateful
 * or nondeterministic scorer — risking a sum inconsistent with the
 * per-element subtraction.
 */
snippet_t select_weighted(const snippet_ptr_set_t& snippets, const scorer_t& scorer, const State& state, int8_t /*weight_factor*/) {//TODO
    /* shortcuts: save us a rand() call: */
    if (snippets.empty()) {
        return snippet_t();
    }
    if (snippets.size() == 1) {
        return *snippets.begin();
    }

    /* first pass: compute and cache each score, accumulating the total
     * from which 'select' is drawn */
    std::vector<score_t> scores;
    scores.reserve(snippets.size());
    score_t sum_score = 0;
    const snippet_ptr_set_t::const_iterator& end = snippets.end();
    for (snippet_ptr_set_t::const_iterator iter = snippets.begin(); iter != end; ++iter) {
        const score_t s = (*iter)->score(scorer, state);
        scores.push_back(s);
        sum_score += s;
    }

    score_t select = pick_rand(sum_score);

    /* second pass: subtract cached scores from 'select'; the element
     * that would drive it below zero is the weighted pick */
    size_t idx = 0;
    for (snippet_ptr_set_t::const_iterator iter = snippets.begin(); iter != end; ++iter, ++idx) {
        const score_t s = scores[idx];
        if (select < s) {
            return *iter;
        }
        select -= s;
    }
    /* should be unreachable when sum_score > 0; defensive fallback */
    return snippet_t();
}
/* Uniformly pick one snippet from the pool.
 * Returns a default-constructed snippet_t when the pool is empty. */
snippet_t select_random(const snippet_ptr_set_t& snippets) {
    /* shortcuts: save us a rand() call: */
    if (snippets.empty()) {
        return snippet_t();
    }
    if (snippets.size() == 1) {
        return *snippets.begin();
    }

    size_t select = pick_rand(snippets.size());

    /* walk forward to the chosen element — the container only offers
     * sequential iteration */
    snippet_ptr_set_t::const_iterator pos = snippets.begin();
    for (; select != 0; --select) {
        ++pos;
    }
    return *pos;
}
std::pair<size_t, size_t> tune_params( const double* divs, size_t num_bags, const std::vector<label_type> &labels, const boost::ptr_vector<Kernel> &kernels, const std::vector<double> &c_vals, const svm_parameter &svm_params, size_t folds, size_t num_threads) { typedef std::pair<size_t, size_t> config; size_t num_kernels = kernels.size(); if (num_kernels == 0) { BOOST_THROW_EXCEPTION(std::domain_error( "no kernels in the kernel group")); } else if (num_kernels == 1 && c_vals.size() == 1) { // only one option, we already know what's best return config(0, 0); } // want to be able to take sub-lists of kernels. // this is like c_array(), but constness is slightly different and // it works in old, old boosts. const Kernel * const * kern_array = reinterpret_cast<const Kernel* const*>(&kernels.begin().base()[0]); // how many threads are we using? num_threads = npdivs::get_num_threads(num_threads); if (num_threads > num_kernels) num_threads = num_kernels; if (num_threads == 1) { // don't actually make a new thread if it's just 1-threaded double score; return pick_rand(tune_params_single(divs, num_bags, labels, kern_array, num_kernels, c_vals, svm_params, folds, &score)); } // grunt work to set up multithreading boost::ptr_vector< tune_params_worker<label_type> > workers; std::vector<boost::exception_ptr> errors(num_threads); boost::thread_group worker_threads; std::vector< std::vector<config> > results(num_threads); std::vector<double> scores(num_threads, 0); size_t kerns_per_thread = (size_t) std::ceil(double(num_kernels) / num_threads); size_t kern_start = 0; // give each thread a few kernels and get their most-accurate configs // TODO: better allocation algo for (size_t i = 0; i < num_threads; i++) { int n_kerns = (int)(std::min(kern_start+kerns_per_thread, num_kernels)) - (int)(kern_start); if (n_kerns <= 0) break; workers.push_back(new tune_params_worker<label_type>( divs, num_bags, labels, kern_array + kern_start, n_kerns, c_vals, svm_params, folds, &results[i], 
&scores[i], errors[i] )); worker_threads.create_thread(boost::ref(workers[i])); kern_start += kerns_per_thread; } worker_threads.join_all(); for (size_t i = 0; i < num_threads; i++) if (errors[i]) boost::rethrow_exception(errors[i]); // get all the best configs into one vector double best_score = *std::max_element( scores.begin(), scores.end()); std::vector<config> best_configs; if (best_score == -std::numeric_limits<double>::infinity()) { FILE_LOG(logERROR) << "all kernels were terrible"; BOOST_THROW_EXCEPTION(std::domain_error("all kernels were terrible")); } kern_start = 0; for (size_t i = 0; i < num_threads; i++) { if (scores[i] == best_score) { for (size_t j = 0; j < results[i].size(); j++) { config cfg = results[i][j]; best_configs.push_back( config(cfg.first + kern_start, cfg.second)); } } kern_start += kerns_per_thread; } return pick_rand(best_configs); }