예제 #1
0
파일: selector.cpp 프로젝트: nickbp/marky
    /* Chooses one snippet using each snippet's score(scorer, state) as its
     * weight: the scores are summed, pick_rand() draws a value from that sum,
     * and the snippets are walked in order until the draw falls inside one
     * snippet's share. Returns a default-constructed snippet_t when the set is
     * empty or when the walk ends without a match. The weight factor argument
     * is accepted but currently unused (see TODO). */
    snippet_t select_weighted(const snippet_ptr_set_t& snippets,
            const scorer_t& scorer, const State& state,
            int8_t /*weight_factor*/) {//TODO
        /* trivial sizes are answered without drawing a random number */
        if (snippets.empty()) {
            return snippet_t();
        }
        if (snippets.size() == 1) {
            return *snippets.begin();
        }

        const snippet_ptr_set_t::const_iterator last = snippets.end();

        /* pass 1: accumulate the total score across all snippets */
        score_t total = 0;
        snippet_ptr_set_t::const_iterator it = snippets.begin();
        while (it != last) {
            total += (*it)->score(scorer, state);
            ++it;
        }

        score_t remaining = pick_rand(total);

        /* pass 2: re-score each snippet and consume 'remaining' until it is
         * smaller than the current snippet's score; that snippet wins */
        it = snippets.begin();
        while (it != last) {
            const score_t current = (*it)->score(scorer, state);
            if (remaining < current) {
                return *it;
            }
            remaining -= current;
            ++it;
        }

        /* nothing matched (e.g. the draw was never below any score) */
        return snippet_t();
    }
예제 #2
0
파일: selector.cpp 프로젝트: nickbp/marky
    /* Returns one element of 'snippets' selected by position: pick_rand() is
     * asked for an offset based on the container size, and an iterator is
     * stepped forward that many times from the beginning. An empty container
     * yields a default-constructed snippet_t; a single-element container
     * yields that element without calling pick_rand().
     * NOTE(review): assumes pick_rand(n) returns a value strictly below n,
     * otherwise the iterator would run past the last element -- confirm. */
    snippet_t select_random(const snippet_ptr_set_t& snippets) {
        /* trivial sizes are answered without drawing a random number */
        if (snippets.empty()) {
            return snippet_t();
        }
        if (snippets.size() == 1) {
            return *snippets.begin();
        }

        size_t steps_left = pick_rand(snippets.size());

        /* advance one element at a time until the offset is used up */
        snippet_ptr_set_t::const_iterator pos = snippets.begin();
        for (; steps_left != 0; --steps_left) {
            ++pos;
        }
        return *pos;
    }
예제 #3
0
/**
 * Picks a tuning configuration for the given kernels and C values.
 *
 * The result is a pair whose first member is an index into `kernels`; the
 * second member is presumably an index into `c_vals` (it is passed through
 * unchanged from the workers -- confirm against tune_params_single).
 *
 * Flow:
 *  - With exactly one kernel and one C value, (0, 0) is returned directly.
 *  - With one thread, tune_params_single() is run inline and one of its
 *    results is chosen with pick_rand().
 *  - Otherwise the kernels are split into contiguous chunks, one
 *    tune_params_worker per chunk is run on its own boost thread, and
 *    pick_rand() chooses among the configs of the worker(s) that reported the
 *    highest score.
 *
 * @param num_threads requested thread count; resolved through
 *        npdivs::get_num_threads() and capped at the number of kernels.
 *
 * @throws std::domain_error (via BOOST_THROW_EXCEPTION) if `kernels` is empty
 *         or if the best score is negative infinity.
 * @throws whatever exception a worker stored in its error slot (rethrown
 *         after all threads are joined).
 */
std::pair<size_t, size_t> tune_params(
        const double* divs, size_t num_bags,
        const std::vector<label_type> &labels,
        const boost::ptr_vector<Kernel> &kernels,
        const std::vector<double> &c_vals,
        const svm_parameter &svm_params,
        size_t folds,
        size_t num_threads)
{
    typedef std::pair<size_t, size_t> config;

    size_t num_kernels = kernels.size();

    if (num_kernels == 0) {
        BOOST_THROW_EXCEPTION(std::domain_error(
                    "no kernels in the kernel group"));
    } else if (num_kernels == 1 && c_vals.size() == 1) {
        // only one option, we already know what's best
        return config(0, 0);
    }


    // want to be able to take sub-lists of kernels.
    // this is like c_array(), but constness is slightly different and
    // it works in old, old boosts.
    const Kernel * const * kern_array =
        reinterpret_cast<const Kernel* const*>(&kernels.begin().base()[0]);

    // how many threads are we using?
    num_threads = npdivs::get_num_threads(num_threads);
    if (num_threads > num_kernels)
        num_threads = num_kernels;

    if (num_threads == 1) {
        // don't actually make a new thread if it's just 1-threaded
        double score;
        return pick_rand(tune_params_single(divs, num_bags, labels,
                    kern_array, num_kernels, c_vals, svm_params, folds,
                    &score));
    }

    // grunt work to set up multithreading
    boost::ptr_vector< tune_params_worker<label_type> > workers;
    std::vector<boost::exception_ptr> errors(num_threads);
    boost::thread_group worker_threads;

    std::vector< std::vector<config> > results(num_threads);

    // Start every slot at -infinity rather than 0. Because the chunk size is
    // rounded up, the launch loop below can stop before using every slot
    // (e.g. 5 kernels on 4 threads -> chunks of 2, 2, 1 and an idle 4th slot).
    // An idle slot left at 0 would win max_element whenever all real scores
    // are below 0, skipping the "all kernels were terrible" check and leaving
    // best_configs empty for pick_rand().
    // NOTE(review): assumes workers only write their score slot (the
    // single-threaded path above hands tune_params_single an uninitialized
    // double, which suggests it is a pure output) -- confirm.
    std::vector<double> scores(
            num_threads, -std::numeric_limits<double>::infinity());

    size_t kerns_per_thread = static_cast<size_t>(
            std::ceil(double(num_kernels) / num_threads));
    size_t kern_start = 0;

    // give each thread a few kernels and get their most-accurate configs
    // TODO: better allocation algo
    for (size_t i = 0; i < num_threads; i++) {
        int n_kerns =
            static_cast<int>(std::min(kern_start+kerns_per_thread, num_kernels))
            - static_cast<int>(kern_start);

        // all kernels are already assigned; remaining slots stay idle
        if (n_kerns <= 0)
            break;

        workers.push_back(new tune_params_worker<label_type>(
                    divs, num_bags, labels,
                    kern_array + kern_start, n_kerns,
                    c_vals, svm_params, folds,
                    &results[i], &scores[i],
                    errors[i]
        ));

        // workers are appended in step with i, so workers[i] is the one
        // that was just pushed
        worker_threads.create_thread(boost::ref(workers[i]));

        kern_start += kerns_per_thread;
    }
    worker_threads.join_all();

    // surface the first stored worker error, if any
    for (size_t i = 0; i < num_threads; i++)
        if (errors[i])
            boost::rethrow_exception(errors[i]);

    // get all the best configs into one vector
    double best_score = *std::max_element(
            scores.begin(), scores.end());
    std::vector<config> best_configs;

    if (best_score == -std::numeric_limits<double>::infinity()) {
        FILE_LOG(logERROR) << "all kernels were terrible";
        BOOST_THROW_EXCEPTION(std::domain_error("all kernels were terrible"));
    }

    // worker results index kernels relative to the worker's own chunk;
    // shift them by the chunk's starting offset to index into `kernels`
    kern_start = 0;
    for (size_t i = 0; i < num_threads; i++) {
        if (scores[i] == best_score) {
            for (size_t j = 0; j < results[i].size(); j++) {
                config cfg = results[i][j];
                best_configs.push_back(
                        config(cfg.first + kern_start, cfg.second));
            }
        }
        kern_start += kerns_per_thread;
    }

    return pick_rand(best_configs);
}