/// Evaluates the log-density and writes its gradient into @p gradient.
///
/// For this axis-aligned Gaussian the gradient is analytic:
/// d/dx_i log p(x) = -2 * x_i * axis_scales[i]. The value itself is
/// delegated to the value-only log_prob(state) overload.
double Gaussian::log_prob(const MCMC::State& state, MCMC::State& gradient) {
  const size_t dims = state.size();
  for (size_t d = 0; d != dims; ++d)
    gradient[d] = -2.0 * state[d] * axis_scales[d];
  return log_prob(state);
}
/// Evaluates the log-density, its gradient, and a curvature tensor.
///
/// Fills @p hessian with a diagonal matrix whose entries are
/// 2 * axis_scales[i], then delegates value and gradient to the
/// two-argument overload.
///
/// NOTE(review): the diagonal is +2*scale, i.e. the NEGATIVE of the true
/// Hessian of log p (which is -2*scale). This matches its reuse as the
/// Fisher information in log_prob_and_fisher — confirm that convention
/// is intended by all callers of this overload.
double Gaussian::log_prob(const MCMC::State& state, MCMC::State& gradient, MCMC::State::Tensor& hessian) {
  hessian.zero();
  const size_t dims = state.size();
  for (size_t d = 0; d != dims; ++d)
    hessian(d, d) = 2.0 * axis_scales[d];
  return log_prob(state, gradient);
}
/// Evaluates log-density, gradient, and Fisher information, plus the
/// derivative of the Fisher matrix along each coordinate.
///
/// The Fisher matrix of this Gaussian is constant in @p state, so every
/// per-coordinate Fisher-gradient tensor is sized (n x n) and zeroed.
/// Value, gradient, and the Fisher matrix itself come from the
/// three-argument log_prob overload.
double Gaussian::log_prob_and_fisher( const MCMC::State& state, MCMC::State& gradient, MCMC::State::Tensor& fisher, std::vector<MCMC::State::Tensor>& fisher_gradient) {
  const size_t dims = state.size();
  fisher_gradient.resize(dims);
  for (auto& tensor : fisher_gradient) {
    tensor.resize(dims, dims);
    tensor = 0.0; // constant Fisher => zero derivative everywhere
  }
  return log_prob(state, gradient, fisher);
}
std::vector<std::pair<std::string, float>> language_model::top_k(const sentence& prev, size_t k) const { // this is horribly inefficient due to this LM's structure using pair_t = std::pair<std::string, float>; auto candidates = util::make_fixed_heap<pair_t>( k, [](const pair_t& a, const pair_t& b) { return a.second > b.second; }); token_list candidate{prev, vocabulary_}; candidate.push_back(0_tid); for (const auto& word : vocabulary_) { candidate[candidate.size() - 1] = word.second; candidates.emplace(word.first, log_prob(candidate)); } return candidates.extract_top(); }
/// Computes the perplexity of @p tokens under this model:
/// 10^(-log10_prob / length).
///
/// @throws language_model_exception if the sentence is empty (the
/// per-token average would divide by zero).
float language_model::perplexity(const sentence& tokens) const {
  if (tokens.size() == 0)
    throw language_model_exception{"perplexity() called on empty sentence"};
  const float avg_log10 = log_prob(tokens) / tokens.size();
  return std::pow(10.0f, -avg_log10);
}
float language_model::log_prob(const sentence& tokens) const { return log_prob(token_list{tokens, vocabulary_}); }