/** * Evaluates a context, return the conditional probability p(y|x). * * This method calculates the conditional probability p(y|x) for given x and y. * * @param context A list of pair<string, double> indicates names of * the contextual predicates and their values which are to be * evaluated together. * @param outcome The outcome label for which the conditional probability is * calculated. * @return The conditional probability of p(outcome|context). * \sa eval_all() */ double MaxentModel::eval(const context_type& context, const outcome_type& outcome) const{ size_t oid = m_outcome_map->id(outcome); if (oid == m_outcome_map->null_id) { cerr << "[MaxentModel::eval()] unknown outcome id:" << oid << endl; return 0.0; } static vector<double> probs; if (probs.size() != m_outcome_map->size()) probs.resize(m_outcome_map->size()); fill(probs.begin(), probs.end(), 0.0); size_t pid; for (size_t i = 0; i < context.size(); ++i) { pid = m_pred_map->id(context[i].first); if (pid != m_pred_map->null_id) { std::vector<pair<size_t, size_t> >& param = (*m_params)[pid]; float fval = context[i].second; for(size_t j = 0;j < param.size(); ++j) probs[param[j].first] += m_theta[param[j].second] * fval; } else { //#warning how to deal with unseen predicts? //m_debug.debug(0,"Predict id %d not found.",i); } } /* For the rationale behind subtracting max_prob from the log-probabilities see maxentmodel.cpp:maxent::MaxentModel::eval_all*/ // Find the maximum log-prob double max_prob = numeric_limits<double>::min(); for (size_t i = 0; i < probs.size(); ++i) { max_prob = max(max_prob, probs[i]); } double sum = 0.0; for (size_t i = 0; i < probs.size(); ++i) { // Subtract the maximum log-prob from the others to get them in // the (-inf,0] range. probs[i] = exp(probs[i] - max_prob); sum += probs[i]; } for (size_t i = 0; i < probs.size(); ++i) { probs[i] /= sum; } return probs[oid]; }
/** * Evaluates a context, return the conditional distribution of the context. * * This method calculates the conditional probability p(y|x) for each possible * outcome tag y. * * @param context A list of pair<string, double> indicates the contextual * predicates and their values (must be >= 0) which are to be * evaluated together. * @param outcomes An array of the outcomes paired with it's probability * predicted by the model (the conditional distribution). * @param sort_result Whether or not the returned outcome array is sorted * (larger probability first). Default is true. * * TODO: need optimized for large number of outcomes * * \sa eval() */ void MaxentModel::eval_all(const context_type& context, std::vector<pair<outcome_type, double> >& outcomes, bool sort_result) const { assert(m_params); //static vector<double> probs; //REMIND remove static here vector<double> probs; if (probs.size() != m_outcome_map->size()) probs.resize(m_outcome_map->size()); fill(probs.begin(), probs.end(), 0.0); size_t pid; for (size_t i = 0; i < context.size(); ++i) { pid = m_pred_map->id(context[i].first); if (pid != m_pred_map->null_id) { std::vector<pair<size_t, size_t> >& param = (*m_params)[pid]; float fval = context[i].second; for(size_t j = 0;j < param.size(); ++j) probs[param[j].first] += m_theta[param[j].second] * fval; } else { //#warning how to deal with unseen predicts? //m_debug.debug(0,"Predict id %d not found.",i); } } double sum = 0.0; for (size_t i = 0; i < probs.size(); ++i) { probs[i] = exp(probs[i]); sum += probs[i]; } for (size_t i = 0; i < probs.size(); ++i) { probs[i] /= sum; } outcomes.resize(m_outcome_map->size()); for (size_t i = 0;i < outcomes.size(); ++i) { outcomes[i].first = (*m_outcome_map)[i]; outcomes[i].second = probs[i]; } if (sort_result) sort(outcomes.begin(),outcomes.end(), cmp_outcome()); }
/** * Evaluates a context, return the conditional probability p(y|x). * * This method calculates the conditional probability p(y|x) for given x and y. * * @param context A list of pair<string, double> indicates names of * the contextual predicates and their values which are to be * evaluated together. * @param outcome The outcome label for which the conditional probability is * calculated. * @return The conditional probability of p(outcome|context). * \sa eval_all() */ double MaxentModel::eval(const context_type& context, const outcome_type& outcome) const{ size_t oid = m_outcome_map->id(outcome); if (oid == m_outcome_map->null_id) { //cerr << "[MaxentModel::eval()] unknown outcome id:" << oid << endl; return 0.0; } static vector<double> probs; if (probs.size() != m_outcome_map->size()) probs.resize(m_outcome_map->size()); fill(probs.begin(), probs.end(), 0.0); size_t pid; for (size_t i = 0; i < context.size(); ++i) { pid = m_pred_map->id(context[i].first); if (pid != m_pred_map->null_id) { std::vector<pair<size_t, size_t> >& param = (*m_params)[pid]; float fval = context[i].second; for(size_t j = 0;j < param.size(); ++j) probs[param[j].first] += m_theta[param[j].second] * fval; } else { //#warning how to deal with unseen predicts? //m_debug.debug(0,"Predict id %d not found.",i); } } double sum = 0.0; for (size_t i = 0; i < probs.size(); ++i) { probs[i] = exp(probs[i]); if (!finite(probs[i])) probs[i] = numeric_limits<double>::max();// DBL_MAX; sum += probs[i]; } for (size_t i = 0; i < probs.size(); ++i) { probs[i] /= sum; } return probs[oid]; }
/** * Evaluates a context, return the conditional distribution of the context. * * This method calculates the conditional probability p(y|x) for each possible * outcome tag y. * * @param context A list of pair<string, double> indicates the contextual * predicates and their values (must be >= 0) which are to be * evaluated together. * @param outcomes An array of the outcomes paired with it's probability * predicted by the model (the conditional distribution). * @param sort_result Whether or not the returned outcome array is sorted * (larger probability first). Default is true. * * TODO: need optimized for large number of outcomes * * \sa eval() */ void MaxentModel::eval_all(const context_type& context, std::vector<pair<outcome_type, double> >& outcomes, bool sort_result) const { assert(m_params); //TODO:static? static vector<double> probs; if (probs.size() != m_outcome_map->size()) probs.resize(m_outcome_map->size()); fill(probs.begin(), probs.end(), 0.0); size_t pid; for (size_t i = 0; i < context.size(); ++i) { pid = m_pred_map->id(context[i].first); if (pid != m_pred_map->null_id) { std::vector<pair<size_t, size_t> >& param = (*m_params)[pid]; float fval = context[i].second; for(size_t j = 0;j < param.size(); ++j) probs[param[j].first] += m_theta[param[j].second] * fval; } else { //#warning how to deal with unseen predicts? //m_debug.debug(0,"Predict id %d not found.",i); } } /* We will need to exponentiate the log-probabilites in probs. These log-probabilites can however be quite large and exponentiating them can render them infinite. At some places in the library, there is an effort to fight this by reducing the infinite value down to DBL_MAX, which isn't okay either, because we can have two such large probabilites and when we try to find their sum for normalization, we overflow again. Trying to normalize these large probabilities would also make them NaN, which is a fatal error in this domain. Also, by clipping all large values to DBL_MAX, we can lose a lot of information when more than 1 log-prob with very distinct values crosses over the maximum exponent. The proposed solution is to subtract some value from the log-probs to put them in the (-inf,O] range, so that exponentiation won't cause an overflow. The log-probabilities aren't so large that we would have to fear an underflow. If an underflow would occur, the exponentiation would make the probability 0 (exp(-inf) == 0) and we can show that this is correct. Because one of the log-probabilites now equals 0, we know that after exponentiation their sum is >= 1. This means that the true normalized probabilites will be even smaller. As the logarithm of the smallest positive double is pretty much finite (about -700 on my machine), we know that our probability is vastly smaller and 0 is about the best way to represent it. In this way, it could be possible that a significant difference between two highly improbable outcomes might be lost (the chance is however very small because we would have to have a sum of features * parameters equal to negative infinity). This is however much more tolerable than a loss of significant difference between two highly likely outcomes. */ // Find the maximum log-prob double max_prob = numeric_limits<double>::min(); for (size_t i = 0; i < probs.size(); ++i) { max_prob = max(max_prob, probs[i]); } double sum = 0.0; for (size_t i = 0; i < probs.size(); ++i) { // Subtract the maximum log-prob from the others to get them in // the (-inf,0] range. probs[i] = exp(probs[i] - max_prob); sum += probs[i]; } for (size_t i = 0; i < probs.size(); ++i) { probs[i] /= sum; } outcomes.resize(m_outcome_map->size()); for (size_t i = 0;i < outcomes.size(); ++i) { outcomes[i].first = (*m_outcome_map)[i]; outcomes[i].second = probs[i]; } if (sort_result) sort(outcomes.begin(),outcomes.end(), cmp_outcome()); }