示例#1
0
/**
 * Evaluates a context, return the conditional probability p(y|x).
 *
 * This method calculates the conditional probability p(y|x) for given x and y.
 *
 * @param context A list of pair<string, double> indicates names of 
 *        the contextual predicates and their values which are to be
 *        evaluated together.
 * @param outcome The outcome label for which the conditional probability is
 *        calculated.
 * @return The conditional probability of p(outcome|context).
 * \sa eval_all()
 */
double MaxentModel::eval(const context_type& context,
                const outcome_type& outcome) const{

    size_t oid = m_outcome_map->id(outcome);

    if (oid == m_outcome_map->null_id) {
        cerr << "[MaxentModel::eval()] unknown outcome id:" << oid << endl;
        return 0.0;
    }

    static vector<double> probs;
    if (probs.size() != m_outcome_map->size())
        probs.resize(m_outcome_map->size());
        fill(probs.begin(), probs.end(), 0.0);

    size_t pid;
    for (size_t i = 0; i < context.size(); ++i) {
        pid = m_pred_map->id(context[i].first);
        if (pid != m_pred_map->null_id) {
            std::vector<pair<size_t, size_t> >& param = (*m_params)[pid];
            float fval = context[i].second;
            for(size_t j = 0;j < param.size(); ++j)
                probs[param[j].first] += m_theta[param[j].second] * fval;
        } else {
            //#warning how to deal with unseen predicts?
            //m_debug.debug(0,"Predict id %d not found.",i);
        }
    }


    /* For the rationale behind subtracting max_prob from the log-probabilities
       see maxentmodel.cpp:maxent::MaxentModel::eval_all*/

    // Find the maximum log-prob
    double max_prob = numeric_limits<double>::min();
    for (size_t i = 0; i < probs.size(); ++i) {
        max_prob = max(max_prob, probs[i]);
    }

    double sum = 0.0;
    for (size_t i = 0; i < probs.size(); ++i) {
        // Subtract the maximum log-prob from the others to get them in
        // the (-inf,0] range.
        probs[i] = exp(probs[i] - max_prob);
        sum += probs[i];
    }

    for (size_t i = 0; i < probs.size(); ++i) {
        probs[i] /= sum;
    }

    return probs[oid];
}
示例#2
0
/**
 * Evaluates a context, return the conditional distribution of the context.
 *
 * This method calculates the conditional probability p(y|x) for each possible
 * outcome tag y.
 *
 * @param context A list of pair<string, double> indicates the contextual
 *                predicates and their values (must be >= 0) which are to be
 *                evaluated together.
 * @param outcomes An array of the outcomes paired with it's probability
 *        predicted by the model (the conditional distribution).
 * @param sort_result Whether or not the returned outcome array is sorted
 *                    (larger probability first). Default is true.
 *
 * TODO:  need optimized for large number of outcomes
 *
 * \sa eval()
 */
void MaxentModel::eval_all(const context_type& context,
        std::vector<pair<outcome_type, double> >& outcomes,
        bool sort_result) const {
    assert(m_params);

    //static vector<double> probs; //REMIND remove static here
    vector<double> probs;
    if (probs.size() != m_outcome_map->size())
        probs.resize(m_outcome_map->size());
    fill(probs.begin(), probs.end(), 0.0);

    size_t pid;
    for (size_t i = 0; i < context.size(); ++i) {
        pid = m_pred_map->id(context[i].first);
        if (pid != m_pred_map->null_id) {
            std::vector<pair<size_t, size_t> >& param = (*m_params)[pid];
            float fval = context[i].second;
            for(size_t j = 0;j < param.size(); ++j)
                probs[param[j].first] += m_theta[param[j].second] * fval;
        } else {
            //#warning how to deal with unseen predicts?
            //m_debug.debug(0,"Predict id %d not found.",i);
        }
    }

    double sum = 0.0;
    for (size_t i = 0; i < probs.size(); ++i) {
        probs[i] = exp(probs[i]);
        sum += probs[i];
    }

    for (size_t i = 0; i < probs.size(); ++i) {
        probs[i] /= sum;
    }

    outcomes.resize(m_outcome_map->size());
    for (size_t i = 0;i < outcomes.size(); ++i) {
        outcomes[i].first = (*m_outcome_map)[i];
        outcomes[i].second = probs[i];
    }


    if (sort_result)
        sort(outcomes.begin(),outcomes.end(), cmp_outcome());
}
示例#3
0
/**
 * Evaluates a context, return the conditional probability p(y|x).
 *
 * This method calculates the conditional probability p(y|x) for given x and y.
 *
 * @param context A list of pair<string, double> indicates names of 
 *        the contextual predicates and their values which are to be
 *        evaluated together.
 * @param outcome The outcome label for which the conditional probability is
 *        calculated.
 * @return The conditional probability of p(outcome|context).
 * \sa eval_all()
 */
double MaxentModel::eval(const context_type& context,
                const outcome_type& outcome) const{

    size_t oid = m_outcome_map->id(outcome);

    if (oid == m_outcome_map->null_id) {
        //cerr << "[MaxentModel::eval()] unknown outcome id:" << oid << endl;
        return 0.0;
    }

    static vector<double> probs;
    if (probs.size() != m_outcome_map->size())
        probs.resize(m_outcome_map->size());
        fill(probs.begin(), probs.end(), 0.0);

    size_t pid;
    for (size_t i = 0; i < context.size(); ++i) {
        pid = m_pred_map->id(context[i].first);
        if (pid != m_pred_map->null_id) {
            std::vector<pair<size_t, size_t> >& param = (*m_params)[pid];
            float fval = context[i].second;
            for(size_t j = 0;j < param.size(); ++j)
                probs[param[j].first] += m_theta[param[j].second] * fval;
        } else {
            //#warning how to deal with unseen predicts?
            //m_debug.debug(0,"Predict id %d not found.",i);
        }
    }

    double sum = 0.0;
    for (size_t i = 0; i < probs.size(); ++i) {
        probs[i] = exp(probs[i]);
        if (!finite(probs[i]))
            probs[i] = numeric_limits<double>::max();// DBL_MAX;
        sum += probs[i];
    }
    for (size_t i = 0; i < probs.size(); ++i) {
        probs[i] /= sum;
    }

    return probs[oid];
}
示例#4
0
/**
 * Evaluates a context, return the conditional distribution of the context.
 *
 * This method calculates the conditional probability p(y|x) for each possible
 * outcome tag y.
 *
 * @param context A list of pair<string, double> indicates the contextual
 *                predicates and their values (must be >= 0) which are to be
 *                evaluated together.
 * @param outcomes An array of the outcomes paired with it's probability
 *        predicted by the model (the conditional distribution).
 * @param sort_result Whether or not the returned outcome array is sorted
 *                    (larger probability first). Default is true.
 *
 * TODO:  need optimized for large number of outcomes
 *
 * \sa eval()
 */
void MaxentModel::eval_all(const context_type& context,
        std::vector<pair<outcome_type, double> >& outcomes,
        bool sort_result) const {
    assert(m_params);

    //TODO:static?
    static vector<double> probs;
    if (probs.size() != m_outcome_map->size())
        probs.resize(m_outcome_map->size());
    fill(probs.begin(), probs.end(), 0.0);

    size_t pid;
    for (size_t i = 0; i < context.size(); ++i) {
        pid = m_pred_map->id(context[i].first);
        if (pid != m_pred_map->null_id) {
            std::vector<pair<size_t, size_t> >& param = (*m_params)[pid];
            float fval = context[i].second;
            for(size_t j = 0;j < param.size(); ++j)
                probs[param[j].first] += m_theta[param[j].second] * fval;
        } else {
            //#warning how to deal with unseen predicts?
            //m_debug.debug(0,"Predict id %d not found.",i);
        }
    }


    /* We will need to exponentiate the log-probabilites in probs. These
       log-probabilites can however be quite large and exponentiating them
       can render them infinite. At some places in the library, there is
       an effort to fight this by reducing the infinite value down to
       DBL_MAX, which isn't okay either, because we can have two such
       large probabilites and when we try to find their sum for normalization,
       we overflow again. Trying to normalize these large probabilities
       would also make them NaN, which is a fatal error in this domain.
       Also, by clipping all large values to DBL_MAX, we can lose a lot of
       information when more than 1 log-prob with very distinct values
       crosses over the maximum exponent.

       The proposed solution is to subtract some value from the log-probs
       to put them in the (-inf,O] range, so that exponentiation won't
       cause an overflow. The log-probabilities aren't so large that we
       would have to fear an underflow. If an underflow would occur, the
       exponentiation would make the probability 0 (exp(-inf) == 0) and we can
       show that this is correct. Because one of the log-probabilites now
       equals 0, we know that after exponentiation their sum is >= 1. This
       means that the true normalized probabilites will be even smaller. As
       the logarithm of the smallest positive double is pretty much finite
       (about -700 on my machine), we know that our probability is vastly
       smaller and 0 is about the best way to represent it.
       
       In this way, it could be possible that a significant difference between
       two highly improbable outcomes might be lost (the chance is however
       very small because we would have to have a sum of features * parameters
       equal to negative infinity). This is however much more tolerable than a
       loss of significant difference between two highly likely outcomes. */

    // Find the maximum log-prob
    double max_prob = numeric_limits<double>::min();
    for (size_t i = 0; i < probs.size(); ++i) {
        max_prob = max(max_prob, probs[i]);
    }

    double sum = 0.0;
    for (size_t i = 0; i < probs.size(); ++i) {
        // Subtract the maximum log-prob from the others to get them in
        // the (-inf,0] range.
        probs[i] = exp(probs[i] - max_prob);
        sum += probs[i];
    }

    for (size_t i = 0; i < probs.size(); ++i) {
        probs[i] /= sum;
    }

    outcomes.resize(m_outcome_map->size());
    for (size_t i = 0;i < outcomes.size(); ++i) {
        outcomes[i].first = (*m_outcome_map)[i];
        outcomes[i].second = probs[i];
    }


    if (sort_result)
        sort(outcomes.begin(),outcomes.end(), cmp_outcome());
}