Beispiel #1
0
/**
 * Finds the best-scoring state path for an observation, working in log space.
 *
 * Every state consumes `model.stateArity(state)` observation symbols per
 * visit: a state entered at position `l` emits the substring of that length
 * ending at `l`, and its predecessor is looked up `stateArity` rows earlier.
 *
 * @param observation  The observed symbol string.
 * @param model        The model to decode with; must be finalized.
 * @return A pair of (log score of the best path, states along that path in
 *         visiting order, one entry per state visit). When no path can
 *         produce the observation, or the observation is empty, the result is
 *         (-infinity, empty vector).
 * @throws invalid_argument if the model is not finalized.
 */
pair<double,vector<size_t>> viterbi(string observation, const HMM& model)
{
    if (!model.isFinalized())
        throw invalid_argument("Model should be finalized!");

    const double negInf = -numeric_limits<double>::infinity();
    const size_t length = observation.length();
    const size_t stateCount = model.numStates();

    // Without this guard the table would have no rows and `length - 1` below
    // would wrap around; report the same "no path" result as an impossible
    // observation instead.
    if (length == 0)
        return make_pair(negInf, vector<size_t>());

    // omega(l, i) = (predecessor state, best log score of a path that ends in
    // state i at position l). A predecessor of -1 marks "no predecessor".
    Matrix<pair<int,double>> omega(length, stateCount, make_pair(-1, negInf));

    // Memoized natural logarithm; a single find() serves both the hit test
    // and the value retrieval.
    unordered_map<double, double> logCache;
    auto ln = [&logCache] (double arg) {
        auto hit = logCache.find(arg);
        if (hit != logCache.end())
            return hit->second;

        const double val = log(arg);
        logCache.emplace(arg, val);
        return val;
    };

    // Base case: every state may start the path by emitting the first symbol.
    const string firstSymbol = observation.substr(0, 1);
    for (size_t i = 0; i < stateCount; i++)
        omega(0, i) = make_pair(-1, ln(model.startProb(i)) + ln(model.emissionProb(i, firstSymbol)));

    for (size_t l = 1; l < length; l++) {
        for (size_t i = 0; i < stateCount; i++) {
            const size_t arity = model.stateArity(i);

            // Too few symbols precede this position for state i to end here;
            // the cell keeps its initial (-1, -infinity) value.
            if (l < arity)
                continue;

            // Find the predecessor that maximizes the score of arriving in i.
            int bestPrev = -1;
            double bestScore = negInf;
            for (auto k : model.incommingStates(i)) {
                const double candidate = omega(l - arity, k).second + ln(model.transitionProb(k, i));
                if (candidate > bestScore) {
                    bestPrev = static_cast<int>(k);
                    bestScore = candidate;
                }
            }

            // No reachable predecessor: the state is impossible here and the
            // cell keeps its initial value.
            if (bestPrev == -1)
                continue;

            omega(l, i) = make_pair(bestPrev,
                                    bestScore + ln(model.emissionProb(i, observation.substr(l - arity + 1, arity))));
        }
    }

    // The overall best path ends in the highest-scoring cell of the last row.
    int finalState = -1;
    double finalScore = negInf;
    for (size_t i = 0; i < stateCount; i++) {
        const double candidate = omega(length - 1, i).second;
        if (candidate > finalScore) {
            finalState = static_cast<int>(i);
            finalScore = candidate;
        }
    }

    if (finalState == -1)
        return make_pair(negInf, vector<size_t>());

    // Backtrack: follow the stored predecessors, stepping back by the arity
    // of the state being left, until a cell without predecessor is reached.
    vector<size_t> stateTrace;
    size_t pos = length - 1;
    size_t state = static_cast<size_t>(finalState);
    stateTrace.push_back(state);

    int prev = omega(pos, state).first;
    while (prev != -1) {
        pos -= model.stateArity(state);
        state = static_cast<size_t>(prev);
        stateTrace.push_back(state);
        prev = omega(pos, state).first;
    }

    // The trace was collected end-to-start; hand it back in visiting order.
    return make_pair(finalScore,
                     vector<size_t>(stateTrace.rbegin(), stateTrace.rend()));
}
Beispiel #2
0
/**
 * Runs the scaled forward and backward recursions over an observation.
 *
 * Every state consumes `model.stateArity(state)` observation symbols per
 * visit, so the recursions step `stateArity` rows at a time and compensate
 * with the scaling factors of the rows that were stepped over.
 *
 * @param obs    The observed symbol string.
 * @param model  The model to evaluate; must be finalized.
 * @return A tuple of (per-position scaling factors `cs`, scaled forward table,
 *         scaled backward table). Both tables have one row per observation
 *         position and one column per state. For an empty observation all
 *         three are empty.
 * @throws runtime_error if the model is not finalized.
 *
 * NOTE(review): there is no guard against a scaling factor of zero; the
 * divisions below then yield NaN or infinity. Confirm whether callers rely on
 * that.
 */
tuple<vector<double>, Matrix<double>, Matrix<double>> forward_backward(string obs, const HMM& model)
{
    if (!model.isFinalized())
        throw runtime_error("Model should be finalized!");

    const size_t length = obs.length();
    const size_t stateCount = model.numStates();

    Matrix<double> forward(length, stateCount, 0);
    Matrix<double> backward(length, stateCount, 0);
    vector<double> cs(length, 0.0);

    // Nothing to compute for an empty observation; without this guard cs[0]
    // and `length - 1` below would be out of range.
    if (length == 0)
        return make_tuple(std::move(cs), std::move(forward), std::move(backward));

    // Forward algorithm
    // Unscaled values of the current row, reused for every position.
    vector<double> delta(stateCount, 0.0);

    // Base case: c1 is the sum of the unscaled start values, which are then
    // normalized by it.
    const string firstSymbol = obs.substr(0, 1);
    for (size_t state = 0; state < stateCount; state++) {
        delta[state] = model.startProb(state) * model.emissionProb(state, firstSymbol);
        cs[0] += delta[state];
    }
    for (size_t state = 0; state < stateCount; state++)
        forward(0, state) = delta[state] / cs[0];

    // Recursion
    for (size_t i = 1; i < length; i++) {
        delta.assign(stateCount, 0.0);

        for (size_t state = 0; state < stateCount; state++) {
            const size_t arity = model.stateArity(state);

            // Too few symbols precede this position for the state to end here.
            if (i < arity)
                continue;

            double incoming = 0;
            for (auto prevState : model.incommingStates(state)) {
                double val = forward(i - arity, prevState) * model.transitionProb(prevState, state);
                // Compensate for the rows skipped by a multi-symbol state.
                for (size_t k = 1; k < arity; k++)
                    val /= cs[i - k];

                incoming += val;
            }
            delta[state] = incoming * model.emissionProb(state, obs.substr(i - arity + 1, arity));

            cs[i] += delta[state];
        }

        for (size_t state = 0; state < stateCount; state++)
            forward(i, state) = delta[state] / cs[i];
    }

    // Backward algorithm
    const size_t last = length - 1;
    for (size_t state = 0; state < stateCount; state++)
        backward(last, state) = 1;

    // Unsigned countdown over last-1, ..., 0; the body never runs when the
    // observation has a single symbol.
    for (size_t i = last; i-- > 0; ) {
        for (size_t state = 0; state < stateCount; state++) {
            double prob = 0;

            for (auto nextState : model.outgoingStates(state)) {
                const size_t arity = model.stateArity(nextState);

                // The next state would emit past the end of the observation.
                if (i + arity > last)
                    continue;

                double val = backward(i + arity, nextState) * model.transitionProb(state, nextState)
                               * model.emissionProb(nextState, obs.substr(i + 1, arity));

                // Scale by the factor of every position the next state covers.
                for (size_t k = 0; k < arity; k++)
                    val /= cs[i + 1 + k];

                prob += val;
            }
            backward(i, state) = prob;
        }
    }

    return make_tuple(std::move(cs), std::move(forward), std::move(backward));
}