void MainCaitra::createGraph(vector<SearchGraphNode> v)
{
  map<int, int> recombination;

  for (int i = 0; i < v.size(); i++) {
    int id = v[i].hypo->GetId();
    int toState;
    int forward = v[i].forward;
    float fscore = v[i].fscore;

    // The initial (empty) hypothesis becomes the first state.
    if (id == 0) {
      State newState = State(forward, fscore, fscore);
      states.push_back(newState);
      // Record the mapping for the initial hypothesis explicitly; it lands at
      // state index 0.
      hypId2stateId[id] = stateId2hypId.size();
      stateId2hypId.push_back(id);
      continue;
    }

    const Hypothesis *prevHypo = v[i].hypo->GetPrevHypo();
    int fromState = prevHypo->GetId();
    float backwardScore = v[i].hypo->GetScore();
    float transitionScore = v[i].hypo->GetScore() - prevHypo->GetScore();

    // If this hypothesis was recombined into another one, edges point at the
    // surviving hypothesis instead.
    int recombined = -1;
    if (v[i].recombinationHypo != NULL)
      recombined = v[i].recombinationHypo->GetId();

    string out = v[i].hypo->GetCurrTargetPhrase().GetStringRep(
        StaticData::Instance().GetOutputFactorOrder());

    if (recombined >= 0) {
      recombination[id] = recombined;
      toState = recombined;
    } else {
      toState = id;
    }

    // Attach the outgoing transition to the state of the preceding hypothesis.
    vector<Word> o = tokenize(out, backwardScore + fscore);
    Transition newTransition(toState, transitionScore, o);
    int thisKey = hypId2stateId[fromState];
    states[thisKey].transitions.push_back(newTransition);

    // Only non-recombined hypotheses get a state of their own.
    if (recombined == -1) {
      State newState(forward, fscore, backwardScore + fscore);
      states.push_back(newState);
      hypId2stateId[id] = stateId2hypId.size();
      stateId2hypId.push_back(id);
    }
  }

  // Sentinel: hypothesis id -1 (no forward pointer) maps to state -1.
  hypId2stateId[-1] = -1;

  // Second pass: translate hypothesis ids into state indices, following
  // recombination where necessary.
  for (int state = 0; state < states.size(); state++) {
    int forward = states[state].forward;
    if (recombination.count(forward)) {
      forward = recombination[forward];
    }
    states[state].forward = hypId2stateId[forward];

    for (transIter transition = states[state].transitions.begin();
         transition != states[state].transitions.end(); transition++) {
      transition->to_state = hypId2stateId[transition->to_state];
    }
  }

  transitionsSize = v.size();
  TRACE_ERR("graph has " << states.size() << " states, pruned down from "
            << v.size() << "\n");
}
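// --- Illustrative sketch (not part of the original source) ---
// createGraph() above builds State and Transition objects whose definitions
// are not shown in this listing. The following is a minimal sketch of what
// those types might look like, inferred purely from how the function uses
// them; the field names and constructors here are assumptions, and the real
// classes (as well as the Word type and the transIter typedef) may differ.
// In the actual file such definitions would precede the function.
struct Transition {
  int to_state;            // target state (a hypothesis id until remapped)
  float score;             // score of taking this edge
  vector<Word> output;     // target words emitted along this edge

  Transition(int toState, float transitionScore, const vector<Word> &out)
    : to_state(toState), score(transitionScore), output(out) {}
};

struct State {
  int forward;             // id of the best forward-pointing hypothesis
  float fscore;            // estimated score to sentence completion
  float totalScore;        // backward score plus forward estimate
  vector<Transition> transitions;

  State(int forward, float fscore, float totalScore)
    : forward(forward), fscore(fscore), totalScore(totalScore) {}
};

typedef vector<Transition>::iterator transIter;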
template<typename Distribution>
void HMM<Distribution>::Train(const std::vector<arma::mat>& dataSeq)
{
  // We should allow a guess at the transition and emission matrices.
  double loglik = 0;
  double oldLoglik = 0;

  // Maximum iterations?
  size_t iterations = 1000;

  // Find length of all sequences and ensure they are the correct size.
  size_t totalLength = 0;
  for (size_t seq = 0; seq < dataSeq.size(); seq++)
  {
    totalLength += dataSeq[seq].n_cols;

    if (dataSeq[seq].n_rows != dimensionality)
      Log::Fatal << "HMM::Train(): data sequence " << seq << " has "
          << "dimensionality " << dataSeq[seq].n_rows << " (expected "
          << dimensionality << " dimensions)." << std::endl;
  }

  // These are used later for training of each distribution.  We initialize
  // them all now so we don't have to do any allocation later on.
  std::vector<arma::vec> emissionProb(transition.n_cols,
      arma::vec(totalLength));
  arma::mat emissionList(dimensionality, totalLength);

  // This should be the Baum-Welch algorithm (EM for HMM estimation).  This
  // follows the procedure outlined in Elliot, Aggoun, and Moore's book "Hidden
  // Markov Models: Estimation and Control", pp. 36-40.
  for (size_t iter = 0; iter < iterations; iter++)
  {
    // Clear new transition matrix and emission probabilities.
    arma::mat newTransition(transition.n_rows, transition.n_cols);
    newTransition.zeros();

    // Reset log likelihood.
    loglik = 0;

    // Sum over time.
    size_t sumTime = 0;

    // Loop over each sequence.
    for (size_t seq = 0; seq < dataSeq.size(); seq++)
    {
      arma::mat stateProb;
      arma::mat forward;
      arma::mat backward;
      arma::vec scales;

      // Add the log-likelihood of this sequence.  This is the E-step.
      loglik += Estimate(dataSeq[seq], stateProb, forward, backward, scales);

      // Now re-estimate the parameters.  This is the M-step.
      //   T_ij = sum_d ((1 / P(seq[d])) sum_t (f(i, t) T_ij E_i(seq[d][t])
      //           b(i, t + 1)))
      //   E_ij = sum_d ((1 / P(seq[d])) sum_{t | seq[d][t] = j} f(i, t)
      //           b(i, t))
      // We store the new estimates in a different matrix.
      for (size_t t = 0; t < dataSeq[seq].n_cols; t++)
      {
        for (size_t j = 0; j < transition.n_cols; j++)
        {
          if (t < dataSeq[seq].n_cols - 1)
          {
            // Estimate of T_ij (probability of transition from state j to
            // state i).  We postpone multiplication of the old T_ij until
            // later.
            for (size_t i = 0; i < transition.n_rows; i++)
              newTransition(i, j) += forward(j, t) * backward(i, t + 1) *
                  emission[i].Probability(dataSeq[seq].unsafe_col(t + 1)) /
                  scales[t + 1];
          }

          // Add to list of emission observations, for
          // Distribution::Estimate().
          emissionList.col(sumTime) = dataSeq[seq].col(t);
          emissionProb[j][sumTime] = stateProb(j, t);
        }
        sumTime++;
      }
    }

    // Assign the new transition matrix.  We use %= (element-wise
    // multiplication) because every element of the new transition matrix must
    // still be multiplied by the old elements (this is the multiplication we
    // earlier postponed).
    transition %= newTransition;

    // Now we normalize the transition matrix.
    for (size_t i = 0; i < transition.n_cols; i++)
      transition.col(i) /= accu(transition.col(i));

    // Now estimate emission probabilities.
    for (size_t state = 0; state < transition.n_cols; state++)
      emission[state].Estimate(emissionList, emissionProb[state]);

    Log::Debug << "Iteration " << iter << ": log-likelihood " << loglik
        << std::endl;

    if (std::abs(oldLoglik - loglik) < tolerance)
    {
      Log::Debug << "Converged after " << iter << " iterations." << std::endl;
      break;
    }

    oldLoglik = loglik;
  }
}
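// --- Illustrative sketch (not part of the original source) ---
// A minimal usage example for HMM<Distribution>::Train() above, assuming an
// mlpack-style GaussianDistribution emission type and an HMM constructor
// taking (number of states, emission prototype).  Both of those are
// assumptions about the surrounding library, not shown in this listing.
#include <vector>
#include <armadillo>

void TrainExample()
{
  // Two observation sequences of 3-dimensional data; each column of a matrix
  // is one time step, matching the dataSeq layout Train() expects.
  std::vector<arma::mat> dataSeq;
  dataSeq.push_back(arma::randu<arma::mat>(3, 100));
  dataSeq.push_back(arma::randu<arma::mat>(3, 80));

  // Hypothetical construction: 4 hidden states with 3-dimensional Gaussian
  // emissions.
  HMM<GaussianDistribution> hmm(4, GaussianDistribution(3));

  // Baum-Welch iterates until the log-likelihood change drops below the
  // tolerance member, or the 1000-iteration cap is hit.
  hmm.Train(dataSeq);
}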