// Estimate interpolated Kneser-Ney probabilities and backoff weights for the
// current n-gram order, applying per-n-gram weights (_ngramWeights) to both
// the discounted counts and the backoff mass.
// Results are written into probs and bows; probs is fully overwritten.
void KneserNeySmoothing::_EstimateWeighted(ProbVector &probs, ProbVector &bows) {
    const IndexVector &hists(_pLM->hists(_order));
    const IndexVector &backoffs(_pLM->backoffs(_order));
    const ProbVector & boProbs(_pLM->probs(_order - 1));

    // Compute discounts.
    // NOTE(review): discounts deliberately aliases probs (probs is overwritten
    // below anyway).  This relies on the vector expressions evaluating
    // elementwise, reading discounts[i] before writing probs[i] — confirm the
    // expression-template semantics before reordering these statements.
    ProbVector &discounts(probs);  // Reuse probs vector for discounts.
    discounts = _discParams[min(_effCounts, _discOrder)];

    // Compute backoff weights: accumulate the weighted discount mass per
    // history, then normalize by the history count.  Histories with zero
    // count (_invHistCounts == 0) get backoff weight 1, i.e. pure backoff.
    bows.set(0);
    BinWeight(hists, _ngramWeights * discounts, bows);
    bows = CondExpr(_invHistCounts == 0, 1, bows * _invHistCounts);

    // Compute interpolated probabilities:
    //   p(w|h) = weight * (c(h,w) - D) / c(h) + bow(h) * p_bo(w|h').
    // For unigrams with an open (non-fixed) vocabulary the backoff term is
    // inside the CondExpr, so zero-count entries get probability 0 outright;
    // otherwise the backoff mass is added to every entry, including those
    // with zero effective count.
    if (_order == 1 && !_pLM->vocab().IsFixedVocab())
        probs = CondExpr(!_effCounts, 0,
                         _ngramWeights * (_effCounts - discounts) * _invHistCounts[hists]
                         + boProbs[backoffs] * bows[hists]);
    else
        probs = CondExpr(!_effCounts, 0,
                         _ngramWeights * (_effCounts - discounts) * _invHistCounts[hists])
                + boProbs[backoffs] * bows[hists];
}
// Estimate interpolated Kneser-Ney probabilities and backoff weights for the
// current n-gram order (unweighted variant; see _EstimateWeighted for the
// version that applies per-n-gram weights).
// Results are written into probs and bows; probs is fully overwritten.
void KneserNeySmoothing::_Estimate(ProbVector &probs, ProbVector &bows) {
    const IndexVector &hists(_pLM->hists(_order));
    const IndexVector &backoffs(_pLM->backoffs(_order));
    const ProbVector & boProbs(_pLM->probs(_order - 1));

    // Compute discounts in a scratch vector.  (Unlike _EstimateWeighted,
    // probs is NOT reused here: probs may alias other data in this path, so
    // a separate vector is allocated.)
    ProbVector discounts(probs.length(), 0.0);
    discounts = _discParams[min(_effCounts, _discOrder)];

    // Compute backoff weights: accumulate the discount mass per history and
    // normalize by the history count.  NOTE(review): unlike _EstimateWeighted
    // there is no CondExpr guard for zero-count histories
    // (_invHistCounts == 0); the asserts below instead require every history
    // count to be finite and nonzero.
    bows.set(0);
    BinWeight(hists, discounts, bows);
    bows = bows * _invHistCounts;
    assert(allTrue(isfinite(_invHistCounts)));
    assert(allTrue(isfinite(bows)));

    // Compute interpolated probabilities:
    //   p(w|h) = (c(h,w) - D) / c(h) + bow(h) * p_bo(w|h').
    // For unigrams with an open (non-fixed) vocabulary the backoff term is
    // inside the CondExpr, so zero-count entries get probability 0 outright;
    // otherwise the backoff mass is added to every entry.
    if (_order == 1 && !_pLM->vocab().IsFixedVocab())
        probs = CondExpr(!_effCounts, 0,
                         (_effCounts - discounts) * _invHistCounts[hists]
                         + boProbs[backoffs] * bows[hists]);
    else
        probs = CondExpr(!_effCounts, 0,
                         (_effCounts - discounts) * _invHistCounts[hists])
                + boProbs[backoffs] * bows[hists];

    assert(!anyTrue(isnan(probs)));
}
bool MaxLikelihoodSmoothing::Estimate(const ParamVector ¶ms, const NgramLMMask *pMask, ProbVector &probs, ProbVector &bows) { if (!_estimated) { const CountVector &counts(_pLM->counts(_order)); const IndexVector &hists(_pLM->hists(_order)); // Compute inverse of sum of adjusted counts for each history. CountVector histCounts(_pLM->sizes(_order - 1), 0); ProbVector invHistCounts(histCounts.length()); BinCount(hists, histCounts); invHistCounts = 1.0 / asDouble(histCounts); // Compute maximum likelihood probability. 0 backoff. probs = counts * invHistCounts[hists]; bows.set(0); _estimated = true; } return true; }