// Estimate interpolated Kneser-Ney probabilities and backoff weights for
// the current n-gram order.
//
// probs - output: interpolated probability for every n-gram of order _order.
// bows  - output: backoff weight for every history (order _order - 1).
//
// Reads member state prepared earlier: _effCounts (effective/adjusted
// counts), _discParams (discount parameter per count bin, capped at
// _discOrder), _invHistCounts (reciprocal of summed counts per history),
// plus the LM's history/backoff index maps.
void KneserNeySmoothing::_Estimate(ProbVector &probs, ProbVector &bows) {
    const IndexVector &hists(_pLM->hists(_order));
    const IndexVector &backoffs(_pLM->backoffs(_order));
    const ProbVector & boProbs(_pLM->probs(_order - 1));

    // Compute discounts: look up the discount parameter for each n-gram's
    // (capped) effective count.
    // NOTE(review): unlike _EstimateWeighted/_EstimateWeightedMasked this
    // allocates a fresh buffer instead of aliasing probs (the old "reuse
    // probs" comment was wrong).  Presumably the separate buffer avoids
    // aliasing between discounts and probs in the expression-template
    // assignments below -- confirm before merging with the siblings.
    ProbVector discounts(probs.length(), 0.0);
    discounts = _discParams[min(_effCounts, _discOrder)];

    // Compute backoff weights: accumulate the discounted mass per history,
    // then normalize by each history's total count.
    bows.set(0);
    BinWeight(hists, discounts, bows);
    // NOTE(review): the weighted variants guard empty histories with
    //   bows = CondExpr(_invHistCounts == 0, 1, bows * _invHistCounts);
    // here the guard is disabled and finiteness is asserted instead --
    // verify _invHistCounts can never be 0/inf on this path.
    // (Old order-4 std::cerr debug dumps removed for readability.)
    bows = bows * _invHistCounts;
    assert(allTrue(isfinite(_invHistCounts)));
    assert(allTrue(isfinite(bows)));

    // Compute interpolated probabilities.
    // For order 1 with an open vocabulary, zero-count entries get
    // probability 0 (the backoff term stays inside the CondExpr); in all
    // other cases the backoff mass is added unconditionally.
    if (_order == 1 && !_pLM->vocab().IsFixedVocab())
        probs = CondExpr(!_effCounts, 0,
                         (_effCounts - discounts) * _invHistCounts[hists]
                         + boProbs[backoffs] * bows[hists]);
    else
        probs = CondExpr(!_effCounts, 0,
                         (_effCounts - discounts) * _invHistCounts[hists])
                + boProbs[backoffs] * bows[hists];

    assert(!anyTrue(isnan(probs)));
}
// Weighted variant of _Estimate: every n-gram's contribution is scaled by
// its entry in _ngramWeights.
//
// probs - output: interpolated probability for every n-gram of order _order.
// bows  - output: backoff weight for every history (order _order - 1).
void KneserNeySmoothing::_EstimateWeighted(ProbVector &probs, ProbVector &bows) {
    const IndexVector &hists(_pLM->hists(_order));
    const IndexVector &backoffs(_pLM->backoffs(_order));
    const ProbVector & boProbs(_pLM->probs(_order - 1));

    // Compute discounts.  probs is aliased as scratch space for the
    // discount values; it is overwritten with the final probabilities at
    // the end of the function.
    ProbVector &discounts(probs);  // Reuse probs vector for discounts.
    discounts = _discParams[min(_effCounts, _discOrder)];

    // Compute backoff weights: accumulate weighted discount mass per
    // history; histories with zero total count get a backoff weight of 1.
    bows.set(0);
    BinWeight(hists, _ngramWeights * discounts, bows);
    bows = CondExpr(_invHistCounts == 0, 1, bows * _invHistCounts);

    // Compute interpolated probabilities.  For order 1 with an open
    // vocabulary, zero-count entries get probability 0 (backoff term is
    // inside the CondExpr); otherwise backoff mass is always added.
    // NOTE(review): this assignment reads discounts (an alias of probs)
    // while writing probs -- it relies on the vector library evaluating
    // element-wise in place; confirm that contract holds.
    if (_order == 1 && !_pLM->vocab().IsFixedVocab())
        probs = CondExpr(!_effCounts, 0,
                         _ngramWeights * (_effCounts - discounts)
                         * _invHistCounts[hists]
                         + boProbs[backoffs] * bows[hists]);
    else
        probs = CondExpr(!_effCounts, 0,
                         _ngramWeights * (_effCounts - discounts)
                         * _invHistCounts[hists])
                + boProbs[backoffs] * bows[hists];
}
// Masked variant of _EstimateWeighted: only the entries selected by the
// bit masks stored in pMask are recomputed; unmasked entries of probs and
// bows keep their previous values.
//
// pMask - per-order masks (discount, backoff-weight, probability).
// probs - in/out: interpolated probabilities for n-grams of order _order.
// bows  - in/out: backoff weights for histories of order _order - 1.
void KneserNeySmoothing::_EstimateWeightedMasked(const NgramLMMask *pMask, ProbVector &probs, ProbVector &bows) {
    const IndexVector &hists(_pLM->hists(_order));
    const IndexVector &backoffs(_pLM->backoffs(_order));
    const ProbVector & boProbs(_pLM->probs(_order - 1));

    // Compute discounts.  probs doubles as the discount buffer and is
    // overwritten (where probMask allows) at the end.
    ProbVector &discounts(probs);  // Reuse probs vector for discounts.
    // NOTE(review): C-style downcast; assumes the smoothing mask for this
    // order is always a KneserNeySmoothingMask -- a static_cast would at
    // least make the assumption explicit and greppable.
    const BitVector &discMask(((KneserNeySmoothingMask *) pMask->SmoothingMasks[_order].get())->DiscMask);
    assert(discMask.length() == _effCounts.length());
    // Element-wise masked version of:
    //   discounts.masked(discMask) = _discParams[min(_effCounts, _discOrder)];
    for (size_t i = 0; i < _effCounts.length(); i++)
        if (discMask[i])
            discounts[i] = _discParams[min(_effCounts[i], (int)_discOrder)];

    // Compute backoff weights for the masked histories only: accumulate
    // weighted discount mass, then normalize; histories with zero total
    // count get a backoff weight of 1.
    const BitVector &bowMask(pMask->BowMaskVectors[_order - 1]);
    MaskedVectorClosure<ProbVector, BitVector> maskedBows(bows.masked(bowMask));
    maskedBows.set(0);
    BinWeight(hists, _ngramWeights * discounts, maskedBows);
    // Element-wise masked version of:
    //   maskedBows = CondExpr(_invHistCounts == 0, 1, bows * _invHistCounts);
    for (size_t i = 0; i < bows.length(); i++)
        if (bowMask[i]) {
            if (_invHistCounts[i] == 0)
                bows[i] = 1;
            else
                bows[i] *= _invHistCounts[i];
        }

    // Compute interpolated probabilities for the masked n-grams.  Same
    // order-1 open-vocabulary special case as the unmasked variants: the
    // backoff term stays inside the CondExpr so zero-count entries get 0.
    const BitVector &probMask(pMask->ProbMaskVectors[_order]);
    if (_order == 1 && !_pLM->vocab().IsFixedVocab())
        probs.masked(probMask) =
            CondExpr(!_effCounts, 0,
                     _ngramWeights * (_effCounts - discounts)
                     * _invHistCounts[hists]
                     + boProbs[backoffs] * bows[hists]);
    else
        probs.masked(probMask) =
            CondExpr(!_effCounts, 0,
                     _ngramWeights * (_effCounts - discounts)
                     * _invHistCounts[hists])
            + boProbs[backoffs] * bows[hists];
}
bool MaxLikelihoodSmoothing::Estimate(const ParamVector ¶ms, const NgramLMMask *pMask, ProbVector &probs, ProbVector &bows) { if (!_estimated) { const CountVector &counts(_pLM->counts(_order)); const IndexVector &hists(_pLM->hists(_order)); // Compute inverse of sum of adjusted counts for each history. CountVector histCounts(_pLM->sizes(_order - 1), 0); ProbVector invHistCounts(histCounts.length()); BinCount(hists, histCounts); invHistCounts = 1.0 / asDouble(histCounts); // Compute maximum likelihood probability. 0 backoff. probs = counts * invHistCounts[hists]; bows.set(0); _estimated = true; } return true; }