/*
 * Score word `wid` given a history, adding stored backoff weights on a miss.
 *
 * The score starts as the unigram probability of `wid`.  The history words
 * hist[0 .. n_hist-2] are then looked up level by level in the middle
 * tables, and hist[n_hist-1] is looked up in the longest table.  A lookup
 * whose returned address has a NULL base is treated as "not found".
 *
 *   - Miss in a middle table at level i: trie->backoff[i .. n_hist-1] are
 *     added to the score found so far and the sum is returned.
 *   - Miss in the longest table: trie->backoff[n_hist-1] is added.
 *   - Hit everywhere: the probability read from the longest table is
 *     returned.
 *
 * *n_used is set to 1 for the unigram and incremented once per successful
 * lookup.
 */
static float
lm_trie_hist_score(lm_trie_t *trie, int32 wid, int32 *hist, int32 n_hist,
                   int32 *n_used)
{
    node_range_t range;
    bitarr_address_t hit;
    float score;
    int32 last;
    int level;

    *n_used = 1;
    score = unigram_find(trie->unigrams, wid, &range)->prob;
    if (n_hist == 0)
        return score;

    last = n_hist - 1;

    /* All but the final history word go through the middle tables. */
    for (level = 0; level < last; level++) {
        hit = middle_find(&trie->middle_begin[level], hist[level], &range);
        if (hit.base == NULL) {
            int k;

            /* Charge the backoff of this level and every remaining one. */
            for (k = level; k < n_hist; k++)
                score += trie->backoff[k];
            return score;
        }
        (*n_used)++;
        score = lm_trie_quant_mpread(trie->quant, hit, level);
    }

    /* The final history word is resolved in the longest table. */
    hit = longest_find(trie->longest, hist[last], &range);
    if (hit.base == NULL)
        return score + trie->backoff[last];

    (*n_used)++;
    return lm_trie_quant_lpread(trie->quant, hit);
}
/*
 * Return the probability of the longest n-gram found for `wid` plus a
 * prefix of `hist`, without adding any backoff weight.
 *
 * The unigram probability of `wid` is the starting value.  History words
 * are then consumed one at a time: orders 2 .. max_order-1 are looked up in
 * trie->middle_begin[order - 2], and order max_order is looked up in
 * trie->longest.  The walk stops, returning the last probability read, as
 * soon as
 *   - the history is exhausted,
 *   - the current node has an empty child range (begin == end), or
 *   - a lookup returns an address whose base is NULL.
 *
 * *n_used receives the order of the n-gram whose probability is returned
 * (1 when only the unigram was used).
 */
static float
get_available_prob(lm_trie_t *trie, int32 wid, int32 *hist,
                   int max_order, int32 n_hist, int32 *n_used)
{
    node_range_t range;
    bitarr_address_t hit;
    int32 *cursor;
    int32 *hist_end;
    float best;
    int level;          /* n-gram order minus two; index into middle_begin */
    int dead_end;       /* non-zero when the current node has no children */

    *n_used = 1;
    best = unigram_find(trie->unigrams, wid, &range)->prob;
    if (n_hist == 0)
        return best;

    /* Try to extend to higher orders, one history word per step. */
    level = 0;
    cursor = hist;
    hist_end = hist + n_hist;
    dead_end = (range.begin == range.end);

    while (1) {
        if (cursor == hist_end || dead_end)
            return best;
        if (level == max_order - 2)
            break;      /* the next order lives in the longest table */

        hit = middle_find(&trie->middle_begin[level], *cursor, &range);
        if (hit.base == NULL)
            return best;        /* no entry at this order */

        dead_end = (range.begin == range.end);
        best = lm_trie_quant_mpread(trie->quant, hit, level);
        *n_used = level + 2;

        level++;
        cursor++;
    }

    hit = longest_find(trie->longest, *cursor, &range);
    if (hit.base != NULL) {
        best = lm_trie_quant_lpread(trie->quant, hit);
        *n_used = max_order;
    }
    return best;
}
static void fill_raw_ngram(lm_trie_t * trie, logmath_t * lmath, ngram_raw_t * raw_ngrams, uint32 * raw_ngram_idx, uint32 * counts, node_range_t range, uint32 * hist, int n_hist, int order, int max_order) { if (n_hist > 0 && range.begin == range.end) { return; } if (n_hist == 0) { uint32 i; for (i = 0; i < counts[0]; i++) { node_range_t node; unigram_find(trie->unigrams, i, &node); hist[0] = i; fill_raw_ngram(trie, lmath, raw_ngrams, raw_ngram_idx, counts, node, hist, 1, order, max_order); } } else if (n_hist < order - 1) { uint32 ptr; node_range_t node; bitarr_address_t address; uint32 new_word; middle_t *middle = &trie->middle_begin[n_hist - 1]; for (ptr = range.begin; ptr < range.end; ptr++) { address.base = middle->base.base; address.offset = ptr * middle->base.total_bits; new_word = bitarr_read_int25(address, middle->base.word_bits, middle->base.word_mask); hist[n_hist] = new_word; address.offset += middle->base.word_bits + middle->quant_bits; node.begin = bitarr_read_int25(address, middle->next_mask.bits, middle->next_mask.mask); address.offset = (ptr + 1) * middle->base.total_bits + middle->base.word_bits + middle->quant_bits; node.end = bitarr_read_int25(address, middle->next_mask.bits, middle->next_mask.mask); fill_raw_ngram(trie, lmath, raw_ngrams, raw_ngram_idx, counts, node, hist, n_hist + 1, order, max_order); } } else { bitarr_address_t address; uint32 ptr; float prob, backoff; int i; assert(n_hist == order - 1); for (ptr = range.begin; ptr < range.end; ptr++) { ngram_raw_t *raw_ngram = &raw_ngrams[*raw_ngram_idx]; raw_ngram->weights = (float *) ckd_calloc(order == max_order ? 
1 : 2, sizeof(*raw_ngram->weights)); if (order == max_order) { longest_t *longest = trie->longest; //access address.base = longest->base.base; address.offset = ptr * longest->base.total_bits; hist[n_hist] = bitarr_read_int25(address, longest->base.word_bits, longest->base.word_mask); address.offset += longest->base.word_bits; prob = lm_trie_quant_lpread(trie->quant, address); } else { middle_t *middle = &trie->middle_begin[n_hist - 1]; address.base = middle->base.base; address.offset = ptr * middle->base.total_bits; hist[n_hist] = bitarr_read_int25(address, middle->base.word_bits, middle->base.word_mask); address.offset += middle->base.word_bits; prob = lm_trie_quant_mpread(trie->quant, address, n_hist - 1); backoff = lm_trie_quant_mboread(trie->quant, address, n_hist - 1); raw_ngram->weights[1] = (float) logmath_log_float_to_log10(lmath, backoff); } raw_ngram->weights[0] = (float) logmath_log_float_to_log10(lmath, prob); raw_ngram->words = (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words)); for (i = 0; i <= n_hist; i++) { raw_ngram->words[i] = hist[n_hist - i]; } (*raw_ngram_idx)++; } } }