/*
 * Rebuild the backoff cache stored in the trie for the history `hist`.
 *
 * trie->backoff is zeroed first (sizeof(trie->backoff) bytes), then
 * slot 0 receives the backoff field of the unigram entry for hist[0].
 * For each further index k in 1..n_hist-1, hist[k] is looked up in
 * trie->middle_begin[k - 1]; when the lookup yields a NULL base the walk
 * stops and the remaining slots keep their zero value.
 *
 * Finally the first n_hist history words are copied into trie->prev_hist.
 *
 * hist[0] is read unconditionally, so the caller must supply at least one
 * history word.
 */
static void
update_backoff(lm_trie_t *trie, int32 *hist, int32 n_hist)
{
    node_range_t range;
    int level;

    /* Slots that are not reached below must read as zero. */
    memset(trie->backoff, 0, sizeof(trie->backoff));

    trie->backoff[0] = unigram_find(trie->unigrams, hist[0], &range)->bo;

    level = 1;
    while (level < n_hist) {
        /* `range` is narrowed in place by each successive lookup. */
        bitarr_address_t where =
            middle_find(&trie->middle_begin[level - 1], hist[level], &range);

        if (where.base == NULL)
            break;

        trie->backoff[level] =
            lm_trie_quant_mboread(trie->quant, where, level - 1);
        level++;
    }

    memcpy(trie->prev_hist, hist, n_hist * sizeof(*hist));
}
/*
 * Accumulate backoff weights for the history `hist`, beginning at `start`.
 *
 * When start <= 1 the backoff field of the unigram entry for hist[0] is
 * added and start is raised to 2.  After that, history words from index
 * start - 1 up to n_hist - 1 are looked up one by one in
 * trie->middle_begin[start - 2], [start - 1], ... and the backoff read for
 * each hit is added to the sum.  The walk ends at the first lookup whose
 * address has a NULL base.
 *
 * Returns the accumulated sum (0.0f when nothing was added).
 *
 * NOTE(review): the node range handed to the first middle_find() always
 * comes from the unigram lookup of hist[0], including when start > 2 and
 * the first middle array consulted is not middle_begin[0].  Confirm
 * against middle_find()'s contract that this is intended.
 */
static float
get_available_backoff(lm_trie_t *trie, int32 start, int32 *hist, int32 n_hist)
{
    node_range_t range;
    unigram_t *uni = unigram_find(trie->unigrams, hist[0], &range);
    int32 *const hist_end = hist + n_hist;
    int32 *cursor;
    float total = 0.0f;
    int mid_idx;

    if (start <= 1) {
        total += uni->bo;
        start = 2;
    }

    mid_idx = start - 2;
    cursor = hist + start - 1;
    while (cursor < hist_end) {
        bitarr_address_t where =
            middle_find(&trie->middle_begin[mid_idx], *cursor, &range);

        if (where.base == NULL)
            break;

        total += lm_trie_quant_mboread(trie->quant, where, mid_idx);
        cursor++;
        mid_idx++;
    }

    return total;
}
static void fill_raw_ngram(lm_trie_t * trie, logmath_t * lmath, ngram_raw_t * raw_ngrams, uint32 * raw_ngram_idx, uint32 * counts, node_range_t range, uint32 * hist, int n_hist, int order, int max_order) { if (n_hist > 0 && range.begin == range.end) { return; } if (n_hist == 0) { uint32 i; for (i = 0; i < counts[0]; i++) { node_range_t node; unigram_find(trie->unigrams, i, &node); hist[0] = i; fill_raw_ngram(trie, lmath, raw_ngrams, raw_ngram_idx, counts, node, hist, 1, order, max_order); } } else if (n_hist < order - 1) { uint32 ptr; node_range_t node; bitarr_address_t address; uint32 new_word; middle_t *middle = &trie->middle_begin[n_hist - 1]; for (ptr = range.begin; ptr < range.end; ptr++) { address.base = middle->base.base; address.offset = ptr * middle->base.total_bits; new_word = bitarr_read_int25(address, middle->base.word_bits, middle->base.word_mask); hist[n_hist] = new_word; address.offset += middle->base.word_bits + middle->quant_bits; node.begin = bitarr_read_int25(address, middle->next_mask.bits, middle->next_mask.mask); address.offset = (ptr + 1) * middle->base.total_bits + middle->base.word_bits + middle->quant_bits; node.end = bitarr_read_int25(address, middle->next_mask.bits, middle->next_mask.mask); fill_raw_ngram(trie, lmath, raw_ngrams, raw_ngram_idx, counts, node, hist, n_hist + 1, order, max_order); } } else { bitarr_address_t address; uint32 ptr; float prob, backoff; int i; assert(n_hist == order - 1); for (ptr = range.begin; ptr < range.end; ptr++) { ngram_raw_t *raw_ngram = &raw_ngrams[*raw_ngram_idx]; raw_ngram->weights = (float *) ckd_calloc(order == max_order ? 
1 : 2, sizeof(*raw_ngram->weights)); if (order == max_order) { longest_t *longest = trie->longest; //access address.base = longest->base.base; address.offset = ptr * longest->base.total_bits; hist[n_hist] = bitarr_read_int25(address, longest->base.word_bits, longest->base.word_mask); address.offset += longest->base.word_bits; prob = lm_trie_quant_lpread(trie->quant, address); } else { middle_t *middle = &trie->middle_begin[n_hist - 1]; address.base = middle->base.base; address.offset = ptr * middle->base.total_bits; hist[n_hist] = bitarr_read_int25(address, middle->base.word_bits, middle->base.word_mask); address.offset += middle->base.word_bits; prob = lm_trie_quant_mpread(trie->quant, address, n_hist - 1); backoff = lm_trie_quant_mboread(trie->quant, address, n_hist - 1); raw_ngram->weights[1] = (float) logmath_log_float_to_log10(lmath, backoff); } raw_ngram->weights[0] = (float) logmath_log_float_to_log10(lmath, prob); raw_ngram->words = (uint32 *) ckd_calloc(order, sizeof(*raw_ngram->words)); for (i = 0; i <= n_hist; i++) { raw_ngram->words[i] = hist[n_hist - i]; } (*raw_ngram_idx)++; } } }