/* Return the number of entries in table k+1 spanned by entry 'ind' of table k,
   i.e. the number of distinct word types observed after that context
   (used by Witten-Bell discounting). */
unsigned short num_of_types(int k, int ind, ng_t *ng) {

  ngram_sz_t start;
  ngram_sz_t end;

  start = get_full_index(ng->ind[k][ind],
                         ng->ptr_table[k],
                         ng->ptr_table_size[k],
                         ind);

  if (k > 0) {
    if (ind < (ng->num_kgrams[k]-1)) {
      end = get_full_index(ng->ind[k][ind+1],
                           ng->ptr_table[k],
                           ng->ptr_table_size[k],
                           ind+1);
    } else
      end = ng->num_kgrams[k+1];
  } else {
    if (ind < ng->vocab_size) {
      end = get_full_index(ng->ind[k][ind+1],
                           ng->ptr_table[k],
                           ng->ptr_table_size[k],
                           ind+1);
    } else
      end = ng->num_kgrams[k+1];
  }

  return (end - start);
}
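/*
 * Sketch (inferred from the calls above, not part of the original comments):
 * ng->ind[k][] appears to hold a compressed form of the offset into table k+1
 * at which the successors of a given entry begin; get_full_index() expands it
 * back to a full offset using ptr_table[k]/ptr_table_size[k], and new_index()
 * (used when the tables are built) is its inverse.  A typical use of the
 * function above would then look like (variable names are illustrative only):
 *
 *   int successors_of_unigram = num_of_types(0, word_id, ng);
 *   int successors_of_bigram  = num_of_types(1, bigram_pos, ng);
 *
 * where bigram_pos is a position in the 2-gram table, not a word id.
 */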
void begin_browse(arpa_lm_t* lm, int k, TBROWSE *browse_st) {
  int i;

  browse_st->lm = lm;
  browse_st->k = k;

  /* Find the first i-gram with (i+1)-grams for each i */
  for (i = 0; i < k-1; i++) {
    browse_st->pos[i] = -1;
    do {
      browse_st->pos[i]++;
      /* dhuggins@cs: Unlikely that we'll get to the end of the i-grams,
         but this is here for correctness' sake. */
      if ((i == 0 && browse_st->pos[i] == lm->vocab_size)
          || (browse_st->pos[i] == lm->num_kgrams[i]-1)) {
        browse_st->end[i] = lm->num_kgrams[i+1]-1;
      } else {
        browse_st->end[i] = get_full_index(lm->ind[i][browse_st->pos[i]+1],
                                           lm->ptr_table[i],
                                           lm->ptr_table_size[i],
                                           browse_st->pos[i]+1) - 1;
      }
    } while (browse_st->end[i] == -1);
  }

  /* Start with the first k-gram (i.e. first word in position k) */
  browse_st->pos[k-1] = 0;
}
void increase_pos(int* pos, int* end, int k, arpa_lm_t* lm) {
  int i;

  /* Go to next word in position k */
  pos[k-1]++;
  dprintf(("increase_pos(%x,%d): pos[%d]=%d table_size=%d\n",
           (int)(long)lm&0xff, k, k-1, pos[k-1], lm->table_sizes[k-1]));

  /* If we are at the end of the k-grams for position k, then step back
     through previous positions to find the next k-gram. */
  for (i = k-2; i >= 0 && pos[i+1] > end[i]; i--) {
    /* Keep iterating through position i until we find
     * another one with k-grams */
    do {
      pos[i]++;
      dprintf(("increase_pos(%x,%d): pos[%d]=%d end[%d]=%d table_size=%d\n",
               (int)(long)lm&0xff, k, i, pos[i], i, end[i],
               lm->table_sizes[i]));
      /* dhuggins@cs: don't run off the end of the list of pos[i]! */
      if ((i == 0 && pos[i] >= lm->vocab_size)
          || (pos[i] >= lm->num_kgrams[i]-1)) {
        end[i] = lm->num_kgrams[i+1]-1;
        dprintf(("at end of %d-grams pos[%d]=%d\n", i, i, pos[i]));
        dprintf(("setting last end[%d]=%d\n", i, end[i]));
        /* dhuggins@cs: don't run off the end of the list of pos[i]! */
        break;
      } else {
        end[i] = get_full_index(lm->ind[i][pos[i]+1],
                                lm->ptr_table[i],
                                lm->ptr_table_size[i],
                                pos[i]+1) - 1;
        dprintf(("setting get_full_index end[%d]=%d\n", i, end[i]));
      }
    } while (pos[i+1] > end[i]);
  }
}
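/*
 * Note (a reading of the two browse helpers above, not original
 * documentation): the invariant maintained here seems to be that pos[i] is
 * the current position in table i and end[i] is the last position in table
 * i+1 that still belongs to pos[i], so the test pos[i+1] > end[i] detects
 * when the children of pos[i] are exhausted.  begin_browse() establishes the
 * invariant for the first k-gram; increase_pos() advances pos[k-1] and
 * cascades back through the lower positions whenever a boundary is crossed.
 */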
void combine_lm(arpa_lm_t *arpa_lm, arpa_lm_t *lm1, arpa_lm_t *lm2) {

  char *in_line;
  char *input_line;
  int i,j,k;
  int num_of_args;
  int pos_of_novelty;
  char *input_line_ptr_orig;
  char *word_copy;
  id__t *previous_ngram;
  id__t *current_ngram;
  vocab_sz_t temp_id;
  vocab_sz_t *pos_in_list;
  int previd;
  TBROWSE_UNION bru;
  char** words;

  words = (char**) NewArray(15, MAX_WORD, sizeof(char));
  in_line = (char *) rr_malloc(1024*sizeof(char));
  input_line = (char *) rr_malloc(1024*sizeof(char));

#import "OpenEarsStaticAnalysisToggle.h"
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__
  input_line_ptr_orig = input_line;
#endif

  /* Read number of each k-gram */
  arpa_lm->table_sizes = (table_size_t *) rr_malloc(sizeof(table_size_t)*11);
  arpa_lm->num_kgrams = (ngram_sz_t *) rr_malloc(sizeof(ngram_sz_t)*11);

  calc_merged_ngram_num(arpa_lm, lm1, lm2);

  previous_ngram = (id__t *) rr_calloc(arpa_lm->n, sizeof(id__t));
  current_ngram = (id__t *) rr_calloc(arpa_lm->n, sizeof(id__t));

  pos_in_list = (vocab_sz_t *) rr_malloc(sizeof(vocab_sz_t) * arpa_lm->n);

  ng_arpa_lm_alloc_struct(arpa_lm);

  /* Process 1-grams */
  printf("Reading unigrams...\n");

  i = 0;
  begin_browse_union(lm1, lm2, 1, &bru);
  while (get_next_ngram_union(words, &bru)) {
    word_copy = rr_salloc(words[0]);
    /* Do checks about open or closed vocab */
    check_open_close_vocab(arpa_lm, word_copy, &i);
  }

  /* Process 2, ... , n-1 grams */
#import "OpenEarsStaticAnalysisToggle.h"
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__
  previd = -1;

  for (i = 2; i <= arpa_lm->n-1; i++) {

    printf("\nReading %d-grams...\n", i);

    previd = -1;
    j = 0;

    for (k = 0; k <= arpa_lm->n-1; k++) {
      pos_in_list[k] = 0;
    }

    begin_browse_union(lm1, lm2, i, &bru);
    while (get_next_ngram_union(words, &bru)) {

      /* Process line into all relevant temp_words */
      num_of_args = 0;
#endif

      sih_lookup(arpa_lm->vocab_ht, words[i-1], &temp_id);
      arpa_lm->word_id[i-1][j] = temp_id;

      show_dot(j);
      j++;
      if (j > arpa_lm->table_sizes[i-1]) {
        quit(-1,"Error - Header information in ARPA format language model is incorrect.\nMore than %d %d-grams needed to be stored.\n",
             arpa_lm->table_sizes[i-1], i);
      }

      /* Make sure that indexes in previous table point to the right thing. */
      for (k = 0; k <= i-1; k++) {
        previous_ngram[k] = current_ngram[k];
        sih_lookup(arpa_lm->vocab_ht, words[k], &temp_id);
        if (temp_id == 0 && strcmp(words[k], "<UNK>")) {
          quit(-1,"Error - found unknown word in n-gram file : %s\n",
               words[k]);
        }
        current_ngram[k] = temp_id;
      }

      /* Find position of novelty */
      /* bug fixed: for the first ngram, pos_of_novelty should be 0 - Wei Xu */
      if (j == 1)
        pos_of_novelty = 0;
      else {
        pos_of_novelty = i;
        for (k = 0; k <= i-1; k++) {
          if (current_ngram[k] > previous_ngram[k]) {
            pos_of_novelty = k;
            k = arpa_lm->n;
          } else {
            /* A decreasing id here means the merged stream is out of order. */
            if ((current_ngram[k] < previous_ngram[k]) && (j > 0)) {
              quit(-1,"Error : n-grams are not correctly ordered.\n");
            }
          }
        }
      }

      if (pos_of_novelty == i && j != 1)
        quit(-1,"Error - Repeated %d-gram in ARPA format language model.\n",
             i);

      if (pos_of_novelty != i-1) {
        if (i == 2) {
          /* Deal with unigram pointers */
          for (k = previd + 1; k <= current_ngram[0]; k++) {
            arpa_lm->ind[0][k] = new_index(j-1,
                                           arpa_lm->ptr_table[0],
                                           &(arpa_lm->ptr_table_size[0]),
                                           k);
          }
          previd = current_ngram[0];
        } else {
          for (k = pos_of_novelty; k <= i-2; k++) {
            if (k == 0) {
              pos_in_list[0] = current_ngram[0];
            } else {
              pos_in_list[k] =
                MIN(get_full_index(arpa_lm->ind[k-1][pos_in_list[k-1]],
                                   arpa_lm->ptr_table[k-1],
                                   arpa_lm->ptr_table_size[k-1],
                                   pos_in_list[k-1]),
                    pos_in_list[k]);
              while (arpa_lm->word_id[k][pos_in_list[k]] < current_ngram[k]) {
                pos_in_list[k]++;
              }
            }
          }
          for (k = previd + 1; k <= pos_in_list[i-2]; k++) {
            arpa_lm->ind[i-2][k] = new_index(j-1,
                                             arpa_lm->ptr_table[i-2],
                                             &(arpa_lm->ptr_table_size[i-2]),
                                             k);
          }
          previd = pos_in_list[i-2];
        }
      }
    }

    /* Now need to tidy up pointers for bottom section of unigrams */
    for (k = previd + 1; k <= arpa_lm->vocab_size; k++) {
      arpa_lm->ind[0][k] = new_index(arpa_lm->num_kgrams[1],
                                     arpa_lm->ptr_table[0],
                                     &(arpa_lm->ptr_table_size[0]),
                                     k);
    }
  }

  /* Process n-grams */
  printf("\nReading %d-grams...\n", arpa_lm->n);

  j = 0;
  previd = 0;

  arpa_lm->ind[arpa_lm->n-2][0] = 0;

  for (k = 0; k <= arpa_lm->n-1; k++) {
    /* bug fixed by Wei Xu : this is a serious bug */
    pos_in_list[k] = 0;
    // pos_in_list[0] = 0;
  }

  begin_browse_union(lm1, lm2, arpa_lm->n, &bru);
  while (get_next_ngram_union(words, &bru)) {
    show_dot(j);

    sih_lookup(arpa_lm->vocab_ht, words[arpa_lm->n-1], &temp_id);
    arpa_lm->word_id[arpa_lm->n-1][j] = temp_id;
    j++;

    for (k = 0; k <= arpa_lm->n-1; k++) {
      previous_ngram[k] = current_ngram[k];
      sih_lookup(arpa_lm->vocab_ht, words[k], &temp_id);
      if (temp_id == 0 && strcmp(words[k], "<UNK>")) {
        quit(-1,"Error - found unknown word in n-gram file : %s\n",
             words[k]);
      }
      current_ngram[k] = temp_id;
    }

    /* Find position of novelty */
    /* bug fixed: for the first ngram, pos_of_novelty should be 0 - Wei Xu */
    if (j == 1)
      pos_of_novelty = 0;
    else {
      pos_of_novelty = arpa_lm->n+1;
      for (k = 0; k <= arpa_lm->n-1; k++) {
        if (current_ngram[k] > previous_ngram[k]) {
          pos_of_novelty = k;
          k = arpa_lm->n;
        } else {
          /* A decreasing id here means the merged stream is out of order. */
          if ((current_ngram[k] < previous_ngram[k]) && (j > 0)) {
            quit(-1,"Error : n-grams are not correctly ordered.\n");
          }
        }
      }
    }

    if (pos_of_novelty == arpa_lm->n+1 && j != 1) {
      quit(-1,"Error : Same %d-gram occurs twice in ARPA format LM.\n",
           arpa_lm->n);
    }

    if (pos_of_novelty != arpa_lm->n-1) {

      for (k = pos_of_novelty; k <= arpa_lm->n-2; k++) {
        if (k == 0) {
          pos_in_list[0] = current_ngram[0];
        } else {
          pos_in_list[k] =
            MAX(get_full_index(arpa_lm->ind[k-1][pos_in_list[k-1]],
                               arpa_lm->ptr_table[k-1],
                               arpa_lm->ptr_table_size[k-1],
                               pos_in_list[k-1]),
                pos_in_list[k]);
          while (arpa_lm->word_id[k][pos_in_list[k]] < current_ngram[k]) {
            pos_in_list[k]++;
          }
        }
      }

      for (k = previd + 1; k <= pos_in_list[arpa_lm->n-2]; k++) {
        arpa_lm->ind[arpa_lm->n-2][k] =
          new_index(j-1,
                    arpa_lm->ptr_table[arpa_lm->n-2],
                    &(arpa_lm->ptr_table_size[arpa_lm->n-2]),
                    k);
      }
      previd = pos_in_list[arpa_lm->n-2];
    }

    if (j > arpa_lm->table_sizes[arpa_lm->n-1]) {
      quit(-1,"Error - Header information in ARPA format language model is incorrect.\nMore than %d %d-grams needed to be stored.\n",
           arpa_lm->table_sizes[arpa_lm->n-1], arpa_lm->n-1);
    }
  }

  /* Tidy up */
  free(previous_ngram);
  free(current_ngram);
  free(in_line);
  free(input_line);
  DeleteArray(words);
}
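/*
 * Sketch of the pos_of_novelty bookkeeping above (inferred from the code, not
 * from the original comments): because the merged n-grams arrive sorted by
 * word id, pos_of_novelty is the first position at which the current n-gram
 * differs from the previous one, e.g.
 *
 *   previous: (12 40 7)
 *   current : (12 40 9)   ->  pos_of_novelty = 2
 *
 * Only the tables from that position onward need new index entries; the
 * shared prefix (12 40) is already in place, which is why the new_index()
 * updates are limited to positions pos_of_novelty .. i-2 (or n-2 for the
 * highest order).
 */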
void compute_back_off(ng_t *ng, int n, int verbosity) {

  int *current_pos;
  int *end_pos;
  id__t *sought_ngram;
  int current_table;
  int ng_count;
  int i;
  double sum_cond_prob;
  double sum_bo_prob;
  double discounted_ngcount;
  double cond_prob;
  double bo_prob;
  double discount_mass;
  double leftout_bo_prob;
  double alpha;
  int bo_case;

  sum_cond_prob = 0.0;
  sum_bo_prob = 0.0;

  /* For the sake of warning-free compilation... */
  discounted_ngcount = 0.0;

  current_pos = (int *) rr_calloc(n+1, sizeof(int));
  sought_ngram = (id__t *) rr_calloc(n+1, sizeof(id__t));
  end_pos = (int *) rr_calloc(n+1, sizeof(int));

  /* Process the tree so that we get all the n-grams out in the right order. */
  for (current_pos[0] = ng->first_id;
       current_pos[0] <= ng->vocab_size;
       current_pos[0]++) {

    if (return_count(ng->four_byte_counts,
                     ng->count_table[0],
                     ng->marg_counts,
                     ng->marg_counts4,
                     current_pos[0]) > 0) {

      current_table = 1;

      if (current_pos[0] == ng->vocab_size) {
        end_pos[1] = ng->num_kgrams[1]-1;
      } else {
        end_pos[1] = get_full_index(ng->ind[0][current_pos[0]+1],
                                    ng->ptr_table[0],
                                    ng->ptr_table_size[0],
                                    current_pos[0]+1)-1;
      }

      while (current_table > 0) {

        if (current_table == n) {

          if (current_pos[n] <= end_pos[n]) {

            ng_count = return_count(ng->four_byte_counts,
                                    ng->count_table[n],
                                    ng->count[n],
                                    ng->count4[n],
                                    current_pos[n]);

            switch (ng->discounting_method) {
            case GOOD_TURING:
              if (ng_count <= ng->disc_range[n]) {
                discounted_ngcount = ng->gt_disc_ratio[n][ng_count] * ng_count;
              } else {
                discounted_ngcount = ng_count;
              }
              break;
            case LINEAR:
              discounted_ngcount = ng->lin_disc_ratio[n] * ng_count;
              break;
            case ABSOLUTE:
              discounted_ngcount = ng_count - ng->abs_disc_const[n];
              break;
            case WITTEN_BELL:
              if (n == 1) {
                discounted_ngcount =
                  ((double) return_count(ng->four_byte_counts,
                                         ng->count_table[0],
                                         ng->marg_counts,
                                         ng->marg_counts4,
                                         current_pos[0]) * ng_count)
                  / (return_count(ng->four_byte_counts,
                                  ng->count_table[0],
                                  ng->marg_counts,
                                  ng->marg_counts4,
                                  current_pos[0])
                     + num_of_types(0, current_pos[0], ng));
              } else {
                discounted_ngcount =
                  ((double) return_count(ng->four_byte_counts,
                                         ng->count_table[n-1],
                                         ng->count[n-1],
                                         ng->count4[n-1],
                                         current_pos[n-1]) * ng_count)
                  / (return_count(ng->four_byte_counts,
                                  ng->count_table[n-1],
                                  ng->count[n-1],
                                  ng->count4[n-1],
                                  current_pos[n-1])
                     + num_of_types(n-1, current_pos[n-1], ng));
              }
              break;
            }

            if (n == 1) {
              cond_prob = ((double) discounted_ngcount /
                           return_count(ng->four_byte_counts,
                                        ng->count_table[0],
                                        ng->marg_counts,
                                        ng->marg_counts4,
                                        current_pos[0]));
            } else {
              cond_prob = ((double) discounted_ngcount /
                           return_count(ng->four_byte_counts,
                                        ng->count_table[n-1],
                                        ng->count[n-1],
                                        ng->count4[n-1],
                                        current_pos[n-1]));
            }
            sum_cond_prob += cond_prob;

            /* Fill up sought ngram array with correct stuff */
            for (i = 1; i <= n; i++) {
              sought_ngram[i-1] = ng->word_id[i][current_pos[i]];
            }

            bo_ng_prob(n-1, sought_ngram, ng, verbosity, &bo_prob, &bo_case);
            sum_bo_prob += bo_prob;
            current_pos[n]++;

          } else {

            discount_mass = 1.0 - sum_cond_prob;

            if (discount_mass < 1e-10) {
              discount_mass = 0.0;
              pc_message(verbosity,2,"Warning : Back off weight for %s(id %d) ",
                         ng->vocab[current_pos[0]], current_pos[0]);
              for (i = 1; i <= n-1; i++) {
                pc_message(verbosity,2,"%s(id %d) ",
                           ng->vocab[ng->word_id[i][current_pos[i]]],
                           ng->word_id[i][current_pos[i]]);
              }
              pc_message(verbosity,2,
                         "is set to 0 (sum of probs = %f).\nMay cause problems with zero probabilities.\n",
                         sum_cond_prob);
            }

            leftout_bo_prob = 1.0 - sum_bo_prob;
            if (leftout_bo_prob < 1e-10) {
              leftout_bo_prob = 0.0;
            }

            if (leftout_bo_prob > 0.0) {
              alpha = discount_mass / leftout_bo_prob;
            } else {
              alpha = 0.0;  /* Will not be used. Should happen very rarely. */
              pc_message(verbosity,2,"Warning : Back off weight for %s(id %d) ",
                         ng->vocab[current_pos[0]], current_pos[0]);
              for (i = 1; i <= n-1; i++) {
                pc_message(verbosity,2,"%s(id %d) ",
                           ng->vocab[ng->word_id[i][current_pos[i]]],
                           ng->word_id[i][current_pos[i]]);
              }
              pc_message(verbosity,2,
                         "is set to 0.\nMay cause problems with zero probabilities.\n");
            }

            if (ng->four_byte_alphas) {
              ng->bo_weight4[n-1][current_pos[n-1]] = alpha;
            } else {
              ng->bo_weight[n-1][current_pos[n-1]] =
                short_alpha(alpha,
                            ng->alpha_array,
                            &(ng->size_of_alpha_array),
                            65535 - ng->out_of_range_alphas,
                            ng->min_alpha,
                            ng->max_alpha);
            }

            /* Finished current (n-1)-gram */
            sum_cond_prob = 0.0;
            sum_bo_prob = 0.0;
            current_table--;
            if (current_table > 0) {
              current_pos[current_table]++;
            }
          }

        } else {

          if (current_pos[current_table] <= end_pos[current_table]) {
            current_table++;
            if (current_pos[current_table-1] ==
                ng->num_kgrams[current_table-1]-1) {
              end_pos[current_table] = ng->num_kgrams[current_table]-1;
            } else {
              end_pos[current_table] =
                get_full_index(ng->ind[current_table-1][current_pos[current_table-1]+1],
                               ng->ptr_table[current_table-1],
                               ng->ptr_table_size[current_table-1],
                               current_pos[current_table-1]+1)-1;
            }
          } else {
            current_table--;
            if (current_table > 0) {
              current_pos[current_table]++;
            }
          }
        }
      }
    }

    /* Now deal with zeroton unigrams */
    else {
      if (n == 1) {
        if (ng->four_byte_alphas) {
          ng->bo_weight4[0][current_pos[0]] = 1.0;
        } else {
          ng->bo_weight[0][current_pos[0]] =
            short_alpha(1.0,
                        ng->alpha_array,
                        &(ng->size_of_alpha_array),
                        65535 - ng->out_of_range_alphas,
                        ng->min_alpha,
                        ng->max_alpha);
        }
      }
    }
  }

  free(end_pos);
  free(current_pos);
  free(sought_ngram);
}
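/*
 * Note on the alpha computed above (standard Katz back-off algebra, restated
 * for reference rather than taken from the original comments): for a context
 * h of length n-1,
 *
 *   alpha(h) = (1 - sum over seen w of P_discounted(w | h))
 *              / (1 - sum over the same w of P_backoff(w | shortened h))
 *
 * which is discount_mass / leftout_bo_prob in the code: the probability mass
 * freed by discounting, renormalised over the words that must be reached via
 * the lower-order model.
 */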
void write_arpa_lm(ng_t *ng, int verbosity) {

  int *current_pos;
  int *end_pos;
  ngram_sz_t i;
  double log_10_of_e = 1.0 / log(10.0);

  /* HEADER */
  pc_message(verbosity,1,"ARPA-style %d-gram will be written to %s\n",
             ng->n, ng->arpa_filename);

  write_arpa_copyright(ng->arpa_fp, ng->n, ng->vocab_size,
                       ng->vocab[1], ng->vocab[2], ng->vocab[3]);

  display_vocabtype(ng->vocab_type, ng->oov_fraction, ng->arpa_fp);
  display_discounting_method(ng, ng->arpa_fp);
  write_arpa_format(ng->arpa_fp, ng->n);
  write_arpa_num_grams(ng->arpa_fp, ng, NULL, 0);
  write_arpa_k_gram_header(ng->arpa_fp, 1);

  /* Unigram section: log10 prob, word, and (if n > 1) log10 back-off weight */
  for (i = ng->first_id; i <= (int) ng->vocab_size; i++) {

    double log10_uniprob;
    double log10_alpha;
    double alpha;

    log10_uniprob = ng->uni_log_probs[i]*log_10_of_e;

    if (ng->uni_probs[i] <= 0.0)
      log10_uniprob = BAD_LOG_PROB;

    alpha = ng_double_alpha(ng, 0, i);
    if (alpha > 0.0)
      log10_alpha = log10(alpha);
    else
      log10_alpha = BAD_LOG_PROB;

    fprintf(ng->arpa_fp,"%.4f %s", log10_uniprob, ng->vocab[i]);
    if (ng->n > 1)
      fprintf(ng->arpa_fp,"\t%.4f\n", log10_alpha);
    else
      fprintf(ng->arpa_fp,"\n");
  }

  current_pos = (int *) rr_malloc(ng->n*sizeof(int));
  end_pos = (int *) rr_malloc(ng->n*sizeof(int));

  /* Print 2-gram, ... (n-1)-gram info. */
  for (i = 1; i <= ng->n-1; i++) {

    /* Print out the (i+1)-gram */
    int current_table, j;
    count_t ngcount, marg_count;
    double discounted_ngcount;
    double ngprob, log_10_ngprob, ngalpha, log_10_ngalpha;

    /* Initialise variables for the sake of warning-free compilation */
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__
    discounted_ngcount = 0.0;
    log_10_ngalpha = 0.0;
#endif

    write_arpa_k_gram_header(ng->arpa_fp, i+1);

    /* Go through the n-gram list in order */
    for (j = 0; j <= ng->n-1; j++) {
      current_pos[j] = 0;
      end_pos[j] = 0;
    }

    for (current_pos[0] = ng->first_id;
         current_pos[0] <= (int) ng->vocab_size;
         current_pos[0]++) {

      if (return_count(ng->four_byte_counts,
                       ng->count_table[0],
                       ng->marg_counts,
                       ng->marg_counts4,
                       current_pos[0]) > 0) {

        current_table = 1;

        if (current_pos[0] == (int) ng->vocab_size)
          end_pos[1] = (int) ng->num_kgrams[1]-1;
        else {
          end_pos[1] = get_full_index(ng->ind[0][current_pos[0]+1],
                                      ng->ptr_table[0],
                                      ng->ptr_table_size[0],
                                      current_pos[0]+1)-1;
        }

        while (current_table > 0) {

          /* fprintf(stderr, "i %d, current_pos[i] %d, end_pos[i] %d\n",
             i, current_pos[i], end_pos[i]);
             fflush(stderr); */

          if (current_table == i) {

            if (current_pos[i] <= end_pos[i]) {

              /* fprintf(stderr, "%d\n",ng->count[i][current_pos[i]]);
                 fprintf(stderr, "%d\n",ng->count_table[i][ng->count[i][current_pos[i]]]); */

              ngcount = return_count(ng->four_byte_counts,
                                     ng->count_table[i],
                                     ng->count[i],
                                     ng->count4[i],
                                     current_pos[i]);

              if (i == 1) {
                marg_count = return_count(ng->four_byte_counts,
                                          ng->count_table[0],
                                          ng->marg_counts,
                                          ng->marg_counts4,
                                          current_pos[0]);
              } else {
                marg_count = return_count(ng->four_byte_counts,
                                          ng->count_table[i-1],
                                          ng->count[i-1],
                                          ng->count4[i-1],
                                          current_pos[i-1]);
              }

              if (ng->disc_meth == NULL)
                ng->disc_meth = (disc_meth_t*) disc_meth_init(ng->discounting_method);
              assert(ng->disc_meth);

              discounted_ngcount =
                NG_DISC_METH(ng)->dump_discounted_ngram_count(ng, i, ngcount,
                                                              marg_count,
                                                              current_pos);

              ngprob = (double) discounted_ngcount / marg_count;

              if (ngprob > 1.0) {
                fprintf(stderr,
                        "discounted_ngcount = %f marg_count = %d %d %d %d\n",
                        discounted_ngcount, marg_count, current_pos[0],
                        current_pos[1], current_pos[2]);
                quit(-1,"Error : probability of ngram is greater than one.\n");
              }

              if (ngprob > 0.0)
                log_10_ngprob = log10(ngprob);
              else
                log_10_ngprob = BAD_LOG_PROB;

              if (i <= ng->n-2) {
                ngalpha = ng_double_alpha(ng, i, current_pos[i]);
                if (ngalpha > 0.0)
                  log_10_ngalpha = log10(ngalpha);
                else
                  log_10_ngalpha = BAD_LOG_PROB;
              }

              // BEGIN HLW VERSION
              if (((strstr(ng->vocab[current_pos[0]],"</s>")) == NULL)
                  && ((i <= 1)
                      || ((i > 1)
                          && ((strstr(ng->vocab[(unsigned int) ng->word_id[i][current_pos[i]]],"<s>")) == NULL)))) {
                // if the overall entry is a trigram and it's going to end with <s>, skip it -- HLW

                fprintf(ng->arpa_fp,"%.4f ",log_10_ngprob);
                fprintf(ng->arpa_fp,"%s ",ng->vocab[current_pos[0]]);

                for (j = 1; j <= i; j++) {
                  fprintf(ng->arpa_fp,"%s ",
                          ng->vocab[(unsigned int) ng->word_id[j][current_pos[j]]]);
                }

                if (i <= ng->n-2) {
                  fprintf(ng->arpa_fp,"%.4f\n",log_10_ngalpha);
                } else {
                  fprintf(ng->arpa_fp,"\n");
                }
              } else {
                // something is being skipped -- HLW
                if (i == 0) {
                  skipped_unigrams++;
                } else if (i == 1) {
                  skipped_bigrams++;
                } else if (i == 2) {
                  skipped_trigrams++;
                }
              }
              // END HLW VERSION

              // PREVIOUS VERSION:
              /*
              if (i <= ng->n-2) {
                ngalpha = ng_double_alpha(ng, i, current_pos[i]);
                if (ngalpha > 0.0)
                  log_10_ngalpha = log10(ngalpha);
                else
                  log_10_ngalpha = BAD_LOG_PROB;
              }

              fprintf(ng->arpa_fp,"%.4f ",log_10_ngprob);
              fprintf(ng->arpa_fp,"%s ",ng->vocab[current_pos[0]]);

              for (j=1;j<=i;j++){
                // fprintf(stderr, "j %d, ng->wordid[j] %u, current_pos[j] %d, ng->word_id[j][current_pos[j]] %u\n",j, ng->word_id[j], current_pos[j], ng->word_id[j][current_pos[j]]);
                fprintf(ng->arpa_fp,"%s ",ng->vocab[(unsigned int) ng->word_id[j][current_pos[j]]]);
              }

              if (i <= ng->n-2)
                fprintf(ng->arpa_fp,"%.4f\n",log_10_ngalpha);
              else
                fprintf(ng->arpa_fp,"\n");
              */

              current_pos[i]++;

            } else {
              current_table--;
              if (current_table > 0)
                current_pos[current_table]++;
            }

          } else {

            if (current_pos[current_table] <= end_pos[current_table]) {
              current_table++;
              if (current_pos[current_table-1] ==
                  (int) ng->num_kgrams[current_table-1]-1)
                end_pos[current_table] = (int) ng->num_kgrams[current_table]-1;
              else {
                end_pos[current_table] =
                  get_full_index(ng->ind[current_table-1][current_pos[current_table-1]+1],
                                 ng->ptr_table[current_table-1],
                                 ng->ptr_table_size[current_table-1],
                                 current_pos[current_table-1]+1) - 1;
              }
            } else {
              current_table--;
              if (current_table > 0)
                current_pos[current_table]++;
            }
          }
        }
      }
    }
  }

  free(current_pos);
  free(end_pos);

  fprintf(ng->arpa_fp,"\n\\end\\\n");
  rr_oclose(ng->arpa_fp);

  // BEGIN HLW ADDITION
  // Now that the file is complete, let's go back and replace the placeholder
  // ngram counts with the real final counts -- HLW
  final_ngram_count_replacement(ng->n, ng);

  unigram_count = 0;
  bigram_count = 0;
  trigram_count = 0;
  skipped_unigrams = 0;
  skipped_bigrams = 0;
  skipped_trigrams = 0;
  // END HLW ADDITION
}
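/*
 * Illustration (standard ARPA text format, matching the fprintf calls above;
 * the words and numbers below are made up):
 *
 *   \2-grams:
 *   -0.2553 the cat -0.3010
 *
 * i.e. log10 of the conditional probability, the n-gram's words, and, for
 * orders that can still be backed off from (i <= n-2 above), the log10
 * back-off weight.  The final "\end\" marker is written just before the file
 * is closed.
 */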