Пример #1
0
/* Width of the index range that one entry of table k spans in table k+1.
 *
 *   k   - table holding the entry (0 is the unigram table)
 *   ind - position of the entry inside table k
 *   ng  - model owning the index tables
 *
 * The range opens at the entry's own full index.  It closes at the full
 * index of the entry that follows it in table k, or at num_kgrams[k+1]
 * when there is no following entry.  The difference is returned after
 * conversion to unsigned short.
 */
unsigned short num_of_types(int k,
			    int ind,
			    ng_t *ng) {

  ngram_sz_t range_open;
  ngram_sz_t range_close;
  int has_next_entry;

  range_open = get_full_index(ng->ind[k][ind],
			      ng->ptr_table[k],
			      ng->ptr_table_size[k],
			      ind);

  /* The unigram table is bounded by vocab_size, every other table by
     its own num_kgrams entry. */
  if (k>0)
    has_next_entry = (ind < (ng->num_kgrams[k]-1));
  else
    has_next_entry = (ind < ng->vocab_size);

  if (has_next_entry)
    range_close = get_full_index(ng->ind[k][ind+1],
				 ng->ptr_table[k],
				 ng->ptr_table_size[k],
				 ind+1);
  else
    range_close = ng->num_kgrams[k+1];

  return(range_close-range_open);

}
Пример #2
0
/* Initialise a browse cursor for walking the k-grams of an ARPA model.
 *
 *   lm        - model to browse; stored in the cursor
 *   k         - order of the n-grams to browse; stored in the cursor
 *   browse_st - cursor to initialise
 *
 * For every level i below k-1 the cursor is advanced to the first entry
 * whose computed end index in table i+1 is not -1, and that end index is
 * recorded in end[i].  pos[k-1] is set to 0.
 */
void begin_browse(arpa_lm_t* lm,int k,TBROWSE *browse_st)
{
  int i;
  
  browse_st->lm=lm;
  browse_st->k=k;
  
  /* Find the first i-gram with i+1-grams for each i */
  for (i=0;i<k-1;i++) {
    browse_st->pos[i]=-1;
    do {
      browse_st->pos[i]++;
      if ( (i==0 && browse_st->pos[i]==lm->vocab_size) || 
	   (browse_st->pos[i]==lm->num_kgrams[i]-1) ) {
	browse_st->end[i]=lm->num_kgrams[i+1]-1;
	/* This is the last entry of table i.  Stop here even when
	   end[i] came out as -1 (table i+1 is empty); continuing
	   would index past the end of ind[i], which is the same
	   overrun increase_pos guards against with its break. */
	break;
      }
      browse_st->end[i]=get_full_index(lm->ind[i][browse_st->pos[i]+1],
				       lm->ptr_table[i],
				       lm->ptr_table_size[i],
				       browse_st->pos[i]+1)-1;
    } while (browse_st->end[i]==-1);
  }
  /* Start with the first k-gram (i.e. first word in position k) */
  browse_st->pos[k-1]=0;
}
Пример #3
0
/* Advance a browse position to the next k-gram.
 *
 *   pos - per-level cursor; pos[k-1] is always incremented
 *   end - per-level last index belonging to the current parent entry
 *   k   - order of the n-grams being browsed
 *   lm  - model whose index tables are consulted
 *
 * After the increment, each level from k-2 downwards is checked in turn:
 * while the level below has moved past end[level], that level's cursor is
 * stepped forward and end[level] recomputed until it covers pos[level+1]
 * again or the last entry of the table is reached.
 */
void increase_pos(int* pos,int* end,int k,arpa_lm_t* lm)
{
	int level = k-2;

	/* Go to next word in position k */
	pos[k-1]++;
	dprintf(("increase_pos(%x,%d): pos[%d]=%d table_size=%d\n",
		 (int)(long)lm&0xff, k, k-1, pos[k-1], lm->table_sizes[k-1]));

	/* Propagate the step upwards for as long as a level has run past
	   the range owned by its parent. */
	while (level >= 0 && pos[level+1] > end[level]) {
		for (;;) {
			pos[level]++;
			dprintf(("increase_pos(%x,%d): pos[%d]=%d end[%d]=%d table_size=%d\n",
				 (int)(long)lm&0xff, k, level, pos[level], level, end[level],
				 lm->table_sizes[level]));

			/* Last entry of this table: its range runs to the
			   end of the next table, and there is nothing
			   further to step to. */
			if ((level==0 && pos[level]>=lm->vocab_size) ||
			    (pos[level]>=lm->num_kgrams[level]-1) ) {
				end[level]=lm->num_kgrams[level+1]-1;
				dprintf(("at end of %d-grams pos[%d]=%d\n",
					 level, level, pos[level]));
				dprintf(("setting last end[%d]=%d\n",
					 level, end[level]));
				break;
			}

			end[level]=get_full_index(lm->ind[level][pos[level]+1],
				lm->ptr_table[level],
				lm->ptr_table_size[level],
				pos[level]+1)-1;
			dprintf(("setting get_full_index end[%d]=%d\n",
				 level, end[level]));

			/* Stop once this entry's range reaches pos[level+1]. */
			if (pos[level+1] <= end[level])
				break;
		}
		level--;
	}
}
Пример #4
0
/*
 * Fill arpa_lm from the n-grams produced by browsing lm1 and lm2 together.
 *
 *   arpa_lm - destination model; its table_sizes and num_kgrams arrays are
 *             allocated here, the remaining tables by
 *             ng_arpa_lm_alloc_struct()
 *   lm1,lm2 - source models handed to begin_browse_union()
 *
 * Three passes are made with begin_browse_union()/get_next_ngram_union():
 *   1. unigrams: every word goes through check_open_close_vocab();
 *   2. orders 2 .. n-1: the last word's id is stored in word_id[] and the
 *      index entries of the table above are pointed at the first child;
 *   3. order n: same bookkeeping for the highest-order table.
 *
 * quit() is called when a word is missing from the vocabulary hash, when an
 * n-gram is repeated, or when more n-grams arrive than table_sizes allows.
 */
void combine_lm(arpa_lm_t *arpa_lm, arpa_lm_t *lm1, arpa_lm_t *lm2)
{
	char *in_line;
	char *input_line;
	int i,j,k;
	int num_of_args;
	int pos_of_novelty;
	char *input_line_ptr_orig;
	char *word_copy;
	id__t *previous_ngram;
	id__t *current_ngram;
	vocab_sz_t temp_id;
	vocab_sz_t *pos_in_list;
	int previd;
	TBROWSE_UNION bru;
	char** words;
	
	words=(char**)NewArray(15,MAX_WORD,sizeof(char));
	
	in_line = (char *) rr_malloc(1024*sizeof(char));
	input_line = (char *) rr_malloc(1024*sizeof(char));
#import "OpenEarsStaticAnalysisToggle.h"
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__	
	input_line_ptr_orig = input_line;
#endif	
	
	/* Read number of each k-gram */
	
	arpa_lm->table_sizes = (table_size_t *) rr_malloc(sizeof(table_size_t)*11);
    
	arpa_lm->num_kgrams = (ngram_sz_t *) rr_malloc(sizeof(ngram_sz_t)*11);
		
	calc_merged_ngram_num(arpa_lm, lm1, lm2);
	
	previous_ngram = (id__t *) rr_calloc(arpa_lm->n,sizeof(id__t));
	current_ngram = (id__t *) rr_calloc(arpa_lm->n,sizeof(id__t));

	pos_in_list = (vocab_sz_t *) rr_malloc(sizeof(vocab_sz_t) * arpa_lm->n);
	ng_arpa_lm_alloc_struct(arpa_lm);
	
	/* Process 1-grams */
	
	printf("Reading unigrams...\n");
	
	i=0;
	
	begin_browse_union(lm1,lm2,1,&bru);
	
	while (get_next_ngram_union(words,&bru)) {
		word_copy = rr_salloc(words[0]);
		/* Do checks about open or closed vocab */
		check_open_close_vocab(arpa_lm,word_copy,&i);
	}
	
	/* Process 2, ... , n-1 grams */
#import "OpenEarsStaticAnalysisToggle.h"
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__	
	previd = -1;
	
	for (i=2;i<=arpa_lm->n-1;i++) {
		
		printf("\nReading %d-grams...\n",i);
		
		previd = -1;
		
		j=0;
		
		for (k=0;k<=arpa_lm->n-1;k++) {
			pos_in_list[k] = 0;
		}
		
		begin_browse_union(lm1,lm2,i,&bru);
		while (get_next_ngram_union(words,&bru)) {
			
			/* Process line into all relevant temp_words */			
			num_of_args = 0;						
#endif		
			/* Store the id of the last word of this i-gram. */
			sih_lookup(arpa_lm->vocab_ht,words[i-1],&temp_id);
			arpa_lm->word_id[i-1][j] = temp_id;
			
			show_dot(j);
			
			/* NOTE(review): the bound is tested only after
			   word_id[i-1][j] has been written - confirm the
			   table has room for that extra entry. */
			j++;
			if (j>arpa_lm->table_sizes[i-1]) {
				quit(-1,"Error - Header information in ARPA format language model is incorrect.\nMore than %d %d-grams needed to be stored.\n",arpa_lm->table_sizes[i-1],i);
			}
			
			/* Make sure that indexes in previous table point to 
			the right thing. */
			
			for (k=0;k<=i-1;k++) {
				previous_ngram[k] = current_ngram[k];
				sih_lookup(arpa_lm->vocab_ht,words[k],&temp_id);
				if (temp_id == 0 && strcmp(words[k],"<UNK>")) {
					quit(-1,"Error - found unknown word in n-gram file : %s\n",
						words[k]);
				}
				current_ngram[k] = temp_id;
			}
			
			/* Find position of novelty */
			
			/*bug fixed, for the first ngram, pos_of novelty should be 0 - Wei Xu*/
			if (j==1) pos_of_novelty=0;
			else {
				pos_of_novelty = i;
				
				for (k=0;k<=i-1;k++) {
					if (current_ngram[k] > previous_ngram[k]) {
						pos_of_novelty = k;
						/* Forces the loop to terminate. */
						k = arpa_lm->n;
					}
					else {
						/* NOTE(review): same comparison as the
						   branch above, so this ordering check
						   can never fire.  Left as is. */
						if ((current_ngram[k] > previous_ngram[k]) && (j > 0)) {
							quit(-1,"Error : n-grams are not correctly ordered.\n");
						}
					}
				}
			}
			
			if (pos_of_novelty == i && j != 1)
				quit(-1,"Error - Repeated %d-gram in ARPA format language model.\n",
				     i);
			
			/* A change before the last word means a new parent
			   entry has started: point the skipped parents at
			   this i-gram. */
			if (pos_of_novelty != i-1) {
				if (i==2) {
					/* Deal with unigram pointers */
					
					for (k = previd + 1; k <= current_ngram[0]; k++) {
						arpa_lm->ind[0][k] = new_index(j-1,
							arpa_lm->ptr_table[0],
							&(arpa_lm->ptr_table_size[0]),
							k);
					}
					previd = current_ngram[0];
				}else {
					
					for (k=pos_of_novelty;k<=i-2;k++) {
						if (k == 0) {
							pos_in_list[0] = current_ngram[0];
						}
						else {
							pos_in_list[k] = 
								MIN(get_full_index(arpa_lm->ind[k-1][pos_in_list[k-1]],
								arpa_lm->ptr_table[k-1],   
								arpa_lm->ptr_table_size[k-1],   
								pos_in_list[k-1]),pos_in_list[k]);
							while (arpa_lm->word_id[k][pos_in_list[k]] < 
								current_ngram[k]) {
								pos_in_list[k]++;
							}
						}
					}
					for (k = previd + 1; k <= pos_in_list[i-2]; k++) {
						arpa_lm->ind[i-2][k] = 
							new_index(j-1,
							arpa_lm->ptr_table[i-2],
							&(arpa_lm->ptr_table_size[i-2]),
							k);
					}
					previd = pos_in_list[i-2];	    
				}
			}
		}
	
		/* Now need to tidy up pointers for bottom section of unigrams */
	
		for (k = previd + 1; k <= arpa_lm->vocab_size; k++) {
			arpa_lm->ind[0][k] = new_index(arpa_lm->num_kgrams[1],
				arpa_lm->ptr_table[0],
				&(arpa_lm->ptr_table_size[0]),
				k);
		}      
	
	}
  
	printf("\nReading %d-grams...\n",arpa_lm->n);
	
	j = 0;
	previd = 0;
	
	arpa_lm->ind[arpa_lm->n-2][0] = 0;
	
	for (k=0;k<=arpa_lm->n-1;k++) {
		/* bug fixed by Wei Xu : this is a serious bug*/
		pos_in_list[k] = 0;
	}
	
	begin_browse_union(lm1,lm2,arpa_lm->n,&bru);
	while (get_next_ngram_union(words,&bru)) {

		show_dot(j);
	  
		sih_lookup(arpa_lm->vocab_ht,words[arpa_lm->n-1],&temp_id);
		
		arpa_lm->word_id[arpa_lm->n-1][j] = temp_id;
		
		j++;
		
		for (k=0;k<=arpa_lm->n-1;k++) {
			previous_ngram[k] = current_ngram[k];
			sih_lookup(arpa_lm->vocab_ht,words[k],&temp_id);
			if (temp_id == 0 && strcmp(words[k],"<UNK>")) {
				quit(-1,"Error - found unknown word in n-gram file : %s\n",
					words[k]);
			}
			current_ngram[k] = temp_id;
		}
		
		/* Find position of novelty */
		
		/*bug fixed, for the first ngram, pos_of novelty should be 0 - Wei Xu*/
		if (j==1) pos_of_novelty=0;
		else {
			pos_of_novelty = arpa_lm->n+1;
			
			for (k=0;k<=arpa_lm->n-1;k++) {
				if (current_ngram[k] > previous_ngram[k]) {
					pos_of_novelty = k;
					/* Forces the loop to terminate. */
					k = arpa_lm->n;
				}else {
					/* NOTE(review): unreachable for the same
					   reason as in the lower-order pass. */
					if ((current_ngram[k] > previous_ngram[k]) && (j>0)) {
						quit(-1,"Error : n-grams are not correctly ordered.\n");
					}
				}
			}
		}
		
		if ( pos_of_novelty == arpa_lm->n+1 && j != 1 ) {
			quit(-1,"Error : Same %d-gram occurs twice in ARPA format LM.\n",
				arpa_lm->n);
		}
		
		if (pos_of_novelty != arpa_lm->n-1) {
			
			for (k=pos_of_novelty;k<=arpa_lm->n-2;k++) {
				if (k == 0) {
					pos_in_list[0] = current_ngram[0];
				}else {
					pos_in_list[k] = 
						MAX(get_full_index(arpa_lm->ind[k-1][pos_in_list[k-1]],
						arpa_lm->ptr_table[k-1],   
						arpa_lm->ptr_table_size[k-1],   
						pos_in_list[k-1]),pos_in_list[k]);
					while (arpa_lm->word_id[k][pos_in_list[k]] < 
						current_ngram[k]) {
						pos_in_list[k]++;
					}
				}
			}
			for (k = previd + 1; k <= pos_in_list[arpa_lm->n-2]; k++) {
				arpa_lm->ind[arpa_lm->n-2][k] = 
					new_index(j-1,
					arpa_lm->ptr_table[arpa_lm->n-2],
					&(arpa_lm->ptr_table_size[arpa_lm->n-2]),
					k);
			}
			previd = pos_in_list[arpa_lm->n-2];
		}
		
		/* The table checked is the n-gram table, so report order n
		   (the lower-order pass reports i for table_sizes[i-1]). */
		if (j>arpa_lm->table_sizes[arpa_lm->n-1]) {
			quit(-1,"Error - Header information in ARPA format language model is incorrect.\nMore than %d %d-grams needed to be stored.\n",arpa_lm->table_sizes[arpa_lm->n-1],arpa_lm->n);
		}
	}
	
	/* Tidy up */
	
	free(previous_ngram);
	free(current_ngram);
	/* pos_in_list is private scratch space; release it with the rest. */
	free(pos_in_list);
	free(in_line);
	free(input_line);
	DeleteArray(words);
  
}
Пример #5
0
/* Print the opening of a back-off warning: the words addressed by
   current_pos at levels 0 .. n-1, each followed by its id. */
static void warn_bo_context(ng_t *ng, int n, int *current_pos, int verbosity) {

  int w;

  pc_message(verbosity,2,"Warning : Back off weight for %s(id %d) ",
	     ng->vocab[current_pos[0]],current_pos[0]);
  for (w=1;w<=n-1;w++) {
    pc_message(verbosity,2,"%s(id %d) ",ng->vocab[ng->word_id[w][current_pos[w]]],ng->word_id[w][current_pos[w]]);
  }
}

/* Store alpha as the back-off weight of entry `entry` in table `table`,
   either directly in bo_weight4 or as the value short_alpha() returns. */
static void store_bo_weight(ng_t *ng, int table, int entry, double alpha) {

  if (ng->four_byte_alphas) {
    ng->bo_weight4[table][entry] = alpha;
  }
  else {
    ng->bo_weight[table][entry] = 
      short_alpha(alpha,
		  ng->alpha_array,
		  &(ng->size_of_alpha_array),
		  65535 - ng->out_of_range_alphas,
		  ng->min_alpha,
		  ng->max_alpha);
  }
}

/* Compute the back-off weights stored in table n-1 of the model.
 *
 *   ng        - model whose counts are read and whose weights are written
 *   n         - table whose entries are summed over; the weights land in
 *               table n-1
 *   verbosity - forwarded to pc_message() and bo_ng_prob()
 *
 * The index tables are walked depth-first from each unigram with a
 * non-zero count.  For every entry of table n the discounted conditional
 * probability and the probability reported by bo_ng_prob(n-1, ...) are
 * accumulated; when a parent's children are exhausted the weight
 * (1 - sum of conditional probs) / (1 - sum of back-off probs) is stored
 * for that parent and the sums are reset.  Unigrams with a zero count get
 * a weight of 1.0 when n == 1.
 */
void compute_back_off(ng_t *ng,int n, int verbosity) {

  int *cursor;			/* current position in each table */
  int *limit;			/* last position owned by the current parent */
  id__t *sought_ngram;
  int depth;
  int ng_count;
  int i;
  double sum_cond_prob = 0.0;
  double sum_bo_prob = 0.0;
  /* Initialised for the sake of warning-free compilation. */
  double discounted_ngcount = 0.0;
  double cond_prob;
  double bo_prob;
  double discount_mass;
  double leftout_bo_prob;
  double alpha;
  int bo_case;

  cursor = (int *)rr_calloc(n+1,sizeof(int));
  sought_ngram = (id__t *) rr_calloc(n+1,sizeof(id__t));
  limit = (int *)rr_calloc(n+1,sizeof(int)); 

  for (cursor[0]=ng->first_id;
       cursor[0]<=ng->vocab_size;
       cursor[0]++) {

    /* Zeroton unigrams have no subtree to walk. */
    if (!(return_count(ng->four_byte_counts,
		       ng->count_table[0],
		       ng->marg_counts,
		       ng->marg_counts4,
		       cursor[0]) > 0)) {
      if (n == 1) {
	store_bo_weight(ng,0,cursor[0],1.0);
      }
      continue;
    }

    depth = 1;

    if (cursor[0] == ng->vocab_size) {
      limit[1] = ng->num_kgrams[1]-1;
    }
    else {
      limit[1] = get_full_index(ng->ind[0][cursor[0]+1],
				ng->ptr_table[0],
				ng->ptr_table_size[0],
				cursor[0]+1)-1;
    }

    while (depth > 0) {

      /* Above the target table: descend into the next table, or climb
	 back up once this level's range is used up. */
      if (depth != n) {
	if (cursor[depth] <= limit[depth]) {
	  depth++;
	  if (cursor[depth-1] == ng->num_kgrams[depth-1]-1) {
	    limit[depth] = ng->num_kgrams[depth]-1;
	  }
	  else {
	    limit[depth] = get_full_index(ng->ind[depth-1][cursor[depth-1]+1],ng->ptr_table[depth-1],ng->ptr_table_size[depth-1],cursor[depth-1]+1)-1;
	  }
	}
	else {
	  depth--;
	  if (depth > 0) {
	    cursor[depth]++;
	  }
	}
	continue;
      }

      /* Target table, children exhausted: finish the current parent. */
      if (cursor[n] > limit[n]) {

	discount_mass = 1.0 - sum_cond_prob;

	if (discount_mass < 1e-10) {
	  discount_mass = 0.0;
	  warn_bo_context(ng,n,cursor,verbosity);
	  pc_message(verbosity,2,
		     "is set to 0 (sum of probs = %f).\nMay cause problems with zero probabilities.\n",sum_cond_prob);
	}

	leftout_bo_prob = 1.0 - sum_bo_prob;
	if (leftout_bo_prob < 1e-10) {
	  leftout_bo_prob = 0.0;
	}

	if (leftout_bo_prob > 0.0) {
	  alpha = discount_mass / leftout_bo_prob;
	}
	else {
	  alpha = 0.0;	/* Will not be used. Should happen very rarely. */
	  warn_bo_context(ng,n,cursor,verbosity);
	  pc_message(verbosity,2,
		     "is set to 0.\nMay cause problems with zero probabilities.\n");
	}

	store_bo_weight(ng,n-1,cursor[n-1],alpha);

	sum_cond_prob = 0.0;
	sum_bo_prob = 0.0;
	depth--;
	if (depth > 0) {
	  cursor[depth]++;
	}
	continue;
      }

      /* Target table, one more child to account for. */
      ng_count = return_count(ng->four_byte_counts,
			      ng->count_table[n],
			      ng->count[n],
			      ng->count4[n],
			      cursor[n]);

      switch (ng->discounting_method) {
      case GOOD_TURING:
	if (ng_count <= ng->disc_range[n]) {
	  discounted_ngcount = ng->gt_disc_ratio[n][ng_count] * ng_count;
	}
	else {
	  discounted_ngcount = ng_count;
	}
	break;
      case LINEAR:
	discounted_ngcount = ng->lin_disc_ratio[n] * ng_count;
	break;
      case ABSOLUTE:
	discounted_ngcount = ng_count - ng->abs_disc_const[n];
	break;
      case WITTEN_BELL:
	/* parent count * count / (parent count + num_of_types of parent);
	   the unigram parent count comes from the marginal counts. */
	if (n==1) {
	  discounted_ngcount = ((double) 
				return_count(ng->four_byte_counts,
					     ng->count_table[0],
					     ng->marg_counts,
					     ng->marg_counts4,
					     cursor[0]) * ng_count)
	    / (return_count(ng->four_byte_counts,
			    ng->count_table[0],
			    ng->marg_counts,
			    ng->marg_counts4,
			    cursor[0]) + 
	       num_of_types(0,cursor[0],ng));
	}
	else {
	  discounted_ngcount = ((double) 
				return_count(ng->four_byte_counts,
					     ng->count_table[n-1],
					     ng->count[n-1],
					     ng->count4[n-1],
					     cursor[n-1])* ng_count)
	    / (return_count(ng->four_byte_counts,
			    ng->count_table[n-1],
			    ng->count[n-1],
			    ng->count4[n-1],
			    cursor[n-1]) + 
	       num_of_types(n-1,cursor[n-1],ng));
	}
	break;
      }

      if (n==1) {
	cond_prob = ((double) discounted_ngcount / 
		     return_count(ng->four_byte_counts,
				  ng->count_table[0],
				  ng->marg_counts,
				  ng->marg_counts4,
				  cursor[0]));
      }
      else {
	cond_prob = ((double) discounted_ngcount /  
		     return_count(ng->four_byte_counts,
				  ng->count_table[n-1],
				  ng->count[n-1],
				  ng->count4[n-1],
				  cursor[n-1]));
      }
      sum_cond_prob += cond_prob;

      /* Words at levels 1 .. n, i.e. the current n-gram minus its
	 first word. */
      for (i=1;i<=n;i++) {
	sought_ngram[i-1] = ng->word_id[i][cursor[i]];
      }

      bo_ng_prob(n-1,sought_ngram,ng,verbosity,&bo_prob,&bo_case);
      sum_bo_prob += bo_prob;
      cursor[n]++;
    }
  }

  free(limit);
  free(cursor);
  free(sought_ngram);
  
}
Пример #6
0
/*
 * Write the model in ng to ng->arpa_fp as ARPA-format text.
 *
 *   ng        - model to dump; ng->arpa_fp is closed before returning
 *   verbosity - forwarded to pc_message()
 *
 * Output order: the header produced by the write_arpa_* / display_*
 * helpers, the unigram section (log10 probability, word, and the log10
 * back-off weight when n > 1), then one section per higher order obtained
 * by walking the index tables depth-first.
 *
 * Higher-order entries are not written when their first word contains
 * "</s>", or, above bigrams, when their last word contains "<s>"; skipped
 * bigrams and trigrams are tallied in skipped_bigrams / skipped_trigrams.
 * After the file is closed final_ngram_count_replacement() is called and
 * the file-level counters are reset to zero.
 *
 * quit() is called if a computed n-gram probability exceeds 1.
 */
void write_arpa_lm(ng_t *ng,int verbosity) {
    
    int *current_pos;
    int *end_pos;
    ngram_sz_t i;
    double log_10_of_e = 1.0 / log(10.0);
    
    /* HEADER */
    
    pc_message(verbosity,1,"ARPA-style %d-gram will be written to %s\n",ng->n,ng->arpa_filename);
    
    write_arpa_copyright(ng->arpa_fp,ng->n,ng->vocab_size, ng->vocab[1],ng->vocab[2],ng->vocab[3]);
    
    display_vocabtype(ng->vocab_type,ng->oov_fraction, ng->arpa_fp);  
    display_discounting_method(ng,ng->arpa_fp);
    write_arpa_format(ng->arpa_fp,ng->n);
    write_arpa_num_grams(ng->arpa_fp,ng,NULL,0);
    write_arpa_k_gram_header(ng->arpa_fp,1);
    
    /* Unigram section. */
    
    for (i=ng->first_id; i<= (int) ng->vocab_size;i++) {
        
        double log10_uniprob;
        double log10_alpha;
        double alpha;
        
        /* uni_log_probs is scaled by 1/ln(10) to obtain a base-10 log. */
        log10_uniprob = ng->uni_log_probs[i]*log_10_of_e;
        
        if (ng->uni_probs[i]<=0.0)
            log10_uniprob = BAD_LOG_PROB;
        
        alpha=ng_double_alpha(ng,0,i);
        
        if(alpha > 0.0)
            log10_alpha = log10(alpha);
        else
            log10_alpha = BAD_LOG_PROB;
        
        fprintf(ng->arpa_fp,"%.4f %s",log10_uniprob,ng->vocab[i]);
        if (ng->n>1)
            fprintf(ng->arpa_fp,"\t%.4f\n",log10_alpha);
        else
            fprintf(ng->arpa_fp,"\n");
    }
    
    current_pos = (int *) rr_malloc(ng->n*sizeof(int));
    end_pos = (int *) rr_malloc(ng->n*sizeof(int)); 
    
    /* Print 2-gram, ... (n-1)-gram info. */
    
    for (i=1;i<=ng->n-1;i++) {
        
        /* Print out the (i+1)-gram */
        
        int current_table, j;
        count_t ngcount, marg_count;
        double discounted_ngcount;    
        double ngprob, log_10_ngprob, ngalpha, log_10_ngalpha;
        
        /* Initialise variables for the sake of warning-free compilation */
#ifdef STATICANALYZEDEPENDENCIES
#define __clang_analyzer__ 1
#endif
#if !defined(__clang_analyzer__) || defined(STATICANALYZEDEPENDENCIES)
#undef __clang_analyzer__
        
        discounted_ngcount = 0.0;
        log_10_ngalpha = 0.0;
#endif
        write_arpa_k_gram_header(ng->arpa_fp,i+1);
        
        /* Go through the n-gram list in order */
        
        for (j=0;j<=ng->n-1;j++) {
            current_pos[j] = 0;
            end_pos[j] = 0;
        }
        
        for (current_pos[0]=ng->first_id;
             current_pos[0]<=(int) ng->vocab_size;
             current_pos[0]++) {
            
            if (return_count(ng->four_byte_counts,
                             ng->count_table[0], 
                             ng->marg_counts,
                             ng->marg_counts4,
                             current_pos[0]) > 0) {
                
                current_table = 1;
                
                if (current_pos[0] == (int) ng->vocab_size)
                    end_pos[1] = (int ) ng->num_kgrams[1]-1;
                else {
                    end_pos[1] = get_full_index(ng->ind[0][current_pos[0]+1],
                                                ng->ptr_table[0],
                                                ng->ptr_table_size[0],
                                                current_pos[0]+1)-1;
                }
                
                while (current_table > 0) {
                    
                    if (current_table == i) {
                        
                        if (current_pos[i] <= end_pos[i]) {
                            
                            ngcount = return_count(ng->four_byte_counts,
                                                   ng->count_table[i],
                                                   ng->count[i],
                                                   ng->count4[i],
                                                   current_pos[i]);
                            
                            /* The parent count of a bigram comes from
                               the marginal unigram counts. */
                            if (i==1) {
                                marg_count = return_count(ng->four_byte_counts,
                                                          ng->count_table[0], 
                                                          ng->marg_counts,
                                                          ng->marg_counts4,
                                                          current_pos[0]);
                            }else {
                                marg_count = return_count(ng->four_byte_counts,
                                                          ng->count_table[i-1],
                                                          ng->count[i-1],
                                                          ng->count4[i-1],
                                                          current_pos[i-1]);
                            }
                            
                            if(ng->disc_meth==NULL)
                                ng->disc_meth=(disc_meth_t*) disc_meth_init(ng->discounting_method);
                            
                            assert(ng->disc_meth);
                            discounted_ngcount = 
                            NG_DISC_METH(ng)->dump_discounted_ngram_count(ng,i,ngcount,marg_count,current_pos);
                            
                            ngprob = (double) discounted_ngcount / marg_count;
                            
                            if (ngprob > 1.0) {
                                /* Report only the positions in use for
                                   this order.  current_pos holds ng->n
                                   entries, so a fixed current_pos[2]
                                   reads past the array for a bigram
                                   model. */
                                fprintf(stderr,
                                        "discounted_ngcount = %f marg_count = %d",
                                        discounted_ngcount,marg_count);
                                for (j=0;j<=i;j++)
                                    fprintf(stderr," %d",current_pos[j]);
                                fprintf(stderr,"\n");
                                quit(-1,"Error : probablity of ngram is greater than one.\n");
                            }
                            
                            if (ngprob > 0.0) 
                                log_10_ngprob = log10(ngprob);
                            else 
                                log_10_ngprob = BAD_LOG_PROB;
                            
                            /* Only orders below n carry a back-off weight. */
                            if (i <= ng->n-2) {
                                ngalpha = ng_double_alpha(ng, i, current_pos[i]);
                                
                                if (ngalpha > 0.0)
                                    log_10_ngalpha = log10(ngalpha);
                                else
                                    log_10_ngalpha = BAD_LOG_PROB;
                            }
                            // BEGIN HLW VERSION
                            // Write the entry unless its first word contains "</s>" or,
                            // above bigrams, its last word contains "<s>" -- HLW
                            if(((strstr (ng->vocab[current_pos[0]],"</s>")) == NULL)&&((i <= 1) || ((i > 1) && ((strstr (ng->vocab[(unsigned int) ng->word_id[i][current_pos[i]]],"<s>")) == NULL)))) {
                                
                                fprintf(ng->arpa_fp,"%.4f ",log_10_ngprob);
                                fprintf(ng->arpa_fp,"%s ",ng->vocab[current_pos[0]]);
                                for (j=1;j<=i;j++){
                                    
                                    fprintf(ng->arpa_fp,"%s ",ng->vocab[(unsigned int) ng->word_id[j][current_pos[j]]]);
                                }
                                
                                if (i <= ng->n-2){
                                    fprintf(ng->arpa_fp,"%.4f\n",log_10_ngalpha);
                                } else{
                                    fprintf(ng->arpa_fp,"\n");
                                }
                            } else {
                                // something is being skipped  -- HLW
                                // (i starts at 1 in this loop, so the i==0 case is never taken here)
                                if(i==0) {
                                    skipped_unigrams++;
                                } else if(i==1) {
                                    skipped_bigrams++;
                                } else if (i==2) {
                                    skipped_trigrams++;
                                }
                            }
                            // END HLW VERSION
                            
                            current_pos[i]++;        
                        }else {
                            current_table--;
                            if (current_table > 0)
                                current_pos[current_table]++;
                        }
                    }else {
                        
                        /* Above the table being printed: descend, or
                           climb back once this level is exhausted. */
                        if (current_pos[current_table] <= end_pos[current_table]) {
                            current_table++;
                            if (current_pos[current_table-1] == (int) ng->num_kgrams[current_table-1]-1)
                                end_pos[current_table] = (int) ng->num_kgrams[current_table]-1;
                            else {
                                end_pos[current_table] = get_full_index(ng->ind[current_table-1][current_pos[current_table-1]+1],
                                                                        ng->ptr_table[current_table-1],
                                                                        ng->ptr_table_size[current_table-1],
                                                                        current_pos[current_table-1]+1) - 1;
                            }
                        }else {
                            current_table--;
                            if (current_table > 0)
                                current_pos[current_table]++;
                        }
                    }
                }
            }
        }
    } 
    
    free(current_pos);
    free(end_pos);
    
    fprintf(ng->arpa_fp,"\n\\end\\\n");
    
    rr_oclose(ng->arpa_fp);
    
    // BEGIN HLW ADDITION
    
    // Now that the file is complete, let's go back and replace the placeholder ngram counts with the real final counts  -- HLW
    
    final_ngram_count_replacement(ng->n,ng);
    
    unigram_count = 0;
    bigram_count = 0;
    trigram_count = 0;
    skipped_unigrams = 0;
    skipped_bigrams = 0;
    skipped_trigrams = 0;
    
    // END HLW ADDITION
}