/**
 * Set up empty vocabulary storage on one of the two language-model structs.
 *
 * Exactly one of the structures is touched, selected by is_arpa:
 *   - is_arpa non-zero: arpa_ng is initialised, ng is left alone;
 *   - is_arpa zero:     ng is initialised, arpa_ng is left alone.
 *
 * For the selected structure this creates a fresh vocabulary hash table,
 * allocates a vocab array of table_sizes[0] + 1 string pointers, and sets
 * vocab_size to table_sizes[0].
 *
 * @param ng       binary-format model (used when is_arpa is zero)
 * @param arpa_ng  ARPA-format model (used when is_arpa is non-zero)
 * @param is_arpa  selects which of the two structures to initialise
 */
void ng_allocate_vocab_ht(ng_t *ng,
                          arpa_lm_t *arpa_ng,
                          flag is_arpa)
{
  if (!is_arpa) {
    /* Binary-format model. */
    ng->vocab_ht = sih_create(1000,0.5,2.0,1);
    ng->vocab = (char **) rr_malloc(sizeof(char *) * (ng->table_sizes[0] + 1));
    ng->vocab_size = ng->table_sizes[0];
    return;
  }

  /* ARPA-format model. */
  arpa_ng->vocab_ht = sih_create(1000,0.5,2.0,1);
  arpa_ng->vocab = (char **) rr_malloc(sizeof(char *) * (arpa_ng->table_sizes[0] + 1));
  arpa_ng->vocab_size = arpa_ng->table_sizes[0];
}
/*
 * Print a warning on stderr when `word` is present in ng->vocab_ht but its
 * entry in ng->context_cue is 0.  Does nothing when the word is not in the
 * vocabulary.
 */
static void warn_if_not_context_cue(ng_t *ng, char *word)
{
  vocab_sz_t word_id;

  if (sih_lookup(ng->vocab_ht, word, &word_id) != 0 &&
      ng->context_cue[word_id] == 0) {
    fprintf(stderr,
            "WARNING: %s appears as a vocabulary item, but is not labelled as a\ncontext cue.\n",
            word);
  }
}

/**
 * Load the vocabulary for `ng` and mark which words are context cues.
 *
 * Steps:
 *   1. Create ng->vocab_ht and fill it, together with ng->vocab and
 *      ng->vocab_size, by calling read_voc() on ng->vocab_filename.
 *   2. Allocate ng->context_cue with vocab_size + 1 flags and reset
 *      ng->no_of_ccs to 0.
 *   3. If ng->context_set is non-zero, read ng->context_cues_fp line by
 *      line.  Lines starting with "##" are skipped; the first
 *      whitespace-delimited token of every other line is looked up in the
 *      vocabulary.  Known words get their context_cue flag set and are
 *      counted in ng->no_of_ccs; unknown words produce a warning.  The
 *      stream is closed with rr_iclose() afterwards.
 *   4. Warn on stderr if "<s>", "<p>" or "<art>" is in the vocabulary but
 *      is not flagged as a context cue.
 *
 * @param ng         model whose vocabulary fields are populated
 * @param verbosity  verbosity level forwarded to pc_message()/read_voc()
 */
void read_vocab(ng_t* ng, int verbosity)
{
  vocab_sz_t current_cc_id;
  char current_cc[200];
  char wlist_entry[1024];

  pc_message(verbosity,2,"Reading vocabulary.\n");

  /* Don't change the parameter of sih_create, because it will change
     the binary layout of the .binlm file */
  ng->vocab_ht = sih_create(1000,0.5,2.0,1);

  read_voc(ng->vocab_filename,verbosity,ng->vocab_ht,&ng->vocab,&(ng->vocab_size));

  /* Determine which of the vocabulary words are context cues */
  ng->no_of_ccs = 0;
  ng->context_cue = (flag *) rr_calloc(ng->vocab_size+1,sizeof(flag));

  if (ng->context_set) {
    /* This should be tied to l889 to l894 in lm_combine.c */
    while (fgets(wlist_entry, sizeof(wlist_entry), ng->context_cues_fp)) {
      int n_parsed;

      if (strncmp(wlist_entry,"##",2) == 0)
        continue;

      /* Field width keeps the token inside current_cc (200 bytes incl. NUL);
         a line can be up to 1023 characters long. */
      n_parsed = sscanf(wlist_entry, "%199s", current_cc);

      warn_on_wrong_vocab_comments(wlist_entry);

      /* Blank / whitespace-only line: no token was stored, so current_cc
         must not be looked up. */
      if (n_parsed != 1)
        continue;

      if (sih_lookup(ng->vocab_ht, current_cc, &current_cc_id) == 0) {
        pc_message(verbosity,1,"Warning : %s in the context cues file does not appear in the vocabulary.\n",current_cc);
      } else {
        /* Index with the full id: context_cue has vocab_size + 1 entries,
           and narrowing to unsigned short would alias ids above 65535.
           NOTE(review): assumes sih_lookup ids never exceed vocab_size. */
        ng->context_cue[current_cc_id] = 1;
        /* Cast so the argument matches %d whatever the width of vocab_sz_t. */
        pc_message(verbosity,2,"Context cue word : %s id = %d\n",current_cc,(int) current_cc_id);
        ng->no_of_ccs++;
      }
    }
    rr_iclose(ng->context_cues_fp);
  }

  /* Conventional marker words that are in the vocabulary but were not
     flagged as context cues above get a warning. */
  warn_if_not_context_cue(ng, "<s>");
  warn_if_not_context_cue(ng, "<p>");
  warn_if_not_context_cue(ng, "<art>");
}