/*
 * Attempt to convert a CFG to a FSG.  No heuristic simplification is
 * performed here.  The conversion only takes place if every expansion rule
 * in the CFG has one of the following forms:
 *
 *   X -> w0 w1 ... wN Y
 *   X -> w0 w1 ... wN
 *   X -> Y
 *   X -> w0
 *   X -> nil
 *
 * where X, Y are non-terminals, and w0, ..., wN are terminals.
 *
 * _cfg : grammar to convert (read only here).
 * _fsg : output.  On success *_fsg receives the newly allocated FSG; if the
 *        conversion is not possible *_fsg is set to NULL.
 *
 * The function is declared void, so *_fsg is the only success/failure
 * indication available to the caller.
 *
 * State numbering: state 0 is the end state; every non-terminal that owns at
 * least one rule gets its own state, and intermediate states are allocated
 * on demand for multi-word rules.
 *
 * NOTE(review): only the name and trans_list members of the FSG are assigned
 * here.  If s2_fsg_t also carries start/final state or state-count members
 * they are left at whatever ckd_calloc produced -- confirm against callers.
 */
void
s3_cfg_convert_to_fsg(s3_cfg_t *_cfg, s2_fsg_t **_fsg)
{
  hash_table_t *item2state = NULL;
  int num_states = 1;           /* let 0 be the end state */
  int from_state;
  int to_state;
  int i, j;
  s3_cfg_id_t id;
  s3u_vector_t *items = NULL;
  s3u_vector_t *rules = NULL;
  s3_cfg_item_t *item = NULL;
  s3_cfg_rule_t *rule = NULL;
  s2_fsg_t *fsg = NULL;         /* FSG under construction */
  s2_fsg_t *result = NULL;      /* what the caller finally receives */
  char *word;

  if (_fsg == NULL)
    return;
  if (_cfg == NULL)
    goto cleanup;

  item2state = hash_new(S3_CFG_NAME_HASH_SIZE, HASH_CASE_YES);
  if (item2state == NULL)
    goto cleanup;

  fsg = (s2_fsg_t *)ckd_calloc(1, sizeof(s2_fsg_t));
  fsg->name = NULL;
  fsg->trans_list = NULL;

  /* assign one FSG state to every non-terminal that has at least one rule */
  items = &_cfg->item_info;
  for (i = s3u_vec_count(items) - 1; i >= 0; i--) {
    item = (s3_cfg_item_t *)s3u_vec_get(items, i);
    if (item == NULL)
      goto cleanup;

    if (!s3_cfg_is_terminal(item->id) &&
        (item->nil_rule != NULL || s3u_vec_count(&item->rules) > 0))
      hash_enter_bkey(item2state, &item->id, sizeof(s3_cfg_id_t),
                      num_states++);
  }

  /* iterate through the CFG's expansion rules and convert them to FSG
   * transitions.  If at any point the conversion fails, do some cleanup
   * and return.
   */
  rules = &_cfg->rules;
  for (i = s3u_vec_count(rules) - 1; i >= 0; i--) {
    rule = (s3_cfg_rule_t *)s3u_vec_get(rules, i);
    if (rule == NULL)
      goto cleanup;

    /* a source without a state would leave from_state uninitialized */
    if (hash_lookup_bkey(item2state, &rule->src, sizeof(s3_cfg_id_t),
                         &from_state) < 0)
      goto cleanup;

    /* a NULL production rule means we transition to the end state */
    if (rule->len == 0) {
      add_trans(fsg, from_state, 0, rule->prob_score, NULL);
    }
    else if (rule->len == 1) {
      id = rule->products[0];

      /* a single terminal means we output the terminal and transition to
       * the end state
       */
      if (s3_cfg_is_terminal(id)) {
        item = (s3_cfg_item_t *)s3u_vec_get(items, s3_cfg_id2index(id));
        if (item == NULL)
          goto cleanup;
        word = item->name;
        add_trans(fsg, from_state, 0, rule->prob_score, word);
      }
      /* a single non-terminal means we take an epsilon transition */
      else {
        if (hash_lookup_bkey(item2state, &id, sizeof(s3_cfg_id_t),
                             &to_state) < 0)
          goto cleanup;
        add_trans(fsg, from_state, to_state, rule->prob_score, NULL);
      }
    }
    else {
      for (j = 1; j < rule->len; j++) {
        /* get the output for the transition; everything except the last
         * product must be a terminal
         */
        id = rule->products[j - 1];
        if (!s3_cfg_is_terminal(id))
          goto cleanup;
        item = (s3_cfg_item_t *)s3u_vec_get(items, s3_cfg_id2index(id));
        if (item == NULL)
          goto cleanup;
        word = item->name;

        /* get the target state for the transition */
        id = rule->products[j];
        if (s3_cfg_is_terminal(id))
          to_state = num_states++;
        else if (hash_lookup_bkey(item2state, &id, sizeof(s3_cfg_id_t),
                                  &to_state) < 0)
          goto cleanup;

        /* the rule's probability is charged once, on its first arc */
        add_trans(fsg, from_state, to_state,
                  j == 1 ? rule->prob_score : 1.0, word);
        from_state = to_state;
      }

      /* X -> w0 ... wN: the loop above only emitted w0 ... w(N-1) and left
       * us in a fresh intermediate state.  Emit the last word on an arc
       * into the end state so it is not silently dropped.
       */
      id = rule->products[rule->len - 1];
      if (s3_cfg_is_terminal(id)) {
        item = (s3_cfg_item_t *)s3u_vec_get(items, s3_cfg_id2index(id));
        if (item == NULL)
          goto cleanup;
        word = item->name;
        add_trans(fsg, from_state, 0, 1.0, word);
      }
    }
  }

  /* success: hand ownership to the caller and keep cleanup from freeing it */
  result = fsg;
  fsg = NULL;

 cleanup:
  if (fsg != NULL)
    free_fsg(fsg);
  /* the table only maps ids to state numbers; it is not needed afterwards */
  if (item2state != NULL)
    hash_free(item2state);
  *_fsg = result;
}
/* * Convert the glist of ssids to a composite sseq id. Return the composite ID. */ static s3ssid_t ssidlist2comsseq (glist_t g, mdef_t *mdef, dict2pid_t *dict2pid, hash_table_t *hs, /* For composite states */ hash_table_t *hp) /* For composite senone seq */ { int32 i, j, n, s, ssid; s3senid_t **sen; s3senid_t *comsenid; gnode_t *gn; n = glist_count (g); if (n <= 0) E_FATAL("Panic: length(ssidlist)= %d\n", n); /* Space for list of senones for each state, derived from the given glist */ sen = (s3senid_t **) ckd_calloc (mdef_n_emit_state (mdef), sizeof(s3senid_t *)); for (i = 0; i < mdef_n_emit_state (mdef); i++) { sen[i] = (s3senid_t *) ckd_calloc (n+1, sizeof(s3senid_t)); sen[i][0] = BAD_S3SENID; /* Sentinel */ } /* Space for composite senone ID for each state position */ comsenid = (s3senid_t *) ckd_calloc (mdef_n_emit_state (mdef), sizeof(s3senid_t)); for (gn = g; gn; gn = gnode_next(gn)) { ssid = gnode_int32 (gn); /* Expand ssid into individual states (senones); insert in sen[][] if not present */ for (i = 0; i < mdef_n_emit_state (mdef); i++) { s = mdef->sseq[ssid][i]; for (j = 0; (IS_S3SENID(sen[i][j])) && (sen[i][j] != s); j++); if (NOT_S3SENID(sen[i][j])) { sen[i][j] = s; sen[i][j+1] = BAD_S3SENID; } } } /* Convert senones list for each state position into composite state */ for (i = 0; i < mdef_n_emit_state (mdef); i++) { for (j = 0; IS_S3SENID(sen[i][j]); j++); assert (j > 0); j = hash_enter_bkey (hs, (char *)(sen[i]), j*sizeof(s3senid_t), dict2pid->n_comstate); if (j == dict2pid->n_comstate) dict2pid->n_comstate++; /* New composite state */ else ckd_free ((void *) sen[i]); comsenid[i] = j; } ckd_free (sen); /* Convert sequence of composite senids to composite sseq ID */ j = hash_enter_bkey (hp, (char *)comsenid, mdef->n_emit_state * sizeof(s3senid_t), dict2pid->n_comsseq); if (j == dict2pid->n_comsseq) { dict2pid->n_comsseq++; if (dict2pid->n_comsseq >= MAX_S3SENID) E_FATAL("#Composite sseq limit(%d) reached; increase MAX_S3SENID\n", dict2pid->n_comsseq); 
} else ckd_free ((void *) comsenid); return ((s3ssid_t)j); }