Ejemplo n.º 1
0
/* Attempt to convert a CFG to a FSG.  No heuristic simplifcation is performed
 * here.  The conversion will only take place if all expansion rule in the 
 * CFG takes one of the following form
 * 
 *   X -> w0 w1 ... wN Y
 *   X -> w0 w1 ... wN
 *   X -> Y
 *   X -> w0
 *   X -> nil
 *
 * where X, Y are non-terminals, and w0, ..., wN are terminals.  If the
 * conversion is not possible, _fsg is set to NULL and the function returns -1.
 */
void
s3_cfg_convert_to_fsg(s3_cfg_t *_cfg,s2_fsg_t **_fsg)
{
  hash_table_t *item2state=NULL;
  int num_states=1; /* let 0 be the end state */
  int start_state=0;
  int from_state;
  int to_state;
  int i=0,j=0;
  s3_cfg_id_t id;
  s3u_vector_t *items=NULL;
  s3u_vector_t *rules=NULL;
  s3_cfg_item_t *item=NULL;
  s3_cfg_rule_t *rule=NULL;
  s2_fsg_t *fsg=NULL;
  char *word;

  item2state=hash_new(S3_CFG_NAME_HASH_SIZE,HASH_CASE_YES);
  if (item2state==NULL)
    goto cleanup;

  fsg=(s2_fsg_t*)ckd_calloc(1,sizeof(s2_fsg_t));
  fsg->name=NULL;
  fsg->trans_list=NULL;

  items=&_cfg->item_info;
  for (i=s3u_vec_count(items)-1;i>=0;i--) {
    if ((item=s3u_vec_get(items,i))==NULL)
      goto cleanup;

    rules=&item->rules;
    if (!s3_cfg_is_terminal(item->id) &&
	(item->nil_rule!=NULL || (rules!=NULL && s3u_vec_count(rules)>0)))
      hash_enter_bkey(item2state,&item->id,sizeof(s3_cfg_id_t),num_states++);
  }

  /* iterate through the CFG's expansion rules and convert them to FSG
   * transitions.  If at any point the conversion fails, do some cleanup
   * and return.
   */
  rules=&_cfg->rules;
  for (i=s3u_vec_count(rules)-1;i>=0;i--) {
    if ((rule=s3u_vec_get(rules,i))==NULL)
      goto cleanup;

    hash_lookup_bkey(item2state,&rule->src,sizeof(s3_cfg_id_t),&from_state);

    /* a NULL production rule means we transition to the end state */
    if (rule->len==0)
      add_trans(fsg,from_state,0,rule->prob_score,NULL);
    else if (rule->len==1) {
      id=rule->products[0];
      /* a single terminal means we output the terminal and transition to 
       * the end state
       */
      if (s3_cfg_is_terminal(id)) {
	word=((s3_cfg_item_t*)s3u_vec_get(items,s3_cfg_id2index(id)))->name;
	add_trans(fsg,from_state,0,rule->prob_score,word);
      }
      /* a single non-terminal means we take an epsilon transition */
      else {
	hash_lookup_bkey(item2state,&id,sizeof(s3_cfg_id_t),&to_state);
	add_trans(fsg,from_state,to_state,rule->prob_score,NULL);
      }
    }
    else {
      for (j=1;j<rule->len;j++) {

	/* get the output for the transition */
	id=rule->products[j-1];
	if (!s3_cfg_is_terminal(id))
	  goto cleanup;
	word=((s3_cfg_item_t*)s3u_vec_get(items,s3_cfg_id2index(id)))->name;

	/* get the target state for the transition */
	id=rule->products[j];
	if (s3_cfg_is_terminal(id))
	  to_state=num_states++;
	else
	  hash_lookup_bkey(item2state,&id,sizeof(s3_cfg_id_t),&to_state);

	add_trans(fsg,from_state,to_state,j==1?rule->prob_score:1.0,word);

	from_state=to_state;
      }
    }
  }
  
  *_fsg=fsg;
  return 0;

 cleanup:
  if (fsg!=NULL)
    free_fsg(fsg);

  return -1;
}
/*
 * Map a glist of ssids onto a single composite senone-sequence ID.
 *
 * For every emitting state position the distinct senones found across all
 * listed ssids are collected; each such set is entered into hs (keyed by the
 * senone array) to obtain a composite state ID, and the resulting array of
 * composite state IDs is entered into hp to obtain the composite sseq ID,
 * which is returned.  dict2pid->n_comstate and dict2pid->n_comsseq are bumped
 * whenever a new entry is created.  Arrays that become hash keys are left
 * allocated; arrays that duplicate an existing key are freed.
 */
static s3ssid_t ssidlist2comsseq (glist_t g, mdef_t *mdef, dict2pid_t *dict2pid,
				  hash_table_t *hs,	/* For composite states */
				  hash_table_t *hp)	/* For composite senone seq */
{
    int32 n_ssid, st, pos, senone, ssid, len, id;
    s3senid_t **state_sen;
    s3senid_t *com_state;
    gnode_t *node;
    
    n_ssid = glist_count (g);
    if (n_ssid <= 0)
	E_FATAL("Panic: length(ssidlist)= %d\n", n_ssid);
    
    /* One sentinel-terminated senone list per state position; a list can hold
     * at most one senone per ssid plus the sentinel */
    state_sen = (s3senid_t **) ckd_calloc (mdef_n_emit_state (mdef), sizeof(s3senid_t *));
    st = 0;
    while (st < mdef_n_emit_state (mdef)) {
	state_sen[st] = (s3senid_t *) ckd_calloc (n_ssid+1, sizeof(s3senid_t));
	state_sen[st][0] = BAD_S3SENID;	/* Sentinel */
	st++;
    }
    /* One composite senone ID per state position */
    com_state = (s3senid_t *) ckd_calloc (mdef_n_emit_state (mdef), sizeof(s3senid_t));
    
    /* Spread every ssid over the per-state lists, skipping senones already there */
    node = g;
    while (node) {
	ssid = gnode_int32 (node);
	
	for (st = 0; st < mdef_n_emit_state (mdef); st++) {
	    senone = mdef->sseq[ssid][st];
	    
	    /* Stop at the first match or at the sentinel */
	    pos = 0;
	    while ((IS_S3SENID(state_sen[st][pos])) && (state_sen[st][pos] != senone))
		pos++;
	    
	    if (NOT_S3SENID(state_sen[st][pos])) {
		/* Not seen yet: append and move the sentinel one slot on */
		state_sen[st][pos] = senone;
		state_sen[st][pos+1] = BAD_S3SENID;
	    }
	}
	
	node = gnode_next(node);
    }
    
    /* Turn each per-state senone list into a composite state ID */
    for (st = 0; st < mdef_n_emit_state (mdef); st++) {
	len = 0;
	while (IS_S3SENID(state_sen[st][len]))
	    len++;
	assert (len > 0);
	
	id = hash_enter_bkey (hs, (char *)(state_sen[st]), len*sizeof(s3senid_t), dict2pid->n_comstate);
	if (id != dict2pid->n_comstate)
	    ckd_free ((void *) state_sen[st]);	/* Key already known; drop our copy */
	else
	    dict2pid->n_comstate++;	/* New composite state */
	
	com_state[st] = id;
    }
    ckd_free (state_sen);
    
    /* Turn the sequence of composite state IDs into a composite sseq ID */
    id = hash_enter_bkey (hp, (char *)com_state, mdef->n_emit_state * sizeof(s3senid_t),
			  dict2pid->n_comsseq);
    if (id != dict2pid->n_comsseq) {
	ckd_free ((void *) com_state);	/* Sequence already known; drop our copy */
    } else {
	dict2pid->n_comsseq++;
	if (dict2pid->n_comsseq >= MAX_S3SENID)
	    E_FATAL("#Composite sseq limit(%d) reached; increase MAX_S3SENID\n",
		    dict2pid->n_comsseq);
    }
    
    return ((s3ssid_t)id);
}