/* Attempt to convert a CFG to a FSG.  No heuristic simplification is
 * performed here.  The conversion only takes place if every expansion rule
 * in the CFG has one of the following forms
 *
 *   X -> w0 w1 ... wN Y
 *   X -> w0 w1 ... wN
 *   X -> Y
 *   X -> w0
 *   X -> nil
 *
 * where X, Y are non-terminals, and w0, ..., wN are terminals.
 *
 * State 0 is the end state.  Every non-terminal that has a nil rule or at
 * least one expansion rule is given its own state, and additional
 * intermediate states are numbered on the fly for terminals that appear in
 * the middle of a rule.
 *
 *   _cfg : grammar to convert.
 *   _fsg : out-parameter.  On success *_fsg receives the FSG built here; if
 *          the conversion is not possible *_fsg is set to NULL.
 *
 * The function is declared void, so the value left in *_fsg is the only
 * success/failure indication available to the caller.
 *
 * NOTE(review): item2state is created with hash_new() but is never released
 * on either exit path.  The matching release routine is not visible in this
 * part of the file -- TODO free the table before returning.
 */
void
s3_cfg_convert_to_fsg(s3_cfg_t *_cfg,s2_fsg_t **_fsg)
{
  hash_table_t *item2state=NULL;
  int num_states=1; /* let 0 be the end state */
  int from_state=0;
  int to_state=0;
  int i=0,j=0;
  s3_cfg_id_t id;
  s3u_vector_t *items=NULL;
  s3u_vector_t *rules=NULL;
  s3_cfg_item_t *item=NULL;
  s3_cfg_rule_t *rule=NULL;
  s2_fsg_t *fsg=NULL;
  char *word;

  /* without an out-parameter there is nowhere to deliver the result */
  if (_fsg==NULL)
    return;

  /* pessimistic default: every failure path leaves *_fsg as NULL */
  *_fsg=NULL;

  if (_cfg==NULL)
    return;

  item2state=hash_new(S3_CFG_NAME_HASH_SIZE,HASH_CASE_YES);
  if (item2state==NULL)
    goto cleanup;

  fsg=(s2_fsg_t*)ckd_calloc(1,sizeof(s2_fsg_t));
  fsg->name=NULL;
  fsg->trans_list=NULL;

  /* assign a state number to every non-terminal that can be expanded */
  items=&_cfg->item_info;
  for (i=s3u_vec_count(items)-1;i>=0;i--) {
    if ((item=s3u_vec_get(items,i))==NULL)
      goto cleanup;

    rules=&item->rules;
    if (!s3_cfg_is_terminal(item->id) &&
        (item->nil_rule!=NULL || s3u_vec_count(rules)>0))
      hash_enter_bkey(item2state,&item->id,sizeof(s3_cfg_id_t),num_states++);
  }

  /* iterate through the CFG's expansion rules and convert them to FSG
   * transitions.  If at any point the conversion fails, do some cleanup
   * and return.
   */
  rules=&_cfg->rules;
  for (i=s3u_vec_count(rules)-1;i>=0;i--) {
    if ((rule=s3u_vec_get(rules,i))==NULL)
      goto cleanup;

    /* a rule whose source has no state cannot be placed in the FSG; bail
     * out instead of using an uninitialized from_state
     */
    if (hash_lookup_bkey(item2state,&rule->src,sizeof(s3_cfg_id_t),
                         &from_state)<0)
      goto cleanup;

    /* a NULL production rule means we transition to the end state */
    if (rule->len==0)
      add_trans(fsg,from_state,0,rule->prob_score,NULL);
    else if (rule->len==1) {
      id=rule->products[0];
      /* a single terminal means we output the terminal and transition to
       * the end state
       */
      if (s3_cfg_is_terminal(id)) {
        item=(s3_cfg_item_t*)s3u_vec_get(items,s3_cfg_id2index(id));
        if (item==NULL)
          goto cleanup;
        word=item->name;
        add_trans(fsg,from_state,0,rule->prob_score,word);
      }
      /* a single non-terminal means we take an epsilon transition */
      else {
        if (hash_lookup_bkey(item2state,&id,sizeof(s3_cfg_id_t),&to_state)<0)
          goto cleanup;
        add_trans(fsg,from_state,to_state,rule->prob_score,NULL);
      }
    }
    else {
      for (j=1;j<rule->len;j++) {

        /* get the output for the transition; only the last symbol of a
         * rule may be a non-terminal
         */
        id=rule->products[j-1];
        if (!s3_cfg_is_terminal(id))
          goto cleanup;
        item=(s3_cfg_item_t*)s3u_vec_get(items,s3_cfg_id2index(id));
        if (item==NULL)
          goto cleanup;
        word=item->name;

        /* get the target state for the transition: a fresh intermediate
         * state if another terminal follows, otherwise the state of the
         * trailing non-terminal
         */
        id=rule->products[j];
        if (s3_cfg_is_terminal(id))
          to_state=num_states++;
        else if (hash_lookup_bkey(item2state,&id,sizeof(s3_cfg_id_t),
                                  &to_state)<0)
          goto cleanup;

        /* the rule's probability is charged once, on its first transition */
        add_trans(fsg,from_state,to_state,j==1?rule->prob_score:1.0,word);

        from_state=to_state;
      }

      /* the loop above only emits products[0..len-2].  When the rule ends
       * in a terminal (X -> w0 ... wN) that last word still has to be
       * output on a transition into the end state.
       */
      id=rule->products[rule->len-1];
      if (s3_cfg_is_terminal(id)) {
        item=(s3_cfg_item_t*)s3u_vec_get(items,s3_cfg_id2index(id));
        if (item==NULL)
          goto cleanup;
        add_trans(fsg,from_state,0,1.0,item->name);
      }
    }
  }

  *_fsg=fsg;
  return;

 cleanup:
  if (fsg!=NULL)
    free_fsg(fsg);

  *_fsg=NULL;
  return;
}
Exemple #2
0
/* Walk every entry of _state and apply the completion, scanning and
 * prediction steps to it.
 *
 * The entry count is re-read on every pass of the outer loop, so the loop
 * bound follows any change that add_entry() makes to _state->entries while
 * the walk is in progress.
 *
 *   _cfg   : grammar; its item table and its `predictions` scratch array
 *            are used (the scratch array is zeroed on entry).
 *   _state : state to evaluate.  Its num_expanded is reset to 0 and, when
 *            it has a back state, that state's num_expanded is incremented.
 */
static void
eval_state(s3_cfg_t *_cfg, s3_cfg_state_t *_state)
{
  s3_cfg_entry_t *entry = NULL;
  s3_cfg_entry_t *waiting = NULL;
  s3_cfg_rule_t *cur_rule = NULL;
  s3_cfg_rule_t *expansion = NULL;
  s3_cfg_state_t *entry_origin = NULL;
  s3_cfg_state_t *next_state = NULL;
  s3_cfg_item_t *sym_item = NULL;
  s3_arraylist_t *list = NULL;
  s3_cfg_id_t sym;
  s3_cfg_id_t completed;
  int8 *predicted = NULL;
  int32 entry_score;
  int sym_index;
  int dot_pos;
  int e, k;

  assert(_cfg != NULL);
  assert(_state != NULL);

  if (_state->back != NULL)
    _state->back->num_expanded++;
  _state->num_expanded = 0;

  /* one flag per grammar item: set once that non-terminal has been
   * predicted in this state */
  predicted = _cfg->predictions;
  memset(predicted, 0, _cfg->item_info.count * sizeof(int8));

  for (e = 0; e < _state->entries.count; e++) {
    entry = (s3_cfg_entry_t *)s3_arraylist_get(&_state->entries, e);
    cur_rule = entry->rule;
    dot_pos = entry->dot;
    entry_origin = entry->origin;
    entry_score = entry->score;

    /* the symbol immediately after the dot drives everything below */
    sym = cur_rule->products[dot_pos];
    sym_index = s3_cfg_id2index(sym);

    DEBUG_ENTRY(entry);

    sym_item = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info, sym_index);

    /* score bookkeeping: remember the entry whose score compares lowest
     * under `<` among those seen so far */
    if (_state->best_overall_entry == NULL ||
        entry_score < _state->best_overall_entry->score)
      _state->best_overall_entry = entry;

    if (_state->best_overall_parse == NULL ||
        entry_score < _state->best_overall_parse->score)
      _state->best_overall_parse = entry;

    if (!s3_cfg_is_terminal(sym)) {
      /* EPSILON SHORT-CUT
       *
       * For an entry ($X -> A * $Y B #EOR#, s0, i) where $Y has a nil rule
       * ($Y -> #EOR#, score s1), no entry is created for the nil rule
       * itself.  The dot is moved over $Y right away, giving
       *
       *   ($X -> A $Y(null) * B #EOR#, s0 + s1, i)
       *
       * in the current state, with a NULL sub-parse pointer for $Y.
       */
      if (sym_item->nil_rule != NULL)
        add_entry(_state, cur_rule, dot_pos + 1, entry_origin,
                  entry_score + sym_item->nil_rule->log_score, entry, NULL);

      /* PREDICTION
       *
       * Expand $Y by adding an entry, with the dot at position 0 and this
       * state as origin, for each of its rules that does not begin with
       * #EOR#.  The flag table makes sure each non-terminal is expanded at
       * most once per call.
       */
      if (predicted[sym_index])
        continue;
      predicted[sym_index] = 1;

      list = &sym_item->rules;
      for (k = s3_arraylist_count(list) - 1; k >= 0; k--) {
        expansion = (s3_cfg_rule_t *)s3_arraylist_get(list, k);
        if (expansion->products[0] != S3_CFG_EOR_ITEM)
          add_entry(_state, expansion, 0, _state, expansion->log_score,
                    NULL, NULL);
      }
      continue;
    }

    if (sym == S3_CFG_EOR_ITEM) {
      /* COMPLETION
       *
       * The entry is ($X -> A * #EOR#, s0, i): rule $X is finished.  Every
       * entry in the origin state S(i) of the form
       *
       *   ($Z -> A * $X B #EOR#, s1, j)
       *
       * is advanced over $X and added to the current state as
       *
       *   ($Z -> A $X(p1) * B #EOR#, s0 + s1, j)
       *
       * where p1 records the finished $X entry as the sub-parse used.  The
       * origin list is walked from its last element down, with the count
       * taken once before the walk starts.
       */
      completed = entry->rule->src;
      list = &entry->origin->entries;

      for (k = s3_arraylist_count(list) - 1; k >= 0; k--) {
        waiting = (s3_cfg_entry_t *)s3_arraylist_get(list, k);
        if (waiting->rule->products[waiting->dot] != completed)
          continue;
        add_entry(_state,
                  waiting->rule,
                  waiting->dot + 1,
                  waiting->origin,
                  waiting->score + entry->score,
                  waiting,
                  entry);
      }
      continue;
    }

    if (sym == S3_CFG_EOI_ITEM) {
      /* PARSE COMPLETION
       *
       * The entry is ($PSTART -> $START * #EOI#, s, i).  Rather than
       * waiting for #EOI# as input and finishing the pseudo-start rule in
       * the next state, the parse is closed here.  It replaces the
       * completed parse recorded so far only when nothing is recorded yet
       * or its score compares lower under `<`.
       */
      if (_state->best_completed_entry == NULL ||
          entry_score < _state->best_completed_entry->score)
        _state->best_completed_entry = entry;

      if (_state->best_completed_parse == NULL ||
          entry_score < _state->best_completed_parse->score)
        _state->best_completed_parse = entry;
      continue;
    }

    /* SCANNING
     *
     * The entry is ($X -> A * y B #EOR#, s, i) with y an ordinary terminal.
     * The state reached from this one on input y (created on demand)
     * receives
     *
     *   ($X -> A y * B #EOR#, s, i)
     */
    sym_index = s3_cfg_id2index(sym);
    list = &_state->expansions;
    next_state = (s3_cfg_state_t *)s3_arraylist_get(list, sym_index);
    if (next_state == NULL)
      next_state = add_state(_cfg, _state, sym);
    add_entry(next_state, cur_rule, dot_pos + 1, entry_origin, entry_score,
              entry, NULL);
  }
}