Ejemplo n.º 1
0
/*
 * Print a production rule to _out in the form
 *
 *   (SRC -> P1 P2 ... Pn, PROB)
 *
 * where SRC and P1..Pn are the names stored in the grammar's item table and
 * PROB is the rule's prob_score printed with three decimals.  No newline is
 * written.
 *
 * _cfg   grammar whose item table supplies the names (must not be NULL)
 * _rule  rule to print (must not be NULL)
 * _out   destination stream
 */
void
s3_cfg_print_rule(s3_cfg_t *_cfg, s3_cfg_rule_t *_rule, FILE *_out)
{
  s3_cfg_item_t *info;
  int n_products;
  int pos;

  assert(_cfg != NULL);
  assert(_rule != NULL);

  /* Left-hand side. */
  info = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info,
                                           s3_cfg_id2index(_rule->src));
  fprintf(_out, "(%s -> ", info->name);

  /* Right-hand side: names separated by single spaces, none trailing. */
  n_products = _rule->len;
  for (pos = 0; pos < n_products; pos++) {
    if (pos > 0)
      fputs(" ", _out);

    info = (s3_cfg_item_t *)s3_arraylist_get(
        &_cfg->item_info, s3_cfg_id2index(_rule->products[pos]));
    fprintf(_out, "%s", info->name);
  }

  fprintf(_out, ", %.3f)", _rule->prob_score);
}
Ejemplo n.º 2
0
/*
 * Build a finite-state grammar from a CFG by expanding its rules.
 *
 * The FSG starts with two states (0 = start, 1 = final).  Rule 0 of the CFG
 * is expanded between them by convert_cfg_rule(), and prune_states() is then
 * run on the result.
 *
 * _cfg            grammar to convert (must not be NULL)
 * _max_expansion  expansion limit handed to convert_cfg_rule()
 *
 * Returns a newly allocated FSG; the caller owns it.
 */
s2_fsg_t *
s3_cfg_convert_to_fsg(s3_cfg_t *_cfg, int _max_expansion)
{
  s3_cfg_rule_t *rule;
  s2_fsg_t *fsg;
  int *expansions;
  int i, n;

  assert(_cfg != NULL);

  n = s3_arraylist_count(&_cfg->item_info);
  rule = s3_arraylist_get(&_cfg->rules, 0);

  /* One expansion counter per grammar item, all starting at zero. */
  expansions = (int *)ckd_calloc(n, sizeof(int));
  for (i = 0; i < n; i++)
    expansions[i] = 0;

  fsg = (s2_fsg_t *)ckd_calloc(1, sizeof(s2_fsg_t));
  fsg->name = NULL;
  fsg->n_state = 2;
  fsg->start_state = 0;
  fsg->final_state = 1;
  fsg->trans_list = NULL;

  convert_cfg_rule(_cfg, fsg, rule, 0, 1, expansions, _max_expansion);

  /* The counters are scratch space for the conversion only.  They were
   * previously never released, leaking n ints per call.  free() is used to
   * match how s3_cfg_close() releases ckd_calloc'ed memory in this file.
   */
  free(expansions);

  prune_states(fsg);

  return fsg;
}
Ejemplo n.º 3
0
/*
 * Write the grammar's rules to a text file, one rule per line:
 *
 *   SCORE SRC LEN PRODUCT_1 ... PRODUCT_LEN
 *
 * Rule 0 is skipped; output starts at rule index 1.
 *
 * _cfg  grammar to dump (must not be NULL)
 * _fn   path of the file to create or truncate (must not be NULL)
 *
 * Calls E_FATAL if the file cannot be opened for writing.
 */
void
s3_cfg_write_simple(s3_cfg_t *_cfg, const char *_fn)
{
  FILE *file = NULL;
  s3_arraylist_t *rules = NULL;
  s3_cfg_rule_t *rule = NULL;
  int i, j, count;

  assert(_cfg != NULL);
  assert(_fn != NULL);

  if ((file = fopen(_fn, "w")) == NULL)
    E_FATAL("Failed to open output file for writing");

  rules = &_cfg->rules;
  count = s3_arraylist_count(rules);
  for (i = 1; i < count; i++) {
    rule = (s3_cfg_rule_t *)s3_arraylist_get(rules, i);
    fprintf(file, "%f %s %d",
            rule->score, s3_cfg_id2str(_cfg, rule->src), rule->len);
    for (j = 0; j < rule->len; j++)
      fprintf(file, " %s", s3_cfg_id2str(_cfg, rule->products[j]));

    /* Terminate every record.  The newline used to be written once, after
     * the loop, so the last product of one rule ran straight into the score
     * of the next and the file could not be tokenised back into rules.
     */
    fprintf(file, "\n");
  }

  fclose(file);
}
Ejemplo n.º 4
0
/*
 * Look up the name of a grammar item.
 *
 * _cfg  grammar holding the item table (must not be NULL)
 * _id   item id; it is converted to a table index with s3_cfg_id2index()
 *
 * Returns the name pointer stored in the item record.  The string is not
 * copied.
 */
const char *
s3_cfg_id2str(s3_cfg_t *_cfg, s3_cfg_id_t _id)
{
  s3_cfg_item_t *entry;

  assert(_cfg != NULL);

  entry = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info,
                                            s3_cfg_id2index(_id));
  return entry->name;
}
Ejemplo n.º 5
0
/*
 * Release the rules, items and name table held by a grammar.
 *
 * For every rule the products array and the rule record are freed; for every
 * item the name and the item record are freed; the name-to-id hash table is
 * freed when present.  The s3_cfg_t structure itself is not freed.
 *
 * _cfg  grammar to tear down (must not be NULL)
 *
 * NOTE(review): the rules/item_info array lists, each item's own rule list
 * and _cfg->predictions are not released here.  Whether that is a leak
 * depends on how they are initialised and who else owns them -- confirm
 * before adding further frees.
 */
void
s3_cfg_close(s3_cfg_t *_cfg)
{
  int i;
  s3_cfg_rule_t *rule = NULL;
  s3_cfg_item_t *item = NULL;

  /* Same precondition check as the other s3_cfg_* entry points; without it
   * a NULL grammar is dereferenced on the very next line.
   */
  assert(_cfg != NULL);

  for (i = _cfg->rules.count - 1; i >= 0; i--) {
    rule = (s3_cfg_rule_t *)s3_arraylist_get(&_cfg->rules, i);
    free(rule->products);
    free(rule);
  }

  for (i = _cfg->item_info.count - 1; i >= 0; i--) {
    item = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info, i);
    free(item->name);
    free(item);
  }

  if (_cfg->name2id != NULL)
    hash_table_free(_cfg->name2id);
}
Ejemplo n.º 6
0
/*
 * Normalise the scores of all rules that expand one non-terminal.
 *
 * The raw scores of the item's rules (plus its epsilon rule, when it has one)
 * are summed.  Each rule's prob_score becomes score / sum and its log_score
 * becomes logs3() of that probability.
 *
 * _cfg     grammar the item belongs to (must not be NULL)
 * _item    non-terminal whose rules are normalised (must not be NULL)
 * logmath  log-math object handed to logs3()
 *
 * Calls E_FATAL when the scores sum to zero.
 */
static void
compile_nonterm(s3_cfg_t *_cfg, s3_cfg_item_t *_item, logmath_t *logmath)
{
  s3_arraylist_t *productions;
  s3_cfg_rule_t *cur;
  s3_cfg_rule_t *epsilon;
  float32 total = 0;
  int count;
  int k;

  assert(_cfg != NULL);
  assert(_item != NULL);

  productions = &_item->rules;
  epsilon = _item->nil_rule;
  count = s3_arraylist_count(productions);

  /* Pass 1: accumulate raw scores, last rule first, epsilon rule last. */
  k = count;
  while (k-- > 0) {
    cur = (s3_cfg_rule_t *)s3_arraylist_get(productions, k);
    total += cur->score;
  }
  if (epsilon != NULL)
    total += epsilon->score;

  if (total == 0) {
    E_FATAL("CFG production rule scores cannot sum to 0\n");
  }

  /* Pass 2: turn each raw score into a probability and its log value. */
  k = count;
  while (k-- > 0) {
    cur = (s3_cfg_rule_t *)s3_arraylist_get(productions, k);
    cur->prob_score = cur->score / total;
    cur->log_score = logs3(logmath, cur->prob_score);
  }
  if (epsilon != NULL) {
    epsilon->prob_score = epsilon->score / total;
    epsilon->log_score = logs3(logmath, epsilon->prob_score);
  }
}
Ejemplo n.º 7
0
/*
 * Print a parser entry (a dotted rule) to _out in the form
 *
 *   (SRC -> P1 ... * Pk ... Pn , SCORE)
 *
 * A "* " marker is written in front of the product at the dot position.  When
 * the dot sits past the last product the marker is written just before the
 * comma instead.  Every product name is followed by one space.  No newline is
 * written.
 *
 * _cfg    grammar whose item table supplies the names (must not be NULL)
 * _entry  entry to print (must not be NULL)
 * _out    destination stream
 */
void
s3_cfg_print_entry(s3_cfg_t *_cfg, s3_cfg_entry_t *_entry, FILE *_out)
{
  s3_cfg_rule_t *shown;
  s3_cfg_item_t *info;
  int marker;
  int pos;

  assert(_cfg != NULL);
  assert(_entry != NULL);

  shown = _entry->rule;
  marker = _entry->dot;

  /* Left-hand side. */
  info = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info,
                                           s3_cfg_id2index(shown->src));
  fprintf(_out, "(%s -> ", info->name);

  /* Right-hand side with the dot marker inserted in place. */
  for (pos = 0; pos < shown->len; pos++) {
    if (pos == marker)
      fputs("* ", _out);

    info = (s3_cfg_item_t *)s3_arraylist_get(
        &_cfg->item_info, s3_cfg_id2index(shown->products[pos]));
    fprintf(_out, "%s", info->name);
    fputs(" ", _out);
  }

  /* A dot at the very end was not emitted by the loop above. */
  if (marker != shown->len)
    fprintf(_out, ", %d)", _entry->score);
  else
    fprintf(_out, "*, %d)", _entry->score);
}
Ejemplo n.º 8
0
/*
 * Recursively free a parse state and every state reachable through its
 * expansions list.
 *
 * Children are released first, highest index first, and the state itself is
 * then handed to free_state().  A NULL _parse is ignored.
 *
 * NOTE(review): children are only visited when num_expanded > 0.  Confirm
 * that states which were added but never evaluated cannot be left behind.
 */
void
free_parse(s3_cfg_state_t *_parse)
{
  int i;
  s3_cfg_state_t *child = NULL;

  if (_parse == NULL)
    return;

  if (_parse->num_expanded > 0) {
    for (i = s3_arraylist_count(&_parse->expansions) - 1; i >= 0; i--) {
      child = (s3_cfg_state_t *)s3_arraylist_get(&_parse->expansions, i);

      /* The expansions list is sparse: free_state() writes NULL into a slot
       * when a child is released, and s3_cfg_input_term() / eval_state()
       * both test slots for NULL.  Empty slots must be skipped rather than
       * dereferenced.
       */
      if (child != NULL)
        free_parse(child);
    }
  }

  free_state(_parse);
}
Ejemplo n.º 9
0
/*
 * Recompute the log score of every rule in the grammar from its current
 * prob_score, using logs3() with the supplied log-math object.
 *
 * _cfg     grammar whose rules are updated (must not be NULL)
 * logmath  log-math object handed to logs3()
 */
void
s3_cfg_rescore(s3_cfg_t *_cfg, logmath_t *logmath)
{
  s3_arraylist_t *all_rules;
  s3_cfg_rule_t *cur;
  int k;

  assert(_cfg != NULL);

  all_rules = &_cfg->rules;

  /* Walk from the last rule down to the first. */
  k = s3_arraylist_count(all_rules);
  while (k-- > 0) {
    cur = (s3_cfg_rule_t *)s3_arraylist_get(all_rules, k);
    cur->log_score = logs3(logmath, cur->prob_score);
  }
}
Ejemplo n.º 10
0
/*
 * Create a production rule and register it with the grammar.
 *
 * _cfg       grammar to extend (must not be NULL)
 * _src       id of the item on the left-hand side
 * _score     raw (unnormalised) score of the rule
 * _products  right-hand side, terminated by S3_CFG_EOR_ITEM (must not be
 *            NULL).  The array is copied, terminator included.
 *
 * The new rule is appended to the grammar-wide rule list.  A rule with at
 * least one product is also appended to the source item's rule list.  An
 * empty (epsilon) rule instead becomes the item's nil_rule when the item has
 * none yet or when the existing one has a lower score.
 *
 * Calls E_FATAL when no terminator is found within the first
 * S3_CFG_MAX_ITEM_COUNT + 1 positions.
 *
 * Returns the newly allocated rule.
 */
s3_cfg_rule_t *
s3_cfg_add_rule(s3_cfg_t *_cfg, s3_cfg_id_t _src, float32 _score,
                s3_cfg_id_t *_products)
{
  s3_cfg_rule_t *created = NULL;
  s3_cfg_id_t *rhs = NULL;
  s3_cfg_item_t *owner = NULL;
  int owner_index;
  int n_items;

  assert(_cfg != NULL);
  assert(_products != NULL);

  owner_index = s3_cfg_id2index(_src);

  /* Measure the right-hand side: count items up to the terminator. */
  n_items = 0;
  while (n_items < S3_CFG_MAX_ITEM_COUNT
         && _products[n_items] != S3_CFG_EOR_ITEM)
    n_items++;

  if (_products[n_items] != S3_CFG_EOR_ITEM)
    E_FATAL("CFG Production rule does not contain EOR item");

  /* Build the rule record with its own copy of the products. */
  rhs = (s3_cfg_id_t *)ckd_calloc(n_items + 1, sizeof(s3_cfg_id_t));
  created = (s3_cfg_rule_t *)ckd_calloc(1, sizeof(s3_cfg_rule_t));
  memcpy(rhs, _products, (n_items + 1) * sizeof(s3_cfg_id_t));

  created->src = _src;
  created->score = _score;
  created->products = rhs;
  created->len = n_items;

  /* Register the rule with the grammar and with its source item. */
  s3_arraylist_append(&_cfg->rules, created);

  owner = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info, owner_index);

  if (n_items == 0) {
    /* Epsilon rule: keep only the best-scoring one on the item. */
    if (owner->nil_rule == NULL || owner->nil_rule->score < _score)
      owner->nil_rule = created;
  }
  else {
    s3_arraylist_append(&owner->rules, created);
  }

  return created;
}
Ejemplo n.º 11
0
/*
 * Prepare the grammar for parsing.
 *
 * Every non-terminal item has its rule scores normalised by
 * compile_nonterm().  A zero-filled prediction table with one int8 slot per
 * item is then allocated and stored in _cfg->predictions.
 *
 * _cfg     grammar to compile (must not be NULL)
 * logmath  log-math object passed through to compile_nonterm()
 *
 * NOTE(review): any previous _cfg->predictions pointer is overwritten
 * without being released.  Confirm this is never called twice on one
 * grammar.
 */
void
s3_cfg_compile_rules(s3_cfg_t *_cfg, logmath_t *logmath)
{
  s3_arraylist_t *items;
  s3_cfg_item_t *cur;
  int total;
  int k;

  assert(_cfg != NULL);

  items = &_cfg->item_info;
  total = s3_arraylist_count(items);

  /* Visit the items from last to first; terminals have nothing to compile. */
  for (k = total; k-- > 0; ) {
    cur = s3_arraylist_get(items, k);
    if (s3_cfg_is_terminal(cur->id))
      continue;
    compile_nonterm(_cfg, cur, logmath);
  }

  _cfg->predictions = (int8 *)ckd_calloc(total, sizeof(int8));
}
Ejemplo n.º 12
0
/*
 * Advance the parse by one input terminal.
 *
 * _cfg   grammar in use (must not be NULL)
 * _cur   current parse state
 * _term  id of the terminal being consumed
 *
 * Looks up the state stored for _term in _cur's expansions list.  A state
 * whose num_expanded is still -1 is evaluated with eval_state() before it is
 * returned.
 *
 * Returns the state for _term, or NULL when _cur has no expansion for it.
 */
s3_cfg_state_t *
s3_cfg_input_term(s3_cfg_t *_cfg, s3_cfg_state_t *_cur, s3_cfg_id_t _term)
{
  s3_cfg_state_t *next;
  int slot;

  assert(_cfg != NULL);

  slot = s3_cfg_id2index(_term);
  next = (s3_cfg_state_t *)s3_arraylist_get(&_cur->expansions, slot);

  /* Evaluate lazily: only states still marked with -1 need the work. */
  if (next != NULL && next->num_expanded == -1)
    eval_state(_cfg, next);

  return next;
}
Ejemplo n.º 13
0
/*
 * Create the root state of a new parse.
 *
 * _cfg  grammar to parse with (must not be NULL)
 *
 * Returns the evaluated root state.
 */
s3_cfg_state_t *
s3_cfg_create_parse(s3_cfg_t *_cfg)
{
  s3_cfg_state_t *state = NULL;
  s3_cfg_rule_t *rule = NULL;

  assert(_cfg != NULL);

  /* Keep the state that add_state() hands back.  The result used to be
   * discarded, which left `state` NULL for add_entry(), eval_state() and the
   * return value.
   */
  state = add_state(_cfg, NULL, S3_CFG_NIL_ITEM);

  /* to initialize the parser, we need to create the root state and add to
   * it the starting entry using the pseudo start rule
   *
   *   0.0 $PSTART -> $START #EOR#
   */
  rule = s3_arraylist_get(&_cfg->rules, 0);
  add_entry(state, rule, 0, 0, rule->log_score, NULL, NULL);

  eval_state(_cfg, state);

  return state;
}
Ejemplo n.º 14
0
/*
 * Free a single parse state.
 *
 * Every entry stored in the state is freed, both of its array lists are
 * closed and the state record itself is released.  When the state has a
 * parent (its `back` pointer), the parent's num_expanded counter is
 * decremented and the parent's expansions slot for this state's input symbol
 * is reset to NULL.
 *
 * Child states are not touched.
 */
void
free_state(s3_cfg_state_t *_state)
{
  s3_cfg_state_t *owner;
  s3_cfg_entry_t *victim;
  int slot;
  int k;

  /* Remember the link to the parent before the record disappears. */
  owner = _state->back;
  slot = s3_cfg_id2index(_state->input);

  k = _state->entries.count;
  while (k-- > 0) {
    victim = (s3_cfg_entry_t *)s3_arraylist_get(&_state->entries, k);
    free(victim);
  }

  s3_arraylist_close(&_state->entries);
  s3_arraylist_close(&_state->expansions);
  free(_state);

  if (owner == NULL)
    return;

  /* Detach from the parent so it no longer points at freed memory. */
  owner->num_expanded--;
  s3_arraylist_set(&owner->expansions, slot, NULL);
}
Ejemplo n.º 15
0
/* Allocate a transition, fill it in and push it onto the head of the FSG's
 * transition list.  _word may be NULL for an epsilon transition; otherwise
 * the pointer is stored as-is (no copy is made here).
 */
static void
fsg_push_trans(s2_fsg_t *_fsg, int _from, int _to, double _prob, char *_word)
{
  s2_fsg_trans_t *trans;

  trans = (s2_fsg_trans_t *)ckd_calloc(1, sizeof(s2_fsg_trans_t));
  trans->from_state = _from;
  trans->to_state = _to;
  trans->prob = _prob;
  trans->word = _word;
  trans->next = _fsg->trans_list;
  _fsg->trans_list = trans;
}

/*
 * Expand one CFG rule into FSG states and transitions between two existing
 * states.
 *
 * _cfg            grammar the rule belongs to
 * _fsg            FSG being built; new states are numbered from n_state
 * _rule           rule to expand
 * _src            FSG state the expansion starts from
 * _dest           FSG state the expansion must end in
 * _expansions     per-item expansion counters, indexed by item index; they
 *                 are incremented while an item is being expanded and
 *                 restored afterwards
 * _max_expansion  a rule is skipped entirely when any of its products
 *                 already has a counter above this value
 *
 * When the rule is skipped nothing is added to the FSG, not even the final
 * transition into _dest.
 */
static void
convert_cfg_rule(s3_cfg_t *_cfg,
                 s2_fsg_t *_fsg,
                 s3_cfg_rule_t *_rule,
                 int _src,
                 int _dest,
                 int *_expansions,
                 int _max_expansion)
{
  int index;
  int i, j, n;
  int cur, u, v;
  s3_cfg_id_t id;
  s3_cfg_item_t *item;
  s3_cfg_rule_t *rule;

  cur = _src;

  /* Check whether the target rule has any variables that exceeded the
   * expansion count
   */
  for (i = 0; i < _rule->len; i++) {
    id = _rule->products[i];
    if (_expansions[s3_cfg_id2index(id)] > _max_expansion)
      return;
  }

  /* Iterate through the production variables. */
  for (i = 0; i < _rule->len; i++) {
    id = _rule->products[i];

    /* For each terminal:
     *   1.  Create a new state.
     *   2.  Add a single definite transition from the current state to the
     *       new state that emits the terminal.
     *   3.  Use the new state as the current state.
     * The end-of-input terminal produces no state and no transition.
     */
    if (s3_cfg_is_terminal(id)) {
      if (id != S3_CFG_EOI_ITEM) {
        v = _fsg->n_state++;
        fsg_push_trans(_fsg, cur, v, 1.0,
                       (char *)ckd_salloc(s3_cfg_id2str(_cfg, id)));
        cur = v;
      }
      continue;
    }

    /* For each non-terminal X:
     *   1.  Create a new destination state, v.
     *   2.  Increment expansion count for X.
     *   3.  For each (non-epsilon) expansion rule with X as source:
     *      a.  Create a new source state u
     *      b.  Convert the rule with u as src and v as dest.
     *      c.  Create a new epsilon transition from cur to u with the rule's
     *          expansion probability.
     *   4.  If X has an epsilon rule, add an epsilon transition from cur
     *       straight to v with that rule's probability.
     *   5.  Set the current state to v.
     *   6.  Decrement expansion count for X.
     */
    index = s3_cfg_id2index(id);
    v = _fsg->n_state++;
    _expansions[index]++;
    item = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info, index);
    n = s3_arraylist_count(&item->rules);
    for (j = 0; j < n; j++) {
      rule = (s3_cfg_rule_t *)s3_arraylist_get(&item->rules, j);
      u = _fsg->n_state++;
      /* Expand the sub-rule first so its transitions are pushed before the
       * epsilon transition that leads into it.
       */
      convert_cfg_rule(_cfg, _fsg, rule, u, v, _expansions, _max_expansion);
      fsg_push_trans(_fsg, cur, u, rule->prob_score, NULL);
    }

    if (item->nil_rule != NULL)
      fsg_push_trans(_fsg, cur, v, item->nil_rule->prob_score, NULL);

    cur = v;
    _expansions[index]--;
  }

  /* Make one final transition from our last state to the destination state. */
  fsg_push_trans(_fsg, cur, _dest, 1.0, NULL);
}
Ejemplo n.º 16
0
/*
 * Evaluate one parse state: walk its entries and apply the prediction,
 * scanning and completion steps described in the comments below.
 *
 * _cfg    grammar in use (must not be NULL); its predictions table is
 *         cleared and reused as scratch space for this call
 * _state  state to evaluate (must not be NULL)
 *
 * On entry the parent's num_expanded counter (when there is a parent) is
 * incremented and this state's num_expanded is set to 0.
 *
 * The entry loop re-reads _state->entries.count on every iteration while the
 * steps below call add_entry() on _state -- presumably so that entries added
 * during the walk are processed in the same pass (confirm against
 * add_entry()).
 */
static void
eval_state(s3_cfg_t *_cfg, s3_cfg_state_t *_state)
{
  s3_cfg_rule_t *rule = NULL;
  s3_cfg_entry_t *entry = NULL;
  s3_cfg_entry_t *cmplt_entry = NULL;
  s3_cfg_state_t *target_state = NULL;
  s3_cfg_state_t *origin_state = NULL;
  s3_cfg_item_t *item = NULL;
  s3_cfg_id_t scan;
  s3_arraylist_t *arraylist = NULL;
  int8 *predictions = NULL;
  int32 score;
  int index;
  int dot;
  int i, j;

  assert(_cfg != NULL);
  assert(_state != NULL);

  if (_state->back != NULL) {
    _state->back->num_expanded++;
  }
  _state->num_expanded = 0;

  /* Start with an empty "already predicted" table: one flag per item. */
  predictions = _cfg->predictions;
  memset(predictions, 0, _cfg->item_info.count * sizeof(int8));

  /* iterate thru the entries in the state and perform prediction, scan,
   * and completion steps */
  for (i = 0; i < _state->entries.count; i++) {
    entry = (s3_cfg_entry_t *)s3_arraylist_get(&_state->entries, i);
    rule = entry->rule;
    dot = entry->dot;
    origin_state = entry->origin;
    score = entry->score;
    
    /* The symbol immediately after the dot decides which step applies. */
    scan = rule->products[dot];
    index = s3_cfg_id2index(scan);

    DEBUG_ENTRY(entry);

    item = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info, index);

    /* saving some scores
     *
     * NOTE(review): these comparisons (and the ones under PARSE COMPLETION)
     * retain the entry with the LOWEST score, while the PARSE COMPLETION
     * comment says the highest-scoring parse is kept.  Confirm which
     * ordering is intended.  best_overall_entry and best_overall_parse also
     * always receive the same entry here.
     */
    if (_state->best_overall_entry == NULL ||
        score < _state->best_overall_entry->score)
      _state->best_overall_entry = entry;
    
    if (_state->best_overall_parse == NULL ||
        score < _state->best_overall_parse->score)
      _state->best_overall_parse = entry;

    if (s3_cfg_is_terminal(scan)) {
      /************************************************************************
       * NORMAL COMPLETION
       *
       * When we encounter an entry of the form
       *
       *   $X -> (A * #EOR#, s0, i),
       *
       * we look for any entry in state S(i) of the form
       *
       *   $Z -> (A * $X B #EOR#, s1, j)
       *
       * and add the entry
       *
       *   $Z -> (A $X * B #EOR#, s0 + s1, j)
       *
       * to the current state.  We also need to keep a record of which
       * subparses were used to complete entries.  In this case, we need to
       * remember that this particular completed entry of $X is used to
       * advance the parsing of $Z.  In this case, the pointer p1 is added to
       * the entry
       *
       *   $Z -> (A $X(p1) * B #EOR#, s0 + s1, j)
       *
       * for record-keeping's sake.
       */
      if (scan == S3_CFG_EOR_ITEM) {
        /* From here on `scan` holds the completed non-terminal $X. */
        scan = entry->rule->src;
        arraylist = &entry->origin->entries;

        for (j = s3_arraylist_count(arraylist) - 1; j >= 0; j--) {
          cmplt_entry = (s3_cfg_entry_t *)s3_arraylist_get(arraylist, j);

          if (cmplt_entry->rule->products[cmplt_entry->dot] == scan)
            add_entry(_state,
                      cmplt_entry->rule,
                      cmplt_entry->dot + 1,
                      cmplt_entry->origin,
                      cmplt_entry->score + entry->score,
                      cmplt_entry,
                      entry);
        }
      }
      /************************************************************************
       * PARSE COMPLETION
       *
       * We encountered an entry of the form
       *
       *   ($PSTART -> $START * #EOI#, s i).
       *
       * Instead of waiting for an input symbol #EOI# and completing the
       * pseudo-start rule in the next state, we finish the parse here and save
       * us a step.  We do need to check against other completed parses in this
       * state, since only the parse with the highest score is kept.
       */
      else if (scan == S3_CFG_EOI_ITEM) {
        if (_state->best_completed_entry == NULL ||
            score < _state->best_completed_entry->score)
          _state->best_completed_entry = entry;

        if (_state->best_completed_parse == NULL ||
            score < _state->best_completed_parse->score)
          _state->best_completed_parse = entry;

      }
      /************************************************************************
       * NORMAL SCANNING
       *
       * When we encounter an entry of the form
       *
       *   ($X -> A * y B #EOR#, s, i),
       *
       * and the input symbol/terminal is y, we add to the next state the entry
       *
       *   ($X -> A y * B #EOR#, s, i)
       *
       * The next state for y is looked up in this state's expansions list and
       * created with add_state() when the slot is still empty.
       */
      else {
        index = s3_cfg_id2index(scan);
        arraylist = &_state->expansions;
        target_state = (s3_cfg_state_t *)s3_arraylist_get(arraylist, index);
        if (target_state == NULL)
          target_state = add_state(_cfg, _state, scan);
        add_entry(target_state, rule, dot + 1, origin_state, score,
                  entry, NULL);
      }
    }
    else {
      /************************************************************************
       * AUTOMATIC COMPLETION OF EPSILON PRODUCING NON-TERMINALS
       *
       * When we encounter an entry of the form
       *
       *   ($X -> A * $Y B #EOR#, s0, i),
       *
       * we check whether $Y is an epsilon producing non-terminal, i.e.,
       * whether the rule
       * 
       *   $Y -> #EOR#
       * 
       * exists.  If that is the case, we do not add any entry corresponding to
       * such epsilon producing rule.  Instead, we take a short-cut by adding
       * the following entry to the current state
       *   
       *   ($X -> A $Y(null) * B  #EOR#, s0 + s1, i).
       *
       * Note in this new entry, the completed non-terminal $Y has a NULL sub-
       * parse pointer.
       */
      if (item->nil_rule != NULL)
        add_entry(_state, rule, dot + 1, origin_state,
                  score + item->nil_rule->log_score, entry, NULL);

      /************************************************************************
       * NORMAL PREDICTION
       * 
       * When we encounter an entry of the form
       * 
       *   ($X -> A * $Y B #EOR#, s0, i),
       *
       * we want to expand the non-terminal $Y.  That is, we add an entry for
       * each rule that has $Y on its left-hand side.  However, we don't want
       * to keep repeated copies of the same entries, so we keep track of which
       * non-terminals we've already expanded in a table.
       *
       * `rule` is reused as the loop variable below; it is reloaded from the
       * next entry at the top of the outer loop.
       */
      if (!predictions[index]) {
        predictions[index] = 1;
        arraylist = &item->rules;
        for (j = s3_arraylist_count(arraylist) - 1; j >= 0; j--) {
          rule = (s3_cfg_rule_t *)s3_arraylist_get(arraylist, j);
          if (rule->products[0] != S3_CFG_EOR_ITEM)
            add_entry(_state, rule, 0, _state, rule->log_score, NULL, NULL);
        }
      }
    }
  }
}