/**
 * Write a human-readable form of a production rule to a stream.
 *
 * The output has the shape "(SRC -> P1 P2 ... Pn, PROB)": the names are
 * looked up in the grammar's item table and PROB is the rule's prob_score
 * printed with three decimals.  No trailing newline is written.
 *
 * @param _cfg   Grammar whose item table resolves the ids; must not be NULL.
 * @param _rule  Rule to print; must not be NULL.
 * @param _out   Destination stream.
 */
void
s3_cfg_print_rule(s3_cfg_t *_cfg, s3_cfg_rule_t *_rule, FILE *_out)
{
    s3_cfg_item_t *sym = NULL;
    int pos;

    assert(_cfg != NULL);
    assert(_rule != NULL);

    /* Left-hand side. */
    sym = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info,
                                            s3_cfg_id2index(_rule->src));
    fprintf(_out, "(%s -> ", sym->name);

    /* Right-hand side, items separated by single spaces. */
    for (pos = 0; pos < _rule->len; pos++) {
        if (pos > 0)
            fprintf(_out, " ");
        sym = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info,
                                                s3_cfg_id2index(_rule->products[pos]));
        fprintf(_out, "%s", sym->name);
    }

    fprintf(_out, ", %.3f)", _rule->prob_score);
}
/**
 * Build a finite-state grammar from the CFG by expanding rule 0.
 *
 * A fresh FSG is created with two states (start = 0, final = 1) and rule 0
 * of the grammar is converted between them; the result is then passed to
 * prune_states().
 *
 * @param _cfg            Grammar to convert; must not be NULL.
 * @param _max_expansion  Expansion limit forwarded to convert_cfg_rule().
 * @return Newly allocated FSG.
 */
s2_fsg_t *
s3_cfg_convert_to_fsg(s3_cfg_t *_cfg, int _max_expansion)
{
    s3_cfg_rule_t *rule;
    s2_fsg_t *fsg;
    int *expansions;
    int i, n;

    assert(_cfg != NULL);

    n = s3_arraylist_count(&_cfg->item_info);
    rule = s3_arraylist_get(&_cfg->rules, 0);

    /* One expansion counter per grammar item, used only during conversion. */
    expansions = (int *)ckd_calloc(n, sizeof(int));

    fsg = (s2_fsg_t *)ckd_calloc(1, sizeof(s2_fsg_t));
    fsg->name = NULL;
    fsg->n_state = 2;
    fsg->start_state = 0;
    fsg->final_state = 1;
    fsg->trans_list = NULL;

    for (i = 0; i < n; i++)
        expansions[i] = 0;

    convert_cfg_rule(_cfg, fsg, rule, 0, 1, expansions, _max_expansion);

    /* The counters are scratch space; release them (previously leaked).
     * free() is used to match how this file releases other ckd_calloc'ed
     * memory. */
    free(expansions);

    prune_states(fsg);

    return fsg;
}
/**
 * Dump the grammar's rules to a text file, one rule per line.
 *
 * Each line has the form
 *
 *     SCORE SRC LEN PRODUCT_1 ... PRODUCT_LEN
 *
 * Rule 0 is not written (NOTE(review): elsewhere in this file rule 0 is
 * used as the pseudo start rule, which is presumably why it is skipped).
 *
 * @param _cfg  Grammar to write; must not be NULL.
 * @param _fn   Output file name; must not be NULL.  Failure to open the
 *              file is reported through E_FATAL.
 */
void
s3_cfg_write_simple(s3_cfg_t *_cfg, const char *_fn)
{
    FILE *file = NULL;
    s3_arraylist_t *rules = NULL;
    s3_cfg_rule_t *rule = NULL;
    int i, j, count;

    assert(_cfg != NULL);
    assert(_fn != NULL);

    if ((file = fopen(_fn, "w")) == NULL)
        E_FATAL("Failed to open output file for writing");

    rules = &_cfg->rules;
    count = s3_arraylist_count(rules);

    for (i = 1; i < count; i++) {
        rule = (s3_cfg_rule_t *)s3_arraylist_get(rules, i);

        fprintf(file, "%f %s %d",
                rule->score, s3_cfg_id2str(_cfg, rule->src), rule->len);
        for (j = 0; j < rule->len; j++)
            fprintf(file, " %s", s3_cfg_id2str(_cfg, rule->products[j]));

        /* Terminate every rule.  Previously the newline was written once
         * after the loop, so the last product of one rule ran straight
         * into the score of the next with no separator. */
        fprintf(file, "\n");
    }

    fclose(file);
}
/**
 * Look up the name of a grammar item.
 *
 * @param _cfg  Grammar whose item table is consulted; must not be NULL.
 * @param _id   Item id; it is converted to a table index with
 *              s3_cfg_id2index().
 * @return The name stored in the item record (not a copy).
 */
const char *
s3_cfg_id2str(s3_cfg_t *_cfg, s3_cfg_id_t _id)
{
    s3_cfg_item_t *entry;
    int slot;

    assert(_cfg != NULL);

    slot = s3_cfg_id2index(_id);
    entry = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info, slot);

    return entry->name;
}
/**
 * Release everything owned by a grammar.
 *
 * Frees every rule (and its product array), every item (and its name), the
 * name-to-id hash table, the prediction scratch table, and the backing
 * storage of the rule and item lists.  The s3_cfg_t structure itself is not
 * freed here.
 *
 * @param _cfg  Grammar to tear down; must not be NULL.
 */
void
s3_cfg_close(s3_cfg_t *_cfg)
{
    int i;
    s3_cfg_rule_t *rule = NULL;
    s3_cfg_item_t *item = NULL;

    assert(_cfg != NULL);

    /* _cfg->rules holds every rule that was ever added (including epsilon
     * rules), so this is the single place where rules are freed. */
    for (i = _cfg->rules.count - 1; i >= 0; i--) {
        rule = (s3_cfg_rule_t *)s3_arraylist_get(&_cfg->rules, i);
        free(rule->products);
        free(rule);
    }

    for (i = _cfg->item_info.count - 1; i >= 0; i--) {
        item = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info, i);

        /* The per-item list only borrows rule pointers (freed above) but
         * owns its own storage, which used to be leaked.  Only the lists
         * of non-terminals are closed: those are the ones this file is
         * seen to read and append to, so they are known to be
         * initialised. */
        if (!s3_cfg_is_terminal(item->id))
            s3_arraylist_close(&item->rules);

        free(item->name);
        free(item);
    }

    if (_cfg->name2id != NULL)
        hash_table_free(_cfg->name2id);

    /* Scratch table allocated by s3_cfg_compile_rules(); previously leaked.
     * NOTE(review): assumes the field is NULL until that function has run
     * -- confirm against the grammar initialisation code. */
    free(_cfg->predictions);
    _cfg->predictions = NULL;

    /* Release the list storage itself now that the elements are gone.
     * NOTE(review): assumes no caller closes these lists separately. */
    s3_arraylist_close(&_cfg->rules);
    s3_arraylist_close(&_cfg->item_info);
}
/**
 * Normalise the rule scores of one non-terminal.
 *
 * The raw scores of all rules of the item (plus its epsilon rule, if any)
 * are summed; each rule's prob_score becomes score / sum and its log_score
 * is obtained from logs3().  A zero sum is reported through E_FATAL.
 *
 * @param _cfg     Owning grammar; only checked for NULL.
 * @param _item    Non-terminal whose rules are normalised; must not be NULL.
 * @param logmath  Log-math object handed to logs3().
 */
static void
compile_nonterm(s3_cfg_t *_cfg, s3_cfg_item_t *_item, logmath_t *logmath)
{
    s3_arraylist_t *prods;
    s3_cfg_rule_t *cur;
    s3_cfg_rule_t *eps;
    float32 total = 0;
    int count, k;

    assert(_cfg != NULL);
    assert(_item != NULL);

    prods = &_item->rules;
    eps = _item->nil_rule;
    count = s3_arraylist_count(prods);

    /* Accumulate the raw scores, last rule first, epsilon rule last. */
    for (k = count; k-- > 0; ) {
        cur = (s3_cfg_rule_t *)s3_arraylist_get(prods, k);
        total += cur->score;
    }
    if (eps != NULL)
        total += eps->score;

    if (total == 0)
        E_FATAL("CFG production rule scores cannot sum to 0\n");

    /* Turn each raw score into a probability and its log-domain value. */
    for (k = count; k-- > 0; ) {
        cur = (s3_cfg_rule_t *)s3_arraylist_get(prods, k);
        cur->prob_score = cur->score / total;
        cur->log_score = logs3(logmath, cur->prob_score);
    }
    if (eps != NULL) {
        eps->prob_score = eps->score / total;
        eps->log_score = logs3(logmath, eps->prob_score);
    }
}
/**
 * Write a human-readable form of a parser entry (a dotted rule) to a stream.
 *
 * The output has the shape "(SRC -> P1 * P2 ... , SCORE)", with "*" marking
 * the dot position; every product name is followed by a space.  When the
 * dot sits at the end of the rule the "*" is printed just before the comma.
 * No trailing newline is written.
 *
 * @param _cfg    Grammar whose item table resolves the ids; must not be NULL.
 * @param _entry  Entry to print; must not be NULL.
 * @param _out    Destination stream.
 */
void
s3_cfg_print_entry(s3_cfg_t *_cfg, s3_cfg_entry_t *_entry, FILE *_out)
{
    s3_cfg_rule_t *prod = NULL;
    s3_cfg_item_t *sym = NULL;
    int marker;
    int pos;

    assert(_cfg != NULL);
    assert(_entry != NULL);

    prod = _entry->rule;
    marker = _entry->dot;

    /* Left-hand side. */
    sym = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info,
                                            s3_cfg_id2index(prod->src));
    fprintf(_out, "(%s -> ", sym->name);

    /* Right-hand side with the dot inserted before item `marker`. */
    for (pos = 0; pos < prod->len; pos++) {
        if (pos == marker)
            fprintf(_out, "* ");

        sym = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info,
                                                s3_cfg_id2index(prod->products[pos]));
        fprintf(_out, "%s", sym->name);
        fprintf(_out, " ");
    }

    if (marker != prod->len)
        fprintf(_out, ", %d)", _entry->score);
    else
        fprintf(_out, "*, %d)", _entry->score);
}
/**
 * Recursively free a parse state and every state reachable through its
 * expansion table.
 *
 * @param _parse  Root of the (sub)tree to release.  NULL is accepted and
 *                ignored.
 *
 * NOTE(review): children are only visited when num_expanded > 0.  In this
 * file num_expanded is incremented when a child is evaluated, so a state
 * whose children were created but never evaluated may keep them alive --
 * confirm whether that is intended.
 */
void
free_parse(s3_cfg_state_t *_parse)
{
    int i;
    s3_cfg_state_t *scan = NULL;

    if (_parse == NULL)
        return;

    if (_parse->num_expanded > 0) {
        for (i = s3_arraylist_count(&_parse->expansions) - 1; i >= 0; i--) {
            scan = (s3_cfg_state_t *)s3_arraylist_get(&_parse->expansions, i);

            /* The expansion table can hold NULL slots: free_state() writes
             * NULL into them and other readers in this file NULL-check the
             * lookup.  Skip them rather than dereferencing NULL. */
            if (scan != NULL)
                free_parse(scan);
        }
    }

    free_state(_parse);
}
/**
 * Recompute the log score of every rule from its current prob_score.
 *
 * @param _cfg     Grammar whose rules are updated; must not be NULL.
 * @param logmath  Log-math object handed to logs3().
 */
void
s3_cfg_rescore(s3_cfg_t *_cfg, logmath_t *logmath)
{
    s3_arraylist_t *all = NULL;
    s3_cfg_rule_t *cur = NULL;
    int k;

    assert(_cfg != NULL);

    all = &_cfg->rules;
    k = s3_arraylist_count(all);

    /* Walk the list from the last rule down to the first. */
    while (k-- > 0) {
        cur = (s3_cfg_rule_t *)s3_arraylist_get(all, k);
        cur->log_score = logs3(logmath, cur->prob_score);
    }
}
/**
 * Create a production rule and register it with the grammar.
 *
 * The product list is scanned for the S3_CFG_EOR_ITEM terminator (at most
 * S3_CFG_MAX_ITEM_COUNT items may precede it) and copied, terminator
 * included, into the new rule.  The rule is appended to the grammar-wide
 * rule list and then attached to its source item:
 *   - a non-empty rule is appended to the item's rule list;
 *   - an empty (epsilon) rule becomes the item's nil_rule if the item has
 *     none yet or the existing one has a lower score.
 *
 * @param _cfg       Grammar to extend; must not be NULL.
 * @param _src       Id of the left-hand-side item.
 * @param _score     Raw (unnormalised) score of the rule.
 * @param _products  Right-hand side, terminated by S3_CFG_EOR_ITEM; must
 *                   not be NULL.  A missing terminator is reported through
 *                   E_FATAL.
 * @return The newly created rule.
 */
s3_cfg_rule_t *
s3_cfg_add_rule(s3_cfg_t *_cfg, s3_cfg_id_t _src, float32 _score,
                s3_cfg_id_t *_products)
{
    s3_cfg_rule_t *created = NULL;
    s3_cfg_item_t *owner = NULL;
    s3_cfg_id_t *copy = NULL;
    int n_items = 0;

    assert(_cfg != NULL);
    assert(_products != NULL);

    /* Measure the right-hand side: stop at the terminator or at the cap. */
    while (n_items < S3_CFG_MAX_ITEM_COUNT
           && _products[n_items] != S3_CFG_EOR_ITEM)
        n_items++;

    if (_products[n_items] != S3_CFG_EOR_ITEM)
        E_FATAL("CFG Production rule does not contain EOR item");

    /* Build the rule with its own copy of the products (terminator kept). */
    created = (s3_cfg_rule_t *)ckd_calloc(1, sizeof(s3_cfg_rule_t));
    copy = (s3_cfg_id_t *)ckd_calloc(n_items + 1, sizeof(s3_cfg_id_t));
    memcpy(copy, _products, (n_items + 1) * sizeof(s3_cfg_id_t));

    created->src = _src;
    created->score = _score;
    created->products = copy;
    created->len = n_items;

    /* Register with the grammar, then with the source item. */
    s3_arraylist_append(&_cfg->rules, created);

    owner = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info,
                                              s3_cfg_id2index(_src));
    if (n_items > 0) {
        s3_arraylist_append(&owner->rules, created);
    }
    else if (owner->nil_rule == NULL || owner->nil_rule->score < _score) {
        /* Keep only the highest-scoring epsilon rule on the item. */
        owner->nil_rule = created;
    }

    return created;
}
/**
 * Normalise the rule scores of every non-terminal and allocate the
 * prediction table.
 *
 * compile_nonterm() is run on each item that is not a terminal.  A table
 * with one int8 per item is then allocated and stored in
 * _cfg->predictions; any pointer previously held in that field is
 * overwritten without being released.
 *
 * @param _cfg     Grammar to compile; must not be NULL.
 * @param logmath  Log-math object forwarded to compile_nonterm().
 */
void
s3_cfg_compile_rules(s3_cfg_t *_cfg, logmath_t *logmath)
{
    s3_arraylist_t *items = NULL;
    s3_cfg_item_t *cur = NULL;
    int total, k;

    assert(_cfg != NULL);

    items = &_cfg->item_info;
    total = s3_arraylist_count(items);

    /* Visit the items from last to first; terminals carry no rules. */
    for (k = total; k-- > 0; ) {
        cur = s3_arraylist_get(items, k);
        if (s3_cfg_is_terminal(cur->id))
            continue;
        compile_nonterm(_cfg, cur, logmath);
    }

    _cfg->predictions = (int8 *)ckd_calloc(total, sizeof(int8));
}
/**
 * Advance the parse by one input terminal.
 *
 * Looks up the successor of _cur for the given terminal in _cur's expansion
 * table.  A successor whose num_expanded is -1 is evaluated with
 * eval_state() before being returned.
 *
 * @param _cfg   Grammar in use; must not be NULL.
 * @param _cur   Current parse state.
 * @param _term  Id of the terminal being consumed.
 * @return The successor state, or NULL if the table has no state for this
 *         terminal.
 */
s3_cfg_state_t *
s3_cfg_input_term(s3_cfg_t *_cfg, s3_cfg_state_t *_cur, s3_cfg_id_t _term)
{
    s3_cfg_state_t *next = NULL;
    int slot;

    assert(_cfg != NULL);

    slot = s3_cfg_id2index(_term);
    next = (s3_cfg_state_t *)s3_arraylist_get(&_cur->expansions, slot);

    if (next != NULL && next->num_expanded == -1)
        eval_state(_cfg, next);

    return next;
}
/**
 * Create and evaluate the root state of a new parse.
 *
 * @param _cfg  Grammar to parse with; must not be NULL.
 * @return The evaluated root state.
 */
s3_cfg_state_t *
s3_cfg_create_parse(s3_cfg_t *_cfg)
{
    s3_cfg_state_t *state = NULL;
    s3_cfg_rule_t *rule = NULL;

    assert(_cfg != NULL);

    /* Keep the state returned by add_state().  The result used to be
     * discarded, leaving `state` NULL for add_entry() and eval_state()
     * below (the latter asserts on a NULL state). */
    state = add_state(_cfg, NULL, S3_CFG_NIL_ITEM);

    /* to initialize the parser, we need to create the root state and add to
     * it the starting entry using the pseudo start rule
     *
     *   0.0 $PSTART -> $START #EOR#
     */
    rule = s3_arraylist_get(&_cfg->rules, 0);
    add_entry(state, rule, 0, NULL, rule->log_score, NULL, NULL);

    eval_state(_cfg, state);

    return state;
}
/**
 * Free a single parse state and detach it from its parent.
 *
 * Every entry of the state is freed, both of its lists are closed and the
 * state itself is released.  If the state has a parent (its `back` link),
 * the parent's num_expanded is decremented and the parent's expansion slot
 * for this state's input symbol is reset to NULL.  Child states are not
 * touched here.
 *
 * @param _state  State to release.
 */
void
free_state(s3_cfg_state_t *_state)
{
    /* Remember what is needed from the state before it is freed. */
    s3_cfg_state_t *owner = _state->back;
    int slot = s3_cfg_id2index(_state->input);
    int k = _state->entries.count;

    while (k-- > 0)
        free((s3_cfg_entry_t *)s3_arraylist_get(&_state->entries, k));

    s3_arraylist_close(&_state->entries);
    s3_arraylist_close(&_state->expansions);
    free(_state);

    if (owner == NULL)
        return;

    owner->num_expanded--;
    s3_arraylist_set(&owner->expansions, slot, NULL);
}
/* Allocate a transition _from -> _to and push it on the front of the FSG's
 * transition list.  _word is stored as given (NULL for an epsilon move). */
static void
fsg_push_trans(s2_fsg_t *_fsg, int _from, int _to, double _prob, char *_word)
{
    s2_fsg_trans_t *t = (s2_fsg_trans_t *)ckd_calloc(1, sizeof(s2_fsg_trans_t));

    t->from_state = _from;
    t->to_state = _to;
    t->prob = _prob;
    t->word = _word;
    t->next = _fsg->trans_list;
    _fsg->trans_list = t;
}

/**
 * Expand one CFG rule into FSG states and transitions between two states.
 *
 * The rule is abandoned (nothing is added) if any of its products already
 * has an expansion count above _max_expansion.  Otherwise the products are
 * processed left to right:
 *
 *   - Terminal (other than S3_CFG_EOI_ITEM): a new state is created and a
 *     probability-1 transition emitting the terminal's name leads to it.
 *     S3_CFG_EOI_ITEM produces nothing.
 *   - Non-terminal X: a new exit state v is created and X's expansion count
 *     is raised.  For every non-epsilon rule of X a new entry state u is
 *     created, the rule is converted recursively from u to v, and an
 *     epsilon transition cur -> u carrying the rule's probability is added.
 *     If X has an epsilon rule, an epsilon transition cur -> v with that
 *     rule's probability is added as well.  v becomes the current state and
 *     X's expansion count is lowered again.
 *
 * Finally an epsilon transition with probability 1 links the last current
 * state to _dest.
 *
 * @param _cfg            Grammar the rule belongs to.
 * @param _fsg            FSG being built; n_state and trans_list are updated.
 * @param _rule           Rule to expand.
 * @param _src            FSG state the expansion starts from.
 * @param _dest           FSG state the expansion must end in.
 * @param _expansions     Per-item expansion counters, indexed by item index.
 * @param _max_expansion  Upper bound on a counter for a rule to be expanded.
 */
static void
convert_cfg_rule(s3_cfg_t *_cfg, s2_fsg_t *_fsg, s3_cfg_rule_t *_rule,
                 int _src, int _dest, int *_expansions, int _max_expansion)
{
    s3_cfg_item_t *nonterm;
    s3_cfg_rule_t *alt;
    s3_cfg_id_t sym;
    int here = _src;
    int slot, pos, k, n_alts;
    int entry_state, exit_state;

    /* Refuse the whole rule if any product is already over the limit. */
    for (pos = 0; pos < _rule->len; pos++) {
        sym = _rule->products[pos];
        if (_expansions[s3_cfg_id2index(sym)] > _max_expansion)
            return;
    }

    for (pos = 0; pos < _rule->len; pos++) {
        sym = _rule->products[pos];

        if (s3_cfg_is_terminal(sym)) {
            if (sym == S3_CFG_EOI_ITEM)
                continue;

            /* Emit the terminal on a transition into a fresh state. */
            fsg_push_trans(_fsg, here, _fsg->n_state, 1.0,
                           (char *)ckd_salloc(s3_cfg_id2str(_cfg, sym)));
            here = _fsg->n_state++;
            continue;
        }

        /* Non-terminal: fan out to each alternative, rejoin at exit_state. */
        slot = s3_cfg_id2index(sym);
        exit_state = _fsg->n_state++;
        _expansions[slot]++;

        nonterm = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info, slot);
        n_alts = s3_arraylist_count(&nonterm->rules);
        for (k = 0; k < n_alts; k++) {
            alt = (s3_cfg_rule_t *)s3_arraylist_get(&nonterm->rules, k);
            entry_state = _fsg->n_state++;

            /* The alternative's own transitions are pushed before the
             * epsilon transition that leads into it. */
            convert_cfg_rule(_cfg, _fsg, alt, entry_state, exit_state,
                             _expansions, _max_expansion);
            fsg_push_trans(_fsg, here, entry_state, alt->prob_score, NULL);
        }

        if (nonterm->nil_rule != NULL)
            fsg_push_trans(_fsg, here, exit_state,
                           nonterm->nil_rule->prob_score, NULL);

        here = exit_state;
        _expansions[slot]--;
    }

    /* Make one final transition from our last state to the destination state. */
    fsg_push_trans(_fsg, here, _dest, 1, NULL);
}
/**
 * Evaluate one parse state: walk its entry list and apply the completion,
 * scanning and prediction steps described in the comments below.
 *
 * Effects visible in this function:
 *   - the parent state's num_expanded (if there is a parent) is incremented
 *     and this state's num_expanded is set to 0;
 *   - the grammar-wide predictions table is cleared and reused as a
 *     "already predicted in this state" flag per item;
 *   - entries are added through add_entry() to this state or to a successor
 *     state, and successor states are created through add_state();
 *   - the state's best_overall_* and best_completed_* fields are updated.
 *
 * @param _cfg    Grammar in use; must not be NULL.  _cfg->predictions must
 *                already point at a table with one int8 per item.
 * @param _state  State to evaluate; must not be NULL.
 */
static void
eval_state(s3_cfg_t *_cfg, s3_cfg_state_t *_state)
{
    s3_cfg_rule_t *rule = NULL;
    s3_cfg_entry_t *entry = NULL;
    s3_cfg_entry_t *cmplt_entry = NULL;
    s3_cfg_state_t *target_state = NULL;
    s3_cfg_state_t *origin_state = NULL;
    s3_cfg_item_t *item = NULL;
    s3_cfg_id_t scan;
    s3_arraylist_t *arraylist = NULL;
    int8 *predictions = NULL;
    int32 score;
    int index;
    int dot;
    int i, j;

    assert(_cfg != NULL);
    assert(_state != NULL);

    /* Count this state as an evaluated child of its parent. */
    if (_state->back != NULL) {
        _state->back->num_expanded++;
    }
    _state->num_expanded = 0;

    /* The prediction flags are shared by the whole grammar, so they are
     * reset at the start of every state evaluation. */
    predictions = _cfg->predictions;
    memset(predictions, 0, _cfg->item_info.count * sizeof(int8));

    /* Iterate through the entries in the state and perform the prediction,
     * scan, and completion steps.  The count is re-read on every iteration;
     * presumably add_entry() appends to this list, so entries added to this
     * state during the walk are processed as well. */
    for (i = 0; i < _state->entries.count; i++) {
        entry = (s3_cfg_entry_t *)s3_arraylist_get(&_state->entries, i);
        rule = entry->rule;
        dot = entry->dot;
        origin_state = entry->origin;
        score = entry->score;
        /* The symbol right after the dot; at the end of a rule this is the
         * S3_CFG_EOR_ITEM terminator stored with the products. */
        scan = rule->products[dot];
        index = s3_cfg_id2index(scan);

        DEBUG_ENTRY(entry);

        item = (s3_cfg_item_t *)s3_arraylist_get(&_cfg->item_info, index);

        /* Saving some scores.
         * NOTE(review): these comparisons keep the entry with the LOWEST
         * score, while the PARSE COMPLETION comment below says the parse
         * with the highest score is kept -- confirm which is intended. */
        if (_state->best_overall_entry == NULL ||
            score < _state->best_overall_entry->score)
            _state->best_overall_entry = entry;
        if (_state->best_overall_parse == NULL ||
            score < _state->best_overall_parse->score)
            _state->best_overall_parse = entry;

        if (s3_cfg_is_terminal(scan)) {
            /******************************************************************
             * NORMAL COMPLETION
             *
             * When we encounter an entry of the form
             *
             *   $X -> (A * #EOR#, s0, i),
             *
             * we look for any entry in state S(i) of the form
             *
             *   $Z -> (A * $X B #EOR#, s1, j)
             *
             * and add the entry
             *
             *   $Z -> (A $X * B #EOR#, s0 + s1, j)
             *
             * to the current state.  We also need to keep a record of which
             * subparses were used to complete entries.  In this case, we
             * need to remember that this particular completed entry of $X is
             * used to advance the parsing of $Z, so a pointer p1 to it is
             * passed along with the new entry
             *
             *   $Z -> (A $X(p1) * B #EOR#, s0 + s1, j)
             *
             * for record keeping.
             */
            if (scan == S3_CFG_EOR_ITEM) {
                scan = entry->rule->src;
                arraylist = &entry->origin->entries;
                /* Walk the origin state's entries from last to first; the
                 * starting index is computed once, before any entry is
                 * added. */
                for (j = s3_arraylist_count(arraylist) - 1; j >= 0; j--) {
                    cmplt_entry = (s3_cfg_entry_t *)s3_arraylist_get(arraylist, j);
                    if (cmplt_entry->rule->products[cmplt_entry->dot] == scan)
                        add_entry(_state, cmplt_entry->rule, cmplt_entry->dot + 1, cmplt_entry->origin, cmplt_entry->score + entry->score, cmplt_entry, entry);
                }
            }
            /******************************************************************
             * PARSE COMPLETION
             *
             * We encountered an entry of the form
             *
             *   ($PSTART -> $START * #EOI#, s, i).
             *
             * Instead of waiting for an input symbol #EOI# and completing
             * the pseudo-start rule in the next state, we finish the parse
             * here and save us a step.  We do need to check against other
             * completed parses in this state, since only the parse with the
             * highest score is kept.
             */
            else if (scan == S3_CFG_EOI_ITEM) {
                if (_state->best_completed_entry == NULL ||
                    score < _state->best_completed_entry->score)
                    _state->best_completed_entry = entry;
                if (_state->best_completed_parse == NULL ||
                    score < _state->best_completed_parse->score)
                    _state->best_completed_parse = entry;
            }
            /******************************************************************
             * NORMAL SCANNING
             *
             * When we encounter an entry of the form
             *
             *   ($X -> A * y B #EOR#, s, i),
             *
             * and the input symbol/terminal is y, we add to the next state
             * the entry
             *
             *   ($X -> A y * B #EOR#, s, i)
             *
             * The next state for y is looked up in this state's expansion
             * table and created on demand.
             */
            else {
                index = s3_cfg_id2index(scan);
                arraylist = &_state->expansions;
                target_state = (s3_cfg_state_t *)s3_arraylist_get(arraylist, index);
                if (target_state == NULL)
                    target_state = add_state(_cfg, _state, scan);
                add_entry(target_state, rule, dot + 1, origin_state, score, entry, NULL);
            }
        }
        else {
            /******************************************************************
             * AUTOMATIC COMPLETION OF EPSILON PRODUCING NON-TERMINALS
             *
             * When we encounter an entry of the form
             *
             *   ($X -> A * $Y B #EOR#, s0, i),
             *
             * we check whether $Y is an epsilon producing non-terminal,
             * i.e., whether the rule
             *
             *   $Y -> #EOR#
             *
             * exists.  If that is the case, we do not add any entry
             * corresponding to such an epsilon producing rule.  Instead, we
             * take a short-cut by adding the following entry to the current
             * state
             *
             *   ($X -> A $Y(null) * B #EOR#, s0 + s1, i).
             *
             * Note that in this new entry, the completed non-terminal $Y has
             * a NULL sub-parse pointer.
             */
            if (item->nil_rule != NULL)
                add_entry(_state, rule, dot + 1, origin_state, score + item->nil_rule->log_score, entry, NULL);

            /******************************************************************
             * NORMAL PREDICTION
             *
             * When we encounter an entry of the form
             *
             *   ($X -> A * $Y B #EOR#, s0, i),
             *
             * we want to expand the non-terminal $Y.  That is, we add an
             * entry for each rule that has $Y on its left-hand side.
             * However, we don't want to keep repeated copies of the same
             * entries, so we keep track of which non-terminals we've already
             * expanded in a table.
             */
            if (!predictions[index]) {
                predictions[index] = 1;
                arraylist = &item->rules;
                for (j = s3_arraylist_count(arraylist) - 1; j >= 0; j--) {
                    rule = (s3_cfg_rule_t *)s3_arraylist_get(arraylist, j);
                    /* Rules that start with the terminator (empty rules)
                     * are never predicted. */
                    if (rule->products[0] != S3_CFG_EOR_ITEM)
                        add_entry(_state, rule, 0, _state, rule->log_score, NULL, NULL);
                }
            }
        }
    }
}