/*
 * Attempt to convert a CFG to a FSG.  No heuristic simplification is
 * performed here.  The conversion only takes place if every expansion rule
 * in the CFG has one of the following forms
 *
 *   X -> w0 w1 ... wN Y
 *   X -> w0 w1 ... wN
 *   X -> Y
 *   X -> w0
 *   X -> nil
 *
 * where X, Y are non-terminals, and w0, ..., wN are terminals.
 *
 * Parameters:
 *   _cfg  grammar to convert.
 *   _fsg  output slot.  On success it receives the newly allocated FSG; if
 *         the conversion is not possible it is set to NULL.
 *
 * The function is declared void, so success or failure is reported only
 * through *_fsg: callers must test it against NULL.
 *
 * State numbering: state 0 is the shared end state; every non-terminal that
 * has at least one rule gets its own state; extra intermediate states are
 * allocated for the terminals inside a multi-symbol rule.
 *
 * NOTE(review): nothing here records which state is the start state, nor
 * the total number of states, in the returned FSG -- confirm whether callers
 * expect that.
 */
void
s3_cfg_convert_to_fsg(s3_cfg_t *_cfg, s2_fsg_t **_fsg)
{
    hash_table_t *item2state = NULL;
    int num_states = 1;         /* let 0 be the end state */
    int from_state = 0;
    int to_state = 0;
    int i, j;
    s3_cfg_id_t id;
    s3u_vector_t *items = NULL;
    s3u_vector_t *rules = NULL;
    s3_cfg_item_t *item = NULL;
    s3_cfg_rule_t *rule = NULL;
    s2_fsg_t *fsg = NULL;
    char *word;

    if (_fsg == NULL)
        return;
    /* Failure is the default outcome until the whole grammar is converted. */
    *_fsg = NULL;
    if (_cfg == NULL)
        return;

    item2state = hash_new(S3_CFG_NAME_HASH_SIZE, HASH_CASE_YES);
    if (item2state == NULL)
        goto cleanup;

    fsg = (s2_fsg_t *) ckd_calloc(1, sizeof(s2_fsg_t));
    if (fsg == NULL)
        goto cleanup;
    fsg->name = NULL;
    fsg->trans_list = NULL;

    /* Assign one FSG state to every non-terminal that can be expanded. */
    items = &_cfg->item_info;
    for (i = s3u_vec_count(items) - 1; i >= 0; i--) {
        if ((item = s3u_vec_get(items, i)) == NULL)
            goto cleanup;

        rules = &item->rules;
        if (!s3_cfg_is_terminal(item->id) &&
            (item->nil_rule != NULL || s3u_vec_count(rules) > 0))
            hash_enter_bkey(item2state, &item->id, sizeof(s3_cfg_id_t),
                            num_states++);
    }

    /* Iterate through the CFG's expansion rules and convert them to FSG
     * transitions.  If at any point the conversion fails, clean up and
     * leave *_fsg set to NULL. */
    rules = &_cfg->rules;
    for (i = s3u_vec_count(rules) - 1; i >= 0; i--) {
        if ((rule = s3u_vec_get(rules, i)) == NULL)
            goto cleanup;

        /* A failed lookup would leave from_state meaningless, so treat it
         * as a conversion failure instead of using the stale value. */
        if (hash_lookup_bkey(item2state, &rule->src, sizeof(s3_cfg_id_t),
                             &from_state) < 0)
            goto cleanup;

        if (rule->len == 0) {
            /* a NULL production rule means we transition to the end state */
            add_trans(fsg, from_state, 0, rule->prob_score, NULL);
        }
        else if (rule->len == 1) {
            id = rule->products[0];

            if (s3_cfg_is_terminal(id)) {
                /* a single terminal means we output the terminal and
                 * transition to the end state */
                item = s3u_vec_get(items, s3_cfg_id2index(id));
                if (item == NULL)
                    goto cleanup;
                word = item->name;
                add_trans(fsg, from_state, 0, rule->prob_score, word);
            }
            else {
                /* a single non-terminal means we take an epsilon
                 * transition */
                if (hash_lookup_bkey(item2state, &id, sizeof(s3_cfg_id_t),
                                     &to_state) < 0)
                    goto cleanup;
                add_trans(fsg, from_state, to_state, rule->prob_score, NULL);
            }
        }
        else {
            for (j = 1; j < rule->len; j++) {
                /* get the output for the transition; anything but a
                 * terminal here means the rule is not of a supported form */
                id = rule->products[j - 1];
                if (!s3_cfg_is_terminal(id))
                    goto cleanup;
                item = s3u_vec_get(items, s3_cfg_id2index(id));
                if (item == NULL)
                    goto cleanup;
                word = item->name;

                /* get the target state for the transition */
                id = rule->products[j];
                if (s3_cfg_is_terminal(id))
                    to_state = num_states++;
                else if (hash_lookup_bkey(item2state, &id,
                                          sizeof(s3_cfg_id_t), &to_state) < 0)
                    goto cleanup;

                /* only the first transition carries the rule probability */
                add_trans(fsg, from_state, to_state,
                          j == 1 ? rule->prob_score : 1.0, word);
                from_state = to_state;
            }

            /* The loop above emits products[0 .. len-2].  When the rule ends
             * in a terminal (X -> w0 ... wN) that last word still has to be
             * output on a transition into the end state; when it ends in a
             * non-terminal the loop already moved into that symbol's
             * state. */
            id = rule->products[rule->len - 1];
            if (s3_cfg_is_terminal(id)) {
                item = s3u_vec_get(items, s3_cfg_id2index(id));
                if (item == NULL)
                    goto cleanup;
                add_trans(fsg, from_state, 0, 1.0, item->name);
            }
        }
    }

    /* Success: hand the graph to the caller and keep cleanup from freeing
     * it. */
    *_fsg = fsg;
    fsg = NULL;

cleanup:
    /* NOTE(review): hash_free is assumed to be the release counterpart of
     * hash_new in the same hash-table API -- confirm against the header. */
    if (item2state != NULL)
        hash_free(item2state);
    if (fsg != NULL)
        free_fsg(fsg);
}
/*
 * Walk every entry of one parser state and apply the prediction, scanning
 * and completion steps to it.
 *
 * _cfg    grammar being parsed; its `predictions` buffer is cleared and then
 *         reused as the "already predicted" table for this state.
 * _state  state to evaluate.  Entries appended to it while the walk is in
 *         progress are visited as well, because the entry count is re-read
 *         on every iteration.
 *
 * Side effects visible in this function: bumps `num_expanded` on the state's
 * back pointer (when there is one), resets the state's own `num_expanded`,
 * updates the best_* bookkeeping pointers, and adds entries/states through
 * add_entry() and add_state().
 */
static void
eval_state(s3_cfg_t *_cfg, s3_cfg_state_t *_state)
{
    s3_cfg_entry_t *entry = NULL;
    s3_cfg_entry_t *waiting = NULL;
    s3_cfg_rule_t *cur_rule = NULL;
    s3_cfg_rule_t *expansion = NULL;
    s3_cfg_state_t *origin = NULL;
    s3_cfg_state_t *next_state = NULL;
    s3_cfg_item_t *sym_info = NULL;
    s3_arraylist_t *list = NULL;
    s3_cfg_id_t sym;
    s3_cfg_id_t lhs;
    int8 *seen = NULL;
    int32 cur_score;
    int sym_index;
    int cur_dot;
    int pos;
    int k;

    assert(_cfg != NULL);
    assert(_state != NULL);

    if (_state->back != NULL)
        _state->back->num_expanded++;
    _state->num_expanded = 0;

    /* start with an empty "already predicted" table */
    seen = _cfg->predictions;
    memset(seen, 0, _cfg->item_info.count * sizeof(int8));

    for (pos = 0; pos < _state->entries.count; pos++) {
        entry = (s3_cfg_entry_t *) s3_arraylist_get(&_state->entries, pos);

        cur_rule = entry->rule;
        cur_dot = entry->dot;
        origin = entry->origin;
        cur_score = entry->score;
        /* symbol sitting immediately after the dot */
        sym = cur_rule->products[cur_dot];
        sym_index = s3_cfg_id2index(sym);

        DEBUG_ENTRY(entry);

        sym_info =
            (s3_cfg_item_t *) s3_arraylist_get(&_cfg->item_info, sym_index);

        /* Bookkeeping: an entry replaces the recorded one when nothing has
         * been recorded yet or when its score is strictly lower. */
        if (_state->best_overall_entry == NULL ||
            cur_score < _state->best_overall_entry->score) {
            _state->best_overall_entry = entry;
        }
        if (_state->best_overall_parse == NULL ||
            cur_score < _state->best_overall_parse->score) {
            _state->best_overall_parse = entry;
        }

        if (!s3_cfg_is_terminal(sym)) {
            /****************************************************************
             * AUTOMATIC COMPLETION OF EPSILON PRODUCING NON-TERMINALS
             *
             * For an entry
             *
             *   ($X -> A * $Y B #EOR#, s0, i)
             *
             * where $Y has a nil rule ($Y -> #EOR#), no entry is created for
             * the nil rule itself.  The dot is moved over $Y right away:
             *
             *   ($X -> A $Y(null) * B #EOR#, s0 + s1, i)
             *
             * with a NULL sub-parse pointer recorded for $Y.
             */
            if (sym_info->nil_rule != NULL) {
                add_entry(_state, cur_rule, cur_dot + 1, origin,
                          cur_score + sym_info->nil_rule->log_score,
                          entry, NULL);
            }

            /****************************************************************
             * NORMAL PREDICTION
             *
             * Expand $Y by adding one entry per rule that has $Y on its
             * left-hand side.  Each non-terminal is expanded at most once
             * per call; the table marks the ones already done.
             */
            if (seen[sym_index])
                continue;
            seen[sym_index] = 1;

            list = &sym_info->rules;
            k = s3_arraylist_count(list);
            while (k-- > 0) {
                expansion = (s3_cfg_rule_t *) s3_arraylist_get(list, k);
                /* rules that start with #EOR# are skipped here */
                if (expansion->products[0] != S3_CFG_EOR_ITEM) {
                    add_entry(_state, expansion, 0, _state,
                              expansion->log_score, NULL, NULL);
                }
            }
            continue;
        }

        if (sym == S3_CFG_EOR_ITEM) {
            /****************************************************************
             * NORMAL COMPLETION
             *
             * The entry has the form
             *
             *   $X -> (A * #EOR#, s0, i)
             *
             * Every entry in its origin state S(i) of the form
             *
             *   $Z -> (A * $X B #EOR#, s1, j)
             *
             * is advanced over $X and added to the current state with the
             * two scores summed.  The advanced entry keeps pointers to both
             * the entry it came from and the completed entry of $X, so the
             * sub-parse used for the completion is remembered.
             */
            lhs = entry->rule->src;
            list = &entry->origin->entries;
            k = s3_arraylist_count(list);
            while (k-- > 0) {
                waiting = (s3_cfg_entry_t *) s3_arraylist_get(list, k);
                if (waiting->rule->products[waiting->dot] == lhs) {
                    add_entry(_state, waiting->rule, waiting->dot + 1,
                              waiting->origin,
                              waiting->score + entry->score,
                              waiting, entry);
                }
            }
        }
        else if (sym == S3_CFG_EOI_ITEM) {
            /****************************************************************
             * PARSE COMPLETION
             *
             * The entry has the form
             *
             *   ($PSTART -> $START * #EOI#, s, i).
             *
             * Rather than waiting for an #EOI# input symbol and completing
             * the pseudo-start rule in the next state, the parse is finished
             * here.  Only one completed parse is kept per state: the one
             * whose score compares lowest.
             * NOTE(review): earlier commentary called this the "highest"
             * score -- confirm the score polarity.
             */
            if (_state->best_completed_entry == NULL ||
                cur_score < _state->best_completed_entry->score) {
                _state->best_completed_entry = entry;
            }
            if (_state->best_completed_parse == NULL ||
                cur_score < _state->best_completed_parse->score) {
                _state->best_completed_parse = entry;
            }
        }
        else {
            /****************************************************************
             * NORMAL SCANNING
             *
             * The entry has the form
             *
             *   ($X -> A * y B #EOR#, s, i)
             *
             * with y a terminal.  The state reached from the current one on
             * y (created on demand) receives
             *
             *   ($X -> A y * B #EOR#, s, i)
             */
            next_state =
                (s3_cfg_state_t *) s3_arraylist_get(&_state->expansions,
                                                    sym_index);
            if (next_state == NULL)
                next_state = add_state(_cfg, _state, sym);

            add_entry(next_state, cur_rule, cur_dot + 1, origin, cur_score,
                      entry, NULL);
        }
    }
}