// Fetch the cache entry for key `k`, taking into account a recursion head that
// may be registered for the key's input position.
//
// With no head registered the plain cache lookup is returned (possibly NULL).
// With a head registered:
//   - if nothing is cached and the key's parser is neither the head's parser
//     nor in its involved set, a value built by cached_result(state, NULL) is
//     returned with its input position set to the key's position (this value
//     is not stored in the cache here);
//   - if the key's parser is in the head's eval set, it is removed from that
//     set, re-parsed, and the entry is refreshed with the new result.
HParserCacheValue* recall(HParserCacheKey *k, HParseState *state) {
  HParserCacheValue *entry = h_hashtable_get(state->cache, k);
  HRecursionHead *lr_head = h_hashtable_get(state->recursion_heads, &k->input_pos);

  // No head at this position: the cache lookup is the whole answer.
  if (!lr_head)
    return entry;

  // Nothing cached and the parser takes no part in the recursion.
  if (!entry
      && lr_head->head_parser != k->parser
      && !h_slist_find(lr_head->involved_set, k->parser)) {
    entry = cached_result(state, NULL);
    entry->input_stream = k->input_pos;
  }

  // Parser is not scheduled for re-evaluation: done.
  if (!h_slist_find(lr_head->eval_set, k->parser))
    return entry;

  // Take the parser out of the eval set before running it again.
  lr_head->eval_set = h_slist_remove_all(lr_head->eval_set, k->parser);
  HParseResult *fresh = perform_lowlevel_parse(state, k->parser);

  if (entry) {
    // overwrite the existing entry in place
    entry->value_type = PC_RIGHT;
    entry->right = fresh;
    entry->input_stream = state->input_stream;
  } else {
    // first result for this key: create the entry and store it
    entry = cached_result(state, fresh);
    h_hashtable_put(state->cache, k, entry);
  }
  return entry;
}
// no-op on terminal symbols static void transform_productions(const HLRTable *table, HLREnhGrammar *eg, size_t x, HCFChoice *xAy) { if(xAy->type != HCF_CHOICE) return; // XXX CHARSET? HArena *arena = eg->arena; HCFSequence **seq = h_arena_malloc(arena, seqsize(xAy->seq) * sizeof(HCFSequence *)); HCFSequence **p, **q; for(p=xAy->seq, q=seq; *p; p++, q++) { // trace rhs starting in state x and following the transitions // xAy -> ... iBj ... size_t i = x; HCFChoice **B = (*p)->items; HCFChoice **items = h_arena_malloc(arena, seqsize(B) * sizeof(HCFChoice *)); HCFChoice **iBj = items; for(; *B; B++, iBj++) { size_t j = follow_transition(table, i, *B); HLRTransition *i_B_j = transition(arena, i, *B, j); *iBj = h_hashtable_get(eg->tmap, i_B_j); assert(*iBj != NULL); i = j; } *iBj = NULL; *q = h_arena_malloc(arena, sizeof(HCFSequence)); (*q)->items = items; } *q = NULL; xAy->seq = seq; }
// Look up `key` in the hashtable on top of state->symbol_table.
// Returns NULL when there is no symbol table or h_slist_top() yields NULL;
// otherwise returns whatever h_hashtable_get() returns for the key.
void* h_symbol_get(HParseState *state, const char* key) {
  if (!state->symbol_table)
    return NULL;

  HHashTable *top = h_slist_top(state->symbol_table);
  if (!top)
    return NULL;

  return h_hashtable_get(top, key);
}
static HLRAction * lrtable_lookup(const HLRTable *table, size_t state, const HCFChoice *symbol) { switch(symbol->type) { case HCF_END: return table->tmap[state]->end_branch; case HCF_CHAR: return h_stringmap_get(table->tmap[state], &symbol->chr, 1, false); default: // nonterminal case return h_hashtable_get(table->ntmap[state], symbol); } }
// Merge every entry of `src` into `dst`.
// For each key present in src, the value stored in dst becomes
// combine(<current dst lookup result for the key>, <src value>).
// Entries whose key is NULL are skipped.
void h_hashtable_merge(void *(*combine)(void *v1, const void *v2),
                       HHashTable *dst, const HHashTable *src) {
  for (size_t bucket = 0; bucket < src->capacity; bucket++) {
    HHashTableEntry *entry = &src->contents[bucket];

    // walk the chain hanging off this bucket
    while (entry) {
      if (entry->key != NULL) {
        void *existing = h_hashtable_get(dst, entry->key);
        void *incoming = entry->value;
        void *merged = combine(existing, incoming);
        h_hashtable_put(dst, entry->key, merged);
      }
      entry = entry->next;
    }
  }
}
// check whether a sequence of enhanced-grammar symbols (p) matches the given // (original-grammar) production rhs and terminates in the given end state. static bool match_production(HLREnhGrammar *eg, HCFChoice **p, HCFChoice **rhs, size_t endstate) { size_t state = endstate; // initialized to end in case of empty rhs for(; *p && *rhs; p++, rhs++) { HLRTransition *t = h_hashtable_get(eg->smap, *p); assert(t != NULL); if(!h_eq_symbol(t->symbol, *rhs)) return false; state = t->to; } return (*p == *rhs // both NULL && state == endstate); }
// Allocate a copy of `sym` in the enhanced grammar's arena and record it in
// eg->corr under the key `sym`. The set stored for that key is created on
// first use. Returns the new copy.
static HCFChoice *new_enhanced_symbol(HLREnhGrammar *eg, const HCFChoice *sym)
{
  HArena *arena = eg->arena;

  // shallow copy of the original symbol
  HCFChoice *copy = h_arena_malloc(arena, sizeof(HCFChoice));
  *copy = *sym;

  // set of copies made so far for this original symbol
  HHashSet *copies = h_hashtable_get(eg->corr, sym);
  if (copies == NULL) {
    copies = h_hashset_new(arena, h_eq_symbol, h_hash_symbol);
    h_hashtable_put(eg->corr, sym, copies);
  }
  h_hashset_put(copies, copy);

  return copy;
}
// Repeatedly re-parse k->parser at k->input_pos for as long as each attempt
// ends further along the input than the cached one, caching every improved
// result.
//
// Returns
//   - the previously cached result (with the input restored to its cached
//     position) if a re-parse yields no result;
//   - otherwise the cached result once a re-parse no longer advances.
// In both cases the head is removed from state->recursion_heads before
// returning. An absent or PC_LEFT cache entry is reported through
// h_platform_errx(1, "impossible match").
//
// This is written as a loop rather than as self-recursion: the recursive form
// needs one C stack frame per growth step, so a long left-recursive input
// could exhaust the stack whenever the compiler does not turn the tail call
// into a jump. Each round performs the same steps, in the same order, as one
// recursive invocation did.
HParseResult* grow(HParserCacheKey *k, HParseState *state, HRecursionHead *head) {
  for (;;) {
    // Store the head into the recursion_heads. Repeated every round because
    // the table is also modified elsewhere (e.g. the deletions below) and
    // the parse call in between may reach such code.
    h_hashtable_put(state->recursion_heads, &k->input_pos, head);

    HParserCacheValue *old_cached = h_hashtable_get(state->cache, k);
    if (!old_cached || PC_LEFT == old_cached->value_type)
      h_platform_errx(1, "impossible match");   // NOTE(review): relies on this call not returning
    HParseResult *old_res = old_cached->right;

    // rewind the input
    state->input_stream = k->input_pos;
    // reset the eval_set of the head of the recursion at each beginning of growth
    head->eval_set = h_slist_copy(head->involved_set);

    HParseResult *tmp_res = perform_lowlevel_parse(state, k->parser);

    if (!tmp_res) {
      // the re-parse produced nothing: fall back to what was cached
      h_hashtable_del(state->recursion_heads, &k->input_pos);
      state->input_stream = old_cached->input_stream;
      return old_res;
    }

    // stop once pos_lt() no longer reports the cached position as lying
    // before the position reached by this attempt
    if (!pos_lt(old_cached->input_stream, state->input_stream))
      break;

    // progress was made: cache the new result and go around again
    h_hashtable_put(state->cache, k, cached_result(state, tmp_res));
  }

  // we're done with growing, we can remove data from the recursion head
  h_hashtable_del(state->recursion_heads, &k->input_pos);

  HParserCacheValue *cached = h_hashtable_get(state->cache, k);
  if (cached && PC_RIGHT == cached->value_type) {
    state->input_stream = cached->input_stream;
    return cached->right;
  }

  h_platform_errx(1, "impossible match");   // NOTE(review): relies on this call not returning
}
// for each lookahead symbol (fs), put action into tmap // returns 0 on success, -1 on conflict // ignores forall entries static int terminals_put(HStringMap *tmap, const HStringMap *fs, HLRAction *action) { int ret = 0; if (fs->epsilon_branch) { HLRAction *prev = tmap->epsilon_branch; if (prev && prev != action) { // conflict tmap->epsilon_branch = h_lr_conflict(tmap->arena, prev, action); ret = -1; } else { tmap->epsilon_branch = action; } } if (fs->end_branch) { HLRAction *prev = tmap->end_branch; if (prev && prev != action) { // conflict tmap->end_branch = h_lr_conflict(tmap->arena, prev, action); ret = -1; } else { tmap->end_branch = action; } } H_FOREACH(fs->char_branches, void *key, HStringMap *fs_) HStringMap *tmap_ = h_hashtable_get(tmap->char_branches, key); if (!tmap_) { tmap_ = h_stringmap_new(tmap->arena); h_hashtable_put(tmap->char_branches, key, tmap_); } if (terminals_put(tmap_, fs_, action) < 0) { ret = -1; } H_END_FOREACH return ret; }
/* Warth's recursion. Hi Alessandro! */
// Run `parser` at the current input position, going through the cache.
//
// If recall() yields an entry, the input is moved to the entry's position
// and the entry's answer is returned: for a PC_LEFT entry, setupLR() is
// invoked and the left-recursion record's seed is returned; otherwise the
// stored result is returned.
//
// If there is no entry, a fresh left-recursion record is pushed on
// state->lr_stack and cached, the parser is run, and the record is popped.
// If the record has acquired a head in the meantime, the answer comes from
// lr_answer(); otherwise the parse result is cached and returned directly.
HParseResult* h_do_parse(const HParser* parser, HParseState *state) {
  HParserCacheKey *key = a_new(HParserCacheKey, 1);
  key->input_pos = state->input_stream;
  key->parser = parser;

  // check to see if there is already a result for this object...
  HParserCacheValue *hit = recall(key, state);
  if (hit) {
    // it exists!
    state->input_stream = hit->input_stream;
    if (PC_LEFT != hit->value_type)
      return hit->right;

    setupLR(parser, state, hit->left);
    return hit->left->seed;
  }

  // It doesn't exist, so create a dummy result to cache
  HLeftRec *base = a_new(HLeftRec, 1);
  base->seed = NULL;
  base->rule = parser;
  base->head = NULL;
  h_slist_push(state->lr_stack, base);
  h_hashtable_put(state->cache, key, cached_lr(state, base));

  // parse the input, then take the record back off the stack
  HParseResult *tmp_res = perform_lowlevel_parse(state, parser);
  h_slist_pop(state->lr_stack);

  // update the cached value to our new position
  HParserCacheValue *cached = h_hashtable_get(state->cache, key);
  assert(cached != NULL);
  cached->input_stream = state->input_stream;

  // setupLR (reached from the hit path above) gives the record a head when
  // appropriate, so check whether that happened during the parse
  if (base->head != NULL) {
    base->seed = tmp_res;
    return lr_answer(key, state, base);
  }

  h_hashtable_put(state->cache, key, cached_result(state, tmp_res));
  return tmp_res;
}
// Build a parse table for `parser` and store it in parser->backend_data.
//
// Steps:
//   - generate the (augmented) CFG from the parser
//   - construct the LR(0) DFA
//   - build the LR(0) table
//   - if the table has conflicts, resolve them "by conversion to SLR"
//
// Returns 0 when the final table has no conflicts and -1 otherwise. -1 is
// also returned, without touching parser->backend_data, when the parser is
// rejected by isValidCF or any construction step fails.
int h_lalr_compile(HAllocator* mm__, HParser* parser, const void* params)
{
  if (!parser->vtable->isValidCF(parser->env))
    return -1;

  HCFGrammar *g = h_cfgrammar_(mm__, h_desugar_augmented(mm__, parser));
  if (g == NULL)      // backend not suitable (language not context-free)
    return -1;

  HLRDFA *dfa = h_lr0_dfa(g);
  if (dfa == NULL) {  // this should normally not happen
    h_cfgrammar_free(g);
    return -1;
  }

  HLRTable *table = h_lr0_table(g, dfa);
  if (table == NULL) {  // this should normally not happen
    h_cfgrammar_free(g);
    return -1;
  }

  if (has_conflicts(table)) {
    HArena *arena = table->arena;

    HLREnhGrammar *eg = enhance_grammar(g, dfa, table);
    if (eg == NULL) {   // this should normally not happen
      h_cfgrammar_free(g);
      h_lrtable_free(table);
      return -1;
    }

    // go through the inadequate states; the table gets a fresh list that
    // only receives the states which still conflict afterwards
    HSlist *pending = table->inadeq;
    table->inadeq = h_slist_new(arena);

    for (HSlistNode *node = pending->head; node; node = node->next) {
      size_t state = (uintptr_t)node->elem;
      bool still_inadequate = false;

      // clear old forall entry, it's being replaced by more fine-grained ones
      table->forall[state] = NULL;

      // go through each reducible item of state
      H_FOREACH_KEY(dfa->states[state], HLRItem *item)
        if (item->mark < item->len)
          continue;

        // action to place in the table cells indicated by lookahead
        HLRAction *action = h_reduce_action(arena, item);

        // find all LR(0)-enhanced productions matching item
        HHashSet *lhss = h_hashtable_get(eg->corr, item->lhs);
        assert(lhss != NULL);

        H_FOREACH_KEY(lhss, HCFChoice *lhs)
          assert(lhs->type == HCF_CHOICE);  // XXX could be CHARSET?

          for (HCFSequence **p = lhs->seq; *p; p++) {
            HCFChoice **rhs = (*p)->items;

            if (match_production(eg, rhs, item->rhs, state)) {
              // the left-hand symbol's follow set is this production's
              // contribution to the lookahead
              const HStringMap *fs = h_follow(1, eg->grammar, lhs);
              assert(fs != NULL);
              assert(fs->epsilon_branch == NULL);
              assert(!h_stringmap_empty(fs));

              // for each lookahead symbol, put action into table cell
              if (terminals_put(table->tmap[state], fs, action) < 0)
                still_inadequate = true;
            }
          }
        H_END_FOREACH // enhanced production
      H_END_FOREACH  // reducible item

      if (still_inadequate)
        h_slist_push(table->inadeq, (void *)(uintptr_t)state);
    }
  }

  h_cfgrammar_free(g);
  parser->backend_data = table;

  if (has_conflicts(table))
    return -1;
  return 0;
}