//========================================================================================================== //========================================================================================================== void Grammar::extract_symbols(string production_str, vector<Symbol>& symbols) { trim(production_str); // Must trim! o/w stringstream gets confused! regex production_re("(\\w+)\\s*->\\s*(.*)"); // Note: the RHS needs to parsed word by word smatch match_res; if(not regex_match(production_str, match_res, production_re)) throw string("Unknown production in line:\n" + production_str); Symbol N = symbol_str_map[match_res[1]]; if(not is_nonterminal(N)) throw string("Expected left hand side of a production to be a nonterminal but received:\n" + production_str); symbols.push_back(N); stringstream sts(match_res[2]); string symbol_str; while(!sts.eof()) { // Extract the symbol names sts >> symbol_str; symbols.push_back(symbol_str_map[symbol_str]); } if(symbols.size() < 2) throw string("Production must have at least two symbols"); // TODO: what about production of the empty string? }
//========================================================================================================== // Create grammar from definitions in input file. // Production are of the form: // N -> A B C ... //========================================================================================================== void Grammar::read_grammar_file(string grammar_file) { ifstream file = ifstream(grammar_file); if(!file.is_open()) throw string("File " + grammar_file + " not found"); //------------------------------------------------------------------------------------------------------ // For each production line extract the symbols and add a production //------------------------------------------------------------------------------------------------------ for(string line; getline(file, line);) { if(trim(line, "//").empty()) continue; // Ignore comments and empty lines vector<Symbol> symbols; string action_name; // If an action is specified, a number sign will serve to separate between the production and it int split_pos = line.find_first_of('#'); extract_symbols(line.substr(0, split_pos), symbols); if(split_pos != string::npos) extract_action(line.substr(split_pos + 1), action_name); productions.push_back(Production(symbols, action_name)); } //------------------------------------------------------------------------------------------------------ // First production nonterminal considered the start symbol. If it is not START, add a production: // START -> FIRST-NONTERMINAL //------------------------------------------------------------------------------------------------------ if(productions[0][0] == START) { if(productions[0].size() != 2) throw string("Expected production for START to have just one symbol on right hand side"); if(not is_nonterminal(productions[0][1])) throw string("Expected production for START to have a nonterminal on the right hand side"); } else { productions.push_front(Production({START, productions[0][0]}, "")); } } // read_grammar_file()
// Scans `file` one symbol at a time with a three-state machine and stores the
// productions it finds in `g`.
//
//   START : between productions, waiting for the first symbol of a left side
//   LEFT  : collecting the left side, waiting for the production symbol
//   RIGHT : collecting the right side, waiting for the production separator
//
// Problems met while scanning are collected (kind + value of g->numprod at
// that moment) and reported together through DrawErrors once the input is
// exhausted.
//
// Returns `g` when no problem was recorded and CheckInitSymbol(g) succeeds;
// returns NULL otherwise.
//
// NOTE(review): entries are appended to the error list without any bounds
// check; its capacity is not visible here - confirm it cannot overflow.
Grammar* load_grammar(FILE* file, Grammar* g)
{
    enum States state = START;   // initial state
    Symbol sym;
    Production* prod = NULL;     // production currently being filled
    Errors errs;

    errs.size = 0;

    // Reset the production count, unless the input is stdin
    if (file != stdin)
        g->numprod = 0;

    while (!feof(file)) {
        sym = read_sym(file);

        // Every state tests this first, so evaluate it once per symbol
        const int grammar_sym = is_terminal(sym) || is_nonterminal(sym);

        switch (state) {
        case START:
            if (grammar_sym) {
                // First symbol of a new production
                prod = add_new_production(g);
                add_symbol(&prod->left, sym);
                state = LEFT;
            } else if (is_prodsep(sym)) {
                // Empty production: nothing to store, keep waiting
            } else if (ispunct(sym) || isgraph(sym)) {
                // Unknown symbol: keep it so it can be shown, but flag it
                prod = add_new_production(g);
                add_symbol(&prod->left, sym);
                errs.type[errs.size] = INVALID_SYMBOL;
                errs.lines[errs.size++] = g->numprod;
                state = LEFT;
            }
            break;

        case LEFT:
            if (grammar_sym) {
                add_symbol(&prod->left, sym);
            } else if (is_prodsym(sym)) {
                state = RIGHT;
            } else if (ispunct(sym) || isgraph(sym)) {
                add_symbol(&prod->left, sym);
                errs.type[errs.size] = INVALID_SYMBOL;
                errs.lines[errs.size++] = g->numprod;
            } else {
                // Anything else here means the production symbol is missing
                errs.type[errs.size] = NO_PRODSYM;
                errs.lines[errs.size++] = g->numprod;

                if (is_prodsep(sym) || sym == EOF) {
                    // The production ended without a right side
                    state = START;
                    if (!CheckNonTerminal(prod)) {
                        errs.type[errs.size] = NO_NT;
                        errs.lines[errs.size++] = g->numprod;
                    }
                } else {
                    state = RIGHT;
                }
            }
            break;

        case RIGHT:
            if (grammar_sym) {
                add_symbol(&prod->right, sym);
            } else if (is_prodsep(sym) || sym == EOF) {
                // Production complete: terminate both words of the most
                // recently added production with '\0'
                Production* done = &g->productions[g->numprod - 1];
                done->left.word[done->left.length] = '\0';
                done->right.word[done->right.length] = '\0';
                state = START;
            } else if (ispunct(sym) || isgraph(sym)) {
                add_symbol(&prod->right, sym);
                errs.type[errs.size] = INVALID_SYMBOL;
                errs.lines[errs.size++] = g->numprod;
            }
            break;
        }
    }

    // With scan errors the grammar is always rejected, but the initial-symbol
    // check is still run so that problem gets reported too
    if (errs.size > 0) {
        DrawErrors(errs, g);
        if (!CheckInitSymbol(g))
            ErrorManager(NO_INITSYM, NULL, 0);
        return NULL;
    }

    if (g != NULL && !CheckInitSymbol(g)) {
        ErrorManager(NO_INITSYM, NULL, 0);
        return NULL;
    }

    return g;
}
// Scans `file` one symbol at a time and appends the productions it finds to
// `g`, which is returned unchanged as a pointer.
//
// Scanning problems do not stop the scan; they are recorded in the `error`
// field of the production being read:
//   1 - a production separator arrived while still on the left side
//   2 - an unexpected symbol was found on the right side
//   3 - an unexpected symbol was found on the left side
//   4 - the production started directly with the production symbol
//       (a single ' ' is stored as its left side)
// The unexpected symbol itself is still appended to the side being read.
Grammar* load_grammar(FILE* file, Grammar* g){
    enum States {START,LEFT,RIGHT,ERROR};
    /* START = scanning a new production [F]
       LEFT  = scanning the left-hand side
       RIGHT = scanning the right-hand side [F]
       ERROR = scan error (never entered by this function)
       [F] presumably marks the accepting states - TODO confirm */
    enum States current_state = START;   // initial state
    Symbol s;
    Production* p = NULL;                // production currently being filled

    while ( !feof(file)) {
        s = read_sym(file);
        if (feof(file)) break;           // the read hit end of file: stop here

        switch(current_state){
        case START:
            if (is_terminal(s) || is_nonterminal(s) || is_prodsym(s)){
                p = add_new_production(g);
                if (is_prodsym(s)){
                    // Left side is missing: store a blank and go on with the right side
                    current_state = RIGHT;
                    p->error=4;
                    add_symbol(&p->left,' ');
                }
                else {
                    current_state = LEFT;
                    add_symbol(&p->left,s);
                }
            }
            else if (is_prodsep(s)){
                current_state = START;   // empty production: keep waiting
            }
            else {
                // Unexpected first symbol. It still opens a production of its
                // own, so one must be created before it is stored; otherwise
                // `p` would be uninitialised (first production) or would still
                // point at the previous production.
                p = add_new_production(g);
                current_state = LEFT;
                add_symbol(&p->left,s);
                p->error=3;
            }
            break;

        case LEFT:
            if (is_terminal(s) || is_nonterminal(s)){
                current_state = LEFT;
                add_symbol(&p->left,s);
            }
            else if (is_prodsym(s)){
                current_state = RIGHT;
            }
            else if (is_prodsep(s)){
                // Production ended before any production symbol was seen
                p->error=1;
                current_state = START;
            }
            else {
                current_state = LEFT;
                add_symbol(&p->left,s);
                p->error=3;
            }
            break;

        case RIGHT:
            if (is_terminal(s) || is_nonterminal(s)){
                current_state = RIGHT;
                add_symbol(&p->right,s);
            }
            else if (is_prodsep(s)){
                current_state = START;   // production complete
            }
            else {
                current_state = RIGHT;
                p->error=2;
                add_symbol(&p->right,s);
            }
            break;
        }
    }
    return g;
}