void analyser_environment_t::add_a_terminal_to_lookahead_set( node_t * const node, lookahead_set_t * const lookahead_set, unsigned int * const prev_level_walk_count, size_t const cur_level, size_t const max_level, std::list<recur_lookahead_t> &recur_parent_stack, std::list<last_symbol_t> &last_symbol_stack, unsigned int const curr_last_symbol_level) const { lookahead_set_t * const new_lookahead_set = insert_lookahead(lookahead_set, node); assert(new_lookahead_set != 0); #if defined(TRACING_LOOKAHEAD) log(L"%c\n", L'*'); #endif assert(1 == node->next_nodes().size()); if (cur_level < (max_level - 1)) { node_t * const next_node = node->next_nodes().front(); #if 0 wchar_t *str; if (0 == next_node->name().size()) { str = form_rule_end_node_name(next_node, false); } else { str = const_cast<wchar_t *>(next_node->name().c_str()); } log(L"<INFO>: compute lookahead %d from node %s[%d]\n", cur_level + 1, str, next_node->overall_idx()); if (0 == next_node->name().size()) { fmtstr_delete(str); } #endif // search more lookahead from here. compute_lookahead_set(next_node, new_lookahead_set, prev_level_walk_count, cur_level + 1, max_level, recur_parent_stack, last_symbol_stack, curr_last_symbol_level + 1); } }
void * library_load( char const * const filename) { #if LINUX return ::dlopen(filename, RTLD_LAZY); #elif WIN32 wchar_t * const filename_w = fmtstr_mbstowcs(filename, NULL); HMODULE module = LoadLibrary(filename_w); assert(module != NULL); fmtstr_delete(static_cast<void *>(filename_w)); return module; #else #error "Platform doesn't support dlopen and we have no implementation." #endif }
/// @brief mkdir() replacement implemented on top of CreateDirectory().
///
/// @param pathname Multibyte path of the directory to create; must not
///        be null.
/// @param mode Ignored; it is never passed on to CreateDirectory().
/// @return 0 when the directory has been created, -1 otherwise
///         (including when @p pathname can not be converted to a wide
///         string).
///
int
mkdir(
  char const *pathname,
  mode_t mode)
{
  assert(pathname != 0);

  // The permission bits have no use here; silence "unused" warnings.
  static_cast<void>(mode);

  wchar_t * const pathname_w = fmtstr_mbstowcs(pathname, NULL);

  if (NULL == pathname_w)
  {
    // An assert alone would let a release build hand a null path to
    // CreateDirectory(); report the failure to the caller instead.
    return -1;
  }

  // NOTE(review): a wide string is passed to the CreateDirectory macro,
  // so this presumably relies on a UNICODE build - confirm.
  int const result = CreateDirectory(pathname_w, NULL);

  fmtstr_delete(static_cast<void *>(pathname_w));

  /* mkdir returns 0 for success, opposite of
   * CreateDirectory().
   */
  return ((result != 0) ? 0 : -1);
}
/// @brief Compute the lookahead set of @p node up to @p needed_depth levels.
///
/// Logs the request and then starts compute_lookahead_set() at level 0
/// with empty recursion stacks, collecting into @p node 's own
/// lookahead set.
///
/// @param node The node to start from. When its name is empty, a display
///        name is built with form_rule_end_node_name() for the log line.
/// @param needed_depth The number of lookahead levels to compute; it is
///        passed as the max_level of compute_lookahead_set().
///
void
analyser_environment_t::compute_lookahead_terminal_for_node(
  node_t * const node,
  size_t const needed_depth) const
{
  std::list<recur_lookahead_t> recur_parent_stack;
  std::list<last_symbol_t> last_symbol_stack;

  // A node without a name gets a generated display name which has to be
  // released after logging; a named node just lends its own string.
  bool const is_unnamed_node = node->name().empty();
  wchar_t * const generated_name =
    is_unnamed_node ? form_rule_end_node_name(node, false) : 0;
  wchar_t const * const display_name =
    is_unnamed_node ? generated_name : node->name().c_str();

  // 'needed_depth' is a size_t; convert it explicitly so that the
  // argument really matches the '%d' conversion.
  log(L"<INFO>: compute lookahead %d from node %s[%d]\n",
      static_cast<int>(needed_depth),
      display_name,
      node->overall_idx());

  if (is_unnamed_node)
  {
    fmtstr_delete(generated_name);
  }

  compute_lookahead_set(node,
                        &(node->lookahead_set()),
                        0,
                        0,
                        needed_depth,
                        recur_parent_stack,
                        last_symbol_stack,
                        0);
}
bool analyser_environment_t::parse_command_line(int argc, char **argv) { int i; bool correct = true; for (i = 1; i < argc; ++i) { wchar_t * const tmp = fmtstr_mbstowcs(argv[i], 0); assert(tmp != 0); boost::shared_ptr<wchar_t> parm_ptr(tmp, fmtstr_delete); if ((0 == wcscmp(L"-h", parm_ptr.get())) || (0 == wcscmp(L"--help", parm_ptr.get()))) { fwprintf(stderr, L"%s\n", USAGE_MESSAGE); } #if defined(_DEBUG) else if (0 == wcscmp(L"-cmp_ans", parm_ptr.get())) { m_cmp_ans = true; } #endif else if (0 == wcscmp(L"-o", parm_ptr.get())) { wchar_t * const tmp = fmtstr_mbstowcs(argv[++i], 0); assert(tmp != 0); parm_ptr.reset(tmp, fmtstr_delete); wchar_t * const filename = fmtstr_new(L"%s%d", parm_ptr.get(), m_curr_output_filename_idx); mp_output_file = new std::wfstream(filename, std::ios_base::out | std::ios_base::binary); fmtstr_delete(filename); if (false == mp_output_file->is_open()) { fwprintf(stderr, L"Can not open output file: %s\n", argv[i]); return false; } m_output_filename = parm_ptr.get(); } else { if (L'-' == *parm_ptr) { fprintf(stderr, "unknown option: %s\n", parm_ptr.get()); correct = false; } else { // The only possible now is source file, // then try to find one. m_grammar_file_name = parm_ptr.get(); m_grammar_file.open(parm_ptr.get(), std::ios_base::in | std::ios_base::binary); if (false == m_grammar_file.is_open()) { fwprintf(stderr, L"Can not open grammar file: %s\n", argv[i]); return false; } } } } if (false == correct) { return false; } if (false == m_grammar_file.is_open()) { fprintf(stderr, "You must specify a grammar file.\n"); return false; } return true; }
/// @brief Compute lookahead terminals.
///
/// Compute lookahead terminals from the @p node , and put
/// them into the @p lookahead_set .
///
/// This function utilizes a cache facility. In the following
/// situation:
///
/// @code
///   A
///     B c
///     B
/// @endcode
///
/// If I need to collect @e x (or any number) lookahead
/// terminals from the node A, and after tracing node B in
/// the first alternative, I have collected @e x (or any
/// number) lookahead terminals, then I don't need to trace
/// node B in the second alternative.
///
/// The way I achieve this facility is to pass a local
/// variable (initialized to 0) of the function
/// compute_lookahead_set() whose first argument @p node is
/// the node B in the first alternative above to the called
/// compute_lookahead_set() for the nonterminal rule node of
/// the mentioned node B. If the compute_lookahead_set()
/// function call chain traces past the node B in the
/// first alternative (i.e. to the node c above), then I
/// will set the local variable to 1. Before tracing into
/// the node B in the second alternative, I will check the
/// value of the local variable specified above: if its
/// value is 1, then I still need to trace into it; otherwise,
/// I will just return.
///
/// In the code below, that "local variable" is one entry of the
/// per-alternative @c walk_counts list; what is stored in it is
/// the largest distance_from_rule_head() of the nodes visited,
/// and the decision to skip an alternative is delegated to
/// is_duplicated_alternatives().
///
/// @param node The node to trace from. It is handled as a rule end
///        node (empty name), a terminal, a nonterminal reference or
///        a rule head.
/// @param lookahead_set The set which receives the lookahead symbols
///        found.
/// @param prev_level_walk_count Walk count of the alternative being
///        traced, or 0 when there is none. When not 0, it is raised
///        to the distance_from_rule_head() of each terminal or
///        nonterminal visited, and reset to 0 when the trace goes
///        back to the upper rule through a rule end node.
/// @param cur_level The lookahead level currently being collected;
///        must be less than @p max_level.
/// @param max_level The number of lookahead levels wanted; must be
///        at least 1.
/// @param recur_parent_stack Stack of the nonterminal nodes through
///        which the trace has entered other rules. Every push/pop
///        done here is undone before returning.
/// @param last_symbol_stack Stack of the rule nodes whose following
///        terminals are currently being searched through
///        refer_to_me_nodes(). Every push done here is popped before
///        returning.
/// @param curr_last_symbol_level Level stored with the entries pushed
///        to @p last_symbol_stack and passed to
///        find_last_symbol_stack().
///
void
analyser_environment_t::compute_lookahead_set(
  node_t * const node,
  lookahead_set_t * const lookahead_set,
  unsigned int * const prev_level_walk_count,
  size_t const cur_level,
  size_t const max_level,
  std::list<recur_lookahead_t> &recur_parent_stack,
  std::list<last_symbol_t> &last_symbol_stack,
  unsigned int const curr_last_symbol_level) const
{
  assert(max_level >= 1);
  assert(cur_level < max_level);
  
#if defined(TRACING_LOOKAHEAD)
  // Nodes without a name get a generated display name for the trace
  // line, which is released right after logging.
  wchar_t *str;
  
  if (0 == node->name().size())
  {
    str = form_rule_end_node_name(node, false);
  }
  else
  {
    str = const_cast<wchar_t *>(node->name().c_str());
  }
  
  log(L"<INFO>: trace to %s[%d]\n", str, node->overall_idx());
  
  if (0 == node->name().size())
  {
    fmtstr_delete(str);
  }
#endif
  
  if (true == node->name().empty())
  {
    // =======================================
    //        This is a rule end node
    // =======================================
    assert(0 == node->next_nodes().size());
    
    if (recur_parent_stack.size() != 0)
    {
      // =================================================
      //  A rule end node which is reached from another rule
      // =================================================
      // I came into this rule from another rule,
      // thus I can go back to the original node to trace more
      // lookahead terminals.
      //
      node_t * const return_node = recur_parent_stack.back().node();
      unsigned int * const return_node_prev_level_walk_count =
        recur_parent_stack.back().prev_level_walk_count();
      
      recur_parent_stack.pop_back();
      
      // If I trace back to the upper layer, I have to set
      // the walk_count of this layer to 0.
      //
      // Ex:
      //
      //   rule A:  A -> B -> C
      //
      //   rule B:  B -> D -> F ------> o    (1st alternative)
      //            B -> D -> F -> G -> o    (2nd alternative)
      //
      //   ('o' is the rule end node of B; reaching it leads
      //   back to the B in rule A and on to C.)
      //
      // After tracing the first alternative of rule B
      // (i.e. B->D->F->o), I go back to node B in
      // A->B->C. The walk count of B->D->F->o is 2. If I
      // don't set the walk count of it from 2 to 0, then if
      // I go back to rule B and want to trace the second
      // alternative (i.e. B->D->F->G->o), I will find that
      // I can skip tracing it (but this is wrong, I have
      // to trace it) because the first 2 symbols are
      // equal in the first and second alternatives and the
      // walk count is 2.
      //
      if (prev_level_walk_count != 0)
      {
        (*prev_level_walk_count) = 0;
      }
      
      assert(return_node->name().size() != 0);
      assert(false == return_node->is_terminal());
      assert(false == return_node->is_rule_head());
      assert(1 == return_node->next_nodes().size());
      
#if defined(TRACING_LOOKAHEAD)
      log(L"<INFO>: trace back to %s[%d]\n",
          return_node->name().c_str(),
          return_node->overall_idx());
#endif
      
      // Continue with the node following 'return_node', using the walk
      // count which was saved together with 'return_node'.
      compute_lookahead_set(return_node->next_nodes().front(),
                            lookahead_set,
                            return_node_prev_level_walk_count,
                            cur_level,
                            max_level,
                            recur_parent_stack,
                            last_symbol_stack,
                            curr_last_symbol_level);
      
      // return from 'compute_lookahead_set'
      //
      // Ex:
      //
      //   rule A:  A -> B -> C
      //
      //   rule B:  B -> D -> F -> o    (1st alternative)
      //            B -> E -> G -> o    (2nd alternative)
      //
      // I am at 'o', just returned from node 'B'.
      // Thus I should add node 'B' to the back of the
      // 'recur_parent_stack' to indicate that I can go to
      // node 'B' when I am tracing other paths of rule 'B'.
      //
      recur_parent_stack.push_back(
        recur_lookahead_t(return_node, return_node_prev_level_walk_count));
    }
    else
    {
      // =======================================
      //   rule end node for the initial rule
      // =======================================
      // I come to here without going through any previous rule,
      // so that I have to continue tracing along with
      // 'refer_to_me_nodes()'.
      //
      if (0 == node->rule_node()->refer_to_me_nodes().size())
      {
        // =======================================
        //          EOF rule end node
        // =======================================
        // This is a rule end node, and I can not find any
        // other node referring to this rule node, thus, I
        // should see an EOF here.
        //
        // In lookahead symbols, I use 'rule end nodes' as a
        // sign which represents the EOF symbol.
        //
        assert(true == node->is_eof());
        
        insert_lookahead(lookahead_set, node);
#if defined(TRACING_LOOKAHEAD)
        log(L"%c\n", L'*');
#endif
        
        // Because I should see EOF here, thus I should not
        // find more lookahead terminals from here.
        //
      }
      else
      {
        // =======================================
        //    rule end node has 'refer_to_me'
        // =======================================
        // Indicate that I am trying to find what terminals
        // can follow 'node->rule_node()'
        //
        last_symbol_stack.push_back(last_symbol_t(node->rule_node(), curr_last_symbol_level));
        
        for (std::list<node_t *>::const_iterator iter =
               node->rule_node()->refer_to_me_nodes().begin();
             iter != node->rule_node()->refer_to_me_nodes().end();
             ++iter)
        {
          assert((*iter)->name().size() != 0);
          assert(false == (*iter)->is_terminal());
          assert(false == (*iter)->is_rule_head());
          assert(1 == (*iter)->next_nodes().size());
          
          if (((*iter)->rule_node() == node->rule_node()) &&
              ((*iter)->next_nodes().front() == node->rule_node()->rule_end_node()))
          {
            // If I encounter a node such that it is the
            // last node of the form:
            //
            // A -> a A
            //
            // then I will skip tracing that node to look
            // for more lookahead terminals.
            //
            // Because it is a right recursion, I can not
            // find any more lookahead terminals from the
            // right recursion.
            //
            // Thus, in this case, I don't need to trace
            // further and the only possible token I shall
            // see here is an EOF symbol.
            //
            // In lookahead symbols, I use 'rule end nodes'
            // as a sign which represents the EOF symbol.
            //
            if (true == node->is_eof())
            {
              insert_lookahead(lookahead_set, node->rule_node()->rule_end_node());
            }
            
            continue;
          }
          
          if (((*iter)->next_nodes().front() == (*iter)->rule_node()->rule_end_node()) &&
              (true == find_last_symbol_stack(last_symbol_stack,
                                              (*iter)->rule_node(),
                                              curr_last_symbol_level)))
          {
            //===========================================
            //    why do I need last_symbol_stack here?
            //===========================================
            // Consider the following grammar:
            // ---
            // S
            //   a A
            //
            // A
            //   c
            //   f
            //
            // E
            //   C
            //   a F
            //
            // C
            //   a S
            //
            // F
            //   E
            //   V
            //
            // V
            //   e
            // ---
            // If we want to find 2-lookahead symbols for
            // the 'a' alternative of the S rule, it will
            // be {a,c; a,f}.
            //
            // However, if we want to find 3-lookahead for
            // this 'a', the searching sequence will be:
            //
            // S -> a -> A -> c -> C -> E -> F -> E -> F ->
            // E -> F -> ...
            //
            // This is an infinite loop !
            //
            // Indeed, if we start from 'C':
            //
            // C -> a -> S -> a -> A -> {c,f}
            //
            // The terminals which we can trace from 'S' are
            // just {a,c; a,f}. There is no third
            // terminal we can trace from 'S'.
            //
            // Thus, we have to remember every nonterminal
            // when using the "refer_to_me"
            // relationships. If we find that the oncoming
            // nonterminal has already been traced, then we
            // will stop the recursive searching and just
            // return.
            //
            // This is a 'X_end' node, and if I need to find
            // more lookahead terminals from the 'X_end',
            // this means that I must find what terminals
            // can follow X.
            //
            // If 'last_symbol_stack' has 'X', this means I
            // have already tried to find what terminals can
            // follow 'X'.
            //
            // Thus, in this case, I don't need to trace
            // further and the only possible token I shall
            // see here is an EOF symbol.
            //
            // In lookahead symbols, I use 'rule end nodes'
            // as a sign which represents the EOF symbol.
            //
            if (true == node->is_eof())
            {
              insert_lookahead(lookahead_set, node->rule_node()->rule_end_node());
            }
            
            continue;
          }
          else
          {
#if defined(TRACING_LOOKAHEAD)
            log(L"<INFO>: [N]trace further to %s[%d]\n",
                (*iter)->name().c_str(),
                (*iter)->overall_idx());
#endif
            
            // Trace what follows the referring node; no walk count is
            // tracked on this path (0 is passed).
            compute_lookahead_set((*iter)->next_nodes().front(),
                                  lookahead_set,
                                  0,
                                  cur_level,
                                  max_level,
                                  recur_parent_stack,
                                  last_symbol_stack,
                                  curr_last_symbol_level);
          }
        }
        
        // The entry pushed before the loop must be the one on top now.
        assert(last_symbol_stack.size() >= 1);
        assert(last_symbol_stack.back().mp_node == node->rule_node());
        
        last_symbol_stack.pop_back();
      }
    }
  }
  else if (true == node->is_terminal())
  {
    // =======================================
    //        This is a terminal node
    // =======================================
    // Remember the farthest position reached in this alternative.
    if (prev_level_walk_count != 0)
    {
      if (node->distance_from_rule_head() > (*prev_level_walk_count))
      {
        (*prev_level_walk_count) = node->distance_from_rule_head();
      }
    }
    
    add_a_terminal_to_lookahead_set(
      node,
      lookahead_set,
      prev_level_walk_count,
      cur_level,
      max_level,
      recur_parent_stack,
      last_symbol_stack,
      curr_last_symbol_level);
  }
  else
  {
    // ======================================
    //         nonterminal node
    // ======================================
    assert(false == node->is_terminal());
    
    // Remember the farthest position reached in this alternative.
    if (prev_level_walk_count != 0)
    {
      if (node->distance_from_rule_head() > (*prev_level_walk_count))
      {
        (*prev_level_walk_count) = node->distance_from_rule_head();
      }
    }
    
    if (false == node->is_rule_head())
    {
      // ================================================
      //    Normal non-terminal node, NOT a rule node
      // ================================================
      assert(node->nonterminal_rule_node() != 0);
      
      // I designed a 'treat_as_terminal' feature to avoid
      // tracing lookahead symbols deeply. However, I
      // found that this feature is not good enough to
      // solve all such problems and has a significant
      // restriction.
      //
      // Ex:
      //
      // "S" [as_terminal:A]
      // : "A" "a" ---------(1)
      // | "B" "b" ---------(2)
      // ;
      //
      // "A"
      // : "c" "S" "e" "f"
      // ;
      //
      // "B"
      // : "A"
      // ;
      //
      // The fact is I can not distinguish the rule (1) &
      // rule (2) of nonterminal S, because the lookahead
      // symbols for these 2 rules are equal - ("c", "c",
      // "c", ...). However, if I use 'as_terminal:A', then
      // this algorithm will distinguish these 2 rules, and
      // this is wrong.
      //
      // So users must be careful when using this
      // 'as_terminal' feature.
      bool treat_as_terminal = false;
      
      // To see whether I want to treat this nonterminal
      // node as a terminal node.
      for (std::list<std::wstring>::const_iterator iter =
             lookahead_set->mp_orig_node->rule_node()->
             token_name_as_terminal_during_lookahead().begin();
           iter != lookahead_set->mp_orig_node->rule_node()->
             token_name_as_terminal_during_lookahead().end();
           ++iter)
      {
        if (0 == (*iter).compare(node->name()))
        {
          // Want to treat this nonterminal node as a
          // terminal.
          treat_as_terminal = true;
          break;
        }
      }
      
      if (true == treat_as_terminal)
      {
        add_a_terminal_to_lookahead_set(
          node,
          lookahead_set,
          prev_level_walk_count,
          cur_level,
          max_level,
          recur_parent_stack,
          last_symbol_stack,
          curr_last_symbol_level);
      }
      else
      {
#if defined(TRACING_LOOKAHEAD)
        log(L"<INFO>: trace to rule - %s[%d]\n",
            node->nonterminal_rule_node()->name().c_str(),
            node->nonterminal_rule_node()->overall_idx());
#endif
        
        // indicate that I can go back to here.
        recur_parent_stack.push_back(
          recur_lookahead_t(node, prev_level_walk_count));
        
        // One walk count per alternative of the referenced rule, all
        // starting at 0; 'iter_count' advances in step with 'iter'.
        std::list<unsigned int> walk_counts(
          node->nonterminal_rule_node()->next_nodes().size(), 0);
        std::list<unsigned int>::iterator iter_count = walk_counts.begin();
        
        for (std::list<node_t *>::const_iterator iter =
               node->nonterminal_rule_node()->next_nodes().begin();
             iter != node->nonterminal_rule_node()->next_nodes().end();
             ++iter, ++iter_count)
        {
          if (true == is_duplicated_alternatives(
                node->nonterminal_rule_node()->next_nodes().begin(),
                iter,
                walk_counts))
          {
            // Reported as a duplicate of an earlier alternative:
            // nothing to trace.
          }
          else
          {
            compute_lookahead_set(*iter,
                                  lookahead_set,
                                  &(*iter_count),
                                  cur_level,
                                  max_level,
                                  recur_parent_stack,
                                  last_symbol_stack,
                                  curr_last_symbol_level);
            
            assert((*iter_count) <= (*iter)->distance_to_rule_end_node());
          }
        }
        
        // The entry pushed above must be the one on top again.
        assert(recur_parent_stack.size() > 0);
        assert(recur_parent_stack.back().node() == node);
        
        recur_parent_stack.pop_back();
      }
    }
    else
    {
      // ======================================
      //        Rule non-terminal node
      // ======================================
      // One walk count per alternative of this rule, all starting at
      // 0; 'iter_count' advances in step with 'iter'.
      std::list<unsigned int> walk_counts(
        node->next_nodes().size(), 0);
      std::list<unsigned int>::iterator iter_count = walk_counts.begin();
      
      for (std::list<node_t *>::const_iterator iter = node->next_nodes().begin();
           iter != node->next_nodes().end();
           ++iter, ++iter_count)
      {
        // handle duplicated alternatives
        if (true == is_duplicated_alternatives(node->next_nodes().begin(),
                                               iter,
                                               walk_counts))
        {
          // Reported as a duplicate of an earlier alternative:
          // nothing to trace.
        }
        else
        {
          compute_lookahead_set(*iter,
                                lookahead_set,
                                &(*iter_count),
                                cur_level,
                                max_level,
                                recur_parent_stack,
                                last_symbol_stack,
                                curr_last_symbol_level);
          
          assert((*iter_count) <= (*iter)->distance_to_rule_end_node());
        }
      }
    }
  }
}