// Passes the persistent parts of a state machine through an archive.
//
// sm_      - state machine whose internal tables are streamed.
// ar_      - archive object; each member is handed to its operator &.
// version_ - format version number, streamed first.
//
// The members are visited in a fixed order: version, lookup tables,
// DFA alphabet sizes, DFA tables, then the BOL and EOL assertion flags.
void serialise (basic_state_machine<CharT> &sm_, Archive &ar_,
    unsigned int version_ = 1)
{
    typedef detail::internals internals_type;

    // data () is bound here as a const reference, so constness is cast away
    // to give the archive non-const access to the members.
    internals_type &tables_ = const_cast<internals_type &> (sm_.data ());

    ar_ & version_;
    ar_ & *tables_._lookup;
    ar_ & tables_._dfa_alphabet;
    ar_ & *tables_._dfa;
    ar_ & tables_._seen_BOL_assertion;
    ar_ & tables_._seen_EOL_assertion;
}
static void dump_tables (const basic_state_machine<char_type, id_type> &sm_, const std::size_t tabs_, const bool pointers_, std::ostream &os_) { const typename detail::basic_internals<id_type> &internals_ = sm_.data (); const std::size_t lookup_divisor_ = 8; // Lookup is always 256 entries long now const std::size_t lookup_quotient_ = 256 / lookup_divisor_; const std::size_t dfas_ = internals_._lookup->size (); std::size_t col_ = 1; std::size_t row_ = 1; output_tabs (tabs_, os_); os_ << "static const id_type lookup"; if (dfas_ > 1) { os_ << "s_[][" << 256; } else { os_ << "_["; } os_ << "] = \n"; output_tabs (tabs_ + 1, os_); if (dfas_ > 1) { os_ << '{'; } for (std::size_t l_ = 0; l_ < dfas_; ++l_) { const id_type *ptr_ = &internals_._lookup[l_]->front (); // We want numbers regardless of id_type. os_ << "{0x" << std::hex << static_cast<std::size_t>(*ptr_++); for (col_ = 1; col_ < lookup_divisor_; ++col_) { // We want numbers regardless of id_type. os_ << ", 0x" << std::hex << static_cast<std::size_t>(*ptr_++); } for (row_ = 1; row_ < lookup_quotient_; ++row_) { os_ << ",\n"; output_tabs (tabs_ + 1, os_); // We want numbers regardless of id_type. os_ << "0x" << std::hex << static_cast<std::size_t>(*ptr_++); for (col_ = 1; col_ < lookup_divisor_; ++col_) { // We want numbers regardless of id_type. os_ << ", 0x" << std::hex << static_cast<std::size_t>(*ptr_++); } } os_ << '}'; if (l_ + 1 < dfas_) { os_ << ",\n"; output_tabs (tabs_ + 1, os_); } } if (dfas_ > 1) { os_ << '}'; } os_ << ";\n"; output_tabs (tabs_, os_); os_ << "static const id_type dfa_alphabet"; if (dfas_ > 1) { os_ << "s_[" << dfas_ << "] = {"; } else { os_ << "_ = "; } // We want numbers regardless of id_type. os_ << "0x" << std::hex << static_cast<std::size_t> (internals_._dfa_alphabet[0]); for (col_ = 1; col_ < dfas_; ++col_) { // We want numbers regardless of id_type. os_ << ", 0x" << std::hex << static_cast<std::size_t>(internals_. 
_dfa_alphabet[col_]); } if (dfas_ > 1) { os_ << '}'; } os_ << ";\n"; // DFAs are usually different sizes, so dump separately for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_) { const id_type dfa_alphabet_ = internals_._dfa_alphabet[dfa_]; const std::size_t rows_ = internals_._dfa[dfa_]->size () / dfa_alphabet_; const id_type *ptr_ = &internals_._dfa[dfa_]->front (); std::string dfa_name_ = "dfa"; output_tabs (tabs_, os_); os_ << "static const "; if (pointers_) { os_ << "void *"; } else { os_ << "id_type "; } os_ << dfa_name_; if (dfas_ > 1) { std::ostringstream ss_; ss_ << dfa_; dfa_name_ += ss_.str (); os_ << dfa_; } dfa_name_ += '_'; os_ << "_[] = {"; for (std::size_t row_ = 0; row_ < rows_; ++row_) { dump_row (row_ == 0, ptr_, dfa_name_, dfa_alphabet_, pointers_, os_); if (row_ + 1 < rows_) { os_ << ",\n"; output_tabs (tabs_ + 1, os_); } } os_ << "};\n"; } if (dfas_ > 1) { output_tabs (tabs_, os_); os_ << "static const "; if (pointers_) { os_ << "void * const"; } else { os_ << "id_type"; } os_ << " *dfas_[] = {dfa0_"; for (col_ = 1; col_ < dfas_; ++col_) { os_ << ", dfa" << col_ << '_'; } os_ << "};\n"; } }
void generate_cpp (const basic_state_machine<CharT> &state_machine_, std::ostream &os_, const bool use_pointers_ = false, const bool skip_unknown_ = true, const bool optimise_parameters_ = true, const char *name_ = "next_token") { const detail::internals &sm_ = state_machine_.data (); if (sm_._lookup->size () == 0) { throw runtime_error ("Cannot generate code from an empty " "state machine"); } std::string upper_name_ (__DATE__); const std::size_t lookups_ = sm_._lookup->front ()->size (); const std::size_t dfas_ = sm_._dfa->size (); std::string::size_type pos_ = upper_name_.find (' '); const char *iterator_ = 0; if (use_pointers_) { if (lookups_ == 256) { iterator_ = "const char *"; } else { iterator_ = "const wchar_t *"; } } else { iterator_ = "Iterator &"; } while (pos_ != std::string::npos) { upper_name_.replace (pos_, 1, "_"); pos_ = upper_name_.find (' ', pos_); } upper_name_ += '_'; upper_name_ += __TIME__; pos_ = upper_name_.find (':'); while (pos_ != std::string::npos) { upper_name_.erase (pos_, 1); pos_ = upper_name_.find (':', pos_); } upper_name_ = '_' + upper_name_; upper_name_ = name_ + upper_name_; std::transform (upper_name_.begin (), upper_name_.end (), upper_name_.begin (), ::toupper); os_ << "#ifndef " << upper_name_ + '\n'; os_ << "#define " << upper_name_ + '\n'; os_ << "// Copyright (c) 2008-2009 Ben Hanson\n"; os_ << "//\n"; os_ << "// Distributed under the Boost Software License, " "Version 1.0. 
(See accompanying\n"; os_ << "// file licence_1_0.txt or copy at " "http://www.lslboost.org/LICENSE_1_0.txt)\n\n"; os_ << "// Auto-generated by lslboost::lexer\n"; os_ << "template<typename Iterator>\n"; os_ << "std::size_t " << name_ << " ("; if (dfas_ > 1 || !optimise_parameters_) { os_ << "std::size_t &start_state_, "; } if (use_pointers_) { os_ << iterator_ << " &"; } else { os_ << iterator_; } os_ << "start_token_, "; if (use_pointers_) { os_ << iterator_ << " const "; } else { os_ << "const " << iterator_; } os_ << "end_, \n"; os_ << " std::size_t &unique_id_"; if (sm_._seen_BOL_assertion || !optimise_parameters_) { os_ << ", bool &beg_of_line_"; } os_ << ")\n"; os_ << "{\n"; os_ << " enum {end_state_index, id_index, unique_id_index, state_index, bol_index,\n"; os_ << " eol_index, dead_state_index, dfa_offset};\n"; os_ << " static const std::size_t npos = static_cast" "<std::size_t>(~0);\n"; if (dfas_ > 1) { std::size_t state_ = 0; for (; state_ < dfas_; ++state_) { std::size_t i_ = 0; std::size_t j_ = 1; std::size_t count_ = lookups_ / 8; const std::size_t *lookup_ = &sm_._lookup[state_]->front (); const std::size_t *dfa_ = &sm_._dfa[state_]->front (); os_ << " static const std::size_t lookup" << state_ << "_[" << lookups_ << "] = {"; for (; i_ < count_; ++i_) { const std::size_t index_ = i_ * 8; os_ << lookup_[index_]; for (; j_ < 8; ++j_) { os_ << ", " << lookup_[index_ + j_]; } if (i_ < count_ - 1) { os_ << "," << std::endl << " "; } j_ = 1; } os_ << "};\n"; count_ = sm_._dfa[state_]->size (); os_ << " static const std::size_t dfa" << state_ << "_[" << count_ << "] = {"; count_ /= 8; for (i_ = 0; i_ < count_; ++i_) { const std::size_t index_ = i_ * 8; os_ << dfa_[index_]; for (j_ = 1; j_ < 8; ++j_) { os_ << ", " << dfa_[index_ + j_]; } if (i_ < count_ - 1) { os_ << "," << std::endl << " "; } } const std::size_t mod_ = sm_._dfa[state_]->size () % 8; if (mod_) { const std::size_t index_ = count_ * 8; if (count_) { os_ << ",\n "; } os_ << dfa_[index_]; for 
(j_ = 1; j_ < mod_; ++j_) { os_ << ", " << dfa_[index_ + j_]; } } os_ << "};\n"; } std::size_t count_ = sm_._dfa_alphabet.size (); std::size_t i_ = 1; os_ << " static const std::size_t *lookup_arr_[" << count_ << "] = {"; os_ << "lookup0_"; for (i_ = 1; i_ < count_; ++i_) { os_ << ", " << "lookup" << i_ << "_"; } os_ << "};\n"; os_ << " static const std::size_t dfa_alphabet_arr_[" << count_ << "] = {"; os_ << sm_._dfa_alphabet.front (); for (i_ = 1; i_ < count_; ++i_) { os_ << ", " << sm_._dfa_alphabet[i_]; } os_ << "};\n"; os_ << " static const std::size_t *dfa_arr_[" << count_ << "] = {"; os_ << "dfa0_"; for (i_ = 1; i_ < count_; ++i_) { os_ << ", " << "dfa" << i_ << "_"; } os_ << "};\n"; } else { const std::size_t *lookup_ = &sm_._lookup->front ()->front (); const std::size_t *dfa_ = &sm_._dfa->front ()->front (); std::size_t i_ = 0; std::size_t j_ = 1; std::size_t count_ = lookups_ / 8; os_ << " static const std::size_t lookup_["; os_ << sm_._lookup->front ()->size () << "] = {"; for (; i_ < count_; ++i_) { const std::size_t index_ = i_ * 8; os_ << lookup_[index_]; for (; j_ < 8; ++j_) { os_ << ", " << lookup_[index_ + j_]; } if (i_ < count_ - 1) { os_ << "," << std::endl << " "; } j_ = 1; } os_ << "};\n"; os_ << " static const std::size_t dfa_alphabet_ = " << sm_._dfa_alphabet.front () << ";\n"; os_ << " static const std::size_t dfa_[" << sm_._dfa->front ()->size () << "] = {"; count_ = sm_._dfa->front ()->size () / 8; for (i_ = 0; i_ < count_; ++i_) { const std::size_t index_ = i_ * 8; os_ << dfa_[index_]; for (j_ = 1; j_ < 8; ++j_) { os_ << ", " << dfa_[index_ + j_]; } if (i_ < count_ - 1) { os_ << "," << std::endl << " "; } } const std::size_t mod_ = sm_._dfa->front ()->size () % 8; if (mod_) { const std::size_t index_ = count_ * 8; if (count_) { os_ << ",\n "; } os_ << dfa_[index_]; for (j_ = 1; j_ < mod_; ++j_) { os_ << ", " << dfa_[index_ + j_]; } } os_ << "};\n"; } os_ << "\n if (start_token_ == end_)\n"; os_ << " {\n"; os_ << " unique_id_ = npos;\n"; 
os_ << " return 0;\n"; os_ << " }\n\n"; if (dfas_ > 1) { os_ << "again:\n"; os_ << " const std::size_t * lookup_ = " "lookup_arr_[start_state_];\n"; os_ << " std::size_t dfa_alphabet_ = " "dfa_alphabet_arr_[start_state_];\n"; os_ << " const std::size_t *dfa_ = dfa_arr_[start_state_];\n"; } os_ << " const std::size_t *ptr_ = dfa_ + dfa_alphabet_;\n"; os_ << " Iterator curr_ = start_token_;\n"; os_ << " bool end_state_ = *ptr_ != 0;\n"; os_ << " std::size_t id_ = *(ptr_ + id_index);\n"; os_ << " std::size_t uid_ = *(ptr_ + unique_id_index);\n"; if (dfas_ > 1) { os_ << " std::size_t end_start_state_ = start_state_;\n"; } if (sm_._seen_BOL_assertion) { os_ << " bool bol_ = beg_of_line_;\n"; os_ << " bool end_bol_ = bol_;\n"; } os_ << " Iterator end_token_ = start_token_;\n"; os_ << '\n'; os_ << " while (curr_ != end_)\n"; os_ << " {\n"; if (sm_._seen_BOL_assertion) { os_ << " const std::size_t BOL_state_ = ptr_[bol_index];\n"; } if (sm_._seen_EOL_assertion) { os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n"; } if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion) { os_ << '\n'; } if (sm_._seen_BOL_assertion) { os_ << " if (BOL_state_ && bol_)\n"; os_ << " {\n"; os_ << " ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n"; os_ << " }\n"; } if (sm_._seen_EOL_assertion) { os_ << " "; if (sm_._seen_BOL_assertion) { os_ << "else "; } os_ << "if (EOL_state_ && *curr_ == '\\n')\n"; os_ << " {\n"; os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; os_ << " }\n"; } std::string tab_ (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion ? 
" " : ""); if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion) { os_ << " else\n"; os_ << " {\n"; } if (sm_._seen_BOL_assertion) { os_ << " "; if (lookups_ == 256) { os_ << "char"; } else { os_ << "wchar_t"; } os_ << " prev_char_ = *curr_++;\n\n"; os_ << " bol_ = prev_char_ == '\\n';\n\n"; } os_ << tab_; os_ << " const std::size_t state_ =\n"; os_ << tab_; os_ << " ptr_[lookup_["; if (lookups_ == 256) { os_ << "static_cast<unsigned char>("; } if (sm_._seen_BOL_assertion) { os_ << "prev_char"; } else { os_ << "*curr_++"; } if (lookups_ == 256) { os_ << ')'; } os_ << "]];\n\n"; os_ << tab_; os_ << " if (state_ == 0) break;\n\n"; os_ << tab_; os_ << " ptr_ = &dfa_[state_ * dfa_alphabet_];\n"; if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion) { os_ << " }\n"; } os_ << '\n'; os_ << " if (*ptr_)\n"; os_ << " {\n"; os_ << " end_state_ = true;\n"; os_ << " id_ = *(ptr_ + id_index);\n"; os_ << " uid_ = *(ptr_ + unique_id_index);\n"; if (dfas_ > 1) { os_ << " end_start_state_ = *(ptr_ + state_index);\n"; } if (sm_._seen_BOL_assertion) { os_ << " end_bol_ = bol_;\n"; } os_ << " end_token_ = curr_;\n"; os_ << " }\n"; os_ << " }\n"; os_ << '\n'; if (sm_._seen_EOL_assertion) { os_ << " const std::size_t EOL_state_ = ptr_[eol_index];\n"; os_ << '\n'; os_ << " if (EOL_state_ && curr_ == end_)\n"; os_ << " {\n"; os_ << " ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n"; os_ << '\n'; os_ << " if (*ptr_)\n"; os_ << " {\n"; os_ << " end_state_ = true;\n"; os_ << " id_ = *(ptr_ + id_index);\n"; os_ << " uid_ = *(ptr_ + unique_id_index);\n"; if (dfas_ > 1) { os_ << " end_start_state_ = *(ptr_ + state_index);\n"; } if (sm_._seen_BOL_assertion) { os_ << " end_bol_ = bol_;\n"; } os_ << " end_token_ = curr_;\n"; os_ << " }\n"; os_ << " }\n"; os_ << '\n'; } os_ << " if (end_state_)\n"; os_ << " {\n"; os_ << " // return longest match\n"; if (dfas_ > 1) { os_ << " start_state_ = end_start_state_;\n"; } if (sm_._seen_BOL_assertion && dfas_ < 2) { os_ << " beg_of_line_ = end_bol_;\n"; } 
os_ << " start_token_ = end_token_;\n"; if (dfas_ > 1) { os_ << '\n'; os_ << " if (id_ == 0)\n"; os_ << " {\n"; if (sm_._seen_BOL_assertion) { os_ << " bol_ = end_bol_;\n"; } os_ << " goto again;\n"; os_ << " }\n"; if (sm_._seen_BOL_assertion) { os_ << " else\n"; os_ << " {\n"; os_ << " beg_of_line_ = end_bol_;\n"; os_ << " }\n"; } } os_ << " }\n"; os_ << " else\n"; os_ << " {\n"; if (sm_._seen_BOL_assertion) { os_ << " beg_of_line_ = *start_token_ == '\\n';\n"; } if (skip_unknown_) { os_ << " // No match causes char to be skipped\n"; os_ << " ++start_token_;\n"; } os_ << " id_ = npos;\n"; os_ << " uid_ = npos;\n"; os_ << " }\n"; os_ << '\n'; os_ << " unique_id_ = uid_;\n"; os_ << " return id_;\n"; os_ << "}\n"; os_ << "\n#endif\n"; }
static void generate_cpp (const std::string &name_, const basic_state_machine<char_type, id_type> &sm_, const bool pointers_, std::ostream &os_) { typedef basic_state_machine<char_type, id_type> sm; typedef typename sm::internals internals; const internals &internals_ = sm_.data (); std::size_t additional_tabs_ = 0; os_ << "template<typename iter_type, typename id_type>\n"; os_ << "void " << name_ << " (lexertl::"; if (internals_._features & recursive_bit) { os_ << "basic_push_match_results"; } else { os_ << "basic_match_results"; } os_ << "<iter_type, id_type> &results_)\n"; os_ << "{\n"; os_ << " typedef lexertl::"; if (internals_._features & recursive_bit) { os_ << "basic_push_match_results"; } else { os_ << "basic_match_results"; } os_ << "<iter_type, id_type> results;\n"; os_ << " typename results::iter_type end_ = results_.eoi;\n"; if (internals_._features & skip_bit) { os_ << "skip:\n"; } os_ << " typename results::iter_type start_ = results_.start = " "results_.end;\n\n"; if (internals_._features & again_bit) { os_ << "again:\n"; } os_ << " if (start_ == end_)\n"; os_ << " {\n"; // We want a number regardless of id_type. 
os_ << " results_.id = " << static_cast<std::size_t> (internals_._eoi) << ";\n"; os_ << " results_.user_id = results::npos ();\n"; os_ << " results_.end = start_;\n"; os_ << " return;\n"; os_ << " }\n\n"; os_ << " typename results::iter_type curr_ = start_;\n"; if (internals_._features & bol_bit) { os_ << " bool bol_ = results_.bol;\n"; } dump_tables (sm_, 1, pointers_, os_); if (internals_._dfa->size () > 1) { os_ << " const id_type *lookup_ = lookups_[results_.state];\n"; os_ << " const id_type dfa_alphabet_ = dfa_alphabets_" "[results_.state];\n"; os_ << " const "; if (pointers_) { os_ << "void * const"; } else { os_ << "id_type"; } os_ << " *dfa_ = dfas_[results_.state];\n"; } os_ << " const "; if (pointers_) { os_ << "void * const"; } else { os_ << "id_type"; } os_ << " *ptr_ = dfa_ + dfa_alphabet_;\n"; os_ << " bool end_state_ = *ptr_ != 0;\n"; if (internals_._features & recursive_bit) { os_ << " bool pop_ = ("; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*ptr_"; if (pointers_) { os_ << ')'; } os_ <<" & " << pop_dfa_bit; if (pointers_) { os_ << ')'; } os_ << ") != 0;\n"; } os_ << " id_type id_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << id_index << ")"; if (pointers_) { os_ << "))"; } os_ << ";\n"; os_ << " id_type uid_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << user_id_index << ")"; if (pointers_) { os_ << "))"; } os_ << ";\n"; if (internals_._features & recursive_bit) { os_ << " id_type push_dfa_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << push_dfa_index << ")"; if (pointers_) { os_ << "))"; } os_ << ";\n"; } if (internals_._dfa->size () > 1) { os_ << " id_type start_state_ = results_.state;\n"; } if (internals_._features & 
bol_bit) { os_ << " bool end_bol_ = bol_;\n"; } os_ << " typename results::iter_type end_token_ = curr_;\n"; if (internals_._features & eol_bit) { os_ << " "; if (pointers_) { os_ << "const void * const *"; } else { os_ << "id_type "; } os_ << "EOL_state_ = 0;\n"; } os_ << '\n'; if (internals_._features & bol_bit) { os_ << " if (bol_)\n"; os_ << " {\n"; os_ << " const "; if (pointers_) { os_ << "void *"; } else { os_ << "id_type "; } os_ << "state_ = *dfa_;\n\n"; os_ << " if (state_)\n"; os_ << " {\n"; os_ << " ptr_ = "; if (pointers_) { os_ << "reinterpret_cast<void * const *>(state_);\n"; } else { os_ << "&dfa_[state_ * dfa_alphabet_];\n"; } os_ << " }\n"; os_ << " }\n\n"; } os_ << " while (curr_ != end_)\n"; os_ << " {\n"; if (internals_._features & eol_bit) { os_ << " EOL_state_ = "; if (pointers_) { os_ << "reinterpret_cast<const void * const *>("; } os_ << "ptr_[" << eol_index << ']'; if (pointers_) { os_ << ')'; } os_ << ";\n\n"; os_ << " if (EOL_state_ && *curr_ == '\\n')\n"; os_ << " {\n"; os_ << " ptr_ = "; if (pointers_) { os_ << "EOL_state_"; } else { os_ << "&dfa_[EOL_state_ * dfa_alphabet_]"; } os_ << ";\n"; os_ << " }\n"; os_ << " else\n"; os_ << " {\n"; ++additional_tabs_; } output_char_loop (internals_._features, additional_tabs_, pointers_, os_, bool_<(sizeof (typename sm::traits::input_char_type) > 1)> ()); if (internals_._features & eol_bit) { output_tabs (additional_tabs_, os_); os_ << " }\n"; --additional_tabs_; } os_ << '\n'; os_ << " if (*ptr_)\n"; os_ << " {\n"; os_ << " end_state_ = true;\n"; if (internals_._features & recursive_bit) { os_ << " pop_ = ("; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*ptr_"; if (pointers_) { os_ << ')'; } os_ <<" & " << pop_dfa_bit; if (pointers_) { os_ << ')'; } os_ << ") != 0;\n"; } os_ << " id_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << id_index << 
")"; if (pointers_) { os_ << "))"; } os_ << ";\n"; os_ << " uid_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << user_id_index << ")"; if (pointers_) { os_ << "))"; } os_ << ";\n"; if (internals_._features & recursive_bit) { os_ << " push_dfa_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << push_dfa_index << ')'; if (pointers_) { os_ << "))"; } os_ << ";\n"; } if (internals_._dfa->size () > 1) { os_ << " start_state_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << next_dfa_index << ')'; if (pointers_) { os_ << "))"; } os_ << ";\n"; } if (internals_._features & bol_bit) { os_ << " end_bol_ = bol_;\n"; } os_ << " end_token_ = curr_;\n"; os_ << " }\n"; os_ << " }\n\n"; output_quit (os_, bool_<(sizeof (typename sm::traits::input_char_type) > 1)> ()); if (internals_._features & eol_bit) { os_ << " if (curr_ == end_)\n"; os_ << " {\n"; os_ << " EOL_state_ = "; if (pointers_) { os_ << "reinterpret_cast<const void * const *>("; } os_ << "ptr_[" << eol_index << ']'; if (pointers_) { os_ << ')'; } os_ << ";\n"; os_ << "\n"; os_ << " if (EOL_state_)\n"; os_ << " {\n"; os_ << " ptr_ = "; if (pointers_) { os_ << "EOL_state_"; } else { os_ << "&dfa_[EOL_state_ * dfa_alphabet_]"; } os_ << ";\n\n"; os_ << " if (*ptr_)\n"; os_ << " {\n"; os_ << " end_state_ = true;\n"; if (internals_._features & recursive_bit) { os_ << " pop_ = ("; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*ptr_"; if (pointers_) { os_ << ')'; } os_ <<" & " << pop_dfa_bit; if (pointers_) { os_ << ')'; } os_ << ") != 0;\n"; } os_ << " id_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << id_index << ")"; if (pointers_) 
{ os_ << "))"; } os_ << ";\n"; os_ << " uid_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << user_id_index << ")"; if (pointers_) { os_ << "))"; } os_ <<";\n"; if (internals_._features & recursive_bit) { os_ << " push_dfa_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << push_dfa_index << ')'; if (pointers_) { os_ << "))"; } os_ << ";\n"; } if (internals_._dfa->size () > 1) { os_ << " start_state_ = "; if (pointers_) { // Done this way for GCC: os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>("; } os_ << "*(ptr_ + " << next_dfa_index << ')'; if (pointers_) { os_ << "))"; } os_ << ";\n"; } if (internals_._features & bol_bit) { os_ << " end_bol_ = bol_;\n"; } os_ << " end_token_ = curr_;\n"; os_ << " }\n"; os_ << " }\n"; os_ << " }\n\n"; } os_ << " if (end_state_)\n"; os_ << " {\n"; os_ << " // Return longest match\n"; if (internals_._features & recursive_bit) { os_ << " if (pop_)\n"; os_ << " {\n"; os_ << " start_state_ = results_." "stack.top ().first;\n"; os_ << " results_.stack.pop ();\n"; os_ << " }\n"; os_ << " else if (push_dfa_ != results_.npos ())\n"; os_ << " {\n"; os_ << " results_.stack.push (typename results::" "id_type_pair\n"; os_ << " (push_dfa_, id_));\n"; os_ << " }\n\n"; } if (internals_._dfa->size () > 1) { os_ << " results_.state = start_state_;\n"; } if (internals_._features & bol_bit) { os_ << " results_.bol = end_bol_;\n"; } os_ << " results_.end = end_token_;\n"; if (internals_._features & skip_bit) { // We want a number regardless of id_type. os_ << "\n if (id_ == results_.skip ()) goto skip;\n"; } if (internals_._features & again_bit) { // We want a number regardless of id_type. 
os_ << "\n if (id_ == " << static_cast<std::size_t>(internals_._eoi); if (internals_._features & recursive_bit) { os_ << " || (pop_ && !results_.stack.empty () &&\n"; // We want a number regardless of id_type. os_ << " results_.stack.top ().second == " << static_cast<std::size_t>(internals_._eoi) << ')'; } os_ << ")\n"; os_ << " {\n"; os_ << " start_ = end_token_;\n"; os_ << " goto again;\n"; os_ << " }\n"; } os_ << " }\n"; os_ << " else\n"; os_ << " {\n"; os_ << " // No match causes char to be skipped\n"; if (internals_._features & bol_bit) { os_ << " results_.bol = *start_ == '\\n';\n"; } os_ << " results_.end = results_.start;\n"; os_ << " ++results_.end;\n"; os_ << " id_ = results::npos ();\n"; os_ << " uid_ = results::npos ();\n"; os_ << " }\n\n"; os_ << " results_.id = id_;\n"; os_ << " results_.user_id = uid_;\n"; os_ << "}\n"; }
void generate_re2c (const basic_state_machine<CharT> &state_machine_, std::ostream &os_, const bool use_pointers_ = false, const bool skip_unknown_ = true, const bool optimise_parameters_ = true, const char *name_ = "next_token") { typedef typename boost::lexer::basic_string_token<CharT> string_token; const detail::internals &sm_ = state_machine_.data (); if (sm_._lookup->size () == 0) { throw runtime_error ("Cannot generate code from an empty " "state machine"); } std::string upper_name_ (__DATE__); const std::size_t lookups_ = sm_._lookup->front ()->size (); typename boost::lexer::basic_state_machine<CharT>::iterator iter_ = state_machine_.begin(); typename boost::lexer::basic_state_machine<CharT>::iterator end_ = state_machine_.end(); const std::size_t dfas_ = sm_._dfa->size (); std::string::size_type pos_ = upper_name_.find (' '); const char *iterator_ = 0; if (use_pointers_) { if (lookups_ == 256) { iterator_ = "const char *"; } else { iterator_ = "const wchar_t *"; } } else { iterator_ = "Iterator &"; } while (pos_ != std::string::npos) { upper_name_.replace (pos_, 1, "_"); pos_ = upper_name_.find (' ', pos_); } upper_name_ += '_'; upper_name_ += __TIME__; pos_ = upper_name_.find (':'); while (pos_ != std::string::npos) { upper_name_.erase (pos_, 1); pos_ = upper_name_.find (':', pos_); } upper_name_ = '_' + upper_name_; upper_name_ = name_ + upper_name_; std::transform (upper_name_.begin (), upper_name_.end (), upper_name_.begin (), ::toupper); os_ << "#ifndef " << upper_name_ + '\n'; os_ << "#define " << upper_name_ + '\n'; os_ << "// Copyright (c) 2008-2009 Ben Hanson\n"; os_ << "//\n"; os_ << "// Distributed under the Boost Software License, " "Version 1.0. 
(See accompanying\n"; os_ << "// file licence_1_0.txt or copy at " "http://www.boost.org/LICENSE_1_0.txt)\n\n"; os_ << "// Auto-generated by boost::lexer\n"; os_ << "template<typename Iterator>\n"; os_ << "std::size_t " << name_ << " ("; if (dfas_ > 1 || !optimise_parameters_) { os_ << "std::size_t &start_state_, "; } if (use_pointers_) { os_ << iterator_ << " &"; } else { os_ << iterator_; } os_ << "start_token_, "; if (use_pointers_) { os_ << iterator_ << " const "; } else { os_ << "const " << iterator_; } os_ << "end_, \n"; os_ << " std::size_t &unique_id_"; if (sm_._seen_BOL_assertion || !optimise_parameters_) { os_ << ", bool &beg_of_line_"; } os_ << ")\n"; os_ << "{\n"; os_ << " static const std::size_t npos = static_cast" "<std::size_t>(~0);\n"; os_ << "\n if (start_token_ == end_)\n"; os_ << " {\n"; os_ << " unique_id_ = npos;\n"; os_ << " return 0;\n"; os_ << " }\n\n"; if (dfas_ > 1) { os_ << "again:\n"; } os_ << " Iterator curr_ = start_token_;\n"; os_ << " bool end_state_ = false;\n"; os_ << " std::size_t id_ = npos;\n"; os_ << " std::size_t uid_ = npos;\n"; if (dfas_ > 1) { os_ << " std::size_t end_start_state_ = start_state_;\n"; } if (sm_._seen_BOL_assertion) { os_ << " bool bol_ = beg_of_line_;\n"; os_ << " bool end_bol_ = bol_;\n"; } os_ << " Iterator end_token_ = start_token_;\n"; os_ << '\n'; if (dfas_ > 1) { os_ << " switch (start_state_)\n"; os_ << " {\n"; for (std::size_t i_ = 0; i_ < dfas_; ++i_) { os_ << " case " << i_ << ":\n"; os_ << " goto " << i_ << "_0;\n"; os_ << " // Not needed, but to prevent warnings\n"; os_ << " break;\n"; } os_ << " default:\n"; os_ << " throw std::runtime_error (\"Invalid start state!\")\n"; os_ << " break;\n"; os_ << " }\n\n"; } os_ << " "; if (lookups_ == 256) { os_ << "char"; } else { os_ << "wchar_t"; } os_ << " ch_ = 0;\n\n"; bool need_state0_0_label = need_label0_0(state_machine_); for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_) { const std::size_t states_ = iter_->states; for (std::size_t state_ = 0; 
state_ < states_; ++state_) { const std::size_t transitions_ = iter_->transitions; std::size_t t_ = 0; if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label) { os_ << "state" << dfa_ << '_' << state_ << ":\n"; } if (iter_->end_state) { os_ << " end_state_ = true;\n"; os_ << " id_ = " << iter_->id << ";\n"; os_ << " uid_ = " << iter_->unique_id << ";\n"; os_ << " end_token_ = curr_;\n"; if (dfas_ > 1) { os_ << " end_start_state_ = " << iter_->goto_dfa << ";\n"; } if (sm_._seen_BOL_assertion) { os_ << " end_bol_ = bol_;\n"; } if (transitions_) os_ << '\n'; } if (t_ < transitions_ || iter_->bol_index != boost::lexer::npos || iter_->eol_index != boost::lexer::npos) { os_ << " if (curr_ == end_) goto end;\n\n"; os_ << " ch_ = *curr_;\n"; if (iter_->bol_index != boost::lexer::npos) { os_ << "\n if (bol_) goto state" << dfa_ << '_' << iter_->bol_index << ";\n\n"; } if (iter_->eol_index != boost::lexer::npos) { os_ << "\n if (ch_ == '\n') goto state" << dfa_ << '_' << iter_->eol_index << ";\n\n"; } os_ << " ++curr_;\n"; } for (; t_ < transitions_; ++t_) { const char *ptr_ = iter_->token._charset.c_str(); const char *end_ = ptr_ + iter_->token._charset.size(); char start_char_ = 0; char curr_char_ = 0; bool range_ = false; bool first_char_ = true; os_ << "\n if ("; while (ptr_ != end_) { curr_char_ = *ptr_++; if (*ptr_ == curr_char_ + 1) { if (!range_) { start_char_ = curr_char_; } range_ = true; } else { if (!first_char_) { if (iter_->token._negated) { os_ << " && "; } else { os_ << " || "; } } first_char_ = false; if (range_) { typename string_token::string temp_; if (iter_->token._negated) { os_ << "!"; } string_token::escape_char (start_char_, temp_); os_ << "(ch_ >= '" << temp_; temp_.clear (); string_token::escape_char (curr_char_, temp_); os_ << "' && ch_ <= '" << temp_ << "')"; range_ = false; } else { typename string_token::string temp_; os_ << "ch_ "; if (iter_->token._negated) { os_ << "!="; } else { os_ << "=="; } string_token::escape_char 
(curr_char_, temp_); os_ << " '" << temp_ << "'"; } } } os_ << ") goto state" << dfa_ << '_' << iter_->goto_state << ";\n\n"; ++iter_; } if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1)) { os_ << " goto end;\n"; } if (transitions_ == 0) ++iter_; } } os_ << "end:\n"; os_ << " if (end_state_)\n"; os_ << " {\n"; os_ << " // return longest match\n"; if (dfas_ > 1) { os_ << " start_state_ = end_start_state_;\n"; } if (sm_._seen_BOL_assertion && dfas_ < 2) { os_ << " beg_of_line_ = end_bol_;\n"; } os_ << " start_token_ = end_token_;\n"; if (dfas_ > 1) { os_ << '\n'; os_ << " if (id_ == 0)\n"; os_ << " {\n"; if (sm_._seen_BOL_assertion) { os_ << " bol_ = end_bol_;\n"; } os_ << " goto again;\n"; os_ << " }\n"; if (sm_._seen_BOL_assertion) { os_ << " else\n"; os_ << " {\n"; os_ << " beg_of_line_ = end_bol_;\n"; os_ << " }\n"; } } os_ << " }\n"; os_ << " else\n"; os_ << " {\n"; if (sm_._seen_BOL_assertion) { os_ << " beg_of_line_ = *start_token_ == '\\n';\n"; } if (skip_unknown_) { os_ << " // No match causes char to be skipped\n"; os_ << " ++start_token_;\n"; } os_ << " id_ = npos;\n"; os_ << " uid_ = npos;\n"; os_ << " }\n"; os_ << '\n'; os_ << " unique_id_ = uid_;\n"; os_ << " return id_;\n"; os_ << "}\n"; os_ << "\n#endif\n"; }