// Writes a dump of every DFA held by state_machine_ to stream_.
//
// For each DFA, lexer_state () is called on the stream, the zero-based DFA
// index and a blank line are written, and dump_ex () is then called with
// the shared iterator and the stream.
//
// state_machine_: the state machine to dump; only begin () and size ()
//                 are used here.
// stream_:        destination stream; it is flushed after each heading
//                 because std::endl is used.
static void dump (const basic_state_machine<CharT> &state_machine_,
        ostream &stream_)
    {
        // One iterator is shared by all dump_ex () calls and is never
        // advanced in this function.
        // NOTE(review): presumably dump_ex () takes it by reference and
        // leaves it positioned at the next DFA - confirm against dump_ex ().
        typename basic_state_machine<CharT>::iterator iter_ =
            state_machine_.begin ();
        const std::size_t dfas_ = state_machine_.size ();

        for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
        {
            lexer_state (stream_);
            stream_ << dfa_ << std::endl << std::endl;

            dump_ex (iter_, stream_);
        }
    }
// Passes the contents of a state machine through an archive.
//
// Each item is handed to ar_ with operator &, in this fixed order:
// version_, *_lookup, _dfa_alphabet, *_dfa, _seen_BOL_assertion and
// _seen_EOL_assertion. The order is part of the stored layout, so it must
// not be changed.
//
// sm_:      state machine whose internals are passed to the archive.
// ar_:      archive object; only its operator & is used.
// version_: handed to the archive like any other item; its value is not
//           inspected here.
void serialise (basic_state_machine<CharT> &sm_, Archive &ar_,
    unsigned int version_ = 1)
{
    // data () is cast to a non-const reference before use.
    // NOTE(review): presumably so that a loading archive can write into
    // the fields - confirm.
    detail::internals &state_ =
        const_cast<detail::internals &> (sm_.data ());

    ar_ & version_;

    // Tables (held through pointer-like members, hence the dereference).
    ar_ & *state_._lookup;
    ar_ & state_._dfa_alphabet;
    ar_ & *state_._dfa;

    // Assertion flags.
    ar_ & state_._seen_BOL_assertion;
    ar_ & state_._seen_EOL_assertion;
}
Exemple #3
0
    static void dump_tables
        (const basic_state_machine<char_type, id_type> &sm_,
        const std::size_t tabs_, const bool pointers_, std::ostream &os_)
    {
        const typename detail::basic_internals<id_type> &internals_ =
            sm_.data ();
        const std::size_t lookup_divisor_ = 8;
        // Lookup is always 256 entries long now
        const std::size_t lookup_quotient_ = 256 / lookup_divisor_;
        const std::size_t dfas_ = internals_._lookup->size ();
        std::size_t col_ = 1;
        std::size_t row_ = 1;

        output_tabs (tabs_, os_);
        os_ << "static const id_type lookup";

        if (dfas_ > 1)
        {
            os_ << "s_[][" << 256;
        }
        else
        {
            os_ << "_[";
        }

        os_ << "] = \n";
        output_tabs (tabs_ + 1, os_);

        if (dfas_ > 1)
        {
            os_ << '{';
        }

        for (std::size_t l_ = 0; l_ < dfas_; ++l_)
        {
            const id_type *ptr_ = &internals_._lookup[l_]->front ();

            // We want numbers regardless of id_type.
            os_ << "{0x" << std::hex << static_cast<std::size_t>(*ptr_++);

            for (col_ = 1; col_ < lookup_divisor_; ++col_)
            {
                // We want numbers regardless of id_type.
                os_ << ", 0x" << std::hex << static_cast<std::size_t>(*ptr_++);
            }

            for (row_ = 1; row_ < lookup_quotient_; ++row_)
            {
                os_ << ",\n";
                output_tabs (tabs_ + 1, os_);
                // We want numbers regardless of id_type.
                os_ << "0x" << std::hex << static_cast<std::size_t>(*ptr_++);

                for (col_ = 1; col_ < lookup_divisor_; ++col_)
                {
                    // We want numbers regardless of id_type.
                    os_ << ", 0x" << std::hex <<
                        static_cast<std::size_t>(*ptr_++);
                }
            }

            os_ << '}';

            if (l_ + 1 < dfas_)
            {
                os_ << ",\n";
                output_tabs (tabs_ + 1, os_);
            }
        }

        if (dfas_ > 1)
        {
            os_ << '}';
        }

        os_ << ";\n";
        output_tabs (tabs_, os_);
        os_ << "static const id_type dfa_alphabet";

        if (dfas_ > 1)
        {
            os_ << "s_[" << dfas_ << "] = {";
        }
        else
        {
            os_ << "_ = ";
        }

        // We want numbers regardless of id_type.
        os_ << "0x" << std::hex << static_cast<std::size_t>
            (internals_._dfa_alphabet[0]);

        for (col_ = 1; col_ < dfas_; ++col_)
        {
            // We want numbers regardless of id_type.
            os_ << ", 0x" << std::hex << static_cast<std::size_t>(internals_.
                _dfa_alphabet[col_]);
        }

        if (dfas_ > 1)
        {
            os_ << '}';
        }

        os_ << ";\n";

        // DFAs are usually different sizes, so dump separately
        for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
        {
            const id_type dfa_alphabet_ = internals_._dfa_alphabet[dfa_];
            const std::size_t rows_ = internals_._dfa[dfa_]->size () /
                dfa_alphabet_;
            const id_type *ptr_ = &internals_._dfa[dfa_]->front ();
            std::string dfa_name_ = "dfa";

            output_tabs (tabs_, os_);
            os_ << "static const ";

            if (pointers_)
            {
                os_ << "void *";
            }
            else
            {
                os_ << "id_type ";
            }

            os_ << dfa_name_;

            if (dfas_ > 1)
            {
                std::ostringstream ss_;

                ss_ << dfa_;
                dfa_name_ += ss_.str ();
                os_ << dfa_;
            }

            dfa_name_ += '_';
            os_ << "_[] = {";

            for (std::size_t row_ = 0; row_ < rows_; ++row_)
            {
                dump_row (row_ == 0, ptr_, dfa_name_, dfa_alphabet_,
                    pointers_, os_);

                if (row_ + 1 < rows_)
                {
                    os_ << ",\n";
                    output_tabs (tabs_ + 1, os_);
                }
            }

            os_ << "};\n";
        }

        if (dfas_ > 1)
        {
            output_tabs (tabs_, os_);
            os_ << "static const ";

            if (pointers_)
            {
                os_ << "void * const";
            }
            else
            {
                os_ << "id_type";
            }

            os_ << " *dfas_[] = {dfa0_";

            for (col_ = 1; col_ < dfas_; ++col_)
            {
                os_ << ", dfa" << col_ << '_';
            }

            os_ << "};\n";
        }
    }
Exemple #4
0
    static void generate_cpp
        (const std::string &name_,
        const basic_state_machine<char_type, id_type> &sm_,
        const bool pointers_, std::ostream &os_)
    {
        typedef basic_state_machine<char_type, id_type> sm;
        typedef typename sm::internals internals;
        const internals &internals_ = sm_.data ();
        std::size_t additional_tabs_ = 0;

        os_ << "template<typename iter_type, typename id_type>\n";
        os_ << "void " << name_ << " (lexertl::";

        if (internals_._features & recursive_bit)
        {
            os_ << "basic_push_match_results";
        }
        else
        {
            os_ << "basic_match_results";
        }

        os_ << "<iter_type, id_type> &results_)\n";
        os_ << "{\n";
        os_ << "    typedef lexertl::";

        if (internals_._features & recursive_bit)
        {
            os_ << "basic_push_match_results";
        }
        else
        {
            os_ << "basic_match_results";
        }

        os_ << "<iter_type, id_type> results;\n";
        os_ << "    typename results::iter_type end_ = results_.eoi;\n";

        if (internals_._features & skip_bit)
        {
            os_ << "skip:\n";
        }

        os_ << "    typename results::iter_type start_ = results_.start = "
            "results_.end;\n\n";

        if (internals_._features & again_bit)
        {
            os_ << "again:\n";
        }

        os_ << "    if (start_ == end_)\n";
        os_ << "    {\n";
        // We want a number regardless of id_type.
        os_ << "        results_.id = " << static_cast<std::size_t>
            (internals_._eoi) << ";\n";
        os_ << "        results_.user_id = results::npos ();\n";
        os_ << "        results_.end = start_;\n";
        os_ << "        return;\n";
        os_ << "    }\n\n";
        os_ << "    typename results::iter_type curr_ = start_;\n";

        if (internals_._features & bol_bit)
        {
            os_ << "    bool bol_ = results_.bol;\n";
        }

        dump_tables (sm_, 1, pointers_, os_);

        if (internals_._dfa->size () > 1)
        {
            os_ << "    const id_type *lookup_ = lookups_[results_.state];\n";
            os_ << "    const id_type dfa_alphabet_ = dfa_alphabets_"
                "[results_.state];\n";
            os_ << "    const ";

            if (pointers_)
            {
                os_ << "void * const";
            }
            else
            {
                os_ << "id_type";
            }

            os_ << " *dfa_ = dfas_[results_.state];\n";
        }

        os_ << "    const ";

        if (pointers_)
        {
            os_ << "void * const";
        }
        else
        {
            os_ << "id_type";
        }

        os_ << " *ptr_ = dfa_ + dfa_alphabet_;\n";
        os_ << "    bool end_state_ = *ptr_ != 0;\n";

        if (internals_._features & recursive_bit)
        {
            os_ << "    bool pop_ = (";

            if (pointers_)
            {
                // Done this way for GCC:
                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
            }

            os_ << "*ptr_";

            if (pointers_)
            {
                os_ << ')';
            }

            os_ <<" & " << pop_dfa_bit;

            if (pointers_)
            {
                os_ << ')';
            }

            os_ << ") != 0;\n";
        }

        os_ << "    id_type id_ = ";

        if (pointers_)
        {
            // Done this way for GCC:
            os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
        }

        os_ << "*(ptr_ + " << id_index << ")";

        if (pointers_)
        {
            os_ << "))";
        }

        os_ << ";\n";
        os_ << "    id_type uid_ = ";

        if (pointers_)
        {
            // Done this way for GCC:
            os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
        }

        os_ << "*(ptr_ + " << user_id_index << ")";

        if (pointers_)
        {
            os_ << "))";
        }

        os_ << ";\n";

        if (internals_._features & recursive_bit)
        {
            os_ << "    id_type push_dfa_ = ";

            if (pointers_)
            {
                // Done this way for GCC:
                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
            }

            os_ << "*(ptr_ + " << push_dfa_index << ")";

            if (pointers_)
            {
                os_ << "))";
            }

            os_ << ";\n";
        }

        if (internals_._dfa->size () > 1)
        {
            os_ << "    id_type start_state_ = results_.state;\n";
        }

        if (internals_._features & bol_bit)
        {
            os_ << "    bool end_bol_ = bol_;\n";
        }

        os_ << "    typename results::iter_type end_token_ = curr_;\n";

        if (internals_._features & eol_bit)
        {
            os_ << "    ";

            if (pointers_)
            {
                os_ << "const void * const *";
            }
            else
            {
                os_ << "id_type ";
            }

            os_ << "EOL_state_ = 0;\n";
        }

        os_ << '\n';

        if (internals_._features & bol_bit)
        {
            os_ << "    if (bol_)\n";
            os_ << "    {\n";
            os_ << "        const ";

            if (pointers_)
            {
                os_ << "void *";
            }
            else
            {
                os_ << "id_type ";
            }

            os_ << "state_ = *dfa_;\n\n";
            os_ << "        if (state_)\n";
            os_ << "        {\n";
            os_ << "            ptr_ = ";

            if (pointers_)
            {
                os_ << "reinterpret_cast<void * const *>(state_);\n";
            }
            else
            {
                os_ << "&dfa_[state_ * dfa_alphabet_];\n";
            }

            os_ << "        }\n";
            os_ << "    }\n\n";
        }

        os_ << "    while (curr_ != end_)\n";
        os_ << "    {\n";

        if (internals_._features & eol_bit)
        {
            os_ << "        EOL_state_ = ";

            if (pointers_)
            {
                os_ << "reinterpret_cast<const void * const *>(";
            }

            os_ << "ptr_[" << eol_index << ']';

            if (pointers_)
            {
                os_ << ')';
            }

            os_ << ";\n\n";
            os_ << "        if (EOL_state_ && *curr_ == '\\n')\n";
            os_ << "        {\n";
            os_ << "            ptr_ = ";

            if (pointers_)
            {
                os_ << "EOL_state_";
            }
            else
            {
                os_ << "&dfa_[EOL_state_ * dfa_alphabet_]";
            }

            os_ << ";\n";
            os_ << "        }\n";
            os_ << "        else\n";
            os_ << "        {\n";
            ++additional_tabs_;
        }

        output_char_loop (internals_._features, additional_tabs_, pointers_,
            os_, bool_<(sizeof (typename sm::traits::input_char_type) > 1)> ());

        if (internals_._features & eol_bit)
        {
            output_tabs (additional_tabs_, os_);
            os_ << "    }\n";
            --additional_tabs_;
        }

        os_ << '\n';
        os_ << "        if (*ptr_)\n";
        os_ << "        {\n";
        os_ << "            end_state_ = true;\n";


        if (internals_._features & recursive_bit)
        {
            os_ << "            pop_ = (";

            if (pointers_)
            {
                // Done this way for GCC:
                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
            }

            os_ << "*ptr_";

            if (pointers_)
            {
                os_ << ')';
            }

            os_ <<" & " << pop_dfa_bit;

            if (pointers_)
            {
                os_ << ')';
            }

            os_ << ") != 0;\n";
        }

        os_ << "            id_ = ";

        if (pointers_)
        {
            // Done this way for GCC:
            os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
        }

        os_ << "*(ptr_ + " << id_index << ")";

        if (pointers_)
        {
            os_ << "))";
        }

        os_ << ";\n";
        os_ << "            uid_ = ";

        if (pointers_)
        {
            // Done this way for GCC:
            os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
        }

        os_ << "*(ptr_ + " << user_id_index << ")";

        if (pointers_)
        {
            os_ << "))";
        }

        os_ << ";\n";

        if (internals_._features & recursive_bit)
        {
            os_ << "            push_dfa_ = ";

            if (pointers_)
            {
                // Done this way for GCC:
                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
            }

            os_ << "*(ptr_ + " << push_dfa_index << ')';

            if (pointers_)
            {
                os_ << "))";
            }

            os_ << ";\n";
        }

        if (internals_._dfa->size () > 1)
        {
            os_ << "            start_state_ = ";

            if (pointers_)
            {
                // Done this way for GCC:
                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
            }

            os_ << "*(ptr_ + " << next_dfa_index << ')';

            if (pointers_)
            {
                os_ << "))";
            }

            os_ << ";\n";
        }

        if (internals_._features & bol_bit)
        {
            os_ << "            end_bol_ = bol_;\n";
        }

        os_ << "            end_token_ = curr_;\n";
        os_ << "        }\n";
        os_ << "    }\n\n";
        output_quit (os_,
            bool_<(sizeof (typename sm::traits::input_char_type) > 1)> ());

        if (internals_._features & eol_bit)
        {
            os_ << "    if (curr_ == end_)\n";
            os_ << "    {\n";
            os_ << "        EOL_state_ = ";

            if (pointers_)
            {
                os_ << "reinterpret_cast<const void * const *>(";
            }

            os_ << "ptr_[" <<  eol_index << ']';

            if (pointers_)
            {
                os_ << ')';
            }

            os_ << ";\n";
            os_ << "\n";
            os_ << "        if (EOL_state_)\n";
            os_ << "        {\n";
            os_ << "            ptr_ = ";

            if (pointers_)
            {
                os_ << "EOL_state_";
            }
            else
            {
                os_ << "&dfa_[EOL_state_ * dfa_alphabet_]";
            }

            os_ << ";\n\n";
            os_ << "            if (*ptr_)\n";
            os_ << "            {\n";
            os_ << "                end_state_ = true;\n";


            if (internals_._features & recursive_bit)
            {
                os_ << "                pop_ = (";

                if (pointers_)
                {
                    // Done this way for GCC:
                    os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
                }

                os_ << "*ptr_";

                if (pointers_)
                {
                    os_ << ')';
                }

                os_ <<" & " << pop_dfa_bit;

                if (pointers_)
                {
                    os_ << ')';
                }

                os_ << ") != 0;\n";
            }

            os_ << "                id_ = ";

            if (pointers_)
            {
                // Done this way for GCC:
                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
            }

            os_ << "*(ptr_ + " << id_index << ")";

            if (pointers_)
            {
                os_ << "))";
            }

            os_ << ";\n";
            os_ << "                uid_ = ";

            if (pointers_)
            {
                // Done this way for GCC:
                os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
            }

            os_ << "*(ptr_ + " << user_id_index << ")";

            if (pointers_)
            {
                os_ << "))";
            }

            os_ <<";\n";

            if (internals_._features & recursive_bit)
            {
                os_ << "                push_dfa_ = ";

                if (pointers_)
                {
                    // Done this way for GCC:
                    os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
                }

                os_ << "*(ptr_ + " << push_dfa_index << ')';

                if (pointers_)
                {
                    os_ << "))";
                }

                os_ << ";\n";
            }

            if (internals_._dfa->size () > 1)
            {
                os_ << "                start_state_ = ";

                if (pointers_)
                {
                    // Done this way for GCC:
                    os_ << "static_cast<id_type>(reinterpret_cast<ptrdiff_t>(";
                }

                os_ << "*(ptr_ + " << next_dfa_index << ')';

                if (pointers_)
                {
                    os_ << "))";
                }

                os_ << ";\n";
            }

            if (internals_._features & bol_bit)
            {
                os_ << "                end_bol_ = bol_;\n";
            }

            os_ << "                end_token_ = curr_;\n";
            os_ << "            }\n";
            os_ << "        }\n";
            os_ << "    }\n\n";
        }

        os_ << "    if (end_state_)\n";
        os_ << "    {\n";
        os_ << "        // Return longest match\n";

        if (internals_._features & recursive_bit)
        {
            os_ << "        if (pop_)\n";
            os_ << "        {\n";
            os_ << "            start_state_ =  results_."
                "stack.top ().first;\n";
            os_ << "            results_.stack.pop ();\n";
            os_ << "        }\n";
            os_ << "        else if (push_dfa_ != results_.npos ())\n";
            os_ << "        {\n";
            os_ << "            results_.stack.push (typename results::"
                "id_type_pair\n";
            os_ << "                (push_dfa_, id_));\n";
            os_ << "        }\n\n";
        }

        if (internals_._dfa->size () > 1)
        {
            os_ << "        results_.state = start_state_;\n";
        }

        if (internals_._features & bol_bit)
        {
            os_ << "        results_.bol = end_bol_;\n";
        }

        os_ << "        results_.end = end_token_;\n";

        if (internals_._features & skip_bit)
        {
            // We want a number regardless of id_type.
            os_ << "\n        if (id_ == results_.skip ()) goto skip;\n";
        }

        if (internals_._features & again_bit)
        {
            // We want a number regardless of id_type.
            os_ << "\n        if (id_ == "
                << static_cast<std::size_t>(internals_._eoi);

            if (internals_._features & recursive_bit)
            {
                os_ << " || (pop_ && !results_.stack.empty () &&\n";
                // We want a number regardless of id_type.
                os_ << "            results_.stack.top ().second == "
                    << static_cast<std::size_t>(internals_._eoi) << ')';
            }

            os_ << ")\n";
            os_ << "        {\n";
            os_ << "            start_ = end_token_;\n";
            os_ << "            goto again;\n";
            os_ << "        }\n";
        }

        os_ << "    }\n";
        os_ << "    else\n";
        os_ << "    {\n";
        os_ << "        // No match causes char to be skipped\n";

        if (internals_._features & bol_bit)
        {
            os_ << "        results_.bol = *start_ == '\\n';\n";
        }

        os_ << "        results_.end = results_.start;\n";
        os_ << "        ++results_.end;\n";
        os_ << "        id_ = results::npos ();\n";
        os_ << "        uid_ = results::npos ();\n";
        os_ << "    }\n\n";
        os_ << "    results_.id = id_;\n";
        os_ << "    results_.user_id = uid_;\n";
        os_ << "}\n";
    }
void generate_cpp (const basic_state_machine<CharT> &state_machine_,
                   std::ostream &os_, const bool use_pointers_ = false,
                   const bool skip_unknown_ = true, const bool optimise_parameters_ = true,
                   const char *name_ = "next_token")
{
    const detail::internals &sm_ = state_machine_.data ();

    if (sm_._lookup->size () == 0)
    {
        throw runtime_error ("Cannot generate code from an empty "
                             "state machine");
    }

    std::string upper_name_ (__DATE__);
    const std::size_t lookups_ = sm_._lookup->front ()->size ();
    const std::size_t dfas_ = sm_._dfa->size ();
    std::string::size_type pos_ = upper_name_.find (' ');
    const char *iterator_ = 0;

    if (use_pointers_)
    {
        if (lookups_ == 256)
        {
            iterator_ = "const char *";
        }
        else
        {
            iterator_ = "const wchar_t *";
        }
    }
    else
    {
        iterator_ = "Iterator &";
    }

    while (pos_ != std::string::npos)
    {
        upper_name_.replace (pos_, 1, "_");
        pos_ = upper_name_.find (' ', pos_);
    }

    upper_name_ += '_';
    upper_name_ +=  __TIME__;

    pos_ = upper_name_.find (':');

    while (pos_ != std::string::npos)
    {
        upper_name_.erase (pos_, 1);
        pos_ = upper_name_.find (':', pos_);
    }

    upper_name_ = '_' + upper_name_;
    upper_name_ = name_ + upper_name_;
    std::transform (upper_name_.begin (), upper_name_.end (),
                    upper_name_.begin (), ::toupper);
    os_ << "#ifndef " << upper_name_ + '\n';
    os_ << "#define " << upper_name_ + '\n';
    os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
    os_ << "//\n";
    os_ << "// Distributed under the Boost Software License, "
        "Version 1.0. (See accompanying\n";
    os_ << "// file licence_1_0.txt or copy at "
        "http://www.lslboost.org/LICENSE_1_0.txt)\n\n";
    os_ << "// Auto-generated by lslboost::lexer\n";
    os_ << "template<typename Iterator>\n";
    os_ << "std::size_t " << name_  << " (";

    if (dfas_ > 1 || !optimise_parameters_)
    {
        os_ << "std::size_t &start_state_, ";
    }

    if (use_pointers_)
    {
        os_ << iterator_ << " &";
    }
    else
    {
        os_ << iterator_;
    }

    os_ << "start_token_, ";

    if (use_pointers_)
    {
        os_ << iterator_ << " const ";
    }
    else
    {
        os_ << "const " << iterator_;
    }

    os_ << "end_, \n";
    os_ << "    std::size_t &unique_id_";

    if (sm_._seen_BOL_assertion || !optimise_parameters_)
    {
        os_ << ", bool &beg_of_line_";
    }

    os_ << ")\n";
    os_ << "{\n";
    os_ << "    enum {end_state_index, id_index, unique_id_index, state_index, bol_index,\n";
    os_ << "        eol_index, dead_state_index, dfa_offset};\n";
    os_ << "    static const std::size_t npos = static_cast"
        "<std::size_t>(~0);\n";

    if (dfas_ > 1)
    {
        std::size_t state_ = 0;

        for (; state_ < dfas_; ++state_)
        {
            std::size_t i_ = 0;
            std::size_t j_ = 1;
            std::size_t count_ = lookups_ / 8;
            const std::size_t *lookup_ = &sm_._lookup[state_]->front ();
            const std::size_t *dfa_ = &sm_._dfa[state_]->front ();

            os_ << "    static const std::size_t lookup" << state_ << "_[" <<
                lookups_ << "] = {";

            for (; i_ < count_; ++i_)
            {
                const std::size_t index_ = i_ * 8;

                os_ << lookup_[index_];

                for (; j_ < 8; ++j_)
                {
                    os_ << ", " << lookup_[index_ + j_];
                }

                if (i_ < count_ - 1)
                {
                    os_ << "," << std::endl << "        ";
                }

                j_ = 1;
            }

            os_ << "};\n";
            count_ = sm_._dfa[state_]->size ();
            os_ << "    static const std::size_t dfa" << state_ << "_[" <<
                count_ << "] = {";
            count_ /= 8;

            for (i_ = 0; i_ < count_; ++i_)
            {
                const std::size_t index_ = i_ * 8;

                os_ << dfa_[index_];

                for (j_ = 1; j_ < 8; ++j_)
                {
                    os_ << ", " << dfa_[index_ + j_];
                }

                if (i_ < count_ - 1)
                {
                    os_ << "," << std::endl << "        ";
                }
            }

            const std::size_t mod_ = sm_._dfa[state_]->size () % 8;

            if (mod_)
            {
                const std::size_t index_ = count_ * 8;

                if (count_)
                {
                    os_ << ",\n        ";
                }

                os_ << dfa_[index_];

                for (j_ = 1; j_ < mod_; ++j_)
                {
                    os_ << ", " << dfa_[index_ + j_];
                }
            }

            os_ << "};\n";
        }

        std::size_t count_ = sm_._dfa_alphabet.size ();
        std::size_t i_ = 1;

        os_ << "    static const std::size_t *lookup_arr_[" << count_ <<
            "] = {";
        os_ << "lookup0_";

        for (i_ = 1; i_ < count_; ++i_)
        {
            os_ << ", " << "lookup" << i_ << "_";
        }

        os_ << "};\n";
        os_ << "    static const std::size_t dfa_alphabet_arr_[" << count_ <<
            "] = {";
        os_ << sm_._dfa_alphabet.front ();

        for (i_ = 1; i_ < count_; ++i_)
        {
            os_ << ", " << sm_._dfa_alphabet[i_];
        }

        os_ << "};\n";
        os_ << "    static const std::size_t *dfa_arr_[" << count_ <<
            "] = {";
        os_ << "dfa0_";

        for (i_ = 1; i_ < count_; ++i_)
        {
            os_ << ", " << "dfa" << i_ << "_";
        }

        os_ << "};\n";
    }
    else
    {
        const std::size_t *lookup_ = &sm_._lookup->front ()->front ();
        const std::size_t *dfa_ = &sm_._dfa->front ()->front ();
        std::size_t i_ = 0;
        std::size_t j_ = 1;
        std::size_t count_ = lookups_ / 8;

        os_ << "    static const std::size_t lookup_[";
        os_ << sm_._lookup->front ()->size () << "] = {";

        for (; i_ < count_; ++i_)
        {
            const std::size_t index_ = i_ * 8;

            os_ << lookup_[index_];

            for (; j_ < 8; ++j_)
            {
                os_ << ", " << lookup_[index_ + j_];
            }

            if (i_ < count_ - 1)
            {
                os_ << "," << std::endl << "        ";
            }

            j_ = 1;
        }

        os_ << "};\n";
        os_ << "    static const std::size_t dfa_alphabet_ = " <<
            sm_._dfa_alphabet.front () << ";\n";
        os_ << "    static const std::size_t dfa_[" <<
            sm_._dfa->front ()->size () << "] = {";
        count_ = sm_._dfa->front ()->size () / 8;

        for (i_ = 0; i_ < count_; ++i_)
        {
            const std::size_t index_ = i_ * 8;

            os_ << dfa_[index_];

            for (j_ = 1; j_ < 8; ++j_)
            {
                os_ << ", " << dfa_[index_ + j_];
            }

            if (i_ < count_ - 1)
            {
                os_ << "," << std::endl << "        ";
            }
        }

        const std::size_t mod_ = sm_._dfa->front ()->size () % 8;

        if (mod_)
        {
            const std::size_t index_ = count_ * 8;

            if (count_)
            {
                os_ << ",\n        ";
            }

            os_ << dfa_[index_];

            for (j_ = 1; j_ < mod_; ++j_)
            {
                os_ << ", " << dfa_[index_ + j_];
            }
        }

        os_ << "};\n";
    }

    os_ << "\n    if (start_token_ == end_)\n";
    os_ << "    {\n";
    os_ << "        unique_id_ = npos;\n";
    os_ << "        return 0;\n";
    os_ << "    }\n\n";

    if (dfas_ > 1)
    {
        os_ << "again:\n";
        os_ << "    const std::size_t * lookup_ = "
            "lookup_arr_[start_state_];\n";
        os_ << "    std::size_t dfa_alphabet_ = "
            "dfa_alphabet_arr_[start_state_];\n";
        os_ << "    const std::size_t *dfa_ = dfa_arr_[start_state_];\n";
    }

    os_ << "    const std::size_t *ptr_ = dfa_ + dfa_alphabet_;\n";
    os_ << "    Iterator curr_ = start_token_;\n";
    os_ << "    bool end_state_ = *ptr_ != 0;\n";
    os_ << "    std::size_t id_ = *(ptr_ + id_index);\n";
    os_ << "    std::size_t uid_ = *(ptr_ + unique_id_index);\n";

    if (dfas_ > 1)
    {
        os_ << "    std::size_t end_start_state_ = start_state_;\n";
    }

    if (sm_._seen_BOL_assertion)
    {
        os_ << "    bool bol_ = beg_of_line_;\n";
        os_ << "    bool end_bol_ = bol_;\n";
    }

    os_ << "    Iterator end_token_ = start_token_;\n";
    os_ << '\n';
    os_ << "    while (curr_ != end_)\n";
    os_ << "    {\n";

    if (sm_._seen_BOL_assertion)
    {
        os_ << "        const std::size_t BOL_state_ = ptr_[bol_index];\n";
    }

    if (sm_._seen_EOL_assertion)
    {
        os_ << "        const std::size_t EOL_state_ = ptr_[eol_index];\n";
    }

    if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
    {
        os_ << '\n';
    }

    if (sm_._seen_BOL_assertion)
    {
        os_ << "        if (BOL_state_ && bol_)\n";
        os_ << "        {\n";
        os_ << "            ptr_ = &dfa_[BOL_state_ * dfa_alphabet_];\n";
        os_ << "        }\n";
    }

    if (sm_._seen_EOL_assertion)
    {
        os_ << "        ";

        if (sm_._seen_BOL_assertion)
        {
            os_ << "else ";
        }

        os_ << "if (EOL_state_ && *curr_ == '\\n')\n";
        os_ << "        {\n";
        os_ << "            ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
        os_ << "        }\n";
    }

    std::string tab_ (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion ? "    " : "");

    if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
    {
        os_ << "        else\n";
        os_ << "        {\n";
    }

    if (sm_._seen_BOL_assertion)
    {
        os_ << "            ";

        if (lookups_ == 256)
        {
            os_ << "char";
        }
        else
        {
            os_ << "wchar_t";
        }

        os_ << " prev_char_ = *curr_++;\n\n";
        os_ << "            bol_ = prev_char_ == '\\n';\n\n";
    }

    os_ << tab_;
    os_ << "        const std::size_t state_ =\n";
    os_ << tab_;
    os_ << "            ptr_[lookup_[";

    if (lookups_ == 256)
    {
        os_ << "static_cast<unsigned char>(";
    }

    if (sm_._seen_BOL_assertion)
    {
        os_ << "prev_char";
    }
    else
    {
        os_ << "*curr_++";
    }


    if (lookups_ == 256)
    {
        os_ << ')';
    }

    os_ << "]];\n\n";

    os_ << tab_;
    os_ << "        if (state_ == 0) break;\n\n";
    os_ << tab_;
    os_ << "        ptr_ = &dfa_[state_ * dfa_alphabet_];\n";

    if (sm_._seen_BOL_assertion || sm_._seen_EOL_assertion)
    {
        os_ << "        }\n";
    }

    os_ << '\n';
    os_ << "        if (*ptr_)\n";
    os_ << "        {\n";
    os_ << "            end_state_ = true;\n";
    os_ << "            id_ = *(ptr_ + id_index);\n";
    os_ << "            uid_ = *(ptr_ + unique_id_index);\n";

    if (dfas_ > 1)
    {
        os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
    }

    if (sm_._seen_BOL_assertion)
    {
        os_ << "            end_bol_ = bol_;\n";
    }

    os_ << "            end_token_ = curr_;\n";
    os_ << "        }\n";
    os_ << "    }\n";
    os_ << '\n';

    if (sm_._seen_EOL_assertion)
    {
        os_ << "    const std::size_t EOL_state_ = ptr_[eol_index];\n";
        os_ << '\n';
        os_ << "    if (EOL_state_ && curr_ == end_)\n";
        os_ << "    {\n";
        os_ << "        ptr_ = &dfa_[EOL_state_ * dfa_alphabet_];\n";
        os_ << '\n';
        os_ << "        if (*ptr_)\n";
        os_ << "        {\n";
        os_ << "            end_state_ = true;\n";
        os_ << "            id_ = *(ptr_ + id_index);\n";
        os_ << "            uid_ = *(ptr_ + unique_id_index);\n";

        if (dfas_ > 1)
        {
            os_ << "            end_start_state_ = *(ptr_ + state_index);\n";
        }

        if (sm_._seen_BOL_assertion)
        {
            os_ << "            end_bol_ = bol_;\n";
        }

        os_ << "            end_token_ = curr_;\n";
        os_ << "        }\n";
        os_ << "    }\n";
        os_ << '\n';
    }

    os_ << "    if (end_state_)\n";
    os_ << "    {\n";
    os_ << "        // return longest match\n";

    if (dfas_ > 1)
    {
        os_ << "        start_state_ = end_start_state_;\n";
    }

    if (sm_._seen_BOL_assertion && dfas_ < 2)
    {
        os_ << "        beg_of_line_ = end_bol_;\n";
    }

    os_ << "        start_token_ = end_token_;\n";

    if (dfas_ > 1)
    {
        os_ << '\n';
        os_ << "        if (id_ == 0)\n";
        os_ << "        {\n";

        if (sm_._seen_BOL_assertion)
        {
            os_ << "            bol_ = end_bol_;\n";
        }

        os_ << "            goto again;\n";
        os_ << "        }\n";

        if (sm_._seen_BOL_assertion)
        {
            os_ << "        else\n";
            os_ << "        {\n";
            os_ << "            beg_of_line_ = end_bol_;\n";
            os_ << "        }\n";
        }
    }

    os_ << "    }\n";
    os_ << "    else\n";
    os_ << "    {\n";

    if (sm_._seen_BOL_assertion)
    {
        os_ << "        beg_of_line_ = *start_token_ == '\\n';\n";
    }

    if (skip_unknown_)
    {
        os_ << "        // No match causes char to be skipped\n";
        os_ << "        ++start_token_;\n";
    }

    os_ << "        id_ = npos;\n";
    os_ << "        uid_ = npos;\n";
    os_ << "    }\n";
    os_ << '\n';
    os_ << "    unique_id_ = uid_;\n";
    os_ << "    return id_;\n";
    os_ << "}\n";
    os_ << "\n#endif\n";
}
Exemple #6
0
void generate_re2c (const basic_state_machine<CharT> &state_machine_,
    std::ostream &os_, const bool use_pointers_ = false,
    const bool skip_unknown_ = true, const bool optimise_parameters_ = true,
    const char *name_ = "next_token")
{
    typedef typename boost::lexer::basic_string_token<CharT> string_token;
    const detail::internals &sm_ = state_machine_.data ();

    if (sm_._lookup->size () == 0)
    {
        throw runtime_error ("Cannot generate code from an empty "
            "state machine");
    }

    std::string upper_name_ (__DATE__);
    const std::size_t lookups_ = sm_._lookup->front ()->size ();
    typename boost::lexer::basic_state_machine<CharT>::iterator iter_ =
        state_machine_.begin();
    typename boost::lexer::basic_state_machine<CharT>::iterator end_ =
        state_machine_.end();
    const std::size_t dfas_ = sm_._dfa->size ();
    std::string::size_type pos_ = upper_name_.find (' ');
    const char *iterator_ = 0;

    if (use_pointers_)
    {
        if (lookups_ == 256)
        {
            iterator_ = "const char *";
        }
        else
        {
            iterator_ = "const wchar_t *";
        }
    }
    else
    {
        iterator_ = "Iterator &";
    }

    while (pos_ != std::string::npos)
    {
        upper_name_.replace (pos_, 1, "_");
        pos_ = upper_name_.find (' ', pos_);
    }

    upper_name_ += '_';
    upper_name_ +=  __TIME__;

    pos_ = upper_name_.find (':');

    while (pos_ != std::string::npos)
    {
        upper_name_.erase (pos_, 1);
        pos_ = upper_name_.find (':', pos_);
    }

    upper_name_ = '_' + upper_name_;
    upper_name_ = name_ + upper_name_;
    std::transform (upper_name_.begin (), upper_name_.end (),
        upper_name_.begin (), ::toupper);
    os_ << "#ifndef " << upper_name_ + '\n';
    os_ << "#define " << upper_name_ + '\n';
    os_ << "// Copyright (c) 2008-2009 Ben Hanson\n";
    os_ << "//\n";
    os_ << "// Distributed under the Boost Software License, "
        "Version 1.0. (See accompanying\n";
    os_ << "// file licence_1_0.txt or copy at "
        "http://www.boost.org/LICENSE_1_0.txt)\n\n";
    os_ << "// Auto-generated by boost::lexer\n";
    os_ << "template<typename Iterator>\n";
    os_ << "std::size_t " << name_  << " (";

    if (dfas_ > 1 || !optimise_parameters_)
    {
        os_ << "std::size_t &start_state_, ";
    }

    if (use_pointers_)
    {
        os_ << iterator_ << " &";
    }
    else
    {
        os_ << iterator_;
    }

    os_ << "start_token_, ";

    if (use_pointers_)
    {
        os_ << iterator_ << " const ";
    }
    else
    {
        os_ << "const " << iterator_;
    }

    os_ << "end_, \n";
    os_ << "    std::size_t &unique_id_";

    if (sm_._seen_BOL_assertion || !optimise_parameters_)
    {
        os_ << ", bool &beg_of_line_";
    }

    os_ << ")\n";
    os_ << "{\n";
    os_ << "    static const std::size_t npos = static_cast"
        "<std::size_t>(~0);\n";
    os_ << "\n    if (start_token_ == end_)\n";
    os_ << "    {\n";
    os_ << "        unique_id_ = npos;\n";
    os_ << "        return 0;\n";
    os_ << "    }\n\n";

    if (dfas_ > 1)
    {
        os_ << "again:\n";
    }

    os_ << "    Iterator curr_ = start_token_;\n";
    os_ << "    bool end_state_ = false;\n";
    os_ << "    std::size_t id_ = npos;\n";
    os_ << "    std::size_t uid_ = npos;\n";

    if (dfas_ > 1)
    {
        os_ << "    std::size_t end_start_state_ = start_state_;\n";
    }

    if (sm_._seen_BOL_assertion)
    {
        os_ << "    bool bol_ = beg_of_line_;\n";
        os_ << "    bool end_bol_ = bol_;\n";
    }

    os_ << "    Iterator end_token_ = start_token_;\n";
    os_ << '\n';

    if (dfas_ > 1)
    {
        os_ << "    switch (start_state_)\n";
        os_ << "    {\n";

        for (std::size_t i_ = 0; i_ < dfas_; ++i_)
        {
            os_ << "    case " << i_ << ":\n";
            os_ << "        goto " << i_ << "_0;\n";
            os_ << "        // Not needed, but to prevent warnings\n";
            os_ << "        break;\n";
        }

        os_ << "    default:\n";
        os_ << "        throw std::runtime_error (\"Invalid start state!\")\n";
        os_ << "        break;\n";
        os_ << "    }\n\n";
    }

    os_ << "    ";

    if (lookups_ == 256)
    {
        os_ << "char";
    }
    else
    {
        os_ << "wchar_t";
    }

    os_ << " ch_ = 0;\n\n";

    bool need_state0_0_label = need_label0_0(state_machine_);

    for (std::size_t dfa_ = 0; dfa_ < dfas_; ++dfa_)
    {
        const std::size_t states_ = iter_->states;

        for (std::size_t state_ = 0; state_ < states_; ++state_)
        {
            const std::size_t transitions_ = iter_->transitions;
            std::size_t t_ = 0;

            if (dfas_ > 1 || dfa_ != 0 || state_ != 0 || need_state0_0_label)
            {
                os_ << "state" << dfa_ << '_' << state_ << ":\n";
            }

            if (iter_->end_state)
            {
                os_ << "    end_state_ = true;\n";
                os_ << "    id_ = " << iter_->id << ";\n";
                os_ << "    uid_ = " << iter_->unique_id << ";\n";
                os_ << "    end_token_ = curr_;\n";

                if (dfas_ > 1)
                {
                    os_ << "    end_start_state_ = " << iter_->goto_dfa <<
                        ";\n";
                }

                if (sm_._seen_BOL_assertion)
                {
                    os_ << "    end_bol_ = bol_;\n";
                }

                if (transitions_) os_ << '\n';
            }

            if (t_ < transitions_ || iter_->bol_index != boost::lexer::npos ||
                iter_->eol_index != boost::lexer::npos)
            {
                os_ << "    if (curr_ == end_) goto end;\n\n";
                os_ << "    ch_ = *curr_;\n";

                if (iter_->bol_index != boost::lexer::npos)
                {
                    os_ << "\n    if (bol_) goto state" << dfa_ << '_' <<
                        iter_->bol_index << ";\n\n";
                }

                if (iter_->eol_index != boost::lexer::npos)
                {
                    os_ << "\n    if (ch_ == '\n') goto state" << dfa_ << '_' <<
                        iter_->eol_index << ";\n\n";
                }

                os_ << "    ++curr_;\n";
            }

            for (; t_ < transitions_; ++t_)
            {
                const char *ptr_ = iter_->token._charset.c_str();
                const char *end_ = ptr_ + iter_->token._charset.size();
                char start_char_ = 0;
                char curr_char_ = 0;
                bool range_ = false;
                bool first_char_ = true;

                os_ << "\n    if (";

                while (ptr_ != end_)
                {
                    curr_char_ = *ptr_++;

                    if (*ptr_ == curr_char_ + 1)
                    {
                        if (!range_)
                        {
                            start_char_ = curr_char_;
                        }

                        range_ = true;
                    }
                    else
                    {
                        if (!first_char_)
                        {
                            if (iter_->token._negated)
                            {
                                os_ << " && ";
                            }
                            else
                            {
                                os_ << " || ";
                            }
                        }

                        first_char_ = false;

                        if (range_)
                        {
                            typename string_token::string temp_;

                            if (iter_->token._negated)
                            {
                                os_ << "!";
                            }

                            string_token::escape_char (start_char_, temp_);
                            os_ << "(ch_ >= '" << temp_;
                            temp_.clear ();
                            string_token::escape_char (curr_char_, temp_);
                            os_ << "' && ch_ <= '" << temp_ << "')";
                            range_ = false;
                        }
                        else
                        {
                            typename string_token::string temp_;

                            os_ << "ch_ ";

                            if (iter_->token._negated)
                            {
                                os_ << "!=";
                            }
                            else
                            {
                                os_ << "==";
                            }

                            string_token::escape_char (curr_char_, temp_);
                            os_ << " '" << temp_ << "'";
                        }
                    }
                }

                os_ << ") goto state" << dfa_ << '_' << iter_->goto_state <<
                    ";\n\n";
                ++iter_;
            }

            if (!(dfa_ == dfas_ - 1 && state_ == states_ - 1))
            {
                os_ << "    goto end;\n";
            }

            if (transitions_ == 0) ++iter_;
        }
    }

    os_ << "end:\n";
    os_ << "    if (end_state_)\n";
    os_ << "    {\n";
    os_ << "        // return longest match\n";

    if (dfas_ > 1)
    {
        os_ << "        start_state_ = end_start_state_;\n";
    }

    if (sm_._seen_BOL_assertion && dfas_ < 2)
    {
        os_ << "        beg_of_line_ = end_bol_;\n";
    }

    os_ << "        start_token_ = end_token_;\n";

    if (dfas_ > 1)
    {
        os_ << '\n';
        os_ << "        if (id_ == 0)\n";
        os_ << "        {\n";

        if (sm_._seen_BOL_assertion)
        {
            os_ << "            bol_ = end_bol_;\n";
        }

        os_ << "            goto again;\n";
        os_ << "        }\n";

        if (sm_._seen_BOL_assertion)
        {
            os_ << "        else\n";
            os_ << "        {\n";
            os_ << "            beg_of_line_ = end_bol_;\n";
            os_ << "        }\n";
        }
    }

    os_ << "    }\n";
    os_ << "    else\n";
    os_ << "    {\n";

    if (sm_._seen_BOL_assertion)
    {
        os_ << "        beg_of_line_ = *start_token_ == '\\n';\n";
    }

    if (skip_unknown_)
    {
        os_ << "        // No match causes char to be skipped\n";
        os_ << "        ++start_token_;\n";
    }

    os_ << "        id_ = npos;\n";
    os_ << "        uid_ = npos;\n";
    os_ << "    }\n";
    os_ << '\n';
    os_ << "    unique_id_ = uid_;\n";
    os_ << "    return id_;\n";
    os_ << "}\n";
    os_ << "\n#endif\n";
}
Exemple #7
0
    static void dump (const basic_state_machine<CharT> &state_machine_, ostream &stream_)
    {
        typename basic_state_machine<CharT>::iterator iter_ =
            state_machine_.begin ();
        typename basic_state_machine<CharT>::iterator end_ =
            state_machine_.end ();

        for (std::size_t dfa_ = 0, dfas_ = state_machine_.size ();
            dfa_ < dfas_; ++dfa_)
        {
            const std::size_t states_ = iter_->states;

            for (std::size_t i_ = 0; i_ < states_; ++i_)
            {
                state (stream_);
                stream_ << i_ << std::endl;

                if (iter_->end_state)
                {
                    end_state (stream_);
                    stream_ << iter_->id;
                    dfa (stream_);
                    stream_ << iter_->goto_dfa;
                    stream_ << std::endl;
                }

                if (iter_->bol_index != npos)
                {
                    bol (stream_);
                    stream_ << iter_->bol_index << std::endl;
                }

                if (iter_->eol_index != npos)
                {
                    eol (stream_);
                    stream_ << iter_->eol_index << std::endl;
                }

                const std::size_t transitions_ = iter_->transitions;

                if (transitions_ == 0)
                {
                    ++iter_;
                }

                for (std::size_t t_ = 0; t_ < transitions_; ++t_)
                {
                    std::size_t goto_state_ = iter_->goto_state;

                    if (iter_->token.any ())
                    {
                        any (stream_);
                    }
                    else
                    {
                        open_bracket (stream_);

                        if (iter_->token._negated)
                        {
                            negated (stream_);
                        }

                        string charset_;
                        CharT c_ = 0;

                        escape_control_chars (iter_->token._charset,
                            charset_);
                        c_ = *charset_.c_str ();

                        if (!iter_->token._negated &&
                            (c_ == '^' || c_ == ']'))
                        {
                            stream_ << '\\';
                        }

                        stream_ << charset_;
                        close_bracket (stream_);
                    }

                    stream_ << goto_state_ << std::endl;
                    ++iter_;
                }

                stream_ << std::endl;
            }
        }
    }