Esempio n. 1
0
int main(int /*argc*/, char ** /*argv*/)
{
    lexertl::rules rules_;
    lexertl::state_machine sm_;

    rules_.push_state("FW");
    rules_.push_state("SEMI");
    rules_.push_state("NESTED");

    rules_.push("*", "[/][/].*|[/][*](.|\n)*?[*][/]|[\"](.|\\\")*[\"]",
                rules_.skip(), ".");
    rules_.push("INITIAL", "for\\s*\\([^;]*;[^;]*;|while\\s*\\(",
                rules_.skip(), "FW");
    rules_.push("FW", "\\)", rules_.skip(), "SEMI");
    rules_.push("FW,NESTED", "\\(", ">NESTED");
    rules_.push("NESTED", "\\)", rules_.skip(), "<");
    rules_.push("SEMI", "\\s*;", 1, "INITIAL");
    rules_.push("SEMI", ".|\n", rules_.skip(), "INITIAL");
    lexertl::generator::build(rules_, sm_);

    lexertl::memory_file buff_("main.cpp");
    const char *start_ = buff_.data();
    const char *end_ = start_ + buff_.size();
    lexertl::crmatch results_(start_, end_);

    do
    {
        lexertl::lookup(sm_, results_);

        if (results_.id == 1)
        {
            std::cout << "found on line " <<
                      std::count(start_, results_.second, '\n') + 1 << '\n';
        }
    } while (results_.id != sm_.eoi());

    return 0;
}
Esempio n. 2
0
void lex_prop_list()
{
    lexertl::rules rules_;
    lexertl::state_machine state_machine_;
    std::ifstream if_("PropList.txt");
    lexertl::stream_shared_iterator iter_(if_);
    lexertl::stream_shared_iterator end_;
    lexertl::match_results<lexertl::stream_shared_iterator>
        results_(iter_, end_);
    enum {eRange = 1, eName, eShortName};

    rules_.push_state("RANGE");
    rules_.push_state("WS");
    rules_.push_state("NAME");
    rules_.push_state("SHORT_NAME");
    rules_.push_state("FINISH");
    rules_.push("^#.*", rules_.skip());
    rules_.push("\n", rules_.skip());
    rules_.push("INITIAL", "^[0-9A-F]+(\\.\\.[0-9A-F]+)?", eRange, "RANGE");
    rules_.push("RANGE", " *; ", rules_.skip(), "NAME");
    rules_.push("NAME", "[A-Z][a-zA-Z_]+", eName, "WS");
    rules_.push("WS", " # ", rules_.skip(), "SHORT_NAME");
    rules_.push("SHORT_NAME", "[A-Z][a-z&]", eShortName, "FINISH");
    rules_.push("FINISH", ".*\n", rules_.skip(), "INITIAL");
    lexertl::generator::build(rules_, state_machine_);

    do
    {
        lexertl::lookup(state_machine_, results_);
        std::cout << "Id: " << results_.id << ", Token: '" <<
            std::string(results_.start, results_.end) << "'\n";

        if (results_.id > eShortName)
        {
//            int i = 0;
        }
    } while (results_.id != 0);
}
Esempio n. 3
0
// Timing exercise: lexes "UnicodeData.txt", records each code point under
// the short name found in the third ';'-separated column of its line
// (presumably the general category -- confirm against the UCD layout), and
// prints the elapsed CPU time in seconds.
void lex_unicode_data()
{
    clock_t started_ = ::clock();
    lexertl::rules rules_;
    lexertl::state_machine state_machine_;
    lexertl::memory_file if_("UnicodeData.txt");
    const char *start_ = if_.data();
    const char *end_ = start_ + if_.size();
    lexertl::cmatch results_(start_, end_);
    enum {eNumber = 1, eName};

    // Code point decoded from the most recent eNumber token.
    std::size_t num_ = 0;
    // Short name -> set of code points seen with that name.
    std::map<std::string, lexertl::basic_string_token<std::size_t> > map_;

    rules_.push_state("LONG_NAME");
    rules_.push_state("SHORT_NAME");
    rules_.push_state("FINISH");

    // One state per column: hex code point, ";long name;" (skipped),
    // short name, then the remainder of the line (skipped).
    rules_.push("INITIAL", "^[A-F0-9]+", eNumber, "LONG_NAME");
    rules_.push("LONG_NAME", ";[^;]+;", rules_.skip(), "SHORT_NAME");
    rules_.push("SHORT_NAME", "[A-Z][a-z]?", eName, "FINISH");
    rules_.push("FINISH", ".*\n", rules_.skip(), "INITIAL");
    lexertl::generator::build(rules_, state_machine_);

    do
    {
        lexertl::lookup(state_machine_, results_);

        if (results_.id == eNumber)
        {
            // Hand-rolled hex decode (the original author noted that sscanf
            // was too slow here). The cursor is a local copy bounded by the
            // token end, so the match object is left untouched and nothing
            // past the matched text -- or past the end of the mapped file --
            // is ever read.
            num_ = 0;

            for (const char *curr_ = results_.start;
                curr_ != results_.end; ++curr_)
            {
                std::size_t digit_ = 0;

                if (*curr_ >= '0' && *curr_ <= '9')
                {
                    digit_ = static_cast<std::size_t>(*curr_ - '0');
                }
                else if (*curr_ >= 'A' && *curr_ <= 'F')
                {
                    digit_ = static_cast<std::size_t>(*curr_ - 'A' + 10);
                }
                else
                {
                    // Cannot happen for text matched by [A-F0-9]+; kept as
                    // a defensive stop.
                    break;
                }

                num_ = (num_ << 4) | digit_;
            }
        }
        else if (results_.id == eName)
        {
            const std::string name_(results_.start, results_.end);

            map_[name_].insert(lexertl::basic_string_token<std::size_t>::range
                (num_, num_));
        }
    } while (results_.id != 0);

    clock_t finished_ = ::clock();
    double seconds_ = static_cast<double>
        (finished_ - started_) / CLOCKS_PER_SEC;

    std::cout << seconds_ << "\n";
}
Esempio n. 4
0
// Exercises lexertl's Unicode helpers: the UTF-8 and UTF-16 input iterators,
// then a case-insensitive lexer over int code points whose tokens are
// printed re-encoded as UTF-16 (WIN32) or UTF-8 (elsewhere).
void test_unicode()
{
    // clear()/reset() on an empty range.
    lexertl::cmatch r_;
    const char *i_ = "";

    r_.clear();
    r_.reset(i_, i_);

    // UTF-8 decoding. The trailing comments give the code point each
    // dereference is expected to yield. 'i' is never read afterwards
    // (presumably kept so the values can be inspected in a debugger).
    const char utf8_[] = "\xf0\x90\x8d\x86\xe6\x97\xa5\xd1\x88\x7f";
    lexertl::basic_utf8_in_iterator<const char *, int> u8iter_(utf8_,
        utf8_ + sizeof(utf8_));
    int i = *u8iter_; // 0x10346

    i = *++u8iter_; // 0x65e5
    i = *u8iter_++; // 0x65e5
    i = *u8iter_; // 0x0448
    i = *++u8iter_; // 0x7f

    // UTF-16 decoding, same pattern; surrogate pairs collapse to one value.
    const wchar_t utf16_[] = L"\xdbff\xdfff\xd801\xdc01\xd800\xdc00\xd7ff";
    lexertl::basic_utf16_in_iterator<const wchar_t *, int> u16iter_(utf16_,
        utf16_ + sizeof(utf16_) / sizeof(wchar_t));

    i = *u16iter_; // 0x10ffff
    i = *++u16iter_; // 0x10401
    i = *u16iter_++; // 0x10401
    i = *u16iter_; // 0x10000
    i = *++u16iter_; // 0xd7ff

    // Not all compilers have char32_t, so use int for now
    lexertl::basic_rules<char, int> rules_(lexertl::icase);
    lexertl::basic_state_machine<int> sm_;
    const int in_[] = {0x393, ' ', 0x393, 0x398, ' ', 0x398,
        '1', ' ', 'i', 'd', 0x41f, 0};
    // Lex the array in place. std::basic_string<int> would need
    // std::char_traits<int>, which the standard does not provide and some
    // standard libraries no longer ship. The trailing 0 is a terminator
    // only and is excluded from the range.
    const int *iter_ = in_;
    const int *end_ = in_ + sizeof(in_) / sizeof(in_[0]) - 1;
    lexertl::match_results<const int *> results_(iter_, end_);

    rules_.push("\\p{LC}[\\p{LC}0-9]*", 1);
    lexertl::basic_generator<lexertl::basic_rules<char, int>,
        lexertl::basic_state_machine<int> >::build(rules_, sm_);

#ifdef WIN32
    HANDLE hStdOut = GetStdHandle(STD_OUTPUT_HANDLE);
    DWORD dwBytesWritten = 0;
#endif

    do
    {
#ifdef WIN32
        std::wstring str_;
#else
        std::string str_;
#endif

        lexertl::lookup(sm_, results_);

#ifdef WIN32
        // Re-encode the token as UTF-16 and write it with WriteConsoleW.
        str_.assign(lexertl::basic_utf16_out_iterator<const int *>
            (results_.start, results_.end),
            lexertl::basic_utf16_out_iterator<const int *>
            (results_.end, results_.end));
        std::wcout << L"Id: " << results_.id << L", Token: '";
        // WriteConsoleW takes a DWORD length; make the narrowing explicit.
        ::WriteConsoleW(hStdOut, str_.c_str(),
            static_cast<DWORD>(str_.size()), &dwBytesWritten, 0);
        std::wcout << '\'' << std::endl;
#else
        // Re-encode the token as UTF-8 for a byte-oriented stream.
        str_.assign(lexertl::basic_utf8_out_iterator<const int *>
            (results_.start, results_.end),
            lexertl::basic_utf8_out_iterator<const int *>
            (results_.end, results_.end));
        std::cout << "Id: " << results_.id << ", Token: '" <<
            str_ << '\'' << std::endl;
#endif
    } while (results_.id != 0);
}
Esempio n. 5
0
void case_mapping()
{
    lexertl::rules rules_;
    lexertl::state_machine sm_;
    std::ifstream if_("UnicodeData.txt");
    lexertl::stream_shared_iterator iter_(if_);
    lexertl::stream_shared_iterator end_;
    lexertl::match_results<lexertl::stream_shared_iterator>
        results_(iter_, end_);
    enum e_Token {eEOF, eCodeValue, eName, eLl, eLu, eNeither, eMapping, eEmpty};
    e_Token eToken = eEOF;
    std::string code_;
    std::string mapping_;
    int count_ = 0;


    rules_.push_state("NAME");
    rules_.push_state("TYPE");
    rules_.push_state("Ll");
    rules_.push_state("Lu");
    rules_.push_state("MAPPING");
    rules_.push_state("END");

    rules_.push("INITIAL", "^[0-9A-F]{4,6};", eCodeValue, "NAME");
    rules_.push("NAME", "[^;]*;", sm_.skip(), "TYPE");
    rules_.push("TYPE", "Ll;", eLl, "Ll");
    rules_.push("Ll", "([^;]*;){9}", sm_.skip(), "MAPPING");
    rules_.push("TYPE", "Lu;", eLu, "Lu");
    rules_.push("Lu", "([^;]*;){10}", sm_.skip(), "MAPPING");
    rules_.push("TYPE", "[^;]*;", eNeither, "END");
    rules_.push("MAPPING", ";", eEmpty, "END");
    rules_.push("MAPPING", "[0-9A-F]{4,6};", eMapping, "END");
    rules_.push("END", "[^\n]*\n", sm_.skip(), "INITIAL");
    lexertl::generator::build(rules_, sm_);

    do
    {
        lexertl::lookup(sm_, results_);
        eToken = static_cast<e_Token>(results_.id);

        if (eToken == eEOF)
        {
            break;
        }
        else if (eToken != eCodeValue)
        {
            throw std::runtime_error("Syntax error");
        }

        code_.assign(results_.start, results_.end);
        lexertl::lookup(sm_, results_);
        eToken = static_cast<e_Token>(results_.id);

        if (eToken != eLl && eToken != eLu && eToken != eNeither)
        {
            throw std::runtime_error("Syntax error");
        }

        if (eToken != eNeither)
        {
            lexertl::lookup(sm_, results_);
            eToken = static_cast<e_Token>(results_.id);

            if (eToken == eMapping)
            {
                mapping_.assign(results_.start, results_.end);
                std::cout << "(0x" << code_.substr(0, code_.size() - 1) << ", " <<
                    "0x" << mapping_.substr(0, mapping_.size() - 1) << "), ";
                code_.clear();
                mapping_.clear();
                ++count_;

                if (count_ > 2)
                {
                    count_ = 0;
                    std::cout << '\n';
                }
            }
        }
    } while (results_.id != 0);
}
Esempio n. 6
0
// Searches [first_, second_) for text accepted by grammar gsm_, lexing with
// lsm_, and on success fills captures_ with iterator pairs.
//   captures_[0]     - one pair spanning the whole match
//   captures_[1..n]  - one list per capture listed in gsm_._captures, with
//                      one pair appended for every reduction that hit it
// captures_ is always cleared first. Returns whether a match was found.
bool search(iterator first_, iterator second_, captures &captures_,
    lsm &lsm_, const sm_type &gsm_)
{
    typedef lexertl::iterator<iterator, lsm, lexertl::
        match_results<iterator> > lex_iterator;
    typedef parsertl::token<lex_iterator> token;
    typedef typename token::token_vector token_vector;
    typedef std::multimap<typename sm_type::id_type, token_vector> prod_map;
    typedef typename sm_type::capture_vector capture_vector;

    lex_iterator iter_(first_, second_, lsm_);
    lex_iterator end_;
    basic_match_results<sm_type> results_(iter_->id, gsm_);
    prod_map prod_map_;
    const bool success_ = search(gsm_, iter_, end_, &prod_map_);

    captures_.clear();

    if (!success_)
    {
        return success_;
    }

    // One slot per declared capture plus slot 0 for the overall match.
    std::size_t slots_ = 0;

    if (!gsm_._captures.empty())
    {
        slots_ = gsm_._captures.back().first +
            gsm_._captures.back().second.size();
    }

    captures_.resize(slots_ + 1);

    // Slot 0 starts as an empty range at the match start; its end is
    // patched below once the furthest token end is known.
    iterator last_ = iter_->first;

    captures_[0].push_back(std::make_pair(iter_->first, iter_->first));

    for (typename prod_map::const_iterator prod_ = prod_map_.begin(),
        prod_end_ = prod_map_.end(); prod_ != prod_end_; ++prod_)
    {
        const token_vector &tokens_ = prod_->second;

        if (prod_->first < gsm_._captures.size())
        {
            const typename sm_type::capture_vec_pair &row_ =
                gsm_._captures[prod_->first];
            // Captures of this production occupy consecutive slots
            // beginning at row_.first + 1.
            std::size_t slot_ = row_.first + 1;

            for (typename capture_vector::const_iterator cap_ =
                row_.second.begin(), cap_end_ = row_.second.end();
                cap_ != cap_end_; ++cap_, ++slot_)
            {
                const token &from_ = tokens_[cap_->first];
                const token &to_ = tokens_[cap_->second];

                captures_[slot_].push_back(std::make_pair(from_.first,
                    to_.second));
            }
        }

        // Track the furthest end reached by any production's last token.
        typename token::iter_type tail_ = tokens_.back().second;

        if (tail_ > last_)
        {
            last_ = tail_;
        }
    }

    captures_.front().back().second = last_;

    return success_;
}