Beispiel #1
0
void build_cpp(lexertl::rules &rules_)
{
	for (macro_pair *ptr_ = g_macros_; ptr_->_name; ++ptr_)
	{
		rules_.insert_macro(ptr_->_name, ptr_->_regex);
	}

	for (rule_pair *ptr_ = g_regexes_; ptr_->_regex; ++ptr_)
	{
		rules_.push(ptr_->_regex, ptr_->_id);
	}
}
Beispiel #2
0
void build_rev_cpp(lexertl::rules &rules_)
{
	lexertl::state_machine rev_rx_sm_;

	build_rev_regex_lexer(rev_rx_sm_);

	for (macro_pair *ptr_ = g_macros_; ptr_->_name; ++ptr_)
	{
		rules_.insert_macro(ptr_->_name,
			rev_regex(ptr_->_regex, rev_rx_sm_).c_str());
	}

	for (rule_pair *ptr_ = g_regexes_; ptr_->_regex; ++ptr_)
	{
		rules_.push(rev_regex(ptr_->_regex, rev_rx_sm_).c_str(), ptr_->_id);
	}
}
Beispiel #3
0
void build_cpp(lexertl::rules &rules_)
{
    rules_.insert_macro("any", "[\t\v\f\r\n\040-\377]");
    rules_.insert_macro("anyctrl", "[\001-\037]");
    rules_.insert_macro("OctalDigit", "[0-7]");
    rules_.insert_macro("Digit", "[0-9]");
    rules_.insert_macro("HexDigit", "[a-fA-F0-9]");
    rules_.insert_macro("Integer",
        "((0[xX]{HexDigit}+)|(0{OctalDigit}*)|([1-9]{Digit}*))");
    rules_.insert_macro("ExponentStart", "[Ee][-+]");
    rules_.insert_macro("ExponentPart", "[Ee][-+]?{Digit}+");
    rules_.insert_macro("FractionalConstant", "({Digit}*[.]{Digit}+)|({Digit}+[.])");
    rules_.insert_macro("FloatingSuffix", "[fF][lL]?|[lL][fF]?");
    rules_.insert_macro("IntegerSuffix", "[uU][lL]?|[lL][uU]?");
    rules_.insert_macro("LongIntegerSuffix", "[uU]([lL][lL])|([lL][lL])[uU]?");
    rules_.insert_macro("MSLongIntegerSuffix", "u?i64");
    rules_.insert_macro("Backslash", "[\\\\]|\"??/\"");
    rules_.insert_macro("EscapeSequence",
        "{Backslash}([abfnrtv?'\"]|{Backslash}|x{HexDigit}+|"
        "{OctalDigit}{OctalDigit}?{OctalDigit}?)");
    rules_.insert_macro("HexQuad", "{HexDigit}{HexDigit}{HexDigit}{HexDigit}");
    rules_.insert_macro("UniversalChar", "{Backslash}(u{HexQuad}|U{HexQuad}{HexQuad})");
    rules_.insert_macro("Newline", "\r\n|\n|\r");
    rules_.insert_macro("PPSpace", "([\t\f\v]|(\"/*\"({any}{-}[*]|{Newline}|"
        "([*]+({any}{-}[*/ ]|{Newline})))*[*]+[/]))*");
    rules_.insert_macro("Pound", "#|\"??=\"|%:");
    rules_.insert_macro("NonDigit", "[a-zA-Z$]|{UniversalChar}");

    rules_.push("\\/\\*(.|\n)*?\\*\\/", T_CCOMMENT);
    rules_.push("\\/\\/.*", T_CPPCOMMENT);
#ifdef detect_pp_numbers
    rules_.push("[.]?{Digit}[.]?{Digit}({Digit}|{NonDigit}|{ExponentStart}|"
        "[.])*", T_PP_NUMBER);
#else
    rules_.push("[.]?{Digit}({FractionalConstant}{ExponentPart}?|"
        "{Digit}+{ExponentPart}){FloatingSuffix}?", T_FLOATLIT);

#ifdef enable_ms_extensions
    rules_.push("{Integer}({LongIntegerSuffix}|{MSLongIntegerSuffix})",
        T_LONGINTLIT);
#else
    rules_.push("{Integer}{LongIntegerSuffix}", T_LONGINTLIT);
#endif
    rules_.push("{Integer}{IntegerSuffix}?", T_INTLIT);
#endif

#ifdef act_in_cpp0x_mode
    rules_.push("alignas", T_ALIGNAS);
    rules_.push("alignof", T_ALIGNOF);
    rules_.push("char16_t", T_CHAR16_T);
    rules_.push("char32_t", T_CHAR32_T);
    rules_.push("constexpr", T_CONSTEXPR);
    rules_.push("decltype", T_DECLTYPE);
    rules_.push("noexcept", T_NOEXCEPT);
    rules_.push("nullptr", T_NULLPTR);
    rules_.push("static_assert", T_STATICASSERT);
    rules_.push("thread_local", T_THREADLOCAL);
    rules_.push("(L|[uU]|u8)?R[\"]({EscapeSequence}|{UniversalChar}|"
        "{any}{-}[\r\n\\\\\"])[\"]", T_RAWSTRINGLIT);
    rules_.push("[uU]'({EscapeSequence}|{UniversalChar}|{any}{-}[\n\r\\\\'])'",
        T_CHARLIT);
    rules_.push("([uU]|u8)[\"]({EscapeSequence}|UniversalChar|"
        "{any}{-}[\n\r\\\\\"])*[\"]", T_STRINGLIT);
#else
    rules_.push("alignas", T_IDENTIFIER);
    rules_.push("alignof", T_IDENTIFIER);
    rules_.push("char16_t", T_IDENTIFIER);
    rules_.push("char32_t", T_IDENTIFIER);
    rules_.push("constexpr", T_IDENTIFIER);
    rules_.push("decltype", T_IDENTIFIER);
    rules_.push("noexcept", T_IDENTIFIER);
    rules_.push("nullptr", T_IDENTIFIER);
    rules_.push("static_assert", T_IDENTIFIER);
    rules_.push("thread_local", T_IDENTIFIER);
#endif
    rules_.push("asm", T_ASM);
    rules_.push("auto", T_AUTO);
    rules_.push("bool", T_BOOL);
    rules_.push("break", T_BREAK);
    rules_.push("case", T_CASE);
    rules_.push("catch", T_CATCH);
    rules_.push("char", T_CHAR);
    rules_.push("class", T_CLASS);
    rules_.push("const", T_CONST);
    rules_.push("const_cast", T_CONSTCAST);
    rules_.push("continue", T_CONTINUE);
    rules_.push("default", T_DEFAULT);
    rules_.push("delete", T_DELETE);
    rules_.push("do", T_DO);
    rules_.push("double", T_DOUBLE);
    rules_.push("dynamic_cast", T_DYNAMICCAST);
    rules_.push("else", T_ELSE);
    rules_.push("enum", T_ENUM);
    rules_.push("explicit", T_EXPLICIT);
    rules_.push("export", T_EXPORT);
    rules_.push("extern", T_EXTERN);
    rules_.push("false", T_FALSE);
    rules_.push("float", T_FLOAT);
    rules_.push("for", T_FOR);
    rules_.push("friend", T_FRIEND);
    rules_.push("goto", T_GOTO);
    rules_.push("if", T_IF);
#ifdef enable_import_keyword
    rules_.push("import", T_IMPORT);
#else
    rules_.push("import", T_IDENTIFIER);
#endif
    rules_.push("inline", T_INLINE);
    rules_.push("int", T_INT);
    rules_.push("long", T_LONG);
    rules_.push("mutable", T_MUTABLE);
    rules_.push("namespace", T_NAMESPACE);
    rules_.push("new", T_NEW);
    rules_.push("operator", T_OPERATOR);
    rules_.push("private", T_PRIVATE);
    rules_.push("protected", T_PROTECTED);
    rules_.push("public", T_PUBLIC);
    rules_.push("register", T_REGISTER);
    rules_.push("reinterpret_cast", T_REINTERPRETCAST);
    rules_.push("return", T_RETURN);
    rules_.push("short", T_SHORT);
    rules_.push("signed", T_SIGNED);
    rules_.push("sizeof", T_SIZEOF);
    rules_.push("static", T_STATIC);
    rules_.push("static_cast", T_STATICCAST);
    rules_.push("struct", T_STRUCT);
    rules_.push("switch", T_SWITCH);
    rules_.push("template", T_TEMPLATE);
    rules_.push("this", T_THIS);
    rules_.push("throw", T_THROW);
    rules_.push("true", T_TRUE);
    rules_.push("try", T_TRY);
    rules_.push("typedef", T_TYPEDEF);
    rules_.push("typeid", T_TYPEID);
    rules_.push("typename", T_TYPENAME);
    rules_.push("union", T_UNION);
    rules_.push("unsigned", T_UNSIGNED);
    rules_.push("using", T_USING);
    rules_.push("virtual", T_VIRTUAL);
    rules_.push("void", T_VOID);
    rules_.push("volatile", T_VOLATILE);
    rules_.push("wchar_t", T_WCHART);
    rules_.push("while", T_WHILE);

#ifdef enable_ms_extensions
    rules_.push("__int8", T_MSEXT_INT8);
    rules_.push("__int16", T_MSEXT_INT16);
    rules_.push("__int32", T_MSEXT_INT32);
    rules_.push("__int64", T_MSEXT_INT64);
    rules_.push("_?_based", T_MSEXT_BASED);
    rules_.push("_?_declspec", T_MSEXT_DECLSPEC);
    rules_.push("_?_cdecl", T_MSEXT_CDECL);
    rules_.push("_?_fastcall", T_MSEXT_FASTCALL);
    rules_.push("_?_stdcall", T_MSEXT_STDCALL);
    rules_.push("__try", T_MSEXT_TRY);
    rules_.push("__except", T_MSEXT_EXCEPT);
    rules_.push("__finally", T_MSEXT_FINALLY);
    rules_.push("__leave", T_MSEXT_LEAVE);
    rules_.push("_?_inline", T_MSEXT_INLINE);
    rules_.push("_?_asm", T_MSEXT_ASM);
#else
    rules_.push("__int8", T_IDENTIFIER);
    rules_.push("__int16", T_IDENTIFIER);
    rules_.push("__int32", T_IDENTIFIER);
    rules_.push("__int64", T_IDENTIFIER);
    rules_.push("_?_based", T_IDENTIFIER);
    rules_.push("_?_declspec", T_IDENTIFIER);
    rules_.push("_?_cdecl", T_IDENTIFIER);
    rules_.push("_?_fastcall", T_IDENTIFIER);
    rules_.push("_?_stdcall", T_IDENTIFIER);
    rules_.push("__try", T_IDENTIFIER);
    rules_.push("__except", T_IDENTIFIER);
    rules_.push("__finally", T_IDENTIFIER);
    rules_.push("__leave", T_IDENTIFIER);
    rules_.push("_?_inline", T_IDENTIFIER);
    rules_.push("_?_asm", T_IDENTIFIER);
#endif

    rules_.push("[{]", T_LEFTBRACE);
    rules_.push("\"??<\"", T_LEFTBRACE_TRIGRAPH);
    rules_.push("<%", T_LEFTBRACE_ALT);
    rules_.push("[}]", T_RIGHTBRACE);
    rules_.push("\"??>\"", T_RIGHTBRACE_TRIGRAPH);
    rules_.push("%>", T_RIGHTBRACE_ALT);
    rules_.push("[[]", T_LEFTBRACKET);
    rules_.push("\"??(\"", T_LEFTBRACKET_TRIGRAPH);
    rules_.push("<:", T_LEFTBRACKET_ALT);
    rules_.push("\\]", T_RIGHTBRACKET);
    rules_.push("\"??)\"", T_RIGHTBRACKET_TRIGRAPH);
    rules_.push(":>", T_RIGHTBRACKET_ALT);
    rules_.push("#", T_POUND);
    rules_.push("%:", T_POUND_ALT);
    rules_.push("\"??=\"", T_POUND_TRIGRAPH);
    rules_.push("##", T_POUND_POUND);
    rules_.push("\"#??=\"", T_POUND_POUND_TRIGRAPH);
    rules_.push("\"??=#\"", T_POUND_POUND_TRIGRAPH);
    rules_.push("\"??=??=\"", T_POUND_POUND_TRIGRAPH);
    rules_.push("%:%:", T_POUND_POUND_ALT);
    rules_.push("[(]", T_LEFTPAREN);
    rules_.push("[)]", T_RIGHTPAREN);
    rules_.push(";", T_SEMICOLON);
    rules_.push(":", T_COLON);
    rules_.push("...", T_ELLIPSIS);
    rules_.push("[?]", T_QUESTION_MARK);
    rules_.push("::", T_COLON_COLON);
    rules_.push("[.]", T_DOT);
    rules_.push("\".*\"", T_DOTSTAR);
    rules_.push("[+]", T_PLUS);
    rules_.push("-", T_MINUS);
    rules_.push("[*]", T_STAR);
    rules_.push("[/]", T_DIVIDE);
    rules_.push("%", T_PERCENT);
    rules_.push("[^]", T_XOR);
    rules_.push("\"??'\"", T_XOR_TRIGRAPH);
    rules_.push("xor", T_XOR_ALT);
    rules_.push("&", T_AND);
    rules_.push("bitand", T_AND_ALT);
    rules_.push("[|]", T_OR);
    rules_.push("bitor", T_OR_ALT);
    rules_.push("\"??!\"", T_OR_TRIGRAPH);
    rules_.push("~", T_COMPL);
    rules_.push("\"??-\"", T_COMPL_TRIGRAPH);
    rules_.push("compl", T_COMPL_ALT);
    rules_.push("!", T_NOT);
    rules_.push("not", T_NOT_ALT);
    rules_.push("=", T_ASSIGN);
    rules_.push("<", T_LESS);
    rules_.push(">", T_GREATER);
    rules_.push("[+]=", T_PLUSASSIGN);
    rules_.push("-=", T_MINUSASSIGN);
    rules_.push("[*]=", T_STARASSIGN);
    rules_.push("[/]=", T_DIVIDEASSIGN);
    rules_.push("%=", T_PERCENTASSIGN);
    rules_.push("[^]=", T_XORASSIGN);
    rules_.push("xor_eq", T_XORASSIGN_ALT);
    rules_.push("\"??'=\"", T_XORASSIGN_TRIGRAPH);
    rules_.push("&=", T_ANDASSIGN);
    rules_.push("and_eq", T_ANDASSIGN_ALT);
    rules_.push("[|]=", T_ORASSIGN);
    rules_.push("or_eq", T_ORASSIGN_ALT);
    rules_.push("\"??!=\"", T_ORASSIGN_TRIGRAPH);
    rules_.push("<<", T_SHIFTLEFT);
    rules_.push(">>", T_SHIFTRIGHT);
    rules_.push(">>=", T_SHIFTRIGHTASSIGN);
    rules_.push("<<=", T_SHIFTLEFTASSIGN);
    rules_.push("==", T_EQUAL);
    rules_.push("!=", T_NOTEQUAL);
    rules_.push("not_eq", T_NOTEQUAL_ALT);
    rules_.push("<=", T_LESSEQUAL);
    rules_.push(">=", T_GREATEREQUAL);
    rules_.push("&&", T_ANDAND);
    rules_.push("and", T_ANDAND_ALT);
    rules_.push("\"||\"", T_OROR);
    rules_.push("\"??!|\"", T_OROR_TRIGRAPH);
    rules_.push("\"|??!\"", T_OROR_TRIGRAPH);
    rules_.push("or", T_OROR_ALT);
    rules_.push("\"??!??!\"", T_OROR_TRIGRAPH);
    rules_.push("\"++\"", T_PLUSPLUS);
    rules_.push("--", T_MINUSMINUS);
    rules_.push(",", T_COMMA);
    rules_.push("->[*]", T_ARROWSTAR);
    rules_.push("->", T_ARROW);
    rules_.push("\"??/\"", T_ANY_TRIGRAPH);

    rules_.push("L?('({EscapeSequence}|{UniversalChar}|"
        "{any}{-}[\n\r\\\\'])+')", T_CHARLIT);
    rules_.push("L?([\"]({EscapeSequence}|{UniversalChar}|"
        "{any}{-}[\n\r\\\\\"])*[\"])", T_STRINGLIT);

    rules_.push("([a-zA-Z_$]|{UniversalChar})([a-zA-Z_0-9$]|"
        "{UniversalChar})*", T_IDENTIFIER);
    rules_.push("{Pound}{PPSpace}(include|include_next){PPSpace}"
        "<({any}{-}[\n\r>])+>", T_PP_HHEADER);
    rules_.push("{Pound}{PPSpace}(include|include_next){PPSpace}[\"]"
        "({any}{-}[\n\r\"])+[\"]", T_PP_QHEADER);
    rules_.push("{Pound}{PPSpace}(include|include_next){PPSpace}",    
        T_PP_INCLUDE);
    rules_.push("{Pound}{PPSpace}if", T_PP_IF);
    rules_.push("{Pound}{PPSpace}ifdef", T_PP_IFDEF);
    rules_.push("{Pound}{PPSpace}ifndef", T_PP_IFNDEF);
    rules_.push("{Pound}{PPSpace}else", T_PP_ELSE);
    rules_.push("{Pound}{PPSpace}elif", T_PP_ELIF);
    rules_.push("{Pound}{PPSpace}endif", T_PP_ENDIF);
    rules_.push("{Pound}{PPSpace}define", T_PP_DEFINE);
    rules_.push("{Pound}{PPSpace}undef", T_PP_UNDEF);
    rules_.push("{Pound}{PPSpace}line", T_PP_LINE);
    rules_.push("{Pound}{PPSpace}error", T_PP_ERROR);
    rules_.push("{Pound}{PPSpace}pragma", T_PP_PRAGMA);
    rules_.push("{Pound}{PPSpace}warning", T_PP_WARNING);

    rules_.push("{Pound}{PPSpace}region", T_MSEXT_PP_REGION);
    rules_.push("{Pound}{PPSpace}endregion", T_MSEXT_PP_ENDREGION);

    rules_.push("[\t\v\f]+", T_SPACE);
    rules_.push("{Newline}", T_NEWLINE);
}