int cpp_main(int argc, char * argv[]) { // start by processing the command line args: if(argc < 2) return show_usage(); int result = 0; for(int c = 1; c < argc; ++c) { result += handle_argument(argv[c]); } if(result) return result; if(test_matches) { // start with a simple test, this is basically a measure of the minimal overhead // involved in calling a regex matcher: test_match("abc", "abc"); // these are from the regex docs: test_match("^([0-9]+)(\\-| |$)(.*)$", "100- this is a line of ftp response which contains a message string"); test_match("([[:digit:]]{4}[- ]){3}[[:digit:]]{3,4}", "1234-5678-1234-456"); // these are from http://www.regxlib.com/ test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "*****@*****.**"); test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "*****@*****.**"); test_match("^([a-zA-Z0-9_\\-\\.]+)@((\\[[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.)|(([a-zA-Z0-9\\-]+\\.)+))([a-zA-Z]{2,4}|[0-9]{1,3})(\\]?)$", "*****@*****.**"); test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "EH10 2QQ"); test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "G1 1AA"); test_match("^[a-zA-Z]{1,2}[0-9][0-9A-Za-z]{0,1} {0,1}[0-9][A-Za-z]{2}$", "SW1 1ZZ"); test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "4/1/2001"); test_match("^[[:digit:]]{1,2}/[[:digit:]]{1,2}/[[:digit:]]{4}$", "12/12/2001"); test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "123"); test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "+3.14159"); test_match("^[-+]?[[:digit:]]*\\.?[[:digit:]]*$", "-3.14159"); } output_html_results(true, "%short_matches%"); std::string file_contents; if(test_code) { load_file(file_contents, "../../../boost/crc.hpp"); const char* highlight_expression = // preprocessor directives: index 1 "(^[ 
\t]*#(?:[^\\\\\\n]|\\\\[^\\n_[:punct:][:alnum:]]*[\\n[:punct:][:word:]])*)|" // comment: index 2 "(//[^\\n]*|/\\*.*?\\*/)|" // literals: index 3 "\\<([+-]?(?:(?:0x[[:xdigit:]]+)|(?:(?:[[:digit:]]*\\.)?[[:digit:]]+(?:[eE][+-]?[[:digit:]]+)?))u?(?:(?:int(?:8|16|32|64))|L)?)\\>|" // string literals: index 4 "('(?:[^\\\\']|\\\\.)*'|\"(?:[^\\\\\"]|\\\\.)*\")|" // keywords: index 5 "\\<(__asm|__cdecl|__declspec|__export|__far16|__fastcall|__fortran|__import" "|__pascal|__rtti|__stdcall|_asm|_cdecl|__except|_export|_far16|_fastcall" "|__finally|_fortran|_import|_pascal|_stdcall|__thread|__try|asm|auto|bool" "|break|case|catch|cdecl|char|class|const|const_cast|continue|default|delete" "|do|double|dynamic_cast|else|enum|explicit|extern|false|float|for|friend|goto" "|if|inline|int|long|mutable|namespace|new|operator|pascal|private|protected" "|public|register|reinterpret_cast|return|short|signed|sizeof|static|static_cast" "|struct|switch|template|this|throw|true|try|typedef|typeid|typename|union|unsigned" "|using|virtual|void|volatile|wchar_t|while)\\>" ; const char* class_expression = "^(template[[:space:]]*<[^;:{]+>[[:space:]]*)?" "(class|struct)[[:space:]]*(\\<\\w+\\>([ \t]*\\([^)]*\\))?" "[[:space:]]*)*(\\<\\w*\\>)[[:space:]]*(<[^;:{]+>[[:space:]]*)?" 
"(\\{|:[^;\\{()]*\\{)"; const char* include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"[^\"]+\"|<[^>]+>)"; const char* boost_include_expression = "^[ \t]*#[ \t]*include[ \t]+(\"boost/[^\"]+\"|<boost/[^>]+>)"; test_find_all(class_expression, file_contents); test_find_all(highlight_expression, file_contents); test_find_all(include_expression, file_contents); test_find_all(boost_include_expression, file_contents); } output_html_results(false, "%code_search%"); if(test_html) { load_file(file_contents, "../../../libs/libraries.htm"); test_find_all("beman|john|dave", file_contents, true); test_find_all("<p>.*?</p>", file_contents, true); test_find_all("<a[^>]+href=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); test_find_all("<h[12345678][^>]*>.*?</h[12345678]>", file_contents, true); test_find_all("<img[^>]+src=(\"[^\"]*\"|[^[:space:]]+)[^>]*>", file_contents, true); test_find_all("<font[^>]+face=(\"[^\"]*\"|[^[:space:]]+)[^>]*>.*?</font>", file_contents, true); } output_html_results(false, "%html_search%"); if(test_short_twain) { load_file(file_contents, "short_twain.txt"); test_find_all("Twain", file_contents); test_find_all("Huck[[:alpha:]]+", file_contents); test_find_all("[[:alpha:]]+ing", file_contents); test_find_all("^[^\n]*?Twain", file_contents); test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); } output_html_results(false, "%short_twain_search%"); if(test_long_twain) { load_file(file_contents, "mtent13.txt"); test_find_all("Twain", file_contents); test_find_all("Huck[[:alpha:]]+", file_contents); test_find_all("[[:alpha:]]+ing", file_contents); test_find_all("^[^\n]*?Twain", file_contents); test_find_all("Tom|Sawyer|Huckleberry|Finn", file_contents); time_posix = false; test_find_all("(Tom|Sawyer|Huckleberry|Finn).{0,30}river|river.{0,30}(Tom|Sawyer|Huckleberry|Finn)", file_contents); time_posix = true; } 
output_html_results(false, "%long_twain_search%"); output_final_html(); return 0; }
// Convenience overload of test_find_all() for callers that have no
// description to supply: forwards re, text and icase to the four argument
// overload together with an empty description.
inline void test_find_all(const std::string& re, const std::string& text, bool icase = false)
{
   const char* const no_description = "";
   test_find_all(re, text, no_description, icase);
}
// Convenience overload of test_find_all() for callers that have no
// description to supply: forwards re and text together with an empty
// description.
inline void test_find_all(const std::string& re, const std::string& text)
{
   // The description is passed as a std::string object, not as the literal "".
   // A string literal converts to bool by a standard conversion, which ranks
   // ahead of the user-defined conversion to std::string, so with a
   // (re, text, bool icase) overload visible the literal would be taken as
   // icase == true instead of as an empty description.
   // NOTE(review): assumes the description overload takes const std::string& - confirm.
   const std::string no_description;
   test_find_all(re, text, no_description);
}