// Diagnose misindented labels in C or C++ sources.
//
// A candidate is whatever the pattern finds at the start of a line:
// optional spaces, an identifier, optional spaces, and a ':' that is
// not immediately followed by another ':' (so '::' is not matched).
// The identifier 'default' is exempt; every other candidate must have
// exactly one of the accepted leading-space strings.
void check_label_indentation(file const& f)
{
    if(!f.is_of_phylum(e_c_or_cxx))
    {
        return;
    }

    static boost::regex const label_pattern("\\n( *)([A-Za-z][A-Za-z0-9_]*)( *:)(?!:)");

    std::string const& text = f.data();
    boost::sregex_iterator const no_more_matches;
    boost::sregex_iterator it(text.begin(), text.end(), label_pattern);
    while(it != no_more_matches)
    {
        // m[1]: leading spaces; m[2]: identifier; m[3]: spaces and colon.
        boost::smatch const& m(*it);
        ++it;

        if("default" == m[2])
        {
            continue;
        }

        // NOTE(review): both accepted indentations are the same literal as
        // written, so the second comparison is redundant. They may have been
        // two different widths before whitespace was altered -- confirm.
        if(" " == m[1] || " " == m[1])
        {
            continue;
        }

        std::ostringstream message;
        message << "has misindented label '" << m[1] << m[2] << m[3] << "'.";
        complain(f, message.str());
    }
}
// Diagnose words containing a double underscore.
//
// Log files, and one file that is excused by name, are not examined.
// A word is reported unless check_reserved_name_exception() accepts it
// or it contains no letter or digit at all (i.e., it consists only of
// underscores).
void check_reserved_names(file const& f)
{
    // Remove this exception once this file has been reworked.
    if(f.phyloanalyze("^ledger_xml_io.cpp$"))
    {
        return;
    }

    if(f.is_of_phylum(e_log))
    {
        return;
    }

    static boost::regex const double_underscore_word("(\\b\\w*__\\w*\\b)");
    static boost::regex const not_all_underscore("[A-Za-z0-9]");

    std::string const& text = f.data();
    boost::sregex_iterator const no_more_matches;
    for
        (boost::sregex_iterator it(text.begin(), text.end(), double_underscore_word)
        ;it != no_more_matches
        ;++it
        )
    {
        std::string const word = (*it)[0];

        // Ask about exceptions first, as the original condition did.
        if(check_reserved_name_exception(word))
        {
            continue;
        }
        if(!boost::regex_search(word, not_all_underscore))
        {
            continue;
        }

        std::ostringstream message;
        message << "contains reserved name '" << word << "'.";
        complain(f, message.str());
    }
}
// Style checks specific to C and C++ sources.
//
// Two independent scans are made:
//  1. '*' or '&' separated from the preceding word by spaces and fused
//     with the following word ("T *p") is reported, with a few
//     recognized exceptions;
//  2. 'const' written before a type name that is followed by '*' or
//     '&' is reported, except for 'const volatile'.
void check_cxx(file const& f)
{
    // Remove this once these files have been rewritten.
    if(f.phyloanalyze("^md5.[ch]pp$"))
    {
        return;
    }

    if(!f.is_of_phylum(e_c_or_cxx))
    {
        return;
    }

    std::string const& text = f.data();
    boost::sregex_iterator const no_more_matches;

    // Scan 1: declarator punctuation attached to the name, not the type.
    static boost::regex const detached_declarator("(\\w+)( +)([*&])(\\w+\\b)([*;]?)([^\\n]*)");
    for
        (boost::sregex_iterator it(text.begin(), text.end(), detached_declarator)
        ;it != no_more_matches
        ;++it
        )
    {
        boost::smatch const& m(*it);

        bool const is_return_expression = "return" == m[1];            // 'return *p'
        bool const is_star_nix          = "nix" == m[4];               // '*nix'
        bool const is_emphasis          = '*' == m[3] && '*' == m[5];  // '*emphasis*' in comment
        // '&' then a word then ';' -- presumably an entity reference
        // such as '&nbsp;' (the original comment text was lost).
        bool const is_entity            = '&' == m[3] && ';' == m[5];

        if(is_return_expression || is_star_nix || is_emphasis || is_entity)
        {
            continue;
        }

        std::ostringstream message;
        message << "should fuse '" << m[3] << "' with type: '" << m[0] << "'.";
        complain(f, message.str());
    }

    // Scan 2: 'const' placed before the type it modifies.
    static boost::regex const leading_const("\\bconst +([A-Za-z][A-Za-z0-9_:]*) *[*&]");
    for
        (boost::sregex_iterator it(text.begin(), text.end(), leading_const)
        ;it != no_more_matches
        ;++it
        )
    {
        boost::smatch const& m(*it);

        // 'const volatile'
        if("volatile" == m[1])
        {
            continue;
        }

        std::ostringstream message;
        message << "should write 'const' after the type it modifies: '" << m[0] << "'.";
        complain(f, message.str());
    }
}
// Throw std::runtime_error if the file contains any forbidden byte.
//
// Forbidden bytes are those in the ranges 0x00-0x08, 0x0e-0x1f, and
// 0x7f-0x9f; 0x09 through 0x0d are deliberately left out of the class
// and so are not rejected here.
void assay_non_latin(file const& f)
{
    static boost::regex const forbidden("[\\x00-\\x08\\x0e-\\x1f\\x7f-\\x9f]");

    bool const is_clean = !boost::regex_search(f.data(), forbidden);
    if(is_clean)
    {
        return;
    }

    throw std::runtime_error("File contains a forbidden character.");
}
// Enforce the rules for including 'config.hpp'.
//
//  - Log files and a few files excused by name are not examined.
//  - Headers named 'pchlist*.hpp' must contain the indented form
//    '# include "config.hpp"' on a line of its own, and that must be
//    the first include directive in the file.
//  - All other headers, except 'config*.hpp' itself, must contain
//    '#include "config.hpp"' on a line of its own, again as the first
//    include directive.
//  - Every remaining file must not include 'config.hpp' at all.
//
// Violations are reported through require(), forbid(), and complain().
void check_config_hpp(file const& f)
{
    static std::string const loose ("# *include *[<\"]config.hpp[>\"]");
    static std::string const strict("\\n(#include \"config.hpp\")\\n");
    static std::string const indent("\\n(# include \"config.hpp\")\\n");

    // Captures the text of the first include directive, up to end of line.
    static boost::regex const first_include("(# *include[^\\n]*)");

    if
        (  f.is_of_phylum(e_log)
        || f.phyloanalyze("^test_coding_rules_test.sh$")
        || f.phyloanalyze("^GNUmakefile$")
        || f.phyloanalyze("^pchfile(_.*)?\\.hpp$")
        )
    {
        return;
    }
    else if(f.is_of_phylum(e_header) && f.phyloanalyze("^pchlist(_.*)?\\.hpp$"))
    {
        require(f, loose , "must include 'config.hpp'.");
        require(f, indent, "lacks line '# include \"config.hpp\"'.");

        // Inspect the match only if the search succeeded: the state of
        // 'match' is not specified after a failed search. A file with
        // no include directive cannot have 'config.hpp' first.
        boost::smatch match;
        if
            (  !boost::regex_search(f.data(), match, first_include)
            || "# include \"config.hpp\"" != match[1]
            )
        {
            complain(f, "must include 'config.hpp' first.");
        }
    }
    else if(f.is_of_phylum(e_header) && !f.phyloanalyze("^config(_.*)?\\.hpp$"))
    {
        require(f, loose , "must include 'config.hpp'.");
        require(f, strict, "lacks line '#include \"config.hpp\"'.");

        // Same precaution as above.
        boost::smatch match;
        if
            (  !boost::regex_search(f.data(), match, first_include)
            || "#include \"config.hpp\"" != match[1]
            )
        {
            complain(f, "must include 'config.hpp' first.");
        }
    }
    else
    {
        forbid(f, loose, "must not include 'config.hpp'.");
    }
}
// Issue 'complaint' about the file if 'regex' is found anywhere in
// its contents.
//
// f:         file whose data is searched
// regex:     pattern text, compiled with boost::regex defaults
// complaint: message passed to complain() on a match
void forbid
    (file const&        f
    ,std::string const& regex
    ,std::string const& complaint
    )
{
    boost::regex const pattern(regex);
    bool const found = boost::regex_search(f.data(), pattern);
    if(found)
    {
        complain(f, complaint);
    }
}
// Check the file's use of whitespace characters.
//
// Throws std::runtime_error for:
//  - any '\r' or '\v';
//  - '\f', unless the file is of phylum gpl or touchstone;
//  - '\t', unless the file is of phylum gpl, make, patch, or script;
//  - in makefiles, a '\t' preceded by any character other than '\n'.
//
// Merely complains (without throwing) about:
//  - three consecutive newlines, unless gpl or touchstone;
//  - a space immediately before a newline, unless the file is a patch.
void assay_whitespace(file const& f)
{
    std::string const& text = f.data();

    if(contains(text, '\r'))
    {
        throw std::runtime_error("File contains '\\r'.");
    }

    if(contains(text, '\v'))
    {
        throw std::runtime_error("File contains '\\v'.");
    }

    bool const formfeed_tolerated =
           f.is_of_phylum(e_gpl)
        || f.is_of_phylum(e_touchstone)
        ;
    if(!formfeed_tolerated && contains(text, '\f'))
    {
        throw std::runtime_error("File contains '\\f'.");
    }

    bool const tab_tolerated =
           f.is_of_phylum(e_gpl)
        || f.is_of_phylum(e_make)
        || f.is_of_phylum(e_patch)
        || f.is_of_phylum(e_script)
        ;
    if(!tab_tolerated && contains(text, '\t'))
    {
        throw std::runtime_error("File contains '\\t'.");
    }

    // Makefiles may contain tabs, but only at the beginning of a line.
    static boost::regex const postinitial_tab("[^\\n]\\t");
    if(f.is_of_phylum(e_make) && boost::regex_search(text, postinitial_tab))
    {
        throw std::runtime_error("File contains postinitial '\\t'.");
    }

    bool const blank_runs_tolerated =
           f.is_of_phylum(e_gpl)
        || f.is_of_phylum(e_touchstone)
        ;
    if(!blank_runs_tolerated && contains(text, "\n\n\n"))
    {
        complain(f, "contains '\\n\\n\\n'.");
    }

    bool const is_patch = f.is_of_phylum(e_patch);
    if(!is_patch && contains(text, " \n"))
    {
        complain(f, "contains ' \\n'.");
    }
}
// Complain if the file's contents match the given taboo pattern.
//
// f:     file whose data is searched
// regex: pattern text; it is also quoted in the complaint
// flags: extra syntax options (e.g. boost::regex::icase); they are
//        OR-ed with boost::regex::ECMAScript before compiling
void taboo
    (file const&             f
    ,std::string const&      regex
    ,boost::regex::flag_type flags = boost::regex::ECMAScript
    )
{
    boost::regex const pattern(regex, flags | boost::regex::ECMAScript);
    if(!boost::regex_search(f.data(), pattern))
    {
        return;
    }

    std::ostringstream message;
    message << "breaks taboo '" << regex << "'.";
    complain(f, message.str());
}
// Enforce the line-length limit in log files.
//
// Only the part of the file from the line 'MAINTENANCE' onward is
// examined; if that line is absent, a complaint is issued and the
// whole file is examined instead. Lines of 71 or more characters are
// reported together in a single complaint beneath a column ruler,
// except lines that begin with '|' and lines that consist of optional
// spaces followed by 'http:' or 'https:'.
void check_logs(file const& f)
{
    if(!f.is_of_phylum(e_log))
    {
        return;
    }

    std::string const& text = f.data();
    std::string::size_type const marker = text.find("\nMAINTENANCE\n");

    std::string entries;
    if(std::string::npos == marker)
    {
        complain(f, "lacks expected 'MAINTENANCE' line.");
        entries = text;
    }
    else
    {
        entries = text.substr(marker);
    }

    static boost::regex const overlong_line("\\n(?!\\|)(?! *https?:)([^\\n]{71,})(?=\\n)");
    boost::sregex_iterator it(entries.begin(), entries.end(), overlong_line);
    boost::sregex_iterator const no_more_matches;
    if(it == no_more_matches)
    {
        return;
    }

    std::ostringstream message;
    message
        << "violates seventy-character limit:\n"
        << "0000000001111111111222222222233333333334444444444555555555566666666667\n"
        << "1234567890123456789012345678901234567890123456789012345678901234567890"
        ;
    while(it != no_more_matches)
    {
        message << '\n' << (*it)[1];
        ++it;
    }
    complain(f, message.str());
}
// Gather statistics for one file.
//
// Files of certain phyla, and files whose names begin with 'INSTALL'
// or 'README', contribute nothing: a default-constructed 'statistics'
// is returned for them. For any other file the result has
//   files_   == 1
//   lines_   == number of '\n' characters in the file
//   defects_ == number of positions at which "??" occurs; overlapping
//               occurrences are counted, so "???" counts as two
statistics statistics::analyze_file(file const& f)
{
    statistics z;

    if
        (  f.is_of_phylum(e_binary)
        || f.is_of_phylum(e_expungible)
        || f.is_of_phylum(e_gpl)
        || f.is_of_phylum(e_log)
        || f.is_of_phylum(e_md5)
        || f.is_of_phylum(e_patch)
        || f.is_of_phylum(e_touchstone)
        || f.is_of_phylum(e_xml_input)
        || f.phyloanalyze("^INSTALL")
        || f.phyloanalyze("^README")
        )
    {
        return z;
    }

    ++z.files_;

    std::string const& s = f.data();
    // Scan from index zero so that a newline in the very first position
    // is counted; only the "??" test needs a preceding character.
    for(std::string::size_type i = 0; i < s.size(); ++i)
    {
        if('\n' == s[i])
        {
            ++z.lines_;
        }
        if(0 != i && '?' == s[i - 1] && '?' == s[i])
        {
            ++z.defects_;
        }
    }

    return z;
}
// Apply the list of taboo patterns to a file.
//
// Files whose names match 'test_coding_rules' or '^md5sums$' are not
// examined. Most taboos apply to every other file; a few are waived
// for particular phyla or file names, as noted below. The private
// taboos from my_taboos() are skipped for files that match
// my_taboo_indulgence() or that declare themselves automatically
// generated from custom input.
void enforce_taboos(file const& f)
{
    bool const exempt =
           f.phyloanalyze("test_coding_rules")
        || f.phyloanalyze("^md5sums$")
        ;
    if(exempt)
    {
        return;
    }

    // ASCII copyright symbol requires upper-case 'C'.
    taboo(f, "\\(c\\) *[0-9]");

    // Former addresses of the Free Software Foundation.
    taboo(f, "Cambridge");
    taboo(f, "Temple");

    // Patented.
    taboo(f, "\\.gif", boost::regex::icase);

    // Obsolete email address.
    // NOTE(review): as written, this pattern begins with a repetition
    // operator, which Boost.Regex appears to reject when compiling the
    // expression. The literal looks like a redacted address -- confirm
    // and restore the intended pattern.
    taboo(f, "*****@*****.**");

    // Obscured email address.
    taboo(f, "address@hidden");

    // Certain proprietary libraries.
    taboo(f, "\\bowl\\b", boost::regex::icase);
    taboo(f, "vtss", boost::regex::icase);

    // Suspiciously specific to msw.
    taboo(f, "Microsoft");
    taboo(f, "Visual [A-Z]");
    taboo(f, "\\bWIN\\b");
    taboo(f, "\\bExcel\\b");

    // Insinuated by certain msw tools.
    taboo(f, "Microsoft Word");
    taboo(f, "Stylus Studio");
    taboo(f, "Sonic Software");

    // This IANA-approved charset is still useful for html.
    if(!f.is_of_phylum(e_html))
    {
        taboo(f, "windows-1252");
    }

    taboo(f, "Arial");

    bool const exe_tolerated =
           f.is_of_phylum(e_log)
        || f.is_of_phylum(e_make)
        || f.is_of_phylum(e_synopsis)
        ;
    if(!exe_tolerated)
    {
        taboo(f, "\\bexe\\b", boost::regex::icase);
    }

    bool const win32_tolerated =
           f.is_of_phylum(e_make)
        || f.is_of_phylum(e_patch)
        || f.phyloanalyze("config.hpp")
        || f.phyloanalyze("configure.ac") // GNU libtool uses 'win32-dll'.
        ;
    if(!win32_tolerated)
    {
        taboo(f, "WIN32", boost::regex::icase);
    }

    bool const indulged =
           boost::regex_search(f.data(), boost::regex(my_taboo_indulgence()))
        || contains(f.data(), "Automatically generated from custom input.")
        ;
    if(indulged)
    {
        return;
    }

    // Unspeakable private taboos.
    // Each entry maps a pattern to a flag: true means case-insensitive.
    typedef std::map<std::string, bool> taboo_map;
    taboo_map const private_taboos = my_taboos();
    for
        (taboo_map::const_iterator entry = private_taboos.begin()
        ;entry != private_taboos.end()
        ;++entry
        )
    {
        boost::regex::flag_type syntax = boost::regex::ECMAScript;
        if(entry->second)
        {
            syntax = boost::regex::ECMAScript | boost::regex::icase;
        }
        taboo(f, entry->first, syntax);
    }
}
// Diagnose irregular defect markers.
//
// Two kinds of marker are examined: a doubled question mark and a
// doubled exclamation mark. In either case the marker is regular only
// if the text captured before it is an approved tag followed by one
// space, and the character after it is a space or a newline. The
// only approved tag for the question-mark form is 'TODO'; the
// exclamation-mark form accepts the tags tabulated below.
void check_defect_markers(file const& f)
{
    if(f.phyloanalyze("^test_coding_rules_test.sh$"))
    {
        return;
    }

    std::string const& text = f.data();
    boost::sregex_iterator const no_more_matches;

    // Doubled question marks.
    static boost::regex const question_marker("(\\b\\w+\\b\\W*)\\?\\?(.)");
    for
        (boost::sregex_iterator it(text.begin(), text.end(), question_marker)
        ;it != no_more_matches
        ;++it
        )
    {
        boost::smatch const& m(*it);

        bool const preceding_ok = "TODO " == m[1];
        bool const following_ok = " " == m[2] || "\n" == m[2];
        if(preceding_ok && following_ok)
        {
            continue;
        }

        std::ostringstream message;
        message << "has irregular defect marker '" << m[0] << "'.";
        complain(f, message.str());
    }

    // Doubled exclamation marks.
    static boost::regex const exclamation_marker("(\\b\\w+\\b\\W?)!!(.)");
    static char const* const approved_tags[] =
        {"APACHE "
        ,"BOOST "
        ,"COMPILER "
        ,"CYGWIN "
        ,"DATABASE "
        ,"ET "
        ,"EVGENIY "
        ,"IHS "
        ,"INELEGANT "
        ,"INPUT "
        ,"PORT "
        ,"SOMEDAY "
        ,"TAXATION "
        ,"THIRD_PARTY "
        ,"TRICKY "
        ,"USER "
        ,"WX "
        ,"XMLWRAPP "
        };
    char const* const* const tags_end =
        approved_tags + sizeof approved_tags / sizeof approved_tags[0];

    for
        (boost::sregex_iterator it(text.begin(), text.end(), exclamation_marker)
        ;it != no_more_matches
        ;++it
        )
    {
        boost::smatch const& m(*it);

        // The captured prefix must equal one of the approved tags exactly.
        std::string const prefix = m[1];
        bool preceding_ok = false;
        for(char const* const* tag = approved_tags; tag != tags_end; ++tag)
        {
            if(prefix == *tag)
            {
                preceding_ok = true;
                break;
            }
        }

        bool const following_ok = " " == m[2] || "\n" == m[2];
        if(preceding_ok && following_ok)
        {
            continue;
        }

        std::ostringstream message;
        message << "has irregular defect marker '" << m[0] << "'.";
        complain(f, message.str());
    }
}