// Constructs the deprecated-macro inspector.
//
// The error-file counter starts at zero.  m_from_boost_root is set when both
// a "boost" and a "libs" entry exist directly under the search root.  The
// constructor then registers every file extension this inspector handles.
deprecated_macro_check::deprecated_macro_check()
  : m_files_with_errors(0)
  , m_from_boost_root(
      fs::exists(search_root_path() / "boost")
      && fs::exists(search_root_path() / "libs"))
{
  // Extensions are registered in this exact order.
  static const char * const extensions[] =
  {
    ".c", ".cpp", ".cxx", ".h", ".hpp", ".hxx", ".ipp"
  };

  const std::size_t extension_count =
    sizeof(extensions) / sizeof(extensions[0]);

  for ( std::size_t i = 0; i < extension_count; ++i )
    register_signature( extensions[i] );
}
void link_check::inspect( const string & /*library_name*/, const path & full_path ) { // keep track of paths already encountered to reduce disk activity if ( !fs::is_directory( full_path ) ) m_paths[ relative_to( full_path, search_root_path() ) ] |= m_present; }
// may return an empty string [gps] string impute_library( const path & full_dir_path ) { path relative( relative_to( full_dir_path, search_root_path() ) ); if ( relative.empty() ) return "boost-root"; string first( (*relative.begin()).string() ); string second = // borland 5.61 requires op= ++relative.begin() == relative.end() ? string() : (*++relative.begin()).string(); if ( first == "boost" ) return second; return (( first == "libs" || first == "tools" ) && !second.empty()) ? second : first; }
void inspector::error( const string & library_name, const path & full_path, const string & msg, std::size_t line_number) { ++error_count; error_msg err_msg; err_msg.library = library_name; err_msg.rel_path = relative_to( full_path, search_root_path() ); err_msg.msg = msg; err_msg.line_number = line_number; msgs.push_back( err_msg ); // std::cout << library_name << ": " // << full_path.string() << ": " // << msg << '\n'; }
void apple_macro_check::inspect( const string & library_name, const path & full_path, // example: c:/foo/boost/filesystem/path.hpp const string & contents ) // contents of file to be inspected { if (contents.find( "hpxinspect:" "noapple_macros" ) != string::npos) return; // Only check files in the boost directory, as we can avoid including the // apple test headers elsewhere. path relative( relative_to( full_path, search_root_path() ) ); if ( relative.empty() || *relative.begin() != "boost") return; boost::sregex_iterator cur(contents.begin(), contents.end(), apple_macro_regex), end; long errors = 0; for( ; cur != end; ++cur /*, ++m_files_with_errors*/ ) { if(!(*cur)[3].matched) { string::const_iterator it = contents.begin(); string::const_iterator match_it = (*cur)[0].first; string::const_iterator line_start = it; string::size_type line_number = 1; for ( ; it != match_it; ++it) { if (string::traits_type::eq(*it, '\n')) { ++line_number; line_start = it + 1; // could be end() } } ++errors; error( library_name, full_path, "Apple macro clash: " + std::string((*cur)[0].first, (*cur)[0].second-1), line_number ); } } if(errors > 0) { ++m_files_with_errors; } }
void link_check::close() { for ( m_path_map::const_iterator itr = m_paths.begin(); itr != m_paths.end(); ++itr ) { // std::clog << itr->first << " " << itr->second << "\n"; if ( (itr->second & m_linked_to) != m_linked_to && (itr->second & m_nounlinked_errors) != m_nounlinked_errors && (itr->first.rfind( ".html" ) == itr->first.size()-5 || itr->first.rfind( ".htm" ) == itr->first.size()-4 || itr->first.rfind( ".css" ) == itr->first.size()-4) // because they may be redirectors, it is OK if these are unlinked: && itr->first.rfind( "index.html" ) == string::npos && itr->first.rfind( "index.htm" ) == string::npos ) { ++m_unlinked_errors; path full_path( search_root_path() / path(itr->first) ); error( impute_library( full_path ), full_path, "Unlinked file" ); } } }
void link_check::do_url( const string & url, const string & library_name, const path & source_path, bool no_link_errors, bool allow_external_content, std::string::const_iterator contents_begin, std::string::const_iterator url_start ) // precondition: source_path.is_complete() { if(!no_link_errors && url.empty()) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Empty URL.", ln ); return; } // Decode ampersand encoded characters. string decoded_url = is_css(source_path) ? url : decode_ampersands(url); if(decoded_url.empty()) { if(!no_link_errors) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Invalid URL (invalid ampersand encodings): " + url, ln ); } return; } boost::smatch m; if(!boost::regex_match(decoded_url, m, url_decompose_regex)) { if(!no_link_errors) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Invalid URL: " + decoded_url, ln ); } return; } bool scheme_matched = m[2].matched, authority_matched = m[4].matched, //query_matched = m[7].matched, fragment_matched = m[9].matched; std::string scheme(m[2]), authority(m[4]), url_path(m[5]), //query(m[7]), fragment(m[9]); // Check for external content if(!allow_external_content && (authority_matched || scheme_matched)) { if(!no_link_errors) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "External content: " + decoded_url, ln ); } } // Protocol checks if(scheme_matched) { if(scheme == "http" || scheme == "https") { // All http links should have a hostname. Generally if they don't // it's by mistake. If they shouldn't, then a protocol isn't // required. 
if(!authority_matched) { if(!no_link_errors) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "No hostname: " + decoded_url, ln ); } } return; } else if(scheme == "file") { if(!no_link_errors) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Invalid URL (hardwired file): " + decoded_url, ln ); } } else if(scheme == "mailto" || scheme == "ftp" || scheme == "news" || scheme == "javascript") { if ( !no_link_errors && is_css(source_path) ) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Invalid protocol for css: " + decoded_url, ln ); } } else { if(!no_link_errors) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Unknown protocol: '" + scheme + "' in url: " + decoded_url, ln ); } } return; } // Hostname without protocol. if(authority_matched) { if(!no_link_errors) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Invalid URL (hostname without protocol): " + decoded_url, ln ); } } // Check the fragment identifier if ( fragment_matched ) { if ( is_css(source_path) ) { if ( !no_link_errors ) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Fragment link in CSS: " + decoded_url, ln ); } } else { if ( !no_link_errors && fragment.find( '#' ) != string::npos ) { ++m_bookmark_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Invalid bookmark: " + decoded_url, ln ); } else if ( !no_link_errors && url_path.empty() && !fragment.empty() // w3.org recommends case-sensitive broken bookmark checking // since some browsers do a case-sensitive match. 
&& bookmarks.find(decode_percents(fragment)) == bookmarks.end() ) { ++m_broken_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Unknown bookmark: " + decoded_url, ln ); } } // No more to do if it's just a fragment identifier if(url_path.empty()) return; } // Detect characters banned by RFC2396: if ( !no_link_errors && decoded_url.find_first_of( " <>\"{}|\\^[]'" ) != string::npos ) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Invalid character in URL: " + decoded_url, ln ); } // Check that we actually have a path. if(url_path.empty()) { if(!no_link_errors) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Invalid URL (empty path in relative url): " + decoded_url, ln ); } } // Decode percent encoded characters. string decoded_path = decode_percents(url_path); if(decoded_path.empty()) { if(!no_link_errors) { ++m_invalid_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Invalid URL (invalid character encodings): " + decoded_url, ln ); } return; } // strip url of references to current dir if ( decoded_path[0]=='.' 
&& decoded_path[1]=='/' ) decoded_path.erase( 0, 2 ); // url is relative source_path.branch() // convert to target_path, which is_complete() path target_path; try { target_path = source_path.branch_path() /= path( decoded_path ); } catch ( const fs::filesystem_error & ) { if(!no_link_errors) { int ln = std::count( contents_begin, url_start, '\n' ) + 1; ++m_invalid_errors; error( library_name, source_path, "Invalid URL (error resolving path): " + decoded_url, ln ); } return; } // create a m_paths entry if necessary std::pair< const string, int > entry( relative_to( target_path, search_root_path() ), 0 ); m_path_map::iterator itr( m_paths.find( entry.first ) ); if ( itr == m_paths.end() ) { if ( fs::exists( target_path ) ) entry.second = m_present; itr = m_paths.insert( entry ).first; } // itr now points to the m_paths entry itr->second |= m_linked_to; // if target isn't present, the link is broken if ( !no_link_errors && (itr->second & m_present) == 0 ) { ++m_broken_errors; int ln = std::count( contents_begin, url_start, '\n' ) + 1; error( library_name, source_path, "Broken link: " + decoded_url, ln ); } }
void link_check::inspect( const string & library_name, const path & full_path, // example: c:/foo/boost/filesystem/path.hpp const string & contents ) // contents of file to be inspected { if (contents.find( "boostinspect:" "nounlinked" ) != string::npos) m_paths[ relative_to( full_path, search_root_path() ) ] |= m_nounlinked_errors; bool no_link_errors = (contents.find( "boostinspect:" "nolink" ) != string::npos); // build bookmarks databases bookmarks.clear(); bookmarks_lowercase.clear(); string::const_iterator a_start( contents.begin() ); string::const_iterator a_end( contents.end() ); boost::match_results< string::const_iterator > a_what; boost::match_flag_type a_flags = boost::match_default; if(!is_css(full_path)) { string previous_id; while( boost::regex_search( a_start, a_end, a_what, html_bookmark_regex, a_flags) ) { // a_what[0] contains the whole string iterators. // a_what[1] contains the tag iterators. // a_what[2] contains the attribute name. // a_what[4] contains the bookmark iterators. if (a_what[4].matched) { string tag( a_what[1].first, a_what[1].second ); boost::algorithm::to_lower(tag); string attribute( a_what[2].first, a_what[2].second ); boost::algorithm::to_lower(attribute); string bookmark( a_what[4].first, a_what[4].second ); bool name_following_id = ( attribute == "name" && previous_id == bookmark ); if ( tag != "meta" && attribute == "id" ) previous_id = bookmark; else previous_id.clear(); if ( tag != "meta" && !name_following_id ) { bookmarks.insert( bookmark ); // std::cout << "******************* " << bookmark << '\n'; // w3.org recommends case-insensitive checking for duplicate bookmarks // since some browsers do a case-insensitive match. 
string bookmark_lowercase( bookmark ); boost::algorithm::to_lower(bookmark_lowercase); std::pair<bookmark_set::iterator, bool> result = bookmarks_lowercase.insert( bookmark_lowercase ); if (!result.second) { ++m_duplicate_bookmark_errors; int ln = std::count( contents.begin(), a_what[3].first, '\n' ) + 1; error( library_name, full_path, "Duplicate bookmark: " + bookmark, ln ); } } } a_start = a_what[0].second; // update search position a_flags |= boost::match_prev_avail; // update flags a_flags |= boost::match_not_bob; } } // process urls string::const_iterator start( contents.begin() ); string::const_iterator end( contents.end() ); boost::match_results< string::const_iterator > what; boost::match_flag_type flags = boost::match_default; if(!is_css(full_path)) { while( boost::regex_search( start, end, what, html_url_regex, flags) ) { // what[0] contains the whole string iterators. // what[1] contains the element type iterators. // what[3] contains the URL iterators. if(what[3].matched) { string type( what[1].first, what[1].second ); boost::algorithm::to_lower(type); // TODO: Complain if 'link' tags use external stylesheets. do_url( string( what[3].first, what[3].second ), library_name, full_path, no_link_errors, type == "a" || type == "link", contents.begin(), what[3].first ); } start = what[0].second; // update search position flags |= boost::match_prev_avail; // update flags flags |= boost::match_not_bob; } } while( boost::regex_search( start, end, what, css_url_regex, flags) ) { // what[0] contains the whole string iterators. // what[2] contains the URL iterators. if(what[2].matched) { do_url( string( what[2].first, what[2].second ), library_name, full_path, no_link_errors, false, contents.begin(), what[3].first ); } start = what[0].second; // update search position flags |= boost::match_prev_avail; // update flags flags |= boost::match_not_bob; } }