/// Checks whether the path is a parent of another path. /// /// A path is considered to be a parent of itself. /// /// \return True if this path is a parent of p. bool fs::path::is_parent_of(path p) const { do { if ((*this) == p) return true; p = p.branch_path(); } while (p != fs::path(".") && p != fs::path("/")); return false; }
// Recursively restores a dump directory tree (or a single .bson/.bin file)
// into the connected server.
//
// root      - file or directory to restore
// use_db    - restore into the database named by _db; root must then be a
//             flat dump of a single database (no subdirectories)
// use_coll  - restore into the collection named by _coll; root must then be
//             a dump of exactly one collection (one file)
// top_level - true only for the outermost call; used to skip oplog.bson
//
// Side effects: writes progress to log()/out(), may drop collections when
// _drop is set, updates _curns/_curdb/_users, and calls processFile() for
// each data file.
void drillDown( path root, bool use_db, bool use_coll, bool top_level=false ) {
    log(2) << "drillDown: " << root.string() << endl;

    // skip hidden files and directories
    // NOTE(review): leaf()[0] on an empty leaf is undefined pre-C++11 —
    // presumably leaf() is never empty here; confirm.
    if (root.leaf()[0] == '.' && root.leaf() != ".")
        return;

    if ( is_directory( root ) ) {
        directory_iterator end;
        directory_iterator i(root);
        path indexes;  // deferred so indexes are built after data is loaded
        while ( i != end ) {
            path p = *i;
            i++;  // advanced early: the use_coll check below relies on this

            if (use_db) {
                // --db demands a flat, single-database dump.
                if (is_directory(p)) {
                    cerr << "ERROR: root directory must be a dump of a single database" << endl;
                    cerr << " when specifying a db name with --db" << endl;
                    printHelp(cout);
                    return;
                }
            }

            if (use_coll) {
                // --collection demands exactly one file: i was already
                // incremented, so i != end means there is a second entry.
                if (is_directory(p) || i != end) {
                    cerr << "ERROR: root directory must be a dump of a single collection" << endl;
                    cerr << " when specifying a collection name with --collection" << endl;
                    printHelp(cout);
                    return;
                }
            }

            // don't insert oplog
            if (top_level && !use_db && p.leaf() == "oplog.bson")
                continue;

            // Hold system.indexes.bson back so indexes are created only
            // after all collection data has been restored.
            if ( p.leaf() == "system.indexes.bson" )
                indexes = p;
            else
                drillDown(p, use_db, use_coll);
        }

        if (!indexes.empty())
            drillDown(indexes, use_db, use_coll);

        return;
    }

    // Only .bson / .bin dump files are understood.
    if ( ! ( endsWith( root.string().c_str() , ".bson" ) || endsWith( root.string().c_str() , ".bin" ) ) ) {
        cerr << "don't know what to do with file [" << root.string() << "]" << endl;
        return;
    }

    log() << root.string() << endl;

    // Profiling data is never restored.
    if ( root.leaf() == "system.profile.bson" ) {
        log() << "\t skipping" << endl;
        return;
    }

    // Build the target namespace: database part first ...
    string ns;
    if (use_db) {
        ns += _db;
    }
    else {
        // Database name comes from the parent directory of the dump file.
        string dir = root.branch_path().string();
        if ( dir.find( "/" ) == string::npos )
            ns += dir;
        else
            ns += dir.substr( dir.find_last_of( "/" ) + 1 );
        if ( ns.size() == 0 )
            ns = "test";  // dump file sitting at the top level: default db
    }

    assert( ns.size() );

    // ... then the collection part (file name minus extension).
    if (use_coll) {
        ns += "." + _coll;
    }
    else {
        string l = root.leaf();
        l = l.substr( 0 , l.find_last_of( "." ) );
        ns += "." + l;
    }

    out() << "\t going into namespace [" << ns << "]" << endl;

    if ( _drop ) {
        if (root.leaf() != "system.users.bson" ) {
            out() << "\t dropping" << endl;
            conn().dropCollection( ns );
        } else {
            // system.users is not dropped wholesale (that would lock
            // everyone out mid-restore). Instead, record the users
            // currently in the DB so stale ones can be removed after
            // the dump's users have been loaded (see below).
            // Create map of the users currently in the DB
            BSONObj fields = BSON("user" << 1);
            scoped_ptr<DBClientCursor> cursor(conn().query(ns, Query(), 0, 0, &fields));
            while (cursor->more()) {
                BSONObj user = cursor->next();
                _users.insert(user["user"].String());
            }
        }
    }

    // _curns points into ns's buffer; ns stays alive until this function
    // returns, which covers the processFile() call below.
    _curns = ns.c_str();
    _curdb = NamespaceString(_curns).db;
    processFile( root );
    if (_drop && root.leaf() == "system.users.bson") {
        // Delete any users that used to exist but weren't in the dump file
        for (set<string>::iterator it = _users.begin(); it != _users.end(); ++it) {
            BSONObj userMatch = BSON("user" << *it);
            conn().remove(ns, Query(userMatch));
        }
        _users.clear();
    }
}
// Recursively restores a dump directory tree (or a single .bson/.bin file)
// by memory-mapping each data file and inserting its BSON objects into the
// namespace derived from <parent-directory>.<file-name-minus-extension>.
void drillDown( path root ) {
    // Directories: recurse into every entry.
    if ( is_directory( root ) ) {
        directory_iterator dirEnd;
        for ( directory_iterator it( root ); it != dirEnd; ++it ) {
            path child = *it;
            drillDown( child );
        }
        return;
    }

    // Only .bson / .bin dump files are understood.
    bool recognized = endsWith( root.string().c_str() , ".bson" ) ||
                      endsWith( root.string().c_str() , ".bin" );
    if ( ! recognized ) {
        cerr << "don't know what to do with [" << root.string() << "]" << endl;
        return;
    }

    out() << root.string() << endl;

    // Namespace: database part from the parent directory name ...
    string ns;
    string parentDir = root.branch_path().string();
    if ( parentDir.find( "/" ) == string::npos )
        ns += parentDir;
    else
        ns += parentDir.substr( parentDir.find_last_of( "/" ) + 1 );

    // ... collection part from the file name without its extension.
    string base = root.leaf();
    ns += "." + base.substr( 0 , base.find_last_of( "." ) );

    // Nothing to map in an empty file; mmap would fail anyway.
    if ( boost::filesystem::file_size( root ) == 0 ) {
        out() << "file " + root.native_file_string() + " empty, aborting" << endl;
        return;
    }

    out() << "\t going into namespace [" << ns << "]" << endl;

    MemoryMappedFile mmf;
    assert( mmf.map( root.string().c_str() ) );

    // Walk the mapping object by object; each BSONObj knows its own size.
    char * cursor = (char*)mmf.viewOfs();
    int bytesSoFar = 0;
    int objects = 0;
    while ( bytesSoFar < mmf.length() ) {
        BSONObj obj( cursor );
        conn().insert( ns.c_str() , obj );
        bytesSoFar += obj.objsize();
        cursor += obj.objsize();
        ++objects;
        // Progress line every 1000 objects.
        if ( objects % 1000 == 0 )
            out() << "read " << bytesSoFar << "/" << mmf.length() << " bytes so far. " << objects << " objects" << endl;
    }
    out() << "\t " << objects << " objects" << endl;
}
// Recursively restores a dump directory tree (or a single .bson/.bin file)
// into the connected server.
//
// root     - file or directory to restore
// use_db   - restore into the database named by _db; root must then be a
//            flat dump of a single database (no subdirectories)
// use_coll - restore into the collection named by _coll; root must then be
//            a dump of exactly one collection (one file)
//
// Side effects: writes progress to log()/out(), may drop the target
// collection when _drop is set, updates _curns, and calls processFile()
// for each data file.
void drillDown( path root, bool use_db = false, bool use_coll = false ) {
    log(2) << "drillDown: " << root.string() << endl;
    if ( is_directory( root ) ) {
        directory_iterator end;
        directory_iterator i(root);
        path indexes;  // deferred so indexes are built after data is loaded
        while ( i != end ) {
            path p = *i;
            i++;  // advanced early: the use_coll check below relies on this
            if (use_db) {
                // --db demands a flat, single-database dump.
                if (is_directory(p)) {
                    cerr << "ERROR: root directory must be a dump of a single database" << endl;
                    cerr << " when specifying a db name with --db" << endl;
                    printHelp(cout);
                    return;
                }
            }
            if (use_coll) {
                // --collection demands exactly one file: i was already
                // incremented, so i != end means there is a second entry.
                if (is_directory(p) || i != end) {
                    cerr << "ERROR: root directory must be a dump of a single collection" << endl;
                    cerr << " when specifying a collection name with --collection" << endl;
                    printHelp(cout);
                    return;
                }
            }
            // Optionally hold system.indexes.bson back so indexes are
            // created only after all collection data has been restored.
            if ( _indexesLast && p.leaf() == "system.indexes.bson" )
                indexes = p;
            else
                drillDown(p, use_db, use_coll);
        }
        if (!indexes.empty())
            drillDown(indexes, use_db, use_coll);
        return;
    }
    // Only .bson / .bin dump files are understood.
    if ( ! ( endsWith( root.string().c_str() , ".bson" ) || endsWith( root.string().c_str() , ".bin" ) ) ) {
        cerr << "don't know what to do with [" << root.string() << "]" << endl;
        return;
    }
    log() << root.string() << endl;
    // Profiling data is never restored.
    if ( root.leaf() == "system.profile.bson" ){
        log() << "\t skipping" << endl;
        return;
    }
    // Build the target namespace: database part first ...
    string ns;
    if (use_db) {
        ns += _db;
    }
    else {
        // Database name comes from the parent directory of the dump file.
        string dir = root.branch_path().string();
        if ( dir.find( "/" ) == string::npos )
            ns += dir;
        else
            ns += dir.substr( dir.find_last_of( "/" ) + 1 );
        if ( ns.size() == 0 )
            ns = "test";  // dump file sitting at the top level: default db
    }
    assert( ns.size() );
    // ... then the collection part (file name minus extension).
    if (use_coll) {
        ns += "." + _coll;
    }
    else {
        string l = root.leaf();
        l = l.substr( 0 , l.find_last_of( "." ) );
        ns += "." + l;
    }
    out() << "\t going into namespace [" << ns << "]" << endl;
    if ( _drop ){
        out() << "\t dropping" << endl;
        conn().dropCollection( ns );
    }
    // _curns points into ns's buffer; ns stays alive until this function
    // returns, which covers the processFile() call below.
    _curns = ns.c_str();
    processFile( root );
}
// Recursively restores a dump directory tree (or a single .bson/.bin file)
// by streaming each data file and inserting its BSON objects.
//
// root   - file or directory to restore
// use_db - restore into the database named by _db; root must then be a
//          flat directory of .bson/.bin files (no subdirectories)
//
// Side effects: writes progress to log()/out() and inserts via conn().
void drillDown( path root, bool use_db = false ) {
    log(2) << "drillDown: " << root.string() << endl;
    if ( is_directory( root ) ) {
        directory_iterator end;
        directory_iterator i(root);
        while ( i != end ) {
            path p = *i;
            if (use_db) {
                // --db demands a flat, single-database dump of data files.
                if (is_directory(p) || !(endsWith(p.string().c_str(), ".bson") || endsWith(p.string().c_str(), ".bin" ))) {
                    cerr << "ERROR: root directory must be a dump of a single database" << endl;
                    cerr << " when specifying a db name with --db" << endl;
                    printHelp(cout);
                    return;
                }
            }
            drillDown(p, use_db);
            i++;
        }
        return;
    }

    // Only .bson / .bin dump files are understood.
    if ( ! ( endsWith( root.string().c_str() , ".bson" ) || endsWith( root.string().c_str() , ".bin" ) ) ) {
        cerr << "don't know what to do with [" << root.string() << "]" << endl;
        return;
    }

    out() << root.string() << endl;

    // Build the target namespace: database part from _db or the parent
    // directory name, collection part from the file name minus extension.
    string ns;
    if (use_db) {
        ns += _db;
    }
    else {
        string dir = root.branch_path().string();
        if ( dir.find( "/" ) == string::npos )
            ns += dir;
        else
            ns += dir.substr( dir.find_last_of( "/" ) + 1 );
    }
    {
        string l = root.leaf();
        l = l.substr( 0 , l.find_last_of( "." ) );
        ns += "." + l;
    }

    long long fileLength = file_size( root );

    if ( fileLength == 0 ) {
        out() << "file " + root.native_file_string() + " empty, skipping" << endl;
        return;
    }

    out() << "\t going into namespace [" << ns << "]" << endl;

    string fileString = root.string();
    ifstream file( fileString.c_str() , ios_base::in | ios_base::binary);
    if ( ! file.is_open() ){
        log() << "error opening file: " << fileString << endl;
        return;
    }

    log(1) << "\t file size: " << fileLength << endl;

    long long read = 0;
    long long num = 0;

    // Scale the progress-message interval with file size so big restores
    // don't flood the log.
    int msgDelay = (int)(1000 * ( 1 + ( fileLength / ( 1024.0 * 1024 * 400 ) ) ) );
    log(1) << "\t msg delay: " << msgDelay << endl;

    const int BUF_SIZE = 1024 * 1024 * 5;
    char * buf = (char*)malloc( BUF_SIZE );
    assert( buf );  // fail fast if the 5MB staging buffer can't be allocated

    while ( read < fileLength ) {
        // Each BSON object begins with a 4-byte total-length prefix.
        file.read( buf , 4 );
        if ( file.fail() ) {
            // Truncated or unreadable file: stop rather than reinterpret
            // stale buffer contents as another object.
            cerr << "error reading from file: " << fileString << endl;
            break;
        }

        int size = ((int*)buf)[0];
        // Sanity-check the declared length: a BSON object is at least
        // 5 bytes and must fit in the staging buffer. Without the lower
        // bound a corrupt length would pass a negative count to read().
        assert( 4 < size && size < BUF_SIZE );

        file.read( buf + 4 , size - 4 );
        if ( file.fail() ) {
            cerr << "error reading from file: " << fileString << endl;
            break;
        }

        BSONObj o( buf );
        conn().insert( ns.c_str() , o );

        read += o.objsize();
        num++;

        if ( ( logLevel > 0 && num < 10 ) || ! ( num % msgDelay ) )
            out() << "read " << read << "/" << fileLength << " bytes so far. (" << (int)( (read * 100) / fileLength) << "%) " << num << " objects" << endl;
    }

    free( buf );  // released on every path out of the loop

    out() << "\t " << num << " objects" << endl;
}
// Validates a single URL found in a document and records link/bookmark
// statistics.
//
// url                    - raw URL text as it appeared in the source
// library_name           - library the source file belongs to (for reports)
// source_path            - file containing the URL
// no_link_errors         - when true, suppress error reporting (counters and
//                          m_paths bookkeeping for broken-link detection are
//                          still partially updated; see each branch)
// allow_external_content - when false, any absolute URL is reported
// contents_begin         - start of the file contents (for line numbering)
// url_start              - position of the URL within the contents
//
// The line number is computed lazily, only on error paths, because counting
// newlines over the file prefix is relatively expensive and most URLs are
// fine.
void link_check::do_url( const string & url, const string & library_name, const path & source_path, bool no_link_errors, bool allow_external_content, std::string::const_iterator contents_begin, std::string::const_iterator url_start )
// precondition: source_path.is_complete()
{
    if(!no_link_errors && url.empty()) {
        ++m_invalid_errors;
        int ln = std::count( contents_begin, url_start, '\n' ) + 1;
        error( library_name, source_path, "Empty URL.", ln );
        return;
    }

    // Decode ampersand encoded characters.
    // CSS is not HTML, so &amp;-style entities are left untouched there.
    string decoded_url = is_css(source_path) ? url : decode_ampersands(url);
    if(decoded_url.empty()) {
        if(!no_link_errors) {
            ++m_invalid_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path, "Invalid URL (invalid ampersand encodings): " + url, ln );
        }
        return;
    }

    // Split the URL into scheme/authority/path/query/fragment per the
    // RFC 3986-style decomposition regex.
    boost::smatch m;
    if(!boost::regex_match(decoded_url, m, url_decompose_regex)) {
        if(!no_link_errors) {
            ++m_invalid_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path, "Invalid URL: " + decoded_url, ln );
        }
        return;
    }

    bool scheme_matched = m[2].matched, authority_matched = m[4].matched, //query_matched = m[7].matched,
        fragment_matched = m[9].matched;

    std::string scheme(m[2]), authority(m[4]), url_path(m[5]), //query(m[7]),
        fragment(m[9]);

    // Check for external content
    if(!allow_external_content && (authority_matched || scheme_matched)) {
        if(!no_link_errors) {
            ++m_invalid_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path, "External content: " + decoded_url, ln );
        }
    }

    // Protocol checks. Any URL with a scheme is fully handled here (note
    // the returns): absolute URLs never reach the local-path checks below.
    if(scheme_matched) {
        if(scheme == "http" || scheme == "https") {
            // All http links should have a hostname. Generally if they don't
            // it's by mistake. If they shouldn't, then a protocol isn't
            // required.
            if(!authority_matched) {
                if(!no_link_errors) {
                    ++m_invalid_errors;
                    int ln = std::count( contents_begin, url_start, '\n' ) + 1;
                    error( library_name, source_path, "No hostname: " + decoded_url, ln );
                }
            }
            return;
        }
        else if(scheme == "file") {
            // file: URLs hardwire a local filesystem layout.
            if(!no_link_errors) {
                ++m_invalid_errors;
                int ln = std::count( contents_begin, url_start, '\n' ) + 1;
                error( library_name, source_path, "Invalid URL (hardwired file): " + decoded_url, ln );
            }
        }
        else if(scheme == "mailto" || scheme == "ftp" || scheme == "news" || scheme == "javascript") {
            // These are acceptable in HTML but meaningless inside CSS.
            if ( !no_link_errors && is_css(source_path) ) {
                ++m_invalid_errors;
                int ln = std::count( contents_begin, url_start, '\n' ) + 1;
                error( library_name, source_path, "Invalid protocol for css: " + decoded_url, ln );
            }
        }
        else {
            if(!no_link_errors) {
                ++m_invalid_errors;
                int ln = std::count( contents_begin, url_start, '\n' ) + 1;
                error( library_name, source_path, "Unknown protocol: '" + scheme + "' in url: " + decoded_url, ln );
            }
        }
        return;
    }

    // Hostname without protocol.
    if(authority_matched) {
        if(!no_link_errors) {
            ++m_invalid_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path, "Invalid URL (hostname without protocol): " + decoded_url, ln );
        }
    }

    // Check the fragment identifier
    if ( fragment_matched ) {
        if ( is_css(source_path) ) {
            if ( !no_link_errors ) {
                ++m_invalid_errors;
                int ln = std::count( contents_begin, url_start, '\n' ) + 1;
                error( library_name, source_path, "Fragment link in CSS: " + decoded_url, ln );
            }
        }
        else {
            // A '#' inside the fragment itself means the URL had two of them.
            if ( !no_link_errors && fragment.find( '#' ) != string::npos ) {
                ++m_bookmark_errors;
                int ln = std::count( contents_begin, url_start, '\n' ) + 1;
                error( library_name, source_path, "Invalid bookmark: " + decoded_url, ln );
            }
            // Same-document fragment: check it against the bookmarks
            // gathered from this file. (Fragments pointing into other
            // files are not resolved here.)
            else if ( !no_link_errors && url_path.empty() && !fragment.empty()
                // w3.org recommends case-sensitive broken bookmark checking
                // since some browsers do a case-sensitive match.
                && bookmarks.find(decode_percents(fragment)) == bookmarks.end() ) {
                ++m_broken_errors;
                int ln = std::count( contents_begin, url_start, '\n' ) + 1;
                error( library_name, source_path, "Unknown bookmark: " + decoded_url, ln );
            }
        }

        // No more to do if it's just a fragment identifier
        if(url_path.empty()) return;
    }

    // Detect characters banned by RFC2396:
    if ( !no_link_errors && decoded_url.find_first_of( " <>\"{}|\\^[]'" ) != string::npos ) {
        ++m_invalid_errors;
        int ln = std::count( contents_begin, url_start, '\n' ) + 1;
        error( library_name, source_path, "Invalid character in URL: " + decoded_url, ln );
    }

    // Check that we actually have a path.
    if(url_path.empty()) {
        if(!no_link_errors) {
            ++m_invalid_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path, "Invalid URL (empty path in relative url): " + decoded_url, ln );
        }
    }

    // Decode percent encoded characters.
    string decoded_path = decode_percents(url_path);
    if(decoded_path.empty()) {
        if(!no_link_errors) {
            ++m_invalid_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path, "Invalid URL (invalid character encodings): " + decoded_url, ln );
        }
        return;
    }

    // strip url of references to current dir
    // NOTE(review): decoded_path[1] on a one-character path reads the
    // position just past the last character; well-defined ('\0') only
    // from C++11 on — confirm the build mode, or guard on size().
    if ( decoded_path[0]=='.' && decoded_path[1]=='/' ) decoded_path.erase( 0, 2 );

    // url is relative source_path.branch()
    // convert to target_path, which is_complete()
    path target_path;
    try { target_path = source_path.branch_path() /= path( decoded_path ); }
    catch ( const fs::filesystem_error & ) {
        if(!no_link_errors) {
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            ++m_invalid_errors;
            error( library_name, source_path, "Invalid URL (error resolving path): " + decoded_url, ln );
        }
        return;
    }

    // create a m_paths entry if necessary
    std::pair< const string, int > entry( relative_to( target_path, search_root_path() ), 0 );
    m_path_map::iterator itr( m_paths.find( entry.first ) );
    if ( itr == m_paths.end() ) {
        // First time this target has been seen: record (once) whether it
        // exists on disk so later links to it don't re-stat the file.
        if ( fs::exists( target_path ) ) entry.second = m_present;
        itr = m_paths.insert( entry ).first;
    }

    // itr now points to the m_paths entry
    itr->second |= m_linked_to;

    // if target isn't present, the link is broken
    if ( !no_link_errors && (itr->second & m_present) == 0 ) {
        ++m_broken_errors;
        int ln = std::count( contents_begin, url_start, '\n' ) + 1;
        error( library_name, source_path, "Broken link: " + decoded_url, ln );
    }
}