示例#1
0
文件: path.cpp 项目: Bhudipta/minix
/// Tells whether this path is an ancestor of (or equal to) another path.
///
/// The candidate is compared against this path and then repeatedly replaced
/// by its branch path, so every path counts as a parent of itself.  The walk
/// ends as soon as the shortened candidate becomes "." or "/"; those two
/// terminal values are not themselves compared against this path.
///
/// \param p The path whose ancestry is inspected.  It is taken by value
///     because it is shortened in place during the walk.
///
/// \return True if this path is a parent of p.
bool
fs::path::is_parent_of(path p) const
{
    for (;;) {
        if ((*this) == p)
            return true;

        // Step one level up and decide whether there is anything left to
        // compare against.
        p = p.branch_path();

        const bool may_ascend = p != fs::path(".") && p != fs::path("/");
        if (!may_ascend)
            return false;
    }
}
示例#2
0
    // Restores the dump located at 'root'.
    //
    // Directories are walked recursively; a "system.indexes.bson" entry is
    // set aside and processed after every other entry of its directory.
    // Regular files must end in ".bson" or ".bin" and are handed to
    // processFile() once the target namespace has been worked out.
    //
    //   root      - file or directory to restore
    //   use_db    - take the database name from _db instead of the name of
    //               the directory containing the file
    //   use_coll  - take the collection name from _coll instead of the file
    //               name
    //   top_level - when set (and use_db is not), an "oplog.bson" entry of
    //               the directory is skipped
    void drillDown( path root, bool use_db, bool use_coll, bool top_level=false ) {
        log(2) << "drillDown: " << root.string() << endl;

        const string rootLeaf = root.leaf();

        // skip hidden files and directories ("." itself is not hidden)
        if ( rootLeaf != "." && rootLeaf[0] == '.' )
            return;

        if ( is_directory( root ) ) {
            path deferredIndexes;
            directory_iterator last;

            for ( directory_iterator it( root ); it != last; ) {
                path entry = *it;
                ++it;

                if ( use_db && is_directory( entry ) ) {
                    cerr << "ERROR: root directory must be a dump of a single database" << endl;
                    cerr << "       when specifying a db name with --db" << endl;
                    printHelp(cout);
                    return;
                }

                // 'it' has already moved past 'entry', so a non-end iterator
                // means the directory holds more than this one entry.
                if ( use_coll && ( is_directory( entry ) || it != last ) ) {
                    cerr << "ERROR: root directory must be a dump of a single collection" << endl;
                    cerr << "       when specifying a collection name with --collection" << endl;
                    printHelp(cout);
                    return;
                }

                const string entryLeaf = entry.leaf();

                // don't insert oplog
                if ( top_level && !use_db && entryLeaf == "oplog.bson" )
                    continue;

                if ( entryLeaf == "system.indexes.bson" ) {
                    deferredIndexes = entry;
                    continue;
                }

                drillDown( entry, use_db, use_coll );
            }

            if ( !deferredIndexes.empty() )
                drillDown( deferredIndexes, use_db, use_coll );

            return;
        }

        const string fileName = root.string();
        const bool isBson = endsWith( fileName.c_str() , ".bson" );
        if ( !isBson && !endsWith( fileName.c_str() , ".bin" ) ) {
            cerr << "don't know what to do with file [" << fileName << "]" << endl;
            return;
        }

        log() << fileName << endl;

        if ( rootLeaf == "system.profile.bson" ) {
            log() << "\t skipping" << endl;
            return;
        }

        // Database part of the namespace.
        string ns;
        if ( use_db ) {
            ns += _db;
        }
        else {
            const string parentDir = root.branch_path().string();
            const string::size_type lastSlash = parentDir.find_last_of( "/" );
            if ( lastSlash == string::npos )
                ns += parentDir;
            else
                ns += parentDir.substr( lastSlash + 1 );

            if ( ns.empty() )
                ns = "test";
        }

        assert( ns.size() );

        // Collection part of the namespace.
        if ( use_coll )
            ns += "." + _coll;
        else
            ns += "." + rootLeaf.substr( 0 , rootLeaf.find_last_of( "." ) );

        out() << "\t going into namespace [" << ns << "]" << endl;

        const bool isUsersFile = ( rootLeaf == "system.users.bson" );

        if ( _drop ) {
            if ( isUsersFile ) {
                // Create map of the users currently in the DB
                BSONObj fields = BSON("user" << 1);
                scoped_ptr<DBClientCursor> cursor(conn().query(ns, Query(), 0, 0, &fields));
                while (cursor->more()) {
                    BSONObj user = cursor->next();
                    _users.insert(user["user"].String());
                }
            }
            else {
                out() << "\t dropping" << endl;
                conn().dropCollection( ns );
            }
        }

        _curns = ns.c_str();
        _curdb = NamespaceString(_curns).db;
        processFile( root );

        if ( _drop && isUsersFile ) {
            // Delete any users that used to exist but weren't in the dump file
            set<string>::iterator it = _users.begin();
            while ( it != _users.end() ) {
                BSONObj userMatch = BSON("user" << *it);
                conn().remove(ns, Query(userMatch));
                ++it;
            }
            _users.clear();
        }
    }
示例#3
0
    // Restores the dump located at 'root'.
    //
    // A directory is walked recursively.  A regular file must end in ".bson"
    // or ".bin"; it is memory-mapped and every object found in it is inserted
    // into the namespace "<containing directory name>.<file name without its
    // extension>".  Empty files are reported and skipped.
    void drillDown( path root ) {

        if ( is_directory( root ) ) {
            directory_iterator end;
            directory_iterator i(root);
            while ( i != end ) {
                path p = *i;
                drillDown( p );
                i++;
            }
            return;
        }
        
        if ( ! ( endsWith( root.string().c_str() , ".bson" ) ||
                 endsWith( root.string().c_str() , ".bin" ) ) ) {
            cerr << "don't know what to do with [" << root.string() << "]" << endl;
            return;
        }
        
        out() << root.string() << endl;
        
        string ns;
        {
            // database name: last component of the containing directory
            string dir = root.branch_path().string();
            if ( dir.find( "/" ) == string::npos )
                ns += dir;
            else
                ns += dir.substr( dir.find_last_of( "/" ) + 1 );
        }
        
        {
            // collection name: file name with its extension removed
            string l = root.leaf();
            l = l.substr( 0 , l.find_last_of( "." ) );
            ns += "." + l;
        }
        
        if ( boost::filesystem::file_size( root ) == 0 ) {
            out() << "file " + root.native_file_string() + " empty, aborting" << endl;
            return;
        }

        out() << "\t going into namespace [" << ns << "]" << endl;
        
        MemoryMappedFile mmf;
        // Map outside of assert(): an assertion that is compiled out must not
        // take the mapping call with it.
        const bool mapped = mmf.map( root.string().c_str() );
        assert( mapped );
        if ( ! mapped ) {
            cerr << "unable to map file [" << root.string() << "]" << endl;
            return;
        }
        
        char * data = (char*)mmf.viewOfs();
        int read = 0;
        
        int num = 0;
        
        while ( read < mmf.length() ) {
            BSONObj o( data );

            // A non-positive size would keep 'read' from advancing (or move
            // it backwards), so the loop could never finish.
            const int objSize = o.objsize();
            if ( objSize <= 0 ) {
                cerr << "invalid object size " << objSize << " in [" << root.string() << "], aborting" << endl;
                break;
            }
            
            conn().insert( ns.c_str() , o );
            
            read += objSize;
            data += objSize;

            if ( ! ( ++num % 1000 ) )
                out() << "read " << read << "/" << mmf.length() << " bytes so far. " << num << " objects" << endl;
        }
        
        out() << "\t "  << num << " objects" << endl;
    }
示例#4
0
    // Restores the dump located at 'root'.
    //
    // Directories are walked recursively; when _indexesLast is set, a
    // "system.indexes.bson" entry is set aside and processed after every
    // other entry of its directory.  Regular files must end in ".bson" or
    // ".bin" and are handed to processFile() once the target namespace has
    // been worked out.
    //
    //   root     - file or directory to restore
    //   use_db   - take the database name from _db instead of the name of the
    //              directory containing the file
    //   use_coll - take the collection name from _coll instead of the file
    //              name
    void drillDown( path root, bool use_db = false, bool use_coll = false ) {
        log(2) << "drillDown: " << root.string() << endl;

        if ( is_directory( root ) ) {
            path deferredIndexes;
            directory_iterator last;

            for ( directory_iterator it( root ); it != last; ) {
                path entry = *it;
                ++it;

                if ( use_db && is_directory( entry ) ) {
                    cerr << "ERROR: root directory must be a dump of a single database" << endl;
                    cerr << "       when specifying a db name with --db" << endl;
                    printHelp(cout);
                    return;
                }

                // 'it' has already moved past 'entry', so a non-end iterator
                // means the directory holds more than this one entry.
                if ( use_coll && ( is_directory( entry ) || it != last ) ) {
                    cerr << "ERROR: root directory must be a dump of a single collection" << endl;
                    cerr << "       when specifying a collection name with --collection" << endl;
                    printHelp(cout);
                    return;
                }

                if ( _indexesLast && entry.leaf() == "system.indexes.bson" ) {
                    deferredIndexes = entry;
                    continue;
                }

                drillDown( entry, use_db, use_coll );
            }

            if ( !deferredIndexes.empty() )
                drillDown( deferredIndexes, use_db, use_coll );

            return;
        }

        const string fileName = root.string();
        const bool isBson = endsWith( fileName.c_str() , ".bson" );
        if ( !isBson && !endsWith( fileName.c_str() , ".bin" ) ) {
            cerr << "don't know what to do with [" << fileName << "]" << endl;
            return;
        }

        log() << fileName << endl;

        const string leafName = root.leaf();
        if ( leafName == "system.profile.bson" ) {
            log() << "\t skipping" << endl;
            return;
        }

        // Database part of the namespace.
        string ns;
        if ( use_db ) {
            ns += _db;
        }
        else {
            const string parentDir = root.branch_path().string();
            const string::size_type lastSlash = parentDir.find_last_of( "/" );
            if ( lastSlash == string::npos )
                ns += parentDir;
            else
                ns += parentDir.substr( lastSlash + 1 );

            if ( ns.empty() )
                ns = "test";
        }

        assert( ns.size() );

        // Collection part of the namespace.
        if ( use_coll )
            ns += "." + _coll;
        else
            ns += "." + leafName.substr( 0 , leafName.find_last_of( "." ) );

        out() << "\t going into namespace [" << ns << "]" << endl;

        if ( _drop ) {
            out() << "\t dropping" << endl;
            conn().dropCollection( ns );
        }

        _curns = ns.c_str();
        processFile( root );
    }
示例#5
0
    // Restores the dump located at 'root'.
    //
    // A directory is walked recursively.  A regular file must end in ".bson"
    // or ".bin"; it is read object by object through a fixed-size buffer and
    // every object is inserted into the target namespace.  Empty files are
    // reported and skipped.
    //
    //   root   - file or directory to restore
    //   use_db - take the database name from _db instead of the name of the
    //            directory containing the file; every directory entry must
    //            then be a ".bson"/".bin" file
    void drillDown( path root, bool use_db = false ) {
        log(2) << "drillDown: " << root.string() << endl;

        if ( is_directory( root ) ) {
            directory_iterator end;
            directory_iterator i(root);
            while ( i != end ) {
                path p = *i;

                if (use_db) {
                    if (is_directory(p) ||
                        !(endsWith(p.string().c_str(), ".bson") ||
                          endsWith(p.string().c_str(), ".bin" ))) {
                        cerr << "ERROR: root directory must be a dump of a single database" << endl;
                        cerr << "       when specifying a db name with --db" << endl;
                        printHelp(cout);
                        return;
                    }
                }

                drillDown(p, use_db);
                i++;
            }
            return;
        }

        if ( ! ( endsWith( root.string().c_str() , ".bson" ) ||
                 endsWith( root.string().c_str() , ".bin" ) ) ) {
            cerr << "don't know what to do with [" << root.string() << "]" << endl;
            return;
        }

        out() << root.string() << endl;

        string ns;
        if (use_db) {
            ns += _db;
        } else {
            // database name: last component of the containing directory
            string dir = root.branch_path().string();
            if ( dir.find( "/" ) == string::npos )
                ns += dir;
            else
                ns += dir.substr( dir.find_last_of( "/" ) + 1 );
        }

        {
            // collection name: file name with its extension removed
            string l = root.leaf();
            l = l.substr( 0 , l.find_last_of( "." ) );
            ns += "." + l;
        }

        long long fileLength = file_size( root );

        if ( fileLength == 0 ) {
            out() << "file " + root.native_file_string() + " empty, skipping" << endl;
            return;
        }

        out() << "\t going into namespace [" << ns << "]" << endl;

        string fileString = root.string();
        ifstream file( fileString.c_str() , ios_base::in | ios_base::binary);
        if ( ! file.is_open() ){
            log() << "error opening file: " << fileString << endl;
            return;
        }

        log(1) << "\t file size: " << fileLength << endl;

        long long read = 0;
        long long num = 0;

        // progress is reported every msgDelay objects; the interval grows
        // with the size of the file
        int msgDelay = (int)(1000 * ( 1 + ( fileLength / ( 1024.0 * 1024 * 400 ) ) ) );
        log(1) << "\t msg delay: " << msgDelay << endl;

        const int BUF_SIZE = 1024 * 1024 * 5;
        char * buf = (char*)malloc( BUF_SIZE );
        if ( ! buf ) {
            log() << "error allocating read buffer for file: " << fileString << endl;
            return;
        }

        try {
            while ( read < fileLength ) {
                // each object starts with its own total size as a 4-byte int
                file.read( buf , 4 );
                if ( ! file ) {
                    // without this check a truncated file would re-insert
                    // whatever the buffer still holds from the last object
                    log() << "error reading object size from file: " << fileString << endl;
                    break;
                }

                int size = ((int*)buf)[0];
                // lower bound: a BSON document is at least 5 bytes (length
                // prefix plus terminator), and 'size - 4' must not go negative
                assert( size >= 5 && size < BUF_SIZE );

                file.read( buf + 4 , size - 4 );
                if ( ! file ) {
                    log() << "error reading object body from file: " << fileString << endl;
                    break;
                }

                BSONObj o( buf );
                conn().insert( ns.c_str() , o );

                read += o.objsize();
                num++;

                if ( ( logLevel > 0 && num < 10 ) || ! ( num % msgDelay ) )
                    out() << "read " << read << "/" << fileLength << " bytes so far. (" << (int)( (read * 100) / fileLength) << "%) " << num << " objects" << endl;
            }
        }
        catch ( ... ) {
            // release the buffer before letting the failure propagate
            free( buf );
            throw;
        }

        free( buf );
        out() << "\t "  << num << " objects" << endl;
    }
示例#6
0
    // Validates a single URL found in source_path and records the file it
    // points to.
    //
    // Problems are reported through error() and counted in m_invalid_errors,
    // m_bookmark_errors or m_broken_errors.  For relative URLs that resolve
    // to a path, the target is looked up in (or added to) m_paths and flagged
    // with m_linked_to.
    //
    //   url                    - the URL text as it appears in the file
    //   library_name           - passed through to error()
    //   source_path            - file containing the URL; also selects the
    //                            CSS-specific rules via is_css()
    //   no_link_errors         - when true, no error is reported or counted,
    //                            but the m_paths bookkeeping still happens
    //   allow_external_content - when false, a URL with a scheme or an
    //                            authority is reported as external content
    //   contents_begin,
    //   url_start              - the newlines between these two iterators
    //                            are counted to obtain the reported line
    //                            number
    void link_check::do_url( const string & url, const string & library_name,
      const path & source_path, bool no_link_errors, bool allow_external_content,
        std::string::const_iterator contents_begin, std::string::const_iterator url_start )
        // precondition: source_path.is_complete()
    {
      if(!no_link_errors && url.empty()) {
        ++m_invalid_errors;
        int ln = std::count( contents_begin, url_start, '\n' ) + 1;
        error( library_name, source_path, "Empty URL.", ln );
        return;
      }

      // Decode ampersand encoded characters.
      // (CSS sources are used as-is; an empty result is treated as a
      // decoding failure.)
      string decoded_url = is_css(source_path) ? url : decode_ampersands(url);
      if(decoded_url.empty()) {
        if(!no_link_errors) {
          ++m_invalid_errors;
          int ln = std::count( contents_begin, url_start, '\n' ) + 1;
          error( library_name, source_path,
            "Invalid URL (invalid ampersand encodings): " + url, ln );
        }
        return;
      }
    
      // Split the URL into its components; a URL that does not match the
      // decomposition regex at all is invalid.
      boost::smatch m;
      if(!boost::regex_match(decoded_url, m, url_decompose_regex)) {
        if(!no_link_errors) {
          ++m_invalid_errors;
          int ln = std::count( contents_begin, url_start, '\n' ) + 1;
          error( library_name, source_path, "Invalid URL: " + decoded_url, ln );
        }
        return;
      }

      // Capture groups of url_decompose_regex used below: 2 = scheme,
      // 4 = authority, 5 = path, 9 = fragment (7 = query, currently unused).
      bool scheme_matched = m[2].matched,
        authority_matched = m[4].matched,
        //query_matched = m[7].matched,
        fragment_matched = m[9].matched;

      std::string scheme(m[2]),
        authority(m[4]),
        url_path(m[5]),
        //query(m[7]),
        fragment(m[9]);

      // Check for external content
      // (reported, but the remaining checks still run)
      if(!allow_external_content && (authority_matched || scheme_matched)) {
        if(!no_link_errors) {
          ++m_invalid_errors;
          int ln = std::count( contents_begin, url_start, '\n' ) + 1;
          error( library_name, source_path, "External content: " + decoded_url, ln );
        }
      }

      // Protocol checks
      // Every URL that carries a scheme is finished inside this branch; none
      // of them reaches the path resolution further down.
      if(scheme_matched) {
        if(scheme == "http" || scheme == "https") {
          // All http links should have a hostname. Generally if they don't
          // it's by mistake. If they shouldn't, then a protocol isn't
          // required.
          if(!authority_matched) {
            if(!no_link_errors) {
              ++m_invalid_errors;
              int ln = std::count( contents_begin, url_start, '\n' ) + 1;
              error( library_name, source_path, "No hostname: " + decoded_url, ln );
            }
          }

          return;
        }
        else if(scheme == "file") {
          if(!no_link_errors) {
            ++m_invalid_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path,
              "Invalid URL (hardwired file): " + decoded_url, ln );
          }
        }
        else if(scheme == "mailto" || scheme == "ftp" || scheme == "news" || scheme == "javascript") {
          // These schemes are accepted, except inside CSS sources.
          if ( !no_link_errors && is_css(source_path) ) {
            ++m_invalid_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path,
              "Invalid protocol for css: " + decoded_url, ln );
          }
        }
        else {
          if(!no_link_errors) {
            ++m_invalid_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path, "Unknown protocol: '" + scheme + "' in url: " + decoded_url, ln );
          }
        }

        return;
      }

      // Hostname without protocol.
      // (reported, but the remaining checks still run)
      if(authority_matched) {
        if(!no_link_errors) {
          ++m_invalid_errors;
          int ln = std::count( contents_begin, url_start, '\n' ) + 1;
          error( library_name, source_path,
            "Invalid URL (hostname without protocol): " + decoded_url, ln );
        }
      }

      // Check the fragment identifier
      if ( fragment_matched ) {
        if ( is_css(source_path) ) {
            if ( !no_link_errors ) {
              ++m_invalid_errors;
              int ln = std::count( contents_begin, url_start, '\n' ) + 1;
              error( library_name, source_path,
                "Fragment link in CSS: " + decoded_url, ln );
            }
        }
        else {
          // A '#' inside the fragment itself makes the bookmark invalid.
          if ( !no_link_errors && fragment.find( '#' ) != string::npos )
          {
            ++m_bookmark_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path, "Invalid bookmark: " + decoded_url, ln );
          }
          // Only a fragment with an empty path is looked up in 'bookmarks'.
          else if ( !no_link_errors && url_path.empty() && !fragment.empty()
            // w3.org recommends case-sensitive broken bookmark checking
            // since some browsers do a case-sensitive match.
            && bookmarks.find(decode_percents(fragment)) == bookmarks.end() )
          {
            ++m_broken_errors;
            int ln = std::count( contents_begin, url_start, '\n' ) + 1;
            error( library_name, source_path, "Unknown bookmark: " + decoded_url, ln );
          }
        }

        // No more to do if it's just a fragment identifier
        if(url_path.empty()) return;
      }

      // Detect characters banned by RFC2396:
      if ( !no_link_errors && decoded_url.find_first_of( " <>\"{}|\\^[]'" ) != string::npos )
      {
        ++m_invalid_errors;
        int ln = std::count( contents_begin, url_start, '\n' ) + 1;
        error( library_name, source_path,
          "Invalid character in URL: " + decoded_url, ln );
      }

      // Check that we actually have a path.
      // NOTE(review): there is no return here, so an empty path goes on to
      // the decode_percents() check below; if that yields an empty string for
      // empty input, a second error is reported for the same URL -- confirm
      // whether this is intended.
      if(url_path.empty()) {
        if(!no_link_errors) {
          ++m_invalid_errors;
          int ln = std::count( contents_begin, url_start, '\n' ) + 1;
          error( library_name, source_path,
            "Invalid URL (empty path in relative url): " + decoded_url, ln );
        }
      }

      // Decode percent encoded characters.
      // (an empty result is treated as a decoding failure)
      string decoded_path = decode_percents(url_path);
      if(decoded_path.empty()) {
        if(!no_link_errors) {
          ++m_invalid_errors;
          int ln = std::count( contents_begin, url_start, '\n' ) + 1;
          error( library_name, source_path,
            "Invalid URL (invalid character encodings): " + decoded_url, ln );
        }
        return;
      }

      // strip url of references to current dir
      // (only a single leading "./" is removed)
      if ( decoded_path[0]=='.' && decoded_path[1]=='/' ) decoded_path.erase( 0, 2 );

      // url is relative source_path.branch()
      // convert to target_path, which is_complete()
      path target_path;
      try { target_path = source_path.branch_path() /= path( decoded_path ); }
      catch ( const fs::filesystem_error & )
      {
        if(!no_link_errors) {
          int ln = std::count( contents_begin, url_start, '\n' ) + 1;
          ++m_invalid_errors;
          error( library_name, source_path,
            "Invalid URL (error resolving path): " + decoded_url, ln );
        }
        return;
      }

      // create a m_paths entry if necessary
      // The key is the target expressed relative to search_root_path(); a new
      // entry starts with flags 0 and gets m_present only if the target
      // exists on disk.
      std::pair< const string, int > entry(
        relative_to( target_path, search_root_path() ), 0 );
      m_path_map::iterator itr( m_paths.find( entry.first ) );
      if ( itr == m_paths.end() )
      {
        if ( fs::exists( target_path ) ) entry.second = m_present;
        itr = m_paths.insert( entry ).first;
      }

      // itr now points to the m_paths entry
      itr->second |= m_linked_to;

      // if target isn't present, the link is broken
      if ( !no_link_errors && (itr->second & m_present) == 0 )
      {
        ++m_broken_errors;
        int ln = std::count( contents_begin, url_start, '\n' ) + 1;
        error( library_name, source_path, "Broken link: " + decoded_url, ln );
      }
    }