Ejemplo n.º 1
0
    /**
     * Reads the BSON documents stored back-to-back in the file at `root`, passes each
     * one that satisfies the optional filter to gotObject(), and reports the totals.
     *
     * @param root path of the BSON file to read.
     * @return the number of documents handed to gotObject(); 0 when the file is empty
     *         or cannot be opened.
     *
     * Fails through uassert/verify when a length prefix is out of range, a read comes
     * up short, or the bytes consumed do not add up to the file length.
     */
    long long BSONTool::processFile( const boost::filesystem::path& root ) {
        const std::string fileName = root.string();

        const unsigned long long fileLength = file_size( root );

        if ( fileLength == 0 ) {
            toolInfoOutput() << "file " << fileName << " empty, skipping" << std::endl;
            return 0;
        }

        FILE* file = fopen( fileName.c_str() , "rb" );
        if ( ! file ) {
            toolError() << "error opening file: " << fileName << " " << errnoWithDescription()
                      << std::endl;
            return 0;
        }

        // Closes the file on every exit path. The loop below can leave this function
        // early (failed assertions, the rethrow in the objcheck branch, or anything
        // gotObject() raises), which previously leaked the handle.
        struct FileCloser {
            explicit FileCloser( FILE* f ) : _f( f ) {}
            ~FileCloser() { fclose( _f ); }
            FILE* const _f;
        } fileCloser( file );

#ifdef POSIX_FADV_SEQUENTIAL
        posix_fadvise(fileno(file), 0, fileLength, POSIX_FADV_SEQUENTIAL);
#endif

        if (logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1))) {
            toolInfoOutput() << "\t file size: " << fileLength << std::endl;
        }

        unsigned long long read = 0;
        unsigned long long num = 0;
        unsigned long long processed = 0;

        // Smallest well-formed document: 4-byte length prefix plus the terminating byte.
        const int MIN_OBJ_SIZE = 5;
        const int BUF_SIZE = BSONObjMaxUserSize + ( 1024 * 1024 );
        boost::scoped_array<char> buf_holder(new char[BUF_SIZE]);
        char * buf = buf_holder.get();

        ProgressMeter m(fileLength);
        if (!toolGlobalParams.quiet) {
            m.setUnits( "bytes" );
        }

        while ( read < fileLength ) {
            // Each document starts with its own total size as a 4-byte integer.
            size_t amt = fread(buf, 1, 4, file);
            verify( amt == 4 );

            const int size = ((int*)buf)[0];
            // Reject sizes below the minimum as well as oversized ones: a negative or
            // tiny prefix would make (size - 4) wrap to a huge size_t in the fread
            // below and overrun buf.
            uassert( 10264 , str::stream() << "invalid object size: " << size ,
                     size >= MIN_OBJ_SIZE && size < BUF_SIZE );

            amt = fread(buf+4, 1, size-4, file);
            verify( amt == (size_t)( size - 4 ) );

            BSONObj o( buf );
            if (bsonToolGlobalParams.objcheck) {
                const Status status = validateBSON(buf, size);
                if (!status.isOK()) {
                    toolError() << "INVALID OBJECT - going to try and print out " << std::endl;
                    toolError() << "size: " << size << std::endl;
                    toolError() << "error: " << status.reason() << std::endl;

                    StringBuilder sb;
                    try {
                        o.toString(sb); // using StringBuilder version to get as much as possible
                    } catch (...) {
                        toolError() << "object up to error: " << sb.str() << std::endl;
                        throw;
                    }
                    toolError() << "complete object: " << sb.str() << std::endl;

                    // NOTE: continuing with object even though we know it is invalid.
                }
            }

            if (!bsonToolGlobalParams.hasFilter || _matcher->matches(o)) {
                gotObject( o );
                processed++;
            }

            read += o.objsize();
            num++;

            if (!toolGlobalParams.quiet) {
                m.hit(o.objsize());
            }
        }

        // The documents must account for every byte of the file.
        uassert(10265, "counts don't match", read == fileLength);
        toolInfoOutput() << num << ((num == 1) ? " document" : " documents")
                         << " found" << std::endl;
        if (bsonToolGlobalParams.hasFilter) {
            toolInfoOutput() << processed
                             << ((processed == 1) ? " document" : " documents")
                             << " processed" << std::endl;
        }
        return processed;
    }
Ejemplo n.º 2
0
    /**
     * Reads the BSON documents stored back-to-back in the file at `root`, passes each
     * one that satisfies the optional filter to gotObject(), and reports the totals.
     *
     * @param root path of the BSON file to read.
     * @return the number of objects handed to gotObject(); 0 when the file is empty
     *         or cannot be opened.
     *
     * Fails through uassert/verify when a length prefix is out of range, a read comes
     * up short, or the bytes consumed do not add up to the file length.
     */
    long long BSONTool::processFile( const boost::filesystem::path& root ) {
        const std::string fileName = root.string();

        const unsigned long long fileLength = file_size( root );

        if ( fileLength == 0 ) {
            toolInfoOutput() << "file " << fileName << " empty, skipping" << std::endl;
            return 0;
        }

        FILE* file = fopen( fileName.c_str() , "rb" );
        if ( ! file ) {
            toolError() << "error opening file: " << fileName << " " << errnoWithDescription()
                      << std::endl;
            return 0;
        }

        // Closes the file on every exit path. The loop below can leave this function
        // early (failed assertions or anything gotObject() raises), which previously
        // leaked the handle.
        struct FileCloser {
            explicit FileCloser( FILE* f ) : _f( f ) {}
            ~FileCloser() { fclose( _f ); }
            FILE* const _f;
        } fileCloser( file );

#ifdef POSIX_FADV_SEQUENTIAL
        posix_fadvise(fileno(file), 0, fileLength, POSIX_FADV_SEQUENTIAL);
#endif

        if (logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1))) {
            toolInfoOutput() << "\t file size: " << fileLength << std::endl;
        }

        unsigned long long read = 0;
        unsigned long long num = 0;
        unsigned long long processed = 0;

        // Smallest well-formed document: 4-byte length prefix plus the terminating byte.
        const int MIN_OBJ_SIZE = 5;
        const int BUF_SIZE = BSONObjMaxUserSize + ( 1024 * 1024 );
        boost::scoped_array<char> buf_holder(new char[BUF_SIZE]);
        char * buf = buf_holder.get();

        ProgressMeter m(fileLength);
        if (!toolGlobalParams.quiet) {
            m.setUnits( "bytes" );
        }

        while ( read < fileLength ) {
            // Each document starts with its own total size as a 4-byte integer.
            size_t amt = fread(buf, 1, 4, file);
            verify( amt == 4 );

            const int size = ((int*)buf)[0];
            // Reject sizes below the minimum as well as oversized ones: a negative or
            // tiny prefix would make (size - 4) wrap to a huge size_t in the fread
            // below and overrun buf.
            uassert( 10264 , str::stream() << "invalid object size: " << size ,
                     size >= MIN_OBJ_SIZE && size < BUF_SIZE );

            amt = fread(buf+4, 1, size-4, file);
            verify( amt == (size_t)( size - 4 ) );

            BSONObj o( buf );
            if (bsonToolGlobalParams.objcheck && !o.valid()) {
                // Dump the object element by element so the bad one can be located.
                toolError() << "INVALID OBJECT - going to try and print out " << std::endl;
                toolError() << "size: " << size << std::endl;
                BSONObjIterator i(o);
                while ( i.more() ) {
                    BSONElement e = i.next();
                    try {
                        e.validate();
                    }
                    catch ( ... ) {
                        toolError() << "\t\t NEXT ONE IS INVALID" << std::endl;
                    }
                    toolError() << "\t name : " << e.fieldName() << " " << typeName(e.type())
                                << std::endl;
                    toolError() << "\t " << e << std::endl;
                }
            }

            if (!bsonToolGlobalParams.hasFilter || _matcher->matches(o)) {
                gotObject( o );
                processed++;
            }

            read += o.objsize();
            num++;

            if (!toolGlobalParams.quiet) {
                m.hit(o.objsize());
            }
        }

        // The documents must account for every byte of the file.
        uassert(10265, "counts don't match", read == fileLength);
        toolInfoOutput() << num << " objects found" << std::endl;
        if (bsonToolGlobalParams.hasFilter)
            toolInfoOutput() << processed << " objects processed" << std::endl;
        return processed;
    }
Ejemplo n.º 3
0
    /**
     * Reads the BSON documents stored back-to-back in the file at `root`, passes each
     * one that satisfies the optional matcher to gotObject(), and reports the totals.
     *
     * @param root path of the BSON file to read.
     * @return the number of objects handed to gotObject(); 0 when the file is empty
     *         or cannot be opened.
     *
     * Fails through uassert when a length prefix is out of range or the bytes consumed
     * do not add up to the file length (which includes a truncated file).
     */
    long long BSONTool::processFile( const path& root ){
        string fileString = root.string();

        long long fileLength = file_size( root );

        if ( fileLength == 0 ) {
            out() << "file " << fileString << " empty, skipping" << endl;
            return 0;
        }

        ifstream file( fileString.c_str() , ios_base::in | ios_base::binary);
        if ( ! file.is_open() ){
            log() << "error opening file: " << fileString << endl;
            return 0;
        }

        log(1) << "\t file size: " << fileLength << endl;

        long long read = 0;
        long long processed = 0;

        // Smallest well-formed document: 4-byte length prefix plus the terminating byte.
        const int MIN_OBJ_SIZE = 5;
        const int BUF_SIZE = 1024 * 1024 * 5;
        boost::scoped_array<char> buf_holder(new char[BUF_SIZE]);
        char * buf = buf_holder.get();

        ProgressMeter m( fileLength );

        while ( read < fileLength ) {
            // Each document starts with its own total size as a 4-byte integer.
            file.read( buf , 4 );
            if ( file.gcount() != 4 ) {
                // Short read: stop here rather than reinterpreting stale buffer
                // contents; the byte-count check after the loop reports the mismatch.
                cerr << "unexpected end of file while reading object size" << endl;
                break;
            }

            int size = ((int*)buf)[0];
            // Reject sizes below the minimum as well as oversized ones: a negative or
            // tiny prefix would turn (size - 4) into a bogus read length below.
            if ( size < MIN_OBJ_SIZE || size >= BUF_SIZE ){
                cerr << "got an object of size: " << size << "  terminating..." << endl;
            }
            uassert( 10264 ,  "invalid object size" , size >= MIN_OBJ_SIZE && size < BUF_SIZE );

            file.read( buf + 4 , size - 4 );
            if ( file.gcount() != size - 4 ) {
                // Truncated document body; same handling as a short header read.
                cerr << "unexpected end of file while reading object of size: " << size << endl;
                break;
            }

            BSONObj o( buf );
            if ( _objcheck && ! o.valid() ){
                // Dump the object element by element so the bad one can be located.
                cerr << "INVALID OBJECT - going try and pring out " << endl;
                cerr << "size: " << size << endl;
                BSONObjIterator i(o);
                while ( i.more() ){
                    BSONElement e = i.next();
                    try {
                        e.validate();
                    }
                    catch ( ... ){
                        cerr << "\t\t NEXT ONE IS INVALID" << endl;
                    }
                    cerr << "\t name : " << e.fieldName() << " " << e.type() << endl;
                    cerr << "\t " << e << endl;
                }
            }

            // A null matcher means no filter was configured: accept everything.
            if ( _matcher.get() == 0 || _matcher->matches( o ) ){
                gotObject( o );
                processed++;
            }

            read += o.objsize();

            m.hit( o.objsize() );
        }

        // The documents must account for every byte of the file.
        uassert( 10265 ,  "counts don't match" , m.done() == fileLength );
        out() << "\t "  << m.hits() << " objects found" << endl;
        if ( _matcher.get() )
            out() << "\t "  << processed << " objects processed" << endl;
        return processed;
    }
Ejemplo n.º 4
0
    /**
     * Reads the BSON documents stored back-to-back in the file at `root`, passes each
     * one that satisfies the optional matcher to gotObject(), and reports the totals.
     * Also records the path in _fileName.
     *
     * @param root path of the BSON file to read.
     * @return the number of objects handed to gotObject(); 0 when the file is empty
     *         or cannot be opened.
     *
     * Fails through uassert/verify when a length prefix is out of range, a read comes
     * up short, or the bytes consumed do not add up to the file length.
     */
    long long BSONTool::processFile( const boost::filesystem::path& root ) {
        _fileName = root.string();

        const unsigned long long fileLength = file_size( root );

        if ( fileLength == 0 ) {
            out() << "file " << _fileName << " empty, skipping" << endl;
            return 0;
        }

        FILE* file = fopen( _fileName.c_str() , "rb" );
        if ( ! file ) {
            log() << "error opening file: " << _fileName << " " << errnoWithDescription() << endl;
            return 0;
        }

        // Closes the file on every exit path. The loop below can leave this function
        // early (failed assertions or anything gotObject() raises), which previously
        // leaked the handle.
        struct FileCloser {
            explicit FileCloser( FILE* f ) : _f( f ) {}
            ~FileCloser() { fclose( _f ); }
            FILE* const _f;
        } fileCloser( file );

#if !defined(__sunos__) && defined(POSIX_FADV_SEQUENTIAL)
        posix_fadvise(fileno(file), 0, fileLength, POSIX_FADV_SEQUENTIAL);
#endif

        log(1) << "\t file size: " << fileLength << endl;

        unsigned long long read = 0;
        unsigned long long processed = 0;

        // Smallest well-formed document: 4-byte length prefix plus the terminating byte.
        const int MIN_OBJ_SIZE = 5;
        const int BUF_SIZE = BSONObjMaxUserSize + ( 1024 * 1024 );
        boost::scoped_array<char> buf_holder(new char[BUF_SIZE]);
        char * buf = buf_holder.get();

        ProgressMeter m( fileLength );
        m.setUnits( "bytes" );

        while ( read < fileLength ) {
            // Each document starts with its own total size as a 4-byte integer.
            size_t amt = fread(buf, 1, 4, file);
            verify( amt == 4 );

            const int size = ((int*)buf)[0];
            // Reject sizes below the minimum as well as oversized ones: a negative or
            // tiny prefix would make (size - 4) wrap to a huge size_t in the fread
            // below and overrun buf.
            uassert( 10264 , str::stream() << "invalid object size: " << size ,
                     size >= MIN_OBJ_SIZE && size < BUF_SIZE );

            amt = fread(buf+4, 1, size-4, file);
            verify( amt == (size_t)( size - 4 ) );

            BSONObj o( buf );
            if ( _objcheck && ! o.valid() ) {
                // Dump the object element by element so the bad one can be located.
                cerr << "INVALID OBJECT - going try and pring out " << endl;
                cerr << "size: " << size << endl;
                BSONObjIterator i(o);
                while ( i.more() ) {
                    BSONElement e = i.next();
                    try {
                        e.validate();
                    }
                    catch ( ... ) {
                        cerr << "\t\t NEXT ONE IS INVALID" << endl;
                    }
                    cerr << "\t name : " << e.fieldName() << " " << e.type() << endl;
                    cerr << "\t " << e << endl;
                }
            }

            // A null matcher means no filter was configured: accept everything.
            if ( _matcher.get() == 0 || _matcher->matches( o ) ) {
                gotObject( o );
                processed++;
            }

            read += o.objsize();

            m.hit( o.objsize() );
        }

        // The documents must account for every byte of the file.
        uassert( 10265 ,  "counts don't match" , m.done() == fileLength );
        (_usesstdout ? cout : cerr ) << m.hits() << " objects found" << endl;
        if ( _matcher.get() )
            (_usesstdout ? cout : cerr ) << processed << " objects processed" << endl;
        return processed;
    }