long long BSONTool::processFile( const boost::filesystem::path& root ) { std::string fileName = root.string(); unsigned long long fileLength = file_size( root ); if ( fileLength == 0 ) { toolInfoOutput() << "file " << fileName << " empty, skipping" << std::endl; return 0; } FILE* file = fopen( fileName.c_str() , "rb" ); if ( ! file ) { toolError() << "error opening file: " << fileName << " " << errnoWithDescription() << std::endl; return 0; } #ifdef POSIX_FADV_SEQUENTIAL posix_fadvise(fileno(file), 0, fileLength, POSIX_FADV_SEQUENTIAL); #endif if (logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1))) { toolInfoOutput() << "\t file size: " << fileLength << std::endl; } unsigned long long read = 0; unsigned long long num = 0; unsigned long long processed = 0; const int BUF_SIZE = BSONObjMaxUserSize + ( 1024 * 1024 ); boost::scoped_array<char> buf_holder(new char[BUF_SIZE]); char * buf = buf_holder.get(); ProgressMeter m(fileLength); if (!toolGlobalParams.quiet) { m.setUnits( "bytes" ); } while ( read < fileLength ) { size_t amt = fread(buf, 1, 4, file); verify( amt == 4 ); int size = ((int*)buf)[0]; uassert( 10264 , str::stream() << "invalid object size: " << size , size < BUF_SIZE ); amt = fread(buf+4, 1, size-4, file); verify( amt == (size_t)( size - 4 ) ); BSONObj o( buf ); if (bsonToolGlobalParams.objcheck) { const Status status = validateBSON(buf, size); if (!status.isOK()) { toolError() << "INVALID OBJECT - going to try and print out " << std::endl; toolError() << "size: " << size << std::endl; toolError() << "error: " << status.reason() << std::endl; StringBuilder sb; try { o.toString(sb); // using StringBuilder version to get as much as possible } catch (...) { toolError() << "object up to error: " << sb.str() << endl; throw; } toolError() << "complete object: " << sb.str() << endl; // NOTE: continuing with object even though we know it is invalid. } } if (!bsonToolGlobalParams.hasFilter || _matcher->matches(o)) { gotObject( o ); processed++; } read += o.objsize(); num++; if (!toolGlobalParams.quiet) { m.hit(o.objsize()); } } fclose( file ); uassert(10265, "counts don't match", read == fileLength); toolInfoOutput() << num << ((num == 1) ? " document" : " documents") << " found" << std::endl; if (bsonToolGlobalParams.hasFilter) { toolInfoOutput() << processed << ((processed == 1) ? " document" : " documents") << " processed" << std::endl; } return processed; }
long long BSONTool::processFile( const boost::filesystem::path& root ) { std::string fileName = root.string(); unsigned long long fileLength = file_size( root ); if ( fileLength == 0 ) { toolInfoOutput() << "file " << fileName << " empty, skipping" << std::endl; return 0; } FILE* file = fopen( fileName.c_str() , "rb" ); if ( ! file ) { toolError() << "error opening file: " << fileName << " " << errnoWithDescription() << std::endl; return 0; } #ifdef POSIX_FADV_SEQUENTIAL posix_fadvise(fileno(file), 0, fileLength, POSIX_FADV_SEQUENTIAL); #endif if (logger::globalLogDomain()->shouldLog(logger::LogSeverity::Debug(1))) { toolInfoOutput() << "\t file size: " << fileLength << std::endl; } unsigned long long read = 0; unsigned long long num = 0; unsigned long long processed = 0; const int BUF_SIZE = BSONObjMaxUserSize + ( 1024 * 1024 ); boost::scoped_array<char> buf_holder(new char[BUF_SIZE]); char * buf = buf_holder.get(); ProgressMeter m(fileLength); if (!toolGlobalParams.quiet) { m.setUnits( "bytes" ); } while ( read < fileLength ) { size_t amt = fread(buf, 1, 4, file); verify( amt == 4 ); int size = ((int*)buf)[0]; uassert( 10264 , str::stream() << "invalid object size: " << size , size < BUF_SIZE ); amt = fread(buf+4, 1, size-4, file); verify( amt == (size_t)( size - 4 ) ); BSONObj o( buf ); if (bsonToolGlobalParams.objcheck && !o.valid()) { toolError() << "INVALID OBJECT - going to try and print out " << std::endl; toolError() << "size: " << size << std::endl; BSONObjIterator i(o); while ( i.more() ) { BSONElement e = i.next(); try { e.validate(); } catch ( ... ) { toolError() << "\t\t NEXT ONE IS INVALID" << std::endl; } toolError() << "\t name : " << e.fieldName() << " " << typeName(e.type()) << std::endl; toolError() << "\t " << e << std::endl; } } if (!bsonToolGlobalParams.hasFilter || _matcher->matches(o)) { gotObject( o ); processed++; } read += o.objsize(); num++; if (!toolGlobalParams.quiet) { m.hit(o.objsize()); } } fclose( file ); uassert(10265, "counts don't match", read == fileLength); toolInfoOutput() << num << " objects found" << std::endl; if (bsonToolGlobalParams.hasFilter) toolInfoOutput() << processed << " objects processed" << std::endl; return processed; }
long long BSONTool::processFile( const path& root ){ string fileString = root.string(); long long fileLength = file_size( root ); if ( fileLength == 0 ) { out() << "file " << fileString << " empty, skipping" << endl; return 0; } ifstream file( fileString.c_str() , ios_base::in | ios_base::binary); if ( ! file.is_open() ){ log() << "error opening file: " << fileString << endl; return 0; } log(1) << "\t file size: " << fileLength << endl; long long read = 0; long long num = 0; long long processed = 0; const int BUF_SIZE = 1024 * 1024 * 5; boost::scoped_array<char> buf_holder(new char[BUF_SIZE]); char * buf = buf_holder.get(); ProgressMeter m( fileLength ); while ( read < fileLength ) { file.read( buf , 4 ); int size = ((int*)buf)[0]; if ( size >= BUF_SIZE ){ cerr << "got an object of size: " << size << " terminating..." << endl; } uassert( 10264 , "invalid object size" , size < BUF_SIZE ); file.read( buf + 4 , size - 4 ); BSONObj o( buf ); if ( _objcheck && ! o.valid() ){ cerr << "INVALID OBJECT - going try and pring out " << endl; cerr << "size: " << size << endl; BSONObjIterator i(o); while ( i.more() ){ BSONElement e = i.next(); try { e.validate(); } catch ( ... ){ cerr << "\t\t NEXT ONE IS INVALID" << endl; } cerr << "\t name : " << e.fieldName() << " " << e.type() << endl; cerr << "\t " << e << endl; } } if ( _matcher.get() == 0 || _matcher->matches( o ) ){ gotObject( o ); processed++; } read += o.objsize(); num++; m.hit( o.objsize() ); } uassert( 10265 , "counts don't match" , m.done() == fileLength ); out() << "\t " << m.hits() << " objects found" << endl; if ( _matcher.get() ) out() << "\t " << processed << " objects processed" << endl; return processed; }
long long BSONTool::processFile( const boost::filesystem::path& root ) { _fileName = root.string(); unsigned long long fileLength = file_size( root ); if ( fileLength == 0 ) { out() << "file " << _fileName << " empty, skipping" << endl; return 0; } FILE* file = fopen( _fileName.c_str() , "rb" ); if ( ! file ) { log() << "error opening file: " << _fileName << " " << errnoWithDescription() << endl; return 0; } #if !defined(__sunos__) && defined(POSIX_FADV_SEQUENTIAL) posix_fadvise(fileno(file), 0, fileLength, POSIX_FADV_SEQUENTIAL); #endif log(1) << "\t file size: " << fileLength << endl; unsigned long long read = 0; unsigned long long num = 0; unsigned long long processed = 0; const int BUF_SIZE = BSONObjMaxUserSize + ( 1024 * 1024 ); boost::scoped_array<char> buf_holder(new char[BUF_SIZE]); char * buf = buf_holder.get(); ProgressMeter m( fileLength ); m.setUnits( "bytes" ); while ( read < fileLength ) { size_t amt = fread(buf, 1, 4, file); verify( amt == 4 ); int size = ((int*)buf)[0]; uassert( 10264 , str::stream() << "invalid object size: " << size , size < BUF_SIZE ); amt = fread(buf+4, 1, size-4, file); verify( amt == (size_t)( size - 4 ) ); BSONObj o( buf ); if ( _objcheck && ! o.valid() ) { cerr << "INVALID OBJECT - going try and pring out " << endl; cerr << "size: " << size << endl; BSONObjIterator i(o); while ( i.more() ) { BSONElement e = i.next(); try { e.validate(); } catch ( ... ) { cerr << "\t\t NEXT ONE IS INVALID" << endl; } cerr << "\t name : " << e.fieldName() << " " << e.type() << endl; cerr << "\t " << e << endl; } } if ( _matcher.get() == 0 || _matcher->matches( o ) ) { gotObject( o ); processed++; } read += o.objsize(); num++; m.hit( o.objsize() ); } fclose( file ); uassert( 10265 , "counts don't match" , m.done() == fileLength ); (_usesstdout ? cout : cerr ) << m.hits() << " objects found" << endl; if ( _matcher.get() ) (_usesstdout ? cout : cerr ) << processed << " objects processed" << endl; return processed; }