void Database::flushFiles( bool sync ) { assertDbAtLeastReadLocked(this); for( vector<MongoDataFile*>::iterator i = _files.begin(); i != _files.end(); i++ ) { MongoDataFile *f = *i; f->flush(sync); } }
/** Find (or allocate) a data file with at least sizeNeeded bytes free.
    @param ns           namespace the space is for (quota exemption for "special" namespaces)
    @param sizeNeeded   bytes of unused space required
    @param preallocate  passed through to addAFile when growing the database
    @param enforceQuota whether --quota limits apply to this allocation
    @return a suitable file; uasserts (never returns null) on quota or allocation failure */
MongoDataFile* Database::suitableFile( const char *ns, int sizeNeeded, bool preallocate, bool enforceQuota ) {
    // First pass: look for an already-open file with enough free space, newest first.
    for ( int fileNo = numFiles() - 1; fileNo >= 0; fileNo-- ) {
        MongoDataFile* candidate = getFile( fileNo );
        if ( candidate->getHeader()->unusedLength < sizeNeeded )
            continue;
        // we don't enforce the quota on "special" namespaces as that could lead to problems -- e.g.
        // rejecting an index insert after inserting the main record.
        bool overQuota = cmdLine.quota && enforceQuota &&
                         fileNo > cmdLine.quotaFiles && !NamespaceString::special(ns);
        if ( !overQuota )
            return candidate;
    }

    if( cmdLine.quota && enforceQuota && numFiles() >= cmdLine.quotaFiles && !NamespaceString::special(ns) )
        uasserted(12501, "quota exceeded");

    // Second pass: grow the database until a file is big enough, or files max out.
    for ( int attempt = 0; attempt < 8; attempt++ ) {
        MongoDataFile* fresh = addAFile( sizeNeeded, preallocate );
        if ( fresh->getHeader()->unusedLength >= sizeNeeded )
            return fresh;
        if ( fresh->getHeader()->fileLength >= MongoDataFile::maxSize() )
            return fresh; // this is as big as they get so might as well stop
    }

    uasserted(14810, "couldn't allocate space (suitableFile)"); // callers don't check for null return code
    return 0;
}
/** Page every extent of namespace 'ns' into memory ("touch" / pre-heat it).
    Phase 1 collects each extent's fd/offset/length while holding a read context;
    phase 2 releases the db lock (TempRelease) and touches the pages, so other
    operations aren't blocked during the (potentially slow) paging.
    uasserts 16154 if the namespace does not exist. */
void touchNs( const std::string& ns ) {
    std::vector< touch_location > ranges;
    Client::ReadContext ctx(ns);
    {
        NamespaceDetails *nsd = nsdetails(ns.c_str());
        uassert( 16154, "namespace does not exist", nsd );
        // Walk the extent chain and record where each extent lives on disk.
        for( DiskLoc L = nsd->firstExtent; !L.isNull(); L = L.ext()->xnext ) {
            MongoDataFile* mdf = cc().database()->getFile( L.a() );
            massert( 16238, "can't fetch extent file structure", mdf );
            touch_location tl;
            tl.fd = mdf->getFd();
            tl.offset = L.getOfs();
            tl.ext = L.ext();
            tl.length = tl.ext->length;
            ranges.push_back(tl);
        }
    }
    // Keep the data files mapped (shared files lock) but drop the db lock while
    // we do the actual paging, which may take a long time.
    LockMongoFilesShared lk;
    Lock::TempRelease tr;
    std::string progress_msg = "touch " + ns + " extents";
    ProgressMeterHolder pm( cc().curop()->setMessage( progress_msg.c_str() , ranges.size() ) );
    for ( std::vector< touch_location >::iterator it = ranges.begin(); it != ranges.end(); ++it ) {
        touch_pages( it->fd, it->offset, it->length, it->ext );
        pm.hit();
        // Allow the operation to be killed between extents.
        killCurrentOp.checkForInterrupt(false);
    }
    pm.finished();
}
/** Walk one extent during repair, logging every record in it.
    @param db      database the extent belongs to
    @param ns      namespace being repaired (currently only used by callers for context)
    @param forward true: walk firstRecord via getNext; false: walk lastRecord via getPrev
    @param eLoc    location of the extent to walk
    @return the next extent in the walk direction (xnext/xprev), or a null DiskLoc
            if eLoc has an invalid offset. */
DiskLoc _repairExtent( Database* db , string ns, bool forward , DiskLoc eLoc ){
    LogIndentLevel lil;

    if ( eLoc.getOfs() <= 0 ){
        error() << "invalid extent ofs: " << eLoc.getOfs() << endl;
        return DiskLoc();
    }

    MongoDataFile * mdf = db->getFile( eLoc.a() );

    Extent * e = mdf->debug_getExtent( eLoc );
    if ( ! e->isOk() ){
        warning() << "Extent not ok magic: " << e->magic << " going to try to continue" << endl;
    }

    log() << "length:" << e->length << endl;

    LogIndentLevel lil2;

    // Corrupt record next/prev pointers can form a cycle; track what we've
    // visited so a damaged extent can't make this repair loop run forever.
    set<DiskLoc> seen;

    DiskLoc loc = forward ? e->firstRecord : e->lastRecord;
    while ( ! loc.isNull() ){
        if ( ! seen.insert( loc ).second ) {
            error() << "infinite loop in extent, seen: " << loc << " before" << endl;
            break;
        }
        if ( loc.getOfs() <= 0 ){
            error() << "offset is 0 for record which should be impossible" << endl;
            break;
        }
        log() << loc << endl;
        Record* rec = loc.rec();
        log() << loc.obj() << endl;
        loc = forward ? rec->getNext( loc ) : rec->getPrev( loc );
    }
    return forward ? e->xnext : e->xprev;
}
/** Find (or allocate) a data file with at least sizeNeeded bytes free.
    Quota enforcement is delegated to fileIndexExceedsQuota.
    @return a suitable file; uasserts (never returns null) on quota or allocation failure */
MongoDataFile* Database::suitableFile( const char *ns, int sizeNeeded, bool preallocate, bool enforceQuota ) {

    // check existing files
    for ( int i=numFiles()-1; i>=0; i-- ) {
        MongoDataFile* f = getFile( i );
        if ( f->getHeader()->unusedLength >= sizeNeeded ) {
            // Quota check deliberately uses i-1, not i, to keep behavior identical
            // to earlier releases of this code.
            if ( fileIndexExceedsQuota( ns, i-1, enforceQuota ) ) // NOTE i-1 is the value used historically for this check.
                ;
            else
                return f;
        }
    }

    if ( fileIndexExceedsQuota( ns, numFiles(), enforceQuota ) )
        uasserted(12501, "quota exceeded");

    // allocate files until we either get one big enough or hit maxSize
    for ( int i = 0; i < 8; i++ ) {
        MongoDataFile* f = addAFile( sizeNeeded, preallocate );

        if ( f->getHeader()->unusedLength >= sizeNeeded )
            return f;

        if ( f->getHeader()->fileLength >= MongoDataFile::maxSize() ) // this is as big as they get so might as well stop
            return f;
    }

    uasserted(14810, "couldn't allocate space (suitableFile)"); // callers don't check for null return code
    return 0;
}
// ran at startup. static void repairDatabasesAndCheckVersion() { // LastError * le = lastError.get( true ); Client::GodScope gs; log(1) << "enter repairDatabases (to check pdfile version #)" << endl; //verify(checkNsFilesOnLoad); checkNsFilesOnLoad = false; // we are mainly just checking the header - don't scan the whole .ns file for every db here. Lock::GlobalWrite lk; vector< string > dbNames; getDatabaseNames( dbNames ); for ( vector< string >::iterator i = dbNames.begin(); i != dbNames.end(); ++i ) { string dbName = *i; log(1) << "\t" << dbName << endl; Client::Context ctx( dbName ); MongoDataFile *p = cc().database()->getFile( 0 ); DataFileHeader *h = p->getHeader(); if ( !h->isCurrentVersion() || forceRepair ) { if( h->version <= 0 ) { uasserted(14026, str::stream() << "db " << dbName << " appears corrupt pdfile version: " << h->version << " info: " << h->versionMinor << ' ' << h->fileLength); } log() << "****" << endl; log() << "****" << endl; log() << "need to upgrade database " << dbName << " with pdfile version " << h->version << "." << h->versionMinor << ", " << "new version: " << PDFILE_VERSION << "." << PDFILE_VERSION_MINOR << endl; if ( shouldRepairDatabases ) { // QUESTION: Repair even if file format is higher version than code? log() << "\t starting upgrade" << endl; string errmsg; verify( doDBUpgrade( dbName , errmsg , h ) ); } else { log() << "\t Not upgrading, exiting" << endl; log() << "\t run --upgrade to upgrade dbs, then start again" << endl; log() << "****" << endl; dbexit( EXIT_NEED_UPGRADE ); shouldRepairDatabases = 1; return; } } else { Database::closeDatabase( dbName.c_str(), dbpath ); } } log(1) << "done repairDatabases" << endl; if ( shouldRepairDatabases ) { log() << "finished checking dbs" << endl; cc().shutdown(); dbexit( EXIT_CLEAN ); } checkNsFilesOnLoad = true; }
// todo: this is called a lot. streamline the common case MongoDataFile* Database::getFile( int n, int sizeNeeded , bool preallocateOnly) { assert(this); DEV assertDbAtLeastReadLocked(this); namespaceIndex.init(); if ( n < 0 || n >= DiskLoc::MaxFiles ) { out() << "getFile(): n=" << n << endl; massert( 10295 , "getFile(): bad file number value (corrupt db?): run repair", false); } DEV { if ( n > 100 ) { out() << "getFile(): n=" << n << endl; } } MongoDataFile* p = 0; if ( !preallocateOnly ) { while ( n >= (int) _files.size() ) { assert(this); if( !Lock::isWriteLocked(this->name) ) { log() << "error: getFile() called in a read lock, yet file to return is not yet open" << endl; log() << " getFile(" << n << ") _files.size:" <<_files.size() << ' ' << fileName(n).string() << endl; log() << " context ns: " << cc().ns() << " openallfiles:" << _openAllFiles << endl; assert(false); } _files.push_back(0); } p = _files[n]; } if ( p == 0 ) { assertDbWriteLocked(this); boost::filesystem::path fullName = fileName( n ); string fullNameString = fullName.string(); p = new MongoDataFile(n); int minSize = 0; if ( n != 0 && _files[ n - 1 ] ) minSize = _files[ n - 1 ]->getHeader()->fileLength; if ( sizeNeeded + DataFileHeader::HeaderSize > minSize ) minSize = sizeNeeded + DataFileHeader::HeaderSize; try { p->open( fullNameString.c_str(), minSize, preallocateOnly ); } catch ( AssertionException& ) { delete p; throw; } if ( preallocateOnly ) delete p; else _files[n] = p; } return preallocateOnly ? 0 : p; }
bool Database::openExistingFile( int n ) { assert(this); Lock::assertWriteLocked(name); { // must not yet be visible to others as we aren't in the db's write lock and // we will write to _files vector - thus this assert. bool loaded = dbHolder().__isLoaded(name, path); assert( !loaded ); } // additionally must be in the dbholder mutex (no assert for that yet) // todo: why here? that could be bad as we may be read locked only here namespaceIndex.init(); if ( n < 0 || n >= DiskLoc::MaxFiles ) { massert( 15924 , str::stream() << "getFile(): bad file number value " << n << " (corrupt db?): run repair", false); } { if( n < (int) _files.size() && _files[n] ) { dlog(2) << "openExistingFile " << n << " is already open" << endl; return true; } } { boost::filesystem::path fullName = fileName( n ); string fullNameString = fullName.string(); MongoDataFile *df = new MongoDataFile(n); try { if( !df->openExisting( fullNameString.c_str() ) ) { delete df; return false; } } catch ( AssertionException& ) { delete df; throw; } while ( n >= (int) _files.size() ) { _files.push_back(0); } _files[n] = df; } return true; }
void run() { string dbname = "unittest_ex"; string c1 = dbname + ".x1"; string c2 = dbname + ".x2"; { DBDirectClient db; db.dropDatabase( dbname ); } dblock mylock; Client::Context cx( dbname ); bool isnew; Database * d = dbHolderW().getOrCreate( dbname , dbpath , isnew ); assert( d ); int big = 10 * 1024; //int small = 1024; unsigned long long l = 0; int n = 0; while ( 1 ) { n++; if( n == 5 && sizeof(void*)==4 ) break; MongoDataFile * f = d->addAFile( big , false ); //cout << f->length() << ' ' << n << endl; if ( f->length() == l ) break; l = f->length(); } int start = d->numFiles(); for ( int i=0; i<start; i++ ) d->allocExtent( c1.c_str() , d->getFile( i )->getHeader()->unusedLength , false, false ); ASSERT_EQUALS( start , d->numFiles() ); { DBDirectClient db; db.dropDatabase( dbname ); } }
/** Return data file n for this database, opening (and creating/growing) it if needed.
    @param n               file number; must be in [0, DiskLoc::MaxFiles)
    @param sizeNeeded      minimum payload size the file must accommodate when newly opened
    @param preallocateOnly if true, only preallocate the file on disk; the handle is
                           not cached in 'files' and 0 is returned.
    @return the open file, or 0 when preallocateOnly is true. */
MongoDataFile* Database::getFile( int n, int sizeNeeded , bool preallocateOnly) {
    assert(this);

    namespaceIndex.init();
    if ( n < 0 || n >= DiskLoc::MaxFiles ) {
        out() << "getFile(): n=" << n << endl;
        massert( 10295 , "getFile(): bad file number value (corrupt db?): run repair", false);
    }
    DEV {
        if ( n > 100 )
            out() << "getFile(): n=" << n << "?" << endl;
    }
    MongoDataFile* p = 0;
    if ( !preallocateOnly ) {
        // Extend the cache vector up to index n, then look up the cached handle.
        while ( n >= (int) files.size() )
            files.push_back(0);
        p = files[n];
    }
    if ( p == 0 ) {
        // Not open yet (or preallocate-only request): open/create the file now.
        boost::filesystem::path fullName = fileName( n );
        string fullNameString = fullName.string();
        p = new MongoDataFile(n);
        int minSize = 0;
        // Each file must be at least as large as its predecessor.
        if ( n != 0 && files[ n - 1 ] )
            minSize = files[ n - 1 ]->getHeader()->fileLength;
        if ( sizeNeeded + DataFileHeader::HeaderSize > minSize )
            minSize = sizeNeeded + DataFileHeader::HeaderSize;
        try {
            p->open( fullNameString.c_str(), minSize, preallocateOnly );
        }
        catch ( AssertionException& ) {
            delete p;
            throw;
        }
        if ( preallocateOnly )
            delete p;       // preallocation done; don't cache the handle
        else
            files[n] = p;
    }
    return preallocateOnly ? 0 : p;
}
/** Startup pass over every database: check the pdfile version in file 0's header
    and, when --upgrade (shouldRepairDatabases) is set, repair any database whose
    on-disk format is out of date. Exits the process (EXIT_NEED_UPGRADE) if an
    upgrade is needed but --upgrade was not given, and (EXIT_CLEAN) after a
    successful --upgrade run. */
void repairDatabases() {
    dblock lk;
    vector< string > dbNames;
    getDatabaseNames( dbNames );
    for ( vector< string >::iterator i = dbNames.begin(); i != dbNames.end(); ++i ) {
        string dbName = *i;
        assert( !setClientTempNs( dbName.c_str() ) );
        // The version lives in the header of the first (.0) data file.
        MongoDataFile *p = database->getFile( 0 );
        MDFHeader *h = p->getHeader();
        if ( !h->currentVersion() ) {
            log() << "****" << endl;
            log() << "****" << endl;
            log() << "need to upgrade database " << dbName << " with pdfile version " << h->version << "." << h->versionMinor << ", "
                  << "new version: " << VERSION << "." << VERSION_MINOR << endl;
            if ( shouldRepairDatabases ){
                // QUESTION: Repair even if file format is higher version than code?
                log() << "\t starting repair" << endl;
                string errmsg;
                assert( repairDatabase( dbName.c_str(), errmsg ) );
            }
            else {
                log() << "\t Not repairing, exiting!" << endl;
                log() << "\t run --upgrade to upgrade dbs, then start again" << endl;
                log() << "****" << endl;
                dbexit( EXIT_NEED_UPGRADE );
                shouldRepairDatabases = 1;
                return;
            }
        }
        else {
            // Version is fine; close the db again so startup doesn't keep every db open.
            closeClient( dbName.c_str() );
        }
    }

    if ( shouldRepairDatabases ){
        log() << "finished checking dbs" << endl;
        dbexit( EXIT_CLEAN );
    }
}
/** Find (or allocate) a data file with at least sizeNeeded bytes of free space.
    @param sizeNeeded  bytes of unused space required
    @param preallocate passed through to addAFile when growing the database
    @return a suitable file, or 0 if none could be found or allocated */
MongoDataFile* Database::suitableFile( int sizeNeeded, bool preallocate ) {
    // Prefer an already-open file with room, checking newest first.
    for ( int fileNo = numFiles() - 1; fileNo >= 0; fileNo-- ) {
        MongoDataFile* existing = getFile( fileNo );
        if ( existing->getHeader()->unusedLength >= sizeNeeded )
            return existing;
    }

    // Otherwise grow the database, giving up after eight new files.
    for ( int attempt = 0; attempt < 8; attempt++ ) {
        MongoDataFile* fresh = addAFile( sizeNeeded, preallocate );
        if ( fresh->getHeader()->unusedLength >= sizeNeeded )
            return fresh;
        if ( fresh->getHeader()->fileLength >= MongoDataFile::maxSize() )
            return fresh; // files don't get any bigger, so stop trying
    }

    return 0;
}
/** Walk one extent during repair, passing each recoverable document to 'w'.
    Tolerates corruption: guards against record-pointer cycles, logs (rather than
    aborts on) invalid documents, and tries to report the first element of a bad
    document for diagnostics.
    @param forward true: walk firstRecord via getNext; false: walk lastRecord via getPrev
    @param w       sink invoked with each valid BSONObj found
    @return the next extent in the walk direction (xnext/xprev), or a null DiskLoc
            if eLoc has an invalid offset. */
DiskLoc _repairExtent( Database* db , string ns, bool forward , DiskLoc eLoc , Writer& w ){
    LogIndentLevel lil;

    if ( eLoc.getOfs() <= 0 ){
        error() << "invalid extent ofs: " << eLoc.getOfs() << endl;
        return DiskLoc();
    }

    MongoDataFile * mdf = db->getFile( eLoc.a() );

    Extent * e = mdf->debug_getExtent( eLoc );
    if ( ! e->isOk() ){
        warning() << "Extent not ok magic: " << e->magic << " going to try to continue" << endl;
    }

    log() << "length:" << e->length << endl;

    LogIndentLevel lil2;

    // Corrupt next/prev pointers can form a cycle; track visited records so a
    // damaged extent can't make this loop run forever.
    set<DiskLoc> seen;

    DiskLoc loc = forward ? e->firstRecord : e->lastRecord;
    while ( ! loc.isNull() ){
        if ( ! seen.insert( loc ).second ) {
            error() << "infinite loop in extend, seen: " << loc << " before" << endl;
            break;
        }

        if ( loc.getOfs() <= 0 ){
            error() << "offset is 0 for record which should be impossible" << endl;
            break;
        }
        log(1) << loc << endl;
        Record* rec = loc.rec();
        BSONObj obj;
        try {
            obj = loc.obj();
            assert( obj.valid() );
            LOG(1) << obj << endl;
            w( obj );
        }
        catch ( std::exception& e ) {
            // Document is unreadable/invalid: log it and keep walking.
            log() << "found invalid document @ " << loc << " " << e.what() << endl;
            if ( ! obj.isEmpty() ) {
                try {
                    // Best-effort: show the first element of the bad document.
                    BSONElement e = obj.firstElement();
                    stringstream ss;
                    ss << "first element: " << e;
                    log() << ss.str();
                }
                catch ( std::exception& ) {
                }
            }
        }
        loc = forward ? rec->getNext( loc ) : rec->getPrev( loc );
    }
    return forward ? e->xnext : e->xprev;
}
/** Compact one extent: copy every (optionally validated) record into freshly
    allocated, re-padded space, collect index keys for the later index rebuild,
    then free the old extent. Commits to the journal periodically so the commit
    block stays bounded and the operation remains interruptible.
    @param d        namespace details; d->firstExtent must equal diskloc
    @param n        extent ordinal, used only for log messages
    @param phase1   per-index key accumulators for the index rebuild
    @param nidx     number of indexes in indexSpecs/phase1
    @param validate if true, skip records whose BSON fails validation
    @param pf,pb    padding factor and padding bytes applied to each new record
    @return number of skipped (invalid) documents */
unsigned compactExtent(const char *ns, NamespaceDetails *d, const DiskLoc diskloc, int n,
                       const scoped_array<IndexSpec> &indexSpecs,
                       scoped_array<SortPhaseOne>& phase1, int nidx, bool validate, double pf, int pb)
{
    log() << "compact begin extent #" << n << " for namespace " << ns << endl;
    unsigned oldObjSize = 0; // we'll report what the old padding was
    unsigned oldObjSizeWithPadding = 0;

    Extent *e = diskloc.ext();
    e->assertOk();
    verify( e->validates() );
    unsigned skipped = 0;

    {
        // the next/prev pointers within the extent might not be in order so we first page the whole thing in
        // sequentially
        log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
        Timer t;
        MongoDataFile* mdf = cc().database()->getFile( diskloc.a() );
        HANDLE fd = mdf->getFd();
        int offset = diskloc.getOfs();
        Extent* ext = diskloc.ext();
        size_t length = ext->length;

        touch_pages(fd, offset, length, ext);
        int ms = t.millis();
        if( ms > 1000 )
            log() << "compact end paging in " << ms << "ms " << e->length/1000000.0/ms << "MB/sec" << endl;
    }

    {
        log() << "compact copying records" << endl;
        long long datasize = 0;
        long long nrecords = 0;
        DiskLoc L = e->firstRecord;
        if( !L.isNull() ) {
            while( 1 ) {
                Record *recOld = L.rec();
                L = recOld->nextInExtent(L);
                BSONObj objOld = BSONObj::make(recOld);

                if( !validate || objOld.valid() ) {
                    nrecords++;
                    unsigned sz = objOld.objsize();

                    oldObjSize += sz;
                    oldObjSizeWithPadding += recOld->netLength();

                    unsigned lenWHdr = sz + Record::HeaderSize;
                    unsigned lenWPadding = lenWHdr;
                    {
                        // Apply the requested padding factor/bytes, then quantize;
                        // fall back to the unpadded length if the result is nonsensical.
                        lenWPadding = static_cast<unsigned>(pf*lenWPadding);
                        lenWPadding += pb;
                        lenWPadding = lenWPadding & quantizeMask(lenWPadding);
                        if( lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) {
                            lenWPadding = lenWHdr;
                        }
                    }
                    DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWPadding, false);
                    uassert(14024, "compact error out of space during compaction", !loc.isNull());
                    Record *recNew = loc.rec();
                    datasize += recNew->netLength();
                    // Declare the new record's bytes to the journal before writing them.
                    recNew = (Record *) getDur().writingPtr(recNew, lenWHdr);
                    addRecordToRecListInExtent(recNew, loc);
                    memcpy(recNew->data(), objOld.objdata(), sz);

                    {
                        // extract keys for all indexes we will be rebuilding
                        for( int x = 0; x < nidx; x++ ) {
                            phase1[x].addKeys(indexSpecs[x], objOld, loc);
                        }
                    }
                }
                else {
                    if( ++skipped <= 10 )
                        log() << "compact skipping invalid object" << endl;
                }

                if( L.isNull() ) {
                    // we just did the very last record from the old extent. it's still pointed to
                    // by the old extent ext, but that will be fixed below after this loop
                    break;
                }

                // remove the old records (orphan them) periodically so our commit block doesn't get too large
                bool stopping = false;
                RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                if( stopping || getDur().aCommitIsNeeded() ) {
                    // Detach already-copied records from the old extent, commit,
                    // and give a kill request a chance to take effect.
                    e->firstRecord.writing() = L;
                    Record *r = L.rec();
                    getDur().writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                    getDur().commitIfNeeded();
                    killCurrentOp.checkForInterrupt(false);
                }
            }
        } // if !L.isNull()

        verify( d->firstExtent == diskloc );
        verify( d->lastExtent != diskloc );
        // Unlink the now-empty old extent from the front of the chain and free it.
        DiskLoc newFirst = e->xnext;
        d->firstExtent.writing() = newFirst;
        newFirst.ext()->xprev.writing().Null();
        getDur().writing(e)->markEmpty();
        freeExtents( diskloc, diskloc );

        // update datasize/record count for this namespace's extent
        {
            NamespaceDetails::Stats *s = getDur().writing(&d->stats);
            s->datasize += datasize;
            s->nrecords += nrecords;
        }

        getDur().commitIfNeeded();

        {
            double op = 1.0;
            if( oldObjSize )
                op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
            log() << "compact finished extent #" << n << " containing " << nrecords << " documents ("
                  << datasize/1000000.0 << "MB)"
                  << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100 << endl;
        }
    }

    return skipped;
}