void DurableImpl::debugCheckLastDeclaredWrite() { static int n; ++n; verify(debug && cmdLine.dur); if (commitJob.writes().empty()) return; const WriteIntent &i = commitJob.lastWrite(); size_t ofs; MongoMMF *mmf = privateViews.find(i.start(), ofs); if( mmf == 0 ) return; size_t past = ofs + i.length(); if( mmf->length() < past + 8 ) return; // too close to end of view char *priv = (char *) mmf->getView(); char *writ = (char *) mmf->view_write(); unsigned long long *a = (unsigned long long *) (priv+past); unsigned long long *b = (unsigned long long *) (writ+past); if( *a != *b ) { for( set<WriteIntent>::iterator it(commitJob.writes().begin()), end((commitJob.writes().begin())); it != end; ++it ) { const WriteIntent& wi = *it; char *r1 = (char*) wi.start(); char *r2 = (char*) wi.end(); if( r1 <= (((char*)a)+8) && r2 > (char*)a ) { //log() << "it's ok " << wi.p << ' ' << wi.len << endl; return; } } log() << "journal data after write area " << i.start() << " does not agree" << endl; log() << " was: " << ((void*)b) << " " << hexdump((char*)b, 8) << endl; log() << " now: " << ((void*)a) << " " << hexdump((char*)a, 8) << endl; log() << " n: " << n << endl; log() << endl; } }
/** we will build an output buffer ourself and then use O_DIRECT we could be in read lock for this caller handles locking */ static void PREPLOGBUFFER() { assert( cmdLine.dur ); AlignedBuilder& bb = commitJob._ab; bb.reset(); unsigned lenOfs; // JSectHeader { bb.appendStr("\nHH\n", false); lenOfs = bb.skip(4); } // ops other than basic writes { for( vector< shared_ptr<DurOp> >::iterator i = commitJob.ops().begin(); i != commitJob.ops().end(); ++i ) { (*i)->serialize(bb); } } // write intents { scoped_lock lk(privateViews._mutex()); string lastFilePath; for( vector<WriteIntent>::iterator i = commitJob.writes().begin(); i != commitJob.writes().end(); i++ ) { size_t ofs; MongoMMF *mmf = privateViews._find(i->p, ofs); if( mmf == 0 ) { string s = str::stream() << "view pointer cannot be resolved " << (size_t) i->p; journalingFailure(s.c_str()); // asserts return; } if( !mmf->willNeedRemap() ) { mmf->willNeedRemap() = true; // usually it will already be dirty so don't bother writing then } //size_t ofs = ((char *)i->p) - ((char*)mmf->getView().p); i->w_ptr = ((char*)mmf->view_write()) + ofs; if( mmf->filePath() != lastFilePath ) { lastFilePath = mmf->filePath(); JDbContext c; bb.appendStruct(c); bb.appendStr(lastFilePath); } JEntry e; e.len = i->len; assert( ofs <= 0x80000000 ); e.ofs = (unsigned) ofs; e.fileNo = mmf->fileSuffixNo(); bb.appendStruct(e); bb.appendBuf(i->p, i->len); } } { JSectFooter f(bb.buf(), bb.len()); bb.appendStruct(f); } { assert( 0xffffe000 == (~(Alignment-1)) ); unsigned L = (bb.len() + Alignment-1) & (~(Alignment-1)); // fill to alignment dassert( L >= (unsigned) bb.len() ); *((unsigned*)bb.atOfs(lenOfs)) = L; unsigned padding = L - bb.len(); bb.skip(padding); dassert( bb.len() % Alignment == 0 ); } return; }
/** We need to remap the private views periodically. otherwise they would become very large. Call within write lock. */ void _REMAPPRIVATEVIEW() { // todo: Consider using ProcessInfo herein and watching for getResidentSize to drop. that could be a way // to assure very good behavior here. static unsigned startAt; static unsigned long long lastRemap; dbMutex.assertWriteLocked(); dbMutex._remapPrivateViewRequested = false; assert( !commitJob.hasWritten() ); // we want to remap all private views about every 2 seconds. there could be ~1000 views so // we do a little each pass; beyond the remap time, more significantly, there will be copy on write // faults after remapping, so doing a little bit at a time will avoid big load spikes on // remapping. unsigned long long now = curTimeMicros64(); double fraction = (now-lastRemap)/2000000.0; if( cmdLine.durOptions & CmdLine::DurAlwaysRemap ) fraction = 1; lastRemap = now; RWLockRecursive::Shared lk(MongoFile::mmmutex); set<MongoFile*>& files = MongoFile::getAllFiles(); unsigned sz = files.size(); if( sz == 0 ) return; { // be careful not to use too much memory if the write rate is // extremely high double f = privateMapBytes / ((double)UncommittedBytesLimit); if( f > fraction ) { fraction = f; } privateMapBytes = 0; } unsigned ntodo = (unsigned) (sz * fraction); if( ntodo < 1 ) ntodo = 1; if( ntodo > sz ) ntodo = sz; const set<MongoFile*>::iterator b = files.begin(); const set<MongoFile*>::iterator e = files.end(); set<MongoFile*>::iterator i = b; // skip to our starting position for( unsigned x = 0; x < startAt; x++ ) { i++; if( i == e ) i = b; } startAt = (startAt + ntodo) % sz; // mark where to start next time for( unsigned x = 0; x < ntodo; x++ ) { dassert( i != e ); if( (*i)->isMongoMMF() ) { MongoMMF *mmf = (MongoMMF*) *i; assert(mmf); if( mmf->willNeedRemap() ) { mmf->willNeedRemap() = false; mmf->remapThePrivateView(); } i++; if( i == e ) i = b; } } }
static void _REMAPPRIVATEVIEW() { // todo: Consider using ProcessInfo herein and watching for getResidentSize to drop. that could be a way // to assure very good behavior here. static unsigned startAt; static unsigned long long lastRemap; LOG(4) << "journal REMAPPRIVATEVIEW" << endl; verify( Lock::isW() ); verify( !commitJob.hasWritten() ); // we want to remap all private views about every 2 seconds. there could be ~1000 views so // we do a little each pass; beyond the remap time, more significantly, there will be copy on write // faults after remapping, so doing a little bit at a time will avoid big load spikes on // remapping. unsigned long long now = curTimeMicros64(); double fraction = (now-lastRemap)/2000000.0; if( cmdLine.durOptions & CmdLine::DurAlwaysRemap ) fraction = 1; lastRemap = now; #if defined(_WIN32) // Note that this negatively affects performance. // We must grab the exclusive lock here because remapThePrivateView() on Windows needs // to grab it as well, due to the lack of a non-atomic way to remap a memory mapped file. // See SERVER-5723 for performance improvement. // See SERVER-5680 to see why this code is necessary. LockMongoFilesExclusive lk; #else LockMongoFilesShared lk; #endif set<MongoFile*>& files = MongoFile::getAllFiles(); unsigned sz = files.size(); if( sz == 0 ) return; { // be careful not to use too much memory if the write rate is // extremely high double f = privateMapBytes / ((double)UncommittedBytesLimit); if( f > fraction ) { fraction = f; } privateMapBytes = 0; } unsigned ntodo = (unsigned) (sz * fraction); if( ntodo < 1 ) ntodo = 1; if( ntodo > sz ) ntodo = sz; const set<MongoFile*>::iterator b = files.begin(); const set<MongoFile*>::iterator e = files.end(); set<MongoFile*>::iterator i = b; // skip to our starting position for( unsigned x = 0; x < startAt; x++ ) { i++; if( i == e ) i = b; } unsigned startedAt = startAt; startAt = (startAt + ntodo) % sz; // mark where to start next time Timer t; for( unsigned x = 0; x < ntodo; x++ ) { dassert( i != e ); if( (*i)->isMongoMMF() ) { MongoMMF *mmf = (MongoMMF*) *i; verify(mmf); if( mmf->willNeedRemap() ) { mmf->willNeedRemap() = false; mmf->remapThePrivateView(); } i++; if( i == e ) i = b; } } LOG(2) << "journal REMAPPRIVATEVIEW done startedAt: " << startedAt << " n:" << ntodo << ' ' << t.millis() << "ms" << endl; }
void run() { try { boost::filesystem::remove(fn); } catch(...) { } writelock lk; { MongoMMF f; unsigned long long len = 256 * 1024 * 1024; assert( f.create(fn, len, /*sequential*/false) ); { char *p = (char *) f.getView(); assert(p); // write something to the private view as a test if( cmdLine.dur ) MemoryMappedFile::makeWritable(p, 6); strcpy(p, "hello"); } if( cmdLine.dur ) { char *w = (char *) f.view_write(); strcpy(w + 6, "world"); } MongoFileFinder ff; ASSERT( ff.findByPath(fn) ); ASSERT( ff.findByPath("asdf") == 0 ); } { MongoFileFinder ff; ASSERT( ff.findByPath(fn) == 0 ); } int N = 10000; #if !defined(_WIN32) && !defined(__linux__) // seems this test is slow on OS X. N = 100; #endif // we make a lot here -- if we were leaking, presumably it would fail doing this many. Timer t; for( int i = 0; i < N; i++ ) { MongoMMF f; assert( f.open(fn, i%4==1) ); { char *p = (char *) f.getView(); assert(p); if( cmdLine.dur ) MemoryMappedFile::makeWritable(p, 4); strcpy(p, "zzz"); } if( cmdLine.dur ) { char *w = (char *) f.view_write(); if( i % 2 == 0 ) ++(*w); assert( w[6] == 'w' ); } } if( t.millis() > 10000 ) { log() << "warning: MMap LeakTest is unusually slow N:" << N << ' ' << t.millis() << "ms" << endl; } }