/** put the basic write operation into the buffer (bb) to be journaled */ static void prepBasicWrite_inlock(AlignedBuilder&bb, const WriteIntent *i, RelativePath& lastDbPath) { size_t ofs = 1; MongoMMF *mmf = findMMF_inlock(i->start(), /*out*/ofs); if( unlikely(!mmf->willNeedRemap()) ) { // tag this mmf as needed a remap of its private view later. // usually it will already be dirty/already set, so we do the if above first // to avoid possibility of cpu cache line contention mmf->willNeedRemap() = true; } // since we have already looked up the mmf, we go ahead and remember the write view location // so we don't have to find the MongoMMF again later in WRITETODATAFILES() // // this was for WRITETODATAFILES_Impl2 so commented out now // /* dassert( i->w_ptr == 0 ); i->w_ptr = ((char*)mmf->view_write()) + ofs; */ JEntry e; e.len = min(i->length(), (unsigned)(mmf->length() - ofs)); //dont write past end of file assert( ofs <= 0x80000000 ); e.ofs = (unsigned) ofs; e.setFileNo( mmf->fileSuffixNo() ); if( mmf->relativePath() == local ) { e.setLocalDbContextBit(); } else if( mmf->relativePath() != lastDbPath ) { lastDbPath = mmf->relativePath(); JDbContext c; bb.appendStruct(c); bb.appendStr(lastDbPath.toString()); } bb.appendStruct(e); #if defined(_EXPERIMENTAL) i->ofsInJournalBuffer = bb.len(); #endif bb.appendBuf(i->start(), e.len); if (unlikely(e.len != (unsigned)i->length())) { log() << "journal info splitting prepBasicWrite at boundary" << endl; // This only happens if we write to the last byte in a file and // the fist byte in another file that is mapped adjacently. I // think most OSs leave at least a one page gap between // mappings, but better to be safe. WriteIntent next ((char*)i->start() + e.len, i->length() - e.len); prepBasicWrite_inlock(bb, &next, lastDbPath); } }
/** We need to remap the private views periodically. otherwise they would become very large. Call within write lock. */ void REMAPPRIVATEVIEW() { static unsigned startAt; static unsigned long long lastRemap; dbMutex.assertWriteLocked(); dbMutex._remapPrivateViewRequested = false; assert( !commitJob.hasWritten() ); if( 0 ) { log() << "TEMP remapprivateview disabled for testing - will eventually run oom in this mode if db bigger than ram" << endl; return; } // we want to remap all private views about every 2 seconds. there could be ~1000 views so // we do a little each pass; beyond the remap time, more significantly, there will be copy on write // faults after remapping, so doing a little bit at a time will avoid big load spikes on // remapping. unsigned long long now = curTimeMicros64(); double fraction = (now-lastRemap)/20000000.0; set<MongoFile*>& files = MongoFile::getAllFiles(); unsigned sz = files.size(); if( sz == 0 ) return; unsigned ntodo = (unsigned) (sz * fraction); if( ntodo < 1 ) ntodo = 1; if( ntodo > sz ) ntodo = sz; const set<MongoFile*>::iterator b = files.begin(); const set<MongoFile*>::iterator e = files.end(); set<MongoFile*>::iterator i = b; for( unsigned x = 0; x < startAt; x++ ) { i++; if( i == e ) i = b; } startAt = (startAt + ntodo) % sz; for( unsigned x = 0; x < ntodo; x++ ) { dassert( i != e ); MongoMMF *mmf = dynamic_cast<MongoMMF*>(*i); if( mmf && mmf->willNeedRemap() ) { mmf->willNeedRemap() = false; mmf->remapThePrivateView(); } i++; if( i == e ) i = b; } }
/** we will build an output buffer ourself and then use O_DIRECT we could be in read lock for this caller handles locking */ static void PREPLOGBUFFER() { assert( cmdLine.dur ); AlignedBuilder& bb = commitJob._ab; bb.reset(); unsigned lenOfs; // JSectHeader { bb.appendStr("\nHH\n", false); lenOfs = bb.skip(4); } // ops other than basic writes { for( vector< shared_ptr<DurOp> >::iterator i = commitJob.ops().begin(); i != commitJob.ops().end(); ++i ) { (*i)->serialize(bb); } } // write intents { scoped_lock lk(privateViews._mutex()); string lastFilePath; for( vector<WriteIntent>::iterator i = commitJob.writes().begin(); i != commitJob.writes().end(); i++ ) { size_t ofs; MongoMMF *mmf = privateViews._find(i->p, ofs); if( mmf == 0 ) { string s = str::stream() << "view pointer cannot be resolved " << (size_t) i->p; journalingFailure(s.c_str()); // asserts return; } if( !mmf->willNeedRemap() ) { mmf->willNeedRemap() = true; // usually it will already be dirty so don't bother writing then } //size_t ofs = ((char *)i->p) - ((char*)mmf->getView().p); i->w_ptr = ((char*)mmf->view_write()) + ofs; if( mmf->filePath() != lastFilePath ) { lastFilePath = mmf->filePath(); JDbContext c; bb.appendStruct(c); bb.appendStr(lastFilePath); } JEntry e; e.len = i->len; assert( ofs <= 0x80000000 ); e.ofs = (unsigned) ofs; e.fileNo = mmf->fileSuffixNo(); bb.appendStruct(e); bb.appendBuf(i->p, i->len); } } { JSectFooter f(bb.buf(), bb.len()); bb.appendStruct(f); } { assert( 0xffffe000 == (~(Alignment-1)) ); unsigned L = (bb.len() + Alignment-1) & (~(Alignment-1)); // fill to alignment dassert( L >= (unsigned) bb.len() ); *((unsigned*)bb.atOfs(lenOfs)) = L; unsigned padding = L - bb.len(); bb.skip(padding); dassert( bb.len() % Alignment == 0 ); } return; }
/** We need to remap the private views periodically. otherwise they would become very large. Call within write lock. */ void _REMAPPRIVATEVIEW() { // todo: Consider using ProcessInfo herein and watching for getResidentSize to drop. that could be a way // to assure very good behavior here. static unsigned startAt; static unsigned long long lastRemap; dbMutex.assertWriteLocked(); dbMutex._remapPrivateViewRequested = false; assert( !commitJob.hasWritten() ); // we want to remap all private views about every 2 seconds. there could be ~1000 views so // we do a little each pass; beyond the remap time, more significantly, there will be copy on write // faults after remapping, so doing a little bit at a time will avoid big load spikes on // remapping. unsigned long long now = curTimeMicros64(); double fraction = (now-lastRemap)/2000000.0; if( cmdLine.durOptions & CmdLine::DurAlwaysRemap ) fraction = 1; lastRemap = now; RWLockRecursive::Shared lk(MongoFile::mmmutex); set<MongoFile*>& files = MongoFile::getAllFiles(); unsigned sz = files.size(); if( sz == 0 ) return; { // be careful not to use too much memory if the write rate is // extremely high double f = privateMapBytes / ((double)UncommittedBytesLimit); if( f > fraction ) { fraction = f; } privateMapBytes = 0; } unsigned ntodo = (unsigned) (sz * fraction); if( ntodo < 1 ) ntodo = 1; if( ntodo > sz ) ntodo = sz; const set<MongoFile*>::iterator b = files.begin(); const set<MongoFile*>::iterator e = files.end(); set<MongoFile*>::iterator i = b; // skip to our starting position for( unsigned x = 0; x < startAt; x++ ) { i++; if( i == e ) i = b; } startAt = (startAt + ntodo) % sz; // mark where to start next time for( unsigned x = 0; x < ntodo; x++ ) { dassert( i != e ); if( (*i)->isMongoMMF() ) { MongoMMF *mmf = (MongoMMF*) *i; assert(mmf); if( mmf->willNeedRemap() ) { mmf->willNeedRemap() = false; mmf->remapThePrivateView(); } i++; if( i == e ) i = b; } } }
static void _REMAPPRIVATEVIEW() { // todo: Consider using ProcessInfo herein and watching for getResidentSize to drop. that could be a way // to assure very good behavior here. static unsigned startAt; static unsigned long long lastRemap; LOG(4) << "journal REMAPPRIVATEVIEW" << endl; verify( Lock::isW() ); verify( !commitJob.hasWritten() ); // we want to remap all private views about every 2 seconds. there could be ~1000 views so // we do a little each pass; beyond the remap time, more significantly, there will be copy on write // faults after remapping, so doing a little bit at a time will avoid big load spikes on // remapping. unsigned long long now = curTimeMicros64(); double fraction = (now-lastRemap)/2000000.0; if( cmdLine.durOptions & CmdLine::DurAlwaysRemap ) fraction = 1; lastRemap = now; #if defined(_WIN32) // Note that this negatively affects performance. // We must grab the exclusive lock here because remapThePrivateView() on Windows needs // to grab it as well, due to the lack of a non-atomic way to remap a memory mapped file. // See SERVER-5723 for performance improvement. // See SERVER-5680 to see why this code is necessary. LockMongoFilesExclusive lk; #else LockMongoFilesShared lk; #endif set<MongoFile*>& files = MongoFile::getAllFiles(); unsigned sz = files.size(); if( sz == 0 ) return; { // be careful not to use too much memory if the write rate is // extremely high double f = privateMapBytes / ((double)UncommittedBytesLimit); if( f > fraction ) { fraction = f; } privateMapBytes = 0; } unsigned ntodo = (unsigned) (sz * fraction); if( ntodo < 1 ) ntodo = 1; if( ntodo > sz ) ntodo = sz; const set<MongoFile*>::iterator b = files.begin(); const set<MongoFile*>::iterator e = files.end(); set<MongoFile*>::iterator i = b; // skip to our starting position for( unsigned x = 0; x < startAt; x++ ) { i++; if( i == e ) i = b; } unsigned startedAt = startAt; startAt = (startAt + ntodo) % sz; // mark where to start next time Timer t; for( unsigned x = 0; x < ntodo; x++ ) { dassert( i != e ); if( (*i)->isMongoMMF() ) { MongoMMF *mmf = (MongoMMF*) *i; verify(mmf); if( mmf->willNeedRemap() ) { mmf->willNeedRemap() = false; mmf->remapThePrivateView(); } i++; if( i == e ) i = b; } } LOG(2) << "journal REMAPPRIVATEVIEW done startedAt: " << startedAt << " n:" << ntodo << ' ' << t.millis() << "ms" << endl; }