void DurableImpl::setNoJournal(void *dst, void *src, unsigned len) { // we are at least read locked, so we need not worry about REMAPPRIVATEVIEW herein. DEV dbMutex.assertAtLeastReadLocked(); MemoryMappedFile::makeWritable(dst, len); // we enter the RecoveryJob mutex here, so that if WRITETODATAFILES is happening we do not // conflict with it scoped_lock lk1( RecoveryJob::get()._mx ); // we stay in this mutex for everything to work with DurParanoid/validateSingleMapMatches // // either of these mutexes also makes setNoJournal threadsafe, which is good as we call it from a read // (not a write) lock in class SlaveTracking // scoped_lock lk( privateViews._mutex() ); size_t ofs; MongoMMF *f = privateViews.find_inlock(dst, ofs); assert(f); void *w = (((char *)f->view_write())+ofs); // first write it to the writable (file) view memcpy(w, src, len); if( memcmp(w, dst, len) ) { // if we get here, a copy-on-write had previously occurred. so write it to the private view too // to keep them in sync. we do this as we do not want to cause a copy on write unnecessarily. memcpy(dst, src, len); } }
/** (SLOW) diagnostic to check that the private view and the non-private view are in sync. */ static void debugValidateMapsMatch() { if( !DebugValidateMapsMatch ) return; Timer t; set<MongoFile*>& files = MongoFile::getAllFiles(); for( set<MongoFile*>::iterator i = files.begin(); i != files.end(); i++ ) { MongoFile *mf = *i; if( mf->isMongoMMF() ) { MongoMMF *mmf = (MongoMMF*) mf; const char *p = (const char *) mmf->getView(); const char *w = (const char *) mmf->view_write(); unsigned low = 0xffffffff; unsigned high = 0; for( unsigned i = 0; i < mmf->length(); i++ ) { if( p[i] != w[i] ) { log() << i << '\t' << (int) p[i] << '\t' << (int) w[i] << endl; if( i < low ) low = i; if( i > high ) high = i; } } if( low != 0xffffffff ) { std::stringstream ss; ss << "dur error warning views mismatch " << mmf->filename() << ' ' << (hex) << low << ".." << high << " len:" << high-low+1; log() << ss.str() << endl; log() << "priv loc: " << (void*)(p+low) << endl; vector<WriteIntent>& w = commitJob.writes(); (void)w; // mark as unused. Useful for inspection in debugger breakpoint(); } } } log() << "debugValidateMapsMatch " << t.millis() << "ms " << endl; }
/** Build a journal section in bb from the write intents in wi._writes: a header, one entry
    per intent (preceded by a JDbContext and the file path each time the file path changes),
    a footer, and padding up to a multiple of 8192 bytes. Also records in each intent the
    corresponding address in the writable view (w_ptr).

    caller handles locking

    @param bb  builder to fill; it is reset first
    @return always true
*/
static bool PREPLOGBUFFER(AlignedBuilder& bb) {
    bb.reset();

    // JSectHeader: marker text, then 4 bytes reserved for the padded section length.
    // NOTE(review): the pointer returned by skip() is dereferenced only after all the
    // appends below; presumably AlignedBuilder keeps it valid across growth -- confirm.
    bb.appendStr("\nHH\n", false);
    unsigned *lenInBlockHeader = (unsigned *) bb.skip(4);

    string lastFilePath;
    {
        scoped_lock lk(privateViews._mutex());
        for( vector<WriteIntent>::iterator intent = wi._writes.begin(); intent != wi._writes.end(); intent++ ) {
            size_t ofs;
            MongoMMF *mmf = privateViews._find(intent->p, ofs);
            if( mmf == 0 ) {
                journalingFailure("view pointer cannot be resolved");
                continue;
            }

            // usually it will already be dirty so don't bother writing then
            if( !mmf->dirty() )
                mmf->dirty() = true;

            // translate the private view address into the matching writable view address
            size_t viewOffset = ((char *)intent->p) - ((char*)mmf->getView().p);
            intent->w_ptr = ((char*)mmf->view_write()) + viewOffset;

            // a context record is emitted only when the file path changes
            if( mmf->filePath() != lastFilePath ) {
                lastFilePath = mmf->filePath();
                JDbContext context;
                bb.appendStruct(context);
                bb.appendStr(lastFilePath);
            }

            JEntry entry;
            entry.len = intent->len;
            entry.fileNo = mmf->fileSuffixNo();
            bb.appendStruct(entry);
            bb.appendBuf(intent->p, intent->len);
        }
    }

    JSectFooter footer;
    footer.hash = 0;
    bb.appendStruct(footer);

    // fill to alignment, and store the padded length in the slot reserved in the header
    unsigned paddedLen = (bb.len() + 8191) & 0xffffe000;
    dassert( paddedLen >= (unsigned) bb.len() );
    *lenInBlockHeader = paddedLen;
    unsigned padding = paddedLen - bb.len();
    bb.skip(padding);
    dassert( bb.len() % 8192 == 0 );

    return true;
}
/** Check one file: if mf is a MongoMMF whose private and writable views are both available,
    compare the two views. On a mismatch, log details for up to the first 59 differing bytes
    and the overall mismatching range, then fail with massert 13599.
    The file's length is added to _bytes.
*/
void operator () (MongoFile *mf) {
    if( !mf->isMongoMMF() )
        return;

    MongoMMF *mmf = (MongoMMF*) mf;
    const unsigned char *privView = (const unsigned char *) mmf->getView();
    const unsigned char *fileView = (const unsigned char *) mmf->view_write();
    if( privView == 0 || fileView == 0 )
        return; // File not fully opened yet

    _bytes += mmf->length();

    // the comparisons below work with unsigned offsets, so the length has to fit in one
    assert( mmf->length() == (unsigned) mmf->length() );

    {
        scoped_lock lk( privateViews._mutex() ); // see setNoJournal
        const bool viewsMatch = memcmp(privView, fileView, (unsigned) mmf->length()) == 0;
        if( viewsMatch )
            return; // next file
    }

    log() << "DurParanoid mismatch in " << mmf->filename() << endl;

    // 0xffffffff is the "nothing seen yet" sentinel for both low and lastMismatch
    unsigned low = 0xffffffff;
    unsigned high = 0;
    unsigned lastMismatch = 0xffffffff;
    int logged = 0;
    for( unsigned pos = 0; pos < mmf->length(); pos++ ) {
        if( privView[pos] == fileView[pos] )
            continue;

        // separate blocks of mismatches
        if( lastMismatch != 0xffffffff && lastMismatch+1 != pos )
            log() << endl;
        lastMismatch = pos;

        ++logged;
        if( logged < 60 ) {
            // for .ns files to find offset in record
            if( logged == 1 )
                log() << "ofs % 628 = 0x" << hex << (pos%628) << endl;

            stringstream detail;
            detail << "mismatch ofs:" << hex << pos << "\tfilemap:" << setw(2) << (unsigned) fileView[pos] << "\tprivmap:" << setw(2) << (unsigned) privView[pos];
            if( privView[pos] > 32 && privView[pos] <= 126 )
                detail << '\t' << privView[pos];
            log() << detail.str() << endl;
        }
        if( logged == 60 )
            log() << "..." << endl;

        if( pos < low )
            low = pos;
        if( pos > high )
            high = pos;
    }

    if( low == 0xffffffff )
        return;

    std::stringstream summary;
    summary << "journal error warning views mismatch " << mmf->filename() << ' ' << (hex) << low << ".." << high << " len:" << high-low+1;
    log() << summary.str() << endl;
    log() << "priv loc: " << (void*)(privView+low) << ' ' << endl;

    // Not used by the code; kept so the declared writes can be inspected in a debugger.
    set<WriteIntent>& declaredWrites = commitJob.writes();
    (void)declaredWrites;

    // should we abort() here so this isn't unnoticed in some circumstances?
    massert(13599, "Written data does not match in-memory view. Missing WriteIntent?", false);
}
void DurableImpl::debugCheckLastDeclaredWrite() { if( !DebugCheckLastDeclaredWrite ) return; if( testIntent ) return; static int n; ++n; assert(debug && cmdLine.dur); vector<WriteIntent>& w = commitJob.writes(); if( w.size() == 0 ) return; const WriteIntent &i = w[w.size()-1]; size_t ofs; MongoMMF *mmf = privateViews.find(i.p, ofs); if( mmf == 0 ) return; size_t past = ofs + i.len; if( mmf->length() < past + 8 ) return; // too close to end of view char *priv = (char *) mmf->getView(); char *writ = (char *) mmf->view_write(); unsigned long long *a = (unsigned long long *) (priv+past); unsigned long long *b = (unsigned long long *) (writ+past); if( *a != *b ) { for( unsigned z = 0; z < w.size() - 1; z++ ) { const WriteIntent& wi = w[z]; char *r1 = (char*) wi.p; char *r2 = r1 + wi.len; if( r1 <= (((char*)a)+8) && r2 > (char*)a ) { //log() << "it's ok " << wi.p << ' ' << wi.len << endl; return; } } log() << "dur data after write area " << i.p << " does not agree" << endl; log() << " was: " << ((void*)b) << " " << hexdump((char*)b, 8) << endl; log() << " now: " << ((void*)a) << " " << hexdump((char*)a, 8) << endl; log() << " n: " << n << endl; log() << endl; } }
void t() { for( int i = 0; i < 20; i++ ) { sleepmillis(21); string fn = "/tmp/t1"; MongoMMF f; unsigned long long len = 1 * 1024 * 1024; assert( f.create(fn, len, /*sequential*/rand()%2==0) ); { char *p = (char *) f.getView(); assert(p); // write something to the private view as a test strcpy(p, "hello"); } if( cmdLine.dur ) { char *w = (char *) f.view_write(); strcpy(w + 6, "world"); } MongoFileFinder ff; ASSERT( ff.findByPath(fn) ); } }
void DurableImpl::debugCheckLastDeclaredWrite() { static int n; ++n; assert(debug && cmdLine.dur); if (commitJob.writes().empty()) return; const WriteIntent &i = commitJob.lastWrite(); size_t ofs; MongoMMF *mmf = privateViews.find(i.start(), ofs); if( mmf == 0 ) return; size_t past = ofs + i.length(); if( mmf->length() < past + 8 ) return; // too close to end of view char *priv = (char *) mmf->getView(); char *writ = (char *) mmf->view_write(); unsigned long long *a = (unsigned long long *) (priv+past); unsigned long long *b = (unsigned long long *) (writ+past); if( *a != *b ) { for( set<WriteIntent>::iterator it(commitJob.writes().begin()), end((commitJob.writes().begin())); it != end; ++it ) { const WriteIntent& wi = *it; char *r1 = (char*) wi.start(); char *r2 = (char*) wi.end(); if( r1 <= (((char*)a)+8) && r2 > (char*)a ) { //log() << "it's ok " << wi.p << ' ' << wi.len << endl; return; } } log() << "journal data after write area " << i.start() << " does not agree" << endl; log() << " was: " << ((void*)b) << " " << hexdump((char*)b, 8) << endl; log() << " now: " << ((void*)a) << " " << hexdump((char*)a, 8) << endl; log() << " n: " << n << endl; log() << endl; } }
/** we will build an output buffer ourself and then use O_DIRECT we could be in read lock for this caller handles locking */ static void PREPLOGBUFFER() { assert( cmdLine.dur ); AlignedBuilder& bb = commitJob._ab; bb.reset(); unsigned lenOfs; // JSectHeader { bb.appendStr("\nHH\n", false); lenOfs = bb.skip(4); } // ops other than basic writes { for( vector< shared_ptr<DurOp> >::iterator i = commitJob.ops().begin(); i != commitJob.ops().end(); ++i ) { (*i)->serialize(bb); } } // write intents { scoped_lock lk(privateViews._mutex()); string lastFilePath; for( vector<WriteIntent>::iterator i = commitJob.writes().begin(); i != commitJob.writes().end(); i++ ) { size_t ofs; MongoMMF *mmf = privateViews._find(i->p, ofs); if( mmf == 0 ) { string s = str::stream() << "view pointer cannot be resolved " << (size_t) i->p; journalingFailure(s.c_str()); // asserts return; } if( !mmf->willNeedRemap() ) { mmf->willNeedRemap() = true; // usually it will already be dirty so don't bother writing then } //size_t ofs = ((char *)i->p) - ((char*)mmf->getView().p); i->w_ptr = ((char*)mmf->view_write()) + ofs; if( mmf->filePath() != lastFilePath ) { lastFilePath = mmf->filePath(); JDbContext c; bb.appendStruct(c); bb.appendStr(lastFilePath); } JEntry e; e.len = i->len; assert( ofs <= 0x80000000 ); e.ofs = (unsigned) ofs; e.fileNo = mmf->fileSuffixNo(); bb.appendStruct(e); bb.appendBuf(i->p, i->len); } } { JSectFooter f(bb.buf(), bb.len()); bb.appendStruct(f); } { assert( 0xffffe000 == (~(Alignment-1)) ); unsigned L = (bb.len() + Alignment-1) & (~(Alignment-1)); // fill to alignment dassert( L >= (unsigned) bb.len() ); *((unsigned*)bb.atOfs(lenOfs)) = L; unsigned padding = L - bb.len(); bb.skip(padding); dassert( bb.len() % Alignment == 0 ); } return; }
void run() { try { boost::filesystem::remove(fn); } catch(...) { } writelock lk; { MongoMMF f; unsigned long long len = 256 * 1024 * 1024; assert( f.create(fn, len, /*sequential*/false) ); { char *p = (char *) f.getView(); assert(p); // write something to the private view as a test if( cmdLine.dur ) MemoryMappedFile::makeWritable(p, 6); strcpy(p, "hello"); } if( cmdLine.dur ) { char *w = (char *) f.view_write(); strcpy(w + 6, "world"); } MongoFileFinder ff; ASSERT( ff.findByPath(fn) ); ASSERT( ff.findByPath("asdf") == 0 ); } { MongoFileFinder ff; ASSERT( ff.findByPath(fn) == 0 ); } int N = 10000; #if !defined(_WIN32) && !defined(__linux__) // seems this test is slow on OS X. N = 100; #endif // we make a lot here -- if we were leaking, presumably it would fail doing this many. Timer t; for( int i = 0; i < N; i++ ) { MongoMMF f; assert( f.open(fn, i%4==1) ); { char *p = (char *) f.getView(); assert(p); if( cmdLine.dur ) MemoryMappedFile::makeWritable(p, 4); strcpy(p, "zzz"); } if( cmdLine.dur ) { char *w = (char *) f.view_write(); if( i % 2 == 0 ) ++(*w); assert( w[6] == 'w' ); } } if( t.millis() > 10000 ) { log() << "warning: MMap LeakTest is unusually slow N:" << N << ' ' << t.millis() << "ms" << endl; } }