/**
 * Check one file: compare its private view against its write view and
 * report any bytes that differ.
 *
 * Files that are not MongoMMF, or whose views are not available yet, are
 * skipped. For a checked file the length is added to _bytes. On a mismatch
 * the first differing offsets are logged (fewer than 60 detail lines), then
 * a summary with the lowest/highest differing offset, and finally
 * massert(13599) is raised.
 *
 * @param mf the file to examine
 */
void operator () (MongoFile *mf) {
    if( !mf->isMongoMMF() )
        return;

    MongoMMF *mmf = (MongoMMF*) mf;
    const unsigned char *priv = (const unsigned char *) mmf->getView();
    const unsigned char *writ = (const unsigned char *) mmf->view_write();

    if( priv == 0 || writ == 0 )
        return; // File not fully opened yet

    _bytes += mmf->length();

    // offsets below are tracked as unsigned, so the length must fit in one
    assert( mmf->length() == (unsigned) mmf->length() );

    {
        scoped_lock lk( privateViews._mutex() ); // see setNoJournal
        const bool identical = ( memcmp(priv, writ, (unsigned) mmf->length()) == 0 );
        if( identical )
            return; // next file
    }

    const unsigned kNone = 0xffffffff; // marker for "no offset recorded yet"

    unsigned firstBad = kNone;
    unsigned lastBad = 0;
    log() << "DurParanoid mismatch in " << mmf->filename() << endl;

    int reported = 0;
    unsigned prevBad = kNone;
    for( unsigned ofs = 0; ofs < mmf->length(); ofs++ ) {
        if( priv[ofs] == writ[ofs] )
            continue;

        if( prevBad != kNone && prevBad+1 != ofs )
            log() << endl; // separate blocks of mismatches
        prevBad = ofs;

        ++reported;
        if( reported < 60 ) {
            if( reported == 1 )
                log() << "ofs % 628 = 0x" << hex << (ofs%628) << endl; // for .ns files to find offset in record
            stringstream line;
            line << "mismatch ofs:" << hex << ofs
                 << "\tfilemap:" << setw(2) << (unsigned) writ[ofs]
                 << "\tprivmap:" << setw(2) << (unsigned) priv[ofs];
            // append the private-view byte itself when it is a visible ASCII character
            if( priv[ofs] > 32 && priv[ofs] <= 126 )
                line << '\t' << priv[ofs];
            log() << line.str() << endl;
        }
        else if( reported == 60 ) {
            // detail output is capped; say so once
            log() << "..." << endl;
        }

        if( ofs < firstBad )
            firstBad = ofs;
        if( ofs > lastBad )
            lastBad = ofs;
    }

    if( firstBad == kNone )
        return;

    std::stringstream summary;
    summary << "journal error warning views mismatch " << mmf->filename() << ' '
            << hex << firstBad << ".." << lastBad << " len:" << lastBad-firstBad+1;
    log() << summary.str() << endl;
    log() << "priv loc: " << (void*)(priv+firstBad) << ' ' << endl;

    set<WriteIntent>& b = commitJob.writes();
    (void)b; // mark as unused. Useful for inspection in debugger

    // should we abort() here so this isn't unnoticed in some circumstances?
    massert(13599, "Written data does not match in-memory view. Missing WriteIntent?", false);
}
/** (SLOW) diagnostic to check that the private view and the non-private view are in sync. */ static void debugValidateMapsMatch() { if( !DebugValidateMapsMatch ) return; Timer t; set<MongoFile*>& files = MongoFile::getAllFiles(); for( set<MongoFile*>::iterator i = files.begin(); i != files.end(); i++ ) { MongoFile *mf = *i; if( mf->isMongoMMF() ) { MongoMMF *mmf = (MongoMMF*) mf; const char *p = (const char *) mmf->getView(); const char *w = (const char *) mmf->view_write(); unsigned low = 0xffffffff; unsigned high = 0; for( unsigned i = 0; i < mmf->length(); i++ ) { if( p[i] != w[i] ) { log() << i << '\t' << (int) p[i] << '\t' << (int) w[i] << endl; if( i < low ) low = i; if( i > high ) high = i; } } if( low != 0xffffffff ) { std::stringstream ss; ss << "dur error warning views mismatch " << mmf->filename() << ' ' << (hex) << low << ".." << high << " len:" << high-low+1; log() << ss.str() << endl; log() << "priv loc: " << (void*)(p+low) << endl; vector<WriteIntent>& w = commitJob.writes(); (void)w; // mark as unused. Useful for inspection in debugger breakpoint(); } } } log() << "debugValidateMapsMatch " << t.millis() << "ms " << endl; }
/** put the basic write operation into the buffer (bb) to be journaled */ static void prepBasicWrite_inlock(AlignedBuilder&bb, const WriteIntent *i, RelativePath& lastDbPath) { size_t ofs = 1; MongoMMF *mmf = findMMF_inlock(i->start(), /*out*/ofs); if( unlikely(!mmf->willNeedRemap()) ) { // tag this mmf as needed a remap of its private view later. // usually it will already be dirty/already set, so we do the if above first // to avoid possibility of cpu cache line contention mmf->willNeedRemap() = true; } // since we have already looked up the mmf, we go ahead and remember the write view location // so we don't have to find the MongoMMF again later in WRITETODATAFILES() // // this was for WRITETODATAFILES_Impl2 so commented out now // /* dassert( i->w_ptr == 0 ); i->w_ptr = ((char*)mmf->view_write()) + ofs; */ JEntry e; e.len = min(i->length(), (unsigned)(mmf->length() - ofs)); //dont write past end of file assert( ofs <= 0x80000000 ); e.ofs = (unsigned) ofs; e.setFileNo( mmf->fileSuffixNo() ); if( mmf->relativePath() == local ) { e.setLocalDbContextBit(); } else if( mmf->relativePath() != lastDbPath ) { lastDbPath = mmf->relativePath(); JDbContext c; bb.appendStruct(c); bb.appendStr(lastDbPath.toString()); } bb.appendStruct(e); #if defined(_EXPERIMENTAL) i->ofsInJournalBuffer = bb.len(); #endif bb.appendBuf(i->start(), e.len); if (unlikely(e.len != (unsigned)i->length())) { log() << "journal info splitting prepBasicWrite at boundary" << endl; // This only happens if we write to the last byte in a file and // the fist byte in another file that is mapped adjacently. I // think most OSs leave at least a one page gap between // mappings, but better to be safe. WriteIntent next ((char*)i->start() + e.len, i->length() - e.len); prepBasicWrite_inlock(bb, &next, lastDbPath); } }
/**
 * Switch every mapped view that overlaps chunk `chunkno` to PAGE_WRITECOPY
 * protection and then record the chunk as writable.
 *
 * Takes mapViewMutex and then privateViews._mutex(). If VirtualProtect fails
 * the failure is logged and fassertFailed(16362) is called.
 *
 * @param chunkno index of the chunk; its address range is
 *                [chunkno * ChunkSize, (chunkno + 1) * ChunkSize)
 */
__declspec(noinline) void makeChunkWritable(size_t chunkno) {
    scoped_lock lk(mapViewMutex);

    // double check lock: test again now that mapViewMutex is held
    if( writable.get(chunkno) )
        return;

    // remap all maps in this chunk.  common case is a single map, but could have more than one with smallfiles or .ns files
    const size_t chunkStart = chunkno * MemoryMappedFile::ChunkSize;
    const size_t chunkNext = chunkStart + MemoryMappedFile::ChunkSize;

    scoped_lock lk2(privateViews._mutex());

    // start from the last byte of the chunk and walk backwards through the views
    map<void*,MongoMMF*>::iterator it = privateViews.finditer_inlock((void*) (chunkNext-1));
    for( ;; ) {
        --it;
        const pair<void*,MongoMMF*> entry = *it;
        MongoMMF *mmf = entry.second;
        if( mmf == 0 )
            break;

        const size_t viewStart = (size_t) entry.first;
        const size_t viewEnd = (size_t) (viewStart + mmf->length());
        if( viewEnd <= chunkStart )
            break; // this view ends before the chunk begins

        // clip the view to the chunk boundaries
        const size_t protectStart = max(viewStart, chunkStart);
        dassert(protectStart<chunkNext);

        const size_t protectEnd = min(viewEnd, chunkNext);
        const size_t protectSize = protectEnd - protectStart;
        dassert(protectSize>0&&protectSize<=MemoryMappedFile::ChunkSize);

        DWORD oldProtection;
        const bool ok = VirtualProtect( reinterpret_cast<void*>( protectStart ), protectSize, PAGE_WRITECOPY, &oldProtection );
        if ( ok )
            continue;

        DWORD dosError = GetLastError();
        log() << "VirtualProtect for " << mmf->filename()
              << " chunk " << chunkno
              << " failed with " << errnoWithDescription( dosError )
              << " (chunk size is " << protectSize
              << ", address is " << hex << protectStart << dec << ")"
              << " in mongo::makeChunkWritable, terminating"
              << endl;
        fassertFailed( 16362 );
    }

    writable.set(chunkno);
}
void DurableImpl::debugCheckLastDeclaredWrite() { if( !DebugCheckLastDeclaredWrite ) return; if( testIntent ) return; static int n; ++n; assert(debug && cmdLine.dur); vector<WriteIntent>& w = commitJob.writes(); if( w.size() == 0 ) return; const WriteIntent &i = w[w.size()-1]; size_t ofs; MongoMMF *mmf = privateViews.find(i.p, ofs); if( mmf == 0 ) return; size_t past = ofs + i.len; if( mmf->length() < past + 8 ) return; // too close to end of view char *priv = (char *) mmf->getView(); char *writ = (char *) mmf->view_write(); unsigned long long *a = (unsigned long long *) (priv+past); unsigned long long *b = (unsigned long long *) (writ+past); if( *a != *b ) { for( unsigned z = 0; z < w.size() - 1; z++ ) { const WriteIntent& wi = w[z]; char *r1 = (char*) wi.p; char *r2 = r1 + wi.len; if( r1 <= (((char*)a)+8) && r2 > (char*)a ) { //log() << "it's ok " << wi.p << ' ' << wi.len << endl; return; } } log() << "dur data after write area " << i.p << " does not agree" << endl; log() << " was: " << ((void*)b) << " " << hexdump((char*)b, 8) << endl; log() << " now: " << ((void*)a) << " " << hexdump((char*)a, 8) << endl; log() << " n: " << n << endl; log() << endl; } }
/** underscore version of find is for when you are already locked
    @param p   the pointer to look up
    @param ofs out return our offset in the view; only written when a file is found
    @return the MongoMMF to which this pointer belongs, or 0 if p is not inside any view
*/
MongoMMF* PointerToMMF::find_inlock(void *p, /*out*/ size_t& ofs) {
    //
    // .................memory..........................
    //    v1       p                      v2
    //    [--------------------]          [-------]
    //
    // e.g., _find(p) == v1
    //
    // upper_bound(p) is the first view starting after p; the entry before it
    // is the candidate view starting at or before p.
    const pair<void*,MongoMMF*> candidate = *(--_views.upper_bound(p));

    MongoMMF *mmf = candidate.second;
    if( !mmf )
        return 0;

    // p belongs to the candidate only if it falls inside the view's length
    const size_t distance = ((char *)p) - ((char*)candidate.first);
    if( distance >= mmf->length() )
        return 0;

    ofs = distance;
    return mmf;
}
/*static*/ void* MongoMMF::switchToPrivateView(void *readonly_ptr) { void *p = readonly_ptr; assert( durable ); assert( debug ); mutex::scoped_lock lk(our_views_mutex); std::map< void*, MongoMMF* >::iterator i = our_read_views.upper_bound(((char *)p)+1); i--; bool ok = i != our_read_views.end(); if( ok ) { MongoMMF *mmf = i->second; assert( mmf ); size_t ofs = ((char *)p) - ((char*)mmf->view_readonly); if( ofs < mmf->length() ) { return ((char *)mmf->view_private) + ofs; } } if( 1 ) { static int once; /* temp : not using MongoMMF yet for datafiles, just .ns. more to do... */ if( once++ == 0 ) log() << "TEMP TODO _DURABLE : use mongommf for datafiles" << endl; return p; } for( std::map<void*,MongoMMF*>::iterator i = our_read_views.begin(); i != our_read_views.end(); i++ ) { char *wl = (char *) i->second->view_private; char *wh = wl + i->second->length(); if( p >= wl && p < wh ) { log() << "dur: perf warning p=" << p << " is already in the writable view of " << i->second->filename() << endl; return p; } } log() << "switchToPrivateView error " << p << endl; assert( false ); // did you call writing() with a pointer that isn't into a datafile? return 0; }
void DurableImpl::debugCheckLastDeclaredWrite() { static int n; ++n; assert(debug && cmdLine.dur); if (commitJob.writes().empty()) return; const WriteIntent &i = commitJob.lastWrite(); size_t ofs; MongoMMF *mmf = privateViews.find(i.start(), ofs); if( mmf == 0 ) return; size_t past = ofs + i.length(); if( mmf->length() < past + 8 ) return; // too close to end of view char *priv = (char *) mmf->getView(); char *writ = (char *) mmf->view_write(); unsigned long long *a = (unsigned long long *) (priv+past); unsigned long long *b = (unsigned long long *) (writ+past); if( *a != *b ) { for( set<WriteIntent>::iterator it(commitJob.writes().begin()), end((commitJob.writes().begin())); it != end; ++it ) { const WriteIntent& wi = *it; char *r1 = (char*) wi.start(); char *r2 = (char*) wi.end(); if( r1 <= (((char*)a)+8) && r2 > (char*)a ) { //log() << "it's ok " << wi.p << ' ' << wi.len << endl; return; } } log() << "journal data after write area " << i.start() << " does not agree" << endl; log() << " was: " << ((void*)b) << " " << hexdump((char*)b, 8) << endl; log() << " now: " << ((void*)a) << " " << hexdump((char*)a, 8) << endl; log() << " n: " << n << endl; log() << endl; } }