bool Extent::validates(const DiskLoc diskLoc, vector<string>* errors) const {
    bool extentOk = true;
    if (magic != extentSignature) {
        if (errors) {
            StringBuilder sb;
            sb << "bad extent signature " << integerToHex(magic)
               << " in extent " << diskLoc.toString();
            errors->push_back( sb.str() );
        }
        extentOk = false;
    }
    if (myLoc != diskLoc) {
        if (errors) {
            StringBuilder sb;
            sb << "extent " << diskLoc.toString()
               << " self-pointer is " << myLoc.toString();
            errors->push_back( sb.str() );
        }
        extentOk = false;
    }
    if (firstRecord.isNull() != lastRecord.isNull()) {
        if (errors) {
            StringBuilder sb;
            if (firstRecord.isNull()) {
                sb << "in extent " << diskLoc.toString()
                   << ", firstRecord is null but lastRecord is "
                   << lastRecord.toString();
            }
            else {
                sb << "in extent " << diskLoc.toString()
                   << ", firstRecord is " << firstRecord.toString()
                   << " but lastRecord is null";
            }
            errors->push_back( sb.str() );
        }
        extentOk = false;
    }
    static const int minSize = 0x1000;
    if (length < minSize) {
        if (errors) {
            StringBuilder sb;
            sb << "length of extent " << diskLoc.toString()
               << " is " << length
               << ", which is less than minimum length of " << minSize;
            errors->push_back( sb.str() );
        }
        extentOk = false;
    }
    return extentOk;
}
bool Extent::validates(const DiskLoc diskLoc, BSONArrayBuilder* errors) {
    bool extentOk = true;
    if (magic != extentSignature) {
        if (errors) {
            StringBuilder sb;
            sb << "bad extent signature " << integerToHex(magic)
               << " in extent " << diskLoc.toString();
            *errors << sb.str();
        }
        extentOk = false;
    }
    if (myLoc != diskLoc) {
        if (errors) {
            StringBuilder sb;
            sb << "extent " << diskLoc.toString()
               << " self-pointer is " << myLoc.toString();
            *errors << sb.str();
        }
        extentOk = false;
    }
    if (firstRecord.isNull() != lastRecord.isNull()) {
        if (errors) {
            StringBuilder sb;
            if (firstRecord.isNull()) {
                sb << "in extent " << diskLoc.toString()
                   << ", firstRecord is null but lastRecord is "
                   << lastRecord.toString();
            }
            else {
                sb << "in extent " << diskLoc.toString()
                   << ", firstRecord is " << firstRecord.toString()
                   << " but lastRecord is null";
            }
            *errors << sb.str();
        }
        extentOk = false;
    }
    if (length < minSize()) {
        if (errors) {
            StringBuilder sb;
            sb << "length of extent " << diskLoc.toString()
               << " is " << length
               << ", which is less than minimum length of " << minSize();
            *errors << sb.str();
        }
        extentOk = false;
    }
    return extentOk;
}
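// A minimal usage sketch for the vector<string> overload above. The walker
// itself ('walkExtentsSketch') is hypothetical; only the contract it relies
// on comes from Extent::validates(): pass NULL to just count bad extents, or
// pass a vector to also collect the error messages.
static int walkExtentsSketch(DiskLoc cur, vector<string>* errors) {
    int bad = 0;
    while (!cur.isNull()) {
        Extent* e = getExtent(cur);     // accessor as used elsewhere in this file
        if (!e->validates(cur, errors))
            bad++;
        cur = e->xnext;                 // follow the forward extent chain
    }
    return bad;
}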
bool CoveredIndexMatcher::matches( const BSONObj& key, const DiskLoc& recLoc,
                                   MatchDetails* details, bool keyUsable ) const {
    LOG(5) << "CoveredIndexMatcher::matches() " << key.toString() << ' '
           << recLoc.toString() << ' ' << keyUsable << endl;

    dassert( key.isValid() );

    if ( details )
        details->resetOutput();

    if ( keyUsable ) {
        if ( !_keyMatcher.matches(key, details ) ) {
            return false;
        }
        bool needRecordForDetails = details && details->needRecord();
        if ( !_needRecord && !needRecordForDetails ) {
            return true;
        }
    }

    if ( details )
        details->setLoadedRecord( true );

    BSONObj obj = recLoc.obj();
    bool res = _docMatcher->matches( obj, details ) && !isOrClauseDup( obj );

    LOG(5) << "CoveredIndexMatcher _docMatcher->matches() returns " << res << endl;
    return res;
}
void NamespaceDetails::cappedDumpDelInfo() {
    cout << "dl[0]: " << _deletedList[0].toString() << endl;
    for( DiskLoc z = _deletedList[0]; !z.isNull(); z = z.drec()->nextDeleted() ) {
        cout << " drec:" << z.toString()
             << " dreclen:" << hex << z.drec()->lengthWithHeaders()
             << " ext:" << z.drec()->myExtent(z)->myLoc.toString() << endl;
    }
    cout << "dl[1]: " << _deletedList[1].toString() << endl;
}
shared_ptr<Cursor> DataFileMgr::findAll(const StringData& ns, const DiskLoc &startLoc) {
    NamespaceDetails * d = nsdetails( ns );
    if ( ! d )
        return shared_ptr<Cursor>(new BasicCursor(DiskLoc()));
    DiskLoc loc = d->firstExtent();
    if ( loc.isNull() )
        return shared_ptr<Cursor>(new BasicCursor(DiskLoc()));
    Extent *e = getExtent(loc);

    DEBUGGING {
        out() << "listing extents for " << ns << endl;
        DiskLoc tmp = loc;
        set<DiskLoc> extents;
        while ( 1 ) {
            Extent *f = getExtent(tmp);
            out() << "extent: " << tmp.toString() << endl;
            extents.insert(tmp);
            tmp = f->xnext;
            if ( tmp.isNull() )
                break;
            f = f->getNextExtent();
        }
        out() << endl;
        d->dumpDeleted(&extents);
    }

    if ( d->isCapped() )
        return shared_ptr<Cursor>( ForwardCappedCursor::make( d , startLoc ) );

    if ( !startLoc.isNull() )
        return shared_ptr<Cursor>(new BasicCursor( startLoc ));

    while ( e->firstRecord.isNull() && !e->xnext.isNull() ) {
        /* todo: if extent is empty, free it for reuse elsewhere.
           that is a bit complicated; we have to clean up the freelists.
        */
        RARELY out() << "info DFM::findAll(): extent " << loc.toString()
                     << " was empty, skipping ahead. ns:" << ns << endl;
        // find a nonempty extent
        // it might be nice to free the whole extent here! but have to clean up free recs then.
        e = e->getNextExtent();
    }
    return shared_ptr<Cursor>(new BasicCursor( e->firstRecord ));
}
void SimpleRecordStoreV1::addDeletedRec( OperationContext* txn, const DiskLoc& dloc ) {
    DeletedRecord* d = drec( dloc );

    DEBUGGING log() << "TEMP: add deleted rec " << dloc.toString()
                    << ' ' << hex << d->extentOfs() << endl;

    // Push the record onto the front of the freelist bucket matching its size.
    int b = bucket(d->lengthWithHeaders());
    *txn->recoveryUnit()->writing(&d->nextDeleted()) = _details->deletedListEntry(b);
    _details->setDeletedListEntry(txn, b, dloc);
}
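// Hedged sketch of the size-class bucketing that addDeletedRec() relies on.
// The bucket boundaries below are illustrative, not MongoDB's actual table;
// only the idea comes from the code above: map a record length to the index
// of the smallest bucket that can hold it, with one freelist head per bucket.
static int bucketSketch(int size) {
    static const int bucketSizes[] = { 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192 };
    static const int nBuckets = sizeof(bucketSizes) / sizeof(bucketSizes[0]);
    for (int i = 0; i < nBuckets; i++) {
        if (size <= bucketSizes[i])
            return i;
    }
    return nBuckets - 1; // oversized records share the last bucket
}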
void BtreeBasedAccessMethod::removeOneKey(OperationContext* txn,
                                          const BSONObj& key,
                                          const DiskLoc& loc,
                                          bool dupsAllowed) {
    try {
        _newInterface->unindex(txn, key, loc, dupsAllowed);
    } catch (AssertionException& e) {
        log() << "Assertion failure: _unindex failed " << _descriptor->indexNamespace() << endl;
        log() << "Assertion failure: _unindex failed: " << e.what() << '\n';
        log() << " key:" << key.toString() << '\n';
        log() << " dl:" << loc.toString() << endl;
        logContext();
    }
}
bool BtreeBasedAccessMethod::removeOneKey(const BSONObj& key, const DiskLoc& loc) {
    bool ret = false;
    try {
        ret = _newInterface->unindex(key, loc);
    } catch (AssertionException& e) {
        problem() << "Assertion failure: _unindex failed " << _descriptor->indexNamespace() << endl;
        out() << "Assertion failure: _unindex failed: " << e.what() << '\n';
        out() << " obj:" << loc.obj().toString() << '\n';
        out() << " key:" << key.toString() << '\n';
        out() << " dl:" << loc.toString() << endl;
        logContext();
    }
    return ret;
}
int BucketBasics::fullValidate(const DiskLoc& thisLoc, const BSONObj &order, int *unusedCount) {
    {
        // verify assert() is compiled in: the assignment must actually execute
        bool f = false;
        assert( f = true );
        massert( 10281 , "assert is misdefined", f);
    }

    killCurrentOp.checkForInterrupt();
    assertValid(order, true);
//    if( bt_fv==0 )
//        return;

    if ( bt_dmp ) {
        out() << thisLoc.toString() << ' ';
        ((BtreeBucket *) this)->dump();
    }

    // keycount
    int kc = 0;

    for ( int i = 0; i < n; i++ ) {
        _KeyNode& kn = k(i);

        if ( kn.isUsed() ) {
            kc++;
        }
        else {
            if ( unusedCount ) {
                ++( *unusedCount );
            }
        }
        if ( !kn.prevChildBucket.isNull() ) {
            DiskLoc left = kn.prevChildBucket;
            BtreeBucket *b = left.btree();
            wassert( b->parent == thisLoc );
            kc += b->fullValidate(kn.prevChildBucket, order, unusedCount);
        }
    }
    if ( !nextChild.isNull() ) {
        BtreeBucket *b = nextChild.btree();
        wassert( b->parent == thisLoc );
        kc += b->fullValidate(nextChild, order, unusedCount);
    }

    return kc;
}
string validateNS(const char *ns, NamespaceDetails *d, BSONObj *cmdObj) {
    bool scanData = true;
    if( cmdObj && cmdObj->hasElement("scandata") && !cmdObj->getBoolField("scandata") )
        scanData = false;
    bool valid = true;
    stringstream ss;
    ss << "\nvalidate\n";
    //ss << " details: " << hex << d << " ofs:" << nsindex(ns)->detailsOffset(d) << dec << endl;
    if ( d->capped )
        ss << " capped:" << d->capped << " max:" << d->max << '\n';

    ss << " firstExtent:" << d->firstExtent.toString()
       << " ns:" << d->firstExtent.ext()->nsDiagnostic.toString() << '\n';
    ss << " lastExtent:" << d->lastExtent.toString()
       << " ns:" << d->lastExtent.ext()->nsDiagnostic.toString() << '\n';
    try {
        d->firstExtent.ext()->assertOk();
        d->lastExtent.ext()->assertOk();

        DiskLoc el = d->firstExtent;
        int ne = 0;
        while( !el.isNull() ) {
            Extent *e = el.ext();
            e->assertOk();
            el = e->xnext;
            ne++;
            killCurrentOp.checkForInterrupt();
        }
        ss << " # extents:" << ne << '\n';
    }
    catch (...) {
        valid=false;
        ss << " extent asserted ";
    }

    ss << " datasize?:" << d->stats.datasize << " nrecords?:" << d->stats.nrecords
       << " lastExtentSize:" << d->lastExtentSize << '\n';
    ss << " padding:" << d->paddingFactor << '\n';

    try {

        try {
            ss << " first extent:\n";
            d->firstExtent.ext()->dump(ss);
            valid = valid && d->firstExtent.ext()->validates();
        }
        catch (...) {
            ss << "\n exception firstextent\n" << endl;
        }

        set<DiskLoc> recs;
        if( scanData ) {
            shared_ptr<Cursor> c = theDataFileMgr.findAll(ns);
            int n = 0;
            long long len = 0;
            long long nlen = 0;
            int outOfOrder = 0;
            DiskLoc cl_last;
            while ( c->ok() ) {
                n++;

                DiskLoc cl = c->currLoc();
                if ( n < 1000000 )
                    recs.insert(cl);
                if ( d->capped ) {
                    if ( cl < cl_last )
                        outOfOrder++;
                    cl_last = cl;
                }

                Record *r = c->_current();
                len += r->lengthWithHeaders;
                nlen += r->netLength();
                c->advance();
            }
            if ( d->capped && !d->capLooped() ) {
                ss << " capped outOfOrder:" << outOfOrder;
                if ( outOfOrder > 1 ) {
                    valid = false;
                    ss << " ???";
                }
                else ss << " (OK)";
                ss << '\n';
            }
            ss << " " << n << " objects found, nobj:" << d->stats.nrecords << '\n';
            ss << " " << len << " bytes data w/headers\n";
            ss << " " << nlen << " bytes data w/out headers\n";
        }

        ss << " deletedList: ";
        for ( int i = 0; i < Buckets; i++ ) {
            ss << (d->deletedList[i].isNull() ? '0' : '1');
        }
        ss << endl;
        int ndel = 0;
        long long delSize = 0;
        int incorrect = 0;
        for ( int i = 0; i < Buckets; i++ ) {
            DiskLoc loc = d->deletedList[i];
            try {
                int k = 0;
                while ( !loc.isNull() ) {
                    if ( recs.count(loc) )
                        incorrect++;
                    ndel++;

                    if ( loc.questionable() ) {
                        if( d->capped && !loc.isValid() && i == 1 ) {
                            /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                               see comments in namespace.h
                            */
                            break;
                        }

                        if ( loc.a() <= 0 || strstr(ns, "hudsonSmall") == 0 ) {
                            ss << " ?bad deleted loc: " << loc.toString()
                               << " bucket:" << i << " k:" << k << endl;
                            valid = false;
                            break;
                        }
                    }

                    DeletedRecord *d = loc.drec();
                    delSize += d->lengthWithHeaders;
                    loc = d->nextDeleted;
                    k++;
                    killCurrentOp.checkForInterrupt();
                }
            }
            catch (...) {
                ss <<" ?exception in deleted chain for bucket " << i << endl;
                valid = false;
            }
        }
        ss << " deleted: n: " << ndel << " size: " << delSize << endl;
        if ( incorrect ) {
            ss << " ?corrupt: " << incorrect << " records from datafile are in deleted list\n";
            valid = false;
        }

        int idxn = 0;
        try {
            ss << " nIndexes:" << d->nIndexes << endl;
            NamespaceDetails::IndexIterator i = d->ii();
            while( i.more() ) {
                IndexDetails& id = i.next();
                ss << " " << id.indexNamespace() << " keys:"
                   << id.head.btree()->fullValidate(id.head, id.keyPattern()) << endl;
            }
        }
        catch (...) {
            ss << "\n exception during index validate idxn:" << idxn << endl;
            valid=false;
        }

    }
    catch (AssertionException) {
        ss << "\n exception during validate\n" << endl;
        valid = false;
    }

    if ( !valid )
        ss << " ns corrupt, requires dbchk\n";

    return ss.str();
}
/* must call this on a delete so we clean up the cursors. */
void ClientCursor::aboutToDelete(const DiskLoc& dl) {
    recursive_scoped_lock lock(ccmutex);

    Database *db = cc().database();
    assert(db);

    aboutToDeleteForSharding( db , dl );

    CCByLoc& bl = db->ccByLoc;
    CCByLoc::iterator j = bl.lower_bound(ByLocKey::min(dl));
    CCByLoc::iterator stop = bl.upper_bound(ByLocKey::max(dl));
    if ( j == stop )
        return;

    vector<ClientCursor*> toAdvance;

    while ( 1 ) {
        toAdvance.push_back(j->second);
        DEV assert( j->first.loc == dl );
        ++j;
        if ( j == stop )
            break;
    }

    if( toAdvance.size() >= 3000 ) {
        log() << "perf warning MPW101: " << toAdvance.size() << " cursors for one diskloc "
              << dl.toString()
              << ' ' << toAdvance[1000]->_ns
              << ' ' << toAdvance[2000]->_ns
              << ' ' << toAdvance[1000]->_pinValue
              << ' ' << toAdvance[2000]->_pinValue
              << ' ' << toAdvance[1000]->_pos
              << ' ' << toAdvance[2000]->_pos
              << ' ' << toAdvance[1000]->_idleAgeMillis
              << ' ' << toAdvance[2000]->_idleAgeMillis
              << ' ' << toAdvance[1000]->_doingDeletes
              << ' ' << toAdvance[2000]->_doingDeletes
              << endl;
        //wassert( toAdvance.size() < 5000 );
    }

    for ( vector<ClientCursor*>::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ) {
        ClientCursor* cc = *i;
        wassert(cc->_db == db);

        if ( cc->_doingDeletes ) continue;

        Cursor *c = cc->_c.get();
        if ( c->capped() ) {
            /* note we cannot advance here. if this condition occurs, writes to the oplog
               have "caught" the reader. skipping ahead, the reader would miss
               potentially important data.
            */
            delete cc;
            continue;
        }

        c->checkLocation();
        DiskLoc tmp1 = c->refLoc();
        if ( tmp1 != dl ) {
            // This might indicate a failure to call ClientCursor::updateLocation() but it can
            // also happen during correct operation, see SERVER-2009.
            problem() << "warning: cursor loc " << tmp1 << " does not match byLoc position " << dl << " !" << endl;
        }
        else {
            c->advance();
        }
        if ( c->eof() ) {
            // advanced to end
            // leave ClientCursor in place so next getMore doesn't fail
            // still need to mark new location though
            cc->updateLocation();
        }
        else {
            wassert( c->refLoc() != dl );
            cc->updateLocation();
        }
    }
}
/* must call this on a delete so we clean up the cursors. */
void ClientCursor::aboutToDelete(const StringData& ns,
                                 const NamespaceDetails* nsd,
                                 const DiskLoc& dl) {
    // Begin cursor-only
    NoPageFaultsAllowed npfa;
    // End cursor-only

    recursive_scoped_lock lock(ccmutex);

    Database *db = cc().database();
    verify(db);

    aboutToDeleteForSharding( ns, db, nsd, dl );

    // Check our non-cached active runner list.
    for (set<Runner*>::iterator it = nonCachedRunners.begin(); it != nonCachedRunners.end();
         ++it) {

        Runner* runner = *it;
        if (0 == ns.compare(runner->ns())) {
            runner->invalidate(dl);
        }
    }

    // TODO: This requires optimization. We walk through *all* CCs and send the delete to every
    // CC open on the db we're deleting from. We could:
    // 1. Map from ns to open runners,
    // 2. Map from ns -> (a map of DiskLoc -> runners who care about that DL)
    //
    // We could also queue invalidations somehow and have them processed later in the runner's
    // read locks.
    for (CCById::const_iterator it = clientCursorsById.begin(); it != clientCursorsById.end();
         ++it) {

        ClientCursor* cc = it->second;
        // We're only interested in cursors over one db.
        if (cc->_db != db) { continue; }
        if (NULL == cc->_runner.get()) { continue; }
        cc->_runner->invalidate(dl);
    }

    // Begin cursor-only. Only cursors that are in ccByLoc are processed here.
    CCByLoc& bl = db->ccByLoc();
    CCByLoc::iterator j = bl.lower_bound(ByLocKey::min(dl));
    CCByLoc::iterator stop = bl.upper_bound(ByLocKey::max(dl));
    if ( j == stop )
        return;

    vector<ClientCursor*> toAdvance;

    while ( 1 ) {
        toAdvance.push_back(j->second);
        DEV verify( j->first.loc == dl );
        ++j;
        if ( j == stop )
            break;
    }

    if( toAdvance.size() >= 3000 ) {
        log() << "perf warning MPW101: " << toAdvance.size() << " cursors for one diskloc "
              << dl.toString()
              << ' ' << toAdvance[1000]->_ns
              << ' ' << toAdvance[2000]->_ns
              << ' ' << toAdvance[1000]->_pinValue
              << ' ' << toAdvance[2000]->_pinValue
              << ' ' << toAdvance[1000]->_pos
              << ' ' << toAdvance[2000]->_pos
              << ' ' << toAdvance[1000]->_idleAgeMillis
              << ' ' << toAdvance[2000]->_idleAgeMillis
              << ' ' << toAdvance[1000]->_doingDeletes
              << ' ' << toAdvance[2000]->_doingDeletes
              << endl;
        //wassert( toAdvance.size() < 5000 );
    }

    for ( vector<ClientCursor*>::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ) {
        ClientCursor* cc = *i;
        wassert(cc->_db == db);

        if ( cc->_doingDeletes ) continue;

        Cursor *c = cc->_c.get();
        if ( c->capped() ) {
            /* note we cannot advance here. if this condition occurs, writes to the oplog
               have "caught" the reader. skipping ahead, the reader would miss
               potentially important data.
            */
            delete cc;
            continue;
        }

        c->recoverFromYield();
        DiskLoc tmp1 = c->refLoc();
        if ( tmp1 != dl ) {
            // This might indicate a failure to call ClientCursor::prepareToYield() but it can
            // also happen during correct operation, see SERVER-2009.
            problem() << "warning: cursor loc " << tmp1 << " does not match byLoc position " << dl << " !" << endl;
        }
        else {
            c->advance();
        }
        while (!c->eof() && c->refLoc() == dl) {
            /* We don't delete at EOF because we want to return "no more results" rather than "no such cursor".
             * The loop is to handle MultiKey indexes where the deleted record is pointed to by multiple adjacent keys.
             * In that case we need to advance until we get to the next distinct record or EOF.
             * SERVER-4154
             * SERVER-5198
             * But see SERVER-5725.
             */
            c->advance();
        }
        cc->updateLocation();
    }
    // End cursor-only
}
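// Hedged sketch of option 1 in the TODO above: keep an index from namespace
// to the runners open on it, so aboutToDelete() walks only runners that could
// actually hold the dying DiskLoc instead of every ClientCursor on the db.
// The names ('RunnersByNs', the register/invalidate helpers) are hypothetical;
// only Runner::ns() and Runner::invalidate() come from the code above.
typedef map<string, set<Runner*> > RunnersByNs;
static RunnersByNs runnersByNs; // would be guarded by ccmutex, like nonCachedRunners

static void registerRunnerSketch(Runner* runner) {
    runnersByNs[runner->ns()].insert(runner);
}
static void deregisterRunnerSketch(Runner* runner) {
    runnersByNs[runner->ns()].erase(runner);
}
static void invalidateNsSketch(const string& ns, const DiskLoc& dl) {
    RunnersByNs::iterator it = runnersByNs.find(ns);
    if (it == runnersByNs.end())
        return;
    for (set<Runner*>::iterator r = it->second.begin(); r != it->second.end(); ++r)
        (*r)->invalidate(dl);
}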
Status RecordStoreV1Base::validate( OperationContext* txn,
                                    bool full, bool scanData,
                                    ValidateAdaptor* adaptor,
                                    ValidateResults* results, BSONObjBuilder* output ) const {

    // 1) basic stats that require no iteration
    // 2) extent level info
    // 3) check extent start and end
    // 4) check each non-deleted record
    // 5) check deleted list

    // -------------

    // 1111111111111111111
    if ( isCapped() ){
        output->appendBool("capped", true);
        output->appendNumber("max", _details->maxCappedDocs());
    }

    output->appendNumber("datasize", _details->dataSize());
    output->appendNumber("nrecords", _details->numRecords());
    output->appendNumber("lastExtentSize", _details->lastExtentSize(txn));
    output->appendNumber("padding", _details->paddingFactor());

    if ( _details->firstExtent(txn).isNull() )
        output->append( "firstExtent", "null" );
    else
        output->append( "firstExtent",
                        str::stream() << _details->firstExtent(txn).toString()
                        << " ns:"
                        << _getExtent( txn, _details->firstExtent(txn) )->nsDiagnostic.toString());
    if ( _details->lastExtent(txn).isNull() )
        output->append( "lastExtent", "null" );
    else
        output->append( "lastExtent", str::stream() << _details->lastExtent(txn).toString()
                        << " ns:"
                        << _getExtent( txn, _details->lastExtent(txn) )->nsDiagnostic.toString());

    // 22222222222222222222222222
    { // validate extent basics
        BSONArrayBuilder extentData;
        int extentCount = 0;
        DiskLoc extentDiskLoc;
        try {
            if ( !_details->firstExtent(txn).isNull() ) {
                _getExtent( txn, _details->firstExtent(txn) )->assertOk();
                _getExtent( txn, _details->lastExtent(txn) )->assertOk();
            }

            extentDiskLoc = _details->firstExtent(txn);
            while (!extentDiskLoc.isNull()) {
                Extent* thisExtent = _getExtent( txn, extentDiskLoc );
                if (full) {
                    extentData << thisExtent->dump();
                }
                if (!thisExtent->validates(extentDiskLoc, &results->errors)) {
                    results->valid = false;
                }
                DiskLoc nextDiskLoc = thisExtent->xnext;

                if (extentCount > 0 && !nextDiskLoc.isNull()
                    && _getExtent( txn, nextDiskLoc )->xprev != extentDiskLoc) {
                    StringBuilder sb;
                    sb << "'xprev' pointer " << _getExtent( txn, nextDiskLoc )->xprev.toString()
                       << " in extent " << nextDiskLoc.toString()
                       << " does not point to extent " << extentDiskLoc.toString();
                    results->errors.push_back( sb.str() );
                    results->valid = false;
                }
                if (nextDiskLoc.isNull() && extentDiskLoc != _details->lastExtent(txn)) {
                    StringBuilder sb;
                    sb << "'lastExtent' pointer " << _details->lastExtent(txn).toString()
                       << " does not point to last extent in list " << extentDiskLoc.toString();
                    results->errors.push_back( sb.str() );
                    results->valid = false;
                }
                extentDiskLoc = nextDiskLoc;
                extentCount++;
                txn->checkForInterrupt();
            }
        }
        catch (const DBException& e) {
            StringBuilder sb;
            sb << "exception validating extent " << extentCount
               << ": " << e.what();
            results->errors.push_back( sb.str() );
            results->valid = false;
            return Status::OK();
        }
        output->append("extentCount", extentCount);

        if ( full )
            output->appendArray( "extents" , extentData.arr() );
    }

    try {
        // 333333333333333333333333333
        bool testingLastExtent = false;
        try {
            DiskLoc firstExtentLoc = _details->firstExtent(txn);
            if (firstExtentLoc.isNull()) {
                // this is ok
            }
            else {
                output->append("firstExtentDetails", _getExtent(txn, firstExtentLoc)->dump());
                if (!_getExtent(txn, firstExtentLoc)->xprev.isNull()) {
                    StringBuilder sb;
                    sb << "'xprev' pointer in 'firstExtent' " << _details->firstExtent(txn).toString()
                       << " is " << _getExtent(txn, firstExtentLoc)->xprev.toString()
                       << ", should be null";
                    results->errors.push_back( sb.str() );
                    results->valid = false;
                }
            }
            testingLastExtent = true;
            DiskLoc lastExtentLoc = _details->lastExtent(txn);
            if (lastExtentLoc.isNull()) {
                // this is ok
            }
            else {
                if (firstExtentLoc != lastExtentLoc) {
                    output->append("lastExtentDetails", _getExtent(txn, lastExtentLoc)->dump());
                    if (!_getExtent(txn, lastExtentLoc)->xnext.isNull()) {
                        StringBuilder sb;
                        sb << "'xnext' pointer in 'lastExtent' " << lastExtentLoc.toString()
                           << " is " << _getExtent(txn, lastExtentLoc)->xnext.toString()
                           << ", should be null";
                        results->errors.push_back( sb.str() );
                        results->valid = false;
                    }
                }
            }
        }
        catch (const DBException& e) {
            StringBuilder sb;
            sb << "exception processing '"
               << (testingLastExtent ? "lastExtent" : "firstExtent")
               << "': " << e.what();
            results->errors.push_back( sb.str() );
            results->valid = false;
        }

        // 4444444444444444444444444
        set<DiskLoc> recs;
        if( scanData ) {
            int n = 0;
            int nInvalid = 0;
            long long nQuantizedSize = 0;
            long long nPowerOf2QuantizedSize = 0;
            long long len = 0;
            long long nlen = 0;
            long long bsonLen = 0;
            int outOfOrder = 0;
            DiskLoc cl_last;

            scoped_ptr<RecordIterator> iterator( getIterator( txn,
                                                              DiskLoc(),
                                                              false,
                                                              CollectionScanParams::FORWARD ) );
            DiskLoc cl;
            while ( !( cl = iterator->getNext() ).isNull() ) {
                n++;

                if ( n < 1000000 )
                    recs.insert(cl);

                if ( isCapped() ) {
                    if ( cl < cl_last )
                        outOfOrder++;
                    cl_last = cl;
                }

                Record *r = recordFor(cl);
                len += r->lengthWithHeaders();
                nlen += r->netLength();

                if ( r->lengthWithHeaders() ==
                     quantizeAllocationSpace( r->lengthWithHeaders() ) ) {
                    // Count the number of records having a size consistent with
                    // the quantizeAllocationSpace quantization implementation.
                    ++nQuantizedSize;
                }

                if ( r->lengthWithHeaders() ==
                     quantizePowerOf2AllocationSpace( r->lengthWithHeaders() ) ) {
                    // Count the number of records having a size consistent with the
                    // quantizePowerOf2AllocationSpace quantization implementation.
                    ++nPowerOf2QuantizedSize;
                }

                if (full){
                    size_t dataSize = 0;
                    const Status status = adaptor->validate( r->toRecordData(), &dataSize );
                    if (!status.isOK()) {
                        results->valid = false;
                        if (nInvalid == 0) // only log once
                            results->errors.push_back( "invalid object detected (see logs)" );

                        nInvalid++;
                        log() << "Invalid object detected in " << _ns
                              << ": " << status.reason();
                    }
                    else {
                        bsonLen += dataSize;
                    }
                }
            }

            if ( isCapped() && !_details->capLooped() ) {
                output->append("cappedOutOfOrder", outOfOrder);
                if ( outOfOrder > 1 ) {
                    results->valid = false;
                    results->errors.push_back( "too many out of order records" );
                }
            }
            output->append("objectsFound", n);

            if (full) {
                output->append("invalidObjects", nInvalid);
            }

            output->appendNumber("nQuantizedSize", nQuantizedSize);
            output->appendNumber("nPowerOf2QuantizedSize", nPowerOf2QuantizedSize);
            output->appendNumber("bytesWithHeaders", len);
            output->appendNumber("bytesWithoutHeaders", nlen);

            if (full) {
                output->appendNumber("bytesBson", bsonLen);
            }
        } // end scanData

        // 55555555555555555555555555
        BSONArrayBuilder deletedListArray;
        for ( int i = 0; i < Buckets; i++ ) {
            deletedListArray << _details->deletedListEntry(i).isNull();
        }

        int ndel = 0;
        long long delSize = 0;
        BSONArrayBuilder delBucketSizes;
        int incorrect = 0;
        for ( int i = 0; i < Buckets; i++ ) {
            DiskLoc loc = _details->deletedListEntry(i);
            try {
                int k = 0;
                while ( !loc.isNull() ) {
                    if ( recs.count(loc) )
                        incorrect++;
                    ndel++;

                    if ( loc.questionable() ) {
                        if( isCapped() && !loc.isValid() && i == 1 ) {
                            /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                               see comments in namespace.h
                            */
                            break;
                        }

                        string err( str::stream() << "bad pointer in deleted record list: "
                                    << loc.toString()
                                    << " bucket: " << i
                                    << " k: " << k );
                        results->errors.push_back( err );
                        results->valid = false;
                        break;
                    }

                    const DeletedRecord* d = deletedRecordFor(loc);
                    delSize += d->lengthWithHeaders();
                    loc = d->nextDeleted();
                    k++;
                    txn->checkForInterrupt();
                }
                delBucketSizes << k;
            }
            catch (...) {
                results->errors.push_back( (string)"exception in deleted chain for bucket "
                                           + BSONObjBuilder::numStr(i) );
                results->valid = false;
            }
        }
        output->appendNumber("deletedCount", ndel);
        output->appendNumber("deletedSize", delSize);
        if ( full ) {
            output->append( "delBucketSizes", delBucketSizes.arr() );
        }

        if ( incorrect ) {
            results->errors.push_back( BSONObjBuilder::numStr(incorrect)
                                       + " records from datafile are in deleted list" );
            results->valid = false;
        }

    }
    catch (AssertionException) {
        results->errors.push_back( "exception during validate" );
        results->valid = false;
    }

    return Status::OK();
}
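// Hedged sketch of a ValidateAdaptor such as RecordStoreV1Base::validate()
// invokes in its full-scan phase. The base-class shape and the
// RecordData::data() accessor are assumptions; only the contract used above
// is certain: validate() inspects one record's payload, reports its logical
// size through 'dataSize', and returns a Status.
class BsonValidateAdaptorSketch : public ValidateAdaptor {
public:
    virtual Status validate( const RecordData& record, size_t* dataSize ) {
        BSONObj obj( record.data() );   // assumed accessor for the raw bytes
        if ( !obj.isValid() )
            return Status( ErrorCodes::BadValue, "record is not valid BSON" );
        *dataSize = obj.objsize();      // logical BSON size, excluding record headers
        return Status::OK();
    }
};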
void validateNS(const char *ns, NamespaceDetails *d, const BSONObj& cmdObj, BSONObjBuilder& result) {
    const bool full = cmdObj["full"].trueValue();
    const bool scanData = full || cmdObj["scandata"].trueValue();

    bool valid = true;
    BSONArrayBuilder errors; // explanation(s) for why valid = false

    if ( d->isCapped() ){
        result.append("capped", d->isCapped());
        result.appendNumber("max", d->maxCappedDocs());
    }

    result.append("firstExtent", str::stream() << d->firstExtent.toString()
                  << " ns:" << d->firstExtent.ext()->nsDiagnostic.toString());
    result.append( "lastExtent", str::stream() << d->lastExtent.toString()
                   << " ns:" << d->lastExtent.ext()->nsDiagnostic.toString());

    BSONArrayBuilder extentData;

    try {
        d->firstExtent.ext()->assertOk();
        d->lastExtent.ext()->assertOk();

        DiskLoc el = d->firstExtent;
        int ne = 0;
        while( !el.isNull() ) {
            Extent *e = el.ext();
            e->assertOk();
            el = e->xnext;
            ne++;
            if ( full )
                extentData << e->dump();

            killCurrentOp.checkForInterrupt();
        }
        result.append("extentCount", ne);
    }
    catch (...) {
        valid=false;
        errors << "extent asserted";
    }

    if ( full )
        result.appendArray( "extents" , extentData.arr() );

    result.appendNumber("datasize", d->stats.datasize);
    result.appendNumber("nrecords", d->stats.nrecords);
    result.appendNumber("lastExtentSize", d->lastExtentSize);
    result.appendNumber("padding", d->paddingFactor());

    try {

        try {
            result.append("firstExtentDetails", d->firstExtent.ext()->dump());

            valid = valid && d->firstExtent.ext()->validates() &&
                d->firstExtent.ext()->xprev.isNull();
        }
        catch (...) {
            errors << "exception firstextent";
            valid = false;
        }

        set<DiskLoc> recs;
        if( scanData ) {
            shared_ptr<Cursor> c = theDataFileMgr.findAll(ns);
            int n = 0;
            int nInvalid = 0;
            long long len = 0;
            long long nlen = 0;
            int outOfOrder = 0;
            DiskLoc cl_last;
            while ( c->ok() ) {
                n++;

                DiskLoc cl = c->currLoc();
                if ( n < 1000000 )
                    recs.insert(cl);
                if ( d->isCapped() ) {
                    if ( cl < cl_last )
                        outOfOrder++;
                    cl_last = cl;
                }

                Record *r = c->_current();
                len += r->lengthWithHeaders();
                nlen += r->netLength();

                if (full){
                    BSONObj obj = BSONObj::make(r);
                    if (!obj.isValid() || !obj.valid()){ // both fast and deep checks
                        valid = false;
                        if (nInvalid == 0) // only log once
                            errors << "invalid bson object detected (see logs for more info)";

                        nInvalid++;
                        if (strcmp("_id", obj.firstElementFieldName()) == 0){
                            try {
                                obj.firstElement().validate(); // throws on error
                                log() << "Invalid bson detected in " << ns
                                      << " with _id: " << obj.firstElement().toString(false) << endl;
                            }
                            catch(...){
                                log() << "Invalid bson detected in " << ns
                                      << " with corrupt _id" << endl;
                            }
                        }
                        else {
                            log() << "Invalid bson detected in " << ns
                                  << " and couldn't find _id" << endl;
                        }
                    }
                }

                c->advance();
            }
            if ( d->isCapped() && !d->capLooped() ) {
                result.append("cappedOutOfOrder", outOfOrder);
                if ( outOfOrder > 1 ) {
                    valid = false;
                    errors << "too many out of order records";
                }
            }
            result.append("objectsFound", n);

            if (full) {
                result.append("invalidObjects", nInvalid);
            }

            result.appendNumber("bytesWithHeaders", len);
            result.appendNumber("bytesWithoutHeaders", nlen);
        }

        BSONArrayBuilder deletedListArray;
        for ( int i = 0; i < Buckets; i++ ) {
            deletedListArray << d->deletedList[i].isNull();
        }

        int ndel = 0;
        long long delSize = 0;
        int incorrect = 0;
        for ( int i = 0; i < Buckets; i++ ) {
            DiskLoc loc = d->deletedList[i];
            try {
                int k = 0;
                while ( !loc.isNull() ) {
                    if ( recs.count(loc) )
                        incorrect++;
                    ndel++;

                    if ( loc.questionable() ) {
                        if( d->isCapped() && !loc.isValid() && i == 1 ) {
                            /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                               see comments in namespace.h
                            */
                            break;
                        }

                        if ( loc.a() <= 0 || strstr(ns, "hudsonSmall") == 0 ) {
                            string err (str::stream() << "bad deleted loc: " << loc.toString()
                                        << " bucket:" << i << " k:" << k);
                            errors << err;

                            valid = false;
                            break;
                        }
                    }

                    DeletedRecord *d = loc.drec();
                    delSize += d->lengthWithHeaders();
                    loc = d->nextDeleted();
                    k++;
                    killCurrentOp.checkForInterrupt();
                }
            }
            catch (...) {
                errors << ("exception in deleted chain for bucket " + BSONObjBuilder::numStr(i));
                valid = false;
            }
        }
        result.appendNumber("deletedCount", ndel);
        result.appendNumber("deletedSize", delSize);

        if ( incorrect ) {
            errors << (BSONObjBuilder::numStr(incorrect) + " records from datafile are in deleted list");
            valid = false;
        }

        int idxn = 0;
        try {
            result.append("nIndexes", d->nIndexes);
            BSONObjBuilder indexes; // not using subObjStart to be exception safe
            NamespaceDetails::IndexIterator i = d->ii();
            while( i.more() ) {
                IndexDetails& id = i.next();
                log() << "validating index " << idxn << ": " << id.indexNamespace() << endl;
                long long keys = id.idxInterface().fullValidate(id.head, id.keyPattern());
                indexes.appendNumber(id.indexNamespace(), keys);
                idxn++;
            }
            result.append("keysPerIndex", indexes.done());
        }
        catch (...) {
            errors << ("exception during index validate idxn " + BSONObjBuilder::numStr(idxn));
            valid=false;
        }

    }
    catch (AssertionException) {
        errors << "exception during validate";
        valid = false;
    }

    result.appendBool("valid", valid);
    result.append("errors", errors.arr());

    if ( !full ){
        result.append("warning",
                      "Some checks omitted for speed. use {full:true} option to do more thorough scan.");
    }

    if ( !valid ) {
        result.append("advice", "ns corrupt, requires repair");
    }
}
DiskLoc SimpleRecordStoreV1::_allocFromExistingExtents( OperationContext* txn,
                                                        int lenToAlloc ) {
    // align size up to a multiple of 4
    lenToAlloc = (lenToAlloc + (4-1)) & ~(4-1);

    freelistAllocs.increment();
    DiskLoc loc;
    {
        DiskLoc *prev = 0;
        DiskLoc *bestprev = 0;
        DiskLoc bestmatch;
        int bestmatchlen = INT_MAX; // sentinel meaning we haven't found a record big enough
        int b = bucket(lenToAlloc);
        DiskLoc cur = _details->deletedListEntry(b);

        int extra = 5; // look for a better fit, a little.
        int chain = 0;
        while ( 1 ) {
            { // defensive check
                int fileNumber = cur.a();
                int fileOffset = cur.getOfs();
                if (fileNumber < -1 || fileNumber >= 100000 || fileOffset < 0) {
                    StringBuilder sb;
                    sb << "Deleted record list corrupted in collection " << _ns
                       << ", bucket " << b
                       << ", link number " << chain
                       << ", invalid link is " << cur.toString()
                       << ", throwing Fatal Assertion";
                    log() << sb.str() << endl;
                    fassertFailed(16469);
                }
            }
            if ( cur.isNull() ) {
                // move to next bucket. if we were doing "extra", just break
                if ( bestmatchlen < INT_MAX )
                    break;

                if ( chain > 0 ) {
                    // if we looked at things in the right bucket, but they were not suitable
                    freelistBucketExhausted.increment();
                }

                b++;
                if ( b > MaxBucket ) {
                    // out of space. alloc a new extent.
                    freelistIterations.increment( 1 + chain );
                    return DiskLoc();
                }
                cur = _details->deletedListEntry(b);
                prev = 0;
                continue;
            }
            DeletedRecord *r = drec(cur);
            if ( r->lengthWithHeaders() >= lenToAlloc &&
                 r->lengthWithHeaders() < bestmatchlen ) {
                bestmatchlen = r->lengthWithHeaders();
                bestmatch = cur;
                bestprev = prev;
                if (r->lengthWithHeaders() == lenToAlloc)
                    // exact match, stop searching
                    break;
            }
            if ( bestmatchlen < INT_MAX && --extra <= 0 )
                break;
            if ( ++chain > 30 && b <= MaxBucket ) {
                // too slow, force move to next bucket to grab a big chunk
                //b++;
                freelistIterations.increment( chain );
                chain = 0;
                cur.Null();
            }
            else {
                cur = r->nextDeleted();
                prev = &r->nextDeleted();
            }
        }

        // unlink ourself from the deleted list
        DeletedRecord *bmr = drec(bestmatch);
        if ( bestprev ) {
            *txn->recoveryUnit()->writing(bestprev) = bmr->nextDeleted();
        }
        else {
            // should be the front of a free-list
            int myBucket = bucket(bmr->lengthWithHeaders());
            invariant( _details->deletedListEntry(myBucket) == bestmatch );
            _details->setDeletedListEntry(txn, myBucket, bmr->nextDeleted());
        }
        *txn->recoveryUnit()->writing(&bmr->nextDeleted()) = DiskLoc().setInvalid(); // defensive.
        invariant(bmr->extentOfs() < bestmatch.getOfs());

        freelistIterations.increment( 1 + chain );
        loc = bestmatch;
    }

    if ( loc.isNull() )
        return loc;

    // determine if we should chop up

    DeletedRecord *r = drec(loc);

    /* note we want to grab from the front so our next pointers on disk tend
       to go in a forward direction which is important for performance. */
    int regionlen = r->lengthWithHeaders();
    invariant( r->extentOfs() < loc.getOfs() );

    int left = regionlen - lenToAlloc;
    if ( left < 24 || left < (lenToAlloc / 8) ) {
        // you get the whole thing.
        return loc;
    }

    // don't quantize:
    //   - $ collections (indexes) as we already have those aligned the way we want SERVER-8425
    if ( _normalCollection ) {
        // we quantize here so that it only impacts newly sized records
        // this prevents oddities with older records and space re-use SERVER-8435
        lenToAlloc = std::min( r->lengthWithHeaders(),
                               quantizeAllocationSpace( lenToAlloc ) );
        left = regionlen - lenToAlloc;

        if ( left < 24 ) {
            // you get the whole thing.
            return loc;
        }
    }

    /* split off some for further use. */
    txn->recoveryUnit()->writingInt(r->lengthWithHeaders()) = lenToAlloc;
    DiskLoc newDelLoc = loc;
    newDelLoc.inc(lenToAlloc);
    DeletedRecord* newDel = drec(newDelLoc);
    DeletedRecord* newDelW = txn->recoveryUnit()->writing(newDel);
    newDelW->extentOfs() = r->extentOfs();
    newDelW->lengthWithHeaders() = left;
    newDelW->nextDeleted().Null();

    addDeletedRec( txn, newDelLoc );

    return loc;
}
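// Hedged sketch of the split decision above, with the same thresholds: split
// only when the leftover is at least 24 bytes and at least 1/8 of the
// allocation, otherwise hand out the whole deleted record. Purely
// illustrative; quantization and the on-disk recovery-unit writes are omitted.
static bool shouldSplitDeletedRecordSketch(int regionLen, int lenToAlloc) {
    int left = regionLen - lenToAlloc;
    return !( left < 24 || left < (lenToAlloc / 8) );
}
// e.g. shouldSplitDeletedRecordSketch(1024, 200) -> true  (leftover 824 is worth keeping)
//      shouldSplitDeletedRecordSketch(216, 200)  -> false (leftover 16 would be dead space)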
/* must call this on a delete so we clean up the cursors. */
void ClientCursor::aboutToDelete(const DiskLoc& dl) {
    NoPageFaultsAllowed npfa;

    recursive_scoped_lock lock(ccmutex);

    Database *db = cc().database();
    verify(db);

    aboutToDeleteForSharding( db , dl );

    CCByLoc& bl = db->ccByLoc;
    CCByLoc::iterator j = bl.lower_bound(ByLocKey::min(dl));
    CCByLoc::iterator stop = bl.upper_bound(ByLocKey::max(dl));
    if ( j == stop )
        return;

    vector<ClientCursor*> toAdvance;

    while ( 1 ) {
        toAdvance.push_back(j->second);
        DEV verify( j->first.loc == dl );
        ++j;
        if ( j == stop )
            break;
    }

    if( toAdvance.size() >= 3000 ) {
        log() << "perf warning MPW101: " << toAdvance.size() << " cursors for one diskloc "
              << dl.toString()
              << ' ' << toAdvance[1000]->_ns
              << ' ' << toAdvance[2000]->_ns
              << ' ' << toAdvance[1000]->_pinValue
              << ' ' << toAdvance[2000]->_pinValue
              << ' ' << toAdvance[1000]->_pos
              << ' ' << toAdvance[2000]->_pos
              << ' ' << toAdvance[1000]->_idleAgeMillis
              << ' ' << toAdvance[2000]->_idleAgeMillis
              << ' ' << toAdvance[1000]->_doingDeletes
              << ' ' << toAdvance[2000]->_doingDeletes
              << endl;
        //wassert( toAdvance.size() < 5000 );
    }

    for ( vector<ClientCursor*>::iterator i = toAdvance.begin(); i != toAdvance.end(); ++i ) {
        ClientCursor* cc = *i;
        wassert(cc->_db == db);

        if ( cc->_doingDeletes ) continue;

        Cursor *c = cc->_c.get();
        if ( c->capped() ) {
            /* note we cannot advance here. if this condition occurs, writes to the oplog
               have "caught" the reader. skipping ahead, the reader would miss
               potentially important data.
            */
            delete cc;
            continue;
        }

        c->recoverFromYield();
        DiskLoc tmp1 = c->refLoc();
        if ( tmp1 != dl ) {
            // This might indicate a failure to call ClientCursor::prepareToYield() but it can
            // also happen during correct operation, see SERVER-2009.
            problem() << "warning: cursor loc " << tmp1 << " does not match byLoc position " << dl << " !" << endl;
        }
        else {
            c->advance();
        }
        while (!c->eof() && c->refLoc() == dl) {
            /* We don't delete at EOF because we want to return "no more results" rather than "no such cursor".
             * The loop is to handle MultiKey indexes where the deleted record is pointed to by multiple adjacent keys.
             * In that case we need to advance until we get to the next distinct record or EOF.
             * SERVER-4154
             * SERVER-5198
             * But see SERVER-5725.
             */
            c->advance();
        }
        cc->updateLocation();
    }
}
void validateNS(const string& ns,
                Collection* collection,
                const BSONObj& cmdObj,
                BSONObjBuilder& result) {

    const bool full = cmdObj["full"].trueValue();
    const bool scanData = full || cmdObj["scandata"].trueValue();

    NamespaceDetails* nsd = collection->details();

    bool valid = true;
    BSONArrayBuilder errors; // explanation(s) for why valid = false

    if ( collection->isCapped() ){
        result.append("capped", nsd->isCapped());
        result.appendNumber("max", nsd->maxCappedDocs());
    }

    if ( nsd->firstExtent().isNull() )
        result.append( "firstExtent", "null" );
    else
        result.append( "firstExtent", str::stream() << nsd->firstExtent().toString()
                       << " ns:" << nsd->firstExtent().ext()->nsDiagnostic.toString());
    if ( nsd->lastExtent().isNull() )
        result.append( "lastExtent", "null" );
    else
        result.append( "lastExtent", str::stream() << nsd->lastExtent().toString()
                       << " ns:" << nsd->lastExtent().ext()->nsDiagnostic.toString());

    BSONArrayBuilder extentData;
    int extentCount = 0;
    try {
        if ( !nsd->firstExtent().isNull() ) {
            nsd->firstExtent().ext()->assertOk();
            nsd->lastExtent().ext()->assertOk();
        }

        DiskLoc extentDiskLoc = nsd->firstExtent();
        while (!extentDiskLoc.isNull()) {
            Extent* thisExtent = extentDiskLoc.ext();
            if (full) {
                extentData << thisExtent->dump();
            }
            if (!thisExtent->validates(extentDiskLoc, &errors)) {
                valid = false;
            }
            DiskLoc nextDiskLoc = thisExtent->xnext;
            if (extentCount > 0 && !nextDiskLoc.isNull()
                && nextDiskLoc.ext()->xprev != extentDiskLoc) {
                StringBuilder sb;
                sb << "'xprev' pointer " << nextDiskLoc.ext()->xprev.toString()
                   << " in extent " << nextDiskLoc.toString()
                   << " does not point to extent " << extentDiskLoc.toString();
                errors << sb.str();
                valid = false;
            }
            if (nextDiskLoc.isNull() && extentDiskLoc != nsd->lastExtent()) {
                StringBuilder sb;
                sb << "'lastExtent' pointer " << nsd->lastExtent().toString()
                   << " does not point to last extent in list " << extentDiskLoc.toString();
                errors << sb.str();
                valid = false;
            }
            extentDiskLoc = nextDiskLoc;
            extentCount++;
            killCurrentOp.checkForInterrupt();
        }
    }
    catch (const DBException& e) {
        StringBuilder sb;
        sb << "exception validating extent " << extentCount
           << ": " << e.what();
        errors << sb.str();
        valid = false;
    }
    result.append("extentCount", extentCount);

    if ( full )
        result.appendArray( "extents" , extentData.arr() );

    result.appendNumber("datasize", nsd->dataSize());
    result.appendNumber("nrecords", nsd->numRecords());
    result.appendNumber("lastExtentSize", nsd->lastExtentSize());
    result.appendNumber("padding", nsd->paddingFactor());

    try {

        bool testingLastExtent = false;
        try {
            if (nsd->firstExtent().isNull()) {
                // this is ok
            }
            else {
                result.append("firstExtentDetails", nsd->firstExtent().ext()->dump());
                if (!nsd->firstExtent().ext()->xprev.isNull()) {
                    StringBuilder sb;
                    sb << "'xprev' pointer in 'firstExtent' " << nsd->firstExtent().toString()
                       << " is " << nsd->firstExtent().ext()->xprev.toString()
                       << ", should be null";
                    errors << sb.str();
                    valid=false;
                }
            }
            testingLastExtent = true;
            if (nsd->lastExtent().isNull()) {
                // this is ok
            }
            else {
                if (nsd->firstExtent() != nsd->lastExtent()) {
                    result.append("lastExtentDetails", nsd->lastExtent().ext()->dump());
                    if (!nsd->lastExtent().ext()->xnext.isNull()) {
                        StringBuilder sb;
                        sb << "'xnext' pointer in 'lastExtent' " << nsd->lastExtent().toString()
                           << " is " << nsd->lastExtent().ext()->xnext.toString()
                           << ", should be null";
                        errors << sb.str();
                        valid = false;
                    }
                }
            }
        }
        catch (const DBException& e) {
            StringBuilder sb;
            sb << "exception processing '"
               << (testingLastExtent ? "lastExtent" : "firstExtent")
               << "': " << e.what();
            errors << sb.str();
            valid = false;
        }

        set<DiskLoc> recs;
        if( scanData ) {
            int n = 0;
            int nInvalid = 0;
            long long nQuantizedSize = 0;
            long long nPowerOf2QuantizedSize = 0;
            long long len = 0;
            long long nlen = 0;
            long long bsonLen = 0;
            int outOfOrder = 0;
            DiskLoc cl_last;

            DiskLoc cl;
            Runner::RunnerState state;
            auto_ptr<Runner> runner(InternalPlanner::collectionScan(ns));
            while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, &cl))) {
                n++;

                if ( n < 1000000 )
                    recs.insert(cl);
                if ( nsd->isCapped() ) {
                    if ( cl < cl_last )
                        outOfOrder++;
                    cl_last = cl;
                }

                Record *r = cl.rec();
                len += r->lengthWithHeaders();
                nlen += r->netLength();

                if ( r->lengthWithHeaders() ==
                     NamespaceDetails::quantizeAllocationSpace( r->lengthWithHeaders() ) ) {
                    // Count the number of records having a size consistent with
                    // the quantizeAllocationSpace quantization implementation.
                    ++nQuantizedSize;
                }

                if ( r->lengthWithHeaders() ==
                     NamespaceDetails::quantizePowerOf2AllocationSpace( r->lengthWithHeaders() - 1 ) ) {
                    // Count the number of records having a size consistent with the
                    // quantizePowerOf2AllocationSpace quantization implementation.
                    // Because of SERVER-8311, power of 2 quantization is not idempotent and
                    // r->lengthWithHeaders() - 1 must be checked instead of the record
                    // length itself.
                    ++nPowerOf2QuantizedSize;
                }

                if (full){
                    BSONObj obj = BSONObj::make(r);
                    if (!obj.isValid() || !obj.valid()){ // both fast and deep checks
                        valid = false;
                        if (nInvalid == 0) // only log once
                            errors << "invalid bson object detected (see logs for more info)";

                        nInvalid++;
                        if (strcmp("_id", obj.firstElementFieldName()) == 0){
                            try {
                                obj.firstElement().validate(); // throws on error
                                log() << "Invalid bson detected in " << ns
                                      << " with _id: " << obj.firstElement().toString(false) << endl;
                            }
                            catch(...){
                                log() << "Invalid bson detected in " << ns
                                      << " with corrupt _id" << endl;
                            }
                        }
                        else {
                            log() << "Invalid bson detected in " << ns
                                  << " and couldn't find _id" << endl;
                        }
                    }
                    else {
                        bsonLen += obj.objsize();
                    }
                }
            }
            if (Runner::RUNNER_EOF != state) {
                // TODO: more descriptive logging.
                warning() << "Internal error while reading collection " << ns << endl;
            }

            if ( nsd->isCapped() && !nsd->capLooped() ) {
                result.append("cappedOutOfOrder", outOfOrder);
                if ( outOfOrder > 1 ) {
                    valid = false;
                    errors << "too many out of order records";
                }
            }
            result.append("objectsFound", n);

            if (full) {
                result.append("invalidObjects", nInvalid);
            }

            result.appendNumber("nQuantizedSize", nQuantizedSize);
            result.appendNumber("nPowerOf2QuantizedSize", nPowerOf2QuantizedSize);
            result.appendNumber("bytesWithHeaders", len);
            result.appendNumber("bytesWithoutHeaders", nlen);

            if (full) {
                result.appendNumber("bytesBson", bsonLen);
            }
        }

        BSONArrayBuilder deletedListArray;
        for ( int i = 0; i < Buckets; i++ ) {
            deletedListArray << nsd->deletedListEntry(i).isNull();
        }

        int ndel = 0;
        long long delSize = 0;
        BSONArrayBuilder delBucketSizes;
        int incorrect = 0;
        for ( int i = 0; i < Buckets; i++ ) {
            DiskLoc loc = nsd->deletedListEntry(i);
            try {
                int k = 0;
                while ( !loc.isNull() ) {
                    if ( recs.count(loc) )
                        incorrect++;
                    ndel++;

                    if ( loc.questionable() ) {
                        if( nsd->isCapped() && !loc.isValid() && i == 1 ) {
                            /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                               see comments in namespace.h
                            */
                            break;
                        }

                        string err( str::stream() << "bad pointer in deleted record list: "
                                    << loc.toString()
                                    << " bucket: " << i << " k: " << k );
                        errors << err;
                        valid = false;
                        break;
                    }

                    DeletedRecord *d = loc.drec();
                    delSize += d->lengthWithHeaders();
                    loc = d->nextDeleted();
                    k++;
                    killCurrentOp.checkForInterrupt();
                }
                delBucketSizes << k;
            }
            catch (...) {
                errors << ("exception in deleted chain for bucket " + BSONObjBuilder::numStr(i));
                valid = false;
            }
        }
        result.appendNumber("deletedCount", ndel);
        result.appendNumber("deletedSize", delSize);
        if ( full ) {
            result << "delBucketSizes" << delBucketSizes.arr();
        }

        if ( incorrect ) {
            errors << (BSONObjBuilder::numStr(incorrect) + " records from datafile are in deleted list");
            valid = false;
        }

        int idxn = 0;
        try {
            IndexCatalog* indexCatalog = collection->getIndexCatalog();

            result.append("nIndexes", nsd->getCompletedIndexCount());
            BSONObjBuilder indexes; // not using subObjStart to be exception safe
            NamespaceDetails::IndexIterator i = nsd->ii();
            while( i.more() ) {
                IndexDetails& id = i.next();
                log() << "validating index " << idxn << ": " << id.indexNamespace() << endl;

                IndexDescriptor* descriptor = indexCatalog->getDescriptor( idxn );
                verify( descriptor );
                IndexAccessMethod* iam = indexCatalog->getIndex( descriptor );
                verify( iam );

                int64_t keys;
                iam->validate(&keys);
                indexes.appendNumber(id.indexNamespace(), static_cast<long long>(keys));
                idxn++;
            }
            result.append("keysPerIndex", indexes.done());
        }
        catch (...) {
            errors << ("exception during index validate idxn " + BSONObjBuilder::numStr(idxn));
            valid=false;
        }

    }
    catch (AssertionException) {
        errors << "exception during validate";
        valid = false;
    }

    result.appendBool("valid", valid);
    result.append("errors", errors.arr());

    if ( !full ){
        result.append("warning",
                      "Some checks omitted for speed. use {full:true} option to do more thorough scan.");
    }

    if ( !valid ) {
        result.append("advice", "ns corrupt, requires repair");
    }
}