/**
 * Stores 'len' bytes starting at 'data' as a new record.
 *
 * The space requested from allocRecord() is the payload plus the record header,
 * passed through quantizeAllocationSpace() when shouldPadInserts() is true.
 *
 * @param txn           operation context; its recovery unit supplies the writable pointer
 * @param data          source bytes to copy into the record
 * @param len           number of bytes at 'data'
 * @param enforceQuota  forwarded unchanged to allocRecord()
 * @return the RecordId of the new record, or the failing status from allocRecord()
 */
StatusWith<RecordId> RecordStoreV1Base::_insertRecord(OperationContext* txn,
                                                      const char* data,
                                                      int len,
                                                      bool enforceQuota) {
    const int recordBytes = len + MmapV1RecordHeader::HeaderSize;

    int bytesToAllocate = recordBytes;
    if (shouldPadInserts()) {
        bytesToAllocate = quantizeAllocationSpace(recordBytes);
    }
    // Quantizing must never hand back less than what the record needs.
    fassert(17208, bytesToAllocate >= recordBytes);

    StatusWith<DiskLoc> allocation = allocRecord(txn, bytesToAllocate, enforceQuota);
    if (!allocation.isOK()) {
        return StatusWith<RecordId>(allocation.getStatus());
    }
    const DiskLoc& where = allocation.getValue();

    MmapV1RecordHeader* header = recordFor(where);
    fassert(17210, header->lengthWithHeaders() >= recordBytes);

    // Get a writable view of header + payload through the recovery unit, then copy the data.
    header = reinterpret_cast<MmapV1RecordHeader*>(
        txn->recoveryUnit()->writingPtr(header, recordBytes));
    memcpy(header->data(), data, len);

    _addRecordToRecListInExtent(txn, header, where);
    _details->incrementStats(txn, header->netLength(), 1);

    return StatusWith<RecordId>(where.toRecordId());
}
/**
 * Inserts a record whose contents are produced by a DocWriter.
 *
 * The writer reports its size up front; sizes below 4 bytes, or sizes that exceed
 * MaxAllowedAllocation once the record header is added, are rejected with
 * ErrorCodes::InvalidLength. The allocation is quantized only when both the writer
 * asks for padding and shouldPadInserts() is true.
 *
 * @param txn           operation context; its recovery unit supplies the writable pointer
 * @param doc           writer that reports the document size and serializes it in place
 * @param enforceQuota  forwarded unchanged to allocRecord()
 * @return the RecordId of the new record, InvalidLength for a bad size, or the
 *         failing status from allocRecord()
 */
StatusWith<RecordId> RecordStoreV1Base::insertRecord(OperationContext* txn,
                                                     const DocWriter* doc,
                                                     bool enforceQuota) {
    const int docBytes = doc->documentSize();
    if (docBytes < 4) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be >= 4 bytes");
    }

    const int recordBytes = docBytes + MmapV1RecordHeader::HeaderSize;
    if (recordBytes > MaxAllowedAllocation) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
    }

    int bytesToAllocate = recordBytes;
    if (doc->addPadding() && shouldPadInserts()) {
        bytesToAllocate = quantizeAllocationSpace(recordBytes);
    }

    StatusWith<DiskLoc> allocation = allocRecord(txn, bytesToAllocate, enforceQuota);
    if (!allocation.isOK()) {
        return StatusWith<RecordId>(allocation.getStatus());
    }
    const DiskLoc& where = allocation.getValue();

    MmapV1RecordHeader* header = recordFor(where);
    fassert(17319, header->lengthWithHeaders() >= recordBytes);

    // Get a writable view of header + payload, then let the writer serialize into it.
    header = reinterpret_cast<MmapV1RecordHeader*>(
        txn->recoveryUnit()->writingPtr(header, recordBytes));
    doc->writeDocument(header->data());

    _addRecordToRecListInExtent(txn, header, where);
    _details->incrementStats(txn, header->netLength(), 1);

    return StatusWith<RecordId>(where.toRecordId());
}
/**
 * Tries to satisfy an allocation of 'lenToAlloc' bytes from the deleted-record
 * free lists instead of a new extent.
 *
 * The request is rounded up to a multiple of 4. The free lists are walked starting at
 * bucket(lenToAlloc): the smallest deleted record that fits is remembered, and after the
 * first fit only a handful of further entries are examined for a tighter one. A chain
 * longer than 30 entries is abandoned. The winner is unlinked from its list and, if the
 * unused tail is large enough, that tail is split off and re-registered via
 * addDeletedRec().
 *
 * @param txn         operation context; all on-disk mutations go through its recovery unit
 * @param lenToAlloc  number of bytes requested (header included by the caller)
 * @return location of the claimed region, or a default-constructed DiskLoc when every
 *         bucket up to MaxBucket has been tried without success
 */
DiskLoc SimpleRecordStoreV1::_allocFromExistingExtents(OperationContext* txn, int lenToAlloc) {
    // Round the request up to the next multiple of 4.
    const int kAlignment = 4;
    lenToAlloc = (lenToAlloc + (kAlignment - 1)) & ~(kAlignment - 1);

    freelistAllocs.increment();

    DiskLoc* prevLink = 0;      // 'next' link of the record before 'cursor'; 0 at a list head
    DiskLoc* bestPrevLink = 0;  // value of prevLink when 'bestFit' was recorded
    DiskLoc bestFit;
    int bestFitLen = INT_MAX;   // INT_MAX is the sentinel for "nothing big enough seen yet"
    int bucketIdx = bucket(lenToAlloc);
    DiskLoc cursor = _details->deletedListEntry(bucketIdx);

    int probesLeft = 5;  // once something fits, look a little further for a better fit
    int chainLen = 0;    // entries examined since the counter was last reset

    for (;;) {
        {
            // Defensive check of the link we are about to follow.
            const int fileNumber = cursor.a();
            const int fileOffset = cursor.getOfs();
            const bool linkLooksSane =
                fileNumber >= -1 && fileNumber < 100000 && fileOffset >= 0;
            if (!linkLooksSane) {
                StringBuilder sb;
                sb << "Deleted record list corrupted in collection " << _ns
                   << ", bucket " << bucketIdx
                   << ", link number " << chainLen
                   << ", invalid link is " << cursor.toString()
                   << ", throwing Fatal Assertion";
                log() << sb.str() << endl;
                fassertFailed(16469);
            }
        }

        if (cursor.isNull()) {
            // End of this chain. If we were only hunting for a better fit, take what we have.
            if (bestFitLen < INT_MAX) {
                break;
            }

            if (chainLen > 0) {
                // Entries in this bucket were examined, but none was suitable.
                freelistBucketExhausted.increment();
            }

            ++bucketIdx;
            if (bucketIdx > MaxBucket) {
                // Out of free-list space; the caller has to allocate a new extent.
                freelistIterations.increment(1 + chainLen);
                return DiskLoc();
            }
            cursor = _details->deletedListEntry(bucketIdx);
            prevLink = 0;
            continue;
        }

        DeletedRecord* candidate = drec(cursor);
        const int candidateLen = candidate->lengthWithHeaders();
        if (candidateLen >= lenToAlloc && candidateLen < bestFitLen) {
            bestFitLen = candidateLen;
            bestFit = cursor;
            bestPrevLink = prevLink;
            if (candidateLen == lenToAlloc) {
                // Exact match, stop searching.
                break;
            }
        }

        // The probe budget is only spent once something has been found.
        if (bestFitLen < INT_MAX && --probesLeft <= 0) {
            break;
        }

        if (++chainLen > 30 && bucketIdx <= MaxBucket) {
            // Too slow: stop walking this chain. Nulling the cursor routes the next
            // iteration through the end-of-chain branch above, which moves on to the
            // next bucket to grab a bigger chunk.
            freelistIterations.increment(chainLen);
            chainLen = 0;
            cursor.Null();
        } else {
            cursor = candidate->nextDeleted();
            prevLink = &candidate->nextDeleted();
        }
    }

    // Unlink the chosen record from its deleted list.
    DeletedRecord* chosen = drec(bestFit);
    if (bestPrevLink) {
        *txn->recoveryUnit()->writing(bestPrevLink) = chosen->nextDeleted();
    } else {
        // No predecessor was recorded, so it should be the front of a free list.
        const int homeBucket = bucket(chosen->lengthWithHeaders());
        invariant(_details->deletedListEntry(homeBucket) == bestFit);
        _details->setDeletedListEntry(txn, homeBucket, chosen->nextDeleted());
    }
    *txn->recoveryUnit()->writing(&chosen->nextDeleted()) = DiskLoc().setInvalid();  // defensive
    invariant(chosen->extentOfs() < bestFit.getOfs());

    freelistIterations.increment(1 + chainLen);

    DiskLoc loc = bestFit;
    if (loc.isNull()) {
        return loc;
    }

    // Decide whether the claimed region should be chopped up. Space is taken from the
    // front so that next pointers on disk tend to go in a forward direction, which is
    // important for performance.
    DeletedRecord* region = drec(loc);
    const int regionLen = region->lengthWithHeaders();
    invariant(region->extentOfs() < loc.getOfs());

    const int kMinSplitRemainder = 24;  // tails shorter than this are not split off
    int leftover = regionLen - lenToAlloc;
    if (leftover < kMinSplitRemainder || leftover < (lenToAlloc / 8)) {
        // The caller gets the whole thing.
        return loc;
    }

    // Don't quantize $ collections (indexes): those are already aligned the way we
    // want (SERVER-8425).
    if (_normalCollection) {
        // Quantizing here means only newly sized records are affected, which prevents
        // oddities with older records and space re-use (SERVER-8435).
        lenToAlloc = std::min(region->lengthWithHeaders(), quantizeAllocationSpace(lenToAlloc));
        leftover = regionLen - lenToAlloc;

        if (leftover < kMinSplitRemainder) {
            // The caller gets the whole thing.
            return loc;
        }
    }

    // Shrink the claimed region and turn the tail into a new deleted record.
    txn->recoveryUnit()->writingInt(region->lengthWithHeaders()) = lenToAlloc;

    DiskLoc remainderLoc = loc;
    remainderLoc.inc(lenToAlloc);
    DeletedRecord* remainder = drec(remainderLoc);
    DeletedRecord* remainderW = txn->recoveryUnit()->writing(remainder);
    remainderW->extentOfs() = region->extentOfs();
    remainderW->lengthWithHeaders() = leftover;
    remainderW->nextDeleted().Null();

    addDeletedRec(txn, remainderLoc);
    return loc;
}
Status RecordStoreV1Base::validate( OperationContext* txn, bool full, bool scanData, ValidateAdaptor* adaptor, ValidateResults* results, BSONObjBuilder* output ) const { // 1) basic status that require no iteration // 2) extent level info // 3) check extent start and end // 4) check each non-deleted record // 5) check deleted list // ------------- // 1111111111111111111 if ( isCapped() ){ output->appendBool("capped", true); output->appendNumber("max", _details->maxCappedDocs()); } output->appendNumber("datasize", _details->dataSize()); output->appendNumber("nrecords", _details->numRecords()); output->appendNumber("lastExtentSize", _details->lastExtentSize(txn)); output->appendNumber("padding", _details->paddingFactor()); if ( _details->firstExtent(txn).isNull() ) output->append( "firstExtent", "null" ); else output->append( "firstExtent", str::stream() << _details->firstExtent(txn).toString() << " ns:" << _getExtent( txn, _details->firstExtent(txn) )->nsDiagnostic.toString()); if ( _details->lastExtent(txn).isNull() ) output->append( "lastExtent", "null" ); else output->append( "lastExtent", str::stream() << _details->lastExtent(txn).toString() << " ns:" << _getExtent( txn, _details->lastExtent(txn) )->nsDiagnostic.toString()); // 22222222222222222222222222 { // validate extent basics BSONArrayBuilder extentData; int extentCount = 0; DiskLoc extentDiskLoc; try { if ( !_details->firstExtent(txn).isNull() ) { _getExtent( txn, _details->firstExtent(txn) )->assertOk(); _getExtent( txn, _details->lastExtent(txn) )->assertOk(); } extentDiskLoc = _details->firstExtent(txn); while (!extentDiskLoc.isNull()) { Extent* thisExtent = _getExtent( txn, extentDiskLoc ); if (full) { extentData << thisExtent->dump(); } if (!thisExtent->validates(extentDiskLoc, &results->errors)) { results->valid = false; } DiskLoc nextDiskLoc = thisExtent->xnext; if (extentCount > 0 && !nextDiskLoc.isNull() && _getExtent( txn, nextDiskLoc )->xprev != extentDiskLoc) { StringBuilder sb; sb << "'xprev' 
pointer " << _getExtent( txn, nextDiskLoc )->xprev.toString() << " in extent " << nextDiskLoc.toString() << " does not point to extent " << extentDiskLoc.toString(); results->errors.push_back( sb.str() ); results->valid = false; } if (nextDiskLoc.isNull() && extentDiskLoc != _details->lastExtent(txn)) { StringBuilder sb; sb << "'lastExtent' pointer " << _details->lastExtent(txn).toString() << " does not point to last extent in list " << extentDiskLoc.toString(); results->errors.push_back( sb.str() ); results->valid = false; } extentDiskLoc = nextDiskLoc; extentCount++; txn->checkForInterrupt(); } } catch (const DBException& e) { StringBuilder sb; sb << "exception validating extent " << extentCount << ": " << e.what(); results->errors.push_back( sb.str() ); results->valid = false; return Status::OK(); } output->append("extentCount", extentCount); if ( full ) output->appendArray( "extents" , extentData.arr() ); } try { // 333333333333333333333333333 bool testingLastExtent = false; try { DiskLoc firstExtentLoc = _details->firstExtent(txn); if (firstExtentLoc.isNull()) { // this is ok } else { output->append("firstExtentDetails", _getExtent(txn, firstExtentLoc)->dump()); if (!_getExtent(txn, firstExtentLoc)->xprev.isNull()) { StringBuilder sb; sb << "'xprev' pointer in 'firstExtent' " << _details->firstExtent(txn).toString() << " is " << _getExtent(txn, firstExtentLoc)->xprev.toString() << ", should be null"; results->errors.push_back( sb.str() ); results->valid = false; } } testingLastExtent = true; DiskLoc lastExtentLoc = _details->lastExtent(txn); if (lastExtentLoc.isNull()) { // this is ok } else { if (firstExtentLoc != lastExtentLoc) { output->append("lastExtentDetails", _getExtent(txn, lastExtentLoc)->dump()); if (!_getExtent(txn, lastExtentLoc)->xnext.isNull()) { StringBuilder sb; sb << "'xnext' pointer in 'lastExtent' " << lastExtentLoc.toString() << " is " << _getExtent(txn, lastExtentLoc)->xnext.toString() << ", should be null"; results->errors.push_back( 
sb.str() ); results->valid = false; } } } } catch (const DBException& e) { StringBuilder sb; sb << "exception processing '" << (testingLastExtent ? "lastExtent" : "firstExtent") << "': " << e.what(); results->errors.push_back( sb.str() ); results->valid = false; } // 4444444444444444444444444 set<DiskLoc> recs; if( scanData ) { int n = 0; int nInvalid = 0; long long nQuantizedSize = 0; long long nPowerOf2QuantizedSize = 0; long long len = 0; long long nlen = 0; long long bsonLen = 0; int outOfOrder = 0; DiskLoc cl_last; scoped_ptr<RecordIterator> iterator( getIterator( txn, DiskLoc(), false, CollectionScanParams::FORWARD ) ); DiskLoc cl; while ( !( cl = iterator->getNext() ).isNull() ) { n++; if ( n < 1000000 ) recs.insert(cl); if ( isCapped() ) { if ( cl < cl_last ) outOfOrder++; cl_last = cl; } Record *r = recordFor(cl); len += r->lengthWithHeaders(); nlen += r->netLength(); if ( r->lengthWithHeaders() == quantizeAllocationSpace( r->lengthWithHeaders() ) ) { // Count the number of records having a size consistent with // the quantizeAllocationSpace quantization implementation. ++nQuantizedSize; } if ( r->lengthWithHeaders() == quantizePowerOf2AllocationSpace( r->lengthWithHeaders() ) ) { // Count the number of records having a size consistent with the // quantizePowerOf2AllocationSpace quantization implementation. 
++nPowerOf2QuantizedSize; } if (full){ size_t dataSize = 0; const Status status = adaptor->validate( r->toRecordData(), &dataSize ); if (!status.isOK()) { results->valid = false; if (nInvalid == 0) // only log once; results->errors.push_back( "invalid object detected (see logs)" ); nInvalid++; log() << "Invalid object detected in " << _ns << ": " << status.reason(); } else { bsonLen += dataSize; } } } if ( isCapped() && !_details->capLooped() ) { output->append("cappedOutOfOrder", outOfOrder); if ( outOfOrder > 1 ) { results->valid = false; results->errors.push_back( "too many out of order records" ); } } output->append("objectsFound", n); if (full) { output->append("invalidObjects", nInvalid); } output->appendNumber("nQuantizedSize", nQuantizedSize); output->appendNumber("nPowerOf2QuantizedSize", nPowerOf2QuantizedSize); output->appendNumber("bytesWithHeaders", len); output->appendNumber("bytesWithoutHeaders", nlen); if (full) { output->appendNumber("bytesBson", bsonLen); } } // end scanData // 55555555555555555555555555 BSONArrayBuilder deletedListArray; for ( int i = 0; i < Buckets; i++ ) { deletedListArray << _details->deletedListEntry(i).isNull(); } int ndel = 0; long long delSize = 0; BSONArrayBuilder delBucketSizes; int incorrect = 0; for ( int i = 0; i < Buckets; i++ ) { DiskLoc loc = _details->deletedListEntry(i); try { int k = 0; while ( !loc.isNull() ) { if ( recs.count(loc) ) incorrect++; ndel++; if ( loc.questionable() ) { if( isCapped() && !loc.isValid() && i == 1 ) { /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid see comments in namespace.h */ break; } string err( str::stream() << "bad pointer in deleted record list: " << loc.toString() << " bucket: " << i << " k: " << k ); results->errors.push_back( err ); results->valid = false; break; } const DeletedRecord* d = deletedRecordFor(loc); delSize += d->lengthWithHeaders(); loc = d->nextDeleted(); k++; txn->checkForInterrupt(); } delBucketSizes << k; } catch (...) 
{ results->errors.push_back( (string)"exception in deleted chain for bucket " + BSONObjBuilder::numStr(i) ); results->valid = false; } } output->appendNumber("deletedCount", ndel); output->appendNumber("deletedSize", delSize); if ( full ) { output->append( "delBucketSizes", delBucketSizes.arr() ); } if ( incorrect ) { results->errors.push_back( BSONObjBuilder::numStr(incorrect) + " records from datafile are in deleted list" ); results->valid = false; } } catch (AssertionException) { results->errors.push_back( "exception during validate" ); results->valid = false; } return Status::OK(); }
/**
 * Reports whether 'recordSize' is a size that quantizeAllocationSpace() maps to itself.
 * Sizes above MaxAllowedAllocation are never considered quantized, and
 * quantizeAllocationSpace() is not consulted for them.
 */
bool RecordStoreV1Base::isQuantized(int recordSize) {
    return recordSize <= MaxAllowedAllocation &&
        recordSize == quantizeAllocationSpace(recordSize);
}