/**
 * Allocates a record large enough for 'len' bytes of payload plus the record header, copies
 * 'data' into it, appends the record to its extent's record list and updates the collection
 * stats.
 *
 * When shouldPadInserts() is true the allocation is rounded up with
 * quantizeAllocationSpace().
 *
 * Returns the RecordId of the new record, or the status reported by allocRecord() on failure.
 */
StatusWith<RecordId> RecordStoreV1Base::_insertRecord(OperationContext* txn,
                                                      const char* data,
                                                      int len,
                                                      bool enforceQuota) {
    const int requiredSize = len + MmapV1RecordHeader::HeaderSize;

    int allocationSize = requiredSize;
    if (shouldPadInserts()) {
        allocationSize = quantizeAllocationSpace(requiredSize);
    }
    fassert(17208, allocationSize >= requiredSize);

    StatusWith<DiskLoc> allocation = allocRecord(txn, allocationSize, enforceQuota);
    if (!allocation.isOK()) {
        return StatusWith<RecordId>(allocation.getStatus());
    }
    const DiskLoc newLoc = allocation.getValue();

    MmapV1RecordHeader* header = recordFor(newLoc);
    fassert(17210, header->lengthWithHeaders() >= requiredSize);

    // Declare write intent over header + payload, then copy the payload in.
    void* writable = txn->recoveryUnit()->writingPtr(header, requiredSize);
    header = reinterpret_cast<MmapV1RecordHeader*>(writable);
    memcpy(header->data(), data, len);

    _addRecordToRecListInExtent(txn, header, newLoc);

    // One more record, 'netLength' more bytes of data.
    _details->incrementStats(txn, header->netLength(), 1);

    return StatusWith<RecordId>(newLoc.toRecordId());
}
/**
 * Inserts a record whose contents are produced by 'doc'.
 *
 * The document size must be at least 4 bytes, and size + header must not exceed
 * MaxAllowedAllocation; otherwise ErrorCodes::InvalidLength is returned. The allocation is
 * quantized only when both doc->addPadding() and shouldPadInserts() are true.
 *
 * Returns the RecordId of the new record, or the status reported by allocRecord() on failure.
 */
StatusWith<RecordId> RecordStoreV1Base::insertRecord(OperationContext* txn,
                                                     const DocWriter* doc,
                                                     bool enforceQuota) {
    const int payloadSize = doc->documentSize();
    if (payloadSize < 4) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be >= 4 bytes");
    }

    const int requiredSize = payloadSize + MmapV1RecordHeader::HeaderSize;
    if (requiredSize > MaxAllowedAllocation) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
    }

    int allocationSize = requiredSize;
    if (doc->addPadding() && shouldPadInserts()) {
        allocationSize = quantizeAllocationSpace(requiredSize);
    }

    StatusWith<DiskLoc> allocation = allocRecord(txn, allocationSize, enforceQuota);
    if (!allocation.isOK()) {
        return StatusWith<RecordId>(allocation.getStatus());
    }
    const DiskLoc newLoc = allocation.getValue();

    MmapV1RecordHeader* header = recordFor(newLoc);
    fassert(17319, header->lengthWithHeaders() >= requiredSize);

    // Declare write intent over header + payload, then let the writer fill in the payload.
    void* writable = txn->recoveryUnit()->writingPtr(header, requiredSize);
    header = reinterpret_cast<MmapV1RecordHeader*>(writable);
    doc->writeDocument(header->data());

    _addRecordToRecListInExtent(txn, header, newLoc);

    _details->incrementStats(txn, header->netLength(), 1);

    return StatusWith<RecordId>(newLoc.toRecordId());
}
/**
 * Looks up the record at 'loc' and, if recordFor() yields a header, stores its data in '*rd'.
 *
 * Returns true when '*rd' was filled in, false when recordFor() returned null.
 */
bool RecordStoreV1Base::findRecord(OperationContext* txn,
                                   const RecordId& loc,
                                   RecordData* rd) const {
    // With this storage engine the caller is expected to hand us a location that exists, and
    // there is no way to actually verify that here, so the location is trusted as-is.
    MmapV1RecordHeader* const header = recordFor(DiskLoc::fromRecordId(loc));
    if (header) {
        *rd = header->toRecordData();
        return true;
    }
    return false;
}
/**
 * Appends record 'r' (located at 'loc') to the tail of the record list of the extent that
 * contains it.
 *
 * The extent's firstRecord/lastRecord and the previous tail's nextOfs are modified through the
 * recovery unit; r's own prevOfs/nextOfs are assigned directly.
 */
void RecordStoreV1Base::_addRecordToRecListInExtent(OperationContext* txn,
                                                    MmapV1RecordHeader* r,
                                                    DiskLoc loc) {
    dassert(recordFor(loc) == r);

    const DiskLoc owningExtentLoc = _getExtentLocForRecord(txn, loc);
    Extent* extent = _getExtent(txn, owningExtentLoc);

    if (!extent->lastRecord.isNull()) {
        // Non-empty extent: hook the new record in after the current tail.
        MmapV1RecordHeader* previousTail = recordFor(extent->lastRecord);
        r->prevOfs() = extent->lastRecord.getOfs();
        r->nextOfs() = DiskLoc::NullOfs;
        txn->recoveryUnit()->writingInt(previousTail->nextOfs()) = loc.getOfs();
        *txn->recoveryUnit()->writing(&extent->lastRecord) = loc;
        return;
    }

    // Empty extent: the new record is both the head and the tail of the list.
    *txn->recoveryUnit()->writing(&extent->firstRecord) = loc;
    *txn->recoveryUnit()->writing(&extent->lastRecord) = loc;
    r->nextOfs() = DiskLoc::NullOfs;
    r->prevOfs() = DiskLoc::NullOfs;
}
/**
 * Replaces the contents of the record at 'oldLocation' with 'data' ('dataSize' bytes).
 *
 * If the new data fits in the existing record it is overwritten in place and 'oldLocation' is
 * returned. Otherwise a new record is inserted, the old one is deleted, and the new location is
 * returned. 'notifier', when non-null, is told before an in-place update and before a move; a
 * non-OK status from either callback is returned to the caller.
 *
 * Returns ErrorCodes::InvalidLength if a moved record would exceed MaxAllowedAllocation.
 */
StatusWith<RecordId> RecordStoreV1Base::updateRecord(OperationContext* txn,
                                                     const RecordId& oldLocation,
                                                     const char* data,
                                                     int dataSize,
                                                     bool enforceQuota,
                                                     UpdateNotifier* notifier) {
    MmapV1RecordHeader* existing = recordFor(DiskLoc::fromRecordId(oldLocation));

    const bool fitsInPlace = existing->netLength() >= dataSize;
    if (fitsInPlace) {
        // Other queries must hear about the in-place update before it happens.
        if (notifier) {
            Status inPlaceStatus = notifier->recordStoreGoingToUpdateInPlace(txn, oldLocation);
            if (!inPlaceStatus.isOK()) {
                return StatusWith<RecordId>(inPlaceStatus);
            }
        }

        void* destination = txn->recoveryUnit()->writingPtr(existing->data(), dataSize);
        memcpy(destination, data, dataSize);
        return StatusWith<RecordId>(oldLocation);
    }

    // Capped collections never get here: the rule that capped documents cannot change size is
    // enforced above the storage layer.
    invariant(!isCapped());

    // The document has to move to a new, larger record.
    const int sizeWithHeader = dataSize + MmapV1RecordHeader::HeaderSize;
    if (sizeWithHeader > MaxAllowedAllocation) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
    }

    StatusWith<RecordId> inserted = _insertRecord(txn, data, dataSize, enforceQuota);
    if (!inserted.isOK()) {
        return inserted;
    }

    // The new copy exists; announce the move and then drop the old record.
    if (notifier) {
        Status moveStatus = notifier->recordStoreGoingToMove(
            txn, oldLocation, existing->data(), existing->netLength());
        if (!moveStatus.isOK()) {
            return StatusWith<RecordId>(moveStatus);
        }
    }

    deleteRecord(txn, oldLocation);

    return inserted;
}
/**
 * Applies a list of in-place byte-range modifications ("damages") to the record at 'loc'.
 *
 * Each damage event copies 'size' bytes from 'damageSource + sourceOffset' to the record's data
 * at 'targetOffset', going through the recovery unit's writingPtr().
 *
 * Returns the record's data after all damages have been applied.
 */
StatusWith<RecordData> RecordStoreV1Base::updateWithDamages(
    OperationContext* txn,
    const RecordId& loc,
    const RecordData& oldRec,
    const char* damageSource,
    const mutablebson::DamageVector& damages) {
    MmapV1RecordHeader* header = recordFor(DiskLoc::fromRecordId(loc));
    char* const payload = header->data();

    // Every change is in place, so each one is written through a durable writing pointer.
    for (const auto& damage : damages) {
        const char* from = damageSource + damage.sourceOffset;
        void* to = txn->recoveryUnit()->writingPtr(payload + damage.targetOffset, damage.size);
        std::memcpy(to, from, damage.size);
    }

    return header->toRecordData();
}
// Example #7
// 0
/**
 * Moves every record out of the extent at 'extentLoc' and then frees that extent.
 *
 * Steps performed:
 *  1. Validate the source extent and touch all of its pages sequentially.
 *  2. Walk the extent's record list starting at firstRecord. A record is dropped (and counted
 *     in stats->corruptDocuments) when compactOptions->validateDocuments is set and the adaptor
 *     reports it invalid; otherwise it is re-inserted via insertRecordWithDocWriter() using an
 *     allocation size chosen by compactOptions->paddingMode, and the adaptor is notified through
 *     inserted(). Either way the record is unlinked from the source extent and the collection
 *     stats are decremented. Each record is handled in its own WriteUnitOfWork.
 *  3. Unlink the now-empty extent from the head of the extent list and hand it back to the
 *     extent manager.
 *
 * The extent must be the first extent of the collection and must not be the only one (checked
 * with invariants after the copy loop). A failed re-insert is raised via uassertStatusOK().
 *
 * 'extentNumber' is only used in log messages.
 */
void SimpleRecordStoreV1::_compactExtent(OperationContext* txn,
                                         const DiskLoc extentLoc,
                                         int extentNumber,
                                         RecordStoreCompactAdaptor* adaptor,
                                         const CompactOptions* compactOptions,
                                         CompactStats* stats) {
    log() << "compact begin extent #" << extentNumber << " for namespace " << _ns << " "
          << extentLoc;

    // Running totals used only to report the old padding factor at the end.
    unsigned oldObjSize = 0;  // we'll report what the old padding was
    unsigned oldObjSizeWithPadding = 0;

    Extent* const sourceExtent = _extentManager->getExtent(extentLoc);
    sourceExtent->assertOk();
    fassert(17437, sourceExtent->validates(extentLoc));

    {
        // The next/prev MmapV1RecordHeader pointers within the Extent might not be in order so we
        // first page in the whole Extent sequentially.
        // TODO benchmark on slow storage to verify this is measurably faster.
        log() << "compact paging in len=" << sourceExtent->length / 1000000.0 << "MB" << endl;
        Timer t;
        size_t length = sourceExtent->length;

        touch_pages(reinterpret_cast<const char*>(sourceExtent), length);
        int ms = t.millis();
        // Only report throughput when paging took longer than a second.
        if (ms > 1000)
            log() << "compact end paging in " << ms << "ms "
                  << sourceExtent->length / 1000000.0 / t.seconds() << "MB/sec" << endl;
    }

    {
        // Move each MmapV1RecordHeader out of this extent and insert it in to the "new" extents.
        log() << "compact copying records" << endl;
        long long totalNetSize = 0;
        long long nrecords = 0;
        DiskLoc nextSourceLoc = sourceExtent->firstRecord;
        while (!nextSourceLoc.isNull()) {
            txn->checkForInterrupt();

            // One unit of work per record: the copy, the unlink and the stats change commit
            // together.
            WriteUnitOfWork wunit(txn);
            MmapV1RecordHeader* recOld = recordFor(nextSourceLoc);
            RecordData oldData = recOld->toRecordData();
            // Advance now; from here on nextSourceLoc refers to the record after recOld.
            nextSourceLoc = getNextRecordInExtent(txn, nextSourceLoc);

            if (compactOptions->validateDocuments && !adaptor->isDataValid(oldData)) {
                // object is corrupt!
                log() << "compact removing corrupt document!";
                stats->corruptDocuments++;
            } else {
                // How much data is in the record. Excludes padding and MmapV1RecordHeader headers.
                const unsigned rawDataSize = adaptor->dataSize(oldData);

                nrecords++;
                oldObjSize += rawDataSize;
                oldObjSizeWithPadding += recOld->netLength();

                // Allocation sizes include the headers and possibly some padding.
                const unsigned minAllocationSize = rawDataSize + MmapV1RecordHeader::HeaderSize;
                unsigned allocationSize = minAllocationSize;
                switch (compactOptions->paddingMode) {
                    case CompactOptions::NONE:  // default padding
                        if (shouldPadInserts()) {
                            allocationSize = quantizeAllocationSpace(minAllocationSize);
                        }
                        break;

                    case CompactOptions::PRESERVE:  // keep original padding
                        allocationSize = recOld->lengthWithHeaders();
                        break;

                    case CompactOptions::MANUAL:  // user specified how much padding to use
                        allocationSize = compactOptions->computeRecordSize(minAllocationSize);
                        // Fall back to no padding when the computed size is too small or
                        // larger than half of BSONObjMaxUserSize.
                        if (allocationSize < minAllocationSize ||
                            allocationSize > BSONObjMaxUserSize / 2) {
                            allocationSize = minAllocationSize;
                        }
                        break;
                }
                invariant(allocationSize >= minAllocationSize);

                // Copy the data to a new record. Because we orphaned the record freelist at the
                // start of the compact, this insert will allocate a record in a new extent.
                // See the comment in compact() for more details.
                CompactDocWriter writer(recOld, rawDataSize, allocationSize);
                StatusWith<RecordId> status = insertRecordWithDocWriter(txn, &writer);
                uassertStatusOK(status.getStatus());
                const MmapV1RecordHeader* newRec =
                    recordFor(DiskLoc::fromRecordId(status.getValue()));
                invariant(unsigned(newRec->netLength()) >= rawDataSize);
                totalNetSize += newRec->netLength();

                // Tells the caller that the record has been moved, so it can do things such as
                // add it to indexes.
                adaptor->inserted(newRec->toRecordData(), status.getValue());
            }

            // Remove the old record from the linked list of records within the sourceExtent.
            // The old record is not added to the freelist as we will be freeing the whole
            // extent at the end.
            *txn->recoveryUnit()->writing(&sourceExtent->firstRecord) = nextSourceLoc;
            if (nextSourceLoc.isNull()) {
                // Just moved the last record out of the extent. Mark extent as empty.
                *txn->recoveryUnit()->writing(&sourceExtent->lastRecord) = DiskLoc();
            } else {
                // The following record is the new head, so it has no predecessor any more.
                MmapV1RecordHeader* newFirstRecord = recordFor(nextSourceLoc);
                txn->recoveryUnit()->writingInt(newFirstRecord->prevOfs()) = DiskLoc::NullOfs;
            }

            // Adjust the stats to reflect the removal of the old record. The insert above
            // handled adjusting the stats for the new record.
            _details->incrementStats(txn, -(recOld->netLength()), -1);

            wunit.commit();
        }

        // The extent must now be empty.
        invariant(sourceExtent->firstRecord.isNull());
        invariant(sourceExtent->lastRecord.isNull());

        // We are still the first extent, but we must not be the only extent.
        invariant(_details->firstExtent(txn) == extentLoc);
        invariant(_details->lastExtent(txn) != extentLoc);

        // Remove the newly emptied sourceExtent from the extent linked list and return it to
        // the extent manager.
        WriteUnitOfWork wunit(txn);
        const DiskLoc newFirst = sourceExtent->xnext;
        _details->setFirstExtent(txn, newFirst);
        *txn->recoveryUnit()->writing(&_extentManager->getExtent(newFirst)->xprev) = DiskLoc();
        _extentManager->freeExtent(txn, extentLoc);
        wunit.commit();

        {
            // Ratio of bytes occupied (data + padding) to bytes of actual data in this extent.
            const double oldPadding = oldObjSize ? double(oldObjSizeWithPadding) / oldObjSize
                                                 : 1.0;  // defining 0/0 as 1 for this.

            log() << "compact finished extent #" << extentNumber << " containing " << nrecords
                  << " documents (" << totalNetSize / (1024 * 1024.0) << "MB)"
                  << " oldPadding: " << oldPadding;
        }
    }
}
 /**
  * Returns the location of the extent containing the record at 'loc': same file number as
  * 'loc', at the offset stored in the record's extentOfs() field.
  */
 DiskLoc MmapV1ExtentManager::extentLocForV1( const DiskLoc& loc ) const {
     const int extentOffset = recordForV1( loc )->extentOfs();
     return DiskLoc( loc.a(), extentOffset );
 }
/**
 * Checks the on-disk structures of this record store and reports findings.
 *
 * Summary fields are appended to 'output'. Every problem found sets results->valid to false and
 * adds a message to results->errors. The function itself always returns Status::OK().
 *
 * @param full      also dump every extent, validate each record through 'adaptor' and report
 *                  per-bucket deleted-list sizes.
 * @param scanData  iterate over all records (section 4); otherwise records are not visited.
 * @param adaptor   used to validate record contents when 'full' is set.
 *
 * If walking the extent list throws a DBException, the error is recorded and the function
 * returns early without running the remaining sections.
 */
Status RecordStoreV1Base::validate(OperationContext* txn,
                                   bool full,
                                   bool scanData,
                                   ValidateAdaptor* adaptor,
                                   ValidateResults* results,
                                   BSONObjBuilder* output) {
    // 1) basic status that require no iteration
    // 2) extent level info
    // 3) check extent start and end
    // 4) check each non-deleted record
    // 5) check deleted list

    // -------------

    // Section 1: basic status that requires no iteration.
    if (isCapped()) {
        output->appendBool("capped", true);
        output->appendNumber("max", _details->maxCappedDocs());
    }

    output->appendNumber("datasize", _details->dataSize());
    output->appendNumber("nrecords", _details->numRecords());
    output->appendNumber("lastExtentSize", _details->lastExtentSize(txn));

    if (_details->firstExtent(txn).isNull())
        output->append("firstExtent", "null");
    else
        output->append("firstExtent",
                       str::stream()
                           << _details->firstExtent(txn).toString() << " ns:"
                           << _getExtent(txn, _details->firstExtent(txn))->nsDiagnostic.toString());
    if (_details->lastExtent(txn).isNull())
        output->append("lastExtent", "null");
    else
        output->append("lastExtent",
                       str::stream()
                           << _details->lastExtent(txn).toString() << " ns:"
                           << _getExtent(txn, _details->lastExtent(txn))->nsDiagnostic.toString());

    // Section 2: walk the extent list and check each extent and the links between them.
    {  // validate extent basics
        BSONArrayBuilder extentData;
        int extentCount = 0;
        DiskLoc extentDiskLoc;
        try {
            if (!_details->firstExtent(txn).isNull()) {
                _getExtent(txn, _details->firstExtent(txn))->assertOk();
                _getExtent(txn, _details->lastExtent(txn))->assertOk();
            }

            extentDiskLoc = _details->firstExtent(txn);
            while (!extentDiskLoc.isNull()) {
                Extent* thisExtent = _getExtent(txn, extentDiskLoc);
                if (full) {
                    extentData << thisExtent->dump();
                }
                if (!thisExtent->validates(extentDiskLoc, &results->errors)) {
                    results->valid = false;
                }
                DiskLoc nextDiskLoc = thisExtent->xnext;

                // The next extent's back pointer must refer to the current extent.
                if (extentCount > 0 && !nextDiskLoc.isNull() &&
                    _getExtent(txn, nextDiskLoc)->xprev != extentDiskLoc) {
                    StringBuilder sb;
                    sb << "'xprev' pointer " << _getExtent(txn, nextDiskLoc)->xprev.toString()
                       << " in extent " << nextDiskLoc.toString() << " does not point to extent "
                       << extentDiskLoc.toString();
                    results->errors.push_back(sb.str());
                    results->valid = false;
                }
                // The extent that ends the chain must be the recorded lastExtent.
                if (nextDiskLoc.isNull() && extentDiskLoc != _details->lastExtent(txn)) {
                    StringBuilder sb;
                    sb << "'lastExtent' pointer " << _details->lastExtent(txn).toString()
                       << " does not point to last extent in list " << extentDiskLoc.toString();
                    results->errors.push_back(sb.str());
                    results->valid = false;
                }
                extentDiskLoc = nextDiskLoc;
                extentCount++;
                txn->checkForInterrupt();
            }
        } catch (const DBException& e) {
            StringBuilder sb;
            sb << "exception validating extent " << extentCount << ": " << e.what();
            results->errors.push_back(sb.str());
            results->valid = false;
            // The extent list cannot be trusted, so the remaining sections are skipped.
            return Status::OK();
        }
        output->append("extentCount", extentCount);

        if (full)
            output->appendArray("extents", extentData.arr());
    }

    try {
        // Section 3: the first extent must have no predecessor and the last no successor.
        bool testingLastExtent = false;
        try {
            DiskLoc firstExtentLoc = _details->firstExtent(txn);
            if (firstExtentLoc.isNull()) {
                // this is ok
            } else {
                output->append("firstExtentDetails", _getExtent(txn, firstExtentLoc)->dump());
                if (!_getExtent(txn, firstExtentLoc)->xprev.isNull()) {
                    StringBuilder sb;
                    sb << "'xprev' pointer in 'firstExtent' "
                       << _details->firstExtent(txn).toString() << " is "
                       << _getExtent(txn, firstExtentLoc)->xprev.toString() << ", should be null";
                    results->errors.push_back(sb.str());
                    results->valid = false;
                }
            }
            // From here on an exception is attributed to 'lastExtent' in the error message.
            testingLastExtent = true;
            DiskLoc lastExtentLoc = _details->lastExtent(txn);
            if (lastExtentLoc.isNull()) {
                // this is ok
            } else {
                if (firstExtentLoc != lastExtentLoc) {
                    output->append("lastExtentDetails", _getExtent(txn, lastExtentLoc)->dump());
                    if (!_getExtent(txn, lastExtentLoc)->xnext.isNull()) {
                        StringBuilder sb;
                        sb << "'xnext' pointer in 'lastExtent' " << lastExtentLoc.toString()
                           << " is " << _getExtent(txn, lastExtentLoc)->xnext.toString()
                           << ", should be null";
                        results->errors.push_back(sb.str());
                        results->valid = false;
                    }
                }
            }
        } catch (const DBException& e) {
            StringBuilder sb;
            sb << "exception processing '" << (testingLastExtent ? "lastExtent" : "firstExtent")
               << "': " << e.what();
            results->errors.push_back(sb.str());
            results->valid = false;
        }

        // Section 4: visit each live record.

        // Locations of live records seen during the scan; section 5 checks that none of them
        // also appears in a deleted list.
        set<DiskLoc> recs;
        if (scanData) {
            int n = 0;
            int nInvalid = 0;
            long long nQuantizedSize = 0;
            long long len = 0;
            long long nlen = 0;
            long long bsonLen = 0;
            int outOfOrder = 0;
            DiskLoc dl_last;

            auto cursor = getCursor(txn);
            while (auto record = cursor->next()) {
                const auto dl = DiskLoc::fromRecordId(record->id);
                n++;

                // Only records visited while n < 1000000 are remembered in the set.
                if (n < 1000000)
                    recs.insert(dl);
                if (isCapped()) {
                    if (dl < dl_last)
                        outOfOrder++;
                    dl_last = dl;
                }

                MmapV1RecordHeader* r = recordFor(dl);
                len += r->lengthWithHeaders();
                nlen += r->netLength();

                if (isQuantized(r->lengthWithHeaders())) {
                    // Count the number of records having a size consistent with
                    // the quantizeAllocationSpace quantization implementation.
                    ++nQuantizedSize;
                }

                if (full) {
                    size_t dataSize = 0;
                    const Status status = adaptor->validate(r->toRecordData(), &dataSize);
                    if (!status.isOK()) {
                        results->valid = false;
                        if (nInvalid == 0)  // only log once;
                            results->errors.push_back("invalid object detected (see logs)");

                        nInvalid++;
                        log() << "Invalid object detected in " << _ns << ": " << status.reason();
                    } else {
                        bsonLen += dataSize;
                    }
                }
            }

            // Ordering is only reported for capped collections that have not looped; more than
            // one out-of-order record is flagged as an error.
            if (isCapped() && !_details->capLooped()) {
                output->append("cappedOutOfOrder", outOfOrder);
                if (outOfOrder > 1) {
                    results->valid = false;
                    results->errors.push_back("too many out of order records");
                }
            }
            output->append("objectsFound", n);

            if (full) {
                output->append("invalidObjects", nInvalid);
            }

            output->appendNumber("nQuantizedSize", nQuantizedSize);
            output->appendNumber("bytesWithHeaders", len);
            output->appendNumber("bytesWithoutHeaders", nlen);

            if (full) {
                output->appendNumber("bytesBson", bsonLen);
            }
        }  // end scanData

        // Section 5: walk the deleted-record list of every bucket.
        BSONArrayBuilder deletedListArray;
        for (int i = 0; i < Buckets; i++) {
            deletedListArray << _details->deletedListEntry(i).isNull();
        }

        int ndel = 0;
        long long delSize = 0;
        BSONArrayBuilder delBucketSizes;
        int incorrect = 0;
        for (int i = 0; i < Buckets; i++) {
            DiskLoc loc = _details->deletedListEntry(i);
            try {
                int k = 0;
                while (!loc.isNull()) {
                    // A live record must never also be on a deleted list.
                    if (recs.count(loc))
                        incorrect++;
                    ndel++;

                    if (loc.questionable()) {
                        if (isCapped() && !loc.isValid() && i == 1) {
                            /* the constructor for NamespaceDetails intentionally sets
                             * deletedList[1] to invalid see comments in namespace.h
                            */
                            break;
                        }

                        string err(str::stream()
                                   << "bad pointer in deleted record list: " << loc.toString()
                                   << " bucket: " << i << " k: " << k);
                        results->errors.push_back(err);
                        results->valid = false;
                        break;
                    }

                    const DeletedRecord* d = deletedRecordFor(loc);
                    delSize += d->lengthWithHeaders();
                    loc = d->nextDeleted();
                    k++;
                    txn->checkForInterrupt();
                }
                delBucketSizes << k;
            } catch (...) {
                results->errors.push_back((string) "exception in deleted chain for bucket " +
                                          BSONObjBuilder::numStr(i));
                results->valid = false;
            }
        }
        output->appendNumber("deletedCount", ndel);
        output->appendNumber("deletedSize", delSize);
        if (full) {
            output->append("delBucketSizes", delBucketSizes.arr());
        }

        if (incorrect) {
            results->errors.push_back(BSONObjBuilder::numStr(incorrect) +
                                      " records from datafile are in deleted list");
            results->valid = false;
        }

    } catch (const AssertionException&) {
        // Caught by const reference so the exception object is neither copied nor sliced.
        results->errors.push_back("exception during validate");
        results->valid = false;
    }

    return Status::OK();
}
/**
 * Removes the record identified by 'rid'.
 *
 * The record is unlinked from its neighbours in the record list, the owning extent's
 * firstRecord/lastRecord are adjusted if they pointed at it, and the collection stats are
 * decremented. If _isSystemIndexes is set the whole record is zeroed and not reused; otherwise
 * the first 4 data bytes are overwritten with 0xee and the record is passed to addDeletedRec().
 */
void RecordStoreV1Base::deleteRecord(OperationContext* txn, const RecordId& rid) {
    const DiskLoc victimLoc = DiskLoc::fromRecordId(rid);
    MmapV1RecordHeader* victim = recordFor(victimLoc);

    // The 0xee marker written further down needs at least 4 bytes of data to land in.
    invariant(victim->netLength() >= 4);

    // Step 1: splice the record out of the next/prev chain of its neighbours.
    if (victim->prevOfs() != DiskLoc::NullOfs) {
        MmapV1RecordHeader* predecessor = recordFor(getPrevRecordInExtent(txn, victimLoc));
        txn->recoveryUnit()->writingInt(predecessor->nextOfs()) = victim->nextOfs();
    }
    if (victim->nextOfs() != DiskLoc::NullOfs) {
        MmapV1RecordHeader* successor = recordFor(getNextRecord(txn, victimLoc));
        txn->recoveryUnit()->writingInt(successor->prevOfs()) = victim->prevOfs();
    }

    // Step 2: fix up the extent's head/tail pointers if they referred to this record.
    Extent* extent = _getExtent(txn, victim->myExtentLoc(victimLoc));
    if (extent->firstRecord == victimLoc) {
        txn->recoveryUnit()->writing(&extent->firstRecord);
        if (victim->nextOfs() != DiskLoc::NullOfs) {
            extent->firstRecord.set(victimLoc.a(), victim->nextOfs());
        } else {
            extent->firstRecord.Null();
        }
    }
    if (extent->lastRecord == victimLoc) {
        txn->recoveryUnit()->writing(&extent->lastRecord);
        if (victim->prevOfs() != DiskLoc::NullOfs) {
            extent->lastRecord.set(victimLoc.a(), victim->prevOfs());
        } else {
            extent->lastRecord.Null();
        }
    }

    // Step 3: account for the removal and dispose of the space.
    _details->incrementStats(txn, -victim->netLength(), -1);

    if (!_isSystemIndexes) {
        // Defensive marker: makes it detectable if a deleted location is still being used.
        void* marker = txn->recoveryUnit()->writingPtr(victim->data(), 4);
        memset(marker, 0xee, 4);
        addDeletedRec(txn, victimLoc);
        return;
    }

    // Temporary measure for system.indexes: do not reuse the space, and zero it out. We want
    // to be careful until this is validated more, as IndexDetails has pointers to this disk
    // location, so an incorrectly done remove would cause a lot of problems.
    const int fullLength = victim->lengthWithHeaders();
    memset(txn->recoveryUnit()->writingPtr(victim, fullLength), 0, fullLength);
}
// Example #11
// 0
/**
 * Builds a V1 record store layout inside the dummy extent manager / metadata from three
 * null-terminated arrays of (location, size) pairs: live records, deleted records and the
 * legacy grab bag. Any of the arrays may be NULL, but 'records' and 'drecs' must not both be.
 *
 * Extents 0..max referenced by any array are allocated; those actually referenced are linked
 * into the record store's extent list. Records are chained within their extents, deleted
 * records are chained per bucket (or as a single bucket-0 list when capped), and grab-bag
 * entries are chained from the legacy grab bag head. Finally assertStateV1RS() verifies that
 * the resulting state matches the request.
 */
void initializeV1RS(OperationContext* opCtx,
                    const LocAndSize* records,
                    const LocAndSize* drecs,
                    const LocAndSize* legacyGrabBag,
                    DummyExtentManager* em,
                    DummyRecordStoreV1MetaData* md) {
    // With both arrays NULL there would be nothing to create.
    invariant(records || drecs);

    // The extent manager and metadata must be untouched.
    invariant(em->numFiles() == 0);
    invariant(md->firstExtent(opCtx).isNull());

    // Pre-allocate extents, including ones that are not part of this record store.
    {
        using ExtentSizes = std::map<int, size_t>;
        ExtentSizes sizeByExtent;
        accumulateExtentSizeRequirements(records, &sizeByExtent);
        accumulateExtentSizeRequirements(drecs, &sizeByExtent);
        accumulateExtentSizeRequirements(legacyGrabBag, &sizeByExtent);
        invariant(!sizeByExtent.empty());

        const int highestExtent = sizeByExtent.rbegin()->first;
        for (int extentNum = 0; extentNum <= highestExtent; ++extentNum) {
            // Extents nobody asked for are still allocated, with size 0.
            ExtentSizes::const_iterator requested = sizeByExtent.find(extentNum);
            const size_t size = (requested == sizeByExtent.end()) ? 0 : requested->second;
            const DiskLoc loc = em->allocateExtent(opCtx, md->isCapped(), size, 0);

            // This function and assertState rely on DummyExtentManager handing out extent N
            // at file N, offset 0.
            invariant(loc.a() == extentNum);
            invariant(loc.getOfs() == 0);
        }

        // Chain together only the extents that belong to this record store.
        md->setFirstExtent(opCtx, DiskLoc(sizeByExtent.begin()->first, 0));
        md->setLastExtent(opCtx, DiskLoc(sizeByExtent.rbegin()->first, 0));
        ExtentSizes::iterator earlier = sizeByExtent.begin();
        for (ExtentSizes::iterator later = boost::next(earlier); later != sizeByExtent.end();
             earlier = later, ++later) {
            const DiskLoc earlierLoc(earlier->first, 0);
            const DiskLoc laterLoc(later->first, 0);
            em->getExtent(earlierLoc)->xnext = laterLoc;
            em->getExtent(laterLoc)->xprev = earlierLoc;
        }

        // This signals "done allocating new extents".
        if (md->isCapped()) {
            md->setDeletedListEntry(opCtx, 1, DiskLoc());
        }
    }

    // Live records: chain them inside each extent, in array order.
    if (records && !records[0].loc.isNull()) {
        int recIdx = 0;
        DiskLoc extLoc = md->firstExtent(opCtx);
        while (!extLoc.isNull()) {
            Extent* ext = em->getExtent(extLoc);
            int previousOfs = DiskLoc::NullOfs;

            // Consume every record that lives in this extent.
            for (; extLoc.a() == records[recIdx].loc.a(); ++recIdx) {
                const DiskLoc loc = records[recIdx].loc;
                const int size = records[recIdx].size;
                invariant(size >= MmapV1RecordHeader::HeaderSize);

                md->incrementStats(opCtx, size - MmapV1RecordHeader::HeaderSize, 1);

                if (ext->firstRecord.isNull()) {
                    ext->firstRecord = loc;
                }

                MmapV1RecordHeader* rec = em->recordForV1(loc);
                rec->lengthWithHeaders() = size;
                rec->extentOfs() = 0;

                rec->prevOfs() = previousOfs;
                previousOfs = loc.getOfs();

                const DiskLoc followingLoc = records[recIdx + 1].loc;
                if (followingLoc.a() != loc.a()) {
                    // Last record of this extent.
                    rec->nextOfs() = DiskLoc::NullOfs;
                    ext->lastRecord = loc;
                } else {
                    rec->nextOfs() = followingLoc.getOfs();
                }
            }
            extLoc = ext->xnext;
        }
        // Every requested record must have been placed.
        invariant(records[recIdx].loc.isNull());
    }

    // Deleted records.
    if (drecs && !drecs[0].loc.isNull()) {
        DiskLoc* linkToPatch = nullptr;  // nextDeleted() field of the previously placed drec
        int lastBucket = -1;
        for (int drecIdx = 0; !drecs[drecIdx].loc.isNull(); ++drecIdx) {
            const DiskLoc loc = drecs[drecIdx].loc;
            const int size = drecs[drecIdx].size;
            invariant(size >= MmapV1RecordHeader::HeaderSize);
            const int bucket = RecordStoreV1Base::bucket(size);

            if (md->isCapped()) {
                // All drecs form a single list in bucket 0
                if (linkToPatch != nullptr) {
                    *linkToPatch = loc;
                } else {
                    md->setDeletedListEntry(opCtx, 0, loc);
                }

                const bool beforeCapExtent = loc.a() < md->capExtent().a();
                if (beforeCapExtent && drecs[drecIdx + 1].loc.a() == md->capExtent().a()) {
                    // Bucket 1 is known as cappedLastDelRecLastExtent
                    md->setDeletedListEntry(opCtx, 1, loc);
                }
            } else if (bucket == lastBucket) {
                // Same bucket as the previous drec: extend that bucket's chain.
                *linkToPatch = loc;
            } else {
                invariant(bucket > lastBucket);  // if this fails, drecs weren't sorted by bucket
                md->setDeletedListEntry(opCtx, bucket, loc);
                lastBucket = bucket;
            }

            DeletedRecord* deleted = &em->recordForV1(loc)->asDeleted();
            deleted->lengthWithHeaders() = size;
            deleted->extentOfs() = 0;
            deleted->nextDeleted() = DiskLoc();
            linkToPatch = &deleted->nextDeleted();
        }
    }

    // Legacy grab bag: a single chain hanging off the metadata's grab bag head.
    if (legacyGrabBag && !legacyGrabBag[0].loc.isNull()) {
        invariant(!md->isCapped());  // capped should have an empty legacy grab bag.

        DiskLoc* linkToPatch = nullptr;
        for (int grabBagIdx = 0; !legacyGrabBag[grabBagIdx].loc.isNull(); ++grabBagIdx) {
            const DiskLoc loc = legacyGrabBag[grabBagIdx].loc;
            const int size = legacyGrabBag[grabBagIdx].size;
            invariant(size >= MmapV1RecordHeader::HeaderSize);

            if (grabBagIdx != 0) {
                *linkToPatch = loc;
            } else {
                md->setDeletedListLegacyGrabBag(opCtx, loc);
            }

            DeletedRecord* deleted = &em->recordForV1(loc)->asDeleted();
            deleted->lengthWithHeaders() = size;
            deleted->extentOfs() = 0;
            deleted->nextDeleted() = DiskLoc();
            linkToPatch = &deleted->nextDeleted();
        }
    }

    // Make sure we set everything up as requested.
    assertStateV1RS(opCtx, records, drecs, legacyGrabBag, em, md);
}