/**
 * Pushes the deleted record stored at 'dloc' onto the front of a deleted list.
 *
 * The list is the one selected by bucket() for the record's lengthWithHeaders(). The record's
 * nextDeleted link is pointed at the current head of that list, and the list head is then
 * replaced by 'dloc'.
 */
void SimpleRecordStoreV1::addDeletedRec(OperationContext* txn, const DiskLoc& dloc) {
    DeletedRecord* const deleted = drec(dloc);

    DEBUGGING log() << "TEMP: add deleted rec " << dloc.toString() << ' ' << std::hex
                    << deleted->extentOfs() << endl;

    const int listIdx = bucket(deleted->lengthWithHeaders());

    // Chain the record in front of whatever currently heads its bucket...
    const DiskLoc previousHead = _details->deletedListEntry(listIdx);
    DiskLoc* const nextLink = txn->recoveryUnit()->writing(&deleted->nextDeleted());
    *nextLink = previousHead;

    // ...and publish it as the new head.
    _details->setDeletedListEntry(txn, listIdx, dloc);
}
示例#2
0
    /**
     * Fills in this extent's header fields for namespace 'nsname' and initializes one
     * DeletedRecord at the location and length reported by extent_getEmptyLoc().
     *
     * Assumes the extent is already zeroed -- perhaps insufficient for block 'reuse'.
     *
     * Returns the location of that DeletedRecord.
     */
    DiskLoc Extent::init(const char *nsname, int _length, int _fileNo, int _offset, bool capped) {
        // Header: signature, own location, sibling links, owner and size.
        magic = extentSignature;
        myLoc.set(_fileNo, _offset);
        xnext.Null();
        xprev.Null();
        nsDiagnostic = nsname;
        length = _length;

        // A fresh extent holds no records.
        firstRecord.Null();
        lastRecord.Null();

        // Both of these are out-parameters filled in by extent_getEmptyLoc().
        DiskLoc freeLoc;
        int freeLen;
        extent_getEmptyLoc(nsname, myLoc, _length, capped, freeLoc, freeLen);

        DeletedRecord* const freeRec =
            getDur().writing(DataFileMgr::getDeletedRecord(freeLoc));
        freeRec->lengthWithHeaders() = freeLen;
        freeRec->extentOfs() = myLoc.getOfs();
        freeRec->nextDeleted().Null();

        return freeLoc;
    }
示例#3
0
    /**
     * Initializes a single DeletedRecord just past the header of extent 'e' (located at
     * 'extDiskLoc') and returns its location.
     *
     * The record's length is the extent length minus the header size. For non-capped
     * namespaces whose name contains '$' and whose available length is at least 32KB, the
     * record's offset is first rounded up to a multiple of 0x1000 and its length is reduced
     * by the bytes skipped.
     */
    DiskLoc RecordStoreV1Base::_findFirstSpot( OperationContext* txn,
                                               const DiskLoc& extDiskLoc, Extent* e ) {
        const int headerBytes = Extent::HeaderSize();

        DiskLoc spot = extDiskLoc;
        spot.inc( headerBytes );
        int spotLength = e->length - headerBytes;

        // A '$' in the namespace of a large, non-capped store means this is probably an
        // index, so skip forward to keep its records page aligned.
        const bool shouldPageAlign =
            spotLength >= 32*1024 && _ns.find('$') != string::npos && !isCapped();
        if ( shouldPageAlign ) {
            int& ofs = spot.GETOFS();
            const int alignedOfs = (ofs + 0xfff) & ~0xfff;
            spotLength -= (alignedOfs - ofs);
            dassert( spotLength > 0 );
            ofs = alignedOfs;
        }

        DeletedRecord* const freeRec = txn->recoveryUnit()->writing(drec(spot));
        freeRec->lengthWithHeaders() = spotLength;
        freeRec->extentOfs() = e->myLoc.getOfs();
        freeRec->nextDeleted().Null();

        return spot;
    }
示例#4
0
    /**
     * Re-targets this already-initialized extent at namespace 'nsname'.
     *
     * Raises assertion 10360 through msgasserted() if the extent's signature does not match
     * extentSignature. Otherwise the extent is marked empty and one DeletedRecord is
     * initialized at the location and length reported by extent_getEmptyLoc().
     *
     * Returns the location of that DeletedRecord.
     */
    DiskLoc Extent::_reuse(const char *nsname, bool capped) {
        LOG(3) << "_reuse extent was:" << nsDiagnostic.toString() << " now:" << nsname << endl;

        const bool signatureOk = (magic == extentSignature);
        if (!signatureOk) {
            StringBuilder msg;
            msg << "bad extent signature " << integerToHex(magic);
            msg << " for namespace '" << nsDiagnostic.toString();
            msg << "' found in Extent::_reuse";
            msgasserted(10360, msg.str());
        }

        nsDiagnostic = nsname;
        markEmpty();

        // Both of these are out-parameters filled in by extent_getEmptyLoc().
        DiskLoc freeLoc;
        int freeLen;
        extent_getEmptyLoc(nsname, myLoc, length, capped, freeLoc, freeLen);

        // TODO: this duplicates the tail of Extent::init
        DeletedRecord* const freeRec =
            getDur().writing(DataFileMgr::getDeletedRecord(freeLoc));
        freeRec->lengthWithHeaders() = freeLen;
        freeRec->extentOfs() = myLoc.getOfs();
        freeRec->nextDeleted().Null();

        return freeLoc;
    }
    /**
     * Builds a V1 record store layout inside the dummy extent manager 'em' and metadata 'md'
     * from explicit descriptions of live records and deleted records.
     *
     * 'records' and 'drecs' may each be NULL, but not both. When non-NULL, each is an array
     * that is walked until an entry whose 'loc' is null is reached. For every entry, 'loc.a()'
     * is used as the extent (file) number and 'size' as the record length including headers.
     *
     * Steps performed:
     *   1. Allocate extents 0..maxExtent, where maxExtent is the largest extent number
     *      mentioned in either array, and link those that are mentioned into a chain.
     *   2. Write the headers of every live record and set each extent's first/last record.
     *   3. Thread the deleted records onto the deleted lists in 'md'.
     *   4. Call assertStateV1RS() to confirm that the result matches the request.
     *
     * Requires that 'em' has no files and 'md' has no first extent on entry.
     */
    void initializeV1RS(OperationContext* txn,
                        const LocAndSize* records,
                        const LocAndSize* drecs,
                        DummyExtentManager* em,
                        DummyRecordStoreV1MetaData* md) {
        invariant(records || drecs); // if both are NULL nothing is being created...

        // Need to start with a blank slate
        invariant(em->numFiles() == 0);
        invariant(md->firstExtent().isNull());

        // pre-allocate extents (even extents that aren't part of this RS)
        {
            // Keyed by extent number; filled in by accumulateExtentSizeRequirements().
            typedef std::map<int, size_t> ExtentSizes;
            ExtentSizes extentSizes;
            accumulateExtentSizeRequirements(records, &extentSizes);
            accumulateExtentSizeRequirements(drecs, &extentSizes);
            invariant(!extentSizes.empty());

            // Allocate every extent number up to the largest one mentioned. Numbers that are
            // not mentioned are allocated with a requested size of 0.
            const int maxExtent = extentSizes.rbegin()->first;
            for (int i = 0; i <= maxExtent; i++) {
                const size_t size = extentSizes.count(i) ? extentSizes[i] : 0;
                const DiskLoc loc = em->allocateExtent(txn, md->isCapped(), size, 0);

                // This function and assertState depend on these details of DummyExtentManager
                invariant(loc.a() == i);
                invariant(loc.getOfs() == 0);
            }

            // link together extents that should be part of this RS
            md->setFirstExtent(txn, DiskLoc(extentSizes.begin()->first, 0));
            md->setLastExtent(txn, DiskLoc(extentSizes.rbegin()->first, 0));
            // Walk adjacent pairs (a, b) of mentioned extents. The iterator is advanced
            // inside the body, between reading 'a' and reading 'b'.
            for (ExtentSizes::iterator it = extentSizes.begin();
                    boost::next(it) != extentSizes.end(); /* ++it */ ) {
                const int a = it->first;
                ++it;
                const int b = it->first;
                em->getExtent(DiskLoc(a, 0))->xnext = DiskLoc(b, 0);
                em->getExtent(DiskLoc(b, 0))->xprev = DiskLoc(a, 0);
            }

            // This signals "done allocating new extents".
            if (md->isCapped())
                md->setDeletedListEntry(txn, 1, DiskLoc());
        }

        if (records && !records[0].loc.isNull()) {
            // 'recIdx' only moves forward while the extent chain is walked once, so records
            // are consumed in array order, extent by extent.
            int recIdx = 0;
            DiskLoc extLoc = md->firstExtent();
            while (!extLoc.isNull()) {
                Extent* ext = em->getExtent(extLoc);
                int prevOfs = DiskLoc::NullOfs;
                while (extLoc.a() == records[recIdx].loc.a()) { // for all records in this extent
                    const DiskLoc loc = records[recIdx].loc;
                    const int size = records[recIdx].size;;
                    invariant(size >= Record::HeaderSize);

                    // Stats are given the record size without its header.
                    md->incrementStats(txn, size - Record::HeaderSize, 1);

                    if (ext->firstRecord.isNull())
                        ext->firstRecord = loc;

                    Record* rec = em->recordForV1(loc);
                    rec->lengthWithHeaders() = size;
                    rec->extentOfs() = 0;

                    rec->prevOfs() = prevOfs;
                    prevOfs = loc.getOfs();

                    // Look one entry ahead. This reads the terminating null entry when
                    // 'loc' is the last record in the array.
                    const DiskLoc nextLoc = records[recIdx + 1].loc;
                    if (nextLoc.a() == loc.a()) { // if next is in same extent
                        rec->nextOfs() = nextLoc.getOfs();
                    }
                    else {
                        rec->nextOfs() = DiskLoc::NullOfs;
                        ext->lastRecord = loc;
                    }

                    recIdx++;
                }
                extLoc = ext->xnext;
            }
            // Every record must have been consumed by the walk above.
            invariant(records[recIdx].loc.isNull());
        }

        if (drecs && !drecs[0].loc.isNull()) {
            int drecIdx = 0;
            // Address of the previous deleted record's nextDeleted link, or NULL before the
            // first deleted record has been written.
            DiskLoc* prevNextPtr = NULL;
            int lastBucket = -1;
            while (!drecs[drecIdx].loc.isNull()) {
                const DiskLoc loc = drecs[drecIdx].loc;
                const int size = drecs[drecIdx].size;
                invariant(size >= Record::HeaderSize);
                const int bucket = RecordStoreV1Base::bucket(size);

                if (md->isCapped()) {
                    // All drecs form a single list in bucket 0
                    if (prevNextPtr == NULL) {
                        md->setDeletedListEntry(txn, 0, loc);
                    }
                    else {
                        *prevNextPtr = loc;
                    }

                    // Record the drec that sits in an extent before capExtent when the
                    // following drec is in capExtent.
                    if (loc.a() < md->capExtent().a()
                            && drecs[drecIdx + 1].loc.a() == md->capExtent().a()) {
                        // Bucket 1 is known as cappedLastDelRecLastExtent
                        md->setDeletedListEntry(txn, 1, loc);
                    }
                } 
                else if (bucket != lastBucket) {
                    // First drec of a new bucket becomes the head of that bucket's list.
                    invariant(bucket > lastBucket); // if this fails, drecs weren't sorted by bucket
                    md->setDeletedListEntry(txn, bucket, loc);
                    lastBucket = bucket;
                }
                else {
                    // Same bucket as the previous drec: append to that list.
                    *prevNextPtr = loc;
                }

                DeletedRecord* drec = &em->recordForV1(loc)->asDeleted();
                drec->lengthWithHeaders() = size;
                drec->extentOfs() = 0;
                // Start as the tail; a later drec may overwrite this link via prevNextPtr.
                drec->nextDeleted() = DiskLoc();
                prevNextPtr = &drec->nextDeleted();

                drecIdx++;
            }
        }

        // Make sure we set everything up as requested.
        assertStateV1RS(records, drecs, em, md);
    }
示例#6
0
/**
 * Tries to satisfy an allocation of 'lenToAllocRaw' bytes (rounded up to a multiple of 4) from
 * the deleted-record lists.
 *
 * Only the head of each deleted list is examined, starting at the bucket for the rounded size
 * and moving to higher buckets. The first head that is large enough is unlinked and used. If
 * the space left over is at least bucketSizes[0], it is split off into a new deleted record
 * and handed to addDeletedRec().
 *
 * Returns the location of the allocated record, or a null DiskLoc if no list head is large
 * enough.
 */
DiskLoc SimpleRecordStoreV1::_allocFromExistingExtents(OperationContext* txn, int lenToAllocRaw) {
    // Slowly drain the deletedListLegacyGrabBag: each call pops one record off it and files
    // that record in the correct deleted list. This ensures we won't orphan any data when
    // upgrading from old versions, without needing a long upgrade phase. It happens before the
    // allocation attempt so the new space can be used immediately.
    const DiskLoc grabBagHead = _details->deletedListLegacyGrabBag();
    if (!grabBagHead.isNull()) {
        _details->setDeletedListLegacyGrabBag(txn, drec(grabBagHead)->nextDeleted());
        addDeletedRec(txn, grabBagHead);
    }

    // Round the request up to a multiple of 4.
    const int lenToAlloc = (lenToAllocRaw + 3) & ~3;

    freelistAllocs.increment();

    DiskLoc loc;
    DeletedRecord* dr = NULL;

    // Only look at the first entry in each bucket. This works because we are either
    // quantizing or allocating fixed-size blocks.
    int sourceBucket = bucket(lenToAlloc);
    while (sourceBucket < Buckets) {
        const DiskLoc head = _details->deletedListEntry(sourceBucket);
        if (!head.isNull()) {
            DeletedRecord* const candidate = drec(head);
            if (candidate->lengthWithHeaders() >= lenToAlloc) {
                loc = head;
                dr = candidate;
                break;
            }
        }
        sourceBucket++;
    }

    if (dr == NULL) {
        // no space
        return DiskLoc();
    }

    // Unlink the chosen record from the deleted list it was found on.
    _details->setDeletedListEntry(txn, sourceBucket, dr->nextDeleted());
    *txn->recoveryUnit()->writing(&dr->nextDeleted()) = DiskLoc().setInvalid();  // defensive

    invariant(dr->extentOfs() < loc.getOfs());

    // Take the whole DeletedRecord unless the left over space is at least as large as our
    // smallest allocation size.
    const int remainingLength = dr->lengthWithHeaders() - lenToAlloc;
    if (remainingLength < bucketSizes[0]) {
        return loc;
    }

    // Shrink the chosen record and turn its tail into a new deleted record.
    txn->recoveryUnit()->writingInt(dr->lengthWithHeaders()) = lenToAlloc;

    const DiskLoc tailLoc = DiskLoc(loc.a(), loc.getOfs() + lenToAlloc);
    DeletedRecord* const tail = txn->recoveryUnit()->writing(drec(tailLoc));
    tail->extentOfs() = dr->extentOfs();
    tail->lengthWithHeaders() = remainingLength;
    tail->nextDeleted().Null();

    addDeletedRec(txn, tailLoc);

    return loc;
}
    /**
     * Tries to satisfy an allocation of 'lenToAlloc' bytes (rounded up to a multiple of 4)
     * from the deleted-record lists.
     *
     * Phase 1 scans the deleted lists, starting at bucket(lenToAlloc), for the smallest
     * sufficiently large record it is willing to look at, and unlinks it from its list.
     * Phase 2 decides whether to hand out the whole record or to split it and return the
     * remainder to the deleted lists via addDeletedRec().
     *
     * Returns the location of the allocated record, or a null DiskLoc once every bucket up
     * to MaxBucket has been tried without finding a fit.
     */
    DiskLoc SimpleRecordStoreV1::_allocFromExistingExtents( OperationContext* txn,
                                                            int lenToAlloc ) {
        // align size up to a multiple of 4
        lenToAlloc = (lenToAlloc + (4-1)) & ~(4-1);

        freelistAllocs.increment();
        DiskLoc loc;
        {
            // 'prev' is the address of the link that points at 'cur', or 0 when 'cur' is the
            // head of a list. 'bestprev' is the same for 'bestmatch'.
            DiskLoc *prev = 0;
            DiskLoc *bestprev = 0;
            DiskLoc bestmatch;
            int bestmatchlen = INT_MAX; // sentinel meaning we haven't found a record big enough
            int b = bucket(lenToAlloc);
            DiskLoc cur = _details->deletedListEntry(b);
            
            // Once a fit has been found, 'extra' bounds how many records are examined before
            // the search stops. 'chain' counts records visited since it was last reset.
            int extra = 5; // look for a better fit, a little.
            int chain = 0;
            while ( 1 ) {
                { // defensive check
                    int fileNumber = cur.a();
                    int fileOffset = cur.getOfs();
                    if (fileNumber < -1 || fileNumber >= 100000 || fileOffset < 0) {
                        StringBuilder sb;
                        sb << "Deleted record list corrupted in collection " << _ns
                           << ", bucket " << b
                           << ", link number " << chain
                           << ", invalid link is " << cur.toString()
                           << ", throwing Fatal Assertion";
                        log() << sb.str() << endl;
                        fassertFailed(16469);
                    }
                }
                if ( cur.isNull() ) {
                    // move to next bucket.  if we were doing "extra", just break
                    if ( bestmatchlen < INT_MAX )
                        break;

                    if ( chain > 0 ) {
                        // if we looked at things in the right bucket, but they were not suitable
                        freelistBucketExhausted.increment();
                    }

                    b++;
                    if ( b > MaxBucket ) {
                        // out of space. alloc a new extent.
                        freelistIterations.increment( 1 + chain );
                        return DiskLoc();
                    }
                    cur = _details->deletedListEntry(b);
                    prev = 0;
                    continue;
                }
                DeletedRecord *r = drec(cur);
                // Keep the smallest record seen so far that is still large enough.
                if ( r->lengthWithHeaders() >= lenToAlloc &&
                     r->lengthWithHeaders() < bestmatchlen ) {
                    bestmatchlen = r->lengthWithHeaders();
                    bestmatch = cur;
                    bestprev = prev;
                    if (r->lengthWithHeaders() == lenToAlloc)
                        // exact match, stop searching
                        break;
                }
                // 'extra' is only decremented once a fit exists (short-circuit evaluation).
                if ( bestmatchlen < INT_MAX && --extra <= 0 )
                    break;
                if ( ++chain > 30 && b <= MaxBucket ) {
                    // too slow, force move to next bucket to grab a big chunk
                    // Nulling 'cur' makes the next iteration take the cur.isNull() branch
                    // above, which is where 'b' is advanced.
                    //b++;
                    freelistIterations.increment( chain );
                    chain = 0;
                    cur.Null();
                }
                else {
                    cur = r->nextDeleted();
                    prev = &r->nextDeleted();
                }
            }

            // unlink ourself from the deleted list
            DeletedRecord *bmr = drec(bestmatch);
            if ( bestprev ) {
                // Mid-list: make the predecessor skip over the chosen record.
                *txn->recoveryUnit()->writing(bestprev) = bmr->nextDeleted();
            }
            else {
                // should be the front of a free-list
                int myBucket = bucket(bmr->lengthWithHeaders());
                invariant( _details->deletedListEntry(myBucket) == bestmatch );
                _details->setDeletedListEntry(txn, myBucket, bmr->nextDeleted());
            }
            *txn->recoveryUnit()->writing(&bmr->nextDeleted()) = DiskLoc().setInvalid(); // defensive.
            invariant(bmr->extentOfs() < bestmatch.getOfs());

            freelistIterations.increment( 1 + chain );
            loc = bestmatch;
        }

        // NOTE(review): the loop above appears to exit only after a fit was found or by
        // returning directly, so this looks like a purely defensive check -- confirm.
        if ( loc.isNull() )
            return loc;

        // determine if we should chop up

        DeletedRecord *r = drec(loc);

        /* note we want to grab from the front so our next pointers on disk tend
        to go in a forward direction which is important for performance. */
        int regionlen = r->lengthWithHeaders();
        invariant( r->extentOfs() < loc.getOfs() );

        // Do not split when the remainder would be under 24 bytes or under one eighth of
        // the requested length.
        int left = regionlen - lenToAlloc;
        if ( left < 24 || left < (lenToAlloc / 8) ) {
            // you get the whole thing.
            return loc;
        }

        // don't quantize:
        //   - $ collections (indexes) as we already have those aligned the way we want SERVER-8425
        if ( _normalCollection ) {
            // we quantize here so that it only impacts newly sized records
            // this prevents oddities with older records and space re-use SERVER-8435
            // The quantized length is capped at the length of the record being split.
            lenToAlloc = std::min( r->lengthWithHeaders(),
                                   quantizeAllocationSpace( lenToAlloc ) );
            left = regionlen - lenToAlloc;

            if ( left < 24 ) {
                // you get the whole thing.
                return loc;
            }
        }

        /* split off some for further use. */
        // Shrink the allocated record to lenToAlloc; the 'left' bytes after it become a new
        // deleted record.
        txn->recoveryUnit()->writingInt(r->lengthWithHeaders()) = lenToAlloc;
        DiskLoc newDelLoc = loc;
        newDelLoc.inc(lenToAlloc);
        DeletedRecord* newDel = drec(newDelLoc);
        DeletedRecord* newDelW = txn->recoveryUnit()->writing(newDel);
        newDelW->extentOfs() = r->extentOfs();
        newDelW->lengthWithHeaders() = left;
        newDelW->nextDeleted().Null();

        addDeletedRec( txn, newDelLoc );
        return loc;
    }