Example #1
0
    /**
     * Removes the record stored at 'dl'.
     *
     * The record is unlinked from the prev/next chain of its extent, the extent's
     * first/last pointers are repaired if they referenced it, the collection stats are
     * decremented, and finally the space is either zeroed (system.indexes) or handed to
     * addDeletedRec().
     */
    void RecordStoreV1Base::deleteRecord( TransactionExperiment* txn, const DiskLoc& dl ) {

        Record* victim = recordFor( dl );

        // Splice the neighbours together so that the chain skips over the victim.
        if ( victim->prevOfs() != DiskLoc::NullOfs ) {
            Record* before = recordFor( getPrevRecordInExtent( dl ) );
            txn->writingInt( before->nextOfs() ) = victim->nextOfs();
        }
        if ( victim->nextOfs() != DiskLoc::NullOfs ) {
            Record* after = recordFor( getNextRecord( dl ) );
            txn->writingInt( after->prevOfs() ) = victim->prevOfs();
        }

        // Repair the owning extent's head/tail pointers if either one named the victim.
        Extent* extent = txn->writing( _getExtent( _getExtentLocForRecord( dl ) ) );
        if ( extent->firstRecord == dl ) {
            if ( victim->nextOfs() != DiskLoc::NullOfs )
                extent->firstRecord.set( dl.a(), victim->nextOfs() );
            else
                extent->firstRecord.Null();
        }
        if ( extent->lastRecord == dl ) {
            if ( victim->prevOfs() != DiskLoc::NullOfs )
                extent->lastRecord.set( dl.a(), victim->prevOfs() );
            else
                extent->lastRecord.Null();
        }

        // One record fewer, and its payload no longer counts towards the data size.
        _details->incrementStats( txn, -1 * victim->netLength(), -1 );

        if ( !_isSystemIndexes ) {
            DEV {
                // Debug builds clobber the first 8 data bytes before the space is recycled.
                unsigned long long* firstWord =
                    reinterpret_cast<unsigned long long*>( victim->data() );
                *txn->writing( firstWord ) = 0;
            }
            addDeletedRec( txn, dl );
            return;
        }

        // system.indexes: the space is deliberately not reused and is wiped instead.
        // IndexDetails holds pointers to these disk locations, so a botched remove here
        // would be very costly; stay conservative until this path is validated further.
        const int wholeLength = victim->lengthWithHeaders();
        memset( txn->writingPtr( victim, wholeLength ), 0, wholeLength );

    }
Example #2
0
    /**
     * Overwrites the record at 'loc' with a copy of the 'len' bytes at 'data'.
     *
     * Returns 'loc' on success. In a capped store an update that would make the record
     * larger is rejected with InternalError (code 10003). 'enforceQuota' and 'notifier'
     * are not consulted by this implementation.
     */
    StatusWith<DiskLoc> HeapRecordStore::updateRecord(OperationContext* txn,
                                                      const DiskLoc& loc,
                                                      const char* data,
                                                      int len,
                                                      bool enforceQuota,
                                                      UpdateMoveNotifier* notifier ) {
        HeapRecord* target = recordFor( loc );
        const int previousLen = target->size;

        const bool wouldGrow = len > previousLen;
        if ( _isCapped && wouldGrow ) {
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "failing update: objects in a capped ns cannot grow",
                                        10003 );
        }

        // Assemble the replacement off to the side first.
        HeapRecord replacement( len );
        memcpy( replacement.data.get(), data, len );

        // Hand the pre-update record to the recovery unit before it is overwritten.
        txn->recoveryUnit()->registerChange( new RemoveChange( _data, loc, *target ) );
        _data->dataSize += len - previousLen;
        *target = replacement;

        cappedDeleteAsNeeded( txn );

        return StatusWith<DiskLoc>( loc );
    }
    /**
     * Allocates a record large enough for 'len' payload bytes plus the record header,
     * copies 'data' into it, appends it to its extent's record list and bumps the stats.
     *
     * Returns the new location, or the failed allocation status unchanged.
     */
    StatusWith<DiskLoc> RecordStoreV1Base::_insertRecord( OperationContext* txn,
                                                          const char* data,
                                                          int len,
                                                          bool enforceQuota ) {

        const int minBytes = len + Record::HeaderSize;
        const int allocBytes = getRecordAllocationSize( minBytes );
        fassert( 17208, allocBytes >= minBytes );

        StatusWith<DiskLoc> allocated = allocRecord( txn, allocBytes, enforceQuota );
        if ( !allocated.isOK() )
            return allocated;

        const DiskLoc where = allocated.getValue();

        Record* rec = recordFor( where );
        fassert( 17210, rec->lengthWithHeaders() >= allocBytes );

        // Declare the write over the whole allocation, then fill in the payload.
        rec = reinterpret_cast<Record*>( txn->recoveryUnit()->writingPtr( rec, allocBytes ) );
        memcpy( rec->data(), data, len );

        _addRecordToRecListInExtent( txn, rec, where );
        _details->incrementStats( txn, rec->netLength(), 1 );

        return allocated;
    }
    /**
     * Inserts a document produced by 'doc'.
     *
     * Documents smaller than 4 bytes are rejected with InvalidLength. The allocation is
     * padded via getRecordAllocationSize() only when the writer asks for padding.
     * Returns the new location, or the failed allocation status unchanged.
     */
    StatusWith<DiskLoc> RecordStoreV1Base::insertRecord( OperationContext* txn,
                                                         const DocWriter* doc,
                                                         bool enforceQuota ) {
        const int docSize = doc->documentSize();
        if ( docSize < 4 ) {
            return StatusWith<DiskLoc>( ErrorCodes::InvalidLength,
                                        "record has to be >= 4 bytes" );
        }

        const int unpaddedBytes = docSize + Record::HeaderSize;
        const int allocBytes = doc->addPadding() ? getRecordAllocationSize( unpaddedBytes )
                                                 : unpaddedBytes;

        StatusWith<DiskLoc> allocated = allocRecord( txn, allocBytes, enforceQuota );
        if ( !allocated.isOK() )
            return allocated;

        const DiskLoc where = allocated.getValue();

        Record* rec = recordFor( where );
        fassert( 17319, rec->lengthWithHeaders() >= allocBytes );

        // Declare the write, then let the writer serialise straight into the record.
        rec = reinterpret_cast<Record*>( txn->recoveryUnit()->writingPtr( rec, allocBytes ) );
        doc->writeDocument( rec->data() );

        _addRecordToRecListInExtent( txn, rec, where );
        _details->incrementStats( txn, rec->netLength(), 1 );
        _paddingFits( txn );

        return allocated;
    }
Example #5
0
    /**
     * Applies a list of byte-range patches ('damages') to the record at 'loc'.
     *
     * A same-sized copy of the record is made, the pre-update record is registered with
     * the recovery unit, and each damage event copies 'size' bytes from
     * damageSource + sourceOffset to the copy at targetOffset. 'oldRec' is not consulted.
     * Always returns Status::OK().
     */
    Status HeapRecordStore::updateWithDamages( OperationContext* txn,
                                               const DiskLoc& loc,
                                               const RecordData& oldRec,
                                               const char* damageSource,
                                               const mutablebson::DamageVector& damages ) {
        HeapRecord* stored = recordFor( loc );
        const int len = stored->size;

        // Work on a private copy of the current bytes.
        HeapRecord patched( len );
        memcpy( patched.data.get(), stored->data.get(), len );

        txn->recoveryUnit()->registerChange( new RemoveChange( _data, loc, *stored ) );
        *stored = patched;

        cappedDeleteAsNeeded( txn );

        char* const base = patched.data.get();
        typedef mutablebson::DamageVector::const_iterator DamageIt;
        const DamageIt stop = damages.end();
        for ( DamageIt it = damages.begin(); it != stop; ++it ) {
            std::memcpy( base + it->targetOffset,
                         damageSource + it->sourceOffset,
                         it->size );
        }

        // Publish the patched copy as the record's contents.
        *stored = patched;

        return Status::OK();
    }
/**
 * Allocates space for 'len' payload bytes plus the record header, copies 'data' in,
 * links the record into its extent and updates the stats.
 *
 * The allocation is quantized only when shouldPadInserts() says so. Returns the new
 * RecordId, or the status of the failed allocation.
 */
StatusWith<RecordId> RecordStoreV1Base::_insertRecord(OperationContext* txn,
                                                      const char* data,
                                                      int len,
                                                      bool enforceQuota) {
    const int bytesNeeded = len + MmapV1RecordHeader::HeaderSize;
    int bytesToAllocate = bytesNeeded;
    if (shouldPadInserts()) {
        bytesToAllocate = quantizeAllocationSpace(bytesNeeded);
    }
    fassert(17208, bytesToAllocate >= bytesNeeded);

    StatusWith<DiskLoc> allocated = allocRecord(txn, bytesToAllocate, enforceQuota);
    if (!allocated.isOK()) {
        return StatusWith<RecordId>(allocated.getStatus());
    }
    const DiskLoc where = allocated.getValue();

    MmapV1RecordHeader* header = recordFor(where);
    fassert(17210, header->lengthWithHeaders() >= bytesNeeded);

    // Only the header + payload range is declared for writing; padding is left alone.
    header = reinterpret_cast<MmapV1RecordHeader*>(
        txn->recoveryUnit()->writingPtr(header, bytesNeeded));
    memcpy(header->data(), data, len);

    _addRecordToRecListInExtent(txn, header, where);
    _details->incrementStats(txn, header->netLength(), 1);

    return StatusWith<RecordId>(where.toRecordId());
}
/**
 * Inserts a document produced by 'doc'.
 *
 * Rejects documents under 4 bytes, and documents whose size plus header exceeds
 * MaxAllowedAllocation, with InvalidLength. The allocation is quantized only when both
 * the writer and the store ask for padding. Returns the new RecordId or the status of
 * the failed allocation.
 */
StatusWith<RecordId> RecordStoreV1Base::insertRecord(OperationContext* txn,
                                                     const DocWriter* doc,
                                                     bool enforceQuota) {
    const int docSize = doc->documentSize();
    if (docSize < 4) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be >= 4 bytes");
    }

    const int bytesNeeded = docSize + MmapV1RecordHeader::HeaderSize;
    if (bytesNeeded > MaxAllowedAllocation) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
    }

    int bytesToAllocate = bytesNeeded;
    if (doc->addPadding() && shouldPadInserts()) {
        bytesToAllocate = quantizeAllocationSpace(bytesNeeded);
    }

    StatusWith<DiskLoc> allocated = allocRecord(txn, bytesToAllocate, enforceQuota);
    if (!allocated.isOK()) {
        return StatusWith<RecordId>(allocated.getStatus());
    }
    const DiskLoc where = allocated.getValue();

    MmapV1RecordHeader* header = recordFor(where);
    fassert(17319, header->lengthWithHeaders() >= bytesNeeded);

    // Declare the header + payload range for writing, then serialise in place.
    header = reinterpret_cast<MmapV1RecordHeader*>(
        txn->recoveryUnit()->writingPtr(header, bytesNeeded));
    doc->writeDocument(header->data());

    _addRecordToRecListInExtent(txn, header, where);
    _details->incrementStats(txn, header->netLength(), 1);

    return StatusWith<RecordId>(where.toRecordId());
}
void RecordStoreV1Base::IntraExtentIterator::advance() {
    if (_curr.isNull())
        return;

    const MmapV1RecordHeader* rec = recordFor(_curr);
    const int nextOfs = _forward ? rec->nextOfs() : rec->prevOfs();
    _curr = (nextOfs == DiskLoc::NullOfs ? DiskLoc() : DiskLoc(_curr.a(), nextOfs));
}
 /**
  * Appends record 'r' (located at 'loc') to the tail of its extent's record list.
  *
  * 'r' itself is written directly; the extent pointers and the previous tail's link
  * are written through the recovery unit.
  */
 void RecordStoreV1Base::_addRecordToRecListInExtent(OperationContext* txn,
                                                     Record *r,
                                                     DiskLoc loc) {
     dassert( recordFor(loc) == r );
     Extent* extent = _getExtent( _getExtentLocForRecord( loc ) );

     if ( !extent->lastRecord.isNull() ) {
         // Non-empty extent: hang the new record off the current tail.
         Record* formerTail = recordFor( extent->lastRecord );
         r->prevOfs() = extent->lastRecord.getOfs();
         r->nextOfs() = DiskLoc::NullOfs;
         txn->recoveryUnit()->writingInt( formerTail->nextOfs() ) = loc.getOfs();
         *txn->recoveryUnit()->writing( &extent->lastRecord ) = loc;
         return;
     }

     // Empty extent: the new record is both head and tail and has no neighbours.
     *txn->recoveryUnit()->writing( &extent->firstRecord ) = loc;
     *txn->recoveryUnit()->writing( &extent->lastRecord ) = loc;
     r->nextOfs() = DiskLoc::NullOfs;
     r->prevOfs() = r->nextOfs();
 }
Example #10
0
    /**
     * Returns the location of the record following 'loc' within the same extent, or a
     * null DiskLoc when 'loc' is the last record of its extent.
     */
    DiskLoc RecordStoreV1Base::getNextRecordInExtent( const DiskLoc& loc ) const {
        const int followingOfs = recordFor( loc )->nextOfs();

        if ( followingOfs != DiskLoc::NullOfs ) {
            // Sanity check on the stored offset before trusting it.
            fassert( 17441, abs(followingOfs) >= 8 );
            return DiskLoc( loc.a(), followingOfs );
        }

        return DiskLoc();
    }
Example #11
0
    /**
     * Returns the location of the record following 'loc' within the same extent, or a
     * null DiskLoc when there is no following record.
     */
    DiskLoc ExtentManager::getNextRecordInExtent( const DiskLoc& loc ) const {
        const int followingOfs = recordFor( loc )->nextOfs();

        if ( followingOfs != DiskLoc::NullOfs ) {
            // Sanity check on the stored offset before trusting it.
            fassert( 16967, abs(followingOfs) >= 8 );
            return DiskLoc( loc.a(), followingOfs );
        }

        return DiskLoc();
    }
Example #12
0
    /**
     * Returns the location of the record preceding 'loc' within the same extent, or a
     * null DiskLoc when there is no preceding record.
     */
    DiskLoc ExtentManager::getPrevRecordInExtent( const DiskLoc& loc ) const {
        const int precedingOfs = recordFor( loc )->prevOfs();

        if ( precedingOfs != DiskLoc::NullOfs ) {
            // Sanity check on the stored offset before trusting it.
            fassert( 16968, abs(precedingOfs) >= 8 );
            return DiskLoc( loc.a(), precedingOfs );
        }

        return DiskLoc();
    }
    /**
     * Returns the location of the record preceding 'loc' within the same extent, or a
     * null DiskLoc when there is no preceding record. 'txn' is not used here.
     */
    DiskLoc RecordStoreV1Base::getPrevRecordInExtent( OperationContext* txn, const DiskLoc& loc ) const {
        const int precedingOfs = recordFor( loc )->prevOfs();

        if ( precedingOfs != DiskLoc::NullOfs ) {
            // Sanity check on the stored offset before trusting it.
            fassert( 17442, abs(precedingOfs) >= 8 );
            return DiskLoc( loc.a(), precedingOfs );
        }

        return DiskLoc();
    }
Example #14
0
    /**
     * Returns the location of the record preceding 'loc' within the same extent, or a
     * null DiskLoc when there is no preceding record.
     */
    DiskLoc RecordStoreV1Base::getPrevRecordInExtent( const DiskLoc& loc ) const {
        const int precedingOfs = recordFor( loc )->prevOfs();

        if ( precedingOfs != DiskLoc::NullOfs ) {
            // Sanity check on the stored offset before trusting it.
            fassert( 17442, abs(precedingOfs) >= 8 );
            return DiskLoc( loc.a(), precedingOfs );
        }

        return DiskLoc();
    }
    /**
     * Returns the iterator's current location and then steps to the neighbouring record
     * in the configured direction. Returns a null DiskLoc once exhausted.
     */
    DiskLoc RecordStoreV1Base::IntraExtentIterator::getNext() {
        if (_curr.isNull())
            return DiskLoc();

        // Remember the position being handed out; the caller gets this, not the successor.
        const DiskLoc yielded = _curr;

        const Record* rec = recordFor(_curr);
        const int followingOfs = _forward ? rec->nextOfs() : rec->prevOfs();
        if (followingOfs == DiskLoc::NullOfs)
            _curr = DiskLoc();
        else
            _curr = DiskLoc(_curr.a(), followingOfs);

        return yielded;
    }
Example #16
0
 /**
  * Looks up the record at 'loc' and, if recordFor() yields one, stores its data in
  * '*rd' and returns true; otherwise returns false and leaves '*rd' untouched.
  *
  * This store has no real way to verify that a location is live, so a non-null
  * record pointer is simply taken to mean the record exists.
  */
 bool RecordStoreV1Base::findRecord( OperationContext* txn,
                                     const DiskLoc& loc, RecordData* rd ) const {
     Record* found = recordFor(loc);
     if ( found ) {
         *rd = found->toRecordData();
         return true;
     }
     return false;
 }
Example #17
0
    /**
     * Patches the record at 'loc' in place: for every damage event, 'size' bytes are
     * copied from damangeSource + sourceOffset onto the record data at targetOffset.
     * Always returns Status::OK().
     */
    Status HeapRecordStore::updateWithDamages( OperationContext* txn,
                                               const DiskLoc& loc,
                                               const char* damangeSource,
                                               const mutablebson::DamageVector& damages ) {
        char* const base = recordFor( loc )->data();

        // Every patch lands directly in the record's own buffer.
        typedef mutablebson::DamageVector::const_iterator DamageIt;
        const DamageIt stop = damages.end();
        for ( DamageIt it = damages.begin(); it != stop; ++it ) {
            std::memcpy( base + it->targetOffset,
                         damangeSource + it->sourceOffset,
                         it->size );
        }

        return Status::OK();
    }
/**
 * Replaces the contents of the record at 'oldLocation' with 'dataSize' bytes of 'data'.
 *
 * If the new data fits in the existing record it is overwritten in place and
 * 'oldLocation' is returned. Otherwise a new record is inserted, the old one deleted,
 * and the new location returned. When given, 'notifier' is told before either kind of
 * change and may veto it by returning a non-OK status, which is passed back to the caller.
 */
StatusWith<RecordId> RecordStoreV1Base::updateRecord(OperationContext* txn,
                                                     const RecordId& oldLocation,
                                                     const char* data,
                                                     int dataSize,
                                                     bool enforceQuota,
                                                     UpdateNotifier* notifier) {
    MmapV1RecordHeader* existing = recordFor(DiskLoc::fromRecordId(oldLocation));

    const bool mustMove = existing->netLength() < dataSize;
    if (!mustMove) {
        // Let other queries know before the bytes change underneath them.
        if (notifier) {
            Status inPlaceStatus = notifier->recordStoreGoingToUpdateInPlace(txn, oldLocation);
            if (!inPlaceStatus.isOK())
                return StatusWith<RecordId>(inPlaceStatus);
        }

        void* dest = txn->recoveryUnit()->writingPtr(existing->data(), dataSize);
        memcpy(dest, data, dataSize);
        return StatusWith<RecordId>(oldLocation);
    }

    // Capped documents never change size; that rule is enforced above the storage layer.
    invariant(!isCapped());

    // The document outgrew its record: it has to be relocated.
    const bool tooLarge = dataSize + MmapV1RecordHeader::HeaderSize > MaxAllowedAllocation;
    if (tooLarge) {
        return StatusWith<RecordId>(ErrorCodes::InvalidLength, "record has to be <= 16.5MB");
    }

    StatusWith<RecordId> relocated = _insertRecord(txn, data, dataSize, enforceQuota);
    if (!relocated.isOK())
        return relocated;

    // The new copy exists; announce the move, then retire the old record.
    if (notifier) {
        Status moveStatus = notifier->recordStoreGoingToMove(
            txn, oldLocation, existing->data(), existing->netLength());
        if (!moveStatus.isOK())
            return StatusWith<RecordId>(moveStatus);
    }

    deleteRecord(txn, oldLocation);

    return relocated;
}
/**
 * Patches the record at 'loc' in place. For each damage event, 'size' bytes are copied
 * from damageSource + sourceOffset onto the record data at targetOffset, with each
 * target range declared to the recovery unit first. 'oldRec' is not consulted.
 *
 * Returns the record's data after patching.
 */
StatusWith<RecordData> RecordStoreV1Base::updateWithDamages(
    OperationContext* txn,
    const RecordId& loc,
    const RecordData& oldRec,
    const char* damageSource,
    const mutablebson::DamageVector& damages) {
    MmapV1RecordHeader* header = recordFor(DiskLoc::fromRecordId(loc));
    char* const base = header->data();

    typedef mutablebson::DamageVector::const_iterator DamageIt;
    const DamageIt stop = damages.end();
    for (DamageIt it = damages.begin(); it != stop; ++it) {
        // Declare exactly the bytes about to change, then overwrite them.
        void* dest = txn->recoveryUnit()->writingPtr(base + it->targetOffset, it->size);
        std::memcpy(dest, damageSource + it->sourceOffset, it->size);
    }

    return header->toRecordData();
}
    /**
     * Replaces the contents of the record at 'oldLocation' with 'dataSize' bytes of 'data'.
     *
     * Data that fits is written in place and 'oldLocation' is returned. Data that does
     * not fit is an error for capped stores (InternalError, code 10003); otherwise a new
     * record is inserted, 'notifier' (if any) is told about the move and may veto it,
     * the old record is deleted, and the new location is returned.
     */
    StatusWith<DiskLoc> RecordStoreV1Base::updateRecord( OperationContext* txn,
                                                         const DiskLoc& oldLocation,
                                                         const char* data,
                                                         int dataSize,
                                                         bool enforceQuota,
                                                         UpdateMoveNotifier* notifier ) {
        Record* existing = recordFor( oldLocation );

        const bool mustMove = existing->netLength() < dataSize;
        if ( !mustMove ) {
            // Record the fit for the padding heuristics, then overwrite in place.
            _paddingFits( txn );
            void* dest = txn->recoveryUnit()->writingPtr( existing->data(), dataSize );
            memcpy( dest, data, dataSize );
            return StatusWith<DiskLoc>( oldLocation );
        }

        if ( isCapped() ) {
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "failing update: objects in a capped ns cannot grow",
                                        10003 );
        }

        // The document outgrew its record and has to be relocated.
        _paddingTooSmall( txn );

        StatusWith<DiskLoc> relocated = _insertRecord( txn, data, dataSize, enforceQuota );
        if ( !relocated.isOK() )
            return relocated;

        // The new copy exists; announce the move before retiring the old record.
        if ( notifier ) {
            Status moveStatus = notifier->recordStoreGoingToMove( txn,
                                                                  oldLocation,
                                                                  existing->data(),
                                                                  existing->netLength() );
            if ( !moveStatus.isOK() )
                return StatusWith<DiskLoc>( moveStatus );
        }

        deleteRecord( txn, oldLocation );

        return relocated;
    }
Example #21
0
    /**
     * Replaces the contents of the record at 'oldLocation' with 'len' bytes of 'data'.
     *
     * Data that fits within the existing record is copied over it in place. Data that
     * does not fit is an error for capped stores (InternalError, code 10003); otherwise
     * a new buffer is allocated and stored under the same location.
     *
     * Returns 'oldLocation' on success; the location never changes in this store.
     * 'enforceQuota' and 'notifier' are not consulted.
     */
    StatusWith<DiskLoc> HeapRecordStore::updateRecord(OperationContext* txn,
                                                      const DiskLoc& oldLocation,
                                                      const char* data,
                                                      int len,
                                                      bool enforceQuota,
                                                      UpdateMoveNotifier* notifier ) {
        HeapRecord* oldRecord = recordFor( oldLocation );
        const int oldLen = oldRecord->netLength();

        // If the new data is no longer than the old data, copy it over the old space.
        if ( len <= oldLen ) {
            memcpy(oldRecord->data(), data, len);
            // The record keeps its original allocation, so netLength() is still oldLen.
            // _dataSize is left alone here: deleteRecord() subtracts netLength(), and
            // shrinking the counter by (oldLen - len) now would make it drift below the
            // true total once this record is deleted.
            return StatusWith<DiskLoc>(oldLocation);
        }

        if ( _isCapped ) {
            return StatusWith<DiskLoc>( ErrorCodes::InternalError,
                                        "failing update: objects in a capped ns cannot grow",
                                        10003 );
        }

        // The new data exceeds the old record: allocate a replacement record and swap it
        // in under the same location. 'oldRecord' must not be used after the swap.
        const int lengthWithHeaders = len + HeapRecord::HeaderSize;
        boost::shared_array<char> buf(new char[lengthWithHeaders]);
        HeapRecord* rec = reinterpret_cast<HeapRecord*>(buf.get());
        rec->lengthWithHeaders() = lengthWithHeaders;
        memcpy(rec->data(), data, len);

        _records[oldLocation] = buf;
        // Here netLength() really did change from oldLen to len.
        _dataSize += len - oldLen;

        cappedDeleteAsNeeded(txn);

        return StatusWith<DiskLoc>(oldLocation);
    }
Example #22
0
    /**
     * Inserts a document produced by 'doc'.
     *
     * The allocation covers the document plus the record header and is padded via
     * getRecordAllocationSize() only when the writer asks for padding. Returns the new
     * location, or the failed allocation status unchanged.
     */
    StatusWith<DiskLoc> RecordStoreV1Base::insertRecord( TransactionExperiment* txn,
                                                         const DocWriter* doc,
                                                         int quotaMax ) {
        const int unpaddedBytes = doc->documentSize() + Record::HeaderSize;
        const int allocBytes = doc->addPadding() ? getRecordAllocationSize( unpaddedBytes )
                                                 : unpaddedBytes;

        StatusWith<DiskLoc> allocated = allocRecord( txn, allocBytes, quotaMax );
        if ( !allocated.isOK() )
            return allocated;

        const DiskLoc where = allocated.getValue();

        Record* rec = recordFor( where );
        fassert( 17319, rec->lengthWithHeaders() >= allocBytes );

        // Declare the write, then let the writer serialise straight into the record.
        rec = reinterpret_cast<Record*>( txn->writingPtr( rec, allocBytes ) );
        doc->writeDocument( rec->data() );

        _addRecordToRecListInExtent( txn, rec, where );
        _details->incrementStats( txn, rec->netLength(), 1 );

        return allocated;
    }
    /**
     * Removes the record stored at 'dl'.
     *
     * The record is unlinked from the prev/next chain of its extent, the extent's
     * first/last pointers are repaired if they referenced it, the collection stats are
     * decremented, and finally the space is either zeroed (system.indexes) or stamped
     * with 0xee and handed to addDeletedRec().
     */
    void RecordStoreV1Base::deleteRecord( OperationContext* txn, const DiskLoc& dl ) {

        Record* victim = recordFor( dl );
        // The 0xee stamp further down overwrites four data bytes, so they must exist.
        invariant( victim->netLength() >= 4 );

        // Splice the neighbours together so that the chain skips over the victim.
        if ( victim->prevOfs() != DiskLoc::NullOfs ) {
            Record* before = recordFor( getPrevRecordInExtent( txn, dl ) );
            txn->recoveryUnit()->writingInt( before->nextOfs() ) = victim->nextOfs();
        }
        if ( victim->nextOfs() != DiskLoc::NullOfs ) {
            Record* after = recordFor( getNextRecord( txn, dl ) );
            txn->recoveryUnit()->writingInt( after->prevOfs() ) = victim->prevOfs();
        }

        // Repair the owning extent's head/tail pointers if either one named the victim.
        DiskLoc owningExtentLoc = victim->myExtentLoc( dl );
        Extent* extent = _getExtent( txn, owningExtentLoc );
        if ( extent->firstRecord == dl ) {
            txn->recoveryUnit()->writing( &extent->firstRecord );
            if ( victim->nextOfs() != DiskLoc::NullOfs )
                extent->firstRecord.set( dl.a(), victim->nextOfs() );
            else
                extent->firstRecord.Null();
        }
        if ( extent->lastRecord == dl ) {
            txn->recoveryUnit()->writing( &extent->lastRecord );
            if ( victim->prevOfs() != DiskLoc::NullOfs )
                extent->lastRecord.set( dl.a(), victim->prevOfs() );
            else
                extent->lastRecord.Null();
        }

        // One record fewer, and its payload no longer counts towards the data size.
        _details->incrementStats( txn, -1 * victim->netLength(), -1 );

        if ( !_isSystemIndexes ) {
            // Stamp the start of the payload so that any later use of a deleted
            // location can be detected, then recycle the space.
            memset( txn->recoveryUnit()->writingPtr( victim->data(), 4 ), 0xee, 4 );
            addDeletedRec( txn, dl );
            return;
        }

        // system.indexes: the space is deliberately not reused and is wiped instead.
        // IndexDetails holds pointers to these disk locations, so a botched remove here
        // would be very costly; stay conservative until this path is validated further.
        const int wholeLength = victim->lengthWithHeaders();
        memset( txn->recoveryUnit()->writingPtr( victim, wholeLength ), 0, wholeLength );

    }
Example #24
0
 /**
  * Returns the extent that contains the record at 'loc', located through the
  * extent offset stored in the record (same file as 'loc').
  */
 Extent* ExtentManager::extentFor( const DiskLoc& loc ) const {
     Record* rec = recordFor( loc );
     const int owningExtentOfs = rec->extentOfs();
     DiskLoc owningExtentLoc( loc.a(), owningExtentOfs );
     return getExtent( owningExtentLoc );
 }
    /**
     * Compacts a single extent: every record in the extent at 'extentLoc' is re-inserted
     * through insertRecord() and unlinked from the source extent, after which the emptied
     * extent is removed from the extent list and returned to the extent manager.
     *
     * @param txn             operation context; checked for interrupt once per record and
     *                        used for all unit-of-work / recovery-unit writes.
     * @param extentLoc       location of the extent to compact. The invariants below
     *                        require it to be the first, but not the only, extent.
     * @param extentNumber    only used in log messages.
     * @param adaptor         validates documents, reports their data size, and is told
     *                        about each re-inserted record via inserted().
     * @param compactOptions  controls document validation and the padding mode.
     * @param stats           corruptDocuments is incremented for each document that
     *                        fails validation.
     *
     * Documents that fail validation are not copied; they are still unlinked from the
     * source extent and so are dropped. A failed insert surfaces through uassertStatusOK().
     */
    void SimpleRecordStoreV1::_compactExtent(OperationContext* txn,
                                             const DiskLoc extentLoc,
                                             int extentNumber,
                                             RecordStoreCompactAdaptor* adaptor,
                                             const CompactOptions* compactOptions,
                                             CompactStats* stats ) {

        log() << "compact begin extent #" << extentNumber
              << " for namespace " << _ns << " " << extentLoc;

        // Running totals used only to report the pre-compaction padding ratio at the end.
        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent* const sourceExtent = _extentManager->getExtent( extentLoc );
        sourceExtent->assertOk();
        fassert( 17437, sourceExtent->validates(extentLoc) );

        {
            // The next/prev Record pointers within the Extent might not be in order so we first
            // page in the whole Extent sequentially.
            // TODO benchmark on slow storage to verify this is measurably faster.
            log() << "compact paging in len=" << sourceExtent->length/1000000.0 << "MB" << endl;
            Timer t;
            size_t length = sourceExtent->length;

            touch_pages( reinterpret_cast<const char*>(sourceExtent), length );
            int ms = t.millis();
            // Only report throughput when paging took more than a second.
            if( ms > 1000 )
                log() << "compact end paging in " << ms << "ms "
                      << sourceExtent->length/1000000.0/t.seconds() << "MB/sec" << endl;
        }

        {
            // Move each Record out of this extent and insert it in to the "new" extents.
            log() << "compact copying records" << endl;
            long long totalNetSize = 0;
            long long nrecords = 0;
            DiskLoc nextSourceLoc = sourceExtent->firstRecord;
            while (!nextSourceLoc.isNull()) {
                txn->checkForInterrupt();

                // Each record is moved inside its own unit of work, committed at the
                // bottom of the loop body.
                WriteUnitOfWork wunit(txn);
                Record* recOld = recordFor(nextSourceLoc);
                RecordData oldData = recOld->toRecordData();
                // Advance now; from here on nextSourceLoc names the record AFTER recOld.
                nextSourceLoc = getNextRecordInExtent(txn, nextSourceLoc);

                if ( compactOptions->validateDocuments && !adaptor->isDataValid( oldData ) ) {
                    // object is corrupt!
                    log() << "compact removing corrupt document!";
                    stats->corruptDocuments++;
                }
                else {
                    // How much data is in the record. Excludes padding and Record headers.
                    const unsigned rawDataSize = adaptor->dataSize( oldData );

                    nrecords++;
                    oldObjSize += rawDataSize;
                    oldObjSizeWithPadding += recOld->netLength();

                    // Allocation sizes include the headers and possibly some padding.
                    const unsigned minAllocationSize = rawDataSize + Record::HeaderSize;
                    unsigned allocationSize = minAllocationSize;
                    switch( compactOptions->paddingMode ) {
                    case CompactOptions::NONE: // no padding, unless using powerOf2Sizes
                        if ( _details->isUserFlagSet(Flag_UsePowerOf2Sizes) )
                            allocationSize = quantizePowerOf2AllocationSpace(minAllocationSize);
                        else
                            allocationSize = minAllocationSize;
                        break;

                    case CompactOptions::PRESERVE: // keep original padding
                        allocationSize = recOld->lengthWithHeaders();
                        break;

                    case CompactOptions::MANUAL: // user specified how much padding to use
                        allocationSize = compactOptions->computeRecordSize(minAllocationSize);
                        // Fall back to the unpadded size if the computed size is too
                        // small or larger than half of BSONObjMaxUserSize.
                        if (allocationSize < minAllocationSize
                                || allocationSize > BSONObjMaxUserSize / 2 ) {
                            allocationSize = minAllocationSize;
                        }
                        break;
                    }
                    invariant(allocationSize >= minAllocationSize);

                    // Copy the data to a new record. Because we orphaned the record freelist at the
                    // start of the compact, this insert will allocate a record in a new extent.
                    // See the comment in compact() for more details.
                    CompactDocWriter writer( recOld, rawDataSize, allocationSize );
                    StatusWith<DiskLoc> status = insertRecord( txn, &writer, false );
                    uassertStatusOK( status.getStatus() );
                    const Record* newRec = recordFor(status.getValue());
                    invariant(unsigned(newRec->netLength()) >= rawDataSize);
                    totalNetSize += newRec->netLength();

                    // Tells the caller that the record has been moved, so it can do things such as
                    // add it to indexes.
                    adaptor->inserted(newRec->toRecordData(), status.getValue());
                }

                // Remove the old record from the linked list of records within the sourceExtent.
                // The old record is not added to the freelist as we will be freeing the whole
                // extent at the end.
                *txn->recoveryUnit()->writing(&sourceExtent->firstRecord) = nextSourceLoc;
                if (nextSourceLoc.isNull()) {
                    // Just moved the last record out of the extent. Mark extent as empty.
                    *txn->recoveryUnit()->writing(&sourceExtent->lastRecord) = DiskLoc();
                }
                else {
                    // The following record is the new head, so it has no predecessor.
                    Record* newFirstRecord = recordFor(nextSourceLoc);
                    txn->recoveryUnit()->writingInt(newFirstRecord->prevOfs()) = DiskLoc::NullOfs;
                }

                // Adjust the stats to reflect the removal of the old record. The insert above
                // handled adjusting the stats for the new record.
                _details->incrementStats(txn, -(recOld->netLength()), -1);

                wunit.commit();
            }

            // The extent must now be empty.
            invariant(sourceExtent->firstRecord.isNull());
            invariant(sourceExtent->lastRecord.isNull());

            // We are still the first extent, but we must not be the only extent.
            invariant( _details->firstExtent(txn) == extentLoc );
            invariant( _details->lastExtent(txn) != extentLoc );

            // Remove the newly emptied sourceExtent from the extent linked list and return it to
            // the extent manager.
            WriteUnitOfWork wunit(txn);
            const DiskLoc newFirst = sourceExtent->xnext;
            _details->setFirstExtent( txn, newFirst );
            // The new first extent has no predecessor.
            *txn->recoveryUnit()->writing(&_extentManager->getExtent( newFirst )->xprev) = DiskLoc();
            _extentManager->freeExtent( txn, extentLoc );
            wunit.commit();

            {
                // Ratio of padded to raw bytes for the documents that were copied.
                const double oldPadding = oldObjSize ? double(oldObjSizeWithPadding) / oldObjSize
                                                     : 1.0; // defining 0/0 as 1 for this.

                log() << "compact finished extent #" << extentNumber << " containing " << nrecords
                      << " documents (" << totalNetSize / (1024*1024.0) << "MB)"
                      << " oldPadding: " << oldPadding;
            }
        }

    }
Example #26
0
 /**
  * Removes the record at 'loc': its net length is subtracted from the running data
  * size and its entry is erased from the record map. Exactly one entry must be
  * erased, otherwise the invariant fires. 'txn' is not used.
  */
 void HeapRecordStore::deleteRecord(OperationContext* txn, const DiskLoc& loc) {
     // Read the length before the entry (and with it the record) goes away.
     _dataSize -= recordFor(loc)->netLength();

     invariant(_records.erase(loc) == 1);
 }
Example #27
0
 /**
  * Returns the location of the extent containing the record at 'loc': same file as
  * 'loc', at the extent offset stored in the record.
  */
 DiskLoc ExtentManager::extentLocFor( const DiskLoc& loc ) const {
     const int owningExtentOfs = recordFor( loc )->extentOfs();
     return DiskLoc( loc.a(), owningExtentOfs );
 }
Example #28
0
 /**
  * Removes the record at 'loc'. The record is first handed to the recovery unit as a
  * RemoveChange, then its size is subtracted from the data size and its entry erased.
  * Exactly one entry must be erased, otherwise the invariant fires.
  */
 void HeapRecordStore::deleteRecord(OperationContext* txn, const DiskLoc& loc) {
     HeapRecord* doomed = recordFor(loc);

     // Register the change while the record still exists in the map.
     txn->recoveryUnit()->registerChange(new RemoveChange(_data, loc, *doomed));

     _data->dataSize -= doomed->size;
     invariant(_data->records.erase(loc) == 1);
 }
Example #29
0
    /**
     * Compacts a single extent: every record in the extent at 'diskloc' is re-inserted through
     * insertRecord(), after which the emptied extent is unlinked from the head of the extent
     * list and handed back to the extent manager.
     *
     * @param txn             operation context used for journaled writes, periodic commits and
     *                        interrupt checks
     * @param diskloc         location of the extent to compact; the invariants below require it
     *                        to be the first extent and not the last extent
     * @param extentNumber    used only in log messages
     * @param adaptor         reports data validity and data size of old records and is notified
     *                        of every newly inserted record
     * @param compactOptions  supplies the padding mode and whether documents are validated
     * @param stats           corruptDocuments is incremented for each record skipped as invalid
     */
    void SimpleRecordStoreV1::_compactExtent(OperationContext* txn,
                                             const DiskLoc diskloc,
                                             int extentNumber,
                                             RecordStoreCompactAdaptor* adaptor,
                                             const CompactOptions* compactOptions,
                                             CompactStats* stats ) {

        log() << "compact begin extent #" << extentNumber
              << " for namespace " << _ns << " " << diskloc;

        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        // Sanity-check the extent before touching any of its records.
        Extent *e = _extentManager->getExtent( diskloc );
        e->assertOk();
        fassert( 17437, e->validates(diskloc) );

        {
            // the next/prev pointers within the extent might not be in order so we first
            // page the whole thing in sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            size_t length = e->length;

            touch_pages( reinterpret_cast<const char*>(e), length );
            int ms = t.millis();
            // Only report throughput when paging took more than a second.
            if( ms > 1000 )
                log() << "compact end paging in " << ms << "ms "
                      << e->length/1000000.0/t.seconds() << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            long long datasize = 0; // sum of netLength() over the newly inserted records
            long long nrecords = 0; // number of records copied (corrupt ones are not counted)
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    // Fetch the current record and advance L to its successor up front, so
                    // that L always names the first record that has not been processed yet.
                    Record *recOld = recordFor(L);
                    L = getNextRecordInExtent(L);

                    if ( compactOptions->validateDocuments && !adaptor->isDataValid(recOld) ) {
                        // object is corrupt!
                        log() << "compact skipping corrupt document!";
                        stats->corruptDocuments++;
                    }
                    else {
                        unsigned dataSize = adaptor->dataSize( recOld );
                        unsigned docSize = dataSize;

                        nrecords++;
                        oldObjSize += docSize;
                        oldObjSizeWithPadding += recOld->netLength();

                        // Start from the unpadded size (data + record header) and let the
                        // padding mode decide the final allocation size.
                        unsigned lenWHdr = docSize + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;

                        switch( compactOptions->paddingMode ) {
                        case CompactOptions::NONE:
                            // No extra padding, except for the power-of-2 quantization when
                            // the collection has that flag set.
                            if ( _details->isUserFlagSet(Flag_UsePowerOf2Sizes) )
                                lenWPadding = quantizePowerOf2AllocationSpace(lenWPadding);
                            break;
                        case CompactOptions::PRESERVE:
                            // if we are preserving the padding, the record should not change size
                            lenWPadding = recOld->lengthWithHeaders();
                            break;
                        case CompactOptions::MANUAL:
                            // Use the caller-supplied size computation, but fall back to the
                            // unpadded size when the result is smaller than the record itself
                            // or larger than BSONObjMaxUserSize / 2.
                            lenWPadding = compactOptions->computeRecordSize(lenWPadding);
                            if (lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) {
                                lenWPadding = lenWHdr;
                            }
                            break;
                        }

                        // Re-insert the document with the chosen allocation size; a failed
                        // insert aborts the compaction via uassertStatusOK.
                        CompactDocWriter writer( recOld, dataSize, lenWPadding );
                        StatusWith<DiskLoc> status = insertRecord( txn, &writer, 0 );
                        uassertStatusOK( status.getStatus() );
                        datasize += recordFor( status.getValue() )->netLength();

                        // Tell the adaptor where the new copy of the record lives.
                        adaptor->inserted( recordFor( status.getValue() ), status.getValue() );
                    }

                    if( L.isNull() ) {
                        // we just did the very last record from the old extent.  it's still pointed to
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    // NOTE(review): RARELY presumably throttles how often the interrupt check
                    // runs -- confirm against the macro's definition.
                    RARELY stopping = !txn->checkForInterruptNoAssert().isOK();
                    if( stopping || txn->recoveryUnit()->isCommitNeeded() ) {
                        // Make L the extent's new first record and clear its prev offset, which
                        // detaches everything processed so far, then commit if needed.
                        *txn->recoveryUnit()->writing(&e->firstRecord) = L;
                        Record *r = recordFor(L);
                        txn->recoveryUnit()->writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                        txn->recoveryUnit()->commitIfNeeded();
                        // Called after the extent pointers above have been updated.
                        txn->checkForInterrupt();
                    }
                }
            } // if !L.isNull()

            // The compacted extent must be the head of the extent list and must not be the
            // only extent; unlink it, make its successor the new head and free it.
            invariant( _details->firstExtent() == diskloc );
            invariant( _details->lastExtent() != diskloc );
            DiskLoc newFirst = e->xnext;
            _details->setFirstExtent( txn, newFirst );
            *txn->recoveryUnit()->writing(&_extentManager->getExtent( newFirst )->xprev) = DiskLoc();
            _extentManager->freeExtent( txn, diskloc );

            txn->recoveryUnit()->commitIfNeeded();

            {
                // Ratio of padded to unpadded size of the old records; defaults to 1.0 when
                // no document bytes were counted.
                double op = 1.0;
                if( oldObjSize )
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                // NOTE(review): static_cast<unsigned>(op*100.0)/100 is an unsigned integer
                // division, so the second value logged is op truncated to a whole number --
                // confirm whether a division by 100.0 was intended.
                log() << "compact finished extent #" << extentNumber << " containing " << nrecords
                      << " documents (" << datasize/1000000.0 << "MB)"
                      << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100;
            }
        }

    }
    /**
     * Validates the structures of this record store and reports what it finds.
     *
     * The work is done in five steps:
     *   1) basic stats that require no iteration
     *   2) extent level info (walks the extent list)
     *   3) check extent start and end
     *   4) check each non-deleted record (only when 'scanData' is set)
     *   5) check the deleted lists
     *
     * Problems are never reported through the return value: each one sets results->valid to
     * false and appends a message to results->errors. Every return path yields Status::OK().
     *
     * @param txn       operation context; used to read extents and to check for interrupts
     * @param full      when true, additionally dumps every extent, emits the per-bucket
     *                  deleted-list sizes and (if 'scanData' is also true) validates each
     *                  record through 'adaptor'
     * @param scanData  when true, iterates over all records and emits record statistics
     * @param adaptor   validates record data; consulted only when both flags are true
     * @param results   receives the valid flag and the list of error messages
     * @param output    receives the collected statistics
     * @return always Status::OK()
     */
    Status RecordStoreV1Base::validate( OperationContext* txn,
                                        bool full, bool scanData,
                                        ValidateAdaptor* adaptor,
                                        ValidateResults* results, BSONObjBuilder* output ) const {

        // 1) basic status that require no iteration
        // 2) extent level info
        // 3) check extent start and end
        // 4) check each non-deleted record
        // 5) check deleted list

        // -------------

        // 1111111111111111111
        if ( isCapped() ){
            output->appendBool("capped", true);
            output->appendNumber("max", _details->maxCappedDocs());
        }

        output->appendNumber("datasize", _details->dataSize());
        output->appendNumber("nrecords", _details->numRecords());
        output->appendNumber("lastExtentSize", _details->lastExtentSize(txn));
        output->appendNumber("padding", _details->paddingFactor());

        if ( _details->firstExtent(txn).isNull() )
            output->append( "firstExtent", "null" );
        else
            output->append( "firstExtent",
                            str::stream() << _details->firstExtent(txn).toString()
                            << " ns:"
                            << _getExtent( txn, _details->firstExtent(txn) )->nsDiagnostic.toString());
        if ( _details->lastExtent(txn).isNull() )
            output->append( "lastExtent", "null" );
        else
            output->append( "lastExtent", str::stream() << _details->lastExtent(txn).toString()
                            << " ns:"
                            << _getExtent( txn, _details->lastExtent(txn) )->nsDiagnostic.toString());

        // 22222222222222222222222222
        { // validate extent basics
            BSONArrayBuilder extentData;
            int extentCount = 0;
            DiskLoc extentDiskLoc;
            try {
                if ( !_details->firstExtent(txn).isNull() ) {
                    _getExtent( txn, _details->firstExtent(txn) )->assertOk();
                    _getExtent( txn, _details->lastExtent(txn) )->assertOk();
                }

                // Walk the extent list from the first extent, following xnext.
                extentDiskLoc = _details->firstExtent(txn);
                while (!extentDiskLoc.isNull()) {
                    Extent* thisExtent = _getExtent( txn, extentDiskLoc );
                    if (full) {
                        extentData << thisExtent->dump();
                    }
                    if (!thisExtent->validates(extentDiskLoc, &results->errors)) {
                        results->valid = false;
                    }
                    DiskLoc nextDiskLoc = thisExtent->xnext;

                    // The next extent's back pointer must refer to this extent. Note that the
                    // check is not performed while extentCount is still 0.
                    if (extentCount > 0 && !nextDiskLoc.isNull()
                        &&  _getExtent( txn, nextDiskLoc )->xprev != extentDiskLoc) {
                        StringBuilder sb;
                        sb << "'xprev' pointer " << _getExtent( txn, nextDiskLoc )->xprev.toString()
                           << " in extent " << nextDiskLoc.toString()
                           << " does not point to extent " << extentDiskLoc.toString();
                        results->errors.push_back( sb.str() );
                        results->valid = false;
                    }
                    // The extent at the end of the list must be the recorded lastExtent.
                    if (nextDiskLoc.isNull() && extentDiskLoc != _details->lastExtent(txn)) {
                        StringBuilder sb;
                        sb << "'lastExtent' pointer " << _details->lastExtent(txn).toString()
                           << " does not point to last extent in list " << extentDiskLoc.toString();
                        results->errors.push_back( sb.str() );
                        results->valid = false;
                    }
                    extentDiskLoc = nextDiskLoc;
                    extentCount++;
                    txn->checkForInterrupt();
                }
            }
            catch (const DBException& e) {
                // A failure while walking the extents ends validation early; the failure is
                // recorded in 'results' and the call still returns OK.
                StringBuilder sb;
                sb << "exception validating extent " << extentCount
                   << ": " << e.what();
                results->errors.push_back( sb.str() );
                results->valid = false;
                return Status::OK();
            }
            output->append("extentCount", extentCount);

            if ( full )
                output->appendArray( "extents" , extentData.arr() );

        }

        try {
            // 333333333333333333333333333
            // Tracks which extent is being examined so the catch below can name it.
            bool testingLastExtent = false;
            try {
                DiskLoc firstExtentLoc = _details->firstExtent(txn);
                if (firstExtentLoc.isNull()) {
                    // this is ok
                }
                else {
                    output->append("firstExtentDetails", _getExtent(txn, firstExtentLoc)->dump());
                    if (!_getExtent(txn, firstExtentLoc)->xprev.isNull()) {
                        StringBuilder sb;
                        sb << "'xprev' pointer in 'firstExtent' " << _details->firstExtent(txn).toString()
                           << " is " << _getExtent(txn, firstExtentLoc)->xprev.toString()
                           << ", should be null";
                        results->errors.push_back( sb.str() );
                        results->valid = false;
                    }
                }
                testingLastExtent = true;
                DiskLoc lastExtentLoc = _details->lastExtent(txn);
                if (lastExtentLoc.isNull()) {
                    // this is ok
                }
                else {
                    // When there is a single extent its details were already emitted above.
                    if (firstExtentLoc != lastExtentLoc) {
                        output->append("lastExtentDetails", _getExtent(txn, lastExtentLoc)->dump());
                        if (!_getExtent(txn, lastExtentLoc)->xnext.isNull()) {
                            StringBuilder sb;
                            sb << "'xnext' pointer in 'lastExtent' " << lastExtentLoc.toString()
                               << " is " << _getExtent(txn, lastExtentLoc)->xnext.toString()
                               << ", should be null";
                            results->errors.push_back( sb.str() );
                            results->valid = false;
                        }
                    }
                }
            }
            catch (const DBException& e) {
                StringBuilder sb;
                sb << "exception processing '"
                   << (testingLastExtent ? "lastExtent" : "firstExtent")
                   << "': " << e.what();
                results->errors.push_back( sb.str() );
                results->valid = false;
            }

            // 4444444444444444444444444

            // Locations of live records seen during the scan; cross-checked against the
            // deleted lists in step 5. Stays empty when scanData is false.
            set<DiskLoc> recs;
            if( scanData ) {
                int n = 0;
                int nInvalid = 0;
                long long nQuantizedSize = 0;
                long long nPowerOf2QuantizedSize = 0;
                long long len = 0;
                long long nlen = 0;
                long long bsonLen = 0;
                int outOfOrder = 0;
                DiskLoc cl_last;

                scoped_ptr<RecordIterator> iterator( getIterator( txn,
                                                                  DiskLoc(),
                                                                  false,
                                                                  CollectionScanParams::FORWARD ) );
                DiskLoc cl;
                while ( !( cl = iterator->getNext() ).isNull() ) {
                    n++;

                    // Only the locations seen while n is below 1000000 are remembered, so
                    // the step-5 cross-check is partial for larger collections.
                    if ( n < 1000000 )
                        recs.insert(cl);
                    if ( isCapped() ) {
                        if ( cl < cl_last )
                            outOfOrder++;
                        cl_last = cl;
                    }

                    Record *r = recordFor(cl);
                    len += r->lengthWithHeaders();
                    nlen += r->netLength();

                    if ( r->lengthWithHeaders() ==
                         quantizeAllocationSpace( r->lengthWithHeaders() ) ) {
                        // Count the number of records having a size consistent with
                        // the quantizeAllocationSpace quantization implementation.
                        ++nQuantizedSize;
                    }

                    if ( r->lengthWithHeaders() ==
                         quantizePowerOf2AllocationSpace( r->lengthWithHeaders() ) ) {
                        // Count the number of records having a size consistent with the
                        // quantizePowerOf2AllocationSpace quantization implementation.
                        ++nPowerOf2QuantizedSize;
                    }

                    if (full){
                        size_t dataSize = 0;
                        const Status status = adaptor->validate( r->toRecordData(), &dataSize );
                        if (!status.isOK()) {
                            results->valid = false;
                            if (nInvalid == 0) // only add the error entry once; each one is logged
                                results->errors.push_back( "invalid object detected (see logs)" );

                            nInvalid++;
                            log() << "Invalid object detected in " << _ns
                                  << ": " << status.reason();
                        }
                        else {
                            bsonLen += dataSize;
                        }
                    }
                }

                // Ordering is only reported for capped collections that have not looped;
                // more than one out-of-order record is treated as an error.
                if ( isCapped() && !_details->capLooped() ) {
                    output->append("cappedOutOfOrder", outOfOrder);
                    if ( outOfOrder > 1 ) {
                        results->valid = false;
                        results->errors.push_back( "too many out of order records" );
                    }
                }
                output->append("objectsFound", n);

                if (full) {
                    output->append("invalidObjects", nInvalid);
                }

                output->appendNumber("nQuantizedSize", nQuantizedSize);
                output->appendNumber("nPowerOf2QuantizedSize", nPowerOf2QuantizedSize);
                output->appendNumber("bytesWithHeaders", len);
                output->appendNumber("bytesWithoutHeaders", nlen);

                if (full) {
                    output->appendNumber("bytesBson", bsonLen);
                }
            } // end scanData

            // 55555555555555555555555555
            int ndel = 0;
            long long delSize = 0;
            BSONArrayBuilder delBucketSizes;
            int incorrect = 0; // deleted-list entries that also showed up as live records
            for ( int i = 0; i < Buckets; i++ ) {
                DiskLoc loc = _details->deletedListEntry(i);
                try {
                    int k = 0; // position within this bucket's chain
                    while ( !loc.isNull() ) {
                        if ( recs.count(loc) )
                            incorrect++;
                        ndel++;

                        if ( loc.questionable() ) {
                            if( isCapped() && !loc.isValid() && i == 1 ) {
                                /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                                   see comments in namespace.h
                                */
                                break;
                            }

                            string err( str::stream() << "bad pointer in deleted record list: "
                                        << loc.toString()
                                        << " bucket: " << i
                                        << " k: " << k );
                            results->errors.push_back( err );
                            results->valid = false;
                            break;
                        }

                        const DeletedRecord* d = deletedRecordFor(loc);
                        delSize += d->lengthWithHeaders();
                        loc = d->nextDeleted();
                        k++;
                        txn->checkForInterrupt();
                    }
                    // Not reached when either 'break' above is taken, so such a bucket gets
                    // no entry in delBucketSizes.
                    delBucketSizes << k;
                }
                catch (...) {
                    // NOTE(review): this also catches anything thrown by
                    // txn->checkForInterrupt() above and turns it into a validation error;
                    // confirm that is intended.
                    results->errors.push_back( string("exception in deleted chain for bucket ") +
                                               BSONObjBuilder::numStr(i) );
                    results->valid = false;
                }
            }
            output->appendNumber("deletedCount", ndel);
            output->appendNumber("deletedSize", delSize);
            if ( full ) {
                output->append( "delBucketSizes", delBucketSizes.arr() );
            }

            if ( incorrect ) {
                results->errors.push_back( BSONObjBuilder::numStr(incorrect) +
                                           " records from datafile are in deleted list" );
                results->valid = false;
            }

        }
        catch (const AssertionException&) {
            // Caught by const reference so the exception is neither copied nor sliced.
            results->errors.push_back( "exception during validate" );
            results->valid = false;
        }

        return Status::OK();
    }