コード例 #1
0
    int RecordStoreV1Base::getRecordAllocationSize( int minRecordSize ) const {

        if ( isCapped() )
            return minRecordSize;

        invariant( _details->paddingFactor() >= 1 );

        if ( _details->isUserFlagSet( Flag_UsePowerOf2Sizes ) ) {
            // quantize to the nearest bucketSize (or nearest 1mb boundary for large sizes).
            return quantizePowerOf2AllocationSpace(minRecordSize);
        }

        // adjust for padding factor
        return static_cast<int>(minRecordSize * _details->paddingFactor());
    }
コード例 #2
0
    void SimpleRecordStoreV1::_compactExtent(OperationContext* txn,
                                             const DiskLoc extentLoc,
                                             int extentNumber,
                                             RecordStoreCompactAdaptor* adaptor,
                                             const CompactOptions* compactOptions,
                                             CompactStats* stats ) {

        log() << "compact begin extent #" << extentNumber
              << " for namespace " << _ns << " " << extentLoc;

        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent* const sourceExtent = _extentManager->getExtent( extentLoc );
        sourceExtent->assertOk();
        fassert( 17437, sourceExtent->validates(extentLoc) );

        {
            // The next/prev Record pointers within the Extent might not be in order so we first
            // page in the whole Extent sequentially.
            // TODO benchmark on slow storage to verify this is measurably faster.
            log() << "compact paging in len=" << sourceExtent->length/1000000.0 << "MB" << endl;
            Timer t;
            size_t length = sourceExtent->length;

            touch_pages( reinterpret_cast<const char*>(sourceExtent), length );
            int ms = t.millis();
            if( ms > 1000 )
                log() << "compact end paging in " << ms << "ms "
                      << sourceExtent->length/1000000.0/t.seconds() << "MB/sec" << endl;
        }

        {
            // Move each Record out of this extent and insert it in to the "new" extents.
            log() << "compact copying records" << endl;
            long long totalNetSize = 0;
            long long nrecords = 0;
            DiskLoc nextSourceLoc = sourceExtent->firstRecord;
            while (!nextSourceLoc.isNull()) {
                txn->checkForInterrupt();

                WriteUnitOfWork wunit(txn);
                Record* recOld = recordFor(nextSourceLoc);
                RecordData oldData = recOld->toRecordData();
                nextSourceLoc = getNextRecordInExtent(txn, nextSourceLoc);

                if ( compactOptions->validateDocuments && !adaptor->isDataValid( oldData ) ) {
                    // object is corrupt!
                    log() << "compact removing corrupt document!";
                    stats->corruptDocuments++;
                }
                else {
                    // How much data is in the record. Excludes padding and Record headers.
                    const unsigned rawDataSize = adaptor->dataSize( oldData );

                    nrecords++;
                    oldObjSize += rawDataSize;
                    oldObjSizeWithPadding += recOld->netLength();

                    // Allocation sizes include the headers and possibly some padding.
                    const unsigned minAllocationSize = rawDataSize + Record::HeaderSize;
                    unsigned allocationSize = minAllocationSize;
                    switch( compactOptions->paddingMode ) {
                    case CompactOptions::NONE: // no padding, unless using powerOf2Sizes
                        if ( _details->isUserFlagSet(Flag_UsePowerOf2Sizes) )
                            allocationSize = quantizePowerOf2AllocationSpace(minAllocationSize);
                        else
                            allocationSize = minAllocationSize;
                        break;

                    case CompactOptions::PRESERVE: // keep original padding
                        allocationSize = recOld->lengthWithHeaders();
                        break;

                    case CompactOptions::MANUAL: // user specified how much padding to use
                        allocationSize = compactOptions->computeRecordSize(minAllocationSize);
                        if (allocationSize < minAllocationSize
                                || allocationSize > BSONObjMaxUserSize / 2 ) {
                            allocationSize = minAllocationSize;
                        }
                        break;
                    }
                    invariant(allocationSize >= minAllocationSize);

                    // Copy the data to a new record. Because we orphaned the record freelist at the
                    // start of the compact, this insert will allocate a record in a new extent.
                    // See the comment in compact() for more details.
                    CompactDocWriter writer( recOld, rawDataSize, allocationSize );
                    StatusWith<DiskLoc> status = insertRecord( txn, &writer, false );
                    uassertStatusOK( status.getStatus() );
                    const Record* newRec = recordFor(status.getValue());
                    invariant(unsigned(newRec->netLength()) >= rawDataSize);
                    totalNetSize += newRec->netLength();

                    // Tells the caller that the record has been moved, so it can do things such as
                    // add it to indexes.
                    adaptor->inserted(newRec->toRecordData(), status.getValue());
                }

                // Remove the old record from the linked list of records withing the sourceExtent.
                // The old record is not added to the freelist as we will be freeing the whole
                // extent at the end.
                *txn->recoveryUnit()->writing(&sourceExtent->firstRecord) = nextSourceLoc;
                if (nextSourceLoc.isNull()) {
                    // Just moved the last record out of the extent. Mark extent as empty.
                    *txn->recoveryUnit()->writing(&sourceExtent->lastRecord) = DiskLoc();
                }
                else {
                    Record* newFirstRecord = recordFor(nextSourceLoc);
                    txn->recoveryUnit()->writingInt(newFirstRecord->prevOfs()) = DiskLoc::NullOfs;
                }

                // Adjust the stats to reflect the removal of the old record. The insert above
                // handled adjusting the stats for the new record.
                _details->incrementStats(txn, -(recOld->netLength()), -1);

                wunit.commit();
            }

            // The extent must now be empty.
            invariant(sourceExtent->firstRecord.isNull());
            invariant(sourceExtent->lastRecord.isNull());

            // We are still the first extent, but we must not be the only extent.
            invariant( _details->firstExtent(txn) == extentLoc );
            invariant( _details->lastExtent(txn) != extentLoc );

            // Remove the newly emptied sourceExtent from the extent linked list and return it to
            // the extent manager.
            WriteUnitOfWork wunit(txn);
            const DiskLoc newFirst = sourceExtent->xnext;
            _details->setFirstExtent( txn, newFirst );
            *txn->recoveryUnit()->writing(&_extentManager->getExtent( newFirst )->xprev) = DiskLoc();
            _extentManager->freeExtent( txn, extentLoc );
            wunit.commit();

            {
                const double oldPadding = oldObjSize ? double(oldObjSizeWithPadding) / oldObjSize
                                                     : 1.0; // defining 0/0 as 1 for this.

                log() << "compact finished extent #" << extentNumber << " containing " << nrecords
                      << " documents (" << totalNetSize / (1024*1024.0) << "MB)"
                      << " oldPadding: " << oldPadding;
            }
        }

    }
コード例 #3
0
    void SimpleRecordStoreV1::_compactExtent(OperationContext* txn,
                                             const DiskLoc diskloc,
                                             int extentNumber,
                                             RecordStoreCompactAdaptor* adaptor,
                                             const CompactOptions* compactOptions,
                                             CompactStats* stats ) {

        log() << "compact begin extent #" << extentNumber
              << " for namespace " << _ns << " " << diskloc;

        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent *e = _extentManager->getExtent( diskloc );
        e->assertOk();
        fassert( 17437, e->validates(diskloc) );

        {
            // the next/prev pointers within the extent might not be in order so we first
            // page the whole thing in sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            size_t length = e->length;

            touch_pages( reinterpret_cast<const char*>(e), length );
            int ms = t.millis();
            if( ms > 1000 )
                log() << "compact end paging in " << ms << "ms "
                      << e->length/1000000.0/t.seconds() << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            long long datasize = 0;
            long long nrecords = 0;
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    Record *recOld = recordFor(L);
                    L = getNextRecordInExtent(L);

                    if ( compactOptions->validateDocuments && !adaptor->isDataValid(recOld) ) {
                        // object is corrupt!
                        log() << "compact skipping corrupt document!";
                        stats->corruptDocuments++;
                    }
                    else {
                        unsigned dataSize = adaptor->dataSize( recOld );
                        unsigned docSize = dataSize;

                        nrecords++;
                        oldObjSize += docSize;
                        oldObjSizeWithPadding += recOld->netLength();

                        unsigned lenWHdr = docSize + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;

                        switch( compactOptions->paddingMode ) {
                        case CompactOptions::NONE:
                            if ( _details->isUserFlagSet(Flag_UsePowerOf2Sizes) )
                                lenWPadding = quantizePowerOf2AllocationSpace(lenWPadding);
                            break;
                        case CompactOptions::PRESERVE:
                            // if we are preserving the padding, the record should not change size
                            lenWPadding = recOld->lengthWithHeaders();
                            break;
                        case CompactOptions::MANUAL:
                            lenWPadding = compactOptions->computeRecordSize(lenWPadding);
                            if (lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) {
                                lenWPadding = lenWHdr;
                            }
                            break;
                        }

                        CompactDocWriter writer( recOld, dataSize, lenWPadding );
                        StatusWith<DiskLoc> status = insertRecord( txn, &writer, 0 );
                        uassertStatusOK( status.getStatus() );
                        datasize += recordFor( status.getValue() )->netLength();

                        adaptor->inserted( recordFor( status.getValue() ), status.getValue() );
                    }

                    if( L.isNull() ) {
                        // we just did the very last record from the old extent.  it's still pointed to
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    RARELY stopping = !txn->checkForInterruptNoAssert().isOK();
                    if( stopping || txn->recoveryUnit()->isCommitNeeded() ) {
                        *txn->recoveryUnit()->writing(&e->firstRecord) = L;
                        Record *r = recordFor(L);
                        txn->recoveryUnit()->writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                        txn->recoveryUnit()->commitIfNeeded();
                        txn->checkForInterrupt();
                    }
                }
            } // if !L.isNull()

            invariant( _details->firstExtent() == diskloc );
            invariant( _details->lastExtent() != diskloc );
            DiskLoc newFirst = e->xnext;
            _details->setFirstExtent( txn, newFirst );
            *txn->recoveryUnit()->writing(&_extentManager->getExtent( newFirst )->xprev) = DiskLoc();
            _extentManager->freeExtent( txn, diskloc );

            txn->recoveryUnit()->commitIfNeeded();

            {
                double op = 1.0;
                if( oldObjSize )
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                log() << "compact finished extent #" << extentNumber << " containing " << nrecords
                      << " documents (" << datasize/1000000.0 << "MB)"
                      << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100;
            }
        }

    }
コード例 #4
0
    Status RecordStoreV1Base::validate( OperationContext* txn,
                                        bool full, bool scanData,
                                        ValidateAdaptor* adaptor,
                                        ValidateResults* results, BSONObjBuilder* output ) const {

        // 1) basic status that require no iteration
        // 2) extent level info
        // 3) check extent start and end
        // 4) check each non-deleted record
        // 5) check deleted list

        // -------------

        // 1111111111111111111
        if ( isCapped() ){
            output->appendBool("capped", true);
            output->appendNumber("max", _details->maxCappedDocs());
        }

        output->appendNumber("datasize", _details->dataSize());
        output->appendNumber("nrecords", _details->numRecords());
        output->appendNumber("lastExtentSize", _details->lastExtentSize(txn));
        output->appendNumber("padding", _details->paddingFactor());

        if ( _details->firstExtent(txn).isNull() )
            output->append( "firstExtent", "null" );
        else
            output->append( "firstExtent",
                            str::stream() << _details->firstExtent(txn).toString()
                            << " ns:"
                            << _getExtent( txn, _details->firstExtent(txn) )->nsDiagnostic.toString());
        if ( _details->lastExtent(txn).isNull() )
            output->append( "lastExtent", "null" );
        else
            output->append( "lastExtent", str::stream() << _details->lastExtent(txn).toString()
                            << " ns:"
                            << _getExtent( txn, _details->lastExtent(txn) )->nsDiagnostic.toString());

        // 22222222222222222222222222
        { // validate extent basics
            BSONArrayBuilder extentData;
            int extentCount = 0;
            DiskLoc extentDiskLoc;
            try {
                if ( !_details->firstExtent(txn).isNull() ) {
                    _getExtent( txn, _details->firstExtent(txn) )->assertOk();
                    _getExtent( txn, _details->lastExtent(txn) )->assertOk();
                }

                extentDiskLoc = _details->firstExtent(txn);
                while (!extentDiskLoc.isNull()) {
                    Extent* thisExtent = _getExtent( txn, extentDiskLoc );
                    if (full) {
                        extentData << thisExtent->dump();
                    }
                    if (!thisExtent->validates(extentDiskLoc, &results->errors)) {
                        results->valid = false;
                    }
                    DiskLoc nextDiskLoc = thisExtent->xnext;
                    
                    if (extentCount > 0 && !nextDiskLoc.isNull()
                        &&  _getExtent( txn, nextDiskLoc )->xprev != extentDiskLoc) {
                        StringBuilder sb;
                        sb << "'xprev' pointer " << _getExtent( txn, nextDiskLoc )->xprev.toString()
                           << " in extent " << nextDiskLoc.toString()
                           << " does not point to extent " << extentDiskLoc.toString();
                        results->errors.push_back( sb.str() );
                        results->valid = false;
                    }
                    if (nextDiskLoc.isNull() && extentDiskLoc != _details->lastExtent(txn)) {
                        StringBuilder sb;
                        sb << "'lastExtent' pointer " << _details->lastExtent(txn).toString()
                           << " does not point to last extent in list " << extentDiskLoc.toString();
                        results->errors.push_back( sb.str() );
                        results->valid = false;
                    }
                    extentDiskLoc = nextDiskLoc;
                    extentCount++;
                    txn->checkForInterrupt();
                }
            }
            catch (const DBException& e) {
                StringBuilder sb;
                sb << "exception validating extent " << extentCount
                   << ": " << e.what();
                results->errors.push_back( sb.str() );
                results->valid = false;
                return Status::OK();
            }
            output->append("extentCount", extentCount);

            if ( full )
                output->appendArray( "extents" , extentData.arr() );

        }

        try {
            // 333333333333333333333333333
            bool testingLastExtent = false;
            try {
                DiskLoc firstExtentLoc = _details->firstExtent(txn);
                if (firstExtentLoc.isNull()) {
                    // this is ok
                }
                else {
                    output->append("firstExtentDetails", _getExtent(txn, firstExtentLoc)->dump());
                    if (!_getExtent(txn, firstExtentLoc)->xprev.isNull()) {
                        StringBuilder sb;
                        sb << "'xprev' pointer in 'firstExtent' " << _details->firstExtent(txn).toString()
                           << " is " << _getExtent(txn, firstExtentLoc)->xprev.toString()
                           << ", should be null";
                        results->errors.push_back( sb.str() );
                        results->valid = false;
                    }
                }
                testingLastExtent = true;
                DiskLoc lastExtentLoc = _details->lastExtent(txn);
                if (lastExtentLoc.isNull()) {
                    // this is ok
                }
                else {
                    if (firstExtentLoc != lastExtentLoc) {
                        output->append("lastExtentDetails", _getExtent(txn, lastExtentLoc)->dump());
                        if (!_getExtent(txn, lastExtentLoc)->xnext.isNull()) {
                            StringBuilder sb;
                            sb << "'xnext' pointer in 'lastExtent' " << lastExtentLoc.toString()
                               << " is " << _getExtent(txn, lastExtentLoc)->xnext.toString()
                               << ", should be null";
                            results->errors.push_back( sb.str() );
                            results->valid = false;
                        }
                    }
                }
            }
            catch (const DBException& e) {
                StringBuilder sb;
                sb << "exception processing '"
                   << (testingLastExtent ? "lastExtent" : "firstExtent")
                   << "': " << e.what();
                results->errors.push_back( sb.str() );
                results->valid = false;
            }

            // 4444444444444444444444444

            set<DiskLoc> recs;
            if( scanData ) {
                int n = 0;
                int nInvalid = 0;
                long long nQuantizedSize = 0;
                long long nPowerOf2QuantizedSize = 0;
                long long len = 0;
                long long nlen = 0;
                long long bsonLen = 0;
                int outOfOrder = 0;
                DiskLoc cl_last;

                scoped_ptr<RecordIterator> iterator( getIterator( txn,
                                                                  DiskLoc(),
                                                                  false,
                                                                  CollectionScanParams::FORWARD ) );
                DiskLoc cl;
                while ( !( cl = iterator->getNext() ).isNull() ) {
                    n++;

                    if ( n < 1000000 )
                        recs.insert(cl);
                    if ( isCapped() ) {
                        if ( cl < cl_last )
                            outOfOrder++;
                        cl_last = cl;
                    }

                    Record *r = recordFor(cl);
                    len += r->lengthWithHeaders();
                    nlen += r->netLength();

                    if ( r->lengthWithHeaders() ==
                         quantizeAllocationSpace( r->lengthWithHeaders() ) ) {
                        // Count the number of records having a size consistent with
                        // the quantizeAllocationSpace quantization implementation.
                        ++nQuantizedSize;
                    }

                    if ( r->lengthWithHeaders() ==
                         quantizePowerOf2AllocationSpace( r->lengthWithHeaders() ) ) {
                        // Count the number of records having a size consistent with the
                        // quantizePowerOf2AllocationSpace quantization implementation.
                        ++nPowerOf2QuantizedSize;
                    }

                    if (full){
                        size_t dataSize = 0;
                        const Status status = adaptor->validate( r->toRecordData(), &dataSize );
                        if (!status.isOK()) {
                            results->valid = false;
                            if (nInvalid == 0) // only log once;
                                results->errors.push_back( "invalid object detected (see logs)" );

                            nInvalid++;
                            log() << "Invalid object detected in " << _ns
                                  << ": " << status.reason();
                        }
                        else {
                            bsonLen += dataSize;
                        }
                    }
                }

                if ( isCapped() && !_details->capLooped() ) {
                    output->append("cappedOutOfOrder", outOfOrder);
                    if ( outOfOrder > 1 ) {
                        results->valid = false;
                        results->errors.push_back( "too many out of order records" );
                    }
                }
                output->append("objectsFound", n);

                if (full) {
                    output->append("invalidObjects", nInvalid);
                }

                output->appendNumber("nQuantizedSize", nQuantizedSize);
                output->appendNumber("nPowerOf2QuantizedSize", nPowerOf2QuantizedSize);
                output->appendNumber("bytesWithHeaders", len);
                output->appendNumber("bytesWithoutHeaders", nlen);

                if (full) {
                    output->appendNumber("bytesBson", bsonLen);
                }
            } // end scanData

            // 55555555555555555555555555
            BSONArrayBuilder deletedListArray;
            for ( int i = 0; i < Buckets; i++ ) {
                deletedListArray << _details->deletedListEntry(i).isNull();
            }

            int ndel = 0;
            long long delSize = 0;
            BSONArrayBuilder delBucketSizes;
            int incorrect = 0;
            for ( int i = 0; i < Buckets; i++ ) {
                DiskLoc loc = _details->deletedListEntry(i);
                try {
                    int k = 0;
                    while ( !loc.isNull() ) {
                        if ( recs.count(loc) )
                            incorrect++;
                        ndel++;

                        if ( loc.questionable() ) {
                            if( isCapped() && !loc.isValid() && i == 1 ) {
                                /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                                   see comments in namespace.h
                                */
                                break;
                            }

                            string err( str::stream() << "bad pointer in deleted record list: "
                                        << loc.toString()
                                        << " bucket: " << i
                                        << " k: " << k );
                            results->errors.push_back( err );
                            results->valid = false;
                            break;
                        }

                        const DeletedRecord* d = deletedRecordFor(loc);
                        delSize += d->lengthWithHeaders();
                        loc = d->nextDeleted();
                        k++;
                        txn->checkForInterrupt();
                    }
                    delBucketSizes << k;
                }
                catch (...) {
                    results->errors.push_back( (string)"exception in deleted chain for bucket " +
                                               BSONObjBuilder::numStr(i) );
                    results->valid = false;
                }
            }
            output->appendNumber("deletedCount", ndel);
            output->appendNumber("deletedSize", delSize);
            if ( full ) {
                output->append( "delBucketSizes", delBucketSizes.arr() );
            }

            if ( incorrect ) {
                results->errors.push_back( BSONObjBuilder::numStr(incorrect) +
                                           " records from datafile are in deleted list" );
                results->valid = false;
            }

        }
        catch (AssertionException) {
            results->errors.push_back( "exception during validate" );
            results->valid = false;
        }

        return Status::OK();
    }