void touchNs( const std::string& ns ) { 
        std::vector< touch_location > ranges;
        Client::ReadContext ctx(ns);
        {
            NamespaceDetails *nsd = nsdetails(ns.c_str());
            uassert( 16154, "namespace does not exist", nsd );

            for( DiskLoc L = nsd->firstExtent; !L.isNull(); L = L.ext()->xnext )  {
                MongoDataFile* mdf = cc().database()->getFile( L.a() );
                massert( 16238, "can't fetch extent file structure", mdf );
                touch_location tl;
                tl.fd = mdf->getFd();
                tl.offset = L.getOfs();
                tl.ext = L.ext();
                tl.length = tl.ext->length;
                
                ranges.push_back(tl);                
            }

        }
        LockMongoFilesShared lk;
        Lock::TempRelease tr;
        std::string progress_msg = "touch " + ns + " extents";
        ProgressMeterHolder pm( cc().curop()->setMessage( progress_msg.c_str() , ranges.size() ) );
        for ( std::vector< touch_location >::iterator it = ranges.begin(); it != ranges.end(); ++it ) {
            touch_pages( it->fd, it->offset, it->length, it->ext );
            pm.hit();
            killCurrentOp.checkForInterrupt(false);
        }
        pm.finished();
    }
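
None of these examples define touch_pages() itself. A minimal sketch of the fd-based form this first snippet calls, assuming a POSIX file descriptor and that one read per page is enough to fault the extent into the OS page cache (the signature and body here are hypothetical):

#include <unistd.h>   // pread, sysconf
#include <cstddef>    // size_t

// Hypothetical sketch of the fd-based touch_pages() called above; the real
// helper is not shown in these examples.
void touch_pages(int fd, size_t offset, size_t length, const void* /*ext*/) {
    const size_t pageSize = static_cast<size_t>(sysconf(_SC_PAGESIZE));
    char byte;
    for (size_t o = 0; o < length; o += pageSize) {
        // pread() pulls the page into the page cache without moving the file offset.
        (void)pread(fd, &byte, 1, static_cast<off_t>(offset + o));
    }
}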
    Status RecordStoreV1Base::touch( OperationContext* txn, BSONObjBuilder* output ) const {
        Timer t;

        // Note: when this class has document level locking, we'll need a lock to get extents
        // and then ideally only hold the collection lock from above while doing actual touching.

        std::vector<touch_location> ranges;
        {
            Extent* ext = _getExtent( _details->firstExtent() );
            while ( ext ) {
                touch_location tl;
                tl.root = reinterpret_cast<const char*>(ext);
                tl.length = ext->length;
                ranges.push_back(tl);
                if ( ext->xnext.isNull() )
                    ext = NULL;
                else
                    ext = _getExtent( ext->xnext );
            }
        }

        /* TODO(ERH)
        std::string progress_msg = "touch " + ns + " extents";
        ProgressMeterHolder pm(cc().curop()->setMessage(progress_msg.c_str(),
                                                        "Touch Progress",
                                                        ranges.size()));
        */

        for ( std::vector<touch_location>::iterator it = ranges.begin(); it != ranges.end(); ++it ) {
            touch_pages( it->root, it->length );
            //pm.hit();
            txn->checkForInterrupt();
        }
        //pm.finished();

        if ( output ) {
            output->append( "numRanges", static_cast<int>( ranges.size() ) );
            output->append( "millis", t.millis() );
        }

        return Status::OK();
    }
Status RecordStoreV1Base::touch(OperationContext* txn, BSONObjBuilder* output) const {
    Timer t;

    std::vector<touch_location> ranges;
    {
        DiskLoc nextLoc = _details->firstExtent(txn);
        Extent* ext = nextLoc.isNull() ? NULL : _getExtent(txn, nextLoc);
        while (ext) {
            touch_location tl;
            tl.root = reinterpret_cast<const char*>(ext);
            tl.length = ext->length;
            ranges.push_back(tl);

            nextLoc = ext->xnext;
            if (nextLoc.isNull())
                ext = NULL;
            else
                ext = _getExtent(txn, nextLoc);
        }
    }

    std::string progress_msg = "touch " + std::string(txn->getNS()) + " extents";
    stdx::unique_lock<Client> lk(*txn->getClient());
    ProgressMeterHolder pm(
        *txn->setMessage_inlock(progress_msg.c_str(), "Touch Progress", ranges.size()));
    lk.unlock();

    for (std::vector<touch_location>::iterator it = ranges.begin(); it != ranges.end(); ++it) {
        touch_pages(it->root, it->length);
        pm.hit();
        txn->checkForInterrupt();
    }
    pm.finished();

    if (output) {
        output->append("numRanges", static_cast<int>(ranges.size()));
        output->append("millis", t.millis());
    }

    return Status::OK();
}
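
The RecordStoreV1Base versions above (and the examples below) call a pointer-based touch_pages() instead. A minimal sketch, assuming the extent is memory-mapped so that plain reads fault its pages in:

#include <unistd.h>  // sysconf
#include <cstddef>   // size_t

// Sketch only: read one byte per page across the mapped extent. The volatile
// sink keeps the compiler from discarding the reads.
void touch_pages(const char* root, size_t length) {
    const size_t pageSize = static_cast<size_t>(sysconf(_SC_PAGESIZE));
    volatile char sink = 0;
    for (size_t o = 0; o < length; o += pageSize)
        sink = root[o];
}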
Example #4
    /** @return numRanges touched */
    int touchNs( const std::string& ns ) {
        std::vector< touch_location > ranges;
        boost::scoped_ptr<LockMongoFilesShared> mongoFilesLock;
        {
            Client::ReadContext ctx(ns);

            Database* db = ctx.ctx().db();
            ExtentManager& em = db->getExtentManager();

            Collection* collection = db->getCollection( ns );
            uassert( 16154, "namespace does not exist", collection );

            Extent* ext = em.getExtent( collection->details()->firstExtent() );
            while ( ext ) {
                touch_location tl;
                tl.root = reinterpret_cast<const char*>(ext);
                tl.length = ext->length;
                ranges.push_back(tl);
                ext = em.getNextExtent( ext );
            }
            mongoFilesLock.reset(new LockMongoFilesShared());
        }
        // DB read lock is dropped; no longer needed after this point.

        std::string progress_msg = "touch " + ns + " extents";
        ProgressMeterHolder pm(cc().curop()->setMessage(progress_msg.c_str(),
                                                        "Touch Progress",
                                                        ranges.size()));
        for ( std::vector< touch_location >::iterator it = ranges.begin(); it != ranges.end(); ++it ) {
            touch_pages( it->root, it->length );
            pm.hit();
            killCurrentOp.checkForInterrupt(false);
        }
        pm.finished();

        return static_cast<int>( ranges.size() );
    }
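
These touchNs() helpers back the server's "touch" command. An illustration of building that command document with the codebase's BSON macro (the collection name is made up):

// Shell equivalent: db.runCommand({ touch: "records", data: true, index: true })
BSONObj cmd = BSON( "touch" << "records"   // collection name (hypothetical)
                 << "data"  << true        // page in the data extents, as touchNs() walks them
                 << "index" << true );     // page in the index extents as well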
    void Collection::_compactExtent(const DiskLoc diskloc, int extentNumber,
                                    MultiIndexBlock& indexesToInsertTo,
                                    const CompactOptions* compactOptions, CompactStats* stats ) {

        log() << "compact begin extent #" << extentNumber
              << " for namespace " << _ns << " " << diskloc;

        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent *e = diskloc.ext();
        e->assertOk();
        verify( e->validates(diskloc) );

        {
            // the next/prev pointers within the extent might not be in order so we first
            // page the whole thing in sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            size_t length = e->length;

            touch_pages( reinterpret_cast<const char*>(e), length );
            int ms = t.millis();
            if( ms > 1000 )
                log() << "compact end paging in " << ms << "ms "
                      << e->length/1000000.0/t.seconds() << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            long long datasize = 0;
            long long nrecords = 0;
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    Record *recOld = L.rec();
                    L = getExtentManager()->getNextRecordInExtent(L);
                    BSONObj objOld = BSONObj::make(recOld);

                    if ( compactOptions->validateDocuments && !objOld.valid() ) {
                        // object is corrupt!
                        log() << "compact skipping corrupt document!";
                        stats->corruptDocuments++;
                    }
                    else {
                        unsigned docSize = objOld.objsize();

                        nrecords++;
                        oldObjSize += docSize;
                        oldObjSizeWithPadding += recOld->netLength();

                        unsigned lenWHdr = docSize + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;

                        switch( compactOptions->paddingMode ) {
                        case CompactOptions::NONE:
                            if ( details()->isUserFlagSet(NamespaceDetails::Flag_UsePowerOf2Sizes) )
                                lenWPadding = details()->quantizePowerOf2AllocationSpace(lenWPadding);
                            break;
                        case CompactOptions::PRESERVE:
                            // if we are preserving the padding, the record should not change size
                            lenWPadding = recOld->lengthWithHeaders();
                            break;
                        case CompactOptions::MANUAL:
                            lenWPadding = compactOptions->computeRecordSize(lenWPadding);
                            if (lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) {
                                lenWPadding = lenWHdr;
                            }
                            break;
                        }

                        CompactDocWriter writer( objOld, lenWPadding );
                        StatusWith<DiskLoc> status = _recordStore->insertRecord( &writer, 0 );
                        uassertStatusOK( status.getStatus() );
                        datasize += _recordStore->recordFor( status.getValue() )->netLength();

                        InsertDeleteOptions options;
                        options.logIfError = false;
                        options.dupsAllowed = true; // in compact we should be doing no checking

                        indexesToInsertTo.insert( objOld, status.getValue(), options );
                    }

                    if( L.isNull() ) {
                        // we just did the very last record from the old extent.  it's still pointed to
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                    if( stopping || getDur().aCommitIsNeeded() ) {
                        e->firstRecord.writing() = L;
                        Record *r = L.rec();
                        getDur().writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                        getDur().commitIfNeeded();
                        killCurrentOp.checkForInterrupt();
                    }
                }
            } // if !L.isNull()

            verify( details()->firstExtent() == diskloc );
            verify( details()->lastExtent() != diskloc );
            DiskLoc newFirst = e->xnext;
            details()->firstExtent().writing() = newFirst;
            newFirst.ext()->xprev.writing().Null();
            getDur().writing(e)->markEmpty();
            getExtentManager()->freeExtents( diskloc, diskloc );

            getDur().commitIfNeeded();

            {
                double op = 1.0;
                if( oldObjSize )
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                log() << "compact finished extent #" << extentNumber << " containing " << nrecords
                      << " documents (" << datasize/1000000.0 << "MB)"
                      << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100;
            }
        }

    }
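
For CompactOptions::NONE the allocation defers to quantizePowerOf2AllocationSpace, which is not shown in these examples. A sketch of the underlying idea only (the server's actual bucketing rules differ in detail): round each allocation up to the next power of two so freed records fall into a small set of reusable size classes.

// Round an allocation request up to the next power of two (illustrative only,
// not the server's exact quantization).
unsigned roundUpToPowerOf2(unsigned size) {
    unsigned quantized = 1;
    while (quantized < size)
        quantized <<= 1;
    return quantized;
}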
    void SimpleRecordStoreV1::_compactExtent(OperationContext* txn,
                                             const DiskLoc extentLoc,
                                             int extentNumber,
                                             RecordStoreCompactAdaptor* adaptor,
                                             const CompactOptions* compactOptions,
                                             CompactStats* stats ) {

        log() << "compact begin extent #" << extentNumber
              << " for namespace " << _ns << " " << extentLoc;

        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent* const sourceExtent = _extentManager->getExtent( extentLoc );
        sourceExtent->assertOk();
        fassert( 17437, sourceExtent->validates(extentLoc) );

        {
            // The next/prev Record pointers within the Extent might not be in order so we first
            // page in the whole Extent sequentially.
            // TODO benchmark on slow storage to verify this is measurably faster.
            log() << "compact paging in len=" << sourceExtent->length/1000000.0 << "MB" << endl;
            Timer t;
            size_t length = sourceExtent->length;

            touch_pages( reinterpret_cast<const char*>(sourceExtent), length );
            int ms = t.millis();
            if( ms > 1000 )
                log() << "compact end paging in " << ms << "ms "
                      << sourceExtent->length/1000000.0/t.seconds() << "MB/sec" << endl;
        }

        {
            // Move each Record out of this extent and insert it in to the "new" extents.
            log() << "compact copying records" << endl;
            long long totalNetSize = 0;
            long long nrecords = 0;
            DiskLoc nextSourceLoc = sourceExtent->firstRecord;
            while (!nextSourceLoc.isNull()) {
                txn->checkForInterrupt();

                WriteUnitOfWork wunit(txn);
                Record* recOld = recordFor(nextSourceLoc);
                RecordData oldData = recOld->toRecordData();
                nextSourceLoc = getNextRecordInExtent(txn, nextSourceLoc);

                if ( compactOptions->validateDocuments && !adaptor->isDataValid( oldData ) ) {
                    // object is corrupt!
                    log() << "compact removing corrupt document!";
                    stats->corruptDocuments++;
                }
                else {
                    // How much data is in the record. Excludes padding and Record headers.
                    const unsigned rawDataSize = adaptor->dataSize( oldData );

                    nrecords++;
                    oldObjSize += rawDataSize;
                    oldObjSizeWithPadding += recOld->netLength();

                    // Allocation sizes include the headers and possibly some padding.
                    const unsigned minAllocationSize = rawDataSize + Record::HeaderSize;
                    unsigned allocationSize = minAllocationSize;
                    switch( compactOptions->paddingMode ) {
                    case CompactOptions::NONE: // no padding, unless using powerOf2Sizes
                        if ( _details->isUserFlagSet(Flag_UsePowerOf2Sizes) )
                            allocationSize = quantizePowerOf2AllocationSpace(minAllocationSize);
                        else
                            allocationSize = minAllocationSize;
                        break;

                    case CompactOptions::PRESERVE: // keep original padding
                        allocationSize = recOld->lengthWithHeaders();
                        break;

                    case CompactOptions::MANUAL: // user specified how much padding to use
                        allocationSize = compactOptions->computeRecordSize(minAllocationSize);
                        if (allocationSize < minAllocationSize
                                || allocationSize > BSONObjMaxUserSize / 2 ) {
                            allocationSize = minAllocationSize;
                        }
                        break;
                    }
                    invariant(allocationSize >= minAllocationSize);

                    // Copy the data to a new record. Because we orphaned the record freelist at the
                    // start of the compact, this insert will allocate a record in a new extent.
                    // See the comment in compact() for more details.
                    CompactDocWriter writer( recOld, rawDataSize, allocationSize );
                    StatusWith<DiskLoc> status = insertRecord( txn, &writer, false );
                    uassertStatusOK( status.getStatus() );
                    const Record* newRec = recordFor(status.getValue());
                    invariant(unsigned(newRec->netLength()) >= rawDataSize);
                    totalNetSize += newRec->netLength();

                    // Tells the caller that the record has been moved, so it can do things such as
                    // add it to indexes.
                    adaptor->inserted(newRec->toRecordData(), status.getValue());
                }

                // Remove the old record from the linked list of records within the sourceExtent.
                // The old record is not added to the freelist as we will be freeing the whole
                // extent at the end.
                *txn->recoveryUnit()->writing(&sourceExtent->firstRecord) = nextSourceLoc;
                if (nextSourceLoc.isNull()) {
                    // Just moved the last record out of the extent. Mark extent as empty.
                    *txn->recoveryUnit()->writing(&sourceExtent->lastRecord) = DiskLoc();
                }
                else {
                    Record* newFirstRecord = recordFor(nextSourceLoc);
                    txn->recoveryUnit()->writingInt(newFirstRecord->prevOfs()) = DiskLoc::NullOfs;
                }

                // Adjust the stats to reflect the removal of the old record. The insert above
                // handled adjusting the stats for the new record.
                _details->incrementStats(txn, -(recOld->netLength()), -1);

                wunit.commit();
            }

            // The extent must now be empty.
            invariant(sourceExtent->firstRecord.isNull());
            invariant(sourceExtent->lastRecord.isNull());

            // We are still the first extent, but we must not be the only extent.
            invariant( _details->firstExtent(txn) == extentLoc );
            invariant( _details->lastExtent(txn) != extentLoc );

            // Remove the newly emptied sourceExtent from the extent linked list and return it to
            // the extent manager.
            WriteUnitOfWork wunit(txn);
            const DiskLoc newFirst = sourceExtent->xnext;
            _details->setFirstExtent( txn, newFirst );
            *txn->recoveryUnit()->writing(&_extentManager->getExtent( newFirst )->xprev) = DiskLoc();
            _extentManager->freeExtent( txn, extentLoc );
            wunit.commit();

            {
                const double oldPadding = oldObjSize ? double(oldObjSizeWithPadding) / oldObjSize
                                                     : 1.0; // defining 0/0 as 1 for this.

                log() << "compact finished extent #" << extentNumber << " containing " << nrecords
                      << " documents (" << totalNetSize / (1024*1024.0) << "MB)"
                      << " oldPadding: " << oldPadding;
            }
        }

    }
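
CompactDocWriter is used here but defined elsewhere in the server source. A sketch of its likely shape, inferred purely from the call sites above: copy the old record's payload into the new allocation and report the padded size so insertRecord() reserves the slack.

#include <cstring>  // memcpy

class CompactDocWriterSketch {               // hypothetical stand-in, not the real class
public:
    CompactDocWriterSketch(const Record* rec, unsigned dataSize, size_t allocationSize)
        : _rec(rec), _dataSize(dataSize), _allocationSize(allocationSize) {}

    void writeDocument(char* buf) const {
        memcpy(buf, _rec->data(), _dataSize);          // padding bytes beyond the data stay as-is
    }

    size_t documentSize() const {
        return _allocationSize - Record::HeaderSize;   // the record store adds the header itself
    }

private:
    const Record* _rec;
    unsigned _dataSize;
    size_t _allocationSize;
};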
Example #7
    /** @return number of skipped (invalid) documents */
    unsigned compactExtent(const char *ns, NamespaceDetails *d, const DiskLoc diskloc, int n,
                const scoped_array<IndexSpec> &indexSpecs,
                scoped_array<SortPhaseOne>& phase1, int nidx, bool validate, 
                double pf, int pb)
    {
        log() << "compact begin extent #" << n << " for namespace " << ns << endl;
        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent *e = diskloc.ext();
        e->assertOk();
        verify( e->validates() );
        unsigned skipped = 0;

        {
            // the next/prev pointers within the extent might not be in order so we first page the whole thing in 
            // sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            MongoDataFile* mdf = cc().database()->getFile( diskloc.a() );
            HANDLE fd = mdf->getFd();
            int offset = diskloc.getOfs();
            Extent* ext = diskloc.ext();
            size_t length = ext->length;
                
            touch_pages(fd, offset, length, ext);
            int ms = t.millis();
            if( ms > 1000 ) 
                log() << "compact end paging in " << ms << "ms " << e->length/1000000.0/ms << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            long long datasize = 0;
            long long nrecords = 0;
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    Record *recOld = L.rec();
                    L = recOld->nextInExtent(L);
                    BSONObj objOld = BSONObj::make(recOld);

                    if( !validate || objOld.valid() ) {
                        nrecords++;
                        unsigned sz = objOld.objsize();

                        oldObjSize += sz;
                        oldObjSizeWithPadding += recOld->netLength();

                        unsigned lenWHdr = sz + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;
                        {
                            lenWPadding = static_cast<unsigned>(pf*lenWPadding);
                            lenWPadding += pb;
                            lenWPadding = lenWPadding & quantizeMask(lenWPadding);
                            if( lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) { 
                                lenWPadding = lenWHdr;
                            }
                        }
                        DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWPadding, false);
                        uassert(14024, "compact error out of space during compaction", !loc.isNull());
                        Record *recNew = loc.rec();
                        datasize += recNew->netLength();
                        recNew = (Record *) getDur().writingPtr(recNew, lenWHdr);
                        addRecordToRecListInExtent(recNew, loc);
                        memcpy(recNew->data(), objOld.objdata(), sz);

                        {
                            // extract keys for all indexes we will be rebuilding
                            for( int x = 0; x < nidx; x++ ) { 
                                phase1[x].addKeys(indexSpecs[x], objOld, loc);
                            }
                        }
                    }
                    else { 
                        if( ++skipped <= 10 )
                            log() << "compact skipping invalid object" << endl;
                    }

                    if( L.isNull() ) { 
                        // we just did the very last record from the old extent.  it's still pointed to 
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                    if( stopping || getDur().aCommitIsNeeded() ) {
                        e->firstRecord.writing() = L;
                        Record *r = L.rec();
                        getDur().writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                        getDur().commitIfNeeded();
                        killCurrentOp.checkForInterrupt(false);
                    }
                }
            } // if !L.isNull()

            verify( d->firstExtent == diskloc );
            verify( d->lastExtent != diskloc );
            DiskLoc newFirst = e->xnext;
            d->firstExtent.writing() = newFirst;
            newFirst.ext()->xprev.writing().Null();
            getDur().writing(e)->markEmpty();
            freeExtents( diskloc, diskloc );
            // update datasize/record count for this namespace's extent
            {
                NamespaceDetails::Stats *s = getDur().writing(&d->stats);
                s->datasize += datasize;
                s->nrecords += nrecords;
            }

            getDur().commitIfNeeded();

            { 
                double op = 1.0;
                if( oldObjSize ) 
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                log() << "compact finished extent #" << n << " containing " << nrecords << " documents (" << datasize/1000000.0 << "MB)"
                    << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100
                    << endl;                    
            }
        }

        return skipped;
    }
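
The pf/pb arithmetic inside the loop above, pulled out as a standalone helper for readability. quantizeMask() is not defined in these examples and is taken as given: it masks the size down to a coarser allocation boundary.

unsigned computePaddedLength(unsigned lenWHdr, double pf, int pb) {
    unsigned lenWPadding = static_cast<unsigned>(pf * lenWHdr);   // multiplicative padding factor
    lenWPadding += pb;                                            // additive padding bytes
    lenWPadding &= quantizeMask(lenWPadding);                     // snap to an allocation boundary
    if (lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2)
        lenWPadding = lenWHdr;                                    // reject nonsensical results
    return lenWPadding;
}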
Example #8
    /** @return number of skipped (invalid) documents */
    unsigned compactExtent(const char *ns, NamespaceDetails *d, const DiskLoc diskloc, int n,
                           int nidx, bool validate, double pf, int pb, bool useDefaultPadding,
                           bool preservePadding) {

        log() << "compact begin extent #" << n << " for namespace " << ns << endl;
        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent *e = diskloc.ext();
        e->assertOk();
        verify( e->validates(diskloc) );
        unsigned skipped = 0;

        Database* db = cc().database();

        {
            // the next/prev pointers within the extent might not be in order so we first
            // page the whole thing in sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            Extent* ext = db->getExtentManager().getExtent( diskloc );
            size_t length = ext->length;

            touch_pages( reinterpret_cast<const char*>(ext), length );
            int ms = t.millis();
            if( ms > 1000 )
                log() << "compact end paging in " << ms << "ms "
                      << e->length/1000000.0/ms << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            long long datasize = 0;
            long long nrecords = 0;
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    Record *recOld = L.rec();
                    L = db->getExtentManager().getNextRecordInExtent(L);
                    BSONObj objOld = BSONObj::make(recOld);

                    if( !validate || objOld.valid() ) {
                        nrecords++;
                        unsigned sz = objOld.objsize();

                        oldObjSize += sz;
                        oldObjSizeWithPadding += recOld->netLength();

                        unsigned lenWHdr = sz + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;
                        // if we are preserving the padding, the record should not change size
                        if (preservePadding) {
                            lenWPadding = recOld->lengthWithHeaders();
                        }
                        // maintain UsePowerOf2Sizes if no padding values were passed in
                        else if (d->isUserFlagSet(NamespaceDetails::Flag_UsePowerOf2Sizes)
                                && useDefaultPadding) {
                            lenWPadding = d->quantizePowerOf2AllocationSpace(lenWPadding);
                        }
                        // otherwise use the padding values (pf and pb) that were passed in
                        else {
                            lenWPadding = static_cast<unsigned>(pf*lenWPadding);
                            lenWPadding += pb;
                            lenWPadding = lenWPadding & quantizeMask(lenWPadding);
                        }
                        if (lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) { 
                            lenWPadding = lenWHdr;
                        }
                        DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWPadding, false);
                        uassert(14024, "compact error out of space during compaction", !loc.isNull());
                        Record *recNew = loc.rec();
                        datasize += recNew->netLength();
                        recNew = (Record *) getDur().writingPtr(recNew, lenWHdr);
                        addRecordToRecListInExtent(recNew, loc);
                        memcpy(recNew->data(), objOld.objdata(), sz);
                    }
                    else { 
                        if( ++skipped <= 10 )
                            log() << "compact skipping invalid object" << endl;
                    }

                    if( L.isNull() ) { 
                        // we just did the very last record from the old extent.  it's still pointed to 
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                    if( stopping || getDur().aCommitIsNeeded() ) {
                        e->firstRecord.writing() = L;
                        Record *r = L.rec();
                        getDur().writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                        getDur().commitIfNeeded();
                        killCurrentOp.checkForInterrupt(false);
                    }
                }
            } // if !L.isNull()

            verify( d->firstExtent() == diskloc );
            verify( d->lastExtent() != diskloc );
            DiskLoc newFirst = e->xnext;
            d->firstExtent().writing() = newFirst;
            newFirst.ext()->xprev.writing().Null();
            getDur().writing(e)->markEmpty();
            cc().database()->getExtentManager().freeExtents( diskloc, diskloc );

            // update datasize/record count for this namespace's extent
            d->incrementStats( datasize, nrecords );

            getDur().commitIfNeeded();

            { 
                double op = 1.0;
                if( oldObjSize ) 
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                log() << "compact finished extent #" << n << " containing " << nrecords << " documents (" << datasize/1000000.0 << "MB)"
                    << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100
                    << endl;                    
            }
        }

        return skipped;
    }
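
The pf, pb, and preservePadding parameters originate as options of the "compact" command. An illustration with the BSON macro (the collection name is made up):

// Shell equivalent: db.runCommand({ compact: "records", paddingFactor: 1.1, paddingBytes: 0 })
BSONObj cmd = BSON( "compact"       << "records"  // collection name (hypothetical)
                 << "paddingFactor" << 1.1        // becomes pf: scale each record's size
                 << "paddingBytes"  << 0 );       // becomes pb: then add a fixed byte count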
    void SimpleRecordStoreV1::_compactExtent(OperationContext* txn,
                                             const DiskLoc diskloc,
                                             int extentNumber,
                                             RecordStoreCompactAdaptor* adaptor,
                                             const CompactOptions* compactOptions,
                                             CompactStats* stats ) {

        log() << "compact begin extent #" << extentNumber
              << " for namespace " << _ns << " " << diskloc;

        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent *e = _extentManager->getExtent( diskloc );
        e->assertOk();
        fassert( 17437, e->validates(diskloc) );

        {
            // the next/prev pointers within the extent might not be in order so we first
            // page the whole thing in sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            size_t length = e->length;

            touch_pages( reinterpret_cast<const char*>(e), length );
            int ms = t.millis();
            if( ms > 1000 )
                log() << "compact end paging in " << ms << "ms "
                      << e->length/1000000.0/t.seconds() << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            long long datasize = 0;
            long long nrecords = 0;
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    Record *recOld = recordFor(L);
                    L = getNextRecordInExtent(L);

                    if ( compactOptions->validateDocuments && !adaptor->isDataValid(recOld) ) {
                        // object is corrupt!
                        log() << "compact skipping corrupt document!";
                        stats->corruptDocuments++;
                    }
                    else {
                        unsigned dataSize = adaptor->dataSize( recOld );
                        unsigned docSize = dataSize;

                        nrecords++;
                        oldObjSize += docSize;
                        oldObjSizeWithPadding += recOld->netLength();

                        unsigned lenWHdr = docSize + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;

                        switch( compactOptions->paddingMode ) {
                        case CompactOptions::NONE:
                            if ( _details->isUserFlagSet(Flag_UsePowerOf2Sizes) )
                                lenWPadding = quantizePowerOf2AllocationSpace(lenWPadding);
                            break;
                        case CompactOptions::PRESERVE:
                            // if we are preserving the padding, the record should not change size
                            lenWPadding = recOld->lengthWithHeaders();
                            break;
                        case CompactOptions::MANUAL:
                            lenWPadding = compactOptions->computeRecordSize(lenWPadding);
                            if (lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) {
                                lenWPadding = lenWHdr;
                            }
                            break;
                        }

                        CompactDocWriter writer( recOld, dataSize, lenWPadding );
                        StatusWith<DiskLoc> status = insertRecord( txn, &writer, 0 );
                        uassertStatusOK( status.getStatus() );
                        datasize += recordFor( status.getValue() )->netLength();

                        adaptor->inserted( recordFor( status.getValue() ), status.getValue() );
                    }

                    if( L.isNull() ) {
                        // we just did the very last record from the old extent.  it's still pointed to
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    RARELY stopping = !txn->checkForInterruptNoAssert().isOK();
                    if( stopping || txn->recoveryUnit()->isCommitNeeded() ) {
                        *txn->recoveryUnit()->writing(&e->firstRecord) = L;
                        Record *r = recordFor(L);
                        txn->recoveryUnit()->writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                        txn->recoveryUnit()->commitIfNeeded();
                        txn->checkForInterrupt();
                    }
                }
            } // if !L.isNull()

            invariant( _details->firstExtent() == diskloc );
            invariant( _details->lastExtent() != diskloc );
            DiskLoc newFirst = e->xnext;
            _details->setFirstExtent( txn, newFirst );
            *txn->recoveryUnit()->writing(&_extentManager->getExtent( newFirst )->xprev) = DiskLoc();
            _extentManager->freeExtent( txn, diskloc );

            txn->recoveryUnit()->commitIfNeeded();

            {
                double op = 1.0;
                if( oldObjSize )
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                log() << "compact finished extent #" << extentNumber << " containing " << nrecords
                      << " documents (" << datasize/1000000.0 << "MB)"
                      << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100;
            }
        }

    }
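
A note on the size bookkeeping all of these examples share, with the Record accessor relationships inferred from the call sites rather than from the class definition:

// Assumed from usage throughout this section:
//   lengthWithHeaders() = Record::HeaderSize + netLength()
//   lenWHdr             = dataSize (raw document bytes) + Record::HeaderSize
// so netLength() counts data plus padding, and the final log line's
// oldObjSizeWithPadding / oldObjSize ratio is the average padding factor.
unsigned paddingBytes(const Record* rec, unsigned dataSize) {
    return rec->netLength() - dataSize;  // slack inside the allocation, headers excluded
}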