Ejemplo n.º 1
0
    void RecordStoreV1Base::deleteRecord( TransactionExperiment* txn, const DiskLoc& dl ) {

        Record* todelete = recordFor( dl );

        /* remove ourself from the record next/prev chain */
        {
            if ( todelete->prevOfs() != DiskLoc::NullOfs ) {
                DiskLoc prev = getPrevRecordInExtent( dl );
                Record* prevRecord = recordFor( prev );
                txn->writingInt( prevRecord->nextOfs() ) = todelete->nextOfs();
            }

            if ( todelete->nextOfs() != DiskLoc::NullOfs ) {
                DiskLoc next = getNextRecord( dl );
                Record* nextRecord = recordFor( next );
                txn->writingInt( nextRecord->prevOfs() ) = todelete->prevOfs();
            }
        }

        /* remove ourself from extent pointers */
        {
            Extent *e = txn->writing( _getExtent( _getExtentLocForRecord( dl ) ) );
            if ( e->firstRecord == dl ) {
                if ( todelete->nextOfs() == DiskLoc::NullOfs )
                    e->firstRecord.Null();
                else
                    e->firstRecord.set(dl.a(), todelete->nextOfs() );
            }
            if ( e->lastRecord == dl ) {
                if ( todelete->prevOfs() == DiskLoc::NullOfs )
                    e->lastRecord.Null();
                else
                    e->lastRecord.set(dl.a(), todelete->prevOfs() );
            }
        }

        /* add to the free list */
        {
            _details->incrementStats( txn, -1 * todelete->netLength(), -1 );

            if ( _isSystemIndexes ) {
                /* temp: if in system.indexes, don't reuse, and zero out: we want to be
                   careful until validated more, as IndexDetails has pointers
                   to this disk location.  so an incorrectly done remove would cause
                   a lot of problems.
                */
                memset( txn->writingPtr(todelete, todelete->lengthWithHeaders() ),
                        0, todelete->lengthWithHeaders() );
            }
            else {
                DEV {
                    unsigned long long *p = reinterpret_cast<unsigned long long *>( todelete->data() );
                    *txn->writing(p) = 0;
                }
                addDeletedRec(txn, dl);
            }
        }

    }
Ejemplo n.º 2
0
    /* deletes a record, just the pdfile portion -- no index cleanup, no cursor cleanup, etc.
       caller must check if capped
    */
    void DataFileMgr::_deleteRecord(NamespaceDetails *d, const StringData& ns, Record *todelete, const DiskLoc& dl) {
        /* remove ourself from the record next/prev chain */
        {
            if ( todelete->prevOfs() != DiskLoc::NullOfs )
                getDur().writingInt( todelete->getPrev(dl).rec()->nextOfs() ) = todelete->nextOfs();
            if ( todelete->nextOfs() != DiskLoc::NullOfs )
                getDur().writingInt( todelete->getNext(dl).rec()->prevOfs() ) = todelete->prevOfs();
        }

        /* remove ourself from extent pointers */
        {
            Extent *e = getDur().writing( todelete->myExtent(dl) );
            if ( e->firstRecord == dl ) {
                if ( todelete->nextOfs() == DiskLoc::NullOfs )
                    e->firstRecord.Null();
                else
                    e->firstRecord.set(dl.a(), todelete->nextOfs() );
            }
            if ( e->lastRecord == dl ) {
                if ( todelete->prevOfs() == DiskLoc::NullOfs )
                    e->lastRecord.Null();
                else
                    e->lastRecord.set(dl.a(), todelete->prevOfs() );
            }
        }

        /* add to the free list */
        {
            d->incrementStats( -1 * todelete->netLength(), -1 );

            if ( nsToCollectionSubstring(ns) == "system.indexes") {
                /* temp: if in system.indexes, don't reuse, and zero out: we want to be
                   careful until validated more, as IndexDetails has pointers
                   to this disk location.  so an incorrectly done remove would cause
                   a lot of problems.
                */
                memset(getDur().writingPtr(todelete, todelete->lengthWithHeaders() ), 0, todelete->lengthWithHeaders() );
            }
            else {
                DEV {
                    unsigned long long *p = reinterpret_cast<unsigned long long *>( todelete->data() );
                    *getDur().writing(p) = 0;
                    //DEV memset(todelete->data, 0, todelete->netLength()); // attempt to notice invalid reuse.
                }
                d->addDeletedRec((DeletedRecord*)todelete, dl);
            }
        }
    }
Ejemplo n.º 3
0
    /**
     * analyzeDiskStorage helper which processes a single record.
     */
    void processDeletedRecord(const DiskLoc& dl, const DeletedRecord* dr, const Extent* ex,
                              const AnalyzeParams& params, int bucketNum,
                              vector<DiskStorageData>& sliceData,
                              BSONArrayBuilder* deletedRecordsArrayBuilder) {

        killCurrentOp.checkForInterrupt();

        int extentOfs = ex->myLoc.getOfs();

        if (! (dl.a() == ex->myLoc.a() &&
               dl.getOfs() + dr->lengthWithHeaders() > extentOfs &&
               dl.getOfs() < extentOfs + ex->length) ) {

            return;
        }

        RecPos pos = RecPos::from(dl.getOfs(), dr->lengthWithHeaders(), extentOfs, params);
        bool spansRequestedArea = false;
        for (RecPos::SliceIterator it = pos.iterateSlices(); !it.end(); ++it) {

            DiskStorageData& slice = sliceData[it->sliceNum];
            slice.freeRecords[bucketNum] += it->ratioHere;
            spansRequestedArea = true;
        }

        if (deletedRecordsArrayBuilder != NULL && spansRequestedArea) {
            BSONObjBuilder(deletedRecordsArrayBuilder->subobjStart())
                .append("ofs", dl.getOfs() - extentOfs)
                .append("recBytes", dr->lengthWithHeaders());
        }
    }
Ejemplo n.º 4
0
    DiskLoc _repairExtent( Database* db , string ns, bool forward , DiskLoc eLoc ){
        LogIndentLevel lil;
        
        if ( eLoc.getOfs() <= 0 ){
            error() << "invalid extent ofs: " << eLoc.getOfs() << endl;
            return DiskLoc();
        }
        

        MongoDataFile * mdf = db->getFile( eLoc.a() );

        Extent * e = mdf->debug_getExtent( eLoc );
        if ( ! e->isOk() ){
            warning() << "Extent not ok magic: " << e->magic << " going to try to continue" << endl;
        }
        
        log() << "length:" << e->length << endl;
        
        LogIndentLevel lil2;
        
        DiskLoc loc = forward ? e->firstRecord : e->lastRecord;
        while ( ! loc.isNull() ){
            if ( loc.getOfs() <= 0 ){
                error() << "offset is 0 for record which should be impossible" << endl;
                break;
            }
            log() << loc << endl;
            Record* rec = loc.rec();
            log() << loc.obj() << endl;
            loc = forward ? rec->getNext( loc ) : rec->getPrev( loc );
        }
        return forward ? e->xnext : e->xprev;
        
    }
Ejemplo n.º 5
0
 // bypass standard alloc/insert routines to use the extent we want.
 static DiskLoc insert( DiskLoc ext, int i ) {
     BSONObjBuilder b;
     b.append( "a", i );
     BSONObj o = b.done();
     int len = o.objsize();
     Extent *e = ext.ext();
     int ofs;
     if ( e->lastRecord.isNull() )
         ofs = ext.getOfs() + ( e->extentData - (char *)e );
     else
         ofs = e->lastRecord.getOfs() + e->lastRecord.rec()->lengthWithHeaders;
     DiskLoc dl( ext.a(), ofs );
     Record *r = dl.rec();
     r->lengthWithHeaders = Record::HeaderSize + len;
     r->extentOfs = e->myLoc.getOfs();
     r->nextOfs = DiskLoc::NullOfs;
     r->prevOfs = e->lastRecord.isNull() ? DiskLoc::NullOfs : e->lastRecord.getOfs();
     memcpy( r->data, o.objdata(), len );
     if ( e->firstRecord.isNull() )
         e->firstRecord = dl;
     else
         e->lastRecord.rec()->nextOfs = ofs;
     e->lastRecord = dl;
     return dl;
 }
Ejemplo n.º 6
0
    void touchNs( const std::string& ns ) { 
        std::vector< touch_location > ranges;
        Client::ReadContext ctx(ns);
        {

            NamespaceDetails *nsd = nsdetails(ns.c_str());
            uassert( 16154, "namespace does not exist", nsd );
            
            for( DiskLoc L = nsd->firstExtent; !L.isNull(); L = L.ext()->xnext )  {
                MongoDataFile* mdf = cc().database()->getFile( L.a() );
                massert( 16238, "can't fetch extent file structure", mdf );
                touch_location tl;
                tl.fd = mdf->getFd();
                tl.offset = L.getOfs();
                tl.ext = L.ext();
                tl.length = tl.ext->length;
                
                ranges.push_back(tl);                
            }

        }
        LockMongoFilesShared lk;
        Lock::TempRelease tr;
        std::string progress_msg = "touch " + ns + " extents";
        ProgressMeterHolder pm( cc().curop()->setMessage( progress_msg.c_str() , ranges.size() ) );
        for ( std::vector< touch_location >::iterator it = ranges.begin(); it != ranges.end(); ++it ) {
            touch_pages( it->fd, it->offset, it->length, it->ext );
            pm.hit();
            killCurrentOp.checkForInterrupt(false);
        }
        pm.finished();
    }
Ejemplo n.º 7
0
    /**
     * analyzeDiskStorage helper which processes a single record.
     */
    void processRecord(const DiskLoc& dl, const DiskLoc& prevDl, const Record* r, int extentOfs,
                       const AnalyzeParams& params, vector<DiskStorageData>& sliceData,
                       BSONArrayBuilder* recordsArrayBuilder) {
        killCurrentOp.checkForInterrupt();

        BSONObj obj = dl.obj();
        int recBytes = r->lengthWithHeaders();
        double characteristicFieldValue = 0;
        bool hasCharacteristicField = extractCharacteristicFieldValue(obj, params,
                                                                      characteristicFieldValue);
        bool isLocatedBeforePrevious = dl.a() < prevDl.a();

        RecPos pos = RecPos::from(dl.getOfs(), recBytes, extentOfs, params);
        bool spansRequestedArea = false;
        for (RecPos::SliceIterator it = pos.iterateSlices(); !it.end(); ++it) {
            spansRequestedArea = true;
            DiskStorageData& slice = sliceData[it->sliceNum];
            slice.numEntries += it->ratioHere;
            slice.recBytes += it->sizeHere;
            slice.bsonBytes += static_cast<long long>(it->ratioHere * obj.objsize());
            if (hasCharacteristicField) {
                slice.characteristicCount += it->ratioHere;
                slice.characteristicSum += it->ratioHere * characteristicFieldValue;
            }
            if (isLocatedBeforePrevious) {
                slice.outOfOrderRecs += it->ratioHere;
            }
        }

        if (recordsArrayBuilder != NULL && spansRequestedArea) {
            DEV {
                int startsAt = dl.getOfs() - extentOfs;
                int endsAt = startsAt + recBytes;
                verify((startsAt < params.startOfs && endsAt > params.startOfs) ||
                       (startsAt < params.endOfs && endsAt >= params.endOfs) ||
                       (startsAt >= params.startOfs && endsAt < params.endOfs));
            }
            BSONObjBuilder recordBuilder(recordsArrayBuilder->subobjStart());
            recordBuilder.append("ofs", dl.getOfs() - extentOfs);
            recordBuilder.append("recBytes", recBytes);
            recordBuilder.append("bsonBytes", obj.objsize());
            recordBuilder.append("_id", obj["_id"]);
            if (hasCharacteristicField) {
                recordBuilder.append("characteristic", characteristicFieldValue);
            }
            recordBuilder.doneFast();
        }
Ejemplo n.º 8
0
    /* combine adjacent deleted records *for the current extent* of the capped collection

       this is O(n^2) but we call it for capped tables where typically n==1 or 2!
       (or 3...there will be a little unused sliver at the end of the extent.)
    */
    void NamespaceDetails::compact() {
        DDD( "NamespaceDetails::compact enter" );
        verify( isCapped() );
        
        vector<DiskLoc> drecs;
        
        // Pull out capExtent's DRs from deletedList
        DiskLoc i = cappedFirstDeletedInCurExtent();
        for (; !i.isNull() && inCapExtent( i ); i = i.drec()->nextDeleted() ) {
            DDD( "\t" << i );
            drecs.push_back( i );
        }

        getDur().writingDiskLoc( cappedFirstDeletedInCurExtent() ) = i;
        
        std::sort( drecs.begin(), drecs.end() );
        DDD( "\t drecs.size(): " << drecs.size() );

        vector<DiskLoc>::const_iterator j = drecs.begin();
        verify( j != drecs.end() );
        DiskLoc a = *j;
        while ( 1 ) {
            j++;
            if ( j == drecs.end() ) {
                DDD( "\t compact adddelrec" );
                addDeletedRec(a.drec(), a);
                break;
            }
            DiskLoc b = *j;
            while ( a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders() == b.getOfs() ) {
                // a & b are adjacent.  merge.
                getDur().writingInt( a.drec()->lengthWithHeaders() ) += b.drec()->lengthWithHeaders();
                j++;
                if ( j == drecs.end() ) {
                    DDD( "\t compact adddelrec2" );
                    addDeletedRec(a.drec(), a);
                    return;
                }
                b = *j;
            }
            DDD( "\t compact adddelrec3" );
            addDeletedRec(a.drec(), a);
            a = b;
        }

    }
Ejemplo n.º 9
0
    DiskLoc ExtentManager::getNextRecordInExtent( const DiskLoc& loc ) const {
        int nextOffset = recordFor( loc )->nextOfs();

        if ( nextOffset == DiskLoc::NullOfs )
            return DiskLoc();

        fassert( 16967, abs(nextOffset) >= 8 ); // defensive
        return DiskLoc( loc.a(), nextOffset );
    }
Ejemplo n.º 10
0
    DiskLoc RecordStoreV1Base::getNextRecordInExtent( const DiskLoc& loc ) const {
        int nextOffset = recordFor( loc )->nextOfs();

        if ( nextOffset == DiskLoc::NullOfs )
            return DiskLoc();

        fassert( 17441, abs(nextOffset) >= 8 ); // defensive
        return DiskLoc( loc.a(), nextOffset );
    }
Ejemplo n.º 11
0
    DiskLoc ExtentManager::getPrevRecordInExtent( const DiskLoc& loc ) const {
        int prevOffset = recordFor( loc )->prevOfs();

        if ( prevOffset == DiskLoc::NullOfs )
            return DiskLoc();

        fassert( 16968, abs(prevOffset) >= 8 ); // defensive
        return DiskLoc( loc.a(), prevOffset );
    }
Ejemplo n.º 12
0
    DiskLoc RecordStoreV1Base::getPrevRecordInExtent( OperationContext* txn, const DiskLoc& loc ) const {
        int prevOffset = recordFor( loc )->prevOfs();

        if ( prevOffset == DiskLoc::NullOfs )
            return DiskLoc();

        fassert( 17442, abs(prevOffset) >= 8 ); // defensive
        DiskLoc result( loc.a(), prevOffset );
        return result;
    }
Ejemplo n.º 13
0
    DiskLoc RecordStoreV1Base::getPrevRecordInExtent( const DiskLoc& loc ) const {
        int prevOffset = recordFor( loc )->prevOfs();

        if ( prevOffset == DiskLoc::NullOfs )
            return DiskLoc();

        fassert( 17442, abs(prevOffset) >= 8 ); // defensive
        return DiskLoc( loc.a(), prevOffset );

    }
Ejemplo n.º 14
0
Archivo: cap.cpp Proyecto: ALFIO/mongo
    /* combine adjacent deleted records *for the current extent* of the capped collection
     
       this is O(n^2) but we call it for capped tables where typically n==1 or 2!
       (or 3...there will be a little unused sliver at the end of the extent.)
    */
    void NamespaceDetails::compact() {
        assert(capped);

        list<DiskLoc> drecs;

        // Pull out capExtent's DRs from deletedList
        DiskLoc i = cappedFirstDeletedInCurExtent();
        for (; !i.isNull() && inCapExtent( i ); i = i.drec()->nextDeleted )
            drecs.push_back( i );

        getDur().writingDiskLoc( cappedFirstDeletedInCurExtent() ) = i;

        // This is the O(n^2) part.
        drecs.sort();

        list<DiskLoc>::iterator j = drecs.begin();
        assert( j != drecs.end() );
        DiskLoc a = *j;
        while ( 1 ) {
            j++;
            if ( j == drecs.end() ) {
                DEBUGGING out() << "TEMP: compact adddelrec\n";
                addDeletedRec(a.drec(), a);
                break;
            }
            DiskLoc b = *j;
            while ( a.a() == b.a() && a.getOfs() + a.drec()->lengthWithHeaders == b.getOfs() ) {
                // a & b are adjacent.  merge.
                getDur().writingInt( a.drec()->lengthWithHeaders ) += b.drec()->lengthWithHeaders;
                j++;
                if ( j == drecs.end() ) {
                    DEBUGGING out() << "temp: compact adddelrec2\n";
                    addDeletedRec(a.drec(), a);
                    return;
                }
                b = *j;
            }
            DEBUGGING out() << "temp: compact adddelrec3\n";
            addDeletedRec(a.drec(), a);
            a = b;
        }
    }
Ejemplo n.º 15
0
    bool NamespaceDetails::inCapExtent( const DiskLoc &dl ) const {
        invariant( !dl.isNull() );

        if ( dl.a() != _capExtent.a() )
            return false;

        if ( dl.getOfs() < _capExtent.getOfs() )
            return false;

        const Extent* e = theCapExtent();
        int end = _capExtent.getOfs() + e->length;
        return dl.getOfs() <= end;
    }
Ejemplo n.º 16
0
 void DiskLoc56Bit::operator=(const DiskLoc& loc) {
     ofs = loc.getOfs();
     int la = loc.a();
     invariant( la <= 0xffffff ); // must fit in 3 bytes
     if( la < 0 ) {
         if ( la != -1 ) {
             log() << "btree diskloc isn't negative 1: " << la << std::endl;
             invariant ( la == -1 );
         }
         la = 0;
         ofs = OurNullOfs;
     }
     memcpy(_a, &la, 3); // endian
 }
Ejemplo n.º 17
0
        void validateNS(const char *ns, NamespaceDetails *d, const BSONObj& cmdObj, BSONObjBuilder& result) {
            const bool full = cmdObj["full"].trueValue();
            const bool scanData = full || cmdObj["scandata"].trueValue();

            bool valid = true;
            BSONArrayBuilder errors; // explanation(s) for why valid = false
            if ( d->isCapped() ){
                result.append("capped", d->isCapped());
                result.appendNumber("max", d->maxCappedDocs());
            }

            result.append("firstExtent", str::stream() << d->firstExtent.toString() << " ns:" << d->firstExtent.ext()->nsDiagnostic.toString());
            result.append( "lastExtent", str::stream() <<  d->lastExtent.toString() << " ns:" <<  d->lastExtent.ext()->nsDiagnostic.toString());
            
            BSONArrayBuilder extentData;

            try {
                d->firstExtent.ext()->assertOk();
                d->lastExtent.ext()->assertOk();

                DiskLoc el = d->firstExtent;
                int ne = 0;
                while( !el.isNull() ) {
                    Extent *e = el.ext();
                    e->assertOk();
                    el = e->xnext;
                    ne++;
                    if ( full )
                        extentData << e->dump();
                    
                    killCurrentOp.checkForInterrupt();
                }
                result.append("extentCount", ne);
            }
            catch (...) {
                valid=false;
                errors << "extent asserted";
            }

            if ( full )
                result.appendArray( "extents" , extentData.arr() );

            
            result.appendNumber("datasize", d->stats.datasize);
            result.appendNumber("nrecords", d->stats.nrecords);
            result.appendNumber("lastExtentSize", d->lastExtentSize);
            result.appendNumber("padding", d->paddingFactor());
            

            try {

                try {
                    result.append("firstExtentDetails", d->firstExtent.ext()->dump());

                    valid = valid && d->firstExtent.ext()->validates() && 
                        d->firstExtent.ext()->xprev.isNull();
                }
                catch (...) {
                    errors << "exception firstextent";
                    valid = false;
                }

                set<DiskLoc> recs;
                if( scanData ) {
                    shared_ptr<Cursor> c = theDataFileMgr.findAll(ns);
                    int n = 0;
                    int nInvalid = 0;
                    long long len = 0;
                    long long nlen = 0;
                    int outOfOrder = 0;
                    DiskLoc cl_last;
                    while ( c->ok() ) {
                        n++;

                        DiskLoc cl = c->currLoc();
                        if ( n < 1000000 )
                            recs.insert(cl);
                        if ( d->isCapped() ) {
                            if ( cl < cl_last )
                                outOfOrder++;
                            cl_last = cl;
                        }

                        Record *r = c->_current();
                        len += r->lengthWithHeaders();
                        nlen += r->netLength();

                        if (full){
                            BSONObj obj = BSONObj::make(r);
                            if (!obj.isValid() || !obj.valid()){ // both fast and deep checks
                                valid = false;
                                if (nInvalid == 0) // only log once;
                                    errors << "invalid bson object detected (see logs for more info)";

                                nInvalid++;
                                if (strcmp("_id", obj.firstElementFieldName()) == 0){
                                    try {
                                        obj.firstElement().validate(); // throws on error
                                        log() << "Invalid bson detected in " << ns << " with _id: " << obj.firstElement().toString(false) << endl;
                                    }
                                    catch(...){
                                        log() << "Invalid bson detected in " << ns << " with corrupt _id" << endl;
                                    }
                                }
                                else {
                                    log() << "Invalid bson detected in " << ns << " and couldn't find _id" << endl;
                                }
                            }
                        }

                        c->advance();
                    }
                    if ( d->isCapped() && !d->capLooped() ) {
                        result.append("cappedOutOfOrder", outOfOrder);
                        if ( outOfOrder > 1 ) {
                            valid = false;
                            errors << "too many out of order records";
                        }
                    }
                    result.append("objectsFound", n);

                    if (full) {
                        result.append("invalidObjects", nInvalid);
                    }

                    result.appendNumber("bytesWithHeaders", len);
                    result.appendNumber("bytesWithoutHeaders", nlen);
                }

                BSONArrayBuilder deletedListArray;
                for ( int i = 0; i < Buckets; i++ ) {
                    deletedListArray << d->deletedList[i].isNull();
                }

                int ndel = 0;
                long long delSize = 0;
                int incorrect = 0;
                for ( int i = 0; i < Buckets; i++ ) {
                    DiskLoc loc = d->deletedList[i];
                    try {
                        int k = 0;
                        while ( !loc.isNull() ) {
                            if ( recs.count(loc) )
                                incorrect++;
                            ndel++;

                            if ( loc.questionable() ) {
                                if( d->isCapped() && !loc.isValid() && i == 1 ) {
                                    /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                                       see comments in namespace.h
                                    */
                                    break;
                                }

                                if ( loc.a() <= 0 || strstr(ns, "hudsonSmall") == 0 ) {
                                    string err (str::stream() << "bad deleted loc: " << loc.toString() << " bucket:" << i << " k:" << k);
                                    errors << err;

                                    valid = false;
                                    break;
                                }
                            }

                            DeletedRecord *d = loc.drec();
                            delSize += d->lengthWithHeaders();
                            loc = d->nextDeleted();
                            k++;
                            killCurrentOp.checkForInterrupt();
                        }
                    }
                    catch (...) {
                        errors << ("exception in deleted chain for bucket " + BSONObjBuilder::numStr(i));
                        valid = false;
                    }
                }
                result.appendNumber("deletedCount", ndel);
                result.appendNumber("deletedSize", delSize);

                if ( incorrect ) {
                    errors << (BSONObjBuilder::numStr(incorrect) + " records from datafile are in deleted list");
                    valid = false;
                }

                int idxn = 0;
                try  {
                    result.append("nIndexes", d->nIndexes);
                    BSONObjBuilder indexes; // not using subObjStart to be exception safe
                    NamespaceDetails::IndexIterator i = d->ii();
                    while( i.more() ) {
                        IndexDetails& id = i.next();
                        log() << "validating index " << idxn << ": " << id.indexNamespace() << endl;
                        long long keys = id.idxInterface().fullValidate(id.head, id.keyPattern());
                        indexes.appendNumber(id.indexNamespace(), keys);
                        idxn++;
                    }
                    result.append("keysPerIndex", indexes.done());
                }
                catch (...) {
                    errors << ("exception during index validate idxn " + BSONObjBuilder::numStr(idxn));
                    valid=false;
                }

            }
            catch (AssertionException) {
                errors << "exception during validate";
                valid = false;
            }

            result.appendBool("valid", valid);
            result.append("errors", errors.arr());

            if ( !full ){
                result.append("warning", "Some checks omitted for speed. use {full:true} option to do more thorough scan.");
            }
            
            if ( !valid ) {
                result.append("advice", "ns corrupt, requires repair");
            }

        }
 DiskLoc DummyExtentManager::extentLocForV1( const DiskLoc& loc ) const {
     return DiskLoc( loc.a(), 0 );
 }
Ejemplo n.º 19
0
 DiskLoc MmapV1ExtentManager::extentLocForV1( const DiskLoc& loc ) const {
     Record* record = recordForV1( loc );
     return DiskLoc( loc.a(), record->extentOfs() );
 }
Ejemplo n.º 20
0
    void RecordStoreV1Base::deleteRecord( OperationContext* txn, const DiskLoc& dl ) {

        Record* todelete = recordFor( dl );
        invariant( todelete->netLength() >= 4 ); // this is required for defensive code

        /* remove ourself from the record next/prev chain */
        {
            if ( todelete->prevOfs() != DiskLoc::NullOfs ) {
                DiskLoc prev = getPrevRecordInExtent( txn, dl );
                Record* prevRecord = recordFor( prev );
                txn->recoveryUnit()->writingInt( prevRecord->nextOfs() ) = todelete->nextOfs();
            }

            if ( todelete->nextOfs() != DiskLoc::NullOfs ) {
                DiskLoc next = getNextRecord( txn, dl );
                Record* nextRecord = recordFor( next );
                txn->recoveryUnit()->writingInt( nextRecord->prevOfs() ) = todelete->prevOfs();
            }
        }

        /* remove ourself from extent pointers */
        {
            DiskLoc extentLoc = todelete->myExtentLoc(dl);
            Extent *e =  _getExtent( txn, extentLoc );
            if ( e->firstRecord == dl ) {
                txn->recoveryUnit()->writing(&e->firstRecord);
                if ( todelete->nextOfs() == DiskLoc::NullOfs )
                    e->firstRecord.Null();
                else
                    e->firstRecord.set(dl.a(), todelete->nextOfs() );
            }
            if ( e->lastRecord == dl ) {
                txn->recoveryUnit()->writing(&e->lastRecord);
                if ( todelete->prevOfs() == DiskLoc::NullOfs )
                    e->lastRecord.Null();
                else
                    e->lastRecord.set(dl.a(), todelete->prevOfs() );
            }
        }

        /* add to the free list */
        {
            _details->incrementStats( txn, -1 * todelete->netLength(), -1 );

            if ( _isSystemIndexes ) {
                /* temp: if in system.indexes, don't reuse, and zero out: we want to be
                   careful until validated more, as IndexDetails has pointers
                   to this disk location.  so an incorrectly done remove would cause
                   a lot of problems.
                */
                memset( txn->recoveryUnit()->writingPtr(todelete, todelete->lengthWithHeaders() ),
                        0, todelete->lengthWithHeaders() );
            }
            else {
                // this is defensive so we can detect if we are still using a location
                // that was deleted
                memset(txn->recoveryUnit()->writingPtr(todelete->data(), 4), 0xee, 4);
                addDeletedRec(txn, dl);
            }
        }

    }
Ejemplo n.º 21
0
DiskLoc SimpleRecordStoreV1::_allocFromExistingExtents(OperationContext* txn, int lenToAllocRaw) {
    // Slowly drain the deletedListLegacyGrabBag by popping one record off and putting it in the
    // correct deleted list each time we try to allocate a new record. This ensures we won't
    // orphan any data when upgrading from old versions, without needing a long upgrade phase.
    // This is done before we try to allocate the new record so we can take advantage of the new
    // space immediately.
    {
        const DiskLoc head = _details->deletedListLegacyGrabBag();
        if (!head.isNull()) {
            _details->setDeletedListLegacyGrabBag(txn, drec(head)->nextDeleted());
            addDeletedRec(txn, head);
        }
    }

    // align size up to a multiple of 4
    const int lenToAlloc = (lenToAllocRaw + (4 - 1)) & ~(4 - 1);

    freelistAllocs.increment();
    DiskLoc loc;
    DeletedRecord* dr = NULL;
    {
        int myBucket;
        for (myBucket = bucket(lenToAlloc); myBucket < Buckets; myBucket++) {
            // Only look at the first entry in each bucket. This works because we are either
            // quantizing or allocating fixed-size blocks.
            const DiskLoc head = _details->deletedListEntry(myBucket);
            if (head.isNull())
                continue;
            DeletedRecord* const candidate = drec(head);
            if (candidate->lengthWithHeaders() >= lenToAlloc) {
                loc = head;
                dr = candidate;
                break;
            }
        }

        if (!dr)
            return DiskLoc();  // no space

        // Unlink ourself from the deleted list
        _details->setDeletedListEntry(txn, myBucket, dr->nextDeleted());
        *txn->recoveryUnit()->writing(&dr->nextDeleted()) = DiskLoc().setInvalid();  // defensive
    }

    invariant(dr->extentOfs() < loc.getOfs());

    // Split the deleted record if it has at least as much left over space as our smallest
    // allocation size. Otherwise, just take the whole DeletedRecord.
    const int remainingLength = dr->lengthWithHeaders() - lenToAlloc;
    if (remainingLength >= bucketSizes[0]) {
        txn->recoveryUnit()->writingInt(dr->lengthWithHeaders()) = lenToAlloc;
        const DiskLoc newDelLoc = DiskLoc(loc.a(), loc.getOfs() + lenToAlloc);
        DeletedRecord* newDel = txn->recoveryUnit()->writing(drec(newDelLoc));
        newDel->extentOfs() = dr->extentOfs();
        newDel->lengthWithHeaders() = remainingLength;
        newDel->nextDeleted().Null();

        addDeletedRec(txn, newDelLoc);
    }

    return loc;
}
Ejemplo n.º 22
0
    DiskLoc SimpleRecordStoreV1::_allocFromExistingExtents( OperationContext* txn,
                                                            int lenToAlloc ) {
        // align size up to a multiple of 4
        lenToAlloc = (lenToAlloc + (4-1)) & ~(4-1);

        freelistAllocs.increment();
        DiskLoc loc;
        {
            DiskLoc *prev = 0;
            DiskLoc *bestprev = 0;
            DiskLoc bestmatch;
            int bestmatchlen = INT_MAX; // sentinel meaning we haven't found a record big enough
            int b = bucket(lenToAlloc);
            DiskLoc cur = _details->deletedListEntry(b);
            
            int extra = 5; // look for a better fit, a little.
            int chain = 0;
            while ( 1 ) {
                { // defensive check
                    int fileNumber = cur.a();
                    int fileOffset = cur.getOfs();
                    if (fileNumber < -1 || fileNumber >= 100000 || fileOffset < 0) {
                        StringBuilder sb;
                        sb << "Deleted record list corrupted in collection " << _ns
                           << ", bucket " << b
                           << ", link number " << chain
                           << ", invalid link is " << cur.toString()
                           << ", throwing Fatal Assertion";
                        log() << sb.str() << endl;
                        fassertFailed(16469);
                    }
                }
                if ( cur.isNull() ) {
                    // move to next bucket.  if we were doing "extra", just break
                    if ( bestmatchlen < INT_MAX )
                        break;

                    if ( chain > 0 ) {
                        // if we looked at things in the right bucket, but they were not suitable
                        freelistBucketExhausted.increment();
                    }

                    b++;
                    if ( b > MaxBucket ) {
                        // out of space. alloc a new extent.
                        freelistIterations.increment( 1 + chain );
                        return DiskLoc();
                    }
                    cur = _details->deletedListEntry(b);
                    prev = 0;
                    continue;
                }
                DeletedRecord *r = drec(cur);
                if ( r->lengthWithHeaders() >= lenToAlloc &&
                     r->lengthWithHeaders() < bestmatchlen ) {
                    bestmatchlen = r->lengthWithHeaders();
                    bestmatch = cur;
                    bestprev = prev;
                    if (r->lengthWithHeaders() == lenToAlloc)
                        // exact match, stop searching
                        break;
                }
                if ( bestmatchlen < INT_MAX && --extra <= 0 )
                    break;
                if ( ++chain > 30 && b <= MaxBucket ) {
                    // too slow, force move to next bucket to grab a big chunk
                    //b++;
                    freelistIterations.increment( chain );
                    chain = 0;
                    cur.Null();
                }
                else {
                    cur = r->nextDeleted();
                    prev = &r->nextDeleted();
                }
            }

            // unlink ourself from the deleted list
            DeletedRecord *bmr = drec(bestmatch);
            if ( bestprev ) {
                *txn->recoveryUnit()->writing(bestprev) = bmr->nextDeleted();
            }
            else {
                // should be the front of a free-list
                int myBucket = bucket(bmr->lengthWithHeaders());
                invariant( _details->deletedListEntry(myBucket) == bestmatch );
                _details->setDeletedListEntry(txn, myBucket, bmr->nextDeleted());
            }
            *txn->recoveryUnit()->writing(&bmr->nextDeleted()) = DiskLoc().setInvalid(); // defensive.
            invariant(bmr->extentOfs() < bestmatch.getOfs());

            freelistIterations.increment( 1 + chain );
            loc = bestmatch;
        }

        if ( loc.isNull() )
            return loc;

        // determine if we should chop up

        DeletedRecord *r = drec(loc);

        /* note we want to grab from the front so our next pointers on disk tend
        to go in a forward direction which is important for performance. */
        int regionlen = r->lengthWithHeaders();
        invariant( r->extentOfs() < loc.getOfs() );

        int left = regionlen - lenToAlloc;
        if ( left < 24 || left < (lenToAlloc / 8) ) {
            // you get the whole thing.
            return loc;
        }

        // don't quantize:
        //   - $ collections (indexes) as we already have those aligned the way we want SERVER-8425
        if ( _normalCollection ) {
            // we quantize here so that it only impacts newly sized records
            // this prevents oddities with older records and space re-use SERVER-8435
            lenToAlloc = std::min( r->lengthWithHeaders(),
                                   quantizeAllocationSpace( lenToAlloc ) );
            left = regionlen - lenToAlloc;

            if ( left < 24 ) {
                // you get the whole thing.
                return loc;
            }
        }

        /* split off some for further use. */
        txn->recoveryUnit()->writingInt(r->lengthWithHeaders()) = lenToAlloc;
        DiskLoc newDelLoc = loc;
        newDelLoc.inc(lenToAlloc);
        DeletedRecord* newDel = drec(newDelLoc);
        DeletedRecord* newDelW = txn->recoveryUnit()->writing(newDel);
        newDelW->extentOfs() = r->extentOfs();
        newDelW->lengthWithHeaders() = left;
        newDelW->nextDeleted().Null();

        addDeletedRec( txn, newDelLoc );
        return loc;
    }
Ejemplo n.º 23
0
        string validateNS(const char *ns, NamespaceDetails *d, BSONObj *cmdObj) {
            bool scanData = true;
            if( cmdObj && cmdObj->hasElement("scandata") && !cmdObj->getBoolField("scandata") )
                scanData = false;
            bool valid = true;
            stringstream ss;
            ss << "\nvalidate\n";
            //ss << "  details: " << hex << d << " ofs:" << nsindex(ns)->detailsOffset(d) << dec << endl;
            if ( d->capped )
                ss << "  capped:" << d->capped << " max:" << d->max << '\n';

            ss << "  firstExtent:" << d->firstExtent.toString() << " ns:" << d->firstExtent.ext()->nsDiagnostic.toString()<< '\n';
            ss << "  lastExtent:" << d->lastExtent.toString()    << " ns:" << d->lastExtent.ext()->nsDiagnostic.toString() << '\n';
            try {
                d->firstExtent.ext()->assertOk();
                d->lastExtent.ext()->assertOk();

                DiskLoc el = d->firstExtent;
                int ne = 0;
                while( !el.isNull() ) {
                    Extent *e = el.ext();
                    e->assertOk();
                    el = e->xnext;
                    ne++;
                    killCurrentOp.checkForInterrupt();
                }
                ss << "  # extents:" << ne << '\n';
            }
            catch (...) {
                valid=false;
                ss << " extent asserted ";
            }

            ss << "  datasize?:" << d->stats.datasize << " nrecords?:" << d->stats.nrecords << " lastExtentSize:" << d->lastExtentSize << '\n';
            ss << "  padding:" << d->paddingFactor << '\n';
            try {

                try {
                    ss << "  first extent:\n";
                    d->firstExtent.ext()->dump(ss);
                    valid = valid && d->firstExtent.ext()->validates();
                }
                catch (...) {
                    ss << "\n    exception firstextent\n" << endl;
                }

                set<DiskLoc> recs;
                if( scanData ) {
                    shared_ptr<Cursor> c = theDataFileMgr.findAll(ns);
                    int n = 0;
                    long long len = 0;
                    long long nlen = 0;
                    int outOfOrder = 0;
                    DiskLoc cl_last;
                    while ( c->ok() ) {
                        n++;

                        DiskLoc cl = c->currLoc();
                        if ( n < 1000000 )
                            recs.insert(cl);
                        if ( d->capped ) {
                            if ( cl < cl_last )
                                outOfOrder++;
                            cl_last = cl;
                        }

                        Record *r = c->_current();
                        len += r->lengthWithHeaders;
                        nlen += r->netLength();
                        c->advance();
                    }
                    if ( d->capped && !d->capLooped() ) {
                        ss << "  capped outOfOrder:" << outOfOrder;
                        if ( outOfOrder > 1 ) {
                            valid = false;
                            ss << " ???";
                        }
                        else ss << " (OK)";
                        ss << '\n';
                    }
                    ss << "  " << n << " objects found, nobj:" << d->stats.nrecords << '\n';
                    ss << "  " << len << " bytes data w/headers\n";
                    ss << "  " << nlen << " bytes data wout/headers\n";
                }

                ss << "  deletedList: ";
                for ( int i = 0; i < Buckets; i++ ) {
                    ss << (d->deletedList[i].isNull() ? '0' : '1');
                }
                ss << endl;
                int ndel = 0;
                long long delSize = 0;
                int incorrect = 0;
                for ( int i = 0; i < Buckets; i++ ) {
                    DiskLoc loc = d->deletedList[i];
                    try {
                        int k = 0;
                        while ( !loc.isNull() ) {
                            if ( recs.count(loc) )
                                incorrect++;
                            ndel++;

                            if ( loc.questionable() ) {
                                if( d->capped && !loc.isValid() && i == 1 ) {
                                    /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                                       see comments in namespace.h
                                    */
                                    break;
                                }

                                if ( loc.a() <= 0 || strstr(ns, "hudsonSmall") == 0 ) {
                                    ss << "    ?bad deleted loc: " << loc.toString() << " bucket:" << i << " k:" << k << endl;
                                    valid = false;
                                    break;
                                }
                            }

                            DeletedRecord *d = loc.drec();
                            delSize += d->lengthWithHeaders;
                            loc = d->nextDeleted;
                            k++;
                            killCurrentOp.checkForInterrupt();
                        }
                    }
                    catch (...) {
                        ss <<"    ?exception in deleted chain for bucket " << i << endl;
                        valid = false;
                    }
                }
                ss << "  deleted: n: " << ndel << " size: " << delSize << endl;
                if ( incorrect ) {
                    ss << "    ?corrupt: " << incorrect << " records from datafile are in deleted list\n";
                    valid = false;
                }

                int idxn = 0;
                try  {
                    ss << "  nIndexes:" << d->nIndexes << endl;
                    NamespaceDetails::IndexIterator i = d->ii();
                    while( i.more() ) {
                        IndexDetails& id = i.next();
                        ss << "    " << id.indexNamespace() << " keys:" <<
                           id.head.btree()->fullValidate(id.head, id.keyPattern()) << endl;
                    }
                }
                catch (...) {
                    ss << "\n    exception during index validate idxn:" << idxn << endl;
                    valid=false;
                }

            }
            catch (AssertionException) {
                ss << "\n    exception during validate\n" << endl;
                valid = false;
            }

            if ( !valid )
                ss << " ns corrupt, requires dbchk\n";

            return ss.str();
        }
    void initializeV1RS(OperationContext* txn,
                        const LocAndSize* records,
                        const LocAndSize* drecs,
                        DummyExtentManager* em,
                        DummyRecordStoreV1MetaData* md) {
        invariant(records || drecs); // if both are NULL nothing is being created...
        
        // Need to start with a blank slate
        invariant(em->numFiles() == 0);
        invariant(md->firstExtent().isNull());

        // pre-allocate extents (even extents that aren't part of this RS)
        {
            typedef std::map<int, size_t> ExtentSizes;
            ExtentSizes extentSizes;
            accumulateExtentSizeRequirements(records, &extentSizes);
            accumulateExtentSizeRequirements(drecs, &extentSizes);
            invariant(!extentSizes.empty());

            const int maxExtent = extentSizes.rbegin()->first;
            for (int i = 0; i <= maxExtent; i++) {
                const size_t size = extentSizes.count(i) ? extentSizes[i] : 0;
                const DiskLoc loc = em->allocateExtent(txn, md->isCapped(), size, 0);

                // This function and assertState depend on these details of DummyExtentManager
                invariant(loc.a() == i);
                invariant(loc.getOfs() == 0);
            }

            // link together extents that should be part of this RS
            md->setFirstExtent(txn, DiskLoc(extentSizes.begin()->first, 0));
            md->setLastExtent(txn, DiskLoc(extentSizes.rbegin()->first, 0));
            for (ExtentSizes::iterator it = extentSizes.begin();
                    boost::next(it) != extentSizes.end(); /* ++it */ ) {
                const int a = it->first;
                ++it;
                const int b = it->first;
                em->getExtent(DiskLoc(a, 0))->xnext = DiskLoc(b, 0);
                em->getExtent(DiskLoc(b, 0))->xprev = DiskLoc(a, 0);
            }

            // This signals "done allocating new extents".
            if (md->isCapped())
                md->setDeletedListEntry(txn, 1, DiskLoc());
        }

        if (records && !records[0].loc.isNull()) {
            int recIdx = 0;
            DiskLoc extLoc = md->firstExtent();
            while (!extLoc.isNull()) {
                Extent* ext = em->getExtent(extLoc);
                int prevOfs = DiskLoc::NullOfs;
                while (extLoc.a() == records[recIdx].loc.a()) { // for all records in this extent
                    const DiskLoc loc = records[recIdx].loc;
                    const int size = records[recIdx].size;;
                    invariant(size >= Record::HeaderSize);

                    md->incrementStats(txn, size - Record::HeaderSize, 1);

                    if (ext->firstRecord.isNull())
                        ext->firstRecord = loc;

                    Record* rec = em->recordForV1(loc);
                    rec->lengthWithHeaders() = size;
                    rec->extentOfs() = 0;

                    rec->prevOfs() = prevOfs;
                    prevOfs = loc.getOfs();

                    const DiskLoc nextLoc = records[recIdx + 1].loc;
                    if (nextLoc.a() == loc.a()) { // if next is in same extent
                        rec->nextOfs() = nextLoc.getOfs();
                    }
                    else {
                        rec->nextOfs() = DiskLoc::NullOfs;
                        ext->lastRecord = loc;
                    }

                    recIdx++;
                }
                extLoc = ext->xnext;
            }
            invariant(records[recIdx].loc.isNull());
        }

        if (drecs && !drecs[0].loc.isNull()) {
            int drecIdx = 0;
            DiskLoc* prevNextPtr = NULL;
            int lastBucket = -1;
            while (!drecs[drecIdx].loc.isNull()) {
                const DiskLoc loc = drecs[drecIdx].loc;
                const int size = drecs[drecIdx].size;
                invariant(size >= Record::HeaderSize);
                const int bucket = RecordStoreV1Base::bucket(size);

                if (md->isCapped()) {
                    // All drecs form a single list in bucket 0
                    if (prevNextPtr == NULL) {
                        md->setDeletedListEntry(txn, 0, loc);
                    }
                    else {
                        *prevNextPtr = loc;
                    }

                    if (loc.a() < md->capExtent().a()
                            && drecs[drecIdx + 1].loc.a() == md->capExtent().a()) {
                        // Bucket 1 is known as cappedLastDelRecLastExtent
                        md->setDeletedListEntry(txn, 1, loc);
                    }
                } 
                else if (bucket != lastBucket) {
                    invariant(bucket > lastBucket); // if this fails, drecs weren't sorted by bucket
                    md->setDeletedListEntry(txn, bucket, loc);
                    lastBucket = bucket;
                }
                else {
                    *prevNextPtr = loc;
                }

                DeletedRecord* drec = &em->recordForV1(loc)->asDeleted();
                drec->lengthWithHeaders() = size;
                drec->extentOfs() = 0;
                drec->nextDeleted() = DiskLoc();
                prevNextPtr = &drec->nextDeleted();

                drecIdx++;
            }
        }

        // Make sure we set everything up as requested.
        assertStateV1RS(records, drecs, em, md);
    }
Ejemplo n.º 25
0
    DiskLoc _repairExtent( Database* db , string ns, bool forward , DiskLoc eLoc , Writer& w ){
        LogIndentLevel lil;
        
        if ( eLoc.getOfs() <= 0 ){
            error() << "invalid extent ofs: " << eLoc.getOfs() << endl;
            return DiskLoc();
        }
        

        MongoDataFile * mdf = db->getFile( eLoc.a() );

        Extent * e = mdf->debug_getExtent( eLoc );
        if ( ! e->isOk() ){
            warning() << "Extent not ok magic: " << e->magic << " going to try to continue" << endl;
        }
        
        log() << "length:" << e->length << endl;
        
        LogIndentLevel lil2;
        
        set<DiskLoc> seen;

        DiskLoc loc = forward ? e->firstRecord : e->lastRecord;
        while ( ! loc.isNull() ){
            
            if ( ! seen.insert( loc ).second ) {
                error() << "infinite loop in extend, seen: " << loc << " before" << endl;
                break;
            }

            if ( loc.getOfs() <= 0 ){
                error() << "offset is 0 for record which should be impossible" << endl;
                break;
            }
            log(1) << loc << endl;
            Record* rec = loc.rec();
            BSONObj obj;
            try {
                obj = loc.obj();
                assert( obj.valid() );
                LOG(1) << obj << endl;
                w( obj );
            }
            catch ( std::exception& e ) {
                log() << "found invalid document @ " << loc << " " << e.what() << endl;
                if ( ! obj.isEmpty() ) {
                    try {
                        BSONElement e = obj.firstElement();
                        stringstream ss;
                        ss << "first element: " << e;
                        log() << ss.str();
                    }
                    catch ( std::exception& ) {
                    }
                }
            }
            loc = forward ? rec->getNext( loc ) : rec->getPrev( loc );
        }
        return forward ? e->xnext : e->xprev;
        
    }
    void assertStateV1RS(const LocAndSize* records,
                         const LocAndSize* drecs,
                         const ExtentManager* em,
                         const DummyRecordStoreV1MetaData* md) {
        invariant(records || drecs); // if both are NULL nothing is being asserted...

        try {
            if (records) {
                long long dataSize = 0;
                long long numRecs = 0;

                int recIdx = 0;

                DiskLoc extLoc = md->firstExtent();
                while (!extLoc.isNull()) { // for each Extent
                    Extent* ext = em->getExtent(extLoc, true);
                    int expectedPrevOfs = DiskLoc::NullOfs;
                    DiskLoc actualLoc = ext->firstRecord;
                    while (!actualLoc.isNull()) { // for each Record in this Extent
                        const Record* actualRec = em->recordForV1(actualLoc);
                        const int actualSize = actualRec->lengthWithHeaders();

                        dataSize += actualSize - Record::HeaderSize;
                        numRecs += 1;

                        ASSERT_EQUALS(actualLoc, records[recIdx].loc);
                        ASSERT_EQUALS(actualSize, records[recIdx].size);

                        ASSERT_EQUALS(actualRec->extentOfs(), extLoc.getOfs());
                        ASSERT_EQUALS(actualRec->prevOfs(), expectedPrevOfs);
                        expectedPrevOfs = actualLoc.getOfs();

                        recIdx++;
                        const int nextOfs = actualRec->nextOfs();
                        actualLoc = (nextOfs == DiskLoc::NullOfs ? DiskLoc()
                                                                 : DiskLoc(actualLoc.a(), nextOfs));
                    }

                    if (ext->xnext.isNull()) {
                        ASSERT_EQUALS(md->lastExtent(), extLoc);
                    }

                    extLoc = ext->xnext;
                }

                // both the expected and actual record lists must be done at this point
                ASSERT_EQUALS(records[recIdx].loc, DiskLoc());

                ASSERT_EQUALS(dataSize, md->dataSize());
                ASSERT_EQUALS(numRecs, md->numRecords());
            }

            if (drecs) {
                int drecIdx = 0;
                for (int bucketIdx = 0; bucketIdx < RecordStoreV1Base::Buckets; bucketIdx++) {
                    DiskLoc actualLoc = md->deletedListEntry(bucketIdx);

                    if (md->isCapped() && bucketIdx == 1) {
                        // In capped collections, the 2nd bucket (index 1) points to the drec before
                        // the first drec in the capExtent. If the capExtent is the first Extent,
                        // it should be Null.

                        if (md->capExtent() == md->firstExtent()) {
                            ASSERT_EQUALS(actualLoc, DiskLoc());
                        }
                        else {
                            ASSERT_NOT_EQUALS(actualLoc.a(), md->capExtent().a());
                            const DeletedRecord* actualDrec =
                                &em->recordForV1(actualLoc)->asDeleted();
                            ASSERT_EQUALS(actualDrec->nextDeleted().a(), md->capExtent().a());
                        }

                        // Don't do normal checking of bucket 1 in capped collections. Checking
                        // other buckets to verify that they are Null.
                        continue;
                    }

                    while (!actualLoc.isNull()) {
                        const DeletedRecord* actualDrec = &em->recordForV1(actualLoc)->asDeleted();
                        const int actualSize = actualDrec->lengthWithHeaders();

                        ASSERT_EQUALS(actualLoc, drecs[drecIdx].loc);
                        ASSERT_EQUALS(actualSize, drecs[drecIdx].size);

                        // Make sure the drec is correct
                        ASSERT_EQUALS(actualDrec->extentOfs(), 0);

                        // in capped collections all drecs are linked into a single list in bucket 0
                        ASSERT_EQUALS(bucketIdx, md->isCapped()
                                                   ? 0
                                                   : RecordStoreV1Base::bucket(actualSize));

                        drecIdx++;
                        actualLoc = actualDrec->nextDeleted();
                    }
                }
                // both the expected and actual deleted lists must be done at this point
                ASSERT_EQUALS(drecs[drecIdx].loc, DiskLoc());
            }
        }
        catch (...) {
            // If a test fails, provide extra info to make debugging easier
            printRecList(em, md);
            printDRecList(em, md);
            throw;
        }
    }
Ejemplo n.º 27
0
    /** @return number of skipped (invalid) documents */
    unsigned compactExtent(const char *ns, NamespaceDetails *d, const DiskLoc diskloc, int n,
                const scoped_array<IndexSpec> &indexSpecs,
                scoped_array<SortPhaseOne>& phase1, int nidx, bool validate, 
                double pf, int pb)
    {
        log() << "compact begin extent #" << n << " for namespace " << ns << endl;
        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent *e = diskloc.ext();
        e->assertOk();
        verify( e->validates() );
        unsigned skipped = 0;

        {
            // the next/prev pointers within the extent might not be in order so we first page the whole thing in 
            // sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            MongoDataFile* mdf = cc().database()->getFile( diskloc.a() );
            HANDLE fd = mdf->getFd();
            int offset = diskloc.getOfs();
            Extent* ext = diskloc.ext();
            size_t length = ext->length;
                
            touch_pages(fd, offset, length, ext);
            int ms = t.millis();
            if( ms > 1000 ) 
                log() << "compact end paging in " << ms << "ms " << e->length/1000000.0/ms << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            long long datasize = 0;
            long long nrecords = 0;
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    Record *recOld = L.rec();
                    L = recOld->nextInExtent(L);
                    BSONObj objOld = BSONObj::make(recOld);

                    if( !validate || objOld.valid() ) {
                        nrecords++;
                        unsigned sz = objOld.objsize();

                        oldObjSize += sz;
                        oldObjSizeWithPadding += recOld->netLength();

                        unsigned lenWHdr = sz + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;
                        {
                            lenWPadding = static_cast<unsigned>(pf*lenWPadding);
                            lenWPadding += pb;
                            lenWPadding = lenWPadding & quantizeMask(lenWPadding);
                            if( lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) { 
                                lenWPadding = lenWHdr;
                            }
                        }
                        DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWPadding, false);
                        uassert(14024, "compact error out of space during compaction", !loc.isNull());
                        Record *recNew = loc.rec();
                        datasize += recNew->netLength();
                        recNew = (Record *) getDur().writingPtr(recNew, lenWHdr);
                        addRecordToRecListInExtent(recNew, loc);
                        memcpy(recNew->data(), objOld.objdata(), sz);

                        {
                            // extract keys for all indexes we will be rebuilding
                            for( int x = 0; x < nidx; x++ ) { 
                                phase1[x].addKeys(indexSpecs[x], objOld, loc);
                            }
                        }
                    }
                    else { 
                        if( ++skipped <= 10 )
                            log() << "compact skipping invalid object" << endl;
                    }

                    if( L.isNull() ) { 
                        // we just did the very last record from the old extent.  it's still pointed to 
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                    if( stopping || getDur().aCommitIsNeeded() ) {
                        e->firstRecord.writing() = L;
                        Record *r = L.rec();
                        getDur().writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                        getDur().commitIfNeeded();
                        killCurrentOp.checkForInterrupt(false);
                    }
                }
            } // if !L.isNull()

            verify( d->firstExtent == diskloc );
            verify( d->lastExtent != diskloc );
            DiskLoc newFirst = e->xnext;
            d->firstExtent.writing() = newFirst;
            newFirst.ext()->xprev.writing().Null();
            getDur().writing(e)->markEmpty();
            freeExtents( diskloc, diskloc );
            // update datasize/record count for this namespace's extent
            {
                NamespaceDetails::Stats *s = getDur().writing(&d->stats);
                s->datasize += datasize;
                s->nrecords += nrecords;
            }

            getDur().commitIfNeeded();

            { 
                double op = 1.0;
                if( oldObjSize ) 
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                log() << "compact finished extent #" << n << " containing " << nrecords << " documents (" << datasize/1000000.0 << "MB)"
                    << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100
                    << endl;                    
            }
        }

        return skipped;
    }