Esempio n. 1
0
    void touchNs( const std::string& ns ) { 
        std::vector< touch_location > ranges;
        Client::ReadContext ctx(ns);
        {

            NamespaceDetails *nsd = nsdetails(ns.c_str());
            uassert( 16154, "namespace does not exist", nsd );
            
            for( DiskLoc L = nsd->firstExtent; !L.isNull(); L = L.ext()->xnext )  {
                MongoDataFile* mdf = cc().database()->getFile( L.a() );
                massert( 16238, "can't fetch extent file structure", mdf );
                touch_location tl;
                tl.fd = mdf->getFd();
                tl.offset = L.getOfs();
                tl.ext = L.ext();
                tl.length = tl.ext->length;
                
                ranges.push_back(tl);                
            }

        }
        LockMongoFilesShared lk;
        Lock::TempRelease tr;
        std::string progress_msg = "touch " + ns + " extents";
        ProgressMeterHolder pm( cc().curop()->setMessage( progress_msg.c_str() , ranges.size() ) );
        for ( std::vector< touch_location >::iterator it = ranges.begin(); it != ranges.end(); ++it ) {
            touch_pages( it->fd, it->offset, it->length, it->ext );
            pm.hit();
            killCurrentOp.checkForInterrupt(false);
        }
        pm.finished();
    }
Esempio n. 2
0
 void NamespaceDetails::dumpExtents() {
     cout << "dumpExtents:" << endl;
     for ( DiskLoc i = _firstExtent; !i.isNull(); i = i.ext()->xnext ) {
         Extent *e = i.ext();
         stringstream ss;
         e->dump(ss);
         cout << ss.str() << endl;
     }
 }
Esempio n. 3
0
 int nRecords() const {
     int count = 0;
     for ( DiskLoc i = nsd()->firstExtent; !i.isNull(); i = i.ext()->xnext ) {
         int fileNo = i.ext()->firstRecord.a();
         if ( fileNo == -1 )
             continue;
         for ( int j = i.ext()->firstRecord.getOfs(); j != DiskLoc::NullOfs;
               j = DiskLoc( fileNo, j ).rec()->nextOfs ) {
             ++count;
         }
     }
     ASSERT_EQUALS( count, nsd()->nrecords );
     return count;
 }
Esempio n. 4
0
 // bypass standard alloc/insert routines to use the extent we want.
 static DiskLoc insert( DiskLoc ext, int i ) {
     BSONObjBuilder b;
     b.append( "a", i );
     BSONObj o = b.done();
     int len = o.objsize();
     Extent *e = ext.ext();
     int ofs;
     if ( e->lastRecord.isNull() )
         ofs = ext.getOfs() + ( e->extentData - (char *)e );
     else
         ofs = e->lastRecord.getOfs() + e->lastRecord.rec()->lengthWithHeaders;
     DiskLoc dl( ext.a(), ofs );
     Record *r = dl.rec();
     r->lengthWithHeaders = Record::HeaderSize + len;
     r->extentOfs = e->myLoc.getOfs();
     r->nextOfs = DiskLoc::NullOfs;
     r->prevOfs = e->lastRecord.isNull() ? DiskLoc::NullOfs : e->lastRecord.getOfs();
     memcpy( r->data, o.objdata(), len );
     if ( e->firstRecord.isNull() )
         e->firstRecord = dl;
     else
         e->lastRecord.rec()->nextOfs = ofs;
     e->lastRecord = dl;
     return dl;
 }
Esempio n. 5
0
    void NamespaceDetails::emptyCappedCollection( const char *ns ) {
        DEV verify( this == nsdetails(ns) );
        massert( 13424, "collection must be capped", capped );
        massert( 13425, "background index build in progress", !indexBuildInProgress );
        massert( 13426, "indexes present", nIndexes == 0 );

        // Clear all references to this namespace.
        ClientCursor::invalidate( ns );
        NamespaceDetailsTransient::clearForPrefix( ns );

        // Get a writeable reference to 'this' and reset all pertinent
        // attributes.
        NamespaceDetails *t = writingWithoutExtra();

        t->cappedLastDelRecLastExtent() = DiskLoc();
        t->cappedListOfAllDeletedRecords() = DiskLoc();

        // preserve firstExtent/lastExtent
        t->capExtent = firstExtent;
        t->stats.datasize = stats.nrecords = 0;
        // lastExtentSize preserve
        // nIndexes preserve 0
        // capped preserve true
        // max preserve
        t->paddingFactor = 1.0;
        t->flags = 0;
        t->capFirstNewRecord = DiskLoc();
        t->capFirstNewRecord.setInvalid();
        t->cappedLastDelRecLastExtent().setInvalid();
        // dataFileVersion preserve
        // indexFileVersion preserve
        t->multiKeyIndexBits = 0;
        t->reservedA = 0;
        t->extraOffset = 0;
        // indexBuildInProgress preserve 0
        memset(t->reserved, 0, sizeof(t->reserved));

        // Reset all existing extents and recreate the deleted list.
        for( DiskLoc ext = firstExtent; !ext.isNull(); ext = ext.ext()->xnext ) {
            DiskLoc prev = ext.ext()->xprev;
            DiskLoc next = ext.ext()->xnext;
            DiskLoc empty = ext.ext()->reuse( ns, true );
            ext.ext()->xprev.writing() = prev;
            ext.ext()->xnext.writing() = next;
            addDeletedRec( empty.drec(), empty );
        }
    }
Esempio n. 6
0
File: cap.cpp Progetto: ALFIO/mongo
    void NamespaceDetails::emptyCappedCollection( const char *ns ) {
        DEV assert( this == nsdetails(ns) );
        massert( 13424, "collection must be capped", capped );
        massert( 13425, "background index build in progress", !backgroundIndexBuildInProgress );
        massert( 13426, "indexes present", nIndexes == 0 );

        ClientCursor::invalidate( ns );
		NamespaceDetailsTransient::clearForPrefix( ns );

        cappedLastDelRecLastExtent() = DiskLoc();
        cappedListOfAllDeletedRecords() = DiskLoc();
        
        // preserve firstExtent/lastExtent
        capExtent = firstExtent;
        stats.datasize = stats.nrecords = 0;
        // lastExtentSize preserve
        // nIndexes preserve 0
        // capped preserve true
        // max preserve
        paddingFactor = 1.0;
        flags = 0;
        capFirstNewRecord = DiskLoc();
        capFirstNewRecord.setInvalid();
        cappedLastDelRecLastExtent().setInvalid();
        // dataFileVersion preserve
        // indexFileVersion preserve
        multiKeyIndexBits = 0;
        reservedA = 0;
        extraOffset = 0;
        // backgroundIndexBuildInProgress preserve 0
        memset(reserved, 0, sizeof(reserved));

        for( DiskLoc ext = firstExtent; !ext.isNull(); ext = ext.ext()->xnext ) {
            DiskLoc prev = ext.ext()->xprev;
            DiskLoc next = ext.ext()->xnext;
            DiskLoc empty = ext.ext()->reuse( ns );
            ext.ext()->xprev = prev;
            ext.ext()->xnext = next;
            addDeletedRec( empty.drec(), empty );
        }
    }
Esempio n. 7
0
    bool _compact(const char *ns, NamespaceDetails *d, string& errmsg, bool validate, BSONObjBuilder& result, double pf, int pb) { 
        // this is a big job, so might as well make things tidy before we start just to be nice.
        getDur().commitIfNeeded();

        list<DiskLoc> extents;
        for( DiskLoc L = d->firstExtent; !L.isNull(); L = L.ext()->xnext ) 
            extents.push_back(L);
        log() << "compact " << extents.size() << " extents" << endl;

        ProgressMeterHolder pm( cc().curop()->setMessage( "compact extent" , extents.size() ) );

        // same data, but might perform a little different after compact?
        NamespaceDetailsTransient::get(ns).clearQueryCache();

        int nidx = d->nIndexes;
        scoped_array<IndexSpec> indexSpecs( new IndexSpec[nidx] );
        scoped_array<SortPhaseOne> phase1( new SortPhaseOne[nidx] );
        {
            NamespaceDetails::IndexIterator ii = d->ii(); 
            // For each existing index...
            for( int idxNo = 0; ii.more(); ++idxNo ) {
                // Build a new index spec based on the old index spec.
                BSONObjBuilder b;
                BSONObj::iterator i(ii.next().info.obj());
                while( i.more() ) { 
                    BSONElement e = i.next();
                    if ( str::equals( e.fieldName(), "v" ) ) {
                        // Drop any preexisting index version spec.  The default index version will
                        // be used instead for the new index.
                        continue;
                    }
                    if ( str::equals( e.fieldName(), "background" ) ) {
                        // Create the new index in the foreground.
                        continue;
                    }
                    // Pass the element through to the new index spec.
                    b.append(e);
                }
                // Add the new index spec to 'indexSpecs'.
                BSONObj o = b.obj().getOwned();
                indexSpecs[idxNo].reset(o);
                // Create an external sorter.
                phase1[idxNo].sorter.reset
                        ( new BSONObjExternalSorter
                           // Use the default index interface, since the new index will be created
                           // with the default index version.
                         ( IndexInterface::defaultVersion(),
                           o.getObjectField("key") ) );
                phase1[idxNo].sorter->hintNumObjects( d->stats.nrecords );
            }
        }

        log() << "compact orphan deleted lists" << endl;
        for( int i = 0; i < Buckets; i++ ) { 
            d->deletedList[i].writing().Null();
        }



        // Start over from scratch with our extent sizing and growth
        d->lastExtentSize=0;

        // before dropping indexes, at least make sure we can allocate one extent!
        uassert(14025, "compact error no space available to allocate", !allocateSpaceForANewRecord(ns, d, Record::HeaderSize+1, false).isNull());

        // note that the drop indexes call also invalidates all clientcursors for the namespace, which is important and wanted here
        log() << "compact dropping indexes" << endl;
        BSONObjBuilder b;
        if( !dropIndexes(d, ns, "*", errmsg, b, true) ) { 
            errmsg = "compact drop indexes failed";
            log() << errmsg << endl;
            return false;
        }

        getDur().commitIfNeeded();

        long long skipped = 0;
        int n = 0;

        // reset data size and record counts to 0 for this namespace
        // as we're about to tally them up again for each new extent
        {
            NamespaceDetails::Stats *s = getDur().writing(&d->stats);
            s->datasize = 0;
            s->nrecords = 0;
        }

        for( list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++ ) { 
            skipped += compactExtent(ns, d, *i, n++, indexSpecs, phase1, nidx, validate, pf, pb);
            pm.hit();
        }

        if( skipped ) {
            result.append("invalidObjects", skipped);
        }

        verify( d->firstExtent.ext()->xprev.isNull() );

        // indexes will do their own progress meter?
        pm.finished();

        // build indexes
        NamespaceString s(ns);
        string si = s.db + ".system.indexes";
        for( int i = 0; i < nidx; i++ ) {
            killCurrentOp.checkForInterrupt(false);
            BSONObj info = indexSpecs[i].info;
            log() << "compact create index " << info["key"].Obj().toString() << endl;
            try {
                precalced = &phase1[i];
                theDataFileMgr.insert(si.c_str(), info.objdata(), info.objsize());
            }
            catch(...) { 
                precalced = 0;
                throw;
            }
            precalced = 0;
        }

        return true;
    }
Esempio n. 8
0
    StatusWith<CompactStats> Collection::compact( const CompactOptions* compactOptions ) {

        if ( isCapped() )
            return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                             "cannot compact capped collection" );

        if ( _indexCatalog.numIndexesInProgress() )
            return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                             "cannot compact when indexes in progress" );

        NamespaceDetails* d = details();

        // this is a big job, so might as well make things tidy before we start just to be nice.
        getDur().commitIfNeeded();

        list<DiskLoc> extents;
        for( DiskLoc L = d->firstExtent(); !L.isNull(); L = L.ext()->xnext )
            extents.push_back(L);
        log() << "compact " << extents.size() << " extents" << endl;

        // same data, but might perform a little different after compact?
        _infoCache.reset();

        vector<BSONObj> indexSpecs;
        {
            IndexCatalog::IndexIterator ii( _indexCatalog.getIndexIterator( false ) );
            while ( ii.more() ) {
                IndexDescriptor* descriptor = ii.next();

                const BSONObj spec = _compactAdjustIndexSpec(descriptor->infoObj());
                const BSONObj key = spec.getObjectField("key");
                const Status keyStatus = validateKeyPattern(key);
                if (!keyStatus.isOK()) {
                    return StatusWith<CompactStats>(
                        ErrorCodes::CannotCreateIndex,
                        str::stream() << "Cannot rebuild index " << spec << ": "
                                      << keyStatus.reason()
                                      << " For more info see"
                                      << " http://dochub.mongodb.org/core/index-validation");
                }
                indexSpecs.push_back(spec);
            }
        }

        log() << "compact orphan deleted lists" << endl;
        d->orphanDeletedList();

        // Start over from scratch with our extent sizing and growth
        d->setLastExtentSize( 0 );

        // before dropping indexes, at least make sure we can allocate one extent!
        // this will allocate an extent and add to free list
        // if it cannot, it will throw an exception
        increaseStorageSize( _details->lastExtentSize(), true );

        // note that the drop indexes call also invalidates all clientcursors for the namespace,
        // which is important and wanted here
        log() << "compact dropping indexes" << endl;
        Status status = _indexCatalog.dropAllIndexes( true );
        if ( !status.isOK() ) {
            return StatusWith<CompactStats>( status );
        }

        getDur().commitIfNeeded();
        killCurrentOp.checkForInterrupt();

        CompactStats stats;

        MultiIndexBlock multiIndexBlock( this );
        status = multiIndexBlock.init( indexSpecs );
        if ( !status.isOK() )
            return StatusWith<CompactStats>( status );

        // reset data size and record counts to 0 for this namespace
        // as we're about to tally them up again for each new extent
        d->setStats( 0, 0 );

        ProgressMeterHolder pm(cc().curop()->setMessage("compact extent",
                                                        "Extent Compacting Progress",
                                                        extents.size()));

        int extentNumber = 0;
        for( list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++ ) {
            _compactExtent(*i, extentNumber++, multiIndexBlock, compactOptions, &stats );
            pm.hit();
        }

        verify( d->firstExtent().ext()->xprev.isNull() );

        // indexes will do their own progress meter?
        pm.finished();

        log() << "starting index commits";

        status = multiIndexBlock.commit();
        if ( !status.isOK() )
            return StatusWith<CompactStats>( status );

        return StatusWith<CompactStats>( stats );
    }
Esempio n. 9
0
    void Collection::_compactExtent(const DiskLoc diskloc, int extentNumber,
                                    MultiIndexBlock& indexesToInsertTo,
                                    const CompactOptions* compactOptions, CompactStats* stats ) {

        log() << "compact begin extent #" << extentNumber
              << " for namespace " << _ns << " " << diskloc;

        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent *e = diskloc.ext();
        e->assertOk();
        verify( e->validates(diskloc) );

        {
            // the next/prev pointers within the extent might not be in order so we first
            // page the whole thing in sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            size_t length = e->length;

            touch_pages( reinterpret_cast<const char*>(e), length );
            int ms = t.millis();
            if( ms > 1000 )
                log() << "compact end paging in " << ms << "ms "
                      << e->length/1000000.0/t.seconds() << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            long long datasize = 0;
            long long nrecords = 0;
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    Record *recOld = L.rec();
                    L = getExtentManager()->getNextRecordInExtent(L);
                    BSONObj objOld = BSONObj::make(recOld);

                    if ( compactOptions->validateDocuments && !objOld.valid() ) {
                        // object is corrupt!
                        log() << "compact skipping corrupt document!";
                        stats->corruptDocuments++;
                    }
                    else {
                        unsigned docSize = objOld.objsize();

                        nrecords++;
                        oldObjSize += docSize;
                        oldObjSizeWithPadding += recOld->netLength();

                        unsigned lenWHdr = docSize + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;

                        switch( compactOptions->paddingMode ) {
                        case CompactOptions::NONE:
                            if ( details()->isUserFlagSet(NamespaceDetails::Flag_UsePowerOf2Sizes) )
                                lenWPadding = details()->quantizePowerOf2AllocationSpace(lenWPadding);
                            break;
                        case CompactOptions::PRESERVE:
                            // if we are preserving the padding, the record should not change size
                            lenWPadding = recOld->lengthWithHeaders();
                            break;
                        case CompactOptions::MANUAL:
                            lenWPadding = compactOptions->computeRecordSize(lenWPadding);
                            if (lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) {
                                lenWPadding = lenWHdr;
                            }
                            break;
                        }

                        CompactDocWriter writer( objOld, lenWPadding );
                        StatusWith<DiskLoc> status = _recordStore->insertRecord( &writer, 0 );
                        uassertStatusOK( status.getStatus() );
                        datasize += _recordStore->recordFor( status.getValue() )->netLength();

                        InsertDeleteOptions options;
                        options.logIfError = false;
                        options.dupsAllowed = true; // in compact we should be doing no checking

                        indexesToInsertTo.insert( objOld, status.getValue(), options );
                    }

                    if( L.isNull() ) {
                        // we just did the very last record from the old extent.  it's still pointed to
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                    if( stopping || getDur().aCommitIsNeeded() ) {
                        e->firstRecord.writing() = L;
                        Record *r = L.rec();
                        getDur().writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                        getDur().commitIfNeeded();
                        killCurrentOp.checkForInterrupt();
                    }
                }
            } // if !L.isNull()

            verify( details()->firstExtent() == diskloc );
            verify( details()->lastExtent() != diskloc );
            DiskLoc newFirst = e->xnext;
            details()->firstExtent().writing() = newFirst;
            newFirst.ext()->xprev.writing().Null();
            getDur().writing(e)->markEmpty();
            getExtentManager()->freeExtents( diskloc, diskloc );

            getDur().commitIfNeeded();

            {
                double op = 1.0;
                if( oldObjSize )
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                log() << "compact finished extent #" << extentNumber << " containing " << nrecords
                      << " documents (" << datasize/1000000.0 << "MB)"
                      << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100;
            }
        }

    }
Esempio n. 10
0
        string validateNS(const char *ns, NamespaceDetails *d, BSONObj *cmdObj) {
            bool scanData = true;
            if( cmdObj && cmdObj->hasElement("scandata") && !cmdObj->getBoolField("scandata") )
                scanData = false;
            bool valid = true;
            stringstream ss;
            ss << "\nvalidate\n";
            //ss << "  details: " << hex << d << " ofs:" << nsindex(ns)->detailsOffset(d) << dec << endl;
            if ( d->capped )
                ss << "  capped:" << d->capped << " max:" << d->max << '\n';

            ss << "  firstExtent:" << d->firstExtent.toString() << " ns:" << d->firstExtent.ext()->nsDiagnostic.toString()<< '\n';
            ss << "  lastExtent:" << d->lastExtent.toString()    << " ns:" << d->lastExtent.ext()->nsDiagnostic.toString() << '\n';
            try {
                d->firstExtent.ext()->assertOk();
                d->lastExtent.ext()->assertOk();

                DiskLoc el = d->firstExtent;
                int ne = 0;
                while( !el.isNull() ) {
                    Extent *e = el.ext();
                    e->assertOk();
                    el = e->xnext;
                    ne++;
                    killCurrentOp.checkForInterrupt();
                }
                ss << "  # extents:" << ne << '\n';
            }
            catch (...) {
                valid=false;
                ss << " extent asserted ";
            }

            ss << "  datasize?:" << d->stats.datasize << " nrecords?:" << d->stats.nrecords << " lastExtentSize:" << d->lastExtentSize << '\n';
            ss << "  padding:" << d->paddingFactor << '\n';
            try {

                try {
                    ss << "  first extent:\n";
                    d->firstExtent.ext()->dump(ss);
                    valid = valid && d->firstExtent.ext()->validates();
                }
                catch (...) {
                    ss << "\n    exception firstextent\n" << endl;
                }

                set<DiskLoc> recs;
                if( scanData ) {
                    shared_ptr<Cursor> c = theDataFileMgr.findAll(ns);
                    int n = 0;
                    long long len = 0;
                    long long nlen = 0;
                    int outOfOrder = 0;
                    DiskLoc cl_last;
                    while ( c->ok() ) {
                        n++;

                        DiskLoc cl = c->currLoc();
                        if ( n < 1000000 )
                            recs.insert(cl);
                        if ( d->capped ) {
                            if ( cl < cl_last )
                                outOfOrder++;
                            cl_last = cl;
                        }

                        Record *r = c->_current();
                        len += r->lengthWithHeaders;
                        nlen += r->netLength();
                        c->advance();
                    }
                    if ( d->capped && !d->capLooped() ) {
                        ss << "  capped outOfOrder:" << outOfOrder;
                        if ( outOfOrder > 1 ) {
                            valid = false;
                            ss << " ???";
                        }
                        else ss << " (OK)";
                        ss << '\n';
                    }
                    ss << "  " << n << " objects found, nobj:" << d->stats.nrecords << '\n';
                    ss << "  " << len << " bytes data w/headers\n";
                    ss << "  " << nlen << " bytes data wout/headers\n";
                }

                ss << "  deletedList: ";
                for ( int i = 0; i < Buckets; i++ ) {
                    ss << (d->deletedList[i].isNull() ? '0' : '1');
                }
                ss << endl;
                int ndel = 0;
                long long delSize = 0;
                int incorrect = 0;
                for ( int i = 0; i < Buckets; i++ ) {
                    DiskLoc loc = d->deletedList[i];
                    try {
                        int k = 0;
                        while ( !loc.isNull() ) {
                            if ( recs.count(loc) )
                                incorrect++;
                            ndel++;

                            if ( loc.questionable() ) {
                                if( d->capped && !loc.isValid() && i == 1 ) {
                                    /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                                       see comments in namespace.h
                                    */
                                    break;
                                }

                                if ( loc.a() <= 0 || strstr(ns, "hudsonSmall") == 0 ) {
                                    ss << "    ?bad deleted loc: " << loc.toString() << " bucket:" << i << " k:" << k << endl;
                                    valid = false;
                                    break;
                                }
                            }

                            DeletedRecord *d = loc.drec();
                            delSize += d->lengthWithHeaders;
                            loc = d->nextDeleted;
                            k++;
                            killCurrentOp.checkForInterrupt();
                        }
                    }
                    catch (...) {
                        ss <<"    ?exception in deleted chain for bucket " << i << endl;
                        valid = false;
                    }
                }
                ss << "  deleted: n: " << ndel << " size: " << delSize << endl;
                if ( incorrect ) {
                    ss << "    ?corrupt: " << incorrect << " records from datafile are in deleted list\n";
                    valid = false;
                }

                int idxn = 0;
                try  {
                    ss << "  nIndexes:" << d->nIndexes << endl;
                    NamespaceDetails::IndexIterator i = d->ii();
                    while( i.more() ) {
                        IndexDetails& id = i.next();
                        ss << "    " << id.indexNamespace() << " keys:" <<
                           id.head.btree()->fullValidate(id.head, id.keyPattern()) << endl;
                    }
                }
                catch (...) {
                    ss << "\n    exception during index validate idxn:" << idxn << endl;
                    valid=false;
                }

            }
            catch (AssertionException) {
                ss << "\n    exception during validate\n" << endl;
                valid = false;
            }

            if ( !valid )
                ss << " ns corrupt, requires dbchk\n";

            return ss.str();
        }
Esempio n. 11
0
        virtual bool run(const char *ns, BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }
            
            setClient( source.c_str() );
            NamespaceDetails *nsd = nsdetails( source.c_str() );
            uassert( "source namespace does not exist", nsd );
            bool capped = nsd->capped;
            long long size = 0;
            if ( capped )
                for( DiskLoc i = nsd->firstExtent; !i.isNull(); i = i.ext()->xnext )
                    size += i.ext()->length;
            
            setClient( target.c_str() );
            uassert( "target namespace exists", !nsdetails( target.c_str() ) );

            {
                char from[256];
                nsToClient( source.c_str(), from );
                char to[256];
                nsToClient( target.c_str(), to );
                if ( strcmp( from, to ) == 0 ) {
                    renameNamespace( source.c_str(), target.c_str() );
                    return true;
                }
            }

            BSONObjBuilder spec;
            if ( capped ) {
                spec.appendBool( "capped", true );
                spec.append( "size", double( size ) );
            }
            if ( !userCreateNS( target.c_str(), spec.done(), errmsg, false ) )
                return false;
            
            auto_ptr< DBClientCursor > c;
            DBDirectClient bridge;

            {
                c = bridge.query( source, BSONObj() );
            }
            while( 1 ) {
                {
                    if ( !c->more() )
                        break;
                }
                BSONObj o = c->next();
                theDataFileMgr.insert( target.c_str(), o );
            }
            
            char cl[256];
            nsToClient( source.c_str(), cl );
            string sourceIndexes = string( cl ) + ".system.indexes";
            nsToClient( target.c_str(), cl );
            string targetIndexes = string( cl ) + ".system.indexes";
            {
                c = bridge.query( sourceIndexes, QUERY( "ns" << source ) );
            }
            while( 1 ) {
                {
                    if ( !c->more() )
                        break;
                }
                BSONObj o = c->next();
                BSONObjBuilder b;
                BSONObjIterator i( o );
                while( i.moreWithEOO() ) {
                    BSONElement e = i.next();
                    if ( e.eoo() )
                        break;
                    if ( strcmp( e.fieldName(), "ns" ) == 0 ) {
                        b.append( "ns", target );
                    } else {
                        b.append( e );
                    }
                }
                BSONObj n = b.done();
                theDataFileMgr.insert( targetIndexes.c_str(), n );
            }

            setClient( source.c_str() );
            dropCollection( source, errmsg, result );
            return true;
        }
Esempio n. 12
0
        void validateNS(const string& ns,
                        Collection* collection,
                        const BSONObj& cmdObj,
                        BSONObjBuilder& result) {

            const bool full = cmdObj["full"].trueValue();
            const bool scanData = full || cmdObj["scandata"].trueValue();

            NamespaceDetails* nsd = collection->details();

            bool valid = true;
            BSONArrayBuilder errors; // explanation(s) for why valid = false
            if ( collection->isCapped() ){
                result.append("capped", nsd->isCapped());
                result.appendNumber("max", nsd->maxCappedDocs());
            }

            if ( nsd->firstExtent().isNull() )
                result.append( "firstExtent", "null" );
            else
                result.append( "firstExtent", str::stream() << nsd->firstExtent().toString()
                               << " ns:" << nsd->firstExtent().ext()->nsDiagnostic.toString());
            if ( nsd->lastExtent().isNull() )
                result.append( "lastExtent", "null" );
            else
                result.append( "lastExtent", str::stream() <<  nsd->lastExtent().toString()
                               << " ns:" <<  nsd->lastExtent().ext()->nsDiagnostic.toString());

            BSONArrayBuilder extentData;
            int extentCount = 0;
            try {

                if ( !nsd->firstExtent().isNull() ) {
                    nsd->firstExtent().ext()->assertOk();
                    nsd->lastExtent().ext()->assertOk();
                }

                DiskLoc extentDiskLoc = nsd->firstExtent();
                while (!extentDiskLoc.isNull()) {
                    Extent* thisExtent = extentDiskLoc.ext();
                    if (full) {
                        extentData << thisExtent->dump();
                    }
                    if (!thisExtent->validates(extentDiskLoc, &errors)) {
                        valid = false;
                    }
                    DiskLoc nextDiskLoc = thisExtent->xnext;
                    if (extentCount > 0 && !nextDiskLoc.isNull()
                                        &&  nextDiskLoc.ext()->xprev != extentDiskLoc) {
                        StringBuilder sb;
                        sb << "'xprev' pointer " << nextDiskLoc.ext()->xprev.toString()
                           << " in extent " << nextDiskLoc.toString()
                           << " does not point to extent " << extentDiskLoc.toString();
                        errors << sb.str();
                        valid = false;
                    }
                    if (nextDiskLoc.isNull() && extentDiskLoc != nsd->lastExtent()) {
                        StringBuilder sb;
                        sb << "'lastExtent' pointer " << nsd->lastExtent().toString()
                           << " does not point to last extent in list " << extentDiskLoc.toString();
                        errors << sb.str();
                        valid = false;
                    }
                    extentDiskLoc = nextDiskLoc;
                    extentCount++;
                    killCurrentOp.checkForInterrupt();
                }
            }
            catch (const DBException& e) {
                StringBuilder sb;
                sb << "exception validating extent " << extentCount
                   << ": " << e.what();
                errors << sb.str();
                valid = false;
            }
            result.append("extentCount", extentCount);

            if ( full )
                result.appendArray( "extents" , extentData.arr() );

            result.appendNumber("datasize", nsd->dataSize());
            result.appendNumber("nrecords", nsd->numRecords());
            result.appendNumber("lastExtentSize", nsd->lastExtentSize());
            result.appendNumber("padding", nsd->paddingFactor());

            try {

                bool testingLastExtent = false;
                try {
                    if (nsd->firstExtent().isNull()) {
                        // this is ok
                    }
                    else {
                        result.append("firstExtentDetails", nsd->firstExtent().ext()->dump());
                        if (!nsd->firstExtent().ext()->xprev.isNull()) {
                            StringBuilder sb;
                            sb << "'xprev' pointer in 'firstExtent' " << nsd->firstExtent().toString()
                               << " is " << nsd->firstExtent().ext()->xprev.toString()
                               << ", should be null";
                            errors << sb.str();
                            valid=false;
                        }
                    }
                    testingLastExtent = true;
                    if (nsd->lastExtent().isNull()) {
                        // this is ok
                    }
                    else {
                        if (nsd->firstExtent() != nsd->lastExtent()) {
                            result.append("lastExtentDetails", nsd->lastExtent().ext()->dump());
                            if (!nsd->lastExtent().ext()->xnext.isNull()) {
                                StringBuilder sb;
                                sb << "'xnext' pointer in 'lastExtent' " << nsd->lastExtent().toString()
                                   << " is " << nsd->lastExtent().ext()->xnext.toString()
                                   << ", should be null";
                                errors << sb.str();
                                valid = false;
                            }
                        }
                    }
                }
                catch (const DBException& e) {
                    StringBuilder sb;
                    sb << "exception processing '"
                       << (testingLastExtent ? "lastExtent" : "firstExtent")
                       << "': " << e.what();
                    errors << sb.str();
                    valid = false;
                }

                set<DiskLoc> recs;
                if( scanData ) {
                    int n = 0;
                    int nInvalid = 0;
                    long long nQuantizedSize = 0;
                    long long nPowerOf2QuantizedSize = 0;
                    long long len = 0;
                    long long nlen = 0;
                    long long bsonLen = 0;
                    int outOfOrder = 0;
                    DiskLoc cl_last;

                    DiskLoc cl;
                    Runner::RunnerState state;
                    auto_ptr<Runner> runner(InternalPlanner::collectionScan(ns));
                    while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, &cl))) {
                        n++;

                        if ( n < 1000000 )
                            recs.insert(cl);
                        if ( nsd->isCapped() ) {
                            if ( cl < cl_last )
                                outOfOrder++;
                            cl_last = cl;
                        }

                        Record *r = cl.rec();
                        len += r->lengthWithHeaders();
                        nlen += r->netLength();
                        
                        if ( r->lengthWithHeaders() ==
                                NamespaceDetails::quantizeAllocationSpace
                                    ( r->lengthWithHeaders() ) ) {
                            // Count the number of records having a size consistent with
                            // the quantizeAllocationSpace quantization implementation.
                            ++nQuantizedSize;
                        }

                        if ( r->lengthWithHeaders() ==
                                NamespaceDetails::quantizePowerOf2AllocationSpace
                                    ( r->lengthWithHeaders() - 1 ) ) {
                            // Count the number of records having a size consistent with the
                            // quantizePowerOf2AllocationSpace quantization implementation.
                            // Because of SERVER-8311, power of 2 quantization is not idempotent and
                            // r->lengthWithHeaders() - 1 must be checked instead of the record
                            // length itself.
                            ++nPowerOf2QuantizedSize;
                        }

                        if (full){
                            BSONObj obj = BSONObj::make(r);
                            if (!obj.isValid() || !obj.valid()){ // both fast and deep checks
                                valid = false;
                                if (nInvalid == 0) // only log once;
                                    errors << "invalid bson object detected (see logs for more info)";

                                nInvalid++;
                                if (strcmp("_id", obj.firstElementFieldName()) == 0){
                                    try {
                                        obj.firstElement().validate(); // throws on error
                                        log() << "Invalid bson detected in " << ns << " with _id: " << obj.firstElement().toString(false) << endl;
                                    }
                                    catch(...){
                                        log() << "Invalid bson detected in " << ns << " with corrupt _id" << endl;
                                    }
                                }
                                else {
                                    log() << "Invalid bson detected in " << ns << " and couldn't find _id" << endl;
                                }
                            }
                            else {
                                bsonLen += obj.objsize();
                            }
                        }
                    }
                    if (Runner::RUNNER_EOF != state) {
                        // TODO: more descriptive logging.
                        warning() << "Internal error while reading collection " << ns << endl;
                    }
                    if ( nsd->isCapped() && !nsd->capLooped() ) {
                        result.append("cappedOutOfOrder", outOfOrder);
                        if ( outOfOrder > 1 ) {
                            valid = false;
                            errors << "too many out of order records";
                        }
                    }
                    result.append("objectsFound", n);

                    if (full) {
                        result.append("invalidObjects", nInvalid);
                    }

                    result.appendNumber("nQuantizedSize", nQuantizedSize);
                    result.appendNumber("nPowerOf2QuantizedSize", nPowerOf2QuantizedSize);
                    result.appendNumber("bytesWithHeaders", len);
                    result.appendNumber("bytesWithoutHeaders", nlen);

                    if (full) {
                        result.appendNumber("bytesBson", bsonLen);
                    }
                }

                BSONArrayBuilder deletedListArray;
                for ( int i = 0; i < Buckets; i++ ) {
                    deletedListArray << nsd->deletedListEntry(i).isNull();
                }

                int ndel = 0;
                long long delSize = 0;
                BSONArrayBuilder delBucketSizes;
                int incorrect = 0;
                for ( int i = 0; i < Buckets; i++ ) {
                    DiskLoc loc = nsd->deletedListEntry(i);
                    try {
                        int k = 0;
                        while ( !loc.isNull() ) {
                            if ( recs.count(loc) )
                                incorrect++;
                            ndel++;

                            if ( loc.questionable() ) {
                                if( nsd->isCapped() && !loc.isValid() && i == 1 ) {
                                    /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                                       see comments in namespace.h
                                    */
                                    break;
                                }

                                string err( str::stream() << "bad pointer in deleted record list: "
                                                          << loc.toString()
                                                          << " bucket: " << i
                                                          << " k: " << k );
                                errors << err;
                                valid = false;
                                break;
                            }

                            DeletedRecord *d = loc.drec();
                            delSize += d->lengthWithHeaders();
                            loc = d->nextDeleted();
                            k++;
                            killCurrentOp.checkForInterrupt();
                        }
                        delBucketSizes << k;
                    }
                    catch (...) {
                        errors << ("exception in deleted chain for bucket " + BSONObjBuilder::numStr(i));
                        valid = false;
                    }
                }
                result.appendNumber("deletedCount", ndel);
                result.appendNumber("deletedSize", delSize);
                if ( full ) {
                    result << "delBucketSizes" << delBucketSizes.arr();
                }

                if ( incorrect ) {
                    errors << (BSONObjBuilder::numStr(incorrect) + " records from datafile are in deleted list");
                    valid = false;
                }

                int idxn = 0;
                try  {
                    IndexCatalog* indexCatalog = collection->getIndexCatalog();

                    result.append("nIndexes", nsd->getCompletedIndexCount());
                    BSONObjBuilder indexes; // not using subObjStart to be exception safe
                    NamespaceDetails::IndexIterator i = nsd->ii();
                    while( i.more() ) {
                        IndexDetails& id = i.next();
                        log() << "validating index " << idxn << ": " << id.indexNamespace() << endl;

                        IndexDescriptor* descriptor = indexCatalog->getDescriptor( idxn );
                        verify( descriptor );
                        IndexAccessMethod* iam = indexCatalog->getIndex( descriptor );
                        verify( iam );

                        int64_t keys;
                        iam->validate(&keys);
                        indexes.appendNumber(id.indexNamespace(), static_cast<long long>(keys));
                        idxn++;
                    }
                    result.append("keysPerIndex", indexes.done());
                }
                catch (...) {
                    errors << ("exception during index validate idxn " + BSONObjBuilder::numStr(idxn));
                    valid=false;
                }

            }
            catch (AssertionException) {
                errors << "exception during validate";
                valid = false;
            }

            result.appendBool("valid", valid);
            result.append("errors", errors.arr());

            if ( !full ){
                result.append("warning", "Some checks omitted for speed. use {full:true} option to do more thorough scan.");
            }
            
            if ( !valid ) {
                result.append("advice", "ns corrupt, requires repair");
            }

        }
Esempio n. 13
0
    void NamespaceDetails::emptyCappedCollection( const char *ns ) {
        DEV verify( this == nsdetails(ns) );
        massert( 13424, "collection must be capped", isCapped() );
        massert( 13425, "background index build in progress", !_indexBuildsInProgress );

        vector<BSONObj> indexes = Helpers::findAll( Namespace( ns ).getSisterNS( "system.indexes" ) , BSON( "ns" << ns ) );
        for ( unsigned i=0; i<indexes.size(); i++ ) {
            indexes[i] = indexes[i].copy();
        }

        if ( _nIndexes ) {
            string errmsg;
            BSONObjBuilder note;
            bool res = dropIndexes( this , ns , "*" , errmsg , note , true );
            massert( 13426 , str::stream() << "failed during index drop: " << errmsg , res );
        }

        // Clear all references to this namespace.
        ClientCursor::invalidate( ns );
        NamespaceDetailsTransient::resetCollection( ns );

        // Get a writeable reference to 'this' and reset all pertinent
        // attributes.
        NamespaceDetails *t = writingWithoutExtra();

        t->cappedLastDelRecLastExtent() = DiskLoc();
        t->cappedListOfAllDeletedRecords() = DiskLoc();

        // preserve firstExtent/lastExtent
        t->_capExtent = _firstExtent;
        t->_stats.datasize = _stats.nrecords = 0;
        // lastExtentSize preserve
        // nIndexes preserve 0
        // capped preserve true
        // max preserve
        t->_paddingFactor = 1.0;
        t->_systemFlags = 0;
        t->_capFirstNewRecord = DiskLoc();
        t->_capFirstNewRecord.setInvalid();
        t->cappedLastDelRecLastExtent().setInvalid();
        // dataFileVersion preserve
        // indexFileVersion preserve
        t->_multiKeyIndexBits = 0;
        t->_reservedA = 0;
        t->_extraOffset = 0;
        // indexBuildInProgress preserve 0
        memset(t->_reserved, 0, sizeof(t->_reserved));

        // Reset all existing extents and recreate the deleted list.
        for( DiskLoc ext = _firstExtent; !ext.isNull(); ext = ext.ext()->xnext ) {
            DiskLoc prev = ext.ext()->xprev;
            DiskLoc next = ext.ext()->xnext;
            DiskLoc empty = ext.ext()->reuse( ns, true );
            ext.ext()->xprev.writing() = prev;
            ext.ext()->xnext.writing() = next;
            addDeletedRec( empty.drec(), empty );
        }

        for ( unsigned i=0; i<indexes.size(); i++ ) {
            theDataFileMgr.insertWithObjMod(Namespace( ns ).getSisterNS( "system.indexes" ).c_str(),
                                            indexes[i],
                                            false,
                                            true);
        }
        
    }
Esempio n. 14
0
    Status cloneCollectionAsCapped( Database* db,
                                    const string& shortFrom,
                                    const string& shortTo,
                                    double size,
                                    bool temp,
                                    bool logForReplication ) {

        string fromNs = db->name() + "." + shortFrom;
        string toNs = db->name() + "." + shortTo;

        Collection* fromCollection = db->getCollection( fromNs );
        if ( !fromCollection )
            return Status( ErrorCodes::NamespaceNotFound,
                           str::stream() << "source collection " << fromNs <<  " does not exist" );

        if ( db->getCollection( toNs ) )
            return Status( ErrorCodes::NamespaceExists, "to collection already exists" );

        // create new collection
        {
            Client::Context ctx( toNs );
            BSONObjBuilder spec;
            spec.appendBool( "capped", true );
            spec.append( "size", size );
            if ( temp )
                spec.appendBool( "temp", true );

            string errmsg;
            if ( !userCreateNS( toNs.c_str(), spec.done(), errmsg, logForReplication ) )
                return Status( ErrorCodes::InternalError, errmsg );
        }

        auto_ptr<Runner> runner;

        {
            const NamespaceDetails* details = fromCollection->details();
            DiskLoc extent = details->firstExtent();

            // datasize and extentSize can't be compared exactly, so add some padding to 'size'
            long long excessSize =
                static_cast<long long>( fromCollection->dataSize() - size * 2 );

            // skip ahead some extents since not all the data fits,
            // so we have to chop a bunch off
            for( ;
                 excessSize > extent.ext()->length && extent != details->lastExtent();
                 extent = extent.ext()->xnext ) {

                excessSize -= extent.ext()->length;
                LOG( 2 ) << "cloneCollectionAsCapped skipping extent of size "
                         << extent.ext()->length << endl;
                LOG( 6 ) << "excessSize: " << excessSize << endl;
            }
            DiskLoc startLoc = extent.ext()->firstRecord;

            runner.reset( InternalPlanner::collectionScan(fromNs,
                                                          InternalPlanner::FORWARD,
                                                          startLoc) );
        }

        Collection* toCollection = db->getCollection( toNs );
        verify( toCollection );

        while ( true ) {
            BSONObj obj;
            Runner::RunnerState state = runner->getNext(&obj, NULL);

            switch( state ) {
            case Runner::RUNNER_EOF:
                return Status::OK();
            case Runner::RUNNER_DEAD:
                db->dropCollection( toNs );
                return Status( ErrorCodes::InternalError, "runner turned dead while iterating" );
            case Runner::RUNNER_ERROR:
                return Status( ErrorCodes::InternalError, "runner error while iterating" );
            case Runner::RUNNER_ADVANCED:
                toCollection->insertDocument( obj, true );
                if ( logForReplication )
                    logOp( "i", toNs.c_str(), obj );
                getDur().commitIfNeeded();
            }
        }

        verify( false ); // unreachable
    }
Esempio n. 15
0
        void validateNS(const char *ns, NamespaceDetails *d, const BSONObj& cmdObj, BSONObjBuilder& result) {
            const bool full = cmdObj["full"].trueValue();
            const bool scanData = full || cmdObj["scandata"].trueValue();

            bool valid = true;
            BSONArrayBuilder errors; // explanation(s) for why valid = false
            if ( d->isCapped() ){
                result.append("capped", d->isCapped());
                result.appendNumber("max", d->maxCappedDocs());
            }

            result.append("firstExtent", str::stream() << d->firstExtent.toString() << " ns:" << d->firstExtent.ext()->nsDiagnostic.toString());
            result.append( "lastExtent", str::stream() <<  d->lastExtent.toString() << " ns:" <<  d->lastExtent.ext()->nsDiagnostic.toString());
            
            BSONArrayBuilder extentData;

            try {
                d->firstExtent.ext()->assertOk();
                d->lastExtent.ext()->assertOk();

                DiskLoc el = d->firstExtent;
                int ne = 0;
                while( !el.isNull() ) {
                    Extent *e = el.ext();
                    e->assertOk();
                    el = e->xnext;
                    ne++;
                    if ( full )
                        extentData << e->dump();
                    
                    killCurrentOp.checkForInterrupt();
                }
                result.append("extentCount", ne);
            }
            catch (...) {
                valid=false;
                errors << "extent asserted";
            }

            if ( full )
                result.appendArray( "extents" , extentData.arr() );

            
            result.appendNumber("datasize", d->stats.datasize);
            result.appendNumber("nrecords", d->stats.nrecords);
            result.appendNumber("lastExtentSize", d->lastExtentSize);
            result.appendNumber("padding", d->paddingFactor());
            

            try {

                try {
                    result.append("firstExtentDetails", d->firstExtent.ext()->dump());

                    valid = valid && d->firstExtent.ext()->validates() && 
                        d->firstExtent.ext()->xprev.isNull();
                }
                catch (...) {
                    errors << "exception firstextent";
                    valid = false;
                }

                set<DiskLoc> recs;
                if( scanData ) {
                    shared_ptr<Cursor> c = theDataFileMgr.findAll(ns);
                    int n = 0;
                    int nInvalid = 0;
                    long long len = 0;
                    long long nlen = 0;
                    int outOfOrder = 0;
                    DiskLoc cl_last;
                    while ( c->ok() ) {
                        n++;

                        DiskLoc cl = c->currLoc();
                        if ( n < 1000000 )
                            recs.insert(cl);
                        if ( d->isCapped() ) {
                            if ( cl < cl_last )
                                outOfOrder++;
                            cl_last = cl;
                        }

                        Record *r = c->_current();
                        len += r->lengthWithHeaders();
                        nlen += r->netLength();

                        if (full){
                            BSONObj obj = BSONObj::make(r);
                            if (!obj.isValid() || !obj.valid()){ // both fast and deep checks
                                valid = false;
                                if (nInvalid == 0) // only log once;
                                    errors << "invalid bson object detected (see logs for more info)";

                                nInvalid++;
                                if (strcmp("_id", obj.firstElementFieldName()) == 0){
                                    try {
                                        obj.firstElement().validate(); // throws on error
                                        log() << "Invalid bson detected in " << ns << " with _id: " << obj.firstElement().toString(false) << endl;
                                    }
                                    catch(...){
                                        log() << "Invalid bson detected in " << ns << " with corrupt _id" << endl;
                                    }
                                }
                                else {
                                    log() << "Invalid bson detected in " << ns << " and couldn't find _id" << endl;
                                }
                            }
                        }

                        c->advance();
                    }
                    if ( d->isCapped() && !d->capLooped() ) {
                        result.append("cappedOutOfOrder", outOfOrder);
                        if ( outOfOrder > 1 ) {
                            valid = false;
                            errors << "too many out of order records";
                        }
                    }
                    result.append("objectsFound", n);

                    if (full) {
                        result.append("invalidObjects", nInvalid);
                    }

                    result.appendNumber("bytesWithHeaders", len);
                    result.appendNumber("bytesWithoutHeaders", nlen);
                }

                BSONArrayBuilder deletedListArray;
                for ( int i = 0; i < Buckets; i++ ) {
                    deletedListArray << d->deletedList[i].isNull();
                }

                int ndel = 0;
                long long delSize = 0;
                int incorrect = 0;
                for ( int i = 0; i < Buckets; i++ ) {
                    DiskLoc loc = d->deletedList[i];
                    try {
                        int k = 0;
                        while ( !loc.isNull() ) {
                            if ( recs.count(loc) )
                                incorrect++;
                            ndel++;

                            if ( loc.questionable() ) {
                                if( d->isCapped() && !loc.isValid() && i == 1 ) {
                                    /* the constructor for NamespaceDetails intentionally sets deletedList[1] to invalid
                                       see comments in namespace.h
                                    */
                                    break;
                                }

                                if ( loc.a() <= 0 || strstr(ns, "hudsonSmall") == 0 ) {
                                    string err (str::stream() << "bad deleted loc: " << loc.toString() << " bucket:" << i << " k:" << k);
                                    errors << err;

                                    valid = false;
                                    break;
                                }
                            }

                            DeletedRecord *d = loc.drec();
                            delSize += d->lengthWithHeaders();
                            loc = d->nextDeleted();
                            k++;
                            killCurrentOp.checkForInterrupt();
                        }
                    }
                    catch (...) {
                        errors << ("exception in deleted chain for bucket " + BSONObjBuilder::numStr(i));
                        valid = false;
                    }
                }
                result.appendNumber("deletedCount", ndel);
                result.appendNumber("deletedSize", delSize);

                if ( incorrect ) {
                    errors << (BSONObjBuilder::numStr(incorrect) + " records from datafile are in deleted list");
                    valid = false;
                }

                int idxn = 0;
                try  {
                    result.append("nIndexes", d->nIndexes);
                    BSONObjBuilder indexes; // not using subObjStart to be exception safe
                    NamespaceDetails::IndexIterator i = d->ii();
                    while( i.more() ) {
                        IndexDetails& id = i.next();
                        log() << "validating index " << idxn << ": " << id.indexNamespace() << endl;
                        long long keys = id.idxInterface().fullValidate(id.head, id.keyPattern());
                        indexes.appendNumber(id.indexNamespace(), keys);
                        idxn++;
                    }
                    result.append("keysPerIndex", indexes.done());
                }
                catch (...) {
                    errors << ("exception during index validate idxn " + BSONObjBuilder::numStr(idxn));
                    valid=false;
                }

            }
            catch (AssertionException) {
                errors << "exception during validate";
                valid = false;
            }

            result.appendBool("valid", valid);
            result.append("errors", errors.arr());

            if ( !full ){
                result.append("warning", "Some checks omitted for speed. use {full:true} option to do more thorough scan.");
            }
            
            if ( !valid ) {
                result.append("advice", "ns corrupt, requires repair");
            }

        }
Esempio n. 16
0
    DiskLoc ExtentManager::allocFromFreeList( int approxSize, bool capped ) {
        if ( !_freeListDetails ) {
            return DiskLoc();
        }

        // setup extent constraints

        int low, high;
        if ( capped ) {
            // be strict about the size
            low = approxSize;
            if ( low > 2048 ) low -= 256;
            high = (int) (approxSize * 1.05) + 256;
        }
        else {
            low = (int) (approxSize * 0.8);
            high = (int) (approxSize * 1.4);
        }
        if ( high <= 0 ) {
            // overflowed
            high = max(approxSize, Extent::maxSize());
        }
        if ( high <= Extent::minSize() ) {
            // the minimum extent size is 4097
            high = Extent::minSize() + 1;
        }

        // scan free list looking for something suitable

        int n = 0;
        Extent *best = 0;
        int bestDiff = 0x7fffffff;
        {
            Timer t;
            DiskLoc L = _freeListDetails->firstExtent();
            while( !L.isNull() ) {
                Extent * e = L.ext();
                if ( e->length >= low && e->length <= high ) {
                    int diff = abs(e->length - approxSize);
                    if ( diff < bestDiff ) {
                        bestDiff = diff;
                        best = e;
                        if ( ((double) diff) / approxSize < 0.1 ) {
                            // close enough
                            break;
                        }
                        if ( t.seconds() >= 2 ) {
                            // have spent lots of time in write lock, and we are in [low,high], so close enough
                            // could come into play if extent freelist is very long
                            break;
                        }
                    }
                    else {
                        OCCASIONALLY {
                            if ( high < 64 * 1024 && t.seconds() >= 2 ) {
                                // be less picky if it is taking a long time
                                high = 64 * 1024;
                            }
                        }
                    }
                }
                L = e->xnext;
                ++n;
            }
            if ( t.seconds() >= 10 ) {
                log() << "warning: slow scan in allocFromFreeList (in write lock)" << endl;
            }
        }
Esempio n. 17
0
    /** @return number of skipped (invalid) documents */
    unsigned compactExtent(const char *ns, NamespaceDetails *d, const DiskLoc ext, int n,
                const scoped_array<IndexSpec> &indexSpecs,
                scoped_array<SortPhaseOne>& phase1, int nidx, bool validate, 
                double pf, int pb)
    {
        log() << "compact extent #" << n << endl;
        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent *e = ext.ext();
        e->assertOk();
        assert( e->validates() );
        unsigned skipped = 0;

        {
            // the next/prev pointers within the extent might not be in order so we first page the whole thing in 
            // sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            MAdvise adv(e, e->length, MAdvise::Sequential);
            const char *p = (const char *) e;
            for( int i = 0; i < e->length; i += 4096 ) { 
                faux += p[i];
            }
            int ms = t.millis();
            if( ms > 1000 ) 
                log() << "compact end paging in " << ms << "ms " << e->length/1000000.0/ms << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            unsigned totalSize = 0;
            int nrecs = 0;
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    Record *recOld = L.rec();
                    L = recOld->nextInExtent(L);
                    nrecs++;
                    BSONObj objOld(recOld);

                    if( !validate || objOld.valid() ) {
                        unsigned sz = objOld.objsize();

                        oldObjSize += sz;
                        oldObjSizeWithPadding += recOld->netLength();

                        unsigned lenWHdr = sz + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;
                        {
                            lenWPadding = static_cast<unsigned>(pf*lenWPadding);
                            lenWPadding += pb;
                            lenWPadding = lenWPadding & quantizeMask(lenWPadding);
                            if( lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) { 
                                lenWPadding = lenWHdr;
                            }
                        }
                        totalSize += lenWPadding;
                        DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWPadding, false);
                        uassert(14024, "compact error out of space during compaction", !loc.isNull());
                        Record *recNew = loc.rec();
                        recNew = (Record *) getDur().writingPtr(recNew, lenWHdr);
                        addRecordToRecListInExtent(recNew, loc);
                        memcpy(recNew->data, objOld.objdata(), sz);

                        {
                            // extract keys for all indexes we will be rebuilding
                            for( int x = 0; x < nidx; x++ ) { 
                                phase1[x].addKeys(indexSpecs[x], objOld, loc);
                            }
                        }
                    }
                    else { 
                        if( ++skipped <= 10 )
                            log() << "compact skipping invalid object" << endl;
                    }

                    if( L.isNull() ) { 
                        // we just did the very last record from the old extent.  it's still pointed to 
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                    if( stopping || getDur().aCommitIsNeeded() ) {
                        e->firstRecord.writing() = L;
                        Record *r = L.rec();
                        getDur().writingInt(r->prevOfs) = DiskLoc::NullOfs;
                        getDur().commitIfNeeded();
                        killCurrentOp.checkForInterrupt(false);
                    }
                }
            } // if !L.isNull()

            assert( d->firstExtent == ext );
            assert( d->lastExtent != ext );
            DiskLoc newFirst = e->xnext;
            d->firstExtent.writing() = newFirst;
            newFirst.ext()->xprev.writing().Null();
            getDur().writing(e)->markEmpty();
            freeExtents(ext,ext);
            getDur().commitIfNeeded();

            { 
                double op = 1.0;
                if( oldObjSize ) 
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                log() << "compact " << nrecs << " documents " << totalSize/1000000.0 << "MB"
                    << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100
                    << endl;                    
            }
        }

        return skipped;
    }
Esempio n. 18
0
    /** @return number of skipped (invalid) documents */
    unsigned compactExtent(const char *ns, NamespaceDetails *d, const DiskLoc diskloc, int n,
                const scoped_array<IndexSpec> &indexSpecs,
                scoped_array<SortPhaseOne>& phase1, int nidx, bool validate, 
                double pf, int pb)
    {
        log() << "compact begin extent #" << n << " for namespace " << ns << endl;
        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent *e = diskloc.ext();
        e->assertOk();
        verify( e->validates() );
        unsigned skipped = 0;

        {
            // the next/prev pointers within the extent might not be in order so we first page the whole thing in 
            // sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            MongoDataFile* mdf = cc().database()->getFile( diskloc.a() );
            HANDLE fd = mdf->getFd();
            int offset = diskloc.getOfs();
            Extent* ext = diskloc.ext();
            size_t length = ext->length;
                
            touch_pages(fd, offset, length, ext);
            int ms = t.millis();
            if( ms > 1000 ) 
                log() << "compact end paging in " << ms << "ms " << e->length/1000000.0/ms << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            long long datasize = 0;
            long long nrecords = 0;
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    Record *recOld = L.rec();
                    L = recOld->nextInExtent(L);
                    BSONObj objOld = BSONObj::make(recOld);

                    if( !validate || objOld.valid() ) {
                        nrecords++;
                        unsigned sz = objOld.objsize();

                        oldObjSize += sz;
                        oldObjSizeWithPadding += recOld->netLength();

                        unsigned lenWHdr = sz + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;
                        {
                            lenWPadding = static_cast<unsigned>(pf*lenWPadding);
                            lenWPadding += pb;
                            lenWPadding = lenWPadding & quantizeMask(lenWPadding);
                            if( lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) { 
                                lenWPadding = lenWHdr;
                            }
                        }
                        DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWPadding, false);
                        uassert(14024, "compact error out of space during compaction", !loc.isNull());
                        Record *recNew = loc.rec();
                        datasize += recNew->netLength();
                        recNew = (Record *) getDur().writingPtr(recNew, lenWHdr);
                        addRecordToRecListInExtent(recNew, loc);
                        memcpy(recNew->data(), objOld.objdata(), sz);

                        {
                            // extract keys for all indexes we will be rebuilding
                            for( int x = 0; x < nidx; x++ ) { 
                                phase1[x].addKeys(indexSpecs[x], objOld, loc);
                            }
                        }
                    }
                    else { 
                        if( ++skipped <= 10 )
                            log() << "compact skipping invalid object" << endl;
                    }

                    if( L.isNull() ) { 
                        // we just did the very last record from the old extent.  it's still pointed to 
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                    if( stopping || getDur().aCommitIsNeeded() ) {
                        e->firstRecord.writing() = L;
                        Record *r = L.rec();
                        getDur().writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                        getDur().commitIfNeeded();
                        killCurrentOp.checkForInterrupt(false);
                    }
                }
            } // if !L.isNull()

            verify( d->firstExtent == diskloc );
            verify( d->lastExtent != diskloc );
            DiskLoc newFirst = e->xnext;
            d->firstExtent.writing() = newFirst;
            newFirst.ext()->xprev.writing().Null();
            getDur().writing(e)->markEmpty();
            freeExtents( diskloc, diskloc );
            // update datasize/record count for this namespace's extent
            {
                NamespaceDetails::Stats *s = getDur().writing(&d->stats);
                s->datasize += datasize;
                s->nrecords += nrecords;
            }

            getDur().commitIfNeeded();

            { 
                double op = 1.0;
                if( oldObjSize ) 
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                log() << "compact finished extent #" << n << " containing " << nrecords << " documents (" << datasize/1000000.0 << "MB)"
                    << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100
                    << endl;                    
            }
        }

        return skipped;
    }
Esempio n. 19
0
    /** @return number of skipped (invalid) documents */
    unsigned compactExtent(const char *ns, NamespaceDetails *d, const DiskLoc diskloc, int n,
                           int nidx, bool validate, double pf, int pb, bool useDefaultPadding,
                           bool preservePadding) {

        log() << "compact begin extent #" << n << " for namespace " << ns << endl;
        unsigned oldObjSize = 0; // we'll report what the old padding was
        unsigned oldObjSizeWithPadding = 0;

        Extent *e = diskloc.ext();
        e->assertOk();
        verify( e->validates(diskloc) );
        unsigned skipped = 0;

        Database* db = cc().database();

        {
            // the next/prev pointers within the extent might not be in order so we first
            // page the whole thing in sequentially
            log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
            Timer t;
            Extent* ext = db->getExtentManager().getExtent( diskloc );
            size_t length = ext->length;

            touch_pages( reinterpret_cast<const char*>(ext), length );
            int ms = t.millis();
            if( ms > 1000 )
                log() << "compact end paging in " << ms << "ms "
                      << e->length/1000000.0/ms << "MB/sec" << endl;
        }

        {
            log() << "compact copying records" << endl;
            long long datasize = 0;
            long long nrecords = 0;
            DiskLoc L = e->firstRecord;
            if( !L.isNull() ) {
                while( 1 ) {
                    Record *recOld = L.rec();
                    L = db->getExtentManager().getNextRecordInExtent(L);
                    BSONObj objOld = BSONObj::make(recOld);

                    if( !validate || objOld.valid() ) {
                        nrecords++;
                        unsigned sz = objOld.objsize();

                        oldObjSize += sz;
                        oldObjSizeWithPadding += recOld->netLength();

                        unsigned lenWHdr = sz + Record::HeaderSize;
                        unsigned lenWPadding = lenWHdr;
                        // if we are preserving the padding, the record should not change size
                        if (preservePadding) {
                            lenWPadding = recOld->lengthWithHeaders();
                        }
                        // maintain UsePowerOf2Sizes if no padding values were passed in
                        else if (d->isUserFlagSet(NamespaceDetails::Flag_UsePowerOf2Sizes)
                                && useDefaultPadding) {
                            lenWPadding = d->quantizePowerOf2AllocationSpace(lenWPadding);
                        }
                        // otherwise use the padding values (pf and pb) that were passed in
                        else {
                            lenWPadding = static_cast<unsigned>(pf*lenWPadding);
                            lenWPadding += pb;
                            lenWPadding = lenWPadding & quantizeMask(lenWPadding);
                        }
                        if (lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) { 
                            lenWPadding = lenWHdr;
                        }
                        DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWPadding, false);
                        uassert(14024, "compact error out of space during compaction", !loc.isNull());
                        Record *recNew = loc.rec();
                        datasize += recNew->netLength();
                        recNew = (Record *) getDur().writingPtr(recNew, lenWHdr);
                        addRecordToRecListInExtent(recNew, loc);
                        memcpy(recNew->data(), objOld.objdata(), sz);
                    }
                    else { 
                        if( ++skipped <= 10 )
                            log() << "compact skipping invalid object" << endl;
                    }

                    if( L.isNull() ) { 
                        // we just did the very last record from the old extent.  it's still pointed to 
                        // by the old extent ext, but that will be fixed below after this loop
                        break;
                    }

                    // remove the old records (orphan them) periodically so our commit block doesn't get too large
                    bool stopping = false;
                    RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                    if( stopping || getDur().aCommitIsNeeded() ) {
                        e->firstRecord.writing() = L;
                        Record *r = L.rec();
                        getDur().writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                        getDur().commitIfNeeded();
                        killCurrentOp.checkForInterrupt(false);
                    }
                }
            } // if !L.isNull()

            verify( d->firstExtent() == diskloc );
            verify( d->lastExtent() != diskloc );
            DiskLoc newFirst = e->xnext;
            d->firstExtent().writing() = newFirst;
            newFirst.ext()->xprev.writing().Null();
            getDur().writing(e)->markEmpty();
            cc().database()->getExtentManager().freeExtents( diskloc, diskloc );

            // update datasize/record count for this namespace's extent
            d->incrementStats( datasize, nrecords );

            getDur().commitIfNeeded();

            { 
                double op = 1.0;
                if( oldObjSize ) 
                    op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
                log() << "compact finished extent #" << n << " containing " << nrecords << " documents (" << datasize/1000000.0 << "MB)"
                    << " oldPadding: " << op << ' ' << static_cast<unsigned>(op*100.0)/100
                    << endl;                    
            }
        }

        return skipped;
    }
Esempio n. 20
0
    bool _compact(const char *ns, NamespaceDetails *d, string& errmsg, bool validate,
                  BSONObjBuilder& result, double pf, int pb, bool useDefaultPadding,
                  bool preservePadding) {
        // this is a big job, so might as well make things tidy before we start just to be nice.
        getDur().commitIfNeeded();

        list<DiskLoc> extents;
        for( DiskLoc L = d->firstExtent(); !L.isNull(); L = L.ext()->xnext )
            extents.push_back(L);
        log() << "compact " << extents.size() << " extents" << endl;

        ProgressMeterHolder pm(cc().curop()->setMessage("compact extent",
                                                        "Extent Compacting Progress",
                                                        extents.size()));

        // same data, but might perform a little different after compact?
        Collection* collection = cc().database()->getCollection( ns );
        verify( collection );
        collection->infoCache()->addedIndex();

        verify( d->getCompletedIndexCount() == d->getTotalIndexCount() );
        int nidx = d->getCompletedIndexCount();
        scoped_array<BSONObj> indexSpecs( new BSONObj[nidx] );
        {
            NamespaceDetails::IndexIterator ii = d->ii(); 
            // For each existing index...
            for( int idxNo = 0; ii.more(); ++idxNo ) {
                // Build a new index spec based on the old index spec.
                BSONObjBuilder b;
                BSONObj::iterator i(ii.next().info.obj());
                while( i.more() ) { 
                    BSONElement e = i.next();
                    if ( str::equals( e.fieldName(), "v" ) ) {
                        // Drop any preexisting index version spec.  The default index version will
                        // be used instead for the new index.
                        continue;
                    }
                    if ( str::equals( e.fieldName(), "background" ) ) {
                        // Create the new index in the foreground.
                        continue;
                    }
                    // Pass the element through to the new index spec.
                    b.append(e);
                }
                indexSpecs[idxNo] = b.obj().getOwned();
            }
        }

        log() << "compact orphan deleted lists" << endl;
        d->orphanDeletedList();

        // Start over from scratch with our extent sizing and growth
        d->setLastExtentSize( 0 );

        // before dropping indexes, at least make sure we can allocate one extent!
        uassert(14025, "compact error no space available to allocate", !allocateSpaceForANewRecord(ns, d, Record::HeaderSize+1, false).isNull());

        // note that the drop indexes call also invalidates all clientcursors for the namespace, which is important and wanted here
        log() << "compact dropping indexes" << endl;
        Status status = collection->getIndexCatalog()->dropAllIndexes( true );
        if ( !status.isOK() ) {
            errmsg = str::stream() << "compact drop indexes failed: " << status.toString();
            log() << status.toString() << endl;
            return false;
        }

        getDur().commitIfNeeded();

        long long skipped = 0;
        int n = 0;

        // reset data size and record counts to 0 for this namespace
        // as we're about to tally them up again for each new extent
        d->setStats( 0, 0 );

        for( list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++ ) { 
            skipped += compactExtent(ns, d, *i, n++, nidx, validate, pf, pb,
                                     useDefaultPadding, preservePadding);
            pm.hit();
        }

        if( skipped ) {
            result.append("invalidObjects", skipped);
        }

        verify( d->firstExtent().ext()->xprev.isNull() );

        // indexes will do their own progress meter?
        pm.finished();

        // build indexes
        for( int i = 0; i < nidx; i++ ) {
            killCurrentOp.checkForInterrupt(false);
            BSONObj info = indexSpecs[i];
            log() << "compact create index " << info["key"].Obj().toString() << endl;
            Status status = collection->getIndexCatalog()->createIndex( info, false );
            if ( !status.isOK() ) {
                log() << "failed to create index: " << status.toString();
                uassertStatusOK( status );
            }
        }

        return true;
    }
Esempio n. 21
0
 int nExtents() const {
     int count = 0;
     for ( DiskLoc i = nsd()->firstExtent; !i.isNull(); i = i.ext()->xnext )
         ++count;
     return count;
 }
Esempio n. 22
0
    void NamespaceDetails::cappedTruncateAfter(const char *ns, DiskLoc end, bool inclusive) {
        DEV verify( this == nsdetails(ns) );
        verify( cappedLastDelRecLastExtent().isValid() );

        // We iteratively remove the newest document until the newest document
        // is 'end', then we remove 'end' if requested.
        bool foundLast = false;
        while( 1 ) {
            if ( foundLast ) {
                // 'end' has been found and removed, so break.
                break;
            }
            getDur().commitIfNeeded();
            // 'curr' will point to the newest document in the collection.
            DiskLoc curr = theCapExtent()->lastRecord;
            verify( !curr.isNull() );
            if ( curr == end ) {
                if ( inclusive ) {
                    // 'end' has been found, so break next iteration.
                    foundLast = true;
                }
                else {
                    // 'end' has been found, so break.
                    break;
                }
            }

            // TODO The algorithm used in this function cannot generate an
            // empty collection, but we could call emptyCappedCollection() in
            // this case instead of asserting.
            uassert( 13415, "emptying the collection is not allowed", _stats.nrecords > 1 );

            // Delete the newest record, and coalesce the new deleted
            // record with existing deleted records.
            theDataFileMgr.deleteRecord(this, ns, curr.rec(), curr, true);
            compact();

            // This is the case where we have not yet had to remove any
            // documents to make room for other documents, and we are allocating
            // documents from free space in fresh extents instead of reusing
            // space from familiar extents.
            if ( !capLooped() ) {

                // We just removed the last record from the 'capExtent', and
                // the 'capExtent' can't be empty, so we set 'capExtent' to
                // capExtent's prev extent.
                if ( theCapExtent()->lastRecord.isNull() ) {
                    verify( !theCapExtent()->xprev.isNull() );
                    // NOTE Because we didn't delete the last document, and
                    // capLooped() is false, capExtent is not the first extent
                    // so xprev will be nonnull.
                    _capExtent.writing() = theCapExtent()->xprev;
                    theCapExtent()->assertOk();

                    // update cappedLastDelRecLastExtent()
                    cappedTruncateLastDelUpdate();
                }
                continue;
            }

            // This is the case where capLooped() is true, and we just deleted
            // from capExtent, and we just deleted capFirstNewRecord, which was
            // the last record on the fresh side of capExtent.
            // NOTE In this comparison, curr and potentially capFirstNewRecord
            // may point to invalid data, but we can still compare the
            // references themselves.
            if ( curr == _capFirstNewRecord ) {

                // Set 'capExtent' to the first nonempty extent prior to the
                // initial capExtent.  There must be such an extent because we
                // have not deleted the last document in the collection.  It is
                // possible that all extents other than the capExtent are empty.
                // In this case we will keep the initial capExtent and specify
                // that all records contained within are on the fresh rather than
                // stale side of the extent.
                DiskLoc newCapExtent = _capExtent;
                do {
                    // Find the previous extent, looping if necessary.
                    newCapExtent = ( newCapExtent == _firstExtent ) ? _lastExtent : newCapExtent.ext()->xprev;
                    newCapExtent.ext()->assertOk();
                }
                while ( newCapExtent.ext()->firstRecord.isNull() );
                _capExtent.writing() = newCapExtent;

                // Place all documents in the new capExtent on the fresh side
                // of the capExtent by setting capFirstNewRecord to the first
                // document in the new capExtent.
                _capFirstNewRecord.writing() = theCapExtent()->firstRecord;

                // update cappedLastDelRecLastExtent()
                cappedTruncateLastDelUpdate();
            }
        }
    }
Esempio n. 23
0
    bool _compact(const char *ns, NamespaceDetails *d, string& errmsg, bool validate, BSONObjBuilder& result, double pf, int pb) { 
        //int les = d->lastExtentSize;

        // this is a big job, so might as well make things tidy before we start just to be nice.
        getDur().commitNow();

        list<DiskLoc> extents;
        for( DiskLoc L = d->firstExtent; !L.isNull(); L = L.ext()->xnext ) 
            extents.push_back(L);
        log() << "compact " << extents.size() << " extents" << endl;

        ProgressMeterHolder pm( cc().curop()->setMessage( "compact extent" , extents.size() ) );

        // same data, but might perform a little different after compact?
        NamespaceDetailsTransient::get(ns).clearQueryCache();

        int nidx = d->nIndexes;
        scoped_array<IndexSpec> indexSpecs( new IndexSpec[nidx] );
        scoped_array<SortPhaseOne> phase1( new SortPhaseOne[nidx] );
        {
            NamespaceDetails::IndexIterator ii = d->ii(); 
            int x = 0;
            while( ii.more() ) { 
                BSONObjBuilder b;
                IndexDetails& idx = ii.next();
                BSONObj::iterator i(idx.info.obj());
                while( i.more() ) { 
                    BSONElement e = i.next();
                    if( !str::equals(e.fieldName(), "v") && !str::equals(e.fieldName(), "background") ) {
                        b.append(e);
                    }
                }
                BSONObj o = b.obj().getOwned();
                phase1[x].sorter.reset( new BSONObjExternalSorter( idx.idxInterface(), o.getObjectField("key") ) );
                phase1[x].sorter->hintNumObjects( d->stats.nrecords );
                indexSpecs[x++].reset(o);
            }
        }

        log() << "compact orphan deleted lists" << endl;
        for( int i = 0; i < Buckets; i++ ) { 
            d->deletedList[i].writing().Null();
        }

        // before dropping indexes, at least make sure we can allocate one extent!
        uassert(14025, "compact error no space available to allocate", !allocateSpaceForANewRecord(ns, d, Record::HeaderSize+1, false).isNull());

        // note that the drop indexes call also invalidates all clientcursors for the namespace, which is important and wanted here
        log() << "compact dropping indexes" << endl;
        BSONObjBuilder b;
        if( !dropIndexes(d, ns, "*", errmsg, b, true) ) { 
            errmsg = "compact drop indexes failed";
            log() << errmsg << endl;
            return false;
        }

        getDur().commitNow();

        long long skipped = 0;
        int n = 0;
        for( list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++ ) { 
            skipped += compactExtent(ns, d, *i, n++, indexSpecs, phase1, nidx, validate, pf, pb);
            pm.hit();
        }

        if( skipped ) {
            result.append("invalidObjects", skipped);
        }

        assert( d->firstExtent.ext()->xprev.isNull() );

        // indexes will do their own progress meter?
        pm.finished();

        // build indexes
        NamespaceString s(ns);
        string si = s.db + ".system.indexes";
        for( int i = 0; i < nidx; i++ ) {
            killCurrentOp.checkForInterrupt(false);
            BSONObj info = indexSpecs[i].info;
            log() << "compact create index " << info["key"].Obj().toString() << endl;
            try {
                precalced = &phase1[i];
                theDataFileMgr.insert(si.c_str(), info.objdata(), info.objsize());
            }
            catch(...) { 
                precalced = 0;
                throw;
            }
            precalced = 0;
        }

        return true;
    }
Esempio n. 24
0
    StatusWith<CompactStats> Collection::compact( const CompactOptions* compactOptions ) {

        if ( isCapped() )
            return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                             "cannot compact capped collection" );

        if ( _indexCatalog.numIndexesInProgress() )
            return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                             "cannot compact when indexes in progress" );

        NamespaceDetails* d = details();

        // this is a big job, so might as well make things tidy before we start just to be nice.
        getDur().commitIfNeeded();

        list<DiskLoc> extents;
        for( DiskLoc L = d->firstExtent(); !L.isNull(); L = L.ext()->xnext )
            extents.push_back(L);
        log() << "compact " << extents.size() << " extents" << endl;

        // same data, but might perform a little different after compact?
        _infoCache.reset();

        vector<BSONObj> indexSpecs;
        {
            IndexCatalog::IndexIterator ii( _indexCatalog.getIndexIterator( false ) );
            while ( ii.more() ) {
                IndexDescriptor* descriptor = ii.next();
                indexSpecs.push_back( _compactAdjustIndexSpec( descriptor->infoObj() ) );
            }
        }

        log() << "compact orphan deleted lists" << endl;
        d->orphanDeletedList();

        // Start over from scratch with our extent sizing and growth
        d->setLastExtentSize( 0 );

        // before dropping indexes, at least make sure we can allocate one extent!
        if ( allocateSpaceForANewRecord( _ns.ns().c_str(),
                                         d,
                                         Record::HeaderSize+1,
                                         false).isNull() ) {
            return StatusWith<CompactStats>( ErrorCodes::InternalError,
                                             "compact error no space available to allocate" );
        }

        // note that the drop indexes call also invalidates all clientcursors for the namespace,
        // which is important and wanted here
        log() << "compact dropping indexes" << endl;
        Status status = _indexCatalog.dropAllIndexes( true );
        if ( !status.isOK() ) {
            return StatusWith<CompactStats>( status );
        }

        getDur().commitIfNeeded();

        CompactStats stats;

        OwnedPointerVector<IndexCatalog::IndexBuildBlock> indexBuildBlocks;
        vector<IndexAccessMethod*> indexesToInsertTo;
        vector< std::pair<IndexAccessMethod*,IndexAccessMethod*> > bulkToCommit;
        for ( size_t i = 0; i < indexSpecs.size(); i++ ) {
            killCurrentOp.checkForInterrupt(false);
            BSONObj info = indexSpecs[i];
            info = _compactAdjustIndexSpec( info );
            info = _indexCatalog.fixIndexSpec( info );
            auto_ptr<IndexCatalog::IndexBuildBlock> block( new IndexCatalog::IndexBuildBlock( this,info ) );
            Status status = block->init();
            if ( !status.isOK() )
                return StatusWith<CompactStats>(status);

            IndexAccessMethod* accessMethod = block->getEntry()->accessMethod();
            status = accessMethod->initializeAsEmpty();
            if ( !status.isOK() )
                return StatusWith<CompactStats>(status);

            IndexAccessMethod* bulk = accessMethod->initiateBulk();
            if ( bulk ) {
                indexesToInsertTo.push_back( bulk );
                bulkToCommit.push_back( std::pair<IndexAccessMethod*,IndexAccessMethod*>( accessMethod, bulk ) );
            }
            else {
                indexesToInsertTo.push_back( accessMethod );
            }

            indexBuildBlocks.mutableVector().push_back( block.release() );
        }

        // reset data size and record counts to 0 for this namespace
        // as we're about to tally them up again for each new extent
        d->setStats( 0, 0 );

        ProgressMeterHolder pm(cc().curop()->setMessage("compact extent",
                                                        "Extent Compacting Progress",
                                                        extents.size()));

        int extentNumber = 0;
        for( list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++ ) {
            _compactExtent(*i, extentNumber++, indexesToInsertTo, compactOptions, &stats );
            pm.hit();
        }

        verify( d->firstExtent().ext()->xprev.isNull() );

        // indexes will do their own progress meter?
        pm.finished();

        log() << "starting index commits";

        for ( size_t i = 0; i < bulkToCommit.size(); i++ ) {
            bulkToCommit[i].first->commitBulk( bulkToCommit[i].second, false, NULL );
        }

        for ( size_t i = 0; i < indexBuildBlocks.size(); i++ ) {
            IndexCatalog::IndexBuildBlock* block = indexBuildBlocks.mutableVector()[i];
            block->success();
        }

        return StatusWith<CompactStats>( stats );
    }
        virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );
            uassert(15967,
                    "invalid collection name: " + target,
                    NamespaceString::validCollectionComponent(target.c_str()));
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            string sourceDB = nsToDatabase(source);
            string targetDB = nsToDatabase(target);
            string databaseName = sourceDB;
            databaseName += ".system.indexes";

            int longestIndexNameLength = 0;
            vector<BSONObj> oldIndSpec = Helpers::findAll(databaseName, BSON("ns" << source));
            for (size_t i = 0; i < oldIndSpec.size(); ++i) {
                int thisLength = oldIndSpec[i].getField("name").valuesize();
                if (thisLength > longestIndexNameLength) {
                     longestIndexNameLength = thisLength;
                }
            }
            unsigned int longestAllowed = maxNamespaceLen - longestIndexNameLength - 1;
            if (target.size() > longestAllowed) {
                StringBuilder sb;
                sb << "collection name length of " << target.size()
                << " exceeds maximum length of " << longestAllowed
                << ", allowing for index names";
                uasserted(16451, sb.str());
            }

            bool capped = false;
            long long size = 0;
            std::vector<BSONObj> indexesInProg;
            {
                Client::Context ctx( source );
                NamespaceDetails *nsd = nsdetails( source );
                uassert( 10026 ,  "source namespace does not exist", nsd );
                indexesInProg = stopIndexBuilds(dbname, cmdObj);
                capped = nsd->isCapped();
                if ( capped )
                    for( DiskLoc i = nsd->firstExtent(); !i.isNull(); i = i.ext()->xnext )
                        size += i.ext()->length;
            }

            Client::Context ctx( target );

            if ( nsdetails( target ) ) {
                uassert( 10027 ,  "target namespace exists", cmdObj["dropTarget"].trueValue() );

                Status s = cc().database()->dropCollection( target );
                if ( !s.isOK() ) {
                    errmsg = s.toString();
                    return false;
                }

            }


            // if we are renaming in the same database, just
            // rename the namespace and we're done.
            {
                if ( sourceDB == targetDB ) {
                    Status s = ctx.db()->renameCollection( source, target, cmdObj["stayTemp"].trueValue() );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        return false;
                    }
                    return true;
                }
            }

            // renaming across databases, so we must copy all
            // the data and then remove the source collection.
            BSONObjBuilder spec;
            if ( capped ) {
                spec.appendBool( "capped", true );
                spec.append( "size", double( size ) );
            }
            if ( !userCreateNS( target.c_str(), spec.done(), errmsg, false ) )
                return false;

            auto_ptr< DBClientCursor > c;
            DBDirectClient bridge;

            {
                c = bridge.query( source, BSONObj(), 0, 0, 0, fromRepl ? QueryOption_SlaveOk : 0 );
            }
            while( 1 ) {
                {
                    if ( !c->more() )
                        break;
                }
                BSONObj o = c->next();
                theDataFileMgr.insertWithObjMod( target.c_str(), o );
            }

            string sourceIndexes = nsToDatabase( source ) + ".system.indexes";
            string targetIndexes = nsToDatabase( target ) + ".system.indexes";
            {
                c = bridge.query( sourceIndexes, QUERY( "ns" << source ), 0, 0, 0, fromRepl ? QueryOption_SlaveOk : 0 );
            }
            while( 1 ) {
                {
                    if ( !c->more() )
                        break;
                }
                BSONObj o = c->next();
                BSONObjBuilder b;
                BSONObjIterator i( o );
                while( i.moreWithEOO() ) {
                    BSONElement e = i.next();
                    if ( e.eoo() )
                        break;
                    if ( strcmp( e.fieldName(), "ns" ) == 0 ) {
                        b.append( "ns", target );
                    }
                    else {
                        b.append( e );
                    }
                }
                BSONObj n = b.done();
                theDataFileMgr.insertWithObjMod( targetIndexes.c_str(), n );
            }

            {
                Client::Context ctx( source );
                Status s = ctx.db()->dropCollection( source );
                if ( !s.isOK() ) {
                    errmsg = s.toString();
                    return false;
                }
                IndexBuilder::restoreIndexes(targetIndexes, indexesInProg);
            }
            return true;
        }
Esempio n. 26
0
        virtual bool run(const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) {
            string source = cmdObj.getStringField( name.c_str() );
            string target = cmdObj.getStringField( "to" );

            if ( !NamespaceString::validCollectionComponent(target.c_str()) ) {
                errmsg = "invalid collection name: " + target;
                return false;
            }
            if ( source.empty() || target.empty() ) {
                errmsg = "invalid command syntax";
                return false;
            }

            if (!fromRepl) { // If it got through on the master, need to allow it here too
                Status sourceStatus = userAllowedWriteNS(source);
                if (!sourceStatus.isOK()) {
                    errmsg = "error with source namespace: " + sourceStatus.reason();
                    return false;
                }

                Status targetStatus = userAllowedWriteNS(target);
                if (!targetStatus.isOK()) {
                    errmsg = "error with target namespace: " + targetStatus.reason();
                    return false;
                }
            }

            string sourceDB = nsToDatabase(source);
            string targetDB = nsToDatabase(target);

            bool capped = false;
            long long size = 0;
            std::vector<BSONObj> indexesInProg;

            {
                Client::Context srcCtx( source );
                Collection* sourceColl = srcCtx.db()->getCollection( source );

                if ( !sourceColl ) {
                    errmsg = "source namespace does not exist";
                    return false;
                }

                // Ensure that collection name does not exceed maximum length.
                // Ensure that index names do not push the length over the max.
                // Iterator includes unfinished indexes.
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( true );
                int longestIndexNameLength = 0;
                while ( sourceIndIt.more() ) {
                    int thisLength = sourceIndIt.next()->indexName().length();
                    if ( thisLength > longestIndexNameLength )
                        longestIndexNameLength = thisLength;
                }

                unsigned int longestAllowed =
                    min(int(Namespace::MaxNsColletionLen),
                        int(Namespace::MaxNsLen) - 2/*strlen(".$")*/ - longestIndexNameLength);
                if (target.size() > longestAllowed) {
                    StringBuilder sb;
                    sb << "collection name length of " << target.size()
                       << " exceeds maximum length of " << longestAllowed
                       << ", allowing for index names";
                    errmsg = sb.str();
                    return false;
                }

                {
                    const NamespaceDetails *nsd = nsdetails( source );
                    indexesInProg = stopIndexBuilds( dbname, cmdObj );
                    capped = nsd->isCapped();
                    if ( capped )
                        for( DiskLoc i = nsd->firstExtent(); !i.isNull(); i = i.ext()->xnext )
                            size += i.ext()->length;
                }
            }

            {
                Client::Context ctx( target );

                // Check if the target namespace exists and if dropTarget is true.
                // If target exists and dropTarget is not true, return false.
                if ( ctx.db()->getCollection( target ) ) {
                    if ( !cmdObj["dropTarget"].trueValue() ) {
                        errmsg = "target namespace exists";
                        return false;
                    }

                    Status s = cc().database()->dropCollection( target );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        restoreIndexBuildsOnSource( indexesInProg, source );
                        return false;
                    }
                }

                // If we are renaming in the same database, just
                // rename the namespace and we're done.
                if ( sourceDB == targetDB ) {
                    Status s = ctx.db()->renameCollection( source, target,
                                                           cmdObj["stayTemp"].trueValue() );
                    if ( !s.isOK() ) {
                        errmsg = s.toString();
                        restoreIndexBuildsOnSource( indexesInProg, source );
                        return false;
                    }
                    return true;
                }

                // Otherwise, we are enaming across databases, so we must copy all
                // the data and then remove the source collection.

                // Create the target collection.
                Collection* targetColl = NULL;
                if ( capped ) {
                    BSONObjBuilder spec;
                    spec.appendBool( "capped", true );
                    spec.append( "size", double( size ) );
                    spec.appendBool( "autoIndexId", false );
                    userCreateNS( target.c_str(), spec.obj(), errmsg, false );
                    targetColl = ctx.db()->getCollection( target );
                }
                else {
                    CollectionOptions options;
                    options.setNoIdIndex();
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    targetColl = ctx.db()->createCollection( target, options );
                }
                if ( !targetColl ) {
                    errmsg = "Failed to create target collection.";
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            // Copy over all the data from source collection to target collection.
            bool insertSuccessful = true;
            boost::scoped_ptr<CollectionIterator> sourceIt;

            {
                Client::Context srcCtx( source );
                Collection* sourceColl = srcCtx.db()->getCollection( source );
                sourceIt.reset( sourceColl->getIterator( DiskLoc(), false, CollectionScanParams::FORWARD ) );
            }

            Collection* targetColl = NULL;
            while ( !sourceIt->isEOF() ) {
                BSONObj o;
                {
                    Client::Context srcCtx( source );
                    o = sourceIt->getNext().obj();
                }
                // Insert and check return status of insert.
                {
                    Client::Context ctx( target );
                    if ( !targetColl )
                        targetColl = ctx.db()->getCollection( target );
                    // No logOp necessary because the entire renameCollection command is one logOp.
                    Status s = targetColl->insertDocument( o, true ).getStatus();
                    if ( !s.isOK() ) {
                        insertSuccessful = false;
                        errmsg = s.toString();
                        break;
                    }
                }
            }

            // If inserts were unsuccessful, drop the target collection and return false.
            if ( !insertSuccessful ) {
                Client::Context ctx( target );
                Status s = ctx.db()->dropCollection( target );
                if ( !s.isOK() )
                    errmsg = s.toString();
                restoreIndexBuildsOnSource( indexesInProg, source );
                return false;
            }

            // Copy over the indexes to temp storage and then to the target..
            vector<BSONObj> copiedIndexes;
            bool indexSuccessful = true;
            {
                Client::Context srcCtx( source );
                Collection* sourceColl = srcCtx.db()->getCollection( source );
                IndexCatalog::IndexIterator sourceIndIt =
                    sourceColl->getIndexCatalog()->getIndexIterator( true );

                while ( sourceIndIt.more() ) {
                    BSONObj currIndex = sourceIndIt.next()->infoObj();

                    // Process the source index.
                    BSONObjBuilder b;
                    BSONObjIterator i( currIndex );
                    while( i.moreWithEOO() ) {
                        BSONElement e = i.next();
                        if ( e.eoo() )
                            break;
                        else if ( strcmp( e.fieldName(), "ns" ) == 0 )
                            b.append( "ns", target );
                        else
                            b.append( e );
                    }

                    BSONObj newIndex = b.obj();
                    copiedIndexes.push_back( newIndex );
                }
            }

            {
                Client::Context ctx( target );
                if ( !targetColl )
                    targetColl = ctx.db()->getCollection( target );

                for ( vector<BSONObj>::iterator it = copiedIndexes.begin();
                                                it != copiedIndexes.end(); ++it ) {
                    Status s = targetColl->getIndexCatalog()->createIndex( *it, true );
                    if ( !s.isOK() ) {
                        indexSuccessful = false;
                        errmsg = s.toString();
                        break;
                    }
                }

                // If indexes were unsuccessful, drop the target collection and return false.
                if ( !indexSuccessful ) {
                    Status s = ctx.db()->dropCollection( target );
                    if ( !s.isOK() )
                        errmsg = s.toString();
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            // Drop the source collection.
            {
                Client::Context srcCtx( source );
                Status s = srcCtx.db()->dropCollection( source );
                if ( !s.isOK() ) {
                    errmsg = s.toString();
                    restoreIndexBuildsOnSource( indexesInProg, source );
                    return false;
                }
            }

            return true;
        }