IndexCatalog::IndexBuildBlock* halfAddIndex(const std::string& key) {
    string name = key + "_1";
    BSONObj indexInfo = BSON( "v" << 1 <<
                              "key" << BSON( key << 1 ) <<
                              "ns" << _ns <<
                              "name" << name );
    int32_t lenWHdr = indexInfo.objsize() + Record::HeaderSize;
    const char* systemIndexes = "unittests.system.indexes";
    DiskLoc infoLoc = allocateSpaceForANewRecord( systemIndexes,
                                                  nsdetails( systemIndexes ),
                                                  lenWHdr,
                                                  false );
    Record* infoRecord = reinterpret_cast<Record*>( getDur().writingPtr( infoLoc.rec(),
                                                                         lenWHdr ) );
    memcpy( infoRecord->data(), indexInfo.objdata(), indexInfo.objsize() );
    addRecordToRecListInExtent( infoRecord, infoLoc );

    return new IndexCatalog::IndexBuildBlock( _ctx.ctx().db()->getCollection( _ns )->getIndexCatalog(),
                                              name,
                                              infoLoc );
}
/** @return IndexDetails for a new index on a:1, with the info field populated. */
IndexDetails& addIndexWithInfo() {
    BSONObj indexInfo = BSON( "v" << 1 <<
                              "key" << BSON( "a" << 1 ) <<
                              "ns" << _ns <<
                              "name" << "a_1" );
    int32_t lenWHdr = indexInfo.objsize() + Record::HeaderSize;
    const char* systemIndexes = "unittests.system.indexes";
    DiskLoc infoLoc = allocateSpaceForANewRecord( systemIndexes,
                                                  nsdetails( systemIndexes ),
                                                  lenWHdr,
                                                  false );
    Record* infoRecord = reinterpret_cast<Record*>( getDur().writingPtr( infoLoc.rec(),
                                                                         lenWHdr ) );
    memcpy( infoRecord->data(), indexInfo.objdata(), indexInfo.objsize() );
    addRecordToRecListInExtent( infoRecord, infoLoc );

    IndexDetails& id = nsdetails( _ns )->getNextIndexDetails( _ns );
    nsdetails( _ns )->addIndex();
    id.info.writing() = infoLoc;
    return id;
}
/** @return IndexDescriptor for a new index on a:1, with the info field populated. */
IndexDescriptor* addIndexWithInfo() {
    BSONObj indexInfo = BSON( "v" << 1 <<
                              "key" << BSON( "a" << 1 ) <<
                              "ns" << _ns <<
                              "name" << "a_1" );
    int32_t lenWHdr = indexInfo.objsize() + Record::HeaderSize;
    const char* systemIndexes = "unittests.system.indexes";
    DiskLoc infoLoc = allocateSpaceForANewRecord( systemIndexes,
                                                  nsdetails( systemIndexes ),
                                                  lenWHdr,
                                                  false );
    Record* infoRecord = reinterpret_cast<Record*>( getDur().writingPtr( infoLoc.rec(),
                                                                         lenWHdr ) );
    memcpy( infoRecord->data(), indexInfo.objdata(), indexInfo.objsize() );
    addRecordToRecListInExtent( infoRecord, infoLoc );

    IndexCatalog::IndexBuildBlock blk( collection()->getIndexCatalog(), "a_1", infoLoc );
    blk.success();

    return collection()->getIndexCatalog()->findIndexByName( "a_1" );
}
/** @return number of skipped (invalid) documents */
unsigned compactExtent(const char *ns, NamespaceDetails *d, const DiskLoc ext, int n,
                       const scoped_array<IndexSpec> &indexSpecs,
                       scoped_array<SortPhaseOne>& phase1, int nidx, bool validate,
                       double pf, int pb)
{
    log() << "compact extent #" << n << endl;
    unsigned oldObjSize = 0; // we'll report what the old padding was
    unsigned oldObjSizeWithPadding = 0;

    Extent *e = ext.ext();
    e->assertOk();
    assert( e->validates() );
    unsigned skipped = 0;

    {
        // the next/prev pointers within the extent might not be in order so we first page the whole thing in
        // sequentially
        log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
        Timer t;
        MAdvise adv(e, e->length, MAdvise::Sequential);
        const char *p = (const char *) e;
        for( int i = 0; i < e->length; i += 4096 ) {
            // 'faux' is presumably a file-scope dummy accumulator that keeps this
            // read-only paging loop from being optimized away
            faux += p[i];
        }
        int ms = t.millis();
        if( ms > 1000 )
            log() << "compact end paging in " << ms << "ms "
                  << e->length/1000000.0/ms << "MB/sec" << endl;
    }

    {
        log() << "compact copying records" << endl;
        unsigned totalSize = 0;
        int nrecs = 0;
        DiskLoc L = e->firstRecord;
        if( !L.isNull() ) {
            while( 1 ) {
                Record *recOld = L.rec();
                L = recOld->nextInExtent(L);
                nrecs++;
                BSONObj objOld(recOld);

                if( !validate || objOld.valid() ) {
                    unsigned sz = objOld.objsize();
                    oldObjSize += sz;
                    oldObjSizeWithPadding += recOld->netLength();

                    unsigned lenWHdr = sz + Record::HeaderSize;
                    unsigned lenWPadding = lenWHdr;
                    {
                        lenWPadding = static_cast<unsigned>(pf*lenWPadding);
                        lenWPadding += pb;
                        lenWPadding = lenWPadding & quantizeMask(lenWPadding);
                        if( lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) {
                            lenWPadding = lenWHdr;
                        }
                    }
                    totalSize += lenWPadding;
                    DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWPadding, false);
                    uassert(14024, "compact error out of space during compaction", !loc.isNull());
                    Record *recNew = loc.rec();
                    recNew = (Record *) getDur().writingPtr(recNew, lenWHdr);
                    addRecordToRecListInExtent(recNew, loc);
                    memcpy(recNew->data, objOld.objdata(), sz);

                    {
                        // extract keys for all indexes we will be rebuilding
                        for( int x = 0; x < nidx; x++ ) {
                            phase1[x].addKeys(indexSpecs[x], objOld, loc);
                        }
                    }
                }
                else {
                    if( ++skipped <= 10 )
                        log() << "compact skipping invalid object" << endl;
                }

                if( L.isNull() ) {
                    // we just did the very last record from the old extent.  it's still pointed to
                    // by the old extent ext, but that will be fixed below after this loop
                    break;
                }

                // remove the old records (orphan them) periodically so our commit block doesn't get too large
                bool stopping = false;
                RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                if( stopping || getDur().aCommitIsNeeded() ) {
                    e->firstRecord.writing() = L;
                    Record *r = L.rec();
                    getDur().writingInt(r->prevOfs) = DiskLoc::NullOfs;
                    getDur().commitIfNeeded();
                    killCurrentOp.checkForInterrupt(false);
                }
            }
        } // if !L.isNull()

        assert( d->firstExtent == ext );
        assert( d->lastExtent != ext );
        DiskLoc newFirst = e->xnext;
        d->firstExtent.writing() = newFirst;
        newFirst.ext()->xprev.writing().Null();
        getDur().writing(e)->markEmpty();
        freeExtents(ext,ext);
        getDur().commitIfNeeded();

        {
            double op = 1.0;
            if( oldObjSize )
                op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
            log() << "compact " << nrecs << " documents " << totalSize/1000000.0 << "MB"
                  << " oldPadding: " << op << ' '
                  << static_cast<unsigned>(op*100.0)/100 << endl;
        }
    }

    return skipped;
}
bool _compact(const char *ns, NamespaceDetails *d, string& errmsg, bool validate,
              BSONObjBuilder& result, double pf, int pb)
{
    //int les = d->lastExtentSize;

    // this is a big job, so might as well make things tidy before we start just to be nice.
    getDur().commitNow();

    list<DiskLoc> extents;
    for( DiskLoc L = d->firstExtent; !L.isNull(); L = L.ext()->xnext )
        extents.push_back(L);
    log() << "compact " << extents.size() << " extents" << endl;

    ProgressMeterHolder pm( cc().curop()->setMessage( "compact extent" , extents.size() ) );

    // same data, but might perform a little different after compact?
    NamespaceDetailsTransient::get(ns).clearQueryCache();

    int nidx = d->nIndexes;
    scoped_array<IndexSpec> indexSpecs( new IndexSpec[nidx] );
    scoped_array<SortPhaseOne> phase1( new SortPhaseOne[nidx] );
    {
        NamespaceDetails::IndexIterator ii = d->ii();
        int x = 0;
        while( ii.more() ) {
            BSONObjBuilder b;
            IndexDetails& idx = ii.next();
            BSONObj::iterator i(idx.info.obj());
            while( i.more() ) {
                BSONElement e = i.next();
                if( !str::equals(e.fieldName(), "v") && !str::equals(e.fieldName(), "background") ) {
                    b.append(e);
                }
            }
            BSONObj o = b.obj().getOwned();
            phase1[x].sorter.reset( new BSONObjExternalSorter( idx.idxInterface(), o.getObjectField("key") ) );
            phase1[x].sorter->hintNumObjects( d->stats.nrecords );
            indexSpecs[x++].reset(o);
        }
    }

    log() << "compact orphan deleted lists" << endl;
    for( int i = 0; i < Buckets; i++ ) {
        d->deletedList[i].writing().Null();
    }

    // before dropping indexes, at least make sure we can allocate one extent!
    uassert(14025, "compact error no space available to allocate",
            !allocateSpaceForANewRecord(ns, d, Record::HeaderSize+1, false).isNull());

    // note that the drop indexes call also invalidates all clientcursors for the namespace,
    // which is important and wanted here
    log() << "compact dropping indexes" << endl;
    BSONObjBuilder b;
    if( !dropIndexes(d, ns, "*", errmsg, b, true) ) {
        errmsg = "compact drop indexes failed";
        log() << errmsg << endl;
        return false;
    }

    getDur().commitNow();

    long long skipped = 0;
    int n = 0;
    for( list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++ ) {
        skipped += compactExtent(ns, d, *i, n++, indexSpecs, phase1, nidx, validate, pf, pb);
        pm.hit();
    }

    if( skipped ) {
        result.append("invalidObjects", skipped);
    }

    assert( d->firstExtent.ext()->xprev.isNull() );

    // indexes will do their own progress meter?
    pm.finished();

    // build indexes
    NamespaceString s(ns);
    string si = s.db + ".system.indexes";
    for( int i = 0; i < nidx; i++ ) {
        killCurrentOp.checkForInterrupt(false);
        BSONObj info = indexSpecs[i].info;
        log() << "compact create index " << info["key"].Obj().toString() << endl;
        try {
            precalced = &phase1[i];
            theDataFileMgr.insert(si.c_str(), info.objdata(), info.objsize());
        }
        catch(...) {
            precalced = 0;
            throw;
        }
        precalced = 0;
    }

    return true;
}
/** @return number of skipped (invalid) documents */
unsigned compactExtent(const char *ns, NamespaceDetails *d, const DiskLoc diskloc, int n,
                       const scoped_array<IndexSpec> &indexSpecs,
                       scoped_array<SortPhaseOne>& phase1, int nidx, bool validate,
                       double pf, int pb)
{
    log() << "compact begin extent #" << n << " for namespace " << ns << endl;
    unsigned oldObjSize = 0; // we'll report what the old padding was
    unsigned oldObjSizeWithPadding = 0;

    Extent *e = diskloc.ext();
    e->assertOk();
    verify( e->validates() );
    unsigned skipped = 0;

    {
        // the next/prev pointers within the extent might not be in order so we first page the whole thing in
        // sequentially
        log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
        Timer t;
        MongoDataFile* mdf = cc().database()->getFile( diskloc.a() );
        HANDLE fd = mdf->getFd();
        int offset = diskloc.getOfs();
        Extent* ext = diskloc.ext();
        size_t length = ext->length;

        touch_pages(fd, offset, length, ext);
        int ms = t.millis();
        if( ms > 1000 )
            log() << "compact end paging in " << ms << "ms "
                  << e->length/1000000.0/ms << "MB/sec" << endl;
    }

    {
        log() << "compact copying records" << endl;
        long long datasize = 0;
        long long nrecords = 0;
        DiskLoc L = e->firstRecord;
        if( !L.isNull() ) {
            while( 1 ) {
                Record *recOld = L.rec();
                L = recOld->nextInExtent(L);
                BSONObj objOld = BSONObj::make(recOld);

                if( !validate || objOld.valid() ) {
                    nrecords++;
                    unsigned sz = objOld.objsize();

                    oldObjSize += sz;
                    oldObjSizeWithPadding += recOld->netLength();

                    unsigned lenWHdr = sz + Record::HeaderSize;
                    unsigned lenWPadding = lenWHdr;
                    {
                        lenWPadding = static_cast<unsigned>(pf*lenWPadding);
                        lenWPadding += pb;
                        lenWPadding = lenWPadding & quantizeMask(lenWPadding);
                        if( lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) {
                            lenWPadding = lenWHdr;
                        }
                    }
                    DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWPadding, false);
                    uassert(14024, "compact error out of space during compaction", !loc.isNull());
                    Record *recNew = loc.rec();
                    datasize += recNew->netLength();
                    recNew = (Record *) getDur().writingPtr(recNew, lenWHdr);
                    addRecordToRecListInExtent(recNew, loc);
                    memcpy(recNew->data(), objOld.objdata(), sz);

                    {
                        // extract keys for all indexes we will be rebuilding
                        for( int x = 0; x < nidx; x++ ) {
                            phase1[x].addKeys(indexSpecs[x], objOld, loc);
                        }
                    }
                }
                else {
                    if( ++skipped <= 10 )
                        log() << "compact skipping invalid object" << endl;
                }

                if( L.isNull() ) {
                    // we just did the very last record from the old extent.  it's still pointed to
                    // by the old extent ext, but that will be fixed below after this loop
                    break;
                }

                // remove the old records (orphan them) periodically so our commit block doesn't get too large
                bool stopping = false;
                RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                if( stopping || getDur().aCommitIsNeeded() ) {
                    e->firstRecord.writing() = L;
                    Record *r = L.rec();
                    getDur().writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                    getDur().commitIfNeeded();
                    killCurrentOp.checkForInterrupt(false);
                }
            }
        } // if !L.isNull()

        verify( d->firstExtent == diskloc );
        verify( d->lastExtent != diskloc );
        DiskLoc newFirst = e->xnext;
        d->firstExtent.writing() = newFirst;
        newFirst.ext()->xprev.writing().Null();
        getDur().writing(e)->markEmpty();
        freeExtents( diskloc, diskloc );

        // update datasize/record count for this namespace's extent
        {
            NamespaceDetails::Stats *s = getDur().writing(&d->stats);
            s->datasize += datasize;
            s->nrecords += nrecords;
        }

        getDur().commitIfNeeded();

        {
            double op = 1.0;
            if( oldObjSize )
                op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
            log() << "compact finished extent #" << n << " containing " << nrecords
                  << " documents (" << datasize/1000000.0 << "MB)"
                  << " oldPadding: " << op << ' '
                  << static_cast<unsigned>(op*100.0)/100 << endl;
        }
    }

    return skipped;
}
DiskLoc DataFileMgr::insert(const char* ns,
                            const void* obuf,
                            int32_t len,
                            bool mayInterrupt,
                            bool god,
                            bool mayAddIndex,
                            bool* addedID)
{
    Database* database = cc().database();

    bool wouldAddIndex = false;
    massert( 10093 , "cannot insert into reserved $ collection", god || NamespaceString::normal( ns ) );
    uassert( 10094 , str::stream() << "invalid ns: " << ns , isValidNS( ns ) );
    {
        const char *sys = strstr(ns, "system.");
        if ( sys && !insert_checkSys(sys, ns, wouldAddIndex, obuf, god) )
            return DiskLoc();
    }
    bool addIndex = wouldAddIndex && mayAddIndex;

    Collection* collection = database->getCollection( ns );
    if ( collection == NULL ) {
        collection = database->createCollection( ns, false, NULL );

        int ies = Extent::initialSize(len);
        if( str::contains(ns, '$') &&
            len + Record::HeaderSize >= BtreeData_V1::BucketSize - 256 &&
            len + Record::HeaderSize <= BtreeData_V1::BucketSize + 256 ) {
            // probably an index. so we pick a value here for the first extent instead of using
            // initialExtentSize() which is more for user collections.
            // TODO: we could look at the # of records in the parent collection to be smarter here.
            ies = (32+4) * 1024;
        }
        collection->increaseStorageSize( ies, false);
        if ( !god )
            ensureIdIndexForNewNs(ns);
    }

    NamespaceDetails* d = collection->details();

    string tabletoidxns;
    Collection* collectionToIndex = 0;
    NamespaceDetails* tableToIndex = 0;

    BSONObj fixedIndexObject;
    if ( addIndex ) {
        verify( obuf );
        BSONObj io((const char *) obuf);

        tabletoidxns = io.getStringField( "ns" );
        uassert(10096, "invalid ns to index", tabletoidxns.find( '.' ) != string::npos);
        massert(10097,
                str::stream() << "trying to create index on wrong db "
                              << " db: " << database->name()
                              << " collection: " << tabletoidxns,
                database->ownsNS( tabletoidxns ) );

        collectionToIndex = database->getCollection( tabletoidxns );
        if ( !collectionToIndex ) {
            collectionToIndex = database->createCollection( tabletoidxns, false, NULL );
            verify( collectionToIndex );
            if ( !god )
                ensureIdIndexForNewNs( tabletoidxns.c_str() );
        }

        tableToIndex = collectionToIndex->details();

        Status status = collectionToIndex->getIndexCatalog()->okToAddIndex( io );
        if ( status.code() == ErrorCodes::IndexAlreadyExists ) {
            // dup index, we ignore
            return DiskLoc();
        }

        uassert( 17199,
                 str::stream() << "cannot build index on " << tabletoidxns
                               << " because of " << status.toString(),
                 status.isOK() );

        if( !prepareToBuildIndex(io, mayInterrupt, god, tabletoidxns ) ) {
            // prepare creates _id itself, or this indicates to fail the build silently (such
            // as if index already exists)
            return DiskLoc();
        }

        fixedIndexObject = IndexCatalog::fixIndexSpec( io );

        obuf = fixedIndexObject.objdata();
        len = fixedIndexObject.objsize();
    }

    IDToInsert idToInsert; // only initialized if needed

    if( !god ) {
        /* Check if we have an _id field. If we don't, we'll add it.
           Note that btree buckets which we insert aren't BSONObj's, but in that case god==true.
        */
        BSONObj io((const char *) obuf);
        BSONElement idField = io.getField( "_id" );
        uassert( 10099 , "_id cannot be an array", idField.type() != Array );
        // we don't add _id for capped collections in local as they don't have an _id index
        if( idField.eoo() &&
            !wouldAddIndex &&
            nsToDatabase( ns ) != "local" &&
            d->haveIdIndex() ) {

            if( addedID )
                *addedID = true;

            idToInsert.init();
            len += idToInsert.size();
        }

        BSONElementManipulator::lookForTimestamps( io );
    }

    int lenWHdr = d->getRecordAllocationSize( len + Record::HeaderSize );
    fassert( 16440, lenWHdr >= ( len + Record::HeaderSize ) );

    // If the collection is capped, check if the new object will violate a unique index
    // constraint before allocating space.
    if ( d->isCapped() && !god ) {
        BSONObj temp = BSONObj( reinterpret_cast<const char *>( obuf ) );
        Status ret = collection->getIndexCatalog()->checkNoIndexConflicts( temp );
        uassert(12582, "duplicate key insert for unique index of capped collection", ret.isOK() );
    }

    DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWHdr, god);

    if ( loc.isNull() ) {
        log() << "insert: couldn't alloc space for object ns:" << ns
              << " capped:" << d->isCapped() << endl;
        verify(d->isCapped());
        return DiskLoc();
    }

    Record *r = loc.rec();
    {
        verify( r->lengthWithHeaders() >= lenWHdr );
        r = (Record*) getDur().writingPtr(r, lenWHdr);
        if( idToInsert.needed() ) {
            /* a little effort was made here to avoid a double copy when we add an ID */
            int originalSize = *((int*) obuf);
            ((int&)*r->data()) = originalSize + idToInsert.size();
            memcpy(r->data()+4, idToInsert.rawdata(), idToInsert.size());
            memcpy(r->data()+4+idToInsert.size(), ((char*)obuf)+4, originalSize-4);
        }
        else {
            if( obuf ) // obuf can be null from internal callers
                memcpy(r->data(), obuf, len);
        }
    }

    addRecordToRecListInExtent(r, loc);

    d->incrementStats( r->netLength(), 1 );

    // we don't bother resetting query optimizer stats for the god tables - also god is true when adding a btree bucket
    if ( !god )
        collection->infoCache()->notifyOfWriteOp();

    if ( tableToIndex ) {
        insert_makeIndex(collectionToIndex, loc, mayInterrupt);
    }

    /* add this record to our indexes */
    if ( d->getTotalIndexCount() > 0 ) {
        try {
            BSONObj obj(r->data());
            collection->getIndexCatalog()->indexRecord(obj, loc);
        }
        catch( AssertionException& e ) {
            // should be a dup key error on _id index
            if( tableToIndex || d->isCapped() ) {
                massert( 12583, "unexpected index insertion failure on capped collection", !d->isCapped() );
                string s = e.toString();
                s += " : on addIndex/capped - collection and its index will not match";
                setLastError(0, s.c_str());
                error() << s << endl;
            }
            else {
                // normal case -- we can roll back
                _deleteRecord(d, ns, r, loc);
                throw;
            }
        }
    }

    d->paddingFits();

    return loc;
}
/** @return number of skipped (invalid) documents */
unsigned compactExtent(const char *ns, NamespaceDetails *d, const DiskLoc diskloc, int n,
                       int nidx, bool validate, double pf, int pb, bool useDefaultPadding,
                       bool preservePadding)
{
    log() << "compact begin extent #" << n << " for namespace " << ns << endl;
    unsigned oldObjSize = 0; // we'll report what the old padding was
    unsigned oldObjSizeWithPadding = 0;

    Extent *e = diskloc.ext();
    e->assertOk();
    verify( e->validates(diskloc) );
    unsigned skipped = 0;

    Database* db = cc().database();

    {
        // the next/prev pointers within the extent might not be in order so we first
        // page the whole thing in sequentially
        log() << "compact paging in len=" << e->length/1000000.0 << "MB" << endl;
        Timer t;
        Extent* ext = db->getExtentManager().getExtent( diskloc );
        size_t length = ext->length;

        touch_pages( reinterpret_cast<const char*>(ext), length );
        int ms = t.millis();
        if( ms > 1000 )
            log() << "compact end paging in " << ms << "ms "
                  << e->length/1000000.0/ms << "MB/sec" << endl;
    }

    {
        log() << "compact copying records" << endl;
        long long datasize = 0;
        long long nrecords = 0;
        DiskLoc L = e->firstRecord;
        if( !L.isNull() ) {
            while( 1 ) {
                Record *recOld = L.rec();
                L = db->getExtentManager().getNextRecordInExtent(L);
                BSONObj objOld = BSONObj::make(recOld);

                if( !validate || objOld.valid() ) {
                    nrecords++;
                    unsigned sz = objOld.objsize();

                    oldObjSize += sz;
                    oldObjSizeWithPadding += recOld->netLength();

                    unsigned lenWHdr = sz + Record::HeaderSize;
                    unsigned lenWPadding = lenWHdr;

                    // if we are preserving the padding, the record should not change size
                    if (preservePadding) {
                        lenWPadding = recOld->lengthWithHeaders();
                    }
                    // maintain UsePowerOf2Sizes if no padding values were passed in
                    else if (d->isUserFlagSet(NamespaceDetails::Flag_UsePowerOf2Sizes)
                             && useDefaultPadding) {
                        lenWPadding = d->quantizePowerOf2AllocationSpace(lenWPadding);
                    }
                    // otherwise use the padding values (pf and pb) that were passed in
                    else {
                        lenWPadding = static_cast<unsigned>(pf*lenWPadding);
                        lenWPadding += pb;
                        lenWPadding = lenWPadding & quantizeMask(lenWPadding);
                    }

                    if (lenWPadding < lenWHdr || lenWPadding > BSONObjMaxUserSize / 2 ) {
                        lenWPadding = lenWHdr;
                    }

                    DiskLoc loc = allocateSpaceForANewRecord(ns, d, lenWPadding, false);
                    uassert(14024, "compact error out of space during compaction", !loc.isNull());
                    Record *recNew = loc.rec();
                    datasize += recNew->netLength();
                    recNew = (Record *) getDur().writingPtr(recNew, lenWHdr);
                    addRecordToRecListInExtent(recNew, loc);
                    memcpy(recNew->data(), objOld.objdata(), sz);
                }
                else {
                    if( ++skipped <= 10 )
                        log() << "compact skipping invalid object" << endl;
                }

                if( L.isNull() ) {
                    // we just did the very last record from the old extent.  it's still pointed to
                    // by the old extent ext, but that will be fixed below after this loop
                    break;
                }

                // remove the old records (orphan them) periodically so our commit block doesn't get too large
                bool stopping = false;
                RARELY stopping = *killCurrentOp.checkForInterruptNoAssert() != 0;
                if( stopping || getDur().aCommitIsNeeded() ) {
                    e->firstRecord.writing() = L;
                    Record *r = L.rec();
                    getDur().writingInt(r->prevOfs()) = DiskLoc::NullOfs;
                    getDur().commitIfNeeded();
                    killCurrentOp.checkForInterrupt(false);
                }
            }
        } // if !L.isNull()

        verify( d->firstExtent() == diskloc );
        verify( d->lastExtent() != diskloc );
        DiskLoc newFirst = e->xnext;
        d->firstExtent().writing() = newFirst;
        newFirst.ext()->xprev.writing().Null();
        getDur().writing(e)->markEmpty();
        cc().database()->getExtentManager().freeExtents( diskloc, diskloc );

        // update datasize/record count for this namespace's extent
        d->incrementStats( datasize, nrecords );

        getDur().commitIfNeeded();

        {
            double op = 1.0;
            if( oldObjSize )
                op = static_cast<double>(oldObjSizeWithPadding)/oldObjSize;
            log() << "compact finished extent #" << n << " containing " << nrecords
                  << " documents (" << datasize/1000000.0 << "MB)"
                  << " oldPadding: " << op << ' '
                  << static_cast<unsigned>(op*100.0)/100 << endl;
        }
    }

    return skipped;
}
bool _compact(const char *ns, NamespaceDetails *d, string& errmsg, bool validate,
              BSONObjBuilder& result, double pf, int pb, bool useDefaultPadding,
              bool preservePadding)
{
    // this is a big job, so might as well make things tidy before we start just to be nice.
    getDur().commitIfNeeded();

    list<DiskLoc> extents;
    for( DiskLoc L = d->firstExtent(); !L.isNull(); L = L.ext()->xnext )
        extents.push_back(L);
    log() << "compact " << extents.size() << " extents" << endl;

    ProgressMeterHolder pm(cc().curop()->setMessage("compact extent",
                                                    "Extent Compacting Progress",
                                                    extents.size()));

    // same data, but might perform a little different after compact?
    Collection* collection = cc().database()->getCollection( ns );
    verify( collection );
    collection->infoCache()->addedIndex();

    verify( d->getCompletedIndexCount() == d->getTotalIndexCount() );
    int nidx = d->getCompletedIndexCount();
    scoped_array<BSONObj> indexSpecs( new BSONObj[nidx] );
    {
        NamespaceDetails::IndexIterator ii = d->ii();
        // For each existing index...
        for( int idxNo = 0; ii.more(); ++idxNo ) {
            // Build a new index spec based on the old index spec.
            BSONObjBuilder b;
            BSONObj::iterator i(ii.next().info.obj());
            while( i.more() ) {
                BSONElement e = i.next();
                if ( str::equals( e.fieldName(), "v" ) ) {
                    // Drop any preexisting index version spec.  The default index version will
                    // be used instead for the new index.
                    continue;
                }
                if ( str::equals( e.fieldName(), "background" ) ) {
                    // Create the new index in the foreground.
                    continue;
                }
                // Pass the element through to the new index spec.
                b.append(e);
            }
            indexSpecs[idxNo] = b.obj().getOwned();
        }
    }

    log() << "compact orphan deleted lists" << endl;
    d->orphanDeletedList();

    // Start over from scratch with our extent sizing and growth
    d->setLastExtentSize( 0 );

    // before dropping indexes, at least make sure we can allocate one extent!
    uassert(14025, "compact error no space available to allocate",
            !allocateSpaceForANewRecord(ns, d, Record::HeaderSize+1, false).isNull());

    // note that the drop indexes call also invalidates all clientcursors for the namespace,
    // which is important and wanted here
    log() << "compact dropping indexes" << endl;
    Status status = collection->getIndexCatalog()->dropAllIndexes( true );
    if ( !status.isOK() ) {
        errmsg = str::stream() << "compact drop indexes failed: " << status.toString();
        log() << status.toString() << endl;
        return false;
    }

    getDur().commitIfNeeded();

    long long skipped = 0;
    int n = 0;

    // reset data size and record counts to 0 for this namespace
    // as we're about to tally them up again for each new extent
    d->setStats( 0, 0 );

    for( list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++ ) {
        skipped += compactExtent(ns, d, *i, n++, nidx, validate, pf, pb,
                                 useDefaultPadding, preservePadding);
        pm.hit();
    }

    if( skipped ) {
        result.append("invalidObjects", skipped);
    }

    verify( d->firstExtent().ext()->xprev.isNull() );

    // indexes will do their own progress meter?
    pm.finished();

    // build indexes
    for( int i = 0; i < nidx; i++ ) {
        killCurrentOp.checkForInterrupt(false);
        BSONObj info = indexSpecs[i];
        log() << "compact create index " << info["key"].Obj().toString() << endl;
        Status status = collection->getIndexCatalog()->createIndex( info, false );
        if ( !status.isOK() ) {
            log() << "failed to create index: " << status.toString();
            uassertStatusOK( status );
        }
    }

    return true;
}
StatusWith<CompactStats> Collection::compact( const CompactOptions* compactOptions ) {

    if ( isCapped() )
        return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                         "cannot compact capped collection" );

    if ( _indexCatalog.numIndexesInProgress() )
        return StatusWith<CompactStats>( ErrorCodes::BadValue,
                                         "cannot compact when indexes in progress" );

    NamespaceDetails* d = details();

    // this is a big job, so might as well make things tidy before we start just to be nice.
    getDur().commitIfNeeded();

    list<DiskLoc> extents;
    for( DiskLoc L = d->firstExtent(); !L.isNull(); L = L.ext()->xnext )
        extents.push_back(L);
    log() << "compact " << extents.size() << " extents" << endl;

    // same data, but might perform a little different after compact?
    _infoCache.reset();

    vector<BSONObj> indexSpecs;
    {
        IndexCatalog::IndexIterator ii( _indexCatalog.getIndexIterator( false ) );
        while ( ii.more() ) {
            IndexDescriptor* descriptor = ii.next();
            indexSpecs.push_back( _compactAdjustIndexSpec( descriptor->infoObj() ) );
        }
    }

    log() << "compact orphan deleted lists" << endl;
    d->orphanDeletedList();

    // Start over from scratch with our extent sizing and growth
    d->setLastExtentSize( 0 );

    // before dropping indexes, at least make sure we can allocate one extent!
    if ( allocateSpaceForANewRecord( _ns.ns().c_str(), d, Record::HeaderSize+1, false).isNull() ) {
        return StatusWith<CompactStats>( ErrorCodes::InternalError,
                                         "compact error no space available to allocate" );
    }

    // note that the drop indexes call also invalidates all clientcursors for the namespace,
    // which is important and wanted here
    log() << "compact dropping indexes" << endl;
    Status status = _indexCatalog.dropAllIndexes( true );
    if ( !status.isOK() ) {
        return StatusWith<CompactStats>( status );
    }

    getDur().commitIfNeeded();

    CompactStats stats;

    OwnedPointerVector<IndexCatalog::IndexBuildBlock> indexBuildBlocks;
    vector<IndexAccessMethod*> indexesToInsertTo;
    vector< std::pair<IndexAccessMethod*,IndexAccessMethod*> > bulkToCommit;
    for ( size_t i = 0; i < indexSpecs.size(); i++ ) {
        killCurrentOp.checkForInterrupt(false);
        BSONObj info = indexSpecs[i];
        info = _compactAdjustIndexSpec( info );
        info = _indexCatalog.fixIndexSpec( info );
        auto_ptr<IndexCatalog::IndexBuildBlock> block( new IndexCatalog::IndexBuildBlock( this, info ) );
        Status status = block->init();
        if ( !status.isOK() )
            return StatusWith<CompactStats>(status);

        IndexAccessMethod* accessMethod = block->getEntry()->accessMethod();
        status = accessMethod->initializeAsEmpty();
        if ( !status.isOK() )
            return StatusWith<CompactStats>(status);

        IndexAccessMethod* bulk = accessMethod->initiateBulk();
        if ( bulk ) {
            indexesToInsertTo.push_back( bulk );
            bulkToCommit.push_back( std::pair<IndexAccessMethod*,IndexAccessMethod*>( accessMethod, bulk ) );
        }
        else {
            indexesToInsertTo.push_back( accessMethod );
        }

        indexBuildBlocks.mutableVector().push_back( block.release() );
    }

    // reset data size and record counts to 0 for this namespace
    // as we're about to tally them up again for each new extent
    d->setStats( 0, 0 );

    ProgressMeterHolder pm(cc().curop()->setMessage("compact extent",
                                                    "Extent Compacting Progress",
                                                    extents.size()));

    int extentNumber = 0;
    for( list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++ ) {
        _compactExtent(*i, extentNumber++, indexesToInsertTo, compactOptions, &stats );
        pm.hit();
    }

    verify( d->firstExtent().ext()->xprev.isNull() );

    // indexes will do their own progress meter?
    pm.finished();

    log() << "starting index commits";

    for ( size_t i = 0; i < bulkToCommit.size(); i++ ) {
        bulkToCommit[i].first->commitBulk( bulkToCommit[i].second, false, NULL );
    }

    for ( size_t i = 0; i < indexBuildBlocks.size(); i++ ) {
        IndexCatalog::IndexBuildBlock* block = indexBuildBlocks.mutableVector()[i];
        block->success();
    }

    return StatusWith<CompactStats>( stats );
}
bool _compact(const char *ns, NamespaceDetails *d, string& errmsg, bool validate,
              BSONObjBuilder& result, double pf, int pb)
{
    // this is a big job, so might as well make things tidy before we start just to be nice.
    getDur().commitIfNeeded();

    list<DiskLoc> extents;
    for( DiskLoc L = d->firstExtent; !L.isNull(); L = L.ext()->xnext )
        extents.push_back(L);
    log() << "compact " << extents.size() << " extents" << endl;

    ProgressMeterHolder pm( cc().curop()->setMessage( "compact extent" , extents.size() ) );

    // same data, but might perform a little different after compact?
    NamespaceDetailsTransient::get(ns).clearQueryCache();

    int nidx = d->nIndexes;
    scoped_array<IndexSpec> indexSpecs( new IndexSpec[nidx] );
    scoped_array<SortPhaseOne> phase1( new SortPhaseOne[nidx] );
    {
        NamespaceDetails::IndexIterator ii = d->ii();
        // For each existing index...
        for( int idxNo = 0; ii.more(); ++idxNo ) {
            // Build a new index spec based on the old index spec.
            BSONObjBuilder b;
            BSONObj::iterator i(ii.next().info.obj());
            while( i.more() ) {
                BSONElement e = i.next();
                if ( str::equals( e.fieldName(), "v" ) ) {
                    // Drop any preexisting index version spec.  The default index version will
                    // be used instead for the new index.
                    continue;
                }
                if ( str::equals( e.fieldName(), "background" ) ) {
                    // Create the new index in the foreground.
                    continue;
                }
                // Pass the element through to the new index spec.
                b.append(e);
            }

            // Add the new index spec to 'indexSpecs'.
            BSONObj o = b.obj().getOwned();
            indexSpecs[idxNo].reset(o);

            // Create an external sorter.
            phase1[idxNo].sorter.reset
                    ( new BSONObjExternalSorter
                      // Use the default index interface, since the new index will be created
                      // with the default index version.
                      ( IndexInterface::defaultVersion(),
                        o.getObjectField("key") ) );
            phase1[idxNo].sorter->hintNumObjects( d->stats.nrecords );
        }
    }

    log() << "compact orphan deleted lists" << endl;
    for( int i = 0; i < Buckets; i++ ) {
        d->deletedList[i].writing().Null();
    }

    // Start over from scratch with our extent sizing and growth
    d->lastExtentSize=0;

    // before dropping indexes, at least make sure we can allocate one extent!
    uassert(14025, "compact error no space available to allocate",
            !allocateSpaceForANewRecord(ns, d, Record::HeaderSize+1, false).isNull());

    // note that the drop indexes call also invalidates all clientcursors for the namespace,
    // which is important and wanted here
    log() << "compact dropping indexes" << endl;
    BSONObjBuilder b;
    if( !dropIndexes(d, ns, "*", errmsg, b, true) ) {
        errmsg = "compact drop indexes failed";
        log() << errmsg << endl;
        return false;
    }

    getDur().commitIfNeeded();

    long long skipped = 0;
    int n = 0;

    // reset data size and record counts to 0 for this namespace
    // as we're about to tally them up again for each new extent
    {
        NamespaceDetails::Stats *s = getDur().writing(&d->stats);
        s->datasize = 0;
        s->nrecords = 0;
    }

    for( list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++ ) {
        skipped += compactExtent(ns, d, *i, n++, indexSpecs, phase1, nidx, validate, pf, pb);
        pm.hit();
    }

    if( skipped ) {
        result.append("invalidObjects", skipped);
    }

    verify( d->firstExtent.ext()->xprev.isNull() );

    // indexes will do their own progress meter?
    pm.finished();

    // build indexes
    NamespaceString s(ns);
    string si = s.db + ".system.indexes";
    for( int i = 0; i < nidx; i++ ) {
        killCurrentOp.checkForInterrupt(false);
        BSONObj info = indexSpecs[i].info;
        log() << "compact create index " << info["key"].Obj().toString() << endl;
        try {
            precalced = &phase1[i];
            theDataFileMgr.insert(si.c_str(), info.objdata(), info.objsize());
        }
        catch(...) {
            precalced = 0;
            throw;
        }
        precalced = 0;
    }

    return true;
}