Database::Database(TransactionExperiment* txn, const char* nm, bool& newDb, const string& path)
    : _name(nm),
      _path(path),
      _namespaceIndex(_path, _name),
      _extentManager(new MmapV1ExtentManager(_name, _path, storageGlobalParams.directoryperdb)),
      _profileName(_name + ".system.profile"),
      _namespacesName(_name + ".system.namespaces"),
      _indexesName(_name + ".system.indexes"),
      _collectionLock("Database::_collectionLock") {

    Status status = validateDBName(_name);
    if (!status.isOK()) {
        warning() << "tried to open invalid db: " << _name << endl;
        uasserted(10028, status.toString());
    }

    try {
        // The database is new if its namespace index does not exist yet.
        newDb = !_namespaceIndex.exists();
        _profile = serverGlobalParams.defaultProfile;
        checkDuplicateUncasedNames(true);

        // If already exists, open.  Otherwise behave as if empty until
        // there's a write, then open.
        if (!newDb) {
            _namespaceIndex.init(txn);
            openAllFiles(txn);

            // upgrade freelist
            string oldFreeList = _name + ".$freelist";
            NamespaceDetails* details = _namespaceIndex.details(oldFreeList);
            if (details) {
                if (!details->firstExtent().isNull()) {
                    _extentManager->freeExtents(txn,
                                                details->firstExtent(),
                                                details->lastExtent());
                }
                _namespaceIndex.kill_ns(txn, oldFreeList);
            }
        }
        _magic = 781231;
    }
    catch (std::exception& e) {
        log() << "warning database " << path << " " << nm << " could not be opened" << endl;
        DBException* dbe = dynamic_cast<DBException*>(&e);
        if (dbe != 0) {
            log() << "DBException " << dbe->getCode() << ": " << e.what() << endl;
        }
        else {
            log() << e.what() << endl;
        }
        _extentManager.reset();
        throw;
    }
}
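// Returns a cursor over all records in 'ns'.  Capped collections get a
// ForwardCappedCursor; otherwise a BasicCursor is positioned at 'startLoc'
// when one is given, skipping any empty extents at the head of the list.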
shared_ptr<Cursor> DataFileMgr::findAll(const StringData& ns, const DiskLoc& startLoc) {
    NamespaceDetails* d = nsdetails(ns);
    if (!d)
        return shared_ptr<Cursor>(new BasicCursor(DiskLoc()));

    DiskLoc loc = d->firstExtent();
    if (loc.isNull())
        return shared_ptr<Cursor>(new BasicCursor(DiskLoc()));

    Extent* e = getExtent(loc);

    DEBUGGING {
        out() << "listing extents for " << ns << endl;
        DiskLoc tmp = loc;
        set<DiskLoc> extents;
        while (1) {
            Extent* f = getExtent(tmp);
            out() << "extent: " << tmp.toString() << endl;
            extents.insert(tmp);
            tmp = f->xnext;
            if (tmp.isNull())
                break;
        }
        out() << endl;
        d->dumpDeleted(&extents);
    }

    if (d->isCapped())
        return shared_ptr<Cursor>(ForwardCappedCursor::make(d, startLoc));

    if (!startLoc.isNull())
        return shared_ptr<Cursor>(new BasicCursor(startLoc));

    while (e->firstRecord.isNull() && !e->xnext.isNull()) {
        /* todo: if extent is empty, free it for reuse elsewhere.
           that is a bit complicated have to clean up the freelists.
        */
        RARELY out() << "info DFM::findAll(): extent " << loc.toString()
                     << " was empty, skipping ahead. ns:" << ns << endl;
        // find a nonempty extent
        // it might be nice to free the whole extent here!  but have to clean up free recs then.
        e = e->getNextExtent();
    }
    return shared_ptr<Cursor>(new BasicCursor(e->firstRecord));
}
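// Implements the renameCollection command.  Renames within one database are a
// simple namespace change; renames across databases copy every document and
// index spec into the target and then drop the source collection.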
virtual bool run(const string& dbname,
                 BSONObj& cmdObj,
                 int,
                 string& errmsg,
                 BSONObjBuilder& result,
                 bool fromRepl) {
    string source = cmdObj.getStringField(name.c_str());
    string target = cmdObj.getStringField("to");

    uassert(15967,
            "invalid collection name: " + target,
            NamespaceString::validCollectionComponent(target.c_str()));

    if (source.empty() || target.empty()) {
        errmsg = "invalid command syntax";
        return false;
    }

    string sourceDB = nsToDatabase(source);
    string targetDB = nsToDatabase(target);

    // Ensure the new name leaves room in the namespace for the
    // collection's longest index name.
    string databaseName = sourceDB;
    databaseName += ".system.indexes";

    int longestIndexNameLength = 0;
    vector<BSONObj> oldIndSpec = Helpers::findAll(databaseName, BSON("ns" << source));
    for (size_t i = 0; i < oldIndSpec.size(); ++i) {
        int thisLength = oldIndSpec[i].getField("name").valuesize();
        if (thisLength > longestIndexNameLength) {
            longestIndexNameLength = thisLength;
        }
    }
    unsigned int longestAllowed = maxNamespaceLen - longestIndexNameLength - 1;
    if (target.size() > longestAllowed) {
        StringBuilder sb;
        sb << "collection name length of " << target.size()
           << " exceeds maximum length of " << longestAllowed
           << ", allowing for index names";
        uasserted(16451, sb.str());
    }

    bool capped = false;
    long long size = 0;
    std::vector<BSONObj> indexesInProg;
    {
        Client::Context ctx(source);
        NamespaceDetails* nsd = nsdetails(source);
        uassert(10026, "source namespace does not exist", nsd);
        indexesInProg = stopIndexBuilds(dbname, cmdObj);
        capped = nsd->isCapped();
        if (capped)
            for (DiskLoc i = nsd->firstExtent(); !i.isNull(); i = i.ext()->xnext)
                size += i.ext()->length;
    }

    Client::Context ctx(target);

    if (nsdetails(target)) {
        uassert(10027, "target namespace exists", cmdObj["dropTarget"].trueValue());
        Status s = cc().database()->dropCollection(target);
        if (!s.isOK()) {
            errmsg = s.toString();
            return false;
        }
    }

    // If we are renaming in the same database, just
    // rename the namespace and we're done.
    if (sourceDB == targetDB) {
        Status s = ctx.db()->renameCollection(source, target,
                                              cmdObj["stayTemp"].trueValue());
        if (!s.isOK()) {
            errmsg = s.toString();
            return false;
        }
        return true;
    }

    // Renaming across databases, so we must copy all
    // the data and then remove the source collection.
    BSONObjBuilder spec;
    if (capped) {
        spec.appendBool("capped", true);
        spec.append("size", double(size));
    }
    if (!userCreateNS(target.c_str(), spec.done(), errmsg, false))
        return false;

    auto_ptr<DBClientCursor> c;
    DBDirectClient bridge;

    // Copy the documents.
    c = bridge.query(source, BSONObj(), 0, 0, 0,
                     fromRepl ? QueryOption_SlaveOk : 0);
    while (c->more()) {
        BSONObj o = c->next();
        theDataFileMgr.insertWithObjMod(target.c_str(), o);
    }

    // Copy the index specs, rewriting each "ns" field to the target collection.
    string sourceIndexes = nsToDatabase(source) + ".system.indexes";
    string targetIndexes = nsToDatabase(target) + ".system.indexes";
    c = bridge.query(sourceIndexes, QUERY("ns" << source), 0, 0, 0,
                     fromRepl ? QueryOption_SlaveOk : 0);
    while (c->more()) {
        BSONObj o = c->next();
        BSONObjBuilder b;
        BSONObjIterator i(o);
        while (i.moreWithEOO()) {
            BSONElement e = i.next();
            if (e.eoo())
                break;
            if (strcmp(e.fieldName(), "ns") == 0) {
                b.append("ns", target);
            }
            else {
                b.append(e);
            }
        }
        BSONObj n = b.done();
        theDataFileMgr.insertWithObjMod(targetIndexes.c_str(), n);
    }

    {
        Client::Context ctx(source);
        Status s = ctx.db()->dropCollection(source);
        if (!s.isOK()) {
            errmsg = s.toString();
            return false;
        }
        IndexBuilder::restoreIndexes(targetIndexes, indexesInProg);
    }

    return true;
}
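// Compacts a non-capped collection in place: all indexes are dropped up front,
// each extent's records are rewritten, and the indexes are rebuilt through a
// single MultiIndexBlock.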
StatusWith<CompactStats> Collection::compact(const CompactOptions* compactOptions) {
    if (isCapped())
        return StatusWith<CompactStats>(ErrorCodes::BadValue,
                                        "cannot compact capped collection");

    if (_indexCatalog.numIndexesInProgress())
        return StatusWith<CompactStats>(ErrorCodes::BadValue,
                                        "cannot compact when indexes in progress");

    NamespaceDetails* d = details();

    // this is a big job, so might as well make things tidy before we start just to be nice.
    getDur().commitIfNeeded();

    list<DiskLoc> extents;
    for (DiskLoc L = d->firstExtent(); !L.isNull(); L = L.ext()->xnext)
        extents.push_back(L);
    log() << "compact " << extents.size() << " extents" << endl;

    // same data, but might perform a little different after compact?
    _infoCache.reset();

    vector<BSONObj> indexSpecs;
    {
        IndexCatalog::IndexIterator ii(_indexCatalog.getIndexIterator(false));
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();

            const BSONObj spec = _compactAdjustIndexSpec(descriptor->infoObj());
            const BSONObj key = spec.getObjectField("key");
            const Status keyStatus = validateKeyPattern(key);
            if (!keyStatus.isOK()) {
                return StatusWith<CompactStats>(
                    ErrorCodes::CannotCreateIndex,
                    str::stream() << "Cannot rebuild index " << spec << ": "
                                  << keyStatus.reason()
                                  << " For more info see"
                                  << " http://dochub.mongodb.org/core/index-validation");
            }
            indexSpecs.push_back(spec);
        }
    }

    log() << "compact orphan deleted lists" << endl;
    d->orphanDeletedList();

    // Start over from scratch with our extent sizing and growth
    d->setLastExtentSize(0);

    // before dropping indexes, at least make sure we can allocate one extent!
    // this will allocate an extent and add to free list
    // if it cannot, it will throw an exception
    increaseStorageSize(_details->lastExtentSize(), true);

    // note that the drop indexes call also invalidates all clientcursors for the namespace,
    // which is important and wanted here
    log() << "compact dropping indexes" << endl;
    Status status = _indexCatalog.dropAllIndexes(true);
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    getDur().commitIfNeeded();
    killCurrentOp.checkForInterrupt();

    CompactStats stats;

    MultiIndexBlock multiIndexBlock(this);
    status = multiIndexBlock.init(indexSpecs);
    if (!status.isOK())
        return StatusWith<CompactStats>(status);

    // reset data size and record counts to 0 for this namespace
    // as we're about to tally them up again for each new extent
    d->setStats(0, 0);

    ProgressMeterHolder pm(cc().curop()->setMessage("compact extent",
                                                    "Extent Compacting Progress",
                                                    extents.size()));

    int extentNumber = 0;
    for (list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++) {
        _compactExtent(*i, extentNumber++, multiIndexBlock, compactOptions, &stats);
        pm.hit();
    }

    verify(d->firstExtent().ext()->xprev.isNull());

    // indexes will do their own progress meter?
    pm.finished();

    log() << "starting index commits";
    status = multiIndexBlock.commit();
    if (!status.isOK())
        return StatusWith<CompactStats>(status);

    return StatusWith<CompactStats>(stats);
}
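// Backing implementation of the validate command.  Walks the extent list, the
// records themselves (when 'scandata' or 'full' is set), the deleted-record
// buckets, and each completed index, collecting inconsistencies in 'errors'.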
void validateNS(const string& ns,
                Collection* collection,
                const BSONObj& cmdObj,
                BSONObjBuilder& result) {
    const bool full = cmdObj["full"].trueValue();
    const bool scanData = full || cmdObj["scandata"].trueValue();

    NamespaceDetails* nsd = collection->details();

    bool valid = true;
    BSONArrayBuilder errors; // explanation(s) for why valid = false

    if (collection->isCapped()) {
        result.append("capped", nsd->isCapped());
        result.appendNumber("max", nsd->maxCappedDocs());
    }

    if (nsd->firstExtent().isNull())
        result.append("firstExtent", "null");
    else
        result.append("firstExtent",
                      str::stream() << nsd->firstExtent().toString()
                                    << " ns:"
                                    << nsd->firstExtent().ext()->nsDiagnostic.toString());
    if (nsd->lastExtent().isNull())
        result.append("lastExtent", "null");
    else
        result.append("lastExtent",
                      str::stream() << nsd->lastExtent().toString()
                                    << " ns:"
                                    << nsd->lastExtent().ext()->nsDiagnostic.toString());

    BSONArrayBuilder extentData;
    int extentCount = 0;
    try {
        if (!nsd->firstExtent().isNull()) {
            nsd->firstExtent().ext()->assertOk();
            nsd->lastExtent().ext()->assertOk();
        }

        DiskLoc extentDiskLoc = nsd->firstExtent();
        while (!extentDiskLoc.isNull()) {
            Extent* thisExtent = extentDiskLoc.ext();
            if (full) {
                extentData << thisExtent->dump();
            }
            if (!thisExtent->validates(extentDiskLoc, &errors)) {
                valid = false;
            }
            DiskLoc nextDiskLoc = thisExtent->xnext;
            if (extentCount > 0 && !nextDiskLoc.isNull()
                    && nextDiskLoc.ext()->xprev != extentDiskLoc) {
                StringBuilder sb;
                sb << "'xprev' pointer " << nextDiskLoc.ext()->xprev.toString()
                   << " in extent " << nextDiskLoc.toString()
                   << " does not point to extent " << extentDiskLoc.toString();
                errors << sb.str();
                valid = false;
            }
            if (nextDiskLoc.isNull() && extentDiskLoc != nsd->lastExtent()) {
                StringBuilder sb;
                sb << "'lastExtent' pointer " << nsd->lastExtent().toString()
                   << " does not point to last extent in list "
                   << extentDiskLoc.toString();
                errors << sb.str();
                valid = false;
            }
            extentDiskLoc = nextDiskLoc;
            extentCount++;
            killCurrentOp.checkForInterrupt();
        }
    }
    catch (const DBException& e) {
        StringBuilder sb;
        sb << "exception validating extent " << extentCount << ": " << e.what();
        errors << sb.str();
        valid = false;
    }
    result.append("extentCount", extentCount);

    if (full)
        result.appendArray("extents", extentData.arr());

    result.appendNumber("datasize", nsd->dataSize());
    result.appendNumber("nrecords", nsd->numRecords());
    result.appendNumber("lastExtentSize", nsd->lastExtentSize());
    result.appendNumber("padding", nsd->paddingFactor());

    try {
        bool testingLastExtent = false;
        try {
            if (nsd->firstExtent().isNull()) {
                // this is ok
            }
            else {
                result.append("firstExtentDetails", nsd->firstExtent().ext()->dump());
                if (!nsd->firstExtent().ext()->xprev.isNull()) {
                    StringBuilder sb;
                    sb << "'xprev' pointer in 'firstExtent' "
                       << nsd->firstExtent().toString()
                       << " is " << nsd->firstExtent().ext()->xprev.toString()
                       << ", should be null";
                    errors << sb.str();
                    valid = false;
                }
            }
            testingLastExtent = true;
            if (nsd->lastExtent().isNull()) {
                // this is ok
            }
            else {
                if (nsd->firstExtent() != nsd->lastExtent()) {
                    result.append("lastExtentDetails", nsd->lastExtent().ext()->dump());
                    if (!nsd->lastExtent().ext()->xnext.isNull()) {
                        StringBuilder sb;
                        sb << "'xnext' pointer in 'lastExtent' "
                           << nsd->lastExtent().toString()
                           << " is " << nsd->lastExtent().ext()->xnext.toString()
                           << ", should be null";
                        errors << sb.str();
                        valid = false;
                    }
                }
            }
        }
        catch (const DBException& e) {
            StringBuilder sb;
            sb << "exception processing '"
               << (testingLastExtent ? "lastExtent" : "firstExtent")
               << "': " << e.what();
            errors << sb.str();
            valid = false;
        }

        set<DiskLoc> recs;
        if (scanData) {
            int n = 0;
            int nInvalid = 0;
            long long nQuantizedSize = 0;
            long long nPowerOf2QuantizedSize = 0;
            long long len = 0;
            long long nlen = 0;
            long long bsonLen = 0;
            int outOfOrder = 0;
            DiskLoc cl_last;
            DiskLoc cl;

            Runner::RunnerState state;
            auto_ptr<Runner> runner(InternalPlanner::collectionScan(ns));
            while (Runner::RUNNER_ADVANCED == (state = runner->getNext(NULL, &cl))) {
                n++;

                if (n < 1000000)
                    recs.insert(cl);

                if (nsd->isCapped()) {
                    if (cl < cl_last)
                        outOfOrder++;
                    cl_last = cl;
                }

                Record* r = cl.rec();
                len += r->lengthWithHeaders();
                nlen += r->netLength();

                if (r->lengthWithHeaders() ==
                        NamespaceDetails::quantizeAllocationSpace(r->lengthWithHeaders())) {
                    // Count the number of records having a size consistent with
                    // the quantizeAllocationSpace quantization implementation.
                    ++nQuantizedSize;
                }

                if (r->lengthWithHeaders() ==
                        NamespaceDetails::quantizePowerOf2AllocationSpace(
                                r->lengthWithHeaders() - 1)) {
                    // Count the number of records having a size consistent with the
                    // quantizePowerOf2AllocationSpace quantization implementation.
                    // Because of SERVER-8311, power of 2 quantization is not idempotent and
                    // r->lengthWithHeaders() - 1 must be checked instead of the record
                    // length itself.
                    ++nPowerOf2QuantizedSize;
                }

                if (full) {
                    BSONObj obj = BSONObj::make(r);
                    if (!obj.isValid() || !obj.valid()) { // both fast and deep checks
                        valid = false;
                        if (nInvalid == 0) // only log once
                            errors << "invalid bson object detected (see logs for more info)";

                        nInvalid++;
                        if (strcmp("_id", obj.firstElementFieldName()) == 0) {
                            try {
                                obj.firstElement().validate(); // throws on error
                                log() << "Invalid bson detected in " << ns
                                      << " with _id: "
                                      << obj.firstElement().toString(false) << endl;
                            }
                            catch (...) {
                                log() << "Invalid bson detected in " << ns
                                      << " with corrupt _id" << endl;
                            }
                        }
                        else {
                            log() << "Invalid bson detected in " << ns
                                  << " and couldn't find _id" << endl;
                        }
                    }
                    else {
                        bsonLen += obj.objsize();
                    }
                }
            }
            if (Runner::RUNNER_EOF != state) {
                // TODO: more descriptive logging.
                warning() << "Internal error while reading collection " << ns << endl;
            }

            if (nsd->isCapped() && !nsd->capLooped()) {
                result.append("cappedOutOfOrder", outOfOrder);
                if (outOfOrder > 1) {
                    valid = false;
                    errors << "too many out of order records";
                }
            }
            result.append("objectsFound", n);

            if (full) {
                result.append("invalidObjects", nInvalid);
            }

            result.appendNumber("nQuantizedSize", nQuantizedSize);
            result.appendNumber("nPowerOf2QuantizedSize", nPowerOf2QuantizedSize);
            result.appendNumber("bytesWithHeaders", len);
            result.appendNumber("bytesWithoutHeaders", nlen);

            if (full) {
                result.appendNumber("bytesBson", bsonLen);
            }
        }

        BSONArrayBuilder deletedListArray;
        for (int i = 0; i < Buckets; i++) {
            deletedListArray << nsd->deletedListEntry(i).isNull();
        }

        int ndel = 0;
        long long delSize = 0;
        BSONArrayBuilder delBucketSizes;
        int incorrect = 0;
        for (int i = 0; i < Buckets; i++) {
            DiskLoc loc = nsd->deletedListEntry(i);
            try {
                int k = 0;
                while (!loc.isNull()) {
                    if (recs.count(loc))
                        incorrect++;
                    ndel++;

                    if (loc.questionable()) {
                        if (nsd->isCapped() && !loc.isValid() && i == 1) {
                            /* the constructor for NamespaceDetails intentionally sets
                               deletedList[1] to invalid
                               see comments in namespace.h
                            */
                            break;
                        }

                        string err(str::stream()
                                   << "bad pointer in deleted record list: " << loc.toString()
                                   << " bucket: " << i << " k: " << k);
                        errors << err;
                        valid = false;
                        break;
                    }

                    DeletedRecord* d = loc.drec();
                    delSize += d->lengthWithHeaders();
                    loc = d->nextDeleted();
                    k++;
                    killCurrentOp.checkForInterrupt();
                }
                delBucketSizes << k;
            }
            catch (...) {
                errors << ("exception in deleted chain for bucket " +
                           BSONObjBuilder::numStr(i));
                valid = false;
            }
        }
        result.appendNumber("deletedCount", ndel);
        result.appendNumber("deletedSize", delSize);
        if (full) {
            result << "delBucketSizes" << delBucketSizes.arr();
        }

        if (incorrect) {
            errors << (BSONObjBuilder::numStr(incorrect) +
                       " records from datafile are in deleted list");
            valid = false;
        }

        int idxn = 0;
        try {
            IndexCatalog* indexCatalog = collection->getIndexCatalog();

            result.append("nIndexes", nsd->getCompletedIndexCount());
            BSONObjBuilder indexes; // not using subObjStart to be exception safe
            NamespaceDetails::IndexIterator i = nsd->ii();
            while (i.more()) {
                IndexDetails& id = i.next();
                log() << "validating index " << idxn << ": " << id.indexNamespace() << endl;

                IndexDescriptor* descriptor = indexCatalog->getDescriptor(idxn);
                verify(descriptor);
                IndexAccessMethod* iam = indexCatalog->getIndex(descriptor);
                verify(iam);

                int64_t keys;
                iam->validate(&keys);
                indexes.appendNumber(id.indexNamespace(), static_cast<long long>(keys));
                idxn++;
            }
            result.append("keysPerIndex", indexes.done());
        }
        catch (...) {
            errors << ("exception during index validate idxn " +
                       BSONObjBuilder::numStr(idxn));
            valid = false;
        }
    }
    catch (const AssertionException&) {
        errors << "exception during validate";
        valid = false;
    }

    result.appendBool("valid", valid);
    result.append("errors", errors.arr());

    if (!full) {
        result.append("warning",
                      "Some checks omitted for speed. use {full:true} option to do more thorough scan.");
    }

    if (!valid) {
        result.append("advice", "ns corrupt, requires repair");
    }
}
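// A second variant of Collection::compact.  It differs from the version above
// in that it drives the index rebuild by hand through IndexCatalog::IndexBuildBlock
// and per-index bulk builders rather than a single MultiIndexBlock.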
StatusWith<CompactStats> Collection::compact(const CompactOptions* compactOptions) {
    if (isCapped())
        return StatusWith<CompactStats>(ErrorCodes::BadValue,
                                        "cannot compact capped collection");

    if (_indexCatalog.numIndexesInProgress())
        return StatusWith<CompactStats>(ErrorCodes::BadValue,
                                        "cannot compact when indexes in progress");

    NamespaceDetails* d = details();

    // this is a big job, so might as well make things tidy before we start just to be nice.
    getDur().commitIfNeeded();

    list<DiskLoc> extents;
    for (DiskLoc L = d->firstExtent(); !L.isNull(); L = L.ext()->xnext)
        extents.push_back(L);
    log() << "compact " << extents.size() << " extents" << endl;

    // same data, but might perform a little different after compact?
    _infoCache.reset();

    vector<BSONObj> indexSpecs;
    {
        IndexCatalog::IndexIterator ii(_indexCatalog.getIndexIterator(false));
        while (ii.more()) {
            IndexDescriptor* descriptor = ii.next();
            indexSpecs.push_back(_compactAdjustIndexSpec(descriptor->infoObj()));
        }
    }

    log() << "compact orphan deleted lists" << endl;
    d->orphanDeletedList();

    // Start over from scratch with our extent sizing and growth
    d->setLastExtentSize(0);

    // before dropping indexes, at least make sure we can allocate one extent!
    if (allocateSpaceForANewRecord(_ns.ns().c_str(), d,
                                   Record::HeaderSize + 1, false).isNull()) {
        return StatusWith<CompactStats>(ErrorCodes::InternalError,
                                        "compact error no space available to allocate");
    }

    // note that the drop indexes call also invalidates all clientcursors for the namespace,
    // which is important and wanted here
    log() << "compact dropping indexes" << endl;
    Status status = _indexCatalog.dropAllIndexes(true);
    if (!status.isOK()) {
        return StatusWith<CompactStats>(status);
    }

    getDur().commitIfNeeded();

    CompactStats stats;

    OwnedPointerVector<IndexCatalog::IndexBuildBlock> indexBuildBlocks;
    vector<IndexAccessMethod*> indexesToInsertTo;
    vector< std::pair<IndexAccessMethod*, IndexAccessMethod*> > bulkToCommit;

    for (size_t i = 0; i < indexSpecs.size(); i++) {
        killCurrentOp.checkForInterrupt(false);

        BSONObj info = indexSpecs[i];
        info = _compactAdjustIndexSpec(info);
        info = _indexCatalog.fixIndexSpec(info);

        auto_ptr<IndexCatalog::IndexBuildBlock> block(
                new IndexCatalog::IndexBuildBlock(this, info));
        Status status = block->init();
        if (!status.isOK())
            return StatusWith<CompactStats>(status);

        IndexAccessMethod* accessMethod = block->getEntry()->accessMethod();
        status = accessMethod->initializeAsEmpty();
        if (!status.isOK())
            return StatusWith<CompactStats>(status);

        IndexAccessMethod* bulk = accessMethod->initiateBulk();
        if (bulk) {
            indexesToInsertTo.push_back(bulk);
            bulkToCommit.push_back(
                    std::pair<IndexAccessMethod*, IndexAccessMethod*>(accessMethod, bulk));
        }
        else {
            indexesToInsertTo.push_back(accessMethod);
        }

        indexBuildBlocks.mutableVector().push_back(block.release());
    }

    // reset data size and record counts to 0 for this namespace
    // as we're about to tally them up again for each new extent
    d->setStats(0, 0);

    ProgressMeterHolder pm(cc().curop()->setMessage("compact extent",
                                                    "Extent Compacting Progress",
                                                    extents.size()));

    int extentNumber = 0;
    for (list<DiskLoc>::iterator i = extents.begin(); i != extents.end(); i++) {
        _compactExtent(*i, extentNumber++, indexesToInsertTo, compactOptions, &stats);
        pm.hit();
    }

    verify(d->firstExtent().ext()->xprev.isNull());

    // indexes will do their own progress meter?
    pm.finished();

    log() << "starting index commits";

    for (size_t i = 0; i < bulkToCommit.size(); i++) {
        bulkToCommit[i].first->commitBulk(bulkToCommit[i].second, false, NULL);
    }

    for (size_t i = 0; i < indexBuildBlocks.size(); i++) {
        IndexCatalog::IndexBuildBlock* block = indexBuildBlocks.mutableVector()[i];
        block->success();
    }

    return StatusWith<CompactStats>(stats);
}