// Phase one of a foreground ("fast") index build: scan every record in the
// collection, extract its index keys, and feed them to the external sorter.
void BtreeBasedBuilder::addKeysToPhaseOne(NamespaceDetails* d,
                                          const char* ns,
                                          const IndexDetails& idx,
                                          const BSONObj& order,
                                          SortPhaseOne* phaseOne,
                                          int64_t nrecords,
                                          ProgressMeter* progressMeter,
                                          bool mayInterrupt,
                                          int idxNo) {
    shared_ptr<Cursor> cursor = theDataFileMgr.findAll( ns );
    phaseOne->sortCmp.reset(getComparison(idx.version(), idx.keyPattern()));
    phaseOne->sorter.reset(new BSONObjExternalSorter(phaseOne->sortCmp.get()));
    phaseOne->sorter->hintNumObjects( nrecords );
    auto_ptr<IndexDescriptor> desc(CatalogHack::getDescriptor(d, idxNo));
    auto_ptr<BtreeBasedAccessMethod> iam(CatalogHack::getBtreeBasedIndex(desc.get()));
    while ( cursor->ok() ) {
        RARELY killCurrentOp.checkForInterrupt( !mayInterrupt );
        BSONObj o = cursor->current();
        DiskLoc loc = cursor->currLoc();
        BSONObjSet keys;
        iam->getKeys(o, &keys);
        phaseOne->addKeys(keys, loc, mayInterrupt);
        cursor->advance();
        progressMeter->hit();
        if ( logLevel > 1 && phaseOne->n % 10000 == 0 ) {
            printMemInfo( "\t iterating objects" );
        }
    }
}
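// A minimal standalone sketch of the "phase one" pattern above: scan every
// record, extract its index keys, and collect (key, location) pairs for
// sorting. Document, extractKeys, and RecordId are hypothetical stand-ins for
// the BSON and DiskLoc machinery, and an in-memory std::sort stands in for
// BSONObjExternalSorter.
#include <algorithm>
#include <cstdint>
#include <string>
#include <utility>
#include <vector>

using RecordId = std::uint64_t;                       // stand-in for DiskLoc
using Document = std::vector<std::string>;            // stand-in for BSONObj

// Stand-in for the access method's getKeys(): one document may yield several
// keys (e.g. a multikey index over an array field).
static std::vector<std::string> extractKeys(const Document& doc) {
    return doc;
}

int main() {
    std::vector<Document> collection = {
        {"banana"}, {"apple", "cherry"}, {"apple"}
    };

    // Phase one: accumulate every (key, recordId) pair...
    std::vector<std::pair<std::string, RecordId> > keys;
    for (RecordId loc = 0; loc < collection.size(); ++loc) {
        for (const std::string& k : extractKeys(collection[loc])) {
            keys.emplace_back(k, loc);
        }
    }
    // ...then sort, so later phases can bulk-load the tree bottom-up.
    std::sort(keys.begin(), keys.end());
    return 0;
}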
void IndexChanges::dupCheck(IndexDetails& idx, DiskLoc curObjLoc) {
    if (added.empty() || !idx.unique() || ignoreUniqueIndex(idx)) {
        return;
    }
    const Ordering ordering = Ordering::make(idx.keyPattern());
    // "E11001 duplicate key on update"
    idx.idxInterface().uassertIfDups(idx, added, idx.head, curObjLoc, ordering);
}
DiskLoc BtreeBasedBuilder::makeEmptyIndex(const IndexDetails& idx) {
    if (0 == idx.version()) {
        return BtreeBucket<V0>::addBucket(idx);
    }
    else {
        return BtreeBucket<V1>::addBucket(idx);
    }
}
BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails &_id,
                          const BSONObj &_startKey, const BSONObj &_endKey,
                          bool endKeyInclusive, int _direction ) :
    d(_d), idxNo(_idxNo),
    startKey( _startKey ),
    endKey( _endKey ),
    _endKeyInclusive( endKeyInclusive ),
    _multikey( d->isMultikey( idxNo ) ),
    indexDetails( _id ),
    _order( _id.keyPattern() ),
    _ordering( Ordering::make( _order ) ),
    _direction( _direction ),
    _spec( _id.getSpec() ),
    _independentFieldRanges( false ),
    _nscanned( 0 ) {
    audit();
}
// throws DBException
// Builds in the foreground (fast, bottom-up) unless {background: true} was
// requested; repair always uses the foreground path.
void buildAnIndex(const std::string& ns, NamespaceDetails* d, IndexDetails& idx,
                  bool mayInterrupt) {
    BSONObj idxInfo = idx.info.obj();

    MONGO_TLOG(0) << "build index on: " << ns << " properties: "
                  << idxInfo.jsonString() << endl;

    audit::logCreateIndex( currentClient.get(), &idxInfo, idx.indexName(), ns );

    Timer t;
    unsigned long long n;

    verify( Lock::isWriteLocked(ns) );

    if( inDBRepair || !idxInfo["background"].trueValue() ) {
        int idxNo = d->findIndexByName( idx.info.obj()["name"].valuestr(), true );
        verify( idxNo >= 0 );
        n = BtreeBasedBuilder::fastBuildIndex(ns.c_str(), d, idx, mayInterrupt, idxNo);
        verify( !idx.head.isNull() );
    }
    else {
        BackgroundIndexBuildJob j(ns.c_str());
        n = j.go(ns, d, idx);
    }
    MONGO_TLOG(0) << "build index done.  scanned " << n << " total records. "
                  << t.millis() / 1000.0 << " secs" << endl;
}
BtreeCursor::BtreeCursor( const IndexDetails &_id, const BSONObj &_startKey,
                          const BSONObj &_endKey, int _direction ) :
    startKey( _startKey ),
    endKey( _endKey ),
    indexDetails( _id ),
    order( _id.keyPattern() ),
    direction( _direction ) {
    bool found;
    if ( otherTraceLevel >= 12 ) {
        if ( otherTraceLevel >= 200 ) {
            out() << "::BtreeCursor() qtl>200.  validating entire index." << endl;
            indexDetails.head.btree()->fullValidate(indexDetails.head, order);
        }
        else {
            out() << "BTreeCursor(). dumping head bucket" << endl;
            indexDetails.head.btree()->dump();
        }
    }
    bucket = indexDetails.head.btree()->
        locate(indexDetails, indexDetails.head, startKey, order, keyOfs, found,
               direction > 0 ? minDiskLoc : maxDiskLoc, direction);
    skipUnusedKeys();
    checkEnd();
}
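// A minimal sketch of what the locate()/checkEnd() pair above accomplishes,
// using a std::multimap as a stand-in for the btree: binary-search to the
// first key >= startKey, then scan forward until the key passes endKey.
#include <iostream>
#include <map>
#include <string>

int main() {
    std::multimap<std::string, int> index = {
        {"a", 1}, {"b", 2}, {"b", 3}, {"c", 4}, {"d", 5}
    };
    const std::string startKey = "b", endKey = "c";
    const bool endKeyInclusive = true;

    // locate(): position on the first entry not less than startKey.
    auto it = index.lower_bound(startKey);

    // advance()/checkEnd(): walk forward while the current key is in range.
    for (; it != index.end(); ++it) {
        if (endKeyInclusive ? it->first > endKey : it->first >= endKey) break;
        std::cout << it->first << " -> " << it->second << '\n';
    }
    return 0;
}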
// static
DiskLoc QueryRunner::fastFindSingle(const IndexDetails &indexdetails, const BSONObj& key) {
    const int version = indexdetails.version();
    if (0 == version) {
        return indexdetails.head.btree<V0>()->findSingle(indexdetails,
                                                         indexdetails.head, key);
    }
    else {
        verify(1 == version);
        return indexdetails.head.btree<V1>()->findSingle(indexdetails,
                                                         indexdetails.head, key);
    }
}
BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id,
                          const shared_ptr< FieldRangeVector > &_bounds,
                          int _direction, bool useFRVSpec ) :
    d(_d), idxNo(_idxNo),
    _endKeyInclusive( true ),
    _multikey( d->isMultikey( idxNo ) ),
    indexDetails( _id ),
    _order( _id.keyPattern() ),
    _ordering( Ordering::make( _order ) ),
    _direction( _direction ),
    _bounds( ( assert( _bounds.get() ), _bounds ) ),
    _boundsIterator( new FieldRangeVectorIterator( *_bounds ) ),
    _spec( useFRVSpec ? _bounds->getSpec() : _id.getSpec() ),
    _independentFieldRanges( true ),
    _nscanned( 0 ) {
    massert( 13384, "BtreeCursor FieldRangeVector constructor doesn't accept special indexes",
             !_spec.getType() );
    audit();
    startKey = _bounds->startKey();
    _boundsIterator->advance( startKey ); // handles initialization
    _boundsIterator->prepDive();
    bucket = indexDetails.head;
    keyOfs = 0;
}
// Checks each key the update would add against the unique index; 'self' is
// the record being updated, so a hit on its own entry is not a duplicate.
virtual void uassertIfDups(IndexDetails& idx, vector<BSONObj*>& addedKeys,
                           DiskLoc head, DiskLoc self, const Ordering& ordering) {
    const BtreeBucket<V> *h = head.btree<V>();
    for( vector<BSONObj*>::iterator i = addedKeys.begin(); i != addedKeys.end(); i++ ) {
        KeyOwned k(**i);
        bool dup = h->wouldCreateDup(idx, head, k, ordering, self);
        if (dup) {
            stringstream ss;
            ss << "E11001 duplicate key on update error ";
            ss << "index: " << idx.indexNamespace() << " ";
            ss << "dup key: " << k.toString();
            uasserted(11001, ss.str());
        }
    }
}
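// A minimal standalone sketch of the wouldCreateDup() pre-check above: before
// applying an update, test each key the update would add against a unique
// index, ignoring the entry that belongs to the record being updated.
// std::map stands in for the btree and RecordId for DiskLoc; both are
// hypothetical stand-ins.
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

using RecordId = int;

void uassertIfDups(const std::map<std::string, RecordId>& uniqueIndex,
                   const std::vector<std::string>& addedKeys,
                   RecordId self) {
    for (const std::string& k : addedKeys) {
        auto it = uniqueIndex.find(k);
        // A hit on some *other* record would create a duplicate.
        if (it != uniqueIndex.end() && it->second != self) {
            throw std::runtime_error("E11001 duplicate key on update: " + k);
        }
    }
}

int main() {
    std::map<std::string, RecordId> uniqueIndex = {{"a", 1}, {"b", 2}};
    uassertIfDups(uniqueIndex, {"a"}, 1);             // ok: hit belongs to self
    try {
        uassertIfDups(uniqueIndex, {"b"}, 1);         // throws: "b" belongs to 2
    } catch (const std::runtime_error&) {}
    return 0;
}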
BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails &_id,
                          const BSONObj &_startKey, const BSONObj &_endKey,
                          bool endKeyInclusive, int _direction ) :
    d(_d), idxNo(_idxNo),
    startKey( _startKey ),
    endKey( _endKey ),
    endKeyInclusive_( endKeyInclusive ),
    multikey( d->isMultikey( idxNo ) ),
    indexDetails( _id ),
    order( _id.keyPattern() ),
    direction( _direction ),
    boundIndex_() {
    audit();
    init();
}
BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id,
                          const shared_ptr< FieldRangeVector > &_bounds, int _direction ) :
    d(_d), idxNo(_idxNo),
    endKeyInclusive_( true ),
    multikey( d->isMultikey( idxNo ) ),
    indexDetails( _id ),
    order( _id.keyPattern() ),
    _ordering( Ordering::make( order ) ),
    direction( _direction ),
    bounds_( ( assert( _bounds.get() ), _bounds ) ),
    _boundsIterator( new FieldRangeVector::Iterator( *bounds_ ) ),
    _spec( _id.getSpec() ),
    _independentFieldRanges( true ) {
    massert( 13384, "BtreeCursor FieldRangeVector constructor doesn't accept special indexes",
             !_spec.getType() );
    audit();
    startKey = bounds_->startKey();
    bool found;
    _boundsIterator->advance( startKey ); // handles initialization
    bucket = indexDetails.head.btree()->
        locate(indexDetails, indexDetails.head, startKey, _ordering, keyOfs, found,
               direction > 0 ? minDiskLoc : maxDiskLoc, direction);
    skipAndCheck();
    DEV assert( dups.size() == 0 );
}
BtreeCursor::BtreeCursor( NamespaceDetails *_d, int _idxNo, const IndexDetails& _id,
                          const vector< pair< BSONObj, BSONObj > > &_bounds,
                          int _direction ) :
    d(_d), idxNo(_idxNo),
    endKeyInclusive_( true ),
    multikey( d->isMultikey( idxNo ) ),
    indexDetails( _id ),
    order( _id.keyPattern() ),
    direction( _direction ),
    bounds_( _bounds ),
    boundIndex_() {
    assert( !bounds_.empty() );
    startKey = bounds_[ 0 ].first;
    endKey = bounds_[ 0 ].second;
    audit();
    init();
}
unsigned long long go(string ns, NamespaceDetails *d, IndexDetails& idx, int idxNo) {
    unsigned long long n = 0;

    prep(ns.c_str(), d);
    verify( idxNo == d->nIndexes );
    try {
        idx.head.writing() = idx.idxInterface().addBucket(idx);
        n = addExistingToIndex(ns.c_str(), d, idx, idxNo);
    }
    catch(...) {
        if( cc().database() && nsdetails(ns.c_str()) == d ) {
            verify( idxNo == d->nIndexes );
            done(ns.c_str(), d);
        }
        else {
            log() << "ERROR: db gone during bg index?" << endl;
        }
        throw;
    }
    verify( idxNo == d->nIndexes );
    done(ns.c_str(), d);
    return n;
}
uint64_t BtreeBasedBuilder::fastBuildIndex(const char* ns, NamespaceDetails* d,
                                           IndexDetails& idx, bool mayInterrupt,
                                           int idxNo) {
    CurOp * op = cc().curop();
    Timer t;

    tlog(1) << "fastBuildIndex " << ns << ' ' << idx.info.obj().toString() << endl;

    bool dupsAllowed = !idx.unique() || ignoreUniqueIndex(idx);
    bool dropDups = idx.dropDups() || inDBRepair;
    BSONObj order = idx.keyPattern();

    getDur().writingDiskLoc(idx.head).Null();

    if ( logLevel > 1 ) printMemInfo( "before index start" );

    /* get and sort all the keys ----- */
    ProgressMeterHolder pm(op->setMessage("index: (1/3) external sort",
                                          "Index: (1/3) External Sort Progress",
                                          d->stats.nrecords,
                                          10));
    SortPhaseOne phase1;
    addKeysToPhaseOne(d, ns, idx, order, &phase1, d->stats.nrecords, pm.get(),
                      mayInterrupt, idxNo );
    pm.finished();

    BSONObjExternalSorter& sorter = *(phase1.sorter);

    if( phase1.multi ) {
        d->setIndexIsMultikey(ns, idxNo);
    }

    if ( logLevel > 1 ) printMemInfo( "before final sort" );
    phase1.sorter->sort( mayInterrupt );
    if ( logLevel > 1 ) printMemInfo( "after final sort" );

    LOG(t.seconds() > 5 ? 0 : 1) << "\t external sort used : " << sorter.numFiles()
                                 << " files " << " in " << t.seconds() << " secs" << endl;

    set<DiskLoc> dupsToDrop;

    /* build index --- */
    if( idx.version() == 0 )
        buildBottomUpPhases2And3<V0>(dupsAllowed, idx, sorter, dropDups, dupsToDrop,
                                     op, &phase1, pm, t, mayInterrupt);
    else if( idx.version() == 1 )
        buildBottomUpPhases2And3<V1>(dupsAllowed, idx, sorter, dropDups, dupsToDrop,
                                     op, &phase1, pm, t, mayInterrupt);
    else
        verify(false);

    if( dropDups )
        log() << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl;

    BtreeBasedBuilder::doDropDups(ns, d, dupsToDrop, mayInterrupt);

    return phase1.n;
}
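// A condensed standalone sketch of the three phases fastBuildIndex() walks
// through: (1) extract and sort all keys, then (2/3) insert them in sorted
// order, collecting duplicate locations to drop when dropDups is set rather
// than failing outright. The in-memory containers are stand-ins for the
// external sorter and the btree.
#include <algorithm>
#include <iostream>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

int main() {
    using KeyLoc = std::pair<std::string, int>;       // (index key, record loc)
    std::vector<KeyLoc> keys = {{"b", 0}, {"a", 1}, {"a", 2}, {"c", 3}};
    const bool dupsAllowed = false, dropDups = true;

    std::sort(keys.begin(), keys.end());              // phase 1: (external) sort

    std::map<std::string, int> index;                 // stand-in for the btree
    std::set<int> dupsToDrop;
    for (const KeyLoc& kl : keys) {                   // phases 2/3: bulk insert
        if (!index.insert(kl).second && !dupsAllowed) {
            if (!dropDups)
                throw std::runtime_error("E11000 duplicate key error");
            dupsToDrop.insert(kl.second);             // delete the doc afterwards
        }
    }
    std::cout << "dupsToDrop: " << dupsToDrop.size() << '\n';  // prints 1
    return 0;
}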
bool run2DSphereGeoNear(const IndexDetails &id, BSONObj& cmdObj, string& errmsg,
                        BSONObjBuilder& result) {
    S2IndexType *idxType = static_cast<S2IndexType*>(id.getSpec().getType());
    verify(&id == idxType->getDetails());

    // We support both "num" and "limit" options to control limit
    int numWanted = 100;
    const char* limitName = cmdObj["num"].isNumber() ? "num" : "limit";
    if (cmdObj[limitName].isNumber()) {
        numWanted = cmdObj[limitName].numberInt();
        verify(numWanted >= 0);
    }

    // Don't count any docs twice.  Isn't this default behavior?  Or will yields screw this up?
    //bool uniqueDocs = false;
    //if (!cmdObj["uniqueDocs"].eoo()) uniqueDocs = cmdObj["uniqueDocs"].trueValue();

    // Add the location information to each result as a field with name 'loc'.
    bool includeLocs = false;
    if (!cmdObj["includeLocs"].eoo()) includeLocs = cmdObj["includeLocs"].trueValue();

    // The actual query point
    uassert(16551, "'near' param missing/invalid", !cmdObj["near"].eoo());
    BSONObj nearObj = cmdObj["near"].embeddedObject();

    // nearObj must be a point.
    uassert(16571, "near must be called with a point, called with " + nearObj.toString(),
            GeoParser::isPoint(nearObj));

    // The non-near query part.
    BSONObj query;
    if (cmdObj["query"].isABSONObj())
        query = cmdObj["query"].embeddedObject();

    // The farthest away we're willing to look.
    double maxDistance = numeric_limits<double>::max();
    if (cmdObj["maxDistance"].isNumber())
        maxDistance = cmdObj["maxDistance"].number();

    vector<string> geoFieldNames;
    idxType->getGeoFieldNames(&geoFieldNames);
    uassert(16552, "geoNear called but no indexed geo fields?", 1 == geoFieldNames.size());
    QueryGeometry queryGeo(geoFieldNames[0]);
    uassert(16553, "geoNear couldn't parse geo: " + nearObj.toString(),
            queryGeo.parseFrom(nearObj));

    vector<QueryGeometry> regions;
    regions.push_back(queryGeo);

    scoped_ptr<S2NearCursor> cursor(new S2NearCursor(idxType->keyPattern(),
                                                     idxType->getDetails(), query,
                                                     regions, idxType->getParams(),
                                                     numWanted, maxDistance));

    double totalDistance = 0;
    int results = 0;
    BSONObjBuilder resultBuilder(result.subarrayStart("results"));
    double farthestDist = 0;

    while (cursor->ok()) {
        double dist = cursor->currentDistance();
        totalDistance += dist;
        if (dist > farthestDist) { farthestDist = dist; }
        BSONObjBuilder oneResultBuilder(
            resultBuilder.subobjStart(BSONObjBuilder::numStr(results)));
        oneResultBuilder.append("dis", dist);
        if (includeLocs) {
            BSONElementSet geoFieldElements;
            cursor->current().getFieldsDotted(geoFieldNames[0], geoFieldElements, false);
            for (BSONElementSet::iterator oi = geoFieldElements.begin();
                 oi != geoFieldElements.end(); ++oi) {
                if (oi->isABSONObj()) {
                    oneResultBuilder.appendAs(*oi, "loc");
                }
            }
        }

        oneResultBuilder.append("obj", cursor->current());
        oneResultBuilder.done();
        ++results;
        cursor->advance();
    }

    resultBuilder.done();

    BSONObjBuilder stats(result.subobjStart("stats"));
    stats.append("time", cc().curop()->elapsedMillis());
    stats.appendNumber("nscanned", cursor->nscanned());
    // Guard against an empty result set; dividing by zero would put a NaN in the stats.
    stats.append("avgDistance", results ? totalDistance / results : 0.0);
    stats.append("maxDistance", farthestDist);
    stats.done();

    return true;
}
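// A small standalone sketch of the stats accumulation at the end of
// run2DSphereGeoNear(): running total for the average, running max for the
// farthest result, and an explicit guard for zero results (mirroring the
// guard added above). The distances vector is hypothetical sample data.
#include <iostream>
#include <vector>

int main() {
    std::vector<double> distances = {1.5, 0.2, 3.7};  // per-result distances
    double totalDistance = 0, farthestDist = 0;
    for (double d : distances) {
        totalDistance += d;
        if (d > farthestDist) farthestDist = d;
    }
    const int results = static_cast<int>(distances.size());
    std::cout << "avgDistance: " << (results ? totalDistance / results : 0.0) << '\n'
              << "maxDistance: " << farthestDist << '\n';
    return 0;
}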
unsigned long long addExistingToIndex(const char *ns, NamespaceDetails *d,
                                      IndexDetails& idx, int idxNo) {
    bool dupsAllowed = !idx.unique();
    bool dropDups = idx.dropDups();

    ProgressMeter& progress = cc().curop()->setMessage( "bg index build",
                                                        d->stats.nrecords );

    unsigned long long n = 0;
    unsigned long long numDropped = 0;
    auto_ptr<ClientCursor> cc;
    {
        shared_ptr<Cursor> c = theDataFileMgr.findAll(ns);
        cc.reset( new ClientCursor(QueryOption_NoCursorTimeout, c, ns) );
    }

    while ( cc->ok() ) {
        BSONObj js = cc->current();
        try {
            {
                if ( !dupsAllowed && dropDups ) {
                    // The duplicate-key failure is expected and handled in the
                    // catch block below, so keep it out of last-error state.
                    LastError::Disabled led( lastError.get() );
                    addKeysToIndex(ns, d, idxNo, js, cc->currLoc(), dupsAllowed);
                }
                else {
                    addKeysToIndex(ns, d, idxNo, js, cc->currLoc(), dupsAllowed);
                }
            }
            cc->advance();
        }
        catch( AssertionException& e ) {
            if( e.interrupted() ) {
                killCurrentOp.checkForInterrupt();
            }

            if ( dropDups ) {
                DiskLoc toDelete = cc->currLoc();
                bool ok = cc->advance();
                ClientCursor::YieldData yieldData;
                massert( 16093, "after yield cursor deleted",
                         cc->prepareToYield( yieldData ) );
                theDataFileMgr.deleteRecord( ns, toDelete.rec(), toDelete, false, true, true );
                if( !cc->recoverFromYield( yieldData ) ) {
                    cc.release();
                    if( !ok ) {
                        /* we were already at the end. normal. */
                    }
                    else {
                        uasserted(12585, "cursor gone during bg index; dropDups");
                    }
                    break;
                }
                numDropped++;
            }
            else {
                log() << "background addExistingToIndex exception " << e.what() << endl;
                throw;
            }
        }
        n++;
        progress.hit();

        getDur().commitIfNeeded();

        if ( cc->yieldSometimes( ClientCursor::WillNeed ) ) {
            progress.setTotalWhileRunning( d->stats.nrecords );
        }
        else {
            cc.release();
            uasserted(12584, "cursor gone during bg index");
            break;
        }
    }
    progress.finished();
    if ( dropDups )
        log() << "\t backgroundIndexBuild dupsToDrop: " << numDropped << endl;
    return n;
}
// throws DBException
unsigned long long fastBuildIndex(const char *ns, NamespaceDetails *d,
                                  IndexDetails& idx, int idxNo) {
    CurOp * op = cc().curop();

    Timer t;

    tlog(1) << "fastBuildIndex " << ns << " idxNo:" << idxNo << ' '
            << idx.info.obj().toString() << endl;

    bool dupsAllowed = !idx.unique();
    bool dropDups = idx.dropDups() || inDBRepair;
    BSONObj order = idx.keyPattern();

    getDur().writingDiskLoc(idx.head).Null();

    if ( logLevel > 1 ) printMemInfo( "before index start" );

    /* get and sort all the keys ----- */
    ProgressMeterHolder pm( op->setMessage( "index: (1/3) external sort",
                                            d->stats.nrecords, 10 ) );
    SortPhaseOne _ours;
    SortPhaseOne *phase1 = precalced;
    if( phase1 == 0 ) {
        phase1 = &_ours;
        SortPhaseOne& p1 = *phase1;
        shared_ptr<Cursor> c = theDataFileMgr.findAll(ns);
        p1.sorter.reset( new BSONObjExternalSorter(idx.idxInterface(), order) );
        p1.sorter->hintNumObjects( d->stats.nrecords );
        const IndexSpec& spec = idx.getSpec();
        while ( c->ok() ) {
            BSONObj o = c->current();
            DiskLoc loc = c->currLoc();
            p1.addKeys(spec, o, loc);
            c->advance();
            pm.hit();
            if ( logLevel > 1 && p1.n % 10000 == 0 ) {
                printMemInfo( "\t iterating objects" );
            }
        }
    }
    pm.finished();

    BSONObjExternalSorter& sorter = *(phase1->sorter);

    if( phase1->multi )
        d->setIndexIsMultikey(ns, idxNo);

    if ( logLevel > 1 ) printMemInfo( "before final sort" );
    phase1->sorter->sort();
    if ( logLevel > 1 ) printMemInfo( "after final sort" );

    log(t.seconds() > 5 ? 0 : 1) << "\t external sort used : " << sorter.numFiles()
                                 << " files " << " in " << t.seconds() << " secs" << endl;

    set<DiskLoc> dupsToDrop;

    /* build index --- */
    if( idx.version() == 0 )
        buildBottomUpPhases2And3<V0>(dupsAllowed, idx, sorter, dropDups, dupsToDrop,
                                     op, phase1, pm, t);
    else if( idx.version() == 1 )
        buildBottomUpPhases2And3<V1>(dupsAllowed, idx, sorter, dropDups, dupsToDrop,
                                     op, phase1, pm, t);
    else
        verify(false);

    if( dropDups )
        log() << "\t fastBuildIndex dupsToDrop:" << dupsToDrop.size() << endl;

    for( set<DiskLoc>::iterator i = dupsToDrop.begin(); i != dupsToDrop.end(); i++ ) {
        theDataFileMgr.deleteRecord( ns, i->rec(), *i,
                                     false /* cappedOk */,
                                     true /* noWarn */,
                                     isMaster( ns ) /* logOp */ );
        getDur().commitIfNeeded();
    }

    return phase1->n;
}
unsigned long long addExistingToIndex(const char *ns, NamespaceDetails *d,
                                      IndexDetails& idx) {
    bool dupsAllowed = !idx.unique();
    bool dropDups = idx.dropDups();

    ProgressMeter& progress = cc().curop()->setMessage("bg index build",
                                                       "Background Index Build Progress",
                                                       d->numRecords());

    unsigned long long n = 0;
    unsigned long long numDropped = 0;

    auto_ptr<Runner> runner(InternalPlanner::collectionScan(ns));

    // We're not delegating yielding to the runner because we need to know when a yield
    // happens.
    RunnerYieldPolicy yieldPolicy;

    std::string idxName = idx.indexName();
    int idxNo = IndexBuildsInProgress::get(ns, idxName);

    // After this yields in the loop, idx may point at a different index (if indexes get
    // flipped, see insert_makeIndex) or even an empty IndexDetails, so nothing below should
    // depend on idx. idxNo should be recalculated after each yield.

    BSONObj js;
    DiskLoc loc;
    while (Runner::RUNNER_ADVANCED == runner->getNext(&js, &loc)) {
        try {
            if ( !dupsAllowed && dropDups ) {
                LastError::Disabled led( lastError.get() );
                addKeysToIndex(ns, d, idxNo, js, loc, dupsAllowed);
            }
            else {
                addKeysToIndex(ns, d, idxNo, js, loc, dupsAllowed);
            }
        }
        catch( AssertionException& e ) {
            if( e.interrupted() ) {
                killCurrentOp.checkForInterrupt();
            }

            // TODO: Does exception really imply dropDups exception?
            if (dropDups) {
                bool runnerEOF = runner->isEOF();
                runner->saveState();
                theDataFileMgr.deleteRecord(d, ns, loc.rec(), loc, false, true, true);
                if (!runner->restoreState()) {
                    // Runner got killed somehow.  This probably shouldn't happen.
                    if (runnerEOF) {
                        // Quote: "we were already at the end. normal."
                        // TODO: Why is this normal?
                    }
                    else {
                        uasserted(12585, "cursor gone during bg index; dropDups");
                    }
                    break;
                }
                // We deleted a record, but we didn't actually yield the dblock.
                // TODO: Why did the old code assume we yielded the lock?
                numDropped++;
            }
            else {
                log() << "background addExistingToIndex exception " << e.what() << endl;
                throw;
            }
        }

        n++;
        progress.hit();

        getDur().commitIfNeeded();

        if (yieldPolicy.shouldYield()) {
            if (!yieldPolicy.yieldAndCheckIfOK(runner.get())) {
                uasserted(12584, "cursor gone during bg index");
                break;
            }

            progress.setTotalWhileRunning( d->numRecords() );

            // Recalculate idxNo if we yielded
            idxNo = IndexBuildsInProgress::get(ns, idxName);
        }
    }

    progress.finished();
    if ( dropDups )
        log() << "\t backgroundIndexBuild dupsToDrop: " << numDropped << endl;
    return n;
}
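// A minimal standalone sketch of the background-build loop shape above:
// process records one at a time, periodically "yield", and re-resolve any
// state (here, the index slot number) that may have changed while the lock
// was released. YieldPolicy and resolveIndexSlot are hypothetical stand-ins
// for RunnerYieldPolicy and IndexBuildsInProgress::get().
#include <cstddef>
#include <string>
#include <vector>

struct YieldPolicy {
    std::size_t sinceYield = 0;
    bool shouldYield() { return ++sinceYield % 128 == 0; }   // arbitrary period
};

// Index slots can move while the build yields, so the slot must be
// re-resolved by name after every yield.
static int resolveIndexSlot(const std::vector<std::string>& slots,
                            const std::string& name) {
    for (std::size_t i = 0; i < slots.size(); ++i)
        if (slots[i] == name) return static_cast<int>(i);
    return -1;
}

int main() {
    std::vector<std::string> slots = {"_id_", "a_1"};
    YieldPolicy yieldPolicy;
    int idxNo = resolveIndexSlot(slots, "a_1");
    for (int record = 0; record < 1000; ++record) {
        // ... addKeysToIndex(record, idxNo) would go here ...
        if (yieldPolicy.shouldYield()) {
            // In the real code the lock is released and reacquired here.
            idxNo = resolveIndexSlot(slots, "a_1");   // recalculate after yield
        }
    }
    return idxNo >= 0 ? 0 : 1;
}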