long long runCount(OperationContext* txn, const string& ns, const BSONObj &cmd, string &err, int &errCode) { AutoGetCollectionForRead ctx(txn, ns); Collection* collection = ctx.getCollection(); if (NULL == collection) { err = "ns missing"; return -1; } const NamespaceString nss(ns); CountRequest request; CmdCount* countComm = static_cast<CmdCount*>(Command::findCommand("count")); Status parseStatus = countComm->parseRequest(nss.db().toString(), cmd, &request); if (!parseStatus.isOK()) { err = parseStatus.reason(); errCode = parseStatus.code(); return -1; } if (request.query.isEmpty()) { return applySkipLimit(collection->numRecords(txn), cmd); } PlanExecutor* rawExec; Status getExecStatus = getExecutorCount(txn, collection, request, PlanExecutor::YIELD_AUTO, &rawExec); if (!getExecStatus.isOK()) { err = getExecStatus.reason(); errCode = getExecStatus.code(); return -1; } scoped_ptr<PlanExecutor> exec(rawExec); // Store the plan summary string in CurOp. if (NULL != txn->getCurOp()) { txn->getCurOp()->debug().planSummary = Explain::getPlanSummary(exec.get()); } Status execPlanStatus = exec->executePlan(); if (!execPlanStatus.isOK()) { err = execPlanStatus.reason(); errCode = execPlanStatus.code(); return -2; } // Plan is done executing. We just need to pull the count out of the root stage. invariant(STAGE_COUNT == exec->getRootStage()->stageType()); CountStage* countStage = static_cast<CountStage*>(exec->getRootStage()); const CountStats* countStats = static_cast<const CountStats*>(countStage->getSpecificStats()); return countStats->nCounted; }
/**
 * Produces the next result of the winning plan.
 *
 * Results buffered in _alreadyProduced are handed out first; once those are exhausted the
 * call is forwarded to the best plan. If the best plan errors while a backup solution is
 * available, the query is removed from the plan cache, the backup takes over, and the call
 * is retried. Once the best plan advances, any backup still held is deleted.
 *
 * 'objOut' and 'dlOut' may each be NULL when the caller does not need that piece of data.
 */
Runner::RunnerState MultiPlanRunner::getNext(BSONObj* objOut, DiskLoc* dlOut) {
    if (_killed) {
        return Runner::RUNNER_DEAD;
    }
    if (_failure) {
        return Runner::RUNNER_ERROR;
    }

    // No winner chosen yet: pick one now. A failed pick must have set _failure or _killed.
    if (NULL == _bestPlan) {
        if (!pickBestPlan(NULL, objOut)) {
            verify(_failure || _killed);
            if (_killed) {
                return Runner::RUNNER_DEAD;
            }
            if (_failure) {
                return Runner::RUNNER_ERROR;
            }
        }
    }

    // Drain buffered results, looking for one that carries everything the caller asked for.
    // (This mirrors logic in PlanExecutor.)
    while (!_alreadyProduced.empty()) {
        const WorkingSetID wsid = _alreadyProduced.front();
        _alreadyProduced.pop_front();

        WorkingSet* workingSet = _bestPlan->getWorkingSet();
        WorkingSetMember* wsm = workingSet->get(wsid);

        // Stays true only if every requested out-parameter could be filled in.
        bool satisfied = true;

        if (NULL != objOut) {
            if (WorkingSetMember::LOC_AND_IDX == wsm->state) {
                if (1 == wsm->keyData.size()) {
                    *objOut = wsm->keyData[0].keyData;
                }
                else {
                    // Key data was requested but the member does not hold exactly one key.
                    satisfied = false;
                }
            }
            else if (wsm->hasObj()) {
                *objOut = wsm->obj;
            }
            else {
                // An object was requested but the member has none.
                satisfied = false;
            }
        }

        if (satisfied && NULL != dlOut) {
            if (wsm->hasLoc()) {
                *dlOut = wsm->loc;
            }
            else {
                // A DiskLoc was requested but the member has none.
                satisfied = false;
            }
        }

        // Whether it was usable or not, the buffered member leaves the WorkingSet.
        workingSet->free(wsid);

        if (satisfied) {
            return Runner::RUNNER_ADVANCED;
        }
    }

    RunnerState state = _bestPlan->getNext(objOut, dlOut);

    if (NULL != _backupSolution) {
        if (Runner::RUNNER_ERROR == state) {
            QLOG() << "Best plan errored out switching to backup\n";

            // Uncache the bad solution when falling back on the backup.
            //
            // XXX: Instead of uncaching we should find a way for the cached plan runner to
            // fall back on a different solution if the best solution fails. Alternatively
            // we could try to defer cache insertion to be after the first produced result.
            Database* db = cc().database();
            verify(NULL != db);
            Collection* collection = db->getCollection(_query->ns());
            verify(NULL != collection);
            collection->infoCache()->getPlanCache()->remove(*_query);

            // Promote the backup plan, solution and buffered results, then retry.
            _bestPlan.reset(_backupPlan);
            _backupPlan = NULL;
            _bestSolution.reset(_backupSolution);
            _backupSolution = NULL;
            _alreadyProduced = _backupAlreadyProduced;
            return getNext(objOut, dlOut);
        }

        if (Runner::RUNNER_ADVANCED == state) {
            QLOG() << "Best plan had a blocking sort, became unblocked, deleting backup plan\n";
            delete _backupSolution;
            delete _backupPlan;
            _backupSolution = NULL;
            _backupPlan = NULL;
            // TODO: free from WS?
            _backupAlreadyProduced.clear();
        }
    }

    return state;
}
bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result, bool fromRepl) { const string ns = dbname + "." + cmdObj.firstElement().valuestr(); if (!cmdObj["start"].eoo()) { errmsg = "using deprecated 'start' argument to geoNear"; return false; } Client::ReadContext ctx(txn, ns); Database* db = ctx.ctx().db(); if ( !db ) { errmsg = "can't find ns"; return false; } Collection* collection = db->getCollection( txn, ns ); if ( !collection ) { errmsg = "can't find ns"; return false; } IndexCatalog* indexCatalog = collection->getIndexCatalog(); // cout << "raw cmd " << cmdObj.toString() << endl; // We seek to populate this. string nearFieldName; bool using2DIndex = false; if (!getFieldName(txn, collection, indexCatalog, &nearFieldName, &errmsg, &using2DIndex)) { return false; } PointWithCRS point; uassert(17304, "'near' field must be point", GeoParser::parseQueryPoint(cmdObj["near"], &point).isOK()); bool isSpherical = cmdObj["spherical"].trueValue(); if (!using2DIndex) { uassert(17301, "2dsphere index must have spherical: true", isSpherical); } // Build the $near expression for the query. BSONObjBuilder nearBob; if (isSpherical) { nearBob.append("$nearSphere", cmdObj["near"].Obj()); } else { nearBob.append("$near", cmdObj["near"].Obj()); } if (!cmdObj["maxDistance"].eoo()) { uassert(17299, "maxDistance must be a number",cmdObj["maxDistance"].isNumber()); nearBob.append("$maxDistance", cmdObj["maxDistance"].number()); } if (!cmdObj["minDistance"].eoo()) { uassert(17298, "minDistance doesn't work on 2d index", !using2DIndex); uassert(17300, "minDistance must be a number",cmdObj["minDistance"].isNumber()); nearBob.append("$minDistance", cmdObj["minDistance"].number()); } if (!cmdObj["uniqueDocs"].eoo()) { warning() << ns << ": ignoring deprecated uniqueDocs option in geoNear command"; } // And, build the full query expression. 
BSONObjBuilder queryBob; queryBob.append(nearFieldName, nearBob.obj()); if (!cmdObj["query"].eoo() && cmdObj["query"].isABSONObj()) { queryBob.appendElements(cmdObj["query"].Obj()); } BSONObj rewritten = queryBob.obj(); // cout << "rewritten query: " << rewritten.toString() << endl; int numWanted = 100; const char* limitName = !cmdObj["num"].eoo() ? "num" : "limit"; BSONElement eNumWanted = cmdObj[limitName]; if (!eNumWanted.eoo()) { uassert(17303, "limit must be number", eNumWanted.isNumber()); numWanted = eNumWanted.numberInt(); uassert(17302, "limit must be >=0", numWanted >= 0); } bool includeLocs = false; if (!cmdObj["includeLocs"].eoo()) { includeLocs = cmdObj["includeLocs"].trueValue(); } double distanceMultiplier = 1.0; BSONElement eDistanceMultiplier = cmdObj["distanceMultiplier"]; if (!eDistanceMultiplier.eoo()) { uassert(17296, "distanceMultiplier must be a number", eDistanceMultiplier.isNumber()); distanceMultiplier = eDistanceMultiplier.number(); uassert(17297, "distanceMultiplier must be non-negative", distanceMultiplier >= 0); } BSONObj projObj = BSON("$pt" << BSON("$meta" << LiteParsedQuery::metaGeoNearPoint) << "$dis" << BSON("$meta" << LiteParsedQuery::metaGeoNearDistance)); CanonicalQuery* cq; const NamespaceString nss(dbname); const WhereCallbackReal whereCallback(txn, nss.db()); if (!CanonicalQuery::canonicalize(ns, rewritten, BSONObj(), projObj, 0, numWanted, BSONObj(), &cq, whereCallback).isOK()) { errmsg = "Can't parse filter / create query"; return false; } PlanExecutor* rawExec; if (!getExecutor(txn, collection, cq, &rawExec, 0).isOK()) { errmsg = "can't get query runner"; return false; } auto_ptr<PlanExecutor> exec(rawExec); const ScopedExecutorRegistration safety(exec.get()); double totalDistance = 0; BSONObjBuilder resultBuilder(result.subarrayStart("results")); double farthestDist = 0; BSONObj currObj; int results = 0; while ((results < numWanted) && PlanExecutor::ADVANCED == exec->getNext(&currObj, NULL)) { // Come up with the correct 
distance. double dist = currObj["$dis"].number() * distanceMultiplier; totalDistance += dist; if (dist > farthestDist) { farthestDist = dist; } // Strip out '$dis' and '$pt' from the result obj. The rest gets added as 'obj' // in the command result. BSONObjIterator resIt(currObj); BSONObjBuilder resBob; while (resIt.more()) { BSONElement elt = resIt.next(); if (!mongoutils::str::equals("$pt", elt.fieldName()) && !mongoutils::str::equals("$dis", elt.fieldName())) { resBob.append(elt); } } BSONObj resObj = resBob.obj(); // Don't make a too-big result object. if (resultBuilder.len() + resObj.objsize()> BSONObjMaxUserSize) { warning() << "Too many geoNear results for query " << rewritten.toString() << ", truncating output."; break; } // Add the next result to the result builder. BSONObjBuilder oneResultBuilder( resultBuilder.subobjStart(BSONObjBuilder::numStr(results))); oneResultBuilder.append("dis", dist); if (includeLocs) { oneResultBuilder.appendAs(currObj["$pt"], "loc"); } oneResultBuilder.append("obj", resObj); oneResultBuilder.done(); ++results; } resultBuilder.done(); // Fill out the stats subobj. BSONObjBuilder stats(result.subobjStart("stats")); // Fill in nscanned from the explain. PlanSummaryStats summary; Explain::getSummaryStats(exec.get(), &summary); stats.appendNumber("nscanned", summary.totalKeysExamined); stats.appendNumber("objectsLoaded", summary.totalDocsExamined); stats.append("avgDistance", totalDistance / results); stats.append("maxDistance", farthestDist); stats.append("time", txn->getCurOp()->elapsedMillis()); stats.done(); return true; }
static inline void iterate(Collection& collection, Iterator begin, Iterator end, buffer_side_selector side, DistanceStrategy const& distance, JoinStrategy const& join_strategy, bool close = false) { output_point_type previous_p1, previous_p2; output_point_type first_p1, first_p2; bool first = true; Iterator it = begin; // We want to memorize the last vector too. typedef BOOST_TYPEOF(*it) point_type; point_type last_ip1, last_ip2; for (Iterator prev = it++; it != end; ++it) { if (! detail::equals::equals_point_point(*prev, *it)) { output_point_type p1, p2; last_ip1 = *prev; last_ip2 = *it; generate_side(*prev, *it, side, distance, p1, p2); std::vector<output_point_type> range_out; if (! first) { output_point_type p; segment_type s1(p1, p2); segment_type s2(previous_p1, previous_p2); if (line_line_intersection<output_point_type, segment_type>::apply(s1, s2, p)) { join_strategy.apply(p, *prev, previous_p2, p1, distance.apply(*prev, *it, side), range_out); } } else { first = false; first_p1 = p1; first_p2 = p2; } if (! range_out.empty()) { collection.add_piece(buffered_join, *prev, range_out); range_out.clear(); } collection.add_piece(buffered_segment, *prev, *it, p1, p2); previous_p1 = p1; previous_p2 = p2; prev = it; } } // Might be replaced by specialization if(boost::is_same<Tag, ring_tag>::value) { // Generate closing corner output_point_type p; segment_type s1(previous_p1, previous_p2); segment_type s2(first_p1, first_p2); if (line_line_intersection<output_point_type, segment_type>::apply(s1, s2, p)) { std::vector<output_point_type> range_out; join_strategy.apply(p, *begin, previous_p2, first_p1, distance.apply(*(end - 1), *begin, side), range_out); if (! 
range_out.empty()) { collection.add_piece(buffered_join, *begin, range_out); } } // Buffer is closed automatically by last closing corner (NOT FOR OPEN POLYGONS - TODO) } else if (boost::is_same<Tag, linestring_tag>::value) { // Assume flat-end-strategy for now // TODO fix this (approach) for one-side buffer (1.5 - -1.0) output_point_type rp1, rp2; generate_side(last_ip2, last_ip1, side == buffer_side_left ? buffer_side_right : buffer_side_left, distance, rp2, rp1); // For flat end: std::vector<output_point_type> range_out; range_out.push_back(previous_p2); if (close) { range_out.push_back(rp2); } collection.add_piece(buffered_flat_end, range_out); } }
/**
 * Drops the collection 'fullns' from this database.
 *
 * Returns Status::OK() when the collection was dropped or did not exist in the first place.
 * Returns IllegalOperation for system namespaces (system.profile may only be dropped while
 * profiling is off), the failing status if the indexes could not be dropped, InternalError
 * if dropping the indexes threw a DBException, or the status of _dropNS().
 */
Status Database::dropCollection( const StringData& fullns ) {
    LOG(1) << "dropCollection: " << fullns << endl;
    massertNamespaceNotIndex( fullns, "dropCollection" );

    Collection* collection = getCollection( fullns );
    if ( !collection ) {
        // collection doesn't exist
        return Status::OK();
    }

    _initForWrites();

    {
        NamespaceString s( fullns );
        verify( s.db() == _name );

        if( s.isSystem() ) {
            if( s.coll() == "system.profile" ) {
                if ( _profile != 0 )
                    return Status( ErrorCodes::IllegalOperation,
                                   "turn off profiling before dropping system.profile collection" );
            }
            else {
                return Status( ErrorCodes::IllegalOperation, "can't drop system ns" );
            }
        }
    }

    BackgroundOperation::assertNoBgOpInProgForNs( fullns );

    audit::logDropCollection( currentClient.get(), fullns );

    // Indexes go first; the collection itself is only dropped once none remain.
    try {
        Status s = collection->getIndexCatalog()->dropAllIndexes( true );
        if ( !s.isOK() ) {
            // Trailing space keeps the namespace from running into the preceding word.
            warning() << "could not drop collection, trying to drop indexes "
                      << fullns << " because of " << s.toString();
            return s;
        }
    }
    catch( const DBException& e ) {
        stringstream ss;
        ss << "drop: dropIndexes for collection failed. cause: " << e.what();
        ss << ". See http://dochub.mongodb.org/core/data-recovery";
        warning() << ss.str() << endl;
        return Status( ErrorCodes::InternalError, ss.str() );
    }

    verify( collection->_details->getTotalIndexCount() == 0 );
    LOG(1) << "\t dropIndexes done" << endl;

    ClientCursor::invalidate( fullns );
    Top::global.collectionDropped( fullns );

    Status s = _dropNS( fullns );

    _clearCollectionCache( fullns ); // we want to do this always

    if ( !s.isOK() )
        return s;

    DEV {
        // check all index collection entries are gone
        string nstocheck = fullns.toString() + ".$";
        scoped_lock lk( _collectionLock );
        for ( CollectionMap::const_iterator i = _collections.begin();
              i != _collections.end();
              ++i ) {
            string temp = i->first;
            // Only cache entries whose name starts with "<fullns>.$" are of interest.
            if ( temp.find( nstocheck ) != 0 )
                continue;
            log() << "after drop, bad cache entries for: "
                  << fullns << " have " << temp;
            verify(0);
        }
    }

    return Status::OK();
}
bool MigrationSourceManager::clone(OperationContext* txn, string& errmsg, BSONObjBuilder& result) { ElapsedTracker tracker(internalQueryExecYieldIterations, internalQueryExecYieldPeriodMS); int allocSize = 0; { AutoGetCollectionForRead ctx(txn, _getNS()); stdx::lock_guard<stdx::mutex> sl(_mutex); if (!_active) { errmsg = "not active"; return false; } Collection* collection = ctx.getCollection(); if (!collection) { errmsg = str::stream() << "collection " << _ns << " does not exist"; return false; } allocSize = std::min( BSONObjMaxUserSize, static_cast<int>((12 + collection->averageObjectSize(txn)) * cloneLocsRemaining())); } bool isBufferFilled = false; BSONArrayBuilder clonedDocsArrayBuilder(allocSize); while (!isBufferFilled) { AutoGetCollectionForRead ctx(txn, _getNS()); stdx::lock_guard<stdx::mutex> sl(_mutex); if (!_active) { errmsg = "not active"; return false; } // TODO: fix SERVER-16540 race Collection* collection = ctx.getCollection(); if (!collection) { errmsg = str::stream() << "collection " << _ns << " does not exist"; return false; } stdx::lock_guard<stdx::mutex> lk(_cloneLocsMutex); std::set<RecordId>::iterator cloneLocsIter = _cloneLocs.begin(); for (; cloneLocsIter != _cloneLocs.end(); ++cloneLocsIter) { if (tracker.intervalHasElapsed()) // should I yield? break; RecordId recordId = *cloneLocsIter; Snapshotted<BSONObj> doc; if (!collection->findDoc(txn, recordId, &doc)) { // doc was deleted continue; } // Use the builder size instead of accumulating 'doc's size so that we take // into consideration the overhead of BSONArray indices, and *always* // append one doc. if (clonedDocsArrayBuilder.arrSize() != 0 && (clonedDocsArrayBuilder.len() + doc.value().objsize() + 1024) > BSONObjMaxUserSize) { isBufferFilled = true; // break out of outer while loop break; } clonedDocsArrayBuilder.append(doc.value()); } _cloneLocs.erase(_cloneLocs.begin(), cloneLocsIter); // Note: must be holding _cloneLocsMutex, don't move this inside while condition! 
if (_cloneLocs.empty()) { break; } } result.appendArray("objects", clonedDocsArrayBuilder.arr()); return true; }
void testLink() { SearchCreateJob *create = new SearchCreateJob( "linkTestFolder", "dummy query", this ); AKVERIFYEXEC( create ); CollectionFetchJob *list = new CollectionFetchJob( Collection( 1 ), CollectionFetchJob::Recursive, this ); AKVERIFYEXEC( list ); Collection col; foreach ( const Collection &c, list->collections() ) { if ( c.name() == "linkTestFolder" ) { col = c; } } QVERIFY( col.isValid() ); Item::List items; items << Item( 3 ) << Item( 4 ) << Item( 6 ); Monitor *monitor = new Monitor( this ); monitor->setCollectionMonitored( col ); monitor->itemFetchScope().fetchFullPayload(); qRegisterMetaType<Akonadi::Collection>(); qRegisterMetaType<Akonadi::Item>(); QSignalSpy lspy( monitor, SIGNAL(itemLinked(Akonadi::Item,Akonadi::Collection)) ); QSignalSpy uspy( monitor, SIGNAL(itemUnlinked(Akonadi::Item,Akonadi::Collection)) ); QVERIFY( lspy.isValid() ); QVERIFY( uspy.isValid() ); LinkJob *link = new LinkJob( col, items, this ); AKVERIFYEXEC( link ); QTest::qWait( 1000 ); QVERIFY( uspy.isEmpty() ); QCOMPARE( lspy.count(), 3 ); QList<QVariant> arg = lspy.takeFirst(); Item item = arg.at( 0 ).value<Item>(); QCOMPARE( item.mimeType(), QString::fromLatin1( "application/octet-stream" ) ); QVERIFY( item.hasPayload<QByteArray>() ); lspy.clear(); ItemFetchJob *fetch = new ItemFetchJob( col ); AKVERIFYEXEC( fetch ); QCOMPARE( fetch->items().count(), 3 ); foreach ( const Item &item, fetch->items() ) { QVERIFY( items.contains( item ) ); } UnlinkJob *unlink = new UnlinkJob( col, items, this ); AKVERIFYEXEC( unlink ); QTest::qWait( 1000 ); QVERIFY( lspy.isEmpty() ); QCOMPARE( uspy.count(), 3 ); fetch = new ItemFetchJob( col ); AKVERIFYEXEC( fetch ); QCOMPARE( fetch->items().count(), 0 ); }
std::string DBHashCmd::hashCollection(OperationContext* opCtx, Database* db, const std::string& fullCollectionName, bool* fromCache) { stdx::unique_lock<stdx::mutex> cachedHashedLock(_cachedHashedMutex, stdx::defer_lock); if (isCachable(fullCollectionName)) { cachedHashedLock.lock(); string hash = _cachedHashed[fullCollectionName]; if (hash.size() > 0) { *fromCache = true; return hash; } } *fromCache = false; Collection* collection = db->getCollection(fullCollectionName); if (!collection) return ""; IndexDescriptor* desc = collection->getIndexCatalog()->findIdIndex(opCtx); unique_ptr<PlanExecutor> exec; if (desc) { exec.reset(InternalPlanner::indexScan(opCtx, collection, desc, BSONObj(), BSONObj(), false, InternalPlanner::FORWARD, InternalPlanner::IXSCAN_FETCH)); } else if (collection->isCapped()) { exec.reset(InternalPlanner::collectionScan(opCtx, fullCollectionName, collection)); } else { log() << "can't find _id index for: " << fullCollectionName << endl; return "no _id _index"; } md5_state_t st; md5_init(&st); long long n = 0; PlanExecutor::ExecState state; BSONObj c; verify(NULL != exec.get()); while (PlanExecutor::ADVANCED == (state = exec->getNext(&c, NULL))) { md5_append(&st, (const md5_byte_t*)c.objdata(), c.objsize()); n++; } if (PlanExecutor::IS_EOF != state) { warning() << "error while hashing, db dropped? ns=" << fullCollectionName << endl; } md5digest d; md5_finish(&st, d); string hash = digestToString(d); if (cachedHashedLock.owns_lock()) { _cachedHashed[fullCollectionName] = hash; } return hash; }
/**
 * Recursively turns the query solution tree rooted at 'root' into a tree of execution
 * stages over namespace 'ns'. Every stage is handed the working set 'ws'.
 *
 * Returns the newly allocated root stage (raw pointer), or NULL if a node could not be
 * translated: unknown node type, missing collection or index, a failed child, or a text
 * query that does not parse. In the multi-child cases the partially built parent is held
 * in an auto_ptr and is therefore destroyed when a child fails.
 */
PlanStage* buildStages(const string& ns, const QuerySolutionNode* root, WorkingSet* ws) {
    if (STAGE_COLLSCAN == root->getType()) {
        const CollectionScanNode* csn = static_cast<const CollectionScanNode*>(root);
        CollectionScanParams params;
        params.ns = csn->name;
        params.tailable = csn->tailable;
        params.direction = (csn->direction == 1) ? CollectionScanParams::FORWARD
                                                 : CollectionScanParams::BACKWARD;
        return new CollectionScan(params, ws, csn->filter.get());
    }
    else if (STAGE_IXSCAN == root->getType()) {
        const IndexScanNode* ixn = static_cast<const IndexScanNode*>(root);
        //
        // XXX XXX
        // Given that this grabs data from the catalog, we must do this inside of a lock.
        // We should change this to take a (ns, index key pattern) pair so that the params
        // don't involve any on-disk data, just descriptions thereof.
        // XXX XXX
        //
        Database* db = cc().database();
        Collection* collection = db ? db->getCollection( ns ) : NULL;
        if (NULL == collection) {
            warning() << "Can't ixscan null ns " << ns << endl;
            return NULL;
        }
        NamespaceDetails* nsd = collection->details();
        int idxNo = nsd->findIndexByKeyPattern(ixn->indexKeyPattern);
        if (-1 == idxNo) {
            // Leading space keeps "in ns" from running into the key pattern.
            warning() << "Can't find idx " << ixn->indexKeyPattern.toString()
                      << " in ns " << ns << endl;
            return NULL;
        }

        IndexScanParams params;
        params.descriptor = collection->getIndexCatalog()->getDescriptor( idxNo );
        params.bounds = ixn->bounds;
        params.direction = ixn->direction;
        params.limit = ixn->limit;
        return new IndexScan(params, ws, ixn->filter.get());
    }
    else if (STAGE_FETCH == root->getType()) {
        const FetchNode* fn = static_cast<const FetchNode*>(root);
        PlanStage* childStage = buildStages(ns, fn->children[0], ws);
        if (NULL == childStage) { return NULL; }
        return new FetchStage(ws, childStage, fn->filter.get());
    }
    else if (STAGE_SORT == root->getType()) {
        const SortNode* sn = static_cast<const SortNode*>(root);
        PlanStage* childStage = buildStages(ns, sn->children[0], ws);
        if (NULL == childStage) { return NULL; }
        SortStageParams params;
        params.pattern = sn->pattern;
        params.query = sn->query;
        return new SortStage(params, ws, childStage);
    }
    else if (STAGE_PROJECTION == root->getType()) {
        const ProjectionNode* pn = static_cast<const ProjectionNode*>(root);
        PlanStage* childStage = buildStages(ns, pn->children[0], ws);
        if (NULL == childStage) { return NULL; }
        return new ProjectionStage(pn->projection, pn->fullExpression, ws, childStage);
    }
    else if (STAGE_LIMIT == root->getType()) {
        const LimitNode* ln = static_cast<const LimitNode*>(root);
        PlanStage* childStage = buildStages(ns, ln->children[0], ws);
        if (NULL == childStage) { return NULL; }
        return new LimitStage(ln->limit, ws, childStage);
    }
    else if (STAGE_SKIP == root->getType()) {
        const SkipNode* sn = static_cast<const SkipNode*>(root);
        PlanStage* childStage = buildStages(ns, sn->children[0], ws);
        if (NULL == childStage) { return NULL; }
        return new SkipStage(sn->skip, ws, childStage);
    }
    else if (STAGE_AND_HASH == root->getType()) {
        const AndHashNode* ahn = static_cast<const AndHashNode*>(root);
        auto_ptr<AndHashStage> ret(new AndHashStage(ws, ahn->filter.get()));
        for (size_t i = 0; i < ahn->children.size(); ++i) {
            PlanStage* childStage = buildStages(ns, ahn->children[i], ws);
            if (NULL == childStage) { return NULL; }
            ret->addChild(childStage);
        }
        return ret.release();
    }
    else if (STAGE_OR == root->getType()) {
        const OrNode * orn = static_cast<const OrNode*>(root);
        auto_ptr<OrStage> ret(new OrStage(ws, orn->dedup, orn->filter.get()));
        for (size_t i = 0; i < orn->children.size(); ++i) {
            PlanStage* childStage = buildStages(ns, orn->children[i], ws);
            if (NULL == childStage) { return NULL; }
            ret->addChild(childStage);
        }
        return ret.release();
    }
    else if (STAGE_AND_SORTED == root->getType()) {
        const AndSortedNode* asn = static_cast<const AndSortedNode*>(root);
        auto_ptr<AndSortedStage> ret(new AndSortedStage(ws, asn->filter.get()));
        for (size_t i = 0; i < asn->children.size(); ++i) {
            PlanStage* childStage = buildStages(ns, asn->children[i], ws);
            if (NULL == childStage) { return NULL; }
            ret->addChild(childStage);
        }
        return ret.release();
    }
    else if (STAGE_SORT_MERGE == root->getType()) {
        const MergeSortNode* msn = static_cast<const MergeSortNode*>(root);
        MergeSortStageParams params;
        params.dedup = msn->dedup;
        params.pattern = msn->sort;
        auto_ptr<MergeSortStage> ret(new MergeSortStage(params, ws));
        for (size_t i = 0; i < msn->children.size(); ++i) {
            PlanStage* childStage = buildStages(ns, msn->children[i], ws);
            if (NULL == childStage) { return NULL; }
            ret->addChild(childStage);
        }
        return ret.release();
    }
    else if (STAGE_GEO_2D == root->getType()) {
        const Geo2DNode* node = static_cast<const Geo2DNode*>(root);
        TwoDParams params;
        params.gq = node->gq;
        params.filter = node->filter.get();
        params.indexKeyPattern = node->indexKeyPattern;
        params.ns = ns;
        return new TwoD(params, ws);
    }
    else if (STAGE_GEO_NEAR_2D == root->getType()) {
        const GeoNear2DNode* node = static_cast<const GeoNear2DNode*>(root);
        TwoDNearParams params;
        params.nearQuery = node->nq;
        params.ns = ns;
        params.indexKeyPattern = node->indexKeyPattern;
        params.filter = node->filter.get();
        params.numWanted = node->numWanted;
        // XXX XXX where do we grab this from??  the near query...modify geo parser... :(
        params.uniqueDocs = false;
        // XXX XXX where do we grab this from??  the near query...modify geo parser... :(
        return new TwoDNear(params, ws);
    }
    else if (STAGE_GEO_NEAR_2DSPHERE == root->getType()) {
        const GeoNear2DSphereNode* node = static_cast<const GeoNear2DSphereNode*>(root);
        return new S2NearStage(ns, node->indexKeyPattern, node->nq, node->baseBounds,
                               node->filter.get(), ws);
    }
    else if (STAGE_TEXT == root->getType()) {
        const TextNode* node = static_cast<const TextNode*>(root);

        Database* db = cc().database();
        Collection* collection = db ? db->getCollection( ns ) : NULL;
        if (NULL == collection) { return NULL; }

        // A text stage is only built when exactly one "text" index is found.
        vector<int> idxMatches;
        collection->details()->findIndexByType("text", idxMatches);
        if (1 != idxMatches.size()) { return NULL; }
        IndexDescriptor* index = collection->getIndexCatalog()->getDescriptor(idxMatches[0]);
        auto_ptr<FTSAccessMethod> fam(new FTSAccessMethod(index));
        TextStageParams params(fam->getSpec());

        params.ns = ns;
        params.index = index;
        params.spec = fam->getSpec();
        params.limit = node->_numWanted;
        // The index prefix is written into params.indexPrefix through this out-pointer.
        Status s = fam->getSpec().getIndexPrefix(BSONObj(), &params.indexPrefix);
        if (!s.isOK()) { return NULL; }

        // Fall back to the index's default language when the query names none.
        string language = ("" == node->_language
                           ? fam->getSpec().defaultLanguage().str()
                           : node->_language);

        FTSQuery ftsq;
        Status parseStatus = ftsq.parse(node->_query, language);
        if (!parseStatus.isOK()) { return NULL; }
        params.query = ftsq;

        return new TextStage(params, ws, node->filter.get());
    }
    else if (STAGE_SHARDING_FILTER == root->getType()) {
        const ShardingFilterNode* fn = static_cast<const ShardingFilterNode*>(root);
        PlanStage* childStage = buildStages(ns, fn->children[0], ws);
        if (NULL == childStage) { return NULL; }
        return new ShardFilterStage(ns, ws, childStage);
    }
    else {
        stringstream ss;
        root->appendToString(&ss, 0);
        warning() << "Could not build exec tree for node " << ss.str() << endl;
        return NULL;
    }
}
// Returns the position obtained by advancing collection.elements() by
// collection.used_memory(); it is the counterpart of begin(), which returns
// collection.elements() itself.
// NOTE(review): presumably used_memory() is expressed in the same units as the stride of
// what elements() returns, so that the sum lands one past the last element - confirm.
type end(Collection const& collection) { return collection.elements() + collection.used_memory(); }
bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int, string& errmsg, BSONObjBuilder& result) { if (!cmdObj["start"].eoo()) { errmsg = "using deprecated 'start' argument to geoNear"; return false; } const NamespaceString nss(parseNs(dbname, cmdObj)); AutoGetCollectionForRead ctx(txn, nss); Collection* collection = ctx.getCollection(); if (!collection) { errmsg = "can't find ns"; return false; } IndexCatalog* indexCatalog = collection->getIndexCatalog(); // cout << "raw cmd " << cmdObj.toString() << endl; // We seek to populate this. string nearFieldName; bool using2DIndex = false; if (!getFieldName(txn, collection, indexCatalog, &nearFieldName, &errmsg, &using2DIndex)) { return false; } PointWithCRS point; uassert(17304, "'near' field must be point", GeoParser::parseQueryPoint(cmdObj["near"], &point).isOK()); bool isSpherical = cmdObj["spherical"].trueValue(); if (!using2DIndex) { uassert(17301, "2dsphere index must have spherical: true", isSpherical); } // Build the $near expression for the query. BSONObjBuilder nearBob; if (isSpherical) { nearBob.append("$nearSphere", cmdObj["near"].Obj()); } else { nearBob.append("$near", cmdObj["near"].Obj()); } if (!cmdObj["maxDistance"].eoo()) { uassert(17299, "maxDistance must be a number", cmdObj["maxDistance"].isNumber()); nearBob.append("$maxDistance", cmdObj["maxDistance"].number()); } if (!cmdObj["minDistance"].eoo()) { uassert(17298, "minDistance doesn't work on 2d index", !using2DIndex); uassert(17300, "minDistance must be a number", cmdObj["minDistance"].isNumber()); nearBob.append("$minDistance", cmdObj["minDistance"].number()); } if (!cmdObj["uniqueDocs"].eoo()) { warning() << nss << ": ignoring deprecated uniqueDocs option in geoNear command"; } // And, build the full query expression. 
BSONObjBuilder queryBob; queryBob.append(nearFieldName, nearBob.obj()); if (!cmdObj["query"].eoo() && cmdObj["query"].isABSONObj()) { queryBob.appendElements(cmdObj["query"].Obj()); } BSONObj rewritten = queryBob.obj(); // Extract the collation, if it exists. // TODO SERVER-23473: Pass this collation spec object down so that it can be converted into // a CollatorInterface. BSONObj collation; { BSONElement collationElt; Status collationEltStatus = bsonExtractTypedField(cmdObj, "collation", BSONType::Object, &collationElt); if (!collationEltStatus.isOK() && (collationEltStatus != ErrorCodes::NoSuchKey)) { return appendCommandStatus(result, collationEltStatus); } if (collationEltStatus.isOK()) { collation = collationElt.Obj(); } } long long numWanted = 100; const char* limitName = !cmdObj["num"].eoo() ? "num" : "limit"; BSONElement eNumWanted = cmdObj[limitName]; if (!eNumWanted.eoo()) { uassert(17303, "limit must be number", eNumWanted.isNumber()); numWanted = eNumWanted.safeNumberLong(); uassert(17302, "limit must be >=0", numWanted >= 0); } bool includeLocs = false; if (!cmdObj["includeLocs"].eoo()) { includeLocs = cmdObj["includeLocs"].trueValue(); } double distanceMultiplier = 1.0; BSONElement eDistanceMultiplier = cmdObj["distanceMultiplier"]; if (!eDistanceMultiplier.eoo()) { uassert(17296, "distanceMultiplier must be a number", eDistanceMultiplier.isNumber()); distanceMultiplier = eDistanceMultiplier.number(); uassert(17297, "distanceMultiplier must be non-negative", distanceMultiplier >= 0); } BSONObj projObj = BSON("$pt" << BSON("$meta" << LiteParsedQuery::metaGeoNearPoint) << "$dis" << BSON("$meta" << LiteParsedQuery::metaGeoNearDistance)); const ExtensionsCallbackReal extensionsCallback(txn, &nss); auto statusWithCQ = CanonicalQuery::canonicalize( nss, rewritten, BSONObj(), projObj, 0, numWanted, BSONObj(), extensionsCallback); if (!statusWithCQ.isOK()) { errmsg = "Can't parse filter / create query"; return false; } unique_ptr<CanonicalQuery> cq = 
std::move(statusWithCQ.getValue()); // Prevent chunks from being cleaned up during yields - this allows us to only check the // version on initial entry into geoNear. RangePreserver preserver(collection); auto statusWithPlanExecutor = getExecutor(txn, collection, std::move(cq), PlanExecutor::YIELD_AUTO, 0); if (!statusWithPlanExecutor.isOK()) { errmsg = "can't get query executor"; return false; } unique_ptr<PlanExecutor> exec = std::move(statusWithPlanExecutor.getValue()); auto curOp = CurOp::get(txn); { stdx::lock_guard<Client>(*txn->getClient()); curOp->setPlanSummary_inlock(Explain::getPlanSummary(exec.get())); } double totalDistance = 0; BSONObjBuilder resultBuilder(result.subarrayStart("results")); double farthestDist = 0; BSONObj currObj; long long results = 0; PlanExecutor::ExecState state; while (PlanExecutor::ADVANCED == (state = exec->getNext(&currObj, NULL))) { // Come up with the correct distance. double dist = currObj["$dis"].number() * distanceMultiplier; totalDistance += dist; if (dist > farthestDist) { farthestDist = dist; } // Strip out '$dis' and '$pt' from the result obj. The rest gets added as 'obj' // in the command result. BSONObjIterator resIt(currObj); BSONObjBuilder resBob; while (resIt.more()) { BSONElement elt = resIt.next(); if (!mongoutils::str::equals("$pt", elt.fieldName()) && !mongoutils::str::equals("$dis", elt.fieldName())) { resBob.append(elt); } } BSONObj resObj = resBob.obj(); // Don't make a too-big result object. if (resultBuilder.len() + resObj.objsize() > BSONObjMaxUserSize) { warning() << "Too many geoNear results for query " << rewritten.toString() << ", truncating output."; break; } // Add the next result to the result builder. 
BSONObjBuilder oneResultBuilder( resultBuilder.subobjStart(BSONObjBuilder::numStr(results))); oneResultBuilder.append("dis", dist); if (includeLocs) { oneResultBuilder.appendAs(currObj["$pt"], "loc"); } oneResultBuilder.append("obj", resObj); oneResultBuilder.done(); ++results; // Break if we have the number of requested result documents. if (results >= numWanted) { break; } } resultBuilder.done(); // Return an error if execution fails for any reason. if (PlanExecutor::FAILURE == state || PlanExecutor::DEAD == state) { log() << "Plan executor error during geoNear command: " << PlanExecutor::statestr(state) << ", stats: " << Explain::getWinningPlanStats(exec.get()); return appendCommandStatus(result, Status(ErrorCodes::OperationFailed, str::stream() << "Executor error during geoNear command: " << WorkingSetCommon::toStatusString(currObj))); } PlanSummaryStats summary; Explain::getSummaryStats(*exec, &summary); // Fill out the stats subobj. BSONObjBuilder stats(result.subobjStart("stats")); stats.appendNumber("nscanned", summary.totalKeysExamined); stats.appendNumber("objectsLoaded", summary.totalDocsExamined); if (results > 0) { stats.append("avgDistance", totalDistance / results); } stats.append("maxDistance", farthestDist); stats.append("time", curOp->elapsedMillis()); stats.done(); collection->infoCache()->notifyOfQuery(txn, summary.indexesUsed); curOp->debug().setPlanSummaryMetrics(summary); if (curOp->shouldDBProfile(curOp->elapsedMillis())) { BSONObjBuilder execStatsBob; Explain::getWinningPlanStats(exec.get(), &execStatsBob); curOp->debug().execStats.set(execStatsBob.obj()); } return true; }
// Start-of-iteration accessor: hands back whatever `collection.elements()`
// yields for the given collection.
type begin(const Collection& collection)
{
    return collection.elements();
}
// Runs an aggregation pipeline command.
//
// Steps, in order:
//   1. Resolve the namespace from 'db'/'cmdObj' and parse the pipeline.
//   2. Under a collection read lock, build the input executor, wrap the pipeline in a
//      PlanExecutor and (when the collection exists) register a pinned ClientCursor for it.
//   3. Outside that lock, produce the reply (explain / cursor / inline results) and then
//      release or delete the pinned cursor under freshly acquired DB + collection locks.
//
// Returns false (with 'errmsg' set here, or handed to Pipeline::parseCommand to fill in) when
// the namespace has no collection part or the pipeline cannot be parsed; returns true
// otherwise. Exceptions thrown while producing the reply are rethrown after the pinned cursor
// has been deleted.
virtual bool run(OperationContext* txn,
                 const string& db,
                 BSONObj& cmdObj,
                 int options,
                 string& errmsg,
                 BSONObjBuilder& result) {
    const std::string ns = parseNs(db, cmdObj);
    if (nsToCollectionSubstring(ns).empty()) {
        errmsg = "missing collection name";
        return false;
    }
    NamespaceString nss(ns);

    intrusive_ptr<ExpressionContext> pCtx = new ExpressionContext(txn, nss);
    pCtx->tempDir = storageGlobalParams.dbpath + "/_tmp";

    /* try to parse the command; if this fails, then we didn't run */
    intrusive_ptr<Pipeline> pPipeline = Pipeline::parseCommand(errmsg, cmdObj, pCtx);
    if (!pPipeline.get())
        return false;

    // This is outside of the if block to keep the object alive until the pipeline is finished.
    BSONObj parsed;
    if (kDebugBuild && !pPipeline->isExplain() && !pCtx->inShard) {
        // Make sure all operations round-trip through Pipeline::toBson() correctly by
        // reparsing every command in debug builds. This is important because sharded
        // aggregations rely on this ability.  Skipping when inShard because this has
        // already been through the transformation (and this unsets pCtx->inShard).
        parsed = pPipeline->serialize().toBson();
        pPipeline = Pipeline::parseCommand(errmsg, parsed, pCtx);
        verify(pPipeline);
    }

    unique_ptr<ClientCursorPin> pin;  // either this OR the exec will be non-null
    unique_ptr<PlanExecutor> exec;
    {
        // This will throw if the sharding version for this connection is out of date. The
        // lock must be held continuously from now until we have created both the output
        // ClientCursor and the input executor. This ensures that both are using the same
        // sharding version that we synchronize on here. This is also why we always need to
        // create a ClientCursor even when we aren't outputting to a cursor. See the comment
        // on ShardFilterStage for more details.
        AutoGetCollectionForRead ctx(txn, nss.ns());

        Collection* collection = ctx.getCollection();

        // This does mongod-specific stuff like creating the input PlanExecutor and adding
        // it to the front of the pipeline if needed.
        std::shared_ptr<PlanExecutor> input =
            PipelineD::prepareCursorSource(txn, collection, pPipeline, pCtx);
        pPipeline->stitch();

        // Create the PlanExecutor which returns results from the pipeline. The WorkingSet
        // ('ws') and the PipelineProxyStage ('proxy') will be owned by the created
        // PlanExecutor.
        unique_ptr<WorkingSet> ws(new WorkingSet());
        unique_ptr<PipelineProxyStage> proxy(
            new PipelineProxyStage(pPipeline, input, ws.get()));

        // The two PlanExecutor::make() calls differ only in their fourth argument: the
        // namespace string when there is no collection, the collection itself otherwise.
        auto statusWithPlanExecutor = (NULL == collection)
            ? PlanExecutor::make(
                  txn, std::move(ws), std::move(proxy), nss.ns(), PlanExecutor::YIELD_MANUAL)
            : PlanExecutor::make(
                  txn, std::move(ws), std::move(proxy), collection, PlanExecutor::YIELD_MANUAL);
        invariant(statusWithPlanExecutor.isOK());
        exec = std::move(statusWithPlanExecutor.getValue());

        if (!collection && input) {
            // If we don't have a collection, we won't be able to register any executors, so
            // make sure that the input PlanExecutor (likely wrapping an EOFStage) doesn't
            // need to be registered.
            invariant(!input->collection());
        }

        if (collection) {
            const bool isAggCursor = true;  // enable special locking behavior
            // 'exec' gives up ownership here (exec.release()); from this point the executor
            // is reached through the pinned cursor (pin->c()->getExecutor()).
            ClientCursor* cursor =
                new ClientCursor(collection->getCursorManager(),
                                 exec.release(),
                                 nss.ns(),
                                 txn->recoveryUnit()->isReadingFromMajorityCommittedSnapshot(),
                                 0,
                                 cmdObj.getOwned(),
                                 isAggCursor);
            pin.reset(new ClientCursorPin(collection->getCursorManager(), cursor->cursorid()));
            // Don't add any code between here and the start of the try block.
        }

        // At this point, it is safe to release the collection lock.
        // - In the case where we have a collection: we will need to reacquire the
        //   collection lock later when cleaning up our ClientCursorPin.
        // - In the case where we don't have a collection: our PlanExecutor won't be
        //   registered, so it will be safe to clean it up outside the lock.
        invariant(NULL == exec.get() || NULL == exec->collection());
    }

    try {
        // Unless set to true, the ClientCursor created above will be deleted on block exit.
        bool keepCursor = false;

        const bool isCursorCommand = !cmdObj["cursor"].eoo();

        // If both explain and cursor are specified, explain wins.
        if (pPipeline->isExplain()) {
            result << "stages" << Value(pPipeline->writeExplainOps());
        } else if (isCursorCommand) {
            // Use the executor owned by the pinned cursor when there is one; otherwise the
            // local 'exec' still owns it.
            keepCursor = handleCursorCommand(txn,
                                             nss.ns(),
                                             pin.get(),
                                             pin ? pin->c()->getExecutor() : exec.get(),
                                             cmdObj,
                                             result);
        } else {
            pPipeline->run(result);
        }

        // Clean up our ClientCursorPin, if needed.  We must reacquire the collection lock
        // in order to do so.
        if (pin) {
            // We acquire locks here with DBLock and CollectionLock instead of using
            // AutoGetCollectionForRead.  AutoGetCollectionForRead will throw if the
            // sharding version is out of date, and we don't care if the sharding version
            // has changed.
            Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_IS);
            Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_IS);
            if (keepCursor) {
                pin->release();
            } else {
                pin->deleteUnderlying();
            }
        }
    } catch (...) {
        // On our way out of scope, we clean up our ClientCursorPin if needed. Same locking
        // as the success path, but the cursor is always deleted.
        if (pin) {
            Lock::DBLock dbLock(txn->lockState(), nss.db(), MODE_IS);
            Lock::CollectionLock collLock(txn->lockState(), nss.ns(), MODE_IS);
            pin->deleteUnderlying();
        }
        throw;
    }
    // Any code that needs the cursor pinned must be inside the try block, above.

    return true;
}
// Appends storage statistics for the collection 'nss' to 'result'.
//
// 'param' may carry:
//   - "scale":   a number >= 1; sizes appended below are divided by it (defaults to 1).
//   - "verbose": truthy to request level 1 (instead of 0) from RecordStore::storageSize().
//
// Returns BadValue for an unusable "scale", NamespaceNotFound (after appending zeroed
// placeholder fields) when the database or collection is missing, and Status::OK() otherwise.
Status appendCollectionStorageStats(OperationContext* opCtx,
                                    const NamespaceString& nss,
                                    const BSONObj& param,
                                    BSONObjBuilder* result) {
    // --- Option parsing -------------------------------------------------------------------
    int scale = 1;
    const auto scaleElt = param["scale"];
    if (scaleElt.isNumber()) {
        scale = scaleElt.numberInt();
        if (scale < 1) {
            return {ErrorCodes::BadValue, "scale has to be >= 1"};
        }
    } else if (scaleElt.trueValue()) {
        // Present and truthy, but not numeric.
        return {ErrorCodes::BadValue, "scale has to be a number >= 1"};
    }

    const bool verbose = param["verbose"].trueValue();

    // --- Namespace lookup -----------------------------------------------------------------
    AutoGetCollectionForReadCommand ctx(opCtx, nss);
    Collection* collection = ctx.getCollection();  // null when the collection is absent

    if (!ctx.getDb() || !collection) {
        // Report an all-zero shape so the reply still carries the usual fields.
        result->appendNumber("size", 0);
        result->appendNumber("count", 0);
        result->appendNumber("storageSize", 0);
        result->append("nindexes", 0);
        result->appendNumber("totalIndexSize", 0);
        result->append("indexDetails", BSONObj());
        result->append("indexSizes", BSONObj());

        std::string errmsg;
        if (!(ctx.getDb())) {
            errmsg = "Database [" + nss.db().toString() + "] not found.";
        } else {
            errmsg = "Collection [" + nss.toString() + "] not found.";
        }
        return {ErrorCodes::NamespaceNotFound, errmsg};
    }

    // --- Record statistics ----------------------------------------------------------------
    long long scaledDataSize = collection->dataSize(opCtx) / scale;
    result->appendNumber("size", scaledDataSize);

    long long recordCount = collection->numRecords(opCtx);
    result->appendNumber("count", recordCount);
    if (recordCount) {
        result->append("avgObjSize", collection->averageObjectSize(opCtx));
    }

    RecordStore* recordStore = collection->getRecordStore();
    // storageSize() is also handed 'result', so it is evaluated before "storageSize" itself
    // is appended.
    result->appendNumber(
        "storageSize",
        static_cast<long long>(recordStore->storageSize(opCtx, result, verbose ? 1 : 0)) /
            scale);
    recordStore->appendCustomStats(opCtx, result, scale);

    // --- Index statistics -----------------------------------------------------------------
    IndexCatalog* indexCatalog = collection->getIndexCatalog();
    result->append("nindexes", indexCatalog->numIndexesReady(opCtx));

    BSONObjBuilder indexDetails;
    for (std::unique_ptr<IndexCatalog::IndexIterator> indexIt =
             indexCatalog->getIndexIterator(opCtx, false);
         indexIt->more();) {
        const IndexCatalogEntry* entry = indexIt->next();
        const IndexDescriptor* descriptor = entry->descriptor();
        const IndexAccessMethod* accessMethod = entry->accessMethod();
        invariant(accessMethod);

        // Only indexes whose access method reports custom stats get an entry.
        BSONObjBuilder customStats;
        if (accessMethod->appendCustomStats(opCtx, &customStats, scale)) {
            indexDetails.append(descriptor->indexName(), customStats.obj());
        }
    }
    result->append("indexDetails", indexDetails.obj());

    BSONObjBuilder indexSizes;
    long long totalIndexBytes = collection->getIndexSize(opCtx, &indexSizes, scale);
    result->appendNumber("totalIndexSize", totalIndexBytes / scale);
    result->append("indexSizes", indexSizes.obj());

    return Status::OK();
}
virtual bool run( const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl = false ) { // --- parse NamespaceString ns( dbname, cmdObj[name].String() ); Status status = userAllowedWriteNS( ns ); if ( !status.isOK() ) return appendCommandStatus( result, status ); if ( cmdObj["indexes"].type() != Array ) { errmsg = "indexes has to be an array"; result.append( "cmdObj", cmdObj ); return false; } std::vector<BSONObj> specs; { BSONObjIterator i( cmdObj["indexes"].Obj() ); while ( i.more() ) { BSONElement e = i.next(); if ( e.type() != Object ) { errmsg = "everything in indexes has to be an Object"; result.append( "cmdObj", cmdObj ); return false; } specs.push_back( e.Obj() ); } } if ( specs.size() == 0 ) { errmsg = "no indexes to add"; return false; } // check specs for ( size_t i = 0; i < specs.size(); i++ ) { BSONObj spec = specs[i]; if ( spec["ns"].eoo() ) { spec = _addNsToSpec( ns, spec ); specs[i] = spec; } if ( spec["ns"].type() != String ) { errmsg = "spec has no ns"; result.append( "spec", spec ); return false; } if ( ns != spec["ns"].String() ) { errmsg = "namespace mismatch"; result.append( "spec", spec ); return false; } } { // We first take a read lock to see if we need to do anything // as many calls are ensureIndex (and hence no-ops), this is good so its a shared // lock for common calls. We only take write lock if needed. // Note: createIndexes command does not currently respect shard versioning. 
Client::ReadContext readContext( ns, storageGlobalParams.dbpath, false /* doVersion */ ); const Collection* collection = readContext.ctx().db()->getCollection( ns.ns() ); if ( collection ) { for ( size_t i = 0; i < specs.size(); i++ ) { BSONObj spec = specs[i]; StatusWith<BSONObj> statusWithSpec = collection->getIndexCatalog()->prepareSpecForCreate( spec ); status = statusWithSpec.getStatus(); if ( status.code() == ErrorCodes::IndexAlreadyExists ) { specs.erase( specs.begin() + i ); i--; continue; } if ( !status.isOK() ) return appendCommandStatus( result, status ); } if ( specs.size() == 0 ) { result.append( "numIndexesBefore", collection->getIndexCatalog()->numIndexesTotal() ); result.append( "note", "all indexes already exist" ); return true; } // need to create index } } // now we know we have to create index(es) // Note: createIndexes command does not currently respect shard versioning. Client::WriteContext writeContext( ns.ns(), storageGlobalParams.dbpath, false /* doVersion */ ); Database* db = writeContext.ctx().db(); Collection* collection = db->getCollection( ns.ns() ); result.appendBool( "createdCollectionAutomatically", collection == NULL ); if ( !collection ) { collection = db->createCollection( ns.ns() ); invariant( collection ); } result.append( "numIndexesBefore", collection->getIndexCatalog()->numIndexesTotal() ); for ( size_t i = 0; i < specs.size(); i++ ) { BSONObj spec = specs[i]; if ( spec["unique"].trueValue() ) { status = checkUniqueIndexConstraints( ns.ns(), spec["key"].Obj() ); if ( !status.isOK() ) { appendCommandStatus( result, status ); return false; } } status = collection->getIndexCatalog()->createIndex( spec, true ); if ( status.code() == ErrorCodes::IndexAlreadyExists ) { if ( !result.hasField( "note" ) ) result.append( "note", "index already exists" ); continue; } if ( !status.isOK() ) { appendCommandStatus( result, status ); return false; } } result.append( "numIndexesAfter", collection->getIndexCatalog()->numIndexesTotal() ); if ( 
!fromRepl ) { string cmdNs = ns.getCommandNS(); logOp( "c", cmdNs.c_str(), cmdObj ); } return true; }
void IndexRebuilder::checkNS(const std::list<std::string>& nsToCheck) { bool firstTime = true; for (std::list<std::string>::const_iterator it = nsToCheck.begin(); it != nsToCheck.end(); ++it) { string ns = *it; LOG(3) << "IndexRebuilder::checkNS: " << ns; // This write lock is held throughout the index building process // for this namespace. Client::WriteContext ctx(ns); DurTransaction txn; // XXX??? Collection* collection = ctx.ctx().db()->getCollection( ns ); if ( collection == NULL ) continue; IndexCatalog* indexCatalog = collection->getIndexCatalog(); if ( collection->ns().isOplog() && indexCatalog->numIndexesTotal() > 0 ) { warning() << ns << " had illegal indexes, removing"; indexCatalog->dropAllIndexes(&txn, true); continue; } vector<BSONObj> indexesToBuild = indexCatalog->getAndClearUnfinishedIndexes(&txn); // The indexes have now been removed from system.indexes, so the only record is // in-memory. If there is a journal commit between now and when insert() rewrites // the entry and the db crashes before the new system.indexes entry is journalled, // the index will be lost forever. Thus, we're assuming no journaling will happen // between now and the entry being re-written. if ( indexesToBuild.size() == 0 ) { continue; } log() << "found " << indexesToBuild.size() << " interrupted index build(s) on " << ns; if (firstTime) { log() << "note: restart the server with --noIndexBuildRetry to skip index rebuilds"; firstTime = false; } if (!serverGlobalParams.indexBuildRetry) { log() << " not rebuilding interrupted indexes"; continue; } // TODO: these can/should/must be done in parallel for ( size_t i = 0; i < indexesToBuild.size(); i++ ) { BSONObj indexObj = indexesToBuild[i]; log() << "going to rebuild: " << indexObj; Status status = indexCatalog->createIndex(&txn, indexObj, false); if ( !status.isOK() ) { log() << "building index failed: " << status.toString() << " index: " << indexObj; } } } }
bool MigrationSourceManager::storeCurrentLocs(OperationContext* txn, long long maxChunkSize, string& errmsg, BSONObjBuilder& result) { AutoGetCollectionForRead ctx(txn, _getNS()); Collection* collection = ctx.getCollection(); if (!collection) { errmsg = "ns not found, should be impossible"; return false; } // Allow multiKey based on the invariant that shard keys must be single-valued. Therefore, any // multi-key index prefixed by shard key cannot be multikey over the shard key fields. IndexDescriptor* idx = collection->getIndexCatalog()->findShardKeyPrefixedIndex(txn, _shardKeyPattern, false); // requireSingleKey if (idx == NULL) { errmsg = str::stream() << "can't find index with prefix " << _shardKeyPattern << " in storeCurrentLocs for " << _ns; return false; } // Assume both min and max non-empty, append MinKey's to make them fit chosen index BSONObj min; BSONObj max; KeyPattern kp(idx->keyPattern()); { // It's alright not to lock _mutex all the way through based on the assumption that this is // only called by the main thread that drives the migration and only it can start and stop // the current migration. stdx::lock_guard<stdx::mutex> sl(_mutex); invariant(_deleteNotifyExec.get() == NULL); unique_ptr<WorkingSet> ws = stdx::make_unique<WorkingSet>(); unique_ptr<DeleteNotificationStage> dns = stdx::make_unique<DeleteNotificationStage>(this); // Takes ownership of 'ws' and 'dns'. 
auto statusWithPlanExecutor = PlanExecutor::make( txn, std::move(ws), std::move(dns), collection, PlanExecutor::YIELD_MANUAL); invariant(statusWithPlanExecutor.isOK()); _deleteNotifyExec = std::move(statusWithPlanExecutor.getValue()); _deleteNotifyExec->registerExec(); min = Helpers::toKeyFormat(kp.extendRangeBound(_min, false)); max = Helpers::toKeyFormat(kp.extendRangeBound(_max, false)); } unique_ptr<PlanExecutor> exec( InternalPlanner::indexScan(txn, collection, idx, min, max, false)); // We can afford to yield here because any change to the base data that we might miss is already // being queued and will migrate in the 'transferMods' stage. exec->setYieldPolicy(PlanExecutor::YIELD_AUTO); // Use the average object size to estimate how many objects a full chunk would carry do that // while traversing the chunk's range using the sharding index, below there's a fair amount of // slack before we determine a chunk is too large because object sizes will vary. unsigned long long maxRecsWhenFull; long long avgRecSize; const long long totalRecs = collection->numRecords(txn); if (totalRecs > 0) { avgRecSize = collection->dataSize(txn) / totalRecs; maxRecsWhenFull = maxChunkSize / avgRecSize; maxRecsWhenFull = std::min((unsigned long long)(Chunk::MaxObjectPerChunk + 1), 130 * maxRecsWhenFull / 100 /* slack */); } else { avgRecSize = 0; maxRecsWhenFull = Chunk::MaxObjectPerChunk + 1; } // Do a full traversal of the chunk and don't stop even if we think it is a large chunk we want // the number of records to better report, in that case bool isLargeChunk = false; unsigned long long recCount = 0; RecordId recordId; while (PlanExecutor::ADVANCED == exec->getNext(NULL, &recordId)) { if (!isLargeChunk) { stdx::lock_guard<stdx::mutex> lk(_cloneLocsMutex); _cloneLocs.insert(recordId); } if (++recCount > maxRecsWhenFull) { isLargeChunk = true; // Continue on despite knowing that it will fail, just to get the correct value for // recCount } } exec.reset(); if (isLargeChunk) { 
stdx::lock_guard<stdx::mutex> sl(_mutex); warning() << "cannot move chunk: the maximum number of documents for a chunk is " << maxRecsWhenFull << " , the maximum chunk size is " << maxChunkSize << " , average document size is " << avgRecSize << ". Found " << recCount << " documents in chunk " << " ns: " << _ns << " " << _min << " -> " << _max << migrateLog; result.appendBool("chunkTooBig", true); result.appendNumber("estimatedChunkSize", (long long)(recCount * avgRecSize)); errmsg = "chunk too big to move"; return false; } log() << "moveChunk number of documents: " << cloneLocsRemaining() << migrateLog; txn->recoveryUnit()->abandonSnapshot(); return true; }
virtual bool errmsgRun(OperationContext* opCtx, const string& db, const BSONObj& cmdObj, string& errmsg, BSONObjBuilder& result) { NamespaceString nss = CommandHelpers::parseNsCollectionRequired(db, cmdObj); repl::ReplicationCoordinator* replCoord = repl::ReplicationCoordinator::get(opCtx); if (replCoord->getMemberState().primary() && !cmdObj["force"].trueValue()) { errmsg = "will not run compact on an active replica set primary as this is a slow blocking " "operation. use force:true to force"; return false; } if (!nss.isNormal()) { errmsg = "bad namespace name"; return false; } if (nss.isSystem()) { // items in system.* cannot be moved as there might be pointers to them // i.e. system.indexes entries are pointed to from NamespaceDetails errmsg = "can't compact a system namespace"; return false; } CompactOptions compactOptions; if (cmdObj["preservePadding"].trueValue()) { compactOptions.paddingMode = CompactOptions::PRESERVE; if (cmdObj.hasElement("paddingFactor") || cmdObj.hasElement("paddingBytes")) { errmsg = "cannot mix preservePadding and paddingFactor|paddingBytes"; return false; } } else if (cmdObj.hasElement("paddingFactor") || cmdObj.hasElement("paddingBytes")) { compactOptions.paddingMode = CompactOptions::MANUAL; if (cmdObj.hasElement("paddingFactor")) { compactOptions.paddingFactor = cmdObj["paddingFactor"].Number(); if (compactOptions.paddingFactor < 1 || compactOptions.paddingFactor > 4) { errmsg = "invalid padding factor"; return false; } } if (cmdObj.hasElement("paddingBytes")) { compactOptions.paddingBytes = cmdObj["paddingBytes"].numberInt(); if (compactOptions.paddingBytes < 0 || compactOptions.paddingBytes > (1024 * 1024)) { errmsg = "invalid padding bytes"; return false; } } } if (cmdObj.hasElement("validate")) compactOptions.validateDocuments = cmdObj["validate"].trueValue(); AutoGetDb autoDb(opCtx, db, MODE_X); Database* const collDB = autoDb.getDb(); Collection* collection = collDB ? 
collDB->getCollection(opCtx, nss) : nullptr; auto view = collDB && !collection ? collDB->getViewCatalog()->lookup(opCtx, nss.ns()) : nullptr; // If db/collection does not exist, short circuit and return. if (!collDB || !collection) { if (view) uasserted(ErrorCodes::CommandNotSupportedOnView, "can't compact a view"); else uasserted(ErrorCodes::NamespaceNotFound, "collection does not exist"); } OldClientContext ctx(opCtx, nss.ns()); BackgroundOperation::assertNoBgOpInProgForNs(nss.ns()); log() << "compact " << nss.ns() << " begin, options: " << compactOptions; StatusWith<CompactStats> status = collection->compact(opCtx, &compactOptions); uassertStatusOK(status.getStatus()); if (status.getValue().corruptDocuments > 0) result.append("invalidObjects", status.getValue().corruptDocuments); log() << "compact " << nss.ns() << " end"; return true; }
// Captures the begin/end iterators of '_collection' in 'b' and 'e', and stores '_transform'
// (a default-constructed TransformT when omitted) in 'transform'.
//
// NOTE(review): '_collection' is received by value, so 'b' and 'e' are taken from the
// constructor's own copy. This presumably relies on Collection being a cheap handle or
// reference-like type whose iterators stay valid after that copy is destroyed; confirm
// against the types this is instantiated with.
explicit BeginEnd(Collection _collection, const TransformT& _transform = TransformT() ):
    b(_collection.begin()),
    e(_collection.end()),
    transform(_transform)
{}
// Constructs the queue from a collection of term positions.
// The collection's size is forwarded to the PriorityQueue base constructor (presumably the
// queue's maximum size; confirm against PriorityQueue) and the collection itself is kept in
// the 'termPositions' member. No element is added to the queue here.
TermPositionsQueue::TermPositionsQueue(Collection<TermPositionsPtr> termPositions) : PriorityQueue<TermPositionsPtr>(termPositions.size()) {
    this->termPositions = termPositions;
}
void S2NearStage::init() { _initted = true; // The field we're near-ing from is the n-th field. Figure out what that 'n' is. We // put the cover for the search annulus in this spot in the bounds. _nearFieldIndex = 0; BSONObjIterator specIt(_params.indexKeyPattern); while (specIt.more()) { if (specIt.next().fieldName() == _params.nearQuery.field) { break; } ++_nearFieldIndex; } verify(_nearFieldIndex < _params.indexKeyPattern.nFields()); // FLAT implies the distances are in radians. Convert to meters. if (FLAT == _params.nearQuery.centroid.crs) { _params.nearQuery.minDistance *= kRadiusOfEarthInMeters; _params.nearQuery.maxDistance *= kRadiusOfEarthInMeters; } // Make sure distances are sane. Possibly redundant given the checking during parsing. _minDistance = max(0.0, _params.nearQuery.minDistance); _maxDistance = min(M_PI * kRadiusOfEarthInMeters, _params.nearQuery.maxDistance); _minDistance = min(_minDistance, _maxDistance); // We grow _outerRadius in nextAnnulus() below. _innerRadius = _outerRadius = _minDistance; _outerRadiusInclusive = false; // Grab the IndexDescriptor. Database* db = cc().database(); if (!db) { _failed = true; return; } Collection* collection = db->getCollection(_params.ns); if (!collection) { _failed = true; return; } _descriptor = collection->getIndexCatalog()->findIndexByKeyPattern(_params.indexKeyPattern); if (NULL == _descriptor) { _failed = true; return; } // The user can override this so we honor it. We could ignore it though -- it's just used // to set _radiusIncrement, not to do any covering. int finestIndexedLevel; BSONElement fl = _descriptor->infoObj()["finestIndexedLevel"]; if (fl.isNumber()) { finestIndexedLevel = fl.numberInt(); } else { finestIndexedLevel = S2::kAvgEdge.GetClosestLevel(500.0 / kRadiusOfEarthInMeters); } // Start with a conservative _radiusIncrement. When we're done searching a shell we // increment the two radii by this. 
_radiusIncrement = 5 * S2::kAvgEdge.GetValue(finestIndexedLevel) * kRadiusOfEarthInMeters; }
// Creates the capped collection 'shortTo' in 'db' (capped size 'size', optionally marked
// temporary via 'temp') using the options of the existing collection 'shortFrom', then copies
// documents from the source into it with a forward collection scan.
//
// Returns:
//   - CommandNotSupportedOnView when the source name resolves to a view,
//   - NamespaceNotFound when the source does not exist or is in a drop-pending state,
//   - NamespaceExists when the destination already exists,
//   - the createCollection() error when creating the destination fails,
//   - the executor's restoreState() error when recovery after a write conflict fails,
//   - Status::OK() once the scan reaches EOF.
// A failed insert is raised through uassertStatusOK().
mongo::Status mongo::cloneCollectionAsCapped(OperationContext* opCtx,
                                             Database* db,
                                             const std::string& shortFrom,
                                             const std::string& shortTo,
                                             long long size,
                                             bool temp) {
    NamespaceString fromNss(db->name(), shortFrom);
    NamespaceString toNss(db->name(), shortTo);

    Collection* fromCollection = db->getCollection(opCtx, fromNss);
    if (!fromCollection) {
        // Distinguish "it is a view" from "it does not exist at all".
        if (db->getViewCatalog()->lookup(opCtx, fromNss.ns())) {
            return Status(ErrorCodes::CommandNotSupportedOnView,
                          str::stream() << "cloneCollectionAsCapped not supported for views: "
                                        << fromNss.ns());
        }
        return Status(ErrorCodes::NamespaceNotFound,
                      str::stream() << "source collection " << fromNss.ns()
                                    << " does not exist");
    }

    if (fromNss.isDropPendingNamespace()) {
        return Status(ErrorCodes::NamespaceNotFound,
                      str::stream() << "source collection " << fromNss.ns()
                                    << " is currently in a drop-pending state.");
    }

    if (db->getCollection(opCtx, toNss)) {
        return Status(ErrorCodes::NamespaceExists,
                      str::stream() << "cloneCollectionAsCapped failed - destination collection "
                                    << toNss.ns()
                                    << " already exists. source collection: "
                                    << fromNss.ns());
    }

    // create new collection
    {
        // Start from the source collection's options and override the capped-related ones.
        auto options = fromCollection->getCatalogEntry()->getCollectionOptions(opCtx);
        // The capped collection will get its own new unique id, as the conversion isn't reversible,
        // so it can't be rolled back.
        options.uuid.reset();
        options.capped = true;
        options.cappedSize = size;
        if (temp)
            options.temp = true;

        BSONObjBuilder cmd;
        cmd.append("create", toNss.coll());
        cmd.appendElements(options.toBSON());
        Status status = createCollection(opCtx, toNss.db().toString(), cmd.done());
        if (!status.isOK())
            return status;
    }

    Collection* toCollection = db->getCollection(opCtx, toNss);
    invariant(toCollection);  // we created above

    // how much data to ignore because it won't fit anyway
    // datasize and extentSize can't be compared exactly, so add some padding to 'size'

    // Twice the larger of the requested size and the destination's current storage size.
    long long allocatedSpaceGuess =
        std::max(static_cast<long long>(size * 2),
                 static_cast<long long>(toCollection->getRecordStore()->storageSize(opCtx) * 2));

    // Positive when the source holds more data than the guess above; documents are skipped
    // from the start of the scan until this budget is used up (see the ADVANCED case below).
    long long excessSize = fromCollection->dataSize(opCtx) - allocatedSpaceGuess;

    auto exec = InternalPlanner::collectionScan(opCtx,
                                                fromNss.ns(),
                                                fromCollection,
                                                PlanExecutor::WRITE_CONFLICT_RETRY_ONLY,
                                                InternalPlanner::FORWARD);

    Snapshotted<BSONObj> objToClone;
    RecordId loc;
    PlanExecutor::ExecState state = PlanExecutor::FAILURE;  // suppress uninitialized warnings

    DisableDocumentValidation validationDisabler(opCtx);

    int retries = 0;  // non-zero when retrying our last document.
    while (true) {
        // Only advance the scan when the previous document is done; on a retry the previous
        // 'state', 'objToClone' and 'loc' are reused.
        if (!retries) {
            state = exec->getNextSnapshotted(&objToClone, &loc);
        }

        switch (state) {
            case PlanExecutor::IS_EOF:
                return Status::OK();
            case PlanExecutor::ADVANCED: {
                if (excessSize > 0) {
                    // 4x is for padding, power of 2, etc...
                    excessSize -= (4 * objToClone.value().objsize());
                    // Skips this document: 'continue' resumes the enclosing while loop.
                    continue;
                }
                break;
            }
            default:
                // Unreachable as:
                // 1) We require a read lock (at a minimum) on the "from" collection
                //    and won't yield, preventing collection drop and PlanExecutor::DEAD
                // 2) PlanExecutor::FAILURE is only returned on PlanStage::FAILURE. The
                //    CollectionScan PlanStage does not have a FAILURE scenario.
                // 3) All other PlanExecutor states are handled above
                MONGO_UNREACHABLE;
        }

        try {
            // Make sure we are working with the latest version of the document.
            if (objToClone.snapshotId() != opCtx->recoveryUnit()->getSnapshotId() &&
                !fromCollection->findDoc(opCtx, loc, &objToClone)) {
                // doc was deleted so don't clone it.
                retries = 0;
                continue;
            }

            WriteUnitOfWork wunit(opCtx);
            OpDebug* const nullOpDebug = nullptr;
            uassertStatusOK(toCollection->insertDocument(
                opCtx, InsertStatement(objToClone.value()), nullOpDebug, true));
            wunit.commit();

            // Go to the next document
            retries = 0;
        } catch (const WriteConflictException&) {
            CurOp::get(opCtx)->debug().additiveMetrics.incrementWriteConflicts(1);
            retries++;  // logAndBackoff expects this to be 1 on first call.
            WriteConflictException::logAndBackoff(retries, "cloneCollectionAsCapped", fromNss.ns());

            // Can't use writeConflictRetry since we need to save/restore exec around call to
            // abandonSnapshot.
            exec->saveState();
            opCtx->recoveryUnit()->abandonSnapshot();
            auto restoreStatus = exec->restoreState();  // Handles any WCEs internally.
            if (!restoreStatus.isOK()) {
                return restoreStatus;
            }
        }
    }

    MONGO_UNREACHABLE;
}
// Checks YAMLUtils::toValueMap() against a parameter-spec collection:
//   - with empty YAML, default values from the spec are present in the resulting ValueMap
//     (checked for the int32 and bool parameters);
//   - with a parameter name that is not in the spec, a nupic::Exception is thrown whose
//     message starts with the expected text, both with and without nodeType/regionName.
TEST(YAMLUtilsTest, ParameterSpec)
{
  Collection<ParameterSpec> ps;

  // --- Scalar parameters with default values ---
  ps.add(
    "int32Param",
    ParameterSpec(
      "Int32 scalar parameter",  // description
      NTA_BasicType_Int32,
      1,                         // elementCount
      "",                        // constraints
      "32",                      // defaultValue
      ParameterSpec::ReadWriteAccess));

  ps.add(
    "uint32Param",
    ParameterSpec(
      "UInt32 scalar parameter", // description
      NTA_BasicType_UInt32,
      1,                         // elementCount
      "",                        // constraints
      "33",                      // defaultValue
      ParameterSpec::ReadWriteAccess));

  ps.add(
    "int64Param",
    ParameterSpec(
      "Int64 scalar parameter",  // description
      NTA_BasicType_Int64,
      1,                         // elementCount
      "",                        // constraints
      "64",                      // defaultValue
      ParameterSpec::ReadWriteAccess));

  ps.add(
    "uint64Param",
    ParameterSpec(
      "UInt64 scalar parameter", // description
      NTA_BasicType_UInt64,
      1,                         // elementCount
      "",                        // constraints
      "65",                      // defaultValue
      ParameterSpec::ReadWriteAccess));

  ps.add(
    "real32Param",
    ParameterSpec(
      "Real32 scalar parameter", // description
      NTA_BasicType_Real32,
      1,                         // elementCount
      "",                        // constraints
      "32.1",                    // defaultValue
      ParameterSpec::ReadWriteAccess));

  ps.add(
    "real64Param",
    ParameterSpec(
      "Real64 scalar parameter", // description
      NTA_BasicType_Real64,
      1,                         // elementCount
      "",                        // constraints
      "64.1",                    // defaultValue
      ParameterSpec::ReadWriteAccess));

  // --- Array parameters (elementCount 0, no default) ---
  ps.add(
    "real32ArrayParam",
    ParameterSpec(
      "real32 array parameter",  // description matches the Real32 element type
      NTA_BasicType_Real32,
      0,                         // array
      "",
      "",
      ParameterSpec::ReadWriteAccess));

  ps.add(
    "int64ArrayParam",
    ParameterSpec(
      "int64 array parameter",
      NTA_BasicType_Int64,
      0,                         // array
      "",
      "",
      ParameterSpec::ReadWriteAccess));

  // --- Handle, string and bool parameters ---
  ps.add(
    "computeCallback",
    ParameterSpec(
      "address of a function that is called at every compute()",
      NTA_BasicType_Handle,
      1,
      "",
      "",                        // handles must not have a default value
      ParameterSpec::ReadWriteAccess));

  ps.add(
    "stringParam",
    ParameterSpec(
      "string parameter",
      NTA_BasicType_Byte,
      0,                         // length=0 required for strings
      "",
      "default value",
      ParameterSpec::ReadWriteAccess));

  ps.add(
    "boolParam",
    ParameterSpec(
      "bool parameter",
      NTA_BasicType_Bool,
      1,
      "",
      "false",
      ParameterSpec::ReadWriteAccess));

  NTA_DEBUG << "ps count: " << ps.getCount();

  // Empty YAML: only the defaults from the spec should show up.
  ValueMap vm = YAMLUtils::toValueMap("", ps);
  EXPECT_TRUE(vm.contains("int32Param"))
    << "assertion vm.contains(\"int32Param\") failed at "
    << __FILE__ << ":" << __LINE__ ;
  ASSERT_EQ((Int32)32, vm.getScalarT<Int32>("int32Param"));

  EXPECT_TRUE(vm.contains("boolParam"))
    << "assertion vm.contains(\"boolParam\") failed at "
    << __FILE__ << ":" << __LINE__ ;
  ASSERT_EQ(false, vm.getScalarT<bool>("boolParam"));

  // disabled until we fix default string params
  // TEST(vm.contains("stringParam"));
  // EXPECT_STREQ("default value", vm.getString("stringParam")->c_str());

  // Test error message in case of invalid parameter with and without nodeType and regionName.
  // Each case records whether the exception was actually raised; without that, the message
  // check would be skipped silently if toValueMap() stopped throwing.
  bool caughtWithRegion = false;
  try
  {
    YAMLUtils::toValueMap("{ blah: True }", ps, "nodeType", "regionName");
  }
  catch (nupic::Exception & e)
  {
    caughtWithRegion = true;
    std::string s("Unknown parameter 'blah' for region 'regionName'");
    EXPECT_TRUE(std::string(e.getMessage()).find(s) == 0)
      << "assertion std::string(e.getMessage()).find(s) == 0 failed at "
      << __FILE__ << ":" << __LINE__ ;
  }
  EXPECT_TRUE(caughtWithRegion)
    << "expected nupic::Exception for unknown parameter 'blah' (with nodeType/regionName) at "
    << __FILE__ << ":" << __LINE__ ;

  bool caughtWithoutRegion = false;
  try
  {
    YAMLUtils::toValueMap("{ blah: True }", ps);
  }
  catch (nupic::Exception & e)
  {
    caughtWithoutRegion = true;
    std::string s("Unknown parameter 'blah'\nValid");
    EXPECT_TRUE(std::string(e.getMessage()).find(s) == 0)
      << "assertion std::string(e.getMessage()).find(s) == 0 failed at "
      << __FILE__ << ":" << __LINE__ ;
  }
  EXPECT_TRUE(caughtWithoutRegion)
    << "expected nupic::Exception for unknown parameter 'blah' (without nodeType/regionName) at "
    << __FILE__ << ":" << __LINE__ ;
}
//-------------------------------------------------------------------------------------------------- /// Merge a collection of drawable geometry objects into this drawable /// /// \param drawableGeos Collection of drawable geometries to be merged /// /// A new vertex array is created with the incoming vertex arrays appended to the existing contents. /// Primitives are copied and indices updated. /// /// \warning All other vertex attribute data such as normals, texture coordinates etc will be set to NULL //-------------------------------------------------------------------------------------------------- void DrawableGeo::mergeInto(const Collection<DrawableGeo>& drawableGeos) { size_t totalVertexCount = m_vertexBundle->vertexCount(); size_t i; for (i = 0; i < drawableGeos.size(); i++) { const DrawableGeo* geo = drawableGeos[i].p(); totalVertexCount += geo->vertexCount(); } // Nothing to do if no existing vertices and no new vertices if (totalVertexCount == 0) { return; } // Create a new vertex array and copy data from our array cref<Vec3fArray> oldVertexArray = m_vertexBundle->vertexArray(); ref<Vec3fArray> newVertexArr = new Vec3fArray(totalVertexCount); size_t currentVertexIndex = 0; if (oldVertexArray.notNull() && oldVertexArray->size() > 0) { newVertexArr->copyData(*oldVertexArray, oldVertexArray->size(), 0, 0); currentVertexIndex = oldVertexArray->size(); } // Then copy from the other drawable geos for (i = 0; i < drawableGeos.size(); i++) { const DrawableGeo* otherDrawable = drawableGeos[i].p(); size_t j = 0; for (j = 0; j < otherDrawable->primitiveSetCount(); j++) { const PrimitiveSet* primSet = otherDrawable->primitiveSet(j); CVF_ASSERT(primSet); ref<UIntArray> indices = new UIntArray; indices->resize(primSet->indexCount()); uint k; for (k = 0; k < primSet->indexCount(); k++) { uint val = primSet->index(k); val += static_cast<uint>(currentVertexIndex); indices->set(k, val); } ref<PrimitiveSetIndexedUInt> prim = new 
PrimitiveSetIndexedUInt(primSet->primitiveType()); prim->setIndices(indices.p()); m_primitiveSets.push_back(prim.p()); } const Vec3fArray* otherVertices = otherDrawable->vertexArray(); CVF_ASSERT(otherVertices); // Append other drawable vertices vertex array and update vertex index newVertexArr->copyData(otherVertices->ptr(), otherVertices->size(), currentVertexIndex); currentVertexIndex += otherVertices->size(); } // Clear all vertex attributes and set new vertex array m_vertexBundle->clear(); m_vertexBundle->setVertexArray(newVertexArr.p()); recomputeBoundingBox(); }
// Looks up, in the collection named by ns() inside 'db', the index whose key pattern is 'obj'.
//
// Returns NULL when 'db' has no collection for ns(); getCollection() can return NULL and the
// result used to be dereferenced unconditionally. Otherwise returns whatever
// findIndexByKeyPattern() yields for 'obj'.
IndexDescriptor* getIndex(Database* db, const BSONObj& obj) {
    Collection* collection = db->getCollection(&_txn, ns());
    if (NULL == collection) {
        return NULL;
    }
    return collection->getIndexCatalog()->findIndexByKeyPattern(&_txn, obj);
}
//-------------------------------------------------------------------------------------------------- /// Convert indexed primitive set to unsigned short if possible /// /// \return The number of primitive sets that was converted. //-------------------------------------------------------------------------------------------------- int DrawableGeo::convertFromUIntToUShort() { int numConverted = 0; Collection<PrimitiveSet> myCollection; size_t numPrimitiveObjects = m_primitiveSets.size(); size_t iPrim; for (iPrim = 0; iPrim < numPrimitiveObjects; iPrim++) { PrimitiveSet* primitive = m_primitiveSets[iPrim].p(); PrimitiveSetIndexedUInt* primitiveSetUInt = dynamic_cast<PrimitiveSetIndexedUInt*>(primitive); PrimitiveSetIndexedUIntScoped* primitiveSetUIntScoped = dynamic_cast<PrimitiveSetIndexedUIntScoped*>(primitive); if (vertexCount() < std::numeric_limits<ushort>::max() && primitiveSetUInt) { const UIntArray* uiIndices = primitiveSetUInt->indices(); ref<UShortArray> indices = new UShortArray; if (uiIndices) { size_t uiArraySize = uiIndices->size(); indices->resize(uiArraySize); size_t j; for (j = 0; j < uiArraySize; j++) { indices->set(j, static_cast<ushort>(uiIndices->get(j))); } } ref<PrimitiveSetIndexedUShort> prim = new PrimitiveSetIndexedUShort(primitive->primitiveType()); prim->setIndices(indices.p()); myCollection.push_back(prim.p()); numConverted++; } else if (vertexCount() < std::numeric_limits<ushort>::max() && primitiveSetUIntScoped) { const UIntArray* uiIndices = primitiveSetUIntScoped->indices(); size_t uiArraySize = uiIndices->size(); ref<UShortArray> indices = new UShortArray; indices->resize(uiArraySize); size_t j; for (j = 0; j < uiArraySize; j++) { indices->set(j, static_cast<ushort>(uiIndices->get(j))); } ref<PrimitiveSetIndexedUShortScoped> prim = new PrimitiveSetIndexedUShortScoped(primitive->primitiveType()); prim->setIndices(indices.p(), primitiveSetUIntScoped->scopeFirstElement(), primitiveSetUIntScoped->scopeElementCount()); 
myCollection.push_back(prim.p()); numConverted++; } else { myCollection.push_back(primitive); } } m_primitiveSets.clear(); m_primitiveSets = myCollection; return numConverted; }
// Works every candidate plan for a bounded number of rounds, ranks them, installs the winner
// as '_bestPlan' (optionally with a non-blocking backup plan), records the decision in the
// plan cache when appropriate, and tears down the losing candidates.
//
// 'out'    - optional; when non-NULL receives the index (into the original candidate list)
//            of the winning plan.
// 'objOut' - forwarded to workAllPlans().
//
// Returns false, without picking anything, if '_failure' or '_killed' is set after the trial
// rounds; returns true otherwise.
bool MultiPlanRunner::pickBestPlan(size_t* out, BSONObj* objOut) {
    static const int timesEachPlanIsWorked = 100;

    // Run each plan some number of times.
    for (int i = 0; i < timesEachPlanIsWorked; ++i) {
        bool moreToDo = workAllPlans(objOut);
        if (!moreToDo) { break; }
    }

    if (_failure || _killed) { return false; }

    // After picking best plan, ranking will own plan stats from
    // candidate solutions (winner and losers).
    std::auto_ptr<PlanRankingDecision> ranking(new PlanRankingDecision);
    size_t bestChild = PlanRanker::pickBestPlan(_candidates, ranking.get());

    // Copy candidate order. We will need this to sort candidate stats for explain
    // after transferring ownership of 'ranking' to plan cache.
    std::vector<size_t> candidateOrder = ranking->candidateOrder;

    // Run the best plan.  Store it.
    // The winner's working set, root stage and solution are handed to '_bestPlan' and
    // '_bestSolution' here, which is why the cleanup loop below skips 'bestChild'.
    _bestPlan.reset(new PlanExecutor(_candidates[bestChild].ws,
                                     _candidates[bestChild].root));
    _bestPlan->setYieldPolicy(_policy);
    _alreadyProduced = _candidates[bestChild].results;
    _bestSolution.reset(_candidates[bestChild].solution);

    QLOG() << "Winning solution:\n" << _bestSolution->toString() << endl;

    // 'backupChild' stays equal to 'bestChild' when no backup plan is selected.
    size_t backupChild = bestChild;
    if (_bestSolution->hasBlockingStage && (0 == _alreadyProduced.size())) {
        QLOG() << "Winner has blocking stage, looking for backup plan...\n";
        // Take the first candidate (in candidate order) that has no blocking stage.
        for (size_t i = 0; i < _candidates.size(); ++i) {
            if (!_candidates[i].solution->hasBlockingStage) {
                QLOG() << "Candidate " << i << " is backup child\n";
                backupChild = i;
                _backupSolution = _candidates[i].solution;
                _backupAlreadyProduced = _candidates[i].results;
                _backupPlan = new PlanExecutor(_candidates[i].ws, _candidates[i].root);
                _backupPlan->setYieldPolicy(_policy);
                break;
            }
        }
    }

    // Store the choice we just made in the cache. We do
    // not cache the query if:
    //   1) The query is of a type that is not safe to cache, or
    //   2) the winning plan did not actually produce any results,
    //   without hitting EOF. In this case, we have no information to
    //   suggest that this plan is good.
    // NOTE(review): presumably the ranker places the winner's stats at index 0 -- confirm
    // against PlanRanker::pickBestPlan().
    const PlanStageStats* bestStats = ranking->stats.vector()[0];
    if (PlanCache::shouldCacheQuery(*_query)
        && (!_alreadyProduced.empty() || bestStats->common.isEOF)) {
        Database* db = cc().database();
        verify(NULL != db);
        Collection* collection = db->getCollection(_query->ns());
        verify(NULL != collection);
        PlanCache* cache = collection->infoCache()->getPlanCache();
        // Create list of candidate solutions for the cache with
        // the best solution at the front.
        // NOTE(review): these pointers alias the candidates' solutions, and the losers are
        // deleted further down; presumably cache->add() does not retain them -- confirm.
        std::vector<QuerySolution*> solutions;

        // Generate solutions and ranking decisions sorted by score.
        for (size_t orderingIndex = 0;
             orderingIndex < candidateOrder.size(); ++orderingIndex) {
            // index into candidates/ranking
            size_t i = candidateOrder[orderingIndex];
            solutions.push_back(_candidates[i].solution);
        }

        // Check solution cache data. Do not add to cache if
        // we have any invalid SolutionCacheData data.
        // XXX: One known example is 2D queries
        bool validSolutions = true;
        for (size_t i = 0; i < solutions.size(); ++i) {
            if (NULL == solutions[i]->cacheData.get()) {
                QLOG() << "Not caching query because this solution has no cache data: "
                       << solutions[i]->toString();
                validSolutions = false;
                break;
            }
        }

        // 'ranking' is released to the cache only on this path; on every other path the
        // auto_ptr still holds it when this function returns.
        if (validSolutions) {
            cache->add(*_query, solutions, ranking.release());
        }
    }

    // Clear out the candidate plans, leaving only stats as we're all done w/them.
    // Traverse candidate plans in order of score
    for (size_t orderingIndex = 0;
         orderingIndex < candidateOrder.size(); ++orderingIndex) {
        // index into candidates/ranking
        size_t i = candidateOrder[orderingIndex];

        // The winner's and the backup's resources were handed over above; do not delete them.
        if (i == bestChild) { continue; }
        if (i == backupChild) { continue; }

        delete _candidates[i].solution;

        // Remember the stats for the candidate plan because we always show it on an
        // explain. (The {verbose:false} in explain() is client-side trick; we always
        // generate a "verbose" explain.)
        PlanStageStats* stats = _candidates[i].root->getStats();
        if (stats) {
            _candidateStats.push_back(stats);
        }
        delete _candidates[i].root;

        // ws must die after the root.
        delete _candidates[i].ws;
    }

    _candidates.clear();
    if (NULL != out) { *out = bestChild; }
    return true;
}
/// Adds every query in the given collection to this query's own 'disjuncts' collection.
/// The parameter has the same name as the member, hence the explicit this-> on the member.
void DisjunctionMaxQuery::add(Collection<QueryPtr> disjuncts) {
    this->disjuncts.addAll(disjuncts.begin(), disjuncts.end());
}
virtual bool run(OperationContext* txn, const string& dbname, BSONObj& cmdObj, int options, string& errmsg, BSONObjBuilder& result, bool fromRepl = false ) { NamespaceString ns( dbname, cmdObj[name].String() ); Client::ReadContext ctx(ns.ns()); Database* db = ctx.ctx().db(); Collection* collection = db->getCollection( ns ); if ( !collection ) return appendCommandStatus( result, Status( ErrorCodes::NamespaceNotFound, str::stream() << "ns does not exist: " << ns.ns() ) ); size_t numCursors = static_cast<size_t>( cmdObj["numCursors"].numberInt() ); if ( numCursors == 0 || numCursors > 10000 ) return appendCommandStatus( result, Status( ErrorCodes::BadValue, str::stream() << "numCursors has to be between 1 and 10000" << " was: " << numCursors ) ); OwnedPointerVector<RecordIterator> iterators(collection->getManyIterators()); if (iterators.size() < numCursors) { numCursors = iterators.size(); } OwnedPointerVector<MultiIteratorRunner> runners; for ( size_t i = 0; i < numCursors; i++ ) { runners.push_back(new MultiIteratorRunner(ns.ns(), collection)); } // transfer iterators to runners using a round-robin distribution. // TODO consider using a common work queue once invalidation issues go away. for (size_t i = 0; i < iterators.size(); i++) { runners[i % runners.size()]->addIterator(iterators.releaseAt(i)); } { BSONArrayBuilder bucketsBuilder; for (size_t i = 0; i < runners.size(); i++) { // transfer ownership of a runner to the ClientCursor (which manages its own // lifetime). 
ClientCursor* cc = new ClientCursor( collection, runners.releaseAt(i) ); // we are mimicking the aggregation cursor output here // that is why there are ns, ok and empty firstBatch BSONObjBuilder threadResult; { BSONObjBuilder cursor; cursor.appendArray( "firstBatch", BSONObj() ); cursor.append( "ns", ns ); cursor.append( "id", cc->cursorid() ); threadResult.append( "cursor", cursor.obj() ); } threadResult.appendBool( "ok", 1 ); bucketsBuilder.append( threadResult.obj() ); } result.appendArray( "cursors", bucketsBuilder.obj() ); } return true; }
/**
 * Called by db/instance.cpp.  This is the getMore entry point.
 *
 * Pins the cursor 'cursorid', generates the next batch of results from its PlanExecutor into
 * an OP_REPLY buffer, and either keeps the cursor for a later getMore or deletes it.
 *
 * @param opCtx               operation context of the calling operation.
 * @param ns                  namespace named by the client; must equal the namespace stored on
 *                            the cursor, otherwise the call fails with Unauthorized.
 * @param ntoreturn           passed on to generateBatch(); must be >= 0 (invariant).
 * @param cursorid            id of the cursor to advance.
 * @param exhaust             out: set to false on entry, and to the cursor's
 *                            QueryOption_Exhaust flag when the cursor is kept.
 * @param isCursorAuthorized  out: set to true once the namespace and user checks have passed;
 *                            not written on any other path.
 *
 * @return a Message holding the QueryResult. Its cursor id is 0 when the cursor was not found
 *         (result flag ResultFlag_CursorNotFound) or was deleted after this batch.
 *
 * Failures other than "cursor not found" are raised through uassert/uassertStatusOK.
 */
Message getMore(OperationContext* opCtx,
                const char* ns,
                int ntoreturn,
                long long cursorid,
                bool* exhaust,
                bool* isCursorAuthorized) {
    invariant(ntoreturn >= 0);

    CurOp& curOp = *CurOp::get(opCtx);
    curOp.ensureStarted();

    // For testing, we may want to fail if we receive a getmore.
    if (MONGO_FAIL_POINT(failReceivedGetmore)) {
        MONGO_UNREACHABLE;
    }

    *exhaust = false;

    const NamespaceString nss(ns);

    // Cursors come in one of two flavors:
    // - Cursors owned by the collection cursor manager, such as those generated via the find
    //   command. For these cursors, we hold the appropriate collection lock for the duration of
    //   the getMore using AutoGetCollectionForRead.
    // - Cursors owned by the global cursor manager, such as those generated via the aggregate
    //   command. These cursors either hold no collection state or manage their collection state
    //   internally, so we acquire no locks.
    //
    // While we only need to acquire locks in the case of a cursor which is *not* globally owned,
    // we need to create an AutoStatsTracker in either case. This is responsible for updating
    // statistics in CurOp and Top. We avoid using AutoGetCollectionForReadCommand because we may
    // need to drop and reacquire locks when the cursor is awaitData, but we don't want to update
    // the stats twice.
    //
    // Note that we acquire our locks before our ClientCursorPin, in order to ensure that the
    // pin's destructor is called before the lock's destructor (if there is one) so that the
    // cursor cleanup can occur under the lock.
    UninterruptibleLockGuard noInterrupt(opCtx->lockState());
    boost::optional<AutoGetCollectionForRead> readLock;
    boost::optional<AutoStatsTracker> statsTracker;
    CursorManager* cursorManager;

    if (CursorManager::isGloballyManagedCursor(cursorid)) {
        cursorManager = CursorManager::getGlobalCursorManager();

        // For a globally managed namespace the stats are tracked against its target namespace,
        // if it has one; the whole block is skipped when that optional is empty.
        if (boost::optional<NamespaceString> nssForCurOp = nss.isGloballyManagedNamespace()
                ? nss.getTargetNSForGloballyManagedNamespace()
                : nss) {
            AutoGetDb autoDb(opCtx, nssForCurOp->db(), MODE_IS);
            // No profiling level is supplied when the database does not exist.
            const auto profilingLevel = autoDb.getDb()
                ? boost::optional<int>{autoDb.getDb()->getProfilingLevel()}
                : boost::none;
            statsTracker.emplace(opCtx, *nssForCurOp, Top::LockType::NotLocked, profilingLevel);
            auto view = autoDb.getDb()
                ? autoDb.getDb()->getViewCatalog()->lookup(opCtx, nssForCurOp->ns())
                : nullptr;
            uassert(
                ErrorCodes::CommandNotSupportedOnView,
                str::stream() << "Namespace " << nssForCurOp->ns()
                              << " is a view. OP_GET_MORE operations are not supported on views. "
                              << "Only clients which support the getMore command can be used to "
                                 "query views.",
                !view);
        }
    } else {
        readLock.emplace(opCtx, nss);
        const int doNotChangeProfilingLevel = 0;
        statsTracker.emplace(opCtx,
                             nss,
                             Top::LockType::ReadLocked,
                             readLock->getDb() ? readLock->getDb()->getProfilingLevel()
                                               : doNotChangeProfilingLevel);
        Collection* collection = readLock->getCollection();
        uassert(
            ErrorCodes::OperationFailed, "collection dropped between getMore calls", collection);
        cursorManager = collection->getCursorManager();

        // This checks to make sure the operation is allowed on a replicated node.  Since we are
        // not passing in a query object (necessary to check SlaveOK query option), we allow
        // reads whether we are PRIMARY or SECONDARY.
        uassertStatusOK(
            repl::ReplicationCoordinator::get(opCtx)->checkCanServeReadsFor(opCtx, nss, true));
    }

    LOG(5) << "Running getMore, cursorid: " << cursorid;

    // A pin performs a CC lookup and if there is a CC, increments the CC's pin value so it
    // doesn't time out.  Also informs ClientCursor that there is somebody actively holding the
    // CC, so don't delete it.
    auto ccPin = cursorManager->pinCursor(opCtx, cursorid);

    // These are set in the QueryResult msg we return.
    int resultFlags = ResultFlag_AwaitCapable;

    int numResults = 0;
    int startingResult = 0;

    const int InitialBufSize =
        512 + sizeof(QueryResult::Value) + FindCommon::kMaxBytesToReturnToClientAtOnce;

    // Leave room at the front of the buffer for the QueryResult header, which is filled in
    // through the QueryResult::View at the end of this function.
    BufBuilder bb(InitialBufSize);
    bb.skip(sizeof(QueryResult::Value));

    if (!ccPin.isOK()) {
        // A missing cursor is reported to the client in the reply rather than raised; any
        // other pin failure is thrown.
        if (ccPin == ErrorCodes::CursorNotFound) {
            cursorid = 0;
            resultFlags = ResultFlag_CursorNotFound;
        } else {
            uassertStatusOK(ccPin.getStatus());
        }
    } else {
        ClientCursor* cc = ccPin.getValue().getCursor();

        // Check for spoofing of the ns such that it does not match the one originally
        // there for the cursor.
        uassert(ErrorCodes::Unauthorized,
                str::stream() << "Requested getMore on namespace " << ns << ", but cursor "
                              << cursorid
                              << " belongs to namespace "
                              << cc->nss().ns(),
                nss == cc->nss());

        // A user can only call getMore on their own cursor. If there were multiple users
        // authenticated when the cursor was created, then at least one of them must be
        // authenticated in order to run getMore on the cursor.
        uassert(ErrorCodes::Unauthorized,
                str::stream() << "cursor id " << cursorid
                              << " was not created by the authenticated user",
                AuthorizationSession::get(opCtx->getClient())
                    ->isCoauthorizedWith(cc->getAuthenticatedUsers()));

        *isCursorAuthorized = true;

        const auto replicationMode = repl::ReplicationCoordinator::get(opCtx)->getReplicationMode();
        opCtx->recoveryUnit()->setReadConcernLevelAndReplicationMode(cc->getReadConcernLevel(),
                                                                     replicationMode);

        // TODO SERVER-33698: Remove kSnapshotReadConcern clause once we can guarantee that a
        // readConcern level snapshot getMore will have an established point-in-time WiredTiger
        // snapshot.
        if (replicationMode == repl::ReplicationCoordinator::modeReplSet &&
            (cc->getReadConcernLevel() == repl::ReadConcernLevel::kMajorityReadConcern ||
             cc->getReadConcernLevel() == repl::ReadConcernLevel::kSnapshotReadConcern)) {
            uassertStatusOK(opCtx->recoveryUnit()->obtainMajorityCommittedSnapshot());
        }

        // 'readLock' is only engaged for collection-managed cursors, so this rejects awaitData
        // cursors owned by the global cursor manager. It also guarantees that 'readLock' is
        // engaged wherever cc->isAwaitData() is true below.
        uassert(40548,
                "OP_GET_MORE operations are not supported on tailable aggregations. Only clients "
                "which support the getMore command can be used on tailable aggregations.",
                readLock || !cc->isAwaitData());

        // If the operation that spawned this cursor had a time limit set, apply leftover
        // time to this getmore.
        if (cc->getLeftoverMaxTimeMicros() < Microseconds::max()) {
            uassert(40136,
                    "Illegal attempt to set operation deadline within DBDirectClient",
                    !opCtx->getClient()->isInDirectClient());
            opCtx->setDeadlineAfterNowBy(cc->getLeftoverMaxTimeMicros());
        }
        opCtx->checkForInterrupt();  // May trigger maxTimeAlwaysTimeOut fail point.

        // What number result are we starting at?  Used to fill out the reply.
        startingResult = cc->pos();

        uint64_t notifierVersion = 0;
        std::shared_ptr<CappedInsertNotifier> notifier;
        if (cc->isAwaitData()) {
            invariant(readLock->getCollection()->isCapped());
            // Retrieve the notifier which we will wait on until new data arrives. We make sure
            // to do this in the lock because once we drop the lock it is possible for the
            // collection to become invalid. The notifier itself will outlive the collection if
            // the collection is dropped, as we keep a shared_ptr to it.
            notifier = readLock->getCollection()->getCappedInsertNotifier();

            // Must get the version before we call generateBatch in case a write comes in after
            // that call and before we call wait on the notifier.
            notifierVersion = notifier->getVersion();
        }

        PlanExecutor* exec = cc->getExecutor();
        exec->reattachToOperationContext(opCtx);
        uassertStatusOK(exec->restoreState());

        auto planSummary = Explain::getPlanSummary(exec);
        {
            // The *_inlock setters are called while holding the Client lock.
            stdx::lock_guard<Client> lk(*opCtx->getClient());
            curOp.setPlanSummary_inlock(planSummary);

            // Ensure that the original query object is available in the slow query log, profiler
            // and currentOp. Upconvert _query to resemble a getMore command, and set the original
            // command or upconverted legacy query in the originatingCommand field.
            curOp.setOpDescription_inlock(upconvertGetMoreEntry(nss, cursorid, ntoreturn));
            curOp.setOriginatingCommand_inlock(cc->getOriginatingCommandObj());
        }

        PlanExecutor::ExecState state;

        // We report keysExamined and docsExamined to OpDebug for a given getMore operation. To
        // obtain these values we need to take a diff of the pre-execution and post-execution
        // metrics, as they accumulate over the course of a cursor's lifetime.
        PlanSummaryStats preExecutionStats;
        Explain::getSummaryStats(*exec, &preExecutionStats);

        generateBatch(ntoreturn, cc, &bb, &numResults, &state);

        // If this is an await data cursor, and we hit EOF without generating any results, then
        // we block waiting for new data to arrive.
        if (cc->isAwaitData() && state == PlanExecutor::IS_EOF && numResults == 0) {
            // Save the PlanExecutor and drop our locks.
            exec->saveState();
            readLock.reset();

            // Block waiting for data for up to 1 second. Time spent blocking is not counted
            // towards the total operation latency.
            curOp.pauseTimer();
            Seconds timeout(1);
            notifier->waitUntil(notifierVersion,
                                opCtx->getServiceContext()->getPreciseClockSource()->now() +
                                    timeout);
            notifier.reset();
            curOp.resumeTimer();

            // Reacquiring locks. Only the lock is re-created; 'statsTracker' is left as is so
            // that the stats are not updated twice.
            readLock.emplace(opCtx, nss);
            uassertStatusOK(exec->restoreState());

            // We woke up because either the timed_wait expired, or there was more data. Either
            // way, attempt to generate another batch of results.
            generateBatch(ntoreturn, cc, &bb, &numResults, &state);
        }

        // Turn the cumulative post-execution counters into per-getMore deltas.
        PlanSummaryStats postExecutionStats;
        Explain::getSummaryStats(*exec, &postExecutionStats);
        postExecutionStats.totalKeysExamined -= preExecutionStats.totalKeysExamined;
        postExecutionStats.totalDocsExamined -= preExecutionStats.totalDocsExamined;
        curOp.debug().setPlanSummaryMetrics(postExecutionStats);

        // We do not report 'execStats' for aggregation or other globally managed cursors, both in
        // the original request and subsequent getMore. It would be useful to have this
        // information for an aggregation, but the source PlanExecutor could be destroyed before
        // we know whether we need execStats and we do not want to generate for all operations
        // due to cost.
        if (!CursorManager::isGloballyManagedCursor(cursorid) && curOp.shouldDBProfile()) {
            BSONObjBuilder execStatsBob;
            Explain::getWinningPlanStats(exec, &execStatsBob);
            curOp.debug().execStats = execStatsBob.obj();
        }

        // Our two possible ClientCursorPin cleanup paths are:
        // 1) If the cursor is not going to be saved, we call deleteUnderlying() on the pin.
        // 2) If the cursor is going to be saved, we simply let the pin go out of scope. In this
        //    case, the pin's destructor will be invoked, which will call release() on the pin.
        //    Because our ClientCursorPin is declared after our lock is declared, this will
        //    happen under the lock if any locking was necessary.
        if (!shouldSaveCursorGetMore(state, exec, cc->isTailable())) {
            ccPin.getValue().deleteUnderlying();

            // cc is now invalid, as is the executor
            cursorid = 0;
            cc = nullptr;
            curOp.debug().cursorExhausted = true;

            LOG(5) << "getMore NOT saving client cursor, ended with state "
                   << PlanExecutor::statestr(state);
        } else {
            // Continue caching the ClientCursor.
            cc->incPos(numResults);
            exec->saveState();
            exec->detachFromOperationContext();
            LOG(5) << "getMore saving client cursor ended with state "
                   << PlanExecutor::statestr(state);

            *exhaust = cc->queryOptions() & QueryOption_Exhaust;

            // We assume that cursors created through a DBDirectClient are always used from their
            // original OperationContext, so we do not need to move time to and from the cursor.
            if (!opCtx->getClient()->isInDirectClient()) {
                // If the getmore had a time limit, remaining time is "rolled over" back to the
                // cursor (for use by future getmore ops).
                cc->setLeftoverMaxTimeMicros(opCtx->getRemainingMaxTimeMicros());
            }
        }
    }

    // Fill in the reply header in the space that was skipped at the front of the buffer.
    QueryResult::View qr = bb.buf();
    qr.msgdata().setLen(bb.len());
    qr.msgdata().setOperation(opReply);
    qr.setResultFlags(resultFlags);
    qr.setCursorId(cursorid);
    qr.setStartingFrom(startingResult);
    qr.setNReturned(numResults);
    LOG(5) << "getMore returned " << numResults << " results\n";
    return Message(bb.release());
}