bool MultiPlanRunner::pickBestPlan(size_t* out, BSONObj* objOut) { // Run each plan some number of times. This number is at least as great as // 'internalQueryPlanEvaluationWorks', but may be larger for big collections. size_t numWorks = internalQueryPlanEvaluationWorks; if (NULL != _collection) { // For large collections, the number of works is set to be this // fraction of the collection size. double fraction = internalQueryPlanEvaluationCollFraction; numWorks = std::max(size_t(internalQueryPlanEvaluationWorks), size_t(fraction * _collection->numRecords())); } // Work the plans, stopping when a plan hits EOF or returns some // fixed number of results. for (size_t i = 0; i < numWorks; ++i) { bool moreToDo = workAllPlans(objOut); if (!moreToDo) { break; } } if (_failure || _killed) { return false; } // After picking best plan, ranking will own plan stats from // candidate solutions (winner and losers). _ranking.reset(new PlanRankingDecision()); _bestChild = PlanRanker::pickBestPlan(_candidates, _ranking.get()); if (NULL != out) { *out = _bestChild; } return true; }
bool MultiPlanRunner::pickBestPlan(size_t* out) { static const int timesEachPlanIsWorked = 100; // Run each plan some number of times. for (int i = 0; i < timesEachPlanIsWorked; ++i) { bool moreToDo = workAllPlans(); if (!moreToDo) { break; } } if (_failure || _killed) { return false; } size_t bestChild = PlanRanker::pickBestPlan(_candidates, NULL); // Run the best plan. Store it. _bestPlan.reset(new PlanExecutor(_candidates[bestChild].ws, _candidates[bestChild].root)); _bestPlan->setYieldPolicy(_policy); _alreadyProduced = _candidates[bestChild].results; _bestSolution.reset(_candidates[bestChild].solution); QLOG() << "Winning solution:\n" << _bestSolution->toString() << endl; // TODO: // Store the choice we just made in the cache. // QueryPlanCache* cache = PlanCache::get(somenamespace); // cache->add(_query, *_candidates[bestChild]->solution, decision->bestPlanStats); // delete decision; // Clear out the candidate plans, leaving only stats as we're all done w/them. for (size_t i = 0; i < _candidates.size(); ++i) { if (i == bestChild) { continue; } delete _candidates[i].solution; // Remember the stats for the candidate plan because we always show it on an // explain. (The {verbose:false} in explain() is client-side trick; we always // generate a "verbose" explain.) PlanStageStats* stats = _candidates[i].root->getStats(); if (stats) { _candidateStats.push_back(stats); } delete _candidates[i].root; // ws must die after the root. delete _candidates[i].ws; } _candidates.clear(); if (NULL != out) { *out = bestChild; } return true; }
bool MultiPlanRunner::pickBestPlan(size_t* out) { static const int timesEachPlanIsWorked = 100; static const int yieldInterval = 10; // Run each plan some number of times. for (int i = 0; i < timesEachPlanIsWorked; ++i) { bool moreToDo = workAllPlans(); if (!moreToDo) { break; } if (0 == ((i + 1) % yieldInterval)) { yieldAllPlans(); // TODO: Actually yield... unyieldAllPlans(); } } if (_failure) { return false; } size_t bestChild = PlanRanker::pickBestPlan(_candidates, NULL); // Run the best plan. Store it. _bestPlanRunner.reset(new SimplePlanRunner(_candidates[bestChild].ws, _candidates[bestChild].root)); _alreadyProduced = _candidates[bestChild].results; // TODO: Normally we'd hand this to the cache, who would own it. delete _candidates[bestChild].solution; // Store the choice we just made in the cache. // QueryPlanCache* cache = PlanCache::get(somenamespace); // cache->add(_query, *_candidates[bestChild]->solution, decision->bestPlanStats); // delete decision; // Clear out the candidate plans as we're all done w/them. for (size_t i = 0; i < _candidates.size(); ++i) { if (i == bestChild) { continue; } delete _candidates[i].solution; delete _candidates[i].root; // ws must die after the root. delete _candidates[i].ws; } _candidates.clear(); if (NULL != out) { *out = bestChild; } return true; }
bool MultiPlanRunner::pickBestPlan(size_t* out, BSONObj* objOut) { static const int timesEachPlanIsWorked = 100; // Run each plan some number of times. for (int i = 0; i < timesEachPlanIsWorked; ++i) { bool moreToDo = workAllPlans(objOut); if (!moreToDo) { break; } } if (_failure || _killed) { return false; } // After picking best plan, ranking will own plan stats from // candidate solutions (winner and losers). std::auto_ptr<PlanRankingDecision> ranking(new PlanRankingDecision); size_t bestChild = PlanRanker::pickBestPlan(_candidates, ranking.get()); // Copy candidate order. We will need this to sort candidate stats for explain // after transferring ownership of 'ranking' to plan cache. std::vector<size_t> candidateOrder = ranking->candidateOrder; // Run the best plan. Store it. _bestPlan.reset(new PlanExecutor(_candidates[bestChild].ws, _candidates[bestChild].root)); _bestPlan->setYieldPolicy(_policy); _alreadyProduced = _candidates[bestChild].results; _bestSolution.reset(_candidates[bestChild].solution); QLOG() << "Winning solution:\n" << _bestSolution->toString() << endl; size_t backupChild = bestChild; if (_bestSolution->hasBlockingStage && (0 == _alreadyProduced.size())) { QLOG() << "Winner has blocking stage, looking for backup plan...\n"; for (size_t i = 0; i < _candidates.size(); ++i) { if (!_candidates[i].solution->hasBlockingStage) { QLOG() << "Candidate " << i << " is backup child\n"; backupChild = i; _backupSolution = _candidates[i].solution; _backupAlreadyProduced = _candidates[i].results; _backupPlan = new PlanExecutor(_candidates[i].ws, _candidates[i].root); _backupPlan->setYieldPolicy(_policy); break; } } } // Store the choice we just made in the cache. We do // not cache the query if: // 1) The query is of a type that is not safe to cache, or // 2) the winning plan did not actually produce any results, // without hitting EOF. In this case, we have no information to // suggest that this plan is good. const PlanStageStats* bestStats = ranking->stats.vector()[0]; if (PlanCache::shouldCacheQuery(*_query) && (!_alreadyProduced.empty() || bestStats->common.isEOF)) { Database* db = cc().database(); verify(NULL != db); Collection* collection = db->getCollection(_query->ns()); verify(NULL != collection); PlanCache* cache = collection->infoCache()->getPlanCache(); // Create list of candidate solutions for the cache with // the best solution at the front. std::vector<QuerySolution*> solutions; // Generate solutions and ranking decisions sorted by score. for (size_t orderingIndex = 0; orderingIndex < candidateOrder.size(); ++orderingIndex) { // index into candidates/ranking size_t i = candidateOrder[orderingIndex]; solutions.push_back(_candidates[i].solution); } // Check solution cache data. Do not add to cache if // we have any invalid SolutionCacheData data. // XXX: One known example is 2D queries bool validSolutions = true; for (size_t i = 0; i < solutions.size(); ++i) { if (NULL == solutions[i]->cacheData.get()) { QLOG() << "Not caching query because this solution has no cache data: " << solutions[i]->toString(); validSolutions = false; break; } } if (validSolutions) { cache->add(*_query, solutions, ranking.release()); } } // Clear out the candidate plans, leaving only stats as we're all done w/them. // Traverse candidate plans in order or score for (size_t orderingIndex = 0; orderingIndex < candidateOrder.size(); ++orderingIndex) { // index into candidates/ranking size_t i = candidateOrder[orderingIndex]; if (i == bestChild) { continue; } if (i == backupChild) { continue; } delete _candidates[i].solution; // Remember the stats for the candidate plan because we always show it on an // explain. (The {verbose:false} in explain() is client-side trick; we always // generate a "verbose" explain.) PlanStageStats* stats = _candidates[i].root->getStats(); if (stats) { _candidateStats.push_back(stats); } delete _candidates[i].root; // ws must die after the root. delete _candidates[i].ws; } _candidates.clear(); if (NULL != out) { *out = bestChild; } return true; }
void MultiPlanStage::pickBestPlan() { // Run each plan some number of times. This number is at least as great as // 'internalQueryPlanEvaluationWorks', but may be larger for big collections. size_t numWorks = internalQueryPlanEvaluationWorks; if (NULL != _collection) { // For large collections, the number of works is set to be this // fraction of the collection size. double fraction = internalQueryPlanEvaluationCollFraction; numWorks = std::max(size_t(internalQueryPlanEvaluationWorks), size_t(fraction * _collection->numRecords())); } // We treat ntoreturn as though it is a limit during plan ranking. // This means that ranking might not be great for sort + batchSize. // But it also means that we don't buffer too much data for sort + limit. // See SERVER-14174 for details. size_t numToReturn = _query->getParsed().getNumToReturn(); // Determine the number of results which we will produce during the plan // ranking phase before stopping. size_t numResults = (size_t)internalQueryPlanEvaluationMaxResults; if (numToReturn > 0) { numResults = std::min(numToReturn, numResults); } // Work the plans, stopping when a plan hits EOF or returns some // fixed number of results. for (size_t ix = 0; ix < numWorks; ++ix) { bool moreToDo = workAllPlans(numResults); if (!moreToDo) { break; } } if (_failure) { return; } // After picking best plan, ranking will own plan stats from // candidate solutions (winner and losers). std::auto_ptr<PlanRankingDecision> ranking(new PlanRankingDecision); _bestPlanIdx = PlanRanker::pickBestPlan(_candidates, ranking.get()); verify(_bestPlanIdx >= 0 && _bestPlanIdx < static_cast<int>(_candidates.size())); // Copy candidate order. We will need this to sort candidate stats for explain // after transferring ownership of 'ranking' to plan cache. std::vector<size_t> candidateOrder = ranking->candidateOrder; CandidatePlan& bestCandidate = _candidates[_bestPlanIdx]; std::list<WorkingSetID>& alreadyProduced = bestCandidate.results; QuerySolution* bestSolution = bestCandidate.solution; QLOG() << "Winning solution:\n" << bestSolution->toString() << endl; LOG(2) << "Winning plan: " << getPlanSummary(*bestSolution); _backupPlanIdx = kNoSuchPlan; if (bestSolution->hasBlockingStage && (0 == alreadyProduced.size())) { QLOG() << "Winner has blocking stage, looking for backup plan...\n"; for (size_t ix = 0; ix < _candidates.size(); ++ix) { if (!_candidates[ix].solution->hasBlockingStage) { QLOG() << "Candidate " << ix << " is backup child\n"; _backupPlanIdx = ix; break; } } } // Store the choice we just made in the cache. In order to do so, // 1) the query must be of a type that is safe to cache, and // 2) two or more plans cannot have tied for the win. Caching in the // case of ties can cause successive queries of the same shape to // use a bad index. if (PlanCache::shouldCacheQuery(*_query) && !ranking->tieForBest) { // Create list of candidate solutions for the cache with // the best solution at the front. std::vector<QuerySolution*> solutions; // Generate solutions and ranking decisions sorted by score. for (size_t orderingIndex = 0; orderingIndex < candidateOrder.size(); ++orderingIndex) { // index into candidates/ranking size_t ix = candidateOrder[orderingIndex]; solutions.push_back(_candidates[ix].solution); } // Check solution cache data. Do not add to cache if // we have any invalid SolutionCacheData data. // XXX: One known example is 2D queries bool validSolutions = true; for (size_t ix = 0; ix < solutions.size(); ++ix) { if (NULL == solutions[ix]->cacheData.get()) { QLOG() << "Not caching query because this solution has no cache data: " << solutions[ix]->toString(); validSolutions = false; break; } } if (validSolutions) { _collection->infoCache()->getPlanCache()->add(*_query, solutions, ranking.release()); } } }
bool MultiPlanRunner::pickBestPlan(size_t* out) { static const int timesEachPlanIsWorked = 100; // Run each plan some number of times. for (int i = 0; i < timesEachPlanIsWorked; ++i) { bool moreToDo = workAllPlans(); if (!moreToDo) { break; } } if (_failure || _killed) { return false; } auto_ptr<PlanRankingDecision> ranking(new PlanRankingDecision()); size_t bestChild = PlanRanker::pickBestPlan(_candidates, ranking.get()); // Run the best plan. Store it. _bestPlan.reset(new PlanExecutor(_candidates[bestChild].ws, _candidates[bestChild].root)); _bestPlan->setYieldPolicy(_policy); _alreadyProduced = _candidates[bestChild].results; _bestSolution.reset(_candidates[bestChild].solution); QLOG() << "Winning solution:\n" << _bestSolution->toString() << endl; size_t backupChild = bestChild; if (_bestSolution->hasSortStage && (0 == _alreadyProduced.size())) { QLOG() << "Winner has blocked sort, looking for backup plan...\n"; for (size_t i = 0; i < _candidates.size(); ++i) { if (!_candidates[i].solution->hasSortStage) { QLOG() << "Candidate " << i << " is backup child\n"; backupChild = i; _backupSolution = _candidates[i].solution; _backupAlreadyProduced = _candidates[i].results; _backupPlan = new PlanExecutor(_candidates[i].ws, _candidates[i].root); _backupPlan->setYieldPolicy(_policy); break; } } } // Store the choice we just made in the cache. if (PlanCache::shouldCacheQuery(*_query)) { Database* db = cc().database(); verify(NULL != db); Collection* collection = db->getCollection(_query->ns()); verify(NULL != collection); PlanCache* cache = collection->infoCache()->getPlanCache(); // Create list of candidate solutions for the cache with // the best solution at the front. std::vector<QuerySolution*> solutions; solutions.push_back(_bestSolution.get()); for (size_t i = 0; i < _candidates.size(); ++i) { if (i == bestChild) { continue; } solutions.push_back(_candidates[i].solution); } // Check solution cache data. Do not add to cache if // we have any invalid SolutionCacheData data. // XXX: One known example is 2D queries bool validSolutions = true; for (size_t i = 0; i < solutions.size(); ++i) { if (NULL == solutions[i]->cacheData.get()) { QLOG() << "Not caching query because this solution has no cache data: " << solutions[i]->toString(); validSolutions = false; break; } } if (validSolutions) { cache->add(*_query, solutions, ranking.release()); } } // Clear out the candidate plans, leaving only stats as we're all done w/them. for (size_t i = 0; i < _candidates.size(); ++i) { if (i == bestChild) { continue; } if (i == backupChild) { continue; } delete _candidates[i].solution; // Remember the stats for the candidate plan because we always show it on an // explain. (The {verbose:false} in explain() is client-side trick; we always // generate a "verbose" explain.) PlanStageStats* stats = _candidates[i].root->getStats(); if (stats) { _candidateStats.push_back(stats); } delete _candidates[i].root; // ws must die after the root. delete _candidates[i].ws; } _candidates.clear(); if (NULL != out) { *out = bestChild; } return true; }