/*
 * optimize_minmax_aggregates - check for optimizing MIN/MAX via indexes
 *
 * This checks to see if we can replace MIN/MAX aggregate functions by
 * subqueries of the form
 *		(SELECT col FROM tab WHERE ... ORDER BY col ASC/DESC LIMIT 1)
 * Given a suitable index on tab.col, this can be much faster than the
 * generic scan-all-the-rows plan.
 *
 * We are passed the preprocessed tlist, and the best path
 * devised for computing the input of a standard Agg node.  If we are able
 * to optimize all the aggregates, and the result is estimated to be cheaper
 * than the generic aggregate method, then generate and return a Plan that
 * does it that way.  Otherwise, return NULL.
 *
 * Parameters:
 *	root		planner state; root->parse is the Query being planned
 *	tlist		preprocessed target list, scanned for aggregates
 *	best_path	path for the generic Agg input (not referenced in the part
 *				of the function visible here)
 *
 * Every rejection test in the visible portion returns NULL, meaning "do not
 * use the MIN/MAX optimization".
 *
 * NOTE(review): this excerpt stops after Pass 2.  The cost comparison and
 * Plan construction described above, and the closing brace of the function,
 * are outside the visible text.
 */
Plan *
optimize_minmax_aggregates(PlannerInfo *root, List *tlist, Path *best_path)
{
	Query	   *parse = root->parse;
	FromExpr   *jtnode;
	RangeTblRef *rtr;
	RangeTblEntry *rte;
	RelOptInfo *rel;
	List	   *aggs_list;
	ListCell   *l;
	Cost		total_cost;
	/*
	 * The next four locals are not touched in the visible portion;
	 * presumably they serve the later cost-comparison and plan-building
	 * steps -- TODO confirm against the rest of the function.
	 */
	Path		agg_p;
	Plan	   *plan;
	Node	   *hqual;
	QualCost	tlist_cost;

	/* Nothing to do if query has no aggregates */
	if (!parse->hasAggs)
		return NULL;

	Assert(!parse->setOperations);		/* shouldn't get here if a setop */
	Assert(parse->rowMarks == NIL);		/* nor if FOR UPDATE */

	/*
	 * Reject unoptimizable cases.
	 *
	 * We don't handle GROUP BY, because our current implementations of
	 * grouping require looking at all the rows anyway, and so there's not
	 * much point in optimizing MIN/MAX.
	 */
	if (parse->groupClause)
		return NULL;

	/*
	 * We also restrict the query to reference exactly one table, since join
	 * conditions can't be handled reasonably.  (We could perhaps handle a
	 * query containing cartesian-product joins, but it hardly seems worth the
	 * trouble.)  However, the single real table could be buried in several
	 * levels of FromExpr.
	 *
	 * The loop below descends through FromExpr nodes as long as each has
	 * exactly one child; any FromExpr with a different number of children
	 * disqualifies the query.
	 */
	jtnode = parse->jointree;
	while (IsA(jtnode, FromExpr))
	{
		if (list_length(jtnode->fromlist) != 1)
			return NULL;
		jtnode = linitial(jtnode->fromlist);
	}
	/* Whatever we ended on must be a plain range-table reference */
	if (!IsA(jtnode, RangeTblRef))
		return NULL;
	rtr = (RangeTblRef *) jtnode;
	rte = rt_fetch(rtr->rtindex, parse->rtable);
	/* Only a plain relation with rte->inh unset is accepted here */
	if (rte->rtekind != RTE_RELATION || rte->inh)
		return NULL;
	rel = find_base_rel(root, rtr->rtindex);

	/*
	 * Since this optimization is not applicable all that often, we want to
	 * fall out before doing very much work if possible.  Therefore we do the
	 * work in several passes.  The first pass scans the tlist and HAVING qual
	 * to find all the aggregates and verify that each of them is a MIN/MAX
	 * aggregate.  If that succeeds, the second pass looks at each aggregate
	 * to see if it is optimizable; if so we make an IndexPath describing how
	 * we would scan it.  (We do not try to optimize if only some aggs are
	 * optimizable, since that means we'll have to scan all the rows anyway.)
	 * If that succeeds, we have enough info to compare costs against the
	 * generic implementation.  Only if that test passes do we build a Plan.
	 */

	/*
	 * Pass 1: find all the aggregates.  A true result from the walker makes
	 * us give up; the collected entries accumulate in aggs_list across both
	 * the tlist and the HAVING qual.
	 */
	aggs_list = NIL;
	if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
		return NULL;
	if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
		return NULL;

	/*
	 * Pass 2: see if each one is optimizable.  A single failure abandons the
	 * whole optimization; otherwise the per-aggregate path costs are summed
	 * into total_cost.
	 */
	total_cost = 0;
	foreach(l, aggs_list)
	{
		MinMaxAggInfo *info = (MinMaxAggInfo *) lfirst(l);

		if (!build_minmax_path(root, rel, info))
			return NULL;
		total_cost += info->pathcost;
	}
/*
 * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
 *
 * Check to see whether the query contains MIN/MAX aggregate functions that
 * might be optimizable via indexscans.  If it does, and all the aggregates
 * are potentially optimizable, then set up root->minmax_aggs with a list of
 * these aggregates.
 *
 * Note: we are passed the preprocessed targetlist separately, because it's
 * not necessarily equal to root->parse->targetList.
 *
 * Parameters:
 *	root	planner state; root->parse is the Query being planned, and
 *			root->minmax_aggs is expected to be NIL on entry
 *	tlist	preprocessed target list, scanned for aggregates
 *
 * The function returns nothing; each rejection test simply returns early.
 *
 * NOTE(review): this excerpt stops after the pathkey-building loop.  The
 * assignment to root->minmax_aggs mentioned above, and the closing brace of
 * the function, are outside the visible text.
 */
void
preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
{
	Query	   *parse = root->parse;
	FromExpr   *jtnode;
	RangeTblRef *rtr;
	RangeTblEntry *rte;
	List	   *aggs_list;
	ListCell   *lc;

	/* minmax_aggs list should be empty at this point */
	Assert(root->minmax_aggs == NIL);

	/* Nothing to do if query has no aggregates */
	if (!parse->hasAggs)
		return;

	Assert(!parse->setOperations);		/* shouldn't get here if a setop */
	Assert(parse->rowMarks == NIL);		/* nor if FOR UPDATE */

	/*
	 * Reject unoptimizable cases.
	 *
	 * We don't handle GROUP BY or windowing, because our current
	 * implementations of grouping require looking at all the rows anyway, and
	 * so there's not much point in optimizing MIN/MAX.
	 */
	if (parse->groupClause || parse->hasWindowFuncs)
		return;

	/*
	 * We also restrict the query to reference exactly one table, since join
	 * conditions can't be handled reasonably.  (We could perhaps handle a
	 * query containing cartesian-product joins, but it hardly seems worth the
	 * trouble.)  However, the single real table could be buried in several
	 * levels of FromExpr due to subqueries.  Note the single table could be
	 * an inheritance parent, too.
	 *
	 * The loop below descends through FromExpr nodes as long as each has
	 * exactly one child; any FromExpr with a different number of children
	 * disqualifies the query.
	 */
	jtnode = parse->jointree;
	while (IsA(jtnode, FromExpr))
	{
		if (list_length(jtnode->fromlist) != 1)
			return;
		jtnode = linitial(jtnode->fromlist);
	}
	/* Whatever we ended on must be a plain range-table reference */
	if (!IsA(jtnode, RangeTblRef))
		return;
	rtr = (RangeTblRef *) jtnode;
	rte = planner_rt_fetch(rtr->rtindex, root);
	/* Only RTE_RELATION entries qualify; rte->inh is not examined here */
	if (rte->rtekind != RTE_RELATION)
		return;

	/*
	 * Scan the tlist and HAVING qual to find all the aggregates and verify
	 * all are MIN/MAX aggregates.  Stop as soon as we find one that isn't.
	 * (A true result from the walker is what makes us give up.)
	 */
	aggs_list = NIL;
	if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
		return;
	if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
		return;

	/*
	 * OK, there is at least the possibility of performing the optimization.
	 * Build pathkeys (and thereby EquivalenceClasses) for each aggregate.
	 * The existence of the EquivalenceClasses will prompt the path generation
	 * logic to try to build paths matching the desired sort ordering(s).
	 *
	 * Note: the pathkeys are non-canonical at this point.  They'll be fixed
	 * later by canonicalize_all_pathkeys().
	 */
	foreach(lc, aggs_list)
	{
		MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);

		/* Derived from this aggregate's target expression and sort operator */
		mminfo->pathkeys = make_pathkeys_for_aggregate(root, mminfo->target, mminfo->aggsortop);
	}
/*
 * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
 *
 * Check to see whether the query contains MIN/MAX aggregate functions that
 * might be optimizable via indexscans.  If it does, and all the aggregates
 * are potentially optimizable, then set up root->minmax_aggs with a list of
 * these aggregates.
 *
 * Note: we are passed the preprocessed targetlist separately, because it's
 * not necessarily equal to root->parse->targetList.
 *
 * Parameters:
 *	root	planner state; root->parse is the Query being planned, and
 *			root->minmax_aggs is expected to be NIL on entry
 *	tlist	preprocessed target list, scanned for aggregates
 *
 * The function returns nothing; each rejection test simply returns early.
 *
 * NOTE(review): this excerpt stops after the per-aggregate path-building
 * loop.  The assignment to root->minmax_aggs mentioned above, and the
 * closing brace of the function, are outside the visible text.
 */
void
preprocess_minmax_aggregates(PlannerInfo *root, List *tlist)
{
	Query	   *parse = root->parse;
	FromExpr   *jtnode;
	RangeTblRef *rtr;
	RangeTblEntry *rte;
	List	   *aggs_list;
	ListCell   *lc;

	/* minmax_aggs list should be empty at this point */
	Assert(root->minmax_aggs == NIL);

	/* Nothing to do if query has no aggregates */
	if (!parse->hasAggs)
		return;

	Assert(!parse->setOperations);		/* shouldn't get here if a setop */
	Assert(parse->rowMarks == NIL);		/* nor if FOR UPDATE */

	/*
	 * Reject unoptimizable cases.
	 *
	 * We don't handle GROUP BY or windowing, because our current
	 * implementations of grouping require looking at all the rows anyway, and
	 * so there's not much point in optimizing MIN/MAX.  (Note: relaxing this
	 * would likely require some restructuring in grouping_planner(), since it
	 * performs assorted processing related to these features between calling
	 * preprocess_minmax_aggregates and optimize_minmax_aggregates.)
	 */
	if (parse->groupClause || parse->hasWindowFuncs)
		return;

	/*
	 * We also restrict the query to reference exactly one table, since join
	 * conditions can't be handled reasonably.  (We could perhaps handle a
	 * query containing cartesian-product joins, but it hardly seems worth the
	 * trouble.)  However, the single table could be buried in several levels
	 * of FromExpr due to subqueries.  Note the "single" table could be an
	 * inheritance parent, too, including the case of a UNION ALL subquery
	 * that's been flattened to an appendrel.
	 *
	 * The loop below descends through FromExpr nodes as long as each has
	 * exactly one child; any FromExpr with a different number of children
	 * disqualifies the query.
	 */
	jtnode = parse->jointree;
	while (IsA(jtnode, FromExpr))
	{
		if (list_length(jtnode->fromlist) != 1)
			return;
		jtnode = linitial(jtnode->fromlist);
	}
	/* Whatever we ended on must be a plain range-table reference */
	if (!IsA(jtnode, RangeTblRef))
		return;
	rtr = (RangeTblRef *) jtnode;
	rte = planner_rt_fetch(rtr->rtindex, root);
	/*
	 * Two kinds of entry are accepted; the empty statements are deliberate
	 * "fall through and continue" branches, and anything else bails out.
	 */
	if (rte->rtekind == RTE_RELATION)
		 /* ordinary relation, ok */ ;
	else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
		 /* flattened UNION ALL subquery, ok */ ;
	else
		return;

	/*
	 * Scan the tlist and HAVING qual to find all the aggregates and verify
	 * all are MIN/MAX aggregates.  Stop as soon as we find one that isn't.
	 * (A true result from the walker is what makes us give up.)
	 */
	aggs_list = NIL;
	if (find_minmax_aggs_walker((Node *) tlist, &aggs_list))
		return;
	if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
		return;

	/*
	 * OK, there is at least the possibility of performing the optimization.
	 * Build an access path for each aggregate.  (We must do this now because
	 * we need to call query_planner with a pristine copy of the current query
	 * tree; it'll be too late when optimize_minmax_aggregates gets called.)
	 * If any of the aggregates prove to be non-indexable, give up; there is
	 * no point in optimizing just some of them.
	 */
	foreach(lc, aggs_list)
	{
		MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
		Oid			eqop;
		bool		reverse;

		/*
		 * We'll need the equality operator that goes with the aggregate's
		 * ordering operator.  The lookup also fills in "reverse".
		 */
		eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
		if (!OidIsValid(eqop))	/* shouldn't happen */
			elog(ERROR, "could not find equality operator for ordering operator %u",
				 mminfo->aggsortop);

		/*
		 * We can use either an ordering that gives NULLS FIRST or one that
		 * gives NULLS LAST; furthermore there's unlikely to be much
		 * performance difference between them, so it doesn't seem worth
		 * costing out both ways if we get a hit on the first one.  NULLS
		 * FIRST is more likely to be available if the operator is a
		 * reverse-sort operator, so try that first if reverse.
		 *
		 * NOTE(review): presumably the last argument of build_minmax_path
		 * selects NULLS FIRST ordering -- confirm against its definition.
		 * The two calls differ only in that argument (reverse, then
		 * !reverse); the second is attempted only if the first fails.
		 */
		if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
			continue;
		if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
			continue;

		/* No indexable path for this aggregate, so fail */
		return;
	}