/*
 * dependency_is_compatible_clause
 *		Decide whether a clause can be used with functional dependencies.
 *
 * A clause qualifies only if it is (or can be read as) an equality between
 * a column and a pseudoconstant:
 *
 *		Var = Const   or   Const = Var		(operator estimated by eqsel)
 *		NOT Var								(read as "Var = false")
 *		Var									(read as "Var = true")
 *
 * The column side must be a plain Var, optionally wrapped in a single
 * RelabelType, that belongs to relation "relid" at the current query level
 * and is a user-defined attribute.
 *
 * Returns true and stores the column's attribute number in *attnum when the
 * clause qualifies; returns false, leaving *attnum untouched, otherwise.
 */
static bool
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
{
	RestrictInfo *rinfo = (RestrictInfo *) clause;
	Node	   *operand;
	Var		   *var;

	/* Only clauses wrapped in a RestrictInfo are considered at all. */
	if (!IsA(rinfo, RestrictInfo))
		return false;

	/* A pseudoconstant clause has no Var in it, so it is useless here. */
	if (rinfo->pseudoconstant)
		return false;

	/* The clause has to mention exactly one relation. */
	if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
		return false;

	/*
	 * Work out which sub-expression plays the role of the column in the
	 * (possibly implied) equality.
	 */
	if (is_opclause(rinfo->clause))
	{
		OpExpr	   *opexpr = (OpExpr *) rinfo->clause;
		Node	   *lhs;
		Node	   *rhs;

		/* Binary operators only. */
		if (list_length(opexpr->args) != 2)
			return false;

		lhs = (Node *) linitial(opexpr->args);
		rhs = (Node *) lsecond(opexpr->args);

		/* One side must be a pseudoconstant; the other is our candidate. */
		if (is_pseudo_constant_clause(rhs))
			operand = lhs;
		else if (is_pseudo_constant_clause(lhs))
			operand = rhs;
		else
			return false;

		/*
		 * Anything that is not "=" is of no use for functional
		 * dependencies.  Equality is recognized through the operator's
		 * restriction estimator rather than through the operator itself.
		 *
		 * XXX this is pretty dubious; checking btree or hash opclass
		 * membership would avoid being fooled by custom selectivity
		 * functions and would agree better with decisions made elsewhere
		 * in the planner.
		 */
		if (get_oprrest(opexpr->opno) != F_EQSEL)
			return false;
	}
	else if (not_clause((Node *) rinfo->clause))
	{
		/* "NOT x" is treated as "x = false"; x is the candidate. */
		operand = (Node *) get_notclausearg(rinfo->clause);
	}
	else
	{
		/* A bare boolean "x" is treated as "x = true". */
		operand = (Node *) rinfo->clause;
	}

	/*
	 * Look through a RelabelType sitting on top of the operand.  At most one
	 * can be present, because eval_const_expressions has already been run.
	 */
	if (IsA(operand, RelabelType))
		operand = (Node *) ((RelabelType *) operand)->arg;

	/* Whatever is left must be a plain Var. */
	if (!IsA(operand, Var))
		return false;

	var = (Var *) operand;

	/* It must belong to the target relation, at the current query level. */
	if (var->varno != relid || var->varlevelsup != 0)
		return false;

	/* System attributes cannot carry statistics, so reject them as well. */
	if (!AttrNumberIsForUserDefinedAttr(var->varattno))
		return false;

	*attnum = var->varattno;
	return true;
}
/*
 * clauselist_selectivity -
 *	  Compute the selectivity of an implicitly-ANDed list of boolean
 *	  expression clauses.  The list can be empty, in which case 1.0
 *	  must be returned.  List elements may be either RestrictInfos
 *	  or bare expression clauses --- the former is preferred since
 *	  it allows caching of results.
 *
 * See clause_selectivity() for the meaning of the additional parameters.
 * root, varRelid, jointype, sjinfo and use_damping are all handed on
 * unchanged to clause_selectivity() for every clause examined here.
 *
 * NOTE(review): use_damping is not described anywhere in this comment, and
 * the code below only forwards it to clause_selectivity().  Presumably it
 * also governs how the factors collected in rgsel[] are combined later in
 * the function --- confirm and document.
 *
 * Our basic approach is to take the product of the selectivities of the
 * subclauses.  However, that's only right if the subclauses have independent
 * probabilities, and in reality they are often NOT independent.  So,
 * we want to be smarter where we can.
 *
 * Currently, the only extra smarts we have is to recognize "range queries",
 * such as "x > 34 AND x < 42".  Clauses are recognized as possible range
 * query components if they are restriction opclauses whose operators have
 * scalarltsel() or scalargtsel() as their restriction selectivity estimator.
 * We pair up clauses of this form that refer to the same variable.  An
 * unpairable clause of this kind is simply multiplied into the selectivity
 * product in the normal way.  But when we find a pair, we know that the
 * selectivities represent the relative positions of the low and high bounds
 * within the column's range, so instead of figuring the selectivity as
 * hisel * losel, we can figure it as hisel + losel - 1.  (To visualize this,
 * see that hisel is the fraction of the range below the high bound, while
 * losel is the fraction above the low bound; so hisel can be interpreted
 * directly as a 0..1 value but we need to convert losel to 1-losel before
 * interpreting it as a value.  Then the available range is 1-losel to hisel.
 * However, this calculation double-excludes nulls, so really we need
 * hisel + losel + null_frac - 1.)
 *
 * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
 * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
 * yields an impossible (negative) result.
 *
 * A free side-effect is that we can recognize redundant inequalities such
 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
 *
 * Of course this is all very dependent on the behavior of
 * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
 */
Selectivity
clauselist_selectivity(PlannerInfo *root,
					   List *clauses,
					   int varRelid,
					   JoinType jointype,
					   SpecialJoinInfo *sjinfo,
					   bool use_damping)
{
	Selectivity s1 = 1.0;
	Selectivity *rgsel = NULL;	/* per-clause selectivity factors */
	RangeQueryClause *rqlist = NULL;	/* candidate range-query clauses */
	ListCell *l;
	int pos = 0;				/* next unused slot in rgsel[] */
	int i = 0;

	/*
	 * allocate array to hold all selectivity factors
	 *
	 * One slot per input clause is enough: each iteration of the loop below
	 * stores at most one entry (range-query candidates go to rqlist instead).
	 *
	 * NOTE(review): this allocation happens before the single-clause
	 * shortcut just below, so on that path the array is allocated but is
	 * neither used nor pfree'd here.  Consider moving the palloc() below the
	 * shortcut.
	 */
	rgsel = (Selectivity *) palloc(sizeof(Selectivity) * list_length(clauses));

	/*
	 * If there's exactly one clause, then no use in trying to match up pairs,
	 * so just go directly to clause_selectivity().
	 */
	if (list_length(clauses) == 1)
		return clause_selectivity(root, (Node *) linitial(clauses),
								  varRelid, jointype, sjinfo, use_damping);

	/*
	 * Initial scan over clauses.  Anything that doesn't look like a potential
	 * rangequery clause gets directly added as selectivity factor.  Anything that
	 * does gets inserted into an rqlist entry.
	 */
	foreach(l, clauses)
	{
		Node *clause = (Node *) lfirst(l);
		RestrictInfo *rinfo;
		Selectivity s2;

		/*
		 * Always compute the selectivity using clause_selectivity
		 *
		 * This is done on the original list element, before any
		 * RestrictInfo wrapper is stripped off below.
		 */
		s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo, use_damping);

		/*
		 * Check for being passed a RestrictInfo.
		 *
		 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
		 * 0.0; just use that rather than looking for range pairs.
		 *
		 * Otherwise "clause" is re-pointed at the wrapped expression, while
		 * rinfo keeps the wrapper (or NULL for a bare expression).
		 */
		if (IsA(clause, RestrictInfo))
		{
			rinfo = (RestrictInfo *) clause;
			if (rinfo->pseudoconstant)
			{
				rgsel[pos++] = s2;
				continue;
			}
			clause = (Node *) rinfo->clause;
		}
		else
			rinfo = NULL;

		/*
		 * See if it looks like a restriction clause with a pseudoconstant on
		 * one side.  (Anything more complicated than that might not behave in
		 * the simple way we are expecting.)  Most of the tests here can be
		 * done more efficiently with rinfo than without.
		 */
		if (is_opclause(clause) &&
			list_length(((OpExpr *) clause)->args) == 2)
		{
			OpExpr *expr = (OpExpr *) clause;
			bool varonleft = true;
			bool ok;

			/*
			 * In both branches the right-hand argument is tested first.
			 * varonleft is flipped to false (through the comma expression)
			 * only when that test fails and the left-hand argument is about
			 * to be tested instead.
			 */
			if (rinfo)
			{
				ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
					(is_pseudo_constant_clause_relids(lsecond(expr->args),
													  rinfo->right_relids) ||
					 (varonleft = false,
					  is_pseudo_constant_clause_relids(linitial(expr->args),
													   rinfo->left_relids)));
			}
			else
			{
				ok = (NumRelids(clause) == 1) &&
					(is_pseudo_constant_clause(lsecond(expr->args)) ||
					 (varonleft = false,
					  is_pseudo_constant_clause(linitial(expr->args))));
			}

			if (ok)
			{
				/*
				 * If it's not a "<" or ">" operator, just merge the
				 * selectivity in generically.  But if it's the right oprrest,
				 * add the clause to rqlist for later processing.
				 *
				 * The fourth argument to addRangeClause() is true for the
				 * scalarltsel case and false for the scalargtsel case.
				 */
				switch (get_oprrest(expr->opno))
				{
					case F_SCALARLTSEL:
						addRangeClause(&rqlist, clause,
									   varonleft, true, s2);
						break;
					case F_SCALARGTSEL:
						addRangeClause(&rqlist, clause,
									   varonleft, false, s2);
						break;
					default:
						/* Just merge the selectivity in generically */
						rgsel[pos++] = s2;
						break;
				}
				continue;		/* drop to loop bottom */
			}
		}

		/* Not the right form, so treat it generically. */
		rgsel[pos++] = s2;
	}
/* * clauselist_selectivity - * Compute the selectivity of an implicitly-ANDed list of boolean * expression clauses. The list can be empty, in which case 1.0 * must be returned. * * See clause_selectivity() for the meaning of the additional parameters. * * Our basic approach is to take the product of the selectivities of the * subclauses. However, that's only right if the subclauses have independent * probabilities, and in reality they are often NOT independent. So, * we want to be smarter where we can. * Currently, the only extra smarts we have is to recognize "range queries", * such as "x > 34 AND x < 42". Clauses are recognized as possible range * query components if they are restriction opclauses whose operators have * scalarltsel() or scalargtsel() as their restriction selectivity estimator. * We pair up clauses of this form that refer to the same variable. An * unpairable clause of this kind is simply multiplied into the selectivity * product in the normal way. But when we find a pair, we know that the * selectivities represent the relative positions of the low and high bounds * within the column's range, so instead of figuring the selectivity as * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this, * see that hisel is the fraction of the range below the high bound, while * losel is the fraction above the low bound; so hisel can be interpreted * directly as a 0..1 value but we need to convert losel to 1-losel before * interpreting it as a value. Then the available range is 1-losel to hisel. * However, this calculation double-excludes nulls, so really we need * hisel + losel + null_frac - 1.) * If the calculation yields zero or negative, however, we chicken out and * use a default estimate; that probably means that one or both * selectivities is a default estimate rather than an actual range value. * Of course this is all very dependent on the behavior of * scalarltsel/scalargtsel; perhaps some day we can generalize the approach. 
*/ Selectivity clauselist_selectivity(Query *root, List *clauses, int varRelid, JoinType jointype) { Selectivity s1 = 1.0; RangeQueryClause *rqlist = NULL; List *clist; /* * Initial scan over clauses. Anything that doesn't look like a * potential rangequery clause gets multiplied into s1 and forgotten. * Anything that does gets inserted into an rqlist entry. */ foreach(clist, clauses) { Node *clause = (Node *) lfirst(clist); Selectivity s2; /* * See if it looks like a restriction clause with a pseudoconstant * on one side. (Anything more complicated than that might not * behave in the simple way we are expecting.) * * NB: for consistency of results, this fragment of code had better * match what clause_selectivity() would do in the cases it * handles. */ if (is_opclause(clause) && (varRelid != 0 || NumRelids(clause) == 1)) { OpExpr *expr = (OpExpr *) clause; if (length(expr->args) == 2) { bool varonleft = true; if (is_pseudo_constant_clause(lsecond(expr->args)) || (varonleft = false, is_pseudo_constant_clause(lfirst(expr->args)))) { Oid opno = expr->opno; RegProcedure oprrest = get_oprrest(opno); s2 = restriction_selectivity(root, opno, expr->args, varRelid); /* * If we reach here, we have computed the same result * that clause_selectivity would, so we can just use * s2 if it's the wrong oprrest. But if it's the * right oprrest, add the clause to rqlist for later * processing. */ switch (oprrest) { case F_SCALARLTSEL: addRangeClause(&rqlist, clause, varonleft, true, s2); break; case F_SCALARGTSEL: addRangeClause(&rqlist, clause, varonleft, false, s2); break; default: /* Just merge the selectivity in generically */ s1 = s1 * s2; break; } continue; /* drop to loop bottom */ } } } /* Not the right form, so treat it generically. */ s2 = clause_selectivity(root, clause, varRelid, jointype); s1 = s1 * s2; }