/* * clauselist_selectivity - * Compute the selectivity of an implicitly-ANDed list of boolean * expression clauses. The list can be empty, in which case 1.0 * must be returned. List elements may be either RestrictInfos * or bare expression clauses --- the former is preferred since * it allows caching of results. * * See clause_selectivity() for the meaning of the additional parameters. * * Our basic approach is to take the product of the selectivities of the * subclauses. However, that's only right if the subclauses have independent * probabilities, and in reality they are often NOT independent. So, * we want to be smarter where we can. * Currently, the only extra smarts we have is to recognize "range queries", * such as "x > 34 AND x < 42". Clauses are recognized as possible range * query components if they are restriction opclauses whose operators have * scalarltsel() or scalargtsel() as their restriction selectivity estimator. * We pair up clauses of this form that refer to the same variable. An * unpairable clause of this kind is simply multiplied into the selectivity * product in the normal way. But when we find a pair, we know that the * selectivities represent the relative positions of the low and high bounds * within the column's range, so instead of figuring the selectivity as * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this, * see that hisel is the fraction of the range below the high bound, while * losel is the fraction above the low bound; so hisel can be interpreted * directly as a 0..1 value but we need to convert losel to 1-losel before * interpreting it as a value. Then the available range is 1-losel to hisel. * However, this calculation double-excludes nulls, so really we need * hisel + losel + null_frac - 1.) * * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation * yields an impossible (negative) result. 
* * A free side-effect is that we can recognize redundant inequalities such * as "x < 4 AND x < 5"; only the tighter constraint will be counted. * * Of course this is all very dependent on the behavior of * scalarltsel/scalargtsel; perhaps some day we can generalize the approach. */ Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_damping) { Selectivity s1 = 1.0; Selectivity *rgsel = NULL; RangeQueryClause *rqlist = NULL; ListCell *l; int pos = 0; int i = 0; /* allocate array to hold all selectivity factors */ rgsel = (Selectivity *) palloc(sizeof(Selectivity) * list_length(clauses)); /* * If there's exactly one clause, then no use in trying to match up pairs, * so just go directly to clause_selectivity(). */ if (list_length(clauses) == 1) return clause_selectivity(root, (Node *) linitial(clauses), varRelid, jointype, sjinfo, use_damping); /* * Initial scan over clauses. Anything that doesn't look like a potential * rangequery clause gets directly added as selectivity factor. Anything that * does gets inserted into an rqlist entry. */ foreach(l, clauses) { Node *clause = (Node *) lfirst(l); RestrictInfo *rinfo; Selectivity s2; /* Always compute the selectivity using clause_selectivity */ s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo, use_damping); /* * Check for being passed a RestrictInfo. * * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or * 0.0; just use that rather than looking for range pairs. */ if (IsA(clause, RestrictInfo)) { rinfo = (RestrictInfo *) clause; if (rinfo->pseudoconstant) { rgsel[pos++] = s2; continue; } clause = (Node *) rinfo->clause; } else rinfo = NULL; /* * See if it looks like a restriction clause with a pseudoconstant on * one side. (Anything more complicated than that might not behave in * the simple way we are expecting.) Most of the tests here can be * done more efficiently with rinfo than without. 
*/ if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2) { OpExpr *expr = (OpExpr *) clause; bool varonleft = true; bool ok; if (rinfo) { ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) && (is_pseudo_constant_clause_relids(lsecond(expr->args), rinfo->right_relids) || (varonleft = false, is_pseudo_constant_clause_relids(linitial(expr->args), rinfo->left_relids))); } else { ok = (NumRelids(clause) == 1) && (is_pseudo_constant_clause(lsecond(expr->args)) || (varonleft = false, is_pseudo_constant_clause(linitial(expr->args)))); } if (ok) { /* * If it's not a "<" or ">" operator, just merge the * selectivity in generically. But if it's the right oprrest, * add the clause to rqlist for later processing. */ switch (get_oprrest(expr->opno)) { case F_SCALARLTSEL: addRangeClause(&rqlist, clause, varonleft, true, s2); break; case F_SCALARGTSEL: addRangeClause(&rqlist, clause, varonleft, false, s2); break; default: /* Just merge the selectivity in generically */ rgsel[pos++] = s2; break; } continue; /* drop to loop bottom */ } } /* Not the right form, so treat it generically. */ rgsel[pos++] = s2; }
/*
 * checkPolicyForUniqueIndex
 *		Check that a proposed UNIQUE / PRIMARY KEY index is compatible with
 *		the distribution policy of "rel".
 *
 * Rules enforced here:
 *
 *	- A randomly distributed relation cannot carry a unique or primary key
 *	  index at all; that is reported as an error.
 *	- System columns (negative attribute numbers) cannot be indexed this
 *	  way; that is reported as an error too.
 *	- Otherwise the indexed columns must be a superset of the distribution
 *	  key columns.
 *
 * If the superset rule is violated, the index is still accepted when the
 * relation is empty, has neither a primary key nor a unique index yet, and
 * the index contains no expressions.  In that case the distribution policy
 * is replaced by one built from the index columns (MPP-101) and a NOTICE is
 * emitted.  In every other case an error is raised.
 *
 * rel			relation the index is being created on
 * indattr		attribute numbers of the index columns
 * nidxatts		number of entries in indattr
 * isprimary	true for PRIMARY KEY, false for UNIQUE; only affects the
 *				wording of messages
 * has_exprs	true if the index definition contains expressions
 * has_pkey		true if the relation already has a primary key
 * has_ukey		true if the relation already has a unique index
 */
void
checkPolicyForUniqueIndex(Relation rel, AttrNumber *indattr, int nidxatts,
						  bool isprimary, bool has_exprs, bool has_pkey,
						  bool has_ukey)
{
	GpPolicy   *curpolicy = rel->rd_cdbpolicy;
	Bitmapset  *distcols = NULL;
	Bitmapset  *idxcols = NULL;
	int			attidx;

	/* Random distribution can never be combined with uniqueness. */
	if (GpPolicyIsRandomly(curpolicy))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("%s and DISTRIBUTED RANDOMLY are incompatible",
						isprimary ? "PRIMARY KEY" : "UNIQUE")));

	/*
	 * Column order is irrelevant for the comparison, so collect both column
	 * lists into bitmapsets and let the set operations do the work.
	 */
	for (attidx = 0; attidx < curpolicy->nattrs; attidx++)
		distcols = bms_add_member(distcols, curpolicy->attrs[attidx]);

	for (attidx = 0; attidx < nidxatts; attidx++)
	{
		AttrNumber	attno = indattr[attidx];

		/* Negative attribute numbers denote system columns. */
		if (attno < 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot create %s on system column",
							isprimary ? "primary key" : "unique index")));

		idxcols = bms_add_member(idxcols, attno);
	}

	Assert(bms_membership(distcols) != BMS_EMPTY_SET);
	Assert(bms_membership(idxcols) != BMS_EMPTY_SET);

	/*
	 * Distribution key already covered by the index: nothing more to do.
	 *
	 * It might be tempting to rewrite the policy to match the index even in
	 * this case, but a user who deliberately distributes on (a, b) and then
	 * creates an index on (a, b, c) would have the policy changed underneath
	 * them.  What is really needed is a field in gp_distribution_policy
	 * recording whether the policy was set explicitly.
	 */
	if (bms_is_subset(distcols, idxcols))
		return;

	/*
	 * The index does not cover the distribution key.  Only an empty table
	 * with no existing primary key / unique index, and an expression-free
	 * index, may have its policy adjusted; anything else is an error.
	 */
	if (cdbRelSize(rel) == 0 && !has_pkey && !has_ukey && !has_exprs)
	{
		/* Table is not populated yet, so adopt the index columns (MPP-101). */
		GpPolicy   *newpolicy = palloc(sizeof(GpPolicy) +
									   (sizeof(AttrNumber) * nidxatts));

		newpolicy->ptype = POLICYTYPE_PARTITIONED;
		newpolicy->nattrs = 0;
		for (attidx = 0; attidx < nidxatts; attidx++)
			newpolicy->attrs[newpolicy->nattrs++] = indattr[attidx];

		GpPolicyReplace(rel->rd_id, newpolicy);

		if (isprimary)
			elog(NOTICE, "updating distribution policy to match new primary key");
		else
			elog(NOTICE, "updating distribution policy to match new unique index");
	}
	else
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("%s must contain all columns in the "
						"distribution key of relation \"%s\"",
						isprimary ? "PRIMARY KEY" : "UNIQUE index",
						RelationGetRelationName(rel))));
	}
}
/*
 * dependency_is_compatible_clause
 *		Decide whether a clause can be used with functional dependencies.
 *
 * A clause qualifies only if it is a RestrictInfo that is, or can be read
 * as, an equality between a plain Var and a pseudoconstant:
 *
 *		Var = Const  /  Const = Var		(operator estimated by eqsel)
 *		NOT Var							(read as "Var = false")
 *		Var								(read as "Var = true")
 *
 * The Var must belong to relation "relid", come from the current query
 * level, and refer to a user-defined attribute.
 *
 * Returns true and stores the attribute number in *attnum when the clause
 * qualifies; returns false (leaving *attnum untouched) otherwise.
 */
static bool
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
{
	RestrictInfo *rinfo = (RestrictInfo *) clause;
	Node	   *operand;
	Var		   *var;

	/* Bare expressions are not handled, only RestrictInfos. */
	if (!IsA(rinfo, RestrictInfo))
		return false;

	/*
	 * A pseudoconstant clause cannot contain a Var, and a clause touching
	 * several relations (or none) is of no use either.
	 */
	if (rinfo->pseudoconstant ||
		bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
		return false;

	if (is_opclause(rinfo->clause))
	{
		/* Operator clause: look for Var = Const or Const = Var. */
		OpExpr	   *opexpr = (OpExpr *) rinfo->clause;

		/* Must be a binary operator. */
		if (list_length(opexpr->args) != 2)
			return false;

		/* One side has to be a pseudoconstant; the other is the candidate. */
		if (is_pseudo_constant_clause(lsecond(opexpr->args)))
			operand = (Node *) linitial(opexpr->args);
		else if (is_pseudo_constant_clause(linitial(opexpr->args)))
			operand = (Node *) lsecond(opexpr->args);
		else
			return false;

		/*
		 * Anything other than "=" is incompatible with functional
		 * dependencies.  Equality is recognized by the operator's
		 * selectivity estimator rather than by the operator itself.
		 *
		 * XXX this is dubious: checking btree or hash opclass membership
		 * would avoid being fooled by custom selectivity functions and would
		 * be more consistent with decisions elsewhere in the planner.
		 */
		if (get_oprrest(opexpr->opno) != F_EQSEL)
			return false;
	}
	else if (not_clause((Node *) rinfo->clause))
	{
		/* "NOT x" is treated as "x = false"; examine the argument. */
		operand = (Node *) get_notclausearg(rinfo->clause);
	}
	else
	{
		/* A bare boolean "x" is treated as "x = true". */
		operand = (Node *) rinfo->clause;
	}

	/*
	 * Look through a RelabelType on top of the operand.  One level is
	 * enough, since eval_const_expressions has been applied already.
	 */
	if (IsA(operand, RelabelType))
		operand = (Node *) ((RelabelType *) operand)->arg;

	/* Only plain Vars are supported for now. */
	if (!IsA(operand, Var))
		return false;

	var = (Var *) operand;

	/*
	 * The Var must be from the requested relation and from the current query
	 * level, and must not be a system attribute (no stats on those).
	 */
	if (var->varno != relid ||
		var->varlevelsup != 0 ||
		!AttrNumberIsForUserDefinedAttr(var->varattno))
		return false;

	*attnum = var->varattno;
	return true;
}