/*
 * Compute the list of TIDs to be visited, by evaluating the expressions
 * for them.
 *
 * (The result is actually an array, not a list.)
 *
 * Each entry of tidstate->tss_tidquals is handled according to the node
 * type of its expression:
 *	- operator clause: the operand opposite the CTID variable is evaluated
 *	  to a single TID;
 *	- ScalarArrayOpExpr: the second argument is evaluated to an array whose
 *	  non-null elements are taken as TIDs;
 *	- CurrentOfExpr: execCurrentOf() supplies the TID, and
 *	  tidstate->tss_isCurrentOf is set when it reports success.
 * Any other expression type raises an error.
 */
static void
TidListCreate(TidScanState *tidstate)
{
	List	   *evalList = tidstate->tss_tidquals;
	ExprContext *econtext = tidstate->ss.ps.ps_ExprContext;
	BlockNumber nblocks;
	ItemPointerData *tidList;
	int			numAllocTids;
	int			numTids;
	ListCell   *l;

	/*
	 * We silently discard any TIDs that are out of range at the time of scan
	 * start.  (Since we hold at least AccessShareLock on the table, it won't
	 * be possible for someone to truncate away the blocks we intend to
	 * visit.)
	 */
	nblocks = RelationGetNumberOfBlocks(tidstate->ss.ss_currentRelation);

	/*
	 * We initialize the array with enough slots for the case that all quals
	 * are simple OpExprs or CurrentOfExprs.  If there are any
	 * ScalarArrayOpExprs, we may have to enlarge the array.
	 */
	numAllocTids = list_length(evalList);
	tidList = (ItemPointerData *)
		palloc(numAllocTids * sizeof(ItemPointerData));
	numTids = 0;
	/* Reset the flag; only the CurrentOfExpr branch below sets it. */
	tidstate->tss_isCurrentOf = false;

	foreach(l, evalList)
	{
		ExprState  *exstate = (ExprState *) lfirst(l);
		Expr	   *expr = exstate->expr;
		ItemPointer itemptr;
		bool		isNull;

		if (is_opclause(expr))
		{
			FuncExprState *fexstate = (FuncExprState *) exstate;
			Node	   *arg1;
			Node	   *arg2;

			arg1 = get_leftop(expr);
			arg2 = get_rightop(expr);
			/* Pick the operand on the other side from the CTID variable. */
			if (IsCTIDVar(arg1))
				exstate = (ExprState *) lsecond(fexstate->args);
			else if (IsCTIDVar(arg2))
				exstate = (ExprState *) linitial(fexstate->args);
			else
				elog(ERROR, "could not identify CTID variable");

			itemptr = (ItemPointer)
				DatumGetPointer(ExecEvalExprSwitchContext(exstate,
														  econtext,
														  &isNull,
														  NULL));
			/* Keep the TID only if it is non-null, valid and in range. */
			if (!isNull &&
				ItemPointerIsValid(itemptr) &&
				ItemPointerGetBlockNumber(itemptr) < nblocks)
			{
				/* Double the array when it is full. */
				if (numTids >= numAllocTids)
				{
					numAllocTids *= 2;
					tidList = (ItemPointerData *)
						repalloc(tidList,
								 numAllocTids * sizeof(ItemPointerData));
				}
				tidList[numTids++] = *itemptr;
			}
		}
		else if (expr && IsA(expr, ScalarArrayOpExpr))
		{
			ScalarArrayOpExprState *saexstate = (ScalarArrayOpExprState *) exstate;
			Datum		arraydatum;
			ArrayType  *itemarray;
			Datum	   *ipdatums;
			bool	   *ipnulls;
			int			ndatums;
			int			i;

			/* The array operand is the second argument. */
			exstate = (ExprState *) lsecond(saexstate->fxprstate.args);
			arraydatum = ExecEvalExprSwitchContext(exstate,
												   econtext,
												   &isNull,
												   NULL);
			/* A null array contributes no TIDs. */
			if (isNull)
				continue;
			itemarray = DatumGetArrayTypeP(arraydatum);
			deconstruct_array(itemarray,
							  TIDOID, SizeOfIptrData, false, 's',
							  &ipdatums, &ipnulls, &ndatums);

			/*
			 * Enlarge once, up front, to the worst case in which every
			 * element is kept; the loop below then appends without any
			 * further capacity checks.
			 */
			if (numTids + ndatums > numAllocTids)
			{
				numAllocTids = numTids + ndatums;
				tidList = (ItemPointerData *)
					repalloc(tidList,
							 numAllocTids * sizeof(ItemPointerData));
			}
			for (i = 0; i < ndatums; i++)
			{
				/* Null elements are skipped. */
				if (!ipnulls[i])
				{
					itemptr = (ItemPointer) DatumGetPointer(ipdatums[i]);
					if (ItemPointerIsValid(itemptr) &&
						ItemPointerGetBlockNumber(itemptr) < nblocks)
						tidList[numTids++] = *itemptr;
				}
			}
			/* Release the work arrays returned by deconstruct_array(). */
			pfree(ipdatums);
			pfree(ipnulls);
		}
		else if (expr && IsA(expr, CurrentOfExpr))
		{
			CurrentOfExpr *cexpr = (CurrentOfExpr *) expr;
			ItemPointerData cursor_tid;

			/*
			 * execCurrentOf() fills cursor_tid; the TID is recorded only
			 * when it returns true.  Note that no block-range check is
			 * applied on this path.
			 */
			if (execCurrentOf(cexpr, econtext,
							  RelationGetRelid(tidstate->ss.ss_currentRelation),
							  &cursor_tid))
			{
				if (numTids >= numAllocTids)
				{
					numAllocTids *= 2;
					tidList = (ItemPointerData *)
						repalloc(tidList,
								 numAllocTids * sizeof(ItemPointerData));
				}
				tidList[numTids++] = cursor_tid;
				tidstate->tss_isCurrentOf = true;
			}
		}
		else
			elog(ERROR, "could not identify CTID expression");
	}
/* * clauselist_selectivity - * Compute the selectivity of an implicitly-ANDed list of boolean * expression clauses. The list can be empty, in which case 1.0 * must be returned. List elements may be either RestrictInfos * or bare expression clauses --- the former is preferred since * it allows caching of results. * * See clause_selectivity() for the meaning of the additional parameters. * * Our basic approach is to take the product of the selectivities of the * subclauses. However, that's only right if the subclauses have independent * probabilities, and in reality they are often NOT independent. So, * we want to be smarter where we can. * Currently, the only extra smarts we have is to recognize "range queries", * such as "x > 34 AND x < 42". Clauses are recognized as possible range * query components if they are restriction opclauses whose operators have * scalarltsel() or scalargtsel() as their restriction selectivity estimator. * We pair up clauses of this form that refer to the same variable. An * unpairable clause of this kind is simply multiplied into the selectivity * product in the normal way. But when we find a pair, we know that the * selectivities represent the relative positions of the low and high bounds * within the column's range, so instead of figuring the selectivity as * hisel * losel, we can figure it as hisel + losel - 1. (To visualize this, * see that hisel is the fraction of the range below the high bound, while * losel is the fraction above the low bound; so hisel can be interpreted * directly as a 0..1 value but we need to convert losel to 1-losel before * interpreting it as a value. Then the available range is 1-losel to hisel. * However, this calculation double-excludes nulls, so really we need * hisel + losel + null_frac - 1.) * * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation * yields an impossible (negative) result. 
* * A free side-effect is that we can recognize redundant inequalities such * as "x < 4 AND x < 5"; only the tighter constraint will be counted. * * Of course this is all very dependent on the behavior of * scalarltsel/scalargtsel; perhaps some day we can generalize the approach. */ Selectivity clauselist_selectivity(PlannerInfo *root, List *clauses, int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo, bool use_damping) { Selectivity s1 = 1.0; Selectivity *rgsel = NULL; RangeQueryClause *rqlist = NULL; ListCell *l; int pos = 0; int i = 0; /* allocate array to hold all selectivity factors */ rgsel = (Selectivity *) palloc(sizeof(Selectivity) * list_length(clauses)); /* * If there's exactly one clause, then no use in trying to match up pairs, * so just go directly to clause_selectivity(). */ if (list_length(clauses) == 1) return clause_selectivity(root, (Node *) linitial(clauses), varRelid, jointype, sjinfo, use_damping); /* * Initial scan over clauses. Anything that doesn't look like a potential * rangequery clause gets directly added as selectivity factor. Anything that * does gets inserted into an rqlist entry. */ foreach(l, clauses) { Node *clause = (Node *) lfirst(l); RestrictInfo *rinfo; Selectivity s2; /* Always compute the selectivity using clause_selectivity */ s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo, use_damping); /* * Check for being passed a RestrictInfo. * * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or * 0.0; just use that rather than looking for range pairs. */ if (IsA(clause, RestrictInfo)) { rinfo = (RestrictInfo *) clause; if (rinfo->pseudoconstant) { rgsel[pos++] = s2; continue; } clause = (Node *) rinfo->clause; } else rinfo = NULL; /* * See if it looks like a restriction clause with a pseudoconstant on * one side. (Anything more complicated than that might not behave in * the simple way we are expecting.) Most of the tests here can be * done more efficiently with rinfo than without. 
*/ if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2) { OpExpr *expr = (OpExpr *) clause; bool varonleft = true; bool ok; if (rinfo) { ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) && (is_pseudo_constant_clause_relids(lsecond(expr->args), rinfo->right_relids) || (varonleft = false, is_pseudo_constant_clause_relids(linitial(expr->args), rinfo->left_relids))); } else { ok = (NumRelids(clause) == 1) && (is_pseudo_constant_clause(lsecond(expr->args)) || (varonleft = false, is_pseudo_constant_clause(linitial(expr->args)))); } if (ok) { /* * If it's not a "<" or ">" operator, just merge the * selectivity in generically. But if it's the right oprrest, * add the clause to rqlist for later processing. */ switch (get_oprrest(expr->opno)) { case F_SCALARLTSEL: addRangeClause(&rqlist, clause, varonleft, true, s2); break; case F_SCALARGTSEL: addRangeClause(&rqlist, clause, varonleft, false, s2); break; default: /* Just merge the selectivity in generically */ rgsel[pos++] = s2; break; } continue; /* drop to loop bottom */ } } /* Not the right form, so treat it generically. */ rgsel[pos++] = s2; }
/* * make_restrictinfo_internal * * Common code for the main entry points and the recursive cases. */ static RestrictInfo * make_restrictinfo_internal(Expr *clause, Expr *orclause, bool is_pushed_down, bool outerjoin_delayed, bool pseudoconstant, Index security_level, Relids required_relids, Relids outer_relids, Relids nullable_relids) { RestrictInfo *restrictinfo = makeNode(RestrictInfo); restrictinfo->clause = clause; restrictinfo->orclause = orclause; restrictinfo->is_pushed_down = is_pushed_down; restrictinfo->outerjoin_delayed = outerjoin_delayed; restrictinfo->pseudoconstant = pseudoconstant; restrictinfo->can_join = false; /* may get set below */ restrictinfo->security_level = security_level; restrictinfo->outer_relids = outer_relids; restrictinfo->nullable_relids = nullable_relids; /* * If it's potentially delayable by lower-level security quals, figure out * whether it's leakproof. We can skip testing this for level-zero quals, * since they would never get delayed on security grounds anyway. */ if (security_level > 0) restrictinfo->leakproof = !contain_leaked_vars((Node *) clause); else restrictinfo->leakproof = false; /* really, "don't know" */ /* * If it's a binary opclause, set up left/right relids info. In any case * set up the total clause relids info. */ if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2) { restrictinfo->left_relids = pull_varnos(get_leftop(clause)); restrictinfo->right_relids = pull_varnos(get_rightop(clause)); restrictinfo->clause_relids = bms_union(restrictinfo->left_relids, restrictinfo->right_relids); /* * Does it look like a normal join clause, i.e., a binary operator * relating expressions that come from distinct relations? If so we * might be able to use it in a join algorithm. Note that this is a * purely syntactic test that is made regardless of context. 
*/ if (!bms_is_empty(restrictinfo->left_relids) && !bms_is_empty(restrictinfo->right_relids) && !bms_overlap(restrictinfo->left_relids, restrictinfo->right_relids)) { restrictinfo->can_join = true; /* pseudoconstant should certainly not be true */ Assert(!restrictinfo->pseudoconstant); } } else { /* Not a binary opclause, so mark left/right relid sets as empty */ restrictinfo->left_relids = NULL; restrictinfo->right_relids = NULL; /* and get the total relid set the hard way */ restrictinfo->clause_relids = pull_varnos((Node *) clause); } /* required_relids defaults to clause_relids */ if (required_relids != NULL) restrictinfo->required_relids = required_relids; else restrictinfo->required_relids = restrictinfo->clause_relids; /* * Fill in all the cacheable fields with "not yet set" markers. None of * these will be computed until/unless needed. Note in particular that we * don't mark a binary opclause as mergejoinable or hashjoinable here; * that happens only if it appears in the right context (top level of a * joinclause list). */ restrictinfo->parent_ec = NULL; restrictinfo->eval_cost.startup = -1; restrictinfo->norm_selec = -1; restrictinfo->outer_selec = -1; restrictinfo->mergeopfamilies = NIL; restrictinfo->left_ec = NULL; restrictinfo->right_ec = NULL; restrictinfo->left_em = NULL; restrictinfo->right_em = NULL; restrictinfo->scansel_cache = NIL; restrictinfo->outer_is_left = false; restrictinfo->hashjoinoperator = InvalidOid; restrictinfo->left_bucketsize = -1; restrictinfo->right_bucketsize = -1; restrictinfo->left_mcvfreq = -1; restrictinfo->right_mcvfreq = -1; return restrictinfo; }
/*
 * dependency_is_compatible_clause
 *		Decide whether a clause can be used with functional dependencies.
 *
 * Accepted clauses are those of the form "equality to a pseudoconstant", or
 * ones that can be read that way ("NOT x" as "x = false", a bare boolean "x"
 * as "x = true").  In addition, the variable side must be a plain Var of the
 * relation identified by relid.
 *
 * Returns true and stores the Var's attribute number in *attnum on success;
 * returns false, leaving *attnum untouched, otherwise.
 */
static bool
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
{
	RestrictInfo *ri = (RestrictInfo *) clause;
	Node	   *operand;
	Var		   *column;

	/*
	 * Reject up front: anything that is not a RestrictInfo; pseudoconstants
	 * (they couldn't contain a Var); and clauses that reference several
	 * varnos or none at all.
	 */
	if (!IsA(ri, RestrictInfo) ||
		ri->pseudoconstant ||
		bms_membership(ri->clause_relids) != BMS_SINGLETON)
		return false;

	if (is_opclause(ri->clause))
	{
		/* Operator clause: look for Var = Const or Const = Var. */
		OpExpr	   *op = (OpExpr *) ri->clause;

		/* Only two-argument expressions qualify. */
		if (list_length(op->args) != 2)
			return false;

		/* One side must be a pseudoconstant; the other is our candidate. */
		if (is_pseudo_constant_clause(lsecond(op->args)))
			operand = (Node *) linitial(op->args);
		else if (is_pseudo_constant_clause(linitial(op->args)))
			operand = (Node *) lsecond(op->args);
		else
			return false;

		/*
		 * Anything other than "=" is of no use for functional dependencies.
		 *
		 * Equality is detected through the operator's selectivity estimator
		 * rather than through the operator itself (a bit awkward, but well
		 * ...).
		 *
		 * XXX this is pretty dubious; probably it'd be better to check btree
		 * or hash opclass membership, so as not to be fooled by custom
		 * selectivity functions, and to be more consistent with decisions
		 * elsewhere in the planner.
		 */
		if (get_oprrest(op->opno) != F_EQSEL)
			return false;
	}
	else if (not_clause((Node *) ri->clause))
	{
		/* "NOT x" reads as "x = false": the candidate is the argument. */
		operand = (Node *) get_notclausearg(ri->clause);
	}
	else
	{
		/* A bare boolean "x" reads as "x = true": it is the candidate. */
		operand = (Node *) ri->clause;
	}

	/*
	 * Look through a RelabelType on top of the operand.  (At most one can be
	 * present, because eval_const_expressions has already been applied.)
	 */
	if (IsA(operand, RelabelType))
		operand = (Node *) ((RelabelType *) operand)->arg;

	/* Only plain Vars are supported for now. */
	if (!IsA(operand, Var))
		return false;
	column = (Var *) operand;

	/*
	 * The Var has to belong to the requested relation, come from the current
	 * query level, and be a user attribute (stats are not allowed on system
	 * attributes).
	 */
	if (column->varno != relid ||
		column->varlevelsup != 0 ||
		!AttrNumberIsForUserDefinedAttr(column->varattno))
		return false;

	*attnum = column->varattno;
	return true;
}
/*
 * clauselist_selectivity -
 *	  Compute the selectivity of an implicitly-ANDed list of boolean
 *	  expression clauses.  The list can be empty, in which case 1.0
 *	  must be returned.
 *
 * See clause_selectivity() for the meaning of the additional parameters.
 *
 * Our basic approach is to take the product of the selectivities of the
 * subclauses.  However, that's only right if the subclauses have independent
 * probabilities, and in reality they are often NOT independent.  So,
 * we want to be smarter where we can.
 *
 * Currently, the only extra smarts we have is to recognize "range queries",
 * such as "x > 34 AND x < 42".  Clauses are recognized as possible range
 * query components if they are restriction opclauses whose operators have
 * scalarltsel() or scalargtsel() as their restriction selectivity estimator.
 * We pair up clauses of this form that refer to the same variable.  An
 * unpairable clause of this kind is simply multiplied into the selectivity
 * product in the normal way.  But when we find a pair, we know that the
 * selectivities represent the relative positions of the low and high bounds
 * within the column's range, so instead of figuring the selectivity as
 * hisel * losel, we can figure it as hisel + losel - 1.  (To visualize this,
 * see that hisel is the fraction of the range below the high bound, while
 * losel is the fraction above the low bound; so hisel can be interpreted
 * directly as a 0..1 value but we need to convert losel to 1-losel before
 * interpreting it as a value.  Then the available range is 1-losel to hisel.
 * However, this calculation double-excludes nulls, so really we need
 * hisel + losel + null_frac - 1.)
 *
 * If the calculation yields zero or negative, however, we chicken out and
 * use a default estimate; that probably means that one or both
 * selectivities is a default estimate rather than an actual range value.
 *
 * Of course this is all very dependent on the behavior of
 * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
 */
Selectivity
clauselist_selectivity(Query *root,
					   List *clauses,
					   int varRelid,
					   JoinType jointype)
{
	Selectivity s1 = 1.0;		/* running product of generic selectivities */
	RangeQueryClause *rqlist = NULL;	/* range-clause candidates so far */
	List	   *clist;

	/*
	 * Initial scan over clauses.  Anything that doesn't look like a
	 * potential rangequery clause gets multiplied into s1 and forgotten.
	 * Anything that does gets inserted into an rqlist entry.
	 */
	foreach(clist, clauses)
	{
		Node	   *clause = (Node *) lfirst(clist);
		Selectivity s2;

		/*
		 * See if it looks like a restriction clause with a pseudoconstant
		 * on one side.  (Anything more complicated than that might not
		 * behave in the simple way we are expecting.)
		 *
		 * The clause qualifies when varRelid is nonzero, or when it
		 * references exactly one relation.
		 *
		 * NB: for consistency of results, this fragment of code had better
		 * match what clause_selectivity() would do in the cases it
		 * handles.
		 */
		if (is_opclause(clause) &&
			(varRelid != 0 || NumRelids(clause) == 1))
		{
			OpExpr	   *expr = (OpExpr *) clause;

			if (length(expr->args) == 2)
			{
				bool		varonleft = true;

				/*
				 * The right operand is tested first.  The comma expression
				 * flips varonleft to false just before the left operand is
				 * tested, so varonleft stays true only when the
				 * pseudoconstant is on the right.
				 */
				if (is_pseudo_constant_clause(lsecond(expr->args)) ||
					(varonleft = false,
					 is_pseudo_constant_clause(lfirst(expr->args))))
				{
					Oid			opno = expr->opno;
					RegProcedure oprrest = get_oprrest(opno);

					s2 = restriction_selectivity(root, opno,
												 expr->args, varRelid);

					/*
					 * If we reach here, we have computed the same result
					 * that clause_selectivity would, so we can just use
					 * s2 if it's the wrong oprrest.  But if it's the
					 * right oprrest, add the clause to rqlist for later
					 * processing.
					 */
					switch (oprrest)
					{
						case F_SCALARLTSEL:
							addRangeClause(&rqlist, clause,
										   varonleft, true, s2);
							break;
						case F_SCALARGTSEL:
							addRangeClause(&rqlist, clause,
										   varonleft, false, s2);
							break;
						default:
							/* Just merge the selectivity in generically */
							s1 = s1 * s2;
							break;
					}
					continue;	/* drop to loop bottom */
				}
			}
		}
		/* Not the right form, so treat it generically. */
		s2 = clause_selectivity(root, clause, varRelid, jointype);
		s1 = s1 * s2;
	}