Exemplo n.º 1
0
/*
 * dependency_is_compatible_clause
 *		Determines if the clause is compatible with functional dependencies
 *
 * Only clauses that have the form of equality to a pseudoconstant, or can be
 * interpreted that way, are currently accepted.  Furthermore the variable
 * part of the clause must be a simple Var belonging to the specified
 * relation, whose attribute number we return in *attnum on success.
 */
static bool
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
{
	RestrictInfo *rinfo = (RestrictInfo *) clause;
	Var		   *var;

	if (!IsA(rinfo, RestrictInfo))
		return false;

	/* Pseudoconstants are not interesting (they couldn't contain a Var) */
	if (rinfo->pseudoconstant)
		return false;

	/* Clauses referencing multiple, or no, varnos are incompatible */
	if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
		return false;

	if (is_opclause(rinfo->clause))
	{
		/* If it's an opclause, check for Var = Const or Const = Var. */
		OpExpr	   *expr = (OpExpr *) rinfo->clause;

		/* Only expressions with two arguments are candidates. */
		if (list_length(expr->args) != 2)
			return false;

		/* Make sure non-selected argument is a pseudoconstant. */
		if (is_pseudo_constant_clause(lsecond(expr->args)))
			var = linitial(expr->args);
		else if (is_pseudo_constant_clause(linitial(expr->args)))
			var = lsecond(expr->args);
		else
			return false;

		/*
		 * If it's not an "=" operator, just ignore the clause, as it's not
		 * compatible with functional dependencies.
		 *
		 * This uses the function for estimating selectivity, not the operator
		 * directly (a bit awkward, but well ...).
		 *
		 * XXX this is pretty dubious; probably it'd be better to check btree
		 * or hash opclass membership, so as not to be fooled by custom
		 * selectivity functions, and to be more consistent with decisions
		 * elsewhere in the planner.
		 */
		if (get_oprrest(expr->opno) != F_EQSEL)
			return false;

		/* OK to proceed with checking "var" */
	}
	else if (not_clause((Node *) rinfo->clause))
	{
		/*
		 * "NOT x" can be interpreted as "x = false", so get the argument and
		 * proceed with seeing if it's a suitable Var.
		 */
		var = (Var *) get_notclausearg(rinfo->clause);
	}
	else
	{
		/*
		 * A boolean expression "x" can be interpreted as "x = true", so
		 * proceed with seeing if it's a suitable Var.
		 */
		var = (Var *) rinfo->clause;
	}

	/*
	 * We may ignore any RelabelType node above the operand.  (There won't be
	 * more than one, since eval_const_expressions has been applied already.)
	 */
	if (IsA(var, RelabelType))
		var = (Var *) ((RelabelType *) var)->arg;

	/* We only support plain Vars for now */
	if (!IsA(var, Var))
		return false;

	/* Ensure Var is from the correct relation */
	if (var->varno != relid)
		return false;

	/* We also better ensure the Var is from the current level */
	if (var->varlevelsup != 0)
		return false;

	/* Also ignore system attributes (we don't allow stats on those) */
	if (!AttrNumberIsForUserDefinedAttr(var->varattno))
		return false;

	*attnum = var->varattno;
	return true;
}
Exemplo n.º 2
0
/*
 * clauselist_selectivity -
 *	  Compute the selectivity of an implicitly-ANDed list of boolean
 *	  expression clauses.  The list can be empty, in which case 1.0
 *	  must be returned.  List elements may be either RestrictInfos
 *	  or bare expression clauses --- the former is preferred since
 *	  it allows caching of results.
 *
 * See clause_selectivity() for the meaning of the additional parameters.
 *
 * Our basic approach is to take the product of the selectivities of the
 * subclauses.  However, that's only right if the subclauses have independent
 * probabilities, and in reality they are often NOT independent.  So,
 * we want to be smarter where we can.

 * Currently, the only extra smarts we have is to recognize "range queries",
 * such as "x > 34 AND x < 42".  Clauses are recognized as possible range
 * query components if they are restriction opclauses whose operators have
 * scalarltsel() or scalargtsel() as their restriction selectivity estimator.
 * We pair up clauses of this form that refer to the same variable.  An
 * unpairable clause of this kind is simply multiplied into the selectivity
 * product in the normal way.  But when we find a pair, we know that the
 * selectivities represent the relative positions of the low and high bounds
 * within the column's range, so instead of figuring the selectivity as
 * hisel * losel, we can figure it as hisel + losel - 1.  (To visualize this,
 * see that hisel is the fraction of the range below the high bound, while
 * losel is the fraction above the low bound; so hisel can be interpreted
 * directly as a 0..1 value but we need to convert losel to 1-losel before
 * interpreting it as a value.  Then the available range is 1-losel to hisel.
 * However, this calculation double-excludes nulls, so really we need
 * hisel + losel + null_frac - 1.)
 *
 * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
 * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
 * yields an impossible (negative) result.
 *
 * A free side-effect is that we can recognize redundant inequalities such
 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
 *
 * Of course this is all very dependent on the behavior of
 * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
 */
Selectivity
clauselist_selectivity(PlannerInfo *root,
					   List *clauses,
					   int varRelid,
					   JoinType jointype,
					   SpecialJoinInfo *sjinfo,
					   bool use_damping)
{
	Selectivity s1 = 1.0;
	Selectivity *rgsel = NULL;
	RangeQueryClause *rqlist = NULL;
	ListCell   *l;

	int pos = 0;
	int i = 0;

	/* allocate array to hold all selectivity factors */
	rgsel = (Selectivity *) palloc(sizeof(Selectivity) * list_length(clauses));

	/*
	 * If there's exactly one clause, then no use in trying to match up pairs,
	 * so just go directly to clause_selectivity().
	 */
	if (list_length(clauses) == 1)
		return clause_selectivity(root, (Node *) linitial(clauses),
								  varRelid, jointype, sjinfo, use_damping);

	/*
	 * Initial scan over clauses.  Anything that doesn't look like a potential
	 * rangequery clause gets directly added as selectivity factor. Anything that
	 * does gets inserted into an rqlist entry.
	 */
	foreach(l, clauses)
	{
		Node	   *clause = (Node *) lfirst(l);
		RestrictInfo *rinfo;
		Selectivity s2;

		/* Always compute the selectivity using clause_selectivity */
		s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo, use_damping);

		/*
		 * Check for being passed a RestrictInfo.
		 *
		 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
		 * 0.0; just use that rather than looking for range pairs.
		 */
		if (IsA(clause, RestrictInfo))
		{
			rinfo = (RestrictInfo *) clause;
			if (rinfo->pseudoconstant)
			{
				rgsel[pos++] = s2;
				continue;
			}
			clause = (Node *) rinfo->clause;
		}
		else
			rinfo = NULL;

		/*
		 * See if it looks like a restriction clause with a pseudoconstant on
		 * one side.  (Anything more complicated than that might not behave in
		 * the simple way we are expecting.)  Most of the tests here can be
		 * done more efficiently with rinfo than without.
		 */
		if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
		{
			OpExpr	   *expr = (OpExpr *) clause;
			bool		varonleft = true;
			bool		ok;

			if (rinfo)
			{
				ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
					(is_pseudo_constant_clause_relids(lsecond(expr->args),
													  rinfo->right_relids) ||
					 (varonleft = false,
					  is_pseudo_constant_clause_relids(linitial(expr->args),
													   rinfo->left_relids)));
			}
			else
			{
				ok = (NumRelids(clause) == 1) &&
					(is_pseudo_constant_clause(lsecond(expr->args)) ||
					 (varonleft = false,
					  is_pseudo_constant_clause(linitial(expr->args))));
			}

			if (ok)
			{
				/*
				 * If it's not a "<" or ">" operator, just merge the
				 * selectivity in generically.  But if it's the right oprrest,
				 * add the clause to rqlist for later processing.
				 */
				switch (get_oprrest(expr->opno))
				{
					case F_SCALARLTSEL:
						addRangeClause(&rqlist, clause,
									   varonleft, true, s2);
						break;
					case F_SCALARGTSEL:
						addRangeClause(&rqlist, clause,
									   varonleft, false, s2);
						break;
					default:
						/* Just merge the selectivity in generically */
						rgsel[pos++] = s2;
						break;
				}
				continue;		/* drop to loop bottom */
			}
		}

		/* Not the right form, so treat it generically. */
		rgsel[pos++] = s2;
	}
Exemplo n.º 3
0
/*
 * clauselist_selectivity -
 *	  Compute the selectivity of an implicitly-ANDed list of boolean
 *	  expression clauses.  The list can be empty, in which case 1.0
 *	  must be returned.
 *
 * See clause_selectivity() for the meaning of the additional parameters.
 *
 * Our basic approach is to take the product of the selectivities of the
 * subclauses.	However, that's only right if the subclauses have independent
 * probabilities, and in reality they are often NOT independent.  So,
 * we want to be smarter where we can.

 * Currently, the only extra smarts we have is to recognize "range queries",
 * such as "x > 34 AND x < 42".  Clauses are recognized as possible range
 * query components if they are restriction opclauses whose operators have
 * scalarltsel() or scalargtsel() as their restriction selectivity estimator.
 * We pair up clauses of this form that refer to the same variable.  An
 * unpairable clause of this kind is simply multiplied into the selectivity
 * product in the normal way.  But when we find a pair, we know that the
 * selectivities represent the relative positions of the low and high bounds
 * within the column's range, so instead of figuring the selectivity as
 * hisel * losel, we can figure it as hisel + losel - 1.  (To visualize this,
 * see that hisel is the fraction of the range below the high bound, while
 * losel is the fraction above the low bound; so hisel can be interpreted
 * directly as a 0..1 value but we need to convert losel to 1-losel before
 * interpreting it as a value.	Then the available range is 1-losel to hisel.
 * However, this calculation double-excludes nulls, so really we need
 * hisel + losel + null_frac - 1.)
 * If the calculation yields zero or negative, however, we chicken out and
 * use a default estimate; that probably means that one or both
 * selectivities is a default estimate rather than an actual range value.
 * Of course this is all very dependent on the behavior of
 * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
 */
Selectivity
clauselist_selectivity(Query *root,
					   List *clauses,
					   int varRelid,
					   JoinType jointype)
{
	Selectivity s1 = 1.0;
	RangeQueryClause *rqlist = NULL;
	List	   *clist;

	/*
	 * Initial scan over clauses.  Anything that doesn't look like a
	 * potential rangequery clause gets multiplied into s1 and forgotten.
	 * Anything that does gets inserted into an rqlist entry.
	 */
	foreach(clist, clauses)
	{
		Node	   *clause = (Node *) lfirst(clist);
		Selectivity s2;

		/*
		 * See if it looks like a restriction clause with a pseudoconstant
		 * on one side.  (Anything more complicated than that might not
		 * behave in the simple way we are expecting.)
		 *
		 * NB: for consistency of results, this fragment of code had better
		 * match what clause_selectivity() would do in the cases it
		 * handles.
		 */
		if (is_opclause(clause) &&
			(varRelid != 0 || NumRelids(clause) == 1))
		{
			OpExpr	   *expr = (OpExpr *) clause;

			if (length(expr->args) == 2)
			{
				bool		varonleft = true;

				if (is_pseudo_constant_clause(lsecond(expr->args)) ||
					(varonleft = false,
					 is_pseudo_constant_clause(lfirst(expr->args))))
				{
					Oid			opno = expr->opno;
					RegProcedure oprrest = get_oprrest(opno);

					s2 = restriction_selectivity(root, opno,
												 expr->args, varRelid);

					/*
					 * If we reach here, we have computed the same result
					 * that clause_selectivity would, so we can just use
					 * s2 if it's the wrong oprrest.  But if it's the
					 * right oprrest, add the clause to rqlist for later
					 * processing.
					 */
					switch (oprrest)
					{
						case F_SCALARLTSEL:
							addRangeClause(&rqlist, clause,
										   varonleft, true, s2);
							break;
						case F_SCALARGTSEL:
							addRangeClause(&rqlist, clause,
										   varonleft, false, s2);
							break;
						default:
							/* Just merge the selectivity in generically */
							s1 = s1 * s2;
							break;
					}
					continue;	/* drop to loop bottom */
				}
			}
		}
		/* Not the right form, so treat it generically. */
		s2 = clause_selectivity(root, clause, varRelid, jointype);
		s1 = s1 * s2;
	}