Example #1
0
/*
 * clauselist_selectivity -
 *	  Compute the selectivity of an implicitly-ANDed list of boolean
 *	  expression clauses.  The list can be empty, in which case 1.0
 *	  must be returned.  List elements may be either RestrictInfos
 *	  or bare expression clauses --- the former is preferred since
 *	  it allows caching of results.
 *
 * See clause_selectivity() for the meaning of the additional parameters.
 *
 * Our basic approach is to take the product of the selectivities of the
 * subclauses.  However, that's only right if the subclauses have independent
 * probabilities, and in reality they are often NOT independent.  So,
 * we want to be smarter where we can.

 * Currently, the only extra smarts we have is to recognize "range queries",
 * such as "x > 34 AND x < 42".  Clauses are recognized as possible range
 * query components if they are restriction opclauses whose operators have
 * scalarltsel() or scalargtsel() as their restriction selectivity estimator.
 * We pair up clauses of this form that refer to the same variable.  An
 * unpairable clause of this kind is simply multiplied into the selectivity
 * product in the normal way.  But when we find a pair, we know that the
 * selectivities represent the relative positions of the low and high bounds
 * within the column's range, so instead of figuring the selectivity as
 * hisel * losel, we can figure it as hisel + losel - 1.  (To visualize this,
 * see that hisel is the fraction of the range below the high bound, while
 * losel is the fraction above the low bound; so hisel can be interpreted
 * directly as a 0..1 value but we need to convert losel to 1-losel before
 * interpreting it as a value.  Then the available range is 1-losel to hisel.
 * However, this calculation double-excludes nulls, so really we need
 * hisel + losel + null_frac - 1.)
 *
 * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
 * and instead use DEFAULT_RANGE_INEQ_SEL.  The same applies if the equation
 * yields an impossible (negative) result.
 *
 * A free side-effect is that we can recognize redundant inequalities such
 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
 *
 * Of course this is all very dependent on the behavior of
 * scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
 */
Selectivity
clauselist_selectivity(PlannerInfo *root,
					   List *clauses,
					   int varRelid,
					   JoinType jointype,
					   SpecialJoinInfo *sjinfo,
					   bool use_damping)
{
	Selectivity s1 = 1.0;
	Selectivity *rgsel = NULL;
	RangeQueryClause *rqlist = NULL;
	ListCell   *l;

	int pos = 0;
	int i = 0;

	/* allocate array to hold all selectivity factors */
	rgsel = (Selectivity *) palloc(sizeof(Selectivity) * list_length(clauses));

	/*
	 * If there's exactly one clause, then no use in trying to match up pairs,
	 * so just go directly to clause_selectivity().
	 */
	if (list_length(clauses) == 1)
		return clause_selectivity(root, (Node *) linitial(clauses),
								  varRelid, jointype, sjinfo, use_damping);

	/*
	 * Initial scan over clauses.  Anything that doesn't look like a potential
	 * rangequery clause gets directly added as selectivity factor. Anything that
	 * does gets inserted into an rqlist entry.
	 */
	foreach(l, clauses)
	{
		Node	   *clause = (Node *) lfirst(l);
		RestrictInfo *rinfo;
		Selectivity s2;

		/* Always compute the selectivity using clause_selectivity */
		s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo, use_damping);

		/*
		 * Check for being passed a RestrictInfo.
		 *
		 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
		 * 0.0; just use that rather than looking for range pairs.
		 */
		if (IsA(clause, RestrictInfo))
		{
			rinfo = (RestrictInfo *) clause;
			if (rinfo->pseudoconstant)
			{
				rgsel[pos++] = s2;
				continue;
			}
			clause = (Node *) rinfo->clause;
		}
		else
			rinfo = NULL;

		/*
		 * See if it looks like a restriction clause with a pseudoconstant on
		 * one side.  (Anything more complicated than that might not behave in
		 * the simple way we are expecting.)  Most of the tests here can be
		 * done more efficiently with rinfo than without.
		 */
		if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
		{
			OpExpr	   *expr = (OpExpr *) clause;
			bool		varonleft = true;
			bool		ok;

			if (rinfo)
			{
				ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
					(is_pseudo_constant_clause_relids(lsecond(expr->args),
													  rinfo->right_relids) ||
					 (varonleft = false,
					  is_pseudo_constant_clause_relids(linitial(expr->args),
													   rinfo->left_relids)));
			}
			else
			{
				ok = (NumRelids(clause) == 1) &&
					(is_pseudo_constant_clause(lsecond(expr->args)) ||
					 (varonleft = false,
					  is_pseudo_constant_clause(linitial(expr->args))));
			}

			if (ok)
			{
				/*
				 * If it's not a "<" or ">" operator, just merge the
				 * selectivity in generically.  But if it's the right oprrest,
				 * add the clause to rqlist for later processing.
				 */
				switch (get_oprrest(expr->opno))
				{
					case F_SCALARLTSEL:
						addRangeClause(&rqlist, clause,
									   varonleft, true, s2);
						break;
					case F_SCALARGTSEL:
						addRangeClause(&rqlist, clause,
									   varonleft, false, s2);
						break;
					default:
						/* Just merge the selectivity in generically */
						rgsel[pos++] = s2;
						break;
				}
				continue;		/* drop to loop bottom */
			}
		}

		/* Not the right form, so treat it generically. */
		rgsel[pos++] = s2;
	}
Example #2
0
/*
 * Does the supplied GpPolicy support unique indexing on the specified
 * attributes?
 *
 * If the table is distributed randomly, no unique indexing is supported.
 * Otherwise, the set of columns being indexed should be a superset of the
 * policy.
 *
 * If the proposed index does not match the distribution policy but the relation
 * is empty and does not have a primary key or unique index, update the
 * distribution policy to match the index definition (MPP-101), as long as it
 * doesn't contain expressions.
 */
void
checkPolicyForUniqueIndex(Relation rel, AttrNumber *indattr, int nidxatts,
			 			  bool isprimary, bool has_exprs, bool has_pkey,
						  bool has_ukey)
{
	Bitmapset *polbm = NULL;
	Bitmapset *indbm = NULL;
	int i;
	GpPolicy *pol = rel->rd_cdbpolicy;

	/* 
	 * Firstly, unique/primary key indexes aren't supported if we're
	 * distributing randomly.
	 */
	if (GpPolicyIsRandomly(pol))
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("%s and DISTRIBUTED RANDOMLY are incompatible",
						isprimary ? "PRIMARY KEY" : "UNIQUE")));
	}

	/* 
	 * We use bitmaps to make intersection tests easier. As noted, order is
	 * not relevant so looping is just painful.
	 */
	for (i = 0; i < pol->nattrs; i++)
		polbm = bms_add_member(polbm, pol->attrs[i]);
	for (i = 0; i < nidxatts; i++)
	{
		if (indattr[i] < 0)
        	ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot create %s on system column",
							isprimary ? "primary key" : "unique index")));

		indbm = bms_add_member(indbm, indattr[i]);
	}

	Assert(bms_membership(polbm) != BMS_EMPTY_SET);
	Assert(bms_membership(indbm) != BMS_EMPTY_SET);

	/* 
	 * If the existing policy is not a subset, we must either error out or
	 * update the distribution policy. It might be tempting to say that even
	 * when the policy is a subset, we should update it to match the index
	 * definition. The problem then is that if the user actually wants to
	 * distribution on (a, b) but then creates an index on (a, b, c) we'll
	 * change the policy underneath them.
	 *
	 * What is really needed is a new field in gp_distribution_policy telling us
	 * if the policy has been explicitly set.
	 */
	if (!bms_is_subset(polbm, indbm))
	{
		if (cdbRelSize(rel) != 0 || has_pkey || has_ukey || has_exprs)
		{
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("%s must contain all columns in the "
							"distribution key of relation \"%s\"",
							isprimary ? "PRIMARY KEY" : "UNIQUE index",
						RelationGetRelationName(rel))));
		}
		else
		{
			/* update policy since table is not populated yet. See MPP-101 */
			GpPolicy *policy = palloc(sizeof(GpPolicy) + 
									  (sizeof(AttrNumber) * nidxatts));
			policy->ptype = POLICYTYPE_PARTITIONED;
			policy->nattrs = 0;
			for (i = 0; i < nidxatts; i++)
				policy->attrs[policy->nattrs++] = indattr[i];	

			GpPolicyReplace(rel->rd_id, policy);

			if (isprimary)
				elog(NOTICE, "updating distribution policy to match new primary key");
			else
				elog(NOTICE, "updating distribution policy to match new unique index");
		}
	}
}
Example #3
0
/*
 * dependency_is_compatible_clause
 *		Determines if the clause is compatible with functional dependencies
 *
 * Only clauses that have the form of equality to a pseudoconstant, or can be
 * interpreted that way, are currently accepted.  Furthermore the variable
 * part of the clause must be a simple Var belonging to the specified
 * relation, whose attribute number we return in *attnum on success.
 */
static bool
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
{
	RestrictInfo *rinfo = (RestrictInfo *) clause;
	Var		   *var;

	if (!IsA(rinfo, RestrictInfo))
		return false;

	/* Pseudoconstants are not interesting (they couldn't contain a Var) */
	if (rinfo->pseudoconstant)
		return false;

	/* Clauses referencing multiple, or no, varnos are incompatible */
	if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
		return false;

	if (is_opclause(rinfo->clause))
	{
		/* If it's an opclause, check for Var = Const or Const = Var. */
		OpExpr	   *expr = (OpExpr *) rinfo->clause;

		/* Only expressions with two arguments are candidates. */
		if (list_length(expr->args) != 2)
			return false;

		/* Make sure non-selected argument is a pseudoconstant. */
		if (is_pseudo_constant_clause(lsecond(expr->args)))
			var = linitial(expr->args);
		else if (is_pseudo_constant_clause(linitial(expr->args)))
			var = lsecond(expr->args);
		else
			return false;

		/*
		 * If it's not an "=" operator, just ignore the clause, as it's not
		 * compatible with functional dependencies.
		 *
		 * This uses the function for estimating selectivity, not the operator
		 * directly (a bit awkward, but well ...).
		 *
		 * XXX this is pretty dubious; probably it'd be better to check btree
		 * or hash opclass membership, so as not to be fooled by custom
		 * selectivity functions, and to be more consistent with decisions
		 * elsewhere in the planner.
		 */
		if (get_oprrest(expr->opno) != F_EQSEL)
			return false;

		/* OK to proceed with checking "var" */
	}
	else if (not_clause((Node *) rinfo->clause))
	{
		/*
		 * "NOT x" can be interpreted as "x = false", so get the argument and
		 * proceed with seeing if it's a suitable Var.
		 */
		var = (Var *) get_notclausearg(rinfo->clause);
	}
	else
	{
		/*
		 * A boolean expression "x" can be interpreted as "x = true", so
		 * proceed with seeing if it's a suitable Var.
		 */
		var = (Var *) rinfo->clause;
	}

	/*
	 * We may ignore any RelabelType node above the operand.  (There won't be
	 * more than one, since eval_const_expressions has been applied already.)
	 */
	if (IsA(var, RelabelType))
		var = (Var *) ((RelabelType *) var)->arg;

	/* We only support plain Vars for now */
	if (!IsA(var, Var))
		return false;

	/* Ensure Var is from the correct relation */
	if (var->varno != relid)
		return false;

	/* We also better ensure the Var is from the current level */
	if (var->varlevelsup != 0)
		return false;

	/* Also ignore system attributes (we don't allow stats on those) */
	if (!AttrNumberIsForUserDefinedAttr(var->varattno))
		return false;

	*attnum = var->varattno;
	return true;
}