Пример #1
1
/*----------
 * Determine which quadrant a 2d-mapped range falls into, relative to the
 * centroid.
 *
 * Quadrants are numbered like this:
 *
 *	 4	|  1
 *	----+----
 *	 3	|  2
 *
 * Where the lower bound of range is the horizontal axis and upper bound the
 * vertical axis.
 *
 * Ranges on one of the axes are taken to lie in the quadrant with higher value
 * along perpendicular axis. That is, a value on the horizontal axis is taken
 * to belong to quadrant 1 or 4, and a value on the vertical axis is taken to
 * belong to quadrant 1 or 2. A range equal to centroid is taken to lie in
 * quadrant 1.
 *
 * Empty ranges are taken to lie in the special quadrant 5.
 *----------
 */
static int16
getQuadrant(TypeCacheEntry *typcache, RangeType *centroid, RangeType *tst)
{
	RangeBound	centroidLower,
				centroidUpper;
	bool		centroidEmpty;
	RangeBound	lower,
				upper;
	bool		empty;

	range_deserialize(typcache, centroid, &centroidLower, &centroidUpper,
					  &centroidEmpty);
	range_deserialize(typcache, tst, &lower, &upper, &empty);

	if (empty)
		return 5;

	if (range_cmp_bounds(typcache, &lower, &centroidLower) >= 0)
	{
		if (range_cmp_bounds(typcache, &upper, &centroidUpper) >= 0)
			return 1;
		else
			return 2;
	}
	else
	{
		if (range_cmp_bounds(typcache, &upper, &centroidUpper) >= 0)
			return 4;
		else
			return 3;
	}
}
Пример #2
0
/*
 * Calculate range operator selectivity using histograms of range bounds.
 *
 * This estimate is for the portion of values that are not empty and not
 * NULL.
 */
static double
calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
					  RangeType *constval, Oid operator)
{
	Datum	   *hist_values;
	int			nhist;
	Datum	   *length_hist_values;
	int			length_nhist;
	RangeBound *hist_lower;
	RangeBound *hist_upper;
	int			i;
	RangeBound	const_lower;
	RangeBound	const_upper;
	bool		empty;
	double		hist_selec;

	/* Try to get histogram of ranges */
	if (!(HeapTupleIsValid(vardata->statsTuple) &&
		  get_attstatsslot(vardata->statsTuple,
						   vardata->atttype, vardata->atttypmod,
						   STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
						   NULL,
						   &hist_values, &nhist,
						   NULL, NULL)))
		return -1.0;

	/*
	 * Convert histogram of ranges into histograms of its lower and upper
	 * bounds.
	 */
	hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
	hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
	for (i = 0; i < nhist; i++)
	{
		range_deserialize(typcache, DatumGetRangeType(hist_values[i]),
						  &hist_lower[i], &hist_upper[i], &empty);
		/* The histogram should not contain any empty ranges */
		if (empty)
			elog(ERROR, "bounds histogram contains an empty range");
	}

	/* @> and @< also need a histogram of range lengths */
	if (operator == OID_RANGE_CONTAINS_OP ||
		operator == OID_RANGE_CONTAINED_OP)
	{
		if (!(HeapTupleIsValid(vardata->statsTuple) &&
			  get_attstatsslot(vardata->statsTuple,
							   vardata->atttype, vardata->atttypmod,
							   STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
							   InvalidOid,
							   NULL,
							   &length_hist_values, &length_nhist,
							   NULL, NULL)))
			return -1.0;

		/* check that it's a histogram, not just a dummy entry */
		if (length_nhist < 2)
			return -1.0;
	}

	/* Extract the bounds of the constant value. */
	range_deserialize(typcache, constval, &const_lower, &const_upper, &empty);
	Assert(!empty);

	/*
	 * Calculate selectivity comparing the lower or upper bound of the
	 * constant with the histogram of lower or upper bounds.
	 */
	switch (operator)
	{
		case OID_RANGE_LESS_OP:

			/*
			 * The regular b-tree comparison operators (<, <=, >, >=) compare
			 * the lower bounds first, and the upper bounds for values with
			 * equal lower bounds. Estimate that by comparing the lower bounds
			 * only. This gives a fairly accurate estimate assuming there
			 * aren't many rows with a lower bound equal to the constant's
			 * lower bound.
			 */
			hist_selec =
				calc_hist_selectivity_scalar(typcache, &const_lower,
											 hist_lower, nhist, false);
			break;

		case OID_RANGE_LESS_EQUAL_OP:
			hist_selec =
				calc_hist_selectivity_scalar(typcache, &const_lower,
											 hist_lower, nhist, true);
			break;

		case OID_RANGE_GREATER_OP:
			hist_selec =
				1 - calc_hist_selectivity_scalar(typcache, &const_lower,
												 hist_lower, nhist, false);
			break;

		case OID_RANGE_GREATER_EQUAL_OP:
			hist_selec =
				1 - calc_hist_selectivity_scalar(typcache, &const_lower,
												 hist_lower, nhist, true);
			break;

		case OID_RANGE_LEFT_OP:
			/* var << const when upper(var) < lower(const) */
			hist_selec =
				calc_hist_selectivity_scalar(typcache, &const_lower,
											 hist_upper, nhist, false);
			break;

		case OID_RANGE_RIGHT_OP:
			/* var >> const when lower(var) > upper(const) */
			hist_selec =
				1 - calc_hist_selectivity_scalar(typcache, &const_upper,
												 hist_lower, nhist, true);
			break;

		case OID_RANGE_OVERLAPS_RIGHT_OP:
			/* compare lower bounds */
			hist_selec =
				1 - calc_hist_selectivity_scalar(typcache, &const_lower,
												 hist_lower, nhist, false);
			break;

		case OID_RANGE_OVERLAPS_LEFT_OP:
			/* compare upper bounds */
			hist_selec =
				calc_hist_selectivity_scalar(typcache, &const_upper,
											 hist_upper, nhist, true);
			break;

		case OID_RANGE_OVERLAP_OP:
		case OID_RANGE_CONTAINS_ELEM_OP:

			/*
			 * A && B <=> NOT (A << B OR A >> B).
			 *
			 * Since A << B and A >> B are mutually exclusive events we can
			 * sum their probabilities to find probability of (A << B OR A >>
			 * B).
			 *
			 * "range @> elem" is equivalent to "range && [elem,elem]". The
			 * caller already constructed the singular range from the element
			 * constant, so just treat it the same as &&.
			 */
			hist_selec =
				calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper,
											 nhist, false);
			hist_selec +=
				(1.0 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower,
													nhist, true));
			hist_selec = 1.0 - hist_selec;
			break;

		case OID_RANGE_CONTAINS_OP:
			hist_selec =
				calc_hist_selectivity_contains(typcache, &const_lower,
											 &const_upper, hist_lower, nhist,
										   length_hist_values, length_nhist);
			break;

		case OID_RANGE_CONTAINED_OP:
			if (const_lower.infinite)
			{
				/*
				 * Lower bound no longer matters. Just estimate the fraction
				 * with an upper bound <= const upper bound
				 */
				hist_selec =
					calc_hist_selectivity_scalar(typcache, &const_upper,
												 hist_upper, nhist, true);
			}
			else if (const_upper.infinite)
			{
				hist_selec =
					1.0 - calc_hist_selectivity_scalar(typcache, &const_lower,
												   hist_lower, nhist, false);
			}
			else
			{
				hist_selec =
					calc_hist_selectivity_contained(typcache, &const_lower,
											 &const_upper, hist_lower, nhist,
										   length_hist_values, length_nhist);
			}
			break;

		default:
			elog(ERROR, "unknown range operator %u", operator);
			hist_selec = -1.0;	/* keep compiler quiet */
			break;
	}

	return hist_selec;
}
Пример #3
0
/*
 * SP-GiST consistent function for inner nodes: check which nodes are
 * consistent with given set of queries.
 */
Datum
spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
{
	spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
	spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
	int			which;
	int			i;

	if (in->allTheSame)
	{
		/* Report that all nodes should be visited */
		out->nNodes = in->nNodes;
		out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
		for (i = 0; i < in->nNodes; i++)
			out->nodeNumbers[i] = i;
		PG_RETURN_VOID();
	}

	if (!in->hasPrefix)
	{
		/*
		 * No centroid on this inner node. Such a node has two child nodes,
		 * the first for empty ranges, and the second for non-empty ones.
		 */
		Assert(in->nNodes == 2);

		/*
		 * Nth bit of which variable means that (N - 1)th node should be
		 * visited. Initially all bits are set. Bits of nodes which should be
		 * skipped will be unset.
		 */
		which = (1 << 1) | (1 << 2);
		for (i = 0; i < in->nkeys; i++)
		{
			StrategyNumber strategy = in->scankeys[i].sk_strategy;
			bool		empty;

			/*
			 * The only strategy when second argument of operator is not range
			 * is RANGESTRAT_CONTAINS_ELEM.
			 */
			if (strategy != RANGESTRAT_CONTAINS_ELEM)
				empty = RangeIsEmpty(
							 DatumGetRangeType(in->scankeys[i].sk_argument));
			else
				empty = false;

			switch (strategy)
			{
				case RANGESTRAT_BEFORE:
				case RANGESTRAT_OVERLEFT:
				case RANGESTRAT_OVERLAPS:
				case RANGESTRAT_OVERRIGHT:
				case RANGESTRAT_AFTER:
					/* These strategies return false if any argument is empty */
					if (empty)
						which = 0;
					else
						which &= (1 << 2);
					break;

				case RANGESTRAT_CONTAINS:
					/*
					 * All ranges contain an empty range. Only non-empty ranges
					 * can contain a non-empty range.
					 */
					if (!empty)
						which &= (1 << 2);
					break;

				case RANGESTRAT_CONTAINED_BY:
					/*
					 * Only an empty range is contained by an empty range. Both
					 * empty and non-empty ranges can be contained by a
					 * non-empty range.
					 */
					if (empty)
						which &= (1 << 1);
					break;

				case RANGESTRAT_CONTAINS_ELEM:
					which &= (1 << 2);
					break;

				case RANGESTRAT_EQ:
					if (empty)
						which &= (1 << 1);
					else
						which &= (1 << 2);
					break;

				default:
					elog(ERROR, "unrecognized range strategy: %d", strategy);
					break;
			}
			if (which == 0)
				break;			/* no need to consider remaining conditions */
		}
	}
	else
	{
		RangeBound	centroidLower,
					centroidUpper;
		bool		centroidEmpty;
		TypeCacheEntry *typcache;
		RangeType  *centroid;

		/* This node has a centroid. Fetch it. */
		centroid = DatumGetRangeType(in->prefixDatum);
		typcache = range_get_typcache(fcinfo,
							   RangeTypeGetOid(DatumGetRangeType(centroid)));
		range_deserialize(typcache, centroid, &centroidLower, &centroidUpper,
						  &centroidEmpty);

		Assert(in->nNodes == 4 || in->nNodes == 5);

		/*
		 * Nth bit of which variable means that (N - 1)th node (Nth quadrant)
		 * should be visited. Initially all bits are set. Bits of nodes which
		 * can be skipped will be unset.
		 */
		which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5);

		for (i = 0; i < in->nkeys; i++)
		{
			StrategyNumber strategy;
			RangeBound	lower,
						upper;
			bool		empty;
			RangeType  *range = NULL;
			/* Restrictions on range bounds according to scan strategy */
			RangeBound *minLower = NULL,
					   *maxLower = NULL,
					   *minUpper = NULL,
					   *maxUpper = NULL;
			/* Are the restrictions on range bounds inclusive? */
			bool		inclusive = true;
			bool		strictEmpty = true;

			strategy = in->scankeys[i].sk_strategy;

			/*
			 * RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but
			 * the argument is a single element. Expand the single element to
			 * a range containing only the element, and treat it like
			 * RANGESTRAT_CONTAINS.
			 */
			if (strategy == RANGESTRAT_CONTAINS_ELEM)
			{
				lower.inclusive = true;
				lower.infinite = false;
				lower.lower = true;
				lower.val = in->scankeys[i].sk_argument;

				upper.inclusive = true;
				upper.infinite = false;
				upper.lower = false;
				upper.val = in->scankeys[i].sk_argument;

				empty = false;

				strategy = RANGESTRAT_CONTAINS;
			}
			else
			{
				range = DatumGetRangeType(in->scankeys[i].sk_argument);
				range_deserialize(typcache, range, &lower, &upper, &empty);
			}

			/*
			 * Most strategies are handled by forming a bounding box from the
			 * search key, defined by a minLower, maxLower, minUpper, maxUpper.
			 * Some modify 'which' directly, to specify exactly which quadrants
			 * need to be visited.
			 *
			 * For most strategies, nothing matches an empty search key, and
			 * an empty range never matches a non-empty key. If a strategy
			 * does not behave like that wrt. empty ranges, set strictEmpty to
			 * false.
			 */
			switch (strategy)
			{
				case RANGESTRAT_BEFORE:
					/*
					 * Range A is before range B if upper bound of A is lower
					 * than lower bound of B.
					 */
					maxUpper = &lower;
					inclusive = false;
					break;

				case RANGESTRAT_OVERLEFT:
					/*
					 * Range A is overleft to range B if upper bound of A is
					 * less or equal to upper bound of B.
					 */
					maxUpper = &upper;
					break;

				case RANGESTRAT_OVERLAPS:
					/*
					 * Non-empty ranges overlap, if lower bound of each range
					 * is lower or equal to upper bound of the other range.
					 */
					maxLower = &upper;
					minUpper = &lower;
					break;

				case RANGESTRAT_OVERRIGHT:
					/*
					 * Range A is overright to range B if lower bound of A is
					 * greater or equal to lower bound of B.
					 */
					minLower = &lower;
					break;

				case RANGESTRAT_AFTER:
					/*
					 * Range A is after range B if lower bound of A is greater
					 * than upper bound of B.
					 */
					minLower = &upper;
					inclusive = false;
					break;

				case RANGESTRAT_CONTAINS:
					/*
					 * Non-empty range A contains non-empty range B if lower
					 * bound of A is lower or equal to lower bound of range B
					 * and upper bound of range A is greater or equal to upper
					 * bound of range A.
					 *
					 * All non-empty ranges contain an empty range.
					 */
					strictEmpty = false;
					if (!empty)
					{
						which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
						maxLower = &lower;
						minUpper = &upper;
					}
					break;

				case RANGESTRAT_CONTAINED_BY:
					/* The opposite of contains. */
					strictEmpty = false;
					if (empty)
					{
						/* An empty range is only contained by an empty range */
						which &= (1 << 5);
					}
					else
					{
						minLower = &lower;
						maxUpper = &upper;
					}
					break;

				case RANGESTRAT_EQ:
					/*
					 * Equal range can be only in the same quadrant where
					 * argument would be placed to.
					 */
					strictEmpty = false;
					which &= (1 << getQuadrant(typcache, centroid, range));
					break;

				default:
					elog(ERROR, "unrecognized range strategy: %d", strategy);
					break;
			}

			if (strictEmpty)
			{
				if (empty)
				{
					/* Scan key is empty, no branches are satisfying */
					which = 0;
					break;
				}
				else
				{
					/* Shouldn't visit tree branch with empty ranges */
					which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
				}
			}

			/*
			 * Using the bounding box, see which quadrants we have to descend
			 * into.
			 */
			if (minLower)
			{
				/*
				 * If the centroid's lower bound is less than or equal to
				 * the minimum lower bound, anything in the 3rd and 4th
				 * quadrants will have an even smaller lower bound, and thus
				 * can't match.
				 */
				if (range_cmp_bounds(typcache, &centroidLower, minLower) <= 0)
					which &= (1 << 1) | (1 << 2) | (1 << 5);
			}
			if (maxLower)
			{
				/*
				 * If the centroid's lower bound is greater than the maximum
				 * lower bound, anything in the 1st and 2nd quadrants will
				 * also have a greater than or equal lower bound, and thus
				 * can't match. If the centroid's lower bound is equal to
				 * the maximum lower bound, we can still exclude the 1st and
				 * 2nd quadrants if we're looking for a value strictly greater
				 * than the maximum.
				 */
				int			cmp;

				cmp = range_cmp_bounds(typcache, &centroidLower, maxLower);
				if (cmp > 0 || (!inclusive && cmp == 0))
					which &= (1 << 3) | (1 << 4) | (1 << 5);
			}
			if (minUpper)
			{
				/*
				 * If the centroid's upper bound is less than or equal to
				 * the minimum upper bound, anything in the 2nd and 3rd
				 * quadrants will have an even smaller upper bound, and thus
				 * can't match.
				 */
				if (range_cmp_bounds(typcache, &centroidUpper, minUpper) <= 0)
					which &= (1 << 1) | (1 << 4) | (1 << 5);
			}
			if (maxUpper)
			{
				/*
				 * If the centroid's upper bound is greater than the maximum
				 * upper bound, anything in the 1st and 4th quadrants will
				 * also have a greater than or equal upper bound, and thus
				 * can't match. If the centroid's upper bound is equal to
				 * the maximum upper bound, we can still exclude the 1st and
				 * 4th quadrants if we're looking for a value strictly greater
				 * than the maximum.
				 */
				int			cmp;

				cmp = range_cmp_bounds(typcache, &centroidUpper, maxUpper);
				if (cmp > 0 || (!inclusive && cmp == 0))
					which &= (1 << 2) | (1 << 3) | (1 << 5);
			}

			if (which == 0)
				break;			/* no need to consider remaining conditions */
		}
	}

	/* We must descend into the quadrant(s) identified by 'which' */
	out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
	out->nNodes = 0;
	for (i = 1; i <= in->nNodes; i++)
	{
		if (which & (1 << i))
			out->nodeNumbers[out->nNodes++] = i - 1;
	}

	PG_RETURN_VOID();
}
Пример #4
0
/*
 * Picksplit SP-GiST function: split ranges into nodes. Select "centroid"
 * range and distribute ranges according to quadrants.
 */
Datum
spg_range_quad_picksplit(PG_FUNCTION_ARGS)
{
	spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0);
	spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1);
	int			i;
	int			j;
	int			nonEmptyCount;
	RangeType  *centroid;
	bool		empty;
	TypeCacheEntry *typcache;

	/* Use the median values of lower and upper bounds as the centroid range */
	RangeBound *lowerBounds,
			   *upperBounds;

	typcache = range_get_typcache(fcinfo,
						  RangeTypeGetOid(DatumGetRangeType(in->datums[0])));

	/* Allocate memory for bounds */
	lowerBounds = palloc(sizeof(RangeBound) * in->nTuples);
	upperBounds = palloc(sizeof(RangeBound) * in->nTuples);
	j = 0;

	/* Deserialize bounds of ranges, count non-empty ranges */
	for (i = 0; i < in->nTuples; i++)
	{
		range_deserialize(typcache, DatumGetRangeType(in->datums[i]),
						  &lowerBounds[j], &upperBounds[j], &empty);
		if (!empty)
			j++;
	}
	nonEmptyCount = j;

	/*
	 * All the ranges are empty. The best we can do is to construct an inner
	 * node with no centroid, and put all ranges into node 0. If non-empty
	 * ranges are added later, they will be routed to node 1.
	 */
	if (nonEmptyCount == 0)
	{
		out->nNodes = 2;
		out->hasPrefix = false;
		/* Prefix is empty */
		out->prefixDatum = PointerGetDatum(NULL);
		out->nodeLabels = NULL;

		out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
		out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);

		/* Place all ranges into node 0 */
		for (i = 0; i < in->nTuples; i++)
		{
			RangeType  *range = DatumGetRangeType(in->datums[i]);

			out->leafTupleDatums[i] = RangeTypeGetDatum(range);
			out->mapTuplesToNodes[i] = 0;
		}
		PG_RETURN_VOID();
	}

	/* Sort range bounds in order to find medians */
	qsort_arg(lowerBounds, nonEmptyCount, sizeof(RangeBound),
			  bound_cmp, typcache);
	qsort_arg(upperBounds, nonEmptyCount, sizeof(RangeBound),
			  bound_cmp, typcache);

	/* Construct "centroid" range from medians of lower and upper bounds */
	centroid = range_serialize(typcache, &lowerBounds[nonEmptyCount / 2],
							   &upperBounds[nonEmptyCount / 2], false);
	out->hasPrefix = true;
	out->prefixDatum = RangeTypeGetDatum(centroid);

	/* Create node for empty ranges only if it is a root node */
	out->nNodes = (in->level == 0) ? 5 : 4;
	out->nodeLabels = NULL;		/* we don't need node labels */

	out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples);
	out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples);

	/*
	 * Assign ranges to corresponding nodes according to quadrants relative to
	 * "centroid" range.
	 */
	for (i = 0; i < in->nTuples; i++)
	{
		RangeType  *range = DatumGetRangeType(in->datums[i]);
		int16		quadrant = getQuadrant(typcache, centroid, range);

		out->leafTupleDatums[i] = RangeTypeGetDatum(range);
		out->mapTuplesToNodes[i] = quadrant - 1;
	}

	PG_RETURN_VOID();
}