示例#1
0
/*
 * Choose SP-GiST function: choose path for addition of new range.
 */
Datum
spg_range_quad_choose(PG_FUNCTION_ARGS)
{
	spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0);
	spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1);
	RangeType  *inRange = DatumGetRangeType(in->datum),
			   *centroid;
	int16		quadrant;
	TypeCacheEntry *typcache;

	if (in->allTheSame)
	{
		out->resultType = spgMatchNode;
		/* nodeN will be set by core */
		out->result.matchNode.levelAdd = 0;
		out->result.matchNode.restDatum = RangeTypeGetDatum(inRange);
		PG_RETURN_VOID();
	}

	typcache = range_get_typcache(fcinfo, RangeTypeGetOid(inRange));

	/*
	 * A node with no centroid divides ranges purely on whether they're empty
	 * or not. All empty ranges go to child node 0, all non-empty ranges go
	 * to node 1.
	 */
	if (!in->hasPrefix)
	{
		out->resultType = spgMatchNode;
		if (RangeIsEmpty(inRange))
			out->result.matchNode.nodeN = 0;
		else
			out->result.matchNode.nodeN = 1;
		out->result.matchNode.levelAdd = 1;
		out->result.matchNode.restDatum = RangeTypeGetDatum(inRange);
		PG_RETURN_VOID();
	}

	centroid = DatumGetRangeType(in->prefixDatum);
	quadrant = getQuadrant(typcache, centroid, inRange);

	Assert(quadrant <= in->nNodes);

	/* Select node matching to quadrant number */
	out->resultType = spgMatchNode;
	out->result.matchNode.nodeN = quadrant - 1;
	out->result.matchNode.levelAdd = 1;
	out->result.matchNode.restDatum = RangeTypeGetDatum(inRange);

	PG_RETURN_VOID();
}
static double
calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
			  RangeType *constval, Oid operator)
{
	double		hist_selec;
	double		selec;
	float4		empty_frac,
				null_frac;

	/*
	 * First look up the fraction of NULLs and empty ranges from pg_statistic.
	 */
	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		float4	   *numbers;
		int			nnumbers;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
		null_frac = stats->stanullfrac;

		/* Try to get fraction of empty ranges */
		if (get_attstatsslot(vardata->statsTuple,
							 vardata->atttype, vardata->atttypmod,
						   STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, InvalidOid,
							 NULL,
							 NULL, NULL,
							 &numbers, &nnumbers))
		{
			if (nnumbers != 1)
				elog(ERROR, "invalid empty fraction statistic");		/* shouldn't happen */
			empty_frac = numbers[0];
		}
		else
		{
			/* No empty fraction statistic. Assume no empty ranges. */
			empty_frac = 0.0;
		}
	}
	else
	{
		/*
		 * No stats are available. Follow through the calculations below
		 * anyway, assuming no NULLs and no empty ranges. This still allows us
		 * to give a better-than-nothing estimate based on whether the
		 * constant is an empty range or not.
		 */
		null_frac = 0.0;
		empty_frac = 0.0;
	}

	if (RangeIsEmpty(constval))
	{
		/*
		 * An empty range matches all ranges, all empty ranges, or nothing,
		 * depending on the operator
		 */
		switch (operator)
		{
				/* these return false if either argument is empty */
			case OID_RANGE_OVERLAP_OP:
			case OID_RANGE_OVERLAPS_LEFT_OP:
			case OID_RANGE_OVERLAPS_RIGHT_OP:
			case OID_RANGE_LEFT_OP:
			case OID_RANGE_RIGHT_OP:
				/* nothing is less than an empty range */
			case OID_RANGE_LESS_OP:
				selec = 0.0;
				break;

				/* only empty ranges can be contained by an empty range */
			case OID_RANGE_CONTAINED_OP:
				/* only empty ranges are <= an empty range */
			case OID_RANGE_LESS_EQUAL_OP:
				selec = empty_frac;
				break;

				/* everything contains an empty range */
			case OID_RANGE_CONTAINS_OP:
				/* everything is >= an empty range */
			case OID_RANGE_GREATER_EQUAL_OP:
				selec = 1.0;
				break;

				/* all non-empty ranges are > an empty range */
			case OID_RANGE_GREATER_OP:
				selec = 1.0 - empty_frac;
				break;

				/* an element cannot be empty */
			case OID_RANGE_CONTAINS_ELEM_OP:
			default:
				elog(ERROR, "unexpected operator %u", operator);
				selec = 0.0;	/* keep compiler quiet */
				break;
		}
	}
	else
	{
		/*
		 * Calculate selectivity using bound histograms. If that fails for
		 * some reason, e.g no histogram in pg_statistic, use the default
		 * constant estimate for the fraction of non-empty values. This is
		 * still somewhat better than just returning the default estimate,
		 * because this still takes into account the fraction of empty and
		 * NULL tuples, if we had statistics for them.
		 */
		hist_selec = calc_hist_selectivity(typcache, vardata, constval,
										   operator);
		if (hist_selec < 0.0)
			hist_selec = default_range_selectivity(operator);

		/*
		 * Now merge the results for the empty ranges and histogram
		 * calculations, realizing that the histogram covers only the
		 * non-null, non-empty values.
		 */
		if (operator == OID_RANGE_CONTAINED_OP)
		{
			/* empty is contained by anything non-empty */
			selec = (1.0 - empty_frac) * hist_selec + empty_frac;
		}
		else
		{
			/* with any other operator, empty Op non-empty matches nothing */
			selec = (1.0 - empty_frac) * hist_selec;
		}
	}

	/* all range operators are strict */
	selec *= (1.0 - null_frac);

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	return selec;
}
示例#3
0
/*
 * SP-GiST consistent function for inner nodes: check which nodes are
 * consistent with given set of queries.
 */
Datum
spg_range_quad_inner_consistent(PG_FUNCTION_ARGS)
{
	spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0);
	spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1);
	int			which;
	int			i;

	if (in->allTheSame)
	{
		/* Report that all nodes should be visited */
		out->nNodes = in->nNodes;
		out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
		for (i = 0; i < in->nNodes; i++)
			out->nodeNumbers[i] = i;
		PG_RETURN_VOID();
	}

	if (!in->hasPrefix)
	{
		/*
		 * No centroid on this inner node. Such a node has two child nodes,
		 * the first for empty ranges, and the second for non-empty ones.
		 */
		Assert(in->nNodes == 2);

		/*
		 * Nth bit of which variable means that (N - 1)th node should be
		 * visited. Initially all bits are set. Bits of nodes which should be
		 * skipped will be unset.
		 */
		which = (1 << 1) | (1 << 2);
		for (i = 0; i < in->nkeys; i++)
		{
			StrategyNumber strategy = in->scankeys[i].sk_strategy;
			bool		empty;

			/*
			 * The only strategy when second argument of operator is not range
			 * is RANGESTRAT_CONTAINS_ELEM.
			 */
			if (strategy != RANGESTRAT_CONTAINS_ELEM)
				empty = RangeIsEmpty(
							 DatumGetRangeType(in->scankeys[i].sk_argument));
			else
				empty = false;

			switch (strategy)
			{
				case RANGESTRAT_BEFORE:
				case RANGESTRAT_OVERLEFT:
				case RANGESTRAT_OVERLAPS:
				case RANGESTRAT_OVERRIGHT:
				case RANGESTRAT_AFTER:
					/* These strategies return false if any argument is empty */
					if (empty)
						which = 0;
					else
						which &= (1 << 2);
					break;

				case RANGESTRAT_CONTAINS:
					/*
					 * All ranges contain an empty range. Only non-empty ranges
					 * can contain a non-empty range.
					 */
					if (!empty)
						which &= (1 << 2);
					break;

				case RANGESTRAT_CONTAINED_BY:
					/*
					 * Only an empty range is contained by an empty range. Both
					 * empty and non-empty ranges can be contained by a
					 * non-empty range.
					 */
					if (empty)
						which &= (1 << 1);
					break;

				case RANGESTRAT_CONTAINS_ELEM:
					which &= (1 << 2);
					break;

				case RANGESTRAT_EQ:
					if (empty)
						which &= (1 << 1);
					else
						which &= (1 << 2);
					break;

				default:
					elog(ERROR, "unrecognized range strategy: %d", strategy);
					break;
			}
			if (which == 0)
				break;			/* no need to consider remaining conditions */
		}
	}
	else
	{
		RangeBound	centroidLower,
					centroidUpper;
		bool		centroidEmpty;
		TypeCacheEntry *typcache;
		RangeType  *centroid;

		/* This node has a centroid. Fetch it. */
		centroid = DatumGetRangeType(in->prefixDatum);
		typcache = range_get_typcache(fcinfo,
							   RangeTypeGetOid(DatumGetRangeType(centroid)));
		range_deserialize(typcache, centroid, &centroidLower, &centroidUpper,
						  &centroidEmpty);

		Assert(in->nNodes == 4 || in->nNodes == 5);

		/*
		 * Nth bit of which variable means that (N - 1)th node (Nth quadrant)
		 * should be visited. Initially all bits are set. Bits of nodes which
		 * can be skipped will be unset.
		 */
		which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5);

		for (i = 0; i < in->nkeys; i++)
		{
			StrategyNumber strategy;
			RangeBound	lower,
						upper;
			bool		empty;
			RangeType  *range = NULL;
			/* Restrictions on range bounds according to scan strategy */
			RangeBound *minLower = NULL,
					   *maxLower = NULL,
					   *minUpper = NULL,
					   *maxUpper = NULL;
			/* Are the restrictions on range bounds inclusive? */
			bool		inclusive = true;
			bool		strictEmpty = true;

			strategy = in->scankeys[i].sk_strategy;

			/*
			 * RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but
			 * the argument is a single element. Expand the single element to
			 * a range containing only the element, and treat it like
			 * RANGESTRAT_CONTAINS.
			 */
			if (strategy == RANGESTRAT_CONTAINS_ELEM)
			{
				lower.inclusive = true;
				lower.infinite = false;
				lower.lower = true;
				lower.val = in->scankeys[i].sk_argument;

				upper.inclusive = true;
				upper.infinite = false;
				upper.lower = false;
				upper.val = in->scankeys[i].sk_argument;

				empty = false;

				strategy = RANGESTRAT_CONTAINS;
			}
			else
			{
				range = DatumGetRangeType(in->scankeys[i].sk_argument);
				range_deserialize(typcache, range, &lower, &upper, &empty);
			}

			/*
			 * Most strategies are handled by forming a bounding box from the
			 * search key, defined by a minLower, maxLower, minUpper, maxUpper.
			 * Some modify 'which' directly, to specify exactly which quadrants
			 * need to be visited.
			 *
			 * For most strategies, nothing matches an empty search key, and
			 * an empty range never matches a non-empty key. If a strategy
			 * does not behave like that wrt. empty ranges, set strictEmpty to
			 * false.
			 */
			switch (strategy)
			{
				case RANGESTRAT_BEFORE:
					/*
					 * Range A is before range B if upper bound of A is lower
					 * than lower bound of B.
					 */
					maxUpper = &lower;
					inclusive = false;
					break;

				case RANGESTRAT_OVERLEFT:
					/*
					 * Range A is overleft to range B if upper bound of A is
					 * less or equal to upper bound of B.
					 */
					maxUpper = &upper;
					break;

				case RANGESTRAT_OVERLAPS:
					/*
					 * Non-empty ranges overlap, if lower bound of each range
					 * is lower or equal to upper bound of the other range.
					 */
					maxLower = &upper;
					minUpper = &lower;
					break;

				case RANGESTRAT_OVERRIGHT:
					/*
					 * Range A is overright to range B if lower bound of A is
					 * greater or equal to lower bound of B.
					 */
					minLower = &lower;
					break;

				case RANGESTRAT_AFTER:
					/*
					 * Range A is after range B if lower bound of A is greater
					 * than upper bound of B.
					 */
					minLower = &upper;
					inclusive = false;
					break;

				case RANGESTRAT_CONTAINS:
					/*
					 * Non-empty range A contains non-empty range B if lower
					 * bound of A is lower or equal to lower bound of range B
					 * and upper bound of range A is greater or equal to upper
					 * bound of range A.
					 *
					 * All non-empty ranges contain an empty range.
					 */
					strictEmpty = false;
					if (!empty)
					{
						which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
						maxLower = &lower;
						minUpper = &upper;
					}
					break;

				case RANGESTRAT_CONTAINED_BY:
					/* The opposite of contains. */
					strictEmpty = false;
					if (empty)
					{
						/* An empty range is only contained by an empty range */
						which &= (1 << 5);
					}
					else
					{
						minLower = &lower;
						maxUpper = &upper;
					}
					break;

				case RANGESTRAT_EQ:
					/*
					 * Equal range can be only in the same quadrant where
					 * argument would be placed to.
					 */
					strictEmpty = false;
					which &= (1 << getQuadrant(typcache, centroid, range));
					break;

				default:
					elog(ERROR, "unrecognized range strategy: %d", strategy);
					break;
			}

			if (strictEmpty)
			{
				if (empty)
				{
					/* Scan key is empty, no branches are satisfying */
					which = 0;
					break;
				}
				else
				{
					/* Shouldn't visit tree branch with empty ranges */
					which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4);
				}
			}

			/*
			 * Using the bounding box, see which quadrants we have to descend
			 * into.
			 */
			if (minLower)
			{
				/*
				 * If the centroid's lower bound is less than or equal to
				 * the minimum lower bound, anything in the 3rd and 4th
				 * quadrants will have an even smaller lower bound, and thus
				 * can't match.
				 */
				if (range_cmp_bounds(typcache, &centroidLower, minLower) <= 0)
					which &= (1 << 1) | (1 << 2) | (1 << 5);
			}
			if (maxLower)
			{
				/*
				 * If the centroid's lower bound is greater than the maximum
				 * lower bound, anything in the 1st and 2nd quadrants will
				 * also have a greater than or equal lower bound, and thus
				 * can't match. If the centroid's lower bound is equal to
				 * the maximum lower bound, we can still exclude the 1st and
				 * 2nd quadrants if we're looking for a value strictly greater
				 * than the maximum.
				 */
				int			cmp;

				cmp = range_cmp_bounds(typcache, &centroidLower, maxLower);
				if (cmp > 0 || (!inclusive && cmp == 0))
					which &= (1 << 3) | (1 << 4) | (1 << 5);
			}
			if (minUpper)
			{
				/*
				 * If the centroid's upper bound is less than or equal to
				 * the minimum upper bound, anything in the 2nd and 3rd
				 * quadrants will have an even smaller upper bound, and thus
				 * can't match.
				 */
				if (range_cmp_bounds(typcache, &centroidUpper, minUpper) <= 0)
					which &= (1 << 1) | (1 << 4) | (1 << 5);
			}
			if (maxUpper)
			{
				/*
				 * If the centroid's upper bound is greater than the maximum
				 * upper bound, anything in the 1st and 4th quadrants will
				 * also have a greater than or equal upper bound, and thus
				 * can't match. If the centroid's upper bound is equal to
				 * the maximum upper bound, we can still exclude the 1st and
				 * 4th quadrants if we're looking for a value strictly greater
				 * than the maximum.
				 */
				int			cmp;

				cmp = range_cmp_bounds(typcache, &centroidUpper, maxUpper);
				if (cmp > 0 || (!inclusive && cmp == 0))
					which &= (1 << 2) | (1 << 3) | (1 << 5);
			}

			if (which == 0)
				break;			/* no need to consider remaining conditions */
		}
	}

	/* We must descend into the quadrant(s) identified by 'which' */
	out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes);
	out->nNodes = 0;
	for (i = 1; i <= in->nNodes; i++)
	{
		if (which & (1 << i))
			out->nodeNumbers[out->nNodes++] = i - 1;
	}

	PG_RETURN_VOID();
}