/*---------- * Determine which quadrant a 2d-mapped range falls into, relative to the * centroid. * * Quadrants are numbered like this: * * 4 | 1 * ----+---- * 3 | 2 * * Where the lower bound of range is the horizontal axis and upper bound the * vertical axis. * * Ranges on one of the axes are taken to lie in the quadrant with higher value * along perpendicular axis. That is, a value on the horizontal axis is taken * to belong to quadrant 1 or 4, and a value on the vertical axis is taken to * belong to quadrant 1 or 2. A range equal to centroid is taken to lie in * quadrant 1. * * Empty ranges are taken to lie in the special quadrant 5. *---------- */ static int16 getQuadrant(TypeCacheEntry *typcache, RangeType *centroid, RangeType *tst) { RangeBound centroidLower, centroidUpper; bool centroidEmpty; RangeBound lower, upper; bool empty; range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper, ¢roidEmpty); range_deserialize(typcache, tst, &lower, &upper, &empty); if (empty) return 5; if (range_cmp_bounds(typcache, &lower, ¢roidLower) >= 0) { if (range_cmp_bounds(typcache, &upper, ¢roidUpper) >= 0) return 1; else return 2; } else { if (range_cmp_bounds(typcache, &upper, ¢roidUpper) >= 0) return 4; else return 3; } }
/* * Calculate range operator selectivity using histograms of range bounds. * * This estimate is for the portion of values that are not empty and not * NULL. */ static double calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata, RangeType *constval, Oid operator) { Datum *hist_values; int nhist; Datum *length_hist_values; int length_nhist; RangeBound *hist_lower; RangeBound *hist_upper; int i; RangeBound const_lower; RangeBound const_upper; bool empty; double hist_selec; /* Try to get histogram of ranges */ if (!(HeapTupleIsValid(vardata->statsTuple) && get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid, NULL, &hist_values, &nhist, NULL, NULL))) return -1.0; /* * Convert histogram of ranges into histograms of its lower and upper * bounds. */ hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist); hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist); for (i = 0; i < nhist; i++) { range_deserialize(typcache, DatumGetRangeType(hist_values[i]), &hist_lower[i], &hist_upper[i], &empty); /* The histogram should not contain any empty ranges */ if (empty) elog(ERROR, "bounds histogram contains an empty range"); } /* @> and @< also need a histogram of range lengths */ if (operator == OID_RANGE_CONTAINS_OP || operator == OID_RANGE_CONTAINED_OP) { if (!(HeapTupleIsValid(vardata->statsTuple) && get_attstatsslot(vardata->statsTuple, vardata->atttype, vardata->atttypmod, STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, InvalidOid, NULL, &length_hist_values, &length_nhist, NULL, NULL))) return -1.0; /* check that it's a histogram, not just a dummy entry */ if (length_nhist < 2) return -1.0; } /* Extract the bounds of the constant value. */ range_deserialize(typcache, constval, &const_lower, &const_upper, &empty); Assert(!empty); /* * Calculate selectivity comparing the lower or upper bound of the * constant with the histogram of lower or upper bounds. */ switch (operator) { case OID_RANGE_LESS_OP: /* * The regular b-tree comparison operators (<, <=, >, >=) compare * the lower bounds first, and the upper bounds for values with * equal lower bounds. Estimate that by comparing the lower bounds * only. This gives a fairly accurate estimate assuming there * aren't many rows with a lower bound equal to the constant's * lower bound. */ hist_selec = calc_hist_selectivity_scalar(typcache, &const_lower, hist_lower, nhist, false); break; case OID_RANGE_LESS_EQUAL_OP: hist_selec = calc_hist_selectivity_scalar(typcache, &const_lower, hist_lower, nhist, true); break; case OID_RANGE_GREATER_OP: hist_selec = 1 - calc_hist_selectivity_scalar(typcache, &const_lower, hist_lower, nhist, false); break; case OID_RANGE_GREATER_EQUAL_OP: hist_selec = 1 - calc_hist_selectivity_scalar(typcache, &const_lower, hist_lower, nhist, true); break; case OID_RANGE_LEFT_OP: /* var << const when upper(var) < lower(const) */ hist_selec = calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper, nhist, false); break; case OID_RANGE_RIGHT_OP: /* var >> const when lower(var) > upper(const) */ hist_selec = 1 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower, nhist, true); break; case OID_RANGE_OVERLAPS_RIGHT_OP: /* compare lower bounds */ hist_selec = 1 - calc_hist_selectivity_scalar(typcache, &const_lower, hist_lower, nhist, false); break; case OID_RANGE_OVERLAPS_LEFT_OP: /* compare upper bounds */ hist_selec = calc_hist_selectivity_scalar(typcache, &const_upper, hist_upper, nhist, true); break; case OID_RANGE_OVERLAP_OP: case OID_RANGE_CONTAINS_ELEM_OP: /* * A && B <=> NOT (A << B OR A >> B). * * Since A << B and A >> B are mutually exclusive events we can * sum their probabilities to find probability of (A << B OR A >> * B). * * "range @> elem" is equivalent to "range && [elem,elem]". The * caller already constructed the singular range from the element * constant, so just treat it the same as &&. */ hist_selec = calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper, nhist, false); hist_selec += (1.0 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower, nhist, true)); hist_selec = 1.0 - hist_selec; break; case OID_RANGE_CONTAINS_OP: hist_selec = calc_hist_selectivity_contains(typcache, &const_lower, &const_upper, hist_lower, nhist, length_hist_values, length_nhist); break; case OID_RANGE_CONTAINED_OP: if (const_lower.infinite) { /* * Lower bound no longer matters. Just estimate the fraction * with an upper bound <= const upper bound */ hist_selec = calc_hist_selectivity_scalar(typcache, &const_upper, hist_upper, nhist, true); } else if (const_upper.infinite) { hist_selec = 1.0 - calc_hist_selectivity_scalar(typcache, &const_lower, hist_lower, nhist, false); } else { hist_selec = calc_hist_selectivity_contained(typcache, &const_lower, &const_upper, hist_lower, nhist, length_hist_values, length_nhist); } break; default: elog(ERROR, "unknown range operator %u", operator); hist_selec = -1.0; /* keep compiler quiet */ break; } return hist_selec; }
/* * SP-GiST consistent function for inner nodes: check which nodes are * consistent with given set of queries. */ Datum spg_range_quad_inner_consistent(PG_FUNCTION_ARGS) { spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); int which; int i; if (in->allTheSame) { /* Report that all nodes should be visited */ out->nNodes = in->nNodes; out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); for (i = 0; i < in->nNodes; i++) out->nodeNumbers[i] = i; PG_RETURN_VOID(); } if (!in->hasPrefix) { /* * No centroid on this inner node. Such a node has two child nodes, * the first for empty ranges, and the second for non-empty ones. */ Assert(in->nNodes == 2); /* * Nth bit of which variable means that (N - 1)th node should be * visited. Initially all bits are set. Bits of nodes which should be * skipped will be unset. */ which = (1 << 1) | (1 << 2); for (i = 0; i < in->nkeys; i++) { StrategyNumber strategy = in->scankeys[i].sk_strategy; bool empty; /* * The only strategy when second argument of operator is not range * is RANGESTRAT_CONTAINS_ELEM. */ if (strategy != RANGESTRAT_CONTAINS_ELEM) empty = RangeIsEmpty( DatumGetRangeType(in->scankeys[i].sk_argument)); else empty = false; switch (strategy) { case RANGESTRAT_BEFORE: case RANGESTRAT_OVERLEFT: case RANGESTRAT_OVERLAPS: case RANGESTRAT_OVERRIGHT: case RANGESTRAT_AFTER: /* These strategies return false if any argument is empty */ if (empty) which = 0; else which &= (1 << 2); break; case RANGESTRAT_CONTAINS: /* * All ranges contain an empty range. Only non-empty ranges * can contain a non-empty range. */ if (!empty) which &= (1 << 2); break; case RANGESTRAT_CONTAINED_BY: /* * Only an empty range is contained by an empty range. Both * empty and non-empty ranges can be contained by a * non-empty range. */ if (empty) which &= (1 << 1); break; case RANGESTRAT_CONTAINS_ELEM: which &= (1 << 2); break; case RANGESTRAT_EQ: if (empty) which &= (1 << 1); else which &= (1 << 2); break; default: elog(ERROR, "unrecognized range strategy: %d", strategy); break; } if (which == 0) break; /* no need to consider remaining conditions */ } } else { RangeBound centroidLower, centroidUpper; bool centroidEmpty; TypeCacheEntry *typcache; RangeType *centroid; /* This node has a centroid. Fetch it. */ centroid = DatumGetRangeType(in->prefixDatum); typcache = range_get_typcache(fcinfo, RangeTypeGetOid(DatumGetRangeType(centroid))); range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper, ¢roidEmpty); Assert(in->nNodes == 4 || in->nNodes == 5); /* * Nth bit of which variable means that (N - 1)th node (Nth quadrant) * should be visited. Initially all bits are set. Bits of nodes which * can be skipped will be unset. */ which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5); for (i = 0; i < in->nkeys; i++) { StrategyNumber strategy; RangeBound lower, upper; bool empty; RangeType *range = NULL; /* Restrictions on range bounds according to scan strategy */ RangeBound *minLower = NULL, *maxLower = NULL, *minUpper = NULL, *maxUpper = NULL; /* Are the restrictions on range bounds inclusive? */ bool inclusive = true; bool strictEmpty = true; strategy = in->scankeys[i].sk_strategy; /* * RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but * the argument is a single element. Expand the single element to * a range containing only the element, and treat it like * RANGESTRAT_CONTAINS. */ if (strategy == RANGESTRAT_CONTAINS_ELEM) { lower.inclusive = true; lower.infinite = false; lower.lower = true; lower.val = in->scankeys[i].sk_argument; upper.inclusive = true; upper.infinite = false; upper.lower = false; upper.val = in->scankeys[i].sk_argument; empty = false; strategy = RANGESTRAT_CONTAINS; } else { range = DatumGetRangeType(in->scankeys[i].sk_argument); range_deserialize(typcache, range, &lower, &upper, &empty); } /* * Most strategies are handled by forming a bounding box from the * search key, defined by a minLower, maxLower, minUpper, maxUpper. * Some modify 'which' directly, to specify exactly which quadrants * need to be visited. * * For most strategies, nothing matches an empty search key, and * an empty range never matches a non-empty key. If a strategy * does not behave like that wrt. empty ranges, set strictEmpty to * false. */ switch (strategy) { case RANGESTRAT_BEFORE: /* * Range A is before range B if upper bound of A is lower * than lower bound of B. */ maxUpper = &lower; inclusive = false; break; case RANGESTRAT_OVERLEFT: /* * Range A is overleft to range B if upper bound of A is * less or equal to upper bound of B. */ maxUpper = &upper; break; case RANGESTRAT_OVERLAPS: /* * Non-empty ranges overlap, if lower bound of each range * is lower or equal to upper bound of the other range. */ maxLower = &upper; minUpper = &lower; break; case RANGESTRAT_OVERRIGHT: /* * Range A is overright to range B if lower bound of A is * greater or equal to lower bound of B. */ minLower = &lower; break; case RANGESTRAT_AFTER: /* * Range A is after range B if lower bound of A is greater * than upper bound of B. */ minLower = &upper; inclusive = false; break; case RANGESTRAT_CONTAINS: /* * Non-empty range A contains non-empty range B if lower * bound of A is lower or equal to lower bound of range B * and upper bound of range A is greater or equal to upper * bound of range A. * * All non-empty ranges contain an empty range. */ strictEmpty = false; if (!empty) { which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); maxLower = &lower; minUpper = &upper; } break; case RANGESTRAT_CONTAINED_BY: /* The opposite of contains. */ strictEmpty = false; if (empty) { /* An empty range is only contained by an empty range */ which &= (1 << 5); } else { minLower = &lower; maxUpper = &upper; } break; case RANGESTRAT_EQ: /* * Equal range can be only in the same quadrant where * argument would be placed to. */ strictEmpty = false; which &= (1 << getQuadrant(typcache, centroid, range)); break; default: elog(ERROR, "unrecognized range strategy: %d", strategy); break; } if (strictEmpty) { if (empty) { /* Scan key is empty, no branches are satisfying */ which = 0; break; } else { /* Shouldn't visit tree branch with empty ranges */ which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); } } /* * Using the bounding box, see which quadrants we have to descend * into. */ if (minLower) { /* * If the centroid's lower bound is less than or equal to * the minimum lower bound, anything in the 3rd and 4th * quadrants will have an even smaller lower bound, and thus * can't match. */ if (range_cmp_bounds(typcache, ¢roidLower, minLower) <= 0) which &= (1 << 1) | (1 << 2) | (1 << 5); } if (maxLower) { /* * If the centroid's lower bound is greater than the maximum * lower bound, anything in the 1st and 2nd quadrants will * also have a greater than or equal lower bound, and thus * can't match. If the centroid's lower bound is equal to * the maximum lower bound, we can still exclude the 1st and * 2nd quadrants if we're looking for a value strictly greater * than the maximum. */ int cmp; cmp = range_cmp_bounds(typcache, ¢roidLower, maxLower); if (cmp > 0 || (!inclusive && cmp == 0)) which &= (1 << 3) | (1 << 4) | (1 << 5); } if (minUpper) { /* * If the centroid's upper bound is less than or equal to * the minimum upper bound, anything in the 2nd and 3rd * quadrants will have an even smaller upper bound, and thus * can't match. */ if (range_cmp_bounds(typcache, ¢roidUpper, minUpper) <= 0) which &= (1 << 1) | (1 << 4) | (1 << 5); } if (maxUpper) { /* * If the centroid's upper bound is greater than the maximum * upper bound, anything in the 1st and 4th quadrants will * also have a greater than or equal upper bound, and thus * can't match. If the centroid's upper bound is equal to * the maximum upper bound, we can still exclude the 1st and * 4th quadrants if we're looking for a value strictly greater * than the maximum. */ int cmp; cmp = range_cmp_bounds(typcache, ¢roidUpper, maxUpper); if (cmp > 0 || (!inclusive && cmp == 0)) which &= (1 << 2) | (1 << 3) | (1 << 5); } if (which == 0) break; /* no need to consider remaining conditions */ } } /* We must descend into the quadrant(s) identified by 'which' */ out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); out->nNodes = 0; for (i = 1; i <= in->nNodes; i++) { if (which & (1 << i)) out->nodeNumbers[out->nNodes++] = i - 1; } PG_RETURN_VOID(); }
/* * Picksplit SP-GiST function: split ranges into nodes. Select "centroid" * range and distribute ranges according to quadrants. */ Datum spg_range_quad_picksplit(PG_FUNCTION_ARGS) { spgPickSplitIn *in = (spgPickSplitIn *) PG_GETARG_POINTER(0); spgPickSplitOut *out = (spgPickSplitOut *) PG_GETARG_POINTER(1); int i; int j; int nonEmptyCount; RangeType *centroid; bool empty; TypeCacheEntry *typcache; /* Use the median values of lower and upper bounds as the centroid range */ RangeBound *lowerBounds, *upperBounds; typcache = range_get_typcache(fcinfo, RangeTypeGetOid(DatumGetRangeType(in->datums[0]))); /* Allocate memory for bounds */ lowerBounds = palloc(sizeof(RangeBound) * in->nTuples); upperBounds = palloc(sizeof(RangeBound) * in->nTuples); j = 0; /* Deserialize bounds of ranges, count non-empty ranges */ for (i = 0; i < in->nTuples; i++) { range_deserialize(typcache, DatumGetRangeType(in->datums[i]), &lowerBounds[j], &upperBounds[j], &empty); if (!empty) j++; } nonEmptyCount = j; /* * All the ranges are empty. The best we can do is to construct an inner * node with no centroid, and put all ranges into node 0. If non-empty * ranges are added later, they will be routed to node 1. */ if (nonEmptyCount == 0) { out->nNodes = 2; out->hasPrefix = false; /* Prefix is empty */ out->prefixDatum = PointerGetDatum(NULL); out->nodeLabels = NULL; out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); /* Place all ranges into node 0 */ for (i = 0; i < in->nTuples; i++) { RangeType *range = DatumGetRangeType(in->datums[i]); out->leafTupleDatums[i] = RangeTypeGetDatum(range); out->mapTuplesToNodes[i] = 0; } PG_RETURN_VOID(); } /* Sort range bounds in order to find medians */ qsort_arg(lowerBounds, nonEmptyCount, sizeof(RangeBound), bound_cmp, typcache); qsort_arg(upperBounds, nonEmptyCount, sizeof(RangeBound), bound_cmp, typcache); /* Construct "centroid" range from medians of lower and upper bounds */ centroid = range_serialize(typcache, &lowerBounds[nonEmptyCount / 2], &upperBounds[nonEmptyCount / 2], false); out->hasPrefix = true; out->prefixDatum = RangeTypeGetDatum(centroid); /* Create node for empty ranges only if it is a root node */ out->nNodes = (in->level == 0) ? 5 : 4; out->nodeLabels = NULL; /* we don't need node labels */ out->mapTuplesToNodes = palloc(sizeof(int) * in->nTuples); out->leafTupleDatums = palloc(sizeof(Datum) * in->nTuples); /* * Assign ranges to corresponding nodes according to quadrants relative to * "centroid" range. */ for (i = 0; i < in->nTuples; i++) { RangeType *range = DatumGetRangeType(in->datums[i]); int16 quadrant = getQuadrant(typcache, centroid, range); out->leafTupleDatums[i] = RangeTypeGetDatum(range); out->mapTuplesToNodes[i] = quadrant - 1; } PG_RETURN_VOID(); }