/*----------
 * Determine which quadrant a 2d-mapped range falls into, relative to the
 * centroid.
 *
 * Quadrants are numbered like this:
 *
 *   4  |  1
 *  ----+----
 *   3  |  2
 *
 * Where the lower bound of range is the horizontal axis and upper bound the
 * vertical axis.
 *
 * Ranges on one of the axes are taken to lie in the quadrant with higher value
 * along perpendicular axis. That is, a value on the horizontal axis is taken
 * to belong to quadrant 1 or 4, and a value on the vertical axis is taken to
 * belong to quadrant 1 or 2. A range equal to centroid is taken to lie in
 * quadrant 1.
 *
 * Empty ranges are taken to lie in the special quadrant 5.
 *
 * typcache: type cache entry handed to range_deserialize/range_cmp_bounds.
 * centroid: the range the quadrants are defined around.
 * tst:      the range to classify.
 *
 * Returns the quadrant number, 1..5.
 *----------
 */
static int16
getQuadrant(TypeCacheEntry *typcache, RangeType *centroid, RangeType *tst)
{
    RangeBound  centroidLower,
                centroidUpper;
    bool        centroidEmpty;
    RangeBound  lower,
                upper;
    bool        empty;

    range_deserialize(typcache, centroid, &centroidLower, &centroidUpper,
                      &centroidEmpty);
    range_deserialize(typcache, tst, &lower, &upper, &empty);

    /* Only the tested range's emptiness is examined here. */
    if (empty)
        return 5;

    if (range_cmp_bounds(typcache, &lower, &centroidLower) >= 0)
    {
        /* Lower bound at or right of the centroid: quadrant 1 or 2 */
        if (range_cmp_bounds(typcache, &upper, &centroidUpper) >= 0)
            return 1;
        else
            return 2;
    }
    else
    {
        /* Lower bound left of the centroid: quadrant 4 or 3 */
        if (range_cmp_bounds(typcache, &upper, &centroidUpper) >= 0)
            return 4;
        else
            return 3;
    }
}
/*----------
 * adjacent_inner_consistent
 *
 * Same decision as adjacent_cmp_bounds, refined with the centroid of the
 * previous (parent) level when one is supplied in 'prev'.
 *
 * At the previous node we may have descended left (or right) while hunting
 * for ranges adjacent to the *other* bound, even though matches for this
 * bound had already been excluded in that direction.  Comparing the argument
 * against the previous centroid tells us which way we should have gone for
 * this bound; comparing the current centroid against the previous centroid
 * tells us which way we actually went.  If the two disagree, nothing below
 * this node can match.
 *
 * Returns -1 if matches may exist to the left, 1 if they may exist to the
 * right, and 0 if they were already ruled out at the previous level.
 *
 * XXX: Looking only at the previous and current level is not airtight; some
 * branches are still searched needlessly.  Suppose we look for value 15 and
 * pass through these centroids (considering a single bound only):
 *
 *  Level 1: 20
 *  Level 2: 50
 *  Level 3: 25
 *
 * Now the previous centroid is 50, the current one is 25, and the target is
 * to the left.  Since we already went right from 20 to 50 at the first level,
 * no value < 20 can exist in this branch, but that cannot be seen from the
 * last two centroids alone, so we descend left for nothing.  We are in this
 * branch only because of the search on the *other* bound.  Tracking
 * explicitly which bound each descent is for, rather than inferring it from
 * two centroids, would save that work.
 *----------
 */
static int
adjacent_inner_consistent(TypeCacheEntry *typcache, RangeBound *arg,
                          RangeBound *centroid, RangeBound *prev)
{
    if (prev != NULL)
    {
        /* Direction this bound called for at the previous level */
        int         wanted = adjacent_cmp_bounds(typcache, arg, prev);

        /* Direction that was actually taken to arrive at this node */
        int         taken = range_cmp_bounds(typcache, centroid, prev);

        bool        wanted_left_went_right = (wanted < 0 && taken >= 0);
        bool        wanted_right_went_left = (wanted > 0 && taken < 0);

        /* A mismatch means this subtree was already excluded */
        if (wanted_left_went_right || wanted_right_went_left)
            return 0;
    }

    return adjacent_cmp_bounds(typcache, arg, centroid);
}
/*
 * Sort comparator for RangeBound elements.
 *
 * 'a' and 'b' point at the two RangeBounds being compared; 'arg' is the
 * TypeCacheEntry passed through to range_cmp_bounds, whose result is
 * returned unchanged.
 */
static int
bound_cmp(const void *a, const void *b, void *arg)
{
    return range_cmp_bounds((TypeCacheEntry *) arg,
                            (RangeBound *) a,
                            (RangeBound *) b);
}
/*
 * Binary search over an array of range bounds.
 *
 * With equal == false, returns the greatest index whose bound is strictly
 * less than 'value'; with equal == true, the greatest index whose bound is
 * less than or equal to 'value'.  Returns -1 when no element of the array
 * qualifies (including when hist_length is 0).
 *
 * Used by scalar operator selectivity estimation, and also to locate the
 * histogram bin at which interpolation of the portion of bounds that are
 * less than or equal to a given bound should stop.
 */
static int
rbound_bsearch(TypeCacheEntry *typcache, RangeBound *value, RangeBound *hist,
               int hist_length, bool equal)
{
    int         lo = -1;
    int         hi = hist_length - 1;

    /* The answer always lies within [lo, hi]; shrink until they meet. */
    while (lo < hi)
    {
        /* Round the midpoint up so that "lo = mid" always makes progress */
        int         mid = (lo + hi + 1) / 2;
        int         c = range_cmp_bounds(typcache, &hist[mid], value);
        bool        qualifies = (c < 0) || (equal && c == 0);

        if (qualifies)
            lo = mid;
        else
            hi = mid - 1;
    }

    return lo;
}
/* * Calculate selectivity of "var <@ const" operator, ie. estimate the fraction * of ranges that fall within the constant lower and upper bounds. This uses * the histograms of range lower bounds and range lengths, on the assumption * that the range lengths are independent of the lower bounds. * * The caller has already checked that constant lower and upper bounds are * finite. */ static double calc_hist_selectivity_contained(TypeCacheEntry *typcache, RangeBound *lower, RangeBound *upper, RangeBound *hist_lower, int hist_nvalues, Datum *length_hist_values, int length_hist_nvalues) { int i, upper_index; float8 prev_dist; double bin_width; double upper_bin_width; double sum_frac; /* * Begin by finding the bin containing the upper bound, in the lower bound * histogram. Any range with a lower bound > constant upper bound can't * match, ie. there are no matches in bins greater than upper_index. */ upper->inclusive = !upper->inclusive; upper->lower = true; upper_index = rbound_bsearch(typcache, upper, hist_lower, hist_nvalues, false); /* * Calculate upper_bin_width, ie. the fraction of the (upper_index, * upper_index + 1) bin which is greater than upper bound of query range * using linear interpolation of subdiff function. */ if (upper_index >= 0 && upper_index < hist_nvalues - 1) upper_bin_width = get_position(typcache, upper, &hist_lower[upper_index], &hist_lower[upper_index + 1]); else upper_bin_width = 0.0; /* * In the loop, dist and prev_dist are the distance of the "current" bin's * lower and upper bounds from the constant upper bound. * * bin_width represents the width of the current bin. Normally it is 1.0, * meaning a full width bin, but can be less in the corner cases: start * and end of the loop. We start with bin_width = upper_bin_width, because * we begin at the bin containing the upper bound. 
*/ prev_dist = 0.0; bin_width = upper_bin_width; sum_frac = 0.0; for (i = upper_index; i >= 0; i--) { double dist; double length_hist_frac; bool final_bin = false; /* * dist -- distance from upper bound of query range to lower bound of * the current bin in the lower bound histogram. Or to the lower bound * of the constant range, if this is the final bin, containing the * constant lower bound. */ if (range_cmp_bounds(typcache, &hist_lower[i], lower) < 0) { dist = get_distance(typcache, lower, upper); /* * Subtract from bin_width the portion of this bin that we want to * ignore. */ bin_width -= get_position(typcache, lower, &hist_lower[i], &hist_lower[i + 1]); if (bin_width < 0.0) bin_width = 0.0; final_bin = true; } else dist = get_distance(typcache, &hist_lower[i], upper); /* * Estimate the fraction of tuples in this bin that are narrow enough * to not exceed the distance to the upper bound of the query range. */ length_hist_frac = calc_length_hist_frac(length_hist_values, length_hist_nvalues, prev_dist, dist, true); /* * Add the fraction of tuples in this bin, with a suitable length, to * the total. */ sum_frac += length_hist_frac * bin_width / (double) (hist_nvalues - 1); if (final_bin) break; bin_width = 1.0; prev_dist = dist; } return sum_frac; }
/* * SP-GiST consistent function for inner nodes: check which nodes are * consistent with given set of queries. */ Datum spg_range_quad_inner_consistent(PG_FUNCTION_ARGS) { spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); int which; int i; if (in->allTheSame) { /* Report that all nodes should be visited */ out->nNodes = in->nNodes; out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); for (i = 0; i < in->nNodes; i++) out->nodeNumbers[i] = i; PG_RETURN_VOID(); } if (!in->hasPrefix) { /* * No centroid on this inner node. Such a node has two child nodes, * the first for empty ranges, and the second for non-empty ones. */ Assert(in->nNodes == 2); /* * Nth bit of which variable means that (N - 1)th node should be * visited. Initially all bits are set. Bits of nodes which should be * skipped will be unset. */ which = (1 << 1) | (1 << 2); for (i = 0; i < in->nkeys; i++) { StrategyNumber strategy = in->scankeys[i].sk_strategy; bool empty; /* * The only strategy when second argument of operator is not range * is RANGESTRAT_CONTAINS_ELEM. */ if (strategy != RANGESTRAT_CONTAINS_ELEM) empty = RangeIsEmpty( DatumGetRangeType(in->scankeys[i].sk_argument)); else empty = false; switch (strategy) { case RANGESTRAT_BEFORE: case RANGESTRAT_OVERLEFT: case RANGESTRAT_OVERLAPS: case RANGESTRAT_OVERRIGHT: case RANGESTRAT_AFTER: /* These strategies return false if any argument is empty */ if (empty) which = 0; else which &= (1 << 2); break; case RANGESTRAT_CONTAINS: /* * All ranges contain an empty range. Only non-empty ranges * can contain a non-empty range. */ if (!empty) which &= (1 << 2); break; case RANGESTRAT_CONTAINED_BY: /* * Only an empty range is contained by an empty range. Both * empty and non-empty ranges can be contained by a * non-empty range. 
*/ if (empty) which &= (1 << 1); break; case RANGESTRAT_CONTAINS_ELEM: which &= (1 << 2); break; case RANGESTRAT_EQ: if (empty) which &= (1 << 1); else which &= (1 << 2); break; default: elog(ERROR, "unrecognized range strategy: %d", strategy); break; } if (which == 0) break; /* no need to consider remaining conditions */ } } else { RangeBound centroidLower, centroidUpper; bool centroidEmpty; TypeCacheEntry *typcache; RangeType *centroid; /* This node has a centroid. Fetch it. */ centroid = DatumGetRangeType(in->prefixDatum); typcache = range_get_typcache(fcinfo, RangeTypeGetOid(DatumGetRangeType(centroid))); range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper, ¢roidEmpty); Assert(in->nNodes == 4 || in->nNodes == 5); /* * Nth bit of which variable means that (N - 1)th node (Nth quadrant) * should be visited. Initially all bits are set. Bits of nodes which * can be skipped will be unset. */ which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5); for (i = 0; i < in->nkeys; i++) { StrategyNumber strategy; RangeBound lower, upper; bool empty; RangeType *range = NULL; /* Restrictions on range bounds according to scan strategy */ RangeBound *minLower = NULL, *maxLower = NULL, *minUpper = NULL, *maxUpper = NULL; /* Are the restrictions on range bounds inclusive? */ bool inclusive = true; bool strictEmpty = true; strategy = in->scankeys[i].sk_strategy; /* * RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but * the argument is a single element. Expand the single element to * a range containing only the element, and treat it like * RANGESTRAT_CONTAINS. 
*/ if (strategy == RANGESTRAT_CONTAINS_ELEM) { lower.inclusive = true; lower.infinite = false; lower.lower = true; lower.val = in->scankeys[i].sk_argument; upper.inclusive = true; upper.infinite = false; upper.lower = false; upper.val = in->scankeys[i].sk_argument; empty = false; strategy = RANGESTRAT_CONTAINS; } else { range = DatumGetRangeType(in->scankeys[i].sk_argument); range_deserialize(typcache, range, &lower, &upper, &empty); } /* * Most strategies are handled by forming a bounding box from the * search key, defined by a minLower, maxLower, minUpper, maxUpper. * Some modify 'which' directly, to specify exactly which quadrants * need to be visited. * * For most strategies, nothing matches an empty search key, and * an empty range never matches a non-empty key. If a strategy * does not behave like that wrt. empty ranges, set strictEmpty to * false. */ switch (strategy) { case RANGESTRAT_BEFORE: /* * Range A is before range B if upper bound of A is lower * than lower bound of B. */ maxUpper = &lower; inclusive = false; break; case RANGESTRAT_OVERLEFT: /* * Range A is overleft to range B if upper bound of A is * less or equal to upper bound of B. */ maxUpper = &upper; break; case RANGESTRAT_OVERLAPS: /* * Non-empty ranges overlap, if lower bound of each range * is lower or equal to upper bound of the other range. */ maxLower = &upper; minUpper = &lower; break; case RANGESTRAT_OVERRIGHT: /* * Range A is overright to range B if lower bound of A is * greater or equal to lower bound of B. */ minLower = &lower; break; case RANGESTRAT_AFTER: /* * Range A is after range B if lower bound of A is greater * than upper bound of B. */ minLower = &upper; inclusive = false; break; case RANGESTRAT_CONTAINS: /* * Non-empty range A contains non-empty range B if lower * bound of A is lower or equal to lower bound of range B * and upper bound of range A is greater or equal to upper * bound of range A. * * All non-empty ranges contain an empty range. 
*/ strictEmpty = false; if (!empty) { which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); maxLower = &lower; minUpper = &upper; } break; case RANGESTRAT_CONTAINED_BY: /* The opposite of contains. */ strictEmpty = false; if (empty) { /* An empty range is only contained by an empty range */ which &= (1 << 5); } else { minLower = &lower; maxUpper = &upper; } break; case RANGESTRAT_EQ: /* * Equal range can be only in the same quadrant where * argument would be placed to. */ strictEmpty = false; which &= (1 << getQuadrant(typcache, centroid, range)); break; default: elog(ERROR, "unrecognized range strategy: %d", strategy); break; } if (strictEmpty) { if (empty) { /* Scan key is empty, no branches are satisfying */ which = 0; break; } else { /* Shouldn't visit tree branch with empty ranges */ which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); } } /* * Using the bounding box, see which quadrants we have to descend * into. */ if (minLower) { /* * If the centroid's lower bound is less than or equal to * the minimum lower bound, anything in the 3rd and 4th * quadrants will have an even smaller lower bound, and thus * can't match. */ if (range_cmp_bounds(typcache, ¢roidLower, minLower) <= 0) which &= (1 << 1) | (1 << 2) | (1 << 5); } if (maxLower) { /* * If the centroid's lower bound is greater than the maximum * lower bound, anything in the 1st and 2nd quadrants will * also have a greater than or equal lower bound, and thus * can't match. If the centroid's lower bound is equal to * the maximum lower bound, we can still exclude the 1st and * 2nd quadrants if we're looking for a value strictly greater * than the maximum. */ int cmp; cmp = range_cmp_bounds(typcache, ¢roidLower, maxLower); if (cmp > 0 || (!inclusive && cmp == 0)) which &= (1 << 3) | (1 << 4) | (1 << 5); } if (minUpper) { /* * If the centroid's upper bound is less than or equal to * the minimum upper bound, anything in the 2nd and 3rd * quadrants will have an even smaller upper bound, and thus * can't match. 
*/ if (range_cmp_bounds(typcache, ¢roidUpper, minUpper) <= 0) which &= (1 << 1) | (1 << 4) | (1 << 5); } if (maxUpper) { /* * If the centroid's upper bound is greater than the maximum * upper bound, anything in the 1st and 4th quadrants will * also have a greater than or equal upper bound, and thus * can't match. If the centroid's upper bound is equal to * the maximum upper bound, we can still exclude the 1st and * 4th quadrants if we're looking for a value strictly greater * than the maximum. */ int cmp; cmp = range_cmp_bounds(typcache, ¢roidUpper, maxUpper); if (cmp > 0 || (!inclusive && cmp == 0)) which &= (1 << 2) | (1 << 3) | (1 << 5); } if (which == 0) break; /* no need to consider remaining conditions */ } } /* We must descend into the quadrant(s) identified by 'which' */ out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); out->nNodes = 0; for (i = 1; i <= in->nNodes; i++) { if (which & (1 << i)) out->nodeNumbers[out->nNodes++] = i - 1; } PG_RETURN_VOID(); }
/*
 * adjacent_cmp_bounds
 *
 * Decide on which side of a centroid bound any bound adjacent to 'arg' must
 * lie.  'arg' and 'centroid' must be of opposite kinds (one a lower bound,
 * the other an upper bound).
 *
 * "Left" means smaller than the centroid and "right" means greater than or
 * equal to it.  In terms of the quadrant layout, "left" also stands for
 * "down" and "right" for "up".
 *
 * Returns -1 for "left" and 1 for "right".
 */
static int
adjacent_cmp_bounds(TypeCacheEntry *typcache, RangeBound *arg,
                    RangeBound *centroid)
{
    int         cmp;
    bool        go_left;

    Assert(arg->lower != centroid->lower);

    cmp = range_cmp_bounds(typcache, arg, centroid);

    if (!centroid->lower)
    {
        /*------
         * The argument is a lower bound, so the adjacent bounds we want are
         * upper bounds.  A matching adjacent upper bound is *smaller* than
         * the argument, but only just.
         *
         *  ARGUMENT   CENTROID     CMP   ADJ
         *  [500, ...) [..., 499)    >    (N)   [..., 500) is to the right
         *  [500, ...) [..., 500)    >    (Y)   [..., 500) is to the right
         *  [500, ...) [..., 501)    =    (N)   [..., 500) is to the left
         *  [500, ...) [..., 502)    <    (N)   [..., 500) is to the left
         *
         * Go left when the argument is smaller than or equal to the
         * centroid, right otherwise.  Adjacency between argument and
         * centroid need not be tested, since it makes no difference here.
         *------
         */
        go_left = (cmp <= 0);
    }
    else
    {
        /*------
         * The argument is an upper bound, so the adjacent bounds we want are
         * lower bounds.  A matching adjacent lower bound is *larger* than
         * the argument, but only just.
         *
         * The table shows the desired outcome for a fixed argument bound and
         * varying centroids.  CMP is the value of 'cmp'; ADJ says whether
         * argument and centroid are adjacent per bounds_adjacent().  (N)
         * marks cases where adjacency need not be tested because CMP already
         * decides.  For the argument range [..., 500), the adjacent range
         * sought is [500, ...):
         *
         *  ARGUMENT   CENTROID     CMP   ADJ
         *  [..., 500) [498, ...)    >    (N)   [500, ...) is to the right
         *  [..., 500) [499, ...)    =    (N)   [500, ...) is to the right
         *  [..., 500) [500, ...)    <     Y    [500, ...) is to the right
         *  [..., 500) [501, ...)    <     N    [500, ...) is to the left
         *
         * Go left only when the argument is smaller than the centroid and
         * not adjacent to it; go right in every other case.
         *------
         */
        go_left = (cmp < 0 && !bounds_adjacent(typcache, *arg, *centroid));
    }

    return go_left ? -1 : 1;
}