/* * Choose SP-GiST function: choose path for addition of new range. */ Datum spg_range_quad_choose(PG_FUNCTION_ARGS) { spgChooseIn *in = (spgChooseIn *) PG_GETARG_POINTER(0); spgChooseOut *out = (spgChooseOut *) PG_GETARG_POINTER(1); RangeType *inRange = DatumGetRangeType(in->datum), *centroid; int16 quadrant; TypeCacheEntry *typcache; if (in->allTheSame) { out->resultType = spgMatchNode; /* nodeN will be set by core */ out->result.matchNode.levelAdd = 0; out->result.matchNode.restDatum = RangeTypeGetDatum(inRange); PG_RETURN_VOID(); } typcache = range_get_typcache(fcinfo, RangeTypeGetOid(inRange)); /* * A node with no centroid divides ranges purely on whether they're empty * or not. All empty ranges go to child node 0, all non-empty ranges go * to node 1. */ if (!in->hasPrefix) { out->resultType = spgMatchNode; if (RangeIsEmpty(inRange)) out->result.matchNode.nodeN = 0; else out->result.matchNode.nodeN = 1; out->result.matchNode.levelAdd = 1; out->result.matchNode.restDatum = RangeTypeGetDatum(inRange); PG_RETURN_VOID(); } centroid = DatumGetRangeType(in->prefixDatum); quadrant = getQuadrant(typcache, centroid, inRange); Assert(quadrant <= in->nNodes); /* Select node matching to quadrant number */ out->resultType = spgMatchNode; out->result.matchNode.nodeN = quadrant - 1; out->result.matchNode.levelAdd = 1; out->result.matchNode.restDatum = RangeTypeGetDatum(inRange); PG_RETURN_VOID(); }
/*
 * Estimate the selectivity of "variable <operator> constval" for a range
 * column.
 *
 * The fractions of NULL and empty values are read from the statistics tuple
 * when one is available; otherwise both are taken to be zero.  An empty
 * constant is resolved directly from the operator's semantics, a non-empty
 * one through the bound histograms.  The returned value is clamped to a
 * valid probability.
 */
static double
calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
              RangeType *constval, Oid operator)
{
    /*
     * Defaults used when no statistics exist: assume no NULLs and no empty
     * ranges.  We still get a better-than-nothing estimate from knowing
     * whether the constant itself is empty.
     */
    float4      null_frac = 0.0;
    float4      empty_frac = 0.0;
    double      selec;

    if (HeapTupleIsValid(vardata->statsTuple))
    {
        Form_pg_statistic stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
        float4     *numbers;
        int         nnumbers;

        null_frac = stats->stanullfrac;

        /*
         * The empty-range fraction is the single number stored in the
         * range-length histogram slot.  If the slot is missing, keep the
         * default of zero.
         */
        if (get_attstatsslot(vardata->statsTuple,
                             vardata->atttype, vardata->atttypmod,
                             STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, InvalidOid,
                             NULL,
                             NULL, NULL,
                             &numbers, &nnumbers))
        {
            if (nnumbers != 1)
                elog(ERROR, "invalid empty fraction statistic");    /* shouldn't happen */
            empty_frac = numbers[0];
        }
    }

    if (!RangeIsEmpty(constval))
    {
        /* Fraction of rows the histogram describes (non-empty values). */
        double      nonempty_frac = 1.0 - empty_frac;
        double      hist_selec;

        /*
         * Prefer the bound histograms.  A negative result means they could
         * not be used (e.g. no histogram in pg_statistic); then fall back
         * to the operator's default estimate, which is still refined below
         * by the empty and NULL fractions.
         */
        hist_selec = calc_hist_selectivity(typcache, vardata, constval,
                                           operator);
        if (hist_selec < 0.0)
            hist_selec = default_range_selectivity(operator);

        /*
         * Combine with the empty ranges, which the histogram does not
         * cover.  An empty range is contained by any non-empty constant;
         * for every other operator an empty value matches nothing.
         */
        selec = (operator == OID_RANGE_CONTAINED_OP)
            ? nonempty_frac * hist_selec + empty_frac
            : nonempty_frac * hist_selec;
    }
    else
    {
        /*
         * Against an empty constant the answer is everything, only the
         * empty ranges, only the non-empty ranges, or nothing.
         */
        switch (operator)
        {
            case OID_RANGE_OVERLAP_OP:
            case OID_RANGE_OVERLAPS_LEFT_OP:
            case OID_RANGE_OVERLAPS_RIGHT_OP:
            case OID_RANGE_LEFT_OP:
            case OID_RANGE_RIGHT_OP:
                /* false whenever either argument is empty */
            case OID_RANGE_LESS_OP:
                /* nothing sorts before an empty range */
                selec = 0.0;
                break;

            case OID_RANGE_CONTAINED_OP:
                /* only an empty range is contained by an empty range */
            case OID_RANGE_LESS_EQUAL_OP:
                /* only an empty range is <= an empty range */
                selec = empty_frac;
                break;

            case OID_RANGE_CONTAINS_OP:
                /* every range contains an empty range */
            case OID_RANGE_GREATER_EQUAL_OP:
                /* every range is >= an empty range */
                selec = 1.0;
                break;

            case OID_RANGE_GREATER_OP:
                /* exactly the non-empty ranges are > an empty range */
                selec = 1.0 - empty_frac;
                break;

            case OID_RANGE_CONTAINS_ELEM_OP:
                /* an element cannot be empty, so this is unexpected too */
            default:
                elog(ERROR, "unexpected operator %u", operator);
                selec = 0.0;    /* keep compiler quiet */
                break;
        }
    }

    /* All range operators are strict, so NULL rows never match. */
    selec *= (1.0 - null_frac);

    /* The result should already be in range, but make sure. */
    CLAMP_PROBABILITY(selec);

    return selec;
}
/* * SP-GiST consistent function for inner nodes: check which nodes are * consistent with given set of queries. */ Datum spg_range_quad_inner_consistent(PG_FUNCTION_ARGS) { spgInnerConsistentIn *in = (spgInnerConsistentIn *) PG_GETARG_POINTER(0); spgInnerConsistentOut *out = (spgInnerConsistentOut *) PG_GETARG_POINTER(1); int which; int i; if (in->allTheSame) { /* Report that all nodes should be visited */ out->nNodes = in->nNodes; out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); for (i = 0; i < in->nNodes; i++) out->nodeNumbers[i] = i; PG_RETURN_VOID(); } if (!in->hasPrefix) { /* * No centroid on this inner node. Such a node has two child nodes, * the first for empty ranges, and the second for non-empty ones. */ Assert(in->nNodes == 2); /* * Nth bit of which variable means that (N - 1)th node should be * visited. Initially all bits are set. Bits of nodes which should be * skipped will be unset. */ which = (1 << 1) | (1 << 2); for (i = 0; i < in->nkeys; i++) { StrategyNumber strategy = in->scankeys[i].sk_strategy; bool empty; /* * The only strategy when second argument of operator is not range * is RANGESTRAT_CONTAINS_ELEM. */ if (strategy != RANGESTRAT_CONTAINS_ELEM) empty = RangeIsEmpty( DatumGetRangeType(in->scankeys[i].sk_argument)); else empty = false; switch (strategy) { case RANGESTRAT_BEFORE: case RANGESTRAT_OVERLEFT: case RANGESTRAT_OVERLAPS: case RANGESTRAT_OVERRIGHT: case RANGESTRAT_AFTER: /* These strategies return false if any argument is empty */ if (empty) which = 0; else which &= (1 << 2); break; case RANGESTRAT_CONTAINS: /* * All ranges contain an empty range. Only non-empty ranges * can contain a non-empty range. */ if (!empty) which &= (1 << 2); break; case RANGESTRAT_CONTAINED_BY: /* * Only an empty range is contained by an empty range. Both * empty and non-empty ranges can be contained by a * non-empty range. 
*/ if (empty) which &= (1 << 1); break; case RANGESTRAT_CONTAINS_ELEM: which &= (1 << 2); break; case RANGESTRAT_EQ: if (empty) which &= (1 << 1); else which &= (1 << 2); break; default: elog(ERROR, "unrecognized range strategy: %d", strategy); break; } if (which == 0) break; /* no need to consider remaining conditions */ } } else { RangeBound centroidLower, centroidUpper; bool centroidEmpty; TypeCacheEntry *typcache; RangeType *centroid; /* This node has a centroid. Fetch it. */ centroid = DatumGetRangeType(in->prefixDatum); typcache = range_get_typcache(fcinfo, RangeTypeGetOid(DatumGetRangeType(centroid))); range_deserialize(typcache, centroid, ¢roidLower, ¢roidUpper, ¢roidEmpty); Assert(in->nNodes == 4 || in->nNodes == 5); /* * Nth bit of which variable means that (N - 1)th node (Nth quadrant) * should be visited. Initially all bits are set. Bits of nodes which * can be skipped will be unset. */ which = (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4) | (1 << 5); for (i = 0; i < in->nkeys; i++) { StrategyNumber strategy; RangeBound lower, upper; bool empty; RangeType *range = NULL; /* Restrictions on range bounds according to scan strategy */ RangeBound *minLower = NULL, *maxLower = NULL, *minUpper = NULL, *maxUpper = NULL; /* Are the restrictions on range bounds inclusive? */ bool inclusive = true; bool strictEmpty = true; strategy = in->scankeys[i].sk_strategy; /* * RANGESTRAT_CONTAINS_ELEM is just like RANGESTRAT_CONTAINS, but * the argument is a single element. Expand the single element to * a range containing only the element, and treat it like * RANGESTRAT_CONTAINS. 
*/ if (strategy == RANGESTRAT_CONTAINS_ELEM) { lower.inclusive = true; lower.infinite = false; lower.lower = true; lower.val = in->scankeys[i].sk_argument; upper.inclusive = true; upper.infinite = false; upper.lower = false; upper.val = in->scankeys[i].sk_argument; empty = false; strategy = RANGESTRAT_CONTAINS; } else { range = DatumGetRangeType(in->scankeys[i].sk_argument); range_deserialize(typcache, range, &lower, &upper, &empty); } /* * Most strategies are handled by forming a bounding box from the * search key, defined by a minLower, maxLower, minUpper, maxUpper. * Some modify 'which' directly, to specify exactly which quadrants * need to be visited. * * For most strategies, nothing matches an empty search key, and * an empty range never matches a non-empty key. If a strategy * does not behave like that wrt. empty ranges, set strictEmpty to * false. */ switch (strategy) { case RANGESTRAT_BEFORE: /* * Range A is before range B if upper bound of A is lower * than lower bound of B. */ maxUpper = &lower; inclusive = false; break; case RANGESTRAT_OVERLEFT: /* * Range A is overleft to range B if upper bound of A is * less or equal to upper bound of B. */ maxUpper = &upper; break; case RANGESTRAT_OVERLAPS: /* * Non-empty ranges overlap, if lower bound of each range * is lower or equal to upper bound of the other range. */ maxLower = &upper; minUpper = &lower; break; case RANGESTRAT_OVERRIGHT: /* * Range A is overright to range B if lower bound of A is * greater or equal to lower bound of B. */ minLower = &lower; break; case RANGESTRAT_AFTER: /* * Range A is after range B if lower bound of A is greater * than upper bound of B. */ minLower = &upper; inclusive = false; break; case RANGESTRAT_CONTAINS: /* * Non-empty range A contains non-empty range B if lower * bound of A is lower or equal to lower bound of range B * and upper bound of range A is greater or equal to upper * bound of range A. * * All non-empty ranges contain an empty range. 
*/ strictEmpty = false; if (!empty) { which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); maxLower = &lower; minUpper = &upper; } break; case RANGESTRAT_CONTAINED_BY: /* The opposite of contains. */ strictEmpty = false; if (empty) { /* An empty range is only contained by an empty range */ which &= (1 << 5); } else { minLower = &lower; maxUpper = &upper; } break; case RANGESTRAT_EQ: /* * Equal range can be only in the same quadrant where * argument would be placed to. */ strictEmpty = false; which &= (1 << getQuadrant(typcache, centroid, range)); break; default: elog(ERROR, "unrecognized range strategy: %d", strategy); break; } if (strictEmpty) { if (empty) { /* Scan key is empty, no branches are satisfying */ which = 0; break; } else { /* Shouldn't visit tree branch with empty ranges */ which &= (1 << 1) | (1 << 2) | (1 << 3) | (1 << 4); } } /* * Using the bounding box, see which quadrants we have to descend * into. */ if (minLower) { /* * If the centroid's lower bound is less than or equal to * the minimum lower bound, anything in the 3rd and 4th * quadrants will have an even smaller lower bound, and thus * can't match. */ if (range_cmp_bounds(typcache, ¢roidLower, minLower) <= 0) which &= (1 << 1) | (1 << 2) | (1 << 5); } if (maxLower) { /* * If the centroid's lower bound is greater than the maximum * lower bound, anything in the 1st and 2nd quadrants will * also have a greater than or equal lower bound, and thus * can't match. If the centroid's lower bound is equal to * the maximum lower bound, we can still exclude the 1st and * 2nd quadrants if we're looking for a value strictly greater * than the maximum. */ int cmp; cmp = range_cmp_bounds(typcache, ¢roidLower, maxLower); if (cmp > 0 || (!inclusive && cmp == 0)) which &= (1 << 3) | (1 << 4) | (1 << 5); } if (minUpper) { /* * If the centroid's upper bound is less than or equal to * the minimum upper bound, anything in the 2nd and 3rd * quadrants will have an even smaller upper bound, and thus * can't match. 
*/ if (range_cmp_bounds(typcache, ¢roidUpper, minUpper) <= 0) which &= (1 << 1) | (1 << 4) | (1 << 5); } if (maxUpper) { /* * If the centroid's upper bound is greater than the maximum * upper bound, anything in the 1st and 4th quadrants will * also have a greater than or equal upper bound, and thus * can't match. If the centroid's upper bound is equal to * the maximum upper bound, we can still exclude the 1st and * 4th quadrants if we're looking for a value strictly greater * than the maximum. */ int cmp; cmp = range_cmp_bounds(typcache, ¢roidUpper, maxUpper); if (cmp > 0 || (!inclusive && cmp == 0)) which &= (1 << 2) | (1 << 3) | (1 << 5); } if (which == 0) break; /* no need to consider remaining conditions */ } } /* We must descend into the quadrant(s) identified by 'which' */ out->nodeNumbers = (int *) palloc(sizeof(int) * in->nNodes); out->nNodes = 0; for (i = 1; i <= in->nNodes; i++) { if (which & (1 << i)) out->nodeNumbers[out->nNodes++] = i - 1; } PG_RETURN_VOID(); }