/*
 * @@ selectivity for tsvector var vs tsquery constant
 */
static Selectivity
tsquerysel(VariableStatData *vardata, Datum constval)
{
	Selectivity selec;
	TSQuery query;

	/* The caller made sure the const is a TSQuery, so get it now */
	query = DatumGetTSQuery(constval);

	/* Empty query matches nothing */
	if (query->size == 0)
		return (Selectivity) 0.0;

	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		Datum *values;
		int nvalues;
		float4 *numbers;
		int nnumbers;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

		/* MCELEM will be an array of TEXT elements for a tsvector column */
		if (get_attstatsslot(vardata->statsTuple,
							 TEXTOID, -1,
							 STATISTIC_KIND_MCELEM, InvalidOid,
							 NULL,
							 &values, &nvalues,
							 &numbers, &nnumbers))
		{
			/*
			 * There is a most-common-elements slot for the tsvector Var, so
			 * use that.
			 */
			selec = mcelem_tsquery_selec(query, values, nvalues,
										 numbers, nnumbers);
			free_attstatsslot(TEXTOID, values, nvalues, numbers, nnumbers);
		}
		else
		{
			/* No most-common-elements info, so do without */
			selec = tsquery_opr_selec_no_stats(query);
		}

		/*
		 * MCE stats count only non-null rows, so adjust for null rows.
		 */
		selec *= (1.0 - stats->stanullfrac);
	}
	else
	{
		/* No stats at all, so do without */
		selec = tsquery_opr_selec_no_stats(query);
		/* we assume no nulls here, so no stanullfrac correction */
	}

	return selec;
}
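The stanullfrac adjustment above is easy to overlook: the MCELEM-based estimate covers only non-null rows, so the final selectivity must be scaled by the non-null fraction. The following is a minimal, standalone sketch (not PostgreSQL code; all numbers are invented) of that scaling step.

#include <stdio.h>

/*
 * Sketch of "selec *= (1.0 - stats->stanullfrac)": scale a match fraction
 * computed over non-null rows down to a fraction of all rows.
 */
int
main(void)
{
	double mcelem_selec = 0.20;	/* hypothetical match fraction among non-null rows */
	double stanullfrac = 0.10;	/* hypothetical fraction of NULL tsvector values */
	double selec = mcelem_selec * (1.0 - stanullfrac);

	printf("overall selectivity = %.3f\n", selec);	/* prints 0.180 */
	return 0;
}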
/*
 * @@ selectivity for tsvector var vs tsquery constant
 */
static Selectivity
tsquerysel(VariableStatData *vardata, Datum constval)
{
	Selectivity selec;
	TSQuery query;

	/* The caller made sure the const is a TSQuery, so get it now */
	query = DatumGetTSQuery(constval);

	/* Empty query matches nothing */
	if (query->size == 0)
		return (Selectivity) 0.0;

	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		Datum *values;
		int nvalues;
		float4 *numbers;
		int nnumbers;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

		/* MCELEM will be an array of TEXT elements for a tsvector column */
		if (get_attstatsslot(vardata->statsTuple,
							 TEXTOID, -1,
							 STATISTIC_KIND_MCELEM, InvalidOid,
							 &values, &nvalues,
							 &numbers, &nnumbers))
		{
			/*
			 * There is a most-common-elements slot for the tsvector Var, so
			 * use that.
			 */
			selec = mcelem_tsquery_selec(query, values, nvalues,
										 numbers, nnumbers);
			free_attstatsslot(TEXTOID, values, nvalues, numbers, nnumbers);
		}
		else
		{
			/* No most-common-elements info, so we must punt */
			selec = (Selectivity) DEFAULT_TS_MATCH_SEL;
		}
	}
	else
	{
		/* No stats at all, so we must punt */
		selec = (Selectivity) DEFAULT_TS_MATCH_SEL;
	}

	return selec;
}
/*
 * Given column stats of an attribute, build an MCVFreqPair and add it to the
 * hash table. If the MCV to be added already exists in the hash table, we
 * increment its count value.
 * Input:
 * - datumHash: hash table
 * - partOid: Oid of current partition
 * - typInfo: type information
 * Output:
 * - partReltuples: the number of tuples in this partition
 */
static void
addAllMCVsToHashTable(HTAB *datumHash, Oid partOid, HeapTuple heaptupleStats,
					  TypInfo *typInfo, float4 *partReltuples)
{
	float4 reltuples = get_rel_reltuples(partOid);

	*partReltuples = reltuples;

	Datum *datumMCVs = NULL;
	int numMCVs = 0;
	float4 *freqs = NULL;
	int numFreqs = 0;

	(void) get_attstatsslot(heaptupleStats,
							typInfo->typOid, -1,
							STATISTIC_KIND_MCV, InvalidOid,
							&datumMCVs, &numMCVs,
							&freqs, &numFreqs);
	Assert(numMCVs == numFreqs);

	for (int i = 0; i < numMCVs; i++)
	{
		Datum mcv = datumMCVs[i];
		float4 count = reltuples * freqs[i];
		MCVFreqPair *mfp = (MCVFreqPair *) palloc(sizeof(MCVFreqPair));

		mfp->mcv = mcv;
		mfp->count = count;
		mfp->typinfo = typInfo;
		addMCVToHashTable(datumHash, mfp);
		pfree(mfp);
	}
	free_attstatsslot(typInfo->typOid, datumMCVs, numMCVs, freqs, numFreqs);
}
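The key conversion here is from a per-partition MCV frequency (a fraction) to an absolute tuple count, which can then be aggregated across partitions. A minimal standalone sketch (not Greenplum code; values are invented) of that arithmetic:

#include <stdio.h>

/*
 * Sketch of "count = reltuples * freqs[i]": turning per-partition MCV
 * frequencies into counts lets the same MCV's contributions be summed
 * across partitions, as the hash table in addMCVToHashTable() does.
 */
int
main(void)
{
	float part_reltuples[2] = {10000.0f, 4000.0f};	/* rows per partition */
	float mcv_freq[2] = {0.05f, 0.20f};				/* same MCV's frequency in each */
	float total = 0.0f;
	int i;

	for (i = 0; i < 2; i++)
		total += part_reltuples[i] * mcv_freq[i];

	printf("aggregated count for this MCV = %.0f\n", total);	/* prints 1300 */
	return 0;
}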
/*
 * scalararraysel_containment
 *		Estimate selectivity of ScalarArrayOpExpr via array containment.
 *
 * If we have const =/<> ANY/ALL (array_var) then we can estimate the
 * selectivity as though this were an array containment operator,
 * array_var op ARRAY[const].
 *
 * scalararraysel() has already verified that the ScalarArrayOpExpr's operator
 * is the array element type's default equality or inequality operator, and
 * has aggressively simplified both inputs to constants.
 *
 * Returns selectivity (0..1), or -1 if we fail to estimate selectivity.
 */
Selectivity
scalararraysel_containment(PlannerInfo *root,
						   Node *leftop, Node *rightop,
						   Oid elemtype, bool isEquality, bool useOr,
						   int varRelid)
{
	Selectivity selec;
	VariableStatData vardata;
	Datum constval;
	TypeCacheEntry *typentry;
	FmgrInfo *cmpfunc;

	/*
	 * rightop must be a variable, else punt.
	 */
	examine_variable(root, rightop, varRelid, &vardata);
	if (!vardata.rel)
	{
		ReleaseVariableStats(vardata);
		return -1.0;
	}

	/*
	 * leftop must be a constant, else punt.
	 */
	if (!IsA(leftop, Const))
	{
		ReleaseVariableStats(vardata);
		return -1.0;
	}
	if (((Const *) leftop)->constisnull)
	{
		/* qual can't succeed if null on left */
		ReleaseVariableStats(vardata);
		return (Selectivity) 0.0;
	}
	constval = ((Const *) leftop)->constvalue;

	/* Get element type's default comparison function */
	typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
	if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
	{
		ReleaseVariableStats(vardata);
		return -1.0;
	}
	cmpfunc = &typentry->cmp_proc_finfo;

	/*
	 * If the operator is <>, swap ANY/ALL, then invert the result later.
	 */
	if (!isEquality)
		useOr = !useOr;

	/* Get array element stats for var, if available */
	if (HeapTupleIsValid(vardata.statsTuple) &&
		statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
	{
		Form_pg_statistic stats;
		AttStatsSlot sslot;
		AttStatsSlot hslot;

		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);

		/* MCELEM will be an array of same type as element */
		if (get_attstatsslot(&sslot, vardata.statsTuple,
							 STATISTIC_KIND_MCELEM, InvalidOid,
							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
		{
			/* For ALL case, also get histogram of distinct-element counts */
			if (useOr ||
				!get_attstatsslot(&hslot, vardata.statsTuple,
								  STATISTIC_KIND_DECHIST, InvalidOid,
								  ATTSTATSSLOT_NUMBERS))
				memset(&hslot, 0, sizeof(hslot));

			/*
			 * For = ANY, estimate as var @> ARRAY[const].
			 *
			 * For = ALL, estimate as var <@ ARRAY[const].
			 */
			if (useOr)
				selec = mcelem_array_contain_overlap_selec(sslot.values,
														   sslot.nvalues,
														   sslot.numbers,
														   sslot.nnumbers,
														   &constval, 1,
														   OID_ARRAY_CONTAINS_OP,
														   cmpfunc);
			else
				selec = mcelem_array_contained_selec(sslot.values,
													 sslot.nvalues,
													 sslot.numbers,
													 sslot.nnumbers,
													 &constval, 1,
													 hslot.numbers,
													 hslot.nnumbers,
													 OID_ARRAY_CONTAINED_OP,
													 cmpfunc);

			free_attstatsslot(&hslot);
			free_attstatsslot(&sslot);
		}
		else
		{
			/* No most-common-elements info, so do without */
			if (useOr)
				selec = mcelem_array_contain_overlap_selec(NULL, 0,
														   NULL, 0,
														   &constval, 1,
														   OID_ARRAY_CONTAINS_OP,
														   cmpfunc);
			else
				selec = mcelem_array_contained_selec(NULL, 0,
													 NULL, 0,
													 &constval, 1,
													 NULL, 0,
													 OID_ARRAY_CONTAINED_OP,
													 cmpfunc);
		}

		/*
		 * MCE stats count only non-null rows, so adjust for null rows.
		 */
		selec *= (1.0 - stats->stanullfrac);
	}
	else
	{
		/* No stats at all, so do without */
		if (useOr)
			selec = mcelem_array_contain_overlap_selec(NULL, 0,
													   NULL, 0,
													   &constval, 1,
													   OID_ARRAY_CONTAINS_OP,
													   cmpfunc);
		else
			selec = mcelem_array_contained_selec(NULL, 0,
												 NULL, 0,
												 &constval, 1,
												 NULL, 0,
												 OID_ARRAY_CONTAINED_OP,
												 cmpfunc);
		/* we assume no nulls here, so no stanullfrac correction */
	}

	ReleaseVariableStats(vardata);

	/*
	 * If the operator is <>, invert the results.
	 */
	if (!isEquality)
		selec = 1.0 - selec;

	CLAMP_PROBABILITY(selec);

	return selec;
}
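The <> handling above works by swapping ANY/ALL before the containment estimate and inverting at the very end, so that sel(const <> ALL(var)) is computed as 1 minus sel(const = ANY(var)). A minimal standalone sketch (not PostgreSQL code; the containment fraction is an invented stand-in) of that inversion and clamping:

#include <stdio.h>

/* Sketch of the <> inversion: estimate the equality-containment case, then invert. */
static double
clamp01(double p)
{
	return p < 0.0 ? 0.0 : (p > 1.0 ? 1.0 : p);
}

int
main(void)
{
	double contain_selec = 0.15;	/* hypothetical sel(var @> ARRAY[const]) */
	double ne_all_selec = clamp01(1.0 - contain_selec);

	printf("sel(const <> ALL(var)) ~= %.2f\n", ne_all_selec);	/* prints 0.85 */
	return 0;
}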
/* * Calculate selectivity for "arraycolumn @> const", "arraycolumn && const" * or "arraycolumn <@ const" based on the statistics * * This function is mainly responsible for extracting the pg_statistic data * to be used; we then pass the problem on to mcelem_array_selec(). */ static Selectivity calc_arraycontsel(VariableStatData *vardata, Datum constval, Oid elemtype, Oid operator) { Selectivity selec; TypeCacheEntry *typentry; FmgrInfo *cmpfunc; ArrayType *array; /* Get element type's default comparison function */ typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO); if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) return DEFAULT_SEL(operator); cmpfunc = &typentry->cmp_proc_finfo; /* * The caller made sure the const is an array with same element type, so * get it now */ array = DatumGetArrayTypeP(constval); if (HeapTupleIsValid(vardata->statsTuple) && statistic_proc_security_check(vardata, cmpfunc->fn_oid)) { Form_pg_statistic stats; AttStatsSlot sslot; AttStatsSlot hslot; stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple); /* MCELEM will be an array of same type as column */ if (get_attstatsslot(&sslot, vardata->statsTuple, STATISTIC_KIND_MCELEM, InvalidOid, ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)) { /* * For "array <@ const" case we also need histogram of distinct * element counts. */ if (operator != OID_ARRAY_CONTAINED_OP || !get_attstatsslot(&hslot, vardata->statsTuple, STATISTIC_KIND_DECHIST, InvalidOid, ATTSTATSSLOT_NUMBERS)) memset(&hslot, 0, sizeof(hslot)); /* Use the most-common-elements slot for the array Var. */ selec = mcelem_array_selec(array, typentry, sslot.values, sslot.nvalues, sslot.numbers, sslot.nnumbers, hslot.numbers, hslot.nnumbers, operator, cmpfunc); free_attstatsslot(&hslot); free_attstatsslot(&sslot); } else { /* No most-common-elements info, so do without */ selec = mcelem_array_selec(array, typentry, NULL, 0, NULL, 0, NULL, 0, operator, cmpfunc); } /* * MCE stats count only non-null rows, so adjust for null rows. */ selec *= (1.0 - stats->stanullfrac); } else { /* No stats at all, so do without */ selec = mcelem_array_selec(array, typentry, NULL, 0, NULL, 0, NULL, 0, operator, cmpfunc); /* we assume no nulls here, so no stanullfrac correction */ } /* If constant was toasted, release the copy we made */ if (PointerGetDatum(array) != constval) pfree(array); return selec; }
/*
 * Calculate range operator selectivity using histograms of range bounds.
 *
 * This estimate is for the portion of values that are not empty and not
 * NULL.
 */
static double
calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
					  RangeType *constval, Oid operator)
{
	Datum *hist_values;
	int nhist;
	Datum *length_hist_values;
	int length_nhist;
	RangeBound *hist_lower;
	RangeBound *hist_upper;
	int i;
	RangeBound const_lower;
	RangeBound const_upper;
	bool empty;
	double hist_selec;

	/* Try to get histogram of ranges */
	if (!(HeapTupleIsValid(vardata->statsTuple) &&
		  get_attstatsslot(vardata->statsTuple,
						   vardata->atttype, vardata->atttypmod,
						   STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
						   NULL,
						   &hist_values, &nhist,
						   NULL, NULL)))
		return -1.0;

	/*
	 * Convert histogram of ranges into histograms of its lower and upper
	 * bounds.
	 */
	hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
	hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
	for (i = 0; i < nhist; i++)
	{
		range_deserialize(typcache, DatumGetRangeType(hist_values[i]),
						  &hist_lower[i], &hist_upper[i], &empty);
		/* The histogram should not contain any empty ranges */
		if (empty)
			elog(ERROR, "bounds histogram contains an empty range");
	}

	/* @> and <@ also need a histogram of range lengths */
	if (operator == OID_RANGE_CONTAINS_OP ||
		operator == OID_RANGE_CONTAINED_OP)
	{
		if (!(HeapTupleIsValid(vardata->statsTuple) &&
			  get_attstatsslot(vardata->statsTuple,
							   vardata->atttype, vardata->atttypmod,
							   STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
							   InvalidOid,
							   NULL,
							   &length_hist_values, &length_nhist,
							   NULL, NULL)))
			return -1.0;

		/* check that it's a histogram, not just a dummy entry */
		if (length_nhist < 2)
			return -1.0;
	}

	/* Extract the bounds of the constant value. */
	range_deserialize(typcache, constval, &const_lower, &const_upper, &empty);
	Assert(!empty);

	/*
	 * Calculate selectivity comparing the lower or upper bound of the
	 * constant with the histogram of lower or upper bounds.
	 */
	switch (operator)
	{
		case OID_RANGE_LESS_OP:

			/*
			 * The regular b-tree comparison operators (<, <=, >, >=) compare
			 * the lower bounds first, and the upper bounds for values with
			 * equal lower bounds. Estimate that by comparing the lower bounds
			 * only. This gives a fairly accurate estimate assuming there
			 * aren't many rows with a lower bound equal to the constant's
			 * lower bound.
			 */
			hist_selec = calc_hist_selectivity_scalar(typcache, &const_lower,
													  hist_lower, nhist, false);
			break;

		case OID_RANGE_LESS_EQUAL_OP:
			hist_selec = calc_hist_selectivity_scalar(typcache, &const_lower,
													  hist_lower, nhist, true);
			break;

		case OID_RANGE_GREATER_OP:
			hist_selec = 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
														  hist_lower, nhist, false);
			break;

		case OID_RANGE_GREATER_EQUAL_OP:
			hist_selec = 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
														  hist_lower, nhist, true);
			break;

		case OID_RANGE_LEFT_OP:
			/* var << const when upper(var) < lower(const) */
			hist_selec = calc_hist_selectivity_scalar(typcache, &const_lower,
													  hist_upper, nhist, false);
			break;

		case OID_RANGE_RIGHT_OP:
			/* var >> const when lower(var) > upper(const) */
			hist_selec = 1 - calc_hist_selectivity_scalar(typcache, &const_upper,
														  hist_lower, nhist, true);
			break;

		case OID_RANGE_OVERLAPS_RIGHT_OP:
			/* compare lower bounds */
			hist_selec = 1 - calc_hist_selectivity_scalar(typcache, &const_lower,
														  hist_lower, nhist, false);
			break;

		case OID_RANGE_OVERLAPS_LEFT_OP:
			/* compare upper bounds */
			hist_selec = calc_hist_selectivity_scalar(typcache, &const_upper,
													  hist_upper, nhist, true);
			break;

		case OID_RANGE_OVERLAP_OP:
		case OID_RANGE_CONTAINS_ELEM_OP:

			/*
			 * A && B <=> NOT (A << B OR A >> B).
			 *
			 * Since A << B and A >> B are mutually exclusive events we can
			 * sum their probabilities to find probability of (A << B OR
			 * A >> B).
			 *
			 * "range @> elem" is equivalent to "range && [elem,elem]". The
			 * caller already constructed the singular range from the element
			 * constant, so just treat it the same as &&.
			 */
			hist_selec = calc_hist_selectivity_scalar(typcache, &const_lower,
													  hist_upper, nhist, false);
			hist_selec +=
				(1.0 - calc_hist_selectivity_scalar(typcache, &const_upper,
													hist_lower, nhist, true));
			hist_selec = 1.0 - hist_selec;
			break;

		case OID_RANGE_CONTAINS_OP:
			hist_selec =
				calc_hist_selectivity_contains(typcache, &const_lower,
											   &const_upper, hist_lower, nhist,
											   length_hist_values, length_nhist);
			break;

		case OID_RANGE_CONTAINED_OP:
			if (const_lower.infinite)
			{
				/*
				 * Lower bound no longer matters. Just estimate the fraction
				 * with an upper bound <= const upper bound
				 */
				hist_selec =
					calc_hist_selectivity_scalar(typcache, &const_upper,
												 hist_upper, nhist, true);
			}
			else if (const_upper.infinite)
			{
				hist_selec =
					1.0 - calc_hist_selectivity_scalar(typcache, &const_lower,
													   hist_lower, nhist, false);
			}
			else
			{
				hist_selec =
					calc_hist_selectivity_contained(typcache, &const_lower,
													&const_upper, hist_lower, nhist,
													length_hist_values, length_nhist);
			}
			break;

		default:
			elog(ERROR, "unknown range operator %u", operator);
			hist_selec = -1.0;	/* keep compiler quiet */
			break;
	}

	return hist_selec;
}
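The && branch above relies on the identity A && B <=> NOT (A << B OR A >> B), with the two exclusive events estimated from the upper-bound and lower-bound histograms respectively. A minimal standalone sketch (not PostgreSQL code; the histogram fractions are invented stand-ins for calc_hist_selectivity_scalar() results):

#include <stdio.h>

/* Sketch of the overlap estimate: 1 - (P(strictly left) + P(strictly right)). */
int
main(void)
{
	double p_strictly_left = 0.30;	/* fraction with upper(var) < lower(const) */
	double p_strictly_right = 0.25;	/* fraction with lower(var) > upper(const) */
	double p_overlap = 1.0 - (p_strictly_left + p_strictly_right);

	printf("sel(var && const) ~= %.2f\n", p_overlap);	/* prints 0.45 */
	return 0;
}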
static double
calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
			  RangeType *constval, Oid operator)
{
	double hist_selec;
	double selec;
	float4 empty_frac, null_frac;

	/*
	 * First look up the fraction of NULLs and empty ranges from pg_statistic.
	 */
	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		float4 *numbers;
		int nnumbers;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
		null_frac = stats->stanullfrac;

		/* Try to get fraction of empty ranges */
		if (get_attstatsslot(vardata->statsTuple,
							 vardata->atttype, vardata->atttypmod,
							 STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
							 InvalidOid,
							 NULL,
							 NULL, NULL,
							 &numbers, &nnumbers))
		{
			if (nnumbers != 1)
				elog(ERROR, "invalid empty fraction statistic");	/* shouldn't happen */
			empty_frac = numbers[0];
		}
		else
		{
			/* No empty fraction statistic. Assume no empty ranges. */
			empty_frac = 0.0;
		}
	}
	else
	{
		/*
		 * No stats are available. Follow through the calculations below
		 * anyway, assuming no NULLs and no empty ranges. This still allows us
		 * to give a better-than-nothing estimate based on whether the
		 * constant is an empty range or not.
		 */
		null_frac = 0.0;
		empty_frac = 0.0;
	}

	if (RangeIsEmpty(constval))
	{
		/*
		 * An empty range matches all ranges, all empty ranges, or nothing,
		 * depending on the operator
		 */
		switch (operator)
		{
				/* these return false if either argument is empty */
			case OID_RANGE_OVERLAP_OP:
			case OID_RANGE_OVERLAPS_LEFT_OP:
			case OID_RANGE_OVERLAPS_RIGHT_OP:
			case OID_RANGE_LEFT_OP:
			case OID_RANGE_RIGHT_OP:
				/* nothing is less than an empty range */
			case OID_RANGE_LESS_OP:
				selec = 0.0;
				break;

				/* only empty ranges can be contained by an empty range */
			case OID_RANGE_CONTAINED_OP:
				/* only empty ranges are <= an empty range */
			case OID_RANGE_LESS_EQUAL_OP:
				selec = empty_frac;
				break;

				/* everything contains an empty range */
			case OID_RANGE_CONTAINS_OP:
				/* everything is >= an empty range */
			case OID_RANGE_GREATER_EQUAL_OP:
				selec = 1.0;
				break;

				/* all non-empty ranges are > an empty range */
			case OID_RANGE_GREATER_OP:
				selec = 1.0 - empty_frac;
				break;

				/* an element cannot be empty */
			case OID_RANGE_CONTAINS_ELEM_OP:
			default:
				elog(ERROR, "unexpected operator %u", operator);
				selec = 0.0;	/* keep compiler quiet */
				break;
		}
	}
	else
	{
		/*
		 * Calculate selectivity using bound histograms. If that fails for
		 * some reason, e.g. no histogram in pg_statistic, use the default
		 * constant estimate for the fraction of non-empty values. This is
		 * still somewhat better than just returning the default estimate,
		 * because this still takes into account the fraction of empty and
		 * NULL tuples, if we had statistics for them.
		 */
		hist_selec = calc_hist_selectivity(typcache, vardata, constval,
										   operator);
		if (hist_selec < 0.0)
			hist_selec = default_range_selectivity(operator);

		/*
		 * Now merge the results for the empty ranges and histogram
		 * calculations, realizing that the histogram covers only the
		 * non-null, non-empty values.
		 */
		if (operator == OID_RANGE_CONTAINED_OP)
		{
			/* empty is contained by anything non-empty */
			selec = (1.0 - empty_frac) * hist_selec + empty_frac;
		}
		else
		{
			/* with any other operator, empty Op non-empty matches nothing */
			selec = (1.0 - empty_frac) * hist_selec;
		}
	}

	/* all range operators are strict */
	selec *= (1.0 - null_frac);

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	return selec;
}
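The merge step at the end combines three pieces: the histogram estimate (which covers only non-null, non-empty rows), the empty-range fraction (added back only for <@, since an empty range is contained by anything), and the null fraction removed by strictness. A minimal standalone sketch (not PostgreSQL code; all fractions are invented):

#include <stdio.h>

/* Sketch of calc_rangesel()'s final combination of hist_selec, empty_frac and null_frac. */
int
main(void)
{
	double hist_selec = 0.40;	/* estimate over non-null, non-empty rows */
	double empty_frac = 0.05;
	double null_frac = 0.02;
	double selec_overlap = (1.0 - empty_frac) * hist_selec;				/* e.g. && */
	double selec_contained = (1.0 - empty_frac) * hist_selec + empty_frac;	/* <@ */

	selec_overlap *= (1.0 - null_frac);
	selec_contained *= (1.0 - null_frac);
	printf("&&: %.4f   <@: %.4f\n", selec_overlap, selec_contained);
	return 0;
}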
/*
 * _int_matchsel -- restriction selectivity function for intarray @@ query_int
 */
Datum
_int_matchsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	List *args = (List *) PG_GETARG_POINTER(2);
	int varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node *other;
	bool varonleft;
	Selectivity selec;
	QUERYTYPE *query;
	Datum *mcelems = NULL;
	float4 *mcefreqs = NULL;
	int nmcelems = 0;
	float4 minfreq = 0.0;
	float4 nullfrac = 0.0;
	Form_pg_statistic stats;
	Datum *values = NULL;
	int nvalues = 0;
	float4 *numbers = NULL;
	int nnumbers = 0;

	/*
	 * If expression is not "variable @@ something" or "something @@ variable"
	 * then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

	/*
	 * Variable should be int[]. We don't support cases where variable is
	 * query_int.
	 */
	if (vardata.vartype != INT4ARRAYOID)
		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
	}

	/*
	 * The "@@" operator is strict, so we can cope with NULL right away.
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	/* The caller made sure the const is a query, so get it now */
	query = DatumGetQueryTypeP(((Const *) other)->constvalue);

	/* Empty query matches nothing */
	if (query->size == 0)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	/*
	 * Get the statistics for the intarray column.
	 *
	 * We're interested in the Most-Common-Elements list, and the NULL
	 * fraction.
	 */
	if (HeapTupleIsValid(vardata.statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
		nullfrac = stats->stanullfrac;

		/*
		 * For an int4 array, the default array type analyze function will
		 * collect a Most Common Elements list, which is an array of int4s.
		 */
		if (get_attstatsslot(vardata.statsTuple,
							 INT4OID, -1,
							 STATISTIC_KIND_MCELEM, InvalidOid,
							 NULL,
							 &values, &nvalues,
							 &numbers, &nnumbers))
		{
			/*
			 * There should be three more Numbers than Values, because the
			 * last three (for intarray) cells are taken for minimal, maximal
			 * and nulls frequency. Punt if not.
			 */
			if (nnumbers == nvalues + 3)
			{
				/* Grab the lowest frequency. */
				minfreq = numbers[nnumbers - (nnumbers - nvalues)];

				mcelems = values;
				mcefreqs = numbers;
				nmcelems = nvalues;
			}
		}
	}

	/* Process the logical expression in the query, using the stats */
	selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
								mcelems, mcefreqs, nmcelems, minfreq);

	/* MCE stats count only non-null rows, so adjust for null rows. */
	selec *= (1.0 - nullfrac);

	free_attstatsslot(INT4OID, values, nvalues, numbers, nnumbers);
	ReleaseVariableStats(vardata);

	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}
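The index expression used for minfreq depends on the MCELEM slot layout the comment describes: the numbers array carries three extra trailing cells (minimal, maximal and null-element frequencies), so numbers[nnumbers - (nnumbers - nvalues)] is simply numbers[nvalues]. A minimal standalone sketch (not PostgreSQL code; the frequencies are invented):

#include <stdio.h>

/* Sketch of the intarray MCELEM slot layout and the minfreq lookup. */
int
main(void)
{
	float numbers[] = {0.30f, 0.20f, 0.10f,		/* per-element frequencies */
					   0.10f, 0.30f, 0.00f};	/* min freq, max freq, null freq */
	int nvalues = 3;
	int nnumbers = 6;
	float minfreq = numbers[nnumbers - (nnumbers - nvalues)];

	printf("minfreq = %.2f (== numbers[%d])\n", minfreq, nvalues);
	return 0;
}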
/*
 * Selectivity estimation for the subnet inclusion/overlap operators
 */
Datum
networksel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid operator = PG_GETARG_OID(1);
	List *args = (List *) PG_GETARG_POINTER(2);
	int varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node *other;
	bool varonleft;
	Selectivity selec, mcv_selec, non_mcv_selec;
	Datum constvalue, *hist_values;
	int hist_nvalues;
	Form_pg_statistic stats;
	double sumcommon, nullfrac;
	FmgrInfo proc;

	/*
	 * If expression is not (variable op something) or (something op
	 * variable), then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
	}

	/* All of the operators handled here are strict. */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}
	constvalue = ((Const *) other)->constvalue;

	/* Otherwise, we need stats in order to produce a non-default estimate. */
	if (!HeapTupleIsValid(vardata.statsTuple))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
	}

	stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
	nullfrac = stats->stanullfrac;

	/*
	 * If we have most-common-values info, add up the fractions of the MCV
	 * entries that satisfy MCV OP CONST. These fractions contribute directly
	 * to the result selectivity. Also add up the total fraction represented
	 * by MCV entries.
	 */
	fmgr_info(get_opcode(operator), &proc);
	mcv_selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft,
								&sumcommon);

	/*
	 * If we have a histogram, use it to estimate the proportion of the
	 * non-MCV population that satisfies the clause. If we don't, apply the
	 * default selectivity to that population.
	 */
	if (get_attstatsslot(vardata.statsTuple,
						 vardata.atttype, vardata.atttypmod,
						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
						 NULL,
						 &hist_values, &hist_nvalues,
						 NULL, NULL))
	{
		int opr_codenum = inet_opr_codenum(operator);

		/* Commute if needed, so we can consider histogram to be on the left */
		if (!varonleft)
			opr_codenum = -opr_codenum;

		non_mcv_selec = inet_hist_value_sel(hist_values, hist_nvalues,
											constvalue, opr_codenum);

		free_attstatsslot(vardata.atttype, hist_values, hist_nvalues, NULL, 0);
	}
	else
		non_mcv_selec = DEFAULT_SEL(operator);

	/* Combine selectivities for MCV and non-MCV populations */
	selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;

	/* Result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	ReleaseVariableStats(vardata);

	PG_RETURN_FLOAT8(selec);
}
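The final combination line is the heart of this estimator: MCV matches contribute their frequencies directly, while the histogram estimate applies only to the population that is neither NULL nor already covered by the MCV list. A minimal standalone sketch (not PostgreSQL code; the input fractions are invented):

#include <stdio.h>

/* Sketch of "selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec". */
int
main(void)
{
	double mcv_selec = 0.12;		/* summed frequencies of MCVs satisfying the clause */
	double sumcommon = 0.40;		/* total frequency covered by the MCV list */
	double nullfrac = 0.03;
	double non_mcv_selec = 0.20;	/* histogram estimate for the remaining rows */
	double selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;

	printf("selectivity ~= %.4f\n", selec);	/* prints 0.2340 */
	return 0;
}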
/*
 * Semi join selectivity estimation for subnet inclusion/overlap operators
 *
 * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs
 * histogram selectivity for semi/anti join cases.
 */
static Selectivity
networkjoinsel_semi(Oid operator,
					VariableStatData *vardata1, VariableStatData *vardata2)
{
	Form_pg_statistic stats;
	Selectivity selec = 0.0, sumcommon1 = 0.0, sumcommon2 = 0.0;
	double nullfrac1 = 0.0, nullfrac2 = 0.0, hist2_weight = 0.0;
	bool mcv1_exists = false, mcv2_exists = false;
	bool hist1_exists = false, hist2_exists = false;
	int opr_codenum;
	FmgrInfo proc;
	int i;
	int mcv1_nvalues, mcv2_nvalues, mcv1_nnumbers, mcv2_nnumbers;
	int hist1_nvalues, hist2_nvalues;
	int mcv1_length = 0, mcv2_length = 0;
	Datum *mcv1_values, *mcv2_values, *hist1_values, *hist2_values;
	float4 *mcv1_numbers, *mcv2_numbers;

	if (HeapTupleIsValid(vardata1->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
		nullfrac1 = stats->stanullfrac;

		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
									   vardata1->atttype, vardata1->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv1_values, &mcv1_nvalues,
									   &mcv1_numbers, &mcv1_nnumbers);
		hist1_exists = get_attstatsslot(vardata1->statsTuple,
										vardata1->atttype, vardata1->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist1_values, &hist1_nvalues,
										NULL, NULL);
		/* Arbitrarily limit number of MCVs considered */
		mcv1_length = Min(mcv1_nvalues, MAX_CONSIDERED_ELEMS);
		if (mcv1_exists)
			sumcommon1 = mcv_population(mcv1_numbers, mcv1_length);
	}

	if (HeapTupleIsValid(vardata2->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
		nullfrac2 = stats->stanullfrac;

		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
									   vardata2->atttype, vardata2->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv2_values, &mcv2_nvalues,
									   &mcv2_numbers, &mcv2_nnumbers);
		hist2_exists = get_attstatsslot(vardata2->statsTuple,
										vardata2->atttype, vardata2->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist2_values, &hist2_nvalues,
										NULL, NULL);
		/* Arbitrarily limit number of MCVs considered */
		mcv2_length = Min(mcv2_nvalues, MAX_CONSIDERED_ELEMS);
		if (mcv2_exists)
			sumcommon2 = mcv_population(mcv2_numbers, mcv2_length);
	}

	opr_codenum = inet_opr_codenum(operator);
	fmgr_info(get_opcode(operator), &proc);

	/* Estimate number of input rows represented by RHS histogram. */
	if (hist2_exists && vardata2->rel)
		hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows;

	/*
	 * Consider each element of the LHS MCV list, matching it to whatever RHS
	 * stats we have. Scale according to the known frequency of the MCV.
	 */
	if (mcv1_exists && (mcv2_exists || hist2_exists))
	{
		for (i = 0; i < mcv1_length; i++)
		{
			selec += mcv1_numbers[i] *
				inet_semi_join_sel(mcv1_values[i],
								   mcv2_exists, mcv2_values, mcv2_length,
								   hist2_exists, hist2_values, hist2_nvalues,
								   hist2_weight,
								   &proc, opr_codenum);
		}
	}

	/*
	 * Consider each element of the LHS histogram, except for the first and
	 * last elements, which we exclude on the grounds that they're outliers
	 * and thus not very representative. Scale on the assumption that each
	 * such histogram element represents an equal share of the LHS histogram
	 * population (which is a bit bogus, because the members of its bucket may
	 * not all act the same with respect to the join clause, but it's hard to
	 * do better).
	 *
	 * If there are too many histogram elements, decimate to limit runtime.
	 */
	if (hist1_exists && hist1_nvalues > 2 && (mcv2_exists || hist2_exists))
	{
		double hist_selec_sum = 0.0;
		int k, n;

		k = (hist1_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;

		n = 0;
		for (i = 1; i < hist1_nvalues - 1; i += k)
		{
			hist_selec_sum +=
				inet_semi_join_sel(hist1_values[i],
								   mcv2_exists, mcv2_values, mcv2_length,
								   hist2_exists, hist2_values, hist2_nvalues,
								   hist2_weight,
								   &proc, opr_codenum);
			n++;
		}

		selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n;
	}

	/*
	 * If useful statistics are not available then use the default estimate.
	 * We can apply null fractions if known, though.
	 */
	if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);

	/* Release stats. */
	if (mcv1_exists)
		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
						  mcv1_numbers, mcv1_nnumbers);
	if (mcv2_exists)
		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
						  mcv2_numbers, mcv2_nnumbers);
	if (hist1_exists)
		free_attstatsslot(vardata1->atttype, hist1_values, hist1_nvalues,
						  NULL, 0);
	if (hist2_exists)
		free_attstatsslot(vardata2->atttype, hist2_values, hist2_nvalues,
						  NULL, 0);

	return selec;
}
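The decimation step above skips the two endpoint histogram entries as outliers and then samples every k-th remaining entry so that at most roughly MAX_CONSIDERED_ELEMS values are examined. A minimal standalone sketch (not PostgreSQL code; the 1024 bound below is an assumed stand-in for the real MAX_CONSIDERED_ELEMS constant):

#include <stdio.h>

#define SKETCH_MAX_CONSIDERED_ELEMS 1024	/* assumed value for illustration */

/* Sketch of the histogram decimation loop: k = (nhist - 3) / MAX + 1. */
int
main(void)
{
	int nhist = 10000;
	int k = (nhist - 3) / SKETCH_MAX_CONSIDERED_ELEMS + 1;
	int n = 0;
	int i;

	for (i = 1; i < nhist - 1; i += k)
		n++;	/* one inet_semi_join_sel() call per sampled entry */

	printf("step k = %d, samples considered = %d\n", k, n);	/* k = 10, n = 1000 */
	return 0;
}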
/*
 * Inner join selectivity estimation for subnet inclusion/overlap operators
 *
 * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
 * selectivity for join using the subnet inclusion operators. Unlike the
 * join selectivity function for the equality operator, eqjoinsel_inner(),
 * one to one matching of the values is not enough. Network inclusion
 * operators are likely to match many to many, so we must check all pairs.
 * (Note: it might be possible to exploit understanding of the histogram's
 * btree ordering to reduce the work needed, but we don't currently try.)
 * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
 */
static Selectivity
networkjoinsel_inner(Oid operator,
					 VariableStatData *vardata1, VariableStatData *vardata2)
{
	Form_pg_statistic stats;
	double nullfrac1 = 0.0, nullfrac2 = 0.0;
	Selectivity selec = 0.0, sumcommon1 = 0.0, sumcommon2 = 0.0;
	bool mcv1_exists = false, mcv2_exists = false;
	bool hist1_exists = false, hist2_exists = false;
	int opr_codenum;
	int mcv1_nvalues, mcv2_nvalues, mcv1_nnumbers, mcv2_nnumbers;
	int hist1_nvalues, hist2_nvalues;
	int mcv1_length = 0, mcv2_length = 0;
	Datum *mcv1_values, *mcv2_values, *hist1_values, *hist2_values;
	float4 *mcv1_numbers, *mcv2_numbers;

	if (HeapTupleIsValid(vardata1->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
		nullfrac1 = stats->stanullfrac;

		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
									   vardata1->atttype, vardata1->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv1_values, &mcv1_nvalues,
									   &mcv1_numbers, &mcv1_nnumbers);
		hist1_exists = get_attstatsslot(vardata1->statsTuple,
										vardata1->atttype, vardata1->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist1_values, &hist1_nvalues,
										NULL, NULL);
		/* Arbitrarily limit number of MCVs considered */
		mcv1_length = Min(mcv1_nvalues, MAX_CONSIDERED_ELEMS);
		if (mcv1_exists)
			sumcommon1 = mcv_population(mcv1_numbers, mcv1_length);
	}

	if (HeapTupleIsValid(vardata2->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
		nullfrac2 = stats->stanullfrac;

		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
									   vardata2->atttype, vardata2->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv2_values, &mcv2_nvalues,
									   &mcv2_numbers, &mcv2_nnumbers);
		hist2_exists = get_attstatsslot(vardata2->statsTuple,
										vardata2->atttype, vardata2->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist2_values, &hist2_nvalues,
										NULL, NULL);
		/* Arbitrarily limit number of MCVs considered */
		mcv2_length = Min(mcv2_nvalues, MAX_CONSIDERED_ELEMS);
		if (mcv2_exists)
			sumcommon2 = mcv_population(mcv2_numbers, mcv2_length);
	}

	opr_codenum = inet_opr_codenum(operator);

	/*
	 * Calculate selectivity for MCV vs MCV matches.
	 */
	if (mcv1_exists && mcv2_exists)
		selec += inet_mcv_join_sel(mcv1_values, mcv1_numbers, mcv1_length,
								   mcv2_values, mcv2_numbers, mcv2_length,
								   operator);

	/*
	 * Add in selectivities for MCV vs histogram matches, scaling according to
	 * the fractions of the populations represented by the histograms. Note
	 * that the second case needs to commute the operator.
	 */
	if (mcv1_exists && hist2_exists)
		selec += (1.0 - nullfrac2 - sumcommon2) *
			inet_mcv_hist_sel(mcv1_values, mcv1_numbers, mcv1_length,
							  hist2_values, hist2_nvalues, opr_codenum);
	if (mcv2_exists && hist1_exists)
		selec += (1.0 - nullfrac1 - sumcommon1) *
			inet_mcv_hist_sel(mcv2_values, mcv2_numbers, mcv2_length,
							  hist1_values, hist1_nvalues, -opr_codenum);

	/*
	 * Add in selectivity for histogram vs histogram matches, again scaling
	 * appropriately.
	 */
	if (hist1_exists && hist2_exists)
		selec += (1.0 - nullfrac1 - sumcommon1) *
			(1.0 - nullfrac2 - sumcommon2) *
			inet_hist_inclusion_join_sel(hist1_values, hist1_nvalues,
										 hist2_values, hist2_nvalues,
										 opr_codenum);

	/*
	 * If useful statistics are not available then use the default estimate.
	 * We can apply null fractions if known, though.
	 */
	if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);

	/* Release stats. */
	if (mcv1_exists)
		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
						  mcv1_numbers, mcv1_nnumbers);
	if (mcv2_exists)
		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
						  mcv2_numbers, mcv2_nnumbers);
	if (hist1_exists)
		free_attstatsslot(vardata1->atttype, hist1_values, hist1_nvalues,
						  NULL, 0);
	if (hist2_exists)
		free_attstatsslot(vardata2->atttype, hist2_values, hist2_nvalues,
						  NULL, 0);

	return selec;
}
Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	/* Oid operator = PG_GETARG_OID(1); */
	List *args = (List *) PG_GETARG_POINTER(2);
	JoinType jointype = (JoinType) PG_GETARG_INT16(3);

	Node *arg1, *arg2;
	Var *var1, *var2;
	Oid relid1, relid2;

	HeapTuple stats1_tuple, stats2_tuple;
	GEOG_STATS *geogstats1, *geogstats2;
	/*
	 * These are to avoid casting the corresponding
	 * "type-punned" pointers, which would break
	 * "strict-aliasing rules".
	 */
	GEOG_STATS **gs1ptr = &geogstats1, **gs2ptr = &geogstats2;
	int geogstats1_nvalues = 0, geogstats2_nvalues = 0;
	float8 selectivity1 = 0.0, selectivity2 = 0.0;
	float4 num1_tuples = 0.0, num2_tuples = 0.0;
	float4 total_tuples = 0.0, rows_returned = 0.0;
	GBOX search_box;

	/**
	 * Join selectivity algorithm. To calculate the selectivity we
	 * calculate the intersection of the two column sample extents,
	 * sum the results, and then multiply by two, since for each
	 * geometry in col 1 that intersects a geometry in col 2, the
	 * reverse is also true.
	 */

	POSTGIS_DEBUGF(3, "geography_gist_join_selectivity called with jointype %d", jointype);

	/*
	 * We'll only respond to an inner join/unknown context join
	 */
	if (jointype != JOIN_INNER)
	{
		elog(NOTICE, "geography_gist_join_selectivity called with incorrect join type");
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	 * Determine the oids of the geometry columns we are working with
	 */
	arg1 = (Node *) linitial(args);
	arg2 = (Node *) lsecond(args);

	if (!IsA(arg1, Var) || !IsA(arg2, Var))
	{
		elog(DEBUG1, "geography_gist_join_selectivity called with arguments that are not column references");
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	var1 = (Var *) arg1;
	var2 = (Var *) arg2;

	relid1 = getrelid(var1->varno, root->parse->rtable);
	relid2 = getrelid(var2->varno, root->parse->rtable);

	POSTGIS_DEBUGF(3, "Working with relations oids: %d %d", relid1, relid2);

	/* Read the stats tuple from the first column */
	stats1_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid1),
	                              Int16GetDatum(var1->varattno), 0, 0);
	if ( ! stats1_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	if ( ! get_attstatsslot(stats1_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **) gs1ptr, &geogstats1_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity");

		ReleaseSysCache(stats1_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/* Read the stats tuple from the second column */
	stats2_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid2),
	                              Int16GetDatum(var2->varattno), 0, 0);
	if ( ! stats2_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");

		free_attstatsslot(0, NULL, 0, (float *) geogstats1, geogstats1_nvalues);
		ReleaseSysCache(stats1_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	if ( ! get_attstatsslot(stats2_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **) gs2ptr, &geogstats2_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity");

		free_attstatsslot(0, NULL, 0, (float *) geogstats1, geogstats1_nvalues);
		ReleaseSysCache(stats2_tuple);
		ReleaseSysCache(stats1_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/**
	 * Setup the search box - this is the intersection of the two column
	 * extents.
	 */
	search_box.xmin = Max(geogstats1->xmin, geogstats2->xmin);
	search_box.ymin = Max(geogstats1->ymin, geogstats2->ymin);
	search_box.zmin = Max(geogstats1->zmin, geogstats2->zmin);
	search_box.xmax = Min(geogstats1->xmax, geogstats2->xmax);
	search_box.ymax = Min(geogstats1->ymax, geogstats2->ymax);
	search_box.zmax = Min(geogstats1->zmax, geogstats2->zmax);

	/* If the extents of the two columns don't intersect, return zero */
	if (search_box.xmin > search_box.xmax || search_box.ymin > search_box.ymax ||
	        search_box.zmin > search_box.zmax)
		PG_RETURN_FLOAT8(0.0);

	POSTGIS_DEBUGF(3, " -- geogstats1 box: %.15g %.15g %.15g, %.15g %.15g %.15g",
	               geogstats1->xmin, geogstats1->ymin, geogstats1->zmin,
	               geogstats1->xmax, geogstats1->ymax, geogstats1->zmax);
	POSTGIS_DEBUGF(3, " -- geogstats2 box: %.15g %.15g %.15g, %.15g %.15g %.15g",
	               geogstats2->xmin, geogstats2->ymin, geogstats2->zmin,
	               geogstats2->xmax, geogstats2->ymax, geogstats2->zmax);
	POSTGIS_DEBUGF(3, " -- calculated intersection box is : %.15g %.15g %.15g, %.15g %.15g %.15g",
	               search_box.xmin, search_box.ymin, search_box.zmin,
	               search_box.xmax, search_box.ymax, search_box.zmax);

	/* Do the selectivity */
	selectivity1 = estimate_selectivity(&search_box, geogstats1);
	selectivity2 = estimate_selectivity(&search_box, geogstats2);

	POSTGIS_DEBUGF(3, "selectivity1: %.15g selectivity2: %.15g", selectivity1, selectivity2);

	/*
	 * OK, so before we calculate the join selectivity we also need to
	 * know the number of tuples in each of the columns since
	 * estimate_selectivity returns the number of estimated tuples
	 * divided by the total number of tuples.
	 */
	num1_tuples = geogstats1->totalrows;
	num2_tuples = geogstats2->totalrows;

	/* Free the statistic tuples */
	free_attstatsslot(0, NULL, 0, (float *) geogstats1, geogstats1_nvalues);
	ReleaseSysCache(stats1_tuple);

	free_attstatsslot(0, NULL, 0, (float *) geogstats2, geogstats2_nvalues);
	ReleaseSysCache(stats2_tuple);

	/*
	 * Finally calculate the estimate of the number of rows returned
	 *
	 *    = 2 * (nrows from col1 + nrows from col2) /
	 *          (total nrows in col1 x total nrows in col2)
	 *
	 * The factor of 2 accounts for the fact that for each tuple in
	 * col 1 matching col 2, there will be another match in col 2
	 * matching col 1
	 */
	total_tuples = num1_tuples * num2_tuples;
	rows_returned = 2 * ((num1_tuples * selectivity1) + (num2_tuples * selectivity2));

	POSTGIS_DEBUGF(3, "Rows from rel1: %f", num1_tuples * selectivity1);
	POSTGIS_DEBUGF(3, "Rows from rel2: %f", num2_tuples * selectivity2);
	POSTGIS_DEBUGF(3, "Estimated rows returned: %f", rows_returned);

	/*
	 * One (or both) tuple count is zero...
	 * We return default selectivity estimate.
	 * We could probably attempt at an estimate
	 * w/out looking at tables tuple count, with
	 * a function of selectivity1, selectivity2.
	 */
	if ( ! total_tuples )
	{
		POSTGIS_DEBUG(3, "Total tuples == 0, returning default join selectivity");
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	if ( rows_returned > total_tuples )
		PG_RETURN_FLOAT8(1.0);

	PG_RETURN_FLOAT8(rows_returned / total_tuples);
}
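The closing arithmetic estimates the rows produced by each side within the intersection box, doubles the sum to cover both directions of matching, and divides by the size of the cross product, capping the result at 1. A minimal standalone sketch (not PostGIS code; the tuple counts and selectivities are invented):

#include <stdio.h>

/* Sketch of the final join-selectivity arithmetic used above. */
int
main(void)
{
	double num1_tuples = 50000.0, selectivity1 = 0.01;
	double num2_tuples = 20000.0, selectivity2 = 0.02;
	double total_tuples = num1_tuples * num2_tuples;
	double rows_returned = 2.0 * (num1_tuples * selectivity1 +
	                              num2_tuples * selectivity2);
	double join_sel = rows_returned / total_tuples;

	if (join_sel > 1.0)
		join_sel = 1.0;
	printf("estimated join selectivity = %g\n", join_sel);	/* 1.8e-06 */
	return 0;
}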
Datum geography_gist_selectivity(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	/* Oid operator = PG_GETARG_OID(1); */
	List *args = (List *) PG_GETARG_POINTER(2);
	/* int varRelid = PG_GETARG_INT32(3); */
	Oid relid;
	HeapTuple stats_tuple;
	GEOG_STATS *geogstats;
	/*
	 * This is to avoid casting the corresponding
	 * "type-punned" pointer, which would break
	 * "strict-aliasing rules".
	 */
	GEOG_STATS **gsptr = &geogstats;
	int geogstats_nvalues = 0;
	Node *other;
	Var *self;
	GBOX search_box;
	float8 selectivity = 0;

	POSTGIS_DEBUG(2, "geography_gist_selectivity called");

	/* Fail if not a binary opclause (probably shouldn't happen) */
	if (list_length(args) != 2)
	{
		POSTGIS_DEBUG(3, "geography_gist_selectivity: not a binary opclause");
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	 * This selectivity function is invoked by a clause of the form <arg> && <arg>
	 *
	 * In typical usage, one argument will be a column reference, while the other will
	 * be a geography constant; set self to point to the column argument and other
	 * to point to the constant argument.
	 */
	other = (Node *) linitial(args);
	if ( ! IsA(other, Const) )
	{
		self = (Var *) other;
		other = (Node *) lsecond(args);
	}
	else
	{
		self = (Var *) lsecond(args);
	}

	if ( ! IsA(other, Const) )
	{
		POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity");
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	 * We don't have a nice <const> && <var> or <var> && <const>
	 * situation here. <const> && <const> would probably get evaluated
	 * away by PgSQL earlier on. <func> && <const> is harder, and the
	 * case we get often is <const> && ST_Expand(<var>), which does
	 * actually have a subtly different selectivity than a bare
	 * <const> && <var> call. It's calculable though, by expanding
	 * every cell in the histogram appropriately.
	 *
	 * Discussion: http://trac.osgeo.org/postgis/ticket/1828
	 *
	 * To do? Do variable selectivity based on the <func> node.
	 */
	if ( ! IsA(self, Var) )
	{
		POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity");
		// PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
		PG_RETURN_FLOAT8(0.33333);
	}

	/* Convert coordinates to 3D geodesic */
	search_box.flags = 1;
	FLAGS_SET_GEODETIC(search_box.flags, 1);
	if ( ! gserialized_datum_get_gbox_p(((Const *) other)->constvalue, &search_box) )
	{
		POSTGIS_DEBUG(3, " search box cannot be calculated");
		PG_RETURN_FLOAT8(0.0);
	}

	POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g %.15g, %.15g %.15g %.15g",
	               search_box.xmin, search_box.ymin, search_box.zmin,
	               search_box.xmax, search_box.ymax, search_box.zmax);

	/*
	 * Get pg_statistic row
	 */
	relid = getrelid(self->varno, root->parse->rtable);

	stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid),
	                             Int16GetDatum(self->varattno), 0, 0);
	if ( ! stats_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default estimate");
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **) gsptr, &geogstats_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geography selectivity");

		ReleaseSysCache(stats_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	POSTGIS_DEBUGF(4, " %d read from stats", geogstats_nvalues);

	POSTGIS_DEBUGF(4, " histo: xmin,ymin,zmin: %f,%f,%f", geogstats->xmin, geogstats->ymin, geogstats->zmin);
	POSTGIS_DEBUGF(4, " histo: xmax,ymax,zmax: %f,%f,%f", geogstats->xmax, geogstats->ymax, geogstats->zmax);
	POSTGIS_DEBUGF(4, " histo: unitsx: %f", geogstats->unitsx);
	POSTGIS_DEBUGF(4, " histo: unitsy: %f", geogstats->unitsy);
	POSTGIS_DEBUGF(4, " histo: unitsz: %f", geogstats->unitsz);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells);

	/*
	 * Do the estimation
	 */
	selectivity = estimate_selectivity(&search_box, geogstats);

	POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);

	free_attstatsslot(0, NULL, 0, (float *) geogstats, geogstats_nvalues);
	ReleaseSysCache(stats_tuple);

	PG_RETURN_FLOAT8(selectivity);
}
Datum geometry_gist_sel_2d(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	/* Oid operator = PG_GETARG_OID(1); */
	List *args = (List *) PG_GETARG_POINTER(2);
	/* int varRelid = PG_GETARG_INT32(3); */
	Oid relid;
	HeapTuple stats_tuple;
	GEOM_STATS *geomstats;
	/*
	 * This is to avoid casting the corresponding
	 * "type-punned" pointer, which would break
	 * "strict-aliasing rules".
	 */
	GEOM_STATS **gsptr = &geomstats;
	int geomstats_nvalues = 0;
	Node *other;
	Var *self;
	GBOX search_box;
	float8 selectivity = 0;

	POSTGIS_DEBUG(2, "geometry_gist_sel called");

	/* Fail if not a binary opclause (probably shouldn't happen) */
	if (list_length(args) != 2)
	{
		POSTGIS_DEBUG(3, "geometry_gist_sel: not a binary opclause");
		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	/*
	 * Find the constant part
	 */
	other = (Node *) linitial(args);
	if ( ! IsA(other, Const) )
	{
		self = (Var *) other;
		other = (Node *) lsecond(args);
	}
	else
	{
		self = (Var *) lsecond(args);
	}

	if ( ! IsA(other, Const) )
	{
		POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity");
		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	/*
	 * We are working on two constants..
	 * TODO: check if expression is true,
	 * returned set would be either
	 * the whole or none.
	 */
	if ( ! IsA(self, Var) )
	{
		POSTGIS_DEBUG(3, " no variable argument ? - returning default selectivity");
		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	/*
	 * Convert the constant to a BOX
	 */
	if ( ! gserialized_datum_get_gbox_p(((Const *) other)->constvalue, &search_box) )
	{
		POSTGIS_DEBUG(3, "search box is EMPTY");
		PG_RETURN_FLOAT8(0.0);
	}

	POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g, %.15g %.15g",
	               search_box.xmin, search_box.ymin, search_box.xmax, search_box.ymax);

	/*
	 * Get pg_statistic row
	 */
	relid = getrelid(self->varno, root->parse->rtable);

	stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid),
	                             Int16GetDatum(self->varattno), 0, 0);
	if ( ! stats_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default estimate");
		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **) gsptr, &geomstats_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry selectivity");

		ReleaseSysCache(stats_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	POSTGIS_DEBUGF(4, " %d read from stats", geomstats_nvalues);

	POSTGIS_DEBUGF(4, " histo: xmin,ymin: %f,%f", geomstats->xmin, geomstats->ymin);
	POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f", geomstats->xmax, geomstats->ymax);
	POSTGIS_DEBUGF(4, " histo: cols: %f", geomstats->cols);
	POSTGIS_DEBUGF(4, " histo: rows: %f", geomstats->rows);
	POSTGIS_DEBUGF(4, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells);

	/*
	 * Do the estimation
	 */
	selectivity = estimate_selectivity(&search_box, geomstats);

	POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);

	free_attstatsslot(0, NULL, 0, (float *) geomstats, geomstats_nvalues);
	ReleaseSysCache(stats_tuple);

	PG_RETURN_FLOAT8(selectivity);
}