/* * Join selectivity estimation for the subnet inclusion/overlap operators * * This function has the same structure as eqjoinsel() in selfuncs.c. * * Throughout networkjoinsel and its subroutines, we have a performance issue * in that the amount of work to be done is O(N^2) in the length of the MCV * and histogram arrays. To keep the runtime from getting out of hand when * large statistics targets have been set, we arbitrarily limit the number of * values considered to 1024 (MAX_CONSIDERED_ELEMS). For the MCV arrays, this * is easy: just consider at most the first N elements. (Since the MCVs are * sorted by decreasing frequency, this correctly gets us the first N MCVs.) * For the histogram arrays, we decimate; that is consider only every k'th * element, where k is chosen so that no more than MAX_CONSIDERED_ELEMS * elements are considered. This should still give us a good random sample of * the non-MCV population. Decimation is done on-the-fly in the loops that * iterate over the histogram arrays. */ Datum networkjoinsel(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); #ifdef NOT_USED JoinType jointype = (JoinType) PG_GETARG_INT16(3); #endif SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4); double selec; VariableStatData vardata1; VariableStatData vardata2; bool join_is_reversed; get_join_variables(root, args, sjinfo, &vardata1, &vardata2, &join_is_reversed); switch (sjinfo->jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_FULL: /* * Selectivity for left/full join is not exactly the same as inner * join, but we neglect the difference, as eqjoinsel does. */ selec = networkjoinsel_inner(operator, &vardata1, &vardata2); break; case JOIN_SEMI: case JOIN_ANTI: /* Here, it's important that we pass the outer var on the left. 
*/ if (!join_is_reversed) selec = networkjoinsel_semi(operator, &vardata1, &vardata2); else selec = networkjoinsel_semi(get_commutator(operator), &vardata2, &vardata1); break; default: /* other values not expected here */ elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype); selec = 0; /* keep compiler quiet */ break; } ReleaseVariableStats(vardata1); ReleaseVariableStats(vardata2); CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec); }
/* * scalararraysel_containment * Estimate selectivity of ScalarArrayOpExpr via array containment. * * If we have const =/<> ANY/ALL (array_var) then we can estimate the * selectivity as though this were an array containment operator, * array_var op ARRAY[const]. * * scalararraysel() has already verified that the ScalarArrayOpExpr's operator * is the array element type's default equality or inequality operator, and * has aggressively simplified both inputs to constants. * * Returns selectivity (0..1), or -1 if we fail to estimate selectivity. */ Selectivity scalararraysel_containment(PlannerInfo *root, Node *leftop, Node *rightop, Oid elemtype, bool isEquality, bool useOr, int varRelid) { Selectivity selec; VariableStatData vardata; Datum constval; TypeCacheEntry *typentry; FmgrInfo *cmpfunc; /* * rightop must be a variable, else punt. */ examine_variable(root, rightop, varRelid, &vardata); if (!vardata.rel) { ReleaseVariableStats(vardata); return -1.0; } /* * leftop must be a constant, else punt. */ if (!IsA(leftop, Const)) { ReleaseVariableStats(vardata); return -1.0; } if (((Const *) leftop)->constisnull) { /* qual can't succeed if null on left */ ReleaseVariableStats(vardata); return (Selectivity) 0.0; } constval = ((Const *) leftop)->constvalue; /* Get element type's default comparison function */ typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO); if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid)) { ReleaseVariableStats(vardata); return -1.0; } cmpfunc = &typentry->cmp_proc_finfo; /* * If the operator is <>, swap ANY/ALL, then invert the result later. 
*/ if (!isEquality) useOr = !useOr; /* Get array element stats for var, if available */ if (HeapTupleIsValid(vardata.statsTuple) && statistic_proc_security_check(&vardata, cmpfunc->fn_oid)) { Form_pg_statistic stats; AttStatsSlot sslot; AttStatsSlot hslot; stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); /* MCELEM will be an array of same type as element */ if (get_attstatsslot(&sslot, vardata.statsTuple, STATISTIC_KIND_MCELEM, InvalidOid, ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)) { /* For ALL case, also get histogram of distinct-element counts */ if (useOr || !get_attstatsslot(&hslot, vardata.statsTuple, STATISTIC_KIND_DECHIST, InvalidOid, ATTSTATSSLOT_NUMBERS)) memset(&hslot, 0, sizeof(hslot)); /* * For = ANY, estimate as var @> ARRAY[const]. * * For = ALL, estimate as var <@ ARRAY[const]. */ if (useOr) selec = mcelem_array_contain_overlap_selec(sslot.values, sslot.nvalues, sslot.numbers, sslot.nnumbers, &constval, 1, OID_ARRAY_CONTAINS_OP, cmpfunc); else selec = mcelem_array_contained_selec(sslot.values, sslot.nvalues, sslot.numbers, sslot.nnumbers, &constval, 1, hslot.numbers, hslot.nnumbers, OID_ARRAY_CONTAINED_OP, cmpfunc); free_attstatsslot(&hslot); free_attstatsslot(&sslot); } else { /* No most-common-elements info, so do without */ if (useOr) selec = mcelem_array_contain_overlap_selec(NULL, 0, NULL, 0, &constval, 1, OID_ARRAY_CONTAINS_OP, cmpfunc); else selec = mcelem_array_contained_selec(NULL, 0, NULL, 0, &constval, 1, NULL, 0, OID_ARRAY_CONTAINED_OP, cmpfunc); } /* * MCE stats count only non-null rows, so adjust for null rows. 
*/ selec *= (1.0 - stats->stanullfrac); } else { /* No stats at all, so do without */ if (useOr) selec = mcelem_array_contain_overlap_selec(NULL, 0, NULL, 0, &constval, 1, OID_ARRAY_CONTAINS_OP, cmpfunc); else selec = mcelem_array_contained_selec(NULL, 0, NULL, 0, &constval, 1, NULL, 0, OID_ARRAY_CONTAINED_OP, cmpfunc); /* we assume no nulls here, so no stanullfrac correction */ } ReleaseVariableStats(vardata); /* * If the operator is <>, invert the results. */ if (!isEquality) selec = 1.0 - selec; CLAMP_PROBABILITY(selec); return selec; }
/*
 * Estimate selectivity of "column <@ const" based on most common element
 * statistics.
 *
 * mcelem (of length nmcelem) and numbers (of length nnumbers) are from
 * the array column's MCELEM statistics slot, or are NULL/0 if stats are
 * not available.  array_data (of length nitems) is the constant's elements.
 * hist (of length nhist) is from the array column's DECHIST statistics slot,
 * or is NULL/0 if those stats are not available.
 *
 * Both the mcelem and array_data arrays are assumed presorted according
 * to the element type's cmpfunc.  Null elements are not present.
 *
 * The "operator" argument is not referenced anywhere in the function body.
 *
 * Returns DEFAULT_CONTAIN_SEL when the MCELEM numbers are missing or do not
 * have exactly nmcelem + 3 entries, or when the count histogram is missing
 * or has fewer than 3 entries.  Otherwise returns the computed estimate
 * after passing it through CLAMP_PROBABILITY.
 *
 * Independent element occurrence would imply a particular distribution of
 * distinct element counts among matching rows.  Real data usually falsifies
 * that assumption.  For example, in a set of 11-element integer arrays having
 * elements in the range [0..10], element occurrences are typically not
 * independent.  If they were, a sufficiently-large set would include all
 * distinct element counts 0 through 11.  We correct for this using the
 * histogram of distinct element counts.
 *
 * In the "column @> const" and "column && const" cases, we usually have a
 * "const" with low number of elements (otherwise we have selectivity close
 * to 0 or 1 respectively).  That's why the effect of dependence related
 * to distinct element count distribution is negligible there.  In the
 * "column <@ const" case, number of elements is usually high (otherwise we
 * have selectivity close to 0).  That's why we should do a correction with
 * the array distinct element count distribution here.
 *
 * Using the histogram of distinct element counts produces a different
 * distribution law than independent occurrences of elements.  This
 * distribution law can be described as follows:
 *
 * P(o1, o2, ..., on) = f1^o1 * (1 - f1)^(1 - o1) * f2^o2 *
 *	  (1 - f2)^(1 - o2) * ... * fn^on * (1 - fn)^(1 - on) * hist[m] / ind[m]
 *
 * where:
 * o1, o2, ..., on - occurrences of elements 1, 2, ..., n
 *		(1 - occurrence, 0 - no occurrence) in row
 * f1, f2, ..., fn - frequencies of elements 1, 2, ..., n
 *		(scalar values in [0..1]) according to collected statistics
 * m = o1 + o2 + ... + on = total number of distinct elements in row
 * hist[m] - histogram data for occurrence of m elements.
 * ind[m] - probability of m occurrences from n events assuming their
 *	  probabilities to be equal to frequencies of array elements.
 *
 * ind[m] = sum(f1^o1 * (1 - f1)^(1 - o1) * f2^o2 * (1 - f2)^(1 - o2) *
 *	  ... * fn^on * (1 - fn)^(1 - on), o1, o2, ..., on) | o1 + o2 + .. on = m
 */
static Selectivity
mcelem_array_contained_selec(Datum *mcelem, int nmcelem,
							 float4 *numbers, int nnumbers,
							 Datum *array_data, int nitems,
							 float4 *hist, int nhist,
							 Oid operator, FmgrInfo *cmpfunc)
{
	int			mcelem_index,
				i,
				unique_nitems = 0;
	float		selec,
				minfreq,
				nullelem_freq;
	float	   *dist,
			   *mcelem_dist,
			   *hist_part;
	float		avg_count,
				mult,
				rest;
	float	   *elem_selec;

	/*
	 * There should be three more Numbers than Values in the MCELEM slot,
	 * because the last three cells should hold minimal and maximal frequency
	 * among the non-null elements, and then the frequency of null elements.
	 * Punt if not right, because we can't do much without the element freqs.
	 */
	if (numbers == NULL || nnumbers != nmcelem + 3)
		return DEFAULT_CONTAIN_SEL;

	/* Can't do much without a count histogram, either */
	if (hist == NULL || nhist < 3)
		return DEFAULT_CONTAIN_SEL;

	/*
	 * Grab some of the summary statistics that compute_array_stats() stores:
	 * lowest frequency, frequency of null elements, and average distinct
	 * element count.  (The maximal frequency, numbers[nmcelem + 1], is not
	 * read by this function.)
	 */
	minfreq = numbers[nmcelem];
	nullelem_freq = numbers[nmcelem + 2];
	avg_count = hist[nhist - 1];

	/*
	 * "rest" will be the sum of the frequencies of all elements not
	 * represented in MCELEM.  The average distinct element count is the sum
	 * of the frequencies of *all* elements.  Begin with that; we will proceed
	 * to subtract the MCELEM frequencies.
	 */
	rest = avg_count;

	/*
	 * mult is a multiplier representing estimate of probability that each
	 * mcelem that is not present in constant doesn't occur.
	 */
	mult = 1.0f;

	/*
	 * elem_selec is array of estimated frequencies for elements in the
	 * constant.  It is sized for nitems entries, but only the first
	 * unique_nitems are filled in, since duplicates are skipped below.
	 */
	elem_selec = (float *) palloc(sizeof(float) * nitems);

	/* Scan mcelem and array in parallel. */
	mcelem_index = 0;
	for (i = 0; i < nitems; i++)
	{
		bool		match = false;

		/*
		 * Ignore any duplicates in the array data.  Comparing only with the
		 * immediately preceding item suffices given the presorted input.
		 */
		if (i > 0 &&
			element_compare(&array_data[i - 1], &array_data[i], cmpfunc) == 0)
			continue;

		/*
		 * Iterate over MCELEM until we find an entry greater than or equal to
		 * this element of the constant.  Update "rest" and "mult" for mcelem
		 * entries skipped over.
		 */
		while (mcelem_index < nmcelem)
		{
			int			cmp = element_compare(&mcelem[mcelem_index],
											  &array_data[i],
											  cmpfunc);

			if (cmp < 0)
			{
				mult *= (1.0f - numbers[mcelem_index]);
				rest -= numbers[mcelem_index];
				mcelem_index++;
			}
			else
			{
				if (cmp == 0)
					match = true;	/* mcelem is found */
				break;
			}
		}

		if (match)
		{
			/* MCELEM matches the array item. */
			elem_selec[unique_nitems] = numbers[mcelem_index];
			/* "rest" is decremented for all mcelems, matched or not */
			rest -= numbers[mcelem_index];
			mcelem_index++;
		}
		else
		{
			/*
			 * The element is not in MCELEM.  Punt, but assume that the
			 * selectivity cannot be more than minfreq / 2.
			 */
			elem_selec[unique_nitems] = Min(DEFAULT_CONTAIN_SEL,
											minfreq / 2);
		}

		unique_nitems++;
	}

	/*
	 * If we handled all constant elements without exhausting the MCELEM
	 * array, finish walking it to complete calculation of "rest" and "mult".
	 */
	while (mcelem_index < nmcelem)
	{
		mult *= (1.0f - numbers[mcelem_index]);
		rest -= numbers[mcelem_index];
		mcelem_index++;
	}

	/*
	 * The presence of many distinct rare elements materially decreases
	 * selectivity.  Use the Poisson distribution to estimate the probability
	 * of a column value having zero occurrences of such elements.  See above
	 * for the definition of "rest".
	 */
	mult *= exp(-rest);

	/*----------
	 * Using the distinct element count histogram requires
	 *		O(unique_nitems * (nmcelem + unique_nitems))
	 * operations.  Beyond a certain computational cost threshold, it's
	 * reasonable to sacrifice accuracy for decreased planning time.  We limit
	 * the number of operations to EFFORT * nmcelem; since nmcelem is limited
	 * by the column's statistics target, the work done is user-controllable.
	 *
	 * If the number of operations would be too large, we can reduce it
	 * without losing all accuracy by reducing unique_nitems and considering
	 * only the most-common elements of the constant array.  To make the
	 * results exactly match what we would have gotten with only those
	 * elements to start with, we'd have to remove any discarded elements'
	 * frequencies from "mult", but since this is only an approximation
	 * anyway, we don't bother with that.  Therefore it's sufficient to qsort
	 * elem_selec[] and take the largest elements.  (They will no longer match
	 * up with the elements of array_data[], but we don't care.)
	 *----------
	 */
#define EFFORT 100

	/* The first test guards the division against a zero denominator. */
	if ((nmcelem + unique_nitems) > 0 &&
		unique_nitems > EFFORT * nmcelem / (nmcelem + unique_nitems))
	{
		/*
		 * Use the quadratic formula to solve for largest allowable N.  We
		 * have A = 1, B = nmcelem, C = - EFFORT * nmcelem.
		 */
		double		b = (double) nmcelem;
		int			n;

		n = (int) ((sqrt(b * b + 4 * EFFORT * b) - b) / 2);

		/* Sort, then take just the first n elements */
		qsort(elem_selec, unique_nitems, sizeof(float),
			  float_compare_desc);
		unique_nitems = n;
	}

	/*
	 * Calculate probabilities of each distinct element count for both mcelems
	 * and constant elements.  At this point, assume independent element
	 * occurrence.
	 *
	 * NOTE(review): the loop below indexes all three result arrays from 0
	 * through unique_nitems inclusive, so calc_distr() and calc_hist() are
	 * presumed to return at least unique_nitems + 1 entries -- confirm
	 * against their definitions.
	 */
	dist = calc_distr(elem_selec, unique_nitems, unique_nitems, 0.0f);
	mcelem_dist = calc_distr(numbers, nmcelem, unique_nitems, rest);

	/* ignore hist[nhist-1], which is the average not a histogram member */
	hist_part = calc_hist(hist, nhist - 1, unique_nitems);

	selec = 0.0f;
	for (i = 0; i <= unique_nitems; i++)
	{
		/*
		 * mult * dist[i] / mcelem_dist[i] gives us probability of qual
		 * matching from assumption of independent element occurrence with the
		 * condition that distinct element count = i.  Terms whose
		 * mcelem_dist[i] is not positive are skipped, which also avoids
		 * dividing by zero.
		 */
		if (mcelem_dist[i] > 0)
			selec += hist_part[i] * mult * dist[i] / mcelem_dist[i];
	}

	/* Release the work arrays obtained above. */
	pfree(dist);
	pfree(mcelem_dist);
	pfree(hist_part);
	pfree(elem_selec);

	/* Take into account occurrence of NULL element. */
	selec *= (1.0f - nullelem_freq);

	CLAMP_PROBABILITY(selec);

	return selec;
}
/* * Estimate selectivity of "column @> const" and "column && const" based on * most common element statistics. This estimation assumes element * occurrences are independent. * * mcelem (of length nmcelem) and numbers (of length nnumbers) are from * the array column's MCELEM statistics slot, or are NULL/0 if stats are * not available. array_data (of length nitems) is the constant's elements. * * Both the mcelem and array_data arrays are assumed presorted according * to the element type's cmpfunc. Null elements are not present. * * TODO: this estimate probably could be improved by using the distinct * elements count histogram. For example, excepting the special case of * "column @> '{}'", we can multiply the calculated selectivity by the * fraction of nonempty arrays in the column. */ static Selectivity mcelem_array_contain_overlap_selec(Datum *mcelem, int nmcelem, float4 *numbers, int nnumbers, Datum *array_data, int nitems, Oid operator, FmgrInfo *cmpfunc) { Selectivity selec, elem_selec; int mcelem_index, i; bool use_bsearch; float4 minfreq; /* * There should be three more Numbers than Values, because the last three * cells should hold minimal and maximal frequency among the non-null * elements, and then the frequency of null elements. Ignore the Numbers * if not right. */ if (nnumbers != nmcelem + 3) { numbers = NULL; nnumbers = 0; } if (numbers) { /* Grab the lowest observed frequency */ minfreq = numbers[nmcelem]; } else { /* Without statistics make some default assumptions */ minfreq = 2 * (float4) DEFAULT_CONTAIN_SEL; } /* Decide whether it is faster to use binary search or not. */ if (nitems * floor_log2((uint32) nmcelem) < nmcelem + nitems) use_bsearch = true; else use_bsearch = false; if (operator == OID_ARRAY_CONTAINS_OP) { /* * Initial selectivity for "column @> const" query is 1.0, and it will * be decreased with each element of constant array. 
*/ selec = 1.0; } else { /* * Initial selectivity for "column && const" query is 0.0, and it will * be increased with each element of constant array. */ selec = 0.0; } /* Scan mcelem and array in parallel. */ mcelem_index = 0; for (i = 0; i < nitems; i++) { bool match = false; /* Ignore any duplicates in the array data. */ if (i > 0 && element_compare(&array_data[i - 1], &array_data[i], cmpfunc) == 0) continue; /* Find the smallest MCELEM >= this array item. */ if (use_bsearch) { match = find_next_mcelem(mcelem, nmcelem, array_data[i], &mcelem_index, cmpfunc); } else { while (mcelem_index < nmcelem) { int cmp = element_compare(&mcelem[mcelem_index], &array_data[i], cmpfunc); if (cmp < 0) mcelem_index++; else { if (cmp == 0) match = true; /* mcelem is found */ break; } } } if (match && numbers) { /* MCELEM matches the array item; use its frequency. */ elem_selec = numbers[mcelem_index]; mcelem_index++; } else { /* * The element is not in MCELEM. Punt, but assume that the * selectivity cannot be more than minfreq / 2. */ elem_selec = Min(DEFAULT_CONTAIN_SEL, minfreq / 2); } /* * Update overall selectivity using the current element's selectivity * and an assumption of element occurrence independence. */ if (operator == OID_ARRAY_CONTAINS_OP) selec *= elem_selec; else selec = selec + elem_selec - selec * elem_selec; /* Clamp intermediate results to stay sane despite roundoff error */ CLAMP_PROBABILITY(selec); } return selec; }
/* * arraycontsel -- restriction selectivity for array @>, &&, <@ operators */ Datum arraycontsel(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Selectivity selec; Oid element_typeid; /* * If expression is not (variable op something) or (something op * variable), then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); } /* * The "&&", "@>" and "<@" operators are strict, so we can cope with a * NULL constant right away. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } /* * If var is on the right, commute the operator, so that we can assume the * var is on the left in what follows. */ if (!varonleft) { if (operator == OID_ARRAY_CONTAINS_OP) operator = OID_ARRAY_CONTAINED_OP; else if (operator == OID_ARRAY_CONTAINED_OP) operator = OID_ARRAY_CONTAINS_OP; } /* * OK, there's a Var and a Const we're dealing with here. We need the * Const to be an array with same element type as column, else we can't do * anything useful. (Such cases will likely fail at runtime, but here * we'd rather just return a default estimate.) */ element_typeid = get_base_element_type(((Const *) other)->consttype); if (element_typeid != InvalidOid && element_typeid == get_base_element_type(vardata.vartype)) { selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue, element_typeid, operator); } else { selec = DEFAULT_SEL(operator); } ReleaseVariableStats(vardata); CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec); }
/* * ltreeparentsel - Selectivity of parent relationship for ltree data types. */ Datum ltreeparentsel(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; double selec; /* * If expression is not variable <@ something or something <@ variable, * then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL); /* * If the something is a NULL constant, assume operator is strict and * return zero, ie, operator will never return TRUE. */ if (IsA(other, Const) && ((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } if (IsA(other, Const)) { /* Variable is being compared to a known non-null constant */ Datum constval = ((Const *) other)->constvalue; FmgrInfo contproc; double mcvsum; double mcvsel; double nullfrac; int hist_size; fmgr_info(get_opcode(operator), &contproc); /* * Is the constant "<@" to any of the column's most common values? */ mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft, &mcvsum); /* * If the histogram is large enough, see what fraction of it the * constant is "<@" to, and assume that's representative of the * non-MCV population. Otherwise use the default selectivity for the * non-MCV population. */ selec = histogram_selectivity(&vardata, &contproc, constval, varonleft, 10, 1, &hist_size); if (selec < 0) { /* Nope, fall back on default */ selec = DEFAULT_PARENT_SEL; } else if (hist_size < 100) { /* * For histogram sizes from 10 to 100, we combine the histogram * and default selectivities, putting increasingly more trust in * the histogram for larger sizes. 
*/ double hist_weight = hist_size / 100.0; selec = selec * hist_weight + DEFAULT_PARENT_SEL * (1.0 - hist_weight); } /* In any case, don't believe extremely small or large estimates. */ if (selec < 0.0001) selec = 0.0001; else if (selec > 0.9999) selec = 0.9999; if (HeapTupleIsValid(vardata.statsTuple)) nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac; else nullfrac = 0.0; /* * Now merge the results from the MCV and histogram calculations, * realizing that the histogram covers only the non-null values that * are not listed in MCV. */ selec *= 1.0 - nullfrac - mcvsum; selec += mcvsel; } else selec = DEFAULT_PARENT_SEL; ReleaseVariableStats(vardata); /* result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec); }
/*
 * calc_rangesel
 *		Estimate the selectivity of "var operator constval" for a range
 *		column, using whatever pg_statistic data vardata carries.
 *
 * typcache is handed on to calc_hist_selectivity().  vardata describes the
 * variable; its statsTuple may be invalid, in which case we assume no NULLs
 * and no empty ranges.  constval is the range constant and operator is the
 * operator's OID.
 *
 * An empty constant is answered directly from the empty-range fraction;
 * OID_RANGE_CONTAINS_ELEM_OP (and any unrecognized operator) raises an error
 * in that case, since an element cannot be empty.  Otherwise the bound
 * histograms are consulted, falling back on default_range_selectivity().
 *
 * The result is scaled by the non-null fraction and clamped with
 * CLAMP_PROBABILITY before being returned.
 */
static double
calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
			  RangeType *constval, Oid operator)
{
	double		hist_selec;
	double		selec;
	float4		empty_frac,
				null_frac;

	/*
	 * First look up the fraction of NULLs and empty ranges from pg_statistic.
	 */
	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		float4	   *numbers;
		int			nnumbers;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
		null_frac = stats->stanullfrac;

		/* Try to get fraction of empty ranges */
		if (get_attstatsslot(vardata->statsTuple,
							 vardata->atttype, vardata->atttypmod,
							 STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, InvalidOid,
							 NULL,
							 NULL, NULL,
							 &numbers, &nnumbers))
		{
			if (nnumbers != 1)
				elog(ERROR, "invalid empty fraction statistic");	/* shouldn't happen */
			empty_frac = numbers[0];

			/*
			 * We have copied out the one value we need, so release the slot
			 * data rather than leaving it allocated until the surrounding
			 * memory context goes away.  No values array was requested, so
			 * only the numbers array needs freeing.
			 */
			free_attstatsslot(vardata->atttype, NULL, 0, numbers, nnumbers);
		}
		else
		{
			/* No empty fraction statistic. Assume no empty ranges. */
			empty_frac = 0.0;
		}
	}
	else
	{
		/*
		 * No stats are available. Follow through the calculations below
		 * anyway, assuming no NULLs and no empty ranges. This still allows us
		 * to give a better-than-nothing estimate based on whether the
		 * constant is an empty range or not.
		 */
		null_frac = 0.0;
		empty_frac = 0.0;
	}

	if (RangeIsEmpty(constval))
	{
		/*
		 * An empty range matches all ranges, all empty ranges, or nothing,
		 * depending on the operator
		 */
		switch (operator)
		{
				/* these return false if either argument is empty */
			case OID_RANGE_OVERLAP_OP:
			case OID_RANGE_OVERLAPS_LEFT_OP:
			case OID_RANGE_OVERLAPS_RIGHT_OP:
			case OID_RANGE_LEFT_OP:
			case OID_RANGE_RIGHT_OP:
				/* nothing is less than an empty range */
			case OID_RANGE_LESS_OP:
				selec = 0.0;
				break;

				/* only empty ranges can be contained by an empty range */
			case OID_RANGE_CONTAINED_OP:
				/* only empty ranges are <= an empty range */
			case OID_RANGE_LESS_EQUAL_OP:
				selec = empty_frac;
				break;

				/* everything contains an empty range */
			case OID_RANGE_CONTAINS_OP:
				/* everything is >= an empty range */
			case OID_RANGE_GREATER_EQUAL_OP:
				selec = 1.0;
				break;

				/* all non-empty ranges are > an empty range */
			case OID_RANGE_GREATER_OP:
				selec = 1.0 - empty_frac;
				break;

				/* an element cannot be empty */
			case OID_RANGE_CONTAINS_ELEM_OP:
			default:
				elog(ERROR, "unexpected operator %u", operator);
				selec = 0.0;	/* keep compiler quiet */
				break;
		}
	}
	else
	{
		/*
		 * Calculate selectivity using bound histograms. If that fails for
		 * some reason, e.g no histogram in pg_statistic, use the default
		 * constant estimate for the fraction of non-empty values. This is
		 * still somewhat better than just returning the default estimate,
		 * because this still takes into account the fraction of empty and
		 * NULL tuples, if we had statistics for them.
		 */
		hist_selec = calc_hist_selectivity(typcache, vardata, constval,
										   operator);
		if (hist_selec < 0.0)
			hist_selec = default_range_selectivity(operator);

		/*
		 * Now merge the results for the empty ranges and histogram
		 * calculations, realizing that the histogram covers only the
		 * non-null, non-empty values.
		 */
		if (operator == OID_RANGE_CONTAINED_OP)
		{
			/* empty is contained by anything non-empty */
			selec = (1.0 - empty_frac) * hist_selec + empty_frac;
		}
		else
		{
			/* with any other operator, empty Op non-empty matches nothing */
			selec = (1.0 - empty_frac) * hist_selec;
		}
	}

	/* all range operators are strict */
	selec *= (1.0 - null_frac);

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	return selec;
}
/* * rangesel -- restriction selectivity for range operators */ Datum rangesel(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Selectivity selec; TypeCacheEntry *typcache = NULL; RangeType *constrange = NULL; /* * If expression is not (variable op something) or (something op * variable), then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(default_range_selectivity(operator)); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(default_range_selectivity(operator)); } /* * All the range operators are strict, so we can cope with a NULL constant * right away. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } /* * If var is on the right, commute the operator, so that we can assume the * var is on the left in what follows. */ if (!varonleft) { /* we have other Op var, commute to make var Op other */ operator = get_commutator(operator); if (!operator) { /* Use default selectivity (should we raise an error instead?) */ ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(default_range_selectivity(operator)); } } /* * OK, there's a Var and a Const we're dealing with here. We need the * Const to be of same range type as the column, else we can't do anything * useful. (Such cases will likely fail at runtime, but here we'd rather * just return a default estimate.) * * If the operator is "range @> element", the constant should be of the * element type of the range column. Convert it to a range that includes * only that single point, so that we don't need special handling for that * in what follows. 
*/ if (operator == OID_RANGE_CONTAINS_ELEM_OP) { typcache = range_get_typcache(fcinfo, vardata.vartype); if (((Const *) other)->consttype == typcache->rngelemtype->type_id) { RangeBound lower, upper; lower.inclusive = true; lower.val = ((Const *) other)->constvalue; lower.infinite = false; lower.lower = true; upper.inclusive = true; upper.val = ((Const *) other)->constvalue; upper.infinite = false; upper.lower = false; constrange = range_serialize(typcache, &lower, &upper, false); } } else if (operator == OID_RANGE_ELEM_CONTAINED_OP) { /* * Here, the Var is the elem, not the range. For now we just punt and * return the default estimate. In future we could disassemble the * range constant and apply scalarineqsel ... */ } else if (((Const *) other)->consttype == vardata.vartype) { /* Both sides are the same range type */ typcache = range_get_typcache(fcinfo, vardata.vartype); constrange = DatumGetRangeType(((Const *) other)->constvalue); } /* * If we got a valid constant on one side of the operator, proceed to * estimate using statistics. Otherwise punt and return a default constant * estimate. Note that calc_rangesel need not handle * OID_RANGE_ELEM_CONTAINED_OP. */ if (constrange) selec = calc_rangesel(typcache, &vardata, constrange, operator); else selec = default_range_selectivity(operator); ReleaseVariableStats(vardata); CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec); }
/*
 * Estimate the selectivity of one node of an intquery expression, recursing
 * into the operands of operator nodes.
 *
 * mcelems/mcefreqs (nmcelems entries) are the most-common-element values and
 * their frequencies, or NULL when no such statistics are available; minfreq
 * is the lowest frequency recorded among them.
 */
static Selectivity
int_query_opr_selec(ITEM *item, Datum *mcelems, float4 *mcefreqs,
					int nmcelems, float4 minfreq)
{
	Selectivity selec;

	/* we recurse, so guard against being driven to stack overflow */
	check_stack_depth();

	if (item->type == VAL)
	{
		Datum	   *hit;

		/* Without an MCELEM list there is nothing to look the value up in */
		if (mcelems == NULL)
			return (Selectivity) DEFAULT_EQ_SEL;

		hit = (Datum *) bsearch(&item->val, mcelems, nmcelems,
								sizeof(Datum), compare_val_int4);
		if (hit == NULL)
		{
			/*
			 * Not a most-common element.  Punt, but assume the selectivity
			 * cannot exceed minfreq / 2.
			 */
			selec = Min(DEFAULT_EQ_SEL, minfreq / 2);
		}
		else
		{
			/*
			 * A most-common element: its recorded frequency is as precise an
			 * answer as ANALYZE was able to find.
			 */
			selec = mcefreqs[hit - mcelems];
		}
	}
	else if (item->type == OPR)
	{
		/* Operator node: the operand at item - 1 is always evaluated first */
		Selectivity prev_sel;
		Selectivity left_sel;

		prev_sel = int_query_opr_selec(item - 1, mcelems, mcefreqs,
									   nmcelems, minfreq);

		if (item->val == (int32) '!')
		{
			selec = 1.0 - prev_sel;
		}
		else if (item->val == (int32) '&')
		{
			left_sel = int_query_opr_selec(item + item->left, mcelems,
										   mcefreqs, nmcelems, minfreq);
			selec = prev_sel * left_sel;
		}
		else if (item->val == (int32) '|')
		{
			left_sel = int_query_opr_selec(item + item->left, mcelems,
										   mcefreqs, nmcelems, minfreq);
			selec = prev_sel + left_sel - prev_sel * left_sel;
		}
		else
		{
			elog(ERROR, "unrecognized operator: %d", item->val);
			selec = 0;			/* keep compiler quiet */
		}
	}
	else
	{
		elog(ERROR, "unrecognized int query item type: %u", item->type);
		selec = 0;				/* keep compiler quiet */
	}

	/* Keep intermediate values sane in the face of roundoff error */
	CLAMP_PROBABILITY(selec);

	return selec;
}
/* * _int_matchsel -- restriction selectivity function for intarray @@ query_int */ Datum _int_matchsel(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Selectivity selec; QUERYTYPE *query; Datum *mcelems = NULL; float4 *mcefreqs = NULL; int nmcelems = 0; float4 minfreq = 0.0; float4 nullfrac = 0.0; Form_pg_statistic stats; Datum *values = NULL; int nvalues = 0; float4 *numbers = NULL; int nnumbers = 0; /* * If expression is not "variable @@ something" or "something @@ variable" * then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); /* * Variable should be int[]. We don't support cases where variable is * query_int. */ if (vardata.vartype != INT4ARRAYOID) PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_EQ_SEL); } /* * The "@@" operator is strict, so we can cope with NULL right away. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } /* The caller made sure the const is a query, so get it now */ query = DatumGetQueryTypeP(((Const *) other)->constvalue); /* Empty query matches nothing */ if (query->size == 0) { ReleaseVariableStats(vardata); return (Selectivity) 0.0; } /* * Get the statistics for the intarray column. * * We're interested in the Most-Common-Elements list, and the NULL * fraction. */ if (HeapTupleIsValid(vardata.statsTuple)) { stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); nullfrac = stats->stanullfrac; /* * For an int4 array, the default array type analyze function will * collect a Most Common Elements list, which is an array of int4s. 
*/ if (get_attstatsslot(vardata.statsTuple, INT4OID, -1, STATISTIC_KIND_MCELEM, InvalidOid, NULL, &values, &nvalues, &numbers, &nnumbers)) { /* * There should be three more Numbers than Values, because the * last three (for intarray) cells are taken for minimal, maximal * and nulls frequency. Punt if not. */ if (nnumbers == nvalues + 3) { /* Grab the lowest frequency. */ minfreq = numbers[nnumbers - (nnumbers - nvalues)]; mcelems = values; mcefreqs = numbers; nmcelems = nvalues; } } } /* Process the logical expression in the query, using the stats */ selec = int_query_opr_selec(GETQUERY(query) + query->size - 1, mcelems, mcefreqs, nmcelems, minfreq); /* MCE stats count only non-null rows, so adjust for null rows. */ selec *= (1.0 - nullfrac); free_attstatsslot(INT4OID, values, nvalues, numbers, nnumbers); ReleaseVariableStats(vardata); CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec); }
/* * tsmatchsel -- Selectivity of "@@" * * restriction selectivity function for tsvector @@ tsquery and * tsquery @@ tsvector */ Datum tsmatchsel(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); #ifdef NOT_USED Oid operator = PG_GETARG_OID(1); #endif List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Selectivity selec; /* * If expression is not variable = something or something = variable, then * punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL); } /* * The "@@" operator is strict, so we can cope with NULL right away */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } /* * OK, there's a Var and a Const we're dealing with here. We need the * Const to be a TSQuery, else we can't do anything useful. We have to * check this because the Var might be the TSQuery not the TSVector. */ if (((Const *) other)->consttype == TSQUERYOID) { /* tsvector @@ tsquery or the other way around */ Assert(vardata.vartype == TSVECTOROID); selec = tsquerysel(&vardata, ((Const *) other)->constvalue); } else { /* If we can't see the query structure, must punt */ selec = DEFAULT_TS_MATCH_SEL; } ReleaseVariableStats(vardata); CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec); }
/* * Traverse the tsquery in preorder, calculating selectivity as: * * selec(left_oper) * selec(right_oper) in AND & PHRASE nodes, * * selec(left_oper) + selec(right_oper) - * selec(left_oper) * selec(right_oper) in OR nodes, * * 1 - select(oper) in NOT nodes * * histogram-based estimation in prefix VAL nodes * * freq[val] in exact VAL nodes, if the value is in MCELEM * min(freq[MCELEM]) / 2 in VAL nodes, if it is not * * The MCELEM array is already sorted (see ts_typanalyze.c), so we can use * binary search for determining freq[MCELEM]. * * If we don't have stats for the tsvector, we still use this logic, * except we use default estimates for VAL nodes. This case is signaled * by lookup == NULL. */ static Selectivity tsquery_opr_selec(QueryItem *item, char *operand, TextFreq *lookup, int length, float4 minfreq) { Selectivity selec; /* since this function recurses, it could be driven to stack overflow */ check_stack_depth(); if (item->type == QI_VAL) { QueryOperand *oper = (QueryOperand *) item; LexemeKey key; /* * Prepare the key for bsearch(). */ key.lexeme = operand + oper->distance; key.length = oper->length; if (oper->prefix) { /* Prefix match, ie the query item is lexeme:* */ Selectivity matched, allmces; int i, n_matched; /* * Our strategy is to scan through the MCELEM list and combine the * frequencies of the ones that match the prefix. We then * extrapolate the fraction of matching MCELEMs to the remaining * rows, assuming that the MCELEMs are representative of the whole * lexeme population in this respect. (Compare * histogram_selectivity().) Note that these are most common * elements not most common values, so they're not mutually * exclusive. We treat occurrences as independent events. * * This is only a good plan if we have a pretty fair number of * MCELEMs available; we set the threshold at 100. If no stats or * insufficient stats, arbitrarily use DEFAULT_TS_MATCH_SEL*4. 
*/ if (lookup == NULL || length < 100) return (Selectivity) (DEFAULT_TS_MATCH_SEL * 4); matched = allmces = 0; n_matched = 0; for (i = 0; i < length; i++) { TextFreq *t = lookup + i; int tlen = VARSIZE_ANY_EXHDR(t->element); if (tlen >= key.length && strncmp(key.lexeme, VARDATA_ANY(t->element), key.length) == 0) { matched += t->frequency - matched * t->frequency; n_matched++; } allmces += t->frequency - allmces * t->frequency; } /* Clamp to ensure sanity in the face of roundoff error */ CLAMP_PROBABILITY(matched); CLAMP_PROBABILITY(allmces); selec = matched + (1.0 - allmces) * ((double) n_matched / length); /* * In any case, never believe that a prefix match has selectivity * less than we would assign for a non-MCELEM lexeme. This * preserves the property that "word:*" should be estimated to * match at least as many rows as "word" would be. */ selec = Max(Min(DEFAULT_TS_MATCH_SEL, minfreq / 2), selec); } else { /* Regular exact lexeme match */ TextFreq *searchres; /* If no stats for the variable, use DEFAULT_TS_MATCH_SEL */ if (lookup == NULL) return (Selectivity) DEFAULT_TS_MATCH_SEL; searchres = (TextFreq *) bsearch(&key, lookup, length, sizeof(TextFreq), compare_lexeme_textfreq); if (searchres) { /* * The element is in MCELEM. Return precise selectivity (or * at least as precise as ANALYZE could find out). */ selec = searchres->frequency; } else { /* * The element is not in MCELEM. Punt, but assume that the * selectivity cannot be more than minfreq / 2. 
*/ selec = Min(DEFAULT_TS_MATCH_SEL, minfreq / 2); } } } else { /* Current TSQuery node is an operator */ Selectivity s1, s2; switch (item->qoperator.oper) { case OP_NOT: selec = 1.0 - tsquery_opr_selec(item + 1, operand, lookup, length, minfreq); break; case OP_PHRASE: case OP_AND: s1 = tsquery_opr_selec(item + 1, operand, lookup, length, minfreq); s2 = tsquery_opr_selec(item + item->qoperator.left, operand, lookup, length, minfreq); selec = s1 * s2; break; case OP_OR: s1 = tsquery_opr_selec(item + 1, operand, lookup, length, minfreq); s2 = tsquery_opr_selec(item + item->qoperator.left, operand, lookup, length, minfreq); selec = s1 + s2 - s1 * s2; break; default: elog(ERROR, "unrecognized operator: %d", item->qoperator.oper); selec = 0; /* keep compiler quiet */ break; } } /* Clamp intermediate results to stay sane despite roundoff error */ CLAMP_PROBABILITY(selec); return selec; }
/* * Selectivity estimation for the subnet inclusion/overlap operators */ Datum networksel(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Selectivity selec, mcv_selec, non_mcv_selec; Datum constvalue, *hist_values; int hist_nvalues; Form_pg_statistic stats; double sumcommon, nullfrac; FmgrInfo proc; /* * If expression is not (variable op something) or (something op * variable), then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); } /* All of the operators handled here are strict. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } constvalue = ((Const *) other)->constvalue; /* Otherwise, we need stats in order to produce a non-default estimate. */ if (!HeapTupleIsValid(vardata.statsTuple)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); } stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); nullfrac = stats->stanullfrac; /* * If we have most-common-values info, add up the fractions of the MCV * entries that satisfy MCV OP CONST. These fractions contribute directly * to the result selectivity. Also add up the total fraction represented * by MCV entries. */ fmgr_info(get_opcode(operator), &proc); mcv_selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft, &sumcommon); /* * If we have a histogram, use it to estimate the proportion of the * non-MCV population that satisfies the clause. If we don't, apply the * default selectivity to that population. 
*/ if (get_attstatsslot(vardata.statsTuple, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, NULL, &hist_values, &hist_nvalues, NULL, NULL)) { int opr_codenum = inet_opr_codenum(operator); /* Commute if needed, so we can consider histogram to be on the left */ if (!varonleft) opr_codenum = -opr_codenum; non_mcv_selec = inet_hist_value_sel(hist_values, hist_nvalues, constvalue, opr_codenum); free_attstatsslot(vardata.atttype, hist_values, hist_nvalues, NULL, 0); } else non_mcv_selec = DEFAULT_SEL(operator); /* Combine selectivities for MCV and non-MCV populations */ selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec; /* Result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(selec); }
/*
 * Recursively estimate the selectivity of a tsquery node:
 *
 *	 AND nodes:			s1 * s2
 *	 OR nodes:			s1 + s2 - s1 * s2
 *	 NOT nodes:			1 - s
 *	 prefix VAL nodes:	share of the total MCV frequency held by matches
 *	 exact VAL nodes:	freq[val] if the value is in MCELEM,
 *						otherwise min(freq[MCELEM]) / 2
 *
 * lookup is the MCELEM array (length entries); it is already sorted (see
 * ts_typanalyze.c), so exact lookups use binary search.  operand is the
 * base of the query's operand strings, and minfreq is the lowest MCELEM
 * frequency.
 *
 * lookup == NULL signals that no statistics are available for the tsvector;
 * the same logic is applied, but VAL nodes get default estimates.
 */
static Selectivity
tsquery_opr_selec(QueryItem *item, char *operand,
				  TextFreq *lookup, int length, float4 minfreq)
{
	Selectivity result;

	/* recursion depth follows the query shape, so guard the stack */
	check_stack_depth();

	if (item->type != QI_VAL)
	{
		/* Operator node: combine the estimates of the operand subtrees */
		if (item->qoperator.oper == OP_NOT)
		{
			result = 1.0 - tsquery_opr_selec(item + 1, operand,
											 lookup, length, minfreq);
		}
		else if (item->qoperator.oper == OP_AND ||
				 item->qoperator.oper == OP_OR)
		{
			Selectivity near_sel;
			Selectivity far_sel;

			near_sel = tsquery_opr_selec(item + 1, operand,
										 lookup, length, minfreq);
			far_sel = tsquery_opr_selec(item + item->qoperator.left, operand,
										lookup, length, minfreq);

			if (item->qoperator.oper == OP_AND)
				result = near_sel * far_sel;
			else
				result = near_sel + far_sel - near_sel * far_sel;
		}
		else
		{
			elog(ERROR, "unrecognized operator: %d", item->qoperator.oper);
			result = 0;			/* keep compiler quiet */
		}
	}
	else
	{
		QueryOperand *qop = (QueryOperand *) item;
		LexemeKey	probe;

		/* Describe the lexeme we are looking for */
		probe.lexeme = operand + qop->distance;
		probe.length = qop->length;

		if (!qop->prefix)
		{
			/* Exact lexeme match */
			TextFreq   *hit;

			/* No stats for the variable: DEFAULT_TS_MATCH_SEL is all we have */
			if (lookup == NULL)
				return (Selectivity) DEFAULT_TS_MATCH_SEL;

			hit = (TextFreq *) bsearch(&probe, lookup, length,
									   sizeof(TextFreq),
									   compare_lexeme_textfreq);

			if (hit != NULL)
			{
				/* Listed in MCELEM: use the frequency ANALYZE recorded */
				result = hit->frequency;
			}
			else
			{
				/* Not listed: it can't be more common than minfreq / 2 */
				result = Min(DEFAULT_TS_MATCH_SEL, minfreq / 2);
			}
		}
		else
		{
			/* Prefix match, i.e. the query item is lexeme:* */
			Selectivity hit_sum = 0;
			Selectivity all_sum = 0;
			TextFreq   *entry;
			TextFreq   *stop;

			/*
			 * The approach below only makes sense with a fair number of
			 * MCVs; the threshold is 100.  With no stats or too few,
			 * arbitrarily use DEFAULT_TS_MATCH_SEL*4.
			 */
			if (lookup == NULL || length < 100)
				return (Selectivity) (DEFAULT_TS_MATCH_SEL * 4);

			/*
			 * Add up the frequencies of the MCVs that start with the prefix
			 * and of all MCVs, assuming the MCVs are representative of the
			 * whole lexeme population in this respect.
			 */
			stop = lookup + length;
			for (entry = lookup; entry < stop; entry++)
			{
				int			entry_len = VARSIZE_ANY_EXHDR(entry->element);

				if (entry_len >= probe.length &&
					strncmp(probe.lexeme, VARDATA_ANY(entry->element),
							probe.length) == 0)
					hit_sum += entry->frequency;

				all_sum += entry->frequency;
			}

			/* the guard is paranoia about dividing by zero */
			result = (all_sum > 0)
				? hit_sum / all_sum
				: (Selectivity) (DEFAULT_TS_MATCH_SEL * 4);

			/* A prefix match is never estimated below DEFAULT_TS_MATCH_SEL */
			result = Max(DEFAULT_TS_MATCH_SEL, result);
		}
	}

	/* roundoff can push intermediate values slightly out of range */
	CLAMP_PROBABILITY(result);

	return result;
}
/*
 * Recursively estimate the selectivity of a tsquery node:
 *
 *	 AND nodes:	s1 * s2
 *	 OR nodes:	s1 + s2 - s1 * s2
 *	 NOT nodes:	1 - s
 *	 VAL nodes:	freq[val] if the value is in MCELEM,
 *				otherwise min(freq[MCELEM]) / 2
 *
 * lookup is the MCELEM array (length entries); it is already sorted (see
 * ts_typanalyze.c), so VAL lookups use binary search.  operand is the base
 * of the query's operand strings, and minfreq is the lowest MCELEM
 * frequency.
 *
 * Operator results are clamped to the range of a probability; VAL results
 * are returned as computed.
 */
static Selectivity
tsquery_opr_selec(QueryItem *item, char *operand, TextFreq *lookup,
				  int length, float4 minfreq)
{
	/* recursion depth follows the query shape, so guard the stack */
	check_stack_depth();

	if (item->type != QI_VAL)
	{
		/* Operator node: combine the estimates of the operand subtrees */
		Selectivity result;

		if (item->operator.oper == OP_NOT)
		{
			result = 1.0 - tsquery_opr_selec(item + 1, operand,
											 lookup, length, minfreq);
		}
		else if (item->operator.oper == OP_AND ||
				 item->operator.oper == OP_OR)
		{
			Selectivity near_sel;
			Selectivity far_sel;

			near_sel = tsquery_opr_selec(item + 1, operand,
										 lookup, length, minfreq);
			far_sel = tsquery_opr_selec(item + item->operator.left, operand,
										lookup, length, minfreq);

			if (item->operator.oper == OP_AND)
				result = near_sel * far_sel;
			else
				result = near_sel + far_sel - near_sel * far_sel;
		}
		else
		{
			elog(ERROR, "unrecognized operator: %d", item->operator.oper);
			result = 0;			/* keep compiler quiet */
		}

		/* roundoff can push intermediate values slightly out of range */
		CLAMP_PROBABILITY(result);

		return result;
	}
	else
	{
		/* Value node: look the lexeme up in the MCELEM array */
		QueryOperand *qop = (QueryOperand *) item;
		LexemeKey	probe;
		TextFreq   *hit;

		probe.lexeme = operand + qop->distance;
		probe.length = qop->length;

		hit = (TextFreq *) bsearch(&probe, lookup, length,
								   sizeof(TextFreq), compare_lexeme_textfreq);

		/* Listed in MCELEM: use the frequency ANALYZE recorded */
		if (hit != NULL)
			return (Selectivity) hit->frequency;

		/* Not listed: it can't be more common than minfreq / 2 */
		return (Selectivity) Min(DEFAULT_TS_MATCH_SEL, minfreq / 2);
	}
}