/* * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators */ Datum arraycontjoinsel(PG_FUNCTION_ARGS) { /* For the moment this is just a stub */ Oid operator = PG_GETARG_OID(1); PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); }
/*
 * calc_arraycontsel
 *		Estimate selectivity for "arraycolumn @> const",
 *		"arraycolumn && const" or "arraycolumn <@ const" from statistics.
 *
 * Most of the work here is locating the pg_statistic slots to feed to
 * mcelem_array_selec(), which performs the actual estimation.
 *
 * vardata	- statistics lookup result for the array column
 * constval	- the array constant; the caller has verified that it is an
 *			  array with the same element type as the column
 * elemtype	- that common element type
 * operator	- operator OID
 *
 * If the element type has no default comparison function, the operator's
 * default selectivity is returned without looking at any statistics.
 */
static Selectivity
calc_arraycontsel(VariableStatData *vardata, Datum constval,
				  Oid elemtype, Oid operator)
{
	TypeCacheEntry *typentry;
	FmgrInfo   *cmpfunc;
	ArrayType  *array;
	Selectivity selec;
	AttStatsSlot sslot;
	AttStatsSlot hslot;
	bool		stats_usable;
	bool		have_mcelem = false;

	/* Look up the element type's default comparison function */
	typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
	cmpfunc = &typentry->cmp_proc_finfo;
	if (!OidIsValid(cmpfunc->fn_oid))
		return DEFAULT_SEL(operator);

	/* Fetch the constant as an array (this may make a detoasted copy) */
	array = DatumGetArrayTypeP(constval);

	/*
	 * Statistics are consulted only if a stats tuple exists and the security
	 * check on the comparison function passes.
	 */
	stats_usable = HeapTupleIsValid(vardata->statsTuple) &&
		statistic_proc_security_check(vardata, cmpfunc->fn_oid);

	/* MCELEM will be an array of same type as column */
	if (stats_usable)
		have_mcelem = get_attstatsslot(&sslot, vardata->statsTuple,
									   STATISTIC_KIND_MCELEM, InvalidOid,
									   ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);

	if (have_mcelem)
	{
		/*
		 * Only the "array <@ const" case needs the histogram of distinct
		 * element counts.  In every other case, or if that slot can't be
		 * fetched, present an all-zeroes slot instead.
		 */
		if (!(operator == OID_ARRAY_CONTAINED_OP &&
			  get_attstatsslot(&hslot, vardata->statsTuple,
							   STATISTIC_KIND_DECHIST, InvalidOid,
							   ATTSTATSSLOT_NUMBERS)))
			memset(&hslot, 0, sizeof(hslot));

		/* Estimate using the most-common-elements slot of the array Var */
		selec = mcelem_array_selec(array, typentry,
								   sslot.values, sslot.nvalues,
								   sslot.numbers, sslot.nnumbers,
								   hslot.numbers, hslot.nnumbers,
								   operator, cmpfunc);

		free_attstatsslot(&hslot);
		free_attstatsslot(&sslot);
	}
	else
	{
		/* Either no stats at all, or no most-common-elements info */
		selec = mcelem_array_selec(array, typentry,
								   NULL, 0, NULL, 0, NULL, 0,
								   operator, cmpfunc);
	}

	/*
	 * MCE stats count only non-null rows, so scale by the non-null fraction.
	 * Without usable stats we assume no nulls and apply no correction.
	 */
	if (stats_usable)
	{
		Form_pg_statistic stats;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
		selec *= (1.0 - stats->stanullfrac);
	}

	/* Release the detoasted copy of the constant, if one was made */
	if (PointerGetDatum(array) != constval)
		pfree(array);

	return selec;
}
/* * arraycontsel -- restriction selectivity for array @>, &&, <@ operators */ Datum arraycontsel(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Selectivity selec; Oid element_typeid; /* * If expression is not (variable op something) or (something op * variable), then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); } /* * The "&&", "@>" and "<@" operators are strict, so we can cope with a * NULL constant right away. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } /* * If var is on the right, commute the operator, so that we can assume the * var is on the left in what follows. */ if (!varonleft) { if (operator == OID_ARRAY_CONTAINS_OP) operator = OID_ARRAY_CONTAINED_OP; else if (operator == OID_ARRAY_CONTAINED_OP) operator = OID_ARRAY_CONTAINS_OP; } /* * OK, there's a Var and a Const we're dealing with here. We need the * Const to be an array with same element type as column, else we can't do * anything useful. (Such cases will likely fail at runtime, but here * we'd rather just return a default estimate.) */ element_typeid = get_base_element_type(((Const *) other)->consttype); if (element_typeid != InvalidOid && element_typeid == get_base_element_type(vardata.vartype)) { selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue, element_typeid, operator); } else { selec = DEFAULT_SEL(operator); } ReleaseVariableStats(vardata); CLAMP_PROBABILITY(selec); PG_RETURN_FLOAT8((float8) selec); }
/* * Selectivity estimation for the subnet inclusion/overlap operators */ Datum networksel(PG_FUNCTION_ARGS) { PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0); Oid operator = PG_GETARG_OID(1); List *args = (List *) PG_GETARG_POINTER(2); int varRelid = PG_GETARG_INT32(3); VariableStatData vardata; Node *other; bool varonleft; Selectivity selec, mcv_selec, non_mcv_selec; Datum constvalue, *hist_values; int hist_nvalues; Form_pg_statistic stats; double sumcommon, nullfrac; FmgrInfo proc; /* * If expression is not (variable op something) or (something op * variable), then punt and return a default estimate. */ if (!get_restriction_variable(root, args, varRelid, &vardata, &other, &varonleft)) PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); /* * Can't do anything useful if the something is not a constant, either. */ if (!IsA(other, Const)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); } /* All of the operators handled here are strict. */ if (((Const *) other)->constisnull) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(0.0); } constvalue = ((Const *) other)->constvalue; /* Otherwise, we need stats in order to produce a non-default estimate. */ if (!HeapTupleIsValid(vardata.statsTuple)) { ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(DEFAULT_SEL(operator)); } stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple); nullfrac = stats->stanullfrac; /* * If we have most-common-values info, add up the fractions of the MCV * entries that satisfy MCV OP CONST. These fractions contribute directly * to the result selectivity. Also add up the total fraction represented * by MCV entries. */ fmgr_info(get_opcode(operator), &proc); mcv_selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft, &sumcommon); /* * If we have a histogram, use it to estimate the proportion of the * non-MCV population that satisfies the clause. If we don't, apply the * default selectivity to that population. 
*/ if (get_attstatsslot(vardata.statsTuple, vardata.atttype, vardata.atttypmod, STATISTIC_KIND_HISTOGRAM, InvalidOid, NULL, &hist_values, &hist_nvalues, NULL, NULL)) { int opr_codenum = inet_opr_codenum(operator); /* Commute if needed, so we can consider histogram to be on the left */ if (!varonleft) opr_codenum = -opr_codenum; non_mcv_selec = inet_hist_value_sel(hist_values, hist_nvalues, constvalue, opr_codenum); free_attstatsslot(vardata.atttype, hist_values, hist_nvalues, NULL, 0); } else non_mcv_selec = DEFAULT_SEL(operator); /* Combine selectivities for MCV and non-MCV populations */ selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec; /* Result should be in range, but make sure... */ CLAMP_PROBABILITY(selec); ReleaseVariableStats(vardata); PG_RETURN_FLOAT8(selec); }
/*
 * Semi join selectivity estimation for subnet inclusion/overlap operators
 *
 * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs
 * histogram selectivity for semi/anti join cases.
 *
 * operator	- OID of the join operator
 * vardata1	- statistics lookup result for the left-hand (outer) input
 * vardata2	- statistics lookup result for the right-hand (inner) input
 *
 * Returns the estimated selectivity.  If either side has neither an MCV list
 * nor a histogram, the result is the operator's default selectivity scaled
 * by whatever null fractions are known.
 */
static Selectivity
networkjoinsel_semi(Oid operator,
					VariableStatData *vardata1, VariableStatData *vardata2)
{
	Form_pg_statistic stats;
	Selectivity selec = 0.0,
				sumcommon1 = 0.0,
				sumcommon2 = 0.0;
	double		nullfrac1 = 0.0,
				nullfrac2 = 0.0,
				hist2_weight = 0.0;
	bool		mcv1_exists = false,
				mcv2_exists = false,
				hist1_exists = false,
				hist2_exists = false;
	int			opr_codenum;
	FmgrInfo	proc;

	/*
	 * All slot outputs start out zeroed/NULL.  Nothing visible here
	 * guarantees that get_attstatsslot() stores into its output arguments
	 * when it returns false, and several of these variables are passed by
	 * value to inet_semi_join_sel() even when the matching "exists" flag is
	 * false, so they must never hold indeterminate values.
	 */
	int			i,
				mcv1_nvalues = 0,
				mcv2_nvalues = 0,
				mcv1_nnumbers = 0,
				mcv2_nnumbers = 0,
				hist1_nvalues = 0,
				hist2_nvalues = 0,
				mcv1_length = 0,
				mcv2_length = 0;
	Datum	   *mcv1_values = NULL,
			   *mcv2_values = NULL,
			   *hist1_values = NULL,
			   *hist2_values = NULL;
	float4	   *mcv1_numbers = NULL,
			   *mcv2_numbers = NULL;

	if (HeapTupleIsValid(vardata1->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
		nullfrac1 = stats->stanullfrac;

		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
									   vardata1->atttype,
									   vardata1->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv1_values, &mcv1_nvalues,
									   &mcv1_numbers, &mcv1_nnumbers);
		hist1_exists = get_attstatsslot(vardata1->statsTuple,
										vardata1->atttype,
										vardata1->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist1_values, &hist1_nvalues,
										NULL, NULL);
		if (mcv1_exists)
		{
			/* Arbitrarily limit number of MCVs considered */
			mcv1_length = Min(mcv1_nvalues, MAX_CONSIDERED_ELEMS);
			sumcommon1 = mcv_population(mcv1_numbers, mcv1_length);
		}
	}

	if (HeapTupleIsValid(vardata2->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
		nullfrac2 = stats->stanullfrac;

		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
									   vardata2->atttype,
									   vardata2->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv2_values, &mcv2_nvalues,
									   &mcv2_numbers, &mcv2_nnumbers);
		hist2_exists = get_attstatsslot(vardata2->statsTuple,
										vardata2->atttype,
										vardata2->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist2_values, &hist2_nvalues,
										NULL, NULL);
		if (mcv2_exists)
		{
			/* Arbitrarily limit number of MCVs considered */
			mcv2_length = Min(mcv2_nvalues, MAX_CONSIDERED_ELEMS);
			sumcommon2 = mcv_population(mcv2_numbers, mcv2_length);
		}
	}

	opr_codenum = inet_opr_codenum(operator);
	fmgr_info(get_opcode(operator), &proc);

	/* Estimate number of input rows represented by RHS histogram. */
	if (hist2_exists && vardata2->rel)
		hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows;

	/*
	 * Consider each element of the LHS MCV list, matching it to whatever RHS
	 * stats we have.  Scale according to the known frequency of the MCV.
	 */
	if (mcv1_exists && (mcv2_exists || hist2_exists))
	{
		for (i = 0; i < mcv1_length; i++)
		{
			selec += mcv1_numbers[i] *
				inet_semi_join_sel(mcv1_values[i],
								   mcv2_exists, mcv2_values, mcv2_length,
								   hist2_exists, hist2_values, hist2_nvalues,
								   hist2_weight,
								   &proc, opr_codenum);
		}
	}

	/*
	 * Consider each element of the LHS histogram, except for the first and
	 * last elements, which we exclude on the grounds that they're outliers
	 * and thus not very representative.  Scale on the assumption that each
	 * such histogram element represents an equal share of the LHS histogram
	 * population (which is a bit bogus, because the members of its bucket may
	 * not all act the same with respect to the join clause, but it's hard to
	 * do better).
	 *
	 * If there are too many histogram elements, decimate to limit runtime.
	 */
	if (hist1_exists && hist1_nvalues > 2 && (mcv2_exists || hist2_exists))
	{
		double		hist_selec_sum = 0.0;
		int			k,
					n;

		/* Step size: visit roughly MAX_CONSIDERED_ELEMS interior entries */
		k = (hist1_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;

		/* hist1_nvalues > 2 guarantees at least one iteration, so n > 0 */
		n = 0;
		for (i = 1; i < hist1_nvalues - 1; i += k)
		{
			hist_selec_sum +=
				inet_semi_join_sel(hist1_values[i],
								   mcv2_exists, mcv2_values, mcv2_length,
								   hist2_exists, hist2_values, hist2_nvalues,
								   hist2_weight,
								   &proc, opr_codenum);
			n++;
		}

		selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n;
	}

	/*
	 * If useful statistics are not available then use the default estimate.
	 * We can apply null fractions if known, though.
	 */
	if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);

	/* Release stats. */
	if (mcv1_exists)
		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
						  mcv1_numbers, mcv1_nnumbers);
	if (mcv2_exists)
		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
						  mcv2_numbers, mcv2_nnumbers);
	if (hist1_exists)
		free_attstatsslot(vardata1->atttype, hist1_values, hist1_nvalues,
						  NULL, 0);
	if (hist2_exists)
		free_attstatsslot(vardata2->atttype, hist2_values, hist2_nvalues,
						  NULL, 0);

	return selec;
}
/*
 * Inner join selectivity estimation for subnet inclusion/overlap operators
 *
 * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
 * selectivity for join using the subnet inclusion operators.  Unlike the
 * join selectivity function for the equality operator, eqjoinsel_inner(),
 * one to one matching of the values is not enough.  Network inclusion
 * operators are likely to match many to many, so we must check all pairs.
 * (Note: it might be possible to exploit understanding of the histogram's
 * btree ordering to reduce the work needed, but we don't currently try.)
 * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
 *
 * operator	- OID of the join operator
 * vardata1	- statistics lookup result for the left-hand input
 * vardata2	- statistics lookup result for the right-hand input
 *
 * Returns the estimated selectivity.  If either side has neither an MCV list
 * nor a histogram, the result is the operator's default selectivity scaled
 * by whatever null fractions are known.
 */
static Selectivity
networkjoinsel_inner(Oid operator,
					 VariableStatData *vardata1, VariableStatData *vardata2)
{
	Form_pg_statistic stats;
	double		nullfrac1 = 0.0,
				nullfrac2 = 0.0;
	Selectivity selec = 0.0,
				sumcommon1 = 0.0,
				sumcommon2 = 0.0;
	bool		mcv1_exists = false,
				mcv2_exists = false,
				hist1_exists = false,
				hist2_exists = false;
	int			opr_codenum;

	/*
	 * All slot outputs start out zeroed/NULL.  Nothing visible here
	 * guarantees that get_attstatsslot() stores into its output arguments
	 * when it returns false, so this keeps every later read well-defined.
	 */
	int			mcv1_nvalues = 0,
				mcv2_nvalues = 0,
				mcv1_nnumbers = 0,
				mcv2_nnumbers = 0,
				hist1_nvalues = 0,
				hist2_nvalues = 0,
				mcv1_length = 0,
				mcv2_length = 0;
	Datum	   *mcv1_values = NULL,
			   *mcv2_values = NULL,
			   *hist1_values = NULL,
			   *hist2_values = NULL;
	float4	   *mcv1_numbers = NULL,
			   *mcv2_numbers = NULL;

	if (HeapTupleIsValid(vardata1->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
		nullfrac1 = stats->stanullfrac;

		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
									   vardata1->atttype,
									   vardata1->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv1_values, &mcv1_nvalues,
									   &mcv1_numbers, &mcv1_nnumbers);
		hist1_exists = get_attstatsslot(vardata1->statsTuple,
										vardata1->atttype,
										vardata1->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist1_values, &hist1_nvalues,
										NULL, NULL);
		if (mcv1_exists)
		{
			/* Arbitrarily limit number of MCVs considered */
			mcv1_length = Min(mcv1_nvalues, MAX_CONSIDERED_ELEMS);
			sumcommon1 = mcv_population(mcv1_numbers, mcv1_length);
		}
	}

	if (HeapTupleIsValid(vardata2->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
		nullfrac2 = stats->stanullfrac;

		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
									   vardata2->atttype,
									   vardata2->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv2_values, &mcv2_nvalues,
									   &mcv2_numbers, &mcv2_nnumbers);
		hist2_exists = get_attstatsslot(vardata2->statsTuple,
										vardata2->atttype,
										vardata2->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist2_values, &hist2_nvalues,
										NULL, NULL);
		if (mcv2_exists)
		{
			/* Arbitrarily limit number of MCVs considered */
			mcv2_length = Min(mcv2_nvalues, MAX_CONSIDERED_ELEMS);
			sumcommon2 = mcv_population(mcv2_numbers, mcv2_length);
		}
	}

	opr_codenum = inet_opr_codenum(operator);

	/*
	 * Calculate selectivity for MCV vs MCV matches.
	 */
	if (mcv1_exists && mcv2_exists)
		selec += inet_mcv_join_sel(mcv1_values, mcv1_numbers, mcv1_length,
								   mcv2_values, mcv2_numbers, mcv2_length,
								   operator);

	/*
	 * Add in selectivities for MCV vs histogram matches, scaling according to
	 * the fractions of the populations represented by the histograms. Note
	 * that the second case needs to commute the operator.
	 */
	if (mcv1_exists && hist2_exists)
		selec += (1.0 - nullfrac2 - sumcommon2) *
			inet_mcv_hist_sel(mcv1_values, mcv1_numbers, mcv1_length,
							  hist2_values, hist2_nvalues,
							  opr_codenum);
	if (mcv2_exists && hist1_exists)
		selec += (1.0 - nullfrac1 - sumcommon1) *
			inet_mcv_hist_sel(mcv2_values, mcv2_numbers, mcv2_length,
							  hist1_values, hist1_nvalues,
							  -opr_codenum);

	/*
	 * Add in selectivity for histogram vs histogram matches, again scaling
	 * appropriately.
	 */
	if (hist1_exists && hist2_exists)
		selec += (1.0 - nullfrac1 - sumcommon1) *
			(1.0 - nullfrac2 - sumcommon2) *
			inet_hist_inclusion_join_sel(hist1_values, hist1_nvalues,
										 hist2_values, hist2_nvalues,
										 opr_codenum);

	/*
	 * If useful statistics are not available then use the default estimate.
	 * We can apply null fractions if known, though.
	 */
	if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);

	/* Release stats. */
	if (mcv1_exists)
		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
						  mcv1_numbers, mcv1_nnumbers);
	if (mcv2_exists)
		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
						  mcv2_numbers, mcv2_nnumbers);
	if (hist1_exists)
		free_attstatsslot(vardata1->atttype, hist1_values, hist1_nvalues,
						  NULL, 0);
	if (hist2_exists)
		free_attstatsslot(vardata2->atttype, hist2_values, hist2_nvalues,
						  NULL, 0);

	return selec;
}