コード例 #1
0
ファイル: array_selfuncs.c プロジェクト: AmiGanguli/postgres
/*
 * arraycontjoinsel -- join selectivity for array @>, &&, <@ operators
 *
 * No join-specific estimation is done yet: the result is just the default
 * selectivity that DEFAULT_SEL() picks for the operator OID received as
 * argument 1.
 */
Datum
arraycontjoinsel(PG_FUNCTION_ARGS)
{
	Oid			opno = PG_GETARG_OID(1);
	double		default_estimate;

	/* Stub: fall back to the per-operator default estimate */
	default_estimate = DEFAULT_SEL(opno);

	PG_RETURN_FLOAT8(default_estimate);
}
コード例 #2
0
ファイル: array_selfuncs.c プロジェクト: AmiGanguli/postgres
/*
 * calc_arraycontsel -- statistics-based selectivity for
 * "arraycolumn @> const", "arraycolumn && const" or "arraycolumn <@ const"
 *
 * The job here is to dig the relevant pg_statistic slots out of vardata and
 * hand them to mcelem_array_selec(), which does the actual estimation.
 *
 * vardata:  statistics lookup result for the array column
 * constval: the array constant (may be toasted)
 * elemtype: element type shared by the column and the constant
 * operator: OID of the operator being estimated
 *
 * Returns DEFAULT_SEL(operator) if the element type has no default
 * comparison function; otherwise the estimate from mcelem_array_selec(),
 * scaled by the non-null fraction when column statistics were usable.
 */
static Selectivity
calc_arraycontsel(VariableStatData *vardata, Datum constval,
				  Oid elemtype, Oid operator)
{
	TypeCacheEntry *typentry;
	FmgrInfo   *cmpfunc;
	ArrayType  *constarr;
	Selectivity result;

	/* We cannot work without the element type's default comparison proc */
	typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
	cmpfunc = &typentry->cmp_proc_finfo;
	if (!OidIsValid(cmpfunc->fn_oid))
		return DEFAULT_SEL(operator);

	/*
	 * Our caller has verified that the constant is an array of the same
	 * element type, so it is safe to fetch (and if needed detoast) it.
	 */
	constarr = DatumGetArrayTypeP(constval);

	if (!HeapTupleIsValid(vardata->statsTuple) ||
		!statistic_proc_security_check(vardata, cmpfunc->fn_oid))
	{
		/*
		 * No usable statistics: estimate without any slot data.  Nulls are
		 * assumed absent, so there is no stanullfrac correction here.
		 */
		result = mcelem_array_selec(constarr, typentry,
									NULL, 0, NULL, 0, NULL, 0,
									operator, cmpfunc);
	}
	else
	{
		Form_pg_statistic stats;
		AttStatsSlot mce_slot;
		AttStatsSlot hist_slot;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

		/* The MCELEM slot is an array of the same type as the column */
		if (!get_attstatsslot(&mce_slot, vardata->statsTuple,
							  STATISTIC_KIND_MCELEM, InvalidOid,
							  ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
		{
			/* Stats exist, but without most-common-elements data */
			result = mcelem_array_selec(constarr, typentry,
										NULL, 0, NULL, 0, NULL, 0,
										operator, cmpfunc);
		}
		else
		{
			/*
			 * Only "array <@ const" wants the histogram of distinct element
			 * counts.  When it is not wanted, or not available, present an
			 * all-zeroes slot instead.
			 */
			if (operator != OID_ARRAY_CONTAINED_OP)
				memset(&hist_slot, 0, sizeof(hist_slot));
			else if (!get_attstatsslot(&hist_slot, vardata->statsTuple,
									   STATISTIC_KIND_DECHIST, InvalidOid,
									   ATTSTATSSLOT_NUMBERS))
				memset(&hist_slot, 0, sizeof(hist_slot));

			/* Estimate from the column's most-common-elements data */
			result = mcelem_array_selec(constarr, typentry,
										mce_slot.values, mce_slot.nvalues,
										mce_slot.numbers, mce_slot.nnumbers,
										hist_slot.numbers, hist_slot.nnumbers,
										operator, cmpfunc);

			free_attstatsslot(&hist_slot);
			free_attstatsslot(&mce_slot);
		}

		/* MCE stats describe non-null rows only, so scale by their share */
		result *= (1.0 - stats->stanullfrac);
	}

	/* A toasted constant was copied by the fetch above; drop that copy */
	if (PointerGetDatum(constarr) != constval)
		pfree(constarr);

	return result;
}
コード例 #3
0
ファイル: array_selfuncs.c プロジェクト: AmiGanguli/postgres
/*
 * arraycontsel -- restriction selectivity for array @>, &&, <@ operators
 */
Datum
arraycontsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Selectivity selec;
	Oid			element_typeid;

	/*
	 * If expression is not (variable op something) or (something op
	 * variable), then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
	}

	/*
	 * The "&&", "@>" and "<@" operators are strict, so we can cope with a
	 * NULL constant right away.
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	/*
	 * If var is on the right, commute the operator, so that we can assume the
	 * var is on the left in what follows.
	 */
	if (!varonleft)
	{
		if (operator == OID_ARRAY_CONTAINS_OP)
			operator = OID_ARRAY_CONTAINED_OP;
		else if (operator == OID_ARRAY_CONTAINED_OP)
			operator = OID_ARRAY_CONTAINS_OP;
	}

	/*
	 * OK, there's a Var and a Const we're dealing with here.  We need the
	 * Const to be an array with same element type as column, else we can't do
	 * anything useful.  (Such cases will likely fail at runtime, but here
	 * we'd rather just return a default estimate.)
	 */
	element_typeid = get_base_element_type(((Const *) other)->consttype);
	if (element_typeid != InvalidOid &&
		element_typeid == get_base_element_type(vardata.vartype))
	{
		selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue,
								  element_typeid, operator);
	}
	else
	{
		selec = DEFAULT_SEL(operator);
	}

	ReleaseVariableStats(vardata);

	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}
コード例 #4
0
/*
 * Selectivity estimation for the subnet inclusion/overlap operators
 */
Datum
networksel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Selectivity selec,
				mcv_selec,
				non_mcv_selec;
	Datum		constvalue,
			   *hist_values;
	int			hist_nvalues;
	Form_pg_statistic stats;
	double		sumcommon,
				nullfrac;
	FmgrInfo	proc;

	/*
	 * If expression is not (variable op something) or (something op
	 * variable), then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
	}

	/* All of the operators handled here are strict. */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}
	constvalue = ((Const *) other)->constvalue;

	/* Otherwise, we need stats in order to produce a non-default estimate. */
	if (!HeapTupleIsValid(vardata.statsTuple))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
	}

	stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
	nullfrac = stats->stanullfrac;

	/*
	 * If we have most-common-values info, add up the fractions of the MCV
	 * entries that satisfy MCV OP CONST.  These fractions contribute directly
	 * to the result selectivity.  Also add up the total fraction represented
	 * by MCV entries.
	 */
	fmgr_info(get_opcode(operator), &proc);
	mcv_selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft,
								&sumcommon);

	/*
	 * If we have a histogram, use it to estimate the proportion of the
	 * non-MCV population that satisfies the clause.  If we don't, apply the
	 * default selectivity to that population.
	 */
	if (get_attstatsslot(vardata.statsTuple,
						 vardata.atttype, vardata.atttypmod,
						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
						 NULL,
						 &hist_values, &hist_nvalues,
						 NULL, NULL))
	{
		int			opr_codenum = inet_opr_codenum(operator);

		/* Commute if needed, so we can consider histogram to be on the left */
		if (!varonleft)
			opr_codenum = -opr_codenum;
		non_mcv_selec = inet_hist_value_sel(hist_values, hist_nvalues,
											constvalue, opr_codenum);

		free_attstatsslot(vardata.atttype, hist_values, hist_nvalues, NULL, 0);
	}
	else
		non_mcv_selec = DEFAULT_SEL(operator);

	/* Combine selectivities for MCV and non-MCV populations */
	selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;

	/* Result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	ReleaseVariableStats(vardata);

	PG_RETURN_FLOAT8(selec);
}
コード例 #5
0
/*
 * Semi join selectivity estimation for subnet inclusion/overlap operators
 *
 * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs
 * histogram selectivity for semi/anti join cases.
 *
 * operator: OID of the join operator
 * vardata1: statistics lookup result for the left-hand input
 * vardata2: statistics lookup result for the right-hand input
 *
 * Returns the estimated selectivity.  If either side has neither an MCV list
 * nor a histogram, the result is DEFAULT_SEL(operator) scaled by the known
 * non-null fractions.
 */
static Selectivity
networkjoinsel_semi(Oid operator,
					VariableStatData *vardata1, VariableStatData *vardata2)
{
	Form_pg_statistic stats;
	Selectivity selec = 0.0,
				sumcommon1 = 0.0,
				sumcommon2 = 0.0;
	double		nullfrac1 = 0.0,
				nullfrac2 = 0.0,
				hist2_weight = 0.0;
	bool		mcv1_exists = false,
				mcv2_exists = false,
				hist1_exists = false,
				hist2_exists = false;
	int			opr_codenum;
	FmgrInfo	proc;

	/*
	 * The slot output variables are initialized explicitly: some of them are
	 * passed (by value) to inet_semi_join_sel() even when the corresponding
	 * slot was not found, and we do not want to rely on get_attstatsslot()
	 * having stored anything into them in that case.
	 */
	int			i,
				mcv1_nvalues = 0,
				mcv2_nvalues = 0,
				mcv1_nnumbers = 0,
				mcv2_nnumbers = 0,
				hist1_nvalues = 0,
				hist2_nvalues = 0,
				mcv1_length = 0,
				mcv2_length = 0;
	Datum	   *mcv1_values = NULL,
			   *mcv2_values = NULL,
			   *hist1_values = NULL,
			   *hist2_values = NULL;
	float4	   *mcv1_numbers = NULL,
			   *mcv2_numbers = NULL;

	if (HeapTupleIsValid(vardata1->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
		nullfrac1 = stats->stanullfrac;

		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
									   vardata1->atttype, vardata1->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv1_values, &mcv1_nvalues,
									   &mcv1_numbers, &mcv1_nnumbers);
		hist1_exists = get_attstatsslot(vardata1->statsTuple,
										vardata1->atttype, vardata1->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist1_values, &hist1_nvalues,
										NULL, NULL);
		if (mcv1_exists)
		{
			/* Arbitrarily limit number of MCVs considered */
			mcv1_length = Min(mcv1_nvalues, MAX_CONSIDERED_ELEMS);
			sumcommon1 = mcv_population(mcv1_numbers, mcv1_length);
		}
	}

	if (HeapTupleIsValid(vardata2->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
		nullfrac2 = stats->stanullfrac;

		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
									   vardata2->atttype, vardata2->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv2_values, &mcv2_nvalues,
									   &mcv2_numbers, &mcv2_nnumbers);
		hist2_exists = get_attstatsslot(vardata2->statsTuple,
										vardata2->atttype, vardata2->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist2_values, &hist2_nvalues,
										NULL, NULL);
		if (mcv2_exists)
		{
			/* Arbitrarily limit number of MCVs considered */
			mcv2_length = Min(mcv2_nvalues, MAX_CONSIDERED_ELEMS);
			sumcommon2 = mcv_population(mcv2_numbers, mcv2_length);
		}
	}

	opr_codenum = inet_opr_codenum(operator);
	fmgr_info(get_opcode(operator), &proc);

	/* Estimate number of input rows represented by RHS histogram. */
	if (hist2_exists && vardata2->rel)
		hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows;

	/*
	 * Consider each element of the LHS MCV list, matching it to whatever RHS
	 * stats we have.  Scale according to the known frequency of the MCV.
	 */
	if (mcv1_exists && (mcv2_exists || hist2_exists))
	{
		for (i = 0; i < mcv1_length; i++)
		{
			selec += mcv1_numbers[i] *
				inet_semi_join_sel(mcv1_values[i],
								   mcv2_exists, mcv2_values, mcv2_length,
								   hist2_exists, hist2_values, hist2_nvalues,
								   hist2_weight,
								   &proc, opr_codenum);
		}
	}

	/*
	 * Consider each element of the LHS histogram, except for the first and
	 * last elements, which we exclude on the grounds that they're outliers
	 * and thus not very representative.  Scale on the assumption that each
	 * such histogram element represents an equal share of the LHS histogram
	 * population (which is a bit bogus, because the members of its bucket may
	 * not all act the same with respect to the join clause, but it's hard to
	 * do better).
	 *
	 * If there are too many histogram elements, decimate to limit runtime.
	 */
	if (hist1_exists && hist1_nvalues > 2 && (mcv2_exists || hist2_exists))
	{
		double		hist_selec_sum = 0.0;
		int			k,
					n;

		/* Step size; always at least 1 because hist1_nvalues > 2 here */
		k = (hist1_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;

		n = 0;
		for (i = 1; i < hist1_nvalues - 1; i += k)
		{
			hist_selec_sum +=
				inet_semi_join_sel(hist1_values[i],
								   mcv2_exists, mcv2_values, mcv2_length,
								   hist2_exists, hist2_values, hist2_nvalues,
								   hist2_weight,
								   &proc, opr_codenum);
			n++;
		}

		/* n >= 1 here, since the loop above runs at least once */
		selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n;
	}

	/*
	 * If useful statistics are not available then use the default estimate.
	 * We can apply null fractions if known, though.
	 */
	if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);

	/* Release stats. */
	if (mcv1_exists)
		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
						  mcv1_numbers, mcv1_nnumbers);
	if (mcv2_exists)
		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
						  mcv2_numbers, mcv2_nnumbers);
	if (hist1_exists)
		free_attstatsslot(vardata1->atttype, hist1_values, hist1_nvalues,
						  NULL, 0);
	if (hist2_exists)
		free_attstatsslot(vardata2->atttype, hist2_values, hist2_nvalues,
						  NULL, 0);

	return selec;
}
コード例 #6
0
/*
 * Inner join selectivity estimation for subnet inclusion/overlap operators
 *
 * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
 * selectivity for join using the subnet inclusion operators.  Unlike the
 * join selectivity function for the equality operator, eqjoinsel_inner(),
 * one to one matching of the values is not enough.  Network inclusion
 * operators are likely to match many to many, so we must check all pairs.
 * (Note: it might be possible to exploit understanding of the histogram's
 * btree ordering to reduce the work needed, but we don't currently try.)
 * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
 *
 * operator: OID of the join operator
 * vardata1: statistics lookup result for the left-hand input
 * vardata2: statistics lookup result for the right-hand input
 *
 * Returns the estimated selectivity.  If either side has neither an MCV list
 * nor a histogram, the result is DEFAULT_SEL(operator) scaled by the known
 * non-null fractions.
 */
static Selectivity
networkjoinsel_inner(Oid operator,
					 VariableStatData *vardata1, VariableStatData *vardata2)
{
	Form_pg_statistic stats;
	double		nullfrac1 = 0.0,
				nullfrac2 = 0.0;
	Selectivity selec = 0.0,
				sumcommon1 = 0.0,
				sumcommon2 = 0.0;
	bool		mcv1_exists = false,
				mcv2_exists = false,
				hist1_exists = false,
				hist2_exists = false;
	int			opr_codenum;

	/*
	 * The slot output variables are initialized explicitly so that nothing
	 * below can read an indeterminate value when a slot was not found; we do
	 * not want to rely on get_attstatsslot() having stored anything into
	 * them in that case.
	 */
	int			mcv1_nvalues = 0,
				mcv2_nvalues = 0,
				mcv1_nnumbers = 0,
				mcv2_nnumbers = 0,
				hist1_nvalues = 0,
				hist2_nvalues = 0,
				mcv1_length = 0,
				mcv2_length = 0;
	Datum	   *mcv1_values = NULL,
			   *mcv2_values = NULL,
			   *hist1_values = NULL,
			   *hist2_values = NULL;
	float4	   *mcv1_numbers = NULL,
			   *mcv2_numbers = NULL;

	if (HeapTupleIsValid(vardata1->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
		nullfrac1 = stats->stanullfrac;

		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
									   vardata1->atttype, vardata1->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv1_values, &mcv1_nvalues,
									   &mcv1_numbers, &mcv1_nnumbers);
		hist1_exists = get_attstatsslot(vardata1->statsTuple,
										vardata1->atttype, vardata1->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist1_values, &hist1_nvalues,
										NULL, NULL);
		if (mcv1_exists)
		{
			/* Arbitrarily limit number of MCVs considered */
			mcv1_length = Min(mcv1_nvalues, MAX_CONSIDERED_ELEMS);
			sumcommon1 = mcv_population(mcv1_numbers, mcv1_length);
		}
	}

	if (HeapTupleIsValid(vardata2->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
		nullfrac2 = stats->stanullfrac;

		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
									   vardata2->atttype, vardata2->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv2_values, &mcv2_nvalues,
									   &mcv2_numbers, &mcv2_nnumbers);
		hist2_exists = get_attstatsslot(vardata2->statsTuple,
										vardata2->atttype, vardata2->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist2_values, &hist2_nvalues,
										NULL, NULL);
		if (mcv2_exists)
		{
			/* Arbitrarily limit number of MCVs considered */
			mcv2_length = Min(mcv2_nvalues, MAX_CONSIDERED_ELEMS);
			sumcommon2 = mcv_population(mcv2_numbers, mcv2_length);
		}
	}

	opr_codenum = inet_opr_codenum(operator);

	/*
	 * Calculate selectivity for MCV vs MCV matches.
	 */
	if (mcv1_exists && mcv2_exists)
		selec += inet_mcv_join_sel(mcv1_values, mcv1_numbers, mcv1_length,
								   mcv2_values, mcv2_numbers, mcv2_length,
								   operator);

	/*
	 * Add in selectivities for MCV vs histogram matches, scaling according to
	 * the fractions of the populations represented by the histograms. Note
	 * that the second case needs to commute the operator.
	 */
	if (mcv1_exists && hist2_exists)
		selec += (1.0 - nullfrac2 - sumcommon2) *
			inet_mcv_hist_sel(mcv1_values, mcv1_numbers, mcv1_length,
							  hist2_values, hist2_nvalues,
							  opr_codenum);
	if (mcv2_exists && hist1_exists)
		selec += (1.0 - nullfrac1 - sumcommon1) *
			inet_mcv_hist_sel(mcv2_values, mcv2_numbers, mcv2_length,
							  hist1_values, hist1_nvalues,
							  -opr_codenum);

	/*
	 * Add in selectivity for histogram vs histogram matches, again scaling
	 * appropriately.
	 */
	if (hist1_exists && hist2_exists)
		selec += (1.0 - nullfrac1 - sumcommon1) *
			(1.0 - nullfrac2 - sumcommon2) *
			inet_hist_inclusion_join_sel(hist1_values, hist1_nvalues,
										 hist2_values, hist2_nvalues,
										 opr_codenum);

	/*
	 * If useful statistics are not available then use the default estimate.
	 * We can apply null fractions if known, though.
	 */
	if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);

	/* Release stats. */
	if (mcv1_exists)
		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
						  mcv1_numbers, mcv1_nnumbers);
	if (mcv2_exists)
		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
						  mcv2_numbers, mcv2_nnumbers);
	if (hist1_exists)
		free_attstatsslot(vardata1->atttype, hist1_values, hist1_nvalues,
						  NULL, 0);
	if (hist2_exists)
		free_attstatsslot(vardata2->atttype, hist2_values, hist2_nvalues,
						  NULL, 0);

	return selec;
}