コード例 #1
0
ファイル: ts_selfuncs.c プロジェクト: adunstan/postgresql-dev
/*
 * @@ selectivity for tsvector var vs tsquery constant
 */
static Selectivity
tsquerysel(VariableStatData *vardata, Datum constval)
{
	Selectivity selec;
	TSQuery		query;

	/* The caller made sure the const is a TSQuery, so get it now */
	query = DatumGetTSQuery(constval);

	/* Empty query matches nothing */
	if (query->size == 0)
		return (Selectivity) 0.0;

	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		Datum	   *values;
		int			nvalues;
		float4	   *numbers;
		int			nnumbers;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

		/* MCELEM will be an array of TEXT elements for a tsvector column */
		if (get_attstatsslot(vardata->statsTuple,
							 TEXTOID, -1,
							 STATISTIC_KIND_MCELEM, InvalidOid,
							 NULL,
							 &values, &nvalues,
							 &numbers, &nnumbers))
		{
			/*
			 * There is a most-common-elements slot for the tsvector Var, so
			 * use that.
			 */
			selec = mcelem_tsquery_selec(query, values, nvalues,
										 numbers, nnumbers);
			free_attstatsslot(TEXTOID, values, nvalues, numbers, nnumbers);
		}
		else
		{
			/* No most-common-elements info, so do without */
			selec = tsquery_opr_selec_no_stats(query);
		}

		/*
		 * MCE stats count only non-null rows, so adjust for null rows.
		 */
		selec *= (1.0 - stats->stanullfrac);
	}
	else
	{
		/* No stats at all, so do without */
		selec = tsquery_opr_selec_no_stats(query);
		/* we assume no nulls here, so no stanullfrac correction */
	}

	return selec;
}
コード例 #2
0
ファイル: ts_selfuncs.c プロジェクト: Aldizh/buffer_manager
/*
 * @@ selectivity for tsvector var vs tsquery constant
 */
static Selectivity
tsquerysel(VariableStatData *vardata, Datum constval)
{
	Selectivity selec;
	TSQuery		query;

	/* The caller made sure the const is a TSQuery, so get it now */
	query = DatumGetTSQuery(constval);

	/* Empty query matches nothing */
	if (query->size == 0)
		return (Selectivity) 0.0;

	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		Datum	   *values;
		int			nvalues;
		float4	   *numbers;
		int			nnumbers;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

		/* MCELEM will be an array of TEXT elements for a tsvector column */
		if (get_attstatsslot(vardata->statsTuple,
							 TEXTOID, -1,
							 STATISTIC_KIND_MCELEM, InvalidOid,
							 &values, &nvalues,
							 &numbers, &nnumbers))
		{
			/*
			 * There is a most-common-elements slot for the tsvector Var, so
			 * use that.
			 */
			selec = mcelem_tsquery_selec(query, values, nvalues,
										 numbers, nnumbers);
			free_attstatsslot(TEXTOID, values, nvalues, numbers, nnumbers);
		}
		else
		{
			/* No most-common-elements info, so we must punt */
			selec = (Selectivity) DEFAULT_TS_MATCH_SEL;
		}
	}
	else
	{
		/* No stats at all, so we must punt */
		selec = (Selectivity) DEFAULT_TS_MATCH_SEL;
	}

	return selec;
}
コード例 #3
0
ファイル: analyzeutils.c プロジェクト: qiuyesuifeng/gpdb
/*
 * Given column stats of an attribute, build an MCVFreqPair and add it to the hash table.
 * If the MCV to be added already exist in the hash table, we increment its count value.
 * Input:
 * 	- datumHash: hash table
 * 	- partOid: Oid of current partition
 * 	- typInfo: type information
 * Output:
 *  - partReltuples: the number of tuples in this partition
 */
static void
addAllMCVsToHashTable
(
    HTAB *datumHash,
    Oid partOid,
    HeapTuple heaptupleStats,
    TypInfo *typInfo,
    float4 *partReltuples
)
{
    float4 reltuples = get_rel_reltuples(partOid);
    *partReltuples = reltuples;
    Datum	   *datumMCVs = NULL;
    int			numMCVs = 0;
    float4	   *freqs = NULL;
    int			numFreqs = 0;

    (void)	get_attstatsslot
    (
        heaptupleStats,
        typInfo->typOid,
        -1,
        STATISTIC_KIND_MCV,
        InvalidOid,
        &datumMCVs, &numMCVs,
        &freqs, &numFreqs
    );

    Assert(numMCVs == numFreqs);
    for (int i = 0; i < numMCVs; i++)
    {
        Datum mcv = datumMCVs[i];
        float4 count = reltuples * freqs[i];
        MCVFreqPair *mfp = (MCVFreqPair *) palloc(sizeof(MCVFreqPair));
        mfp->mcv = mcv;
        mfp->count = count;
        mfp->typinfo = typInfo;
        addMCVToHashTable(datumHash, mfp);
        pfree(mfp);
    }
    free_attstatsslot(typInfo->typOid, datumMCVs, numMCVs, freqs, numFreqs);
}
コード例 #4
0
ファイル: array_selfuncs.c プロジェクト: AmiGanguli/postgres
/*
 * scalararraysel_containment
 *		Estimate selectivity of ScalarArrayOpExpr via array containment.
 *
 * If we have const =/<> ANY/ALL (array_var) then we can estimate the
 * selectivity as though this were an array containment operator,
 * array_var op ARRAY[const].
 *
 * scalararraysel() has already verified that the ScalarArrayOpExpr's operator
 * is the array element type's default equality or inequality operator, and
 * has aggressively simplified both inputs to constants.
 *
 * Returns selectivity (0..1), or -1 if we fail to estimate selectivity.
 */
Selectivity
scalararraysel_containment(PlannerInfo *root,
						   Node *leftop, Node *rightop,
						   Oid elemtype, bool isEquality, bool useOr,
						   int varRelid)
{
	Selectivity selec;
	VariableStatData vardata;
	Datum		constval;
	TypeCacheEntry *typentry;
	FmgrInfo   *cmpfunc;

	/*
	 * rightop must be a variable, else punt.
	 */
	examine_variable(root, rightop, varRelid, &vardata);
	if (!vardata.rel)
	{
		ReleaseVariableStats(vardata);
		return -1.0;
	}

	/*
	 * leftop must be a constant, else punt.
	 */
	if (!IsA(leftop, Const))
	{
		ReleaseVariableStats(vardata);
		return -1.0;
	}
	if (((Const *) leftop)->constisnull)
	{
		/* qual can't succeed if null on left */
		ReleaseVariableStats(vardata);
		return (Selectivity) 0.0;
	}
	constval = ((Const *) leftop)->constvalue;

	/* Get element type's default comparison function */
	typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
	if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
	{
		ReleaseVariableStats(vardata);
		return -1.0;
	}
	cmpfunc = &typentry->cmp_proc_finfo;

	/*
	 * If the operator is <>, swap ANY/ALL, then invert the result later.
	 */
	if (!isEquality)
		useOr = !useOr;

	/* Get array element stats for var, if available */
	if (HeapTupleIsValid(vardata.statsTuple) &&
		statistic_proc_security_check(&vardata, cmpfunc->fn_oid))
	{
		Form_pg_statistic stats;
		AttStatsSlot sslot;
		AttStatsSlot hslot;

		stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);

		/* MCELEM will be an array of same type as element */
		if (get_attstatsslot(&sslot, vardata.statsTuple,
							 STATISTIC_KIND_MCELEM, InvalidOid,
							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
		{
			/* For ALL case, also get histogram of distinct-element counts */
			if (useOr ||
				!get_attstatsslot(&hslot, vardata.statsTuple,
								  STATISTIC_KIND_DECHIST, InvalidOid,
								  ATTSTATSSLOT_NUMBERS))
				memset(&hslot, 0, sizeof(hslot));

			/*
			 * For = ANY, estimate as var @> ARRAY[const].
			 *
			 * For = ALL, estimate as var <@ ARRAY[const].
			 */
			if (useOr)
				selec = mcelem_array_contain_overlap_selec(sslot.values,
														   sslot.nvalues,
														   sslot.numbers,
														   sslot.nnumbers,
														   &constval, 1,
														   OID_ARRAY_CONTAINS_OP,
														   cmpfunc);
			else
				selec = mcelem_array_contained_selec(sslot.values,
													 sslot.nvalues,
													 sslot.numbers,
													 sslot.nnumbers,
													 &constval, 1,
													 hslot.numbers,
													 hslot.nnumbers,
													 OID_ARRAY_CONTAINED_OP,
													 cmpfunc);

			free_attstatsslot(&hslot);
			free_attstatsslot(&sslot);
		}
		else
		{
			/* No most-common-elements info, so do without */
			if (useOr)
				selec = mcelem_array_contain_overlap_selec(NULL, 0,
														   NULL, 0,
														   &constval, 1,
														   OID_ARRAY_CONTAINS_OP,
														   cmpfunc);
			else
				selec = mcelem_array_contained_selec(NULL, 0,
													 NULL, 0,
													 &constval, 1,
													 NULL, 0,
													 OID_ARRAY_CONTAINED_OP,
													 cmpfunc);
		}

		/*
		 * MCE stats count only non-null rows, so adjust for null rows.
		 */
		selec *= (1.0 - stats->stanullfrac);
	}
	else
	{
		/* No stats at all, so do without */
		if (useOr)
			selec = mcelem_array_contain_overlap_selec(NULL, 0,
													   NULL, 0,
													   &constval, 1,
													   OID_ARRAY_CONTAINS_OP,
													   cmpfunc);
		else
			selec = mcelem_array_contained_selec(NULL, 0,
												 NULL, 0,
												 &constval, 1,
												 NULL, 0,
												 OID_ARRAY_CONTAINED_OP,
												 cmpfunc);
		/* we assume no nulls here, so no stanullfrac correction */
	}

	ReleaseVariableStats(vardata);

	/*
	 * If the operator is <>, invert the results.
	 */
	if (!isEquality)
		selec = 1.0 - selec;

	CLAMP_PROBABILITY(selec);

	return selec;
}
コード例 #5
0
ファイル: array_selfuncs.c プロジェクト: AmiGanguli/postgres
/*
 * Calculate selectivity for "arraycolumn @> const", "arraycolumn && const"
 * or "arraycolumn <@ const" based on the statistics
 *
 * This function is mainly responsible for extracting the pg_statistic data
 * to be used; we then pass the problem on to mcelem_array_selec().
 */
static Selectivity
calc_arraycontsel(VariableStatData *vardata, Datum constval,
				  Oid elemtype, Oid operator)
{
	Selectivity selec;
	TypeCacheEntry *typentry;
	FmgrInfo   *cmpfunc;
	ArrayType  *array;

	/* Get element type's default comparison function */
	typentry = lookup_type_cache(elemtype, TYPECACHE_CMP_PROC_FINFO);
	if (!OidIsValid(typentry->cmp_proc_finfo.fn_oid))
		return DEFAULT_SEL(operator);
	cmpfunc = &typentry->cmp_proc_finfo;

	/*
	 * The caller made sure the const is an array with same element type, so
	 * get it now
	 */
	array = DatumGetArrayTypeP(constval);

	if (HeapTupleIsValid(vardata->statsTuple) &&
		statistic_proc_security_check(vardata, cmpfunc->fn_oid))
	{
		Form_pg_statistic stats;
		AttStatsSlot sslot;
		AttStatsSlot hslot;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

		/* MCELEM will be an array of same type as column */
		if (get_attstatsslot(&sslot, vardata->statsTuple,
							 STATISTIC_KIND_MCELEM, InvalidOid,
							 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
		{
			/*
			 * For "array <@ const" case we also need histogram of distinct
			 * element counts.
			 */
			if (operator != OID_ARRAY_CONTAINED_OP ||
				!get_attstatsslot(&hslot, vardata->statsTuple,
								  STATISTIC_KIND_DECHIST, InvalidOid,
								  ATTSTATSSLOT_NUMBERS))
				memset(&hslot, 0, sizeof(hslot));

			/* Use the most-common-elements slot for the array Var. */
			selec = mcelem_array_selec(array, typentry,
									   sslot.values, sslot.nvalues,
									   sslot.numbers, sslot.nnumbers,
									   hslot.numbers, hslot.nnumbers,
									   operator, cmpfunc);

			free_attstatsslot(&hslot);
			free_attstatsslot(&sslot);
		}
		else
		{
			/* No most-common-elements info, so do without */
			selec = mcelem_array_selec(array, typentry,
									   NULL, 0, NULL, 0, NULL, 0,
									   operator, cmpfunc);
		}

		/*
		 * MCE stats count only non-null rows, so adjust for null rows.
		 */
		selec *= (1.0 - stats->stanullfrac);
	}
	else
	{
		/* No stats at all, so do without */
		selec = mcelem_array_selec(array, typentry,
								   NULL, 0, NULL, 0, NULL, 0,
								   operator, cmpfunc);
		/* we assume no nulls here, so no stanullfrac correction */
	}

	/* If constant was toasted, release the copy we made */
	if (PointerGetDatum(array) != constval)
		pfree(array);

	return selec;
}
コード例 #6
0
/*
 * Calculate range operator selectivity using histograms of range bounds.
 *
 * This estimate is for the portion of values that are not empty and not
 * NULL.
 */
static double
calc_hist_selectivity(TypeCacheEntry *typcache, VariableStatData *vardata,
					  RangeType *constval, Oid operator)
{
	Datum	   *hist_values;
	int			nhist;
	Datum	   *length_hist_values;
	int			length_nhist;
	RangeBound *hist_lower;
	RangeBound *hist_upper;
	int			i;
	RangeBound	const_lower;
	RangeBound	const_upper;
	bool		empty;
	double		hist_selec;

	/* Try to get histogram of ranges */
	if (!(HeapTupleIsValid(vardata->statsTuple) &&
		  get_attstatsslot(vardata->statsTuple,
						   vardata->atttype, vardata->atttypmod,
						   STATISTIC_KIND_BOUNDS_HISTOGRAM, InvalidOid,
						   NULL,
						   &hist_values, &nhist,
						   NULL, NULL)))
		return -1.0;

	/*
	 * Convert histogram of ranges into histograms of its lower and upper
	 * bounds.
	 */
	hist_lower = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
	hist_upper = (RangeBound *) palloc(sizeof(RangeBound) * nhist);
	for (i = 0; i < nhist; i++)
	{
		range_deserialize(typcache, DatumGetRangeType(hist_values[i]),
						  &hist_lower[i], &hist_upper[i], &empty);
		/* The histogram should not contain any empty ranges */
		if (empty)
			elog(ERROR, "bounds histogram contains an empty range");
	}

	/* @> and @< also need a histogram of range lengths */
	if (operator == OID_RANGE_CONTAINS_OP ||
		operator == OID_RANGE_CONTAINED_OP)
	{
		if (!(HeapTupleIsValid(vardata->statsTuple) &&
			  get_attstatsslot(vardata->statsTuple,
							   vardata->atttype, vardata->atttypmod,
							   STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM,
							   InvalidOid,
							   NULL,
							   &length_hist_values, &length_nhist,
							   NULL, NULL)))
			return -1.0;

		/* check that it's a histogram, not just a dummy entry */
		if (length_nhist < 2)
			return -1.0;
	}

	/* Extract the bounds of the constant value. */
	range_deserialize(typcache, constval, &const_lower, &const_upper, &empty);
	Assert(!empty);

	/*
	 * Calculate selectivity comparing the lower or upper bound of the
	 * constant with the histogram of lower or upper bounds.
	 */
	switch (operator)
	{
		case OID_RANGE_LESS_OP:

			/*
			 * The regular b-tree comparison operators (<, <=, >, >=) compare
			 * the lower bounds first, and the upper bounds for values with
			 * equal lower bounds. Estimate that by comparing the lower bounds
			 * only. This gives a fairly accurate estimate assuming there
			 * aren't many rows with a lower bound equal to the constant's
			 * lower bound.
			 */
			hist_selec =
				calc_hist_selectivity_scalar(typcache, &const_lower,
											 hist_lower, nhist, false);
			break;

		case OID_RANGE_LESS_EQUAL_OP:
			hist_selec =
				calc_hist_selectivity_scalar(typcache, &const_lower,
											 hist_lower, nhist, true);
			break;

		case OID_RANGE_GREATER_OP:
			hist_selec =
				1 - calc_hist_selectivity_scalar(typcache, &const_lower,
												 hist_lower, nhist, false);
			break;

		case OID_RANGE_GREATER_EQUAL_OP:
			hist_selec =
				1 - calc_hist_selectivity_scalar(typcache, &const_lower,
												 hist_lower, nhist, true);
			break;

		case OID_RANGE_LEFT_OP:
			/* var << const when upper(var) < lower(const) */
			hist_selec =
				calc_hist_selectivity_scalar(typcache, &const_lower,
											 hist_upper, nhist, false);
			break;

		case OID_RANGE_RIGHT_OP:
			/* var >> const when lower(var) > upper(const) */
			hist_selec =
				1 - calc_hist_selectivity_scalar(typcache, &const_upper,
												 hist_lower, nhist, true);
			break;

		case OID_RANGE_OVERLAPS_RIGHT_OP:
			/* compare lower bounds */
			hist_selec =
				1 - calc_hist_selectivity_scalar(typcache, &const_lower,
												 hist_lower, nhist, false);
			break;

		case OID_RANGE_OVERLAPS_LEFT_OP:
			/* compare upper bounds */
			hist_selec =
				calc_hist_selectivity_scalar(typcache, &const_upper,
											 hist_upper, nhist, true);
			break;

		case OID_RANGE_OVERLAP_OP:
		case OID_RANGE_CONTAINS_ELEM_OP:

			/*
			 * A && B <=> NOT (A << B OR A >> B).
			 *
			 * Since A << B and A >> B are mutually exclusive events we can
			 * sum their probabilities to find probability of (A << B OR A >>
			 * B).
			 *
			 * "range @> elem" is equivalent to "range && [elem,elem]". The
			 * caller already constructed the singular range from the element
			 * constant, so just treat it the same as &&.
			 */
			hist_selec =
				calc_hist_selectivity_scalar(typcache, &const_lower, hist_upper,
											 nhist, false);
			hist_selec +=
				(1.0 - calc_hist_selectivity_scalar(typcache, &const_upper, hist_lower,
													nhist, true));
			hist_selec = 1.0 - hist_selec;
			break;

		case OID_RANGE_CONTAINS_OP:
			hist_selec =
				calc_hist_selectivity_contains(typcache, &const_lower,
											 &const_upper, hist_lower, nhist,
										   length_hist_values, length_nhist);
			break;

		case OID_RANGE_CONTAINED_OP:
			if (const_lower.infinite)
			{
				/*
				 * Lower bound no longer matters. Just estimate the fraction
				 * with an upper bound <= const upper bound
				 */
				hist_selec =
					calc_hist_selectivity_scalar(typcache, &const_upper,
												 hist_upper, nhist, true);
			}
			else if (const_upper.infinite)
			{
				hist_selec =
					1.0 - calc_hist_selectivity_scalar(typcache, &const_lower,
												   hist_lower, nhist, false);
			}
			else
			{
				hist_selec =
					calc_hist_selectivity_contained(typcache, &const_lower,
											 &const_upper, hist_lower, nhist,
										   length_hist_values, length_nhist);
			}
			break;

		default:
			elog(ERROR, "unknown range operator %u", operator);
			hist_selec = -1.0;	/* keep compiler quiet */
			break;
	}

	return hist_selec;
}
コード例 #7
0
static double
calc_rangesel(TypeCacheEntry *typcache, VariableStatData *vardata,
			  RangeType *constval, Oid operator)
{
	double		hist_selec;
	double		selec;
	float4		empty_frac,
				null_frac;

	/*
	 * First look up the fraction of NULLs and empty ranges from pg_statistic.
	 */
	if (HeapTupleIsValid(vardata->statsTuple))
	{
		Form_pg_statistic stats;
		float4	   *numbers;
		int			nnumbers;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
		null_frac = stats->stanullfrac;

		/* Try to get fraction of empty ranges */
		if (get_attstatsslot(vardata->statsTuple,
							 vardata->atttype, vardata->atttypmod,
						   STATISTIC_KIND_RANGE_LENGTH_HISTOGRAM, InvalidOid,
							 NULL,
							 NULL, NULL,
							 &numbers, &nnumbers))
		{
			if (nnumbers != 1)
				elog(ERROR, "invalid empty fraction statistic");		/* shouldn't happen */
			empty_frac = numbers[0];
		}
		else
		{
			/* No empty fraction statistic. Assume no empty ranges. */
			empty_frac = 0.0;
		}
	}
	else
	{
		/*
		 * No stats are available. Follow through the calculations below
		 * anyway, assuming no NULLs and no empty ranges. This still allows us
		 * to give a better-than-nothing estimate based on whether the
		 * constant is an empty range or not.
		 */
		null_frac = 0.0;
		empty_frac = 0.0;
	}

	if (RangeIsEmpty(constval))
	{
		/*
		 * An empty range matches all ranges, all empty ranges, or nothing,
		 * depending on the operator
		 */
		switch (operator)
		{
				/* these return false if either argument is empty */
			case OID_RANGE_OVERLAP_OP:
			case OID_RANGE_OVERLAPS_LEFT_OP:
			case OID_RANGE_OVERLAPS_RIGHT_OP:
			case OID_RANGE_LEFT_OP:
			case OID_RANGE_RIGHT_OP:
				/* nothing is less than an empty range */
			case OID_RANGE_LESS_OP:
				selec = 0.0;
				break;

				/* only empty ranges can be contained by an empty range */
			case OID_RANGE_CONTAINED_OP:
				/* only empty ranges are <= an empty range */
			case OID_RANGE_LESS_EQUAL_OP:
				selec = empty_frac;
				break;

				/* everything contains an empty range */
			case OID_RANGE_CONTAINS_OP:
				/* everything is >= an empty range */
			case OID_RANGE_GREATER_EQUAL_OP:
				selec = 1.0;
				break;

				/* all non-empty ranges are > an empty range */
			case OID_RANGE_GREATER_OP:
				selec = 1.0 - empty_frac;
				break;

				/* an element cannot be empty */
			case OID_RANGE_CONTAINS_ELEM_OP:
			default:
				elog(ERROR, "unexpected operator %u", operator);
				selec = 0.0;	/* keep compiler quiet */
				break;
		}
	}
	else
	{
		/*
		 * Calculate selectivity using bound histograms. If that fails for
		 * some reason, e.g no histogram in pg_statistic, use the default
		 * constant estimate for the fraction of non-empty values. This is
		 * still somewhat better than just returning the default estimate,
		 * because this still takes into account the fraction of empty and
		 * NULL tuples, if we had statistics for them.
		 */
		hist_selec = calc_hist_selectivity(typcache, vardata, constval,
										   operator);
		if (hist_selec < 0.0)
			hist_selec = default_range_selectivity(operator);

		/*
		 * Now merge the results for the empty ranges and histogram
		 * calculations, realizing that the histogram covers only the
		 * non-null, non-empty values.
		 */
		if (operator == OID_RANGE_CONTAINED_OP)
		{
			/* empty is contained by anything non-empty */
			selec = (1.0 - empty_frac) * hist_selec + empty_frac;
		}
		else
		{
			/* with any other operator, empty Op non-empty matches nothing */
			selec = (1.0 - empty_frac) * hist_selec;
		}
	}

	/* all range operators are strict */
	selec *= (1.0 - null_frac);

	/* result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	return selec;
}
コード例 #8
0
ファイル: _int_selfuncs.c プロジェクト: CrocdileChan/postgres
/*
 * _int_matchsel -- restriction selectivity function for intarray @@ query_int
 */
Datum
_int_matchsel(PG_FUNCTION_ARGS)
{
    PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

    List	   *args = (List *) PG_GETARG_POINTER(2);
    int			varRelid = PG_GETARG_INT32(3);
    VariableStatData vardata;
    Node	   *other;
    bool		varonleft;
    Selectivity selec;
    QUERYTYPE  *query;
    Datum	   *mcelems = NULL;
    float4	   *mcefreqs = NULL;
    int			nmcelems = 0;
    float4		minfreq = 0.0;
    float4		nullfrac = 0.0;
    Form_pg_statistic stats;
    Datum	   *values = NULL;
    int			nvalues = 0;
    float4	   *numbers = NULL;
    int			nnumbers = 0;

    /*
     * If expression is not "variable @@ something" or "something @@ variable"
     * then punt and return a default estimate.
     */
    if (!get_restriction_variable(root, args, varRelid,
                                  &vardata, &other, &varonleft))
        PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

    /*
     * Variable should be int[]. We don't support cases where variable is
     * query_int.
     */
    if (vardata.vartype != INT4ARRAYOID)
        PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

    /*
     * Can't do anything useful if the something is not a constant, either.
     */
    if (!IsA(other, Const))
    {
        ReleaseVariableStats(vardata);
        PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
    }

    /*
     * The "@@" operator is strict, so we can cope with NULL right away.
     */
    if (((Const *) other)->constisnull)
    {
        ReleaseVariableStats(vardata);
        PG_RETURN_FLOAT8(0.0);
    }

    /* The caller made sure the const is a query, so get it now */
    query = DatumGetQueryTypeP(((Const *) other)->constvalue);

    /* Empty query matches nothing */
    if (query->size == 0)
    {
        ReleaseVariableStats(vardata);
        return (Selectivity) 0.0;
    }

    /*
     * Get the statistics for the intarray column.
     *
     * We're interested in the Most-Common-Elements list, and the NULL
     * fraction.
     */
    if (HeapTupleIsValid(vardata.statsTuple))
    {
        stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
        nullfrac = stats->stanullfrac;

        /*
         * For an int4 array, the default array type analyze function will
         * collect a Most Common Elements list, which is an array of int4s.
         */
        if (get_attstatsslot(vardata.statsTuple,
                             INT4OID, -1,
                             STATISTIC_KIND_MCELEM, InvalidOid,
                             NULL,
                             &values, &nvalues,
                             &numbers, &nnumbers))
        {
            /*
             * There should be three more Numbers than Values, because the
             * last three (for intarray) cells are taken for minimal, maximal
             * and nulls frequency. Punt if not.
             */
            if (nnumbers == nvalues + 3)
            {
                /* Grab the lowest frequency. */
                minfreq = numbers[nnumbers - (nnumbers - nvalues)];

                mcelems = values;
                mcefreqs = numbers;
                nmcelems = nvalues;
            }
        }
    }

    /* Process the logical expression in the query, using the stats */
    selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
                                mcelems, mcefreqs, nmcelems, minfreq);

    /* MCE stats count only non-null rows, so adjust for null rows. */
    selec *= (1.0 - nullfrac);

    free_attstatsslot(INT4OID, values, nvalues, numbers, nnumbers);
    ReleaseVariableStats(vardata);

    CLAMP_PROBABILITY(selec);

    PG_RETURN_FLOAT8((float8) selec);
}
コード例 #9
0
/*
 * Selectivity estimation for the subnet inclusion/overlap operators
 */
Datum
networksel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Selectivity selec,
				mcv_selec,
				non_mcv_selec;
	Datum		constvalue,
			   *hist_values;
	int			hist_nvalues;
	Form_pg_statistic stats;
	double		sumcommon,
				nullfrac;
	FmgrInfo	proc;

	/*
	 * If expression is not (variable op something) or (something op
	 * variable), then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
	}

	/* All of the operators handled here are strict. */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}
	constvalue = ((Const *) other)->constvalue;

	/* Otherwise, we need stats in order to produce a non-default estimate. */
	if (!HeapTupleIsValid(vardata.statsTuple))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
	}

	stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
	nullfrac = stats->stanullfrac;

	/*
	 * If we have most-common-values info, add up the fractions of the MCV
	 * entries that satisfy MCV OP CONST.  These fractions contribute directly
	 * to the result selectivity.  Also add up the total fraction represented
	 * by MCV entries.
	 */
	fmgr_info(get_opcode(operator), &proc);
	mcv_selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft,
								&sumcommon);

	/*
	 * If we have a histogram, use it to estimate the proportion of the
	 * non-MCV population that satisfies the clause.  If we don't, apply the
	 * default selectivity to that population.
	 */
	if (get_attstatsslot(vardata.statsTuple,
						 vardata.atttype, vardata.atttypmod,
						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
						 NULL,
						 &hist_values, &hist_nvalues,
						 NULL, NULL))
	{
		int			opr_codenum = inet_opr_codenum(operator);

		/* Commute if needed, so we can consider histogram to be on the left */
		if (!varonleft)
			opr_codenum = -opr_codenum;
		non_mcv_selec = inet_hist_value_sel(hist_values, hist_nvalues,
											constvalue, opr_codenum);

		free_attstatsslot(vardata.atttype, hist_values, hist_nvalues, NULL, 0);
	}
	else
		non_mcv_selec = DEFAULT_SEL(operator);

	/* Combine selectivities for MCV and non-MCV populations */
	selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;

	/* Result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	ReleaseVariableStats(vardata);

	PG_RETURN_FLOAT8(selec);
}
コード例 #10
0
/*
 * Semi join selectivity estimation for subnet inclusion/overlap operators
 *
 * Calculates MCV vs MCV, MCV vs histogram, histogram vs MCV, and histogram vs
 * histogram selectivity for semi/anti join cases.
 */
static Selectivity
networkjoinsel_semi(Oid operator,
					VariableStatData *vardata1, VariableStatData *vardata2)
{
	Form_pg_statistic stats;
	Selectivity selec = 0.0,
				sumcommon1 = 0.0,
				sumcommon2 = 0.0;
	double		nullfrac1 = 0.0,
				nullfrac2 = 0.0,
				hist2_weight = 0.0;
	bool		mcv1_exists = false,
				mcv2_exists = false,
				hist1_exists = false,
				hist2_exists = false;
	int			opr_codenum;
	FmgrInfo	proc;
	int			i,
				mcv1_nvalues,
				mcv2_nvalues,
				mcv1_nnumbers,
				mcv2_nnumbers,
				hist1_nvalues,
				hist2_nvalues,
				mcv1_length = 0,
				mcv2_length = 0;
	Datum	   *mcv1_values,
			   *mcv2_values,
			   *hist1_values,
			   *hist2_values;
	float4	   *mcv1_numbers,
			   *mcv2_numbers;

	if (HeapTupleIsValid(vardata1->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
		nullfrac1 = stats->stanullfrac;

		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
									   vardata1->atttype, vardata1->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv1_values, &mcv1_nvalues,
									   &mcv1_numbers, &mcv1_nnumbers);
		hist1_exists = get_attstatsslot(vardata1->statsTuple,
									  vardata1->atttype, vardata1->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist1_values, &hist1_nvalues,
										NULL, NULL);
		/* Arbitrarily limit number of MCVs considered */
		mcv1_length = Min(mcv1_nvalues, MAX_CONSIDERED_ELEMS);
		if (mcv1_exists)
			sumcommon1 = mcv_population(mcv1_numbers, mcv1_length);
	}

	if (HeapTupleIsValid(vardata2->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
		nullfrac2 = stats->stanullfrac;

		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
									   vardata2->atttype, vardata2->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv2_values, &mcv2_nvalues,
									   &mcv2_numbers, &mcv2_nnumbers);
		hist2_exists = get_attstatsslot(vardata2->statsTuple,
									  vardata2->atttype, vardata2->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist2_values, &hist2_nvalues,
										NULL, NULL);
		/* Arbitrarily limit number of MCVs considered */
		mcv2_length = Min(mcv2_nvalues, MAX_CONSIDERED_ELEMS);
		if (mcv2_exists)
			sumcommon2 = mcv_population(mcv2_numbers, mcv2_length);
	}

	opr_codenum = inet_opr_codenum(operator);
	fmgr_info(get_opcode(operator), &proc);

	/* Estimate number of input rows represented by RHS histogram. */
	if (hist2_exists && vardata2->rel)
		hist2_weight = (1.0 - nullfrac2 - sumcommon2) * vardata2->rel->rows;

	/*
	 * Consider each element of the LHS MCV list, matching it to whatever RHS
	 * stats we have.  Scale according to the known frequency of the MCV.
	 */
	if (mcv1_exists && (mcv2_exists || hist2_exists))
	{
		for (i = 0; i < mcv1_length; i++)
		{
			selec += mcv1_numbers[i] *
				inet_semi_join_sel(mcv1_values[i],
								   mcv2_exists, mcv2_values, mcv2_length,
								   hist2_exists, hist2_values, hist2_nvalues,
								   hist2_weight,
								   &proc, opr_codenum);
		}
	}

	/*
	 * Consider each element of the LHS histogram, except for the first and
	 * last elements, which we exclude on the grounds that they're outliers
	 * and thus not very representative.  Scale on the assumption that each
	 * such histogram element represents an equal share of the LHS histogram
	 * population (which is a bit bogus, because the members of its bucket may
	 * not all act the same with respect to the join clause, but it's hard to
	 * do better).
	 *
	 * If there are too many histogram elements, decimate to limit runtime.
	 */
	if (hist1_exists && hist1_nvalues > 2 && (mcv2_exists || hist2_exists))
	{
		double		hist_selec_sum = 0.0;
		int			k,
					n;

		k = (hist1_nvalues - 3) / MAX_CONSIDERED_ELEMS + 1;

		n = 0;
		for (i = 1; i < hist1_nvalues - 1; i += k)
		{
			hist_selec_sum +=
				inet_semi_join_sel(hist1_values[i],
								   mcv2_exists, mcv2_values, mcv2_length,
								   hist2_exists, hist2_values, hist2_nvalues,
								   hist2_weight,
								   &proc, opr_codenum);
			n++;
		}

		selec += (1.0 - nullfrac1 - sumcommon1) * hist_selec_sum / n;
	}

	/*
	 * If useful statistics are not available then use the default estimate.
	 * We can apply null fractions if known, though.
	 */
	if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);

	/* Release stats. */
	if (mcv1_exists)
		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
						  mcv1_numbers, mcv1_nnumbers);
	if (mcv2_exists)
		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
						  mcv2_numbers, mcv2_nnumbers);
	if (hist1_exists)
		free_attstatsslot(vardata1->atttype, hist1_values, hist1_nvalues,
						  NULL, 0);
	if (hist2_exists)
		free_attstatsslot(vardata2->atttype, hist2_values, hist2_nvalues,
						  NULL, 0);

	return selec;
}
コード例 #11
0
/*
 * Inner join selectivity estimation for subnet inclusion/overlap operators
 *
 * Calculates MCV vs MCV, MCV vs histogram and histogram vs histogram
 * selectivity for join using the subnet inclusion operators.  Unlike the
 * join selectivity function for the equality operator, eqjoinsel_inner(),
 * one to one matching of the values is not enough.  Network inclusion
 * operators are likely to match many to many, so we must check all pairs.
 * (Note: it might be possible to exploit understanding of the histogram's
 * btree ordering to reduce the work needed, but we don't currently try.)
 * Also, MCV vs histogram selectivity is not neglected as in eqjoinsel_inner().
 */
static Selectivity
networkjoinsel_inner(Oid operator,
					 VariableStatData *vardata1, VariableStatData *vardata2)
{
	Form_pg_statistic stats;
	double		nullfrac1 = 0.0,
				nullfrac2 = 0.0;
	Selectivity selec = 0.0,
				sumcommon1 = 0.0,
				sumcommon2 = 0.0;
	bool		mcv1_exists = false,
				mcv2_exists = false,
				hist1_exists = false,
				hist2_exists = false;
	int			opr_codenum;
	int			mcv1_nvalues,
				mcv2_nvalues,
				mcv1_nnumbers,
				mcv2_nnumbers,
				hist1_nvalues,
				hist2_nvalues,
				mcv1_length = 0,
				mcv2_length = 0;
	Datum	   *mcv1_values,
			   *mcv2_values,
			   *hist1_values,
			   *hist2_values;
	float4	   *mcv1_numbers,
			   *mcv2_numbers;

	if (HeapTupleIsValid(vardata1->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata1->statsTuple);
		nullfrac1 = stats->stanullfrac;

		mcv1_exists = get_attstatsslot(vardata1->statsTuple,
									   vardata1->atttype, vardata1->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv1_values, &mcv1_nvalues,
									   &mcv1_numbers, &mcv1_nnumbers);
		hist1_exists = get_attstatsslot(vardata1->statsTuple,
									  vardata1->atttype, vardata1->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist1_values, &hist1_nvalues,
										NULL, NULL);
		/* Arbitrarily limit number of MCVs considered */
		mcv1_length = Min(mcv1_nvalues, MAX_CONSIDERED_ELEMS);
		if (mcv1_exists)
			sumcommon1 = mcv_population(mcv1_numbers, mcv1_length);
	}

	if (HeapTupleIsValid(vardata2->statsTuple))
	{
		stats = (Form_pg_statistic) GETSTRUCT(vardata2->statsTuple);
		nullfrac2 = stats->stanullfrac;

		mcv2_exists = get_attstatsslot(vardata2->statsTuple,
									   vardata2->atttype, vardata2->atttypmod,
									   STATISTIC_KIND_MCV, InvalidOid,
									   NULL,
									   &mcv2_values, &mcv2_nvalues,
									   &mcv2_numbers, &mcv2_nnumbers);
		hist2_exists = get_attstatsslot(vardata2->statsTuple,
									  vardata2->atttype, vardata2->atttypmod,
										STATISTIC_KIND_HISTOGRAM, InvalidOid,
										NULL,
										&hist2_values, &hist2_nvalues,
										NULL, NULL);
		/* Arbitrarily limit number of MCVs considered */
		mcv2_length = Min(mcv2_nvalues, MAX_CONSIDERED_ELEMS);
		if (mcv2_exists)
			sumcommon2 = mcv_population(mcv2_numbers, mcv2_length);
	}

	opr_codenum = inet_opr_codenum(operator);

	/*
	 * Calculate selectivity for MCV vs MCV matches.
	 */
	if (mcv1_exists && mcv2_exists)
		selec += inet_mcv_join_sel(mcv1_values, mcv1_numbers, mcv1_length,
								   mcv2_values, mcv2_numbers, mcv2_length,
								   operator);

	/*
	 * Add in selectivities for MCV vs histogram matches, scaling according to
	 * the fractions of the populations represented by the histograms. Note
	 * that the second case needs to commute the operator.
	 */
	if (mcv1_exists && hist2_exists)
		selec += (1.0 - nullfrac2 - sumcommon2) *
			inet_mcv_hist_sel(mcv1_values, mcv1_numbers, mcv1_length,
							  hist2_values, hist2_nvalues,
							  opr_codenum);
	if (mcv2_exists && hist1_exists)
		selec += (1.0 - nullfrac1 - sumcommon1) *
			inet_mcv_hist_sel(mcv2_values, mcv2_numbers, mcv2_length,
							  hist1_values, hist1_nvalues,
							  -opr_codenum);

	/*
	 * Add in selectivity for histogram vs histogram matches, again scaling
	 * appropriately.
	 */
	if (hist1_exists && hist2_exists)
		selec += (1.0 - nullfrac1 - sumcommon1) *
			(1.0 - nullfrac2 - sumcommon2) *
			inet_hist_inclusion_join_sel(hist1_values, hist1_nvalues,
										 hist2_values, hist2_nvalues,
										 opr_codenum);

	/*
	 * If useful statistics are not available then use the default estimate.
	 * We can apply null fractions if known, though.
	 */
	if ((!mcv1_exists && !hist1_exists) || (!mcv2_exists && !hist2_exists))
		selec = (1.0 - nullfrac1) * (1.0 - nullfrac2) * DEFAULT_SEL(operator);

	/* Release stats. */
	if (mcv1_exists)
		free_attstatsslot(vardata1->atttype, mcv1_values, mcv1_nvalues,
						  mcv1_numbers, mcv1_nnumbers);
	if (mcv2_exists)
		free_attstatsslot(vardata2->atttype, mcv2_values, mcv2_nvalues,
						  mcv2_numbers, mcv2_nnumbers);
	if (hist1_exists)
		free_attstatsslot(vardata1->atttype, hist1_values, hist1_nvalues,
						  NULL, 0);
	if (hist2_exists)
		free_attstatsslot(vardata2->atttype, hist2_values, hist2_nvalues,
						  NULL, 0);

	return selec;
}
コード例 #12
0
Datum geography_gist_join_selectivity(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

	/* Oid operator = PG_GETARG_OID(1); */
	List *args = (List *) PG_GETARG_POINTER(2);
	JoinType jointype = (JoinType) PG_GETARG_INT16(3);

	Node *arg1, *arg2;
	Var *var1, *var2;
	Oid relid1, relid2;

	HeapTuple stats1_tuple, stats2_tuple;
	GEOG_STATS *geogstats1, *geogstats2;
	/*
	* These are to avoid casting the corresponding
	* "type-punned" pointers, which would break
	* "strict-aliasing rules".
	*/
	GEOG_STATS **gs1ptr=&geogstats1, **gs2ptr=&geogstats2;
	int geogstats1_nvalues = 0, geogstats2_nvalues = 0;
	float8 selectivity1 = 0.0, selectivity2 = 0.0;
	float4 num1_tuples = 0.0, num2_tuples = 0.0;
	float4 total_tuples = 0.0, rows_returned = 0.0;
	GBOX search_box;


	/**
	* Join selectivity algorithm. To calculation the selectivity we
	* calculate the intersection of the two column sample extents,
	* sum the results, and then multiply by two since for each
	* geometry in col 1 that intersects a geometry in col 2, the same
	* will also be true.
	*/

	POSTGIS_DEBUGF(3, "geography_gist_join_selectivity called with jointype %d", jointype);

	/*
	* We'll only respond to an inner join/unknown context join
	*/
	if (jointype != JOIN_INNER)
	{
		elog(NOTICE, "geography_gist_join_selectivity called with incorrect join type");
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	* Determine the oids of the geometry columns we are working with
	*/
	arg1 = (Node *) linitial(args);
	arg2 = (Node *) lsecond(args);

	if (!IsA(arg1, Var) || !IsA(arg2, Var))
	{
		elog(DEBUG1, "geography_gist_join_selectivity called with arguments that are not column references");
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	var1 = (Var *)arg1;
	var2 = (Var *)arg2;

	relid1 = getrelid(var1->varno, root->parse->rtable);
	relid2 = getrelid(var2->varno, root->parse->rtable);

	POSTGIS_DEBUGF(3, "Working with relations oids: %d %d", relid1, relid2);

	/* Read the stats tuple from the first column */
	stats1_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid1), Int16GetDatum(var1->varattno), 0, 0);
	if ( ! stats1_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	if ( ! get_attstatsslot(stats1_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **)gs1ptr, &geogstats1_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity");

		ReleaseSysCache(stats1_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}


	/* Read the stats tuple from the second column */
	stats2_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid2), Int16GetDatum(var2->varattno), 0, 0);
	if ( ! stats2_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default geometry join selectivity");

		free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues);
		ReleaseSysCache(stats1_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	if ( ! get_attstatsslot(stats2_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **)gs2ptr, &geogstats2_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geometry join selectivity");

		free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues);
		ReleaseSysCache(stats2_tuple);
		ReleaseSysCache(stats1_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}


	/**
	* Setup the search box - this is the intersection of the two column
	* extents.
	*/
	search_box.xmin = Max(geogstats1->xmin, geogstats2->xmin);
	search_box.ymin = Max(geogstats1->ymin, geogstats2->ymin);
	search_box.zmin = Max(geogstats1->zmin, geogstats2->zmin);
	search_box.xmax = Min(geogstats1->xmax, geogstats2->xmax);
	search_box.ymax = Min(geogstats1->ymax, geogstats2->ymax);
	search_box.zmax = Min(geogstats1->zmax, geogstats2->zmax);

	/* If the extents of the two columns don't intersect, return zero */
	if (search_box.xmin > search_box.xmax || search_box.ymin > search_box.ymax ||
	        search_box.zmin > search_box.zmax)
		PG_RETURN_FLOAT8(0.0);

	POSTGIS_DEBUGF(3, " -- geomstats1 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats1->xmin, geogstats1->ymin, geogstats1->zmin, geogstats1->xmax, geogstats1->ymax, geogstats1->zmax);
	POSTGIS_DEBUGF(3, " -- geomstats2 box: %.15g %.15g %.15g, %.15g %.15g %.15g", geogstats2->xmin, geogstats2->ymin, geogstats2->zmin, geogstats2->xmax, geogstats2->ymax, geogstats2->zmax);
	POSTGIS_DEBUGF(3, " -- calculated intersection box is : %.15g %.15g %.15g, %.15g %.15g %.15g", search_box.xmin, search_box.ymin, search_box.zmin, search_box.xmax, search_box.ymax, search_box.zmax);


	/* Do the selectivity */
	selectivity1 = estimate_selectivity(&search_box, geogstats1);
	selectivity2 = estimate_selectivity(&search_box, geogstats2);

	POSTGIS_DEBUGF(3, "selectivity1: %.15g   selectivity2: %.15g", selectivity1, selectivity2);

	/*
	* OK, so before we calculate the join selectivity we also need to
	* know the number of tuples in each of the columns since
	* estimate_selectivity returns the number of estimated tuples
	* divided by the total number of tuples.
	*/
	num1_tuples = geogstats1->totalrows;
	num2_tuples = geogstats2->totalrows;

	/* Free the statistic tuples */
	free_attstatsslot(0, NULL, 0, (float *)geogstats1, geogstats1_nvalues);
	ReleaseSysCache(stats1_tuple);

	free_attstatsslot(0, NULL, 0, (float *)geogstats2, geogstats2_nvalues);
	ReleaseSysCache(stats2_tuple);

	/*
	* Finally calculate the estimate of the number of rows returned
	*
	*    = 2 * (nrows from col1 + nrows from col2) /
	*	total nrows in col1 x total nrows in col2
	*
	* The factor of 2 accounts for the fact that for each tuple in
	* col 1 matching col 2,
	* there will be another match in col 2 matching col 1
	*/
	total_tuples = num1_tuples * num2_tuples;
	rows_returned = 2 * ((num1_tuples * selectivity1) + (num2_tuples * selectivity2));

	POSTGIS_DEBUGF(3, "Rows from rel1: %f", num1_tuples * selectivity1);
	POSTGIS_DEBUGF(3, "Rows from rel2: %f", num2_tuples * selectivity2);
	POSTGIS_DEBUGF(3, "Estimated rows returned: %f", rows_returned);

	/*
	* One (or both) tuple count is zero...
	* We return default selectivity estimate.
	* We could probably attempt at an estimate
	* w/out looking at tables tuple count, with
	* a function of selectivity1, selectivity2.
	*/
	if ( ! total_tuples )
	{
		POSTGIS_DEBUG(3, "Total tuples == 0, returning default join selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	if ( rows_returned > total_tuples )
		PG_RETURN_FLOAT8(1.0);

	PG_RETURN_FLOAT8(rows_returned / total_tuples);
}
コード例 #13
0
Datum geography_gist_selectivity(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

	/* Oid operator = PG_GETARG_OID(1); */
	List *args = (List *) PG_GETARG_POINTER(2);
	/* int varRelid = PG_GETARG_INT32(3); */
	Oid relid;
	HeapTuple stats_tuple;
	GEOG_STATS *geogstats;
	/*
	 * This is to avoid casting the corresponding
	 * "type-punned" pointer, which would break
	 * "strict-aliasing rules".
	 */
	GEOG_STATS **gsptr=&geogstats;
	int geogstats_nvalues = 0;
	Node *other;
	Var *self;
	GBOX search_box;
	float8 selectivity = 0;

	POSTGIS_DEBUG(2, "geography_gist_selectivity called");

	/* Fail if not a binary opclause (probably shouldn't happen) */
	if (list_length(args) != 2)
	{
		POSTGIS_DEBUG(3, "geography_gist_selectivity: not a binary opclause");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	 * This selectivity function is invoked by a clause of the form <arg> && <arg>
	 *
	 * In typical usage, one argument will be a column reference, while the other will
	 * be a geography constant; set self to point to the column argument and other
	 * to point to the constant argument.
	 */
	other = (Node *) linitial(args);
	if ( ! IsA(other, Const) )
	{
		self = (Var *)other;
		other = (Node *) lsecond(args);
	}
	else
	{
		self = (Var *) lsecond(args);
	}

	if ( ! IsA(other, Const) )
	{
		POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	/*
	* We don't have a nice <const> && <var> or <var> && <const> 
	* situation here. <const> && <const> would probably get evaluated
	* away by PgSQL earlier on. <func> && <const> is harder, and the
	* case we get often is <const> && ST_Expand(<var>), which does 
	* actually have a subtly different selectivity than a bae
	* <const> && <var> call. It's calculatable though, by expanding
	* every cell in the histgram appropriately.
	* 
	* Discussion: http://trac.osgeo.org/postgis/ticket/1828
	*
	* To do? Do variable selectivity based on the <func> node.
	*/
	if ( ! IsA(self, Var) )
	{
		POSTGIS_DEBUG(3, " no bare variable argument ? - returning a moderate selectivity");
//		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
		PG_RETURN_FLOAT8(0.33333);
	}

	/* Convert coordinates to 3D geodesic */
	search_box.flags = 1;
	FLAGS_SET_GEODETIC(search_box.flags, 1);
	if ( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
	{
		POSTGIS_DEBUG(3, " search box cannot be calculated");
		PG_RETURN_FLOAT8(0.0);
	}

	POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g %.15g, %.15g %.15g %.15g",
	               search_box.xmin, search_box.ymin, search_box.zmin,
	               search_box.xmax, search_box.ymax, search_box.zmax);

	/*
	 * Get pg_statistic row
	 */
	relid = getrelid(self->varno, root->parse->rtable);

	stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0);
	if ( ! stats_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default estimate");

		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}


	if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOGRAPHY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **)gsptr, &geogstats_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOGRAPHY stats not found - returning default geography selectivity");

		ReleaseSysCache(stats_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOGRAPHY_SEL);
	}

	POSTGIS_DEBUGF(4, " %d read from stats", geogstats_nvalues);

	POSTGIS_DEBUGF(4, " histo: xmin,ymin,zmin: %f,%f,%f", geogstats->xmin, geogstats->ymin, geogstats->zmin);
	POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f,%f", geogstats->xmax, geogstats->ymax, geogstats->zmax);
	POSTGIS_DEBUGF(4, " histo: unitsx: %f", geogstats->unitsx);
	POSTGIS_DEBUGF(4, " histo: unitsy: %f", geogstats->unitsy);
	POSTGIS_DEBUGF(4, " histo: unitsz: %f", geogstats->unitsz);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCoverage: %f", geogstats->avgFeatureCoverage);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geogstats->avgFeatureCells);

	/*
	 * Do the estimation
	 */
	selectivity = estimate_selectivity(&search_box, geogstats);

	POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);

	free_attstatsslot(0, NULL, 0, (float *)geogstats, geogstats_nvalues);
	ReleaseSysCache(stats_tuple);
	PG_RETURN_FLOAT8(selectivity);
}
コード例 #14
0
Datum geometry_gist_sel_2d(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

	/* Oid operator = PG_GETARG_OID(1); */
	List *args = (List *) PG_GETARG_POINTER(2);
	/* int varRelid = PG_GETARG_INT32(3); */
	Oid relid;
	HeapTuple stats_tuple;
	GEOM_STATS *geomstats;
	/*
	 * This is to avoid casting the corresponding
	 * "type-punned" pointer, which would break
	 * "strict-aliasing rules".
	 */
	GEOM_STATS **gsptr=&geomstats;
	int geomstats_nvalues=0;
	Node *other;
	Var *self;
	GBOX search_box;
	float8 selectivity=0;

	POSTGIS_DEBUG(2, "geometry_gist_sel called");

	/* Fail if not a binary opclause (probably shouldn't happen) */
	if (list_length(args) != 2)
	{
		POSTGIS_DEBUG(3, "geometry_gist_sel: not a binary opclause");

		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}


	/*
	 * Find the constant part
	 */
	other = (Node *) linitial(args);
	if ( ! IsA(other, Const) )
	{
		self = (Var *)other;
		other = (Node *) lsecond(args);
	}
	else
	{
		self = (Var *) lsecond(args);
	}

	if ( ! IsA(other, Const) )
	{
		POSTGIS_DEBUG(3, " no constant arguments - returning default selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	/*
	 * We are working on two constants..
	 * TODO: check if expression is true,
	 *       returned set would be either
	 *       the whole or none.
	 */
	if ( ! IsA(self, Var) )
	{
		POSTGIS_DEBUG(3, " no variable argument ? - returning default selectivity");

		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	/*
	 * Convert the constant to a BOX
	 */

	if( ! gserialized_datum_get_gbox_p(((Const*)other)->constvalue, &search_box) )
	{
		POSTGIS_DEBUG(3, "search box is EMPTY");
		PG_RETURN_FLOAT8(0.0);
	}

	POSTGIS_DEBUGF(4, " requested search box is : %.15g %.15g, %.15g %.15g",search_box.xmin,search_box.ymin,search_box.xmax,search_box.ymax);

	/*
	 * Get pg_statistic row
	 */

	relid = getrelid(self->varno, root->parse->rtable);

	stats_tuple = SearchSysCache(STATRELATT, ObjectIdGetDatum(relid), Int16GetDatum(self->varattno), 0, 0);
	if ( ! stats_tuple )
	{
		POSTGIS_DEBUG(3, " No statistics, returning default estimate");

		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}


	if ( ! get_attstatsslot(stats_tuple, 0, 0, STATISTIC_KIND_GEOMETRY, InvalidOid, NULL, NULL,
#if POSTGIS_PGSQL_VERSION >= 85
	                        NULL,
#endif
	                        (float4 **)gsptr, &geomstats_nvalues) )
	{
		POSTGIS_DEBUG(3, " STATISTIC_KIND_GEOMETRY stats not found - returning default geometry selectivity");

		ReleaseSysCache(stats_tuple);
		PG_RETURN_FLOAT8(DEFAULT_GEOMETRY_SEL);
	}

	POSTGIS_DEBUGF(4, " %d read from stats", geomstats_nvalues);

	POSTGIS_DEBUGF(4, " histo: xmin,ymin: %f,%f",
	               geomstats->xmin, geomstats->ymin);
	POSTGIS_DEBUGF(4, " histo: xmax,ymax: %f,%f",
	               geomstats->xmax, geomstats->ymax);
	POSTGIS_DEBUGF(4, " histo: cols: %f", geomstats->rows);
	POSTGIS_DEBUGF(4, " histo: rows: %f", geomstats->cols);
	POSTGIS_DEBUGF(4, " histo: avgFeatureArea: %f", geomstats->avgFeatureArea);
	POSTGIS_DEBUGF(4, " histo: avgFeatureCells: %f", geomstats->avgFeatureCells);

	/*
	 * Do the estimation
	 */
	selectivity = estimate_selectivity(&search_box, geomstats);


	POSTGIS_DEBUGF(3, " returning computed value: %f", selectivity);

	free_attstatsslot(0, NULL, 0, (float *)geomstats, geomstats_nvalues);
	ReleaseSysCache(stats_tuple);
	PG_RETURN_FLOAT8(selectivity);

}