示例#1
0
/*
 * arraycontsel -- restriction selectivity for array @>, &&, <@ operators
 */
Datum
arraycontsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Selectivity selec;
	Oid			element_typeid;

	/*
	 * If expression is not (variable op something) or (something op
	 * variable), then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
	}

	/*
	 * The "&&", "@>" and "<@" operators are strict, so we can cope with a
	 * NULL constant right away.
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	/*
	 * If var is on the right, commute the operator, so that we can assume the
	 * var is on the left in what follows.
	 */
	if (!varonleft)
	{
		if (operator == OID_ARRAY_CONTAINS_OP)
			operator = OID_ARRAY_CONTAINED_OP;
		else if (operator == OID_ARRAY_CONTAINED_OP)
			operator = OID_ARRAY_CONTAINS_OP;
	}

	/*
	 * OK, there's a Var and a Const we're dealing with here.  We need the
	 * Const to be an array with same element type as column, else we can't do
	 * anything useful.  (Such cases will likely fail at runtime, but here
	 * we'd rather just return a default estimate.)
	 */
	element_typeid = get_base_element_type(((Const *) other)->consttype);
	if (element_typeid != InvalidOid &&
		element_typeid == get_base_element_type(vardata.vartype))
	{
		selec = calc_arraycontsel(&vardata, ((Const *) other)->constvalue,
								  element_typeid, operator);
	}
	else
	{
		selec = DEFAULT_SEL(operator);
	}

	ReleaseVariableStats(vardata);

	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}
示例#2
0
/*
 *	ltreeparentsel - Selectivity of parent relationship for ltree data types.
 */
Datum
ltreeparentsel(PG_FUNCTION_ARGS)
{
    PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
    Oid			operator = PG_GETARG_OID(1);
    List	   *args = (List *) PG_GETARG_POINTER(2);
    int			varRelid = PG_GETARG_INT32(3);
    VariableStatData vardata;
    Node	   *other;
    bool		varonleft;
    double		selec;

    /*
     * If expression is not variable <@ something or something <@ variable,
     * then punt and return a default estimate.
     */
    if (!get_restriction_variable(root, args, varRelid,
                                  &vardata, &other, &varonleft))
        PG_RETURN_FLOAT8(DEFAULT_PARENT_SEL);

    /*
     * If the something is a NULL constant, assume operator is strict and
     * return zero, ie, operator will never return TRUE.
     */
    if (IsA(other, Const) &&
            ((Const *) other)->constisnull)
    {
        ReleaseVariableStats(vardata);
        PG_RETURN_FLOAT8(0.0);
    }

    if (IsA(other, Const))
    {
        /* Variable is being compared to a known non-null constant */
        Datum		constval = ((Const *) other)->constvalue;
        FmgrInfo	contproc;
        double		mcvsum;
        double		mcvsel;
        double		nullfrac;
        int			hist_size;

        fmgr_info(get_opcode(operator), &contproc);

        /*
         * Is the constant "<@" to any of the column's most common values?
         */
        mcvsel = mcv_selectivity(&vardata, &contproc, constval, varonleft,
                                 &mcvsum);

        /*
         * If the histogram is large enough, see what fraction of it the
         * constant is "<@" to, and assume that's representative of the
         * non-MCV population.	Otherwise use the default selectivity for the
         * non-MCV population.
         */
        selec = histogram_selectivity(&vardata, &contproc,
                                      constval, varonleft,
                                      10, 1, &hist_size);
        if (selec < 0)
        {
            /* Nope, fall back on default */
            selec = DEFAULT_PARENT_SEL;
        }
        else if (hist_size < 100)
        {
            /*
             * For histogram sizes from 10 to 100, we combine the histogram
             * and default selectivities, putting increasingly more trust in
             * the histogram for larger sizes.
             */
            double		hist_weight = hist_size / 100.0;

            selec = selec * hist_weight +
                    DEFAULT_PARENT_SEL * (1.0 - hist_weight);
        }

        /* In any case, don't believe extremely small or large estimates. */
        if (selec < 0.0001)
            selec = 0.0001;
        else if (selec > 0.9999)
            selec = 0.9999;

        if (HeapTupleIsValid(vardata.statsTuple))
            nullfrac = ((Form_pg_statistic) GETSTRUCT(vardata.statsTuple))->stanullfrac;
        else
            nullfrac = 0.0;

        /*
         * Now merge the results from the MCV and histogram calculations,
         * realizing that the histogram covers only the non-null values that
         * are not listed in MCV.
         */
        selec *= 1.0 - nullfrac - mcvsum;
        selec += mcvsel;
    }
    else
        selec = DEFAULT_PARENT_SEL;

    ReleaseVariableStats(vardata);

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(selec);

    PG_RETURN_FLOAT8((float8) selec);
}
/*
 * rangesel -- restriction selectivity for range operators
 */
Datum
rangesel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Selectivity selec;
	TypeCacheEntry *typcache = NULL;
	RangeType  *constrange = NULL;

	/*
	 * If expression is not (variable op something) or (something op
	 * variable), then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(default_range_selectivity(operator));

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(default_range_selectivity(operator));
	}

	/*
	 * All the range operators are strict, so we can cope with a NULL constant
	 * right away.
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	/*
	 * If var is on the right, commute the operator, so that we can assume the
	 * var is on the left in what follows.
	 */
	if (!varonleft)
	{
		/* we have other Op var, commute to make var Op other */
		operator = get_commutator(operator);
		if (!operator)
		{
			/* Use default selectivity (should we raise an error instead?) */
			ReleaseVariableStats(vardata);
			PG_RETURN_FLOAT8(default_range_selectivity(operator));
		}
	}

	/*
	 * OK, there's a Var and a Const we're dealing with here.  We need the
	 * Const to be of same range type as the column, else we can't do anything
	 * useful. (Such cases will likely fail at runtime, but here we'd rather
	 * just return a default estimate.)
	 *
	 * If the operator is "range @> element", the constant should be of the
	 * element type of the range column. Convert it to a range that includes
	 * only that single point, so that we don't need special handling for that
	 * in what follows.
	 */
	if (operator == OID_RANGE_CONTAINS_ELEM_OP)
	{
		typcache = range_get_typcache(fcinfo, vardata.vartype);

		if (((Const *) other)->consttype == typcache->rngelemtype->type_id)
		{
			RangeBound	lower,
						upper;

			lower.inclusive = true;
			lower.val = ((Const *) other)->constvalue;
			lower.infinite = false;
			lower.lower = true;
			upper.inclusive = true;
			upper.val = ((Const *) other)->constvalue;
			upper.infinite = false;
			upper.lower = false;
			constrange = range_serialize(typcache, &lower, &upper, false);
		}
	}
	else if (operator == OID_RANGE_ELEM_CONTAINED_OP)
	{
		/*
		 * Here, the Var is the elem, not the range.  For now we just punt and
		 * return the default estimate.  In future we could disassemble the
		 * range constant and apply scalarineqsel ...
		 */
	}
	else if (((Const *) other)->consttype == vardata.vartype)
	{
		/* Both sides are the same range type */
		typcache = range_get_typcache(fcinfo, vardata.vartype);

		constrange = DatumGetRangeType(((Const *) other)->constvalue);
	}

	/*
	 * If we got a valid constant on one side of the operator, proceed to
	 * estimate using statistics. Otherwise punt and return a default constant
	 * estimate.  Note that calc_rangesel need not handle
	 * OID_RANGE_ELEM_CONTAINED_OP.
	 */
	if (constrange)
		selec = calc_rangesel(typcache, &vardata, constrange, operator);
	else
		selec = default_range_selectivity(operator);

	ReleaseVariableStats(vardata);

	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}
示例#4
0
/*
 * _int_matchsel -- restriction selectivity function for intarray @@ query_int
 */
Datum
_int_matchsel(PG_FUNCTION_ARGS)
{
    PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

    List	   *args = (List *) PG_GETARG_POINTER(2);
    int			varRelid = PG_GETARG_INT32(3);
    VariableStatData vardata;
    Node	   *other;
    bool		varonleft;
    Selectivity selec;
    QUERYTYPE  *query;
    Datum	   *mcelems = NULL;
    float4	   *mcefreqs = NULL;
    int			nmcelems = 0;
    float4		minfreq = 0.0;
    float4		nullfrac = 0.0;
    Form_pg_statistic stats;
    Datum	   *values = NULL;
    int			nvalues = 0;
    float4	   *numbers = NULL;
    int			nnumbers = 0;

    /*
     * If expression is not "variable @@ something" or "something @@ variable"
     * then punt and return a default estimate.
     */
    if (!get_restriction_variable(root, args, varRelid,
                                  &vardata, &other, &varonleft))
        PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

    /*
     * Variable should be int[]. We don't support cases where variable is
     * query_int.
     */
    if (vardata.vartype != INT4ARRAYOID)
        PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);

    /*
     * Can't do anything useful if the something is not a constant, either.
     */
    if (!IsA(other, Const))
    {
        ReleaseVariableStats(vardata);
        PG_RETURN_FLOAT8(DEFAULT_EQ_SEL);
    }

    /*
     * The "@@" operator is strict, so we can cope with NULL right away.
     */
    if (((Const *) other)->constisnull)
    {
        ReleaseVariableStats(vardata);
        PG_RETURN_FLOAT8(0.0);
    }

    /* The caller made sure the const is a query, so get it now */
    query = DatumGetQueryTypeP(((Const *) other)->constvalue);

    /* Empty query matches nothing */
    if (query->size == 0)
    {
        ReleaseVariableStats(vardata);
        return (Selectivity) 0.0;
    }

    /*
     * Get the statistics for the intarray column.
     *
     * We're interested in the Most-Common-Elements list, and the NULL
     * fraction.
     */
    if (HeapTupleIsValid(vardata.statsTuple))
    {
        stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
        nullfrac = stats->stanullfrac;

        /*
         * For an int4 array, the default array type analyze function will
         * collect a Most Common Elements list, which is an array of int4s.
         */
        if (get_attstatsslot(vardata.statsTuple,
                             INT4OID, -1,
                             STATISTIC_KIND_MCELEM, InvalidOid,
                             NULL,
                             &values, &nvalues,
                             &numbers, &nnumbers))
        {
            /*
             * There should be three more Numbers than Values, because the
             * last three (for intarray) cells are taken for minimal, maximal
             * and nulls frequency. Punt if not.
             */
            if (nnumbers == nvalues + 3)
            {
                /* Grab the lowest frequency. */
                minfreq = numbers[nnumbers - (nnumbers - nvalues)];

                mcelems = values;
                mcefreqs = numbers;
                nmcelems = nvalues;
            }
        }
    }

    /* Process the logical expression in the query, using the stats */
    selec = int_query_opr_selec(GETQUERY(query) + query->size - 1,
                                mcelems, mcefreqs, nmcelems, minfreq);

    /* MCE stats count only non-null rows, so adjust for null rows. */
    selec *= (1.0 - nullfrac);

    free_attstatsslot(INT4OID, values, nvalues, numbers, nnumbers);
    ReleaseVariableStats(vardata);

    CLAMP_PROBABILITY(selec);

    PG_RETURN_FLOAT8((float8) selec);
}
示例#5
0
/*
 *	tsmatchsel -- Selectivity of "@@"
 *
 * restriction selectivity function for tsvector @@ tsquery and
 * tsquery @@ tsvector
 */
Datum
tsmatchsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);

#ifdef NOT_USED
	Oid			operator = PG_GETARG_OID(1);
#endif
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Selectivity selec;

	/*
	 * If expression is not variable = something or something = variable, then
	 * punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL);

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_TS_MATCH_SEL);
	}

	/*
	 * The "@@" operator is strict, so we can cope with NULL right away
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	/*
	 * OK, there's a Var and a Const we're dealing with here.  We need the
	 * Const to be a TSQuery, else we can't do anything useful.  We have to
	 * check this because the Var might be the TSQuery not the TSVector.
	 */
	if (((Const *) other)->consttype == TSQUERYOID)
	{
		/* tsvector @@ tsquery or the other way around */
		Assert(vardata.vartype == TSVECTOROID);

		selec = tsquerysel(&vardata, ((Const *) other)->constvalue);
	}
	else
	{
		/* If we can't see the query structure, must punt */
		selec = DEFAULT_TS_MATCH_SEL;
	}

	ReleaseVariableStats(vardata);

	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}
/*
 * Selectivity estimation for the subnet inclusion/overlap operators
 */
Datum
networksel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Selectivity selec,
				mcv_selec,
				non_mcv_selec;
	Datum		constvalue,
			   *hist_values;
	int			hist_nvalues;
	Form_pg_statistic stats;
	double		sumcommon,
				nullfrac;
	FmgrInfo	proc;

	/*
	 * If expression is not (variable op something) or (something op
	 * variable), then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
	}

	/* All of the operators handled here are strict. */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}
	constvalue = ((Const *) other)->constvalue;

	/* Otherwise, we need stats in order to produce a non-default estimate. */
	if (!HeapTupleIsValid(vardata.statsTuple))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(DEFAULT_SEL(operator));
	}

	stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
	nullfrac = stats->stanullfrac;

	/*
	 * If we have most-common-values info, add up the fractions of the MCV
	 * entries that satisfy MCV OP CONST.  These fractions contribute directly
	 * to the result selectivity.  Also add up the total fraction represented
	 * by MCV entries.
	 */
	fmgr_info(get_opcode(operator), &proc);
	mcv_selec = mcv_selectivity(&vardata, &proc, constvalue, varonleft,
								&sumcommon);

	/*
	 * If we have a histogram, use it to estimate the proportion of the
	 * non-MCV population that satisfies the clause.  If we don't, apply the
	 * default selectivity to that population.
	 */
	if (get_attstatsslot(vardata.statsTuple,
						 vardata.atttype, vardata.atttypmod,
						 STATISTIC_KIND_HISTOGRAM, InvalidOid,
						 NULL,
						 &hist_values, &hist_nvalues,
						 NULL, NULL))
	{
		int			opr_codenum = inet_opr_codenum(operator);

		/* Commute if needed, so we can consider histogram to be on the left */
		if (!varonleft)
			opr_codenum = -opr_codenum;
		non_mcv_selec = inet_hist_value_sel(hist_values, hist_nvalues,
											constvalue, opr_codenum);

		free_attstatsslot(vardata.atttype, hist_values, hist_nvalues, NULL, 0);
	}
	else
		non_mcv_selec = DEFAULT_SEL(operator);

	/* Combine selectivities for MCV and non-MCV populations */
	selec = mcv_selec + (1.0 - nullfrac - sumcommon) * non_mcv_selec;

	/* Result should be in range, but make sure... */
	CLAMP_PROBABILITY(selec);

	ReleaseVariableStats(vardata);

	PG_RETURN_FLOAT8(selec);
}