/*
 * Join selectivity estimation for the subnet inclusion/overlap operators
 *
 * This function has the same structure as eqjoinsel() in selfuncs.c.
 *
 * Throughout networkjoinsel and its subroutines, we have a performance issue
 * in that the amount of work to be done is O(N^2) in the length of the MCV
 * and histogram arrays.  To keep the runtime from getting out of hand when
 * large statistics targets have been set, we arbitrarily limit the number of
 * values considered to 1024 (MAX_CONSIDERED_ELEMS).  For the MCV arrays, this
 * is easy: just consider at most the first N elements.  (Since the MCVs are
 * sorted by decreasing frequency, this correctly gets us the first N MCVs.)
 * For the histogram arrays, we decimate; that is consider only every k'th
 * element, where k is chosen so that no more than MAX_CONSIDERED_ELEMS
 * elements are considered.  This should still give us a good random sample of
 * the non-MCV population.  Decimation is done on-the-fly in the loops that
 * iterate over the histogram arrays.
 */
Datum
networkjoinsel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
#ifdef NOT_USED
	JoinType	jointype = (JoinType) PG_GETARG_INT16(3);
#endif
	SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
	double		selec;
	VariableStatData vardata1;
	VariableStatData vardata2;
	bool		join_is_reversed;

	get_join_variables(root, args, sjinfo,
					   &vardata1, &vardata2, &join_is_reversed);

	switch (sjinfo->jointype)
	{
		case JOIN_INNER:
		case JOIN_LEFT:
		case JOIN_FULL:

			/*
			 * Selectivity for left/full join is not exactly the same as inner
			 * join, but we neglect the difference, as eqjoinsel does.
			 */
			selec = networkjoinsel_inner(operator, &vardata1, &vardata2);
			break;
		case JOIN_SEMI:
		case JOIN_ANTI:
			/* Here, it's important that we pass the outer var on the left. */
			if (!join_is_reversed)
				selec = networkjoinsel_semi(operator, &vardata1, &vardata2);
			else
				selec = networkjoinsel_semi(get_commutator(operator),
											&vardata2, &vardata1);
			break;
		default:
			/* other values not expected here */
			elog(ERROR, "unrecognized join type: %d",
				 (int) sjinfo->jointype);
			selec = 0;			/* keep compiler quiet */
			break;
	}

	ReleaseVariableStats(vardata1);
	ReleaseVariableStats(vardata2);

	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}
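/*
 * Example no. 2
 * 0
 *
 * NOTE: the two lines above are non-code separator text found between the
 * listings; they are kept inside a comment so that the file still parses.
 */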
/*
 * rangesel -- restriction selectivity for range operators
 *
 * Arguments (fmgr calling convention):
 *	0: PlannerInfo *root
 *	1: Oid of the operator
 *	2: List of the operator's arguments
 *	3: varRelid, passed through to get_restriction_variable()
 *
 * Returns the estimated selectivity as a float8.  Whenever the clause cannot
 * be analyzed (not "var op const", mismatched constant type, no commutator),
 * the result is default_range_selectivity() for the operator; a NULL
 * constant yields 0.0.  Estimates computed from statistics are passed
 * through CLAMP_PROBABILITY before being returned.
 */
Datum
rangesel(PG_FUNCTION_ARGS)
{
	PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
	Oid			operator = PG_GETARG_OID(1);
	List	   *args = (List *) PG_GETARG_POINTER(2);
	int			varRelid = PG_GETARG_INT32(3);
	VariableStatData vardata;
	Node	   *other;
	bool		varonleft;
	Selectivity selec;
	TypeCacheEntry *typcache = NULL;
	RangeType  *constrange = NULL;

	/*
	 * If expression is not (variable op something) or (something op
	 * variable), then punt and return a default estimate.
	 */
	if (!get_restriction_variable(root, args, varRelid,
								  &vardata, &other, &varonleft))
		PG_RETURN_FLOAT8(default_range_selectivity(operator));

	/*
	 * Can't do anything useful if the something is not a constant, either.
	 */
	if (!IsA(other, Const))
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(default_range_selectivity(operator));
	}

	/*
	 * All the range operators are strict, so we can cope with a NULL constant
	 * right away.
	 */
	if (((Const *) other)->constisnull)
	{
		ReleaseVariableStats(vardata);
		PG_RETURN_FLOAT8(0.0);
	}

	/*
	 * If var is on the right, commute the operator, so that we can assume the
	 * var is on the left in what follows.
	 */
	if (!varonleft)
	{
		/* we have other Op var, commute to make var Op other */
		Oid			commuted = get_commutator(operator);

		if (!commuted)
		{
			/*
			 * No commutator is known.  Fall back to the default estimate for
			 * the operator as it was written in the clause; "operator" is
			 * deliberately left untouched until the lookup has succeeded, so
			 * that we do not hand an invalid OID to
			 * default_range_selectivity().  (Should we raise an error
			 * instead?)
			 *
			 * NOTE(review): assumes default_range_selectivity() chooses its
			 * result based on the operator OID -- confirm.
			 */
			ReleaseVariableStats(vardata);
			PG_RETURN_FLOAT8(default_range_selectivity(operator));
		}
		operator = commuted;
	}

	/*
	 * OK, there's a Var and a Const we're dealing with here.  We need the
	 * Const to be of same range type as the column, else we can't do anything
	 * useful. (Such cases will likely fail at runtime, but here we'd rather
	 * just return a default estimate.)
	 *
	 * If the operator is "range @> element", the constant should be of the
	 * element type of the range column. Convert it to a range that includes
	 * only that single point, so that we don't need special handling for that
	 * in what follows.
	 */
	if (operator == OID_RANGE_CONTAINS_ELEM_OP)
	{
		typcache = range_get_typcache(fcinfo, vardata.vartype);

		if (((Const *) other)->consttype == typcache->rngelemtype->type_id)
		{
			RangeBound	lower,
						upper;

			/* Build the closed single-point range [value, value]. */
			lower.inclusive = true;
			lower.val = ((Const *) other)->constvalue;
			lower.infinite = false;
			lower.lower = true;
			upper.inclusive = true;
			upper.val = ((Const *) other)->constvalue;
			upper.infinite = false;
			upper.lower = false;
			constrange = range_serialize(typcache, &lower, &upper, false);
		}
		/* else: constant has the wrong type; constrange stays NULL */
	}
	else if (operator == OID_RANGE_ELEM_CONTAINED_OP)
	{
		/*
		 * Here, the Var is the elem, not the range.  For now we just punt and
		 * return the default estimate.  In future we could disassemble the
		 * range constant and apply scalarineqsel ...
		 */
	}
	else if (((Const *) other)->consttype == vardata.vartype)
	{
		/* Both sides are the same range type */
		typcache = range_get_typcache(fcinfo, vardata.vartype);

		constrange = DatumGetRangeType(((Const *) other)->constvalue);
	}

	/*
	 * If we got a valid constant on one side of the operator, proceed to
	 * estimate using statistics. Otherwise punt and return a default constant
	 * estimate.  Note that calc_rangesel need not handle
	 * OID_RANGE_ELEM_CONTAINED_OP.
	 */
	if (constrange)
		selec = calc_rangesel(typcache, &vardata, constrange, operator);
	else
		selec = default_range_selectivity(operator);

	ReleaseVariableStats(vardata);

	CLAMP_PROBABILITY(selec);

	PG_RETURN_FLOAT8((float8) selec);
}