/*
 * have_relevant_joinclause
 *		Detect whether there is a joinclause that can be used to join
 *		the two given relations.
 *
 * A clause is treated as usable when every relation it requires
 * (rinfo->required_relids) is contained in the union of rel1's and rel2's
 * relids.  The scan stops at the first such clause.
 *
 * 'root' is not referenced in the portion of the function shown here.
 * 'rel1' and 'rel2' are the two relations being considered for joining.
 *
 * NOTE(review): the tail of this function is not visible in this excerpt;
 * presumably it returns 'result' -- confirm against the full source.
 */
bool
have_relevant_joinclause(PlannerInfo *root,
                         RelOptInfo *rel1, RelOptInfo *rel2)
{
    bool		result = false;
    Relids		join_relids;
    List	   *joininfo;
    ListCell   *l;

    /* Relids of the proposed join: everything present in either input. */
    join_relids = bms_union(rel1->relids, rel2->relids);

    /*
     * We could scan either relation's joininfo list; may as well use the
     * shorter one.
     */
    if (list_length(rel1->joininfo) <= list_length(rel2->joininfo))
        joininfo = rel1->joininfo;
    else
        joininfo = rel2->joininfo;

    foreach(l, joininfo)
    {
        RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);

        /* The clause requires no rels outside the proposed join. */
        if (bms_is_subset(rinfo->required_relids, join_relids))
        {
            result = true;
            break;
        }
    }
Пример #2
0
/*
 * clause_sides_match_join
 *	  Check that a join clause keeps outer and inner rels on opposite sides.
 *
 * It is already known that the clause is a binary opclause referencing only
 * rels of the current join.  What remains is to verify that one side draws
 * solely on outerrel and the other solely on innerrel, i.e. the clause reads
 * "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr".
 *
 * Returns true on a match, after recording in the transient flag
 * rinfo->outer_is_left whether the left side is the outer one.  Returns
 * false, leaving the flag untouched, when neither orientation fits.
 */
static inline bool
clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
						RelOptInfo *innerrel)
{
	bool		left_is_outer;

	/* First try the "outer op inner" orientation. */
	left_is_outer = bms_is_subset(rinfo->left_relids, outerrel->relids) &&
		bms_is_subset(rinfo->right_relids, innerrel->relids);

	if (!left_is_outer)
	{
		/* Otherwise it must be "inner op outer", or the clause is no good. */
		if (!bms_is_subset(rinfo->left_relids, innerrel->relids) ||
			!bms_is_subset(rinfo->right_relids, outerrel->relids))
			return false;
	}

	rinfo->outer_is_left = left_is_outer;
	return true;
}
Пример #3
0
/*
 * checkPolicyForUniqueIndex
 *
 * Verify that a unique or primary key index over the given attributes is
 * compatible with the relation's distribution policy (GpPolicy).
 *
 * A randomly distributed table cannot carry such an index at all.  For any
 * other policy, every distribution key column must appear among the indexed
 * columns; column order does not matter.
 *
 * When the index does not cover the distribution key, the policy is replaced
 * by one built from the index columns (MPP-101), but only if the relation is
 * empty, has no primary key or unique index yet, and the index contains no
 * expressions.  In every other case an error is raised.
 *
 * 'rel' is the relation the index is being created on.
 * 'indattr' holds the attribute numbers of the indexed columns.
 * 'nidxatts' is the number of entries in indattr.
 * 'isprimary' selects PRIMARY KEY vs. UNIQUE wording in messages.
 * 'has_exprs' tells whether the index definition contains expressions.
 * 'has_pkey' tells whether the relation already has a primary key.
 * 'has_ukey' tells whether the relation already has a unique index.
 */
void
checkPolicyForUniqueIndex(Relation rel, AttrNumber *indattr, int nidxatts,
						  bool isprimary, bool has_exprs, bool has_pkey,
						  bool has_ukey)
{
	GpPolicy   *pol = rel->rd_cdbpolicy;
	Bitmapset  *policy_cols = NULL;
	Bitmapset  *index_cols = NULL;
	GpPolicy   *newpolicy;
	int			k;

	/* Random distribution rules out unique and primary key indexes. */
	if (GpPolicyIsRandomly(pol))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("%s and DISTRIBUTED RANDOMLY are incompatible",
						isprimary ? "PRIMARY KEY" : "UNIQUE")));

	/*
	 * Gather both column sets as bitmaps.  Column order is irrelevant, so a
	 * single subset test afterwards replaces nested loops.
	 */
	for (k = 0; k < pol->nattrs; k++)
		policy_cols = bms_add_member(policy_cols, pol->attrs[k]);

	for (k = 0; k < nidxatts; k++)
	{
		AttrNumber	attno = indattr[k];

		/* Negative attribute numbers are rejected as system columns. */
		if (attno < 0)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
					 errmsg("cannot create %s on system column",
							isprimary ? "primary key" : "unique index")));

		index_cols = bms_add_member(index_cols, attno);
	}

	Assert(bms_membership(policy_cols) != BMS_EMPTY_SET);
	Assert(bms_membership(index_cols) != BMS_EMPTY_SET);

	/*
	 * Nothing to do when the index already covers the distribution key.
	 *
	 * It might be tempting to update the policy to match the index even in
	 * this case, but then a user who deliberately distributes on (a, b) and
	 * later creates an index on (a, b, c) would have the policy changed
	 * underneath them.  What is really needed is a field in
	 * gp_distribution_policy recording whether the policy was set
	 * explicitly.
	 */
	if (bms_is_subset(policy_cols, index_cols))
		return;

	/*
	 * The index misses part of the distribution key.  That is only
	 * recoverable for an empty relation with no existing primary key or
	 * unique index and an expression-free index definition.
	 */
	if (cdbRelSize(rel) != 0 || has_pkey || has_ukey || has_exprs)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
				 errmsg("%s must contain all columns in the "
						"distribution key of relation \"%s\"",
						isprimary ? "PRIMARY KEY" : "UNIQUE index",
						RelationGetRelationName(rel))));

	/* Table is not populated yet: switch the policy to the index columns. */
	newpolicy = palloc(sizeof(GpPolicy) +
					   (sizeof(AttrNumber) * nidxatts));
	newpolicy->ptype = POLICYTYPE_PARTITIONED;
	for (k = 0; k < nidxatts; k++)
		newpolicy->attrs[k] = indattr[k];
	newpolicy->nattrs = k;		/* number of columns copied above */

	GpPolicyReplace(rel->rd_id, newpolicy);

	if (isprimary)
		elog(NOTICE, "updating distribution policy to match new primary key");
	else
		elog(NOTICE, "updating distribution policy to match new unique index");
}
Пример #4
0
/*
 * join_is_removable
 *	  Determine whether we need not perform the join at all, because
 *	  it will just duplicate its left input.
 *
 * This is true for a left join for which the join condition cannot match
 * more than one inner-side row.  (There are other possibly interesting
 * cases, but we don't have the infrastructure to prove them.)
 *
 * Note: there is no need to consider the symmetrical case of duplicating the
 * right input, because add_paths_to_joinrel() will be called with each rel
 * on the outer side.
 *
 * 'root' is not referenced in the portion of the function shown here.
 * 'joinrel' is the join relation; its relids decide whether an inner-rel
 *		attribute is needed above the join.
 * 'outerrel' and 'innerrel' are the two inputs of the candidate join.
 * 'restrictlist' is the list of RestrictInfos scanned for usable clauses.
 * 'jointype' must be JOIN_LEFT for removal to be considered at all.
 *
 * NOTE(review): only the first part of this function is visible in this
 * excerpt.  It ends after collecting candidate clauses into clause_list;
 * how that list is used afterwards must be checked in the full source.
 */
static bool
join_is_removable(PlannerInfo *root,
				  RelOptInfo *joinrel,
				  RelOptInfo *outerrel,
				  RelOptInfo *innerrel,
				  List *restrictlist,
				  JoinType jointype)
{
	List	   *clause_list = NIL;
	ListCell   *l;
	int			attroff;

	/*
	 * Currently, we only know how to remove left joins to a baserel with
	 * unique indexes.	We can check most of these criteria pretty trivially
	 * to avoid doing useless extra work.  But checking whether any of the
	 * indexes are unique would require iterating over the indexlist, so for
	 * now we just make sure there are indexes of some sort or other.  If none
	 * of them are unique, join removal will still fail, just slightly later.
	 */
	if (jointype != JOIN_LEFT ||
		innerrel->reloptkind == RELOPT_JOINREL ||
		innerrel->rtekind != RTE_RELATION ||
		innerrel->indexlist == NIL)
		return false;

	/*
	 * We can't remove the join if any inner-rel attributes are used above the
	 * join.
	 *
	 * Note that this test only detects use of inner-rel attributes in higher
	 * join conditions and the target list.  There might be such attributes in
	 * pushed-down conditions at this join, too.  We check that case below.
	 *
	 * As a micro-optimization, it seems better to start with max_attr and
	 * count down rather than starting with min_attr and counting up, on the
	 * theory that the system attributes are somewhat less likely to be wanted
	 * and should be tested last.
	 *
	 * attroff walks the attr_needed[] array from offset
	 * (max_attr - min_attr) down to offset 0.
	 */
	for (attroff = innerrel->max_attr - innerrel->min_attr;
		 attroff >= 0;
		 attroff--)
	{
		if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids))
			return false;
	}

	/*
	 * Search for mergejoinable clauses that constrain the inner rel against
	 * either the outer rel or a pseudoconstant.  If an operator is
	 * mergejoinable then it behaves like equality for some btree opclass, so
	 * it's what we want.  The mergejoinability test also eliminates clauses
	 * containing volatile functions, which we couldn't depend on.
	 */
	foreach(l, restrictlist)
	{
		RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);

		/*
		 * If we find a pushed-down clause, it must have come from above the
		 * outer join and it must contain references to the inner rel.	(If it
		 * had only outer-rel variables, it'd have been pushed down into the
		 * outer rel.)	Therefore, we can conclude that join removal is unsafe
		 * without any examination of the clause contents.
		 */
		if (restrictinfo->is_pushed_down)
			return false;

		/* Ignore if it's not a mergejoinable clause */
		if (!restrictinfo->can_join ||
			restrictinfo->mergeopfamilies == NIL)
			continue;			/* not mergejoinable */

		/*
		 * Check if clause has the form "outer op inner" or "inner op outer".
		 * On success this also sets restrictinfo->outer_is_left.
		 */
		if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
			continue;			/* no good for these input relations */

		/* OK, add to list */
		clause_list = lappend(clause_list, restrictinfo);
	}
Пример #5
0
/*
 * make_join_rel
 *	   Find or create a join RelOptInfo that represents the join of
 *	   the two given rels, and add to it path information for paths
 *	   created with the two rels as outer and inner rel.
 *	   (The join rel may already contain paths generated from other
 *	   pairs of rels that add up to the same set of base rels.)
 *
 * This copy carries a pg_hint_plan addition: after the joinrel has been
 * found or built, its row estimate (joinrel->rows) may be adjusted by a
 * Rows hint taken from current_hint, before any paths are added.
 *
 * 'root' is the planner context, passed through to the helper routines.
 * 'rel1' and 'rel2' are the two input rels; their relid sets must not
 *		overlap.  They are swapped locally when join_is_legal() reports
 *		that the join info expects them in the opposite order.
 *
 * Returns the join RelOptInfo, or NULL when join_is_legal() rejects the
 * join.
 *
 * NB: will return NULL if attempted join is not valid.  This can happen
 * when working with outer joins, or with IN or EXISTS clauses that have been
 * turned into joins.
 */
RelOptInfo *
make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
{
	Relids		joinrelids;
	SpecialJoinInfo *sjinfo;
	bool		reversed;
	SpecialJoinInfo sjinfo_data;
	RelOptInfo *joinrel;
	List	   *restrictlist;

	/* We should never try to join two overlapping sets of rels. */
	Assert(!bms_overlap(rel1->relids, rel2->relids));

	/* Construct Relids set that identifies the joinrel. */
	joinrelids = bms_union(rel1->relids, rel2->relids);

	/* Check validity and determine join type. */
	if (!join_is_legal(root, rel1, rel2, joinrelids,
					   &sjinfo, &reversed))
	{
		/* invalid join path */
		bms_free(joinrelids);
		return NULL;
	}

	/* Swap rels if needed to match the join info. */
	if (reversed)
	{
		RelOptInfo *trel = rel1;

		rel1 = rel2;
		rel2 = trel;
	}

	/*
	 * If it's a plain inner join, then we won't have found anything in
	 * join_info_list.  Make up a SpecialJoinInfo so that selectivity
	 * estimation functions will know what's being joined.  The struct lives
	 * in the local variable sjinfo_data, so it is only valid for the
	 * duration of this call.
	 */
	if (sjinfo == NULL)
	{
		sjinfo = &sjinfo_data;
		sjinfo->type = T_SpecialJoinInfo;
		sjinfo->min_lefthand = rel1->relids;
		sjinfo->min_righthand = rel2->relids;
		sjinfo->syn_lefthand = rel1->relids;
		sjinfo->syn_righthand = rel2->relids;
		sjinfo->jointype = JOIN_INNER;
		/* we don't bother trying to make the remaining fields valid */
		sjinfo->lhs_strict = false;
		sjinfo->delay_upper_joins = false;
		sjinfo->semi_can_btree = false;
		sjinfo->semi_can_hash = false;
		sjinfo->semi_operators = NIL;
		sjinfo->semi_rhs_exprs = NIL;
	}

	/*
	 * Find or build the join RelOptInfo, and compute the restrictlist that
	 * goes with this particular joining.
	 */
	joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
							 &restrictlist);

	/* !!! START: HERE IS THE PART WHICH ADDED FOR PG_HINT_PLAN !!! */
	{
		RowsHint   *rows_hint = NULL;
		int			i;
		RowsHint   *justforme = NULL;	/* hint targeting exactly this joinrel */
		RowsHint   *domultiply = NULL;	/* RVT_MULTI hint first covered here */

		/*
		 * Search for applicable rows hints for this join node.  If several
		 * hints qualify for the same role, the last one in the array wins.
		 */
		for (i = 0; i < current_hint->num_hints[HINT_TYPE_ROWS]; i++)
		{
			rows_hint = current_hint->rows_hints[i];

			/*
			 * Skip this rows_hint if it is already in the error state or it
			 * doesn't target any join rels.
			 */
			if (!rows_hint->joinrelids ||
				rows_hint->base.state == HINT_STATE_ERROR)
				continue;

			if (bms_equal(joinrelids, rows_hint->joinrelids))
			{
				/*
				 * This joinrel is exactly the target of this rows_hint, so
				 * tweak the rows estimation according to the hint.
				 */
				justforme = rows_hint;
			}
			else if (!(bms_is_subset(rows_hint->joinrelids, rel1->relids) ||
					   bms_is_subset(rows_hint->joinrelids, rel2->relids)) &&
					 bms_is_subset(rows_hint->joinrelids, joinrelids) &&
					 rows_hint->value_type == RVT_MULTI)
			{
				/*
				 * If the rows_hint's target relids are not a subset of
				 * either component rel but are a subset of this joinrel, the
				 * hint's targets are spread over both component rels.  This
				 * means that this hint has never been applied so far and
				 * this joinrel is the first (and only) chance for it to fire
				 * in the current join tree.  Only the multiplication hint is
				 * cumulative in nature, so only RVT_MULTI is applied in this
				 * way.
				 */
				domultiply = rows_hint;
			}
		}

		if (justforme)
		{
			/*
			 * If a hint exactly for this joinrel is found, no other
			 * adjustment method is needed.  It must not be applied more than
			 * once, though, because this joinrel may already have been
			 * adjusted by this hint; hence the HINT_STATE_NOTUSED check.
			 */
			if (justforme->base.state == HINT_STATE_NOTUSED)
				joinrel->rows = adjust_rows(joinrel->rows, justforme);
		}
		else
		{
			if (domultiply)
			{
				/*
				 * If there are multiple routes up to this joinrel to which
				 * this hint did not apply earlier, this multiplication hint
				 * would be applied more than once.  There is no means to
				 * detect that, so always re-estimate the row number of this
				 * joinrel just before applying the hint.  This is a bit
				 * different from normal planner behavior but it doesn't do
				 * much harm.
				 */
				set_joinrel_size_estimates(root, joinrel, rel1, rel2, sjinfo,
										   restrictlist);
				
				joinrel->rows = adjust_rows(joinrel->rows, domultiply);
			}
			
		}
	}
	/* !!! END: HERE IS THE PART WHICH ADDED FOR PG_HINT_PLAN !!! */

	/*
	 * If we've already proven this join is empty, we needn't consider any
	 * more paths for it.
	 */
	if (is_dummy_rel(joinrel))
	{
		bms_free(joinrelids);
		return joinrel;
	}

	/*
	 * Consider paths using each rel as both outer and inner.  Depending on
	 * the join type, a provably empty outer or inner rel might mean the join
	 * is provably empty too; in which case throw away any previously computed
	 * paths and mark the join as dummy.  (We do it this way since it's
	 * conceivable that dummy-ness of a multi-element join might only be
	 * noticeable for certain construction paths.)
	 *
	 * Also, a provably constant-false join restriction typically means that
	 * we can skip evaluating one or both sides of the join.  We do this by
	 * marking the appropriate rel as dummy.  For outer joins, a
	 * constant-false restriction that is pushed down still means the whole
	 * join is dummy, while a non-pushed-down one means that no inner rows
	 * will join so we can treat the inner rel as dummy.
	 *
	 * We need only consider the jointypes that appear in join_info_list, plus
	 * JOIN_INNER.
	 */
	switch (sjinfo->jointype)
	{
		case JOIN_INNER:
			if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
				restriction_is_constant_false(restrictlist, false))
			{
				mark_dummy_rel(joinrel);
				break;
			}
			add_paths_to_joinrel(root, joinrel, rel1, rel2,
								 JOIN_INNER, sjinfo,
								 restrictlist);
			add_paths_to_joinrel(root, joinrel, rel2, rel1,
								 JOIN_INNER, sjinfo,
								 restrictlist);
			break;
		case JOIN_LEFT:
			if (is_dummy_rel(rel1) ||
				restriction_is_constant_false(restrictlist, true))
			{
				mark_dummy_rel(joinrel);
				break;
			}
			if (restriction_is_constant_false(restrictlist, false) &&
				bms_is_subset(rel2->relids, sjinfo->syn_righthand))
				mark_dummy_rel(rel2);
			/* With the inputs swapped, the same join is a JOIN_RIGHT. */
			add_paths_to_joinrel(root, joinrel, rel1, rel2,
								 JOIN_LEFT, sjinfo,
								 restrictlist);
			add_paths_to_joinrel(root, joinrel, rel2, rel1,
								 JOIN_RIGHT, sjinfo,
								 restrictlist);
			break;
		case JOIN_FULL:
			if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
				restriction_is_constant_false(restrictlist, true))
			{
				mark_dummy_rel(joinrel);
				break;
			}
			add_paths_to_joinrel(root, joinrel, rel1, rel2,
								 JOIN_FULL, sjinfo,
								 restrictlist);
			add_paths_to_joinrel(root, joinrel, rel2, rel1,
								 JOIN_FULL, sjinfo,
								 restrictlist);

			/*
			 * If there are join quals that aren't mergeable or hashable, we
			 * may not be able to build any valid plan.  Complain here so that
			 * we can give a somewhat-useful error message.  (Since we have no
			 * flexibility of planning for a full join, there's no chance of
			 * succeeding later with another pair of input rels.)
			 */
			if (joinrel->pathlist == NIL)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
			break;
		case JOIN_SEMI:

			/*
			 * We might have a normal semijoin, or a case where we don't have
			 * enough rels to do the semijoin but can unique-ify the RHS and
			 * then do an innerjoin (see comments in join_is_legal).  In the
			 * latter case we can't apply JOIN_SEMI joining.
			 */
			if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
				bms_is_subset(sjinfo->min_righthand, rel2->relids))
			{
				if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
					restriction_is_constant_false(restrictlist, false))
				{
					mark_dummy_rel(joinrel);
					break;
				}
				add_paths_to_joinrel(root, joinrel, rel1, rel2,
									 JOIN_SEMI, sjinfo,
									 restrictlist);
			}

			/*
			 * If we know how to unique-ify the RHS and one input rel is
			 * exactly the RHS (not a superset) we can consider unique-ifying
			 * it and then doing a regular join.  (The create_unique_path
			 * check here is probably redundant with what join_is_legal did,
			 * but if so the check is cheap because it's cached.  So test
			 * anyway to be sure.)
			 */
			if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
				create_unique_path(root, rel2, rel2->cheapest_total_path,
								   sjinfo) != NULL)
			{
				if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
					restriction_is_constant_false(restrictlist, false))
				{
					mark_dummy_rel(joinrel);
					break;
				}
				add_paths_to_joinrel(root, joinrel, rel1, rel2,
									 JOIN_UNIQUE_INNER, sjinfo,
									 restrictlist);
				add_paths_to_joinrel(root, joinrel, rel2, rel1,
									 JOIN_UNIQUE_OUTER, sjinfo,
									 restrictlist);
			}
			break;
		case JOIN_ANTI:
			if (is_dummy_rel(rel1) ||
				restriction_is_constant_false(restrictlist, true))
			{
				mark_dummy_rel(joinrel);
				break;
			}
			if (restriction_is_constant_false(restrictlist, false) &&
				bms_is_subset(rel2->relids, sjinfo->syn_righthand))
				mark_dummy_rel(rel2);
			/* Only the rel1-outer orientation is tried for an antijoin. */
			add_paths_to_joinrel(root, joinrel, rel1, rel2,
								 JOIN_ANTI, sjinfo,
								 restrictlist);
			break;
		default:
			/* other values not expected here */
			elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
			break;
	}

	bms_free(joinrelids);

	return joinrel;
}