/* * have_relevant_joinclause * Detect whether there is a joinclause that can be used to join * the two given relations. */ bool have_relevant_joinclause(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2) { bool result = false; Relids join_relids; List *joininfo; ListCell *l; join_relids = bms_union(rel1->relids, rel2->relids); /* * We could scan either relation's joininfo list; may as well use the * shorter one. */ if (list_length(rel1->joininfo) <= list_length(rel2->joininfo)) joininfo = rel1->joininfo; else joininfo = rel2->joininfo; foreach(l, joininfo) { RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); if (bms_is_subset(rinfo->required_relids, join_relids)) { result = true; break; } }
/*
 * clause_sides_match_join
 *    Decide whether a join clause cleanly separates the two join inputs.
 *
 * The caller has already established that the clause is a binary opclause
 * mentioning only rels of the current join.  What remains to be verified is
 * that one operand draws exclusively from the outer rel and the other
 * exclusively from the inner rel, i.e. the clause reads either
 * "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr".
 *
 * rinfo:    clause under test; its left_relids/right_relids are inspected.
 * outerrel: proposed outer input of the join.
 * innerrel: proposed inner input of the join.
 *
 * Returns true when the clause has one of the two accepted shapes, and in
 * that case records in the transient flag rinfo->outer_is_left which operand
 * belongs to the outer rel.  Returns false, leaving the flag untouched, when
 * either operand mixes outer and inner rels.
 */
static inline bool
clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
                        RelOptInfo *innerrel)
{
    /* Try the "outer op inner" orientation first. */
    bool        left_is_outer =
        bms_is_subset(rinfo->left_relids, outerrel->relids) &&
        bms_is_subset(rinfo->right_relids, innerrel->relids);

    if (!left_is_outer)
    {
        /* Otherwise the clause must be oriented "inner op outer". */
        if (!(bms_is_subset(rinfo->left_relids, innerrel->relids) &&
              bms_is_subset(rinfo->right_relids, outerrel->relids)))
            return false;       /* no good for these input relations */
    }

    /* Remember which side of the operator is the outer one. */
    rinfo->outer_is_left = left_is_outer;
    return true;
}
/*
 * checkPolicyForUniqueIndex
 *    Verify that the relation's distribution policy is compatible with a
 *    proposed unique or primary key index, adjusting the policy when that
 *    is permissible.
 *
 * rel:       relation being indexed; its rd_cdbpolicy is the policy checked.
 * indattr:   attribute numbers of the index columns (nidxatts entries).
 * nidxatts:  number of entries in indattr.
 * isprimary: true for a PRIMARY KEY, false for a UNIQUE index; only affects
 *            the wording of messages.
 * has_exprs: true if the index definition contains expressions.
 * has_pkey:  true if the relation already has a primary key.
 * has_ukey:  true if the relation already has a unique index.
 *
 * Rules enforced:
 *  - A randomly distributed table cannot carry a unique index at all.
 *  - System columns (negative attribute numbers) cannot be indexed here.
 *  - The indexed columns must be a superset of the distribution key.
 *
 * If the last rule is violated but the relation is empty, has no existing
 * primary key or unique index, and the index has no expressions, the
 * distribution policy is replaced with the index columns instead of raising
 * an error (MPP-101), and a NOTICE is emitted.
 *
 * Returns nothing; incompatibilities are reported via ereport(ERROR).
 */
void
checkPolicyForUniqueIndex(Relation rel, AttrNumber *indattr, int nidxatts,
                          bool isprimary, bool has_exprs, bool has_pkey,
                          bool has_ukey)
{
    GpPolicy   *pol = rel->rd_cdbpolicy;
    Bitmapset  *policy_cols = NULL;
    Bitmapset  *index_cols = NULL;
    int         k;

    /* Random distribution rules out any unique/primary key index. */
    if (GpPolicyIsRandomly(pol))
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("%s and DISTRIBUTED RANDOMLY are incompatible",
                        isprimary ? "PRIMARY KEY" : "UNIQUE")));

    /*
     * Collect both column sets as bitmaps: column order is irrelevant for
     * the comparison, so a set-based subset test is the simplest approach.
     */
    for (k = 0; k < pol->nattrs; k++)
        policy_cols = bms_add_member(policy_cols, pol->attrs[k]);

    for (k = 0; k < nidxatts; k++)
    {
        /* Negative attribute numbers denote system columns. */
        if (indattr[k] < 0)
            ereport(ERROR,
                    (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                     errmsg("cannot create %s on system column",
                            isprimary ? "primary key" : "unique index")));

        index_cols = bms_add_member(index_cols, indattr[k]);
    }

    Assert(bms_membership(policy_cols) != BMS_EMPTY_SET);
    Assert(bms_membership(index_cols) != BMS_EMPTY_SET);

    /*
     * Nothing to do when the distribution key is already covered by the
     * index columns.
     *
     * It might be tempting to update the policy to match the index even in
     * this case.  The trouble is that a user who really wants to distribute
     * on (a, b) and then creates an index on (a, b, c) would have the policy
     * changed underneath them.  What is really needed is a new field in
     * gp_distribution_policy recording whether the policy was set
     * explicitly.
     */
    if (bms_is_subset(policy_cols, index_cols))
        return;

    if (cdbRelSize(rel) == 0 && !has_pkey && !has_ukey && !has_exprs)
    {
        /*
         * The table is not populated yet and nothing else constrains the
         * distribution key, so switch the policy to the index columns.
         * See MPP-101.
         */
        GpPolicy   *policy = palloc(sizeof(GpPolicy) +
                                    (sizeof(AttrNumber) * nidxatts));

        policy->ptype = POLICYTYPE_PARTITIONED;
        policy->nattrs = 0;
        for (k = 0; k < nidxatts; k++)
        {
            policy->attrs[policy->nattrs] = indattr[k];
            policy->nattrs++;
        }

        GpPolicyReplace(rel->rd_id, policy);

        if (isprimary)
            elog(NOTICE, "updating distribution policy to match new primary key");
        else
            elog(NOTICE, "updating distribution policy to match new unique index");
    }
    else
    {
        /* The policy cannot be changed silently: reject the index. */
        ereport(ERROR,
                (errcode(ERRCODE_INVALID_TABLE_DEFINITION),
                 errmsg("%s must contain all columns in the "
                        "distribution key of relation \"%s\"",
                        isprimary ? "PRIMARY KEY" : "UNIQUE index",
                        RelationGetRelationName(rel))));
    }
}
/* * join_is_removable * Determine whether we need not perform the join at all, because * it will just duplicate its left input. * * This is true for a left join for which the join condition cannot match * more than one inner-side row. (There are other possibly interesting * cases, but we don't have the infrastructure to prove them.) * * Note: there is no need to consider the symmetrical case of duplicating the * right input, because add_paths_to_joinrel() will be called with each rel * on the outer side. */ static bool join_is_removable(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, JoinType jointype) { List *clause_list = NIL; ListCell *l; int attroff; /* * Currently, we only know how to remove left joins to a baserel with * unique indexes. We can check most of these criteria pretty trivially * to avoid doing useless extra work. But checking whether any of the * indexes are unique would require iterating over the indexlist, so for * now we just make sure there are indexes of some sort or other. If none * of them are unique, join removal will still fail, just slightly later. */ if (jointype != JOIN_LEFT || innerrel->reloptkind == RELOPT_JOINREL || innerrel->rtekind != RTE_RELATION || innerrel->indexlist == NIL) return false; /* * We can't remove the join if any inner-rel attributes are used above the * join. * * Note that this test only detects use of inner-rel attributes in higher * join conditions and the target list. There might be such attributes in * pushed-down conditions at this join, too. We check that case below. * * As a micro-optimization, it seems better to start with max_attr and * count down rather than starting with min_attr and counting up, on the * theory that the system attributes are somewhat less likely to be wanted * and should be tested last. 
*/ for (attroff = innerrel->max_attr - innerrel->min_attr; attroff >= 0; attroff--) { if (!bms_is_subset(innerrel->attr_needed[attroff], joinrel->relids)) return false; } /* * Search for mergejoinable clauses that constrain the inner rel against * either the outer rel or a pseudoconstant. If an operator is * mergejoinable then it behaves like equality for some btree opclass, so * it's what we want. The mergejoinability test also eliminates clauses * containing volatile functions, which we couldn't depend on. */ foreach(l, restrictlist) { RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l); /* * If we find a pushed-down clause, it must have come from above the * outer join and it must contain references to the inner rel. (If it * had only outer-rel variables, it'd have been pushed down into the * outer rel.) Therefore, we can conclude that join removal is unsafe * without any examination of the clause contents. */ if (restrictinfo->is_pushed_down) return false; /* Ignore if it's not a mergejoinable clause */ if (!restrictinfo->can_join || restrictinfo->mergeopfamilies == NIL) continue; /* not mergejoinable */ /* * Check if clause has the form "outer op inner" or "inner op outer". */ if (!clause_sides_match_join(restrictinfo, outerrel, innerrel)) continue; /* no good for these input relations */ /* OK, add to list */ clause_list = lappend(clause_list, restrictinfo); }
/*
 * make_join_rel
 *    Look up or build the join RelOptInfo for joining rel1 with rel2, and
 *    attach to it the paths obtainable by using the two rels as outer and
 *    inner inputs.  (The join rel may already hold paths produced from
 *    other pairs of rels covering the same set of base rels.)
 *
 * root: planner state for the current query.
 * rel1: one input relation.
 * rel2: the other input relation; must not overlap rel1.
 *
 * Returns the join RelOptInfo, or NULL when the requested join is not
 * valid.  That can happen with outer joins, or with IN or EXISTS clauses
 * that have been turned into joins.
 *
 * This copy additionally carries the pg_hint_plan extension that adjusts
 * the row estimate of the join rel according to applicable Rows hints.
 */
RelOptInfo *
make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
{
    Relids      joinrelids;
    SpecialJoinInfo *sjinfo;
    bool        reversed;
    SpecialJoinInfo sjinfo_data;
    RelOptInfo *joinrel;
    List       *restrictlist;

    /* Joining two overlapping sets of rels is never meaningful. */
    Assert(!bms_overlap(rel1->relids, rel2->relids));

    /* The joinrel is identified by the union of both inputs' relids. */
    joinrelids = bms_union(rel1->relids, rel2->relids);

    /* Validate the join and learn its type and orientation. */
    if (!join_is_legal(root, rel1, rel2, joinrelids, &sjinfo, &reversed))
    {
        /* invalid join path */
        bms_free(joinrelids);
        return NULL;
    }

    /* Exchange the inputs when the join info expects them the other way. */
    if (reversed)
    {
        RelOptInfo *swap = rel2;

        rel2 = rel1;
        rel1 = swap;
    }

    /*
     * A plain inner join has no entry in join_info_list.  Fabricate a
     * SpecialJoinInfo so that selectivity estimation functions know what is
     * being joined.
     */
    if (sjinfo == NULL)
    {
        sjinfo_data.type = T_SpecialJoinInfo;
        sjinfo_data.min_lefthand = rel1->relids;
        sjinfo_data.min_righthand = rel2->relids;
        sjinfo_data.syn_lefthand = rel1->relids;
        sjinfo_data.syn_righthand = rel2->relids;
        sjinfo_data.jointype = JOIN_INNER;
        /* the remaining fields are not meaningful; just give them values */
        sjinfo_data.lhs_strict = false;
        sjinfo_data.delay_upper_joins = false;
        sjinfo_data.semi_can_btree = false;
        sjinfo_data.semi_can_hash = false;
        sjinfo_data.semi_operators = NIL;
        sjinfo_data.semi_rhs_exprs = NIL;

        sjinfo = &sjinfo_data;
    }

    /*
     * Find or build the join RelOptInfo, and obtain the restrictlist that
     * applies to this particular way of forming the join.
     */
    joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
                             &restrictlist);

    /* !!! START: HERE IS THE PART WHICH ADDED FOR PG_HINT_PLAN !!! */
    {
        RowsHint   *exact_hint = NULL;  /* hint naming exactly this joinrel */
        RowsHint   *multi_hint = NULL;  /* multiply hint first usable here */
        int         idx;

        /* Scan all Rows hints for ones applicable to this join node. */
        for (idx = 0; idx < current_hint->num_hints[HINT_TYPE_ROWS]; idx++)
        {
            RowsHint   *cand = current_hint->rows_hints[idx];

            /*
             * Ignore hints that target no join rels or that were already
             * found to be invalid.
             */
            if (cand->joinrelids == NULL ||
                cand->base.state == HINT_STATE_ERROR)
                continue;

            if (bms_equal(joinrelids, cand->joinrelids))
            {
                /*
                 * The hint names exactly this joinrel, so the row estimate
                 * will be tweaked according to it.
                 */
                exact_hint = cand;
                continue;
            }

            /*
             * The hint's target relids fit inside this joinrel but inside
             * neither input on its own, i.e. they are spread over both
             * component rels.  Such a hint cannot have been applied lower
             * down, and this joinrel is the first (and only) chance for it
             * to fire in the current join tree.  Only multiplication hints
             * are cumulative in nature, so only RVT_MULTI is handled this
             * way.
             */
            if (!bms_is_subset(cand->joinrelids, rel1->relids) &&
                !bms_is_subset(cand->joinrelids, rel2->relids) &&
                bms_is_subset(cand->joinrelids, joinrelids) &&
                cand->value_type == RVT_MULTI)
                multi_hint = cand;
        }

        if (exact_hint != NULL)
        {
            /*
             * A hint aimed precisely at this joinrel makes any other
             * adjustment unnecessary.  It is applied only while the hint is
             * still in state HINT_STATE_NOTUSED, because otherwise this
             * joinrel has already been adjusted by it.
             */
            if (exact_hint->base.state == HINT_STATE_NOTUSED)
                joinrel->rows = adjust_rows(joinrel->rows, exact_hint);
        }
        else if (multi_hint != NULL)
        {
            /*
             * Several routes may lead to this joinrel, in which case the
             * multiply hint would be applied repeatedly, and there is no
             * way to detect that here.  So always re-estimate the row count
             * of this joinrel right before applying the hint.  This differs
             * a little from normal planner behavior but does little harm.
             */
            set_joinrel_size_estimates(root, joinrel, rel1, rel2, sjinfo,
                                       restrictlist);
            joinrel->rows = adjust_rows(joinrel->rows, multi_hint);
        }
    }
    /* !!! END: HERE IS THE PART WHICH ADDED FOR PG_HINT_PLAN !!! */

    /*
     * Once the join has been proven empty there is no point in looking at
     * further paths for it.
     */
    if (is_dummy_rel(joinrel))
    {
        bms_free(joinrelids);
        return joinrel;
    }

    /*
     * Consider paths with each rel in both the outer and the inner role.
     * Depending on the join type, a provably empty outer or inner rel may
     * make the join provably empty as well; then any previously computed
     * paths are discarded and the join is marked dummy.  (It is done this
     * way because dummy-ness of a multi-element join might only be
     * noticeable for certain construction paths.)
     *
     * Likewise, a provably constant-false join restriction usually lets us
     * skip evaluating one or both sides, which is expressed by marking the
     * appropriate rel as dummy.  For outer joins, a pushed-down
     * constant-false restriction still makes the whole join dummy, whereas
     * a non-pushed-down one only means that no inner rows will join, so the
     * inner rel can be treated as dummy.
     *
     * Only the jointypes that can appear in join_info_list, plus
     * JOIN_INNER, need to be handled.
     */
    switch (sjinfo->jointype)
    {
        case JOIN_INNER:
            if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
                restriction_is_constant_false(restrictlist, false))
            {
                mark_dummy_rel(joinrel);
            }
            else
            {
                add_paths_to_joinrel(root, joinrel, rel1, rel2,
                                     JOIN_INNER, sjinfo, restrictlist);
                add_paths_to_joinrel(root, joinrel, rel2, rel1,
                                     JOIN_INNER, sjinfo, restrictlist);
            }
            break;

        case JOIN_LEFT:
            if (is_dummy_rel(rel1) ||
                restriction_is_constant_false(restrictlist, true))
            {
                mark_dummy_rel(joinrel);
            }
            else
            {
                /* Non-pushed-down constant-false qual: inner side is empty. */
                if (restriction_is_constant_false(restrictlist, false) &&
                    bms_is_subset(rel2->relids, sjinfo->syn_righthand))
                    mark_dummy_rel(rel2);

                add_paths_to_joinrel(root, joinrel, rel1, rel2,
                                     JOIN_LEFT, sjinfo, restrictlist);
                add_paths_to_joinrel(root, joinrel, rel2, rel1,
                                     JOIN_RIGHT, sjinfo, restrictlist);
            }
            break;

        case JOIN_FULL:
            if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
                restriction_is_constant_false(restrictlist, true))
            {
                mark_dummy_rel(joinrel);
            }
            else
            {
                add_paths_to_joinrel(root, joinrel, rel1, rel2,
                                     JOIN_FULL, sjinfo, restrictlist);
                add_paths_to_joinrel(root, joinrel, rel2, rel1,
                                     JOIN_FULL, sjinfo, restrictlist);

                /*
                 * Join quals that are neither mergeable nor hashable may
                 * leave us without any valid plan.  Complain right here so
                 * the error message is reasonably helpful.  (A full join
                 * offers no planning flexibility, so there is no chance of
                 * succeeding later with a different pair of input rels.)
                 */
                if (joinrel->pathlist == NIL)
                    ereport(ERROR,
                            (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                             errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
            }
            break;

        case JOIN_SEMI:

            /*
             * Either this is a regular semijoin, or there are not enough
             * rels to perform the semijoin but the RHS can be unique-ified
             * and joined with an inner join (see comments in
             * join_is_legal).  JOIN_SEMI joining is not applicable in the
             * latter situation.
             */
            if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
                bms_is_subset(sjinfo->min_righthand, rel2->relids))
            {
                if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
                    restriction_is_constant_false(restrictlist, false))
                {
                    mark_dummy_rel(joinrel);
                    break;
                }
                add_paths_to_joinrel(root, joinrel, rel1, rel2,
                                     JOIN_SEMI, sjinfo, restrictlist);
            }

            /*
             * When the RHS can be unique-ified and one input rel is exactly
             * the RHS (not a superset), also consider unique-ifying it and
             * performing a regular join.  (The create_unique_path check is
             * probably redundant with what join_is_legal did, but its
             * result is cached and therefore cheap, so test it anyway to be
             * sure.)
             */
            if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
                create_unique_path(root, rel2, rel2->cheapest_total_path,
                                   sjinfo) != NULL)
            {
                if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
                    restriction_is_constant_false(restrictlist, false))
                {
                    mark_dummy_rel(joinrel);
                    break;
                }
                add_paths_to_joinrel(root, joinrel, rel1, rel2,
                                     JOIN_UNIQUE_INNER, sjinfo, restrictlist);
                add_paths_to_joinrel(root, joinrel, rel2, rel1,
                                     JOIN_UNIQUE_OUTER, sjinfo, restrictlist);
            }
            break;

        case JOIN_ANTI:
            if (is_dummy_rel(rel1) ||
                restriction_is_constant_false(restrictlist, true))
            {
                mark_dummy_rel(joinrel);
            }
            else
            {
                /* Non-pushed-down constant-false qual: inner side is empty. */
                if (restriction_is_constant_false(restrictlist, false) &&
                    bms_is_subset(rel2->relids, sjinfo->syn_righthand))
                    mark_dummy_rel(rel2);

                add_paths_to_joinrel(root, joinrel, rel1, rel2,
                                     JOIN_ANTI, sjinfo, restrictlist);
            }
            break;

        default:
            /* other values not expected here */
            elog(ERROR, "unrecognized join type: %d",
                 (int) sjinfo->jointype);
            break;
    }

    bms_free(joinrelids);

    return joinrel;
}