/* * sort_inner_and_outer * Create mergejoin join paths by explicitly sorting both the outer and * inner join relations on each available merge ordering. * * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join * 'jointype' is the type of join to do */ static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype) { bool useallclauses; Path *outer_path; Path *inner_path; List *all_pathkeys; ListCell *l; /* * If we are doing a right or full join, we must use *all* the * mergeclauses as join clauses, else we will not have a valid plan. */ switch (jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_IN: case JOIN_UNIQUE_OUTER: case JOIN_UNIQUE_INNER: useallclauses = false; break; case JOIN_RIGHT: case JOIN_FULL: useallclauses = true; break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); useallclauses = false; /* keep compiler quiet */ break; } /* * We only consider the cheapest-total-cost input paths, since we are * assuming here that a sort is required. We will consider * cheapest-startup-cost input paths later, and only if they don't need a * sort. * * If unique-ification is requested, do it and then handle as a plain * inner join. */ outer_path = outerrel->cheapest_total_path; inner_path = innerrel->cheapest_total_path; if (jointype == JOIN_UNIQUE_OUTER) { outer_path = (Path *) create_unique_path(root, outerrel, outer_path); jointype = JOIN_INNER; } else if (jointype == JOIN_UNIQUE_INNER) { inner_path = (Path *) create_unique_path(root, innerrel, inner_path); jointype = JOIN_INNER; } /* * Each possible ordering of the available mergejoin clauses will generate * a differently-sorted result path at essentially the same cost. We have * no basis for choosing one over another at this level of joining, but * some sort orders may be more useful than others for higher-level * mergejoins, so it's worth considering multiple orderings. * * Actually, it's not quite true that every mergeclause ordering will * generate a different path order, because some of the clauses may be * partially redundant (refer to the same EquivalenceClasses). Therefore, * what we do is convert the mergeclause list to a list of canonical * pathkeys, and then consider different orderings of the pathkeys. * * Generating a path for *every* permutation of the pathkeys doesn't seem * like a winning strategy; the cost in planning time is too high. For * now, we generate one path for each pathkey, listing that pathkey first * and the rest in random order. This should allow at least a one-clause * mergejoin without re-sorting against any other possible mergejoin * partner path. But if we've not guessed the right ordering of secondary * keys, we may end up evaluating clauses as qpquals when they could have * been done as mergeclauses. (In practice, it's rare that there's more * than two or three mergeclauses, so expending a huge amount of thought * on that is probably not worth it.) * * The pathkey order returned by select_outer_pathkeys_for_merge() has * some heuristics behind it (see that function), so be sure to try it * exactly as-is as well as making variants. */ all_pathkeys = select_outer_pathkeys_for_merge(root, mergeclause_list, joinrel); foreach(l, all_pathkeys) { List *front_pathkey = (List *) lfirst(l); List *cur_mergeclauses; List *outerkeys; List *innerkeys; List *merge_pathkeys; /* Make a pathkey list with this guy first */ if (l != list_head(all_pathkeys)) outerkeys = lcons(front_pathkey, list_delete_ptr(list_copy(all_pathkeys), front_pathkey)); else outerkeys = all_pathkeys; /* no work at first one... */ /* Sort the mergeclauses into the corresponding ordering */ cur_mergeclauses = find_mergeclauses_for_pathkeys(root, outerkeys, true, mergeclause_list); /* Should have used them all... */ Assert(list_length(cur_mergeclauses) == list_length(mergeclause_list)); /* Build sort pathkeys for the inner side */ innerkeys = make_inner_pathkeys_for_merge(root, cur_mergeclauses, outerkeys); /* Build pathkeys representing output sort order */ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerkeys); /* * And now we can make the path. * * Note: it's possible that the cheapest paths will already be sorted * properly. create_mergejoin_path will detect that case and suppress * an explicit sort step, so we needn't do so here. */ add_path(joinrel, (Path *) create_mergejoin_path(root, joinrel, jointype, outer_path, inner_path, restrictlist, merge_pathkeys, cur_mergeclauses, outerkeys, innerkeys)); }
/* * sort_inner_and_outer * Create mergejoin join paths by explicitly sorting both the outer and * inner join relations on each available merge ordering. * * 'joinrel' is the join relation * 'outerrel' is the outer join relation * 'innerrel' is the inner join relation * 'restrictlist' contains all of the RestrictInfo nodes for restriction * clauses that apply to this join * 'mergeclause_list' is a list of RestrictInfo nodes for available * mergejoin clauses in this join * 'jointype' is the type of join to do */ static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel, RelOptInfo *outerrel, RelOptInfo *innerrel, List *restrictlist, List *mergeclause_list, JoinType jointype) { bool useallclauses; Path *outer_path; Path *inner_path; List *all_pathkeys; ListCell *l; /* * If we are doing a right or full join, we must use *all* the * mergeclauses as join clauses, else we will not have a valid plan. */ switch (jointype) { case JOIN_INNER: case JOIN_LEFT: case JOIN_LASJ: case JOIN_LASJ_NOTIN: useallclauses = false; break; case JOIN_RIGHT: case JOIN_FULL: useallclauses = true; break; default: elog(ERROR, "unrecognized join type: %d", (int) jointype); useallclauses = false; /* keep compiler quiet */ break; } /* * We only consider the cheapest-total-cost input paths, since we are * assuming here that a sort is required. We will consider * cheapest-startup-cost input paths later, and only if they don't need a * sort. */ outer_path = outerrel->cheapest_total_path; inner_path = innerrel->cheapest_total_path; /* * Each possible ordering of the available mergejoin clauses will generate * a differently-sorted result path at essentially the same cost. We have * no basis for choosing one over another at this level of joining, but * some sort orders may be more useful than others for higher-level * mergejoins, so it's worth considering multiple orderings. * * Actually, it's not quite true that every mergeclause ordering will * generate a different path order, because some of the clauses may be * redundant. Therefore, what we do is convert the mergeclause list to a * list of canonical pathkeys, and then consider different orderings of * the pathkeys. * * Generating a path for *every* permutation of the pathkeys doesn't seem * like a winning strategy; the cost in planning time is too high. For * now, we generate one path for each pathkey, listing that pathkey first * and the rest in random order. This should allow at least a one-clause * mergejoin without re-sorting against any other possible mergejoin * partner path. But if we've not guessed the right ordering of secondary * keys, we may end up evaluating clauses as qpquals when they could have * been done as mergeclauses. We need to figure out a better way. (Two * possible approaches: look at all the relevant index relations to * suggest plausible sort orders, or make just one output path and somehow * mark it as having a sort-order that can be rearranged freely.) */ all_pathkeys = make_pathkeys_for_mergeclauses(root, mergeclause_list, outerrel); foreach(l, all_pathkeys) { List *front_pathkey = (List *) lfirst(l); List *cur_pathkeys; List *cur_mergeclauses; List *outerkeys; List *innerkeys; List *merge_pathkeys; /* Make a pathkey list with this guy first. */ if (l != list_head(all_pathkeys)) cur_pathkeys = lcons(front_pathkey, list_delete_ptr(list_copy(all_pathkeys), front_pathkey)); else cur_pathkeys = all_pathkeys; /* no work at first one... */ /* * Select mergeclause(s) that match this sort ordering. If we had * redundant merge clauses then we will get a subset of the original * clause list. There had better be some match, however... */ cur_mergeclauses = find_mergeclauses_for_pathkeys(root, cur_pathkeys, mergeclause_list); Assert(cur_mergeclauses != NIL); /* Forget it if can't use all the clauses in right/full join */ if (useallclauses && list_length(cur_mergeclauses) != list_length(mergeclause_list)) continue; /* * Build sort pathkeys for both sides. * * Note: it's possible that the cheapest paths will already be sorted * properly. create_mergejoin_path will detect that case and suppress * an explicit sort step, so we needn't do so here. */ outerkeys = make_pathkeys_for_mergeclauses(root, cur_mergeclauses, outerrel); innerkeys = make_pathkeys_for_mergeclauses(root, cur_mergeclauses, innerrel); /* Build pathkeys representing output sort order. */ merge_pathkeys = build_join_pathkeys(root, joinrel, jointype, outerkeys); /* * And now we can make the path. */ add_path(root, joinrel, (Path *) create_mergejoin_path(root, joinrel, jointype, outer_path, inner_path, restrictlist, merge_pathkeys, cur_mergeclauses, mergeclause_list, outerkeys, innerkeys)); }