/*
 * dependency_is_compatible_clause
 *		Decide whether a clause can be matched against functional dependencies.
 *
 * A clause qualifies only when it is, or can be read as, an equality between
 * a column and a pseudoconstant:
 *
 *	- an operator clause "Var op Const" / "Const op Var" whose restriction
 *	  estimator is eqsel,
 *	- "NOT x", read as "x = false",
 *	- a bare boolean expression "x", read as "x = true".
 *
 * The column side must be a plain Var (optionally wrapped in a RelabelType)
 * of relation "relid", at the current query level, and must be a user-defined
 * attribute.
 *
 * Returns true and stores the attribute number in *attnum when the clause is
 * compatible; returns false (leaving *attnum untouched) otherwise.
 */
static bool
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
{
	RestrictInfo *rinfo = (RestrictInfo *) clause;
	Node	   *operand;
	Var		   *column;

	/* Anything that is not a RestrictInfo is rejected outright. */
	if (!IsA(rinfo, RestrictInfo))
		return false;

	/*
	 * Pseudoconstant clauses cannot contain a Var, and clauses that reference
	 * zero or several varnos cannot be tied to a single relation.
	 */
	if (rinfo->pseudoconstant ||
		bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
		return false;

	if (is_opclause(rinfo->clause))
	{
		OpExpr	   *opexpr = (OpExpr *) rinfo->clause;
		Node	   *lhs;
		Node	   *rhs;

		/* Only binary operators are candidates. */
		if (list_length(opexpr->args) != 2)
			return false;

		lhs = (Node *) linitial(opexpr->args);
		rhs = (Node *) lsecond(opexpr->args);

		/* One side must be a pseudoconstant; the other one is inspected. */
		if (is_pseudo_constant_clause(rhs))
			operand = lhs;
		else if (is_pseudo_constant_clause(lhs))
			operand = rhs;
		else
			return false;

		/*
		 * Reject anything but "=".  Equality is recognized through the
		 * operator's selectivity estimator rather than the operator itself
		 * (a bit awkward, but well ...).
		 *
		 * XXX this is pretty dubious; probably it'd be better to check btree
		 * or hash opclass membership, so as not to be fooled by custom
		 * selectivity functions, and to be more consistent with decisions
		 * elsewhere in the planner.
		 */
		if (get_oprrest(opexpr->opno) != F_EQSEL)
			return false;
	}
	else if (not_clause((Node *) rinfo->clause))
	{
		/* "NOT x" is treated as "x = false": look at the negated argument. */
		operand = (Node *) get_notclausearg(rinfo->clause);
	}
	else
	{
		/* A bare boolean "x" is treated as "x = true". */
		operand = (Node *) rinfo->clause;
	}

	/*
	 * Look through a RelabelType sitting on top of the operand.  (There won't
	 * be more than one, since eval_const_expressions has been applied
	 * already.)
	 */
	if (IsA(operand, RelabelType))
		operand = (Node *) ((RelabelType *) operand)->arg;

	/* Only plain Vars are supported for now. */
	if (!IsA(operand, Var))
		return false;

	column = (Var *) operand;

	/*
	 * The Var must belong to the requested relation, come from the current
	 * query level, and not be a system attribute (stats are not allowed on
	 * those).
	 */
	if (column->varno != relid ||
		column->varlevelsup != 0 ||
		!AttrNumberIsForUserDefinedAttr(column->varattno))
		return false;

	*attnum = column->varattno;
	return true;
}
/**
 * @brief Convert a Postgres Agg plan state into a Peloton AggregatePlan.
 *
 * Builds the projection, predicate, aggregate terms, group-by column list,
 * output schema and aggregation strategy from @p plan_state, then attaches
 * the transformed outer child plan.
 *
 * @param plan_state  Aggregation plan state extracted from Postgres.
 * @return A plan allocated with `new` (nothing in this function releases it,
 *         so the caller is presumably responsible for it), or nullptr when
 *         the aggregation has more than one phase or uses a transition
 *         function that has no mapping in kPgTransitFuncMap.
 */
const planner::AbstractPlan *PlanTransformer::TransformAgg(
    const AggPlanState *plan_state) {
  // Alias all I need
  const Agg *agg = plan_state->agg_plan;
  auto numphases = plan_state->numphases;
  auto numaggs = plan_state->numaggs;
  auto targetlist = plan_state->ps_targetlist;
  auto qual = plan_state->ps_qual;
  auto peragg = plan_state->peragg;
  auto tupleDesc = plan_state->result_tupleDescriptor;
  auto aggstrategy = plan_state->agg_plan->aggstrategy;

  LOG_INFO("Number of Agg phases: %d \n", numphases);

  // When we'll have >1 phases?
  if (numphases != 1) return nullptr;

  /* Get project info */
  std::unique_ptr<const planner::ProjectInfo> proj_info(
      BuildProjectInfoFromTLSkipJunk(targetlist));
  LOG_INFO("proj_info : \n%s", proj_info->Debug().c_str());

  /* Get predicate */
  std::unique_ptr<const expression::AbstractExpression> predicate(
      BuildPredicateFromQual(qual));

  /* Get Aggregate terms */
  std::vector<planner::AggregatePlan::AggTerm> unique_agg_terms;

  LOG_INFO("Number of (unique) Agg nodes: %d \n", numaggs);
  for (int aggno = 0; aggno < numaggs; aggno++) {
    auto transfn_oid = peragg[aggno].transfn_oid;

    // The iterator must be compared against the end of the SAME map it was
    // obtained from; comparing against another container's end() is
    // undefined behaviour and would let an unmapped oid reach itr->second.
    auto itr = peloton::bridge::kPgTransitFuncMap.find(transfn_oid);
    if (peloton::bridge::kPgTransitFuncMap.end() == itr) {
      LOG_ERROR("Unmapped Transit function Id : %u\n", transfn_oid);
      // NOTE(review): expressions already transformed for earlier terms are
      // not released on this path — confirm whether AggTerm owns them.
      return nullptr;
    }

    // We don't check whether the mapped exprtype is a valid aggregate type
    // here.
    const PltFuncMetaInfo &fn_meta = itr->second;

    // We only take the first argument as input to aggregator because
    // we don't have multi-argument aggregator in Peloton at the moment.
    // WARNING: there can be no arguments (e.g., COUNT(*))
    auto arguments = peragg[aggno].aggrefstate->args;
    expression::AbstractExpression *agg_expr = nullptr;
    if (arguments) {
      GenericExprState *gstate =
          (GenericExprState *)lfirst(list_head(arguments));
      LOG_INFO("Creating Agg Expr");
      agg_expr = ExprTransformer::TransformExpr(gstate->arg);
      LOG_INFO("Done creating Agg Expr");
    }

    /*
     * AggStatePerAggData.sortColIdx along with other related attributes
     * are used to handle ORDER BY and DISTINCT *within* aggregation.
     * E.g.,
     * SELECT count(DISTINCT x) ...
     * SELECT str_agg(y ORDER BY x) ...
     * Currently, we only handle the agg(DISTINCT x) case by
     * checking whether numDistinctCols > 0.
     * Note that numDistinctCols > 0 may be a necessary but not sufficient
     * condition for agg(DISTINCT x).
     */
    bool distinct = (peragg[aggno].numDistinctCols > 0);

    unique_agg_terms.emplace_back(fn_meta.exprtype, agg_expr, distinct);

    LOG_INFO(
        "Unique Agg # : %d , transfn_oid : %u\n , aggtype = %s \n expr = %s, "
        "numDistinctCols = %d",
        aggno, transfn_oid, ExpressionTypeToString(fn_meta.exprtype).c_str(),
        agg_expr ? agg_expr->Debug().c_str() : "<NULL>",
        peragg[aggno].numDistinctCols);

    for (int i = 0; i < peragg[aggno].numDistinctCols; i++) {
      LOG_INFO("sortColIdx[%d] : %d \n", i, peragg[aggno].sortColIdx[i]);
    }
  }  // end loop aggno

  /* Get Group by columns */
  std::vector<oid_t> groupby_col_ids;
  LOG_INFO("agg.numCols = %d", agg->numCols);
  for (int i = 0; i < agg->numCols; i++) {
    LOG_INFO("agg.grpColIdx[%d] = %d \n", i, agg->grpColIdx[i]);

    auto attrno = agg->grpColIdx[i];
    // Only valid, user-defined attributes become group-by columns.
    if (AttributeNumberIsValid(attrno) &&
        AttrNumberIsForUserDefinedAttr(attrno)) {
      groupby_col_ids.emplace_back(AttrNumberGetAttrOffset(attrno));
    }
  }

  /* Get output schema */
  std::unique_ptr<catalog::Schema> output_schema(
      SchemaTransformer::GetSchemaFromTupleDesc(tupleDesc));

  /* Map agg strategy */
  // Both arms compare against aggstrategy; testing the bare AGG_SORTED
  // constant would never select "PLAIN".
  LOG_INFO("aggstrategy : %s\n",
           (AGG_HASHED == aggstrategy)
               ? "HASH"
               : ((AGG_SORTED == aggstrategy) ? "SORT" : "PLAIN"));

  PelotonAggType agg_type = AGGREGATE_TYPE_INVALID;

  switch (aggstrategy) {
    case AGG_SORTED:
      agg_type = AGGREGATE_TYPE_SORTED;
      break;
    case AGG_HASHED:
      agg_type = AGGREGATE_TYPE_HASH;
      break;
    case AGG_PLAIN:
      agg_type = AGGREGATE_TYPE_PLAIN;
      break;
    default:
      // Keep AGGREGATE_TYPE_INVALID, as before, but make it visible.
      LOG_ERROR("Unsupported aggstrategy : %d\n",
                static_cast<int>(aggstrategy));
      break;
  }

  // Collect the columns referenced by the aggregate input expressions.
  std::vector<oid_t> column_ids;
  for (const auto &agg_term : unique_agg_terms) {
    if (agg_term.expression) {
      LOG_INFO("AGG TERM :: %s", agg_term.expression->Debug().c_str());
    }
    BuildColumnListFromExpr(column_ids, agg_term.expression);
  }

  // The smart pointers hand their objects over to the plan here.
  auto retval = new planner::AggregatePlan(
      proj_info.release(), predicate.release(), std::move(unique_agg_terms),
      std::move(groupby_col_ids), output_schema.release(), agg_type);

  retval->SetColumnIds(column_ids);

  // Find children
  auto lchild = TransformPlan(outerAbstractPlanState(plan_state));
  retval->AddChild(lchild);

  return retval;
}
/*
 * Open the local relation associated with the remote one.
 *
 * Looks up the relation map entry for "remoteid" (raising an error if there
 * is none) and opens the matching local relation, which is stored in
 * entry->localrel.
 *
 * Optionally rebuilds the Relcache mapping if it was invalidated
 * by local DDL.  A rebuild happens whenever entry->localreloid is not valid:
 * the local relation is looked up by the remote schema/relation name and
 * locked with "lockmode", the local-to-remote attribute map is recomputed,
 * and entry->updatable is re-evaluated from the replica identity.
 *
 * Returns the map entry.
 */
LogicalRepRelMapEntry *
logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
{
	LogicalRepRelMapEntry *entry;
	bool		found;

	if (LogicalRepRelMap == NULL)
		logicalrep_relmap_init();

	/* Search for existing entry. */
	entry = hash_search(LogicalRepRelMap, (void *) &remoteid,
						HASH_FIND, &found);

	if (!found)
		elog(ERROR, "no relation map entry for remote relation ID %u",
			 remoteid);

	/* Need to update the local cache? */
	if (!OidIsValid(entry->localreloid))
	{
		Oid			relid;
		int			i;
		int			nmatched;	/* local columns that have a remote match */
		Bitmapset  *idkey;
		TupleDesc	desc;
		LogicalRepRelation *remoterel;
		MemoryContext oldctx;

		remoterel = &entry->remoterel;

		/* Try to find and lock the relation by name. */
		relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
											  remoterel->relname, -1),
								 lockmode, true);
		if (!OidIsValid(relid))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("logical replication target relation \"%s.%s\" does not exist",
							remoterel->nspname, remoterel->relname)));

		/* The lock was already taken by RangeVarGetRelid above. */
		entry->localrel = heap_open(relid, NoLock);

		/*
		 * We currently only support writing to regular tables; every other
		 * relkind is rejected here.
		 */
		if (entry->localrel->rd_rel->relkind != RELKIND_RELATION)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("logical replication target relation \"%s.%s\" is not a table",
							remoterel->nspname, remoterel->relname)));

		/*
		 * Build the mapping of local attribute numbers to remote attribute
		 * numbers and validate that we don't miss any replicated columns
		 * as that would result in potentially unwanted data loss.
		 *
		 * The map is allocated in LogicalRepRelMapContext rather than in the
		 * current memory context.
		 */
		desc = RelationGetDescr(entry->localrel);
		oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
		entry->attrmap = palloc(desc->natts * sizeof(int));
		MemoryContextSwitchTo(oldctx);

		nmatched = 0;
		for (i = 0; i < desc->natts; i++)
		{
			int			attnum = logicalrep_rel_att_by_name(remoterel,
															NameStr(desc->attrs[i]->attname));

			/* A negative value marks a local column with no remote match. */
			entry->attrmap[i] = attnum;
			if (attnum >= 0)
				nmatched++;
		}

		/* TODO, detail message with names of missing columns */
		if (nmatched < remoterel->natts)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("logical replication target relation \"%s.%s\" is missing "
							"some replicated columns",
							remoterel->nspname, remoterel->relname)));

		/*
		 * Check that replica identity matches. We allow for stricter replica
		 * identity (fewer columns) on subscriber as that will not stop us
		 * from finding unique tuple. IE, if publisher has identity
		 * (id,timestamp) and subscriber just (id) this will not be a problem,
		 * but in the opposite scenario it will.
		 *
		 * Don't throw any error here just mark the relation entry as not
		 * updatable, as replica identity is only for updates and deletes
		 * but inserts can be replicated even without it.
		 */
		entry->updatable = true;
		idkey = RelationGetIndexAttrBitmap(entry->localrel,
										   INDEX_ATTR_BITMAP_IDENTITY_KEY);
		/* fallback to PK if no replica identity */
		if (idkey == NULL)
		{
			idkey = RelationGetIndexAttrBitmap(entry->localrel,
											   INDEX_ATTR_BITMAP_PRIMARY_KEY);

			/*
			 * If no replica identity index and no PK, the published table
			 * must have replica identity FULL.
			 */
			if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
				entry->updatable = false;
		}

		i = -1;
		while ((i = bms_next_member(idkey, i)) >= 0)
		{
			/* Bitmap members are offset by FirstLowInvalidHeapAttributeNumber. */
			int			attnum = i + FirstLowInvalidHeapAttributeNumber;

			if (!AttrNumberIsForUserDefinedAttr(attnum))
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("logical replication target relation \"%s.%s\" uses "
								"system columns in REPLICA IDENTITY index",
								remoterel->nspname, remoterel->relname)));

			attnum = AttrNumberGetAttrOffset(attnum);

			/*
			 * A local key column without a remote counterpart has a negative
			 * map entry.  It must be caught before calling bms_is_member(),
			 * which does not accept negative members; such a column simply
			 * makes the relation non-updatable.
			 */
			if (entry->attrmap[attnum] < 0 ||
				!bms_is_member(entry->attrmap[attnum], remoterel->attkeys))
			{
				entry->updatable = false;
				break;
			}
		}

		/* Remember the resolved OID so later calls can skip the rebuild. */
		entry->localreloid = relid;
	}
	else
		entry->localrel = heap_open(entry->localreloid, lockmode);

	return entry;
}