예제 #1
0
/*
 * dependency_is_compatible_clause
 *		Determines if the clause is compatible with functional dependencies
 *
 * Only clauses that have the form of equality to a pseudoconstant, or can be
 * interpreted that way, are currently accepted.  Furthermore the variable
 * part of the clause must be a simple Var belonging to the specified
 * relation, whose attribute number we return in *attnum on success.
 */
static bool
dependency_is_compatible_clause(Node *clause, Index relid, AttrNumber *attnum)
{
	RestrictInfo *rinfo = (RestrictInfo *) clause;
	Var		   *var;

	if (!IsA(rinfo, RestrictInfo))
		return false;

	/* Pseudoconstants are not interesting (they couldn't contain a Var) */
	if (rinfo->pseudoconstant)
		return false;

	/* Clauses referencing multiple, or no, varnos are incompatible */
	if (bms_membership(rinfo->clause_relids) != BMS_SINGLETON)
		return false;

	if (is_opclause(rinfo->clause))
	{
		/* If it's an opclause, check for Var = Const or Const = Var. */
		OpExpr	   *expr = (OpExpr *) rinfo->clause;

		/* Only expressions with two arguments are candidates. */
		if (list_length(expr->args) != 2)
			return false;

		/* Make sure non-selected argument is a pseudoconstant. */
		if (is_pseudo_constant_clause(lsecond(expr->args)))
			var = linitial(expr->args);
		else if (is_pseudo_constant_clause(linitial(expr->args)))
			var = lsecond(expr->args);
		else
			return false;

		/*
		 * If it's not an "=" operator, just ignore the clause, as it's not
		 * compatible with functional dependencies.
		 *
		 * This uses the function for estimating selectivity, not the operator
		 * directly (a bit awkward, but well ...).
		 *
		 * XXX this is pretty dubious; probably it'd be better to check btree
		 * or hash opclass membership, so as not to be fooled by custom
		 * selectivity functions, and to be more consistent with decisions
		 * elsewhere in the planner.
		 */
		if (get_oprrest(expr->opno) != F_EQSEL)
			return false;

		/* OK to proceed with checking "var" */
	}
	else if (not_clause((Node *) rinfo->clause))
	{
		/*
		 * "NOT x" can be interpreted as "x = false", so get the argument and
		 * proceed with seeing if it's a suitable Var.
		 */
		var = (Var *) get_notclausearg(rinfo->clause);
	}
	else
	{
		/*
		 * A boolean expression "x" can be interpreted as "x = true", so
		 * proceed with seeing if it's a suitable Var.
		 */
		var = (Var *) rinfo->clause;
	}

	/*
	 * We may ignore any RelabelType node above the operand.  (There won't be
	 * more than one, since eval_const_expressions has been applied already.)
	 */
	if (IsA(var, RelabelType))
		var = (Var *) ((RelabelType *) var)->arg;

	/* We only support plain Vars for now */
	if (!IsA(var, Var))
		return false;

	/* Ensure Var is from the correct relation */
	if (var->varno != relid)
		return false;

	/* We also better ensure the Var is from the current level */
	if (var->varlevelsup != 0)
		return false;

	/* Also ignore system attributes (we don't allow stats on those) */
	if (!AttrNumberIsForUserDefinedAttr(var->varattno))
		return false;

	*attnum = var->varattno;
	return true;
}
예제 #2
0
const planner::AbstractPlan *PlanTransformer::TransformAgg(
    const AggPlanState *plan_state) {
  // Alias all I need
  const Agg *agg = plan_state->agg_plan;
  auto numphases = plan_state->numphases;
  auto numaggs = plan_state->numaggs;
  auto targetlist = plan_state->ps_targetlist;
  auto qual = plan_state->ps_qual;
  auto peragg = plan_state->peragg;
  auto tupleDesc = plan_state->result_tupleDescriptor;
  auto aggstrategy = plan_state->agg_plan->aggstrategy;

  LOG_INFO("Number of Agg phases: %d \n", numphases);

  // When we'll have >1 phases?
  if (numphases != 1) return nullptr;

  /* Get project info */
  std::unique_ptr<const planner::ProjectInfo> proj_info(
      BuildProjectInfoFromTLSkipJunk(targetlist));
  LOG_INFO("proj_info : \n%s", proj_info->Debug().c_str());

  /* Get predicate */
  std::unique_ptr<const expression::AbstractExpression> predicate(
      BuildPredicateFromQual(qual));

  /* Get Aggregate terms */
  std::vector<planner::AggregatePlan::AggTerm> unique_agg_terms;

  LOG_INFO("Number of (unique) Agg nodes: %d \n", numaggs);
  for (int aggno = 0; aggno < numaggs; aggno++) {
    auto transfn_oid = peragg[aggno].transfn_oid;

    auto itr = peloton::bridge::kPgTransitFuncMap.find(transfn_oid);
    if (kPgFuncMap.end() == itr) {
      LOG_ERROR("Unmapped Transit function Id : %u\n", transfn_oid);
      return nullptr;
    }

    // We don't check whether the mapped exprtype is a valid aggregate type
    // here.
    PltFuncMetaInfo fn_meta = itr->second;
    // We only take the first argument as input to aggregator because
    // we don't have multi-argument aggregator in Peloton at the moment.
    // WARNING: there can be no arguments (e.g., COUNT(*))
    auto arguments = peragg[aggno].aggrefstate->args;
    expression::AbstractExpression *agg_expr = nullptr;
    if (arguments) {
      GenericExprState *gstate =
          (GenericExprState *)lfirst(list_head(arguments));
      LOG_INFO("Creating Agg Expr");
      agg_expr = ExprTransformer::TransformExpr(gstate->arg);
      LOG_INFO("Done creating Agg Expr");
    }

    /*
     * AggStatePerAggData.sortColIdx along with other related attributes
     * are used to handle ORDER BY and DISTINCT *within* aggregation.
     * E.g.,
     * SELECT count(DISTINCT x) ...
     * SELECT str_agg(y ORDER BY x) ...
     * Currently, we only handle the agg(DISTINCT x) case by
     * checking whether numDistinctCols > 0.
     * Note that numDistinctCols > 0 may be a necessary but not sufficient
     * condition for agg(DISTINCT x).
     */

    bool distinct = (peragg[aggno].numDistinctCols > 0);

    unique_agg_terms.emplace_back(fn_meta.exprtype, agg_expr, distinct);

    LOG_INFO(
        "Unique Agg # : %d , transfn_oid : %u\n , aggtype = %s \n expr = %s, "
        "numDistinctCols = %d",
        aggno, transfn_oid, ExpressionTypeToString(fn_meta.exprtype).c_str(),
        agg_expr ? agg_expr->Debug().c_str() : "<NULL>",
        peragg[aggno].numDistinctCols);

    for (int i = 0; i < peragg[aggno].numDistinctCols; i++) {
      LOG_INFO("sortColIdx[%d] : %d \n", i, peragg[aggno].sortColIdx[i]);
    }

  }  // end loop aggno

  /* Get Group by columns */
  std::vector<oid_t> groupby_col_ids;
  LOG_INFO("agg.numCols = %d", agg->numCols);
  for (int i = 0; i < agg->numCols; i++) {
    LOG_INFO("agg.grpColIdx[%d] = %d \n", i, agg->grpColIdx[i]);

    auto attrno = agg->grpColIdx[i];
    if (AttributeNumberIsValid(attrno) &&
        AttrNumberIsForUserDefinedAttr(attrno)) {
      groupby_col_ids.emplace_back(AttrNumberGetAttrOffset(attrno));
    }
  }

  /* Get output schema */
  std::unique_ptr<catalog::Schema> output_schema(
      SchemaTransformer::GetSchemaFromTupleDesc(tupleDesc));

  /* Map agg stragegy */
  LOG_INFO("aggstrategy : %s\n", (AGG_HASHED == aggstrategy)
                                     ? "HASH"
                                     : (AGG_SORTED ? "SORT" : "PLAIN"));

  PelotonAggType agg_type = AGGREGATE_TYPE_INVALID;

  switch (aggstrategy) {
    case AGG_SORTED:
      agg_type = AGGREGATE_TYPE_SORTED;
      break;
    case AGG_HASHED:
      agg_type = AGGREGATE_TYPE_HASH;
      break;
    case AGG_PLAIN:
      agg_type = AGGREGATE_TYPE_PLAIN;
      break;
  }

  std::vector<oid_t> column_ids;
  for (auto agg_term : unique_agg_terms) {
    if (agg_term.expression) {
      LOG_INFO("AGG TERM :: %s", agg_term.expression->Debug().c_str());
    }
    BuildColumnListFromExpr(column_ids, agg_term.expression);
  }

  auto retval = new planner::AggregatePlan(
      proj_info.release(), predicate.release(), std::move(unique_agg_terms),
      std::move(groupby_col_ids), output_schema.release(), agg_type);

  ((planner::AggregatePlan *)retval)->SetColumnIds(column_ids);

  // Find children
  auto lchild = TransformPlan(outerAbstractPlanState(plan_state));
  retval->AddChild(lchild);

  return retval;
}
예제 #3
0
/*
 * Open the local relation associated with the remote one.
 *
 * Optionally rebuilds the Relcache mapping if it was invalidated
 * by local DDL.
 */
LogicalRepRelMapEntry *
logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
{
	LogicalRepRelMapEntry  *entry;
	bool		found;

	if (LogicalRepRelMap == NULL)
		logicalrep_relmap_init();

	/* Search for existing entry. */
	entry = hash_search(LogicalRepRelMap, (void *) &remoteid,
						HASH_FIND, &found);

	if (!found)
		elog(ERROR, "no relation map entry for remote relation ID %u",
			 remoteid);

	/* Need to update the local cache? */
	if (!OidIsValid(entry->localreloid))
	{
		Oid			relid;
		int			i;
		int			found;
		Bitmapset  *idkey;
		TupleDesc	desc;
		LogicalRepRelation *remoterel;
		MemoryContext		oldctx;
		remoterel = &entry->remoterel;

		/* Try to find and lock the relation by name. */
		relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
											  remoterel->relname, -1),
								 lockmode, true);
		if (!OidIsValid(relid))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("logical replication target relation \"%s.%s\" does not exist",
							remoterel->nspname, remoterel->relname)));
		entry->localrel = heap_open(relid, NoLock);

		/*
		 * We currently only support writing to regular and partitioned
		 * tables.
		 */
		if (entry->localrel->rd_rel->relkind != RELKIND_RELATION)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("logical replication target relation \"%s.%s\" is not a table",
							remoterel->nspname, remoterel->relname)));

		/*
		 * Build the mapping of local attribute numbers to remote attribute
		 * numbers and validate that we don't miss any replicated columns
		 * as that would result in potentially unwanted data loss.
		 */
		desc = RelationGetDescr(entry->localrel);
		oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
		entry->attrmap = palloc(desc->natts * sizeof(int));
		MemoryContextSwitchTo(oldctx);

		found = 0;
		for (i = 0; i < desc->natts; i++)
		{
			int	attnum = logicalrep_rel_att_by_name(remoterel,
											NameStr(desc->attrs[i]->attname));
			entry->attrmap[i] = attnum;
			if (attnum >= 0)
				found++;
		}

		/* TODO, detail message with names of missing columns */
		if (found < remoterel->natts)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("logical replication target relation \"%s.%s\" is missing "
							"some replicated columns",
							remoterel->nspname, remoterel->relname)));

		/*
		 * Check that replica identity matches. We allow for stricter replica
		 * identity (fewer columns) on subscriber as that will not stop us
		 * from finding unique tuple. IE, if publisher has identity
		 * (id,timestamp) and subscriber just (id) this will not be a problem,
		 * but in the opposite scenario it will.
		 *
		 * Don't throw any error here just mark the relation entry as not
		 * updatable, as replica identity is only for updates and deletes
		 * but inserts can be replicated even without it.
		 */
		entry->updatable = true;
		idkey = RelationGetIndexAttrBitmap(entry->localrel,
										   INDEX_ATTR_BITMAP_IDENTITY_KEY);
		/* fallback to PK if no replica identity */
		if (idkey == NULL)
		{
			idkey = RelationGetIndexAttrBitmap(entry->localrel,
											   INDEX_ATTR_BITMAP_PRIMARY_KEY);
			/*
			 * If no replica identity index and no PK, the published table
			 * must have replica identity FULL.
			 */
			if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
				entry->updatable = false;
		}

		i = -1;
		while ((i = bms_next_member(idkey, i)) >= 0)
		{
			int attnum = i + FirstLowInvalidHeapAttributeNumber;

			if (!AttrNumberIsForUserDefinedAttr(attnum))
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("logical replication target relation \"%s.%s\" uses "
								"system columns in REPLICA IDENTITY index",
								remoterel->nspname, remoterel->relname)));

			attnum = AttrNumberGetAttrOffset(attnum);

			if (!bms_is_member(entry->attrmap[attnum], remoterel->attkeys))
			{
				entry->updatable = false;
				break;
			}
		}

		entry->localreloid = relid;
	}
	else
		entry->localrel = heap_open(entry->localreloid, lockmode);

	return entry;
}