/* * pg_ndistinct * output routine for type pg_ndistinct * * Produces a human-readable representation of the value. */ Datum pg_ndistinct_out(PG_FUNCTION_ARGS) { bytea *data = PG_GETARG_BYTEA_PP(0); MVNDistinct *ndist = statext_ndistinct_deserialize(data); int i; StringInfoData str; initStringInfo(&str); appendStringInfoChar(&str, '{'); for (i = 0; i < ndist->nitems; i++) { MVNDistinctItem item = ndist->items[i]; int x = -1; bool first = true; if (i > 0) appendStringInfoString(&str, ", "); while ((x = bms_next_member(item.attrs, x)) >= 0) { appendStringInfo(&str, "%s%d", first ? "\"" : ", ", x); first = false; } appendStringInfo(&str, "\": %d", (int) item.ndistinct); } appendStringInfoChar(&str, '}'); PG_RETURN_CSTRING(str.data); }
/*
 * fixup_inherited_columns
 *
 * Querying a table that has children implicitly accesses the child tables
 * too, so the security labels of the child tables and columns must be
 * checked as well.  Attribute numbers are not guaranteed to be the same
 * in the parent and its children, so the parent's column set has to be
 * translated by column name.
 *
 * Returns a bitmapset holding the child's attribute numbers that correspond
 * to the parent's attribute numbers in "columns".  Members of both sets are
 * offset by FirstLowInvalidHeapAttributeNumber.  When parentId and childId
 * are the same, "columns" itself is returned.
 */
static Bitmapset *
fixup_inherited_columns(Oid parentId, Oid childId, Bitmapset *columns)
{
	Bitmapset  *mapped = NULL;
	int			bit;

	/* same relation: nothing to translate */
	if (parentId == childId)
		return columns;

	for (bit = bms_next_member(columns, -1);
		 bit >= 0;
		 bit = bms_next_member(columns, bit))
	{
		/* bit numbers are offset by FirstLowInvalidHeapAttributeNumber */
		AttrNumber	attno = bit + FirstLowInvalidHeapAttributeNumber;
		char	   *colname;

		if (attno != InvalidAttrNumber)
		{
			/* look the column up by name in the parent ... */
			colname = get_attname(parentId, attno);
			if (colname == NULL)
				elog(ERROR, "cache lookup failed for attribute %d of relation %u",
					 attno, parentId);

			/* ... and find the column of that name in the child */
			attno = get_attnum(childId, colname);
			if (attno == InvalidAttrNumber)
				elog(ERROR, "cache lookup failed for attribute %s of relation %u",
					 colname, childId);

			mapped = bms_add_member(mapped,
									attno - FirstLowInvalidHeapAttributeNumber);

			pfree(colname);
		}
		else
		{
			/* whole-row reference: copied as is, to be fixed up later */
			mapped = bms_add_member(mapped, bit);
		}
	}

	return mapped;
}
/*
 * ExecScanReScan
 *
 * Must be called from the ReScan function of every plan node type that
 * uses ExecScan().
 *
 * When running inside an EvalPlanQual recheck (es_epqScanDone is set), the
 * "scan done" flag of each range table index covered by this node is
 * cleared so that the EvalPlanQual tuple is returned again.
 */
void
ExecScanReScan(ScanState *node)
{
	EState	   *estate = node->ps.state;
	Index		scanrelid;
	Bitmapset  *relids;
	int			rtindex;

	/* Nothing to do unless we're inside an EvalPlanQual recheck */
	if (estate->es_epqScanDone == NULL)
		return;

	scanrelid = ((Scan *) node->ps.plan)->scanrelid;

	/* Ordinary case: the node scans exactly one relation */
	if (scanrelid > 0)
	{
		estate->es_epqScanDone[scanrelid - 1] = false;
		return;
	}

	/*
	 * If an FDW or custom scan provider has replaced the join with a scan,
	 * there are multiple RTIs; reset the epqScanDone flag for all of them.
	 */
	if (IsA(node->ps.plan, ForeignScan))
		relids = ((ForeignScan *) node->ps.plan)->fs_relids;
	else if (IsA(node->ps.plan, CustomScan))
		relids = ((CustomScan *) node->ps.plan)->custom_relids;
	else
		elog(ERROR, "unexpected scan node: %d",
			 (int) nodeTag(node->ps.plan));

	for (rtindex = bms_next_member(relids, -1);
		 rtindex >= 0;
		 rtindex = bms_next_member(relids, rtindex))
	{
		Assert(rtindex > 0);
		estate->es_epqScanDone[rtindex - 1] = false;
	}
}
/*
 * statext_ndistinct_serialize
 *		Serialize an MVNDistinct into the on-disk bytea format.
 *
 * Layout of the result, after the varlena header:
 *
 *		magic (uint32), type (uint32), nitems (uint32)
 *		then, for every item:
 *			ndistinct (double), number of attributes (int),
 *			the attribute numbers (AttrNumber each)
 *
 * The result is allocated with palloc().
 */
bytea *
statext_ndistinct_serialize(MVNDistinct *ndistinct)
{
	bytea	   *output;
	char	   *ptr;
	Size		len;
	int			i;

	Assert(ndistinct->magic == STATS_NDISTINCT_MAGIC);
	Assert(ndistinct->type == STATS_NDISTINCT_TYPE_BASIC);

	/*
	 * Fixed part: the scalar header fields plus, per item, the scalar part
	 * of the item struct and the attribute count.
	 */
	len = VARHDRSZ + SizeOfMVNDistinct +
		ndistinct->nitems * (offsetof(MVNDistinctItem, attrs) + sizeof(int));

	/* Variable part: the attribute numbers of every item */
	for (i = 0; i < ndistinct->nitems; i++)
	{
		int			nmembers = bms_num_members(ndistinct->items[i].attrs);

		Assert(nmembers >= 2);

		len += sizeof(AttrNumber) * nmembers;
	}

	output = (bytea *) palloc(len);
	SET_VARSIZE(output, len);

	ptr = VARDATA(output);

	/* Header fields: magic, type, nitems */
	memcpy(ptr, &ndistinct->magic, sizeof(uint32));
	ptr += sizeof(uint32);

	memcpy(ptr, &ndistinct->type, sizeof(uint32));
	ptr += sizeof(uint32);

	memcpy(ptr, &ndistinct->nitems, sizeof(uint32));
	ptr += sizeof(uint32);

	/* Then each item: estimate, attribute count, attribute numbers */
	for (i = 0; i < ndistinct->nitems; i++)
	{
		const MVNDistinctItem *item = &ndistinct->items[i];
		int			nmembers = bms_num_members(item->attrs);
		int			member;

		memcpy(ptr, &item->ndistinct, sizeof(double));
		ptr += sizeof(double);

		memcpy(ptr, &nmembers, sizeof(int));
		ptr += sizeof(int);

		for (member = bms_next_member(item->attrs, -1);
			 member >= 0;
			 member = bms_next_member(item->attrs, member))
		{
			AttrNumber	value = (AttrNumber) member;

			memcpy(ptr, &value, sizeof(AttrNumber));
			ptr += sizeof(AttrNumber);
		}

		/* we must never write past the computed length */
		Assert(ptr <= ((char *) output + len));
	}

	return output;
}
/*
 * pathman_join_pathlist_hook
 *		Take care of joins.
 *
 * Join-pathlist hook.  The part shown here first chains to any previously
 * installed hook, then bails out unless the inner relation is a base
 * relation with PartRelationInfo that may act as a parent, rejects
 * UPDATE/DELETE joins between partitioned tables, splits the restriction
 * list into join clauses and other clauses, and accumulates the parameter
 * selectivity of the join clauses against the partitioning expression.
 */
void
pathman_join_pathlist_hook(PlannerInfo *root,
						   RelOptInfo *joinrel,
						   RelOptInfo *outerrel,
						   RelOptInfo *innerrel,
						   JoinType jointype,
						   JoinPathExtraData *extra)
{
	JoinCostWorkspace workspace;
	JoinType saved_jointype = jointype;
	RangeTblEntry *inner_rte = root->simple_rte_array[innerrel->relid];
	const PartRelationInfo *inner_prel;
	List *joinclauses,
		 *otherclauses;
	WalkerContext context;
	double paramsel;
	Node *part_expr;
	ListCell *lc;

	/* Call hooks set by other extensions */
	if (pathman_set_join_pathlist_next)
		pathman_set_join_pathlist_next(root, joinrel, outerrel,
									   innerrel, jointype, extra);

	/* Check that both pg_pathman & RuntimeAppend nodes are enabled */
	if (!IsPathmanReady() || !pg_pathman_enable_runtimeappend)
		return;

	/* We should only consider base relations */
	if (innerrel->reloptkind != RELOPT_BASEREL)
		return;

	/* We shouldn't process tables with active children */
	if (inner_rte->inh)
		return;

	/* We can't handle full or right outer joins */
	if (jointype == JOIN_FULL || jointype == JOIN_RIGHT)
		return;

	/*
	 * Check that innerrel is a BASEREL with PartRelationInfo.
	 *
	 * NOTE(review): the reloptkind test repeats the one made above; the
	 * effective part of this condition is the assignment of inner_prel.
	 */
	if (innerrel->reloptkind != RELOPT_BASEREL ||
		!(inner_prel = get_pathman_relation_info(inner_rte->relid)))
		return;

	/*
	 * Check if query is:
	 *		1) UPDATE part_table SET = .. FROM part_table.
	 *		2) DELETE FROM part_table USING part_table.
	 *
	 * Either outerrel or innerrel may be a result relation.
	 */
	if ((root->parse->resultRelation == outerrel->relid ||
		 root->parse->resultRelation == innerrel->relid) &&
			(root->parse->commandType == CMD_UPDATE ||
			 root->parse->commandType == CMD_DELETE))
	{
		int rti = -1,
			count = 0;

		/* Inner relation must be partitioned */
		Assert(inner_prel);

		/* Check each base rel of outer relation */
		while ((rti = bms_next_member(outerrel->relids, rti)) >= 0)
		{
			Oid outer_baserel = root->simple_rte_array[rti]->relid;

			/* Is it partitioned? */
			if (get_pathman_relation_info(outer_baserel))
				count++;
		}

		/* At least one partitioned rel on the outer side as well: give up */
		if (count > 0)
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("DELETE and UPDATE queries with a join "
							"of partitioned tables are not supported")));
	}

	/* Skip if inner table is not allowed to act as parent (e.g. FROM ONLY) */
	if (PARENTHOOD_DISALLOWED == get_rel_parenthood_status(inner_rte))
		return;

	/*
	 * These codes are used internally in the planner, but are not supported
	 * by the executor (nor, indeed, by most of the planner).
	 */
	if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
		jointype = JOIN_INNER; /* replace with a proper value */

	/* Extract join clauses which will separate partitions */
	if (IS_OUTER_JOIN(extra->sjinfo->jointype))
	{
		extract_actual_join_clauses_compat(extra->restrictlist,
										   joinrel->relids,
										   &joinclauses,
										   &otherclauses);
	}
	else
	{
		/* We can treat all clauses alike for an inner join */
		joinclauses = extract_actual_clauses(extra->restrictlist, false);
		otherclauses = NIL;
	}

	/* Make copy of partitioning expression and fix Var's varno attributes */
	part_expr = PrelExpressionForRelid(inner_prel, innerrel->relid);

	/*
	 * Multiply together the paramsel reported by walk_expr_tree() for each
	 * join clause; the walker context is re-initialized for every clause.
	 */
	paramsel = 1.0;
	foreach (lc, joinclauses)
	{
		WrapperNode *wrap;

		InitWalkerContext(&context, part_expr, inner_prel, NULL);
		wrap = walk_expr_tree((Expr *) lfirst(lc), &context);
		paramsel *= wrap->paramsel;
	}
/*
 * statext_dependencies_build
 *		Detect functional dependencies between groups of columns.
 *
 * Generates all possible subsets of columns (variations) and computes the
 * degree of validity for each one.  For example, statistics on three
 * columns (a,b,c) give 9 possible dependencies:
 *
 *	   two columns			  three columns
 *	   -----------			  -------------
 *	   (a) -> b				  (a,b) -> c
 *	   (a) -> c				  (a,c) -> b
 *	   (b) -> a				  (b,c) -> a
 *	   (b) -> c
 *	   (c) -> a
 *	   (c) -> b
 *
 * Dependencies whose degree is exactly 0.0 are not stored.  Returns NULL
 * when no dependency was stored at all.
 */
MVDependencies *
statext_dependencies_build(int numrows, HeapTuple *rows, Bitmapset *attrs,
						   VacAttrStats **stats)
{
	MVDependencies *result = NULL;
	int			numattrs = bms_num_members(attrs);
	int		   *attnums;
	int			nfilled = 0;
	int			member;
	int			k;

	/*
	 * Flatten the bitmapset into an array so that its i-th member can be
	 * addressed directly.
	 */
	attnums = palloc(sizeof(int) * numattrs);

	for (member = bms_next_member(attrs, -1);
		 member >= 0;
		 member = bms_next_member(attrs, member))
		attnums[nfilled++] = member;

	Assert(numattrs >= 2);

	/*
	 * Build dependencies from the smallest ones (covering 2 columns) up to
	 * the largest ones (covering all columns of the statistics object).  The
	 * smallest come first because we want to be able to skip already
	 * implied ones.
	 */
	for (k = 2; k <= numattrs; k++)
	{
		/* generator of all variations of k values out of numattrs */
		DependencyGenerator generator = DependencyGenerator_init(numattrs, k);
		AttrNumber *dependency; /* array with k elements */

		while ((dependency = DependencyGenerator_next(generator)) != NULL)
		{
			/* how valid does this dependency look on the sample? */
			double		degree = dependency_degree(numrows, rows, k,
												   dependency, stats, attrs);
			MVDependency *dep;
			int			pos;

			/* entirely invalid dependencies are not worth storing */
			if (degree == 0.0)
				continue;

			dep = (MVDependency *) palloc0(offsetof(MVDependency, attributes)
										   + k * sizeof(AttrNumber));

			/* copy the dependency (and keep the indexes into stxkeys) */
			dep->degree = degree;
			dep->nattributes = k;
			for (pos = 0; pos < k; pos++)
				dep->attributes[pos] = attnums[dependency[pos]];

			/* first stored dependency: set up the container */
			if (result == NULL)
			{
				result = (MVDependencies *) palloc0(sizeof(MVDependencies));

				result->magic = STATS_DEPS_MAGIC;
				result->type = STATS_DEPS_TYPE_BASIC;
				result->ndeps = 0;
			}

			/* grow the container by one slot and append */
			result->ndeps++;
			result = (MVDependencies *) repalloc(result,
												 offsetof(MVDependencies, deps)
												 + result->ndeps * sizeof(MVDependency));

			result->deps[result->ndeps - 1] = dep;
		}

		/* done with the variations of k elements */
		DependencyGenerator_free(generator);
	}

	return result;
}
/*
 * dependency_degree
 *		Validate a functional dependency on the sampled data.
 *
 * The actual work horse of detecting functional dependencies.  Given a
 * variation of k attributes, it checks that the first (k-1) are sufficient
 * to determine the last one.
 *
 * numrows/rows	sampled heap tuples
 * k			number of attributes in the dependency (at least 2)
 * dependency	k indexes; each selects an entry of "stats" and the member
 *				of "attrs" at the same position
 * stats		per-column statistics entries
 * attrs		attribute numbers covered by the statistics object
 *
 * Returns the fraction of rows that belong to groups (by the first k-1
 * columns) with a single value in the last column.  Returns 0.0 when there
 * are no rows, as an empty sample cannot support any dependency.
 */
static double
dependency_degree(int numrows, HeapTuple *rows, int k, AttrNumber *dependency,
				  VacAttrStats **stats, Bitmapset *attrs)
{
	int			i,
				j;
	int			nvalues = numrows * k;
	MultiSortSupport mss;
	SortItem   *items;
	Datum	   *values;
	bool	   *isnull;
	int		   *attnums;

	/* counters valid within a group */
	int			group_size = 0;
	int			n_violations = 0;

	/* total number of rows supporting (consistent with) the dependency */
	int			n_supporting_rows = 0;

	/* Make sure we have at least two input attributes. */
	Assert(k >= 2);

	/*
	 * Without rows the final division would be 0/0, i.e. NaN, which callers
	 * comparing the result against 0.0 would not recognize as "invalid".
	 */
	if (numrows <= 0)
		return 0.0;

	/* sort info for all attributes columns */
	mss = multi_sort_init(k);

	/* data for the sort */
	items = (SortItem *) palloc(numrows * sizeof(SortItem));
	values = (Datum *) palloc(sizeof(Datum) * nvalues);
	isnull = (bool *) palloc(sizeof(bool) * nvalues);

	/* fix the pointers to values/isnull */
	for (i = 0; i < numrows; i++)
	{
		items[i].values = &values[i * k];
		items[i].isnull = &isnull[i * k];
	}

	/*
	 * Transform the bms into an array, to make accessing i-th member easier.
	 */
	attnums = (int *) palloc(sizeof(int) * bms_num_members(attrs));
	i = 0;
	j = -1;
	while ((j = bms_next_member(attrs, j)) >= 0)
		attnums[i++] = j;

	/*
	 * Verify the dependency (a,b,...)->z, using a rather simple algorithm:
	 *
	 * (a) sort the data lexicographically
	 *
	 * (b) split the data into groups by first (k-1) columns
	 *
	 * (c) for each group count different values in the last column
	 */

	/* prepare the sort function for each dimension, and the SortItem array */
	for (i = 0; i < k; i++)
	{
		VacAttrStats *colstat = stats[dependency[i]];
		TypeCacheEntry *type;

		type = lookup_type_cache(colstat->attrtypid, TYPECACHE_LT_OPR);
		if (type->lt_opr == InvalidOid) /* shouldn't happen */
			elog(ERROR, "cache lookup failed for ordering operator for type %u",
				 colstat->attrtypid);

		/* prepare the sort function for this dimension */
		multi_sort_add_dimension(mss, i, type->lt_opr);

		/*
		 * Fetch this dimension's value from every row.  The tuple descriptor
		 * is taken from the same stats entry that describes the column
		 * (colstat), not from the entry at the dimension's position.
		 */
		for (j = 0; j < numrows; j++)
		{
			items[j].values[i] =
				heap_getattr(rows[j], attnums[dependency[i]],
							 colstat->tupDesc, &items[j].isnull[i]);
		}
	}

	/* sort the items so that we can detect the groups */
	qsort_arg((void *) items, numrows, sizeof(SortItem),
			  multi_sort_compare, mss);

	/*
	 * Walk through the sorted array, split it into groups according to the
	 * first (k-1) columns.  If there's a single value in the last column, we
	 * count the group as 'supporting' the functional dependency.  Otherwise
	 * we count it as contradicting.
	 */

	/* start with the first row forming a group */
	group_size = 1;

	/* loop 1 beyond the end of the array so that we count the final group */
	for (i = 1; i <= numrows; i++)
	{
		/*
		 * Check if the group ended, which may be either because we processed
		 * all the items (i==numrows), or because the i-th item is not equal
		 * to the preceding one.
		 */
		if (i == numrows ||
			multi_sort_compare_dims(0, k - 2, &items[i - 1], &items[i], mss) != 0)
		{
			/*
			 * If no violations were found in the group then track the rows of
			 * the group as supporting the functional dependency.
			 */
			if (n_violations == 0)
				n_supporting_rows += group_size;

			/* Reset counters for the new group */
			n_violations = 0;
			group_size = 1;
			continue;
		}
		/* first columns match, but the last one does not (so contradicting) */
		else if (multi_sort_compare_dim(k - 1, &items[i - 1], &items[i], mss) != 0)
			n_violations++;

		group_size++;
	}

	/*
	 * Release all scratch memory, including attnums: this function runs once
	 * per candidate dependency, so anything left behind accumulates.
	 */
	pfree(items);
	pfree(values);
	pfree(isnull);
	pfree(attnums);
	pfree(mss);

	/* Compute the 'degree of validity' as (supporting/total). */
	return (n_supporting_rows * 1.0 / numrows);
}
/*
 * logicalrep_rel_open
 *		Open the local relation associated with the remote one.
 *
 * Rebuilds the cached mapping when the entry has no valid local relation
 * OID (e.g. after it was invalidated by local DDL).
 *
 * remoteid		key of the remote relation in LogicalRepRelMap
 * lockmode		lock taken on the local relation: by name when the mapping
 *				is (re)built, through heap_open() otherwise
 *
 * Returns the map entry with entry->localrel opened.  Raises an error when
 * there is no entry for remoteid, when the local relation does not exist or
 * is not a regular table, when replicated columns are missing locally, or
 * when the local identity key uses system columns.  A replica identity
 * that is unusable for other reasons only clears entry->updatable.
 */
LogicalRepRelMapEntry *
logicalrep_rel_open(LogicalRepRelId remoteid, LOCKMODE lockmode)
{
	LogicalRepRelMapEntry *entry;
	bool		found;

	if (LogicalRepRelMap == NULL)
		logicalrep_relmap_init();

	/* Search for existing entry. */
	entry = hash_search(LogicalRepRelMap, (void *) &remoteid,
						HASH_FIND, &found);

	if (!found)
		elog(ERROR, "no relation map entry for remote relation ID %u",
			 remoteid);

	/* Need to update the local cache? */
	if (!OidIsValid(entry->localreloid))
	{
		Oid			relid;
		int			i;
		int			nfound;		/* local columns that have a remote match */
		Bitmapset  *idkey;
		TupleDesc	desc;
		LogicalRepRelation *remoterel;
		MemoryContext oldctx;

		remoterel = &entry->remoterel;

		/* Try to find and lock the relation by name. */
		relid = RangeVarGetRelid(makeRangeVar(remoterel->nspname,
											  remoterel->relname, -1),
								 lockmode, true);
		if (!OidIsValid(relid))
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("logical replication target relation \"%s.%s\" does not exist",
							remoterel->nspname, remoterel->relname)));

		/* The lock was already taken by the lookup above. */
		entry->localrel = heap_open(relid, NoLock);

		/* We currently only support writing to regular tables. */
		if (entry->localrel->rd_rel->relkind != RELKIND_RELATION)
			ereport(ERROR,
					(errcode(ERRCODE_WRONG_OBJECT_TYPE),
					 errmsg("logical replication target relation \"%s.%s\" is not a table",
							remoterel->nspname, remoterel->relname)));

		/*
		 * Build the mapping of local attribute numbers to remote attribute
		 * numbers and validate that we don't miss any replicated columns as
		 * that would result in potentially unwanted data loss.
		 */
		desc = RelationGetDescr(entry->localrel);
		oldctx = MemoryContextSwitchTo(LogicalRepRelMapContext);
		entry->attrmap = palloc(desc->natts * sizeof(int));
		MemoryContextSwitchTo(oldctx);

		nfound = 0;
		for (i = 0; i < desc->natts; i++)
		{
			int			attnum = logicalrep_rel_att_by_name(remoterel,
															NameStr(desc->attrs[i]->attname));

			/* a negative value marks a local column without remote match */
			entry->attrmap[i] = attnum;
			if (attnum >= 0)
				nfound++;
		}

		/* TODO, detail message with names of missing columns */
		if (nfound < remoterel->natts)
			ereport(ERROR,
					(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
					 errmsg("logical replication target relation \"%s.%s\" is missing "
							"some replicated columns",
							remoterel->nspname, remoterel->relname)));

		/*
		 * Check that replica identity matches.  We allow for stricter replica
		 * identity (fewer columns) on subscriber as that will not stop us
		 * from finding unique tuple.  I.e., if publisher has identity
		 * (id,timestamp) and subscriber just (id) this will not be a problem,
		 * but in the opposite scenario it will.
		 *
		 * Don't throw any error here, just mark the relation entry as not
		 * updatable, as replica identity is only for updates and deletes but
		 * inserts can be replicated even without it.
		 */
		entry->updatable = true;
		idkey = RelationGetIndexAttrBitmap(entry->localrel,
										   INDEX_ATTR_BITMAP_IDENTITY_KEY);
		/* fallback to PK if no replica identity */
		if (idkey == NULL)
		{
			idkey = RelationGetIndexAttrBitmap(entry->localrel,
											   INDEX_ATTR_BITMAP_PRIMARY_KEY);

			/*
			 * If no replica identity index and no PK, the published table
			 * must have replica identity FULL.
			 */
			if (idkey == NULL && remoterel->replident != REPLICA_IDENTITY_FULL)
				entry->updatable = false;
		}

		i = -1;
		while ((i = bms_next_member(idkey, i)) >= 0)
		{
			int			attnum = i + FirstLowInvalidHeapAttributeNumber;

			if (!AttrNumberIsForUserDefinedAttr(attnum))
				ereport(ERROR,
						(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
						 errmsg("logical replication target relation \"%s.%s\" uses "
								"system columns in REPLICA IDENTITY index",
								remoterel->nspname, remoterel->relname)));

			attnum = AttrNumberGetAttrOffset(attnum);

			/*
			 * A local identity column without a remote counterpart has a
			 * negative attrmap entry.  It must be tested before the
			 * bitmapset lookup, because a negative member is not a valid
			 * argument for bms_is_member(); such a column simply makes the
			 * relation not updatable.
			 */
			if (entry->attrmap[attnum] < 0 ||
				!bms_is_member(entry->attrmap[attnum], remoterel->attkeys))
			{
				entry->updatable = false;
				break;
			}
		}

		entry->localreloid = relid;
	}
	else
		entry->localrel = heap_open(entry->localreloid, lockmode);

	return entry;
}