/* * sepgsql_attribute_setattr * * It checks privileges to alter the supplied column. */ void sepgsql_attribute_setattr(Oid relOid, AttrNumber attnum) { ObjectAddress object; char *audit_name; char relkind = get_rel_relkind(relOid); if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE) return; /* * check db_column:{setattr} permission */ object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = attnum; audit_name = getObjectIdentity(&object); sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_COLUMN, SEPG_DB_COLUMN__SETATTR, audit_name, true); pfree(audit_name); }
/* * Callback to RangeVarGetRelidExtended(). * * Checks the following: * - the relation specified is a table. * - current user owns the table. * - the table is not a system table. * * If any of these checks fails then an error is raised. */ static void RangeVarCallbackForPolicy(const RangeVar *rv, Oid relid, Oid oldrelid, void *arg) { HeapTuple tuple; Form_pg_class classform; char relkind; tuple = SearchSysCache1(RELOID, ObjectIdGetDatum(relid)); if (!HeapTupleIsValid(tuple)) return; classform = (Form_pg_class) GETSTRUCT(tuple); relkind = classform->relkind; /* Must own relation. */ if (!pg_class_ownercheck(relid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, get_relkind_objtype(get_rel_relkind(relid)), rv->relname); /* No system table modifications unless explicitly allowed. */ if (!allowSystemTableMods && IsSystemClass(relid, classform)) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied: \"%s\" is a system catalog", rv->relname))); /* Relation type MUST be a table. */ if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table", rv->relname))); ReleaseSysCache(tuple); }
/* * GetTableReplicaIdentityCommand returns the list of DDL commands to * (re)define the replica identity choice for a given table. */ static List * GetTableReplicaIdentityCommand(Oid relationId) { List *replicaIdentityCreateCommandList = NIL; char *replicaIdentityCreateCommand = NULL; /* * We skip non-relations because postgres does not support * ALTER TABLE .. REPLICA IDENTITY on non-relations. */ char relationKind = get_rel_relkind(relationId); if (relationKind != RELKIND_RELATION) { return NIL; } replicaIdentityCreateCommand = pg_get_replica_identity_command(relationId); if (replicaIdentityCreateCommand) { replicaIdentityCreateCommandList = lappend(replicaIdentityCreateCommandList, replicaIdentityCreateCommand); } return replicaIdentityCreateCommandList; }
/* * sepgsql_attribute_post_create * * This routine assigns a default security label on a newly defined * column, using ALTER TABLE ... ADD COLUMN. * Note that this routine is not invoked in the case of CREATE TABLE, * although it also defines columns in addition to table. */ void sepgsql_attribute_post_create(Oid relOid, AttrNumber attnum) { char *scontext = sepgsql_get_client_label(); char *tcontext; char *ncontext; ObjectAddress object; /* * Only attributes within regular relation have individual security * labels. */ if (get_rel_relkind(relOid) != RELKIND_RELATION) return; /* * Compute a default security label when we create a new procedure object * under the specified namespace. */ scontext = sepgsql_get_client_label(); tcontext = sepgsql_get_label(RelationRelationId, relOid, 0); ncontext = sepgsql_compute_create(scontext, tcontext, SEPG_CLASS_DB_COLUMN); /* * Assign the default security label on a new procedure */ object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = attnum; SetSecurityLabel(&object, SEPGSQL_LABEL_TAG, ncontext); pfree(tcontext); pfree(ncontext); }
/* * CStoreTable checks if the given table name belongs to a foreign columnar store * table. If it does, the function returns true. Otherwise, it returns false. */ static bool CStoreTable(RangeVar *rangeVar) { bool cstoreTable = false; Relation relation = heap_openrv(rangeVar, AccessShareLock); Oid relationId = RelationGetRelid(relation); char relationKind = get_rel_relkind(relationId); if (relationKind == RELKIND_FOREIGN_TABLE) { ForeignTable *foreignTable = GetForeignTable(relationId); ForeignServer *server = GetForeignServer(foreignTable->serverid); ForeignDataWrapper *foreignDataWrapper = GetForeignDataWrapper(server->fdwid); char *foreignWrapperName = foreignDataWrapper->fdwname; if (strncmp(foreignWrapperName, CSTORE_FDW_NAME, NAMEDATALEN) == 0) { cstoreTable = true; } } heap_close(relation, AccessShareLock); return cstoreTable; }
/* * ShardStorageType returns the shard storage type according to relation type. */ char ShardStorageType(Oid relationId) { char shardStorageType = 0; char relationType = get_rel_relkind(relationId); if (RegularTable(relationId)) { shardStorageType = SHARD_STORAGE_TABLE; } else if (relationType == RELKIND_FOREIGN_TABLE) { bool cstoreTable = CStoreTable(relationId); if (cstoreTable) { shardStorageType = SHARD_STORAGE_COLUMNAR; } else { shardStorageType = SHARD_STORAGE_FOREIGN; } } else { ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("unexpected relation type: %c", relationType))); } return shardStorageType; }
/* * GetTableCreationCommands takes in a relationId, and returns the list of DDL * commands needed to reconstruct the relation, excluding indexes and * constraints. */ List * GetTableCreationCommands(Oid relationId, bool includeSequenceDefaults) { List *tableDDLEventList = NIL; char tableType = 0; char *tableSchemaDef = NULL; char *tableColumnOptionsDef = NULL; char *createSchemaCommand = NULL; Oid schemaId = InvalidOid; /* * Set search_path to NIL so that all objects outside of pg_catalog will be * schema-prefixed. pg_catalog will be added automatically when we call * PushOverrideSearchPath(), since we set addCatalog to true; */ OverrideSearchPath *overridePath = GetOverrideSearchPath(CurrentMemoryContext); overridePath->schemas = NIL; overridePath->addCatalog = true; PushOverrideSearchPath(overridePath); /* if foreign table, fetch extension and server definitions */ tableType = get_rel_relkind(relationId); if (tableType == RELKIND_FOREIGN_TABLE) { char *extensionDef = pg_get_extensiondef_string(relationId); char *serverDef = pg_get_serverdef_string(relationId); if (extensionDef != NULL) { tableDDLEventList = lappend(tableDDLEventList, extensionDef); } tableDDLEventList = lappend(tableDDLEventList, serverDef); } /* create schema if the table is not in the default namespace (public) */ schemaId = get_rel_namespace(relationId); createSchemaCommand = CreateSchemaDDLCommand(schemaId); if (createSchemaCommand != NULL) { tableDDLEventList = lappend(tableDDLEventList, createSchemaCommand); } /* fetch table schema and column option definitions */ tableSchemaDef = pg_get_tableschemadef_string(relationId, includeSequenceDefaults); tableColumnOptionsDef = pg_get_tablecolumnoptionsdef_string(relationId); tableDDLEventList = lappend(tableDDLEventList, tableSchemaDef); if (tableColumnOptionsDef != NULL) { tableDDLEventList = lappend(tableDDLEventList, tableColumnOptionsDef); } /* revert back to original search_path */ PopOverrideSearchPath(); return tableDDLEventList; }
/* * get_constraint_index * Given the OID of a unique or primary-key constraint, return the * OID of the underlying unique index. * * Return InvalidOid if the index couldn't be found; this suggests the * given OID is bogus, but we leave it to caller to decide what to do. */ Oid get_constraint_index(Oid constraintId) { Oid indexId = InvalidOid; Relation depRel; ScanKeyData key[3]; SysScanDesc scan; HeapTuple tup; /* Search the dependency table for the dependent index */ depRel = heap_open(DependRelationId, AccessShareLock); ScanKeyInit(&key[0], Anum_pg_depend_refclassid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(ConstraintRelationId)); ScanKeyInit(&key[1], Anum_pg_depend_refobjid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(constraintId)); ScanKeyInit(&key[2], Anum_pg_depend_refobjsubid, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(0)); scan = systable_beginscan(depRel, DependReferenceIndexId, true, NULL, 3, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); /* * We assume any internal dependency of an index on the constraint * must be what we are looking for. */ if (deprec->classid == RelationRelationId && deprec->objsubid == 0 && deprec->deptype == DEPENDENCY_INTERNAL) { char relkind = get_rel_relkind(deprec->objid); /* This is pure paranoia; there shouldn't be any such */ if (relkind != RELKIND_INDEX && relkind != RELKIND_PARTITIONED_INDEX) break; indexId = deprec->objid; break; } } systable_endscan(scan); heap_close(depRel, AccessShareLock); return indexId; }
/* * Collect a list of OIDs of all sequences owned by the specified relation, * and column if specified. */ List * getOwnedSequences(Oid relid, AttrNumber attnum) { List *result = NIL; Relation depRel; ScanKeyData key[3]; SysScanDesc scan; HeapTuple tup; depRel = heap_open(DependRelationId, AccessShareLock); ScanKeyInit(&key[0], Anum_pg_depend_refclassid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(RelationRelationId)); ScanKeyInit(&key[1], Anum_pg_depend_refobjid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relid)); if (attnum) ScanKeyInit(&key[2], Anum_pg_depend_refobjsubid, BTEqualStrategyNumber, F_INT4EQ, Int32GetDatum(attnum)); scan = systable_beginscan(depRel, DependReferenceIndexId, true, NULL, attnum ? 3 : 2, key); while (HeapTupleIsValid(tup = systable_getnext(scan))) { Form_pg_depend deprec = (Form_pg_depend) GETSTRUCT(tup); /* * We assume any auto or internal dependency of a sequence on a column * must be what we are looking for. (We need the relkind test because * indexes can also have auto dependencies on columns.) */ if (deprec->classid == RelationRelationId && deprec->objsubid == 0 && deprec->refobjsubid != 0 && (deprec->deptype == DEPENDENCY_AUTO || deprec->deptype == DEPENDENCY_INTERNAL) && get_rel_relkind(deprec->objid) == RELKIND_SEQUENCE) { result = lappend_oid(result, deprec->objid); } } systable_endscan(scan); heap_close(depRel, AccessShareLock); return result; }
/* * sepgsql_relation_relabel * * It checks privileges to relabel the supplied relation by the `seclabel'. */ void sepgsql_relation_relabel(Oid relOid, const char *seclabel) { char *scontext = sepgsql_get_client_label(); char *tcontext; char *audit_name; char relkind; uint16_t tclass = 0; relkind = get_rel_relkind(relOid); if (relkind == RELKIND_RELATION) tclass = SEPG_CLASS_DB_TABLE; else if (relkind == RELKIND_SEQUENCE) tclass = SEPG_CLASS_DB_SEQUENCE; else if (relkind == RELKIND_VIEW) tclass = SEPG_CLASS_DB_VIEW; else ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot set security labels on relations except " "for tables, sequences or views"))); audit_name = getObjectDescriptionOids(RelationRelationId, relOid); /* * check db_xxx:{setattr relabelfrom} permission */ tcontext = sepgsql_get_label(RelationRelationId, relOid, 0); sepgsql_check_perms(scontext, tcontext, tclass, SEPG_DB_TABLE__SETATTR | SEPG_DB_TABLE__RELABELFROM, audit_name, true); /* * check db_xxx:{relabelto} permission */ sepgsql_check_perms(scontext, seclabel, tclass, SEPG_DB_TABLE__RELABELTO, audit_name, true); pfree(tcontext); pfree(audit_name); }
/* * sepgsql_relation_setattr * * It checks privileges to set attribute of the supplied relation */ void sepgsql_relation_setattr(Oid relOid) { ObjectAddress object; char *audit_name; uint16_t tclass; switch (get_rel_relkind(relOid)) { case RELKIND_RELATION: tclass = SEPG_CLASS_DB_TABLE; break; case RELKIND_SEQUENCE: tclass = SEPG_CLASS_DB_SEQUENCE; break; case RELKIND_VIEW: tclass = SEPG_CLASS_DB_VIEW; break; case RELKIND_INDEX: /* deal with indexes specially */ sepgsql_index_modify(relOid); return; default: /* other relkinds don't need additional work */ return; } object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = 0; audit_name = getObjectDescription(&object); /* * XXX - we should add checks related to namespace stuff, when * object_access_hook get support for ALTER statement. Right now, there is * no invocation path on ALTER ... RENAME TO / SET SCHEMA. */ /* * check db_xxx:{setattr} permission */ sepgsql_avc_check_perms(&object, tclass, SEPG_DB_TABLE__SETATTR, audit_name, true); pfree(audit_name); }
/* * sepgsql_relation_relabel * * It checks privileges to relabel the supplied relation by the `seclabel'. */ void sepgsql_relation_relabel(Oid relOid, const char *seclabel) { ObjectAddress object; char *audit_name; char relkind; uint16_t tclass = 0; relkind = get_rel_relkind(relOid); if (relkind == RELKIND_RELATION) tclass = SEPG_CLASS_DB_TABLE; else if (relkind == RELKIND_SEQUENCE) tclass = SEPG_CLASS_DB_SEQUENCE; else if (relkind == RELKIND_VIEW) tclass = SEPG_CLASS_DB_VIEW; else ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot set security labels on relations except " "for tables, sequences or views"))); object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = 0; audit_name = getObjectDescription(&object); /* * check db_xxx:{setattr relabelfrom} permission */ sepgsql_avc_check_perms(&object, tclass, SEPG_DB_TABLE__SETATTR | SEPG_DB_TABLE__RELABELFROM, audit_name, true); /* * check db_xxx:{relabelto} permission */ sepgsql_avc_check_perms_label(seclabel, tclass, SEPG_DB_TABLE__RELABELTO, audit_name, true); pfree(audit_name); }
/* * sepgsql_attribute_relabel * * It checks privileges to relabel the supplied column * by the `seclabel'. */ void sepgsql_attribute_relabel(Oid relOid, AttrNumber attnum, const char *seclabel) { char *scontext = sepgsql_get_client_label(); char *tcontext; char *audit_name; ObjectAddress object; if (get_rel_relkind(relOid) != RELKIND_RELATION) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot set security label on non-regular columns"))); object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = attnum; audit_name = getObjectDescription(&object); /* * check db_column:{setattr relabelfrom} permission */ tcontext = sepgsql_get_label(RelationRelationId, relOid, attnum); sepgsql_check_perms(scontext, tcontext, SEPG_CLASS_DB_COLUMN, SEPG_DB_COLUMN__SETATTR | SEPG_DB_COLUMN__RELABELFROM, audit_name, true); /* * check db_column:{relabelto} permission */ sepgsql_check_perms(scontext, seclabel, SEPG_CLASS_DB_COLUMN, SEPG_DB_PROCEDURE__RELABELTO, audit_name, true); pfree(tcontext); pfree(audit_name); }
/* * CStoreTable returns true if the given relationId belongs to a foreign cstore * table, otherwise it returns false. */ bool CStoreTable(Oid relationId) { bool cstoreTable = false; char relationKind = get_rel_relkind(relationId); if (relationKind == RELKIND_FOREIGN_TABLE) { ForeignTable *foreignTable = GetForeignTable(relationId); ForeignServer *server = GetForeignServer(foreignTable->serverid); ForeignDataWrapper *foreignDataWrapper = GetForeignDataWrapper(server->fdwid); if (strncmp(foreignDataWrapper->fdwname, CSTORE_FDW_NAME, NAMEDATALEN) == 0) { cstoreTable = true; } } return cstoreTable; }
/* * sepgsql_attribute_relabel * * It checks privileges to relabel the supplied column * by the `seclabel'. */ void sepgsql_attribute_relabel(Oid relOid, AttrNumber attnum, const char *seclabel) { char *scontext = sepgsql_get_client_label(); char *tcontext; char audit_name[NAMEDATALEN * 2 + 10]; if (get_rel_relkind(relOid) != RELKIND_RELATION) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot set security label on non-regular columns"))); snprintf(audit_name, sizeof(audit_name), "%s.%s", get_rel_name(relOid), get_attname(relOid, attnum)); /* * check db_column:{setattr relabelfrom} permission */ tcontext = sepgsql_get_label(RelationRelationId, relOid, attnum); sepgsql_check_perms(scontext, tcontext, SEPG_CLASS_DB_COLUMN, SEPG_DB_COLUMN__SETATTR | SEPG_DB_COLUMN__RELABELFROM, audit_name, true); pfree(tcontext); /* * check db_column:{relabelto} permission */ sepgsql_check_perms(scontext, seclabel, SEPG_CLASS_DB_COLUMN, SEPG_DB_PROCEDURE__RELABELTO, audit_name, true); }
/* * sepgsql_attribute_relabel * * It checks privileges to relabel the supplied column * by the `seclabel'. */ void sepgsql_attribute_relabel(Oid relOid, AttrNumber attnum, const char *seclabel) { ObjectAddress object; char *audit_name; char relkind = get_rel_relkind(relOid); if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot set security label on non-regular columns"))); object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = attnum; audit_name = getObjectIdentity(&object); /* * check db_column:{setattr relabelfrom} permission */ sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_COLUMN, SEPG_DB_COLUMN__SETATTR | SEPG_DB_COLUMN__RELABELFROM, audit_name, true); /* * check db_column:{relabelto} permission */ sepgsql_avc_check_perms_label(seclabel, SEPG_CLASS_DB_COLUMN, SEPG_DB_PROCEDURE__RELABELTO, audit_name, true); pfree(audit_name); }
/* * sepgsql_attribute_drop * * It checks privileges to drop the supplied column. */ void sepgsql_attribute_drop(Oid relOid, AttrNumber attnum) { ObjectAddress object; char *audit_name; if (get_rel_relkind(relOid) != RELKIND_RELATION) return; /* * check db_column:{drop} permission */ object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = attnum; audit_name = getObjectDescription(&object); sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_COLUMN, SEPG_DB_COLUMN__DROP, audit_name, true); pfree(audit_name); }
/* * sepgsql_attribute_post_create * * This routine assigns a default security label on a newly defined * column, using ALTER TABLE ... ADD COLUMN. * Note that this routine is not invoked in the case of CREATE TABLE, * although it also defines columns in addition to table. */ void sepgsql_attribute_post_create(Oid relOid, AttrNumber attnum) { Relation rel; ScanKeyData skey[2]; SysScanDesc sscan; HeapTuple tuple; char *scontext; char *tcontext; char *ncontext; char audit_name[2 * NAMEDATALEN + 20]; ObjectAddress object; Form_pg_attribute attForm; /* * Only attributes within regular relation have individual security * labels. */ if (get_rel_relkind(relOid) != RELKIND_RELATION) return; /* * Compute a default security label of the new column underlying the * specified relation, and check permission to create it. */ rel = heap_open(AttributeRelationId, AccessShareLock); ScanKeyInit(&skey[0], Anum_pg_attribute_attrelid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relOid)); ScanKeyInit(&skey[1], Anum_pg_attribute_attnum, BTEqualStrategyNumber, F_INT2EQ, Int16GetDatum(attnum)); sscan = systable_beginscan(rel, AttributeRelidNumIndexId, true, SnapshotSelf, 2, &skey[0]); tuple = systable_getnext(sscan); if (!HeapTupleIsValid(tuple)) elog(ERROR, "catalog lookup failed for column %d of relation %u", attnum, relOid); attForm = (Form_pg_attribute) GETSTRUCT(tuple); scontext = sepgsql_get_client_label(); tcontext = sepgsql_get_label(RelationRelationId, relOid, 0); ncontext = sepgsql_compute_create(scontext, tcontext, SEPG_CLASS_DB_COLUMN); /* * check db_column:{create} permission */ snprintf(audit_name, sizeof(audit_name), "table %s column %s", get_rel_name(relOid), NameStr(attForm->attname)); sepgsql_avc_check_perms_label(ncontext, SEPG_CLASS_DB_COLUMN, SEPG_DB_COLUMN__CREATE, audit_name, true); /* * Assign the default security label on a new procedure */ object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = attnum; SetSecurityLabel(&object, SEPGSQL_LABEL_TAG, ncontext); systable_endscan(sscan); heap_close(rel, AccessShareLock); pfree(tcontext); pfree(ncontext); }
/* * sepgsql_relation_drop * * It checks privileges to drop the supplied relation. */ void sepgsql_relation_drop(Oid relOid) { ObjectAddress object; char *audit_name; uint16_t tclass; char relkind; relkind = get_rel_relkind(relOid); switch (relkind) { case RELKIND_RELATION: tclass = SEPG_CLASS_DB_TABLE; break; case RELKIND_SEQUENCE: tclass = SEPG_CLASS_DB_SEQUENCE; break; case RELKIND_VIEW: tclass = SEPG_CLASS_DB_VIEW; break; case RELKIND_INDEX: /* ignore indexes on toast tables */ if (get_rel_namespace(relOid) == PG_TOAST_NAMESPACE) return; /* other indexes are handled specially below; no need for tclass */ break; default: /* ignore other relkinds */ return; } /* * check db_schema:{remove_name} permission */ object.classId = NamespaceRelationId; object.objectId = get_rel_namespace(relOid); object.objectSubId = 0; audit_name = getObjectDescription(&object); sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_SCHEMA, SEPG_DB_SCHEMA__REMOVE_NAME, audit_name, true); pfree(audit_name); /* deal with indexes specially */ if (relkind == RELKIND_INDEX) { sepgsql_index_modify(relOid); return; } /* * check db_table/sequence/view:{drop} permission */ object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = 0; audit_name = getObjectDescription(&object); sepgsql_avc_check_perms(&object, tclass, SEPG_DB_TABLE__DROP, audit_name, true); pfree(audit_name); /* * check db_column:{drop} permission */ if (relkind == RELKIND_RELATION) { Form_pg_attribute attForm; CatCList *attrList; HeapTuple atttup; int i; attrList = SearchSysCacheList1(ATTNUM, ObjectIdGetDatum(relOid)); for (i = 0; i < attrList->n_members; i++) { atttup = &attrList->members[i]->tuple; attForm = (Form_pg_attribute) GETSTRUCT(atttup); if (attForm->attisdropped) continue; object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = attForm->attnum; audit_name = getObjectDescription(&object); sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_COLUMN, SEPG_DB_COLUMN__DROP, audit_name, true); pfree(audit_name); } ReleaseCatCacheList(attrList); } }
/* * master_get_table_metadata takes in a relation name, and returns partition * related metadata for the relation. These metadata are grouped and returned in * a tuple, and are used by the caller when creating new shards. The function * errors if given relation does not exist, or is not partitioned. */ Datum master_get_table_metadata(PG_FUNCTION_ARGS) { text *relationName = PG_GETARG_TEXT_P(0); Oid relationId = ResolveRelationId(relationName); DistTableCacheEntry *partitionEntry = NULL; TypeFuncClass resultTypeClass = 0; Datum partitionKeyExpr = 0; Datum partitionKey = 0; Datum metadataDatum = 0; HeapTuple metadataTuple = NULL; TupleDesc metadataDescriptor = NULL; uint64 shardMaxSizeInBytes = 0; char relationType = 0; char storageType = 0; Datum values[TABLE_METADATA_FIELDS]; bool isNulls[TABLE_METADATA_FIELDS]; /* find partition tuple for partitioned relation */ partitionEntry = DistributedTableCacheEntry(relationId); /* create tuple descriptor for return value */ resultTypeClass = get_call_result_type(fcinfo, NULL, &metadataDescriptor); if (resultTypeClass != TYPEFUNC_COMPOSITE) { ereport(ERROR, (errmsg("return type must be a row type"))); } /* get decompiled expression tree for partition key */ partitionKeyExpr = PointerGetDatum(cstring_to_text(partitionEntry->partitionKeyString)); partitionKey = DirectFunctionCall2(pg_get_expr, partitionKeyExpr, ObjectIdGetDatum(relationId)); /* form heap tuple for table metadata */ memset(values, 0, sizeof(values)); memset(isNulls, false, sizeof(isNulls)); shardMaxSizeInBytes = (int64) ShardMaxSize * 1024L; /* get storage type */ relationType = get_rel_relkind(relationId); if (relationType == RELKIND_RELATION) { storageType = SHARD_STORAGE_TABLE; } else if (relationType == RELKIND_FOREIGN_TABLE) { bool cstoreTable = CStoreTable(relationId); if (cstoreTable) { storageType = SHARD_STORAGE_COLUMNAR; } else { storageType = SHARD_STORAGE_FOREIGN; } } values[0] = ObjectIdGetDatum(relationId); values[1] = storageType; values[2] = partitionEntry->partitionMethod; values[3] = partitionKey; values[4] = Int32GetDatum(ShardReplicationFactor); values[5] = Int64GetDatum(shardMaxSizeInBytes); values[6] = Int32GetDatum(ShardPlacementPolicy); metadataTuple = heap_form_tuple(metadataDescriptor, values, isNulls); metadataDatum = HeapTupleGetDatum(metadataTuple); PG_RETURN_DATUM(metadataDatum); }
/* * GetTableDDLEvents takes in a relationId, and returns the list of DDL commands * needed to reconstruct the relation. These DDL commands are all palloced; and * include the table's schema definition, optional column storage and statistics * definitions, and index and constraint defitions. */ List * GetTableDDLEvents(Oid relationId) { List *tableDDLEventList = NIL; char tableType = 0; List *sequenceIdlist = getOwnedSequences(relationId); ListCell *sequenceIdCell; char *tableSchemaDef = NULL; char *tableColumnOptionsDef = NULL; char *schemaName = NULL; Oid schemaId = InvalidOid; Relation pgIndex = NULL; SysScanDesc scanDescriptor = NULL; ScanKeyData scanKey[1]; int scanKeyCount = 1; HeapTuple heapTuple = NULL; /* * Set search_path to NIL so that all objects outside of pg_catalog will be * schema-prefixed. pg_catalog will be added automatically when we call * PushOverrideSearchPath(), since we set addCatalog to true; */ OverrideSearchPath *overridePath = GetOverrideSearchPath(CurrentMemoryContext); overridePath->schemas = NIL; overridePath->addCatalog = true; PushOverrideSearchPath(overridePath); /* if foreign table, fetch extension and server definitions */ tableType = get_rel_relkind(relationId); if (tableType == RELKIND_FOREIGN_TABLE) { char *extensionDef = pg_get_extensiondef_string(relationId); char *serverDef = pg_get_serverdef_string(relationId); if (extensionDef != NULL) { tableDDLEventList = lappend(tableDDLEventList, extensionDef); } tableDDLEventList = lappend(tableDDLEventList, serverDef); } /* create schema if the table is not in the default namespace (public) */ schemaId = get_rel_namespace(relationId); schemaName = get_namespace_name(schemaId); if (strncmp(schemaName, "public", NAMEDATALEN) != 0) { StringInfo schemaNameDef = makeStringInfo(); appendStringInfo(schemaNameDef, CREATE_SCHEMA_COMMAND, schemaName); tableDDLEventList = lappend(tableDDLEventList, schemaNameDef->data); } /* create sequences if needed */ foreach(sequenceIdCell, sequenceIdlist) { Oid sequenceRelid = lfirst_oid(sequenceIdCell); char *sequenceDef = pg_get_sequencedef_string(sequenceRelid); tableDDLEventList = lappend(tableDDLEventList, sequenceDef); }
/* * sepgsql_relation_setattr * * It checks privileges to set attribute of the supplied relation */ void sepgsql_relation_setattr(Oid relOid) { Relation rel; ScanKeyData skey; SysScanDesc sscan; HeapTuple oldtup; HeapTuple newtup; Form_pg_class oldform; Form_pg_class newform; ObjectAddress object; char *audit_name; uint16_t tclass; switch (get_rel_relkind(relOid)) { case RELKIND_RELATION: tclass = SEPG_CLASS_DB_TABLE; break; case RELKIND_SEQUENCE: tclass = SEPG_CLASS_DB_SEQUENCE; break; case RELKIND_VIEW: tclass = SEPG_CLASS_DB_VIEW; break; case RELKIND_INDEX: /* deal with indexes specially */ sepgsql_index_modify(relOid); return; default: /* other relkinds don't need additional work */ return; } /* * Fetch newer catalog */ rel = heap_open(RelationRelationId, AccessShareLock); ScanKeyInit(&skey, ObjectIdAttributeNumber, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relOid)); sscan = systable_beginscan(rel, ClassOidIndexId, true, SnapshotSelf, 1, &skey); newtup = systable_getnext(sscan); if (!HeapTupleIsValid(newtup)) elog(ERROR, "catalog lookup failed for relation %u", relOid); newform = (Form_pg_class) GETSTRUCT(newtup); /* * Fetch older catalog */ oldtup = SearchSysCache1(RELOID, ObjectIdGetDatum(relOid)); if (!HeapTupleIsValid(oldtup)) elog(ERROR, "cache lookup failed for relation %u", relOid); oldform = (Form_pg_class) GETSTRUCT(oldtup); /* * Does this ALTER command takes operation to namespace? */ if (newform->relnamespace != oldform->relnamespace) { sepgsql_schema_remove_name(oldform->relnamespace); sepgsql_schema_add_name(newform->relnamespace); } if (strcmp(NameStr(newform->relname), NameStr(oldform->relname)) != 0) sepgsql_schema_rename(oldform->relnamespace); /* * XXX - In the future version, db_tuple:{use} of system catalog entry * shall be checked, if tablespace configuration is changed. */ /* * check db_xxx:{setattr} permission */ object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = 0; audit_name = getObjectIdentity(&object); sepgsql_avc_check_perms(&object, tclass, SEPG_DB_TABLE__SETATTR, audit_name, true); pfree(audit_name); ReleaseSysCache(oldtup); systable_endscan(sscan); heap_close(rel, AccessShareLock); }
/* * check_relation_privileges * * It actually checks required permissions on a certain relation * and its columns. */ static bool check_relation_privileges(Oid relOid, Bitmapset *selected, Bitmapset *inserted, Bitmapset *updated, uint32 required, bool abort_on_violation) { ObjectAddress object; char *audit_name; Bitmapset *columns; int index; char relkind = get_rel_relkind(relOid); bool result = true; /* * Hardwired Policies: SE-PostgreSQL enforces - clients cannot modify * system catalogs using DMLs - clients cannot reference/modify toast * relations using DMLs */ if (sepgsql_getenforce() > 0) { Oid relnamespace = get_rel_namespace(relOid); if (IsSystemNamespace(relnamespace) && (required & (SEPG_DB_TABLE__UPDATE | SEPG_DB_TABLE__INSERT | SEPG_DB_TABLE__DELETE)) != 0) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("SELinux: hardwired security policy violation"))); if (relkind == RELKIND_TOASTVALUE) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("SELinux: hardwired security policy violation"))); } /* * Check permissions on the relation */ object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = 0; audit_name = getObjectIdentity(&object); switch (relkind) { case RELKIND_RELATION: case RELKIND_PARTITIONED_TABLE: result = sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_TABLE, required, audit_name, abort_on_violation); break; case RELKIND_SEQUENCE: Assert((required & ~SEPG_DB_TABLE__SELECT) == 0); if (required & SEPG_DB_TABLE__SELECT) result = sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_SEQUENCE, SEPG_DB_SEQUENCE__GET_VALUE, audit_name, abort_on_violation); break; case RELKIND_VIEW: result = sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_VIEW, SEPG_DB_VIEW__EXPAND, audit_name, abort_on_violation); break; default: /* nothing to be checked */ break; } pfree(audit_name); /* * Only columns owned by relations shall be checked */ if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE) return true; /* * Check permissions on the columns */ selected = fixup_whole_row_references(relOid, selected); inserted = fixup_whole_row_references(relOid, inserted); updated = fixup_whole_row_references(relOid, updated); columns = bms_union(selected, bms_union(inserted, updated)); while ((index = bms_first_member(columns)) >= 0) { AttrNumber attnum; uint32 column_perms = 0; if (bms_is_member(index, selected)) column_perms |= SEPG_DB_COLUMN__SELECT; if (bms_is_member(index, inserted)) { if (required & SEPG_DB_TABLE__INSERT) column_perms |= SEPG_DB_COLUMN__INSERT; } if (bms_is_member(index, updated)) { if (required & SEPG_DB_TABLE__UPDATE) column_perms |= SEPG_DB_COLUMN__UPDATE; } if (column_perms == 0) continue; /* obtain column's permission */ attnum = index + FirstLowInvalidHeapAttributeNumber; object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = attnum; audit_name = getObjectDescription(&object); result = sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_COLUMN, column_perms, audit_name, abort_on_violation); pfree(audit_name); if (!result) return result; } return true; }
/* * sepgsql_attribute_post_create * * This routine assigns a default security label on a newly defined * column, using ALTER TABLE ... ADD COLUMN. * Note that this routine is not invoked in the case of CREATE TABLE, * although it also defines columns in addition to table. */ void sepgsql_attribute_post_create(Oid relOid, AttrNumber attnum) { Relation rel; ScanKeyData skey[2]; SysScanDesc sscan; HeapTuple tuple; char *scontext; char *tcontext; char *ncontext; ObjectAddress object; Form_pg_attribute attForm; StringInfoData audit_name; char relkind = get_rel_relkind(relOid); /* * Only attributes within regular relations or partition relations have * individual security labels. */ if (relkind != RELKIND_RELATION && relkind != RELKIND_PARTITIONED_TABLE) return; /* * Compute a default security label of the new column underlying the * specified relation, and check permission to create it. */ rel = heap_open(AttributeRelationId, AccessShareLock); ScanKeyInit(&skey[0], Anum_pg_attribute_attrelid, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(relOid)); ScanKeyInit(&skey[1], Anum_pg_attribute_attnum, BTEqualStrategyNumber, F_INT2EQ, Int16GetDatum(attnum)); sscan = systable_beginscan(rel, AttributeRelidNumIndexId, true, SnapshotSelf, 2, &skey[0]); tuple = systable_getnext(sscan); if (!HeapTupleIsValid(tuple)) elog(ERROR, "could not find tuple for column %d of relation %u", attnum, relOid); attForm = (Form_pg_attribute) GETSTRUCT(tuple); scontext = sepgsql_get_client_label(); tcontext = sepgsql_get_label(RelationRelationId, relOid, 0); ncontext = sepgsql_compute_create(scontext, tcontext, SEPG_CLASS_DB_COLUMN, NameStr(attForm->attname)); /* * check db_column:{create} permission */ object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = 0; initStringInfo(&audit_name); appendStringInfo(&audit_name, "%s.%s", getObjectIdentity(&object), quote_identifier(NameStr(attForm->attname))); sepgsql_avc_check_perms_label(ncontext, SEPG_CLASS_DB_COLUMN, SEPG_DB_COLUMN__CREATE, audit_name.data, true); /* * Assign the default security label on a new procedure */ object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = attnum; SetSecurityLabel(&object, SEPGSQL_LABEL_TAG, ncontext); systable_endscan(sscan); heap_close(rel, AccessShareLock); pfree(tcontext); pfree(ncontext); }
/* * master_create_distributed_table inserts the table and partition column * information into the partition metadata table. Note that this function * currently assumes the table is hash partitioned. */ Datum master_create_distributed_table(PG_FUNCTION_ARGS) { text *tableNameText = PG_GETARG_TEXT_P(0); text *partitionColumnText = PG_GETARG_TEXT_P(1); char partitionMethod = PG_GETARG_CHAR(2); Oid distributedTableId = ResolveRelationId(tableNameText); char relationKind = '\0'; char *partitionColumnName = text_to_cstring(partitionColumnText); char *tableName = text_to_cstring(tableNameText); Var *partitionColumn = NULL; /* verify target relation is either regular or foreign table */ relationKind = get_rel_relkind(distributedTableId); if (relationKind != RELKIND_RELATION && relationKind != RELKIND_FOREIGN_TABLE) { ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot distribute relation: %s", tableName), errdetail("Distributed relations must be regular or " "foreign tables."))); } /* this will error out if no column exists with the specified name */ partitionColumn = ColumnNameToColumn(distributedTableId, partitionColumnName); /* check for support function needed by specified partition method */ if (partitionMethod == HASH_PARTITION_TYPE) { Oid hashSupportFunction = SupportFunctionForColumn(partitionColumn, HASH_AM_OID, HASHPROC); if (hashSupportFunction == InvalidOid) { ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("could not identify a hash function for type %s", format_type_be(partitionColumn->vartype)), errdatatype(partitionColumn->vartype), errdetail("Partition column types must have a hash function " "defined to use hash partitioning."))); } } else if (partitionMethod == RANGE_PARTITION_TYPE) { Oid btreeSupportFunction = InvalidOid; /* * Error out immediately since we don't yet support range partitioning, * but the checks below are ready for when we do. * * TODO: Remove when range partitioning is supported. */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("pg_shard only supports hash partitioning"))); btreeSupportFunction = SupportFunctionForColumn(partitionColumn, BTREE_AM_OID, BTORDER_PROC); if (btreeSupportFunction == InvalidOid) { ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("could not identify a comparison function for type %s", format_type_be(partitionColumn->vartype)), errdatatype(partitionColumn->vartype), errdetail("Partition column types must have a comparison function " "defined to use range partitioning."))); } } /* insert row into the partition metadata table */ InsertPartitionRow(distributedTableId, partitionMethod, partitionColumnText); PG_RETURN_VOID(); }
/* * sepgsql_relation_drop * * It checks privileges to drop the supplied relation. */ void sepgsql_relation_drop(Oid relOid) { ObjectAddress object; char *audit_name; uint16_t tclass = 0; char relkind; relkind = get_rel_relkind(relOid); if (relkind == RELKIND_RELATION) tclass = SEPG_CLASS_DB_TABLE; else if (relkind == RELKIND_SEQUENCE) tclass = SEPG_CLASS_DB_SEQUENCE; else if (relkind == RELKIND_VIEW) tclass = SEPG_CLASS_DB_VIEW; else return; /* * check db_schema:{remove_name} permission */ object.classId = NamespaceRelationId; object.objectId = get_rel_namespace(relOid); object.objectSubId = 0; audit_name = getObjectDescription(&object); sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_SCHEMA, SEPG_DB_SCHEMA__REMOVE_NAME, audit_name, true); pfree(audit_name); /* * check db_table/sequence/view:{drop} permission */ object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = 0; audit_name = getObjectDescription(&object); sepgsql_avc_check_perms(&object, tclass, SEPG_DB_TABLE__DROP, audit_name, true); pfree(audit_name); /* * check db_column:{drop} permission */ if (relkind == RELKIND_RELATION) { Form_pg_attribute attForm; CatCList *attrList; HeapTuple atttup; int i; attrList = SearchSysCacheList1(ATTNUM, ObjectIdGetDatum(relOid)); for (i = 0; i < attrList->n_members; i++) { atttup = &attrList->members[i]->tuple; attForm = (Form_pg_attribute) GETSTRUCT(atttup); if (attForm->attisdropped) continue; object.classId = RelationRelationId; object.objectId = relOid; object.objectSubId = attForm->attnum; audit_name = getObjectDescription(&object); sepgsql_avc_check_perms(&object, SEPG_CLASS_DB_COLUMN, SEPG_DB_COLUMN__DROP, audit_name, true); pfree(audit_name); } ReleaseCatCacheList(attrList); } }
/* * master_create_empty_shard creates an empty shard for the given distributed * table. For this, the function first gets a list of candidate nodes, connects * to these nodes, and issues DDL commands on the nodes to create empty shard * placements. The function then updates metadata on the master node to make * this shard (and its placements) visible. */ Datum master_create_empty_shard(PG_FUNCTION_ARGS) { text *relationNameText = PG_GETARG_TEXT_P(0); char *relationName = text_to_cstring(relationNameText); List *workerNodeList = WorkerNodeList(); Datum shardIdDatum = 0; int64 shardId = INVALID_SHARD_ID; List *ddlEventList = NULL; uint32 attemptableNodeCount = 0; uint32 liveNodeCount = 0; uint32 candidateNodeIndex = 0; List *candidateNodeList = NIL; text *nullMinValue = NULL; text *nullMaxValue = NULL; char partitionMethod = 0; char storageType = SHARD_STORAGE_TABLE; Oid relationId = ResolveRelationId(relationNameText); char relationKind = get_rel_relkind(relationId); char *relationOwner = TableOwner(relationId); EnsureTablePermissions(relationId, ACL_INSERT); CheckDistributedTable(relationId); /* * We check whether the table is a foreign table or not. If it is, we set * storage type as foreign also. Only exception is if foreign table is a * foreign cstore table, in this case we set storage type as columnar. * * i.e. While setting storage type, columnar has priority over foreign. */ if (relationKind == RELKIND_FOREIGN_TABLE) { bool cstoreTable = cstoreTable = CStoreTable(relationId); if (cstoreTable) { storageType = SHARD_STORAGE_COLUMNAR; } else { storageType = SHARD_STORAGE_FOREIGN; } } partitionMethod = PartitionMethod(relationId); if (partitionMethod == DISTRIBUTE_BY_HASH) { ereport(ERROR, (errmsg("relation \"%s\" is a hash partitioned table", relationName), errdetail("We currently don't support creating shards " "on hash-partitioned tables"))); } /* generate new and unique shardId from sequence */ shardIdDatum = master_get_new_shardid(NULL); shardId = DatumGetInt64(shardIdDatum); /* get table DDL commands to replay on the worker node */ ddlEventList = GetTableDDLEvents(relationId); /* if enough live nodes, add an extra candidate node as backup */ attemptableNodeCount = ShardReplicationFactor; liveNodeCount = WorkerGetLiveNodeCount(); if (liveNodeCount > ShardReplicationFactor) { attemptableNodeCount = ShardReplicationFactor + 1; } /* first retrieve a list of random nodes for shard placements */ while (candidateNodeIndex < attemptableNodeCount) { WorkerNode *candidateNode = NULL; if (ShardPlacementPolicy == SHARD_PLACEMENT_LOCAL_NODE_FIRST) { candidateNode = WorkerGetLocalFirstCandidateNode(candidateNodeList); } else if (ShardPlacementPolicy == SHARD_PLACEMENT_ROUND_ROBIN) { candidateNode = WorkerGetRoundRobinCandidateNode(workerNodeList, shardId, candidateNodeIndex); } else if (ShardPlacementPolicy == SHARD_PLACEMENT_RANDOM) { candidateNode = WorkerGetRandomCandidateNode(candidateNodeList); } else { ereport(ERROR, (errmsg("unrecognized shard placement policy"))); } if (candidateNode == NULL) { ereport(ERROR, (errmsg("could only find %u of %u possible nodes", candidateNodeIndex, attemptableNodeCount))); } candidateNodeList = lappend(candidateNodeList, candidateNode); candidateNodeIndex++; } CreateShardPlacements(relationId, shardId, ddlEventList, relationOwner, candidateNodeList, 0, ShardReplicationFactor); InsertShardRow(relationId, shardId, storageType, nullMinValue, nullMaxValue); PG_RETURN_INT64(shardId); }
/* LocalTableSize returns the size on disk of the given table. */ static uint64 LocalTableSize(Oid relationId) { uint64 tableSize = 0; char relationType = 0; Datum relationIdDatum = ObjectIdGetDatum(relationId); relationType = get_rel_relkind(relationId); if (relationType == RELKIND_RELATION) { Datum tableSizeDatum = DirectFunctionCall1(pg_table_size, relationIdDatum); tableSize = DatumGetInt64(tableSizeDatum); } else if (relationType == RELKIND_FOREIGN_TABLE) { bool cstoreTable = CStoreTable(relationId); if (cstoreTable) { /* extract schema name of cstore */ Oid cstoreId = get_extension_oid(CSTORE_FDW_NAME, false); Oid cstoreSchemaOid = get_extension_schema(cstoreId); const char *cstoreSchemaName = get_namespace_name(cstoreSchemaOid); const int tableSizeArgumentCount = 1; Oid tableSizeFunctionOid = FunctionOid(cstoreSchemaName, CSTORE_TABLE_SIZE_FUNCTION_NAME, tableSizeArgumentCount); Datum tableSizeDatum = OidFunctionCall1(tableSizeFunctionOid, relationIdDatum); tableSize = DatumGetInt64(tableSizeDatum); } else { char *relationName = get_rel_name(relationId); struct stat fileStat; int statOK = 0; StringInfo localFilePath = makeStringInfo(); appendStringInfo(localFilePath, FOREIGN_CACHED_FILE_PATH, relationName); /* extract the file size using stat, analogous to pg_stat_file */ statOK = stat(localFilePath->data, &fileStat); if (statOK < 0) { ereport(ERROR, (errcode_for_file_access(), errmsg("could not stat file \"%s\": %m", localFilePath->data))); } tableSize = (uint64) fileStat.st_size; } } else { char *relationName = get_rel_name(relationId); ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot get size for table \"%s\"", relationName), errdetail("Only regular and foreign tables are supported."))); } return tableSize; }
/* * exec_object_restorecon * * This routine is a helper called by sepgsql_restorecon; it set up * initial security labels of database objects within the supplied * catalog OID. */ static void exec_object_restorecon(struct selabel_handle *sehnd, Oid catalogId) { Relation rel; SysScanDesc sscan; HeapTuple tuple; char *database_name = get_database_name(MyDatabaseId); char *namespace_name; Oid namespace_id; char *relation_name; /* * Open the target catalog. We don't want to allow writable * accesses by other session during initial labeling. */ rel = heap_open(catalogId, AccessShareLock); sscan = systable_beginscan(rel, InvalidOid, false, SnapshotNow, 0, NULL); while (HeapTupleIsValid(tuple = systable_getnext(sscan))) { Form_pg_namespace nspForm; Form_pg_class relForm; Form_pg_attribute attForm; Form_pg_proc proForm; char *objname; int objtype = 1234; ObjectAddress object; security_context_t context; /* * The way to determine object name depends on object classes. * So, any branches set up `objtype', `objname' and `object' here. */ switch (catalogId) { case NamespaceRelationId: nspForm = (Form_pg_namespace) GETSTRUCT(tuple); objtype = SELABEL_DB_SCHEMA; objname = quote_object_name(database_name, NameStr(nspForm->nspname), NULL, NULL); object.classId = NamespaceRelationId; object.objectId = HeapTupleGetOid(tuple); object.objectSubId = 0; break; case RelationRelationId: relForm = (Form_pg_class) GETSTRUCT(tuple); if (relForm->relkind == RELKIND_RELATION) objtype = SELABEL_DB_TABLE; else if (relForm->relkind == RELKIND_SEQUENCE) objtype = SELABEL_DB_SEQUENCE; else if (relForm->relkind == RELKIND_VIEW) objtype = SELABEL_DB_VIEW; else continue; /* no need to assign security label */ namespace_name = get_namespace_name(relForm->relnamespace); objname = quote_object_name(database_name, namespace_name, NameStr(relForm->relname), NULL); pfree(namespace_name); object.classId = RelationRelationId; object.objectId = HeapTupleGetOid(tuple); object.objectSubId = 0; break; case AttributeRelationId: attForm = (Form_pg_attribute) GETSTRUCT(tuple); if (get_rel_relkind(attForm->attrelid) != RELKIND_RELATION) continue; /* no need to assign security label */ objtype = SELABEL_DB_COLUMN; namespace_id = get_rel_namespace(attForm->attrelid); namespace_name = get_namespace_name(namespace_id); relation_name = get_rel_name(attForm->attrelid); objname = quote_object_name(database_name, namespace_name, relation_name, NameStr(attForm->attname)); pfree(namespace_name); pfree(relation_name); object.classId = RelationRelationId; object.objectId = attForm->attrelid; object.objectSubId = attForm->attnum; break; case ProcedureRelationId: proForm = (Form_pg_proc) GETSTRUCT(tuple); objtype = SELABEL_DB_PROCEDURE; namespace_name = get_namespace_name(proForm->pronamespace); objname = quote_object_name(database_name, namespace_name, NameStr(proForm->proname), NULL); pfree(namespace_name); object.classId = ProcedureRelationId; object.objectId = HeapTupleGetOid(tuple); object.objectSubId = 0; break; default: elog(ERROR, "unexpected catalog id: %u", catalogId); objname = NULL; /* for compiler quiet */ break; } if (selabel_lookup_raw(sehnd, &context, objname, objtype) == 0) { PG_TRY(); { /* * Check SELinux permission to relabel the fetched object, * then do the actual relabeling. */ sepgsql_object_relabel(&object, context); SetSecurityLabel(&object, SEPGSQL_LABEL_TAG, context); } PG_CATCH(); { freecon(context); PG_RE_THROW(); } PG_END_TRY(); freecon(context); } else if (errno == ENOENT) ereport(WARNING, (errmsg("SELinux: no initial label assigned for %s (type=%d), skipping", objname, objtype))); else ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("SELinux: could not determine initial security label for %s (type=%d): %m", objname, objtype))); pfree(objname); } systable_endscan(sscan); heap_close(rel, NoLock); }
/* * master_create_worker_shards creates empty shards for the given table based * on the specified number of initial shards. The function first gets a list of * candidate nodes and issues DDL commands on the nodes to create empty shard * placements on those nodes. The function then updates metadata on the master * node to make this shard (and its placements) visible. Note that the function * assumes the table is hash partitioned and calculates the min/max hash token * ranges for each shard, giving them an equal split of the hash space. */ Datum master_create_worker_shards(PG_FUNCTION_ARGS) { text *tableNameText = PG_GETARG_TEXT_P(0); int32 shardCount = PG_GETARG_INT32(1); int32 replicationFactor = PG_GETARG_INT32(2); Oid distributedTableId = ResolveRelationId(tableNameText); char relationKind = get_rel_relkind(distributedTableId); char *tableName = text_to_cstring(tableNameText); char shardStorageType = '\0'; int32 shardIndex = 0; List *workerNodeList = NIL; List *ddlCommandList = NIL; int32 workerNodeCount = 0; uint32 placementAttemptCount = 0; uint32 hashTokenIncrement = 0; List *existingShardList = NIL; /* make sure table is hash partitioned */ CheckHashPartitionedTable(distributedTableId); /* validate that shards haven't already been created for this table */ existingShardList = LoadShardIntervalList(distributedTableId); if (existingShardList != NIL) { ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("table \"%s\" has already had shards created for it", tableName))); } /* make sure that at least one shard is specified */ if (shardCount <= 0) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("shardCount must be positive"))); } /* make sure that at least one replica is specified */ if (replicationFactor <= 0) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("replicationFactor must be positive"))); } /* calculate the split of the hash space */ hashTokenIncrement = UINT_MAX / shardCount; /* load and sort the worker node list for deterministic placement */ workerNodeList = ParseWorkerNodeFile(WORKER_LIST_FILENAME); workerNodeList = SortList(workerNodeList, CompareWorkerNodes); /* make sure we don't process cancel signals until all shards are created */ HOLD_INTERRUPTS(); /* retrieve the DDL commands for the table */ ddlCommandList = TableDDLCommandList(distributedTableId); workerNodeCount = list_length(workerNodeList); if (replicationFactor > workerNodeCount) { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("replicationFactor (%d) exceeds number of worker nodes " "(%d)", replicationFactor, workerNodeCount), errhint("Add more worker nodes or try again with a lower " "replication factor."))); } /* if we have enough nodes, add an extra placement attempt for backup */ placementAttemptCount = (uint32) replicationFactor; if (workerNodeCount > replicationFactor) { placementAttemptCount++; } /* set shard storage type according to relation type */ if (relationKind == RELKIND_FOREIGN_TABLE) { shardStorageType = SHARD_STORAGE_FOREIGN; } else { shardStorageType = SHARD_STORAGE_TABLE; } for (shardIndex = 0; shardIndex < shardCount; shardIndex++) { uint64 shardId = NextSequenceId(SHARD_ID_SEQUENCE_NAME); int32 placementCount = 0; uint32 placementIndex = 0; uint32 roundRobinNodeIndex = shardIndex % workerNodeCount; List *extendedDDLCommands = ExtendedDDLCommandList(distributedTableId, shardId, ddlCommandList); /* initialize the hash token space for this shard */ text *minHashTokenText = NULL; text *maxHashTokenText = NULL; int32 shardMinHashToken = INT_MIN + (shardIndex * hashTokenIncrement); int32 shardMaxHashToken = shardMinHashToken + hashTokenIncrement - 1; /* if we are at the last shard, make sure the max token value is INT_MAX */ if (shardIndex == (shardCount - 1)) { shardMaxHashToken = INT_MAX; } for (placementIndex = 0; placementIndex < placementAttemptCount; placementIndex++) { int32 candidateNodeIndex = (roundRobinNodeIndex + placementIndex) % workerNodeCount; WorkerNode *candidateNode = (WorkerNode *) list_nth(workerNodeList, candidateNodeIndex); char *nodeName = candidateNode->nodeName; uint32 nodePort = candidateNode->nodePort; bool created = ExecuteRemoteCommandList(nodeName, nodePort, extendedDDLCommands); if (created) { uint64 shardPlacementId = 0; ShardState shardState = STATE_FINALIZED; shardPlacementId = NextSequenceId(SHARD_PLACEMENT_ID_SEQUENCE_NAME); InsertShardPlacementRow(shardPlacementId, shardId, shardState, nodeName, nodePort); placementCount++; } else { ereport(WARNING, (errmsg("could not create shard on \"%s:%u\"", nodeName, nodePort))); } if (placementCount >= replicationFactor) { break; } } /* check if we created enough shard replicas */ if (placementCount < replicationFactor) { ereport(ERROR, (errmsg("could not satisfy specified replication factor"), errdetail("Created %d shard replicas, less than the " "requested replication factor of %d.", placementCount, replicationFactor))); } /* insert the shard metadata row along with its min/max values */ minHashTokenText = IntegerToText(shardMinHashToken); maxHashTokenText = IntegerToText(shardMaxHashToken); InsertShardRow(distributedTableId, shardId, shardStorageType, minHashTokenText, maxHashTokenText); } if (QueryCancelPending) { ereport(WARNING, (errmsg("cancel requests are ignored during shard creation"))); QueryCancelPending = false; } RESUME_INTERRUPTS(); PG_RETURN_VOID(); }