/* * Calculate total on-disk size of all indexes attached to the given table. * * Can be applied safely to an index, but you'll just get zero. */ static int64 calculate_indexes_size(Oid relOid) { int64 size = 0; Relation rel; rel = relation_open(relOid, AccessShareLock); /* * Aggregate all indexes on the given relation */ if (rel->rd_rel->relhasindex) { List *index_oids = RelationGetIndexList(rel); ListCell *cell; foreach(cell, index_oids) { Oid idxOid = lfirst_oid(cell); Relation idxRel; ForkNumber forkNum; idxRel = relation_open(idxOid, AccessShareLock); for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(idxRel->rd_node), idxRel->rd_backend, forkNum); relation_close(idxRel, AccessShareLock); }
Datum pg_relation_size_oid(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); Relation rel; int64 size = 0; if (GP_ROLE_EXECUTE == Gp_role) { ereport(ERROR, (errcode(ERRCODE_GP_COMMAND_ERROR), errmsg("pg_relation_size: cannot be executed in segment"))); } rel = try_relation_open(relOid, AccessShareLock, false); /* * While we scan pg_class with an MVCC snapshot, * someone else might drop the table. It's better to return NULL for * already-dropped tables than throw an error and abort the whole query. */ if (!RelationIsValid(rel)) PG_RETURN_NULL(); if (relOid == 0 || rel->rd_node.relNode == 0) size = 0; else size = calculate_relation_size(rel); relation_close(rel, AccessShareLock); PG_RETURN_INT64(size); }
/*
 * calculate_table_size
 *      Sum the sizes of every fork (0 .. MAX_FORKNUM) of the relation and,
 *      when the relation has a TOAST table, add the TOAST table's size as
 *      reported by calculate_toast_table_size().
 *
 * The relation's own indexes are not visited here.  The relation is opened
 * and closed with AccessShareLock.
 */
static int64
calculate_table_size(Oid relOid)
{
    Relation    heap = relation_open(relOid, AccessShareLock);
    int64       total = 0;
    ForkNumber  forkno = 0;
    Oid         toastOid;

    /* every fork of the relation itself */
    while (forkno <= MAX_FORKNUM)
    {
        total += calculate_relation_size(&(heap->rd_node),
                                         heap->rd_backend,
                                         forkno);
        forkno++;
    }

    /* attached TOAST relation, if there is one */
    toastOid = heap->rd_rel->reltoastrelid;
    if (OidIsValid(toastOid))
        total += calculate_toast_table_size(toastOid);

    relation_close(heap, AccessShareLock);

    return total;
}
Datum currval_oid(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); int64 result; SeqTable elm; Relation seqrel; /* open and AccessShareLock sequence */ init_sequence(relid, &elm, &seqrel); if (pg_class_aclcheck(elm->relid, GetUserId(), ACL_SELECT | ACL_USAGE) != ACLCHECK_OK) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied for sequence %s", RelationGetRelationName(seqrel)))); if (!elm->last_valid) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("currval of sequence \"%s\" is not yet defined in this session", RelationGetRelationName(seqrel)))); result = elm->last; relation_close(seqrel, NoLock); PG_RETURN_INT64(result); }
Datum pg_relation_size(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); text *forkName = PG_GETARG_TEXT_P(1); Relation rel; int64 size; rel = try_relation_open(relOid, AccessShareLock); /* * Before 9.2, we used to throw an error if the relation didn't exist, but * that makes queries like "SELECT pg_relation_size(oid) FROM pg_class" * less robust, because while we scan pg_class with an MVCC snapshot, * someone else might drop the table. It's better to return NULL for * already-dropped tables than throw an error and abort the whole query. */ if (rel == NULL) PG_RETURN_NULL(); size = calculate_relation_size(&(rel->rd_node), rel->rd_backend, forkname_to_number(text_to_cstring(forkName))); relation_close(rel, AccessShareLock); PG_RETURN_INT64(size); }
Datum lastval(PG_FUNCTION_ARGS) { Relation seqrel; int64 result; if (last_used_seq == NULL) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("lastval is not yet defined in this session"))); /* Someone may have dropped the sequence since the last nextval() */ if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(last_used_seq->relid))) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("lastval is not yet defined in this session"))); seqrel = open_share_lock(last_used_seq); /* nextval() must have already been called for this sequence */ Assert(last_used_seq->last_valid); if (pg_class_aclcheck(last_used_seq->relid, GetUserId(), ACL_SELECT | ACL_USAGE) != ACLCHECK_OK) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied for sequence %s", RelationGetRelationName(seqrel)))); result = last_used_seq->last; relation_close(seqrel, NoLock); PG_RETURN_INT64(result); }
/* * Visibility map information for a single block of a relation. */ Datum pg_visibility_map(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); int64 blkno = PG_GETARG_INT64(1); int32 mapbits; Relation rel; Buffer vmbuffer = InvalidBuffer; TupleDesc tupdesc; Datum values[2]; bool nulls[2]; rel = relation_open(relid, AccessShareLock); if (blkno < 0 || blkno > MaxBlockNumber) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid block number"))); tupdesc = pg_visibility_tupdesc(false, false); MemSet(nulls, 0, sizeof(nulls)); mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer); if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0); values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0); relation_close(rel, AccessShareLock); PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); }
/* -------------------------------------------------------- * pg_relpages() * * Get the number of pages of the table/index. * * Usage: SELECT pg_relpages('t1'); * SELECT pg_relpages('t1_pkey'); * * Must keep superuser() check, see above. * -------------------------------------------------------- */ Datum pg_relpages(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_PP(0); int64 relpages; Relation rel; RangeVar *relrv; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pgstattuple functions")))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); /* only some relkinds have storage */ check_relation_relkind(rel); /* note: this will work OK on non-local temp tables */ relpages = RelationGetNumberOfBlocks(rel); relation_close(rel, AccessShareLock); PG_RETURN_INT64(relpages); }
/* * Compute the on-disk size of files for the relation according to the * stat function, including heap data, index data, and toast data. */ static int64 calculate_total_relation_size(Oid Relid) { Relation heapRel; Oid toastOid; int64 size; ListCell *cell; heapRel = relation_open(Relid, AccessShareLock); toastOid = heapRel->rd_rel->reltoastrelid; /* Get the heap size */ size = calculate_relation_size(&(heapRel->rd_node)); /* Include any dependent indexes */ if (heapRel->rd_rel->relhasindex) { List *index_oids = RelationGetIndexList(heapRel); foreach(cell, index_oids) { Oid idxOid = lfirst_oid(cell); Relation iRel; iRel = relation_open(idxOid, AccessShareLock); size += calculate_relation_size(&(iRel->rd_node)); relation_close(iRel, AccessShareLock); }
/* Must keep superuser() check, see above. */ Datum pg_relpagesbyid(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); int64 relpages; Relation rel; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pgstattuple functions")))); rel = relation_open(relid, AccessShareLock); /* only some relkinds have storage */ check_relation_relkind(rel); /* note: this will work OK on non-local temp tables */ relpages = RelationGetNumberOfBlocks(rel); relation_close(rel, AccessShareLock); PG_RETURN_INT64(relpages); }
/* Returns the relation object for the index that we're going to use as key for a * particular table. (Indexes are relations too!) Returns null if the table is unkeyed. * The return value is opened with a shared lock; call relation_close() when finished. */ Relation table_key_index(Relation rel) { char replident = rel->rd_rel->relreplident; Oid repl_ident_oid; List *indexes; ListCell *index_oid; if (replident == REPLICA_IDENTITY_NOTHING) { return NULL; } if (replident == REPLICA_IDENTITY_INDEX) { repl_ident_oid = RelationGetReplicaIndex(rel); if (repl_ident_oid != InvalidOid) { return relation_open(repl_ident_oid, AccessShareLock); } } // There doesn't seem to be a convenient way of getting the primary key index for // a table, so we have to iterate over all the table's indexes. indexes = RelationGetIndexList(rel); foreach(index_oid, indexes) { Relation index_rel = relation_open(lfirst_oid(index_oid), AccessShareLock); Form_pg_index index = index_rel->rd_index; if (IndexIsValid(index) && IndexIsReady(index) && index->indisprimary) { list_free(indexes); return index_rel; } relation_close(index_rel, AccessShareLock); }
/* * Remove the visibility map fork for a relation. If there turn out to be * any bugs in the visibility map code that require rebuilding the VM, this * provides users with a way to do it that is cleaner than shutting down the * server and removing files by hand. * * This is a cut-down version of RelationTruncate. */ Datum pg_truncate_visibility_map(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); Relation rel; rel = relation_open(relid, AccessExclusiveLock); if (rel->rd_rel->relkind != RELKIND_RELATION && rel->rd_rel->relkind != RELKIND_MATVIEW && rel->rd_rel->relkind != RELKIND_TOASTVALUE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a table, materialized view, or TOAST table", RelationGetRelationName(rel)))); RelationOpenSmgr(rel); rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber; visibilitymap_truncate(rel, 0); if (RelationNeedsWAL(rel)) { xl_smgr_truncate xlrec; xlrec.blkno = 0; xlrec.rnode = rel->rd_node; xlrec.flags = SMGR_TRUNCATE_VM; XLogBeginInsert(); XLogRegisterData((char *) &xlrec, sizeof(xlrec)); XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE); } /* * Release the lock right away, not at commit time. * * It would be a problem to release the lock prior to commit if this * truncate operation sends any transactional invalidation messages. Other * backends would potentially be able to lock the relation without * processing them in the window of time between when we release the lock * here and when we sent the messages at our eventual commit. However, * we're currently only sending a non-transactional smgr invalidation, * which will have been posted to shared memory immediately from within * visibilitymap_truncate. Therefore, there should be no race here. * * The reason why it's desirable to release the lock early here is because * of the possibility that someone will need to use this to blow away many * visibility map forks at once. 
If we can't release the lock until * commit time, the transaction doing this will accumulate * AccessExclusiveLocks on all of those relations at the same time, which * is undesirable. However, if this turns out to be unsafe we may have no * choice... */ relation_close(rel, AccessExclusiveLock); /* Nothing to return. */ PG_RETURN_VOID(); }
/* * Calculate total on-disk size of all indexes attached to the given table. * * Can be applied safely to an index, but you'll just get zero. */ static int64 calculate_indexes_size(oid_t relOid) { int64 size = 0; struct relation * rel; rel = relation_open(relOid, ACCESS_SHR_LOCK); /* * Aggregate all indexes on the given relation */ if (rel->rd_rel->relhasindex) { struct list *index_oids = rel_get_index_list(rel); struct list_cell *cell; foreach(cell, index_oids) { oid_t idxOid = lfirst_oid(cell); struct relation * idxRel; enum fork fnr; idxRel = relation_open(idxOid, ACCESS_SHR_LOCK); for (fnr = 0; fnr <= MAX_FORK_NR; fnr++) size += calculate_relation_size(&(idxRel->rd_node), idxRel->rd_backend, fnr); relation_close(idxRel, ACCESS_SHR_LOCK); } list_free(index_oids); }
/* * Calculate total on-disk size of a TOAST relation, including its indexes. * Must not be applied to non-TOAST relations. */ static int64 calculate_toast_table_size(Oid toastrelid) { int64 size = 0; Relation toastRel; ForkNumber forkNum; ListCell *lc; List *indexlist; toastRel = relation_open(toastrelid, AccessShareLock); /* toast heap size, including FSM and VM size */ for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(toastRel->rd_node), toastRel->rd_backend, forkNum); /* toast index size, including FSM and VM size */ indexlist = RelationGetIndexList(toastRel); /* Size is calculated using all the indexes available */ foreach(lc, indexlist) { Relation toastIdxRel; toastIdxRel = relation_open(lfirst_oid(lc), AccessShareLock); for (forkNum = 0; forkNum <= MAX_FORKNUM; forkNum++) size += calculate_relation_size(&(toastIdxRel->rd_node), toastIdxRel->rd_backend, forkNum); relation_close(toastIdxRel, AccessShareLock); }
/*
 * Collect visibility data about a relation.
 *
 * Returns a palloc'd vbits holding one byte per block of the relation:
 *   bit 0 - visibility map says all-visible
 *   bit 1 - visibility map says all-frozen
 *   bit 2 - PageIsAllVisible() on the heap page (only when include_pd)
 * info->count is the number of blocks and info->next starts at 0.
 *
 * Errors out for relkinds rejected by check_relation_relkind().
 */
static vbits *
collect_visibility_data(Oid relid, bool include_pd)
{
    Relation    rel;
    BlockNumber nblocks;
    vbits      *info;
    BlockNumber blkno;
    Buffer      vmbuffer = InvalidBuffer;
    BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);

    rel = relation_open(relid, AccessShareLock);

    /*
     * Only some relkinds have a visibility map.  Check before reading any
     * block, consistent with pg_visibility() and
     * pg_visibility_map_summary().
     */
    check_relation_relkind(rel);

    nblocks = RelationGetNumberOfBlocks(rel);
    info = palloc0(offsetof(vbits, bits) + nblocks);
    info->next = 0;
    info->count = nblocks;

    for (blkno = 0; blkno < nblocks; ++blkno)
    {
        int32       mapbits;

        /* Make sure we are interruptible. */
        CHECK_FOR_INTERRUPTS();

        /* Get map info. */
        mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
        if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
            info->bits[blkno] |= (1 << 0);
        if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
            info->bits[blkno] |= (1 << 1);

        /*
         * Page-level data requires reading every block, so only get it if
         * the caller needs it.  Use a buffer access strategy, too, to
         * prevent cache-trashing.
         */
        if (include_pd)
        {
            Buffer      buffer;
            Page        page;

            buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
                                        bstrategy);
            LockBuffer(buffer, BUFFER_LOCK_SHARE);

            page = BufferGetPage(buffer, NULL, NULL, BGP_NO_SNAPSHOT_TEST);
            if (PageIsAllVisible(page))
                info->bits[blkno] |= (1 << 2);

            UnlockReleaseBuffer(buffer);
        }
    }

    /* Clean up. */
    if (vmbuffer != InvalidBuffer)
        ReleaseBuffer(vmbuffer);
    relation_close(rel, AccessShareLock);

    return info;
}
/* * create_distributed_table gets a table name, distribution column, * distribution method and colocate_with option, then it creates a * distributed table. */ Datum create_distributed_table(PG_FUNCTION_ARGS) { Oid relationId = InvalidOid; text *distributionColumnText = NULL; Oid distributionMethodOid = InvalidOid; text *colocateWithTableNameText = NULL; Relation relation = NULL; char *distributionColumnName = NULL; Var *distributionColumn = NULL; char distributionMethod = 0; char *colocateWithTableName = NULL; bool viaDeprecatedAPI = false; CheckCitusVersion(ERROR); EnsureCoordinator(); relationId = PG_GETARG_OID(0); distributionColumnText = PG_GETARG_TEXT_P(1); distributionMethodOid = PG_GETARG_OID(2); colocateWithTableNameText = PG_GETARG_TEXT_P(3); /* * Lock target relation with an exclusive lock - there's no way to make * sense of this table until we've committed, and we don't want multiple * backends manipulating this relation. */ relation = try_relation_open(relationId, ExclusiveLock); if (relation == NULL) { ereport(ERROR, (errmsg("could not create distributed table: " "relation does not exist"))); } /* * We should do this check here since the codes in the following lines rely * on this relation to have a supported relation kind. More extensive checks * will be performed in CreateDistributedTable. */ EnsureRelationKindSupported(relationId); distributionColumnName = text_to_cstring(distributionColumnText); distributionColumn = BuildDistributionKeyFromColumnName(relation, distributionColumnName); distributionMethod = LookupDistributionMethod(distributionMethodOid); colocateWithTableName = text_to_cstring(colocateWithTableNameText); CreateDistributedTable(relationId, distributionColumn, distributionMethod, colocateWithTableName, viaDeprecatedAPI); relation_close(relation, NoLock); PG_RETURN_VOID(); }
/** * Given the oid of a relation, this method calculates reltuples, relpages. This only looks up * local information (on master or segments). It produces meaningful values for AO and * heap tables and returns [0.0,0.0] for all other relations. * Input: * relationoid * Output: * array of two values [reltuples,relpages] */ Datum gp_statistics_estimate_reltuples_relpages_oid(PG_FUNCTION_ARGS) { float4 relpages = 0.0; float4 reltuples = 0.0; Oid relOid = PG_GETARG_OID(0); Datum values[2]; ArrayType *result; Relation rel = try_relation_open(relOid, AccessShareLock, false); if (rel != NULL) { if (rel->rd_rel->relkind == RELKIND_RELATION) { if (RelationIsHeap(rel)) { gp_statistics_estimate_reltuples_relpages_heap(rel, &reltuples, &relpages); } else if (RelationIsAoRows(rel)) { gp_statistics_estimate_reltuples_relpages_ao_rows(rel, &reltuples, &relpages); } else if (RelationIsAoCols(rel)) { gp_statistics_estimate_reltuples_relpages_ao_cs(rel, &reltuples, &relpages); } } else if (rel->rd_rel->relkind == RELKIND_INDEX) { reltuples = 1.0; relpages = RelationGetNumberOfBlocks(rel); } else { /** * Should we silently return [0.0,0.0] or error out? Currently, we choose option 1. */ } relation_close(rel, AccessShareLock); } else { /** * Should we silently return [0.0,0.0] or error out? Currently, we choose option 1. */ } values[0] = Float4GetDatum(reltuples); values[1] = Float4GetDatum(relpages); result = construct_array(values, 2, FLOAT4OID, sizeof(float4), true, 'i'); PG_RETURN_ARRAYTYPE_P(result); }
/* * CreateReferenceTable creates a distributed table with the given relationId. The * created table has one shard and replication factor is set to the active worker * count. In fact, the above is the definition of a reference table in Citus. */ Datum create_reference_table(PG_FUNCTION_ARGS) { Oid relationId = PG_GETARG_OID(0); Relation relation = NULL; char *colocateWithTableName = NULL; List *workerNodeList = NIL; int workerCount = 0; Var *distributionColumn = NULL; bool viaDeprecatedAPI = false; EnsureCoordinator(); CheckCitusVersion(ERROR); /* * Ensure schema exists on each worker node. We can not run this function * transactionally, since we may create shards over separate sessions and * shard creation depends on the schema being present and visible from all * sessions. */ EnsureSchemaExistsOnAllNodes(relationId); /* * Lock target relation with an exclusive lock - there's no way to make * sense of this table until we've committed, and we don't want multiple * backends manipulating this relation. */ relation = relation_open(relationId, ExclusiveLock); /* * We should do this check here since the codes in the following lines rely * on this relation to have a supported relation kind. More extensive checks * will be performed in CreateDistributedTable. */ EnsureRelationKindSupported(relationId); workerNodeList = ActivePrimaryNodeList(); workerCount = list_length(workerNodeList); /* if there are no workers, error out */ if (workerCount == 0) { char *relationName = get_rel_name(relationId); ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("cannot create reference table \"%s\"", relationName), errdetail("There are no active worker nodes."))); } CreateDistributedTable(relationId, distributionColumn, DISTRIBUTE_BY_NONE, colocateWithTableName, viaDeprecatedAPI); relation_close(relation, NoLock); PG_RETURN_VOID(); }
Datum pg_relation_size(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); text *forkName = PG_GETARG_TEXT_P(1); Relation rel; int64 size = 0; /** * This function is peculiar in that it does its own dispatching. * It does not work on entry db since we do not support dispatching * from entry-db currently. */ if (Gp_role == GP_ROLE_EXECUTE && Gp_segment == -1) elog(ERROR, "This query is not currently supported by GPDB."); rel = try_relation_open(relOid, AccessShareLock, false); /* * While we scan pg_class with an MVCC snapshot, * someone else might drop the table. It's better to return NULL for * already-dropped tables than throw an error and abort the whole query. */ if (!RelationIsValid(rel)) PG_RETURN_NULL(); if (relOid == 0 || rel->rd_node.relNode == 0) size = 0; else size = calculate_relation_size(rel, forkname_to_number(text_to_cstring(forkName))); if (Gp_role == GP_ROLE_DISPATCH) { StringInfoData buffer; char *schemaName; char *relName; schemaName = get_namespace_name(get_rel_namespace(relOid)); if (schemaName == NULL) elog(ERROR, "Cannot find schema for oid %d", relOid); relName = get_rel_name(relOid); if (relName == NULL) elog(ERROR, "Cannot find relation for oid %d", relOid); initStringInfo(&buffer); appendStringInfo(&buffer, "select sum(pg_relation_size('%s.%s'))::int8 from gp_dist_random('gp_id');", quote_identifier(schemaName), quote_identifier(relName)); size += get_size_from_segDBs(buffer.data); } relation_close(rel, AccessShareLock); PG_RETURN_INT64(size); }
/* * SQL-callable function to scan through an index and summarize all ranges * that are not currently summarized. */ Datum brin_summarize_new_values(PG_FUNCTION_ARGS) { Oid indexoid = PG_GETARG_OID(0); Relation indexRel; Relation heapRel; double numSummarized = 0; heapRel = heap_open(IndexGetRelation(indexoid, false), ShareUpdateExclusiveLock); indexRel = index_open(indexoid, ShareUpdateExclusiveLock); brinsummarize(indexRel, heapRel, &numSummarized, NULL); relation_close(indexRel, ShareUpdateExclusiveLock); relation_close(heapRel, ShareUpdateExclusiveLock); PG_RETURN_INT32((int32) numSummarized); }
Datum check_SPI_gettype(PG_FUNCTION_ARGS) { int fnumber = PG_GETARG_INT32(0); Relation rel = relation_open(RelationRelationId, AccessShareLock); char *name = SPI_gettype(RelationGetDescr(rel), fnumber); relation_close(rel, AccessShareLock); PG_RETURN_TEXT_P(cstring_to_text(name)); }
Datum get_raw_page(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_P(0); uint32 blkno = PG_GETARG_UINT32(1); Relation rel; RangeVar *relrv; bytea *raw_page; char *raw_page_data; Buffer buf; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw functions")))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); /* Check that this relation has storage */ if (rel->rd_rel->relkind == RELKIND_VIEW) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get raw page from view \"%s\"", RelationGetRelationName(rel)))); if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get raw page from composite type \"%s\"", RelationGetRelationName(rel)))); if (blkno >= RelationGetNumberOfBlocks(rel)) elog(ERROR, "block number %u is out of range for relation \"%s\"", blkno, RelationGetRelationName(rel)); /* Initialize buffer to copy to */ raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ); SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ); raw_page_data = VARDATA(raw_page); /* Take a verbatim copy of the page */ buf = ReadBuffer(rel, blkno); LockBuffer(buf, BUFFER_LOCK_SHARE); memcpy(raw_page_data, BufferGetPage(buf), BLCKSZ); LockBuffer(buf, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buf); relation_close(rel, AccessShareLock); PG_RETURN_BYTEA_P(raw_page); }
/*
 * ResetSequence
 *      Put a sequence back to its start value.
 *
 * The change is transactional: if the current transaction fails, the
 * sequence reverts to its previous state.  That is achieved by giving the
 * sequence a whole new relfilenode, much like the rewriting forms of
 * ALTER TABLE.
 *
 * The caller is assumed to hold AccessExclusiveLock on the sequence and
 * must keep it until end of transaction.  Permission checking is also the
 * caller's responsibility.
 */
void
ResetSequence(Oid seq_relid)
{
    Relation    seqRel;
    SeqTable    entry;
    Buffer      oldBuf;
    HeapTupleData oldTuple;
    HeapTuple   newTuple;
    Form_pg_sequence seqForm;

    /*
     * Read the old sequence.  This is a little more work than strictly
     * needed, but it is simple and double-checks that the relation really
     * is a sequence.
     */
    init_sequence(seq_relid, &entry, &seqRel);
    (void) read_seq_tuple(entry, seqRel, &oldBuf, &oldTuple);

    /* Copy the existing sequence tuple ... */
    newTuple = heap_copytuple(&oldTuple);

    /* ... after which the old page is no longer needed. */
    UnlockReleaseBuffer(oldBuf);

    /*
     * Apply the restart to the copy (compare the RESTART action in
     * AlterSequence).
     */
    seqForm = (Form_pg_sequence) GETSTRUCT(newTuple);
    seqForm->last_value = seqForm->start_value;
    seqForm->is_called = false;
    seqForm->log_cnt = 0;

    /*
     * Create a new storage file for the sequence.  The sequence's
     * relfrozenxid stays at 0, since it won't contain any unfrozen XIDs;
     * likewise relminmxid, since a sequence never contains multixacts.
     */
    RelationSetNewRelfilenode(seqRel, seqRel->rd_rel->relpersistence,
                              InvalidTransactionId, InvalidMultiXactId);

    /* Insert the modified tuple into the new storage file. */
    fill_seq_with_data(seqRel, newTuple);

    /*
     * Clear the local cache so we don't think we still have cached numbers.
     * Note that the currval() state is deliberately left unchanged.
     */
    entry->cached = entry->last;

    relation_close(seqRel, NoLock);
}
/* * Visibility map information for a single block of a relation, plus the * page-level information for the same block. */ Datum pg_visibility(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); int64 blkno = PG_GETARG_INT64(1); int32 mapbits; Relation rel; Buffer vmbuffer = InvalidBuffer; Buffer buffer; Page page; TupleDesc tupdesc; Datum values[3]; bool nulls[3]; rel = relation_open(relid, AccessShareLock); /* Only some relkinds have a visibility map */ check_relation_relkind(rel); if (blkno < 0 || blkno > MaxBlockNumber) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid block number"))); tupdesc = pg_visibility_tupdesc(false, true); MemSet(nulls, 0, sizeof(nulls)); mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer); if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0); values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0); /* Here we have to explicitly check rel size ... */ if (blkno < RelationGetNumberOfBlocks(rel)) { buffer = ReadBuffer(rel, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); values[2] = BoolGetDatum(PageIsAllVisible(page)); UnlockReleaseBuffer(buffer); } else { /* As with the vismap, silently return 0 for pages past EOF */ values[2] = BoolGetDatum(false); } relation_close(rel, AccessShareLock); PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); }
Datum lock_test2(PG_FUNCTION_ARGS) { Oid table_oid; text *lock_type; float8 sleep_time; const char *lock_type_str; LOCKMODE lockmode; Relation heapRelation; table_oid = PG_GETARG_OID(0); lock_type = PG_GETARG_TEXT_P(1); sleep_time = PG_GETARG_FLOAT8(2); lock_type_str = text_to_cstring(lock_type); if (pg_strcasecmp(lock_type_str, "NOLOCK") == 0) lockmode = NoLock; else if (pg_strcasecmp(lock_type_str, "ACCESSSHARELOCK") == 0) lockmode = AccessShareLock; else if (pg_strcasecmp(lock_type_str, "ROWSHARELOCK") == 0) lockmode = RowShareLock; else if (pg_strcasecmp(lock_type_str, "ROWEXCLUSIVELOCK") == 0) lockmode = RowExclusiveLock; else if (pg_strcasecmp(lock_type_str, "SHAREUPDATEEXCLUSIVELOCK") == 0) lockmode = ShareUpdateExclusiveLock; else if (pg_strcasecmp(lock_type_str, "SHARELOCK") == 0) lockmode = ShareLock; else if (pg_strcasecmp(lock_type_str, "SHAREEXCLUSIVELOCK") == 0) lockmode = ShareRowExclusiveLock; else if (pg_strcasecmp(lock_type_str, "EXCLUSIVELOCK") == 0) lockmode = ExclusiveLock; else if (pg_strcasecmp(lock_type_str, "ACCESSEXCLUSIVELOCK") == 0) lockmode = AccessExclusiveLock; else ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("\"lock_mode\" is set to invalid string: %s", lock_type_str), errhint("\"lock_mode\" must select one among NoLock, AccessShareLock, RowShareLock, RowExclusiveLock, ShareUpdateExclusiveLock, ShareLock, ShareExclusiveLock, ExclusiveLock, and AccessExclusiveLock"))); elog(NOTICE, "enter lock %d as %s", table_oid, lock_type_str); heapRelation = relation_open(table_oid, lockmode); elog(NOTICE, "succeed locking %d as %s", table_oid, lock_type_str); pg_usleep(sleep_time * 1000000L); relation_close(heapRelation, lockmode); elog(NOTICE, "exit lock %d as %s", table_oid, lock_type_str); PG_RETURN_VOID(); }
/* * Count the number of all-visible and all-frozen pages in the visibility * map for a particular relation. */ Datum pg_visibility_map_summary(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); Relation rel; BlockNumber nblocks; BlockNumber blkno; Buffer vmbuffer = InvalidBuffer; int64 all_visible = 0; int64 all_frozen = 0; TupleDesc tupdesc; Datum values[2]; bool nulls[2]; rel = relation_open(relid, AccessShareLock); /* Only some relkinds have a visibility map */ check_relation_relkind(rel); nblocks = RelationGetNumberOfBlocks(rel); for (blkno = 0; blkno < nblocks; ++blkno) { int32 mapbits; /* Make sure we are interruptible. */ CHECK_FOR_INTERRUPTS(); /* Get map info. */ mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer); if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0) ++all_visible; if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0) ++all_frozen; } /* Clean up. */ if (vmbuffer != InvalidBuffer) ReleaseBuffer(vmbuffer); relation_close(rel, AccessShareLock); tupdesc = CreateTemplateTupleDesc(2, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "all_visible", INT8OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "all_frozen", INT8OID, -1, 0); tupdesc = BlessTupleDesc(tupdesc); MemSet(nulls, 0, sizeof(nulls)); values[0] = Int64GetDatum(all_visible); values[1] = Int64GetDatum(all_frozen); PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); }
Datum pg_relation_size(PG_FUNCTION_ARGS) { Oid relOid = PG_GETARG_OID(0); text *forkName = PG_GETARG_TEXT_P(1); ForkNumber forkNumber; Relation rel; int64 size = 0; /** * This function is peculiar in that it does its own dispatching. * It does not work on entry db since we do not support dispatching * from entry-db currently. */ if (Gp_role == GP_ROLE_EXECUTE && IS_QUERY_DISPATCHER()) elog(ERROR, "This query is not currently supported by GPDB."); rel = try_relation_open(relOid, AccessShareLock, false); /* * Before 9.2, we used to throw an error if the relation didn't exist, but * that makes queries like "SELECT pg_relation_size(oid) FROM pg_class" * less robust, because while we scan pg_class with an MVCC snapshot, * someone else might drop the table. It's better to return NULL for * already-dropped tables than throw an error and abort the whole query. */ if (rel == NULL) PG_RETURN_NULL(); forkNumber = forkname_to_number(text_to_cstring(forkName)); if (relOid == 0 || rel->rd_node.relNode == 0) size = 0; else size = calculate_relation_size(rel, forkNumber); if (Gp_role == GP_ROLE_DISPATCH) { char *sql; sql = psprintf("select pg_catalog.pg_relation_size(%u, '%s')", relOid, forkNames[forkNumber]); size += get_size_from_segDBs(sql); } relation_close(rel, AccessShareLock); PG_RETURN_INT64(size); }
/*
 * IsErrorTable
 *
 * Returns true when the relation is used as an error table, i.e. some
 * object recorded in pg_depend as depending on it has a pg_exttable row
 * whose fmterrtbl equals this relation's OID.  There is no crisp definition
 * of "error table", but this satisfies the current requirements.
 *
 * Non-heap relations are rejected immediately without any catalog lookup.
 */
bool
IsErrorTable(Relation rel)
{
    cqContext  *depScan;
    cqContext  *extCtx;
    cqContext   extCtxData;
    Relation    extTableRel;
    HeapTuple   depTuple;
    bool        found = false;

    /* fast path to quick check */
    if (!RelationIsHeap(rel))
        return false;

    /*
     * Otherwise, go deeper and play with pg_depend...
     */
    depScan = caql_beginscan(
            NULL,
            cql("SELECT * FROM pg_depend "
                " WHERE refclassid = :1 "
                " AND refobjid = :2 "
                " AND refobjsubid = :3 ",
                ObjectIdGetDatum(RelationRelationId),
                ObjectIdGetDatum(RelationGetRelid(rel)),
                Int32GetDatum(0)));

    /* pg_exttable is opened once and reused for each per-dependent lookup */
    extTableRel = relation_open(ExtTableRelationId, AccessShareLock);
    extCtx = caql_addrel(cqclr(&extCtxData), extTableRel);

    for (;;)
    {
        Form_pg_depend depForm;
        Oid         errTableOid;

        depTuple = caql_getnext(depScan);
        if (!HeapTupleIsValid(depTuple))
            break;

        depForm = (Form_pg_depend) GETSTRUCT(depTuple);

        errTableOid = caql_getoid(
                extCtx,
                cql("SELECT fmterrtbl FROM pg_exttable "
                    " WHERE reloid = :1",
                    ObjectIdGetDatum(depForm->objid)));

        if (RelationGetRelid(rel) == errTableOid)
        {
            found = true;
            break;
        }
    }

    relation_close(extTableRel, AccessShareLock);

    caql_endscan(depScan);

    return found;
}
/*
 * pg_relation_size_oid
 *      SQL-callable: size of the relation identified by OID, as computed by
 *      calculate_relation_size() from its rd_node.
 *
 * Uses relation_open(), not a try_ variant, so no NULL result is handled
 * here.
 */
Datum
pg_relation_size_oid(PG_FUNCTION_ARGS)
{
    Oid         relOid = PG_GETARG_OID(0);
    Relation    target = relation_open(relOid, AccessShareLock);
    int64       nbytes = calculate_relation_size(&(target->rd_node));

    relation_close(target, AccessShareLock);

    PG_RETURN_INT64(nbytes);
}
/*
 * findPartitionKeyType
 *      Look up the type OID and typmod of attribute number keyAttNo
 *      (1-based) in the tuple descriptor of relation parentOid.
 *
 * The results are stored through typeOid and typeMod.  The relation is
 * opened and closed with NoLock.
 */
static void
findPartitionKeyType(Oid parentOid, int keyAttNo, Oid *typeOid, int32 *typeMod)
{
    Relation    parent = relation_open(parentOid, NoLock);
    TupleDesc   desc = RelationGetDescr(parent);
    int         attIdx = keyAttNo - 1;  /* attrs[] is zero-based */

    Assert(desc->natts >= keyAttNo);

    *typeOid = desc->attrs[attIdx]->atttypid;
    *typeMod = desc->attrs[attIdx]->atttypmod;

    relation_close(parent, NoLock);
}