/* * pgstat_relation */ static Datum pgstat_relation(Relation rel, FunctionCallInfo fcinfo) { const char *err; /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); switch (rel->rd_rel->relkind) { case RELKIND_RELATION: case RELKIND_TOASTVALUE: case RELKIND_UNCATALOGED: case RELKIND_SEQUENCE: return pgstat_heap(rel, fcinfo); case RELKIND_INDEX: switch (rel->rd_rel->relam) { case BTREE_AM_OID: return pgstat_index(rel, BTREE_METAPAGE + 1, pgstat_btree_page, fcinfo); case HASH_AM_OID: return pgstat_index(rel, HASH_METAPAGE + 1, pgstat_hash_page, fcinfo); case GIST_AM_OID: return pgstat_index(rel, GIST_ROOT_BLKNO + 1, pgstat_gist_page, fcinfo); case GIN_AM_OID: err = "gin index"; break; default: err = "unknown index"; break; } break; case RELKIND_VIEW: err = "view"; break; case RELKIND_COMPOSITE_TYPE: err = "composite type"; break; default: err = "unknown"; break; } ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("\"%s\" (%s) is not supported", RelationGetRelationName(rel), err))); return 0; /* should not happen */ }
/*--------------------------------------------------------------------------- * This cluster code allows for clustering multiple tables at once. Because * of this, we cannot just run everything on a single transaction, or we * would be forced to acquire exclusive locks on all the tables being * clustered, simultaneously --- very likely leading to deadlock. * * To solve this we follow a similar strategy to VACUUM code, * clustering each relation in a separate transaction. For this to work, * we need to: * - provide a separate memory context so that we can pass information in * a way that survives across transactions * - start a new transaction every time a new relation is clustered * - check for validity of the information on to-be-clustered relations, * as someone might have deleted a relation behind our back, or * clustered one on a different index * - end the transaction * * The single-relation case does not have any such overhead. * * We also allow a relation to be specified without index. In that case, * the indisclustered bit will be looked up, and an ERROR will be thrown * if there is no index with the bit set. *--------------------------------------------------------------------------- */ void cluster(ClusterStmt *stmt, bool isTopLevel) { if (stmt->relation != NULL) { /* This is the single-relation case. */ Oid tableOid, indexOid = InvalidOid; Relation rel; /* Find and lock the table */ rel = heap_openrv(stmt->relation, AccessExclusiveLock); tableOid = RelationGetRelid(rel); /* Check permissions */ if (!pg_class_ownercheck(tableOid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS, RelationGetRelationName(rel)); /* * Reject clustering a remote temp table ... their local buffer * manager is not going to cope. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot cluster temporary tables of other sessions"))); if (stmt->indexname == NULL) { ListCell *index; /* We need to find the index that has indisclustered set. */ foreach(index, RelationGetIndexList(rel)) { HeapTuple idxtuple; Form_pg_index indexForm; indexOid = lfirst_oid(index); idxtuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid)); if (!HeapTupleIsValid(idxtuple)) elog(ERROR, "cache lookup failed for index %u", indexOid); indexForm = (Form_pg_index) GETSTRUCT(idxtuple); if (indexForm->indisclustered) { ReleaseSysCache(idxtuple); break; } ReleaseSysCache(idxtuple); indexOid = InvalidOid; }
/*--------------------------------------------------------------------------- * This cluster code allows for clustering multiple tables at once. Because * of this, we cannot just run everything on a single transaction, or we * would be forced to acquire exclusive locks on all the tables being * clustered, simultaneously --- very likely leading to deadlock. * * To solve this we follow a similar strategy to VACUUM code, * clustering each relation in a separate transaction. For this to work, * we need to: * - provide a separate memory context so that we can pass information in * a way that survives across transactions * - start a new transaction every time a new relation is clustered * - check for validity of the information on to-be-clustered relations, * as someone might have deleted a relation behind our back, or * clustered one on a different index * - end the transaction * * The single-relation case does not have any such overhead. * * We also allow a relation to be specified without index. In that case, * the indisclustered bit will be looked up, and an ERROR will be thrown * if there is no index with the bit set. *--------------------------------------------------------------------------- */ void cluster(ClusterStmt *stmt, bool isTopLevel) { if (stmt->relation != NULL) { /* This is the single-relation case. */ Oid tableOid, indexOid = InvalidOid; Relation rel; /* Find, lock, and check permissions on the table */ tableOid = RangeVarGetRelidExtended(stmt->relation, AccessExclusiveLock, false, false, RangeVarCallbackOwnsTable, NULL); rel = heap_open(tableOid, NoLock); /* * Reject clustering a remote temp table ... their local buffer * manager is not going to cope. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot cluster temporary tables of other sessions"))); if (stmt->indexname == NULL) { ListCell *index; /* We need to find the index that has indisclustered set. */ foreach(index, RelationGetIndexList(rel)) { HeapTuple idxtuple; Form_pg_index indexForm; indexOid = lfirst_oid(index); idxtuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid)); if (!HeapTupleIsValid(idxtuple)) elog(ERROR, "cache lookup failed for index %u", indexOid); indexForm = (Form_pg_index) GETSTRUCT(idxtuple); if (indexForm->indisclustered) { ReleaseSysCache(idxtuple); break; } ReleaseSysCache(idxtuple); indexOid = InvalidOid; }
/* * SQL-callable function to clean the insert pending list */ Datum gin_clean_pending_list(PG_FUNCTION_ARGS) { Oid indexoid = PG_GETARG_OID(0); Relation indexRel = index_open(indexoid, AccessShareLock); IndexBulkDeleteResult stats; GinState ginstate; if (RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("recovery is in progress"), errhint("GIN pending list cannot be cleaned up during recovery."))); /* Must be a GIN index */ if (indexRel->rd_rel->relkind != RELKIND_INDEX || indexRel->rd_rel->relam != GIN_AM_OID) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("\"%s\" is not a GIN index", RelationGetRelationName(indexRel)))); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(indexRel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary indexes of other sessions"))); /* User must own the index (comparable to privileges needed for VACUUM) */ if (!pg_class_ownercheck(indexoid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS, RelationGetRelationName(indexRel)); memset(&stats, 0, sizeof(stats)); initGinState(&ginstate, indexRel); ginInsertCleanup(&ginstate, true, true, &stats); index_close(indexRel, AccessShareLock); PG_RETURN_INT64((int64) stats.pages_deleted); }
/* ------------------------------------------------------ * pgstathashindex() * * Usage: SELECT * FROM pgstathashindex('hashindex'); * ------------------------------------------------------ */ Datum pgstathashindex(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); BlockNumber nblocks; BlockNumber blkno; Relation rel; HashIndexStat stats; BufferAccessStrategy bstrategy; HeapTuple tuple; TupleDesc tupleDesc; Datum values[8]; bool nulls[8]; Buffer metabuf; HashMetaPage metap; float8 free_percent; uint64 total_space; rel = index_open(relid, AccessShareLock); /* index_open() checks that it's an index */ if (!IS_HASH(rel)) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("relation \"%s\" is not a HASH index", RelationGetRelationName(rel)))); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary indexes of other sessions"))); /* Get the information we need from the metapage. */ memset(&stats, 0, sizeof(stats)); metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = HashPageGetMeta(BufferGetPage(metabuf)); stats.version = metap->hashm_version; stats.space_per_page = metap->hashm_bsize; _hash_relbuf(rel, metabuf); /* Get the current relation length */ nblocks = RelationGetNumberOfBlocks(rel); /* prepare access strategy for this index */ bstrategy = GetAccessStrategy(BAS_BULKREAD); /* Start from blkno 1 as 0th block is metapage */ for (blkno = 1; blkno < nblocks; blkno++) { Buffer buf; Page page; CHECK_FOR_INTERRUPTS(); buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); LockBuffer(buf, BUFFER_LOCK_SHARE); page = (Page) BufferGetPage(buf); if (PageIsNew(page)) stats.unused_pages++; else if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData))) ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("index \"%s\" contains corrupted page at block %u", RelationGetRelationName(rel), BufferGetBlockNumber(buf)))); else { HashPageOpaque opaque; int pagetype; opaque = (HashPageOpaque) PageGetSpecialPointer(page); pagetype = opaque->hasho_flag & LH_PAGE_TYPE; if (pagetype == LH_BUCKET_PAGE) { stats.bucket_pages++; GetHashPageStats(page, &stats); } else if (pagetype == LH_OVERFLOW_PAGE) { stats.overflow_pages++; GetHashPageStats(page, &stats); } else if (pagetype == LH_BITMAP_PAGE) stats.bitmap_pages++; else if (pagetype == LH_UNUSED_PAGE) stats.unused_pages++; else ereport(ERROR, (errcode(ERRCODE_INDEX_CORRUPTED), errmsg("unexpected page type 0x%04X in HASH index \"%s\" block %u", opaque->hasho_flag, RelationGetRelationName(rel), BufferGetBlockNumber(buf)))); } UnlockReleaseBuffer(buf); } /* Done accessing the index */ index_close(rel, AccessShareLock); /* Count unused pages as free space. */ stats.free_space += stats.unused_pages * stats.space_per_page; /* * Total space available for tuples excludes the metapage and the bitmap * pages. */ total_space = (nblocks - (stats.bitmap_pages + 1)) * stats.space_per_page; if (total_space == 0) free_percent = 0.0; else free_percent = 100.0 * stats.free_space / total_space; /* * Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); tupleDesc = BlessTupleDesc(tupleDesc); /* * Build and return the tuple */ MemSet(nulls, 0, sizeof(nulls)); values[0] = Int32GetDatum(stats.version); values[1] = Int64GetDatum((int64) stats.bucket_pages); values[2] = Int64GetDatum((int64) stats.overflow_pages); values[3] = Int64GetDatum((int64) stats.bitmap_pages); values[4] = Int64GetDatum((int64) stats.unused_pages); values[5] = Int64GetDatum(stats.live_items); values[6] = Int64GetDatum(stats.dead_items); values[7] = Float8GetDatum(free_percent); tuple = heap_form_tuple(tupleDesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); }
/* * get_raw_page * * Returns a copy of a page from shared buffers as a bytea, with hole * filled with zeros or simply without hole, with the length of the page * offset to be able to reconstitute the page entirely using the data * returned by this function. */ Datum get_raw_page(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); uint32 blkno = PG_GETARG_UINT32(1); bool with_hole = PG_GETARG_BOOL(2); bytea *raw_page; Relation rel; char raw_page_data[BLCKSZ]; Buffer buf; TupleDesc tupdesc; Datum result; Datum values[2]; bool nulls[2]; HeapTuple tuple; PageHeader page_header; int16 hole_offset, hole_length; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw functions")))); rel = relation_open(relid, AccessShareLock); /* Check that this relation has storage */ if (rel->rd_rel->relkind == RELKIND_VIEW) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get raw page from view \"%s\"", RelationGetRelationName(rel)))); if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get raw page from composite type \"%s\"", RelationGetRelationName(rel)))); if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get raw page from foreign table \"%s\"", RelationGetRelationName(rel)))); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); if (blkno >= RelationGetNumberOfBlocksInFork(rel, MAIN_FORKNUM)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("block number %u is out of range for relation \"%s\"", blkno, RelationGetRelationName(rel)))); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); /* Take a copy of the page to work on */ buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, NULL); LockBuffer(buf, BUFFER_LOCK_SHARE); memcpy(raw_page_data, BufferGetPage(buf), BLCKSZ); LockBuffer(buf, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buf); relation_close(rel, AccessShareLock); page_header = (PageHeader) raw_page_data; hole_length = page_header->pd_upper - page_header->pd_lower; hole_offset = page_header->pd_lower; /* * If hole is wanted in the page returned, fill it with zeros. * If not, copy to the return buffer the page without the hole. */ if (with_hole) { raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ); SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ); memcpy(VARDATA(raw_page), raw_page_data, BLCKSZ); MemSet(raw_page_data + hole_offset, 0, hole_length); } else { raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ - hole_length); SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ - hole_length); memcpy(VARDATA(raw_page), raw_page_data, hole_offset); memcpy(VARDATA(raw_page) + hole_offset, raw_page_data + hole_offset + hole_length, BLCKSZ - (hole_offset + hole_length)); } /* Build and return the tuple. */ values[0] = PointerGetDatum(raw_page); if (with_hole) values[1] = UInt16GetDatum(0); else values[1] = UInt16GetDatum(hole_offset); memset(nulls, 0, sizeof(nulls)); tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); }
Datum pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo) { Relation rel; output_type stat = {0}; TupleDesc tupdesc; bool nulls[NUM_OUTPUT_COLUMNS]; Datum values[NUM_OUTPUT_COLUMNS]; HeapTuple ret; int i = 0; if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); if (tupdesc->natts != NUM_OUTPUT_COLUMNS) elog(ERROR, "incorrect number of output arguments"); rel = relation_open(relid, AccessShareLock); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); /* * We support only ordinary relations and materialised views, because we * depend on the visibility map and free space map for our estimates about * unscanned pages. */ if (!(rel->rd_rel->relkind == RELKIND_RELATION || rel->rd_rel->relkind == RELKIND_MATVIEW)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("\"%s\" is not a table or materialized view", RelationGetRelationName(rel)))); statapprox_heap(rel, &stat); relation_close(rel, AccessShareLock); memset(nulls, 0, sizeof(nulls)); values[i++] = Int64GetDatum(stat.table_len); values[i++] = Float8GetDatum(stat.scanned_percent); values[i++] = Int64GetDatum(stat.tuple_count); values[i++] = Int64GetDatum(stat.tuple_len); values[i++] = Float8GetDatum(stat.tuple_percent); values[i++] = Int64GetDatum(stat.dead_tuple_count); values[i++] = Int64GetDatum(stat.dead_tuple_len); values[i++] = Float8GetDatum(stat.dead_tuple_percent); values[i++] = Int64GetDatum(stat.free_space); values[i++] = Float8GetDatum(stat.free_percent); ret = heap_form_tuple(tupdesc, values, nulls); return HeapTupleGetDatum(ret); }
/* ------------------------------------------------ * bt_metap() * * Get a btree's meta-page information * * Usage: SELECT * FROM bt_metap('t1_pkey') * ------------------------------------------------ */ Datum bt_metap(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_PP(0); Datum result; Relation rel; RangeVar *relrv; BTMetaPageData *metad; TupleDesc tupleDesc; int j; char *values[6]; Buffer buffer; Page page; HeapTuple tuple; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pageinspect functions")))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); if (!IS_INDEX(rel) || !IS_BTREE(rel)) elog(ERROR, "relation \"%s\" is not a btree index", RelationGetRelationName(rel)); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); buffer = ReadBuffer(rel, 0); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); metad = BTPageGetMeta(page); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); j = 0; values[j++] = psprintf("%d", metad->btm_magic); values[j++] = psprintf("%d", metad->btm_version); values[j++] = psprintf("%d", metad->btm_root); values[j++] = psprintf("%d", metad->btm_level); values[j++] = psprintf("%d", metad->btm_fastroot); values[j++] = psprintf("%d", metad->btm_fastlevel); tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), values); result = HeapTupleGetDatum(tuple); UnlockReleaseBuffer(buffer); relation_close(rel, AccessShareLock); PG_RETURN_DATUM(result); }
/*------------------------------------------------------- * bt_page_items() * * Get IndexTupleData set in a btree page * * Usage: SELECT * FROM bt_page_items('t1_pkey', 1); *------------------------------------------------------- */ Datum bt_page_items(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_PP(0); uint32 blkno = PG_GETARG_UINT32(1); Datum result; FuncCallContext *fctx; MemoryContext mctx; struct user_args *uargs; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pageinspect functions")))); if (SRF_IS_FIRSTCALL()) { RangeVar *relrv; Relation rel; Buffer buffer; BTPageOpaque opaque; TupleDesc tupleDesc; fctx = SRF_FIRSTCALL_INIT(); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); if (!IS_INDEX(rel) || !IS_BTREE(rel)) elog(ERROR, "relation \"%s\" is not a btree index", RelationGetRelationName(rel)); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the * owning session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); if (blkno == 0) elog(ERROR, "block 0 is a meta page"); CHECK_RELATION_BLOCK_RANGE(rel, blkno); buffer = ReadBuffer(rel, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); /* * We copy the page into local storage to avoid holding pin on the * buffer longer than we must, and possibly failing to release it at * all if the calling query doesn't fetch all rows. */ mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); uargs = palloc(sizeof(struct user_args)); uargs->page = palloc(BLCKSZ); memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ); UnlockReleaseBuffer(buffer); relation_close(rel, AccessShareLock); uargs->offset = FirstOffsetNumber; opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page); if (P_ISDELETED(opaque)) elog(NOTICE, "page is deleted"); fctx->max_calls = PageGetMaxOffsetNumber(uargs->page); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc); fctx->user_fctx = uargs; MemoryContextSwitchTo(mctx); } fctx = SRF_PERCALL_SETUP(); uargs = fctx->user_fctx; if (fctx->call_cntr < fctx->max_calls) { result = bt_page_print_tuples(fctx, uargs->page, uargs->offset); uargs->offset++; SRF_RETURN_NEXT(fctx, result); } else { pfree(uargs->page); pfree(uargs); SRF_RETURN_DONE(fctx); } }
Datum bt_page_items(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_P(0); uint32 blkno = PG_GETARG_UINT32(1); Datum result; char *values[6]; HeapTuple tuple; FuncCallContext *fctx; MemoryContext mctx; struct user_args *uargs; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pageinspect functions")))); if (SRF_IS_FIRSTCALL()) { RangeVar *relrv; Relation rel; Buffer buffer; BTPageOpaque opaque; TupleDesc tupleDesc; fctx = SRF_FIRSTCALL_INIT(); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); if (!IS_INDEX(rel) || !IS_BTREE(rel)) elog(ERROR, "relation \"%s\" is not a btree index", RelationGetRelationName(rel)); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the * owning session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); if (blkno == 0) elog(ERROR, "block 0 is a meta page"); CHECK_RELATION_BLOCK_RANGE(rel, blkno); buffer = ReadBuffer(rel, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); /* * We copy the page into local storage to avoid holding pin on the * buffer longer than we must, and possibly failing to release it at * all if the calling query doesn't fetch all rows. */ mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); uargs = palloc(sizeof(struct user_args)); uargs->page = palloc(BLCKSZ); memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ); UnlockReleaseBuffer(buffer); relation_close(rel, AccessShareLock); uargs->offset = FirstOffsetNumber; opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page); if (P_ISDELETED(opaque)) elog(NOTICE, "page is deleted"); fctx->max_calls = PageGetMaxOffsetNumber(uargs->page); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc); fctx->user_fctx = uargs; MemoryContextSwitchTo(mctx); } fctx = SRF_PERCALL_SETUP(); uargs = fctx->user_fctx; if (fctx->call_cntr < fctx->max_calls) { ItemId id; IndexTuple itup; int j; int off; int dlen; char *dump; char *ptr; id = PageGetItemId(uargs->page, uargs->offset); if (!ItemIdIsValid(id)) elog(ERROR, "invalid ItemId"); itup = (IndexTuple) PageGetItem(uargs->page, id); j = 0; values[j++] = psprintf("%d", uargs->offset); values[j++] = psprintf("(%u,%u)", BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)), itup->t_tid.ip_posid); values[j++] = psprintf("%d", (int) IndexTupleSize(itup)); values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f'); values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f'); ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info); dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info); dump = palloc0(dlen * 3 + 1); values[j] = dump; for (off = 0; off < dlen; off++) { if (off > 0) *dump++ = ' '; sprintf(dump, "%02x", *(ptr + off) & 0xff); dump += 2; } tuple = BuildTupleFromCStrings(fctx->attinmeta, values); result = HeapTupleGetDatum(tuple); uargs->offset = uargs->offset + 1; SRF_RETURN_NEXT(fctx, result); } else { pfree(uargs->page); pfree(uargs); SRF_RETURN_DONE(fctx); } }
/* ------------------------------------------------ * hash_bitmap_info() * * Get bitmap information for a particular overflow page * * Usage: SELECT * FROM hash_bitmap_info('con_hash_index'::regclass, 5); * ------------------------------------------------ */ Datum hash_bitmap_info(PG_FUNCTION_ARGS) { Oid indexRelid = PG_GETARG_OID(0); uint64 ovflblkno = PG_GETARG_INT64(1); HashMetaPage metap; Buffer metabuf, mapbuf; BlockNumber bitmapblkno; Page mappage; bool bit = false; TupleDesc tupleDesc; Relation indexRel; uint32 ovflbitno; int32 bitmappage, bitmapbit; HeapTuple tuple; int i, j; Datum values[3]; bool nulls[3]; uint32 *freep; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw page functions")))); indexRel = index_open(indexRelid, AccessShareLock); if (!IS_HASH(indexRel)) elog(ERROR, "relation \"%s\" is not a hash index", RelationGetRelationName(indexRel)); if (RELATION_IS_OTHER_TEMP(indexRel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); if (ovflblkno >= RelationGetNumberOfBlocks(indexRel)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("block number " UINT64_FORMAT " is out of range for relation \"%s\"", ovflblkno, RelationGetRelationName(indexRel)))); /* Read the metapage so we can determine which bitmap page to use */ metabuf = _hash_getbuf(indexRel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = HashPageGetMeta(BufferGetPage(metabuf)); /* * Reject attempt to read the bit for a metapage or bitmap page; this is * only meaningful for overflow pages. */ if (ovflblkno == 0) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid overflow block number %u", (BlockNumber) ovflblkno))); for (i = 0; i < metap->hashm_nmaps; i++) if (metap->hashm_mapp[i] == ovflblkno) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid overflow block number %u", (BlockNumber) ovflblkno))); /* * Identify overflow bit number. This will error out for primary bucket * pages, and we've already rejected the metapage and bitmap pages above. */ ovflbitno = _hash_ovflblkno_to_bitno(metap, (BlockNumber) ovflblkno); bitmappage = ovflbitno >> BMPG_SHIFT(metap); bitmapbit = ovflbitno & BMPG_MASK(metap); if (bitmappage >= metap->hashm_nmaps) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid overflow block number %u", (BlockNumber) ovflblkno))); bitmapblkno = metap->hashm_mapp[bitmappage]; _hash_relbuf(indexRel, metabuf); /* Check the status of bitmap bit for overflow page */ mapbuf = _hash_getbuf(indexRel, bitmapblkno, HASH_READ, LH_BITMAP_PAGE); mappage = BufferGetPage(mapbuf); freep = HashPageGetBitmap(mappage); bit = ISSET(freep, bitmapbit) != 0; _hash_relbuf(indexRel, mapbuf); index_close(indexRel, AccessShareLock); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); tupleDesc = BlessTupleDesc(tupleDesc); MemSet(nulls, 0, sizeof(nulls)); j = 0; values[j++] = Int64GetDatum((int64) bitmapblkno); values[j++] = Int32GetDatum(bitmapbit); values[j++] = BoolGetDatum(bit); tuple = heap_form_tuple(tupleDesc, values, nulls); PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); }
/* * workhorse */ static bytea * get_raw_page_internal(text *relname, ForkNumber forknum, BlockNumber blkno) { bytea *raw_page; RangeVar *relrv; Relation rel; char *raw_page_data; Buffer buf; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw functions")))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); /* Check that this relation has storage */ if (rel->rd_rel->relkind == RELKIND_VIEW) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get raw page from view \"%s\"", RelationGetRelationName(rel)))); if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get raw page from composite type \"%s\"", RelationGetRelationName(rel)))); if (rel->rd_rel->relkind == RELKIND_FOREIGN_TABLE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get raw page from foreign table \"%s\"", RelationGetRelationName(rel)))); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); if (blkno >= RelationGetNumberOfBlocks(rel)) elog(ERROR, "block number %u is out of range for relation \"%s\"", blkno, RelationGetRelationName(rel)); /* Initialize buffer to copy to */ raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ); SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ); raw_page_data = VARDATA(raw_page); /* Take a verbatim copy of the page */ buf = ReadBufferExtended(rel, forknum, blkno, RBM_NORMAL, NULL); LockBuffer(buf, BUFFER_LOCK_SHARE); memcpy(raw_page_data, BufferGetPage(buf), BLCKSZ); LockBuffer(buf, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buf); relation_close(rel, AccessShareLock); return raw_page; }
/* * expand_inherited_rtentry * Check whether a rangetable entry represents an inheritance set. * If so, add entries for all the child tables to the query's * rangetable, and build AppendRelInfo nodes for all the child tables * and add them to root->append_rel_list. If not, clear the entry's * "inh" flag to prevent later code from looking for AppendRelInfos. * * Note that the original RTE is considered to represent the whole * inheritance set. The first of the generated RTEs is an RTE for the same * table, but with inh = false, to represent the parent table in its role * as a simple member of the inheritance set. * * A childless table is never considered to be an inheritance set. For * regular inheritance, a parent RTE must always have at least two associated * AppendRelInfos: one corresponding to the parent table as a simple member of * inheritance set and one or more corresponding to the actual children. * Since a partitioned table is not scanned, it might have only one associated * AppendRelInfo. */ static void expand_inherited_rtentry(PlannerInfo *root, RangeTblEntry *rte, Index rti) { Oid parentOID; PlanRowMark *oldrc; Relation oldrelation; LOCKMODE lockmode; List *inhOIDs; ListCell *l; /* Does RT entry allow inheritance? */ if (!rte->inh) return; /* Ignore any already-expanded UNION ALL nodes */ if (rte->rtekind != RTE_RELATION) { Assert(rte->rtekind == RTE_SUBQUERY); return; } /* Fast path for common case of childless table */ parentOID = rte->relid; if (!has_subclass(parentOID)) { /* Clear flag before returning */ rte->inh = false; return; } /* * The rewriter should already have obtained an appropriate lock on each * relation named in the query. However, for each child relation we add * to the query, we must obtain an appropriate lock, because this will be * the first use of those relations in the parse/rewrite/plan pipeline. * Child rels should use the same lockmode as their parent. */ lockmode = rte->rellockmode; /* Scan for all members of inheritance set, acquire needed locks */ inhOIDs = find_all_inheritors(parentOID, lockmode, NULL); /* * Check that there's at least one descendant, else treat as no-child * case. This could happen despite above has_subclass() check, if table * once had a child but no longer does. */ if (list_length(inhOIDs) < 2) { /* Clear flag before returning */ rte->inh = false; return; } /* * If parent relation is selected FOR UPDATE/SHARE, we need to mark its * PlanRowMark as isParent = true, and generate a new PlanRowMark for each * child. */ oldrc = get_plan_rowmark(root->rowMarks, rti); if (oldrc) oldrc->isParent = true; /* * Must open the parent relation to examine its tupdesc. We need not lock * it; we assume the rewriter already did. */ oldrelation = heap_open(parentOID, NoLock); /* Scan the inheritance set and expand it */ if (RelationGetPartitionDesc(oldrelation) != NULL) { Assert(rte->relkind == RELKIND_PARTITIONED_TABLE); /* * If this table has partitions, recursively expand them in the order * in which they appear in the PartitionDesc. While at it, also * extract the partition key columns of all the partitioned tables. */ expand_partitioned_rtentry(root, rte, rti, oldrelation, oldrc, lockmode, &root->append_rel_list); } else { List *appinfos = NIL; RangeTblEntry *childrte; Index childRTindex; /* * This table has no partitions. Expand any plain inheritance * children in the order the OIDs were returned by * find_all_inheritors. */ foreach(l, inhOIDs) { Oid childOID = lfirst_oid(l); Relation newrelation; /* Open rel if needed; we already have required locks */ if (childOID != parentOID) newrelation = heap_open(childOID, NoLock); else newrelation = oldrelation; /* * It is possible that the parent table has children that are temp * tables of other backends. We cannot safely access such tables * (because of buffering issues), and the best thing to do seems * to be to silently ignore them. */ if (childOID != parentOID && RELATION_IS_OTHER_TEMP(newrelation)) { heap_close(newrelation, lockmode); continue; } expand_single_inheritance_child(root, rte, rti, oldrelation, oldrc, newrelation, &appinfos, &childrte, &childRTindex); /* Close child relations, but keep locks */ if (childOID != parentOID) heap_close(newrelation, NoLock); } /* * If all the children were temp tables, pretend it's a * non-inheritance situation; we don't need Append node in that case. * The duplicate RTE we added for the parent table is harmless, so we * don't bother to get rid of it; ditto for the useless PlanRowMark * node. */ if (list_length(appinfos) < 2) rte->inh = false; else root->append_rel_list = list_concat(root->append_rel_list, appinfos); }
/* ------------------------------------------------------ * pgstatindex() * * Usage: SELECT * FROM pgstatindex('t1_pkey'); * ------------------------------------------------------ */ Datum pgstatindex(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_P(0); Relation rel; RangeVar *relrv; Datum result; BlockNumber nblocks; BlockNumber blkno; BTIndexStat indexStat; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pgstattuple functions")))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); if (!IS_INDEX(rel) || !IS_BTREE(rel)) elog(ERROR, "relation \"%s\" is not a btree index", RelationGetRelationName(rel)); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); /* * Read metapage */ { Buffer buffer = ReadBuffer(rel, 0); Page page = BufferGetPage(buffer); BTMetaPageData *metad = BTPageGetMeta(page); indexStat.version = metad->btm_version; indexStat.level = metad->btm_level; indexStat.root_blkno = metad->btm_root; ReleaseBuffer(buffer); } /* -- init counters -- */ indexStat.root_pages = 0; indexStat.internal_pages = 0; indexStat.leaf_pages = 0; indexStat.empty_pages = 0; indexStat.deleted_pages = 0; indexStat.max_avail = 0; indexStat.free_space = 0; indexStat.fragments = 0; /* * Scan all blocks except the metapage */ nblocks = RelationGetNumberOfBlocks(rel); for (blkno = 1; blkno < nblocks; blkno++) { Buffer buffer; Page page; BTPageOpaque opaque; /* Read and lock buffer */ buffer = ReadBuffer(rel, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* Determine page type, and update totals */ if (P_ISLEAF(opaque)) { int max_avail; max_avail = BLCKSZ - (BLCKSZ - ((PageHeader) page)->pd_special + SizeOfPageHeaderData); indexStat.max_avail += max_avail; indexStat.free_space += PageGetFreeSpace(page); indexStat.leaf_pages++; /* * If the next leaf is on an earlier block, it means a * fragmentation. */ if (opaque->btpo_next != P_NONE && opaque->btpo_next < blkno) indexStat.fragments++; } else if (P_ISDELETED(opaque)) indexStat.deleted_pages++; else if (P_IGNORE(opaque)) indexStat.empty_pages++; else if (P_ISROOT(opaque)) indexStat.root_pages++; else indexStat.internal_pages++; /* Unlock and release buffer */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); } relation_close(rel, AccessShareLock); /*---------------------------- * Build a result tuple *---------------------------- */ { TupleDesc tupleDesc; int j; char *values[10]; HeapTuple tuple; /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); j = 0; values[j] = palloc(32); snprintf(values[j++], 32, "%d", indexStat.version); values[j] = palloc(32); snprintf(values[j++], 32, "%d", indexStat.level); values[j] = palloc(32); snprintf(values[j++], 32, INT64_FORMAT, (indexStat.root_pages + indexStat.leaf_pages + indexStat.internal_pages + indexStat.deleted_pages + indexStat.empty_pages) * BLCKSZ); values[j] = palloc(32); snprintf(values[j++], 32, "%u", indexStat.root_blkno); values[j] = palloc(32); snprintf(values[j++], 32, INT64_FORMAT, indexStat.internal_pages); values[j] = palloc(32); snprintf(values[j++], 32, INT64_FORMAT, indexStat.leaf_pages); values[j] = palloc(32); snprintf(values[j++], 32, INT64_FORMAT, indexStat.empty_pages); values[j] = palloc(32); snprintf(values[j++], 32, INT64_FORMAT, indexStat.deleted_pages); values[j] = palloc(32); if (indexStat.max_avail > 0) snprintf(values[j++], 32, "%.2f", 100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0); else snprintf(values[j++], 32, "NaN"); values[j] = palloc(32); if (indexStat.leaf_pages > 0) snprintf(values[j++], 32, "%.2f", (double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0); else snprintf(values[j++], 32, "NaN"); tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), values); result = HeapTupleGetDatum(tuple); } PG_RETURN_DATUM(result); }
static Datum pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo) { Datum result; BlockNumber nblocks; BlockNumber blkno; BTIndexStat indexStat; BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); if (!IS_INDEX(rel) || !IS_BTREE(rel)) elog(ERROR, "relation \"%s\" is not a btree index", RelationGetRelationName(rel)); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); /* * Read metapage */ { Buffer buffer = ReadBufferExtended(rel, MAIN_FORKNUM, 0, RBM_NORMAL, bstrategy); Page page = BufferGetPage(buffer); BTMetaPageData *metad = BTPageGetMeta(page); indexStat.version = metad->btm_version; indexStat.level = metad->btm_level; indexStat.root_blkno = metad->btm_root; ReleaseBuffer(buffer); } /* -- init counters -- */ indexStat.internal_pages = 0; indexStat.leaf_pages = 0; indexStat.empty_pages = 0; indexStat.deleted_pages = 0; indexStat.max_avail = 0; indexStat.free_space = 0; indexStat.fragments = 0; /* * Scan all blocks except the metapage */ nblocks = RelationGetNumberOfBlocks(rel); for (blkno = 1; blkno < nblocks; blkno++) { Buffer buffer; Page page; BTPageOpaque opaque; CHECK_FOR_INTERRUPTS(); /* Read and lock buffer */ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); LockBuffer(buffer, BUFFER_LOCK_SHARE); page = BufferGetPage(buffer); opaque = (BTPageOpaque) PageGetSpecialPointer(page); /* Determine page type, and update totals */ if (P_ISDELETED(opaque)) indexStat.deleted_pages++; else if (P_IGNORE(opaque)) indexStat.empty_pages++; /* this is the "half dead" state */ else if (P_ISLEAF(opaque)) { int max_avail; max_avail = BLCKSZ - (BLCKSZ - ((PageHeader) page)->pd_special + SizeOfPageHeaderData); indexStat.max_avail += max_avail; indexStat.free_space += PageGetFreeSpace(page); indexStat.leaf_pages++; /* * If the next leaf is on an earlier block, it means a * fragmentation. */ if (opaque->btpo_next != P_NONE && opaque->btpo_next < blkno) indexStat.fragments++; } else indexStat.internal_pages++; /* Unlock and release buffer */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); } relation_close(rel, AccessShareLock); /*---------------------------- * Build a result tuple *---------------------------- */ { TupleDesc tupleDesc; int j; char *values[10]; HeapTuple tuple; /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); j = 0; values[j++] = psprintf("%d", indexStat.version); values[j++] = psprintf("%d", indexStat.level); values[j++] = psprintf(INT64_FORMAT, (1 + /* include the metapage in index_size */ indexStat.leaf_pages + indexStat.internal_pages + indexStat.deleted_pages + indexStat.empty_pages) * BLCKSZ); values[j++] = psprintf("%u", indexStat.root_blkno); values[j++] = psprintf(INT64_FORMAT, indexStat.internal_pages); values[j++] = psprintf(INT64_FORMAT, indexStat.leaf_pages); values[j++] = psprintf(INT64_FORMAT, indexStat.empty_pages); values[j++] = psprintf(INT64_FORMAT, indexStat.deleted_pages); if (indexStat.max_avail > 0) values[j++] = psprintf("%.2f", 100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0); else values[j++] = pstrdup("NaN"); if (indexStat.leaf_pages > 0) values[j++] = psprintf("%.2f", (double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0); else values[j++] = pstrdup("NaN"); tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), values); result = HeapTupleGetDatum(tuple); } return result; }
/* ------------------------------------------------------ * pgstatginindex() * * Usage: SELECT * FROM pgstatginindex('ginindex'); * ------------------------------------------------------ */ Datum pgstatginindex(PG_FUNCTION_ARGS) { Oid relid = PG_GETARG_OID(0); Relation rel; Buffer buffer; Page page; GinMetaPageData *metadata; GinIndexStat stats; HeapTuple tuple; TupleDesc tupleDesc; Datum values[3]; bool nulls[3] = {false, false, false}; Datum result; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pgstattuple functions")))); rel = relation_open(relid, AccessShareLock); if (!IS_INDEX(rel) || !IS_GIN(rel)) elog(ERROR, "relation \"%s\" is not a GIN index", RelationGetRelationName(rel)); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary indexes of other sessions"))); /* * Read metapage */ buffer = ReadBuffer(rel, GIN_METAPAGE_BLKNO); LockBuffer(buffer, GIN_SHARE); page = BufferGetPage(buffer); metadata = GinPageGetMeta(page); stats.version = metadata->ginVersion; stats.pending_pages = metadata->nPendingPages; stats.pending_tuples = metadata->nPendingHeapTuples; UnlockReleaseBuffer(buffer); relation_close(rel, AccessShareLock); /* * Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); values[0] = Int32GetDatum(stats.version); values[1] = UInt32GetDatum(stats.pending_pages); values[2] = Int64GetDatum(stats.pending_tuples); /* * Build and return the tuple */ tuple = heap_form_tuple(tupleDesc, values, nulls); result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); }
/* ----------------------------------------------- * bt_page_stats() * * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1); * ----------------------------------------------- */ Datum bt_page_stats(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_PP(0); uint32 blkno = PG_GETARG_UINT32(1); Buffer buffer; Relation rel; RangeVar *relrv; Datum result; HeapTuple tuple; TupleDesc tupleDesc; int j; char *values[11]; BTPageStat stat; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pageinspect functions")))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); if (!IS_INDEX(rel) || !IS_BTREE(rel)) elog(ERROR, "relation \"%s\" is not a btree index", RelationGetRelationName(rel)); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); if (blkno == 0) elog(ERROR, "block 0 is a meta page"); CHECK_RELATION_BLOCK_RANGE(rel, blkno); buffer = ReadBuffer(rel, blkno); LockBuffer(buffer, BUFFER_LOCK_SHARE); /* keep compiler quiet */ stat.btpo_prev = stat.btpo_next = InvalidBlockNumber; stat.btpo_flags = stat.free_size = stat.avg_item_size = 0; GetBTPageStatistics(blkno, buffer, &stat); UnlockReleaseBuffer(buffer); relation_close(rel, AccessShareLock); /* Build a tuple descriptor for our result type */ if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) elog(ERROR, "return type must be a row type"); j = 0; values[j++] = psprintf("%d", stat.blkno); values[j++] = psprintf("%c", stat.type); values[j++] = psprintf("%d", stat.live_items); values[j++] = psprintf("%d", stat.dead_items); values[j++] = psprintf("%d", stat.avg_item_size); values[j++] = psprintf("%d", stat.page_size); values[j++] = psprintf("%d", stat.free_size); values[j++] = psprintf("%d", stat.btpo_prev); values[j++] = psprintf("%d", stat.btpo_next); values[j++] = psprintf("%d", (stat.type == 'd') ? stat.btpo.xact : stat.btpo.level); values[j++] = psprintf("%d", stat.btpo_flags); tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), values); result = HeapTupleGetDatum(tuple); PG_RETURN_DATUM(result); }