Ejemplo n.º 1
0
/*---------------------------------------------------------------------------
 * This cluster code allows for clustering multiple tables at once. Because
 * of this, we cannot just run everything on a single transaction, or we
 * would be forced to acquire exclusive locks on all the tables being
 * clustered, simultaneously --- very likely leading to deadlock.
 *
 * To solve this we follow a similar strategy to VACUUM code,
 * clustering each relation in a separate transaction. For this to work,
 * we need to:
 *	- provide a separate memory context so that we can pass information in
 *	  a way that survives across transactions
 *	- start a new transaction every time a new relation is clustered
 *	- check for validity of the information on to-be-clustered relations,
 *	  as someone might have deleted a relation behind our back, or
 *	  clustered one on a different index
 *	- end the transaction
 *
 * The single-relation case does not have any such overhead.
 *
 * We also allow a relation to be specified without index.	In that case,
 * the indisclustered bit will be looked up, and an ERROR will be thrown
 * if there is no index with the bit set.
 *---------------------------------------------------------------------------
 */
void
cluster(ClusterStmt *stmt, bool isTopLevel)
{
	if (stmt->relation != NULL)
	{
		/* This is the single-relation case. */
		Oid			tableOid,
					indexOid = InvalidOid;
		Relation	rel;
		RelToCluster rvtc;

		/* Find and lock the table */
		rel = heap_openrv(stmt->relation, AccessExclusiveLock);

		tableOid = RelationGetRelid(rel);

		/* Check permissions */
		if (!pg_class_ownercheck(tableOid, GetUserId()))
			aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_CLASS,
						   RelationGetRelationName(rel));

		/*
		 * Reject clustering a remote temp table ... their local buffer
		 * manager is not going to cope.
		 */
		if (isOtherTempNamespace(RelationGetNamespace(rel)))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			   errmsg("cannot cluster temporary tables of other sessions")));

		if (stmt->indexname == NULL)
		{
			ListCell   *index;

			/* We need to find the index that has indisclustered set. */
			foreach(index, RelationGetIndexList(rel))
			{
				HeapTuple	idxtuple;
				Form_pg_index indexForm;

				indexOid = lfirst_oid(index);
				idxtuple = SearchSysCache(INDEXRELID,
										  ObjectIdGetDatum(indexOid),
										  0, 0, 0);
				if (!HeapTupleIsValid(idxtuple))
					elog(ERROR, "cache lookup failed for index %u", indexOid);
				indexForm = (Form_pg_index) GETSTRUCT(idxtuple);
				if (indexForm->indisclustered)
				{
					ReleaseSysCache(idxtuple);
					break;
				}
				ReleaseSysCache(idxtuple);
				indexOid = InvalidOid;
			}
Ejemplo n.º 2
0
/*
 * DefineIndex
 *		Creates a new index.
 *
 * 'heapRelation': the relation the index will apply to.
 * 'indexRelationName': the name for the new index, or NULL to indicate
 *		that a nonconflicting default name should be picked.
 * 'indexRelationId': normally InvalidOid, but during bootstrap can be
 *		nonzero to specify a preselected OID for the index.
 * 'accessMethodName': name of the AM to use.
 * 'tableSpaceName': name of the tablespace to create the index in.
 *		NULL specifies using the appropriate default.
 * 'attributeList': a list of IndexElem specifying columns and expressions
 *		to index on.
 * 'predicate': the partial-index condition, or NULL if none.
 * 'rangetable': needed to interpret the predicate.
 * 'options': reloptions from WITH (in list-of-DefElem form).
 * 'unique': make the index enforce uniqueness.
 * 'primary': mark the index as a primary key in the catalogs.
 * 'isconstraint': index is for a PRIMARY KEY or UNIQUE constraint,
 *		so build a pg_constraint entry for it.
 * 'is_alter_table': this is due to an ALTER rather than a CREATE operation.
 * 'check_rights': check for CREATE rights in the namespace.  (This should
 *		be true except when ALTER is deleting/recreating an index.)
 * 'skip_build': make the catalog entries but leave the index file empty;
 *		it will be filled later.
 * 'quiet': suppress the NOTICE chatter ordinarily provided for constraints.
 * 'concurrent': avoid blocking writers to the table while building.
 */
void
DefineIndex(RangeVar *heapRelation,
			char *indexRelationName,
			Oid indexRelationId,
			char *accessMethodName,
			char *tableSpaceName,
			List *attributeList,
			Expr *predicate,
			List *rangetable,
			List *options,
			bool unique,
			bool primary,
			bool isconstraint,
			bool is_alter_table,
			bool check_rights,
			bool skip_build,
			bool quiet,
			bool concurrent)
{
	Oid		   *classObjectId;
	Oid			accessMethodId;
	Oid			relationId;
	Oid			namespaceId;
	Oid			tablespaceId;
	Relation	rel;
	HeapTuple	tuple;
	Form_pg_am	accessMethodForm;
	RegProcedure amoptions;
	Datum		reloptions;
	IndexInfo  *indexInfo;
	int			numberOfAttributes;
	List	   *old_xact_list;
	ListCell   *lc;
	uint32		ixcnt;
	LockRelId	heaprelid;
	LOCKTAG		heaplocktag;
	Snapshot	snapshot;
	Relation	pg_index;
	HeapTuple	indexTuple;
	Form_pg_index indexForm;

	/*
	 * count attributes in index
	 */
	numberOfAttributes = list_length(attributeList);
	if (numberOfAttributes <= 0)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
				 errmsg("must specify at least one column")));
	if (numberOfAttributes > INDEX_MAX_KEYS)
		ereport(ERROR,
				(errcode(ERRCODE_TOO_MANY_COLUMNS),
				 errmsg("cannot use more than %d columns in an index",
						INDEX_MAX_KEYS)));

	/*
	 * Open heap relation, acquire a suitable lock on it, remember its OID
	 *
	 * Only SELECT ... FOR UPDATE/SHARE are allowed while doing a standard
	 * index build; but for concurrent builds we allow INSERT/UPDATE/DELETE
	 * (but not VACUUM).
	 */
	rel = heap_openrv(heapRelation,
					  (concurrent ? ShareUpdateExclusiveLock : ShareLock));

	relationId = RelationGetRelid(rel);
	namespaceId = RelationGetNamespace(rel);

	/* Note: during bootstrap may see uncataloged relation */
	if (rel->rd_rel->relkind != RELKIND_RELATION &&
		rel->rd_rel->relkind != RELKIND_UNCATALOGED)
		ereport(ERROR,
				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
				 errmsg("\"%s\" is not a table",
						heapRelation->relname)));

	/*
	 * Don't try to CREATE INDEX on temp tables of other backends.
	 */
	if (isOtherTempNamespace(namespaceId))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot create indexes on temporary tables of other sessions")));

	/*
	 * Verify we (still) have CREATE rights in the rel's namespace.
	 * (Presumably we did when the rel was created, but maybe not anymore.)
	 * Skip check if caller doesn't want it.  Also skip check if
	 * bootstrapping, since permissions machinery may not be working yet.
	 */
	if (check_rights && !IsBootstrapProcessingMode())
	{
		AclResult	aclresult;

		aclresult = pg_namespace_aclcheck(namespaceId, GetUserId(),
										  ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_NAMESPACE,
						   get_namespace_name(namespaceId));
	}

	/*
	 * Select tablespace to use.  If not specified, use default_tablespace
	 * (which may in turn default to database's default).
	 */
	if (tableSpaceName)
	{
		tablespaceId = get_tablespace_oid(tableSpaceName);
		if (!OidIsValid(tablespaceId))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("tablespace \"%s\" does not exist",
							tableSpaceName)));
	}
	else
	{
		tablespaceId = GetDefaultTablespace();
		/* note InvalidOid is OK in this case */
	}

	/* Check permissions except when using database's default */
	if (OidIsValid(tablespaceId))
	{
		AclResult	aclresult;

		aclresult = pg_tablespace_aclcheck(tablespaceId, GetUserId(),
										   ACL_CREATE);
		if (aclresult != ACLCHECK_OK)
			aclcheck_error(aclresult, ACL_KIND_TABLESPACE,
						   get_tablespace_name(tablespaceId));
	}

	/*
	 * Force shared indexes into the pg_global tablespace.	This is a bit of a
	 * hack but seems simpler than marking them in the BKI commands.
	 */
	if (rel->rd_rel->relisshared)
		tablespaceId = GLOBALTABLESPACE_OID;

	/*
	 * Select name for index if caller didn't specify
	 */
	if (indexRelationName == NULL)
	{
		if (primary)
			indexRelationName = ChooseRelationName(RelationGetRelationName(rel),
												   NULL,
												   "pkey",
												   namespaceId);
		else
		{
			IndexElem  *iparam = (IndexElem *) linitial(attributeList);

			indexRelationName = ChooseRelationName(RelationGetRelationName(rel),
												   iparam->name,
												   "key",
												   namespaceId);
		}
	}

	/*
	 * look up the access method, verify it can handle the requested features
	 */
	tuple = SearchSysCache(AMNAME,
						   PointerGetDatum(accessMethodName),
						   0, 0, 0);
	if (!HeapTupleIsValid(tuple))
	{
		/*
		 * Hack to provide more-or-less-transparent updating of old RTREE
		 * indexes to GIST: if RTREE is requested and not found, use GIST.
		 */
		if (strcmp(accessMethodName, "rtree") == 0)
		{
			ereport(NOTICE,
					(errmsg("substituting access method \"gist\" for obsolete method \"rtree\"")));
			accessMethodName = "gist";
			tuple = SearchSysCache(AMNAME,
								   PointerGetDatum(accessMethodName),
								   0, 0, 0);
		}

		if (!HeapTupleIsValid(tuple))
			ereport(ERROR,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("access method \"%s\" does not exist",
							accessMethodName)));
	}
	accessMethodId = HeapTupleGetOid(tuple);
	accessMethodForm = (Form_pg_am) GETSTRUCT(tuple);

	if (unique && !accessMethodForm->amcanunique)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
			   errmsg("access method \"%s\" does not support unique indexes",
					  accessMethodName)));
	if (numberOfAttributes > 1 && !accessMethodForm->amcanmulticol)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
		  errmsg("access method \"%s\" does not support multicolumn indexes",
				 accessMethodName)));

	amoptions = accessMethodForm->amoptions;

	ReleaseSysCache(tuple);

	/*
	 * If a range table was created then check that only the base rel is
	 * mentioned.
	 */
	if (rangetable != NIL)
	{
		if (list_length(rangetable) != 1 || getrelid(1, rangetable) != relationId)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_COLUMN_REFERENCE),
					 errmsg("index expressions and predicates may refer only to the table being indexed")));
	}

	/*
	 * Validate predicate, if given
	 */
	if (predicate)
		CheckPredicate(predicate);

	/*
	 * Extra checks when creating a PRIMARY KEY index.
	 */
	if (primary)
	{
		List	   *cmds;
		ListCell   *keys;

		/*
		 * If ALTER TABLE, check that there isn't already a PRIMARY KEY. In
		 * CREATE TABLE, we have faith that the parser rejected multiple pkey
		 * clauses; and CREATE INDEX doesn't have a way to say PRIMARY KEY, so
		 * it's no problem either.
		 */
		if (is_alter_table &&
			relationHasPrimaryKey(rel))
		{
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TABLE_DEFINITION),
			 errmsg("multiple primary keys for table \"%s\" are not allowed",
					RelationGetRelationName(rel))));
		}

		/*
		 * Check that all of the attributes in a primary key are marked as not
		 * null, otherwise attempt to ALTER TABLE .. SET NOT NULL
		 */
		cmds = NIL;
		foreach(keys, attributeList)
		{
			IndexElem  *key = (IndexElem *) lfirst(keys);
			HeapTuple	atttuple;

			if (!key->name)
				ereport(ERROR,
						(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
						 errmsg("primary keys cannot be expressions")));

			/* System attributes are never null, so no problem */
			if (SystemAttributeByName(key->name, rel->rd_rel->relhasoids))
				continue;

			atttuple = SearchSysCacheAttName(relationId, key->name);
			if (HeapTupleIsValid(atttuple))
			{
				if (!((Form_pg_attribute) GETSTRUCT(atttuple))->attnotnull)
				{
					/* Add a subcommand to make this one NOT NULL */
					AlterTableCmd *cmd = makeNode(AlterTableCmd);

					cmd->subtype = AT_SetNotNull;
					cmd->name = key->name;

					cmds = lappend(cmds, cmd);
				}
				ReleaseSysCache(atttuple);
			}
			else
			{
				/*
				 * This shouldn't happen during CREATE TABLE, but can happen
				 * during ALTER TABLE.	Keep message in sync with
				 * transformIndexConstraints() in parser/analyze.c.
				 */
				ereport(ERROR,
						(errcode(ERRCODE_UNDEFINED_COLUMN),
						 errmsg("column \"%s\" named in key does not exist",
								key->name)));
			}
		}

		/*
		 * XXX: Shouldn't the ALTER TABLE .. SET NOT NULL cascade to child
		 * tables?	Currently, since the PRIMARY KEY itself doesn't cascade,
		 * we don't cascade the notnull constraint(s) either; but this is
		 * pretty debatable.
		 *
		 * XXX: possible future improvement: when being called from ALTER
		 * TABLE, it would be more efficient to merge this with the outer
		 * ALTER TABLE, so as to avoid two scans.  But that seems to
		 * complicate DefineIndex's API unduly.
		 */
		if (cmds)
			AlterTableInternal(relationId, cmds, false);
	}
Ejemplo n.º 3
0
/* ------------------------------------------------
 * bt_metap()
 *
 * Get a btree meta-page information
 *
 * Usage: SELECT * FROM bt_metap('t1_pkey')
 * ------------------------------------------------
 */
Datum
bt_metap(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	Buffer		buffer;

	Relation	rel;
	RangeVar   *relrv;
	Datum		result;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use pgstattuple functions"))));

	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
	rel = relation_openrv(relrv, AccessShareLock);

	if (!IS_INDEX(rel) || !IS_BTREE(rel))
		elog(ERROR, "bt_metap() can be used only on b-tree index.");

	/*
	 * Reject attempts to read non-local temporary relations; we would
	 * be likely to get wrong data since we have no visibility into the
	 * owning session's local buffers.
	 */
	if (isOtherTempNamespace(RelationGetNamespace(rel)))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot access temporary tables of other sessions")));

	buffer = ReadBuffer(rel, 0);

	{
		BTMetaPageData *metad;

		TupleDesc	tupleDesc;
		int			j;
		char	   *values[BTMETAP_NCOLUMNS];
		HeapTuple	tuple;

		Page		page = BufferGetPage(buffer);

		metad = BTPageGetMeta(page);

		tupleDesc = RelationNameGetTupleDesc(BTMETAP_TYPE);

		j = 0;
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", metad->btm_magic);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", metad->btm_version);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", metad->btm_root);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", metad->btm_level);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", metad->btm_fastroot);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", metad->btm_fastlevel);

		tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
									   values);

		result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple);
	}

	ReleaseBuffer(buffer);

	relation_close(rel, AccessShareLock);

	PG_RETURN_DATUM(result);
}
Ejemplo n.º 4
0
Datum
bt_page_items(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	uint32		blkno = PG_GETARG_UINT32(1);

	RangeVar   *relrv;
	Datum		result;
	char	   *values[BTPAGEITEMS_NCOLUMNS];
	BTPageOpaque opaque;
	HeapTuple	tuple;
	ItemId		id;

	FuncCallContext *fctx;
	MemoryContext mctx;
	struct user_args *uargs = NULL;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use pgstattuple functions"))));

	if (SRF_IS_FIRSTCALL())
	{
		fctx = SRF_FIRSTCALL_INIT();
		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

		uargs = palloc(sizeof(struct user_args));

		uargs->tupd = RelationNameGetTupleDesc(BTPAGEITEMS_TYPE);
		uargs->offset = FirstOffsetNumber;

		relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
		uargs->rel = relation_openrv(relrv, AccessShareLock);

		if (!IS_INDEX(uargs->rel) || !IS_BTREE(uargs->rel))
			elog(ERROR, "bt_page_items() can be used only on b-tree index.");

		/*
		 * Reject attempts to read non-local temporary relations; we would
		 * be likely to get wrong data since we have no visibility into the
		 * owning session's local buffers.
		 */
		if (isOtherTempNamespace(RelationGetNamespace(uargs->rel)))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot access temporary tables of other sessions")));

		if (blkno == 0)
			elog(ERROR, "Block 0 is a meta page.");

		CHECK_RELATION_BLOCK_RANGE(uargs->rel, blkno);

		uargs->buffer = ReadBuffer(uargs->rel, blkno);

		uargs->page = BufferGetPage(uargs->buffer);

		opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);

		if (P_ISDELETED(opaque))
			elog(NOTICE, "bt_page_items(): this page is deleted.");

		fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
		fctx->user_fctx = uargs;

		MemoryContextSwitchTo(mctx);
	}

	fctx = SRF_PERCALL_SETUP();
	uargs = fctx->user_fctx;

	if (fctx->call_cntr < fctx->max_calls)
	{
		IndexTuple	itup;

		id = PageGetItemId(uargs->page, uargs->offset);

		if (!ItemIdIsValid(id))
			elog(ERROR, "Invalid ItemId.");

		itup = (IndexTuple) PageGetItem(uargs->page, id);

		{
			int			j = 0;

			BlockNumber blkno = BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid));

			values[j] = palloc(32);
			snprintf(values[j++], 32, "%d", uargs->offset);
			values[j] = palloc(32);
			snprintf(values[j++], 32, "(%u,%u)", blkno, itup->t_tid.ip_posid);
			values[j] = palloc(32);
			snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup));
			values[j] = palloc(32);
			snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f');
			values[j] = palloc(32);
			snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');

			{
				int			off;
				char	   *dump;
				char	   *ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);

				dump = palloc(IndexTupleSize(itup) * 3);
				memset(dump, 0, IndexTupleSize(itup) * 3);

				for (off = 0;
					 off < IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
					 off++)
				{
					if (dump[0] == '\0')
						sprintf(dump, "%02x", *(ptr + off) & 0xff);
					else
					{
						char		buf[4];

						sprintf(buf, " %02x", *(ptr + off) & 0xff);
						strcat(dump, buf);
					}
				}
				values[j] = dump;
			}

			tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(uargs->tupd), values);
			result = TupleGetDatum(TupleDescGetSlot(uargs->tupd), tuple);
		}

		uargs->offset = uargs->offset + 1;

		SRF_RETURN_NEXT(fctx, result);
	}
	else
	{
		ReleaseBuffer(uargs->buffer);
		relation_close(uargs->rel, AccessShareLock);

		SRF_RETURN_DONE(fctx);
	}
}
Ejemplo n.º 5
0
/* -----------------------------------------------
 * bt_page()
 *
 * Usage: SELECT * FROM bt_page('t1_pkey', 0);
 * -----------------------------------------------
 */
Datum
bt_page_stats(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	uint32		blkno = PG_GETARG_UINT32(1);
	Buffer		buffer;

	Relation	rel;
	RangeVar   *relrv;
	Datum		result;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use pgstattuple functions"))));

	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
	rel = relation_openrv(relrv, AccessShareLock);

	if (!IS_INDEX(rel) || !IS_BTREE(rel))
		elog(ERROR, "bt_page_stats() can be used only on b-tree index.");

	/*
	 * Reject attempts to read non-local temporary relations; we would
	 * be likely to get wrong data since we have no visibility into the
	 * owning session's local buffers.
	 */
	if (isOtherTempNamespace(RelationGetNamespace(rel)))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot access temporary tables of other sessions")));

	if (blkno == 0)
		elog(ERROR, "Block 0 is a meta page.");

	CHECK_RELATION_BLOCK_RANGE(rel, blkno);

	buffer = ReadBuffer(rel, blkno);

	{
		HeapTuple	tuple;
		TupleDesc	tupleDesc;
		int			j;
		char	   *values[BTPAGESTATS_NCOLUMNS];

		BTPageStat	stat;

		GetBTPageStatistics(blkno, buffer, &stat);

		tupleDesc = RelationNameGetTupleDesc(BTPAGESTATS_TYPE);

		j = 0;
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", stat.blkno);

		values[j] = palloc(32);
		snprintf(values[j++], 32, "%c", stat.type);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", stat.live_items);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", stat.dead_items);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", stat.avg_item_size);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", stat.page_size);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", stat.free_size);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", stat.btpo_prev);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", stat.btpo_next);

		values[j] = palloc(32);
		if (stat.type == 'd')
			snprintf(values[j++], 32, "%d", stat.btpo.xact);
		else
			snprintf(values[j++], 32, "%d", stat.btpo.level);

		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", stat.btpo_flags);

		tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
									   values);

		result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple);
	}

	ReleaseBuffer(buffer);

	relation_close(rel, AccessShareLock);

	PG_RETURN_DATUM(result);
}
Ejemplo n.º 6
0
/* ------------------------------------------------------
 * pgstatindex()
 *
 * Usage: SELECT * FROM pgstatindex('t1_pkey');
 * ------------------------------------------------------
 */
Datum
pgstatindex(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	Relation	rel;
	RangeVar   *relrv;
	Datum		result;
	uint32		nblocks;
	uint32		blkno;
	BTIndexStat indexStat;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use pgstattuple functions"))));

	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
	rel = relation_openrv(relrv, AccessShareLock);

	if (!IS_INDEX(rel) || !IS_BTREE(rel))
		elog(ERROR, "pgstatindex() can be used only on b-tree index.");

	/*
	 * Reject attempts to read non-local temporary relations; we would
	 * be likely to get wrong data since we have no visibility into the
	 * owning session's local buffers.
	 */
	if (isOtherTempNamespace(RelationGetNamespace(rel)))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot access temporary tables of other sessions")));

	/*-------------------
	 * Read a metapage
	 *-------------------
	 */
	{
		Buffer		buffer = ReadBuffer(rel, 0);
		Page		page = BufferGetPage(buffer);
		BTMetaPageData *metad = BTPageGetMeta(page);

		indexStat.magic = metad->btm_magic;
		indexStat.version = metad->btm_version;
		indexStat.root_blkno = metad->btm_root;
		indexStat.level = metad->btm_level;
		indexStat.fastroot = metad->btm_fastroot;
		indexStat.fastlevel = metad->btm_fastlevel;

		ReleaseBuffer(buffer);
	}

	nblocks = RelationGetNumberOfBlocks(rel);

	/* -- init stat -- */
	indexStat.fragments = 0;

	indexStat.root_pages = 0;
	indexStat.leaf_pages = 0;
	indexStat.internal_pages = 0;
	indexStat.empty_pages = 0;
	indexStat.deleted_pages = 0;

	indexStat.max_avail = 0;
	indexStat.free_space = 0;

	/*-----------------------
	 * Scan all blocks
	 *-----------------------
	 */
	for (blkno = 1; blkno < nblocks; blkno++)
	{
		Buffer		buffer = ReadBuffer(rel, blkno);
		BTPageStat	stat;

		CHECK_FOR_INTERRUPTS();

		/* scan one page */
		stat.blkno = blkno;
		GetBTPageStatistics(blkno, buffer, &stat);

		/*---------------------
		 * page status (type)
		 *---------------------
		 */
		switch (stat.type)
		{
			case 'd':
				indexStat.deleted_pages++;
				break;
			case 'l':
				indexStat.leaf_pages++;
				break;
			case 'i':
				indexStat.internal_pages++;
				break;
			case 'e':
				indexStat.empty_pages++;
				break;
			case 'r':
				indexStat.root_pages++;
				break;
			default:
				elog(ERROR, "unknown page status.");
		}

		/* -- leaf fragmentation -- */
		indexStat.fragments += stat.fragments;

		if (stat.type == 'l')
		{
			indexStat.max_avail += stat.max_avail;
			indexStat.free_space += stat.free_size;
		}

		ReleaseBuffer(buffer);
	}

	relation_close(rel, AccessShareLock);

	/*----------------------------
	 * Build a result tuple
	 *----------------------------
	 */
	{
		TupleDesc	tupleDesc;
		int			j;
		char	   *values[PGSTATINDEX_NCOLUMNS];

		HeapTuple	tuple;

		tupleDesc = RelationNameGetTupleDesc(PGSTATINDEX_TYPE);

		j = 0;
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.version);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.level);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", (indexStat.root_pages +
										 indexStat.leaf_pages +
										 indexStat.internal_pages +
										 indexStat.deleted_pages +
										 indexStat.empty_pages) * BLCKSZ);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.root_blkno);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.internal_pages);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.leaf_pages);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.empty_pages);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.deleted_pages);
		values[j] = palloc(32);
		if (indexStat.max_avail > 0)
			snprintf(values[j++], 32, "%.2f",
					 100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0);
		else
			snprintf(values[j++], 32, "NaN");
		values[j] = palloc(32);
		if (indexStat.leaf_pages > 0)
			snprintf(values[j++], 32, "%.2f",
					 (double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0);
		else
			snprintf(values[j++], 32, "NaN");

		tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
									   values);

		result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple);
	}

	PG_RETURN_DATUM(result);
}