Example #1
0
/*
 * create_ctas_internal
 *
 * Internal utility used for the creation of the definition of a relation
 * created via CREATE TABLE AS or a materialized view.  Caller needs to
 * provide a list of attributes (ColumnDef nodes).
 */
static ObjectAddress
create_ctas_internal(List *attrList, IntoClause *into)
{
	CreateStmt *create = makeNode(CreateStmt);
	bool		is_matview;
	char		relkind;
	Datum		toast_options;
	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;
	ObjectAddress intoRelationAddr;

	/* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
	is_matview = (into->viewQuery != NULL);
	relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION;

	/*
	 * Create the target relation by faking up a CREATE TABLE parsetree and
	 * passing it to DefineRelation.
	 */
	create->relation = into->rel;
	create->tableElts = attrList;
	create->inhRelations = NIL;
	create->ofTypename = NULL;
	create->constraints = NIL;
	create->options = into->options;
	create->oncommit = into->onCommit;
	create->tablespacename = into->tableSpaceName;
	create->if_not_exists = false;

	/*
	 * Create the relation.  (This will error out if there's an existing view,
	 * so we don't need more code to complain if "replace" is false.)
	 */
	intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL, NULL);

	/*
	 * If necessary, create a TOAST table for the target table.  Note that
	 * NewRelationCreateToastTable ends with CommandCounterIncrement(), so
	 * that the TOAST table will be visible for insertion.
	 */
	CommandCounterIncrement();

	/* parse and validate reloptions for the toast table */
	toast_options = transformRelOptions((Datum) 0,
										create->options,
										"toast",
										validnsps,
										true, false);

	(void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true);

	NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options);

	/* Create the "view" part of a materialized view. */
	if (is_matview)
	{
		/* StoreViewQuery scribbles on tree, so make a copy */
		Query	   *query = (Query *) copyObject(into->viewQuery);

		StoreViewQuery(intoRelationAddr.objectId, query, false);
		CommandCounterIncrement();
	}

	return intoRelationAddr;
}
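A caller is expected to hand create_ctas_internal() a ready-made attrList. A minimal sketch of building one entry, modeled on the column-construction loop in Example #11 below (the column name and type OID here are illustrative assumptions):

/* Hypothetical sketch: construct one ColumnDef for the attrList */
ColumnDef  *col = makeNode(ColumnDef);
TypeName   *coltype = makeNode(TypeName);
List	   *attrList = NIL;

col->colname = pstrdup("id");		/* assumed column name */
col->typeName = coltype;
col->is_local = true;
col->location = -1;

coltype->typeOid = INT4OID;			/* "pre-cooked" type, as in Example #11 */
coltype->typemod = -1;
coltype->location = -1;

attrList = lappend(attrList, col);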
Example #2
0
/*
 * Create append-only auxiliary relations for target relation rel.
 * Returns true if they are newly created.  If pg_appendonly already
 * knows about those tables, they are not created and false is returned.
 */
bool
CreateAOAuxiliaryTable(
		Relation rel,
		const char *auxiliaryNamePrefix,
		char relkind,
		Oid aoauxiliaryOid,
		Oid aoauxiliaryIndexOid,
		Oid *aoauxiliaryComptypeOid,
		TupleDesc tupledesc,
		IndexInfo  *indexInfo,
		Oid	*classObjectId,
		int16 *coloptions)
{
	char aoauxiliary_relname[NAMEDATALEN];
	char aoauxiliary_idxname[NAMEDATALEN];
	bool shared_relation;
	Oid relOid, aoauxiliary_relid, aoauxiliary_idxid;
	ObjectAddress baseobject;
	ObjectAddress aoauxiliaryobject;

	Assert(RelationIsValid(rel));
	Assert(RelationIsAoRows(rel) || RelationIsAoCols(rel));
	Assert(auxiliaryNamePrefix);
	Assert(tupledesc);
	Assert(indexInfo);
	Assert(classObjectId);

	shared_relation = rel->rd_rel->relisshared;
	/*
	 * We cannot allow creating an auxiliary table for a shared relation
	 * after initdb (because there's no way to let other databases know
	 * about this auxiliary relation).
	 */
	if (shared_relation && !IsBootstrapProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("shared tables cannot have append-only auxiliary relations after initdb")));

	relOid = RelationGetRelid(rel);

	switch(relkind)
	{
		case RELKIND_AOVISIMAP:
			GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL, NULL,
				NULL, NULL, &aoauxiliary_relid, &aoauxiliary_idxid);
			break;
		case RELKIND_AOBLOCKDIR:
			GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL, NULL,
				&aoauxiliary_relid, &aoauxiliary_idxid, NULL, NULL);
			break;
		case RELKIND_AOSEGMENTS:
			GetAppendOnlyEntryAuxOids(relOid, SnapshotNow,
				&aoauxiliary_relid, &aoauxiliary_idxid,
				NULL, NULL, NULL, NULL);
			break;
		default:
			elog(ERROR, "unsupported auxiliary relkind '%c'", relkind);
	}

	/*
	 * Does it have the auxiliary relation?
	 */
	if (OidIsValid(aoauxiliary_relid))
	{
		return false;
	}

	snprintf(aoauxiliary_relname, sizeof(aoauxiliary_relname),
			 "%s_%u", auxiliaryNamePrefix, relOid);
	snprintf(aoauxiliary_idxname, sizeof(aoauxiliary_idxname),
			 "%s_%u_index", auxiliaryNamePrefix, relOid);

	/*
	 * We place the auxiliary relation in the pg_aoseg namespace even if
	 * its master relation is a temp table.  There cannot be any naming
	 * collision, and the auxiliary relation will be destroyed when its
	 * master is, so there is no need to handle the auxiliary relation
	 * as temp.
	 */
	aoauxiliary_relid = heap_create_with_catalog(aoauxiliary_relname,
											     PG_AOSEGMENT_NAMESPACE,
											     rel->rd_rel->reltablespace,
											     aoauxiliaryOid,
											     rel->rd_rel->relowner,
											     tupledesc,
											     /* relam */ InvalidOid,
											     relkind,
											     RELSTORAGE_HEAP,
											     shared_relation,
											     true,
											     /* bufferPoolBulkLoad */ false,
											     0,
											     ONCOMMIT_NOOP,
											     NULL, /* GP Policy */
											     (Datum) 0,
											     true,
												 /* valid_opts */ false,
											     aoauxiliaryComptypeOid,
											     /* persistentTid */ NULL,
											     /* persistentSerialNum */ NULL);

	/* Make this table visible, else index creation will fail */
	CommandCounterIncrement();

	aoauxiliary_idxid = index_create(aoauxiliaryOid,
									 aoauxiliary_idxname,
									 aoauxiliaryIndexOid,
									 indexInfo,
									 BTREE_AM_OID,
									 rel->rd_rel->reltablespace,
									 classObjectId, coloptions, (Datum) 0,
									 true, false, (Oid *) NULL, true, false,
									 false, NULL);

	/* Unlock target table -- no one can see it */
	UnlockRelationOid(aoauxiliaryOid, ShareLock);
	/* Unlock the index -- no one can see it anyway */
	UnlockRelationOid(aoauxiliaryIndexOid, AccessExclusiveLock);

	/*
	 * Store the auxiliary table's OID in the parent relation's pg_appendonly row.
	 * TODO (How to generalize this?)
	 */
	switch (relkind)
	{
		case RELKIND_AOVISIMAP:
			UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid, InvalidOid,
								 InvalidOid, InvalidOid,
								 aoauxiliary_relid, aoauxiliary_idxid);
			break;
		case RELKIND_AOBLOCKDIR:
			UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid, InvalidOid,
								 aoauxiliary_relid, aoauxiliary_idxid,
								 InvalidOid, InvalidOid);
			break;
		case RELKIND_AOSEGMENTS:
			UpdateAppendOnlyEntryAuxOids(relOid,
								 aoauxiliary_relid, aoauxiliary_idxid,
								 InvalidOid, InvalidOid,
								 InvalidOid, InvalidOid);
			break;
		default:
			elog(ERROR, "unsupported auxiliary relkind '%c'", relkind);
	}

	/*
	 * Register dependency from the auxiliary table to the master, so that the
	 * aoseg table will be deleted if the master is.
	 */
	baseobject.classId = RelationRelationId;
	baseobject.objectId = relOid;
	baseobject.objectSubId = 0;
	aoauxiliaryobject.classId = RelationRelationId;
	aoauxiliaryobject.objectId = aoauxiliaryOid;
	aoauxiliaryobject.objectSubId = 0;

	recordDependencyOn(&aoauxiliaryobject, &baseobject, DEPENDENCY_INTERNAL);

	/*
	 * Make changes visible
	 */
	CommandCounterIncrement();

	return true;
}
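A call site might look roughly like the sketch below; the "pg_aovisimap" name prefix and the pre-built descriptor and index variables are assumptions, prepared however the surrounding code normally does:

/* Hypothetical call site: create the visimap pair only if it is missing */
if (CreateAOAuxiliaryTable(rel, "pg_aovisimap", RELKIND_AOVISIMAP,
						   visimapOid, visimapIndexOid, &visimapComptypeOid,
						   tupledesc, indexInfo, classObjectId, coloptions))
	elog(DEBUG1, "created append-only auxiliary relations for \"%s\"",
		 RelationGetRelationName(rel));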
Example #3
0
/*
 * OperatorShellMake
 *		Make a "shell" entry for a not-yet-existing operator.
 */
static Oid
OperatorShellMake(const char *operatorName,
				  Oid operatorNamespace,
				  Oid leftTypeId,
				  Oid rightTypeId)
{
	Relation	pg_operator_desc;
	Oid			operatorObjectId;
	int			i;
	HeapTuple	tup;
	Datum		values[Natts_pg_operator];
	bool		nulls[Natts_pg_operator];
	NameData	oname;
	TupleDesc	tupDesc;

	/*
	 * validate operator name
	 */
	if (!validOperatorName(operatorName))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_NAME),
				 errmsg("\"%s\" is not a valid operator name",
						operatorName)));

	/*
	 * initialize our *nulls and *values arrays
	 */
	for (i = 0; i < Natts_pg_operator; ++i)
	{
		nulls[i] = false;
		values[i] = (Datum) NULL;		/* redundant, but safe */
	}

	/*
	 * initialize values[] with the operator name and input data types. Note
	 * that oprcode is set to InvalidOid, indicating it's a shell.
	 */
	i = 0;
	namestrcpy(&oname, operatorName);
	values[i++] = NameGetDatum(&oname); /* oprname */
	values[i++] = ObjectIdGetDatum(operatorNamespace);	/* oprnamespace */
	values[i++] = ObjectIdGetDatum(GetUserId());		/* oprowner */
	values[i++] = CharGetDatum(leftTypeId ? (rightTypeId ? 'b' : 'r') : 'l');	/* oprkind */
	values[i++] = BoolGetDatum(false);	/* oprcanmerge */
	values[i++] = BoolGetDatum(false);	/* oprcanhash */
	values[i++] = ObjectIdGetDatum(leftTypeId); /* oprleft */
	values[i++] = ObjectIdGetDatum(rightTypeId);		/* oprright */
	values[i++] = ObjectIdGetDatum(InvalidOid); /* oprresult */
	values[i++] = ObjectIdGetDatum(InvalidOid); /* oprcom */
	values[i++] = ObjectIdGetDatum(InvalidOid); /* oprnegate */
	values[i++] = ObjectIdGetDatum(InvalidOid); /* oprcode */
	values[i++] = ObjectIdGetDatum(InvalidOid); /* oprrest */
	values[i++] = ObjectIdGetDatum(InvalidOid); /* oprjoin */

	/*
	 * open pg_operator
	 */
	pg_operator_desc = heap_open(OperatorRelationId, RowExclusiveLock);
	tupDesc = pg_operator_desc->rd_att;

	/*
	 * create a new operator tuple
	 */
	tup = heap_form_tuple(tupDesc, values, nulls);

	/*
	 * insert our "shell" operator tuple
	 */
	operatorObjectId = simple_heap_insert(pg_operator_desc, tup);

	CatalogUpdateIndexes(pg_operator_desc, tup);

	/* Add dependencies for the entry */
	makeOperatorDependencies(tup);

	heap_freetuple(tup);

	/* Post creation hook for new shell operator */
	InvokeObjectAccessHook(OAT_POST_CREATE,
						   OperatorRelationId, operatorObjectId, 0);

	/*
	 * Make sure the tuple is visible for subsequent lookups/updates.
	 */
	CommandCounterIncrement();

	/*
	 * close the operator relation and return the oid.
	 */
	heap_close(pg_operator_desc, RowExclusiveLock);

	return operatorObjectId;
}
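Because the shell entry leaves oprcode at InvalidOid, later code can recognize it cheaply; a hedged sketch, assuming tup is the operator's syscache tuple already fetched by the caller:

/* Hypothetical check: a shell operator has no underlying function yet */
Form_pg_operator oprForm = (Form_pg_operator) GETSTRUCT(tup);

if (!RegProcedureIsValid(oprForm->oprcode))
	elog(DEBUG1, "operator %u is still just a shell", operatorObjectId);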
Example #4
0
/*
 * OperatorUpd
 *
 *	For a given operator, look up its negator and commutator operators.
 *	If they are defined, but their negator and commutator fields
 *	(respectively) are empty, then use the new operator for neg or comm.
 *	This solves a problem for users who need to insert two new operators
 *	which are the negator or commutator of each other.
 */
static void
OperatorUpd(Oid baseId, Oid commId, Oid negId)
{
	int			i;
	Relation	pg_operator_desc;
	HeapTuple	tup;
	bool		nulls[Natts_pg_operator];
	bool		replaces[Natts_pg_operator];
	Datum		values[Natts_pg_operator];

	for (i = 0; i < Natts_pg_operator; ++i)
	{
		values[i] = (Datum) 0;
		replaces[i] = false;
		nulls[i] = false;
	}

	/*
	 * check and update the commutator & negator, if necessary
	 *
	 * We need a CommandCounterIncrement here in case of a self-commutator
	 * operator: we'll need to update the tuple that we just inserted.
	 */
	CommandCounterIncrement();

	pg_operator_desc = heap_open(OperatorRelationId, RowExclusiveLock);

	tup = SearchSysCacheCopy1(OPEROID, ObjectIdGetDatum(commId));

	/*
	 * if the commutator and negator are the same operator, do one update. XXX
	 * this is probably useless code --- I doubt it ever makes sense for
	 * commutator and negator to be the same thing...
	 */
	if (commId == negId)
	{
		if (HeapTupleIsValid(tup))
		{
			Form_pg_operator t = (Form_pg_operator) GETSTRUCT(tup);

			if (!OidIsValid(t->oprcom) || !OidIsValid(t->oprnegate))
			{
				if (!OidIsValid(t->oprnegate))
				{
					values[Anum_pg_operator_oprnegate - 1] = ObjectIdGetDatum(baseId);
					replaces[Anum_pg_operator_oprnegate - 1] = true;
				}

				if (!OidIsValid(t->oprcom))
				{
					values[Anum_pg_operator_oprcom - 1] = ObjectIdGetDatum(baseId);
					replaces[Anum_pg_operator_oprcom - 1] = true;
				}

				tup = heap_modify_tuple(tup,
										RelationGetDescr(pg_operator_desc),
										values,
										nulls,
										replaces);

				simple_heap_update(pg_operator_desc, &tup->t_self, tup);

				CatalogUpdateIndexes(pg_operator_desc, tup);
			}
		}

		heap_close(pg_operator_desc, RowExclusiveLock);

		return;
	}

	/* if commutator and negator are different, do two updates */

	if (HeapTupleIsValid(tup) &&
		!(OidIsValid(((Form_pg_operator) GETSTRUCT(tup))->oprcom)))
	{
		values[Anum_pg_operator_oprcom - 1] = ObjectIdGetDatum(baseId);
		replaces[Anum_pg_operator_oprcom - 1] = true;

		tup = heap_modify_tuple(tup,
								RelationGetDescr(pg_operator_desc),
								values,
								nulls,
								replaces);

		simple_heap_update(pg_operator_desc, &tup->t_self, tup);

		CatalogUpdateIndexes(pg_operator_desc, tup);

		values[Anum_pg_operator_oprcom - 1] = (Datum) NULL;
		replaces[Anum_pg_operator_oprcom - 1] = false;
	}

	/* check and update the negator, if necessary */

	tup = SearchSysCacheCopy1(OPEROID, ObjectIdGetDatum(negId));

	if (HeapTupleIsValid(tup) &&
		!(OidIsValid(((Form_pg_operator) GETSTRUCT(tup))->oprnegate)))
	{
		values[Anum_pg_operator_oprnegate - 1] = ObjectIdGetDatum(baseId);
		replaces[Anum_pg_operator_oprnegate - 1] = true;

		tup = heap_modify_tuple(tup,
								RelationGetDescr(pg_operator_desc),
								values,
								nulls,
								replaces);

		simple_heap_update(pg_operator_desc, &tup->t_self, tup);

		CatalogUpdateIndexes(pg_operator_desc, tup);
	}

	heap_close(pg_operator_desc, RowExclusiveLock);
}
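The CommandCounterIncrement() at the top is what makes the self-commutator case work: without it, a syscache lookup of a row inserted earlier in the same command would not see it. A minimal sketch of that pattern, assuming baseId was inserted just before this call:

/* Hypothetical sketch: make our own insertion visible before re-reading it */
CommandCounterIncrement();

tup = SearchSysCacheCopy1(OPEROID, ObjectIdGetDatum(baseId));
if (!HeapTupleIsValid(tup))
	elog(ERROR, "cache lookup failed for operator %u", baseId);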
Example #5
0
int
inv_write(LargeObjectDesc *obj_desc, const char *buf, int nbytes)
{
	int			nwritten = 0;
	int			n;
	int			off;
	int			len;
	int32		pageno = (int32) (obj_desc->offset / LOBLKSIZE);
	ScanKeyData skey[2];
	SysScanDesc sd;
	HeapTuple	oldtuple;
	Form_pg_largeobject olddata;
	bool		neednextpage;
	bytea	   *datafield;
	bool		pfreeit;
	struct
	{
		bytea		hdr;
		char		data[LOBLKSIZE];	/* make struct big enough */
		int32		align_it;	/* ensure struct is aligned well enough */
	}			workbuf;
	char	   *workb = VARDATA(&workbuf.hdr);
	HeapTuple	newtup;
	Datum		values[Natts_pg_largeobject];
	bool		nulls[Natts_pg_largeobject];
	bool		replace[Natts_pg_largeobject];
	CatalogIndexState indstate;

	Assert(PointerIsValid(obj_desc));
	Assert(buf != NULL);

	/* enforce writability because snapshot is probably wrong otherwise */
	Assert(obj_desc->flags & IFS_WRLOCK);

	if (nbytes <= 0)
		return 0;

	/* this addition can't overflow because nbytes is only int32 */
	if ((nbytes + obj_desc->offset) > MAX_LARGE_OBJECT_SIZE)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid large object write request size: %d",
						nbytes)));

	open_lo_relation();

	indstate = CatalogOpenIndexes(lo_heap_r);

	ScanKeyInit(&skey[0],
				Anum_pg_largeobject_loid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(obj_desc->id));

	ScanKeyInit(&skey[1],
				Anum_pg_largeobject_pageno,
				BTGreaterEqualStrategyNumber, F_INT4GE,
				Int32GetDatum(pageno));

	sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
									obj_desc->snapshot, 2, skey);

	oldtuple = NULL;
	olddata = NULL;
	neednextpage = true;

	while (nwritten < nbytes)
	{
		/*
		 * If possible, get next pre-existing page of the LO.  We expect the
		 * indexscan will deliver these in order --- but there may be holes.
		 */
		if (neednextpage)
		{
			if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
			{
				if (HeapTupleHasNulls(oldtuple))		/* paranoia */
					elog(ERROR, "null field found in pg_largeobject");
				olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
				Assert(olddata->pageno >= pageno);
			}
			neednextpage = false;
		}

		/*
		 * If we have a pre-existing page, see if it is the page we want to
		 * write, or a later one.
		 */
		if (olddata != NULL && olddata->pageno == pageno)
		{
			/*
			 * Update an existing page with fresh data.
			 *
			 * First, load old data into workbuf
			 */
			getdatafield(olddata, &datafield, &len, &pfreeit);
			memcpy(workb, VARDATA(datafield), len);
			if (pfreeit)
				pfree(datafield);

			/*
			 * Fill any hole
			 */
			off = (int) (obj_desc->offset % LOBLKSIZE);
			if (off > len)
				MemSet(workb + len, 0, off - len);

			/*
			 * Insert appropriate portion of new data
			 */
			n = LOBLKSIZE - off;
			n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
			memcpy(workb + off, buf + nwritten, n);
			nwritten += n;
			obj_desc->offset += n;
			off += n;
			/* compute valid length of new page */
			len = (len >= off) ? len : off;
			SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);

			/*
			 * Form and insert updated tuple
			 */
			memset(values, 0, sizeof(values));
			memset(nulls, false, sizeof(nulls));
			memset(replace, false, sizeof(replace));
			values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
			replace[Anum_pg_largeobject_data - 1] = true;
			newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
									   values, nulls, replace);
			simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
			CatalogIndexInsert(indstate, newtup);
			heap_freetuple(newtup);

			/*
			 * We're done with this old page.
			 */
			oldtuple = NULL;
			olddata = NULL;
			neednextpage = true;
		}
		else
		{
			/*
			 * Write a brand new page.
			 *
			 * First, fill any hole
			 */
			off = (int) (obj_desc->offset % LOBLKSIZE);
			if (off > 0)
				MemSet(workb, 0, off);

			/*
			 * Insert appropriate portion of new data
			 */
			n = LOBLKSIZE - off;
			n = (n <= (nbytes - nwritten)) ? n : (nbytes - nwritten);
			memcpy(workb + off, buf + nwritten, n);
			nwritten += n;
			obj_desc->offset += n;
			/* compute valid length of new page */
			len = off + n;
			SET_VARSIZE(&workbuf.hdr, len + VARHDRSZ);

			/*
			 * Form and insert updated tuple
			 */
			memset(values, 0, sizeof(values));
			memset(nulls, false, sizeof(nulls));
			values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
			values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
			values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
			newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
			simple_heap_insert(lo_heap_r, newtup);
			CatalogIndexInsert(indstate, newtup);
			heap_freetuple(newtup);
		}
		pageno++;
	}

	systable_endscan_ordered(sd);

	CatalogCloseIndexes(indstate);

	/*
	 * Advance command counter so that my tuple updates will be seen by later
	 * large-object operations in this transaction.
	 */
	CommandCounterIncrement();

	return nwritten;
}
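The arithmetic at the top of the loop decides how much of the request fits on each pg_largeobject page. As an illustration (assuming the default LOBLKSIZE of BLCKSZ/4 = 2048), a 3000-byte write starting at offset 5000 first touches page 2 at in-page offset 904, copies 1144 bytes there, and leaves the remaining 1856 bytes for page 3:

/* Hypothetical illustration of the per-page arithmetic used above */
int32	pageno = (int32) (5000 / LOBLKSIZE);	/* = 2 when LOBLKSIZE is 2048 */
int		off = (int) (5000 % LOBLKSIZE);			/* = 904 */
int		n = LOBLKSIZE - off;					/* = 1144 bytes fit on this page */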
Example #6
0
void
inv_truncate(LargeObjectDesc *obj_desc, int64 len)
{
	int32		pageno = (int32) (len / LOBLKSIZE);
	int32		off;
	ScanKeyData skey[2];
	SysScanDesc sd;
	HeapTuple	oldtuple;
	Form_pg_largeobject olddata;
	struct
	{
		bytea		hdr;
		char		data[LOBLKSIZE];	/* make struct big enough */
		int32		align_it;	/* ensure struct is aligned well enough */
	}			workbuf;
	char	   *workb = VARDATA(&workbuf.hdr);
	HeapTuple	newtup;
	Datum		values[Natts_pg_largeobject];
	bool		nulls[Natts_pg_largeobject];
	bool		replace[Natts_pg_largeobject];
	CatalogIndexState indstate;

	Assert(PointerIsValid(obj_desc));

	/* enforce writability because snapshot is probably wrong otherwise */
	Assert(obj_desc->flags & IFS_WRLOCK);

	/*
	 * use errmsg_internal here because we don't want to expose INT64_FORMAT
	 * in translatable strings; doing better is not worth the trouble
	 */
	if (len < 0 || len > MAX_LARGE_OBJECT_SIZE)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg_internal("invalid large object truncation target: " INT64_FORMAT,
								 len)));

	open_lo_relation();

	indstate = CatalogOpenIndexes(lo_heap_r);

	/*
	 * Set up to find all pages with desired loid and pageno >= target
	 */
	ScanKeyInit(&skey[0],
				Anum_pg_largeobject_loid,
				BTEqualStrategyNumber, F_OIDEQ,
				ObjectIdGetDatum(obj_desc->id));

	ScanKeyInit(&skey[1],
				Anum_pg_largeobject_pageno,
				BTGreaterEqualStrategyNumber, F_INT4GE,
				Int32GetDatum(pageno));

	sd = systable_beginscan_ordered(lo_heap_r, lo_index_r,
									obj_desc->snapshot, 2, skey);

	/*
	 * If possible, get the page the truncation point is in. The truncation
	 * point may be beyond the end of the LO or in a hole.
	 */
	olddata = NULL;
	if ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
	{
		if (HeapTupleHasNulls(oldtuple))		/* paranoia */
			elog(ERROR, "null field found in pg_largeobject");
		olddata = (Form_pg_largeobject) GETSTRUCT(oldtuple);
		Assert(olddata->pageno >= pageno);
	}

	/*
	 * If we found the page of the truncation point we need to truncate the
	 * data in it.  Otherwise if we're in a hole, we need to create a page to
	 * mark the end of data.
	 */
	if (olddata != NULL && olddata->pageno == pageno)
	{
		/* First, load old data into workbuf */
		bytea	   *datafield;
		int			pagelen;
		bool		pfreeit;

		getdatafield(olddata, &datafield, &pagelen, &pfreeit);
		memcpy(workb, VARDATA(datafield), pagelen);
		if (pfreeit)
			pfree(datafield);

		/*
		 * Fill any hole
		 */
		off = len % LOBLKSIZE;
		if (off > pagelen)
			MemSet(workb + pagelen, 0, off - pagelen);

		/* compute length of new page */
		SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

		/*
		 * Form and insert updated tuple
		 */
		memset(values, 0, sizeof(values));
		memset(nulls, false, sizeof(nulls));
		memset(replace, false, sizeof(replace));
		values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
		replace[Anum_pg_largeobject_data - 1] = true;
		newtup = heap_modify_tuple(oldtuple, RelationGetDescr(lo_heap_r),
								   values, nulls, replace);
		simple_heap_update(lo_heap_r, &newtup->t_self, newtup);
		CatalogIndexInsert(indstate, newtup);
		heap_freetuple(newtup);
	}
	else
	{
		/*
		 * If the first page we found was after the truncation point, we're in
		 * a hole that we'll fill, but we need to delete the later page
		 * because the loop below won't visit it again.
		 */
		if (olddata != NULL)
		{
			Assert(olddata->pageno > pageno);
			simple_heap_delete(lo_heap_r, &oldtuple->t_self);
		}

		/*
		 * Write a brand new page.
		 *
		 * Fill the hole up to the truncation point
		 */
		off = len % LOBLKSIZE;
		if (off > 0)
			MemSet(workb, 0, off);

		/* compute length of new page */
		SET_VARSIZE(&workbuf.hdr, off + VARHDRSZ);

		/*
		 * Form and insert new tuple
		 */
		memset(values, 0, sizeof(values));
		memset(nulls, false, sizeof(nulls));
		values[Anum_pg_largeobject_loid - 1] = ObjectIdGetDatum(obj_desc->id);
		values[Anum_pg_largeobject_pageno - 1] = Int32GetDatum(pageno);
		values[Anum_pg_largeobject_data - 1] = PointerGetDatum(&workbuf);
		newtup = heap_form_tuple(lo_heap_r->rd_att, values, nulls);
		simple_heap_insert(lo_heap_r, newtup);
		CatalogIndexInsert(indstate, newtup);
		heap_freetuple(newtup);
	}

	/*
	 * Delete any pages after the truncation point.  If the initial search
	 * didn't find a page, then of course there's nothing more to do.
	 */
	if (olddata != NULL)
	{
		while ((oldtuple = systable_getnext_ordered(sd, ForwardScanDirection)) != NULL)
		{
			simple_heap_delete(lo_heap_r, &oldtuple->t_self);
		}
	}

	systable_endscan_ordered(sd);

	CatalogCloseIndexes(indstate);

	/*
	 * Advance command counter so that tuple updates will be seen by later
	 * large-object operations in this transaction.
	 */
	CommandCounterIncrement();
}
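The same arithmetic locates the page holding the truncation point. For example, again assuming LOBLKSIZE = 2048, truncating to length 5000 keeps pages 0 and 1 intact, shortens page 2 to 904 bytes, and deletes every later page:

/* Hypothetical illustration: locating the truncation point */
int64	newlen = 5000;
int32	pageno = (int32) (newlen / LOBLKSIZE);	/* = 2: the page to shorten */
int32	off = newlen % LOBLKSIZE;				/* = 904: bytes kept on it */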
Example #7
0
/*
 * create_toast_table --- internal workhorse
 *
 * rel is already opened and locked
 * toastOid and toastIndexOid are normally InvalidOid, but during
 * bootstrap they can be nonzero to specify hand-assigned OIDs
 */
static bool
create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
				   Datum reloptions, LOCKMODE lockmode, bool check)
{
	Oid			relOid = RelationGetRelid(rel);
	HeapTuple	reltup;
	TupleDesc	tupdesc;
	bool		shared_relation;
	bool		mapped_relation;
	Relation	toast_rel;
	Relation	class_rel;
	Oid			toast_relid;
	Oid			toast_typid = InvalidOid;
	Oid			namespaceid;
	char		toast_relname[NAMEDATALEN];
	char		toast_idxname[NAMEDATALEN];
	IndexInfo  *indexInfo;
	Oid			collationObjectId[2];
	Oid			classObjectId[2];
	int16		coloptions[2];
	ObjectAddress baseobject,
				toastobject;

	/*
	 * Toast table is shared if and only if its parent is.
	 *
	 * We cannot allow toasting a shared relation after initdb (because
	 * there's no way to mark it toasted in other databases' pg_class).
	 */
	shared_relation = rel->rd_rel->relisshared;
	if (shared_relation && !IsBootstrapProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("shared tables cannot be toasted after initdb")));

	/* It's mapped if and only if its parent is, too */
	mapped_relation = RelationIsMapped(rel);

	/*
	 * Is it already toasted?
	 */
	if (rel->rd_rel->reltoastrelid != InvalidOid)
		return false;

	/*
	 * Check to see whether the table actually needs a TOAST table.
	 */
	if (!IsBinaryUpgrade)
	{
		/* Normal mode, normal check */
		if (!needs_toast_table(rel))
			return false;
	}
	else
	{
		/*
		 * In binary-upgrade mode, create a TOAST table if and only if
		 * pg_upgrade told us to (ie, a TOAST table OID has been provided).
		 *
		 * This indicates that the old cluster had a TOAST table for the
		 * current table.  We must create a TOAST table to receive the old
		 * TOAST file, even if the table seems not to need one.
		 *
		 * Contrariwise, if the old cluster did not have a TOAST table, we
		 * should be able to get along without one even if the new version's
		 * needs_toast_table rules suggest we should have one.  There is a lot
		 * of daylight between where we will create a TOAST table and where
		 * one is really necessary to avoid failures, so small cross-version
		 * differences in the when-to-create heuristic shouldn't be a problem.
		 * If we tried to create a TOAST table anyway, we would have the
		 * problem that it might take up an OID that will conflict with some
		 * old-cluster table we haven't seen yet.
		 */
		if (!OidIsValid(binary_upgrade_next_toast_pg_class_oid) ||
			!OidIsValid(binary_upgrade_next_toast_pg_type_oid))
			return false;
	}

	/*
	 * If requested, check that the lockmode is sufficient.  This is a cross
	 * check in case of errors or conflicting decisions in earlier code.
	 */
	if (check && lockmode != AccessExclusiveLock)
		elog(ERROR, "AccessExclusiveLock required to add toast table.");

	/*
	 * Create the toast table and its index
	 */
	snprintf(toast_relname, sizeof(toast_relname),
			 "pg_toast_%u", relOid);
	snprintf(toast_idxname, sizeof(toast_idxname),
			 "pg_toast_%u_index", relOid);

	/* this is pretty painful...  need a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(3, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
					   "chunk_id",
					   OIDOID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
					   "chunk_seq",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
					   "chunk_data",
					   BYTEAOID,
					   -1, 0);

	/*
	 * Ensure that the toast table doesn't itself get toasted, or we'll be
	 * toast :-(.  This is essential for chunk_data because type bytea is
	 * toastable; hit the other two just to be sure.
	 */
	TupleDescAttr(tupdesc, 0)->attstorage = 'p';
	TupleDescAttr(tupdesc, 1)->attstorage = 'p';
	TupleDescAttr(tupdesc, 2)->attstorage = 'p';

	/*
	 * Toast tables for regular relations go in pg_toast; those for temp
	 * relations go into the per-backend temp-toast-table namespace.
	 */
	if (isTempOrTempToastNamespace(rel->rd_rel->relnamespace))
		namespaceid = GetTempToastNamespace();
	else
		namespaceid = PG_TOAST_NAMESPACE;

	/*
	 * Use binary-upgrade override for pg_type.oid, if supplied.  We might be
	 * in the post-schema-restore phase where we are doing ALTER TABLE to
	 * create TOAST tables that didn't exist in the old cluster.
	 */
	if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_toast_pg_type_oid))
	{
		toast_typid = binary_upgrade_next_toast_pg_type_oid;
		binary_upgrade_next_toast_pg_type_oid = InvalidOid;
	}

	toast_relid = heap_create_with_catalog(toast_relname,
										   namespaceid,
										   rel->rd_rel->reltablespace,
										   toastOid,
										   toast_typid,
										   InvalidOid,
										   rel->rd_rel->relowner,
										   tupdesc,
										   NIL,
										   RELKIND_TOASTVALUE,
										   rel->rd_rel->relpersistence,
										   shared_relation,
										   mapped_relation,
										   true,
										   0,
										   ONCOMMIT_NOOP,
										   reloptions,
										   false,
										   true,
										   true,
										   InvalidOid,
										   NULL);
	Assert(toast_relid != InvalidOid);

	/* make the toast relation visible, else heap_open will fail */
	CommandCounterIncrement();

	/* ShareLock is not really needed here, but take it anyway */
	toast_rel = heap_open(toast_relid, ShareLock);

	/*
	 * Create unique index on chunk_id, chunk_seq.
	 *
	 * NOTE: the normal TOAST access routines could actually function with a
	 * single-column index on chunk_id only. However, the slice access
	 * routines use both columns for faster access to an individual chunk. In
	 * addition, we want it to be unique as a check against the possibility of
	 * duplicate TOAST chunk OIDs. The index might also be a little more
	 * efficient this way, since btree isn't all that happy with large numbers
	 * of equal keys.
	 */

	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 2;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NULL;
	indexInfo->ii_ExclusionOps = NULL;
	indexInfo->ii_ExclusionProcs = NULL;
	indexInfo->ii_ExclusionStrats = NULL;
	indexInfo->ii_Unique = true;
	indexInfo->ii_ReadyForInserts = true;
	indexInfo->ii_Concurrent = false;
	indexInfo->ii_BrokenHotChain = false;
	indexInfo->ii_ParallelWorkers = 0;
	indexInfo->ii_Am = BTREE_AM_OID;
	indexInfo->ii_AmCache = NULL;
	indexInfo->ii_Context = CurrentMemoryContext;

	collationObjectId[0] = InvalidOid;
	collationObjectId[1] = InvalidOid;

	classObjectId[0] = OID_BTREE_OPS_OID;
	classObjectId[1] = INT4_BTREE_OPS_OID;

	coloptions[0] = 0;
	coloptions[1] = 0;

	index_create(toast_rel, toast_idxname, toastIndexOid, InvalidOid,
				 InvalidOid, InvalidOid,
				 indexInfo,
				 list_make2("chunk_id", "chunk_seq"),
				 BTREE_AM_OID,
				 rel->rd_rel->reltablespace,
				 collationObjectId, classObjectId, coloptions, (Datum) 0,
				 INDEX_CREATE_IS_PRIMARY, 0, true, true, NULL);

	heap_close(toast_rel, NoLock);

	/*
	 * Store the toast table's OID in the parent relation's pg_class row
	 */
	class_rel = heap_open(RelationRelationId, RowExclusiveLock);

	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
	if (!HeapTupleIsValid(reltup))
		elog(ERROR, "cache lookup failed for relation %u", relOid);

	((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid;

	if (!IsBootstrapProcessingMode())
	{
		/* normal case, use a transactional update */
		CatalogTupleUpdate(class_rel, &reltup->t_self, reltup);
	}
	else
	{
		/* While bootstrapping, we cannot UPDATE, so overwrite in-place */
		heap_inplace_update(class_rel, reltup);
	}

	heap_freetuple(reltup);

	heap_close(class_rel, RowExclusiveLock);

	/*
	 * Register dependency from the toast table to the master, so that the
	 * toast table will be deleted if the master is.  Skip this in bootstrap
	 * mode.
	 */
	if (!IsBootstrapProcessingMode())
	{
		baseobject.classId = RelationRelationId;
		baseobject.objectId = relOid;
		baseobject.objectSubId = 0;
		toastobject.classId = RelationRelationId;
		toastobject.objectId = toast_relid;
		toastobject.objectSubId = 0;

		recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
	}

	/*
	 * Make changes visible
	 */
	CommandCounterIncrement();

	return true;
}
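A hedged sketch of a call site for this variant, passing the lock mode actually held so the cross-check near the top can fire; relOid and reloptions are assumed to be supplied by the surrounding code:

/* Hypothetical call site: add a TOAST table to an existing relation */
Relation	rel = heap_open(relOid, AccessExclusiveLock);

(void) create_toast_table(rel, InvalidOid, InvalidOid, reloptions,
						  AccessExclusiveLock, true);
heap_close(rel, NoLock);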
Example #8
0
/*
 * reindex_relation - This routine is used to recreate all indexes
 * of a relation (and its toast relation too, if any).
 *
 * Returns true if any indexes were rebuilt.
 */
bool
reindex_relation(Oid relid)
{
	Relation	rel;
	Oid			toast_relid;
	bool		is_pg_class;
	bool		result;
	List	   *indexIds,
			   *doneIndexes,
			   *indexId;

	/*
	 * Ensure to hold an exclusive lock throughout the transaction. The
	 * lock could perhaps be less intensive (in the non-overwrite case)
	 * but for now it's AccessExclusiveLock for simplicity.
	 */
	rel = heap_open(relid, AccessExclusiveLock);

	toast_relid = rel->rd_rel->reltoastrelid;

	/*
	 * Get the list of index OIDs for this relation.  (We trust to the
	 * relcache to get this with a sequential scan if ignoring system
	 * indexes.)
	 */
	indexIds = RelationGetIndexList(rel);

	/*
	 * reindex_index will attempt to update the pg_class rows for the
	 * relation and index.  If we are processing pg_class itself, we
	 * want to make sure that the updates do not try to insert index
	 * entries into indexes we have not processed yet.  (When we are
	 * trying to recover from corrupted indexes, that could easily
	 * cause a crash.)  We can accomplish this because CatalogUpdateIndexes
	 * will use the relcache's index list to know which indexes to update.
	 * We just force the index list to be only the stuff we've processed.
	 *
	 * It is okay to not insert entries into the indexes we have not
	 * processed yet because all of this is transaction-safe.  If we fail
	 * partway through, the updated rows are dead and it doesn't matter
	 * whether they have index entries.  Also, a new pg_class index will
	 * be created with an entry for its own pg_class row because we do
	 * setNewRelfilenode() before we do index_build().
	 */
	is_pg_class = (RelationGetRelid(rel) == RelOid_pg_class);
	doneIndexes = NIL;

	/* Reindex all the indexes. */
	foreach(indexId, indexIds)
	{
		Oid		indexOid = lfirsto(indexId);

		if (is_pg_class)
			RelationSetIndexList(rel, doneIndexes);

		reindex_index(indexOid);

		CommandCounterIncrement();

		if (is_pg_class)
			doneIndexes = lappendo(doneIndexes, indexOid);
	}
Example #9
0
void
_bitmap_create_lov_heapandindex(Relation rel,
								Oid *lovHeapOid,
								Oid *lovIndexOid)
{
	char		lovHeapName[NAMEDATALEN];
	char		lovIndexName[NAMEDATALEN];
	TupleDesc	tupDesc;
	IndexInfo  *indexInfo;
	ObjectAddress	objAddr, referenced;
	Oid		   *classObjectId;
	int16	   *coloptions;
	Oid			heapid;
	Oid			idxid;
	int			indattrs;
	int			i;

	Assert(rel != NULL);

	/* create the new names for the new lov heap and index */
	snprintf(lovHeapName, sizeof(lovHeapName),
			 "pg_bm_%u", RelationGetRelid(rel));
	snprintf(lovIndexName, sizeof(lovIndexName),
			 "pg_bm_%u_index", RelationGetRelid(rel));

	heapid = get_relname_relid(lovHeapName, PG_BITMAPINDEX_NAMESPACE);

	/*
	 * If heapid exists, then this is happening during re-indexing.
	 * We allocate new relfilenodes for lov heap and lov index.
	 *
	 * XXX Each segment db may have different relfilenodes for the lov heap and
	 * lov index, which should not be an issue now.  Ideally, we would like each
	 * segment db to use the same oids.
	 */
	if (OidIsValid(heapid))
	{
		Relation lovHeap;
		Relation lovIndex;
		Buffer btree_metabuf;
		Page   btree_metapage;

		*lovHeapOid = heapid;

		idxid = get_relname_relid(lovIndexName, PG_BITMAPINDEX_NAMESPACE);
		Assert(OidIsValid(idxid));
		*lovIndexOid = idxid;

		lovHeap = heap_open(heapid, AccessExclusiveLock);
		lovIndex = index_open(idxid, AccessExclusiveLock);

		setNewRelfilenode(lovHeap, RecentXmin);
		setNewRelfilenode(lovIndex, RecentXmin);

		/*
		 * After assigning a new relfilenode to the btree index, its old
		 * content is gone, so it is not a valid btree anymore.  Create a
		 * new metapage for it.
		 */
		btree_metabuf = _bt_getbuf(lovIndex, P_NEW, BT_WRITE);
		Assert (BTREE_METAPAGE == BufferGetBlockNumber(btree_metabuf));
		btree_metapage = BufferGetPage(btree_metabuf);
		_bt_initmetapage(btree_metapage, P_NONE, 0);

		/* XLOG the metapage */

		if (!lovIndex->rd_istemp)
		{
			/* Fetch gp_persistent_relation_node information that will be added to the XLOG record */
			RelationFetchGpRelationNodeForXLog(lovIndex);

			log_newpage_rel(lovIndex, BufferGetBlockNumber(btree_metabuf), MAIN_FORKNUM,
						btree_metapage);
		}

		/* This cache value is not valid anymore. */
		if (lovIndex->rd_amcache)
		{
			pfree(lovIndex->rd_amcache);
			lovIndex->rd_amcache = NULL;
		}
		MarkBufferDirty(btree_metabuf);
		_bt_relbuf(lovIndex, btree_metabuf);

		index_close(lovIndex, NoLock);
		heap_close(lovHeap, NoLock);

		return;
	}

	/*
	 * create a new empty heap to store all attribute values with their
	 * corresponding block number and offset in LOV.
	 */
	tupDesc = _bitmap_create_lov_heapTupleDesc(rel);

	Assert(rel->rd_rel != NULL);

  	heapid =
		heap_create_with_catalog(lovHeapName, PG_BITMAPINDEX_NAMESPACE,
								 rel->rd_rel->reltablespace,
								 InvalidOid, rel->rd_rel->relowner,
								 tupDesc, NIL,
								 /* relam */ InvalidOid, RELKIND_RELATION, RELSTORAGE_HEAP,
								 rel->rd_rel->relisshared, false, /* bufferPoolBulkLoad */ false, 0,
								 ONCOMMIT_NOOP, NULL /* GP Policy */,
								 (Datum)0, true,
								 /* valid_opts */ true,
						 		 /* persistentTid */ NULL,
						 		 /* persistentSerialNum */ NULL);
	*lovHeapOid = heapid;

	/*
	 * We must bump the command counter to make the newly-created relation
	 * tuple visible for opening.
	 */
	CommandCounterIncrement();

	objAddr.classId = RelationRelationId;
	objAddr.objectId = heapid;
	objAddr.objectSubId = 0 ;

	referenced.classId = RelationRelationId;
	referenced.objectId = RelationGetRelid(rel);
	referenced.objectSubId = 0;

	recordDependencyOn(&objAddr, &referenced, DEPENDENCY_INTERNAL);

	/*
	 * create a btree index on the newly-created heap.
	 * The key includes all attributes to be indexed in this bitmap index.
	 */
	indattrs = tupDesc->natts - 2;
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = indattrs;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = make_ands_implicit(NULL);
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = true;

	classObjectId = (Oid *) palloc(indattrs * sizeof(Oid));
	coloptions = (int16 *) palloc(indattrs * sizeof(int16));
	for (i = 0; i < indattrs; i++)
	{
		Oid typid = tupDesc->attrs[i]->atttypid;

		indexInfo->ii_KeyAttrNumbers[i] = i + 1;
		classObjectId[i] = GetDefaultOpClass(typid, BTREE_AM_OID);
		coloptions[i] = 0;
	}

	idxid = index_create(heapid, lovIndexName, InvalidOid,
						 indexInfo, BTREE_AM_OID,
						 rel->rd_rel->reltablespace,
						 classObjectId, coloptions, 0, false, false, true,
						 false, false, NULL);
	*lovIndexOid = idxid;
}
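The "pg_bm_%u" naming convention makes the LOV heap discoverable later; a hedged sketch of resolving it for an existing bitmap index, where indexRel is an assumed, already-open bitmap index relation:

/* Hypothetical lookup of an existing LOV heap by its derived name */
char	lovHeapName[NAMEDATALEN];
Oid		lovHeapOid;

snprintf(lovHeapName, sizeof(lovHeapName),
		 "pg_bm_%u", RelationGetRelid(indexRel));
lovHeapOid = get_relname_relid(lovHeapName, PG_BITMAPINDEX_NAMESPACE);
if (!OidIsValid(lovHeapOid))
	elog(ERROR, "LOV heap for bitmap index \"%s\" does not exist",
		 RelationGetRelationName(indexRel));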
Example #10
0
/*
 * FetchRegularTable fetches the given table's data using the copy out command.
 * The function then fetches the DDL commands necessary to create this table's
 * replica, and locally applies these DDL commands. Last, the function copies
 * the fetched table data into the created table; and on success, returns true.
 * On failure due to connectivity issues with remote node, the function returns
 * false. On other types of failures, the function errors out.
 */
static bool
FetchRegularTable(const char *nodeName, uint32 nodePort, const char *tableName)
{
	StringInfo localFilePath = NULL;
	StringInfo remoteCopyCommand = NULL;
	List *ddlCommandList = NIL;
	ListCell *ddlCommandCell = NULL;
	CopyStmt *localCopyCommand = NULL;
	RangeVar *localTable = NULL;
	uint64 shardId = 0;
	bool received = false;
	StringInfo queryString = NULL;
	const char *tableOwner = NULL;
	Oid tableOwnerId = InvalidOid;
	Oid savedUserId = InvalidOid;
	int savedSecurityContext = 0;
	List *tableNameList = NIL;

	/* copy remote table's data to this node in an idempotent manner */
	shardId = ExtractShardId(tableName);
	localFilePath = makeStringInfo();
	appendStringInfo(localFilePath, "base/%s/%s" UINT64_FORMAT,
					 PG_JOB_CACHE_DIR, TABLE_FILE_PREFIX, shardId);

	remoteCopyCommand = makeStringInfo();
	appendStringInfo(remoteCopyCommand, COPY_OUT_COMMAND, tableName);

	received = ReceiveRegularFile(nodeName, nodePort, remoteCopyCommand, localFilePath);
	if (!received)
	{
		return false;
	}

	/* fetch the ddl commands needed to create the table */
	tableOwner = RemoteTableOwner(nodeName, nodePort, tableName);
	if (tableOwner == NULL)
	{
		return false;
	}
	tableOwnerId = get_role_oid(tableOwner, false);

	/* fetch the ddl commands needed to create the table */
	ddlCommandList = TableDDLCommandList(nodeName, nodePort, tableName);
	if (ddlCommandList == NIL)
	{
		return false;
	}

	/*
	 * Apply DDL commands against the database. Note that on failure from here
	 * on, we immediately error out instead of returning false.  Have to do
	 * this as the table's owner to ensure the local table is created with
	 * compatible permissions.
	 */
	GetUserIdAndSecContext(&savedUserId, &savedSecurityContext);
	SetUserIdAndSecContext(tableOwnerId, SECURITY_LOCAL_USERID_CHANGE);

	foreach(ddlCommandCell, ddlCommandList)
	{
		StringInfo ddlCommand = (StringInfo) lfirst(ddlCommandCell);
		Node *ddlCommandNode = ParseTreeNode(ddlCommand->data);

		ProcessUtility(ddlCommandNode, ddlCommand->data, PROCESS_UTILITY_TOPLEVEL,
					   NULL, None_Receiver, NULL);
		CommandCounterIncrement();
	}
Example #11
0
/*
 * intorel_startup --- executor startup
 */
static void
intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo)
{
	DR_intorel *myState = (DR_intorel *) self;
	IntoClause *into = myState->into;
	bool		is_matview;
	char		relkind;
	CreateStmt *create;
	ObjectAddress intoRelationAddr;
	Relation	intoRelationDesc;
	RangeTblEntry *rte;
	Datum		toast_options;
	ListCell   *lc;
	int			attnum;
	static char *validnsps[] = HEAP_RELOPT_NAMESPACES;

	Assert(into != NULL);		/* else somebody forgot to set it */

	/* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */
	is_matview = (into->viewQuery != NULL);
	relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION;

	/*
	 * Create the target relation by faking up a CREATE TABLE parsetree and
	 * passing it to DefineRelation.
	 */
	create = makeNode(CreateStmt);
	create->relation = into->rel;
	create->tableElts = NIL;	/* will fill below */
	create->inhRelations = NIL;
	create->ofTypename = NULL;
	create->constraints = NIL;
	create->options = into->options;
	create->oncommit = into->onCommit;
	create->tablespacename = into->tableSpaceName;
	create->if_not_exists = false;

	/*
	 * Build column definitions using "pre-cooked" type and collation info. If
	 * a column name list was specified in CREATE TABLE AS, override the
	 * column names derived from the query.  (Too few column names are OK, too
	 * many are not.)
	 */
	lc = list_head(into->colNames);
	for (attnum = 0; attnum < typeinfo->natts; attnum++)
	{
		Form_pg_attribute attribute = typeinfo->attrs[attnum];
		ColumnDef  *col = makeNode(ColumnDef);
		TypeName   *coltype = makeNode(TypeName);

		if (lc)
		{
			col->colname = strVal(lfirst(lc));
			lc = lnext(lc);
		}
		else
			col->colname = NameStr(attribute->attname);
		col->typeName = coltype;
		col->inhcount = 0;
		col->is_local = true;
		col->is_not_null = false;
		col->is_from_type = false;
		col->storage = 0;
		col->raw_default = NULL;
		col->cooked_default = NULL;
		col->collClause = NULL;
		col->collOid = attribute->attcollation;
		col->constraints = NIL;
		col->fdwoptions = NIL;
		col->location = -1;

		coltype->names = NIL;
		coltype->typeOid = attribute->atttypid;
		coltype->setof = false;
		coltype->pct_type = false;
		coltype->typmods = NIL;
		coltype->typemod = attribute->atttypmod;
		coltype->arrayBounds = NIL;
		coltype->location = -1;

		/*
		 * It's possible that the column is of a collatable type but the
		 * collation could not be resolved, so double-check.  (We must check
		 * this here because DefineRelation would adopt the type's default
		 * collation rather than complaining.)
		 */
		if (!OidIsValid(col->collOid) &&
			type_is_collatable(coltype->typeOid))
			ereport(ERROR,
					(errcode(ERRCODE_INDETERMINATE_COLLATION),
					 errmsg("no collation was derived for column \"%s\" with collatable type %s",
							col->colname, format_type_be(coltype->typeOid)),
					 errhint("Use the COLLATE clause to set the collation explicitly.")));

		create->tableElts = lappend(create->tableElts, col);
	}

	if (lc != NULL)
		ereport(ERROR,
				(errcode(ERRCODE_SYNTAX_ERROR),
				 errmsg("too many column names were specified")));

	/*
	 * Actually create the target table
	 */
	intoRelationAddr = DefineRelation(create, relkind, InvalidOid, NULL);

	/*
	 * If necessary, create a TOAST table for the target table.  Note that
	 * NewRelationCreateToastTable ends with CommandCounterIncrement(), so
	 * that the TOAST table will be visible for insertion.
	 */
	CommandCounterIncrement();

	/* parse and validate reloptions for the toast table */
	toast_options = transformRelOptions((Datum) 0,
										create->options,
										"toast",
										validnsps,
										true, false);

	(void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true);

	NewRelationCreateToastTable(intoRelationAddr.objectId, toast_options);

	/* Create the "view" part of a materialized view. */
	if (is_matview)
	{
		/* StoreViewQuery scribbles on tree, so make a copy */
		Query	   *query = (Query *) copyObject(into->viewQuery);

		StoreViewQuery(intoRelationAddr.objectId, query, false);
		CommandCounterIncrement();
	}

	/*
	 * Finally we can open the target table
	 */
	intoRelationDesc = heap_open(intoRelationAddr.objectId, AccessExclusiveLock);

	/*
	 * Check INSERT permission on the constructed table.
	 *
	 * XXX: It would arguably make sense to skip this check if into->skipData
	 * is true.
	 */
	rte = makeNode(RangeTblEntry);
	rte->rtekind = RTE_RELATION;
	rte->relid = intoRelationAddr.objectId;
	rte->relkind = relkind;
	rte->requiredPerms = ACL_INSERT;

	for (attnum = 1; attnum <= intoRelationDesc->rd_att->natts; attnum++)
		rte->insertedCols = bms_add_member(rte->insertedCols,
								attnum - FirstLowInvalidHeapAttributeNumber);

	ExecCheckRTPerms(list_make1(rte), true);

	/*
	 * Make sure the constructed table does not have RLS enabled.
	 *
	 * check_enable_rls() will ereport(ERROR) itself if the user has requested
	 * something invalid, and otherwise will return RLS_ENABLED if RLS should
	 * be enabled here.  We don't actually support that currently, so throw
	 * our own ereport(ERROR) if that happens.
	 */
	if (check_enable_rls(intoRelationAddr.objectId, InvalidOid, false) == RLS_ENABLED)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 (errmsg("policies not yet implemented for this command"))));

	/*
	 * Tentatively mark the target as populated, if it's a matview and we're
	 * going to fill it; otherwise, no change needed.
	 */
	if (is_matview && !into->skipData)
		SetMatViewPopulatedState(intoRelationDesc, true);

	/*
	 * Fill private fields of myState for use by later routines
	 */
	myState->rel = intoRelationDesc;
	myState->output_cid = GetCurrentCommandId(true);

	/* and remember the new relation's address for ExecCreateTableAs */
	CreateAsReladdr = intoRelationAddr;

	/*
	 * We can skip WAL-logging the insertions, unless PITR or streaming
	 * replication is in use. We can skip the FSM in any case.
	 */
	myState->hi_options = HEAP_INSERT_SKIP_FSM |
		(XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL);
	myState->bistate = GetBulkInsertState();

	/* Not using WAL requires smgr_targblock be initially invalid */
	Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber);
}
Example #12
0
/*
 * create_toast_table --- internal workhorse
 *
 * rel is already opened and locked
 * toastOid and toastIndexOid are normally InvalidOid, but during
 * bootstrap they can be nonzero to specify hand-assigned OIDs
 */
static bool
create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, Datum reloptions)
{
	Oid			relOid = RelationGetRelid(rel);
	HeapTuple	reltup;
	TupleDesc	tupdesc;
	bool		shared_relation;
	bool		mapped_relation;
	Relation	toast_rel;
	Relation	class_rel;
	Oid			toast_relid;
	Oid			toast_typid = InvalidOid;
	Oid			namespaceid;
	char		toast_relname[NAMEDATALEN];
	char		toast_idxname[NAMEDATALEN];
	IndexInfo  *indexInfo;
	Oid			collationObjectId[2];
	Oid			classObjectId[2];
	int16		coloptions[2];
	ObjectAddress baseobject,
				toastobject;

	/*
	 * Toast table is shared if and only if its parent is.
	 *
	 * We cannot allow toasting a shared relation after initdb (because
	 * there's no way to mark it toasted in other databases' pg_class).
	 */
	shared_relation = rel->rd_rel->relisshared;
	if (shared_relation && !IsBootstrapProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("shared tables cannot be toasted after initdb")));

	/* It's mapped if and only if its parent is, too */
	mapped_relation = RelationIsMapped(rel);

	/*
	 * Is it already toasted?
	 */
	if (rel->rd_rel->reltoastrelid != InvalidOid)
		return false;

	/*
	 * Check to see whether the table actually needs a TOAST table.
	 *
	 * If an update-in-place toast relfilenode is specified, force toast file
	 * creation even if it seems not to need one.
	 */
	if (!needs_toast_table(rel) &&
		(!IsBinaryUpgrade ||
		 !OidIsValid(binary_upgrade_next_toast_pg_class_oid)))
		return false;

	/*
	 * Create the toast table and its index
	 */
	snprintf(toast_relname, sizeof(toast_relname),
			 "pg_toast_%u", relOid);
	snprintf(toast_idxname, sizeof(toast_idxname),
			 "pg_toast_%u_index", relOid);

	/* this is pretty painful...  need a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(3, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
					   "chunk_id",
					   OIDOID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
					   "chunk_seq",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
					   "chunk_data",
					   BYTEAOID,
					   -1, 0);

	/*
	 * Ensure that the toast table doesn't itself get toasted, or we'll be
	 * toast :-(.  This is essential for chunk_data because type bytea is
	 * toastable; hit the other two just to be sure.
	 */
	tupdesc->attrs[0]->attstorage = 'p';
	tupdesc->attrs[1]->attstorage = 'p';
	tupdesc->attrs[2]->attstorage = 'p';

	/*
	 * Toast tables for regular relations go in pg_toast; those for temp
	 * relations go into the per-backend temp-toast-table namespace.
	 */
	if (isTempOrToastNamespace(rel->rd_rel->relnamespace))
		namespaceid = GetTempToastNamespace();
	else
		namespaceid = PG_TOAST_NAMESPACE;

	/* Use binary-upgrade override for pg_type.oid, if supplied. */
	if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_toast_pg_type_oid))
	{
		toast_typid = binary_upgrade_next_toast_pg_type_oid;
		binary_upgrade_next_toast_pg_type_oid = InvalidOid;
	}

	toast_relid = heap_create_with_catalog(toast_relname,
										   namespaceid,
										   rel->rd_rel->reltablespace,
										   toastOid,
										   toast_typid,
										   InvalidOid,
										   rel->rd_rel->relowner,
										   tupdesc,
										   NIL,
										   RELKIND_TOASTVALUE,
										   rel->rd_rel->relpersistence,
										   shared_relation,
										   mapped_relation,
										   true,
										   0,
										   ONCOMMIT_NOOP,
										   reloptions,
										   false,
										   true,
										   true);
	Assert(toast_relid != InvalidOid);

	/* make the toast relation visible, else heap_open will fail */
	CommandCounterIncrement();

	/* ShareLock is not really needed here, but take it anyway */
	toast_rel = heap_open(toast_relid, ShareLock);

	/*
	 * Create unique index on chunk_id, chunk_seq.
	 *
	 * NOTE: the normal TOAST access routines could actually function with a
	 * single-column index on chunk_id only. However, the slice access
	 * routines use both columns for faster access to an individual chunk. In
	 * addition, we want it to be unique as a check against the possibility of
	 * duplicate TOAST chunk OIDs. The index might also be a little more
	 * efficient this way, since btree isn't all that happy with large numbers
	 * of equal keys.
	 */

	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 2;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_ExclusionOps = NULL;
	indexInfo->ii_ExclusionProcs = NULL;
	indexInfo->ii_ExclusionStrats = NULL;
	indexInfo->ii_Unique = true;
	indexInfo->ii_ReadyForInserts = true;
	indexInfo->ii_Concurrent = false;
	indexInfo->ii_BrokenHotChain = false;

	collationObjectId[0] = InvalidOid;
	collationObjectId[1] = InvalidOid;

	classObjectId[0] = OID_BTREE_OPS_OID;
	classObjectId[1] = INT4_BTREE_OPS_OID;

	coloptions[0] = 0;
	coloptions[1] = 0;

	index_create(toast_rel, toast_idxname, toastIndexOid, InvalidOid,
				 indexInfo,
				 list_make2("chunk_id", "chunk_seq"),
				 BTREE_AM_OID,
				 rel->rd_rel->reltablespace,
				 collationObjectId, classObjectId, coloptions, (Datum) 0,
				 true, false, false, false,
				 true, false, false, true);

	heap_close(toast_rel, NoLock);

	/*
	 * Store the toast table's OID in the parent relation's pg_class row
	 */
	class_rel = heap_open(RelationRelationId, RowExclusiveLock);

	reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid));
	if (!HeapTupleIsValid(reltup))
		elog(ERROR, "cache lookup failed for relation %u", relOid);

	((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid;

	if (!IsBootstrapProcessingMode())
	{
		/* normal case, use a transactional update */
		simple_heap_update(class_rel, &reltup->t_self, reltup);

		/* Keep catalog indexes current */
		CatalogUpdateIndexes(class_rel, reltup);
	}
	else
	{
		/* While bootstrapping, we cannot UPDATE, so overwrite in-place */
		heap_inplace_update(class_rel, reltup);
	}

	heap_freetuple(reltup);

	heap_close(class_rel, RowExclusiveLock);

	/*
	 * Register dependency from the toast table to the master, so that the
	 * toast table will be deleted if the master is.  Skip this in bootstrap
	 * mode.
	 */
	if (!IsBootstrapProcessingMode())
	{
		baseobject.classId = RelationRelationId;
		baseobject.objectId = relOid;
		baseobject.objectSubId = 0;
		toastobject.classId = RelationRelationId;
		toastobject.objectId = toast_relid;
		toastobject.objectSubId = 0;

		recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
	}

	/*
	 * Make changes visible
	 */
	CommandCounterIncrement();

	return true;
}
Example #13
0
/* ----------------------------------------------------------------
 *		index_create
 *
 * Returns OID of the created index.
 * ----------------------------------------------------------------
 */
Oid
index_create(Oid heapRelationId,
			 const char *indexRelationName,
			 IndexInfo *indexInfo,
			 Oid accessMethodObjectId,
			 Oid *classObjectId,
			 bool primary,
			 bool isconstraint,
			 bool allow_system_table_mods)
{
	Relation	heapRelation;
	Relation	indexRelation;
	TupleDesc	indexTupDesc;
	bool		shared_relation;
	Oid			namespaceId;
	Oid			indexoid;
	int			i;

	/*
	 * Only SELECT ... FOR UPDATE are allowed while doing this
	 */
	heapRelation = heap_open(heapRelationId, ShareLock);

	/*
	 * The index will be in the same namespace as its parent table, and is
	 * shared across databases if and only if the parent is.
	 */
	namespaceId = RelationGetNamespace(heapRelation);
	shared_relation = heapRelation->rd_rel->relisshared;

	/*
	 * check parameters
	 */
	if (indexInfo->ii_NumIndexAttrs < 1)
		elog(ERROR, "must index at least one column");

	if (!allow_system_table_mods &&
		IsSystemRelation(heapRelation) &&
		IsNormalProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("user-defined indexes on system catalog tables are not supported")));

	/*
	 * We cannot allow indexing a shared relation after initdb (because
	 * there's no way to make the entry in other databases' pg_class).
	 * Unfortunately we can't distinguish initdb from a manually started
	 * standalone backend.	However, we can at least prevent this mistake
	 * under normal multi-user operation.
	 */
	if (shared_relation && IsUnderPostmaster)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
			   errmsg("shared indexes cannot be created after initdb")));

	if (get_relname_relid(indexRelationName, namespaceId))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_TABLE),
				 errmsg("relation \"%s\" already exists",
						indexRelationName)));

	/*
	 * construct tuple descriptor for index tuples
	 */
	indexTupDesc = ConstructTupleDescriptor(heapRelation,
											indexInfo,
											classObjectId);

	/*
	 * create the index relation's relcache entry and physical disk file.
	 * (If we fail further down, it's the smgr's responsibility to remove
	 * the disk file again.)
	 */
	indexRelation = heap_create(indexRelationName,
								namespaceId,
								indexTupDesc,
								shared_relation,
								true,
								allow_system_table_mods);

	/* Fetch the relation OID assigned by heap_create */
	indexoid = RelationGetRelid(indexRelation);

	/*
	 * Obtain exclusive lock on it.  Although no other backends can see it
	 * until we commit, this prevents deadlock-risk complaints from lock
	 * manager in cases such as CLUSTER.
	 */
	LockRelation(indexRelation, AccessExclusiveLock);

	/*
	 * Fill in fields of the index's pg_class entry that are not set
	 * correctly by heap_create.
	 *
	 * XXX should have a cleaner way to create cataloged indexes
	 */
	indexRelation->rd_rel->relowner = GetUserId();
	indexRelation->rd_rel->relam = accessMethodObjectId;
	indexRelation->rd_rel->relkind = RELKIND_INDEX;
	indexRelation->rd_rel->relhasoids = false;

	/*
	 * store index's pg_class entry
	 */
	UpdateRelationRelation(indexRelation);

	/*
	 * now update the object id's of all the attribute tuple forms in the
	 * index relation's tuple descriptor
	 */
	InitializeAttributeOids(indexRelation,
							indexInfo->ii_NumIndexAttrs,
							indexoid);

	/*
	 * append ATTRIBUTE tuples for the index
	 */
	AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);

	/* ----------------
	 *	  update pg_index
	 *	  (append INDEX tuple)
	 *
	 *	  Note that this stows away a representation of "predicate".
	 *	  (Or, could define a rule to maintain the predicate) --Nels, Feb '92
	 * ----------------
	 */
	UpdateIndexRelation(indexoid, heapRelationId, indexInfo,
						classObjectId, primary);

	/*
	 * Register constraint and dependencies for the index.
	 *
	 * If the index is from a CONSTRAINT clause, construct a pg_constraint
	 * entry.  The index is then linked to the constraint, which in turn
	 * is linked to the table.	If it's not a CONSTRAINT, make the
	 * dependency directly on the table.
	 *
	 * We don't need a dependency on the namespace, because there'll be an
	 * indirect dependency via our parent table.
	 *
	 * During bootstrap we can't register any dependencies, and we don't try
	 * to make a constraint either.
	 */
	if (!IsBootstrapProcessingMode())
	{
		ObjectAddress myself,
					referenced;

		myself.classId = RelOid_pg_class;
		myself.objectId = indexoid;
		myself.objectSubId = 0;

		if (isconstraint)
		{
			char		constraintType;
			Oid			conOid;

			if (primary)
				constraintType = CONSTRAINT_PRIMARY;
			else if (indexInfo->ii_Unique)
				constraintType = CONSTRAINT_UNIQUE;
			else
			{
				elog(ERROR, "constraint must be PRIMARY or UNIQUE");
				constraintType = 0;		/* keep compiler quiet */
			}

			/* Shouldn't have any expressions */
			if (indexInfo->ii_Expressions)
				elog(ERROR, "constraints can't have index expressions");

			conOid = CreateConstraintEntry(indexRelationName,
										   namespaceId,
										   constraintType,
										   false,		/* isDeferrable */
										   false,		/* isDeferred */
										   heapRelationId,
										   indexInfo->ii_KeyAttrNumbers,
										   indexInfo->ii_NumIndexAttrs,
										   InvalidOid,	/* no domain */
										   InvalidOid,	/* no foreign key */
										   NULL,
										   0,
										   ' ',
										   ' ',
										   ' ',
										   InvalidOid,	/* no associated index */
										   NULL,		/* no check constraint */
										   NULL,
										   NULL);

			referenced.classId = get_system_catalog_relid(ConstraintRelationName);
			referenced.objectId = conOid;
			referenced.objectSubId = 0;

			recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
		}
		else
		{
			/* Create auto dependencies on simply-referenced columns */
			for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
			{
				if (indexInfo->ii_KeyAttrNumbers[i] != 0)
				{
					referenced.classId = RelOid_pg_class;
					referenced.objectId = heapRelationId;
					referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];

					recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
				}
			}
		}

		/* Store dependency on operator classes */
		referenced.classId = get_system_catalog_relid(OperatorClassRelationName);
		for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
		{
			referenced.objectId = classObjectId[i];
			referenced.objectSubId = 0;

			recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
		}

		/* Store dependencies on anything mentioned in index expressions */
		if (indexInfo->ii_Expressions)
		{
			recordDependencyOnSingleRelExpr(&myself,
									  (Node *) indexInfo->ii_Expressions,
											heapRelationId,
											DEPENDENCY_NORMAL,
											DEPENDENCY_AUTO);
		}

		/* Store dependencies on anything mentioned in predicate */
		if (indexInfo->ii_Predicate)
		{
			recordDependencyOnSingleRelExpr(&myself,
										(Node *) indexInfo->ii_Predicate,
											heapRelationId,
											DEPENDENCY_NORMAL,
											DEPENDENCY_AUTO);
		}
	}

	/*
	 * Advance the command counter so that we can see the newly-entered
	 * catalog tuples for the index.
	 */
	CommandCounterIncrement();

	/*
	 * In bootstrap mode, we have to fill in the index strategy structure
	 * with information from the catalogs.  If we aren't bootstrapping,
	 * then the relcache entry has already been rebuilt thanks to sinval
	 * update during CommandCounterIncrement.
	 */
	if (IsBootstrapProcessingMode())
		RelationInitIndexAccessInfo(indexRelation);
	else
		Assert(indexRelation->rd_indexcxt != NULL);

	/*
	 * If this is bootstrap (initdb) time, then we don't actually fill in
	 * the index yet.  We'll be creating more indexes and classes later,
	 * so we delay filling them in until just before we're done with
	 * bootstrapping.  Otherwise, we call the routine that constructs the
	 * index.
	 *
	 * In normal processing mode, the heap and index relations are closed by
	 * index_build() --- but we continue to hold the ShareLock on the heap
	 * and the exclusive lock on the index that we acquired above, until
	 * end of transaction.
	 */
	if (IsBootstrapProcessingMode())
	{
		index_register(heapRelationId, indexoid, indexInfo);
		/* XXX shouldn't we close the heap and index rels here? */
	}
	else
		index_build(heapRelation, indexRelation, indexInfo);

	return indexoid;
}
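
For orientation, here is a minimal sketch of how a caller might invoke the eight-argument index_create() shown above to build a single-column btree index. This is not taken from the PostgreSQL sources; the relation OID, index name, key column, and operator-class OID are placeholders.

/*
 * Hypothetical caller sketch: build a btree index on the first (int4)
 * column of the relation identified by relid.
 */
static Oid
create_simple_btree_index(Oid relid)
{
	IndexInfo  *indexInfo = makeNode(IndexInfo);
	Oid			classObjectId[1];

	indexInfo->ii_NumIndexAttrs = 1;
	indexInfo->ii_KeyAttrNumbers[0] = 1;	/* index the first column */
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = false;

	classObjectId[0] = INT4_BTREE_OPS_OID;	/* btree opclass for int4 columns */

	return index_create(relid,				/* table to be indexed */
						"my_rel_idx",		/* index name (placeholder) */
						indexInfo,
						BTREE_AM_OID,		/* access method */
						classObjectId,
						false,				/* primary */
						false,				/* isconstraint */
						false);				/* allow_system_table_mods */
}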
Exemple #14
/*
 * CREATE SCHEMA
 */
void
CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString)
{
	const char *schemaName = stmt->schemaname;
	const char *authId = stmt->authid;
	Oid			namespaceId;
	OverrideSearchPath *overridePath;
	List	   *parsetree_list;
	ListCell   *parsetree_item;
	Oid			owner_uid;
	Oid			saved_uid;
	int			save_sec_context;
	AclResult	aclresult;

	GetUserIdAndSecContext(&saved_uid, &save_sec_context);

	/*
	 * Who is supposed to own the new schema?
	 */
	if (authId)
		owner_uid = get_role_oid(authId, false);
	else
		owner_uid = saved_uid;

	/*
	 * To create a schema, must have schema-create privilege on the current
	 * database and must be able to become the target role (this does not
	 * imply that the target role itself must have create-schema privilege).
	 * The latter provision guards against "giveaway" attacks.	Note that a
	 * superuser will always have both of these privileges a fortiori.
	 */
	aclresult = pg_database_aclcheck(MyDatabaseId, saved_uid, ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, ACL_KIND_DATABASE,
					   get_database_name(MyDatabaseId));

	check_is_member_of_role(saved_uid, owner_uid);

	/* Additional check to protect reserved schema names */
	if (!allowSystemTableMods && IsReservedName(schemaName))
		ereport(ERROR,
				(errcode(ERRCODE_RESERVED_NAME),
				 errmsg("unacceptable schema name \"%s\"", schemaName),
		   errdetail("The prefix \"pg_\" is reserved for system schemas.")));

	/*
	 * If if_not_exists was given and the schema already exists, bail out.
	 * (Note: we needn't check this when not if_not_exists, because
	 * NamespaceCreate will complain anyway.)  We could do this before making
	 * the permissions checks, but since CREATE TABLE IF NOT EXISTS makes its
	 * creation-permission check first, we do likewise.
	 */
	if (stmt->if_not_exists &&
		SearchSysCacheExists1(NAMESPACENAME, PointerGetDatum(schemaName)))
	{
		ereport(NOTICE,
				(errcode(ERRCODE_DUPLICATE_SCHEMA),
				 errmsg("schema \"%s\" already exists, skipping",
						schemaName)));
		return;
	}

	/*
	 * If the requested authorization is different from the current user,
	 * temporarily set the current user so that the object(s) will be created
	 * with the correct ownership.
	 *
	 * (The setting will be restored at the end of this routine, or in case of
	 * error, transaction abort will clean things up.)
	 */
	if (saved_uid != owner_uid)
		SetUserIdAndSecContext(owner_uid,
							save_sec_context | SECURITY_LOCAL_USERID_CHANGE);

	/* Create the schema's namespace */
	namespaceId = NamespaceCreate(schemaName, owner_uid, false);

	/* Advance cmd counter to make the namespace visible */
	CommandCounterIncrement();

	/*
	 * Temporarily make the new namespace be the front of the search path, as
	 * well as the default creation target namespace.  This will be undone at
	 * the end of this routine, or upon error.
	 */
	overridePath = GetOverrideSearchPath(CurrentMemoryContext);
	overridePath->schemas = lcons_oid(namespaceId, overridePath->schemas);
	/* XXX should we clear overridePath->useTemp? */
	PushOverrideSearchPath(overridePath);

	/*
	 * Examine the list of commands embedded in the CREATE SCHEMA command, and
	 * reorganize them into a sequentially executable order with no forward
	 * references.	Note that the result is still a list of raw parsetrees ---
	 * we cannot, in general, run parse analysis on one statement until we
	 * have actually executed the prior ones.
	 */
	parsetree_list = transformCreateSchemaStmt(stmt);

	/*
	 * Execute each command contained in the CREATE SCHEMA.  Since the grammar
	 * allows only utility commands in CREATE SCHEMA, there is no need to pass
	 * them through parse_analyze() or the rewriter; we can just hand them
	 * straight to ProcessUtility.
	 */
	foreach(parsetree_item, parsetree_list)
	{
		Node	   *stmt = (Node *) lfirst(parsetree_item);

		/* do this step */
		ProcessUtility(stmt,
					   queryString,
					   NULL,
					   None_Receiver,
					   NULL,
					   PROCESS_UTILITY_SUBCOMMAND);
		/* make sure later steps can see the object created here */
		CommandCounterIncrement();
	}

	/* Reset search path to normal state */
	PopOverrideSearchPath();

	/* Reset current user and security context */
	SetUserIdAndSecContext(saved_uid, save_sec_context);
}
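
As a rough usage sketch (not from the PostgreSQL sources), a caller could build a bare CreateSchemaStmt by hand and run it through CreateSchemaCommand(); the schema name and query string below are placeholders.

/*
 * Hypothetical sketch: create an empty schema owned by the current user,
 * with no embedded subcommands.
 */
static void
create_empty_schema(const char *name)
{
	CreateSchemaStmt *stmt = makeNode(CreateSchemaStmt);

	stmt->schemaname = pstrdup(name);
	stmt->authid = NULL;			/* owned by the current user */
	stmt->schemaElts = NIL;			/* no embedded CREATE statements */
	stmt->if_not_exists = false;

	CreateSchemaCommand(stmt, "CREATE SCHEMA (generated)");
}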
Exemple #15
/**
 * @fn      Datum reorg_swap(PG_FUNCTION_ARGS)
 * @brief   Swap the relfilenodes of the target table and its reorganized
 *          copy, and the relation ids of their toast tables and toast indexes.
 *
 * reorg_swap(oid)
 *
 * TODO: remove useless CommandCounterIncrement().
 *
 * @param	oid		OID of the target table.
 * @retval			None.
 */
Datum
reorg_swap(PG_FUNCTION_ARGS)
{
	Oid				oid = PG_GETARG_OID(0);
	const char	   *relname = get_quoted_relname(oid);
	const char	   *nspname = get_quoted_nspname(oid);
	Oid 			argtypes[1] = { OIDOID };
	bool	 		nulls[1] = { 0 };
	Datum	 		values[1];
	SPITupleTable  *tuptable;
	TupleDesc		desc;
	HeapTuple		tuple;
	uint32			records;
	uint32			i;

	Oid				reltoastrelid1;
	Oid				reltoastidxid1;
	Oid				oid2;
	Oid				reltoastrelid2;
	Oid				reltoastidxid2;
	Oid				owner1;
	Oid				owner2;

	/* authority check */
	must_be_superuser("reorg_swap");

	/* connect to SPI manager */
	reorg_init();

	/* swap relfilenode and dependencies for tables. */
	values[0] = ObjectIdGetDatum(oid);
	execute_with_args(SPI_OK_SELECT,
		"SELECT X.reltoastrelid, TX.reltoastidxid, X.relowner,"
		"       Y.oid, Y.reltoastrelid, TY.reltoastidxid, Y.relowner"
		"  FROM pg_catalog.pg_class X LEFT JOIN pg_catalog.pg_class TX"
		"         ON X.reltoastrelid = TX.oid,"
		"       pg_catalog.pg_class Y LEFT JOIN pg_catalog.pg_class TY"
		"         ON Y.reltoastrelid = TY.oid"
		" WHERE X.oid = $1"
		"   AND Y.oid = ('reorg.table_' || X.oid)::regclass",
		1, argtypes, values, nulls);

	tuptable = SPI_tuptable;
	desc = tuptable->tupdesc;
	records = SPI_processed;

	if (records == 0)
		elog(ERROR, "reorg_swap : no swap target");

	tuple = tuptable->vals[0];

	reltoastrelid1 = getoid(tuple, desc, 1);
	reltoastidxid1 = getoid(tuple, desc, 2);
	owner1 = getoid(tuple, desc, 3);
	oid2 = getoid(tuple, desc, 4);
	reltoastrelid2 = getoid(tuple, desc, 5);
	reltoastidxid2 = getoid(tuple, desc, 6);
	owner2 = getoid(tuple, desc, 7);

	/* change owner of new relation to original owner */
	if (owner1 != owner2)
	{
		ATExecChangeOwner(oid2, owner1, true, AccessExclusiveLock);
		CommandCounterIncrement();
	}

	/* swap tables. */
	swap_heap_or_index_files(oid, oid2);
	CommandCounterIncrement();

	/* swap indexes. */
	values[0] = ObjectIdGetDatum(oid);
	execute_with_args(SPI_OK_SELECT,
		"SELECT X.oid, Y.oid"
		"  FROM pg_catalog.pg_index I,"
		"       pg_catalog.pg_class X,"
		"       pg_catalog.pg_class Y"
		" WHERE I.indrelid = $1"
		"   AND I.indexrelid = X.oid"
		"   AND I.indisvalid"
		"   AND Y.oid = ('reorg.index_' || X.oid)::regclass",
		1, argtypes, values, nulls);

	tuptable = SPI_tuptable;
	desc = tuptable->tupdesc;
	records = SPI_processed;

	for (i = 0; i < records; i++)
	{
		Oid		idx1, idx2;

		tuple = tuptable->vals[i];
		idx1 = getoid(tuple, desc, 1);
		idx2 = getoid(tuple, desc, 2);
		swap_heap_or_index_files(idx1, idx2);

		CommandCounterIncrement();
	}

	/* swap names for toast tables and toast indexes */
	if (reltoastrelid1 == InvalidOid)
	{
		if (reltoastidxid1 != InvalidOid ||
			reltoastrelid2 != InvalidOid ||
			reltoastidxid2 != InvalidOid)
			elog(ERROR, "reorg_swap : unexpected toast relations (T1=%u, I1=%u, T2=%u, I2=%u",
				reltoastrelid1, reltoastidxid1, reltoastrelid2, reltoastidxid2);
		/* do nothing */
	}
	else if (reltoastrelid2 == InvalidOid)
	{
		char	name[NAMEDATALEN];

		if (reltoastidxid1 == InvalidOid ||
			reltoastidxid2 != InvalidOid)
			elog(ERROR, "reorg_swap : unexpected toast relations (T1=%u, I1=%u, T2=%u, I2=%u)",
				reltoastrelid1, reltoastidxid1, reltoastrelid2, reltoastidxid2);

		/* rename X to Y */
		snprintf(name, NAMEDATALEN, "pg_toast_%u", oid2);
		RENAME_REL(reltoastrelid1, name);
		snprintf(name, NAMEDATALEN, "pg_toast_%u_index", oid2);
		RENAME_REL(reltoastidxid1, name);
		CommandCounterIncrement();
	}
	else if (reltoastrelid1 != InvalidOid)
	{
		char	name[NAMEDATALEN];
		int		pid = getpid();

		/* rename X to TEMP */
		snprintf(name, NAMEDATALEN, "pg_toast_pid%d", pid);
		RENAME_REL(reltoastrelid1, name);
		snprintf(name, NAMEDATALEN, "pg_toast_pid%d_index", pid);
		RENAME_REL(reltoastidxid1, name);
		CommandCounterIncrement();

		/* rename Y to X */
		snprintf(name, NAMEDATALEN, "pg_toast_%u", oid);
		RENAME_REL(reltoastrelid2, name);
		snprintf(name, NAMEDATALEN, "pg_toast_%u_index", oid);
		RENAME_REL(reltoastidxid2, name);
		CommandCounterIncrement();

		/* rename TEMP to Y */
		snprintf(name, NAMEDATALEN, "pg_toast_%u", oid2);
		RENAME_REL(reltoastrelid1, name);
		snprintf(name, NAMEDATALEN, "pg_toast_%u_index", oid2);
		RENAME_REL(reltoastidxid1, name);
		CommandCounterIncrement();
	}

	/* drop reorg trigger */
	execute_with_format(
		SPI_OK_UTILITY,
		"DROP TRIGGER IF EXISTS z_reorg_trigger ON %s.%s CASCADE",
		nspname, relname);

	SPI_finish();

	PG_RETURN_VOID();
}
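
For completeness, here is a sketch of the generic fmgr V1 scaffolding a loadable module normally declares so the backend can call a function like reorg_swap. This is standard extension boilerplate, not code copied from pg_reorg itself.

/*
 * Generic module scaffolding (sketch): magic block plus the V1 call
 * convention declaration for reorg_swap.
 */
#include "postgres.h"
#include "fmgr.h"

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif

extern Datum reorg_swap(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(reorg_swap);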
Exemple #16
/*
 * create_aoblkdir_table
 *
 * rel is already opened and exclusive-locked.
 * comptypeOid is InvalidOid.
 */
static bool
create_aoblkdir_table(Relation rel, Oid aoblkdirOid,
					  Oid aoblkdirIndexOid, Oid *comptypeOid)
{
	Oid relOid = RelationGetRelid(rel);
	Oid	aoblkdir_relid;
	Oid	aoblkdir_idxid;
	bool shared_relation = rel->rd_rel->relisshared;
	char aoblkdir_relname[NAMEDATALEN];
	char aoblkdir_idxname[NAMEDATALEN];
	TupleDesc	tupdesc;
	IndexInfo  *indexInfo;
	Oid			classObjectId[3];
	ObjectAddress baseobject;
	ObjectAddress aoblkdirobject;
	Oid			tablespaceOid = ChooseTablespaceForLimitedObject(rel->rd_rel->reltablespace);

	if (!RelationIsAoRows(rel))
		return false;
	
	/*
	 * We cannot allow creating a block directory for a shared relation
	 * after initdb (because there's no way to let other databases know
	 * about this block directory).
	 */
	if (shared_relation && !IsBootstrapProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("shared tables cannot have block directory after initdb")));

	GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL,NULL, &aoblkdir_relid, &aoblkdir_idxid);

	/*
	 * Does it have a block directory?
	 */
	if (aoblkdir_relid != InvalidOid)
	{
		return false;
	}

	snprintf(aoblkdir_relname, sizeof(aoblkdir_relname),
			 "pg_aoblkdir_%u", relOid);
	snprintf(aoblkdir_idxname, sizeof(aoblkdir_idxname),
			 "pg_aoblkdir_%u_index", relOid);
	
	/* Create a tuple descriptor */
	tupdesc = CreateTemplateTupleDesc(4, false);
	TupleDescInitEntry(tupdesc, (AttrNumber) 1,
					   "segno",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 2,
					   "columngroup_no",
					   INT4OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 3,
					   "first_row_no",
					   INT8OID,
					   -1, 0);
	TupleDescInitEntry(tupdesc, (AttrNumber) 4,
					   "minipage",
					   VARBITOID,
					   -1, 0);
	/*
	 * We don't want any toast columns here, so force plain storage
	 * for every attribute.
	 */
	tupdesc->attrs[0]->attstorage = 'p';
	tupdesc->attrs[1]->attstorage = 'p';
	tupdesc->attrs[2]->attstorage = 'p';
	tupdesc->attrs[3]->attstorage = 'p';

	/*
	 * We place aoblkdir relation in the pg_aoseg namespace
	 * even if its master relation is a temp table. There cannot be
	 * any naming collision, and the aoblkdir relation will be
	 * destroyed when its master is, so there is no need to handle
	 * the aoblkdir relation as temp.
	 */
	aoblkdir_relid = heap_create_with_catalog(aoblkdir_relname,
											  PG_AOSEGMENT_NAMESPACE,
											  tablespaceOid,
											  aoblkdirOid,
											  rel->rd_rel->relowner,
											  tupdesc,
											  /* relam */ InvalidOid,
											  RELKIND_AOBLOCKDIR,
											  RELSTORAGE_HEAP,
											  shared_relation,
											  true,
											  /* bufferPoolBulkLoad */ false,
											  0,
											  ONCOMMIT_NOOP,
											  NULL, /* GP Policy */
											  (Datum) 0,
											  true,
											  comptypeOid,
						 					  /* persistentTid */ NULL,
						 					  /* persistentSerialNum */ NULL);
	
	/* Make this table visible, else index creation will fail */
	CommandCounterIncrement();
	
	/*
	 * Create index on segno, first_row_no.
	 */
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = 3;
	indexInfo->ii_KeyAttrNumbers[0] = 1;
	indexInfo->ii_KeyAttrNumbers[1] = 2;
	indexInfo->ii_KeyAttrNumbers[2] = 3;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = NIL;
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = false;
	indexInfo->ii_Concurrent = false;
	
	classObjectId[0] = INT4_BTREE_OPS_OID;
	classObjectId[1] = INT4_BTREE_OPS_OID;
	classObjectId[2] = INT8_BTREE_OPS_OID;

	aoblkdir_idxid = index_create(aoblkdirOid, aoblkdir_idxname, aoblkdirIndexOid,
								  indexInfo,
								  BTREE_AM_OID,
								  tablespaceOid,
								  classObjectId, (Datum) 0,
								  true, false, (Oid *) NULL, true, false, false, NULL);
	
	/* Unlock target table -- no one can see it */
	UnlockRelationOid(aoblkdirOid, ShareLock);
	/* Unlock the index -- no one can see it anyway */
	UnlockRelationOid(aoblkdirIndexOid, AccessExclusiveLock);

	/*
	 * Store the aoblkdir table's OID in the parent relation's pg_appendonly row.
	 */
	UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid, InvalidOid,
								 aoblkdir_relid, aoblkdir_idxid);

	/*
	 * Register dependency from the aoblkdir table to the master, so that the
	 * aoblkdir table will be deleted if the master is.
	 */
	baseobject.classId = RelationRelationId;
	baseobject.objectId = relOid;
	baseobject.objectSubId = 0;
	aoblkdirobject.classId = RelationRelationId;
	aoblkdirobject.objectId = aoblkdirOid;
	aoblkdirobject.objectSubId = 0;

	recordDependencyOn(&aoblkdirobject, &baseobject, DEPENDENCY_INTERNAL);

	/*
	 * Make changes visible
	 */
	CommandCounterIncrement();

	return true;
}
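
A hypothetical caller sketch, assuming the conventions stated in the header comment above: rel is already opened and exclusive-locked, and the auxiliary-relation OIDs have been preassigned. The function and variable names below are placeholders, not part of the original source.

/*
 * Hypothetical caller sketch: create the block directory for rel if it
 * does not exist yet.
 */
static void
ensure_aoblkdir(Relation rel, Oid blkdirOid, Oid blkdirIdxOid)
{
	Oid			comptypeOid = InvalidOid;

	if (create_aoblkdir_table(rel, blkdirOid, blkdirIdxOid, &comptypeOid))
		elog(DEBUG1, "created block directory for relation \"%s\"",
			 RelationGetRelationName(rel));
}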
Exemple #17
/* ----------------
 *		UpdateStats
 *
 * Update pg_class' relpages and reltuples statistics for the given relation
 * (which can be either a table or an index).  Note that this is not used
 * in the context of VACUUM.
 * ----------------
 */
void
UpdateStats(Oid relid, double reltuples)
{
	Relation	whichRel;
	Relation	pg_class;
	HeapTuple	tuple;
	BlockNumber relpages;
	Form_pg_class rd_rel;
	HeapScanDesc pg_class_scan = NULL;
	bool		in_place_upd;

	/*
	 * This routine handles updates for both the heap and index relation
	 * statistics.	In order to guarantee that we're able to *see* the
	 * index relation tuple, we bump the command counter id here.  The
	 * index relation tuple was created in the current transaction.
	 */
	CommandCounterIncrement();

	/*
	 * CommandCounterIncrement() flushes invalid cache entries, including
	 * those for the heap and index relations for which we're updating
	 * statistics.	Now that the cache is flushed, it's safe to open the
	 * relation again.	We need the relation open in order to figure out
	 * how many blocks it contains.
	 */

	/*
	 * Grabbing lock here is probably redundant ...
	 */
	whichRel = relation_open(relid, ShareLock);

	/*
	 * Find the tuple to update in pg_class.  Normally we make a copy of
	 * the tuple using the syscache, modify it, and apply heap_update.
	 * But in bootstrap mode we can't use heap_update, so we cheat and
	 * overwrite the tuple in-place.
	 *
	 * We also must cheat if reindexing pg_class itself, because the
	 * target index may presently not be part of the set of indexes that
	 * CatalogUpdateIndexes would update (see reindex_relation).  In this
	 * case the stats updates will not be WAL-logged and so could be lost
	 * in a crash.  This seems OK considering VACUUM does the same thing.
	 */
	pg_class = heap_openr(RelationRelationName, RowExclusiveLock);

	in_place_upd = IsBootstrapProcessingMode() ||
		ReindexIsProcessingHeap(RelationGetRelid(pg_class));

	if (!in_place_upd)
	{
		tuple = SearchSysCacheCopy(RELOID,
								   ObjectIdGetDatum(relid),
								   0, 0, 0);
	}
	else
	{
		ScanKeyData key[1];

		ScanKeyEntryInitialize(&key[0], 0,
							   ObjectIdAttributeNumber,
							   F_OIDEQ,
							   ObjectIdGetDatum(relid));

		pg_class_scan = heap_beginscan(pg_class, SnapshotNow, 1, key);
		tuple = heap_getnext(pg_class_scan, ForwardScanDirection);
	}

	if (!HeapTupleIsValid(tuple))
		elog(ERROR, "could not find tuple for relation %u", relid);
	rd_rel = (Form_pg_class) GETSTRUCT(tuple);

	/*
	 * Figure values to insert.
	 *
	 * If we found zero tuples in the scan, do NOT believe it; instead put a
	 * bogus estimate into the statistics fields.  Otherwise, the common
	 * pattern "CREATE TABLE; CREATE INDEX; insert data" leaves the table
	 * with zero size statistics until a VACUUM is done.  The optimizer
	 * will generate very bad plans if the stats claim the table is empty
	 * when it is actually sizable.  See also CREATE TABLE in heap.c.
	 *
	 * Note: this path is also taken during bootstrap, because bootstrap.c
	 * passes reltuples = 0 after loading a table.	We have to estimate
	 * some number for reltuples based on the actual number of pages.
	 */
	relpages = RelationGetNumberOfBlocks(whichRel);

	if (reltuples == 0)
	{
		if (relpages == 0)
		{
			/* Bogus defaults for a virgin table, same as heap.c */
			reltuples = 1000;
			relpages = 10;
		}
		else if (whichRel->rd_rel->relkind == RELKIND_INDEX && relpages <= 2)
		{
			/* Empty index, leave bogus defaults in place */
			reltuples = 1000;
		}
		else
			reltuples = ((double) relpages) * NTUPLES_PER_PAGE(whichRel->rd_rel->relnatts);
	}

	/*
	 * Update statistics in pg_class, if they changed.	(Avoiding an
	 * unnecessary update is not just a tiny performance improvement; it
	 * also reduces the window wherein concurrent CREATE INDEX commands
	 * may conflict.)
	 */
	if (rd_rel->relpages != (int32) relpages ||
		rd_rel->reltuples != (float4) reltuples)
	{
		if (in_place_upd)
		{
			/* Bootstrap or reindex case: overwrite fields in place. */
			LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_EXCLUSIVE);
			rd_rel->relpages = (int32) relpages;
			rd_rel->reltuples = (float4) reltuples;
			LockBuffer(pg_class_scan->rs_cbuf, BUFFER_LOCK_UNLOCK);
			WriteNoReleaseBuffer(pg_class_scan->rs_cbuf);
			if (!IsBootstrapProcessingMode())
				CacheInvalidateHeapTuple(pg_class, tuple);
		}
		else
		{
			/* During normal processing, must work harder. */
			rd_rel->relpages = (int32) relpages;
			rd_rel->reltuples = (float4) reltuples;
			simple_heap_update(pg_class, &tuple->t_self, tuple);
			CatalogUpdateIndexes(pg_class, tuple);
		}
	}

	if (!pg_class_scan)
		heap_freetuple(tuple);
	else
		heap_endscan(pg_class_scan);

	/*
	 * We shouldn't have to do this, but we do...  Modify the reldesc in
	 * place with the new values so that the cache contains the latest
	 * copy.  (XXX is this really still necessary?	The relcache will get
	 * fixed at next CommandCounterIncrement, so why bother here?)
	 */
	whichRel->rd_rel->relpages = (int32) relpages;
	whichRel->rd_rel->reltuples = (float4) reltuples;

	heap_close(pg_class, RowExclusiveLock);
	relation_close(whichRel, NoLock);
}
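
As a usage sketch (not taken from the sources), a caller that has just finished building an index might refresh the pg_class statistics for both the table and the new index like this; the OIDs and tuple counts are placeholders.

/*
 * Hypothetical caller sketch: record post-build sizes for a heap and its
 * new index via UpdateStats().
 */
static void
record_build_stats(Oid heapOid, double heaptuples,
				   Oid indexOid, double indextuples)
{
	UpdateStats(heapOid, heaptuples);
	UpdateStats(indexOid, indextuples);
}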