Example #1
0
/*
 *	btbuildempty() -- build an empty btree index in the initialization fork
 */
Datum
btbuildempty(PG_FUNCTION_ARGS)
{
	Relation	index = (Relation) PG_GETARG_POINTER(0);
	Page		metapage;

	/* Construct metapage. */
	metapage = (Page) palloc(BLCKSZ);
	_bt_initmetapage(metapage, P_NONE, 0);

	/* Write the page.	If archiving/streaming, XLOG it. */
	smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
			  (char *) metapage, true);
	if (XLogIsNeeded())
		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
					BTREE_METAPAGE, metapage);

	/*
	 * An immediate sync is required even if we xlog'd the page, because the
	 * write did not go through shared_buffers and therefore a concurrent
	 * checkpoint may have moved the redo pointer past our xlog record.
	 */
	smgrimmedsync(index->rd_smgr, INIT_FORKNUM);

	PG_RETURN_VOID();
}
Example #2
0
/*
 *	btbuildempty() -- build an empty btree index in the initialization fork
 */
void
btbuildempty(Relation index)
{
	Page		metapage;

	/* Construct metapage. */
	metapage = (Page) palloc(BLCKSZ);
	_bt_initmetapage(metapage, P_NONE, 0);

	/*
	 * Write the page and log it.  It might seem that an immediate sync would
	 * be sufficient to guarantee that the file exists on disk, but recovery
	 * itself might remove it while replaying, for example, an
	 * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record.  Therefore, we need
	 * this even when wal_level=minimal.
	 */
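	/* Set the checksum directly; this write bypasses the buffer manager. */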
	PageSetChecksumInplace(metapage, BTREE_METAPAGE);
	smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
			  (char *) metapage, true);
	log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
				BTREE_METAPAGE, metapage, true);

	/*
	 * An immediate sync is required even if we xlog'd the page, because the
	 * write did not go through shared_buffers and therefore a concurrent
	 * checkpoint may have moved the redo pointer past our xlog record.
	 */
	smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
}
Example #3
0
/*
 *	_bt_metapinit() -- Initialize the metadata page of a new btree.
 *
 * Note: this is actually not used for standard btree index building;
 * nbtsort.c prefers not to make the metadata page valid until completion
 * of build.
 *
 * Note: there's no real need for any locking here.  Since the transaction
 * creating the index hasn't committed yet, no one else can even see the index
 * much less be trying to use it.  (In a REINDEX-in-place scenario, that's
 * not true, but we assume the caller holds sufficient locks on the index.)
 */
void
_bt_metapinit(Relation rel)
{
	Buffer		buf;
	Page		pg;
	BTMetaPageData *metad;

	if (RelationGetNumberOfBlocks(rel) != 0)
		elog(ERROR, "cannot initialize non-empty btree index \"%s\"",
			 RelationGetRelationName(rel));

	buf = ReadBuffer(rel, P_NEW);
	Assert(BufferGetBlockNumber(buf) == BTREE_METAPAGE);
	pg = BufferGetPage(buf);

	_bt_initmetapage(pg, P_NONE, 0);
	metad = BTPageGetMeta(pg);

	/* NO ELOG(ERROR) from here till newmeta op is logged */
	START_CRIT_SECTION();

	/* XLOG stuff */
	if (!rel->rd_istemp)
	{
		xl_btree_newmeta xlrec;
		XLogRecPtr	recptr;
		XLogRecData rdata[1];

		xlrec.node = rel->rd_node;
		xlrec.meta.root = metad->btm_root;
		xlrec.meta.level = metad->btm_level;
		xlrec.meta.fastroot = metad->btm_fastroot;
		xlrec.meta.fastlevel = metad->btm_fastlevel;

		rdata[0].data = (char *) &xlrec;
		rdata[0].len = SizeOfBtreeNewmeta;
		rdata[0].buffer = InvalidBuffer;
		rdata[0].next = NULL;

		recptr = XLogInsert(RM_BTREE_ID,
							XLOG_BTREE_NEWMETA,
							rdata);

		PageSetLSN(pg, recptr);
		PageSetTLI(pg, ThisTimeLineID);
	}

	END_CRIT_SECTION();

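	/* Mark the buffer dirty and release it. */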
	WriteBuffer(buf);
}
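
For reference, a minimal sketch of what _bt_initmetapage() fills in, inferred from the metapage fields read in Example #3 (btm_root, btm_level, btm_fastroot, btm_fastlevel) and assuming the usual access/nbtree.h declarations; the real nbtree implementation may differ in detail across versions.

/*
 * Sketch only: initialize a btree metapage in a caller-supplied page buffer.
 */
static void
sketch_init_btree_metapage(Page page, BlockNumber rootblkno, uint32 rootlevel)
{
	BTMetaPageData *metad;
	BTPageOpaque metaopaque;

	/* Zero the page and reserve special space for the btree opaque data. */
	PageInit(page, BLCKSZ, sizeof(BTPageOpaqueData));

	metad = BTPageGetMeta(page);
	metad->btm_magic = BTREE_MAGIC;		/* marks the metapage as valid */
	metad->btm_version = BTREE_VERSION;
	metad->btm_root = rootblkno;		/* P_NONE for an empty index */
	metad->btm_level = rootlevel;
	metad->btm_fastroot = rootblkno;	/* fast root starts at the true root */
	metad->btm_fastlevel = rootlevel;

	metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
	metaopaque->btpo_flags = BTP_META;
}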
Example #4
0
/*
 *	btbuildempty() -- build an empty btree index in the initialization fork
 */
void
btbuildempty(Relation index)
{
	Page		metapage;

	/* Construct metapage. */
	metapage = (Page) palloc(BLCKSZ);
	_bt_initmetapage(metapage, P_NONE, 0);

	/* Write the page.  If archiving/streaming, XLOG it. */
	PageSetChecksumInplace(metapage, BTREE_METAPAGE);
	smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
			  (char *) metapage, true);
	if (XLogIsNeeded())
		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
					BTREE_METAPAGE, metapage, false);

	/*
	 * An immediate sync is required even if we xlog'd the page, because the
	 * write did not go through shared_buffers and therefore a concurrent
	 * checkpoint may have moved the redo pointer past our xlog record.
	 */
	smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
}
Example #5
0
/*
 * Finish writing out the completed btree.
 */
static void
_bt_uppershutdown(BTWriteState *wstate, BTPageState *state)
{
	BTPageState *s;
	BlockNumber rootblkno = P_NONE;
	uint32		rootlevel = 0;
	Page		metapage;

	/*
	 * Each iteration of this loop completes one more level of the tree.
	 */
	for (s = state; s != NULL; s = s->btps_next)
	{
		BlockNumber blkno;
		BTPageOpaque opaque;

		blkno = s->btps_blkno;
		opaque = (BTPageOpaque) PageGetSpecialPointer(s->btps_page);

		/*
		 * We have to link the last page on this level to somewhere.
		 *
		 * If we're at the top, it's the root, so attach it to the metapage.
		 * Otherwise, add an entry for it to its parent using its minimum key.
		 * This may cause the last page of the parent level to split, but
		 * that's not a problem -- we haven't gotten to it yet.
		 */
		if (s->btps_next == NULL)
		{
			opaque->btpo_flags |= BTP_ROOT;
			rootblkno = blkno;
			rootlevel = s->btps_level;
		}
		else
		{
			Assert(s->btps_minkey != NULL);
			ItemPointerSet(&(s->btps_minkey->t_tid), blkno, P_HIKEY);
			_bt_buildadd(wstate, s->btps_next, s->btps_minkey);
			pfree(s->btps_minkey);
			s->btps_minkey = NULL;
		}

		/*
		 * This is the rightmost page, so the ItemId array needs to be slid
		 * back one slot.  Then we can dump out the page.
		 */
		_bt_slideleft(s->btps_page);
		_bt_blwritepage(wstate, s->btps_page, s->btps_blkno);
		s->btps_page = NULL;	/* writepage freed the workspace */
	}

	/*
	 * As the last step in the process, construct the metapage and make it
	 * point to the new root (unless we had no data at all, in which case it's
	 * set to point to "P_NONE").  This changes the index to the "valid" state
	 * by filling in a valid magic number in the metapage.
	 */
	metapage = (Page) palloc(BLCKSZ);
	_bt_initmetapage(metapage, rootblkno, rootlevel);
	_bt_blwritepage(wstate, metapage, BTREE_METAPAGE);
}
Example #6
0
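/*
 * _bitmap_create_lov_heapandindex() -- create the LOV ("list of values")
 *	heap and its btree index for a bitmap index.
 *
 * If the LOV heap already exists (REINDEX), it is reused: the heap and the
 * index get new relfilenodes and a fresh btree metapage is written for the
 * index.  Otherwise a new heap is created along with a unique btree index
 * over its attribute-value columns.
 */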
void
_bitmap_create_lov_heapandindex(Relation rel,
								Oid lovComptypeOid,
								Oid *lovHeapOid,
								Oid *lovIndexOid,
								Oid lovHeapRelfilenode,
								Oid lovIndexRelfilenode)
{
	char		lovHeapName[NAMEDATALEN];
	char		lovIndexName[NAMEDATALEN];
	TupleDesc	tupDesc;
	IndexInfo  *indexInfo;
	ObjectAddress	objAddr, referenced;
	Oid		   *classObjectId;
	int16	   *coloptions;
	Oid			heapid;
	Oid			idxid;
	int			indattrs;
	int			i;
	Oid			unusedArrayOid = InvalidOid;

	Assert(rel != NULL);

	/* create the new names for the new lov heap and index */
	snprintf(lovHeapName, sizeof(lovHeapName),
			 "pg_bm_%u", RelationGetRelid(rel));
	snprintf(lovIndexName, sizeof(lovIndexName),
			 "pg_bm_%u_index", RelationGetRelid(rel));


	heapid = get_relname_relid(lovHeapName, PG_BITMAPINDEX_NAMESPACE);

	/*
	 * If heapid exists, then this is happening during re-indexing.
	 * We allocate new relfilenodes for lov heap and lov index.
	 *
	 * XXX Each segment db may have different relfilenodes for lov heap and
	 * lov index, which should not be an issue now. Ideally, we would like each
	 * segment db use the same oids.
	 */
	if (OidIsValid(heapid))
	{
		Relation lovHeap;
		Relation lovIndex;
		Buffer btree_metabuf;
		Page   btree_metapage;

		*lovHeapOid = heapid;

		idxid = get_relname_relid(lovIndexName, PG_BITMAPINDEX_NAMESPACE);
		Assert(OidIsValid(idxid));
		*lovIndexOid = idxid;

		lovComptypeOid = get_rel_type_id(heapid);
		Assert(OidIsValid(lovComptypeOid));

		lovHeap = heap_open(heapid, AccessExclusiveLock);
		lovIndex = index_open(idxid, AccessExclusiveLock);

		if (OidIsValid(lovHeapRelfilenode))
			setNewRelfilenodeToOid(lovHeap, lovHeapRelfilenode);
		else
			setNewRelfilenode(lovHeap);
		if (OidIsValid(lovIndexRelfilenode))
			setNewRelfilenodeToOid(lovIndex, lovIndexRelfilenode);
		else
			setNewRelfilenode(lovIndex);

		/*
		 * After assigning a new relfilenode to the btree index, the
		 * underlying file is empty and no longer a valid btree, so we
		 * create a new metapage for it.
		 */
		btree_metabuf = _bt_getbuf(lovIndex, P_NEW, BT_WRITE);
		Assert (BTREE_METAPAGE == BufferGetBlockNumber(btree_metabuf));
		btree_metapage = BufferGetPage(btree_metabuf);
		_bt_initmetapage(btree_metapage, P_NONE, 0);

		/* XLOG the metapage */
		if (!XLog_UnconvertedCanBypassWal() && !lovIndex->rd_istemp)
		{
			
			/*
			 * Fetch gp_persistent_relation_node information that will be
			 * added to the XLOG record.
			 */
			RelationFetchGpRelationNodeForXLog(lovIndex);
			
			_bt_lognewpage(lovIndex,
						   btree_metapage,
						   BufferGetBlockNumber(btree_metabuf));
		}
		
		/* This cache value is not valid anymore. */
		if (lovIndex->rd_amcache)
		{
			pfree(lovIndex->rd_amcache);
			lovIndex->rd_amcache = NULL;
		}
		MarkBufferDirty(btree_metabuf);
		_bt_relbuf(lovIndex, btree_metabuf);

		index_close(lovIndex, NoLock);
		heap_close(lovHeap, NoLock);

		return;
	}

	/*
	 * create a new empty heap to store all attribute values with their
	 * corresponding block number and offset in LOV.
	 */
	tupDesc = _bitmap_create_lov_heapTupleDesc(rel);

	Assert(rel->rd_rel != NULL);

	heapid =
		heap_create_with_catalog(lovHeapName, PG_BITMAPINDEX_NAMESPACE,
								 rel->rd_rel->reltablespace,
								 *lovHeapOid, rel->rd_rel->relowner,
								 tupDesc,
								 /* relam */ InvalidOid, RELKIND_RELATION, RELSTORAGE_HEAP,
								 rel->rd_rel->relisshared, false, /* bufferPoolBulkLoad */ false, 0,
								 ONCOMMIT_NOOP, NULL /* GP Policy */,
								 (Datum)0, true,
								 /* valid_opts */ true,
								 &lovComptypeOid,
								 &unusedArrayOid,
						 		 /* persistentTid */ NULL,
						 		 /* persistentSerialNum */ NULL);
	Assert(heapid == *lovHeapOid);

	/*
	 * We must bump the command counter to make the newly-created relation
	 * tuple visible for opening.
	 */
	CommandCounterIncrement();

	objAddr.classId = RelationRelationId;
	objAddr.objectId = *lovHeapOid;
	objAddr.objectSubId = 0;

	referenced.classId = RelationRelationId;
	referenced.objectId = RelationGetRelid(rel);
	referenced.objectSubId = 0;

	recordDependencyOn(&objAddr, &referenced, DEPENDENCY_INTERNAL);

	/*
	 * create a btree index on the newly-created heap.
	 * The key includes all attributes to be indexed in this bitmap index.
	 */
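	/*
	 * The last two LOV heap columns record the bitmap location (block number
	 * and offset), so only the leading attribute-value columns are indexed.
	 */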
	indattrs = tupDesc->natts - 2;
	indexInfo = makeNode(IndexInfo);
	indexInfo->ii_NumIndexAttrs = indattrs;
	indexInfo->ii_Expressions = NIL;
	indexInfo->ii_ExpressionsState = NIL;
	indexInfo->ii_Predicate = make_ands_implicit(NULL);
	indexInfo->ii_PredicateState = NIL;
	indexInfo->ii_Unique = true;
	indexInfo->opaque = NULL;

	classObjectId = (Oid *) palloc(indattrs * sizeof(Oid));
	coloptions = (int16 *) palloc(indattrs * sizeof(int16));
	for (i = 0; i < indattrs; i++)
	{
		Oid typid = tupDesc->attrs[i]->atttypid;

		indexInfo->ii_KeyAttrNumbers[i] = i + 1;
		classObjectId[i] = GetDefaultOpClass(typid, BTREE_AM_OID);
		coloptions[i] = 0;
	}

	idxid = index_create(*lovHeapOid, lovIndexName, *lovIndexOid,
						 indexInfo, BTREE_AM_OID,
						 rel->rd_rel->reltablespace,
						 classObjectId, coloptions, 0, false, false, (Oid *) NULL, true,
						 false, false, NULL);
	Assert(idxid == *lovIndexOid);
}
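
/*
 * PersistentBuild_TruncateAllGpRelationNode() -- truncate gp_relation_node
 *	and its index in every database, rewriting an empty btree metapage for
 *	the index.  Returns the number of relations truncated.
 */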
static int64
PersistentBuild_TruncateAllGpRelationNode(void)
{
	Relation pg_database;
	HeapScanDesc scan;
	HeapTuple tuple;

	int64 count;

	pg_database = heap_open(
						DatabaseRelationId,
						AccessShareLock);

	/*
	 * Truncate gp_relation_node and its index in each database.
	 */
	scan = heap_beginscan(pg_database, SnapshotNow, 0, NULL);
	count = 0;
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_database form_pg_database =
						(Form_pg_database)GETSTRUCT(tuple);

		Oid dbOid;
		Oid dattablespace;
		RelFileNode relFileNode;
		SMgrRelation smgrRelation;
		Page btree_metapage;
		
		dbOid = HeapTupleGetOid(tuple);
		dattablespace = form_pg_database->dattablespace;

		if (dbOid == HcatalogDbOid)
			continue;

		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(), 
				 "PersistentBuild_TruncateAllGpRelationNode: dbOid %u, '%s'",
				 dbOid,
				 form_pg_database->datname.data);

		relFileNode.spcNode = dattablespace;
		relFileNode.dbNode = dbOid;
		relFileNode.relNode = GpRelfileNodeRelationId;

		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(),
				 "Truncating gp_relation_node %u/%u/%u in database oid %u ('%s')",
				 relFileNode.spcNode,
				 relFileNode.dbNode,
				 relFileNode.relNode,
				 dbOid,
				 form_pg_database->datname.data);

		/*
		 * Truncate WITHOUT generating an XLOG record (i.e. pretend it is a temp relation).
		 */
		PersistentBuild_NonTransactionTruncate(&relFileNode);
		count++;

		/*
		 * And, the index.  Unfortunately, the relfilenode OID can change due to a
		 * REINDEX {TABLE|INDEX} command.
		 */
		PersistentBuild_FindGpRelationNodeIndex(
											dbOid,
											dattablespace,
											&relFileNode);

		if (Debug_persistent_print)
			elog(Persistent_DebugPrintLevel(), 
				 "Truncating gp_relation_node_index %u/%u/%u in database oid %u ('%s').  relfilenode different %s, tablespace different %s",
				 relFileNode.spcNode,
				 relFileNode.dbNode,
				 relFileNode.relNode,
				 dbOid,
				 form_pg_database->datname.data,
				 ((relFileNode.relNode != GpRelfileNodeOidIndexId) ? "true" : "false"),
				 ((relFileNode.spcNode != dattablespace) ? "true" : "false"));

		PersistentBuild_NonTransactionTruncate(&relFileNode);

		/* The BTree needs an empty meta-data block. */
		smgrRelation = smgropen(relFileNode);

		btree_metapage = (Page)palloc(BLCKSZ);
		_bt_initmetapage(btree_metapage, P_NONE, 0);
		smgrwrite(
			smgrRelation, 
			/* blockNum */ 0, 
			(char*)btree_metapage,
			/* isTemp */ false);
		smgrimmedsync(smgrRelation);
		pfree(btree_metapage);

		smgrclose(smgrRelation);

		count++;
	}

	heap_endscan(scan);

	heap_close(pg_database, AccessShareLock);

	return count;
}