/* * btbuildempty() -- build an empty btree index in the initialization fork */ Datum btbuildempty(PG_FUNCTION_ARGS) { Relation index = (Relation) PG_GETARG_POINTER(0); Page metapage; /* Construct metapage. */ metapage = (Page) palloc(BLCKSZ); _bt_initmetapage(metapage, P_NONE, 0); /* Write the page. If archiving/streaming, XLOG it. */ smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE, (char *) metapage, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, BTREE_METAPAGE, metapage); /* * An immediate sync is require even if we xlog'd the page, because the * write did not go through shared_buffers and therefore a concurrent * checkpoint may have move the redo pointer past our xlog record. */ smgrimmedsync(index->rd_smgr, INIT_FORKNUM); PG_RETURN_VOID(); }
/*
 *	btbuildempty() -- create the initialization-fork image of an empty btree
 *
 * The only page written is the metapage, initialized with no root (P_NONE)
 * at level 0.  It is written straight through smgr, WAL-logged, and then
 * synced immediately.
 */
void
btbuildempty(Relation index)
{
	Page		metapg = (Page) palloc(BLCKSZ);

	/* Fill in the metapage image in local memory. */
	_bt_initmetapage(metapg, P_NONE, 0);

	/*
	 * Emit the page and its WAL record.  Syncing the file alone would not be
	 * enough to guarantee that it survives: recovery can itself delete the
	 * file while replaying records such as XLOG_DBASE_CREATE or
	 * XLOG_TBLSPC_CREATE.  Hence the WAL record is written regardless of
	 * wal_level, including wal_level=minimal.
	 */
	PageSetChecksumInplace(metapg, BTREE_METAPAGE);
	smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
			  (char *) metapg, true);
	log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
				BTREE_METAPAGE, metapg, true);

	/*
	 * The WAL record does not make the sync optional.  This write bypassed
	 * shared_buffers, so a checkpoint running concurrently may already have
	 * advanced the redo pointer beyond our record.
	 */
	smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
}
/*
 *	_bt_metapinit() -- set up the metadata page of a brand-new btree.
 *
 * The relation must contain no blocks yet; otherwise an ERROR is raised.
 * The metapage is created with no root (P_NONE) at level 0 and, unless the
 * relation is temporary, an XLOG_BTREE_NEWMETA record is emitted for it.
 *
 * Note: standard btree index building does not come through here;
 * nbtsort.c prefers to leave the metadata page invalid until the build
 * has completed.
 *
 * Note: no locking is really needed.  The transaction creating the index
 * has not committed, so nobody else can see the index, let alone use it.
 * (That is not true for REINDEX-in-place, but there we assume the caller
 * holds sufficient locks on the index.)
 */
void
_bt_metapinit(Relation rel)
{
	Buffer		metabuf;
	Page		metapg;
	BTMetaPageData *metadata;

	/* Refuse to clobber an index that already has pages. */
	if (RelationGetNumberOfBlocks(rel) != 0)
		elog(ERROR, "cannot initialize non-empty btree index \"%s\"",
			 RelationGetRelationName(rel));

	/* Extend the relation; the new block has to be the metapage. */
	metabuf = ReadBuffer(rel, P_NEW);
	Assert(BufferGetBlockNumber(metabuf) == BTREE_METAPAGE);
	metapg = BufferGetPage(metabuf);

	_bt_initmetapage(metapg, P_NONE, 0);
	metadata = BTPageGetMeta(metapg);

	/* No elog(ERROR) is allowed from here until the newmeta op is logged. */
	START_CRIT_SECTION();

	/* WAL-log the new metapage contents, except for temp relations. */
	if (!rel->rd_istemp)
	{
		xl_btree_newmeta newmeta;
		XLogRecData rdata;
		XLogRecPtr	lsn;

		newmeta.node = rel->rd_node;
		newmeta.meta.root = metadata->btm_root;
		newmeta.meta.level = metadata->btm_level;
		newmeta.meta.fastroot = metadata->btm_fastroot;
		newmeta.meta.fastlevel = metadata->btm_fastlevel;

		/* Single-element record chain carrying just the newmeta struct. */
		rdata.data = (char *) &newmeta;
		rdata.len = SizeOfBtreeNewmeta;
		rdata.buffer = InvalidBuffer;
		rdata.next = NULL;

		lsn = XLogInsert(RM_BTREE_ID, XLOG_BTREE_NEWMETA, &rdata);

		PageSetLSN(metapg, lsn);
		PageSetTLI(metapg, ThisTimeLineID);
	}

	END_CRIT_SECTION();

	WriteBuffer(metabuf);
}
/*
 *	btbuildempty() -- build an empty btree index in the initialization fork
 *
 * Writes a single metapage (no root, level 0) into the index's init fork,
 * WAL-logs it, and syncs the fork immediately.
 */
void
btbuildempty(Relation index)
{
	Page		metapage;

	/* Construct metapage. */
	metapage = (Page) palloc(BLCKSZ);
	_bt_initmetapage(metapage, P_NONE, 0);

	/*
	 * Write the page and log it.  It might seem that an immediate sync would
	 * be sufficient to guarantee that the file exists on disk, but recovery
	 * itself might remove it while replaying, for example, an
	 * XLOG_DBASE_CREATE or XLOG_TBLSPC_CREATE record.  Therefore, the page
	 * is logged unconditionally rather than only when XLogIsNeeded(), i.e.
	 * even when wal_level=minimal.
	 */
	PageSetChecksumInplace(metapage, BTREE_METAPAGE);
	smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE,
			  (char *) metapage, true);
	log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
				BTREE_METAPAGE, metapage, false);

	/*
	 * An immediate sync is required even if we xlog'd the page, because the
	 * write did not go through shared_buffers and therefore a concurrent
	 * checkpoint may have moved the redo pointer past our xlog record.
	 */
	smgrimmedsync(index->rd_smgr, INIT_FORKNUM);
}
/*
 * Complete the btree being built: finish the pending page of every level,
 * from the given state upwards, and then write the metapage.
 *
 * 'state' heads a chain of per-level page states linked through btps_next;
 * the level whose btps_next is NULL is the top level.
 */
static void
_bt_uppershutdown(BTWriteState *wstate, BTPageState *state)
{
	BTPageState *level = state;
	BlockNumber rootblkno = P_NONE;
	uint32		rootlevel = 0;
	Page		metapage;

	/*
	 * Walk up the chain; every pass finishes one more level of the tree.
	 */
	while (level != NULL)
	{
		BlockNumber blkno = level->btps_blkno;
		BTPageOpaque opaque;

		opaque = (BTPageOpaque) PageGetSpecialPointer(level->btps_page);

		/*
		 * The last page of this level must be linked from somewhere.
		 *
		 * Below the top, insert a downlink into the parent level, keyed by
		 * this page's minimum key.  That can make the parent's last page
		 * split, which is harmless because the parent level has not been
		 * finished yet.  At the top, the page is the root, and it is the
		 * metapage that will point to it.
		 */
		if (level->btps_next != NULL)
		{
			Assert(level->btps_minkey != NULL);
			ItemPointerSet(&(level->btps_minkey->t_tid), blkno, P_HIKEY);
			_bt_buildadd(wstate, level->btps_next, level->btps_minkey);
			pfree(level->btps_minkey);
			level->btps_minkey = NULL;
		}
		else
		{
			opaque->btpo_flags |= BTP_ROOT;
			rootblkno = blkno;
			rootlevel = level->btps_level;
		}

		/*
		 * Being the rightmost page of its level, the page needs its ItemId
		 * array slid back by one slot before it can be written out.
		 */
		_bt_slideleft(level->btps_page);
		_bt_blwritepage(wstate, level->btps_page, level->btps_blkno);
		level->btps_page = NULL;	/* writepage freed the workspace */

		level = level->btps_next;
	}

	/*
	 * Finally, build the metapage and point it at the new root.  With no
	 * data at all the loop never ran and the root stays "P_NONE".  Filling
	 * in a valid magic number in the metapage is what switches the index to
	 * the "valid" state.
	 */
	metapage = (Page) palloc(BLCKSZ);
	_bt_initmetapage(metapage, rootblkno, rootlevel);
	_bt_blwritepage(wstate, metapage, BTREE_METAPAGE);
}
/*
 * _bitmap_create_lov_heapandindex() -- set up the LOV heap and its btree
 * index for the bitmap index 'rel'.
 *
 * The two relations are named "pg_bm_<relid>" and "pg_bm_<relid>_index",
 * where <relid> is the OID of 'rel', and are looked up / created in
 * PG_BITMAPINDEX_NAMESPACE.
 *
 * If the LOV heap already exists (re-indexing), both relations receive new
 * relfilenodes -- lovHeapRelfilenode / lovIndexRelfilenode when those are
 * valid OIDs, freshly allocated ones otherwise -- the index gets a new
 * btree metapage, and *lovHeapOid / *lovIndexOid are set to the OIDs of
 * the existing relations.
 *
 * Otherwise both relations are created, with *lovHeapOid / *lovIndexOid
 * supplying the OIDs to create them under, and the LOV heap is recorded as
 * internally dependent on 'rel'.
 */
void
_bitmap_create_lov_heapandindex(Relation rel,
								Oid lovComptypeOid,
								Oid *lovHeapOid,
								Oid *lovIndexOid,
								Oid lovHeapRelfilenode,
								Oid lovIndexRelfilenode)
{
	char		heapName[NAMEDATALEN];
	char		indexName[NAMEDATALEN];
	Oid			heapOid;
	Oid			indexOid;
	TupleDesc	lovTupDesc;
	ObjectAddress lovHeapAddr;
	ObjectAddress bitmapIndexAddr;
	IndexInfo  *btreeInfo;
	Oid		   *opclasses;
	int16	   *indoptions;
	int			nkeys;
	int			attno;
	Oid			unusedArrayOid = InvalidOid;

	Assert(rel != NULL);

	/* Derive the names of the LOV heap and index from the bitmap index OID. */
	snprintf(heapName, sizeof(heapName),
			 "pg_bm_%u", RelationGetRelid(rel));
	snprintf(indexName, sizeof(indexName),
			 "pg_bm_%u_index", RelationGetRelid(rel));

	heapOid = get_relname_relid(heapName, PG_BITMAPINDEX_NAMESPACE);

	/*
	 * An existing LOV heap means we are re-indexing: give the LOV heap and
	 * the LOV index new relfilenodes instead of creating them.
	 *
	 * XXX Each segment db may end up with different relfilenodes for the LOV
	 * heap and index, which should not be an issue for now.  Ideally every
	 * segment db would use the same oids.
	 */
	if (OidIsValid(heapOid))
	{
		Relation	heapRel;
		Relation	indexRel;
		Buffer		metabuf;
		Page		metapage;

		*lovHeapOid = heapOid;

		indexOid = get_relname_relid(indexName, PG_BITMAPINDEX_NAMESPACE);
		Assert(OidIsValid(indexOid));
		*lovIndexOid = indexOid;

		lovComptypeOid = get_rel_type_id(heapOid);
		Assert(OidIsValid(lovComptypeOid));

		heapRel = heap_open(heapOid, AccessExclusiveLock);
		indexRel = index_open(indexOid, AccessExclusiveLock);

		/* Use the caller's relfilenode when one was supplied. */
		if (!OidIsValid(lovHeapRelfilenode))
			setNewRelfilenode(heapRel);
		else
			setNewRelfilenodeToOid(heapRel, lovHeapRelfilenode);

		if (!OidIsValid(lovIndexRelfilenode))
			setNewRelfilenode(indexRel);
		else
			setNewRelfilenodeToOid(indexRel, lovIndexRelfilenode);

		/*
		 * With a new relfilenode, the index no longer holds a valid btree,
		 * so build a fresh metapage for it.
		 */
		metabuf = _bt_getbuf(indexRel, P_NEW, BT_WRITE);
		Assert(BTREE_METAPAGE == BufferGetBlockNumber(metabuf));
		metapage = BufferGetPage(metabuf);
		_bt_initmetapage(metapage, P_NONE, 0);

		/* XLOG the metapage, unless WAL can be bypassed or this is a temp index. */
		if (!(XLog_UnconvertedCanBypassWal() || indexRel->rd_istemp))
		{
			/*
			 * Fetch the gp_persistent_relation_node information that will
			 * be added to the XLOG record.
			 */
			RelationFetchGpRelationNodeForXLog(indexRel);
			_bt_lognewpage(indexRel, metapage, BufferGetBlockNumber(metabuf));
		}

		/* Whatever the AM cached for the old storage is stale now. */
		if (indexRel->rd_amcache)
		{
			pfree(indexRel->rd_amcache);
			indexRel->rd_amcache = NULL;
		}

		MarkBufferDirty(metabuf);
		_bt_relbuf(indexRel, metabuf);

		/* Close both relations, passing NoLock. */
		index_close(indexRel, NoLock);
		heap_close(heapRel, NoLock);

		return;
	}

	/*
	 * No LOV heap yet: create an empty one that stores every attribute value
	 * together with its block number and offset in the LOV.
	 */
	lovTupDesc = _bitmap_create_lov_heapTupleDesc(rel);

	Assert(rel->rd_rel != NULL);

	heapOid = heap_create_with_catalog(lovHeapName_unused_guard(heapName),
									   PG_BITMAPINDEX_NAMESPACE,
									   rel->rd_rel->reltablespace,
									   *lovHeapOid,
									   rel->rd_rel->relowner,
									   lovTupDesc,
									   /* relam */ InvalidOid,
									   RELKIND_RELATION,
									   RELSTORAGE_HEAP,
									   rel->rd_rel->relisshared,
									   false,
									   /* bufferPoolBulkLoad */ false,
									   0,
									   ONCOMMIT_NOOP,
									   NULL /* GP Policy */ ,
									   (Datum) 0,
									   true,
									   /* valid_opts */ true,
									   &lovComptypeOid,
									   &unusedArrayOid,
									   /* persistentTid */ NULL,
									   /* persistentSerialNum */ NULL);
	Assert(heapOid == *lovHeapOid);

	/*
	 * Bump the command counter so that the tuple of the relation just
	 * created becomes visible for opening.
	 */
	CommandCounterIncrement();

	/* Record the LOV heap as internally dependent on the bitmap index. */
	lovHeapAddr.classId = RelationRelationId;
	lovHeapAddr.objectId = *lovHeapOid;
	lovHeapAddr.objectSubId = 0;

	bitmapIndexAddr.classId = RelationRelationId;
	bitmapIndexAddr.objectId = RelationGetRelid(rel);
	bitmapIndexAddr.objectSubId = 0;

	recordDependencyOn(&lovHeapAddr, &bitmapIndexAddr, DEPENDENCY_INTERNAL);

	/*
	 * Create a btree index on the new heap.  Its key consists of all the
	 * attributes indexed by this bitmap index, i.e. every LOV heap column
	 * except the last two.
	 */
	nkeys = lovTupDesc->natts - 2;

	btreeInfo = makeNode(IndexInfo);
	btreeInfo->ii_NumIndexAttrs = nkeys;
	btreeInfo->ii_Expressions = NIL;
	btreeInfo->ii_ExpressionsState = NIL;
	btreeInfo->ii_Predicate = make_ands_implicit(NULL);
	btreeInfo->ii_PredicateState = NIL;
	btreeInfo->ii_Unique = true;
	btreeInfo->opaque = NULL;

	opclasses = (Oid *) palloc(nkeys * sizeof(Oid));
	indoptions = (int16 *) palloc(nkeys * sizeof(int16));

	/* Key column N is heap attribute N, using its type's default btree opclass. */
	for (attno = 0; attno < nkeys; attno++)
	{
		btreeInfo->ii_KeyAttrNumbers[attno] = attno + 1;
		opclasses[attno] = GetDefaultOpClass(lovTupDesc->attrs[attno]->atttypid,
											 BTREE_AM_OID);
		indoptions[attno] = 0;
	}

	indexOid = index_create(*lovHeapOid,
							indexName,
							*lovIndexOid,
							btreeInfo,
							BTREE_AM_OID,
							rel->rd_rel->reltablespace,
							opclasses,
							indoptions,
							0,
							false,
							false,
							(Oid *) NULL,
							true,
							false,
							false,
							NULL);
	Assert(indexOid == *lovIndexOid);
}
static int64 PersistentBuild_TruncateAllGpRelationNode(void) { Relation pg_database; HeapScanDesc scan; HeapTuple tuple; int64 count; pg_database = heap_open( DatabaseRelationId, AccessShareLock); /* * Truncate gp_relation_node and its index in each database. */ scan = heap_beginscan(pg_database, SnapshotNow, 0, NULL); count = 0; while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { Form_pg_database form_pg_database = (Form_pg_database)GETSTRUCT(tuple); Oid dbOid; Oid dattablespace; RelFileNode relFileNode; SMgrRelation smgrRelation; Page btree_metapage; dbOid = HeapTupleGetOid(tuple); dattablespace = form_pg_database->dattablespace; if (dbOid == HcatalogDbOid) continue; if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "PersistentBuild_TruncateAllGpRelationNode: dbOid %u, '%s'", dbOid, form_pg_database->datname.data); if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Truncating gp_relation_node %u/%u/%u in database oid %u ('%s')", relFileNode.spcNode, relFileNode.dbNode, relFileNode.relNode, dbOid, form_pg_database->datname.data); relFileNode.spcNode = dattablespace; relFileNode.dbNode = dbOid; relFileNode.relNode = GpRelfileNodeRelationId; /* * Truncate WITHOUT generating an XLOG record (i.e. pretend it is a temp relation). */ PersistentBuild_NonTransactionTruncate(&relFileNode); count++; /* * And, the index. Unfortunately, the relfilenode OID can change due to a * REINDEX {TABLE|INDEX} command. */ PersistentBuild_FindGpRelationNodeIndex( dbOid, dattablespace, &relFileNode); if (Debug_persistent_print) elog(Persistent_DebugPrintLevel(), "Truncating gp_relation_node_index %u/%u/%u in database oid %u ('%s'). relfilenode different %s, tablespace different %s", relFileNode.spcNode, relFileNode.dbNode, relFileNode.relNode, dbOid, form_pg_database->datname.data, ((relFileNode.relNode != GpRelfileNodeOidIndexId) ? "true" : "false"), ((relFileNode.spcNode != dattablespace) ? 
"true" : "false")); PersistentBuild_NonTransactionTruncate(&relFileNode); // The BTree needs an empty meta-data block. smgrRelation = smgropen(relFileNode); btree_metapage = (Page)palloc(BLCKSZ); _bt_initmetapage(btree_metapage, P_NONE, 0); smgrwrite( smgrRelation, /* blockNum */ 0, (char*)btree_metapage, /* isTemp */ false); smgrimmedsync(smgrRelation); pfree(btree_metapage); smgrclose(smgrRelation); count++; } heap_endscan(scan); heap_close(pg_database, AccessShareLock); return count; }