/* * Build an empty SPGiST index in the initialization fork */ Datum spgbuildempty(PG_FUNCTION_ARGS) { Relation index = (Relation) PG_GETARG_POINTER(0); Page page; /* Construct metapage. */ page = (Page) palloc(BLCKSZ); SpGistInitMetapage(page); /* Write the page. If archiving/streaming, XLOG it. */ smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO, (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO, page); /* Likewise for the root page. */ SpGistInitPage(page, SPGIST_LEAF); smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_HEAD_BLKNO, (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, SPGIST_HEAD_BLKNO, page); /* * An immediate sync is required even if we xlog'd the pages, because the * writes did not go through shared buffers and therefore a concurrent * checkpoint may have moved the redo pointer past our xlog record. */ smgrimmedsync(index->rd_smgr, INIT_FORKNUM); PG_RETURN_VOID(); }
/* * given a spool loaded by successive calls to _bt_spool, * create an entire btree. */ void _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2) { BTWriteState wstate; #ifdef BTREE_BUILD_STATS if (log_btree_build_stats) { ShowUsage("BTREE BUILD (Spool) STATISTICS"); ResetUsage(); } #endif /* BTREE_BUILD_STATS */ tuplesort_performsort(btspool->sortstate); if (btspool2) tuplesort_performsort(btspool2->sortstate); wstate.heap = btspool->heap; wstate.index = btspool->index; /* * We need to log index creation in WAL iff WAL archiving/streaming is * enabled UNLESS the index isn't WAL-logged anyway. */ wstate.btws_use_wal = XLogIsNeeded() && RelationNeedsWAL(wstate.index); /* reserve the metapage */ wstate.btws_pages_alloced = BTREE_METAPAGE + 1; wstate.btws_pages_written = 0; wstate.btws_zeropage = NULL; /* until needed */ _bt_load(&wstate, btspool, btspool2); }
/* * pg_create_restore_point: a named point for restore * * Permission checking for this function is managed through the normal * GRANT system. */ Datum pg_create_restore_point(PG_FUNCTION_ARGS) { text *restore_name = PG_GETARG_TEXT_P(0); char *restore_name_str; XLogRecPtr restorepoint; if (RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), (errmsg("recovery is in progress"), errhint("WAL control functions cannot be executed during recovery.")))); if (!XLogIsNeeded()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("WAL level not sufficient for creating a restore point"), errhint("wal_level must be set to \"replica\" or \"logical\" at server start."))); restore_name_str = text_to_cstring(restore_name); if (strlen(restore_name_str) >= MAXFNAMELEN) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("value too long for restore point (maximum %d characters)", MAXFNAMELEN - 1))); restorepoint = XLogRestorePoint(restore_name_str); /* * As a convenience, return the WAL location of the restore point record */ PG_RETURN_LSN(restorepoint); }
/* * btbuildempty() -- build an empty btree index in the initialization fork */ Datum btbuildempty(PG_FUNCTION_ARGS) { Relation index = (Relation) PG_GETARG_POINTER(0); Page metapage; /* Construct metapage. */ metapage = (Page) palloc(BLCKSZ); _bt_initmetapage(metapage, P_NONE, 0); /* Write the page. If archiving/streaming, XLOG it. */ smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE, (char *) metapage, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, BTREE_METAPAGE, metapage); /* * An immediate sync is require even if we xlog'd the page, because the * write did not go through shared_buffers and therefore a concurrent * checkpoint may have move the redo pointer past our xlog record. */ smgrimmedsync(index->rd_smgr, INIT_FORKNUM); PG_RETURN_VOID(); }
/* * transientrel_startup --- executor startup */ static void transientrel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) { DR_transientrel *myState = (DR_transientrel *) self; Relation transientrel; transientrel = heap_open(myState->transientoid, NoLock); /* * Fill private fields of myState for use by later routines */ myState->transientrel = transientrel; myState->output_cid = GetCurrentCommandId(true); /* * We can skip WAL-logging the insertions, unless PITR or streaming * replication is in use. We can skip the FSM in any case. */ myState->hi_options = HEAP_INSERT_SKIP_FSM | HEAP_INSERT_FROZEN; if (!XLogIsNeeded()) myState->hi_options |= HEAP_INSERT_SKIP_WAL; myState->bistate = GetBulkInsertState(); /* Not using WAL requires smgr_targblock be initially invalid */ Assert(RelationGetTargetBlock(transientrel) == InvalidBlockNumber); }
/* * Build an empty SPGiST index in the initialization fork */ void spgbuildempty(Relation index) { Page page; /* Construct metapage. */ page = (Page) palloc(BLCKSZ); SpGistInitMetapage(page); /* Write the page. If archiving/streaming, XLOG it. */ PageSetChecksumInplace(page, SPGIST_METAPAGE_BLKNO); smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO, (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, SPGIST_METAPAGE_BLKNO, page, false); /* Likewise for the root page. */ SpGistInitPage(page, SPGIST_LEAF); PageSetChecksumInplace(page, SPGIST_ROOT_BLKNO); smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_ROOT_BLKNO, (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, SPGIST_ROOT_BLKNO, page, true); /* Likewise for the null-tuples root page. */ SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS); PageSetChecksumInplace(page, SPGIST_NULL_BLKNO); smgrwrite(index->rd_smgr, INIT_FORKNUM, SPGIST_NULL_BLKNO, (char *) page, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, SPGIST_NULL_BLKNO, page, true); /* * An immediate sync is required even if we xlog'd the pages, because the * writes did not go through shared buffers and therefore a concurrent * checkpoint may have moved the redo pointer past our xlog record. */ smgrimmedsync(index->rd_smgr, INIT_FORKNUM); }
/* * For Hot Standby we need to know the highest transaction id that will * be removed by any change. VACUUM proceeds in a number of passes so * we need to consider how each pass operates. The first phase runs * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it * progresses - these will have a latestRemovedXid on each record. * In some cases this removes all of the tuples to be removed, though * often we have dead tuples with index pointers so we must remember them * for removal in phase 3. Index records for those rows are removed * in phase 2 and index blocks do not have MVCC information attached. * So before we can allow removal of any index tuples we need to issue * a WAL record containing the latestRemovedXid of rows that will be * removed in phase three. This allows recovery queries to block at the * correct place, i.e. before phase two, rather than during phase three * which would be after the rows have become inaccessible. */ static void vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats) { /* * Skip this for relations for which no WAL is to be written, or if we're * not trying to support archive recovery. */ if (!RelationNeedsWAL(rel) || !XLogIsNeeded()) return; /* * No need to write the record at all unless it contains a valid value */ if (TransactionIdIsValid(vacrelstats->latestRemovedXid)) (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid); }
/* * For Hot Standby we need to know the highest transaction id that will * be removed by any change. VACUUM proceeds in a number of passes so * we need to consider how each pass operates. The first phase runs * heap_page_prune(), which can issue XLOG_HEAP2_CLEAN records as it * progresses - these will have a latestRemovedXid on each record. * In some cases this removes all of the tuples to be removed, though * often we have dead tuples with index pointers so we must remember them * for removal in phase 3. Index records for those rows are removed * in phase 2 and index blocks do not have MVCC information attached. * So before we can allow removal of any index tuples we need to issue * a WAL record containing the latestRemovedXid of rows that will be * removed in phase three. This allows recovery queries to block at the * correct place, i.e. before phase two, rather than during phase three * which would be after the rows have become inaccessible. */ static void vacuum_log_cleanup_info(Relation rel, LVRelStats *vacrelstats) { /* * No need to log changes for temp tables, they do not contain data * visible on the standby server. */ if (rel->rd_istemp || !XLogIsNeeded()) return; /* * No need to write the record at all unless it contains a valid value */ if (TransactionIdIsValid(vacrelstats->latestRemovedXid)) (void) log_heap_cleanup_info(rel->rd_node, vacrelstats->latestRemovedXid); }
/* * given a spool loaded by successive calls to _bt_spool, * create an entire btree. */ void _bt_leafbuild(BTSpool *btspool, BTSpool *btspool2) { BTWriteState wstate; #ifdef BTREE_BUILD_STATS if (log_btree_build_stats) { ShowUsage("BTREE BUILD (Spool) STATISTICS"); ResetUsage(); } #endif /* BTREE_BUILD_STATS */ tuplesort_performsort(btspool->sortstate); if (btspool2) tuplesort_performsort(btspool2->sortstate); wstate.index = btspool->index; /* * We need to log index creation in WAL iff WAL archiving/streaming is * enabled AND it's not a temp index. */ wstate.btws_use_wal = XLogIsNeeded() && !wstate.index->rd_istemp; /* * Write an XLOG UNLOGGED record if WAL-logging was skipped because WAL * archiving is not enabled. */ if (!wstate.btws_use_wal && !wstate.index->rd_istemp) { char reason[NAMEDATALEN + 20]; snprintf(reason, sizeof(reason), "b-tree build on \"%s\"", RelationGetRelationName(wstate.index)); XLogReportUnloggedStatement(reason); } /* reserve the metapage */ wstate.btws_pages_alloced = BTREE_METAPAGE + 1; wstate.btws_pages_written = 0; wstate.btws_zeropage = NULL; /* until needed */ _bt_load(&wstate, btspool, btspool2); }
/* * pg_create_restore_point: a named point for restore */ Datum pg_create_restore_point(PG_FUNCTION_ARGS) { text *restore_name = PG_GETARG_TEXT_P(0); char *restore_name_str; XLogRecPtr restorepoint; char location[MAXFNAMELEN]; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to create a restore point")))); if (RecoveryInProgress()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), (errmsg("recovery is in progress"), errhint("WAL control functions cannot be executed during recovery.")))); if (!XLogIsNeeded()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("WAL level not sufficient for creating a restore point"), errhint("wal_level must be set to \"archive\" or \"hot_standby\" at server start."))); restore_name_str = text_to_cstring(restore_name); if (strlen(restore_name_str) >= MAXFNAMELEN) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("value too long for restore point (maximum %d characters)", MAXFNAMELEN - 1))); restorepoint = XLogRestorePoint(restore_name_str); /* * As a convenience, return the WAL location of the restore point record */ snprintf(location, sizeof(location), "%X/%X", restorepoint.xlogid, restorepoint.xrecoff); PG_RETURN_TEXT_P(cstring_to_text(location)); }
/* * btbuildempty() -- build an empty btree index in the initialization fork */ void btbuildempty(Relation index) { Page metapage; /* Construct metapage. */ metapage = (Page) palloc(BLCKSZ); _bt_initmetapage(metapage, P_NONE, 0); /* Write the page. If archiving/streaming, XLOG it. */ PageSetChecksumInplace(metapage, BTREE_METAPAGE); smgrwrite(index->rd_smgr, INIT_FORKNUM, BTREE_METAPAGE, (char *) metapage, true); if (XLogIsNeeded()) log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM, BTREE_METAPAGE, metapage, false); /* * An immediate sync is required even if we xlog'd the page, because the * write did not go through shared_buffers and therefore a concurrent * checkpoint may have moved the redo pointer past our xlog record. */ smgrimmedsync(index->rd_smgr, INIT_FORKNUM); }
/* * intorel_startup --- executor startup */ static void intorel_startup(DestReceiver *self, int operation, TupleDesc typeinfo) { DR_intorel *myState = (DR_intorel *) self; IntoClause *into = myState->into; bool is_matview; char relkind; CreateStmt *create; Oid intoRelationId; Relation intoRelationDesc; RangeTblEntry *rte; Datum toast_options; ListCell *lc; int attnum; static char *validnsps[] = HEAP_RELOPT_NAMESPACES; Assert(into != NULL); /* else somebody forgot to set it */ /* This code supports both CREATE TABLE AS and CREATE MATERIALIZED VIEW */ is_matview = (into->viewQuery != NULL); relkind = is_matview ? RELKIND_MATVIEW : RELKIND_RELATION; /* * Create the target relation by faking up a CREATE TABLE parsetree and * passing it to DefineRelation. */ create = makeNode(CreateStmt); create->relation = into->rel; create->tableElts = NIL; /* will fill below */ create->inhRelations = NIL; create->ofTypename = NULL; create->constraints = NIL; create->options = into->options; create->oncommit = into->onCommit; create->tablespacename = into->tableSpaceName; create->if_not_exists = false; /* * Build column definitions using "pre-cooked" type and collation info. If * a column name list was specified in CREATE TABLE AS, override the * column names derived from the query. (Too few column names are OK, too * many are not.) */ lc = list_head(into->colNames); for (attnum = 0; attnum < typeinfo->natts; attnum++) { Form_pg_attribute attribute = typeinfo->attrs[attnum]; ColumnDef *col = makeNode(ColumnDef); TypeName *coltype = makeNode(TypeName); if (lc) { col->colname = strVal(lfirst(lc)); lc = lnext(lc); } else col->colname = NameStr(attribute->attname); col->typeName = coltype; col->inhcount = 0; col->is_local = true; col->is_not_null = false; col->is_from_type = false; col->storage = 0; col->raw_default = NULL; col->cooked_default = NULL; col->collClause = NULL; col->collOid = attribute->attcollation; col->constraints = NIL; col->fdwoptions = NIL; coltype->names = NIL; coltype->typeOid = attribute->atttypid; coltype->setof = false; coltype->pct_type = false; coltype->typmods = NIL; coltype->typemod = attribute->atttypmod; coltype->arrayBounds = NIL; coltype->location = -1; /* * It's possible that the column is of a collatable type but the * collation could not be resolved, so double-check. (We must check * this here because DefineRelation would adopt the type's default * collation rather than complaining.) */ if (!OidIsValid(col->collOid) && type_is_collatable(coltype->typeOid)) ereport(ERROR, (errcode(ERRCODE_INDETERMINATE_COLLATION), errmsg("no collation was derived for column \"%s\" with collatable type %s", col->colname, format_type_be(coltype->typeOid)), errhint("Use the COLLATE clause to set the collation explicitly."))); create->tableElts = lappend(create->tableElts, col); } if (lc != NULL) ereport(ERROR, (errcode(ERRCODE_SYNTAX_ERROR), errmsg("too many column names were specified"))); /* * Actually create the target table */ intoRelationId = DefineRelation(create, relkind, InvalidOid); /* * If necessary, create a TOAST table for the target table. Note that * AlterTableCreateToastTable ends with CommandCounterIncrement(), so that * the TOAST table will be visible for insertion. */ CommandCounterIncrement(); /* parse and validate reloptions for the toast table */ toast_options = transformRelOptions((Datum) 0, create->options, "toast", validnsps, true, false); (void) heap_reloptions(RELKIND_TOASTVALUE, toast_options, true); AlterTableCreateToastTable(intoRelationId, toast_options); /* Create the "view" part of a materialized view. */ if (is_matview) { /* StoreViewQuery scribbles on tree, so make a copy */ Query *query = (Query *) copyObject(into->viewQuery); StoreViewQuery(intoRelationId, query, false); CommandCounterIncrement(); } /* * Finally we can open the target table */ intoRelationDesc = heap_open(intoRelationId, AccessExclusiveLock); /* * Check INSERT permission on the constructed table. * * XXX: It would arguably make sense to skip this check if into->skipData * is true. */ rte = makeNode(RangeTblEntry); rte->rtekind = RTE_RELATION; rte->relid = intoRelationId; rte->relkind = relkind; rte->requiredPerms = ACL_INSERT; for (attnum = 1; attnum <= intoRelationDesc->rd_att->natts; attnum++) rte->modifiedCols = bms_add_member(rte->modifiedCols, attnum - FirstLowInvalidHeapAttributeNumber); ExecCheckRTPerms(list_make1(rte), true); /* * Tentatively mark the target as populated, if it's a matview and we're * going to fill it; otherwise, no change needed. */ if (is_matview && !into->skipData) SetMatViewPopulatedState(intoRelationDesc, true); /* * Fill private fields of myState for use by later routines */ myState->rel = intoRelationDesc; myState->output_cid = GetCurrentCommandId(true); /* * We can skip WAL-logging the insertions, unless PITR or streaming * replication is in use. We can skip the FSM in any case. */ myState->hi_options = HEAP_INSERT_SKIP_FSM | (XLogIsNeeded() ? 0 : HEAP_INSERT_SKIP_WAL); myState->bistate = GetBulkInsertState(); /* Not using WAL requires smgr_targblock be initially invalid */ Assert(RelationGetTargetBlock(intoRelationDesc) == InvalidBlockNumber); }
static void _bt_mergebuild(Spooler *self, BTSpool *btspool) { Relation heapRel = self->relinfo->ri_RelationDesc; BTWriteState wstate; BTReader reader; bool merge; Assert(btspool->index->rd_index->indisvalid); tuplesort_performsort(btspool->sortstate); wstate.index = btspool->index; /* * We need to log index creation in WAL iff WAL archiving is enabled AND * it's not a temp index. */ #if PG_VERSION_NUM >= 90000 wstate.btws_use_wal = self->use_wal && XLogIsNeeded() && !RELATION_IS_LOCAL(wstate.index); #else wstate.btws_use_wal = self->use_wal && XLogArchivingActive() && !RELATION_IS_LOCAL(wstate.index); #endif /* reserve the metapage */ wstate.btws_pages_alloced = BTREE_METAPAGE + 1; wstate.btws_pages_written = 0; wstate.btws_zeropage = NULL; /* until needed */ /* * Flush dirty buffers so that we will read the index files directly * in order to get pre-existing data. We must acquire AccessExclusiveLock * for the target table for calling FlushRelationBuffer(). */ LockRelation(wstate.index, AccessExclusiveLock); FlushRelationBuffers(wstate.index); BULKLOAD_PROFILE(&prof_flush); merge = BTReaderInit(&reader, wstate.index); elog(DEBUG1, "pg_bulkload: build \"%s\" %s merge (%s wal)", RelationGetRelationName(wstate.index), merge ? "with" : "without", wstate.btws_use_wal ? "with" : "without"); /* Assign a new file node. */ RelationSetNewRelfilenode(wstate.index, InvalidTransactionId); if (merge || (btspool->isunique && self->max_dup_errors > 0)) { /* Merge two streams into the new file node that we assigned. */ BULKLOAD_PROFILE_PUSH(); _bt_mergeload(self, &wstate, btspool, &reader, heapRel); BULKLOAD_PROFILE_POP(); BULKLOAD_PROFILE(&prof_merge); } else { /* Fast path for newly created index. */ _bt_load(&wstate, btspool, NULL); BULKLOAD_PROFILE(&prof_index); } BTReaderTerm(&reader); }