/* * RouterExecutorStart sets up the executor state and queryDesc for router * execution. */ void RouterExecutorStart(QueryDesc *queryDesc, int eflags, Task *task) { LOCKMODE lockMode = NoLock; EState *executorState = NULL; CmdType commandType = queryDesc->operation; /* ensure that the task is not NULL */ Assert(task != NULL); /* disallow transactions and triggers during distributed modify commands */ if (commandType != CMD_SELECT) { bool topLevel = true; PreventTransactionChain(topLevel, "distributed commands"); eflags |= EXEC_FLAG_SKIP_TRIGGERS; } /* signal that it is a router execution */ eflags |= EXEC_FLAG_CITUS_ROUTER_EXECUTOR; /* build empty executor state to obtain per-query memory context */ executorState = CreateExecutorState(); executorState->es_top_eflags = eflags; executorState->es_instrument = queryDesc->instrument_options; queryDesc->estate = executorState; /* * As it's similar to what we're doing, use a MaterialState node to store * our state. This is used to store our tuplestore, so cursors etc. can * work. */ queryDesc->planstate = (PlanState *) makeNode(MaterialState); #if (PG_VERSION_NUM < 90500) /* make sure that upsertQuery is false for versions that UPSERT is not available */ Assert(task->upsertQuery == false); #endif lockMode = CommutativityRuleToLockMode(commandType, task->upsertQuery); if (lockMode != NoLock) { AcquireExecutorShardLock(task, lockMode); } }
/* * PgPaxosExecutorStart blocks until the table is ready to read. */ static void PgPaxosExecutorStart(QueryDesc *queryDesc, int eflags) { PlannedStmt *plannedStmt = queryDesc->plannedstmt; List *rangeTableList = plannedStmt->rtable; CmdType commandType = queryDesc->operation; if (IsPgPaxosActive() && HasPaxosTable(rangeTableList)) { char *sqlQuery = (char *) queryDesc->sourceText; char *groupId = NULL; bool topLevel = true; /* disallow transactions during paxos commands */ PreventTransactionChain(topLevel, "paxos commands"); groupId = DeterminePaxosGroup(rangeTableList); if (commandType == CMD_INSERT || commandType == CMD_UPDATE || commandType == CMD_DELETE) { PrepareConsistentWrite(groupId, sqlQuery); } else { PrepareConsistentRead(groupId); } queryDesc->snapshot->curcid = GetCurrentCommandId(false); } /* call into the standard executor start, or hook if set */ if (PreviousExecutorStartHook != NULL) { PreviousExecutorStartHook(queryDesc, eflags); } else { standard_ExecutorStart(queryDesc, eflags); } }
static void DiscardAll(bool isTopLevel) { /* * Disallow DISCARD ALL in a transaction block. This is arguably * inconsistent (we don't make a similar check in the command sequence * that DISCARD ALL is equivalent to), but the idea is to catch mistakes: * DISCARD ALL inside a transaction block would leave the transaction * still uncommitted. */ PreventTransactionChain(isTopLevel, "DISCARD ALL"); SetPGVariable("session_authorization", NIL, false); ResetAllOptions(); DropAllPreparedStatements(); PortalHashTableDeleteAll(); Async_UnlistenAll(); LockReleaseAll(USER_LOCKMETHOD, true); ResetPlanCache(); ResetTempTableNamespace(); }
/* * Drop a table space * * Be careful to check that the tablespace is empty. */ void DropTableSpace(DropTableSpaceStmt *stmt) { #ifdef HAVE_SYMLINK char *tablespacename = stmt->tablespacename; HeapScanDesc scandesc; Relation rel; HeapTuple tuple; ScanKeyData entry[1]; Oid tablespaceoid; /* don't call this in a transaction block */ PreventTransactionChain((void *) stmt, "DROP TABLESPACE"); /* * Acquire ExclusiveLock on pg_tablespace to ensure that no one else is * trying to do DROP TABLESPACE or TablespaceCreateDbspace concurrently. */ rel = heap_open(TableSpaceRelationId, ExclusiveLock); /* * Find the target tuple */ ScanKeyInit(&entry[0], Anum_pg_tablespace_spcname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(tablespacename)); scandesc = heap_beginscan(rel, SnapshotNow, 1, entry); tuple = heap_getnext(scandesc, ForwardScanDirection); if (!HeapTupleIsValid(tuple)) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("tablespace \"%s\" does not exist", tablespacename))); tablespaceoid = HeapTupleGetOid(tuple); /* Must be tablespace owner */ if (!pg_tablespace_ownercheck(tablespaceoid, GetUserId())) aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TABLESPACE, tablespacename); /* Disallow drop of the standard tablespaces, even by superuser */ if (tablespaceoid == GLOBALTABLESPACE_OID || tablespaceoid == DEFAULTTABLESPACE_OID) aclcheck_error(ACLCHECK_NO_PRIV, ACL_KIND_TABLESPACE, tablespacename); /* * Remove the pg_tablespace tuple (this will roll back if we fail below) */ simple_heap_delete(rel, &tuple->t_self); heap_endscan(scandesc); /* * Remove dependency on owner. */ deleteSharedDependencyRecordsFor(TableSpaceRelationId, tablespaceoid); /* * Try to remove the physical infrastructure */ if (!remove_tablespace_directories(tablespaceoid, false)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("tablespace \"%s\" is not empty", tablespacename))); /* Record the filesystem change in XLOG */ { xl_tblspc_drop_rec xlrec; XLogRecData rdata[1]; xlrec.ts_id = tablespaceoid; rdata[0].data = (char *) &xlrec; rdata[0].len = sizeof(xl_tblspc_drop_rec); rdata[0].buffer = InvalidBuffer; rdata[0].next = NULL; (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_DROP, rdata); } /* We keep the lock on pg_tablespace until commit */ heap_close(rel, NoLock); #else /* !HAVE_SYMLINK */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("tablespaces are not supported on this platform"))); #endif /* HAVE_SYMLINK */ }
/* * Create a table space * * Only superusers can create a tablespace. This seems a reasonable restriction * since we're determining the system layout and, anyway, we probably have * root if we're doing this kind of activity */ void CreateTableSpace(CreateTableSpaceStmt *stmt) { #ifdef HAVE_SYMLINK Relation rel; Datum values[Natts_pg_tablespace]; char nulls[Natts_pg_tablespace]; HeapTuple tuple; Oid tablespaceoid; char *location; char *linkloc; Oid ownerId; /* validate */ /* don't call this in a transaction block */ PreventTransactionChain((void *) stmt, "CREATE TABLESPACE"); /* Must be super user */ if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), errmsg("permission denied to create tablespace \"%s\"", stmt->tablespacename), errhint("Must be superuser to create a tablespace."))); /* However, the eventual owner of the tablespace need not be */ if (stmt->owner) ownerId = get_roleid_checked(stmt->owner); else ownerId = GetUserId(); /* Unix-ify the offered path, and strip any trailing slashes */ location = pstrdup(stmt->location); canonicalize_path(location); /* disallow quotes, else CREATE DATABASE would be at risk */ if (strchr(location, '\'')) ereport(ERROR, (errcode(ERRCODE_INVALID_NAME), errmsg("tablespace location may not contain single quotes"))); /* * Allowing relative paths seems risky * * this also helps us ensure that location is not empty or whitespace */ if (!is_absolute_path(location)) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("tablespace location must be an absolute path"))); /* * Check that location isn't too long. Remember that we're going to append * '/<dboid>/<relid>.<nnn>' (XXX but do we ever form the whole path * explicitly? This may be overly conservative.) */ if (strlen(location) >= (MAXPGPATH - 1 - 10 - 1 - 10 - 1 - 10)) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("tablespace location \"%s\" is too long", location))); /* * Disallow creation of tablespaces named "pg_xxx"; we reserve this * namespace for system purposes. */ if (!allowSystemTableMods && IsReservedName(stmt->tablespacename)) ereport(ERROR, (errcode(ERRCODE_RESERVED_NAME), errmsg("unacceptable tablespace name \"%s\"", stmt->tablespacename), errdetail("The prefix \"pg_\" is reserved for system tablespaces."))); /* * Check that there is no other tablespace by this name. (The unique * index would catch this anyway, but might as well give a friendlier * message.) */ if (OidIsValid(get_tablespace_oid(stmt->tablespacename))) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("tablespace \"%s\" already exists", stmt->tablespacename))); /* * Insert tuple into pg_tablespace. The purpose of doing this first is to * lock the proposed tablename against other would-be creators. The * insertion will roll back if we find problems below. */ rel = heap_open(TableSpaceRelationId, RowExclusiveLock); MemSet(nulls, ' ', Natts_pg_tablespace); values[Anum_pg_tablespace_spcname - 1] = DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename)); values[Anum_pg_tablespace_spcowner - 1] = ObjectIdGetDatum(ownerId); values[Anum_pg_tablespace_spclocation - 1] = DirectFunctionCall1(textin, CStringGetDatum(location)); nulls[Anum_pg_tablespace_spcacl - 1] = 'n'; tuple = heap_formtuple(rel->rd_att, values, nulls); tablespaceoid = simple_heap_insert(rel, tuple); CatalogUpdateIndexes(rel, tuple); heap_freetuple(tuple); /* Record dependency on owner */ recordDependencyOnOwner(TableSpaceRelationId, tablespaceoid, ownerId); /* * Attempt to coerce target directory to safe permissions. If this fails, * it doesn't exist or has the wrong owner. */ if (chmod(location, 0700) != 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not set permissions on directory \"%s\": %m", location))); /* * Check the target directory is empty. */ if (!directory_is_empty(location)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("directory \"%s\" is not empty", location))); /* * Create the PG_VERSION file in the target directory. This has several * purposes: to make sure we can write in the directory, to prevent * someone from creating another tablespace pointing at the same directory * (the emptiness check above will fail), and to label tablespace * directories by PG version. */ set_short_version(location); /* * All seems well, create the symlink */ linkloc = (char *) palloc(10 + 10 + 1); sprintf(linkloc, "pg_tblspc/%u", tablespaceoid); if (symlink(location, linkloc) < 0) ereport(ERROR, (errcode_for_file_access(), errmsg("could not create symbolic link \"%s\": %m", linkloc))); /* Record the filesystem change in XLOG */ { xl_tblspc_create_rec xlrec; XLogRecData rdata[2]; xlrec.ts_id = tablespaceoid; rdata[0].data = (char *) &xlrec; rdata[0].len = offsetof(xl_tblspc_create_rec, ts_path); rdata[0].buffer = InvalidBuffer; rdata[0].next = &(rdata[1]); rdata[1].data = (char *) location; rdata[1].len = strlen(location) + 1; rdata[1].buffer = InvalidBuffer; rdata[1].next = NULL; (void) XLogInsert(RM_TBLSPC_ID, XLOG_TBLSPC_CREATE, rdata); } pfree(linkloc); pfree(location); /* We keep the lock on pg_tablespace until commit */ heap_close(rel, NoLock); #else /* !HAVE_SYMLINK */ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("tablespaces are not supported on this platform"))); #endif /* HAVE_SYMLINK */ }
/* * Primary entry point for VACUUM and ANALYZE commands. * * relid is normally InvalidOid; if it is not, then it provides the relation * OID to be processed, and vacstmt->relation is ignored. (The non-invalid * case is currently only used by autovacuum.) * * do_toast is passed as FALSE by autovacuum, because it processes TOAST * tables separately. * * for_wraparound is used by autovacuum to let us know when it's forcing * a vacuum for wraparound, which should not be auto-cancelled. * * bstrategy is normally given as NULL, but in autovacuum it can be passed * in to use the same buffer strategy object across multiple vacuum() calls. * * isTopLevel should be passed down from ProcessUtility. * * It is the caller's responsibility that vacstmt and bstrategy * (if given) be allocated in a memory context that won't disappear * at transaction commit. */ void vacuum(VacuumStmt *vacstmt, Oid relid, bool do_toast, BufferAccessStrategy bstrategy, bool for_wraparound, bool isTopLevel) { const char *stmttype; volatile bool all_rels, in_outer_xact, use_own_xacts; List *relations; /* sanity checks on options */ Assert(vacstmt->options & (VACOPT_VACUUM | VACOPT_ANALYZE)); Assert((vacstmt->options & VACOPT_VACUUM) || !(vacstmt->options & (VACOPT_FULL | VACOPT_FREEZE))); Assert((vacstmt->options & VACOPT_ANALYZE) || vacstmt->va_cols == NIL); stmttype = (vacstmt->options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; /* * We cannot run VACUUM inside a user transaction block; if we were inside * a transaction, then our commit- and start-transaction-command calls * would not have the intended effect! There are numerous other subtle * dependencies on this, too. * * ANALYZE (without VACUUM) can run either way. */ if (vacstmt->options & VACOPT_VACUUM) { PreventTransactionChain(isTopLevel, stmttype); in_outer_xact = false; } else in_outer_xact = IsInTransactionChain(isTopLevel); /* * Send info about dead objects to the statistics collector, unless we are * in autovacuum --- autovacuum.c does this for itself. */ if ((vacstmt->options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) pgstat_vacuum_stat(); /* * Create special memory context for cross-transaction storage. * * Since it is a child of PortalContext, it will go away eventually even * if we suffer an error; there's no need for special abort cleanup logic. */ vac_context = AllocSetContextCreate(PortalContext, "Vacuum", ALLOCSET_DEFAULT_MINSIZE, ALLOCSET_DEFAULT_INITSIZE, ALLOCSET_DEFAULT_MAXSIZE); /* * If caller didn't give us a buffer strategy object, make one in the * cross-transaction memory context. */ if (bstrategy == NULL) { MemoryContext old_context = MemoryContextSwitchTo(vac_context); bstrategy = GetAccessStrategy(BAS_VACUUM); MemoryContextSwitchTo(old_context); } vac_strategy = bstrategy; /* Remember whether we are processing everything in the DB */ all_rels = (!OidIsValid(relid) && vacstmt->relation == NULL); /* * Build list of relations to process, unless caller gave us one. (If we * build one, we put it in vac_context for safekeeping.) */ relations = get_rel_oids(relid, vacstmt->relation); /* * Decide whether we need to start/commit our own transactions. * * For VACUUM (with or without ANALYZE): always do so, so that we can * release locks as soon as possible. (We could possibly use the outer * transaction for a one-table VACUUM, but handling TOAST tables would be * problematic.) * * For ANALYZE (no VACUUM): if inside a transaction block, we cannot * start/commit our own transactions. Also, there's no need to do so if * only processing one relation. For multiple relations when not within a * transaction block, and also in an autovacuum worker, use own * transactions so we can release locks sooner. */ if (vacstmt->options & VACOPT_VACUUM) use_own_xacts = true; else { Assert(vacstmt->options & VACOPT_ANALYZE); if (IsAutoVacuumWorkerProcess()) use_own_xacts = true; else if (in_outer_xact) use_own_xacts = false; else if (list_length(relations) > 1) use_own_xacts = true; else use_own_xacts = false; } /* * vacuum_rel expects to be entered with no transaction active; it will * start and commit its own transaction. But we are called by an SQL * command, and so we are executing inside a transaction already. We * commit the transaction started in PostgresMain() here, and start * another one before exiting to match the commit waiting for us back in * PostgresMain(). */ if (use_own_xacts) { /* ActiveSnapshot is not set by autovacuum */ if (ActiveSnapshotSet()) PopActiveSnapshot(); /* matches the StartTransaction in PostgresMain() */ CommitTransactionCommand(); } /* Turn vacuum cost accounting on or off */ PG_TRY(); { ListCell *cur; VacuumCostActive = (VacuumCostDelay > 0); VacuumCostBalance = 0; /* * Loop to process each selected relation. */ foreach(cur, relations) { Oid relid = lfirst_oid(cur); bool scanned_all = false; if (vacstmt->options & VACOPT_VACUUM) vacuum_rel(relid, vacstmt, do_toast, for_wraparound, &scanned_all); if (vacstmt->options & VACOPT_ANALYZE) { /* * If using separate xacts, start one for analyze. Otherwise, * we can use the outer transaction. */ if (use_own_xacts) { StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); } analyze_rel(relid, vacstmt, vac_strategy, !scanned_all); if (use_own_xacts) { PopActiveSnapshot(); CommitTransactionCommand(); } } } }
/* * Primary entry point for VACUUM and ANALYZE commands. * * options is a bitmask of VacuumOption flags, indicating what to do. * * relid, if not InvalidOid, indicate the relation to process; otherwise, * the RangeVar is used. (The latter must always be passed, because it's * used for error messages.) * * params contains a set of parameters that can be used to customize the * behavior. * * va_cols is a list of columns to analyze, or NIL to process them all. * * bstrategy is normally given as NULL, but in autovacuum it can be passed * in to use the same buffer strategy object across multiple vacuum() calls. * * isTopLevel should be passed down from ProcessUtility. * * It is the caller's responsibility that all parameters are allocated in a * memory context that will not disappear at transaction commit. */ void vacuum(int options, RangeVar *relation, Oid relid, VacuumParams *params, List *va_cols, BufferAccessStrategy bstrategy, bool isTopLevel) { const char *stmttype; volatile bool in_outer_xact, use_own_xacts; List *relations; static bool in_vacuum = false; Assert(params != NULL); stmttype = (options & VACOPT_VACUUM) ? "VACUUM" : "ANALYZE"; /* * We cannot run VACUUM inside a user transaction block; if we were inside * a transaction, then our commit- and start-transaction-command calls * would not have the intended effect! There are numerous other subtle * dependencies on this, too. * * ANALYZE (without VACUUM) can run either way. */ if (options & VACOPT_VACUUM) { PreventTransactionChain(isTopLevel, stmttype); in_outer_xact = false; } else in_outer_xact = IsInTransactionChain(isTopLevel); /* * Due to static variables vac_context, anl_context and vac_strategy, * vacuum() is not reentrant. This matters when VACUUM FULL or ANALYZE * calls a hostile index expression that itself calls ANALYZE. */ if (in_vacuum) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("%s cannot be executed from VACUUM or ANALYZE", stmttype))); /* * Sanity check DISABLE_PAGE_SKIPPING option. */ if ((options & VACOPT_FULL) != 0 && (options & VACOPT_DISABLE_PAGE_SKIPPING) != 0) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("VACUUM option DISABLE_PAGE_SKIPPING cannot be used with FULL"))); /* * Send info about dead objects to the statistics collector, unless we are * in autovacuum --- autovacuum.c does this for itself. */ if ((options & VACOPT_VACUUM) && !IsAutoVacuumWorkerProcess()) pgstat_vacuum_stat(); /* * Create special memory context for cross-transaction storage. * * Since it is a child of PortalContext, it will go away eventually even * if we suffer an error; there's no need for special abort cleanup logic. */ vac_context = AllocSetContextCreate(PortalContext, "Vacuum", ALLOCSET_DEFAULT_SIZES); /* * If caller didn't give us a buffer strategy object, make one in the * cross-transaction memory context. */ if (bstrategy == NULL) { MemoryContext old_context = MemoryContextSwitchTo(vac_context); bstrategy = GetAccessStrategy(BAS_VACUUM); MemoryContextSwitchTo(old_context); } vac_strategy = bstrategy; /* * Build list of relations to process, unless caller gave us one. (If we * build one, we put it in vac_context for safekeeping.) */ relations = get_rel_oids(relid, relation); /* * Decide whether we need to start/commit our own transactions. * * For VACUUM (with or without ANALYZE): always do so, so that we can * release locks as soon as possible. (We could possibly use the outer * transaction for a one-table VACUUM, but handling TOAST tables would be * problematic.) * * For ANALYZE (no VACUUM): if inside a transaction block, we cannot * start/commit our own transactions. Also, there's no need to do so if * only processing one relation. For multiple relations when not within a * transaction block, and also in an autovacuum worker, use own * transactions so we can release locks sooner. */ if (options & VACOPT_VACUUM) use_own_xacts = true; else { Assert(options & VACOPT_ANALYZE); if (IsAutoVacuumWorkerProcess()) use_own_xacts = true; else if (in_outer_xact) use_own_xacts = false; else if (list_length(relations) > 1) use_own_xacts = true; else use_own_xacts = false; } /* * vacuum_rel expects to be entered with no transaction active; it will * start and commit its own transaction. But we are called by an SQL * command, and so we are executing inside a transaction already. We * commit the transaction started in PostgresMain() here, and start * another one before exiting to match the commit waiting for us back in * PostgresMain(). */ if (use_own_xacts) { Assert(!in_outer_xact); /* ActiveSnapshot is not set by autovacuum */ if (ActiveSnapshotSet()) PopActiveSnapshot(); /* matches the StartTransaction in PostgresMain() */ CommitTransactionCommand(); } /* Turn vacuum cost accounting on or off */ PG_TRY(); { ListCell *cur; in_vacuum = true; VacuumCostActive = (VacuumCostDelay > 0); VacuumCostBalance = 0; VacuumPageHit = 0; VacuumPageMiss = 0; VacuumPageDirty = 0; /* * Loop to process each selected relation. */ foreach(cur, relations) { Oid relid = lfirst_oid(cur); if (options & VACOPT_VACUUM) { if (!vacuum_rel(relid, relation, options, params)) continue; } if (options & VACOPT_ANALYZE) { /* * If using separate xacts, start one for analyze. Otherwise, * we can use the outer transaction. */ if (use_own_xacts) { StartTransactionCommand(); /* functions in indexes may want a snapshot set */ PushActiveSnapshot(GetTransactionSnapshot()); } analyze_rel(relid, relation, options, params, va_cols, in_outer_xact, vac_strategy); if (use_own_xacts) { PopActiveSnapshot(); CommitTransactionCommand(); } } } }