/*
 * needs_toast_table
 *		Decide whether relation "rel" should be given a TOAST table.
 *
 * A TOAST table is wanted only when both of these hold:
 *	(1) at least one column is toastable, i.e. it is variable-length and its
 *		storage strategy is not 'p', and
 *	(2) a tuple could grow beyond TOAST_TUPLE_THRESHOLD, either because some
 *		variable-length column has no known maximum size or because the
 *		worst-case tuple length exceeds the threshold.
 * (A table consisting of something like "f1 varchar(20)" therefore gets no
 * TOAST table.)
 *
 * Returns true if a TOAST table is needed, false otherwise.
 */
static bool
needs_toast_table(Relation rel)
{
	TupleDesc	desc;
	int32		payload_len = 0;	/* worst-case aligned data size so far */
	int32		worst_case_len;
	bool		unbounded = false;	/* saw a column of unknown maximum size */
	bool		toastable = false;	/* saw a column that may be toasted */
	int			attno;

	/* Partitioned tables never get a TOAST table. */
	if (rel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
		return false;

	/*
	 * Outside bootstrap mode, two kinds of relation are refused:
	 *
	 * - shared relations, because there is no way to mark them toasted in
	 *	 the other databases' pg_class;
	 *
	 * - catalog relations, because the set of catalogs having TOAST tables
	 *	 is chosen explicitly in catalog/toasting.h.  (Some ALTER TABLE
	 *	 command could bring us here for a catalog lacking a TOAST table.)
	 */
	if (!IsBootstrapProcessingMode() &&
		(rel->rd_rel->relisshared || IsCatalogRelation(rel)))
		return false;

	desc = rel->rd_att;
	for (attno = 0; attno < desc->natts; attno++)
	{
		Form_pg_attribute attr = TupleDescAttr(desc, attno);
		int32		maxlen;

		/* Dropped columns contribute nothing. */
		if (attr->attisdropped)
			continue;

		/* Account for alignment padding in front of this column. */
		payload_len = att_align_nominal(payload_len, attr->attalign);

		if (attr->attlen > 0)
		{
			/* Fixed-length types are never toastable; just add their size. */
			payload_len += attr->attlen;
			continue;
		}

		/* Variable-length column: a negative maximum means "unknown". */
		maxlen = type_maximum_size(attr->atttypid, attr->atttypmod);
		if (maxlen >= 0)
			payload_len += maxlen;
		else
			unbounded = true;

		if (attr->attstorage != 'p')
			toastable = true;
	}

	/* Nothing that could be toasted? */
	if (!toastable)
		return false;

	/* Any unlimited-length column makes a TOAST table necessary. */
	if (unbounded)
		return true;

	/* Otherwise compare the worst-case tuple size against the threshold. */
	worst_case_len = MAXALIGN(SizeofHeapTupleHeader +
							  BITMAPLEN(desc->natts)) +
		MAXALIGN(payload_len);

	return (worst_case_len > TOAST_TUPLE_THRESHOLD);
}
/*
 * CacheInvalidateHeapTuple
 *		Register the given tuple for invalidation at end of command
 *		(the current command is either creating or outdating it), and
 *		work out whether a relcache invalidation is implied as well.
 *
 * relation: the relation the tuple belongs to.
 * tuple:	 for an insert or delete, the target tuple; for an update, the
 *			 old tuple version.
 * newtuple: NULL for an insert or delete; the new tuple version for an
 *			 update.  Updates call us just once with both versions, which
 *			 avoids duplicated effort.
 */
void
CacheInvalidateHeapTuple(Relation relation,
						 HeapTuple tuple,
						 HeapTuple newtuple)
{
	Oid			relationId;
	Oid			databaseId;

	/*
	 * Nothing to do in any of these cases:
	 *
	 * - during bootstrap;
	 *
	 * - for tuples outside system relations, since user-relation tuples are
	 *	 never in catcaches and cannot affect the relcache either;
	 *
	 * - for TOAST tuples.  TOAST tables count as system relations, so the
	 *	 previous test does not filter them out.
	 */
	if (IsBootstrapProcessingMode() ||
		!IsSystemRelation(relation) ||
		IsToastRelation(relation))
		return;

	/* Let the catcache machinery handle its part first. */
	PrepareToInvalidateCacheTuple(relation, tuple, newtuple,
								  RegisterCatcacheInvalidation);

	/*
	 * Is this tuple one of the primary definers of a relcache entry?
	 *
	 * newtuple is ignored here: we assume an update cannot move a tuple from
	 * being part of one relcache entry to being part of another.
	 */
	switch (RelationGetRelid(relation))
	{
		case RelationRelationId:
			{
				Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);

				relationId = HeapTupleGetOid(tuple);
				/* Shared relations are flagged with InvalidOid. */
				databaseId = classForm->relisshared ? InvalidOid : MyDatabaseId;
				break;
			}

		case AttributeRelationId:
			{
				Form_pg_attribute attForm = (Form_pg_attribute) GETSTRUCT(tuple);

				relationId = attForm->attrelid;

				/*
				 * KLUGE ALERT: the relcache event always goes out with
				 * MyDatabaseId, even when the rel is shared (which we can't
				 * easily tell).  So only backends in this same database
				 * react to the flush request.  That is in fact appropriate,
				 * since only those backends could see our pg_attribute
				 * change anyway; it just looks a bit ugly.  (In practice,
				 * shared relations can't have schema changes after
				 * bootstrap, so we should never get here for a shared rel.)
				 */
				databaseId = MyDatabaseId;
				break;
			}

		case IndexRelationId:
			{
				Form_pg_index idxForm = (Form_pg_index) GETSTRUCT(tuple);

				/*
				 * An updated pg_index row calls for a relcache inval on the
				 * index relation.  As above, the shared status of the index
				 * is unknown, but in practice that doesn't matter since
				 * indexes of shared catalogs can't have such updates.
				 */
				relationId = idxForm->indexrelid;
				databaseId = MyDatabaseId;
				break;
			}

		default:
			/* Not a tuple that defines a relcache entry. */
			return;
	}

	/* Yes: register the relcache invalidation event. */
	RegisterRelcacheInvalidation(databaseId, relationId);
}
/* * regoperout - converts operator OID to "opr_name" */ Datum regoperout(PG_FUNCTION_ARGS) { Oid oprid = PG_GETARG_OID(0); char *result; HeapTuple opertup; cqContext *pcqCtx; if (oprid == InvalidOid) { result = pstrdup("0"); PG_RETURN_CSTRING(result); } pcqCtx = caql_beginscan( NULL, cql("SELECT * FROM pg_operator " " WHERE oid = :1 ", ObjectIdGetDatum(oprid))); opertup = caql_getnext(pcqCtx); /* XXX XXX select oprname, oprnamespace from pg_operator */ if (HeapTupleIsValid(opertup)) { Form_pg_operator operform = (Form_pg_operator) GETSTRUCT(opertup); char *oprname = NameStr(operform->oprname); /* * In bootstrap mode, skip the fancy namespace stuff and just return * the oper name. (This path is only needed for debugging output * anyway.) */ if (IsBootstrapProcessingMode()) result = pstrdup(oprname); else { FuncCandidateList clist; /* * Would this oper be found (uniquely!) by regoperin? If not, * qualify it. */ clist = OpernameGetCandidates(list_make1(makeString(oprname)), '\0'); if (clist != NULL && clist->next == NULL && clist->oid == oprid) result = pstrdup(oprname); else { const char *nspname; nspname = get_namespace_name(operform->oprnamespace); nspname = quote_identifier(nspname); result = (char *) palloc(strlen(nspname) + strlen(oprname) + 2); sprintf(result, "%s.%s", nspname, oprname); } } } else { /* * If OID doesn't match any pg_operator entry, return it numerically */ result = (char *) palloc(NAMEDATALEN); snprintf(result, NAMEDATALEN, "%u", oprid); } caql_endscan(pcqCtx); PG_RETURN_CSTRING(result); }
/* --------------------------------
 * InitPostgres
 *		Initialize POSTGRES.
 *
 * Parameters:
 *	in_dbname	name of the database to connect to, or NULL when the
 *				database is identified by dboid instead.
 *	dboid		OID of the database; consulted only when in_dbname is NULL
 *				(and we are not in bootstrap mode).
 *	username	user name handed to InitializeSessionUserId() in the normal
 *				multiuser case; also quoted in the "no roles" warning hint
 *				in the standalone case.
 *	out_dbname	if not NULL, receives the database name when the database
 *				was looked up by OID.  It must point to a buffer of size
 *				NAMEDATALEN, because it is filled with an unbounded strcpy()
 *				from a NAMEDATALEN-sized local buffer.
 *
 * In bootstrap mode no parameters are used.
 *
 * This function returns nothing.  Whether the user is a superuser is
 * determined here, during the startup transaction (it can only be tested
 * inside a transaction), and is kept in the local variable am_superuser,
 * which is passed on to process_startup_options() and CheckMyDatabase().
 *
 * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we
 * already have a PGPROC struct ... but it's not filled in yet.
 *
 * Note:
 *		Be very careful with the order of calls in the InitPostgres function.
 * --------------------------------
 */
void
InitPostgres(const char *in_dbname, Oid dboid, const char *username,
			 char *out_dbname)
{
	bool		bootstrap = IsBootstrapProcessingMode();
	bool		autovacuum = IsAutoVacuumProcess();
	bool		am_superuser;
	char	   *fullpath;

	/*
	 * Filled in only on the non-bootstrap paths below, and only read under
	 * "!bootstrap" guards.
	 */
	char		dbname[NAMEDATALEN];

	/*
	 * Add my PGPROC struct to the ProcArray.
	 *
	 * Once I have done this, I am visible to other backends!
	 */
	InitProcessPhase2();

	/* Initialize SessionState entry */
	SessionState_Init();

	/* Initialize memory protection */
	GPMemoryProtect_Init();

	/*
	 * Initialize my entry in the shared-invalidation manager's array of
	 * per-backend data.
	 *
	 * Sets up MyBackendId, a unique backend identifier.
	 */
	MyBackendId = InvalidBackendId;

	SharedInvalBackendInit(false);

	/* Sanity check: the id must lie in the range 1..MaxBackends. */
	if (MyBackendId > MaxBackends || MyBackendId <= 0)
		elog(FATAL, "bad backend id: %d", MyBackendId);

	/* Now that we have a BackendId, we can participate in ProcSignal */
	ProcSignalInit(MyBackendId);

	/*
	 * bufmgr needs another initialization call too
	 */
	InitBufferPoolBackend();

	/*
	 * Initialize local process's access to XLOG.  In bootstrap case we may
	 * skip this since StartupXLOG() was run instead.
	 */
	if (!bootstrap)
		InitXLOGAccess();

	/*
	 * Initialize the relation cache and the system catalog caches.  Note that
	 * no catalog access happens here; we only set up the hashtable structure.
	 * We must do this before starting a transaction because transaction abort
	 * would try to touch these hashtables.
	 */
	RelationCacheInitialize();
	InitCatalogCache();

	/* Initialize portal manager */
	EnablePortalManager();

	/* Initialize stats collection --- must happen before first xact */
	if (!bootstrap)
		pgstat_initialize();

	/*
	 * Load relcache entries for the shared system catalogs.  This must create
	 * at least entries for pg_database and catalogs used for authentication.
	 */
	RelationCacheInitializePhase2();

	/*
	 * Set up process-exit callback to do pre-shutdown cleanup.  This has to
	 * be after we've initialized all the low-level modules like the buffer
	 * manager, because during shutdown this has to run before the low-level
	 * modules start to close down.  On the other hand, we want it in place
	 * before we begin our first transaction --- if we fail during the
	 * initialization transaction, as is entirely possible, we need the
	 * AbortTransaction call to clean up.
	 */
	on_shmem_exit(ShutdownPostgres, 0);

	/* TODO: autovacuum launcher should be done here? */

	/*
	 * Start a new transaction here before first access to db, and get a
	 * snapshot.  We don't have a use for the snapshot itself, but we're
	 * interested in the secondary effect that it sets RecentGlobalXmin.
	 *
	 * This transaction is committed at the end of this function, or at the
	 * end of the walsender branch below.
	 */
	if (!bootstrap)
	{
		StartTransactionCommand();
		(void) GetTransactionSnapshot();
	}

	/*
	 * Figure out our postgres user id, and see if we are a superuser.
	 *
	 * In standalone mode and in the autovacuum process, we use a fixed id,
	 * otherwise we figure it out from the authenticated user name.
	 */
	if (bootstrap || autovacuum)
	{
		InitializeSessionUserIdStandalone();
		am_superuser = true;
	}
	else if (!IsUnderPostmaster)
	{
		/* standalone backend: fixed id, but warn if no roles exist at all */
		InitializeSessionUserIdStandalone();
		am_superuser = true;
		if (!ThereIsAtLeastOneRole())
			ereport(WARNING,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("no roles are defined in this database system"),
					 errhint("You should immediately run CREATE USER \"%s\" CREATEUSER;.",
							 username)));
	}
	else
	{
		/* normal multiuser case */
		Assert(MyProcPort != NULL);
		PerformAuthentication(MyProcPort);
		InitializeSessionUserId(username);
		am_superuser = superuser();
	}

	/*
	 * Check a normal user hasn't connected to a superuser reserved slot.
	 */
	if (!am_superuser &&
		ReservedBackends > 0 &&
		!HaveNFreeProcs(ReservedBackends))
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("connection limit exceeded for non-superusers"),
				 errSendAlert(true)));

	/*
	 * If walsender, we don't want to connect to any particular database. Just
	 * finish the backend startup by processing any options from the startup
	 * packet, and we're done.
	 */
	if (am_walsender)
	{
		Assert(!bootstrap);

		/*
		 * We don't have replication role, which existed in postgres, so
		 * only a superuser may start a walsender.
		 */
		if (!superuser())
			ereport(FATAL,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("must be superuser role to start walsender")));

		/* process any options passed in the startup packet */
		if (MyProcPort != NULL)
			process_startup_options(MyProcPort, am_superuser);

		/* Apply PostAuthDelay as soon as we've read all options */
		if (PostAuthDelay > 0)
			pg_usleep(PostAuthDelay * 1000000L);

		/* initialize client encoding */
		InitializeClientEncoding();

		/* report this backend in the PgBackendStatus array */
		pgstat_bestart();

		/* close the transaction we started above */
		CommitTransactionCommand();

		return;
	}

	/*
	 * Set up the global variables holding database id and path.  But note we
	 * won't actually try to touch the database just yet.
	 *
	 * We take a shortcut in the bootstrap case, otherwise we have to look up
	 * the db name in pg_database.
	 */
	if (bootstrap)
	{
		MyDatabaseId = TemplateDbOid;
		MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
	}
	else if (in_dbname != NULL)
	{
		/* caller specified database by name */
		HeapTuple	tuple;
		Form_pg_database dbform;

		tuple = GetDatabaseTuple(in_dbname);
		if (!HeapTupleIsValid(tuple))
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist", in_dbname)));
		dbform = (Form_pg_database) GETSTRUCT(tuple);
		MyDatabaseId = HeapTupleGetOid(tuple);
		MyDatabaseTableSpace = dbform->dattablespace;
		/* take database name from the caller, just for paranoia */
		strlcpy(dbname, in_dbname, sizeof(dbname));
		pfree(tuple);
	}
	else
	{
		/* caller specified database by OID */
		HeapTuple	tuple;
		Form_pg_database dbform;

		tuple = GetDatabaseTupleByOid(dboid);
		if (!HeapTupleIsValid(tuple))
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database %u does not exist", dboid)));
		dbform = (Form_pg_database) GETSTRUCT(tuple);
		MyDatabaseId = HeapTupleGetOid(tuple);
		MyDatabaseTableSpace = dbform->dattablespace;
		Assert(MyDatabaseId == dboid);
		strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname));

		/*
		 * pass the database name back to the caller; relies on out_dbname
		 * being at least NAMEDATALEN bytes, as the header comment requires
		 */
		if (out_dbname)
			strcpy(out_dbname, dbname);
		pfree(tuple);
	}

	/* Now we can mark our PGPROC entry with the database ID */
	/* (We assume this is an atomic store so no lock is needed) */
	MyProc->databaseId = MyDatabaseId;

	/*
	 * Now, take a writer's lock on the database we are trying to connect to.
	 * If there is a concurrently running DROP DATABASE on that database, this
	 * will block us until it finishes (and has committed its update of
	 * pg_database).
	 *
	 * Note that the lock is not held long, only until the end of this startup
	 * transaction.  This is OK since we are already advertising our use of
	 * the database in the PGPROC array; anyone trying a DROP DATABASE after
	 * this point will see us there.
	 *
	 * Note: use of RowExclusiveLock here is reasonable because we envision
	 * our session as being a concurrent writer of the database.  If we had a
	 * way of declaring a session as being guaranteed-read-only, we could use
	 * AccessShareLock for such sessions and thereby not conflict against
	 * CREATE DATABASE.
	 */
	if (!bootstrap)
		LockSharedObject(DatabaseRelationId, MyDatabaseId, 0,
						 RowExclusiveLock);

	/*
	 * Recheck pg_database to make sure the target database hasn't gone away.
	 * If there was a concurrent DROP DATABASE, this ensures we will die
	 * cleanly without creating a mess.
	 *
	 * The re-fetched row must still exist and must still carry the same OID
	 * and tablespace that were recorded above.
	 *
	 * NOTE(review): unlike the lookups above, the tuple fetched here is not
	 * pfree'd in this function -- confirm that this is intentional.
	 */
	if (!bootstrap)
	{
		HeapTuple	tuple;

		tuple = GetDatabaseTuple(dbname);
		if (!HeapTupleIsValid(tuple) ||
			MyDatabaseId != HeapTupleGetOid(tuple) ||
			MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace)
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist", dbname),
					 errdetail("It seems to have just been dropped or renamed.")));
	}

	/* Build the database path from the database OID and its tablespace. */
	fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);

	if (!bootstrap)
	{
		/*
		 * Make sure the path can actually be accessed.  A missing directory
		 * is reported as a nonexistent database; any other failure is
		 * reported as a file access error.
		 */
		if (access(fullpath, F_OK) == -1)
		{
			if (errno == ENOENT)
				ereport(FATAL,
						(errcode(ERRCODE_UNDEFINED_DATABASE),
						 errmsg("database \"%s\" does not exist",
								dbname),
						 errdetail("The database subdirectory \"%s\" is missing.",
								   fullpath)));
			else
				ereport(FATAL,
						(errcode_for_file_access(),
						 errmsg("could not access directory \"%s\": %m",
								fullpath)));
		}

		ValidatePgVersion(fullpath);
	}

	SetDatabasePath(fullpath);

	/*
	 * It's now possible to do real access to the system catalogs.
	 *
	 * Load relcache entries for the system catalogs.  This must create at
	 * least the minimum set of "nailed-in" cache entries.
	 */
	RelationCacheInitializePhase3();

	/*
	 * Now we have full access to catalog including toast tables,
	 * we can process pg_authid.rolconfig.  This ought to come before
	 * processing startup options so that it can override the settings.
	 */
	if (!bootstrap)
		ProcessRoleGUC();

	/* set up ACL framework (so CheckMyDatabase can check permissions) */
	initialize_acl();

	/*
	 * Re-read the pg_database row for our database, check permissions and set
	 * up database-specific GUC settings.  We can't do this until all the
	 * database-access infrastructure is up.  (Also, it wants to know if the
	 * user is a superuser, so the above stuff has to happen first.)
	 */
	if (!bootstrap)
		CheckMyDatabase(dbname, am_superuser);

	/*
	 * Now process any command-line switches and any additional GUC variable
	 * settings passed in the startup packet.	We couldn't do this before
	 * because we didn't know if client is a superuser.
	 */
	if (MyProcPort != NULL)
		process_startup_options(MyProcPort, am_superuser);

	/*
	 * Maintenance Mode: allow superuser to connect when
	 * gp_maintenance_conn GUC is set.  We cannot check it until
	 * process_startup_options parses the GUC.
	 *
	 * The check applies only when Gp_role is GP_ROLE_DISPATCH; there, any
	 * connection that is not both superuser and gp_maintenance_conn is
	 * rejected.
	 */
	if (gp_maintenance_mode && Gp_role == GP_ROLE_DISPATCH &&
		!(superuser() && gp_maintenance_conn))
		ereport(FATAL,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("maintenance mode: connected by superuser only"),
				 errSendAlert(false)));

	/*
	 * MPP:  If we were started in utility mode then we only want to allow
	 * incoming sessions that specify gp_session_role=utility as well.  This
	 * lets the bash scripts start the QD in utility mode and connect in but
	 * protect ourselves from normal clients who might be trying to connect to
	 * the system while we startup.
	 */
	if ((Gp_role == GP_ROLE_UTILITY) && (Gp_session_role != GP_ROLE_UTILITY))
	{
		ereport(FATAL,
				(errcode(ERRCODE_CANNOT_CONNECT_NOW),
				 errmsg("System was started in master-only utility mode - only utility mode connections are allowed")));
	}

	/* Apply PostAuthDelay as soon as we've read all options */
	if (PostAuthDelay > 0)
		pg_usleep(PostAuthDelay * 1000000L);

	/* set default namespace search path */
	InitializeSearchPath();

	/* initialize client encoding */
	InitializeClientEncoding();

	/* report this backend in the PgBackendStatus array */
	if (!bootstrap)
		pgstat_bestart();

	/*
	 * MPP package setup
	 *
	 * Primary function is to establish connections to the qExecs.
	 * This is SKIPPED when the database is in bootstrap mode or
	 * is not under the postmaster.
	 */
	if (!bootstrap && IsUnderPostmaster)
	{
		cdb_setup();
		on_proc_exit( cdb_cleanup, 0 );
	}

	/*
	 * MPP SharedSnapshot Setup
	 *
	 * Depending on the role, this process either adds a shared snapshot
	 * slot, looks up one added by another process, or uses none at all.
	 */
	if (Gp_role == GP_ROLE_DISPATCH)
	{
		addSharedSnapshot("Query Dispatcher", gp_session_id);
	}
	else if (Gp_role == GP_ROLE_DISPATCHAGENT)
	{
		SharedLocalSnapshotSlot = NULL;
	}
	else if (Gp_segment == -1 && Gp_role == GP_ROLE_EXECUTE && !Gp_is_writer)
	{
		/*
		 * Entry db singleton QE is a user of the shared snapshot -- not a creator.
		 * The lookup will occur once the distributed snapshot has been received.
		 */
		lookupSharedSnapshot("Entry DB Singleton", "Query Dispatcher", gp_session_id);
	}
	else if (Gp_role == GP_ROLE_EXECUTE)
	{
		if (Gp_is_writer)
		{
			addSharedSnapshot("Writer qExec", gp_session_id);
		}
		else
		{
			/*
			 * NOTE: This assumes that the Slot has already been
			 *		 allocated by the writer.  Need to make sure we
			 *		 always allocate the writer qExec first.
			 */
			lookupSharedSnapshot("Reader qExec", "Writer qExec", gp_session_id);
		}
	}

	/* close the transaction we started above */
	if (!bootstrap)
		CommitTransactionCommand();

	return;
}
/* ----------------------------------------------------------------
 *		TypeShellMake
 *
 *		Insert a "shell" row for a new type into pg_type and return the
 *		OID of that row.
 *
 *		The row carries valid but dummy values, and its "typisdefined"
 *		column is false to show that the type is not really defined yet.
 *		Its purpose is merely to exist in the catalogs so that the type's
 *		I/O functions have something to link to.  When the full CREATE TYPE
 *		command is issued later, the bogus values get replaced by correct
 *		ones and "typisdefined" is set to true.
 *
 *		typeName		name of the shell type (must be a valid pointer)
 *		typeNamespace	OID of the namespace the type is created in
 *		ownerId			OID of the owning role
 * ----------------------------------------------------------------
 */
Oid
TypeShellMake(const char *typeName, Oid typeNamespace, Oid ownerId)
{
	Relation	typeRel;
	HeapTuple	shellTup;
	Oid			shellOid;
	NameData	typNameData;
	Datum		colValues[Natts_pg_type];
	bool		colIsNull[Natts_pg_type];
	int			col;

	Assert(PointerIsValid(typeName));

	/* Open pg_type for insertion. */
	typeRel = heap_open(TypeRelationId, RowExclusiveLock);

	/* Start from "every column non-null, every value zero". */
	for (col = 0; col < Natts_pg_type; col++)
	{
		colValues[col] = (Datum) 0;		/* redundant, but safe */
		colIsNull[col] = false;
	}

	/*
	 * Fill in the type name plus dummy values.
	 *
	 * The representational details are the same as int4 ... it doesn't
	 * really matter what they are so long as they are consistent.  Also note
	 * that we give it typtype = TYPTYPE_PSEUDO as extra insurance that it
	 * won't be mistaken for a usable type.
	 */
	namestrcpy(&typNameData, typeName);
	colValues[Anum_pg_type_typname - 1] = NameGetDatum(&typNameData);
	colValues[Anum_pg_type_typnamespace - 1] = ObjectIdGetDatum(typeNamespace);
	colValues[Anum_pg_type_typowner - 1] = ObjectIdGetDatum(ownerId);
	colValues[Anum_pg_type_typlen - 1] = Int16GetDatum(sizeof(int32));
	colValues[Anum_pg_type_typbyval - 1] = BoolGetDatum(true);
	colValues[Anum_pg_type_typtype - 1] = CharGetDatum(TYPTYPE_PSEUDO);
	colValues[Anum_pg_type_typcategory - 1] = CharGetDatum(TYPCATEGORY_PSEUDOTYPE);
	colValues[Anum_pg_type_typispreferred - 1] = BoolGetDatum(false);
	colValues[Anum_pg_type_typisdefined - 1] = BoolGetDatum(false);
	colValues[Anum_pg_type_typdelim - 1] = CharGetDatum(DEFAULT_TYPDELIM);
	colValues[Anum_pg_type_typrelid - 1] = ObjectIdGetDatum(InvalidOid);
	colValues[Anum_pg_type_typelem - 1] = ObjectIdGetDatum(InvalidOid);
	colValues[Anum_pg_type_typarray - 1] = ObjectIdGetDatum(InvalidOid);
	colValues[Anum_pg_type_typinput - 1] = ObjectIdGetDatum(F_SHELL_IN);
	colValues[Anum_pg_type_typoutput - 1] = ObjectIdGetDatum(F_SHELL_OUT);
	colValues[Anum_pg_type_typreceive - 1] = ObjectIdGetDatum(InvalidOid);
	colValues[Anum_pg_type_typsend - 1] = ObjectIdGetDatum(InvalidOid);
	colValues[Anum_pg_type_typmodin - 1] = ObjectIdGetDatum(InvalidOid);
	colValues[Anum_pg_type_typmodout - 1] = ObjectIdGetDatum(InvalidOid);
	colValues[Anum_pg_type_typanalyze - 1] = ObjectIdGetDatum(InvalidOid);
	colValues[Anum_pg_type_typalign - 1] = CharGetDatum('i');
	colValues[Anum_pg_type_typstorage - 1] = CharGetDatum('p');
	colValues[Anum_pg_type_typnotnull - 1] = BoolGetDatum(false);
	colValues[Anum_pg_type_typbasetype - 1] = ObjectIdGetDatum(InvalidOid);
	colValues[Anum_pg_type_typtypmod - 1] = Int32GetDatum(-1);
	colValues[Anum_pg_type_typndims - 1] = Int32GetDatum(0);
	colValues[Anum_pg_type_typcollation - 1] = ObjectIdGetDatum(InvalidOid);

	/* These three columns are stored as NULL. */
	colIsNull[Anum_pg_type_typdefaultbin - 1] = true;
	colIsNull[Anum_pg_type_typdefault - 1] = true;
	colIsNull[Anum_pg_type_typacl - 1] = true;

	/* Build the new pg_type tuple using the catalog's tuple descriptor. */
	shellTup = heap_form_tuple(typeRel->rd_att, colValues, colIsNull);

	/*
	 * Use binary-upgrade override for pg_type.oid, if supplied.  The
	 * override is consumed: it is reset to InvalidOid once applied.
	 */
	if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_pg_type_oid))
	{
		HeapTupleSetOid(shellTup, binary_upgrade_next_pg_type_oid);
		binary_upgrade_next_pg_type_oid = InvalidOid;
	}

	/* Insert the tuple, remember its OID, and update the catalog indexes. */
	shellOid = simple_heap_insert(typeRel, shellTup);

	CatalogUpdateIndexes(typeRel, shellTup);

	/*
	 * Create dependencies.  We can/must skip this in bootstrap mode.
	 */
	if (!IsBootstrapProcessingMode())
		GenerateTypeDependencies(typeNamespace,
								 shellOid,
								 InvalidOid,
								 0,
								 ownerId,
								 F_SHELL_IN,
								 F_SHELL_OUT,
								 InvalidOid,
								 InvalidOid,
								 InvalidOid,
								 InvalidOid,
								 InvalidOid,
								 InvalidOid,
								 false,
								 InvalidOid,
								 InvalidOid,
								 NULL,
								 false);

	/* Post creation hook for new shell type */
	InvokeObjectAccessHook(OAT_POST_CREATE,
						   TypeRelationId, shellOid, 0, NULL);

	/* Release the tuple, close pg_type, and hand back the new type's OID. */
	heap_freetuple(shellTup);
	heap_close(typeRel, RowExclusiveLock);

	return shellOid;
}
/* * Create append-only auxiliary relations for target relation rel. * Returns true if they are newly created. If pg_appendonly has already * known those tables, don't create them and returns false. */ bool CreateAOAuxiliaryTable( Relation rel, const char *auxiliaryNamePrefix, char relkind, TupleDesc tupledesc, IndexInfo *indexInfo, Oid *classObjectId, int16 *coloptions) { char aoauxiliary_relname[NAMEDATALEN]; char aoauxiliary_idxname[NAMEDATALEN]; bool shared_relation; Oid relOid, aoauxiliary_relid = InvalidOid; Oid aoauxiliary_idxid = InvalidOid; ObjectAddress baseobject; ObjectAddress aoauxiliaryobject; Assert(RelationIsValid(rel)); Assert(RelationIsAoRows(rel) || RelationIsAoCols(rel)); Assert(auxiliaryNamePrefix); Assert(tupledesc); Assert(classObjectId); if (relkind != RELKIND_AOSEGMENTS) Assert(indexInfo); shared_relation = rel->rd_rel->relisshared; /* * We cannot allow creating an auxiliary table for a shared relation * after initdb (because there's no way to let other databases know * this visibility map. */ if (shared_relation && !IsBootstrapProcessingMode()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("shared tables cannot have append-only auxiliary relations after initdb"))); relOid = RelationGetRelid(rel); switch(relkind) { case RELKIND_AOVISIMAP: GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL, NULL, NULL, &aoauxiliary_relid, &aoauxiliary_idxid); break; case RELKIND_AOBLOCKDIR: GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, NULL, &aoauxiliary_relid, &aoauxiliary_idxid, NULL, NULL); break; case RELKIND_AOSEGMENTS: GetAppendOnlyEntryAuxOids(relOid, SnapshotNow, &aoauxiliary_relid, NULL, NULL, NULL, NULL); break; default: elog(ERROR, "unsupported auxiliary relkind '%c'", relkind); } /* * Does it have the auxiliary relation? 
*/ if (OidIsValid(aoauxiliary_relid)) { return false; } snprintf(aoauxiliary_relname, sizeof(aoauxiliary_relname), "%s_%u", auxiliaryNamePrefix, relOid); snprintf(aoauxiliary_idxname, sizeof(aoauxiliary_idxname), "%s_%u_index", auxiliaryNamePrefix, relOid); /* * We place auxiliary relation in the pg_aoseg namespace * even if its master relation is a temp table. There cannot be * any naming collision, and the auxiliary relation will be * destroyed when its master is, so there is no need to handle * the aovisimap relation as temp. */ aoauxiliary_relid = heap_create_with_catalog(aoauxiliary_relname, PG_AOSEGMENT_NAMESPACE, rel->rd_rel->reltablespace, InvalidOid, rel->rd_rel->relowner, tupledesc, /* relam */ InvalidOid, relkind, RELSTORAGE_HEAP, shared_relation, true, /* bufferPoolBulkLoad */ false, 0, ONCOMMIT_NOOP, NULL, /* GP Policy */ (Datum) 0, true, /* valid_opts */ false, /* persistentTid */ NULL, /* persistentSerialNum */ NULL); /* Make this table visible, else index creation will fail */ CommandCounterIncrement(); /* Create an index on AO auxiliary tables (like visimap) except for pg_aoseg table */ if (relkind != RELKIND_AOSEGMENTS) { aoauxiliary_idxid = index_create(aoauxiliary_relid, aoauxiliary_idxname, InvalidOid, indexInfo, BTREE_AM_OID, rel->rd_rel->reltablespace, classObjectId, coloptions, (Datum) 0, true, false, true, false, false, NULL); /* Unlock target table -- no one can see it */ UnlockRelationOid(aoauxiliary_relid, ShareLock); /* Unlock the index -- no one can see it anyway */ UnlockRelationOid(aoauxiliary_idxid, AccessExclusiveLock); } /* * Store the auxiliary table's OID in the parent relation's pg_appendonly row. * TODO (How to generalize this?) 
*/ switch (relkind) { case RELKIND_AOVISIMAP: UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid, InvalidOid, InvalidOid, aoauxiliary_relid, aoauxiliary_idxid); break; case RELKIND_AOBLOCKDIR: UpdateAppendOnlyEntryAuxOids(relOid, InvalidOid, aoauxiliary_relid, aoauxiliary_idxid, InvalidOid, InvalidOid); break; case RELKIND_AOSEGMENTS: UpdateAppendOnlyEntryAuxOids(relOid, aoauxiliary_relid, InvalidOid, InvalidOid, InvalidOid, InvalidOid); break; default: elog(ERROR, "unsupported auxiliary relkind '%c'", relkind); } /* * Register dependency from the auxiliary table to the master, so that the * aoseg table will be deleted if the master is. */ baseobject.classId = RelationRelationId; baseobject.objectId = relOid; baseobject.objectSubId = 0; aoauxiliaryobject.classId = RelationRelationId; aoauxiliaryobject.objectId = aoauxiliary_relid; aoauxiliaryobject.objectSubId = 0; recordDependencyOn(&aoauxiliaryobject, &baseobject, DEPENDENCY_INTERNAL); /* * Make changes visible */ CommandCounterIncrement(); return true; }
/*
 * CheckNewRelFileNodeIsOk
 *		Verify that a caller-chosen OID can be used as a relfilenode.
 *
 * newOid:        the OID/relfilenode the caller wants to use
 * reltablespace: tablespace of the relation; 0 selects MyDatabaseTableSpace
 * relisshared:   true if the relation is shared across databases
 * pg_class:      open pg_class relation, or NULL to skip the catalog check
 *
 * When pg_class is supplied, its OID index is probed with SnapshotDirty and
 * an ERROR is raised if a row with this OID already exists.  After that the
 * on-disk path is probed; an existing file counts as a collision, which is
 * an ERROR for non-shared relations and a "false" return for shared ones.
 *
 * Before returning, GetNewObjectId() is called repeatedly until it yields a
 * value that is not below newOid.
 *
 * Returns true when no file collision was found, false otherwise.
 */
bool
CheckNewRelFileNodeIsOk(Oid newOid, Oid reltablespace, bool relisshared,
                        Relation pg_class)
{
    RelFileNode rnode;
    char       *rpath;
    int         fd;
    bool        collides;

    if (pg_class)
    {
        Oid           oidIndex;
        Relation      indexrel;
        IndexScanDesc scan;
        ScanKeyData   key;

        Assert(!IsBootstrapProcessingMode());
        Assert(pg_class->rd_rel->relhasoids);

        /* The relcache will cache the identity of the OID index for us */
        oidIndex = RelationGetOidIndex(pg_class);
        Assert(OidIsValid(oidIndex));

        indexrel = index_open(oidIndex, AccessShareLock);

        ScanKeyInit(&key,
                    (AttrNumber) 1,
                    BTEqualStrategyNumber, F_OIDEQ,
                    ObjectIdGetDatum(newOid));

        scan = index_beginscan(pg_class, indexrel, SnapshotDirty, 1, &key);

        collides = HeapTupleIsValid(index_getnext(scan, ForwardScanDirection));

        index_endscan(scan);
        index_close(indexrel, AccessShareLock);

        /* Oid is unsigned, so it must be printed with %u, not %d */
        if (collides)
            elog(ERROR, "relfilenode %u already in use in \"pg_class\"",
                 newOid);
    }

    /* This should match RelationInitPhysicalAddr */
    rnode.spcNode = reltablespace ? reltablespace : MyDatabaseTableSpace;
    rnode.dbNode = relisshared ? InvalidOid : MyDatabaseId;
    rnode.relNode = newOid;

    /* Check for existing file of same name */
    rpath = relpath(rnode);
    fd = BasicOpenFile(rpath, O_RDONLY | PG_BINARY, 0);

    if (fd >= 0)
    {
        /* definite collision */
        gp_retry_close(fd);
        collides = true;
    }
    else
        collides = false;

    pfree(rpath);

    if (collides && !relisshared)
        elog(ERROR, "oid %u already in use", newOid);

    /*
     * Keep drawing OIDs until the generator returns one that is >= newOid.
     * NOTE(review): presumably this pushes the OID counter past newOid so it
     * is not handed out again -- confirm against GetNewObjectId().
     */
    while (GetNewObjectId() < newOid)
        ;

    return !collides;
}
/* ----------------------------------------------------------------
 *		ExecUpdate
 *
 *		Replace the heap tuple identified by tupleid with the tuple held
 *		in slot, then insert index entries and fire AFTER ROW triggers.
 *
 *		slot:     holds the new version of the tuple
 *		tupleid:  TID of the tuple being replaced; overwritten with the
 *		          newer TID when the update is retried after EvalPlanQual
 *		planSlot: not referenced in this function body
 *		dest:     not referenced in this function body
 *		estate:   executor state (result relation, snapshot, partitions,
 *		          trigger slot, processed-row counter)
 *
 *		Returns without counting the row when a BEFORE ROW trigger
 *		returns NULL, or when heap_update reports that the tuple was
 *		already updated/deleted and there is nothing left to do.
 *
 *		note: we can't run UPDATE queries with transactions
 *		off because UPDATEs are actually INSERTs and our
 *		scan will mistakenly loop forever, updating the tuple
 *		it just inserted..  This should be fixed but until it
 *		is, we don't want to get stuck in an infinite loop
 *		which corrupts your database..
 * ----------------------------------------------------------------
 */
void
ExecUpdate(TupleTableSlot *slot,
           ItemPointer tupleid,
           TupleTableSlot *planSlot,
           DestReceiver *dest,
           EState *estate)
{
    HeapTuple tuple;
    ResultRelInfo *resultRelInfo;
    Relation resultRelationDesc;
    HTSU_Result result;
    ItemPointerData update_ctid;
    TransactionId update_xmax;

    /*
     * abort the operation if not running transactions
     */
    if (IsBootstrapProcessingMode())
        elog(ERROR, "cannot UPDATE during bootstrap");

    /*
     * get the heap tuple out of the tuple table slot, making sure we have a
     * writable copy
     */
    tuple = ExecFetchSlotHeapTuple(slot);

    /*
     * get information on the (current) result relation
     */
    resultRelInfo = estate->es_result_relation_info;
    resultRelationDesc = resultRelInfo->ri_RelationDesc;

    /* see if this update would move the tuple to a different partition */
    if (estate->es_result_partitions)
    {
        AttrNumber max_attr;
        Datum *values;
        bool *nulls;
        Oid targetid;

        Assert(estate->es_partition_state != NULL &&
               estate->es_partition_state->accessMethods != NULL);

        /* Lazily point the partition access methods at per-tuple memory */
        if (!estate->es_partition_state->accessMethods->part_cxt)
            estate->es_partition_state->accessMethods->part_cxt =
                GetPerTupleExprContext(estate)->ecxt_per_tuple_memory;

        Assert(PointerIsValid(estate->es_result_partitions));

        max_attr = estate->es_partition_state->max_partition_attr;

        /* Deform only as many attributes as the partition key needs */
        slot_getsomeattrs(slot, max_attr);
        values = slot_get_values(slot);
        nulls = slot_get_isnull(slot);

        targetid = selectPartition(estate->es_result_partitions, values,
                                   nulls, slot->tts_tupleDescriptor,
                                   estate->es_partition_state->accessMethods);

        if (!OidIsValid(targetid))
            ereport(ERROR,
                    (errcode(ERRCODE_NO_PARTITION_FOR_PARTITIONING_KEY),
                     errmsg("no partition for partitioning key")));

        /* The new row must stay in the partition we are updating */
        if (RelationGetRelid(resultRelationDesc) != targetid)
        {
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("moving tuple from partition \"%s\" to "
                            "partition \"%s\" not supported",
                            get_rel_name(RelationGetRelid(resultRelationDesc)),
                            get_rel_name(targetid)),
                     errOmitLocation(true)));
        }
    }

    /* BEFORE ROW UPDATE Triggers */
    if (resultRelInfo->ri_TrigDesc &&
        resultRelInfo->ri_TrigDesc->n_before_row[TRIGGER_EVENT_UPDATE] > 0)
    {
        HeapTuple newtuple;

        newtuple = ExecBRUpdateTriggers(estate, resultRelInfo,
                                        tupleid, tuple,
                                        estate->es_snapshot->curcid);

        if (newtuple == NULL)   /* "do nothing" */
            return;

        if (newtuple != tuple)  /* modified by Trigger(s) */
        {
            /*
             * Put the modified tuple into a slot for convenience of routines
             * below.  We assume the tuple was allocated in per-tuple memory
             * context, and therefore will go away by itself. The tuple table
             * slot should not try to clear it.
             */
            TupleTableSlot *newslot = estate->es_trig_tuple_slot;

            if (newslot->tts_tupleDescriptor != slot->tts_tupleDescriptor)
                ExecSetSlotDescriptor(newslot, slot->tts_tupleDescriptor);
            ExecStoreGenericTuple(newtuple, newslot, false);
            newslot->tts_tableOid = slot->tts_tableOid; /* for constraints */
            slot = newslot;
            tuple = newtuple;
        }
    }

    /*
     * Check the constraints of the tuple
     *
     * If we generate a new candidate tuple after EvalPlanQual testing, we
     * must loop back here and recheck constraints.  (We don't need to redo
     * triggers, however.  If there are any BEFORE triggers then trigger.c
     * will have done heap_lock_tuple to lock the correct tuple, so there's no
     * need to do them again.)
     *
     * Note that the partition check above is also not repeated when we jump
     * back to this label.
     */
lreplace:;
    if (resultRelationDesc->rd_att->constr)
        ExecConstraints(resultRelInfo, slot, estate);

    if (!GpPersistent_IsPersistentRelation(resultRelationDesc->rd_id))
    {
        /*
         * Normal UPDATE path.
         */

        /*
         * replace the heap tuple
         *
         * Note: if es_crosscheck_snapshot isn't InvalidSnapshot, we check that
         * the row to be updated is visible to that snapshot, and throw a can't-
         * serialize error if not.  This is a special-case behavior needed for
         * referential integrity updates in serializable transactions.
         */
        result = heap_update(resultRelationDesc, tupleid, tuple,
                             &update_ctid, &update_xmax,
                             estate->es_snapshot->curcid,
                             estate->es_crosscheck_snapshot,
                             true /* wait for commit */ );
        switch (result)
        {
            case HeapTupleSelfUpdated:
                /* already deleted by self; nothing to do */
                return;

            case HeapTupleMayBeUpdated:
                break;

            case HeapTupleUpdated:
                if (IsXactIsoLevelSerializable)
                    ereport(ERROR,
                            (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
                             errmsg("could not serialize access due to concurrent update")));
                else if (!ItemPointerEquals(tupleid, &update_ctid))
                {
                    TupleTableSlot *epqslot;

                    /*
                     * A newer version exists at update_ctid: re-evaluate the
                     * plan against it and, if it still qualifies, retry the
                     * update on that version.
                     */
                    epqslot = EvalPlanQual(estate,
                                           resultRelInfo->ri_RangeTableIndex,
                                           &update_ctid,
                                           update_xmax,
                                           estate->es_snapshot->curcid);
                    if (!TupIsNull(epqslot))
                    {
                        *tupleid = update_ctid;
                        slot = ExecFilterJunk(estate->es_junkFilter, epqslot);
                        tuple = ExecFetchSlotHeapTuple(slot);
                        goto lreplace;
                    }
                }
                /* tuple already deleted; nothing to do */
                return;

            default:
                elog(ERROR, "unrecognized heap_update status: %u",
                     result);
                return;
        }
    }
    else
    {
        HeapTuple persistentTuple;

        /*
         * Persistent metadata path.
         *
         * Work on a copy of the tuple stamped with the target TID and
         * overwrite the existing tuple in place instead of calling
         * heap_update.
         */
        persistentTuple = heap_copytuple(tuple);
        persistentTuple->t_self = *tupleid;

        frozen_heap_inplace_update(resultRelationDesc, persistentTuple);

        heap_freetuple(persistentTuple);
    }

    IncrReplaced();
    (estate->es_processed)++;

    /*
     * Note: instead of having to update the old index tuples associated with
     * the heap tuple, all we do is form and insert new index tuples. This is
     * because UPDATEs are actually DELETEs and INSERTs, and index tuple
     * deletion is done later by VACUUM (see notes in ExecDelete).  All we do
     * here is insert new index tuples.  -cim 9/27/89
     */

    /*
     * insert index entries for tuple
     *
     * Note: heap_update returns the tid (location) of the new tuple in the
     * t_self field.
     */
    if (resultRelInfo->ri_NumIndices > 0)
        ExecInsertIndexTuples(slot, &(tuple->t_self), estate, false);

    /* AFTER ROW UPDATE Triggers */
    ExecARUpdateTriggers(estate, resultRelInfo, tupleid, tuple);
}
/*
 * create_toast_table --- internal workhorse
 *
 * Builds the TOAST heap and its unique (chunk_id, chunk_seq) btree index for
 * "rel", records the new heap's OID in rel's pg_class row, and (outside
 * bootstrap) registers an internal dependency of the TOAST heap on rel.
 *
 * rel is already opened and exclusive-locked.
 * toastOid and toastIndexOid are normally InvalidOid, but during
 * bootstrap they can be nonzero to specify hand-assigned OIDs.
 * is_part_child: when true, the locks on the new TOAST heap and index are
 * released right after creation.
 *
 * Returns false if rel already has a TOAST table or does not need one,
 * true once a TOAST table has been created.
 */
static bool
create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid,
                   bool is_part_child)
{
    Oid         parentOid = RelationGetRelid(rel);
    bool        parentIsShared;
    char        relName[NAMEDATALEN];
    char        idxName[NAMEDATALEN];
    TupleDesc   toastDesc;
    Oid         toastNamespace;
    Oid         toastRelOid;
    Oid         toastIdxOid;
    IndexInfo  *idxInfo;
    Oid         opclassOids[2];
    int16       colOptions[2];
    Relation    pgClass;
    HeapTuple   classTuple;
    Form_pg_class classForm;
    int         col;

    /* Nothing to do if already toasted, or if no TOAST table is needed. */
    if (rel->rd_rel->reltoastrelid != InvalidOid ||
        !RelationNeedsToastTable(rel))
        return false;

    /*
     * The TOAST table is shared exactly when its parent is.  A shared
     * relation cannot be toasted after initdb, because there is no way to
     * mark it toasted in other databases' pg_class.
     */
    parentIsShared = rel->rd_rel->relisshared;
    if (parentIsShared && !IsBootstrapProcessingMode())
        ereport(ERROR,
                (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
                 errmsg("shared tables cannot be toasted after initdb")));

    /* Names of the TOAST heap and of its index. */
    snprintf(relName, sizeof(relName), "pg_toast_%u", parentOid);
    snprintf(idxName, sizeof(idxName), "pg_toast_%u_index", parentOid);

    /* Three-column descriptor: chunk_id, chunk_seq, chunk_data. */
    toastDesc = CreateTemplateTupleDesc(3, false);
    TupleDescInitEntry(toastDesc, (AttrNumber) 1, "chunk_id", OIDOID, -1, 0);
    TupleDescInitEntry(toastDesc, (AttrNumber) 2, "chunk_seq", INT4OID, -1, 0);
    TupleDescInitEntry(toastDesc, (AttrNumber) 3, "chunk_data", BYTEAOID, -1, 0);

    /*
     * Force plain storage on every column so the TOAST table can never be
     * toasted itself.  Only chunk_data (bytea) strictly needs it; the other
     * two are set as well for safety.
     */
    for (col = 0; col < 3; col++)
        toastDesc->attrs[col]->attstorage = 'p';

    /*
     * Regular relations keep their TOAST tables in pg_toast; temp relations
     * use the per-backend temp-toast namespace.
     */
    toastNamespace = rel->rd_istemp ? GetTempToastNamespace()
                                    : PG_TOAST_NAMESPACE;

    /*
     * XXX would it make sense to apply the master's reloptions to the toast
     * table?  Or maybe some toast-specific reloptions?
     */
    toastRelOid = heap_create_with_catalog(relName,
                                           toastNamespace,
                                           rel->rd_rel->reltablespace,
                                           toastOid,
                                           rel->rd_rel->relowner,
                                           toastDesc,
                                           /* relam */ InvalidOid,
                                           RELKIND_TOASTVALUE,
                                           RELSTORAGE_HEAP,
                                           parentIsShared,
                                           true,
                                           /* bufferPoolBulkLoad */ false,
                                           0,
                                           ONCOMMIT_NOOP,
                                           NULL,    /* CDB POLICY */
                                           (Datum) 0,
                                           true,
                                           /* valid_opts */ false,
                                           /* persistentTid */ NULL,
                                           /* persistentSerialNum */ NULL);

    /* The new heap must be visible before an index can be built on it. */
    CommandCounterIncrement();

    /*
     * Describe a unique two-column btree index on (chunk_id, chunk_seq).
     *
     * The ordinary TOAST access routines would work with an index on
     * chunk_id alone, but the slice access routines use both columns to
     * reach a single chunk quickly.  Uniqueness also guards against
     * duplicate TOAST chunk OIDs, and btree behaves better without large
     * runs of equal keys.
     */
    idxInfo = makeNode(IndexInfo);
    idxInfo->ii_NumIndexAttrs = 2;
    idxInfo->ii_KeyAttrNumbers[0] = 1;
    idxInfo->ii_KeyAttrNumbers[1] = 2;
    idxInfo->ii_Expressions = NIL;
    idxInfo->ii_ExpressionsState = NIL;
    idxInfo->ii_Predicate = NIL;
    idxInfo->ii_PredicateState = NIL;
    idxInfo->ii_Unique = true;
    idxInfo->ii_ReadyForInserts = true;
    idxInfo->ii_Concurrent = false;
    idxInfo->ii_BrokenHotChain = false;

    opclassOids[0] = OID_BTREE_OPS_OID;
    opclassOids[1] = INT4_BTREE_OPS_OID;

    colOptions[0] = 0;
    colOptions[1] = 0;

    toastIdxOid = index_create(toastRelOid, idxName, toastIndexOid,
                               idxInfo,
                               BTREE_AM_OID,
                               rel->rd_rel->reltablespace,
                               opclassOids, colOptions, (Datum) 0,
                               true, false, true, false, false, NULL);

    /*
     * If this is a partitioned child, we can unlock since the master is
     * already locked.
     */
    if (is_part_child)
    {
        UnlockRelationOid(toastRelOid, ShareLock);
        UnlockRelationOid(toastIdxOid, AccessExclusiveLock);
    }

    /* Record the TOAST heap's OID in the parent's pg_class row. */
    pgClass = heap_open(RelationRelationId, RowExclusiveLock);

    classTuple = SearchSysCacheCopy(RELOID,
                                    ObjectIdGetDatum(parentOid),
                                    0, 0, 0);
    if (!HeapTupleIsValid(classTuple))
        elog(ERROR, "cache lookup failed for relation %u", parentOid);

    classForm = (Form_pg_class) GETSTRUCT(classTuple);
    classForm->reltoastrelid = toastRelOid;

    if (IsBootstrapProcessingMode())
    {
        /* While bootstrapping, we cannot UPDATE, so overwrite in-place */
        heap_inplace_update(pgClass, classTuple);
    }
    else
    {
        /* normal case, use a transactional update */
        simple_heap_update(pgClass, &classTuple->t_self, classTuple);

        /* Keep catalog indexes current */
        CatalogUpdateIndexes(pgClass, classTuple);
    }

    heap_freetuple(classTuple);

    heap_close(pgClass, RowExclusiveLock);

    /*
     * Make the TOAST heap internally dependent on its parent so it is
     * dropped along with it.  Skipped in bootstrap mode.
     */
    if (!IsBootstrapProcessingMode())
    {
        ObjectAddress parentAddr;
        ObjectAddress toastAddr;

        parentAddr.classId = RelationRelationId;
        parentAddr.objectId = parentOid;
        parentAddr.objectSubId = 0;

        toastAddr.classId = RelationRelationId;
        toastAddr.objectId = toastRelOid;
        toastAddr.objectSubId = 0;

        recordDependencyOn(&toastAddr, &parentAddr, DEPENDENCY_INTERNAL);
    }

    /* Make changes visible */
    CommandCounterIncrement();

    return true;
}
/*
 * PrepareForTupleInvalidation
 *		Work out which cache invalidation events a change to "tuple" in
 *		"relation" implies, and register them.
 *
 * Nothing is registered during bootstrap, for tuples of non-system
 * relations, or for TOAST relations.  Otherwise a catcache invalidation is
 * always prepared, and tuples of pg_class or pg_attribute additionally
 * produce a relcache invalidation (plus an smgr invalidation for pg_class).
 */
static void
PrepareForTupleInvalidation(Relation relation, HeapTuple tuple)
{
    Oid         catalogOid;
    Oid         dbOid;
    Oid         targetRelOid;

    /*
     * Skip everything during bootstrap.  User-relation tuples are never in
     * catcaches and cannot affect the relcache, so they are skipped too.
     * TOAST tables count as system relations, hence the separate test.
     */
    if (IsBootstrapProcessingMode() ||
        !IsSystemRelation(relation) ||
        IsToastRelation(relation))
        return;

    /* Catcache invalidation comes first. */
    PrepareToInvalidateCacheTuple(relation, tuple,
                                  RegisterCatcacheInvalidation);

    /* Does this tuple also define part of a relcache entry? */
    catalogOid = RelationGetRelid(relation);

    if (catalogOid == AttributeRelationId)
    {
        Form_pg_attribute attForm = (Form_pg_attribute) GETSTRUCT(tuple);

        targetRelOid = attForm->attrelid;

        /*
         * KLUGE ALERT: the relcache event always carries MyDatabaseId, even
         * when the relation is shared (which cannot easily be determined
         * here).  Only backends in this database will therefore react, which
         * is appropriate because only they could see the pg_attribute change
         * anyway.  It looks a bit ugly though.
         */
        dbOid = MyDatabaseId;
    }
    else if (catalogOid == RelationRelationId)
    {
        Form_pg_class classForm = (Form_pg_class) GETSTRUCT(tuple);
        RelFileNode physAddr;

        targetRelOid = HeapTupleGetOid(tuple);
        dbOid = classForm->relisshared ? InvalidOid : MyDatabaseId;

        /*
         * An smgr invalidation is sent in addition to the relcache one,
         * because other backends may hold smgr cache entries without a
         * relcache entry for the relation.
         *
         * Note: when a pg_class row update assigns a new relfilenode or
         * reltablespace, this function runs for both the old and the new
         * tuple, so invalidations for both RelFileNode values are broadcast
         * and other backends close their references to the old file.
         *
         * XXX possible future cleanup: it might be better to trigger smgr
         * flushes explicitly, rather than indirectly from pg_class updates.
         */
        physAddr.spcNode = classForm->reltablespace
            ? classForm->reltablespace
            : MyDatabaseTableSpace;
        physAddr.dbNode = dbOid;
        physAddr.relNode = classForm->relfilenode;
        RegisterSmgrInvalidation(physAddr);
    }
    else
    {
        /* Any other catalog: no relcache entry is affected. */
        return;
    }

    /* Register the relcache invalidation event. */
    RegisterRelcacheInvalidation(dbOid, targetRelOid);
}
/*
 * recordMultipleDependencies
 *		Insert one pg_depend row per entry of "referenced", all of kind
 *		"behavior" and all depending from the single object "depender".
 *
 * depender:    the dependent object
 * referenced:  array of objects it depends on
 * nreferenced: number of entries in "referenced"; <= 0 is a no-op
 * behavior:    dependency type stored in every row
 *
 * Doing this in one call is a little cheaper than recording each dependency
 * separately.  Nothing is recorded in bootstrap mode, and referenced objects
 * that are pinned by the system are skipped.
 */
void
recordMultipleDependencies(const ObjectAddress *depender,
                           const ObjectAddress *referenced,
                           int nreferenced,
                           DependencyType behavior)
{
    Relation    pgDepend;
    CatalogIndexState idxState = NULL;  /* opened lazily on first insert */
    bool        isnull[Natts_pg_depend];
    Datum       row[Natts_pg_depend];
    int         n;

    /*
     * Nothing to do for an empty list.  During bootstrap pg_depend may not
     * exist yet; initdb fills in the appropriate entries afterwards.
     */
    if (nreferenced <= 0 || IsBootstrapProcessingMode())
        return;

    pgDepend = heap_open(DependRelationId, RowExclusiveLock);

    memset(isnull, false, sizeof(isnull));

    for (n = 0; n < nreferenced; n++)
    {
        const ObjectAddress *ref = &referenced[n];
        HeapTuple   newRow;

        /*
         * A dependency on a system-pinned object need not be recorded; the
         * check is worth its cost because it saves a lot of pg_depend space.
         */
        if (isObjectPinned(ref, pgDepend))
            continue;

        /*
         * Build and insert the row.  Duplicates are not checked for; they
         * do no harm.
         */
        row[Anum_pg_depend_classid - 1] = ObjectIdGetDatum(depender->classId);
        row[Anum_pg_depend_objid - 1] = ObjectIdGetDatum(depender->objectId);
        row[Anum_pg_depend_objsubid - 1] = Int32GetDatum(depender->objectSubId);

        row[Anum_pg_depend_refclassid - 1] = ObjectIdGetDatum(ref->classId);
        row[Anum_pg_depend_refobjid - 1] = ObjectIdGetDatum(ref->objectId);
        row[Anum_pg_depend_refobjsubid - 1] = Int32GetDatum(ref->objectSubId);

        row[Anum_pg_depend_deptype - 1] = CharGetDatum((char) behavior);

        newRow = heap_form_tuple(pgDepend->rd_att, row, isnull);

        simple_heap_insert(pgDepend, newRow);

        /* keep indexes current */
        if (idxState == NULL)
            idxState = CatalogOpenIndexes(pgDepend);

        CatalogIndexInsert(idxState, newRow);

        heap_freetuple(newRow);
    }

    if (idxState != NULL)
        CatalogCloseIndexes(idxState);

    heap_close(pgDepend, RowExclusiveLock);
}
/* * systable_beginscan --- set up for heap-or-index scan * * rel: catalog to scan, already opened and suitably locked * indexId: OID of index to conditionally use * indexOK: if false, forces a heap scan (see notes below) * snapshot: time qual to use (usually should be SnapshotNow) * nkeys, key: scan keys * * The attribute numbers in the scan key should be set for the heap case. * If we choose to index, we reset them to 1..n to reference the index * columns. Note this means there must be one scankey qualification per * index column! This is checked by the Asserts in the normal, index-using * case, but won't be checked if the heapscan path is taken. * * The routine checks the normal cases for whether an indexscan is safe, * but caller can make additional checks and pass indexOK=false if needed. * In standard case indexOK can simply be constant TRUE. */ SysScanDesc systable_beginscan(Relation heapRelation, Oid indexId, bool indexOK, Snapshot snapshot, int nkeys, ScanKey key) { SysScanDesc sysscan; Relation irel; if (indexOK && !IgnoreSystemIndexes && !ReindexIsProcessingIndex(indexId)) irel = index_open(indexId, AccessShareLock); else irel = NULL; sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); sysscan->heap_rel = heapRelation; sysscan->irel = irel; if (irel) { int i; if (!IsBootstrapProcessingMode()) { Insist(RelationGetRelid(heapRelation) == irel->rd_index->indrelid); } /* Change attribute numbers to be index column numbers. */ for (i = 0; i < nkeys; i++) { int j; for (j = 0; j < irel->rd_index->indnatts; j++) { if (key[i].sk_attno == irel->rd_index->indkey.values[j]) { key[i].sk_attno = j + 1; break; } } if (j == irel->rd_index->indnatts) elog(ERROR, "column is not in index"); } sysscan->iscan = index_beginscan(heapRelation, irel, snapshot, nkeys, key); sysscan->scan = NULL; } else { /* * We disallow synchronized scans when forced to use a heapscan on a * catalog. 
In most cases the desired rows are near the front, so * that the unpredictable start point of a syncscan is a serious * disadvantage; and there are no compensating advantages, because * it's unlikely that such scans will occur in parallel. */ sysscan->scan = heap_beginscan_strat(heapRelation, snapshot, nkeys, key, true, false); sysscan->iscan = NULL; } return sysscan; }
/* --------------------------------
 * InitPostgres
 *		One-time backend initialization for the database called "name".
 *
 *		Runs the initialization steps below in a fixed order: exception
 *		handling, memory contexts and portals, communication and stdio,
 *		local buffers, database directory, database id, storage manager,
 *		relation cache and transaction system (with locking disabled),
 *		shared invalidation state, per-backend process state, access
 *		methods, catalog caches and user id.
 *
 *		On success PostgresIsInitialized is set, and the processing mode is
 *		switched to NormalProcessing unless we started in bootstrap mode.
 *		A bad database directory or an out-of-range backend id is reported
 *		with elog(FATAL).
 * --------------------------------
 */
void
InitPostgres(char *name)        /* database name */
{
    bool bootstrap;             /* true if BootstrapProcessing */

    /* ----------------
     *	see if we're running in BootstrapProcessing mode
     * ----------------
     */
    bootstrap = IsBootstrapProcessingMode();

    /* ----------------
     *	turn on the exception handler.  Note: we cannot use elog, Assert,
     *	AssertState, etc. until after exception handling is on.
     * ----------------
     */
    EnableExceptionHandling(true);

    /* ----------------
     *	A stupid check to make sure we don't call this more than once.
     *	But things like ReinitPostgres() get around this by just diddling
     *	the PostgresIsInitialized flag.
     * ----------------
     */
    AssertState(!PostgresIsInitialized);

    /* ----------------
     *	Memory system initialization.
     *	(we may call palloc after EnableMemoryContext())
     *
     *	Note EnableMemoryContext() must happen before EnablePortalManager().
     * ----------------
     */
    EnableMemoryContext(true);  /* initializes the "top context" */
    EnablePortalManager(true);  /* memory for portal/transaction stuff */

    /* ----------------
     *	initialize the backend local portal stack used by
     *	internal PQ function calls.  see src/lib/libpq/be-dumpdata.c
     *	This is different from the "portal manager" so this goes here.
     *	-cim 2/12/91
     * ----------------
     */
    be_portalinit();

    /* ----------------
     *	 attach to shared memory and semaphores, and initialize our
     *	 input/output/debugging file descriptors.
     * ----------------
     */
    InitCommunication();
    InitStdio();

    /*
     * initialize the local buffer manager
     */
    InitLocalBuffer();

    /* Without transaction flushing, flush the buffer pool at exit instead */
    if (!TransactionFlushEnabled())
        on_exitpg(FlushBufferPool, (caddr_t) NULL);

    /* ----------------
     *	check for valid "meta gunk" (??? -cim 10/5/90) and change to
     *	database directory.
     *
     *	Note:  DatabaseName, MyDatabaseName, and DatabasePath are all
     *	initialized with DatabaseMetaGunkIsConsistent(), strncpy() and
     *	DoChdirAndInitDatabase() below!  XXX clean this crap up!
     *	-cim 10/5/90
     * ----------------
     */
    {
        /*
         * NOTE(review): myPath is local to this block; if DatabasePath keeps
         * pointing at it (as the comment below says) the pointer would dangle
         * once the block ends -- confirm that
         * DoChdirAndInitDatabaseNameAndPath() copies the string.
         */
        char myPath[MAXPGPATH] = ".";   /* DatabasePath points here! */

        /* ----------------
         *  DatabaseMetaGunkIsConsistent fills in myPath, but what about
         *  when bootstrap or Noversion is true?? -cim 10/5/90
         * ----------------
         */

        /*
         * The consistency check is skipped entirely in bootstrap mode, and a
         * failed check is ignored when Noversion is set.
         */
        if (! bootstrap &&
            ! DatabaseMetaGunkIsConsistent(name, myPath) &&
            ! Noversion) {
            elog(NOTICE, "InitPostgres: could not locate valid PG_VERSION\n");
            elog(NOTICE, "files for %s and %s.", DataDir, name);
            elog(FATAL, "Have you run initdb/createdb and set PGDATA properly?");
        }

        /* ----------------
         *  ok, we've figured out myName and myPath, now save these
         *  and chdir to myPath.
         * ----------------
         */
        DoChdirAndInitDatabaseNameAndPath(name, myPath);
    }

    /* ********************************
     *	code after this point assumes we are in the proper directory!
     * ********************************
     */

    /* ----------------
     *	initialize the database id used for system caches and lock tables
     * ----------------
     */
    InitMyDatabaseId();

    smgrinit();

    /* ----------------
     *	initialize the transaction system and the relation descriptor
     *	cache.  Note we have to make certain the lock manager is off while
     *	we do this.
     * ----------------
     */
    AmiTransactionOverride(IsBootstrapProcessingMode());
    LockDisable(true);

    /*
     * Part of the initialization processing done here sets a read
     * lock on pg_log.  Since locking is disabled the set doesn't have
     * intended effect of locking out writers, but this is ok, since
     * we only lock it to examine AMI transaction status, and this is
     * never written after initdb is done. -mer 15 June 1992
     */
    RelationInitialize();           /* pre-allocated reldescs created here */
    InitializeTransactionSystem();  /* pg_log,etc init/crash recovery here */

    LockDisable(false);

    /* ----------------
     *	anyone knows what this does?  something having to do with
     *	system catalog cache invalidation in the case of multiple
     *	backends, I think -cim 10/3/90
     *	Sets up MyBackendId a unique backend identifier.
     * ----------------
     */
    InitSharedInvalidationState();

    /* ----------------
     * Set up a per backend process in shared memory.  Must be done after
     * InitSharedInvalidationState() as it relies on MyBackendId being
     * initialized already.  XXX -mer 11 Aug 1991
     * ----------------
     */
    InitProcess(PostgresIpcKey);

    /* Valid backend ids are 1 .. MaxBackendId */
    if (MyBackendId > MaxBackendId || MyBackendId <= 0) {
        elog(FATAL, "cinit2: bad backend id %d (%d)",
             MyBackendTag,
             MyBackendId);
    }

    /* ----------------
     *	initialize the access methods.
     * ----------------
     */
    initam();

    /* ----------------
     *	initialize all the system catalog caches.
     * ----------------
     */
    zerocaches();
    InitCatalogCache();

    /* ----------------
     *	 set ourselves to the proper user id and figure out our postgres
     *	 user id.  If we ever add security so that we check for valid
     *	 postgres users, we might do it here.
     * ----------------
     */
    InitUserid();

    /* ----------------
     *	ok, all done, now let's make sure we don't do it again.
     * ----------------
     */
    PostgresIsInitialized = true;
/*    on_exitpg(DestroyLocalRelList, (caddr_t) NULL); */

    /* ----------------
     *  Done with "InitPostgres", now change to NormalProcessing unless
     *  we're in BootstrapProcessing mode.
     * ----------------
     */
    if (!bootstrap)
        SetProcessingMode(NormalProcessing);
/*    if (testFlag || lockingOff) */
    /* Leave the lock manager disabled for the session when lockingOff is set */
    if (lockingOff)
        LockDisable(true);
}
/* -------------------------------- * InitMyDatabaseId() -- Find and record the OID of the database we are * to open. * * The database's oid forms half of the unique key for the system * caches and lock tables. We therefore want it initialized before * we open any relations, since opening relations puts things in the * cache. To get around this problem, this code opens and scans the * pg_database relation by hand. * * This algorithm relies on the fact that first attribute in the * pg_database relation schema is the database name. It also knows * about the internal format of tuples on disk and the length of * the datname attribute. It knows the location of the pg_database * file. * * This code is called from InitDatabase(), after we chdir() to the * database directory but before we open any relations. * -------------------------------- */ void InitMyDatabaseId() { int dbfd; int fileflags; int nbytes; int max, i; HeapTuple tup; Page pg; PageHeader ph; char *dbfname; Form_pg_database tup_db; /* * At bootstrap time, we don't need to check the oid of the database * in use, since we're not using shared memory. This is lucky, since * the database may not be in the tables yet. */ if (IsBootstrapProcessingMode()) { LockDisable(true); return; } dbfname = (char *) palloc(strlen(DataDir) + strlen("pg_database") + 2); sprintf(dbfname, "%s%cpg_database", DataDir, SEP_CHAR); fileflags = O_RDONLY; #ifdef WIN32 fileflags |= _O_BINARY; #endif /* WIN32 */ if ((dbfd = open(dbfname, O_RDONLY, 0666)) < 0) elog(FATAL, "Cannot open %s", dbfname); pfree(dbfname); /* ---------------- * read and examine every page in pg_database * * Raw I/O! Read those tuples the hard way! Yow! * * Why don't we use the access methods or move this code * someplace else? This is really pg_database schema dependent * code. Perhaps it should go in lib/catalog/pg_database? * -cim 10/3/90 * * mao replies 4 apr 91: yeah, maybe this should be moved to * lib/catalog. 
however, we CANNOT use the access methods since * those use the buffer cache, which uses the relation cache, which * requires that the dbid be set, which is what we're trying to do * here. * ---------------- */ pg = (Page) palloc(BLCKSZ); ph = (PageHeader) pg; while ((nbytes = read(dbfd, pg, BLCKSZ)) == BLCKSZ) { max = PageGetMaxOffsetNumber(pg); /* look at each tuple on the page */ for (i = 0; i <= max; i++) { int offset; /* if it's a freed tuple, ignore it */ if (!(ph->pd_linp[i].lp_flags & LP_USED)) continue; /* get a pointer to the tuple itself */ offset = (int) ph->pd_linp[i].lp_off; tup = (HeapTuple) (((char *) pg) + offset); /* * if the tuple has been deleted (the database was destroyed), * skip this tuple. XXX warning, will robinson: violation of * transaction semantics happens right here. we should check * to be sure that the xact that deleted this tuple actually * committed. only way to do this at init time is to paw over * the log relation by hand, too. let's be optimistic. * * XXX This is an evil type cast. tup->t_xmax is char[5] while * TransactionId is struct * { char data[5] }. It works but * if data is ever moved and no longer the first field this * will be broken!! -mer 11 Nov 1991. */ if (TransactionIdIsValid((TransactionId)tup->t_xmax)) continue; /* * Okay, see if this is the one we want. * XXX 1 july 91: mao and mer discover that tuples now squash * t_bits. Why is this? * * 24 july 92: mer realizes that the t_bits field is only * used in the event of null values. If no * fields are null we reduce the header size * by doing the squash. t_hoff tells you exactly * how big the header actually is. use the PC * means of getting at sys cat attrs. 
*/ tup_db = (Form_pg_database)GETSTRUCT(tup); if (strncmp(GetDatabaseName(), &(tup_db->datname.data[0]), 16) == 0) { MyDatabaseId = tup->t_oid; goto done; } } } done: (void) close(dbfd); pfree(pg); if (!OidIsValid(MyDatabaseId)) elog(FATAL, "Database %s does not exist in %s", GetDatabaseName(), DatabaseRelationName); }
/* * regclassout - converts class OID to "class_name" */ Datum regclassout(PG_FUNCTION_ARGS) { Oid classid = PG_GETARG_OID(0); char *result; HeapTuple classtup; cqContext *pcqCtx; if (classid == InvalidOid) { result = pstrdup("-"); PG_RETURN_CSTRING(result); } pcqCtx = caql_beginscan( NULL, cql("SELECT * FROM pg_class " " WHERE oid = :1 ", ObjectIdGetDatum(classid))); classtup = caql_getnext(pcqCtx); /* XXX XXX select relname, relnamespace from pg_class */ if (HeapTupleIsValid(classtup)) { Form_pg_class classform = (Form_pg_class) GETSTRUCT(classtup); char *classname = NameStr(classform->relname); /* * In bootstrap mode, skip the fancy namespace stuff and just return * the class name. (This path is only needed for debugging output * anyway.) */ if (IsBootstrapProcessingMode()) result = pstrdup(classname); else { char *nspname; /* * Would this class be found by regclassin? If not, qualify it. */ if (RelationIsVisible(classid)) nspname = NULL; else nspname = get_namespace_name(classform->relnamespace); result = quote_qualified_identifier(nspname, classname); } } else { /* If OID doesn't match any pg_class entry, return it numerically */ result = (char *) palloc(NAMEDATALEN); snprintf(result, NAMEDATALEN, "%u", classid); } caql_endscan(pcqCtx); PG_RETURN_CSTRING(result); }
/*
 * CreateTableSpace - execute CREATE TABLESPACE
 *
 * Only superusers may create a tablespace, although the owner named in the
 * statement need not be one.  The statement's filespace must exist and must
 * not be the system filespace (except during bootstrap).
 *
 * stmt: the parsed statement.  On the dispatcher, stmt->tsoid is overwritten
 * with the newly assigned tablespace OID before the statement is dispatched.
 */
void
CreateTableSpace(CreateTableSpaceStmt *stmt)
{
	Relation	spcRel;
	Relation	filespaceRel;
	Datum		colvals[Natts_pg_tablespace];
	bool		colnulls[Natts_pg_tablespace];
	HeapTuple	newtup;
	Oid			tablespaceoid;
	Oid			filespaceoid;
	Oid			ownerId;
	TablespaceDirNode dirNode;
	ItemPointerData persistentTid;
	int64		persistentSerialNum;
	cqContext	ctxBuf;
	cqContext  *insertCtx;

	/* The caller must be a superuser ... */
	if (!superuser())
	{
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to create tablespace \"%s\"",
						stmt->tablespacename),
				 errhint("Must be superuser to create a tablespace.")));
	}

	/* ... but the eventual owner of the tablespace need not be */
	ownerId = stmt->owner ? get_roleid_checked(stmt->owner) : GetUserId();

	/* Names carrying the reserved prefix are kept for system use */
	if (!allowSystemTableModsDDL && IsReservedName(stmt->tablespacename))
	{
		ereport(ERROR,
				(errcode(ERRCODE_RESERVED_NAME),
				 errmsg("unacceptable tablespace name \"%s\"",
						stmt->tablespacename),
				 errdetail("The prefix \"%s\" is reserved for system tablespaces.",
						   GetReservedPrefix(stmt->tablespacename))));
	}

	/*
	 * Resolve the filespace.  The relation is closed with NoLock so that
	 * the lock is held until commit/abort.
	 */
	filespaceRel = heap_open(FileSpaceRelationId, RowShareLock);
	filespaceoid = get_filespace_oid(filespaceRel, stmt->filespacename);
	heap_close(filespaceRel, NoLock);

	if (!OidIsValid(filespaceoid))
	{
		ereport(ERROR,
				(errcode(ERRCODE_UNDEFINED_OBJECT),
				 errmsg("filespace \"%s\" does not exist",
						stmt->filespacename)));
	}

	/*
	 * Filespace pg_system is reserved for system use; only the pg_global
	 * and pg_default tablespaces live there.
	 *
	 * Its directory layout differs from other filespaces.  Rather than one
	 * subdirectory per tablespace, the two system tablespaces have fixed
	 * locations within it:
	 *		pg_global  : $PG_SYSTEM/global/relfilenode
	 *		pg_default : $PG_SYSTEM/base/dboid/relfilenode
	 *
	 * In other words PG_SYSTEM points to the segment's "datadir", or in
	 * postgres vocabulary $PGDATA.
	 */
	if (filespaceoid == SYSTEMFILESPACE_OID && !IsBootstrapProcessingMode())
	{
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("permission denied to create tablespace \"%s\"",
						stmt->tablespacename),
				 errhint("filespace %s is reserved for system use",
						 stmt->filespacename)));
	}

	/*
	 * Reject a duplicate name up front.  The unique index would catch it
	 * anyway, but this gives a friendlier message.
	 */
	if (OidIsValid(get_tablespace_oid(stmt->tablespacename)))
	{
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_OBJECT),
				 errmsg("tablespace \"%s\" already exists",
						stmt->tablespacename)));
	}

	/*
	 * Insert the pg_tablespace row first, so the proposed name is locked
	 * against other would-be creators.  The insertion rolls back if a
	 * problem is found below.
	 */
	spcRel = heap_open(TableSpaceRelationId, RowExclusiveLock);

	insertCtx = caql_beginscan(
			caql_addrel(cqclr(&ctxBuf), spcRel),
			cql("INSERT INTO pg_tablespace",
				NULL));

	/* Every column starts out NULL; fill in the three we know */
	MemSet(colnulls, true, sizeof(colnulls));

	colvals[Anum_pg_tablespace_spcname - 1] =
		DirectFunctionCall1(namein, CStringGetDatum(stmt->tablespacename));
	colnulls[Anum_pg_tablespace_spcname - 1] = false;

	colvals[Anum_pg_tablespace_spcowner - 1] = ObjectIdGetDatum(ownerId);
	colnulls[Anum_pg_tablespace_spcowner - 1] = false;

	colvals[Anum_pg_tablespace_spcfsoid - 1] = ObjectIdGetDatum(filespaceoid);
	colnulls[Anum_pg_tablespace_spcfsoid - 1] = false;

	newtup = caql_form_tuple(insertCtx, colvals, colnulls);

	/* Keep OIDs synchronized between master and segments */
	if (OidIsValid(stmt->tsoid))
		HeapTupleSetOid(newtup, stmt->tsoid);

	/* The insert also updates the indexes (implicit) */
	tablespaceoid = caql_insert(insertCtx, newtup);

	heap_freetuple(newtup);

	/* Closing with NoLock keeps the pg_tablespace lock until commit */
	caql_endscan(insertCtx);
	heap_close(spcRel, NoLock);

	/* Create the persistent directory for the tablespace */
	dirNode.tablespace = tablespaceoid;
	dirNode.filespace = filespaceoid;

	MirroredFileSysObj_TransactionCreateTablespaceDir(
			&dirNode,
			&persistentTid,
			&persistentSerialNum);

	/*
	 * Record the dependency on the owner.
	 *
	 * No dependency on pg_filespace is recorded because dependencies
	 * between shared objects are not tracked.  The pg_tablespace row itself
	 * holds the reference back to pg_filespace and serves the same purpose
	 * that a pg_shdepend entry would.
	 */
	recordDependencyOnOwner(TableSpaceRelationId, tablespaceoid, ownerId);

	if (Gp_role == GP_ROLE_DISPATCH)
	{
		/* Hand the assigned OID to the segments along with the statement */
		stmt->tsoid = tablespaceoid;
		CdbDispatchUtilityStatement((Node *) stmt,
									DF_CANCEL_ON_ERROR |
									DF_WITH_SNAPSHOT |
									DF_NEED_TWO_PHASE,
									NULL);

		/* MPP-6929: metadata tracking */
		MetaTrackAddObject(TableSpaceRelationId,
						   tablespaceoid,
						   GetUserId(),
						   "CREATE", "TABLESPACE");
	}

	/*
	 * Force synchronous commit, to keep the window between the on-disk
	 * changes and the transaction being marked committed as small as
	 * possible.
	 */
	ForceSyncCommit();
}
/* * regtypein - converts "typename" to type OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_type entry. * * In bootstrap mode the name must just equal some existing name in pg_type. * In normal mode the type name can be specified using the full type syntax * recognized by the parser; for example, DOUBLE PRECISION and INTEGER[] will * work and be translated to the correct type names. (We ignore any typmod * info generated by the parser, however.) */ Datum regtypein(PG_FUNCTION_ARGS) { char *typ_name_or_oid = PG_GETARG_CSTRING(0); Oid result = InvalidOid; int32 typmod; /* '-' ? */ if (strcmp(typ_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (typ_name_or_oid[0] >= '0' && typ_name_or_oid[0] <= '9' && strspn(typ_name_or_oid, "0123456789") == strlen(typ_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(typ_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a type name, possibly schema-qualified or decorated */ /* * In bootstrap mode we assume the given name is not schema-qualified, and * just search pg_type for a match. This is needed for initializing other * system catalogs (pg_namespace may not exist yet, and certainly there * are no schemas other than pg_catalog). */ if (IsBootstrapProcessingMode()) { int matches = 0; result = caql_getoid_plus( NULL, &matches, NULL, cql("SELECT oid FROM pg_type " " WHERE typname = :1 ", CStringGetDatum(typ_name_or_oid))); if (0 == matches) { ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("type \"%s\" does not exist", typ_name_or_oid))); } /* We assume there can be only one match */ PG_RETURN_OID(result); } /* * Normal case: invoke the full parser to deal with special cases such as * array syntax. */ parseTypeString(typ_name_or_oid, &result, &typmod); PG_RETURN_OID(result); }
/* * regtypein - converts "typename" to type OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_type entry. * * In bootstrap mode the name must just equal some existing name in pg_type. * In normal mode the type name can be specified using the full type syntax * recognized by the parser; for example, DOUBLE PRECISION and INTEGER[] will * work and be translated to the correct type names. (We ignore any typmod * info generated by the parser, however.) */ Datum regtypein(PG_FUNCTION_ARGS) { char *typ_name_or_oid = PG_GETARG_CSTRING(0); Oid result = InvalidOid; int32 typmod; /* '-' ? */ if (strcmp(typ_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (typ_name_or_oid[0] >= '0' && typ_name_or_oid[0] <= '9' && strspn(typ_name_or_oid, "0123456789") == strlen(typ_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(typ_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a type name, possibly schema-qualified or decorated */ /* * In bootstrap mode we assume the given name is not schema-qualified, and * just search pg_type for a match. This is needed for initializing other * system catalogs (pg_namespace may not exist yet, and certainly there * are no schemas other than pg_catalog). 
*/ if (IsBootstrapProcessingMode()) { Relation hdesc; ScanKeyData skey[1]; SysScanDesc sysscan; HeapTuple tuple; ScanKeyInit(&skey[0], Anum_pg_type_typname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(typ_name_or_oid)); hdesc = heap_open(TypeRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, TypeNameNspIndexId, true, NULL, 1, skey); if (HeapTupleIsValid(tuple = systable_getnext(sysscan))) result = HeapTupleGetOid(tuple); else ereport(ERROR, (errcode(ERRCODE_UNDEFINED_OBJECT), errmsg("type \"%s\" does not exist", typ_name_or_oid))); /* We assume there can be only one match */ systable_endscan(sysscan); heap_close(hdesc, AccessShareLock); PG_RETURN_OID(result); } /* * Normal case: invoke the full parser to deal with special cases such as * array syntax. */ parseTypeString(typ_name_or_oid, &result, &typmod, false); PG_RETURN_OID(result); }
/* --------------------------------
 * InitPostgres
 *		Initialize POSTGRES.
 *
 * The database can be specified by name, using the in_dbname parameter, or by
 * OID, using the dboid parameter.  In the latter case, the computed database
 * name is passed out to the caller as a palloc'ed string in out_dbname.
 *
 * Parameters:
 *	in_dbname	name of the database to connect to, or NULL to look the
 *				database up by dboid instead
 *	dboid		OID of the database; consulted only when in_dbname is NULL
 *	username	authenticated user name; used in the normal multiuser case
 *				and in the "no roles are defined" hint
 *	out_dbname	when in_dbname is NULL, receives a palloc'ed copy of the
 *				database name.  May be NULL if the caller does not need it.
 *
 * In bootstrap mode no parameters are used.
 *
 * The return value indicates whether the userID is a superuser.  (That
 * can only be tested inside a transaction, so we want to do it during
 * the startup transaction rather than doing a separate one in postgres.c.)
 *
 * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we
 * already have a PGPROC struct ... but it's not filled in yet.
 *
 * Note:
 *		Be very careful with the order of calls in the InitPostgres function.
 * --------------------------------
 */
bool
InitPostgres(const char *in_dbname, Oid dboid, const char *username,
			 char **out_dbname)
{
	bool		bootstrap = IsBootstrapProcessingMode();
	bool		autovacuum = IsAutoVacuumWorkerProcess();
	bool		am_superuser;
	char	   *fullpath;
	char		dbname[NAMEDATALEN];	/* filled in only when !bootstrap */

	/*
	 * Set up the global variables holding database id and path.  But note we
	 * won't actually try to touch the database just yet.
	 *
	 * We take a shortcut in the bootstrap case, otherwise we have to look up
	 * the db name in pg_database.
	 */
	if (bootstrap)
	{
		MyDatabaseId = TemplateDbOid;
		MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
	}
	else
	{
		/*
		 * Find tablespace of the database we're about to open. Since we're
		 * not yet up and running we have to use one of the hackish
		 * FindMyDatabase variants, which look in the flat-file copy of
		 * pg_database.
		 *
		 * If the in_dbname param is NULL, lookup database by OID.
		 */
		if (in_dbname == NULL)
		{
			if (!FindMyDatabaseByOid(dboid, dbname, &MyDatabaseTableSpace))
				ereport(FATAL,
						(errcode(ERRCODE_UNDEFINED_DATABASE),
						 errmsg("database %u does not exist", dboid)));
			MyDatabaseId = dboid;

			/*
			 * Pass the database name to the caller, if it asked for it.  A
			 * caller that does not need the name may pass NULL.
			 */
			if (out_dbname != NULL)
				*out_dbname = pstrdup(dbname);
		}
		else
		{
			if (!FindMyDatabase(in_dbname, &MyDatabaseId, &MyDatabaseTableSpace))
				ereport(FATAL,
						(errcode(ERRCODE_UNDEFINED_DATABASE),
						 errmsg("database \"%s\" does not exist",
								in_dbname)));
			/* our database name is gotten from the caller */
			strlcpy(dbname, in_dbname, NAMEDATALEN);
		}
	}

	fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);

	SetDatabasePath(fullpath);

	/*
	 * Finish filling in the PGPROC struct, and add it to the ProcArray. (We
	 * need to know MyDatabaseId before we can do this, since it's entered
	 * into the PGPROC struct.)
	 *
	 * Once I have done this, I am visible to other backends!
	 */
	InitProcessPhase2();

	/*
	 * Initialize my entry in the shared-invalidation manager's array of
	 * per-backend data.
	 *
	 * Sets up MyBackendId, a unique backend identifier.
	 */
	MyBackendId = InvalidBackendId;

	SharedInvalBackendInit();

	if (MyBackendId > MaxBackends || MyBackendId <= 0)
		elog(FATAL, "bad backend id: %d", MyBackendId);

	/*
	 * bufmgr needs another initialization call too
	 */
	InitBufferPoolBackend();

	/*
	 * Initialize local process's access to XLOG.  In bootstrap case we may
	 * skip this since StartupXLOG() was run instead.
	 */
	if (!bootstrap)
		InitXLOGAccess();

	/*
	 * Initialize the relation cache and the system catalog caches.  Note that
	 * no catalog access happens here; we only set up the hashtable structure.
	 * We must do this before starting a transaction because transaction abort
	 * would try to touch these hashtables.
	 */
	RelationCacheInitialize();
	InitCatalogCache();
	InitPlanCache();

	/* Initialize portal manager */
	EnablePortalManager();

	/* Initialize stats collection --- must happen before first xact */
	if (!bootstrap)
		pgstat_initialize();

	/*
	 * Set up process-exit callback to do pre-shutdown cleanup.  This has to
	 * be after we've initialized all the low-level modules like the buffer
	 * manager, because during shutdown this has to run before the low-level
	 * modules start to close down.  On the other hand, we want it in place
	 * before we begin our first transaction --- if we fail during the
	 * initialization transaction, as is entirely possible, we need the
	 * AbortTransaction call to clean up.
	 */
	on_shmem_exit(ShutdownPostgres, 0);

	/*
	 * Start a new transaction here before first access to db, and get a
	 * snapshot.  We don't have a use for the snapshot itself, but we're
	 * interested in the secondary effect that it sets RecentGlobalXmin.
	 */
	if (!bootstrap)
	{
		StartTransactionCommand();
		(void) GetTransactionSnapshot();
	}

	/*
	 * Now that we have a transaction, we can take locks.  Take a writer's
	 * lock on the database we are trying to connect to.  If there is a
	 * concurrently running DROP DATABASE on that database, this will block us
	 * until it finishes (and has updated the flat file copy of pg_database).
	 *
	 * Note that the lock is not held long, only until the end of this startup
	 * transaction.  This is OK since we are already advertising our use of
	 * the database in the PGPROC array; anyone trying a DROP DATABASE after
	 * this point will see us there.
	 *
	 * Note: use of RowExclusiveLock here is reasonable because we envision
	 * our session as being a concurrent writer of the database.  If we had a
	 * way of declaring a session as being guaranteed-read-only, we could use
	 * AccessShareLock for such sessions and thereby not conflict against
	 * CREATE DATABASE.
	 */
	if (!bootstrap)
		LockSharedObject(DatabaseRelationId, MyDatabaseId, 0,
						 RowExclusiveLock);

	/*
	 * Recheck the flat file copy of pg_database to make sure the target
	 * database hasn't gone away.  If there was a concurrent DROP DATABASE,
	 * this ensures we will die cleanly without creating a mess.
	 */
	if (!bootstrap)
	{
		Oid			dbid2;
		Oid			tsid2;

		if (!FindMyDatabase(dbname, &dbid2, &tsid2) ||
			dbid2 != MyDatabaseId || tsid2 != MyDatabaseTableSpace)
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist",
							dbname),
					 errdetail("It seems to have just been dropped or renamed.")));
	}

	/*
	 * Now we should be able to access the database directory safely. Verify
	 * it's there and looks reasonable.
	 */
	if (!bootstrap)
	{
		if (access(fullpath, F_OK) == -1)
		{
			if (errno == ENOENT)
				ereport(FATAL,
						(errcode(ERRCODE_UNDEFINED_DATABASE),
						 errmsg("database \"%s\" does not exist",
								dbname),
						 errdetail("The database subdirectory \"%s\" is missing.",
								   fullpath)));
			else
				ereport(FATAL,
						(errcode_for_file_access(),
						 errmsg("could not access directory \"%s\": %m",
								fullpath)));
		}

		ValidatePgVersion(fullpath);
	}

	/*
	 * It's now possible to do real access to the system catalogs.
	 *
	 * Load relcache entries for the system catalogs.  This must create at
	 * least the minimum set of "nailed-in" cache entries.
	 */
	RelationCacheInitializePhase2();

	/*
	 * Figure out our postgres user id, and see if we are a superuser.
	 *
	 * In standalone mode and in the autovacuum process, we use a fixed id,
	 * otherwise we figure it out from the authenticated user name.
	 */
	if (bootstrap || autovacuum)
	{
		InitializeSessionUserIdStandalone();
		am_superuser = true;
	}
	else if (!IsUnderPostmaster)
	{
		InitializeSessionUserIdStandalone();
		am_superuser = true;
		if (!ThereIsAtLeastOneRole())
			ereport(WARNING,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("no roles are defined in this database system"),
					 errhint("You should immediately run CREATE USER \"%s\" CREATEUSER;.",
							 username)));
	}
	else
	{
		/* normal multiuser case */
		InitializeSessionUserId(username);
		am_superuser = superuser();
	}

	/* set up ACL framework (so CheckMyDatabase can check permissions) */
	initialize_acl();

	/*
	 * Read the real pg_database row for our database, check permissions and
	 * set up database-specific GUC settings.  We can't do this until all the
	 * database-access infrastructure is up.  (Also, it wants to know if the
	 * user is a superuser, so the above stuff has to happen first.)
	 */
	if (!bootstrap)
		CheckMyDatabase(dbname, am_superuser);

	/*
	 * If we're trying to shut down, only superusers can connect.
	 */
	if (!am_superuser &&
		MyProcPort != NULL &&
		MyProcPort->canAcceptConnections == CAC_WAITBACKUP)
		ereport(FATAL,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to connect during database shutdown")));

	/*
	 * Check a normal user hasn't connected to a superuser reserved slot.
	 */
	if (!am_superuser &&
		ReservedBackends > 0 &&
		!HaveNFreeProcs(ReservedBackends))
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("connection limit exceeded for non-superusers")));

	/*
	 * Initialize various default states that can't be set up until we've
	 * selected the active user and gotten the right GUC settings.
	 */

	/* set default namespace search path */
	InitializeSearchPath();

	/* initialize client encoding */
	InitializeClientEncoding();

	/* report this backend in the PgBackendStatus array */
	if (!bootstrap)
		pgstat_bestart();

	/* close the transaction we started above */
	if (!bootstrap)
		CommitTransactionCommand();

	return am_superuser;
}
/* * regprocin - converts "proname" to proc OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_proc entry. */ Datum regprocin(PG_FUNCTION_ARGS) { char *pro_name_or_oid = PG_GETARG_CSTRING(0); RegProcedure result = InvalidOid; List *names; FuncCandidateList clist; /* '-' ? */ if (strcmp(pro_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (pro_name_or_oid[0] >= '0' && pro_name_or_oid[0] <= '9' && strspn(pro_name_or_oid, "0123456789") == strlen(pro_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(pro_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a name, possibly schema-qualified */ /* * In bootstrap mode we assume the given name is not schema-qualified, and * just search pg_proc for a unique match. This is needed for * initializing other system catalogs (pg_namespace may not exist yet, and * certainly there are no schemas other than pg_catalog). 
*/ if (IsBootstrapProcessingMode()) { int matches = 0; Relation hdesc; ScanKeyData skey[1]; SysScanDesc sysscan; HeapTuple tuple; ScanKeyInit(&skey[0], Anum_pg_proc_proname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(pro_name_or_oid)); hdesc = heap_open(ProcedureRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, ProcedureNameArgsNspIndexId, true, NULL, 1, skey); while (HeapTupleIsValid(tuple = systable_getnext(sysscan))) { result = (RegProcedure) HeapTupleGetOid(tuple); if (++matches > 1) break; } systable_endscan(sysscan); heap_close(hdesc, AccessShareLock); if (matches == 0) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist", pro_name_or_oid))); else if (matches > 1) ereport(ERROR, (errcode(ERRCODE_AMBIGUOUS_FUNCTION), errmsg("more than one function named \"%s\"", pro_name_or_oid))); PG_RETURN_OID(result); } /* * Normal case: parse the name into components and see if it matches any * pg_proc entries in the current search path. */ names = stringToQualifiedNameList(pro_name_or_oid); clist = FuncnameGetCandidates(names, -1, NIL, false, false, false); if (clist == NULL) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist", pro_name_or_oid))); else if (clist->next != NULL) ereport(ERROR, (errcode(ERRCODE_AMBIGUOUS_FUNCTION), errmsg("more than one function named \"%s\"", pro_name_or_oid))); result = clist->oid; PG_RETURN_OID(result); }
/*
 * CREATE SCHEMA
 *
 * stmt: the parsed CREATE SCHEMA statement.  On the dispatcher,
 *		stmt->schemaOid is set to the new namespace's OID before the
 *		statement is dispatched.
 * queryString: source text of the command; it is passed through to
 *		ProcessUtility for each statement embedded in the CREATE SCHEMA.
 */
void
CreateSchemaCommand(CreateSchemaStmt *stmt, const char *queryString)
{
	const char *schemaName = stmt->schemaname;
	const char *authId = stmt->authid;
	Oid			namespaceId;
	OverrideSearchPath *overridePath;
	List	   *parsetree_list;
	ListCell   *parsetree_item;
	Oid			owner_uid;
	Oid			saved_uid;
	int			save_sec_context;
	AclResult	aclresult;
	bool		shouldDispatch = (Gp_role == GP_ROLE_DISPATCH &&
								  !IsBootstrapProcessingMode());

	/*
	 * GPDB: Creation of temporary namespaces is a special case. This statement
	 * is dispatched by the dispatcher node the first time a temporary table is
	 * created. It bypasses all the normal checks and logic of schema creation,
	 * and is routed to the internal routine for creating temporary namespaces,
	 * instead.
	 */
	if (stmt->istemp)
	{
		Assert(Gp_role == GP_ROLE_EXECUTE);

		/* schemaname is a string pointer, so test it against NULL */
		Assert(stmt->schemaname == NULL);
		Assert(stmt->authid == NULL);
		Assert(stmt->schemaElts == NIL);
		Assert(stmt->schemaOid != InvalidOid);
		Assert(stmt->toastSchemaOid != InvalidOid);

		InitTempTableNamespaceWithOids(stmt->schemaOid,
									   stmt->toastSchemaOid);
		return;
	}

	GetUserIdAndSecContext(&saved_uid, &save_sec_context);

	/*
	 * Who is supposed to own the new schema?
	 */
	if (authId)
		owner_uid = get_roleid_checked(authId);
	else
		owner_uid = saved_uid;

	/*
	 * To create a schema, must have schema-create privilege on the current
	 * database and must be able to become the target role (this does not
	 * imply that the target role itself must have create-schema privilege).
	 * The latter provision guards against "giveaway" attacks.  Note that a
	 * superuser will always have both of these privileges a fortiori.
	 */
	aclresult = pg_database_aclcheck(MyDatabaseId, saved_uid, ACL_CREATE);
	if (aclresult != ACLCHECK_OK)
		aclcheck_error(aclresult, ACL_KIND_DATABASE,
					   get_database_name(MyDatabaseId));

	check_is_member_of_role(saved_uid, owner_uid);

	/* Additional check to protect reserved schema names */
	if (!allowSystemTableModsDDL && IsReservedName(schemaName))
	{
		ereport(ERROR,
				(errcode(ERRCODE_RESERVED_NAME),
				 errmsg("unacceptable schema name \"%s\"", schemaName),
				 errdetail("The prefix \"%s\" is reserved for system schemas.",
						   GetReservedPrefix(schemaName))));
	}

	/*
	 * If the requested authorization is different from the current user,
	 * temporarily set the current user so that the object(s) will be created
	 * with the correct ownership.
	 *
	 * (The setting will be restored at the end of this routine, or in case
	 * of error, transaction abort will clean things up.)
	 */
	if (saved_uid != owner_uid)
		SetUserIdAndSecContext(owner_uid,
							   save_sec_context | SECURITY_LOCAL_USERID_CHANGE);

	/* Create the schema's namespace */
	if (shouldDispatch || Gp_role != GP_ROLE_EXECUTE)
	{
		/* Passing 0 lets NamespaceCreate pick the OID */
		namespaceId = NamespaceCreate(schemaName, owner_uid, 0);

		if (shouldDispatch)
		{
			/* Oid is unsigned, so print it with %u */
			elog(DEBUG5, "shouldDispatch = true, namespaceOid = %u", namespaceId);

			Assert(stmt->schemaOid == 0);
			stmt->schemaOid = namespaceId;

			/*
			 * Dispatch the command to all primary and mirror segment dbs.
			 * Starts a global transaction and reconfigures cluster if needed.
			 * Waits for QEs to finish.  Exits via ereport(ERROR,...) if error.
			 */
			CdbDispatchUtilityStatement((Node *) stmt,
										DF_CANCEL_ON_ERROR |
										DF_WITH_SNAPSHOT |
										DF_NEED_TWO_PHASE,
										NULL);
		}

		/* MPP-6929: metadata tracking */
		if (Gp_role == GP_ROLE_DISPATCH)
			MetaTrackAddObject(NamespaceRelationId,
							   namespaceId,
							   saved_uid,
							   "CREATE", "SCHEMA");
	}
	else
	{
		/* Reuse the OID carried in the statement */
		namespaceId = NamespaceCreate(schemaName, owner_uid, stmt->schemaOid);
	}

	/* Advance cmd counter to make the namespace visible */
	CommandCounterIncrement();

	/*
	 * Temporarily make the new namespace be the front of the search path, as
	 * well as the default creation target namespace.  This will be undone at
	 * the end of this routine, or upon error.
	 */
	overridePath = GetOverrideSearchPath(CurrentMemoryContext);
	overridePath->schemas = lcons_oid(namespaceId, overridePath->schemas);
	/* XXX should we clear overridePath->useTemp? */
	PushOverrideSearchPath(overridePath);

	/*
	 * Examine the list of commands embedded in the CREATE SCHEMA command, and
	 * reorganize them into a sequentially executable order with no forward
	 * references.  Note that the result is still a list of raw parsetrees ---
	 * we cannot, in general, run parse analysis on one statement until we
	 * have actually executed the prior ones.
	 */
	parsetree_list = transformCreateSchemaStmt(stmt);

	/*
	 * Execute each command contained in the CREATE SCHEMA.  Since the grammar
	 * allows only utility commands in CREATE SCHEMA, there is no need to pass
	 * them through parse_analyze() or the rewriter; we can just hand them
	 * straight to ProcessUtility.
	 */
	foreach(parsetree_item, parsetree_list)
	{
		/* Named so as not to shadow the function's "stmt" parameter */
		Node	   *substmt = (Node *) lfirst(parsetree_item);

		/* do this step */
		ProcessUtility(substmt,
					   queryString,
					   NULL,
					   false,	/* not top level */
					   None_Receiver,
					   NULL);
		/* make sure later steps can see the object created here */
		CommandCounterIncrement();
	}
/* * regclassin - converts "classname" to class OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_class entry. */ Datum regclassin(PG_FUNCTION_ARGS) { char *class_name_or_oid = PG_GETARG_CSTRING(0); Oid result = InvalidOid; List *names; /* '-' ? */ if (strcmp(class_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (class_name_or_oid[0] >= '0' && class_name_or_oid[0] <= '9' && strspn(class_name_or_oid, "0123456789") == strlen(class_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(class_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a name, possibly schema-qualified */ /* * In bootstrap mode we assume the given name is not schema-qualified, and * just search pg_class for a match. This is needed for initializing * other system catalogs (pg_namespace may not exist yet, and certainly * there are no schemas other than pg_catalog). */ if (IsBootstrapProcessingMode()) { Relation hdesc; ScanKeyData skey[1]; SysScanDesc sysscan; HeapTuple tuple; ScanKeyInit(&skey[0], Anum_pg_class_relname, BTEqualStrategyNumber, F_NAMEEQ, CStringGetDatum(class_name_or_oid)); hdesc = heap_open(RelationRelationId, AccessShareLock); sysscan = systable_beginscan(hdesc, ClassNameNspIndexId, true, NULL, 1, skey); if (HeapTupleIsValid(tuple = systable_getnext(sysscan))) result = HeapTupleGetOid(tuple); else ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("relation \"%s\" does not exist", class_name_or_oid))); /* We assume there can be only one match */ systable_endscan(sysscan); heap_close(hdesc, AccessShareLock); PG_RETURN_OID(result); } /* * Normal case: parse the name into components and see if it matches any * pg_class entries in the current search path. */ names = stringToQualifiedNameList(class_name_or_oid); /* We might not even have permissions on this relation; don't lock it. 
*/ result = RangeVarGetRelid(makeRangeVarFromNameList(names), NoLock, false); PG_RETURN_OID(result); }
/* ---------------------------------------------------------------- * TypeCreate * * This does all the necessary work needed to define a new type. * * Returns the OID assigned to the new type. If newTypeOid is * zero (the normal case), a new OID is created; otherwise we * use exactly that OID. * ---------------------------------------------------------------- */ Oid TypeCreate(Oid newTypeOid, const char *typeName, Oid typeNamespace, Oid relationOid, /* only for relation rowtypes */ char relationKind, /* ditto */ Oid ownerId, int16 internalSize, char typeType, char typeCategory, bool typePreferred, char typDelim, Oid inputProcedure, Oid outputProcedure, Oid receiveProcedure, Oid sendProcedure, Oid typmodinProcedure, Oid typmodoutProcedure, Oid analyzeProcedure, Oid elementType, bool isImplicitArray, Oid arrayType, Oid baseType, const char *defaultTypeValue, /* human readable rep */ char *defaultTypeBin, /* cooked rep */ bool passedByValue, char alignment, char storage, int32 typeMod, int32 typNDims, /* Array dimensions for baseType */ bool typeNotNull, Oid typeCollation) { Relation pg_type_desc; Oid typeObjectId; bool rebuildDeps = false; HeapTuple tup; bool nulls[Natts_pg_type]; bool replaces[Natts_pg_type]; Datum values[Natts_pg_type]; NameData name; int i; Acl *typacl = NULL; /* * We assume that the caller validated the arguments individually, but did * not check for bad combinations. * * Validate size specifications: either positive (fixed-length) or -1 * (varlena) or -2 (cstring). */ if (!(internalSize > 0 || internalSize == -1 || internalSize == -2)) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("invalid type internal size %d", internalSize))); if (passedByValue) { /* * Pass-by-value types must have a fixed length that is one of the * values supported by fetch_att() and store_att_byval(); and the * alignment had better agree, too. All this code must match * access/tupmacs.h! 
*/ if (internalSize == (int16) sizeof(char)) { if (alignment != 'c') ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("alignment \"%c\" is invalid for passed-by-value type of size %d", alignment, internalSize))); } else if (internalSize == (int16) sizeof(int16)) { if (alignment != 's') ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("alignment \"%c\" is invalid for passed-by-value type of size %d", alignment, internalSize))); } else if (internalSize == (int16) sizeof(int32)) { if (alignment != 'i') ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("alignment \"%c\" is invalid for passed-by-value type of size %d", alignment, internalSize))); } #if SIZEOF_DATUM == 8 else if (internalSize == (int16) sizeof(Datum)) { if (alignment != 'd') ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("alignment \"%c\" is invalid for passed-by-value type of size %d", alignment, internalSize))); } #endif else ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("internal size %d is invalid for passed-by-value type", internalSize))); } else { /* varlena types must have int align or better */ if (internalSize == -1 && !(alignment == 'i' || alignment == 'd')) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("alignment \"%c\" is invalid for variable-length type", alignment))); /* cstring must have char alignment */ if (internalSize == -2 && !(alignment == 'c')) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("alignment \"%c\" is invalid for variable-length type", alignment))); } /* Only varlena types can be toasted */ if (storage != 'p' && internalSize != -1) ereport(ERROR, (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), errmsg("fixed-size types must have storage PLAIN"))); /* * initialize arrays needed for heap_form_tuple or heap_modify_tuple */ for (i = 0; i < Natts_pg_type; ++i) { nulls[i] = false; replaces[i] = true; values[i] = (Datum) 0; } /* * insert data values 
*/ namestrcpy(&name, typeName); values[Anum_pg_type_typname - 1] = NameGetDatum(&name); values[Anum_pg_type_typnamespace - 1] = ObjectIdGetDatum(typeNamespace); values[Anum_pg_type_typowner - 1] = ObjectIdGetDatum(ownerId); values[Anum_pg_type_typlen - 1] = Int16GetDatum(internalSize); values[Anum_pg_type_typbyval - 1] = BoolGetDatum(passedByValue); values[Anum_pg_type_typtype - 1] = CharGetDatum(typeType); values[Anum_pg_type_typcategory - 1] = CharGetDatum(typeCategory); values[Anum_pg_type_typispreferred - 1] = BoolGetDatum(typePreferred); values[Anum_pg_type_typisdefined - 1] = BoolGetDatum(true); values[Anum_pg_type_typdelim - 1] = CharGetDatum(typDelim); values[Anum_pg_type_typrelid - 1] = ObjectIdGetDatum(relationOid); values[Anum_pg_type_typelem - 1] = ObjectIdGetDatum(elementType); values[Anum_pg_type_typarray - 1] = ObjectIdGetDatum(arrayType); values[Anum_pg_type_typinput - 1] = ObjectIdGetDatum(inputProcedure); values[Anum_pg_type_typoutput - 1] = ObjectIdGetDatum(outputProcedure); values[Anum_pg_type_typreceive - 1] = ObjectIdGetDatum(receiveProcedure); values[Anum_pg_type_typsend - 1] = ObjectIdGetDatum(sendProcedure); values[Anum_pg_type_typmodin - 1] = ObjectIdGetDatum(typmodinProcedure); values[Anum_pg_type_typmodout - 1] = ObjectIdGetDatum(typmodoutProcedure); values[Anum_pg_type_typanalyze - 1] = ObjectIdGetDatum(analyzeProcedure); values[Anum_pg_type_typalign - 1] = CharGetDatum(alignment); values[Anum_pg_type_typstorage - 1] = CharGetDatum(storage); values[Anum_pg_type_typnotnull - 1] = BoolGetDatum(typeNotNull); values[Anum_pg_type_typbasetype - 1] = ObjectIdGetDatum(baseType); values[Anum_pg_type_typtypmod - 1] = Int32GetDatum(typeMod); values[Anum_pg_type_typndims - 1] = Int32GetDatum(typNDims); values[Anum_pg_type_typcollation - 1] = ObjectIdGetDatum(typeCollation); /* * initialize the default binary value for this type. Check for nulls of * course. 
*/ if (defaultTypeBin) values[Anum_pg_type_typdefaultbin - 1] = CStringGetTextDatum(defaultTypeBin); else nulls[Anum_pg_type_typdefaultbin - 1] = true; /* * initialize the default value for this type. */ if (defaultTypeValue) values[Anum_pg_type_typdefault - 1] = CStringGetTextDatum(defaultTypeValue); else nulls[Anum_pg_type_typdefault - 1] = true; typacl = get_user_default_acl(ACL_OBJECT_TYPE, ownerId, typeNamespace); if (typacl != NULL) values[Anum_pg_type_typacl - 1] = PointerGetDatum(typacl); else nulls[Anum_pg_type_typacl - 1] = true; /* * open pg_type and prepare to insert or update a row. * * NOTE: updating will not work correctly in bootstrap mode; but we don't * expect to be overwriting any shell types in bootstrap mode. */ pg_type_desc = heap_open(TypeRelationId, RowExclusiveLock); tup = SearchSysCacheCopy2(TYPENAMENSP, CStringGetDatum(typeName), ObjectIdGetDatum(typeNamespace)); if (HeapTupleIsValid(tup)) { /* * check that the type is not already defined. It may exist as a * shell type, however. 
*/ if (((Form_pg_type) GETSTRUCT(tup))->typisdefined) ereport(ERROR, (errcode(ERRCODE_DUPLICATE_OBJECT), errmsg("type \"%s\" already exists", typeName))); /* * shell type must have been created by same owner */ if (((Form_pg_type) GETSTRUCT(tup))->typowner != ownerId) aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_TYPE, typeName); /* trouble if caller wanted to force the OID */ if (OidIsValid(newTypeOid)) elog(ERROR, "cannot assign new OID to existing shell type"); /* * Okay to update existing shell type tuple */ tup = heap_modify_tuple(tup, RelationGetDescr(pg_type_desc), values, nulls, replaces); simple_heap_update(pg_type_desc, &tup->t_self, tup); typeObjectId = HeapTupleGetOid(tup); rebuildDeps = true; /* get rid of shell type's dependencies */ } else { tup = heap_form_tuple(RelationGetDescr(pg_type_desc), values, nulls); /* Force the OID if requested by caller */ if (OidIsValid(newTypeOid)) HeapTupleSetOid(tup, newTypeOid); /* Use binary-upgrade override for pg_type.oid, if supplied. */ else if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_pg_type_oid)) { HeapTupleSetOid(tup, binary_upgrade_next_pg_type_oid); binary_upgrade_next_pg_type_oid = InvalidOid; } /* else allow system to assign oid */ typeObjectId = simple_heap_insert(pg_type_desc, tup); } /* Update indexes */ CatalogUpdateIndexes(pg_type_desc, tup); /* * Create dependencies. We can/must skip this in bootstrap mode. */ if (!IsBootstrapProcessingMode()) GenerateTypeDependencies(typeNamespace, typeObjectId, relationOid, relationKind, ownerId, inputProcedure, outputProcedure, receiveProcedure, sendProcedure, typmodinProcedure, typmodoutProcedure, analyzeProcedure, elementType, isImplicitArray, baseType, typeCollation, (defaultTypeBin ? stringToNode(defaultTypeBin) : NULL), rebuildDeps); /* Post creation hook for new type */ InvokeObjectAccessHook(OAT_POST_CREATE, TypeRelationId, typeObjectId, 0, NULL); /* * finish up */ heap_close(pg_type_desc, RowExclusiveLock); return typeObjectId; }
/* * create_toast_table --- internal workhorse * * rel is already opened and locked * toastOid and toastIndexOid are normally InvalidOid, but during * bootstrap they can be nonzero to specify hand-assigned OIDs */ static bool create_toast_table(Relation rel, Oid toastOid, Oid toastIndexOid, Datum reloptions, LOCKMODE lockmode, bool check) { Oid relOid = RelationGetRelid(rel); HeapTuple reltup; TupleDesc tupdesc; bool shared_relation; bool mapped_relation; Relation toast_rel; Relation class_rel; Oid toast_relid; Oid toast_typid = InvalidOid; Oid namespaceid; char toast_relname[NAMEDATALEN]; char toast_idxname[NAMEDATALEN]; IndexInfo *indexInfo; Oid collationObjectId[2]; Oid classObjectId[2]; int16 coloptions[2]; ObjectAddress baseobject, toastobject; /* * Toast table is shared if and only if its parent is. * * We cannot allow toasting a shared relation after initdb (because * there's no way to mark it toasted in other databases' pg_class). */ shared_relation = rel->rd_rel->relisshared; if (shared_relation && !IsBootstrapProcessingMode()) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), errmsg("shared tables cannot be toasted after initdb"))); /* It's mapped if and only if its parent is, too */ mapped_relation = RelationIsMapped(rel); /* * Is it already toasted? */ if (rel->rd_rel->reltoastrelid != InvalidOid) return false; /* * Check to see whether the table actually needs a TOAST table. */ if (!IsBinaryUpgrade) { /* Normal mode, normal check */ if (!needs_toast_table(rel)) return false; } else { /* * In binary-upgrade mode, create a TOAST table if and only if * pg_upgrade told us to (ie, a TOAST table OID has been provided). * * This indicates that the old cluster had a TOAST table for the * current table. We must create a TOAST table to receive the old * TOAST file, even if the table seems not to need one. 
* * Contrariwise, if the old cluster did not have a TOAST table, we * should be able to get along without one even if the new version's * needs_toast_table rules suggest we should have one. There is a lot * of daylight between where we will create a TOAST table and where * one is really necessary to avoid failures, so small cross-version * differences in the when-to-create heuristic shouldn't be a problem. * If we tried to create a TOAST table anyway, we would have the * problem that it might take up an OID that will conflict with some * old-cluster table we haven't seen yet. */ if (!OidIsValid(binary_upgrade_next_toast_pg_class_oid) || !OidIsValid(binary_upgrade_next_toast_pg_type_oid)) return false; } /* * If requested check lockmode is sufficient. This is a cross check in * case of errors or conflicting decisions in earlier code. */ if (check && lockmode != AccessExclusiveLock) elog(ERROR, "AccessExclusiveLock required to add toast table."); /* * Create the toast table and its index */ snprintf(toast_relname, sizeof(toast_relname), "pg_toast_%u", relOid); snprintf(toast_idxname, sizeof(toast_idxname), "pg_toast_%u_index", relOid); /* this is pretty painful... need a tuple descriptor */ tupdesc = CreateTemplateTupleDesc(3); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "chunk_id", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "chunk_seq", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "chunk_data", BYTEAOID, -1, 0); /* * Ensure that the toast table doesn't itself get toasted, or we'll be * toast :-(. This is essential for chunk_data because type bytea is * toastable; hit the other two just to be sure. */ TupleDescAttr(tupdesc, 0)->attstorage = 'p'; TupleDescAttr(tupdesc, 1)->attstorage = 'p'; TupleDescAttr(tupdesc, 2)->attstorage = 'p'; /* * Toast tables for regular relations go in pg_toast; those for temp * relations go into the per-backend temp-toast-table namespace. 
*/ if (isTempOrTempToastNamespace(rel->rd_rel->relnamespace)) namespaceid = GetTempToastNamespace(); else namespaceid = PG_TOAST_NAMESPACE; /* * Use binary-upgrade override for pg_type.oid, if supplied. We might be * in the post-schema-restore phase where we are doing ALTER TABLE to * create TOAST tables that didn't exist in the old cluster. */ if (IsBinaryUpgrade && OidIsValid(binary_upgrade_next_toast_pg_type_oid)) { toast_typid = binary_upgrade_next_toast_pg_type_oid; binary_upgrade_next_toast_pg_type_oid = InvalidOid; } toast_relid = heap_create_with_catalog(toast_relname, namespaceid, rel->rd_rel->reltablespace, toastOid, toast_typid, InvalidOid, rel->rd_rel->relowner, tupdesc, NIL, RELKIND_TOASTVALUE, rel->rd_rel->relpersistence, shared_relation, mapped_relation, ONCOMMIT_NOOP, reloptions, false, true, true, InvalidOid, NULL); Assert(toast_relid != InvalidOid); /* make the toast relation visible, else heap_open will fail */ CommandCounterIncrement(); /* ShareLock is not really needed here, but take it anyway */ toast_rel = heap_open(toast_relid, ShareLock); /* * Create unique index on chunk_id, chunk_seq. * * NOTE: the normal TOAST access routines could actually function with a * single-column index on chunk_id only. However, the slice access * routines use both columns for faster access to an individual chunk. In * addition, we want it to be unique as a check against the possibility of * duplicate TOAST chunk OIDs. The index might also be a little more * efficient this way, since btree isn't all that happy with large numbers * of equal keys. 
*/ indexInfo = makeNode(IndexInfo); indexInfo->ii_NumIndexAttrs = 2; indexInfo->ii_NumIndexKeyAttrs = 2; indexInfo->ii_IndexAttrNumbers[0] = 1; indexInfo->ii_IndexAttrNumbers[1] = 2; indexInfo->ii_Expressions = NIL; indexInfo->ii_ExpressionsState = NIL; indexInfo->ii_Predicate = NIL; indexInfo->ii_PredicateState = NULL; indexInfo->ii_ExclusionOps = NULL; indexInfo->ii_ExclusionProcs = NULL; indexInfo->ii_ExclusionStrats = NULL; indexInfo->ii_Unique = true; indexInfo->ii_ReadyForInserts = true; indexInfo->ii_Concurrent = false; indexInfo->ii_BrokenHotChain = false; indexInfo->ii_ParallelWorkers = 0; indexInfo->ii_Am = BTREE_AM_OID; indexInfo->ii_AmCache = NULL; indexInfo->ii_Context = CurrentMemoryContext; collationObjectId[0] = InvalidOid; collationObjectId[1] = InvalidOid; classObjectId[0] = OID_BTREE_OPS_OID; classObjectId[1] = INT4_BTREE_OPS_OID; coloptions[0] = 0; coloptions[1] = 0; index_create(toast_rel, toast_idxname, toastIndexOid, InvalidOid, InvalidOid, InvalidOid, indexInfo, list_make2("chunk_id", "chunk_seq"), BTREE_AM_OID, rel->rd_rel->reltablespace, collationObjectId, classObjectId, coloptions, (Datum) 0, INDEX_CREATE_IS_PRIMARY, 0, true, true, NULL); heap_close(toast_rel, NoLock); /* * Store the toast table's OID in the parent relation's pg_class row */ class_rel = heap_open(RelationRelationId, RowExclusiveLock); reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(relOid)); if (!HeapTupleIsValid(reltup)) elog(ERROR, "cache lookup failed for relation %u", relOid); ((Form_pg_class) GETSTRUCT(reltup))->reltoastrelid = toast_relid; if (!IsBootstrapProcessingMode()) { /* normal case, use a transactional update */ CatalogTupleUpdate(class_rel, &reltup->t_self, reltup); } else { /* While bootstrapping, we cannot UPDATE, so overwrite in-place */ heap_inplace_update(class_rel, reltup); } heap_freetuple(reltup); heap_close(class_rel, RowExclusiveLock); /* * Register dependency from the toast table to the master, so that the * toast table will be deleted if 
the master is. Skip this in bootstrap * mode. */ if (!IsBootstrapProcessingMode()) { baseobject.classId = RelationRelationId; baseobject.objectId = relOid; baseobject.objectSubId = 0; toastobject.classId = RelationRelationId; toastobject.objectId = toast_relid; toastobject.objectSubId = 0; recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL); } /* * Make changes visible */ CommandCounterIncrement(); return true; }
/* --------------------------------
 * InitPostgres
 *		Initialize POSTGRES.
 *
 * The target database is identified either by name (in_dbname) or by OID
 * (dboid).  When it is identified by OID, the database name that was looked
 * up can be handed back through out_dbname; if out_dbname isn't NULL, it
 * must point to a buffer of size NAMEDATALEN.
 *
 * In bootstrap mode no parameters are used.  The autovacuum launcher process
 * doesn't use any parameters either, because it only goes far enough to be
 * able to read pg_database; it doesn't connect to any particular database.
 * In walsender mode only username is used.
 *
 * As of PostgreSQL 8.2, we expect InitProcess() was already called, so we
 * already have a PGPROC struct ... but it's not completely filled in yet.
 *
 * Note:
 *		Be very careful with the order of calls in the InitPostgres function.
 * --------------------------------
 */
void
InitPostgres(const char *in_dbname, Oid dboid, const char *username,
			 char *out_dbname)
{
	bool		bootstrap = IsBootstrapProcessingMode();
	bool		am_superuser;
	bool		fixed_identity;
	bool		restricted_session;
	char	   *fullpath;
	char		dbname[NAMEDATALEN];

	elog(DEBUG3, "InitPostgres");

	/*
	 * Enter my PGPROC struct into the ProcArray.
	 *
	 * From this point on, other backends can see me!
	 */
	InitProcessPhase2();

	/*
	 * Set up my slot in the shared-invalidation manager's array of
	 * per-backend data.
	 *
	 * This assigns MyBackendId, a unique backend identifier.
	 */
	MyBackendId = InvalidBackendId;

	SharedInvalBackendInit(false);

	if (MyBackendId > MaxBackends || MyBackendId <= 0)
		elog(FATAL, "bad backend ID: %d", MyBackendId);

	/* With a BackendId in hand we can take part in ProcSignal */
	ProcSignalInit(MyBackendId);

	/*
	 * The buffer manager wants one more initialization call
	 */
	InitBufferPoolBackend();

	/*
	 * Set up this process's access to XLOG.
	 */
	if (!IsUnderPostmaster)
	{
		/*
		 * Bootstrap process or standalone backend: in either case we must
		 * start the XLOG machinery ourselves and arrange for it to be shut
		 * down at exit.
		 */
		StartupXLOG();
		on_shmem_exit(ShutdownXLOG, 0);
	}
	else
	{
		/*
		 * The postmaster already started the XLOG machinery, but we need to
		 * call InitXLOGAccess(), if the system isn't in hot-standby mode.
		 * Calling RecoveryInProgress and discarding its result takes care of
		 * that.
		 */
		(void) RecoveryInProgress();
	}

	/*
	 * Set up the relation cache and the system catalog caches.  No catalog
	 * access happens yet; only the hashtable structures are created.  This
	 * has to precede the first transaction because transaction abort would
	 * try to touch these hashtables.
	 */
	RelationCacheInitialize();
	InitCatalogCache();
	InitPlanCache();

	/* Initialize portal manager */
	EnablePortalManager();

	/* Stats collection must be set up before the first xact */
	if (!bootstrap)
		pgstat_initialize();

	/*
	 * Load relcache entries for the shared system catalogs.  This must
	 * create at least entries for pg_database and catalogs used for
	 * authentication.
	 */
	RelationCacheInitializePhase2();

	/*
	 * Register the process-exit callback that does pre-shutdown cleanup.
	 * It must come after all the low-level modules (such as the buffer
	 * manager) are initialized, because at shutdown it has to run before
	 * those modules start closing down.  It must also be in place before our
	 * first transaction begins: if the initialization transaction fails, as
	 * is entirely possible, we need the AbortTransaction call to clean up.
	 */
	on_shmem_exit(ShutdownPostgres, 0);

	/* The autovacuum launcher is done here */
	if (IsAutoVacuumLauncherProcess())
		return;

	/*
	 * Begin a transaction before the first access to the db, and take a
	 * snapshot.  The snapshot itself is of no use to us; what matters is the
	 * side effect of setting RecentGlobalXmin.  (That is critical for
	 * anything that reads heap pages, because HOT may decide to prune them
	 * even if the process doesn't attempt to modify any tuples.)
	 */
	if (!bootstrap)
	{
		/* statement_timestamp must be set for timeouts to work correctly */
		SetCurrentStatementStartTimestamp();
		StartTransactionCommand();
		(void) GetTransactionSnapshot();
	}

	/*
	 * Authenticate the client if needed, then determine our postgres user ID
	 * and whether we are a superuser.
	 *
	 * Standalone mode and autovacuum worker processes use a fixed ID; in all
	 * other cases it is derived from the authenticated user name.
	 */
	fixed_identity = bootstrap || IsAutoVacuumWorkerProcess();

	if (fixed_identity)
	{
		InitializeSessionUserIdStandalone();
		am_superuser = true;
	}
	else if (!IsUnderPostmaster)
	{
		InitializeSessionUserIdStandalone();
		am_superuser = true;
		if (!ThereIsAtLeastOneRole())
			ereport(WARNING,
					(errcode(ERRCODE_UNDEFINED_OBJECT),
					 errmsg("no roles are defined in this database system"),
					 errhint("You should immediately run CREATE USER \"%s\" SUPERUSER;.",
							 username)));
	}
	else
	{
		/* normal multiuser case */
		Assert(MyProcPort != NULL);
		PerformAuthentication(MyProcPort);
		InitializeSessionUserId(username);
		am_superuser = superuser();
	}

	/*
	 * Sessions that are either not superuser or are walsenders are subject
	 * to the shutdown and reserved-slot restrictions applied below.
	 */
	restricted_session = (!am_superuser || am_walsender);

	/*
	 * While the system is trying to shut down, only superusers can connect,
	 * and new replication connections are not allowed.
	 */
	if (restricted_session &&
		MyProcPort != NULL &&
		MyProcPort->canAcceptConnections == CAC_WAITBACKUP)
	{
		if (!am_walsender)
			ereport(FATAL,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("must be superuser to connect during database shutdown")));
		else
			ereport(FATAL,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("new replication connections are not allowed during database shutdown")));
	}

	/*
	 * Binary upgrades only allowed super-user connections
	 */
	if (IsBinaryUpgrade && !am_superuser)
	{
		ereport(FATAL,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 errmsg("must be superuser to connect in binary upgrade mode")));
	}

	/*
	 * The last few connections slots are reserved for superusers.  Although
	 * replication connections currently require superuser privileges, we
	 * don't allow them to consume the reserved slots, which are intended for
	 * interactive use.
	 */
	if (restricted_session &&
		ReservedBackends > 0 &&
		!HaveNFreeProcs(ReservedBackends))
		ereport(FATAL,
				(errcode(ERRCODE_TOO_MANY_CONNECTIONS),
				 errmsg("remaining connection slots are reserved for non-replication superuser connections")));

	/*
	 * A walsender does not connect to any particular database.  It only
	 * finishes backend startup by processing the options from the startup
	 * packet, and then it is done.
	 */
	if (am_walsender)
	{
		Assert(!bootstrap);

		if (!superuser() && !is_authenticated_user_replication_role())
			ereport(FATAL,
					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
					 errmsg("must be superuser or replication role to start walsender")));

		/* process any options passed in the startup packet */
		if (MyProcPort != NULL)
			process_startup_options(MyProcPort, am_superuser);

		/* Apply PostAuthDelay as soon as we've read all options */
		if (PostAuthDelay > 0)
			pg_usleep(PostAuthDelay * 1000000L);

		/* initialize client encoding */
		InitializeClientEncoding();

		/* report this backend in the PgBackendStatus array */
		pgstat_bestart();

		/* close the transaction we started above */
		CommitTransactionCommand();

		return;
	}

	/*
	 * Fill in the globals holding the database id and default tablespace.
	 * The database itself is not touched just yet.
	 *
	 * Bootstrap mode takes a shortcut; otherwise the db's entry has to be
	 * looked up in pg_database.
	 */
	if (bootstrap)
	{
		MyDatabaseId = TemplateDbOid;
		MyDatabaseTableSpace = DEFAULTTABLESPACE_OID;
	}
	else if (in_dbname != NULL)
	{
		HeapTuple	tuple;
		Form_pg_database dbform;

		tuple = GetDatabaseTuple(in_dbname);
		if (!HeapTupleIsValid(tuple))
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist", in_dbname)));
		dbform = (Form_pg_database) GETSTRUCT(tuple);
		MyDatabaseId = HeapTupleGetOid(tuple);
		MyDatabaseTableSpace = dbform->dattablespace;
		/* take database name from the caller, just for paranoia */
		strlcpy(dbname, in_dbname, sizeof(dbname));
	}
	else
	{
		/* caller specified database by OID */
		HeapTuple	tuple;
		Form_pg_database dbform;

		tuple = GetDatabaseTupleByOid(dboid);
		if (!HeapTupleIsValid(tuple))
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database %u does not exist", dboid)));
		dbform = (Form_pg_database) GETSTRUCT(tuple);
		MyDatabaseId = HeapTupleGetOid(tuple);
		MyDatabaseTableSpace = dbform->dattablespace;
		Assert(MyDatabaseId == dboid);
		strlcpy(dbname, NameStr(dbform->datname), sizeof(dbname));
		/* pass the database name back to the caller */
		if (out_dbname)
			strcpy(out_dbname, dbname);
	}

	/* Now we can mark our PGPROC entry with the database ID */
	/* (We assume this is an atomic store so no lock is needed) */
	MyProc->databaseId = MyDatabaseId;

	if (!bootstrap)
	{
		HeapTuple	tuple;

		/*
		 * Take a writer's lock on the database we are connecting to.  If a
		 * DROP DATABASE on it is running concurrently, this blocks us until
		 * that finishes (and has committed its update of pg_database).
		 *
		 * The lock is held only until the end of this startup transaction.
		 * That is fine: we already advertise our use of the database in the
		 * PGPROC array, so any DROP DATABASE attempted after this point will
		 * see us there.
		 *
		 * Note: RowExclusiveLock is reasonable because we envision our
		 * session as being a concurrent writer of the database.  If we had a
		 * way of declaring a session as being guaranteed-read-only, we could
		 * use AccessShareLock for such sessions and thereby not conflict
		 * against CREATE DATABASE.
		 */
		LockSharedObject(DatabaseRelationId, MyDatabaseId, 0,
						 RowExclusiveLock);

		/*
		 * Look at pg_database again to confirm the target database is still
		 * there.  After a concurrent DROP DATABASE this lets us die cleanly
		 * without creating a mess.
		 */
		tuple = GetDatabaseTuple(dbname);
		if (!HeapTupleIsValid(tuple) ||
			MyDatabaseId != HeapTupleGetOid(tuple) ||
			MyDatabaseTableSpace != ((Form_pg_database) GETSTRUCT(tuple))->dattablespace)
			ereport(FATAL,
					(errcode(ERRCODE_UNDEFINED_DATABASE),
					 errmsg("database \"%s\" does not exist", dbname),
					 errdetail("It seems to have just been dropped or renamed.")));
	}

	/*
	 * The database directory should now be safe to access.  Check that it
	 * exists and looks reasonable.
	 */
	fullpath = GetDatabasePath(MyDatabaseId, MyDatabaseTableSpace);

	if (!bootstrap)
	{
		if (access(fullpath, F_OK) == -1)
		{
			if (errno != ENOENT)
				ereport(FATAL,
						(errcode_for_file_access(),
						 errmsg("could not access directory \"%s\": %m",
								fullpath)));
			else
				ereport(FATAL,
						(errcode(ERRCODE_UNDEFINED_DATABASE),
						 errmsg("database \"%s\" does not exist",
								dbname),
						 errdetail("The database subdirectory \"%s\" is missing.",
								   fullpath)));
		}

		ValidatePgVersion(fullpath);
	}

	SetDatabasePath(fullpath);

	/*
	 * Real access to the system catalogs is possible from here on.
	 *
	 * Load relcache entries for the system catalogs.  This must create at
	 * least the minimum set of "nailed-in" cache entries.
	 */
	RelationCacheInitializePhase3();

	/* set up ACL framework (so CheckMyDatabase can check permissions) */
	initialize_acl();

	/*
	 * Re-read the pg_database row for our database, check permissions and
	 * set up database-specific GUC settings.  This has to wait until all the
	 * database-access infrastructure is up.  (It also wants to know whether
	 * the user is a superuser, so the above stuff has to happen first.)
	 */
	if (!bootstrap)
		CheckMyDatabase(dbname, am_superuser);

	/*
	 * Process command-line switches and any additional GUC variable settings
	 * passed in the startup packet.  This could not be done earlier because
	 * we didn't know if client is a superuser.
	 */
	if (MyProcPort != NULL)
		process_startup_options(MyProcPort, am_superuser);

	/* Process pg_db_role_setting options */
	process_settings(MyDatabaseId, GetSessionUserId());

	/* Apply PostAuthDelay as soon as we've read all options */
	if (PostAuthDelay > 0)
		pg_usleep(PostAuthDelay * 1000000L);

	/*
	 * Initialize various default states that can't be set up until we've
	 * selected the active user and gotten the right GUC settings.
	 */

	/* set default namespace search path */
	InitializeSearchPath();

	/* initialize client encoding */
	InitializeClientEncoding();

	if (!bootstrap)
	{
		/* report this backend in the PgBackendStatus array */
		pgstat_bestart();

		/* close the transaction we started above */
		CommitTransactionCommand();
	}
}
/*
 *	SearchCatCache
 *
 *		Look up a tuple in a system cache, opening the relation if necessary
 *		(on the first access to a particular cache).
 *
 *		Returns NULL if there is no match, otherwise a pointer to a HeapTuple
 *		stored in the cache.  The caller must not modify the tuple, and must
 *		call ReleaseCatCache() when done with it.
 *
 * The search key values should be expressed as Datums of the key columns'
 * datatype(s).  (Pass zeroes for any unused parameters.)  As a special
 * exception, the passed-in key for a NAME column can be just a C string;
 * the caller need not go to the trouble of converting it to a fully
 * null-padded NAME.
 */
HeapTuple
SearchCatCache(CatCache *cache,
			   Datum v1,
			   Datum v2,
			   Datum v3,
			   Datum v4)
{
	ScanKeyData cur_skey[CATCACHE_MAXKEYS];
	uint32		hashValue;
	Index		hashIndex;
	Dlelem	   *elt;
	CatCTup    *ct;
	Relation	relation;
	SysScanDesc scandesc;
	HeapTuple	ntp;

	/*
	 * Each cache pays a one-time setup cost on first use
	 */
	if (cache->cc_tupdesc == NULL)
		CatalogCacheInitializeCache(cache);

#ifdef CATCACHE_STATS
	cache->cc_searches++;
#endif

	/*
	 * Start from the cache's scan key template and plug in the arguments
	 */
	memcpy(cur_skey, cache->cc_skey, sizeof(cur_skey));
	cur_skey[0].sk_argument = v1;
	cur_skey[1].sk_argument = v2;
	cur_skey[2].sk_argument = v3;
	cur_skey[3].sk_argument = v4;

	/*
	 * Work out which hash bucket the tuple would live in
	 */
	hashValue = CatalogCacheComputeHashValue(cache, cache->cc_nkeys, cur_skey);
	hashIndex = HASH_INDEX(hashValue, cache->cc_nbuckets);

	/*
	 * Walk the bucket until a match turns up or the bucket is exhausted
	 */
	for (elt = DLGetHead(&cache->cc_bucket[hashIndex]);
		 elt;
		 elt = DLGetSucc(elt))
	{
		bool		res;

		ct = (CatCTup *) DLE_VAL(elt);

		/* ignore dead entries, and quickly skip any with the wrong hash */
		if (ct->dead || ct->hash_value != hashValue)
			continue;

		/*
		 * compare the cached tuple against our key
		 */
		HeapKeyTest(&ct->tuple,
					cache->cc_tupdesc,
					cache->cc_nkeys,
					cur_skey,
					res);
		if (!res)
			continue;

		/*
		 * Cache hit.  Move the entry to the head of its bucket's list to
		 * speed up later searches; the most frequently used entries of a
		 * bucket thereby tend to stay near the front.
		 */
		DLMoveToFront(&ct->cache_elem);

		/*
		 * A negative entry means failure is reported to the caller; a
		 * positive entry has its refcount bumped and is returned.
		 */
		if (ct->negative)
		{
			CACHE3_elog(DEBUG2, "SearchCatCache(%s): found neg entry in bucket %d",
						cache->cc_relname, hashIndex);

#ifdef CATCACHE_STATS
			cache->cc_neg_hits++;
#endif

			return NULL;
		}

		ResourceOwnerEnlargeCatCacheRefs(CurrentResourceOwner);
		ct->refcount++;
		ResourceOwnerRememberCatCacheRef(CurrentResourceOwner, &ct->tuple);

		CACHE3_elog(DEBUG2, "SearchCatCache(%s): found in bucket %d",
					cache->cc_relname, hashIndex);

#ifdef CATCACHE_STATS
		cache->cc_hits++;
#endif

		return &ct->tuple;
	}

	/*
	 * Cache miss: fetch the tuple directly from the relation.  A tuple that
	 * is found gets added to the cache; otherwise a negative cache entry is
	 * added instead.
	 *
	 * NOTE: recursive cache lookups can happen while the relation is being
	 * read --- for example, due to shared-cache-inval messages being
	 * processed during heap_open().  This is OK.  One of those lookups may
	 * even find and enter the very tuple we are after, in which case a
	 * second copy gets entered into the cache.  The first copy will never be
	 * referenced again and will eventually age out of the cache, so there's
	 * no functional problem.  The case is rare enough that detecting it is
	 * not worth the extra cycles.
	 */
	relation = heap_open(cache->cc_reloid, AccessShareLock);

	scandesc = systable_beginscan(relation,
								  cache->cc_indexoid,
								  IndexScanOK(cache, cur_skey),
								  SnapshotNow,
								  cache->cc_nkeys,
								  cur_skey);

	ct = NULL;

	/* only the first match is fetched: at most one is assumed to exist */
	ntp = systable_getnext(scandesc);
	if (HeapTupleIsValid(ntp))
	{
		ct = CatalogCacheCreateEntry(cache, ntp,
									 hashValue, hashIndex,
									 false);
		/* immediately set the refcount to 1 */
		ResourceOwnerEnlargeCatCacheRefs(CurrentResourceOwner);
		ct->refcount++;
		ResourceOwnerRememberCatCacheRef(CurrentResourceOwner, &ct->tuple);
	}

	systable_endscan(scandesc);

	heap_close(relation, AccessShareLock);

	if (ct != NULL)
	{
		CACHE4_elog(DEBUG2, "SearchCatCache(%s): Contains %d/%d tuples",
					cache->cc_relname, cache->cc_ntup, CacheHdr->ch_ntup);
		CACHE3_elog(DEBUG2, "SearchCatCache(%s): put in bucket %d",
					cache->cc_relname, hashIndex);

#ifdef CATCACHE_STATS
		cache->cc_newloads++;
#endif

		return &ct->tuple;
	}

	/*
	 * Nothing was found, so a negative cache entry holding a fake tuple is
	 * built.  The fake tuple has the correct key columns, but nulls
	 * everywhere else.
	 *
	 * Bootstrap mode builds no negative entries: the cache invalidation
	 * mechanism isn't alive and can't clear them if the tuple gets created
	 * later.  (Bootstrap doesn't do UPDATEs, so it doesn't need cache inval
	 * for that.)
	 */
	if (IsBootstrapProcessingMode())
		return NULL;

	ntp = build_dummy_tuple(cache, cache->cc_nkeys, cur_skey);
	ct = CatalogCacheCreateEntry(cache, ntp,
								 hashValue, hashIndex,
								 true);
	heap_freetuple(ntp);

	CACHE4_elog(DEBUG2, "SearchCatCache(%s): Contains %d/%d tuples",
				cache->cc_relname, cache->cc_ntup, CacheHdr->ch_ntup);
	CACHE3_elog(DEBUG2, "SearchCatCache(%s): put neg entry in bucket %d",
				cache->cc_relname, hashIndex);

	/*
	 * The negative entry is not handed to the caller, so its refcount stays
	 * at zero.
	 */
	return NULL;
}
/* * regprocout - converts proc OID to "pro_name" */ Datum regprocout(PG_FUNCTION_ARGS) { RegProcedure proid = PG_GETARG_OID(0); char *result; HeapTuple proctup; cqContext *pcqCtx; if (proid == InvalidOid) { result = pstrdup("-"); PG_RETURN_CSTRING(result); } pcqCtx = caql_beginscan( NULL, cql("SELECT * FROM pg_proc " " WHERE oid = :1 ", ObjectIdGetDatum(proid))); proctup = caql_getnext(pcqCtx); /* XXX XXX select proname, pronamespace from pg_proc */ if (HeapTupleIsValid(proctup)) { Form_pg_proc procform = (Form_pg_proc) GETSTRUCT(proctup); char *proname = NameStr(procform->proname); /* * In bootstrap mode, skip the fancy namespace stuff and just return * the proc name. (This path is only needed for debugging output * anyway.) */ if (IsBootstrapProcessingMode()) result = pstrdup(proname); else { char *nspname; FuncCandidateList clist; /* * Would this proc be found (uniquely!) by regprocin? If not, * qualify it. */ clist = FuncnameGetCandidates(list_make1(makeString(proname)), -1); if (clist != NULL && clist->next == NULL && clist->oid == proid) nspname = NULL; else nspname = get_namespace_name(procform->pronamespace); result = quote_qualified_identifier(nspname, proname); } } else { /* If OID doesn't match any pg_proc entry, return it numerically */ result = (char *) palloc(NAMEDATALEN); snprintf(result, NAMEDATALEN, "%u", proid); } caql_endscan(pcqCtx); PG_RETURN_CSTRING(result); }
/* * regclassin - converts "classname" to class OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_class entry. */ Datum regclassin(PG_FUNCTION_ARGS) { char *class_name_or_oid = PG_GETARG_CSTRING(0); Oid result = InvalidOid; List *names; /* '-' ? */ if (strcmp(class_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (class_name_or_oid[0] >= '0' && class_name_or_oid[0] <= '9' && strspn(class_name_or_oid, "0123456789") == strlen(class_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(class_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a name, possibly schema-qualified */ /* * In bootstrap mode we assume the given name is not schema-qualified, and * just search pg_class for a match. This is needed for initializing * other system catalogs (pg_namespace may not exist yet, and certainly * there are no schemas other than pg_catalog). */ if (IsBootstrapProcessingMode()) { int matches = 0; result = caql_getoid_plus( NULL, &matches, NULL, cql("SELECT oid FROM pg_class " " WHERE relname = :1 ", CStringGetDatum(class_name_or_oid))); if (0 == matches) { ereport(ERROR, (errcode(ERRCODE_UNDEFINED_TABLE), errmsg("relation \"%s\" does not exist", class_name_or_oid))); } /* We assume there can be only one match */ PG_RETURN_OID(result); } /* * Normal case: parse the name into components and see if it matches any * pg_class entries in the current search path. */ names = stringToQualifiedNameList(class_name_or_oid); result = RangeVarGetRelid(makeRangeVarFromNameList(names), false); PG_RETURN_OID(result); }
/* * regprocin - converts "proname" to proc OID * * We also accept a numeric OID, for symmetry with the output routine. * * '-' signifies unknown (OID 0). In all other cases, the input must * match an existing pg_proc entry. */ Datum regprocin(PG_FUNCTION_ARGS) { char *pro_name_or_oid = PG_GETARG_CSTRING(0); RegProcedure result = InvalidOid; List *names; FuncCandidateList clist; /* '-' ? */ if (strcmp(pro_name_or_oid, "-") == 0) PG_RETURN_OID(InvalidOid); /* Numeric OID? */ if (pro_name_or_oid[0] >= '0' && pro_name_or_oid[0] <= '9' && strspn(pro_name_or_oid, "0123456789") == strlen(pro_name_or_oid)) { result = DatumGetObjectId(DirectFunctionCall1(oidin, CStringGetDatum(pro_name_or_oid))); PG_RETURN_OID(result); } /* Else it's a name, possibly schema-qualified */ /* * In bootstrap mode we assume the given name is not schema-qualified, and * just search pg_proc for a unique match. This is needed for * initializing other system catalogs (pg_namespace may not exist yet, and * certainly there are no schemas other than pg_catalog). */ if (IsBootstrapProcessingMode()) { int matches = 0; result = (RegProcedure) caql_getoid_plus( NULL, &matches, NULL, cql("SELECT oid FROM pg_proc " " WHERE proname = :1 ", CStringGetDatum(pro_name_or_oid))); if (matches == 0) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist", pro_name_or_oid))); else if (matches > 1) ereport(ERROR, (errcode(ERRCODE_AMBIGUOUS_FUNCTION), errmsg("more than one function named \"%s\"", pro_name_or_oid))); PG_RETURN_OID(result); } /* * Normal case: parse the name into components and see if it matches any * pg_proc entries in the current search path. 
*/ names = stringToQualifiedNameList(pro_name_or_oid, "regprocin"); clist = FuncnameGetCandidates(names, -1); if (clist == NULL) ereport(ERROR, (errcode(ERRCODE_UNDEFINED_FUNCTION), errmsg("function \"%s\" does not exist", pro_name_or_oid))); else if (clist->next != NULL) ereport(ERROR, (errcode(ERRCODE_AMBIGUOUS_FUNCTION), errmsg("more than one function named \"%s\"", pro_name_or_oid))); result = clist->oid; PG_RETURN_OID(result); }
/* * regoperout - converts operator OID to "opr_name" */ Datum regoperout(PG_FUNCTION_ARGS) { Oid oprid = PG_GETARG_OID(0); char *result; HeapTuple opertup; if (oprid == InvalidOid) { result = pstrdup("0"); PG_RETURN_CSTRING(result); } opertup = SearchSysCache(OPEROID, ObjectIdGetDatum(oprid), 0, 0, 0); if (HeapTupleIsValid(opertup)) { Form_pg_operator operform = (Form_pg_operator) GETSTRUCT(opertup); char *oprname = NameStr(operform->oprname); /* * In bootstrap mode, skip the fancy namespace stuff and just * return the oper name. (This path is only needed for debugging * output anyway.) */ if (IsBootstrapProcessingMode()) result = pstrdup(oprname); else { FuncCandidateList clist; /* * Would this oper be found (uniquely!) by regoperin? If not, * qualify it. */ clist = OpernameGetCandidates(makeList1(makeString(oprname)), '\0'); if (clist != NULL && clist->next == NULL && clist->oid == oprid) result = pstrdup(oprname); else { const char *nspname; nspname = get_namespace_name(operform->oprnamespace); nspname = quote_identifier(nspname); result = (char *) palloc(strlen(nspname) + strlen(oprname) + 2); sprintf(result, "%s.%s", nspname, oprname); } } ReleaseSysCache(opertup); } else { /* * If OID doesn't match any pg_operator entry, return it * numerically */ result = (char *) palloc(NAMEDATALEN); snprintf(result, NAMEDATALEN, "%u", oprid); } PG_RETURN_CSTRING(result); }