void cdb_sync_oid_to_segments(void) { if (Gp_role == GP_ROLE_DISPATCH && IsNormalProcessingMode()) { int i; /* Burn a few extra just for safety */ for (i=0;i<10;i++) GetNewObjectId(); } }
/* * hashbuild() -- build a new hash index. * * We use a global variable to record the fact that we're creating * a new index. This is used to avoid high-concurrency locking, * since the index won't be visible until this transaction commits * and since building is guaranteed to be single-threaded. */ Datum hashbuild(PG_FUNCTION_ARGS) { Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); double reltuples; HashBuildState buildstate; /* * We expect to be called exactly once for any index relation. If * that's not the case, big trouble's what we have. */ if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); /* initialize the hash index metadata page */ _hash_metapinit(index); /* build the index */ buildstate.indtuples = 0; /* do the heap scan */ reltuples = IndexBuildHeapScan(heap, index, indexInfo, hashbuildCallback, (void *) &buildstate); /* * Since we just counted the tuples in the heap, we update its stats * in pg_class to guarantee that the planner takes advantage of the * index we just created. But, only update statistics during normal * index definitions, not for indices on system catalogs created * during bootstrap processing. We must close the relations before * updating statistics to guarantee that the relcache entries are * flushed when we increment the command counter in UpdateStats(). But * we do not release any locks on the relations; those will be held * until end of transaction. */ if (IsNormalProcessingMode()) { Oid hrelid = RelationGetRelid(heap); Oid irelid = RelationGetRelid(index); heap_close(heap, NoLock); index_close(index); UpdateStats(hrelid, reltuples); UpdateStats(irelid, buildstate.indtuples); } PG_RETURN_VOID(); }
/*
 * cdb_sync_oid_to_segments
 *
 * Advance the dispatcher's OID counter past the highest OID any segment
 * (QE) has consumed, then burn a small safety margin, so that OIDs handed
 * out by the QD can never collide with ones already used on a segment.
 *
 * Does nothing unless running as the dispatcher in normal processing mode.
 */
void
cdb_sync_oid_to_segments(void)
{
	if (Gp_role != GP_ROLE_DISPATCH || !IsNormalProcessingMode())
		return;

	Oid			segMaxOid = get_max_oid_from_segDBs();
	int			extra;

	/* Consume OIDs until our counter is strictly ahead of every QE's. */
	for (;;)
	{
		if (GetNewObjectId() > segMaxOid)
			break;
	}

	/* Burn a few extra just for safety */
	for (extra = 0; extra < 10; extra++)
		(void) GetNewObjectId();
}
/*
 * AtCommit_Notify
 *
 * This is called at transaction commit.
 *
 * If there are pending LISTEN/UNLISTEN actions, insert or delete
 * tuples in pg_listener accordingly.
 *
 * If there are outbound notify requests in the pendingNotifies list,
 * scan pg_listener for matching tuples, and either signal the other
 * backend or send a message to our own frontend.
 *
 * NOTE: we are still inside the current transaction, therefore can
 * piggyback on its committing of changes.
 */
void
AtCommit_Notify(void)
{
	Relation	lRel;
	ListCell   *p;

	/* Fast path: nothing queued in this transaction. */
	if (pendingActions == NIL && pendingNotifies == NIL)
		return;					/* no relevant statements in this xact */

	/*
	 * NOTIFY is disabled if not normal processing mode. This test used to be
	 * in xact.c, but it seems cleaner to do it here.
	 */
	if (!IsNormalProcessingMode())
	{
		ClearPendingActionsAndNotifies();
		return;
	}

	if (Trace_notify)
		elog(DEBUG1, "AtCommit_Notify");

	/* Acquire ExclusiveLock on pg_listener */
	lRel = heap_open(ListenerRelationId, ExclusiveLock);

	/* Perform any pending listen/unlisten actions */
	foreach(p, pendingActions)
	{
		ListenAction *actrec = (ListenAction *) lfirst(p);

		switch (actrec->action)
		{
			case LISTEN_LISTEN:
				Exec_Listen(lRel, actrec->condname);
				break;
			case LISTEN_UNLISTEN:
				Exec_Unlisten(lRel, actrec->condname);
				break;
			case LISTEN_UNLISTEN_ALL:
				Exec_UnlistenAll(lRel);
				break;
		}

		/* We must CCI after each action in case of conflicting actions */
		CommandCounterIncrement();
	}
	/*
	 * NOTE(review): the function body is truncated at this point in this
	 * chunk — the notification-dispatch half (scanning pg_listener for the
	 * pendingNotifies list) is not visible here.
	 */
/*
 *--------------------------------------------------------------
 * AtCommit_Notify
 *
 * This is called at transaction commit.
 *
 * If there are outbound notify requests in the pendingNotifies list,
 * scan pg_listener for matching tuples, and either signal the other
 * backend or send a message to our own frontend.
 *
 * NOTE: we are still inside the current transaction, therefore can
 * piggyback on its committing of changes.
 *
 * Results:
 *		XXX
 *
 * Side effects:
 *		Tuples in pg_listener that have matching relnames and other peoples'
 *		listenerPIDs are updated with a nonzero notification field.
 *
 *--------------------------------------------------------------
 */
void
AtCommit_Notify(void)
{
	Relation	lRel;
	TupleDesc	tdesc;
	HeapScanDesc scan;
	HeapTuple	lTuple,
				rTuple;
	Datum		value[Natts_pg_listener];
	char		repl[Natts_pg_listener],
				nulls[Natts_pg_listener];

	if (pendingNotifies == NIL)
		return;					/* no NOTIFY statements in this transaction */

	/*
	 * NOTIFY is disabled if not normal processing mode. This test used to be
	 * in xact.c, but it seems cleaner to do it here.
	 */
	if (!IsNormalProcessingMode())
	{
		ClearPendingNotifies();
		return;
	}

	if (Trace_notify)
		elog(DEBUG1, "AtCommit_Notify");

	/* preset data to update notify column to MyProcPid */
	nulls[0] = nulls[1] = nulls[2] = ' ';
	repl[0] = repl[1] = repl[2] = ' ';
	repl[Anum_pg_listener_notify - 1] = 'r';
	value[0] = value[1] = value[2] = (Datum) 0;
	value[Anum_pg_listener_notify - 1] = Int32GetDatum(MyProcPid);

	/* ExclusiveLock serializes all writers of pg_listener */
	lRel = heap_open(ListenerRelationId, ExclusiveLock);
	tdesc = RelationGetDescr(lRel);
	scan = heap_beginscan(lRel, SnapshotNow, 0, NULL);

	while ((lTuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		Form_pg_listener listener = (Form_pg_listener) GETSTRUCT(lTuple);
		char	   *relname = NameStr(listener->relname);
		int32		listenerPID = listener->listenerpid;

		/* only rows whose condition name was actually NOTIFYed matter */
		if (!AsyncExistsPendingNotify(relname))
			continue;

		if (listenerPID == MyProcPid)
		{
			/*
			 * Self-notify: no need to bother with table update. Indeed, we
			 * *must not* clear the notification field in this path, or we
			 * could lose an outside notify, which'd be bad for applications
			 * that ignore self-notify messages.
			 */
			if (Trace_notify)
				elog(DEBUG1, "AtCommit_Notify: notifying self");

			NotifyMyFrontEnd(relname, listenerPID);
		}
		else
		{
			if (Trace_notify)
				elog(DEBUG1, "AtCommit_Notify: notifying pid %d",
					 listenerPID);

			/*
			 * If someone has already notified this listener, we don't bother
			 * modifying the table, but we do still send a SIGUSR2 signal,
			 * just in case that backend missed the earlier signal for some
			 * reason. It's OK to send the signal first, because the other
			 * guy can't read pg_listener until we unlock it.
			 */
			if (kill(listenerPID, SIGUSR2) < 0)
			{
				/*
				 * Get rid of pg_listener entry if it refers to a PID that no
				 * longer exists. Presumably, that backend crashed without
				 * deleting its pg_listener entries. This code used to only
				 * delete the entry if errno==ESRCH, but as far as I can see
				 * we should just do it for any failure (certainly at least
				 * for EPERM too...)
				 */
				simple_heap_delete(lRel, &lTuple->t_self);
			}
			else if (listener->notification == 0)
			{
				HTSU_Result result;
				ItemPointerData update_ctid;
				TransactionId update_xmax;

				rTuple = heap_modifytuple(lTuple, tdesc,
										  value, nulls, repl);

				/*
				 * We cannot use simple_heap_update here because the tuple
				 * could have been modified by an uncommitted transaction;
				 * specifically, since UNLISTEN releases exclusive lock on the
				 * table before commit, the other guy could already have tried
				 * to unlisten. There are no other cases where we should be
				 * able to see an uncommitted update or delete. Therefore, our
				 * response to a HeapTupleBeingUpdated result is just to
				 * ignore it. We do *not* wait for the other guy to commit
				 * --- that would risk deadlock, and we don't want to block
				 * while holding the table lock anyway for performance
				 * reasons. We also ignore HeapTupleUpdated, which could occur
				 * if the other guy commits between our heap_getnext and
				 * heap_update calls.
				 */
				result = heap_update(lRel, &lTuple->t_self, rTuple,
									 &update_ctid, &update_xmax,
									 GetCurrentCommandId(), InvalidSnapshot,
									 false /* no wait for commit */ );
				switch (result)
				{
					case HeapTupleSelfUpdated:
						/* Tuple was already updated in current command? */
						elog(ERROR, "tuple already updated by self");
						break;

					case HeapTupleMayBeUpdated:
						/* done successfully */
#ifdef NOT_USED					/* currently there are no indexes */
						CatalogUpdateIndexes(lRel, rTuple);
#endif
						break;

					case HeapTupleBeingUpdated:
						/* ignore uncommitted tuples */
						break;

					case HeapTupleUpdated:
						/* ignore just-committed tuples */
						break;

					default:
						elog(ERROR, "unrecognized heap_update status: %u",
							 result);
						break;
				}
			}
		}
	}

	heap_endscan(scan);

	/*
	 * We do NOT release the lock on pg_listener here; we need to hold it
	 * until end of transaction (which is about to happen, anyway) to ensure
	 * that notified backends see our tuple updates when they look. Else they
	 * might disregard the signal, which would make the application programmer
	 * very unhappy.
	 */
	heap_close(lRel, NoLock);

	ClearPendingNotifies();

	if (Trace_notify)
		elog(DEBUG1, "AtCommit_Notify: done");
}
/* * routine to build an index. Basically calls insert over and over */ Datum gistbuild(PG_FUNCTION_ARGS) { Relation heap = (Relation) PG_GETARG_POINTER(0); Relation index = (Relation) PG_GETARG_POINTER(1); IndexInfo *indexInfo = (IndexInfo *) PG_GETARG_POINTER(2); double reltuples; GISTBuildState buildstate; Buffer buffer; /* no locking is needed */ initGISTstate(&buildstate.giststate, index); /* * We expect to be called exactly once for any index relation. If * that's not the case, big trouble's what we have. */ if (RelationGetNumberOfBlocks(index) != 0) elog(ERROR, "index \"%s\" already contains data", RelationGetRelationName(index)); /* initialize the root page */ buffer = ReadBuffer(index, P_NEW); GISTInitBuffer(buffer, F_LEAF); WriteBuffer(buffer); /* build the index */ buildstate.numindexattrs = indexInfo->ii_NumIndexAttrs; buildstate.indtuples = 0; /* do the heap scan */ reltuples = IndexBuildHeapScan(heap, index, indexInfo, gistbuildCallback, (void *) &buildstate); /* okay, all heap tuples are indexed */ /* * Since we just counted the tuples in the heap, we update its stats * in pg_class to guarantee that the planner takes advantage of the * index we just created. But, only update statistics during normal * index definitions, not for indices on system catalogs created * during bootstrap processing. We must close the relations before * updating statistics to guarantee that the relcache entries are * flushed when we increment the command counter in UpdateStats(). But * we do not release any locks on the relations; those will be held * until end of transaction. */ if (IsNormalProcessingMode()) { Oid hrelid = RelationGetRelid(heap); Oid irelid = RelationGetRelid(index); heap_close(heap, NoLock); index_close(index); UpdateStats(hrelid, reltuples); UpdateStats(irelid, buildstate.indtuples); } freeGISTstate(&buildstate.giststate); #ifdef GISTDEBUG gist_dumptree(index, 0, GISTP_ROOT, 0); #endif PG_RETURN_VOID(); }
/*
 * DtmXactCallback
 *
 * Transaction-event hook for the distributed transaction manager (DTM).
 * On XACT_EVENT_START it begins a multimaster transaction for ordinary
 * client backends; on COMMIT/ABORT it reports an unvoted transaction as
 * aborted to the arbiter, cleans the in-doubt hash on commit, and resets
 * the per-backend distributed-transaction state.
 */
static void
DtmXactCallback(XactEvent event, void *arg)
{
	//XTM_INFO("%d: DtmXactCallbackevent=%d nextxid=%d\n", getpid(), event, DtmNextXid);
	switch (event)
	{
		case XACT_EVENT_START:
			//XTM_INFO("%d: normal=%d, initialized=%d, replication=%d, bgw=%d, vacuum=%d\n",
			//	 getpid(), IsNormalProcessingMode(), dtm->initialized, MMDoReplication, IsBackgroundWorker, IsAutoVacuumWorkerProcess());
			/*
			 * Only start a multimaster transaction for regular client
			 * backends: replication must be enabled and the DTM initialized,
			 * and walsenders, background workers and autovacuum workers are
			 * excluded.
			 */
			if (IsNormalProcessingMode() && dtm->initialized && MMDoReplication
				&& !am_walsender && !IsBackgroundWorker && !IsAutoVacuumWorkerProcess())
			{
				MMBeginTransaction();
			}
			break;
#if 0
		case XACT_EVENT_PRE_COMMIT:
		case XACT_EVENT_PARALLEL_PRE_COMMIT:
			{
				TransactionId xid = GetCurrentTransactionIdIfAny();

				if (!MMIsDistributedTrans && TransactionIdIsValid(xid))
				{
					XTM_INFO("%d: Will ignore transaction %u\n", getpid(), xid);
					MMMarkTransAsLocal(xid);
				}
				break;
			}
#endif
		case XACT_EVENT_COMMIT:
		case XACT_EVENT_ABORT:
			if (TransactionIdIsValid(DtmNextXid))
			{
				/*
				 * If this backend never voted on the transaction's fate,
				 * tell the arbiter it is aborted before we forget about it.
				 */
				if (!DtmVoted)
				{
					ArbiterSetTransStatus(DtmNextXid, TRANSACTION_STATUS_ABORTED, false);
				}
				if (event == XACT_EVENT_COMMIT)
				{
					/*
					 * Now transaction status is already written in CLOG,
					 * so we can remove information about it from hash table
					 */
					LWLockAcquire(dtm->hashLock, LW_EXCLUSIVE);
					hash_search(xid_in_doubt, &DtmNextXid, HASH_REMOVE, NULL);
					LWLockRelease(dtm->hashLock);
				}
#if 0							/* should be handled now using DtmVoted flag */
				else
				{
					/*
					 * Transaction at the node can be aborted because of transaction failure at some other node
					 * before it starts doing anything and assigned Xid, in this case Postgres is not calling SetTransactionStatus,
					 * so we have to send report to DTMD here
					 */
					if (!TransactionIdIsValid(GetCurrentTransactionIdIfAny()))
					{
						XTM_INFO("%d: abort transation on DTMD\n", getpid());
						ArbiterSetTransStatus(DtmNextXid, TRANSACTION_STATUS_ABORTED, false);
					}
				}
#endif
				/* reset the per-backend distributed-transaction state */
				DtmNextXid = InvalidTransactionId;
				DtmLastSnapshot = NULL;
			}
			MMIsDistributedTrans = false;
			break;
		default:
			break;
	}
}
/* ----------------------------------------------------------------
 *		index_create
 *
 * Create an index relation on an existing heap relation: builds the
 * tuple descriptor, creates the relcache entry and physical file,
 * fills in the pg_class/pg_attribute/pg_index catalog entries, records
 * constraint and dependency information, and finally builds the index
 * contents (deferred during bootstrap).
 *
 * heapRelationId: OID of the table being indexed.
 * indexRelationName: name for the new index.
 * indexInfo: column/expression/predicate description of the index.
 * accessMethodObjectId: OID of the index access method (pg_am).
 * classObjectId: per-column operator class OIDs.
 * primary: true if this backs a PRIMARY KEY constraint.
 * isconstraint: true if created as part of a CONSTRAINT clause.
 * allow_system_table_mods: permit indexing system catalogs.
 *
 * Returns OID of the created index.
 * ----------------------------------------------------------------
 */
Oid
index_create(Oid heapRelationId,
			 const char *indexRelationName,
			 IndexInfo *indexInfo,
			 Oid accessMethodObjectId,
			 Oid *classObjectId,
			 bool primary,
			 bool isconstraint,
			 bool allow_system_table_mods)
{
	Relation	heapRelation;
	Relation	indexRelation;
	TupleDesc	indexTupDesc;
	bool		shared_relation;
	Oid			namespaceId;
	Oid			indexoid;
	int			i;

	/*
	 * Only SELECT ... FOR UPDATE are allowed while doing this
	 */
	heapRelation = heap_open(heapRelationId, ShareLock);

	/*
	 * The index will be in the same namespace as its parent table, and is
	 * shared across databases if and only if the parent is.
	 */
	namespaceId = RelationGetNamespace(heapRelation);
	shared_relation = heapRelation->rd_rel->relisshared;

	/*
	 * check parameters
	 */
	if (indexInfo->ii_NumIndexAttrs < 1)
		elog(ERROR, "must index at least one column");

	/* indexing a system catalog requires an explicit override flag */
	if (!allow_system_table_mods &&
		IsSystemRelation(heapRelation) &&
		IsNormalProcessingMode())
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("user-defined indexes on system catalog tables are not supported")));

	/*
	 * We cannot allow indexing a shared relation after initdb (because
	 * there's no way to make the entry in other databases' pg_class).
	 * Unfortunately we can't distinguish initdb from a manually started
	 * standalone backend.  However, we can at least prevent this mistake
	 * under normal multi-user operation.
	 */
	if (shared_relation && IsUnderPostmaster)
		ereport(ERROR,
				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
				 errmsg("shared indexes cannot be created after initdb")));

	/* index name must not collide with an existing relation */
	if (get_relname_relid(indexRelationName, namespaceId))
		ereport(ERROR,
				(errcode(ERRCODE_DUPLICATE_TABLE),
				 errmsg("relation \"%s\" already exists",
						indexRelationName)));

	/*
	 * construct tuple descriptor for index tuples
	 */
	indexTupDesc = ConstructTupleDescriptor(heapRelation,
											indexInfo,
											classObjectId);

	/*
	 * create the index relation's relcache entry and physical disk file.
	 * (If we fail further down, it's the smgr's responsibility to remove
	 * the disk file again.)
	 */
	indexRelation = heap_create(indexRelationName,
								namespaceId,
								indexTupDesc,
								shared_relation,
								true,
								allow_system_table_mods);

	/* Fetch the relation OID assigned by heap_create */
	indexoid = RelationGetRelid(indexRelation);

	/*
	 * Obtain exclusive lock on it.  Although no other backends can see it
	 * until we commit, this prevents deadlock-risk complaints from lock
	 * manager in cases such as CLUSTER.
	 */
	LockRelation(indexRelation, AccessExclusiveLock);

	/*
	 * Fill in fields of the index's pg_class entry that are not set
	 * correctly by heap_create.
	 *
	 * XXX should have a cleaner way to create cataloged indexes
	 */
	indexRelation->rd_rel->relowner = GetUserId();
	indexRelation->rd_rel->relam = accessMethodObjectId;
	indexRelation->rd_rel->relkind = RELKIND_INDEX;
	indexRelation->rd_rel->relhasoids = false;

	/*
	 * store index's pg_class entry
	 */
	UpdateRelationRelation(indexRelation);

	/*
	 * now update the object id's of all the attribute tuple forms in the
	 * index relation's tuple descriptor
	 */
	InitializeAttributeOids(indexRelation,
							indexInfo->ii_NumIndexAttrs,
							indexoid);

	/*
	 * append ATTRIBUTE tuples for the index
	 */
	AppendAttributeTuples(indexRelation, indexInfo->ii_NumIndexAttrs);

	/* ----------------
	 *	  update pg_index
	 *	  (append INDEX tuple)
	 *
	 *	  Note that this stows away a representation of "predicate".
	 *	  (Or, could define a rule to maintain the predicate) --Nels, Feb '92
	 * ----------------
	 */
	UpdateIndexRelation(indexoid, heapRelationId, indexInfo,
						classObjectId, primary);

	/*
	 * Register constraint and dependencies for the index.
	 *
	 * If the index is from a CONSTRAINT clause, construct a pg_constraint
	 * entry.  The index is then linked to the constraint, which in turn
	 * is linked to the table.  If it's not a CONSTRAINT, make the
	 * dependency directly on the table.
	 *
	 * We don't need a dependency on the namespace, because there'll be an
	 * indirect dependency via our parent table.
	 *
	 * During bootstrap we can't register any dependencies, and we don't try
	 * to make a constraint either.
	 */
	if (!IsBootstrapProcessingMode())
	{
		ObjectAddress myself,
					referenced;

		myself.classId = RelOid_pg_class;
		myself.objectId = indexoid;
		myself.objectSubId = 0;

		if (isconstraint)
		{
			char		constraintType;
			Oid			conOid;

			if (primary)
				constraintType = CONSTRAINT_PRIMARY;
			else if (indexInfo->ii_Unique)
				constraintType = CONSTRAINT_UNIQUE;
			else
			{
				elog(ERROR, "constraint must be PRIMARY or UNIQUE");
				constraintType = 0;		/* keep compiler quiet */
			}

			/* Shouldn't have any expressions */
			if (indexInfo->ii_Expressions)
				elog(ERROR, "constraints can't have index expressions");

			conOid = CreateConstraintEntry(indexRelationName,
										   namespaceId,
										   constraintType,
										   false,		/* isDeferrable */
										   false,		/* isDeferred */
										   heapRelationId,
										   indexInfo->ii_KeyAttrNumbers,
										   indexInfo->ii_NumIndexAttrs,
										   InvalidOid,	/* no domain */
										   InvalidOid,	/* no foreign key */
										   NULL,
										   0,
										   ' ',
										   ' ',
										   ' ',
										   InvalidOid,	/* no associated index */
										   NULL,		/* no check constraint */
										   NULL,
										   NULL);

			/* index depends INTERNALly on its constraint */
			referenced.classId = get_system_catalog_relid(ConstraintRelationName);
			referenced.objectId = conOid;
			referenced.objectSubId = 0;

			recordDependencyOn(&myself, &referenced, DEPENDENCY_INTERNAL);
		}
		else
		{
			/* Create auto dependencies on simply-referenced columns */
			for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
			{
				if (indexInfo->ii_KeyAttrNumbers[i] != 0)
				{
					referenced.classId = RelOid_pg_class;
					referenced.objectId = heapRelationId;
					referenced.objectSubId = indexInfo->ii_KeyAttrNumbers[i];

					recordDependencyOn(&myself, &referenced, DEPENDENCY_AUTO);
				}
			}
		}

		/* Store dependency on operator classes */
		referenced.classId = get_system_catalog_relid(OperatorClassRelationName);
		for (i = 0; i < indexInfo->ii_NumIndexAttrs; i++)
		{
			referenced.objectId = classObjectId[i];
			referenced.objectSubId = 0;

			recordDependencyOn(&myself, &referenced, DEPENDENCY_NORMAL);
		}

		/* Store dependencies on anything mentioned in index expressions */
		if (indexInfo->ii_Expressions)
		{
			recordDependencyOnSingleRelExpr(&myself,
										  (Node *) indexInfo->ii_Expressions,
											heapRelationId,
											DEPENDENCY_NORMAL,
											DEPENDENCY_AUTO);
		}

		/* Store dependencies on anything mentioned in predicate */
		if (indexInfo->ii_Predicate)
		{
			recordDependencyOnSingleRelExpr(&myself,
											(Node *) indexInfo->ii_Predicate,
											heapRelationId,
											DEPENDENCY_NORMAL,
											DEPENDENCY_AUTO);
		}
	}

	/*
	 * Advance the command counter so that we can see the newly-entered
	 * catalog tuples for the index.
	 */
	CommandCounterIncrement();

	/*
	 * In bootstrap mode, we have to fill in the index strategy structure
	 * with information from the catalogs.  If we aren't bootstrapping,
	 * then the relcache entry has already been rebuilt thanks to sinval
	 * update during CommandCounterIncrement.
	 */
	if (IsBootstrapProcessingMode())
		RelationInitIndexAccessInfo(indexRelation);
	else
		Assert(indexRelation->rd_indexcxt != NULL);

	/*
	 * If this is bootstrap (initdb) time, then we don't actually fill in
	 * the index yet.  We'll be creating more indexes and classes later,
	 * so we delay filling them in until just before we're done with
	 * bootstrapping.  Otherwise, we call the routine that constructs the
	 * index.
	 *
	 * In normal processing mode, the heap and index relations are closed by
	 * index_build() --- but we continue to hold the ShareLock on the heap
	 * and the exclusive lock on the index that we acquired above, until
	 * end of transaction.
	 */
	if (IsBootstrapProcessingMode())
	{
		index_register(heapRelationId, indexoid, indexInfo);
		/* XXX shouldn't we close the heap and index rels here? */
	}
	else
		index_build(heapRelation, indexRelation, indexInfo);

	return indexoid;
}