/* * Execute the CREATE BARRIER command. Write a BARRIER WAL record and flush the * WAL buffers to disk before returning to the caller. Writing the WAL record * does not guarantee successful completion of the barrier command. */ void ProcessCreateBarrierExecute(const char *id) { StringInfoData buf; if (!IsConnFromCoord()) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("The CREATE BARRIER EXECUTE message is expected to " "arrive from a Coordinator"))); { XLogRecData rdata[1]; XLogRecPtr recptr; rdata[0].data = (char *) id; rdata[0].len = strlen(id) + 1; rdata[0].buffer = InvalidBuffer; rdata[0].next = NULL; recptr = XLogInsert(RM_BARRIER_ID, XLOG_BARRIER_CREATE, rdata); XLogFlush(recptr); } pq_beginmessage(&buf, 'b'); pq_sendstring(&buf, id); pq_endmessage(&buf); pq_flush(); }
void RequestBarrier(const char *id, char *completionTag) { PGXCNodeAllHandles *prepared_handles; const char *barrier_id; elog(DEBUG2, "CREATE BARRIER request received"); /* * Ensure that we are a Coordinator and the request is not from another * coordinator */ if (!IS_PGXC_COORDINATOR) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("CREATE BARRIER command must be sent to a Coordinator"))); if (IsConnFromCoord()) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("CREATE BARRIER command is not expected from another Coordinator"))); /* * Get a barrier id if the user has not supplied it */ barrier_id = generate_barrier_id(id); elog(DEBUG2, "CREATE BARRIER <%s>", barrier_id); /* * Step One. Prepare all Coordinators for upcoming barrier request */ prepared_handles = PrepareBarrier(barrier_id); /* * Step two. Issue BARRIER command to all involved components, including * Coordinators and Datanodes */ ExecuteBarrier(barrier_id); /* * Step three. Inform Coordinators about a successfully completed barrier */ EndBarrier(prepared_handles, barrier_id); /* Finally report the barrier to GTM to backup its restart point */ ReportBarrierGTM(barrier_id); /* Free the handles */ pfree_pgxc_all_handles(prepared_handles); if (completionTag) snprintf(completionTag, COMPLETION_TAG_BUFSIZE, "BARRIER %s", barrier_id); }
/* * GetActiveSnapshot * Return the topmost snapshot in the Active stack. */ Snapshot GetActiveSnapshot(void) { #ifdef PGXC /* * Check if topmost snapshot is null or not, * if it is, a new one will be taken from GTM. */ if (!ActiveSnapshot && IS_PGXC_COORDINATOR && !IsConnFromCoord()) return NULL; #endif Assert(ActiveSnapshot != NULL); return ActiveSnapshot->as_snap; }
/* * Mark the completion of an on-going barrier. We must have remembered the * barrier ID when we received the CREATE BARRIER PREPARE command */ void ProcessCreateBarrierEnd(const char *id) { StringInfoData buf; if (!IS_PGXC_COORDINATOR || !IsConnFromCoord()) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("The CREATE BARRIER END message is expected to " "arrive at a Coordinator from another Coordinator"))); LWLockRelease(BarrierLock); pq_beginmessage(&buf, 'b'); pq_sendstring(&buf, id); pq_endmessage(&buf); pq_flush(); /* * TODO Stop the timer */ }
/* * Prepare ourselves for an incoming BARRIER. We must disable all new 2PC * commits and let the ongoing commits to finish. We then remember the * barrier id (so that it can be matched with the final END message) and * tell the driving Coordinator to proceed with the next step. * * A simple way to implement this is to grab a lock in an exclusive mode * while all other backend starting a 2PC will grab the lock in shared * mode. So as long as we hold the exclusive lock, no other backend start a * new 2PC and there can not be any 2PC in-progress. This technique would * rely on assumption that an exclusive lock requester is not starved by * share lock requesters. * * Note: To ensure that the 2PC are not blocked for a long time, we should * set a timeout. The lock should be release after the timeout and the * barrier should be canceled. */ void ProcessCreateBarrierPrepare(const char *id) { StringInfoData buf; if (!IS_PGXC_COORDINATOR || !IsConnFromCoord()) ereport(ERROR, (errcode(ERRCODE_INTERNAL_ERROR), errmsg("The CREATE BARRIER PREPARE message is expected to " "arrive at a Coordinator from another Coordinator"))); LWLockAcquire(BarrierLock, LW_EXCLUSIVE); pq_beginmessage(&buf, 'b'); pq_sendstring(&buf, id); pq_endmessage(&buf); pq_flush(); /* * TODO Start a timer to terminate the pending barrier after a specified * timeout */ }
/*
 * GetTransactionSnapshot
 *		Get the appropriate snapshot for a new query in a transaction.
 *
 * Note that the return value may point at static storage that will be modified
 * by future calls and by CommandCounterIncrement().  Callers should call
 * RegisterSnapshot or PushActiveSnapshot on the returned snap if it is to be
 * used very long.
 */
Snapshot
GetTransactionSnapshot(void)
{
	/* First call in transaction? */
	if (!FirstSnapshotSet)
	{
		/* Nothing can have been registered before the first snapshot */
		Assert(RegisteredSnapshots == 0);
		Assert(FirstXactSnapshot == NULL);

		/*
		 * In transaction-snapshot mode, the first snapshot must live until
		 * end of xact regardless of what the caller does with it, so we must
		 * make a copy of it rather than returning CurrentSnapshotData
		 * directly.  Furthermore, if we're running in serializable mode,
		 * predicate.c needs to wrap the snapshot fetch in its own processing.
		 */
		if (IsolationUsesXactSnapshot())
		{
			/* First, create the snapshot in CurrentSnapshotData */
			if (IsolationIsSerializable())
				CurrentSnapshot = GetSerializableTransactionSnapshot(&CurrentSnapshotData);
			else
				CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);
			/* Make a saved copy that survives until end of transaction */
			CurrentSnapshot = CopySnapshot(CurrentSnapshot);
			FirstXactSnapshot = CurrentSnapshot;
			/* Mark it as "registered" in FirstXactSnapshot */
			FirstXactSnapshot->regd_count++;
			RegisteredSnapshots++;
		}
		else
			CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);

		FirstSnapshotSet = true;
		return CurrentSnapshot;
	}

	if (IsolationUsesXactSnapshot())
	{
#ifdef PGXC
		/*
		 * Consider this test case taken from portals.sql
		 *
		 * CREATE TABLE cursor (a int, b int) distribute by replication;
		 * INSERT INTO cursor VALUES (10);
		 * BEGIN;
		 * SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
		 * DECLARE c1 NO SCROLL CURSOR FOR SELECT * FROM cursor FOR UPDATE;
		 * INSERT INTO cursor VALUES (2);
		 * FETCH ALL FROM c1;
		 *
		 * would result in
		 *     ERROR: attempted to lock invisible tuple
		 * because FETCH would be sent as a select to the remote nodes
		 * with command id 0, whereas the command id would be 2 in the
		 * current snapshot (1 sent by Coordinator due to declare cursor &
		 * 2 because of the insert inside the transaction).
		 * The command id should therefore be updated in the current
		 * snapshot.
		 */
		if (IsConnFromCoord())
			SnapshotSetCommandId(GetCurrentCommandId(false));
#endif
		/* Transaction-snapshot mode: keep reusing the first snapshot */
		return CurrentSnapshot;
	}

	/* Read-committed mode: take a fresh snapshot for each query */
	CurrentSnapshot = GetSnapshotData(&CurrentSnapshotData);

	return CurrentSnapshot;
}