/*
 * ProcLockWakeup -- routine for waking up processes when a lock is
 *		released (or a prior waiter is aborted).  Scan all waiters
 *		for lock, waken any that are no longer blocked.
 *
 * The appropriate lock partition lock must be held by caller.
 */
void
ProcLockWakeup(LockMethod lockMethodTable, LOCK *lock)
{
    PROC_QUEUE *waitQueue = &(lock->waitProcs);
    int         queue_size = waitQueue->size;
    PGPROC     *proc;
    LOCKMASK    aheadRequests = 0;

    Assert(queue_size >= 0);
    if (queue_size == 0)
        return;

    proc = (PGPROC *) waitQueue->links.next;

    while (queue_size-- > 0)
    {
        LOCKMODE    lockmode = proc->waitLockMode;

        /*
         * Waken if (a) doesn't conflict with requests of earlier waiters, and
         * (b) doesn't conflict with already-held locks.
         */
        if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 &&
            LockCheckConflicts(lockMethodTable, lockmode, lock,
                               proc->waitProcLock, proc) == STATUS_OK)
        {
            /* OK to waken */
            GrantLock(lock, proc->waitProcLock, lockmode);
            proc = ProcWakeup(proc, STATUS_OK);

            /*
             * ProcWakeup removes proc from the lock's waiting process queue
             * and returns the next proc in chain; don't use proc's next-link,
             * because it's been cleared.
             */
        }
        else
        {
            /*
             * Cannot wake this guy.  Remember his request for later checks.
             */
            aheadRequests |= LOCKBIT_ON(lockmode);
            proc = (PGPROC *) proc->links.next;
        }
    }

    Assert(waitQueue->size >= 0);
}
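/*
 * A minimal standalone sketch of the wake-up scan above (not PostgreSQL
 * source): walk the wait queue in order, accumulate the lock modes of waiters
 * that could not be woken into "aheadRequests", and wake a waiter only if its
 * request conflicts neither with already-granted modes nor with any earlier
 * waiter's request.  The demo_* names, the two-mode conflict table, and the
 * plain grantMask test (standing in for LockCheckConflicts) are assumptions
 * for illustration only; the sketch compiles on its own.
 */
#include <stdio.h>

#define DEMO_LOCKBIT_ON(m)  (1 << (m))

/* modes: 1 = SHARE, 2 = EXCLUSIVE (a deliberately tiny lock method) */
static const int demo_conflictTab[3] = {
    0,                                          /* unused slot 0 */
    DEMO_LOCKBIT_ON(2),                         /* SHARE conflicts with EXCLUSIVE */
    DEMO_LOCKBIT_ON(1) | DEMO_LOCKBIT_ON(2),    /* EXCLUSIVE conflicts with both */
};

int
main(void)
{
    int     grantMask = DEMO_LOCKBIT_ON(1);     /* one SHARE lock already granted */
    int     waiters[] = {2, 1, 1};              /* EXCLUSIVE, SHARE, SHARE in queue order */
    int     aheadRequests = 0;
    int     i;

    for (i = 0; i < 3; i++)
    {
        int     mode = waiters[i];
        int     conflicts = demo_conflictTab[mode];

        if ((conflicts & aheadRequests) == 0 && (conflicts & grantMask) == 0)
        {
            printf("waiter %d (mode %d): woken\n", i, mode);
            grantMask |= DEMO_LOCKBIT_ON(mode);
        }
        else
        {
            /* cannot wake this one; later waiters must not jump past it */
            printf("waiter %d (mode %d): still blocked\n", i, mode);
            aheadRequests |= DEMO_LOCKBIT_ON(mode);
        }
    }
    return 0;
}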
static void
DtmSerializeLock(PROCLOCK *proclock, void *arg)
{
    ByteBuffer *buf = (ByteBuffer *) arg;
    LOCK       *lock = proclock->tag.myLock;
    PGPROC     *proc = proclock->tag.myProc;

    if (lock != NULL)
    {
        PGXACT     *srcPgXact = &ProcGlobal->allPgXact[proc->pgprocno];

        if (TransactionIdIsValid(srcPgXact->xid) && proc->waitLock == lock)
        {
            LockMethod  lockMethodTable = GetLocksMethodTable(lock);
            int         numLockModes = lockMethodTable->numLockModes;
            int         conflictMask = lockMethodTable->conflictTab[proc->waitLockMode];
            SHM_QUEUE  *procLocks = &(lock->procLocks);
            int         lm;

            ByteBufferAppendInt32(buf, srcPgXact->xid); /* waiting transaction */

            proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks,
                                                 offsetof(PROCLOCK, lockLink));
            while (proclock)
            {
                if (proc != proclock->tag.myProc)
                {
                    PGXACT     *dstPgXact = &ProcGlobal->allPgXact[proclock->tag.myProc->pgprocno];

                    if (TransactionIdIsValid(dstPgXact->xid))
                    {
                        Assert(srcPgXact->xid != dstPgXact->xid);
                        for (lm = 1; lm <= numLockModes; lm++)
                        {
                            if ((proclock->holdMask & LOCKBIT_ON(lm)) &&
                                (conflictMask & LOCKBIT_ON(lm)))
                            {
                                XTM_INFO("%d: %u(%u) waits for %u(%u)\n",
                                         getpid(),
                                         srcPgXact->xid, proc->pid,
                                         dstPgXact->xid, proclock->tag.myProc->pid);
                                ByteBufferAppendInt32(buf, dstPgXact->xid); /* transaction holding lock */
                                break;
                            }
                        }
                    }
                }
                proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink,
                                                     offsetof(PROCLOCK, lockLink));
            }
            ByteBufferAppendInt32(buf, 0);  /* end of lock owners list */
        }
    }
}
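/*
 * A standalone sketch (not part of the DTM code above) of how the stream that
 * DtmSerializeLock produces could be read back: each record is the waiting
 * transaction's xid followed by the xids of the transactions holding
 * conflicting locks, terminated by a 0.  The demo_decode_waitfor() function
 * and the flat int array standing in for the real ByteBuffer are assumptions
 * for illustration only.
 */
#include <stdio.h>

static void
demo_decode_waitfor(const int *data, int len)
{
    int     i = 0;

    while (i < len)
    {
        int     waiter = data[i++];     /* waiting transaction */

        while (i < len && data[i] != 0)
            printf("%d waits for %d\n", waiter, data[i++]);
        if (i < len)
            i++;                        /* skip the 0 terminator */
    }
}

int
main(void)
{
    /* two records: xid 100 waits for 200 and 201; xid 300 waits for 100 */
    int     graph[] = {100, 200, 201, 0, 300, 100, 0};

    demo_decode_waitfor(graph, 7);
    return 0;
}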
/* * ProcSleep -- put a process to sleep on the specified lock * * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process (under all XIDs). * * The lock table's partition lock must be held at entry, and will be held * at exit. * * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock). * * ASSUME: that no one will fiddle with the queue until after * we release the partition lock. * * NOTES: The process queue is now a priority queue for locking. */ int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) { LOCKMODE lockmode = locallock->tag.mode; LOCK *lock = locallock->lock; PROCLOCK *proclock = locallock->proclock; uint32 hashcode = locallock->hashcode; LWLock *partitionLock = LockHashPartitionLock(hashcode); PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; bool allow_autovacuum_cancel = true; int myWaitStatus; PGPROC *proc; int i; /* * Determine where to add myself in the wait queue. * * Normally I should go at the end of the queue. However, if I already * hold locks that conflict with the request of any previous waiter, put * myself in the queue just in front of the first such waiter. This is not * a necessary step, since deadlock detection would move me to before that * waiter anyway; but it's relatively cheap to detect such a conflict * immediately, and avoid delaying till deadlock timeout. * * Special case: if I find I should go in front of some waiter, check to * see if I conflict with already-held locks or the requests before that * waiter. If not, then just grant myself the requested lock immediately. * This is the same as the test for immediate grant in LockAcquire, except * we are only considering the part of the wait queue before my insertion * point. */ if (myHeldLocks != 0) { LOCKMASK aheadRequests = 0; proc = (PGPROC *) waitQueue->links.next; for (i = 0; i < waitQueue->size; i++) { /* Must he wait for me? */ if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks) { /* Must I wait for him ? */ if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks) { /* * Yes, so we have a deadlock. Easiest way to clean up * correctly is to call RemoveFromWaitQueue(), but we * can't do that until we are *on* the wait queue. So, set * a flag to check below, and break out of loop. Also, * record deadlock info for later message. */ RememberSimpleDeadLock(MyProc, lockmode, lock, proc); early_deadlock = true; break; } /* I must go before this waiter. Check special case. */ if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 && LockCheckConflicts(lockMethodTable, lockmode, lock, proclock) == STATUS_OK) { /* Skip the wait and just grant myself the lock. */ GrantLock(lock, proclock, lockmode); GrantAwaitedLock(); return STATUS_OK; } /* Break out of loop to put myself before him */ break; } /* Nope, so advance to next waiter */ aheadRequests |= LOCKBIT_ON(proc->waitLockMode); proc = (PGPROC *) proc->links.next; } /* * If we fall out of loop normally, proc points to waitQueue head, so * we will insert at tail of queue as desired. */ } else { /* I hold no locks, so I can't push in front of anyone. */ proc = (PGPROC *) &(waitQueue->links); } /* * Insert self into queue, ahead of the given proc (or at tail of queue). 
*/ SHMQueueInsertBefore(&(proc->links), &(MyProc->links)); waitQueue->size++; lock->waitMask |= LOCKBIT_ON(lockmode); /* Set up wait information in PGPROC object, too */ MyProc->waitLock = lock; MyProc->waitProcLock = proclock; MyProc->waitLockMode = lockmode; MyProc->waitStatus = STATUS_WAITING; /* * If we detected deadlock, give up without waiting. This must agree with * CheckDeadLock's recovery code, except that we shouldn't release the * semaphore since we haven't tried to lock it yet. */ if (early_deadlock) { RemoveFromWaitQueue(MyProc, hashcode); return STATUS_ERROR; } /* mark that we are waiting for a lock */ lockAwaited = locallock; /* * Release the lock table's partition lock. * * NOTE: this may also cause us to exit critical-section state, possibly * allowing a cancel/die interrupt to be accepted. This is OK because we * have recorded the fact that we are waiting for a lock, and so * LockErrorCleanup will clean up if cancel/die happens. */ LWLockRelease(partitionLock); /* * Also, now that we will successfully clean up after an ereport, it's * safe to check to see if there's a buffer pin deadlock against the * Startup process. Of course, that's only necessary if we're doing Hot * Standby and are not the Startup process ourselves. */ if (RecoveryInProgress() && !InRecovery) CheckRecoveryConflictDeadlock(); /* Reset deadlock_state before enabling the timeout handler */ deadlock_state = DS_NOT_YET_CHECKED; got_deadlock_timeout = false; /* * Set timer so we can wake up after awhile and check for a deadlock. If a * deadlock is detected, the handler releases the process's semaphore and * sets MyProc->waitStatus = STATUS_ERROR, allowing us to know that we * must report failure rather than success. * * By delaying the check until we've waited for a bit, we can avoid * running the rather expensive deadlock-check code in most cases. * * If LockTimeout is set, also enable the timeout for that. We can save a * few cycles by enabling both timeout sources in one call. */ if (LockTimeout > 0) { EnableTimeoutParams timeouts[2]; timeouts[0].id = DEADLOCK_TIMEOUT; timeouts[0].type = TMPARAM_AFTER; timeouts[0].delay_ms = DeadlockTimeout; timeouts[1].id = LOCK_TIMEOUT; timeouts[1].type = TMPARAM_AFTER; timeouts[1].delay_ms = LockTimeout; enable_timeouts(timeouts, 2); } else enable_timeout_after(DEADLOCK_TIMEOUT, DeadlockTimeout); /* * If somebody wakes us between LWLockRelease and WaitLatch, the latch * will not wait. But a set latch does not necessarily mean that the lock * is free now, as there are many other sources for latch sets than * somebody releasing the lock. * * We process interrupts whenever the latch has been set, so cancel/die * interrupts are processed quickly. This means we must not mind losing * control to a cancel/die interrupt here. We don't, because we have no * shared-state-change work to do after being granted the lock (the * grantor did it all). We do have to worry about canceling the deadlock * timeout and updating the locallock table, but if we lose control to an * error, LockErrorCleanup will fix that up. */ do { WaitLatch(MyLatch, WL_LATCH_SET, 0); ResetLatch(MyLatch); /* check for deadlocks first, as that's probably log-worthy */ if (got_deadlock_timeout) { CheckDeadLock(); got_deadlock_timeout = false; } CHECK_FOR_INTERRUPTS(); /* * waitStatus could change from STATUS_WAITING to something else * asynchronously. Read it just once per loop to prevent surprising * behavior (such as missing log messages). 
*/ myWaitStatus = *((volatile int *) &MyProc->waitStatus); /* * If we are not deadlocked, but are waiting on an autovacuum-induced * task, send a signal to interrupt it. */ if (deadlock_state == DS_BLOCKED_BY_AUTOVACUUM && allow_autovacuum_cancel) { PGPROC *autovac = GetBlockingAutoVacuumPgproc(); PGXACT *autovac_pgxact = &ProcGlobal->allPgXact[autovac->pgprocno]; LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE); /* * Only do it if the worker is not working to protect against Xid * wraparound. */ if ((autovac_pgxact->vacuumFlags & PROC_IS_AUTOVACUUM) && !(autovac_pgxact->vacuumFlags & PROC_VACUUM_FOR_WRAPAROUND)) { int pid = autovac->pid; StringInfoData locktagbuf; StringInfoData logbuf; /* errdetail for server log */ initStringInfo(&locktagbuf); initStringInfo(&logbuf); DescribeLockTag(&locktagbuf, &lock->tag); appendStringInfo(&logbuf, _("Process %d waits for %s on %s."), MyProcPid, GetLockmodeName(lock->tag.locktag_lockmethodid, lockmode), locktagbuf.data); /* release lock as quickly as possible */ LWLockRelease(ProcArrayLock); ereport(LOG, (errmsg("sending cancel to blocking autovacuum PID %d", pid), errdetail_log("%s", logbuf.data))); pfree(logbuf.data); pfree(locktagbuf.data); /* send the autovacuum worker Back to Old Kent Road */ if (kill(pid, SIGINT) < 0) { /* Just a warning to allow multiple callers */ ereport(WARNING, (errmsg("could not send signal to process %d: %m", pid))); } } else LWLockRelease(ProcArrayLock); /* prevent signal from being resent more than once */ allow_autovacuum_cancel = false; } /* * If awoken after the deadlock check interrupt has run, and * log_lock_waits is on, then report about the wait. */ if (log_lock_waits && deadlock_state != DS_NOT_YET_CHECKED) { StringInfoData buf, lock_waiters_sbuf, lock_holders_sbuf; const char *modename; long secs; int usecs; long msecs; SHM_QUEUE *procLocks; PROCLOCK *proclock; bool first_holder = true, first_waiter = true; int lockHoldersNum = 0; initStringInfo(&buf); initStringInfo(&lock_waiters_sbuf); initStringInfo(&lock_holders_sbuf); DescribeLockTag(&buf, &locallock->tag.lock); modename = GetLockmodeName(locallock->tag.lock.locktag_lockmethodid, lockmode); TimestampDifference(get_timeout_start_time(DEADLOCK_TIMEOUT), GetCurrentTimestamp(), &secs, &usecs); msecs = secs * 1000 + usecs / 1000; usecs = usecs % 1000; /* * we loop over the lock's procLocks to gather a list of all * holders and waiters. Thus we will be able to provide more * detailed information for lock debugging purposes. * * lock->procLocks contains all processes which hold or wait for * this lock. 
*/ LWLockAcquire(partitionLock, LW_SHARED); procLocks = &(lock->procLocks); proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink)); while (proclock) { /* * we are a waiter if myProc->waitProcLock == proclock; we are * a holder if it is NULL or something different */ if (proclock->tag.myProc->waitProcLock == proclock) { if (first_waiter) { appendStringInfo(&lock_waiters_sbuf, "%d", proclock->tag.myProc->pid); first_waiter = false; } else appendStringInfo(&lock_waiters_sbuf, ", %d", proclock->tag.myProc->pid); } else { if (first_holder) { appendStringInfo(&lock_holders_sbuf, "%d", proclock->tag.myProc->pid); first_holder = false; } else appendStringInfo(&lock_holders_sbuf, ", %d", proclock->tag.myProc->pid); lockHoldersNum++; } proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink, offsetof(PROCLOCK, lockLink)); } LWLockRelease(partitionLock); if (deadlock_state == DS_SOFT_DEADLOCK) ereport(LOG, (errmsg("process %d avoided deadlock for %s on %s by rearranging queue order after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs), (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", "Processes holding the lock: %s. Wait queue: %s.", lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); else if (deadlock_state == DS_HARD_DEADLOCK) { /* * This message is a bit redundant with the error that will be * reported subsequently, but in some cases the error report * might not make it to the log (eg, if it's caught by an * exception handler), and we want to ensure all long-wait * events get logged. */ ereport(LOG, (errmsg("process %d detected deadlock while waiting for %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs), (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", "Processes holding the lock: %s. Wait queue: %s.", lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); } if (myWaitStatus == STATUS_WAITING) ereport(LOG, (errmsg("process %d still waiting for %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs), (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", "Processes holding the lock: %s. Wait queue: %s.", lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); else if (myWaitStatus == STATUS_OK) ereport(LOG, (errmsg("process %d acquired %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs))); else { Assert(myWaitStatus == STATUS_ERROR); /* * Currently, the deadlock checker always kicks its own * process, which means that we'll only see STATUS_ERROR when * deadlock_state == DS_HARD_DEADLOCK, and there's no need to * print redundant messages. But for completeness and * future-proofing, print a message if it looks like someone * else kicked us off the lock. */ if (deadlock_state != DS_HARD_DEADLOCK) ereport(LOG, (errmsg("process %d failed to acquire %s on %s after %ld.%03d ms", MyProcPid, modename, buf.data, msecs, usecs), (errdetail_log_plural("Process holding the lock: %s. Wait queue: %s.", "Processes holding the lock: %s. Wait queue: %s.", lockHoldersNum, lock_holders_sbuf.data, lock_waiters_sbuf.data)))); } /* * At this point we might still need to wait for the lock. Reset * state so we don't print the above messages again. */ deadlock_state = DS_NO_DEADLOCK; pfree(buf.data); pfree(lock_holders_sbuf.data); pfree(lock_waiters_sbuf.data); } } while (myWaitStatus == STATUS_WAITING); /* * Disable the timers, if they are still running. 
     * As in LockErrorCleanup, we must preserve the LOCK_TIMEOUT indicator flag:
     * if a lock timeout has already caused QueryCancelPending to become set,
     * we want the cancel to be reported as a lock timeout, not a user cancel.
     */
    if (LockTimeout > 0)
    {
        DisableTimeoutParams timeouts[2];

        timeouts[0].id = DEADLOCK_TIMEOUT;
        timeouts[0].keep_indicator = false;
        timeouts[1].id = LOCK_TIMEOUT;
        timeouts[1].keep_indicator = true;
        disable_timeouts(timeouts, 2);
    }
    else
        disable_timeout(DEADLOCK_TIMEOUT, false);

    /*
     * Re-acquire the lock table's partition lock.  We have to do this to hold
     * off cancel/die interrupts before we can mess with lockAwaited (else we
     * might have a missed or duplicated locallock update).
     */
    LWLockAcquire(partitionLock, LW_EXCLUSIVE);

    /*
     * We no longer want LockErrorCleanup to do anything.
     */
    lockAwaited = NULL;

    /*
     * If we got the lock, be sure to remember it in the locallock table.
     */
    if (MyProc->waitStatus == STATUS_OK)
        GrantAwaitedLock();

    /*
     * We don't have to do anything else, because the awaker did all the
     * necessary update of the lock table and MyProc.
     */
    return MyProc->waitStatus;
}
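/*
 * A standalone sketch of the waiting pattern ProcSleep uses above: loop on the
 * latch, re-read the wait status through a volatile pointer exactly once per
 * iteration, and leave the loop only when the status is no longer "waiting".
 * In the real code the status is changed by the process granting the lock or,
 * on deadlock, set to STATUS_ERROR by the deadlock-check handler; here a fake
 * latch that "grants" on the third wakeup stands in for all of that.  The
 * demo_* names are assumptions for illustration only.
 */
#include <stdio.h>

#define DEMO_STATUS_OK          0
#define DEMO_STATUS_WAITING     1
#define DEMO_STATUS_ERROR       2

static int  demo_waitStatus = DEMO_STATUS_WAITING;

/* stand-in for WaitLatch(): pretend somebody grants the lock on the 3rd wakeup */
static void
demo_wait_latch(void)
{
    static int  wakeups = 0;

    if (++wakeups == 3)
        demo_waitStatus = DEMO_STATUS_OK;
}

int
main(void)
{
    int     myWaitStatus;

    do
    {
        demo_wait_latch();

        /* read the shared status just once per loop iteration */
        myWaitStatus = *((volatile int *) &demo_waitStatus);
        printf("woke up, status = %d\n", myWaitStatus);
    } while (myWaitStatus == DEMO_STATUS_WAITING);

    return (myWaitStatus == DEMO_STATUS_OK) ? 0 : 1;
}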
/* * pg_lock_status - produce a view with one row per held or awaited lock mode */ Datum pg_lock_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; PG_Lock_Status *mystatus; LockData *lockData; PredicateLockData *predLockData; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(NUM_LOCK_STATUS_COLUMNS, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "virtualxid", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "transactionid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "classid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "objsubid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "virtualtransaction", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "mode", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 14, "granted", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath", BOOLOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); funcctx->user_fctx = (void *) mystatus; mystatus->lockData = GetLockStatusData(); mystatus->currIdx = 0; mystatus->predLockData = GetPredicateLockStatusData(); mystatus->predLockIdx = 0; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); mystatus = (PG_Lock_Status *) funcctx->user_fctx; lockData = mystatus->lockData; while (mystatus->currIdx < lockData->nelements) { bool granted; LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; Datum values[NUM_LOCK_STATUS_COLUMNS]; bool nulls[NUM_LOCK_STATUS_COLUMNS]; HeapTuple tuple; Datum result; LockInstanceData *instance; instance = &(lockData->locks[mystatus->currIdx]); /* * Look to see if there are any held lock modes in this PROCLOCK. If * so, report, and destructively modify lockData so we don't report * again. */ granted = false; if (instance->holdMask) { for (mode = 0; mode < MAX_LOCKMODES; mode++) { if (instance->holdMask & LOCKBIT_ON(mode)) { granted = true; instance->holdMask &= LOCKBIT_OFF(mode); break; } } } /* * If no (more) held modes to report, see if PROC is waiting for a * lock on this lock. */ if (!granted) { if (instance->waitLockMode != NoLock) { /* Yes, so report it with proper mode */ mode = instance->waitLockMode; /* * We are now done with this PROCLOCK, so advance pointer to * continue with next one on next call. */ mystatus->currIdx++; } else { /* * Okay, we've displayed all the locks associated with this * PROCLOCK, proceed to the next one. */ mystatus->currIdx++; continue; } } /* * Form tuple with appropriate data. 
*/ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); if (instance->locktag.locktag_type <= LOCKTAG_LAST_TYPE) locktypename = LockTagTypeNames[instance->locktag.locktag_type]; else { snprintf(tnbuf, sizeof(tnbuf), "unknown %d", (int) instance->locktag.locktag_type); locktypename = tnbuf; } values[0] = CStringGetTextDatum(locktypename); switch ((LockTagType) instance->locktag.locktag_type) { case LOCKTAG_RELATION: case LOCKTAG_RELATION_EXTEND: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[3] = UInt32GetDatum(instance->locktag.locktag_field3); nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[3] = UInt32GetDatum(instance->locktag.locktag_field3); values[4] = UInt16GetDatum(instance->locktag.locktag_field4); nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_TRANSACTION: values[6] = TransactionIdGetDatum(instance->locktag.locktag_field1); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(instance->locktag.locktag_field1, instance->locktag.locktag_field2); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[7] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[8] = ObjectIdGetDatum(instance->locktag.locktag_field3); values[9] = Int16GetDatum(instance->locktag.locktag_field4); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; break; } values[10] = VXIDGetDatum(instance->backend, instance->lxid); if (instance->pid != 0) values[11] = Int32GetDatum(instance->pid); else nulls[11] = true; values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode)); values[13] = BoolGetDatum(granted); values[14] = BoolGetDatum(instance->fastpath); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * Have returned all regular locks. Now start on the SIREAD predicate * locks. */ predLockData = mystatus->predLockData; if (mystatus->predLockIdx < predLockData->nelements) { PredicateLockTargetType lockType; PREDICATELOCKTARGETTAG *predTag = &(predLockData->locktags[mystatus->predLockIdx]); SERIALIZABLEXACT *xact = &(predLockData->xacts[mystatus->predLockIdx]); Datum values[NUM_LOCK_STATUS_COLUMNS]; bool nulls[NUM_LOCK_STATUS_COLUMNS]; HeapTuple tuple; Datum result; mystatus->predLockIdx++; /* * Form tuple with appropriate data. 
*/ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); /* lock type */ lockType = GET_PREDICATELOCKTARGETTAG_TYPE(*predTag); values[0] = CStringGetTextDatum(PredicateLockTagTypeNames[lockType]); /* lock target */ values[1] = GET_PREDICATELOCKTARGETTAG_DB(*predTag); values[2] = GET_PREDICATELOCKTARGETTAG_RELATION(*predTag); if (lockType == PREDLOCKTAG_TUPLE) values[4] = GET_PREDICATELOCKTARGETTAG_OFFSET(*predTag); else nulls[4] = true; if ((lockType == PREDLOCKTAG_TUPLE) || (lockType == PREDLOCKTAG_PAGE)) values[3] = GET_PREDICATELOCKTARGETTAG_PAGE(*predTag); else nulls[3] = true; /* these fields are targets for other types of locks */ nulls[5] = true; /* virtualxid */ nulls[6] = true; /* transactionid */ nulls[7] = true; /* classid */ nulls[8] = true; /* objid */ nulls[9] = true; /* objsubid */ /* lock holder */ values[10] = VXIDGetDatum(xact->vxid.backendId, xact->vxid.localTransactionId); if (xact->pid != 0) values[11] = Int32GetDatum(xact->pid); else nulls[11] = true; /* * Lock mode. Currently all predicate locks are SIReadLocks, which are * always held (never waiting) and have no fast path */ values[12] = CStringGetTextDatum("SIReadLock"); values[13] = BoolGetDatum(true); values[14] = BoolGetDatum(false); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } SRF_RETURN_DONE(funcctx); }
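/*
 * A standalone sketch of the "one row per held lock mode" iteration used by
 * pg_lock_status() above: each call clears one bit of the hold mask and
 * reports it as a granted row; only when the mask is empty is the awaited
 * mode (if any) reported, after which the scan moves to the next lock
 * instance.  The demo_* names and the tiny driver are assumptions for
 * illustration only.
 */
#include <stdio.h>

#define DEMO_MAX_LOCKMODES  8
#define DEMO_LOCKBIT_ON(m)  (1 << (m))

/* emit one "row" per call; return 0 once this lock instance is exhausted */
static int
demo_next_row(int *holdMask, int waitLockMode)
{
    int     mode;

    for (mode = 0; mode < DEMO_MAX_LOCKMODES; mode++)
    {
        if (*holdMask & DEMO_LOCKBIT_ON(mode))
        {
            *holdMask &= ~DEMO_LOCKBIT_ON(mode);    /* destructive, like the real code */
            printf("granted row: mode %d\n", mode);
            return 1;
        }
    }
    if (waitLockMode != 0)
    {
        printf("awaited row: mode %d\n", waitLockMode);
        return 1;
    }
    return 0;
}

int
main(void)
{
    int     holdMask = DEMO_LOCKBIT_ON(1) | DEMO_LOCKBIT_ON(5);
    int     waitLockMode = 7;   /* also waiting for mode 7 */

    /* one call per SRF invocation: held modes first, then the awaited mode */
    while (holdMask != 0)
        demo_next_row(&holdMask, 0);
    demo_next_row(&holdMask, waitLockMode);
    return 0;
}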
/* * pg_lock_status - produce a view with one row per held or awaited lock mode */ Datum pg_lock_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; PG_Lock_Status *mystatus; LockData *lockData; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(16, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "transactionid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "classid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "objid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objsubid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "transaction", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "mode", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "granted", BOOLOID, -1, 0); /* * These next columns are specific to GPDB */ TupleDescInitEntry(tupdesc, (AttrNumber) 14, "mppSessionId", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "mppIsWriter", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 16, "gp_segment_id", INT4OID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); funcctx->user_fctx = (void *) mystatus; mystatus->lockData = GetLockStatusData(); mystatus->currIdx = 0; mystatus->numSegLocks = 0; mystatus->numsegresults = 0; mystatus->segresults = NULL; } funcctx = SRF_PERCALL_SETUP(); mystatus = (PG_Lock_Status *) funcctx->user_fctx; lockData = mystatus->lockData; /* * This loop returns all the local lock data from the segment we are running on. */ while (mystatus->currIdx < lockData->nelements) { PROCLOCK *proclock; LOCK *lock; PGPROC *proc; bool granted; LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; Datum values[16]; bool nulls[16]; HeapTuple tuple; Datum result; proclock = &(lockData->proclocks[mystatus->currIdx]); lock = &(lockData->locks[mystatus->currIdx]); proc = &(lockData->procs[mystatus->currIdx]); /* * Look to see if there are any held lock modes in this PROCLOCK. If * so, report, and destructively modify lockData so we don't report * again. */ granted = false; if (proclock->holdMask) { for (mode = 0; mode < MAX_LOCKMODES; mode++) { if (proclock->holdMask & LOCKBIT_ON(mode)) { granted = true; proclock->holdMask &= LOCKBIT_OFF(mode); break; } } } /* * If no (more) held modes to report, see if PROC is waiting for a * lock on this lock. */ if (!granted) { if (proc->waitLock == proclock->tag.myLock) { /* Yes, so report it with proper mode */ mode = proc->waitLockMode; /* * We are now done with this PROCLOCK, so advance pointer to * continue with next one on next call. 
*/ mystatus->currIdx++; } else { /* * Okay, we've displayed all the locks associated with this * PROCLOCK, proceed to the next one. */ mystatus->currIdx++; continue; } } /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); if (lock->tag.locktag_type <= LOCKTAG_ADVISORY) locktypename = LockTagTypeNames[lock->tag.locktag_type]; else { snprintf(tnbuf, sizeof(tnbuf), "unknown %d", (int) lock->tag.locktag_type); locktypename = tnbuf; } values[0] = CStringGetTextDatum(locktypename); switch (lock->tag.locktag_type) { case LOCKTAG_RELATION: case LOCKTAG_RELATION_EXTEND: case LOCKTAG_RELATION_RESYNCHRONIZE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); values[4] = UInt16GetDatum(lock->tag.locktag_field4); nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_TRANSACTION: values[5] = TransactionIdGetDatum(lock->tag.locktag_field1); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[7] = ObjectIdGetDatum(lock->tag.locktag_field3); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; break; case LOCKTAG_RESOURCE_QUEUE: values[1] = ObjectIdGetDatum(proc->databaseId); values[7] = ObjectIdGetDatum(lock->tag.locktag_field1); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[6] = ObjectIdGetDatum(lock->tag.locktag_field2); values[7] = ObjectIdGetDatum(lock->tag.locktag_field3); values[8] = Int16GetDatum(lock->tag.locktag_field4); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; break; } values[9] = TransactionIdGetDatum(proc->xid); if (proc->pid != 0) values[10] = Int32GetDatum(proc->pid); else nulls[10] = true; values[11] = DirectFunctionCall1(textin, CStringGetDatum((char *) GetLockmodeName(LOCK_LOCKMETHOD(*lock), mode))); values[12] = BoolGetDatum(granted); values[13] = Int32GetDatum(proc->mppSessionId); values[14] = Int32GetDatum(proc->mppIsWriter); values[15] = Int32GetDatum(Gp_segment); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * This loop only executes on the masterDB and only in dispatch mode, because that * is the only time we dispatched to the segDBs. 
*/ while (mystatus->currIdx >= lockData->nelements && mystatus->currIdx < lockData->nelements + mystatus->numSegLocks) { HeapTuple tuple; Datum result; Datum values[16]; bool nulls[16]; int i; int whichresultset = 0; int whichelement = mystatus->currIdx - lockData->nelements; int whichrow = whichelement; Assert(Gp_role == GP_ROLE_DISPATCH); /* * Because we have one result set per segDB (rather than one big result set with everything), * we need to figure out which result set we are on, and which row within that result set * we are returning. * * So, we walk through all the result sets and all the rows in each one, in order. */ while(whichrow >= PQntuples(mystatus->segresults[whichresultset])) { whichrow -= PQntuples(mystatus->segresults[whichresultset]); whichresultset++; if (whichresultset >= mystatus->numsegresults) break; } /* * If this condition is true, we have already sent everything back, * and we just want to do the SRF_RETURN_DONE */ if (whichresultset >= mystatus->numsegresults) break; mystatus->currIdx++; /* * Form tuple with appropriate data we got from the segDBs */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); /* * For each column, extract out the value (which comes out in text). * Convert it to the appropriate datatype to match our tupledesc, * and put that in values. * The columns look like this (from select statement earlier): * * " (locktype text, database oid, relation oid, page int4, tuple int2," * " transactionid xid, classid oid, objid oid, objsubid int2," * " transaction xid, pid int4, mode text, granted boolean, " * " mppSessionId int4, mppIsWriter boolean, gp_segment_id int4) ," */ values[0] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 0)); values[1] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 1))); values[2] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 2))); values[3] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 3))); values[4] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 4))); values[5] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 5))); values[6] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 6))); values[7] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 7))); values[8] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 8))); values[9] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 9))); values[10] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,10))); values[11] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow,11)); values[12] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,12),"t",1)==0); values[13] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,13))); values[14] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,14),"t",1)==0); values[15] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,15))); /* * Copy the null info over. It should all match properly. 
         */
        for (i = 0; i < 16; i++)
        {
            nulls[i] = PQgetisnull(mystatus->segresults[whichresultset], whichrow, i);
        }

        tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
        result = HeapTupleGetDatum(tuple);

        SRF_RETURN_NEXT(funcctx, result);
    }

    /*
     * If we dispatched to the segDBs, free up the memory holding the result
     * sets.  Otherwise we might leak this memory each time we get called (does
     * it automatically get freed by the pool being deleted?  Probably, but
     * this is safer).
     */
    if (mystatus->segresults != NULL)
    {
        int     i;

        for (i = 0; i < mystatus->numsegresults; i++)
            PQclear(mystatus->segresults[i]);
        free(mystatus->segresults);
    }

    SRF_RETURN_DONE(funcctx);
}
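/*
 * A standalone sketch of the result-set bookkeeping in the GPDB loop above:
 * given one libpq result set per segment, map a single running row index onto
 * (which result set, which row within it).  The demo_* names and the
 * hard-coded per-segment row counts, standing in for PQntuples(), are
 * assumptions for illustration only.
 */
#include <stdio.h>

static int
demo_locate_row(const int *rows_per_seg, int nsegs, int whichelement,
                int *whichresultset)
{
    int     whichrow = whichelement;
    int     rs = 0;

    while (rs < nsegs && whichrow >= rows_per_seg[rs])
    {
        whichrow -= rows_per_seg[rs];
        rs++;
    }
    *whichresultset = rs;
    return (rs < nsegs) ? whichrow : -1;    /* -1: past the last row */
}

int
main(void)
{
    int     rows_per_seg[] = {3, 0, 2};     /* e.g. three segments, one empty */
    int     i;

    for (i = 0; i < 6; i++)
    {
        int     rs;
        int     row = demo_locate_row(rows_per_seg, 3, i, &rs);

        if (row < 0)
            printf("element %d: done\n", i);
        else
            printf("element %d -> result set %d, row %d\n", i, rs, row);
    }
    return 0;
}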
static bool FindLockCycleRecurse(PGPROC *checkProc, int depth, EDGE *softEdges, /* output argument */ int *nSoftEdges) /* output argument */ { PGPROC *proc; LOCK *lock; PROCLOCK *proclock; SHM_QUEUE *procLocks; LockMethod lockMethodTable; PROC_QUEUE *waitQueue; int queue_size; int conflictMask; int i; int numLockModes, lm; /* * Have we already seen this proc? */ for (i = 0; i < nVisitedProcs; i++) { if (visitedProcs[i] == checkProc) { /* If we return to starting point, we have a deadlock cycle */ if (i == 0) { /* * record total length of cycle --- outer levels will now fill * deadlockDetails[] */ Assert(depth <= MaxBackends); nDeadlockDetails = depth; return true; } /* * Otherwise, we have a cycle but it does not include the start * point, so say "no deadlock". */ return false; } } /* Mark proc as seen */ Assert(nVisitedProcs < MaxBackends); visitedProcs[nVisitedProcs++] = checkProc; /* * If the proc is not waiting, we have no outgoing waits-for edges. */ if (checkProc->links.next == NULL) return false; lock = checkProc->waitLock; if (lock == NULL) return false; lockMethodTable = GetLocksMethodTable(lock); numLockModes = lockMethodTable->numLockModes; conflictMask = lockMethodTable->conflictTab[checkProc->waitLockMode]; /* * Scan for procs that already hold conflicting locks. These are "hard" * edges in the waits-for graph. */ procLocks = &(lock->procLocks); proclock = (PROCLOCK *) SHMQueueNext(procLocks, procLocks, offsetof(PROCLOCK, lockLink)); while (proclock) { proc = proclock->tag.myProc; /* A proc never blocks itself */ if (proc != checkProc) { for (lm = 1; lm <= numLockModes; lm++) { if ((proclock->holdMask & LOCKBIT_ON(lm)) && (conflictMask & LOCKBIT_ON(lm))) { /* * Look for a blocking autovacuum. There can be more than * one in the deadlock cycle, in which case we just pick a * random one. We stash the autovacuum worker's PGPROC so * that the caller can send a cancel signal to it, if * appropriate. * * Note we read vacuumFlags without any locking. This is * OK only for checking the PROC_IS_AUTOVACUUM flag, * because that flag is set at process start and never * reset; there is logic elsewhere to avoid cancelling an * autovacuum that is working for preventing Xid * wraparound problems (which needs to read a different * vacuumFlag bit), but we don't do that here to avoid * grabbing ProcArrayLock. */ if (proc->vacuumFlags & PROC_IS_AUTOVACUUM) blocking_autovacuum_proc = proc; /* This proc hard-blocks checkProc */ if (FindLockCycleRecurse(proc, depth + 1, softEdges, nSoftEdges)) { /* fill deadlockDetails[] */ DEADLOCK_INFO *info = &deadlockDetails[depth]; info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; return true; } /* If no deadlock, we're done looking at this proclock */ break; } } } proclock = (PROCLOCK *) SHMQueueNext(procLocks, &proclock->lockLink, offsetof(PROCLOCK, lockLink)); } /* * Scan for procs that are ahead of this one in the lock's wait queue. * Those that have conflicting requests soft-block this one. This must be * done after the hard-block search, since if another proc both hard- and * soft-blocks this one, we want to call it a hard edge. * * If there is a proposed re-ordering of the lock's wait order, use that * rather than the current wait order. 
*/ for (i = 0; i < nWaitOrders; i++) { if (waitOrders[i].lock == lock) break; } if (i < nWaitOrders) { /* Use the given hypothetical wait queue order */ PGPROC **procs = waitOrders[i].procs; queue_size = waitOrders[i].nProcs; for (i = 0; i < queue_size; i++) { proc = procs[i]; /* Done when we reach the target proc */ if (proc == checkProc) break; /* Is there a conflict with this guy's request? */ if (((1 << proc->waitLockMode) & conflictMask) != 0) { /* This proc soft-blocks checkProc */ if (FindLockCycleRecurse(proc, depth + 1, softEdges, nSoftEdges)) { /* fill deadlockDetails[] */ DEADLOCK_INFO *info = &deadlockDetails[depth]; info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; /* * Add this edge to the list of soft edges in the cycle */ Assert(*nSoftEdges < MaxBackends); softEdges[*nSoftEdges].waiter = checkProc; softEdges[*nSoftEdges].blocker = proc; (*nSoftEdges)++; return true; } } } } else { /* Use the true lock wait queue order */ waitQueue = &(lock->waitProcs); queue_size = waitQueue->size; proc = (PGPROC *) waitQueue->links.next; while (queue_size-- > 0) { /* Done when we reach the target proc */ if (proc == checkProc) break; /* Is there a conflict with this guy's request? */ if (((1 << proc->waitLockMode) & conflictMask) != 0) { /* This proc soft-blocks checkProc */ if (FindLockCycleRecurse(proc, depth + 1, softEdges, nSoftEdges)) { /* fill deadlockDetails[] */ DEADLOCK_INFO *info = &deadlockDetails[depth]; info->locktag = lock->tag; info->lockmode = checkProc->waitLockMode; info->pid = checkProc->pid; /* * Add this edge to the list of soft edges in the cycle */ Assert(*nSoftEdges < MaxBackends); softEdges[*nSoftEdges].waiter = checkProc; softEdges[*nSoftEdges].blocker = proc; (*nSoftEdges)++; return true; } } proc = (PGPROC *) proc->links.next; } } /* * No conflict detected here. */ return false; }
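/*
 * A standalone sketch (not the real deadlock checker) of the recursion shape
 * FindLockCycleRecurse uses above: follow waits-for edges depth-first,
 * remember every process already visited, and report a deadlock only when the
 * search returns to the process it started from; re-reaching some other
 * already-visited process is not a deadlock for the starting process.  The
 * demo_* names and the tiny adjacency matrix are assumptions for illustration
 * only.
 */
#include <stdio.h>

#define DEMO_NPROCS 4

static int  demo_edges[DEMO_NPROCS][DEMO_NPROCS];   /* [i][j] != 0: i waits for j */
static int  demo_visited[DEMO_NPROCS];
static int  demo_nvisited;

static int
demo_find_cycle(int start, int proc)
{
    int     i;

    for (i = 0; i < demo_nvisited; i++)
        if (demo_visited[i] == proc)
            return proc == start;   /* a cycle counts only if it closes at start */
    demo_visited[demo_nvisited++] = proc;

    for (i = 0; i < DEMO_NPROCS; i++)
        if (demo_edges[proc][i] && demo_find_cycle(start, i))
            return 1;
    return 0;
}

int
main(void)
{
    /* 0 waits for 1, 1 waits for 2, 2 waits for 0: a three-way deadlock */
    demo_edges[0][1] = demo_edges[1][2] = demo_edges[2][0] = 1;

    demo_nvisited = 0;
    printf("deadlock starting at 0: %s\n", demo_find_cycle(0, 0) ? "yes" : "no");
    return 0;
}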
/* * pg_lock_status - produce a view with one row per held or awaited lock mode */ Datum pg_lock_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; PG_Lock_Status *mystatus; LockData *lockData; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(16, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "transactionid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "classid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "objid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objsubid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "transaction", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "mode", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "granted", BOOLOID, -1, 0); /* * These next columns are specific to GPDB */ TupleDescInitEntry(tupdesc, (AttrNumber) 14, "mppSessionId", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "mppIsWriter", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 16, "gp_segment_id", INT4OID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); funcctx->user_fctx = (void *) mystatus; mystatus->lockData = GetLockStatusData(); mystatus->currIdx = 0; mystatus->numSegLocks = 0; mystatus->numsegresults = 0; mystatus->segresults = NULL; /* * Seeing the locks just from the masterDB isn't enough to know what is locked, * or if there is a deadlock. That's because the segDBs also take locks. * Some locks show up only on the master, some only on the segDBs, and some on both. * * So, let's collect the lock information from all the segDBs. Sure, this means * there are a lot more rows coming back from pg_locks than before, since most locks * on the segDBs happen across all the segDBs at the same time. But not always, * so let's play it safe and get them all. */ if (Gp_role == GP_ROLE_DISPATCH) { int resultCount = 0; struct pg_result **results = NULL; StringInfoData buffer; StringInfoData errbuf; int i; initStringInfo(&buffer); /* * This query has to match the tupledesc we just made above. */ appendStringInfo(&buffer, "SELECT * FROM pg_lock_status() L " " (locktype text, database oid, relation oid, page int4, tuple int2," " transactionid xid, classid oid, objid oid, objsubid int2," " transaction xid, pid int4, mode text, granted boolean, " " mppSessionId int4, mppIsWriter boolean, gp_segment_id int4) "); initStringInfo(&errbuf); /* * Why dispatch something here, rather than do a UNION ALL in pg_locks view, and * a join to gp_dist_random('gp_id')? There are several important reasons. 
             *
             * The union all method is much slower, and requires taking locks on
             * gp_id.  More importantly, applications such as pgAdmin do queries of
             * this view that involve correlated subqueries joining to other catalog
             * tables, which works if we do it this way, but fails if the view
             * includes the union all.  That completely breaks the server status
             * display in pgAdmin.
             *
             * Why dispatch this way, rather than via SPI?  There are several
             * advantages.  First, it's easy to get "writer gang is busy" errors if
             * we use SPI.
             *
             * Second, this should be much faster, as it doesn't require setting up
             * the interconnect, and doesn't need to touch any actual data tables to
             * be able to get the gp_segment_id.
             *
             * The downside is we get n result sets, where n == number of segDBs.
             *
             * It would be better yet if we sent a plan tree rather than a text
             * string, so the segDBs don't need to parse it.  That would also avoid
             * taking any relation locks on the segDB to get this info (normally we
             * need to get an accessShareLock on pg_locks on the segDB to make sure
             * it doesn't go away during parsing).  But the only safe way I know to
             * do this is to hand-build the plan tree, and I'm too lazy to do it
             * right now.  It's just a matter of building a function scan node, and
             * filling it in with our result set info (from the tupledesc).
             *
             * One thing to note: it's OK to join pg_locks with any catalog table or
             * master-only table, but joining to a distributed table will result in
             * "writer gang busy: possible attempt to execute volatile function in
             * unsupported context" errors, because the scan of the distributed
             * table might already be running on the writer gang when we want to
             * dispatch this.
             *
             * This could be fixed by allocating a reader gang and dispatching to
             * that, but the cost of setting up a new gang is high, and I've never
             * seen anyone need to join this to a distributed table.
             */
            results = cdbdisp_dispatchRMCommand(buffer.data, true, &errbuf, &resultCount);

            if (errbuf.len > 0)
                ereport(ERROR,
                        (errmsg("pg_lock internal error (gathered %d results from cmd '%s')",
                                resultCount, buffer.data),
                         errdetail("%s", errbuf.data)));

            /*
             * I don't think resultCount can ever be zero if errbuf isn't set.  But
             * check to be sure.
             */
            if (resultCount == 0)
                elog(ERROR, "pg_locks didn't get back any data from the segDBs");

            for (i = 0; i < resultCount; i++)
            {
                /*
                 * Any error here should have propagated into errbuf, so we
                 * shouldn't ever see anything other than tuples_ok here.  But,
                 * check to be sure.
                 */
                if (PQresultStatus(results[i]) != PGRES_TUPLES_OK)
                {
                    elog(ERROR, "pg_locks: resultStatus not tuples_Ok");
                }
                else
                {
                    /*
                     * numSegLocks needs to be the total size we are returning to
                     * the application.  At the start of this loop, it has the
                     * count for the masterDB locks.  Add each of the segDB lock
                     * counts.
                     */
                    mystatus->numSegLocks += PQntuples(results[i]);
                }
            }

            pfree(errbuf.data);

            mystatus->numsegresults = resultCount;

            /*
             * cdbdisp_dispatchRMCommand copies the result sets into our memory,
             * which will still exist on the subsequent calls.
             */
            mystatus->segresults = results;

            MemoryContextSwitchTo(oldcontext);
        }
    }

    funcctx = SRF_PERCALL_SETUP();
    mystatus = (PG_Lock_Status *) funcctx->user_fctx;

    lockData = mystatus->lockData;

    /*
     * This loop returns all the local lock data from the segment we are running on.
*/ while (mystatus->currIdx < lockData->nelements) { PROCLOCK *proclock; LOCK *lock; PGPROC *proc; bool granted; LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; Datum values[16]; bool nulls[16]; HeapTuple tuple; Datum result; proclock = &(lockData->proclocks[mystatus->currIdx]); lock = &(lockData->locks[mystatus->currIdx]); proc = &(lockData->procs[mystatus->currIdx]); /* * Look to see if there are any held lock modes in this PROCLOCK. If * so, report, and destructively modify lockData so we don't report * again. */ granted = false; if (proclock->holdMask) { for (mode = 0; mode < MAX_LOCKMODES; mode++) { if (proclock->holdMask & LOCKBIT_ON(mode)) { granted = true; proclock->holdMask &= LOCKBIT_OFF(mode); break; } } } /* * If no (more) held modes to report, see if PROC is waiting for a * lock on this lock. */ if (!granted) { if (proc->waitLock == proclock->tag.myLock) { /* Yes, so report it with proper mode */ mode = proc->waitLockMode; /* * We are now done with this PROCLOCK, so advance pointer to * continue with next one on next call. */ mystatus->currIdx++; } else { /* * Okay, we've displayed all the locks associated with this * PROCLOCK, proceed to the next one. */ mystatus->currIdx++; continue; } } /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); if (lock->tag.locktag_type <= LOCKTAG_ADVISORY) locktypename = LockTagTypeNames[lock->tag.locktag_type]; else { snprintf(tnbuf, sizeof(tnbuf), "unknown %d", (int) lock->tag.locktag_type); locktypename = tnbuf; } values[0] = CStringGetTextDatum(locktypename); switch (lock->tag.locktag_type) { case LOCKTAG_RELATION: case LOCKTAG_RELATION_EXTEND: case LOCKTAG_RELATION_RESYNCHRONIZE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); values[4] = UInt16GetDatum(lock->tag.locktag_field4); nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_TRANSACTION: values[5] = TransactionIdGetDatum(lock->tag.locktag_field1); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; break; case LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[7] = ObjectIdGetDatum(lock->tag.locktag_field3); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; break; case LOCKTAG_RESOURCE_QUEUE: values[1] = ObjectIdGetDatum(proc->databaseId); values[7] = ObjectIdGetDatum(lock->tag.locktag_field1); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[6] = ObjectIdGetDatum(lock->tag.locktag_field2); values[7] = 
ObjectIdGetDatum(lock->tag.locktag_field3); values[8] = Int16GetDatum(lock->tag.locktag_field4); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; break; } values[9] = TransactionIdGetDatum(proc->xid); if (proc->pid != 0) values[10] = Int32GetDatum(proc->pid); else nulls[10] = true; values[11] = DirectFunctionCall1(textin, CStringGetDatum((char *) GetLockmodeName(LOCK_LOCKMETHOD(*lock), mode))); values[12] = BoolGetDatum(granted); values[13] = Int32GetDatum(proc->mppSessionId); values[14] = Int32GetDatum(proc->mppIsWriter); values[15] = Int32GetDatum(Gp_segment); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * This loop only executes on the masterDB and only in dispatch mode, because that * is the only time we dispatched to the segDBs. */ while (mystatus->currIdx >= lockData->nelements && mystatus->currIdx < lockData->nelements + mystatus->numSegLocks) { HeapTuple tuple; Datum result; Datum values[16]; bool nulls[16]; int i; int whichresultset = 0; int whichelement = mystatus->currIdx - lockData->nelements; int whichrow = whichelement; Assert(Gp_role == GP_ROLE_DISPATCH); /* * Because we have one result set per segDB (rather than one big result set with everything), * we need to figure out which result set we are on, and which row within that result set * we are returning. * * So, we walk through all the result sets and all the rows in each one, in order. */ while(whichrow >= PQntuples(mystatus->segresults[whichresultset])) { whichrow -= PQntuples(mystatus->segresults[whichresultset]); whichresultset++; if (whichresultset >= mystatus->numsegresults) break; } /* * If this condition is true, we have already sent everything back, * and we just want to do the SRF_RETURN_DONE */ if (whichresultset >= mystatus->numsegresults) break; mystatus->currIdx++; /* * Form tuple with appropriate data we got from the segDBs */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); /* * For each column, extract out the value (which comes out in text). * Convert it to the appropriate datatype to match our tupledesc, * and put that in values. 
* The columns look like this (from select statement earlier): * * " (locktype text, database oid, relation oid, page int4, tuple int2," * " transactionid xid, classid oid, objid oid, objsubid int2," * " transaction xid, pid int4, mode text, granted boolean, " * " mppSessionId int4, mppIsWriter boolean, gp_segment_id int4) ," */ values[0] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 0)); values[1] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 1))); values[2] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 2))); values[3] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 3))); values[4] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 4))); values[5] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 5))); values[6] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 6))); values[7] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 7))); values[8] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 8))); values[9] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 9))); values[10] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,10))); values[11] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow,11)); values[12] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,12),"t",1)==0); values[13] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,13))); values[14] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,14),"t",1)==0); values[15] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,15))); /* * Copy the null info over. It should all match properly. */ for (i=0; i<16; i++) { nulls[i] = PQgetisnull(mystatus->segresults[whichresultset], whichrow, i); } tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * if we dispatched to the segDBs, free up the memory holding the result sets. * Otherwise we might leak this memory each time we got called (does it automatically * get freed by the pool being deleted? Probably, but this is safer). */ if (mystatus->segresults != NULL) { int i; for (i = 0; i < mystatus->numsegresults; i++) PQclear(mystatus->segresults[i]); free(mystatus->segresults); } SRF_RETURN_DONE(funcctx); }
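/*
 * A standalone sketch of the text-to-value conversions used when re-reading
 * segment rows above: libpq hands every column back as text, so booleans come
 * back as "t"/"f" and numeric columns have to be parsed before they can be
 * turned into datums.  The demo_* helpers are assumptions for illustration
 * only; strtoul() is used here because OIDs are unsigned 32-bit values that
 * can exceed what atoi() can represent.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int
demo_text_to_bool(const char *val)
{
    return strncmp(val, "t", 1) == 0;
}

static unsigned int
demo_text_to_oid(const char *val)
{
    return (unsigned int) strtoul(val, NULL, 10);
}

int
main(void)
{
    /* a fake segment row: granted flag and a relation oid, as libpq text */
    const char *granted = "t";
    const char *relation = "16384";

    printf("granted=%d relation=%u\n",
           demo_text_to_bool(granted), demo_text_to_oid(relation));
    return 0;
}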
/* * pg_lock_status - produce a view with one row per held or awaited lock mode */ Datum pg_lock_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; PG_Lock_Status *mystatus; LockData *lockData; PredicateLockData *predLockData; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(NUM_LOCK_STATUS_COLUMNS, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "virtualxid", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "transactionid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "classid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "objsubid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "virtualtransaction", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "mode", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 14, "granted", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 15, "fastpath", BOOLOID, -1, 0); /* * These next columns are specific to GPDB */ TupleDescInitEntry(tupdesc, (AttrNumber) 16, "mppSessionId", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 17, "mppIsWriter", BOOLOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 18, "gp_segment_id", INT4OID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); funcctx->user_fctx = (void *) mystatus; mystatus->lockData = GetLockStatusData(); mystatus->currIdx = 0; mystatus->predLockData = GetPredicateLockStatusData(); mystatus->predLockIdx = 0; mystatus->numSegLocks = 0; mystatus->numsegresults = 0; mystatus->segresults = NULL; /* * Seeing the locks just from the masterDB isn't enough to know what is locked, * or if there is a deadlock. That's because the segDBs also take locks. * Some locks show up only on the master, some only on the segDBs, and some on both. * * So, let's collect the lock information from all the segDBs. Sure, this means * there are a lot more rows coming back from pg_locks than before, since most locks * on the segDBs happen across all the segDBs at the same time. But not always, * so let's play it safe and get them all. */ if (Gp_role == GP_ROLE_DISPATCH) { CdbPgResults cdb_pgresults = {NULL, 0}; StringInfoData buffer; int i; initStringInfo(&buffer); /* * Why dispatch something here, rather than do a UNION ALL in pg_locks view, and * a join to gp_dist_random('gp_id')? There are several important reasons. * * The union all method is much slower, and requires taking locks on gp_id. 
* More importantly, applications such as pgAdmin do queries of this view that * involve correlated subqueries joining to other catalog tables, * which works if we do it this way, but fails * if the view includes the union all. That completely breaks the server status * display in pgAdmin. * * Why dispatch this way, rather than via SPI? There are several advantages. * First, it's easy to get "writer gang is busy" errors if we use SPI. * * Second, this should be much faster, as it doesn't require setting up * the interconnect, and doesn't need to touch any actual data tables to be * able to get the gp_segment_id. * * The downside is we get n result sets, where n == number of segDBs. * * It would be better yet if we sent a plan tree rather than a text string, * so the segDBs don't need to parse it. That would also avoid taking any relation locks * on the segDB to get this info (normally we need to get an AccessShareLock on pg_locks on the segDB * to make sure it doesn't go away during parsing). But the only safe way I know to do this * is to hand-build the plan tree, and I'm too lazy to do it right now. It's just a matter of * building a function scan node, and filling it in with our result set info (from the tupledesc). * * One thing to note: it's OK to join pg_locks with any catalog table or master-only table, * but joining to a distributed table will result in "writer gang busy: possible attempt to * execute volatile function in unsupported context" errors, because * the scan of the distributed table might already be running on the writer gang * when we want to dispatch this. * * This could be fixed by allocating a reader gang and dispatching to that, but the cost * of setting up a new gang is high, and I've never seen anyone need to join this to a * distributed table. * * GPDB_84_MERGE_FIXME: Should we rewrite this in a different way now that we have * ON SEGMENT/ ON MASTER attributes on functions? */ CdbDispatchCommand("SELECT * FROM pg_catalog.pg_lock_status()", DF_WITH_SNAPSHOT, &cdb_pgresults); if (cdb_pgresults.numResults == 0) elog(ERROR, "pg_locks didn't get back any data from the segDBs"); for (i = 0; i < cdb_pgresults.numResults; i++) { /* * Any error here should have propagated into errbuf, so we shouldn't * ever see anything other than tuples_ok here. But, check to be * sure. */ if (PQresultStatus(cdb_pgresults.pg_results[i]) != PGRES_TUPLES_OK) { cdbdisp_clearCdbPgResults(&cdb_pgresults); elog(ERROR, "pg_locks: resultStatus not tuples_Ok"); } /* * numSegLocks needs to be the total size we are returning to * the application. At the start of this loop, it has the count * for the masterDB locks. Add each of the segDB lock counts. */ mystatus->numSegLocks += PQntuples(cdb_pgresults.pg_results[i]); /* * This query had better match the tupledesc we just made above. */ if (PQnfields(cdb_pgresults.pg_results[i]) != tupdesc->natts) elog(ERROR, "unexpected number of columns returned from pg_lock_status() on segment (%d, expected %d)", PQnfields(cdb_pgresults.pg_results[i]), tupdesc->natts); } mystatus->numsegresults = cdb_pgresults.numResults; /* * The dispatch call above copies the result sets into our memory, which * will still exist on the subsequent calls. */ mystatus->segresults = cdb_pgresults.pg_results; } MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); mystatus = (PG_Lock_Status *) funcctx->user_fctx; lockData = mystatus->lockData; /* * This loop returns all the local lock data from the segment we are running on. 
*/ while (mystatus->currIdx < lockData->nelements) { bool granted; LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; Datum values[NUM_LOCK_STATUS_COLUMNS]; bool nulls[NUM_LOCK_STATUS_COLUMNS]; HeapTuple tuple; Datum result; LockInstanceData *instance; instance = &(lockData->locks[mystatus->currIdx]); /* * Look to see if there are any held lock modes in this PROCLOCK. If * so, report, and destructively modify lockData so we don't report * again. */ granted = false; if (instance->holdMask) { for (mode = 0; mode < MAX_LOCKMODES; mode++) { if (instance->holdMask & LOCKBIT_ON(mode)) { granted = true; instance->holdMask &= LOCKBIT_OFF(mode); break; } } } /* * If no (more) held modes to report, see if PROC is waiting for a * lock on this lock. */ if (!granted) { if (instance->waitLockMode != NoLock) { /* Yes, so report it with proper mode */ mode = instance->waitLockMode; /* * We are now done with this PROCLOCK, so advance pointer to * continue with next one on next call. */ mystatus->currIdx++; } else { /* * Okay, we've displayed all the locks associated with this * PROCLOCK, proceed to the next one. */ mystatus->currIdx++; continue; } } /* * Form tuple with appropriate data. */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); if (instance->locktag.locktag_type <= LOCKTAG_LAST_TYPE) locktypename = LockTagTypeNames[instance->locktag.locktag_type]; else { snprintf(tnbuf, sizeof(tnbuf), "unknown %d", (int) instance->locktag.locktag_type); locktypename = tnbuf; } values[0] = CStringGetTextDatum(locktypename); switch ((LockTagType) instance->locktag.locktag_type) { case LOCKTAG_RELATION: case LOCKTAG_RELATION_EXTEND: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[3] = UInt32GetDatum(instance->locktag.locktag_field3); nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[3] = UInt32GetDatum(instance->locktag.locktag_field3); values[4] = UInt16GetDatum(instance->locktag.locktag_field4); nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_TRANSACTION: values[6] = TransactionIdGetDatum(instance->locktag.locktag_field1); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(instance->locktag.locktag_field1, instance->locktag.locktag_field2); nulls[1] = true; nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[6] = true; nulls[7] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_RELATION_APPENDONLY_SEGMENT_FILE: values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[2] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[7] = ObjectIdGetDatum(instance->locktag.locktag_field3); nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[8] = true; nulls[9] = true; break; case LOCKTAG_RESOURCE_QUEUE: #if 0 values[1] = ObjectIdGetDatum(proc->databaseId); #endif 
nulls[1] = true; values[8] = ObjectIdGetDatum(instance->locktag.locktag_field1); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; nulls[7] = true; nulls[9] = true; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(instance->locktag.locktag_field1); values[7] = ObjectIdGetDatum(instance->locktag.locktag_field2); values[8] = ObjectIdGetDatum(instance->locktag.locktag_field3); values[9] = Int16GetDatum(instance->locktag.locktag_field4); nulls[2] = true; nulls[3] = true; nulls[4] = true; nulls[5] = true; nulls[6] = true; break; } values[10] = VXIDGetDatum(instance->backend, instance->lxid); if (instance->pid != 0) values[11] = Int32GetDatum(instance->pid); else nulls[11] = true; values[12] = CStringGetTextDatum(GetLockmodeName(instance->locktag.locktag_lockmethodid, mode)); values[13] = BoolGetDatum(granted); values[14] = BoolGetDatum(instance->fastpath); values[15] = Int32GetDatum(instance->mppSessionId); values[16] = BoolGetDatum(instance->mppIsWriter); values[17] = Int32GetDatum(GpIdentity.segindex); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * This loop only executes on the masterDB and only in dispatch mode, because that * is the only time we dispatched to the segDBs. */ while (mystatus->currIdx >= lockData->nelements && mystatus->currIdx < lockData->nelements + mystatus->numSegLocks) { HeapTuple tuple; Datum result; Datum values[NUM_LOCK_STATUS_COLUMNS]; bool nulls[NUM_LOCK_STATUS_COLUMNS]; int i; int whichresultset = 0; int whichelement = mystatus->currIdx - lockData->nelements; int whichrow = whichelement; Assert(Gp_role == GP_ROLE_DISPATCH); /* * Because we have one result set per segDB (rather than one big result set with everything), * we need to figure out which result set we are on, and which row within that result set * we are returning. * * So, we walk through all the result sets and all the rows in each one, in order. */ while(whichrow >= PQntuples(mystatus->segresults[whichresultset])) { whichrow -= PQntuples(mystatus->segresults[whichresultset]); whichresultset++; if (whichresultset >= mystatus->numsegresults) break; } /* * If this condition is true, we have already sent everything back, * and we just want to do the SRF_RETURN_DONE */ if (whichresultset >= mystatus->numsegresults) break; mystatus->currIdx++; /* * Form tuple with appropriate data we got from the segDBs */ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); /* * For each column, extract out the value (which comes out in text). * Convert it to the appropriate datatype to match our tupledesc, * and put that in values. 
* The columns look like this (from select statement earlier): * * " (locktype text, database oid, relation oid, page int4, tuple int2," * " transactionid xid, classid oid, objid oid, objsubid int2," * " transaction xid, pid int4, mode text, granted boolean, " * " mppSessionId int4, mppIsWriter boolean, gp_segment_id int4) ," */ values[0] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 0)); values[1] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 1))); values[2] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 2))); values[3] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 3))); values[4] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 4))); values[5] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 5)); values[6] = TransactionIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 6))); values[7] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 7))); values[8] = ObjectIdGetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 8))); values[9] = UInt16GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 9))); values[10] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 10)); values[11] = UInt32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 11))); values[12] = CStringGetTextDatum(PQgetvalue(mystatus->segresults[whichresultset], whichrow, 12)); values[13] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,13),"t",1)==0); values[14] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,14),"t",1)==0); values[15] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,15))); values[16] = BoolGetDatum(strncmp(PQgetvalue(mystatus->segresults[whichresultset], whichrow,16),"t",1)==0); values[17] = Int32GetDatum(atoi(PQgetvalue(mystatus->segresults[whichresultset], whichrow,17))); /* * Copy the null info over. It should all match properly. */ for (i = 0; i < NUM_LOCK_STATUS_COLUMNS; i++) { nulls[i] = PQgetisnull(mystatus->segresults[whichresultset], whichrow, i); } tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * Have returned all regular locks. Now start on the SIREAD predicate * locks. */ predLockData = mystatus->predLockData; if (mystatus->predLockIdx < predLockData->nelements) { PredicateLockTargetType lockType; PREDICATELOCKTARGETTAG *predTag = &(predLockData->locktags[mystatus->predLockIdx]); SERIALIZABLEXACT *xact = &(predLockData->xacts[mystatus->predLockIdx]); Datum values[NUM_LOCK_STATUS_COLUMNS]; bool nulls[NUM_LOCK_STATUS_COLUMNS]; HeapTuple tuple; Datum result; mystatus->predLockIdx++; /* * Form tuple with appropriate data. 
*/ MemSet(values, 0, sizeof(values)); MemSet(nulls, false, sizeof(nulls)); /* lock type */ lockType = GET_PREDICATELOCKTARGETTAG_TYPE(*predTag); values[0] = CStringGetTextDatum(PredicateLockTagTypeNames[lockType]); /* lock target */ values[1] = GET_PREDICATELOCKTARGETTAG_DB(*predTag); values[2] = GET_PREDICATELOCKTARGETTAG_RELATION(*predTag); if (lockType == PREDLOCKTAG_TUPLE) values[4] = GET_PREDICATELOCKTARGETTAG_OFFSET(*predTag); else nulls[4] = true; if ((lockType == PREDLOCKTAG_TUPLE) || (lockType == PREDLOCKTAG_PAGE)) values[3] = GET_PREDICATELOCKTARGETTAG_PAGE(*predTag); else nulls[3] = true; /* these fields are targets for other types of locks */ nulls[5] = true; /* virtualxid */ nulls[6] = true; /* transactionid */ nulls[7] = true; /* classid */ nulls[8] = true; /* objid */ nulls[9] = true; /* objsubid */ /* lock holder */ values[10] = VXIDGetDatum(xact->vxid.backendId, xact->vxid.localTransactionId); if (xact->pid != 0) values[11] = Int32GetDatum(xact->pid); else nulls[11] = true; /* * Lock mode. Currently all predicate locks are SIReadLocks, which are * always held (never waiting) and have no fast path */ values[12] = CStringGetTextDatum("SIReadLock"); values[13] = BoolGetDatum(true); values[14] = BoolGetDatum(false); /* * GPDB_91_MERGE_FIXME: what to set these GPDB-specific fields to? * These commented-out values are copy-pasted from the code above * for normal locks. */ //values[14] = Int32GetDatum(proc->mppSessionId); //values[15] = BoolGetDatum(proc->mppIsWriter); //values[16] = Int32GetDatum(Gp_segment); tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } /* * if we dispatched to the segDBs, free up the memory holding the result sets. * Otherwise we might leak this memory each time we got called (does it automatically * get freed by the pool being deleted? Probably, but this is safer). */ if (mystatus->segresults != NULL) { int i; for (i = 0; i < mystatus->numsegresults; i++) PQclear(mystatus->segresults[i]); free(mystatus->segresults); } SRF_RETURN_DONE(funcctx); }
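/*
 * Illustrative sketch only: the index arithmetic used by the segment-results
 * loop above, pulled out as a standalone program so it can be read in
 * isolation.  A single running index (currIdx minus the count of local locks)
 * is mapped onto "which per-segment result set, which row inside it".  The
 * names here are invented for the example; the real loop works directly on
 * mystatus->segresults and PQntuples().
 */
#include <stdio.h>

/* Returns 0 and fills *set/*row on success, -1 if idx is past the last row. */
static int
locate_row(int idx, const int *rows_per_set, int nsets, int *set, int *row)
{
	int			s = 0;

	while (s < nsets && idx >= rows_per_set[s])
	{
		idx -= rows_per_set[s];		/* skip over this (possibly empty) result set */
		s++;
	}
	if (s >= nsets)
		return -1;					/* every result set has been consumed */

	*set = s;
	*row = idx;
	return 0;
}

int
main(void)
{
	int			counts[3] = {2, 0, 3};	/* e.g. rows returned by three segments */
	int			set;
	int			row;
	int			idx;

	for (idx = 0; idx < 6; idx++)
	{
		if (locate_row(idx, counts, 3, &set, &row) == 0)
			printf("global row %d -> result set %d, row %d\n", idx, set, row);
		else
			printf("global row %d -> past the end, stop returning rows\n", idx);
	}
	return 0;
}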
/* * ProcSleep -- put a process to sleep on the specified lock * * Caller must have set MyProc->heldLocks to reflect locks already held * on the lockable object by this process (under all XIDs). * * The lock table's partition lock must be held at entry, and will be held * at exit. * * Result: STATUS_OK if we acquired the lock, STATUS_ERROR if not (deadlock). * * ASSUME: that no one will fiddle with the queue until after * we release the partition lock. * * NOTES: The process queue is now a priority queue for locking. * * P() on the semaphore should put us to sleep. The process * semaphore is normally zero, so when we try to acquire it, we sleep. */ int ProcSleep(LOCALLOCK *locallock, LockMethod lockMethodTable) { LOCKMODE lockmode = locallock->tag.mode; LOCK *lock = locallock->lock; PROCLOCK *proclock = locallock->proclock; uint32 hashcode = locallock->hashcode; LWLockId partitionLock = LockHashPartitionLock(hashcode); PROC_QUEUE *waitQueue = &(lock->waitProcs); LOCKMASK myHeldLocks = MyProc->heldLocks; bool early_deadlock = false; PGPROC *proc; int i; /* * Determine where to add myself in the wait queue. * * Normally I should go at the end of the queue. However, if I already * hold locks that conflict with the request of any previous waiter, put * myself in the queue just in front of the first such waiter. This is not * a necessary step, since deadlock detection would move me to before that * waiter anyway; but it's relatively cheap to detect such a conflict * immediately, and avoid delaying till deadlock timeout. * * Special case: if I find I should go in front of some waiter, check to * see if I conflict with already-held locks or the requests before that * waiter. If not, then just grant myself the requested lock immediately. * This is the same as the test for immediate grant in LockAcquire, except * we are only considering the part of the wait queue before my insertion * point. */ if (myHeldLocks != 0) { LOCKMASK aheadRequests = 0; proc = (PGPROC *) MAKE_PTR(waitQueue->links.next); for (i = 0; i < waitQueue->size; i++) { /* Must he wait for me? */ if (lockMethodTable->conflictTab[proc->waitLockMode] & myHeldLocks) { /* Must I wait for him ? */ if (lockMethodTable->conflictTab[lockmode] & proc->heldLocks) { /* * Yes, so we have a deadlock. Easiest way to clean up * correctly is to call RemoveFromWaitQueue(), but we * can't do that until we are *on* the wait queue. So, set * a flag to check below, and break out of loop. Also, * record deadlock info for later message. */ RememberSimpleDeadLock(MyProc, lockmode, lock, proc); early_deadlock = true; break; } /* I must go before this waiter. Check special case. */ if ((lockMethodTable->conflictTab[lockmode] & aheadRequests) == 0 && LockCheckConflicts(lockMethodTable, lockmode, lock, proclock, MyProc) == STATUS_OK) { /* Skip the wait and just grant myself the lock. */ GrantLock(lock, proclock, lockmode); GrantAwaitedLock(); return STATUS_OK; } /* Break out of loop to put myself before him */ break; } /* Nope, so advance to next waiter */ aheadRequests |= LOCKBIT_ON(proc->waitLockMode); proc = (PGPROC *) MAKE_PTR(proc->links.next); } /* * If we fall out of loop normally, proc points to waitQueue head, so * we will insert at tail of queue as desired. */ } else { /* I hold no locks, so I can't push in front of anyone. */ proc = (PGPROC *) &(waitQueue->links); } /* * Insert self into queue, ahead of the given proc (or at tail of queue). 
*/ SHMQueueInsertBefore(&(proc->links), &(MyProc->links)); waitQueue->size++; lock->waitMask |= LOCKBIT_ON(lockmode); /* Set up wait information in PGPROC object, too */ MyProc->waitLock = lock; MyProc->waitProcLock = proclock; MyProc->waitLockMode = lockmode; MyProc->waitStatus = STATUS_WAITING; /* * If we detected deadlock, give up without waiting. This must agree with * CheckDeadLock's recovery code, except that we shouldn't release the * semaphore since we haven't tried to lock it yet. */ if (early_deadlock) { RemoveFromWaitQueue(MyProc, hashcode); return STATUS_ERROR; } /* mark that we are waiting for a lock */ lockAwaited = locallock; /* * Release the lock table's partition lock. * * NOTE: this may also cause us to exit critical-section state, possibly * allowing a cancel/die interrupt to be accepted. This is OK because we * have recorded the fact that we are waiting for a lock, and so * LockWaitCancel will clean up if cancel/die happens. */ LWLockRelease(partitionLock); /* * Set timer so we can wake up after awhile and check for a deadlock. If a * deadlock is detected, the handler releases the process's semaphore and * sets MyProc->waitStatus = STATUS_ERROR, allowing us to know that we * must report failure rather than success. * * By delaying the check until we've waited for a bit, we can avoid * running the rather expensive deadlock-check code in most cases. */ if (!enable_sig_alarm(DeadlockTimeout, false)) elog(FATAL, "could not set timer for process wakeup"); /* * If someone wakes us between LWLockRelease and PGSemaphoreLock, * PGSemaphoreLock will not block. The wakeup is "saved" by the semaphore * implementation. While this is normally good, there are cases where a * saved wakeup might be leftover from a previous operation (for example, * we aborted ProcWaitForSignal just before someone did ProcSendSignal). * So, loop to wait again if the waitStatus shows we haven't been granted * nor denied the lock yet. * * We pass interruptOK = true, which eliminates a window in which * cancel/die interrupts would be held off undesirably. This is a promise * that we don't mind losing control to a cancel/die interrupt here. We * don't, because we have no shared-state-change work to do after being * granted the lock (the grantor did it all). We do have to worry about * updating the locallock table, but if we lose control to an error, * LockWaitCancel will fix that up. */ do { PGSemaphoreLock(&MyProc->sem, true); } while (MyProc->waitStatus == STATUS_WAITING); /* * Disable the timer, if it's still running */ if (!disable_sig_alarm(false)) elog(FATAL, "could not disable timer for process wakeup"); /* * Re-acquire the lock table's partition lock. We have to do this to hold * off cancel/die interrupts before we can mess with lockAwaited (else we * might have a missed or duplicated locallock update). */ LWLockAcquire(partitionLock, LW_EXCLUSIVE); /* * We no longer want LockWaitCancel to do anything. */ lockAwaited = NULL; /* * If we got the lock, be sure to remember it in the locallock table. */ if (MyProc->waitStatus == STATUS_OK) GrantAwaitedLock(); /* * We don't have to do anything else, because the awaker did all the * necessary update of the lock table and MyProc. */ return MyProc->waitStatus; }
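/*
 * Illustrative sketch only: the "sleep, then re-check the status word" idiom
 * that ProcSleep uses above, re-expressed with POSIX semaphores so the reason
 * for the loop is easier to see in isolation.  A semaphore can hold a saved
 * wakeup left over from an earlier, abandoned wait, so a single sem_wait()
 * returning does not prove the lock was granted or denied; the waiter loops
 * until the status word actually changes.  This is not PostgreSQL's
 * PGSemaphore API; the names, types and constants below are invented for the
 * example, and the granter is assumed to update *status (under the lock that
 * protects it) before posting the semaphore.
 */
#include <semaphore.h>

#define WAITING	0
#define GRANTED	1
#define DENIED	2

static int
wait_for_grant(sem_t *sem, volatile int *status)
{
	do
	{
		/* Absorb one wakeup; it may be a leftover from a previous wait. */
		while (sem_wait(sem) != 0)
			;					/* retry if interrupted by a signal */
	} while (*status == WAITING);

	return *status;				/* GRANTED or DENIED */
}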
/* * pg_lock_status - produce a view with one row per held or awaited lock mode */ Datum pg_lock_status(PG_FUNCTION_ARGS) { FuncCallContext *funcctx; PG_Lock_Status *mystatus; LockData *lockData; if (SRF_IS_FIRSTCALL()) { TupleDesc tupdesc; MemoryContext oldcontext; /* create a function context for cross-call persistence */ funcctx = SRF_FIRSTCALL_INIT(); /* * switch to memory context appropriate for multiple function calls */ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); /* build tupdesc for result tuples */ /* this had better match pg_locks view in system_views.sql */ tupdesc = CreateTemplateTupleDesc(14, false); TupleDescInitEntry(tupdesc, (AttrNumber) 1, "locktype", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 2, "database", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 3, "relation", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 4, "page", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 5, "tuple", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 6, "virtualxid", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 7, "transactionid", XIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 8, "classid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 9, "objid", OIDOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 10, "objsubid", INT2OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 11, "virtualtransaction", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 12, "pid", INT4OID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 13, "mode", TEXTOID, -1, 0); TupleDescInitEntry(tupdesc, (AttrNumber) 14, "granted", BOOLOID, -1, 0); funcctx->tuple_desc = BlessTupleDesc(tupdesc); /* * Collect all the locking information that we will format and send * out as a result set. */ mystatus = (PG_Lock_Status *) palloc(sizeof(PG_Lock_Status)); funcctx->user_fctx = (void *) mystatus; mystatus->lockData = GetLockStatusData(); mystatus->currIdx = 0; MemoryContextSwitchTo(oldcontext); } funcctx = SRF_PERCALL_SETUP(); mystatus = (PG_Lock_Status *) funcctx->user_fctx; lockData = mystatus->lockData; while (mystatus->currIdx < lockData->nelements) { PROCLOCK *proclock; LOCK *lock; PGPROC *proc; bool granted; LOCKMODE mode = 0; const char *locktypename; char tnbuf[32]; Datum values[14]; char nulls[14]; HeapTuple tuple; Datum result; proclock = &(lockData->proclocks[mystatus->currIdx]); lock = &(lockData->locks[mystatus->currIdx]); proc = &(lockData->procs[mystatus->currIdx]); /* * Look to see if there are any held lock modes in this PROCLOCK. If * so, report, and destructively modify lockData so we don't report * again. */ granted = false; if (proclock->holdMask) { for (mode = 0; mode < MAX_LOCKMODES; mode++) { if (proclock->holdMask & LOCKBIT_ON(mode)) { granted = true; proclock->holdMask &= LOCKBIT_OFF(mode); break; } } } /* * If no (more) held modes to report, see if PROC is waiting for a * lock on this lock. */ if (!granted) { if (proc->waitLock == proclock->tag.myLock) { /* Yes, so report it with proper mode */ mode = proc->waitLockMode; /* * We are now done with this PROCLOCK, so advance pointer to * continue with next one on next call. */ mystatus->currIdx++; } else { /* * Okay, we've displayed all the locks associated with this * PROCLOCK, proceed to the next one. */ mystatus->currIdx++; continue; } } /* * Form tuple with appropriate data. 
*/ MemSet(values, 0, sizeof(values)); MemSet(nulls, ' ', sizeof(nulls)); if (lock->tag.locktag_type <= LOCKTAG_LAST_TYPE) locktypename = LockTagTypeNames[lock->tag.locktag_type]; else { snprintf(tnbuf, sizeof(tnbuf), "unknown %d", (int) lock->tag.locktag_type); locktypename = tnbuf; } values[0] = DirectFunctionCall1(textin, CStringGetDatum(locktypename)); switch ((LockTagType) lock->tag.locktag_type) { case LOCKTAG_RELATION: case LOCKTAG_RELATION_EXTEND: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); nulls[3] = 'n'; nulls[4] = 'n'; nulls[5] = 'n'; nulls[6] = 'n'; nulls[7] = 'n'; nulls[8] = 'n'; nulls[9] = 'n'; break; case LOCKTAG_PAGE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); nulls[4] = 'n'; nulls[5] = 'n'; nulls[6] = 'n'; nulls[7] = 'n'; nulls[8] = 'n'; nulls[9] = 'n'; break; case LOCKTAG_TUPLE: values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[2] = ObjectIdGetDatum(lock->tag.locktag_field2); values[3] = UInt32GetDatum(lock->tag.locktag_field3); values[4] = UInt16GetDatum(lock->tag.locktag_field4); nulls[5] = 'n'; nulls[6] = 'n'; nulls[7] = 'n'; nulls[8] = 'n'; nulls[9] = 'n'; break; case LOCKTAG_TRANSACTION: values[6] = TransactionIdGetDatum(lock->tag.locktag_field1); nulls[1] = 'n'; nulls[2] = 'n'; nulls[3] = 'n'; nulls[4] = 'n'; nulls[5] = 'n'; nulls[7] = 'n'; nulls[8] = 'n'; nulls[9] = 'n'; break; case LOCKTAG_VIRTUALTRANSACTION: values[5] = VXIDGetDatum(lock->tag.locktag_field1, lock->tag.locktag_field2); nulls[1] = 'n'; nulls[2] = 'n'; nulls[3] = 'n'; nulls[4] = 'n'; nulls[6] = 'n'; nulls[7] = 'n'; nulls[8] = 'n'; nulls[9] = 'n'; break; case LOCKTAG_OBJECT: case LOCKTAG_USERLOCK: case LOCKTAG_ADVISORY: default: /* treat unknown locktags like OBJECT */ values[1] = ObjectIdGetDatum(lock->tag.locktag_field1); values[7] = ObjectIdGetDatum(lock->tag.locktag_field2); values[8] = ObjectIdGetDatum(lock->tag.locktag_field3); values[9] = Int16GetDatum(lock->tag.locktag_field4); nulls[2] = 'n'; nulls[3] = 'n'; nulls[4] = 'n'; nulls[5] = 'n'; nulls[6] = 'n'; break; } values[10] = VXIDGetDatum(proc->backendId, proc->lxid); if (proc->pid != 0) values[11] = Int32GetDatum(proc->pid); else nulls[11] = 'n'; values[12] = DirectFunctionCall1(textin, CStringGetDatum(GetLockmodeName(LOCK_LOCKMETHOD(*lock), mode))); values[13] = BoolGetDatum(granted); tuple = heap_formtuple(funcctx->tuple_desc, values, nulls); result = HeapTupleGetDatum(tuple); SRF_RETURN_NEXT(funcctx, result); } SRF_RETURN_DONE(funcctx); }
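/*
 * Illustrative sketch only: the two tuple-building conventions that appear in
 * the versions above.  The older variant uses heap_formtuple() with a char
 * null array (' ' = value present, 'n' = NULL), while the newer variant uses
 * heap_form_tuple() with a bool null array (true = NULL).  The two-column
 * descriptor below is an assumption made for the example, the exact header
 * that declares these functions differs between server versions, and only
 * older servers provide both functions (recent ones keep heap_form_tuple()
 * only).
 */
static void
form_tuple_both_ways(TupleDesc tupdesc)		/* assumed: a two-column descriptor */
{
	Datum		values[2];
	char		charnulls[2];
	bool		boolnulls[2];
	HeapTuple	old_style;
	HeapTuple	new_style;

	values[0] = Int32GetDatum(42);
	values[1] = (Datum) 0;		/* the second column is NULL either way */

	/* older convention: char flags, as in the 8.3-era function above */
	charnulls[0] = ' ';
	charnulls[1] = 'n';
	old_style = heap_formtuple(tupdesc, values, charnulls);

	/* current convention: bool flags, as in the GPDB function above */
	boolnulls[0] = false;
	boolnulls[1] = true;
	new_style = heap_form_tuple(tupdesc, values, boolnulls);

	/* a real caller would return or store these; the sketch just drops them */
	(void) old_style;
	(void) new_style;
}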