/*
 * pgstat_heap -- returns live/dead tuples info in a heap
 *
 * Walks the whole relation once, classifying every tuple as live or dead
 * under a dirty snapshot, while opportunistically accumulating per-page
 * free space in the same physical pass.  Results are packaged by
 * build_pgstattuple_type().
 */
static Datum
pgstat_heap(Relation rel, FunctionCallInfo fcinfo)
{
	HeapScanDesc scan;
	HeapTuple	tuple;
	BlockNumber nblocks;
	BlockNumber block = 0;		/* next block to count free space in */
	BlockNumber tupblock;
	Buffer		buffer;
	pgstattuple_type stat = {0};
	SnapshotData SnapshotDirty;

	/* Disable syncscan because we assume we scan from block zero upwards */
	scan = heap_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false);

	/*
	 * The scan uses SnapshotAny so that every tuple is returned; liveness is
	 * then judged per-tuple below with a dirty snapshot.
	 */
	InitDirtySnapshot(SnapshotDirty);

	nblocks = scan->rs_nblocks; /* # blocks to be scanned */

	/* scan the relation */
	while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		CHECK_FOR_INTERRUPTS();

		/* must hold a buffer lock to call HeapTupleSatisfiesVisibility */
		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_SHARE);

		if (HeapTupleSatisfiesVisibility(tuple, &SnapshotDirty, scan->rs_cbuf))
		{
			/* visible to the dirty snapshot: count as live */
			stat.tuple_len += tuple->t_len;
			stat.tuple_count++;
		}
		else
		{
			/* not visible: count as dead */
			stat.dead_tuple_len += tuple->t_len;
			stat.dead_tuple_count++;
		}

		LockBuffer(scan->rs_cbuf, BUFFER_LOCK_UNLOCK);

		/*
		 * To avoid physically reading the table twice, try to do the
		 * free-space scan in parallel with the heap scan.  However,
		 * heap_getnext may find no tuples on a given page, so we cannot
		 * simply examine the pages returned by the heap scan.
		 */
		tupblock = BlockIdGetBlockNumber(&tuple->t_self.ip_blkid);

		/* count free space on every block up to the current tuple's block */
		while (block <= tupblock)
		{
			CHECK_FOR_INTERRUPTS();

			buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block,
										RBM_NORMAL, scan->rs_strategy);
			LockBuffer(buffer, BUFFER_LOCK_SHARE);
			stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
			UnlockReleaseBuffer(buffer);
			block++;
		}
	}

	/*
	 * Pick up free space on any trailing blocks the tuple scan never
	 * reached (e.g. all-empty pages at the end of the relation).
	 */
	while (block < nblocks)
	{
		CHECK_FOR_INTERRUPTS();

		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block,
									RBM_NORMAL, scan->rs_strategy);
		LockBuffer(buffer, BUFFER_LOCK_SHARE);
		stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer));
		UnlockReleaseBuffer(buffer);
		block++;
	}

	heap_endscan(scan);
	relation_close(rel, AccessShareLock);

	/* cast before multiply to avoid 32-bit overflow on large relations */
	stat.table_len = (uint64) nblocks *BLCKSZ;

	return build_pgstattuple_type(&stat, fcinfo);
}
/*
 * Search the relation 'rel' for tuple using the sequential scan.
 *
 * If a matching tuple is found, lock it with lockmode, fill the slot with its
 * contents, and return true.  Return false otherwise.
 *
 * Note that this stops on the first matching tuple.
 *
 * This can obviously be quite slow on tables that have more than few rows.
 */
bool
RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode,
						 TupleTableSlot *searchslot, TupleTableSlot *outslot)
{
	HeapTuple	scantuple;
	HeapScanDesc scan;
	SnapshotData snap;
	TransactionId xwait;
	bool		found;
	TupleDesc	desc = RelationGetDescr(rel);

	Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor));

	/* Start a sequential heap scan under a dirty snapshot. */
	InitDirtySnapshot(snap);
	scan = heap_beginscan(rel, &snap, 0, NULL);

retry:
	found = false;

	/* restart the scan from the beginning on every (re)try */
	heap_rescan(scan, NULL);

	/* Try to find the tuple */
	while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
	{
		if (!tuple_equals_slot(desc, scantuple, searchslot))
			continue;

		found = true;
		ExecStoreTuple(scantuple, outslot, InvalidBuffer, false);
		ExecMaterializeSlot(outslot);

		/*
		 * The dirty snapshot reports an in-progress inserter in snap.xmin or
		 * an in-progress deleter/updater in snap.xmax.
		 */
		xwait = TransactionIdIsValid(snap.xmin) ?
			snap.xmin : snap.xmax;

		/*
		 * If the tuple is locked, wait for locking transaction to finish and
		 * retry.
		 */
		if (TransactionIdIsValid(xwait))
		{
			XactLockTableWait(xwait, NULL, NULL, XLTW_None);
			goto retry;
		}
	}

	/* Found tuple, try to lock it in the lockmode. */
	if (found)
	{
		Buffer		buf;
		HeapUpdateFailureData hufd;
		HTSU_Result res;
		HeapTupleData locktup;

		/* only the TID is needed to identify the tuple to lock */
		ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self);

		PushActiveSnapshot(GetLatestSnapshot());

		res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false),
							  lockmode,
							  LockWaitBlock,
							  false /* don't follow updates */ ,
							  &buf, &hufd);
		/* the tuple slot already has the buffer pinned */
		ReleaseBuffer(buf);

		PopActiveSnapshot();

		switch (res)
		{
			case HeapTupleMayBeUpdated:
				break;
			case HeapTupleUpdated:
				/* XXX: Improve handling here */
				ereport(LOG,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("concurrent update, retrying")));
				goto retry;
			case HeapTupleInvisible:
				elog(ERROR, "attempted to lock invisible tuple");
			default:
				elog(ERROR, "unexpected heap_lock_tuple status: %u", res);
				break;
		}
	}

	heap_endscan(scan);

	return found;
}
/*
 * GetNewSequenceRelationOid
 *		Get a sequence relation Oid and verify it is valid against
 *		the pg_class relation by doing an index lookup.  The caller
 *		should have a suitable lock on pg_class.
 *
 * A candidate OID is accepted only if (a) no pg_class row with that OID is
 * visible to a dirty snapshot, (b) no relation file of that name already
 * exists in the target tablespace/database directory, and (c) the OID has
 * not been pre-assigned elsewhere (IsOidAcceptable).
 */
Oid
GetNewSequenceRelationOid(Relation relation)
{
	Oid			newOid;
	Oid			oidIndex;
	Relation	indexrel;
	SnapshotData SnapshotDirty;
	IndexScanDesc scan;
	ScanKeyData key;
	bool		collides;
	RelFileNode rnode;
	char	   *rpath;
	int			fd;

	/* This should match RelationInitPhysicalAddr */
	rnode.spcNode = relation->rd_rel->reltablespace ? relation->rd_rel->reltablespace :
		MyDatabaseTableSpace;
	rnode.dbNode = relation->rd_rel->relisshared ? InvalidOid : MyDatabaseId;

	/* We should only be using pg_class */
	Assert(RelationGetRelid(relation) == RelationRelationId);

	/* The relcache will cache the identity of the OID index for us */
	oidIndex = RelationGetOidIndex(relation);

	/* Otherwise, use the index to find a nonconflicting OID */
	indexrel = index_open(oidIndex, AccessShareLock);

	/*
	 * Use a dirty snapshot so that uncommitted insertions of the same OID
	 * are seen as collisions too.
	 */
	InitDirtySnapshot(SnapshotDirty);

	/* Generate new sequence relation OIDs until we find one not in the table */
	do
	{
		CHECK_FOR_INTERRUPTS();

		newOid = GetNewSequenceRelationObjectId();

		ScanKeyInit(&key,
					(AttrNumber) 1,
					BTEqualStrategyNumber, F_OIDEQ,
					ObjectIdGetDatum(newOid));

		/* see notes above about using SnapshotDirty */
		scan = index_beginscan(relation, indexrel,
							   &SnapshotDirty, 1, &key);

		collides = HeapTupleIsValid(index_getnext(scan, ForwardScanDirection));

		index_endscan(scan);

		if (!collides)
		{
			/*
			 * Check for existing file of same name.
			 *
			 * Bug fix: rnode.relNode was previously left uninitialized here,
			 * so relpath() probed a garbage filename.  It must be set to the
			 * candidate OID, matching CheckNewRelFileNodeIsOk().
			 */
			rnode.relNode = newOid;
			rpath = relpath(rnode);
			fd = BasicOpenFile(rpath, O_RDONLY | PG_BINARY, 0);

			if (fd >= 0)
			{
				/* definite collision */
				gp_retry_close(fd);
				collides = true;
			}
			else
			{
				/*
				 * Here we have a little bit of a dilemma: if errno is something
				 * other than ENOENT, should we declare a collision and loop? In
				 * particular one might think this advisable for, say, EPERM.
				 * However there really shouldn't be any unreadable files in a
				 * tablespace directory, and if the EPERM is actually complaining
				 * that we can't read the directory itself, we'd be in an infinite
				 * loop. In practice it seems best to go ahead regardless of the
				 * errno. If there is a colliding file we will get an smgr
				 * failure when we attempt to create the new relation file.
				 */
				collides = false;
			}

			/* don't leak the path string on each loop iteration */
			pfree(rpath);
		}

		/*
		 * Also check that the OID hasn't been pre-assigned for a different
		 * relation.
		 *
		 * We're a bit sloppy between OIDs and relfilenodes here; it would be
		 * OK to use a value that's been reserved for use as a type or
		 * relation OID here, as long as the relfilenode is free. But there's
		 * no harm in skipping over those too, so we don't bother to
		 * distinguish them.
		 */
		if (!collides && !IsOidAcceptable(newOid))
			collides = true;
	} while (collides);

	index_close(indexrel, AccessShareLock);

	return newOid;
}
/*
 * Search the relation 'rel' for tuple using the index.
 *
 * If a matching tuple is found, lock it with lockmode, fill the slot with its
 * contents, and return true.  Return false otherwise.
 */
bool
RelationFindReplTupleByIndex(Relation rel, Oid idxoid,
							 LockTupleMode lockmode,
							 TupleTableSlot *searchslot,
							 TupleTableSlot *outslot)
{
	HeapTuple	scantuple;
	ScanKeyData skey[INDEX_MAX_KEYS];
	IndexScanDesc scan;
	SnapshotData snap;
	TransactionId xwait;
	Relation	idxrel;
	bool		found;

	/* Open the index. */
	idxrel = index_open(idxoid, RowExclusiveLock);

	/* Start an index scan under a dirty snapshot. */
	InitDirtySnapshot(snap);
	scan = index_beginscan(rel, idxrel, &snap,
						   RelationGetNumberOfAttributes(idxrel),
						   0);

	/* Build scan key. */
	build_replindex_scan_key(skey, rel, idxrel, searchslot);

retry:
	found = false;

	/* restart the index scan on every (re)try */
	index_rescan(scan, skey, RelationGetNumberOfAttributes(idxrel), NULL, 0);

	/* Try to find the tuple */
	if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL)
	{
		found = true;
		ExecStoreTuple(scantuple, outslot, InvalidBuffer, false);
		ExecMaterializeSlot(outslot);

		/*
		 * The dirty snapshot reports an in-progress inserter in snap.xmin or
		 * an in-progress deleter/updater in snap.xmax.
		 */
		xwait = TransactionIdIsValid(snap.xmin) ?
			snap.xmin : snap.xmax;

		/*
		 * If the tuple is locked, wait for locking transaction to finish and
		 * retry.
		 */
		if (TransactionIdIsValid(xwait))
		{
			XactLockTableWait(xwait, NULL, NULL, XLTW_None);
			goto retry;
		}
	}

	/* Found tuple, try to lock it in the lockmode. */
	if (found)
	{
		Buffer		buf;
		HeapUpdateFailureData hufd;
		HTSU_Result res;
		HeapTupleData locktup;

		/* only the TID is needed to identify the tuple to lock */
		ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self);

		PushActiveSnapshot(GetLatestSnapshot());

		res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false),
							  lockmode,
							  LockWaitBlock,
							  false /* don't follow updates */ ,
							  &buf, &hufd);
		/* the tuple slot already has the buffer pinned */
		ReleaseBuffer(buf);

		PopActiveSnapshot();

		switch (res)
		{
			case HeapTupleMayBeUpdated:
				break;
			case HeapTupleUpdated:
				/* XXX: Improve handling here */
				ereport(LOG,
						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
						 errmsg("concurrent update, retrying")));
				goto retry;
			case HeapTupleInvisible:
				elog(ERROR, "attempted to lock invisible tuple");
			default:
				elog(ERROR, "unexpected heap_lock_tuple status: %u", res);
				break;
		}
	}

	index_endscan(scan);

	/* Don't release lock until commit. */
	index_close(idxrel, NoLock);

	return found;
}
bool CheckNewRelFileNodeIsOk(Oid newOid, Oid reltablespace, bool relisshared, Relation pg_class) { RelFileNode rnode; char *rpath; int fd; bool collides; SnapshotData SnapshotDirty; InitDirtySnapshot(SnapshotDirty); if (pg_class) { Oid oidIndex; Relation indexrel; IndexScanDesc scan; ScanKeyData key; Assert(!IsBootstrapProcessingMode()); Assert(pg_class->rd_rel->relhasoids); /* The relcache will cache the identity of the OID index for us */ oidIndex = RelationGetOidIndex(pg_class); Assert(OidIsValid(oidIndex)); indexrel = index_open(oidIndex, AccessShareLock); ScanKeyInit(&key, (AttrNumber) 1, BTEqualStrategyNumber, F_OIDEQ, ObjectIdGetDatum(newOid)); scan = index_beginscan(pg_class, indexrel, &SnapshotDirty, 1, &key); collides = HeapTupleIsValid(index_getnext(scan, ForwardScanDirection)); index_endscan(scan); index_close(indexrel, AccessShareLock); if (collides) elog(ERROR, "relfilenode %d already in use in \"pg_class\"", newOid); } /* This should match RelationInitPhysicalAddr */ rnode.spcNode = reltablespace ? reltablespace : MyDatabaseTableSpace; rnode.dbNode = relisshared ? InvalidOid : MyDatabaseId; rnode.relNode = newOid; /* Check for existing file of same name */ rpath = relpath(rnode); fd = BasicOpenFile(rpath, O_RDONLY | PG_BINARY, 0); if (fd >= 0) { /* definite collision */ gp_retry_close(fd); collides = true; } else collides = false; pfree(rpath); if (collides && !relisshared) elog(ERROR, "oid %d already in use", newOid); while(GetNewObjectId() < newOid); return !collides; }