/*
 * Check if our cached information about a datatype is still valid
 */
static bool
PLy_procedure_argument_valid(PLyTypeInfo *arg)
{
    HeapTuple   relTup;
    bool        valid;

    /* Nothing to cache unless type is composite */
    if (arg->is_rowtype != 1)
        return true;

    /*
     * Zero typ_relid means that we got called on an output argument of a
     * function returning an unnamed record type; the info for it can't
     * change.
     */
    if (!OidIsValid(arg->typ_relid))
        return true;

    /* Else we should have some cached data */
    Assert(TransactionIdIsValid(arg->typrel_xmin));
    Assert(ItemPointerIsValid(&arg->typrel_tid));

    /* Get the pg_class tuple for the data type */
    relTup = SearchSysCache1(RELOID, ObjectIdGetDatum(arg->typ_relid));
    if (!HeapTupleIsValid(relTup))
        elog(ERROR, "cache lookup failed for relation %u", arg->typ_relid);

    /* If it has changed, the cached data is not valid */
    valid = (arg->typrel_xmin == HeapTupleHeaderGetRawXmin(relTup->t_data) &&
             ItemPointerEquals(&arg->typrel_tid, &relTup->t_self));

    ReleaseSysCache(relTup);

    return valid;
}
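The (xmin, TID) pair acts as a cheap version token for the cached pg_class row: any committed change to the row gives it a new xmin, and any physical move of the row gives it a new TID, so comparing both detects staleness without comparing row contents. Below is a minimal standalone sketch of the same idea outside the backend; the struct names and the "update" step are invented for illustration, not PostgreSQL source.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-ins for a catalog row's system columns. */
typedef struct { uint32_t blk; uint16_t off; } Tid;
typedef struct { uint32_t xmin; Tid self; } CatalogRow;

/* Token captured when the cache entry is built. */
typedef struct { uint32_t xmin; Tid tid; } CacheToken;

static bool
token_still_valid(const CacheToken *tok, const CatalogRow *row)
{
    return tok->xmin == row->xmin &&
           tok->tid.blk == row->self.blk &&
           tok->tid.off == row->self.off;
}

int
main(void)
{
    CatalogRow  row = {.xmin = 100, .self = {7, 3}};
    CacheToken  tok = {.xmin = row.xmin, .tid = row.self};

    printf("valid before update: %d\n", token_still_valid(&tok, &row));

    /* An update writes a new row version: new xmin, usually a new TID. */
    row.xmin = 101;
    row.self.off = 4;
    printf("valid after update:  %d\n", token_still_valid(&tok, &row));
    return 0;
}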
/*
 * gistgettuple() -- Get the next tuple in the scan
 */
Datum
gistgettuple(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
    GISTScanOpaque so;
    ItemPointerData tid;
    bool        res;

    so = (GISTScanOpaque) scan->opaque;

    /*
     * If we have produced an index tuple in the past and the executor has
     * informed us we need to mark it as "killed", do so now.
     */
    if (scan->kill_prior_tuple && ItemPointerIsValid(&(so->curpos)))
        killtuple(scan->indexRelation, so, &(so->curpos));

    /*
     * Get the next tuple that matches the search key.  If asked to skip
     * killed tuples, continue looping until we find a non-killed tuple that
     * matches the search key.
     */
    res = (gistnext(scan, dir, &tid, 1, scan->ignore_killed_tuples)) ? true : false;

    PG_RETURN_BOOL(res);
}
/*
 * Add an ItemPointer to a leaf page.
 */
void
GinDataPageAddItemPointer(Page page, ItemPointer data, OffsetNumber offset)
{
    OffsetNumber maxoff = GinPageGetOpaque(page)->maxoff;
    char       *ptr;

    Assert(ItemPointerIsValid(data));
    Assert(GinPageIsLeaf(page));

    if (offset == InvalidOffsetNumber)
    {
        /* Append after the current last entry */
        ptr = (char *) GinDataPageGetItemPointer(page, maxoff + 1);
    }
    else
    {
        ptr = (char *) GinDataPageGetItemPointer(page, offset);
        /* Shift the entries at and after 'offset' right by one slot */
        if (maxoff + 1 - offset != 0)
            memmove(ptr + sizeof(ItemPointerData),
                    ptr,
                    (maxoff - offset + 1) * sizeof(ItemPointerData));
    }
    memcpy(ptr, data, sizeof(ItemPointerData));

    GinPageGetOpaque(page)->maxoff++;
}
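The memmove opens a gap by sliding the tail of the on-page ItemPointer array one slot to the right before the new entry is copied in. A standalone sketch of the same gap-opening insert on a plain int array (the array size, values, and insert position are invented for illustration):

#include <stdio.h>
#include <string.h>

int
main(void)
{
    int items[8] = {10, 20, 30, 40};    /* 4 live entries, room for more */
    int nitems = 4;
    int newval = 25;
    int offset = 2;                     /* insert position, 0-based */

    /* Slide items[offset..nitems-1] one slot right to open a gap. */
    memmove(&items[offset + 1], &items[offset],
            (nitems - offset) * sizeof(int));
    items[offset] = newval;
    nitems++;

    for (int i = 0; i < nitems; i++)
        printf("%d ", items[i]);        /* prints: 10 20 25 30 40 */
    printf("\n");
    return 0;
}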
/*
 * Insert the entries for one heap pointer.
 *
 * Since the entries are being inserted into a balanced binary tree, you
 * might think that the order of insertion wouldn't be critical, but it turns
 * out that inserting the entries in sorted order results in a lot of
 * rebalancing operations and is slow.  To prevent this, we attempt to insert
 * the nodes in an order that will produce a nearly-balanced tree if the
 * input is in fact sorted.
 *
 * We do this as follows.  First, we imagine that we have an array whose size
 * is the smallest power of two greater than or equal to the actual array
 * size.  Second, we insert the middle entry of our virtual array into the
 * tree; then, we insert the middles of each half of our virtual array, then
 * middles of quarters, etc.
 */
void
ginInsertBAEntries(BuildAccumulator *accum,
                   ItemPointer heapptr, OffsetNumber attnum,
                   Datum *entries, GinNullCategory *categories,
                   int32 nentries)
{
    uint32      step = nentries;

    if (nentries <= 0)
        return;

    Assert(ItemPointerIsValid(heapptr) && attnum >= FirstOffsetNumber);

    /*
     * Make step the largest power of 2 that is less than or equal to
     * nentries.
     */
    step |= (step >> 1);
    step |= (step >> 2);
    step |= (step >> 4);
    step |= (step >> 8);
    step |= (step >> 16);
    step >>= 1;
    step++;

    while (step > 0)
    {
        int         i;

        for (i = step - 1; i < nentries && i >= 0; i += step << 1 /* *2 */ )
            ginInsertBAEntry(accum, heapptr, attnum,
                             entries[i], categories[i]);

        step >>= 1;             /* /2 */
    }
}
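A minimal standalone program showing both the bit-smearing computation of the initial step and the resulting visitation order. For nentries = 7 it prints indexes 3, 1, 5, 0, 2, 4, 6: the middle first, then the middles of each half, then of each quarter, which is exactly the nearly-balanced insertion order the comment above describes.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
    int32_t     nentries = 7;
    uint32_t    step = nentries;

    /* Largest power of 2 <= nentries, via bit smearing. */
    step |= (step >> 1);
    step |= (step >> 2);
    step |= (step >> 4);
    step |= (step >> 8);
    step |= (step >> 16);
    step >>= 1;
    step++;

    while (step > 0)
    {
        for (int32_t i = step - 1; i < nentries; i += step << 1)
            printf("%d ", i);   /* would insert entries[i] here */
        step >>= 1;
    }
    printf("\n");               /* prints: 3 1 5 0 2 4 6 */
    return 0;
}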
/*
 * adjustiptr() -- adjust current and marked item pointers in the scan
 *
 *      Depending on the type of update and the place it happened, we
 *      need to do nothing, to back up one record, or to start over on
 *      the same page.
 */
static void
adjustiptr(IndexScanDesc s, ItemPointer iptr,
           int op, BlockNumber blkno, OffsetNumber offnum)
{
    OffsetNumber curoff;
    RTreeScanOpaque so;

    if (ItemPointerIsValid(iptr))
    {
        if (ItemPointerGetBlockNumber(iptr) == blkno)
        {
            curoff = ItemPointerGetOffsetNumber(iptr);
            so = (RTreeScanOpaque) s->opaque;

            switch (op)
            {
                case RTOP_DEL:
                    /* back up one if we need to */
                    if (curoff >= offnum)
                    {
                        if (curoff > FirstOffsetNumber)
                        {
                            /* just adjust the item pointer */
                            ItemPointerSet(iptr, blkno, OffsetNumberPrev(curoff));
                        }
                        else
                        {
                            /* remember that we're before the current tuple */
                            ItemPointerSet(iptr, blkno, FirstOffsetNumber);
                            if (iptr == &(s->currentItemData))
                                so->s_flags |= RTS_CURBEFORE;
                            else
                                so->s_flags |= RTS_MRKBEFORE;
                        }
                    }
                    break;
                case RTOP_SPLIT:
                    /* back to start of page on split */
                    ItemPointerSet(iptr, blkno, FirstOffsetNumber);
                    if (iptr == &(s->currentItemData))
                        so->s_flags &= ~RTS_CURBEFORE;
                    else
                        so->s_flags &= ~RTS_MRKBEFORE;
                    break;
                default:
                    elog(ERROR, "unrecognized operation in rtree scan adjust: %d", op);
            }
        }
    }
}
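The RTOP_DEL case is the usual cursor-maintenance rule: deleting an item at or before the cursor shifts everything after it down by one slot, so the cursor must back up by one to keep pointing at the same logical tuple. A standalone sketch of that rule on a plain array (all names and values invented for illustration):

#include <stdio.h>
#include <string.h>

int
main(void)
{
    int items[5] = {11, 22, 33, 44, 55};
    int nitems = 5;
    int cursor = 3;             /* currently "on" items[3] == 44 */
    int delpos = 1;             /* delete items[1] == 22 */

    /* Close the gap left by the deletion. */
    memmove(&items[delpos], &items[delpos + 1],
            (nitems - delpos - 1) * sizeof(int));
    nitems--;

    /* A cursor at or after the deleted slot must back up one. */
    if (cursor >= delpos)
        cursor--;

    printf("cursor now on %d\n", items[cursor]);    /* still 44 */
    return 0;
}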
/*
 * HeapTupleSatisfiesToast
 *      True iff heap tuple is valid as a TOAST row.
 *
 * This is a simplified version that only checks for VACUUM moving
 * conditions.  It's appropriate for TOAST usage because TOAST really doesn't
 * want to do its own time qual checks; if you can see the main table row
 * that contains a TOAST reference, you should be able to see the TOASTed
 * value.  However, vacuuming a TOAST table is independent of the main table,
 * and in case such a vacuum fails partway through, we'd better do this much
 * checking.
 *
 * Among other things, this means you can't do UPDATEs of rows in a TOAST
 * table.
 */
bool
HeapTupleSatisfiesToast(HeapTuple htup, Snapshot snapshot,
                        Buffer buffer)
{
    HeapTupleHeader tuple = htup->t_data;

    Assert(ItemPointerIsValid(&htup->t_self));
    Assert(htup->t_tableOid != InvalidOid);

    if (!HeapTupleHeaderXminCommitted(tuple))
    {
        if (HeapTupleHeaderXminInvalid(tuple))
            return false;

        /* Used by pre-9.0 binary upgrades */
        if (tuple->t_infomask & HEAP_MOVED_OFF)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (TransactionIdIsCurrentTransactionId(xvac))
                return false;
            if (!TransactionIdIsInProgress(xvac))
            {
                if (TransactionIdDidCommit(xvac))
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return false;
                }
                SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                            InvalidTransactionId);
            }
        }
        /* Used by pre-9.0 binary upgrades */
        else if (tuple->t_infomask & HEAP_MOVED_IN)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (!TransactionIdIsCurrentTransactionId(xvac))
            {
                if (TransactionIdIsInProgress(xvac))
                    return false;
                if (TransactionIdDidCommit(xvac))
                    SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                                InvalidTransactionId);
                else
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return false;
                }
            }
        }
    }

    /* otherwise assume the tuple is valid for TOAST. */
    return true;
}
static inline void
uint64_to_itemptr(uint64 val, ItemPointer iptr)
{
    GinItemPointerSetOffsetNumber(iptr, val & ((1 << MaxHeapTuplesPerPageBits) - 1));
    val = val >> MaxHeapTuplesPerPageBits;
    GinItemPointerSetBlockNumber(iptr, val);

    Assert(ItemPointerIsValid(iptr));
}
/*
 * Collect data from a pending-list page in preparation for insertion into
 * the main index.
 *
 * Go through all tuples >= startoff on page and collect values in accum
 *
 * Note that ka is just workspace --- it does not carry any state across
 * calls.
 */
static void
processPendingPage(BuildAccumulator *accum, KeyArray *ka,
                   Page page, OffsetNumber startoff)
{
    ItemPointerData heapptr;
    OffsetNumber i,
                maxoff;
    OffsetNumber attrnum;

    /* reset *ka to empty */
    ka->nvalues = 0;

    maxoff = PageGetMaxOffsetNumber(page);
    Assert(maxoff >= FirstOffsetNumber);
    ItemPointerSetInvalid(&heapptr);
    attrnum = 0;

    for (i = startoff; i <= maxoff; i = OffsetNumberNext(i))
    {
        IndexTuple  itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, i));
        OffsetNumber curattnum;
        Datum       curkey;
        GinNullCategory curcategory;

        /* Check for change of heap TID or attnum */
        curattnum = gintuple_get_attrnum(accum->ginstate, itup);

        if (!ItemPointerIsValid(&heapptr))
        {
            heapptr = itup->t_tid;
            attrnum = curattnum;
        }
        else if (!(ItemPointerEquals(&heapptr, &itup->t_tid) &&
                   curattnum == attrnum))
        {
            /*
             * ginInsertBAEntries can insert several datums per call, but
             * only for one heap tuple and one column.  So call it at a
             * boundary, and reset ka.
             */
            ginInsertBAEntries(accum, &heapptr, attrnum,
                               ka->keys, ka->categories, ka->nvalues);
            ka->nvalues = 0;
            heapptr = itup->t_tid;
            attrnum = curattnum;
        }

        /* Add key to KeyArray */
        curkey = gintuple_get_key(accum->ginstate, itup, &curcategory);
        addDatum(ka, curkey, curcategory);
    }

    /* Dump out all remaining keys */
    ginInsertBAEntries(accum, &heapptr, attrnum,
                       ka->keys, ka->categories, ka->nvalues);
}
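The loop is a standard accumulate-and-flush-on-boundary pattern: keys are buffered while the (heap TID, attnum) group key stays the same, flushed whenever it changes, and flushed once more after the loop for the final group. A standalone sketch of the same pattern with invented record and flush stand-ins:

#include <stdio.h>

typedef struct { int group; const char *key; } Rec;

static void
flush(int group, const char **buf, int n)
{
    printf("group %d:", group);
    for (int i = 0; i < n; i++)
        printf(" %s", buf[i]);
    printf("\n");
}

int
main(void)
{
    Rec         recs[] = {{1, "a"}, {1, "b"}, {2, "c"}, {3, "d"}, {3, "e"}};
    const char *buf[8];
    int         nbuf = 0;
    int         curgroup = -1;  /* "invalid" marker, like the invalid TID */

    for (size_t i = 0; i < sizeof(recs) / sizeof(recs[0]); i++)
    {
        if (curgroup != -1 && recs[i].group != curgroup)
        {
            flush(curgroup, buf, nbuf);     /* boundary: dump the buffer */
            nbuf = 0;
        }
        curgroup = recs[i].group;
        buf[nbuf++] = recs[i].key;
    }
    flush(curgroup, buf, nbuf);             /* final group */
    return 0;
}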
void
PersistentStore_AddTuple(
    PersistentStoreData *storeData,
    PersistentStoreSharedData *storeSharedData,
    Datum *values,
    bool flushToXLog,           /* When true, the XLOG record for this change
                                 * will be flushed to disk. */
    ItemPointer persistentTid,  /* TID of the stored tuple. */
    int64 *persistentSerialNum)
{
#ifdef USE_ASSERT_CHECKING
    if (storeSharedData == NULL ||
        !PersistentStoreSharedData_EyecatcherIsValid(storeSharedData))
        elog(ERROR, "Persistent store shared-memory not valid");
#endif

    PTCheck_BeforeAddingEntry(storeData, values);

    if (Debug_persistent_store_print)
        elog(PersistentStore_DebugPrintLevel(),
             "PersistentStore_AddTuple: Going to add tuple ('%s', shared data %p)",
             storeData->tableName,
             storeSharedData);

    *persistentSerialNum = ++storeSharedData->maxInUseSerialNum;
    storeData->myHighestSerialNum = storeSharedData->maxInUseSerialNum;

    GlobalSequence_Set(storeData->gpGlobalSequence, *persistentSerialNum);

    /* Overwrite with the new serial number value. */
    values[storeData->attNumPersistentSerialNum - 1] =
        Int64GetDatum(*persistentSerialNum);

    /*
     * Add new tuple.
     */
    PersistentStore_InsertTuple(storeData, storeSharedData, values,
                                flushToXLog, persistentTid);
    Assert(ItemPointerIsValid(persistentTid));

    storeSharedData->inUseCount++;

    if (Debug_persistent_store_print)
        elog(PersistentStore_DebugPrintLevel(),
             "PersistentStore_AddTuple: Added tuple ('%s', in use count " INT64_FORMAT ", shared data %p)",
             storeData->tableName,
             storeSharedData->inUseCount,
             storeSharedData);
}
/*
 * Stores the visibility map entry.
 *
 * The entry/tuple is invalidated after this function call.
 *
 * Assumes that a valid visimap entry is passed.
 * Assumes that the entry corresponds to the latest tuple
 * returned by AppendOnlyVisimapStore_find.
 *
 * Should not be called twice in the same command.
 */
void
AppendOnlyVisimapStore_Store(
    AppendOnlyVisimapStore *visiMapStore,
    AppendOnlyVisimapEntry *visiMapEntry)
{
    MemoryContext oldContext;
    Relation    visimapRelation;
    TupleDesc   heapTupleDesc;
    HeapTuple   tuple;
    Datum       values[Natts_pg_aovisimap];
    bool        nulls[Natts_pg_aovisimap];

    Assert(visiMapStore);
    Assert(visiMapEntry);

    elogif(Debug_appendonly_print_visimap, LOG,
           "Append-only visi map store: Store visimap entry: "
           "(segFileNum, firstRowNum) = (%u, " INT64_FORMAT ")",
           visiMapEntry->segmentFileNum, visiMapEntry->firstRowNum);

    oldContext = MemoryContextSwitchTo(visiMapStore->memoryContext);

    AppendOnlyVisimapEntry_Write(visiMapEntry, values, nulls);

    visimapRelation = visiMapStore->visimapRelation;
    heapTupleDesc = RelationGetDescr(visimapRelation);
    tuple = heap_form_tuple(heapTupleDesc, values, nulls);

    /*
     * Write out the visimap entry to the relation.  If this visimap entry
     * is already in the relation, we update the row.  Otherwise, a new row
     * is inserted.
     */
    if (ItemPointerIsValid(&visiMapEntry->tupleTid))
    {
        simple_heap_update(visimapRelation, &visiMapEntry->tupleTid, tuple);
    }
    else
    {
        simple_heap_insert(visimapRelation, tuple);
    }
    CatalogUpdateIndexes(visimapRelation, tuple);

    heap_freetuple(tuple);

    MemoryContextSwitchTo(oldContext);

    /* Invalidate the data after storing it. */
    ItemPointerSetInvalid(&visiMapEntry->tupleTid);
}
static inline void
uint64_to_itemptr(uint64 val, ItemPointer iptr)
{
    iptr->ip_posid = val & ((1 << MaxHeapTuplesPerPageBits) - 1);
    val = val >> MaxHeapTuplesPerPageBits;
    iptr->ip_blkid.bi_lo = val & 0xFFFF;
    val = val >> 16;
    iptr->ip_blkid.bi_hi = val & 0xFFFF;

    Assert(ItemPointerIsValid(iptr));
}
/*
 * Update a tuple in an in-memory heap table.
 *
 * If the target tuple is already in memory, update it in place and set
 * the flag INMEM_HEAP_TUPLE_UPDATED; otherwise report an error.
 *
 * The update must not change the otid of the old tuple, since the updated
 * tuple will be written back to the master and updated there.
 */
void
InMemHeap_Update(InMemHeapRelation relation, ItemPointer otid,
                 HeapTuple tup)
{
    int         pos;
    HeapTuple   target;
    MemoryContext oldmem = CurrentMemoryContext;

    Assert(ItemPointerIsValid(otid));

    pos = InMemHeap_Find(relation, otid);

    CurrentMemoryContext = relation->memcxt;

    /*
     * not found, report error
     */
    if (pos >= relation->tupsize)
    {
        ereport(ERROR,
                (errcode(ERRCODE_INTERNAL_ERROR),
                 errmsg("update a tuple which does not exist,"
                        " relname = %s, relid = %u",
                        relation->rel->rd_rel->relname.data,
                        relation->relid)));
    }

    Insist(relation->hashIndex == NULL &&
           "cannot handle index in in-memory heap when update");

    /*
     * already in table
     */
    Assert(relation->tuples[pos].flags == INMEM_HEAP_TUPLE_DISPATCHED ||
           relation->tuples[pos].flags == INMEM_HEAP_TUPLE_UPDATED);
    relation->tuples[pos].flags = INMEM_HEAP_TUPLE_UPDATED;

    target = heaptuple_copy_to(tup, NULL, NULL);

    /*
     * do not modify original tuple header
     */
    ItemPointerCopy(&target->t_self, &relation->tuples[pos].tuple->t_self);

    Assert(ItemPointerEquals(&target->t_self, otid));

    memcpy(target->t_data, relation->tuples[pos].tuple->t_data,
           sizeof(HeapTupleHeaderData));

    CurrentMemoryContext = oldmem;

    pfree(relation->tuples[pos].tuple);
    relation->tuples[pos].tuple = target;
}
static void
pushIncompleteInsert(RelFileNode node, XLogRecPtr lsn, ItemPointerData key,
                     BlockNumber *blkno, int lenblk,
                     PageSplitRecord *xlinfo /* to extract blkno info */ )
{
    MemoryContext oldCxt;
    gistIncompleteInsert *ninsert;

    if (!ItemPointerIsValid(&key))
        /*
         * If the key is invalid, we should not store the insertion as
         * incomplete, because it's a vacuum operation.
         */
        return;

    oldCxt = MemoryContextSwitchTo(insertCtx);
    ninsert = (gistIncompleteInsert *) palloc(sizeof(gistIncompleteInsert));

    ninsert->node = node;
    ninsert->key = key;
    ninsert->lsn = lsn;

    if (lenblk && blkno)
    {
        ninsert->lenblk = lenblk;
        ninsert->blkno = (BlockNumber *) palloc(sizeof(BlockNumber) * ninsert->lenblk);
        memcpy(ninsert->blkno, blkno, sizeof(BlockNumber) * ninsert->lenblk);
        ninsert->origblkno = *blkno;
    }
    else
    {
        int         i;

        Assert(xlinfo);
        ninsert->lenblk = xlinfo->data->npage;
        ninsert->blkno = (BlockNumber *) palloc(sizeof(BlockNumber) * ninsert->lenblk);
        for (i = 0; i < ninsert->lenblk; i++)
            ninsert->blkno[i] = xlinfo->page[i].header->blkno;
        ninsert->origblkno = xlinfo->data->origblkno;
    }
    Assert(ninsert->lenblk > 0);

    /*
     * Stick the new incomplete insert onto the front of the list, not the
     * back.  This is so that gist_xlog_cleanup will process incompletions
     * in last-in-first-out order.
     */
    incomplete_inserts = lcons(ninsert, incomplete_inserts);

    MemoryContextSwitchTo(oldCxt);
}
/*
 * XactLockTableWaitErrorCb
 *      Error context callback for transaction lock waits.
 */
static void
XactLockTableWaitErrorCb(void *arg)
{
    XactLockTableWaitInfo *info = (XactLockTableWaitInfo *) arg;

    /*
     * We would like to print schema name too, but that would require a
     * syscache lookup.
     */
    if (info->oper != XLTW_None &&
        ItemPointerIsValid(info->ctid) &&
        RelationIsValid(info->rel))
    {
        const char *cxt;

        switch (info->oper)
        {
            case XLTW_Update:
                cxt = gettext_noop("while updating tuple (%u,%u) in relation \"%s\"");
                break;
            case XLTW_Delete:
                cxt = gettext_noop("while deleting tuple (%u,%u) in relation \"%s\"");
                break;
            case XLTW_Lock:
                cxt = gettext_noop("while locking tuple (%u,%u) in relation \"%s\"");
                break;
            case XLTW_LockUpdated:
                cxt = gettext_noop("while locking updated version (%u,%u) of tuple in relation \"%s\"");
                break;
            case XLTW_InsertIndex:
                cxt = gettext_noop("while inserting index tuple (%u,%u) in relation \"%s\"");
                break;
            case XLTW_InsertIndexUnique:
                cxt = gettext_noop("while checking uniqueness of tuple (%u,%u) in relation \"%s\"");
                break;
            case XLTW_FetchUpdated:
                cxt = gettext_noop("while rechecking updated tuple (%u,%u) in relation \"%s\"");
                break;
            case XLTW_RecheckExclusionConstr:
                cxt = gettext_noop("while checking exclusion constraint on tuple (%u,%u) in relation \"%s\"");
                break;
            default:
                return;
        }

        errcontext(cxt,
                   ItemPointerGetBlockNumber(info->ctid),
                   ItemPointerGetOffsetNumber(info->ctid),
                   RelationGetRelationName(info->rel));
    }
}
static inline uint64
itemptr_to_uint64(const ItemPointer iptr)
{
    uint64      val;

    Assert(ItemPointerIsValid(iptr));
    Assert(GinItemPointerGetOffsetNumber(iptr) < (1 << MaxHeapTuplesPerPageBits));

    val = GinItemPointerGetBlockNumber(iptr);
    val <<= MaxHeapTuplesPerPageBits;
    val |= GinItemPointerGetOffsetNumber(iptr);

    return val;
}
void
PersistentStore_FreeTuple(
    PersistentStoreData *storeData,
    PersistentStoreSharedData *storeSharedData,
    ItemPointer persistentTid,  /* TID of the stored tuple. */
    Datum *freeValues,
    bool flushToXLog)           /* When true, the XLOG record for this change
                                 * will be flushed to disk. */
{
    Relation    persistentRel;
    XLogRecPtr  xlogEndLoc;     /* The end location of the DELETE XLOG record. */

    Assert(LWLockHeldByMe(PersistentObjLock));

#ifdef USE_ASSERT_CHECKING
    if (storeSharedData == NULL ||
        !PersistentStoreSharedData_EyecatcherIsValid(storeSharedData))
        elog(ERROR, "Persistent store shared-memory not valid");
#endif

    if (Debug_persistent_store_print)
        elog(PersistentStore_DebugPrintLevel(),
             "PersistentStore_FreeTuple: Going to free tuple at TID %s ('%s', shared data %p)",
             ItemPointerToString(persistentTid),
             storeData->tableName,
             storeSharedData);

    Assert(ItemPointerIsValid(persistentTid));

    persistentRel = (*storeData->openRel)();

    simple_heap_delete_xid(persistentRel, persistentTid, FrozenTransactionId);

    /*
     * XLOG location of the DELETE tuple's XLOG record.
     */
    xlogEndLoc = XLogLastInsertEndLoc();

    (*storeData->closeRel)(persistentRel);

    storeSharedData->inUseCount--;

    if (flushToXLog)
    {
        XLogFlush(xlogEndLoc);
        XLogRecPtr_Zero(&nowaitXLogEndLoc);
    }
    else
        nowaitXLogEndLoc = xlogEndLoc;
}
static inline uint64
itemptr_to_uint64(const ItemPointer iptr)
{
    uint64      val;

    Assert(ItemPointerIsValid(iptr));
    Assert(iptr->ip_posid < (1 << MaxHeapTuplesPerPageBits));

    val = iptr->ip_blkid.bi_hi;
    val <<= 16;
    val |= iptr->ip_blkid.bi_lo;
    val <<= MaxHeapTuplesPerPageBits;
    val |= iptr->ip_posid;

    return val;
}
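Both variants of this pack/unpack pair (the accessor-based and the direct-field versions above) fold the 48-bit block number plus offset into one uint64 so that posting-list deltas can be computed with plain integer arithmetic. A standalone round trip follows, assuming MaxHeapTuplesPerPageBits is 11 (the value used in PostgreSQL's ginpostinglist.c) and using a simplified stand-in for ItemPointerData:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define MaxHeapTuplesPerPageBits 11     /* assumption; matches ginpostinglist.c */

/* Simplified stand-in for ItemPointerData's split block id + offset. */
typedef struct
{
    uint16_t    bi_hi;
    uint16_t    bi_lo;
    uint16_t    ip_posid;
} Tid;

static uint64_t
tid_to_uint64(const Tid *t)
{
    uint64_t    val = t->bi_hi;

    val <<= 16;
    val |= t->bi_lo;
    val <<= MaxHeapTuplesPerPageBits;
    val |= t->ip_posid;
    return val;
}

static void
uint64_to_tid(uint64_t val, Tid *t)
{
    t->ip_posid = val & ((1 << MaxHeapTuplesPerPageBits) - 1);
    val >>= MaxHeapTuplesPerPageBits;
    t->bi_lo = val & 0xFFFF;
    val >>= 16;
    t->bi_hi = val & 0xFFFF;
}

int
main(void)
{
    Tid         in = {.bi_hi = 0x0001, .bi_lo = 0x2345, .ip_posid = 42};
    Tid         out;
    uint64_t    packed = tid_to_uint64(&in);

    uint64_to_tid(packed, &out);
    assert(out.bi_hi == in.bi_hi && out.bi_lo == in.bi_lo &&
           out.ip_posid == in.ip_posid);
    printf("packed=0x%llx round-trips OK\n", (unsigned long long) packed);
    return 0;
}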
/*
 * XactLockTableWait
 *
 * Wait for the specified transaction to commit or abort.  If an operation
 * is specified, an error context callback is set up.  If 'oper' is passed
 * as None, no error context callback is set up.
 *
 * Note that this does the right thing for subtransactions: if we wait on a
 * subtransaction, we will exit as soon as it aborts or its top parent
 * commits.  It takes some extra work to ensure this, because to save on
 * shared memory the XID lock of a subtransaction is released when it ends,
 * whether successfully or unsuccessfully.  So we have to check if it's
 * "still running" and if so wait for its parent.
 */
void
XactLockTableWait(TransactionId xid, Relation rel, ItemPointer ctid,
                  XLTW_Oper oper)
{
    LOCKTAG     tag;
    XactLockTableWaitInfo info;
    ErrorContextCallback callback;

    /*
     * If an operation is specified, set up our verbose error context
     * callback.
     */
    if (oper != XLTW_None)
    {
        Assert(RelationIsValid(rel));
        Assert(ItemPointerIsValid(ctid));

        info.rel = rel;
        info.ctid = ctid;
        info.oper = oper;

        callback.callback = XactLockTableWaitErrorCb;
        callback.arg = &info;
        callback.previous = error_context_stack;
        error_context_stack = &callback;
    }

    for (;;)
    {
        Assert(TransactionIdIsValid(xid));
        Assert(!TransactionIdEquals(xid, GetTopTransactionIdIfAny()));

        SET_LOCKTAG_TRANSACTION(tag, xid);

        (void) LockAcquire(&tag, ShareLock, false, false);

        LockRelease(&tag, ShareLock, false);

        if (!TransactionIdIsInProgress(xid))
            break;
        xid = SubTransGetParent(xid);
    }

    if (oper != XLTW_None)
        error_context_stack = callback.previous;
}
/*
 * HeapTupleIsSurelyDead
 *
 *  Determine whether a tuple is surely dead.  We sometimes use this
 *  in lieu of HeapTupleSatisfiesVacuum when the tuple has just been
 *  tested by HeapTupleSatisfiesMVCC and, therefore, any hint bits that
 *  can be set should already be set.  We assume that if no hint bits are
 *  set for either xmin or xmax, the transaction is still running.  This
 *  is therefore faster than HeapTupleSatisfiesVacuum, because we don't
 *  consult CLOG (and also because we don't need to give an exact answer,
 *  just whether or not the tuple is surely dead).
 */
bool
HeapTupleIsSurelyDead(HeapTuple htup, TransactionId OldestXmin)
{
    HeapTupleHeader tuple = htup->t_data;

    Assert(ItemPointerIsValid(&htup->t_self));
    Assert(htup->t_tableOid != InvalidOid);

    /*
     * If the inserting transaction is marked invalid, then it aborted, and
     * the tuple is definitely dead.  If it's marked neither committed nor
     * invalid, then we assume it's still alive (since the presumption is
     * that all relevant hint bits were just set moments ago).
     */
    if (!HeapTupleHeaderXminCommitted(tuple))
        return HeapTupleHeaderXminInvalid(tuple) ? true : false;

    /*
     * If the inserting transaction committed, but any deleting transaction
     * aborted, the tuple is still alive.
     */
    if (tuple->t_infomask & HEAP_XMAX_INVALID)
        return false;

    /*
     * If the XMAX is just a lock, the tuple is still alive.
     */
    if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
        return false;

    /*
     * If the Xmax is a MultiXact, it might be dead or alive, but we cannot
     * know without checking pg_multixact.
     */
    if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
        return false;

    /* If deleter isn't known to have committed, assume it's still running. */
    if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
        return false;

    /* Deleter committed, so tuple is dead if the XID is old enough. */
    return TransactionIdPrecedes(HeapTupleHeaderGetRawXmax(tuple), OldestXmin);
}
/* ----------------------------------------------------------------
 *      tidout
 * ----------------------------------------------------------------
 */
Datum
tidout(PG_FUNCTION_ARGS)
{
    ItemPointer itemPtr = PG_GETARG_ITEMPOINTER(0);
    BlockId     blockId;
    BlockNumber blockNumber;
    OffsetNumber offsetNumber;
    char        buf[32];

    if (!ItemPointerIsValid(itemPtr))
        PG_RETURN_CSTRING(pstrdup("()"));

    blockId = &(itemPtr->ip_blkid);
    blockNumber = BlockIdGetBlockNumber(blockId);
    offsetNumber = itemPtr->ip_posid;

    snprintf(buf, sizeof(buf), "(%u,%u)", blockNumber, offsetNumber);

    PG_RETURN_CSTRING(pstrdup(buf));
}
Datum
rtgettuple(PG_FUNCTION_ARGS)
{
    IndexScanDesc s = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
    RTreeScanOpaque so = (RTreeScanOpaque) s->opaque;
    Page        page;
    OffsetNumber offnum;

    /*
     * If we've already produced a tuple and the executor has informed us
     * that it should be marked "killed", do so now.
     */
    if (s->kill_prior_tuple && ItemPointerIsValid(&(s->currentItemData)))
    {
        offnum = ItemPointerGetOffsetNumber(&(s->currentItemData));
        page = BufferGetPage(so->curbuf);
        PageGetItemId(page, offnum)->lp_flags |= LP_DELETE;
        SetBufferCommitInfoNeedsSave(so->curbuf);
    }

    /*
     * Get the next tuple that matches the search key; if asked to skip
     * killed tuples, find the first non-killed tuple that matches.  Return
     * as soon as we've run out of matches or we've found an acceptable
     * match.
     */
    for (;;)
    {
        bool        res = rtnext(s, dir);

        if (res && s->ignore_killed_tuples)
        {
            offnum = ItemPointerGetOffsetNumber(&(s->currentItemData));
            page = BufferGetPage(so->curbuf);
            if (ItemIdDeleted(PageGetItemId(page, offnum)))
                continue;
        }

        PG_RETURN_BOOL(res);
    }
}
/*
 * hashrestrpos() -- restore scan to last saved position
 */
Datum
hashrestrpos(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    Relation    rel = scan->indexRelation;

    /* release pin on current data, if any */
    if (BufferIsValid(so->hashso_curbuf))
        _hash_dropbuf(rel, so->hashso_curbuf);

    so->hashso_curbuf = InvalidBuffer;
    ItemPointerSetInvalid(&(scan->currentItemData));

    /* bump pin count on currentMarkData and copy to currentItemData */
    if (ItemPointerIsValid(&(scan->currentMarkData)))
    {
        IncrBufferRefCount(so->hashso_mrkbuf);
        so->hashso_curbuf = so->hashso_mrkbuf;
        scan->currentItemData = scan->currentMarkData;
    }

    PG_RETURN_VOID();
}
static void
forgetIncompleteInsert(RelFileNode node, ItemPointerData key)
{
    ListCell   *l;

    if (!ItemPointerIsValid(&key))
        return;

    if (incomplete_inserts == NIL)
        return;

    foreach(l, incomplete_inserts)
    {
        gistIncompleteInsert *insert = (gistIncompleteInsert *) lfirst(l);

        if (RelFileNodeEquals(node, insert->node) &&
            ItemPointerEQ(&(insert->key), &(key)))
        {
            /* found */
            incomplete_inserts = list_delete_ptr(incomplete_inserts, insert);
            pfree(insert->blkno);
            pfree(insert);
            break;
        }
    }
}
/*
 * hashmarkpos() -- save current scan position
 */
Datum
hashmarkpos(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    Relation    rel = scan->indexRelation;

    /* release pin on old marked data, if any */
    if (BufferIsValid(so->hashso_mrkbuf))
        _hash_dropbuf(rel, so->hashso_mrkbuf);

    so->hashso_mrkbuf = InvalidBuffer;
    ItemPointerSetInvalid(&(scan->currentMarkData));

    /* bump pin count on currentItemData and copy to currentMarkData */
    if (ItemPointerIsValid(&(scan->currentItemData)))
    {
        so->hashso_mrkbuf = _hash_getbuf(rel,
                                         BufferGetBlockNumber(so->hashso_curbuf),
                                         HASH_NOLOCK);
        scan->currentMarkData = scan->currentItemData;
    }

    PG_RETURN_VOID();
}
/*
 * hashgettuple() -- Get the next tuple in the scan.
 */
bool
hashgettuple(IndexScanDesc scan, ScanDirection dir)
{
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    Relation    rel = scan->indexRelation;
    Buffer      buf;
    Page        page;
    OffsetNumber offnum;
    ItemPointer current;
    bool        res;

    /* Hash indexes are always lossy since we store only the hash code */
    scan->xs_recheck = true;

    /*
     * We hold pin but not lock on current buffer while outside the hash AM.
     * Reacquire the read lock here.
     */
    if (BufferIsValid(so->hashso_curbuf))
        _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ);

    /*
     * If we've already initialized this scan, we can just advance it in the
     * appropriate direction.  If we haven't done so yet, we call a routine
     * to get the first item in the scan.
     */
    current = &(so->hashso_curpos);
    if (ItemPointerIsValid(current))
    {
        /*
         * An insertion into the current index page could have happened
         * while we didn't have read lock on it.  Re-find our position by
         * looking for the TID we previously returned.  (Because we hold
         * share lock on the bucket, no deletions or splits could have
         * occurred; therefore we can expect that the TID still exists in
         * the current index page, at an offset >= where we were.)
         */
        OffsetNumber maxoffnum;

        buf = so->hashso_curbuf;
        Assert(BufferIsValid(buf));
        page = BufferGetPage(buf);
        TestForOldSnapshot(scan->xs_snapshot, rel, page);
        maxoffnum = PageGetMaxOffsetNumber(page);
        for (offnum = ItemPointerGetOffsetNumber(current);
             offnum <= maxoffnum;
             offnum = OffsetNumberNext(offnum))
        {
            IndexTuple  itup;

            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
            if (ItemPointerEquals(&(so->hashso_heappos), &(itup->t_tid)))
                break;
        }
        if (offnum > maxoffnum)
            elog(ERROR, "failed to re-find scan position within index \"%s\"",
                 RelationGetRelationName(rel));
        ItemPointerSetOffsetNumber(current, offnum);

        /*
         * Check to see if we should kill the previously-fetched tuple.
         */
        if (scan->kill_prior_tuple)
        {
            /*
             * Yes, so mark it by setting the LP_DEAD state in the item
             * flags.
             */
            ItemIdMarkDead(PageGetItemId(page, offnum));

            /*
             * Since this can be redone later if needed, mark as a hint.
             */
            MarkBufferDirtyHint(buf, true);
        }

        /*
         * Now continue the scan.
         */
        res = _hash_next(scan, dir);
    }
    else
        res = _hash_first(scan, dir);

    /*
     * Skip killed tuples if asked to.
     */
    if (scan->ignore_killed_tuples)
    {
        while (res)
        {
            offnum = ItemPointerGetOffsetNumber(current);
            page = BufferGetPage(so->hashso_curbuf);
            if (!ItemIdIsDead(PageGetItemId(page, offnum)))
                break;
            res = _hash_next(scan, dir);
        }
    }

    /* Release read lock on current buffer, but keep it pinned */
    if (BufferIsValid(so->hashso_curbuf))
        _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_READ, HASH_NOLOCK);

    /* Return current heap TID on success */
    scan->xs_ctup.t_self = so->hashso_heappos;

    return res;
}
/*
 * HeapTupleSatisfiesUpdate
 *
 * This function returns a more detailed result code than most of the
 * functions in this file, since UPDATE needs to know more than "is it
 * visible?".  It also allows for user-supplied CommandId rather than
 * relying on CurrentCommandId.
 *
 * The possible return codes are:
 *
 * HeapTupleInvisible: the tuple didn't exist at all when the scan started,
 * e.g. it was created by a later CommandId.
 *
 * HeapTupleMayBeUpdated: The tuple is valid and visible, so it may be
 * updated.
 *
 * HeapTupleSelfUpdated: The tuple was updated by the current transaction,
 * after the current scan started.
 *
 * HeapTupleUpdated: The tuple was updated by a committed transaction.
 *
 * HeapTupleBeingUpdated: The tuple is being updated by an in-progress
 * transaction other than the current transaction.  (Note: this includes
 * the case where the tuple is share-locked by a MultiXact, even if the
 * MultiXact includes the current transaction.  Callers that want to
 * distinguish that case must test for it themselves.)
 */
HTSU_Result
HeapTupleSatisfiesUpdate(HeapTuple htup, CommandId curcid,
                         Buffer buffer)
{
    HeapTupleHeader tuple = htup->t_data;

    Assert(ItemPointerIsValid(&htup->t_self));
    Assert(htup->t_tableOid != InvalidOid);

    if (!HeapTupleHeaderXminCommitted(tuple))
    {
        if (HeapTupleHeaderXminInvalid(tuple))
            return HeapTupleInvisible;

        /* Used by pre-9.0 binary upgrades */
        if (tuple->t_infomask & HEAP_MOVED_OFF)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (TransactionIdIsCurrentTransactionId(xvac))
                return HeapTupleInvisible;
            if (!TransactionIdIsInProgress(xvac))
            {
                if (TransactionIdDidCommit(xvac))
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return HeapTupleInvisible;
                }
                SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                            InvalidTransactionId);
            }
        }
        /* Used by pre-9.0 binary upgrades */
        else if (tuple->t_infomask & HEAP_MOVED_IN)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (!TransactionIdIsCurrentTransactionId(xvac))
            {
                if (TransactionIdIsInProgress(xvac))
                    return HeapTupleInvisible;
                if (TransactionIdDidCommit(xvac))
                    SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                                InvalidTransactionId);
                else
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return HeapTupleInvisible;
                }
            }
        }
        else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
        {
            if (HeapTupleHeaderGetCmin(tuple) >= curcid)
                return HeapTupleInvisible;      /* inserted after scan started */

            if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid */
                return HeapTupleMayBeUpdated;

            if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
            {
                TransactionId xmax;

                xmax = HeapTupleHeaderGetRawXmax(tuple);

                /*
                 * Careful here: even though this tuple was created by our
                 * own transaction, it might be locked by other
                 * transactions, if the original version was key-share
                 * locked when we updated it.
                 */
                if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
                {
                    if (MultiXactHasRunningRemoteMembers(xmax))
                        return HeapTupleBeingUpdated;
                    else
                        return HeapTupleMayBeUpdated;
                }

                /* if locker is gone, all's well */
                if (!TransactionIdIsInProgress(xmax))
                    return HeapTupleMayBeUpdated;

                if (!TransactionIdIsCurrentTransactionId(xmax))
                    return HeapTupleBeingUpdated;
                else
                    return HeapTupleMayBeUpdated;
            }

            if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
            {
                TransactionId xmax;

                xmax = HeapTupleGetUpdateXid(tuple);

                /* not LOCKED_ONLY, so it has to have an xmax */
                Assert(TransactionIdIsValid(xmax));

                /* updating subtransaction must have aborted */
                if (!TransactionIdIsCurrentTransactionId(xmax))
                {
                    if (MultiXactHasRunningRemoteMembers(HeapTupleHeaderGetRawXmax(tuple)))
                        return HeapTupleBeingUpdated;
                    return HeapTupleMayBeUpdated;
                }
                else
                {
                    if (HeapTupleHeaderGetCmax(tuple) >= curcid)
                        return HeapTupleSelfUpdated;    /* updated after scan
                                                         * started */
                    else
                        return HeapTupleInvisible;      /* updated before scan
                                                         * started */
                }
            }

            if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
            {
                /* deleting subtransaction must have aborted */
                SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                            InvalidTransactionId);
                return HeapTupleMayBeUpdated;
            }

            if (HeapTupleHeaderGetCmax(tuple) >= curcid)
                return HeapTupleSelfUpdated;    /* updated after scan started */
            else
                return HeapTupleInvisible;      /* updated before scan started */
        }
        else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
            return HeapTupleInvisible;
        else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
            SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                        HeapTupleHeaderGetRawXmin(tuple));
        else
        {
            /* it must have aborted or crashed */
            SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                        InvalidTransactionId);
            return HeapTupleInvisible;
        }
    }

    /* by here, the inserting transaction has committed */

    if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid or aborted */
        return HeapTupleMayBeUpdated;

    if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
    {
        if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
            return HeapTupleMayBeUpdated;
        return HeapTupleUpdated;        /* updated by other */
    }

    if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
    {
        TransactionId xmax;

        if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
        {
            /*
             * If it's only locked but neither EXCL_LOCK nor KEYSHR_LOCK is
             * set, it cannot possibly be running.  Otherwise need to check.
             */
            if ((tuple->t_infomask & (HEAP_XMAX_EXCL_LOCK |
                                      HEAP_XMAX_KEYSHR_LOCK)) &&
                MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple)))
                return HeapTupleBeingUpdated;

            SetHintBits(tuple, buffer, HEAP_XMAX_INVALID, InvalidTransactionId);
            return HeapTupleMayBeUpdated;
        }

        xmax = HeapTupleGetUpdateXid(tuple);

        /* not LOCKED_ONLY, so it has to have an xmax */
        Assert(TransactionIdIsValid(xmax));

        if (TransactionIdIsCurrentTransactionId(xmax))
        {
            if (HeapTupleHeaderGetCmax(tuple) >= curcid)
                return HeapTupleSelfUpdated;    /* updated after scan started */
            else
                return HeapTupleInvisible;      /* updated before scan started */
        }

        if (TransactionIdIsInProgress(xmax))
            return HeapTupleBeingUpdated;

        if (TransactionIdDidCommit(xmax))
            return HeapTupleUpdated;

        /*
         * By here, the update in the Xmax is either aborted or crashed, but
         * what about the other members?
         */
        if (!MultiXactIdIsRunning(HeapTupleHeaderGetRawXmax(tuple)))
        {
            /*
             * There's no member, even just a locker, alive anymore, so we
             * can mark the Xmax as invalid.
             */
            SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                        InvalidTransactionId);
            return HeapTupleMayBeUpdated;
        }
        else
        {
            /* There are lockers running */
            return HeapTupleBeingUpdated;
        }
    }

    if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
    {
        if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
            return HeapTupleMayBeUpdated;
        if (HeapTupleHeaderGetCmax(tuple) >= curcid)
            return HeapTupleSelfUpdated;        /* updated after scan started */
        else
            return HeapTupleInvisible;  /* updated before scan started */
    }

    if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
        return HeapTupleBeingUpdated;

    if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
    {
        /* it must have aborted or crashed */
        SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                    InvalidTransactionId);
        return HeapTupleMayBeUpdated;
    }

    /* xmax transaction committed */

    if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
    {
        SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                    InvalidTransactionId);
        return HeapTupleMayBeUpdated;
    }

    SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
                HeapTupleHeaderGetRawXmax(tuple));
    return HeapTupleUpdated;    /* updated by other */
}
/*
 * HeapTupleSatisfiesDirty
 *      True iff heap tuple is valid including effects of open transactions.
 *
 * Here, we consider the effects of:
 *      all committed and in-progress transactions (as of the current instant)
 *      previous commands of this transaction
 *      changes made by the current command
 *
 * This is essentially like HeapTupleSatisfiesSelf as far as effects of
 * the current transaction and committed/aborted xacts are concerned.
 * However, we also include the effects of other xacts still in progress.
 *
 * A special hack is that the passed-in snapshot struct is used as an
 * output argument to return the xids of concurrent xacts that affected the
 * tuple.  snapshot->xmin is set to the tuple's xmin if that is another
 * transaction that's still in progress; or to InvalidTransactionId if the
 * tuple's xmin is committed good, committed dead, or my own xact.
 * Similarly for snapshot->xmax and the tuple's xmax.
 */
bool
HeapTupleSatisfiesDirty(HeapTuple htup, Snapshot snapshot,
                        Buffer buffer)
{
    HeapTupleHeader tuple = htup->t_data;

    Assert(ItemPointerIsValid(&htup->t_self));
    Assert(htup->t_tableOid != InvalidOid);

    snapshot->xmin = snapshot->xmax = InvalidTransactionId;

    if (!HeapTupleHeaderXminCommitted(tuple))
    {
        if (HeapTupleHeaderXminInvalid(tuple))
            return false;

        /* Used by pre-9.0 binary upgrades */
        if (tuple->t_infomask & HEAP_MOVED_OFF)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (TransactionIdIsCurrentTransactionId(xvac))
                return false;
            if (!TransactionIdIsInProgress(xvac))
            {
                if (TransactionIdDidCommit(xvac))
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return false;
                }
                SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                            InvalidTransactionId);
            }
        }
        /* Used by pre-9.0 binary upgrades */
        else if (tuple->t_infomask & HEAP_MOVED_IN)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (!TransactionIdIsCurrentTransactionId(xvac))
            {
                if (TransactionIdIsInProgress(xvac))
                    return false;
                if (TransactionIdDidCommit(xvac))
                    SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                                InvalidTransactionId);
                else
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return false;
                }
            }
        }
        else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
        {
            if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid */
                return true;

            if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))    /* not deleter */
                return true;

            if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
            {
                TransactionId xmax;

                xmax = HeapTupleGetUpdateXid(tuple);

                /* not LOCKED_ONLY, so it has to have an xmax */
                Assert(TransactionIdIsValid(xmax));

                /* updating subtransaction must have aborted */
                if (!TransactionIdIsCurrentTransactionId(xmax))
                    return true;
                else
                    return false;
            }

            if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
            {
                /* deleting subtransaction must have aborted */
                SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                            InvalidTransactionId);
                return true;
            }

            return false;
        }
        else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
        {
            snapshot->xmin = HeapTupleHeaderGetRawXmin(tuple);
            /* XXX shouldn't we fall through to look at xmax? */
            return true;        /* in insertion by other */
        }
        else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
            SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                        HeapTupleHeaderGetRawXmin(tuple));
        else
        {
            /* it must have aborted or crashed */
            SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                        InvalidTransactionId);
            return false;
        }
    }

    /* by here, the inserting transaction has committed */

    if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid or aborted */
        return true;

    if (tuple->t_infomask & HEAP_XMAX_COMMITTED)
    {
        if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
            return true;
        return false;           /* updated by other */
    }

    if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
    {
        TransactionId xmax;

        if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
            return true;

        xmax = HeapTupleGetUpdateXid(tuple);

        /* not LOCKED_ONLY, so it has to have an xmax */
        Assert(TransactionIdIsValid(xmax));

        if (TransactionIdIsCurrentTransactionId(xmax))
            return false;
        if (TransactionIdIsInProgress(xmax))
        {
            snapshot->xmax = xmax;
            return true;
        }
        if (TransactionIdDidCommit(xmax))
            return false;
        /* it must have aborted or crashed */
        return true;
    }

    if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
    {
        if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
            return true;
        return false;
    }

    if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
    {
        if (!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
            snapshot->xmax = HeapTupleHeaderGetRawXmax(tuple);
        return true;
    }

    if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
    {
        /* it must have aborted or crashed */
        SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                    InvalidTransactionId);
        return true;
    }

    /* xmax transaction committed */

    if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
    {
        SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                    InvalidTransactionId);
        return true;
    }

    SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
                HeapTupleHeaderGetRawXmax(tuple));
    return false;               /* updated by other */
}
/*
 * HeapTupleSatisfiesMVCC
 *      True iff heap tuple is valid for the given MVCC snapshot.
 *
 * Here, we consider the effects of:
 *      all transactions committed as of the time of the given snapshot
 *      previous commands of this transaction
 *
 * Does _not_ include:
 *      transactions shown as in-progress by the snapshot
 *      transactions started after the snapshot was taken
 *      changes made by the current command
 *
 * (Notice, however, that the tuple status hint bits will be updated on the
 * basis of the true state of the transaction, even if we then pretend we
 * can't see it.)
 */
bool
HeapTupleSatisfiesMVCC(HeapTuple htup, Snapshot snapshot,
                       Buffer buffer)
{
    HeapTupleHeader tuple = htup->t_data;

    Assert(ItemPointerIsValid(&htup->t_self));
    Assert(htup->t_tableOid != InvalidOid);

    if (!HeapTupleHeaderXminCommitted(tuple))
    {
        if (HeapTupleHeaderXminInvalid(tuple))
            return false;

        /* Used by pre-9.0 binary upgrades */
        if (tuple->t_infomask & HEAP_MOVED_OFF)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (TransactionIdIsCurrentTransactionId(xvac))
                return false;
            if (!TransactionIdIsInProgress(xvac))
            {
                if (TransactionIdDidCommit(xvac))
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return false;
                }
                SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                            InvalidTransactionId);
            }
        }
        /* Used by pre-9.0 binary upgrades */
        else if (tuple->t_infomask & HEAP_MOVED_IN)
        {
            TransactionId xvac = HeapTupleHeaderGetXvac(tuple);

            if (!TransactionIdIsCurrentTransactionId(xvac))
            {
                if (TransactionIdIsInProgress(xvac))
                    return false;
                if (TransactionIdDidCommit(xvac))
                    SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                                InvalidTransactionId);
                else
                {
                    SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                                InvalidTransactionId);
                    return false;
                }
            }
        }
        else if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmin(tuple)))
        {
            if (HeapTupleHeaderGetCmin(tuple) >= snapshot->curcid)
                return false;   /* inserted after scan started */

            if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid */
                return true;

            if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))    /* not deleter */
                return true;

            if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
            {
                TransactionId xmax;

                xmax = HeapTupleGetUpdateXid(tuple);

                /* not LOCKED_ONLY, so it has to have an xmax */
                Assert(TransactionIdIsValid(xmax));

                /* updating subtransaction must have aborted */
                if (!TransactionIdIsCurrentTransactionId(xmax))
                    return true;
                else if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
                    return true;        /* updated after scan started */
                else
                    return false;       /* updated before scan started */
            }

            if (!TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
            {
                /* deleting subtransaction must have aborted */
                SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                            InvalidTransactionId);
                return true;
            }

            if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
                return true;    /* deleted after scan started */
            else
                return false;   /* deleted before scan started */
        }
        else if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmin(tuple)))
            return false;
        else if (TransactionIdDidCommit(HeapTupleHeaderGetRawXmin(tuple)))
            SetHintBits(tuple, buffer, HEAP_XMIN_COMMITTED,
                        HeapTupleHeaderGetRawXmin(tuple));
        else
        {
            /* it must have aborted or crashed */
            SetHintBits(tuple, buffer, HEAP_XMIN_INVALID,
                        InvalidTransactionId);
            return false;
        }
    }

    /*
     * By here, the inserting transaction has committed - have to check
     * when...
     */
    if (!HeapTupleHeaderXminFrozen(tuple) &&
        XidInMVCCSnapshot(HeapTupleHeaderGetRawXmin(tuple), snapshot))
        return false;           /* treat as still in progress */

    if (tuple->t_infomask & HEAP_XMAX_INVALID)  /* xid invalid or aborted */
        return true;

    if (HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask))
        return true;

    if (tuple->t_infomask & HEAP_XMAX_IS_MULTI)
    {
        TransactionId xmax;

        /* already checked above */
        Assert(!HEAP_XMAX_IS_LOCKED_ONLY(tuple->t_infomask));

        xmax = HeapTupleGetUpdateXid(tuple);

        /* not LOCKED_ONLY, so it has to have an xmax */
        Assert(TransactionIdIsValid(xmax));

        if (TransactionIdIsCurrentTransactionId(xmax))
        {
            if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
                return true;    /* deleted after scan started */
            else
                return false;   /* deleted before scan started */
        }
        if (TransactionIdIsInProgress(xmax))
            return true;
        if (TransactionIdDidCommit(xmax))
        {
            /* updating transaction committed, but when? */
            if (XidInMVCCSnapshot(xmax, snapshot))
                return true;    /* treat as still in progress */
            return false;
        }
        /* it must have aborted or crashed */
        return true;
    }

    if (!(tuple->t_infomask & HEAP_XMAX_COMMITTED))
    {
        if (TransactionIdIsCurrentTransactionId(HeapTupleHeaderGetRawXmax(tuple)))
        {
            if (HeapTupleHeaderGetCmax(tuple) >= snapshot->curcid)
                return true;    /* deleted after scan started */
            else
                return false;   /* deleted before scan started */
        }

        if (TransactionIdIsInProgress(HeapTupleHeaderGetRawXmax(tuple)))
            return true;

        if (!TransactionIdDidCommit(HeapTupleHeaderGetRawXmax(tuple)))
        {
            /* it must have aborted or crashed */
            SetHintBits(tuple, buffer, HEAP_XMAX_INVALID,
                        InvalidTransactionId);
            return true;
        }

        /* xmax transaction committed */
        SetHintBits(tuple, buffer, HEAP_XMAX_COMMITTED,
                    HeapTupleHeaderGetRawXmax(tuple));
    }

    /*
     * OK, the deleting transaction committed too ... but when?
     */
    if (XidInMVCCSnapshot(HeapTupleHeaderGetRawXmax(tuple), snapshot))
        return true;            /* treat as still in progress */

    return false;
}
/*
 *  _hash_step() -- step to the next valid item in a scan in the bucket.
 *
 *      If no valid record exists in the requested direction, return
 *      false.  Else, return true and set the CurrentItemData for the
 *      scan to the right thing.
 *
 *      'bufP' points to the current buffer, which is pinned and read-locked.
 *      On success exit, we have pin and read-lock on whichever page
 *      contains the right item; on failure, we have released all buffers.
 */
bool
_hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir)
{
    Relation    rel = scan->indexRelation;
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    ItemPointer current;
    Buffer      buf;
    Page        page;
    HashPageOpaque opaque;
    OffsetNumber maxoff;
    OffsetNumber offnum;
    BlockNumber blkno;
    IndexTuple  itup;

    current = &(scan->currentItemData);

    buf = *bufP;
    _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
    page = BufferGetPage(buf);
    opaque = (HashPageOpaque) PageGetSpecialPointer(page);

    /*
     * If _hash_step is called from _hash_first, current will not be valid,
     * so we can't dereference it.  However, in that case, we presumably
     * want to start at the beginning/end of the page...
     */
    maxoff = PageGetMaxOffsetNumber(page);
    if (ItemPointerIsValid(current))
        offnum = ItemPointerGetOffsetNumber(current);
    else
        offnum = InvalidOffsetNumber;

    /*
     * 'offnum' now points to the last tuple we examined (if any).
     *
     * continue to step through tuples until: 1) we get to the end of the
     * bucket chain or 2) we find a valid tuple.
     */
    do
    {
        switch (dir)
        {
            case ForwardScanDirection:
                if (offnum != InvalidOffsetNumber)
                    offnum = OffsetNumberNext(offnum);  /* move forward */
                else
                    offnum = FirstOffsetNumber;         /* new page */

                while (offnum > maxoff)
                {
                    /*
                     * either this page is empty (maxoff ==
                     * InvalidOffsetNumber) or we ran off the end.
                     */
                    _hash_readnext(rel, &buf, &page, &opaque);
                    if (BufferIsValid(buf))
                    {
                        maxoff = PageGetMaxOffsetNumber(page);
                        offnum = FirstOffsetNumber;
                    }
                    else
                    {
                        /* end of bucket */
                        maxoff = offnum = InvalidOffsetNumber;
                        break;  /* exit while */
                    }
                }
                break;

            case BackwardScanDirection:
                if (offnum != InvalidOffsetNumber)
                    offnum = OffsetNumberPrev(offnum);  /* move back */
                else
                    offnum = maxoff;                    /* new page */

                while (offnum < FirstOffsetNumber)
                {
                    /*
                     * either this page is empty (offnum ==
                     * InvalidOffsetNumber) or we ran off the end.
                     */
                    _hash_readprev(rel, &buf, &page, &opaque);
                    if (BufferIsValid(buf))
                        maxoff = offnum = PageGetMaxOffsetNumber(page);
                    else
                    {
                        /* end of bucket */
                        maxoff = offnum = InvalidOffsetNumber;
                        break;  /* exit while */
                    }
                }
                break;

            default:
                /* NoMovementScanDirection */
                /* this should not be reached */
                break;
        }

        /* we ran off the end of the bucket without finding a match */
        if (offnum == InvalidOffsetNumber)
        {
            *bufP = so->hashso_curbuf = InvalidBuffer;
            ItemPointerSetInvalid(current);
            return false;
        }

        /* get ready to check this tuple */
        itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
    } while (!_hash_checkqual(scan, itup));

    /* if we made it to here, we've found a valid tuple */
    blkno = BufferGetBlockNumber(buf);
    *bufP = so->hashso_curbuf = buf;
    ItemPointerSet(current, blkno, offnum);
    return true;
}
/*
 * Insert a tuple to the new relation.  This has to track heap_insert
 * and its subsidiary functions!
 *
 * t_self of the tuple is set to the new TID of the tuple.  If t_ctid of the
 * tuple is invalid on entry, it's replaced with the new TID as well (in
 * the inserted data only, not in the caller's copy).
 */
static void
raw_heap_insert(RewriteState state, HeapTuple tup)
{
    Page        page = state->rs_buffer;
    Size        pageFreeSpace,
                saveFreeSpace;
    Size        len;
    OffsetNumber newoff;
    HeapTuple   heaptup;

    /*
     * If the new tuple is too big for storage or contains already toasted
     * out-of-line attributes from some other relation, invoke the toaster.
     *
     * Note: below this point, heaptup is the data we actually intend to
     * store into the relation; tup is the caller's original untoasted data.
     */
    if (state->rs_new_rel->rd_rel->relkind == RELKIND_TOASTVALUE)
    {
        /* toast table entries should never be recursively toasted */
        Assert(!HeapTupleHasExternal(tup));
        heaptup = tup;
    }
    else if (HeapTupleHasExternal(tup) || tup->t_len > TOAST_TUPLE_THRESHOLD)
        heaptup = toast_insert_or_update(state->rs_new_rel, tup, NULL,
                                         HEAP_INSERT_SKIP_FSM |
                                         (state->rs_use_wal ?
                                          0 : HEAP_INSERT_SKIP_WAL));
    else
        heaptup = tup;

    len = MAXALIGN(heaptup->t_len);     /* be conservative */

    /*
     * If we're gonna fail for oversize tuple, do it right away
     */
    if (len > MaxHeapTupleSize)
        ereport(ERROR,
                (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
                 errmsg("row is too big: size %zu, maximum size %zu",
                        len, MaxHeapTupleSize)));

    /* Compute desired extra freespace due to fillfactor option */
    saveFreeSpace = RelationGetTargetPageFreeSpace(state->rs_new_rel,
                                                   HEAP_DEFAULT_FILLFACTOR);

    /* Now we can check to see if there's enough free space already. */
    if (state->rs_buffer_valid)
    {
        pageFreeSpace = PageGetHeapFreeSpace(page);

        if (len + saveFreeSpace > pageFreeSpace)
        {
            /* Doesn't fit, so write out the existing page */

            /* XLOG stuff */
            if (state->rs_use_wal)
                log_newpage(&state->rs_new_rel->rd_node,
                            MAIN_FORKNUM,
                            state->rs_blockno,
                            page,
                            true);

            /*
             * Now write the page.  We say isTemp = true even if it's not a
             * temp table, because there's no need for smgr to schedule an
             * fsync for this write; we'll do it ourselves in
             * end_heap_rewrite.
             */
            RelationOpenSmgr(state->rs_new_rel);

            PageSetChecksumInplace(page, state->rs_blockno);

            smgrextend(state->rs_new_rel->rd_smgr, MAIN_FORKNUM,
                       state->rs_blockno, (char *) page, true);

            state->rs_blockno++;
            state->rs_buffer_valid = false;
        }
    }

    if (!state->rs_buffer_valid)
    {
        /* Initialize a new empty page */
        PageInit(page, BLCKSZ, 0);
        state->rs_buffer_valid = true;
    }

    /* And now we can insert the tuple into the page */
    newoff = PageAddItem(page, (Item) heaptup->t_data, heaptup->t_len,
                         InvalidOffsetNumber, false, true);
    if (newoff == InvalidOffsetNumber)
        elog(ERROR, "failed to add tuple");

    /* Update caller's t_self to the actual position where it was stored */
    ItemPointerSet(&(tup->t_self), state->rs_blockno, newoff);

    /*
     * Insert the correct position into CTID of the stored tuple, too, if
     * the caller didn't supply a valid CTID.
     */
    if (!ItemPointerIsValid(&tup->t_data->t_ctid))
    {
        ItemId      newitemid;
        HeapTupleHeader onpage_tup;

        newitemid = PageGetItemId(page, newoff);
        onpage_tup = (HeapTupleHeader) PageGetItem(page, newitemid);

        onpage_tup->t_ctid = tup->t_self;
    }

    /* If heaptup is a private copy, release it. */
    if (heaptup != tup)
        heap_freetuple(heaptup);
}
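The page-fill decision above reduces to one comparison: the MAXALIGNed tuple length plus the fillfactor reserve must fit in the page's current free space, otherwise the page is flushed and a fresh one started. A standalone sketch of that arithmetic; the 8-byte alignment, page size, header size, and fillfactor values are assumptions for illustration, not PostgreSQL's exact constants.

#include <stdint.h>
#include <stdio.h>

#define ALIGNOF_MAX 8           /* assumption: 8-byte MAXALIGN */
#define MAXALIGN(len) \
    (((uintptr_t) (len) + (ALIGNOF_MAX - 1)) & ~((uintptr_t) (ALIGNOF_MAX - 1)))

int
main(void)
{
    size_t      pageFreeSpace = 800;    /* free bytes left on the page */
    size_t      fillfactor = 70;        /* reserve 30% of usable space */
    size_t      usable = 8192 - 24;     /* invented: page minus header */
    size_t      saveFreeSpace = usable * (100 - fillfactor) / 100;
    size_t      len = MAXALIGN(45);     /* rounds 45 up to 48 */

    if (len + saveFreeSpace > pageFreeSpace)
        printf("doesn't fit: start a new page (len=%zu, reserve=%zu)\n",
               len, saveFreeSpace);
    else
        printf("fits on current page\n");
    return 0;
}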