/* * XLogReadBufferExtended * Read a page during XLOG replay * * This is functionally comparable to ReadBufferExtended. There are some * differences in the behavior with respect to the "mode" argument: * * In RBM_NORMAL mode, if the page doesn't exist, or contains all-zeroes, we * return InvalidBuffer. In this case the caller should silently skip the * update on this page. (In this situation, we expect that the page was later * dropped or truncated. If we don't see evidence of that later in the WAL * sequence, we'll complain at the end of WAL replay.) * * In RBM_ZERO and RBM_ZERO_ON_ERROR modes, if the page doesn't exist, the * relation is extended with all-zeroes pages up to the given block number. * * In RBM_NORMAL_NO_LOG mode, we return InvalidBuffer if the page doesn't * exist, and we don't check for all-zeroes. Thus, no log entry is made * to imply that the page should be dropped or truncated later. */ Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode) { BlockNumber lastblock; Buffer buffer; SMgrRelation smgr; Assert(blkno != P_NEW); /* Open the relation at smgr level */ smgr = smgropen(rnode, InvalidBackendId); /* * Create the target file if it doesn't already exist. This lets us cope * if the replay sequence contains writes to a relation that is later * deleted. (The original coding of this routine would instead suppress * the writes, but that seems like it risks losing valuable data if the * filesystem loses an inode during a crash. Better to write the data * until we are actually told to delete the file.) */ smgrcreate(smgr, forknum, true); lastblock = smgrnblocks(smgr, forknum); if (blkno < lastblock) { /* page exists in file */ buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno, mode, NULL); } else { /* hm, page doesn't exist in file */ if (mode == RBM_NORMAL) { log_invalid_page(rnode, forknum, blkno, false); return InvalidBuffer; } if (mode == RBM_NORMAL_NO_LOG) return InvalidBuffer; /* OK to extend the file */ /* we do this in recovery only - no rel-extension lock needed */ Assert(InRecovery); buffer = InvalidBuffer; do { if (buffer != InvalidBuffer) ReleaseBuffer(buffer); buffer = ReadBufferWithoutRelcache(rnode, forknum, P_NEW, mode, NULL); } while (BufferGetBlockNumber(buffer) < blkno); /* Handle the corner case that P_NEW returns non-consecutive pages */ if (BufferGetBlockNumber(buffer) != blkno) { ReleaseBuffer(buffer); buffer = ReadBufferWithoutRelcache(rnode, forknum, blkno, mode, NULL); } } if (mode == RBM_NORMAL) { /* check that page has been initialized */ Page page = (Page) BufferGetPage(buffer); /* * We assume that PageIsNew is safe without a lock. During recovery, * there should be no other backends that could modify the buffer at * the same time. */ if (PageIsNew(page)) { ReleaseBuffer(buffer); log_invalid_page(rnode, forknum, blkno, true); return InvalidBuffer; } } return buffer; }
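/*
 * Illustrative sketch only, not part of the source: the calling pattern the
 * comment above implies for RBM_NORMAL. The record type xl_sample_update and
 * its fields are hypothetical.
 */
static void
sample_redo_update(XLogRecord *record)
{
	xl_sample_update *xlrec = (xl_sample_update *) XLogRecGetData(record);
	Buffer		buffer;

	buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM,
									xlrec->blkno, RBM_NORMAL);
	if (!BufferIsValid(buffer))
		return;					/* page was later dropped or truncated */

	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
	/* ... apply the logged change to BufferGetPage(buffer) here ... */
	MarkBufferDirty(buffer);
	UnlockReleaseBuffer(buffer);
}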
/* ------------------------------------------------ * bt_metap() * * Get btree meta-page information * * Usage: SELECT * FROM bt_metap('t1_pkey') * ------------------------------------------------ */ Datum bt_metap(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_P(0); Buffer buffer; Relation rel; RangeVar *relrv; Datum result; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pgstattuple functions")))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); if (!IS_INDEX(rel) || !IS_BTREE(rel)) elog(ERROR, "bt_metap() can be used only on a b-tree index."); /* * Reject attempts to read non-local temporary relations; we would * be likely to get wrong data since we have no visibility into the * owning session's local buffers. */ if (isOtherTempNamespace(RelationGetNamespace(rel))) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); buffer = ReadBuffer(rel, 0); { BTMetaPageData *metad; TupleDesc tupleDesc; int j; char *values[BTMETAP_NCOLUMNS]; HeapTuple tuple; Page page = BufferGetPage(buffer); metad = BTPageGetMeta(page); tupleDesc = RelationNameGetTupleDesc(BTMETAP_TYPE); j = 0; values[j] = palloc(32); snprintf(values[j++], 32, "%d", metad->btm_magic); values[j] = palloc(32); snprintf(values[j++], 32, "%d", metad->btm_version); values[j] = palloc(32); snprintf(values[j++], 32, "%d", metad->btm_root); values[j] = palloc(32); snprintf(values[j++], 32, "%d", metad->btm_level); values[j] = palloc(32); snprintf(values[j++], 32, "%d", metad->btm_fastroot); values[j] = palloc(32); snprintf(values[j++], 32, "%d", metad->btm_fastlevel); tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), values); result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple); } ReleaseBuffer(buffer); relation_close(rel, AccessShareLock); PG_RETURN_DATUM(result); }
/* * Get a buffer of the type and parity specified by flags, having at least * as much free space as indicated by needSpace. We use the lastUsedPages * cache to assign the same buffer previously requested when possible. * The returned buffer is already pinned and exclusive-locked. * * *isNew is set true if the page was initialized here, false if it was * already valid. */ Buffer SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew) { SpGistCache *cache = spgGetCache(index); SpGistLastUsedPage *lup; /* Bail out if even an empty page wouldn't meet the demand */ if (needSpace > SPGIST_PAGE_CAPACITY) elog(ERROR, "desired SPGiST tuple size is too big"); /* * If possible, increase the space request to include relation's * fillfactor. This ensures that when we add unrelated tuples to a page, * we try to keep 100-fillfactor% available for adding tuples that are * related to the ones already on it. But fillfactor mustn't cause an * error for requests that would otherwise be legal. */ needSpace += RelationGetTargetPageFreeSpace(index, SPGIST_DEFAULT_FILLFACTOR); needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY); /* Get the cache entry for this flags setting */ lup = GET_LUP(cache, flags); /* If we have nothing cached, just turn it over to allocNewBuffer */ if (lup->blkno == InvalidBlockNumber) { *isNew = true; return allocNewBuffer(index, flags); } /* fixed pages should never be in cache */ Assert(!SpGistBlockIsFixed(lup->blkno)); /* If cached freeSpace isn't enough, don't bother looking at the page */ if (lup->freeSpace >= needSpace) { Buffer buffer; Page page; buffer = ReadBuffer(index, lup->blkno); if (!ConditionalLockBuffer(buffer)) { /* * buffer is locked by another process, so return a new buffer */ ReleaseBuffer(buffer); *isNew = true; return allocNewBuffer(index, flags); } page = BufferGetPage(buffer); if (PageIsNew(page) || SpGistPageIsDeleted(page) || PageIsEmpty(page)) { /* OK to initialize the page */ uint16 pageflags = 0; if (GBUF_REQ_LEAF(flags)) pageflags |= SPGIST_LEAF; if (GBUF_REQ_NULLS(flags)) pageflags |= SPGIST_NULLS; SpGistInitBuffer(buffer, pageflags); lup->freeSpace = PageGetExactFreeSpace(page) - needSpace; *isNew = true; return buffer; } /* * Check that page is of right type and has enough space. We must * recheck this since our cache isn't necessarily up to date. */ if ((GBUF_REQ_LEAF(flags) ? SpGistPageIsLeaf(page) : !SpGistPageIsLeaf(page)) && (GBUF_REQ_NULLS(flags) ? SpGistPageStoresNulls(page) : !SpGistPageStoresNulls(page))) { int freeSpace = PageGetExactFreeSpace(page); if (freeSpace >= needSpace) { /* Success, update freespace info and return the buffer */ lup->freeSpace = freeSpace - needSpace; *isNew = false; return buffer; } } /* * fallback to allocation of new buffer */ UnlockReleaseBuffer(buffer); } /* No success with cache, so return a new buffer */ *isNew = true; return allocNewBuffer(index, flags); }
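/*
 * Illustrative caller, a sketch rather than source code: request a leaf page
 * with room for `size' bytes and release it afterwards. GBUF_LEAF is assumed
 * to be the leaf-request flag tested by GBUF_REQ_LEAF() above; the actual
 * tuple placement is elided.
 */
static void
sample_place_leaf_tuple(Relation index, int size)
{
	bool		isNew;
	Buffer		buffer = SpGistGetBuffer(index, GBUF_LEAF, size, &isNew);

	/* ... add the leaf tuple to BufferGetPage(buffer) here ... */

	UnlockReleaseBuffer(buffer);
}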
static void PersistentStore_DiagnoseDumpTable( PersistentStoreData *storeData, PersistentStoreSharedData *storeSharedData) { if (disable_persistent_diagnostic_dump) { return; } MIRROREDLOCK_BUFMGR_DECLARE; PersistentStoreScan storeScan; ItemPointerData persistentTid; int64 persistentSerialNum; Datum *values; BlockNumber lastDisplayedBlockNum; bool displayedOne; BlockNumber currentBlockNum; elog(LOG, "Diagnostic dump of persistent table ('%s'): maximum in-use serial number " INT64_FORMAT ", maximum free order number " INT64_FORMAT ", free TID %s, maximum known TID %s", storeData->tableName, storeSharedData->maxInUseSerialNum, storeSharedData->maxFreeOrderNum, ItemPointerToString(&storeSharedData->freeTid), ItemPointerToString2(&storeSharedData->maxTid)); values = (Datum*)palloc(storeData->numAttributes * sizeof(Datum)); PersistentStore_BeginScan( storeData, storeSharedData, &storeScan); lastDisplayedBlockNum = 0; displayedOne = false; while (PersistentStore_GetNext( &storeScan, values, &persistentTid, &persistentSerialNum)) { /* * Use the BlockIdGetBlockNumber routine because ItemPointerGetBlockNumber * asserts for valid TID. */ currentBlockNum = BlockIdGetBlockNumber(&persistentTid.ip_blkid); if (!displayedOne || currentBlockNum != lastDisplayedBlockNum) { Buffer buffer; PageHeader page; XLogRecPtr lsn; /* * Fetch the block and display the LSN. */ // -------- MirroredLock ---------- MIRROREDLOCK_BUFMGR_LOCK; buffer = ReadBuffer( storeScan.persistentRel, currentBlockNum); page = (PageHeader) BufferGetPage(buffer); lsn = PageGetLSN(page); ReleaseBuffer(buffer); MIRROREDLOCK_BUFMGR_UNLOCK; // -------- MirroredLock ---------- elog(LOG, "Diagnostic LSN %s of page %u", XLogLocationToString(&lsn), currentBlockNum); lastDisplayedBlockNum = currentBlockNum; displayedOne = true; } /* * Display the persistent tuple. */ (*storeData->printTupleCallback)( LOG, "DIAGNOSE", &persistentTid, values); } PersistentStore_EndScan(&storeScan); pfree(values); }
/* ----------------------------------------------- * bt_page_stats() * * Usage: SELECT * FROM bt_page_stats('t1_pkey', 0); * ----------------------------------------------- */ Datum bt_page_stats(PG_FUNCTION_ARGS) { text *relname = PG_GETARG_TEXT_P(0); uint32 blkno = PG_GETARG_UINT32(1); Buffer buffer; Relation rel; RangeVar *relrv; Datum result; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use pgstattuple functions")))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); if (!IS_INDEX(rel) || !IS_BTREE(rel)) elog(ERROR, "bt_page_stats() can be used only on a b-tree index."); /* * Reject attempts to read non-local temporary relations; we would * be likely to get wrong data since we have no visibility into the * owning session's local buffers. */ if (isOtherTempNamespace(RelationGetNamespace(rel))) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); if (blkno == 0) elog(ERROR, "Block 0 is a meta page."); CHECK_RELATION_BLOCK_RANGE(rel, blkno); buffer = ReadBuffer(rel, blkno); { HeapTuple tuple; TupleDesc tupleDesc; int j; char *values[BTPAGESTATS_NCOLUMNS]; BTPageStat stat; GetBTPageStatistics(blkno, buffer, &stat); tupleDesc = RelationNameGetTupleDesc(BTPAGESTATS_TYPE); j = 0; values[j] = palloc(32); snprintf(values[j++], 32, "%d", stat.blkno); values[j] = palloc(32); snprintf(values[j++], 32, "%c", stat.type); values[j] = palloc(32); snprintf(values[j++], 32, "%d", stat.live_items); values[j] = palloc(32); snprintf(values[j++], 32, "%d", stat.dead_items); values[j] = palloc(32); snprintf(values[j++], 32, "%d", stat.avg_item_size); values[j] = palloc(32); snprintf(values[j++], 32, "%d", stat.page_size); values[j] = palloc(32); snprintf(values[j++], 32, "%d", stat.free_size); values[j] = palloc(32); snprintf(values[j++], 32, "%d", stat.btpo_prev); values[j] = palloc(32); snprintf(values[j++], 32, "%d", stat.btpo_next); values[j] = palloc(32); if (stat.type == 'd') snprintf(values[j++], 32, "%d", stat.btpo.xact); else snprintf(values[j++], 32, "%d", stat.btpo.level); values[j] = palloc(32); snprintf(values[j++], 32, "%d", stat.btpo_flags); tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), values); result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple); } ReleaseBuffer(buffer); relation_close(rel, AccessShareLock); PG_RETURN_DATUM(result); }
/* * RelationGetBufferForTuple * * Returns pinned and exclusive-locked buffer of a page in given relation * with free space >= given len. * * If otherBuffer is not InvalidBuffer, then it references a previously * pinned buffer of another page in the same relation; on return, this * buffer will also be exclusive-locked. (This case is used by heap_update; * the otherBuffer contains the tuple being updated.) * * The reason for passing otherBuffer is that if two backends are doing * concurrent heap_update operations, a deadlock could occur if they try * to lock the same two buffers in opposite orders. To ensure that this * can't happen, we impose the rule that buffers of a relation must be * locked in increasing page number order. This is most conveniently done * by having RelationGetBufferForTuple lock them both, with suitable care * for ordering. * * NOTE: it is unlikely, but not quite impossible, for otherBuffer to be the * same buffer we select for insertion of the new tuple (this could only * happen if space is freed in that page after heap_update finds there's not * enough there). In that case, the page will be pinned and locked only once. * * For the vmbuffer and vmbuffer_other arguments, we avoid deadlock by * locking them only after locking the corresponding heap page, and taking * no further lwlocks while they are locked. * * We normally use FSM to help us find free space. However, * if HEAP_INSERT_SKIP_FSM is specified, we just append a new empty page to * the end of the relation if the tuple won't fit on the current target page. * This can save some cycles when we know the relation is new and doesn't * contain useful amounts of free space. * * HEAP_INSERT_SKIP_FSM is also useful for non-WAL-logged additions to a * relation, if the caller holds exclusive lock and is careful to invalidate * relation's smgr_targblock before the first insertion --- that ensures that * all insertions will occur into newly added pages and not be intermixed * with tuples from other transactions. That way, a crash can't risk losing * any committed data of other transactions. (See heap_insert's comments * for additional constraints needed for safe usage of this behavior.) * * The caller can also provide a BulkInsertState object to optimize many * insertions into the same relation. This keeps a pin on the current * insertion target page (to save pin/unpin cycles) and also passes a * BULKWRITE buffer selection strategy object to the buffer manager. * Passing NULL for bistate selects the default behavior. * * We always try to avoid filling existing pages further than the fillfactor. * This is OK since this routine is not consulted when updating a tuple and * keeping it on the same page, which is the scenario fillfactor is meant * to reserve space for. * * ereport(ERROR) is allowed here, so this routine *must* be called * before any (unlogged) changes are made in buffer pool. */ Buffer RelationGetBufferForTuple(Relation relation, Size len, Buffer otherBuffer, int options, BulkInsertState bistate, Buffer *vmbuffer, Buffer *vmbuffer_other) { bool use_fsm = !(options & HEAP_INSERT_SKIP_FSM); Buffer buffer = InvalidBuffer; Page page; Size pageFreeSpace, saveFreeSpace; BlockNumber targetBlock, otherBlock; bool needLock; len = MAXALIGN(len); /* be conservative */ /* Bulk insert is not supported for updates, only inserts. 
*/ Assert(otherBuffer == InvalidBuffer || !bistate); /* * If we're going to fail for an oversize tuple, do it right away */ if (len > MaxHeapTupleSize) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("row is too big: size %lu, maximum size %lu", (unsigned long) len, (unsigned long) MaxHeapTupleSize))); /* Compute desired extra freespace due to fillfactor option */ saveFreeSpace = RelationGetTargetPageFreeSpace(relation, HEAP_DEFAULT_FILLFACTOR); if (otherBuffer != InvalidBuffer) otherBlock = BufferGetBlockNumber(otherBuffer); else otherBlock = InvalidBlockNumber; /* just to keep compiler quiet */ /* * We first try to put the tuple on the same page we last inserted a tuple * on, as cached in the BulkInsertState or relcache entry. If that * doesn't work, we ask the Free Space Map to locate a suitable page. * Since the FSM's info might be out of date, we have to be prepared to * loop around and retry multiple times. (To ensure this isn't an infinite * loop, we must update the FSM with the correct amount of free space on * each page that proves not to be suitable.) If the FSM has no record of * a page with enough free space, we give up and extend the relation. * * When use_fsm is false, we either put the tuple onto the existing target * page or extend the relation. */ if (len + saveFreeSpace > MaxHeapTupleSize) { /* can't fit, don't bother asking FSM */ targetBlock = InvalidBlockNumber; use_fsm = false; } else if (bistate && bistate->current_buf != InvalidBuffer) targetBlock = BufferGetBlockNumber(bistate->current_buf); else targetBlock = RelationGetTargetBlock(relation); if (targetBlock == InvalidBlockNumber && use_fsm) { /* * We have no cached target page, so ask the FSM for an initial * target. */ targetBlock = GetPageWithFreeSpace(relation, len + saveFreeSpace); /* * If the FSM knows nothing of the rel, try the last page before we * give up and extend. This avoids one-tuple-per-page syndrome during * bootstrapping or in a recently-started system. */ if (targetBlock == InvalidBlockNumber) { BlockNumber nblocks = RelationGetNumberOfBlocks(relation); if (nblocks > 0) targetBlock = nblocks - 1; } } while (targetBlock != InvalidBlockNumber) { /* * Read and exclusive-lock the target block, as well as the other * block if one was given, taking suitable care with lock ordering and * the possibility they are the same block. * * If the page-level all-visible flag is set, caller will need to * clear both that and the corresponding visibility map bit. However, * by the time we return, we'll have x-locked the buffer, and we don't * want to do any I/O while in that state. So we check the bit here * before taking the lock, and pin the page if it appears necessary. * Checking without the lock creates a risk of getting the wrong * answer, so we'll have to recheck after acquiring the lock. 
*/ if (otherBuffer == InvalidBuffer) { /* easy case */ buffer = ReadBufferBI(relation, targetBlock, bistate); if (PageIsAllVisible(BufferGetPage(buffer))) visibilitymap_pin(relation, targetBlock, vmbuffer); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); } else if (otherBlock == targetBlock) { /* also easy case */ buffer = otherBuffer; if (PageIsAllVisible(BufferGetPage(buffer))) visibilitymap_pin(relation, targetBlock, vmbuffer); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); } else if (otherBlock < targetBlock) { /* lock other buffer first */ buffer = ReadBuffer(relation, targetBlock); if (PageIsAllVisible(BufferGetPage(buffer))) visibilitymap_pin(relation, targetBlock, vmbuffer); LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); } else { /* lock target buffer first */ buffer = ReadBuffer(relation, targetBlock); if (PageIsAllVisible(BufferGetPage(buffer))) visibilitymap_pin(relation, targetBlock, vmbuffer); LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); } /* * We now have the target page (and the other buffer, if any) pinned * and locked. However, since our initial PageIsAllVisible checks * were performed before acquiring the lock, the results might now be * out of date, either for the selected victim buffer, or for the * other buffer passed by the caller. In that case, we'll need to * give up our locks, go get the pin(s) we failed to get earlier, and * re-lock. That's pretty painful, but hopefully shouldn't happen * often. * * Note that there's a small possibility that we didn't pin the page * above but still have the correct page pinned anyway, either because * we've already made a previous pass through this loop, or because * caller passed us the right page anyway. * * Note also that it's possible that by the time we get the pin and * retake the buffer locks, the visibility map bit will have been * cleared by some other backend anyway. In that case, we'll have * done a bit of extra work for no gain, but there's no real harm * done. */ if (otherBuffer == InvalidBuffer || buffer <= otherBuffer) GetVisibilityMapPins(relation, buffer, otherBuffer, targetBlock, otherBlock, vmbuffer, vmbuffer_other); else GetVisibilityMapPins(relation, otherBuffer, buffer, otherBlock, targetBlock, vmbuffer_other, vmbuffer); /* * Now we can check to see if there's enough free space here. If so, * we're done. */ page = BufferGetPage(buffer); pageFreeSpace = PageGetHeapFreeSpace(page); if (len + saveFreeSpace <= pageFreeSpace) { /* use this page as future insert target, too */ RelationSetTargetBlock(relation, targetBlock); return buffer; } /* * Not enough space, so we must give up our page locks and pin (if * any) and prepare to look elsewhere. We don't care which order we * unlock the two buffers in, so this can be slightly simpler than the * code above. */ LockBuffer(buffer, BUFFER_LOCK_UNLOCK); if (otherBuffer == InvalidBuffer) ReleaseBuffer(buffer); else if (otherBlock != targetBlock) { LockBuffer(otherBuffer, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buffer); } /* Without FSM, always fall out of the loop and extend */ if (!use_fsm) break; /* * Update FSM as to condition of this page, and ask for another page * to try. */ targetBlock = RecordAndGetPageWithFreeSpace(relation, targetBlock, pageFreeSpace, len + saveFreeSpace); } /* * Have to extend the relation. * * We have to use a lock to ensure no one else is extending the rel at the * same time, else we will both try to initialize the same new page. 
We * can skip locking for new or temp relations, however, since no one else * could be accessing them. */ needLock = !RELATION_IS_LOCAL(relation); if (needLock) LockRelationForExtension(relation, ExclusiveLock); /* * XXX This does an lseek - rather expensive - but at the moment it is the * only way to accurately determine how many blocks are in a relation. Is * it worth keeping an accurate file length in shared memory someplace, * rather than relying on the kernel to do it for us? */ buffer = ReadBufferBI(relation, P_NEW, bistate); /* * We can be certain that locking the otherBuffer first is OK, since it * must have a lower page number. */ if (otherBuffer != InvalidBuffer) LockBuffer(otherBuffer, BUFFER_LOCK_EXCLUSIVE); /* * Now acquire lock on the new page. */ LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); /* * Release the file-extension lock; it's now OK for someone else to extend * the relation some more. Note that we cannot release this lock before * we have buffer lock on the new page, or we risk a race condition * against vacuumlazy.c --- see comments therein. */ if (needLock) UnlockRelationForExtension(relation, ExclusiveLock); /* * We need to initialize the empty new page. Double-check that it really * is empty (this should never happen, but if it does we don't want to * risk wiping out valid data). */ page = BufferGetPage(buffer); if (!PageIsNew(page)) elog(ERROR, "page %u of relation \"%s\" should be empty but is not", BufferGetBlockNumber(buffer), RelationGetRelationName(relation)); PageInit(page, BufferGetPageSize(buffer), 0); if (len > PageGetHeapFreeSpace(page)) { /* We should not get here given the test at the top */ elog(PANIC, "tuple is too big: size %lu", (unsigned long) len); } /* * Remember the new page as our target for future insertions. * * XXX should we enter the new page into the free space map immediately, * or just keep it for this backend's exclusive use in the short run * (until VACUUM sees it)? Seems to depend on whether you expect the * current backend to make more insertions or not, which is probably a * good bet most of the time. So for now, don't add it to FSM yet. */ RelationSetTargetBlock(relation, BufferGetBlockNumber(buffer)); return buffer; }
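/*
 * Minimal sketch (an illustration, not part of the source) of the
 * deadlock-avoidance rule described above: when two heap pages must be
 * locked, always take the locks in increasing block-number order.
 */
static void
lock_pair_in_block_order(Buffer buf1, Buffer buf2)
{
	if (buf1 == buf2)
		LockBuffer(buf1, BUFFER_LOCK_EXCLUSIVE);
	else if (BufferGetBlockNumber(buf1) < BufferGetBlockNumber(buf2))
	{
		LockBuffer(buf1, BUFFER_LOCK_EXCLUSIVE);
		LockBuffer(buf2, BUFFER_LOCK_EXCLUSIVE);
	}
	else
	{
		LockBuffer(buf2, BUFFER_LOCK_EXCLUSIVE);
		LockBuffer(buf1, BUFFER_LOCK_EXCLUSIVE);
	}
}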
void PersistentStore_ReplaceTuple( PersistentStoreData *storeData, PersistentStoreSharedData *storeSharedData, ItemPointer persistentTid, /* TID of the stored tuple. */ HeapTuple tuple, Datum *newValues, bool *replaces, bool flushToXLog) /* When true, the XLOG record for this change will be flushed to disk. */ { Relation persistentRel; bool *nulls; HeapTuple replacementTuple = NULL; XLogRecPtr xlogUpdateEndLoc; #ifdef USE_ASSERT_CHECKING if (storeSharedData == NULL || !PersistentStoreSharedData_EyecatcherIsValid(storeSharedData)) elog(ERROR, "Persistent store shared-memory not valid"); #endif if (Debug_persistent_store_print) elog(PersistentStore_DebugPrintLevel(), "PersistentStore_ReplaceTuple: Going to replace set of columns in tuple at TID %s ('%s', shared data %p)", ItemPointerToString(persistentTid), storeData->tableName, storeSharedData); persistentRel = (*storeData->openRel)(); /* * In order to keep the tuples the exact same size to enable direct reuse of * free tuples, we do not use NULLs. */ nulls = (bool*)palloc0(storeData->numAttributes * sizeof(bool)); /* * Modify the tuple. */ replacementTuple = heap_modify_tuple(tuple, persistentRel->rd_att, newValues, nulls, replaces); replacementTuple->t_self = *persistentTid; frozen_heap_inplace_update(persistentRel, replacementTuple); /* * Return the XLOG location of the UPDATE tuple's XLOG record. */ xlogUpdateEndLoc = XLogLastInsertEndLoc(); heap_freetuple(replacementTuple); pfree(nulls); if (Debug_persistent_store_print) { Datum *readValues; bool *readNulls; HeapTupleData readTuple; Buffer buffer; HeapTuple readTupleCopy; elog(PersistentStore_DebugPrintLevel(), "PersistentStore_ReplaceTuple: Replaced set of columns in tuple at TID %s ('%s')", ItemPointerToString(persistentTid), storeData->tableName); readValues = (Datum*)palloc(storeData->numAttributes * sizeof(Datum)); readNulls = (bool*)palloc(storeData->numAttributes * sizeof(bool)); readTuple.t_self = *persistentTid; if (!heap_fetch(persistentRel, SnapshotAny, &readTuple, &buffer, false, NULL)) { elog(ERROR, "Failed to fetch persistent tuple at %s ('%s')", ItemPointerToString(&readTuple.t_self), storeData->tableName); } readTupleCopy = heaptuple_copy_to(&readTuple, NULL, NULL); ReleaseBuffer(buffer); heap_deform_tuple(readTupleCopy, persistentRel->rd_att, readValues, readNulls); (*storeData->printTupleCallback)( PersistentStore_DebugPrintLevel(), "STORE REPLACED TUPLE", persistentTid, readValues); heap_freetuple(readTupleCopy); pfree(readValues); pfree(readNulls); } (*storeData->closeRel)(persistentRel); if (flushToXLog) { XLogFlush(xlogUpdateEndLoc); XLogRecPtr_Zero(&nowaitXLogEndLoc); } else nowaitXLogEndLoc = xlogUpdateEndLoc; }
/* * _hash_dropbuf() -- release an unlocked buffer. * * This is used to unpin a buffer on which we hold no lock. */ void _hash_dropbuf(Relation rel, Buffer buf) { ReleaseBuffer(buf); }
STDMETHODIMP CDecWMV9::ProcessOutput() { HRESULT hr = S_OK; DWORD dwStatus = 0; BYTE *pBuffer = GetBuffer(m_pRawBufferSize); CMediaBuffer *pOutBuffer = new CMediaBuffer(pBuffer, m_pRawBufferSize, true); pOutBuffer->SetLength(0); DMO_OUTPUT_DATA_BUFFER OutputBufferStructs[1]; memset(&OutputBufferStructs[0], 0, sizeof(DMO_OUTPUT_DATA_BUFFER)); OutputBufferStructs[0].pBuffer = pOutBuffer; hr = m_pDMO->ProcessOutput(0, 1, OutputBufferStructs, &dwStatus); if (FAILED(hr)) { ReleaseBuffer(pBuffer); DbgLog((LOG_TRACE, 10, L"-> ProcessOutput failed with hr: %x", hr)); return S_FALSE; } if (hr == S_FALSE) { ReleaseBuffer(pBuffer); return S_FALSE; } LAVFrame *pFrame = nullptr; AllocateFrame(&pFrame); BITMAPINFOHEADER *pBMI = nullptr; videoFormatTypeHandler(mtOut, &pBMI); pFrame->width = pBMI->biWidth; pFrame->height = pBMI->biHeight; pFrame->format = m_OutPixFmt; pFrame->key_frame = (OutputBufferStructs[0].dwStatus & DMO_OUTPUT_DATA_BUFFERF_SYNCPOINT); AVRational display_aspect_ratio; int64_t num = (int64_t)m_StreamAR.num * pBMI->biWidth; int64_t den = (int64_t)m_StreamAR.den * pBMI->biHeight; av_reduce(&display_aspect_ratio.num, &display_aspect_ratio.den, num, den, INT_MAX); BYTE contentType = 0; DWORD dwPropSize = 1; pOutBuffer->GetProperty(WM_SampleExtensionGUID_ContentType, &contentType, &dwPropSize); pFrame->interlaced = !!(contentType & WM_CT_INTERLACED); pFrame->repeat = !!(contentType & WM_CT_REPEAT_FIRST_FIELD); LAVDeintFieldOrder fo = m_pSettings->GetDeintFieldOrder(); pFrame->tff = (fo == DeintFieldOrder_Auto) ? !!(contentType & WM_CT_TOP_FIELD_FIRST) : (fo == DeintFieldOrder_TopFieldFirst); if (pFrame->interlaced && !m_bInterlaced) m_bInterlaced = TRUE; pFrame->interlaced = (pFrame->interlaced || (m_bInterlaced && m_pSettings->GetDeinterlacingMode() == DeintMode_Aggressive) || m_pSettings->GetDeinterlacingMode() == DeintMode_Force) && !(m_pSettings->GetDeinterlacingMode() == DeintMode_Disable); if (m_bManualReorder) { if (!m_timestampQueue.empty()) { pFrame->rtStart = m_timestampQueue.front(); m_timestampQueue.pop(); if (OutputBufferStructs[0].dwStatus & DMO_OUTPUT_DATA_BUFFERF_TIMELENGTH) { pFrame->rtStop = pFrame->rtStart + OutputBufferStructs[0].rtTimelength; } } } else { if (OutputBufferStructs[0].dwStatus & DMO_OUTPUT_DATA_BUFFERF_TIME) { pFrame->rtStart = OutputBufferStructs[0].rtTimestamp; if (OutputBufferStructs[0].dwStatus & DMO_OUTPUT_DATA_BUFFERF_TIMELENGTH) { pFrame->rtStop = pFrame->rtStart + OutputBufferStructs[0].rtTimelength; } } } // Check alignment // If not properly aligned, we need to make the data aligned. int alignment = (m_OutPixFmt == LAVPixFmt_NV12) ? 
16 : 32; if ((pFrame->width % alignment) != 0) { AllocLAVFrameBuffers(pFrame); size_t ySize = pFrame->width * pFrame->height; memcpy_plane(pFrame->data[0], pBuffer, pFrame->width, pFrame->stride[0], pFrame->height); if (m_OutPixFmt == LAVPixFmt_NV12) { memcpy_plane(pFrame->data[1], pBuffer+ySize, pFrame->width, pFrame->stride[1], pFrame->height / 2); } else if (m_OutPixFmt == LAVPixFmt_YUV420) { size_t uvSize = ySize / 4; memcpy_plane(pFrame->data[2], pBuffer+ySize, pFrame->width / 2, pFrame->stride[2], pFrame->height / 2); memcpy_plane(pFrame->data[1], pBuffer+ySize+uvSize, pFrame->width / 2, pFrame->stride[1], pFrame->height / 2); } ReleaseBuffer(pBuffer); } else { if (m_OutPixFmt == LAVPixFmt_NV12) { pFrame->data[0] = pBuffer; pFrame->data[1] = pBuffer + pFrame->width * pFrame->height; pFrame->stride[0] = pFrame->stride[1] = pFrame->width; } else if (m_OutPixFmt == LAVPixFmt_YUV420) { pFrame->data[0] = pBuffer; pFrame->data[2] = pBuffer + pFrame->width * pFrame->height; pFrame->data[1] = pFrame->data[2] + (pFrame->width / 2) * (pFrame->height / 2); pFrame->stride[0] = pFrame->width; pFrame->stride[1] = pFrame->stride[2] = pFrame->width / 2; } pFrame->destruct = wmv9_buffer_destruct; pFrame->priv_data = this; } pFrame->flags |= LAV_FRAME_FLAG_BUFFER_MODIFY; Deliver(pFrame); SafeRelease(&pOutBuffer); if (OutputBufferStructs[0].dwStatus & DMO_OUTPUT_DATA_BUFFERF_INCOMPLETE) return ProcessOutput(); return hr; }
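/*
 * Illustrative sketch, an assumption rather than source code: the packed
 * plane layout the zero-copy branch above relies on. For NV12, the
 * interleaved UV plane starts immediately after the Y plane. (In the YUV420
 * branch above, the V plane precedes the U plane in the buffer, which is why
 * data[2] is assigned before data[1].)
 */
static void
nv12_plane_pointers(unsigned char *base, int width, int height,
                    unsigned char **y, unsigned char **uv)
{
    *y = base;                              /* width x height luma bytes */
    *uv = base + (size_t) width * height;   /* width x (height/2) interleaved chroma bytes */
}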
/* * Search the relation 'rel' for a tuple using the index. * * If a matching tuple is found, lock it with lockmode, fill the slot with its * contents, and return true. Return false otherwise. */ bool RelationFindReplTupleByIndex(Relation rel, Oid idxoid, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot) { HeapTuple scantuple; ScanKeyData skey[INDEX_MAX_KEYS]; IndexScanDesc scan; SnapshotData snap; TransactionId xwait; Relation idxrel; bool found; /* Open the index. */ idxrel = index_open(idxoid, RowExclusiveLock); /* Start an index scan. */ InitDirtySnapshot(snap); scan = index_beginscan(rel, idxrel, &snap, IndexRelationGetNumberOfKeyAttributes(idxrel), 0); /* Build scan key. */ build_replindex_scan_key(skey, rel, idxrel, searchslot); retry: found = false; index_rescan(scan, skey, IndexRelationGetNumberOfKeyAttributes(idxrel), NULL, 0); /* Try to find the tuple */ if ((scantuple = index_getnext(scan, ForwardScanDirection)) != NULL) { found = true; ExecStoreTuple(scantuple, outslot, InvalidBuffer, false); ExecMaterializeSlot(outslot); xwait = TransactionIdIsValid(snap.xmin) ? snap.xmin : snap.xmax; /* * If the tuple is locked, wait for locking transaction to finish and * retry. */ if (TransactionIdIsValid(xwait)) { XactLockTableWait(xwait, NULL, NULL, XLTW_None); goto retry; } } /* Found tuple, try to lock it in the lockmode. */ if (found) { Buffer buf; HeapUpdateFailureData hufd; HTSU_Result res; HeapTupleData locktup; ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self); PushActiveSnapshot(GetLatestSnapshot()); res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false), lockmode, LockWaitBlock, false /* don't follow updates */ , &buf, &hufd); /* the tuple slot already has the buffer pinned */ ReleaseBuffer(buf); PopActiveSnapshot(); switch (res) { case HeapTupleMayBeUpdated: break; case HeapTupleUpdated: /* XXX: Improve handling here */ if (ItemPointerIndicatesMovedPartitions(&hufd.ctid)) ereport(LOG, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying"))); else ereport(LOG, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("concurrent update, retrying"))); goto retry; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); default: elog(ERROR, "unexpected heap_lock_tuple status: %u", res); break; } } index_endscan(scan); /* Don't release lock until commit. */ index_close(idxrel, NoLock); return found; }
/* * Search the relation 'rel' for a tuple using a sequential scan. * * If a matching tuple is found, lock it with lockmode, fill the slot with its * contents, and return true. Return false otherwise. * * Note that this stops on the first matching tuple. * * This can obviously be quite slow on tables that have more than a few rows. */ bool RelationFindReplTupleSeq(Relation rel, LockTupleMode lockmode, TupleTableSlot *searchslot, TupleTableSlot *outslot) { HeapTuple scantuple; HeapScanDesc scan; SnapshotData snap; TransactionId xwait; bool found; TupleDesc desc = RelationGetDescr(rel); Assert(equalTupleDescs(desc, outslot->tts_tupleDescriptor)); /* Start a heap scan. */ InitDirtySnapshot(snap); scan = heap_beginscan(rel, &snap, 0, NULL); retry: found = false; heap_rescan(scan, NULL); /* Try to find the tuple */ while ((scantuple = heap_getnext(scan, ForwardScanDirection)) != NULL) { if (!tuple_equals_slot(desc, scantuple, searchslot)) continue; found = true; ExecStoreTuple(scantuple, outslot, InvalidBuffer, false); ExecMaterializeSlot(outslot); xwait = TransactionIdIsValid(snap.xmin) ? snap.xmin : snap.xmax; /* * If the tuple is locked, wait for locking transaction to finish and * retry. */ if (TransactionIdIsValid(xwait)) { XactLockTableWait(xwait, NULL, NULL, XLTW_None); goto retry; } } /* Found tuple, try to lock it in the lockmode. */ if (found) { Buffer buf; HeapUpdateFailureData hufd; HTSU_Result res; HeapTupleData locktup; ItemPointerCopy(&outslot->tts_tuple->t_self, &locktup.t_self); PushActiveSnapshot(GetLatestSnapshot()); res = heap_lock_tuple(rel, &locktup, GetCurrentCommandId(false), lockmode, LockWaitBlock, false /* don't follow updates */ , &buf, &hufd); /* the tuple slot already has the buffer pinned */ ReleaseBuffer(buf); PopActiveSnapshot(); switch (res) { case HeapTupleMayBeUpdated: break; case HeapTupleUpdated: /* XXX: Improve handling here */ if (ItemPointerIndicatesMovedPartitions(&hufd.ctid)) ereport(LOG, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("tuple to be locked was already moved to another partition due to concurrent update, retrying"))); else ereport(LOG, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("concurrent update, retrying"))); goto retry; case HeapTupleInvisible: elog(ERROR, "attempted to lock invisible tuple"); default: elog(ERROR, "unexpected heap_lock_tuple status: %u", res); break; } } heap_endscan(scan); return found; }
void C_TCPServerConnection::run() { assert(m_pMsgConsolePrompter); m_pMsgConsolePrompter->PromptMessageLineBy(_T("TCP_CONNECTION_0")); socket().setReceiveTimeout(Poco::Timespan(90,0)); try { char* pszCmdBuffer = NULL; while (true) { long nCmdLen = 0; recvData(socket(), &pszCmdBuffer, nCmdLen); if (nCmdLen == 0) { if (!m_szClientID.empty()) { C_NetCmdLogOff logOff; logOff.HandleRequest(socket(), NULL, m_DBOperate, m_szClientID.c_str()); } m_pMsgConsolePrompter->PromptMessageLineBy(_T("TCP_CONNECTION_1")); break; } if (nCmdLen > 0) { C_NetCommandDecomposer netCmdDecomposer; if(!netCmdDecomposer.Decompose(pszCmdBuffer)) { ReleaseBuffer(&pszCmdBuffer); continue; } tstring szCmdName = netCmdDecomposer.GetCmdName().c_str(); C_NetCommand* pRequestCmd = m_NetCmder.CreateCommand(szCmdName.c_str(), netCmdDecomposer.GetCmdType().c_str()); if(pRequestCmd == NULL) { ReleaseBuffer(&pszCmdBuffer); continue; } if(szCmdName == _T("log on") || szCmdName == _T("log off") || szCmdName == _T("fetch channels") || szCmdName == _T("fetch content")) { if(!m_bLogOn) { C_NetCmdLogOn *pLogOn = dynamic_cast<C_NetCmdLogOn*>(pRequestCmd); if(pLogOn == NULL) { // don't leak the buffer or the command if the cast fails ReleaseBuffer(&pszCmdBuffer); pRequestCmd->Release(); continue; } tstring szMac = netCmdDecomposer.GetCmdPara(_T("id")); if( pLogOn->HandleRequest(socket(), pszCmdBuffer, m_DBOperate, szMac.c_str()) ) { m_szClientID = szMac; m_bLogOn = true; C_AreaEvent::GetInstance()->PushEvent(socket(), szMac.c_str()); } // release on both the success and failure paths ReleaseBuffer(&pszCmdBuffer); pLogOn->Release(); continue; } } pRequestCmd->HandleRequest(socket(), pszCmdBuffer, m_DBOperate, m_szClientID.c_str()); pRequestCmd->Release(); ReleaseBuffer(&pszCmdBuffer); } } } catch (Poco::Exception& exc) { if (!m_szClientID.empty()) { C_NetCmdLogOff logOff; logOff.HandleRequest(socket(), NULL, m_DBOperate, m_szClientID.c_str()); } m_pMsgConsolePrompter->PromptMessageLineBy(_T("TCP_CONNECTION_2"), exc.what()); return; } }
/* * workhorse */ static bytea * get_raw_page_internal(text *relname, ForkNumber forknum, BlockNumber blkno) { bytea *raw_page; RangeVar *relrv; Relation rel; char *raw_page_data; Buffer buf; if (!superuser()) ereport(ERROR, (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), (errmsg("must be superuser to use raw functions")))); relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); rel = relation_openrv(relrv, AccessShareLock); /* Check that this relation has storage */ if (rel->rd_rel->relkind == RELKIND_VIEW) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get raw page from view \"%s\"", RelationGetRelationName(rel)))); if (rel->rd_rel->relkind == RELKIND_COMPOSITE_TYPE) ereport(ERROR, (errcode(ERRCODE_WRONG_OBJECT_TYPE), errmsg("cannot get raw page from composite type \"%s\"", RelationGetRelationName(rel)))); /* * Reject attempts to read non-local temporary relations; we would be * likely to get wrong data since we have no visibility into the owning * session's local buffers. */ if (RELATION_IS_OTHER_TEMP(rel)) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("cannot access temporary tables of other sessions"))); if (blkno >= RelationGetNumberOfBlocks(rel)) elog(ERROR, "block number %u is out of range for relation \"%s\"", blkno, RelationGetRelationName(rel)); /* Initialize buffer to copy to */ raw_page = (bytea *) palloc(BLCKSZ + VARHDRSZ); SET_VARSIZE(raw_page, BLCKSZ + VARHDRSZ); raw_page_data = VARDATA(raw_page); /* Take a verbatim copy of the page */ buf = ReadBufferExtended(rel, forknum, blkno, RBM_NORMAL, NULL); LockBuffer(buf, BUFFER_LOCK_SHARE); memcpy(raw_page_data, BufferGetPage(buf), BLCKSZ); LockBuffer(buf, BUFFER_LOCK_UNLOCK); ReleaseBuffer(buf); relation_close(rel, AccessShareLock); return raw_page; }
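/*
 * Hypothetical SQL-callable wrapper, a sketch only (the extension's real
 * wrappers may differ): expose get_raw_page_internal for the main fork,
 * e.g. SELECT get_raw_page_main('t1', 0);
 */
Datum
get_raw_page_main(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	uint32		blkno = PG_GETARG_UINT32(1);

	PG_RETURN_BYTEA_P(get_raw_page_internal(relname, MAIN_FORKNUM, blkno));
}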
/* * pg_prewarm(regclass, mode text, fork text, * first_block int8, last_block int8) * * The first argument is the relation to be prewarmed; the second controls * how prewarming is done; legal options are 'prefetch', 'read', and 'buffer'. * The third is the name of the relation fork to be prewarmed. The fourth * and fifth arguments specify the first and last block to be prewarmed. * If the fourth argument is NULL, it will be taken as 0; if the fifth argument * is NULL, it will be taken as the number of blocks in the relation. The * return value is the number of blocks successfully prewarmed. */ Datum pg_prewarm(PG_FUNCTION_ARGS) { Oid relOid; text *forkName; text *type; int64 first_block; int64 last_block; int64 nblocks; int64 blocks_done = 0; int64 block; Relation rel; ForkNumber forkNumber; char *forkString; char *ttype; PrewarmType ptype; AclResult aclresult; /* Basic sanity checking. */ if (PG_ARGISNULL(0)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("relation cannot be null"))); relOid = PG_GETARG_OID(0); if (PG_ARGISNULL(1)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errmsg("prewarm type cannot be null")))); type = PG_GETARG_TEXT_P(1); ttype = text_to_cstring(type); if (strcmp(ttype, "prefetch") == 0) ptype = PREWARM_PREFETCH; else if (strcmp(ttype, "read") == 0) ptype = PREWARM_READ; else if (strcmp(ttype, "buffer") == 0) ptype = PREWARM_BUFFER; else { ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid prewarm type"), errhint("Valid prewarm types are \"prefetch\", \"read\", and \"buffer\"."))); PG_RETURN_INT64(0); /* Placate compiler. */ } if (PG_ARGISNULL(2)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), (errmsg("relation fork cannot be null")))); forkName = PG_GETARG_TEXT_P(2); forkString = text_to_cstring(forkName); forkNumber = forkname_to_number(forkString); /* Open relation and check privileges. */ rel = relation_open(relOid, AccessShareLock); aclresult = pg_class_aclcheck(relOid, GetUserId(), ACL_SELECT); if (aclresult != ACLCHECK_OK) aclcheck_error(aclresult, ACL_KIND_CLASS, get_rel_name(relOid)); /* Check that the fork exists. */ RelationOpenSmgr(rel); if (!smgrexists(rel->rd_smgr, forkNumber)) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("fork \"%s\" does not exist for this relation", forkString))); /* Validate block numbers, or handle nulls. */ nblocks = RelationGetNumberOfBlocksInFork(rel, forkNumber); if (PG_ARGISNULL(3)) first_block = 0; else { first_block = PG_GETARG_INT64(3); if (first_block < 0 || first_block >= nblocks) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("starting block number must be between 0 and " INT64_FORMAT, nblocks - 1))); } if (PG_ARGISNULL(4)) last_block = nblocks - 1; else { last_block = PG_GETARG_INT64(4); if (last_block < 0 || last_block >= nblocks) ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("ending block number must be between 0 and " INT64_FORMAT, nblocks - 1))); } /* Now we're ready to do the real work. */ if (ptype == PREWARM_PREFETCH) { #ifdef USE_PREFETCH /* * In prefetch mode, we just hint the OS to read the blocks, but we * don't know whether it really does it, and we don't wait for it to * finish. * * It would probably be better to pass our prefetch requests in chunks * of a megabyte or maybe even a whole segment at a time, but there's * no practical way to do that at present without a gross modularity * violation, so we just do this. 
*/ for (block = first_block; block <= last_block; ++block) { CHECK_FOR_INTERRUPTS(); PrefetchBuffer(rel, forkNumber, block); ++blocks_done; } #else ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("prefetch is not supported by this build"))); #endif } else if (ptype == PREWARM_READ) { /* * In read mode, we actually read the blocks, but not into shared * buffers. This is more portable than prefetch mode (it works * everywhere) and is synchronous. */ for (block = first_block; block <= last_block; ++block) { CHECK_FOR_INTERRUPTS(); smgrread(rel->rd_smgr, forkNumber, block, blockbuffer.data); ++blocks_done; } } else if (ptype == PREWARM_BUFFER) { /* * In buffer mode, we actually pull the data into shared_buffers. */ for (block = first_block; block <= last_block; ++block) { Buffer buf; CHECK_FOR_INTERRUPTS(); buf = ReadBufferExtended(rel, forkNumber, block, RBM_NORMAL, NULL); ReleaseBuffer(buf); ++blocks_done; } } /* Close relation, release lock. */ relation_close(rel, AccessShareLock); PG_RETURN_INT64(blocks_done); }
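/*
 * Usage sketch, assuming the SQL wrapper whose signature is given in the
 * header comment above:
 *
 *   -- pull every block of t1's main fork into shared_buffers
 *   SELECT pg_prewarm('t1'::regclass, 'buffer', 'main', NULL, NULL);
 */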
FarString& FarString::TrimRight() { FarSF::RTrim (GetBuffer()); ReleaseBuffer(); return * this; }
/* * Delete a posting tree page. */ static void ginDeletePage(GinVacuumState *gvs, BlockNumber deleteBlkno, BlockNumber leftBlkno, BlockNumber parentBlkno, OffsetNumber myoff, bool isParentRoot) { Buffer dBuffer; Buffer lBuffer; Buffer pBuffer; Page page, parentPage; BlockNumber rightlink; /* * Lock the pages in the same order as an insertion would, to avoid * deadlocks: left, then right, then parent. */ lBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, leftBlkno, RBM_NORMAL, gvs->strategy); dBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, deleteBlkno, RBM_NORMAL, gvs->strategy); pBuffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, parentBlkno, RBM_NORMAL, gvs->strategy); LockBuffer(lBuffer, GIN_EXCLUSIVE); LockBuffer(dBuffer, GIN_EXCLUSIVE); if (!isParentRoot) /* parent is already locked by * LockBufferForCleanup() */ LockBuffer(pBuffer, GIN_EXCLUSIVE); START_CRIT_SECTION(); /* Unlink the page by changing left sibling's rightlink */ page = BufferGetPage(dBuffer); rightlink = GinPageGetOpaque(page)->rightlink; page = BufferGetPage(lBuffer); GinPageGetOpaque(page)->rightlink = rightlink; /* Delete downlink from parent */ parentPage = BufferGetPage(pBuffer); #ifdef USE_ASSERT_CHECKING do { PostingItem *tod = GinDataPageGetPostingItem(parentPage, myoff); Assert(PostingItemGetBlockNumber(tod) == deleteBlkno); } while (0); #endif GinPageDeletePostingItem(parentPage, myoff); page = BufferGetPage(dBuffer); /* * We don't change the deleted page's own rightlink, so that any search * scan still running on it can continue to step right. */ GinPageGetOpaque(page)->flags = GIN_DELETED; MarkBufferDirty(pBuffer); MarkBufferDirty(lBuffer); MarkBufferDirty(dBuffer); if (RelationNeedsWAL(gvs->index)) { XLogRecPtr recptr; ginxlogDeletePage data; /* * We can't pass REGBUF_STANDARD for the deleted page, because we * didn't set pd_lower on pre-9.4 versions. The page might've been * binary-upgraded from an older version, and hence not have pd_lower * set correctly. Ditto for the left page, but removing the item from * the parent updated its pd_lower, so we know that's OK at this * point. */ XLogBeginInsert(); XLogRegisterBuffer(0, dBuffer, 0); XLogRegisterBuffer(1, pBuffer, REGBUF_STANDARD); XLogRegisterBuffer(2, lBuffer, 0); data.parentOffset = myoff; data.rightLink = GinPageGetOpaque(page)->rightlink; XLogRegisterData((char *) &data, sizeof(ginxlogDeletePage)); recptr = XLogInsert(RM_GIN_ID, XLOG_GIN_DELETE_PAGE); PageSetLSN(page, recptr); PageSetLSN(parentPage, recptr); PageSetLSN(BufferGetPage(lBuffer), recptr); } if (!isParentRoot) LockBuffer(pBuffer, GIN_UNLOCK); ReleaseBuffer(pBuffer); UnlockReleaseBuffer(lBuffer); UnlockReleaseBuffer(dBuffer); END_CRIT_SECTION(); gvs->result->pages_deleted++; }
FarString& FarString::Unquote() { FarSF::Unquote( GetBuffer() ); ReleaseBuffer(); return * this; }
/* * scans posting tree and deletes empty pages */ static bool ginScanToDelete(GinVacuumState *gvs, BlockNumber blkno, bool isRoot, DataPageDeleteStack *parent, OffsetNumber myoff) { DataPageDeleteStack *me; Buffer buffer; Page page; bool meDelete = FALSE; bool isempty; if (isRoot) { me = parent; } else { if (!parent->child) { me = (DataPageDeleteStack *) palloc0(sizeof(DataPageDeleteStack)); me->parent = parent; parent->child = me; me->leftBlkno = InvalidBlockNumber; } else me = parent->child; } buffer = ReadBufferExtended(gvs->index, MAIN_FORKNUM, blkno, RBM_NORMAL, gvs->strategy); page = BufferGetPage(buffer); Assert(GinPageIsData(page)); if (!GinPageIsLeaf(page)) { OffsetNumber i; me->blkno = blkno; for (i = FirstOffsetNumber; i <= GinPageGetOpaque(page)->maxoff; i++) { PostingItem *pitem = GinDataPageGetPostingItem(page, i); if (ginScanToDelete(gvs, PostingItemGetBlockNumber(pitem), FALSE, me, i)) i--; } } if (GinPageIsLeaf(page)) isempty = GinDataLeafPageIsEmpty(page); else isempty = GinPageGetOpaque(page)->maxoff < FirstOffsetNumber; if (isempty) { /* we never delete the left- or rightmost branch */ if (me->leftBlkno != InvalidBlockNumber && !GinPageRightMost(page)) { Assert(!isRoot); ginDeletePage(gvs, blkno, me->leftBlkno, me->parent->blkno, myoff, me->parent->isRoot); meDelete = TRUE; } } ReleaseBuffer(buffer); if (!meDelete) me->leftBlkno = blkno; return meDelete; }
/* * Try to find the parent for the current stack position; the correct * parent and the child's offset within it are returned in stack->parent. * This function must never release the root page, to prevent conflicts * with the vacuum process. */ void ginFindParents(GinBtree btree, GinBtreeStack *stack, BlockNumber rootBlkno) { Page page; Buffer buffer; BlockNumber blkno, leftmostBlkno; OffsetNumber offset; GinBtreeStack *root = stack->parent; GinBtreeStack *ptr; if (!root) { /* XLog mode... */ root = (GinBtreeStack *) palloc(sizeof(GinBtreeStack)); root->blkno = rootBlkno; root->buffer = ReadBuffer(btree->index, rootBlkno); LockBuffer(root->buffer, GIN_EXCLUSIVE); root->parent = NULL; } else { /* * Find the root; we must not release the root page until the update * is finished! */ while (root->parent) { ReleaseBuffer(root->buffer); root = root->parent; } Assert(root->blkno == rootBlkno); Assert(BufferGetBlockNumber(root->buffer) == rootBlkno); LockBuffer(root->buffer, GIN_EXCLUSIVE); } root->off = InvalidOffsetNumber; page = BufferGetPage(root->buffer); Assert(!GinPageIsLeaf(page)); /* check trivial case */ if ((root->off = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) != InvalidOffsetNumber) { stack->parent = root; return; } leftmostBlkno = blkno = btree->getLeftMostPage(btree, page); LockBuffer(root->buffer, GIN_UNLOCK); Assert(blkno != InvalidBlockNumber); for (;;) { buffer = ReadBuffer(btree->index, blkno); LockBuffer(buffer, GIN_EXCLUSIVE); page = BufferGetPage(buffer); if (GinPageIsLeaf(page)) elog(ERROR, "Lost path"); leftmostBlkno = btree->getLeftMostPage(btree, page); while ((offset = btree->findChildPtr(btree, page, stack->blkno, InvalidOffsetNumber)) == InvalidOffsetNumber) { blkno = GinPageGetOpaque(page)->rightlink; LockBuffer(buffer, GIN_UNLOCK); ReleaseBuffer(buffer); if (blkno == InvalidBlockNumber) break; buffer = ReadBuffer(btree->index, blkno); LockBuffer(buffer, GIN_EXCLUSIVE); page = BufferGetPage(buffer); } if (blkno != InvalidBlockNumber) { ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack)); ptr->blkno = blkno; ptr->buffer = buffer; ptr->parent = root; /* may be wrong, but the next call will * correct it */ ptr->off = offset; stack->parent = ptr; return; } blkno = leftmostBlkno; } }
STDMETHODIMP CDecWMV9MFT::ProcessOutput() { HRESULT hr = S_OK; DWORD dwStatus = 0; MFT_OUTPUT_STREAM_INFO outputInfo = {0}; m_pMFT->GetOutputStreamInfo(0, &outputInfo); IMFMediaBuffer *pMFBuffer = nullptr; ASSERT(!(outputInfo.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES)); MFT_OUTPUT_DATA_BUFFER OutputBuffer = {0}; if (!(outputInfo.dwFlags & MFT_OUTPUT_STREAM_PROVIDES_SAMPLES)) { pMFBuffer = GetBuffer(outputInfo.cbSize); if (!pMFBuffer) { DbgLog((LOG_TRACE, 10, L"Unable to allocate media buffer")); return E_FAIL; } IMFSample *pSampleOut = nullptr; hr = MF.CreateSample(&pSampleOut); if (FAILED(hr)) { DbgLog((LOG_TRACE, 10, L"Unable to allocate MF sample, hr: 0x%x", hr)); ReleaseBuffer(pMFBuffer); return E_FAIL; } pSampleOut->AddBuffer(pMFBuffer); OutputBuffer.pSample = pSampleOut; } hr = m_pMFT->ProcessOutput(0, 1, &OutputBuffer, &dwStatus); // We don't process events, just release them SafeRelease(&OutputBuffer.pEvents); // handle stream format changes if (hr == MF_E_TRANSFORM_STREAM_CHANGE || OutputBuffer.dwStatus == MFT_OUTPUT_DATA_BUFFER_FORMAT_CHANGE ) { SafeRelease(&OutputBuffer.pSample); ReleaseBuffer(pMFBuffer); hr = SelectOutputType(); if (FAILED(hr)) { DbgLog((LOG_TRACE, 10, L"-> Failed to handle stream change, hr: %x", hr)); return E_FAIL; } // try again with the new type, it should work now! return ProcessOutput(); } // the MFT generated no output, discard the sample and return if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT || OutputBuffer.dwStatus == MFT_OUTPUT_DATA_BUFFER_NO_SAMPLE) { SafeRelease(&OutputBuffer.pSample); ReleaseBuffer(pMFBuffer); return S_FALSE; } // unknown error condition if (FAILED(hr)) { DbgLog((LOG_TRACE, 10, L"-> ProcessOutput failed with hr: %x", hr)); SafeRelease(&OutputBuffer.pSample); ReleaseBuffer(pMFBuffer); return E_FAIL; } LAVFrame *pFrame = nullptr; AllocateFrame(&pFrame); IMFMediaType *pMTOut = nullptr; m_pMFT->GetOutputCurrentType(0, &pMTOut); MFGetAttributeSize(pMTOut, MF_MT_FRAME_SIZE, (UINT32 *)&pFrame->width, (UINT32 *)&pFrame->height); pFrame->format = m_OutPixFmt; AVRational pixel_aspect_ratio = {1, 1}; MFGetAttributeRatio(pMTOut, MF_MT_PIXEL_ASPECT_RATIO, (UINT32*)&pixel_aspect_ratio.num, (UINT32*)&pixel_aspect_ratio.den); AVRational display_aspect_ratio = {0, 0}; av_reduce(&display_aspect_ratio.num, &display_aspect_ratio.den, (int64_t)pixel_aspect_ratio.num * pFrame->width, (int64_t)pixel_aspect_ratio.den * pFrame->height, INT_MAX); pFrame->interlaced = MFGetAttributeUINT32(OutputBuffer.pSample, MFSampleExtension_Interlaced, FALSE); pFrame->repeat = MFGetAttributeUINT32(OutputBuffer.pSample, MFSampleExtension_RepeatFirstField, FALSE); LAVDeintFieldOrder fo = m_pSettings->GetDeintFieldOrder(); pFrame->tff = (fo == DeintFieldOrder_Auto) ? 
!MFGetAttributeUINT32(OutputBuffer.pSample, MFSampleExtension_BottomFieldFirst, FALSE) : (fo == DeintFieldOrder_TopFieldFirst); if (pFrame->interlaced && !m_bInterlaced) m_bInterlaced = TRUE; pFrame->interlaced = (pFrame->interlaced || (m_bInterlaced && m_pSettings->GetDeinterlacingMode() == DeintMode_Aggressive) || m_pSettings->GetDeinterlacingMode() == DeintMode_Force) && !(m_pSettings->GetDeinterlacingMode() == DeintMode_Disable); pFrame->ext_format.VideoPrimaries = MFGetAttributeUINT32(pMTOut, MF_MT_VIDEO_PRIMARIES, MFVideoPrimaries_Unknown); pFrame->ext_format.VideoTransferFunction = MFGetAttributeUINT32(pMTOut, MF_MT_TRANSFER_FUNCTION, MFVideoTransFunc_Unknown); pFrame->ext_format.VideoTransferMatrix = MFGetAttributeUINT32(pMTOut, MF_MT_YUV_MATRIX, MFVideoTransferMatrix_Unknown); pFrame->ext_format.VideoChromaSubsampling = MFGetAttributeUINT32(pMTOut, MF_MT_VIDEO_CHROMA_SITING, MFVideoChromaSubsampling_Unknown); pFrame->ext_format.NominalRange = MFGetAttributeUINT32(pMTOut, MF_MT_VIDEO_NOMINAL_RANGE, MFNominalRange_Unknown); // HACK: don't flag range=limited if it's the only value set, since it's also the implied default; this helps avoid a reconnect // The MFT always sets this value, even if the bitstream says nothing about it, causing a reconnect on every vc1/wmv3 file if (pFrame->ext_format.value == 0x2000) pFrame->ext_format.value = 0; // Timestamps if (m_bManualReorder) { if (!m_timestampQueue.empty()) { pFrame->rtStart = m_timestampQueue.front(); m_timestampQueue.pop(); LONGLONG llDuration = 0; hr = OutputBuffer.pSample->GetSampleDuration(&llDuration); if (SUCCEEDED(hr) && llDuration > 0) { pFrame->rtStop = pFrame->rtStart + llDuration; } } } else { LONGLONG llTimestamp = 0; hr = OutputBuffer.pSample->GetSampleTime(&llTimestamp); if (SUCCEEDED(hr)) { pFrame->rtStart = llTimestamp; LONGLONG llDuration = 0; hr = OutputBuffer.pSample->GetSampleDuration(&llDuration); if (SUCCEEDED(hr) && llDuration > 0) { pFrame->rtStop = pFrame->rtStart + llDuration; } } } SafeRelease(&pMTOut); // Lock memory in the buffer BYTE *pBuffer = nullptr; pMFBuffer->Lock(&pBuffer, NULL, NULL); // Check alignment // If not properly aligned, we need to make the data aligned. int alignment = (m_OutPixFmt == LAVPixFmt_NV12) ? 
16 : 32; if ((pFrame->width % alignment) != 0) { hr = AllocLAVFrameBuffers(pFrame); if (FAILED(hr)) { pMFBuffer->Unlock(); ReleaseBuffer(pMFBuffer); SafeRelease(&OutputBuffer.pSample); return hr; } size_t ySize = pFrame->width * pFrame->height; memcpy_plane(pFrame->data[0], pBuffer, pFrame->width, pFrame->stride[0], pFrame->height); if (m_OutPixFmt == LAVPixFmt_NV12) { memcpy_plane(pFrame->data[1], pBuffer + ySize, pFrame->width, pFrame->stride[1], pFrame->height / 2); } else if (m_OutPixFmt == LAVPixFmt_YUV420) { size_t uvSize = ySize / 4; memcpy_plane(pFrame->data[2], pBuffer + ySize, pFrame->width / 2, pFrame->stride[2], pFrame->height / 2); memcpy_plane(pFrame->data[1], pBuffer + ySize + uvSize, pFrame->width / 2, pFrame->stride[1], pFrame->height / 2); } pMFBuffer->Unlock(); ReleaseBuffer(pMFBuffer); } else { if (m_OutPixFmt == LAVPixFmt_NV12) { pFrame->data[0] = pBuffer; pFrame->data[1] = pBuffer + pFrame->width * pFrame->height; pFrame->stride[0] = pFrame->stride[1] = pFrame->width; } else if (m_OutPixFmt == LAVPixFmt_YUV420) { pFrame->data[0] = pBuffer; pFrame->data[2] = pBuffer + pFrame->width * pFrame->height; pFrame->data[1] = pFrame->data[2] + (pFrame->width / 2) * (pFrame->height / 2); pFrame->stride[0] = pFrame->width; pFrame->stride[1] = pFrame->stride[2] = pFrame->width / 2; } pFrame->data[3] = (BYTE *)pMFBuffer; pFrame->destruct = wmv9_buffer_destruct; pFrame->priv_data = this; } pFrame->flags |= LAV_FRAME_FLAG_BUFFER_MODIFY; Deliver(pFrame); SafeRelease(&OutputBuffer.pSample); if (OutputBuffer.dwStatus == MFT_OUTPUT_DATA_BUFFER_INCOMPLETE) return ProcessOutput(); return hr; }
void
PersistentStore_ReadTuple(
	PersistentStoreData *storeData,
	PersistentStoreSharedData *storeSharedData,
	ItemPointer readTid,
	Datum *values,
	HeapTuple *tupleCopy)
{
	Relation	persistentRel;
	HeapTupleData tuple;
	Buffer		buffer;
	bool	   *nulls;

#ifdef USE_ASSERT_CHECKING
	if (storeSharedData == NULL ||
		!PersistentStoreSharedData_EyecatcherIsValid(storeSharedData))
		elog(ERROR, "Persistent store shared-memory not valid");
#endif

	if (Debug_persistent_store_print)
		elog(PersistentStore_DebugPrintLevel(),
			 "PersistentStore_ReadTuple: Going to read tuple at TID %s ('%s', shared data %p)",
			 ItemPointerToString(readTid),
			 storeData->tableName,
			 storeSharedData);

	if (PersistentStore_IsZeroTid(readTid))
		elog(ERROR, "TID for fetch persistent tuple is invalid (0,0) ('%s')",
			 storeData->tableName);

	/*
	 * UNDONE: I think the InRecovery test only applies to physical Master
	 * Mirroring on Standby.  Only test this outside of recovery scenarios.
	 */
	if (!InRecovery &&
		(PersistentStore_IsZeroTid(&storeSharedData->maxTid) ||
		 ItemPointerCompare(readTid, &storeSharedData->maxTid) == 1 /* greater-than */ ))
	{
		elog(ERROR, "TID %s for fetch persistent tuple is greater than the last known TID %s ('%s')",
			 ItemPointerToString(readTid),
			 ItemPointerToString2(&storeSharedData->maxTid),
			 storeData->tableName);
	}

	persistentRel = (*storeData->openRel)();

	tuple.t_self = *readTid;

	if (!heap_fetch(persistentRel, SnapshotAny, &tuple, &buffer, false, NULL))
	{
		elog(ERROR, "Failed to fetch persistent tuple at %s (maximum known TID %s, '%s')",
			 ItemPointerToString(&tuple.t_self),
			 ItemPointerToString2(&storeSharedData->maxTid),
			 storeData->tableName);
	}

	*tupleCopy = heaptuple_copy_to(&tuple, NULL, NULL);

	ReleaseBuffer(buffer);

	/*
	 * In order to keep the tuples the exact same size to enable direct reuse
	 * of free tuples, we do not use NULLs.
	 */
	nulls = (bool *) palloc(storeData->numAttributes * sizeof(bool));

	heap_deform_tuple(*tupleCopy, persistentRel->rd_att, values, nulls);

	(*storeData->closeRel)(persistentRel);

	if (Debug_persistent_store_print)
	{
		elog(PersistentStore_DebugPrintLevel(),
			 "PersistentStore_ReadTuple: Successfully read tuple at TID %s ('%s')",
			 ItemPointerToString(readTid),
			 storeData->tableName);

		(*storeData->printTupleCallback)(
			PersistentStore_DebugPrintLevel(),
			"STORE READ TUPLE",
			readTid,
			values);
	}

	pfree(nulls);
}
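/*
 * For reference, the zero-TID test used above amounts to checking for block 0,
 * offset 0.  A minimal sketch under that assumption (the real helper lives
 * elsewhere in the persistent-store headers, so name and shape here are
 * illustrative):
 */
static inline bool
PersistentStore_IsZeroTid_sketch(ItemPointer tid)
{
	/* (0,0) is never a valid heap TID, so it doubles as the "unset" marker */
	return (BlockIdGetBlockNumber(&tid->ip_blkid) == 0 &&
			tid->ip_posid == 0);
}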
LocalString::~LocalString() {
  IOTJS_ASSERT(_strp != NULL);
  ReleaseBuffer(const_cast<char*>(_strp));
}
/* ------------------------------------------------------
 * pgstatindex()
 *
 * Usage: SELECT * FROM pgstatindex('t1_pkey');
 * ------------------------------------------------------
 */
Datum
pgstatindex(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	Relation	rel;
	RangeVar   *relrv;
	Datum		result;
	uint32		nblocks;
	uint32		blkno;
	BTIndexStat indexStat;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use pgstattuple functions"))));

	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
	rel = relation_openrv(relrv, AccessShareLock);

	if (!IS_INDEX(rel) || !IS_BTREE(rel))
		elog(ERROR, "pgstatindex() can be used only on a b-tree index.");

	/*
	 * Reject attempts to read non-local temporary relations; we would be
	 * likely to get wrong data since we have no visibility into the owning
	 * session's local buffers.
	 */
	if (isOtherTempNamespace(RelationGetNamespace(rel)))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot access temporary tables of other sessions")));

	/*-------------------
	 * Read a metapage
	 *-------------------
	 */
	{
		Buffer		buffer = ReadBuffer(rel, 0);
		Page		page = BufferGetPage(buffer);
		BTMetaPageData *metad = BTPageGetMeta(page);

		indexStat.magic = metad->btm_magic;
		indexStat.version = metad->btm_version;
		indexStat.root_blkno = metad->btm_root;
		indexStat.level = metad->btm_level;
		indexStat.fastroot = metad->btm_fastroot;
		indexStat.fastlevel = metad->btm_fastlevel;

		ReleaseBuffer(buffer);
	}

	nblocks = RelationGetNumberOfBlocks(rel);

	/* -- init stat -- */
	indexStat.fragments = 0;
	indexStat.root_pages = 0;
	indexStat.leaf_pages = 0;
	indexStat.internal_pages = 0;
	indexStat.empty_pages = 0;
	indexStat.deleted_pages = 0;
	indexStat.max_avail = 0;
	indexStat.free_space = 0;

	/*-----------------------
	 * Scan all blocks
	 *-----------------------
	 */
	for (blkno = 1; blkno < nblocks; blkno++)
	{
		Buffer		buffer = ReadBuffer(rel, blkno);
		BTPageStat	stat;

		CHECK_FOR_INTERRUPTS();

		/* scan one page */
		stat.blkno = blkno;
		GetBTPageStatistics(blkno, buffer, &stat);

		/*---------------------
		 * page status (type)
		 *---------------------
		 */
		switch (stat.type)
		{
			case 'd':
				indexStat.deleted_pages++;
				break;
			case 'l':
				indexStat.leaf_pages++;
				break;
			case 'i':
				indexStat.internal_pages++;
				break;
			case 'e':
				indexStat.empty_pages++;
				break;
			case 'r':
				indexStat.root_pages++;
				break;
			default:
				elog(ERROR, "unknown page status.");
		}

		/* -- leaf fragmentation -- */
		indexStat.fragments += stat.fragments;

		if (stat.type == 'l')
		{
			indexStat.max_avail += stat.max_avail;
			indexStat.free_space += stat.free_size;
		}

		ReleaseBuffer(buffer);
	}

	relation_close(rel, AccessShareLock);

	/*----------------------------
	 * Build a result tuple
	 *----------------------------
	 */
	{
		TupleDesc	tupleDesc;
		int			j;
		char	   *values[PGSTATINDEX_NCOLUMNS];
		HeapTuple	tuple;

		tupleDesc = RelationNameGetTupleDesc(PGSTATINDEX_TYPE);

		j = 0;
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.version);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.level);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d",
				 (indexStat.root_pages +
				  indexStat.leaf_pages +
				  indexStat.internal_pages +
				  indexStat.deleted_pages +
				  indexStat.empty_pages) * BLCKSZ);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.root_blkno);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.internal_pages);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.leaf_pages);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.empty_pages);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.deleted_pages);
		values[j] = palloc(32);
		if (indexStat.max_avail > 0)
			snprintf(values[j++], 32, "%.2f",
					 100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0);
		else
			snprintf(values[j++], 32, "NaN");
		values[j] = palloc(32);
		if (indexStat.leaf_pages > 0)
			snprintf(values[j++], 32, "%.2f",
					 (double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0);
		else
			snprintf(values[j++], 32, "NaN");

		tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
									   values);
		result = TupleGetDatum(TupleDescGetSlot(tupleDesc), tuple);
	}

	PG_RETURN_DATUM(result);
}
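/*
 * The two guarded percentages above (average leaf density and leaf
 * fragmentation) follow the same pattern.  A standalone sketch of the density
 * computation, with an illustrative name not taken from the original (NAN
 * comes from <math.h>):
 */
static double
leaf_density_pct(uint64 free_space, uint64 max_avail)
{
	/* e.g. max_avail = 1000000, free_space = 200000  =>  80.00 */
	if (max_avail == 0)
		return NAN;				/* matches the "NaN" column value above */
	return 100.0 - (double) free_space / (double) max_avail * 100.0;
}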
Datum
readindex(PG_FUNCTION_ARGS)
{
	FuncCallContext *funcctx;
	readindexinfo *info;

	MIRROREDLOCK_BUFMGR_DECLARE;

	if (SRF_IS_FIRSTCALL())
	{
		Oid			irelid = PG_GETARG_OID(0);
		TupleDesc	tupdesc;
		MemoryContext oldcontext;
		AttrNumber	outattnum;
		Relation	irel;
		TupleDesc	itupdesc;
		int			i;
		AttrNumber	attno;

		irel = index_open(irelid, AccessShareLock);
		itupdesc = RelationGetDescr(irel);
		outattnum = FIXED_COLUMN + itupdesc->natts;

		funcctx = SRF_FIRSTCALL_INIT();
		oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);

		tupdesc = CreateTemplateTupleDesc(outattnum, false);
		attno = 1;
		TupleDescInitEntry(tupdesc, attno++, "ictid", TIDOID, -1, 0);
		TupleDescInitEntry(tupdesc, attno++, "hctid", TIDOID, -1, 0);
		TupleDescInitEntry(tupdesc, attno++, "aotid", TEXTOID, -1, 0);
		TupleDescInitEntry(tupdesc, attno++, "istatus", TEXTOID, -1, 0);
		TupleDescInitEntry(tupdesc, attno++, "hstatus", TEXTOID, -1, 0);
		for (i = 0; i < itupdesc->natts; i++)
		{
			Form_pg_attribute attr = itupdesc->attrs[i];

			TupleDescInitEntry(tupdesc, attno++, NameStr(attr->attname),
							   attr->atttypid, attr->atttypmod, 0);
		}

		funcctx->tuple_desc = BlessTupleDesc(tupdesc);

		info = (readindexinfo *) palloc(sizeof(readindexinfo));
		funcctx->user_fctx = (void *) info;

		info->outattnum = outattnum;
		info->irel = irel;
		info->hrel = relation_open(irel->rd_index->indrelid, AccessShareLock);
		if (info->hrel->rd_rel != NULL &&
			(info->hrel->rd_rel->relstorage == 'a' ||
			 info->hrel->rd_rel->relstorage == 'c'))
		{
			/* append-only (row or column) tables have no heap to chase into */
			relation_close(info->hrel, AccessShareLock);
			info->hrel = NULL;
		}
		info->num_pages = RelationGetNumberOfBlocks(irel);
		info->blkno = BTREE_METAPAGE + 1;
		info->page = NULL;

		MemoryContextSwitchTo(oldcontext);
	}

	funcctx = SRF_PERCALL_SETUP();
	info = (readindexinfo *) funcctx->user_fctx;

	while (info->blkno < info->num_pages)
	{
		Datum		values[255];
		bool		nulls[255];
		ItemPointerData itid;
		HeapTuple	tuple;
		Datum		result;

		if (info->page == NULL)
		{
			MIRROREDLOCK_BUFMGR_LOCK;
			info->buf = ReadBuffer(info->irel, info->blkno);
			info->page = BufferGetPage(info->buf);
			info->opaque = (BTPageOpaque) PageGetSpecialPointer(info->page);
			info->minoff = P_FIRSTDATAKEY(info->opaque);
			info->maxoff = PageGetMaxOffsetNumber(info->page);
			info->offnum = info->minoff;
			MIRROREDLOCK_BUFMGR_UNLOCK;
		}

		if (!P_ISLEAF(info->opaque) || info->offnum > info->maxoff)
		{
			ReleaseBuffer(info->buf);
			info->page = NULL;
			info->blkno++;
			continue;
		}

		MemSet(nulls, false, info->outattnum * sizeof(bool));

		ItemPointerSet(&itid, info->blkno, info->offnum);
		values[0] = ItemPointerGetDatum(&itid);
		readindextuple(info, values, nulls);

		info->offnum = OffsetNumberNext(info->offnum);

		tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
		result = HeapTupleGetDatum(tuple);

		SRF_RETURN_NEXT(funcctx, result);
	}

	if (info->hrel != NULL)
		relation_close(info->hrel, AccessShareLock);
	index_close(info->irel, AccessShareLock);
	SRF_RETURN_DONE(funcctx);
}
Datum
bt_page_items(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	uint32		blkno = PG_GETARG_UINT32(1);
	RangeVar   *relrv;
	Datum		result;
	char	   *values[BTPAGEITEMS_NCOLUMNS];
	BTPageOpaque opaque;
	HeapTuple	tuple;
	ItemId		id;
	FuncCallContext *fctx;
	MemoryContext mctx;
	struct user_args *uargs = NULL;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use pgstattuple functions"))));

	if (SRF_IS_FIRSTCALL())
	{
		fctx = SRF_FIRSTCALL_INIT();
		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);

		uargs = palloc(sizeof(struct user_args));

		uargs->tupd = RelationNameGetTupleDesc(BTPAGEITEMS_TYPE);
		uargs->offset = FirstOffsetNumber;

		relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
		uargs->rel = relation_openrv(relrv, AccessShareLock);

		if (!IS_INDEX(uargs->rel) || !IS_BTREE(uargs->rel))
			elog(ERROR, "bt_page_items() can be used only on a b-tree index.");

		/*
		 * Reject attempts to read non-local temporary relations; we would be
		 * likely to get wrong data since we have no visibility into the
		 * owning session's local buffers.
		 */
		if (isOtherTempNamespace(RelationGetNamespace(uargs->rel)))
			ereport(ERROR,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("cannot access temporary tables of other sessions")));

		if (blkno == 0)
			elog(ERROR, "Block 0 is a meta page.");

		CHECK_RELATION_BLOCK_RANGE(uargs->rel, blkno);

		uargs->buffer = ReadBuffer(uargs->rel, blkno);
		uargs->page = BufferGetPage(uargs->buffer);

		opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);

		if (P_ISDELETED(opaque))
			elog(NOTICE, "bt_page_items(): this page is deleted.");

		fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
		fctx->user_fctx = uargs;

		MemoryContextSwitchTo(mctx);
	}

	fctx = SRF_PERCALL_SETUP();
	uargs = fctx->user_fctx;

	if (fctx->call_cntr < fctx->max_calls)
	{
		IndexTuple	itup;

		id = PageGetItemId(uargs->page, uargs->offset);

		if (!ItemIdIsValid(id))
			elog(ERROR, "Invalid ItemId.");

		itup = (IndexTuple) PageGetItem(uargs->page, id);

		{
			int			j = 0;
			BlockNumber blkno = BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid));

			values[j] = palloc(32);
			snprintf(values[j++], 32, "%d", uargs->offset);
			values[j] = palloc(32);
			snprintf(values[j++], 32, "(%u,%u)", blkno, itup->t_tid.ip_posid);
			values[j] = palloc(32);
			snprintf(values[j++], 32, "%d", (int) IndexTupleSize(itup));
			values[j] = palloc(32);
			snprintf(values[j++], 32, "%c", IndexTupleHasNulls(itup) ? 't' : 'f');
			values[j] = palloc(32);
			snprintf(values[j++], 32, "%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');

			{
				int			off;
				char	   *dump;
				char	   *ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);

				dump = palloc(IndexTupleSize(itup) * 3);
				memset(dump, 0, IndexTupleSize(itup) * 3);

				for (off = 0;
					 off < IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
					 off++)
				{
					if (dump[0] == '\0')
						sprintf(dump, "%02x", *(ptr + off) & 0xff);
					else
					{
						char		buf[4];

						sprintf(buf, " %02x", *(ptr + off) & 0xff);
						strcat(dump, buf);
					}
				}
				values[j] = dump;
			}

			tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(uargs->tupd),
										   values);
			result = TupleGetDatum(TupleDescGetSlot(uargs->tupd), tuple);
		}

		uargs->offset = uargs->offset + 1;

		SRF_RETURN_NEXT(fctx, result);
	}
	else
	{
		ReleaseBuffer(uargs->buffer);
		relation_close(uargs->rel, AccessShareLock);
		SRF_RETURN_DONE(fctx);
	}
}
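/*
 * Aside: the hex dump above rebuilds the string with strcat() on every byte,
 * which is quadratic in the tuple size.  An equivalent linear-time sketch
 * (illustrative helper name, not from the original), writing through a moving
 * pointer instead:
 */
static char *
dump_index_tuple_data(IndexTuple itup)
{
	int			len = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
	char	   *ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
	char	   *dump = palloc(len * 3 + 1);
	char	   *cur = dump;
	int			off;

	dump[0] = '\0';				/* handle the zero-length case */
	for (off = 0; off < len; off++)
		cur += sprintf(cur, off == 0 ? "%02x" : " %02x", *(ptr + off) & 0xff);
	return dump;
}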
/*
 * Move tuples from pending pages into regular GIN structure.
 *
 * This can be called concurrently by multiple backends, so it must cope.
 * At first glance it looks neither concurrent-safe nor crash-safe.  The
 * reason it's okay is that multiple insertion of the same entry is detected
 * and treated as a no-op by gininsert.c.  If we crash after posting entries
 * to the main index and before removing them from the pending list, it's
 * okay because when we redo the posting later on, nothing bad will happen.
 * Likewise, if two backends simultaneously try to post a pending entry into
 * the main index, one will succeed and one will do nothing.  We try to
 * notice when someone else is a little bit ahead of us in the process, but
 * that's just to avoid wasting cycles.  Only the action of removing a page
 * from the pending list really needs exclusive lock.
 *
 * If fill_fsm is true, ginInsertCleanup adds deleted pages to the FSM;
 * otherwise the caller is responsible for putting deleted pages into the
 * FSM.
 *
 * If stats isn't null, we count deleted pending pages into the counts.
 */
void
ginInsertCleanup(GinState *ginstate, bool fill_fsm,
				 IndexBulkDeleteResult *stats)
{
	Relation	index = ginstate->index;
	Buffer		metabuffer,
				buffer;
	Page		metapage,
				page;
	GinMetaPageData *metadata;
	MemoryContext opCtx,
				oldCtx;
	BuildAccumulator accum;
	KeyArray	datums;
	BlockNumber blkno;
	bool		fsm_vac = false;

	metabuffer = ReadBuffer(index, GIN_METAPAGE_BLKNO);
	LockBuffer(metabuffer, GIN_SHARE);
	metapage = BufferGetPage(metabuffer);
	metadata = GinPageGetMeta(metapage);

	if (metadata->head == InvalidBlockNumber)
	{
		/* Nothing to do */
		UnlockReleaseBuffer(metabuffer);
		return;
	}

	/*
	 * Read and lock head of pending list
	 */
	blkno = metadata->head;
	buffer = ReadBuffer(index, blkno);
	LockBuffer(buffer, GIN_SHARE);
	page = BufferGetPage(buffer);

	LockBuffer(metabuffer, GIN_UNLOCK);

	/*
	 * Initialize.  All temporary space will be in opCtx
	 */
	opCtx = AllocSetContextCreate(CurrentMemoryContext,
								  "GIN insert cleanup temporary context",
								  ALLOCSET_DEFAULT_MINSIZE,
								  ALLOCSET_DEFAULT_INITSIZE,
								  ALLOCSET_DEFAULT_MAXSIZE);

	oldCtx = MemoryContextSwitchTo(opCtx);

	initKeyArray(&datums, 128);
	ginInitBA(&accum);
	accum.ginstate = ginstate;

	/*
	 * At the top of this loop, we have pin and lock on the current page of
	 * the pending list.  However, we'll release that before exiting the
	 * loop.  Note we also have pin but not lock on the metapage.
	 */
	for (;;)
	{
		if (GinPageIsDeleted(page))
		{
			/* another cleanup process is running concurrently */
			UnlockReleaseBuffer(buffer);
			fsm_vac = false;
			break;
		}

		/*
		 * read page's datums into accum
		 */
		processPendingPage(&accum, &datums, page, FirstOffsetNumber);

		vacuum_delay_point();

		/*
		 * Is it time to flush memory to disk?	Flush if we are at the end of
		 * the pending list, or if we have a full row and memory is getting
		 * full.
		 *
		 * XXX using up maintenance_work_mem here is probably unreasonably
		 * much, since vacuum might already be using that much.
		 */
		if (GinPageGetOpaque(page)->rightlink == InvalidBlockNumber ||
			(GinPageHasFullRow(page) &&
			 (accum.allocatedMemory >= (Size) maintenance_work_mem * 1024L)))
		{
			ItemPointerData *list;
			uint32		nlist;
			Datum		key;
			GinNullCategory category;
			OffsetNumber maxoff,
						attnum;

			/*
			 * Unlock current page to increase performance.  Changes of page
			 * will be checked later by comparing maxoff after completion of
			 * memory flush.
			 */
			maxoff = PageGetMaxOffsetNumber(page);
			LockBuffer(buffer, GIN_UNLOCK);

			/*
			 * Moving collected data into regular structure can take a
			 * significant amount of time - so, run it without locking the
			 * pending list.
			 */
			ginBeginBAScan(&accum);
			while ((list = ginGetBAEntry(&accum,
										 &attnum, &key, &category, &nlist)) != NULL)
			{
				ginEntryInsert(ginstate, attnum, key, category,
							   list, nlist, NULL);
				vacuum_delay_point();
			}

			/*
			 * Lock the whole list to remove pages
			 */
			LockBuffer(metabuffer, GIN_EXCLUSIVE);
			LockBuffer(buffer, GIN_SHARE);

			if (GinPageIsDeleted(page))
			{
				/* another cleanup process is running concurrently */
				UnlockReleaseBuffer(buffer);
				LockBuffer(metabuffer, GIN_UNLOCK);
				fsm_vac = false;
				break;
			}

			/*
			 * While we left the page unlocked, more stuff might have gotten
			 * added to it.  If so, process those entries immediately.  There
			 * shouldn't be very many, so we don't worry about the fact that
			 * we're doing this with exclusive lock.  The insertion algorithm
			 * guarantees that inserted row(s) will not continue on the next
			 * page.  NOTE: intentionally no vacuum_delay_point in this loop.
			 */
			if (PageGetMaxOffsetNumber(page) != maxoff)
			{
				ginInitBA(&accum);
				processPendingPage(&accum, &datums, page, maxoff + 1);

				ginBeginBAScan(&accum);
				while ((list = ginGetBAEntry(&accum,
											 &attnum, &key, &category, &nlist)) != NULL)
					ginEntryInsert(ginstate, attnum, key, category,
								   list, nlist, NULL);
			}

			/*
			 * Remember next page - it will become the new list head
			 */
			blkno = GinPageGetOpaque(page)->rightlink;
			UnlockReleaseBuffer(buffer);	/* shiftList will do exclusive
											 * locking */

			/*
			 * remove read pages from pending list, at this point all content
			 * of read pages is in regular structure
			 */
			if (shiftList(index, metabuffer, blkno, fill_fsm, stats))
			{
				/* another cleanup process is running concurrently */
				LockBuffer(metabuffer, GIN_UNLOCK);
				fsm_vac = false;
				break;
			}

			/* At this point, some pending pages have been freed up */
			fsm_vac = true;

			Assert(blkno == metadata->head);
			LockBuffer(metabuffer, GIN_UNLOCK);

			/*
			 * if we removed the whole pending list just exit
			 */
			if (blkno == InvalidBlockNumber)
				break;

			/*
			 * release memory used so far and reinit state
			 */
			MemoryContextReset(opCtx);
			initKeyArray(&datums, datums.maxvalues);
			ginInitBA(&accum);
		}
		else
		{
			blkno = GinPageGetOpaque(page)->rightlink;
			UnlockReleaseBuffer(buffer);
		}

		/*
		 * Read next page in pending list
		 */
		vacuum_delay_point();
		buffer = ReadBuffer(index, blkno);
		LockBuffer(buffer, GIN_SHARE);
		page = BufferGetPage(buffer);
	}

	ReleaseBuffer(metabuffer);

	/*
	 * As pending list pages can have a high churn rate, it is desirable to
	 * recycle them immediately to the FreeSpace Map when ordinary backends
	 * clean the list.
	 */
	if (fsm_vac && fill_fsm)
		IndexFreeSpaceMapVacuum(index);

	/* Clean up temporary space */
	MemoryContextSwitchTo(oldCtx);
	MemoryContextDelete(opCtx);
}
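/*
 * Hedged usage sketch, derived only from the header comment above (the real
 * call sites are not part of this excerpt):
 */
static void
gin_cleanup_examples(GinState *ginstate, IndexBulkDeleteResult *stats)
{
	/* ordinary backend: let the cleanup recycle freed pages into the FSM */
	ginInsertCleanup(ginstate, true, NULL);

	/* vacuum-style caller: count deleted pages in stats, fill the FSM itself */
	ginInsertCleanup(ginstate, false, stats);
}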
/*
 * Recursive guts of FreeSpaceMapVacuum
 */
static uint8
fsm_vacuum_page(Relation rel, FSMAddress addr, bool *eof_p)
{
	Buffer		buf;
	Page		page;
	uint8		max_avail;

	/* Read the page if it exists, or return EOF */
	buf = fsm_readbuf(rel, addr, false);
	if (!BufferIsValid(buf))
	{
		*eof_p = true;
		return 0;
	}
	else
		*eof_p = false;

	page = BufferGetPage(buf);

	/*
	 * Recurse into children, and fix the information stored about them at
	 * this level.
	 */
	if (addr.level > FSM_BOTTOM_LEVEL)
	{
		int			slot;
		bool		eof = false;

		for (slot = 0; slot < SlotsPerFSMPage; slot++)
		{
			int			child_avail;

			CHECK_FOR_INTERRUPTS();

			/* After we hit end-of-file, just clear the rest of the slots */
			if (!eof)
				child_avail = fsm_vacuum_page(rel, fsm_get_child(addr, slot),
											  &eof);
			else
				child_avail = 0;

			/* Update information about the child */
			if (fsm_get_avail(page, slot) != child_avail)
			{
				LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
				fsm_set_avail(BufferGetPage(buf), slot, child_avail);
				MarkBufferDirtyHint(buf, false);
				LockBuffer(buf, BUFFER_LOCK_UNLOCK);
			}
		}
	}

	max_avail = fsm_get_max_avail(BufferGetPage(buf));

	/*
	 * Reset the next slot pointer.  This encourages the use of low-numbered
	 * pages, increasing the chances that a later vacuum can truncate the
	 * relation.
	 */
	((FSMPage) PageGetContents(page))->fp_next_slot = 0;

	ReleaseBuffer(buf);

	return max_avail;
}
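/*
 * For context, the non-recursive entry point that drives the recursion above
 * starts at the FSM root; in PostgreSQL it is roughly:
 */
void
FreeSpaceMapVacuum(Relation rel)
{
	bool		dummy;

	/* Traverse the entire tree, starting from the root page */
	fsm_vacuum_page(rel, FSM_ROOT_ADDRESS, &dummy);
}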
FarString& FarString::TrimLeft()
{
	FarSF::LTrim(GetBuffer());
	ReleaseBuffer();
	return *this;
}
/* --------------------------------
 *		ExecStoreHeapTuple
 *
 *		This function is used to store a physical tuple into a specified
 *		slot in the tuple table.
 *
 *		tuple:	tuple to store
 *		slot:	slot to store it in
 *		buffer: disk buffer if tuple is in a disk page, else InvalidBuffer
 *		shouldFree: true if ExecClearTuple should pfree() the tuple
 *					when done with it
 *
 * If 'buffer' is not InvalidBuffer, the tuple table code acquires a pin
 * on the buffer which is held until the slot is cleared, so that the tuple
 * won't go away on us.
 *
 * shouldFree is normally set 'true' for tuples constructed on-the-fly.
 * It must always be 'false' for tuples that are stored in disk pages,
 * since we don't want to try to pfree those.
 *
 * Another case where it is 'false' is when the referenced tuple is held
 * in a tuple table slot belonging to a lower-level executor Proc node.
 * In this case the lower-level slot retains ownership and responsibility
 * for eventually releasing the tuple.  When this method is used, we must
 * be certain that the upper-level Proc node will lose interest in the tuple
 * sooner than the lower-level one does!  If you're not certain, copy the
 * lower-level tuple with heap_copytuple and let the upper-level table
 * slot assume ownership of the copy!
 *
 * Return value is just the passed-in slot pointer.
 *
 * NOTE: before PostgreSQL 8.1, this function would accept a NULL tuple
 * pointer and effectively behave like ExecClearTuple (though you could
 * still specify a buffer to pin, which would be an odd combination).
 * This saved a couple lines of code in a few places, but seemed more
 * likely to mask logic errors than to be really useful, so it's now
 * disallowed.
 * --------------------------------
 */
TupleTableSlot *
ExecStoreHeapTuple(HeapTuple tuple,
				   TupleTableSlot *slot,
				   Buffer buffer,
				   bool shouldFree)
{
	/*
	 * sanity checks
	 */
	Assert(tuple != NULL);
	Assert(slot != NULL);
	Assert(slot->tts_tupleDescriptor != NULL);
	/* passing shouldFree=true for a tuple on a disk page is not sane */
	Assert(BufferIsValid(buffer) ? (!shouldFree) : true);

	/*
	 * Actually we are storing a memtuple!
	 */
	if (is_heaptuple_memtuple(tuple))
	{
		Assert(buffer == InvalidBuffer);
		return ExecStoreMemTuple((MemTuple) tuple, slot, shouldFree);
	}

	/*
	 * Free any old physical tuple belonging to the slot.
	 */
	free_heaptuple_memtuple(slot);

	/*
	 * Store the new tuple into the specified slot.
	 */

	/* Clear tts_flags; note this also marks the slot as not empty */
	slot->PRIVATE_tts_flags = shouldFree ? TTS_SHOULDFREE : 0;

	/* store the tuple */
	slot->PRIVATE_tts_heaptuple = (void *) tuple;

	/* Mark extracted state invalid */
	slot->PRIVATE_tts_nvalid = 0;

	/*
	 * If tuple is on a disk page, keep the page pinned as long as we hold a
	 * pointer into it.  We assume the caller already has such a pin.
	 *
	 * This is coded to optimize the case where the slot previously held a
	 * tuple on the same disk page: in that case releasing and re-acquiring
	 * the pin is a waste of cycles.  This is a common situation during
	 * seqscans, so it's worth troubling over.
	 */
	if (slot->tts_buffer != buffer)
	{
		if (BufferIsValid(slot->tts_buffer))
			ReleaseBuffer(slot->tts_buffer);
		slot->tts_buffer = buffer;
		if (BufferIsValid(buffer))
			IncrBufferRefCount(buffer);
	}

	return slot;
}
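/*
 * Illustrative call patterns for the function above, following its own
 * contract (a sketch, not taken from this source; "scan" stands for a heap
 * scan descriptor whose current buffer is already pinned):
 */
static void
store_tuple_examples(HeapScanDesc scan, HeapTuple disktuple,
					 TupleTableSlot *slot, TupleDesc tupdesc,
					 Datum *values, bool *nulls)
{
	/* tuple points into a pinned disk page: the slot takes its own pin */
	ExecStoreHeapTuple(disktuple, slot, scan->rs_cbuf, false);

	/* tuple built on the fly: the slot takes ownership and will pfree it */
	ExecStoreHeapTuple(heap_form_tuple(tupdesc, values, nulls),
					   slot, InvalidBuffer, true);
}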
/* ------------------------------------------------------
 * pgstatindex()
 *
 * Usage: SELECT * FROM pgstatindex('t1_pkey');
 * ------------------------------------------------------
 */
Datum
pgstatindex(PG_FUNCTION_ARGS)
{
	text	   *relname = PG_GETARG_TEXT_P(0);
	Relation	rel;
	RangeVar   *relrv;
	Datum		result;
	BlockNumber nblocks;
	BlockNumber blkno;
	BTIndexStat indexStat;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to use pgstattuple functions"))));

	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
	rel = relation_openrv(relrv, AccessShareLock);

	if (!IS_INDEX(rel) || !IS_BTREE(rel))
		elog(ERROR, "relation \"%s\" is not a btree index",
			 RelationGetRelationName(rel));

	/*
	 * Reject attempts to read non-local temporary relations; we would be
	 * likely to get wrong data since we have no visibility into the owning
	 * session's local buffers.
	 */
	if (RELATION_IS_OTHER_TEMP(rel))
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot access temporary tables of other sessions")));

	/*
	 * Read metapage
	 */
	{
		Buffer		buffer = ReadBuffer(rel, 0);
		Page		page = BufferGetPage(buffer);
		BTMetaPageData *metad = BTPageGetMeta(page);

		indexStat.version = metad->btm_version;
		indexStat.level = metad->btm_level;
		indexStat.root_blkno = metad->btm_root;

		ReleaseBuffer(buffer);
	}

	/* -- init counters -- */
	indexStat.root_pages = 0;
	indexStat.internal_pages = 0;
	indexStat.leaf_pages = 0;
	indexStat.empty_pages = 0;
	indexStat.deleted_pages = 0;
	indexStat.max_avail = 0;
	indexStat.free_space = 0;
	indexStat.fragments = 0;

	/*
	 * Scan all blocks except the metapage
	 */
	nblocks = RelationGetNumberOfBlocks(rel);

	for (blkno = 1; blkno < nblocks; blkno++)
	{
		Buffer		buffer;
		Page		page;
		BTPageOpaque opaque;

		/* Read and lock buffer */
		buffer = ReadBuffer(rel, blkno);
		LockBuffer(buffer, BUFFER_LOCK_SHARE);

		page = BufferGetPage(buffer);
		opaque = (BTPageOpaque) PageGetSpecialPointer(page);

		/* Determine page type, and update totals */
		if (P_ISLEAF(opaque))
		{
			int			max_avail;

			max_avail = BLCKSZ - (BLCKSZ - ((PageHeader) page)->pd_special +
								  SizeOfPageHeaderData);
			indexStat.max_avail += max_avail;
			indexStat.free_space += PageGetFreeSpace(page);

			indexStat.leaf_pages++;

			/*
			 * If the next leaf is on an earlier block, count this page as
			 * fragmented.
			 */
			if (opaque->btpo_next != P_NONE && opaque->btpo_next < blkno)
				indexStat.fragments++;
		}
		else if (P_ISDELETED(opaque))
			indexStat.deleted_pages++;
		else if (P_IGNORE(opaque))
			indexStat.empty_pages++;
		else if (P_ISROOT(opaque))
			indexStat.root_pages++;
		else
			indexStat.internal_pages++;

		/* Unlock and release buffer */
		LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
		ReleaseBuffer(buffer);
	}

	relation_close(rel, AccessShareLock);

	/*----------------------------
	 * Build a result tuple
	 *----------------------------
	 */
	{
		TupleDesc	tupleDesc;
		int			j;
		char	   *values[10];
		HeapTuple	tuple;

		/* Build a tuple descriptor for our result type */
		if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
			elog(ERROR, "return type must be a row type");

		j = 0;
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.version);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%d", indexStat.level);
		values[j] = palloc(32);
		snprintf(values[j++], 32, INT64_FORMAT,
				 (indexStat.root_pages +
				  indexStat.leaf_pages +
				  indexStat.internal_pages +
				  indexStat.deleted_pages +
				  indexStat.empty_pages) * BLCKSZ);
		values[j] = palloc(32);
		snprintf(values[j++], 32, "%u", indexStat.root_blkno);
		values[j] = palloc(32);
		snprintf(values[j++], 32, INT64_FORMAT, indexStat.internal_pages);
		values[j] = palloc(32);
		snprintf(values[j++], 32, INT64_FORMAT, indexStat.leaf_pages);
		values[j] = palloc(32);
		snprintf(values[j++], 32, INT64_FORMAT, indexStat.empty_pages);
		values[j] = palloc(32);
		snprintf(values[j++], 32, INT64_FORMAT, indexStat.deleted_pages);

		/*
		 * Guard the percentages against an index with no leaf pages, as the
		 * older variant of this function does, to avoid dividing by zero.
		 */
		values[j] = palloc(32);
		if (indexStat.max_avail > 0)
			snprintf(values[j++], 32, "%.2f",
					 100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0);
		else
			snprintf(values[j++], 32, "NaN");
		values[j] = palloc(32);
		if (indexStat.leaf_pages > 0)
			snprintf(values[j++], 32, "%.2f",
					 (double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0);
		else
			snprintf(values[j++], 32, "NaN");

		tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
									   values);

		result = HeapTupleGetDatum(tuple);
	}

	PG_RETURN_DATUM(result);
}