/*
 *  btendscan() -- close down a scan
 *
 * Drops any buffer pins still held by the current and marked scan
 * positions, then frees the scan's private working storage together with
 * the opaque struct itself.
 */
void
btendscan(IndexScanDesc scan)
{
    BTScanOpaque state = (BTScanOpaque) scan->opaque;

    /* No read locks are held here, but pins may still need dropping. */
    if (BTScanPosIsValid(state->currPos))
    {
        /* Deal with killed items before letting go of the current page. */
        if (state->numKilled > 0)
        {
            _bt_killitems(scan);
        }
        BTScanPosUnpinIfPinned(state->currPos);
    }

    state->markItemIndex = -1;
    BTScanPosUnpinIfPinned(state->markPos);

    /*
     * The positions are deliberately not invalidated: the memory holding
     * them is freed just below.
     */

    /* Release storage. */
    if (state->keyData)
    {
        pfree(state->keyData);
    }

    /* arrayKeyData and arrayKeys live inside arrayContext. */
    if (state->arrayContext)
    {
        MemoryContextDelete(state->arrayContext);
    }

    if (state->killedItems)
    {
        pfree(state->killedItems);
    }

    /* markTuples must not be pfree'd separately; see btrescan. */
    if (state->currTuples)
    {
        pfree(state->currTuples);
    }

    pfree(state);
}
/* * btendscan() -- close down a scan */ Datum btendscan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); BTScanOpaque so = (BTScanOpaque) scan->opaque; /* we aren't holding any read locks, but gotta drop the pins */ if (BTScanPosIsValid(so->currPos)) { /* Before leaving current page, deal with any killed items */ if (so->numKilled > 0) _bt_killitems(scan, false); ReleaseBuffer(so->currPos.buf); so->currPos.buf = InvalidBuffer; } if (BTScanPosIsValid(so->markPos)) { ReleaseBuffer(so->markPos.buf); so->markPos.buf = InvalidBuffer; } so->markItemIndex = -1; if (so->killedItems != NULL) pfree(so->killedItems); if (so->keyData != NULL) pfree(so->keyData); pfree(so); PG_RETURN_VOID(); }
/* * btrescan() -- rescan an index relation */ Datum btrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1); /* remaining arguments are ignored */ BTScanOpaque so = (BTScanOpaque) scan->opaque; /* we aren't holding any read locks, but gotta drop the pins */ if (BTScanPosIsValid(so->currPos)) { /* Before leaving current page, deal with any killed items */ if (so->numKilled > 0) _bt_killitems(scan); BTScanPosUnpinIfPinned(so->currPos); BTScanPosInvalidate(so->currPos); } so->markItemIndex = -1; BTScanPosUnpinIfPinned(so->markPos); BTScanPosInvalidate(so->markPos); /* * Allocate tuple workspace arrays, if needed for an index-only scan and * not already done in a previous rescan call. To save on palloc * overhead, both workspaces are allocated as one palloc block; only this * function and btendscan know that. * * NOTE: this data structure also makes it safe to return data from a * "name" column, even though btree name_ops uses an underlying storage * datatype of cstring. The risk there is that "name" is supposed to be * padded to NAMEDATALEN, but the actual index tuple is probably shorter. * However, since we only return data out of tuples sitting in the * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some * data out of the markTuples array --- running off the end of memory for * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats * adding special-case treatment for name_ops elsewhere. */ if (scan->xs_want_itup && so->currTuples == NULL) { so->currTuples = (char *) palloc(BLCKSZ * 2); so->markTuples = so->currTuples + BLCKSZ; } /* * Reset the scan keys. Note that keys ordering stuff moved to _bt_first. 
* - vadim 05/05/97 */ if (scankey && scan->numberOfKeys > 0) memmove(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData)); so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */ /* If any keys are SK_SEARCHARRAY type, set up array-key info */ _bt_preprocess_array_keys(scan); PG_RETURN_VOID(); }
/* * btrescan() -- rescan an index relation */ Datum btrescan(PG_FUNCTION_ARGS) { MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE; IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1); BTScanOpaque so; MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER; so = (BTScanOpaque) scan->opaque; if (so == NULL) /* if called from btbeginscan */ { so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData)); so->currPos.buf = so->markPos.buf = InvalidBuffer; if (scan->numberOfKeys > 0) so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData)); else so->keyData = NULL; so->killedItems = NULL; /* until needed */ so->numKilled = 0; scan->opaque = so; } /* we aren't holding any read locks, but gotta drop the pins */ if (BTScanPosIsValid(so->currPos)) { /* Before leaving current page, deal with any killed items */ if (so->numKilled > 0) _bt_killitems(scan, false); ReleaseBuffer(so->currPos.buf); so->currPos.buf = InvalidBuffer; } if (BTScanPosIsValid(so->markPos)) { ReleaseBuffer(so->markPos.buf); so->markPos.buf = InvalidBuffer; } so->markItemIndex = -1; /* * Reset the scan keys. Note that keys ordering stuff moved to _bt_first. * - vadim 05/05/97 */ if (scankey && scan->numberOfKeys > 0) memmove(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData)); so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */ MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT; PG_RETURN_VOID(); }
/*
 *  btrestrpos() -- restore scan to last saved position
 *
 * If the mark is still recorded as an item index on the current page, only
 * that index is restored.  Otherwise the current position is abandoned and
 * replaced by a copy of the saved mark position (or invalidated when no
 * valid mark position exists).
 */
void
btrestrpos(IndexScanDesc scan)
{
    BTScanOpaque state = (BTScanOpaque) scan->opaque;

    /* Put any array keys back to their marked positions. */
    if (state->numArrayKeys)
    {
        _bt_restore_array_keys(scan);
    }

    if (state->markItemIndex >= 0)
    {
        /*
         * The scan has not moved to a new page since the last mark, so
         * restoring the item index is all that is needed.
         *
         * NB: nothing in markPos can be relied upon in this case.
         */
        state->currPos.itemIndex = state->markItemIndex;
        return;
    }

    /*
     * The scan moved to a new page after the last mark or restore, and we
     * are going back to the marked page.  No read locks are held, but a pin
     * on the current position may remain and must be dropped.
     */
    if (BTScanPosIsValid(state->currPos))
    {
        /* Deal with killed items before leaving the current page. */
        if (state->numKilled > 0)
        {
            _bt_killitems(scan);
        }
        BTScanPosUnpinIfPinned(state->currPos);
    }

    if (!BTScanPosIsValid(state->markPos))
    {
        BTScanPosInvalidate(state->currPos);
        return;
    }

    /* The copy below duplicates the buffer reference, so bump its pin. */
    if (BTScanPosIsPinned(state->markPos))
    {
        IncrBufferRefCount(state->markPos.buf);
    }

    /* Copy the header plus the items up to and including lastItem. */
    memcpy(&state->currPos,
           &state->markPos,
           offsetof(BTScanPosData, items[1]) +
           state->markPos.lastItem * sizeof(BTScanPosItem));

    if (state->currTuples)
    {
        memcpy(state->currTuples,
               state->markTuples,
               state->markPos.nextTupleOffset);
    }
}
/* * btrestrpos() -- restore scan to last saved position */ Datum btrestrpos(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); BTScanOpaque so = (BTScanOpaque) scan->opaque; /* Restore the marked positions of any array keys */ if (so->numArrayKeys) _bt_restore_array_keys(scan); if (so->markItemIndex >= 0) { /* * The mark position is on the same page we are currently on. Just * restore the itemIndex. */ so->currPos.itemIndex = so->markItemIndex; } else { /* we aren't holding any read locks, but gotta drop the pin */ if (BTScanPosIsValid(so->currPos)) { /* Before leaving current page, deal with any killed items */ if (so->numKilled > 0 && so->currPos.buf != so->markPos.buf) _bt_killitems(scan, false); ReleaseBuffer(so->currPos.buf); so->currPos.buf = InvalidBuffer; } if (BTScanPosIsValid(so->markPos)) { /* bump pin on mark buffer for assignment to current buffer */ IncrBufferRefCount(so->markPos.buf); memcpy(&so->currPos, &so->markPos, offsetof(BTScanPosData, items[1]) + so->markPos.lastItem * sizeof(BTScanPosItem)); if (so->currTuples) memcpy(so->currTuples, so->markTuples, so->markPos.nextTupleOffset); } } PG_RETURN_VOID(); }
/* * btrestrpos() -- restore scan to last saved position */ Datum btrestrpos(PG_FUNCTION_ARGS) { MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE; IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); BTScanOpaque so = (BTScanOpaque) scan->opaque; MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER; if (so->markItemIndex >= 0) { /* * The mark position is on the same page we are currently on. Just * restore the itemIndex. */ so->currPos.itemIndex = so->markItemIndex; } else { /* we aren't holding any read locks, but gotta drop the pin */ if (BTScanPosIsValid(so->currPos)) { /* Before leaving current page, deal with any killed items */ if (so->numKilled > 0 && so->currPos.buf != so->markPos.buf) _bt_killitems(scan, false); ReleaseBuffer(so->currPos.buf); so->currPos.buf = InvalidBuffer; } if (BTScanPosIsValid(so->markPos)) { /* bump pin on mark buffer for assignment to current buffer */ IncrBufferRefCount(so->markPos.buf); memcpy(&so->currPos, &so->markPos, offsetof(BTScanPosData, items[1]) + so->markPos.lastItem * sizeof(BTScanPosItem)); } } MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT; PG_RETURN_VOID(); }
/* * btendscan() -- close down a scan */ Datum btendscan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); BTScanOpaque so = (BTScanOpaque) scan->opaque; /* we aren't holding any read locks, but gotta drop the pins */ if (BTScanPosIsValid(so->currPos)) { /* Before leaving current page, deal with any killed items */ if (so->numKilled > 0) _bt_killitems(scan, false); ReleaseBuffer(so->currPos.buf); so->currPos.buf = InvalidBuffer; } if (BTScanPosIsValid(so->markPos)) { ReleaseBuffer(so->markPos.buf); so->markPos.buf = InvalidBuffer; } so->markItemIndex = -1; /* Release storage */ if (so->keyData != NULL) pfree(so->keyData); /* so->arrayKeyData and so->arrayKeys are in arrayContext */ if (so->arrayContext != NULL) MemoryContextDelete(so->arrayContext); if (so->killedItems != NULL) pfree(so->killedItems); if (so->currTuples != NULL) pfree(so->currTuples); /* so->markTuples should not be pfree'd, see btrescan */ pfree(so); PG_RETURN_VOID(); }
/* * btrescan() -- rescan an index relation */ Datum btrescan(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1); /* remaining arguments are ignored */ BTScanOpaque so = (BTScanOpaque) scan->opaque; /* we aren't holding any read locks, but gotta drop the pins */ if (BTScanPosIsValid(so->currPos)) { /* Before leaving current page, deal with any killed items */ if (so->numKilled > 0) _bt_killitems(scan, false); ReleaseBuffer(so->currPos.buf); so->currPos.buf = InvalidBuffer; } if (BTScanPosIsValid(so->markPos)) { ReleaseBuffer(so->markPos.buf); so->markPos.buf = InvalidBuffer; } so->markItemIndex = -1; /* * Reset the scan keys. Note that keys ordering stuff moved to _bt_first. * - vadim 05/05/97 */ if (scankey && scan->numberOfKeys > 0) memmove(scan->keyData, scankey, scan->numberOfKeys * sizeof(ScanKeyData)); so->numberOfKeys = 0; /* until _bt_preprocess_keys sets it */ PG_RETURN_VOID(); }
/*
 *  _bt_steppage() -- Step to next page containing valid data for scan
 *
 * Parameters:
 *    scan - the index scan; its BTScanOpaque state (scan->opaque) is read
 *           and updated in place.
 *    dir  - scan direction; the forward branch follows the saved nextPage
 *           link, the other branch walks left via _bt_walk_left().
 *
 * On entry, so->currPos.buf must be pinned and read-locked.  We'll drop
 * the lock and pin before moving to next page.
 *
 * On success exit, we hold pin and read-lock on the next interesting page,
 * and so->currPos is updated to contain data from that page.
 *
 * If there are no more matching records in the given direction, we drop all
 * locks and pins, set so->currPos.buf to InvalidBuffer, and return FALSE.
 *
 * Returns true once _bt_readpage() reports matches on some page, false
 * otherwise.
 */
static bool
_bt_steppage(IndexScanDesc scan, ScanDirection dir)
{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    Relation rel;
    Page page;
    BTPageOpaque opaque;

    /* we must have the buffer pinned and locked */
    Assert(BufferIsValid(so->currPos.buf));

    /* Before leaving current page, deal with any killed items */
    if (so->numKilled > 0)
        _bt_killitems(scan, true);

    /*
     * Before we modify currPos, make a copy of the page data if there was a
     * mark position that needs it.  A non-negative markItemIndex means the
     * mark is so far recorded only as an item index; it is converted here
     * into a full markPos copy and the index is reset to -1.
     */
    if (so->markItemIndex >= 0)
    {
        /* bump pin on current buffer for assignment to mark buffer */
        IncrBufferRefCount(so->currPos.buf);
        /* copy the header plus the items up to and including lastItem */
        memcpy(&so->markPos, &so->currPos,
               offsetof(BTScanPosData, items[1]) +
               so->currPos.lastItem * sizeof(BTScanPosItem));
        /* copy the tuple workspace too, when one exists */
        if (so->markTuples)
            memcpy(so->markTuples, so->currTuples,
                   so->currPos.nextTupleOffset);
        so->markPos.itemIndex = so->markItemIndex;
        so->markItemIndex = -1;
    }

    rel = scan->indexRelation;

    if (ScanDirectionIsForward(dir))
    {
        /* Walk right to the next page with data */
        /* We must rely on the previously saved nextPage link! */
        BlockNumber blkno = so->currPos.nextPage;

        /* Remember we left a page with data */
        so->currPos.moreLeft = true;

        for (;;)
        {
            /* release the previous buffer */
            _bt_relbuf(rel, so->currPos.buf);
            so->currPos.buf = InvalidBuffer;

            /* if we're at end of scan, give up */
            if (blkno == P_NONE || !so->currPos.moreRight)
                return false;

            /* check for interrupts while we're not holding any buffer lock */
            CHECK_FOR_INTERRUPTS();

            /* step right one page */
            so->currPos.buf = _bt_getbuf(rel, blkno, BT_READ);

            /* check for deleted page */
            page = BufferGetPage(so->currPos.buf);
            opaque = (BTPageOpaque) PageGetSpecialPointer(page);
            if (!P_IGNORE(opaque))
            {
                PredicateLockPage(rel, blkno, scan->xs_snapshot);
                /* see if there are any matches on this page */
                /* note that this will clear moreRight if we can stop */
                if (_bt_readpage(scan, dir, P_FIRSTDATAKEY(opaque)))
                    break;
            }

            /* nope, keep going: follow this page's own right-link */
            blkno = opaque->btpo_next;
        }
    }
    else
    {
        /* Remember we left a page with data */
        so->currPos.moreRight = true;

        /*
         * Walk left to the next page with data.  This is much more complex
         * than the walk-right case because of the possibility that the page
         * to our left splits while we are in flight to it, plus the
         * possibility that the page we were on gets deleted after we leave
         * it.  See nbtree/README for details.
         */
        for (;;)
        {
            /* Done if we know there are no matching keys to the left */
            if (!so->currPos.moreLeft)
            {
                _bt_relbuf(rel, so->currPos.buf);
                so->currPos.buf = InvalidBuffer;
                return false;
            }

            /* Step to next physical page */
            so->currPos.buf = _bt_walk_left(rel, so->currPos.buf);

            /* if we're physically at end of index, return failure */
            if (so->currPos.buf == InvalidBuffer)
                return false;

            /*
             * Okay, we managed to move left to a non-deleted page.  Done if
             * it's not half-dead and contains matching tuples.  Else loop
             * back and do it all again.
             */
            page = BufferGetPage(so->currPos.buf);
            opaque = (BTPageOpaque) PageGetSpecialPointer(page);
            if (!P_IGNORE(opaque))
            {
                PredicateLockPage(rel, BufferGetBlockNumber(so->currPos.buf), scan->xs_snapshot);
                /* see if there are any matches on this page */
                /* note that this will clear moreLeft if we can stop */
                if (_bt_readpage(scan, dir, PageGetMaxOffsetNumber(page)))
                    break;
            }
        }
    }

    /* currPos now describes a page on which _bt_readpage found matches */
    return true;
}
/* * btrestrpos() -- restore scan to last saved position */ Datum btrestrpos(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); BTScanOpaque so = (BTScanOpaque) scan->opaque; /* Restore the marked positions of any array keys */ if (so->numArrayKeys) _bt_restore_array_keys(scan); if (so->markItemIndex >= 0) { /* * The scan has never moved to a new___ page since the last mark. Just * restore the itemIndex. * * NB: In this case we can't count on anything in so->markPos to be * accurate. */ so->currPos.itemIndex = so->markItemIndex; } else if (so->currPos.currPage == so->markPos.currPage) { /* * so->markItemIndex < 0 but mark and current positions are on the * same page. This would be an unusual case, where the scan moved to * a new___ index page after the mark, restored, and later restored again * without moving off the marked page. It is not clear that this code * can currently be reached, but it seems better to make this function * robust for this case than to Assert() or elog() that it can't * happen. * * We neither want to set so->markItemIndex >= 0 (because that could * cause a later move to a new___ page to redo the memcpy() executions) * nor re-execute the memcpy() functions for a restore within the same * page. The previous restore to this page already set everything * except markPos as it should be. */ so->currPos.itemIndex = so->markPos.itemIndex; } else { /* * The scan moved to a new___ page after last mark or restore, and we are * now restoring to the marked page. We aren't holding any read * locks, but if we're still holding the pin for the current position, * we must drop it. 
*/ if (BTScanPosIsValid(so->currPos)) { /* Before leaving current page, deal with any killed items */ if (so->numKilled > 0) _bt_killitems(scan); BTScanPosUnpinIfPinned(so->currPos); } if (BTScanPosIsValid(so->markPos)) { /* bump pin on mark buffer for assignment to current buffer */ if (BTScanPosIsPinned(so->markPos)) IncrBufferRefCount(so->markPos.buf); memcpy(&so->currPos, &so->markPos, offsetof(BTScanPosData, items[1]) + so->markPos.lastItem * sizeof(BTScanPosItem)); if (so->currTuples) memcpy(so->currTuples, so->markTuples, so->markPos.nextTupleOffset); } else BTScanPosInvalidate(so->currPos); } PG_RETURN_VOID(); }