/*
 * btmarkpos() -- save current scan position
 */
Datum
btmarkpos(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pin */
    if (BTScanPosIsValid(so->markPos))
    {
        ReleaseBuffer(so->markPos.buf);
        so->markPos.buf = InvalidBuffer;
    }

    /*
     * Just record the current itemIndex. If we later step to next page
     * before releasing the marked position, _bt_steppage makes a full copy
     * of the currPos struct in markPos. If (as often happens) the mark is
     * moved before we leave the page, we don't have to do that work.
     */
    if (BTScanPosIsValid(so->currPos))
        so->markItemIndex = so->currPos.itemIndex;
    else
        so->markItemIndex = -1;

    /* Also record the current positions of any array keys */
    if (so->numArrayKeys)
        _bt_mark_array_keys(scan);

    PG_RETURN_VOID();
}
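/*
 * For context: a paraphrased sketch, not verbatim PostgreSQL source, of the
 * deferred copy in _bt_steppage() that the btmarkpos comment above refers
 * to. The helper name is invented; the real logic is inline in
 * _bt_steppage, and the real code also copies the markTuples workspace when
 * index-only scans are in use. The point: the mark stays cheap (just an
 * itemIndex) until the scan actually leaves the page; only then is currPos
 * copied into markPos, header plus the valid prefix of the items[] array.
 */
static void
bt_steppage_mark_copy_sketch(BTScanOpaque so)
{
    if (so->markItemIndex >= 0)
    {
        /* bump pin on current buffer for assignment to mark buffer */
        IncrBufferRefCount(so->currPos.buf);
        memcpy(&so->markPos, &so->currPos,
               offsetof(BTScanPosData, items[1]) +
               so->currPos.lastItem * sizeof(BTScanPosItem));
        so->markPos.itemIndex = so->markItemIndex;
        so->markItemIndex = -1;
    }
}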
/*
 * btmarkpos() -- save current scan position
 */
Datum
btmarkpos(PG_FUNCTION_ARGS)
{
    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE;

    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER;

    /* we aren't holding any read locks, but gotta drop the pin */
    if (BTScanPosIsValid(so->markPos))
    {
        ReleaseBuffer(so->markPos.buf);
        so->markPos.buf = InvalidBuffer;
    }

    /*
     * Just record the current itemIndex. If we later step to next page
     * before releasing the marked position, _bt_steppage makes a full copy
     * of the currPos struct in markPos. If (as often happens) the mark is
     * moved before we leave the page, we don't have to do that work.
     */
    if (BTScanPosIsValid(so->currPos))
        so->markItemIndex = so->currPos.itemIndex;
    else
        so->markItemIndex = -1;

    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT;

    PG_RETURN_VOID();
}
/*
 * btendscan() -- close down a scan
 */
Datum
btendscan(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pins */
    if (BTScanPosIsValid(so->currPos))
    {
        /* Before leaving current page, deal with any killed items */
        if (so->numKilled > 0)
            _bt_killitems(scan, false);
        ReleaseBuffer(so->currPos.buf);
        so->currPos.buf = InvalidBuffer;
    }

    if (BTScanPosIsValid(so->markPos))
    {
        ReleaseBuffer(so->markPos.buf);
        so->markPos.buf = InvalidBuffer;
    }
    so->markItemIndex = -1;

    if (so->killedItems != NULL)
        pfree(so->killedItems);
    if (so->keyData != NULL)
        pfree(so->keyData);
    pfree(so);

    PG_RETURN_VOID();
}
/*
 * btrescan() -- rescan an index relation
 */
Datum
btrescan(PG_FUNCTION_ARGS)
{
    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE;

    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1);
    BTScanOpaque so;

    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER;

    so = (BTScanOpaque) scan->opaque;

    if (so == NULL)             /* if called from btbeginscan */
    {
        so = (BTScanOpaque) palloc(sizeof(BTScanOpaqueData));
        so->currPos.buf = so->markPos.buf = InvalidBuffer;
        if (scan->numberOfKeys > 0)
            so->keyData = (ScanKey) palloc(scan->numberOfKeys * sizeof(ScanKeyData));
        else
            so->keyData = NULL;
        so->killedItems = NULL; /* until needed */
        so->numKilled = 0;
        scan->opaque = so;
    }

    /* we aren't holding any read locks, but gotta drop the pins */
    if (BTScanPosIsValid(so->currPos))
    {
        /* Before leaving current page, deal with any killed items */
        if (so->numKilled > 0)
            _bt_killitems(scan, false);
        ReleaseBuffer(so->currPos.buf);
        so->currPos.buf = InvalidBuffer;
    }

    if (BTScanPosIsValid(so->markPos))
    {
        ReleaseBuffer(so->markPos.buf);
        so->markPos.buf = InvalidBuffer;
    }
    so->markItemIndex = -1;

    /*
     * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
     * - vadim 05/05/97
     */
    if (scankey && scan->numberOfKeys > 0)
        memmove(scan->keyData,
                scankey,
                scan->numberOfKeys * sizeof(ScanKeyData));
    so->numberOfKeys = 0;       /* until _bt_preprocess_keys sets it */

    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT;

    PG_RETURN_VOID();
}
/*
 * btrestrpos() -- restore scan to last saved position
 */
void
btrestrpos(IndexScanDesc scan)
{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* Restore the marked positions of any array keys */
    if (so->numArrayKeys)
        _bt_restore_array_keys(scan);

    if (so->markItemIndex >= 0)
    {
        /*
         * The scan has never moved to a new page since the last mark. Just
         * restore the itemIndex.
         *
         * NB: In this case we can't count on anything in so->markPos to be
         * accurate.
         */
        so->currPos.itemIndex = so->markItemIndex;
    }
    else
    {
        /*
         * The scan moved to a new page after last mark or restore, and we
         * are now restoring to the marked page. We aren't holding any read
         * locks, but if we're still holding the pin for the current
         * position, we must drop it.
         */
        if (BTScanPosIsValid(so->currPos))
        {
            /* Before leaving current page, deal with any killed items */
            if (so->numKilled > 0)
                _bt_killitems(scan);
            BTScanPosUnpinIfPinned(so->currPos);
        }

        if (BTScanPosIsValid(so->markPos))
        {
            /* bump pin on mark buffer for assignment to current buffer */
            if (BTScanPosIsPinned(so->markPos))
                IncrBufferRefCount(so->markPos.buf);
            memcpy(&so->currPos, &so->markPos,
                   offsetof(BTScanPosData, items[1]) +
                   so->markPos.lastItem * sizeof(BTScanPosItem));
            if (so->currTuples)
                memcpy(so->currTuples, so->markTuples,
                       so->markPos.nextTupleOffset);
        }
        else
            BTScanPosInvalidate(so->currPos);
    }
}
/*
 * btmarkpos() -- save current scan position
 */
void
btmarkpos(IndexScanDesc scan)
{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* There may be an old mark with a pin (but no lock). */
    BTScanPosUnpinIfPinned(so->markPos);

    /*
     * Just record the current itemIndex. If we later step to next page
     * before releasing the marked position, _bt_steppage makes a full copy
     * of the currPos struct in markPos. If (as often happens) the mark is
     * moved before we leave the page, we don't have to do that work.
     */
    if (BTScanPosIsValid(so->currPos))
        so->markItemIndex = so->currPos.itemIndex;
    else
    {
        BTScanPosInvalidate(so->markPos);
        so->markItemIndex = -1;
    }

    /* Also record the current positions of any array keys */
    if (so->numArrayKeys)
        _bt_mark_array_keys(scan);
}
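/*
 * For reference: a condensed sketch of the BTScanPos helper macros used by
 * the newer functions above, paraphrased from nbtree.h with the assertion
 * checks omitted. The key point is that "pinned" and "valid" are tracked
 * separately: a position can remain valid for mark/restore purposes after
 * its buffer pin has been dropped. (In older releases, BTScanPosIsValid was
 * simply a buffer-validity check, which is why the older variants above
 * release the pin and clear so->currPos.buf by hand instead.)
 */
#define BTScanPosIsPinned(scanpos) \
    BufferIsValid((scanpos).buf)

#define BTScanPosUnpinIfPinned(scanpos) \
    do { \
        if (BTScanPosIsPinned(scanpos)) \
        { \
            ReleaseBuffer((scanpos).buf); \
            (scanpos).buf = InvalidBuffer; \
        } \
    } while (0)

#define BTScanPosIsValid(scanpos) \
    BlockNumberIsValid((scanpos).currPage)

#define BTScanPosInvalidate(scanpos) \
    do { \
        (scanpos).currPage = InvalidBlockNumber; \
        (scanpos).nextPage = InvalidBlockNumber; \
        (scanpos).buf = InvalidBuffer; \
    } while (0)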
/*
 * btendscan() -- close down a scan
 */
void
btendscan(IndexScanDesc scan)
{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pins */
    if (BTScanPosIsValid(so->currPos))
    {
        /* Before leaving current page, deal with any killed items */
        if (so->numKilled > 0)
            _bt_killitems(scan);
        BTScanPosUnpinIfPinned(so->currPos);
    }

    so->markItemIndex = -1;
    BTScanPosUnpinIfPinned(so->markPos);

    /* No need to invalidate positions, the RAM is about to be freed. */

    /* Release storage */
    if (so->keyData != NULL)
        pfree(so->keyData);
    /* so->arrayKeyData and so->arrayKeys are in arrayContext */
    if (so->arrayContext != NULL)
        MemoryContextDelete(so->arrayContext);
    if (so->killedItems != NULL)
        pfree(so->killedItems);
    if (so->currTuples != NULL)
        pfree(so->currTuples);
    /* so->markTuples should not be pfree'd, see btrescan */
    pfree(so);
}
/*
 * btrescan() -- rescan an index relation
 */
Datum
btrescan(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1);

    /* remaining arguments are ignored */
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pins */
    if (BTScanPosIsValid(so->currPos))
    {
        /* Before leaving current page, deal with any killed items */
        if (so->numKilled > 0)
            _bt_killitems(scan);
        BTScanPosUnpinIfPinned(so->currPos);
        BTScanPosInvalidate(so->currPos);
    }

    so->markItemIndex = -1;
    BTScanPosUnpinIfPinned(so->markPos);
    BTScanPosInvalidate(so->markPos);

    /*
     * Allocate tuple workspace arrays, if needed for an index-only scan and
     * not already done in a previous rescan call. To save on palloc
     * overhead, both workspaces are allocated as one palloc block; only this
     * function and btendscan know that.
     *
     * NOTE: this data structure also makes it safe to return data from a
     * "name" column, even though btree name_ops uses an underlying storage
     * datatype of cstring. The risk there is that "name" is supposed to be
     * padded to NAMEDATALEN, but the actual index tuple is probably shorter.
     * However, since we only return data out of tuples sitting in the
     * currTuples array, a fetch of NAMEDATALEN bytes can at worst pull some
     * data out of the markTuples array --- running off the end of memory for
     * a SIGSEGV is not possible. Yeah, this is ugly as sin, but it beats
     * adding special-case treatment for name_ops elsewhere.
     */
    if (scan->xs_want_itup && so->currTuples == NULL)
    {
        so->currTuples = (char *) palloc(BLCKSZ * 2);
        so->markTuples = so->currTuples + BLCKSZ;
    }

    /*
     * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
     * - vadim 05/05/97
     */
    if (scankey && scan->numberOfKeys > 0)
        memmove(scan->keyData,
                scankey,
                scan->numberOfKeys * sizeof(ScanKeyData));
    so->numberOfKeys = 0;       /* until _bt_preprocess_keys sets it */

    /* If any keys are SK_SEARCHARRAY type, set up array-key info */
    _bt_preprocess_array_keys(scan);

    PG_RETURN_VOID();
}
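/*
 * Illustration only (hypothetical code, not PostgreSQL source): the
 * one-allocation trick used for currTuples/markTuples above, reduced to a
 * self-contained example with plain malloc. Both workspaces live in a
 * single block, so releasing the first pointer releases both -- which is
 * why btendscan pfrees so->currTuples but must never pfree so->markTuples.
 */
#include <stdlib.h>

#define WORKSPACE_SIZE 8192     /* stands in for BLCKSZ */

typedef struct ScanWorkspaces
{
    char *curr;                 /* first half of the shared block */
    char *mark;                 /* second half of the same block */
} ScanWorkspaces;

static int
workspaces_init(ScanWorkspaces *ws)
{
    ws->curr = malloc(WORKSPACE_SIZE * 2);  /* one allocation for both */
    if (ws->curr == NULL)
        return -1;
    ws->mark = ws->curr + WORKSPACE_SIZE;   /* no second allocation */
    return 0;
}

static void
workspaces_free(ScanWorkspaces *ws)
{
    free(ws->curr);             /* frees both halves; never free(ws->mark) */
    ws->curr = ws->mark = NULL;
}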
/*
 * btrestrpos() -- restore scan to last saved position
 */
Datum
btrestrpos(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* Restore the marked positions of any array keys */
    if (so->numArrayKeys)
        _bt_restore_array_keys(scan);

    if (so->markItemIndex >= 0)
    {
        /*
         * The mark position is on the same page we are currently on. Just
         * restore the itemIndex.
         */
        so->currPos.itemIndex = so->markItemIndex;
    }
    else
    {
        /* we aren't holding any read locks, but gotta drop the pin */
        if (BTScanPosIsValid(so->currPos))
        {
            /* Before leaving current page, deal with any killed items */
            if (so->numKilled > 0 &&
                so->currPos.buf != so->markPos.buf)
                _bt_killitems(scan, false);
            ReleaseBuffer(so->currPos.buf);
            so->currPos.buf = InvalidBuffer;
        }

        if (BTScanPosIsValid(so->markPos))
        {
            /* bump pin on mark buffer for assignment to current buffer */
            IncrBufferRefCount(so->markPos.buf);
            memcpy(&so->currPos, &so->markPos,
                   offsetof(BTScanPosData, items[1]) +
                   so->markPos.lastItem * sizeof(BTScanPosItem));
            if (so->currTuples)
                memcpy(so->currTuples, so->markTuples,
                       so->markPos.nextTupleOffset);
        }
    }

    PG_RETURN_VOID();
}
/*
 * btrestrpos() -- restore scan to last saved position
 */
Datum
btrestrpos(PG_FUNCTION_ARGS)
{
    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE;

    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER;

    if (so->markItemIndex >= 0)
    {
        /*
         * The mark position is on the same page we are currently on. Just
         * restore the itemIndex.
         */
        so->currPos.itemIndex = so->markItemIndex;
    }
    else
    {
        /* we aren't holding any read locks, but gotta drop the pin */
        if (BTScanPosIsValid(so->currPos))
        {
            /* Before leaving current page, deal with any killed items */
            if (so->numKilled > 0 &&
                so->currPos.buf != so->markPos.buf)
                _bt_killitems(scan, false);
            ReleaseBuffer(so->currPos.buf);
            so->currPos.buf = InvalidBuffer;
        }

        if (BTScanPosIsValid(so->markPos))
        {
            /* bump pin on mark buffer for assignment to current buffer */
            IncrBufferRefCount(so->markPos.buf);
            memcpy(&so->currPos, &so->markPos,
                   offsetof(BTScanPosData, items[1]) +
                   so->markPos.lastItem * sizeof(BTScanPosItem));
        }
    }

    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT;

    PG_RETURN_VOID();
}
/*
 * btgettuple() -- Get the next tuple in the scan.
 */
Datum
btgettuple(PG_FUNCTION_ARGS)
{
    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE;

    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1);
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    bool res;

    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER;

    /*
     * If we've already initialized this scan, we can just advance it in the
     * appropriate direction. If we haven't done so yet, we call a routine to
     * get the first item in the scan.
     */
    if (BTScanPosIsValid(so->currPos))
    {
        /*
         * Check to see if we should kill the previously-fetched tuple.
         */
        if (scan->kill_prior_tuple)
        {
            /*
             * Yes, remember it for later. (We'll deal with all such tuples
             * at once right before leaving the index page.) The test for
             * numKilled overrun is not just paranoia: if the caller reverses
             * direction in the indexscan then the same item might get
             * entered multiple times. It's not worth trying to optimize
             * that, so we don't detect it, but instead just forget any
             * excess entries.
             */
            if (so->killedItems == NULL)
                so->killedItems = (int *)
                    palloc(MaxIndexTuplesPerPage * sizeof(int));
            if (so->numKilled < MaxIndexTuplesPerPage)
                so->killedItems[so->numKilled++] = so->currPos.itemIndex;
        }

        /*
         * Now continue the scan.
         */
        res = _bt_next(scan, dir);
    }
    else
        res = _bt_first(scan, dir);

    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT;

    PG_RETURN_BOOL(res);
}
/*
 * btendscan() -- close down a scan
 */
Datum
btendscan(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pins */
    if (BTScanPosIsValid(so->currPos))
    {
        /* Before leaving current page, deal with any killed items */
        if (so->numKilled > 0)
            _bt_killitems(scan, false);
        ReleaseBuffer(so->currPos.buf);
        so->currPos.buf = InvalidBuffer;
    }

    if (BTScanPosIsValid(so->markPos))
    {
        ReleaseBuffer(so->markPos.buf);
        so->markPos.buf = InvalidBuffer;
    }
    so->markItemIndex = -1;

    /* Release storage */
    if (so->keyData != NULL)
        pfree(so->keyData);
    /* so->arrayKeyData and so->arrayKeys are in arrayContext */
    if (so->arrayContext != NULL)
        MemoryContextDelete(so->arrayContext);
    if (so->killedItems != NULL)
        pfree(so->killedItems);
    if (so->currTuples != NULL)
        pfree(so->currTuples);
    /* so->markTuples should not be pfree'd, see btrescan */
    pfree(so);

    PG_RETURN_VOID();
}
/*
 * btrescan() -- rescan an index relation
 */
Datum
btrescan(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    ScanKey scankey = (ScanKey) PG_GETARG_POINTER(1);

    /* remaining arguments are ignored */
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* we aren't holding any read locks, but gotta drop the pins */
    if (BTScanPosIsValid(so->currPos))
    {
        /* Before leaving current page, deal with any killed items */
        if (so->numKilled > 0)
            _bt_killitems(scan, false);
        ReleaseBuffer(so->currPos.buf);
        so->currPos.buf = InvalidBuffer;
    }

    if (BTScanPosIsValid(so->markPos))
    {
        ReleaseBuffer(so->markPos.buf);
        so->markPos.buf = InvalidBuffer;
    }
    so->markItemIndex = -1;

    /*
     * Reset the scan keys. Note that keys ordering stuff moved to _bt_first.
     * - vadim 05/05/97
     */
    if (scankey && scan->numberOfKeys > 0)
        memmove(scan->keyData,
                scankey,
                scan->numberOfKeys * sizeof(ScanKeyData));
    so->numberOfKeys = 0;       /* until _bt_preprocess_keys sets it */

    PG_RETURN_VOID();
}
/*
 * btgettuple() -- Get the next tuple in the scan.
 */
bool
btgettuple(IndexScanDesc scan, ScanDirection dir)
{
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    bool res;

    /* btree indexes are never lossy */
    scan->xs_recheck = false;

    /*
     * If we have any array keys, initialize them during first call for a
     * scan. We can't do this in btrescan because we don't know the scan
     * direction at that time.
     */
    if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
    {
        /* punt if we have any unsatisfiable array keys */
        if (so->numArrayKeys < 0)
            return false;

        _bt_start_array_keys(scan, dir);
    }

    /* This loop handles advancing to the next array elements, if any */
    do
    {
        /*
         * If we've already initialized this scan, we can just advance it in
         * the appropriate direction. If we haven't done so yet, we call
         * _bt_first() to get the first item in the scan.
         */
        if (!BTScanPosIsValid(so->currPos))
            res = _bt_first(scan, dir);
        else
        {
            /*
             * Check to see if we should kill the previously-fetched tuple.
             */
            if (scan->kill_prior_tuple)
            {
                /*
                 * Yes, remember it for later. (We'll deal with all such
                 * tuples at once right before leaving the index page.) The
                 * test for numKilled overrun is not just paranoia: if the
                 * caller reverses direction in the indexscan then the same
                 * item might get entered multiple times. It's not worth
                 * trying to optimize that, so we don't detect it, but
                 * instead just forget any excess entries.
                 */
                if (so->killedItems == NULL)
                    so->killedItems = (int *)
                        palloc(MaxIndexTuplesPerPage * sizeof(int));
                if (so->numKilled < MaxIndexTuplesPerPage)
                    so->killedItems[so->numKilled++] = so->currPos.itemIndex;
            }

            /*
             * Now continue the scan.
             */
            res = _bt_next(scan, dir);
        }

        /* If we have a tuple, return it ... */
        if (res)
            break;
        /* ... otherwise see if we have more array keys to deal with */
    } while (so->numArrayKeys && _bt_advance_array_keys(scan, dir));

    return res;
}
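/*
 * For context: a paraphrased and simplified sketch, not verbatim PostgreSQL
 * source, of how the killedItems[] entries queued above are consumed before
 * the scan leaves the page. The helper name is invented; the real
 * _bt_killitems also handles pin/lock management and flags the page as
 * having garbage. Each entry indexes into currPos.items[]; if the matching
 * index tuple can still be found on the page by heap TID, its line pointer
 * is marked dead so the space can later be reclaimed.
 */
static void
bt_killitems_sketch(BTScanOpaque so, Page page,
                    OffsetNumber minoff, OffsetNumber maxoff)
{
    int i;

    for (i = 0; i < so->numKilled; i++)
    {
        BTScanPosItem *kitem = &so->currPos.items[so->killedItems[i]];
        OffsetNumber offnum = kitem->indexOffset;

        if (offnum < minoff)
            continue;           /* pure paranoia */
        while (offnum <= maxoff)
        {
            ItemId iid = PageGetItemId(page, offnum);
            IndexTuple ituple = (IndexTuple) PageGetItem(page, iid);

            if (ItemPointerEquals(&ituple->t_tid, &kitem->heapTid))
            {
                /* found it: mark the line pointer dead */
                ItemIdMarkDead(iid);
                break;
            }
            offnum = OffsetNumberNext(offnum);
        }
    }
    so->numKilled = 0;          /* the queue has been consumed */
}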
/*
 * btgetmulti() -- construct a HashBitmap.
 */
Datum
btgetmulti(PG_FUNCTION_ARGS)
{
    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE;

    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    Node *n = (Node *) PG_GETARG_POINTER(1);
    HashBitmap *hashBitmap;
    BTScanOpaque so = (BTScanOpaque) scan->opaque;
    bool res = true;

    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER;

    if (n == NULL || IsA(n, StreamBitmap))
    {
        /* XXX should we use less than work_mem for this? */
        hashBitmap = tbm_create(work_mem * 1024L);
    }
    else
    {
        hashBitmap = (HashBitmap *) n;
    }

    /* If we haven't started the scan yet, fetch the first page & tuple. */
    if (!BTScanPosIsValid(so->currPos))
    {
        res = _bt_first(scan, ForwardScanDirection);
        if (res)
        {
            /* Save tuple ID, and continue scanning */
            tbm_add_tuples(hashBitmap, &(scan->xs_ctup.t_self), 1);
        }
    }

    while (res)
    {
        /*
         * Advance to next tuple within page. This is the same as the easy
         * case in _bt_next().
         */
        if (++so->currPos.itemIndex > so->currPos.lastItem)
        {
            /* let _bt_next do the heavy lifting */
            res = _bt_next(scan, ForwardScanDirection);
            if (!res)
                break;
        }

        /* Save tuple ID, and continue scanning */
        tbm_add_tuples(hashBitmap,
                       &(so->currPos.items[so->currPos.itemIndex].heapTid),
                       1);
    }

    if (n && IsA(n, StreamBitmap))
    {
        stream_add_node((StreamBitmap *) n,
                        tbm_create_stream_node(hashBitmap), BMS_OR);
        PG_RETURN_POINTER(n);
    }

    MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT;

    PG_RETURN_POINTER(hashBitmap);
}
/*
 * btrestrpos() -- restore scan to last saved position
 */
Datum
btrestrpos(PG_FUNCTION_ARGS)
{
    IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0);
    BTScanOpaque so = (BTScanOpaque) scan->opaque;

    /* Restore the marked positions of any array keys */
    if (so->numArrayKeys)
        _bt_restore_array_keys(scan);

    if (so->markItemIndex >= 0)
    {
        /*
         * The scan has never moved to a new page since the last mark. Just
         * restore the itemIndex.
         *
         * NB: In this case we can't count on anything in so->markPos to be
         * accurate.
         */
        so->currPos.itemIndex = so->markItemIndex;
    }
    else if (so->currPos.currPage == so->markPos.currPage)
    {
        /*
         * so->markItemIndex < 0 but mark and current positions are on the
         * same page. This would be an unusual case, where the scan moved to
         * a new index page after the mark, restored, and later restored
         * again without moving off the marked page. It is not clear that
         * this code can currently be reached, but it seems better to make
         * this function robust for this case than to Assert() or elog()
         * that it can't happen.
         *
         * We neither want to set so->markItemIndex >= 0 (because that could
         * cause a later move to a new page to redo the memcpy() executions)
         * nor re-execute the memcpy() functions for a restore within the
         * same page. The previous restore to this page already set
         * everything except markPos as it should be.
         */
        so->currPos.itemIndex = so->markPos.itemIndex;
    }
    else
    {
        /*
         * The scan moved to a new page after last mark or restore, and we
         * are now restoring to the marked page. We aren't holding any read
         * locks, but if we're still holding the pin for the current
         * position, we must drop it.
         */
        if (BTScanPosIsValid(so->currPos))
        {
            /* Before leaving current page, deal with any killed items */
            if (so->numKilled > 0)
                _bt_killitems(scan);
            BTScanPosUnpinIfPinned(so->currPos);
        }

        if (BTScanPosIsValid(so->markPos))
        {
            /* bump pin on mark buffer for assignment to current buffer */
            if (BTScanPosIsPinned(so->markPos))
                IncrBufferRefCount(so->markPos.buf);
            memcpy(&so->currPos, &so->markPos,
                   offsetof(BTScanPosData, items[1]) +
                   so->markPos.lastItem * sizeof(BTScanPosItem));
            if (so->currTuples)
                memcpy(so->currTuples, so->markTuples,
                       so->markPos.nextTupleOffset);
        }
        else
            BTScanPosInvalidate(so->currPos);
    }

    PG_RETURN_VOID();
}
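/*
 * Illustration only (hypothetical code, not PostgreSQL source): the
 * partial-copy arithmetic used by the memcpy() calls above, reduced to a
 * self-contained example. BTScanPosData ends in a large items[] array that
 * is usually mostly unused; copying offsetof(..., items[1]) plus
 * lastItem * sizeof(item) bytes transfers the fixed header and
 * items[0..lastItem] while skipping the dead tail.
 */
#include <stddef.h>
#include <string.h>

typedef struct ToyItem
{
    int value;
} ToyItem;

typedef struct ToyPos
{
    int firstItem;              /* first valid entry in items[] */
    int lastItem;               /* last valid entry in items[] */
    int itemIndex;              /* current scan position */
    ToyItem items[1024];        /* typically only partly filled */
} ToyPos;

static void
toy_pos_copy(ToyPos *dst, const ToyPos *src)
{
    /* header + items[0..lastItem], not the whole 1024-entry array */
    memcpy(dst, src,
           offsetof(ToyPos, items[1]) + src->lastItem * sizeof(ToyItem));
}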