/* * btgetbitmap() -- gets all matching tuples, and adds them to a bitmap */ Datum btgetbitmap(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); BTScanOpaque so = (BTScanOpaque) scan->opaque; int64 ntids = 0; ItemPointer heapTid; /* * If we have any array keys, initialize them. */ if (so->numArrayKeys) { /* punt if we have any unsatisfiable array keys */ if (so->numArrayKeys < 0) PG_RETURN_INT64(ntids); _bt_start_array_keys(scan, ForwardScanDirection); } /* This loop handles advancing to the next array elements, if any */ do { /* Fetch the first page & tuple */ if (_bt_first(scan, ForwardScanDirection)) { /* Save tuple ID, and continue scanning */ heapTid = &scan->xs_ctup.t_self; tbm_add_tuples(tbm, heapTid, 1, false); ntids++; for (;;) { /* * Advance to next tuple within page. This is the same as the * easy case in _bt_next(). */ if (++so->currPos.itemIndex > so->currPos.lastItem) { /* let _bt_next do the heavy lifting */ if (!_bt_next(scan, ForwardScanDirection)) break; } /* Save tuple ID, and continue scanning */ heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid; tbm_add_tuples(tbm, heapTid, 1, false); ntids++; } } /* Now see if we have more array keys to deal with */ } while (so->numArrayKeys && _bt_advance_array_keys(scan, ForwardScanDirection)); PG_RETURN_INT64(ntids); }
/* * btgettuple() -- Get the next tuple in the scan. */ Datum btgettuple(PG_FUNCTION_ARGS) { MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE; IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); ScanDirection dir = (ScanDirection) PG_GETARG_INT32(1); BTScanOpaque so = (BTScanOpaque) scan->opaque; bool res; MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER; /* * If we've already initialized this scan, we can just advance it in the * appropriate direction. If we haven't done so yet, we call a routine to * get the first item in the scan. */ if (BTScanPosIsValid(so->currPos)) { /* * Check to see if we should kill the previously-fetched tuple. */ if (scan->kill_prior_tuple) { /* * Yes, remember it for later. (We'll deal with all such tuples * at once right before leaving the index page.) The test for * numKilled overrun is not just paranoia: if the caller reverses * direction in the indexscan then the same item might get entered * multiple times. It's not worth trying to optimize that, so we * don't detect it, but instead just forget any excess entries. */ if (so->killedItems == NULL) so->killedItems = (int *) palloc(MaxIndexTuplesPerPage * sizeof(int)); if (so->numKilled < MaxIndexTuplesPerPage) so->killedItems[so->numKilled++] = so->currPos.itemIndex; } /* * Now continue the scan. */ res = _bt_next(scan, dir); } else res = _bt_first(scan, dir); MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT; PG_RETURN_BOOL(res); }
/* * btgetbitmap() -- gets all matching tuples, and adds them to a bitmap */ Datum btgetbitmap(PG_FUNCTION_ARGS) { IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); TIDBitmap *tbm = (TIDBitmap *) PG_GETARG_POINTER(1); BTScanOpaque so = (BTScanOpaque) scan->opaque; int64 ntids = 0; ItemPointer heapTid; /* Fetch the first page & tuple. */ if (!_bt_first(scan, ForwardScanDirection)) { /* empty scan */ PG_RETURN_INT64(0); } /* Save tuple ID, and continue scanning */ heapTid = &scan->xs_ctup.t_self; tbm_add_tuples(tbm, heapTid, 1, false); ntids++; for (;;) { /* * Advance to next tuple within page. This is the same as the easy * case in _bt_next(). */ if (++so->currPos.itemIndex > so->currPos.lastItem) { /* let _bt_next do the heavy lifting */ if (!_bt_next(scan, ForwardScanDirection)) break; } /* Save tuple ID, and continue scanning */ heapTid = &so->currPos.items[so->currPos.itemIndex].heapTid; tbm_add_tuples(tbm, heapTid, 1, false); ntids++; } PG_RETURN_INT64(ntids); }
/*
 *	btgettuple() -- fetch the next tuple of the scan.
 *
 * Returns true when a tuple was found in direction "dir", false once the
 * scan (including every array-key combination, if any) is exhausted.
 */
bool
btgettuple(IndexScanDesc scan, ScanDirection dir)
{
	BTScanOpaque so = (BTScanOpaque) scan->opaque;

	/* btree indexes are never lossy */
	scan->xs_recheck = false;

	/*
	 * Array keys are set up on the first call of a scan rather than in
	 * btrescan, because the scan direction is not known until now.
	 */
	if (so->numArrayKeys && !BTScanPosIsValid(so->currPos))
	{
		/* A negative count flags unsatisfiable array keys: give up */
		if (so->numArrayKeys < 0)
			return false;

		_bt_start_array_keys(scan, dir);
	}

	/* Each pass of this loop tries one array-key combination */
	for (;;)
	{
		bool		found;

		if (BTScanPosIsValid(so->currPos))
		{
			/*
			 * The scan is already positioned.  Before advancing, note
			 * whether the previously returned tuple should be killed.
			 */
			if (scan->kill_prior_tuple)
			{
				/*
				 * Just remember the item for now; all such items are dealt
				 * with together right before the index page is left.  The
				 * array is allocated on first use.
				 */
				if (so->killedItems == NULL)
					so->killedItems = (int *)
						palloc(MaxIndexTuplesPerPage * sizeof(int));

				/*
				 * The overrun test is not just paranoia: if the caller
				 * reverses direction, the same item can be entered more
				 * than once.  That is not detected; excess entries are
				 * simply dropped.
				 */
				if (so->numKilled < MaxIndexTuplesPerPage)
				{
					so->killedItems[so->numKilled] = so->currPos.itemIndex;
					so->numKilled++;
				}
			}

			/* Advance in the requested direction */
			found = _bt_next(scan, dir);
		}
		else
		{
			/* Not positioned yet: find the first item for these keys */
			found = _bt_first(scan, dir);
		}

		/* Got a tuple: hand it back */
		if (found)
			return found;

		/* Otherwise stop unless another array-key combination remains */
		if (!so->numArrayKeys || !_bt_advance_array_keys(scan, dir))
			return found;
	}
}
/* * btgetmulti() -- construct a HashBitmap. */ Datum btgetmulti(PG_FUNCTION_ARGS) { MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_DECLARE; IndexScanDesc scan = (IndexScanDesc) PG_GETARG_POINTER(0); Node *n = (Node *)PG_GETARG_POINTER(1); HashBitmap *hashBitmap; BTScanOpaque so = (BTScanOpaque) scan->opaque; bool res = true; MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_ENTER; if (n == NULL || IsA(n, StreamBitmap)) { /* XXX should we use less than work_mem for this? */ hashBitmap = tbm_create(work_mem * 1024L); } else { hashBitmap = (HashBitmap *)n; } /* If we haven't started the scan yet, fetch the first page & tuple. */ if (!BTScanPosIsValid(so->currPos)) { res = _bt_first(scan, ForwardScanDirection); if (res) { /* Save tuple ID, and continue scanning */ tbm_add_tuples(hashBitmap, &(scan->xs_ctup.t_self), 1); } } while (res) { /* * Advance to next tuple within page. This is the same as the * easy case in _bt_next(). */ if (++so->currPos.itemIndex > so->currPos.lastItem) { /* let _bt_next do the heavy lifting */ res = _bt_next(scan, ForwardScanDirection); if (!res) break; } /* Save tuple ID, and continue scanning */ tbm_add_tuples(hashBitmap, &(so->currPos.items[so->currPos.itemIndex].heapTid), 1); } if(n && IsA(n, StreamBitmap)) { stream_add_node((StreamBitmap *)n, tbm_create_stream_node(hashBitmap), BMS_OR); PG_RETURN_POINTER(n); } MIRROREDLOCK_BUFMGR_VERIFY_NO_LOCK_LEAK_EXIT; PG_RETURN_POINTER(hashBitmap); }
/*
 *	_bt_first() -- Find the first item in a scan.
 *
 *		We need to be clever about the type of scan, the operation it's
 *		performing, and the tree ordering.  We find the first item in the
 *		tree that satisfies the qualification associated with the scan
 *		descriptor.  On exit, the page containing the current index tuple
 *		is read locked and pinned, and the scan's opaque data entry is
 *		updated to include the buffer.
 *
 *		scan -- the index scan descriptor; its keys are (re)ordered here and
 *				its current-item pointer and btso_curbuf are updated.
 *		dir  -- the direction in which the scan will proceed.
 *
 *		Returns true if an item satisfying the scan keys was found: either
 *		the initial item (its heap TID is then copied into
 *		scan->xs_ctup.t_self here) or one reported by _bt_next() or
 *		_bt_endpoint().  Returns false if the keys are unsatisfiable, the
 *		index is empty, or no item matches.
 *
 *		The temporary arrays nKeyIs and scankeys are palloc'd here and are
 *		pfree'd on every return path.
 */
bool
_bt_first(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	Buffer		buf;
	Page		page;
	BTStack		stack;
	OffsetNumber offnum;
	BTItem		btitem;
	IndexTuple	itup;
	ItemPointer current;
	BlockNumber blkno;
	StrategyNumber strat;
	bool		res;
	int32		result;
	bool		scanFromEnd;
	bool		continuescan;
	ScanKey		scankeys = NULL;
	int			keysCount = 0;
	int		   *nKeyIs = NULL;
	int			i,
				j;
	StrategyNumber strat_total;

	/*
	 * Order the scan keys in our canonical fashion and eliminate any
	 * redundant keys.
	 */
	_bt_orderkeys(scan);

	/*
	 * Quit now if _bt_orderkeys() discovered that the scan keys can never
	 * be satisfied (eg, x == 1 AND x > 2).
	 */
	if (!so->qual_ok)
		return false;

	/*
	 * Examine the scan keys to discover where we need to start the scan.
	 *
	 * nKeyIs[] collects, in attribute order, the indexes (into so->keyData)
	 * of the keys chosen as starting boundaries; keysCount is how many have
	 * been chosen so far, and strat_total is the overall strategy implied
	 * by the chosen keys.
	 */
	scanFromEnd = false;
	strat_total = BTEqualStrategyNumber;
	if (so->numberOfKeys > 0)
	{
		nKeyIs = (int *) palloc(so->numberOfKeys * sizeof(int));
		for (i = 0; i < so->numberOfKeys; i++)
		{
			AttrNumber	attno = so->keyData[i].sk_attno;

			/* ignore keys for already-determined attrs */
			if (attno <= keysCount)
				continue;
			/* if we didn't find a boundary for the preceding attr, quit */
			if (attno > keysCount + 1)
				break;
			strat = _bt_getstrat(rel, attno,
								 so->keyData[i].sk_procedure);

			/*
			 * Can we use this key as a starting boundary for this attr?
			 *
			 * We can use multiple keys if they look like, say, = >= = but we
			 * have to stop after accepting a > or < boundary.
			 */
			if (strat == strat_total ||
				strat == BTEqualStrategyNumber)
				nKeyIs[keysCount++] = i;
			else if (ScanDirectionIsBackward(dir) &&
					 (strat == BTLessStrategyNumber ||
					  strat == BTLessEqualStrategyNumber))
			{
				/* upper bound: usable as the start of a backward scan */
				nKeyIs[keysCount++] = i;
				strat_total = strat;
				if (strat == BTLessStrategyNumber)
					break;
			}
			else if (ScanDirectionIsForward(dir) &&
					 (strat == BTGreaterStrategyNumber ||
					  strat == BTGreaterEqualStrategyNumber))
			{
				/* lower bound: usable as the start of a forward scan */
				nKeyIs[keysCount++] = i;
				strat_total = strat;
				if (strat == BTGreaterStrategyNumber)
					break;
			}
		}
		/* no usable boundary key at all: start from an end of the index */
		if (keysCount == 0)
			scanFromEnd = true;
	}
	else
		scanFromEnd = true;

	/* if we just need to walk down one edge of the tree, do that */
	if (scanFromEnd)
	{
		if (nKeyIs)
			pfree(nKeyIs);
		return _bt_endpoint(scan, dir);
	}

	/*
	 * We want to start the scan somewhere within the index.  Set up a
	 * scankey we can use to search for the correct starting point.  Each
	 * entry uses the BTORDER_PROC support procedure of index column i + 1
	 * together with the argument of the selected boundary key.
	 */
	scankeys = (ScanKey) palloc(keysCount * sizeof(ScanKeyData));
	for (i = 0; i < keysCount; i++)
	{
		FmgrInfo   *procinfo;

		j = nKeyIs[i];

		/*
		 * _bt_orderkeys disallows NULL keys, but this is the place to add
		 * code for them later.
		 */
		if (so->keyData[j].sk_flags & SK_ISNULL)
		{
			pfree(nKeyIs);
			pfree(scankeys);
			elog(ERROR, "btree doesn't support is(not)null, yet");
			return false;
		}
		procinfo = index_getprocinfo(rel, i + 1, BTORDER_PROC);
		ScanKeyEntryInitializeWithInfo(scankeys + i,
									   so->keyData[j].sk_flags,
									   i + 1,
									   procinfo,
									   CurrentMemoryContext,
									   so->keyData[j].sk_argument);
	}
	if (nKeyIs)
		pfree(nKeyIs);

	current = &(scan->currentItemData);

	/*
	 * Use the manufactured scan key to descend the tree and position
	 * ourselves on the target leaf page.
	 */
	stack = _bt_search(rel, keysCount, scankeys, &buf, BT_READ);

	/* don't need to keep the stack around... */
	_bt_freestack(stack);

	if (!BufferIsValid(buf))
	{
		/* Only get here if index is completely empty */
		ItemPointerSetInvalid(current);
		so->btso_curbuf = InvalidBuffer;
		pfree(scankeys);
		return false;
	}

	/* remember which buffer we have pinned */
	so->btso_curbuf = buf;
	blkno = BufferGetBlockNumber(buf);
	page = BufferGetPage(buf);

	/* position to the precise item on the page */
	offnum = _bt_binsrch(rel, buf, keysCount, scankeys);

	ItemPointerSet(current, blkno, offnum);

	/*
	 * At this point we are positioned at the first item >= scan key, or
	 * possibly at the end of a page on which all the existing items are
	 * less than the scan key and we know that everything on later pages
	 * is greater than or equal to scan key.
	 *
	 * We could step forward in the latter case, but that'd be a waste of
	 * time if we want to scan backwards.  So, it's now time to examine
	 * the scan strategy to find the exact place to start the scan.
	 *
	 * Note: if _bt_step fails (meaning we fell off the end of the index in
	 * one direction or the other), we either return false (no matches) or
	 * call _bt_endpoint() to set up a scan starting at that index
	 * endpoint, as appropriate for the desired scan type.
	 *
	 * This is another place where code for is(not)null could be added
	 * later.
	 */

	switch (strat_total)
	{
		case BTLessStrategyNumber:

			/*
			 * Back up one to arrive at last item < scankey
			 */
			if (!_bt_step(scan, &buf, BackwardScanDirection))
			{
				pfree(scankeys);
				return false;
			}
			break;

		case BTLessEqualStrategyNumber:

			/*
			 * We need to find the last item <= scankey, so step forward
			 * till we find one > scankey, then step back one.
			 */
			if (offnum > PageGetMaxOffsetNumber(page))
			{
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return _bt_endpoint(scan, dir);
				}
			}
			for (;;)
			{
				/* re-read position: _bt_step may have moved to a new page */
				offnum = ItemPointerGetOffsetNumber(current);
				page = BufferGetPage(buf);
				result = _bt_compare(rel, keysCount, scankeys, page, offnum);
				/* stop at the first item for which the comparison is < 0 */
				if (result < 0)
					break;
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return _bt_endpoint(scan, dir);
				}
			}
			if (!_bt_step(scan, &buf, BackwardScanDirection))
			{
				pfree(scankeys);
				return false;
			}
			break;

		case BTEqualStrategyNumber:

			/*
			 * Make sure we are on the first equal item; might have to
			 * step forward if currently at end of page.
			 */
			if (offnum > PageGetMaxOffsetNumber(page))
			{
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return false;
				}
				offnum = ItemPointerGetOffsetNumber(current);
				page = BufferGetPage(buf);
			}
			result = _bt_compare(rel, keysCount, scankeys, page, offnum);
			if (result != 0)
				goto nomatches; /* no equal items! */

			/*
			 * If a backward scan was specified, need to start with last
			 * equal item not first one.
			 */
			if (ScanDirectionIsBackward(dir))
			{
				do
				{
					if (!_bt_step(scan, &buf, ForwardScanDirection))
					{
						pfree(scankeys);
						return _bt_endpoint(scan, dir);
					}
					offnum = ItemPointerGetOffsetNumber(current);
					page = BufferGetPage(buf);
					result = _bt_compare(rel, keysCount, scankeys, page, offnum);
				} while (result == 0);
				/* went one past the last equal item; step back onto it */
				if (!_bt_step(scan, &buf, BackwardScanDirection))
					elog(ERROR, "equal items disappeared?");
			}
			break;

		case BTGreaterEqualStrategyNumber:

			/*
			 * We want the first item >= scankey, which is where we are...
			 * unless we're not anywhere at all...
			 */
			if (offnum > PageGetMaxOffsetNumber(page))
			{
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return false;
				}
			}
			break;

		case BTGreaterStrategyNumber:

			/*
			 * We want the first item > scankey, so make sure we are on an
			 * item and then step over any equal items.
			 */
			if (offnum > PageGetMaxOffsetNumber(page))
			{
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return false;
				}
				offnum = ItemPointerGetOffsetNumber(current);
				page = BufferGetPage(buf);
			}
			result = _bt_compare(rel, keysCount, scankeys, page, offnum);
			while (result == 0)
			{
				if (!_bt_step(scan, &buf, ForwardScanDirection))
				{
					pfree(scankeys);
					return false;
				}
				offnum = ItemPointerGetOffsetNumber(current);
				page = BufferGetPage(buf);
				result = _bt_compare(rel, keysCount, scankeys, page, offnum);
			}
			break;
	}

	/* okay, current item pointer for the scan is right */
	offnum = ItemPointerGetOffsetNumber(current);
	page = BufferGetPage(buf);
	btitem = (BTItem) PageGetItem(page, PageGetItemId(page, offnum));
	itup = &btitem->bti_itup;

	/* is the first item actually acceptable? */
	if (_bt_checkkeys(scan, itup, dir, &continuescan))
	{
		/* yes, return it */
		scan->xs_ctup.t_self = itup->t_tid;
		res = true;
	}
	else if (continuescan)
	{
		/* no, but there might be another one that is */
		res = _bt_next(scan, dir);
	}
	else
	{
		/*
		 * no tuples in the index match this scan key
		 *
		 * (The BTEqualStrategyNumber case above also jumps here when the
		 * item it is positioned on does not compare equal.)
		 */
nomatches:
		ItemPointerSetInvalid(current);
		so->btso_curbuf = InvalidBuffer;
		_bt_relbuf(rel, buf);
		res = false;
	}

	pfree(scankeys);

	return res;
}
/*
 *	_bt_endpoint() -- Position a scan on the first or last key in the index.
 *
 * _bt_first() calls this once it has determined that the scan has to begin
 * at an end of the index: the beginning for a forward scan, the end for a
 * backward scan.  Returns true if an item satisfying the scan keys was
 * found, false otherwise.
 */
static bool
_bt_endpoint(IndexScanDesc scan, ScanDirection dir)
{
	Relation	rel = scan->indexRelation;
	BTScanOpaque so = (BTScanOpaque) scan->opaque;
	ItemPointer curitem = &(scan->currentItemData);
	Buffer		endbuf;
	Page		endpage;
	BTPageOpaque pageinfo;
	OffsetNumber lastoff;
	OffsetNumber startoff;
	BlockNumber endblk;
	BTItem		item;
	IndexTuple	tuple;
	bool		keep_going;

	/*
	 * Descend to the leftmost or rightmost leaf page.  This is a simplified
	 * form of _bt_search(); no stack is kept because none will be needed.
	 */
	endbuf = _bt_get_endpoint(rel, 0, ScanDirectionIsBackward(dir));

	if (!BufferIsValid(endbuf))
	{
		/* the index is empty */
		ItemPointerSetInvalid(curitem);
		so->btso_curbuf = InvalidBuffer;
		return false;
	}

	endblk = BufferGetBlockNumber(endbuf);
	endpage = BufferGetPage(endbuf);
	pageinfo = (BTPageOpaque) PageGetSpecialPointer(endpage);
	Assert(P_ISLEAF(pageinfo));

	lastoff = PageGetMaxOffsetNumber(endpage);

	/* Pick the offset on this page at which the scan starts */
	if (ScanDirectionIsForward(dir))
	{
		/*
		 * Dead pages may exist to the left, so P_LEFTMOST cannot be
		 * asserted here.
		 */
		startoff = P_FIRSTDATAKEY(pageinfo);
	}
	else if (ScanDirectionIsBackward(dir))
	{
		Assert(P_RIGHTMOST(pageinfo));

		startoff = PageGetMaxOffsetNumber(endpage);
		/* an empty page would otherwise put us before the first data key */
		if (startoff < P_FIRSTDATAKEY(pageinfo))
			startoff = P_FIRSTDATAKEY(pageinfo);
	}
	else
	{
		elog(ERROR, "invalid scan direction: %d", (int) dir);
		startoff = 0;			/* keep compiler quiet */
	}

	ItemPointerSet(curitem, endblk, startoff);
	/* record the pinned buffer in the scan state */
	so->btso_curbuf = endbuf;

	/*
	 * Deletions can leave the endmost page empty; in that case step in the
	 * scan direction until a nonempty page turns up.
	 */
	if (startoff > lastoff)
	{
		if (!_bt_step(scan, &endbuf, dir))
			return false;
		startoff = ItemPointerGetOffsetNumber(curitem);
		endpage = BufferGetPage(endbuf);
	}

	item = (BTItem) PageGetItem(endpage, PageGetItemId(endpage, startoff));
	tuple = &(item->bti_itup);

	/* Does the item we landed on satisfy the scan keys? */
	if (_bt_checkkeys(scan, tuple, dir, &keep_going))
	{
		/* it does: report it */
		scan->xs_ctup.t_self = tuple->t_tid;
		return true;
	}

	/* it does not, but a later item still might */
	if (keep_going)
		return _bt_next(scan, dir);

	/* nothing in the index matches this scan key */
	ItemPointerSetInvalid(curitem);
	so->btso_curbuf = InvalidBuffer;
	_bt_relbuf(rel, endbuf);
	return false;
}