/*
 * hashgettuple() -- Get the next tuple in the scan.
 */
bool
hashgettuple(IndexScanDesc scan, ScanDirection dir)
{
    HashScanOpaque so = (HashScanOpaque) scan->opaque;
    Relation    rel = scan->indexRelation;
    Buffer      buf;
    Page        page;
    OffsetNumber offnum;
    ItemPointer current;
    bool        res;

    /* Hash indexes are always lossy since we store only the hash code */
    scan->xs_recheck = true;

    /*
     * We hold pin but not lock on current buffer while outside the hash AM.
     * Reacquire the read lock here.
     */
    if (BufferIsValid(so->hashso_curbuf))
        _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_NOLOCK, HASH_READ);

    /*
     * If we've already initialized this scan, we can just advance it in the
     * appropriate direction.  If we haven't done so yet, we call a routine
     * to get the first item in the scan.
     */
    current = &(so->hashso_curpos);
    if (ItemPointerIsValid(current))
    {
        /*
         * An insertion into the current index page could have happened while
         * we didn't have read lock on it.  Re-find our position by looking
         * for the TID we previously returned.  (Because we hold share lock
         * on the bucket, no deletions or splits could have occurred;
         * therefore we can expect that the TID still exists in the current
         * index page, at an offset >= where we were.)
         */
        OffsetNumber maxoffnum;

        buf = so->hashso_curbuf;
        Assert(BufferIsValid(buf));
        page = BufferGetPage(buf);
        TestForOldSnapshot(scan->xs_snapshot, rel, page);
        maxoffnum = PageGetMaxOffsetNumber(page);
        for (offnum = ItemPointerGetOffsetNumber(current);
             offnum <= maxoffnum;
             offnum = OffsetNumberNext(offnum))
        {
            IndexTuple  itup;

            itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum));
            if (ItemPointerEquals(&(so->hashso_heappos), &(itup->t_tid)))
                break;
        }
        if (offnum > maxoffnum)
            elog(ERROR, "failed to re-find scan position within index \"%s\"",
                 RelationGetRelationName(rel));
        ItemPointerSetOffsetNumber(current, offnum);

        /*
         * Check to see if we should kill the previously-fetched tuple.
         */
        if (scan->kill_prior_tuple)
        {
            /*
             * Yes, so mark it by setting the LP_DEAD state in the item flags.
             */
            ItemIdMarkDead(PageGetItemId(page, offnum));

            /*
             * Since this can be redone later if needed, mark as a hint.
             */
            MarkBufferDirtyHint(buf, true);
        }

        /*
         * Now continue the scan.
         */
        res = _hash_next(scan, dir);
    }
    else
        res = _hash_first(scan, dir);

    /*
     * Skip killed tuples if asked to.
     */
    if (scan->ignore_killed_tuples)
    {
        while (res)
        {
            offnum = ItemPointerGetOffsetNumber(current);
            page = BufferGetPage(so->hashso_curbuf);
            if (!ItemIdIsDead(PageGetItemId(page, offnum)))
                break;
            res = _hash_next(scan, dir);
        }
    }

    /* Release read lock on current buffer, but keep it pinned */
    if (BufferIsValid(so->hashso_curbuf))
        _hash_chgbufaccess(rel, so->hashso_curbuf, HASH_READ, HASH_NOLOCK);

    /* Return current heap TID on success */
    scan->xs_ctup.t_self = so->hashso_heappos;

    return res;
}
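/*
 * Editor's sketch (not part of the PostgreSQL source): the re-find step above
 * relies only on the invariant that concurrent insertions can push the
 * previously-returned TID to a higher offset on the page but can never remove
 * it.  The standalone program below models that search with plain arrays and
 * an invented SketchTid type; names and layout are illustrative assumptions,
 * not real hash AM structures.  Compile it separately from the code above.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct
{
    unsigned    block;          /* stand-in for a heap block number */
    unsigned    offset;         /* stand-in for a heap offset number */
} SketchTid;

/*
 * Re-find 'prev' in items[], starting at the offset where we last saw it.
 * Mirrors the loop in hashgettuple(): the TID may have moved to an equal or
 * higher offset, but it cannot have disappeared.
 */
static bool
refind_position(const SketchTid *items, size_t nitems, size_t last_off,
                SketchTid prev, size_t *found_off)
{
    for (size_t off = last_off; off < nitems; off++)
    {
        if (items[off].block == prev.block && items[off].offset == prev.offset)
        {
            *found_off = off;
            return true;
        }
    }
    return false;               /* corresponds to the elog(ERROR) path */
}

int
main(void)
{
    /* We last returned {10,5} at offset 1; two insertions pushed it to offset 3. */
    SketchTid   page_items[] = {{10, 1}, {11, 7}, {12, 3}, {10, 5}, {13, 2}};
    SketchTid   prev = {10, 5};
    size_t      off;

    if (refind_position(page_items, 5, 1, prev, &off))
        printf("previous TID re-found at offset %zu\n", off);   /* prints 3 */
    return 0;
}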
/*
 * Scan all items on the GiST index page identified by *pageItem, and insert
 * them into the queue (or directly to output areas)
 *
 * scan: index scan we are executing
 * pageItem: search queue item identifying an index page to scan
 * myDistances: distances array associated with pageItem, or NULL at the root
 * tbm: if not NULL, gistgetbitmap's output bitmap
 * ntids: if not NULL, gistgetbitmap's output tuple counter
 *
 * If tbm/ntids aren't NULL, we are doing an amgetbitmap scan, and heap
 * tuples should be reported directly into the bitmap.  If they are NULL,
 * we're doing a plain or ordered indexscan.  For a plain indexscan, heap
 * tuple TIDs are returned into so->pageData[].  For an ordered indexscan,
 * heap tuple TIDs are pushed into individual search queue items.  In an
 * index-only scan, reconstructed index tuples are returned along with the
 * TIDs.
 *
 * If we detect that the index page has split since we saw its downlink
 * in the parent, we push its new right sibling onto the queue so the
 * sibling will be processed next.
 */
static void
gistScanPage(IndexScanDesc scan, GISTSearchItem *pageItem, double *myDistances,
             TIDBitmap *tbm, int64 *ntids)
{
    GISTScanOpaque so = (GISTScanOpaque) scan->opaque;
    GISTSTATE  *giststate = so->giststate;
    Relation    r = scan->indexRelation;
    Buffer      buffer;
    Page        page;
    GISTPageOpaque opaque;
    OffsetNumber maxoff;
    OffsetNumber i;
    MemoryContext oldcxt;

    Assert(!GISTSearchItemIsHeap(*pageItem));

    buffer = ReadBuffer(scan->indexRelation, pageItem->blkno);
    LockBuffer(buffer, GIST_SHARE);
    PredicateLockPage(r, BufferGetBlockNumber(buffer), scan->xs_snapshot);
    gistcheckpage(scan->indexRelation, buffer);
    page = BufferGetPage(buffer);
    TestForOldSnapshot(scan->xs_snapshot, r, page);
    opaque = GistPageGetOpaque(page);

    /*
     * Check if we need to follow the rightlink.  We need to follow it if the
     * page was concurrently split since we visited the parent (in which case
     * parentlsn < nsn), or if the system crashed after a page split but
     * before the downlink was inserted into the parent.
     */
    if (!XLogRecPtrIsInvalid(pageItem->data.parentlsn) &&
        (GistFollowRight(page) ||
         pageItem->data.parentlsn < GistPageGetNSN(page)) &&
        opaque->rightlink != InvalidBlockNumber /* sanity check */ )
    {
        /* There was a page split, follow right link to add pages */
        GISTSearchItem *item;

        /* This can't happen when starting at the root */
        Assert(myDistances != NULL);

        oldcxt = MemoryContextSwitchTo(so->queueCxt);

        /* Create new GISTSearchItem for the right sibling index page */
        item = palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys));
        item->blkno = opaque->rightlink;
        item->data.parentlsn = pageItem->data.parentlsn;

        /* Insert it into the queue using same distances as for this page */
        memcpy(item->distances, myDistances,
               sizeof(double) * scan->numberOfOrderBys);

        pairingheap_add(so->queue, &item->phNode);

        MemoryContextSwitchTo(oldcxt);
    }

    so->nPageData = so->curPageData = 0;
    scan->xs_hitup = NULL;      /* might point into pageDataCxt */
    if (so->pageDataCxt)
        MemoryContextReset(so->pageDataCxt);

    /*
     * We save the LSN of the page as we read it, so that we know whether it
     * is safe to apply LP_DEAD hints to the page later.  This allows us to
     * drop the pin for MVCC scans, which allows vacuum to avoid blocking.
     */
    so->curPageLSN = BufferGetLSNAtomic(buffer);

    /*
     * check all tuples on page
     */
    maxoff = PageGetMaxOffsetNumber(page);
    for (i = FirstOffsetNumber; i <= maxoff; i = OffsetNumberNext(i))
    {
        ItemId      iid = PageGetItemId(page, i);
        IndexTuple  it;
        bool        match;
        bool        recheck;
        bool        recheck_distances;

        /*
         * If the scan specifies not to return killed tuples, then we treat a
         * killed tuple as not passing the qual.
         */
        if (scan->ignore_killed_tuples && ItemIdIsDead(iid))
            continue;

        it = (IndexTuple) PageGetItem(page, iid);

        /*
         * Must call gistindex_keytest in tempCxt, and clean up any leftover
         * junk afterward.
         */
        oldcxt = MemoryContextSwitchTo(so->giststate->tempCxt);

        match = gistindex_keytest(scan, it, page, i,
                                  &recheck, &recheck_distances);

        MemoryContextSwitchTo(oldcxt);
        MemoryContextReset(so->giststate->tempCxt);

        /* Ignore tuple if it doesn't match */
        if (!match)
            continue;

        if (tbm && GistPageIsLeaf(page))
        {
            /*
             * getbitmap scan, so just push heap tuple TIDs into the bitmap
             * without worrying about ordering
             */
            tbm_add_tuples(tbm, &it->t_tid, 1, recheck);
            (*ntids)++;
        }
        else if (scan->numberOfOrderBys == 0 && GistPageIsLeaf(page))
        {
            /*
             * Non-ordered scan, so report tuples in so->pageData[]
             */
            so->pageData[so->nPageData].heapPtr = it->t_tid;
            so->pageData[so->nPageData].recheck = recheck;
            so->pageData[so->nPageData].offnum = i;

            /*
             * In an index-only scan, also fetch the data from the tuple.
             * The reconstructed tuples are stored in pageDataCxt.
             */
            if (scan->xs_want_itup)
            {
                oldcxt = MemoryContextSwitchTo(so->pageDataCxt);
                so->pageData[so->nPageData].recontup =
                    gistFetchTuple(giststate, r, it);
                MemoryContextSwitchTo(oldcxt);
            }
            so->nPageData++;
        }
        else
        {
            /*
             * Must push item into search queue.  We get here for any lower
             * index page, and also for heap tuples if doing an ordered
             * search.
             */
            GISTSearchItem *item;

            oldcxt = MemoryContextSwitchTo(so->queueCxt);

            /* Create new GISTSearchItem for this item */
            item = palloc(SizeOfGISTSearchItem(scan->numberOfOrderBys));

            if (GistPageIsLeaf(page))
            {
                /* Creating heap-tuple GISTSearchItem */
                item->blkno = InvalidBlockNumber;
                item->data.heap.heapPtr = it->t_tid;
                item->data.heap.recheck = recheck;
                item->data.heap.recheckDistances = recheck_distances;

                /*
                 * In an index-only scan, also fetch the data from the tuple.
                 */
                if (scan->xs_want_itup)
                    item->data.heap.recontup = gistFetchTuple(giststate, r, it);
            }
            else
            {
                /* Creating index-page GISTSearchItem */
                item->blkno = ItemPointerGetBlockNumber(&it->t_tid);

                /*
                 * LSN of current page is lsn of parent page for child.  We
                 * only have a shared lock, so we need to get the LSN
                 * atomically.
                 */
                item->data.parentlsn = BufferGetLSNAtomic(buffer);
            }

            /* Insert it into the queue using new distance data */
            memcpy(item->distances, so->distances,
                   sizeof(double) * scan->numberOfOrderBys);

            pairingheap_add(so->queue, &item->phNode);

            MemoryContextSwitchTo(oldcxt);
        }
    }

    UnlockReleaseBuffer(buffer);
}
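/*
 * Editor's sketch (not part of the PostgreSQL source): for an ordered
 * nearest-neighbor scan, the important point above is that internal pages and
 * leaf heap tuples go into the same priority queue, keyed by distance, so the
 * scan interleaves "descend into a page" and "return a heap tuple" in
 * best-first order.  The standalone program below models that with a
 * linear-scan queue and an invented SketchItem type instead of the real
 * pairing heap and GISTSearchItem; it is an illustration under those
 * assumptions, not GiST code.
 */
#include <stdbool.h>
#include <stdio.h>

typedef struct
{
    bool        isHeap;         /* true = heap tuple, false = index page */
    int         id;             /* block number or tuple id, for printing */
    double      distance;       /* lower bound (page) or actual distance (tuple) */
} SketchItem;

#define QUEUE_MAX 32

static SketchItem queue[QUEUE_MAX];
static int  queue_len = 0;

static void
push_item(bool isHeap, int id, double distance)
{
    queue[queue_len++] = (SketchItem) {isHeap, id, distance};
}

/* Pop the item with the smallest distance; a linear scan stands in for the heap. */
static bool
pop_min(SketchItem *out)
{
    int         best = -1;

    for (int i = 0; i < queue_len; i++)
        if (best < 0 || queue[i].distance < queue[best].distance)
            best = i;
    if (best < 0)
        return false;
    *out = queue[best];
    queue[best] = queue[--queue_len];
    return true;
}

int
main(void)
{
    SketchItem  item;

    /* Pretend a page scan pushed two child pages and one leaf heap tuple. */
    push_item(false, 42, 1.5);  /* everything under page 42 is >= 1.5 away */
    push_item(true, 7, 0.8);    /* heap tuple exactly 0.8 away */
    push_item(false, 43, 2.0);

    /*
     * Best-first loop: a heap tuple can be returned as soon as it is nearer
     * than every remaining page, because page distances are lower bounds.
     */
    while (pop_min(&item))
    {
        if (item.isHeap)
            printf("return heap tuple %d (distance %.1f)\n", item.id, item.distance);
        else
            printf("scan page %d next (lower bound %.1f)\n", item.id, item.distance);
    }
    return 0;
}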
/*
 * Insert all matching tuples into a bitmap.
 */
int64
blgetbitmap(IndexScanDesc scan, TIDBitmap *tbm)
{
    int64       ntids = 0;
    BlockNumber blkno = BLOOM_HEAD_BLKNO,
                npages;
    int         i;
    BufferAccessStrategy bas;
    BloomScanOpaque so = (BloomScanOpaque) scan->opaque;

    if (so->sign == NULL)
    {
        /* New search: have to calculate search signature */
        ScanKey     skey = scan->keyData;

        so->sign = palloc0(sizeof(SignType) * so->state.opts.bloomLength);

        for (i = 0; i < scan->numberOfKeys; i++)
        {
            /*
             * Assume bloom-indexable operators are strict, so nothing can be
             * found for a NULL key.
             */
            if (skey->sk_flags & SK_ISNULL)
            {
                pfree(so->sign);
                so->sign = NULL;
                return 0;
            }

            /* Add next value to the signature */
            signValue(&so->state, so->sign, skey->sk_argument,
                      skey->sk_attno - 1);

            skey++;
        }
    }

    /*
     * We're going to read the whole index, so use a bulk-read buffer access
     * strategy.
     */
    bas = GetAccessStrategy(BAS_BULKREAD);
    npages = RelationGetNumberOfBlocks(scan->indexRelation);

    for (blkno = BLOOM_HEAD_BLKNO; blkno < npages; blkno++)
    {
        Buffer      buffer;
        Page        page;

        buffer = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM,
                                    blkno, RBM_NORMAL, bas);

        LockBuffer(buffer, BUFFER_LOCK_SHARE);
        page = BufferGetPage(buffer);
        TestForOldSnapshot(scan->xs_snapshot, scan->indexRelation, page);

        if (!BloomPageIsDeleted(page))
        {
            OffsetNumber offset,
                        maxOffset = BloomPageGetMaxOffset(page);

            for (offset = 1; offset <= maxOffset; offset++)
            {
                BloomTuple *itup = BloomPageGetTuple(&so->state, page, offset);
                bool        res = true;

                /* Check the index signature against the scan signature */
                for (i = 0; i < so->state.opts.bloomLength; i++)
                {
                    if ((itup->sign[i] & so->sign[i]) != so->sign[i])
                    {
                        res = false;
                        break;
                    }
                }

                /* Add matching tuples to bitmap */
                if (res)
                {
                    tbm_add_tuples(tbm, &itup->heapPtr, 1, true);
                    ntids++;
                }
            }
        }

        UnlockReleaseBuffer(buffer);
        CHECK_FOR_INTERRUPTS();
    }

    FreeAccessStrategy(bas);

    return ntids;
}
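/*
 * Editor's sketch (not part of the PostgreSQL source): the inner matching test
 * above is plain bitwise containment -- a tuple can match only if every bit
 * set in the scan signature is also set in the tuple's signature, which allows
 * false positives (hence the lossy bitmap) but never false negatives.  The
 * standalone program below repeats that test over an invented fixed-size
 * signature; the word type and size are assumptions, not the real SignType.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SIGN_WORDS 4

/*
 * True if every bit set in 'scan' is also set in 'tuple' -- the inverse of the
 * (itup->sign[i] & so->sign[i]) != so->sign[i] rejection test in blgetbitmap().
 */
static bool
signature_contains(const uint32_t tuple[SIGN_WORDS],
                   const uint32_t scan[SIGN_WORDS])
{
    for (int i = 0; i < SIGN_WORDS; i++)
        if ((tuple[i] & scan[i]) != scan[i])
            return false;
    return true;
}

int
main(void)
{
    uint32_t    tuple[SIGN_WORDS] = {0x0000F0F0, 0x00000001, 0, 0x80000000};
    uint32_t    match[SIGN_WORDS] = {0x000000F0, 0x00000001, 0, 0};
    uint32_t    miss[SIGN_WORDS]  = {0x000000F0, 0x00000002, 0, 0};

    printf("%d %d\n",
           signature_contains(tuple, match),    /* 1: all scan bits present */
           signature_contains(tuple, miss));    /* 0: one scan bit is missing */
    return 0;
}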
/*
 * Descend the tree to the leaf page that contains or would contain the key
 * we're searching for.  The key should already be filled in 'btree', in a
 * tree-type-specific manner.  If btree->fullScan is true, descend to the
 * leftmost leaf page.
 *
 * If 'searchMode' is false, on return stack->buffer is exclusively locked,
 * and the stack represents the full path to the root.  Otherwise
 * stack->buffer is share-locked, and stack->parent is NULL.
 */
GinBtreeStack *
ginFindLeafPage(GinBtree btree, bool searchMode, Snapshot snapshot)
{
    GinBtreeStack *stack;

    stack = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));
    stack->blkno = btree->rootBlkno;
    stack->buffer = ReadBuffer(btree->index, btree->rootBlkno);
    stack->parent = NULL;
    stack->predictNumber = 1;

    for (;;)
    {
        Page        page;
        BlockNumber child;
        int         access;

        stack->off = InvalidOffsetNumber;

        page = BufferGetPage(stack->buffer);
        TestForOldSnapshot(snapshot, btree->index, page);

        access = ginTraverseLock(stack->buffer, searchMode);

        /*
         * If we're going to modify the tree, finish any incomplete splits we
         * encounter on the way.
         */
        if (!searchMode && GinPageIsIncompleteSplit(page))
            ginFinishSplit(btree, stack, false, NULL);

        /*
         * The page is now correctly locked; check whether we need to move
         * right.  The root never has a right link, so this is a small
         * optimization.
         */
        while (btree->fullScan == FALSE && stack->blkno != btree->rootBlkno &&
               btree->isMoveRight(btree, page))
        {
            BlockNumber rightlink = GinPageGetOpaque(page)->rightlink;

            if (rightlink == InvalidBlockNumber)
                break;          /* rightmost page */

            stack->buffer = ginStepRight(stack->buffer, btree->index, access);
            stack->blkno = rightlink;
            page = BufferGetPage(stack->buffer);
            TestForOldSnapshot(snapshot, btree->index, page);

            if (!searchMode && GinPageIsIncompleteSplit(page))
                ginFinishSplit(btree, stack, false, NULL);
        }

        if (GinPageIsLeaf(page))    /* found the leaf, return the locked page */
            return stack;

        /* now we have the correct buffer, try to find the child */
        child = btree->findChildPage(btree, stack);

        LockBuffer(stack->buffer, GIN_UNLOCK);
        Assert(child != InvalidBlockNumber);
        Assert(stack->blkno != child);

        if (searchMode)
        {
            /* in search mode we can forget the path to the leaf */
            stack->blkno = child;
            stack->buffer = ReleaseAndReadBuffer(stack->buffer, btree->index,
                                                 stack->blkno);
        }
        else
        {
            GinBtreeStack *ptr = (GinBtreeStack *) palloc(sizeof(GinBtreeStack));

            ptr->parent = stack;
            stack = ptr;
            stack->blkno = child;
            stack->buffer = ReadBuffer(btree->index, stack->blkno);
            stack->predictNumber = 1;
        }
    }
}
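/*
 * Editor's sketch (not part of the PostgreSQL source): stripped of locking and
 * buffer management, the loop above is the usual "descend, and move right if a
 * concurrent split carried the key past this page" pattern.  The standalone
 * program below models it with an invented in-memory SketchNode; there is no
 * real concurrency here, the stale downlink in main() simply simulates a split
 * that the descent raced against.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

typedef struct SketchNode
{
    bool        isLeaf;
    int         highKey;                /* keys on this node are <= highKey */
    struct SketchNode *right;           /* right sibling (set after a split) */
    int         nchildren;
    int         childHigh[4];           /* high key of each child subtree */
    struct SketchNode *child[4];
} SketchNode;

/*
 * Descend from the root to the leaf that should hold 'key'.  Mirrors
 * ginFindLeafPage(): at each level, move right while the key lies beyond this
 * node's high key (a split would have carried it there), then pick the child
 * whose range covers the key and go down one level.
 */
static SketchNode *
find_leaf(SketchNode *root, int key)
{
    SketchNode *node = root;

    for (;;)
    {
        /* move right; the root never needs this, siblings of split pages do */
        while (node->right != NULL && key > node->highKey)
            node = node->right;

        if (node->isLeaf)
            return node;

        /* find the child covering 'key'; the last child catches the rest */
        int         i = 0;

        while (i < node->nchildren - 1 && key > node->childHigh[i])
            i++;
        node = node->child[i];
    }
}

int
main(void)
{
    /* Leaf A split into A (keys <= 10) and B (keys <= 20); the root still points only at A. */
    SketchNode  leafB = {true, 20, NULL, 0, {0}, {NULL}};
    SketchNode  leafA = {true, 10, &leafB, 0, {0}, {NULL}};
    SketchNode  root = {false, 20, NULL, 1, {20}, {&leafA}};
    SketchNode *leaf = find_leaf(&root, 15);

    printf("key 15 lands on the leaf with highKey %d\n", leaf->highKey);    /* 20 */
    return 0;
}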