/* * _hash_step() -- step to the next valid item in a scan in the bucket. * * If no valid record exists in the requested direction, return * false. Else, return true and set the CurrentItemData for the * scan to the right thing. * * 'bufP' points to the current buffer, which is pinned and read-locked. * On success exit, we have pin and read-lock on whichever page * contains the right item; on failure, we have released all buffers. */ bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) { Relation rel = scan->indexRelation; HashScanOpaque so = (HashScanOpaque) scan->opaque; ItemPointer current; Buffer buf; Page page; HashPageOpaque opaque; OffsetNumber maxoff; OffsetNumber offnum; BlockNumber blkno; IndexTuple itup; current = &(scan->currentItemData); buf = *bufP; _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); opaque = (HashPageOpaque) PageGetSpecialPointer(page); /* * If _hash_step is called from _hash_first, current will not be valid, so * we can't dereference it. However, in that case, we presumably want to * start at the beginning/end of the page... */ maxoff = PageGetMaxOffsetNumber(page); if (ItemPointerIsValid(current)) offnum = ItemPointerGetOffsetNumber(current); else offnum = InvalidOffsetNumber; /* * 'offnum' now points to the last tuple we examined (if any). * * continue to step through tuples until: 1) we get to the end of the * bucket chain or 2) we find a valid tuple. */ do { switch (dir) { case ForwardScanDirection: if (offnum != InvalidOffsetNumber) offnum = OffsetNumberNext(offnum); /* move forward */ else offnum = FirstOffsetNumber; /* new page */ while (offnum > maxoff) { /* * either this page is empty (maxoff == * InvalidOffsetNumber) or we ran off the end. */ _hash_readnext(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) { maxoff = PageGetMaxOffsetNumber(page); offnum = FirstOffsetNumber; } else { /* end of bucket */ maxoff = offnum = InvalidOffsetNumber; break; /* exit while */ } } break; case BackwardScanDirection: if (offnum != InvalidOffsetNumber) offnum = OffsetNumberPrev(offnum); /* move back */ else offnum = maxoff; /* new page */ while (offnum < FirstOffsetNumber) { /* * either this page is empty (offnum == * InvalidOffsetNumber) or we ran off the end. */ _hash_readprev(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) maxoff = offnum = PageGetMaxOffsetNumber(page); else { /* end of bucket */ maxoff = offnum = InvalidOffsetNumber; break; /* exit while */ } } break; default: /* NoMovementScanDirection */ /* this should not be reached */ break; } /* we ran off the end of the world without finding a match */ if (offnum == InvalidOffsetNumber) { /* we ran off the end of the bucket without finding a match */ *bufP = so->hashso_curbuf = InvalidBuffer; ItemPointerSetInvalid(current); return false; } /* get ready to check this tuple */ itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); } while (!_hash_checkqual(scan, itup)); /* if we made it to here, we've found a valid tuple */ blkno = BufferGetBlockNumber(buf); *bufP = so->hashso_curbuf = buf; ItemPointerSet(current, blkno, offnum); return true; }
/* * _hash_first() -- Find the first item in a scan. * * Find the first item in the index that * satisfies the qualification associated with the scan descriptor. On * success, the page containing the current index tuple is read locked * and pinned, and the scan's opaque data entry is updated to * include the buffer. */ bool _hash_first(IndexScanDesc scan, ScanDirection dir) { Relation rel = scan->indexRelation; HashScanOpaque so = (HashScanOpaque) scan->opaque; uint32 hashkey; Bucket bucket; BlockNumber blkno; Buffer buf; Buffer metabuf; Page page; HashPageOpaque opaque; HashMetaPage metap; IndexTuple itup; ItemPointer current; OffsetNumber offnum; MIRROREDLOCK_BUFMGR_MUST_ALREADY_BE_HELD; pgstat_count_index_scan(rel); current = &(scan->currentItemData); ItemPointerSetInvalid(current); /* * We do not support hash scans with no index qualification, because we * would have to read the whole index rather than just one bucket. That * creates a whole raft of problems, since we haven't got a practical way * to lock all the buckets against splits or compactions. */ if (scan->numberOfKeys < 1) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("hash indexes do not support whole-index scans"))); /* * If the constant in the index qual is NULL, assume it cannot match any * items in the index. */ if (scan->keyData[0].sk_flags & SK_ISNULL) return false; /* * Okay to compute the hash key. We want to do this before acquiring any * locks, in case a user-defined hash function happens to be slow. */ hashkey = _hash_datum2hashkey(rel, scan->keyData[0].sk_argument); /* * Acquire shared split lock so we can compute the target bucket safely * (see README). */ _hash_getlock(rel, 0, HASH_SHARE); /* Read the metapage */ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ); _hash_checkpage(rel, metabuf, LH_META_PAGE); metap = (HashMetaPage) BufferGetPage(metabuf); /* * Compute the target bucket number, and convert to block number. */ bucket = _hash_hashkey2bucket(hashkey, metap->hashm_maxbucket, metap->hashm_highmask, metap->hashm_lowmask); blkno = BUCKET_TO_BLKNO(metap, bucket); /* done with the metapage */ _hash_relbuf(rel, metabuf); /* * Acquire share lock on target bucket; then we can release split lock. */ _hash_getlock(rel, blkno, HASH_SHARE); _hash_droplock(rel, 0, HASH_SHARE); /* Update scan opaque state to show we have lock on the bucket */ so->hashso_bucket = bucket; so->hashso_bucket_valid = true; so->hashso_bucket_blkno = blkno; /* Fetch the primary bucket page for the bucket */ buf = _hash_getbuf(rel, blkno, HASH_READ); _hash_checkpage(rel, buf, LH_BUCKET_PAGE); page = BufferGetPage(buf); opaque = (HashPageOpaque) PageGetSpecialPointer(page); Assert(opaque->hasho_bucket == bucket); /* If a backwards scan is requested, move to the end of the chain */ if (ScanDirectionIsBackward(dir)) { while (BlockNumberIsValid(opaque->hasho_nextblkno)) _hash_readnext(rel, &buf, &page, &opaque); } /* Now find the first tuple satisfying the qualification */ if (!_hash_step(scan, &buf, dir)) return false; /* if we're here, _hash_step found a valid tuple */ offnum = ItemPointerGetOffsetNumber(current); _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); scan->xs_ctup.t_self = itup->t_tid; return true; }
/* * _hash_step() -- step to the next valid item in a scan in the bucket. * * If no valid record exists in the requested direction, return * false. Else, return true and set the hashso_curpos for the * scan to the right thing. * * 'bufP' points to the current buffer, which is pinned and read-locked. * On success exit, we have pin and read-lock on whichever page * contains the right item; on failure, we have released all buffers. */ bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir) { Relation rel = scan->indexRelation; HashScanOpaque so = (HashScanOpaque) scan->opaque; ItemPointer current; Buffer buf; Page page; HashPageOpaque opaque; OffsetNumber maxoff; OffsetNumber offnum; BlockNumber blkno; IndexTuple itup; current = &(so->hashso_curpos); buf = *bufP; _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); opaque = (HashPageOpaque) PageGetSpecialPointer(page); /* * If _hash_step is called from _hash_first, current will not be valid, so * we can't dereference it. However, in that case, we presumably want to * start at the beginning/end of the page... */ maxoff = PageGetMaxOffsetNumber(page); if (ItemPointerIsValid(current)) offnum = ItemPointerGetOffsetNumber(current); else offnum = InvalidOffsetNumber; /* * 'offnum' now points to the last tuple we examined (if any). * * continue to step through tuples until: 1) we get to the end of the * bucket chain or 2) we find a valid tuple. */ do { switch (dir) { case ForwardScanDirection: if (offnum != InvalidOffsetNumber) offnum = OffsetNumberNext(offnum); /* move forward */ else { /* new page, locate starting position by binary search */ offnum = _hash_binsearch(page, so->hashso_sk_hash); } for (;;) { /* * check if we're still in the range of items with the * target hash key */ if (offnum <= maxoff) { Assert(offnum >= FirstOffsetNumber); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup)) break; /* yes, so exit for-loop */ } /* * ran off the end of this page, try the next */ _hash_readnext(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) { maxoff = PageGetMaxOffsetNumber(page); offnum = _hash_binsearch(page, so->hashso_sk_hash); } else { /* end of bucket */ itup = NULL; break; /* exit for-loop */ } } break; case BackwardScanDirection: if (offnum != InvalidOffsetNumber) offnum = OffsetNumberPrev(offnum); /* move back */ else { /* new page, locate starting position by binary search */ offnum = _hash_binsearch_last(page, so->hashso_sk_hash); } for (;;) { /* * check if we're still in the range of items with the * target hash key */ if (offnum >= FirstOffsetNumber) { Assert(offnum <= maxoff); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup)) break; /* yes, so exit for-loop */ } /* * ran off the end of this page, try the next */ _hash_readprev(rel, &buf, &page, &opaque); if (BufferIsValid(buf)) { maxoff = PageGetMaxOffsetNumber(page); offnum = _hash_binsearch_last(page, so->hashso_sk_hash); } else { /* end of bucket */ itup = NULL; break; /* exit for-loop */ } } break; default: /* NoMovementScanDirection */ /* this should not be reached */ itup = NULL; break; } if (itup == NULL) { /* we ran off the end of the bucket without finding a match */ *bufP = so->hashso_curbuf = InvalidBuffer; ItemPointerSetInvalid(current); return false; } /* check the tuple quals, loop around if not met */ } while (!_hash_checkqual(scan, itup)); /* if we made it to here, we've found a valid tuple */ blkno = BufferGetBlockNumber(buf); *bufP = so->hashso_curbuf = buf; ItemPointerSet(current, blkno, offnum); return true; }
/* * _hash_first() -- Find the first item in a scan. * * Find the first item in the index that * satisfies the qualification associated with the scan descriptor. On * success, the page containing the current index tuple is read locked * and pinned, and the scan's opaque data entry is updated to * include the buffer. */ bool _hash_first(IndexScanDesc scan, ScanDirection dir) { Relation rel = scan->indexRelation; HashScanOpaque so = (HashScanOpaque) scan->opaque; ScanKey cur; uint32 hashkey; Bucket bucket; BlockNumber blkno; Buffer buf; Buffer metabuf; Page page; HashPageOpaque opaque; HashMetaPage metap; IndexTuple itup; ItemPointer current; OffsetNumber offnum; pgstat_count_index_scan(rel); current = &(so->hashso_curpos); ItemPointerSetInvalid(current); /* * We do not support hash scans with no index qualification, because we * would have to read the whole index rather than just one bucket. That * creates a whole raft of problems, since we haven't got a practical way * to lock all the buckets against splits or compactions. */ if (scan->numberOfKeys < 1) ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("hash indexes do not support whole-index scans"))); /* There may be more than one index qual, but we hash only the first */ cur = &scan->keyData[0]; /* We support only single-column hash indexes */ Assert(cur->sk_attno == 1); /* And there's only one operator strategy, too */ Assert(cur->sk_strategy == HTEqualStrategyNumber); /* * If the constant in the index qual is NULL, assume it cannot match any * items in the index. */ if (cur->sk_flags & SK_ISNULL) return false; /* * Okay to compute the hash key. We want to do this before acquiring any * locks, in case a user-defined hash function happens to be slow. * * If scankey operator is not a cross-type comparison, we can use the * cached hash function; otherwise gotta look it up in the catalogs. * * We support the convention that sk_subtype == InvalidOid means the * opclass input type; this is a hack to simplify life for ScanKeyInit(). */ if (cur->sk_subtype == rel->rd_opcintype[0] || cur->sk_subtype == InvalidOid) hashkey = _hash_datum2hashkey(rel, cur->sk_argument); else hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument, cur->sk_subtype); so->hashso_sk_hash = hashkey; /* * Acquire shared split lock so we can compute the target bucket safely * (see README). */ _hash_getlock(rel, 0, HASH_SHARE); /* Read the metapage */ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = HashPageGetMeta(BufferGetPage(metabuf)); /* * Compute the target bucket number, and convert to block number. */ bucket = _hash_hashkey2bucket(hashkey, metap->hashm_maxbucket, metap->hashm_highmask, metap->hashm_lowmask); blkno = BUCKET_TO_BLKNO(metap, bucket); /* done with the metapage */ _hash_relbuf(rel, metabuf); /* * Acquire share lock on target bucket; then we can release split lock. */ _hash_getlock(rel, blkno, HASH_SHARE); _hash_droplock(rel, 0, HASH_SHARE); /* Update scan opaque state to show we have lock on the bucket */ so->hashso_bucket = bucket; so->hashso_bucket_valid = true; so->hashso_bucket_blkno = blkno; /* Fetch the primary bucket page for the bucket */ buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE); page = BufferGetPage(buf); opaque = (HashPageOpaque) PageGetSpecialPointer(page); Assert(opaque->hasho_bucket == bucket); /* If a backwards scan is requested, move to the end of the chain */ if (ScanDirectionIsBackward(dir)) { while (BlockNumberIsValid(opaque->hasho_nextblkno)) _hash_readnext(rel, &buf, &page, &opaque); } /* Now find the first tuple satisfying the qualification */ if (!_hash_step(scan, &buf, dir)) return false; /* if we're here, _hash_step found a valid tuple */ offnum = ItemPointerGetOffsetNumber(current); _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BufferGetPage(buf); itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offnum)); so->hashso_heappos = itup->t_tid; return true; }
/* * _hash_step() -- step to the next valid item in a scan in the bucket. * * If no valid record exists in the requested direction, return * false. Else, return true and set the hashso_curpos for the * scan to the right thing. * * 'bufP' points to the current buffer, which is pinned and read-locked. * On success exit, we have pin and read-lock on whichever page * contains the right item; on failure, we have released all buffers. */ bool _hash_step(struct index_scan* scan, buf_id_t* bufP, enum scandir dir) { struct relation *rel = scan->indexRelation; struct hash_scan_opaque_data *so = (struct hash_scan_opaque_data *)scan->opaque; struct item_ptr *current; buf_id_t buf; page_p page; struct hash_page *opaque; item_id_t maxoff; item_id_t offnum; block_t blkno; struct index_tuple *itup; current = &(so->hashso_curpos); buf = *bufP; _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BUF_PAGE(buf); opaque = (struct hash_page *)PAGE_SPECIAL_PTR(page); /* * If _hash_step is called from _hash_first, current will not be valid, so * we can't dereference it. However, in that case, we presumably want to * start at the beginning/end of the page... */ maxoff = PAGE_MAX_ITEM_ID(page); if (ITEM_PTR_VALID(current)) offnum = ITEM_PTR_OFFSET(current); else offnum = INVALID_ITEM_ID; /* * 'offnum' now points to the last tuple we examined (if any). * * continue to step through tuples until: 1) we get to the end of the * bucket chain or 2) we find a valid tuple. */ do { switch (dir) { case FORWARD_SCANDIR: if (offnum != INVALID_ITEM_ID) { offnum = ITEM_ID_NEXT(offnum); /* move forward */ } else { /* new page, locate starting position by binary search */ offnum = _hash_binsearch(page, so->hashso_sk_hash); } for (;;) { /* * check if we're still in the range of items with the * target hash key */ if (offnum <= maxoff) { ASSERT(offnum >= FIRST_ITEM_ID); itup = (struct index_tuple*) PAGE_GET_ITEM( page, PAGE_ITEM_ID(page, offnum)); if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup)) break; /* yes, so exit for-loop */ } /* * ran off the end of this page, try the next */ _hash_readnext(rel, &buf, &page, &opaque); if (BUF_VALID(buf)) { maxoff = PAGE_MAX_ITEM_ID(page); offnum = _hash_binsearch(page, so->hashso_sk_hash); } else { /* end of bucket */ itup = NULL; break; /* exit for-loop */ } } break; case BACKWARD_SCANDIR: if (offnum != INVALID_ITEM_ID) offnum = ITEM_ID_PREV(offnum); /* move back */ else { /* new page, locate starting position by binary search */ offnum = _hash_binsearch_last(page, so->hashso_sk_hash); } for (;;) { /* * check if we're still in the range of items with the * target hash key */ if (offnum >= FIRST_ITEM_ID) { ASSERT(offnum <= maxoff); itup = (struct index_tuple*) PAGE_GET_ITEM( page, PAGE_ITEM_ID(page, offnum)); if (so->hashso_sk_hash == _hash_get_indextuple_hashkey(itup)) break; /* yes, so exit for-loop */ } /* * ran off the end of this page, try the next */ _hash_readprev(rel, &buf, &page, &opaque); if (BUF_VALID(buf)) { maxoff = PAGE_MAX_ITEM_ID(page); offnum = _hash_binsearch_last(page, so->hashso_sk_hash); } else { /* end of bucket */ itup = NULL; break; /* exit for-loop */ } } break; default: /* NO_MOVEMENT_SCANDIR */ /* this should not be reached */ itup = NULL; break; } if (itup == NULL) { /* we ran off the end of the bucket without finding a match */ *bufP = so->hashso_curbuf = INVALID_BUF; ITEM_PTR_SET_INVALID(current); return false; } /* check the tuple quals, loop around if not met */ } while (!_hash_checkqual(scan, itup)); /* if we made it to here, we've found a valid tuple */ blkno = buf_block_nr(buf); *bufP = so->hashso_curbuf = buf; ITEM_PTR_SET(current, blkno, offnum); return true; }
/* * _hash_first() -- Find the first item in a scan. * * Find the first item in the index that * satisfies the qualification associated with the scan descriptor. On * success, the page containing the current index tuple is read locked * and pinned, and the scan's opaque data entry is updated to * include the buffer. */ bool _hash_first(struct index_scan *scan, enum scandir dir) { struct relation *rel = scan->indexRelation; struct hash_scan_opaque_data *so = (struct hash_scan_opaque_data *)scan->opaque; struct scankey *cur; uint32 hashkey; bucket_t bucket; block_t blkno; buf_id_t buf; buf_id_t metabuf; page_p page; struct hash_page *opaque; struct hash_meta_page_data *metap; struct index_tuple *itup; struct item_ptr *current; item_id_t offnum; stat_index_scan(rel); current = &(so->hashso_curpos); ITEM_PTR_SET_INVALID(current); /* * We do not support hash scans with no index qualification, because we * would have to read the whole index rather than just one bucket. That * creates a whole raft of problems, since we haven't got a practical way * to lock all the buckets against splits or compactions. */ if (scan->numberOfKeys < 1) { ereport(ERROR, ( errcode(E_FEATURE_NOT_SUPPORTED), errmsg("hash indexes do not support whole-index scans"))); } /* There may be more than one index qual, but we hash only the first */ cur = &scan->keyData[0]; /* We support only single-column hash indexes */ ASSERT(cur->sk_attno == 1); /* And there's only one operator strategy, too */ ASSERT(cur->sk_strategy == HT_EQ_STRATEGY_NR); /* * If the constant in the index qual is NULL, assume it cannot match any * items in the index. */ if (cur->sk_flags & SK_ISNULL) return false; /* * Okay to compute the hash key. We want to do this before acquiring any * locks, in case a user-defined hash function happens to be slow. * * If scankey operator is not a cross-type comparison, we can use the * cached hash function; otherwise gotta look it up in the catalogs. * * We support the convention that sk_subtype == INVALID_OID means the * opclass input type; this is a hack to simplify life for scankey_init(). */ if (cur->sk_subtype == rel->rd_opcintype[0] || cur->sk_subtype == INVALID_OID) hashkey = _hash_datum2hashkey(rel, cur->sk_argument); else hashkey = _hash_datum2hashkey_type(rel, cur->sk_argument, cur->sk_subtype); so->hashso_sk_hash = hashkey; /* * Acquire shared split lock so we can compute the target bucket safely * (see README). */ _hash_getlock(rel, 0, HASH_SHARE); /* Read the metapage */ metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); metap = HASH_PAGE_GET_META(BUF_PAGE(metabuf)); /* * Compute the target bucket number, and convert to block number. */ bucket = _hash_hashkey2bucket( hashkey, metap->hashm_maxbucket, metap->hashm_highmask, metap->hashm_lowmask); blkno = BUCKET_TO_BLKNO(metap, bucket); /* done with the metapage */ _hash_relbuf(rel, metabuf); /* * Acquire share lock on target bucket; then we can release split lock. */ _hash_getlock(rel, blkno, HASH_SHARE); _hash_droplock(rel, 0, HASH_SHARE); /* Update scan opaque state to show we have lock on the bucket */ so->hashso_bucket = bucket; so->hashso_bucket_valid = true; so->hashso_bucket_blkno = blkno; /* Fetch the primary bucket page for the bucket */ buf = _hash_getbuf(rel, blkno, HASH_READ, LH_BUCKET_PAGE); page = BUF_PAGE(buf); opaque = (struct hash_page*) PAGE_SPECIAL_PTR(page); ASSERT(opaque->hasho_bucket == bucket); /* If a backwards scan is requested, move to the end of the chain */ if (SCANDIR_BACKWARD(dir)) { while (BLK_NR_VALID(opaque->hasho_nextblkno)) _hash_readnext(rel, &buf, &page, &opaque); } /* Now find the first tuple satisfying the qualification */ if (!_hash_step(scan, &buf, dir)) return false; /* if we're here, _hash_step found a valid tuple */ offnum = ITEM_PTR_OFFSET(current); _hash_checkpage(rel, buf, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE); page = BUF_PAGE(buf); itup = (struct index_tuple *)PAGE_GET_ITEM(page, PAGE_ITEM_ID(page, offnum)); so->hashso_heappos = itup->t_tid; return true; }